113 lines
3.6 KiB
Python
113 lines
3.6 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
|
||
"""
|
||
OCR订单处理系统 - 无界面自动化接口
|
||
-----------------------------
|
||
专为与 openclaw 等自动化平台对接设计。
|
||
处理流程:输入图片 -> OCR识别 -> 数据清洗 -> 价格校验 -> 输出结果路径。
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import logging
|
||
import time
|
||
from pathlib import Path
|
||
from typing import Optional
|
||
|
||
# Append the directory containing this script to the module search path
# (presumably so the ``app`` package imported below resolves — confirm layout).
_script_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_script_dir)
|
||
|
||
from app.config.settings import ConfigManager
|
||
from app.services.ocr_service import OCRService
|
||
from app.services.order_service import OrderService
|
||
from app.core.utils.log_utils import set_log_level
|
||
|
||
# Send log records to stderr so that stdout carries nothing but the result
# path printed by the pipeline.
logging.basicConfig(
    stream=sys.stderr,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Logger used by this script.
logger = logging.getLogger("HeadlessAPI")
|
||
|
||
def get_latest_input_image(input_dir: "str | Path" = "data/input") -> Optional[str]:
    """Return the most recently modified image file in ``input_dir``.

    Only regular files whose extension is ``.jpg``, ``.jpeg``, ``.png`` or
    ``.bmp`` are considered; the extension is compared case-insensitively, so
    spellings such as ``.Png`` are found on case-sensitive filesystems too.

    Args:
        input_dir: Directory to scan. Defaults to ``data/input``, which is
            resolved against the current working directory.

    Returns:
        The path of the newest matching file (by modification time) as a
        string, or ``None`` when the directory does not exist or contains no
        matching file.
    """
    directory = Path(input_dir)
    if not directory.is_dir():
        return None

    image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp'}

    # Collect (mtime, path) pairs so every file is stat'ed exactly once.
    candidates = []
    for entry in directory.iterdir():
        if entry.suffix.lower() not in image_suffixes:
            continue
        try:
            # Skip directories that merely carry an image-like name.
            if entry.is_file():
                candidates.append((entry.stat().st_mtime, entry))
        except OSError:
            # The entry vanished or became unreadable between listing and
            # stat; ignore it rather than failing the whole lookup.
            continue

    if not candidates:
        return None

    newest = max(candidates, key=lambda pair: pair[0])
    return str(newest[1])
|
||
|
||
def run_pipeline(image_path: Optional[str] = None):
    """Run the full processing pipeline for a single image.

    Steps, in order: pick the input image, build the services, run OCR,
    post-process the OCR output, validate unit prices, and print the absolute
    path of the final file to stdout. All diagnostics go to stderr.

    Args:
        image_path: Image to process. When falsy, the result of
            ``get_latest_input_image()`` is used instead.

    Returns:
        The absolute path of the final output as a string, or ``None`` when
        no input was found, a stage returned a falsy result, or any exception
        was raised (the exception is reported on stderr, not propagated).
    """
    try:
        # Step 1: settle on the input file, falling back to the newest one.
        image_path = image_path or get_latest_input_image()
        if not image_path:
            print("ERROR: No input image found.", file=sys.stderr)
            return None

        logger.info(f"开始处理图片: {image_path}")

        # Step 2: build both services from a single configuration object.
        settings = ConfigManager()
        recognizer = OCRService(settings)
        orders = OrderService(settings)

        # Step 3: OCR. The timer starts here, so service setup is not counted.
        started_at = time.perf_counter()
        ocr_output = recognizer.process_image(image_path)
        if not ocr_output:
            print(f"ERROR: OCR failed for {image_path}", file=sys.stderr)
            return None

        # Step 4: process and clean the OCR output; a falsy result is a failure.
        cleaned_output = orders.process_excel(ocr_output)
        if not cleaned_output:
            print(f"ERROR: Excel processing failed for {ocr_output}", file=sys.stderr)
            return None

        # Step 5: unit-price validation. Findings are reported on stderr only
        # and do not abort the run.
        price_issues = orders.validate_unit_price(cleaned_output)
        if not price_issues:
            logger.info("单价校验通过")
        else:
            print(f"WARNING: Price validation found {len(price_issues)} issues:", file=sys.stderr)
            for issue in price_issues:
                print(f" - {issue}", file=sys.stderr)

        elapsed = time.perf_counter() - started_at
        logger.info(f"处理完成,耗时: {elapsed:.2f}s")

        # Step 6: emit the result on stdout, always as an absolute path.
        result_path = os.path.abspath(cleaned_output)
        print(result_path)
        return result_path

    except Exception as exc:
        # Top-level boundary: report the failure with its traceback on stderr
        # and signal it to the caller through the None return value.
        import traceback

        print(f"CRITICAL ERROR: {exc!s}", file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        return None
|
||
|
||
if __name__ == "__main__":
    # An image path may be given as the first command-line argument; without
    # one, run_pipeline receives None and picks its own input.
    cli_args = sys.argv[1:]
    requested_image = cli_args[0] if cli_args else None

    outcome = run_pipeline(requested_image)

    # Exit status 0 when the pipeline returned a truthy result, 1 otherwise.
    sys.exit(1 if not outcome else 0)
|