#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
OCR order-processing system - headless automation interface
-----------------------------
Designed for integration with automation platforms such as openclaw.
Pipeline: input image -> OCR recognition -> data cleaning -> price validation
-> output result path.

Only the final result path is written to stdout; logs, warnings and errors
all go to stderr.
"""

import os
import sys
import logging
import time
import traceback
from pathlib import Path
from typing import Optional, Union

# Add the directory containing this script to the module search path
# (done before the `app.*` imports below).
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from app.config.settings import ConfigManager
from app.services.ocr_service import OCRService
from app.services.order_service import OrderService
from app.core.utils.log_utils import set_log_level

# Send log output to stderr so it does not interfere with the result path
# printed on stdout.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    stream=sys.stderr
)
logger = logging.getLogger("HeadlessAPI")

# Default directory scanned for input images (relative to the working directory).
DEFAULT_INPUT_DIR = "data/input"

# Image file suffixes accepted as input, compared case-insensitively.
IMAGE_SUFFIXES = frozenset({'.jpg', '.jpeg', '.png', '.bmp'})


def get_latest_input_image(
    input_dir: Union[str, Path] = DEFAULT_INPUT_DIR,
) -> Optional[str]:
    """Return the most recently modified image file in the input directory.

    Args:
        input_dir: Directory to scan. Defaults to ``data/input``, resolved
            against the current working directory.

    Returns:
        The path of the newest image as a string, or ``None`` when the
        directory is missing or holds no file with a supported suffix.
    """
    directory = Path(input_dir)
    if not directory.is_dir():
        return None

    # One pass with a case-insensitive suffix check: this also accepts
    # mixed-case names such as "scan.Jpg", never lists a file twice on
    # case-insensitive filesystems, and skips directories whose names merely
    # look like images.
    candidates = [
        entry
        for entry in directory.iterdir()
        if entry.suffix.lower() in IMAGE_SUFFIXES and entry.is_file()
    ]
    if not candidates:
        return None

    # Pick the entry with the newest modification time.
    latest_file = max(candidates, key=lambda p: p.stat().st_mtime)
    return str(latest_file)


def run_pipeline(image_path: Optional[str] = None) -> Optional[str]:
    """Run the processing pipeline for one image.

    Steps: choose the input image, run OCR, process the intermediate Excel
    file, validate unit prices, then print the absolute path of the final
    Excel file to stdout.

    Args:
        image_path: Image to process. When omitted or empty, the newest image
            found by ``get_latest_input_image()`` is used.

    Returns:
        The absolute path of the final Excel file, or ``None`` when no input
        image is available, a step returns a falsy result, or an exception is
        raised. Failures are reported on stderr instead of being propagated.
    """
    try:
        # 1. Determine the input file
        if not image_path:
            image_path = get_latest_input_image()

        if not image_path:
            print("ERROR: No input image found.", file=sys.stderr)
            return None

        logger.info(f"开始处理图片: {image_path}")

        # 2. Initialise the services
        config_manager = ConfigManager()
        ocr_service = OCRService(config_manager)
        order_service = OrderService(config_manager)

        # 3. OCR recognition (timing starts here, after service initialisation)
        start_time = time.perf_counter()
        excel_intermediate = ocr_service.process_image(image_path)

        if not excel_intermediate:
            print(f"ERROR: OCR failed for {image_path}", file=sys.stderr)
            return None

        # 4. Excel processing and cleaning
        final_excel = order_service.process_excel(excel_intermediate)

        if not final_excel:
            print(f"ERROR: Excel processing failed for {excel_intermediate}", file=sys.stderr)
            return None

        # 5. Unit-price validation (reported on stderr); discrepancies are
        #    warnings only and do not fail the run.
        discrepancies = order_service.validate_unit_price(final_excel)
        if discrepancies:
            print(f"WARNING: Price validation found {len(discrepancies)} issues:", file=sys.stderr)
            for d in discrepancies:
                print(f"  - {d}", file=sys.stderr)
        else:
            logger.info("单价校验通过")

        duration = time.perf_counter() - start_time
        logger.info(f"处理完成,耗时: {duration:.2f}s")

        # 6. Print the final result path to stdout, always as an absolute path
        abs_path = os.path.abspath(final_excel)
        print(abs_path)
        return abs_path

    except Exception as e:
        # Top-level boundary: report the failure on stderr and signal it to
        # the caller through the None return value.
        print(f"CRITICAL ERROR: {str(e)}", file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        return None


if __name__ == "__main__":
    # An image path may be passed as the first command-line argument
    input_path = sys.argv[1] if len(sys.argv) > 1 else None

    result = run_pipeline(input_path)
    # Exit status 0 when a result path was produced, 1 otherwise
    sys.exit(0 if result else 1)