orc-order-v2/headless_api.py

113 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
OCR订单处理系统 - 无界面自动化接口
-----------------------------
专为与 openclaw 等自动化平台对接设计。
处理流程:输入图片 -> OCR识别 -> 数据清洗 -> 价格校验 -> 输出结果路径。
"""
import os
import sys
import logging
import time
from pathlib import Path
from typing import Optional
# 添加当前目录到路径
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from app.config.settings import ConfigManager
from app.services.ocr_service import OCRService
from app.services.order_service import OrderService
from app.core.utils.log_utils import set_log_level
# Send log records to stderr so that stdout stays reserved for the result
# path printed by the pipeline.
logging.basicConfig(
    stream=sys.stderr,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("HeadlessAPI")
def get_latest_input_image() -> Optional[str]:
    """Return the most recently modified image file in ``data/input``.

    The directory is resolved relative to the current working directory.
    A file counts as an image when its suffix, compared case-insensitively,
    is one of ``.jpg``, ``.jpeg``, ``.png`` or ``.bmp``.

    Returns:
        The path of the newest matching file as a string (relative, in the
        form ``data/input/<name>``), or ``None`` when the directory is
        missing or contains no matching regular file.
    """
    input_dir = Path("data/input")
    if not input_dir.is_dir():
        return None

    image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp'}
    newest_path = None
    newest_mtime = None
    for entry in input_dir.iterdir():
        # Case-insensitive comparison so that names such as "scan.Jpg" are
        # found on case-sensitive filesystems too.
        if entry.suffix.lower() not in image_suffixes:
            continue
        try:
            # Skip directories that merely look like images by name.
            if not entry.is_file():
                continue
            mtime = entry.stat().st_mtime
        except OSError:
            # The entry vanished or became unreadable while scanning;
            # ignore it rather than aborting the whole lookup.
            continue
        if newest_mtime is None or mtime > newest_mtime:
            newest_path, newest_mtime = entry, mtime

    if newest_path is None:
        return None
    return str(newest_path)
def run_pipeline(image_path: Optional[str] = None):
    """Run the headless pipeline for one image and report the result path.

    Steps: pick the input image, build the OCR and order services, run OCR,
    post-process the OCR output, validate unit prices, then print the
    absolute path of the final file to stdout. Diagnostics go to stderr
    (directly or through the module logger).

    Args:
        image_path: Path of the image to process. When empty or ``None``,
            the newest image reported by ``get_latest_input_image()`` is
            used instead.

    Returns:
        The absolute path of the final output file, or ``None`` when no
        input image is available, a stage returns a falsy result, or any
        exception is raised (the exception is reported on stderr and not
        propagated).
    """
    try:
        # Step 1: resolve the input image, falling back to the newest one.
        image_path = image_path or get_latest_input_image()
        if not image_path:
            print("ERROR: No input image found.", file=sys.stderr)
            return None
        logger.info(f"开始处理图片: {image_path}")

        # Step 2: both services share a single configuration manager.
        settings = ConfigManager()
        ocr = OCRService(settings)
        orders = OrderService(settings)

        # Step 3: OCR. The timer starts here, so service construction is
        # not included in the reported duration.
        timer_start = time.perf_counter()
        ocr_output = ocr.process_image(image_path)
        if not ocr_output:
            print(f"ERROR: OCR failed for {image_path}", file=sys.stderr)
            return None

        # Step 4: post-process / clean the OCR output.
        cleaned_output = orders.process_excel(ocr_output)
        if not cleaned_output:
            print(f"ERROR: Excel processing failed for {ocr_output}", file=sys.stderr)
            return None

        # Step 5: unit-price validation. Findings are warnings only and do
        # not stop the pipeline.
        price_issues = orders.validate_unit_price(cleaned_output)
        if not price_issues:
            logger.info("单价校验通过")
        else:
            print(f"WARNING: Price validation found {len(price_issues)} issues:", file=sys.stderr)
            for issue in price_issues:
                print(f" - {issue}", file=sys.stderr)

        elapsed = time.perf_counter() - timer_start
        logger.info(f"处理完成,耗时: {elapsed:.2f}s")

        # Step 6: stdout carries only the absolute path of the result.
        result_path = os.path.abspath(cleaned_output)
        print(result_path)
        return result_path
    except Exception as exc:
        # Top-level boundary: report the failure on stderr and signal it to
        # the caller through the None return value.
        import traceback
        print(f"CRITICAL ERROR: {exc!s}", file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        return None
if __name__ == "__main__":
    # An optional first command-line argument names the image to process;
    # without it the pipeline picks the newest image itself.
    cli_args = sys.argv[1:]
    requested_image = cli_args[0] if cli_args else None
    # Exit status 0 on success, 1 when the pipeline returned nothing.
    exit_code = 0 if run_pipeline(requested_image) else 1
    sys.exit(exit_code)