- 新增智能识别功能,自动检测蓉城易购、烟草公司、杨碧月订单特征
- 修改订单服务流程,在Excel处理前自动执行专用预处理
- 更新无界面API,支持智能识别模式,简化OpenClaw集成
- 完善供应商专用预处理逻辑,修复数量计算和单位换算问题
- 添加变更日志和最终更新报告文档,记录v2.1版本变更
178 lines
7.0 KiB
Python
178 lines
7.0 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
OCR订单处理系统 - 无界面自动化接口
|
|
-----------------------------
|
|
专为与 openclaw 等自动化平台对接设计。
|
|
处理流程:输入图片 -> OCR识别 -> 数据清洗 -> 价格校验 -> 输出结果路径。
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import logging
|
|
import time
|
|
import argparse
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Optional, List, Dict
|
|
|
|
# 添加当前目录到路径
|
|
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
from app.config.settings import ConfigManager
|
|
from app.services.ocr_service import OCRService
|
|
from app.services.order_service import OrderService
|
|
from app.services.tobacco_service import TobaccoService
|
|
from app.services.special_suppliers_service import SpecialSuppliersService
|
|
from app.core.utils.log_utils import set_log_level
|
|
|
|
# Send all log records to stderr so that stdout carries only the final
# result path printed by the pipeline.
logging.basicConfig(
    stream=sys.stderr,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-wide logger used by every function in this script.
logger = logging.getLogger("HeadlessAPI")
|
|
|
|
def get_latest_file(directory: str, extensions: List[str]) -> Optional[str]:
    """Return the most recently modified file in a directory with a given suffix.

    Only regular files directly inside ``directory`` are considered (no
    recursion). Suffix matching is case-insensitive, so ``".jpg"`` also
    matches ``photo.JPG`` and ``photo.Jpg``.

    Args:
        directory: Directory to scan.
        extensions: File-name suffixes to accept, e.g. ``[".xlsx", ".xls"]``.

    Returns:
        The path of the newest matching file as a string, or ``None`` when
        ``directory`` is not an existing directory or holds no matching file.
    """
    dir_path = Path(directory)
    if not dir_path.is_dir():
        return None

    # str.endswith accepts a tuple, so one call checks every wanted suffix.
    suffixes = tuple(ext.lower() for ext in extensions)
    if not suffixes:
        return None

    newest_path = None
    newest_mtime = None
    for entry in dir_path.iterdir():
        if not entry.name.lower().endswith(suffixes):
            continue
        try:
            # Skip directories and dangling symlinks that merely look like
            # a matching file by name.
            if not entry.is_file():
                continue
            mtime = entry.stat().st_mtime
        except OSError:
            # The entry vanished or became unreadable between listing and
            # stat; ignore it instead of failing the whole lookup.
            continue
        if newest_mtime is None or mtime > newest_mtime:
            newest_path, newest_mtime = entry, mtime

    return str(newest_path) if newest_path is not None else None
|
|
|
|
def update_barcode_mapping(barcode: str, target_barcode: str) -> bool:
    """Add or overwrite one entry in ``config/barcode_mappings.json``.

    The path is relative to the current working directory. The existing
    file (if any) is loaded, ``mappings[barcode]`` is set to
    ``target_barcode`` and the result is written back as UTF-8 JSON.

    The new content is first written to a temporary sibling file and then
    moved over the real file with ``os.replace``, so a failure while
    serializing or writing never leaves a truncated mapping file behind.
    The ``config`` directory is created when it does not exist yet.

    Args:
        barcode: Source barcode used as the mapping key.
        target_barcode: Value stored for that key.

    Returns:
        ``True`` when the file was updated, ``False`` on any error (the
        error is logged, not raised).
    """
    config_path = os.path.join("config", "barcode_mappings.json")
    tmp_path = config_path + ".tmp"
    try:
        mappings = {}
        if os.path.exists(config_path):
            with open(config_path, 'r', encoding='utf-8') as f:
                mappings = json.load(f)

        mappings[barcode] = target_barcode

        os.makedirs(os.path.dirname(config_path), exist_ok=True)
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(mappings, f, ensure_ascii=False, indent=2)
        # Atomic swap: readers see either the old or the new file, never a
        # partially written one.
        os.replace(tmp_path, config_path)

        logger.info(f"成功更新条码映射: {barcode} -> {target_barcode}")
        return True
    except Exception as e:
        logger.error(f"更新条码映射失败: {e}")
        # Best-effort cleanup of a leftover temp file; the original mapping
        # file is untouched at this point.
        if os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        return False
|
|
|
|
def run_pipeline(args):
    """Run one headless processing job chosen by the parsed CLI arguments.

    Exactly one mode is executed, checked in this order:

    1. ``args.update_mapping`` - store ``args.barcode -> args.target`` via
       ``update_barcode_mapping`` and return immediately.
    2. ``args.tobacco`` - hand the input to
       ``TobaccoService.process_tobacco_order``.
    3. ``args.rongcheng`` - hand the input to
       ``SpecialSuppliersService.process_rongcheng_yigou``.
    4. ``args.excel`` - hand the input to ``OrderService.process_excel``.
    5. otherwise - run ``OCRService.process_image`` on an image and feed
       its result (when truthy) to ``OrderService.process_excel``.

    When ``args.input`` is empty, the newest matching file from
    ``data/output`` (modes 2-3) or ``data/input`` (modes 4-5) is used.
    A produced file is then passed to ``OrderService.validate_unit_price``;
    reported issues are printed to stderr as warnings and do not fail the
    run. The absolute result path is printed to stdout.

    Args:
        args: Namespace with the attributes ``input``, ``excel``,
            ``tobacco``, ``rongcheng``, ``update_mapping``, ``barcode``
            and ``target``.

    Returns:
        ``"MAPPING_UPDATED"`` after a successful mapping update, the
        absolute path of the result file after a successful processing
        run, or ``None`` on any failure. Exceptions are caught, reported
        on stderr with a traceback, and turned into ``None``.
    """
    try:
        config_manager = ConfigManager()
        order_service = OrderService(config_manager)
        started_at = time.perf_counter()
        result_file = None

        # Mode 1: barcode mapping maintenance, always returns from here.
        if args.update_mapping:
            if not (args.barcode and args.target):
                print("ERROR: --barcode and --target are required for --update-mapping", file=sys.stderr)
                return None
            if not update_barcode_mapping(args.barcode, args.target):
                return None
            print(f"SUCCESS: Mapping updated {args.barcode} -> {args.target}")
            return "MAPPING_UPDATED"

        if args.tobacco:
            # Mode 2: tobacco company order, explicitly requested.
            source = args.input or get_latest_file("data/output", [".xlsx", ".xls"])
            if not source:
                print("ERROR: No tobacco order file found.", file=sys.stderr)
                return None
            logger.info(f"开始显式处理烟草订单: {source}")
            result_file = TobaccoService(config_manager).process_tobacco_order(source)

        elif args.rongcheng:
            # Mode 3: Rongcheng Yigou order, explicitly requested.
            source = args.input or get_latest_file("data/output", [".xlsx", ".xls"])
            if not source:
                print("ERROR: No Rongcheng Yigou order file found.", file=sys.stderr)
                return None
            logger.info(f"开始显式处理蓉城易购订单: {source}")
            result_file = SpecialSuppliersService(config_manager).process_rongcheng_yigou(source)

        elif args.excel:
            # Mode 4: generic Excel input.
            # NOTE(review): supplier auto-detection is presumably done inside
            # OrderService.process_excel - confirm against that service.
            source = args.input or get_latest_file("data/input", [".xlsx", ".xls"])
            if not source:
                print("ERROR: No Excel file found in input.", file=sys.stderr)
                return None
            logger.info(f"开始处理 Excel (支持智能识别): {source}")
            result_file = order_service.process_excel(source)

        else:
            # Mode 5 (default): OCR an image, then process the resulting Excel.
            source = args.input or get_latest_file("data/input", [".jpg", ".jpeg", ".png", ".bmp"])
            if not source:
                print("ERROR: No input image found.", file=sys.stderr)
                return None
            logger.info(f"开始 OCR 处理图片: {source}")
            intermediate = OCRService(config_manager).process_image(source)
            if intermediate:
                result_file = order_service.process_excel(intermediate)

        # Nothing was produced by the selected mode.
        if not result_file:
            print("ERROR: Processing failed.", file=sys.stderr)
            return None

        # Unit price validation: issues are reported but are not fatal.
        price_issues = order_service.validate_unit_price(result_file)
        if price_issues:
            print(f"WARNING: Price validation found {len(price_issues)} issues:", file=sys.stderr)
            for issue in price_issues:
                print(f" - {issue}", file=sys.stderr)

        elapsed = time.perf_counter() - started_at
        logger.info(f"处理完成,耗时: {elapsed:.2f}s")

        # The absolute path on stdout is the machine-readable result.
        final_path = os.path.abspath(result_file)
        print(final_path)
        return final_path

    except Exception as e:
        import traceback

        print(f"CRITICAL ERROR: {str(e)}", file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        return None
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, run the pipeline and
    # exit with status 0 on success or 1 on failure.
    cli = argparse.ArgumentParser(description="OCR订单处理系统 - 无界面自动化接口")
    cli.add_argument('input', nargs='?', help='输入文件路径 (图片或Excel)')

    # At most one processing mode may be selected per invocation.
    modes = cli.add_mutually_exclusive_group()
    mode_flags = (
        ('--excel', '处理普通 Excel 文件'),
        ('--tobacco', '处理烟草公司订单'),
        ('--rongcheng', '处理蓉城易购订单'),
        ('--update-mapping', '更新条码映射'),
    )
    for flag, help_text in mode_flags:
        modes.add_argument(flag, action='store_true', help=help_text)

    # Only meaningful together with --update-mapping.
    cli.add_argument('--barcode', help='待映射的原始条码 (用于 --update-mapping)')
    cli.add_argument('--target', help='目标条码 (用于 --update-mapping)')

    outcome = run_pipeline(cli.parse_args())
    if outcome:
        sys.exit(0)
    sys.exit(1)
|