feat: 益选 OCR 订单处理系统初始提交
- 智能供应商识别(蓉城易购/烟草/杨碧月/通用) - 百度 OCR 表格识别集成 - 规则引擎(列映射/数据清洗/单位转换/规格推断) - 条码映射管理与云端同步(Gitea REST API) - 云端同步支持:条码映射、供应商配置、商品资料、采购模板 - 拖拽一键处理(图片→OCR→Excel→合并) - 191 个单元测试 - 移除无用的模板管理功能 - 清理 IDE 产物目录 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+212
@@ -0,0 +1,212 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
OCR订单处理系统 - 无界面自动化接口
|
||||
-----------------------------
|
||||
专为与 openclaw 等自动化平台对接设计。
|
||||
处理流程:输入图片 -> OCR识别 -> 数据清洗 -> 价格校验 -> 输出结果路径。
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Optional, List, Dict
|
||||
|
||||
# 添加当前目录到路径
|
||||
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from app.config.settings import ConfigManager
|
||||
from app.services.ocr_service import OCRService
|
||||
from app.services.order_service import OrderService
|
||||
from app.services.tobacco_service import TobaccoService
|
||||
from app.services.special_suppliers_service import SpecialSuppliersService
|
||||
from app.core.utils.log_utils import get_logger, set_log_level
|
||||
|
||||
logger = get_logger("HeadlessAPI")
|
||||
|
||||
def get_latest_file(directory: str, extensions: List[str]) -> Optional[str]:
    """Return the most recently modified file in *directory* with a given ending.

    Matching is done case-insensitively on the end of the file name, so the
    extension ``.jpg`` accepts ``a.jpg``, ``a.JPG`` and ``a.Jpg`` alike.
    Only regular files located directly inside *directory* are considered;
    sub-directories are never returned, even if their name matches.

    Args:
        directory: Folder to scan (not recursive).
        extensions: File-name endings to accept, e.g. ``[".xlsx", ".xls"]``.

    Returns:
        The path of the newest matching file as a string, or ``None`` when
        the directory does not exist or holds no matching file.
    """
    dir_path = Path(directory)
    if not dir_path.is_dir():
        return None

    # str.endswith accepts a tuple, so one lowered comparison covers every
    # requested extension in any letter case.
    suffixes = tuple(ext.lower() for ext in extensions)
    if not suffixes:
        return None

    latest_path = None
    latest_mtime = None
    for entry in dir_path.iterdir():
        if not entry.name.lower().endswith(suffixes):
            continue
        try:
            if not entry.is_file():
                continue
            mtime = entry.stat().st_mtime
        except OSError:
            # The entry vanished or became unreadable between listing and
            # stat; skip it instead of aborting the whole lookup.
            continue
        if latest_mtime is None or mtime > latest_mtime:
            latest_path, latest_mtime = entry, mtime

    return str(latest_path) if latest_path is not None else None
|
||||
|
||||
def update_barcode_mapping(barcode: str, target_barcode: Optional[str] = None, multiplier: Optional[float] = None, unit: Optional[str] = None, price: Optional[float] = None, spec: Optional[str] = None) -> bool:
    """Create or update the special-handling entry for one barcode.

    Loads ``config/barcode_mappings.json`` (relative to the current working
    directory) if it exists, merges the supplied fields into the entry stored
    under *barcode*, and writes the whole mapping back.

    Args:
        barcode: Key of the entry to create or update.
        target_barcode: Stored as ``map_to`` when truthy.
        multiplier: Stored as ``multiplier`` when not ``None``.
        unit: Stored as ``target_unit`` when truthy.
        price: Stored as ``fixed_price`` when not ``None``.
        spec: Stored as ``specification`` when truthy.

    Returns:
        ``True`` when the file was written, ``False`` when anything failed
        (the error is logged, never raised).
    """
    try:
        config_path = os.path.join("config", "barcode_mappings.json")
        mappings = {}
        if os.path.exists(config_path):
            with open(config_path, 'r', encoding='utf-8') as f:
                mappings = json.load(f)
        if not isinstance(mappings, dict):
            raise ValueError(f"{config_path} must contain a JSON object")

        # Fetch the existing entry for this barcode, or start a new one.
        config = mappings.get(barcode, {})
        if not isinstance(config, dict):
            raise ValueError(f"entry for {barcode} must be a JSON object")

        if target_barcode:
            config["map_to"] = target_barcode
            _append_description_note(config, f"条码映射 -> {target_barcode}")

        if multiplier is not None:
            config["multiplier"] = multiplier
            _append_description_note(config, f"数量倍数*{multiplier}")

        if unit:
            config["target_unit"] = unit

        if price is not None:
            config["fixed_price"] = price

        if spec:
            config["specification"] = spec

        if not config.get("description"):
            config["description"] = f"特殊条码配置: {barcode}"

        mappings[barcode] = config

        # Serialise before touching the disk: if the data cannot be encoded,
        # the existing file must stay intact rather than be truncated.
        payload = json.dumps(mappings, ensure_ascii=False, indent=2)
        os.makedirs(os.path.dirname(config_path), exist_ok=True)

        # Write to a sibling temp file and swap it in, so a crash mid-write
        # cannot leave a half-written mapping file behind.
        tmp_path = config_path + ".tmp"
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                f.write(payload)
            os.replace(tmp_path, config_path)
        finally:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

        logger.info(f"成功更新条码配置: {barcode} -> {config}")
        return True
    except Exception as e:
        logger.error(f"更新条码配置失败: {e}")
        return False


def _append_description_note(config: Dict, note: str) -> None:
    """Append *note* to ``config["description"]`` unless it is already there."""
    current = config.get("description", "")
    # Pad both sides with spaces so a note only counts as present when it
    # appears as a whole space-delimited segment ("*3.0" is not "*3.05").
    if f" {note} " in f" {current} ":
        return
    config["description"] = f"{current} {note}" if current else note
|
||||
|
||||
def run_pipeline(args):
    """Run one headless processing job described by the parsed CLI *args*.

    Exactly one of the following modes is executed, chosen from the flags on
    *args*: barcode-mapping update, explicit tobacco order, explicit
    Rongcheng Yigou order, plain Excel, or the default mode that decides
    between Excel and image (OCR) from the input file's extension. When no
    input path is given, the newest suitable file from the configured
    input/output folder is used.

    On success the absolute path of the resulting Excel file is printed to
    stdout; diagnostics go to stderr.

    Args:
        args: Namespace with the attributes ``input``, ``excel``, ``tobacco``,
            ``rongcheng``, ``update_mapping``, ``barcode``, ``target``,
            ``multiplier``, ``unit``, ``price`` and ``spec``.

    Returns:
        The absolute output path, the string ``"MAPPING_UPDATED"`` after a
        successful mapping update, or ``None`` on any failure.
    """
    try:
        config_manager = ConfigManager()
        order_service = OrderService(config_manager)
        start_time = time.perf_counter()
        final_excel = None

        input_folder = config_manager.get('Paths', 'input_folder', fallback='data/input')
        output_folder = config_manager.get('Paths', 'output_folder', fallback='data/output')

        # 1. Barcode mapping update.
        if args.update_mapping:
            if not args.barcode:
                print("ERROR: --barcode is required for --update-mapping", file=sys.stderr)
                return None

            # At least one field must be supplied. Numeric options are tested
            # with "is not None" so that an explicit 0 (e.g. --price 0) counts
            # as supplied, matching how update_barcode_mapping treats them.
            has_update = any((
                args.target,
                args.multiplier is not None,
                args.unit,
                args.price is not None,
                args.spec,
            ))
            if not has_update:
                print("ERROR: At least one update option (--target, --multiplier, --unit, --price, --spec) is required", file=sys.stderr)
                return None

            if update_barcode_mapping(args.barcode, args.target, args.multiplier, args.unit, args.price, args.spec):
                print(f"SUCCESS: Barcode configuration updated for {args.barcode}")
                return "MAPPING_UPDATED"

            # Details were already logged by update_barcode_mapping; still
            # tell the caller on stderr why the exit status is non-zero.
            print(f"ERROR: Failed to update barcode configuration for {args.barcode}", file=sys.stderr)
            return None

        # 2. Tobacco company order (explicitly requested).
        if args.tobacco:
            input_path = args.input or get_latest_file(output_folder, [".xlsx", ".xls"])
            if not input_path:
                print("ERROR: No tobacco order file found.", file=sys.stderr)
                return None
            logger.info(f"开始显式处理烟草订单: {input_path}")
            # Original author's note: process_tobacco_order runs its own
            # preprocessing and produces the Yinbao (银豹) format.
            tobacco_service = TobaccoService(config_manager)
            final_excel = tobacco_service.process_tobacco_order(input_path)

        # 3. Rongcheng Yigou order (explicitly requested).
        elif args.rongcheng:
            input_path = args.input or get_latest_file(output_folder, [".xlsx", ".xls"])
            if not input_path:
                print("ERROR: No Rongcheng Yigou order file found.", file=sys.stderr)
                return None
            logger.info(f"开始显式处理蓉城易购订单: {input_path}")
            special_service = SpecialSuppliersService(config_manager)
            final_excel = special_service.process_rongcheng_yigou(input_path)

        # 4. Plain Excel processing.
        elif args.excel:
            input_path = args.input or get_latest_file(input_folder, [".xlsx", ".xls"])
            if not input_path:
                print("ERROR: No Excel file found in input.", file=sys.stderr)
                return None
            logger.info(f"开始处理 Excel (支持智能识别): {input_path}")
            # Original author's note: OrderService.process_excel detects
            # special suppliers internally (_check_special_preprocess).
            final_excel = order_service.process_excel(input_path)

        # 5. Default: decide between Excel and image from the extension.
        else:
            input_path = args.input or get_latest_file(input_folder, [".jpg", ".jpeg", ".png", ".bmp", ".xlsx", ".xls"])
            if not input_path:
                print(f"ERROR: No input file found in {input_folder}.", file=sys.stderr)
                return None

            ext = os.path.splitext(input_path)[1].lower()
            if ext in [".xlsx", ".xls"]:
                logger.info(f"智能识别为 Excel 文件,开始处理: {input_path}")
                final_excel = order_service.process_excel(input_path)
            else:
                logger.info(f"智能识别为图片文件,开始 OCR 处理: {input_path}")
                ocr_service = OCRService(config_manager)
                excel_intermediate = ocr_service.process_image(input_path)
                # A falsy OCR result leaves final_excel as None, which is
                # reported as a processing failure below.
                if excel_intermediate:
                    final_excel = order_service.process_excel(excel_intermediate)

        # 6. Post-processing: validation and output.
        if not final_excel:
            print("ERROR: Processing failed.", file=sys.stderr)
            return None

        # Unit-price validation; findings are warnings, not failures.
        discrepancies = order_service.validate_unit_price(final_excel)
        if discrepancies:
            print(f"WARNING: Price validation found {len(discrepancies)} issues:", file=sys.stderr)
            for d in discrepancies:
                print(f" - {d}", file=sys.stderr)

        duration = time.perf_counter() - start_time
        logger.info(f"处理完成,耗时: {duration:.2f}s")

        # The absolute result path is the only thing written to stdout here.
        abs_path = os.path.abspath(final_excel)
        print(abs_path)
        return abs_path

    except Exception as e:
        # Top-level boundary: report the error and a traceback on stderr and
        # signal failure through the return value instead of raising.
        import traceback
        print(f"CRITICAL ERROR: {str(e)}", file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        return None
|
||||
|
||||
def build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the headless interface.

    The positional ``input`` is optional. ``--excel``, ``--tobacco``,
    ``--rongcheng`` and ``--update-mapping`` are mutually exclusive mode
    flags; the remaining options carry the fields for ``--update-mapping``.
    """
    parser = argparse.ArgumentParser(description="OCR订单处理系统 - 无界面自动化接口")
    parser.add_argument('input', nargs='?', help='输入文件路径 (图片或Excel)')

    group = parser.add_mutually_exclusive_group()
    group.add_argument('--excel', action='store_true', help='处理普通 Excel 文件')
    group.add_argument('--tobacco', action='store_true', help='处理烟草公司订单')
    group.add_argument('--rongcheng', action='store_true', help='处理蓉城易购订单')
    group.add_argument('--update-mapping', action='store_true', help='更新条码映射')

    parser.add_argument('--barcode', help='待映射的原始条码 (用于 --update-mapping)')
    parser.add_argument('--target', help='目标条码 (用于 --update-mapping)')
    parser.add_argument('--multiplier', type=float, help='数量倍数 (例如箱转瓶填写30)')
    parser.add_argument('--unit', help='目标单位 (例如"瓶")')
    parser.add_argument('--price', type=float, help='固定单价')
    parser.add_argument('--spec', help='固定规格 (例如"1*30")')
    return parser


def main(argv: Optional[List[str]] = None) -> int:
    """Parse *argv* (defaults to ``sys.argv[1:]``) and run the pipeline.

    Returns:
        Process exit status: 0 when ``run_pipeline`` returns a truthy result,
        1 otherwise.
    """
    args = build_arg_parser().parse_args(argv)
    result = run_pipeline(args)
    return 0 if result else 1


if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user