feat: 益选 OCR 订单处理系统初始提交

- 智能供应商识别(蓉城易购/烟草/杨碧月/通用)
- 百度 OCR 表格识别集成
- 规则引擎(列映射/数据清洗/单位转换/规格推断)
- 条码映射管理与云端同步(Gitea REST API)
- 云端同步支持:条码映射、供应商配置、商品资料、采购模板
- 拖拽一键处理(图片→OCR→Excel→合并)
- 191 个单元测试
- 移除无用的模板管理功能
- 清理 IDE 产物目录

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-04 19:51:13 +08:00
commit e4d62df7e3
78 changed files with 15257 additions and 0 deletions
+245
View File
@@ -0,0 +1,245 @@
"""
订单服务模块
---------
提供订单处理服务,协调Excel处理和订单合并流程。
"""
import os
from typing import Dict, List, Optional, Tuple, Union, Any, Callable
from ..config.settings import ConfigManager
from ..core.utils.log_utils import get_logger
from ..core.excel.processor import ExcelProcessor
from ..core.excel.merger import PurchaseOrderMerger
from ..core.db.product_db import ProductDatabase
logger = get_logger(__name__)
class OrderService:
    """Coordinate Excel order processing and purchase-order merging.

    The service is a thin facade over ``ExcelProcessor`` (turns a supplier
    Excel sheet into a standard purchase order) and ``PurchaseOrderMerger``
    (merges several purchase orders into one). It also detects a few
    supplier-specific layouts and routes them through a dedicated
    preprocessing step before the generic processing runs.
    """

    def __init__(self, config: Optional[ConfigManager] = None):
        """Create the service and its collaborators.

        Args:
            config: Configuration manager to use. A new ``ConfigManager`` is
                created when ``None`` is passed.
        """
        logger.info("初始化OrderService")
        # Explicit None check so that a falsy-but-valid config is not replaced.
        self.config = config if config is not None else ConfigManager()

        # Excel processor and purchase-order merger share the same config.
        self.excel_processor = ExcelProcessor(self.config)
        self.order_merger = PurchaseOrderMerger(self.config)
        logger.info("OrderService初始化完成")

    def get_latest_excel(self) -> Optional[str]:
        """Return the path of the latest Excel file.

        Returns:
            Whatever ``ExcelProcessor.get_latest_excel`` returns: the file
            path, or ``None`` when no file was found.
        """
        return self.excel_processor.get_latest_excel()

    def process_excel(self, file_path: Optional[str] = None, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
        """Process an Excel order file and produce a standard purchase order.

        Args:
            file_path: Excel file to process. When empty or ``None`` the
                latest file reported by the Excel processor is used.
            progress_cb: Optional progress callback, forwarded unchanged to
                ``ExcelProcessor.process_specific_file``.

        Returns:
            Path of the generated purchase order, or ``None`` when no input
            file could be found or processing failed.
        """
        if not file_path:
            file_path = self.excel_processor.get_latest_excel()
            if not file_path:
                logger.warning("未找到可处理的Excel文件")
                return None
            logger.info("OrderService开始处理最新Excel文件")
        else:
            logger.info(f"OrderService开始处理指定Excel文件: {file_path}")

        # Supplier-specific preprocessing is best effort: any failure is
        # logged and the original file is processed as-is. The progress
        # callback is deliberately not passed to the preprocessing step.
        try:
            preprocessed_path = self._check_special_preprocess(file_path)
        except Exception as e:
            logger.error(f"检查特殊预处理时出错: {e}")
        else:
            if preprocessed_path:
                logger.info(f"检测到特殊供应商,已生成预处理文件: {preprocessed_path}")
                file_path = preprocessed_path

        return self.excel_processor.process_specific_file(file_path, progress_cb=progress_cb)

    def _check_special_preprocess(self, file_path: str) -> Optional[str]:
        """Detect a known supplier layout and run its dedicated preprocessing.

        Only the first 50 rows are inspected. The checks run in this order
        and the first match wins: tobacco company, Rongcheng Yigou,
        Yang Biyue.

        Args:
            file_path: Excel file to inspect.

        Returns:
            The value returned by the matching supplier preprocessor, or
            ``None`` when no supplier matched or detection failed.
        """
        try:
            # Imported lazily, as in the rest of this module's helpers.
            from app.core.utils.file_utils import smart_read_excel

            # header=None so that the very first row is part of the data.
            df_head = smart_read_excel(file_path, nrows=50, header=None)
            df_str = df_head.astype(str)

            def sheet_contains(pattern: str) -> bool:
                """Return True if any inspected cell matches ``pattern``."""
                matches = df_str.apply(lambda col: col.str.contains(pattern))
                return bool(matches.any().any())

            # 1. Tobacco company: a licence-number label or a specific
            #    licence number appears somewhere in the sheet.
            if sheet_contains('专卖证号|510109104938'):
                logger.info("识别到烟草公司订单,执行专用预处理...")
                from .tobacco_service import TobaccoService
                tobacco_svc = TobaccoService(self.config)
                return tobacco_svc.preprocess_tobacco_order(file_path)

            # 2. Rongcheng Yigou: the order-number marker "RCDH" appears.
            if sheet_contains('RCDH'):
                logger.info("识别到蓉城易购订单,执行专用预处理...")
                from .special_suppliers_service import SpecialSuppliersService
                special_svc = SpecialSuppliersService(self.config)
                return special_svc.preprocess_rongcheng_yigou(file_path)

            # 3. Yang Biyue: the first column that carries the "handler"
            #    keyword must also contain the handler's name.
            handler_col = next(
                (col for col in df_str.columns
                 if df_str[col].str.contains('经手人').any()),
                None,
            )
            if handler_col is not None and df_str[handler_col].str.contains('杨碧月').any():
                logger.info("识别到杨碧月订单,执行专用预处理...")
                from .special_suppliers_service import SpecialSuppliersService
                special_svc = SpecialSuppliersService(self.config)
                return special_svc.process_yang_biyue_only(file_path)
        except Exception as e:
            # Detection is best effort; fall back to generic processing.
            logger.warning(f"智能预处理识别失败: {e}")
        return None

    def get_purchase_orders(self) -> List[str]:
        """Return the list of purchase-order files.

        Returns:
            Whatever ``PurchaseOrderMerger.get_purchase_orders`` returns.
        """
        return self.order_merger.get_purchase_orders()

    def merge_purchase_orders(self, file_paths: List[str], progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
        """Merge the given purchase-order files.

        Args:
            file_paths: Purchase-order files to merge.
            progress_cb: Optional progress callback, forwarded to the merger.

        Returns:
            Path of the merged purchase order, or ``None`` on failure.
        """
        # merge_orders already logs which files are merged.
        return self.merge_orders(file_paths, progress_cb)

    def merge_all_purchase_orders(self, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
        """Merge every available purchase-order file.

        Args:
            progress_cb: Optional progress callback, forwarded to the merger.

        Returns:
            Path of the merged purchase order, or ``None`` on failure.
        """
        # merge_orders already logs that all files are merged.
        return self.merge_orders(None, progress_cb)

    def merge_orders(self, file_paths: Optional[List[str]] = None, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
        """Merge purchase orders through ``PurchaseOrderMerger.process``.

        Args:
            file_paths: Purchase-order files to merge. ``None`` (or an empty
                list) is passed through to the merger unchanged; the log
                message then reports that all purchase orders are merged.
            progress_cb: Optional progress callback, forwarded to the merger.

        Returns:
            Path of the merged purchase order, or ``None`` on failure.
        """
        if file_paths:
            logger.info(f"OrderService开始合并指定采购单: {file_paths}")
        else:
            logger.info("OrderService开始合并所有采购单")
        return self.order_merger.process(file_paths, progress_cb)

    def validate_unit_price(self, result_path: str, threshold: float = 1.0) -> List[str]:
        """Compare purchase-order unit prices with stored purchase prices.

        Args:
            result_path: Purchase-order file to validate.
            threshold: Absolute price difference above which a row is
                reported. Defaults to ``1.0``.

        Returns:
            One message per row whose price differs by more than
            ``threshold``. Empty when there are no differences, when the
            barcode or unit-price column cannot be found, or when validation
            fails with an error (the error is logged).
        """
        try:
            from app.core.utils.file_utils import smart_read_excel
            from app.core.handlers.column_mapper import ColumnMapper as CM

            # Use the service's own configuration, like every other method.
            config = self.config
            template_folder = config.get('Paths', 'template_folder', fallback='templates')
            item_data = config.get('Templates', 'item_data', fallback='商品资料.xlsx')
            item_path = os.path.join(template_folder, item_data)
            product_db_path = config.get('Paths', 'product_db', fallback='data/product_cache.db')

            # Purchase prices are looked up through the product database.
            product_db = ProductDatabase(product_db_path, item_path)

            # Load the purchase order that is being validated.
            df_res = smart_read_excel(result_path)
            columns = list(df_res.columns)
            res_barcode_col = CM.find_column(columns, 'barcode')
            res_price_col = CM.find_column(columns, 'unit_price')
            if not res_barcode_col or not res_price_col:
                logger.warning("未能在采购单中找到条码或单价列")
                return []

            # Normalise barcodes once and reuse the same strings for both the
            # batch query and the per-row lookup. Iterating rows instead
            # would upcast mixed numeric rows and turn an integer barcode
            # into "<digits>.0", which never matches the queried keys.
            barcodes = df_res[res_barcode_col].astype(str).str.strip()
            item_prices = product_db.get_prices(barcodes.tolist())

            results = []
            for bc, raw_price in zip(barcodes, df_res[res_price_col]):
                if bc not in item_prices:
                    continue
                item_price = item_prices[bc]
                try:
                    res_price = float(raw_price)
                    # A missing or non-numeric stored price skips this row
                    # only, instead of aborting the whole validation.
                    diff = abs(res_price - float(item_price))
                except (ValueError, TypeError):
                    continue
                if diff > threshold:
                    results.append(f"条码 {bc}: 采购单价={res_price} vs 进货价={item_price} 差异={diff:.2f}")
            return results
        except Exception as e:
            logger.error(f"单价校验过程中发生错误: {e}")
            return []