refactor: unify special suppliers processing into a single intelligent flow

This commit is contained in:
2026-03-30 13:38:05 +08:00
parent 708402c7fb
commit 26835e265a
2 changed files with 31 additions and 109 deletions
+27 -18
View File
@@ -84,15 +84,35 @@ class OrderService:
return self.excel_processor.process_specific_file(file_path, progress_cb=progress_cb)
def _check_special_preprocess(self, file_path: str) -> Optional[str]:
"""检查并执行特殊的预处理"""
"""检查并执行特殊的预处理(支持杨碧月、烟草公司、蓉城易购)"""
try:
from app.core.utils.file_utils import smart_read_excel
import pandas as pd
import re
# 仅读取前行进行识别
# 仅读取前 50 行进行智能识别
df_head = smart_read_excel(file_path, nrows=50)
df_str = df_head.astype(str)
# 1. 检查“杨碧月”
# 1. 识别:烟草公司 (Tobacco)
# 特征:通常包含“烟草”、“卷烟”等关键字,且有特定的表头结构
is_tobacco = df_str.apply(lambda x: x.str.contains('烟草|卷烟|营销中心')).any().any()
if is_tobacco:
logger.info("识别到烟草公司订单,执行专用预处理...")
from .tobacco_service import TobaccoService
tobacco_svc = TobaccoService(self.config)
return tobacco_svc.process_tobacco_order(file_path)
# 2. 识别:蓉城易购 (Rongcheng Yigou)
# 特征:通常文件名包含“订单”或内容包含“订购单位”等
is_rongcheng = df_str.apply(lambda x: x.str.contains('蓉城易购|订购单位|出库小计')).any().any()
if is_rongcheng:
logger.info("识别到蓉城易购订单,执行专用预处理...")
from .special_suppliers_service import SpecialSuppliersService
special_svc = SpecialSuppliersService(self.config)
return special_svc.process_rongcheng_yigou(file_path)
# 3. 识别:杨碧月 (Yang Biyue)
handler_col = None
for col in df_head.columns:
if '经手人' in str(col):
@@ -100,38 +120,27 @@ class OrderService:
break
if handler_col is not None and df_head[handler_col].astype(str).str.contains('杨碧月').any():
logger.info("识别到杨碧月订单,执行预处理...")
# 重新读取完整数据
logger.info("识别到杨碧月订单,执行通用预处理...")
df = smart_read_excel(file_path)
column_map = {
'商品条码': '商品条码', '商品名称': '商品名称', '规格': '规格',
'单位': '单位', '数量': '数量', '单价': '单价', '金额': '金额'
}
found_cols = {}
# 优先级排序,确保更精确的匹配
for target_zh, std_name in column_map.items():
for col in df.columns:
col_str = str(col)
if target_zh == col_str: # 精确匹配优先
found_cols[col] = std_name
break
if target_zh == col_str: found_cols[col] = std_name; break
if std_name not in found_cols.values():
for col in df.columns:
col_str = str(col)
if target_zh in col_str: # 模糊匹配
found_cols[col] = std_name
break
if target_zh in str(col): found_cols[col] = std_name; break
if len(found_cols) >= 4:
df_clean = df[list(found_cols.keys())].copy()
df_clean = df_clean.rename(columns=found_cols)
# 确保数量和价格是数值
# 这里的列名已经改回中文了
for c in ['数量', '单价', '金额']:
if c in df_clean.columns:
df_clean[c] = pd.to_numeric(df_clean[c], errors='coerce').fillna(0)
df_clean = df_clean.dropna(subset=['商品条码'])
out_dir = os.path.dirname(file_path)
final_path = os.path.join(out_dir, "预处理之后.xlsx")
@@ -139,7 +148,7 @@ class OrderService:
return final_path
except Exception as e:
logger.warning(f"预处理识别失败: {e}")
logger.warning(f"智能预处理识别失败: {e}")
return None
def get_purchase_orders(self) -> List[str]: