refactor: unify special suppliers processing into a single intelligent flow
This commit is contained in:
@@ -84,15 +84,35 @@ class OrderService:
|
||||
return self.excel_processor.process_specific_file(file_path, progress_cb=progress_cb)
|
||||
|
||||
def _check_special_preprocess(self, file_path: str) -> Optional[str]:
|
||||
"""检查并执行特殊的预处理"""
|
||||
"""检查并执行特殊的预处理(支持杨碧月、烟草公司、蓉城易购)"""
|
||||
try:
|
||||
from app.core.utils.file_utils import smart_read_excel
|
||||
import pandas as pd
|
||||
import re
|
||||
|
||||
# 仅读取前几行进行识别
|
||||
# 仅读取前 50 行进行智能识别
|
||||
df_head = smart_read_excel(file_path, nrows=50)
|
||||
df_str = df_head.astype(str)
|
||||
|
||||
# 1. 检查“杨碧月”
|
||||
# 1. 识别:烟草公司 (Tobacco)
|
||||
# 特征:通常包含“烟草”、“卷烟”等关键字,且有特定的表头结构
|
||||
is_tobacco = df_str.apply(lambda x: x.str.contains('烟草|卷烟|营销中心')).any().any()
|
||||
if is_tobacco:
|
||||
logger.info("识别到烟草公司订单,执行专用预处理...")
|
||||
from .tobacco_service import TobaccoService
|
||||
tobacco_svc = TobaccoService(self.config)
|
||||
return tobacco_svc.process_tobacco_order(file_path)
|
||||
|
||||
# 2. 识别:蓉城易购 (Rongcheng Yigou)
|
||||
# 特征:通常文件名包含“订单”或内容包含“订购单位”等
|
||||
is_rongcheng = df_str.apply(lambda x: x.str.contains('蓉城易购|订购单位|出库小计')).any().any()
|
||||
if is_rongcheng:
|
||||
logger.info("识别到蓉城易购订单,执行专用预处理...")
|
||||
from .special_suppliers_service import SpecialSuppliersService
|
||||
special_svc = SpecialSuppliersService(self.config)
|
||||
return special_svc.process_rongcheng_yigou(file_path)
|
||||
|
||||
# 3. 识别:杨碧月 (Yang Biyue)
|
||||
handler_col = None
|
||||
for col in df_head.columns:
|
||||
if '经手人' in str(col):
|
||||
@@ -100,38 +120,27 @@ class OrderService:
|
||||
break
|
||||
|
||||
if handler_col is not None and df_head[handler_col].astype(str).str.contains('杨碧月').any():
|
||||
logger.info("识别到杨碧月订单,执行预处理...")
|
||||
# 重新读取完整数据
|
||||
logger.info("识别到杨碧月订单,执行通用预处理...")
|
||||
df = smart_read_excel(file_path)
|
||||
column_map = {
|
||||
'商品条码': '商品条码', '商品名称': '商品名称', '规格': '规格',
|
||||
'单位': '单位', '数量': '数量', '单价': '单价', '金额': '金额'
|
||||
}
|
||||
found_cols = {}
|
||||
# 优先级排序,确保更精确的匹配
|
||||
for target_zh, std_name in column_map.items():
|
||||
for col in df.columns:
|
||||
col_str = str(col)
|
||||
if target_zh == col_str: # 精确匹配优先
|
||||
found_cols[col] = std_name
|
||||
break
|
||||
if target_zh == col_str: found_cols[col] = std_name; break
|
||||
if std_name not in found_cols.values():
|
||||
for col in df.columns:
|
||||
col_str = str(col)
|
||||
if target_zh in col_str: # 模糊匹配
|
||||
found_cols[col] = std_name
|
||||
break
|
||||
if target_zh in str(col): found_cols[col] = std_name; break
|
||||
|
||||
if len(found_cols) >= 4:
|
||||
df_clean = df[list(found_cols.keys())].copy()
|
||||
df_clean = df_clean.rename(columns=found_cols)
|
||||
|
||||
# 确保数量和价格是数值
|
||||
# 这里的列名已经改回中文了
|
||||
for c in ['数量', '单价', '金额']:
|
||||
if c in df_clean.columns:
|
||||
df_clean[c] = pd.to_numeric(df_clean[c], errors='coerce').fillna(0)
|
||||
|
||||
df_clean = df_clean.dropna(subset=['商品条码'])
|
||||
out_dir = os.path.dirname(file_path)
|
||||
final_path = os.path.join(out_dir, "预处理之后.xlsx")
|
||||
@@ -139,7 +148,7 @@ class OrderService:
|
||||
return final_path
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"预处理识别失败: {e}")
|
||||
logger.warning(f"智能预处理识别失败: {e}")
|
||||
return None
|
||||
|
||||
def get_purchase_orders(self) -> List[str]:
|
||||
|
||||
Reference in New Issue
Block a user