feat(供应商管理): 新增规则引擎与词典配置支持
refactor(处理器): 重构通用供应商处理器以支持规则引擎
docs: 更新README与文档说明供应商管理功能
build: 更新打包脚本注入版本信息
test: 添加规则引擎单元测试
This commit is contained in:
@@ -11,6 +11,7 @@ from pathlib import Path
|
||||
|
||||
from ..base import BaseProcessor
|
||||
from ...utils.log_utils import get_logger
|
||||
from ...handlers.rule_engine import apply_rules
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -118,10 +119,17 @@ class GenericSupplierProcessor(BaseProcessor):
|
||||
self.logger.error("数据清洗失败")
|
||||
self.log_processing_end(input_file, success=False)
|
||||
return None
|
||||
try:
|
||||
rules = self.supplier_config.get('rules', [])
|
||||
dictionary = self.supplier_config.get('dictionary')
|
||||
standardized_df = apply_rules(cleaned_df, rules, dictionary)
|
||||
except Exception as e:
|
||||
self.logger.warning(f"规则执行失败: {e}")
|
||||
standardized_df = cleaned_df
|
||||
|
||||
# 步骤4: 计算处理
|
||||
self.logger.info("步骤4/4: 计算处理...")
|
||||
calculated_df = self._apply_calculations(cleaned_df)
|
||||
calculated_df = self._apply_calculations(standardized_df)
|
||||
if calculated_df is None:
|
||||
self.logger.error("计算处理失败")
|
||||
self.log_processing_end(input_file, success=False)
|
||||
@@ -205,15 +213,26 @@ class GenericSupplierProcessor(BaseProcessor):
|
||||
数据DataFrame或None
|
||||
"""
|
||||
try:
|
||||
df = self._read_excel_safely(file_path)
|
||||
|
||||
if df.empty:
|
||||
specified = self.supplier_config.get('header_row')
|
||||
if specified is not None:
|
||||
try:
|
||||
df = self._read_excel_safely(file_path, header=int(specified))
|
||||
except Exception:
|
||||
df = self._read_excel_safely(file_path)
|
||||
else:
|
||||
df0 = self._read_excel_safely(file_path, header=None)
|
||||
if df0 is None:
|
||||
return None
|
||||
header_row = self._find_header_row(df0)
|
||||
if header_row is not None:
|
||||
df = self._read_excel_safely(file_path, header=header_row)
|
||||
else:
|
||||
df = self._read_excel_safely(file_path)
|
||||
if df is None or df.empty:
|
||||
self.logger.warning("数据文件为空")
|
||||
return None
|
||||
|
||||
self.logger.info(f"成功读取数据,形状: {df.shape}")
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"读取数据失败: {e}")
|
||||
return None
|
||||
@@ -235,6 +254,40 @@ class GenericSupplierProcessor(BaseProcessor):
|
||||
except Exception as e:
|
||||
self.logger.error(f"读取Excel失败: {file_path} - {e}")
|
||||
raise
|
||||
|
||||
def _find_header_row(self, df: pd.DataFrame) -> Optional[int]:
|
||||
try:
|
||||
header_keywords = [
|
||||
'条码','条形码','商品编码','商品名称','名称','数量','单位','单价','规格',
|
||||
'金额','小计','总计','合计','合计金额'
|
||||
]
|
||||
scores = []
|
||||
rows_to_check = min(30, len(df))
|
||||
for r in range(rows_to_check):
|
||||
row = df.iloc[r]
|
||||
score = 0
|
||||
for cell in row:
|
||||
if isinstance(cell, str):
|
||||
s = cell.strip().lower()
|
||||
for kw in header_keywords:
|
||||
if kw.lower() in s:
|
||||
score += 5
|
||||
non_empty = row.count()
|
||||
if non_empty / max(1, len(row)) > 0.5:
|
||||
score += 2
|
||||
str_count = sum(1 for c in row if isinstance(c, str))
|
||||
if str_count / max(1, len(row)) > 0.5:
|
||||
score += 3
|
||||
scores.append((r, score))
|
||||
scores.sort(key=lambda x: x[1], reverse=True)
|
||||
if scores and scores[0][1] >= 5:
|
||||
return scores[0][0]
|
||||
for r in range(len(df)):
|
||||
if df.iloc[r].notna().sum() > 3:
|
||||
return r
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _apply_column_mapping(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
|
||||
"""应用列映射
|
||||
|
||||
Reference in New Issue
Block a user