feat(供应商管理): 新增规则引擎与词典配置支持
refactor(处理器): 重构通用供应商处理器以支持规则引擎
docs: 更新README与文档说明供应商管理功能
build: 更新打包脚本注入版本信息
test: 添加规则引擎单元测试
This commit is contained in:
@@ -0,0 +1,150 @@
|
||||
import re
|
||||
import pandas as pd
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
def _split_quantity_unit(df: pd.DataFrame, source: str, dictionary: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
    """Split a combined quantity/unit column into ``quantity`` and ``unit``.

    Each cell of ``df[source]`` is searched for a number immediately followed
    by one of the units 箱/件/提/盒/瓶.  When found, the number becomes the
    quantity and the unit (after ``unit_synonyms`` translation) becomes the
    unit.  Otherwise the whole cell is parsed as a plain number and paired
    with the configured default unit; cells that are missing or not numeric
    yield a quantity of ``0.0``.

    Args:
        df: Frame to update in place.
        source: Name of the column holding the raw quantity text.
        dictionary: Optional settings; the keys read here are
            ``default_unit`` (str) and ``unit_synonyms`` (mapping).

    Returns:
        ``df`` with ``quantity`` and ``unit`` columns written, or ``df``
        untouched when ``source`` is not a column.
    """
    if source not in df.columns:
        return df

    settings = dictionary or {}
    default_unit = settings.get("default_unit", "")
    unit_synonyms = settings.get("unit_synonyms", {})
    fallback_unit = unit_synonyms.get(default_unit, default_unit)
    pattern = re.compile(r"(\d+(?:\.\d+)?)(箱|件|提|盒|瓶)")

    # Blank out missing cells *before* stringifying: converting first would
    # turn NaN/None into the literal text "nan"/"None", and "nan" parses as a
    # float NaN instead of falling back to 0.0.
    column = df[source]
    texts = ["" if missing else str(raw) for raw, missing in zip(column, column.isna())]

    nums = []
    units = []
    for text in texts:
        found = pattern.search(text)
        if found:
            nums.append(float(found.group(1)))
            units.append(unit_synonyms.get(found.group(2), found.group(2)))
            continue
        try:
            nums.append(float(text))
        except ValueError:
            # Not a bare number either: record zero so later rules can
            # still derive the quantity from the prices.
            nums.append(0.0)
        units.append(fallback_unit)

    df["quantity"] = nums
    df["unit"] = units
    return df
|
||||
|
||||
def _extract_spec_from_name(df: pd.DataFrame, source: str, dictionary: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
    """Derive ``specification`` and ``package_quantity`` from product names.

    Every name in ``df[source]`` has the configured ``ignore_words`` removed
    and is then parsed by ``_parse_name_spec``.  An existing
    ``specification`` column is left as it is; ``package_quantity`` is always
    overwritten.

    Args:
        df: Frame to update in place.
        source: Name of the column holding product names.
        dictionary: Optional settings; the keys read here are
            ``ignore_words`` (list of substrings to strip) and
            ``name_patterns`` (list of regexes tried before the built-ins).

    Returns:
        ``df`` with the two columns written, or ``df`` untouched when
        ``source`` is not a column.
    """
    if source not in df.columns:
        return df

    settings = dictionary or {}
    # ``or []`` also covers keys that are present but explicitly null.
    ignore_words = settings.get("ignore_words") or []
    name_patterns = settings.get("name_patterns") or []

    specs = []
    packs = []
    for text in df[source].astype(str).fillna(""):
        for word in ignore_words:
            text = text.replace(word, "")
        spec, pack = _parse_name_spec(text, name_patterns)
        specs.append(spec)
        packs.append(pack)

    # Assign the plain list (positional) rather than a fresh Series: a Series
    # carries a 0..n-1 index and would be aligned against df.index, producing
    # NaN for frames whose index is not the default range.
    if "specification" not in df.columns:
        df["specification"] = specs
    df["package_quantity"] = packs
    return df


def _parse_name_spec(text, name_patterns):
    """Return ``(specification, package_quantity)`` parsed from one name.

    Custom ``name_patterns`` are tried first; the first one that matches with
    at least two capture groups wins, yielding the cleaned name itself as the
    specification and the last group (as int, else ``None``) as the pack
    size.  Three built-in patterns follow; ``("", None)`` means no match.
    """
    for pat in name_patterns:
        try:
            found = re.search(pat, text)
        except (re.error, TypeError):
            # A malformed user-supplied pattern must not abort the run.
            continue
        if found and len(found.groups()) >= 2:
            try:
                qty = int(found.group(len(found.groups())))
            except (TypeError, ValueError):
                qty = None
            return text, qty

    # Volume with pack count, e.g. "500ml*24".
    found = re.search(r"(\d+(?:\.\d+)?)(ml|l|升|毫升)[*×xX](\d+)", text, re.IGNORECASE)
    if found:
        return f"{found.group(1)}{found.group(2)}*{found.group(3)}", int(found.group(3))

    # Bare "A*B": only the second number is kept, as the pack count.
    found = re.search(r"(\d+)[*×xX](\d+)", text)
    if found:
        return f"1*{found.group(2)}", int(found.group(2))

    # NOTE(review): both \D* may match empty, so a lone number such as "500"
    # is split into "50" + "0" and reported as pack size 0 — confirm whether
    # a separator was meant to be required.
    found = re.search(r"(\d{2,3})\D*(\d{1,3})\D*", text)
    if found:
        return f"1*{found.group(2)}", int(found.group(2))

    return "", None
|
||||
|
||||
def _normalize_unit(df: pd.DataFrame, target: str, unit_map: Dict[str, str], dictionary: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
    """Map unit names and convert pack-level quantities to bottles.

    The ``target`` column is stringified and translated through ``unit_map``.
    When a ``quantity`` column exists, rows whose unit is one of
    件/箱/提/盒 have their quantity multiplied by a pack size and every such
    unit is relabeled to 瓶.  The pack size is the row's
    ``package_quantity`` when that is a positive number, otherwise
    ``pack_multipliers[unit]``, otherwise ``default_package_quantity``.

    Args:
        df: Frame to update in place.
        target: Name of the unit column.
        unit_map: Mapping from raw unit text to canonical unit text.
        dictionary: Optional settings; the keys read here are
            ``pack_multipliers`` (mapping) and ``default_package_quantity``
            (defaults to 1).

    Returns:
        ``df`` after the updates, or ``df`` untouched when ``target`` is not
        a column.
    """
    if target not in df.columns:
        return df

    pack_units = ("件", "箱", "提", "盒")
    df[target] = df[target].astype(str).apply(lambda u: unit_map.get(u, u))

    if "quantity" not in df.columns:
        return df

    settings = dictionary or {}
    pack_multipliers = settings.get("pack_multipliers", {})
    default_pq = settings.get("default_package_quantity", 1)

    def as_number(value):
        """Return ``value`` as a float, or None if missing or unparseable."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return None
        return None if pd.isna(number) else number

    if "package_quantity" in df.columns:
        pack_sizes = df["package_quantity"].tolist()
    else:
        pack_sizes = [None] * len(df)

    converted = []
    for unit, qty, pack_size in zip(df[target].tolist(), df["quantity"].tolist(), pack_sizes):
        new_qty = qty
        if unit in pack_units:
            mult = as_number(pack_size)
            # NaN is truthy, so a plain ``pack_size or fallback`` would keep
            # a missing pack size and never consult the dictionary; test for
            # a usable positive number explicitly instead.
            if mult is None or mult <= 0:
                mult = as_number(pack_multipliers.get(unit, default_pq))
            qty_number = as_number(qty)
            if qty_number is not None and mult is not None and mult > 0:
                new_qty = qty_number * mult
        converted.append(new_qty)

    # Positional list assignment also works for empty frames, where a
    # row-wise DataFrame.apply would not return a Series.
    df["quantity"] = converted
    # NOTE(review): the relabel is assumed to belong to the "quantity column
    # exists" branch; the original indentation was not recoverable — confirm.
    df[target] = ["瓶" if u in pack_units else u for u in df[target].tolist()]
    return df
|
||||
|
||||
def _compute_quantity_from_total(df: pd.DataFrame) -> pd.DataFrame:
    """Back-fill missing quantities as ``total_price / unit_price``.

    A row is recomputed only when its quantity is missing, non-numeric or not
    positive while both prices are positive.  The result is rounded to six
    decimals.  Nothing happens unless ``quantity``, ``unit_price`` and
    ``total_price`` are all present.

    Args:
        df: Frame to update in place.

    Returns:
        ``df`` with ``quantity`` filled in where it could be derived.
    """
    # All three columns are needed; without total_price there is nothing to
    # divide (and coercing a scalar default would not yield a Series).
    if any(col not in df.columns for col in ("quantity", "unit_price", "total_price")):
        return df

    qty = pd.to_numeric(df["quantity"], errors="coerce").fillna(0)
    up = pd.to_numeric(df["unit_price"], errors="coerce").fillna(0)
    tp = pd.to_numeric(df["total_price"], errors="coerce").fillna(0)

    need = (qty <= 0) & (up > 0) & (tp > 0)
    if not need.any():
        return df

    computed = (tp / up).round(6)
    # Rebuild the whole column so fractional results never have to be written
    # into an integer-typed column through .loc.
    df["quantity"] = [
        new if flag else old
        for new, flag, old in zip(computed.tolist(), need.tolist(), df["quantity"].tolist())
    ]
    return df
|
||||
|
||||
def _fill_missing(df: pd.DataFrame, fills: Dict[str, Any]) -> pd.DataFrame:
    """Apply default values column by column.

    For each ``column -> default`` pair in ``fills``: an existing column has
    its missing cells replaced by the default, while a column that does not
    exist yet is created with the default in every row.

    Args:
        df: Frame to update in place.
        fills: Mapping from column name to default value.

    Returns:
        The same ``df`` object.
    """
    for column, default in fills.items():
        if column not in df.columns:
            # Brand-new column: broadcast the default to every row.
            df[column] = default
            continue
        df[column] = df[column].fillna(default)
    return df
|
||||
|
||||
def _mark_gift(df: pd.DataFrame) -> pd.DataFrame:
    """Add a boolean ``is_gift`` column.

    A row is marked as a gift when any of these holds:

    * ``total_price`` is present and is zero, missing or non-numeric;
    * ``unit_price`` is present and is zero, missing or non-numeric;
    * ``name`` contains "赠品" or is exactly "o" / "O".

    Args:
        df: Frame to update in place.

    Returns:
        ``df`` with the ``is_gift`` column written.
    """
    # Build the mask on df.index: a default 0..n-1 index would be aligned by
    # label against the price columns and break on filtered/re-indexed frames.
    flags = pd.Series(False, index=df.index, dtype=bool)

    for price_column in ("total_price", "unit_price"):
        if price_column in df.columns:
            # Unparseable or missing prices are coerced to 0 and therefore
            # count as gifts.
            prices = pd.to_numeric(df[price_column], errors="coerce").fillna(0)
            flags = flags | (prices == 0)

    if "name" in df.columns:
        flags = flags | df["name"].astype(str).str.contains(r"赠品|^o$|^O$", regex=True)

    df["is_gift"] = flags.astype(bool)
    return df
|
||||
|
||||
def apply_rules(df: pd.DataFrame, rules: List[Dict[str, Any]], dictionary: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
    """Run a sequence of transformation rules over a copy of ``df``.

    Each rule is a mapping whose ``type`` selects the transformation; rules
    run in list order, each receiving the previous rule's output.  Rules with
    an unrecognized ``type`` are skipped.

    Args:
        df: Input frame; it is copied before any rule runs.
        rules: Rule mappings; ``None`` or empty means no rules.
        dictionary: Optional settings forwarded to the rules that accept
            them.

    Returns:
        The transformed copy.
    """
    # The lambdas defer every helper lookup until a rule of that type is
    # actually encountered.
    handlers = {
        "split_quantity_unit": lambda frame, rule: _split_quantity_unit(
            frame, rule.get("source", "quantity"), dictionary
        ),
        "extract_spec_from_name": lambda frame, rule: _extract_spec_from_name(
            frame, rule.get("source", "name"), dictionary
        ),
        "normalize_unit": lambda frame, rule: _normalize_unit(
            frame, rule.get("target", "unit"), rule.get("map", {}), dictionary
        ),
        "compute_quantity_from_total": lambda frame, rule: _compute_quantity_from_total(frame),
        "fill_missing": lambda frame, rule: _fill_missing(frame, rule.get("fills", {})),
        "mark_gift": lambda frame, rule: _mark_gift(frame),
    }

    result = df.copy()
    for rule in rules or []:
        kind = rule.get("type")
        # Only strings can name a rule; the guard also keeps unhashable
        # values away from the dict lookup.
        handler = handlers.get(kind) if isinstance(kind, str) else None
        if handler is not None:
            result = handler(result, rule)
    return result
|
||||
Reference in New Issue
Block a user