更新了README文件,添加了版本信息和更新日志

This commit is contained in:
2025-05-02 19:58:27 +08:00
parent 71ca90ba6e
commit 693c17283b
25 changed files with 875 additions and 41 deletions
Binary file not shown.
Binary file not shown.
+74 -20
View File
@@ -27,6 +27,11 @@ class UnitConverter:
'multiplier': 10, # 数量乘以10
'target_unit': '', # 目标单位
'description': '特殊处理:数量*10,单位转换为瓶'
},
'6921168593804': {
'multiplier': 30, # 数量乘以30
'target_unit': '', # 目标单位
'description': 'NFC产品特殊处理:每箱30瓶'
}
# 可以添加更多特殊条码的配置
}
@@ -53,6 +58,13 @@ class UnitConverter:
"""
从数量字符串中提取单位
支持的格式:
1. "2箱" -> (2, "箱")
2. "3件" -> (3, "件")
3. "1.5提" -> (1.5, "提")
4. "数量: 5盒" -> (5, "盒")
5. "× 2瓶" -> (2, "瓶")
Args:
quantity_str: 数量字符串,如"2箱"、"5件"
@@ -62,13 +74,29 @@ class UnitConverter:
if not quantity_str or not isinstance(quantity_str, str):
return None, None
# 匹配数字+单位格式
match = re.match(r'^([\d\.]+)\s*([^\d\s\.]+)$', quantity_str.strip())
if match:
# 清理字符串,移除前后空白和一些常见前缀
cleaned_str = quantity_str.strip()
for prefix in ['数量:', '数量:', '×', 'x', 'X', '*']:
cleaned_str = cleaned_str.replace(prefix, '').strip()
# 匹配数字+单位格式 (基本格式)
basic_match = re.match(r'^([\d\.]+)\s*([^\d\s\.]+)$', cleaned_str)
if basic_match:
try:
num = float(match.group(1))
unit = match.group(2)
logger.info(f"从数量提取单位: {quantity_str} -> 数量={num}, 单位={unit}")
num = float(basic_match.group(1))
unit = basic_match.group(2)
logger.info(f"从数量提取单位(基本格式): {quantity_str} -> 数量={num}, 单位={unit}")
return num, unit
except ValueError:
pass
# 匹配更复杂的格式,如包含其他文本的情况
complex_match = re.search(r'([\d\.]+)\s*([箱|件|瓶|提|盒|袋|桶|包|kg|g|升|毫升|L|ml|个])', cleaned_str)
if complex_match:
try:
num = float(complex_match.group(1))
unit = complex_match.group(2)
logger.info(f"从数量提取单位(复杂格式): {quantity_str} -> 数量={num}, 单位={unit}")
return num, unit
except ValueError:
pass
@@ -115,6 +143,13 @@ class UnitConverter:
"""
从商品名称中推断规格
规则:
1. "xx入纸箱" -> 1*xx (如"15入纸箱" -> 1*15)
2. 直接包含规格 "1*15" -> 1*15
3. "xx纸箱" -> 1*xx (如"15纸箱" -> 1*15)
4. "xx白膜" -> 1*xx (如"12白膜" -> 1*12)
5. "xxL" -> xxL*1 (容量单位特殊处理,如"12.9L桶装水" -> 12.9L*1)
Args:
name: 商品名称
@@ -124,34 +159,53 @@ class UnitConverter:
if not name or not isinstance(name, str):
return None
# 特殊模式的名称处理
# 如"445水溶C血橙15入纸箱" -> "1*15"
pattern1 = r'.*(\d+)入'
match = re.match(pattern1, name)
# 记录原始商品名称,用于日志
original_name = name
# 特殊模式1: "xx入纸箱" 格式,如"445水溶C血橙15入纸箱" -> "1*15"
pattern1 = r'.*?(\d+)入纸箱'
match = re.search(pattern1, name)
if match:
inferred_spec = f"1*{match.group(1)}"
logger.info(f"从名称推断规格(入): {name} -> {inferred_spec}")
logger.info(f"从名称推断规格(入纸箱): {original_name} -> {inferred_spec}")
return inferred_spec
# 如"500-东方树叶-绿茶1*15-纸箱装" -> "1*15"
pattern2 = r'.*(\d+)[*xX×](\d+).*'
match = re.match(pattern2, name)
# 特殊模式2: 直接包含规格,如"500-东方树叶-乌龙茶1*15-纸箱装" -> "1*15"
pattern2 = r'.*?(\d+)[*xX×](\d+).*'
match = re.search(pattern2, name)
if match:
inferred_spec = f"{match.group(1)}*{match.group(2)}"
logger.info(f"从名称推断规格(直接): {name} -> {inferred_spec}")
logger.info(f"从名称推断规格(直接格式): {original_name} -> {inferred_spec}")
return inferred_spec
# 如"12.9L桶装水" -> "12.9L*1"
pattern3 = r'.*?([\d\.]+)L.*'
match = re.match(pattern3, name)
# 特殊模式3: "xx纸箱" 格式,如"500茶π蜜桃乌龙15纸箱" -> "1*15"
pattern3 = r'.*?(\d+)纸箱'
match = re.search(pattern3, name)
if match:
inferred_spec = f"1*{match.group(1)}"
logger.info(f"从名称推断规格(纸箱): {original_name} -> {inferred_spec}")
return inferred_spec
# 特殊模式4: "xx白膜" 格式,如"1.5L水12白膜" 或 "550水24白膜" -> "1*12" 或 "1*24"
pattern4 = r'.*?(\d+)白膜'
match = re.search(pattern4, name)
if match:
inferred_spec = f"1*{match.group(1)}"
logger.info(f"从名称推断规格(白膜): {original_name} -> {inferred_spec}")
return inferred_spec
# 特殊模式5: 容量单位如"12.9L桶装水" -> "12.9L*1"
pattern5 = r'.*?([\d\.]+)L.*'
match = re.search(pattern5, name)
if match:
inferred_spec = f"{match.group(1)}L*1"
logger.info(f"从名称推断规格(L): {name} -> {inferred_spec}")
logger.info(f"从名称推断规格(容量): {original_name} -> {inferred_spec}")
return inferred_spec
# 从名称中提取规格
# 尝试通用模式匹配
spec = self.extract_specification(name)
if spec:
logger.info(f"从名称推断规格(通用模式): {original_name} -> {spec}")
return spec
return None
+11 -7
View File
@@ -270,6 +270,10 @@ class ExcelProcessor:
logger.info(f"列名映射结果: {mapped_columns}")
# 检查是否有规格列
has_specification_column = 'specification' in mapped_columns
logger.info(f"是否存在规格列: {has_specification_column}")
# 提取商品信息
products = []
@@ -298,13 +302,6 @@ class ExcelProcessor:
product['name'] = f"商品 ({product['barcode']})"
logger.info(f"商品名称为空,使用条码作为名称: {product['name']}")
# 推断规格
if not product['specification'] and product['name']:
inferred_spec = self.unit_converter.infer_specification_from_name(product['name'])
if inferred_spec:
product['specification'] = inferred_spec
logger.info(f"从商品名称推断规格: {product['name']} -> {inferred_spec}")
# 单位处理:如果单位为空但数量包含单位信息
quantity_str = str(row.get(mapped_columns.get('quantity', ''), ''))
if not product['unit'] and 'quantity' in mapped_columns:
@@ -316,6 +313,13 @@ class ExcelProcessor:
if num is not None:
product['quantity'] = num
# 推断规格:如果规格为空或不存在规格列,尝试从商品名称推断
if (not product['specification'] or not has_specification_column) and product['name']:
inferred_spec = self.unit_converter.infer_specification_from_name(product['name'])
if inferred_spec:
product['specification'] = inferred_spec
logger.info(f"从商品名称推断规格: {product['name']} -> {inferred_spec}")
# 应用单位转换规则
product = self.unit_converter.process_unit_conversion(product)