This commit is contained in:
2025-05-10 14:28:50 +08:00
parent 9b2007a995
commit c3a0e29b19
24 changed files with 365 additions and 14 deletions
Binary file not shown.
+11 -1
View File
@@ -237,10 +237,17 @@ class ExcelProcessor:
# Process every data row of the sheet; rows that fail the checks below are skipped.
for idx, row in df.iterrows():
try:
# Skip invalid rows: the name cell is empty/NaN (or no name column is mapped),
# or the name contains a summary marker ("小计" subtotal, "合计" total, "总计" grand total).
name_val = str(row[column_mapping['name']]) if column_mapping.get('name') and not pd.isna(row[column_mapping['name']]) else ''
if not name_val or any(key in name_val for key in ["小计", "合计", "总计"]):
continue
# Barcode handling - the barcode should always end up as a string without a decimal point.
# When no barcode column is mapped the raw value is '' and the row is skipped below.
barcode_raw = row[column_mapping['barcode']] if column_mapping.get('barcode') else ''
# Skip rows whose barcode is NaN, empty, or the literal text 'nan' / 'None'.
if pd.isna(barcode_raw) or barcode_raw == '' or str(barcode_raw).strip() in ['nan', 'None']:
continue
# Skip rows whose barcode text is shorter than 7 characters, or (only when a quantity
# column is mapped) whose quantity is NaN or reads as 'nan', 'None', '0' or '0.0'.
# Unit price and name are NOT checked here.
# NOTE(review): the length is measured on str(barcode_raw) before format_barcode runs,
# so a float-typed cell such as 123456.0 has its ".0" counted - confirm this is intended.
if (len(str(barcode_raw)) < 7) or (column_mapping.get('quantity') and (pd.isna(row[column_mapping['quantity']]) or str(row[column_mapping['quantity']]).strip() in ['nan', 'None', '0', '0.0'])):
continue
# Normalize the barcode with format_barcode (defined elsewhere; expected to return
# the barcode without a decimal point - verify against its definition).
barcode = format_barcode(barcode_raw)
@@ -296,13 +303,16 @@ class ExcelProcessor:
# Extract the specification text and parse the package quantity from it.
# A column literally named '规格' takes priority over the mapped specification column.
if '规格' in df.columns and not pd.isna(row['规格']):
product['specification'] = str(row['规格'])
# Fix an OCR misread where the unit letter "L" was read as the digit "1",
# e.g. "4.51*4" becomes "4.5L*4".
# NOTE(review): the pattern rewrites any "<decimal>1*<digits>" text, so a genuine
# trailing 1 (e.g. "2.51*6" meaning 2.51) would also be changed - confirm acceptable.
product['specification'] = re.sub(r'(\d+\.\d+)1\*(\d+)', r'\1L*\2', product['specification'])
package_quantity = self.parse_specification(product['specification'])
# Store and log the package quantity only when parsing returned a truthy value.
if package_quantity:
product['package_quantity'] = package_quantity
logger.info(f"解析规格: {product['specification']} -> 包装数量={package_quantity}")
elif column_mapping.get('specification') and not pd.isna(row[column_mapping['specification']]):
# Fallback: handle a specification column that was located through the column mapping.
product['specification'] = str(row[column_mapping['specification']])
# Apply the same OCR fix as above ("4.51*4" -> "4.5L*4").
product['specification'] = re.sub(r'(\d+\.\d+)1\*(\d+)', r'\1L*\2', product['specification'])
package_quantity = self.parse_specification(product['specification'])
# Store the package quantity only when parsing returned a truthy value.
if package_quantity:
product['package_quantity'] = package_quantity