From 5cf3eeed0fd5db4441a6d8d84443edaa153e228c Mon Sep 17 00:00:00 2001 From: houhuan Date: Fri, 30 May 2025 11:54:08 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=95=B0=E9=87=8F=E4=B8=BA?= =?UTF-8?q?=E7=A9=BA=E6=97=B6=E9=80=9A=E8=BF=87=E9=87=91=E9=A2=9D=E5=92=8C?= =?UTF-8?q?=E5=8D=95=E4=BB=B7=E8=AE=A1=E7=AE=97=E6=95=B0=E9=87=8F=E7=9A=84?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=EF=BC=8C=E5=A2=9E=E5=BC=BA=E8=A7=84=E6=A0=BC?= =?UTF-8?q?=E8=A7=A3=E6=9E=90=E8=83=BD=E5=8A=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 2 + app/core/excel/processor.py | 185 +++++++++++++++++------------ app/core/excel/validators.py | 64 ++++++++-- tests/test_quantity_calculation.py | 89 ++++++++++++++ 4 files changed, 251 insertions(+), 89 deletions(-) create mode 100644 tests/test_quantity_calculation.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bed6d3..4b13ceb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - 添加对特殊条码6958620703716的处理,支持同时设置规格和条码映射 - 增强不规范规格格式的解析能力(如"IL*12"、"6oo*12"等) - 支持带重量单位的规格解析(如"5kg*6") +- 添加数量为空时通过金额和单价自动计算数量的功能 ### 修复 - 修复条码映射功能在特殊处理后不生效的问题 @@ -17,6 +18,7 @@ - 改进了规格解析逻辑,增加了对各种单位和格式的支持 - 添加条码映射对话框中可视化标记映射关系 - 更新了条码映射配置文件,增加了更多特殊条码处理 +- 改进商品验证器,在数量为空但单价和金额存在时,自动计算数量 ## v1.0.0 (2025-05-01) diff --git a/app/core/excel/processor.py b/app/core/excel/processor.py index 8c5d23d..2971fd6 100644 --- a/app/core/excel/processor.py +++ b/app/core/excel/processor.py @@ -215,88 +215,84 @@ class ExcelProcessor: def extract_product_info(self, df: pd.DataFrame) -> List[Dict]: """ - 从处理后的数据框中提取商品信息 - 支持处理不同格式的Excel文件 + 从数据帧中提取商品信息 Args: - df: 数据框 + df: 数据帧 Returns: - 商品信息列表,每个商品为一个字典 + 商品信息列表 """ products = [] - # 检测表头位置和数据格式 + # 检测列映射 column_mapping = self._detect_column_mapping(df) - logger.info(f"列名映射结果: {column_mapping}") + logger.info(f"检测到列映射: {column_mapping}") - # 检查是否有规格列 - has_specification_column = '规格' in df.columns - logger.info(f"是否存在规格列: {has_specification_column}") - - # 处理每一行数据 + # 处理每一行 for idx, row in df.iterrows(): try: - # 跳过无效行:名称为空、包含小计/合计/总计/空行等 - name_val = str(row[column_mapping['name']]) if column_mapping.get('name') and not pd.isna(row[column_mapping['name']]) else '' - if not name_val or any(key in name_val for key in ["小计", "合计", "总计"]): - continue - # 条码处理 - 确保条码总是字符串格式且不带小数点 - barcode_raw = row[column_mapping['barcode']] if column_mapping.get('barcode') else '' - if pd.isna(barcode_raw) or barcode_raw == '' or str(barcode_raw).strip() in ['nan', 'None']: - continue - # 跳过条码长度异常、数量为0、单价为0且名称疑似无效的行 - if (len(str(barcode_raw)) < 7) or (column_mapping.get('quantity') and (pd.isna(row[column_mapping['quantity']]) or str(row[column_mapping['quantity']]).strip() in ['nan', 'None', '0', '0.0'])): - continue - - # 使用format_barcode函数处理条码,确保无小数点 - barcode = format_barcode(barcode_raw) - - # 处理数量字段,先提取数字部分再转换为浮点数 - quantity_value = 0 - quantity_str = "" - if column_mapping.get('quantity') and not pd.isna(row[column_mapping['quantity']]): - quantity_str = str(row[column_mapping['quantity']]) - # 使用提取数字的函数 - quantity_num = extract_number(quantity_str) - if quantity_num is not None: - quantity_value = quantity_num - - # 基础信息 + # 初始化商品信息 product = { - 'barcode': barcode, - 'name': str(row[column_mapping['name']]) if column_mapping.get('name') else '', - 'quantity': quantity_value, - 'price': 0, - 'unit': str(row[column_mapping['unit']]) if column_mapping.get('unit') and not pd.isna(row[column_mapping['unit']]) else '', - 'specification': '', - 'package_quantity': None + 'barcode': '', # 条码 + 'name': '', # 商品名称 + 'specification': '', # 规格 + 'quantity': 0, # 数量 + 'unit': '', # 单位 + 'price': 0, # 单价 + 'amount': 0, # 金额 + 'is_gift': False # 是否为赠品 } - # 处理价格字段 - 清理可能的换行符和空格 - if column_mapping.get('price') and not pd.isna(row[column_mapping['price']]): - price_str = str(row[column_mapping['price']]) - # 清理换行符、空格并替换逗号 - price_str = price_str.replace('\n', '').replace(' ', '').replace(',', '.') - try: - product['price'] = float(price_str) - except ValueError: - logger.warning(f"价格转换失败,原始值: '{price_str}',使用默认值0") + # 提取条码 + if '条码' in df.columns and not pd.isna(row['条码']): + product['barcode'] = str(row['条码']).strip() + elif column_mapping.get('barcode') and not pd.isna(row[column_mapping['barcode']]): + product['barcode'] = str(row[column_mapping['barcode']]).strip() - # 清理单位 - if product['unit'] == 'nan' or product['unit'] == 'None': - product['unit'] = '' + # 跳过空条码行 + if not product['barcode']: + continue + + # 提取商品名称 + if '商品名称' in df.columns and not pd.isna(row['商品名称']): + product['name'] = str(row['商品名称']).strip() + elif '名称' in df.columns and not pd.isna(row['名称']): + product['name'] = str(row['名称']).strip() + elif column_mapping.get('name') and not pd.isna(row[column_mapping['name']]): + product['name'] = str(row[column_mapping['name']]).strip() + + # 提取单位 + if '单位' in df.columns and not pd.isna(row['单位']): + product['unit'] = str(row['单位']).strip() + elif column_mapping.get('unit') and not pd.isna(row[column_mapping['unit']]): + product['unit'] = str(row[column_mapping['unit']]).strip() - # 打印每行提取出的信息 - logger.info(f"第{idx+1}行: 提取商品信息 条码={product['barcode']}, 名称={product['name']}, 规格={product['specification']}, 数量={product['quantity']}, 单位={product['unit']}, 单价={product['price']}") + # 提取单价 + if '单价' in df.columns and not pd.isna(row['单价']): + product['price'] = row['单价'] + elif column_mapping.get('price') and not pd.isna(row[column_mapping['price']]): + product['price'] = row[column_mapping['price']] - # 从数量字段中提取单位(如果单位字段为空) - if not product['unit'] and quantity_str: - num, unit = self.unit_converter.extract_unit_from_quantity(quantity_str) + # 提取金额 + if '金额' in df.columns and not pd.isna(row['金额']): + product['amount'] = row['金额'] + elif '小计' in df.columns and not pd.isna(row['小计']): + product['amount'] = row['小计'] + elif column_mapping.get('amount') and not pd.isna(row[column_mapping['amount']]): + product['amount'] = row[column_mapping['amount']] + + # 提取数量 + if '数量' in df.columns and not pd.isna(row['数量']): + product['quantity'] = row['数量'] + elif column_mapping.get('quantity') and not pd.isna(row[column_mapping['quantity']]): + product['quantity'] = row[column_mapping['quantity']] + + # 处理可能的复合数量字段,例如"2箱"、"3件" + if isinstance(product['quantity'], str) and product['quantity']: + num, unit = self.unit_converter.extract_unit_from_quantity(product['quantity']) if unit: product['unit'] = unit - logger.info(f"从数量提取单位: {quantity_str} -> {unit}") - # 如果数量被提取出来,更新数量 if num is not None: product['quantity'] = num @@ -665,7 +661,7 @@ class ExcelProcessor: def _detect_column_mapping(self, df: pd.DataFrame) -> Dict[str, str]: """ - 检测和映射Excel表头列名 + 自动检测列名映射 Args: df: 数据框 @@ -687,7 +683,8 @@ class ExcelProcessor: 'specification': ['规格', '规格型号', '型号', '商品规格', '产品规格', '包装规格','规 格'], 'quantity': ['数量', '采购数量', '购买数量', '采购数量', '订单数量', '数量(必填)', '入库数', '入库数量','数 量'], 'unit': ['单位', '采购单位', '计量单位', '单位(必填)', '单位名称', '计价单位','单 位'], - 'price': ['单价', '价格', '采购单价', '销售价', '进货价', '单价(必填)', '采购价', '参考价', '入库单价','单 价'] + 'price': ['单价', '价格', '采购单价', '销售价', '进货价', '单价(必填)', '采购价', '参考价', '入库单价','单 价'], + 'amount': ['金额', '小计', '总价', '合计金额', '小计金额', '金额(元)', '金额合计', '合计', '总额'] } # 映射列名到标准名称 @@ -719,7 +716,7 @@ class ExcelProcessor: if target in mapped_columns: break - return mapped_columns + return mapped_columns def infer_specification_from_name(self, product_name: str) -> Tuple[Optional[str], Optional[int]]: """ @@ -819,7 +816,7 @@ class ExcelProcessor: def parse_specification(self, spec_str: str) -> Optional[int]: """ 解析规格字符串,提取包装数量 - 支持格式:1*15, 1x15, 1*5*10 + 支持格式:1*15, 1x15, 1*5*10, 5kg*6, IL*12等 Args: spec_str: 规格字符串 @@ -834,34 +831,66 @@ class ExcelProcessor: # 清理规格字符串 spec_str = clean_string(spec_str) - # 匹配重量/容量格式,如"450g*15"、"450ml*15" - match = re.search(r'\d+(?:g|ml|毫升|克)[*xX×](\d+)', spec_str) + # 处理可能的OCR误识别,如"IL"应为"1L","6oo"应为"600" + spec_str = re.sub(r'(\b|^)[iIlL](\d+)', r'1\2', spec_str) # 将"IL"替换为"1L" + spec_str = re.sub(r'(\d+)[oO0]{2,}', lambda m: m.group(1) + '00', spec_str) # 将"6oo"替换为"600" + spec_str = spec_str.replace('×', '*').replace('x', '*').replace('X', '*') # 统一乘号 + + logger.debug(f"清理后的规格字符串: {spec_str}") + + # 匹配带单位的格式,如"5kg*6"、"450g*15"、"450ml*15" + weight_pattern = r'(\d+(?:\.\d+)?)\s*(?:kg|KG|千克|公斤)[*×](\d+)' + match = re.search(weight_pattern, spec_str) + if match: + return int(match.group(2)) + + # 匹配克、毫升等单位格式 + match = re.search(r'\d+(?:\.\d+)?(?:g|G|ml|ML|mL|毫升|克)[*×](\d+)', spec_str) if match: - # 返回后面的数量 return int(match.group(1)) # 匹配1*5*10 格式的三级规格 - match = re.search(r'(\d+)[\*xX×](\d+)[\*xX×](\d+)', spec_str) + match = re.search(r'(\d+(?:\.\d+)?)[*×](\d+(?:\.\d+)?)[*×](\d+(?:\.\d+)?)', spec_str) if match: # 取最后一个数字作为袋数量 - return int(match.group(3)) + return int(float(match.group(3))) # 匹配1*15, 1x15 格式 - match = re.search(r'(\d+)[\*xX×](\d+)', spec_str) + match = re.search(r'(\d+(?:\.\d+)?)[*×](\d+(?:\.\d+)?)', spec_str) if match: # 取第二个数字作为包装数量 - return int(match.group(2)) + return int(float(match.group(2))) # 匹配24瓶/件等格式 - match = re.search(r'(\d+)[瓶个支袋][//](件|箱)', spec_str) + match = re.search(r'(\d+(?:\.\d+)?)[瓶个支袋][//](件|箱)', spec_str) if match: - return int(match.group(1)) + return int(float(match.group(1))) # 匹配4L格式 - match = re.search(r'(\d+(?:\.\d+)?)\s*[Ll升][*×]?(\d+)?', spec_str) + match = re.search(r'(\d+(?:\.\d+)?)\s*[Ll升][*×]?(\d+(?:\.\d+)?)?', spec_str) if match: # 如果有第二个数字,返回它;否则返回1 - return int(match.group(2)) if match.group(2) else 1 + return int(float(match.group(2))) if match.group(2) else 1 + + # 匹配单独的数字+单位格式,如"12瓶装" + match = re.search(r'(\d+(?:\.\d+)?)[瓶个支袋包盒罐箱](?:装|\/箱)?', spec_str) + if match: + return int(float(match.group(1))) + + # 尝试直接匹配任何数字 + numbers = re.findall(r'\d+(?:\.\d+)?', spec_str) + if numbers and len(numbers) > 0: + # 如果只有一个数字,通常是包装数量 + if len(numbers) == 1: + return int(float(numbers[0])) + + # 如果有多个数字,尝试识别可能的包装数量(典型数值如6/12/24/30) + for num in numbers: + if float(num) in [6.0, 12.0, 24.0, 30.0]: + return int(float(num)) + + # 如果没有典型数值,选择最后一个数字(通常是包装数量) + return int(float(numbers[-1])) except Exception as e: logger.warning(f"解析规格'{spec_str}'时出错: {e}") diff --git a/app/core/excel/validators.py b/app/core/excel/validators.py index 0898973..0551096 100644 --- a/app/core/excel/validators.py +++ b/app/core/excel/validators.py @@ -142,8 +142,17 @@ class ProductValidator: if price_str in ["赠品", "gift", "赠送", "0", ""]: return True, 0.0, True, None - # 去除空白和非数字字符(保留小数点) - price_clean = re.sub(r'[^\d\.]', '', price_str) + # 去除空白和非数字字符(保留小数点和逗号) + price_clean = re.sub(r'[^\d\.,]', '', price_str) + + # 处理小数点和逗号 + if ',' in price_clean and '.' not in price_clean: + # 如果只有逗号没有小数点,将逗号视为小数点 + price_clean = price_clean.replace(',', '.') + elif ',' in price_clean and '.' in price_clean: + # 如果既有逗号又有小数点,移除逗号(认为逗号是千位分隔符) + price_clean = price_clean.replace(',', '') + if not price_clean: return False, 0.0, True, "单价不包含数字,视为赠品" @@ -192,15 +201,6 @@ class ProductValidator: # 即使验证失败,但如果有修复后的条码仍然使用它 validated_product['barcode'] = fixed_barcode - # 验证数量 - quantity = product.get('quantity', 0) - is_valid, fixed_quantity, error_msg = self.validate_quantity(quantity) - if is_valid: - validated_product['quantity'] = fixed_quantity - else: - logger.warning(f"数量验证失败: {error_msg}") - validated_product['quantity'] = 0.0 - # 验证单价 price = product.get('price', 0) is_valid, fixed_price, is_gift, error_msg = self.validate_price(price) @@ -212,4 +212,46 @@ class ProductValidator: if error_msg: logger.info(error_msg) + # 验证数量 + quantity = product.get('quantity', None) + is_valid, fixed_quantity, error_msg = self.validate_quantity(quantity) + + # 检查数量是否为空,但单价和金额存在的情况 + if not is_valid and error_msg == "数量为空": + # 获取金额 + amount = product.get('amount', None) + + # 如果单价有效且金额存在,则可以计算数量 + if fixed_price > 0 and amount is not None: + try: + # 确保金额是数字 + if isinstance(amount, str): + # 移除货币符号和非数字字符,保留数字、小数点和逗号 + amount_str = re.sub(r'[^\d\.,]', '', amount.strip()) + # 替换逗号为小数点(如果逗号作为小数分隔符) + if ',' in amount_str and '.' not in amount_str: + amount_str = amount_str.replace(',', '.') + # 处理既有逗号又有小数点的情况(通常逗号是千位分隔符) + elif ',' in amount_str and '.' in amount_str: + amount_str = amount_str.replace(',', '') + amount = float(amount_str) + else: + amount = float(amount) + + # 计算数量 = 金额 / 单价 + if amount > 0: + calculated_quantity = amount / fixed_price + logger.info(f"数量为空,通过金额({amount})和单价({fixed_price})计算得出数量: {calculated_quantity}") + validated_product['quantity'] = calculated_quantity + is_valid = True + except (ValueError, TypeError, ZeroDivisionError) as e: + logger.warning(f"通过金额和单价计算数量失败: {e}") + + # 如果数量验证有效或通过金额计算成功 + if is_valid: + validated_product['quantity'] = fixed_quantity if is_valid and fixed_quantity > 0 else validated_product.get('quantity', 0) + else: + logger.warning(f"数量验证失败: {error_msg}") + validated_product['quantity'] = 0.0 + return validated_product \ No newline at end of file diff --git a/tests/test_quantity_calculation.py b/tests/test_quantity_calculation.py new file mode 100644 index 0000000..0926600 --- /dev/null +++ b/tests/test_quantity_calculation.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +测试数量计算逻辑 +""" + +import unittest +import sys +import os +import pandas as pd +from decimal import Decimal + +# 添加项目根目录到路径 +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from app.core.excel.validators import ProductValidator + + +class TestQuantityCalculation(unittest.TestCase): + """测试数量计算逻辑""" + + def setUp(self): + """设置测试环境""" + self.validator = ProductValidator() + + def test_quantity_calculation_from_amount(self): + """测试通过单价和金额计算数量""" + # 测试数量为空,但单价和金额存在的情况 + product = { + 'barcode': '6901028075862', + 'name': '可口可乐', + 'quantity': None, + 'price': 5.0, + 'amount': 60.0, + 'unit': '瓶' + } + + # 验证产品 + validated = self.validator.validate_product(product) + + # 断言:数量应该被计算为金额/单价 = 60/5 = 12 + self.assertAlmostEqual(validated['quantity'], 12.0, places=2) + + def test_quantity_calculation_with_string_values(self): + """测试字符串形式的单价和金额""" + # 测试数量为空,单价和金额为字符串的情况 + product = { + 'barcode': '6901028075862', + 'name': '可口可乐', + 'quantity': None, + 'price': '5.0', + 'amount': '60.0', + 'unit': '瓶' + } + + # 验证产品 + validated = self.validator.validate_product(product) + + # 断言:数量应该被计算为金额/单价 = 60/5 = 12 + self.assertAlmostEqual(validated['quantity'], 12.0, places=2) + + def test_quantity_calculation_with_format_issues(self): + """测试格式问题的情况""" + # 测试数量为空,单价和金额有格式问题的情况 + product = { + 'barcode': '6901028075862', + 'name': '可口可乐', + 'quantity': None, + 'price': '5,0', # 使用逗号作为小数点 + 'amount': '¥60.0', # 带货币符号 + 'unit': '瓶' + } + + # 验证产品 + validated = self.validator.validate_product(product) + + # 断言:数量应该被计算为金额/单价 = 60/5 = 12 + self.assertAlmostEqual(validated['quantity'], 12.0, places=2) + + def test_specification_parsing(self): + """测试规格解析逻辑""" + # 这部分测试需要导入规格解析器 + # 由于需要引入额外的代码,此处仅作为示例 + pass + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file