feat: 益选 OCR 订单处理系统初始提交
- 智能供应商识别(蓉城易购/烟草/杨碧月/通用) - 百度 OCR 表格识别集成 - 规则引擎(列映射/数据清洗/单位转换/规格推断) - 条码映射管理与云端同步(Gitea REST API) - 云端同步支持:条码映射、供应商配置、商品资料、采购模板 - 拖拽一键处理(图片→OCR→Excel→合并) - 191 个单元测试 - 移除无用的模板管理功能 - 清理 IDE 产物目录 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
"""
|
||||
OCR订单处理系统 - Excel处理模块
|
||||
----------------------------
|
||||
提供Excel文件处理、数据提取和转换功能。
|
||||
"""
|
||||
@@ -0,0 +1,535 @@
|
||||
"""
|
||||
单位转换模块
|
||||
----------
|
||||
提供单位转换功能,支持规格推断和单位自动提取。
|
||||
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
import os
|
||||
import json
|
||||
from typing import Dict, Tuple, Optional, Any, List, Union
|
||||
|
||||
from ..utils.log_utils import get_logger
|
||||
from .handlers.barcode_mapper import BarcodeMapper
|
||||
from .handlers.unit_converter_handlers import (
|
||||
JianUnitHandler, BoxUnitHandler, TiHeUnitHandler,
|
||||
GiftUnitHandler, UnitHandler
|
||||
)
|
||||
from .validators import ProductValidator
|
||||
|
||||
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)

# Path of the barcode-mapping configuration file (a relative path).
BARCODE_MAPPING_CONFIG = "config/barcode_mappings.json"
|
||||
|
||||
class UnitConverter:
    """
    Unit converter.

    Converts quantity/price between packaging units and can infer a
    specification string (for example ``1*12``) from a product name.
    """

    # (regex, expansion template) pairs tried in order by
    # ``extract_specification``.  The three-level pattern must come before the
    # two-level one, otherwise "1*5*12" is always cut short to "1*5".  For the
    # unit-qualified patterns the trailing count is optional: a missing count
    # expands to an empty string and is completed to "*1" by the caller.
    _SPEC_PATTERNS = (
        # Three-level formats such as 1*5*12 or 1x5x12
        (r'(\d+)[*xX×](\d+)[*xX×](\d+)', r'\1*\2*\3'),
        # Two-level formats such as 1*6, 1x12, 1X20
        (r'(\d+)[*xX×](\d+)', r'\1*\2'),
        # "NN入" format, e.g. "12入", "24入"
        (r'(\d+)入', r'1*\1'),
        # "NNL*1" / "NN升*1" format
        (r'([\d\.]+)[L升][*xX×]?(\d+)?', r'\1L*\2'),
        # "NNkg*1" / "NN公斤*1" format
        (r'([\d\.]+)(?:kg|公斤)[*xX×]?(\d+)?', r'\1kg*\2'),
        # "NNg*1" / "NN克*1" format
        (r'([\d\.]+)(?:g|克)[*xX×]?(\d+)?', r'\1g*\2'),
        # "NNmL*1" / "NN毫升*1" format
        (r'([\d\.]+)(?:mL|毫升)[*xX×]?(\d+)?', r'\1mL*\2'),
    )

    # (regex, expansion template, log label) rules tried in order by
    # ``infer_specification_from_name`` before the generic patterns.
    _NAME_SPEC_RULES = (
        # weight/volume * count, e.g. "450g*15": keep only the count
        (r'.*?\d+(?:g|ml|毫升|克)[*xX×](\d+)', r'1*\1', '重量/容量*数量'),
        # "NN入白膜", e.g. "550纯净水24入白膜" -> 1*24
        (r'.*?(\d+)入白膜', r'1*\1', '入白膜'),
        # "NN入纸箱", e.g. "445水溶C血橙15入纸箱" -> 1*15
        (r'.*?(\d+)入纸箱', r'1*\1', '入纸箱'),
        # Three-level spec embedded in the name; checked before the two-level
        # rule so "1*5*12" is not truncated to "1*5"
        (r'(\d+)[*xX×](\d+)[*xX×](\d+)', r'\1*\2*\3', '三级格式'),
        # Two-level spec embedded in the name, e.g. "...乌龙茶1*15-纸箱装"
        (r'.*?(\d+)[*xX×](\d+).*', r'\1*\2', '直接格式'),
        # "NN纸箱", e.g. "500茶π蜜桃乌龙15纸箱" -> 1*15
        (r'.*?(\d+)纸箱', r'1*\1', '纸箱'),
        # "NN白膜", e.g. "550水24白膜" -> 1*24
        (r'.*?(\d+)白膜', r'1*\1', '白膜'),
        # volume with count, e.g. "1.8L*8瓶" -> 1.8L*8
        (r'.*?([\d\.]+)[Ll升][*×xX](\d+).*', r'\1L*\2', '容量*数量'),
        # bare volume, e.g. "12.9L桶装水" -> 12.9L*1
        (r'.*?([\d\.]+)[Ll升].*', r'\1L*1', '简单容量'),
    )

    # Text fragments removed from a quantity string before parsing.  Both the
    # ASCII and the full-width colon variants of "数量:" are covered.
    _QUANTITY_PREFIXES = ('数量:', '数量\uff1a', '×', 'x', 'X', '*')

    # Known units for the fallback search.  This is an alternation (not a
    # character class) with longer units listed before their substrings so
    # that "kg" is not reported as "k" and "毫升" is not reported as "毫".
    _UNIT_SEARCH_PATTERN = r'([\d\.]+)\s*(kg|g|毫升|升|ml|L|箱|件|瓶|提|盒|袋|桶|包|个)'

    def __init__(self):
        """Load the barcode mappings and set up patterns, handlers and validator."""
        # Load the special-barcode configuration
        self.special_barcodes = self.load_barcode_mappings()

        # Regex patterns used for generic specification extraction
        self.spec_patterns = list(self._SPEC_PATTERNS)

        # Build the barcode mapper and the unit handlers
        self._init_handlers()

        # Build the product validator
        self.validator = ProductValidator()

    def _init_handlers(self):
        """Create the barcode mapper and the ordered list of unit handlers."""
        self.barcode_mapper = BarcodeMapper(self.special_barcodes)

        # Handlers are tried in order; the first one that accepts the product wins.
        self.unit_handlers: List[UnitHandler] = [
            GiftUnitHandler(),  # gifts first (highest priority)
            JianUnitHandler(),  # "件" unit
            BoxUnitHandler(),   # "箱" unit
            TiHeUnitHandler()   # "提" and "盒" units
        ]

    def extract_unit_from_quantity(self, quantity_str: str) -> Tuple[Optional[float], Optional[str]]:
        """
        Extract a number and a unit from a quantity string.

        Supported formats:
        1. "2箱" -> (2, "箱")
        2. "3件" -> (3, "件")
        3. "1.5提" -> (1.5, "提")
        4. "数量: 5盒" -> (5, "盒")
        5. "× 2瓶" -> (2, "瓶")

        Args:
            quantity_str: quantity text such as "2箱" or "5件"

        Returns:
            (quantity, unit); (None, None) when nothing can be extracted
        """
        if not quantity_str or not isinstance(quantity_str, str):
            return None, None

        # Strip surrounding whitespace and drop the common decorations
        cleaned_str = quantity_str.strip()
        for prefix in self._QUANTITY_PREFIXES:
            cleaned_str = cleaned_str.replace(prefix, '').strip()

        # First the strict "number + unit" form, then a search for a known
        # unit anywhere inside longer text.
        attempts = (
            ('基本格式', re.match, r'^([\d\.]+)\s*([^\d\s\.]+)$'),
            ('复杂格式', re.search, self._UNIT_SEARCH_PATTERN),
        )
        for label, matcher, pattern in attempts:
            found = matcher(pattern, cleaned_str)
            if not found:
                continue
            try:
                num = float(found.group(1))
            except ValueError:
                # e.g. "1.2.3" is matched by [\d\.]+ but is not a number
                continue
            unit = found.group(2)
            logger.info(f"从数量提取单位({label}): {quantity_str} -> 数量={num}, 单位={unit}")
            return num, unit

        return None, None

    def extract_specification(self, text: str) -> Optional[str]:
        """
        Extract a specification string from free text.

        Args:
            text: text to inspect

        Returns:
            The extracted specification, or None when no pattern matches
        """
        if not text or not isinstance(text, str):
            return None

        # "NN入白膜" format, e.g. "550纯净水24入白膜"
        match = re.search(r'.*?(\d+)入白膜', text)
        if match:
            result = f"1*{match.group(1)}"
            logger.info(f"提取规格(入白膜): {text} -> {result}")
            return result

        for pattern, replacement in self.spec_patterns:
            match = re.search(pattern, text)
            if not match:
                continue

            # Expand only the matched part (not the whole text).  Unmatched
            # optional groups expand to an empty string.
            result = match.expand(replacement)
            if result.endswith('*'):
                # The optional count was absent: a single unit per package
                result += '1'

            if match.re.groups == 3:
                logger.info(f"提取三级规格: {text} -> {result}")
            else:
                logger.info(f"提取规格: {text} -> {result}")
            return result

        # No pattern matched
        return None

    def infer_specification_from_name(self, name: str) -> Optional[str]:
        """
        Infer a specification from a product name.

        Rules (first match wins):
        1. "NNg*MM" / "NNml*MM" -> 1*MM (e.g. "450g*15" -> 1*15)
        2. "NN入白膜" / "NN入纸箱" -> 1*NN
        3. An embedded spec "1*5*12" or "1*15" is returned as is
        4. "NN纸箱" / "NN白膜" -> 1*NN
        5. "1.8L*8" -> 1.8L*8, bare "12.9L" -> 12.9L*1
        6. Otherwise the generic patterns of ``extract_specification``

        Args:
            name: product name

        Returns:
            The inferred specification, or None when nothing can be inferred
        """
        if not name or not isinstance(name, str):
            return None

        # Keep the untouched name for log output
        original_name = name

        for pattern, template, label in self._NAME_SPEC_RULES:
            match = re.search(pattern, name)
            if match:
                inferred_spec = match.expand(template)
                logger.info(f"从名称推断规格({label}): {original_name} -> {inferred_spec}")
                return inferred_spec

        # Fall back to the generic patterns
        spec = self.extract_specification(name)
        if spec:
            logger.info(f"从名称推断规格(通用模式): {original_name} -> {spec}")
            return spec

        return None

    def parse_specification(self, spec: str) -> Tuple[int, int, Optional[int]]:
        """
        Parse a specification string such as "1*12" or "1*5*12".

        Args:
            spec: specification string

        Returns:
            (level1, level2, level3); level3 is None for two-level specs.
            (1, 1, None) is returned for empty, non-string or unparseable input.
        """
        if not spec or not isinstance(spec, str):
            return 1, 1, None

        try:
            # Normalize: drop all whitespace and unify the separator to "*"
            spec = re.sub(r'\s+', '', spec)
            spec = re.sub(r'[xX×]', '*', spec)

            logger.debug(f"解析规格: {spec}")

            # Equation style, e.g. "1件=12桶" -> 1*12 (ASCII or full-width "=")
            eq_match = re.match(
                r'(\d+(?:\.\d+)?)\s*(?:件|箱|提|盒)\s*[=\uff1d]\s*(\d+)\s*(?:瓶|桶|盒|支|个|袋|罐|包|卷)',
                spec,
            )
            if eq_match:
                level2 = int(eq_match.group(2))
                logger.info(f"解析等式规格: {spec} -> 1*{level2}")
                return 1, level2, None

            # Three-level packaging, e.g. 1*5*12
            three_level_match = re.match(r'(\d+)[*](\d+)[*](\d+)', spec)
            if three_level_match:
                level1, level2, level3 = (int(part) for part in three_level_match.groups())
                logger.info(f"解析三级规格: {spec} -> {level1}*{level2}*{level3}")
                return level1, level2, level3

            # Unit-qualified specs (5kg*6, 500ml*15, 1L*12): the leading
            # weight/volume is ignored and the count becomes level2.
            unit_rules = (
                (r'([\d\.]+)(?:kg|g|克|千克|公斤)[*](\d+)', re.IGNORECASE, '重量'),
                (r'(\d+)(?:ml|毫升)[*](\d+)', re.IGNORECASE, '容量(ml)'),
                (r'(\d+(?:\.\d+)?)[Ll升][*](\d+)', 0, '容量(L)'),
            )
            for pattern, flags, label in unit_rules:
                unit_match = re.match(pattern, spec, flags)
                if unit_match:
                    level2 = int(unit_match.group(2))
                    logger.info(f"解析{label}规格: {spec} -> 1*{level2}")
                    return 1, level2, None

            # Two-level packaging, e.g. 1*12
            two_level_match = re.match(r'(\d+)[*](\d+)', spec)
            if two_level_match:
                level1 = int(two_level_match.group(1))
                level2 = int(two_level_match.group(2))
                logger.info(f"解析二级规格: {spec} -> {level1}*{level2}")
                return level1, level2, None

            # Looser L/升 form whose number part may be irregular (e.g. ".5L*12")
            volume_match = re.match(r'([\d\.]+)[L升][*xX×](\d+)', spec)
            if volume_match:
                try:
                    volume = float(volume_match.group(1))
                    quantity = int(volume_match.group(2))
                    logger.info(f"解析容量规格: {spec} -> {volume}L*{quantity}")
                    return 1, quantity, None
                except ValueError:
                    # Not a valid number: fall through to the irregular form
                    pass

            # Irregular text such as "IL*12" or "6oo*12": take the digits
            # that follow "*" as the package count.
            irregular_match = re.search(r'[^0-9]*\*(\d+)', spec)
            if irregular_match:
                level2 = int(irregular_match.group(1))
                logger.info(f"解析不规范规格: {spec} -> 1*{level2}")
                return 1, level2, None

            # Default
            logger.warning(f"无法解析规格: {spec},使用默认值1*1")
            return 1, 1, None

        except Exception as e:
            logger.error(f"解析规格时出错: {e}")
            return 1, 1, None

    def process_unit_conversion(self, product: Dict) -> Dict:
        """
        Apply unit conversion to one product.

        Steps:
        1. Validate the product, then apply special-barcode handling.
        2. If no specification is present, try to infer one from the name;
           return the product unchanged when that fails.
        3. Parse the specification and hand the product to the first unit
           handler that accepts it (gift, "件", "箱", "提"/"盒").
        4. Products no handler accepts are returned unchanged.

        Args:
            product: product dictionary

        Returns:
            The processed product dictionary
        """
        # Validate first
        product = self.validator.validate_product(product)

        # Work on a copy so the caller's dictionary is not modified
        result = product.copy()

        # Nothing to do without a barcode
        if not result.get('barcode', ''):
            return result

        # Special-barcode handling comes first
        result = self.barcode_mapper.map_barcode(result)

        # Read the specification *after* mapping: the mapper may have set a
        # fixed specification that must not be replaced by name inference.
        if not result.get('specification', ''):
            inferred_spec = self.infer_specification_from_name(result.get('name', ''))
            if not inferred_spec:
                # No specification available: conversion is not possible
                return result
            result['specification'] = inferred_spec
            logger.info(f"从商品名称推断规格: {result.get('name', '')} -> {inferred_spec}")

        level1, level2, level3 = self.parse_specification(result.get('specification', ''))

        for handler in self.unit_handlers:
            if handler.can_handle(result):
                return handler.handle(result, level1, level2, level3)

        # No handler applies: keep the product as is
        logger.info(f"其他单位处理: 保持原样 数量: {result.get('quantity', 0)}, 单价: {result.get('price', 0)}, 单位: {result.get('unit', '')}")
        return result

    @staticmethod
    def _default_barcode_mappings() -> Dict[str, Dict[str, Any]]:
        """Return the built-in barcode mappings used when no config file is usable."""
        return {
            '6925019900087': {
                'multiplier': 10,
                'target_unit': '瓶',
                'description': '特殊处理:数量*10,单位转换为瓶'
            },
            '6921168593804': {
                'multiplier': 30,
                'target_unit': '瓶',
                'description': 'NFC产品特殊处理:每箱30瓶'
            },
            '6901826888138': {
                'multiplier': 30,
                'target_unit': '瓶',
                'fixed_price': 112/30,
                'specification': '1*30',
                'description': '特殊处理: 规格1*30,数量*30,单价=112/30'
            },
            # Plain barcode remapping entries
            '6920584471055': {
                'map_to': '6920584471017',
                'description': '条码映射:6920584471055 -> 6920584471017'
            },
            '6925861571159': {
                'map_to': '69021824',
                'description': '条码映射:6925861571159 -> 69021824'
            },
            '6923644268923': {
                'map_to': '6923644268480',
                'description': '条码映射:6923644268923 -> 6923644268480'
            },
            # Needs both a fixed specification and a remapping
            '6958620703716': {
                'specification': '1*14',
                'map_to': '6958620703907',
                'description': '特殊处理: 规格1*14,同时映射到6958620703907'
            }
        }

    def load_barcode_mappings(self) -> Dict[str, Dict[str, Any]]:
        """
        Load the barcode mappings from the configuration file.

        When the file does not exist it is created with the built-in defaults.
        The defaults are also returned when the file cannot be read or does
        not contain a JSON object.

        Returns:
            Barcode mapping dictionary
        """
        default_mappings = self._default_barcode_mappings()

        try:
            if os.path.exists(BARCODE_MAPPING_CONFIG):
                with open(BARCODE_MAPPING_CONFIG, 'r', encoding='utf-8') as file:
                    mappings = json.load(file)
                if not isinstance(mappings, dict):
                    # A list/scalar would break barcode lookups later on
                    logger.error(f"加载条码映射配置失败: 配置内容不是字典({type(mappings).__name__})")
                    return default_mappings
                logger.info(f"成功加载条码映射配置,共{len(mappings)}项")
                return mappings

            # No config yet: write the defaults
            self.save_barcode_mappings(default_mappings)
            logger.info(f"创建默认条码映射配置,共{len(default_mappings)}项")
            return default_mappings
        except Exception as e:
            logger.error(f"加载条码映射配置失败: {e}")
            return default_mappings

    def save_barcode_mappings(self, mappings: Dict[str, Dict[str, Any]]) -> bool:
        """
        Write the barcode mappings to the configuration file.

        Args:
            mappings: barcode mapping dictionary

        Returns:
            True on success, False when writing failed (the error is logged)
        """
        try:
            # Make sure the config directory exists; a path without a
            # directory part needs no makedirs call.
            config_dir = os.path.dirname(BARCODE_MAPPING_CONFIG)
            if config_dir:
                os.makedirs(config_dir, exist_ok=True)

            with open(BARCODE_MAPPING_CONFIG, 'w', encoding='utf-8') as file:
                json.dump(mappings, file, ensure_ascii=False, indent=2)

            logger.info(f"条码映射配置保存成功,共{len(mappings)}项")
            return True
        except Exception as e:
            logger.error(f"保存条码映射配置失败: {e}")
            return False

    def update_barcode_mappings(self, new_mappings: Dict[str, Dict[str, Any]]) -> bool:
        """
        Replace the barcode mappings and persist them.

        Args:
            new_mappings: new barcode mapping dictionary

        Returns:
            True when the mappings were saved successfully
        """
        self.special_barcodes = new_mappings

        # Keep the mapper in sync; it holds its own reference to the mappings
        # and would otherwise keep using the previous dictionary.
        mapper = getattr(self, 'barcode_mapper', None)
        if mapper is not None:
            mapper.special_barcodes = new_mappings or {}

        return self.save_barcode_mappings(new_mappings)
|
||||
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
单位转换处理程序包
|
||||
-----------------
|
||||
提供单位转换和条码处理的各种处理程序
|
||||
"""
|
||||
|
||||
from typing import Dict, Any
|
||||
|
||||
# 导出所有处理程序类
|
||||
from .barcode_mapper import BarcodeMapper
|
||||
from .unit_converter_handlers import JianUnitHandler, BoxUnitHandler, TiHeUnitHandler, GiftUnitHandler, UnitHandler
|
||||
@@ -0,0 +1,83 @@
|
||||
"""
|
||||
条码映射处理程序
|
||||
-------------
|
||||
处理特殊条码的映射和转换
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Optional, Any
|
||||
|
||||
from ...utils.log_utils import get_logger
|
||||
|
||||
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class BarcodeMapper:
    """
    Barcode mapper: applies the special handling configured per barcode
    (quantity multiplier, fixed price, fixed specification, barcode remapping).
    """

    def __init__(self, special_barcodes: Dict[str, Dict[str, Any]]):
        """
        Initialize the mapper.

        Args:
            special_barcodes: special-barcode configuration keyed by barcode;
                a falsy value is treated as an empty configuration
        """
        self.special_barcodes = special_barcodes or {}

    def map_barcode(self, product: Dict[str, Any]) -> Dict[str, Any]:
        """
        Apply the special-barcode configuration to a product.

        Args:
            product: product dictionary containing a 'barcode' entry

        Returns:
            A copy of the product with the configured changes applied; an
            unchanged copy when the barcode has no special configuration
        """
        result = product.copy()
        barcode = result.get('barcode', '')

        # Barcodes without special configuration pass through untouched
        if not barcode or barcode not in self.special_barcodes:
            return result

        special_config = self.special_barcodes[barcode]

        # Quantity multiplier (with matching price division)
        if 'multiplier' in special_config:
            multiplier = special_config.get('multiplier', 1)
            target_unit = special_config.get('target_unit', '瓶')

            # A zero/negative/non-numeric multiplier would zero the quantity
            # or raise on the price division; treat it as "no scaling".
            if not isinstance(multiplier, (int, float)) or multiplier <= 0:
                logger.warning(f"特殊条码({barcode})的倍数无效: {multiplier},按1处理")
                multiplier = 1

            quantity = result.get('quantity', 0)
            new_quantity = quantity * multiplier

            price = result.get('price', 0)
            new_price = price / multiplier if price else 0

            # A configured fixed price overrides the computed one
            if 'fixed_price' in special_config:
                new_price = special_config['fixed_price']
                logger.info(f"特殊条码({barcode})使用固定单价: {new_price}")

            logger.info(f"特殊条码处理: {barcode}, 数量: {quantity} -> {new_quantity}, 单价: {price} -> {new_price}, 单位: {result.get('unit', '')} -> {target_unit}")

            result['quantity'] = new_quantity
            result['price'] = new_price
            result['unit'] = target_unit

        # A fixed specification applies whether or not a multiplier is
        # configured (e.g. an entry with only 'specification' and 'map_to').
        if 'specification' in special_config:
            result['specification'] = special_config['specification']
            logger.info(f"特殊条码({barcode})使用固定规格: {special_config['specification']}")

        # Remapping goes last so it can be combined with the handling above
        if 'map_to' in special_config:
            new_barcode = special_config['map_to']
            logger.info(f"条码映射: {barcode} -> {new_barcode}")
            result['barcode'] = new_barcode

        return result
|
||||
@@ -0,0 +1,286 @@
|
||||
"""
|
||||
单位转换处理程序
|
||||
-------------
|
||||
处理不同单位的转换逻辑
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Optional, Any, Tuple, Protocol
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from ...utils.log_utils import get_logger
|
||||
|
||||
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class UnitHandler(ABC):
    """Abstract base class that defines the unit-handler interface."""

    @abstractmethod
    def can_handle(self, product: Dict[str, Any]) -> bool:
        """
        Tell whether this handler is responsible for the product.

        Args:
            product: product dictionary

        Returns:
            True when the handler can process the product
        """

    @abstractmethod
    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
        """
        Perform the unit conversion.

        Args:
            product: product dictionary
            level1: first-level packaging count
            level2: second-level packaging count
            level3: third-level packaging count, may be None

        Returns:
            The processed product dictionary
        """
|
||||
|
||||
|
||||
class JianUnitHandler(UnitHandler):
    """Handler for products whose unit is "件"."""

    def can_handle(self, product: Dict[str, Any]) -> bool:
        """
        Tell whether the product's unit is "件".

        Args:
            product: product dictionary

        Returns:
            True when the stripped unit starts with "件" ("件", "件装", ...)
        """
        unit = str(product.get('unit', '')).strip()
        return unit.startswith('件')

    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
        """
        Convert a "件" product: quantity x package count, price / package
        count, unit becomes "瓶".

        Args:
            product: product dictionary
            level1: first-level packaging count (not used here)
            level2: second-level packaging count
            level3: third-level packaging count, may be None

        Returns:
            A converted copy of the product; an unchanged copy when the
            package count is not positive
        """
        result = product.copy()

        quantity = result.get('quantity', 0)
        price = result.get('price', 0)

        # Package count: level2 * level3, or just level2 without a third level
        packaging_count = level2 * (level3 or 1)

        # A spec like "1*0" would zero the quantity and make the price
        # division raise; leave such a product untouched instead.
        if packaging_count <= 0:
            logger.warning(f"件单位处理: 包装数量无效({packaging_count}),保持原样")
            return result

        new_quantity = quantity * packaging_count
        new_price = price / packaging_count if price else 0

        logger.info(f"件单位处理: 数量: {quantity} -> {new_quantity}, 单价: {price} -> {new_price}, 单位: 件 -> 瓶")

        result['quantity'] = new_quantity
        result['price'] = new_price
        result['unit'] = '瓶'

        return result
|
||||
|
||||
|
||||
class BoxUnitHandler(UnitHandler):
    """Handler for products whose unit is "箱"."""

    def can_handle(self, product: Dict[str, Any]) -> bool:
        """
        Tell whether the product's unit is "箱".

        Args:
            product: product dictionary

        Returns:
            True when the stripped unit starts with "箱" ("箱", "箱装", ...)
        """
        unit = str(product.get('unit', '')).strip()
        return unit.startswith('箱')

    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
        """
        Convert a "箱" product: quantity x package count, price / package
        count, unit becomes "瓶".

        Args:
            product: product dictionary
            level1: first-level packaging count (not used here)
            level2: second-level packaging count
            level3: third-level packaging count, may be None

        Returns:
            A converted copy of the product; an unchanged copy when the
            package count is not positive
        """
        result = product.copy()

        quantity = result.get('quantity', 0)
        price = result.get('price', 0)

        # Package count: level2 * level3, or just level2 without a third level
        packaging_count = level2 * (level3 or 1)

        # A spec like "1*0" would zero the quantity and make the price
        # division raise; leave such a product untouched instead.
        if packaging_count <= 0:
            logger.warning(f"箱单位处理: 包装数量无效({packaging_count}),保持原样")
            return result

        new_quantity = quantity * packaging_count
        new_price = price / packaging_count if price else 0

        logger.info(f"箱单位处理: 数量: {quantity} -> {new_quantity}, 单价: {price} -> {new_price}, 单位: 箱 -> 瓶")

        result['quantity'] = new_quantity
        result['price'] = new_price
        result['unit'] = '瓶'

        return result
|
||||
|
||||
|
||||
class TiHeUnitHandler(UnitHandler):
    """Handler for products whose unit is "提" or "盒"."""

    def can_handle(self, product: Dict[str, Any]) -> bool:
        """
        Tell whether the product's unit is "提" or "盒".

        Args:
            product: product dictionary

        Returns:
            True when the stripped unit starts with "提" or "盒"
        """
        unit = str(product.get('unit', '')).strip()
        return unit.startswith(('提', '盒'))

    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
        """
        Convert a "提"/"盒" product.

        - Three-level spec: quantity x level3, price / level3, unit becomes "瓶".
        - Two-level spec: the product is left unchanged.

        Args:
            product: product dictionary
            level1: first-level packaging count (not used here)
            level2: second-level packaging count (not used here)
            level3: third-level packaging count, may be None

        Returns:
            A processed copy of the product
        """
        result = product.copy()

        quantity = result.get('quantity', 0)
        price = result.get('price', 0)
        unit = result.get('unit', '')

        if level3 is None:
            # Two-level spec: keep as is
            logger.info(f"提/盒单位(二级规格)处理: 保持原样 数量: {quantity}, 单价: {price}, 单位: {unit}")
            return result

        # Three-level spec: only the last level is used as the package count
        packaging_count = level3

        # A zero count would zero the quantity and make the price division
        # raise; leave such a product untouched instead.
        if packaging_count <= 0:
            logger.warning(f"提/盒单位(三级规格)处理: 包装数量无效({packaging_count}),保持原样")
            return result

        new_quantity = quantity * packaging_count
        new_price = price / packaging_count if price else 0

        logger.info(f"提/盒单位(三级规格)处理: 数量: {quantity} -> {new_quantity}, 单价: {price} -> {new_price}, 单位: {unit} -> 瓶")

        result['quantity'] = new_quantity
        result['price'] = new_price
        result['unit'] = '瓶'

        return result
|
||||
|
||||
|
||||
class GiftUnitHandler(UnitHandler):
    """Handler for gift items: quantity is converted, price is forced to 0."""

    def can_handle(self, product: Dict[str, Any]) -> bool:
        """
        Tell whether the product is a gift.

        Args:
            product: product dictionary

        Returns:
            True only when product['is_gift'] is exactly True
        """
        return product.get('is_gift', False) is True

    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
        """
        Convert a gift item.

        - "件"/"箱" units: quantity x (level2 * level3), unit becomes "瓶".
        - "提"/"盒" units with a three-level spec: quantity x level3, unit
          becomes "瓶".
        - Anything else: quantity and unit stay unchanged.
        The price is always set to 0.

        Args:
            product: product dictionary
            level1: first-level packaging count (not used here)
            level2: second-level packaging count
            level3: third-level packaging count, may be None

        Returns:
            A processed copy of the product
        """
        result = product.copy()

        # Normalize the unit the same way the non-gift handlers do (strip +
        # prefix match) so "件装" or " 箱" gifts are converted like their
        # non-gift counterparts.
        unit = str(result.get('unit', '')).strip()
        quantity = result.get('quantity', 0)

        if unit.startswith(('件', '箱')):
            # Package count: level2 * level3, or just level2 without a third level
            packaging_count = level2 * (level3 or 1)
            new_quantity = quantity * packaging_count

            logger.info(f"赠品{unit}单位处理: 数量: {quantity} -> {new_quantity}, 单价: 0, 单位: {unit} -> 瓶")

            result['quantity'] = new_quantity
            result['unit'] = '瓶'
        elif unit.startswith(('提', '盒')) and level3 is not None:
            # Three-level "提"/"盒": multiply by the last level only
            new_quantity = quantity * level3

            logger.info(f"赠品{unit}单位(三级规格)处理: 数量: {quantity} -> {new_quantity}, 单价: 0, 单位: {unit} -> 瓶")

            result['quantity'] = new_quantity
            result['unit'] = '瓶'
        else:
            # Everything else stays unchanged
            logger.info(f"赠品{unit}单位处理: 保持原样 数量: {quantity}, 单价: 0, 单位: {unit}")

        # Gifts always have a zero price
        result['price'] = 0

        return result
|
||||
@@ -0,0 +1,423 @@
|
||||
"""
|
||||
订单合并模块
|
||||
----------
|
||||
提供采购单合并功能,将多个采购单合并为一个。
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import xlrd
|
||||
import xlwt
|
||||
from xlutils.copy import copy as xlcopy
|
||||
from typing import Dict, List, Optional, Tuple, Union, Any, Callable
|
||||
from datetime import datetime
|
||||
|
||||
from ...config.settings import ConfigManager
|
||||
from ..utils.log_utils import get_logger
|
||||
from ..handlers.column_mapper import ColumnMapper
|
||||
from ..utils.file_utils import (
|
||||
ensure_dir,
|
||||
get_file_extension,
|
||||
get_files_by_extensions,
|
||||
load_json,
|
||||
save_json
|
||||
)
|
||||
from ..utils.string_utils import (
|
||||
clean_string,
|
||||
clean_barcode,
|
||||
format_barcode
|
||||
)
|
||||
|
||||
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
|
||||
|
||||
class PurchaseOrderMerger:
|
||||
"""
|
||||
采购单合并器:将多个采购单Excel文件合并成一个文件
|
||||
"""
|
||||
|
||||
def __init__(self, config):
    """
    Set up the merger from the given configuration.

    Reads the output folder and the template location from ``config``,
    creates the output folder and loads the record of merged files.
    Any failure is logged and re-raised.

    Args:
        config: configuration object queried via
            ``config.get(section, option, fallback=...)``
    """
    self.config = config

    # The config object is queried with plain get(); it has no get_path method.
    try:
        # Output directory (created if missing)
        self.output_dir = config.get('Paths', 'output_folder', fallback='data/output')
        os.makedirs(self.output_dir, exist_ok=True)
        logger.info(f"使用输出目录: {os.path.abspath(self.output_dir)}")

        # Template file = template folder + template file name
        folder = config.get('Paths', 'template_folder', fallback='templates')
        file_name = config.get('Templates', 'purchase_order', fallback='银豹-采购单模板.xls')
        self.template_path = os.path.join(folder, file_name)

        # A missing template is only reported, not treated as an error
        template_found = os.path.exists(self.template_path)
        if not template_found:
            logger.warning(f"模板文件不存在: {self.template_path}")

        # Record of already merged files, kept next to the output
        self.merged_files_json = os.path.join(self.output_dir, "merged_files.json")
        self.merged_files = self._load_merged_files()

        logger.info(f"初始化PurchaseOrderMerger完成,模板文件: {self.template_path}")
    except Exception as e:
        logger.error(f"初始化PurchaseOrderMerger失败: {e}")
        raise
|
||||
|
||||
def _load_merged_files(self) -> Dict[str, str]:
    """
    Load the record of already merged files.

    Returns:
        The value returned by ``load_json`` for the record file, with an
        empty dictionary passed as its default
    """
    merged_records = load_json(self.merged_files_json, {})
    return merged_records
|
||||
|
||||
def _save_merged_files(self) -> None:
    """Write the record of already merged files to its JSON file."""
    target_path = self.merged_files_json
    save_json(self.merged_files, target_path)
|
||||
|
||||
def get_purchase_orders(self) -> List[str]:
    """
    Collect the purchase-order Excel files from the result directory.

    Looks in ``data/result`` (created if missing) for ``.xls``/``.xlsx``
    files whose base name starts with "采购单_".

    Returns:
        Matching file paths sorted by modification time, newest first;
        an empty list when none are found
    """
    # Purchase orders are stored in data/result
    result_dir = "data/result"
    logger.info(f"搜索目录 {result_dir} 中的采购单Excel文件")

    os.makedirs(result_dir, exist_ok=True)

    # Keep only the Excel files that follow the purchase-order naming
    purchase_orders = []
    for candidate in get_files_by_extensions(result_dir, ['.xls', '.xlsx']):
        if os.path.basename(candidate).startswith('采购单_'):
            purchase_orders.append(candidate)

    if len(purchase_orders) == 0:
        logger.warning(f"未在 {result_dir} 目录下找到采购单Excel文件")
        return []

    # Newest first
    purchase_orders.sort(key=os.path.getmtime, reverse=True)

    logger.info(f"找到 {len(purchase_orders)} 个采购单Excel文件")
    return purchase_orders
|
||||
|
||||
def read_purchase_order(self, file_path: str) -> Optional[pd.DataFrame]:
    """
    Read one purchase-order Excel file into a DataFrame.

    If ``ColumnMapper.detect_header_row`` reports a header row inside the
    data, that row becomes the column names and the rows after it become
    the data.  Columns recognized by ``ColumnMapper.find_column`` are then
    renamed to the Chinese names used downstream
    ("条码", "采购量", "采购单价", "赠送量").

    Args:
        file_path: path of the purchase-order file

    Returns:
        The DataFrame, or None when reading failed (the error is logged)
    """
    try:
        df = pd.read_excel(file_path)
        logger.info(f"成功读取采购单文件: {file_path}")

        # Column names as read, for debugging
        logger.debug(f"Excel文件的列名: {df.columns.tolist()}")

        # The real header may sit in one of the first data rows
        header_row_idx = ColumnMapper.detect_header_row(df, max_rows=5, min_matches=3)
        if header_row_idx >= 0:
            logger.info(f"检测到表头在第 {header_row_idx+1} 行")

            # That row supplies the names; data starts on the next row
            header_row = df.iloc[header_row_idx].astype(str)
            data_rows = df.iloc[header_row_idx+1:].reset_index(drop=True)

            # Empty header cells get a positional placeholder name
            data_rows.columns = [
                f"Col_{position}"
                if (str(cell) in ('nan', 'None') or pd.isna(cell))
                else str(cell)
                for position, cell in enumerate(header_row)
            ]
            df = data_rows
            logger.debug(f"重新构建的数据帧列名: {df.columns.tolist()}")

        # Resolve column names through ColumnMapper (Chinese target names
        # are kept for compatibility with downstream code)
        all_columns = df.columns.tolist()
        logger.info(f"列名: {all_columns}")

        standard_to_chinese = {
            'barcode': '条码',
            'quantity': '采购量',
            'unit_price': '采购单价',
            'gift_quantity': '赠送量',
        }

        # existing column name -> Chinese target name
        rename_dict = {}
        for std_name, chinese_name in standard_to_chinese.items():
            matched = ColumnMapper.find_column(all_columns, std_name)
            if not matched:
                continue
            rename_dict[matched] = chinese_name
            logger.info(f"列名映射: {matched} -> {chinese_name}")

        if not rename_dict:
            logger.warning(f"未找到可映射的列名: {file_path}")
        else:
            logger.info(f"列名重命名映射: {rename_dict}")
            df = df.rename(columns=rename_dict)
            logger.info(f"重命名后的列名: {df.columns.tolist()}")

        return df

    except Exception as e:
        logger.error(f"读取采购单文件失败: {file_path}, 错误: {str(e)}")
        return None
|
||||
|
||||
def merge_purchase_orders(self, file_paths: List[str]) -> Optional[pd.DataFrame]:
    """
    Merge several purchase-order files into one aggregated frame.

    Every path is loaded through ``read_purchase_order``. Frames that could
    not be read, or that lack one of the required columns, are left out.
    The remaining rows are cleaned, concatenated and grouped by barcode and
    unit price, summing the purchase and gift quantities.

    Args:
        file_paths: Paths of the purchase-order files to merge.

    Returns:
        The merged frame, or None when there is nothing to merge.
    """
    if not file_paths:
        logger.warning("没有需要合并的采购单文件")
        return None

    # Load every file and keep only the ones that were read successfully.
    loaded = (self.read_purchase_order(path) for path in file_paths)
    frames = [frame for frame in loaded if frame is not None]
    if not frames:
        logger.warning("没有成功读取的采购单文件")
        return None

    logger.info(f"开始合并 {len(frames)} 个采购单文件")

    # Bring each frame to the same four-column layout before concatenating.
    mandatory = ('条码', '采购量', '采购单价')
    usable_frames = []
    for position, frame in enumerate(frames):
        absent = [name for name in mandatory if name not in frame.columns]
        if absent:
            logger.warning(f"数据帧 {position} 缺少必要的列: {absent}")
            continue

        # The gift column is optional; default it to zero when missing.
        if '赠送量' not in frame.columns:
            frame['赠送量'] = 0

        tidy = pd.DataFrame()
        # Barcodes become strings (format_barcode); missing ones become ''.
        tidy['条码'] = frame['条码'].apply(
            lambda value: format_barcode(value) if pd.notna(value) else ''
        )
        # Non-numeric cells are coerced to NaN and then replaced by 0.
        tidy['采购量'] = pd.to_numeric(frame['采购量'], errors='coerce').fillna(0)
        unit_prices = pd.to_numeric(frame['采购单价'], errors='coerce').fillna(0)
        tidy['采购单价'] = unit_prices.round(4)
        tidy['赠送量'] = pd.to_numeric(frame['赠送量'], errors='coerce').fillna(0)

        # Drop rows without a barcode or without a positive purchase quantity.
        keep = (tidy['条码'] != '') & (tidy['采购量'] > 0)
        usable = tidy[keep]
        if len(usable) == 0:
            logger.warning(f"处理文件 {position+1}: 没有有效记录")
            continue

        usable_frames.append(usable)
        logger.info(f"处理文件 {position+1}: 有效记录 {len(usable)} 行")

    if not usable_frames:
        logger.warning("没有有效的数据帧用于合并")
        return None

    combined = pd.concat(usable_frames, ignore_index=True)

    # Round again so float noise cannot split equal prices into two groups.
    combined['采购单价'] = combined['采购单价'].round(4)

    totals = {'采购量': 'sum', '赠送量': 'sum'}
    result = combined.groupby(['条码', '采购单价'], as_index=False).agg(totals)
    result = result.sort_values('条码').reset_index(drop=True)

    # A gift quantity of zero is written back as a missing value.
    no_gift = result['赠送量'] == 0
    result.loc[no_gift, '赠送量'] = pd.NA

    logger.info(f"合并完成,共 {len(result)} 条商品记录")
    return result
|
||||
|
||||
def create_merged_purchase_order(self, df: pd.DataFrame) -> Optional[str]:
    """
    Write the merged purchase order into a copy of the template workbook.

    Only four columns are filled: barcode (index 1), purchase quantity
    (index 2), gift quantity (index 3) and unit price (index 4). Data rows
    start at sheet row index 1. The file is saved under ``data/result``
    with a timestamped name.

    Args:
        df: The merged frame with columns 条码, 采购量, 赠送量, 采购单价.

    Returns:
        Path of the written file, or None if any step raised.
    """
    try:
        workbook_in = xlrd.open_workbook(self.template_path, formatting_info=True)
        first_sheet = workbook_in.sheet_by_index(0)

        # Dump the first few template rows to the debug log.
        logger.info("分析模板结构")
        preview_rows = min(5, first_sheet.nrows)
        for row_no in range(preview_rows):
            cells = [str(cell.value).strip() for cell in first_sheet.row(row_no)]
            logger.debug(f"模板第{row_no+1}行: {cells}")

        # Fixed column positions used for the template: B, C, D, E.
        barcode_col, quantity_col, gift_col, price_col = 1, 2, 3, 4
        # Data begins on the second sheet row.
        data_start_row = 1

        # xlrd workbooks are read-only; xlcopy yields a writable copy.
        workbook_out = xlcopy(workbook_in)
        sheet_out = workbook_out.get_sheet(0)

        # Unit prices are shown with four decimals, quantities as integers.
        price_style = xlwt.XFStyle()
        price_style.num_format_str = '0.0000'
        quantity_style = xlwt.XFStyle()
        quantity_style.num_format_str = '0'

        for target_row, (_, record) in enumerate(df.iterrows(), start=data_start_row):
            sheet_out.write(target_row, barcode_col, record['条码'])
            sheet_out.write(target_row, quantity_col, float(record['采购量']), quantity_style)

            # The gift cell stays empty unless there is a positive quantity.
            gift = record['赠送量']
            if pd.notna(gift) and float(gift) > 0:
                sheet_out.write(target_row, gift_col, float(gift), quantity_style)

            sheet_out.write(target_row, price_col, float(record['采购单价']), price_style)

        # Build the timestamped output path under data/result.
        stamp = datetime.now().strftime("%Y%m%d%H%M%S")
        result_dir = "data/result"
        os.makedirs(result_dir, exist_ok=True)
        output_file = os.path.join(result_dir, f"合并采购单_{stamp}.xls")

        workbook_out.save(output_file)
        logger.info(f"合并采购单已保存到: {output_file},共{len(df)}条记录")
        return output_file

    except Exception as e:
        logger.error(f"创建合并采购单时出错: {e}")
        return None
|
||||
|
||||
def process(self, file_paths: Optional[List[str]] = None, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
    """
    Run the whole merge: collect files, merge them, write the result.

    Args:
        file_paths: Files to merge; when None they are obtained from
            ``get_purchase_orders``.
        progress_cb: Optional callback receiving a progress percentage
            (97, 98 and 100 are reported). Errors it raises are ignored.

    Returns:
        Path of the merged file, or None when merging failed.
    """
    def report(percent: int) -> None:
        # Progress reporting is best effort and must never abort the merge.
        try:
            if progress_cb:
                progress_cb(percent)
        except Exception:
            pass

    if file_paths is None:
        file_paths = self.get_purchase_orders()
    report(97)

    if not file_paths:
        logger.warning("没有找到可合并的采购单文件")
        return None

    merged_df = self.merge_purchase_orders(file_paths)
    if merged_df is None:
        logger.error("合并采购单失败")
        return None
    report(98)

    output_file = self.create_merged_purchase_order(merged_df)
    if output_file is None:
        logger.error("创建合并采购单文件失败")
        return None
    report(100)

    # Remember which output each source file was merged into.
    for source_path in file_paths:
        self.merged_files[source_path] = output_file
    self._save_merged_files()

    return output_file
|
||||
@@ -0,0 +1,860 @@
|
||||
"""
|
||||
Excel处理核心模块
|
||||
--------------
|
||||
提供Excel文件处理功能,包括表格解析、数据提取和处理。
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import xlrd
|
||||
import xlwt
|
||||
from xlutils.copy import copy as xlcopy
|
||||
from typing import Dict, List, Optional, Tuple, Union, Any, Callable
|
||||
from datetime import datetime
|
||||
|
||||
from ...config.settings import ConfigManager
|
||||
from ..utils.log_utils import get_logger
|
||||
from ..utils.file_utils import (
|
||||
ensure_dir,
|
||||
get_file_extension,
|
||||
get_latest_file,
|
||||
load_json,
|
||||
save_json
|
||||
)
|
||||
from ..utils.string_utils import (
|
||||
clean_string,
|
||||
extract_number,
|
||||
format_barcode,
|
||||
parse_monetary_string
|
||||
)
|
||||
from .converter import UnitConverter
|
||||
from ..handlers.column_mapper import ColumnMapper
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class ExcelProcessor:
|
||||
"""
|
||||
Excel处理器:处理OCR识别后的Excel文件,
|
||||
提取条码、单价和数量,并按照采购单模板的格式填充
|
||||
"""
|
||||
|
||||
def __init__(self, config):
    """
    Set up directories, the template path, the cache and the unit converter.

    Args:
        config: Configuration object; read through
            ``config.get(section, option, fallback=...)``.

    Raises:
        Exception: Any error raised during set-up is logged and re-raised.
    """
    self.config = config

    try:
        def read_path(option, default):
            # All locations live in the 'Paths' section of the config.
            return config.get('Paths', option, fallback=default)

        self.output_dir = read_path('output_folder', 'data/output')
        self.temp_dir = read_path('temp_folder', 'data/temp')

        self.template_path = read_path('template_file', 'templates/银豹-采购单模板.xls')
        template_missing = not os.path.exists(self.template_path)
        if template_missing:
            logger.warning(f"模板文件不存在: {self.template_path}")

        # The processed-file cache lives next to the outputs.
        self.cache_file = os.path.join(self.output_dir, "processed_files.json")
        self.processed_files = self._load_processed_files()

        for folder in (self.output_dir, self.temp_dir):
            os.makedirs(folder, exist_ok=True)

        logger.info(f"使用输出目录: {os.path.abspath(self.output_dir)}")
        logger.info(f"使用临时目录: {os.path.abspath(self.temp_dir)}")

        self.unit_converter = UnitConverter()

        logger.info(f"初始化ExcelProcessor完成,模板文件: {self.template_path}")
    except Exception as e:
        logger.error(f"初始化ExcelProcessor失败: {e}")
        raise
|
||||
|
||||
def _load_processed_files(self) -> Dict[str, str]:
    """
    Load the processed-file cache from ``self.cache_file``.

    Returns:
        The mapping returned by ``load_json``, called with an empty dict
        as its default.
    """
    cache_path = self.cache_file
    records = load_json(cache_path, {})
    return records
|
||||
|
||||
def _save_processed_files(self) -> None:
    """Write the processed-file cache to ``self.cache_file`` via ``save_json``."""
    records, cache_path = self.processed_files, self.cache_file
    save_json(records, cache_path)
|
||||
|
||||
def get_latest_excel(self) -> Optional[str]:
    """
    Find the newest Excel file in the output directory.

    A file whose name starts with ``采购单_`` is a generated purchase order
    and is not returned.

    Returns:
        Path of the newest Excel file, or None when there is none or the
        newest one is a purchase order.
    """
    search_dir = self.output_dir
    logger.info(f"搜索目录 {search_dir} 中的Excel文件")

    # No file-name pattern; only the extension is restricted.
    candidate = get_latest_file(
        search_dir,
        pattern="",
        extensions=['.xlsx', '.xls']
    )

    if not candidate:
        logger.warning(f"未在 {search_dir} 目录下找到未处理的Excel文件")
        return None

    is_purchase_order = os.path.basename(candidate).startswith('采购单_')
    if is_purchase_order:
        logger.warning(f"找到的最新文件是采购单,不作处理: {candidate}")
        return None

    logger.info(f"找到最新的Excel文件: {candidate}")
    return candidate
|
||||
|
||||
def extract_barcode(self, df: pd.DataFrame) -> List[str]:
    """
    Return the columns of ``df`` that look like barcode columns.

    Three strategies are tried in order, each only if the previous one
    found nothing: exact header match against
    ``ColumnMapper.STANDARD_COLUMNS['barcode']``, keyword match inside the
    header, and finally a data check (first ten non-empty values are all
    digit strings of length 8 to 14).

    Args:
        df: The frame to inspect.

    Returns:
        The matching column labels; empty when nothing matched.
    """
    known_headers = ColumnMapper.STANDARD_COLUMNS['barcode']
    hits = []

    # Pass 1: header equals one of the known barcode headers.
    for column in df.columns:
        header = str(column).strip()
        if header not in known_headers:
            continue
        hits.append(column)
        logger.info(f"找到精确匹配的条码列: {header}")

    # Pass 2: header merely contains a barcode keyword.
    if not hits:
        keywords = ['条码', '条形码', 'barcode', '编码']
        for column in df.columns:
            header = str(column).strip().lower()
            found = next((word for word in keywords if word.lower() in header), None)
            if found is not None:
                hits.append(column)
                logger.info(f"找到部分匹配的条码列: {column} (包含关键词: {found})")

    # Pass 3: judge by the data itself.
    if not hits and len(df) > 0:
        for column in df.columns:
            samples = df[column].dropna().astype(str).tolist()[:10]
            if not samples:
                continue
            # Most barcodes are 8 to 14 characters long and purely numeric.
            plausible_length = all(8 <= len(text) <= 14 for text in samples)
            if plausible_length and all(text.isdigit() for text in samples):
                hits.append(column)
                logger.info(f"基于数据特征识别的可能条码列: {column}")

    return hits
|
||||
|
||||
def extract_product_info(self, df: pd.DataFrame) -> List[Dict]:
    """
    Build one product dict per usable row of ``df``.

    For every row the method reads barcode, name, unit, price, amount,
    quantity and specification, preferring the literal Chinese headers
    and falling back to the columns found by ``_detect_column_mapping``.
    Rows without a barcode, and rows whose remark-like column contains a
    non-purchase keyword (exchange, return, void, ...), are skipped.
    A row that raises is logged and skipped.

    Args:
        df: Frame whose columns are the sheet's header row.

    Returns:
        List of product dicts with keys barcode, name, specification,
        quantity, unit, price, amount, is_gift and, when it could be
        determined, package_quantity.
    """
    products = []

    column_mapping = self._detect_column_mapping(df)
    logger.info(f"检测到列映射: {column_mapping}")

    remark_markers = ['备注', '说明', '类型', '备注1']
    non_purchase_keywords = ['换货', '退货', '作废', '减钱', '冲减', '赠品单', '补货']

    # "<volume><unit>*<count>" inside a product name, e.g. "1.8L*8瓶".
    container_pattern = r'.*?(\d+(?:\.\d+)?)\s*(?:ml|[mM][lL]|[lL]|升|毫升)[*×xX](\d+).*'
    # "<number><weight or volume unit>*<count>" inside a product name.
    weight_volume_pattern = r'.*?\d+(?:g|ml|毫升|克)[*xX×](\d+)'
    # Same unit alternatives as container_pattern. The previous "[mL]L"
    # was a typo that rejected "ML"/"Ml", so e.g. "500ML*12" got no unit.
    volume_pattern = r'(\d+(?:\.\d+)?)\s*(?:ml|[mM][lL]|l|L|升|毫升)[*×xX](\d+)'
    simple_pattern = r'(\d+)[*×xX](\d+)'
    # OCR sometimes reads "4.5L*4" as "4.51*4"; turn the stray 1 back into L.
    ocr_litre_fix = (r'(\d+\.\d+)1\*(\d+)', r'\1L*\2')

    def pick_cell(row, literal_columns, mapping_key):
        """Return the first non-missing cell among the literal headers, then the mapped column; None if all are missing."""
        for column in literal_columns:
            if column in df.columns and not pd.isna(row[column]):
                return row[column]
        mapped = column_mapping.get(mapping_key)
        if mapped and not pd.isna(row[mapped]):
            return row[mapped]
        return None

    for idx, row in df.iterrows():
        try:
            product = {
                'barcode': '',
                'name': '',
                'specification': '',
                'quantity': 0,
                'unit': '',
                'price': 0,
                'amount': 0,
                'is_gift': False
            }

            barcode_cell = pick_cell(row, ('条码',), 'barcode')
            if barcode_cell is not None:
                product['barcode'] = str(barcode_cell).strip()

            # A row without a barcode cannot be ordered.
            if not product['barcode']:
                continue

            # Read the name before the remark filter so that the filter's
            # log line can actually show it (it used to be always empty).
            name_cell = pick_cell(row, ('商品名称', '名称'), 'name')
            if name_cell is not None:
                product['name'] = str(name_cell).strip()

            # Skip exchange / return / void rows flagged in a remark column.
            skip_row = False
            for col in df.columns:
                col_str = str(col)
                if any(marker in col_str for marker in remark_markers):
                    val = str(row[col]).strip()
                    if any(keyword in val for keyword in non_purchase_keywords):
                        logger.info(f"过滤非采购行: {product['barcode']} - {product.get('name', '')}, 原因: {col_str}包含 '{val}'")
                        skip_row = True
                        break
            if skip_row:
                continue

            unit_cell = pick_cell(row, ('单位',), 'unit')
            if unit_cell is not None:
                product['unit'] = str(unit_cell).strip()

            price_cell = pick_cell(row, ('单价',), 'price')
            if price_cell is not None:
                product['price'] = price_cell

            amount_cell = pick_cell(row, ('金额', '小计'), 'amount')
            if amount_cell is not None:
                product['amount'] = amount_cell

            # A zero or unparsable amount marks the row as a gift.
            # NOTE(review): 'amount' defaults to 0, so a sheet with no amount
            # column at all appears to flag every row as a gift, depending on
            # what parse_monetary_string(0) returns — confirm this is intended.
            amt = product.get('amount', None)
            try:
                is_amt_gift = False
                if amt is None:
                    is_amt_gift = True
                elif isinstance(amt, str):
                    parsed = parse_monetary_string(amt)
                    is_amt_gift = (parsed is None or parsed == 0.0)
                else:
                    parsed = parse_monetary_string(amt)
                    is_amt_gift = (parsed is not None and parsed == 0.0)
                if is_amt_gift:
                    product['is_gift'] = True
            except Exception as e:
                # Best effort: keep the row, but leave a trace of the failure.
                logger.debug(f"根据金额判断赠品失败: {e}")

            quantity_cell = pick_cell(row, ('数量',), 'quantity')
            if quantity_cell is not None:
                product['quantity'] = quantity_cell

            # Split composite quantities such as "2箱" into number and unit.
            if isinstance(product['quantity'], str) and product['quantity']:
                num, unit = self.unit_converter.extract_unit_from_quantity(product['quantity'])
                if unit:
                    product['unit'] = unit
                if num is not None:
                    product['quantity'] = num

            # Specification: literal header first, then the mapped column.
            if '规格' in df.columns and not pd.isna(row['规格']):
                spec_cell, spec_log_prefix = row['规格'], "解析规格"
            elif column_mapping.get('specification') and not pd.isna(row[column_mapping['specification']]):
                spec_cell, spec_log_prefix = row[column_mapping['specification']], "从映射列解析规格"
            else:
                spec_cell, spec_log_prefix = None, ""

            if spec_cell is not None:
                product['specification'] = re.sub(ocr_litre_fix[0], ocr_litre_fix[1], str(spec_cell))
                package_quantity = self.parse_specification(product['specification'])
                if package_quantity:
                    product['package_quantity'] = package_quantity
                    logger.info(f"{spec_log_prefix}: {product['specification']} -> 包装数量={package_quantity}")
            elif product['name']:
                # No specification in the sheet: infer one from the name.
                match = re.search(container_pattern, product['name'])
                if match:
                    volume = match.group(1)
                    count = match.group(2)
                    # NOTE(review): the unit is always written as "L", even
                    # when the name said ml/毫升 — kept as is, confirm intent.
                    inferred_spec = f"{volume}L*{count}"
                    inferred_qty = int(count)
                    product['specification'] = inferred_spec
                    product['package_quantity'] = inferred_qty
                    logger.info(f"从商品名称提取容量*数量格式: {product['name']} -> {inferred_spec}, 包装数量={inferred_qty}")
                else:
                    match = re.search(weight_volume_pattern, product['name'])
                    if match:
                        inferred_spec = f"1*{match.group(1)}"
                        inferred_qty = int(match.group(1))
                        product['specification'] = inferred_spec
                        product['package_quantity'] = inferred_qty
                        logger.info(f"从商品名称提取重量/容量规格: {product['name']} -> {inferred_spec}, 包装数量={inferred_qty}")
                    else:
                        inferred_spec = self.unit_converter.infer_specification_from_name(product['name'])
                        if inferred_spec:
                            product['specification'] = inferred_spec
                            package_quantity = self.parse_specification(inferred_spec)
                            if package_quantity:
                                product['package_quantity'] = package_quantity
                                logger.info(f"从商品名称推断规格: {product['name']} -> {inferred_spec}, 包装数量={package_quantity}")

            # A specification may be set while its package quantity is not.
            if product.get('specification') and not product.get('package_quantity'):
                package_quantity = self.parse_specification(product['specification'])
                if package_quantity:
                    product['package_quantity'] = package_quantity
                    logger.info(f"解析已设置的规格: {product['specification']} -> 包装数量={package_quantity}")

            # Without an explicit unit, a "<a>*<b>" style specification
            # implies that the row is counted in 件.
            if not product['unit'] and product.get('barcode') and product.get('specification') and product.get('quantity') and product.get('price') is not None:
                spec_text = product['specification']
                if re.search(volume_pattern, spec_text) or re.search(simple_pattern, spec_text):
                    product['unit'] = '件'
                    logger.info(f"根据规格推断单位: {product['specification']} -> 单位=件")

            product = self.unit_converter.process_unit_conversion(product)

            # Quantity missing but price and amount known: quantity = amount / price.
            if (product['quantity'] == 0 or product['quantity'] is None) and product['price'] > 0 and product['amount']:
                try:
                    amount = parse_monetary_string(product['amount'])
                    if amount is not None and amount > 0:
                        quantity = amount / product['price']
                        logger.info(f"数量为空或为0,通过金额({amount})和单价({product['price']})计算得出数量: {quantity}")
                        product['quantity'] = quantity
                except Exception as e:
                    logger.warning(f"通过金额和单价计算数量失败: {e}")

            products.append(product)
        except Exception as e:
            logger.error(f"提取第{idx+1}行商品信息时出错: {e}", exc_info=True)
            continue

    logger.info(f"提取到 {len(products)} 个商品信息")
    return products
|
||||
|
||||
def fill_template(self, products: List[Dict], output_file_path: str) -> bool:
    """
    Fill the purchase-order template with ``products`` and save it.

    Products are grouped by barcode. Non-gift rows of a barcode are
    combined into one line (quantities added); gift rows only add to the
    gift quantity. One sheet row is written per barcode, starting at row
    index 1: barcode in column 1, purchase quantity in column 2, gift
    quantity in column 3 and unit price in column 4.

    Args:
        products: Product dicts as produced by ``extract_product_info``.
        output_file_path: Where to save the filled workbook.

    Returns:
        True when the file was written, False if anything raised.
    """
    try:
        template_workbook = xlrd.open_workbook(self.template_path, formatting_info=True)

        # xlrd workbooks are read-only; xlcopy yields a writable copy.
        output_workbook = xlcopy(template_workbook)
        output_sheet = output_workbook.get_sheet(0)

        # barcode -> {'normal': {...} or None, 'gift_quantity': number}
        barcode_groups = {}

        logger.info(f"开始处理{len(products)} 个产品信息")
        for product in products:
            barcode = format_barcode(product.get('barcode', ''))

            if not barcode:
                logger.warning("跳过无条码商品")
                continue

            quantity = product.get('quantity', 0)
            price = product.get('price', 0)
            amount = product.get('amount', 0)

            # Quantity missing but price and amount known: quantity = amount / price.
            if (quantity == 0 or quantity is None) and price > 0 and amount:
                try:
                    amount = parse_monetary_string(amount)
                    if amount is not None and amount > 0:
                        quantity = amount / price
                        logger.info(f"数量为空或为0,通过金额({amount})和单价({price})计算得出数量: {quantity}")
                        product['quantity'] = quantity
                except Exception as e:
                    logger.warning(f"通过金额和单价计算数量失败: {e}")

            # A row is a gift when flagged upstream or when its price is 0.
            is_gift = bool(product.get('is_gift', False)) or (price == 0)

            logger.info(f"处理商品: 条码={barcode}, 数量={quantity}, 单价={price}, 是否赠品={is_gift}")

            group = barcode_groups.setdefault(barcode, {
                'normal': None,
                'gift_quantity': 0
            })

            if is_gift:
                group['gift_quantity'] += quantity
                logger.info(f"发现赠品:条码{barcode}, 数量={quantity}")
                continue

            normal = group['normal']
            if normal is None:
                group['normal'] = {
                    'product': product,
                    'quantity': quantity,
                    'price': price
                }
                logger.info(f"发现正常商品:条码{barcode}, 数量={quantity}, 单价={price}")
                continue

            # Further non-gift rows of the same barcode are added up.
            normal['quantity'] += quantity
            logger.info(f"累加正常商品数量:条码{barcode}, 新增={quantity}, 累计={normal['quantity']}")

            # Differing prices are averaged with the running price.
            # NOTE(review): this is an unweighted, order-dependent average;
            # a quantity-weighted one may be what is wanted — confirm.
            previous_price = normal['price']
            if price != previous_price:
                avg_price = (previous_price + price) / 2
                normal['price'] = avg_price
                # Log the price from before the update; the old code read it
                # back after overwriting and so printed the average twice.
                logger.info(f"调整单价(取平均值):条码{barcode}, 原价={previous_price}, 新价={price}, 平均={avg_price}")

        logger.info(f"分组后共{len(barcode_groups)} 个不同条码的商品")
        for barcode, group in barcode_groups.items():
            if group['normal'] is not None:
                logger.info(f"条码 {barcode} 处理结果:正常商品数量{group['normal']['quantity']},单价{group['normal']['price']},赠品数量{group['gift_quantity']}")
            else:
                logger.info(f"条码 {barcode} 处理结果:只有赠品,数量={group['gift_quantity']}")

        # One shared style for all price cells instead of one per row.
        price_style = xlwt.XFStyle()
        price_style.num_format_str = '0.0000'

        # Filling starts on the second sheet row (index 1).
        for row_index, (barcode, group) in enumerate(barcode_groups.items(), start=1):
            output_sheet.write(row_index, 1, barcode)

            if group['normal'] is not None:
                normal_quantity = group['normal']['quantity']
                output_sheet.write(row_index, 2, normal_quantity)

                if group['gift_quantity'] > 0:
                    output_sheet.write(row_index, 3, group['gift_quantity'])
                    logger.info(f"条码 {barcode} 填充:采购量={normal_quantity},赠品数量{group['gift_quantity']}")

                purchase_price = group['normal']['price']
                output_sheet.write(row_index, 4, round(purchase_price, 4), price_style)
            else:
                # Gift-only barcode: purchase quantity 0, price 0.
                output_sheet.write(row_index, 2, 0)
                output_sheet.write(row_index, 3, group['gift_quantity'])
                output_sheet.write(row_index, 4, 0)

                logger.info(f"条码 {barcode} 填充:仅有赠品,采购量=0,赠品数量={group['gift_quantity']}")

        output_workbook.save(output_file_path)
        logger.info(f"采购单已保存到: {output_file_path}")
        return True

    except Exception as e:
        logger.error(f"填充模板时出错: {e}")
        return False
|
||||
|
||||
def _find_header_row(self, df: pd.DataFrame) -> Optional[int]:
    """
    Locate the header row of a sheet that was read without a header.

    ``ColumnMapper.detect_header_row`` is asked first (up to 30 rows). If
    it reports nothing (negative result), the first row with more than
    three non-missing cells is used instead.

    Returns:
        Zero-based row position of the header, or None if none was found.
    """
    detected = ColumnMapper.detect_header_row(df, max_rows=30)
    if detected >= 0:
        logger.info(f"找到表头行: 第{detected+1}行")
        return detected

    # Fallback: first row that carries more than three values.
    filled_counts = df.notna().sum(axis=1)
    for position, filled in enumerate(filled_counts):
        if filled > 3:
            logger.info(f"未找到明确表头,使用第一个有效行: 第{position+1}行")
            return position

    logger.warning("无法识别表头行")
    return None
|
||||
|
||||
def process_specific_file(self, file_path: str, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
    """
    Convert one OCR-produced Excel file into a purchase order.

    The sheet is read without a header, the header row is detected, the
    products are extracted and written into the template. The result is
    saved as ``data/result/采购单_<input name>.xls``.

    Args:
        file_path: Path of the Excel file to process.
        progress_cb: Optional callback receiving a progress percentage
            (92, 94, 96 and 100 are reported). Errors it raises are ignored.

    Returns:
        Path of the written purchase order, or None on any failure.
    """
    logger.info(f"开始处理Excel文件: {file_path}")

    if not os.path.exists(file_path):
        logger.error(f"文件不存在: {file_path}")
        return None

    def report(percent: int) -> None:
        # Progress reporting is best effort and must never abort the run.
        if progress_cb:
            try:
                progress_cb(percent)
            except Exception:
                pass

    try:
        # Read raw: the header row is not known yet.
        report(92)
        df = pd.read_excel(file_path, header=None)
        logger.info(f"成功读取Excel文件: {file_path}, 共 {len(df)} 行")

        header_row = self._find_header_row(df)
        if header_row is None:
            logger.error("无法识别表头行")
            return None
        logger.info(f"识别到表头在第 {header_row+1} 行")

        # Promote the detected row to column labels and drop everything
        # up to and including it; this avoids reading the file twice.
        report(94)
        df.columns = df.iloc[header_row]
        df = df.iloc[header_row + 1:].reset_index(drop=True)
        logger.info(f"重新整理数据结构,共 {len(df)} 行有效数据")

        report(96)
        products = self.extract_product_info(df)
        if not products:
            logger.warning("未提取到有效商品信息")
            return None

        # Output goes to data/result, named after the input file.
        stem, _ = os.path.splitext(os.path.basename(file_path))
        result_dir = "data/result"
        os.makedirs(result_dir, exist_ok=True)
        output_file = os.path.join(result_dir, f"采购单_{stem}.xls")

        if not self.fill_template(products, output_file):
            return None

        # Remember that this input has been processed.
        self.processed_files[file_path] = output_file
        self._save_processed_files()

        logger.info(f"采购单已保存到: {output_file}")
        report(100)
        return output_file

    except Exception as e:
        logger.error(f"处理Excel文件时出错: {file_path}, 错误: {e}")
        return None
|
||||
|
||||
def process_latest_file(self, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
    """
    Process the newest Excel file found by ``get_latest_excel``.

    Args:
        progress_cb: Optional progress callback, passed through to
            ``process_specific_file``.

    Returns:
        Path of the written purchase order, or None when no file was
        found or processing failed.
    """
    newest = self.get_latest_excel()
    if newest:
        return self.process_specific_file(newest, progress_cb=progress_cb)

    logger.warning("未找到可处理的Excel文件")
    return None
|
||||
|
||||
def _detect_column_mapping(self, df: pd.DataFrame) -> Dict[str, str]:
    """
    Work out which sheet column holds which product field.

    The barcode column comes from ``extract_barcode`` (first hit); all
    other fields are looked up through ``ColumnMapper.find_column``.

    Args:
        df: The frame to inspect.

    Returns:
        Mapping from internal key (barcode, name, specification,
        quantity, unit, price, amount) to the actual column label.
        Empty when no barcode column exists.
    """
    barcode_candidates = self.extract_barcode(df)
    if not barcode_candidates:
        # Without a barcode column nothing else is worth mapping.
        logger.error("未找到条码列,无法处理")
        return {}

    mapping = {'barcode': barcode_candidates[0]}
    logger.info(f"使用条码列: {mapping['barcode']}")

    # Internal key -> ColumnMapper standard name. This class stores the
    # unit price and the line total under 'price' and 'amount'.
    standard_names = {
        'name': 'name',
        'specification': 'specification',
        'quantity': 'quantity',
        'unit': 'unit',
        'price': 'unit_price',
        'amount': 'total_price',
    }

    for internal_key, standard_name in standard_names.items():
        column = ColumnMapper.find_column(list(df.columns), standard_name)
        if not column:
            continue
        mapping[internal_key] = column
        logger.info(f"找到{internal_key}列: {column}")

    return mapping
|
||||
|
||||
def infer_specification_from_name(self, product_name: str) -> Tuple[Optional[str], Optional[int]]:
    """
    Infer a specification and package quantity from a product name.

    Strategies are tried in this order, and the first success wins:
    a "<number><g/ml>*<count>" fragment, the unit converter's own
    inference, a table of product-specific naming rules, a bare
    "<a>*<b>" fragment, and finally any number in the name that equals a
    typical case size (12, 15, 24 or 30).

    Args:
        product_name: The product name to analyse.

    Returns:
        ``(specification, package_quantity)``, or ``(None, None)`` when
        the name is invalid or nothing could be inferred.
    """
    if not product_name or not isinstance(product_name, str):
        logger.warning(f"无效的商品名: {product_name}")
        return None, None

    product_name = product_name.strip()

    # 1. "<number><weight or volume unit>*<count>" -> "1*<count>".
    hit = re.search(r'.*?\d+(?:g|ml|毫升|克)[*xX×](\d+)', product_name)
    if hit:
        inferred_spec = f"1*{hit.group(1)}"
        inferred_qty = int(hit.group(1))
        logger.info(f"从商品名称提取重量/容量规格: {product_name} -> {inferred_spec}, 包装数量={inferred_qty}")
        return inferred_spec, inferred_qty

    # 2. Delegate to the unit converter; accept only if it parses.
    converter_spec = self.unit_converter.infer_specification_from_name(product_name)
    if converter_spec:
        package_quantity = self.parse_specification(converter_spec)
        if package_quantity:
            logger.info(f"从商品名称推断规格: {product_name} -> {converter_spec}, 包装数量={package_quantity}")
            return converter_spec, package_quantity

    # 3. Product-specific naming rules, most specific first.
    def one_by_count(m):
        # Captured number is the pack count: "1*<count>".
        return f"1*{m.group(1)}", int(m.group(1))

    def spec_as_written(m):
        # Captured "<a>*<b>" is used as is; <b> is the pack count.
        return m.group(1), int(m.group(1).split('*')[1])

    def single_barrel(m):
        # Barrel goods are one container per package.
        return f"{m.group(1)}*1", 1

    spec_rules = [
        (r'.*?(\d+)入白膜', one_by_count),
        (r'.*?(\d+)白膜', one_by_count),
        (r'445水溶C.*?(\d+)[入个]纸箱', one_by_count),
        (r'东方树叶.*?(\d+\*\d+).*纸箱', spec_as_written),
        (r'(\d+\.?\d*L)桶装', single_barrel),
        (r'树叶.*?(\d+)[入个]纸箱', one_by_count),
        (r'茶[πΠπ].*?(\d+)纸箱', one_by_count),
        (r'.*?(\d+)[入个](?:纸箱|箱装|白膜)', one_by_count),
        (r'.*?(\d+)纸箱', one_by_count),
    ]

    for pattern, build in spec_rules:
        hit = re.search(pattern, product_name)
        if hit:
            spec, qty = build(hit)
            logger.info(f"根据特定规则推断规格: {product_name} -> {spec}, 包装数量={qty}")
            return spec, qty

    # 4. A bare "<a>*<b>" anywhere in the name.
    hit = re.search(r'(\d+\*\d+)', product_name)
    if hit:
        spec = hit.group(1)
        package_quantity = self.parse_specification(spec)
        if package_quantity:
            logger.info(f"从名称中直接提取规格: {spec}, 包装数量={package_quantity}")
            return spec, package_quantity

    # 5. Last resort: a number that equals a typical case size.
    typical_sizes = ['12', '15', '24', '30']
    for num in re.findall(r'\d+', product_name):
        if num in typical_sizes:
            spec = f"1*{num}"
            logger.info(f"从名称中提取可能的件装数: {spec}, 包装数量={int(num)}")
            return spec, int(num)

    logger.warning(f"无法从商品名'{product_name}' 推断规格")
    return None, None
|
||||
|
||||
def parse_specification(self, spec_str: str) -> Optional[int]:
    """
    Parse a specification string and extract the package quantity.

    Recognised shapes include ``1*15``, ``1x15``, ``1*5*10``, ``5kg*6``,
    ``450g*15``, ``IL*12`` (OCR misread of ``1L*12``), ``24瓶/件`` and
    equation style specs such as ``1件=12桶``.

    The patterns are tried in a fixed order and the first one that matches
    decides the result, so the order of the checks below is significant.

    Args:
        spec_str: Specification string.

    Returns:
        The package quantity, or None when ``spec_str`` is empty, is not a
        string, matches no pattern, or an error occurs while parsing (the
        error is logged as a warning).
    """
    if not spec_str or not isinstance(spec_str, str):
        return None

    try:
        # Normalise the raw text with the project's string cleaner.
        spec_str = clean_string(spec_str)

        # --- Repair common OCR misreads -------------------------------
        # A leading i/I/l/L directly in front of digits is a misread "1"
        # (e.g. "l5" -> "15").
        spec_str = re.sub(r'(\b|^)[iIlL](\d+)', r'1\2', spec_str)
        # "IL" -> "1L": a standalone i/I/l/L in front of a litre marker.
        # The previous rule cannot catch this case because no digit
        # follows the misread character. The guards keep ordinary words
        # (letters on either side) untouched.
        spec_str = re.sub(
            r'(?<![A-Za-z0-9])[iIlL](?=[Ll](?![A-Za-z]))', '1', spec_str
        )
        # "6oo" -> "600": replace every misread o/O with a zero while
        # preserving the length of the run, so "6ooo" becomes "6000"
        # rather than being collapsed to "600".
        spec_str = re.sub(
            r'(\d+)([oO0]{2,})',
            lambda m: m.group(1) + '0' * len(m.group(2)),
            spec_str,
        )
        # Unify every multiplication sign to "*".
        spec_str = spec_str.replace('×', '*').replace('x', '*').replace('X', '*')

        logger.debug(f"清理后的规格字符串: {spec_str}")

        # Equation style, e.g. "1件=12桶": the right-hand count is the
        # package quantity. Both ASCII and full-width "=" are accepted.
        eq_match = re.search(
            r'(\d+(?:\.\d+)?)\s*(?:件|箱|提|盒)\s*[==]\s*(\d+)\s*(?:瓶|桶|盒|支|个|袋|罐|包|卷)',
            spec_str,
        )
        if eq_match:
            return int(eq_match.group(2))

        # Weight with a multiplier, e.g. "5kg*6".
        weight_pattern = r'(\d+(?:\.\d+)?)\s*(?:kg|KG|千克|公斤)[*×](\d+)'
        match = re.search(weight_pattern, spec_str)
        if match:
            return int(match.group(2))

        # Gram / millilitre with a multiplier, e.g. "450g*15", "450ml*15".
        match = re.search(r'\d+(?:\.\d+)?(?:g|G|ml|ML|mL|毫升|克)[*×](\d+)', spec_str)
        if match:
            return int(match.group(1))

        # Three-level spec such as "1*5*10": the last number is the quantity.
        match = re.search(r'(\d+(?:\.\d+)?)[*×](\d+(?:\.\d+)?)[*×](\d+(?:\.\d+)?)', spec_str)
        if match:
            return int(float(match.group(3)))

        # Two-level spec such as "1*15": the second number is the quantity.
        match = re.search(r'(\d+(?:\.\d+)?)[*×](\d+(?:\.\d+)?)', spec_str)
        if match:
            return int(float(match.group(2)))

        # "24瓶/件" style; both ASCII and full-width slashes are accepted.
        match = re.search(r'(\d+(?:\.\d+)?)[瓶个支袋][//](件|箱)', spec_str)
        if match:
            return int(float(match.group(1)))

        # Litre spec such as "4L" or "4L*6": use the multiplier when
        # present, otherwise the package holds a single unit.
        match = re.search(r'(\d+(?:\.\d+)?)\s*[Ll升][*×]?(\d+(?:\.\d+)?)?', spec_str)
        if match:
            return int(float(match.group(2))) if match.group(2) else 1

        # Number followed by a counting unit, e.g. "12瓶装".
        match = re.search(r'(\d+(?:\.\d+)?)[瓶个支袋包盒罐箱](?:装|\/箱)?', spec_str)
        if match:
            return int(float(match.group(1)))

        # Last resort: look at any numbers that appear in the string.
        numbers = re.findall(r'\d+(?:\.\d+)?', spec_str)
        if numbers:
            # A single number is taken as the package quantity.
            if len(numbers) == 1:
                return int(float(numbers[0]))

            # With several numbers, prefer a typical case size.
            for num in numbers:
                if float(num) in [6.0, 12.0, 24.0, 30.0]:
                    return int(float(num))

            # Otherwise fall back to the last number in the string.
            return int(float(numbers[-1]))

    except Exception as e:
        # Deliberate best effort: a malformed spec must not abort the
        # caller, so the failure is logged and None is returned.
        logger.warning(f"解析规格'{spec_str}'时出错: {e}")

    return None
|
||||
@@ -0,0 +1,259 @@
|
||||
"""
|
||||
数据验证器模块
|
||||
----------
|
||||
提供对商品数据的验证和修复功能
|
||||
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, List, Tuple, Union
|
||||
|
||||
from ..utils.log_utils import get_logger
|
||||
from ..utils.string_utils import parse_monetary_string
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class ProductValidator:
    """
    Validate and repair the fields of a product record.

    Each ``validate_*`` method takes a raw cell value and returns a tuple
    whose first element says whether the value is usable, followed by the
    repaired value and an optional error message. ``validate_product``
    applies them to a whole product dictionary.
    """

    def __init__(self):
        """
        Initialise the validator.
        """
        # Cell contents that mark a warehouse row instead of a barcode.
        self.warehouse_identifiers = ["仓库", "仓库全名", "warehouse"]

    @staticmethod
    def _is_nan(value: Any) -> bool:
        """Return True when ``value`` is NaN (the only value unequal to itself)."""
        return value != value

    def validate_barcode(self, barcode: Any) -> Tuple[bool, str, Optional[str]]:
        """
        Validate and repair a barcode.

        Repairs applied, in order: a float-style ``.0`` suffix is dropped,
        non-digit characters are removed, a leading ``5`` is corrected to
        ``6`` for codes longer than 8 digits that do not start with ``53``,
        and trailing zeros are trimmed from codes longer than 13 digits.

        Args:
            barcode: Raw barcode value.

        Returns:
            ``(is_valid, repaired_barcode, error_message)``. The error
            message is None when the barcode is valid.
        """
        if barcode is None:
            return False, "", "条码为空"

        barcode_str = str(barcode).strip()

        # Warehouse marker rows carry no barcode.
        if barcode_str in self.warehouse_identifiers:
            return False, barcode_str, "条码为仓库标识"

        # A barcode that arrives as a float renders as "<digits>.0".
        # Drop that suffix first; otherwise stripping the non-digits below
        # would glue a spurious trailing zero onto the code.
        float_like = re.fullmatch(r'(\d+)\.0+', barcode_str)
        digit_source = float_like.group(1) if float_like else barcode_str

        # Keep digits only. After this step the string cannot contain a
        # non-digit, so no separate "is it all digits" check is needed.
        barcode_clean = re.sub(r'\D', '', digit_source)
        if not barcode_clean:
            return False, barcode_str, "条码不包含数字"

        # Correct a known misread: leading 5 should be 6 (codes starting
        # with 53 are left alone).
        if len(barcode_clean) > 8 and barcode_clean.startswith('5') and not barcode_clean.startswith('53'):
            original_barcode = barcode_clean
            barcode_clean = '6' + barcode_clean[1:]
            logger.info(f"修正条码前缀 5->6: {original_barcode} -> {barcode_clean}")

        # Over-long codes: trim trailing zeros until 13 digits remain or
        # the code no longer ends in zero.
        if len(barcode_clean) > 13:
            original_length = len(barcode_clean)
            if barcode_clean.endswith('0'):
                while len(barcode_clean) > 13 and barcode_clean.endswith('0'):
                    barcode_clean = barcode_clean[:-1]
                logger.info(f"修正条码长度: 从{original_length}位截断到{len(barcode_clean)}位")
            else:
                error_message = f"条码长度异常: {barcode_clean}, 长度={len(barcode_clean)}"
                logger.warning(error_message)
                return False, barcode_clean, error_message

        # Accept only 8 to 13 digits.
        if len(barcode_clean) < 8 or len(barcode_clean) > 13:
            error_message = f"条码长度异常: {barcode_clean}, 长度={len(barcode_clean)}"
            logger.warning(error_message)
            return False, barcode_clean, error_message

        # One specific code is logged as a special case; it is accepted
        # exactly like any other valid code.
        if barcode_clean == "5321545613":
            logger.info(f"特殊条码验证通过: {barcode_clean}")
            return True, barcode_clean, None

        logger.debug(f"条码验证通过: {barcode_clean}")
        return True, barcode_clean, None

    def validate_quantity(self, quantity: Any) -> Tuple[bool, float, Optional[str]]:
        """
        Validate and repair a quantity.

        Strings are stripped of everything except digits and dots before
        conversion. None and NaN are both reported as an empty quantity.

        Args:
            quantity: Raw quantity value.

        Returns:
            ``(is_valid, quantity, error_message)``. On failure the
            quantity is ``0.0``.
        """
        if quantity is None:
            return False, 0.0, "数量为空"

        if isinstance(quantity, str):
            # Keep digits and the decimal point only.
            quantity_clean = re.sub(r'[^\d\.]', '', quantity.strip())
            if not quantity_clean:
                return False, 0.0, "数量不包含数字"
            try:
                quantity_value = float(quantity_clean)
            except ValueError:
                return False, 0.0, f"无法将数量 '{quantity}' 转换为数字"
        else:
            try:
                quantity_value = float(quantity)
            except (ValueError, TypeError, OverflowError):
                return False, 0.0, f"无法将数量 '{quantity}' 转换为数字"

        # NaN compares False against everything, so it would slip through
        # the "<= 0" check below. Report it as an empty quantity.
        if self._is_nan(quantity_value):
            return False, 0.0, "数量为空"

        if quantity_value <= 0:
            return False, 0.0, f"数量必须大于0,当前值: {quantity_value}"

        return True, quantity_value, None

    def validate_price(self, price: Any) -> Tuple[bool, float, bool, Optional[str]]:
        """
        Validate and repair a unit price.

        A missing, unparsable, zero or negative price marks the item as a
        gift. None and NaN are both reported as an empty price.

        Args:
            price: Raw unit price value.

        Returns:
            ``(is_valid, price, is_gift, error_message)``.
        """
        if price is None:
            return False, 0.0, True, "单价为空,视为赠品"

        if isinstance(price, str):
            price_str = price.strip().lower()
            # Explicit gift markers, a literal zero, or an empty cell.
            if price_str in ["赠品", "gift", "赠送", "0", ""]:
                return True, 0.0, True, None

            price_value = parse_monetary_string(price_str)
            if price_value is None:
                return False, 0.0, True, f"无法将单价 '{price}' 转换为数字,视为赠品"
        else:
            try:
                price_value = float(price)
            except (ValueError, TypeError):
                return False, 0.0, True, f"无法将单价 '{price}' 转换为数字,视为赠品"

        # NaN is neither == 0 nor < 0, so without this guard it would be
        # returned as a valid price. Treat it like an empty price.
        if self._is_nan(price_value):
            return False, 0.0, True, "单价为空,视为赠品"

        # A zero price means the item is a gift.
        if price_value == 0:
            return True, 0.0, True, None

        if price_value < 0:
            return False, 0.0, True, f"单价不能为负数: {price_value},视为赠品"

        return True, price_value, False, None

    def _quantity_from_amount(self, product: Dict[str, Any], unit_price: float) -> Optional[float]:
        """Derive quantity as amount / unit price; return None when that is not possible."""
        amount = product.get('amount', None)
        if not (unit_price > 0 and amount is not None):
            return None

        try:
            amount_value = parse_monetary_string(amount)
            if amount_value is None:
                raise ValueError("无法解析金额")

            if amount_value > 0:
                calculated_quantity = amount_value / unit_price
                logger.info(f"数量为空,通过金额({amount_value})和单价({unit_price})计算得出数量: {calculated_quantity}")
                return calculated_quantity
        except (ValueError, TypeError, ZeroDivisionError) as e:
            logger.warning(f"通过金额和单价计算数量失败: {e}")

        return None

    def validate_product(self, product: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate and repair a whole product record.

        Reads the ``barcode``, ``price``, ``amount`` and ``quantity`` keys
        and writes repaired ``barcode``, ``price`` and ``quantity`` values,
        plus ``is_gift`` when the price or the amount indicates a gift.

        Args:
            product: Product data dictionary. It is not modified.

        Returns:
            A shallow copy of ``product`` with the repaired values.
        """
        validated_product = product.copy()

        # --- Barcode ---------------------------------------------------
        is_valid, fixed_barcode, error_msg = self.validate_barcode(product.get('barcode', ''))
        if not is_valid:
            logger.warning(f"条码验证失败: {error_msg}")
        # Even a rejected barcode is stored when a repaired form exists.
        if is_valid or fixed_barcode:
            validated_product['barcode'] = fixed_barcode

        # --- Unit price ------------------------------------------------
        is_valid, fixed_price, is_gift, error_msg = self.validate_price(product.get('price', 0))
        validated_product['price'] = fixed_price
        if is_gift:
            validated_product['is_gift'] = True
            if error_msg:
                logger.info(error_msg)

        # --- Amount: a missing or zero amount also marks a gift ---------
        try:
            parsed_amount = parse_monetary_string(product.get('amount', None))
            if parsed_amount is None or parsed_amount == 0.0:
                validated_product['is_gift'] = True
        except Exception as exc:
            # Best effort only: an unparsable amount must not block the
            # rest of the validation, but leave a trace for debugging.
            logger.debug(f"解析金额失败,跳过赠品判断: {exc}")

        # --- Quantity --------------------------------------------------
        is_valid, fixed_quantity, error_msg = self.validate_quantity(product.get('quantity', None))
        if is_valid:
            validated_product['quantity'] = fixed_quantity
            return validated_product

        # An empty quantity can be recovered from amount / unit price.
        if error_msg == "数量为空":
            calculated_quantity = self._quantity_from_amount(product, fixed_price)
            if calculated_quantity is not None:
                validated_product['quantity'] = calculated_quantity
                return validated_product

        logger.warning(f"数量验证失败: {error_msg}")
        validated_product['quantity'] = 0.0
        return validated_product
|
||||
Reference in New Issue
Block a user