feat: 益选 OCR 订单处理系统初始提交

- 智能供应商识别（蓉城易购/烟草/杨碧月/通用） - 百度 OCR 表格识别集成 - 规则引擎（列映射/数据清洗/单位转换/规格推断） - 条码映射管理与云端同步（Gitea REST API） - 云端同步支持：条码映射、供应商配置、商品资料、采购模板 - 拖拽一键处理（图片→OCR→Excel→合并） - 191 个单元测试 - 移除无用的模板管理功能 - 清理 IDE 产物目录 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-04 19:51:13 +08:00
commit e4d62df7e3
78 changed files with 15257 additions and 0 deletions
@@ -0,0 +1,5 @@
+"""
+OCR订单处理系统 - Excel处理模块
+----------------------------
+提供Excel文件处理、数据提取和转换功能。
+""" 
@@ -0,0 +1,535 @@
+"""
+单位转换模块
+----------
+提供单位转换功能，支持规格推断和单位自动提取。
+"""
+
+import re
+import logging
+import os
+import json
+from typing import Dict, Tuple, Optional, Any, List, Union
+
+from ..utils.log_utils import get_logger
+from .handlers.barcode_mapper import BarcodeMapper
+from .handlers.unit_converter_handlers import (
+    JianUnitHandler, BoxUnitHandler, TiHeUnitHandler, 
+    GiftUnitHandler, UnitHandler
+)
+from .validators import ProductValidator
+
+logger = get_logger(__name__)
+
+# 条码映射配置文件路径
+BARCODE_MAPPING_CONFIG = "config/barcode_mappings.json"
+
+class UnitConverter:
+    """
+    单位转换器：处理不同单位之间的转换，支持从商品名称推断规格
+    """
+    
+    def __init__(self):
+        """
+        初始化单位转换器
+        """
+        # 加载特殊条码配置
+        self.special_barcodes = self.load_barcode_mappings()
+        
+        # 规格推断的正则表达式模式
+        self.spec_patterns = [
+            # 1*6、1x12、1X20等格式
+            (r'(\d+)[*xX×](\d+)', r'\1*\2'),
+            # 1*5*12和1x5x12等三级格式
+            (r'(\d+)[*xX×](\d+)[*xX×](\d+)', r'\1*\2*\3'),
+            # "xx入"格式，如"12入"、"24入"
+            (r'(\d+)入', r'1*\1'),
+            # "xxL*1"或"xx升*1"格式
+            (r'([\d\.]+)[L升][*xX×]?(\d+)?', r'\1L*\2' if r'\2' else r'\1L*1'),
+            # "xxkg*1"或"xx公斤*1"格式
+            (r'([\d\.]+)(?:kg|公斤)[*xX×]?(\d+)?', r'\1kg*\2' if r'\2' else r'\1kg*1'),
+            # "xxg*1"或"xx克*1"格式
+            (r'([\d\.]+)(?:g|克)[*xX×]?(\d+)?', r'\1g*\2' if r'\2' else r'\1g*1'),
+            # "xxmL*1"或"xx毫升*1"格式
+            (r'([\d\.]+)(?:mL|毫升)[*xX×]?(\d+)?', r'\1mL*\2' if r'\2' else r'\1mL*1'),
+        ]
+        
+        # 初始化处理程序
+        self._init_handlers()
+        
+        # 初始化验证器
+        self.validator = ProductValidator()
+
+    def _init_handlers(self):
+        """
+        初始化各种处理程序
+        """
+        # 创建条码处理程序
+        self.barcode_mapper = BarcodeMapper(self.special_barcodes)
+        
+        # 创建单位处理程序列表，优先级从高到低
+        self.unit_handlers: List[UnitHandler] = [
+            GiftUnitHandler(),   # 首先处理赠品，优先级最高
+            JianUnitHandler(),   # 处理"件"单位
+            BoxUnitHandler(),    # 处理"箱"单位
+            TiHeUnitHandler()    # 处理"提"和"盒"单位
+        ]
+
+    def extract_unit_from_quantity(self, quantity_str: str) -> Tuple[Optional[float], Optional[str]]:
+        """
+        从数量字符串中提取单位
+        
+        支持的格式:
+        1. "2箱" -> (2, "箱")
+        2. "3件" -> (3, "件")
+        3. "1.5提" -> (1.5, "提")
+        4. "数量: 5盒" -> (5, "盒")
+        5. "× 2瓶" -> (2, "瓶")
+        
+        Args:
+            quantity_str: 数量字符串，如"2箱"、"5件"
+            
+        Returns:
+            (数量, 单位)的元组，如果无法提取则返回(None, None)
+        """
+        if not quantity_str or not isinstance(quantity_str, str):
+            return None, None
+        
+        # 清理字符串，移除前后空白和一些常见前缀
+        cleaned_str = quantity_str.strip()
+        for prefix in ['数量:', '数量：', '×', 'x', 'X', '*']:
+            cleaned_str = cleaned_str.replace(prefix, '').strip()
+        
+        # 匹配数字+单位格式 (基本格式)
+        basic_match = re.match(r'^([\d\.]+)\s*([^\d\s\.]+)$', cleaned_str)
+        if basic_match:
+            try:
+                num = float(basic_match.group(1))
+                unit = basic_match.group(2)
+                logger.info(f"从数量提取单位(基本格式): {quantity_str} -> 数量={num}, 单位={unit}")
+                return num, unit
+            except ValueError:
+                pass
+        
+        # 匹配更复杂的格式，如包含其他文本的情况
+        complex_match = re.search(r'([\d\.]+)\s*([箱|件|瓶|提|盒|袋|桶|包|kg|g|升|毫升|L|ml|个])', cleaned_str)
+        if complex_match:
+            try:
+                num = float(complex_match.group(1))
+                unit = complex_match.group(2)
+                logger.info(f"从数量提取单位(复杂格式): {quantity_str} -> 数量={num}, 单位={unit}")
+                return num, unit
+            except ValueError:
+                pass
+        
+        return None, None
+    
+    def extract_specification(self, text: str) -> Optional[str]:
+        """
+        从文本中提取规格信息
+        
+        Args:
+            text: 文本字符串
+            
+        Returns:
+            提取的规格字符串，如果无法提取则返回None
+        """
+        if not text or not isinstance(text, str):
+            return None
+        
+        # 处理XX入白膜格式，如"550纯净水24入白膜"
+        match = re.search(r'.*?(\d+)入白膜', text)
+        if match:
+            result = f"1*{match.group(1)}"
+            logger.info(f"提取规格(入白膜): {text} -> {result}")
+            return result
+            
+        # 尝试所有模式
+        for pattern, replacement in self.spec_patterns:
+            match = re.search(pattern, text)
+            if match:
+                # 特殊处理三级格式，确保正确显示为1*5*12
+                if '*' in replacement and replacement.count('*') == 1 and len(match.groups()) >= 2:
+                    result = f"{match.group(1)}*{match.group(2)}"
+                    logger.info(f"提取规格: {text} -> {result}")
+                    return result
+                # 特殊处理三级规格格式
+                elif '*' in replacement and replacement.count('*') == 2 and len(match.groups()) >= 3:
+                    result = f"{match.group(1)}*{match.group(2)}*{match.group(3)}"
+                    logger.info(f"提取三级规格: {text} -> {result}")
+                    return result
+                # 一般情况
+                else:
+                    result = re.sub(pattern, replacement, text)
+                    logger.info(f"提取规格: {text} -> {result}")
+                    return result
+                
+        # 没有匹配任何模式
+        return None
+    
+    def infer_specification_from_name(self, name: str) -> Optional[str]:
+        """
+        从商品名称中推断规格
+        
+        规则:
+        1. "xx入纸箱" -> 1*xx (如"15入纸箱" -> 1*15)
+        2. 直接包含规格 "1*15" -> 1*15
+        3. "xx纸箱" -> 1*xx (如"15纸箱" -> 1*15)
+        4. "xx白膜" -> 1*xx (如"12白膜" -> 1*12)
+        5. "xxL" 容量单位特殊处理
+        6. "xx(g|ml|毫升|克)*数字" -> 1*数字 (如"450g*15" -> 1*15)
+        
+        Args:
+            name: 商品名称
+            
+        Returns:
+            推断的规格，如果无法推断则返回None
+        """
+        if not name or not isinstance(name, str):
+            return None
+        
+        # 记录原始商品名称，用于日志
+        original_name = name
+        
+        # 新增模式: 处理重量/容量*数字格式，如"450g*15", "450ml*15"
+        # 忽略重量/容量值，只提取后面的数量作为规格
+        weight_volume_pattern = r'.*?\d+(?:g|ml|毫升|克)[*xX×](\d+)'
+        match = re.search(weight_volume_pattern, name)
+        if match:
+            inferred_spec = f"1*{match.group(1)}"
+            logger.info(f"从名称推断规格(重量/容量*数量): {original_name} -> {inferred_spec}")
+            return inferred_spec
+        
+        # 特殊模式1.1: "xx入白膜" 格式，如"550纯净水24入白膜" -> "1*24"
+        pattern1_1 = r'.*?(\d+)入白膜'
+        match = re.search(pattern1_1, name)
+        if match:
+            inferred_spec = f"1*{match.group(1)}"
+            logger.info(f"从名称推断规格(入白膜): {original_name} -> {inferred_spec}")
+            return inferred_spec
+        
+        # 特殊模式1: "xx入纸箱" 格式，如"445水溶C血橙15入纸箱" -> "1*15"
+        pattern1 = r'.*?(\d+)入纸箱'
+        match = re.search(pattern1, name)
+        if match:
+            inferred_spec = f"1*{match.group(1)}"
+            logger.info(f"从名称推断规格(入纸箱): {original_name} -> {inferred_spec}")
+            return inferred_spec
+        
+        # 特殊模式2: 直接包含规格，如"500-东方树叶-乌龙茶1*15-纸箱装" -> "1*15"
+        pattern2 = r'.*?(\d+)[*xX×](\d+).*'
+        match = re.search(pattern2, name)
+        if match:
+            inferred_spec = f"{match.group(1)}*{match.group(2)}"
+            logger.info(f"从名称推断规格(直接格式): {original_name} -> {inferred_spec}")
+            return inferred_spec
+        
+        # 特殊模式3: "xx纸箱" 格式，如"500茶π蜜桃乌龙15纸箱" -> "1*15"
+        pattern3 = r'.*?(\d+)纸箱'
+        match = re.search(pattern3, name)
+        if match:
+            inferred_spec = f"1*{match.group(1)}"
+            logger.info(f"从名称推断规格(纸箱): {original_name} -> {inferred_spec}")
+            return inferred_spec
+        
+        # 特殊模式4: "xx白膜" 格式，如"1.5L水12白膜" 或 "550水24白膜" -> "1*12" 或 "1*24"
+        pattern4 = r'.*?(\d+)白膜'
+        match = re.search(pattern4, name)
+        if match:
+            inferred_spec = f"1*{match.group(1)}"
+            logger.info(f"从名称推断规格(白膜): {original_name} -> {inferred_spec}")
+            return inferred_spec
+        
+        # 特殊模式5: 容量单位带数量格式 "1.8L*8瓶" -> "1.8L*8"
+        volume_count_pattern = r'.*?([\d\.]+)[Ll升][*×xX](\d+).*'
+        match = re.search(volume_count_pattern, name)
+        if match:
+            volume = match.group(1)
+            count = match.group(2)
+            inferred_spec = f"{volume}L*{count}"
+            logger.info(f"从名称推断规格(容量*数量): {original_name} -> {inferred_spec}")
+            return inferred_spec
+            
+        # 特殊模式6: 简单容量单位如"12.9L桶装水" -> "12.9L*1"
+        simple_volume_pattern = r'.*?([\d\.]+)[Ll升].*'
+        match = re.search(simple_volume_pattern, name)
+        if match:
+            inferred_spec = f"{match.group(1)}L*1"
+            logger.info(f"从名称推断规格(简单容量): {original_name} -> {inferred_spec}")
+            return inferred_spec
+        
+        # 尝试通用模式匹配
+        spec = self.extract_specification(name)
+        if spec:
+            logger.info(f"从名称推断规格(通用模式): {original_name} -> {spec}")
+            return spec
+            
+        return None
+        
+    def parse_specification(self, spec: str) -> Tuple[int, int, Optional[int]]:
+        """
+        解析规格字符串，支持1*12和1*5*12等格式
+        
+        Args:
+            spec: 规格字符串
+            
+        Returns:
+            (一级包装, 二级包装, 三级包装)元组，如果是二级包装，第三个值为None
+        """
+        if not spec or not isinstance(spec, str):
+            return 1, 1, None
+            
+        try:
+            # 清理规格字符串，确保格式统一
+            spec = re.sub(r'\s+', '', spec)  # 移除所有空白
+            spec = re.sub(r'[xX×]', '*', spec)  # 统一分隔符为*
+            
+            logger.debug(f"解析规格: {spec}")
+            
+            # 新增：处理“1件=12桶/袋/盒...”等等式规格，统一为1*12
+            eq_match = re.match(r'(\d+(?:\.\d+)?)\s*(?:件|箱|提|盒)\s*[=＝]\s*(\d+)\s*(?:瓶|桶|盒|支|个|袋|罐|包|卷)', spec)
+            if eq_match:
+                try:
+                    level2 = int(eq_match.group(2))
+                    logger.info(f"解析等式规格: {spec} -> 1*{level2}")
+                    return 1, level2, None
+                except ValueError:
+                    pass
+
+            # 处理三级包装，如1*5*12
+            three_level_match = re.match(r'(\d+)[*](\d+)[*](\d+)', spec)
+            if three_level_match:
+                try:
+                    level1 = int(three_level_match.group(1))
+                    level2 = int(three_level_match.group(2))
+                    level3 = int(three_level_match.group(3))
+                    logger.info(f"解析三级规格: {spec} -> {level1}*{level2}*{level3}")
+                    return level1, level2, level3
+                except ValueError:
+                    pass
+            
+            # 处理带重量单位的规格，如5kg*6、500g*12等
+            weight_match = re.match(r'([\d\.]+)(?:kg|g|克|千克|公斤)[*](\d+)', spec, re.IGNORECASE)
+            if weight_match:
+                try:
+                    # 对于重量单位，使用1作为一级包装，后面的数字作为二级包装
+                    level2 = int(weight_match.group(2))
+                    logger.info(f"解析重量规格: {spec} -> 1*{level2}")
+                    return 1, level2, None
+                except ValueError:
+                    pass
+            
+            # 处理带容量单位的规格，如500ml*15, 1L*12等
+            ml_match = re.match(r'(\d+)(?:ml|毫升)[*](\d+)', spec, re.IGNORECASE)
+            if ml_match:
+                try:
+                    # 对于ml单位，使用1作为一级包装，后面的数字作为二级包装
+                    level2 = int(ml_match.group(2))
+                    logger.info(f"解析容量(ml)规格: {spec} -> 1*{level2}")
+                    return 1, level2, None
+                except ValueError:
+                    pass
+            
+            # 处理带L单位的规格，如1L*12等
+            l_match = re.match(r'(\d+(?:\.\d+)?)[Ll升][*](\d+)', spec)
+            if l_match:
+                try:
+                    # 对于L单位，正确提取第二部分作为包装数量
+                    level2 = int(l_match.group(2))
+                    logger.info(f"解析容量(L)规格: {spec} -> 1*{level2}")
+                    return 1, level2, None
+                except ValueError:
+                    pass
+            
+            # 处理二级包装，如1*12
+            two_level_match = re.match(r'(\d+)[*](\d+)', spec)
+            if two_level_match:
+                try:
+                    level1 = int(two_level_match.group(1))
+                    level2 = int(two_level_match.group(2))
+                    logger.info(f"解析二级规格: {spec} -> {level1}*{level2}")
+                    return level1, level2, None
+                except ValueError:
+                    pass
+                
+            # 特殊处理L/升为单位的规格，如12.5L*1
+            volume_match = re.match(r'([\d\.]+)[L升][*xX×](\d+)', spec)
+            if volume_match:
+                try:
+                    volume = float(volume_match.group(1))
+                    quantity = int(volume_match.group(2))
+                    logger.info(f"解析容量规格: {spec} -> {volume}L*{quantity}")
+                    return 1, quantity, None
+                except ValueError:
+                    pass
+                    
+            # 处理不规范格式，如IL*12, 6oo*12等，从中提取数字部分作为包装数量
+            # 只要规格中包含*和数字，就尝试提取*后面的数字作为件数
+            irregular_match = re.search(r'[^0-9]*\*(\d+)', spec)
+            if irregular_match:
+                try:
+                    level2 = int(irregular_match.group(1))
+                    logger.info(f"解析不规范规格: {spec} -> 1*{level2}")
+                    return 1, level2, None
+                except ValueError:
+                    pass
+            
+            # 默认值
+            logger.warning(f"无法解析规格: {spec}，使用默认值1*1")
+            return 1, 1, None
+        except Exception as e:
+            logger.error(f"解析规格时出错: {e}")
+            return 1, 1, None
+        
+    def process_unit_conversion(self, product: Dict) -> Dict:
+        """
+        处理单位转换，按照以下规则：
+        1. 特殊条码: 优先处理特殊条码
+        2. 赠品处理: 对于赠品，维持数量转换但单价为0
+        3. "件"单位: 数量×包装数量, 单价÷包装数量, 单位转为"瓶"
+        4. "箱"单位: 数量×包装数量, 单价÷包装数量, 单位转为"瓶"
+        5. "提"和"盒"单位: 如果是三级规格, 按件处理; 如果是二级规格, 保持不变
+        6. 其他单位: 保持不变
+        
+        Args:
+            product: 商品信息字典
+            
+        Returns:
+            处理后的商品信息字典
+        """
+        # 首先验证商品数据
+        product = self.validator.validate_product(product)
+        
+        # 复制原始数据，避免修改原始字典
+        result = product.copy()
+        
+        barcode = result.get('barcode', '')
+        specification = result.get('specification', '')
+        
+        # 跳过无效数据
+        if not barcode:
+            return result
+        
+        # 先处理条码映射
+        result = self.barcode_mapper.map_barcode(result)
+        
+        # 如果没有规格信息，无法进行单位转换
+        if not specification:
+            # 尝试从商品名称推断规格
+            inferred_spec = self.infer_specification_from_name(result.get('name', ''))
+            if inferred_spec:
+                result['specification'] = inferred_spec
+                logger.info(f"从商品名称推断规格: {result.get('name', '')} -> {inferred_spec}")
+            else:
+                return result
+            
+        # 解析规格信息
+        level1, level2, level3 = self.parse_specification(result.get('specification', ''))
+        
+        # 使用单位处理程序处理单位转换
+        for handler in self.unit_handlers:
+            if handler.can_handle(result):
+                return handler.handle(result, level1, level2, level3)
+        
+        # 没有找到适用的处理程序，保持不变
+        logger.info(f"其他单位处理: 保持原样 数量: {result.get('quantity', 0)}, 单价: {result.get('price', 0)}, 单位: {result.get('unit', '')}")
+        return result 
+
+    def load_barcode_mappings(self) -> Dict[str, Dict[str, Any]]:
+        """
+        从配置文件加载条码映射
+        
+        Returns:
+            条码映射字典
+        """
+        # 默认映射
+        default_mappings = {
+            '6925019900087': {
+                'multiplier': 10,
+                'target_unit': '瓶',
+                'description': '特殊处理：数量*10，单位转换为瓶'
+            },
+            '6921168593804': {
+                'multiplier': 30,
+                'target_unit': '瓶',
+                'description': 'NFC产品特殊处理：每箱30瓶'
+            },
+            '6901826888138': {
+                'multiplier': 30,
+                'target_unit': '瓶',
+                'fixed_price': 112/30,
+                'specification': '1*30',
+                'description': '特殊处理: 规格1*30，数量*30，单价=112/30'
+            },
+            # 条码映射配置
+            '6920584471055': {
+                'map_to': '6920584471017',
+                'description': '条码映射：6920584471055 -> 6920584471017'
+            },
+            '6925861571159': {
+                'map_to': '69021824',
+                'description': '条码映射：6925861571159 -> 69021824'
+            },
+            '6923644268923': {
+                'map_to': '6923644268480',
+                'description': '条码映射：6923644268923 -> 6923644268480'
+            },
+            # 添加特殊条码6958620703716，既需要特殊处理又需要映射
+            '6958620703716': {
+                'specification': '1*14',
+                'map_to': '6958620703907',
+                'description': '特殊处理: 规格1*14，同时映射到6958620703907'
+            }
+        }
+        
+        try:
+            # 检查配置文件是否存在
+            if os.path.exists(BARCODE_MAPPING_CONFIG):
+                with open(BARCODE_MAPPING_CONFIG, 'r', encoding='utf-8') as file:
+                    mappings = json.load(file)
+                    logger.info(f"成功加载条码映射配置，共{len(mappings)}项")
+                    return mappings
+            else:
+                # 创建默认配置文件
+                self.save_barcode_mappings(default_mappings)
+                logger.info(f"创建默认条码映射配置，共{len(default_mappings)}项")
+                return default_mappings
+        except Exception as e:
+            logger.error(f"加载条码映射配置失败: {e}")
+            return default_mappings
+    
+    def save_barcode_mappings(self, mappings: Dict[str, Dict[str, Any]]) -> bool:
+        """
+        保存条码映射到配置文件
+        
+        Args:
+            mappings: 条码映射字典
+            
+        Returns:
+            保存是否成功
+        """
+        try:
+            # 确保配置目录存在
+            os.makedirs(os.path.dirname(BARCODE_MAPPING_CONFIG), exist_ok=True)
+            
+            # 写入配置文件
+            with open(BARCODE_MAPPING_CONFIG, 'w', encoding='utf-8') as file:
+                json.dump(mappings, file, ensure_ascii=False, indent=2)
+            
+            logger.info(f"条码映射配置保存成功，共{len(mappings)}项")
+            return True
+        except Exception as e:
+            logger.error(f"保存条码映射配置失败: {e}")
+            return False
+    
+    def update_barcode_mappings(self, new_mappings: Dict[str, Dict[str, Any]]) -> bool:
+        """
+        更新条码映射配置
+        
+        Args:
+            new_mappings: 新的条码映射字典
+            
+        Returns:
+            更新是否成功
+        """
+        self.special_barcodes = new_mappings
+        return self.save_barcode_mappings(new_mappings) 
@@ -0,0 +1,11 @@
+"""
+单位转换处理程序包
+-----------------
+提供单位转换和条码处理的各种处理程序
+"""
+
+from typing import Dict, Any
+
+# 导出所有处理程序类
+from .barcode_mapper import BarcodeMapper
+from .unit_converter_handlers import JianUnitHandler, BoxUnitHandler, TiHeUnitHandler, GiftUnitHandler, UnitHandler 
@@ -0,0 +1,83 @@
+"""
+条码映射处理程序
+-------------
+处理特殊条码的映射和转换
+"""
+
+import logging
+from typing import Dict, Optional, Any
+
+from ...utils.log_utils import get_logger
+
+logger = get_logger(__name__)
+
+
+class BarcodeMapper:
+    """
+    条码映射器：负责特殊条码的映射和处理
+    """
+    
+    def __init__(self, special_barcodes: Dict[str, Dict[str, Any]]):
+        """
+        初始化条码映射器
+        
+        Args:
+            special_barcodes: 特殊条码配置字典
+        """
+        self.special_barcodes = special_barcodes or {}
+        
+    def map_barcode(self, product: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        映射商品条码，处理特殊情况
+        
+        Args:
+            product: 包含条码的商品信息字典
+            
+        Returns:
+            处理后的商品信息字典
+        """
+        result = product.copy()
+        barcode = result.get('barcode', '')
+        
+        # 如果条码不在特殊条码列表中，直接返回
+        if not barcode or barcode not in self.special_barcodes:
+            return result
+        
+        special_config = self.special_barcodes[barcode]
+        
+        # 处理特殊倍数
+        if 'multiplier' in special_config:
+            multiplier = special_config.get('multiplier', 1)
+            target_unit = special_config.get('target_unit', '瓶')
+            
+            # 数量乘以倍数
+            quantity = result.get('quantity', 0)
+            new_quantity = quantity * multiplier
+            
+            # 单价除以倍数
+            price = result.get('price', 0)
+            new_price = price / multiplier if price else 0
+            
+            # 如果有固定单价，优先使用
+            if 'fixed_price' in special_config:
+                new_price = special_config['fixed_price']
+                logger.info(f"特殊条码({barcode})使用固定单价: {new_price}")
+            
+            # 如果有固定规格，设置规格
+            if 'specification' in special_config:
+                result['specification'] = special_config['specification']
+                logger.info(f"特殊条码({barcode})使用固定规格: {special_config['specification']}")
+            
+            logger.info(f"特殊条码处理: {barcode}, 数量: {quantity} -> {new_quantity}, 单价: {price} -> {new_price}, 单位: {result.get('unit', '')} -> {target_unit}")
+            
+            result['quantity'] = new_quantity
+            result['price'] = new_price
+            result['unit'] = target_unit
+        
+        # 处理条码映射 - 放在后面以便可以同时进行特殊处理和条码映射
+        if 'map_to' in special_config:
+            new_barcode = special_config['map_to']
+            logger.info(f"条码映射: {barcode} -> {new_barcode}")
+            result['barcode'] = new_barcode
+            
+        return result 
@@ -0,0 +1,286 @@
+"""
+单位转换处理程序
+-------------
+处理不同单位的转换逻辑
+"""
+
+import logging
+from typing import Dict, Optional, Any, Tuple, Protocol
+from abc import ABC, abstractmethod
+
+from ...utils.log_utils import get_logger
+
+logger = get_logger(__name__)
+
+
+class UnitHandler(ABC):
+    """
+    单位处理器基类：定义单位处理接口
+    """
+    
+    @abstractmethod
+    def can_handle(self, product: Dict[str, Any]) -> bool:
+        """
+        检查是否可以处理该商品
+        
+        Args:
+            product: 商品信息字典
+            
+        Returns:
+            是否可以处理
+        """
+        pass
+    
+    @abstractmethod
+    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
+        """
+        处理单位转换
+        
+        Args:
+            product: 商品信息字典
+            level1: 一级包装数量
+            level2: 二级包装数量
+            level3: 三级包装数量，可能为None
+            
+        Returns:
+            处理后的商品信息字典
+        """
+        pass
+
+
+class JianUnitHandler(UnitHandler):
+    """
+    处理"件"单位的转换
+    """
+    
+    def can_handle(self, product: Dict[str, Any]) -> bool:
+        """
+        检查是否可以处理该商品（单位为"件"）
+        
+        Args:
+            product: 商品信息字典
+            
+        Returns:
+            是否可以处理
+        """
+        unit = str(product.get('unit', '')).strip()
+        # 匹配"件"、"件、"、"件装"等
+        return unit == '件' or unit.startswith('件')
+    
+    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
+        """
+        处理"件"单位转换：数量×包装数量，单价÷包装数量，单位转为"瓶"
+        
+        Args:
+            product: 商品信息字典
+            level1: 一级包装数量
+            level2: 二级包装数量
+            level3: 三级包装数量，可能为None
+            
+        Returns:
+            处理后的商品信息字典
+        """
+        result = product.copy()
+        
+        quantity = result.get('quantity', 0)
+        price = result.get('price', 0)
+        
+        # 计算包装数量（二级*三级，如果无三级则仅二级）
+        packaging_count = level2 * (level3 or 1)
+        
+        # 数量×包装数量
+        new_quantity = quantity * packaging_count
+        
+        # 单价÷包装数量
+        new_price = price / packaging_count if price else 0
+        
+        logger.info(f"件单位处理: 数量: {quantity} -> {new_quantity}, 单价: {price} -> {new_price}, 单位: 件 -> 瓶")
+        
+        result['quantity'] = new_quantity
+        result['price'] = new_price
+        result['unit'] = '瓶'
+        
+        return result
+
+
+class BoxUnitHandler(UnitHandler):
+    """
+    处理"箱"单位的转换
+    """
+    
+    def can_handle(self, product: Dict[str, Any]) -> bool:
+        """
+        检查是否可以处理该商品（单位为"箱"）
+        
+        Args:
+            product: 商品信息字典
+            
+        Returns:
+            是否可以处理
+        """
+        unit = str(product.get('unit', '')).strip()
+        # 匹配"箱"、"箱、"、"箱装"等
+        return unit == '箱' or unit.startswith('箱')
+    
+    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
+        """
+        处理"箱"单位转换：数量×包装数量，单价÷包装数量，单位转为"瓶"
+        
+        Args:
+            product: 商品信息字典
+            level1: 一级包装数量
+            level2: 二级包装数量
+            level3: 三级包装数量，可能为None
+            
+        Returns:
+            处理后的商品信息字典
+        """
+        result = product.copy()
+        
+        quantity = result.get('quantity', 0)
+        price = result.get('price', 0)
+        
+        # 计算包装数量（二级*三级，如果无三级则仅二级）
+        packaging_count = level2 * (level3 or 1)
+        
+        # 数量×包装数量
+        new_quantity = quantity * packaging_count
+        
+        # 单价÷包装数量
+        new_price = price / packaging_count if price else 0
+        
+        logger.info(f"箱单位处理: 数量: {quantity} -> {new_quantity}, 单价: {price} -> {new_price}, 单位: 箱 -> 瓶")
+        
+        result['quantity'] = new_quantity
+        result['price'] = new_price
+        result['unit'] = '瓶'
+        
+        return result
+
+
+class TiHeUnitHandler(UnitHandler):
+    """
+    处理"提"和"盒"单位的转换
+    """
+    
+    def can_handle(self, product: Dict[str, Any]) -> bool:
+        """
+        检查是否可以处理该商品（单位为"提"或"盒"）
+        
+        Args:
+            product: 商品信息字典
+            
+        Returns:
+            是否可以处理
+        """
+        unit = str(product.get('unit', '')).strip()
+        return unit in ['提', '盒'] or unit.startswith('提') or unit.startswith('盒')
+    
+    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
+        """
+        处理"提"和"盒"单位转换：
+        - 如果是三级规格，按件处理（数量×包装数量，单价÷包装数量，单位转为"瓶"）
+        - 如果是二级规格，保持不变
+        
+        Args:
+            product: 商品信息字典
+            level1: 一级包装数量
+            level2: 二级包装数量
+            level3: 三级包装数量，可能为None
+            
+        Returns:
+            处理后的商品信息字典
+        """
+        result = product.copy()
+        
+        quantity = result.get('quantity', 0)
+        price = result.get('price', 0)
+        unit = result.get('unit', '')
+        
+        # 如果是三级规格，按件处理
+        if level3 is not None:
+            # 计算包装数量 - 只乘以最后一级数量
+            packaging_count = level3
+            
+            # 数量×包装数量
+            new_quantity = quantity * packaging_count
+            
+            # 单价÷包装数量
+            new_price = price / packaging_count if price else 0
+            
+            logger.info(f"提/盒单位(三级规格)处理: 数量: {quantity} -> {new_quantity}, 单价: {price} -> {new_price}, 单位: {unit} -> 瓶")
+            
+            result['quantity'] = new_quantity
+            result['price'] = new_price
+            result['unit'] = '瓶'
+        else:
+            # 如果是二级规格，保持不变
+            logger.info(f"提/盒单位(二级规格)处理: 保持原样 数量: {quantity}, 单价: {price}, 单位: {unit}")
+        
+        return result
+
+
+class GiftUnitHandler(UnitHandler):
+    """
+    处理赠品的特殊情况
+    """
+    
+    def can_handle(self, product: Dict[str, Any]) -> bool:
+        """
+        检查是否可以处理该商品（是否为赠品）
+        
+        Args:
+            product: 商品信息字典
+            
+        Returns:
+            是否可以处理
+        """
+        return product.get('is_gift', False) is True
+    
+    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
+        """
+        处理赠品的单位转换：
+        - 对于件/箱单位，数量仍然需要转换，但赠品的单价保持为0
+        
+        Args:
+            product: 商品信息字典
+            level1: 一级包装数量
+            level2: 二级包装数量
+            level3: 三级包装数量，可能为None
+            
+        Returns:
+            处理后的商品信息字典
+        """
+        result = product.copy()
+        
+        unit = result.get('unit', '')
+        quantity = result.get('quantity', 0)
+        
+        # 根据单位类型选择适当的包装数计算
+        if unit in ['件', '箱']:
+            # 计算包装数量（二级*三级，如果无三级则仅二级）
+            packaging_count = level2 * (level3 or 1)
+            
+            # 数量×包装数量
+            new_quantity = quantity * packaging_count
+            
+            logger.info(f"赠品{unit}单位处理: 数量: {quantity} -> {new_quantity}, 单价: 0, 单位: {unit} -> 瓶")
+            
+            result['quantity'] = new_quantity
+            result['unit'] = '瓶'
+        elif unit in ['提', '盒'] and level3 is not None:
+            # 对于三级规格的提/盒，类似件处理
+            new_quantity = quantity * level3
+            
+            logger.info(f"赠品{unit}单位(三级规格)处理: 数量: {quantity} -> {new_quantity}, 单价: 0, 单位: {unit} -> 瓶")
+            
+            result['quantity'] = new_quantity
+            result['unit'] = '瓶'
+        else:
+            # 其他情况保持不变
+            logger.info(f"赠品{unit}单位处理: 保持原样 数量: {quantity}, 单价: 0, 单位: {unit}")
+        
+        # 确保单价为0
+        result['price'] = 0
+        
+        return result 
@@ -0,0 +1,423 @@
+"""
+订单合并模块
+----------
+提供采购单合并功能，将多个采购单合并为一个。
+"""
+
+import os
+import re
+import pandas as pd
+import numpy as np
+import xlrd
+import xlwt
+from xlutils.copy import copy as xlcopy
+from typing import Dict, List, Optional, Tuple, Union, Any, Callable
+from datetime import datetime
+
+from ...config.settings import ConfigManager
+from ..utils.log_utils import get_logger
+from ..handlers.column_mapper import ColumnMapper
+from ..utils.file_utils import (
+    ensure_dir,
+    get_file_extension,
+    get_files_by_extensions,
+    load_json,
+    save_json
+)
+from ..utils.string_utils import (
+    clean_string,
+    clean_barcode,
+    format_barcode
+)
+
+logger = get_logger(__name__)
+
+class PurchaseOrderMerger:
+    """
+    采购单合并器：将多个采购单Excel文件合并成一个文件
+    """
+    
+    def __init__(self, config):
+        """
+        初始化采购单合并器
+        
+        Args:
+            config: 配置信息
+        """
+        self.config = config
+        
+        # 修复ConfigParser对象没有get_path方法的问题
+        try:
+            # 获取输出目录
+            self.output_dir = config.get('Paths', 'output_folder', fallback='data/output')
+            
+            # 确保目录存在
+            os.makedirs(self.output_dir, exist_ok=True)
+            
+            # 记录实际路径
+            logger.info(f"使用输出目录: {os.path.abspath(self.output_dir)}")
+            
+            # 获取模板文件路径
+            template_folder = config.get('Paths', 'template_folder', fallback='templates')
+            template_name = config.get('Templates', 'purchase_order', fallback='银豹-采购单模板.xls')
+            
+            self.template_path = os.path.join(template_folder, template_name)
+            
+            # 检查模板文件是否存在
+            if not os.path.exists(self.template_path):
+                logger.warning(f"模板文件不存在: {self.template_path}")
+            
+            # 用于记录已合并的文件
+            self.merged_files_json = os.path.join(self.output_dir, "merged_files.json")
+            self.merged_files = self._load_merged_files()
+            
+            logger.info(f"初始化PurchaseOrderMerger完成，模板文件: {self.template_path}")
+        except Exception as e:
+            logger.error(f"初始化PurchaseOrderMerger失败: {e}")
+            raise
+    
+    def _load_merged_files(self) -> Dict[str, str]:
+        """
+        加载已合并文件的缓存
+        
+        Returns:
+            合并记录字典
+        """
+        return load_json(self.merged_files_json, {})
+        
+    def _save_merged_files(self) -> None:
+        """保存已合并文件的缓存"""
+        save_json(self.merged_files, self.merged_files_json)
+    
+    def get_purchase_orders(self) -> List[str]:
+        """
+        获取result目录下的采购单Excel文件
+        
+        Returns:
+            采购单文件路径列表
+        """
+        # 采购单文件保存在data/result目录
+        result_dir = "data/result"
+        logger.info(f"搜索目录 {result_dir} 中的采购单Excel文件")
+        
+        # 确保目录存在
+        os.makedirs(result_dir, exist_ok=True)
+        
+        # 获取所有Excel文件
+        all_files = get_files_by_extensions(result_dir, ['.xls', '.xlsx'])
+        
+        # 筛选采购单文件
+        purchase_orders = [
+            file for file in all_files 
+            if os.path.basename(file).startswith('采购单_')
+        ]
+        
+        if not purchase_orders:
+            logger.warning(f"未在 {result_dir} 目录下找到采购单Excel文件")
+            return []
+        
+        # 按修改时间排序，最新的在前
+        purchase_orders.sort(key=lambda x: os.path.getmtime(x), reverse=True)
+        
+        logger.info(f"找到 {len(purchase_orders)} 个采购单Excel文件")
+        return purchase_orders
+    
+    def read_purchase_order(self, file_path: str) -> Optional[pd.DataFrame]:
+        """
+        读取采购单Excel文件
+        
+        Args:
+            file_path: 采购单文件路径
+            
+        Returns:
+            数据帧，如果读取失败则返回None
+        """
+        try:
+            # 读取Excel文件
+            df = pd.read_excel(file_path)
+            logger.info(f"成功读取采购单文件: {file_path}")
+            
+            # 打印列名，用于调试
+            logger.debug(f"Excel文件的列名: {df.columns.tolist()}")
+            
+            # 处理特殊情况：检查是否需要读取指定行作为标题行
+            header_row_idx = ColumnMapper.detect_header_row(df, max_rows=5, min_matches=3)
+            if header_row_idx >= 0:
+                logger.info(f"检测到表头在第 {header_row_idx+1} 行")
+
+                # 使用此行作为列名，数据从下一行开始
+                header_row = df.iloc[header_row_idx].astype(str)
+                data_rows = df.iloc[header_row_idx+1:].reset_index(drop=True)
+
+                # 为每一列分配名称（避免重复的列名）
+                new_columns = []
+                for i, col in enumerate(header_row):
+                    col_str = str(col)
+                    if col_str == 'nan' or col_str == 'None' or pd.isna(col):
+                        new_columns.append(f"Col_{i}")
+                    else:
+                        new_columns.append(col_str)
+
+                # 使用新列名创建新的DataFrame
+                data_rows.columns = new_columns
+                df = data_rows
+                logger.debug(f"重新构建的数据帧列名: {df.columns.tolist()}")
+
+            # 使用 ColumnMapper 统一查找列名（保留中文键名以兼容下游代码）
+            all_columns = df.columns.tolist()
+            logger.info(f"列名: {all_columns}")
+
+            standard_to_chinese = {
+                'barcode': '条码',
+                'quantity': '采购量',
+                'unit_price': '采购单价',
+                'gift_quantity': '赠送量',
+            }
+
+            mapped_columns = {}
+            for std_name, chinese_name in standard_to_chinese.items():
+                matched = ColumnMapper.find_column(all_columns, std_name)
+                if matched:
+                    mapped_columns[chinese_name] = matched
+                    logger.info(f"列名映射: {matched} -> {chinese_name}")
+
+            # 如果找到了必要的列，重命名列
+            if mapped_columns:
+                rename_dict = {mapped_columns[key]: key for key in mapped_columns}
+                logger.info(f"列名重命名映射: {rename_dict}")
+                df = df.rename(columns=rename_dict)
+                logger.info(f"重命名后的列名: {df.columns.tolist()}")
+            else:
+                logger.warning(f"未找到可映射的列名: {file_path}")
+            
+            return df
+            
+        except Exception as e:
+            logger.error(f"读取采购单文件失败: {file_path}, 错误: {str(e)}")
+            return None
+    
+    def merge_purchase_orders(self, file_paths: List[str]) -> Optional[pd.DataFrame]:
+        """
+        合并多个采购单文件
+        
+        Args:
+            file_paths: 采购单文件路径列表
+            
+        Returns:
+            合并后的数据帧，如果合并失败则返回None
+        """
+        if not file_paths:
+            logger.warning("没有需要合并的采购单文件")
+            return None
+        
+        # 读取所有采购单文件
+        dfs = []
+        for file_path in file_paths:
+            df = self.read_purchase_order(file_path)
+            if df is not None:
+                dfs.append(df)
+        
+        if not dfs:
+            logger.warning("没有成功读取的采购单文件")
+            return None
+        
+        # 合并数据
+        logger.info(f"开始合并 {len(dfs)} 个采购单文件")
+        
+        # 首先，整理每个数据帧以确保它们有相同的结构
+        processed_dfs = []
+        for i, df in enumerate(dfs):
+            # 确保必要的列存在
+            required_columns = ['条码', '采购量', '采购单价']
+            missing_columns = [col for col in required_columns if col not in df.columns]
+            
+            if missing_columns:
+                logger.warning(f"数据帧 {i} 缺少必要的列: {missing_columns}")
+                continue
+            
+            # 处理赠送量列不存在的情况
+            if '赠送量' not in df.columns:
+                df['赠送量'] = 0
+            
+            # 选择并清理需要的列
+            cleaned_df = pd.DataFrame()
+            
+            # 清理条码 - 确保是字符串且无小数点
+            cleaned_df['条码'] = df['条码'].apply(lambda x: format_barcode(x) if pd.notna(x) else '')
+            
+            # 清理采购量 - 确保是数字
+            cleaned_df['采购量'] = pd.to_numeric(df['采购量'], errors='coerce').fillna(0)
+            
+            # 清理单价 - 确保是数字并保留4位小数
+            cleaned_df['采购单价'] = pd.to_numeric(df['采购单价'], errors='coerce').fillna(0).round(4)
+            
+            # 清理赠送量 - 确保是数字
+            cleaned_df['赠送量'] = pd.to_numeric(df['赠送量'], errors='coerce').fillna(0)
+            
+            # 过滤无效行 - 条码为空或采购量为0的行跳过
+            valid_df = cleaned_df[(cleaned_df['条码'] != '') & (cleaned_df['采购量'] > 0)]
+            
+            if len(valid_df) > 0:
+                processed_dfs.append(valid_df)
+                logger.info(f"处理文件 {i+1}: 有效记录 {len(valid_df)} 行")
+            else:
+                logger.warning(f"处理文件 {i+1}: 没有有效记录")
+        
+        if not processed_dfs:
+            logger.warning("没有有效的数据帧用于合并")
+            return None
+        
+        # 将所有数据帧合并
+        merged_df = pd.concat(processed_dfs, ignore_index=True)
+        
+        # 按条码和单价分组，合并相同商品
+        # 四舍五入到4位小数，避免浮点误差导致相同价格被当作不同价格
+        merged_df['采购单价'] = merged_df['采购单价'].round(4)  
+        
+        # 对于同一条码和单价的商品，合并数量和赠送量
+        result = merged_df.groupby(['条码', '采购单价'], as_index=False).agg({
+            '采购量': 'sum',
+            '赠送量': 'sum'
+        })
+        
+        # 排序，按条码升序
+        result = result.sort_values('条码').reset_index(drop=True)
+        
+        # 设置为0的赠送量设为空
+        result.loc[result['赠送量'] == 0, '赠送量'] = pd.NA
+        
+        logger.info(f"合并完成，共 {len(result)} 条商品记录")
+        return result
+    
+    def create_merged_purchase_order(self, df: pd.DataFrame) -> Optional[str]:
+        """
+        创建合并的采购单文件，完全按照银豹格式要求
+        
+        Args:
+            df: 合并后的数据帧
+            
+        Returns:
+            输出文件路径，如果创建失败则返回None
+        """
+        try:
+            # 打开模板文件
+            template_workbook = xlrd.open_workbook(self.template_path, formatting_info=True)
+            template_sheet = template_workbook.sheet_by_index(0)
+            
+            # 首先分析模板结构，确定关键列的位置
+            logger.info(f"分析模板结构")
+            for i in range(min(5, template_sheet.nrows)):
+                row_values = [str(cell.value).strip() for cell in template_sheet.row(i)]
+                logger.debug(f"模板第{i+1}行: {row_values}")
+            
+            # 银豹模板的标准列位置：
+            # 条码列(商品条码): B列(索引1)
+            barcode_col = 1
+            # 采购量列: C列(索引2)
+            quantity_col = 2 
+            # 赠送量列: D列(索引3)
+            gift_col = 3
+            # 采购单价列: E列(索引4)
+            price_col = 4
+            
+            # 找到数据开始行 - 通常是第二行(索引1)
+            data_start_row = 1
+            
+            # 创建可写的副本
+            output_workbook = xlcopy(template_workbook)
+            output_sheet = output_workbook.get_sheet(0)
+            
+            # 设置单价的格式样式（保留4位小数）
+            price_style = xlwt.XFStyle()
+            price_style.num_format_str = '0.0000'
+            
+            # 数量格式
+            quantity_style = xlwt.XFStyle()
+            quantity_style.num_format_str = '0'
+            
+            # 遍历数据并填充到Excel
+            for i, (_, row) in enumerate(df.iterrows()):
+                r = data_start_row + i
+                
+                # 只填充银豹采购单格式要求的4个列：条码、采购量、赠送量、采购单价
+                
+                # 条码（必填）- B列(1)
+                output_sheet.write(r, barcode_col, row['条码'])
+                
+                # 采购量（必填）- C列(2)
+                output_sheet.write(r, quantity_col, float(row['采购量']), quantity_style)
+                
+                # 赠送量 - D列(3)
+                if pd.notna(row['赠送量']) and float(row['赠送量']) > 0:
+                    output_sheet.write(r, gift_col, float(row['赠送量']), quantity_style)
+                
+                # 采购单价（必填）- E列(4)
+                output_sheet.write(r, price_col, float(row['采购单价']), price_style)
+            
+            # 生成输出文件名，保存到data/result目录
+            timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+            result_dir = "data/result"
+            os.makedirs(result_dir, exist_ok=True)
+            output_file = os.path.join(result_dir, f"合并采购单_{timestamp}.xls")
+            
+            # 保存文件
+            output_workbook.save(output_file)
+            logger.info(f"合并采购单已保存到: {output_file}，共{len(df)}条记录")
+            return output_file
+            
+        except Exception as e:
+            logger.error(f"创建合并采购单时出错: {e}")
+            return None
+    
+    def process(self, file_paths: Optional[List[str]] = None, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
+        """
+        处理采购单合并
+        
+        Args:
+            file_paths: 指定要合并的文件路径列表，如果为None则自动获取
+            
+        Returns:
+            合并后的文件路径，如果合并失败则返回None
+        """
+        # 如果未指定文件路径，则获取所有采购单文件
+        if file_paths is None:
+            file_paths = self.get_purchase_orders()
+            try:
+                if progress_cb:
+                    progress_cb(97)
+            except Exception:
+                pass
+        
+        # 检查是否有文件需要合并
+        if not file_paths:
+            logger.warning("没有找到可合并的采购单文件")
+            return None
+        
+        # 合并采购单
+        merged_df = self.merge_purchase_orders(file_paths)
+        if merged_df is None:
+            logger.error("合并采购单失败")
+            return None
+        try:
+            if progress_cb:
+                progress_cb(98)
+        except Exception:
+            pass
+        
+        # 创建合并的采购单文件
+        output_file = self.create_merged_purchase_order(merged_df)
+        if output_file is None:
+            logger.error("创建合并采购单文件失败")
+            return None
+        try:
+            if progress_cb:
+                progress_cb(100)
+        except Exception:
+            pass
+        
+        # 记录已合并文件
+        for file_path in file_paths:
+            self.merged_files[file_path] = output_file
+        self._save_merged_files()
+        
+        return output_file
@@ -0,0 +1,860 @@
+"""
+Excel处理核心模块
+--------------
+提供Excel文件处理功能，包括表格解析、数据提取和处理。
+"""
+
+import os
+import re
+import pandas as pd
+import numpy as np
+import xlrd
+import xlwt
+from xlutils.copy import copy as xlcopy
+from typing import Dict, List, Optional, Tuple, Union, Any, Callable
+from datetime import datetime
+
+from ...config.settings import ConfigManager
+from ..utils.log_utils import get_logger
+from ..utils.file_utils import (
+    ensure_dir,
+    get_file_extension,
+    get_latest_file,
+    load_json,
+    save_json
+)
+from ..utils.string_utils import (
+    clean_string,
+    extract_number,
+    format_barcode,
+    parse_monetary_string
+)
+from .converter import UnitConverter
+from ..handlers.column_mapper import ColumnMapper
+
+logger = get_logger(__name__)
+
+class ExcelProcessor:
+    """
+    Excel处理器：处理OCR识别后的Excel文件，
+    提取条码、单价和数量，并按照采购单模板的格式填充
+    """
+    
+    def __init__(self, config):
+        """
+        初始化Excel处理器
+        
+        Args:
+            config: 配置信息
+        """
+        self.config = config
+        
+        # 修复ConfigParser对象没有get_path方法的问题
+        try:
+            # 获取输入和输出目录
+            self.output_dir = config.get('Paths', 'output_folder', fallback='data/output')
+            self.temp_dir = config.get('Paths', 'temp_folder', fallback='data/temp')
+            
+            # 获取模板文件路径
+            self.template_path = config.get('Paths', 'template_file', fallback='templates/银豹-采购单模板.xls')
+            if not os.path.exists(self.template_path):
+                logger.warning(f"模板文件不存在: {self.template_path}")
+            
+            # 设置缓存文件路径
+            self.cache_file = os.path.join(self.output_dir, "processed_files.json")
+            self.processed_files = self._load_processed_files()
+            
+            # 确保目录存在
+            os.makedirs(self.output_dir, exist_ok=True)
+            os.makedirs(self.temp_dir, exist_ok=True)
+            
+            # 记录实际路径
+            logger.info(f"使用输出目录: {os.path.abspath(self.output_dir)}")
+            logger.info(f"使用临时目录: {os.path.abspath(self.temp_dir)}")
+            
+            # 加载单位转换器和配置
+            self.unit_converter = UnitConverter()
+            logger.info(f"初始化ExcelProcessor完成，模板文件: {self.template_path}")
+        except Exception as e:
+            logger.error(f"初始化ExcelProcessor失败: {e}")
+            raise
+    
+    def _load_processed_files(self) -> Dict[str, str]:
+        """
+        加载已处理文件的缓存
+        
+        Returns:
+            处理记录字典
+        """
+        return load_json(self.cache_file, {})
+        
+    def _save_processed_files(self) -> None:
+        """保存已处理文件的缓存"""
+        save_json(self.processed_files, self.cache_file)
+    
+    def get_latest_excel(self) -> Optional[str]:
+        """
+        获取output目录下最新的Excel文件（排除采购单文件）
+        
+        Returns:
+            最新Excel文件的路径，如果未找到则返回None
+        """
+        logger.info(f"搜索目录 {self.output_dir} 中的Excel文件")
+        
+        # 使用文件工具获取最新文件
+        latest_file = get_latest_file(
+            self.output_dir,
+            pattern="",  # 不限制文件名
+            extensions=['.xlsx', '.xls']  # 限制为Excel文件
+        )
+        
+        # 如果没有找到文件
+        if not latest_file:
+            logger.warning(f"未在 {self.output_dir} 目录下找到未处理的Excel文件")
+            return None
+        
+        # 检查是否是采购单（以"采购单_"开头的文件）
+        file_name = os.path.basename(latest_file)
+        if file_name.startswith('采购单_'):
+            logger.warning(f"找到的最新文件是采购单，不作处理: {latest_file}")
+            return None
+        
+        logger.info(f"找到最新的Excel文件: {latest_file}")
+        return latest_file
+    
+    def extract_barcode(self, df: pd.DataFrame) -> List[str]:
+        """
+        从数据帧中提取条码列名
+        
+        Args:
+            df: 数据帧
+            
+        Returns:
+            可能的条码列名列表
+        """
+        possible_barcode_columns = ColumnMapper.STANDARD_COLUMNS['barcode']
+        
+        found_columns = []
+        
+        # 检查精确匹配
+        for col in df.columns:
+            col_str = str(col).strip()
+            if col_str in possible_barcode_columns:
+                found_columns.append(col)
+                logger.info(f"找到精确匹配的条码列: {col_str}")
+        
+        # 如果找不到精确匹配，尝试部分匹配
+        if not found_columns:
+            for col in df.columns:
+                col_str = str(col).strip().lower()
+                for keyword in ['条码', '条形码', 'barcode', '编码']:
+                    if keyword.lower() in col_str:
+                        found_columns.append(col)
+                        logger.info(f"找到部分匹配的条码列: {col} (包含关键词: {keyword})")
+                        break
+        
+        # 如果仍然找不到，尝试使用数据特征识别
+        if not found_columns and len(df) > 0:
+            for col in df.columns:
+                # 检查此列数据是否符合条码特征
+                sample_values = df[col].dropna().astype(str).tolist()[:10]  # 取前10个非空值
+                
+                if sample_values and all(len(val) >= 8 and len(val) <= 14 for val in sample_values):
+                    # 大多数条码长度在8-14之间
+                    if all(val.isdigit() for val in sample_values):
+                        found_columns.append(col)
+                        logger.info(f"基于数据特征识别的可能条码列: {col}")
+        
+        return found_columns
+    
+    def extract_product_info(self, df: pd.DataFrame) -> List[Dict]:
+        """
+        从数据帧中提取商品信息
+        
+        Args:
+            df: 数据帧
+            
+        Returns:
+            商品信息列表
+        """
+        products = []
+        
+        # 检测列映射
+        column_mapping = self._detect_column_mapping(df)
+        logger.info(f"检测到列映射: {column_mapping}")
+        
+        # 处理每一行
+        for idx, row in df.iterrows():
+            try:
+                # 初始化商品信息
+                product = {
+                    'barcode': '',     # 条码
+                    'name': '',        # 商品名称
+                    'specification': '',  # 规格
+                    'quantity': 0,     # 数量
+                    'unit': '',        # 单位
+                    'price': 0,        # 单价
+                    'amount': 0,       # 金额
+                    'is_gift': False   # 是否为赠品
+                }
+                
+                # 提取条码
+                if '条码' in df.columns and not pd.isna(row['条码']):
+                    product['barcode'] = str(row['条码']).strip()
+                elif column_mapping.get('barcode') and not pd.isna(row[column_mapping['barcode']]):
+                    product['barcode'] = str(row[column_mapping['barcode']]).strip()
+                
+                # 跳过空条码行
+                if not product['barcode']:
+                    continue
+                
+                # 检查备注列，过滤换货、退货、作废等非采购行
+                skip_row = False
+                for col in df.columns:
+                    col_str = str(col)
+                    if any(k in col_str for k in ['备注', '说明', '类型', '备注1']):
+                        val = str(row[col]).strip()
+                        # 过滤常见的非采购关键字
+                        if any(k in val for k in ['换货', '退货', '作废', '减钱', '冲减', '赠品单', '补货']):
+                            logger.info(f"过滤非采购行: {product['barcode']} - {product.get('name', '')}, 原因: {col_str}包含 '{val}'")
+                            skip_row = True
+                            break
+                if skip_row:
+                    continue
+                    
+                # 提取商品名称
+                if '商品名称' in df.columns and not pd.isna(row['商品名称']):
+                    product['name'] = str(row['商品名称']).strip()
+                elif '名称' in df.columns and not pd.isna(row['名称']):
+                    product['name'] = str(row['名称']).strip()
+                elif column_mapping.get('name') and not pd.isna(row[column_mapping['name']]):
+                    product['name'] = str(row[column_mapping['name']]).strip()
+                    
+                # 提取单位
+                if '单位' in df.columns and not pd.isna(row['单位']):
+                    product['unit'] = str(row['单位']).strip()
+                elif column_mapping.get('unit') and not pd.isna(row[column_mapping['unit']]):
+                    product['unit'] = str(row[column_mapping['unit']]).strip()
+                
+                # 提取单价
+                if '单价' in df.columns and not pd.isna(row['单价']):
+                    product['price'] = row['单价']
+                elif column_mapping.get('price') and not pd.isna(row[column_mapping['price']]):
+                    product['price'] = row[column_mapping['price']]
+                
+                # 提取金额
+                if '金额' in df.columns and not pd.isna(row['金额']):
+                    product['amount'] = row['金额']
+                elif '小计' in df.columns and not pd.isna(row['小计']):
+                    product['amount'] = row['小计']
+                elif column_mapping.get('amount') and not pd.isna(row[column_mapping['amount']]):
+                    product['amount'] = row[column_mapping['amount']]
+                # 根据金额判断赠品：金额为0、为空、或为o/O
+                amt = product.get('amount', None)
+                try:
+                    is_amt_gift = False
+                    if amt is None:
+                        is_amt_gift = True
+                    elif isinstance(amt, str):
+                        parsed = parse_monetary_string(amt)
+                        is_amt_gift = (parsed is None or parsed == 0.0)
+                    else:
+                        parsed = parse_monetary_string(amt)
+                        is_amt_gift = (parsed is not None and parsed == 0.0)
+                    if is_amt_gift:
+                        product['is_gift'] = True
+                except Exception:
+                    pass
+                
+                # 提取数量
+                if '数量' in df.columns and not pd.isna(row['数量']):
+                    product['quantity'] = row['数量']
+                elif column_mapping.get('quantity') and not pd.isna(row[column_mapping['quantity']]):
+                    product['quantity'] = row[column_mapping['quantity']]
+                
+                # 处理可能的复合数量字段，例如"2箱"、"3件"
+                if isinstance(product['quantity'], str) and product['quantity']:
+                    num, unit = self.unit_converter.extract_unit_from_quantity(product['quantity'])
+                    if unit:
+                        product['unit'] = unit
+                        if num is not None:
+                            product['quantity'] = num
+                
+                # 提取规格并解析包装数量
+                if '规格' in df.columns and not pd.isna(row['规格']):
+                    product['specification'] = str(row['规格'])
+                    # 修正OCR误识别的4.51*4为4.5L*4
+                    product['specification'] = re.sub(r'(\d+\.\d+)1\*(\d+)', r'\1L*\2', product['specification'])
+                    package_quantity = self.parse_specification(product['specification'])
+                    if package_quantity:
+                        product['package_quantity'] = package_quantity
+                        logger.info(f"解析规格: {product['specification']} -> 包装数量={package_quantity}")
+                elif column_mapping.get('specification') and not pd.isna(row[column_mapping['specification']]):
+                    product['specification'] = str(row[column_mapping['specification']])
+                    # 修正OCR误识别的4.51*4为4.5L*4
+                    product['specification'] = re.sub(r'(\d+\.\d+)1\*(\d+)', r'\1L*\2', product['specification'])
+                    package_quantity = self.parse_specification(product['specification'])
+                    if package_quantity:
+                        product['package_quantity'] = package_quantity
+                        logger.info(f"从映射列解析规格: {product['specification']} -> 包装数量={package_quantity}")
+                else:
+                    # 只有在无法从Excel获取规格时，才尝试从商品名称推断规格
+                    if product['name']:
+                        # 特殊处理：优先检查名称中是否包含"容量*数量"格式
+                        container_pattern = r'.*?(\d+(?:\.\d+)?)\s*(?:ml|[mM][lL]|[lL]|升|毫升)[*×xX](\d+).*'
+                        match = re.search(container_pattern, product['name'])
+                        if match:
+                            # 容量单位*数量格式，如"1.8L*8瓶"，取数量部分作为包装数量
+                            volume = match.group(1)
+                            count = match.group(2)
+                            inferred_spec = f"{volume}L*{count}"
+                            inferred_qty = int(count)
+                            product['specification'] = inferred_spec
+                            product['package_quantity'] = inferred_qty
+                            logger.info(f"从商品名称提取容量*数量格式: {product['name']} -> {inferred_spec}, 包装数量={inferred_qty}")
+                        # 原来的重量/容量*数字格式处理逻辑
+                        else:
+                            weight_volume_pattern = r'.*?\d+(?:g|ml|毫升|克)[*xX×](\d+)'
+                            match = re.search(weight_volume_pattern, product['name'])
+                            if match:
+                                inferred_spec = f"1*{match.group(1)}"
+                                inferred_qty = int(match.group(1))
+                                product['specification'] = inferred_spec
+                                product['package_quantity'] = inferred_qty
+                                logger.info(f"从商品名称提取重量/容量规格: {product['name']} -> {inferred_spec}, 包装数量={inferred_qty}")
+                            else:
+                                # 一般情况的规格推断
+                                inferred_spec = self.unit_converter.infer_specification_from_name(product['name'])
+                                if inferred_spec:
+                                    product['specification'] = inferred_spec
+                                    package_quantity = self.parse_specification(inferred_spec)
+                                    if package_quantity:
+                                        product['package_quantity'] = package_quantity
+                                    logger.info(f"从商品名称推断规格: {product['name']} -> {inferred_spec}, 包装数量={package_quantity}")
+                
+                # 检查已设置的规格但未设置包装数量的情况
+                if product.get('specification') and not product.get('package_quantity'):
+                    package_quantity = self.parse_specification(product['specification'])
+                    if package_quantity:
+                        product['package_quantity'] = package_quantity
+                        logger.info(f"解析已设置的规格: {product['specification']} -> 包装数量={package_quantity}")
+                
+                # 新增逻辑：根据规格推断单位为"件"
+                if not product['unit'] and product.get('barcode') and product.get('specification') and product.get('quantity') and product.get('price') is not None:
+                    # 检查规格是否符合容量*数量格式
+                    volume_pattern = r'(\d+(?:\.\d+)?)\s*(?:ml|[mL]L|l|L|升|毫升)[*×xX](\d+)'
+                    match = re.search(volume_pattern, product['specification'])
+                    
+                    # 判断是否需要推断单位为"件"
+                    if match:
+                        product['unit'] = '件'
+                        logger.info(f"根据规格推断单位: {product['specification']} -> 单位=件")
+                    else:
+                        # 检查简单的数量*数量格式
+                        simple_pattern = r'(\d+)[*×xX](\d+)'
+                        match = re.search(simple_pattern, product['specification'])
+                        if match:
+                            product['unit'] = '件'
+                            logger.info(f"根据规格推断单位: {product['specification']} -> 单位=件")
+                
+                # 应用单位转换规则
+                product = self.unit_converter.process_unit_conversion(product)
+                
+                # 如果数量为0但单价和金额都存在，计算数量 = 金额/单价
+                if (product['quantity'] == 0 or product['quantity'] is None) and product['price'] > 0 and product['amount']:
+                    try:
+                        amount = parse_monetary_string(product['amount'])
+                        if amount is not None and amount > 0:
+                            quantity = amount / product['price']
+                            logger.info(f"数量为空或为0，通过金额({amount})和单价({product['price']})计算得出数量: {quantity}")
+                            product['quantity'] = quantity
+                    except Exception as e:
+                        logger.warning(f"通过金额和单价计算数量失败: {e}")
+                
+                products.append(product)
+            except Exception as e:
+                logger.error(f"提取第{idx+1}行商品信息时出错: {e}", exc_info=True)
+                continue
+                
+        logger.info(f"提取到 {len(products)} 个商品信息")
+        return products
+    
+    def fill_template(self, products: List[Dict], output_file_path: str) -> bool:
+        """
+        填充采购单模板
+        
+        Args:
+            products: 商品信息列表
+            output_file_path: 输出文件路径
+            
+        Returns:
+            是否成功填充
+        """
+        try:
+            # 打开模板文件
+            template_workbook = xlrd.open_workbook(self.template_path, formatting_info=True)
+            template_sheet = template_workbook.sheet_by_index(0)
+            
+            # 创建可写的副本
+            output_workbook = xlcopy(template_workbook)
+            output_sheet = output_workbook.get_sheet(0)
+            
+            # 先对产品按条码分组，区分正常商品和赠品
+            barcode_groups = {}
+            
+            # 遍历所有产品，按条码分组
+            logger.info(f"开始处理{len(products)} 个产品信息")
+            for product in products:
+                barcode = product.get('barcode', '')
+                # 确保条码是整数字符串
+                barcode = format_barcode(barcode)
+                
+                if not barcode:
+                    logger.warning(f"跳过无条码商品")
+                    continue
+                
+                # 获取数量和单价
+                quantity = product.get('quantity', 0)
+                price = product.get('price', 0)
+                amount = product.get('amount', 0)
+                
+                # 如果数量为0但单价和金额都存在，计算数量 = 金额/单价
+                if (quantity == 0 or quantity is None) and price > 0 and amount:
+                    try:
+                        amount = parse_monetary_string(amount)
+                        if amount is not None and amount > 0:
+                            quantity = amount / price
+                            logger.info(f"数量为空或为0，通过金额({amount})和单价({price})计算得出数量: {quantity}")
+                            product['quantity'] = quantity
+                    except Exception as e:
+                        logger.warning(f"通过金额和单价计算数量失败: {e}")
+                
+                # 判断是否为赠品（价格为0）
+                is_gift = bool(product.get('is_gift', False)) or (price == 0)
+                
+                logger.info(f"处理商品: 条码={barcode}, 数量={quantity}, 单价={price}, 是否赠品={is_gift}")
+                
+                if barcode not in barcode_groups:
+                    barcode_groups[barcode] = {
+                        'normal': None,  # 正常商品信息
+                        'gift_quantity': 0  # 赠品数量
+                    }
+                
+                if is_gift:
+                    # 是赠品，累加赠品数量
+                    barcode_groups[barcode]['gift_quantity'] += quantity
+                    logger.info(f"发现赠品：条码{barcode}, 数量={quantity}")
+                else:
+                    # 是正常商品
+                    if barcode_groups[barcode]['normal'] is None:
+                        barcode_groups[barcode]['normal'] = {
+                            'product': product,
+                            'quantity': quantity,
+                            'price': price
+                        }
+                        logger.info(f"发现正常商品：条码{barcode}, 数量={quantity}, 单价={price}")
+                    else:
+                        # 如果有多个正常商品记录，累加数量
+                        barcode_groups[barcode]['normal']['quantity'] += quantity
+                        logger.info(f"累加正常商品数量：条码{barcode}, 新增={quantity}, 累计={barcode_groups[barcode]['normal']['quantity']}")
+                        
+                        # 如果单价不同，取平均值
+                        if price != barcode_groups[barcode]['normal']['price']:
+                            avg_price = (barcode_groups[barcode]['normal']['price'] + price) / 2
+                            barcode_groups[barcode]['normal']['price'] = avg_price
+                            logger.info(f"调整单价(取平均值)：条码{barcode}, 原价={barcode_groups[barcode]['normal']['price']}, 新价={price}, 平均={avg_price}")
+            
+            # 输出调试信息
+            logger.info(f"分组后共{len(barcode_groups)} 个不同条码的商品")
+            for barcode, group in barcode_groups.items():
+                if group['normal'] is not None:
+                    logger.info(f"条码 {barcode} 处理结果：正常商品数量{group['normal']['quantity']}，单价{group['normal']['price']}，赠品数量{group['gift_quantity']}")
+                else:
+                    logger.info(f"条码 {barcode} 处理结果：只有赠品，数量={group['gift_quantity']}")
+            
+            # 准备填充数据
+            row_index = 1  # 从第2行开始填充（索引从0开始）
+            
+            for barcode, group in barcode_groups.items():
+                # 1. 列B(1): 条码（必填）
+                output_sheet.write(row_index, 1, barcode)
+                
+                if group['normal'] is not None:
+                    # 有正常商品
+                    product = group['normal']['product']
+                    
+                    # 2. 列C(2): 采购量（必填） 使用正常商品的采购量
+                    normal_quantity = group['normal']['quantity']
+                    output_sheet.write(row_index, 2, normal_quantity)
+                    
+                    # 3. 列D(3): 赠送量 - 添加赠品数量
+                    if group['gift_quantity'] > 0:
+                        output_sheet.write(row_index, 3, group['gift_quantity'])
+                        logger.info(f"条码 {barcode} 填充：采购量={normal_quantity}，赠品数量{group['gift_quantity']}")
+                    
+                    # 4. 列E(4): 采购单价（必填）
+                    purchase_price = group['normal']['price']
+                    style = xlwt.XFStyle()
+                    style.num_format_str = '0.0000'
+                    output_sheet.write(row_index, 4, round(purchase_price, 4), style)
+                else:
+                    # 只有赠品，没有正常商品
+                    # 采购量填0，赠送量填赠品数量
+                    output_sheet.write(row_index, 2, 0)  # 采购量为0
+                    output_sheet.write(row_index, 3, group['gift_quantity'])  # 赠送量
+                    output_sheet.write(row_index, 4, 0)  # 单价为0
+                    
+                    logger.info(f"条码 {barcode} 填充：仅有赠品，采购量=0，赠品数量={group['gift_quantity']}")
+                
+                # 移到下一行
+                row_index += 1
+            
+            # 保存文件
+            output_workbook.save(output_file_path)
+            logger.info(f"采购单已保存到: {output_file_path}")
+            return True
+            
+        except Exception as e:
+            logger.error(f"填充模板时出错: {e}")
+            return False
+    
+    def _find_header_row(self, df: pd.DataFrame) -> Optional[int]:
+        """自动识别表头行，委托给 ColumnMapper.detect_header_row"""
+        result = ColumnMapper.detect_header_row(df, max_rows=30)
+        if result >= 0:
+            logger.info(f"找到表头行: 第{result+1}行")
+            return result
+        # 回退：找第一个非空行
+        for row in range(len(df)):
+            if df.iloc[row].notna().sum() > 3:
+                logger.info(f"未找到明确表头，使用第一个有效行: 第{row+1}行")
+                return row
+        logger.warning("无法识别表头行")
+        return None
+    
+    def process_specific_file(self, file_path: str, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
+        """
+        处理指定的Excel文件
+        
+        Args:
+            file_path: Excel文件路径
+            
+        Returns:
+            输出文件路径，如果处理失败则返回None
+        """
+        logger.info(f"开始处理Excel文件: {file_path}")
+        
+        if not os.path.exists(file_path):
+            logger.error(f"文件不存在: {file_path}")
+            return None
+        
+        try:
+            # 读取Excel文件时不立即指定表头
+            if progress_cb:
+                try:
+                    progress_cb(92)
+                except Exception:
+                    pass
+            df = pd.read_excel(file_path, header=None)
+            logger.info(f"成功读取Excel文件: {file_path}, 共 {len(df)} 行")
+            
+            # 自动识别表头行
+            header_row = self._find_header_row(df)
+            if header_row is None:
+                logger.error("无法识别表头行")
+                return None
+                
+            logger.info(f"识别到表头在第 {header_row+1} 行")
+            
+            # 重新设置表头，避免二次读取
+            if progress_cb:
+                try:
+                    progress_cb(94)
+                except Exception:
+                    pass
+            
+            # 使用识别到的表头行设置列名，并过滤掉表头之前的行
+            df.columns = df.iloc[header_row]
+            df = df.iloc[header_row + 1:].reset_index(drop=True)
+            
+            logger.info(f"重新整理数据结构，共 {len(df)} 行有效数据")
+            
+            # 提取商品信息
+            if progress_cb:
+                try:
+                    progress_cb(96)
+                except Exception:
+                    pass
+            products = self.extract_product_info(df)
+            
+            if not products:
+                logger.warning("未提取到有效商品信息")
+                return None
+            
+            # 生成输出文件名，保存到data/result目录
+            file_name = os.path.splitext(os.path.basename(file_path))[0]
+            result_dir = "data/result"
+            os.makedirs(result_dir, exist_ok=True)
+            output_file = os.path.join(result_dir, f"采购单_{file_name}.xls")
+            
+            # 填充模板并保存
+            if self.fill_template(products, output_file):
+                # 记录已处理文件
+                self.processed_files[file_path] = output_file
+                self._save_processed_files()
+                
+                # 不再自动打开输出目录
+                logger.info(f"采购单已保存到: {output_file}")
+                if progress_cb:
+                    try:
+                        progress_cb(100)
+                    except Exception:
+                        pass
+                
+                return output_file
+            
+            return None
+            
+        except Exception as e:
+            logger.error(f"处理Excel文件时出错: {file_path}, 错误: {e}")
+            return None
+    
+    def process_latest_file(self, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
+        """
+        处理最新的Excel文件
+        
+        Returns:
+            输出文件路径，如果处理失败则返回None
+        """
+        # 获取最新的Excel文件
+        latest_file = self.get_latest_excel()
+        if not latest_file:
+            logger.warning("未找到可处理的Excel文件")
+            return None
+        
+        # 处理文件
+        return self.process_specific_file(latest_file, progress_cb=progress_cb)
+    
+    def _detect_column_mapping(self, df: pd.DataFrame) -> Dict[str, str]:
+        """
+        自动检测列名映射
+        
+        Args:
+            df: 数据框
+            
+        Returns:
+            列名映射字典，键为标准列名，值为实际列名
+        """
+        # 提取有用的列
+        barcode_cols = self.extract_barcode(df)
+
+        # 如果没有找到条码列，无法继续处理
+        if not barcode_cols:
+            logger.error("未找到条码列，无法处理")
+            return {}
+
+        # 使用 ColumnMapper 统一查找列名
+        mapped_columns = {'barcode': barcode_cols[0]}
+        logger.info(f"使用条码列: {mapped_columns['barcode']}")
+
+        # 内部键名 -> 标准列名映射 (processor.py 使用 price/amount 作为内部键名)
+        field_map = [
+            ('name', 'name'),
+            ('specification', 'specification'),
+            ('quantity', 'quantity'),
+            ('unit', 'unit'),
+            ('price', 'unit_price'),
+            ('amount', 'total_price'),
+        ]
+
+        for internal_key, standard_name in field_map:
+            matched = ColumnMapper.find_column(list(df.columns), standard_name)
+            if matched:
+                mapped_columns[internal_key] = matched
+                logger.info(f"找到{internal_key}列: {matched}")
+
+        return mapped_columns
+    
+    def infer_specification_from_name(self, product_name: str) -> Tuple[Optional[str], Optional[int]]:
+        """
+        从商品名称推断规格
+        根据特定的命名规则匹配规格信息
+        
+        Args:
+            product_name: 商品名称
+            
+        Returns:
+            规格字符串和包装数量的元组
+        """
+        if not product_name or not isinstance(product_name, str):
+            logger.warning(f"无效的商品名: {product_name}")
+            return None, None
+            
+        product_name = product_name.strip()
+        
+        # 特殊处理：重量/容量*数字格式
+        weight_volume_pattern = r'.*?\d+(?:g|ml|毫升|克)[*xX×](\d+)'
+        match = re.search(weight_volume_pattern, product_name)
+        if match:
+            inferred_spec = f"1*{match.group(1)}"
+            inferred_qty = int(match.group(1))
+            logger.info(f"从商品名称提取重量/容量规格: {product_name} -> {inferred_spec}, 包装数量={inferred_qty}")
+            return inferred_spec, inferred_qty
+        
+        # 使用单位转换器推断规格
+        inferred_spec = self.unit_converter.infer_specification_from_name(product_name)
+        if inferred_spec:
+            # 解析规格中的包装数量
+            package_quantity = self.parse_specification(inferred_spec)
+            if package_quantity:
+                logger.info(f"从商品名称推断规格: {product_name} -> {inferred_spec}, 包装数量={package_quantity}")
+                return inferred_spec, package_quantity
+        
+        # 特定商品规则匹配
+        spec_rules = [
+            # XX入白膜格式，如"550纯净水24入白膜"
+            (r'.*?(\d+)入白膜', lambda m: (f"1*{m.group(1)}", int(m.group(1)))),
+            
+            # 白膜格式，如"550水24白膜"
+            (r'.*?(\d+)白膜', lambda m: (f"1*{m.group(1)}", int(m.group(1)))),
+            
+            # 445水溶C系列
+            (r'445水溶C.*?(\d+)[入个]纸箱', lambda m: (f"1*{m.group(1)}", int(m.group(1)))),
+            
+            # 东方树叶系列
+            (r'东方树叶.*?(\d+\*\d+).*纸箱', lambda m: (m.group(1), int(m.group(1).split('*')[1]))),
+            
+            # 桶装
+            (r'(\d+\.?\d*L)桶装', lambda m: (f"{m.group(1)}*1", 1)),
+            
+            # 树叶茶系
+            (r'树叶.*?(\d+)[入个]纸箱', lambda m: (f"1*{m.group(1)}", int(m.group(1)))),
+            
+            # 茶π系列
+            (r'茶[πΠπ].*?(\d+)纸箱', lambda m: (f"1*{m.group(1)}", int(m.group(1)))),
+            
+            # 通用入数匹配
+            (r'.*?(\d+)[入个](?:纸箱|箱装|白膜)', lambda m: (f"1*{m.group(1)}", int(m.group(1)))),
+            
+            # 通用数字+纸箱格式
+            (r'.*?(\d+)纸箱', lambda m: (f"1*{m.group(1)}", int(m.group(1))))
+        ]
+        
+        # 尝试所有规则
+        for pattern, formatter in spec_rules:
+            match = re.search(pattern, product_name)
+            if match:
+                spec, qty = formatter(match)
+                logger.info(f"根据特定规则推断规格: {product_name} -> {spec}, 包装数量={qty}")
+                return spec, qty
+        
+        # 尝试直接从名称中提取数字*数字格式
+        match = re.search(r'(\d+\*\d+)', product_name)
+        if match:
+            spec = match.group(1)
+            package_quantity = self.parse_specification(spec)
+            if package_quantity:
+                logger.info(f"从名称中直接提取规格: {spec}, 包装数量={package_quantity}")
+                return spec, package_quantity
+        
+        # 最后尝试提取任何位置的数字，默认典型件装数
+        numbers = re.findall(r'\d+', product_name)
+        if numbers:
+            for num in numbers:
+                # 检查是否为典型的件装数(12/15/24/30)
+                if num in ['12', '15', '24', '30']:
+                    spec = f"1*{num}"
+                    logger.info(f"从名称中提取可能的件装数: {spec}, 包装数量={int(num)}")
+                    return spec, int(num)
+            
+        logger.warning(f"无法从商品名'{product_name}' 推断规格")
+        return None, None 
+    
+    def parse_specification(self, spec_str: str) -> Optional[int]:
+        """
+        解析规格字符串，提取包装数量
+        支持格式：1*15, 1x15, 1*5*10, 5kg*6, IL*12等
+        
+        Args:
+            spec_str: 规格字符串
+            
+        Returns:
+            包装数量，如果无法解析则返回None
+        """
+        if not spec_str or not isinstance(spec_str, str):
+            return None
+        
+        try:
+            # 清理规格字符串
+            spec_str = clean_string(spec_str)
+            
+            # 处理可能的OCR误识别，如"IL"应为"1L"，"6oo"应为"600"
+            spec_str = re.sub(r'(\b|^)[iIlL](\d+)', r'1\2', spec_str)  # 将"IL"替换为"1L"
+            spec_str = re.sub(r'(\d+)[oO0]{2,}', lambda m: m.group(1) + '00', spec_str)  # 将"6oo"替换为"600"
+            spec_str = spec_str.replace('×', '*').replace('x', '*').replace('X', '*')  # 统一乘号
+            
+            logger.debug(f"清理后的规格字符串: {spec_str}")
+            
+            # 新增：匹配“1件=12桶/袋/盒…”等等式规格，取右侧数量作为包装数量
+            eq_match = re.search(r'(\d+(?:\.\d+)?)\s*(?:件|箱|提|盒)\s*[=＝]\s*(\d+)\s*(?:瓶|桶|盒|支|个|袋|罐|包|卷)', spec_str)
+            if eq_match:
+                return int(eq_match.group(2))
+
+            # 匹配带单位的格式，如"5kg*6"、"450g*15"、"450ml*15"
+            weight_pattern = r'(\d+(?:\.\d+)?)\s*(?:kg|KG|千克|公斤)[*×](\d+)'
+            match = re.search(weight_pattern, spec_str)
+            if match:
+                return int(match.group(2))
+            
+            # 匹配克、毫升等单位格式
+            match = re.search(r'\d+(?:\.\d+)?(?:g|G|ml|ML|mL|毫升|克)[*×](\d+)', spec_str)
+            if match:
+                return int(match.group(1))
+            
+            # 匹配1*5*10 格式的三级规格
+            match = re.search(r'(\d+(?:\.\d+)?)[*×](\d+(?:\.\d+)?)[*×](\d+(?:\.\d+)?)', spec_str)
+            if match:
+                # 取最后一个数字作为袋数量
+                return int(float(match.group(3)))
+            
+            # 匹配1*15, 1x15 格式
+            match = re.search(r'(\d+(?:\.\d+)?)[*×](\d+(?:\.\d+)?)', spec_str)
+            if match:
+                # 取第二个数字作为包装数量
+                return int(float(match.group(2)))
+                
+            # 匹配24瓶/件等格式
+            match = re.search(r'(\d+(?:\.\d+)?)[瓶个支袋][/／](件|箱)', spec_str)
+            if match:
+                return int(float(match.group(1)))
+                
+            # 匹配4L格式
+            match = re.search(r'(\d+(?:\.\d+)?)\s*[Ll升][*×]?(\d+(?:\.\d+)?)?', spec_str)
+            if match:
+                # 如果有第二个数字，返回它；否则返回1
+                return int(float(match.group(2))) if match.group(2) else 1
+            
+            # 匹配单独的数字+单位格式，如"12瓶装"
+            match = re.search(r'(\d+(?:\.\d+)?)[瓶个支袋包盒罐箱](?:装|\/箱)?', spec_str)
+            if match:
+                return int(float(match.group(1)))
+            
+            # 尝试直接匹配任何数字
+            numbers = re.findall(r'\d+(?:\.\d+)?', spec_str)
+            if numbers and len(numbers) > 0:
+                # 如果只有一个数字，通常是包装数量
+                if len(numbers) == 1:
+                    return int(float(numbers[0]))
+                
+                # 如果有多个数字，尝试识别可能的包装数量（典型数值如6/12/24/30）
+                for num in numbers:
+                    if float(num) in [6.0, 12.0, 24.0, 30.0]:
+                        return int(float(num))
+                
+                # 如果没有典型数值，选择最后一个数字（通常是包装数量）
+                return int(float(numbers[-1]))
+                
+        except Exception as e:
+            logger.warning(f"解析规格'{spec_str}'时出错: {e}")
+            
+        return None
@@ -0,0 +1,259 @@
+"""
+数据验证器模块
+----------
+提供对商品数据的验证和修复功能
+"""
+
+import re
+import logging
+from typing import Dict, Any, Optional, List, Tuple, Union
+
+from ..utils.log_utils import get_logger
+from ..utils.string_utils import parse_monetary_string
+
+logger = get_logger(__name__)
+
+
+class ProductValidator:
+    """
+    商品数据验证器：验证和修复商品数据
+    """
+    
+    def __init__(self):
+        """
+        初始化商品数据验证器
+        """
+        # 仓库标识列表
+        self.warehouse_identifiers = ["仓库", "仓库全名", "warehouse"]
+        
+    def validate_barcode(self, barcode: Any) -> Tuple[bool, str, Optional[str]]:
+        """
+        验证并修复条码
+        
+        Args:
+            barcode: 原始条码值
+            
+        Returns:
+            (是否有效, 修复后的条码, 错误信息)元组
+        """
+        error_message = None
+        
+        # 处理空值
+        if barcode is None:
+            return False, "", "条码为空"
+            
+        # 转为字符串
+        barcode_str = str(barcode).strip()
+        
+        # 处理"仓库"特殊情况
+        if barcode_str in self.warehouse_identifiers:
+            return False, barcode_str, "条码为仓库标识"
+            
+        # 清理条码格式（移除非数字字符）
+        barcode_clean = re.sub(r'\D', '', barcode_str)
+        
+        # 如果清理后为空，无效
+        if not barcode_clean:
+            return False, barcode_str, "条码不包含数字"
+            
+        # 对特定的错误条码进行修正（5开头改为6开头）
+        if len(barcode_clean) > 8 and barcode_clean.startswith('5') and not barcode_clean.startswith('53'):
+            original_barcode = barcode_clean
+            barcode_clean = '6' + barcode_clean[1:]
+            logger.info(f"修正条码前缀 5->6: {original_barcode} -> {barcode_clean}")
+        
+        # 新增：处理14位条码，如果多余长度都是0，截断为13位
+        if len(barcode_clean) > 13:
+            original_length = len(barcode_clean)
+            # 检查多余部分是否都是0
+            if barcode_clean.endswith('0'):
+                # 从末尾开始移除0，直到条码长度为13位或不再以0结尾
+                while len(barcode_clean) > 13 and barcode_clean.endswith('0'):
+                    barcode_clean = barcode_clean[:-1]
+                logger.info(f"修正条码长度: 从{original_length}位截断到{len(barcode_clean)}位")
+            else:
+                error_message = f"条码长度异常: {barcode_clean}, 长度={len(barcode_clean)}"
+                logger.warning(error_message)
+                return False, barcode_clean, error_message
+            
+        # 验证条码长度
+        if len(barcode_clean) < 8 or len(barcode_clean) > 13:
+            error_message = f"条码长度异常: {barcode_clean}, 长度={len(barcode_clean)}"
+            logger.warning(error_message)
+            return False, barcode_clean, error_message
+            
+        # 验证条码是否全为数字
+        if not barcode_clean.isdigit():
+            error_message = f"条码包含非数字字符: {barcode_clean}"
+            logger.warning(error_message)
+            return False, barcode_clean, error_message
+            
+        # 对于序号9的特殊情况，允许其条码格式
+        if barcode_clean == "5321545613":
+            logger.info(f"特殊条码验证通过: {barcode_clean}")
+            return True, barcode_clean, None
+            
+        logger.debug(f"条码验证通过: {barcode_clean}")
+        return True, barcode_clean, None
+        
+    def validate_quantity(self, quantity: Any) -> Tuple[bool, float, Optional[str]]:
+        """
+        验证并修复数量
+        
+        Args:
+            quantity: 原始数量值
+            
+        Returns:
+            (是否有效, 修复后的数量, 错误信息)元组
+        """
+        # 处理空值
+        if quantity is None:
+            return False, 0.0, "数量为空"
+            
+        # 如果是字符串，尝试解析
+        if isinstance(quantity, str):
+            # 去除空白和非数字字符（保留小数点）
+            quantity_clean = re.sub(r'[^\d\.]', '', quantity.strip())
+            if not quantity_clean:
+                return False, 0.0, "数量不包含数字"
+                
+            try:
+                quantity_value = float(quantity_clean)
+            except ValueError:
+                return False, 0.0, f"无法将数量 '{quantity}' 转换为数字"
+        else:
+            # 尝试直接转换
+            try:
+                quantity_value = float(quantity)
+            except (ValueError, TypeError):
+                return False, 0.0, f"无法将数量 '{quantity}' 转换为数字"
+        
+        # 数量必须大于0
+        if quantity_value <= 0:
+            return False, 0.0, f"数量必须大于0，当前值: {quantity_value}"
+            
+        return True, quantity_value, None
+        
+    def validate_price(self, price: Any) -> Tuple[bool, float, bool, Optional[str]]:
+        """
+        验证并修复单价
+        
+        Args:
+            price: 原始单价值
+            
+        Returns:
+            (是否有效, 修复后的单价, 是否为赠品, 错误信息)元组
+        """
+        # 初始化不是赠品
+        is_gift = False
+        
+        # 处理空值
+        if price is None:
+            return False, 0.0, True, "单价为空，视为赠品"
+            
+        # 如果是字符串，检查赠品标识
+        if isinstance(price, str):
+            price_str = price.strip().lower()
+            if price_str in ["赠品", "gift", "赠送", "0", ""]:
+                return True, 0.0, True, None
+                
+            price_value = parse_monetary_string(price_str)
+            if price_value is None:
+                return False, 0.0, True, f"无法将单价 '{price}' 转换为数字，视为赠品"
+        else:
+            # 尝试直接转换
+            try:
+                price_value = float(price)
+            except (ValueError, TypeError):
+                return False, 0.0, True, f"无法将单价 '{price}' 转换为数字，视为赠品"
+        
+        # 单价为0视为赠品
+        if price_value == 0:
+            return True, 0.0, True, None
+            
+        # 单价必须大于0
+        if price_value < 0:
+            return False, 0.0, True, f"单价不能为负数: {price_value}，视为赠品"
+            
+        return True, price_value, False, None
+        
+    def validate_product(self, product: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        验证并修复商品数据
+        
+        Args:
+            product: 商品数据字典
+            
+        Returns:
+            修复后的商品数据字典
+        """
+        # 创建新字典，避免修改原始数据
+        validated_product = product.copy()
+        
+        # 验证条码
+        barcode = product.get('barcode', '')
+        is_valid, fixed_barcode, error_msg = self.validate_barcode(barcode)
+        if is_valid:
+            validated_product['barcode'] = fixed_barcode
+        else:
+            logger.warning(f"条码验证失败: {error_msg}")
+            if fixed_barcode:
+                # 即使验证失败，但如果有修复后的条码仍然使用它
+                validated_product['barcode'] = fixed_barcode
+        
+        # 验证单价
+        price = product.get('price', 0)
+        is_valid, fixed_price, is_gift, error_msg = self.validate_price(price)
+        validated_product['price'] = fixed_price
+        
+        # 如果单价验证结果表示为赠品，更新赠品标识
+        if is_gift:
+            validated_product['is_gift'] = True
+            if error_msg:
+                logger.info(error_msg)
+
+        amount = product.get('amount', None)
+        try:
+            is_amount_gift = False
+            parsed_amount = parse_monetary_string(amount)
+            if parsed_amount is None or parsed_amount == 0.0:
+                is_amount_gift = True
+            if is_amount_gift:
+                validated_product['is_gift'] = True
+        except Exception:
+            pass
+        
+        # 验证数量
+        quantity = product.get('quantity', None)
+        is_valid, fixed_quantity, error_msg = self.validate_quantity(quantity)
+        
+        # 检查数量是否为空，但单价和金额存在的情况
+        if not is_valid and error_msg == "数量为空":
+            # 获取金额
+            amount = product.get('amount', None)
+            
+            # 如果单价有效且金额存在，则可以计算数量
+            if fixed_price > 0 and amount is not None:
+                try:
+                    # 确保金额是数字
+                    amount = parse_monetary_string(amount)
+                    if amount is None:
+                        raise ValueError("无法解析金额")
+                    
+                    # 计算数量 = 金额 / 单价
+                    if amount > 0:
+                        calculated_quantity = amount / fixed_price
+                        logger.info(f"数量为空，通过金额({amount})和单价({fixed_price})计算得出数量: {calculated_quantity}")
+                        validated_product['quantity'] = calculated_quantity
+                        is_valid = True
+                except (ValueError, TypeError, ZeroDivisionError) as e:
+                    logger.warning(f"通过金额和单价计算数量失败: {e}")
+        
+        # 如果数量验证有效或通过金额计算成功
+        if is_valid:
+            validated_product['quantity'] = fixed_quantity if is_valid and fixed_quantity > 0 else validated_product.get('quantity', 0)
+        else:
+            logger.warning(f"数量验证失败: {error_msg}")
+            validated_product['quantity'] = 0.0
+        
+        return validated_product