feat: 益选 OCR 订单处理系统初始提交

- 智能供应商识别（蓉城易购/烟草/杨碧月/通用）
- 百度 OCR 表格识别集成
- 规则引擎（列映射/数据清洗/单位转换/规格推断）
- 条码映射管理与云端同步（Gitea REST API）
- 云端同步支持：条码映射、供应商配置、商品资料、采购模板
- 拖拽一键处理（图片→OCR→Excel→合并）
- 191 个单元测试
- 移除无用的模板管理功能
- 清理 IDE 产物目录

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-04 19:51:13 +08:00
commit e4d62df7e3
78 changed files with 15257 additions and 0 deletions
@@ -0,0 +1,9 @@
+"""
+处理器模块初始化文件
+"""
+
+from .base import BaseProcessor
+from .ocr_processor import OCRProcessor
+from .tobacco_processor import TobaccoProcessor
+
+__all__ = ['BaseProcessor', 'OCRProcessor', 'TobaccoProcessor']
@@ -0,0 +1,167 @@
+"""
+基础处理器接口模块
+
+定义所有处理器的基类，提供统一的处理接口
+"""
+
+from abc import ABC, abstractmethod
+from typing import Dict, Any, Optional, List
+from pathlib import Path
+import logging
+import pandas as pd
+
+from ...core.utils.log_utils import get_logger
+
+logger = get_logger(__name__)
+
+
+class BaseProcessor(ABC):
+    """基础处理器接口 - 所有处理器的基类
+    
+    采用策略模式设计，每个处理器负责特定类型的文件处理
+    """
+    
+    def __init__(self, config: Dict[str, Any]):
+        """初始化处理器
+        
+        Args:
+            config: 处理器配置字典
+        """
+        self.config = config
+        self.name = self.__class__.__name__
+        self.description = ""
+        self._setup_logging()
+    
+    def _setup_logging(self):
+        """设置处理器日志"""
+        self.logger = logging.getLogger(f"{__name__}.{self.name}")
+    
+    @abstractmethod
+    def can_process(self, file_path: Path) -> bool:
+        """Decide whether this processor is able to handle the given file.
+
+        Args:
+            file_path: Path of the candidate file.
+
+        Returns:
+            True when the processor can handle the file, False otherwise.
+        """
+        pass
+    
+    @abstractmethod
+    def process(self, input_file: Path, output_dir: Path) -> Optional[Path]:
+        """Process a file and return the path of the produced output file.
+
+        Args:
+            input_file: Path of the file to process.
+            output_dir: Directory the output is expected to be written to.
+
+        Returns:
+            Path of the output file, or None when processing failed.
+        """
+        pass
+    
+    @abstractmethod
+    def get_required_columns(self) -> List[str]:
+        """Return the list of column names this processor requires.
+
+        Returns:
+            List of column names.
+        """
+        pass
+    
+    def validate_input(self, file_path: Path) -> bool:
+        """验证输入文件有效性
+        
+        Args:
+            file_path: 文件路径
+            
+        Returns:
+            文件是否有效
+        """
+        try:
+            if not file_path.exists():
+                self.logger.warning(f"文件不存在: {file_path}")
+                return False
+            
+            if not file_path.is_file():
+                self.logger.warning(f"不是文件: {file_path}")
+                return False
+            
+            supported_extensions = self.get_supported_extensions()
+            if supported_extensions and file_path.suffix.lower() not in supported_extensions:
+                self.logger.warning(f"不支持的文件类型: {file_path.suffix}, 支持的类型: {supported_extensions}")
+                return False
+            
+            return True
+            
+        except Exception as e:
+            self.logger.error(f"验证文件时出错: {e}")
+            return False
+    
+    def get_supported_extensions(self) -> List[str]:
+        """获取支持的文件扩展名
+        
+        Returns:
+            支持的扩展名列表，空列表表示支持所有类型
+        """
+        return []
+    
+    def get_output_filename(self, input_file: Path, suffix: str = "_processed") -> str:
+        """生成输出文件名
+
+        Args:
+            input_file: 输入文件路径
+            suffix: 文件名后缀
+
+        Returns:
+            输出文件名
+        """
+        return f"{input_file.stem}{suffix}{input_file.suffix}"
+
+    def _read_excel_safely(self, file_path: Path, **kwargs) -> pd.DataFrame:
+        """根据扩展名选择合适的读取引擎
+
+        Args:
+            file_path: 文件路径
+            **kwargs: 传递给 pd.read_excel 的参数
+
+        Returns:
+            DataFrame
+
+        Raises:
+            Exception: 读取失败时抛出
+        """
+        suffix = file_path.suffix.lower()
+        if suffix == '.xlsx':
+            return pd.read_excel(file_path, engine='openpyxl', **kwargs)
+        elif suffix == '.xls':
+            try:
+                return pd.read_excel(file_path, engine='xlrd', **kwargs)
+            except Exception as e:
+                self.logger.warning(f"读取xls失败，可能缺少xlrd: {e}")
+                raise
+        else:
+            return pd.read_excel(file_path, **kwargs)
+    
+    def log_processing_start(self, input_file: Path):
+        """记录处理开始日志"""
+        self.logger.info(f"开始处理文件: {input_file}")
+        self.logger.info(f"处理器: {self.name} - {self.description}")
+    
+    def log_processing_end(self, input_file: Path, output_file: Optional[Path] = None, success: bool = True):
+        """记录处理结束日志"""
+        if success:
+            self.logger.info(f"处理完成: {input_file}")
+            if output_file:
+                self.logger.info(f"输出文件: {output_file}")
+        else:
+            self.logger.error(f"处理失败: {input_file}")
+    
+    def __str__(self) -> str:
+        """字符串表示"""
+        return f"{self.name}({self.description})"
+    
+    def __repr__(self) -> str:
+        """详细字符串表示"""
+        return f"{self.__class__.__module__}.{self.__class__.__name__}(name='{self.name}', description='{self.description}')"
@@ -0,0 +1,192 @@
+"""
+OCR处理器
+
+处理图片文件的OCR识别完整流程：图片识别 → Excel处理 → 标准采购单生成
+"""
+
+import os
+from pathlib import Path
+from typing import Optional, Dict, Any, List
+
+from .base import BaseProcessor
+from ...services.ocr_service import OCRService
+from ...services.order_service import OrderService
+from ...core.utils.log_utils import get_logger
+
+logger = get_logger(__name__)
+
+
+class OCRProcessor(BaseProcessor):
+    """OCR处理器
+    
+    处理图片文件的完整OCR识别流程：
+    1. OCR识别图片中的表格信息
+    2. 处理识别结果生成Excel文件
+    3. 转换为标准采购单格式
+    """
+    
+    def __init__(self, config: Dict[str, Any]):
+        """初始化OCR处理器
+        
+        Args:
+            config: 配置信息
+        """
+        super().__init__(config)
+        self.description = "OCR识别完整流程（图片→识别→Excel→采购单）"
+        
+        # 初始化服务
+        self.ocr_service = OCRService(config)
+        self.order_service = OrderService(config)
+    
+    def can_process(self, file_path: Path) -> bool:
+        """判断是否为支持的图片文件
+        
+        Args:
+            file_path: 文件路径
+            
+        Returns:
+            是否能处理该文件
+        """
+        if not self.validate_input(file_path):
+            return False
+        
+        # 支持的图片格式
+        supported_extensions = ['.jpg', '.jpeg', '.png', '.bmp']
+        
+        if file_path.suffix.lower() in supported_extensions:
+            self.logger.info(f"识别为图片文件: {file_path.name}")
+            return True
+        
+        return False
+    
+    def process(self, input_file: Path, output_dir: Path) -> Optional[Path]:
+        """Run the full OCR flow for one image file.
+
+        The three steps run in order and the flow stops at the first one that
+        yields no result: _perform_ocr, _process_excel,
+        _generate_purchase_order. Any exception is caught, logged with its
+        traceback, and turned into a None return.
+
+        Args:
+            input_file: Path of the input image.
+            output_dir: Output directory; passed through to each step.
+
+        Returns:
+            Path of the final output file, or None when any step failed.
+        """
+        self.log_processing_start(input_file)
+
+        try:
+            self.logger.info("开始OCR识别流程...")
+
+            # Step 1: OCR recognition of the image
+            self.logger.info("步骤1/3: OCR识别图片...")
+            ocr_result = self._perform_ocr(input_file, output_dir)
+            if not ocr_result:
+                self.logger.error("OCR识别失败")
+                self.log_processing_end(input_file, success=False)
+                return None
+
+            # Step 2: process the Excel file produced by step 1
+            self.logger.info("步骤2/3: 处理Excel文件...")
+            excel_result = self._process_excel(ocr_result, output_dir)
+            if not excel_result:
+                self.logger.error("Excel处理失败")
+                self.log_processing_end(input_file, success=False)
+                return None
+
+            # Step 3: produce the standard purchase order
+            self.logger.info("步骤3/3: 生成标准采购单...")
+            final_result = self._generate_purchase_order(excel_result, output_dir)
+
+            if final_result:
+                self.logger.info(f"OCR处理流程完成，输出文件: {final_result}")
+                self.log_processing_end(input_file, final_result, success=True)
+                return final_result
+            else:
+                self.logger.error("生成采购单失败")
+                self.log_processing_end(input_file, success=False)
+                return None
+
+        except Exception as e:
+            self.logger.error(f"OCR处理流程出错: {e}", exc_info=True)
+            self.log_processing_end(input_file, success=False)
+            return None
+    
+    def get_required_columns(self) -> List[str]:
+        """返回需要的列名列表"""
+        # OCR处理不直接依赖列名，由后续处理步骤决定
+        return []
+    
+    def get_supported_extensions(self) -> List[str]:
+        """支持的文件扩展名"""
+        return ['.jpg', '.jpeg', '.png', '.bmp']
+    
+    def _perform_ocr(self, input_file: Path, output_dir: Path) -> Optional[Path]:
+        """执行OCR识别
+        
+        Args:
+            input_file: 输入图片文件
+            output_dir: 输出目录
+            
+        Returns:
+            OCR生成的Excel文件路径，失败返回None
+        """
+        try:
+            self.logger.info(f"开始OCR识别: {input_file}")
+            
+            # 使用OCR服务处理图片
+            result_path = self.ocr_service.process_image(str(input_file))
+            
+            if result_path:
+                # 确保结果文件在输出目录中
+                result_path = Path(result_path)
+                if result_path.exists():
+                    self.logger.info(f"OCR识别成功，输出文件: {result_path}")
+                    return result_path
+                else:
+                    self.logger.error(f"OCR结果文件不存在: {result_path}")
+                    return None
+            else:
+                self.logger.error("OCR服务返回None")
+                return None
+                
+        except Exception as e:
+            self.logger.error(f"OCR识别失败: {e}", exc_info=True)
+            return None
+    
+    def _process_excel(self, excel_file: Path, output_dir: Path) -> Optional[Path]:
+        """处理Excel文件
+        
+        Args:
+            excel_file: Excel文件路径
+            output_dir: 输出目录
+            
+        Returns:
+            处理后的Excel文件路径，失败返回None
+        """
+        try:
+            self.logger.info(f"开始处理Excel文件: {excel_file}")
+            
+            # 使用订单服务处理Excel文件（生成采购单）
+            result_path = self.order_service.process_excel(str(excel_file))
+            
+            if result_path:
+                result_path = Path(result_path)
+                if result_path.exists():
+                    self.logger.info(f"Excel处理成功，输出文件: {result_path}")
+                    return result_path
+                else:
+                    self.logger.error(f"Excel处理结果文件不存在: {result_path}")
+                    return None
+            else:
+                self.logger.error("Excel处理服务返回None")
+                return None
+                
+        except Exception as e:
+            self.logger.error(f"Excel处理失败: {e}", exc_info=True)
+            return None
+    
+    def _generate_purchase_order(self, processed_file: Path, output_dir: Path) -> Optional[Path]:
+        """采购单生成由OrderService完成，此处直接返回处理结果"""
+        try:
+            if processed_file and processed_file.exists():
+                return processed_file
+            return None
+        except Exception:
+            return None
@@ -0,0 +1,7 @@
+"""
+供应商处理器模块初始化文件
+"""
+
+from .generic_supplier_processor import GenericSupplierProcessor
+
+__all__ = ['GenericSupplierProcessor']
@@ -0,0 +1,340 @@
+"""
+通用供应商处理器
+
+可配置化的供应商处理器，支持通过配置文件定义处理规则
+"""
+
+import fnmatch
+import pandas as pd
+from typing import Optional, Dict, Any, List
+from pathlib import Path
+
+from ..base import BaseProcessor
+from ...utils.log_utils import get_logger
+from ...handlers.rule_engine import apply_rules
+from ...handlers.column_mapper import ColumnMapper
+from ...handlers.data_cleaner import DataCleaner
+from ...handlers.calculator import DataCalculator
+
+logger = get_logger(__name__)
+
+
+class GenericSupplierProcessor(BaseProcessor):
+    """通用供应商处理器
+    
+    基于配置文件处理不同供应商的Excel文件，支持：
+    - 文件名模式匹配
+    - 内容特征识别
+    - 列映射配置
+    - 数据清洗规则
+    - 计算处理规则
+    """
+    
+    def __init__(self, config: Dict[str, Any], supplier_config: Dict[str, Any]):
+        """初始化通用供应商处理器
+        
+        Args:
+            config: 系统配置
+            supplier_config: 供应商特定配置
+        """
+        super().__init__(config)
+        self.supplier_config = supplier_config
+        
+        # 从配置中提取基本信息
+        self.name = supplier_config.get('name', 'GenericSupplier')
+        self.description = supplier_config.get('description', '通用供应商处理器')
+        
+        # 处理规则配置
+        self.filename_patterns = supplier_config.get('filename_patterns', [])
+        self.content_indicators = supplier_config.get('content_indicators', [])
+        self.column_mapping = supplier_config.get('column_mapping', {})
+        self.cleaning_rules = supplier_config.get('cleaning_rules', [])
+        self.calculations = supplier_config.get('calculations', [])
+        
+        # 输出配置
+        self.output_template = supplier_config.get('output_template', 'templates/银豹-采购单模板.xls')
+        self.output_suffix = supplier_config.get('output_suffix', '_银豹采购单')
+    
+    def can_process(self, file_path: Path) -> bool:
+        """判断是否能处理该文件
+        
+        Args:
+            file_path: 文件路径
+            
+        Returns:
+            是否能处理
+        """
+        if not self.validate_input(file_path):
+            return False
+        
+        # 检查文件名模式
+        if self.filename_patterns:
+            filename_match = self._check_filename_patterns(file_path)
+            if filename_match:
+                return True
+        
+        # 检查文件内容特征
+        if self.content_indicators:
+            content_match = self._check_content_indicators(file_path)
+            if content_match:
+                return True
+        
+        # 如果都没有配置，则无法判断
+        if not self.filename_patterns and not self.content_indicators:
+            self.logger.warning(f"处理器 {self.name} 没有配置识别规则")
+            return False
+        
+        return False
+    
+    def process(self, input_file: Path, output_dir: Path) -> Optional[Path]:
+        """Run the configured pipeline for one supplier file.
+
+        Steps, in order: read the data, apply the column mapping, clean the
+        data, apply the ``rules`` from the supplier config, run the
+        calculations, write the output file. A step that returns None aborts
+        the run; a failure inside apply_rules is only logged as a warning and
+        the cleaned data is used unchanged.
+
+        Args:
+            input_file: Path of the input file.
+            output_dir: Directory the output file is written to.
+
+        Returns:
+            Path of the output file, or None when processing failed.
+        """
+        self.log_processing_start(input_file)
+
+        try:
+            # Step 1: read the data
+            self.logger.info("步骤1/4: 读取数据...")
+            df = self._read_supplier_data(input_file)
+            if df is None or df.empty:
+                self.logger.error("读取数据失败或数据为空")
+                self.log_processing_end(input_file, success=False)
+                return None
+
+            # Step 2: apply the column mapping
+            self.logger.info("步骤2/4: 应用列映射...")
+            mapped_df = self._apply_column_mapping(df)
+            if mapped_df is None:
+                self.logger.error("列映射失败")
+                self.log_processing_end(input_file, success=False)
+                return None
+
+            # Step 3: data cleaning
+            self.logger.info("步骤3/4: 数据清洗...")
+            cleaned_df = self._apply_data_cleaning(mapped_df)
+            if cleaned_df is None:
+                self.logger.error("数据清洗失败")
+                self.log_processing_end(input_file, success=False)
+                return None
+            # Rule application is best-effort: on failure the cleaned data is
+            # carried forward as-is instead of aborting the whole run.
+            try:
+                rules = self.supplier_config.get('rules', [])
+                dictionary = self.supplier_config.get('dictionary')
+                standardized_df = apply_rules(cleaned_df, rules, dictionary)
+            except Exception as e:
+                self.logger.warning(f"规则执行失败: {e}")
+                standardized_df = cleaned_df
+
+            # Step 4: calculations
+            self.logger.info("步骤4/4: 计算处理...")
+            calculated_df = self._apply_calculations(standardized_df)
+            if calculated_df is None:
+                self.logger.error("计算处理失败")
+                self.log_processing_end(input_file, success=False)
+                return None
+
+            # Write the output file
+            output_file = self._generate_output(calculated_df, input_file, output_dir)
+
+            if output_file and output_file.exists():
+                self.logger.info(f"处理完成，输出文件: {output_file}")
+                self.log_processing_end(input_file, output_file, success=True)
+                return output_file
+            else:
+                self.logger.error("输出文件生成失败")
+                self.log_processing_end(input_file, success=False)
+                return None
+
+        except Exception as e:
+            self.logger.error(f"处理文件时出错: {e}", exc_info=True)
+            self.log_processing_end(input_file, success=False)
+            return None
+    
+    def get_required_columns(self) -> List[str]:
+        """返回需要的列名列表"""
+        # 从列映射配置中提取目标列名
+        return list(self.column_mapping.values()) if self.column_mapping else []
+    
+    def _check_filename_patterns(self, file_path: Path) -> bool:
+        """检查文件名模式
+        
+        Args:
+            file_path: 文件路径
+            
+        Returns:
+            是否匹配
+        """
+        try:
+            filename = file_path.name
+            for pattern in self.filename_patterns:
+                if fnmatch.fnmatch(filename.lower(), pattern.lower()):
+                    self.logger.info(f"文件名匹配成功: {filename} -> {pattern}")
+                    return True
+            return False
+        except Exception as e:
+            self.logger.error(f"检查文件名模式时出错: {e}")
+            return False
+    
+    def _check_content_indicators(self, file_path: Path) -> bool:
+        """检查文件内容特征
+        
+        Args:
+            file_path: 文件路径
+            
+        Returns:
+            是否匹配
+        """
+        try:
+            df = self._read_excel_safely(file_path, nrows=5)
+            
+            # 检查列名中是否包含指定关键词
+            columns_str = str(list(df.columns)).lower()
+            
+            for indicator in self.content_indicators:
+                if indicator.lower() in columns_str:
+                    self.logger.info(f"内容特征匹配成功: {indicator}")
+                    return True
+            
+            return False
+            
+        except Exception as e:
+            self.logger.error(f"检查内容特征时出错: {e}")
+            return False
+    
+    def _read_supplier_data(self, file_path: Path) -> Optional[pd.DataFrame]:
+        """读取供应商数据
+        
+        Args:
+            file_path: 文件路径
+            
+        Returns:
+            数据DataFrame或None
+        """
+        try:
+            specified = self.supplier_config.get('header_row')
+            if specified is not None:
+                try:
+                    df = self._read_excel_safely(file_path, header=int(specified))
+                except Exception:
+                    df = self._read_excel_safely(file_path)
+            else:
+                df0 = self._read_excel_safely(file_path, header=None)
+                if df0 is None:
+                    return None
+                header_row = self._find_header_row(df0)
+                if header_row is not None:
+                    df = self._read_excel_safely(file_path, header=header_row)
+                else:
+                    df = self._read_excel_safely(file_path)
+            if df is None or df.empty:
+                self.logger.warning("数据文件为空")
+                return None
+            self.logger.info(f"成功读取数据，形状: {df.shape}")
+            return df
+        except Exception as e:
+            self.logger.error(f"读取数据失败: {e}")
+            return None
+
+    def _find_header_row(self, df: pd.DataFrame) -> Optional[int]:
+        result = ColumnMapper.detect_header_row(df, max_rows=30)
+        return result if result >= 0 else None
+    
+    def _apply_column_mapping(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
+        """应用列映射
+        
+        Args:
+            df: 原始数据
+            
+        Returns:
+            映射后的数据或None
+        """
+        if not self.column_mapping:
+            self.logger.info("没有列映射配置")
+            return df
+        
+        try:
+            # 应用列重命名
+            df_renamed = df.rename(columns=self.column_mapping)
+            
+            # 检查必需的列是否存在
+            required_columns = self.get_required_columns()
+            missing_columns = [col for col in required_columns if col not in df_renamed.columns]
+            
+            if missing_columns:
+                self.logger.warning(f"缺少必需的列: {missing_columns}")
+                # 创建缺失的列并填充默认值
+                for col in missing_columns:
+                    df_renamed[col] = 0 if '量' in col or '价' in col else ''
+                    self.logger.info(f"创建缺失列: {col}，默认值: {df_renamed[col].iloc[0] if len(df_renamed) > 0 else 'N/A'}")
+            
+            self.logger.info(f"列映射完成，列名: {list(df_renamed.columns)}")
+            return df_renamed
+            
+        except Exception as e:
+            self.logger.error(f"列映射失败: {e}")
+            return None
+    
+    def _apply_data_cleaning(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
+        """应用数据清洗规则，委托给 DataCleaner"""
+        if not self.cleaning_rules:
+            self.logger.info("没有数据清洗规则")
+            return df
+        try:
+            cleaner = DataCleaner()
+            for rule in self.cleaning_rules:
+                cleaner.add_rule(rule.get('type'), **{k: v for k, v in rule.items() if k != 'type'})
+            result = cleaner.clean(df)
+            self.logger.info(f"数据清洗完成，数据形状: {result.shape}")
+            return result
+        except Exception as e:
+            self.logger.error(f"数据清洗失败: {e}")
+            return None
+    
+    def _apply_calculations(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
+        """应用计算处理，委托给 DataCalculator"""
+        if not self.calculations:
+            self.logger.info("没有计算规则")
+            return df
+        try:
+            calculator = DataCalculator()
+            for calc in self.calculations:
+                calculator.add_rule(calc.get('type'), **{k: v for k, v in calc.items() if k != 'type'})
+            result = calculator.calculate(df)
+            self.logger.info(f"计算处理完成，数据形状: {result.shape}")
+            return result
+        except Exception as e:
+            self.logger.error(f"计算处理失败: {e}")
+            return None
+    
+    def _generate_output(self, df: pd.DataFrame, input_file: Path, output_dir: Path) -> Optional[Path]:
+        """生成输出文件
+        
+        Args:
+            df: 最终数据
+            input_file: 输入文件路径
+            output_dir: 输出目录
+            
+        Returns:
+            输出文件路径或None
+        """
+        try:
+            # 生成输出文件名
+            timestamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
+            output_filename = f"{input_file.stem}{self.output_suffix}_{timestamp}.xls"
+            output_file = output_dir / output_filename
+            
+            # 这里应该使用实际的模板生成逻辑
+            # 暂时直接保存为Excel文件
+            df.to_excel(output_file, index=False)
+            
+            self.logger.info(f"输出文件生成成功: {output_file}")
+            return output_file
+            
+        except Exception as e:
+            self.logger.error(f"生成输出文件失败: {e}")
+            return None
@@ -0,0 +1,347 @@
+"""
+烟草订单处理器
+
+处理烟草公司特定格式的订单明细文件，生成银豹采购单
+"""
+
+import os
+import datetime
+import pandas as pd
+import xlrd
+import xlwt
+from xlutils.copy import copy
+from openpyxl import load_workbook
+from typing import Optional, Dict, Any, List, Tuple
+from pathlib import Path
+
+from .base import BaseProcessor
+from ...core.utils.log_utils import get_logger
+from ...core.utils.string_utils import parse_monetary_string
+from ...core.utils.dialog_utils import show_custom_dialog
+
+logger = get_logger(__name__)
+
+
+class TobaccoProcessor(BaseProcessor):
+    """烟草订单处理器
+    
+    处理烟草公司订单明细文件，提取商品信息并生成标准银豹采购单格式
+    """
+    
+    def __init__(self, config: Dict[str, Any]):
+        """初始化烟草订单处理器
+        
+        Args:
+            config: 配置信息
+        """
+        super().__init__(config)
+        self.description = "处理烟草公司订单明细文件"
+        self.template_file = config.get('Paths', 'template_file', fallback='templates/银豹-采购单模板.xls')
+        
+        # 输出目录配置
+        self.result_dir = Path("data/result")
+        self.result_dir.mkdir(exist_ok=True)
+        
+        # 默认输出文件名
+        self.default_output_name = "银豹采购单_烟草公司.xls"
+    
+    def can_process(self, file_path: Path) -> bool:
+        """判断是否为烟草订单文件
+        
+        Args:
+            file_path: 文件路径
+            
+        Returns:
+            是否能处理该文件
+        """
+        if not self.validate_input(file_path):
+            return False
+        
+        # 检查文件名特征
+        filename = file_path.name
+        tobacco_keywords = ['烟草', '卷烟', '订单明细', 'tobacco', '烟']
+        
+        # 检查文件内容特征
+        try:
+            df = self._read_excel_safely(file_path, nrows=5)
+            required_columns = ['商品', '盒码', '订单量']
+            
+            # 检查文件名或内容特征
+            filename_match = any(keyword in filename for keyword in tobacco_keywords)
+            content_match = all(col in df.columns for col in required_columns)
+            
+            if filename_match or content_match:
+                self.logger.info(f"识别为烟草订单文件: {filename}")
+                return True
+            
+            return False
+            
+        except Exception as e:
+            self.logger.warning(f"检查文件内容时出错: {e}")
+            # 如果无法读取内容，仅基于文件名判断
+            return any(keyword in filename for keyword in tobacco_keywords)
+    
+    def process(self, input_file: Path, output_dir: Path) -> Optional[Path]:
+        """Convert one tobacco order file into a purchase-order workbook.
+
+        Reads the order header (time and total amount) and the order rows,
+        writes ``银豹采购单_烟草公司_<timestamp>.xls`` into ``output_dir`` via
+        _generate_pospal_order, and on success shows a result dialog through
+        _show_processing_result. Any exception is logged with its traceback
+        and turned into a None return.
+
+        Args:
+            input_file: Path of the input order file.
+            output_dir: Directory for the output file; created when missing.
+
+        Returns:
+            Path of the output file, or None when processing failed.
+        """
+        self.log_processing_start(input_file)
+
+        try:
+            # Read the order header (time and total amount)
+            order_info = self._read_order_info(input_file)
+            if not order_info:
+                self.logger.error(f"读取订单信息失败: {input_file}")
+                self.log_processing_end(input_file, success=False)
+                return None
+
+            order_time, total_amount = order_info
+            self.logger.info(f"订单信息 - 时间: {order_time}, 总金额: {total_amount}")
+
+            # Read the order rows
+            order_data = self._read_order_data(input_file)
+            if order_data is None or order_data.empty:
+                self.logger.error(f"读取订单数据失败或数据为空: {input_file}")
+                self.log_processing_end(input_file, success=False)
+                return None
+
+            self.logger.info(f"成功读取订单数据，共{len(order_data)}条记录")
+
+            # Build the timestamped output path
+            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+            output_filename = f"银豹采购单_烟草公司_{timestamp}.xls"
+            output_file = output_dir / output_filename
+
+            # Make sure the output directory exists
+            output_file.parent.mkdir(parents=True, exist_ok=True)
+
+            # Generate the purchase order
+            result = self._generate_pospal_order(order_data, order_time, output_file)
+
+            if result:
+                self.logger.info(f"采购单生成成功: {output_file}")
+                self.log_processing_end(input_file, output_file, success=True)
+
+                # Show the processing result to the user
+                self._show_processing_result(output_file, order_time, len(order_data), total_amount)
+
+                return output_file
+            else:
+                self.logger.error("生成银豹采购单失败")
+                self.log_processing_end(input_file, success=False)
+                return None
+
+        except Exception as e:
+            self.logger.error(f"处理烟草订单时发生错误: {e}", exc_info=True)
+            self.log_processing_end(input_file, success=False)
+            return None
+    
+    def get_required_columns(self) -> List[str]:
+        """返回需要的列名列表"""
+        return ['商品', '盒码', '条码', '建议零售价', '批发价', '需求量', '订单量', '金额']
+    
+    def get_supported_extensions(self) -> List[str]:
+        """支持的文件扩展名"""
+        return ['.xlsx', '.xls']
+    
+    def _read_order_info(self, file_path: Path) -> Optional[Tuple[str, float]]:
+        """读取订单信息（时间和总金额）
+        
+        Args:
+            file_path: 文件路径
+            
+        Returns:
+            包含订单时间和总金额的元组或None
+        """
+        try:
+            wb_info = load_workbook(file_path, data_only=True)
+            ws_info = wb_info.active
+            
+            # 从指定单元格读取订单信息
+            order_time = ws_info["H1"].value or "（空）"
+            total_amount = ws_info["H3"].value or 0.0
+            
+            self.logger.info(f"成功读取订单信息: 时间={order_time}, 总金额={total_amount}")
+            return (order_time, total_amount)
+            
+        except Exception as e:
+            self.logger.error(f"读取订单信息出错: {e}")
+            return None
+    
+    def _read_order_data(self, file_path: Path) -> Optional[pd.DataFrame]:
+        """读取订单数据
+        
+        Args:
+            file_path: 文件路径
+            
+        Returns:
+            订单数据DataFrame或None
+        """
+        columns = ['商品', '盒码', '条码', '建议零售价', '批发价', '需求量', '订单量', '金额']
+        
+        try:
+            df_old = self._read_excel_safely(file_path, header=None, skiprows=3, names=columns)
+            
+            # 过滤订单量不为0的数据，并计算采购量和单价
+            df_filtered = df_old[df_old['订单量'] != 0].copy()
+            
+            if df_filtered.empty:
+                self.logger.warning("没有订单量不为0的记录")
+                return None
+            
+            # 计算采购量和单价
+            df_filtered['采购量'] = df_filtered['订单量'] * 10  # 烟草订单通常需要乘以10
+            df_filtered['采购单价'] = df_filtered['金额'] / df_filtered['采购量']
+            df_filtered = df_filtered.reset_index(drop=True)
+            
+            self.logger.info(f"成功处理订单数据，有效记录数: {len(df_filtered)}")
+            return df_filtered
+            
+        except Exception as e:
+            self.logger.error(f"读取订单数据失败: {e}")
+            return None
+
+    def _generate_pospal_order(self, order_data: pd.DataFrame, order_time: str, output_file: Path) -> bool:
+        """生成银豹采购单
+        
+        Args:
+            order_data: 订单数据
+            order_time: 订单时间
+            output_file: 输出文件路径
+            
+        Returns:
+            是否生成成功
+        """
+        try:
+            # 检查模板文件是否存在
+            template_path = Path(self.template_file)
+            if not template_path.exists():
+                self.logger.error(f"采购单模板文件不存在: {template_path}")
+                return False
+            
+            self.logger.info(f"使用模板文件: {template_path}")
+            
+            # 打开模板，准备写入
+            template_rd = xlrd.open_workbook(str(template_path), formatting_info=True)
+            template_wb = copy(template_rd)
+            template_ws = template_wb.get_sheet(0)
+            
+            # 获取模板中的表头列索引
+            header_row = template_rd.sheet_by_index(0).row_values(0)
+            
+            # 查找需要的列索引
+            try:
+                barcode_col = header_row.index("条码（必填）")
+                amount_col = header_row.index("采购量（必填）")
+                gift_col = header_row.index("赠送量")
+                price_col = header_row.index("采购单价（必填）")
+            except ValueError as e:
+                self.logger.error(f"模板列查找失败: {e}")
+                return False
+            
+            self.logger.info(f"模板列索引 - 条码:{barcode_col}, 采购量:{amount_col}, 赠送量:{gift_col}, 单价:{price_col}")
+            
+            # 写入数据到模板
+            for i, row in order_data.iterrows():
+                template_ws.write(i + 1, barcode_col, row['盒码'])  # 商品条码
+                template_ws.write(i + 1, amount_col, int(row['采购量']))  # 采购量
+                template_ws.write(i + 1, gift_col, "")  # 赠送量为空
+                template_ws.write(i + 1, price_col, round(row['采购单价'], 2))  # 采购单价保留两位小数
+            
+            # 确保输出目录存在
+            output_file.parent.mkdir(parents=True, exist_ok=True)
+            
+            # 保存输出文件
+            template_wb.save(str(output_file))
+            
+            self.logger.info(f"采购单生成成功: {output_file}")
+            return True
+            
+        except Exception as e:
+            self.logger.error(f"生成银豹采购单失败: {e}", exc_info=True)
+            return False
+    
+    def _show_processing_result(self, output_file: Path, order_time: str, total_count: int, total_amount: float):
+        """显示处理结果
+        
+        Args:
+            output_file: 输出文件路径
+            order_time: 订单时间
+            total_count: 处理条目数
+            total_amount: 总金额
+        """
+        try:
+            # 创建附加信息
+            additional_info = {
+                "订单来源": "烟草公司",
+                "处理时间": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            }
+            
+            # 格式化金额显示
+            parsed = parse_monetary_string(total_amount)
+            total_amount = parsed if parsed is not None else 0.0
+            amount_display = f"¥{total_amount:.2f}"
+            
+            # 显示自定义对话框
+            show_custom_dialog(
+                title="烟草订单处理结果",
+                message="烟草订单处理完成",
+                result_file=str(output_file),
+                time_info=order_time,
+                count_info=f"{total_count}个商品",
+                amount_info=amount_display,
+                additional_info=additional_info
+            )
+            
+            self.logger.info(f"显示处理结果 - 文件:{output_file}, 时间:{order_time}, 数量:{total_count}, 金额:{total_amount}")
+            
+        except Exception as e:
+            self.logger.error(f"显示处理结果时出错: {e}")
+    
+    def get_latest_tobacco_order(self) -> Optional[Path]:
+        """获取最新的烟草订单明细文件（兼容旧接口）
+        
+        Returns:
+            文件路径或None
+        """
+        try:
+            # 获取今日开始时间戳
+            today = datetime.date.today()
+            today_start = datetime.datetime.combine(today, datetime.time.min).timestamp()
+            
+            # 查找订单明细文件
+            result_dir = Path("data/output")
+            if not result_dir.exists():
+                return None
+            
+            # 查找符合条件的文件
+            candidates = []
+            for file_path in result_dir.glob("订单明细*.xlsx"):
+                if file_path.stat().st_ctime >= today_start:
+                    candidates.append(file_path)
+            
+            if not candidates:
+                self.logger.warning("未找到今天创建的烟草订单明细文件")
+                # 返回最新的文件
+                all_files = list(result_dir.glob("订单明细*.xlsx"))
+                if all_files:
+                    all_files.sort(key=lambda x: x.stat().st_ctime, reverse=True)
+                    return all_files[0]
+                return None
+            
+            # 返回最新的文件
+            candidates.sort(key=lambda x: x.stat().st_ctime, reverse=True)
+            latest_file = candidates[0]
+            
+            self.logger.info(f"找到最新烟草订单明细文件: {latest_file}")
+            return latest_file
+            
+        except Exception as e:
+            self.logger.error(f"获取最新烟草订单文件时出错: {e}")
+            return None