feat: 益选 OCR 订单处理系统初始提交

- 智能供应商识别(蓉城易购/烟草/杨碧月/通用)
- 百度 OCR 表格识别集成
- 规则引擎(列映射/数据清洗/单位转换/规格推断)
- 条码映射管理与云端同步(Gitea REST API)
- 云端同步支持:条码映射、供应商配置、商品资料、采购模板
- 拖拽一键处理(图片→OCR→Excel→合并)
- 191 个单元测试
- 移除无用的模板管理功能
- 清理 IDE 产物目录

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-04 19:51:13 +08:00
commit e4d62df7e3
78 changed files with 15257 additions and 0 deletions
+9
View File
@@ -0,0 +1,9 @@
"""
处理器模块初始化文件
"""
from .base import BaseProcessor
from .ocr_processor import OCRProcessor
from .tobacco_processor import TobaccoProcessor
# Public names re-exported by this package (what `from <package> import *` exposes).
__all__ = ['BaseProcessor', 'OCRProcessor', 'TobaccoProcessor']
+167
View File
@@ -0,0 +1,167 @@
"""
基础处理器接口模块
定义所有处理器的基类,提供统一的处理接口
"""
from abc import ABC, abstractmethod
from typing import Dict, Any, Optional, List
from pathlib import Path
import logging
import pandas as pd
from ...core.utils.log_utils import get_logger
logger = get_logger(__name__)
class BaseProcessor(ABC):
    """Abstract base class shared by every file processor.

    Designed as a strategy interface: each concrete processor handles one
    kind of input file. Subclasses must implement ``can_process``,
    ``process`` and ``get_required_columns``; the remaining methods are
    shared helpers for validation, file naming, Excel reading and logging.
    """

    def __init__(self, config: Dict[str, Any]):
        """Store the configuration and create the per-processor logger.

        Args:
            config: Processor configuration.
        """
        self.config = config
        # Defaults; subclasses may overwrite both after calling super().__init__().
        self.name = self.__class__.__name__
        self.description = ""
        self._setup_logging()

    def _setup_logging(self):
        """Attach a logger named ``<module>.<processor name>`` to the instance."""
        self.logger = logging.getLogger(f"{__name__}.{self.name}")

    @abstractmethod
    def can_process(self, file_path: Path) -> bool:
        """Tell whether this processor is able to handle ``file_path``.

        Args:
            file_path: Candidate input file.

        Returns:
            True when the processor accepts the file.
        """
        pass

    @abstractmethod
    def process(self, input_file: Path, output_dir: Path) -> Optional[Path]:
        """Process ``input_file`` and report where the result was written.

        Args:
            input_file: File to process.
            output_dir: Directory intended to receive the output.

        Returns:
            Path of the generated file, or None when processing failed.
        """
        pass

    @abstractmethod
    def get_required_columns(self) -> List[str]:
        """Return the column names this processor depends on."""
        pass

    def validate_input(self, file_path: Path) -> bool:
        """Check that ``file_path`` is an existing file of a supported type.

        The extension check is case-insensitive on both sides, so a subclass
        may declare ``'.XLSX'`` or ``'.xlsx'`` interchangeably. An empty list
        from ``get_supported_extensions`` disables the extension check.

        Args:
            file_path: File to validate; a ``str`` path is accepted as well.

        Returns:
            True when the file exists, is a regular file and has a supported
            extension. Any unexpected error is logged and reported as False.
        """
        try:
            # Coerce so that plain string paths are validated instead of
            # failing on a missing ``.exists`` attribute.
            file_path = Path(file_path)

            if not file_path.exists():
                self.logger.warning(f"文件不存在: {file_path}")
                return False

            if not file_path.is_file():
                self.logger.warning(f"不是文件: {file_path}")
                return False

            supported_extensions = self.get_supported_extensions()
            allowed = {ext.lower() for ext in supported_extensions} if supported_extensions else set()
            if allowed and file_path.suffix.lower() not in allowed:
                self.logger.warning(f"不支持的文件类型: {file_path.suffix}, 支持的类型: {supported_extensions}")
                return False

            return True
        except Exception as e:
            self.logger.error(f"验证文件时出错: {e}")
            return False

    def get_supported_extensions(self) -> List[str]:
        """Return the accepted file extensions (with leading dot).

        Returns:
            List of extensions; an empty list means every type is accepted.
        """
        return []

    def get_output_filename(self, input_file: Path, suffix: str = "_processed") -> str:
        """Build an output file name from the input name.

        Args:
            input_file: Input file path (``str`` is accepted as well).
            suffix: Text inserted between the stem and the extension.

        Returns:
            ``<stem><suffix><original extension>``.
        """
        input_file = Path(input_file)
        return f"{input_file.stem}{suffix}{input_file.suffix}"

    def _read_excel_safely(self, file_path: Path, **kwargs) -> pd.DataFrame:
        """Read an Excel file, choosing the pandas engine from the extension.

        ``.xlsx`` is read with ``openpyxl`` and ``.xls`` with ``xlrd``; any
        other extension is left to pandas' own engine selection.

        Args:
            file_path: File to read (``str`` is accepted as well).
            **kwargs: Forwarded unchanged to ``pd.read_excel``.

        Returns:
            The loaded DataFrame.

        Raises:
            Exception: Whatever ``pd.read_excel`` raises; ``.xls`` failures
                are logged first and then re-raised.
        """
        file_path = Path(file_path)
        suffix = file_path.suffix.lower()

        if suffix == '.xlsx':
            return pd.read_excel(file_path, engine='openpyxl', **kwargs)

        if suffix == '.xls':
            try:
                return pd.read_excel(file_path, engine='xlrd', **kwargs)
            except Exception as e:
                self.logger.warning(f"读取xls失败,可能缺少xlrd: {e}")
                raise

        return pd.read_excel(file_path, **kwargs)

    def log_processing_start(self, input_file: Path):
        """Log that processing of ``input_file`` has started."""
        self.logger.info(f"开始处理文件: {input_file}")
        self.logger.info(f"处理器: {self.name} - {self.description}")

    def log_processing_end(self, input_file: Path, output_file: Optional[Path] = None, success: bool = True):
        """Log the outcome of processing ``input_file``.

        Args:
            input_file: File that was processed.
            output_file: Generated file, logged only on success when given.
            success: Whether processing succeeded.
        """
        if not success:
            self.logger.error(f"处理失败: {input_file}")
            return

        self.logger.info(f"处理完成: {input_file}")
        if output_file:
            self.logger.info(f"输出文件: {output_file}")

    def __str__(self) -> str:
        """Return ``name(description)``."""
        return f"{self.name}({self.description})"

    def __repr__(self) -> str:
        """Return the qualified class name together with name and description."""
        return f"{self.__class__.__module__}.{self.__class__.__name__}(name='{self.name}', description='{self.description}')"
+192
View File
@@ -0,0 +1,192 @@
"""
OCR处理器
处理图片文件的OCR识别完整流程:图片识别 → Excel处理 → 标准采购单生成
"""
import os
from pathlib import Path
from typing import Optional, Dict, Any, List
from .base import BaseProcessor
from ...services.ocr_service import OCRService
from ...services.order_service import OrderService
from ...core.utils.log_utils import get_logger
logger = get_logger(__name__)
class OCRProcessor(BaseProcessor):
    """Processor that turns an image of an order into a purchase order file.

    The pipeline has three steps:
    1. run OCR on the image through ``OCRService.process_image``;
    2. hand the resulting Excel file to ``OrderService.process_excel``;
    3. return the file produced by step 2 as the final purchase order.
    """

    # Single source of truth for the accepted image types, used by both
    # can_process() and get_supported_extensions() so they cannot drift apart.
    SUPPORTED_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp')

    def __init__(self, config: Dict[str, Any]):
        """Create the processor and the two services it delegates to.

        Args:
            config: Configuration passed on to ``OCRService`` and ``OrderService``.
        """
        super().__init__(config)
        self.description = "OCR识别完整流程(图片→识别→Excel→采购单)"

        self.ocr_service = OCRService(config)
        self.order_service = OrderService(config)

    def can_process(self, file_path: Path) -> bool:
        """Tell whether ``file_path`` is an existing image of a supported type.

        Args:
            file_path: Candidate input file.

        Returns:
            True when the file passes validation and has an accepted extension.
        """
        if not self.validate_input(file_path):
            return False

        if file_path.suffix.lower() in self.get_supported_extensions():
            self.logger.info(f"识别为图片文件: {file_path.name}")
            return True

        return False

    def process(self, input_file: Path, output_dir: Path) -> Optional[Path]:
        """Run the full OCR pipeline on one image.

        Args:
            input_file: Image to recognise.
            output_dir: Output directory, forwarded to the individual steps.

        Returns:
            Path of the final file, or None when any step fails. Exceptions
            are logged with a traceback and converted into None.
        """
        self.log_processing_start(input_file)

        try:
            self.logger.info("开始OCR识别流程...")

            # Step 1: OCR.
            self.logger.info("步骤1/3: OCR识别图片...")
            ocr_result = self._perform_ocr(input_file, output_dir)
            if not ocr_result:
                self.logger.error("OCR识别失败")
                self.log_processing_end(input_file, success=False)
                return None

            # Step 2: Excel processing.
            self.logger.info("步骤2/3: 处理Excel文件...")
            excel_result = self._process_excel(ocr_result, output_dir)
            if not excel_result:
                self.logger.error("Excel处理失败")
                self.log_processing_end(input_file, success=False)
                return None

            # Step 3: purchase order.
            self.logger.info("步骤3/3: 生成标准采购单...")
            final_result = self._generate_purchase_order(excel_result, output_dir)
            if not final_result:
                self.logger.error("生成采购单失败")
                self.log_processing_end(input_file, success=False)
                return None

            self.logger.info(f"OCR处理流程完成,输出文件: {final_result}")
            self.log_processing_end(input_file, final_result, success=True)
            return final_result

        except Exception as e:
            self.logger.error(f"OCR处理流程出错: {e}", exc_info=True)
            self.log_processing_end(input_file, success=False)
            return None

    def get_required_columns(self) -> List[str]:
        """Return an empty list: the OCR step itself does not depend on column names."""
        return []

    def get_supported_extensions(self) -> List[str]:
        """Return the accepted image extensions as a fresh list."""
        return list(self.SUPPORTED_EXTENSIONS)

    def _perform_ocr(self, input_file: Path, output_dir: Path) -> Optional[Path]:
        """Run OCR on ``input_file`` and return the file the service produced.

        Args:
            input_file: Image to recognise.
            output_dir: Currently unused; the result stays wherever
                ``OCRService.process_image`` wrote it.

        Returns:
            Path reported by the OCR service if that file exists, else None.
        """
        try:
            self.logger.info(f"开始OCR识别: {input_file}")

            raw_result = self.ocr_service.process_image(str(input_file))
            if not raw_result:
                self.logger.error("OCR服务返回None")
                return None

            result_path = Path(raw_result)
            if not result_path.exists():
                self.logger.error(f"OCR结果文件不存在: {result_path}")
                return None

            self.logger.info(f"OCR识别成功,输出文件: {result_path}")
            return result_path

        except Exception as e:
            self.logger.error(f"OCR识别失败: {e}", exc_info=True)
            return None

    def _process_excel(self, excel_file: Path, output_dir: Path) -> Optional[Path]:
        """Pass the OCR Excel file to the order service.

        Args:
            excel_file: Excel file produced by the OCR step.
            output_dir: Currently unused; the result stays wherever
                ``OrderService.process_excel`` wrote it.

        Returns:
            Path reported by the order service if that file exists, else None.
        """
        try:
            self.logger.info(f"开始处理Excel文件: {excel_file}")

            raw_result = self.order_service.process_excel(str(excel_file))
            if not raw_result:
                self.logger.error("Excel处理服务返回None")
                return None

            result_path = Path(raw_result)
            if not result_path.exists():
                self.logger.error(f"Excel处理结果文件不存在: {result_path}")
                return None

            self.logger.info(f"Excel处理成功,输出文件: {result_path}")
            return result_path

        except Exception as e:
            self.logger.error(f"Excel处理失败: {e}", exc_info=True)
            return None

    def _generate_purchase_order(self, processed_file: Path, output_dir: Path) -> Optional[Path]:
        """Return ``processed_file`` when it exists.

        The purchase order itself is produced by ``OrderService`` in the
        previous step, so this step only confirms the file is present.

        Args:
            processed_file: File returned by ``_process_excel``.
            output_dir: Unused.

        Returns:
            ``processed_file`` if it exists, otherwise None.
        """
        try:
            if processed_file and processed_file.exists():
                return processed_file
            return None
        except Exception as e:
            # Record the reason instead of failing silently.
            self.logger.error(f"检查采购单文件时出错: {e}", exc_info=True)
            return None
@@ -0,0 +1,7 @@
"""
供应商处理器模块初始化文件
"""
from .generic_supplier_processor import GenericSupplierProcessor
# Public names re-exported by this package (what `from <package> import *` exposes).
__all__ = ['GenericSupplierProcessor']
@@ -0,0 +1,340 @@
"""
通用供应商处理器
可配置化的供应商处理器,支持通过配置文件定义处理规则
"""
import fnmatch
import pandas as pd
from typing import Optional, Dict, Any, List
from pathlib import Path
from ..base import BaseProcessor
from ...utils.log_utils import get_logger
from ...handlers.rule_engine import apply_rules
from ...handlers.column_mapper import ColumnMapper
from ...handlers.data_cleaner import DataCleaner
from ...handlers.calculator import DataCalculator
logger = get_logger(__name__)
class GenericSupplierProcessor(BaseProcessor):
    """Configuration-driven processor for supplier Excel files.

    The supplier configuration controls:
    - file name patterns (``filename_patterns``, fnmatch style);
    - content indicators looked up in the column names (``content_indicators``);
    - column renaming (``column_mapping``);
    - cleaning rules (``cleaning_rules``), delegated to ``DataCleaner``;
    - calculation rules (``calculations``), delegated to ``DataCalculator``.
    """

    # Substrings marking a missing target column as numeric, so it is created
    # with 0 instead of ''. The original condition compared against empty
    # strings (always true), which made every missing column default to 0.
    # NOTE(review): the intended keywords are not recoverable from the
    # original code; this list is an assumption - confirm against the
    # supplier configurations in use.
    NUMERIC_COLUMN_HINTS = ('量', '价', '金额', 'quantity', 'qty', 'price', 'amount')

    def __init__(self, config: Dict[str, Any], supplier_config: Dict[str, Any]):
        """Create the processor from system and supplier configuration.

        Args:
            config: System configuration, passed to the base class.
            supplier_config: Supplier-specific settings; every key is optional
                and falls back to the defaults visible below.
        """
        super().__init__(config)
        self.supplier_config = supplier_config

        # Basic identity.
        self.name = supplier_config.get('name', 'GenericSupplier')
        self.description = supplier_config.get('description', '通用供应商处理器')

        # Recognition and processing rules.
        self.filename_patterns = supplier_config.get('filename_patterns', [])
        self.content_indicators = supplier_config.get('content_indicators', [])
        self.column_mapping = supplier_config.get('column_mapping', {})
        self.cleaning_rules = supplier_config.get('cleaning_rules', [])
        self.calculations = supplier_config.get('calculations', [])

        # Output settings.
        self.output_template = supplier_config.get('output_template', 'templates/银豹-采购单模板.xls')
        self.output_suffix = supplier_config.get('output_suffix', '_银豹采购单')

    def can_process(self, file_path: Path) -> bool:
        """Tell whether the file matches this supplier's recognition rules.

        File name patterns are tried first, then content indicators. With
        neither configured the processor cannot decide and returns False.

        Args:
            file_path: Candidate input file.

        Returns:
            True when the file is valid and at least one rule matches.
        """
        if not self.validate_input(file_path):
            return False

        if not self.filename_patterns and not self.content_indicators:
            self.logger.warning(f"处理器 {self.name} 没有配置识别规则")
            return False

        if self.filename_patterns and self._check_filename_patterns(file_path):
            return True

        if self.content_indicators and self._check_content_indicators(file_path):
            return True

        return False

    def process(self, input_file: Path, output_dir: Path) -> Optional[Path]:
        """Read, map, clean, standardise, calculate and write one supplier file.

        Args:
            input_file: Supplier Excel file.
            output_dir: Directory receiving the generated file.

        Returns:
            Path of the generated file, or None when any step fails.
            Exceptions are logged with a traceback and converted into None.
        """
        self.log_processing_start(input_file)

        try:
            # Step 1: read.
            self.logger.info("步骤1/4: 读取数据...")
            df = self._read_supplier_data(input_file)
            if df is None or df.empty:
                self.logger.error("读取数据失败或数据为空")
                self.log_processing_end(input_file, success=False)
                return None

            # Step 2: column mapping.
            self.logger.info("步骤2/4: 应用列映射...")
            mapped_df = self._apply_column_mapping(df)
            if mapped_df is None:
                self.logger.error("列映射失败")
                self.log_processing_end(input_file, success=False)
                return None

            # Step 3: cleaning.
            self.logger.info("步骤3/4: 数据清洗...")
            cleaned_df = self._apply_data_cleaning(mapped_df)
            if cleaned_df is None:
                self.logger.error("数据清洗失败")
                self.log_processing_end(input_file, success=False)
                return None

            # Rule engine is best-effort: on failure continue with the cleaned data.
            try:
                rules = self.supplier_config.get('rules', [])
                dictionary = self.supplier_config.get('dictionary')
                standardized_df = apply_rules(cleaned_df, rules, dictionary)
            except Exception as e:
                self.logger.warning(f"规则执行失败: {e}")
                standardized_df = cleaned_df

            # Step 4: calculations.
            self.logger.info("步骤4/4: 计算处理...")
            calculated_df = self._apply_calculations(standardized_df)
            if calculated_df is None:
                self.logger.error("计算处理失败")
                self.log_processing_end(input_file, success=False)
                return None

            # Write the result.
            output_file = self._generate_output(calculated_df, input_file, output_dir)
            if output_file and output_file.exists():
                self.logger.info(f"处理完成,输出文件: {output_file}")
                self.log_processing_end(input_file, output_file, success=True)
                return output_file

            self.logger.error("输出文件生成失败")
            self.log_processing_end(input_file, success=False)
            return None

        except Exception as e:
            self.logger.error(f"处理文件时出错: {e}", exc_info=True)
            self.log_processing_end(input_file, success=False)
            return None

    def get_required_columns(self) -> List[str]:
        """Return the target column names of the configured column mapping."""
        return list(self.column_mapping.values()) if self.column_mapping else []

    def _check_filename_patterns(self, file_path: Path) -> bool:
        """Match the file name against the configured fnmatch patterns.

        The comparison is case-insensitive.

        Args:
            file_path: File whose name is checked.

        Returns:
            True on the first matching pattern; False otherwise or on error.
        """
        try:
            filename = file_path.name
            lowered = filename.lower()

            for pattern in self.filename_patterns:
                if fnmatch.fnmatch(lowered, pattern.lower()):
                    self.logger.info(f"文件名匹配成功: {filename} -> {pattern}")
                    return True

            return False
        except Exception as e:
            self.logger.error(f"检查文件名模式时出错: {e}")
            return False

    def _check_content_indicators(self, file_path: Path) -> bool:
        """Look for any configured indicator inside the file's column names.

        Only the first five rows are loaded; indicators are matched
        case-insensitively as substrings of the stringified column list.

        Args:
            file_path: File to inspect.

        Returns:
            True when an indicator is found; False otherwise or on error.
        """
        try:
            df = self._read_excel_safely(file_path, nrows=5)
            columns_str = str(list(df.columns)).lower()

            for indicator in self.content_indicators:
                if indicator.lower() in columns_str:
                    self.logger.info(f"内容特征匹配成功: {indicator}")
                    return True

            return False
        except Exception as e:
            self.logger.error(f"检查内容特征时出错: {e}")
            return False

    def _read_supplier_data(self, file_path: Path) -> Optional[pd.DataFrame]:
        """Load the supplier sheet, locating the header row first.

        When ``header_row`` is configured it is used directly (falling back to
        the default header if that read fails). Otherwise the sheet is read
        without a header and ``_find_header_row`` decides which row to use.

        Args:
            file_path: File to read.

        Returns:
            The loaded DataFrame, or None when reading fails or the data is empty.
        """
        try:
            specified = self.supplier_config.get('header_row')

            if specified is not None:
                try:
                    df = self._read_excel_safely(file_path, header=int(specified))
                except Exception as e:
                    # Keep the fallback, but leave a trace of why it was needed.
                    self.logger.warning(f"按指定表头行读取失败,改用默认表头: {e}")
                    df = self._read_excel_safely(file_path)
            else:
                raw = self._read_excel_safely(file_path, header=None)
                if raw is None:
                    return None

                header_row = self._find_header_row(raw)
                if header_row is not None:
                    df = self._read_excel_safely(file_path, header=header_row)
                else:
                    df = self._read_excel_safely(file_path)

            if df is None or df.empty:
                self.logger.warning("数据文件为空")
                return None

            self.logger.info(f"成功读取数据,形状: {df.shape}")
            return df

        except Exception as e:
            self.logger.error(f"读取数据失败: {e}")
            return None

    def _find_header_row(self, df: pd.DataFrame) -> Optional[int]:
        """Ask ``ColumnMapper.detect_header_row`` for the header row index.

        Args:
            df: Sheet content read without a header.

        Returns:
            The detected row index, or None when the detector returns None or
            a negative value.
        """
        result = ColumnMapper.detect_header_row(df, max_rows=30)
        if result is None or result < 0:
            return None
        return result

    def _default_for_missing_column(self, col: Any) -> Any:
        """Return 0 for columns that look numeric by name, '' for the rest."""
        label = str(col).lower()
        if any(hint in label for hint in self.NUMERIC_COLUMN_HINTS):
            return 0
        return ''

    def _apply_column_mapping(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Rename columns per ``column_mapping`` and create missing targets.

        Target columns absent after renaming are added with a default value:
        0 when the name looks numeric, '' otherwise.

        Args:
            df: Raw supplier data.

        Returns:
            The mapped DataFrame (``df`` itself when no mapping is configured),
            or None on error.
        """
        if not self.column_mapping:
            self.logger.info("没有列映射配置")
            return df

        try:
            df_renamed = df.rename(columns=self.column_mapping)

            required_columns = self.get_required_columns()
            missing_columns = [col for col in required_columns if col not in df_renamed.columns]

            if missing_columns:
                self.logger.warning(f"缺少必需的列: {missing_columns}")
                for col in missing_columns:
                    default = self._default_for_missing_column(col)
                    df_renamed[col] = default
                    self.logger.info(f"创建缺失列: {col},默认值: {default if len(df_renamed) > 0 else 'N/A'}")

            self.logger.info(f"列映射完成,列名: {list(df_renamed.columns)}")
            return df_renamed

        except Exception as e:
            self.logger.error(f"列映射失败: {e}")
            return None

    def _apply_data_cleaning(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Apply the configured cleaning rules through ``DataCleaner``.

        Each rule dict contributes its ``type`` as the rule name and all other
        keys as keyword arguments.

        Args:
            df: Mapped data.

        Returns:
            Cleaned DataFrame (``df`` itself when no rules exist), or None on error.
        """
        if not self.cleaning_rules:
            self.logger.info("没有数据清洗规则")
            return df

        try:
            cleaner = DataCleaner()
            for rule in self.cleaning_rules:
                options = {key: value for key, value in rule.items() if key != 'type'}
                cleaner.add_rule(rule.get('type'), **options)

            result = cleaner.clean(df)
            self.logger.info(f"数据清洗完成,数据形状: {result.shape}")
            return result
        except Exception as e:
            self.logger.error(f"数据清洗失败: {e}")
            return None

    def _apply_calculations(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Apply the configured calculation rules through ``DataCalculator``.

        Each rule dict contributes its ``type`` as the rule name and all other
        keys as keyword arguments.

        Args:
            df: Standardised data.

        Returns:
            Calculated DataFrame (``df`` itself when no rules exist), or None on error.
        """
        if not self.calculations:
            self.logger.info("没有计算规则")
            return df

        try:
            calculator = DataCalculator()
            for calc in self.calculations:
                options = {key: value for key, value in calc.items() if key != 'type'}
                calculator.add_rule(calc.get('type'), **options)

            result = calculator.calculate(df)
            self.logger.info(f"计算处理完成,数据形状: {result.shape}")
            return result
        except Exception as e:
            self.logger.error(f"计算处理失败: {e}")
            return None

    def _generate_output(self, df: pd.DataFrame, input_file: Path, output_dir: Path) -> Optional[Path]:
        """Write the final data to ``<stem><output_suffix>_<timestamp>.xls``.

        The configured ``output_template`` is not applied yet; the DataFrame
        is saved directly.

        Args:
            df: Final data.
            input_file: Original input file, used for the output name.
            output_dir: Directory receiving the file; created when missing.

        Returns:
            Path of the written file, or None on error.
        """
        try:
            timestamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
            output_filename = f"{input_file.stem}{self.output_suffix}_{timestamp}.xls"
            output_file = output_dir / output_filename

            # Writing into a missing directory would fail; create it up front.
            output_file.parent.mkdir(parents=True, exist_ok=True)

            # NOTE(review): saving with an .xls extension relies on the installed
            # pandas having a writer engine for that format - verify against the
            # pinned pandas version.
            df.to_excel(output_file, index=False)

            self.logger.info(f"输出文件生成成功: {output_file}")
            return output_file

        except Exception as e:
            self.logger.error(f"生成输出文件失败: {e}")
            return None
+347
View File
@@ -0,0 +1,347 @@
"""
烟草订单处理器
处理烟草公司特定格式的订单明细文件,生成银豹采购单
"""
import os
import datetime
import pandas as pd
import xlrd
import xlwt
from xlutils.copy import copy
from openpyxl import load_workbook
from typing import Optional, Dict, Any, List, Tuple
from pathlib import Path
from .base import BaseProcessor
from ...core.utils.log_utils import get_logger
from ...core.utils.string_utils import parse_monetary_string
from ...core.utils.dialog_utils import show_custom_dialog
logger = get_logger(__name__)
class TobaccoProcessor(BaseProcessor):
    """Processor for tobacco company order-detail files.

    Reads the order header and the item rows from the Excel file and writes
    them into the Pospal (银豹) purchase order template.
    """

    # Template used when the configuration does not name one.
    DEFAULT_TEMPLATE_FILE = 'templates/银豹-采购单模板.xls'

    # File name fragments identifying a tobacco order. The original list also
    # held an empty string, which matches every file name.
    FILENAME_KEYWORDS = ('烟草', '卷烟', '订单明细', 'tobacco')

    # Columns that must all be present for a content-based match.
    CONTENT_COLUMNS = ('商品', '盒码', '订单量')

    # Column names assigned to the item rows, in sheet order.
    ORDER_COLUMNS = ('商品', '盒码', '条码', '建议零售价', '批发价', '需求量', '订单量', '金额')

    def __init__(self, config: Dict[str, Any]):
        """Create the processor and make sure the result directory exists.

        Args:
            config: Either a configparser-style object exposing
                ``get(section, option, fallback=...)`` or a plain dict with a
                nested ``'Paths'`` dict.
        """
        super().__init__(config)
        self.description = "处理烟草公司订单明细文件"
        self.template_file = self._resolve_template_file(config)

        # Result directory; parents=True so a missing "data" folder is created too.
        self.result_dir = Path("data/result")
        self.result_dir.mkdir(parents=True, exist_ok=True)

        # Default output file name.
        self.default_output_name = "银豹采购单_烟草公司.xls"

    @classmethod
    def _resolve_template_file(cls, config: Any) -> str:
        """Read ``Paths.template_file`` from either supported config shape."""
        if isinstance(config, dict):
            # dict.get() does not accept the configparser-style arguments.
            section = config.get('Paths')
            if isinstance(section, dict):
                return section.get('template_file', cls.DEFAULT_TEMPLATE_FILE)
            return cls.DEFAULT_TEMPLATE_FILE
        return config.get('Paths', 'template_file', fallback=cls.DEFAULT_TEMPLATE_FILE)

    @classmethod
    def _filename_matches(cls, filename: str) -> bool:
        """Tell whether ``filename`` contains a tobacco keyword (case-insensitive)."""
        lowered = filename.lower()
        return any(keyword in lowered for keyword in cls.FILENAME_KEYWORDS)

    def can_process(self, file_path: Path) -> bool:
        """Tell whether the file looks like a tobacco order.

        A file qualifies when its name contains one of ``FILENAME_KEYWORDS``
        or when its first rows expose all ``CONTENT_COLUMNS``. If the content
        cannot be read, only the file name decides.

        Args:
            file_path: Candidate input file.

        Returns:
            True when the file is valid and recognised as a tobacco order.
        """
        if not self.validate_input(file_path):
            return False

        filename = file_path.name

        # A name match is sufficient, so the file need not be opened.
        if self._filename_matches(filename):
            self.logger.info(f"识别为烟草订单文件: {filename}")
            return True

        try:
            df = self._read_excel_safely(file_path, nrows=5)
        except Exception as e:
            self.logger.warning(f"检查文件内容时出错: {e}")
            # The name did not match and the content is unreadable.
            return False

        if all(col in df.columns for col in self.CONTENT_COLUMNS):
            self.logger.info(f"识别为烟草订单文件: {filename}")
            return True

        return False

    def process(self, input_file: Path, output_dir: Path) -> Optional[Path]:
        """Convert one tobacco order file into a Pospal purchase order.

        On success a result dialog is shown via ``_show_processing_result``.

        Args:
            input_file: Tobacco order file.
            output_dir: Directory receiving the purchase order.

        Returns:
            Path of the generated file, or None when any step fails.
            Exceptions are logged with a traceback and converted into None.
        """
        self.log_processing_start(input_file)

        try:
            # Order header: time and total amount.
            order_info = self._read_order_info(input_file)
            if not order_info:
                self.logger.error(f"读取订单信息失败: {input_file}")
                self.log_processing_end(input_file, success=False)
                return None

            order_time, total_amount = order_info
            self.logger.info(f"订单信息 - 时间: {order_time}, 总金额: {total_amount}")

            # Item rows.
            order_data = self._read_order_data(input_file)
            if order_data is None or order_data.empty:
                self.logger.error(f"读取订单数据失败或数据为空: {input_file}")
                self.log_processing_end(input_file, success=False)
                return None

            self.logger.info(f"成功读取订单数据,共{len(order_data)}条记录")

            # Output path.
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            output_filename = f"银豹采购单_烟草公司_{timestamp}.xls"
            output_file = output_dir / output_filename
            output_file.parent.mkdir(parents=True, exist_ok=True)

            # Purchase order.
            if not self._generate_pospal_order(order_data, order_time, output_file):
                self.logger.error("生成银豹采购单失败")
                self.log_processing_end(input_file, success=False)
                return None

            self.logger.info(f"采购单生成成功: {output_file}")
            self.log_processing_end(input_file, output_file, success=True)
            self._show_processing_result(output_file, order_time, len(order_data), total_amount)
            return output_file

        except Exception as e:
            self.logger.error(f"处理烟草订单时发生错误: {e}", exc_info=True)
            self.log_processing_end(input_file, success=False)
            return None

    def get_required_columns(self) -> List[str]:
        """Return the column names assigned to the item rows."""
        return list(self.ORDER_COLUMNS)

    def get_supported_extensions(self) -> List[str]:
        """Return the accepted Excel extensions."""
        return ['.xlsx', '.xls']

    @staticmethod
    def _read_info_cells_xlsx(file_path: Path) -> Tuple[Any, Any]:
        """Return the raw values of cells H1 and H3 of the active sheet (openpyxl)."""
        wb_info = load_workbook(file_path, data_only=True)
        ws_info = wb_info.active
        return ws_info["H1"].value, ws_info["H3"].value

    @staticmethod
    def _read_info_cells_xls(file_path: Path) -> Tuple[Any, Any]:
        """Return the raw values of cells H1 and H3 of the first sheet (xlrd).

        NOTE(review): xlrd reads the first sheet while openpyxl reads the
        active one, and xlrd reports date cells as numbers - confirm this is
        acceptable for real .xls order files.
        """
        sheet = xlrd.open_workbook(str(file_path)).sheet_by_index(0)

        def cell(row: int, col: int) -> Any:
            # Cells outside the used range are treated as empty.
            if row < sheet.nrows and col < sheet.ncols:
                return sheet.cell_value(row, col)
            return None

        # H is the 8th column -> zero-based index 7.
        return cell(0, 7), cell(2, 7)

    def _read_order_info(self, file_path: Path) -> Optional[Tuple[str, float]]:
        """Read the order time (cell H1) and total amount (cell H3).

        ``.xls`` files are read with xlrd because openpyxl cannot open them;
        every other file goes through openpyxl.

        Args:
            file_path: Order file.

        Returns:
            ``(order_time, total_amount)``; empty cells yield ``"(空)"`` and
            ``0.0`` respectively. None when the file cannot be read.
        """
        try:
            if Path(file_path).suffix.lower() == '.xls':
                raw_time, raw_total = self._read_info_cells_xls(file_path)
            else:
                raw_time, raw_total = self._read_info_cells_xlsx(file_path)

            order_time = raw_time or "(空)"
            total_amount = raw_total or 0.0

            self.logger.info(f"成功读取订单信息: 时间={order_time}, 总金额={total_amount}")
            return (order_time, total_amount)

        except Exception as e:
            self.logger.error(f"读取订单信息出错: {e}")
            return None

    def _read_order_data(self, file_path: Path) -> Optional[pd.DataFrame]:
        """Read the item rows and derive purchase quantity and unit price.

        The first three sheet rows are skipped and the remaining columns are
        named after ``ORDER_COLUMNS``. Rows whose order quantity is zero,
        blank or not numeric are dropped.

        Added columns:
            采购量: order quantity x 10.
            采购单价: 金额 / 采购量.

        Args:
            file_path: Order file.

        Returns:
            Filtered DataFrame with a fresh 0..n-1 index, or None when no row
            remains or reading fails.
        """
        columns = list(self.ORDER_COLUMNS)

        try:
            df_old = self._read_excel_safely(file_path, header=None, skiprows=3, names=columns)

            # Blank cells are NaN and NaN != 0 is True, so a plain "!= 0" filter
            # would keep them and int() would fail later; coerce and drop them.
            order_qty = pd.to_numeric(df_old['订单量'], errors='coerce')
            df_filtered = df_old[order_qty.notna() & (order_qty != 0)].copy()

            if df_filtered.empty:
                self.logger.warning("没有订单量不为0的记录")
                return None

            df_filtered['订单量'] = order_qty.loc[df_filtered.index]

            # Original author's note: tobacco orders usually need x10
            # (presumably cartons to packs - confirm).
            df_filtered['采购量'] = df_filtered['订单量'] * 10
            df_filtered['采购单价'] = df_filtered['金额'] / df_filtered['采购量']
            df_filtered = df_filtered.reset_index(drop=True)

            self.logger.info(f"成功处理订单数据,有效记录数: {len(df_filtered)}")
            return df_filtered

        except Exception as e:
            self.logger.error(f"读取订单数据失败: {e}")
            return None

    def _generate_pospal_order(self, order_data: pd.DataFrame, order_time: str, output_file: Path) -> bool:
        """Fill the purchase order template with the order rows and save it.

        Column positions are looked up in the template's first row. Data is
        written from the second row on: box code as barcode, purchase
        quantity as integer, empty gift quantity, unit price rounded to two
        decimals.

        Args:
            order_data: Rows produced by ``_read_order_data``.
            order_time: Order time (not written to the sheet).
            output_file: Destination file.

        Returns:
            True when the file was saved; False when the template or one of
            its columns is missing, or on any other error.
        """
        try:
            template_path = Path(self.template_file)
            if not template_path.exists():
                self.logger.error(f"采购单模板文件不存在: {template_path}")
                return False

            self.logger.info(f"使用模板文件: {template_path}")

            # Open the template (keeping its formatting) and get a writable copy.
            template_rd = xlrd.open_workbook(str(template_path), formatting_info=True)
            template_wb = copy(template_rd)
            template_ws = template_wb.get_sheet(0)

            header_row = template_rd.sheet_by_index(0).row_values(0)

            try:
                barcode_col = header_row.index("条码(必填)")
                amount_col = header_row.index("采购量(必填)")
                gift_col = header_row.index("赠送量")
                price_col = header_row.index("采购单价(必填)")
            except ValueError as e:
                self.logger.error(f"模板列查找失败: {e}")
                return False

            self.logger.info(f"模板列索引 - 条码:{barcode_col}, 采购量:{amount_col}, 赠送量:{gift_col}, 单价:{price_col}")

            # Number rows by position so the result does not depend on the
            # DataFrame's index labels; sheet row 0 holds the header.
            for sheet_row, (_, row) in enumerate(order_data.iterrows(), start=1):
                template_ws.write(sheet_row, barcode_col, row['盒码'])
                template_ws.write(sheet_row, amount_col, int(row['采购量']))
                template_ws.write(sheet_row, gift_col, "")
                template_ws.write(sheet_row, price_col, round(row['采购单价'], 2))

            output_file.parent.mkdir(parents=True, exist_ok=True)
            template_wb.save(str(output_file))

            self.logger.info(f"采购单生成成功: {output_file}")
            return True

        except Exception as e:
            self.logger.error(f"生成银豹采购单失败: {e}", exc_info=True)
            return False

    def _show_processing_result(self, output_file: Path, order_time: str, total_count: int, total_amount: float):
        """Show the result dialog for a processed order.

        Errors are logged and never propagated.

        Args:
            output_file: Generated purchase order.
            order_time: Order time shown in the dialog.
            total_count: Number of processed items.
            total_amount: Total amount; passed through ``parse_monetary_string``
                and shown as 0.0 when that returns None.
        """
        try:
            additional_info = {
                "订单来源": "烟草公司",
                "处理时间": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }

            parsed = parse_monetary_string(total_amount)
            total_amount = parsed if parsed is not None else 0.0
            amount_display = f"¥{total_amount:.2f}"

            show_custom_dialog(
                title="烟草订单处理结果",
                message="烟草订单处理完成",
                result_file=str(output_file),
                time_info=order_time,
                count_info=f"{total_count}个商品",
                amount_info=amount_display,
                additional_info=additional_info
            )

            self.logger.info(f"显示处理结果 - 文件:{output_file}, 时间:{order_time}, 数量:{total_count}, 金额:{total_amount}")

        except Exception as e:
            self.logger.error(f"显示处理结果时出错: {e}")

    def get_latest_tobacco_order(self) -> Optional[Path]:
        """Return the newest ``订单明细*.xlsx`` file in ``data/output`` (legacy API).

        Files whose ``st_ctime`` falls on today are preferred; when there are
        none, a warning is logged and the newest file overall is returned.

        Returns:
            Path of the chosen file, or None when the directory or any
            matching file is missing, or on error.
        """
        try:
            today_start = datetime.datetime.combine(
                datetime.date.today(), datetime.time.min
            ).timestamp()

            # NOTE(review): this scans "data/output", not self.result_dir
            # ("data/result") - confirm which directory is intended.
            result_dir = Path("data/output")
            if not result_dir.exists():
                return None

            # Newest first; one directory scan serves both selections below.
            all_files = sorted(
                result_dir.glob("订单明细*.xlsx"),
                key=lambda path: path.stat().st_ctime,
                reverse=True,
            )
            todays_files = [path for path in all_files if path.stat().st_ctime >= today_start]

            if not todays_files:
                self.logger.warning("未找到今天创建的烟草订单明细文件")
                return all_files[0] if all_files else None

            latest_file = todays_files[0]
            self.logger.info(f"找到最新烟草订单明细文件: {latest_file}")
            return latest_file

        except Exception as e:
            self.logger.error(f"获取最新烟草订单文件时出错: {e}")
            return None