feat: 益选 OCR 订单处理系统初始提交

- 智能供应商识别(蓉城易购/烟草/杨碧月/通用)
- 百度 OCR 表格识别集成
- 规则引擎(列映射/数据清洗/单位转换/规格推断)
- 条码映射管理与云端同步(Gitea REST API)
- 云端同步支持:条码映射、供应商配置、商品资料、采购模板
- 拖拽一键处理(图片→OCR→Excel→合并)
- 191 个单元测试
- 移除无用的模板管理功能
- 清理 IDE 产物目录

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-04 19:51:13 +08:00
commit e4d62df7e3
78 changed files with 15257 additions and 0 deletions
+5
View File
@@ -0,0 +1,5 @@
"""
OCR订单处理系统 - 服务模块
-----------------------
提供业务逻辑服务,协调各个核心组件完成业务功能。
"""
+193
View File
@@ -0,0 +1,193 @@
"""
OCR服务模块
---------
提供OCR识别服务,协调OCR流程。
"""
from typing import Dict, List, Optional, Tuple, Union, Any, Callable
import os
from ..config.settings import ConfigManager
from ..core.utils.log_utils import get_logger
from ..core.ocr.table_ocr import OCRProcessor
# Module-level logger used by every OCRService method below.
logger = get_logger(__name__)
class OCRService:
    """OCR recognition service that coordinates the OCR workflow."""

    def __init__(self, config: Optional[ConfigManager] = None):
        """Set up the service and its OCR processor.

        Args:
            config: Configuration manager. A falsy value (including ``None``)
                causes a fresh ``ConfigManager`` to be created.
        """
        logger.info("初始化OCRService")
        if not config:
            config = ConfigManager()
        self.config = config
        # The processor shares the very same configuration object.
        self.ocr_processor = OCRProcessor(config)
        logger.info("OCRService初始化完成")
def get_unprocessed_images(self) -> List[str]:
    """Return the images that still need OCR.

    Returns:
        The value produced by ``self.ocr_processor.get_unprocessed_images()``.
    """
    pending = self.ocr_processor.get_unprocessed_images()
    return pending
def process_image(self, image_path: str) -> Optional[str]:
    """Run the OCR pipeline for one image and return the resulting Excel path.

    The steps are: existence check, image validation, reuse of an already
    generated Excel file, OCR recognition, Excel generation.

    Args:
        image_path: Path of the image file.

    Returns:
        Path of the Excel file (existing or newly generated), or ``None``
        when any step fails. Exceptions are logged and converted to ``None``.
    """
    try:
        if not os.path.exists(image_path):
            logger.error(f"文件不存在: {image_path}")
            return None
        if not self._is_valid_image(image_path):
            logger.error(f"不支持的文件类型: {image_path}")
            return None

        # An Excel file with the derived name means this image was handled before.
        excel_file = self._get_excel_path(image_path)
        if os.path.exists(excel_file):
            logger.info(f"文件已处理过,跳过OCR识别: {image_path}")
            return excel_file

        result = self.ocr_processor.process_image(image_path)
        if result:
            excel_file = self._generate_excel(result, image_path)
            if excel_file:
                logger.info(f"处理完成: {image_path} -> {excel_file}")
                return excel_file
            logger.error(f"生成Excel文件失败: {image_path}")
        else:
            logger.error(f"OCR识别失败: {image_path}")
        return None
    except Exception as e:
        logger.error(f"处理图片时发生错误: {e}", exc_info=True)
        return None
def process_images_batch(self, batch_size: int = None, max_workers: int = None, progress_cb: Optional[Callable[[int], None]] = None) -> Tuple[int, int]:
    """Process images in batch by delegating to the OCR processor.

    Args:
        batch_size: Batch size, forwarded unchanged.
        max_workers: Maximum number of worker threads, forwarded unchanged.
        progress_cb: Optional progress callback, forwarded unchanged.

    Returns:
        A ``(total processed, successfully processed)`` tuple as returned by
        the OCR processor.
    """
    logger.info(f"OCRService开始批量处理图片, batch_size={batch_size}, max_workers={max_workers}")
    processor = self.ocr_processor
    outcome = processor.process_images_batch(batch_size, max_workers, progress_cb)
    return outcome
# batch_process is kept as an alias of process_images_batch for compatibility.
def batch_process(self, batch_size: int = None, max_workers: int = None, progress_cb: Optional[Callable[[int], None]] = None) -> Tuple[int, int]:
    """Alias of ``process_images_batch`` with the same arguments and result.

    Args:
        batch_size: Batch size.
        max_workers: Maximum number of worker threads.
        progress_cb: Optional progress callback.

    Returns:
        A ``(total processed, successfully processed)`` tuple.
    """
    logger.info(f"OCRService.batch_process被调用,转发到process_images_batch")
    forwarded = self.process_images_batch(batch_size, max_workers, progress_cb)
    return forwarded
def validate_image(self, image_path: str) -> bool:
    """Check an image through the OCR processor's own validation.

    Args:
        image_path: Path of the image.

    Returns:
        The result of ``self.ocr_processor.validate_image(image_path)``.
    """
    verdict = self.ocr_processor.validate_image(image_path)
    return verdict
def _is_valid_image(self, image_path: str) -> bool:
    """Tell whether the file is acceptable as an image.

    This is a thin private wrapper around ``validate_image``.

    Args:
        image_path: Path of the image file.

    Returns:
        Whatever ``self.validate_image(image_path)`` returns.
    """
    is_valid = self.validate_image(image_path)
    return is_valid
def _get_excel_path(self, image_path: str) -> str:
    """Derive the Excel output path that corresponds to an image.

    The file name is the image's base name with its extension replaced by
    ``.xlsx``; the directory comes from the ``Paths/output_folder`` setting
    (fallback ``data/output``).

    Args:
        image_path: Path of the image file.

    Returns:
        Path of the Excel file.
    """
    stem, _extension = os.path.splitext(os.path.basename(image_path))
    target_dir = self.config.get('Paths', 'output_folder', fallback='data/output')
    return os.path.join(target_dir, stem + ".xlsx")
def _generate_excel(self, ocr_result: dict, image_path: str) -> Optional[str]:
    """Create the Excel file for an OCR result.

    Args:
        ocr_result: OCR recognition result, passed through to the processor.
        image_path: Path of the original image; used to derive the output path.

    Returns:
        Path of the Excel file, or ``None`` when generation fails. Exceptions
        are logged and converted to ``None``.
    """
    try:
        excel_path = self._get_excel_path(image_path)

        # A bare file name has an empty dirname, and os.makedirs('') raises.
        output_dir = os.path.dirname(excel_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        if hasattr(self.ocr_processor, 'generate_excel'):
            if self.ocr_processor.generate_excel(ocr_result, excel_path):
                return excel_path
        elif os.path.exists(excel_path):
            # Without generate_excel the processor is assumed to have written
            # the file itself, so only its presence is checked.
            return excel_path
        return None
    except Exception as e:
        logger.error(f"生成Excel文件时发生错误: {e}", exc_info=True)
        return None
+245
View File
@@ -0,0 +1,245 @@
"""
订单服务模块
---------
提供订单处理服务,协调Excel处理和订单合并流程。
"""
import os
from typing import Dict, List, Optional, Tuple, Union, Any, Callable
from ..config.settings import ConfigManager
from ..core.utils.log_utils import get_logger
from ..core.excel.processor import ExcelProcessor
from ..core.excel.merger import PurchaseOrderMerger
from ..core.db.product_db import ProductDatabase
# Module-level logger used by every OrderService method below.
logger = get_logger(__name__)
class OrderService:
    """Order service that coordinates Excel processing and purchase-order merging."""

    def __init__(self, config: Optional[ConfigManager] = None):
        """Set up the service with its Excel processor and order merger.

        Args:
            config: Configuration manager. A falsy value (including ``None``)
                causes a fresh ``ConfigManager`` to be created.
        """
        logger.info("初始化OrderService")
        if not config:
            config = ConfigManager()
        self.config = config
        # Both collaborators receive the same configuration object.
        self.excel_processor = ExcelProcessor(config)
        self.order_merger = PurchaseOrderMerger(config)
        logger.info("OrderService初始化完成")
def get_latest_excel(self) -> Optional[str]:
    """Return the most recent Excel file known to the Excel processor.

    Returns:
        The value of ``self.excel_processor.get_latest_excel()``; per the
        annotation this is a path or ``None`` when nothing is found.
    """
    newest = self.excel_processor.get_latest_excel()
    return newest
def process_excel(self, file_path: Optional[str] = None, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
    """Turn an Excel order file into a standard purchase order.

    Args:
        file_path: Path of the Excel file. When falsy, the latest file
            reported by the Excel processor is used.
        progress_cb: Optional progress callback, forwarded to the Excel
            processor.

    Returns:
        Path of the generated purchase order, or ``None`` when no input file
        is available. Otherwise the result of
        ``self.excel_processor.process_specific_file``.
    """
    if not file_path:
        file_path = self.excel_processor.get_latest_excel()
        if not file_path:
            logger.warning("未找到可处理的Excel文件")
            return None
        logger.info("OrderService开始处理最新Excel文件")
    else:
        logger.info(f"OrderService开始处理指定Excel文件: {file_path}")

    # Supplier-specific layouts are normalised first. Detection and
    # preprocessing live in _check_special_preprocess, so no
    # SpecialSuppliersService instance is needed here. A failure in this step
    # must not block the regular processing of the original file.
    try:
        preprocessed_path = self._check_special_preprocess(file_path)
        if preprocessed_path:
            logger.info(f"检测到特殊供应商,已生成预处理文件: {preprocessed_path}")
            file_path = preprocessed_path
    except Exception as e:
        logger.error(f"检查特殊预处理时出错: {e}")

    return self.excel_processor.process_specific_file(file_path, progress_cb=progress_cb)
def _check_special_preprocess(self, file_path: str) -> Optional[str]:
    """Detect a special supplier layout and run its dedicated preprocessing.

    The first 50 rows are read without a header and checked, in this order,
    for: tobacco company, Rongcheng Yigou, Yang Biyue.

    Args:
        file_path: Path of the Excel file to inspect.

    Returns:
        Path returned by the matching preprocessor, or ``None`` when no
        supplier matches or detection fails (failures are logged as warnings).
    """
    try:
        from app.core.utils.file_utils import smart_read_excel

        # header=None keeps the very first row available as data.
        df_head = smart_read_excel(file_path, nrows=50, header=None)
        df_str = df_head.astype(str)

        def any_cell_matches(pattern: str) -> bool:
            # ``pattern`` is treated as a regular expression by str.contains.
            return df_str.apply(lambda column: column.str.contains(pattern)).any().any()

        # 1. Tobacco company: the sheet mentions the licence label or number.
        if any_cell_matches('专卖证号|510109104938'):
            logger.info("识别到烟草公司订单,执行专用预处理...")
            from .tobacco_service import TobaccoService
            return TobaccoService(self.config).preprocess_tobacco_order(file_path)

        # 2. Rongcheng Yigou: the sheet contains the order-number marker.
        if any_cell_matches('RCDH'):
            logger.info("识别到蓉城易购订单,执行专用预处理...")
            from .special_suppliers_service import SpecialSuppliersService
            return SpecialSuppliersService(self.config).preprocess_rongcheng_yigou(file_path)

        # 3. Yang Biyue: the first column mentioning the handler label also
        #    contains the handler's name.
        handler_col = next(
            (col for col in df_str.columns if df_str[col].str.contains('经手人').any()),
            None,
        )
        if handler_col is not None and df_str[handler_col].str.contains('杨碧月').any():
            logger.info("识别到杨碧月订单,执行专用预处理...")
            from .special_suppliers_service import SpecialSuppliersService
            return SpecialSuppliersService(self.config).process_yang_biyue_only(file_path)
    except Exception as e:
        logger.warning(f"智能预处理识别失败: {e}")
    return None
def get_purchase_orders(self) -> List[str]:
    """List the purchase-order files known to the merger.

    Returns:
        The value of ``self.order_merger.get_purchase_orders()``.
    """
    order_files = self.order_merger.get_purchase_orders()
    return order_files
def merge_purchase_orders(self, file_paths: List[str], progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
    """Merge the given purchase-order files.

    Args:
        file_paths: Paths of the purchase orders to merge.
        progress_cb: Optional progress callback, forwarded to ``merge_orders``.

    Returns:
        Whatever ``merge_orders`` returns for these arguments.
    """
    logger.info(f"OrderService开始合并指定采购单: {file_paths}")
    merged = self.merge_orders(file_paths, progress_cb)
    return merged
def merge_all_purchase_orders(self, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
    """Merge every available purchase order.

    Args:
        progress_cb: Optional progress callback, forwarded to ``merge_orders``.

    Returns:
        Whatever ``merge_orders(None, progress_cb)`` returns.
    """
    logger.info("OrderService开始合并所有采购单")
    merged = self.merge_orders(None, progress_cb)
    return merged
def merge_orders(self, file_paths: Optional[List[str]] = None, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
    """Merge purchase orders through the order merger.

    Args:
        file_paths: Paths of the purchase orders; a falsy value is logged as
            "merge all" and passed on unchanged.
        progress_cb: Optional progress callback, forwarded unchanged.

    Returns:
        The result of ``self.order_merger.process(file_paths, progress_cb)``.
    """
    if not file_paths:
        logger.info("OrderService开始合并所有采购单")
    else:
        logger.info(f"OrderService开始合并指定采购单: {file_paths}")
    merged = self.order_merger.process(file_paths, progress_cb)
    return merged
def validate_unit_price(self, result_path: str, threshold: float = 1.0) -> List[str]:
    """Compare purchase-order unit prices with the stored purchase prices.

    Args:
        result_path: Path of the purchase order to check.
        threshold: Absolute price difference above which a row is reported.
            Defaults to ``1.0``, the value that used to be hard-coded.

    Returns:
        One message per barcode whose price differs by more than
        ``threshold``. An empty list means no difference, missing
        barcode/price columns, or an error (which is logged).
    """
    try:
        from app.core.utils.file_utils import smart_read_excel
        from app.core.handlers.column_mapper import ColumnMapper as CM

        # Use the configuration this service was built with, so an injected
        # configuration is honoured instead of being replaced by a new one.
        config = self.config
        template_folder = config.get('Paths', 'template_folder', fallback='templates')
        item_data = config.get('Templates', 'item_data', fallback='商品资料.xlsx')
        item_path = os.path.join(template_folder, item_data)
        product_db_path = config.get('Paths', 'product_db', fallback='data/product_cache.db')

        # Reference purchase prices come from the product database.
        product_db = ProductDatabase(product_db_path, item_path)

        df_res = smart_read_excel(result_path)
        columns = list(df_res.columns)
        res_barcode_col = CM.find_column(columns, 'barcode')
        res_price_col = CM.find_column(columns, 'unit_price')
        if not res_barcode_col or not res_price_col:
            logger.warning("未能在采购单中找到条码或单价列")
            return []

        # One batched lookup for all barcodes of the sheet.
        barcodes = df_res[res_barcode_col].astype(str).str.strip().tolist()
        item_prices = product_db.get_prices(barcodes)

        results = []
        for _, row in df_res.iterrows():
            bc = str(row[res_barcode_col]).strip()
            if bc not in item_prices:
                continue
            try:
                res_price = float(row[res_price_col])
                # A missing or non-numeric reference price skips this row only
                # instead of aborting the whole validation.
                item_price = float(item_prices[bc])
            except (ValueError, TypeError):
                continue
            diff = abs(res_price - item_price)
            if diff > threshold:
                results.append(f"条码 {bc}: 采购单价={res_price} vs 进货价={item_price} 差异={diff:.2f}")
        return results
    except Exception as e:
        logger.error(f"单价校验过程中发生错误: {e}")
        return []
+297
View File
@@ -0,0 +1,297 @@
"""
处理器调度服务
负责管理和调度各种文件处理器,实现智能文件类型检测和处理器选择
"""
import logging
from typing import Dict, Any, Optional, List
from pathlib import Path
from ..core.processors.base import BaseProcessor
from ..core.processors.tobacco_processor import TobaccoProcessor
from ..core.processors.ocr_processor import OCRProcessor
from ..core.utils.log_utils import get_logger
# Module-level logger used by every ProcessorService method below.
logger = get_logger(__name__)
class ProcessorService:
    """Processor dispatch service.

    Holds every processor instance and offers one entry point for
    processing files.
    """

    def __init__(self, config: Dict[str, Any]):
        """Store the configuration and load the processors.

        Args:
            config: System configuration, handed to every processor.
        """
        self.config = config
        self.processors: List[BaseProcessor] = list()
        self._load_processors()
        loaded_count = len(self.processors)
        logger.info(f"处理器服务初始化完成,加载了{loaded_count}个处理器")
def _load_processors(self):
    """Populate ``self.processors`` with built-in and supplier processors.

    The built-in processors are created first, each on its own so that one
    failing constructor does not prevent the others from loading. Supplier
    processors are then created from ``config/suppliers_config.json`` (or
    ``./suppliers_config.json``), one per supplier entry that passes
    validation. Every failure is logged; none is raised.
    """
    import json

    # Previously a constructor failure was "recovered" by calling the same
    # constructors again, which could only fail the same way.
    loaded = []
    for processor_cls in (TobaccoProcessor, OCRProcessor):
        try:
            loaded.append(processor_cls(self.config))
        except Exception as e:
            logger.error(f"加载处理器时出错: {e}", exc_info=True)
    self.processors = loaded

    supplier_configs = []
    try:
        # Preferred location first, then the compatibility location.
        config_path = Path("config/suppliers_config.json")
        if not config_path.exists():
            config_path = Path("./suppliers_config.json")
        if config_path.exists():
            with open(config_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Entries that validate are kept even when others are rejected.
            ok, errs, supplier_configs = self._validate_suppliers_config(data)
            if not ok:
                logger.error("供应商配置校验失败:\n" + "\n".join([f"- {e}" for e in errs]))
            else:
                logger.info(f"从 {config_path} 加载供应商配置,共 {len(supplier_configs)} 个")
        else:
            logger.info("未找到供应商配置文件,跳过供应商处理器加载")
    except Exception as e:
        logger.error(f"读取供应商配置失败: {e}")

    if supplier_configs:
        # Imported once, and only when at least one supplier is configured.
        try:
            from ..core.processors.supplier_processors.generic_supplier_processor import GenericSupplierProcessor
        except Exception as e:
            logger.error(f"加载供应商处理器失败: {e}")
            supplier_configs = []

    for supplier_config in supplier_configs:
        try:
            processor = GenericSupplierProcessor(self.config, supplier_config)
            self.processors.append(processor)
            logger.info(f"加载供应商处理器: {processor.name}")
        except Exception as e:
            logger.error(f"加载供应商处理器失败: {e}")

    logger.info(f"成功加载{len(self.processors)}个处理器")
def _validate_suppliers_config(self, data):
    """Validate the parsed suppliers configuration.

    Args:
        data: Parsed configuration; its ``suppliers`` entry must be a
            non-empty list.

    Returns:
        A ``(ok, errors, valid)`` tuple: ``ok`` is true only when there are no
        errors, ``errors`` lists every message, and ``valid`` holds the
        supplier entries that produced no error. Any exception is reported as
        a single error message.
    """
    try:
        suppliers = data.get('suppliers')
        if not isinstance(suppliers, list) or not suppliers:
            return False, ['suppliers必须是非空数组'], []

        errors, valid = [], []
        for position, supplier in enumerate(suppliers):
            problems = self._validate_single_supplier(supplier, position)
            if problems:
                errors.extend(problems)
            else:
                valid.append(supplier)
        return not errors, errors, valid
    except Exception as e:
        return False, [f'配置解析异常: {e}'], []
def _validate_single_supplier(self, s, idx):
    """Validate one supplier entry of the suppliers configuration.

    Args:
        s: The supplier entry; expected to be a dict.
        idx: Position of the entry, used to prefix error messages.

    Returns:
        A list of error messages; empty when the entry is valid. Optional
        sections given as ``null`` are treated as absent, and malformed
        entries are reported instead of raising.
    """
    prefix = f'suppliers[{idx}]'
    if not isinstance(s, dict):
        # Report it for this entry only; raising here would make the caller
        # reject the whole configuration.
        return [f'{prefix} 必须为对象']

    errs = []
    name = s.get('name')
    if not name or not isinstance(name, str):
        errs.append(f'{prefix}.name 必须为字符串')

    if not s.get('filename_patterns') and not s.get('content_indicators'):
        errs.append(f'{prefix} 必须至少提供 filename_patterns 或 content_indicators 之一')

    cm = s.get('column_mapping') or {}
    if not isinstance(cm, dict):
        errs.append(f'{prefix}.column_mapping 必须为对象')

    # ``or []`` turns an explicit null into "no rules" instead of a crash.
    cr = s.get('cleaning_rules') or []
    if not isinstance(cr, list):
        errs.append(f'{prefix}.cleaning_rules 必须为数组')
    else:
        for i, rule in enumerate(cr):
            if not isinstance(rule, dict):
                errs.append(f'{prefix}.cleaning_rules[{i}] 必须为对象')
                continue
            rtype = rule.get('type')
            if rtype not in ('remove_rows', 'fill_na', 'convert_type'):
                errs.append(f'{prefix}.cleaning_rules[{i}].type 非法: {rtype}')
            if rtype == 'remove_rows' and not rule.get('condition'):
                errs.append(f'{prefix}.cleaning_rules[{i}].condition 必填')
            if rtype in ('fill_na', 'convert_type'):
                if not rule.get('columns') and not rule.get('column'):
                    errs.append(f'{prefix}.cleaning_rules[{i}] 需提供 columns 或 column')

    calc = s.get('calculations') or []
    if not isinstance(calc, list):
        errs.append(f'{prefix}.calculations 必须为数组')
    else:
        for i, c in enumerate(calc):
            if not isinstance(c, dict):
                errs.append(f'{prefix}.calculations[{i}] 必须为对象')
                continue
            ctype = c.get('type')
            if ctype not in ('multiply', 'divide', 'formula'):
                errs.append(f'{prefix}.calculations[{i}].type 非法: {ctype}')
            if ctype in ('multiply', 'divide'):
                if not c.get('source_column') or not c.get('target_column'):
                    errs.append(f'{prefix}.calculations[{i}] 需提供 source_column 与 target_column')
            if ctype == 'formula' and (not c.get('formula') or not c.get('target_column')):
                errs.append(f'{prefix}.calculations[{i}] 需提供 formula 与 target_column')
    return errs
def process_file(self, input_file: Path, output_dir: Path,
                 preferred_processor: Optional[str] = None) -> Optional[Path]:
    """Process a file with the preferred or the first suitable processor.

    Args:
        input_file: Path of the input file; it must exist.
        output_dir: Output directory; created when it does not exist.
        preferred_processor: Optional processor name (or class name) to try
            before automatic selection.

    Returns:
        The path returned by the chosen processor, or ``None`` when the input
        is missing, no processor fits, or an exception occurs (logged).
    """
    if not input_file.exists():
        logger.error(f"输入文件不存在: {input_file}")
        return None

    if not output_dir.exists():
        output_dir.mkdir(parents=True, exist_ok=True)

    try:
        if preferred_processor:
            processor = self._get_processor_by_name(preferred_processor)
            if processor and processor.can_process(input_file):
                logger.info(f"使用指定的处理器: {processor.name}")
                return processor.process(input_file, output_dir)
            # Fall through to automatic selection below.
            logger.warning(f"指定的处理器不可用或无法处理该文件: {preferred_processor}")

        suitable_processors = [p for p in self.processors if p.can_process(input_file)]
        if suitable_processors:
            # Registration order decides which processor wins.
            processor = suitable_processors[0]
            logger.info(f"使用处理器 {processor.name} 处理文件: {input_file}")
            return processor.process(input_file, output_dir)

        logger.warning(f"未找到适合处理文件的处理器: {input_file}")
        logger.info(f"支持的文件类型: {self.get_supported_types()}")
        return None
    except Exception as e:
        logger.error(f"处理文件时出错: {e}", exc_info=True)
        return None
def _get_processor_by_name(self, name: str) -> Optional[BaseProcessor]:
    """Find a processor by its ``name`` attribute or its class name.

    Args:
        name: Processor name or processor class name.

    Returns:
        The first matching processor in registration order, or ``None``.
    """
    matching = (
        candidate
        for candidate in self.processors
        if candidate.name == name or candidate.__class__.__name__ == name
    )
    return next(matching, None)
def get_supported_types(self) -> List[Dict[str, Any]]:
    """Describe the file types handled by each registered processor.

    Returns:
        One dict per processor with the keys ``name``, ``description``,
        ``extensions`` and ``class_name``.
    """
    summaries = []
    for processor in self.processors:
        summaries.append({
            'name': processor.name,
            'description': processor.description,
            'extensions': processor.get_supported_extensions(),
            'class_name': processor.__class__.__name__,
        })
    return summaries
def get_processor_info(self) -> List[Dict[str, Any]]:
    """Return detailed information about each registered processor.

    Returns:
        One dict per processor with the keys ``name``, ``description``,
        ``extensions``, ``required_columns``, ``class_name`` and ``module``.
    """
    details = []
    for processor in self.processors:
        processor_type = processor.__class__
        details.append({
            'name': processor.name,
            'description': processor.description,
            'extensions': processor.get_supported_extensions(),
            'required_columns': processor.get_required_columns(),
            'class_name': processor_type.__name__,
            'module': processor_type.__module__,
        })
    return details
def can_process_file(self, file_path: Path) -> bool:
    """Tell whether at least one processor accepts the file.

    Args:
        file_path: Path of the file; a non-existent path yields ``False``.

    Returns:
        ``True`` when any registered processor reports it can process it.
    """
    if file_path.exists():
        return any(candidate.can_process(file_path) for candidate in self.processors)
    return False
def get_suitable_processors(self, file_path: Path) -> List[BaseProcessor]:
    """Collect every processor that accepts the file.

    Args:
        file_path: Path of the file; a non-existent path yields ``[]``.

    Returns:
        The accepting processors in registration order.
    """
    accepted = []
    if file_path.exists():
        for candidate in self.processors:
            if candidate.can_process(file_path):
                accepted.append(candidate)
    return accepted
def reload_processors(self):
    """Discard the current processors and load them again."""
    logger.info("重新加载处理器...")
    # Clear in place so existing references to the list stay valid.
    self.processors.clear()
    self._load_processors()
    reloaded_count = len(self.processors)
    logger.info(f"重新加载完成,共{reloaded_count}个处理器")
def add_processor(self, processor: BaseProcessor):
    """Register an additional processor at the end of the list.

    Args:
        processor: Processor instance to register.
    """
    registry = self.processors
    registry.append(processor)
    logger.info(f"添加处理器: {processor.name}")
def remove_processor(self, processor_name: str) -> bool:
    """Remove the first processor matching a name or class name.

    Args:
        processor_name: Processor name or processor class name.

    Returns:
        ``True`` when a processor was removed, ``False`` otherwise.
    """
    position = next(
        (
            index
            for index, candidate in enumerate(self.processors)
            if candidate.name == processor_name
            or candidate.__class__.__name__ == processor_name
        ),
        None,
    )
    if position is None:
        logger.warning(f"未找到要移除的处理器: {processor_name}")
        return False
    self.processors.pop(position)
    logger.info(f"移除处理器: {processor_name}")
    return True
+227
View File
@@ -0,0 +1,227 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import re
import time
import pandas as pd
from typing import Optional, Callable
from ..core.utils.log_utils import get_logger
# Module-level logger used by every SpecialSuppliersService method below.
logger = get_logger(__name__)
class SpecialSuppliersService:
    """Service holding the logic for special suppliers such as Rongcheng Yigou."""

    def __init__(self, config_manager=None):
        """Remember the configuration manager for later use.

        Args:
            config_manager: Configuration object handed to ``OrderService``
                by the processing methods; may be ``None``.
        """
        self.config_manager = config_manager
def process_yang_biyue_only(self, src_path: str) -> Optional[str]:
    """Preprocess a Yang Biyue order and return the cleaned file's path.

    The sheet qualifies when a column whose header contains the handler label
    has a cell containing the handler's name. Source columns are mapped to the
    Chinese column names used downstream, first by exact header match and then
    by substring match.

    Args:
        src_path: Path of the original Excel file.

    Returns:
        Path of the preprocessed file (written next to the source with the
        ``预处理之后_`` prefix), or ``None`` when the sheet is not a Yang Biyue
        order, too few columns match, the barcode column is missing, or an
        error occurs (logged).
    """
    try:
        from app.core.utils.file_utils import smart_read_excel

        df = smart_read_excel(src_path)

        handler_col = next((col for col in df.columns if '经手人' in str(col)), None)
        if handler_col is None or not df[handler_col].astype(str).str.contains('杨碧月').any():
            return None

        logger.info("识别到杨碧月订单,正在执行专用清洗...")

        # Source header -> standard column name.
        column_map = {
            '商品条码': '商品条码',
            '商品名称': '商品名称',
            '商品规格': '规格',
            '单位': '单位',
            '数量': '数量',
            '含税单价': '单价',
            '含税金额': '金额'
        }
        # Headers that contain a target as a substring but mean something else.
        excluded = {'单位': '结算单位', '数量': '基本单位数量'}

        found_cols = {}
        # Pass 1: exact header match.
        for target_zh, std_name in column_map.items():
            for col in df.columns:
                if col not in found_cols and str(col).strip() == target_zh:
                    found_cols[col] = std_name
                    break

        # Pass 2: substring match for targets still unmatched. Columns already
        # assigned are skipped so one column cannot serve two targets.
        for target_zh, std_name in column_map.items():
            if std_name in found_cols.values():
                continue
            for col in df.columns:
                col_str = str(col)
                if col in found_cols or target_zh not in col_str:
                    continue
                if target_zh in excluded and excluded[target_zh] in col_str:
                    continue
                found_cols[col] = std_name
                break

        if len(found_cols) < 4:
            logger.error(f"杨碧月订单列匹配不足: 找到 {list(found_cols.values())}")
            return None
        if '商品条码' not in found_cols.values():
            # Without this check the dropna below fails with a KeyError.
            logger.error(f"杨碧月订单缺少商品条码列: 找到 {list(found_cols.values())}")
            return None

        df_clean = df[list(found_cols.keys())].copy()
        df_clean = df_clean.rename(columns=found_cols)

        # Rows without a barcode carry no order line.
        df_clean = df_clean.dropna(subset=['商品条码'])

        out_dir = os.path.dirname(src_path)
        base = os.path.basename(src_path)
        final_path = os.path.join(out_dir, f"预处理之后_{base}")
        df_clean.to_excel(final_path, index=False)
        return final_path
    except Exception as e:
        logger.error(f"预处理杨碧月订单出错: {e}")
        return None
def process_yang_biyue(self, src_path: str, progress_cb: Optional[Callable[[int, str], None]] = None) -> Optional[str]:
    """Preprocess a Yang Biyue order and run the standard conversion on it.

    Args:
        src_path: Path of the original Excel file.
        progress_cb: Optional ``(percent, message)`` callback. Conversion
            progress is rescaled into the 60-100 range.

    Returns:
        The result of ``OrderService.process_excel`` for the preprocessed
        file, or ``None`` when preprocessing yields nothing or an error
        occurs (logged).
    """
    def relay_progress(percent):
        # Rescale the converter's 0-100 progress into 60-100.
        if progress_cb:
            return progress_cb(60 + int(percent * 0.4), "生成采购单中...")
        return None

    try:
        if progress_cb:
            progress_cb(10, "正在进行杨碧月订单预处理...")

        cleaned_path = self.process_yang_biyue_only(src_path)
        if not cleaned_path:
            return None

        if progress_cb:
            progress_cb(60, "预处理文件已保存,开始标准转换流程...")

        # Imported lazily to avoid a circular dependency.
        from app.services.order_service import OrderService
        converter = OrderService(self.config_manager)
        return converter.process_excel(cleaned_path, progress_cb=relay_progress)
    except Exception as e:
        logger.error(f"处理杨碧月订单出错: {e}")
        return None
def preprocess_rongcheng_yigou(self, src_path: str, progress_cb: Optional[Callable[[int, str], None]] = None) -> Optional[str]:
    """Preprocess a Rongcheng Yigou order using fixed column positions.

    The sheet is read without a header; rows 0-2 (order number, contact,
    header) are skipped and data starts at row 3. Columns are taken by
    position: C (2) product name, E (4) barcode, N (13) quantity, Q (16) unit
    price, S (18) amount. A row whose barcode cell holds several barcodes is
    split into one row per barcode with the quantity distributed across them.

    Args:
        src_path: Path of the original Excel file.
        progress_cb: Optional ``(percent, message)`` callback.

    Returns:
        Path of the preprocessed file (written next to the source with the
        ``预处理之后_`` prefix), or ``None`` when the sheet has too few rows,
        lacks the barcode column, or an error occurs (logged).
    """
    try:
        if progress_cb:
            progress_cb(10, "正在处理蓉城易购预处理...")

        from app.core.utils.file_utils import smart_read_excel

        df_raw = smart_read_excel(src_path, header=None)
        if len(df_raw) <= 3:
            logger.error("蓉城易购文件数据行数不足")
            return None

        df_data = df_raw.iloc[3:].reset_index(drop=True)

        idx_map = {
            2: '商品名称',
            4: '商品条码',
            13: '数量',
            16: '单价',
            18: '金额'
        }

        # Keep only the positions that exist in this sheet.
        available_indices = [i for i in idx_map if i < df_data.shape[1]]
        df2 = df_data.iloc[:, available_indices].copy()
        df2.columns = [idx_map[i] for i in available_indices]

        if '商品条码' not in df2.columns:
            # Everything below keys on the barcode; report it explicitly
            # instead of failing later with a KeyError.
            logger.error("蓉城易购文件缺少商品条码列")
            return None

        for c in ['数量', '单价', '金额']:
            if c in df2.columns:
                df2[c] = pd.to_numeric(df2[c], errors='coerce').fillna(0)

        # Drop rows without a usable barcode.
        df2 = df2.dropna(subset=['商品条码'])
        df2['商品条码'] = df2['商品条码'].astype(str).str.strip()
        df2 = df2[df2['商品条码'] != '']

        if '数量' in df2.columns:
            # One definition of the separators: '/', ASCII comma, full-width
            # comma (U+FF0C) and ideographic comma (U+3001). Escapes keep the
            # pattern safe from encoding damage.
            separators = re.compile('[/,\uFF0C\u3001]+')
            rows = []
            for _, row in df2.iterrows():
                parts = [p.strip() for p in separators.split(row['商品条码']) if p.strip()]
                q_total = float(row['数量'] or 0)
                if len(parts) < 2 or q_total <= 0:
                    rows.append(row)
                    continue

                # Spread the whole part of the quantity as evenly as possible;
                # the first ``remainder`` barcodes receive one extra unit.
                n = len(parts)
                base_qty = int(q_total // n)
                remainder = int(q_total % n)
                for i, p_bc in enumerate(parts):
                    new_row = row.copy()
                    current_qty = base_qty + (1 if i < remainder else 0)
                    new_row['商品条码'] = p_bc
                    new_row['数量'] = current_qty
                    if '单价' in new_row:
                        # 单价 is numeric after the coercion above.
                        new_row['金额'] = float(new_row['单价'] or 0) * current_qty
                    rows.append(new_row)
            if rows:
                # With no rows df2 is already empty and keeps its columns.
                df2 = pd.DataFrame(rows)

        out_dir = os.path.dirname(src_path)
        base = os.path.basename(src_path)
        final_path = os.path.join(out_dir, f"预处理之后_{base}")
        df2.to_excel(final_path, index=False)

        if progress_cb:
            progress_cb(100, "蓉城易购预处理完成")
        return final_path
    except Exception as e:
        logger.error(f"预处理蓉城易购订单出错: {e}")
        return None
def process_rongcheng_yigou(self, src_path: str, progress_cb: Optional[Callable[[int, str], None]] = None) -> Optional[str]:
    """Compatibility entry point: preprocess, then run the standard conversion.

    Args:
        src_path: Path of the original Excel file.
        progress_cb: Optional ``(percent, message)`` callback. Conversion
            progress is rescaled into the 60-100 range.

    Returns:
        The result of ``OrderService.process_excel`` for the preprocessed
        file, or ``None`` when preprocessing yields nothing.
    """
    cleaned_path = self.preprocess_rongcheng_yigou(src_path, progress_cb)
    if not cleaned_path:
        return None

    def relay_progress(percent):
        # Rescale the converter's 0-100 progress into 60-100.
        if progress_cb:
            return progress_cb(60 + int(percent * 0.4), "生成采购单中...")
        return None

    from app.services.order_service import OrderService
    converter = OrderService(self.config_manager)
    return converter.process_excel(cleaned_path, progress_cb=relay_progress)
+336
View File
@@ -0,0 +1,336 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
烟草公司订单处理服务
----------------
处理烟草公司特定格式的订单明细文件,生成银豹采购单
"""
import os
import glob
import datetime
import pandas as pd
import xlrd
import xlwt
import re
from xlutils.copy import copy
from openpyxl import load_workbook
from typing import Optional, Dict, Any, List, Tuple
from app.core.utils.log_utils import get_logger
from app.core.utils.string_utils import parse_monetary_string
from app.core.utils.dialog_utils import show_custom_dialog # 导入自定义弹窗工具
from ..config.settings import ConfigManager
# Module-level logger used by every TobaccoService method below.
logger = get_logger(__name__)
class TobaccoService:
    """Order-processing service for tobacco company orders."""

    def __init__(self, config: "ConfigManager"):
        """Read the paths from the configuration and prepare the result folder.

        Args:
            config: Configuration object whose ``get(section, key,
                fallback=...)`` is used below. A plain ``dict`` does not work
                because ``dict.get`` has no ``fallback`` keyword.
        """
        self.config = config
        self.output_dir = config.get('Paths', 'output_folder', fallback='data/output')
        self.template_file = config.get('Paths', 'template_file', fallback='templates/银豹-采购单模板.xls')

        # Tobacco purchase orders are always written to the result directory.
        result_dir = "data/result"
        os.makedirs(result_dir, exist_ok=True)
        self.output_file = os.path.join(result_dir, '银豹采购单_烟草公司.xls')
def get_latest_tobacco_order(self) -> Optional[str]:
    """Find the most recently created tobacco order-detail file.

    Files matching ``订单明细*.xlsx`` in ``self.output_dir`` are considered and
    ranked by ``os.path.getctime``.

    Returns:
        Path of the newest file, or ``None`` when none exists. A file not
        created today is still returned, with a warning in the log.
    """
    midnight = datetime.datetime.combine(datetime.date.today(), datetime.time.min)
    today_start = midnight.timestamp()

    candidates = glob.glob(os.path.join(self.output_dir, "订单明细*.xlsx"))
    if not candidates:
        logger.warning("未找到烟草公司订单明细文件")
        return None

    latest_file = max(candidates, key=os.path.getctime)
    if os.path.getctime(latest_file) < today_start:
        # Stale, but still the best candidate available.
        logger.warning(f"找到的烟草订单明细文件不是今天创建的: {latest_file}")
    else:
        logger.info(f"找到最新烟草订单明细文件: {latest_file}")
    return latest_file
def preprocess_tobacco_order(self, file_path: str) -> Optional[str]:
    """Preprocess a tobacco order using fixed column positions.

    The sheet is read without a header; rows 0-2 (licence number, header,
    totals) are skipped and data starts at row 3. Columns are taken by
    position: A (0) product name, B (1) barcode, E (4) wholesale price,
    G (6) order quantity, H (7) amount. Rows with a zero order quantity are
    dropped; quantity is multiplied by 10 and the wholesale price divided by
    10 to obtain the unit price.

    Args:
        file_path: Path of the original Excel file.

    Returns:
        Path of the preprocessed file (written next to the source with the
        ``预处理之后_`` prefix), or ``None`` when the sheet is too short, lacks
        a required column, has no ordered rows, or an error occurs (logged).
    """
    try:
        logger.info(f"执行烟草订单专用预处理: {file_path}")
        from app.core.utils.file_utils import smart_read_excel

        df_raw = smart_read_excel(file_path, header=None)
        if len(df_raw) <= 3:
            logger.error("烟草订单文件数据行数不足")
            return None

        df_data = df_raw.iloc[3:].reset_index(drop=True)

        idx_map = {
            0: '商品名称',
            1: '商品条码',
            4: '批发价',
            6: '数量',
            7: '金额'
        }

        available_indices = [i for i in idx_map if i < df_data.shape[1]]
        df = df_data.iloc[:, available_indices].copy()
        df.columns = [idx_map[i] for i in available_indices]

        # Every mapped column is used below, so a narrower sheet cannot be
        # processed; say so instead of failing later with a KeyError.
        missing = [name for name in idx_map.values() if name not in df.columns]
        if missing:
            logger.error(f"烟草订单文件缺少必要列: {missing}")
            return None

        # 1. Keep only rows with a non-zero order quantity.
        df['数量'] = pd.to_numeric(df['数量'], errors='coerce').fillna(0)
        df = df[df['数量'] != 0].copy()
        if df.empty:
            logger.warning("烟草订单无有效订单量记录")
            return None

        # 2. Quantity = order quantity * 10; unit price = wholesale price / 10.
        df['单价'] = pd.to_numeric(df['批发价'], errors='coerce').fillna(0) / 10
        df['数量'] = df['数量'] * 10

        # 3. Make the amount numeric.
        df['金额'] = pd.to_numeric(df['金额'], errors='coerce').fillna(0)

        # 4. Keep only the columns needed downstream.
        df_final = df[['商品条码', '商品名称', '数量', '单价', '金额']].copy()

        out_dir = os.path.dirname(file_path)
        base = os.path.basename(file_path)
        final_path = os.path.join(out_dir, f"预处理之后_{base}")
        df_final.to_excel(final_path, index=False)

        logger.info(f"烟草订单预处理完成: {final_path}")
        return final_path
    except Exception as e:
        logger.error(f"烟草订单预处理失败: {e}")
        return None
def process_tobacco_order(self, input_file=None):
    """Process a tobacco order and generate the purchase order.

    Args:
        input_file: Path of the order-detail file. When ``None`` the latest
            file is looked up with ``get_latest_tobacco_order``.

    Returns:
        Path of the generated purchase order, or ``None`` when no input is
        found, reading fails, generation fails, or an unexpected error
        occurs (logged).
    """
    try:
        if input_file is None:
            input_file = self.get_latest_tobacco_order()
            if input_file is None:
                # get_latest_tobacco_order already logged the warning.
                logger.error("未找到可处理的烟草订单明细文件")
                return None

        logger.info(f"开始处理烟草公司订单: {input_file}")

        order_info = self._read_order_info(input_file)
        if not order_info:
            logger.error(f"读取订单信息失败: {input_file}")
            return None
        order_time, total_amount = order_info

        order_data = self._read_order_data(input_file)
        if order_data is None or order_data.empty:
            logger.error(f"读取订单数据失败: {input_file}")
            return None

        output_file = self._generate_pospal_order(order_data, order_time)
        if not output_file:
            logger.error("生成银豹采购单失败")
            return None

        total_count = len(order_data)
        logger.info(f"烟草公司订单处理成功,订单时间: {order_time}, 总金额: {total_amount}, 处理条目: {total_count}")
        logger.info(f"采购单已生成: {output_file}")

        # The dialog is informational only. The file already exists at this
        # point, so a dialog failure must not turn success into ``None``.
        try:
            self.show_result_dialog(output_file, order_time, total_count, total_amount)
        except Exception as dialog_error:
            logger.warning(f"显示处理结果对话框失败: {dialog_error}")

        return output_file
    except Exception as e:
        logger.error(f"处理烟草公司订单时发生错误: {e}", exc_info=True)
        return None
def _read_order_info(self, file_path: str) -> Optional[Tuple[str, float]]:
    """Read the order time and total amount from the active worksheet.

    The order time is taken from cell H1 and the total amount from cell H3.

    Args:
        file_path: Path of the workbook.

    Returns:
        ``(order_time, total_amount)``; an empty H1 becomes ``"(空)"`` and an
        empty H3 becomes ``0``. ``None`` when reading fails (logged).
    """
    try:
        sheet = load_workbook(file_path, data_only=True).active
        order_time = sheet["H1"].value
        if not order_time:
            order_time = "(空)"
        total_amount = sheet["H3"].value
        if not total_amount:
            total_amount = 0
        return order_time, total_amount
    except Exception as e:
        logger.error(f"读取订单信息出错: {e}")
        return None
def _read_order_data(self, file_path: str) -> Optional[pd.DataFrame]:
    """Read the order lines and derive purchase quantity and unit price.

    The first three rows are skipped and the columns are named positionally.
    Rows with a zero order quantity are dropped. ``采购量`` is the order
    quantity times 10 and ``采购单价`` is the amount divided by ``采购量``.

    Args:
        file_path: Path of the Excel file.

    Returns:
        The filtered DataFrame with a fresh 0-based index, or ``None`` when
        reading fails (logged).
    """
    columns = ['商品', '盒码', '条码', '建议零售价', '批发价', '需求量', '订单量', '金额']

    try:
        from app.core.utils.file_utils import smart_read_excel

        df_old = smart_read_excel(file_path, header=None, skiprows=3, names=columns)

        # Coerce before filtering: a blank or textual quantity is NaN, and
        # NaN != 0 is true, so such rows would pass the filter and later
        # break the int() conversion when the purchase order is written.
        # This mirrors preprocess_tobacco_order.
        df_old['订单量'] = pd.to_numeric(df_old['订单量'], errors='coerce').fillna(0)
        df_old['金额'] = pd.to_numeric(df_old['金额'], errors='coerce').fillna(0)

        df_filtered = df_old[df_old['订单量'] != 0].copy()
        df_filtered['采购量'] = df_filtered['订单量'] * 10
        df_filtered['采购单价'] = df_filtered['金额'] / df_filtered['采购量']
        df_filtered = df_filtered.reset_index(drop=True)
        return df_filtered
    except Exception as e:
        logger.error(f"读取订单数据失败: {e}")
        return None
def _generate_pospal_order(self, order_data: pd.DataFrame, order_time: str) -> Optional[str]:
    """Write the order lines into a copy of the purchase-order template.

    Column positions are looked up by header text in the first row of the
    template's first sheet. Data rows are written from row 1 downwards.

    Args:
        order_data: Order lines with the columns ``盒码``, ``采购量`` and
            ``采购单价``.
        order_time: Order time; accepted for interface compatibility and not
            used when writing the file.

    Returns:
        ``self.output_file`` on success, or ``None`` when the template is
        missing or writing fails (logged).
    """
    try:
        if not os.path.exists(self.template_file):
            logger.error(f"采购单模板文件不存在: {self.template_file}")
            return None

        # formatting_info=True keeps the template's formatting in the copy.
        template_rd = xlrd.open_workbook(self.template_file, formatting_info=True)
        template_wb = copy(template_rd)
        template_ws = template_wb.get_sheet(0)

        header_row = template_rd.sheet_by_index(0).row_values(0)
        barcode_col = header_row.index("条码(必填)")
        amount_col = header_row.index("采购量(必填)")
        gift_col = header_row.index("赠送量")
        price_col = header_row.index("采购单价(必填)")

        # Number the output rows by position, not by DataFrame index, so a
        # frame with a non-default index still fills rows 1..n without gaps.
        for sheet_row, (_, row) in enumerate(order_data.iterrows(), start=1):
            template_ws.write(sheet_row, barcode_col, row['盒码'])
            template_ws.write(sheet_row, amount_col, int(row['采购量']))
            template_ws.write(sheet_row, gift_col, "")  # gift quantity stays empty
            template_ws.write(sheet_row, price_col, round(row['采购单价'], 2))

        os.makedirs(os.path.dirname(self.output_file), exist_ok=True)
        template_wb.save(self.output_file)
        logger.info(f"采购单生成成功: {self.output_file}")
        return self.output_file
    except Exception as e:
        logger.error(f"生成银豹采购单失败: {e}")
        return None
def show_result_dialog(self, output_file, order_time, total_count, total_amount):
    """Show the result dialog for a processed tobacco order.

    Args:
        output_file: Path of the generated file.
        order_time: Order time shown in the dialog.
        total_count: Number of processed items.
        total_amount: Total amount; parsed with ``parse_monetary_string`` and
            treated as ``0.0`` when parsing yields ``None``.
    """
    # Normalise the amount to a number before formatting it.
    parsed = parse_monetary_string(total_amount)
    if parsed is None:
        total_amount = 0.0
    else:
        total_amount = parsed
    amount_display = f"¥{total_amount:.2f}"

    processed_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    additional_info = {
        "订单来源": "烟草公司",
        "处理时间": processed_at
    }

    show_custom_dialog(
        title="烟草订单处理结果",
        message="烟草订单处理完成",
        result_file=output_file,
        time_info=order_time,
        count_info=f"{total_count}个商品",
        amount_info=amount_display,
        additional_info=additional_info
    )

    logger.info(f"烟草公司订单处理成功,订单时间: {order_time}, 总金额: {total_amount}, 处理条目: {total_count}")