新版本

This commit is contained in:
2025-11-15 18:46:03 +08:00
parent 9f97ac3f21
commit 73d17836d7
68 changed files with 49834 additions and 69055 deletions
+6 -6
View File
@@ -4,7 +4,7 @@ OCR服务模块
提供OCR识别服务,协调OCR流程。
"""
from typing import Dict, List, Optional, Tuple, Union, Any
from typing import Dict, List, Optional, Tuple, Union, Any, Callable
import os
from ..config.settings import ConfigManager
@@ -88,7 +88,7 @@ class OCRService:
logger.error(f"处理图片时发生错误: {e}", exc_info=True)
return None
def process_images_batch(self, batch_size: int = None, max_workers: int = None) -> Tuple[int, int]:
def process_images_batch(self, batch_size: int = None, max_workers: int = None, progress_cb: Optional[Callable[[int], None]] = None) -> Tuple[int, int]:
"""
批量处理图片
@@ -100,10 +100,10 @@ class OCRService:
(总处理数, 成功处理数)元组
"""
logger.info(f"OCRService开始批量处理图片, batch_size={batch_size}, max_workers={max_workers}")
return self.ocr_processor.process_images_batch(batch_size, max_workers)
return self.ocr_processor.process_images_batch(batch_size, max_workers, progress_cb)
# 添加batch_process作为process_images_batch的别名,确保兼容性
def batch_process(self, batch_size: int = None, max_workers: int = None) -> Tuple[int, int]:
def batch_process(self, batch_size: int = None, max_workers: int = None, progress_cb: Optional[Callable[[int], None]] = None) -> Tuple[int, int]:
"""
批量处理图片(别名方法,与process_images_batch功能相同)
@@ -115,7 +115,7 @@ class OCRService:
(总处理数, 成功处理数)元组
"""
logger.info(f"OCRService.batch_process被调用,转发到process_images_batch")
return self.process_images_batch(batch_size, max_workers)
return self.process_images_batch(batch_size, max_workers, progress_cb)
def validate_image(self, image_path: str) -> bool:
"""
@@ -190,4 +190,4 @@ class OCRService:
except Exception as e:
logger.error(f"生成Excel文件时发生错误: {e}", exc_info=True)
return None
return None
+10 -10
View File
@@ -4,7 +4,7 @@
提供订单处理服务,协调Excel处理和订单合并流程。
"""
from typing import Dict, List, Optional, Tuple, Union, Any
from typing import Dict, List, Optional, Tuple, Union, Any, Callable
from ..config.settings import ConfigManager
from ..core.utils.log_utils import get_logger
@@ -43,7 +43,7 @@ class OrderService:
"""
return self.excel_processor.get_latest_excel()
def process_excel(self, file_path: Optional[str] = None) -> Optional[str]:
def process_excel(self, file_path: Optional[str] = None, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
"""
处理Excel文件,生成采购单
@@ -55,10 +55,10 @@ class OrderService:
"""
if file_path:
logger.info(f"OrderService开始处理指定Excel文件: {file_path}")
return self.excel_processor.process_specific_file(file_path)
return self.excel_processor.process_specific_file(file_path, progress_cb=progress_cb)
else:
logger.info("OrderService开始处理最新Excel文件")
return self.excel_processor.process_latest_file()
return self.excel_processor.process_latest_file(progress_cb=progress_cb)
def get_purchase_orders(self) -> List[str]:
"""
@@ -69,7 +69,7 @@ class OrderService:
"""
return self.order_merger.get_purchase_orders()
def merge_purchase_orders(self, file_paths: List[str]) -> Optional[str]:
def merge_purchase_orders(self, file_paths: List[str], progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
"""
合并指定的采购单文件
@@ -80,9 +80,9 @@ class OrderService:
合并后的采购单文件路径,如果合并失败则返回None
"""
logger.info(f"OrderService开始合并指定采购单: {file_paths}")
return self.merge_orders(file_paths)
return self.merge_orders(file_paths, progress_cb)
def merge_all_purchase_orders(self) -> Optional[str]:
def merge_all_purchase_orders(self, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
"""
合并所有可用的采购单文件
@@ -90,9 +90,9 @@ class OrderService:
合并后的采购单文件路径,如果合并失败则返回None
"""
logger.info("OrderService开始合并所有采购单")
return self.merge_orders(None)
return self.merge_orders(None, progress_cb)
def merge_orders(self, file_paths: Optional[List[str]] = None) -> Optional[str]:
def merge_orders(self, file_paths: Optional[List[str]] = None, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
"""
合并采购单
@@ -107,4 +107,4 @@ class OrderService:
else:
logger.info("OrderService开始合并所有采购单")
return self.order_merger.process(file_paths)
return self.order_merger.process(file_paths, progress_cb)
+297
View File
@@ -0,0 +1,297 @@
"""
处理器调度服务
负责管理和调度各种文件处理器,实现智能文件类型检测和处理器选择
"""
import logging
from typing import Dict, Any, Optional, List
from pathlib import Path
from ..core.processors.base import BaseProcessor
from ..core.processors.tobacco_processor import TobaccoProcessor
from ..core.processors.ocr_processor import OCRProcessor
from ..core.utils.log_utils import get_logger
logger = get_logger(__name__)
class ProcessorService:
"""处理器调度服务
负责管理所有处理器实例,提供统一的文件处理接口
"""
def __init__(self, config: Dict[str, Any]):
"""初始化处理器服务
Args:
config: 系统配置字典
"""
self.config = config
self.processors: List[BaseProcessor] = []
self._load_processors()
logger.info(f"处理器服务初始化完成,加载了{len(self.processors)}个处理器")
def _load_processors(self):
"""加载所有处理器"""
try:
self.processors = [
TobaccoProcessor(self.config),
OCRProcessor(self.config),
]
supplier_configs = []
try:
import json
from pathlib import Path
# 优先从`config/suppliers_config.json`加载
config_path = Path("config/suppliers_config.json")
if not config_path.exists():
# 兼容其它路径
config_path = Path("./suppliers_config.json")
if config_path.exists():
with open(config_path, 'r', encoding='utf-8') as f:
data = json.load(f)
ok, errs, supplier_configs = self._validate_suppliers_config(data)
if not ok:
logger.error("供应商配置校验失败:\n" + "\n".join([f"- {e}" for e in errs]))
else:
logger.info(f"{config_path} 加载供应商配置,共 {len(supplier_configs)}")
else:
logger.info("未找到供应商配置文件,跳过供应商处理器加载")
except Exception as e:
logger.error(f"读取供应商配置失败: {e}")
for supplier_config in supplier_configs:
try:
from ..core.processors.supplier_processors.generic_supplier_processor import GenericSupplierProcessor
processor = GenericSupplierProcessor(self.config, supplier_config)
self.processors.append(processor)
logger.info(f"加载供应商处理器: {processor.name}")
except Exception as e:
logger.error(f"加载供应商处理器失败: {e}")
logger.info(f"成功加载{len(self.processors)}个处理器")
except Exception as e:
logger.error(f"加载处理器时出错: {e}", exc_info=True)
self.processors = [
TobaccoProcessor(self.config),
OCRProcessor(self.config),
]
def _validate_suppliers_config(self, data):
try:
suppliers = data.get('suppliers')
errors = []
valid = []
if not isinstance(suppliers, list) or not suppliers:
errors.append('suppliers必须是非空数组')
return False, errors, []
for idx, s in enumerate(suppliers):
e = self._validate_single_supplier(s, idx)
if e:
errors.extend(e)
else:
valid.append(s)
return len(errors) == 0, errors, valid
except Exception as e:
return False, [f'配置解析异常: {e}'], []
def _validate_single_supplier(self, s, idx):
errs = []
prefix = f'suppliers[{idx}]'
name = s.get('name')
if not name or not isinstance(name, str):
errs.append(f'{prefix}.name 必须为字符串')
fp = s.get('filename_patterns', [])
ci = s.get('content_indicators', [])
if not fp and not ci:
errs.append(f'{prefix} 必须至少提供 filename_patterns 或 content_indicators 之一')
cm = s.get('column_mapping', {})
if cm and not isinstance(cm, dict):
errs.append(f'{prefix}.column_mapping 必须为对象')
cr = s.get('cleaning_rules', [])
if cr and not isinstance(cr, list):
errs.append(f'{prefix}.cleaning_rules 必须为数组')
else:
for i, rule in enumerate(cr):
rtype = rule.get('type')
if rtype not in ('remove_rows','fill_na','convert_type'):
errs.append(f'{prefix}.cleaning_rules[{i}].type 非法: {rtype}')
if rtype == 'remove_rows' and not rule.get('condition'):
errs.append(f'{prefix}.cleaning_rules[{i}].condition 必填')
if rtype in ('fill_na','convert_type'):
if not rule.get('columns') and not rule.get('column'):
errs.append(f'{prefix}.cleaning_rules[{i}] 需提供 columns 或 column')
calc = s.get('calculations', [])
if calc and not isinstance(calc, list):
errs.append(f'{prefix}.calculations 必须为数组')
else:
for i, c in enumerate(calc):
ctype = c.get('type')
if ctype not in ('multiply','divide','formula'):
errs.append(f'{prefix}.calculations[{i}].type 非法: {ctype}')
if ctype in ('multiply','divide'):
if not c.get('source_column') or not c.get('target_column'):
errs.append(f'{prefix}.calculations[{i}] 需提供 source_column 与 target_column')
if ctype == 'formula' and (not c.get('formula') or not c.get('target_column')):
errs.append(f'{prefix}.calculations[{i}] 需提供 formula 与 target_column')
return errs
def process_file(self, input_file: Path, output_dir: Path,
preferred_processor: Optional[str] = None) -> Optional[Path]:
"""处理文件 - 自动选择合适的处理器
Args:
input_file: 输入文件路径
output_dir: 输出目录路径
preferred_processor: 优先使用的处理器名称(可选)
Returns:
输出文件路径,处理失败返回None
"""
if not input_file.exists():
logger.error(f"输入文件不存在: {input_file}")
return None
if not output_dir.exists():
output_dir.mkdir(parents=True, exist_ok=True)
try:
# 如果指定了优先处理器,先尝试使用它
if preferred_processor:
processor = self._get_processor_by_name(preferred_processor)
if processor and processor.can_process(input_file):
logger.info(f"使用指定的处理器: {processor.name}")
return processor.process(input_file, output_dir)
else:
logger.warning(f"指定的处理器不可用或无法处理该文件: {preferred_processor}")
# 自动选择合适的处理器
suitable_processors = [p for p in self.processors if p.can_process(input_file)]
if not suitable_processors:
logger.warning(f"未找到适合处理文件的处理器: {input_file}")
logger.info(f"支持的文件类型: {self.get_supported_types()}")
return None
# 使用第一个合适的处理器
processor = suitable_processors[0]
logger.info(f"使用处理器 {processor.name} 处理文件: {input_file}")
return processor.process(input_file, output_dir)
except Exception as e:
logger.error(f"处理文件时出错: {e}", exc_info=True)
return None
def _get_processor_by_name(self, name: str) -> Optional[BaseProcessor]:
"""根据名称获取处理器
Args:
name: 处理器名称
Returns:
处理器实例或None
"""
for processor in self.processors:
if processor.name == name or processor.__class__.__name__ == name:
return processor
return None
def get_supported_types(self) -> List[Dict[str, Any]]:
"""获取支持的文件类型信息
Returns:
处理器类型信息列表
"""
return [
{
'name': processor.name,
'description': processor.description,
'extensions': processor.get_supported_extensions(),
'class_name': processor.__class__.__name__
}
for processor in self.processors
]
def get_processor_info(self) -> List[Dict[str, Any]]:
"""获取处理器详细信息
Returns:
处理器详细信息列表
"""
return [
{
'name': processor.name,
'description': processor.description,
'extensions': processor.get_supported_extensions(),
'required_columns': processor.get_required_columns(),
'class_name': processor.__class__.__name__,
'module': processor.__class__.__module__
}
for processor in self.processors
]
def can_process_file(self, file_path: Path) -> bool:
"""检查是否有处理器能处理该文件
Args:
file_path: 文件路径
Returns:
是否有处理器能处理
"""
if not file_path.exists():
return False
return any(processor.can_process(file_path) for processor in self.processors)
def get_suitable_processors(self, file_path: Path) -> List[BaseProcessor]:
"""获取能处理该文件的所有处理器
Args:
file_path: 文件路径
Returns:
合适的处理器列表
"""
if not file_path.exists():
return []
return [p for p in self.processors if p.can_process(file_path)]
def reload_processors(self):
"""重新加载处理器"""
logger.info("重新加载处理器...")
self.processors.clear()
self._load_processors()
logger.info(f"重新加载完成,共{len(self.processors)}个处理器")
def add_processor(self, processor: BaseProcessor):
"""添加处理器
Args:
processor: 处理器实例
"""
self.processors.append(processor)
logger.info(f"添加处理器: {processor.name}")
def remove_processor(self, processor_name: str) -> bool:
"""移除处理器
Args:
processor_name: 处理器名称
Returns:
是否成功移除
"""
for i, processor in enumerate(self.processors):
if processor.name == processor_name or processor.__class__.__name__ == processor_name:
del self.processors[i]
logger.info(f"移除处理器: {processor_name}")
return True
logger.warning(f"未找到要移除的处理器: {processor_name}")
return False