orc-order-v2/app/services/ocr_service.py
2025-11-15 18:46:03 +08:00

194 lines
6.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
OCR service module
------------------
Provides the OCR recognition service and coordinates the OCR workflow.
"""
from typing import Dict, List, Optional, Tuple, Union, Any, Callable
import os
from ..config.settings import ConfigManager
from ..core.utils.log_utils import get_logger
from ..core.ocr.table_ocr import OCRProcessor
# Module-level logger (named after this module) used by OCRService for
# progress and error messages.
logger = get_logger(__name__)
class OCRService:
    """Coordinate the OCR workflow on top of an ``OCRProcessor``.

    The service resolves Excel output paths from configuration, skips images
    whose Excel output already exists, and delegates recognition, validation
    and batch processing to the wrapped processor.
    """

    # The annotation is quoted so it is not evaluated at definition time.
    def __init__(self, config: Optional["ConfigManager"] = None):
        """Initialise the service and its OCR processor.

        Args:
            config: Configuration manager to use. A new ``ConfigManager`` is
                created when this is ``None`` (or otherwise falsy).
        """
        logger.info("初始化OCRService")
        self.config = config or ConfigManager()
        # The processor is built from the same configuration object.
        self.ocr_processor = OCRProcessor(self.config)
        logger.info("OCRService初始化完成")

    def get_unprocessed_images(self) -> List[str]:
        """Return the image paths the OCR processor reports as unprocessed.

        Returns:
            List of image paths still waiting to be processed.
        """
        return self.ocr_processor.get_unprocessed_images()

    def process_image(self, image_path: str) -> Optional[str]:
        """Run OCR on a single image and produce its Excel file.

        Args:
            image_path: Path of the image file to process.

        Returns:
            Path of the Excel file (newly generated, or the existing one if
            the image was already processed), or ``None`` on any failure.
            Exceptions are logged and converted into a ``None`` result.
        """
        try:
            # The file must exist before anything else is attempted.
            if not os.path.exists(image_path):
                logger.error(f"文件不存在: {image_path}")
                return None

            # Reject files the processor does not accept as images.
            if not self._is_valid_image(image_path):
                logger.error(f"不支持的文件类型: {image_path}")
                return None

            # An existing Excel output means the image was handled before;
            # reuse it instead of running OCR again.
            excel_file = self._get_excel_path(image_path)
            if os.path.exists(excel_file):
                logger.info(f"文件已处理过跳过OCR识别: {image_path}")
                return excel_file

            # Run the actual OCR recognition.
            result = self.ocr_processor.process_image(image_path)
            if not result:
                logger.error(f"OCR识别失败: {image_path}")
                return None

            # Turn the OCR result into an Excel file.
            excel_file = self._generate_excel(result, image_path)
            if not excel_file:
                logger.error(f"生成Excel文件失败: {image_path}")
                return None

            logger.info(f"处理完成: {image_path} -> {excel_file}")
            return excel_file
        except Exception as e:
            # Top-level boundary for single-image processing: log with
            # traceback and report failure rather than propagate.
            logger.error(f"处理图片时发生错误: {e}", exc_info=True)
            return None

    def process_images_batch(
        self,
        batch_size: Optional[int] = None,
        max_workers: Optional[int] = None,
        progress_cb: Optional[Callable[[int], None]] = None,
    ) -> Tuple[int, int]:
        """Process images in batch by delegating to the OCR processor.

        Args:
            batch_size: Batch size, forwarded unchanged to the processor.
            max_workers: Maximum number of worker threads, forwarded
                unchanged to the processor.
            progress_cb: Optional callback taking an ``int``; forwarded
                unchanged to the processor. NOTE(review): the meaning of the
                integer is defined by ``OCRProcessor`` - confirm there.

        Returns:
            ``(total_processed, successfully_processed)`` tuple.
        """
        logger.info(f"OCRService开始批量处理图片, batch_size={batch_size}, max_workers={max_workers}")
        return self.ocr_processor.process_images_batch(batch_size, max_workers, progress_cb)

    # Alias of process_images_batch, kept for compatibility with callers that
    # use the older name.
    def batch_process(
        self,
        batch_size: Optional[int] = None,
        max_workers: Optional[int] = None,
        progress_cb: Optional[Callable[[int], None]] = None,
    ) -> Tuple[int, int]:
        """Alias of :meth:`process_images_batch` with identical behaviour.

        Args:
            batch_size: Batch size.
            max_workers: Maximum number of worker threads.
            progress_cb: Optional callback taking an ``int``; passed through.

        Returns:
            ``(total_processed, successfully_processed)`` tuple.
        """
        logger.info("OCRService.batch_process被调用转发到process_images_batch")
        return self.process_images_batch(batch_size, max_workers, progress_cb)

    def validate_image(self, image_path: str) -> bool:
        """Ask the OCR processor whether the image is valid.

        Args:
            image_path: Path of the image to validate.

        Returns:
            The processor's verdict on the image.
        """
        return self.ocr_processor.validate_image(image_path)

    def _is_valid_image(self, image_path: str) -> bool:
        """Check whether the file is an acceptable image.

        Thin wrapper around :meth:`validate_image`.

        Args:
            image_path: Path of the image file.

        Returns:
            ``True`` when the image is considered valid.
        """
        return self.validate_image(image_path)

    def _get_excel_path(self, image_path: str) -> str:
        """Build the Excel output path that corresponds to an image.

        The Excel file takes the image's base name (extension removed) plus
        ``.xlsx`` and lives in the configured ``Paths/output_folder``
        (``data/output`` when the option is missing).

        Args:
            image_path: Path of the image file.

        Returns:
            Path of the Excel file for that image.
        """
        # File name without directory and without extension.
        base_name = os.path.splitext(os.path.basename(image_path))[0]
        output_dir = self.config.get('Paths', 'output_folder', fallback='data/output')
        return os.path.join(output_dir, f"{base_name}.xlsx")

    def _generate_excel(self, ocr_result: dict, image_path: str) -> Optional[str]:
        """Generate the Excel file for an OCR result.

        Args:
            ocr_result: OCR recognition result to export.
            image_path: Path of the original image; determines the output
                file name.

        Returns:
            Path of the Excel file, or ``None`` when generation failed.
            Exceptions are logged and converted into a ``None`` result.
        """
        try:
            excel_path = self._get_excel_path(image_path)

            # Make sure the output directory exists. os.makedirs("") raises
            # FileNotFoundError, so skip it when the path has no directory
            # component (an empty output folder means the current directory).
            output_dir = os.path.dirname(excel_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            if hasattr(self.ocr_processor, 'generate_excel'):
                # Preferred route: let the processor write the workbook.
                if self.ocr_processor.generate_excel(ocr_result, excel_path):
                    return excel_path
            elif os.path.exists(excel_path):
                # The processor has no generate_excel method; assume it
                # already wrote the file while processing the image.
                return excel_path

            return None
        except Exception as e:
            logger.error(f"生成Excel文件时发生错误: {e}", exc_info=True)
            return None