orc-order-v2/app/services/ocr_service.py

103 lines
3.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
OCR服务模块
---------
提供OCR识别服务协调OCR流程。
"""
from typing import Dict, List, Optional, Tuple, Union, Any
from ..config.settings import ConfigManager
from ..core.utils.log_utils import get_logger
from ..core.ocr.table_ocr import OCRProcessor
logger = get_logger(__name__)
class OCRService:
    """Service facade that coordinates the OCR workflow.

    Every public method delegates to the ``OCRProcessor`` created in
    ``__init__``; this class only adds logging around those calls.
    """

    def __init__(self, config: Optional[ConfigManager] = None) -> None:
        """Initialize the OCR service.

        Args:
            config: Configuration manager. When omitted (or falsy), a new
                ``ConfigManager`` is created.
        """
        logger.info("初始化OCRService")
        self.config = config or ConfigManager()
        # Create the OCR processor; it receives the same configuration.
        self.ocr_processor = OCRProcessor(self.config)
        logger.info("OCRService初始化完成")

    def get_unprocessed_images(self) -> List[str]:
        """Return the list of images that are waiting to be processed.

        Returns:
            Paths of the pending images, as reported by the processor.
        """
        return self.ocr_processor.get_unprocessed_images()

    def process_image(self, image_path: str) -> Optional[str]:
        """Process a single image.

        Args:
            image_path: Path of the image to process.

        Returns:
            The processor's result unchanged: the output Excel file path,
            or ``None`` if processing failed.
        """
        logger.info(f"OCRService开始处理图片: {image_path}")
        result = self.ocr_processor.process_image(image_path)
        # Any falsy result is logged as a failure, but it is still
        # returned to the caller as-is.
        if result:
            logger.info(f"OCRService处理图片成功: {image_path} -> {result}")
        else:
            logger.error(f"OCRService处理图片失败: {image_path}")
        return result

    def process_images_batch(
        self,
        batch_size: Optional[int] = None,
        max_workers: Optional[int] = None,
    ) -> Tuple[int, int]:
        """Process images in batch.

        Args:
            batch_size: Batch size; ``None`` is forwarded to the processor
                unchanged.
            max_workers: Maximum number of worker threads; ``None`` is
                forwarded to the processor unchanged.

        Returns:
            A ``(total processed, successfully processed)`` tuple, as
            returned by the processor.
        """
        logger.info(f"OCRService开始批量处理图片, batch_size={batch_size}, max_workers={max_workers}")
        return self.ocr_processor.process_images_batch(batch_size, max_workers)

    # batch_process is kept as an alias of process_images_batch for
    # backward compatibility with existing callers.
    def batch_process(
        self,
        batch_size: Optional[int] = None,
        max_workers: Optional[int] = None,
    ) -> Tuple[int, int]:
        """Alias of :meth:`process_images_batch`.

        Args:
            batch_size: Batch size.
            max_workers: Maximum number of worker threads.

        Returns:
            A ``(total processed, successfully processed)`` tuple.
        """
        # Plain literal: the message has no placeholders, so no f-string.
        logger.info("OCRService.batch_process被调用转发到process_images_batch")
        return self.process_images_batch(batch_size, max_workers)

    def validate_image(self, image_path: str) -> bool:
        """Check whether an image is valid.

        Args:
            image_path: Path of the image to check.

        Returns:
            The processor's verdict on whether the image is valid.
        """
        return self.ocr_processor.validate_image(image_path)