## v1.5.3 (2024-03-21)

- 优化了完整流程处理逻辑:
  - 修改了OCR处理逻辑,当遇到已处理的图片时自动跳过并继续执行
  - 改进了错误处理,避免因图片已处理而中断流程
  - 优化了日志提示信息,提供更清晰的处理状态反馈
- 改进了OCRService的process_image方法:
  - 添加了文件存在性检查
  - 添加了文件类型验证
  - 添加了已处理文件检查
  - 优化了错误处理和日志记录
This commit is contained in:
2025-05-10 12:58:28 +08:00
parent 201aac35e6
commit 4a8169ff63
35 changed files with 497 additions and 145 deletions
Binary file not shown.
+39 -12
View File
@@ -5,6 +5,7 @@ OCR服务模块
"""
from typing import Dict, List, Optional, Tuple, Union, Any
import os
from ..config.settings import ConfigManager
from ..core.utils.log_utils import get_logger
@@ -43,23 +44,49 @@ class OCRService:
def process_image(self, image_path: str) -> Optional[str]:
    """
    Process a single image file and return the path of its Excel output.

    The image is validated before any OCR work is done: it must exist on
    disk and be accepted by ``self._is_valid_image``. If the Excel path
    reported by ``self._get_excel_path`` already exists, the image is
    treated as already processed and that path is returned without running
    OCR again, so a full pipeline run can continue past finished images.

    Args:
        image_path: Path to the image file.

    Returns:
        Path of the generated (or previously generated) Excel file, or
        None if validation, OCR recognition, or Excel generation failed.
    """
    # Service-level boundary: any unexpected error is logged with its
    # traceback and reported to the caller as None instead of propagating.
    try:
        # Reject paths that do not exist.
        if not os.path.exists(image_path):
            logger.error(f"文件不存在: {image_path}")
            return None

        # Reject unsupported file types (helper is defined elsewhere in
        # the class and is not visible in this chunk).
        if not self._is_valid_image(image_path):
            logger.error(f"不支持的文件类型: {image_path}")
            return None

        # Skip OCR when the output for this image already exists.
        existing_excel = self._get_excel_path(image_path)
        if os.path.exists(existing_excel):
            logger.info(f"文件已处理过,跳过OCR识别: {image_path}")
            return existing_excel

        # Run OCR recognition; a falsy result is treated as failure.
        result = self.ocr_processor.process_image(image_path)
        if not result:
            logger.error(f"OCR识别失败: {image_path}")
            return None

        # Turn the OCR result into an Excel file.
        excel_file = self._generate_excel(result, image_path)
        if not excel_file:
            logger.error(f"生成Excel文件失败: {image_path}")
            return None

        logger.info(f"处理完成: {image_path} -> {excel_file}")
        return excel_file
    except Exception as e:
        logger.error(f"处理图片时发生错误: {e}", exc_info=True)
        return None
def process_images_batch(self, batch_size: int = None, max_workers: int = None) -> Tuple[int, int]:
"""