## v1.5.3 (2024-03-21)

- 优化了完整流程处理逻辑：
  - 修改了OCR处理逻辑，当遇到已处理的图片时自动跳过并继续执行
  - 改进了错误处理，避免因图片已处理而中断流程
  - 优化了日志提示信息，提供更清晰的处理状态反馈
- 改进了OCRService的process_image方法：
  - 添加了文件存在性检查
  - 添加了文件类型验证
  - 添加了已处理文件检查
  - 优化了错误处理和日志记录

2025-05-10 12:58:28 +08:00
parent 201aac35e6
commit 4a8169ff63
35 changed files with 497 additions and 145 deletions
@@ -5,6 +5,7 @@ OCR服务模块
 """

 from typing import Dict, List, Optional, Tuple, Union, Any
+import os

 from ..config.settings import ConfigManager
 from ..core.utils.log_utils import get_logger
@@ -43,23 +44,49 @@ class OCRService:
    
    def process_image(self, image_path: str) -> Optional[str]:
        """
-        处理单张图片
+        处理单个图片文件
        
        Args:
-            image_path: 图片路径
+            image_path: 图片文件路径
            
        Returns:
-            输出Excel文件路径，如果处理失败则返回None
+            生成的Excel文件路径，如果处理失败则返回None
        """
-        logger.info(f"OCRService开始处理图片: {image_path}")
-        result = self.ocr_processor.process_image(image_path)
-        
-        if result:
-            logger.info(f"OCRService处理图片成功: {image_path} -> {result}")
-        else:
-            logger.error(f"OCRService处理图片失败: {image_path}")
-        
-        return result
+        try:
+            # 检查文件是否存在
+            if not os.path.exists(image_path):
+                logger.error(f"文件不存在: {image_path}")
+                return None
+                
+            # 检查文件类型
+            if not self._is_valid_image(image_path):
+                logger.error(f"不支持的文件类型: {image_path}")
+                return None
+                
+            # 检查是否已处理
+            excel_file = self._get_excel_path(image_path)
+            if os.path.exists(excel_file):
+                logger.info(f"文件已处理过，跳过OCR识别: {image_path}")
+                return excel_file
+                
+            # 执行OCR识别
+            result = self.ocr_processor.process_image(image_path)
+            if not result:
+                logger.error(f"OCR识别失败: {image_path}")
+                return None
+                
+            # 生成Excel文件
+            excel_file = self._generate_excel(result, image_path)
+            if not excel_file:
+                logger.error(f"生成Excel文件失败: {image_path}")
+                return None
+                
+            logger.info(f"处理完成: {image_path} -> {excel_file}")
+            return excel_file
+            
+        except Exception as e:
+            logger.error(f"处理图片时发生错误: {e}", exc_info=True)
+            return None
    
    def process_images_batch(self, batch_size: int = None, max_workers: int = None) -> Tuple[int, int]:
        """