orc-order-v2/app/cli/ocr_cli.py

164 lines
4.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
OCR命令行工具
----------
提供OCR识别相关的命令行接口。
"""
import os
import sys
import argparse
from typing import List, Optional
from ..config.settings import ConfigManager
from ..core.utils.log_utils import get_logger, close_logger
from ..services.ocr_service import OCRService
# Module-level logger shared by every function in this file; it is obtained
# from the project's get_logger helper using this module's name.
logger = get_logger(__name__)
def create_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the OCR command-line tool.

    The parser accepts a global ``--config`` option plus three subcommands.
    The chosen subcommand name is stored in the ``command`` attribute of the
    parsed namespace (``None`` when no subcommand is given):

    * ``process`` -- requires ``--input``.
    * ``batch`` -- optional integer ``--batch-size`` and ``--max-workers``.
    * ``list`` -- takes no options.

    Returns:
        The configured argument parser.
    """
    parser = argparse.ArgumentParser(description='OCR识别工具')

    # Global option, given before the subcommand name.
    parser.add_argument('--config', type=str, help='配置文件路径')

    # Subcommands; the selected name lands in ``args.command``.
    subparsers = parser.add_subparsers(dest='command', help='子命令')

    # Single-file processing.
    process_parser = subparsers.add_parser('process', help='处理单个文件')
    process_parser.add_argument('--input', type=str, required=True, help='输入图片文件路径')

    # Batch processing.
    batch_parser = subparsers.add_parser('batch', help='批量处理文件')
    batch_parser.add_argument('--batch-size', type=int, help='批处理大小')
    batch_parser.add_argument('--max-workers', type=int, help='最大线程数')

    # Listing takes no options, so the returned sub-parser is not kept.
    subparsers.add_parser('list', help='列出未处理的文件')

    return parser
def process_file(ocr_service: OCRService, input_file: str) -> bool:
    """Run OCR on a single input file.

    The path must exist and must pass ``ocr_service.validate_image`` before
    ``ocr_service.process_image`` is called on it. Each failure is logged.

    Args:
        ocr_service: OCR service used to validate and process the file.
        input_file: Path of the file to process.

    Returns:
        True when ``process_image`` returns a truthy result, False otherwise.
    """
    path_found = os.path.exists(input_file)
    if not path_found:
        logger.error(f"输入文件不存在: {input_file}")
        return False

    image_ok = ocr_service.validate_image(input_file)
    if not image_ok:
        logger.error(f"输入文件无效: {input_file}")
        return False

    result = ocr_service.process_image(input_file)
    if not result:
        logger.error("处理失败")
        return False

    # A truthy result is reported in the log as the output file.
    logger.info(f"处理成功,输出文件: {result}")
    return True
def process_batch(ocr_service: OCRService, batch_size: Optional[int] = None, max_workers: Optional[int] = None) -> bool:
    """Run OCR over a batch of files through the service.

    Args:
        ocr_service: OCR service whose ``process_images_batch`` is called.
        batch_size: Batch size forwarded to the service; may be None.
        max_workers: Maximum worker count forwarded to the service; may be None.

    Returns:
        True when at least one file was processed successfully; False when
        nothing was found to process or nothing succeeded.
    """
    outcome = ocr_service.process_images_batch(batch_size, max_workers)
    # The service result is unpacked as a (total, success) pair of counts.
    total, success = outcome

    if total != 0:
        logger.info(f"批量处理完成,总计: {total},成功: {success}")
        return success > 0

    logger.warning("没有找到需要处理的文件")
    return False
def list_unprocessed(ocr_service: OCRService) -> bool:
    """Log every file the service reports as not yet processed.

    Args:
        ocr_service: OCR service whose ``get_unprocessed_images`` is queried.

    Returns:
        True when at least one unprocessed file was reported, False otherwise.
    """
    files = ocr_service.get_unprocessed_images()

    if files:
        # Header line with the count, then one log line per entry.
        logger.info(f"未处理的文件 ({len(files)}):")
        for file in files:
            logger.info(f" {file}")
        return True

    logger.info("没有未处理的文件")
    return False
def main(args: Optional[List[str]] = None) -> int:
    """Entry point of the OCR command-line tool.

    Parses the arguments, builds the configuration and the OCR service, then
    dispatches to the handler of the selected subcommand.

    Args:
        args: Command-line arguments. When None, argparse reads them from
            ``sys.argv``.

    Returns:
        Exit status code: 0 when the selected command reports success, 1 when
        it reports failure, when no subcommand was given, or when an
        exception was raised while running it.
    """
    parser = create_parser()
    parsed_args = parser.parse_args(args)

    # No subcommand given: show usage and report failure.
    if parsed_args.command is None:
        parser.print_help()
        return 1

    try:
        # Build the configuration manager, honouring --config when supplied.
        config = ConfigManager(parsed_args.config) if parsed_args.config else ConfigManager()

        # Build the OCR service from that configuration.
        ocr_service = OCRService(config)

        # Dispatch on the selected subcommand.
        if parsed_args.command == 'process':
            success = process_file(ocr_service, parsed_args.input)
        elif parsed_args.command == 'batch':
            success = process_batch(ocr_service, parsed_args.batch_size, parsed_args.max_workers)
        elif parsed_args.command == 'list':
            success = list_unprocessed(ocr_service)
        else:
            # Defensive fallback for a command name without a handler.
            parser.print_help()
            return 1

        return 0 if success else 1
    except Exception as e:
        # Top-level boundary: log the message together with the traceback so
        # the failure can be diagnosed, then turn it into a failing status.
        # NOTE(review): assumes get_logger returns a standard
        # logging.Logger-compatible object providing .exception() -- confirm.
        logger.exception(f"执行过程中发生错误: {e}")
        return 1
    finally:
        # Close this module's logger on every path that entered the try block.
        close_logger(__name__)
# Script entry point: run the CLI and hand its status code to the interpreter.
if __name__ == '__main__':
    exit_code = main()
    sys.exit(exit_code)