feat: 益选 OCR 订单处理系统初始提交
- 智能供应商识别(蓉城易购/烟草/杨碧月/通用) - 百度 OCR 表格识别集成 - 规则引擎(列映射/数据清洗/单位转换/规格推断) - 条码映射管理与云端同步(Gitea REST API) - 云端同步支持:条码映射、供应商配置、商品资料、采购模板 - 拖拽一键处理(图片→OCR→Excel→合并) - 191 个单元测试 - 移除无用的模板管理功能 - 清理 IDE 产物目录 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
"""
|
||||
OCR订单处理系统
|
||||
---------------
|
||||
用于自动识别和处理Excel格式的订单文件的系统。
|
||||
支持多种格式的订单处理,包括普通订单和赠品订单的处理。
|
||||
"""
|
||||
|
||||
__version__ = '2.0.0'
|
||||
@@ -0,0 +1,5 @@
|
||||
"""
|
||||
OCR订单处理系统 - 配置模块
|
||||
------------------------
|
||||
负责管理系统配置,包括API密钥、路径和处理选项。
|
||||
"""
|
||||
@@ -0,0 +1,49 @@
|
||||
"""
|
||||
默认配置
|
||||
-------
|
||||
包含系统的默认配置值。
|
||||
"""
|
||||
|
||||
# 默认配置
|
||||
DEFAULT_CONFIG = {
    # Baidu OCR endpoints and request tuning. Values are strings because they
    # are written into a configparser instance as-is.
    'API': {
        'api_key': '',      # left empty here; supplied at runtime
        'secret_key': '',   # left empty here; supplied at runtime
        'timeout': '30',
        'max_retries': '3',
        'retry_delay': '2',
        'api_url': 'https://aip.baidubce.com/rest/2.0/ocr/v1/table',
        'token_url': 'https://aip.baidubce.com/oauth/2.0/token',
        'form_ocr_url': 'https://aip.baidubce.com/rest/2.0/solution/v1/form_ocr/get_request_result',
    },
    # Working folders and data files.
    'Paths': {
        'input_folder': 'data/input',
        'output_folder': 'data/output',
        'temp_folder': 'data/temp',
        'template_folder': 'templates',
        'template_file': '银豹-采购单模板.xls',
        'processed_record': 'data/processed_files.json',
        'data_dir': 'data',
        'product_db': 'data/product_cache.db',
    },
    # Batch-processing knobs.
    'Performance': {
        'max_workers': '4',
        'batch_size': '5',
        'skip_existing': 'true',
    },
    # Accepted input files and output format.
    'File': {
        'allowed_extensions': '.jpg,.jpeg,.png,.bmp',
        'excel_extension': '.xlsx',
        'max_file_size_mb': '4',
    },
    # Template / master-data file names.
    'Templates': {
        'purchase_order': '银豹-采购单模板.xls',
        'item_data': '商品资料.xlsx',
    },
    # Gitea repository settings; the token default is empty.
    'Gitea': {
        'base_url': 'https://gitea.94kan.cn',
        'owner': 'houhuan',
        'repo': 'yixuan-sync-data',
        'token': '',
    },
}
|
||||
@@ -0,0 +1,176 @@
|
||||
"""
|
||||
配置管理模块
|
||||
-----------
|
||||
提供统一的配置加载、访问和保存功能。
|
||||
"""
|
||||
|
||||
import os
|
||||
import configparser
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from ..core.utils.log_utils import get_logger
|
||||
from .defaults import DEFAULT_CONFIG
|
||||
|
||||
# 加载 .env 文件
|
||||
load_dotenv()
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class ConfigManager:
|
||||
"""
|
||||
配置管理类,负责加载和保存配置
|
||||
单例模式确保全局只有一个配置实例
|
||||
"""
|
||||
_instance = None
|
||||
|
||||
def __new__(cls, config_file=None):
    """Return the shared instance, building and initialising it on first use.

    ``config_file`` is only used by the call that creates the instance;
    later calls return the existing object without re-reading anything.
    """
    shared = cls._instance
    if shared is not None:
        return shared

    shared = super(ConfigManager, cls).__new__(cls)
    # Publish before initialising, as the original did.
    cls._instance = shared
    shared._init(config_file)
    return cls._instance
|
||||
|
||||
def _init(self, config_file):
    """Set up the parser for ``config_file`` (default ``config.ini``) and load it."""
    self.config_file = config_file if config_file else 'config.ini'
    self.config = configparser.ConfigParser()
    self.load_config()
|
||||
|
||||
def load_config(self) -> None:
    """Load the configuration file, creating it from defaults when missing.

    An existing file is read, any section/option missing from it is filled
    in from ``DEFAULT_CONFIG``, and the result is written back. Secrets
    found in environment variables take precedence over file values on
    every path, including a first run and the error fallback.
    """
    if not os.path.exists(self.config_file):
        self.create_default_config()
        # Apply after the defaults were saved so a brand-new install still
        # picks up keys from the environment.
        self._override_from_env()
        return

    try:
        self.config.read(self.config_file, encoding='utf-8')

        # Only add what is missing; never overwrite values the user set.
        for section, options in DEFAULT_CONFIG.items():
            if not self.config.has_section(section):
                self.config.add_section(section)
            for option, value in options.items():
                if not self.config.has_option(section, option):
                    self.config.set(section, option, value)

        self._override_from_env()
        self.save_config()
        logger.info(f"已加载并更新配置文件: {self.config_file}")
    except Exception as e:
        logger.error(f"加载配置文件时出错: {e}")
        logger.info("使用默认配置")
        self.create_default_config(save=False)
        try:
            self._override_from_env()
        except Exception as env_error:
            # The override itself may have been the original failure.
            logger.error(f"从环境变量读取密钥失败: {env_error}")
|
||||
|
||||
def _override_from_env(self) -> None:
    """Replace sensitive options with non-blank environment variable values."""
    sources = (
        ('API', 'api_key', 'BAIDU_API_KEY'),
        ('API', 'secret_key', 'BAIDU_SECRET_KEY'),
        ('Gitea', 'token', 'GITEA_TOKEN'),
    )
    for section, option, variable in sources:
        value = os.getenv(variable, '').strip()
        if not value:
            # Unset or whitespace-only: keep whatever the parser holds.
            continue
        self.config.set(section, option, value)
|
||||
|
||||
def create_default_config(self, save: bool = True) -> None:
    """Reset every option listed in ``DEFAULT_CONFIG`` to its default.

    Args:
        save: When true, write the result to ``self.config_file``.
    """
    parser = self.config
    for section_name, defaults in DEFAULT_CONFIG.items():
        if not parser.has_section(section_name):
            parser.add_section(section_name)
        for key, default_value in defaults.items():
            parser.set(section_name, key, default_value)

    if not save:
        return
    self.save_config()
    logger.info(f"已创建默认配置文件: {self.config_file}")
|
||||
|
||||
def save_config(self) -> None:
    """Write the configuration to ``self.config_file`` without secrets.

    The Baidu API key and secret are blanked in the written file. A Gitea
    token is also withheld when it is identical to the ``GITEA_TOKEN``
    environment variable, i.e. when it came from the environment rather
    than from the file. In-memory values are always restored afterwards,
    even when writing fails. Errors are logged, not raised.
    """
    withheld = {}
    try:
        targets = [('API', 'api_key'), ('API', 'secret_key')]
        env_token = os.getenv('GITEA_TOKEN', '').strip()
        if env_token and self.config.get('Gitea', 'token', fallback='') == env_token:
            targets.append(('Gitea', 'token'))

        for section, option in targets:
            # A missing section/option has nothing to hide and must not
            # abort the save.
            if not self.config.has_option(section, option):
                continue
            withheld[(section, option)] = self.config.get(section, option)
            self.config.set(section, option, '')

        with open(self.config_file, 'w', encoding='utf-8') as f:
            self.config.write(f)
    except Exception as e:
        logger.error(f"保存配置文件时出错: {e}")
    else:
        logger.info(f"配置已保存到: {self.config_file}")
    finally:
        # Restore secrets in memory whether or not the write succeeded.
        for (section, option), value in withheld.items():
            self.config.set(section, option, value)
|
||||
|
||||
def get(self, section: str, option: str, fallback: Any = None) -> Any:
    """Return the option's string value, or ``fallback`` when it is absent."""
    parser = self.config
    return parser.get(section, option, fallback=fallback)
|
||||
|
||||
def getint(self, section: str, option: str, fallback: int = 0) -> int:
    """Return the option parsed as an integer, or ``fallback`` when it is absent."""
    parser = self.config
    return parser.getint(section, option, fallback=fallback)
|
||||
|
||||
def getfloat(self, section: str, option: str, fallback: float = 0.0) -> float:
    """Return the option parsed as a float, or ``fallback`` when it is absent."""
    parser = self.config
    return parser.getfloat(section, option, fallback=fallback)
|
||||
|
||||
def getboolean(self, section: str, option: str, fallback: bool = False) -> bool:
    """Return the option parsed as a boolean, or ``fallback`` when it is absent."""
    parser = self.config
    return parser.getboolean(section, option, fallback=fallback)
|
||||
|
||||
def get_list(self, section: str, option: str, fallback: str = "", delimiter: str = ",") -> List[str]:
    """Split a delimited option into a list of trimmed, non-empty items."""
    raw = self.get(section, option, fallback)
    trimmed = (piece.strip() for piece in raw.split(delimiter))
    return [piece for piece in trimmed if piece]
|
||||
|
||||
def update(self, section: str, option: str, value: Any) -> None:
    """Set an option in memory (stored as ``str(value)``), creating the section if needed.

    The change is not written to disk by this method.
    """
    parser = self.config
    if not parser.has_section(section):
        parser.add_section(section)
    parser.set(section, option, str(value))
    logger.debug(f"更新配置: [{section}] {option} = {value}")
|
||||
|
||||
def get_path(self, section: str, option: str, fallback: str = "", create: bool = False) -> str:
    """Return a path option as an absolute path string.

    Relative values are resolved against the current working directory.
    With ``create=True`` the directory is created if it does not exist; a
    value that has a file suffix is treated as a file, so only its parent
    directory is created. Creation errors are logged, not raised.
    """
    from pathlib import Path

    target = Path(self.get(section, option, fallback))
    if not target.is_absolute():
        target = Path(os.getcwd()) / target

    if create:
        try:
            if target.suffix:
                # Looks like a file: make sure its folder exists.
                parent = target.parent
                if not parent.exists():
                    parent.mkdir(parents=True, exist_ok=True)
                    logger.info(f"已创建父目录: {parent}")
            elif not target.exists():
                target.mkdir(parents=True, exist_ok=True)
                logger.info(f"已创建目录: {target}")
        except Exception as e:
            logger.error(f"创建目录失败: {target}, 错误: {e}")

    return str(target.absolute())
|
||||
@@ -0,0 +1,214 @@
|
||||
"""
|
||||
商品资料 SQLite 数据库
|
||||
|
||||
将商品资料 (条码/名称/进货价/单位) 存储在 SQLite 中,
|
||||
支持从 Excel 自动导入和按条码快速查询。
|
||||
"""
|
||||
|
||||
import os
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from ..utils.log_utils import get_logger
|
||||
from ..utils.file_utils import smart_read_excel
|
||||
from ...core.handlers.column_mapper import ColumnMapper
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class ProductDatabase:
|
||||
"""商品资料 SQLite 数据库"""
|
||||
|
||||
SCHEMA = """
|
||||
CREATE TABLE IF NOT EXISTS products (
|
||||
barcode TEXT PRIMARY KEY,
|
||||
name TEXT DEFAULT '',
|
||||
price REAL DEFAULT 0.0,
|
||||
unit TEXT DEFAULT '',
|
||||
updated_at TEXT
|
||||
);
|
||||
"""
|
||||
|
||||
def __init__(self, db_path: str, excel_source: str):
    """Remember both paths and make sure the SQLite database exists.

    Args:
        db_path: Path of the SQLite database file.
        excel_source: Path of the product master-data Excel file.
    """
    self.db_path, self.excel_source = db_path, excel_source
    self._ensure_db()
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
    """Open a new connection to ``self.db_path``; the caller closes it."""
    connection = sqlite3.connect(self.db_path)
    return connection
|
||||
|
||||
def _ensure_db(self):
    """Create the database on first use, importing from Excel when available.

    Does nothing when the database file already exists. Otherwise the
    parent directory is created, an empty schema is set up, and the Excel
    source is imported if it exists.
    """
    if os.path.exists(self.db_path):
        return

    # sqlite3 cannot create missing folders, so this must happen on every
    # creation path. A bare filename has no parent to create.
    parent = os.path.dirname(self.db_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    if not os.path.exists(self.excel_source):
        logger.warning(f"商品资料 Excel 不存在,跳过导入: {self.excel_source}")
        self._create_empty_db()
        return

    logger.info(f"首次运行,从 Excel 导入商品资料: {self.excel_source}")
    self._create_empty_db()
    count = self.import_from_excel(self.excel_source)
    logger.info(f"商品资料导入完成: {count} 条记录")
|
||||
|
||||
def _create_empty_db(self):
    """Run the ``SCHEMA`` script so the ``products`` table exists."""
    connection = self._connect()
    try:
        connection.executescript(self.SCHEMA)
        connection.commit()
    finally:
        connection.close()
|
||||
|
||||
def import_from_excel(self, excel_path: str) -> int:
    """Import product records from an Excel file into the ``products`` table.

    Rows are upserted by barcode. The barcode column is required; price,
    name and unit columns are optional and default to ``0.0`` / ``''``.

    Args:
        excel_path: Path of the Excel file to read.

    Returns:
        Number of rows written (0 when the file is empty, unreadable, or
        has no barcode column).
    """
    df = smart_read_excel(excel_path)
    if df is None or df.empty:
        logger.warning(f"Excel 文件为空或读取失败: {excel_path}")
        return 0

    columns = list(df.columns)

    barcode_col = ColumnMapper.find_column(columns, 'barcode')
    if not barcode_col:
        logger.error(f"Excel 中未找到条码列: {columns}")
        return 0

    price_col = ColumnMapper.find_column(columns, 'unit_price')
    if not price_col:
        # The purchase-price header may not be a registered alias.
        price_col = next(
            (col for col in df.columns if '进货价' in str(col).strip()),
            None,
        )

    name_col = ColumnMapper.find_column(columns, 'name')
    unit_col = ColumnMapper.find_column(columns, 'unit')

    def as_text(value) -> str:
        """Cell value as stripped text; pandas NaN becomes ''."""
        text = str(value).strip()
        return '' if text == 'nan' else text

    def as_barcode(value) -> str:
        """Barcode text; numeric cells read as floats lose the artificial '.0'."""
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return as_text(value)

    now = datetime.now().isoformat()
    rows = []
    for _, row in df.iterrows():
        barcode = as_barcode(row.get(barcode_col, ''))
        if not barcode:
            continue

        price = 0.0
        if price_col:
            try:
                p = row.get(price_col)
                if p is not None and str(p).strip() not in ('', 'nan', 'None'):
                    price = float(p)
            except (ValueError, TypeError):
                # Unparseable price: keep the 0.0 default for this row.
                pass

        name = as_text(row.get(name_col, '')) if name_col else ''
        unit = as_text(row.get(unit_col, '')) if unit_col else ''
        rows.append((barcode, name, price, unit, now))

    if not rows:
        logger.warning(f"Excel 中未解析出有效记录: {excel_path}")
        return 0

    conn = self._connect()
    try:
        conn.executemany(
            "INSERT OR REPLACE INTO products (barcode, name, price, unit, updated_at) "
            "VALUES (?, ?, ?, ?, ?)",
            rows
        )
        conn.commit()
    finally:
        conn.close()

    return len(rows)
|
||||
|
||||
def reimport(self) -> int:
    """Delete every product row, then import again from ``self.excel_source``.

    Returns:
        Number of rows imported.
    """
    connection = self._connect()
    try:
        connection.execute("DELETE FROM products")
        connection.commit()
    finally:
        connection.close()

    imported = self.import_from_excel(self.excel_source)
    return imported
|
||||
|
||||
def get_price(self, barcode: str) -> Optional[float]:
    """Look up the purchase price for one barcode.

    Args:
        barcode: Product barcode; compared after ``str()`` and stripping.

    Returns:
        The stored price, or ``None`` when the barcode is unknown.
    """
    key = str(barcode).strip()
    connection = self._connect()
    try:
        found = connection.execute(
            "SELECT price FROM products WHERE barcode = ?",
            (key,)
        ).fetchone()
        if not found:
            return None
        return found[0]
    finally:
        connection.close()
|
||||
|
||||
def get_prices(self, barcodes: List[str]) -> Dict[str, float]:
    """Look up purchase prices for many barcodes.

    Barcodes are queried in fixed-size chunks so the number of bound
    parameters stays below SQLite's per-statement limit regardless of how
    many barcodes are passed.

    Args:
        barcodes: Barcodes to look up; each is compared after ``str()``
            and stripping.

    Returns:
        ``{barcode: price}`` for the barcodes found; unknown ones are
        omitted.
    """
    if not barcodes:
        return {}

    # De-duplicate while keeping order; duplicates only waste parameters.
    keys = list(dict.fromkeys(str(b).strip() for b in barcodes))
    chunk_size = 500  # safely below the historical 999-parameter default

    prices: Dict[str, float] = {}
    conn = self._connect()
    try:
        for start in range(0, len(keys), chunk_size):
            batch = keys[start:start + chunk_size]
            placeholders = ','.join('?' * len(batch))
            cursor = conn.execute(
                f"SELECT barcode, price FROM products WHERE barcode IN ({placeholders})",
                batch
            )
            for found_barcode, price in cursor.fetchall():
                prices[found_barcode] = price
    finally:
        conn.close()
    return prices
|
||||
|
||||
def count(self) -> int:
    """Return the number of rows in the ``products`` table."""
    connection = self._connect()
    try:
        (total,) = connection.execute("SELECT COUNT(*) FROM products").fetchone()
        return total
    finally:
        connection.close()
|
||||
@@ -0,0 +1,5 @@
|
||||
"""
|
||||
OCR订单处理系统 - Excel处理模块
|
||||
----------------------------
|
||||
提供Excel文件处理、数据提取和转换功能。
|
||||
"""
|
||||
@@ -0,0 +1,535 @@
|
||||
"""
|
||||
单位转换模块
|
||||
----------
|
||||
提供单位转换功能,支持规格推断和单位自动提取。
|
||||
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
import os
|
||||
import json
|
||||
from typing import Dict, Tuple, Optional, Any, List, Union
|
||||
|
||||
from ..utils.log_utils import get_logger
|
||||
from .handlers.barcode_mapper import BarcodeMapper
|
||||
from .handlers.unit_converter_handlers import (
|
||||
JianUnitHandler, BoxUnitHandler, TiHeUnitHandler,
|
||||
GiftUnitHandler, UnitHandler
|
||||
)
|
||||
from .validators import ProductValidator
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# 条码映射配置文件路径
|
||||
BARCODE_MAPPING_CONFIG = "config/barcode_mappings.json"
|
||||
|
||||
class UnitConverter:
|
||||
"""
|
||||
单位转换器:处理不同单位之间的转换,支持从商品名称推断规格
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Load barcode mappings and set up patterns, handlers and the validator."""
    # Special-barcode configuration (from file, or built-in defaults).
    self.special_barcodes = self.load_barcode_mappings()

    # (regex, replacement template) pairs, tried in order by
    # extract_specification. More specific patterns must come first: the
    # three-level form is listed before the two-level one, which would
    # otherwise match the leading "a*b" of "a*b*c".
    self.spec_patterns = [
        # three-level forms such as 1*5*12, 1x5x12
        (r'(\d+)[*xX×](\d+)[*xX×](\d+)', r'\1*\2*\3'),
        # 1*6, 1x12, 1X20
        (r'(\d+)[*xX×](\d+)', r'\1*\2'),
        # "N入", e.g. "12入", "24入"
        (r'(\d+)入', r'1*\1'),
        # "N L*M" / "N升*M"; the count M is optional in the regex
        (r'([\d\.]+)[L升][*xX×]?(\d+)?', r'\1L*\2'),
        # "N kg*M" / "N公斤*M"
        (r'([\d\.]+)(?:kg|公斤)[*xX×]?(\d+)?', r'\1kg*\2'),
        # "N g*M" / "N克*M"
        (r'([\d\.]+)(?:g|克)[*xX×]?(\d+)?', r'\1g*\2'),
        # "N mL*M" / "N毫升*M"
        (r'([\d\.]+)(?:mL|毫升)[*xX×]?(\d+)?', r'\1mL*\2'),
    ]

    self._init_handlers()
    self.validator = ProductValidator()
|
||||
|
||||
def _init_handlers(self):
    """Create the barcode mapper and the ordered list of unit handlers."""
    self.barcode_mapper = BarcodeMapper(self.special_barcodes)

    # Listed from highest to lowest priority: gifts, "件", "箱", then
    # "提"/"盒".
    handler_types = (GiftUnitHandler, JianUnitHandler, BoxUnitHandler, TiHeUnitHandler)
    self.unit_handlers: List[UnitHandler] = [make() for make in handler_types]
|
||||
|
||||
def extract_unit_from_quantity(self, quantity_str: str) -> Tuple[Optional[float], Optional[str]]:
    """Split a quantity string into its number and unit.

    Examples:
        "2箱" -> (2.0, "箱"); "1.5提" -> (1.5, "提");
        "数量: 5盒" -> (5.0, "盒"); "× 2瓶" -> (2.0, "瓶");
        "约5kg左右" -> (5.0, "kg")

    Args:
        quantity_str: Quantity text such as "2箱" or "5件".

    Returns:
        ``(number, unit)``, or ``(None, None)`` when nothing can be
        extracted or the input is not a non-empty string.
    """
    if not quantity_str or not isinstance(quantity_str, str):
        return None, None

    # NOTE: these markers are removed wherever they occur, not only at the
    # start of the string (behaviour kept from the original).
    cleaned_str = quantity_str.strip()
    for prefix in ['数量:', '数量:', '×', 'x', 'X', '*']:
        cleaned_str = cleaned_str.replace(prefix, '').strip()

    # Whole string is "<number><unit>".
    basic_match = re.match(r'^([\d\.]+)\s*([^\d\s\.]+)$', cleaned_str)
    if basic_match:
        try:
            num = float(basic_match.group(1))
            unit = basic_match.group(2)
            logger.info(f"从数量提取单位(基本格式): {quantity_str} -> 数量={num}, 单位={unit}")
            return num, unit
        except ValueError:
            pass

    # Number and unit embedded in other text. The units are an alternation
    # (longest first) rather than a character class, so multi-character
    # units such as "kg" and "毫升" are captured whole.
    complex_match = re.search(
        r'([\d\.]+)\s*(毫升|kg|ml|箱|件|瓶|提|盒|袋|桶|包|升|个|g|L)',
        cleaned_str,
        re.IGNORECASE,
    )
    if complex_match:
        try:
            num = float(complex_match.group(1))
            unit = complex_match.group(2)
            logger.info(f"从数量提取单位(复杂格式): {quantity_str} -> 数量={num}, 单位={unit}")
            return num, unit
        except ValueError:
            pass

    return None, None
|
||||
|
||||
def extract_specification(self, text: str) -> Optional[str]:
    """Extract a packaging specification from free text.

    "N入白膜" is handled first, then ``self.spec_patterns`` is tried in
    order. The result is built from the matched part only, by filling the
    pattern's replacement template; an optional count that did not match
    defaults to 1 (e.g. "12.9L" -> "12.9L*1").

    Args:
        text: Text to search.

    Returns:
        The specification string, or ``None`` when nothing matches or the
        input is not a non-empty string.
    """
    if not text or not isinstance(text, str):
        return None

    # e.g. "550纯净水24入白膜" -> "1*24"
    match = re.search(r'.*?(\d+)入白膜', text)
    if match:
        result = f"1*{match.group(1)}"
        logger.info(f"提取规格(入白膜): {text} -> {result}")
        return result

    for pattern, replacement in self.spec_patterns:
        match = re.search(pattern, text)
        if not match:
            continue

        # Fill each \N reference from the match; an unmatched optional
        # group becomes "1" instead of "None" or an empty string.
        result = re.sub(
            r'\\(\d)',
            lambda ref: match.group(int(ref.group(1))) or '1',
            replacement,
        )
        if replacement.count('*') == 2:
            logger.info(f"提取三级规格: {text} -> {result}")
        else:
            logger.info(f"提取规格: {text} -> {result}")
        return result

    return None
|
||||
|
||||
def infer_specification_from_name(self, name: str) -> Optional[str]:
    """Infer a packaging specification from a product name.

    Rules, first match wins:
        1. weight/volume times count, "450g*15" -> "1*15"
        2. "N入白膜" -> "1*N"
        3. "N入纸箱" -> "1*N"
        4. explicit three-level spec, "1*5*12" -> "1*5*12"
        5. explicit two-level spec, "1*15" -> "1*15"
        6. "N纸箱" -> "1*N"
        7. "N白膜" -> "1*N"
        8. litres times count, "1.8L*8瓶" -> "1.8L*8"
        9. litres only, "12.9L桶装水" -> "12.9L*1"
        10. fall back to ``extract_specification``

    Args:
        name: Product name.

    Returns:
        The inferred specification, or ``None`` when nothing applies or
        the input is not a non-empty string.
    """
    if not name or not isinstance(name, str):
        return None

    # (regex, result template filled with the captured groups, log label)
    rules = (
        (r'.*?\d+(?:g|ml|毫升|克)[*xX×](\d+)', '1*{0}', '重量/容量*数量'),
        (r'.*?(\d+)入白膜', '1*{0}', '入白膜'),
        (r'.*?(\d+)入纸箱', '1*{0}', '入纸箱'),
        # Must precede the two-level rule, which would otherwise keep only
        # the first two levels of "a*b*c".
        (r'(\d+)[*xX×](\d+)[*xX×](\d+)', '{0}*{1}*{2}', '三级格式'),
        (r'.*?(\d+)[*xX×](\d+).*', '{0}*{1}', '直接格式'),
        (r'.*?(\d+)纸箱', '1*{0}', '纸箱'),
        (r'.*?(\d+)白膜', '1*{0}', '白膜'),
        (r'.*?([\d\.]+)[Ll升][*×xX](\d+).*', '{0}L*{1}', '容量*数量'),
        (r'.*?([\d\.]+)[Ll升].*', '{0}L*1', '简单容量'),
    )
    for pattern, template, label in rules:
        match = re.search(pattern, name)
        if match:
            inferred_spec = template.format(*match.groups())
            logger.info(f"从名称推断规格({label}): {name} -> {inferred_spec}")
            return inferred_spec

    # Generic patterns as a last resort.
    spec = self.extract_specification(name)
    if spec:
        logger.info(f"从名称推断规格(通用模式): {name} -> {spec}")
        return spec

    return None
|
||||
|
||||
def parse_specification(self, spec: str) -> Tuple[int, int, Optional[int]]:
    """Parse a specification string into packaging levels.

    Whitespace is removed and ``x``/``X``/``×`` are normalised to ``*``.
    The forms below are then tried in order, first match wins:
    equation ("1件=12桶"), three-level ("1*5*12"), weight ("5kg*6"),
    millilitres ("500ml*15"), litres ("1L*12"), two-level ("1*12"),
    litres with a loose number, and finally any "*N" tail ("IL*12").

    Args:
        spec: Specification string.

    Returns:
        ``(level1, level2, level3)``; ``level3`` is ``None`` for two-level
        specs. ``(1, 1, None)`` when the input is empty, not a string, or
        cannot be parsed.
    """
    default = (1, 1, None)
    if not spec or not isinstance(spec, str):
        return default

    try:
        spec = re.sub(r'[xX×]', '*', re.sub(r'\s+', '', spec))
        logger.debug(f"解析规格: {spec}")

        def per_pack(label, group_index):
            # Rule result: one outer pack holding the captured count.
            def build(found):
                units = int(found.group(group_index))
                return (1, units, None), f"{label}: {spec} -> 1*{units}"
            return build

        def three_levels(found):
            first, second, third = (int(found.group(i)) for i in (1, 2, 3))
            return (first, second, third), f"解析三级规格: {spec} -> {first}*{second}*{third}"

        def two_levels(found):
            first, second = int(found.group(1)), int(found.group(2))
            return (first, second, None), f"解析二级规格: {spec} -> {first}*{second}"

        def loose_litres(found):
            volume = float(found.group(1))
            quantity = int(found.group(2))
            return (1, quantity, None), f"解析容量规格: {spec} -> {volume}L*{quantity}"

        rules = (
            (re.match,
             r'(\d+(?:\.\d+)?)\s*(?:件|箱|提|盒)\s*[==]\s*(\d+)\s*(?:瓶|桶|盒|支|个|袋|罐|包|卷)',
             0, per_pack("解析等式规格", 2)),
            (re.match, r'(\d+)[*](\d+)[*](\d+)', 0, three_levels),
            (re.match, r'([\d\.]+)(?:kg|g|克|千克|公斤)[*](\d+)', re.IGNORECASE,
             per_pack("解析重量规格", 2)),
            (re.match, r'(\d+)(?:ml|毫升)[*](\d+)', re.IGNORECASE,
             per_pack("解析容量(ml)规格", 2)),
            (re.match, r'(\d+(?:\.\d+)?)[Ll升][*](\d+)', 0,
             per_pack("解析容量(L)规格", 2)),
            (re.match, r'(\d+)[*](\d+)', 0, two_levels),
            (re.match, r'([\d\.]+)[L升][*xX×](\d+)', 0, loose_litres),
            # Irregular forms such as "IL*12": take the number after "*".
            (re.search, r'[^0-9]*\*(\d+)', 0, per_pack("解析不规范规格", 1)),
        )

        for finder, pattern, flags, build in rules:
            found = finder(pattern, spec, flags)
            if not found:
                continue
            try:
                levels, message = build(found)
            except ValueError:
                # Unconvertible number: fall through to the next rule.
                continue
            logger.info(message)
            return levels

        logger.warning(f"无法解析规格: {spec},使用默认值1*1")
        return default
    except Exception as e:
        logger.error(f"解析规格时出错: {e}")
        return default
|
||||
|
||||
def process_unit_conversion(self, product: Dict) -> Dict:
    """Convert a product's quantity, price and unit to the base unit.

    Steps:
        1. Validate the product.
        2. Apply special-barcode handling / barcode mapping.
        3. Determine the specification: the one on the product after
           mapping, otherwise one inferred from the product name. Without
           either, the product is returned as it stands.
        4. Parse the specification and hand the product to the first unit
           handler that accepts it (gift, "件", "箱", "提"/"盒").
        5. If no handler applies, return the product unchanged.

    Args:
        product: Product dictionary.

    Returns:
        The processed product dictionary; the input is not modified.
    """
    product = self.validator.validate_product(product)

    # Work on a copy so the caller's dict is not modified.
    result = product.copy()

    if not result.get('barcode', ''):
        return result

    result = self.barcode_mapper.map_barcode(result)

    # Read the specification only now: the mapper may have set a fixed one,
    # which must not be ignored or replaced by a guess from the name.
    if not result.get('specification', ''):
        inferred_spec = self.infer_specification_from_name(result.get('name', ''))
        if not inferred_spec:
            return result
        result['specification'] = inferred_spec
        logger.info(f"从商品名称推断规格: {result.get('name', '')} -> {inferred_spec}")

    level1, level2, level3 = self.parse_specification(result.get('specification', ''))

    for handler in self.unit_handlers:
        if handler.can_handle(result):
            return handler.handle(result, level1, level2, level3)

    logger.info(f"其他单位处理: 保持原样 数量: {result.get('quantity', 0)}, 单价: {result.get('price', 0)}, 单位: {result.get('unit', '')}")
    return result
|
||||
|
||||
def load_barcode_mappings(self) -> Dict[str, Dict[str, Any]]:
    """Load the barcode mappings from ``BARCODE_MAPPING_CONFIG``.

    If the file does not exist it is created from the built-in defaults.
    If it cannot be read, or its top level is not a JSON object, the
    built-in defaults are returned and the file is left untouched.

    Returns:
        Mapping of barcode to its special-handling configuration.
    """
    default_mappings = {
        '6925019900087': {
            'multiplier': 10,
            'target_unit': '瓶',
            'description': '特殊处理:数量*10,单位转换为瓶'
        },
        '6921168593804': {
            'multiplier': 30,
            'target_unit': '瓶',
            'description': 'NFC产品特殊处理:每箱30瓶'
        },
        '6901826888138': {
            'multiplier': 30,
            'target_unit': '瓶',
            'fixed_price': 112/30,
            'specification': '1*30',
            'description': '特殊处理: 规格1*30,数量*30,单价=112/30'
        },
        # plain barcode remaps
        '6920584471055': {
            'map_to': '6920584471017',
            'description': '条码映射:6920584471055 -> 6920584471017'
        },
        '6925861571159': {
            'map_to': '69021824',
            'description': '条码映射:6925861571159 -> 69021824'
        },
        '6923644268923': {
            'map_to': '6923644268480',
            'description': '条码映射:6923644268923 -> 6923644268480'
        },
        # needs both a fixed specification and a remap
        '6958620703716': {
            'specification': '1*14',
            'map_to': '6958620703907',
            'description': '特殊处理: 规格1*14,同时映射到6958620703907'
        }
    }

    try:
        if not os.path.exists(BARCODE_MAPPING_CONFIG):
            self.save_barcode_mappings(default_mappings)
            logger.info(f"创建默认条码映射配置,共{len(default_mappings)}项")
            return default_mappings

        with open(BARCODE_MAPPING_CONFIG, 'r', encoding='utf-8') as file:
            mappings = json.load(file)

        # Callers index the result by barcode, so anything but an object
        # (a list, a string, null, ...) would fail later at lookup time.
        if not isinstance(mappings, dict):
            logger.error(f"条码映射配置格式无效(应为JSON对象): {BARCODE_MAPPING_CONFIG},使用默认映射")
            return default_mappings

        logger.info(f"成功加载条码映射配置,共{len(mappings)}项")
        return mappings
    except Exception as e:
        logger.error(f"加载条码映射配置失败: {e}")
        return default_mappings
|
||||
|
||||
def save_barcode_mappings(self, mappings: Dict[str, Dict[str, Any]]) -> bool:
    """Write the mappings to ``BARCODE_MAPPING_CONFIG`` as indented UTF-8 JSON.

    Args:
        mappings: Barcode mapping dictionary.

    Returns:
        ``True`` on success, ``False`` if anything failed (logged).
    """
    target = BARCODE_MAPPING_CONFIG
    try:
        # The configuration folder may not exist yet.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(target, 'w', encoding='utf-8') as handle:
            json.dump(mappings, handle, ensure_ascii=False, indent=2)
    except Exception as e:
        logger.error(f"保存条码映射配置失败: {e}")
        return False

    logger.info(f"条码映射配置保存成功,共{len(mappings)}项")
    return True
|
||||
|
||||
def update_barcode_mappings(self, new_mappings: Dict[str, Dict[str, Any]]) -> bool:
    """Replace the barcode mappings in memory and persist them.

    The live barcode mapper is updated too, so later conversions use the
    new mappings immediately.

    Args:
        new_mappings: New barcode mapping dictionary.

    Returns:
        Whether saving to the configuration file succeeded.
    """
    self.special_barcodes = new_mappings

    # The mapper holds its own reference to the mapping dict; rebinding
    # self.special_barcodes alone would leave it on the old one.
    mapper = getattr(self, 'barcode_mapper', None)
    if mapper is not None:
        mapper.special_barcodes = new_mappings or {}

    return self.save_barcode_mappings(new_mappings)
|
||||
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
单位转换处理程序包
|
||||
-----------------
|
||||
提供单位转换和条码处理的各种处理程序
|
||||
"""
|
||||
|
||||
from typing import Dict, Any
|
||||
|
||||
# 导出所有处理程序类
|
||||
from .barcode_mapper import BarcodeMapper
|
||||
from .unit_converter_handlers import JianUnitHandler, BoxUnitHandler, TiHeUnitHandler, GiftUnitHandler, UnitHandler
|
||||
@@ -0,0 +1,83 @@
|
||||
"""
|
||||
条码映射处理程序
|
||||
-------------
|
||||
处理特殊条码的映射和转换
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Optional, Any
|
||||
|
||||
from ...utils.log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class BarcodeMapper:
|
||||
"""
|
||||
条码映射器:负责特殊条码的映射和处理
|
||||
"""
|
||||
|
||||
def __init__(self, special_barcodes: Dict[str, Dict[str, Any]]):
    """Store the special-barcode configuration.

    Args:
        special_barcodes: Configuration keyed by barcode; a falsy value is
            replaced by an empty dict.
    """
    self.special_barcodes = special_barcodes if special_barcodes else {}
|
||||
|
||||
def map_barcode(self, product: Dict[str, Any]) -> Dict[str, Any]:
    """Apply special-barcode handling to a product.

    For a barcode present in ``self.special_barcodes`` the configured
    actions are applied independently of one another:
        * ``specification``: set a fixed specification;
        * ``multiplier``: multiply the quantity, divide the price (or use
          ``fixed_price``), and set the unit to ``target_unit``;
        * ``map_to``: replace the barcode.

    Args:
        product: Product dictionary containing a ``barcode``.

    Returns:
        A processed copy; the input dictionary is not modified.
    """
    result = product.copy()
    barcode = result.get('barcode', '')

    if not barcode or barcode not in self.special_barcodes:
        return result

    special_config = self.special_barcodes[barcode]

    # A fixed specification applies on its own as well: an entry may carry
    # only 'specification' and 'map_to', with no multiplier.
    if 'specification' in special_config:
        result['specification'] = special_config['specification']
        logger.info(f"特殊条码({barcode})使用固定规格: {special_config['specification']}")

    if 'multiplier' in special_config:
        multiplier = special_config.get('multiplier', 1)
        usable = (
            isinstance(multiplier, (int, float))
            and not isinstance(multiplier, bool)
            and multiplier > 0
        )
        if not usable:
            # A bad entry in the mapping file must not crash the run.
            logger.warning(f"特殊条码({barcode})的倍数无效: {multiplier},跳过数量换算")
        else:
            target_unit = special_config.get('target_unit', '瓶')

            quantity = result.get('quantity', 0)
            new_quantity = quantity * multiplier

            price = result.get('price', 0)
            new_price = price / multiplier if price else 0

            # A configured fixed unit price wins over the computed one.
            if 'fixed_price' in special_config:
                new_price = special_config['fixed_price']
                logger.info(f"特殊条码({barcode})使用固定单价: {new_price}")

            logger.info(f"特殊条码处理: {barcode}, 数量: {quantity} -> {new_quantity}, 单价: {price} -> {new_price}, 单位: {result.get('unit', '')} -> {target_unit}")

            result['quantity'] = new_quantity
            result['price'] = new_price
            result['unit'] = target_unit

    # Remap last so the steps above still log the original barcode.
    if 'map_to' in special_config:
        new_barcode = special_config['map_to']
        logger.info(f"条码映射: {barcode} -> {new_barcode}")
        result['barcode'] = new_barcode

    return result
|
||||
@@ -0,0 +1,286 @@
|
||||
"""
|
||||
单位转换处理程序
|
||||
-------------
|
||||
处理不同单位的转换逻辑
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Optional, Any, Tuple, Protocol
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from ...utils.log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _unit_text(product: Dict[str, Any]) -> str:
    """Return the product's unit as a stripped string ('' when missing)."""
    return str(product.get('unit', '')).strip()


def _is_positive_count(packaging_count) -> bool:
    """Tell whether a packaging count can safely be multiplied/divided by."""
    return bool(packaging_count) and packaging_count > 0


class UnitHandler(ABC):
    """
    Base class of all unit handlers: defines the unit-conversion interface.
    """

    @abstractmethod
    def can_handle(self, product: Dict[str, Any]) -> bool:
        """
        Tell whether this handler is responsible for the product.

        Args:
            product: Product info dict.

        Returns:
            True when the handler can process the product.
        """
        pass

    @abstractmethod
    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
        """
        Convert the product's unit.

        Args:
            product: Product info dict.
            level1: First-level packaging count.
            level2: Second-level packaging count.
            level3: Third-level packaging count, may be None.

        Returns:
            The processed product info dict.
        """
        pass


class _OuterPackageUnitHandler(UnitHandler):
    """
    Shared implementation for units that denote a whole outer package.

    Subclasses only set ``unit_name``; matching and conversion are identical
    for every such unit.
    """

    # Unit prefix handled by the concrete subclass (e.g. '件' or '箱').
    unit_name = ''

    def can_handle(self, product: Dict[str, Any]) -> bool:
        """
        Match the unit itself and any variant that starts with it
        (e.g. "件", "件、", "件装").

        Args:
            product: Product info dict.

        Returns:
            True when the stripped unit starts with ``unit_name``.
        """
        return _unit_text(product).startswith(self.unit_name)

    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
        """
        Convert to bottles: quantity x packaging count, price / packaging
        count, unit becomes "瓶".

        The packaging count is ``level2 * level3`` (``level2`` alone when
        there is no third level). A non-positive packaging count leaves the
        product unchanged instead of zeroing the quantity or dividing by zero.

        Args:
            product: Product info dict (not modified).
            level1: First-level packaging count (unused here).
            level2: Second-level packaging count.
            level3: Third-level packaging count, may be None.

        Returns:
            A processed copy of the product info dict.
        """
        result = product.copy()

        quantity = result.get('quantity', 0)
        price = result.get('price', 0)

        packaging_count = level2 * (level3 or 1)
        if not _is_positive_count(packaging_count):
            logger.warning(f"{self.unit_name}单位处理: 包装数量无效({packaging_count}),保持原样")
            return result

        new_quantity = quantity * packaging_count
        new_price = price / packaging_count if price else 0

        logger.info(f"{self.unit_name}单位处理: 数量: {quantity} -> {new_quantity}, 单价: {price} -> {new_price}, 单位: {self.unit_name} -> 瓶")

        result['quantity'] = new_quantity
        result['price'] = new_price
        result['unit'] = '瓶'

        return result


class JianUnitHandler(_OuterPackageUnitHandler):
    """
    Handles conversion of the "件" unit.
    """

    unit_name = '件'


class BoxUnitHandler(_OuterPackageUnitHandler):
    """
    Handles conversion of the "箱" unit.
    """

    unit_name = '箱'


class TiHeUnitHandler(UnitHandler):
    """
    Handles conversion of the "提" and "盒" units.
    """

    def can_handle(self, product: Dict[str, Any]) -> bool:
        """
        Match units that are, or start with, "提" or "盒".

        Args:
            product: Product info dict.

        Returns:
            True when the handler can process the product.
        """
        return _unit_text(product).startswith(('提', '盒'))

    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
        """
        Convert "提"/"盒" units.

        - Three-level specification: quantity x level3, price / level3,
          unit becomes "瓶".
        - Two-level specification (``level3`` is None): left unchanged.

        Args:
            product: Product info dict (not modified).
            level1: First-level packaging count (unused here).
            level2: Second-level packaging count (unused here).
            level3: Third-level packaging count, may be None.

        Returns:
            A processed copy of the product info dict.
        """
        result = product.copy()

        quantity = result.get('quantity', 0)
        price = result.get('price', 0)
        unit = result.get('unit', '')

        if level3 is None:
            logger.info(f"提/盒单位(二级规格)处理: 保持原样 数量: {quantity}, 单价: {price}, 单位: {unit}")
            return result

        # Only the innermost level counts for these units.
        packaging_count = level3
        if not _is_positive_count(packaging_count):
            logger.warning(f"提/盒单位(三级规格)处理: 包装数量无效({packaging_count}),保持原样")
            return result

        new_quantity = quantity * packaging_count
        new_price = price / packaging_count if price else 0

        logger.info(f"提/盒单位(三级规格)处理: 数量: {quantity} -> {new_quantity}, 单价: {price} -> {new_price}, 单位: {unit} -> 瓶")

        result['quantity'] = new_quantity
        result['price'] = new_price
        result['unit'] = '瓶'

        return result


class GiftUnitHandler(UnitHandler):
    """
    Handles the special case of gift items.
    """

    def can_handle(self, product: Dict[str, Any]) -> bool:
        """
        Tell whether the product is flagged as a gift.

        Args:
            product: Product info dict.

        Returns:
            True only when ``product['is_gift']`` is exactly ``True``.
        """
        return product.get('is_gift', False) is True

    def handle(self, product: Dict[str, Any], level1: int, level2: int, level3: Optional[int]) -> Dict[str, Any]:
        """
        Convert a gift item: the quantity is converted like a regular item
        of the same unit, the price is always forced to 0.

        Units are matched the same way the regular handlers match them
        (stripped, prefix match), so "件装" or " 箱" gifts are converted too.

        Args:
            product: Product info dict (not modified).
            level1: First-level packaging count (unused here).
            level2: Second-level packaging count.
            level3: Third-level packaging count, may be None.

        Returns:
            A processed copy of the product info dict.
        """
        result = product.copy()

        unit = result.get('unit', '')
        unit_key = _unit_text(result)
        quantity = result.get('quantity', 0)

        packaging_count = None
        label = ''
        if unit_key.startswith(('件', '箱')):
            # Outer package: level2 * level3 (level2 alone without level3).
            packaging_count = level2 * (level3 or 1)
            label = f"赠品{unit}单位处理"
        elif unit_key.startswith(('提', '盒')) and level3 is not None:
            # Three-level "提"/"盒": only the innermost level counts.
            packaging_count = level3
            label = f"赠品{unit}单位(三级规格)处理"

        if packaging_count is not None and _is_positive_count(packaging_count):
            new_quantity = quantity * packaging_count
            logger.info(f"{label}: 数量: {quantity} -> {new_quantity}, 单价: 0, 单位: {unit} -> 瓶")
            result['quantity'] = new_quantity
            result['unit'] = '瓶'
        else:
            logger.info(f"赠品{unit}单位处理: 保持原样 数量: {quantity}, 单价: 0, 单位: {unit}")

        # Gifts never carry a price.
        result['price'] = 0

        return result
|
||||
@@ -0,0 +1,423 @@
|
||||
"""
|
||||
订单合并模块
|
||||
----------
|
||||
提供采购单合并功能,将多个采购单合并为一个。
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import xlrd
|
||||
import xlwt
|
||||
from xlutils.copy import copy as xlcopy
|
||||
from typing import Dict, List, Optional, Tuple, Union, Any, Callable
|
||||
from datetime import datetime
|
||||
|
||||
from ...config.settings import ConfigManager
|
||||
from ..utils.log_utils import get_logger
|
||||
from ..handlers.column_mapper import ColumnMapper
|
||||
from ..utils.file_utils import (
|
||||
ensure_dir,
|
||||
get_file_extension,
|
||||
get_files_by_extensions,
|
||||
load_json,
|
||||
save_json
|
||||
)
|
||||
from ..utils.string_utils import (
|
||||
clean_string,
|
||||
clean_barcode,
|
||||
format_barcode
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class PurchaseOrderMerger:
    """
    Purchase-order merger: combines several purchase-order Excel files
    into one file.

    Individual orders (files whose name starts with ``采购单_``) are read
    from ``RESULT_DIR``, reduced to barcode / quantity / gift quantity /
    unit price, aggregated per (barcode, unit price) and written into a copy
    of the configured ``.xls`` template.
    """

    # Folder that holds the individual orders and receives the merged file.
    RESULT_DIR = "data/result"

    def __init__(self, config):
        """
        Initialise the merger.

        Args:
            config: Configuration object; only
                ``config.get(section, option, fallback=...)`` is used.

        Raises:
            Exception: Any error during set-up is logged and re-raised.
        """
        self.config = config

        try:
            # Output directory (created on demand).
            self.output_dir = config.get('Paths', 'output_folder', fallback='data/output')
            os.makedirs(self.output_dir, exist_ok=True)
            logger.info(f"使用输出目录: {os.path.abspath(self.output_dir)}")

            # Template path = template folder + template file name.
            template_folder = config.get('Paths', 'template_folder', fallback='templates')
            template_name = config.get('Templates', 'purchase_order', fallback='银豹-采购单模板.xls')
            self.template_path = os.path.join(template_folder, template_name)

            # A missing template is only reported here; writing fails later.
            if not os.path.exists(self.template_path):
                logger.warning(f"模板文件不存在: {self.template_path}")

            # Record of files that were already merged.
            self.merged_files_json = os.path.join(self.output_dir, "merged_files.json")
            self.merged_files = self._load_merged_files()

            logger.info(f"初始化PurchaseOrderMerger完成,模板文件: {self.template_path}")
        except Exception as e:
            logger.error(f"初始化PurchaseOrderMerger失败: {e}")
            raise

    def _load_merged_files(self) -> Dict[str, str]:
        """
        Load the record of already merged files.

        Returns:
            The merge-record dict (``{}`` is passed to ``load_json`` as the
            default).
        """
        return load_json(self.merged_files_json, {})

    def _save_merged_files(self) -> None:
        """Persist the record of already merged files."""
        save_json(self.merged_files, self.merged_files_json)

    @staticmethod
    def _report_progress(progress_cb: Optional[Callable[[int], None]], percent: int) -> None:
        """
        Best-effort progress notification.

        A failing callback must never abort the merge, so its exception is
        logged at debug level and otherwise ignored.
        """
        if not progress_cb:
            return
        try:
            progress_cb(percent)
        except Exception as e:
            logger.debug(f"进度回调失败({percent}%): {e}")

    def get_purchase_orders(self) -> List[str]:
        """
        Collect the purchase-order Excel files in ``RESULT_DIR``.

        Returns:
            Paths of all ``.xls``/``.xlsx`` files whose base name starts
            with ``采购单_``, newest modification time first; an empty list
            when there are none.
        """
        result_dir = self.RESULT_DIR
        logger.info(f"搜索目录 {result_dir} 中的采购单Excel文件")

        os.makedirs(result_dir, exist_ok=True)

        all_files = get_files_by_extensions(result_dir, ['.xls', '.xlsx'])
        purchase_orders = [
            file for file in all_files
            if os.path.basename(file).startswith('采购单_')
        ]

        if not purchase_orders:
            logger.warning(f"未在 {result_dir} 目录下找到采购单Excel文件")
            return []

        # Newest file first.
        purchase_orders.sort(key=os.path.getmtime, reverse=True)

        logger.info(f"找到 {len(purchase_orders)} 个采购单Excel文件")
        return purchase_orders

    def read_purchase_order(self, file_path: str) -> Optional[pd.DataFrame]:
        """
        Read one purchase-order Excel file.

        When ``ColumnMapper.detect_header_row`` reports a header row inside
        the data, that row becomes the column header. Columns recognised by
        ``ColumnMapper.find_column`` are then renamed to the Chinese names
        used downstream (条码 / 采购量 / 采购单价 / 赠送量).

        Args:
            file_path: Path of the purchase-order file.

        Returns:
            The data frame, or None when reading fails.
        """
        try:
            df = pd.read_excel(file_path)
            logger.info(f"成功读取采购单文件: {file_path}")
            logger.debug(f"Excel文件的列名: {df.columns.tolist()}")

            # Special case: the real header may sit in one of the first rows.
            header_row_idx = ColumnMapper.detect_header_row(df, max_rows=5, min_matches=3)
            if header_row_idx >= 0:
                logger.info(f"检测到表头在第 {header_row_idx+1} 行")

                header_row = df.iloc[header_row_idx].astype(str)
                data_rows = df.iloc[header_row_idx+1:].reset_index(drop=True)

                # Empty header cells get a positional placeholder name.
                new_columns = []
                for i, col in enumerate(header_row):
                    col_str = str(col)
                    if col_str in ('nan', 'None') or pd.isna(col):
                        new_columns.append(f"Col_{i}")
                    else:
                        new_columns.append(col_str)

                data_rows.columns = new_columns
                df = data_rows
                logger.debug(f"重新构建的数据帧列名: {df.columns.tolist()}")

            # Map recognised columns to the Chinese names used downstream.
            all_columns = df.columns.tolist()
            logger.info(f"列名: {all_columns}")

            standard_to_chinese = {
                'barcode': '条码',
                'quantity': '采购量',
                'unit_price': '采购单价',
                'gift_quantity': '赠送量',
            }

            mapped_columns = {}
            for std_name, chinese_name in standard_to_chinese.items():
                matched = ColumnMapper.find_column(all_columns, std_name)
                if matched:
                    mapped_columns[chinese_name] = matched
                    logger.info(f"列名映射: {matched} -> {chinese_name}")

            if mapped_columns:
                rename_dict = {source: target for target, source in mapped_columns.items()}
                logger.info(f"列名重命名映射: {rename_dict}")
                df = df.rename(columns=rename_dict)
                logger.info(f"重命名后的列名: {df.columns.tolist()}")
            else:
                logger.warning(f"未找到可映射的列名: {file_path}")

            return df

        except Exception as e:
            logger.error(f"读取采购单文件失败: {file_path}, 错误: {str(e)}")
            return None

    def merge_purchase_orders(self, file_paths: List[str]) -> Optional[pd.DataFrame]:
        """
        Merge several purchase-order files.

        Rows with an empty barcode or a purchase quantity of 0 are dropped.
        The remaining rows are grouped by (barcode, unit price rounded to 4
        decimals) with quantities and gift quantities summed.

        Args:
            file_paths: Paths of the purchase-order files.

        Returns:
            The merged data frame sorted by barcode, with the columns 条码,
            采购单价, 采购量 and 赠送量 (float; a gift total of 0 is returned
            as a missing value). None when nothing could be merged.
        """
        if not file_paths:
            logger.warning("没有需要合并的采购单文件")
            return None

        # Read every file; unreadable ones are skipped.
        dfs = []
        for file_path in file_paths:
            df = self.read_purchase_order(file_path)
            if df is not None:
                dfs.append(df)

        if not dfs:
            logger.warning("没有成功读取的采购单文件")
            return None

        logger.info(f"开始合并 {len(dfs)} 个采购单文件")

        required_columns = ['条码', '采购量', '采购单价']
        processed_dfs = []
        for i, df in enumerate(dfs):
            missing_columns = [col for col in required_columns if col not in df.columns]
            if missing_columns:
                logger.warning(f"数据帧 {i} 缺少必要的列: {missing_columns}")
                continue

            # A missing gift column counts as "no gifts"; the source frame is
            # left untouched.
            if '赠送量' in df.columns:
                gift_source = df['赠送量']
            else:
                gift_source = pd.Series(0, index=df.index)

            cleaned_df = pd.DataFrame({
                # Barcode as a string without a decimal point.
                '条码': df['条码'].apply(lambda x: format_barcode(x) if pd.notna(x) else ''),
                '采购量': pd.to_numeric(df['采购量'], errors='coerce').fillna(0),
                # Unit price is kept with 4 decimals.
                '采购单价': pd.to_numeric(df['采购单价'], errors='coerce').fillna(0).round(4),
                '赠送量': pd.to_numeric(gift_source, errors='coerce').fillna(0),
            })

            # Skip rows without barcode or with a zero purchase quantity.
            valid_df = cleaned_df[(cleaned_df['条码'] != '') & (cleaned_df['采购量'] > 0)]

            if len(valid_df) > 0:
                processed_dfs.append(valid_df)
                logger.info(f"处理文件 {i+1}: 有效记录 {len(valid_df)} 行")
            else:
                logger.warning(f"处理文件 {i+1}: 没有有效记录")

        if not processed_dfs:
            logger.warning("没有有效的数据帧用于合并")
            return None

        merged_df = pd.concat(processed_dfs, ignore_index=True)

        # Round again so float noise cannot split identical prices.
        merged_df['采购单价'] = merged_df['采购单价'].round(4)

        result = merged_df.groupby(['条码', '采购单价'], as_index=False).agg({
            '采购量': 'sum',
            '赠送量': 'sum'
        })

        result = result.sort_values('条码').reset_index(drop=True)

        # A gift total of 0 is reported as "missing". The column is cast to
        # float first: writing a missing value into an integer column is a
        # deprecated upcasting assignment in pandas.
        gift_totals = result['赠送量'].astype(float)
        result['赠送量'] = gift_totals.mask(gift_totals == 0)

        logger.info(f"合并完成,共 {len(result)} 条商品记录")
        return result

    def create_merged_purchase_order(self, df: pd.DataFrame) -> Optional[str]:
        """
        Write the merged order into a copy of the template workbook.

        Only four columns are filled, starting at the second row: barcode
        (B), purchase quantity (C), gift quantity (D, only when positive)
        and unit price (E, 4 decimals).

        Args:
            df: The merged data frame.

        Returns:
            Path of the written ``合并采购单_<timestamp>.xls`` file, or None
            when writing fails.
        """
        try:
            template_workbook = xlrd.open_workbook(self.template_path, formatting_info=True)
            template_sheet = template_workbook.sheet_by_index(0)

            # Dump the first template rows for debugging.
            logger.info("分析模板结构")
            for i in range(min(5, template_sheet.nrows)):
                row_values = [str(cell.value).strip() for cell in template_sheet.row(i)]
                logger.debug(f"模板第{i+1}行: {row_values}")

            # Fixed column layout of the template (0-based indices).
            barcode_col = 1   # column B: barcode
            quantity_col = 2  # column C: purchase quantity
            gift_col = 3      # column D: gift quantity
            price_col = 4     # column E: unit price

            # Data starts on the second row (index 1).
            data_start_row = 1

            output_workbook = xlcopy(template_workbook)
            output_sheet = output_workbook.get_sheet(0)

            price_style = xlwt.XFStyle()
            price_style.num_format_str = '0.0000'

            quantity_style = xlwt.XFStyle()
            quantity_style.num_format_str = '0'

            for i, (_, row) in enumerate(df.iterrows()):
                r = data_start_row + i

                output_sheet.write(r, barcode_col, row['条码'])
                output_sheet.write(r, quantity_col, float(row['采购量']), quantity_style)

                # The gift cell stays empty unless there is a positive amount.
                if pd.notna(row['赠送量']) and float(row['赠送量']) > 0:
                    output_sheet.write(r, gift_col, float(row['赠送量']), quantity_style)

                output_sheet.write(r, price_col, float(row['采购单价']), price_style)

            timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
            result_dir = self.RESULT_DIR
            os.makedirs(result_dir, exist_ok=True)
            output_file = os.path.join(result_dir, f"合并采购单_{timestamp}.xls")

            output_workbook.save(output_file)
            logger.info(f"合并采购单已保存到: {output_file},共{len(df)}条记录")
            return output_file

        except Exception as e:
            logger.error(f"创建合并采购单时出错: {e}", exc_info=True)
            return None

    def process(self, file_paths: Optional[List[str]] = None, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
        """
        Run the complete merge.

        Args:
            file_paths: Files to merge; when None, the files returned by
                ``get_purchase_orders`` are used.
            progress_cb: Optional callback that receives the progress values
                97, 98 and 100; exceptions raised by it are ignored.

        Returns:
            Path of the merged file, or None when merging fails.
        """
        if file_paths is None:
            file_paths = self.get_purchase_orders()
        self._report_progress(progress_cb, 97)

        if not file_paths:
            logger.warning("没有找到可合并的采购单文件")
            return None

        merged_df = self.merge_purchase_orders(file_paths)
        if merged_df is None:
            logger.error("合并采购单失败")
            return None
        self._report_progress(progress_cb, 98)

        output_file = self.create_merged_purchase_order(merged_df)
        if output_file is None:
            logger.error("创建合并采购单文件失败")
            return None
        self._report_progress(progress_cb, 100)

        # Remember which output every input file went into.
        for file_path in file_paths:
            self.merged_files[file_path] = output_file
        self._save_merged_files()

        return output_file
|
||||
@@ -0,0 +1,860 @@
|
||||
"""
|
||||
Excel处理核心模块
|
||||
--------------
|
||||
提供Excel文件处理功能,包括表格解析、数据提取和处理。
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import xlrd
|
||||
import xlwt
|
||||
from xlutils.copy import copy as xlcopy
|
||||
from typing import Dict, List, Optional, Tuple, Union, Any, Callable
|
||||
from datetime import datetime
|
||||
|
||||
from ...config.settings import ConfigManager
|
||||
from ..utils.log_utils import get_logger
|
||||
from ..utils.file_utils import (
|
||||
ensure_dir,
|
||||
get_file_extension,
|
||||
get_latest_file,
|
||||
load_json,
|
||||
save_json
|
||||
)
|
||||
from ..utils.string_utils import (
|
||||
clean_string,
|
||||
extract_number,
|
||||
format_barcode,
|
||||
parse_monetary_string
|
||||
)
|
||||
from .converter import UnitConverter
|
||||
from ..handlers.column_mapper import ColumnMapper
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class ExcelProcessor:
|
||||
"""
|
||||
Excel处理器:处理OCR识别后的Excel文件,
|
||||
提取条码、单价和数量,并按照采购单模板的格式填充
|
||||
"""
|
||||
|
||||
def __init__(self, config):
    """
    Initialise the Excel processor.

    Reads the output/temp directories and the template path from the
    configuration, loads the processed-files cache, creates the directories
    and instantiates the unit converter.

    The configured ``template_file`` is used as given. When it does not
    exist and is a bare file name, it is additionally looked up inside the
    configured ``template_folder``; this matches the default configuration,
    which stores only the file name.

    Args:
        config: Configuration object; only
            ``config.get(section, option, fallback=...)`` is used.

    Raises:
        Exception: Any error during set-up is logged and re-raised.
    """
    self.config = config

    try:
        # Output and temp directories.
        self.output_dir = config.get('Paths', 'output_folder', fallback='data/output')
        self.temp_dir = config.get('Paths', 'temp_folder', fallback='data/temp')

        # Template path, with a fallback into the template folder for bare
        # file names that do not resolve from the working directory.
        template_path = config.get('Paths', 'template_file', fallback='templates/银豹-采购单模板.xls')
        if not os.path.exists(template_path) and not os.path.dirname(template_path):
            template_folder = config.get('Paths', 'template_folder', fallback='templates')
            candidate = os.path.join(template_folder, template_path)
            if os.path.exists(candidate):
                template_path = candidate
        self.template_path = template_path
        if not os.path.exists(self.template_path):
            logger.warning(f"模板文件不存在: {self.template_path}")

        # Cache of already processed files.
        self.cache_file = os.path.join(self.output_dir, "processed_files.json")
        self.processed_files = self._load_processed_files()

        # Make sure the directories exist.
        os.makedirs(self.output_dir, exist_ok=True)
        os.makedirs(self.temp_dir, exist_ok=True)

        logger.info(f"使用输出目录: {os.path.abspath(self.output_dir)}")
        logger.info(f"使用临时目录: {os.path.abspath(self.temp_dir)}")

        # Unit converter used by extract_product_info.
        self.unit_converter = UnitConverter()
        logger.info(f"初始化ExcelProcessor完成,模板文件: {self.template_path}")
    except Exception as e:
        logger.error(f"初始化ExcelProcessor失败: {e}")
        raise
|
||||
|
||||
def _load_processed_files(self) -> Dict[str, str]:
    """
    Load the cache of already processed files.

    Returns:
        Whatever ``load_json`` yields for ``self.cache_file``; an empty
        dict is passed to it as the default.
    """
    cache_path = self.cache_file
    records = load_json(cache_path, {})
    return records
|
||||
|
||||
def _save_processed_files(self) -> None:
    """Write the processed-files cache to ``self.cache_file``."""
    records, cache_path = self.processed_files, self.cache_file
    save_json(records, cache_path)
|
||||
|
||||
def get_latest_excel(self) -> Optional[str]:
    """
    Return the newest Excel file in the output directory, unless that file
    is itself a purchase order.

    Returns:
        Path of the newest ``.xlsx``/``.xls`` file, or None when there is
        none or when its name starts with ``采购单_``.
    """
    search_dir = self.output_dir
    logger.info(f"搜索目录 {search_dir} 中的Excel文件")

    # Any file name is accepted; only the extension is restricted.
    candidate = get_latest_file(
        search_dir,
        pattern="",
        extensions=['.xlsx', '.xls']
    )

    if not candidate:
        logger.warning(f"未在 {search_dir} 目录下找到未处理的Excel文件")
        return None

    # Generated purchase orders are output, not input.
    if os.path.basename(candidate).startswith('采购单_'):
        logger.warning(f"找到的最新文件是采购单,不作处理: {candidate}")
        return None

    logger.info(f"找到最新的Excel文件: {candidate}")
    return candidate
|
||||
|
||||
def extract_barcode(self, df: pd.DataFrame) -> List[str]:
    """
    Find the columns of a data frame that probably hold barcodes.

    Three strategies are tried in order; a later one is only used when the
    previous ones found nothing:

    1. Column name equals one of ``ColumnMapper.STANDARD_COLUMNS['barcode']``.
    2. Column name contains a barcode keyword (case-insensitive).
    3. The first 10 non-empty values all look like barcodes (8-14 digits).
       Values that pandas read as floats ("6901234567890.0") are accepted
       as well.

    Args:
        df: The data frame.

    Returns:
        List of candidate barcode column labels (possibly empty).
    """
    possible_barcode_columns = ColumnMapper.STANDARD_COLUMNS['barcode']

    found_columns = []

    # 1) Exact match on the column name.
    for col in df.columns:
        col_str = str(col).strip()
        if col_str in possible_barcode_columns:
            found_columns.append(col)
            logger.info(f"找到精确匹配的条码列: {col_str}")

    # 2) Keyword contained in the column name.
    if not found_columns:
        for col in df.columns:
            col_str = str(col).strip().lower()
            for keyword in ['条码', '条形码', 'barcode', '编码']:
                if keyword.lower() in col_str:
                    found_columns.append(col)
                    logger.info(f"找到部分匹配的条码列: {col} (包含关键词: {keyword})")
                    break

    # 3) Recognise the column by the shape of its data.
    if not found_columns and len(df) > 0:
        for col in df.columns:
            # A numeric column containing blanks is read as float, so its
            # values render with a trailing ".0" that must not count.
            sample_values = [
                text[:-2] if text.endswith('.0') else text
                for text in df[col].dropna().astype(str).tolist()[:10]
            ]

            # Most barcodes are 8 to 14 digits long.
            if sample_values and all(8 <= len(val) <= 14 and val.isdigit() for val in sample_values):
                found_columns.append(col)
                logger.info(f"基于数据特征识别的可能条码列: {col}")

    return found_columns
|
||||
|
||||
def extract_product_info(self, df: pd.DataFrame) -> List[Dict]:
    """
    Extract product information from a data frame.

    For every row with a barcode a product dict is built with the keys
    barcode, name, specification, quantity, unit, price, amount and is_gift
    (plus package_quantity when the specification could be parsed). Each
    field is taken from the well-known Chinese column name first and from
    the detected column mapping second. Rows whose remark-like columns
    mention a non-purchase keyword (exchange, return, void, ...) are
    skipped. The product is then passed through the unit converter.

    A row that raises during extraction is logged and skipped; it never
    aborts the extraction of the remaining rows.

    Args:
        df: The data frame.

    Returns:
        List of product info dicts.
    """
    products = []

    column_mapping = self._detect_column_mapping(df)
    logger.info(f"检测到列映射: {column_mapping}")

    remark_column_keys = ['备注', '说明', '类型', '备注1']
    non_purchase_keys = ['换货', '退货', '作废', '减钱', '冲减', '赠品单', '补货']

    # "<volume><unit>*<count>" inside a product name, e.g. "1.8L*8瓶".
    container_pattern = r'.*?(\d+(?:\.\d+)?)\s*(?:ml|[mM][lL]|[lL]|升|毫升)[*×xX](\d+).*'
    # "<weight/volume>*<count>" inside a product name, e.g. "500g*12".
    weight_volume_pattern = r'.*?\d+(?:g|ml|毫升|克)[*xX×](\d+)'
    # Same unit alternatives as container_pattern, so that upper-case
    # spellings such as "500ML*12" are recognised as well.
    volume_pattern = r'(\d+(?:\.\d+)?)\s*(?:ml|[mM][lL]|[lL]|升|毫升)[*×xX](\d+)'
    simple_pattern = r'(\d+)[*×xX](\d+)'

    def first_value(row, *candidates):
        """Return the first non-missing cell among the candidate columns."""
        for column in candidates:
            if column and column in df.columns:
                value = row[column]
                if not pd.isna(value):
                    return value
        return None

    for idx, row in df.iterrows():
        try:
            product = {
                'barcode': '',        # barcode
                'name': '',           # product name
                'specification': '',  # specification
                'quantity': 0,        # quantity
                'unit': '',           # unit
                'price': 0,           # unit price
                'amount': 0,          # line amount
                'is_gift': False      # gift flag
            }

            # Barcode; rows without one are skipped.
            barcode_value = first_value(row, '条码', column_mapping.get('barcode'))
            if barcode_value is not None:
                product['barcode'] = str(barcode_value).strip()
            if not product['barcode']:
                continue

            # Remark-like columns: drop exchange / return / void rows etc.
            skip_row = False
            for col in df.columns:
                col_str = str(col)
                if any(k in col_str for k in remark_column_keys):
                    val = str(row[col]).strip()
                    if any(k in val for k in non_purchase_keys):
                        logger.info(f"过滤非采购行: {product['barcode']} - {product.get('name', '')}, 原因: {col_str}包含 '{val}'")
                        skip_row = True
                        break
            if skip_row:
                continue

            # Product name.
            name_value = first_value(row, '商品名称', '名称', column_mapping.get('name'))
            if name_value is not None:
                product['name'] = str(name_value).strip()

            # Unit.
            unit_value = first_value(row, '单位', column_mapping.get('unit'))
            if unit_value is not None:
                product['unit'] = str(unit_value).strip()

            # Unit price (raw cell value).
            price_value = first_value(row, '单价', column_mapping.get('price'))
            if price_value is not None:
                product['price'] = price_value

            # Line amount (raw cell value).
            amount_value = first_value(row, '金额', '小计', column_mapping.get('amount'))
            if amount_value is not None:
                product['amount'] = amount_value

            # Gift detection by amount: zero, empty, or unparsable text such
            # as "o"/"O".
            # NOTE(review): 'amount' defaults to 0, so a row without any
            # amount cell is flagged as a gift too - confirm this is intended.
            amt = product.get('amount', None)
            try:
                is_amt_gift = False
                if amt is None:
                    is_amt_gift = True
                elif isinstance(amt, str):
                    parsed = parse_monetary_string(amt)
                    is_amt_gift = (parsed is None or parsed == 0.0)
                else:
                    parsed = parse_monetary_string(amt)
                    is_amt_gift = (parsed is not None and parsed == 0.0)
                if is_amt_gift:
                    product['is_gift'] = True
            except Exception:
                # Best effort: an unparsable amount leaves the flag unset.
                pass

            # Quantity (raw cell value).
            quantity_value = first_value(row, '数量', column_mapping.get('quantity'))
            if quantity_value is not None:
                product['quantity'] = quantity_value

            # Compound quantity cells such as "2箱" or "3件".
            if isinstance(product['quantity'], str) and product['quantity']:
                num, unit = self.unit_converter.extract_unit_from_quantity(product['quantity'])
                if unit:
                    product['unit'] = unit
                if num is not None:
                    product['quantity'] = num

            # Specification: sheet column first, mapped column second, and
            # only then an inference from the product name.
            spec_value = first_value(row, '规格')
            spec_log = "解析规格"
            if spec_value is None:
                spec_value = first_value(row, column_mapping.get('specification'))
                spec_log = "从映射列解析规格"

            if spec_value is not None:
                product['specification'] = str(spec_value)
                # Repair the OCR misreading "4.51*4" -> "4.5L*4".
                product['specification'] = re.sub(r'(\d+\.\d+)1\*(\d+)', r'\1L*\2', product['specification'])
                package_quantity = self.parse_specification(product['specification'])
                if package_quantity:
                    product['package_quantity'] = package_quantity
                    logger.info(f"{spec_log}: {product['specification']} -> 包装数量={package_quantity}")
            elif product['name']:
                match = re.search(container_pattern, product['name'])
                if match:
                    # "volume*count": the count is the package quantity.
                    volume = match.group(1)
                    count = match.group(2)
                    inferred_spec = f"{volume}L*{count}"
                    inferred_qty = int(count)
                    product['specification'] = inferred_spec
                    product['package_quantity'] = inferred_qty
                    logger.info(f"从商品名称提取容量*数量格式: {product['name']} -> {inferred_spec}, 包装数量={inferred_qty}")
                else:
                    match = re.search(weight_volume_pattern, product['name'])
                    if match:
                        inferred_spec = f"1*{match.group(1)}"
                        inferred_qty = int(match.group(1))
                        product['specification'] = inferred_spec
                        product['package_quantity'] = inferred_qty
                        logger.info(f"从商品名称提取重量/容量规格: {product['name']} -> {inferred_spec}, 包装数量={inferred_qty}")
                    else:
                        # General inference delegated to the unit converter.
                        inferred_spec = self.unit_converter.infer_specification_from_name(product['name'])
                        if inferred_spec:
                            product['specification'] = inferred_spec
                            package_quantity = self.parse_specification(inferred_spec)
                            if package_quantity:
                                product['package_quantity'] = package_quantity
                                logger.info(f"从商品名称推断规格: {product['name']} -> {inferred_spec}, 包装数量={package_quantity}")

            # A specification is set but its package quantity is not yet.
            if product.get('specification') and not product.get('package_quantity'):
                package_quantity = self.parse_specification(product['specification'])
                if package_quantity:
                    product['package_quantity'] = package_quantity
                    logger.info(f"解析已设置的规格: {product['specification']} -> 包装数量={package_quantity}")

            # No unit given: a "x*y" style specification implies "件".
            if not product['unit'] and product.get('barcode') and product.get('specification') and product.get('quantity') and product.get('price') is not None:
                if re.search(volume_pattern, product['specification']) or re.search(simple_pattern, product['specification']):
                    product['unit'] = '件'
                    logger.info(f"根据规格推断单位: {product['specification']} -> 单位=件")

            # Apply the unit conversion rules.
            product = self.unit_converter.process_unit_conversion(product)

            # Quantity missing but price and amount known:
            # quantity = amount / price.
            if (product['quantity'] == 0 or product['quantity'] is None) and product['amount']:
                try:
                    unit_price = product['price']
                    if isinstance(unit_price, str):
                        # A textual price must be parsed before comparing;
                        # comparing the raw string would raise and drop the row.
                        unit_price = parse_monetary_string(unit_price)
                    if unit_price is not None and unit_price > 0:
                        amount = parse_monetary_string(product['amount'])
                        if amount is not None and amount > 0:
                            quantity = amount / unit_price
                            logger.info(f"数量为空或为0,通过金额({amount})和单价({product['price']})计算得出数量: {quantity}")
                            product['quantity'] = quantity
                except Exception as e:
                    logger.warning(f"通过金额和单价计算数量失败: {e}")

            products.append(product)
        except Exception as e:
            # The index label is not guaranteed to be an integer; the error
            # handler itself must not raise.
            try:
                row_label = idx + 1
            except TypeError:
                row_label = idx
            logger.error(f"提取第{row_label}行商品信息时出错: {e}", exc_info=True)
            continue

    logger.info(f"提取到 {len(products)} 个商品信息")
    return products
|
||||
|
||||
def fill_template(self, products: List[Dict], output_file_path: str) -> bool:
    """
    Fill the purchase-order template with the given products and save it.

    Products are grouped by their formatted barcode. Rows whose price is 0
    (or that carry a truthy ``is_gift`` flag) are treated as gifts and only
    contribute to the gift quantity; all other rows are merged into a single
    "normal" entry per barcode (quantities are summed, differing prices are
    averaged pairwise). One sheet row is written per barcode, starting at
    row index 1: column 1 = barcode, 2 = purchase quantity, 3 = gift
    quantity, 4 = unit price (4 decimals).

    Args:
        products: Product dictionaries (keys read here: ``barcode``,
            ``quantity``, ``price``, ``amount``, ``is_gift``).
        output_file_path: Destination path of the generated workbook.

    Returns:
        True when the workbook was written, False on any error.
    """
    try:
        # Open the template with formatting preserved and make a writable copy.
        template_workbook = xlrd.open_workbook(self.template_path, formatting_info=True)
        output_workbook = xlcopy(template_workbook)
        output_sheet = output_workbook.get_sheet(0)

        # barcode -> {'normal': {...} | None, 'gift_quantity': number}
        barcode_groups = {}

        logger.info(f"开始处理{len(products)} 个产品信息")
        for product in products:
            barcode = format_barcode(product.get('barcode', ''))
            if not barcode:
                logger.warning(f"跳过无条码商品")
                continue

            quantity = product.get('quantity', 0)
            price = product.get('price', 0)
            amount = product.get('amount', 0)

            # A missing price would make the comparisons/rounding below raise
            # and abort the whole export; treat it as 0 (i.e. a gift row).
            if price is None:
                price = 0

            # Quantity missing/zero but price and amount known: quantity = amount / price.
            if (quantity == 0 or quantity is None) and price > 0 and amount:
                try:
                    amount = parse_monetary_string(amount)
                    if amount is not None and amount > 0:
                        quantity = amount / price
                        logger.info(f"数量为空或为0,通过金额({amount})和单价({price})计算得出数量: {quantity}")
                        product['quantity'] = quantity
                except Exception as e:
                    logger.warning(f"通过金额和单价计算数量失败: {e}")

            # Still unknown: count it as 0 so the accumulations below cannot fail.
            if quantity is None:
                quantity = 0

            # Gift rows are flagged explicitly or recognised by a zero price.
            is_gift = bool(product.get('is_gift', False)) or (price == 0)

            logger.info(f"处理商品: 条码={barcode}, 数量={quantity}, 单价={price}, 是否赠品={is_gift}")

            group = barcode_groups.setdefault(barcode, {
                'normal': None,       # merged normal (paid) entry
                'gift_quantity': 0    # accumulated gift quantity
            })

            if is_gift:
                group['gift_quantity'] += quantity
                logger.info(f"发现赠品:条码{barcode}, 数量={quantity}")
                continue

            normal = group['normal']
            if normal is None:
                group['normal'] = {
                    'product': product,
                    'quantity': quantity,
                    'price': price
                }
                logger.info(f"发现正常商品:条码{barcode}, 数量={quantity}, 单价={price}")
                continue

            # Further normal rows for the same barcode: accumulate quantity.
            normal['quantity'] += quantity
            logger.info(f"累加正常商品数量:条码{barcode}, 新增={quantity}, 累计={normal['quantity']}")

            # Differing prices are averaged; remember the previous price so
            # the log shows the real old value instead of the new average.
            if price != normal['price']:
                previous_price = normal['price']
                avg_price = (previous_price + price) / 2
                normal['price'] = avg_price
                logger.info(f"调整单价(取平均值):条码{barcode}, 原价={previous_price}, 新价={price}, 平均={avg_price}")

        # Debug summary of the grouping result.
        logger.info(f"分组后共{len(barcode_groups)} 个不同条码的商品")
        for barcode, group in barcode_groups.items():
            if group['normal'] is not None:
                logger.info(f"条码 {barcode} 处理结果:正常商品数量{group['normal']['quantity']},单价{group['normal']['price']},赠品数量{group['gift_quantity']}")
            else:
                logger.info(f"条码 {barcode} 处理结果:只有赠品,数量={group['gift_quantity']}")

        # The price style is identical for every row, so build it once.
        price_style = xlwt.XFStyle()
        price_style.num_format_str = '0.0000'

        # Data rows start at sheet row index 1 (second row).
        for row_index, (barcode, group) in enumerate(barcode_groups.items(), start=1):
            # Column B (1): barcode.
            output_sheet.write(row_index, 1, barcode)

            if group['normal'] is not None:
                # Column C (2): purchase quantity of the normal entry.
                normal_quantity = group['normal']['quantity']
                output_sheet.write(row_index, 2, normal_quantity)

                # Column D (3): gift quantity, only when there is one.
                if group['gift_quantity'] > 0:
                    output_sheet.write(row_index, 3, group['gift_quantity'])
                    logger.info(f"条码 {barcode} 填充:采购量={normal_quantity},赠品数量{group['gift_quantity']}")

                # Column E (4): purchase unit price.
                purchase_price = group['normal']['price']
                output_sheet.write(row_index, 4, round(purchase_price, 4), price_style)
            else:
                # Gift-only barcode: purchase quantity 0, gift quantity, price 0.
                output_sheet.write(row_index, 2, 0)
                output_sheet.write(row_index, 3, group['gift_quantity'])
                output_sheet.write(row_index, 4, 0)

                logger.info(f"条码 {barcode} 填充:仅有赠品,采购量=0,赠品数量={group['gift_quantity']}")

        output_workbook.save(output_file_path)
        logger.info(f"采购单已保存到: {output_file_path}")
        return True

    except Exception as e:
        # Keep the traceback: this handler hides every failure of the export.
        logger.error(f"填充模板时出错: {e}", exc_info=True)
        return False
|
||||
|
||||
def _find_header_row(self, df: pd.DataFrame) -> Optional[int]:
    """
    Locate the header row of a raw (header-less) sheet.

    Detection is delegated to ``ColumnMapper.detect_header_row`` (first 30
    rows). When that returns a negative index, the first row with more than
    three non-null cells is used instead.

    Returns:
        Zero-based row index, or None when no candidate row exists.
    """
    detected = ColumnMapper.detect_header_row(df, max_rows=30)
    if detected >= 0:
        logger.info(f"找到表头行: 第{detected+1}行")
        return detected

    # Fallback: first row that has more than three populated cells.
    fallback = next(
        (pos for pos in range(len(df)) if df.iloc[pos].notna().sum() > 3),
        None,
    )
    if fallback is None:
        logger.warning("无法识别表头行")
        return None

    logger.info(f"未找到明确表头,使用第一个有效行: 第{fallback+1}行")
    return fallback
|
||||
|
||||
def process_specific_file(self, file_path: str, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
    """
    Convert one Excel order file into a filled purchase-order workbook.

    Args:
        file_path: Path of the Excel file to process.
        progress_cb: Optional callback receiving progress percentages
            (92, 94, 96 and 100 are reported here); errors raised by the
            callback are ignored.

    Returns:
        Path of the generated file, or None when processing failed.
    """
    def notify(percent):
        # Progress reporting is best-effort and must never break processing.
        if progress_cb:
            try:
                progress_cb(percent)
            except Exception:
                pass

    logger.info(f"开始处理Excel文件: {file_path}")

    if not os.path.exists(file_path):
        logger.error(f"文件不存在: {file_path}")
        return None

    try:
        # Read without a header first; the header row is detected afterwards.
        notify(92)
        df = pd.read_excel(file_path, header=None)
        logger.info(f"成功读取Excel文件: {file_path}, 共 {len(df)} 行")

        header_row = self._find_header_row(df)
        if header_row is None:
            logger.error("无法识别表头行")
            return None

        logger.info(f"识别到表头在第 {header_row+1} 行")

        # Promote the detected row to column names and drop everything up to
        # and including it, which avoids reading the file a second time.
        notify(94)
        df.columns = df.iloc[header_row]
        df = df.iloc[header_row + 1:].reset_index(drop=True)

        logger.info(f"重新整理数据结构,共 {len(df)} 行有效数据")

        notify(96)
        products = self.extract_product_info(df)
        if not products:
            logger.warning("未提取到有效商品信息")
            return None

        # The output is written to data/result as "采购单_<input name>.xls".
        file_name = os.path.splitext(os.path.basename(file_path))[0]
        result_dir = "data/result"
        os.makedirs(result_dir, exist_ok=True)
        output_file = os.path.join(result_dir, f"采购单_{file_name}.xls")

        if not self.fill_template(products, output_file):
            return None

        # Remember the processed file and persist that record.
        self.processed_files[file_path] = output_file
        self._save_processed_files()

        logger.info(f"采购单已保存到: {output_file}")
        notify(100)
        return output_file

    except Exception as e:
        logger.error(f"处理Excel文件时出错: {file_path}, 错误: {e}")
        return None
|
||||
|
||||
def process_latest_file(self, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
    """
    Process the file returned by ``self.get_latest_excel()``.

    Args:
        progress_cb: Optional progress callback, forwarded unchanged to
            ``process_specific_file``.

    Returns:
        Path of the generated file, or None when no file was found or
        processing failed.
    """
    latest_file = self.get_latest_excel()
    if latest_file:
        return self.process_specific_file(latest_file, progress_cb=progress_cb)

    logger.warning("未找到可处理的Excel文件")
    return None
|
||||
|
||||
def _detect_column_mapping(self, df: pd.DataFrame) -> Dict[str, str]:
    """
    Map the internal field names to the actual column names of ``df``.

    The barcode column comes from ``self.extract_barcode``; the remaining
    fields are resolved through ``ColumnMapper.find_column``.

    Args:
        df: Data frame whose columns are inspected.

    Returns:
        Dict of internal key -> actual column name; empty when no barcode
        column was found.
    """
    barcode_cols = self.extract_barcode(df)
    if not barcode_cols:
        # Nothing can be processed without a barcode column.
        logger.error("未找到条码列,无法处理")
        return {}

    mapped_columns = {'barcode': barcode_cols[0]}
    logger.info(f"使用条码列: {mapped_columns['barcode']}")

    # Internal key -> standard column name understood by ColumnMapper
    # (this module uses price/amount as its internal keys).
    standard_names = {
        'name': 'name',
        'specification': 'specification',
        'quantity': 'quantity',
        'unit': 'unit',
        'price': 'unit_price',
        'amount': 'total_price',
    }

    for internal_key, standard_name in standard_names.items():
        matched = ColumnMapper.find_column(list(df.columns), standard_name)
        if not matched:
            continue
        mapped_columns[internal_key] = matched
        logger.info(f"找到{internal_key}列: {matched}")

    return mapped_columns
|
||||
|
||||
def infer_specification_from_name(self, product_name: str) -> Tuple[Optional[str], Optional[int]]:
    """
    Infer a specification string and pack quantity from a product name.

    Strategies are tried in order and the first one that yields a result
    wins: weight/volume times count, the unit converter, the product-specific
    rule list, a literal "N*M" in the name, and finally any standalone
    number equal to 12, 15, 24 or 30.

    Args:
        product_name: Product name to analyse.

    Returns:
        Tuple ``(specification, package_quantity)``; ``(None, None)`` when
        the name is empty, not a string, or nothing could be inferred.
    """
    if not product_name or not isinstance(product_name, str):
        logger.warning(f"无效的商品名: {product_name}")
        return None, None

    product_name = product_name.strip()

    # Special case: "<number><g|ml|毫升|克>*<count>" -> spec "1*<count>".
    weight_volume_pattern = r'.*?\d+(?:g|ml|毫升|克)[*xX×](\d+)'
    match = re.search(weight_volume_pattern, product_name)
    if match:
        inferred_spec = f"1*{match.group(1)}"
        inferred_qty = int(match.group(1))
        logger.info(f"从商品名称提取重量/容量规格: {product_name} -> {inferred_spec}, 包装数量={inferred_qty}")
        return inferred_spec, inferred_qty

    # Ask the unit converter for a specification (its rules are not visible here).
    inferred_spec = self.unit_converter.infer_specification_from_name(product_name)
    if inferred_spec:
        # Only accept it when a pack quantity can be parsed from it.
        package_quantity = self.parse_specification(inferred_spec)
        if package_quantity:
            logger.info(f"从商品名称推断规格: {product_name} -> {inferred_spec}, 包装数量={package_quantity}")
            return inferred_spec, package_quantity

    # Product-specific rules: (regex, formatter returning (spec, quantity)).
    # Order matters: the first matching rule is used.
    spec_rules = [
        # "<N>入白膜", e.g. "550纯净水24入白膜"
        (r'.*?(\d+)入白膜', lambda m: (f"1*{m.group(1)}", int(m.group(1)))),

        # "<N>白膜", e.g. "550水24白膜"
        (r'.*?(\d+)白膜', lambda m: (f"1*{m.group(1)}", int(m.group(1)))),

        # "445水溶C" series
        (r'445水溶C.*?(\d+)[入个]纸箱', lambda m: (f"1*{m.group(1)}", int(m.group(1)))),

        # "东方树叶" series: keeps the literal "N*M" and uses M as quantity
        (r'东方树叶.*?(\d+\*\d+).*纸箱', lambda m: (m.group(1), int(m.group(1).split('*')[1]))),

        # Barrel packs ("<volume>L桶装"): quantity is always 1
        (r'(\d+\.?\d*L)桶装', lambda m: (f"{m.group(1)}*1", 1)),

        # "树叶" tea series
        (r'树叶.*?(\d+)[入个]纸箱', lambda m: (f"1*{m.group(1)}", int(m.group(1)))),

        # "茶π" series
        (r'茶[πΠπ].*?(\d+)纸箱', lambda m: (f"1*{m.group(1)}", int(m.group(1)))),

        # Generic "<N>入/个" followed by a packaging word
        (r'.*?(\d+)[入个](?:纸箱|箱装|白膜)', lambda m: (f"1*{m.group(1)}", int(m.group(1)))),

        # Generic "<N>纸箱"
        (r'.*?(\d+)纸箱', lambda m: (f"1*{m.group(1)}", int(m.group(1))))
    ]

    # Try every rule in order.
    for pattern, formatter in spec_rules:
        match = re.search(pattern, product_name)
        if match:
            spec, qty = formatter(match)
            logger.info(f"根据特定规则推断规格: {product_name} -> {spec}, 包装数量={qty}")
            return spec, qty

    # Try a literal "N*M" anywhere in the name.
    match = re.search(r'(\d+\*\d+)', product_name)
    if match:
        spec = match.group(1)
        package_quantity = self.parse_specification(spec)
        if package_quantity:
            logger.info(f"从名称中直接提取规格: {spec}, 包装数量={package_quantity}")
            return spec, package_quantity

    # Last resort: any number in the name that is a typical pack size.
    numbers = re.findall(r'\d+', product_name)
    if numbers:
        for num in numbers:
            # Typical pack sizes are 12/15/24/30.
            if num in ['12', '15', '24', '30']:
                spec = f"1*{num}"
                logger.info(f"从名称中提取可能的件装数: {spec}, 包装数量={int(num)}")
                return spec, int(num)

    logger.warning(f"无法从商品名'{product_name}' 推断规格")
    return None, None
|
||||
|
||||
def parse_specification(self, spec_str: str) -> Optional[int]:
    """
    Parse a specification string and extract the pack quantity.

    Supported shapes include ``1*15``, ``1x15``, ``1*5*10``, ``5kg*6``,
    ``450ml*15``, ``24瓶/件``, ``4L``, ``12瓶装``, ``1件=12桶`` and the OCR
    misreading ``IL*12`` (meant as ``1L*12``). Patterns are tried from most
    to least specific; the first match decides the result.

    Args:
        spec_str: Specification string.

    Returns:
        Pack quantity, or None when the input is empty, not a string, or
        cannot be parsed.
    """
    if not spec_str or not isinstance(spec_str, str):
        return None

    try:
        spec_str = clean_string(spec_str)

        # OCR repair: a leading "I"/"l" directly before a litre unit is a
        # misread digit 1 ("IL*12" -> "1L*12"). The unit must not be followed
        # by another letter so ordinary words are left alone.
        spec_str = re.sub(r'(?<![A-Za-z0-9])[Il](?=[Ll升](?![A-Za-z]))', '1', spec_str)
        # OCR repair: a leading i/I/l/L directly before digits is a misread 1.
        spec_str = re.sub(r'(\b|^)[iIlL](\d+)', r'1\2', spec_str)
        # OCR repair: letters o/O after digits are zeros ("6oo" -> "600").
        # Every matched character becomes one zero so that "6ooo" or "5oo0"
        # keep their number of digits.
        spec_str = re.sub(
            r'(\d+)([oO0]{2,})',
            lambda m: m.group(1) + '0' * len(m.group(2)),
            spec_str,
        )
        # Normalise every multiplication sign to "*".
        spec_str = spec_str.replace('×', '*').replace('x', '*').replace('X', '*')

        logger.debug(f"清理后的规格字符串: {spec_str}")

        # Equation form such as "1件=12桶": the right-hand number is the pack
        # quantity. ASCII and full-width "=" are both accepted.
        eq_match = re.search(
            r'(\d+(?:\.\d+)?)\s*(?:件|箱|提|盒)\s*[=\uff1d]\s*(\d+)\s*(?:瓶|桶|盒|支|个|袋|罐|包|卷)',
            spec_str,
        )
        if eq_match:
            return int(eq_match.group(2))

        # Weight with kilogram units, e.g. "5kg*6".
        weight_pattern = r'(\d+(?:\.\d+)?)\s*(?:kg|KG|千克|公斤)[*×](\d+)'
        match = re.search(weight_pattern, spec_str)
        if match:
            return int(match.group(2))

        # Gram / millilitre units, e.g. "450g*15", "450ml*15".
        match = re.search(r'\d+(?:\.\d+)?(?:g|G|ml|ML|mL|毫升|克)[*×](\d+)', spec_str)
        if match:
            return int(match.group(1))

        # Three-level form "1*5*10": the last number is used.
        match = re.search(r'(\d+(?:\.\d+)?)[*×](\d+(?:\.\d+)?)[*×](\d+(?:\.\d+)?)', spec_str)
        if match:
            return int(float(match.group(3)))

        # Two-level form "1*15": the second number is the pack quantity.
        match = re.search(r'(\d+(?:\.\d+)?)[*×](\d+(?:\.\d+)?)', spec_str)
        if match:
            return int(float(match.group(2)))

        # "24瓶/件" style; ASCII and full-width "/" are both accepted.
        match = re.search(r'(\d+(?:\.\d+)?)[瓶个支袋][/\uff0f](件|箱)', spec_str)
        if match:
            return int(float(match.group(1)))

        # Litre form "4L", optionally followed by a count ("1L*12").
        match = re.search(r'(\d+(?:\.\d+)?)\s*[Ll升][*×]?(\d+(?:\.\d+)?)?', spec_str)
        if match:
            # Use the trailing count when present, otherwise a single unit.
            return int(float(match.group(2))) if match.group(2) else 1

        # Number followed by a container unit, e.g. "12瓶装".
        match = re.search(r'(\d+(?:\.\d+)?)[瓶个支袋包盒罐箱](?:装|\/箱)?', spec_str)
        if match:
            return int(float(match.group(1)))

        # Fallback: look at any numbers in the string.
        numbers = re.findall(r'\d+(?:\.\d+)?', spec_str)
        if numbers:
            # A single number is taken as the pack quantity.
            if len(numbers) == 1:
                return int(float(numbers[0]))

            # Several numbers: prefer a typical pack size (6/12/24/30).
            for num in numbers:
                if float(num) in [6.0, 12.0, 24.0, 30.0]:
                    return int(float(num))

            # Otherwise use the last number.
            return int(float(numbers[-1]))

    except Exception as e:
        logger.warning(f"解析规格'{spec_str}'时出错: {e}")

    return None
|
||||
@@ -0,0 +1,259 @@
|
||||
"""
|
||||
数据验证器模块
|
||||
----------
|
||||
提供对商品数据的验证和修复功能
|
||||
"""
|
||||
|
||||
import logging
import math
import re
from typing import Dict, Any, Optional, List, Tuple, Union

from ..utils.log_utils import get_logger
from ..utils.string_utils import parse_monetary_string
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class ProductValidator:
    """
    Validates and repairs product data (barcode, quantity, unit price).
    """

    def __init__(self):
        """Initialise the validator."""
        # Values that mark a warehouse row instead of a real barcode.
        self.warehouse_identifiers = ["仓库", "仓库全名", "warehouse"]

    def validate_barcode(self, barcode: Any) -> Tuple[bool, str, Optional[str]]:
        """
        Validate and repair a barcode.

        Args:
            barcode: Raw barcode value.

        Returns:
            Tuple ``(is_valid, repaired_barcode, error_message)``.
        """
        error_message = None

        if barcode is None:
            return False, "", "条码为空"

        # Spreadsheet cells are often read as floats (6901234567890.0). Going
        # through str() would turn the ".0" into an extra trailing digit, so
        # integer-valued floats are converted to int first.
        if isinstance(barcode, float) and barcode.is_integer():
            barcode = int(barcode)

        barcode_str = str(barcode).strip()

        # Warehouse marker rows are not products.
        if barcode_str in self.warehouse_identifiers:
            return False, barcode_str, "条码为仓库标识"

        # Keep digits only.
        barcode_clean = re.sub(r'\D', '', barcode_str)
        if not barcode_clean:
            return False, barcode_str, "条码不包含数字"

        # Known misreading: a leading 5 that should be 6 (prefix 53 is exempt).
        if len(barcode_clean) > 8 and barcode_clean.startswith('5') and not barcode_clean.startswith('53'):
            original_barcode = barcode_clean
            barcode_clean = '6' + barcode_clean[1:]
            logger.info(f"修正条码前缀 5->6: {original_barcode} -> {barcode_clean}")

        # Over-long barcodes: strip trailing zeros down to at most 13 digits.
        if len(barcode_clean) > 13:
            original_length = len(barcode_clean)
            if barcode_clean.endswith('0'):
                while len(barcode_clean) > 13 and barcode_clean.endswith('0'):
                    barcode_clean = barcode_clean[:-1]
                logger.info(f"修正条码长度: 从{original_length}位截断到{len(barcode_clean)}位")
            else:
                error_message = f"条码长度异常: {barcode_clean}, 长度={len(barcode_clean)}"
                logger.warning(error_message)
                return False, barcode_clean, error_message

        # Accepted length is 8 to 13 digits.
        if len(barcode_clean) < 8 or len(barcode_clean) > 13:
            error_message = f"条码长度异常: {barcode_clean}, 长度={len(barcode_clean)}"
            logger.warning(error_message)
            return False, barcode_clean, error_message

        if not barcode_clean.isdigit():
            error_message = f"条码包含非数字字符: {barcode_clean}"
            logger.warning(error_message)
            return False, barcode_clean, error_message

        # Explicitly whitelisted special barcode.
        if barcode_clean == "5321545613":
            logger.info(f"特殊条码验证通过: {barcode_clean}")
            return True, barcode_clean, None

        logger.debug(f"条码验证通过: {barcode_clean}")
        return True, barcode_clean, None

    def validate_quantity(self, quantity: Any) -> Tuple[bool, float, Optional[str]]:
        """
        Validate and repair a quantity.

        Args:
            quantity: Raw quantity value.

        Returns:
            Tuple ``(is_valid, repaired_quantity, error_message)``. A missing
            value (None or NaN) is reported with the message ``"数量为空"``.
        """
        if quantity is None:
            return False, 0.0, "数量为空"

        if isinstance(quantity, str):
            # Drop everything except digits and the decimal point.
            quantity_clean = re.sub(r'[^\d\.]', '', quantity.strip())
            if not quantity_clean:
                return False, 0.0, "数量不包含数字"

            try:
                quantity_value = float(quantity_clean)
            except ValueError:
                return False, 0.0, f"无法将数量 '{quantity}' 转换为数字"
        else:
            try:
                quantity_value = float(quantity)
            except (ValueError, TypeError):
                return False, 0.0, f"无法将数量 '{quantity}' 转换为数字"

        # NaN is how an empty spreadsheet cell arrives here; it compares False
        # against 0 and would otherwise pass as a valid quantity.
        if math.isnan(quantity_value):
            return False, 0.0, "数量为空"

        # Quantity must be positive.
        if quantity_value <= 0:
            return False, 0.0, f"数量必须大于0,当前值: {quantity_value}"

        return True, quantity_value, None

    def validate_price(self, price: Any) -> Tuple[bool, float, bool, Optional[str]]:
        """
        Validate and repair a unit price.

        Args:
            price: Raw unit price value.

        Returns:
            Tuple ``(is_valid, repaired_price, is_gift, error_message)``.
        """
        if price is None:
            return False, 0.0, True, "单价为空,视为赠品"

        if isinstance(price, str):
            price_str = price.strip().lower()
            # Explicit gift markers and empty/zero strings.
            if price_str in ["赠品", "gift", "赠送", "0", ""]:
                return True, 0.0, True, None

            price_value = parse_monetary_string(price_str)
            if price_value is None:
                return False, 0.0, True, f"无法将单价 '{price}' 转换为数字,视为赠品"
        else:
            try:
                price_value = float(price)
            except (ValueError, TypeError):
                return False, 0.0, True, f"无法将单价 '{price}' 转换为数字,视为赠品"

        # NaN (empty spreadsheet cell) is a missing price, not a valid one.
        if isinstance(price_value, float) and math.isnan(price_value):
            return False, 0.0, True, "单价为空,视为赠品"

        # A zero price marks a gift.
        if price_value == 0:
            return True, 0.0, True, None

        # Negative prices are rejected.
        if price_value < 0:
            return False, 0.0, True, f"单价不能为负数: {price_value},视为赠品"

        return True, price_value, False, None

    def validate_product(self, product: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate and repair a product dictionary.

        Args:
            product: Product data (keys read: ``barcode``, ``price``,
                ``amount``, ``quantity``).

        Returns:
            A repaired shallow copy; the input dictionary is not modified.
        """
        validated_product = product.copy()

        # Barcode: a repaired value is used even when validation failed.
        is_valid, fixed_barcode, error_msg = self.validate_barcode(product.get('barcode', ''))
        if not is_valid:
            logger.warning(f"条码验证失败: {error_msg}")
        if fixed_barcode:
            validated_product['barcode'] = fixed_barcode

        # Unit price.
        is_valid, fixed_price, is_gift, error_msg = self.validate_price(product.get('price', 0))
        validated_product['price'] = fixed_price
        if is_gift:
            validated_product['is_gift'] = True
            if error_msg:
                logger.info(error_msg)

        # A missing or zero amount also marks the row as a gift.
        # NOTE(review): this flags every row of a sheet without an amount
        # column as a gift - confirm that this is intended.
        try:
            parsed_amount = parse_monetary_string(product.get('amount', None))
            if parsed_amount is None or parsed_amount == 0.0:
                validated_product['is_gift'] = True
        except Exception:
            pass

        # Quantity.
        is_valid, fixed_quantity, error_msg = self.validate_quantity(product.get('quantity', None))

        # Quantity missing but price and amount known: quantity = amount / price.
        if not is_valid and error_msg == "数量为空":
            amount = product.get('amount', None)
            if fixed_price > 0 and amount is not None:
                try:
                    amount = parse_monetary_string(amount)
                    if amount is None:
                        raise ValueError("无法解析金额")

                    if amount > 0:
                        calculated_quantity = amount / fixed_price
                        logger.info(f"数量为空,通过金额({amount})和单价({fixed_price})计算得出数量: {calculated_quantity}")
                        validated_product['quantity'] = calculated_quantity
                        is_valid = True
                except (ValueError, TypeError, ZeroDivisionError) as e:
                    logger.warning(f"通过金额和单价计算数量失败: {e}")

        if is_valid:
            # A quantity derived from the amount was stored above; only a
            # directly validated quantity needs to be written here.
            if fixed_quantity > 0:
                validated_product['quantity'] = fixed_quantity
        else:
            logger.warning(f"数量验证失败: {error_msg}")
            validated_product['quantity'] = 0.0

        return validated_product
|
||||
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
数据处理handlers模块初始化文件
|
||||
"""
|
||||
|
||||
from .data_cleaner import DataCleaner
|
||||
from .column_mapper import ColumnMapper
|
||||
from .calculator import DataCalculator
|
||||
|
||||
__all__ = ['DataCleaner', 'ColumnMapper', 'DataCalculator']
|
||||
@@ -0,0 +1,378 @@
|
||||
"""
|
||||
数据计算处理器
|
||||
|
||||
提供各种数据计算功能,如数量计算、价格计算、汇总统计等
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from typing import Dict, Any, Optional, List, Union
|
||||
from ...core.utils.log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class DataCalculator:
|
||||
"""数据计算处理器
|
||||
|
||||
提供标准化的数据计算功能,支持各种业务计算规则
|
||||
"""
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
"""初始化数据计算器
|
||||
|
||||
Args:
|
||||
config: 计算配置
|
||||
"""
|
||||
self.config = config or {}
|
||||
self.calculation_rules = []
|
||||
|
||||
def add_rule(self, rule_type: str, **kwargs):
    """Append a calculation rule to the rule list.

    Args:
        rule_type: Rule type, stored under the ``type`` key.
        **kwargs: Rule parameters, stored next to ``type``.
    """
    rule = {'type': rule_type}
    rule.update(kwargs)
    self.calculation_rules.append(rule)
    logger.debug(f"添加计算规则: {rule_type}")
|
||||
|
||||
def calculate(self, df: pd.DataFrame) -> pd.DataFrame:
    """Apply all registered rules, in order, to a copy of ``df``.

    A rule that raises is logged and skipped; the remaining rules still run.

    Args:
        df: Input data (not modified).

    Returns:
        The data after all rules were applied.
    """
    logger.info(f"开始数据计算,原始数据形状: {df.shape}")

    result_df = df.copy()

    for position, rule in enumerate(self.calculation_rules, start=1):
        try:
            logger.debug(f"执行计算规则 {position}/{len(self.calculation_rules)}: {rule['type']}")
            result_df = self._apply_rule(result_df, rule)
            logger.debug(f"规则执行完成,数据形状: {result_df.shape}")
        except Exception as e:
            # Keep going with the next rule instead of aborting the run.
            logger.error(f"计算规则执行失败: {rule}, 错误: {e}")

    logger.info(f"数据计算完成,最终数据形状: {result_df.shape}")
    return result_df
|
||||
|
||||
def _apply_rule(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
    """Dispatch one rule to the handler matching its ``type``.

    Args:
        df: Data to process.
        rule: Rule configuration.

    Returns:
        The handler's result, or ``df`` unchanged for an unknown type.
    """
    rule_type = rule.get('type')

    handlers = {
        'multiply': self._multiply,
        'divide': self._divide,
        'add': self._add,
        'subtract': self._subtract,
        'formula': self._formula,
        'round': self._round,
        'sum': self._sum,
        'aggregate': self._aggregate,
    }

    # Only string types can name a handler; anything else is unknown.
    handler = handlers.get(rule_type) if isinstance(rule_type, str) else None
    if handler is None:
        logger.warning(f"未知的计算规则类型: {rule_type}")
        return df

    return handler(df, rule)
|
||||
|
||||
def _multiply(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
    """Write ``source_column * factor`` into ``target_column``.

    Args:
        df: Data to process (modified in place).
        rule: Reads ``source_column``, ``target_column`` and ``factor``
            (default 1).

    Returns:
        The same data frame.
    """
    source_column = rule.get('source_column')
    target_column = rule.get('target_column')
    factor = rule.get('factor', 1)

    # Nothing to do unless both column names are given.
    if not (source_column and target_column):
        return df

    if source_column not in df.columns:
        logger.warning(f"源列不存在: {source_column}")
        return df

    df[target_column] = df[source_column] * factor
    logger.debug(f"乘法计算: {source_column} * {factor} -> {target_column}")
    return df
|
||||
|
||||
def _divide(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
    """Write ``source_column / divisor`` into ``target_column``.

    Args:
        df: Data to process (modified in place).
        rule: Reads ``source_column``, ``target_column`` and ``divisor``
            (default 1). A divisor of 0 is logged as an error and skipped.

    Returns:
        The same data frame.
    """
    source_column = rule.get('source_column')
    target_column = rule.get('target_column')
    divisor = rule.get('divisor', 1)

    if divisor == 0:
        logger.error("除数不能为0")
        return df

    # Nothing to do unless both column names are given.
    if not (source_column and target_column):
        return df

    if source_column not in df.columns:
        logger.warning(f"源列不存在: {source_column}")
        return df

    df[target_column] = df[source_column] / divisor
    logger.debug(f"除法计算: {source_column} / {divisor} -> {target_column}")
    return df
|
||||
|
||||
def _add(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
    """Add columns (row-wise) and/or a constant into ``target_column``.

    Args:
        df: Data to process (modified in place).
        rule: Reads ``columns`` (name or list of names), ``target_column``
            and ``constant`` (default 0). Without ``columns`` the constant
            is added to the existing target column.

    Returns:
        The same data frame.
    """
    columns = rule.get('columns', [])
    target_column = rule.get('target_column')
    constant = rule.get('constant', 0)

    if not target_column:
        return df

    if isinstance(columns, str):
        columns = [columns]

    if columns:
        # Sum the listed columns that actually exist, then add the constant.
        present = [name for name in columns if name in df.columns]
        if present:
            df[target_column] = df[present].sum(axis=1) + constant
            logger.debug(f"加法计算: {present} + {constant} -> {target_column}")
        else:
            logger.warning(f"没有有效的列用于加法计算: {columns}")
    elif target_column in df.columns:
        # No source columns: only add the constant to the target column.
        df[target_column] = df[target_column] + constant
        logger.debug(f"加法计算: {target_column} + {constant}")
    else:
        logger.warning(f"目标列不存在: {target_column}")

    return df
|
||||
|
||||
def _subtract(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
    """Write ``minuend - subtrahend - constant`` into ``target_column``.

    Args:
        df: Data to process (modified in place).
        rule: Reads ``minuend`` (column), ``subtrahend`` (optional column),
            ``target_column`` and ``constant`` (default 0). When the
            subtrahend is missing or not a column, only the constant is
            subtracted.

    Returns:
        The same data frame.
    """
    minuend = rule.get('minuend')        # column subtracted from
    subtrahend = rule.get('subtrahend')  # column being subtracted
    target_column = rule.get('target_column')
    constant = rule.get('constant', 0)

    if not (target_column and minuend and minuend in df.columns):
        logger.warning(f"减法计算参数不完整或列不存在")
        return df

    if subtrahend and subtrahend in df.columns:
        df[target_column] = df[minuend] - df[subtrahend] - constant
        logger.debug(f"减法计算: {minuend} - {subtrahend} - {constant} -> {target_column}")
    else:
        df[target_column] = df[minuend] - constant
        logger.debug(f"减法计算: {minuend} - {constant} -> {target_column}")

    return df
|
||||
|
||||
def _formula(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
    """Evaluate ``formula`` with ``DataFrame.eval`` into ``target_column``.

    Args:
        df: Data to process (modified in place).
        rule: Reads ``formula`` and ``target_column``.

    Returns:
        The same data frame; it is left untouched when evaluation fails.
    """
    formula = rule.get('formula')
    target_column = rule.get('target_column')

    if not (formula and target_column):
        logger.warning("公式计算缺少公式或目标列")
        return df

    # NOTE(review): the formula string is evaluated as an expression; make
    # sure rules only come from trusted configuration.
    try:
        df[target_column] = df.eval(formula)
        logger.debug(f"公式计算: {formula} -> {target_column}")
    except Exception as e:
        logger.error(f"公式计算失败: {formula}, 错误: {e}")

    return df
|
||||
|
||||
def _round(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
    """Round numeric columns to a number of decimals.

    Args:
        df: Data to process (modified in place).
        rule: Reads ``columns`` (name or list; all numeric columns when
            empty) and ``decimals`` (default 0).

    Returns:
        The same data frame.
    """
    columns = rule.get('columns', [])
    decimals = rule.get('decimals', 0)

    if isinstance(columns, str):
        columns = [columns]

    if columns:
        candidates = columns
    else:
        candidates = df.select_dtypes(include=[np.number]).columns

    for name in candidates:
        # Missing or non-numeric columns are skipped silently.
        if name not in df.columns or not pd.api.types.is_numeric_dtype(df[name]):
            continue
        df[name] = df[name].round(decimals)
        logger.debug(f"四舍五入: {name} 保留 {decimals} 位小数")

    return df
|
||||
|
||||
def _sum(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
    """Sum columns row-wise, optionally using per-group totals.

    Without ``group_by`` the selected columns are added up per row and
    stored in ``target_column``. With ``group_by`` each selected column is
    first replaced by its group total (broadcast back to every row of the
    group) and those totals are then added up into ``target_column``.
    Previously the grouped result was computed and discarded, so a grouped
    sum rule had no effect.

    Args:
        df: Data to process (modified in place).
        rule: Reads ``columns`` (name or list; all numeric columns when
            empty), ``target_column`` and ``group_by``.

    Returns:
        The same data frame.
    """
    columns = rule.get('columns', [])
    target_column = rule.get('target_column')
    group_by = rule.get('group_by')

    if isinstance(columns, str):
        columns = [columns]

    grouped = bool(group_by) and group_by in df.columns

    if columns:
        value_columns = [col for col in columns if col in df.columns]
    else:
        value_columns = list(df.select_dtypes(include=[np.number]).columns)

    if grouped:
        # The grouping key itself is never part of the summed values.
        value_columns = [col for col in value_columns if col != group_by]
        if not target_column:
            logger.warning(f"分组求和缺少目标列,已跳过: 按 {group_by} 分组")
            return df
        if not value_columns:
            return df

        # transform('sum') keeps the original row index, so the group totals
        # line up with the rows they belong to.
        group_totals = df.groupby(group_by)[value_columns].transform('sum')
        df[target_column] = group_totals.sum(axis=1)
        if columns:
            for col in value_columns:
                logger.debug(f"分组求和: {col} 按 {group_by} 分组")
        else:
            logger.debug(f"分组求和: 所有数值列 按 {group_by} 分组")
        return df

    # Plain row-wise sum over the selected columns.
    if target_column and value_columns:
        df[target_column] = df[value_columns].sum(axis=1)
        logger.debug(f"求和计算: {value_columns} -> {target_column}")

    return df
|
||||
|
||||
def _aggregate(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
|
||||
"""聚合计算
|
||||
|
||||
Args:
|
||||
df: 数据
|
||||
rule: 规则配置
|
||||
|
||||
Returns:
|
||||
处理后的数据
|
||||
"""
|
||||
group_by = rule.get('group_by')
|
||||
aggregations = rule.get('aggregations', {})
|
||||
|
||||
if group_by and group_by in df.columns:
|
||||
# 构建聚合函数字典
|
||||
agg_dict = {}
|
||||
for column, func in aggregations.items():
|
||||
if column in df.columns:
|
||||
if isinstance(func, str):
|
||||
agg_dict[column] = func
|
||||
elif isinstance(func, list):
|
||||
agg_dict[column] = func
|
||||
|
||||
if agg_dict:
|
||||
result = df.groupby(group_by).agg(agg_dict)
|
||||
logger.debug(f"聚合计算: 按 {group_by} 分组, 聚合: {agg_dict}")
|
||||
return result.reset_index()
|
||||
|
||||
return df
|
||||
|
||||
# 便捷方法
|
||||
def multiply(self, source_column: str, target_column: str, factor: float):
    """Queue a 'multiply' rule and return ``self`` so calls can be chained.

    Args:
        source_column: Name stored under ``source_column`` in the rule.
        target_column: Name stored under ``target_column`` in the rule.
        factor: Value stored under ``factor`` in the rule.
    """
    params = {
        'source_column': source_column,
        'target_column': target_column,
        'factor': factor,
    }
    self.add_rule('multiply', **params)
    return self
|
||||
|
||||
def divide(self, source_column: str, target_column: str, divisor: float):
    """Queue a 'divide' rule and return ``self`` so calls can be chained.

    Args:
        source_column: Name stored under ``source_column`` in the rule.
        target_column: Name stored under ``target_column`` in the rule.
        divisor: Value stored under ``divisor`` in the rule.
    """
    params = {
        'source_column': source_column,
        'target_column': target_column,
        'divisor': divisor,
    }
    self.add_rule('divide', **params)
    return self
|
||||
|
||||
def add(self, columns: Union[str, List[str]], target_column: str, constant: float = 0):
    """Queue an 'add' rule and return ``self`` so calls can be chained.

    Args:
        columns: Column name or list of names stored under ``columns``.
        target_column: Name stored under ``target_column`` in the rule.
        constant: Value stored under ``constant`` in the rule.
    """
    params = {
        'columns': columns,
        'target_column': target_column,
        'constant': constant,
    }
    self.add_rule('add', **params)
    return self
|
||||
|
||||
def subtract(self, minuend: str, target_column: str,
             subtrahend: Optional[str] = None, constant: float = 0):
    """Queue a 'subtract' rule and return ``self`` so calls can be chained.

    Args:
        minuend: Name stored under ``minuend`` in the rule.
        target_column: Name stored under ``target_column`` in the rule.
        subtrahend: Optional name stored under ``subtrahend`` in the rule.
        constant: Value stored under ``constant`` in the rule.
    """
    params = {
        'minuend': minuend,
        'target_column': target_column,
        'subtrahend': subtrahend,
        'constant': constant,
    }
    self.add_rule('subtract', **params)
    return self
|
||||
|
||||
def formula(self, formula: str, target_column: str):
    """Queue a 'formula' rule and return ``self`` so calls can be chained.

    Args:
        formula: Expression text stored under ``formula`` in the rule.
        target_column: Name stored under ``target_column`` in the rule.
    """
    params = {'formula': formula, 'target_column': target_column}
    self.add_rule('formula', **params)
    return self
|
||||
|
||||
def round_columns(self, columns: Optional[Union[str, List[str]]] = None, decimals: int = 0):
    """Queue a 'round' rule and return ``self`` so calls can be chained.

    Args:
        columns: Column name, list of names, or None, stored under
            ``columns`` in the rule.
        decimals: Value stored under ``decimals`` in the rule.
    """
    params = {'columns': columns, 'decimals': decimals}
    self.add_rule('round', **params)
    return self
|
||||
|
||||
def sum_columns(self, columns: Optional[Union[str, List[str]]] = None,
                target_column: Optional[str] = None, group_by: Optional[str] = None):
    """Queue a 'sum' rule and return ``self`` so calls can be chained.

    Args:
        columns: Column name, list of names, or None, stored under
            ``columns`` in the rule.
        target_column: Optional name stored under ``target_column``.
        group_by: Optional name stored under ``group_by``.
    """
    params = {
        'columns': columns,
        'target_column': target_column,
        'group_by': group_by,
    }
    self.add_rule('sum', **params)
    return self
|
||||
|
||||
def aggregate(self, group_by: str, aggregations: Dict[str, Union[str, List[str]]]):
    """Queue an 'aggregate' rule and return ``self`` so calls can be chained.

    Args:
        group_by: Name stored under ``group_by`` in the rule.
        aggregations: Mapping stored under ``aggregations`` in the rule.
    """
    params = {'group_by': group_by, 'aggregations': aggregations}
    self.add_rule('aggregate', **params)
    return self
|
||||
@@ -0,0 +1,382 @@
|
||||
"""
|
||||
列映射处理器
|
||||
|
||||
提供列名映射和转换功能,支持不同供应商的列名标准化
|
||||
"""
|
||||
|
||||
import re
|
||||
import pandas as pd
|
||||
from typing import Dict, Any, Optional, List, Union
|
||||
from ...core.utils.log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class ColumnMapper:
    """Column-name mapper.

    Normalises supplier-specific column headers to a fixed set of standard
    column names.
    """

    # Standard column names and their accepted aliases (the single source of
    # truth for every alias list).
    STANDARD_COLUMNS = {
        'barcode': [
            '条码', '条形码', '商品条码', '商品条形码', '产品条码', '商品编码',
            '商品编号', '条码(必填)', '电脑条码', '条码ID',
            'barcode', 'Barcode', 'BarCode', 'code', '编码',
        ],
        'name': [
            '商品名称', '产品名称', '名称', '商品', '产品', '商品名', '品名',
            '品项名', '商品或服务名称', '品项', '名 称',
            'name', 'product_name',
        ],
        'specification': [
            '规格', '规格型号', '型号', '商品规格', '产品规格', '包装规格', '规 格',
            'specification', 'spec', 'model',
        ],
        'quantity': [
            '数量', '采购量', '订货数量', '订单量', '需求量', '采购数量', '购买数量',
            '订单数量', '数量(必填)', '采购量(必填)', '入库数', '入库数量', '数 量',
            'quantity', 'qty',
        ],
        'unit': [
            '单位', '计量单位', '采购单位', '单位(必填)', '单位名称', '计价单位', '单 位',
            'unit', 'units',
        ],
        'unit_price': [
            '单价', '价格', '采购单价', '进货价', '销售价', '采购价', '参考价',
            '入库单价', '单价(必填)', '采购单价(必填)', '价格(必填)', '单 价',
            'unit_price', 'price',
        ],
        'total_price': [
            '总价', '金额', '小计', '合计金额', '小计金额', '金额(元)',
            '金额合计', '合计', '总额',
            'total_price', 'total', 'amount',
        ],
        'gift_quantity': [
            '赠送量', '赠品数量', '赠送数量', '赠品',
        ],
        'category': ['类别', '分类', '商品类别', 'category', 'type'],
        'brand': ['品牌', '商标', 'brand'],
        'supplier': ['供应商', '供货商', 'supplier', 'vendor'],
    }

    def __init__(self, mapping_config: Optional[Dict[str, Any]] = None):
        """Initialise the mapper.

        Args:
            mapping_config: Optional custom aliases, keyed by standard column
                name; each value is one alias or a list of aliases.
        """
        # Copy so add_custom_mapping() never mutates the caller's dict.
        self.mapping_config = dict(mapping_config) if mapping_config else {}
        self.custom_mappings = {}
        self._build_reverse_mapping()

    def _build_reverse_mapping(self):
        """Build the lower-cased alias -> standard name lookup tables."""
        self.reverse_mapping = {}

        # Built-in aliases.
        for standard_name, variations in self.STANDARD_COLUMNS.items():
            for variation in variations:
                self.reverse_mapping[variation.lower()] = standard_name

        # Custom aliases; these override built-in ones on collision.
        for standard_name, custom_names in self.mapping_config.items():
            if isinstance(custom_names, str):
                custom_names = [custom_names]

            for custom_name in custom_names:
                self.reverse_mapping[custom_name.lower()] = standard_name
                self.custom_mappings[custom_name.lower()] = standard_name

    def map_columns(self, df: pd.DataFrame, target_columns: Optional[List[str]] = None) -> pd.DataFrame:
        """Rename columns to their standard names.

        Matching runs in two passes so that a loose substring hit can never
        steal a column that another target matches exactly: first every
        target looks for an exact alias match, then the still-unmatched
        targets try substring matching. A source column is assigned to at
        most one target.

        Args:
            df: Input data.
            target_columns: Standard names to produce; all standard columns
                when None.

        Returns:
            A frame holding exactly the target columns (missing ones filled
            with a default value), or ``df`` itself when nothing matched.
        """
        if target_columns is None:
            target_columns = list(self.STANDARD_COLUMNS.keys())

        logger.info(f"开始列名映射,目标列: {target_columns}")
        logger.info(f"原始列名: {list(df.columns)}")

        column_mapping = {}
        used_columns = set()

        def claim(matcher, target_col) -> bool:
            # Offer only columns that no earlier target has claimed.
            available = [col for col in df.columns if col not in used_columns]
            matched_column = matcher(available, self._candidate_names(target_col))
            if matched_column is None:
                return False
            column_mapping[matched_column] = target_col
            used_columns.add(matched_column)
            logger.debug(f"列名映射: {matched_column} -> {target_col}")
            return True

        # Pass 1: exact alias matches. Pass 2: substring matches for the rest.
        pending = [col for col in target_columns if not claim(self._match_exact, col)]
        for target_col in pending:
            claim(self._match_fuzzy, target_col)

        if not column_mapping:
            logger.warning("没有找到可映射的列名")
            return df

        df_mapped = df.rename(columns=column_mapping)

        # Add the target columns that had no source column.
        for target_col in target_columns:
            if target_col not in df_mapped.columns:
                df_mapped[target_col] = self._get_default_value(target_col)
                logger.debug(f"添加缺失列: {target_col}")

        # Keep only the target columns, in the requested order.
        existing_target_columns = [col for col in target_columns if col in df_mapped.columns]
        df_result = df_mapped[existing_target_columns]

        logger.info(f"列名映射完成,结果列名: {list(df_result.columns)}")
        return df_result

    def _candidate_names(self, target_column: str) -> List[str]:
        """Return built-in aliases followed by custom aliases for a target."""
        names = list(self.STANDARD_COLUMNS.get(target_column, []))
        custom_names = self.mapping_config.get(target_column)
        if isinstance(custom_names, str):
            names.append(custom_names)
        elif custom_names:
            names.extend(custom_names)
        return names

    @staticmethod
    def _match_exact(columns, names):
        """Return the first column equal (case-insensitively) to an alias."""
        for name in names:
            wanted = name.lower()
            for column in columns:
                # str() so numeric or otherwise non-string headers do not crash.
                if str(column).strip().lower() == wanted:
                    return column
        return None

    @staticmethod
    def _match_fuzzy(columns, names):
        """Return the first column that contains, or is contained in, an alias."""
        for name in names:
            wanted = name.lower()
            if not wanted:
                continue
            for column in columns:
                text = str(column).strip().lower()
                # A blank header is a substring of every alias; never match it.
                if text and (wanted in text or text in wanted):
                    return column
        return None

    def _find_matching_column(self, columns: List[str], target_column: str) -> Optional[str]:
        """Find the source column that corresponds to a standard column.

        Exact (case-insensitive) matches against any alias are preferred over
        substring matches, regardless of alias order.

        Args:
            columns: Source column names.
            target_column: Standard column name.

        Returns:
            The matching source column, or None.
        """
        columns = list(columns)
        names = self._candidate_names(target_column)

        matched = self._match_exact(columns, names)
        if matched is not None:
            return matched
        return self._match_fuzzy(columns, names)

    def _get_default_value(self, column_name: str) -> Any:
        """Return the fill value used for a missing target column.

        Args:
            column_name: Standard column name.

        Returns:
            0 for quantity/price columns, '' for the listed text columns,
            None for anything else.
        """
        if column_name in ['quantity', 'unit_price', 'total_price']:
            return 0
        elif column_name in ['barcode', 'name', 'specification', 'unit', 'category', 'brand', 'supplier']:
            return ''
        else:
            return None

    def add_custom_mapping(self, standard_name: str, custom_names: Union[str, List[str]]):
        """Register extra aliases for a standard column.

        Aliases already configured for ``standard_name`` are kept; the new
        ones are appended.

        Args:
            standard_name: Standard column name.
            custom_names: One alias or a list of aliases.
        """
        if isinstance(custom_names, str):
            custom_names = [custom_names]

        # Merge with the existing configuration instead of replacing it, so
        # the config and the reverse lookup stay consistent.
        existing = self.mapping_config.get(standard_name, [])
        merged = [existing] if isinstance(existing, str) else list(existing)
        for custom_name in custom_names:
            if custom_name not in merged:
                merged.append(custom_name)
        self.mapping_config[standard_name] = merged

        for custom_name in custom_names:
            self.reverse_mapping[custom_name.lower()] = standard_name
            self.custom_mappings[custom_name.lower()] = standard_name

        logger.info(f"添加自定义映射: {standard_name} <- {custom_names}")

    def detect_column_types(self, df: pd.DataFrame) -> Dict[str, str]:
        """Classify every column as boolean, numeric, datetime or text.

        Args:
            df: Data to inspect.

        Returns:
            Mapping of column name to one of 'boolean', 'numeric',
            'datetime', 'text'.
        """
        column_types = {}

        for column in df.columns:
            series = df[column]
            # pandas treats bool as a numeric dtype, so test it first;
            # otherwise the boolean branch can never be reached.
            if pd.api.types.is_bool_dtype(series):
                column_types[column] = 'boolean'
            elif pd.api.types.is_numeric_dtype(series):
                column_types[column] = 'numeric'
            elif pd.api.types.is_datetime64_any_dtype(series):
                column_types[column] = 'datetime'
            else:
                column_types[column] = 'text'

        return column_types

    def suggest_column_mapping(self, df: pd.DataFrame) -> Dict[str, List[str]]:
        """Suggest standard names for each source column by substring match.

        Args:
            df: Data whose headers should be inspected.

        Returns:
            Mapping of source column to the standard names it may correspond
            to (deduplicated, in discovery order); columns without any
            suggestion are omitted.
        """
        suggestions = {}

        for column in df.columns:
            column_lower = str(column).strip().lower()
            if not column_lower:
                # A blank header would "match" every alias.
                continue

            matches = []

            # Built-in aliases.
            for standard_name, variations in self.STANDARD_COLUMNS.items():
                for variation in variations:
                    if column_lower in variation.lower() or variation.lower() in column_lower:
                        matches.append(standard_name)

            # Custom aliases (keys are already lower-cased).
            for custom_name, standard_name in self.custom_mappings.items():
                if column_lower in custom_name or custom_name in column_lower:
                    matches.append(standard_name)

            if matches:
                suggestions[column] = list(dict.fromkeys(matches))

        return suggestions

    def validate_mapping(self, df: pd.DataFrame, required_columns: List[str]) -> Dict[str, Any]:
        """Check a mapped frame for missing, empty and non-numeric columns.

        Args:
            df: Mapped data.
            required_columns: Columns that must be present.

        Returns:
            Dict with keys 'valid', 'missing_columns', 'empty_columns' and
            'warnings'. Only missing required columns make 'valid' False.
        """
        result = {
            'valid': True,
            'missing_columns': [],
            'empty_columns': [],
            'warnings': []
        }

        # Missing required columns.
        for col in required_columns:
            if col not in df.columns:
                result['missing_columns'].append(col)
                result['valid'] = False

        # Columns that contain only nulls.
        for col in df.columns:
            if df[col].isnull().all():
                result['empty_columns'].append(col)
                result['warnings'].append(f"列 '{col}' 全部为空值")

        # Columns expected to be numeric.
        numeric_columns = ['quantity', 'unit_price', 'total_price']
        for col in numeric_columns:
            if col in df.columns and not pd.api.types.is_numeric_dtype(df[col]):
                result['warnings'].append(f"列 '{col}' 不是数值类型")

        return result

    @classmethod
    def find_column(cls, columns: List[str], standard_name: str) -> Optional[str]:
        """Find the column in ``columns`` matching a standard column name.

        Strategy: exact match, then whitespace-insensitive match, then
        substring match (alias contained in the column name).

        Args:
            columns: Actual column names.
            standard_name: A key of STANDARD_COLUMNS.

        Returns:
            The matching column name (as a string), or None.
        """
        candidates = cls.STANDARD_COLUMNS.get(standard_name, [])
        if not candidates:
            return None

        columns_str = [str(c) for c in columns]

        # Exact match.
        for col in columns_str:
            col_clean = col.strip()
            for candidate in candidates:
                if col_clean == candidate:
                    return col

        # Whitespace-insensitive match.
        for col in columns_str:
            col_clean = re.sub(r'\s+', '', col.strip())
            for candidate in candidates:
                if col_clean == re.sub(r'\s+', '', candidate):
                    return col

        # Substring match (alias contained in the column name).
        for col in columns_str:
            col_lower = col.strip().lower()
            for candidate in candidates:
                if candidate.lower() in col_lower:
                    return col

        return None

    @staticmethod
    def detect_header_row(df: pd.DataFrame, max_rows: int = 10, min_matches: int = 3) -> int:
        """Locate the header row.

        Scans the first ``max_rows`` rows and returns the index of the row
        containing the most header keywords; the earliest row wins a tie.

        Args:
            df: Data frame to scan.
            max_rows: Maximum number of rows to scan.
            min_matches: Minimum number of distinct keywords a row must hit.

        Returns:
            Positional index of the header row, or -1 when no row qualifies.
        """
        header_keywords = [
            '条码', '条形码', '商品条码', '商品名称', '名称', '规格',
            '单价', '数量', '金额', '单位', '必填', '编码',
        ]

        best_row = -1
        best_matches = 0

        for row_idx in range(min(max_rows, len(df))):
            row_values = df.iloc[row_idx].astype(str)
            matches = sum(
                1 for kw in header_keywords
                if any(kw in str(val) for val in row_values.values)
            )
            if matches >= min_matches and matches > best_matches:
                best_matches = matches
                best_row = row_idx

        return best_row
|
||||
@@ -0,0 +1,401 @@
|
||||
"""
|
||||
数据清洗处理器
|
||||
|
||||
提供各种数据清洗功能,如空值处理、重复项处理、数据类型转换等
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
from typing import Dict, Any, Optional, List, Union
|
||||
from ...core.utils.log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class DataCleaner:
    """Data cleaning processor.

    Collects cleaning rules (directly or through the chainable convenience
    methods) and applies them in order to a DataFrame.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialise the cleaner.

        Args:
            config: Optional cleaning configuration.
        """
        self.config = config or {}
        self.cleaning_rules = []

    def add_rule(self, rule_type: str, **kwargs):
        """Append a cleaning rule.

        Args:
            rule_type: Rule type name.
            **kwargs: Rule parameters, stored alongside the type.
        """
        rule = {'type': rule_type, **kwargs}
        self.cleaning_rules.append(rule)
        logger.debug(f"添加清洗规则: {rule_type}")

    def clean(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply every registered rule, in order, to a copy of ``df``.

        A rule that raises is logged and skipped; the remaining rules still
        run.

        Args:
            df: Input data (not modified).

        Returns:
            The cleaned copy.
        """
        logger.info(f"开始数据清洗,原始数据形状: {df.shape}")

        result_df = df.copy()

        for i, rule in enumerate(self.cleaning_rules):
            try:
                logger.debug(f"执行清洗规则 {i+1}/{len(self.cleaning_rules)}: {rule['type']}")
                result_df = self._apply_rule(result_df, rule)
                logger.debug(f"规则执行完成,数据形状: {result_df.shape}")
            except Exception as e:
                # Deliberately best-effort: keep going with the next rule
                # rather than aborting the whole pipeline.
                logger.error(f"清洗规则执行失败: {rule}, 错误: {e}")
                continue

        logger.info(f"数据清洗完成,最终数据形状: {result_df.shape}")
        return result_df

    def _apply_rule(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
        """Dispatch one rule to its handler.

        Args:
            df: Data.
            rule: Rule configuration; ``rule['type']`` selects the handler.

        Returns:
            The processed data, or ``df`` unchanged for an unknown type.
        """
        handlers = {
            'remove_duplicates': self._remove_duplicates,
            'fill_na': self._fill_na,
            'remove_rows': self._remove_rows,
            'convert_type': self._convert_type,
            'strip_whitespace': self._strip_whitespace,
            'normalize_text': self._normalize_text,
            'validate_data': self._validate_data,
        }
        rule_type = rule.get('type')
        handler = handlers.get(rule_type) if isinstance(rule_type, str) else None
        if handler is None:
            logger.warning(f"未知的清洗规则类型: {rule_type}")
            return df
        return handler(df, rule)

    @staticmethod
    def _as_list(columns) -> list:
        """Normalise a column spec (None, one name, or a sequence) to a list."""
        if columns is None:
            return []
        if isinstance(columns, str):
            return [columns]
        return list(columns)

    @staticmethod
    def _is_text(series: pd.Series) -> bool:
        """Return True for object-dtype or string-dtype columns."""
        return pd.api.types.is_object_dtype(series) or pd.api.types.is_string_dtype(series)

    def _remove_duplicates(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
        """Drop duplicate rows.

        Args:
            df: Data.
            rule: Rule configuration; ``subset`` limits the columns compared
                and ``keep`` (default 'first') selects which duplicate stays.

        Returns:
            The de-duplicated data.
        """
        subset = rule.get('subset')
        keep = rule.get('keep', 'first')

        before_count = len(df)
        df_cleaned = df.drop_duplicates(subset=subset, keep=keep)
        after_count = len(df_cleaned)

        logger.info(f"移除重复项: {before_count - after_count} 行被移除")
        return df_cleaned

    def _fill_na(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
        """Fill missing values.

        Args:
            df: Data.
            rule: Rule configuration. ``columns`` selects the columns (all
                when empty), ``value`` (default 0) is the constant fill and
                ``method`` may be 'ffill', 'bfill', 'mean' or 'median'.
                'mean'/'median' only apply when ``columns`` is given; in
                all-columns mode they fall back to ``value``.

        Returns:
            The processed data.
        """
        columns = rule.get('columns')
        value = rule.get('value', 0)
        method = rule.get('method')

        def fill(series: pd.Series) -> pd.Series:
            # .ffill()/.bfill() replace the deprecated fillna(method=...).
            if method == 'ffill':
                return series.ffill()
            if method == 'bfill':
                return series.bfill()
            if method == 'mean':
                return series.fillna(series.mean())
            if method == 'median':
                return series.fillna(series.median())
            return series.fillna(value)

        if columns:
            for col in self._as_list(columns):
                if col in df.columns:
                    df[col] = fill(df[col])
                    logger.debug(f"填充列 {col} 的空值: {method or value}")
        else:
            if method == 'ffill':
                df = df.ffill()
            elif method == 'bfill':
                df = df.bfill()
            else:
                df = df.fillna(value)

            logger.debug(f"填充所有列的空值: {method or value}")

        return df

    def _remove_rows(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
        """Filter rows by a query expression or by column values.

        Args:
            df: Data.
            rule: Rule configuration. With ``condition`` the result of
                ``df.query(condition)`` is returned, i.e. the rows for which
                the expression is true are the ones kept. Otherwise rows
                whose ``columns`` contain any of ``values`` are dropped.

        Returns:
            The filtered data; ``df`` unchanged when the expression fails or
            the rule is incomplete.
        """
        condition = rule.get('condition')
        columns = rule.get('columns')
        values = rule.get('values')

        if condition:
            try:
                before_count = len(df)
                df_filtered = df.query(condition)
                after_count = len(df_filtered)
                logger.info(f"条件过滤: {condition}, 移除了 {before_count - after_count} 行")
                return df_filtered
            except Exception as e:
                logger.error(f"条件表达式执行失败: {condition}, 错误: {e}")
                return df

        # ``is not None`` rather than truthiness, so 0 / '' / False can be
        # used as the value to remove.
        if columns and values is not None:
            columns = self._as_list(columns)
            if not isinstance(values, list):
                values = [values]

            df_filtered = df.copy()
            for col in columns:
                if col in df_filtered.columns:
                    mask = ~df_filtered[col].isin(values)
                    df_filtered = df_filtered[mask]
                    logger.debug(f"列 {col} 过滤值 {values}")

            return df_filtered

        logger.warning("移除行规则缺少条件或列配置")
        return df

    def _convert_type(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
        """Convert column dtypes.

        Args:
            df: Data.
            rule: Rule configuration. ``columns`` lists the columns,
                ``target_type`` (default 'float') is 'int', 'float',
                'datetime', 'string' or any dtype accepted by ``astype``, and
                ``errors`` (default 'coerce') is forwarded to the pandas
                converters.

        Returns:
            The processed data; a column whose conversion fails is logged and
            left as it was.
        """
        columns = self._as_list(rule.get('columns'))
        target_type = rule.get('target_type', 'float')
        errors = rule.get('errors', 'coerce')

        for col in columns:
            if col in df.columns:
                try:
                    if target_type == 'int':
                        # Nullable Int64 so coerced NaNs survive the cast.
                        df[col] = pd.to_numeric(df[col], errors=errors).astype('Int64')
                    elif target_type == 'float':
                        df[col] = pd.to_numeric(df[col], errors=errors)
                    elif target_type == 'datetime':
                        df[col] = pd.to_datetime(df[col], errors=errors)
                    elif target_type == 'string':
                        df[col] = df[col].astype(str)
                    else:
                        df[col] = df[col].astype(target_type)

                    logger.debug(f"列 {col} 类型转换: {target_type}")
                except Exception as e:
                    logger.error(f"列 {col} 类型转换失败: {e}")

        return df

    def _strip_whitespace(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
        """Strip leading/trailing whitespace from text columns.

        Args:
            df: Data.
            rule: Rule configuration; ``columns`` selects the columns (all
                text columns when empty).

        Returns:
            The processed data.
        """
        columns = rule.get('columns')

        if columns:
            for col in self._as_list(columns):
                if col in df.columns and self._is_text(df[col]):
                    df[col] = df[col].str.strip()
                    logger.debug(f"列 {col} 去除空白字符")
        else:
            text_columns = [col for col in df.columns if self._is_text(df[col])]
            for col in text_columns:
                df[col] = df[col].str.strip()

            logger.debug(f"所有文本列去除空白字符: {list(text_columns)}")

        return df

    def _normalize_text(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
        """Normalise text columns (case folding and literal replacements).

        Args:
            df: Data.
            rule: Rule configuration. ``columns`` selects the columns (all
                text columns when empty); ``lowercase`` takes precedence over
                ``uppercase``; ``replace_map`` maps literal substrings to
                their replacements.

        Returns:
            The processed data.
        """
        columns = self._as_list(rule.get('columns'))
        lowercase = rule.get('lowercase', False)
        uppercase = rule.get('uppercase', False)
        replace_map = rule.get('replace_map') or {}

        target_columns = columns or [col for col in df.columns if self._is_text(df[col])]

        for col in target_columns:
            if col in df.columns and self._is_text(df[col]):
                if lowercase:
                    df[col] = df[col].str.lower()
                elif uppercase:
                    df[col] = df[col].str.upper()

                for old, new in replace_map.items():
                    # regex=False: keys are literals; without it the meaning
                    # of '.', '(' etc. depends on the pandas version.
                    df[col] = df[col].str.replace(old, new, regex=False)

                logger.debug(f"列 {col} 文本标准化完成")

        return df

    def _validate_data(self, df: pd.DataFrame, rule: Dict[str, Any]) -> pd.DataFrame:
        """Validate columns and log the findings; the data is not modified.

        Args:
            df: Data.
            rule: Rule configuration. ``columns`` lists the columns,
                ``required`` reports null values, ``min_value`` /
                ``max_value`` report out-of-range values in numeric columns.

        Returns:
            ``df`` unchanged.
        """
        columns = self._as_list(rule.get('columns'))
        min_value = rule.get('min_value')
        max_value = rule.get('max_value')
        required = rule.get('required', False)

        validation_results = []

        for col in columns:
            if col not in df.columns:
                continue

            # Required-value check.
            if required:
                null_count = df[col].isnull().sum()
                if null_count > 0:
                    validation_results.append(f"{col}: {null_count} 个空值")

            # Range check (numeric columns only).
            has_bounds = min_value is not None or max_value is not None
            if has_bounds and pd.api.types.is_numeric_dtype(df[col]):
                invalid_mask = pd.Series(False, index=df.index)
                if min_value is not None:
                    invalid_mask |= df[col] < min_value
                if max_value is not None:
                    invalid_mask |= df[col] > max_value

                invalid_count = invalid_mask.sum()
                if invalid_count > 0:
                    validation_results.append(f"{col}: {invalid_count} 个值超出范围")

        if validation_results:
            logger.warning(f"数据验证发现问题: {', '.join(validation_results)}")
        else:
            logger.debug("数据验证通过")

        return df

    # Convenience methods (each queues one rule and returns self for chaining)
    def remove_duplicates(self, subset: Optional[List[str]] = None, keep: str = 'first'):
        """Queue a 'remove_duplicates' rule."""
        self.add_rule('remove_duplicates', subset=subset, keep=keep)
        return self

    def fill_na(self, columns: Optional[Union[str, List[str]]] = None,
                value: Any = 0, method: Optional[str] = None):
        """Queue a 'fill_na' rule."""
        self.add_rule('fill_na', columns=columns, value=value, method=method)
        return self

    def remove_rows(self, condition: Optional[str] = None,
                    columns: Optional[Union[str, List[str]]] = None,
                    values: Optional[Any] = None):
        """Queue a 'remove_rows' rule."""
        self.add_rule('remove_rows', condition=condition, columns=columns, values=values)
        return self

    def convert_type(self, columns: Union[str, List[str]], target_type: str, errors: str = 'coerce'):
        """Queue a 'convert_type' rule."""
        self.add_rule('convert_type', columns=columns, target_type=target_type, errors=errors)
        return self

    def strip_whitespace(self, columns: Optional[Union[str, List[str]]] = None):
        """Queue a 'strip_whitespace' rule."""
        self.add_rule('strip_whitespace', columns=columns)
        return self

    def normalize_text(self, columns: Optional[Union[str, List[str]]] = None,
                       lowercase: bool = False, uppercase: bool = False,
                       replace_map: Optional[Dict[str, str]] = None):
        """Queue a 'normalize_text' rule."""
        self.add_rule('normalize_text', columns=columns, lowercase=lowercase,
                      uppercase=uppercase, replace_map=replace_map or {})
        return self

    def validate_data(self, columns: Union[str, List[str]],
                      min_value: Optional[float] = None,
                      max_value: Optional[float] = None,
                      required: bool = False):
        """Queue a 'validate_data' rule."""
        self.add_rule('validate_data', columns=columns, min_value=min_value,
                      max_value=max_value, required=required)
        return self
|
||||
@@ -0,0 +1,150 @@
|
||||
import re
|
||||
import pandas as pd
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
def _split_quantity_unit(df: pd.DataFrame, source: str, dictionary: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
|
||||
if source in df.columns:
|
||||
vals = df[source].astype(str).fillna("")
|
||||
nums = []
|
||||
units = []
|
||||
default_unit = (dictionary or {}).get("default_unit", "")
|
||||
unit_synonyms = (dictionary or {}).get("unit_synonyms", {})
|
||||
for v in vals:
|
||||
m = re.search(r"(\d+(?:\.\d+)?)(箱|件|提|盒|瓶)", v)
|
||||
if m:
|
||||
nums.append(float(m.group(1)))
|
||||
u = unit_synonyms.get(m.group(2), m.group(2))
|
||||
units.append(u)
|
||||
else:
|
||||
try:
|
||||
nums.append(float(v))
|
||||
units.append(unit_synonyms.get(default_unit, default_unit))
|
||||
except Exception:
|
||||
nums.append(0.0)
|
||||
units.append(unit_synonyms.get(default_unit, default_unit))
|
||||
df["quantity"] = nums
|
||||
df["unit"] = units
|
||||
return df
|
||||
|
||||
def _extract_spec_from_name(df: pd.DataFrame, source: str, dictionary: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
|
||||
if source in df.columns:
|
||||
names = df[source].astype(str).fillna("")
|
||||
specs = []
|
||||
packs = []
|
||||
ignore_words = (dictionary or {}).get("ignore_words", [])
|
||||
name_patterns = (dictionary or {}).get("name_patterns", [])
|
||||
for s in names:
|
||||
if ignore_words:
|
||||
for w in ignore_words:
|
||||
s = s.replace(w, "")
|
||||
matched = False
|
||||
for pat in name_patterns:
|
||||
try:
|
||||
m = re.search(pat, s)
|
||||
if m and len(m.groups()) >= 2:
|
||||
try:
|
||||
qty = int(m.group(len(m.groups())))
|
||||
except Exception:
|
||||
qty = None
|
||||
specs.append(s)
|
||||
packs.append(qty)
|
||||
matched = True
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
if matched:
|
||||
continue
|
||||
m = re.search(r"(\d+(?:\.\d+)?)(ml|l|升|毫升)[*×xX](\d+)", s, re.IGNORECASE)
|
||||
if m:
|
||||
specs.append(f"{m.group(1)}{m.group(2)}*{m.group(3)}")
|
||||
packs.append(int(m.group(3)))
|
||||
continue
|
||||
m2 = re.search(r"(\d+)[*×xX](\d+)", s)
|
||||
if m2:
|
||||
specs.append(f"1*{m2.group(2)}")
|
||||
packs.append(int(m2.group(2)))
|
||||
continue
|
||||
m3 = re.search(r"(\d{2,3})\D*(\d{1,3})\D*", s)
|
||||
if m3:
|
||||
specs.append(f"1*{m3.group(2)}")
|
||||
packs.append(int(m3.group(2)))
|
||||
continue
|
||||
specs.append("")
|
||||
packs.append(None)
|
||||
df["specification"] = df.get("specification", pd.Series(specs))
|
||||
df["package_quantity"] = packs
|
||||
return df
|
||||
|
||||
def _normalize_unit(df: pd.DataFrame, target: str, unit_map: Dict[str, str], dictionary: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
|
||||
if target in df.columns:
|
||||
df[target] = df[target].astype(str)
|
||||
df[target] = df[target].apply(lambda u: unit_map.get(u, u))
|
||||
pack_multipliers = (dictionary or {}).get("pack_multipliers", {})
|
||||
default_pq = (dictionary or {}).get("default_package_quantity", 1)
|
||||
try:
|
||||
if "quantity" in df.columns:
|
||||
def convert_qty(row):
|
||||
u = row.get(target)
|
||||
q = row.get("quantity")
|
||||
pq = row.get("package_quantity")
|
||||
if u in ("件", "箱", "提", "盒"):
|
||||
mult = pq or pack_multipliers.get(u, default_pq)
|
||||
if pd.notna(q) and pd.notna(mult) and float(mult) > 0:
|
||||
return float(q) * float(mult)
|
||||
return q
|
||||
df["quantity"] = df.apply(convert_qty, axis=1)
|
||||
df[target] = df[target].apply(lambda u: "瓶" if u in ("件","箱","提","盒") else u)
|
||||
except Exception:
|
||||
pass
|
||||
return df
|
||||
|
||||
def _compute_quantity_from_total(df: pd.DataFrame) -> pd.DataFrame:
|
||||
if "quantity" in df.columns and "unit_price" in df.columns:
|
||||
qty = df["quantity"].fillna(0)
|
||||
up = pd.to_numeric(df.get("unit_price", 0), errors="coerce").fillna(0)
|
||||
tp = pd.to_numeric(df.get("total_price", 0), errors="coerce").fillna(0)
|
||||
need = (qty <= 0) & (up > 0) & (tp > 0)
|
||||
df.loc[need, "quantity"] = (tp[need] / up[need]).round(6)
|
||||
return df
|
||||
|
||||
def _fill_missing(df: pd.DataFrame, fills: Dict[str, Any]) -> pd.DataFrame:
|
||||
for k, v in fills.items():
|
||||
if k in df.columns:
|
||||
df[k] = df[k].fillna(v)
|
||||
else:
|
||||
df[k] = v
|
||||
return df
|
||||
|
||||
def _mark_gift(df: pd.DataFrame) -> pd.DataFrame:
|
||||
df["is_gift"] = False
|
||||
tp = df.get("total_price")
|
||||
up = df.get("unit_price")
|
||||
flags = pd.Series([False]*len(df))
|
||||
if tp is not None:
|
||||
tpn = pd.to_numeric(tp, errors="coerce").fillna(0)
|
||||
flags = flags | (tpn == 0)
|
||||
if up is not None:
|
||||
upn = pd.to_numeric(up, errors="coerce").fillna(0)
|
||||
flags = flags | (upn == 0)
|
||||
if "name" in df.columns:
|
||||
flags = flags | df["name"].astype(str).str.contains(r"赠品|^o$|^O$", regex=True)
|
||||
df.loc[flags, "is_gift"] = True
|
||||
return df
|
||||
|
||||
def apply_rules(df: pd.DataFrame, rules: List[Dict[str, Any]], dictionary: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
    """Run a list of rule dicts, in order, over a copy of ``df``.

    Args:
        df: Input data (copied before any rule runs).
        rules: Rule dicts; each rule's ``type`` selects the operation and the
            remaining keys are its parameters. Unknown types are skipped.
            None is treated as an empty list.
        dictionary: Optional shared settings handed to the rules that use it.

    Returns:
        The processed copy.
    """
    # Handlers are lambdas so each rule function is only looked up when a
    # rule of that type is actually executed.
    handlers = {
        "split_quantity_unit": lambda frame, r: _split_quantity_unit(
            frame, r.get("source", "quantity"), dictionary),
        "extract_spec_from_name": lambda frame, r: _extract_spec_from_name(
            frame, r.get("source", "name"), dictionary),
        "normalize_unit": lambda frame, r: _normalize_unit(
            frame, r.get("target", "unit"), r.get("map", {}), dictionary),
        "compute_quantity_from_total": lambda frame, r: _compute_quantity_from_total(frame),
        "fill_missing": lambda frame, r: _fill_missing(frame, r.get("fills", {})),
        "mark_gift": lambda frame, r: _mark_gift(frame),
    }

    out = df.copy()
    for r in rules or []:
        t = r.get("type")
        handler = handlers.get(t) if isinstance(t, str) else None
        if handler is not None:
            out = handler(out, r)
    return out
|
||||
@@ -0,0 +1,5 @@
|
||||
"""
|
||||
OCR订单处理系统 - OCR核心模块
|
||||
---------------------------
|
||||
提供OCR识别相关功能,包括图片预处理、文字识别和表格识别。
|
||||
"""
|
||||
@@ -0,0 +1,368 @@
|
||||
"""
|
||||
百度OCR客户端模块
|
||||
---------------
|
||||
提供百度OCR API的访问和调用功能。
|
||||
"""
|
||||
|
||||
import time
|
||||
import base64
|
||||
import requests
|
||||
from typing import Dict, Optional, Union
|
||||
|
||||
from ..utils.log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Token expiry constants
_DEFAULT_TOKEN_LIFETIME = 30 * 24 * 3600  # 30 days, in seconds
_TOKEN_EARLY_EXPIRY = 3600  # refresh 1 hour early, in seconds
|
||||
|
||||
class TokenManager:
    """Fetch and cache the access token used for Baidu API calls."""

    def __init__(self, api_key: str, secret_key: str, max_retries: int = 3, retry_delay: int = 2, token_url: str = None):
        """Store the credentials and retry settings; no request is made here.

        Args:
            api_key: Baidu API key, sent as ``client_id``.
            secret_key: Baidu secret key, sent as ``client_secret``.
            max_retries: Maximum number of token requests per refresh.
            retry_delay: Base delay in seconds between two attempts.
            token_url: Token endpoint; a falsy value selects the default
                Baidu OAuth endpoint.
        """
        self.api_key = api_key
        self.secret_key = secret_key
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        if token_url:
            self.token_url = token_url
        else:
            self.token_url = 'https://aip.baidubce.com/oauth/2.0/token'
        # Nothing cached yet: an expiry of 0 is always in the past.
        self.access_token = None
        self.token_expiry = 0

    def get_token(self) -> Optional[str]:
        """Return the cached token, refreshing it first when it is not valid.

        Returns:
            The access token, or ``None`` when a refresh was needed and failed.
        """
        if not self.is_token_valid():
            return self.refresh_token()
        return self.access_token

    def is_token_valid(self) -> bool:
        """Tell whether a token is cached and will not expire within 60 seconds.

        Returns:
            ``True`` when the cached token can still be used.
        """
        if self.access_token is None:
            return False
        soonest_acceptable_expiry = time.time() + 60
        return self.token_expiry > soonest_acceptable_expiry

    def refresh_token(self) -> Optional[str]:
        """Request a new token from the token endpoint.

        Up to ``max_retries`` POST requests are made. Any exception raised
        while requesting or decoding the response is logged and counts as a
        failed attempt.

        Returns:
            The new access token, or ``None`` when every attempt failed.
        """
        endpoint = self.token_url
        credentials = {
            "grant_type": "client_credentials",
            "client_id": self.api_key,
            "client_secret": self.secret_key
        }

        for attempt in range(self.max_retries):
            attempt_no = attempt + 1
            try:
                response = requests.post(endpoint, params=credentials, timeout=10)
                if response.status_code == 200:
                    body = response.json()
                    if "access_token" in body:
                        self.access_token = body["access_token"]
                        # Expire the cached token earlier than the server
                        # does so it is never used right at its limit.
                        lifetime = body.get("expires_in", _DEFAULT_TOKEN_LIFETIME)
                        self.token_expiry = time.time() + lifetime - _TOKEN_EARLY_EXPIRY
                        logger.info("成功获取访问令牌")
                        return self.access_token

                logger.warning(f"获取访问令牌失败 (尝试 {attempt_no}/{self.max_retries}): {response.text}")

            except Exception as e:
                logger.warning(f"获取访问令牌时发生错误 (尝试 {attempt_no}/{self.max_retries}): {e}")

            # Wait before the next attempt; the delay grows linearly with
            # the attempt number and is skipped after the final attempt.
            if attempt < self.max_retries - 1:
                time.sleep(self.retry_delay * attempt_no)

        logger.error("无法获取访问令牌")
        return None
|
||||
|
||||
class BaiduOCRClient:
    """Client for the Baidu table-recognition OCR REST API.

    Settings are read from the ``API`` section of a ConfigParser-style
    ``config`` object (anything offering ``get``/``getint`` with a
    ``fallback`` keyword).
    """

    # Error codes treated as authorization failures: they trigger a token
    # refresh followed by a retry with the new token.
    _AUTH_ERROR_CODES = (110, 111)

    def __init__(self, config):
        """Read the API settings and create the token manager.

        Args:
            config: Configuration object queried for the ``API`` section.
        """
        self.config = config

        # Defaults are assigned first so that a failure while reading the
        # configuration still leaves a fully populated object instead of one
        # that raises AttributeError on first use.
        self.timeout = 30
        self.api_key = ''
        self.secret_key = ''
        self.max_retries = 3
        self.retry_delay = 2
        self.api_url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/table'
        token_url = 'https://aip.baidubce.com/oauth/2.0/token'

        try:
            timeout = config.get('API', 'timeout', fallback=30)
            # Config values usually arrive as strings.
            self.timeout = int(timeout) if isinstance(timeout, str) else timeout

            self.api_key = config.get('API', 'api_key', fallback='')
            self.secret_key = config.get('API', 'secret_key', fallback='')
            self.max_retries = self._read_int('max_retries', 3)
            self.retry_delay = self._read_int('retry_delay', 2)
            self.api_url = config.get('API', 'api_url', fallback=self.api_url)
            token_url = config.get('API', 'token_url', fallback=token_url)
        except Exception as e:
            logger.error(f"初始化失败: {e}")

        self.token_manager = TokenManager(
            self.api_key,
            self.secret_key,
            self.max_retries,
            self.retry_delay,
            token_url=token_url
        )

        if not self.api_key or not self.secret_key:
            logger.warning("API密钥未设置,请在配置文件中设置API密钥")

    def _read_int(self, option: str, default: int) -> int:
        """Read an integer option from the ``API`` section.

        Falls back to ``get`` plus ``int`` for config objects whose
        ``getint`` is missing or does not accept ``fallback``.
        """
        try:
            return self.config.getint('API', option, fallback=default)
        except (TypeError, AttributeError):
            return int(self.config.get('API', option, fallback=str(default)))

    def read_image(self, image_path: str) -> Optional[bytes]:
        """Read an image file as raw bytes.

        Args:
            image_path: Path of the image file.

        Returns:
            The file content, or ``None`` when the file cannot be read.
        """
        try:
            with open(image_path, 'rb') as f:
                return f.read()
        except Exception as e:
            logger.error(f"读取图片文件失败: {image_path}, 错误: {e}")
            return None

    def recognize_table(self, image_data: Union[str, bytes]) -> Optional[Dict]:
        """Send an image to the table-recognition endpoint.

        Args:
            image_data: Either a file path (``str``) or the raw image bytes.

        Returns:
            The decoded JSON response, or ``None`` on failure. The response
            is returned as-is; no particular structure is enforced.
        """
        access_token = self.token_manager.get_token()
        if not access_token:
            logger.error("无法获取访问令牌,无法进行表格识别")
            return None

        # A string is interpreted as a path to load.
        if isinstance(image_data, str):
            image_data = self.read_image(image_data)
            if image_data is None:
                return None

        payload = {
            'image': base64.b64encode(image_data).decode('utf-8'),
            'is_sync': 'true',
            'request_type': 'excel',
            'return_excel': 'true'
        }
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'application/json'
        }

        for attempt in range(self.max_retries):
            is_last = attempt >= self.max_retries - 1
            try:
                # The URL is rebuilt on every attempt because the token may
                # have been refreshed by the previous one.
                response = requests.post(
                    f"{self.api_url}?access_token={access_token}",
                    data=payload,
                    headers=headers,
                    timeout=self.timeout
                )

                if response.status_code == 200:
                    result = response.json()
                    logger.debug(f"百度OCR API返回结果: {result}")

                    if 'error_code' not in result:
                        return result

                    error_msg = result.get('error_msg', '未知错误')
                    logger.error(f"百度OCR API错误: {error_msg}")
                    if result.get('error_code') not in self._AUTH_ERROR_CODES:
                        return None

                    # Authorization failure: get a fresh token and, when
                    # attempts remain, retry right away with it. Previously
                    # the refreshed token was discarded and the call failed.
                    logger.info("尝试刷新访问令牌...")
                    access_token = self.token_manager.refresh_token()
                    if not access_token or is_last:
                        return None
                    continue
                else:
                    logger.warning(f"表格识别请求失败 (尝试 {attempt+1}/{self.max_retries}): {response.text}")

            except Exception as e:
                logger.warning(f"表格识别时发生错误 (尝试 {attempt+1}/{self.max_retries}): {e}")

            # Exponential backoff between attempts.
            if not is_last:
                wait_time = self.retry_delay * (2 ** attempt)
                logger.info(f"将在 {wait_time} 秒后重试...")
                time.sleep(wait_time)

        logger.error("表格识别失败")
        return None

    def get_excel_result(self, request_id_or_result: Union[str, Dict]) -> Optional[bytes]:
        """Return the Excel file for a recognition request.

        Args:
            request_id_or_result: A request id, or the full dictionary
                returned by ``recognize_table``.

        Returns:
            The Excel file content, or ``None`` when it cannot be obtained.
        """
        request_id = request_id_or_result
        if isinstance(request_id_or_result, dict):
            if 'result' in request_id_or_result:
                inner = request_id_or_result['result']

                # Inline Excel data needs no further request, so it is
                # handled before an access token is even asked for.
                if 'result_data' in inner:
                    excel_content = inner['result_data']
                    if excel_content:
                        try:
                            return base64.b64decode(excel_content)
                        except Exception as e:
                            logger.error(f"解析Excel数据失败: {e}")

                if 'request_id' in inner:
                    request_id = inner['request_id']
                    logger.debug(f"从result子对象中提取request_id: {request_id}")
                elif 'tables_result' in inner and len(inner['tables_result']) > 0:
                    # Table content was returned directly; there is no
                    # request id to poll with.
                    logger.info("检测到API直接返回了表格内容,但没有request_id")
                    return None
            elif 'request_id' in request_id_or_result:
                request_id = request_id_or_result['request_id']
                logger.debug(f"从顶层对象中提取request_id: {request_id}")

        if not isinstance(request_id, str):
            logger.error(f"无法从结果中提取有效的request_id: {request_id_or_result}")
            return None

        access_token = self.token_manager.get_token()
        if not access_token:
            logger.error("无法获取访问令牌,无法获取Excel结果")
            return None

        base_url = self.config.get('API', 'form_ocr_url', fallback='https://aip.baidubce.com/rest/2.0/solution/v1/form_ocr/get_request_result')
        url = f"{base_url}?access_token={access_token}"
        payload = {
            'request_id': request_id,
            'result_type': 'excel'
        }
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'application/json'
        }

        for attempt in range(self.max_retries):
            is_last = attempt >= self.max_retries - 1
            try:
                response = requests.post(
                    url,
                    data=payload,
                    headers=headers,
                    timeout=self.timeout
                )

                if response.status_code == 200:
                    try:
                        result = response.json()
                        logger.debug(f"获取Excel结果返回: {result}")
                        status = result.get('result', {})

                        # NOTE(review): ret_code 3 is treated as "still
                        # processing" and 0 as success; confirm these values
                        # against the Baidu API reference.
                        if status.get('ret_code') == 3:
                            logger.info(f"Excel结果正在处理中,等待后重试 (尝试 {attempt+1}/{self.max_retries})")
                            # No point in waiting after the final attempt.
                            if not is_last:
                                time.sleep(2)
                            continue

                        if 'error_code' in result or status.get('ret_code') != 0:
                            error_msg = result.get('error_msg') or status.get('ret_msg', '未知错误')
                            logger.error(f"获取Excel结果失败: {error_msg}")
                            return None

                        excel_content = status.get('result_data')
                        if excel_content:
                            return base64.b64decode(excel_content)
                        logger.error("Excel结果为空")
                        return None

                    except Exception as e:
                        logger.error(f"解析Excel结果时出错: {e}")
                        return None
                else:
                    logger.warning(f"获取Excel结果请求失败 (尝试 {attempt+1}/{self.max_retries}): {response.text}")

            except Exception as e:
                logger.warning(f"获取Excel结果时发生错误 (尝试 {attempt+1}/{self.max_retries}): {e}")

            # Linear backoff between attempts.
            if not is_last:
                time.sleep(self.retry_delay * (attempt + 1))

        logger.error("获取Excel结果失败")
        return None
|
||||
@@ -0,0 +1,389 @@
|
||||
"""
|
||||
表格OCR处理模块
|
||||
-------------
|
||||
处理图片并提取表格内容,保存为Excel文件。
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import base64
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Dict, List, Optional, Tuple, Callable
|
||||
|
||||
from ..utils.log_utils import get_logger
|
||||
from ..utils.file_utils import (
|
||||
ensure_dir,
|
||||
get_file_extension,
|
||||
get_files_by_extensions,
|
||||
generate_timestamp_filename,
|
||||
is_file_size_valid,
|
||||
load_json,
|
||||
save_json
|
||||
)
|
||||
from .baidu_ocr import BaiduOCRClient
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class ProcessedRecordManager:
    """Track which input files have already been processed.

    The record is a mapping from input file path to output file path that is
    loaded from, and written back to, a JSON file.
    """

    def __init__(self, record_file: str):
        """Remember the record file location and load its current content.

        Args:
            record_file: Path of the JSON record file.
        """
        self.record_file = record_file
        self.processed_files = self._load_record()

    def _load_record(self) -> Dict[str, str]:
        """Load the record mapping, using an empty dict as the default.

        Returns:
            Mapping of input file path to output file path.
        """
        records = load_json(self.record_file, {})
        return records

    def save_record(self) -> None:
        """Write the current record mapping to the record file."""
        save_json(self.processed_files, self.record_file)

    def is_processed(self, image_file: str) -> bool:
        """Tell whether ``image_file`` has an entry in the record.

        Args:
            image_file: Image file path.

        Returns:
            ``True`` when the file was recorded as processed.
        """
        already_done = image_file in self.processed_files
        return already_done

    def mark_as_processed(self, image_file: str, output_file: str) -> None:
        """Record ``output_file`` for ``image_file`` and save immediately.

        Args:
            image_file: Image file path.
            output_file: Path of the file produced for that image.
        """
        self.processed_files[image_file] = output_file
        self.save_record()

    def get_output_file(self, image_file: str) -> Optional[str]:
        """Look up the output file recorded for ``image_file``.

        Args:
            image_file: Image file path.

        Returns:
            The recorded output path, or ``None`` when there is no entry.
        """
        return self.processed_files.get(image_file)

    def get_unprocessed_files(self, files: List[str]) -> List[str]:
        """Keep only the files that have no entry in the record.

        Args:
            files: Candidate file paths.

        Returns:
            The unrecorded paths, in their original order.
        """
        pending = []
        for candidate in files:
            if self.is_processed(candidate):
                continue
            pending.append(candidate)
        return pending
|
||||
|
||||
class OCRProcessor:
    """Coordinate OCR recognition of images and saving of the Excel results."""

    def __init__(self, config):
        """Read folders and file types from ``config`` and create the helpers.

        Args:
            config: ConfigParser-style object (``get``/``getint``/
                ``getboolean`` with a ``fallback`` keyword).

        Raises:
            Exception: Any error raised during initialisation is logged and
                re-raised.
        """
        self.config = config

        try:
            self.input_folder = config.get('Paths', 'input_folder', fallback='data/input')
            self.output_folder = config.get('Paths', 'output_folder', fallback='data/output')
            self.temp_folder = config.get('Paths', 'temp_folder', fallback='data/temp')

            # Make sure the working directories exist.
            os.makedirs(self.input_folder, exist_ok=True)
            os.makedirs(self.output_folder, exist_ok=True)
            os.makedirs(self.temp_folder, exist_ok=True)

            allowed_extensions_str = config.get('File', 'allowed_extensions', fallback='.jpg,.jpeg,.png,.bmp')
            self.file_types = [ext.strip() for ext in allowed_extensions_str.split(',') if ext.strip()]
            if not self.file_types:
                self.file_types = ['.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tif', '.tiff']

            self.ocr_client = BaiduOCRClient(self.config)

            logger.info(f"使用输入目录: {os.path.abspath(self.input_folder)}")
            logger.info(f"使用输出目录: {os.path.abspath(self.output_folder)}")
            logger.info(f"使用临时目录: {os.path.abspath(self.temp_folder)}")
            logger.info(f"允许的文件类型: {self.file_types}")

            # The processing record lives next to the generated files.
            self.processed_files_json = os.path.join(self.output_folder, 'processed_files.json')
            self.record_manager = ProcessedRecordManager(self.processed_files_json)
            self.processed_files = self._load_processed_files()

            logger.info(f"初始化OCRProcessor完成:输入目录={self.input_folder}, 输出目录={self.output_folder}")
        except Exception as e:
            logger.error(f"初始化OCRProcessor失败: {e}")
            raise

    def _load_processed_files(self) -> Dict[str, str]:
        """Load the record of processed files.

        Returns:
            Mapping of input file path to output file path.
        """
        return load_json(self.processed_files_json, {})

    def _skip_existing(self) -> bool:
        """Return the ``Performance/skip_existing`` setting (default ``True``)."""
        try:
            return self.config.getboolean('Performance', 'skip_existing', fallback=True)
        except Exception:
            # An unreadable setting falls back to the default.
            return True

    def _config_int(self, option: str, default: int) -> int:
        """Return an integer option of the ``Performance`` section, or ``default``."""
        try:
            return self.config.getint('Performance', option, fallback=default)
        except Exception:
            return default

    @staticmethod
    def _find_inline_excel(ocr_result: Dict) -> Tuple[Optional[str], Optional[str]]:
        """Locate base64 Excel data embedded in an OCR response.

        Returns:
            ``(data, log_message)`` for the first matching field, or
            ``(None, None)`` when no known field is present.
        """
        if 'excel_file' in ocr_result:
            return ocr_result['excel_file'], "从excel_file字段获取Excel数据"
        if 'result' in ocr_result:
            inner = ocr_result['result']
            if 'result_data' in inner:
                return inner['result_data'], "从result.result_data字段获取Excel数据"
            if 'excel_file' in inner:
                return inner['excel_file'], "从result.excel_file字段获取Excel数据"
            if 'tables_result' in inner and inner['tables_result']:
                for table in inner['tables_result']:
                    if 'excel_file' in table:
                        return table['excel_file'], "从tables_result中获取Excel数据"
        return None, None

    @staticmethod
    def _write_output(output_file: str, excel_data: bytes) -> None:
        """Write ``excel_data`` to ``output_file``, creating its directory."""
        parent = os.path.dirname(output_file)
        # A bare file name has no directory part; makedirs('') would fail.
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(output_file, 'wb') as f:
            f.write(excel_data)

    @staticmethod
    def _report_progress(progress_cb: Optional[Callable[[int], None]], percent: int) -> None:
        """Call ``progress_cb`` if given; callback errors are ignored on purpose."""
        if not progress_cb:
            return
        try:
            progress_cb(percent)
        except Exception:
            # Progress reporting is best-effort and must not stop processing.
            pass

    def get_unprocessed_images(self) -> List[str]:
        """List the images of the input folder that still need processing.

        Returns:
            Image paths; already recorded files are removed when
            ``skip_existing`` is enabled.
        """
        image_files = get_files_by_extensions(self.input_folder, self.file_types)

        if self._skip_existing():
            unprocessed_files = self.record_manager.get_unprocessed_files(image_files)
            logger.info(f"找到 {len(image_files)} 个图片文件,其中 {len(unprocessed_files)} 个未处理")
            return unprocessed_files

        logger.info(f"找到 {len(image_files)} 个图片文件(不跳过已处理的文件)")
        return image_files

    def validate_image(self, image_path: str) -> bool:
        """Check existence, extension and size of an image file.

        Args:
            image_path: Image file path.

        Returns:
            ``True`` when the file passes all three checks.
        """
        if not os.path.exists(image_path):
            logger.warning(f"图片文件不存在: {image_path}")
            return False

        ext = get_file_extension(image_path)
        if ext not in self.file_types:
            logger.warning(f"不支持的文件类型: {ext}, 文件: {image_path}")
            return False

        max_size_mb = 4.0
        try:
            max_size_mb = float(self.config.get('File', 'max_file_size_mb', fallback='4.0'))
        except Exception:
            # Keep the default limit when the setting cannot be read.
            pass

        if not is_file_size_valid(image_path, max_size_mb):
            logger.warning(f"文件大小超过限制 ({max_size_mb}MB): {image_path}")
            return False

        return True

    def process_image(self, image_path: str) -> Optional[str]:
        """Run OCR on one image and save the resulting Excel file.

        Args:
            image_path: Image file path.

        Returns:
            Path of the Excel file, or ``None`` when processing failed.
        """
        if not self.validate_image(image_path):
            return None

        skip_existing = self._skip_existing()

        if skip_existing and self.record_manager.is_processed(image_path):
            output_file = self.record_manager.get_output_file(image_path)
            logger.info(f"图片已处理,跳过: {image_path}, 输出文件: {output_file}")
            return output_file

        logger.info(f"开始处理图片: {image_path}")

        try:
            excel_extension = '.xlsx'
            try:
                excel_extension = self.config.get('File', 'excel_extension', fallback='.xlsx')
            except Exception:
                pass

            # The output file is named after the image.
            file_name = os.path.splitext(os.path.basename(image_path))[0]
            output_file = os.path.join(self.output_folder, f"{file_name}{excel_extension}")

            # An existing output file counts as "already processed".
            if os.path.exists(output_file) and skip_existing:
                logger.info(f"已存在对应的Excel文件,跳过处理: {os.path.basename(image_path)} -> {os.path.basename(output_file)}")
                self.record_manager.mark_as_processed(image_path, output_file)
                return output_file

            ocr_result = self.ocr_client.recognize_table(image_path)
            if not ocr_result:
                logger.error(f"OCR识别失败: {image_path}")
                return None

            excel_base64, found_message = self._find_inline_excel(ocr_result)
            if found_message is not None:
                logger.debug(found_message)

            if excel_base64:
                try:
                    self._write_output(output_file, base64.b64decode(excel_base64))
                except Exception as e:
                    logger.error(f"解码或保存Excel数据时出错: {e}")
                    return None
            else:
                # Nothing inline: ask the client to fetch the Excel result.
                logger.info("无法从直接返回中获取Excel数据,尝试通过API获取...")
                excel_data = self.ocr_client.get_excel_result(ocr_result)
                if not excel_data:
                    logger.error(f"获取Excel结果失败: {image_path}")
                    return None
                self._write_output(output_file, excel_data)

            logger.info(f"图片处理成功: {image_path}, 输出文件: {output_file}")
            self.record_manager.mark_as_processed(image_path, output_file)
            return output_file

        except Exception as e:
            logger.error(f"处理图片时出错: {image_path}, 错误: {e}")
            return None

    def process_images_batch(self, batch_size: int = None, max_workers: int = None, progress_cb: Optional[Callable[[int], None]] = None) -> Tuple[int, int]:
        """Process all pending images in batches using a thread pool.

        Args:
            batch_size: Images per batch; ``None`` reads
                ``Performance/batch_size`` (default 5).
            max_workers: Threads per batch; ``None`` reads
                ``Performance/max_workers`` (default 4).
            progress_cb: Optional callback receiving an integer percentage
                between 10 and 90; exceptions it raises are ignored.

        Returns:
            ``(total, succeeded)`` counts.
        """
        if batch_size is None:
            batch_size = self._config_int('batch_size', 5)
        if max_workers is None:
            max_workers = self._config_int('max_workers', 4)

        # Non-positive values would make range() / ThreadPoolExecutor raise
        # ValueError, so both are clamped to at least 1.
        batch_size = max(1, batch_size)
        max_workers = max(1, max_workers)

        unprocessed_images = self.get_unprocessed_images()
        if not unprocessed_images:
            logger.warning("没有需要处理的图片")
            return 0, 0

        total = len(unprocessed_images)
        batch_count = (total + batch_size - 1) // batch_size
        success_count = 0

        for i in range(0, total, batch_size):
            batch = unprocessed_images[i:i+batch_size]
            logger.info(f"处理批次 {i//batch_size+1}/{batch_count}: {len(batch)} 个文件")

            # Progress is estimated per batch and mapped onto 10..90.
            percent = int(10 + (i / max(total, 1)) * 80)
            self._report_progress(progress_cb, min(percent, 90))

            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                results = list(executor.map(self.process_image, batch))

            success_count += sum(1 for result in results if result is not None)

        logger.info(f"所有图片处理完成, 总计: {total}, 成功: {success_count}")
        self._report_progress(progress_cb, 90)
        return total, success_count
|
||||
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
处理器模块初始化文件
|
||||
"""
|
||||
|
||||
from .base import BaseProcessor
|
||||
from .ocr_processor import OCRProcessor
|
||||
from .tobacco_processor import TobaccoProcessor
|
||||
|
||||
__all__ = ['BaseProcessor', 'OCRProcessor', 'TobaccoProcessor']
|
||||
@@ -0,0 +1,167 @@
|
||||
"""
|
||||
基础处理器接口模块
|
||||
|
||||
定义所有处理器的基类,提供统一的处理接口
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any, Optional, List
|
||||
from pathlib import Path
|
||||
import logging
|
||||
import pandas as pd
|
||||
|
||||
from ...core.utils.log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class BaseProcessor(ABC):
    """Abstract base class shared by all file processors.

    Each concrete processor handles one kind of input file and implements
    ``can_process``, ``process`` and ``get_required_columns``.
    """

    def __init__(self, config: Dict[str, Any]):
        """Store the configuration and set up the per-processor logger.

        Args:
            config: Processor configuration.
        """
        self.config = config
        self.name = self.__class__.__name__
        self.description = ""
        self._setup_logging()

    def _setup_logging(self):
        """Create a logger named after this module and the processor name."""
        logger_name = f"{__name__}.{self.name}"
        self.logger = logging.getLogger(logger_name)

    @abstractmethod
    def can_process(self, file_path: Path) -> bool:
        """Decide whether this processor can handle ``file_path``.

        Args:
            file_path: File to inspect.

        Returns:
            ``True`` when the file can be processed.
        """
        pass

    @abstractmethod
    def process(self, input_file: Path, output_dir: Path) -> Optional[Path]:
        """Process ``input_file`` and return the path of the output file.

        Args:
            input_file: File to process.
            output_dir: Directory for the output.

        Returns:
            Path of the output file, or ``None`` on failure.
        """
        pass

    @abstractmethod
    def get_required_columns(self) -> List[str]:
        """Return the column names this processor needs.

        Returns:
            List of column names.
        """
        pass

    def validate_input(self, file_path: Path) -> bool:
        """Check that ``file_path`` is an existing file of a supported type.

        Args:
            file_path: File to validate.

        Returns:
            ``True`` when the file is usable; ``False`` otherwise, including
            when the check itself raises.
        """
        try:
            if not file_path.exists():
                self.logger.warning(f"文件不存在: {file_path}")
            elif not file_path.is_file():
                self.logger.warning(f"不是文件: {file_path}")
            else:
                allowed = self.get_supported_extensions()
                # An empty list means every extension is accepted.
                if not allowed or file_path.suffix.lower() in allowed:
                    return True
                self.logger.warning(f"不支持的文件类型: {file_path.suffix}, 支持的类型: {allowed}")
            return False
        except Exception as e:
            self.logger.error(f"验证文件时出错: {e}")
            return False

    def get_supported_extensions(self) -> List[str]:
        """Return the supported file extensions.

        Returns:
            Extension list; an empty list means all types are supported.
        """
        return []

    def get_output_filename(self, input_file: Path, suffix: str = "_processed") -> str:
        """Build the output file name from the input name and a suffix.

        Args:
            input_file: Input file path.
            suffix: Text inserted between the stem and the extension.

        Returns:
            The output file name (no directory part).
        """
        stem, extension = input_file.stem, input_file.suffix
        return f"{stem}{suffix}{extension}"

    def _read_excel_safely(self, file_path: Path, **kwargs) -> pd.DataFrame:
        """Read an Excel file with the engine matching its extension.

        Args:
            file_path: File to read.
            **kwargs: Extra arguments passed to ``pd.read_excel``.

        Returns:
            The loaded data frame.

        Raises:
            Exception: Whatever ``pd.read_excel`` raises.
        """
        extension = file_path.suffix.lower()

        if extension == '.xls':
            try:
                return pd.read_excel(file_path, engine='xlrd', **kwargs)
            except Exception as e:
                self.logger.warning(f"读取xls失败,可能缺少xlrd: {e}")
                raise

        if extension == '.xlsx':
            return pd.read_excel(file_path, engine='openpyxl', **kwargs)

        # Any other extension: let pandas choose the engine.
        return pd.read_excel(file_path, **kwargs)

    def log_processing_start(self, input_file: Path):
        """Log that processing of ``input_file`` is starting."""
        self.logger.info(f"开始处理文件: {input_file}")
        self.logger.info(f"处理器: {self.name} - {self.description}")

    def log_processing_end(self, input_file: Path, output_file: Optional[Path] = None, success: bool = True):
        """Log the outcome of processing ``input_file``."""
        if not success:
            self.logger.error(f"处理失败: {input_file}")
            return
        self.logger.info(f"处理完成: {input_file}")
        if output_file:
            self.logger.info(f"输出文件: {output_file}")

    def __str__(self) -> str:
        """Short form: ``name(description)``."""
        return f"{self.name}({self.description})"

    def __repr__(self) -> str:
        """Detailed form including module, class name, name and description."""
        cls = self.__class__
        return f"{cls.__module__}.{cls.__name__}(name='{self.name}', description='{self.description}')"
|
||||
@@ -0,0 +1,192 @@
|
||||
"""
|
||||
OCR处理器
|
||||
|
||||
处理图片文件的OCR识别完整流程:图片识别 → Excel处理 → 标准采购单生成
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
from .base import BaseProcessor
|
||||
from ...services.ocr_service import OCRService
|
||||
from ...services.order_service import OrderService
|
||||
from ...core.utils.log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class OCRProcessor(BaseProcessor):
    """Processor running the full OCR pipeline for an image file.

    Steps:
      1. OCR the table contained in the image.
      2. Process the resulting Excel file.
      3. Hand back the purchase order produced by the order service.
    """

    # Single source of truth for the image types this processor accepts.
    SUPPORTED_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp')

    def __init__(self, config: Dict[str, Any]):
        """Create the processor and the services it delegates to.

        Args:
            config: Configuration passed to the base class and both services.
        """
        super().__init__(config)
        self.description = "OCR识别完整流程(图片→识别→Excel→采购单)"

        self.ocr_service = OCRService(config)
        self.order_service = OrderService(config)

    def can_process(self, file_path: Path) -> bool:
        """Tell whether ``file_path`` is a supported image file.

        Args:
            file_path: File to inspect.

        Returns:
            ``True`` when the file passes validation and has a supported
            image extension.
        """
        if not self.validate_input(file_path):
            return False

        # Uses the same list as get_supported_extensions() so the two can
        # never disagree.
        if file_path.suffix.lower() in self.get_supported_extensions():
            self.logger.info(f"识别为图片文件: {file_path.name}")
            return True

        return False

    def _fail(self, input_file: Path, message: str) -> None:
        """Log a failed pipeline step and the end of processing."""
        self.logger.error(message)
        self.log_processing_end(input_file, success=False)
        return None

    def process(self, input_file: Path, output_dir: Path) -> Optional[Path]:
        """Run the complete OCR pipeline for one image.

        Args:
            input_file: Image file to process.
            output_dir: Output directory, forwarded to the pipeline steps.

        Returns:
            Path of the final output file, or ``None`` when any step failed.
        """
        self.log_processing_start(input_file)

        try:
            self.logger.info("开始OCR识别流程...")

            self.logger.info("步骤1/3: OCR识别图片...")
            ocr_result = self._perform_ocr(input_file, output_dir)
            if not ocr_result:
                return self._fail(input_file, "OCR识别失败")

            self.logger.info("步骤2/3: 处理Excel文件...")
            excel_result = self._process_excel(ocr_result, output_dir)
            if not excel_result:
                return self._fail(input_file, "Excel处理失败")

            self.logger.info("步骤3/3: 生成标准采购单...")
            final_result = self._generate_purchase_order(excel_result, output_dir)
            if not final_result:
                return self._fail(input_file, "生成采购单失败")

            self.logger.info(f"OCR处理流程完成,输出文件: {final_result}")
            self.log_processing_end(input_file, final_result, success=True)
            return final_result

        except Exception as e:
            self.logger.error(f"OCR处理流程出错: {e}", exc_info=True)
            self.log_processing_end(input_file, success=False)
            return None

    def get_required_columns(self) -> List[str]:
        """Return the required columns: none, later steps decide them."""
        return []

    def get_supported_extensions(self) -> List[str]:
        """Return the supported image extensions as a new list."""
        return list(self.SUPPORTED_EXTENSIONS)

    def _perform_ocr(self, input_file: Path, output_dir: Path) -> Optional[Path]:
        """Run the OCR service on ``input_file``.

        Args:
            input_file: Image file.
            output_dir: Output directory (not used by this step).

        Returns:
            Path of the Excel file reported by the OCR service, or ``None``
            when the service failed or the reported file does not exist.
        """
        try:
            self.logger.info(f"开始OCR识别: {input_file}")

            result_path = self.ocr_service.process_image(str(input_file))
            if not result_path:
                self.logger.error("OCR服务返回None")
                return None

            result_path = Path(result_path)
            if not result_path.exists():
                self.logger.error(f"OCR结果文件不存在: {result_path}")
                return None

            self.logger.info(f"OCR识别成功,输出文件: {result_path}")
            return result_path

        except Exception as e:
            self.logger.error(f"OCR识别失败: {e}", exc_info=True)
            return None

    def _process_excel(self, excel_file: Path, output_dir: Path) -> Optional[Path]:
        """Run the order service on the OCR-generated Excel file.

        Args:
            excel_file: Excel file to process.
            output_dir: Output directory (not used by this step).

        Returns:
            Path of the processed file, or ``None`` when the service failed
            or the reported file does not exist.
        """
        try:
            self.logger.info(f"开始处理Excel文件: {excel_file}")

            result_path = self.order_service.process_excel(str(excel_file))
            if not result_path:
                self.logger.error("Excel处理服务返回None")
                return None

            result_path = Path(result_path)
            if not result_path.exists():
                self.logger.error(f"Excel处理结果文件不存在: {result_path}")
                return None

            self.logger.info(f"Excel处理成功,输出文件: {result_path}")
            return result_path

        except Exception as e:
            self.logger.error(f"Excel处理失败: {e}", exc_info=True)
            return None

    def _generate_purchase_order(self, processed_file: Path, output_dir: Path) -> Optional[Path]:
        """Return ``processed_file`` when it exists.

        The purchase order itself is produced by the order service in the
        previous step, so this step only verifies the file.

        Args:
            processed_file: File produced by the Excel step.
            output_dir: Output directory (not used by this step).

        Returns:
            ``processed_file`` when it exists, otherwise ``None``.
        """
        try:
            if processed_file and processed_file.exists():
                return processed_file
            return None
        except Exception as e:
            # Previously swallowed silently; log it so failures are visible.
            self.logger.error(f"检查采购单文件时出错: {e}", exc_info=True)
            return None
|
||||
@@ -0,0 +1,7 @@
|
||||
"""
|
||||
供应商处理器模块初始化文件
|
||||
"""
|
||||
|
||||
from .generic_supplier_processor import GenericSupplierProcessor
|
||||
|
||||
__all__ = ['GenericSupplierProcessor']
|
||||
@@ -0,0 +1,340 @@
|
||||
"""
|
||||
通用供应商处理器
|
||||
|
||||
可配置化的供应商处理器,支持通过配置文件定义处理规则
|
||||
"""
|
||||
|
||||
import fnmatch
|
||||
import pandas as pd
|
||||
from typing import Optional, Dict, Any, List
|
||||
from pathlib import Path
|
||||
|
||||
from ..base import BaseProcessor
|
||||
from ...utils.log_utils import get_logger
|
||||
from ...handlers.rule_engine import apply_rules
|
||||
from ...handlers.column_mapper import ColumnMapper
|
||||
from ...handlers.data_cleaner import DataCleaner
|
||||
from ...handlers.calculator import DataCalculator
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class GenericSupplierProcessor(BaseProcessor):
    """Configuration-driven processor for supplier Excel files.

    Behaviour is controlled by a per-supplier configuration dict that may
    define:
    - filename patterns (fnmatch style) used to recognise a file
    - content indicators searched for in the column headers
    - a column mapping (source column name -> target column name)
    - data cleaning rules
    - calculation rules
    """

    def __init__(self, config: Dict[str, Any], supplier_config: Dict[str, Any]):
        """Initialise the processor.

        Args:
            config: System configuration, forwarded to the base class.
            supplier_config: Supplier-specific configuration.
        """
        super().__init__(config)
        self.supplier_config = supplier_config

        # Basic identity taken from the supplier configuration.
        self.name = supplier_config.get('name', 'GenericSupplier')
        self.description = supplier_config.get('description', '通用供应商处理器')

        # Recognition and processing rules.
        self.filename_patterns = supplier_config.get('filename_patterns', [])
        self.content_indicators = supplier_config.get('content_indicators', [])
        self.column_mapping = supplier_config.get('column_mapping', {})
        self.cleaning_rules = supplier_config.get('cleaning_rules', [])
        self.calculations = supplier_config.get('calculations', [])

        # Output settings.
        self.output_template = supplier_config.get('output_template', 'templates/银豹-采购单模板.xls')
        self.output_suffix = supplier_config.get('output_suffix', '_银豹采购单')

    def can_process(self, file_path: Path) -> bool:
        """Decide whether this processor recognises the file.

        A file is accepted when it passes ``validate_input`` and either a
        configured filename pattern or a configured content indicator matches.

        Args:
            file_path: File to inspect.

        Returns:
            True when the file is recognised, False otherwise (including when
            no recognition rule is configured at all).
        """
        if not self.validate_input(file_path):
            return False

        # Filename patterns are checked first because they need no file read.
        if self.filename_patterns and self._check_filename_patterns(file_path):
            return True

        if self.content_indicators and self._check_content_indicators(file_path):
            return True

        # Without any rule there is nothing to decide on; make that visible.
        if not self.filename_patterns and not self.content_indicators:
            self.logger.warning(f"处理器 {self.name} 没有配置识别规则")

        return False

    def process(self, input_file: Path, output_dir: Path) -> Optional[Path]:
        """Run the full pipeline on one supplier file.

        Steps: read data, apply the column mapping, clean the data, apply the
        rule engine (best effort), apply calculations, write the output file.

        Args:
            input_file: Input file path.
            output_dir: Output directory path.

        Returns:
            Path of the generated file, or None when any step fails.
        """
        self.log_processing_start(input_file)

        try:
            # Step 1: read the data.
            self.logger.info("步骤1/4: 读取数据...")
            df = self._read_supplier_data(input_file)
            if df is None or df.empty:
                return self._abort_processing(input_file, "读取数据失败或数据为空")

            # Step 2: apply the column mapping.
            self.logger.info("步骤2/4: 应用列映射...")
            mapped_df = self._apply_column_mapping(df)
            if mapped_df is None:
                return self._abort_processing(input_file, "列映射失败")

            # Step 3: clean the data.
            self.logger.info("步骤3/4: 数据清洗...")
            cleaned_df = self._apply_data_cleaning(mapped_df)
            if cleaned_df is None:
                return self._abort_processing(input_file, "数据清洗失败")

            # Rule engine: best effort, a failure falls back to the cleaned data.
            try:
                rules = self.supplier_config.get('rules', [])
                dictionary = self.supplier_config.get('dictionary')
                standardized_df = apply_rules(cleaned_df, rules, dictionary)
            except Exception as e:
                self.logger.warning(f"规则执行失败: {e}")
                standardized_df = cleaned_df

            # Step 4: calculations.
            self.logger.info("步骤4/4: 计算处理...")
            calculated_df = self._apply_calculations(standardized_df)
            if calculated_df is None:
                return self._abort_processing(input_file, "计算处理失败")

            # Write the output file.
            output_file = self._generate_output(calculated_df, input_file, output_dir)
            if output_file and output_file.exists():
                self.logger.info(f"处理完成,输出文件: {output_file}")
                self.log_processing_end(input_file, output_file, success=True)
                return output_file

            return self._abort_processing(input_file, "输出文件生成失败")

        except Exception as e:
            self.logger.error(f"处理文件时出错: {e}", exc_info=True)
            self.log_processing_end(input_file, success=False)
            return None

    def _abort_processing(self, input_file: Path, message: str) -> None:
        """Log a failed pipeline step, record the failed run and return None."""
        self.logger.error(message)
        self.log_processing_end(input_file, success=False)
        return None

    def get_required_columns(self) -> List[str]:
        """Return the target column names defined by the column mapping."""
        return list(self.column_mapping.values()) if self.column_mapping else []

    def _check_filename_patterns(self, file_path: Path) -> bool:
        """Match the file name against the configured patterns.

        Matching is case-insensitive and uses ``fnmatch`` wildcards.

        Args:
            file_path: File to check.

        Returns:
            True when any pattern matches, False otherwise or on error.
        """
        try:
            filename = file_path.name
            for pattern in self.filename_patterns:
                if fnmatch.fnmatch(filename.lower(), pattern.lower()):
                    self.logger.info(f"文件名匹配成功: {filename} -> {pattern}")
                    return True
            return False
        except Exception as e:
            self.logger.error(f"检查文件名模式时出错: {e}")
            return False

    def _check_content_indicators(self, file_path: Path) -> bool:
        """Look for the configured indicators in the file's column headers.

        Only the first rows are read; the indicators are searched
        case-insensitively in the string form of the column list.

        Args:
            file_path: File to check.

        Returns:
            True when any indicator is found, False otherwise or on error.
        """
        try:
            df = self._read_excel_safely(file_path, nrows=5)
            if df is None:
                # _read_supplier_data treats None as a possible result of the
                # reader, so handle it here too instead of failing on df.columns.
                self.logger.warning(f"无法读取文件内容,跳过内容特征检查: {file_path}")
                return False

            columns_str = str(list(df.columns)).lower()

            for indicator in self.content_indicators:
                if indicator.lower() in columns_str:
                    self.logger.info(f"内容特征匹配成功: {indicator}")
                    return True

            return False

        except Exception as e:
            self.logger.error(f"检查内容特征时出错: {e}")
            return False

    def _read_supplier_data(self, file_path: Path) -> Optional[pd.DataFrame]:
        """Read the supplier spreadsheet into a DataFrame.

        When the supplier configuration sets ``header_row`` that row is used
        (falling back to the reader's default header when that read fails).
        Otherwise the header row is auto-detected from a header-less read.

        Args:
            file_path: File to read.

        Returns:
            The DataFrame, or None when reading fails or the data is empty.
        """
        try:
            specified = self.supplier_config.get('header_row')
            if specified is not None:
                try:
                    df = self._read_excel_safely(file_path, header=int(specified))
                except Exception as e:
                    # Make the fallback visible instead of swallowing it silently.
                    self.logger.warning(f"按指定表头行 {specified} 读取失败,改用默认表头: {e}")
                    df = self._read_excel_safely(file_path)
            else:
                df0 = self._read_excel_safely(file_path, header=None)
                if df0 is None:
                    return None
                header_row = self._find_header_row(df0)
                if header_row is not None:
                    df = self._read_excel_safely(file_path, header=header_row)
                else:
                    df = self._read_excel_safely(file_path)
            if df is None or df.empty:
                self.logger.warning("数据文件为空")
                return None
            self.logger.info(f"成功读取数据,形状: {df.shape}")
            return df
        except Exception as e:
            self.logger.error(f"读取数据失败: {e}")
            return None

    def _find_header_row(self, df: pd.DataFrame) -> Optional[int]:
        """Return the detected header row index, or None when none is found.

        A negative (or missing) result from the detector means "not found".
        """
        result = ColumnMapper.detect_header_row(df, max_rows=30)
        return result if result is not None and result >= 0 else None

    def _apply_column_mapping(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Rename columns according to the configured mapping.

        Target columns that are still missing after the rename are created
        with a default value: 0 when the name contains '量' or '价'
        (quantity / price), otherwise an empty string.

        Args:
            df: Raw data.

        Returns:
            The mapped DataFrame (``df`` itself when no mapping is configured),
            or None when the mapping fails.
        """
        if not self.column_mapping:
            self.logger.info("没有列映射配置")
            return df

        try:
            df_renamed = df.rename(columns=self.column_mapping)

            required_columns = self.get_required_columns()
            missing_columns = [col for col in required_columns if col not in df_renamed.columns]

            if missing_columns:
                self.logger.warning(f"缺少必需的列: {missing_columns}")
                for col in missing_columns:
                    df_renamed[col] = 0 if '量' in col or '价' in col else ''
                    self.logger.info(f"创建缺失列: {col},默认值: {df_renamed[col].iloc[0] if len(df_renamed) > 0 else 'N/A'}")

            self.logger.info(f"列映射完成,列名: {list(df_renamed.columns)}")
            return df_renamed

        except Exception as e:
            self.logger.error(f"列映射失败: {e}")
            return None

    def _apply_data_cleaning(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Apply the configured cleaning rules through DataCleaner.

        Each rule dict supplies its ``type``; every other key is passed to
        ``add_rule`` as a keyword argument.

        Returns:
            The cleaned DataFrame (``df`` itself when no rules are configured),
            or None when cleaning fails.
        """
        if not self.cleaning_rules:
            self.logger.info("没有数据清洗规则")
            return df
        try:
            cleaner = DataCleaner()
            for rule in self.cleaning_rules:
                cleaner.add_rule(rule.get('type'), **{k: v for k, v in rule.items() if k != 'type'})
            result = cleaner.clean(df)
            self.logger.info(f"数据清洗完成,数据形状: {result.shape}")
            return result
        except Exception as e:
            self.logger.error(f"数据清洗失败: {e}")
            return None

    def _apply_calculations(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Apply the configured calculation rules through DataCalculator.

        Each rule dict supplies its ``type``; every other key is passed to
        ``add_rule`` as a keyword argument.

        Returns:
            The calculated DataFrame (``df`` itself when no rules are
            configured), or None when the calculation fails.
        """
        if not self.calculations:
            self.logger.info("没有计算规则")
            return df
        try:
            calculator = DataCalculator()
            for calc in self.calculations:
                calculator.add_rule(calc.get('type'), **{k: v for k, v in calc.items() if k != 'type'})
            result = calculator.calculate(df)
            self.logger.info(f"计算处理完成,数据形状: {result.shape}")
            return result
        except Exception as e:
            self.logger.error(f"计算处理失败: {e}")
            return None

    def _generate_output(self, df: pd.DataFrame, input_file: Path, output_dir: Path) -> Optional[Path]:
        """Write the final data to a timestamped Excel file.

        The file is named ``<input stem><output_suffix>_<timestamp>.xls`` and
        is created inside ``output_dir``, which is created when missing.

        Args:
            df: Final data.
            input_file: Input file path (its stem is used in the output name).
            output_dir: Output directory.

        Returns:
            Path of the written file, or None when writing fails.
        """
        try:
            timestamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
            output_filename = f"{input_file.stem}{self.output_suffix}_{timestamp}.xls"

            # Accept str as well as Path, and make sure the directory exists
            # so that a fresh installation does not fail on the first write.
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            output_file = target_dir / output_filename

            # The real template-based generation is not implemented yet; the
            # data is written as a plain sheet for now.
            # NOTE(review): pandas selects the writer engine from the ".xls"
            # extension - confirm an .xls-capable writer is available with the
            # deployed pandas version.
            df.to_excel(output_file, index=False)

            self.logger.info(f"输出文件生成成功: {output_file}")
            return output_file

        except Exception as e:
            self.logger.error(f"生成输出文件失败: {e}")
            return None
|
||||
@@ -0,0 +1,347 @@
|
||||
"""
|
||||
烟草订单处理器
|
||||
|
||||
处理烟草公司特定格式的订单明细文件,生成银豹采购单
|
||||
"""
|
||||
|
||||
import os
|
||||
import datetime
|
||||
import pandas as pd
|
||||
import xlrd
|
||||
import xlwt
|
||||
from xlutils.copy import copy
|
||||
from openpyxl import load_workbook
|
||||
from typing import Optional, Dict, Any, List, Tuple
|
||||
from pathlib import Path
|
||||
|
||||
from .base import BaseProcessor
|
||||
from ...core.utils.log_utils import get_logger
|
||||
from ...core.utils.string_utils import parse_monetary_string
|
||||
from ...core.utils.dialog_utils import show_custom_dialog
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class TobaccoProcessor(BaseProcessor):
    """Processor for tobacco-company order detail files.

    Extracts the ordered items from the order detail spreadsheet and writes
    them into the Pospal (银豹) purchase order template.
    """

    def __init__(self, config: Dict[str, Any]):
        """Initialise the processor.

        Args:
            config: Configuration object. It is queried with
                ``config.get(section, option, fallback=...)``.
        """
        super().__init__(config)
        self.description = "处理烟草公司订单明细文件"
        self.template_file = config.get('Paths', 'template_file', fallback='templates/银豹-采购单模板.xls')

        # Result directory. parents=True so construction does not fail when
        # the enclosing "data" directory has not been created yet.
        self.result_dir = Path("data/result")
        self.result_dir.mkdir(parents=True, exist_ok=True)

        # Default output file name.
        self.default_output_name = "银豹采购单_烟草公司.xls"

    def can_process(self, file_path: Path) -> bool:
        """Decide whether the file is a tobacco order.

        The file is accepted when its name contains one of the tobacco
        keywords (case-insensitive) or its columns include all of
        '商品', '盒码' and '订单量'. When the content cannot be read, only
        the file name decides.

        Args:
            file_path: File to inspect.

        Returns:
            True when the file is recognised as a tobacco order.
        """
        if not self.validate_input(file_path):
            return False

        filename = file_path.name
        tobacco_keywords = ['烟草', '卷烟', '订单明细', 'tobacco', '烟']
        # Lower-casing only affects the Latin keyword, so "Tobacco.xlsx"
        # is recognised as well.
        lowered_name = filename.lower()
        filename_match = any(keyword in lowered_name for keyword in tobacco_keywords)

        try:
            df = self._read_excel_safely(file_path, nrows=5)
            required_columns = ['商品', '盒码', '订单量']
            content_match = df is not None and all(col in df.columns for col in required_columns)

            if filename_match or content_match:
                self.logger.info(f"识别为烟草订单文件: {filename}")
                return True

            return False

        except Exception as e:
            self.logger.warning(f"检查文件内容时出错: {e}")
            # Content is unreadable: fall back to the file name alone.
            return filename_match

    def process(self, input_file: Path, output_dir: Path) -> Optional[Path]:
        """Process one tobacco order file.

        Args:
            input_file: Input file path.
            output_dir: Output directory path.

        Returns:
            Path of the generated purchase order, or None on failure.
        """
        self.log_processing_start(input_file)

        try:
            # Order header information (time and total amount).
            order_info = self._read_order_info(input_file)
            if not order_info:
                self.logger.error(f"读取订单信息失败: {input_file}")
                self.log_processing_end(input_file, success=False)
                return None

            order_time, total_amount = order_info
            self.logger.info(f"订单信息 - 时间: {order_time}, 总金额: {total_amount}")

            # Order lines.
            order_data = self._read_order_data(input_file)
            if order_data is None or order_data.empty:
                self.logger.error(f"读取订单数据失败或数据为空: {input_file}")
                self.log_processing_end(input_file, success=False)
                return None

            self.logger.info(f"成功读取订单数据,共{len(order_data)}条记录")

            # Output file path.
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            output_filename = f"银豹采购单_烟草公司_{timestamp}.xls"
            output_file = output_dir / output_filename

            # Make sure the output directory exists.
            output_file.parent.mkdir(parents=True, exist_ok=True)

            # Generate the purchase order.
            result = self._generate_pospal_order(order_data, order_time, output_file)

            if result:
                self.logger.info(f"采购单生成成功: {output_file}")
                self.log_processing_end(input_file, output_file, success=True)

                # Show the result to the user.
                self._show_processing_result(output_file, order_time, len(order_data), total_amount)

                return output_file
            else:
                self.logger.error("生成银豹采购单失败")
                self.log_processing_end(input_file, success=False)
                return None

        except Exception as e:
            self.logger.error(f"处理烟草订单时发生错误: {e}", exc_info=True)
            self.log_processing_end(input_file, success=False)
            return None

    def get_required_columns(self) -> List[str]:
        """Return the column names of the order detail sheet."""
        return ['商品', '盒码', '条码', '建议零售价', '批发价', '需求量', '订单量', '金额']

    def get_supported_extensions(self) -> List[str]:
        """Return the supported file extensions."""
        return ['.xlsx', '.xls']

    @staticmethod
    def _read_info_cells_xls(file_path: Path) -> Tuple[Any, Any]:
        """Read cells H1 and H3 of the first sheet of a legacy ``.xls`` file.

        Returns None for a cell that lies outside the sheet.
        NOTE(review): xlrd reports date-typed cells as serial numbers, so the
        order time may come back as a float for such files - confirm whether
        H1 is stored as text.
        """
        sheet = xlrd.open_workbook(str(file_path)).sheet_by_index(0)

        def cell(row: int, col: int) -> Any:
            if row < sheet.nrows and col < sheet.ncols:
                return sheet.cell_value(row, col)
            return None

        # Column H is index 7; rows 1 and 3 are indices 0 and 2.
        return cell(0, 7), cell(2, 7)

    def _read_order_info(self, file_path: Path) -> Optional[Tuple[str, float]]:
        """Read the order time (cell H1) and total amount (cell H3).

        ``.xls`` files are read with xlrd because openpyxl only supports the
        newer ``.xlsx`` format; everything else goes through openpyxl.

        Args:
            file_path: File to read.

        Returns:
            ``(order_time, total_amount)`` with "(空)" / 0.0 substituted for
            empty cells, or None when the file cannot be read.
        """
        try:
            if Path(file_path).suffix.lower() == '.xls':
                raw_time, raw_amount = self._read_info_cells_xls(file_path)
            else:
                wb_info = load_workbook(file_path, data_only=True)
                try:
                    ws_info = wb_info.active
                    raw_time = ws_info["H1"].value
                    raw_amount = ws_info["H3"].value
                finally:
                    wb_info.close()

            order_time = raw_time or "(空)"
            total_amount = raw_amount or 0.0

            self.logger.info(f"成功读取订单信息: 时间={order_time}, 总金额={total_amount}")
            return (order_time, total_amount)

        except Exception as e:
            self.logger.error(f"读取订单信息出错: {e}")
            return None

    def _read_order_data(self, file_path: Path) -> Optional[pd.DataFrame]:
        """Read the order lines and derive purchase quantity and unit price.

        The first three rows are skipped and the fixed column names are
        applied. Rows whose order quantity is blank, non-numeric or zero are
        dropped. Two columns are added: '采购量' (order quantity x 10) and
        '采购单价' (amount / purchase quantity).

        Args:
            file_path: File to read.

        Returns:
            The filtered DataFrame with a fresh 0..n-1 index, or None when
            nothing remains or reading fails.
        """
        columns = ['商品', '盒码', '条码', '建议零售价', '批发价', '需求量', '订单量', '金额']

        try:
            df_old = self._read_excel_safely(file_path, header=None, skiprows=3, names=columns)
            if df_old is None:
                self.logger.error("读取订单数据失败: 无法读取文件")
                return None

            # Coerce the quantity to numbers first: blank cells become NaN and
            # "NaN != 0" is True, so without this blank rows would survive the
            # filter and break the int() conversion when the order is written.
            df_numeric = df_old.copy()
            df_numeric['订单量'] = pd.to_numeric(df_numeric['订单量'], errors='coerce')
            has_quantity = df_numeric['订单量'].notna() & (df_numeric['订单量'] != 0)
            df_filtered = df_numeric[has_quantity].copy()

            if df_filtered.empty:
                self.logger.warning("没有订单量不为0的记录")
                return None

            # Original author's note: tobacco orders usually need multiplying
            # by 10. Presumably the order unit is a carton of 10 packs - TODO confirm.
            df_filtered['采购量'] = df_filtered['订单量'] * 10
            df_filtered['采购单价'] = df_filtered['金额'] / df_filtered['采购量']
            df_filtered = df_filtered.reset_index(drop=True)

            self.logger.info(f"成功处理订单数据,有效记录数: {len(df_filtered)}")
            return df_filtered

        except Exception as e:
            self.logger.error(f"读取订单数据失败: {e}")
            return None

    def _generate_pospal_order(self, order_data: pd.DataFrame, order_time: str, output_file: Path) -> bool:
        """Write the order lines into a copy of the purchase order template.

        Args:
            order_data: Order lines (needs '盒码', '采购量', '采购单价').
            order_time: Order time (not written to the sheet).
            output_file: Destination file path.

        Returns:
            True when the file was written, False otherwise.
        """
        try:
            template_path = Path(self.template_file)
            if not template_path.exists():
                self.logger.error(f"采购单模板文件不存在: {template_path}")
                return False

            self.logger.info(f"使用模板文件: {template_path}")

            # Open the template and make a writable copy that keeps formatting.
            template_rd = xlrd.open_workbook(str(template_path), formatting_info=True)
            template_wb = copy(template_rd)
            template_ws = template_wb.get_sheet(0)

            # Locate the target columns through the template's header row.
            header_row = template_rd.sheet_by_index(0).row_values(0)
            try:
                barcode_col = header_row.index("条码(必填)")
                amount_col = header_row.index("采购量(必填)")
                gift_col = header_row.index("赠送量")
                price_col = header_row.index("采购单价(必填)")
            except ValueError as e:
                self.logger.error(f"模板列查找失败: {e}")
                return False

            self.logger.info(f"模板列索引 - 条码:{barcode_col}, 采购量:{amount_col}, 赠送量:{gift_col}, 单价:{price_col}")

            # Row 0 holds the header, so data starts at row 1. The sheet row
            # comes from enumerate, not from the DataFrame index, so the rows
            # are contiguous even if the caller did not reset the index.
            for sheet_row, (_, row) in enumerate(order_data.iterrows(), start=1):
                template_ws.write(sheet_row, barcode_col, row['盒码'])  # pack code used as barcode
                template_ws.write(sheet_row, amount_col, int(row['采购量']))  # purchase quantity
                template_ws.write(sheet_row, gift_col, "")  # no gift quantity
                template_ws.write(sheet_row, price_col, round(row['采购单价'], 2))  # unit price, 2 decimals

            # Make sure the output directory exists.
            output_file.parent.mkdir(parents=True, exist_ok=True)

            template_wb.save(str(output_file))

            self.logger.info(f"采购单生成成功: {output_file}")
            return True

        except Exception as e:
            self.logger.error(f"生成银豹采购单失败: {e}", exc_info=True)
            return False

    def _show_processing_result(self, output_file: Path, order_time: str, total_count: int, total_amount: float):
        """Show a dialog summarising the processing result.

        Errors while showing the dialog are logged and not propagated.

        Args:
            output_file: Generated file path.
            order_time: Order time.
            total_count: Number of processed items.
            total_amount: Total amount (parsed with parse_monetary_string;
                0.0 is shown when parsing yields None).
        """
        try:
            additional_info = {
                "订单来源": "烟草公司",
                "处理时间": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }

            # Format the amount for display.
            parsed = parse_monetary_string(total_amount)
            total_amount = parsed if parsed is not None else 0.0
            amount_display = f"¥{total_amount:.2f}"

            show_custom_dialog(
                title="烟草订单处理结果",
                message="烟草订单处理完成",
                result_file=str(output_file),
                time_info=order_time,
                count_info=f"{total_count}个商品",
                amount_info=amount_display,
                additional_info=additional_info
            )

            self.logger.info(f"显示处理结果 - 文件:{output_file}, 时间:{order_time}, 数量:{total_count}, 金额:{total_amount}")

        except Exception as e:
            self.logger.error(f"显示处理结果时出错: {e}")

    def get_latest_tobacco_order(self) -> Optional[Path]:
        """Return the newest order detail file (kept for the legacy interface).

        Looks in ``data/output`` for ``订单明细*.xlsx``. Files whose ctime is
        from today are preferred; when there are none, a warning is logged and
        the newest file of any day is returned.

        Returns:
            The file path, or None when the directory or any matching file is
            missing, or on error.
        """
        try:
            # Timestamp of today's midnight.
            today = datetime.date.today()
            today_start = datetime.datetime.combine(today, datetime.time.min).timestamp()

            result_dir = Path("data/output")
            if not result_dir.exists():
                return None

            def created_at(path: Path) -> float:
                return path.stat().st_ctime

            all_files = list(result_dir.glob("订单明细*.xlsx"))
            candidates = [path for path in all_files if created_at(path) >= today_start]

            if not candidates:
                self.logger.warning("未找到今天创建的烟草订单明细文件")
                # Fall back to the newest file regardless of its date.
                return max(all_files, key=created_at) if all_files else None

            latest_file = max(candidates, key=created_at)

            self.logger.info(f"找到最新烟草订单明细文件: {latest_file}")
            return latest_file

        except Exception as e:
            self.logger.error(f"获取最新烟草订单文件时出错: {e}")
            return None
|
||||
@@ -0,0 +1,5 @@
|
||||
"""
|
||||
OCR订单处理系统 - 工具模块
|
||||
------------------------
|
||||
提供系统通用工具和辅助函数。
|
||||
"""
|
||||
@@ -0,0 +1,184 @@
|
||||
"""云端同步模块 — 基于 Gitea REST API 的文件同步"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from .log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class GiteaSync:
    """Read and write repository files through the Gitea REST API."""

    def __init__(self, base_url: str, owner: str, repo: str, token: str, timeout: int = 15):
        """Store the connection settings.

        Args:
            base_url: Base URL of the Gitea server (a trailing "/" is removed).
            owner: Repository owner.
            repo: Repository name.
            token: API access token.
            timeout: Per-request timeout passed to ``requests``.
        """
        self.base_url = base_url.rstrip("/")
        self.owner = owner
        self.repo = repo
        self.token = token
        self.timeout = timeout

    @property
    def _headers(self) -> dict:
        """Authorization header sent with every request."""
        return {"Authorization": f"token {self.token}"}

    def _api_url(self, path: str) -> str:
        """Build the contents-API URL for a file path inside the repository."""
        return f"{self.base_url}/api/v1/repos/{self.owner}/{self.repo}/contents/{path}"

    @staticmethod
    def _write_method(sha: Optional[str]) -> str:
        """Pick the HTTP verb for a contents write.

        Gitea's contents API creates a file with POST and updates an existing
        one with PUT (which needs the current sha).
        """
        return "PUT" if sha else "POST"

    def pull_file(self, remote_path: str) -> Optional[Tuple[bytes, str]]:
        """Download a file from the repository.

        Returns:
            ``(content_bytes, sha)``, or None when the file does not exist,
            the request fails, or the response cannot be decoded.
        """
        try:
            resp = requests.get(
                self._api_url(remote_path),
                headers=self._headers,
                timeout=self.timeout,
            )
            if resp.status_code == 404:
                logger.info(f"云端文件不存在: {remote_path}")
                return None
            if resp.status_code != 200:
                logger.warning(f"拉取文件失败: {resp.status_code} {resp.text[:200]}")
                return None

            data = resp.json()
            if not isinstance(data, dict):
                # A non-object payload is not a single file entry.
                logger.warning(f"拉取文件失败: 响应不是单个文件: {remote_path}")
                return None

            sha = data.get("sha", "")
            content_b64 = data.get("content") or ""
            # The base64 text returned by Gitea may contain line breaks.
            content_bytes = base64.b64decode(content_b64.replace("\n", ""))
            logger.info(f"拉取文件成功: {remote_path} ({len(content_bytes)} bytes)")
            return content_bytes, sha

        except requests.RequestException as e:
            logger.error(f"拉取文件网络错误: {e}")
            return None
        except ValueError as e:
            # Invalid JSON body or invalid base64 content.
            logger.error(f"解析云端响应失败: {remote_path} — {e}")
            return None

    def push_file(
        self,
        remote_path: str,
        content: bytes,
        message: str,
        sha: Optional[str] = None,
    ) -> Optional[str]:
        """Create or update a file in the repository.

        Args:
            remote_path: File path inside the repository.
            content: File content as bytes.
            message: Commit message.
            sha: Current sha of the file. Required for an update; omit it to
                create a new file.

        Returns:
            The new sha, or None on failure.
        """
        payload = {
            "message": message,
            "content": base64.b64encode(content).decode("ascii"),
        }
        if sha:
            payload["sha"] = sha

        try:
            resp = requests.request(
                self._write_method(sha),
                self._api_url(remote_path),
                headers={**self._headers, "Content-Type": "application/json"},
                json=payload,
                timeout=self.timeout,
            )
            if resp.status_code not in (200, 201):
                logger.warning(f"推送文件失败: {resp.status_code} {resp.text[:200]}")
                return None

            # "content" may be missing or null in the response; treat both as empty.
            new_sha = (resp.json().get("content") or {}).get("sha", "")
            logger.info(f"推送文件成功: {remote_path} (sha={new_sha[:12]})")
            return new_sha

        except requests.RequestException as e:
            logger.error(f"推送文件网络错误: {e}")
            return None

    def file_exists(self, remote_path: str) -> Optional[str]:
        """Check whether a file exists in the repository.

        The sha is only available from the GET response body, so the file is
        fetched directly instead of issuing a separate HEAD request first.

        Returns:
            The file's sha when it exists, otherwise None.
        """
        result = self.pull_file(remote_path)
        return result[1] if result else None

    def pull_json(self, remote_path: str) -> Optional[Tuple[dict, str]]:
        """Download a file and parse it as JSON.

        Returns:
            ``(parsed_data, sha)``, or None when the download or parsing fails.
        """
        result = self.pull_file(remote_path)
        if result is None:
            return None
        content_bytes, sha = result
        try:
            data = json.loads(content_bytes)
            return data, sha
        except json.JSONDecodeError as e:
            logger.error(f"解析 JSON 失败: {e}")
            return None

    def push_json(self, remote_path: str, data: dict, message: str, sha: Optional[str] = None) -> Optional[str]:
        """Serialise ``data`` as UTF-8 JSON and push it.

        Returns:
            The new sha, or None on failure.
        """
        content = json.dumps(data, ensure_ascii=False, indent=2).encode("utf-8")
        return self.push_file(remote_path, content, message, sha)

    def push_binary(self, remote_path: str, local_path: str, message: str) -> Optional[str]:
        """Read a local file in binary mode and push it to the repository.

        The remote sha is looked up first so an existing file is updated and
        a missing one is created.

        Returns:
            The new sha, or None on failure.
        """
        try:
            with open(local_path, "rb") as f:
                content = f.read()
        except OSError as e:
            logger.error(f"读取本地文件失败: {local_path} — {e}")
            return None

        existing_sha = self.file_exists(remote_path)
        return self.push_file(remote_path, content, message, sha=existing_sha)

    @classmethod
    def from_config(cls, config) -> Optional["GiteaSync"]:
        """Create an instance from the configuration object.

        Reads ``base_url``, ``owner``, ``repo`` and ``token`` from the
        ``Gitea`` section.

        Returns:
            A GiteaSync instance, or None when any of the four values is empty.
        """
        base_url = config.get("Gitea", "base_url", fallback="").strip()
        owner = config.get("Gitea", "owner", fallback="").strip()
        repo = config.get("Gitea", "repo", fallback="").strip()
        token = config.get("Gitea", "token", fallback="").strip()

        if not all([base_url, owner, repo, token]):
            logger.debug("Gitea 配置不完整,跳过云端同步")
            return None

        return cls(base_url=base_url, owner=owner, repo=repo, token=token)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,286 @@
|
||||
"""
|
||||
文件操作工具模块
|
||||
--------------
|
||||
提供文件处理、查找和管理功能。
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Union, Any
|
||||
|
||||
from .log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
def ensure_dir(directory: str) -> bool:
    """
    Make sure a directory exists, creating it (and its parents) when missing.

    Args:
        directory: Directory path. An empty string stands for the current
            directory, which is what ``os.path.dirname`` returns for a bare
            file name.

    Returns:
        True when the directory exists afterwards, False when creation failed.
    """
    if not directory:
        # os.makedirs('') raises; the current directory needs no creation.
        return True

    try:
        os.makedirs(directory, exist_ok=True)
        return True
    except Exception as e:
        logger.error(f"创建目录失败: {directory}, 错误: {e}")
        return False
|
||||
|
||||
def get_file_extension(file_path: str) -> str:
    """
    Return the lower-cased extension of a file path.

    Args:
        file_path: File path.

    Returns:
        The extension including the leading dot (for example ``.jpg``), or an
        empty string when the path has no extension.
    """
    _stem, suffix = os.path.splitext(file_path)
    return suffix.lower()
|
||||
|
||||
def is_valid_extension(file_path: str, allowed_extensions: List[str]) -> bool:
    """
    Check whether a file's extension is in the allowed list.

    The file's extension is lower-cased before the lookup; the entries of
    ``allowed_extensions`` are compared as given.

    Args:
        file_path: File path.
        allowed_extensions: Allowed extensions (for example ``['.jpg', '.png']``).

    Returns:
        True when the extension is allowed.
    """
    return get_file_extension(file_path) in allowed_extensions
|
||||
|
||||
def get_files_by_extensions(directory: str, extensions: List[str], exclude_patterns: List[str] = None) -> List[str]:
    """
    List the files directly inside a directory that have one of the given extensions.

    Sub-directories are not searched. A file is skipped when its name
    contains any of the exclude patterns as a substring.

    Args:
        directory: Directory path.
        extensions: Extensions to accept (for example ``['.jpg', '.png']``).
        exclude_patterns: Substrings that exclude a file name; defaults to
            ``['~$', '.tmp']``.

    Returns:
        List of matching file paths, in the order ``os.listdir`` yields them.
    """
    skip_markers = ['~$', '.tmp'] if exclude_patterns is None else exclude_patterns

    matched = []
    for entry in os.listdir(directory):
        candidate = os.path.join(directory, entry)

        # Regular files only.
        if not os.path.isfile(candidate):
            continue

        # Extension filter.
        if not is_valid_extension(candidate, extensions):
            continue

        # Exclusion filter, applied to the bare file name.
        if any(marker in entry for marker in skip_markers):
            continue

        matched.append(candidate)

    return matched
|
||||
|
||||
def get_latest_file(directory: str, pattern: str = "", extensions: List[str] = None) -> Optional[str]:
    """
    Return the most recently modified file in a directory.

    Args:
        directory: Directory path.
        pattern: Substring the file name must contain (empty means no filter).
        extensions: Extensions to accept (None or empty means no filter).

    Returns:
        Path of the newest matching file, or None when the directory does not
        exist or nothing matches.
    """
    if not os.path.exists(directory):
        logger.warning(f"目录不存在: {directory}")
        return None

    dated = []
    for entry in os.listdir(directory):
        # Name filter first, then extension filter.
        if pattern and pattern not in entry:
            continue
        if extensions and not is_valid_extension(entry, extensions):
            continue

        full_path = os.path.join(directory, entry)
        if os.path.isfile(full_path):
            dated.append((os.path.getmtime(full_path), full_path))

    if not dated:
        logger.warning(f"未在目录 {directory} 中找到符合条件的文件")
        return None

    # Pick the entry with the largest modification time.
    _mtime, newest_path = max(dated, key=lambda item: item[0])
    return newest_path
|
||||
|
||||
def generate_timestamp_filename(original_path: str) -> str:
    """
    Build a file path whose name is the current timestamp.

    The directory and the extension of the original path are kept; the file
    name itself is replaced by ``YYYYmmddHHMMSS``.

    Args:
        original_path: Original file path.

    Returns:
        The new path with the timestamp as file name.
    """
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    _stem, suffix = os.path.splitext(original_path)
    parent = os.path.dirname(original_path)
    return os.path.join(parent, stamp + suffix)
|
||||
|
||||
def rename_file(source_path: str, target_path: str) -> bool:
    """
    Rename (move) a file, creating the target directory when needed.

    Args:
        source_path: Source file path.
        target_path: Target file path.

    Returns:
        True when the file was renamed, False on any error.
    """
    try:
        # A bare target file name has no directory part; there is nothing to
        # create then, and os.makedirs('') would only log a spurious error.
        target_dir = os.path.dirname(target_path)
        if target_dir:
            ensure_dir(target_dir)

        os.rename(source_path, target_path)
        logger.info(f"文件已重命名: {os.path.basename(source_path)} -> {os.path.basename(target_path)}")
        return True
    except Exception as e:
        logger.error(f"重命名文件失败: {e}")
        return False
|
||||
|
||||
def load_json(file_path: str, default: Any = None) -> Any:
    """
    Load a JSON file (read as UTF-8).

    Args:
        file_path: JSON file path.
        default: Value returned when the file does not exist or cannot be loaded.

    Returns:
        The parsed JSON content, or ``default``.
    """
    if not os.path.exists(file_path):
        return default

    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except Exception as e:
        logger.error(f"加载JSON文件失败: {file_path}, 错误: {e}")
        return default
    return parsed
|
||||
|
||||
def save_json(data: Any, file_path: str, ensure_ascii: bool = False, indent: int = 2) -> bool:
    """
    Serialize ``data`` to a UTF-8 JSON file.

    Args:
        data: Object to serialize.
        file_path: Destination path of the JSON file.
        ensure_ascii: Forwarded to ``json.dump``; when False non-ASCII
            characters are written as-is.
        indent: Number of spaces used for indentation.

    Returns:
        True when the file was written, False when any step raised (the
        error is logged and not propagated).
    """
    try:
        # The parent directory is created on demand before writing.
        ensure_dir(os.path.dirname(file_path))

        with open(file_path, 'w', encoding='utf-8') as handle:
            json.dump(data, handle, ensure_ascii=ensure_ascii, indent=indent)

        logger.debug(f"JSON数据已保存到: {file_path}")
        return True
    except Exception as e:
        logger.error(f"保存JSON文件失败: {file_path}, 错误: {e}")
        return False
|
||||
|
||||
def smart_read_excel(file_path: Union[str, Path], **kwargs) -> Any:
    """
    Read an Excel file with pandas, picking the engine from the extension.

    ``.xlsx`` defaults to the ``openpyxl`` engine and ``.xls`` to ``xlrd``;
    an ``engine`` passed by the caller is never overridden. Other extensions
    leave the engine choice to pandas.

    Args:
        file_path: Path of the Excel file.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_excel``.

    Returns:
        Whatever ``pd.read_excel`` returns for the given arguments.

    Raises:
        Exception: Any error raised by ``pd.read_excel`` is logged and
            re-raised unchanged.
    """
    import pandas as pd

    path_str = str(file_path)
    suffix = os.path.splitext(path_str)[1].lower()

    # Default engine per extension; setdefault keeps a caller-supplied engine.
    default_engines = {'.xlsx': 'openpyxl', '.xls': 'xlrd'}
    if suffix in default_engines:
        kwargs.setdefault('engine', default_engines[suffix])

    try:
        return pd.read_excel(path_str, **kwargs)
    except Exception as e:
        logger.error(f"读取 Excel 文件失败: {path_str}, 错误: {e}")
        raise
|
||||
|
||||
def get_file_size(file_path: str) -> int:
    """
    Return the size of a file in bytes.

    Args:
        file_path: Path of the file.

    Returns:
        The size in bytes, or 0 when the size cannot be determined (the
        error is logged and not propagated).
    """
    try:
        size = os.path.getsize(file_path)
    except Exception as e:
        logger.error(f"获取文件大小失败: {file_path}, 错误: {e}")
        return 0
    return size
|
||||
|
||||
def is_file_size_valid(file_path: str, max_size_mb: float) -> bool:
    """
    Check whether a file is no larger than ``max_size_mb`` megabytes.

    Args:
        file_path: Path of the file.
        max_size_mb: Maximum allowed size in MB (1 MB = 1024 * 1024 bytes).

    Returns:
        True when the size reported by ``get_file_size`` is within the limit.
        NOTE: ``get_file_size`` reports 0 on failure, so an unreadable or
        missing file passes this check.
    """
    actual_bytes = get_file_size(file_path)
    return actual_bytes <= max_size_mb * 1024 * 1024
|
||||
|
||||
|
||||
def format_file_size(size_bytes: int) -> str:
    """
    Format a byte count as a human-readable size string.

    Values below 1 MiB are rendered in KB, everything else in MB, both with
    one decimal place.

    Args:
        size_bytes: Size in bytes.

    Returns:
        A string such as ``"1.5 KB"`` or ``"5.5 MB"``.
    """
    unit, divisor = ("KB", 1024) if size_bytes < 1024 * 1024 else ("MB", 1024 * 1024)
    return f"{size_bytes / divisor:.1f} {unit}"
|
||||
@@ -0,0 +1,180 @@
|
||||
"""
|
||||
日志工具模块
|
||||
----------
|
||||
提供统一的日志配置和管理功能。
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict
|
||||
|
||||
# 日志处理器字典,用于跟踪已创建的处理器
|
||||
_handlers: Dict[str, logging.Handler] = {}
|
||||
|
||||
def setup_logger(name: str,
                 log_file: Optional[str] = None,
                 level=logging.INFO,
                 console_output: bool = True,
                 file_output: bool = True,
                 log_format: str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s') -> logging.Logger:
    """
    Configure and return the logger called ``name``.

    A logger that already has handlers is returned untouched, so repeated
    calls never attach duplicate handlers.

    Args:
        name: Name of the logger.
        log_file: Path of the log file. When None, ``logs/<name>.log`` under
            the current working directory is used.
        level: Level applied to the logger and to every handler created here.
        console_output: Attach a handler writing to ``sys.stdout``.
        file_output: Attach a rotating file handler (5 MB per file, 3 backups).
        log_format: Format string for all handlers created here.

    Returns:
        The configured logger. File logging is best-effort: if the log
        directory or file cannot be created the problem is printed and the
        logger is returned without a file handler.
    """
    logger = logging.getLogger(name)

    # Already configured: do not attach a second set of handlers.
    if logger.handlers:
        return logger

    logger.setLevel(level)
    formatter = logging.Formatter(log_format)

    if file_output:
        try:
            # Directory creation lives inside the try block so that an
            # unwritable location degrades to "no file log" instead of
            # raising out of logger setup.
            if log_file is None:
                log_dir = os.path.abspath('logs')
                os.makedirs(log_dir, exist_ok=True)
                log_file = os.path.join(log_dir, f"{name}.log")
            else:
                # An explicitly requested log file may live in a directory
                # that does not exist yet; create it rather than silently
                # losing file logging.
                parent_dir = os.path.dirname(log_file)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)

            # Rotating log: bounded file size and a bounded number of backups.
            file_handler = RotatingFileHandler(log_file, maxBytes=5 * 1024 * 1024, backupCount=3, encoding='utf-8')
            file_handler.setFormatter(formatter)
            file_handler.setLevel(level)
            logger.addHandler(file_handler)
            _handlers[f"{name}_file"] = file_handler

            # Write an "active" marker next to the log file so that log
            # clean-up tooling can tell the log is in use.
            active_marker = os.path.join(os.path.dirname(log_file), f"{name}.active")
            with open(active_marker, 'w', encoding='utf-8') as f:
                f.write(f"Active since: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        except Exception as e:
            print(f"无法创建日志文件处理器: {e}")

    if console_output:
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setFormatter(formatter)
        console_handler.setLevel(level)
        logger.addHandler(console_handler)
        _handlers[f"{name}_console"] = console_handler

    return logger
|
||||
|
||||
def get_logger(name: str) -> logging.Logger:
    """
    Return the logger called ``name``, configuring it on first use.

    Args:
        name: Name of the logger.

    Returns:
        The existing logger when it already has handlers; otherwise the
        result of ``setup_logger(name)`` with its default settings.
    """
    existing = logging.getLogger(name)
    return existing if existing.handlers else setup_logger(name)
|
||||
|
||||
def set_log_level(level: str) -> None:
    """
    Set the level of every known logger, the root logger and the handlers
    created by ``setup_logger``.

    Handlers carry their own threshold (``setup_logger`` sets it to the
    level it was called with), so changing only the logger levels would not
    let lower-severity records through. The handlers tracked in
    ``_handlers`` are therefore updated as well.

    Args:
        level: Level name, case-insensitive (DEBUG, INFO, WARNING, ERROR,
            CRITICAL). Unknown names fall back to INFO.
    """
    level_map = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'critical': logging.CRITICAL
    }

    log_level = level_map.get(level.lower(), logging.INFO)

    # Snapshot the names first so the registry is not iterated while
    # getLogger() touches it.
    for logger_name in list(logging.root.manager.loggerDict):
        logging.getLogger(logger_name).setLevel(log_level)

    logging.getLogger().setLevel(log_level)

    # Bring the handlers created by setup_logger in line with the new level.
    for handler in list(_handlers.values()):
        handler.setLevel(log_level)

    print(f"所有日志记录器级别已设置为: {logging.getLevelName(log_level)}")
|
||||
|
||||
def close_logger(name: str) -> None:
    """
    Close and detach every handler of the logger called ``name``.

    The matching entries of the module-level handler registry are dropped
    as well.

    Args:
        name: Name of the logger.
    """
    target = logging.getLogger(name)
    # Iterate over a copy: removeHandler mutates target.handlers.
    for attached in list(target.handlers):
        attached.close()
        target.removeHandler(attached)

    for kind in ("file", "console"):
        _handlers.pop(f"{name}_{kind}", None)
|
||||
|
||||
def close_all_loggers() -> None:
    """
    Close and detach the handlers of every logger in the logging registry.

    The module-level handler registry is emptied afterwards and a
    confirmation line is printed.
    """
    known_loggers = [
        logging.getLogger(logger_name)
        for logger_name in logging.root.manager.loggerDict
    ]

    for current in known_loggers:
        if not hasattr(current, 'handlers'):
            continue
        # Iterate over a copy: removeHandler mutates current.handlers.
        for attached in list(current.handlers):
            attached.close()
            current.removeHandler(attached)

    _handlers.clear()

    print("所有日志记录器已关闭")
|
||||
|
||||
def cleanup_active_marker(name: str) -> None:
    """
    Remove the ``<name>.active`` marker from the default ``logs`` directory.

    The marker is looked up under ``logs`` relative to the current working
    directory. A missing marker is not an error; any failure is printed
    instead of raised.

    Args:
        name: Name of the logger whose marker should be removed.
    """
    try:
        marker_path = os.path.join(os.path.abspath('logs'), f"{name}.active")
        if os.path.exists(marker_path):
            os.remove(marker_path)
    except Exception as e:
        print(f"无法清理日志活跃标记: {e}")
|
||||
@@ -0,0 +1,279 @@
|
||||
"""
|
||||
字符串处理工具模块
|
||||
---------------
|
||||
提供字符串处理、正则表达式匹配等功能。
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
|
||||
def clean_string(text: str) -> str:
    """
    Normalize whitespace in a string.

    Leading and trailing whitespace is removed and every internal run of
    whitespace is collapsed into a single space.

    Args:
        text: Source string.

    Returns:
        The cleaned string, or ``""`` when ``text`` is not a string.
    """
    if isinstance(text, str):
        return re.sub(r'\s+', ' ', text.strip())
    return ""
|
||||
|
||||
def remove_non_digits(text: str) -> str:
    """
    Strip every non-digit character from a string.

    Args:
        text: Source string.

    Returns:
        The digits of ``text`` in their original order, or ``""`` when
        ``text`` is not a string.
    """
    return re.sub(r'\D', '', text) if isinstance(text, str) else ""
|
||||
|
||||
def extract_number(text: str) -> Optional[float]:
    """
    Extract the first number found in a string.

    The number may carry a leading minus sign and a decimal part.

    Args:
        text: Source string.

    Returns:
        The first number as a float, or None when ``text`` is not a string
        or contains no number.
    """
    if not isinstance(text, str):
        return None

    found = re.search(r'-?\d+(\.\d+)?', text)
    return float(found.group()) if found else None
|
||||
|
||||
def extract_unit(text: str, units: List[str] = None) -> Optional[str]:
    """
    Extract a unit from a string.

    Args:
        text: Source string.
        units: Allowed units. When given (and non-empty), the first entry of
            this list that occurs anywhere in ``text`` is returned. When
            omitted, the unit is taken to be the run of non-digit,
            non-whitespace characters following the first number.

    Returns:
        The unit, or None when ``text`` is not a string or no unit is found.
    """
    if not isinstance(text, str):
        return None

    if units:
        # Order of ``units`` decides which match wins.
        return next((candidate for candidate in units if candidate in text), None)

    found = re.search(r'\d+\s*([^\d\s]+)', text)
    return found.group(1) if found else None
|
||||
|
||||
def extract_number_and_unit(text: str) -> Tuple[Optional[float], Optional[str]]:
    """
    Extract the first number and the unit that directly follows it.

    Args:
        text: Source string.

    Returns:
        A ``(number, unit)`` tuple. ``unit`` is None when the number is not
        followed by a unit; both are None when ``text`` is not a string or
        holds no number.
    """
    found = (
        re.search(r'(-?\d+(?:\.\d+)?)\s*([^\d\s]+)?', text)
        if isinstance(text, str)
        else None
    )
    if found is None:
        return None, None

    number_text, unit_text = found.groups()
    return float(number_text), unit_text or None
|
||||
|
||||
def parse_specification(spec_str: str) -> Optional[int]:
    """
    Parse a specification string and return the pack quantity.

    Patterns are tried in this order and the first hit wins:

    1. weight/volume times count, e.g. ``450g*15`` / ``450ml*15`` -> 15
    2. three-level spec, e.g. ``1*5*10`` -> 10 (last number)
    3. two-level spec, e.g. ``1*15`` / ``1x15`` -> 15 (second number)
    4. count per case, e.g. ``24瓶/件`` -> 24
    5. litre spec, e.g. ``4L`` -> 1, ``4L*6`` / ``4Lx6`` -> 6

    Every multiplication pattern accepts ``*``, ``x``, ``X`` and ``×`` as
    the separator.

    Args:
        spec_str: Specification string.

    Returns:
        The pack quantity, or None when the string is empty, not a string,
        or matches none of the patterns.
    """
    if not spec_str or not isinstance(spec_str, str):
        return None

    # Trim and collapse whitespace runs before matching.
    spec = re.sub(r'\s+', ' ', spec_str.strip())

    # (pattern, index of the group holding the quantity), in priority order.
    quantity_patterns = (
        (r'\d+(?:g|ml|毫升|克)[*xX×](\d+)', 1),
        (r'(\d+)[\*xX×](\d+)[\*xX×](\d+)', 3),
        (r'(\d+)[\*xX×](\d+)', 2),
        (r'(\d+)[瓶个支袋][//](件|箱)', 1),
    )

    try:
        for pattern, group_index in quantity_patterns:
            match = re.search(pattern, spec)
            if match:
                return int(match.group(group_index))

        # Litre spec: the trailing count is optional and defaults to 1.
        match = re.search(r'(\d+(?:\.\d+)?)\s*[Ll升][*xX×]?(\d+)?', spec)
        if match:
            return int(match.group(2)) if match.group(2) else 1
    except ValueError:
        # int() can only fail on pathological digit runs; treat as unparsable.
        return None

    return None
|
||||
|
||||
def clean_barcode(barcode: Any) -> str:
    """
    Normalize a barcode to a digits-only string.

    Numeric input is first rendered without a fractional part; a trailing
    ``.0`` / ``.00`` suffix is dropped from the text and every remaining
    non-digit character is removed.

    Args:
        barcode: Barcode as a string, int or float (other values are
            converted with ``str``).

    Returns:
        The barcode reduced to its digits.
    """
    if isinstance(barcode, (int, float)):
        raw = f"{barcode:.0f}"
    else:
        raw = str(barcode)

    without_fraction = re.sub(r'\.0+$', '', raw)
    return re.sub(r'\D', '', without_fraction)
|
||||
|
||||
def is_scientific_notation(value: str) -> bool:
    """
    Tell whether a value is written in scientific notation.

    The value is converted with ``str`` before matching, so non-string
    input is accepted.

    Args:
        value: Value to inspect.

    Returns:
        True for text such as ``6.9e+12`` or ``1E5``, False otherwise.
    """
    candidate = str(value)
    return re.match(r'^-?\d+(\.\d+)?[eE][+-]?\d+$', candidate) is not None
|
||||
|
||||
def parse_monetary_string(value: Any) -> Optional[float]:
    """
    Parse an amount/quantity value into a float.

    Currency symbols, the ``元`` suffix and any other character that is not
    a digit, dot, comma or minus sign are discarded before parsing. Commas
    are interpreted as follows:

    * exactly one comma and no dot -> decimal separator (``"1,5"`` -> 1.5)
    * any other comma usage -> thousands separator
      (``"1,234.56"`` -> 1234.56, ``"1,234,567"`` -> 1234567)

    Args:
        value: Amount as a string or number; other types are rejected.

    Returns:
        The parsed float, or None when the value is None, an unsupported
        type, a placeholder (``o``, ``none``, ``null``, ``-``, ``--``) or
        not parsable.
    """
    if value is None:
        return None
    if isinstance(value, (int, float)):
        return float(value)
    if not isinstance(value, str):
        return None

    stripped = value.strip()
    if not stripped or stripped.lower() in ('o', 'none', 'null', '-', '--'):
        return None

    # Keep only digits, dot, comma and minus sign.
    numeric_text = re.sub(r'[^\d\.\-,]', '', stripped)
    if not numeric_text or numeric_text in ('-', '.', '-.', ','):
        return None

    commas = numeric_text.count(',')
    if commas == 1 and '.' not in numeric_text:
        numeric_text = numeric_text.replace(',', '.')
    elif commas >= 1:
        numeric_text = numeric_text.replace(',', '')

    try:
        return float(numeric_text)
    except (ValueError, TypeError):
        return None
|
||||
|
||||
|
||||
def format_barcode(barcode: Any) -> str:
    """
    Format a barcode as a digits-only string, expanding scientific notation.

    Steps, in order: convert to text and trim; expand scientific notation
    into a plain integer string; drop a trailing ``.0`` suffix; remove every
    non-digit character; while the result is longer than 13 digits and ends
    with ``0``, strip trailing zeros.

    Args:
        barcode: Barcode value of any type.

    Returns:
        The formatted barcode, or ``""`` when ``barcode`` is None.
    """
    if barcode is None:
        return ""

    text = str(barcode).strip()

    if is_scientific_notation(text):
        try:
            text = f"{float(text):.0f}"
        except (ValueError, TypeError):
            pass

    # "123456.0" -> "123456"
    if '.' in text:
        text = re.sub(r'\.0+$', '', text)

    if not text.isdigit():
        text = re.sub(r'\D', '', text)

    # Over-long values ending in zeros are trimmed back towards 13 digits.
    while len(text) > 13 and text.endswith('0'):
        text = text[:-1]

    return text
|
||||
@@ -0,0 +1,5 @@
|
||||
"""
|
||||
OCR订单处理系统 - 服务模块
|
||||
-----------------------
|
||||
提供业务逻辑服务,协调各个核心组件完成业务功能。
|
||||
"""
|
||||
@@ -0,0 +1,193 @@
|
||||
"""
|
||||
OCR服务模块
|
||||
---------
|
||||
提供OCR识别服务,协调OCR流程。
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Optional, Tuple, Union, Any, Callable
|
||||
import os
|
||||
|
||||
from ..config.settings import ConfigManager
|
||||
from ..core.utils.log_utils import get_logger
|
||||
from ..core.ocr.table_ocr import OCRProcessor
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class OCRService:
    """
    OCR recognition service that coordinates the OCR workflow.

    Thin service layer in front of ``OCRProcessor``: it validates input
    images, skips images whose Excel output already exists and delegates the
    recognition work to the processor.
    """

    def __init__(self, config: Optional[ConfigManager] = None):
        """
        Initialize the OCR service.

        Args:
            config: Configuration manager; a new ``ConfigManager`` is created
                when omitted.
        """
        logger.info("初始化OCRService")
        self.config = config or ConfigManager()

        # The processor does the actual OCR work.
        self.ocr_processor = OCRProcessor(self.config)

        logger.info("OCRService初始化完成")

    def get_unprocessed_images(self) -> List[str]:
        """
        Return the images that still need processing.

        Returns:
            Paths reported by the OCR processor.
        """
        return self.ocr_processor.get_unprocessed_images()

    def process_image(self, image_path: str) -> Optional[str]:
        """
        Process a single image file.

        Args:
            image_path: Path of the image file.

        Returns:
            Path of the Excel file for this image, or None when the image is
            missing, invalid, or any processing step fails. When the Excel
            file already exists, OCR is skipped and that path is returned.
        """
        try:
            if not os.path.exists(image_path):
                logger.error(f"文件不存在: {image_path}")
                return None

            if not self._is_valid_image(image_path):
                logger.error(f"不支持的文件类型: {image_path}")
                return None

            # Already processed: reuse the existing output.
            existing_excel = self._get_excel_path(image_path)
            if os.path.exists(existing_excel):
                logger.info(f"文件已处理过,跳过OCR识别: {image_path}")
                return existing_excel

            ocr_result = self.ocr_processor.process_image(image_path)
            if not ocr_result:
                logger.error(f"OCR识别失败: {image_path}")
                return None

            generated_excel = self._generate_excel(ocr_result, image_path)
            if not generated_excel:
                logger.error(f"生成Excel文件失败: {image_path}")
                return None

            logger.info(f"处理完成: {image_path} -> {generated_excel}")
            return generated_excel

        except Exception as e:
            logger.error(f"处理图片时发生错误: {e}", exc_info=True)
            return None

    def process_images_batch(self, batch_size: int = None, max_workers: int = None, progress_cb: Optional[Callable[[int], None]] = None) -> Tuple[int, int]:
        """
        Process images in batches.

        Args:
            batch_size: Batch size forwarded to the OCR processor.
            max_workers: Maximum number of workers forwarded to the processor.
            progress_cb: Optional progress callback forwarded to the processor.

        Returns:
            ``(total processed, successfully processed)`` tuple.
        """
        logger.info(f"OCRService开始批量处理图片, batch_size={batch_size}, max_workers={max_workers}")
        return self.ocr_processor.process_images_batch(batch_size, max_workers, progress_cb)

    # Alias of process_images_batch kept for compatibility with older callers.
    def batch_process(self, batch_size: int = None, max_workers: int = None, progress_cb: Optional[Callable[[int], None]] = None) -> Tuple[int, int]:
        """
        Process images in batches (alias of ``process_images_batch``).

        Args:
            batch_size: Batch size.
            max_workers: Maximum number of workers.
            progress_cb: Optional progress callback.

        Returns:
            ``(total processed, successfully processed)`` tuple.
        """
        logger.info("OCRService.batch_process被调用,转发到process_images_batch")
        return self.process_images_batch(batch_size, max_workers, progress_cb)

    def validate_image(self, image_path: str) -> bool:
        """
        Validate an image through the OCR processor.

        Args:
            image_path: Path of the image.

        Returns:
            True when the processor accepts the image.
        """
        return self.ocr_processor.validate_image(image_path)

    def _is_valid_image(self, image_path: str) -> bool:
        """
        Check whether the file is an acceptable image.

        Args:
            image_path: Path of the image file.

        Returns:
            Result of ``validate_image`` for the same path.
        """
        return self.validate_image(image_path)

    def _get_excel_path(self, image_path: str) -> str:
        """
        Derive the Excel output path for an image.

        Args:
            image_path: Path of the image file.

        Returns:
            ``<output_folder>/<image base name>.xlsx``, where the output
            folder comes from the ``Paths.output_folder`` setting.
        """
        stem, _ = os.path.splitext(os.path.basename(image_path))
        output_dir = self.config.get('Paths', 'output_folder', fallback='data/output')
        return os.path.join(output_dir, f"{stem}.xlsx")

    def _generate_excel(self, ocr_result: dict, image_path: str) -> Optional[str]:
        """
        Produce the Excel file for an OCR result.

        Args:
            ocr_result: Result returned by the OCR processor.
            image_path: Path of the source image.

        Returns:
            Path of the Excel file, or None on failure.
        """
        try:
            excel_path = self._get_excel_path(image_path)

            os.makedirs(os.path.dirname(excel_path), exist_ok=True)

            if not hasattr(self.ocr_processor, 'generate_excel'):
                # No generator on the processor: the path only counts when
                # the file is already on disk.
                return excel_path if os.path.exists(excel_path) else None

            if self.ocr_processor.generate_excel(ocr_result, excel_path):
                return excel_path

            return None

        except Exception as e:
            logger.error(f"生成Excel文件时发生错误: {e}", exc_info=True)
            return None
|
||||
@@ -0,0 +1,245 @@
|
||||
"""
|
||||
订单服务模块
|
||||
---------
|
||||
提供订单处理服务,协调Excel处理和订单合并流程。
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Dict, List, Optional, Tuple, Union, Any, Callable
|
||||
|
||||
from ..config.settings import ConfigManager
|
||||
from ..core.utils.log_utils import get_logger
|
||||
from ..core.excel.processor import ExcelProcessor
|
||||
from ..core.excel.merger import PurchaseOrderMerger
|
||||
from ..core.db.product_db import ProductDatabase
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class OrderService:
    """
    Order service that coordinates Excel processing and purchase-order merging.
    """

    def __init__(self, config: Optional[ConfigManager] = None):
        """
        Initialize the order service.

        Args:
            config: Configuration manager; a new ``ConfigManager`` is created
                when omitted.
        """
        logger.info("初始化OrderService")
        self.config = config or ConfigManager()

        # Excel processor and purchase-order merger share the same config.
        self.excel_processor = ExcelProcessor(self.config)
        self.order_merger = PurchaseOrderMerger(self.config)

        logger.info("OrderService初始化完成")

    def get_latest_excel(self) -> Optional[str]:
        """
        Return the most recent Excel file.

        Returns:
            Path reported by the Excel processor, or None when none is found.
        """
        return self.excel_processor.get_latest_excel()

    def process_excel(self, file_path: Optional[str] = None, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
        """
        Process an Excel order file into a standard purchase order.

        Supplier-specific pre-processing is attempted first; when it yields
        a pre-processed file, that file is processed instead of the original.

        Args:
            file_path: Path of the Excel file; the latest file is used when
                omitted.
            progress_cb: Optional progress callback forwarded to the Excel
                processor.

        Returns:
            Path of the generated purchase order, or None on failure.
        """
        if not file_path:
            file_path = self.excel_processor.get_latest_excel()
            if not file_path:
                logger.warning("未找到可处理的Excel文件")
                return None
            logger.info("OrderService开始处理最新Excel文件")
        else:
            logger.info(f"OrderService开始处理指定Excel文件: {file_path}")

        # Supplier detection is best-effort: a failure here must not prevent
        # the generic processing below.
        try:
            preprocessed_path = self._check_special_preprocess(file_path)
            if preprocessed_path:
                logger.info(f"检测到特殊供应商,已生成预处理文件: {preprocessed_path}")
                file_path = preprocessed_path
        except Exception as e:
            logger.error(f"检查特殊预处理时出错: {e}")

        return self.excel_processor.process_specific_file(file_path, progress_cb=progress_cb)

    def _check_special_preprocess(self, file_path: str) -> Optional[str]:
        """
        Detect a special supplier and run its dedicated pre-processing.

        The first 50 rows of the sheet are scanned for supplier markers, in
        this order: tobacco company, Rongcheng Yigou, Yang Biyue.

        Args:
            file_path: Path of the Excel file to inspect.

        Returns:
            Whatever the matching supplier pre-processor returns, or None
            when no supplier matches or detection fails (failures are logged
            as warnings).
        """
        try:
            from app.core.utils.file_utils import smart_read_excel

            # header=None keeps the first sheet row as data so it can be matched too.
            df_head = smart_read_excel(file_path, nrows=50, header=None)
            df_str = df_head.astype(str)

            # 1. Tobacco company: any cell contains the licence-number label
            #    or the specific licence number.
            is_tobacco = df_str.apply(lambda x: x.str.contains('专卖证号|510109104938')).any().any()
            if is_tobacco:
                logger.info("识别到烟草公司订单,执行专用预处理...")
                from .tobacco_service import TobaccoService
                tobacco_svc = TobaccoService(self.config)
                return tobacco_svc.preprocess_tobacco_order(file_path)

            # 2. Rongcheng Yigou: any cell contains the order-number marker "RCDH".
            is_rongcheng = df_str.apply(lambda x: x.str.contains('RCDH')).any().any()
            if is_rongcheng:
                logger.info("识别到蓉城易购订单,执行专用预处理...")
                from .special_suppliers_service import SpecialSuppliersService
                special_svc = SpecialSuppliersService(self.config)
                return special_svc.preprocess_rongcheng_yigou(file_path)

            # 3. Yang Biyue: the column holding the handler label also
            #    contains the handler's name.
            handler_col = None
            for col in df_head.columns:
                if df_head[col].astype(str).str.contains('经手人').any():
                    handler_col = col
                    break

            if handler_col is not None:
                if df_head[handler_col].astype(str).str.contains('杨碧月').any():
                    logger.info("识别到杨碧月订单,执行专用预处理...")
                    from .special_suppliers_service import SpecialSuppliersService
                    special_svc = SpecialSuppliersService(self.config)
                    return special_svc.process_yang_biyue_only(file_path)

        except Exception as e:
            logger.warning(f"智能预处理识别失败: {e}")

        return None

    def get_purchase_orders(self) -> List[str]:
        """
        Return the list of purchase-order files.

        Returns:
            Paths reported by the purchase-order merger.
        """
        return self.order_merger.get_purchase_orders()

    def merge_purchase_orders(self, file_paths: List[str], progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
        """
        Merge the given purchase-order files.

        Args:
            file_paths: Paths of the purchase orders to merge.
            progress_cb: Optional progress callback.

        Returns:
            Path of the merged purchase order, or None on failure.
        """
        # merge_orders logs the start of the merge; no second log line here.
        return self.merge_orders(file_paths, progress_cb)

    def merge_all_purchase_orders(self, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
        """
        Merge every available purchase-order file.

        Args:
            progress_cb: Optional progress callback.

        Returns:
            Path of the merged purchase order, or None on failure.
        """
        # merge_orders logs the start of the merge; no second log line here.
        return self.merge_orders(None, progress_cb)

    def merge_orders(self, file_paths: Optional[List[str]] = None, progress_cb: Optional[Callable[[int], None]] = None) -> Optional[str]:
        """
        Merge purchase orders.

        Args:
            file_paths: Paths of the purchase orders; all purchase orders are
                merged when omitted or empty.
            progress_cb: Optional progress callback forwarded to the merger.

        Returns:
            Path of the merged purchase order, or None on failure.
        """
        if file_paths:
            logger.info(f"OrderService开始合并指定采购单: {file_paths}")
        else:
            logger.info("OrderService开始合并所有采购单")

        return self.order_merger.process(file_paths, progress_cb)

    def validate_unit_price(self, result_path: str) -> List[str]:
        """
        Compare purchase-order unit prices with the stored purchase prices.

        Args:
            result_path: Path of the purchase order to validate.

        Returns:
            One message per row whose unit price differs from the stored
            price by more than 1.0. Empty when nothing differs, when the
            barcode or price column cannot be located, or when validation
            fails (the error is logged).
        """
        try:
            from app.core.utils.file_utils import smart_read_excel
            from app.core.handlers.column_mapper import ColumnMapper as CM

            # Use the configuration this service was built with so the
            # check reads the same paths as the rest of the service.
            config = self.config
            template_folder = config.get('Paths', 'template_folder', fallback='templates')
            item_data = config.get('Templates', 'item_data', fallback='商品资料.xlsx')
            item_path = os.path.join(template_folder, item_data)
            product_db_path = config.get('Paths', 'product_db', fallback='data/product_cache.db')

            product_db = ProductDatabase(product_db_path, item_path)

            df_res = smart_read_excel(result_path)

            res_barcode_col = CM.find_column(list(df_res.columns), 'barcode')
            res_price_col = CM.find_column(list(df_res.columns), 'unit_price')

            if not res_barcode_col or not res_price_col:
                logger.warning("未能在采购单中找到条码或单价列")
                return []

            # One batched lookup for all barcodes of the sheet.
            barcodes = df_res[res_barcode_col].astype(str).str.strip().tolist()
            item_prices = product_db.get_prices(barcodes)

            results = []
            for _, row in df_res.iterrows():
                bc = str(row[res_barcode_col]).strip()
                if bc not in item_prices:
                    continue

                try:
                    res_price = float(row[res_price_col])
                except (ValueError, TypeError):
                    # Non-numeric price cell: nothing to compare.
                    continue

                item_price = item_prices[bc]
                diff = abs(res_price - item_price)
                if diff > 1.0:
                    results.append(f"条码 {bc}: 采购单价={res_price} vs 进货价={item_price} 差异={diff:.2f}")

            return results

        except Exception as e:
            logger.error(f"单价校验过程中发生错误: {e}")
            return []
|
||||
@@ -0,0 +1,297 @@
|
||||
"""
|
||||
处理器调度服务
|
||||
|
||||
负责管理和调度各种文件处理器,实现智能文件类型检测和处理器选择
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, List
|
||||
from pathlib import Path
|
||||
|
||||
from ..core.processors.base import BaseProcessor
|
||||
from ..core.processors.tobacco_processor import TobaccoProcessor
|
||||
from ..core.processors.ocr_processor import OCRProcessor
|
||||
from ..core.utils.log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class ProcessorService:
|
||||
"""处理器调度服务
|
||||
|
||||
负责管理所有处理器实例,提供统一的文件处理接口
|
||||
"""
|
||||
|
||||
def __init__(self, config: Dict[str, Any]):
    """Initialize the processor service.

    Stores the configuration, loads every processor and logs how many
    were loaded.

    Args:
        config: System configuration dictionary.
    """
    self.config = config
    self.processors: List[BaseProcessor] = []
    self._load_processors()
    loaded_count = len(self.processors)
    logger.info(f"处理器服务初始化完成,加载了{loaded_count}个处理器")
|
||||
|
||||
def _load_processors(self):
    """Load the built-in processors and the configured supplier processors.

    The tobacco and OCR processors are always loaded. Supplier processors
    are created from ``config/suppliers_config.json`` (falling back to
    ``./suppliers_config.json``); every entry that passes validation gets
    its own ``GenericSupplierProcessor``. A failure while reading the file
    or building one supplier processor is logged and skipped. If anything
    else fails, the processor list is reset to the two built-in processors.
    """
    try:
        self.processors = [
            TobaccoProcessor(self.config),
            OCRProcessor(self.config),
        ]

        supplier_configs = []
        try:
            import json
            from pathlib import Path

            # Preferred location first, legacy location second.
            config_path = Path("config/suppliers_config.json")
            if not config_path.exists():
                config_path = Path("./suppliers_config.json")

            if not config_path.exists():
                logger.info("未找到供应商配置文件,跳过供应商处理器加载")
            else:
                with open(config_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                ok, errs, supplier_configs = self._validate_suppliers_config(data)
                if ok:
                    logger.info(f"从 {config_path} 加载供应商配置,共 {len(supplier_configs)} 项")
                else:
                    logger.error("供应商配置校验失败:\n" + "\n".join(f"- {e}" for e in errs))
        except Exception as e:
            logger.error(f"读取供应商配置失败: {e}")

        # Entries that passed validation are loaded even when others failed.
        for supplier_config in supplier_configs:
            try:
                from ..core.processors.supplier_processors.generic_supplier_processor import GenericSupplierProcessor
                supplier_processor = GenericSupplierProcessor(self.config, supplier_config)
                self.processors.append(supplier_processor)
                logger.info(f"加载供应商处理器: {supplier_processor.name}")
            except Exception as e:
                logger.error(f"加载供应商处理器失败: {e}")

        logger.info(f"成功加载{len(self.processors)}个处理器")

    except Exception as e:
        logger.error(f"加载处理器时出错: {e}", exc_info=True)
        # Fall back to the built-in processors only.
        self.processors = [
            TobaccoProcessor(self.config),
            OCRProcessor(self.config),
        ]
|
||||
|
||||
def _validate_suppliers_config(self, data):
    """Validate the parsed supplier configuration file.

    Args:
        data: Parsed JSON document; expected to hold a non-empty
            ``suppliers`` list.

    Returns:
        ``(ok, errors, valid)`` where ``ok`` is True only when no entry
        produced an error, ``errors`` lists every validation message and
        ``valid`` holds the supplier entries that passed. Any exception
        raised during validation yields ``(False, [message], [])``.
    """
    try:
        suppliers = data.get('suppliers')
        if not isinstance(suppliers, list) or not suppliers:
            return False, ['suppliers必须是非空数组'], []

        errors, valid = [], []
        for idx, supplier in enumerate(suppliers):
            problems = self._validate_single_supplier(supplier, idx)
            if problems:
                errors.extend(problems)
            else:
                valid.append(supplier)

        return not errors, errors, valid
    except Exception as e:
        return False, [f'配置解析异常: {e}'], []
|
||||
|
||||
def _validate_single_supplier(self, s, idx):
    """Validate one supplier entry of the supplier configuration.

    Checks performed:

    * the entry is an object with a non-empty string ``name``;
    * at least one of ``filename_patterns`` / ``content_indicators`` is set;
    * ``column_mapping``, when set, is an object;
    * ``cleaning_rules``, when set, is a list of objects whose ``type`` is
      ``remove_rows`` (needs ``condition``), ``fill_na`` or ``convert_type``
      (both need ``columns`` or ``column``);
    * ``calculations``, when set, is a list of objects whose ``type`` is
      ``multiply`` / ``divide`` (need ``source_column`` and
      ``target_column``) or ``formula`` (needs ``formula`` and
      ``target_column``).

    Optional sections given as ``null`` (or any other empty value) count as
    absent, and malformed entries are reported as validation errors rather
    than raising.

    Args:
        s: Supplier entry from the configuration.
        idx: Position of the entry, used to prefix error messages.

    Returns:
        List of error messages; empty when the entry is valid.
    """
    errs = []
    prefix = f'suppliers[{idx}]'

    # A non-object entry cannot be inspected any further.
    if not isinstance(s, dict):
        errs.append(f'{prefix} 必须为对象')
        return errs

    name = s.get('name')
    if not name or not isinstance(name, str):
        errs.append(f'{prefix}.name 必须为字符串')

    fp = s.get('filename_patterns', [])
    ci = s.get('content_indicators', [])
    if not fp and not ci:
        errs.append(f'{prefix} 必须至少提供 filename_patterns 或 content_indicators 之一')

    cm = s.get('column_mapping', {})
    if cm and not isinstance(cm, dict):
        errs.append(f'{prefix}.column_mapping 必须为对象')

    # "or []" turns an explicit null (or other empty value) into "no rules".
    cr = s.get('cleaning_rules') or []
    if not isinstance(cr, list):
        errs.append(f'{prefix}.cleaning_rules 必须为数组')
    else:
        for i, rule in enumerate(cr):
            if not isinstance(rule, dict):
                errs.append(f'{prefix}.cleaning_rules[{i}] 必须为对象')
                continue
            rtype = rule.get('type')
            if rtype not in ('remove_rows', 'fill_na', 'convert_type'):
                errs.append(f'{prefix}.cleaning_rules[{i}].type 非法: {rtype}')
            if rtype == 'remove_rows' and not rule.get('condition'):
                errs.append(f'{prefix}.cleaning_rules[{i}].condition 必填')
            if rtype in ('fill_na', 'convert_type'):
                if not rule.get('columns') and not rule.get('column'):
                    errs.append(f'{prefix}.cleaning_rules[{i}] 需提供 columns 或 column')

    calc = s.get('calculations') or []
    if not isinstance(calc, list):
        errs.append(f'{prefix}.calculations 必须为数组')
    else:
        for i, c in enumerate(calc):
            if not isinstance(c, dict):
                errs.append(f'{prefix}.calculations[{i}] 必须为对象')
                continue
            ctype = c.get('type')
            if ctype not in ('multiply', 'divide', 'formula'):
                errs.append(f'{prefix}.calculations[{i}].type 非法: {ctype}')
            if ctype in ('multiply', 'divide'):
                if not c.get('source_column') or not c.get('target_column'):
                    errs.append(f'{prefix}.calculations[{i}] 需提供 source_column 与 target_column')
            if ctype == 'formula' and (not c.get('formula') or not c.get('target_column')):
                errs.append(f'{prefix}.calculations[{i}] 需提供 formula 与 target_column')

    return errs
|
||||
|
||||
def process_file(self, input_file: Path, output_dir: Path,
                 preferred_processor: Optional[str] = None) -> Optional[Path]:
    """Process a file with the preferred processor or the first suitable one.

    Args:
        input_file: Path of the file to process.
        output_dir: Directory that receives the output; created when missing.
        preferred_processor: Optional name of the processor to try first.

    Returns:
        The output file path, or None when the input is missing, no
        processor accepts the file, or processing raises.
    """
    if not input_file.exists():
        logger.error(f"输入文件不存在: {input_file}")
        return None

    if not output_dir.exists():
        output_dir.mkdir(parents=True, exist_ok=True)

    try:
        # Try the explicitly requested processor before auto-selection.
        if preferred_processor:
            chosen = self._get_processor_by_name(preferred_processor)
            if chosen and chosen.can_process(input_file):
                logger.info(f"使用指定的处理器: {chosen.name}")
                return chosen.process(input_file, output_dir)
            logger.warning(f"指定的处理器不可用或无法处理该文件: {preferred_processor}")

        # Auto-selection: every registered processor is asked, the first
        # one that accepts the file is used.
        candidates = []
        for candidate in self.processors:
            if candidate.can_process(input_file):
                candidates.append(candidate)

        if candidates:
            chosen = candidates[0]
            logger.info(f"使用处理器 {chosen.name} 处理文件: {input_file}")
            return chosen.process(input_file, output_dir)

        logger.warning(f"未找到适合处理文件的处理器: {input_file}")
        logger.info(f"支持的文件类型: {self.get_supported_types()}")
        return None

    except Exception as e:
        logger.error(f"处理文件时出错: {e}", exc_info=True)
        return None
|
||||
|
||||
def _get_processor_by_name(self, name: str) -> Optional[BaseProcessor]:
    """Look up a registered processor by its ``name`` or by its class name.

    Args:
        name: Processor name or processor class name.

    Returns:
        The first matching processor, or None when nothing matches.
    """
    matching = (
        candidate
        for candidate in self.processors
        if candidate.name == name or candidate.__class__.__name__ == name
    )
    return next(matching, None)
|
||||
|
||||
def get_supported_types(self) -> List[Dict[str, Any]]:
    """Describe the file types supported by the registered processors.

    Returns:
        One dict per processor with the keys ``name``, ``description``,
        ``extensions`` and ``class_name``.
    """
    summaries = []
    for entry in self.processors:
        summary = {}
        summary['name'] = entry.name
        summary['description'] = entry.description
        summary['extensions'] = entry.get_supported_extensions()
        summary['class_name'] = entry.__class__.__name__
        summaries.append(summary)
    return summaries
|
||||
|
||||
def get_processor_info(self) -> List[Dict[str, Any]]:
    """Return detailed information about every registered processor.

    Returns:
        One dict per processor with the keys ``name``, ``description``,
        ``extensions``, ``required_columns``, ``class_name`` and ``module``.
    """
    details = []
    for entry in self.processors:
        kind = entry.__class__
        details.append(dict(
            name=entry.name,
            description=entry.description,
            extensions=entry.get_supported_extensions(),
            required_columns=entry.get_required_columns(),
            class_name=kind.__name__,
            module=kind.__module__,
        ))
    return details
|
||||
|
||||
def can_process_file(self, file_path: Path) -> bool:
    """Tell whether any registered processor accepts the file.

    Args:
        file_path: Path of the file to check.

    Returns:
        True when the path exists and at least one processor reports it can
        process it, otherwise False.
    """
    if file_path.exists():
        for candidate in self.processors:
            if candidate.can_process(file_path):
                return True
    return False
|
||||
|
||||
def get_suitable_processors(self, file_path: Path) -> List[BaseProcessor]:
    """Collect every registered processor that accepts the file.

    Args:
        file_path: Path of the file to check.

    Returns:
        The accepting processors in registration order; an empty list when
        the path does not exist or nothing accepts it.
    """
    accepted = []
    if file_path.exists():
        for candidate in self.processors:
            if candidate.can_process(file_path):
                accepted.append(candidate)
    return accepted
|
||||
|
||||
def reload_processors(self):
    """Drop every registered processor and load them again."""
    logger.info("重新加载处理器...")
    # Clear in place so the list object itself is kept.
    self.processors.clear()
    self._load_processors()
    loaded = len(self.processors)
    logger.info(f"重新加载完成,共{loaded}个处理器")
|
||||
|
||||
def add_processor(self, processor: BaseProcessor):
    """Register an additional processor at the end of the list.

    Args:
        processor: Processor instance to register.
    """
    registry = self.processors
    registry.append(processor)
    logger.info(f"添加处理器: {processor.name}")
|
||||
|
||||
def remove_processor(self, processor_name: str) -> bool:
    """Unregister the first processor matching the given name.

    Args:
        processor_name: Processor name or processor class name.

    Returns:
        True when a processor was removed, False when none matched.
    """
    for position, candidate in enumerate(self.processors):
        is_match = (
            candidate.name == processor_name
            or candidate.__class__.__name__ == processor_name
        )
        if not is_match:
            continue
        # Safe although we are iterating: we return right after removing.
        self.processors.pop(position)
        logger.info(f"移除处理器: {processor_name}")
        return True

    logger.warning(f"未找到要移除的处理器: {processor_name}")
    return False
|
||||
@@ -0,0 +1,227 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import pandas as pd
|
||||
from typing import Optional, Callable
|
||||
|
||||
from ..core.utils.log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class SpecialSuppliersService:
    """Pre-processing for suppliers whose order sheets need special handling.

    Covers orders handled by 杨碧月 and orders from 蓉城易购. Each
    ``process_*`` method cleans the source sheet, writes it next to the
    source as ``预处理之后_<name>`` and hands it to ``OrderService``.
    """

    def __init__(self, config_manager=None):
        # Passed unchanged to OrderService for the follow-up conversion.
        self.config_manager = config_manager

    def process_yang_biyue_only(self, src_path: str) -> Optional[str]:
        """Run only the 杨碧月 pre-processing step.

        Args:
            src_path: Path of the source Excel file.

        Returns:
            Path of the pre-processed file, or None when the sheet is not a
            杨碧月 order, required columns are missing, or an error occurs.
        """
        try:
            from app.core.utils.file_utils import smart_read_excel
            df = smart_read_excel(src_path)

            # The order is recognised by a handler column mentioning 杨碧月.
            handler_col = next((col for col in df.columns if '经手人' in str(col)), None)
            if handler_col is None or not df[handler_col].astype(str).str.contains('杨碧月').any():
                return None

            logger.info("识别到杨碧月订单,正在执行专用清洗...")

            # Source header -> Chinese column name used in the cleaned sheet.
            column_map = {
                '商品条码': '商品条码',
                '商品名称': '商品名称',
                '商品规格': '规格',
                '单位': '单位',
                '数量': '数量',
                '含税单价': '单价',
                '含税金额': '金额'
            }

            found_cols = {}
            # Pass 1: exact header matches take priority, so that e.g.
            # "结算单位" is never picked for "单位".
            for target_zh, std_name in column_map.items():
                for col in df.columns:
                    if str(col).strip() == target_zh:
                        found_cols[col] = std_name
                        break

            # Pass 2: substring match for targets that are still missing.
            for target_zh, std_name in column_map.items():
                if std_name in found_cols.values():
                    continue
                for col in df.columns:
                    if col in found_cols:
                        # Never re-map a column already claimed by another target.
                        continue
                    col_str = str(col)
                    if target_zh not in col_str:
                        continue
                    # Skip known look-alike columns.
                    if target_zh == '单位' and '结算单位' in col_str:
                        continue
                    if target_zh == '数量' and '基本单位数量' in col_str:
                        continue
                    found_cols[col] = std_name
                    break

            if len(found_cols) < 4:
                logger.error(f"杨碧月订单列匹配不足: 找到 {list(found_cols.values())}")
                return None
            if '商品条码' not in found_cols.values():
                # Without a barcode column the row filter below cannot work.
                logger.error(f"杨碧月订单缺少商品条码列: 找到 {list(found_cols.values())}")
                return None

            df_clean = df[list(found_cols.keys())].copy()
            df_clean = df_clean.rename(columns=found_cols)

            # Rows without a barcode carry no order line.
            df_clean = df_clean.dropna(subset=['商品条码'])

            # Save next to the source file.
            out_dir = os.path.dirname(src_path)
            base = os.path.basename(src_path)
            final_path = os.path.join(out_dir, f"预处理之后_{base}")
            df_clean.to_excel(final_path, index=False)

            return final_path
        except Exception as e:
            logger.error(f"预处理杨碧月订单出错: {e}", exc_info=True)
            return None

    def process_yang_biyue(self, src_path: str, progress_cb: Optional[Callable[[int, str], None]] = None) -> Optional[str]:
        """Pre-process a 杨碧月 order and run the standard conversion on it.

        Args:
            src_path: Path of the source Excel file.
            progress_cb: Optional ``(percent, message)`` progress callback.

        Returns:
            Whatever ``OrderService.process_excel`` returns, or None when
            pre-processing yields nothing or an error occurs.
        """
        try:
            if progress_cb:
                progress_cb(10, "正在进行杨碧月订单预处理...")
            preprocessed_path = self.process_yang_biyue_only(src_path)

            if not preprocessed_path:
                return None

            if progress_cb:
                progress_cb(60, "预处理文件已保存,开始标准转换流程...")

            # Imported lazily to avoid a circular import.
            from app.services.order_service import OrderService
            order_service = OrderService(self.config_manager)
            # The conversion's progress is mapped onto the remaining 60-100 range.
            result = order_service.process_excel(
                preprocessed_path,
                progress_cb=lambda p: progress_cb(60 + int(p * 0.4), "生成采购单中...") if progress_cb else None,
            )
            return result

        except Exception as e:
            logger.error(f"处理杨碧月订单出错: {e}", exc_info=True)
            return None

    def preprocess_rongcheng_yigou(self, src_path: str, progress_cb: Optional[Callable[[int, str], None]] = None) -> Optional[str]:
        """Clean a 蓉城易购 order using fixed column positions.

        Columns C, E, N, Q and S (indices 2, 4, 13, 16, 18) are taken as
        product name, barcode, quantity, unit price and amount. Rows whose
        barcode cell lists several barcodes are split into one row per
        barcode with the quantity distributed across them.

        Args:
            src_path: Path of the source Excel file.
            progress_cb: Optional ``(percent, message)`` progress callback.

        Returns:
            Path of the pre-processed file, or None on failure.
        """
        try:
            if progress_cb:
                progress_cb(10, "正在处理蓉城易购预处理...")

            from app.core.utils.file_utils import smart_read_excel
            # Layout: row 0 order number, row 1 contact, row 2 header, data from row 3.
            df_raw = smart_read_excel(src_path, header=None)

            if len(df_raw) <= 3:
                logger.error("蓉城易购文件数据行数不足")
                return None

            df_data = df_raw.iloc[3:].reset_index(drop=True)

            idx_map = {
                2: '商品名称',
                4: '商品条码',
                13: '数量',
                16: '单价',
                18: '金额'
            }

            # Only keep the positions that exist in this sheet.
            available_indices = [i for i in idx_map if i < df_data.shape[1]]
            df2 = df_data.iloc[:, available_indices].copy()
            df2.columns = [idx_map[i] for i in available_indices]

            if '商品条码' not in df2.columns:
                # Too few columns: report it clearly instead of a KeyError below.
                logger.error("蓉城易购文件缺少商品条码列")
                return None

            for c in ['数量', '单价', '金额']:
                if c in df2.columns:
                    df2[c] = pd.to_numeric(df2[c], errors='coerce').fillna(0)

            # Drop rows without a barcode.
            df2 = df2.dropna(subset=['商品条码'])
            df2['商品条码'] = df2['商品条码'].astype(str).str.strip()
            df2 = df2[df2['商品条码'] != '']

            # Split multi-barcode rows and distribute the quantity.
            if '数量' in df2.columns:
                # Separators: "/", ASCII comma, full-width comma, "、".
                splitter = re.compile(r'[/,\uff0c、]+')
                rows = []
                for _, row in df2.iterrows():
                    bc_val = str(row.get('商品条码', '')).strip()
                    parts = [p.strip() for p in splitter.split(bc_val) if p.strip()]

                    q_total = float(row.get('数量', 0) or 0) if len(parts) >= 2 else 0.0
                    if not q_total > 0:
                        # Single barcode, or nothing to distribute: keep as is.
                        rows.append(row)
                        continue

                    n = len(parts)
                    base_qty = int(q_total // n)
                    remainder = int(q_total % n)

                    for i, p_bc in enumerate(parts):
                        new_row = row.copy()
                        new_row['商品条码'] = p_bc
                        # The first ``remainder`` barcodes get one extra unit.
                        current_qty = base_qty + (1 if i < remainder else 0)
                        new_row['数量'] = current_qty
                        if '单价' in new_row:
                            try:
                                up = float(new_row['单价'] or 0)
                                new_row['金额'] = up * current_qty
                            except (TypeError, ValueError):
                                # Keep the original amount when the price is unusable.
                                pass
                        rows.append(new_row)
                df2 = pd.DataFrame(rows)

            # Save next to the source file.
            out_dir = os.path.dirname(src_path)
            base = os.path.basename(src_path)
            final_path = os.path.join(out_dir, f"预处理之后_{base}")
            df2.to_excel(final_path, index=False)

            if progress_cb:
                progress_cb(100, "蓉城易购预处理完成")
            return final_path

        except Exception as e:
            logger.error(f"预处理蓉城易购订单出错: {e}", exc_info=True)
            return None

    def process_rongcheng_yigou(self, src_path: str, progress_cb: Optional[Callable[[int, str], None]] = None) -> Optional[str]:
        """Compatibility entry point: pre-process a 蓉城易购 order, then convert it.

        Args:
            src_path: Path of the source Excel file.
            progress_cb: Optional ``(percent, message)`` progress callback.

        Returns:
            Whatever ``OrderService.process_excel`` returns, or None when
            pre-processing fails.
        """
        preprocess_cb = None
        if progress_cb:
            # Pre-processing reports 0-100 on its own; squeeze it into 0-60 so
            # overall progress does not jump to 100 and then fall back to 60.
            def preprocess_cb(percent, message):
                progress_cb(int(percent * 0.6), message)

        cleaned_path = self.preprocess_rongcheng_yigou(src_path, preprocess_cb)
        if not cleaned_path:
            return None

        from app.services.order_service import OrderService
        order_service = OrderService(self.config_manager)
        return order_service.process_excel(
            cleaned_path,
            progress_cb=lambda p: progress_cb(60 + int(p * 0.4), "生成采购单中...") if progress_cb else None,
        )
|
||||
@@ -0,0 +1,336 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
烟草公司订单处理服务
|
||||
----------------
|
||||
处理烟草公司特定格式的订单明细文件,生成银豹采购单
|
||||
"""
|
||||
|
||||
import os
|
||||
import glob
|
||||
import datetime
|
||||
import pandas as pd
|
||||
import xlrd
|
||||
import xlwt
|
||||
import re
|
||||
from xlutils.copy import copy
|
||||
from openpyxl import load_workbook
|
||||
from typing import Optional, Dict, Any, List, Tuple
|
||||
from app.core.utils.log_utils import get_logger
|
||||
from app.core.utils.string_utils import parse_monetary_string
|
||||
from app.core.utils.dialog_utils import show_custom_dialog # 导入自定义弹窗工具
|
||||
from ..config.settings import ConfigManager
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class TobaccoService:
    """Order processing for the tobacco company's order-detail sheets."""

    def __init__(self, config: "ConfigManager"):
        """Initialise paths from the configuration.

        Args:
            config: Configuration object queried as
                ``config.get(section, option, fallback=...)``.
        """
        self.config = config
        self.output_dir = config.get('Paths', 'output_folder', fallback='data/output')
        self.template_file = self._resolve_template_path(
            config.get('Paths', 'template_file', fallback='templates/银豹-采购单模板.xls'),
            config.get('Paths', 'template_folder', fallback='templates'),
        )
        # Tobacco purchase orders are always written to the result directory.
        result_dir = "data/result"
        os.makedirs(result_dir, exist_ok=True)
        self.output_file = os.path.join(result_dir, '银豹采购单_烟草公司.xls')

    @staticmethod
    def _resolve_template_path(template_file, template_folder):
        """Return the template path, looking in the template folder for bare names.

        The configured value is kept unless it is a bare file name that does
        not exist while the same name exists inside ``template_folder``.
        """
        if not template_file or not template_folder:
            return template_file
        if os.path.dirname(template_file) or os.path.exists(template_file):
            return template_file
        candidate = os.path.join(template_folder, template_file)
        return candidate if os.path.exists(candidate) else template_file

    def get_latest_tobacco_order(self) -> Optional[str]:
        """Find the most recent ``订单明细*.xlsx`` file in the output directory.

        Returns:
            Path of the newest file by ``os.path.getctime``, or None when no
            file matches. A file not created today is still returned, with
            a warning.
        """
        today = datetime.date.today()
        today_start = datetime.datetime.combine(today, datetime.time.min).timestamp()

        file_pattern = os.path.join(self.output_dir, "订单明细*.xlsx")
        candidates = glob.glob(file_pattern)

        if not candidates:
            logger.warning("未找到烟草公司订单明细文件")
            return None

        latest_file = max(candidates, key=os.path.getctime)

        if os.path.getctime(latest_file) >= today_start:
            logger.info(f"找到最新烟草订单明细文件: {latest_file}")
        else:
            logger.warning(f"找到的烟草订单明细文件不是今天创建的: {latest_file}")
        return latest_file

    def preprocess_tobacco_order(self, file_path: str) -> Optional[str]:
        """Clean a tobacco order using fixed column positions.

        Columns A, B, E, G and H (indices 0, 1, 4, 6, 7) are taken as
        product name, barcode, wholesale price, order quantity and amount.

        Args:
            file_path: Path of the source Excel file.

        Returns:
            Path of the pre-processed file, or None on failure or when no
            row has a non-zero order quantity.
        """
        try:
            logger.info(f"执行烟草订单专用预处理: {file_path}")
            from app.core.utils.file_utils import smart_read_excel

            # Layout: row 0 licence number, row 1 header, row 2 totals, data from row 3.
            df_raw = smart_read_excel(file_path, header=None)

            if len(df_raw) <= 3:
                logger.error("烟草订单文件数据行数不足")
                return None

            df_data = df_raw.iloc[3:].reset_index(drop=True)

            idx_map = {
                0: '商品名称',
                1: '商品条码',
                4: '批发价',
                6: '数量',
                7: '金额'
            }

            available_indices = [i for i in idx_map if i < df_data.shape[1]]
            df = df_data.iloc[:, available_indices].copy()
            df.columns = [idx_map[i] for i in available_indices]

            # 1. Keep only rows with a non-zero order quantity.
            df['数量'] = pd.to_numeric(df['数量'], errors='coerce').fillna(0)
            df = df[df['数量'] != 0].copy()

            if df.empty:
                logger.warning("烟草订单无有效订单量记录")
                return None

            # 2. Quantity is multiplied by 10, wholesale price divided by 10.
            df['单价'] = pd.to_numeric(df['批发价'], errors='coerce').fillna(0) / 10
            df['数量'] = df['数量'] * 10

            # 3. Amount as a number.
            df['金额'] = pd.to_numeric(df['金额'], errors='coerce').fillna(0)

            # 4. Keep only the columns needed downstream.
            final_cols = ['商品条码', '商品名称', '数量', '单价', '金额']
            df_final = df[final_cols].copy()

            out_dir = os.path.dirname(file_path)
            base = os.path.basename(file_path)
            final_path = os.path.join(out_dir, f"预处理之后_{base}")
            df_final.to_excel(final_path, index=False)

            logger.info(f"烟草订单预处理完成: {final_path}")
            return final_path

        except Exception as e:
            logger.error(f"烟草订单预处理失败: {e}")
            return None

    def process_tobacco_order(self, input_file=None):
        """Convert a tobacco order sheet into a purchase order.

        Args:
            input_file: Path of the input file; when None the latest
                order-detail file is looked up.

        Returns:
            Path of the generated purchase order, or None on failure.
        """
        try:
            if input_file is None:
                input_file = self.get_latest_tobacco_order()

            if input_file is None:
                # get_latest_tobacco_order already logged the warning.
                logger.error("未找到可处理的烟草订单明细文件")
                return None

            logger.info(f"开始处理烟草公司订单: {input_file}")

            # Order time and total amount from the sheet header.
            order_info = self._read_order_info(input_file)
            if not order_info:
                logger.error(f"读取订单信息失败: {input_file}")
                return None

            order_time, total_amount = order_info

            order_data = self._read_order_data(input_file)
            if order_data is None or order_data.empty:
                logger.error(f"读取订单数据失败: {input_file}")
                return None

            output_file = self._generate_pospal_order(order_data, order_time)
            if not output_file:
                logger.error("生成银豹采购单失败")
                return None

            total_count = len(order_data)

            logger.info(f"烟草公司订单处理成功,订单时间: {order_time}, 总金额: {total_amount}, 处理条目: {total_count}")
            logger.info(f"采购单已生成: {output_file}")

            self.show_result_dialog(output_file, order_time, total_count, total_amount)

            return output_file

        except Exception as e:
            logger.error(f"处理烟草公司订单时发生错误: {e}", exc_info=True)
            return None

    def _read_order_info(self, file_path: str) -> Optional[Tuple[str, float]]:
        """Read the order time (cell H1) and total amount (cell H3).

        Args:
            file_path: Path of the order workbook.

        Returns:
            ``(order_time, total_amount)``, with "(空)" / 0 substituted for
            empty cells, or None when the workbook cannot be read.
        """
        try:
            wb_info = load_workbook(file_path, data_only=True)
            try:
                ws_info = wb_info.active
                order_time = ws_info["H1"].value or "(空)"
                total_amount = ws_info["H3"].value or 0
            finally:
                wb_info.close()

            return (order_time, total_amount)
        except Exception as e:
            logger.error(f"读取订单信息出错: {e}")
            return None

    def _read_order_data(self, file_path: str) -> Optional[pd.DataFrame]:
        """Read the order rows and derive purchase quantity and unit price.

        Args:
            file_path: Path of the order workbook.

        Returns:
            DataFrame of rows with a non-zero order quantity, extended with
            ``采购量`` (order quantity * 10) and ``采购单价`` (amount /
            purchase quantity), or None on failure.
        """
        columns = ['商品', '盒码', '条码', '建议零售价', '批发价', '需求量', '订单量', '金额']

        try:
            from app.core.utils.file_utils import smart_read_excel
            df_old = smart_read_excel(file_path, header=None, skiprows=3, names=columns)

            # Blank or non-numeric quantities must count as 0; a NaN would
            # pass the "!= 0" filter and break int() when the order is written.
            df_old['订单量'] = pd.to_numeric(df_old['订单量'], errors='coerce').fillna(0)

            df_filtered = df_old[df_old['订单量'] != 0].copy()
            df_filtered['采购量'] = df_filtered['订单量'] * 10
            df_filtered['采购单价'] = df_filtered['金额'] / df_filtered['采购量']
            df_filtered = df_filtered.reset_index(drop=True)

            return df_filtered
        except Exception as e:
            logger.error(f"读取订单数据失败: {e}")
            return None

    def _generate_pospal_order(self, order_data: pd.DataFrame, order_time: str) -> Optional[str]:
        """Fill the purchase-order template with the order rows.

        Args:
            order_data: Rows as returned by ``_read_order_data``.
            order_time: Order time (not written into the sheet).

        Returns:
            Path of the written purchase order, or None on failure.
        """
        try:
            if not os.path.exists(self.template_file):
                logger.error(f"采购单模板文件不存在: {self.template_file}")
                return None

            # Copy the template so its formatting is kept.
            template_rd = xlrd.open_workbook(self.template_file, formatting_info=True)
            template_wb = copy(template_rd)
            template_ws = template_wb.get_sheet(0)

            # Locate the target columns by their header text.
            header_row = template_rd.sheet_by_index(0).row_values(0)
            barcode_col = header_row.index("条码(必填)")
            amount_col = header_row.index("采购量(必填)")
            gift_col = header_row.index("赠送量")
            price_col = header_row.index("采购单价(必填)")

            # Row 0 is the header; write by position so the result does not
            # depend on the DataFrame's index labels.
            for sheet_row, (_, row) in enumerate(order_data.iterrows(), start=1):
                template_ws.write(sheet_row, barcode_col, row['盒码'])
                template_ws.write(sheet_row, amount_col, int(row['采购量']))
                template_ws.write(sheet_row, gift_col, "")
                template_ws.write(sheet_row, price_col, round(row['采购单价'], 2))

            os.makedirs(os.path.dirname(self.output_file), exist_ok=True)

            template_wb.save(self.output_file)
            logger.info(f"采购单生成成功: {self.output_file}")

            return self.output_file
        except Exception as e:
            logger.error(f"生成银豹采购单失败: {e}")
            return None

    def show_result_dialog(self, output_file, order_time, total_count, total_amount):
        """Show the result dialog for a processed order and log the outcome.

        Args:
            output_file: Path of the generated purchase order.
            order_time: Order time shown in the dialog.
            total_count: Number of processed items.
            total_amount: Total amount; parsed with ``parse_monetary_string``.
        """
        additional_info = {
            "订单来源": "烟草公司",
            "处理时间": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        # Fall back to 0.0 when the amount cannot be parsed.
        parsed = parse_monetary_string(total_amount)
        total_amount = parsed if parsed is not None else 0.0
        amount_display = f"¥{total_amount:.2f}"

        show_custom_dialog(
            title="烟草订单处理结果",
            message="烟草订单处理完成",
            result_file=output_file,
            time_info=order_time,
            count_info=f"{total_count}个商品",
            amount_info=amount_display,
            additional_info=additional_info
        )

        logger.info(f"烟草公司订单处理成功,订单时间: {order_time}, 总金额: {total_amount}, 处理条目: {total_count}")
|
||||
@@ -0,0 +1,2 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""益选-OCR订单处理系统 UI 模块"""
|
||||
@@ -0,0 +1,565 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""业务操作处理模块"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import tkinter as tk
|
||||
from tkinter import messagebox
|
||||
from threading import Thread
|
||||
|
||||
from app.config.settings import ConfigManager
|
||||
from app.services.ocr_service import OCRService
|
||||
from app.services.order_service import OrderService
|
||||
from app.core.utils.log_utils import get_logger
|
||||
|
||||
from .logging_ui import add_to_log, init_gui_logger, dispose_gui_logger, GUILogHandler
|
||||
from .ui_widgets import ProgressReporter
|
||||
from .error_utils import show_error_dialog, get_error_suggestion
|
||||
|
||||
logger = get_logger(__name__)
|
||||
from .result_previews import show_ocr_result_preview, show_excel_result_preview, show_merge_result_preview
|
||||
from .user_settings import add_recent_file
|
||||
from .command_runner import get_running_task, set_running_task
|
||||
from .file_operations import select_file, select_excel_file, validate_unit_price_against_item_data
|
||||
|
||||
|
||||
def _ask_and_merge_purchase_orders(order_service, log_widget, add_to_recent=False):
    """Ask whether the purchase orders should be merged and merge them on "yes".

    Shared by run_pipeline_directly and batch_process_orders_with_status.

    Args:
        order_service: Service providing ``get_purchase_orders`` and
            ``merge_all_purchase_orders``.
        log_widget: Widget that receives the log lines.
        add_to_recent: When true, a successful merge result is added to the
            recent-files list.

    Returns:
        The merge result when a merge was performed and succeeded,
        otherwise None.
    """
    try:
        orders = order_service.get_purchase_orders()
        order_count = len(orders)

        # Nothing to merge with fewer than two files.
        if order_count == 0:
            add_to_log(log_widget, "没有找到采购单文件,跳过合并步骤\n", "info")
            return None
        if order_count == 1:
            add_to_log(log_widget, f"只有1个采购单文件,无需合并: {os.path.basename(orders[0])}\n", "info")
            return None

        add_to_log(log_widget, f"找到{order_count}个采购单文件\n", "info")

        file_list = "\n".join(f"• {os.path.basename(f)}" for f in orders)
        answer = messagebox.askyesnocancel(
            "采购单合并选择",
            f"发现{len(orders)}个采购单文件:\n\n{file_list}\n\n是否需要合并这些采购单?\n\n• 选择'是':合并所有采购单\n• 选择'否':保持文件分离\n• 选择'取消':跳过此步骤",
            icon='question'
        )

        if answer is False:
            add_to_log(log_widget, "用户选择不合并采购单,保持文件分离\n", "info")
            return None
        if answer is not True:
            add_to_log(log_widget, "用户取消合并操作\n", "info")
            return None

        add_to_log(log_widget, "开始合并采购单...\n", "info")
        merged = order_service.merge_all_purchase_orders()
        if not merged:
            add_to_log(log_widget, "合并失败\n", "warning")
            return None

        add_to_log(log_widget, "采购单合并完成\n", "success")
        if add_to_recent:
            try:
                add_recent_file(merged)
            except Exception as e:
                logger.debug(f"添加最近文件失败: {e}")
        return merged
    except Exception as e:
        add_to_log(log_widget, f"合并过程出现问题: {str(e)}\n", "warning")
    return None
|
||||
|
||||
|
||||
def process_single_image_with_status(log_widget, status_bar):
    """Let the user pick one image and OCR it on a background thread.

    While the task runs, the root logger's stream handlers are replaced by a
    GUI handler writing to ``log_widget``; they are put back afterwards.

    Args:
        log_widget: Widget that receives the log output.
        status_bar: Status bar providing ``set_status`` and ``set_running``.
    """
    status_bar.set_status("选择图片中...")
    file_path = select_file(log_widget, [("图片文件", "*.jpg *.jpeg *.png *.bmp"), ("所有文件", "*.*")], "选择图片")
    if not file_path:
        status_bar.set_status("操作已取消")
        add_to_log(log_widget, "未选择文件,操作已取消\n", "warning")
        return

    def run_in_thread():
        root_logger = logging.getLogger()
        # Handlers detached for the duration of the task; without restoring
        # them, console/file logging would stay disabled for the rest of
        # the session (FileHandler is a StreamHandler subclass).
        detached_handlers = []
        try:
            status_bar.set_running(True)
            status_bar.set_status("开始处理图片...")

            gui_handler = GUILogHandler(log_widget)
            gui_handler.setLevel(logging.INFO)
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            gui_handler.setFormatter(formatter)

            for handler in root_logger.handlers[:]:
                if isinstance(handler, logging.StreamHandler):
                    root_logger.removeHandler(handler)
                    detached_handlers.append(handler)
            root_logger.addHandler(gui_handler)
            root_logger.setLevel(logging.INFO)

            ocr_service = OCRService()
            add_to_log(log_widget, f"开始处理图片: {file_path}\n", "info")
            try:
                add_recent_file(file_path)
            except Exception as e:
                logger.debug(f"添加最近文件失败: {e}")
            excel_path = ocr_service.process_image(file_path)

            if excel_path:
                add_to_log(log_widget, "图片OCR处理完成\n", "success")
                preview_output = f"采购单已保存到: {excel_path}\n"
                show_excel_result_preview(preview_output)
                try:
                    add_recent_file(excel_path)
                except Exception as e:
                    logger.debug(f"添加最近文件失败: {e}")
            else:
                add_to_log(log_widget, "图片OCR处理失败\n", "error")

        except Exception as e:
            add_to_log(log_widget, f"处理单个图片时出错: {str(e)}\n", "error")
            sugg = get_error_suggestion(str(e))
            if sugg:
                show_error_dialog("OCR处理错误", str(e), sugg)
        finally:
            try:
                for handler in root_logger.handlers[:]:
                    if isinstance(handler, GUILogHandler):
                        root_logger.removeHandler(handler)
                        handler.close()
                # Re-attach what was detached; GUI handlers were just closed
                # and must not come back.
                for handler in detached_handlers:
                    if isinstance(handler, GUILogHandler):
                        continue
                    if handler not in root_logger.handlers:
                        root_logger.addHandler(handler)
            except Exception as e:
                logger.debug(f"清理日志处理器失败: {e}")
            status_bar.set_running(False)
            status_bar.set_status("就绪")

    thread = Thread(target=run_in_thread)
    thread.daemon = True
    thread.start()
|
||||
|
||||
|
||||
def run_pipeline_directly(log_widget, status_bar):
    """Run the full pipeline (OCR batch, Excel processing, optional merge).

    The work runs on a daemon thread. A new run is refused while another
    task is registered as running.

    Args:
        log_widget: Text widget that receives the log output.
        status_bar: Status bar with ``set_status``/``set_running``; may be falsy.
    """
    if get_running_task() is not None:
        messagebox.showinfo("任务进行中", "请等待当前任务完成后再执行新的操作。")
        return

    def run_in_thread():
        set_running_task("pipeline")
        # Created inside the try; stays None if setup fails first.
        reporter = None
        try:
            if status_bar:
                status_bar.set_running(True)
                status_bar.set_status("开始完整处理流程...")

            start_time = datetime.datetime.now()
            start_perf = time.perf_counter()
            log_widget.configure(state=tk.NORMAL)
            log_widget.delete(1.0, tk.END)
            log_widget.insert(tk.END, "执行命令: 完整处理流程\n", "command")
            log_widget.insert(tk.END, f"开始时间: {start_time.strftime('%Y-%m-%d %H:%M:%S')}\n", "time")
            log_widget.insert(tk.END, "=" * 50 + "\n\n", "separator")
            log_widget.configure(state=tk.DISABLED)

            config = ConfigManager()

            init_gui_logger(log_widget)

            ocr_service = OCRService(config)
            order_service = OrderService(config)

            reporter = ProgressReporter(status_bar)
            reporter.running()
            reporter.set("开始OCR批量处理...", 10)

            total, success = ocr_service.batch_process(progress_cb=lambda p: reporter.set("OCR处理中...", p))
            if total == 0:
                add_to_log(log_widget, "没有找到需要处理的图片\n", "warning")
                if status_bar:
                    status_bar.set_status("未找到图片文件")
                return
            elif success == 0:
                add_to_log(log_widget, "OCR处理没有成功处理任何新文件\n", "warning")
            else:
                add_to_log(log_widget, f"OCR处理完成,共处理 {success}/{total} 个文件\n", "success")
                # Best effort: add the latest OCR outputs to the recent-files list.
                try:
                    processed_map = {}
                    pjson = config.get('Paths', 'processed_record', fallback='data/processed_files.json')
                    if os.path.exists(pjson):
                        with open(pjson, 'r', encoding='utf-8') as f:
                            processed_map = json.load(f)
                    outputs = list(processed_map.values())
                    for p in outputs[-10:]:
                        if p:
                            add_recent_file(os.path.abspath(p))
                except Exception as e:
                    logger.debug(f"加载已处理文件记录失败: {e}")
            reporter.set("开始Excel处理...", 92)

            add_to_log(log_widget, "开始Excel处理...\n", "info")
            result = order_service.process_excel()

            if not result:
                add_to_log(log_widget, "Excel处理失败\n", "error")
            else:
                add_to_log(log_widget, "Excel处理完成\n", "success")
                try:
                    add_recent_file(result)
                except Exception as e:
                    logger.debug(f"添加最近文件失败: {e}")
                try:
                    validate_unit_price_against_item_data(result, log_widget)
                except Exception as e:
                    logger.debug(f"单价校验失败: {e}")

            reporter.set("检查是否需要合并采购单...", 80)
            _ask_and_merge_purchase_orders(order_service, log_widget, add_to_recent=True)

            end_time = datetime.datetime.now()
            duration_sec = max(0.0, time.perf_counter() - start_perf)

            add_to_log(log_widget, f"\n{'=' * 50}\n", "separator")
            add_to_log(log_widget, "完整处理流程执行完毕!\n", "success")
            add_to_log(log_widget, f"结束时间: {end_time.strftime('%Y-%m-%d %H:%M:%S')}\n", "time")
            add_to_log(log_widget, f"耗时: {duration_sec:.2f} 秒\n", "time")
            reporter.set("处理完成", 100)

        except Exception as e:
            add_to_log(log_widget, f"执行过程中发生错误: {str(e)}\n", "error")
            import traceback
            add_to_log(log_widget, f"详细错误信息: {traceback.format_exc()}\n", "error")
        finally:
            try:
                dispose_gui_logger()
                if reporter is not None:
                    reporter.done()
            finally:
                # Must always run, otherwise every later task is refused
                # as "task in progress".
                set_running_task(None)
                if status_bar:
                    status_bar.set_running(False)
                    status_bar.set_status("就绪")

    thread = Thread(target=run_in_thread)
    thread.daemon = True
    thread.start()
|
||||
|
||||
|
||||
def batch_ocr_with_status(log_widget, status_bar):
    """Run the OCR batch recognition on a background thread.

    Args:
        log_widget: Widget that receives the log output.
        status_bar: Status bar handed to the progress reporter.
    """
    def run_in_thread():
        # Stays None if the reporter cannot be created.
        reporter = None
        try:
            reporter = ProgressReporter(status_bar)
            reporter.running()
            reporter.set("正在进行OCR批量识别...", 10)
            add_to_log(log_widget, "开始OCR批量识别\n", "info")

            init_gui_logger(log_widget)

            ocr_service = OCRService()

            # batch_process returns (total, success); the tuple itself is
            # always truthy, so the counts have to be inspected.
            total, success = ocr_service.batch_process()

            if success:
                add_to_log(log_widget, "OCR批量识别完成\n", "success")
                show_ocr_result_preview("OCR批量识别成功完成")
                reporter.set("批量识别完成", 100)
                # Best effort: add the latest outputs and inputs to the
                # recent-files list.
                try:
                    processed_map = {}
                    config = ConfigManager()
                    pjson = config.get('Paths', 'processed_record', fallback='data/processed_files.json')
                    if os.path.exists(pjson):
                        with open(pjson, 'r', encoding='utf-8') as f:
                            processed_map = json.load(f)
                    outputs = list(processed_map.values())
                    for p in outputs[-10:]:
                        if p:
                            add_recent_file(p)
                    inputs = list(processed_map.keys())
                    for p in inputs[-10:]:
                        if p:
                            add_recent_file(p)
                except Exception as e:
                    logger.debug(f"加载已处理文件记录失败: {e}")
            elif total == 0:
                add_to_log(log_widget, "没有找到需要处理的图片\n", "warning")
            else:
                add_to_log(log_widget, "OCR处理没有成功处理任何新文件\n", "warning")

        except Exception as e:
            add_to_log(log_widget, f"OCR批量识别出错: {str(e)}\n", "error")
            sugg = get_error_suggestion(str(e))
            if sugg:
                show_error_dialog("OCR处理错误", str(e), sugg)
        finally:
            dispose_gui_logger()
            if reporter is not None:
                reporter.done()

    thread = Thread(target=run_in_thread)
    thread.daemon = True
    thread.start()
|
||||
|
||||
|
||||
def batch_process_orders_with_status(log_widget, status_bar):
    """Process orders (Excel step only, plus the merge prompt) in the background.

    Args:
        log_widget: Widget handed to ``add_to_log`` and ``init_gui_logger``.
        status_bar: Status bar handed to ``ProgressReporter``.

    The call returns as soon as the daemon worker thread has been started.
    """
    def run_in_thread():
        # Bound before the try block so the finally clause cannot hit an
        # unbound name when ProgressReporter() itself raises.
        reporter = None
        try:
            reporter = ProgressReporter(status_bar)
            reporter.running()
            reporter.set("正在批量处理订单...", 10)
            add_to_log(log_widget, "开始批量处理订单\n", "info")

            init_gui_logger(log_widget)

            order_service = OrderService()

            add_to_log(log_widget, "开始Excel处理...\n", "info")
            try:
                latest_input = order_service.get_latest_excel()
                if latest_input:
                    add_recent_file(latest_input)
            except Exception as e:
                # Best effort: recent-file bookkeeping must not stop processing.
                logger.debug(f"获取最新Excel失败: {e}")
            result = order_service.process_excel(progress_cb=lambda p: reporter.set("Excel处理中...", p))

            if result:
                add_to_log(log_widget, "Excel处理完成\n", "success")
                try:
                    validate_unit_price_against_item_data(result, log_widget)
                except Exception as e:
                    logger.debug(f"单价校验失败: {e}")

                reporter.set("检查是否需要合并采购单...", 70)
                add_to_log(log_widget, "检查是否需要合并采购单...\n", "info")
                _ask_and_merge_purchase_orders(order_service, log_widget)

                add_to_log(log_widget, "批量处理订单完成\n", "success")
                reporter.set("批量处理订单完成", 100)
                show_excel_result_preview(f"采购单已保存到: {result}\n")
                try:
                    add_recent_file(result)
                except Exception as e:
                    logger.debug(f"添加最近文件失败: {e}")
            else:
                add_to_log(log_widget, "批量处理订单失败\n", "error")

        except Exception as e:
            add_to_log(log_widget, f"批量处理订单时出错: {str(e)}\n", "error")
            sugg = get_error_suggestion(str(e))
            if sugg:
                show_error_dialog("Excel处理错误", str(e), sugg)
        finally:
            dispose_gui_logger()
            if reporter is not None:
                reporter.done()

    thread = Thread(target=run_in_thread)
    thread.daemon = True
    thread.start()
|
||||
|
||||
|
||||
def merge_orders_with_status(log_widget, status_bar):
    """Merge all purchase orders on a background daemon thread.

    Args:
        log_widget: Widget handed to ``add_to_log`` and ``init_gui_logger``.
        status_bar: Status bar handed to ``ProgressReporter``.

    On success the merged file is previewed, added to the recent-files list
    and run through the unit-price check.
    """
    def run_in_thread():
        # Bound before the try block so the finally clause cannot hit an
        # unbound name when ProgressReporter() itself raises.
        reporter = None
        try:
            reporter = ProgressReporter(status_bar)
            reporter.running()
            reporter.set("正在合并采购单...", 10)
            add_to_log(log_widget, "开始合并采购单\n", "info")

            init_gui_logger(log_widget)

            order_service = OrderService()

            result = order_service.merge_all_purchase_orders(progress_cb=lambda p: reporter.set("合并处理中...", p))

            if result:
                add_to_log(log_widget, "采购单合并完成\n", "success")
                show_merge_result_preview(f"已保存到: {result}\n")
                try:
                    add_recent_file(result)
                except Exception as e:
                    logger.debug(f"添加最近文件失败: {e}")
                try:
                    validate_unit_price_against_item_data(result, log_widget)
                except Exception as e:
                    logger.debug(f"单价校验失败: {e}")
            else:
                add_to_log(log_widget, "采购单合并失败\n", "error")

        except Exception as e:
            add_to_log(log_widget, f"采购单合并出错: {str(e)}\n", "error")
            sugg = get_error_suggestion(str(e))
            if sugg:
                show_error_dialog("合并错误", str(e), sugg)
        finally:
            dispose_gui_logger()
            if reporter is not None:
                reporter.done()

    thread = Thread(target=run_in_thread)
    thread.daemon = True
    thread.start()
|
||||
|
||||
|
||||
def process_excel_file_with_status(log_widget, status_bar):
    """Let the user pick an Excel file and process it on a background thread.

    Args:
        log_widget: Widget handed to ``add_to_log`` and ``init_gui_logger``.
        status_bar: Object exposing ``set_running`` and ``set_status``.

    When the file dialog is cancelled the worker logs a warning and stops.
    The status bar is always reset to idle in the ``finally`` clause.
    """
    def run_in_thread():
        try:
            status_bar.set_running(True)
            status_bar.set_status("选择Excel文件中...")
            # NOTE(review): the file dialog is opened from this worker thread;
            # confirm that is safe with the Tk build in use.
            file_path = select_excel_file(log_widget)

            if not file_path:
                status_bar.set_status("操作已取消")
                add_to_log(log_widget, "未选择文件,操作已取消\n", "warning")
                return

            status_bar.set_status("开始处理Excel文件...")
            add_to_log(log_widget, f"开始处理Excel文件: {file_path}\n", "info")

            init_gui_logger(log_widget)

            order_service = OrderService()

            # A path is guaranteed here, so the former "process the latest
            # Excel" fallback was unreachable and has been removed.
            try:
                add_recent_file(file_path)
            except Exception as e:
                logger.debug(f"添加最近文件失败: {e}")
            result = order_service.process_excel(file_path, progress_cb=lambda p: status_bar.set_status("Excel处理中...", p))

            if result:
                add_to_log(log_widget, "Excel文件处理完成\n", "success")
                show_excel_result_preview(f"采购单已保存到: {result}\n")
                try:
                    add_recent_file(result)
                except Exception as e:
                    logger.debug(f"添加最近文件失败: {e}")
                try:
                    validate_unit_price_against_item_data(result, log_widget)
                except Exception as e:
                    logger.debug(f"单价校验失败: {e}")
            else:
                add_to_log(log_widget, "Excel文件处理失败\n", "error")

        except Exception as e:
            add_to_log(log_widget, f"Excel文件处理出错: {str(e)}\n", "error")
            msg = str(e)
            suggestion = None
            if 'openpyxl' in msg or 'engine' in msg:
                suggestion = "安装依赖:pip install openpyxl"
            elif 'xlrd' in msg:
                suggestion = "安装依赖:pip install xlrd"
            if suggestion:
                show_error_dialog("Excel处理错误", msg, suggestion)
        finally:
            dispose_gui_logger()

            status_bar.set_running(False)
            status_bar.set_status("就绪")

    thread = Thread(target=run_in_thread)
    thread.daemon = True
    thread.start()
|
||||
|
||||
|
||||
def process_dropped_file(log_widget, status_bar, file_path):
    """Run the one-click pipeline for a file dropped onto the window.

    Images (.jpg/.jpeg/.png/.bmp) go through OCR, Excel processing and the
    merge prompt; Excel files (.xlsx/.xls) skip the OCR step. Any other
    extension is logged as unsupported. Work happens on a daemon thread.

    Args:
        log_widget: Widget handed to ``add_to_log`` and ``init_gui_logger``.
        status_bar: Status bar handed to ``ProgressReporter``.
        file_path: Path of the dropped file.
    """

    def _remember(path):
        # Best effort: recent-file bookkeeping must never abort the pipeline.
        try:
            add_recent_file(path)
        except Exception as e:
            logger.debug(f"添加最近文件失败: {e}")

    def _finish(order_service, reporter, result):
        # Shared tail of both pipelines: record, validate, merge, report.
        add_to_log(log_widget, f"Excel处理完成: {result}\n", "success")
        _remember(result)
        try:
            validate_unit_price_against_item_data(result, log_widget)
        except Exception as e:
            logger.debug(f"单价校验失败: {e}")

        reporter.set("检查合并采购单...", 80)
        _ask_and_merge_purchase_orders(order_service, log_widget, add_to_recent=True)

        reporter.set("处理完成", 100)
        add_to_log(log_widget, "一键处理完成!\n", "success")

    def _image_pipeline(reporter):
        add_to_log(log_widget, f"开始一键处理图片: {file_path}\n", "info")
        _remember(file_path)

        # Step 1: OCR recognition
        reporter.set("OCR识别中...", 10)
        excel_path = OCRService().process_image(file_path)
        if not excel_path:
            add_to_log(log_widget, "图片OCR处理失败\n", "error")
            return
        add_to_log(log_widget, f"OCR识别完成: {excel_path}\n", "success")

        # Step 2: Excel processing
        reporter.set("Excel处理中...", 40)
        order_service = OrderService()
        result = order_service.process_excel(excel_path, progress_cb=lambda p: reporter.set("Excel处理中...", p))
        if not result:
            add_to_log(log_widget, "Excel处理失败\n", "error")
            return

        # Step 3: validation and merge
        _finish(order_service, reporter, result)

    def _excel_pipeline(reporter):
        order_service = OrderService()
        add_to_log(log_widget, f"开始一键处理Excel文件: {file_path}\n", "info")
        _remember(file_path)

        # Step 1: Excel processing
        reporter.set("Excel处理中...", 20)
        result = order_service.process_excel(file_path, progress_cb=lambda p: reporter.set("Excel处理中...", p))
        if not result:
            add_to_log(log_widget, "Excel文件处理失败\n", "error")
            return

        # Step 2: validation and merge
        _finish(order_service, reporter, result)

    def _run_in_background(pipeline):
        def _worker():
            # Bound before the try block so the finally clause cannot hit an
            # unbound name when ProgressReporter() itself raises.
            reporter = None
            try:
                reporter = ProgressReporter(status_bar)
                reporter.running()
                init_gui_logger(log_widget)
                pipeline(reporter)
            except Exception as e:
                # Without this handler a failure inside the worker would only
                # reach the default thread excepthook, never the log widget.
                add_to_log(log_widget, f"处理拖拽文件失败: {str(e)}\n", "error")
                sugg = get_error_suggestion(str(e))
                if sugg:
                    show_error_dialog("处理拖拽文件失败", str(e), sugg)
            finally:
                dispose_gui_logger()
                if reporter is not None:
                    reporter.done()

        t = Thread(target=_worker)
        t.daemon = True
        t.start()

    try:
        ext = os.path.splitext(file_path)[1].lower()
        if ext in ['.jpg', '.jpeg', '.png', '.bmp']:
            _run_in_background(_image_pipeline)
        elif ext in ['.xlsx', '.xls']:
            _run_in_background(_excel_pipeline)
        else:
            add_to_log(log_widget, f"不支持的文件类型: {file_path}\n", "warning")
    except Exception as e:
        add_to_log(log_widget, f"处理拖拽文件失败: {str(e)}\n", "error")
|
||||
@@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""条码映射编辑模块"""
|
||||
|
||||
from tkinter import messagebox
|
||||
|
||||
from app.core.excel.converter import UnitConverter
|
||||
from app.core.utils.dialog_utils import show_barcode_mapping_dialog
|
||||
|
||||
from .logging_ui import add_to_log
|
||||
|
||||
|
||||
def edit_barcode_mappings(log_widget):
    """Open the barcode-mapping editor and save whatever the user confirms.

    Args:
        log_widget: Widget handed to ``add_to_log`` for progress and errors.

    The current mappings are read from ``UnitConverter.special_barcodes``.
    The dialog receives a callback that writes the edited mappings back via
    ``UnitConverter.update_barcode_mappings``.
    """
    try:
        add_to_log(log_widget, "正在加载条码映射配置...\n", "info")

        converter = UnitConverter()
        existing = converter.special_barcodes

        def _on_save(updated):
            # Guard clause: report the failure first, success otherwise.
            if not converter.update_barcode_mappings(updated):
                add_to_log(log_widget, "保存条码映射配置失败\n", "error")
                return
            add_to_log(log_widget, f"成功保存条码映射配置,共{len(updated)}项\n", "success")

        show_barcode_mapping_dialog(None, _on_save, existing)

    except Exception as exc:
        detail = f"编辑条码映射时出错: {str(exc)}"
        add_to_log(log_widget, detail + "\n", "error")
        messagebox.showerror("错误", detail)
|
||||
@@ -0,0 +1,158 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""命令执行器模块"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import subprocess
|
||||
import datetime
|
||||
import re
|
||||
import tkinter as tk
|
||||
from tkinter import messagebox
|
||||
from threading import Thread
|
||||
|
||||
from .logging_ui import LogRedirector
|
||||
from .result_previews import show_result_preview
|
||||
|
||||
# 任务状态跟踪
|
||||
_RUNNING_TASK = None
|
||||
|
||||
|
||||
def get_running_task():
    """Return the module-level running-task marker (``None`` when idle)."""
    return _RUNNING_TASK
|
||||
|
||||
|
||||
def set_running_task(val):
    """Overwrite the module-level running-task marker with *val*.

    Passing ``None`` marks the runner as idle again.
    """
    global _RUNNING_TASK
    _RUNNING_TASK = val
|
||||
|
||||
|
||||
def run_command_with_logging(command, log_widget, status_bar=None, on_complete=None):
    """Run *command* as a subprocess and mirror its output into the log widget.

    Args:
        command: Argument list passed to ``subprocess.Popen``.
        log_widget: Tk text widget that receives the output; also used to
            schedule UI callbacks with ``after``.
        status_bar: Optional object exposing ``set_running`` / ``set_status``.
        on_complete: Optional callback ``(returncode, output_text)`` scheduled
            on the Tk loop when the command ends. It replaces the default
            success/failure handling, except in the pipeline "nothing to
            merge" case.

    Only one command may run at a time; a second call while one is active
    shows an info box and returns without doing anything.
    """
    global _RUNNING_TASK

    if _RUNNING_TASK is not None:
        messagebox.showinfo("任务进行中", "请等待当前任务完成后再执行新的操作。")
        return

    # Claim the slot on the calling thread. Setting it inside the worker left
    # a window in which a second call could also pass the check above.
    _RUNNING_TASK = command

    def run_in_thread():
        global _RUNNING_TASK

        old_stdout = sys.stdout
        old_stderr = sys.stderr

        try:
            if status_bar:
                status_bar.set_running(True)

            start_time = datetime.datetime.now()
            start_perf = time.perf_counter()
            log_widget.configure(state=tk.NORMAL)
            log_widget.delete(1.0, tk.END)
            log_widget.insert(tk.END, f"执行命令: {' '.join(command)}\n", "command")
            log_widget.insert(tk.END, f"开始时间: {start_time.strftime('%Y-%m-%d %H:%M:%S')}\n", "time")
            log_widget.insert(tk.END, "=" * 50 + "\n\n", "separator")
            log_widget.configure(state=tk.DISABLED)

            log_redirector = LogRedirector(log_widget)

            # Pass the working directories to the child through the
            # environment; fall back to the default layout if config fails.
            env = os.environ.copy()
            try:
                from app.config.settings import ConfigManager
                cfg = ConfigManager()
                env["OCR_OUTPUT_DIR"] = cfg.get_path('Paths', 'output_folder', fallback='data/output', create=True)
                env["OCR_INPUT_DIR"] = cfg.get_path('Paths', 'input_folder', fallback='data/input', create=True)
                env["OCR_TEMP_DIR"] = cfg.get_path('Paths', 'temp_folder', fallback='data/temp', create=True)
            except Exception:
                env["OCR_OUTPUT_DIR"] = os.path.abspath("data/output")
                env["OCR_INPUT_DIR"] = os.path.abspath("data/input")
                env["OCR_TEMP_DIR"] = os.path.abspath("data/temp")
            env["OCR_LOG_LEVEL"] = "DEBUG"

            sys.stdout = log_redirector
            sys.stderr = log_redirector

            print("日志重定向已启动,现在同时输出到终端和GUI")

            output_data = []
            # The context manager closes the pipe and waits for the child.
            with subprocess.Popen(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                bufsize=1,
                env=env
            ) as process:
                for line in process.stdout:
                    output_data.append(line)
                    print(line.rstrip())

                    if status_bar:
                        progress = extract_progress_from_log(line)
                        if progress is not None:
                            log_widget.after(0, status_bar.set_status, f"处理中: {progress}%完成", progress)

            returncode = process.returncode

            end_time = datetime.datetime.now()
            duration_sec = max(0.0, time.perf_counter() - start_perf)

            print(f"\n{'=' * 50}")
            print(f"执行完毕!返回码: {returncode}")
            print(f"结束时间: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
            print(f"耗时: {duration_sec:.2f} 秒")

            output_text = ''.join(output_data)

            is_pipeline = "pipeline" in command
            no_merge_files = "未找到采购单文件" in output_text
            single_file = "只有1个采购单文件" in output_text

            # Callbacks get their arguments through after(), so every value
            # is captured now rather than when the Tk loop runs them.
            if is_pipeline and (no_merge_files or single_file):
                print("完整流程中没有需要合并的文件,但其他步骤执行成功,视为成功完成")
                if status_bar:
                    log_widget.after(0, status_bar.set_status, "处理完成", 100)
                log_widget.after(0, show_result_preview, command, output_text)
            elif on_complete:
                log_widget.after(0, on_complete, returncode, output_text)
            elif returncode == 0:
                if status_bar:
                    log_widget.after(0, status_bar.set_status, "处理完成", 100)
                log_widget.after(0, show_result_preview, command, output_text)
            else:
                if status_bar:
                    log_widget.after(0, status_bar.set_status, f"处理失败 (返回码: {returncode})", 0)
                log_widget.after(0, messagebox.showerror, "操作失败", f"处理失败,返回码:{returncode}")

        except Exception as e:
            # The name bound by "except ... as e" is deleted when this clause
            # ends, so the text is captured before the callbacks are queued.
            error_text = str(e)
            print(f"\n执行出错: {error_text}")
            if status_bar:
                log_widget.after(0, status_bar.set_status, f"执行出错: {error_text}", 0)
            log_widget.after(0, messagebox.showerror, "执行错误", f"执行命令时出错: {error_text}")
        finally:
            sys.stdout = old_stdout
            sys.stderr = old_stderr

            _RUNNING_TASK = None
            if status_bar:
                log_widget.after(0, status_bar.set_running, False)

    try:
        Thread(target=run_in_thread).start()
    except Exception:
        # Release the slot if the worker could not be started at all.
        _RUNNING_TASK = None
        raise
|
||||
|
||||
|
||||
def extract_progress_from_log(log_line):
    """Extract a progress percentage from one line of log output.

    Two formats are recognised, checked in this order:

    * ``处理批次 <current>/<total>`` -> ``current * 100 // total``
    * ``<number>%``                 -> that number

    Args:
        log_line: A single line of text.

    Returns:
        The percentage as an ``int``, or ``None`` when the line carries no
        progress information. A batch marker whose total is zero is ignored.
    """
    batch_match = re.search(r'处理批次 (\d+)/(\d+)', log_line)
    if batch_match:
        current = int(batch_match.group(1))
        total = int(batch_match.group(2))
        # Skip a zero total instead of raising ZeroDivisionError. Integer
        # arithmetic avoids float truncation (29/100*100 came out as 28).
        if total > 0:
            return current * 100 // total

    percent_match = re.search(r'(\d+)%', log_line)
    if percent_match:
        return int(percent_match.group(1))

    return None
|
||||
@@ -0,0 +1,207 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""系统设置对话框模块"""
|
||||
|
||||
import os
|
||||
import tkinter as tk
|
||||
from tkinter import messagebox, filedialog, ttk
|
||||
|
||||
from app.config.settings import ConfigManager
|
||||
|
||||
from .user_settings import load_user_settings, save_user_settings
|
||||
from .ui_widgets import center_window
|
||||
from app.core.utils.dialog_utils import show_cloud_sync_dialog
|
||||
|
||||
# 模块级状态
|
||||
_PROCESSOR_SERVICE = None
|
||||
|
||||
|
||||
def show_config_dialog(root, cfg: ConfigManager):
    """Show the system settings dialog.

    Args:
        root: Parent Tk window of the dialog.
        cfg: Configuration manager supplying defaults; it also receives the
            edited values through ``update`` / ``save_config``.

    Values are pre-filled from the user settings (``load_user_settings``),
    falling back to *cfg*. "Save" writes the user settings first and then the
    configuration; if the configuration write fails the user gets a warning
    instead of a success message.
    """
    settings = load_user_settings()
    dlg = tk.Toplevel(root)
    dlg.title("系统设置")
    dlg.geometry("560x680")
    center_window(dlg)

    content = ttk.Frame(dlg)
    content.pack(fill=tk.BOTH, expand=True, padx=16, pady=16)
    for i in range(2):
        content.columnconfigure(i, weight=1)

    # Current values
    log_level_val = tk.StringVar(value=settings.get('log_level', 'INFO'))
    max_workers_val = tk.StringVar(value=str(settings.get('concurrency_max_workers', cfg.getint('Performance', 'max_workers', 4))))
    batch_size_val = tk.StringVar(value=str(settings.get('concurrency_batch_size', cfg.getint('Performance', 'batch_size', 5))))
    template_path_val = tk.StringVar(value=settings.get('template_path', os.path.join(cfg.get('Paths', 'template_folder', 'templates'), cfg.get('Templates', 'purchase_order', '银豹-采购单模板.xls'))))
    input_dir_val = tk.StringVar(value=settings.get('input_folder', cfg.get('Paths', 'input_folder', 'data/input')))
    output_dir_val = tk.StringVar(value=settings.get('output_folder', cfg.get('Paths', 'output_folder', 'data/output')))
    result_dir_val = tk.StringVar(value=settings.get('result_folder', 'data/result'))

    def add_row(row, label_text, widget):
        ttk.Label(content, text=label_text).grid(row=row, column=0, sticky='w', padx=4, pady=6)
        widget.grid(row=row, column=1, sticky='ew', padx=4, pady=6)

    def _relative_to_cwd(path):
        # relpath can raise (e.g. a path on another drive on Windows); keep
        # the path unchanged in that case.
        try:
            return os.path.relpath(path, os.getcwd())
        except Exception:
            return path

    # Log level
    lvl = ttk.Combobox(content, textvariable=log_level_val, values=['DEBUG', 'INFO', 'WARNING', 'ERROR'], state='readonly')
    add_row(0, "日志级别", lvl)

    # Concurrency parameters
    maxw_entry = ttk.Entry(content, textvariable=max_workers_val)
    add_row(1, "最大并发(max_workers)", maxw_entry)
    batch_entry = ttk.Entry(content, textvariable=batch_size_val)
    add_row(2, "批次大小(batch_size)", batch_entry)

    # Template path
    tpl_frame = ttk.Frame(content)
    tpl_entry = ttk.Entry(tpl_frame, textvariable=template_path_val)
    tpl_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)

    def _select_template():
        p = filedialog.askopenfilename(title="选择模板文件", filetypes=[("Excel模板", "*.xls *.xlsx"), ("所有文件", "*.*")])
        if p:
            template_path_val.set(_relative_to_cwd(p))

    ttk.Button(tpl_frame, text="选择", command=_select_template).pack(side=tk.LEFT, padx=6)
    add_row(3, "采购单模板文件", tpl_frame)

    # Directories
    def dir_row(row_idx, label, var):
        f = ttk.Frame(content)
        e = ttk.Entry(f, textvariable=var)
        e.pack(side=tk.LEFT, fill=tk.X, expand=True)

        def _select_dir():
            d = filedialog.askdirectory(title=f"选择{label}")
            if d:
                var.set(_relative_to_cwd(d))

        ttk.Button(f, text="选择", command=_select_dir).pack(side=tk.LEFT, padx=6)
        add_row(row_idx, label, f)

    dir_row(4, "输入目录", input_dir_val)
    dir_row(5, "输出目录", output_dir_val)
    dir_row(6, "结果目录", result_dir_val)

    # OCR API settings
    api_key_val = tk.StringVar(value=settings.get('api_key', cfg.get('API', 'api_key', '')))
    secret_key_val = tk.StringVar(value=settings.get('secret_key', cfg.get('API', 'secret_key', '')))
    timeout_val = tk.StringVar(value=str(settings.get('timeout', cfg.getint('API', 'timeout', 30))))
    max_retries_val = tk.StringVar(value=str(settings.get('max_retries', cfg.getint('API', 'max_retries', 3))))
    retry_delay_val = tk.StringVar(value=str(settings.get('retry_delay', cfg.getint('API', 'retry_delay', 2))))
    api_url_val = tk.StringVar(value=settings.get('api_url', cfg.get('API', 'api_url', '')))

    api_key_entry = ttk.Entry(content, textvariable=api_key_val)
    add_row(7, "API Key", api_key_entry)
    secret_key_entry = ttk.Entry(content, textvariable=secret_key_val)
    secret_key_entry.configure(show='*')
    add_row(8, "Secret Key", secret_key_entry)
    add_row(9, "Timeout", ttk.Entry(content, textvariable=timeout_val))
    add_row(10, "Max Retries", ttk.Entry(content, textvariable=max_retries_val))
    add_row(11, "Retry Delay", ttk.Entry(content, textvariable=retry_delay_val))
    add_row(12, "API URL", ttk.Entry(content, textvariable=api_url_val))

    # ---- Gitea cloud-sync settings ----
    ttk.Separator(content).grid(row=13, column=0, columnspan=2, sticky='ew', pady=8)
    ttk.Label(content, text="云端同步 (Gitea)", font=("Arial", 10, "bold")).grid(row=14, column=0, sticky='w', padx=4, pady=4)

    gitea_url_val = tk.StringVar(value=cfg.get('Gitea', 'base_url', fallback='https://gitea.94kan.cn'))
    gitea_owner_val = tk.StringVar(value=cfg.get('Gitea', 'owner', fallback='houhuan'))
    gitea_repo_val = tk.StringVar(value=cfg.get('Gitea', 'repo', fallback='yixuan-sync-data'))
    gitea_token_val = tk.StringVar(value=cfg.get('Gitea', 'token', fallback=''))

    add_row(15, "Gitea 地址", ttk.Entry(content, textvariable=gitea_url_val))
    add_row(16, "仓库所有者", ttk.Entry(content, textvariable=gitea_owner_val))
    add_row(17, "仓库名称", ttk.Entry(content, textvariable=gitea_repo_val))
    gitea_token_entry = ttk.Entry(content, textvariable=gitea_token_val, show='*')
    add_row(18, "Access Token", gitea_token_entry)

    # Action buttons
    btns = ttk.Frame(content)
    btns.grid(row=19, column=0, columnspan=2, sticky='ew', pady=10)
    btns.columnconfigure(0, weight=1)

    def _stored_path(path):
        # Absolute paths are stored relative to the working directory; each
        # path is converted on its own so one failure does not skip the rest.
        if path and os.path.isabs(path):
            return _relative_to_cwd(path)
        return path

    def save_settings():
        try:
            s = load_user_settings()
            s['log_level'] = log_level_val.get()
            s['concurrency_max_workers'] = int(max_workers_val.get() or '4')
            s['concurrency_batch_size'] = int(batch_size_val.get() or '5')
            s['template_path'] = _stored_path(template_path_val.get())
            s['input_folder'] = _stored_path(input_dir_val.get())
            s['output_folder'] = _stored_path(output_dir_val.get())
            s['result_folder'] = _stored_path(result_dir_val.get())
            save_user_settings(s)

            try:
                from app.core.utils.log_utils import set_log_level
                set_log_level(s['log_level'])
            except Exception:
                # Best effort: the new level still applies on next start.
                pass

            config_error = None
            try:
                tpl_path = s['template_path']
                tpl_dir = os.path.dirname(tpl_path)
                tpl_name = os.path.basename(tpl_path)
                cfg.update('Paths', 'template_folder', tpl_dir)
                cfg.update('Templates', 'purchase_order', tpl_name)
                try:
                    cfg.update('Paths', 'template_file', os.path.join(tpl_dir, tpl_name))
                except Exception:
                    pass
                for section, key, value in (
                    ('Paths', 'input_folder', s['input_folder']),
                    ('Paths', 'output_folder', s['output_folder']),
                    ('Performance', 'max_workers', s['concurrency_max_workers']),
                    ('Performance', 'batch_size', s['concurrency_batch_size']),
                    ('API', 'api_key', api_key_val.get()),
                    ('API', 'secret_key', secret_key_val.get()),
                    ('API', 'timeout', timeout_val.get()),
                    ('API', 'max_retries', max_retries_val.get()),
                    ('API', 'retry_delay', retry_delay_val.get()),
                    ('API', 'api_url', api_url_val.get()),
                    ('Gitea', 'base_url', gitea_url_val.get()),
                    ('Gitea', 'owner', gitea_owner_val.get()),
                    ('Gitea', 'repo', gitea_repo_val.get()),
                    ('Gitea', 'token', gitea_token_val.get()),
                ):
                    cfg.update(section, key, value)
                cfg.save_config()
            except Exception as exc:
                # Previously swallowed: the user was told everything had been
                # saved even when the configuration write failed.
                config_error = exc

            if config_error is None:
                messagebox.showinfo("设置已保存", "系统设置已更新并保存")
            else:
                messagebox.showwarning("设置未完全保存", f"用户设置已保存,但写入系统配置失败: {config_error}")
            dlg.destroy()
        except Exception as e:
            messagebox.showerror("保存失败", str(e))

    def reload_suppliers():
        global _PROCESSOR_SERVICE
        try:
            from app.services.processor_service import ProcessorService
            # Create the service on first use and reuse it afterwards.
            if _PROCESSOR_SERVICE is None:
                _PROCESSOR_SERVICE = ProcessorService(ConfigManager())
            _PROCESSOR_SERVICE.reload_processors()
            messagebox.showinfo("已重新加载", "供应商处理器已重新加载并应用最新配置")
        except Exception as e:
            messagebox.showerror("重新加载失败", str(e))

    ttk.Button(btns, text="重新加载供应商配置", command=reload_suppliers).grid(row=0, column=0, sticky='w')
    ttk.Button(btns, text="云端同步", command=lambda: show_cloud_sync_dialog(dlg)).grid(row=0, column=1, sticky='w', padx=6)
    ttk.Button(btns, text="取消", command=dlg.destroy).grid(row=0, column=2, sticky='e')
    ttk.Button(btns, text="保存", command=save_settings).grid(row=0, column=3, sticky='e', padx=6)
|
||||
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""错误处理工具模块"""
|
||||
|
||||
from tkinter import messagebox
|
||||
from typing import Optional
|
||||
|
||||
from app.core.utils.log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def show_error_dialog(title: str, message: str, suggestion: Optional[str] = None):
    """Show an error message box, optionally followed by a suggested action.

    Args:
        title: Title of the message box.
        message: Error text.
        suggestion: Optional remedy appended under a "suggested action" line.

    A failure to show the dialog is logged at debug level and not re-raised.
    """
    try:
        text = f"{message}\n\n建议操作:\n- {suggestion}" if suggestion else message
        messagebox.showerror(title, text)
    except Exception as exc:
        logger.debug(f"显示错误对话框失败: {exc}")
|
||||
|
||||
|
||||
def get_error_suggestion(message: str) -> Optional[str]:
    """Map an error message to a suggested remedy.

    The message is lower-cased and checked against an ordered rule table;
    the first rule that matches wins.

    Args:
        message: Error text; ``None`` or an empty string is accepted.

    Returns:
        The suggestion text, or ``None`` when no rule matches.
    """
    text = (message or "").lower()

    # Each rule: (alternatives, advice). An alternative is a tuple of
    # substrings that must ALL occur; any one alternative triggers the rule.
    rules = (
        ((('openpyxl',), ('engine', 'xlsx')), '安装依赖:pip install openpyxl'),
        ((('xlrd',), ('engine', 'xls')), '安装依赖:pip install xlrd'),
        ((('timeout',), ('timed out',)), '检查网络,增大API超时时间或稍后重试'),
        ((('invalid access_token',), ('access token',)), '刷新百度OCR令牌或检查api_key/secret_key'),
        ((('429',), ('too many requests',)), '降低识别频率或稍后重试'),
        ((('模板文件不存在',), ('no such file', '模板')), '在系统设置中选择正确的模板文件路径'),
        ((('没有找到采购单',), ('未在 data/result 目录下找到采购单',)), '确认data/result目录内存在采购单文件'),
        ((('permission denied',),), '以管理员权限运行或更改目录写入权限'),
    )

    for alternatives, advice in rules:
        if any(all(token in text for token in combo) for combo in alternatives):
            return advice
    return None
|
||||
@@ -0,0 +1,207 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""文件与目录操作模块"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import tkinter as tk
|
||||
from tkinter import messagebox, filedialog, scrolledtext
|
||||
|
||||
from .logging_ui import add_to_log
|
||||
from .ui_widgets import center_window
|
||||
from app.config.settings import ConfigManager
|
||||
|
||||
|
||||
def select_file(log_widget, file_types=None, title="选择文件"):
    """Show a file-open dialog and log the chosen path.

    Args:
        log_widget: Widget handed to ``add_to_log``.
        file_types: ``filetypes`` list for the dialog; defaults to all files.
        title: Dialog title.

    Returns:
        Whatever ``filedialog.askopenfilename`` returns (falsy on cancel).
    """
    filters = [("所有文件", "*.*")] if file_types is None else file_types
    chosen = filedialog.askopenfilename(title=title, filetypes=filters)
    if not chosen:
        return chosen
    add_to_log(log_widget, f"已选择文件: {chosen}\n", "info")
    return chosen
|
||||
|
||||
|
||||
def select_excel_file(log_widget):
    """Show a file-open dialog filtered to Excel files.

    Delegates to ``select_file`` and returns its result.
    """
    excel_filters = [("Excel文件", "*.xlsx *.xls"), ("所有文件", "*.*")]
    return select_file(log_widget, excel_filters, "选择Excel文件")
|
||||
|
||||
|
||||
def ensure_directories():
    """Create the working directories that do not exist yet.

    Input, output and temp folders come from the ``Paths`` config section;
    ``data/result`` and ``logs`` are fixed. Each created directory is printed.
    """
    cfg = ConfigManager()
    input_dir = cfg.get('Paths', 'input_folder', fallback='data/input')
    output_dir = cfg.get('Paths', 'output_folder', fallback='data/output')
    temp_dir = cfg.get('Paths', 'temp_folder', fallback='data/temp')

    for path in (input_dir, output_dir, 'data/result', temp_dir, 'logs'):
        if os.path.exists(path):
            continue
        os.makedirs(path, exist_ok=True)
        print(f"创建目录: {path}")
|
||||
|
||||
|
||||
def clean_cache(log_widget):
    """Delete processing caches so that every file is processed again.

    Removes the processed/merged record files, the files directly inside the
    temp folder, and ``*.active`` markers in ``logs``; then resets the
    running-task marker.

    Args:
        log_widget: Widget handed to ``add_to_log``.

    Errors on individual temp/marker files are logged and skipped; any other
    error is logged and shown in an error box.
    """
    from .command_runner import set_running_task

    def _remove_logged(path, done_message):
        # Delete one file; a failure is logged and the caller continues.
        try:
            os.remove(path)
            add_to_log(log_widget, done_message, "info")
        except Exception as e:
            add_to_log(log_widget, f"清除文件时出错: {path}, 错误: {str(e)}\n", "error")

    try:
        config = ConfigManager()
        processed_record = config.get('Paths', 'processed_record', fallback='data/processed_files.json')
        output_folder = config.get('Paths', 'output_folder', fallback='data/output')
        cache_files = [
            processed_record,
            os.path.join(output_folder, "processed_files.json"),
            os.path.join(output_folder, "merged_files.json")
        ]

        for cache_file in cache_files:
            if os.path.exists(cache_file):
                os.remove(cache_file)
                add_to_log(log_widget, f"已清除缓存文件: {cache_file}\n", "success")

        # Use the configured temp folder (same key ensure_directories uses)
        # instead of a hard-coded path, so a custom folder is cleaned too.
        temp_dir = config.get('Paths', 'temp_folder', fallback='data/temp')
        if os.path.exists(temp_dir):
            for file in os.listdir(temp_dir):
                file_path = os.path.join(temp_dir, file)
                if os.path.isfile(file_path):
                    _remove_logged(file_path, f"已清除临时文件: {file_path}\n")

        log_dir = "logs"
        if os.path.exists(log_dir):
            for file in os.listdir(log_dir):
                if file.endswith(".active"):
                    file_path = os.path.join(log_dir, file)
                    _remove_logged(file_path, f"已清除活动日志标记: {file_path}\n")

        set_running_task(None)

        add_to_log(log_widget, "缓存清除完成,系统将重新处理所有文件\n", "success")
        messagebox.showinfo("缓存清除", "缓存已清除,系统将重新处理所有文件。")
    except Exception as e:
        add_to_log(log_widget, f"清除缓存时出错: {str(e)}\n", "error")
        messagebox.showerror("错误", f"清除缓存时出错: {str(e)}")
|
||||
|
||||
|
||||
def open_result_directory():
    """Open ``data/result`` with ``os.startfile``, creating it if missing.

    Any failure is reported in an error box.
    """
    try:
        target = os.path.abspath("data/result")
        missing = not os.path.exists(target)
        if missing:
            os.makedirs(target, exist_ok=True)
        os.startfile(target)
    except Exception as exc:
        messagebox.showerror("错误", f"无法打开结果目录: {str(exc)}")
|
||||
|
||||
|
||||
def _open_directory_from_settings(settings_key, default_path, label):
    """Open the directory stored under *settings_key* in the user settings.

    Args:
        settings_key: Key looked up in the dict from ``load_user_settings``.
        default_path: Path used when the key is absent.
        label: Name of the directory, used in the error message.

    The directory is created when missing, then opened with ``os.startfile``.
    """
    from .user_settings import load_user_settings
    try:
        user_settings = load_user_settings()
        configured = user_settings.get(settings_key, default_path)
        target = os.path.abspath(configured)
        missing = not os.path.exists(target)
        if missing:
            os.makedirs(target, exist_ok=True)
        os.startfile(target)
    except Exception as exc:
        messagebox.showerror("错误", f"无法打开{label}: {str(exc)}")
|
||||
|
||||
|
||||
def open_input_directory_from_settings():
    """Open the input directory named by the ``input_folder`` user setting."""
    _open_directory_from_settings(
        settings_key='input_folder',
        default_path='data/input',
        label='输入目录',
    )
|
||||
|
||||
|
||||
def open_output_directory_from_settings():
    """Open the output directory named by the ``output_folder`` user setting."""
    _open_directory_from_settings(
        settings_key='output_folder',
        default_path='data/output',
        label='输出目录',
    )
|
||||
|
||||
|
||||
def open_result_directory_from_settings():
    """Open the result directory named by the ``result_folder`` user setting."""
    _open_directory_from_settings(
        settings_key='result_folder',
        default_path='data/result',
        label='结果目录',
    )
|
||||
|
||||
|
||||
def clean_data_files(log_widget):
    """Delete the files directly inside ``data/input`` and ``data/output``.

    Asks for confirmation first. Sub-directories are left alone. The number
    of deleted files is logged and shown in an info box.

    Args:
        log_widget: Widget handed to ``add_to_log``.
    """
    try:
        confirmed = messagebox.askyesno("确认清理", "确定要清理input和output目录的文件吗?这将删除所有输入和输出数据。")
        if not confirmed:
            add_to_log(log_widget, "操作已取消\n", "info")
            return

        removed = 0
        targets = (
            ("data/input", "已清理input目录\n"),
            ("data/output", "已清理output目录\n"),
        )
        for folder, done_message in targets:
            if not os.path.exists(folder):
                continue
            for entry in os.listdir(folder):
                candidate = os.path.join(folder, entry)
                if os.path.isfile(candidate):
                    os.remove(candidate)
                    removed += 1
            add_to_log(log_widget, done_message, "info")

        add_to_log(log_widget, f"清理完成,共清理 {removed} 个文件\n", "success")
        messagebox.showinfo("清理完成", f"已成功清理 {removed} 个文件")
    except Exception as exc:
        add_to_log(log_widget, f"清理数据文件时出错: {str(exc)}\n", "error")
        messagebox.showerror("错误", f"清理数据文件时出错: {str(exc)}")
|
||||
|
||||
|
||||
def clean_result_files(log_widget):
    """Delete the regular files directly inside ``data/result``.

    The user is asked for confirmation first. Sub-directories are left alone.
    A file that cannot be removed (for example a purchase order still open in
    a spreadsheet program) is skipped and counted instead of aborting the
    clean-up on the first failure.

    Args:
        log_widget: Log target forwarded to ``add_to_log``.
    """
    try:
        if not messagebox.askyesno("确认清理", "确定要清理result目录的文件吗?这将删除所有已生成的采购单文件。"):
            add_to_log(log_widget, "操作已取消\n", "info")
            return

        count = 0
        failed = 0
        result_dir = "data/result"
        if os.path.exists(result_dir):
            for entry in os.listdir(result_dir):
                file_path = os.path.join(result_dir, entry)
                if not os.path.isfile(file_path):
                    continue
                try:
                    os.remove(file_path)
                except OSError:
                    failed += 1
                else:
                    count += 1

        add_to_log(log_widget, f"已清理result目录,共 {count} 个文件\n", "success")
        if failed:
            # Tell the user about leftovers instead of reporting plain success.
            add_to_log(log_widget, f"有 {failed} 个文件无法删除(可能正被其他程序占用)\n", "warning")
            messagebox.showwarning("清理完成", f"已清理result目录 {count} 个文件,{failed} 个文件无法删除")
        else:
            messagebox.showinfo("清理完成", f"已清理result目录 {count} 个文件")
    except Exception as e:
        add_to_log(log_widget, f"清理result目录时出错: {str(e)}\n", "error")
        messagebox.showerror("错误", f"清理result目录时出错: {str(e)}")
|
||||
|
||||
|
||||
def validate_unit_price_against_item_data(result_path: str, log_widget=None):
    """Run the unit-price check for a result file and report the findings.

    Calls ``OrderService().validate_unit_price(result_path)``. When it returns
    a non-empty sequence, a warning dialog lists at most the first ten entries
    and (if a log widget was given) a warning line is logged. Otherwise a
    success line is logged. Any exception is reported to the log widget only;
    with ``log_widget=None`` it is silently ignored.

    Args:
        result_path: Path handed to ``OrderService.validate_unit_price``.
        log_widget: Optional log target for ``add_to_log``.
    """
    try:
        # Imported lazily, as in the original implementation.
        from app.services.order_service import OrderService

        bad_results = OrderService().validate_unit_price(result_path)

        if not bad_results:
            if log_widget is not None:
                add_to_log(log_widget, "单价校验通过(差异<=1元)\n", "success")
            return

        total = len(bad_results)
        shown = bad_results[:min(total, 10)]
        msg = f"存在{total}条单价与商品资料进货价差异超过1元:\n" + "\n".join(shown)
        if total > 10:
            msg += f"\n...(其余 {total - 10} 条已省略)"
        messagebox.showwarning("单价校验提示", msg)

        if log_widget is not None:
            add_to_log(log_widget, f"单价校验发现{total}条差异>1元\n", "warning")
    except Exception as e:
        if log_widget is not None:
            add_to_log(log_widget, f"单价校验出错: {e}\n", "error")
|
||||
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""GUI日志处理模块"""
|
||||
|
||||
import logging
|
||||
import queue
|
||||
import sys
|
||||
import tkinter as tk
|
||||
|
||||
# 全局日志队列,用于异步更新UI
|
||||
LOG_QUEUE = queue.Queue()
|
||||
|
||||
|
||||
class LogRedirector:
    """File-like object that mirrors written text to the console and a Tk text widget.

    Text passed to :meth:`write` is appended to an internal buffer, echoed to
    the original stdout when one exists, and flushed into the widget by a
    callback scheduled with ``text_widget.after``.
    """

    # Keyword groups checked in priority order; the first group with a hit
    # decides the text tag used for the whole buffered chunk.
    _TAG_MARKERS = (
        ("error", ("错误", "error", "失败", "异常", "exception")),
        ("warning", ("警告", "warning")),
        ("success", ("成功", "success", "完成", "成功处理")),
        ("info", ("info", "信息", "开始", "处理中")),
    )

    def __init__(self, text_widget):
        self.text_widget = text_widget
        self.buffer = ""
        # sys.__stdout__ is None for windowed applications (pythonw, frozen
        # GUI builds), so every use of self.terminal must tolerate None.
        self.terminal = sys.__stdout__

    def write(self, string):
        """Buffer *string*, echo it to the console and schedule a widget update.

        Returns:
            The number of characters written, as file-like ``write`` methods do.
        """
        self.buffer += string
        if self.terminal is not None:
            self.terminal.write(string)
        self.text_widget.after(0, self.update_text_widget)
        return len(string)

    def update_text_widget(self):
        """Move the buffered text into the widget, tagged by detected keywords."""
        # Snapshot and clear first so text written while the widget is being
        # updated is kept for the next callback instead of being discarded.
        pending, self.buffer = self.buffer, ""
        if not pending:
            # An earlier callback already flushed everything.
            return

        self.text_widget.configure(state=tk.NORMAL)
        try:
            if pending.strip():
                lowered = pending.lower()
                tag = next(
                    (name for name, markers in self._TAG_MARKERS
                     if any(marker in lowered for marker in markers)),
                    "normal",
                )
                self.text_widget.insert(tk.END, pending, tag)
            else:
                # Whitespace-only output (e.g. a bare newline) is inserted untagged.
                self.text_widget.insert(tk.END, pending)
            self.text_widget.see(tk.END)
        finally:
            # Always return the widget to read-only, even if the insert failed.
            self.text_widget.configure(state=tk.DISABLED)

    def flush(self):
        """Flush the console stream, if there is one."""
        if self.terminal is not None:
            self.terminal.flush()
|
||||
|
||||
|
||||
class GUILogHandler(logging.Handler):
    """Logging handler that pushes formatted records onto ``LOG_QUEUE``.

    Each record is queued as a ``(text, tag)`` tuple; the tag is derived from
    the record's level.
    """

    def __init__(self, text_widget):
        super().__init__()
        self.text_widget = text_widget

    def emit(self, record):
        """Format *record*, choose its tag by level and enqueue it."""
        try:
            formatted = self.format(record)

            # Highest threshold first; anything below INFO is "normal".
            tag = "normal"
            for threshold, candidate in (
                (logging.ERROR, "error"),
                (logging.WARNING, "warning"),
                (logging.INFO, "info"),
            ):
                if record.levelno >= threshold:
                    tag = candidate
                    break

            LOG_QUEUE.put((formatted + "\n", tag))
        except Exception:
            self.handleError(record)
|
||||
|
||||
|
||||
def poll_log_queue(text_widget):
    """Drain ``LOG_QUEUE`` into *text_widget* and re-schedule itself in 100 ms.

    Every queued ``(message, tag)`` pair is appended to the widget. If anything
    was appended, the view scrolls to the end and the widget is set back to
    the disabled state. Errors while draining are ignored; the next poll is
    scheduled regardless.
    """
    try:
        appended = 0
        while not LOG_QUEUE.empty():
            message, tag = LOG_QUEUE.get_nowait()
            text_widget.configure(state=tk.NORMAL)
            text_widget.insert(tk.END, message, tag)
            appended += 1

        if appended:
            text_widget.see(tk.END)
            text_widget.configure(state=tk.DISABLED)
    except Exception:
        pass
    finally:
        text_widget.after(100, lambda: poll_log_queue(text_widget))
|
||||
|
||||
|
||||
def init_gui_logger(text_widget, level=logging.INFO):
    """Attach a ``GUILogHandler`` to the root logger and return it.

    Console stream handlers are removed from the root logger so output is not
    duplicated. File handlers are kept: ``logging.FileHandler`` is a subclass
    of ``logging.StreamHandler``, so a plain ``isinstance`` check against
    ``StreamHandler`` would silently drop file logging as well.

    Args:
        text_widget: Widget passed to the ``GUILogHandler`` constructor.
        level: Level applied to the handler and the root logger.

    Returns:
        The ``GUILogHandler`` attached to the root logger. If one is already
        attached, it is returned rather than a new, unattached instance.
    """
    root_logger = logging.getLogger()

    for existing in root_logger.handlers[:]:
        is_console_stream = (
            isinstance(existing, logging.StreamHandler)
            and not isinstance(existing, logging.FileHandler)
        )
        if is_console_stream:
            root_logger.removeHandler(existing)

    handler = next(
        (h for h in root_logger.handlers if isinstance(h, GUILogHandler)),
        None,
    )
    if handler is None:
        handler = GUILogHandler(text_widget)
        handler.setLevel(level)
        handler.setFormatter(
            logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )
        root_logger.addHandler(handler)

    root_logger.setLevel(level)
    return handler
|
||||
|
||||
|
||||
def dispose_gui_logger():
    """Detach every ``GUILogHandler`` from the root logger and close it.

    Errors raised while closing a handler are ignored.
    """
    root_logger = logging.getLogger()
    gui_handlers = [h for h in root_logger.handlers[:] if isinstance(h, GUILogHandler)]
    for gui_handler in gui_handlers:
        root_logger.removeHandler(gui_handler)
        try:
            gui_handler.close()
        except Exception:
            pass
|
||||
|
||||
|
||||
def add_to_log(log_widget, text, tag="normal"):
    """Queue *text* with *tag* on ``LOG_QUEUE`` for ``poll_log_queue`` to display.

    When *log_widget* is ``None`` the text is printed to stdout instead,
    prefixed with the tag in square brackets.
    """
    if log_widget is not None:
        LOG_QUEUE.put((text, tag))
        return

    print(f"[{tag}] {text}", end="")
|
||||
@@ -0,0 +1,485 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""主窗口模块"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import tkinter as tk
|
||||
from tkinter import messagebox, filedialog, scrolledtext
|
||||
|
||||
from app.config.settings import ConfigManager
|
||||
from app.core.utils.log_utils import set_log_level
|
||||
|
||||
from .theme import THEMES, get_theme_mode, set_theme_mode, create_modern_button, create_card_frame
|
||||
from .logging_ui import add_to_log, poll_log_queue
|
||||
from .ui_widgets import StatusBar
|
||||
from .user_settings import (
|
||||
load_user_settings, save_user_settings, refresh_recent_list_widget,
|
||||
_extract_path_from_recent_item, clear_recent_files, RECENT_LIST_WIDGET,
|
||||
)
|
||||
from .file_operations import (
|
||||
ensure_directories, open_result_directory, clean_cache,
|
||||
clean_data_files, clean_result_files,
|
||||
)
|
||||
from .action_handlers import (
|
||||
process_single_image_with_status, run_pipeline_directly,
|
||||
batch_ocr_with_status, batch_process_orders_with_status,
|
||||
merge_orders_with_status, process_excel_file_with_status,
|
||||
process_dropped_file,
|
||||
)
|
||||
from .config_dialog import show_config_dialog
|
||||
from .barcode_editor import edit_barcode_mappings
|
||||
from .shortcuts import bind_keyboard_shortcuts
|
||||
from app.core.utils.dialog_utils import show_cloud_sync_dialog
|
||||
|
||||
|
||||
def _init_window():
    """Create the root window and apply theme and persisted settings.

    Returns:
        A ``(root, theme, settings, dnd_supported)`` tuple, where
        ``dnd_supported`` tells whether the root was created through
        ``tkinterdnd2``.
    """
    ensure_directories()

    # Prefer a drag-and-drop capable root; fall back to a plain Tk root.
    try:
        from tkinterdnd2 import TkinterDnD, DND_FILES
        root = TkinterDnD.Tk()
        dnd_supported = True
    except Exception:
        root = tk.Tk()
        dnd_supported = False

    settings = load_user_settings()
    set_theme_mode(settings.get('theme_mode', get_theme_mode()))

    try:
        ver = ConfigManager().get('App', 'version', fallback='dev')
        root.title(f"益选-OCR订单处理系统 v{ver} by 欢欢欢")
    except Exception:
        root.title("益选-OCR订单处理系统 by 欢欢欢")

    default_geometry = "900x600"
    root.geometry(default_geometry)
    # NOTE(review): this overwrites any 'window_size' loaded from the user
    # settings with the default; confirm that resetting is intended.
    settings['window_size'] = default_geometry

    theme = THEMES[get_theme_mode()]
    root.configure(bg=theme["bg"])

    # Best effort: apply the persisted log level and worker count.
    try:
        stored_level = settings.get('log_level')
        if stored_level:
            set_log_level(stored_level)

        stored_workers = settings.get('concurrency_max_workers')
        if stored_workers:
            cfg = ConfigManager()
            cfg.update('Performance', 'max_workers', str(stored_workers))
            cfg.save_config()
    except Exception:
        pass

    try:
        root.iconbitmap(default="")
    except Exception:
        pass

    return root, theme, settings, dnd_supported
|
||||
|
||||
|
||||
def _create_left_panel(content_frame, theme, log_text, status_bar):
    """Build the left card: pipeline, OCR and Excel buttons plus the recent-files list."""
    card_bg = theme["card_bg"]
    heading_font = ("Microsoft YaHei UI", 10, "bold")

    left_panel = create_card_frame(content_frame)
    left_panel.pack(side=tk.LEFT, fill=tk.BOTH, expand=False, padx=(0, 5), pady=5)
    left_panel.configure(width=160)

    panel_content = tk.Frame(left_panel, bg=card_bg)
    panel_content.pack(fill=tk.BOTH, expand=True, padx=10, pady=(5, 10))

    def _section(title):
        """Create a titled section and return the frame that holds its buttons."""
        section = tk.LabelFrame(
            panel_content, text=title, bg=card_bg, fg=theme["fg"],
            font=heading_font, relief="flat", borderwidth=0
        )
        section.pack(fill=tk.X, pady=(0, 8))
        holder = tk.Frame(section, bg=card_bg)
        holder.pack(fill=tk.X, padx=8, pady=6)
        return holder

    def _button_pair(holder, first, second):
        """Place two equally sized buttons side by side in a new row."""
        row = tk.Frame(holder, bg=card_bg)
        row.pack(fill=tk.X, pady=3)
        first_text, first_command = first
        second_text, second_command = second
        create_modern_button(row, first_text, first_command, "primary", px_width=72, px_height=32).pack(side=tk.LEFT, padx=(0, 3))
        create_modern_button(row, second_text, second_command, "primary", px_width=72, px_height=32).pack(side=tk.LEFT, padx=(3, 0))

    # Full pipeline section
    pipeline_frame = _section("完整流程")
    create_modern_button(
        pipeline_frame, "一键处理",
        lambda: run_pipeline_directly(log_text, status_bar),
        "primary", px_width=150, px_height=32
    ).pack(anchor='w', pady=3)

    # OCR section
    _button_pair(
        _section("OCR处理"),
        ("批量识别", lambda: batch_ocr_with_status(log_text, status_bar)),
        ("单个识别", lambda: process_single_image_with_status(log_text, status_bar)),
    )

    # Excel section
    _button_pair(
        _section("Excel处理"),
        ("批量处理", lambda: batch_process_orders_with_status(log_text, status_bar)),
        ("单个处理", lambda: process_excel_file_with_status(log_text, status_bar)),
    )

    # Recent files section
    _create_recent_files_section(panel_content, theme, log_text)
|
||||
|
||||
|
||||
def _create_recent_files_section(parent, theme, log_text):
    """Build the "recent files" list with its clear and purge buttons.

    The list box is also stored in ``app.ui.user_settings.RECENT_LIST_WIDGET``
    before ``refresh_recent_list_widget`` is called.
    """
    card_bg = theme["card_bg"]

    recent_section = tk.LabelFrame(
        parent, text="最近文件", bg=card_bg, fg=theme["fg"],
        font=("Microsoft YaHei UI", 10, "bold"), relief="flat", borderwidth=0
    )
    recent_section.pack(fill=tk.BOTH, pady=(0, 12))
    recent_frame = tk.Frame(recent_section, bg=card_bg)
    recent_frame.pack(fill=tk.BOTH, padx=8, pady=6)
    recent_top = tk.Frame(recent_frame, bg=card_bg)
    recent_top.pack(fill=tk.X)

    def _resize_recent_top(event):
        """Keep the list area at 75% of the surrounding frame's height."""
        try:
            recent_top.configure(height=int(event.height * 0.75))
        except Exception:
            pass

    try:
        recent_top.pack_propagate(False)
    except Exception:
        pass
    recent_frame.bind('<Configure>', _resize_recent_top)

    recent_rect = tk.Frame(recent_top, bg=card_bg, highlightbackground=theme["border"], highlightthickness=1)
    recent_rect.pack(fill=tk.BOTH, expand=True)
    recent_list = tk.Listbox(recent_rect, height=12)
    recent_scrollbar = tk.Scrollbar(recent_rect)
    recent_list.configure(yscrollcommand=recent_scrollbar.set)
    recent_scrollbar.configure(command=recent_list.yview)
    recent_list.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    recent_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

    # Publish the list box on the settings module's RECENT_LIST_WIDGET global.
    import app.ui.user_settings as _us_mod
    _us_mod.RECENT_LIST_WIDGET = recent_list

    def _open_selected_event(evt=None):
        """Open the selected entry with the OS default application."""
        try:
            selection = recent_list.curselection()
            if not selection:
                return
            path = _extract_path_from_recent_item(recent_list.get(selection[0]))
            if os.path.exists(path):
                os.startfile(path)
            else:
                messagebox.showwarning("文件不存在", path)
        except Exception as e:
            messagebox.showerror("打开失败", str(e))

    recent_list.bind('<Double-Button-1>', _open_selected_event)
    refresh_recent_list_widget()

    rf_btns = tk.Frame(recent_frame, bg=card_bg)
    rf_btns.pack(fill=tk.X, pady=6)

    def clear_list():
        """Clear the stored recent files and empty the list box."""
        clear_recent_files()
        recent_list.delete(0, tk.END)

    create_modern_button(rf_btns, "清空列表", clear_list, "primary", px_width=72, px_height=32).pack(side=tk.LEFT, padx=(3, 0))

    def purge_invalid():
        """Drop entries whose file no longer exists and persist the rest."""
        try:
            listed_paths = (
                _extract_path_from_recent_item(recent_list.get(index))
                for index in range(recent_list.size())
            )
            kept = [path for path in listed_paths if os.path.exists(path)]

            # Newest first; keep the listed order if a timestamp lookup fails.
            try:
                kept_sorted = sorted(kept, key=lambda p: os.path.getmtime(p), reverse=True)
            except Exception:
                kept_sorted = kept

            stored = load_user_settings()
            stored['recent_files'] = kept_sorted
            save_user_settings(stored)

            recent_list.delete(0, tk.END)
            limit = recent_list.size() or len(stored['recent_files'])
            for position, path in enumerate(stored['recent_files'][:limit], start=1):
                recent_list.insert(tk.END, f"{position}. {path}")
            refresh_recent_list_widget()
            add_to_log(log_text, "已清理无效的最近文件条目\n", "success")
        except Exception as e:
            messagebox.showerror("清理失败", str(e))

    create_modern_button(rf_btns, "清理无效", purge_invalid, "primary", px_width=72, px_height=32).pack(side=tk.LEFT, padx=(3, 0))
|
||||
|
||||
|
||||
def _create_right_panel(content_frame, theme, log_text, root, status_bar=None):
    """Build the right card: quick-action buttons and system-settings buttons.

    Args:
        content_frame: Parent container for the card.
        theme: Colour mapping; ``card_bg`` and ``fg`` are read here.
        log_text: Log widget handed to the button actions.
        root: Application root window.
        status_bar: Optional status bar used by the "merge orders" action.
            When omitted, the first ``StatusBar`` already attached to *root*
            is reused; a new one is created only if none is found. The action
            previously built a fresh, never-displayed ``StatusBar`` on every
            click, so status updates were invisible and widgets piled up.
    """
    card_bg = theme["card_bg"]
    heading_font = ("Microsoft YaHei UI", 10, "bold")

    right_panel = create_card_frame(content_frame)
    right_panel.pack(side=tk.RIGHT, fill=tk.BOTH, expand=False, padx=(5, 0), pady=5)
    right_panel.configure(width=380)

    right_panel_content = tk.Frame(right_panel, bg=card_bg)
    right_panel_content.pack(fill=tk.BOTH, expand=True, padx=10, pady=(5, 10))

    resolved = {"bar": status_bar}

    def _status_bar():
        """Return the status bar to report to, resolving it once on first use."""
        if resolved["bar"] is None:
            try:
                found = next(
                    (child for child in root.winfo_children() if isinstance(child, StatusBar)),
                    None,
                )
            except (TypeError, tk.TclError):
                found = None
            resolved["bar"] = found if found is not None else StatusBar(root)
        return resolved["bar"]

    def _add_buttons(holder, entries):
        """Stack one left-aligned button per ``(label, command)`` entry."""
        for label, command in entries:
            create_modern_button(holder, label, command, "primary", px_width=132, px_height=32).pack(anchor='w', pady=3)

    # Quick actions
    tools_section = tk.LabelFrame(
        right_panel_content, text="快捷操作", bg=card_bg, fg=theme["fg"],
        font=heading_font, relief="flat", borderwidth=0
    )
    tools_section.pack(fill=tk.X, pady=(0, 8))
    tools_buttons_frame = tk.Frame(tools_section, bg=card_bg)
    tools_buttons_frame.pack(fill=tk.X, padx=8, pady=6)
    tk.Frame(tools_buttons_frame, bg=card_bg).pack(fill=tk.X, pady=3)

    _add_buttons(tools_buttons_frame, (
        ("打开结果目录", lambda: open_result_directory()),
        ("打开输出目录", lambda: os.startfile(os.path.abspath("data/output"))),
        ("打开输入目录", lambda: os.startfile(os.path.abspath("data/input"))),
        ("合并订单", lambda: merge_orders_with_status(log_text, _status_bar())),
        ("清除缓存", lambda: clean_cache(log_text)),
        ("清理input/out文件", lambda: clean_data_files(log_text)),
        ("清理result文件", lambda: clean_result_files(log_text)),
    ))

    # System settings
    settings_section = tk.LabelFrame(
        right_panel_content, text="系统设置", bg=card_bg, fg=theme["fg"],
        font=heading_font, relief="flat", borderwidth=0
    )
    settings_section.pack(fill=tk.X, pady=(0, 8))
    settings_buttons_frame = tk.Frame(settings_section, bg=card_bg)
    settings_buttons_frame.pack(fill=tk.X, padx=8, pady=6)

    _add_buttons(settings_buttons_frame, (
        ("系统设置", lambda: show_config_dialog(root, ConfigManager())),
        ("条码映射", lambda: edit_barcode_mappings(log_text)),
        ("云端同步", lambda: show_cloud_sync_dialog(root)),
    ))
|
||||
|
||||
|
||||
def _setup_drag_area(mid_container, theme, dnd_supported, log_text, status_bar):
    """Build the drop / click-to-select area above the log panel.

    Clicking the area opens a file dialog and every chosen file is passed to
    ``process_dropped_file``. When *dnd_supported* is true the frame is also
    registered as a file drop target. Otherwise two buttons offer to copy or
    run the ``pip install tkinterdnd2`` command.

    The hint label is bound to the click handler as well: it covers the
    clickable frames, and Tk delivers a click to the widget under the pointer,
    so without its own binding a click on the hint text did nothing.
    """
    card_bg = theme["card_bg"]

    drag_panel = create_card_frame(mid_container)
    drag_panel.pack(side=tk.TOP, fill=tk.X, padx=(5, 5), pady=(0, 5))
    drag_panel_content = tk.Frame(drag_panel, bg=card_bg)
    drag_panel_content.pack(fill=tk.X, padx=10, pady=6)

    dnd_section = tk.LabelFrame(
        drag_panel_content, bg=card_bg, fg=theme["fg"],
        font=("Microsoft YaHei UI", 10, "bold"), relief="flat", borderwidth=0
    )
    dnd_section.pack(fill=tk.X, pady=(0, 0))
    dnd_frame = tk.Frame(dnd_section, bg=card_bg, highlightthickness=1, highlightbackground=theme["border"])
    dnd_frame.configure(height=60)
    dnd_frame.pack(fill=tk.X, padx=8, pady=6)
    try:
        # Keep the fixed height instead of shrinking to the children.
        dnd_frame.pack_propagate(False)
    except Exception:
        pass

    def _set_highlight(active: bool):
        """Switch the frame border between the accent and the normal colour."""
        try:
            dnd_frame.configure(highlightbackground=theme["info"] if active else theme["border"])
        except Exception:
            pass

    dnd_frame.bind('<Enter>', lambda e: _set_highlight(True))
    dnd_frame.bind('<Leave>', lambda e: _set_highlight(False))

    msg_row = tk.Frame(dnd_frame, bg=card_bg)
    msg_row.pack(fill=tk.X)
    if dnd_supported:
        hint_text = "拖拽已启用:拖拽或点击此区域选择文件"
    else:
        hint_text = "点击此区域选择文件;可安装拖拽支持"
    hint_label = tk.Label(msg_row, text=hint_text, bg=card_bg, fg="#999999", justify="center")
    hint_label.pack(fill=tk.X)

    if not dnd_supported:
        btn_row = tk.Frame(dnd_frame, bg=card_bg)
        btn_row.pack(fill=tk.X)

        def copy_install():
            """Copy the pip install command to the clipboard."""
            try:
                toplevel = mid_container.winfo_toplevel()
                toplevel.clipboard_clear()
                toplevel.clipboard_append("pip install tkinterdnd2")
                messagebox.showinfo("已复制", "已复制安装命令:pip install tkinterdnd2")
            except Exception as e:
                messagebox.showwarning("复制失败", str(e))

        create_modern_button(btn_row, "复制安装命令", copy_install, "primary", px_width=132, px_height=28).pack(side=tk.RIGHT)

        def install_and_restart():
            """Install tkinterdnd2 with pip and offer to restart the application."""
            try:
                add_to_log(log_text, "开始安装拖拽支持库 tkinterdnd2...\n", "info")
                cmd = [sys.executable, "-m", "pip", "install", "tkinterdnd2"]
                result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
                add_to_log(log_text, result.stdout + "\n", "info")
                add_to_log(log_text, "安装成功,准备重启程序以启用拖拽...\n", "success")
                if messagebox.askyesno("安装完成", "已安装拖拽支持,是否立即重启应用?"):
                    # Replace the current process with a fresh interpreter.
                    os.execl(sys.executable, sys.executable, *sys.argv)
            except subprocess.CalledProcessError as e:
                add_to_log(log_text, f"安装失败: {e.stderr}\n", "error")
                messagebox.showerror("安装失败", f"安装输出:\n{e.stderr}")
            except Exception as e:
                add_to_log(log_text, f"安装失败: {str(e)}\n", "error")
                messagebox.showerror("安装失败", str(e))

        create_modern_button(btn_row, "一键安装拖拽", install_and_restart, "primary", px_width=132, px_height=28).pack(side=tk.RIGHT, padx=(3, 0))

    def _click_select(evt=None):
        """Let the user pick files and process each one."""
        try:
            files = filedialog.askopenfilenames(
                title="选择图片或Excel文件",
                filetypes=[
                    ("支持文件", "*.xlsx *.xls *.jpg *.jpeg *.png *.bmp"),
                    ("Excel", "*.xlsx *.xls"),
                    ("图片", "*.jpg *.jpeg *.png *.bmp"),
                    ("所有文件", "*.*"),
                ]
            )
            if not files:
                return
            for chosen in files:
                process_dropped_file(log_text, status_bar, chosen)
        except Exception as e:
            messagebox.showerror("选择失败", str(e))

    # Bind the label too, otherwise it swallows clicks on the hint text.
    for clickable in (dnd_frame, msg_row, hint_label):
        clickable.bind('<Button-1>', _click_select)

    if dnd_supported:
        def _split_drop_data(data):
            """Split drop data into paths; ``{...}`` groups a path containing spaces."""
            paths = []
            buf = ""
            in_brace = False
            for ch in data:
                if ch == '{':
                    in_brace = True
                    buf = ""
                elif ch == '}':
                    in_brace = False
                    paths.append(buf)
                    buf = ""
                elif ch == ' ' and not in_brace:
                    if buf:
                        paths.append(buf)
                        buf = ""
                else:
                    buf += ch
            if buf:
                paths.append(buf)
            return paths

        def _on_drop(event):
            """Process every file contained in a drop event."""
            try:
                for dropped in _split_drop_data(event.data):
                    process_dropped_file(log_text, status_bar, dropped)
            except Exception as e:
                add_to_log(log_text, f"拖拽处理失败: {str(e)}\n", "error")

        try:
            from tkinterdnd2 import DND_FILES
            dnd_frame.drop_target_register(DND_FILES)
            dnd_frame.dnd_bind('<<Drop>>', _on_drop)
        except Exception:
            pass
|
||||
|
||||
|
||||
def _create_log_panel(mid_container, theme):
    """Build the central log panel, start log polling and queue the welcome text.

    The welcome banner takes its version from the ``App``/``version`` config
    entry, the same source the window title uses, instead of a hard-coded
    number that can drift. If the config cannot be read, the banner omits the
    version.

    Returns:
        The ``ScrolledText`` widget that displays the log.
    """
    log_panel = create_card_frame(mid_container, "处理日志")
    log_panel.pack(side=tk.TOP, fill=tk.BOTH, expand=True, padx=(5, 5), pady=5)

    log_text = scrolledtext.ScrolledText(
        log_panel, wrap=tk.WORD, width=68, height=26,
        bg=theme["log_bg"], fg=theme["log_fg"],
        font=("Consolas", 9), state=tk.DISABLED,
        relief="flat", borderwidth=0
    )
    log_text.pack(fill=tk.BOTH, expand=True, padx=10, pady=(5, 10))

    # One text tag per message category.
    log_text.tag_configure("command", foreground=theme["info"], font=("Consolas", 9, "bold"))
    log_text.tag_configure("time", foreground=theme["secondary_bg"], font=("Consolas", 8))
    log_text.tag_configure("separator", foreground=theme["border"])
    log_text.tag_configure("success", foreground=theme["success"], font=("Consolas", 9, "bold"))
    log_text.tag_configure("error", foreground=theme["error"], font=("Consolas", 9, "bold"))
    log_text.tag_configure("warning", foreground=theme["warning"], font=("Consolas", 9, "bold"))
    log_text.tag_configure("info", foreground=theme["info"], font=("Consolas", 9))

    poll_log_queue(log_text)

    try:
        version = ConfigManager().get('App', 'version', fallback='dev')
        welcome = f"欢迎使用 益选-OCR订单处理系统 v{version}\n"
    except Exception:
        welcome = "欢迎使用 益选-OCR订单处理系统\n"

    intro_lines = (
        (welcome, "success"),
        ("系统已就绪,请选择相应功能进行操作。\n\n", "info"),
        ("功能说明:\n", "command"),
        ("• 完整处理流程:一键完成OCR识别和Excel处理\n", "info"),
        ("• 批量处理订单:批量处理多个订单文件\n", "info"),
        ("• 处理烟草订单:专门处理烟草类订单\n", "info"),
        ("• 合并订单:将多个订单合并为一个文件\n\n", "info"),
        ("请将需要处理的图片文件放入 data/input 目录中。\n", "warning"),
        ("OCR识别结果保存在 data/output 目录,处理完成的订单保存在 result 目录中。\n\n", "warning"),
        ("=" * 50 + "\n\n", "separator"),
    )
    for text, tag in intro_lines:
        add_to_log(log_text, text, tag)

    return log_text
|
||||
|
||||
|
||||
def main():
    """Build the main window, wire up its panels and run the Tk main loop.

    Any exception raised during start-up is printed together with its
    traceback and, if possible, shown in an error dialog.
    """
    try:
        root, theme, settings, dnd_supported = _init_window()
        window_bg = theme["bg"]

        # Outer containers
        main_container = tk.Frame(root, bg=window_bg)
        main_container.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
        content_frame = tk.Frame(main_container, bg=window_bg)
        content_frame.pack(fill=tk.BOTH, expand=True)

        # Middle column (drag area + log)
        mid_container = tk.Frame(content_frame, bg=window_bg)
        mid_container.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=(5, 5), pady=5)

        log_text = _create_log_panel(mid_container, theme)

        # Status bar
        status_bar = StatusBar(root)
        status_bar.pack(side=tk.BOTTOM, fill=tk.X)

        # Side panels and drag area
        _create_left_panel(content_frame, theme, log_text, status_bar)
        _create_right_panel(content_frame, theme, log_text, root)
        _setup_drag_area(mid_container, theme, dnd_supported, log_text, status_bar)

        def on_close():
            """Persist window size and theme (best effort), then close the window."""
            try:
                width = root.winfo_width()
                height = root.winfo_height()
                settings['window_size'] = f"{width}x{height}"
                settings['theme_mode'] = get_theme_mode()
                save_user_settings(settings)
            except Exception:
                pass
            root.destroy()

        # Close handler and keyboard shortcuts
        root.protocol("WM_DELETE_WINDOW", on_close)
        bind_keyboard_shortcuts(root, log_text, status_bar)

        root.mainloop()

    except Exception as e:
        import traceback
        error_msg = f"程序启动失败: {str(e)}\n详细错误信息:\n{traceback.format_exc()}"
        print(error_msg)
        try:
            import tkinter.messagebox as mb
            mb.showerror("启动错误", f"程序启动失败:\n{str(e)}")
        except Exception:
            pass
|
||||
@@ -0,0 +1,371 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""处理结果预览对话框模块"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import datetime
|
||||
import tkinter as tk
|
||||
from tkinter import messagebox, scrolledtext
|
||||
|
||||
from .theme import THEMES, get_theme_mode, apply_theme
|
||||
from .ui_widgets import center_window
|
||||
from app.core.utils.file_utils import format_file_size
|
||||
|
||||
TOBACCO_PREVIEW_WINDOW = None
|
||||
|
||||
|
||||
def show_result_preview(command, output):
    """Show the result preview that matches *command*.

    The first keyword found in *command* ("ocr", "excel", "merge",
    "pipeline", checked in that order) selects the preview function. If none
    matches, a generic completion dialog is shown.
    """
    previews = (
        ("ocr", show_ocr_result_preview),
        ("excel", show_excel_result_preview),
        ("merge", show_merge_result_preview),
        ("pipeline", show_pipeline_result_preview),
    )
    for keyword, preview in previews:
        if keyword in command:
            preview(output)
            return

    messagebox.showinfo("处理完成", "操作已成功完成!\n请在data/output目录查看结果。")
|
||||
|
||||
|
||||
def show_ocr_result_preview(output):
    """Show a summary window for an OCR run.

    Totals are parsed from the "所有图片处理完成" line of *output*. If that
    line is missing, a plain completion dialog is shown instead.
    """
    summary = re.search(r'所有图片处理完成, 总计: (\d+), 成功: (\d+)', output)
    if not summary:
        messagebox.showinfo("OCR处理完成", "OCR处理已完成,请在data/output目录查看结果。")
        return

    total, success = int(summary.group(1)), int(summary.group(2))

    preview = tk.Toplevel()
    preview.title("OCR处理结果")
    preview.geometry("400x300")
    preview.resizable(False, False)
    center_window(preview)

    tk.Label(preview, text="OCR处理完成", font=("Arial", 16, "bold")).pack(pady=10)

    result_frame = tk.Frame(preview)
    result_frame.pack(pady=10, fill=tk.BOTH, expand=True)

    for line in (
        f"总共处理: {total} 个文件",
        f"成功处理: {success} 个文件",
        f"失败数量: {total - success} 个文件",
    ):
        tk.Label(result_frame, text=line, font=("Arial", 12)).pack(anchor=tk.W, padx=20, pady=5)

    # Verdict: all succeeded, more than 80% succeeded, or worse.
    if success == total:
        result_text, result_color = "全部处理成功!", "#28a745"
    elif success > total * 0.8:
        result_text, result_color = "大部分处理成功。", "#ffc107"
    else:
        result_text, result_color = "处理失败较多,请检查日志。", "#dc3545"

    tk.Label(result_frame, text=result_text, font=("Arial", 12, "bold"), fg=result_color).pack(pady=10)

    button_frame = tk.Frame(preview)
    button_frame.pack(pady=10)

    tk.Button(button_frame, text="查看输出文件", command=lambda: os.startfile(os.path.abspath("data/output"))).pack(side=tk.LEFT, padx=10)
    tk.Button(button_frame, text="关闭", command=preview.destroy).pack(side=tk.LEFT, padx=10)
|
||||
|
||||
|
||||
def show_excel_result_preview(output):
    """Show a summary window for an Excel processing run.

    The product count and the saved file path are parsed from *output*. If
    either is missing, a plain completion dialog is shown instead.
    """
    extract_match = re.search(r'提取到 (\d+) 个商品信息', output)
    file_match = re.search(r'采购单已保存到: (.+?)(?:\n|$)', output)

    if not (extract_match and file_match):
        messagebox.showinfo("Excel处理完成", "Excel处理已完成,请在data/output目录查看结果。")
        return

    products_count = int(extract_match.group(1))
    output_file = file_match.group(1)

    preview = tk.Toplevel()
    preview.title("Excel处理结果")
    preview.geometry("450x320")
    preview.resizable(False, False)
    center_window(preview)

    tk.Label(preview, text="Excel处理完成", font=("Arial", 16, "bold")).pack(pady=10)

    result_frame = tk.Frame(preview)
    result_frame.pack(pady=10, fill=tk.BOTH, expand=True)

    body_font = ("Arial", 12)
    tk.Label(result_frame, text=f"提取商品数量: {products_count} 个", font=body_font).pack(anchor=tk.W, padx=20, pady=5)
    tk.Label(result_frame, text=f"输出文件: {os.path.basename(output_file)}", font=body_font).pack(anchor=tk.W, padx=20, pady=5)

    tk.Label(result_frame, text="采购单已成功生成!", font=("Arial", 12, "bold"), fg="#28a745").pack(pady=10)

    file_frame = tk.Frame(result_frame, relief=tk.GROOVE, borderwidth=1)
    file_frame.pack(fill=tk.X, padx=15, pady=5)

    tk.Label(file_frame, text="文件信息", font=("Arial", 10, "bold")).pack(anchor=tk.W, padx=10, pady=5)

    small_font = ("Arial", 10)
    try:
        # Size and modification time of the generated file.
        size_bytes = os.path.getsize(output_file)
        modified_at = datetime.datetime.fromtimestamp(os.path.getmtime(output_file))
        size_text = format_file_size(size_bytes)
        tk.Label(file_frame, text=f"文件大小: {size_text}", font=small_font).pack(anchor=tk.W, padx=10, pady=2)
        tk.Label(file_frame, text=f"创建时间: {modified_at.strftime('%Y-%m-%d %H:%M:%S')}", font=small_font).pack(anchor=tk.W, padx=10, pady=2)
    except Exception:
        tk.Label(file_frame, text="无法获取文件信息", font=small_font).pack(anchor=tk.W, padx=10, pady=2)

    button_frame = tk.Frame(preview)
    button_frame.pack(pady=10)

    tk.Button(button_frame, text="打开文件", command=lambda: os.startfile(output_file)).pack(side=tk.LEFT, padx=5)
    tk.Button(button_frame, text="打开所在文件夹", command=lambda: os.startfile(os.path.dirname(output_file))).pack(side=tk.LEFT, padx=5)
    tk.Button(button_frame, text="关闭", command=preview.destroy).pack(side=tk.LEFT, padx=5)
|
||||
|
||||
|
||||
def show_merge_result_preview(output):
    """Show a summary window for a finished purchase-order merge.

    The merge count, product count and saved file path are parsed out of
    *output* (the captured text of the merge step).  When the merge count or
    the path cannot be found, a plain information box is shown instead.

    Args:
        output: Text produced by the merge step.
    """
    merged_match = re.search(r'合并了 (\d+) 个采购单', output)
    product_match = re.search(r'共处理 (\d+) 个商品', output)
    output_match = re.search(r'已保存到: (.+?)(?:\n|$)', output)

    # Strip the captured path: captured output with CRLF line endings would
    # otherwise leave a trailing "\r" that makes the path unusable.
    output_file = output_match.group(1).strip() if output_match else ""

    if not (merged_match and output_file):
        messagebox.showinfo("采购单合并完成", "采购单合并已完成,请在data/output目录查看结果。")
        return

    merged_count = int(merged_match.group(1))
    product_count = int(product_match.group(1)) if product_match else 0

    preview = tk.Toplevel()
    preview.title("采购单合并结果")
    preview.geometry("450x300")
    preview.resizable(False, False)
    apply_theme(preview)

    tk.Label(preview, text="采购单合并完成", font=("Arial", 16, "bold")).pack(pady=10)

    result_frame = tk.Frame(preview)
    result_frame.pack(pady=10, fill=tk.BOTH, expand=True)

    summary_lines = (
        f"合并采购单数量: {merged_count} 个",
        f"处理商品数量: {product_count} 个",
        f"输出文件: {os.path.basename(output_file)}",
    )
    for line in summary_lines:
        tk.Label(result_frame, text=line, font=("Arial", 12)).pack(anchor=tk.W, padx=20, pady=5)

    theme = THEMES[get_theme_mode()]
    tk.Label(result_frame, text="采购单已成功合并!", font=("Arial", 12, "bold"), fg=theme["success"]).pack(pady=10)

    def _open_file():
        os.startfile(output_file)

    def _open_folder():
        # abspath first: dirname of a bare file name would be an empty string.
        os.startfile(os.path.dirname(os.path.abspath(output_file)))

    button_frame = tk.Frame(preview)
    button_frame.pack(pady=10)

    tk.Button(button_frame, text="打开文件", command=_open_file).pack(side=tk.LEFT, padx=10)
    tk.Button(button_frame, text="打开所在文件夹", command=_open_folder).pack(side=tk.LEFT, padx=10)
    tk.Button(button_frame, text="关闭", command=preview.destroy).pack(side=tk.LEFT, padx=10)
|
||||
|
||||
|
||||
def _latest_purchase_order(output_dir):
    """Return the newest ``采购单_*.xls``/``.xlsx`` file in *output_dir*, or None."""
    try:
        candidates = [
            os.path.join(output_dir, name)
            for name in os.listdir(output_dir)
            if name.startswith('采购单_') and name.endswith(('.xls', '.xlsx'))
        ]
        if not candidates:
            return None
        return max(candidates, key=os.path.getmtime)
    except OSError:
        # Missing/unreadable folder, or a file vanished while being inspected.
        return None


def show_pipeline_result_preview(output):
    """Show a report window summarising a full pipeline run.

    *output* (the captured text of the run) is scanned for the OCR totals,
    the number of extracted products and the saved purchase-order path; the
    findings are rendered into a read-only text area followed by buttons for
    opening the result file and the output folder.

    Args:
        output: Text produced by the pipeline run.
    """
    ocr_match = re.search(r'所有图片处理完成, 总计: (\d+), 成功: (\d+)', output)
    excel_match = re.search(r'提取到 (\d+) 个商品信息', output)
    output_file_match = re.search(r'采购单已保存到: (.+?)(?:\n|$)', output)
    # Strip so a trailing "\r" (CRLF output) does not end up in the path.
    output_file = output_file_match.group(1).strip() if output_file_match else ""

    preview = tk.Toplevel()
    preview.title("完整流程处理结果")
    preview.geometry("500x400")
    preview.resizable(False, False)
    center_window(preview)

    tk.Label(preview, text="完整处理流程已完成", font=("Arial", 16, "bold")).pack(pady=10)

    no_files_match = re.search(r'未找到可合并的文件', output)
    if no_files_match:
        tk.Label(preview, text="未找到可合并的文件,但其他步骤已成功执行", font=("Arial", 12)).pack(pady=0)

    result_frame = tk.Frame(preview)
    result_frame.pack(pady=10, fill=tk.BOTH, expand=True)

    result_text = scrolledtext.ScrolledText(result_frame, wrap=tk.WORD, height=15, width=60)
    result_text.pack(fill=tk.BOTH, expand=True, padx=15, pady=5)
    result_text.configure(state=tk.NORMAL)

    def write(text, tag):
        result_text.insert(tk.END, text, tag)

    write("===== 流程执行结果 =====\n\n", "title")

    # Step 1: OCR
    write("步骤1: OCR识别\n", "step")
    if ocr_match:
        total = int(ocr_match.group(1))
        success = int(ocr_match.group(2))
        write(f" 处理图片: {total} 个\n", "info")
        write(f" 成功识别: {success} 个\n", "info")
        if success == total:
            write(" 结果: 全部识别成功\n", "success")
        else:
            write(f" 结果: 部分识别成功 ({success}/{total})\n", "warning")
    else:
        write(" 结果: 无OCR处理或处理信息不完整\n", "warning")

    # Step 2: Excel
    write("\n步骤2: Excel处理\n", "step")
    if excel_match:
        products = int(excel_match.group(1))
        write(f" 提取商品: {products} 个\n", "info")
        write(" 结果: 成功生成采购单\n", "success")
        if output_file:
            write(f" 输出文件: {os.path.basename(output_file)}\n", "info")
    else:
        write(" 结果: 无Excel处理或处理信息不完整\n", "warning")

    # Overall verdict
    write("\n===== 整体评估 =====\n", "title")

    has_errors = "错误" in output or "失败" in output
    no_files_match2 = re.search(r'未找到采购单文件', output)
    single_file_match = re.search(r'只有1个采购单文件', output)

    if no_files_match2:
        write("没有找到可合并的文件,但处理流程已成功完成。\n", "warning")
        write("可以选择打开Excel文件或查看输出文件夹。\n", "info")
    elif single_file_match:
        write("只有一个采购单文件,无需合并,处理流程已成功完成。\n", "warning")
        write("可以选择打开生成的Excel文件。\n", "info")
    elif ocr_match and excel_match and not has_errors:
        write("流程完整执行成功!\n", "success")
    elif ocr_match or excel_match:
        write("流程部分执行成功,请检查日志获取详情。\n", "warning")
    else:
        write("流程执行可能存在问题,请查看详细日志。\n", "error")

    result_text.tag_configure("title", font=("Arial", 12, "bold"))
    result_text.tag_configure("step", font=("Arial", 11, "bold"))
    result_text.tag_configure("info", font=("Arial", 10))
    result_text.tag_configure("success", font=("Arial", 10, "bold"), foreground="#28a745")
    result_text.tag_configure("warning", font=("Arial", 10, "bold"), foreground="#ffc107")
    result_text.tag_configure("error", font=("Arial", 10, "bold"), foreground="#dc3545")

    result_text.configure(state=tk.DISABLED)

    button_frame = tk.Frame(preview)
    button_frame.pack(pady=10)

    if output_file:
        tk.Button(button_frame, text="打开Excel文件",
                  command=lambda: os.startfile(output_file)).pack(side=tk.LEFT, padx=10)
    elif excel_match or no_files_match or single_file_match:
        # No path in the output: offer the newest purchase order on disk.
        # The lookup tolerates a missing output folder so the remaining
        # buttons (including Close) are always created.
        latest_file = _latest_purchase_order(os.path.abspath("data/output"))
        if latest_file:
            tk.Button(button_frame, text="打开最新Excel文件",
                      command=lambda: os.startfile(latest_file)).pack(side=tk.LEFT, padx=10)

    tk.Button(button_frame, text="查看输出文件夹",
              command=lambda: os.startfile(os.path.abspath("data/output"))).pack(side=tk.LEFT, padx=10)
    tk.Button(button_frame, text="关闭", command=preview.destroy).pack(side=tk.LEFT, padx=10)
|
||||
|
||||
|
||||
def show_tobacco_result_preview(returncode, output):
    """Show the result window for a processed tobacco order.

    Nothing is shown when *returncode* is non-zero.  If a previous result
    window still exists it is raised instead of creating a second one.
    Order time, total amount, item count and the result file path are parsed
    from *output*; any exception while building the window is reported in an
    error dialog.

    Args:
        returncode: Exit status of the processing step; 0 means success.
        output: Text produced by the processing step.
    """
    global TOBACCO_PREVIEW_WINDOW
    if returncode != 0:
        return

    try:
        # Reuse an existing window when there is one.
        try:
            if TOBACCO_PREVIEW_WINDOW and TOBACCO_PREVIEW_WINDOW.winfo_exists():
                TOBACCO_PREVIEW_WINDOW.lift()
                return
        except Exception:
            TOBACCO_PREVIEW_WINDOW = None

        order_time = "(未知)"
        total_amount = "(未知)"
        items_count = 0

        path_hit = re.search(r'烟草订单处理完成,绝对路径: (.+)(?:\n|$)', output)
        result_file = path_hit.group(1).strip() if path_hit else None

        for text_line in output.split('\n'):
            if "烟草公司订单处理成功" not in text_line or "订单时间" not in text_line:
                continue
            time_hit = re.search(r'订单时间: ([^,]+)', text_line)
            amount_hit = re.search(r'总金额: ([^,]+)', text_line)
            items_hit = re.search(r'处理条目: (\d+)', text_line)
            if time_hit:
                order_time = time_hit.group(1).strip()
            if amount_hit:
                total_amount = amount_hit.group(1).strip()
            if items_hit:
                items_count = int(items_hit.group(1).strip())

        # Fall back to the default output location when the parsed path is unusable.
        if not (result_file and os.path.exists(result_file)):
            fallback = os.path.abspath("data/output/银豹采购单_烟草公司.xls")
            if os.path.exists(fallback):
                result_file = fallback

        preview = tk.Toplevel()
        preview.title("烟草订单处理结果")
        preview.geometry("450x320")
        preview.resizable(False, False)
        TOBACCO_PREVIEW_WINDOW = preview

        def _close_preview():
            global TOBACCO_PREVIEW_WINDOW
            TOBACCO_PREVIEW_WINDOW = None
            try:
                preview.destroy()
            except Exception:
                pass

        preview.protocol("WM_DELETE_WINDOW", _close_preview)
        center_window(preview)

        body_font = ("Arial", 12)
        small_font = ("Arial", 10)

        tk.Label(preview, text="烟草订单处理完成", font=("Arial", 16, "bold")).pack(pady=10)

        result_frame = tk.Frame(preview)
        result_frame.pack(pady=10, fill=tk.BOTH, expand=True)

        for caption in (
            f"订单时间: {order_time}",
            f"订单总金额: {total_amount}",
            f"处理商品数量: {items_count} 个",
        ):
            tk.Label(result_frame, text=caption, font=body_font).pack(anchor=tk.W, padx=20, pady=5)

        if result_file and os.path.exists(result_file):
            tk.Label(result_frame, text=f"输出文件: {os.path.basename(result_file)}", font=body_font).pack(anchor=tk.W, padx=20, pady=5)
            tk.Label(result_frame, text="银豹采购单已成功生成!", font=("Arial", 12, "bold"), fg="#28a745").pack(pady=10)

            file_frame = tk.Frame(result_frame, relief=tk.GROOVE, borderwidth=1)
            file_frame.pack(fill=tk.X, padx=15, pady=5)
            tk.Label(file_frame, text="文件信息", font=("Arial", 10, "bold")).pack(anchor=tk.W, padx=10, pady=5)

            try:
                file_size = os.path.getsize(result_file)
                file_time = datetime.datetime.fromtimestamp(os.path.getmtime(result_file))
                size_text = format_file_size(file_size)
                tk.Label(file_frame, text=f"文件大小: {size_text}", font=small_font).pack(anchor=tk.W, padx=10, pady=2)
                tk.Label(file_frame, text=f"创建时间: {file_time.strftime('%Y-%m-%d %H:%M:%S')}", font=small_font).pack(anchor=tk.W, padx=10, pady=2)
            except Exception:
                tk.Label(file_frame, text="无法获取文件信息", font=small_font).pack(anchor=tk.W, padx=10, pady=2)

            def _open_file():
                os.startfile(result_file)

            def _open_folder():
                os.startfile(os.path.dirname(result_file))

            button_frame = tk.Frame(preview)
            button_frame.pack(pady=10)
            tk.Button(button_frame, text="打开文件", command=_open_file).pack(side=tk.LEFT, padx=5)
            tk.Button(button_frame, text="打开所在文件夹", command=_open_folder).pack(side=tk.LEFT, padx=5)
            tk.Button(button_frame, text="关闭", command=_close_preview).pack(side=tk.LEFT, padx=5)
        else:
            tk.Label(result_frame, text="未找到输出文件", font=body_font).pack(anchor=tk.W, padx=20, pady=5)
            tk.Label(result_frame, text="请检查data/output目录", font=("Arial", 12, "bold"), fg="#dc3545").pack(pady=10)

            def _open_output_dir():
                os.startfile(os.path.abspath("data/output"))

            button_frame = tk.Frame(preview)
            button_frame.pack(pady=10)
            tk.Button(button_frame, text="打开输出目录", command=_open_output_dir).pack(side=tk.LEFT, padx=5)
            tk.Button(button_frame, text="关闭", command=_close_preview).pack(side=tk.LEFT, padx=5)

        # Bring the window to the front once, then release the always-on-top flag.
        preview.lift()
        preview.attributes('-topmost', True)
        preview.after_idle(lambda: preview.attributes('-topmost', False))

    except Exception as e:
        messagebox.showerror(
            "处理异常",
            f"显示预览时发生错误: {e}\n请检查日志了解详细信息。"
        )
|
||||
@@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""键盘快捷键模块"""
|
||||
|
||||
import tkinter as tk
|
||||
from tkinter import messagebox
|
||||
|
||||
from .ui_widgets import center_window
|
||||
from .action_handlers import (
|
||||
process_single_image_with_status,
|
||||
process_excel_file_with_status,
|
||||
batch_ocr_with_status,
|
||||
run_pipeline_directly,
|
||||
merge_orders_with_status,
|
||||
)
|
||||
from .file_operations import clean_cache
|
||||
|
||||
|
||||
def bind_keyboard_shortcuts(root, log_widget, status_bar):
    """Register the application's keyboard shortcuts on *root*.

    Args:
        root: Widget whose ``bind`` method receives the shortcuts.
        log_widget: Passed through to the action handlers.
        status_bar: Passed through to the action handlers that report status.
    """
    def _confirm_quit(event):
        # Quit only after the user confirms.
        return root.quit() if messagebox.askyesno("确认退出", "确定要退出程序吗?") else None

    # Key sequence -> event callback, bound in this order.
    bindings = {
        '<Control-o>': lambda e: process_single_image_with_status(log_widget, status_bar),
        '<Control-e>': lambda e: process_excel_file_with_status(log_widget, status_bar),
        '<Control-b>': lambda e: batch_ocr_with_status(log_widget, status_bar),
        '<Control-p>': lambda e: run_pipeline_directly(log_widget, status_bar),
        '<Control-m>': lambda e: merge_orders_with_status(log_widget, status_bar),
        '<F5>': lambda e: clean_cache(log_widget),
        '<Escape>': _confirm_quit,
        '<F1>': lambda e: show_shortcuts_help(),
    }
    for sequence, callback in bindings.items():
        root.bind(sequence, callback)
|
||||
|
||||
|
||||
def show_shortcuts_help():
    """Open a dialog listing the application's keyboard shortcuts.

    The list includes every shortcut registered by
    ``bind_keyboard_shortcuts``, including F1, which opens this dialog.
    """
    help_dialog = tk.Toplevel()
    help_dialog.title("快捷键帮助")
    help_dialog.geometry("400x450")
    center_window(help_dialog)

    tk.Label(help_dialog, text="键盘快捷键", font=("Arial", 16, "bold")).pack(pady=10)

    help_text = tk.Text(help_dialog, wrap=tk.WORD, width=50, height=20)
    help_text.pack(padx=20, pady=10, fill=tk.BOTH, expand=True)

    # (keys, action) pairs, kept in sync with bind_keyboard_shortcuts.
    shortcut_rows = [
        ("Ctrl+O", "处理单个图片"),
        ("Ctrl+E", "处理Excel文件"),
        ("Ctrl+B", "OCR批量识别"),
        ("Ctrl+P", "完整处理流程"),
        ("Ctrl+M", "合并采购单"),
        ("F5", "清除处理缓存"),
        ("Esc", "退出程序"),
        ("F1", "显示快捷键帮助"),
    ]
    shortcuts = "\n" + "".join(f"{keys}: {action}\n" for keys, action in shortcut_rows)

    help_text.insert(tk.END, shortcuts)
    help_text.configure(state=tk.DISABLED)  # read-only

    tk.Button(help_dialog, text="确定", command=help_dialog.destroy).pack(pady=10)

    # Bring the dialog to the front once, then release the always-on-top flag.
    help_dialog.lift()
    help_dialog.attributes('-topmost', True)
    help_dialog.after_idle(lambda: help_dialog.attributes('-topmost', False))
|
||||
+193
@@ -0,0 +1,193 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""主题管理模块"""
|
||||
|
||||
import tkinter as tk
|
||||
from tkinter import scrolledtext, ttk
|
||||
|
||||
# Module-private name of the active theme; a key of THEMES.
_theme_mode = "light"

# Colour palettes for the light and dark themes.
# NOTE(review): the "shadow" values are 8-digit hex strings; that length is
# not among the "#RGB"-style forms listed in the Tk colour documentation.
# Confirm they are never passed directly as a Tk widget colour option.
THEMES = {
    "light": {
        "bg": "#f8f9fa",
        "fg": "#212529",
        "button_bg": "#ffffff",
        "button_fg": "#495057",
        "button_hover": "#e9ecef",
        "primary_bg": "#007bff",
        "primary_fg": "#ffffff",
        "secondary_bg": "#6c757d",
        "secondary_fg": "#ffffff",
        "log_bg": "#ffffff",
        "log_fg": "#212529",
        "highlight_bg": "#007bff",
        "highlight_fg": "#ffffff",
        "border": "#dee2e6",
        "success": "#28a745",
        "error": "#dc3545",
        "warning": "#ffc107",
        "info": "#17a2b8",
        "card_bg": "#ffffff",
        "shadow": "#00000010",
    },
    "dark": {
        "bg": "#1a1a1a",
        "fg": "#e9ecef",
        "button_bg": "#343a40",
        "button_fg": "#e9ecef",
        "button_hover": "#495057",
        "primary_bg": "#0d6efd",
        "primary_fg": "#ffffff",
        "secondary_bg": "#6c757d",
        "secondary_fg": "#ffffff",
        "log_bg": "#212529",
        "log_fg": "#e9ecef",
        "highlight_bg": "#0d6efd",
        "highlight_fg": "#ffffff",
        "border": "#495057",
        "success": "#198754",
        "error": "#dc3545",
        "warning": "#ffc107",
        "info": "#0dcaf0",
        "card_bg": "#2d3748",
        "shadow": "#00000030",
    },
}
|
||||
|
||||
|
||||
def get_theme_mode() -> str:
    """Return the name of the currently active theme."""
    return _theme_mode
|
||||
|
||||
|
||||
def set_theme_mode(mode: str):
    """Make *mode* the active theme name.

    The value is stored as given, without validation.
    """
    global _theme_mode
    _theme_mode = mode
|
||||
|
||||
|
||||
def create_modern_button(parent, text, command, style="primary", width=None, height=None, px_width=None, px_height=None):
    """Create a flat button with a 1-pixel coloured border and hover effect.

    The border is produced by wrapping the button in a frame whose background
    is the border colour and packing the button with 1 pixel of padding.

    Args:
        parent: Parent widget.
        text: Button label.
        command: Callback invoked on click.
        style: ``"secondary"`` for the filled secondary look; any other value
            (including the default ``"primary"``) gives the outlined look.
        width: Button width in text units; 12 when omitted or falsy.
        height: Button height in text units; 1 when omitted.
        px_width: Optional fixed width of the wrapper frame in pixels.
        px_height: Optional fixed height of the wrapper frame in pixels
            (32 when only ``px_width`` is given).

    Returns:
        The wrapper ``tk.Frame`` containing the button.
    """
    theme = THEMES[_theme_mode]

    if style == "secondary":
        bg_color = theme["secondary_bg"]
        fg_color = theme["secondary_fg"]
        hover_color = theme["button_hover"]
        border_color = theme["secondary_bg"]
    else:
        # "primary" and every unrecognised style share the outlined look.
        bg_color = "white"
        fg_color = theme["primary_bg"]
        hover_color = "#f0f8ff"
        border_color = theme["primary_bg"]

    button_frame = tk.Frame(parent, bg=border_color, highlightthickness=0)
    button_frame.configure(relief="flat", bd=0)
    if px_width or px_height:
        try:
            w = px_width if px_width else button_frame.winfo_reqwidth()
            h = px_height if px_height else 32
            button_frame.configure(width=w, height=h)
            # Keep the requested pixel size instead of shrinking to the child.
            button_frame.pack_propagate(False)
        except tk.TclError:
            # Best effort: fall back to natural sizing on an invalid size.
            pass

    button = tk.Button(
        button_frame,
        text=text,
        command=command,
        bg=bg_color,
        fg=fg_color,
        font=("Microsoft YaHei UI", 8),
        relief="flat",
        bd=0,
        padx=14,
        pady=4,
        anchor="center",
        cursor="hand2",
        activebackground=hover_color,
        activeforeground=fg_color,
        width=width if width else 12,
        height=height if height is not None else 1,
    )

    # Hover effect
    def on_enter(e):
        button.configure(bg=hover_color)

    def on_leave(e):
        button.configure(bg=bg_color)

    for target in (button, button_frame):
        target.bind("<Enter>", on_enter)
        target.bind("<Leave>", on_leave)

    button.pack(fill=tk.BOTH, expand=True, padx=1, pady=1)
    return button_frame
|
||||
|
||||
|
||||
def create_card_frame(parent, title=None):
    """Create a card-style frame using the active theme's colours.

    Args:
        parent: Parent widget.
        title: Optional heading packed at the top of the card.

    Returns:
        The new ``tk.Frame``; the caller is responsible for placing it.
    """
    palette = THEMES[_theme_mode]
    card_background = palette["card_bg"]

    card = tk.Frame(
        parent,
        bg=card_background,
        relief="flat",
        borderwidth=1,
        highlightbackground=palette["border"],
        highlightthickness=1,
    )

    if not title:
        return card

    heading = tk.Label(
        card,
        text=title,
        bg=card_background,
        fg=palette["fg"],
        font=("Microsoft YaHei UI", 10, "bold"),
    )
    heading.pack(pady=(6, 3))
    return card
|
||||
|
||||
|
||||
def _apply_theme_to_descendants(widget, theme):
    """Colour every descendant of *widget* according to its widget type."""
    for child in widget.winfo_children():
        if isinstance(child, tk.Button) and not isinstance(child, ttk.Button):
            child.configure(bg=theme["button_bg"], fg=theme["button_fg"])
        elif isinstance(child, scrolledtext.ScrolledText):
            child.configure(bg=theme["log_bg"], fg=theme["log_fg"])
        else:
            try:
                child.configure(bg=theme["bg"], fg=theme["fg"])
            except tk.TclError:
                # Widget does not accept both options; leave it untouched.
                pass
        _apply_theme_to_descendants(child, theme)


def apply_theme(widget, theme_mode=None):
    """Apply a theme's colours to *widget* and all of its descendants.

    *widget* itself receives the generic background/foreground pair.  Each
    descendant is coloured once by type: classic buttons get the button
    colours, scrolled text areas get the log colours, everything else gets
    the generic pair.  Recursing through a helper keeps the type-specific
    colours from being overwritten by the generic pair on the way down.

    Args:
        widget: Root of the widget subtree to theme.
        theme_mode: Key of ``THEMES``; defaults to the active theme.
    """
    if theme_mode is None:
        theme_mode = _theme_mode

    theme = THEMES[theme_mode]

    try:
        widget.configure(bg=theme["bg"], fg=theme["fg"])
    except tk.TclError:
        # Widget does not accept both options; leave it untouched.
        pass

    _apply_theme_to_descendants(widget, theme)
|
||||
@@ -0,0 +1,121 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""UI控件模块 - StatusBar、ProgressReporter、可折叠框架等"""
|
||||
|
||||
import tkinter as tk
|
||||
from tkinter import ttk
|
||||
|
||||
from .theme import THEMES, get_theme_mode
|
||||
|
||||
|
||||
class StatusBar(tk.Frame):
    """Status bar showing a status message and an optional progress bar."""

    def __init__(self, master, **kwargs):
        super().__init__(master, **kwargs)
        self.configure(height=25, relief=tk.SUNKEN, borderwidth=1)

        self.status_label = tk.Label(self, text="就绪", anchor=tk.W, padx=5)
        self.status_label.pack(side=tk.LEFT, fill=tk.X, expand=True)

        # Created unpacked: the bar stays hidden until progress is reported.
        self.progress = ttk.Progressbar(self, orient=tk.HORIZONTAL, length=200, mode='determinate')

    def _show_progress(self):
        """Make the progress bar visible at the right edge."""
        self.progress.pack(side=tk.RIGHT, padx=5, pady=2)

    def set_status(self, text, progress=None):
        """Set the status text and, optionally, a progress percentage.

        Args:
            text: Message shown in the status label.
            progress: Percentage in the range 0-100 to display; any other
                value (including ``None``) hides the progress bar.
        """
        self.status_label.config(text=text)

        if progress is not None and 0 <= progress <= 100:
            # A previous set_running(True) leaves the bar animating in
            # indeterminate mode; switch back so the value is shown.
            self.progress.stop()
            self.progress.config(mode='determinate', value=progress)
            self._show_progress()
        else:
            self.progress.pack_forget()

    def set_running(self, is_running=True):
        """Switch between the busy state and the idle state.

        Args:
            is_running: ``True`` shows an animated indeterminate bar with a
                busy message; ``False`` restores the idle message and hides
                the bar.
        """
        theme = THEMES[get_theme_mode()]
        if is_running:
            self.status_label.config(text="处理中...", foreground=theme["info"])
            self._show_progress()
            self.progress.config(mode='indeterminate')
            self.progress.start()
        else:
            self.status_label.config(text="就绪", foreground=theme["fg"])
            self.progress.stop()
            # Restore the default mode for later percentage updates.
            self.progress.config(mode='determinate')
            self.progress.pack_forget()
|
||||
|
||||
|
||||
class ProgressReporter:
    """Best-effort wrapper around a status bar.

    Every method swallows exceptions raised by the underlying status bar, so
    progress reporting never interrupts the calling task.
    """

    def __init__(self, status_bar: StatusBar):
        self.status_bar = status_bar

    def set(self, text: str, percent: int = None):
        """Show *text*, plus *percent* when one is given."""
        call_args = (text,) if percent is None else (text, percent)
        try:
            self.status_bar.set_status(*call_args)
        except Exception:
            pass

    def running(self):
        """Put the status bar into its busy state."""
        try:
            self.status_bar.set_running(True)
        except Exception:
            pass

    def done(self):
        """Return the status bar to its idle state and reset the message."""
        try:
            bar = self.status_bar
            bar.set_running(False)
            bar.set_status("就绪")
        except Exception:
            pass
|
||||
|
||||
|
||||
def create_collapsible_frame(parent, title, initial_state=True):
    """Create a panel whose content can be collapsed by clicking its header.

    The panel is packed into *parent* immediately.

    Args:
        parent: Parent widget.
        title: Text shown in the clickable header.
        initial_state: ``True`` to start expanded, ``False`` to start collapsed.

    Returns:
        A ``(content_frame, state_var)`` tuple: the frame to populate and the
        ``tk.BooleanVar`` tracking whether the panel is expanded.
    """
    def _arrow(expanded):
        return "▼" if expanded else "►"

    container = tk.Frame(parent)
    container.pack(fill=tk.X, pady=5)

    header = tk.Frame(container)
    header.pack(fill=tk.X)

    state_var = tk.BooleanVar(value=initial_state)

    arrow_label = tk.Label(header, text=_arrow(initial_state), font=("Arial", 10, "bold"))
    arrow_label.pack(side=tk.LEFT, padx=5)

    caption_label = tk.Label(header, text=title, font=("Arial", 11, "bold"))
    caption_label.pack(side=tk.LEFT, padx=5)

    content_frame = tk.Frame(container)

    def _show_content():
        content_frame.pack(fill=tk.X, padx=20, pady=5)

    if initial_state:
        _show_content()

    def toggle_collapse(event=None):
        expanded = not state_var.get()
        state_var.set(expanded)
        arrow_label.config(text=_arrow(expanded))
        if expanded:
            _show_content()
        else:
            content_frame.pack_forget()

    # Any click on the header area toggles the panel.
    for clickable in (header, arrow_label, caption_label):
        clickable.bind("<Button-1>", toggle_collapse)

    return content_frame, state_var
|
||||
|
||||
|
||||
def center_window(window):
    """Centre *window* on the screen, keeping its current size."""
    # Flush pending geometry work so the reported size is up to date.
    window.update_idletasks()
    win_w, win_h = window.winfo_width(), window.winfo_height()
    left = window.winfo_screenwidth() // 2 - win_w // 2
    top = window.winfo_screenheight() // 2 - win_h // 2
    window.geometry(f'{win_w}x{win_h}+{left}+{top}')
|
||||
@@ -0,0 +1,128 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""用户设置与最近文件管理模块"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
import tkinter as tk
|
||||
from typing import Dict, List, Any
|
||||
|
||||
from app.core.utils.log_utils import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
RECENT_LIST_WIDGET = None
|
||||
|
||||
|
||||
def load_user_settings():
    """Load the user settings from ``data/user_settings.json``.

    Returns:
        The settings dictionary.  An empty dict is returned when the file is
        missing, cannot be read or parsed, or does not contain a JSON object,
        so callers can always use ``.get(...)`` on the result.
    """
    try:
        path = os.path.abspath(os.path.join('data', 'user_settings.json'))
        if os.path.exists(path):
            with open(path, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
            # Valid JSON is not necessarily an object (e.g. a list or null).
            return loaded if isinstance(loaded, dict) else {}
    except Exception as e:
        logger.debug(f"加载用户设置失败: {e}")
    return {}
|
||||
|
||||
|
||||
def save_user_settings(settings: Dict[str, Any]):
    """Write *settings* to ``data/user_settings.json`` as UTF-8 JSON.

    The ``data`` directory is created when missing.  Failures are logged at
    debug level and otherwise ignored.
    """
    try:
        os.makedirs('data', exist_ok=True)
        target = os.path.abspath(os.path.join('data', 'user_settings.json'))
        with open(target, 'w', encoding='utf-8') as handle:
            json.dump(settings, handle, ensure_ascii=False, indent=2)
    except Exception as e:
        logger.debug(f"保存用户设置失败: {e}")
|
||||
|
||||
|
||||
def get_recent_files() -> List[str]:
    """Return up to ten recent files, most recently modified first.

    Stored entries that no longer exist or have an unsupported extension are
    dropped.  When nothing remains, supported files found in ``data/output``
    and ``data/result`` are used instead.  If the resulting list differs from
    the stored one, the first 20 entries are written back to the settings.
    """
    settings = load_user_settings()
    stored = settings.get('recent_files', [])
    if not isinstance(stored, list):
        return []

    supported = {'.xlsx', '.xls', '.jpg', '.jpeg', '.png', '.bmp'}

    def _allowed(p: str) -> bool:
        try:
            if not isinstance(p, str) or not os.path.isfile(p):
                return False
            return os.path.splitext(p)[1].lower() in supported
        except Exception:
            return False

    recent = [entry for entry in stored if _allowed(entry)]

    if not recent:
        # Nothing usable is stored: fall back to files already on disk.
        discovered = []
        for folder in ['data/output', 'data/result']:
            try:
                if os.path.exists(folder):
                    for name in os.listdir(folder):
                        candidate = os.path.join(folder, name)
                        if _allowed(candidate):
                            discovered.append(candidate)
            except Exception:
                pass
        if discovered:
            recent = discovered

    try:
        ordered = sorted(recent, key=os.path.getmtime, reverse=True)
    except Exception:
        ordered = recent

    # Persist the cleaned list only when it actually changed.
    if ordered != stored:
        settings['recent_files'] = ordered[:20]
        save_user_settings(settings)

    return ordered[:10]
|
||||
|
||||
|
||||
def refresh_recent_list_widget():
    """Repopulate the recent-files list widget, if one is registered.

    Does nothing when ``RECENT_LIST_WIDGET`` is ``None``.  Entries are shown
    as ``"<n>. <path>"`` numbered from 1.  Failures are logged at debug level.
    """
    try:
        if RECENT_LIST_WIDGET is None:
            return
        RECENT_LIST_WIDGET.delete(0, tk.END)
        position = 0
        for file_path in get_recent_files():
            position += 1
            RECENT_LIST_WIDGET.insert(tk.END, f"{position}. {file_path}")
    except Exception as e:
        logger.debug(f"刷新最近文件列表失败: {e}")
|
||||
|
||||
|
||||
def _extract_path_from_recent_item(s: str) -> str:
|
||||
try:
|
||||
m = re.match(r'^(\d+)\.\s+(.*)$', s)
|
||||
p = m.group(2) if m else s
|
||||
return p.strip().strip('"')
|
||||
except Exception:
|
||||
return s.strip().strip('"')
|
||||
|
||||
|
||||
def add_recent_file(path: str) -> None:
    """Move *path* to the front of the stored recent-files list.

    Empty paths, paths that are not existing files and files with an
    unsupported extension are ignored.  The stored list is capped at 20
    entries and the list widget is refreshed afterwards.  Failures are logged
    at debug level.
    """
    try:
        if not path:
            return

        try:
            is_supported = (
                os.path.isfile(path)
                and os.path.splitext(path)[1].lower()
                in {'.xlsx', '.xls', '.jpg', '.jpeg', '.png', '.bmp'}
            )
        except Exception:
            return
        if not is_supported:
            return

        settings = load_user_settings()
        # Put the new path first and drop any earlier occurrence of it.
        others = [entry for entry in settings.get('recent_files', []) if entry != path]
        settings['recent_files'] = ([path] + others)[:20]
        save_user_settings(settings)
        refresh_recent_list_widget()
    except Exception as e:
        logger.debug(f"添加最近文件失败: {e}")
|
||||
|
||||
|
||||
def clear_recent_files():
    """Empty the stored recent-files list, keeping all other settings.

    Failures are logged at debug level.
    """
    try:
        settings = load_user_settings()
        settings['recent_files'] = []
        save_user_settings(settings)
    except Exception as e:
        logger.debug(f"清空最近文件失败: {e}")
|
||||
Reference in New Issue
Block a user