refactor: 重构文件读取和日志处理以提升性能和稳定性

- 新增 smart_read_excel 工具函数,统一 Excel 读取逻辑并自动选择引擎
- 重构 ConfigManager.get_path 方法,使用 pathlib 提升路径处理可靠性
- 将 GUI 日志处理改为异步队列模式,避免 UI 阻塞
- 优化 ExcelProcessor 的表头识别逻辑,避免重复读取文件
- 更新配置文件中的版本号
This commit is contained in:
侯欢 2026-03-30 11:17:25 +08:00
parent bfccdd3a37
commit b7bce93995
8 changed files with 101 additions and 46 deletions

View File

@ -117,25 +117,29 @@ class ConfigManager:
获取路径配置并确保它是一个有效的绝对路径 获取路径配置并确保它是一个有效的绝对路径
如果create为True则自动创建该目录 如果create为True则自动创建该目录
""" """
path = self.get(section, option, fallback) from pathlib import Path
path_str = self.get(section, option, fallback)
path = Path(path_str)
if not os.path.isabs(path): if not path.is_absolute():
# 相对路径,转为绝对路径 # 相对路径,转为绝对路径(相对于项目根目录)
path = os.path.abspath(path) path = Path(os.getcwd()) / path
if create and not os.path.exists(path): if create:
try: try:
# 如果是文件路径,创建其父目录 # 智能判断是文件还是目录
if '.' in os.path.basename(path): # 如果有后缀名则认为是文件,创建其父目录
directory = os.path.dirname(path) if path.suffix:
if directory and not os.path.exists(directory): directory = path.parent
os.makedirs(directory, exist_ok=True) if not directory.exists():
logger.info(f"已创建目录: {directory}") directory.mkdir(parents=True, exist_ok=True)
logger.info(f"已创建父目录: {directory}")
else: else:
# 否则认为是目录路径 # 否则认为是目录路径
os.makedirs(path, exist_ok=True) if not path.exists():
path.mkdir(parents=True, exist_ok=True)
logger.info(f"已创建目录: {path}") logger.info(f"已创建目录: {path}")
except Exception as e: except Exception as e:
logger.error(f"创建目录失败: {path}, 错误: {e}") logger.error(f"创建目录失败: {path}, 错误: {e}")
return path return str(path.absolute())

View File

@ -708,14 +708,18 @@ class ExcelProcessor:
logger.info(f"识别到表头在第 {header_row+1}") logger.info(f"识别到表头在第 {header_row+1}")
# 重新读取Excel正确指定表头行 # 重新设置表头,避免二次读取
if progress_cb: if progress_cb:
try: try:
progress_cb(94) progress_cb(94)
except Exception: except Exception:
pass pass
df = pd.read_excel(file_path, header=header_row)
logger.info(f"使用表头行重新读取数据,共 {len(df)} 行有效数据") # 使用识别到的表头行设置列名,并过滤掉表头之前的行
df.columns = df.iloc[header_row]
df = df.iloc[header_row + 1:].reset_index(drop=True)
logger.info(f"重新整理数据结构,共 {len(df)} 行有效数据")
# 提取商品信息 # 提取商品信息
if progress_cb: if progress_cb:

View File

@ -219,6 +219,34 @@ def save_json(data: Any, file_path: str, ensure_ascii: bool = False, indent: int
logger.error(f"保存JSON文件失败: {file_path}, 错误: {e}") logger.error(f"保存JSON文件失败: {file_path}, 错误: {e}")
return False return False
def smart_read_excel(file_path: Union[str, Path], **kwargs) -> Any:
    """
    Read an Excel file with pandas, choosing the engine from the file extension.

    The engine chosen here is only a default: an ``engine`` keyword passed by
    the caller is never overridden. Extensions that are not in the mapping
    below are left to pandas' own engine detection.

    Args:
        file_path: Path of the Excel file to read.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_excel``.

    Returns:
        The result of ``pd.read_excel`` (normally a pandas.DataFrame).

    Raises:
        Exception: Any error raised by ``pd.read_excel`` is logged and then
            re-raised unchanged, so callers keep their own fallback handling.
    """
    import pandas as pd

    # All OOXML variants are read by openpyxl; the legacy binary .xls format
    # needs xlrd.
    engine_by_ext = {
        '.xlsx': 'openpyxl',
        '.xlsm': 'openpyxl',
        '.xltx': 'openpyxl',
        '.xltm': 'openpyxl',
        '.xls': 'xlrd',
    }

    path_str = str(file_path)
    # Lower-case the suffix so that e.g. "REPORT.XLSX" is matched as well.
    ext = os.path.splitext(path_str)[1].lower()

    engine = engine_by_ext.get(ext)
    if engine is not None:
        # setdefault keeps an engine explicitly requested by the caller.
        kwargs.setdefault('engine', engine)

    try:
        return pd.read_excel(path_str, **kwargs)
    except Exception as e:
        logger.error(f"读取 Excel 文件失败: {path_str}, 错误: {e}")
        raise
def get_file_size(file_path: str) -> int: def get_file_size(file_path: str) -> int:
""" """
获取文件大小字节 获取文件大小字节

View File

@ -122,21 +122,15 @@ class OrderService:
try: try:
import pandas as pd import pandas as pd
import os import os
from app.core.utils.file_utils import smart_read_excel
def _read_df(path):
ap = os.path.abspath(path)
if ap.lower().endswith('.xlsx'):
return pd.read_excel(ap, engine='openpyxl')
else:
return pd.read_excel(ap, engine='xlrd')
item_path = os.path.join('templates', '商品资料.xlsx') item_path = os.path.join('templates', '商品资料.xlsx')
if not os.path.exists(item_path): if not os.path.exists(item_path):
logger.warning(f"未找到商品资料文件: {item_path}") logger.warning(f"未找到商品资料文件: {item_path}")
return [] return []
df_item = _read_df(item_path) df_item = smart_read_excel(item_path)
df_res = _read_df(result_path) df_res = smart_read_excel(result_path)
def _find_col(df, candidates, contains=None): def _find_col(df, candidates, contains=None):
cols = list(df.columns) cols = list(df.columns)

View File

@ -41,10 +41,11 @@ class SpecialSuppliersService:
return c return c
return None return None
from app.core.utils.file_utils import smart_read_excel
try: try:
df_raw = pd.read_excel(src_path, header=2) df_raw = smart_read_excel(src_path, header=2)
except Exception: except Exception:
df_raw = pd.read_excel(src_path) df_raw = smart_read_excel(src_path)
df_raw = df_raw.iloc[2:].reset_index(drop=True) df_raw = df_raw.iloc[2:].reset_index(drop=True)
# 去除全空列与行 # 去除全空列与行

View File

@ -165,8 +165,9 @@ class TobaccoService:
columns = ['商品', '盒码', '条码', '建议零售价', '批发价', '需求量', '订单量', '金额'] columns = ['商品', '盒码', '条码', '建议零售价', '批发价', '需求量', '订单量', '金额']
try: try:
from app.core.utils.file_utils import smart_read_excel
# 读取Excel文件 # 读取Excel文件
df_old = pd.read_excel(file_path, header=None, skiprows=3, names=columns) df_old = smart_read_excel(file_path, header=None, skiprows=3, names=columns)
# 过滤订单量不为0的数据并计算采购量和单价 # 过滤订单量不为0的数据并计算采购量和单价
df_filtered = df_old[df_old['订单量'] != 0].copy() df_filtered = df_old[df_old['订单量'] != 0].copy()

View File

@ -27,5 +27,4 @@ max_file_size_mb = 4
purchase_order = 银豹-采购单模板.xls purchase_order = 银豹-采购单模板.xls
[App] [App]
version = 2026.03.25.2048 version = 2026.03.30.1036

View File

@ -22,6 +22,7 @@ import pandas as pd
import json import json
import re import re
import logging import logging
import queue
from typing import Dict, List, Optional, Any from typing import Dict, List, Optional, Any
from pathlib import Path from pathlib import Path
@ -1043,8 +1044,11 @@ class LogRedirector:
self.terminal.flush() # 确保终端也被刷新 self.terminal.flush() # 确保终端也被刷新
# 全局日志队列用于异步更新UI
LOG_QUEUE = queue.Queue()
class GUILogHandler(logging.Handler): class GUILogHandler(logging.Handler):
"""自定义日志处理器将日志输出到GUI界面""" """自定义日志处理器,将日志放入队列由GUI主线程定时消费"""
def __init__(self, text_widget): def __init__(self, text_widget):
super().__init__() super().__init__()
self.text_widget = text_widget self.text_widget = text_widget
@ -1062,17 +1066,31 @@ class GUILogHandler(logging.Handler):
else: else:
tag = "normal" tag = "normal"
# 在UI线程中更新文本控件 # 将日志信息和标签放入全局队列
self.text_widget.after(0, lambda: self._update_text_widget(msg + "\n", tag)) LOG_QUEUE.put((msg + "\n", tag))
except Exception: except Exception:
self.handleError(record) self.handleError(record)
def _update_text_widget(self, message, tag): def poll_log_queue(text_widget):
"""在UI线程中更新文本控件""" """定期从队列中读取日志并更新UI"""
self.text_widget.configure(state=tk.NORMAL) try:
self.text_widget.insert(tk.END, message, tag) # 一次性处理队列中所有的待显示日志
self.text_widget.see(tk.END) updated = False
self.text_widget.configure(state=tk.DISABLED) while not LOG_QUEUE.empty():
msg, tag = LOG_QUEUE.get_nowait()
text_widget.configure(state=tk.NORMAL)
text_widget.insert(tk.END, msg, tag)
updated = True
if updated:
text_widget.see(tk.END)
text_widget.configure(state=tk.DISABLED)
except Exception:
pass
finally:
# 每100ms轮询一次
text_widget.after(100, lambda: poll_log_queue(text_widget))
def init_gui_logger(text_widget, level=logging.INFO): def init_gui_logger(text_widget, level=logging.INFO):
handler = GUILogHandler(text_widget) handler = GUILogHandler(text_widget)
@ -1835,6 +1853,9 @@ def main():
log_text.tag_configure("warning", foreground=THEMES[THEME_MODE]["warning"], font=("Consolas", 9, "bold")) log_text.tag_configure("warning", foreground=THEMES[THEME_MODE]["warning"], font=("Consolas", 9, "bold"))
log_text.tag_configure("info", foreground=THEMES[THEME_MODE]["info"], font=("Consolas", 9)) log_text.tag_configure("info", foreground=THEMES[THEME_MODE]["info"], font=("Consolas", 9))
# 启动日志队列轮询
poll_log_queue(log_text)
# 初始化日志内容 # 初始化日志内容
add_to_log(log_text, "欢迎使用 益选-OCR订单处理系统 v1.1.0\n", "success") add_to_log(log_text, "欢迎使用 益选-OCR订单处理系统 v1.1.0\n", "success")
add_to_log(log_text, "系统已就绪,请选择相应功能进行操作。\n\n", "info") add_to_log(log_text, "系统已就绪,请选择相应功能进行操作。\n\n", "info")
@ -2289,11 +2310,14 @@ def main():
pass pass
def add_to_log(log_widget, text, tag="normal"): def add_to_log(log_widget, text, tag="normal"):
"""向日志窗口添加文本,支持样式标签""" """向日志队列添加文本,由 poll_log_queue 消费并更新 UI"""
log_widget.configure(state=tk.NORMAL) # 兼容性处理:如果 log_widget 是 None(例如在 headless 模式下运行部分逻辑)
log_widget.insert(tk.END, text, tag) if log_widget is None:
log_widget.see(tk.END) # 自动滚动到底部 print(f"[{tag}] {text}", end="")
log_widget.configure(state=tk.DISABLED) return
# 将日志信息和标签放入全局队列,实现异步更新
LOG_QUEUE.put((text, tag))
def select_file(log_widget, file_types=[("所有文件", "*.*")], title="选择文件"): def select_file(log_widget, file_types=[("所有文件", "*.*")], title="选择文件"):
"""通用文件选择对话框""" """通用文件选择对话框"""