refactor: 重构文件读取和日志处理以提升性能和稳定性

- 新增 smart_read_excel 工具函数，统一 Excel 读取逻辑并自动选择引擎
- 重构 ConfigManager.get_path 方法，使用 pathlib 提升路径处理可靠性
- 将 GUI 日志处理改为异步队列模式，避免 UI 阻塞
- 优化 ExcelProcessor 的表头识别逻辑，避免重复读取文件
- 更新配置文件中的版本号
2026-03-30 11:17:25 +08:00
parent bfccdd3a37
commit b7bce93995
8 changed files with 101 additions and 46 deletions
@@ -117,25 +117,29 @@ class ConfigManager:
        获取路径配置并确保它是一个有效的绝对路径
        如果create为True，则自动创建该目录
        """
-        path = self.get(section, option, fallback)
+        from pathlib import Path
+        path_str = self.get(section, option, fallback)
+        path = Path(path_str)
        
-        if not os.path.isabs(path):
-            # 相对路径，转为绝对路径
-            path = os.path.abspath(path)
+        if not path.is_absolute():
+            # 相对路径，转为绝对路径（相对于项目根目录）
+            path = Path(os.getcwd()) / path
        
-        if create and not os.path.exists(path):
+        if create:
            try:
-                # 如果是文件路径，创建其父目录
-                if '.' in os.path.basename(path):
-                    directory = os.path.dirname(path)
-                    if directory and not os.path.exists(directory):
-                        os.makedirs(directory, exist_ok=True)
-                        logger.info(f"已创建目录: {directory}")
+                # 智能判断是文件还是目录
+                # 如果有后缀名则认为是文件，创建其父目录
+                if path.suffix:
+                    directory = path.parent
+                    if not directory.exists():
+                        directory.mkdir(parents=True, exist_ok=True)
+                        logger.info(f"已创建父目录: {directory}")
                else:
                    # 否则认为是目录路径
-                    os.makedirs(path, exist_ok=True)
-                    logger.info(f"已创建目录: {path}")
+                    if not path.exists():
+                        path.mkdir(parents=True, exist_ok=True)
+                        logger.info(f"已创建目录: {path}")
            except Exception as e:
                logger.error(f"创建目录失败: {path}, 错误: {e}")
        
-        return path 
+        return str(path.absolute())
@@ -708,14 +708,18 @@ class ExcelProcessor:
                
            logger.info(f"识别到表头在第 {header_row+1} 行")
            
-            # 重新读取Excel，正确指定表头行
+            # 重新设置表头，避免二次读取
            if progress_cb:
                try:
                    progress_cb(94)
                except Exception:
                    pass
-            df = pd.read_excel(file_path, header=header_row)
-            logger.info(f"使用表头行重新读取数据，共 {len(df)} 行有效数据")
+            
+            # 使用识别到的表头行设置列名，并过滤掉表头之前的行
+            df.columns = df.iloc[header_row]
+            df = df.iloc[header_row + 1:].reset_index(drop=True)
+            
+            logger.info(f"重新整理数据结构，共 {len(df)} 行有效数据")
            
            # 提取商品信息
            if progress_cb:
@@ -219,6 +219,34 @@ def save_json(data: Any, file_path: str, ensure_ascii: bool = False, indent: int
        logger.error(f"保存JSON文件失败: {file_path}, 错误: {e}")
        return False

+def smart_read_excel(file_path: Union[str, Path], **kwargs) -> Any:
+    """
+    Read an Excel file with an engine chosen from its extension; errors are logged and re-raised.
+    
+    Args:
+        file_path: Path to the Excel file (str or Path; converted with str()).
+        **kwargs: Extra keyword arguments forwarded unchanged to pd.read_excel.
+        
+    Returns:
+        The object returned by pd.read_excel (normally a pandas.DataFrame).
+    """
+    import pandas as pd
+    
+    path_str = str(file_path)
+    ext = os.path.splitext(path_str)[1].lower()
+    
+    # Pick the engine by extension; setdefault keeps any engine the caller passed
+    if ext == '.xlsx':
+        kwargs.setdefault('engine', 'openpyxl')
+    elif ext == '.xls':
+        kwargs.setdefault('engine', 'xlrd')
+    
+    try:
+        return pd.read_excel(path_str, **kwargs)
+    except Exception as e:
+        logger.error(f"读取 Excel 文件失败: {path_str}, 错误: {e}")
+        raise
+
 def get_file_size(file_path: str) -> int:
    """
    获取文件大小（字节）
@@ -122,21 +122,15 @@ class OrderService:
        try:
            import pandas as pd
            import os
+            from app.core.utils.file_utils import smart_read_excel
            
-            def _read_df(path):
-                ap = os.path.abspath(path)
-                if ap.lower().endswith('.xlsx'):
-                    return pd.read_excel(ap, engine='openpyxl')
-                else:
-                    return pd.read_excel(ap, engine='xlrd')
-                    
            item_path = os.path.join('templates', '商品资料.xlsx')
            if not os.path.exists(item_path):
                logger.warning(f"未找到商品资料文件: {item_path}")
                return []
                
-            df_item = _read_df(item_path)
-            df_res = _read_df(result_path)
+            df_item = smart_read_excel(item_path)
+            df_res = smart_read_excel(result_path)
            
            def _find_col(df, candidates, contains=None):
                cols = list(df.columns)
@@ -41,10 +41,11 @@ class SpecialSuppliersService:
                                return c
                return None

+            from app.core.utils.file_utils import smart_read_excel
            try:
-                df_raw = pd.read_excel(src_path, header=2)
+                df_raw = smart_read_excel(src_path, header=2)
            except Exception:
-                df_raw = pd.read_excel(src_path)
+                df_raw = smart_read_excel(src_path)
                df_raw = df_raw.iloc[2:].reset_index(drop=True)

            # 去除全空列与行
@@ -165,8 +165,9 @@ class TobaccoService:
        columns = ['商品', '盒码', '条码', '建议零售价', '批发价', '需求量', '订单量', '金额']
        
        try:
+            from app.core.utils.file_utils import smart_read_excel
            # 读取Excel文件
-            df_old = pd.read_excel(file_path, header=None, skiprows=3, names=columns)
+            df_old = smart_read_excel(file_path, header=None, skiprows=3, names=columns)
            
            # 过滤订单量不为0的数据，并计算采购量和单价
            df_filtered = df_old[df_old['订单量'] != 0].copy()