fix: sync/barcode/memory overhaul + detailed logs + preview + result tracking

- Sync: fix GiteaSync constructor + add push()/pull() methods - Barcode: two-tab layout matching GUI (mapping + special rules) - Memory: spec→specification unification, manual add, confidence/price tracking - Processing: TaskLogHandler captures detailed logs (barcode mapping, unit conversion) - Preview: fullscreen dialog for file preview (image/Excel) in Orders/Tables/Images - Detail: per-file log filtering in file pages - Tasks: result files now per-task, add copy path button - Config: reactive edited state + save_config fix - Dashboard: sync task isolation, log limit 10 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-05 19:37:10 +08:00
parent c18039f790
commit 81bafaf557
20 changed files with 1610 additions and 502 deletions
@@ -17,13 +17,22 @@ _mappings_file = _project_root / "config" / "barcode_mappings.json"

 class BarcodeMapping(BaseModel):
    barcode: str
-    target: str
+    target: Optional[str] = None
    description: Optional[str] = None
+    # Special rule fields
+    multiplier: Optional[int] = None
+    target_unit: Optional[str] = None
+    fixed_price: Optional[float] = None
+    specification: Optional[str] = None


 class BarcodeMappingUpdate(BaseModel):
    target: Optional[str] = None
    description: Optional[str] = None
+    multiplier: Optional[int] = None
+    target_unit: Optional[str] = None
+    fixed_price: Optional[float] = None
+    specification: Optional[str] = None


 def _load_mappings() -> Dict:
@@ -51,12 +60,29 @@ async def list_barcodes(
        if isinstance(info, dict):
            target = info.get("map_to", info.get("target", ""))
            desc = info.get("description", "")
+            item = {
+                "barcode": barcode,
+                "target": target,
+                "description": desc,
+                "multiplier": info.get("multiplier"),
+                "target_unit": info.get("target_unit"),
+                "fixed_price": info.get("fixed_price"),
+                "specification": info.get("specification"),
+            }
        else:
-            target = str(info)
-            desc = ""
-        if search and search not in barcode and search not in target and search not in desc:
+            item = {
+                "barcode": barcode,
+                "target": str(info),
+                "description": "",
+                "multiplier": None,
+                "target_unit": None,
+                "fixed_price": None,
+                "specification": None,
+            }
+        s = search.lower() if search else ""
+        if s and s not in barcode.lower() and s not in item["target"].lower() and s not in (desc or "").lower():
            continue
-        items.append({"barcode": barcode, "target": target, "description": desc})
+        items.append(item)
    return {"items": items, "total": len(items)}


@@ -82,9 +108,22 @@ async def create_barcode(
    mappings = _load_mappings()
    if body.barcode in mappings:
        raise HTTPException(409, f"条码 {body.barcode} 已存在")
-    mappings[body.barcode] = {"map_to": body.target, "description": body.description or ""}
+
+    entry: dict = {"description": body.description or ""}
+    if body.multiplier:
+        entry["multiplier"] = body.multiplier
+        if body.target_unit:
+            entry["target_unit"] = body.target_unit
+        if body.fixed_price is not None:
+            entry["fixed_price"] = body.fixed_price
+        if body.specification:
+            entry["specification"] = body.specification
+    else:
+        entry["map_to"] = body.target or ""
+
+    mappings[body.barcode] = entry
    _save_mappings(mappings)
-    return {"message": f"已创建映射 {body.barcode} → {body.target}"}
+    return {"message": f"已创建规则 {body.barcode}"}


@router.put("/{barcode}")
@@ -95,20 +134,35 @@ async def update_barcode(
 ):
    mappings = _load_mappings()
    if barcode not in mappings:
-        raise HTTPException(404, f"未找到条码映射 {barcode}")
+        raise HTTPException(404, f"未找到条码规则 {barcode}")

    existing = mappings[barcode]
    if not isinstance(existing, dict):
        existing = {"map_to": str(existing), "description": ""}

-    if body.target is not None:
+    # Check if this is a special rule (has multiplier) or being converted to one
+    if body.multiplier is not None:
+        # Convert to special rule: remove map_to, add multiplier fields
+        existing.pop("map_to", None)
+        existing["multiplier"] = body.multiplier
+        if body.target_unit is not None:
+            existing["target_unit"] = body.target_unit
+        if body.fixed_price is not None:
+            existing["fixed_price"] = body.fixed_price
+        if body.specification is not None:
+            existing["specification"] = body.specification
+    elif body.target is not None:
+        # Convert to simple mapping: remove special fields, add map_to
+        for k in ("multiplier", "target_unit", "fixed_price", "specification"):
+            existing.pop(k, None)
        existing["map_to"] = body.target
+
    if body.description is not None:
        existing["description"] = body.description

    mappings[barcode] = existing
    _save_mappings(mappings)
-    return {"message": f"已更新映射 {barcode}"}
+    return {"message": f"已更新规则 {barcode}"}


@router.delete("/{barcode}")
@@ -7,7 +7,7 @@ from pathlib import Path
 from typing import List, Optional

 from fastapi import APIRouter, HTTPException, UploadFile, File, Depends, Query, Request
-from fastapi.responses import FileResponse
+from fastapi.responses import FileResponse, JSONResponse
 from pydantic import BaseModel

 from ..auth.dependencies import get_current_user, get_current_user_flexible
@@ -267,10 +267,13 @@ async def get_file_relations(
    status: Optional[str] = None,
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
+    sort_by: Optional[str] = None,
+    sort_order: str = "desc",
    current_user: dict = Depends(get_current_user),
 ):
    """Query file relations with optional view filter."""
-    items, total = query_file_relations(view=view, status=status, page=page, page_size=page_size)
+    items, total = query_file_relations(view=view, status=status, page=page, page_size=page_size,
+                                        sort_by=sort_by, sort_order=sort_order)
    return {"items": items, "total": total}


@@ -299,3 +302,47 @@ async def delete_relations(
    """Delete file relation records by IDs."""
    delete_file_relations(body.ids)
    return {"message": f"已删除 {len(body.ids)} 条关系记录"}
+
+
+# ---------------------------------------------------------------------------
+# File preview
+# ---------------------------------------------------------------------------
+
+@router.get("/preview/{directory}/{filename:path}")
+async def preview_file(
+    directory: str,
+    filename: str,
+    current_user: dict = Depends(get_current_user),
+):
+    """Preview file content: images served directly, Excel returned as JSON grid."""
+    # Security: only allow specific directories
+    if directory not in ("input", "output", "result"):
+        raise HTTPException(403, "不允许访问该目录")
+
+    dir_map = {"input": _input_dir, "output": _output_dir, "result": _result_dir}
+    file_path = dir_map[directory] / filename
+    if not file_path.is_file():
+        raise HTTPException(404, f"文件不存在: {filename}")
+
+    ext = file_path.suffix.lower()
+    # Images: serve directly
+    if ext in ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.webp'):
+        return FileResponse(str(file_path))
+
+    # Excel: read and return as JSON grid
+    if ext in ('.xls', '.xlsx'):
+        try:
+            import pandas as pd
+            from fastapi.responses import JSONResponse
+            df = pd.read_excel(str(file_path), header=None)
+            # Fill NaN with empty string
+            df = df.fillna('')
+            rows = []
+            for _, row in df.iterrows():
+                rows.append([str(v) if v != '' else '' for v in row])
+            # Limit to first 200 rows
+            return JSONResponse({"type": "excel", "rows": rows[:200], "total_rows": len(rows)})
+        except Exception as e:
+            raise HTTPException(500, f"读取文件失败: {e}")
+
+    raise HTTPException(400, f"不支持预览的文件类型: {ext}")
@@ -18,18 +18,31 @@ _excel_source = str(_project_root / "templates" / "商品资料.xlsx")
 class MemoryItem(BaseModel):
    barcode: str
    name: str
-    spec: Optional[str] = None
+    specification: Optional[str] = None
    unit: Optional[str] = None
    price: Optional[float] = None
+    avg_price: Optional[float] = None
+    min_price: Optional[float] = None
+    max_price: Optional[float] = None
+    price_count: int = 0
    confidence: int = 0
    source: str = "ocr"
    last_used: Optional[str] = None
    use_count: int = 0


+class MemoryCreate(BaseModel):
+    barcode: str
+    name: Optional[str] = ""
+    specification: Optional[str] = None
+    unit: Optional[str] = None
+    price: Optional[float] = None
+    confidence: int = 50
+
+
 class MemoryUpdate(BaseModel):
    name: Optional[str] = None
-    spec: Optional[str] = None
+    specification: Optional[str] = None
    unit: Optional[str] = None
    price: Optional[float] = None
    confidence: Optional[int] = None
@@ -51,9 +64,13 @@ def _row_to_item(row: Dict) -> MemoryItem:
    return MemoryItem(
        barcode=row.get("barcode", ""),
        name=row.get("name", ""),
-        spec=row.get("spec"),
+        specification=row.get("specification"),
        unit=row.get("unit"),
        price=row.get("price"),
+        avg_price=row.get("avg_price"),
+        min_price=row.get("min_price"),
+        max_price=row.get("max_price"),
+        price_count=row.get("price_count", 0),
        confidence=row.get("confidence", 0),
        source=row.get("source", "ocr"),
        last_used=row.get("last_used"),
@@ -99,6 +116,25 @@ async def get_memory(
    return product


+@router.post("")
+async def create_memory(
+    body: MemoryCreate,
+    current_user: dict = Depends(get_current_user),
+):
+    db = _get_db()
+    existing = db.get_memory(body.barcode)
+    if existing:
+        raise HTTPException(409, f"条码 {body.barcode} 已存在，请使用编辑功能")
+    db.learn_from_product({
+        "barcode": body.barcode,
+        "name": body.name or "",
+        "specification": body.specification or "",
+        "unit": body.unit or "",
+        "price": body.price or 0,
+    }, source="user_confirmed")
+    return {"message": f"已创建记忆记录 {body.barcode}"}
+
+
@router.put("/{barcode}")
 async def update_memory(
    barcode: str,
@@ -1,8 +1,10 @@
 """Processing endpoints: OCR, Excel conversion, merge, and full pipeline."""

 import asyncio
+import logging
 import os
 import sys
+import threading
 import traceback
 from pathlib import Path
 from typing import Optional, List
@@ -18,6 +20,66 @@ router = APIRouter(prefix="/api/processing", tags=["processing"])

 _wrapper = ServiceWrapper(max_workers=3)

+# ── Thread-safe log capture ──
+_tlocal = threading.local()
+
+
+class TaskLogHandler(logging.Handler):
+    """Capture all log records during task execution and forward to tm.add_log()"""
+
+    def emit(self, record: logging.LogRecord):
+        ctx = getattr(_tlocal, 'ctx', None)
+        if ctx:
+            tm = ctx.get('tm')
+            task_id = ctx.get('task_id')
+            if tm and task_id:
+                msg = self.format(record)
+                if any(skip in msg for skip in ['DEBUG:', 'urllib3', 'charset_normalizer']):
+                    return
+                tm.add_log(task_id, msg)
+
+
+_log_handler = TaskLogHandler()
+_log_handler.setLevel(logging.DEBUG)
+_log_handler.setFormatter(logging.Formatter('%(message)s'))
+_root_logger = logging.getLogger()
+_configured = False
+
+
+def _setup_log_capture():
+    global _configured
+    if not _configured:
+        _root_logger.addHandler(_log_handler)
+        _configured = True
+
+
+def _start_log_capture(tm, task_id: str):
+    _setup_log_capture()
+    _root_logger.setLevel(logging.DEBUG)
+    _tlocal.ctx = {'tm': tm, 'task_id': task_id}
+
+
+def _stop_log_capture():
+    _tlocal.ctx = None
+
+
+def _add_result_file(name: str):
+    files = getattr(_tlocal, 'result_files', None)
+    if files is not None:
+        files.append(name)
+
+
+def _wrap_with_capture(tm, task_id, func):
+    """Wrap a do_work function with log capture setup/teardown."""
+    def wrapped():
+        _start_log_capture(tm, task_id)
+        _tlocal.result_files = []
+        try:
+            return func()
+        finally:
+            _stop_log_capture()
+    return wrapped
+
 _project_root = Path(__file__).resolve().parent.parent.parent.parent
 _input_dir = _project_root / "data" / "input"
 _output_dir = _project_root / "data" / "output"
@@ -74,6 +136,92 @@ def _run_background(coro):
    asyncio.ensure_future(coro)


+def _run_background_with_log(coro, tm, task_id: str):
+    """Schedule a coroutine with log capture during execution."""
+
+    async def _wrapped():
+        _start_log_capture(tm, task_id)
+        try:
+            await coro
+        finally:
+            _stop_log_capture()
+
+    asyncio.ensure_future(_wrapped())
+
+
+def _get_product_db():
+    from app.core.db.product_db import ProductDatabase
+    return ProductDatabase(
+        str(_project_root / 'data' / 'product_cache.db'),
+        str(_project_root / 'templates' / '商品资料.xlsx')
+    )
+
+
+def _learn_products_from_excel(excel_path: Path, tm, task_id, source: str = 'ocr'):
+    """从处理后的Excel文件学习商品数据到记忆库"""
+    try:
+        from app.core.utils.file_utils import smart_read_excel
+        df = smart_read_excel(str(excel_path))
+        if df is None or df.empty:
+            return
+    except Exception:
+        return
+
+    from app.core.handlers.column_mapper import ColumnMapper
+    barcode_col = ColumnMapper.find_column(list(df.columns), 'barcode')
+    if not barcode_col:
+        return
+    name_col = ColumnMapper.find_column(list(df.columns), 'name')
+    spec_col = ColumnMapper.find_column(list(df.columns), 'specification')
+    unit_col = ColumnMapper.find_column(list(df.columns), 'unit')
+    price_col = ColumnMapper.find_column(list(df.columns), 'unit_price') or ColumnMapper.find_column(list(df.columns), 'price')
+
+    db = _get_product_db()
+    barcodes = [str(r.get(barcode_col, '')).strip() for _, r in df.iterrows() if str(r.get(barcode_col, '')).strip()]
+    memory = db.load_batch(barcodes)
+
+    learned = 0
+    for _, row in df.iterrows():
+        barcode = str(row.get(barcode_col, '')).strip()
+        if not barcode or barcode == 'nan':
+            continue
+        price = 0.0
+        if price_col:
+            try:
+                p = row.get(price_col)
+                if p is not None and str(p).strip() not in ('', 'nan', 'None'):
+                    price = float(p)
+            except (ValueError, TypeError):
+                pass
+
+        product = {
+            'barcode': barcode,
+            'name': str(row.get(name_col, '')).strip() if name_col else '',
+            'specification': str(row.get(spec_col, '')).strip() if spec_col else '',
+            'unit': str(row.get(unit_col, '')).strip() if unit_col else '',
+            'price': price,
+        }
+
+        # 1. 记忆辅助补全
+        filled, fill_log = db.fill_from_memory(barcode, product, memory)
+        if fill_log:
+            tm.add_log(task_id, f"  {fill_log}")
+
+        # 2. 价格预警
+        warn = db.price_warning(barcode, price, memory)
+        if warn:
+            tm.add_log(task_id, f"  {warn}")
+
+        # 3. 学习
+        log = db.learn_from_product(filled, source=source, memory=memory, add_log=None)
+        if log:
+            tm.add_log(task_id, f"  {log}")
+            learned += 1
+
+    if learned:
+        tm.add_log(task_id, f"[记忆库] 从 {excel_path.name} 学习了 {learned} 条商品数据")
+
+
 # ---------------------------------------------------------------------------
 # Batch endpoints
 # ---------------------------------------------------------------------------
@@ -117,16 +265,23 @@ async def ocr_batch(
                    for ext in ['.xlsx', '.xls']:
                        candidate = _output_dir / f"{out_stem}{ext}"
                        if candidate.exists():
-                            upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done')
+                            upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
+                            _add_result_file(candidate.name)
                            break
                    tm.add_log(task.id, f"[OCR] 完成: {f.name}")
+                    # Learn products into memory from OCR output
+                    out_file = _output_dir / f"{out_stem}.xlsx"
+                    if not out_file.exists():
+                        out_file = _output_dir / f"{out_stem}.xls"
+                    if out_file.exists():
+                        _learn_products_from_excel(out_file, tm, task.id, source='ocr')
                except Exception as e:
                    tm.add_log(task.id, f"[OCR] 失败: {f.name} - {e}")

-            result_files = [f.name for f in _output_dir.iterdir() if f.is_file()]
+            result_files = list(getattr(_tlocal, 'result_files', []))
            tm.set_completed(task.id, result_files=result_files, message=f"OCR完成，共处理 {total} 个文件")

-        await _wrapper.run_sync(do_work)
+        await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))

    _run_background(_bg())
    return TaskResponse(task_id=task.id, status="accepted", message="OCR任务已创建")
@@ -162,7 +317,7 @@ async def process_excel(
                result_path = _result_dir / result_name
                if result_path.exists():
                    tm.add_log(task.id, f"[跳过] {f.name} 已处理过 → {result_name}")
-                    upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
+                    upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
                    continue

                tm.update_progress(task.id, int((i / total) * 100), f"正在处理: {f.name}")
@@ -171,15 +326,19 @@ async def process_excel(
                    svc.process_excel(str(f))
                    # Find result file
                    if result_path.exists():
-                        upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
+                        upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
+                        _add_result_file(result_name)
                    tm.add_log(task.id, f"[Excel] 完成: {f.name}")
+                    # Learn products into memory from purchase order result
+                    if result_path.exists():
+                        _learn_products_from_excel(result_path, tm, task.id, source='ocr')
                except Exception as e:
                    tm.add_log(task.id, f"[Excel] 失败: {f.name} - {e}")

-            result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
+            result_files = list(getattr(_tlocal, 'result_files', []))
            tm.set_completed(task.id, result_files=result_files, message=f"Excel处理完成，共 {total} 个文件")

-        await _wrapper.run_sync(do_work)
+        await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))

    _run_background(_bg())
    return TaskResponse(task_id=task.id, status="accepted", message="Excel处理任务已创建")
@@ -224,7 +383,7 @@ async def merge_orders(
                tm.add_log(task.id, f"[合并] 失败: {e}")
                tm.set_failed(task.id, str(e))

-        await _wrapper.run_sync(do_work)
+        await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))

    _run_background(_bg())
    return TaskResponse(task_id=task.id, status="accepted", message="合并任务已创建")
@@ -271,9 +430,14 @@ async def full_pipeline(
                        for ext in ['.xlsx', '.xls']:
                            candidate = _output_dir / f"{out_stem}{ext}"
                            if candidate.exists():
-                                upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done')
+                                upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
                                break
                        tm.add_log(task.id, f"[OCR] 完成: {f.name}")
+                        out_file = _output_dir / f"{out_stem}.xlsx"
+                        if not out_file.exists():
+                            out_file = _output_dir / f"{out_stem}.xls"
+                        if out_file.exists():
+                            _learn_products_from_excel(out_file, tm, task.id, source='ocr')
                    except Exception as e:
                        tm.add_log(task.id, f"[OCR] 失败: {f.name} - {e}")

@@ -292,7 +456,7 @@ async def full_pipeline(
                    result_path = _result_dir / result_name
                    if result_path.exists():
                        tm.add_log(task.id, f"[跳过] {f.name} 已处理过 → {result_name}")
-                        upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
+                        upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
                        tm.update_progress(task.id, pct, f"跳过: {f.name}")
                        continue

@@ -300,19 +464,21 @@ async def full_pipeline(
                    try:
                        order_svc.process_excel(str(f))
                        if result_path.exists():
-                            upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
+                            upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
                        tm.add_log(task.id, f"[Excel] 完成: {f.name}")
+                        if result_path.exists():
+                            _learn_products_from_excel(result_path, tm, task.id, source='ocr')
                    except Exception as e:
                        tm.add_log(task.id, f"[Excel] 失败: {f.name} - {e}")

-                result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
+                result_files = list(getattr(_tlocal, 'result_files', []))
                tm.set_completed(task.id, result_files=result_files, message="全流程处理完成（不含合并）")
            except Exception as e:
                tb = traceback.format_exc()
                tm.add_log(task.id, f"[错误] {tb}")
                tm.set_failed(task.id, str(e))

-        await _wrapper.run_sync(do_work)
+        await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))

    _run_background(_bg())
    return TaskResponse(task_id=task.id, status="accepted", message="全流程任务已创建")
@@ -349,16 +515,16 @@ async def ocr_single(
                for ext in ['.xlsx', '.xls']:
                    candidate = _output_dir / f"{stem}{ext}"
                    if candidate.exists():
-                        upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done')
+                        upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
                        break
                tm.add_log(task.id, f"[OCR] 完成: {body.filename}")
-                result_files = [f.name for f in _output_dir.iterdir() if f.is_file()]
+                result_files = list(getattr(_tlocal, 'result_files', []))
                tm.set_completed(task.id, result_files=result_files, message=f"OCR完成: {body.filename}")
            except Exception as e:
                tm.add_log(task.id, f"[OCR] 失败: {e}")
                tm.set_failed(task.id, str(e))

-        await _wrapper.run_sync(do_work)
+        await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))

    _run_background(_bg())
    return TaskResponse(task_id=task.id, status="accepted", message=f"OCR任务已创建: {body.filename}")
@@ -390,13 +556,13 @@ async def excel_single(
                if (_result_dir / result_name).exists():
                    upsert_file_relation(output_excel=body.filename, result_purchase=result_name, status='done')
                tm.add_log(task.id, f"[Excel] 完成: {body.filename}")
-                result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
+                result_files = list(getattr(_tlocal, 'result_files', []))
                tm.set_completed(task.id, result_files=result_files, message=f"Excel处理完成: {body.filename}")
            except Exception as e:
                tm.add_log(task.id, f"[Excel] 失败: {e}")
                tm.set_failed(task.id, str(e))

-        await _wrapper.run_sync(do_work)
+        await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))

    _run_background(_bg())
    return TaskResponse(task_id=task.id, status="accepted", message=f"Excel处理任务已创建: {body.filename}")
@@ -432,13 +598,13 @@ async def pipeline_single(
                if out_xlsx.exists() or out_xls.exists():
                    out_name = out_xlsx.name if out_xlsx.exists() else out_xls.name
                    tm.add_log(task.id, f"[跳过] 已OCR过 → {out_name}")
-                    upsert_file_relation(input_image=body.filename, output_excel=out_name, status='ocr_done')
+                    upsert_file_relation(input_image=body.filename, output_excel=out_name, status='ocr_done'); _add_result_file(out_name)
                else:
                    ocr_svc.process_image(str(file_path))
                    for ext in ['.xlsx', '.xls']:
                        candidate = _output_dir / f"{stem}{ext}"
                        if candidate.exists():
-                            upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done')
+                            upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
                            break
                    tm.add_log(task.id, f"[OCR] 完成")

@@ -464,14 +630,14 @@ async def pipeline_single(
                    else:
                        tm.add_log(task.id, f"[错误] OCR未生成Excel文件")

-                result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
+                result_files = list(getattr(_tlocal, 'result_files', []))
                tm.set_completed(task.id, result_files=result_files, message=f"全流程完成: {body.filename}")
            except Exception as e:
                tb = traceback.format_exc()
                tm.add_log(task.id, f"[错误] {tb}")
                tm.set_failed(task.id, str(e))

-        await _wrapper.run_sync(do_work)
+        await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))

    _run_background(_bg())
    return TaskResponse(task_id=task.id, status="accepted", message=f"全流程任务已创建: {body.filename}")
@@ -511,7 +677,7 @@ async def merge_batch(
                tm.add_log(task.id, f"[合并] 失败: {e}")
                tm.set_failed(task.id, str(e))

-        await _wrapper.run_sync(do_work)
+        await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))

    _run_background(_bg())
    return TaskResponse(task_id=task.id, status="accepted", message="批量合并任务已创建")
@@ -1,5 +1,6 @@
 """Cloud sync endpoints (Gitea-based)."""

+from concurrent.futures import ThreadPoolExecutor
 from pathlib import Path

 from fastapi import APIRouter, HTTPException, Depends, Request
@@ -23,7 +24,30 @@ def _get_sync():
    from app.core.utils.cloud_sync import GiteaSync
    from app.config.settings import ConfigManager
    cfg = ConfigManager()
-    return GiteaSync(cfg)
+    return GiteaSync.from_config(cfg)
+
+
+def _run_sync_in_thread(tm, task_id, action_name, sync_method):
+    """Run a blocking sync operation in a thread."""
+
+    def _run():
+        try:
+            tm.update_progress(task_id, 10, "正在初始化同步...")
+            sync = _get_sync()
+            if sync is None:
+                tm.set_failed(task_id, "Gitea 配置不完整，请先在系统配置中设置 base_url/owner/repo/token")
+                return
+            tm.update_progress(task_id, 30, f"正在{action_name}文件...")
+            tm.add_log(task_id, f"[{action_name}] 开始{action_name}")
+            result = sync_method(sync)
+            tm.add_log(task_id, f"[{action_name}] 完成: {result}")
+            tm.set_completed(task_id, message=f"{action_name}完成")
+        except Exception as e:
+            tm.set_failed(task_id, str(e))
+
+    pool = ThreadPoolExecutor(max_workers=1)
+    pool.submit(_run)
+    pool.shutdown(wait=False)


@router.post("/push", response_model=SyncResponse)
@@ -33,21 +57,7 @@ async def sync_push(
 ):
    tm = request.state.task_manager
    task = tm.create_task("推送到云端")
-
-    async def _run():
-        try:
-            tm.update_progress(task.id, 10, "正在初始化同步...")
-            sync = _get_sync()
-            tm.update_progress(task.id, 30, "正在推送文件...")
-            tm.add_log(task.id, "[Push] 开始推送")
-            result = sync.push()
-            tm.add_log(task.id, f"[Push] 完成: {result}")
-            tm.set_completed(task.id, message="推送完成")
-        except Exception as e:
-            tm.set_failed(task.id, str(e))
-
-    import asyncio
-    asyncio.create_task(_run())
+    _run_sync_in_thread(tm, task.id, "Push", lambda s: s.push())
    return SyncResponse(task_id=task.id, status="accepted", message="推送任务已创建")


@@ -58,21 +68,7 @@ async def sync_pull(
 ):
    tm = request.state.task_manager
    task = tm.create_task("从云端拉取")
-
-    async def _run():
-        try:
-            tm.update_progress(task.id, 10, "正在初始化同步...")
-            sync = _get_sync()
-            tm.update_progress(task.id, 30, "正在拉取文件...")
-            tm.add_log(task.id, "[Pull] 开始拉取")
-            result = sync.pull()
-            tm.add_log(task.id, f"[Pull] 完成: {result}")
-            tm.set_completed(task.id, message="拉取完成")
-        except Exception as e:
-            tm.set_failed(task.id, str(e))
-
-    import asyncio
-    asyncio.create_task(_run())
+    _run_sync_in_thread(tm, task.id, "Pull", lambda s: s.pull())
    return SyncResponse(task_id=task.id, status="accepted", message="拉取任务已创建")


@@ -83,10 +79,11 @@ async def sync_status(
    try:
        from app.config.settings import ConfigManager
        cfg = ConfigManager()
-        base_url = cfg.get("Gitea", "base_url", fallback="")
-        owner = cfg.get("Gitea", "owner", fallback="")
-        repo = cfg.get("Gitea", "repo", fallback="")
-        enabled = bool(base_url and owner and repo)
+        base_url = cfg.get("Gitea", "base_url", fallback="").strip()
+        owner = cfg.get("Gitea", "owner", fallback="").strip()
+        repo = cfg.get("Gitea", "repo", fallback="").strip()
+        token = cfg.get("Gitea", "token", fallback="").strip()
+        enabled = bool(base_url and owner and repo and token)
        repo_url = f"{base_url}/{owner}/{repo}" if enabled else ""
        return {"enabled": enabled, "repo_url": repo_url}
    except Exception:
@@ -475,7 +475,8 @@ def upsert_file_relation(input_image: str = None, output_excel: str = None,


 def query_file_relations(view: str = None, status: str = None,
-                         page: int = 1, page_size: int = 50) -> tuple[list[dict], int]:
+                         page: int = 1, page_size: int = 50,
+                         sort_by: str = None, sort_order: str = "desc") -> tuple[list[dict], int]:
    """Query file relations with optional view filter and pagination.

    view='orders': only rows with result_purchase, sorted by result_purchase
@@ -508,6 +509,13 @@ def query_file_relations(view: str = None, status: str = None,

        where = (" WHERE " + " AND ".join(clauses)) if clauses else ""

+        # Sort
+        if sort_by and sort_by in ('created_at', 'updated_at', 'input_image', 'output_excel', 'result_purchase', 'status'):
+            sort_col = sort_by
+        else:
+            sort_col = order_by.split()[0] if order_by else 'id'
+        sort_dir = 'DESC' if sort_order.lower() == 'desc' else 'ASC'
+
        # Count
        row = conn.execute(
            f"SELECT COUNT(*) as cnt FROM file_relations{where}", params
@@ -518,7 +526,7 @@ def query_file_relations(view: str = None, status: str = None,
        offset = (page - 1) * page_size
        params.extend([page_size, offset])
        rows = conn.execute(
-            f"SELECT * FROM file_relations{where} ORDER BY {order_by} LIMIT ? OFFSET ?",
+            f"SELECT * FROM file_relations{where} ORDER BY {sort_col} {sort_dir} LIMIT ? OFFSET ?",
            params,
        ).fetchall()