Refactor processing logic and enhance error handling

- Cleaned up code in processing.py by removing inline semicolons and improving readability.
- Updated upsert_file_relation calls to ensure consistent handling of file relations.
- Enhanced query_file_relations in db_schema.py to support filtering by file existence.
- Improved API error handling in index.ts with user-friendly messages for 401 and 403 errors.
- Added online/offline status tracking in Layout.vue.
- Implemented debounced search functionality across multiple views to optimize performance.
- Introduced loading skeletons in Dashboard.vue for better user experience during data fetching.
- Enhanced file preview cleanup logic in Images.vue, Orders.vue, and Tables.vue to prevent memory leaks.
- Updated global styles to include new loading and notification animations.
2026-05-12 18:37:23 +08:00
parent 81bafaf557
commit e441ac82a8
20 changed files with 455 additions and 76 deletions
@@ -53,6 +53,7 @@ class MemoryListResponse(BaseModel):
    total: int
    page: int
    page_size: int
+    stats: Optional[Dict] = None


 def _get_db():
@@ -96,11 +97,17 @@ async def list_memory(
    start = (page - 1) * page_size
    page_items = results[start:start + page_size]

+    # Compute confidence stats from all results (not just current page)
+    high = sum(1 for r in results if r.get("confidence", 0) > 50)
+    medium = sum(1 for r in results if 10 <= r.get("confidence", 0) <= 50)
+    low = sum(1 for r in results if r.get("confidence", 0) < 10)
+
    return MemoryListResponse(
        items=[_row_to_item(r) for r in page_items],
        total=total,
        page=page,
        page_size=page_size,
+        stats={"high": high, "medium": medium, "low": low, "total": total},
    )


@@ -265,7 +265,7 @@ async def ocr_batch(
                    for ext in ['.xlsx', '.xls']:
                        candidate = _output_dir / f"{out_stem}{ext}"
                        if candidate.exists():
-                            upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
+                            upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done')
                            _add_result_file(candidate.name)
                            break
                    tm.add_log(task.id, f"[OCR] 完成: {f.name}")
@@ -317,7 +317,8 @@ async def process_excel(
                result_path = _result_dir / result_name
                if result_path.exists():
                    tm.add_log(task.id, f"[跳过] {f.name} 已处理过 → {result_name}")
-                    upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
+                    upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
+                    _add_result_file(result_name)
                    continue

                tm.update_progress(task.id, int((i / total) * 100), f"正在处理: {f.name}")
@@ -326,7 +327,7 @@ async def process_excel(
                    svc.process_excel(str(f))
                    # Find result file
                    if result_path.exists():
-                        upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
+                        upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
                        _add_result_file(result_name)
                    tm.add_log(task.id, f"[Excel] 完成: {f.name}")
                    # Learn products into memory from purchase order result
@@ -430,7 +431,8 @@ async def full_pipeline(
                        for ext in ['.xlsx', '.xls']:
                            candidate = _output_dir / f"{out_stem}{ext}"
                            if candidate.exists():
-                                upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
+                                upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done')
+                                _add_result_file(candidate.name)
                                break
                        tm.add_log(task.id, f"[OCR] 完成: {f.name}")
                        out_file = _output_dir / f"{out_stem}.xlsx"
@@ -456,7 +458,8 @@ async def full_pipeline(
                    result_path = _result_dir / result_name
                    if result_path.exists():
                        tm.add_log(task.id, f"[跳过] {f.name} 已处理过 → {result_name}")
-                        upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
+                        upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
+                        _add_result_file(result_name)
                        tm.update_progress(task.id, pct, f"跳过: {f.name}")
                        continue

@@ -464,7 +467,8 @@ async def full_pipeline(
                    try:
                        order_svc.process_excel(str(f))
                        if result_path.exists():
-                            upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
+                            upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
+                        _add_result_file(result_name)
                        tm.add_log(task.id, f"[Excel] 完成: {f.name}")
                        if result_path.exists():
                            _learn_products_from_excel(result_path, tm, task.id, source='ocr')
@@ -515,7 +519,8 @@ async def ocr_single(
                for ext in ['.xlsx', '.xls']:
                    candidate = _output_dir / f"{stem}{ext}"
                    if candidate.exists():
-                        upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
+                        upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done')
+                        _add_result_file(candidate.name)
                        break
                tm.add_log(task.id, f"[OCR] 完成: {body.filename}")
                result_files = list(getattr(_tlocal, 'result_files', []))
@@ -598,13 +603,15 @@ async def pipeline_single(
                if out_xlsx.exists() or out_xls.exists():
                    out_name = out_xlsx.name if out_xlsx.exists() else out_xls.name
                    tm.add_log(task.id, f"[跳过] 已OCR过 → {out_name}")
-                    upsert_file_relation(input_image=body.filename, output_excel=out_name, status='ocr_done'); _add_result_file(out_name)
+                    upsert_file_relation(input_image=body.filename, output_excel=out_name, status='ocr_done')
+                    _add_result_file(out_name)
                else:
                    ocr_svc.process_image(str(file_path))
                    for ext in ['.xlsx', '.xls']:
                        candidate = _output_dir / f"{stem}{ext}"
                        if candidate.exists():
-                            upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
+                            upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done')
+                            _add_result_file(candidate.name)
                            break
                    tm.add_log(task.id, f"[OCR] 完成")

@@ -476,16 +476,21 @@ def upsert_file_relation(input_image: str = None, output_excel: str = None,

 def query_file_relations(view: str = None, status: str = None,
                         page: int = 1, page_size: int = 50,
-                         sort_by: str = None, sort_order: str = "desc") -> tuple[list[dict], int]:
+                         sort_by: str = None, sort_order: str = "desc",
+                         exists_only: bool = True) -> tuple[list[dict], int]:
    """Query file relations with optional view filter and pagination.

    view='orders': only rows with result_purchase, sorted by result_purchase
    view='tables': only rows with output_excel, sorted by output_excel
    view='images': only rows with input_image, sorted by input_image
    view=None: all rows
+    exists_only=True: for a given view, only return rows where the primary file
+                      still exists on disk (input_image for images,
+                      output_excel for tables, result_purchase for orders)

    Returns (items, total).
    """
+    project_root = Path(__file__).resolve().parent.parent.parent.parent
    conn = sqlite3.connect(_db_path)
    conn.row_factory = sqlite3.Row
    try:
@@ -516,22 +521,13 @@ def query_file_relations(view: str = None, status: str = None,
            sort_col = order_by.split()[0] if order_by else 'id'
        sort_dir = 'DESC' if sort_order.lower() == 'desc' else 'ASC'

-        # Count
-        row = conn.execute(
-            f"SELECT COUNT(*) as cnt FROM file_relations{where}", params
-        ).fetchone()
-        total = row[0] if row else 0
-
-        # Page
-        offset = (page - 1) * page_size
-        params.extend([page_size, offset])
+        # Fetch all matching rows (existence filter happens in Python)
        rows = conn.execute(
-            f"SELECT * FROM file_relations{where} ORDER BY {sort_col} {sort_dir} LIMIT ? OFFSET ?",
+            f"SELECT * FROM file_relations{where} ORDER BY {sort_col} {sort_dir}",
            params,
        ).fetchall()

        items = []
-        project_root = Path(__file__).resolve().parent.parent.parent.parent
        for r in rows:
            d = dict(r)
            # Check file existence
@@ -547,8 +543,24 @@ def query_file_relations(view: str = None, status: str = None,
                d['result_exists'] = (project_root / 'data' / 'result' / d['result_purchase']).exists()
            else:
                d['result_exists'] = False
+
+            # Filter: when exists_only is True, only keep rows whose primary file exists
+            if exists_only:
+                if view == 'images' and not d['input_exists']:
+                    continue
+                if view == 'tables' and not d['output_exists']:
+                    continue
+                if view == 'orders' and not d['result_exists']:
+                    continue
+
            items.append(d)

+        total = len(items)
+
+        # Page (Python-side after existence filtering)
+        start = (page - 1) * page_size
+        items = items[start:start + page_size]
+
        return items, total
    finally:
        conn.close()