fix: sync/barcode/memory overhaul + detailed logs + preview + result tracking

- Sync: fix GiteaSync constructor + add push()/pull() methods
- Barcode: two-tab layout matching GUI (mapping + special rules)
- Memory: spec→specification unification, manual add, confidence/price tracking
- Processing: TaskLogHandler captures detailed logs (barcode mapping, unit conversion)
- Preview: fullscreen dialog for file preview (image/Excel) in Orders/Tables/Images
- Detail: per-file log filtering in file pages
- Tasks: result files now per-task, add copy path button
- Config: reactive edited state + save_config fix
- Dashboard: sync task isolation, log limit 10

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-05 19:37:10 +08:00
parent c18039f790
commit 81bafaf557
20 changed files with 1610 additions and 502 deletions
+189 -23
View File
@@ -1,8 +1,10 @@
"""Processing endpoints: OCR, Excel conversion, merge, and full pipeline."""
import asyncio
import logging
import os
import sys
import threading
import traceback
from pathlib import Path
from typing import Optional, List
@@ -18,6 +20,66 @@ router = APIRouter(prefix="/api/processing", tags=["processing"])
_wrapper = ServiceWrapper(max_workers=3)
# ── Thread-safe log capture ──
_tlocal = threading.local()
class TaskLogHandler(logging.Handler):
"""Capture all log records during task execution and forward to tm.add_log()"""
def emit(self, record: logging.LogRecord):
ctx = getattr(_tlocal, 'ctx', None)
if ctx:
tm = ctx.get('tm')
task_id = ctx.get('task_id')
if tm and task_id:
msg = self.format(record)
if any(skip in msg for skip in ['DEBUG:', 'urllib3', 'charset_normalizer']):
return
tm.add_log(task_id, msg)
_log_handler = TaskLogHandler()
_log_handler.setLevel(logging.DEBUG)
_log_handler.setFormatter(logging.Formatter('%(message)s'))
_root_logger = logging.getLogger()
_configured = False
def _setup_log_capture():
global _configured
if not _configured:
_root_logger.addHandler(_log_handler)
_configured = True
def _start_log_capture(tm, task_id: str):
_setup_log_capture()
_root_logger.setLevel(logging.DEBUG)
_tlocal.ctx = {'tm': tm, 'task_id': task_id}
def _stop_log_capture():
_tlocal.ctx = None
def _add_result_file(name: str):
files = getattr(_tlocal, 'result_files', None)
if files is not None:
files.append(name)
def _wrap_with_capture(tm, task_id, func):
"""Wrap a do_work function with log capture setup/teardown."""
def wrapped():
_start_log_capture(tm, task_id)
_tlocal.result_files = []
try:
return func()
finally:
_stop_log_capture()
return wrapped
_project_root = Path(__file__).resolve().parent.parent.parent.parent
_input_dir = _project_root / "data" / "input"
_output_dir = _project_root / "data" / "output"
@@ -74,6 +136,92 @@ def _run_background(coro):
asyncio.ensure_future(coro)
def _run_background_with_log(coro, tm, task_id: str):
"""Schedule a coroutine with log capture during execution."""
async def _wrapped():
_start_log_capture(tm, task_id)
try:
await coro
finally:
_stop_log_capture()
asyncio.ensure_future(_wrapped())
def _get_product_db():
from app.core.db.product_db import ProductDatabase
return ProductDatabase(
str(_project_root / 'data' / 'product_cache.db'),
str(_project_root / 'templates' / '商品资料.xlsx')
)
def _learn_products_from_excel(excel_path: Path, tm, task_id, source: str = 'ocr'):
"""从处理后的Excel文件学习商品数据到记忆库"""
try:
from app.core.utils.file_utils import smart_read_excel
df = smart_read_excel(str(excel_path))
if df is None or df.empty:
return
except Exception:
return
from app.core.handlers.column_mapper import ColumnMapper
barcode_col = ColumnMapper.find_column(list(df.columns), 'barcode')
if not barcode_col:
return
name_col = ColumnMapper.find_column(list(df.columns), 'name')
spec_col = ColumnMapper.find_column(list(df.columns), 'specification')
unit_col = ColumnMapper.find_column(list(df.columns), 'unit')
price_col = ColumnMapper.find_column(list(df.columns), 'unit_price') or ColumnMapper.find_column(list(df.columns), 'price')
db = _get_product_db()
barcodes = [str(r.get(barcode_col, '')).strip() for _, r in df.iterrows() if str(r.get(barcode_col, '')).strip()]
memory = db.load_batch(barcodes)
learned = 0
for _, row in df.iterrows():
barcode = str(row.get(barcode_col, '')).strip()
if not barcode or barcode == 'nan':
continue
price = 0.0
if price_col:
try:
p = row.get(price_col)
if p is not None and str(p).strip() not in ('', 'nan', 'None'):
price = float(p)
except (ValueError, TypeError):
pass
product = {
'barcode': barcode,
'name': str(row.get(name_col, '')).strip() if name_col else '',
'specification': str(row.get(spec_col, '')).strip() if spec_col else '',
'unit': str(row.get(unit_col, '')).strip() if unit_col else '',
'price': price,
}
# 1. 记忆辅助补全
filled, fill_log = db.fill_from_memory(barcode, product, memory)
if fill_log:
tm.add_log(task_id, f" {fill_log}")
# 2. 价格预警
warn = db.price_warning(barcode, price, memory)
if warn:
tm.add_log(task_id, f" {warn}")
# 3. 学习
log = db.learn_from_product(filled, source=source, memory=memory, add_log=None)
if log:
tm.add_log(task_id, f" {log}")
learned += 1
if learned:
tm.add_log(task_id, f"[记忆库] 从 {excel_path.name} 学习了 {learned} 条商品数据")
# ---------------------------------------------------------------------------
# Batch endpoints
# ---------------------------------------------------------------------------
@@ -117,16 +265,23 @@ async def ocr_batch(
for ext in ['.xlsx', '.xls']:
candidate = _output_dir / f"{out_stem}{ext}"
if candidate.exists():
upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done')
upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
_add_result_file(candidate.name)
break
tm.add_log(task.id, f"[OCR] 完成: {f.name}")
# Learn products into memory from OCR output
out_file = _output_dir / f"{out_stem}.xlsx"
if not out_file.exists():
out_file = _output_dir / f"{out_stem}.xls"
if out_file.exists():
_learn_products_from_excel(out_file, tm, task.id, source='ocr')
except Exception as e:
tm.add_log(task.id, f"[OCR] 失败: {f.name} - {e}")
result_files = [f.name for f in _output_dir.iterdir() if f.is_file()]
result_files = list(getattr(_tlocal, 'result_files', []))
tm.set_completed(task.id, result_files=result_files, message=f"OCR完成,共处理 {total} 个文件")
await _wrapper.run_sync(do_work)
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
_run_background(_bg())
return TaskResponse(task_id=task.id, status="accepted", message="OCR任务已创建")
@@ -162,7 +317,7 @@ async def process_excel(
result_path = _result_dir / result_name
if result_path.exists():
tm.add_log(task.id, f"[跳过] {f.name} 已处理过 → {result_name}")
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
continue
tm.update_progress(task.id, int((i / total) * 100), f"正在处理: {f.name}")
@@ -171,15 +326,19 @@ async def process_excel(
svc.process_excel(str(f))
# Find result file
if result_path.exists():
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
_add_result_file(result_name)
tm.add_log(task.id, f"[Excel] 完成: {f.name}")
# Learn products into memory from purchase order result
if result_path.exists():
_learn_products_from_excel(result_path, tm, task.id, source='ocr')
except Exception as e:
tm.add_log(task.id, f"[Excel] 失败: {f.name} - {e}")
result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
result_files = list(getattr(_tlocal, 'result_files', []))
tm.set_completed(task.id, result_files=result_files, message=f"Excel处理完成,共 {total} 个文件")
await _wrapper.run_sync(do_work)
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
_run_background(_bg())
return TaskResponse(task_id=task.id, status="accepted", message="Excel处理任务已创建")
@@ -224,7 +383,7 @@ async def merge_orders(
tm.add_log(task.id, f"[合并] 失败: {e}")
tm.set_failed(task.id, str(e))
await _wrapper.run_sync(do_work)
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
_run_background(_bg())
return TaskResponse(task_id=task.id, status="accepted", message="合并任务已创建")
@@ -271,9 +430,14 @@ async def full_pipeline(
for ext in ['.xlsx', '.xls']:
candidate = _output_dir / f"{out_stem}{ext}"
if candidate.exists():
upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done')
upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
break
tm.add_log(task.id, f"[OCR] 完成: {f.name}")
out_file = _output_dir / f"{out_stem}.xlsx"
if not out_file.exists():
out_file = _output_dir / f"{out_stem}.xls"
if out_file.exists():
_learn_products_from_excel(out_file, tm, task.id, source='ocr')
except Exception as e:
tm.add_log(task.id, f"[OCR] 失败: {f.name} - {e}")
@@ -292,7 +456,7 @@ async def full_pipeline(
result_path = _result_dir / result_name
if result_path.exists():
tm.add_log(task.id, f"[跳过] {f.name} 已处理过 → {result_name}")
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
tm.update_progress(task.id, pct, f"跳过: {f.name}")
continue
@@ -300,19 +464,21 @@ async def full_pipeline(
try:
order_svc.process_excel(str(f))
if result_path.exists():
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
tm.add_log(task.id, f"[Excel] 完成: {f.name}")
if result_path.exists():
_learn_products_from_excel(result_path, tm, task.id, source='ocr')
except Exception as e:
tm.add_log(task.id, f"[Excel] 失败: {f.name} - {e}")
result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
result_files = list(getattr(_tlocal, 'result_files', []))
tm.set_completed(task.id, result_files=result_files, message="全流程处理完成(不含合并)")
except Exception as e:
tb = traceback.format_exc()
tm.add_log(task.id, f"[错误] {tb}")
tm.set_failed(task.id, str(e))
await _wrapper.run_sync(do_work)
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
_run_background(_bg())
return TaskResponse(task_id=task.id, status="accepted", message="全流程任务已创建")
@@ -349,16 +515,16 @@ async def ocr_single(
for ext in ['.xlsx', '.xls']:
candidate = _output_dir / f"{stem}{ext}"
if candidate.exists():
upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done')
upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
break
tm.add_log(task.id, f"[OCR] 完成: {body.filename}")
result_files = [f.name for f in _output_dir.iterdir() if f.is_file()]
result_files = list(getattr(_tlocal, 'result_files', []))
tm.set_completed(task.id, result_files=result_files, message=f"OCR完成: {body.filename}")
except Exception as e:
tm.add_log(task.id, f"[OCR] 失败: {e}")
tm.set_failed(task.id, str(e))
await _wrapper.run_sync(do_work)
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
_run_background(_bg())
return TaskResponse(task_id=task.id, status="accepted", message=f"OCR任务已创建: {body.filename}")
@@ -390,13 +556,13 @@ async def excel_single(
if (_result_dir / result_name).exists():
upsert_file_relation(output_excel=body.filename, result_purchase=result_name, status='done')
tm.add_log(task.id, f"[Excel] 完成: {body.filename}")
result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
result_files = list(getattr(_tlocal, 'result_files', []))
tm.set_completed(task.id, result_files=result_files, message=f"Excel处理完成: {body.filename}")
except Exception as e:
tm.add_log(task.id, f"[Excel] 失败: {e}")
tm.set_failed(task.id, str(e))
await _wrapper.run_sync(do_work)
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
_run_background(_bg())
return TaskResponse(task_id=task.id, status="accepted", message=f"Excel处理任务已创建: {body.filename}")
@@ -432,13 +598,13 @@ async def pipeline_single(
if out_xlsx.exists() or out_xls.exists():
out_name = out_xlsx.name if out_xlsx.exists() else out_xls.name
tm.add_log(task.id, f"[跳过] 已OCR过 → {out_name}")
upsert_file_relation(input_image=body.filename, output_excel=out_name, status='ocr_done')
upsert_file_relation(input_image=body.filename, output_excel=out_name, status='ocr_done'); _add_result_file(out_name)
else:
ocr_svc.process_image(str(file_path))
for ext in ['.xlsx', '.xls']:
candidate = _output_dir / f"{stem}{ext}"
if candidate.exists():
upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done')
upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
break
tm.add_log(task.id, f"[OCR] 完成")
@@ -464,14 +630,14 @@ async def pipeline_single(
else:
tm.add_log(task.id, f"[错误] OCR未生成Excel文件")
result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
result_files = list(getattr(_tlocal, 'result_files', []))
tm.set_completed(task.id, result_files=result_files, message=f"全流程完成: {body.filename}")
except Exception as e:
tb = traceback.format_exc()
tm.add_log(task.id, f"[错误] {tb}")
tm.set_failed(task.id, str(e))
await _wrapper.run_sync(do_work)
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
_run_background(_bg())
return TaskResponse(task_id=task.id, status="accepted", message=f"全流程任务已创建: {body.filename}")
@@ -511,7 +677,7 @@ async def merge_batch(
tm.add_log(task.id, f"[合并] 失败: {e}")
tm.set_failed(task.id, str(e))
await _wrapper.run_sync(do_work)
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
_run_background(_bg())
return TaskResponse(task_id=task.id, status="accepted", message="批量合并任务已创建")