fix: sync/barcode/memory overhaul + detailed logs + preview + result tracking
- Sync: fix GiteaSync constructor + add push()/pull() methods
- Barcode: two-tab layout matching GUI (mapping + special rules)
- Memory: spec→specification unification, manual add, confidence/price tracking
- Processing: TaskLogHandler captures detailed logs (barcode mapping, unit conversion)
- Preview: fullscreen dialog for file preview (image/Excel) in Orders/Tables/Images
- Detail: per-file log filtering in file pages
- Tasks: result files now per-task, add copy path button
- Config: reactive edited state + save_config fix
- Dashboard: sync task isolation, log limit 10

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
"""Processing endpoints: OCR, Excel conversion, merge, and full pipeline."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
from typing import Optional, List
|
||||
@@ -18,6 +20,66 @@ router = APIRouter(prefix="/api/processing", tags=["processing"])
|
||||
|
||||
_wrapper = ServiceWrapper(max_workers=3)
|
||||
|
||||
# ── Thread-safe log capture ──
# Per-thread storage. ``ctx`` holds the capture context of the task currently
# running on this thread ({'tm': ..., 'task_id': ...}) or None.
_tlocal = threading.local()


class TaskLogHandler(logging.Handler):
    """Forward log records emitted during task execution to ``tm.add_log()``.

    The target task is looked up in the calling thread's ``_tlocal.ctx``.
    Records emitted on a thread with no active context are ignored.
    """

    # Top-level logger names whose records are never forwarded. Matching on
    # the logger name is needed because the message text of these libraries
    # usually does not mention the library itself.
    _SKIPPED_LOGGERS = ('urllib3', 'charset_normalizer')
    # Substrings that mark a formatted message as noise (original filter).
    _SKIPPED_SUBSTRINGS = ('DEBUG:', 'urllib3', 'charset_normalizer')

    def emit(self, record: logging.LogRecord) -> None:
        """Append the formatted record to the current thread's task log.

        Args:
            record: The log record handed over by the logging framework.

        Failures while formatting or forwarding are reported through
        ``handleError`` instead of propagating into the code that logged.
        """
        ctx = getattr(_tlocal, 'ctx', None)
        if not ctx:
            return
        tm = ctx.get('tm')
        task_id = ctx.get('task_id')
        if not (tm and task_id):
            return
        try:
            if record.name.split('.', 1)[0] in self._SKIPPED_LOGGERS:
                return
            msg = self.format(record)
            if any(skip in msg for skip in self._SKIPPED_SUBSTRINGS):
                return
            tm.add_log(task_id, msg)
        except Exception:
            # A logging handler must not raise into the caller.
            self.handleError(record)
|
||||
|
||||
|
||||
# Single shared handler instance; it accepts records of every level and
# renders only the message text.
_log_handler = TaskLogHandler(level=logging.DEBUG)
_log_handler.setFormatter(logging.Formatter('%(message)s'))
_root_logger = logging.getLogger()
# Becomes True once _log_handler has been attached to the root logger.
_configured = False
|
||||
|
||||
|
||||
def _setup_log_capture():
    """Attach the shared task log handler to the root logger on first use."""
    global _configured
    if _configured:
        return
    _root_logger.addHandler(_log_handler)
    _configured = True
|
||||
|
||||
|
||||
def _start_log_capture(tm, task_id: str):
    """Start routing the calling thread's log records to task ``task_id``.

    Args:
        tm: Object whose ``add_log(task_id, msg)`` receives the records.
        task_id: Identifier of the task the records belong to.
    """
    _setup_log_capture()
    # Lowers the root logger threshold so DEBUG records are produced at all.
    # This function does not restore the previous level.
    _root_logger.setLevel(logging.DEBUG)
    _tlocal.ctx = dict(tm=tm, task_id=task_id)
|
||||
|
||||
|
||||
def _stop_log_capture():
    """Stop forwarding the calling thread's log records to a task."""
    # Clearing the context makes the handler ignore records from this thread.
    _tlocal.ctx = None
|
||||
|
||||
|
||||
def _add_result_file(name: str):
    """Record ``name`` as a result file of the task running on this thread.

    Does nothing when the calling thread has no active result list. A name
    that is already recorded is not added again, because some call sites
    register the same file more than once.

    Args:
        name: File name to record.
    """
    files = getattr(_tlocal, 'result_files', None)
    if files is None:
        return
    if name not in files:
        files.append(name)
|
||||
|
||||
|
||||
def _wrap_with_capture(tm, task_id, func):
    """Return a zero-argument callable that runs ``func`` under log capture.

    The returned callable starts log capture for ``task_id`` on the thread
    that executes it, gives that thread a fresh result-file list, calls
    ``func`` and always stops log capture afterwards.

    Args:
        tm: Object passed on to the log capture.
        task_id: Identifier of the task being executed.
        func: Zero-argument callable doing the actual work.

    Returns:
        A callable returning whatever ``func`` returns.
    """
    def wrapped():
        _start_log_capture(tm, task_id)
        _tlocal.result_files = []
        try:
            outcome = func()
        finally:
            _stop_log_capture()
        return outcome

    return wrapped
|
||||
|
||||
# Directory four levels above this file.
_project_root = Path(__file__).resolve().parent.parent.parent.parent
# Data folders located under the project root.
_input_dir = _project_root.joinpath("data", "input")
_output_dir = _project_root.joinpath("data", "output")
|
||||
@@ -74,6 +136,92 @@ def _run_background(coro):
|
||||
asyncio.ensure_future(coro)
|
||||
|
||||
|
||||
# Strong references to capture tasks that are still running. The event loop
# only keeps weak references to tasks, so a task nobody references may be
# garbage-collected before it finishes.
_log_capture_tasks = set()


def _run_background_with_log(coro, tm, task_id: str):
    """Schedule a coroutine with log capture during execution.

    Args:
        coro: Coroutine to run in the background.
        tm: Object passed on to the log capture.
        task_id: Identifier of the task the logs belong to.

    Returns:
        The scheduled task. Callers may ignore it; a reference is kept
        internally until the task is done.
    """

    async def _wrapped():
        # NOTE(review): the capture context is thread-local, so while this
        # coroutine is suspended it also applies to other code running on the
        # event-loop thread, and it does not apply to work that ``coro`` hands
        # off to other threads — confirm this is intended.
        _start_log_capture(tm, task_id)
        try:
            await coro
        finally:
            _stop_log_capture()

    task = asyncio.ensure_future(_wrapped())
    _log_capture_tasks.add(task)
    task.add_done_callback(_log_capture_tasks.discard)
    return task
|
||||
|
||||
|
||||
def _get_product_db():
    """Create a ProductDatabase from two paths under the project root.

    The paths are ``data/product_cache.db`` and ``templates/商品资料.xlsx``.
    A new instance is created on every call.
    """
    from app.core.db.product_db import ProductDatabase

    cache_db_path = _project_root / 'data' / 'product_cache.db'
    template_path = _project_root / 'templates' / '商品资料.xlsx'
    return ProductDatabase(str(cache_db_path), str(template_path))
|
||||
|
||||
|
||||
def _cell_text(value) -> str:
    """Return a cell value as stripped text; missing values become ''."""
    if value is None:
        return ''
    text = str(value).strip()
    # NaN / NA / NaT cells stringify to these markers; treat them as empty.
    return '' if text.lower() in ('nan', 'none', '<na>', 'nat') else text


def _learn_products_from_excel(excel_path: Path, tm, task_id, source: str = 'ocr'):
    """Learn product data from a processed Excel file into the memory store.

    For every row with a barcode the product is completed from memory,
    checked for a price warning and then passed to the learner. Messages
    returned by those steps are appended to the task log.

    Args:
        excel_path: Excel file to read.
        tm: Object whose ``add_log(task_id, msg)`` receives progress messages.
        task_id: Identifier of the task the messages belong to.
        source: Origin label passed on to the learner.

    Returns:
        None. The function is best-effort: an unreadable or empty file, or a
        file without a barcode column, ends it without raising.
    """
    try:
        from app.core.utils.file_utils import smart_read_excel
        df = smart_read_excel(str(excel_path))
        if df is None or df.empty:
            return
    except Exception as exc:
        # Learning is optional; report the problem instead of hiding it.
        tm.add_log(task_id, f"[记忆库] 读取 {excel_path.name} 失败: {exc}")
        return

    from app.core.handlers.column_mapper import ColumnMapper
    columns = list(df.columns)
    barcode_col = ColumnMapper.find_column(columns, 'barcode')
    if not barcode_col:
        return
    name_col = ColumnMapper.find_column(columns, 'name')
    spec_col = ColumnMapper.find_column(columns, 'specification')
    unit_col = ColumnMapper.find_column(columns, 'unit')
    price_col = (ColumnMapper.find_column(columns, 'unit_price')
                 or ColumnMapper.find_column(columns, 'price'))

    # Walk the sheet once and keep only rows that carry a real barcode.
    entries = []
    for _, row in df.iterrows():
        barcode = _cell_text(row.get(barcode_col, ''))
        if barcode:
            entries.append((barcode, row))

    db = _get_product_db()
    # Each barcode is requested once, in first-seen order.
    memory = db.load_batch(list(dict.fromkeys(code for code, _ in entries)))

    learned = 0
    for barcode, row in entries:
        price = 0.0
        if price_col:
            raw_price = row.get(price_col)
            if _cell_text(raw_price):
                try:
                    price = float(raw_price)
                except (ValueError, TypeError):
                    pass

        product = {
            'barcode': barcode,
            'name': _cell_text(row.get(name_col, '')) if name_col else '',
            'specification': _cell_text(row.get(spec_col, '')) if spec_col else '',
            'unit': _cell_text(row.get(unit_col, '')) if unit_col else '',
            'price': price,
        }

        # 1. Memory-assisted completion
        filled, fill_log = db.fill_from_memory(barcode, product, memory)
        if fill_log:
            tm.add_log(task_id, f" {fill_log}")

        # 2. Price warning
        warn = db.price_warning(barcode, price, memory)
        if warn:
            tm.add_log(task_id, f" {warn}")

        # 3. Learn
        log = db.learn_from_product(filled, source=source, memory=memory, add_log=None)
        if log:
            tm.add_log(task_id, f" {log}")
            # NOTE(review): counted only when the learner returns a message;
            # confirm this matches the intended nesting of the counter.
            learned += 1

    if learned:
        tm.add_log(task_id, f"[记忆库] 从 {excel_path.name} 学习了 {learned} 条商品数据")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Batch endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -117,16 +265,23 @@ async def ocr_batch(
|
||||
for ext in ['.xlsx', '.xls']:
|
||||
candidate = _output_dir / f"{out_stem}{ext}"
|
||||
if candidate.exists():
|
||||
upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done')
|
||||
upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
|
||||
_add_result_file(candidate.name)
|
||||
break
|
||||
tm.add_log(task.id, f"[OCR] 完成: {f.name}")
|
||||
# Learn products into memory from OCR output
|
||||
out_file = _output_dir / f"{out_stem}.xlsx"
|
||||
if not out_file.exists():
|
||||
out_file = _output_dir / f"{out_stem}.xls"
|
||||
if out_file.exists():
|
||||
_learn_products_from_excel(out_file, tm, task.id, source='ocr')
|
||||
except Exception as e:
|
||||
tm.add_log(task.id, f"[OCR] 失败: {f.name} - {e}")
|
||||
|
||||
result_files = [f.name for f in _output_dir.iterdir() if f.is_file()]
|
||||
result_files = list(getattr(_tlocal, 'result_files', []))
|
||||
tm.set_completed(task.id, result_files=result_files, message=f"OCR完成,共处理 {total} 个文件")
|
||||
|
||||
await _wrapper.run_sync(do_work)
|
||||
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
|
||||
|
||||
_run_background(_bg())
|
||||
return TaskResponse(task_id=task.id, status="accepted", message="OCR任务已创建")
|
||||
@@ -162,7 +317,7 @@ async def process_excel(
|
||||
result_path = _result_dir / result_name
|
||||
if result_path.exists():
|
||||
tm.add_log(task.id, f"[跳过] {f.name} 已处理过 → {result_name}")
|
||||
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
|
||||
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
|
||||
continue
|
||||
|
||||
tm.update_progress(task.id, int((i / total) * 100), f"正在处理: {f.name}")
|
||||
@@ -171,15 +326,19 @@ async def process_excel(
|
||||
svc.process_excel(str(f))
|
||||
# Find result file
|
||||
if result_path.exists():
|
||||
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
|
||||
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
|
||||
_add_result_file(result_name)
|
||||
tm.add_log(task.id, f"[Excel] 完成: {f.name}")
|
||||
# Learn products into memory from purchase order result
|
||||
if result_path.exists():
|
||||
_learn_products_from_excel(result_path, tm, task.id, source='ocr')
|
||||
except Exception as e:
|
||||
tm.add_log(task.id, f"[Excel] 失败: {f.name} - {e}")
|
||||
|
||||
result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
|
||||
result_files = list(getattr(_tlocal, 'result_files', []))
|
||||
tm.set_completed(task.id, result_files=result_files, message=f"Excel处理完成,共 {total} 个文件")
|
||||
|
||||
await _wrapper.run_sync(do_work)
|
||||
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
|
||||
|
||||
_run_background(_bg())
|
||||
return TaskResponse(task_id=task.id, status="accepted", message="Excel处理任务已创建")
|
||||
@@ -224,7 +383,7 @@ async def merge_orders(
|
||||
tm.add_log(task.id, f"[合并] 失败: {e}")
|
||||
tm.set_failed(task.id, str(e))
|
||||
|
||||
await _wrapper.run_sync(do_work)
|
||||
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
|
||||
|
||||
_run_background(_bg())
|
||||
return TaskResponse(task_id=task.id, status="accepted", message="合并任务已创建")
|
||||
@@ -271,9 +430,14 @@ async def full_pipeline(
|
||||
for ext in ['.xlsx', '.xls']:
|
||||
candidate = _output_dir / f"{out_stem}{ext}"
|
||||
if candidate.exists():
|
||||
upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done')
|
||||
upsert_file_relation(input_image=f.name, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
|
||||
break
|
||||
tm.add_log(task.id, f"[OCR] 完成: {f.name}")
|
||||
out_file = _output_dir / f"{out_stem}.xlsx"
|
||||
if not out_file.exists():
|
||||
out_file = _output_dir / f"{out_stem}.xls"
|
||||
if out_file.exists():
|
||||
_learn_products_from_excel(out_file, tm, task.id, source='ocr')
|
||||
except Exception as e:
|
||||
tm.add_log(task.id, f"[OCR] 失败: {f.name} - {e}")
|
||||
|
||||
@@ -292,7 +456,7 @@ async def full_pipeline(
|
||||
result_path = _result_dir / result_name
|
||||
if result_path.exists():
|
||||
tm.add_log(task.id, f"[跳过] {f.name} 已处理过 → {result_name}")
|
||||
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
|
||||
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
|
||||
tm.update_progress(task.id, pct, f"跳过: {f.name}")
|
||||
continue
|
||||
|
||||
@@ -300,19 +464,21 @@ async def full_pipeline(
|
||||
try:
|
||||
order_svc.process_excel(str(f))
|
||||
if result_path.exists():
|
||||
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done')
|
||||
upsert_file_relation(output_excel=f.name, result_purchase=result_name, status='done'); _add_result_file(result_name)
|
||||
tm.add_log(task.id, f"[Excel] 完成: {f.name}")
|
||||
if result_path.exists():
|
||||
_learn_products_from_excel(result_path, tm, task.id, source='ocr')
|
||||
except Exception as e:
|
||||
tm.add_log(task.id, f"[Excel] 失败: {f.name} - {e}")
|
||||
|
||||
result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
|
||||
result_files = list(getattr(_tlocal, 'result_files', []))
|
||||
tm.set_completed(task.id, result_files=result_files, message="全流程处理完成(不含合并)")
|
||||
except Exception as e:
|
||||
tb = traceback.format_exc()
|
||||
tm.add_log(task.id, f"[错误] {tb}")
|
||||
tm.set_failed(task.id, str(e))
|
||||
|
||||
await _wrapper.run_sync(do_work)
|
||||
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
|
||||
|
||||
_run_background(_bg())
|
||||
return TaskResponse(task_id=task.id, status="accepted", message="全流程任务已创建")
|
||||
@@ -349,16 +515,16 @@ async def ocr_single(
|
||||
for ext in ['.xlsx', '.xls']:
|
||||
candidate = _output_dir / f"{stem}{ext}"
|
||||
if candidate.exists():
|
||||
upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done')
|
||||
upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
|
||||
break
|
||||
tm.add_log(task.id, f"[OCR] 完成: {body.filename}")
|
||||
result_files = [f.name for f in _output_dir.iterdir() if f.is_file()]
|
||||
result_files = list(getattr(_tlocal, 'result_files', []))
|
||||
tm.set_completed(task.id, result_files=result_files, message=f"OCR完成: {body.filename}")
|
||||
except Exception as e:
|
||||
tm.add_log(task.id, f"[OCR] 失败: {e}")
|
||||
tm.set_failed(task.id, str(e))
|
||||
|
||||
await _wrapper.run_sync(do_work)
|
||||
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
|
||||
|
||||
_run_background(_bg())
|
||||
return TaskResponse(task_id=task.id, status="accepted", message=f"OCR任务已创建: {body.filename}")
|
||||
@@ -390,13 +556,13 @@ async def excel_single(
|
||||
if (_result_dir / result_name).exists():
|
||||
upsert_file_relation(output_excel=body.filename, result_purchase=result_name, status='done')
|
||||
tm.add_log(task.id, f"[Excel] 完成: {body.filename}")
|
||||
result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
|
||||
result_files = list(getattr(_tlocal, 'result_files', []))
|
||||
tm.set_completed(task.id, result_files=result_files, message=f"Excel处理完成: {body.filename}")
|
||||
except Exception as e:
|
||||
tm.add_log(task.id, f"[Excel] 失败: {e}")
|
||||
tm.set_failed(task.id, str(e))
|
||||
|
||||
await _wrapper.run_sync(do_work)
|
||||
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
|
||||
|
||||
_run_background(_bg())
|
||||
return TaskResponse(task_id=task.id, status="accepted", message=f"Excel处理任务已创建: {body.filename}")
|
||||
@@ -432,13 +598,13 @@ async def pipeline_single(
|
||||
if out_xlsx.exists() or out_xls.exists():
|
||||
out_name = out_xlsx.name if out_xlsx.exists() else out_xls.name
|
||||
tm.add_log(task.id, f"[跳过] 已OCR过 → {out_name}")
|
||||
upsert_file_relation(input_image=body.filename, output_excel=out_name, status='ocr_done')
|
||||
upsert_file_relation(input_image=body.filename, output_excel=out_name, status='ocr_done'); _add_result_file(out_name)
|
||||
else:
|
||||
ocr_svc.process_image(str(file_path))
|
||||
for ext in ['.xlsx', '.xls']:
|
||||
candidate = _output_dir / f"{stem}{ext}"
|
||||
if candidate.exists():
|
||||
upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done')
|
||||
upsert_file_relation(input_image=body.filename, output_excel=candidate.name, status='ocr_done'); _add_result_file(candidate.name)
|
||||
break
|
||||
tm.add_log(task.id, f"[OCR] 完成")
|
||||
|
||||
@@ -464,14 +630,14 @@ async def pipeline_single(
|
||||
else:
|
||||
tm.add_log(task.id, f"[错误] OCR未生成Excel文件")
|
||||
|
||||
result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
|
||||
result_files = list(getattr(_tlocal, 'result_files', []))
|
||||
tm.set_completed(task.id, result_files=result_files, message=f"全流程完成: {body.filename}")
|
||||
except Exception as e:
|
||||
tb = traceback.format_exc()
|
||||
tm.add_log(task.id, f"[错误] {tb}")
|
||||
tm.set_failed(task.id, str(e))
|
||||
|
||||
await _wrapper.run_sync(do_work)
|
||||
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
|
||||
|
||||
_run_background(_bg())
|
||||
return TaskResponse(task_id=task.id, status="accepted", message=f"全流程任务已创建: {body.filename}")
|
||||
@@ -511,7 +677,7 @@ async def merge_batch(
|
||||
tm.add_log(task.id, f"[合并] 失败: {e}")
|
||||
tm.set_failed(task.id, str(e))
|
||||
|
||||
await _wrapper.run_sync(do_work)
|
||||
await _wrapper.run_sync(_wrap_with_capture(tm, task.id, do_work))
|
||||
|
||||
_run_background(_bg())
|
||||
return TaskResponse(task_id=task.id, status="accepted", message="批量合并任务已创建")
|
||||
|
||||
Reference in New Issue
Block a user