feat: shadcn主题 + 文件关系追踪 + 处理流程修复

前端:
- 全站应用 shadcn/ui 主题 (zinc灰调, Inter字体, 1px细边框, 无硬阴影)
- 重写 global.css / Dashboard.vue / Login.vue / Layout.vue 样式
- 新增文件处理子页面: 采购单(Orders), 表格处理(Tables), 图片处理(Images)
- 侧边栏使用 el-sub-menu 组织文件处理导航

后端:
- 新增 file_relations 表追踪 input→output→result 链路
- 新增 /files/relations, /files/stats/detailed 等关系查询API
- 新增 ocr-single, excel-single, pipeline-single, merge-batch 端点
- 处理流程增加跳过逻辑 (已处理文件自动跳过)
- 全流程不再自动合并, 合并仅在采购单页面手动触发

Bug修复:
- TaskManager: asyncio.create_task 在线程池中无事件循环 → 改用 _schedule() 调度
- PurchaseOrderMerger 缺少 config 参数 → 传入 ConfigManager()
- FastAPI regex= 弃用 → 改为 pattern=
- merger.process() 接收 Path 对象 → 转为字符串
@
This commit is contained in:
2026-05-05 14:16:30 +08:00
parent dedc3b4183
commit 0721ed099c
13 changed files with 2341 additions and 602 deletions
+307
View File
@@ -4,6 +4,7 @@ Tables:
- http_logs: HTTP request/response logging
- task_history: Background task tracking
- file_metadata: File operation records
- file_relations: Input→Output→Result file chain tracking
All functions are synchronous; the async db_pool.DBPool wraps them via run_in_executor.
"""
@@ -69,6 +70,19 @@ def init_db():
task_id TEXT
);
CREATE INDEX IF NOT EXISTS idx_file_metadata_timestamp ON file_metadata(timestamp);
CREATE TABLE IF NOT EXISTS file_relations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
input_image TEXT,
output_excel TEXT,
result_purchase TEXT,
status TEXT DEFAULT 'pending',
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_file_relations_input ON file_relations(input_image);
CREATE INDEX IF NOT EXISTS idx_file_relations_output ON file_relations(output_excel);
CREATE INDEX IF NOT EXISTS idx_file_relations_result ON file_relations(result_purchase);
""")
conn.commit()
finally:
@@ -83,6 +97,7 @@ def cleanup_old_records():
conn.execute("DELETE FROM http_logs WHERE timestamp < ?", (cutoff,))
conn.execute("DELETE FROM task_history WHERE created_at < ?", (cutoff,))
conn.execute("DELETE FROM file_metadata WHERE timestamp < ?", (cutoff,))
conn.execute("DELETE FROM file_relations WHERE updated_at < ?", (cutoff,))
conn.commit()
finally:
conn.close()
@@ -395,6 +410,298 @@ def query_file_stats() -> list[dict]:
return stats
# ---------------------------------------------------------------------------
# File relations — CRUD
# ---------------------------------------------------------------------------
def upsert_file_relation(input_image: str = None, output_excel: str = None,
                         result_purchase: str = None, status: str = 'pending'):
    """Create a file relation row, or complete an existing one.

    An existing row is looked up with the supplied (truthy) file names, in
    this order, and the first hit wins:
      1. input_image
      2. output_excel
      3. result_purchase
    If no row matches, a new one is inserted with the given values.

    When a row matches, only file columns that are currently empty on that
    row are filled in; a column that already holds a value is left as is.
    ``status`` (when truthy) and ``updated_at`` are always rewritten.
    """
    timestamp = datetime.now().isoformat()
    conn = sqlite3.connect(_db_path)
    conn.row_factory = sqlite3.Row
    try:
        # Column name paired with the caller-supplied value, in lookup order.
        supplied = (
            ('input_image', input_image),
            ('output_excel', output_excel),
            ('result_purchase', result_purchase),
        )

        match = None
        for column, value in supplied:
            if not value:
                continue
            match = conn.execute(
                f"SELECT * FROM file_relations WHERE {column} = ?", (value,)
            ).fetchone()
            if match:
                break

        if match:
            # Fill only the gaps: never overwrite a file name already stored.
            assignments = [
                (f"{column} = ?", value)
                for column, value in supplied
                if value and not match[column]
            ]
            if status:
                assignments.append(("status = ?", status))
            assignments.append(("updated_at = ?", timestamp))

            set_clause = ', '.join(fragment for fragment, _ in assignments)
            values = [value for _, value in assignments]
            values.append(match['id'])
            conn.execute(
                f"UPDATE file_relations SET {set_clause} WHERE id = ?",
                values,
            )
        else:
            conn.execute(
                "INSERT INTO file_relations (input_image, output_excel, result_purchase, status, created_at, updated_at) "
                "VALUES (?, ?, ?, ?, ?, ?)",
                (input_image, output_excel, result_purchase, status, timestamp, timestamp),
            )
        conn.commit()
    finally:
        conn.close()
def query_file_relations(view: str = None, status: str = None,
                         page: int = 1, page_size: int = 50) -> tuple[list[dict], int]:
    """Return one page of file relations plus the total match count.

    ``view`` narrows and orders the result:
      - 'orders': rows that have a result_purchase, newest name first
      - 'tables': rows that have an output_excel, newest name first
      - 'images': rows that have an input_image, newest name first
      - anything else: every row, highest id first
    ``status`` (when truthy) additionally filters on the status column.

    Each returned dict carries the table columns plus three booleans,
    ``input_exists`` / ``output_exists`` / ``result_exists``, telling whether
    the referenced file is present under data/input, data/output and
    data/result respectively.

    Returns (items, total) where total ignores pagination.
    """
    # view -> (WHERE condition, ORDER BY expression)
    view_rules = {
        'orders': ("result_purchase IS NOT NULL", "result_purchase DESC"),
        'tables': ("output_excel IS NOT NULL", "output_excel DESC"),
        'images': ("input_image IS NOT NULL", "input_image DESC"),
    }
    # (column holding the file name, flag to set, sub-directory of data/)
    presence_checks = (
        ('input_image', 'input_exists', 'input'),
        ('output_excel', 'output_exists', 'output'),
        ('result_purchase', 'result_exists', 'result'),
    )

    conn = sqlite3.connect(_db_path)
    conn.row_factory = sqlite3.Row
    try:
        conditions = []
        bind = []

        rule = view_rules.get(view)
        if rule is None:
            order_by = "id DESC"
        else:
            conditions.append(rule[0])
            order_by = rule[1]

        if status:
            conditions.append("status = ?")
            bind.append(status)

        where = ""
        if conditions:
            where = " WHERE " + " AND ".join(conditions)

        # Total number of matching rows, independent of the requested page.
        count_row = conn.execute(
            f"SELECT COUNT(*) as cnt FROM file_relations{where}", bind
        ).fetchone()
        total = count_row[0] if count_row else 0

        # Fetch the requested page.
        offset = (page - 1) * page_size
        rows = conn.execute(
            f"SELECT * FROM file_relations{where} ORDER BY {order_by} LIMIT ? OFFSET ?",
            bind + [page_size, offset],
        ).fetchall()

        data_dir = Path(__file__).resolve().parent.parent.parent.parent / 'data'
        items = []
        for row in rows:
            record = dict(row)
            for column, flag, subdir in presence_checks:
                filename = record.get(column)
                # No file name recorded -> the file cannot exist.
                record[flag] = bool(filename) and (data_dir / subdir / filename).exists()
            items.append(record)
        return items, total
    finally:
        conn.close()
def delete_file_relations(ids: list[int]):
    """Delete file relation records by IDs.

    Args:
        ids: Primary-key values of the file_relations rows to remove.
            An empty (or None) value is a no-op. IDs that do not exist
            are silently ignored.

    All deletions run in a single transaction: either every listed row is
    removed, or (on error) none is.
    """
    if not ids:
        return
    conn = sqlite3.connect(_db_path)
    try:
        # One bound parameter per statement instead of one giant
        # "IN (?,?,...)" list: SQLite caps the number of bound variables in a
        # single statement (SQLITE_MAX_VARIABLE_NUMBER, 999 on older builds),
        # so a large batch delete would otherwise raise OperationalError.
        conn.executemany(
            "DELETE FROM file_relations WHERE id = ?",
            ((record_id,) for record_id in ids),
        )
        conn.commit()
    finally:
        conn.close()
def sync_file_relations():
    """Rebuild the file_relations table from the files currently on disk.

    The data/input, data/output and data/result directories are scanned and
    files are paired up by stem:
      - input:  {stem}.jpg / .jpeg / .png / .bmp
      - output: {stem}.xls / .xlsx
      - result: 采购单_{stem}.xls / .xlsx (prefix stripped for matching);
                files starting with 合并采购单_ are ignored; any other excel
                file in result/ is keyed by its own stem.

    Every existing row is deleted and one row per discovered stem is
    inserted. Status is 'done' when a result file exists, 'ocr_done' when
    only an output file exists, otherwise 'pending'.
    """
    data_root = Path(__file__).resolve().parent.parent.parent.parent / 'data'
    input_dir = data_root / 'input'
    output_dir = data_root / 'output'
    result_dir = data_root / 'result'

    image_exts = {'.jpg', '.jpeg', '.png', '.bmp'}
    excel_exts = {'.xls', '.xlsx'}
    single_prefix = '采购单_'
    merged_prefix = '合并采购单_'

    # stem -> file name, one mapping per directory
    images_by_stem = {}
    if input_dir.exists():
        images_by_stem = {
            entry.stem: entry.name
            for entry in input_dir.iterdir()
            if entry.is_file() and entry.suffix.lower() in image_exts
        }

    sheets_by_stem = {}
    if output_dir.exists():
        sheets_by_stem = {
            entry.stem: entry.name
            for entry in output_dir.iterdir()
            if entry.is_file() and entry.suffix.lower() in excel_exts
        }

    purchases_by_stem = {}
    if result_dir.exists():
        for entry in result_dir.iterdir():
            if not entry.is_file() or entry.suffix.lower() not in excel_exts:
                continue
            filename = entry.name
            if filename.startswith(single_prefix):
                # Drop the prefix so the key lines up with input/output stems.
                key = entry.stem[len(single_prefix):]
            elif filename.startswith(merged_prefix):
                continue  # merged purchase orders belong to no single stem
            else:
                key = entry.stem
            purchases_by_stem[key] = filename

    known_stems = set(images_by_stem) | set(sheets_by_stem) | set(purchases_by_stem)
    now = datetime.now().isoformat()

    conn = sqlite3.connect(_db_path)
    try:
        # Full rebuild: wipe the table, then insert one row per stem.
        conn.execute("DELETE FROM file_relations")
        fresh_rows = []
        for stem in sorted(known_stems):
            image = images_by_stem.get(stem)
            sheet = sheets_by_stem.get(stem)
            purchase = purchases_by_stem.get(stem)
            state = 'done' if purchase else ('ocr_done' if sheet else 'pending')
            fresh_rows.append((image, sheet, purchase, state, now, now))
        conn.executemany(
            "INSERT INTO file_relations (input_image, output_excel, result_purchase, status, created_at, updated_at) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            fresh_rows,
        )
        conn.commit()
    finally:
        conn.close()
def query_file_relations_stats() -> dict:
    """Collect the file counters shown on the Dashboard.

    Counts come from scanning data/input, data/output and data/result on
    disk, except ``total_processed`` which is read from the file_relations
    table.

    Returns a dict with:
      - input_images: number of image files in input/
      - output_excel: number of excel files in output/
      - unprocessed_images: input stems with no output file of the same stem
      - unprocessed_excel: output stems with no 采购单_{stem} result file
      - completed_results: number of 采购单_* excel files in result/
      - total_processed: relation rows whose status is 'done' or 'merged'
    """
    data_root = Path(__file__).resolve().parent.parent.parent.parent / 'data'
    input_dir = data_root / 'input'
    output_dir = data_root / 'output'
    result_dir = data_root / 'result'

    image_exts = {'.jpg', '.jpeg', '.png', '.bmp'}
    excel_exts = {'.xls', '.xlsx'}
    single_prefix = '采购单_'

    # Keep lists (not sets) first: counts are per file, while the
    # "unprocessed" figures below are per distinct stem.
    image_stems = []
    if input_dir.exists():
        image_stems = [
            entry.stem
            for entry in input_dir.iterdir()
            if entry.is_file() and entry.suffix.lower() in image_exts
        ]

    sheet_stems = []
    if output_dir.exists():
        sheet_stems = [
            entry.stem
            for entry in output_dir.iterdir()
            if entry.is_file() and entry.suffix.lower() in excel_exts
        ]

    purchase_stems = []
    if result_dir.exists():
        purchase_stems = [
            entry.stem[len(single_prefix):]
            for entry in result_dir.iterdir()
            if entry.is_file()
            and entry.suffix.lower() in excel_exts
            and entry.name.startswith(single_prefix)
        ]

    pending_images = set(image_stems) - set(sheet_stems)
    pending_sheets = set(sheet_stems) - set(purchase_stems)

    # Processed total comes from the relations table, not from disk.
    conn = sqlite3.connect(_db_path)
    try:
        counted = conn.execute(
            "SELECT COUNT(*) FROM file_relations WHERE status IN ('done', 'merged')"
        ).fetchone()
        total_processed = counted[0] if counted else 0
    finally:
        conn.close()

    return {
        'input_images': len(image_stems),
        'output_excel': len(sheet_stems),
        'unprocessed_images': len(pending_images),
        'unprocessed_excel': len(pending_sheets),
        'completed_results': len(purchase_stems),
        'total_processed': total_processed,
    }
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------