feat: shadcn主题 + 文件关系追踪 + 处理流程修复

前端:
- 全站应用 shadcn/ui 主题 (zinc灰调, Inter字体, 1px细边框, 无硬阴影)
- 重写 global.css / Dashboard.vue / Login.vue / Layout.vue 样式
- 新增文件处理子页面: 采购单(Orders), 表格处理(Tables), 图片处理(Images)
- 侧边栏使用 el-sub-menu 组织文件处理导航

后端:
- 新增 file_relations 表追踪 input→output→result 链路
- 新增 /files/relations, /files/stats/detailed 等关系查询API
- 新增 ocr-single, excel-single, pipeline-single, merge-batch 端点
- 处理流程增加跳过逻辑 (已处理文件自动跳过)
- 全流程不再自动合并, 合并仅在采购单页面手动触发

Bug修复:
- TaskManager: asyncio.create_task 在线程池中无事件循环 → 改用 _schedule() 调度
- PurchaseOrderMerger 缺少 config 参数 → 传入 ConfigManager()
- FastAPI regex= 弃用 → 改为 pattern=
- merger.process() 接收 Path 对象 → 转为字符串
This commit is contained in:
@@ -4,6 +4,7 @@ Tables:
|
||||
- http_logs: HTTP request/response logging
|
||||
- task_history: Background task tracking
|
||||
- file_metadata: File operation records
|
||||
- file_relations: Input→Output→Result file chain tracking
|
||||
|
||||
All functions are synchronous; the async db_pool.DBPool wraps them via run_in_executor.
|
||||
"""
|
||||
@@ -69,6 +70,19 @@ def init_db():
|
||||
task_id TEXT
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_file_metadata_timestamp ON file_metadata(timestamp);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS file_relations (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
input_image TEXT,
|
||||
output_excel TEXT,
|
||||
result_purchase TEXT,
|
||||
status TEXT DEFAULT 'pending',
|
||||
created_at TEXT NOT NULL,
|
||||
updated_at TEXT NOT NULL
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_file_relations_input ON file_relations(input_image);
|
||||
CREATE INDEX IF NOT EXISTS idx_file_relations_output ON file_relations(output_excel);
|
||||
CREATE INDEX IF NOT EXISTS idx_file_relations_result ON file_relations(result_purchase);
|
||||
""")
|
||||
conn.commit()
|
||||
finally:
|
||||
@@ -83,6 +97,7 @@ def cleanup_old_records():
|
||||
conn.execute("DELETE FROM http_logs WHERE timestamp < ?", (cutoff,))
|
||||
conn.execute("DELETE FROM task_history WHERE created_at < ?", (cutoff,))
|
||||
conn.execute("DELETE FROM file_metadata WHERE timestamp < ?", (cutoff,))
|
||||
conn.execute("DELETE FROM file_relations WHERE updated_at < ?", (cutoff,))
|
||||
conn.commit()
|
||||
finally:
|
||||
conn.close()
|
||||
@@ -395,6 +410,298 @@ def query_file_stats() -> list[dict]:
|
||||
return stats
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# File relations — CRUD
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def upsert_file_relation(input_image: str = None, output_excel: str = None,
                         result_purchase: str = None, status: str = 'pending'):
    """Create a file_relations row, or fill in the gaps of an existing one.

    Lookup order: the first supplied (truthy) value that matches a row wins,
    trying ``input_image``, then ``output_excel``, then ``result_purchase``.

    When a row is found, only file columns that are currently empty are
    filled from the arguments; ``status`` (when truthy) and ``updated_at``
    are always overwritten.  When nothing matches, a new row is inserted
    with the given values and identical created/updated timestamps.
    """
    timestamp = datetime.now().isoformat()
    # Column name paired with the caller-supplied value, in lookup priority.
    supplied = (
        ('input_image', input_image),
        ('output_excel', output_excel),
        ('result_purchase', result_purchase),
    )

    conn = sqlite3.connect(_db_path)
    conn.row_factory = sqlite3.Row
    try:
        match = None
        for column, value in supplied:
            if not value:
                continue
            match = conn.execute(
                f"SELECT * FROM file_relations WHERE {column} = ?", (value,)
            ).fetchone()
            if match:
                break

        if not match:
            conn.execute(
                "INSERT INTO file_relations (input_image, output_excel, result_purchase, status, created_at, updated_at) "
                "VALUES (?, ?, ?, ?, ?, ?)",
                (input_image, output_excel, result_purchase, status, timestamp, timestamp),
            )
        else:
            assignments = []
            values = []
            # Never overwrite a file name that is already recorded.
            for column, value in supplied:
                if value and not match[column]:
                    assignments.append(f"{column} = ?")
                    values.append(value)
            if status:
                assignments.append("status = ?")
                values.append(status)
            assignments.append("updated_at = ?")
            values.append(timestamp)
            values.append(match['id'])
            conn.execute(
                f"UPDATE file_relations SET {', '.join(assignments)} WHERE id = ?",
                values,
            )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def query_file_relations(view: str = None, status: str = None,
                         page: int = 1, page_size: int = 50) -> tuple[list[dict], int]:
    """Return one page of file relations plus the total matching row count.

    ``view`` selects which file column must be present and drives the sort
    (descending on that column):

    - ``'orders'``: rows with ``result_purchase``
    - ``'tables'``: rows with ``output_excel``
    - ``'images'``: rows with ``input_image``
    - anything else: all rows, newest ``id`` first

    ``status``, when truthy, adds an exact-match filter.  Each returned dict
    carries the row's columns plus ``input_exists`` / ``output_exists`` /
    ``result_exists`` flags telling whether the named file is present under
    ``data/input``, ``data/output`` and ``data/result`` respectively.

    Returns ``(items, total)``.
    """
    view_columns = {
        'orders': 'result_purchase',
        'tables': 'output_excel',
        'images': 'input_image',
    }
    # (row column, data sub-directory, flag written into the result dict)
    existence_checks = (
        ('input_image', 'input', 'input_exists'),
        ('output_excel', 'output', 'output_exists'),
        ('result_purchase', 'result', 'result_exists'),
    )

    conn = sqlite3.connect(_db_path)
    conn.row_factory = sqlite3.Row
    try:
        conditions = []
        args = []

        focus = view_columns.get(view)
        if focus is None:
            ordering = "id DESC"
        else:
            conditions.append(f"{focus} IS NOT NULL")
            ordering = f"{focus} DESC"

        if status:
            conditions.append("status = ?")
            args.append(status)

        where = ""
        if conditions:
            where = " WHERE " + " AND ".join(conditions)

        # Total is computed with the filters only, before paging is applied.
        count_row = conn.execute(
            f"SELECT COUNT(*) as cnt FROM file_relations{where}", args
        ).fetchone()
        total = count_row[0] if count_row else 0

        args.extend([page_size, (page - 1) * page_size])
        rows = conn.execute(
            f"SELECT * FROM file_relations{where} ORDER BY {ordering} LIMIT ? OFFSET ?",
            args,
        ).fetchall()

        data_root = Path(__file__).resolve().parent.parent.parent.parent / 'data'
        items = []
        for record in rows:
            item = dict(record)
            for column, folder, flag in existence_checks:
                filename = item.get(column)
                item[flag] = bool(filename) and (data_root / folder / filename).exists()
            items.append(item)

        return items, total
    finally:
        conn.close()
|
||||
|
||||
|
||||
def delete_file_relations(ids: list[int]):
    """Delete file relation records by their primary-key IDs.

    Args:
        ids: IDs of the ``file_relations`` rows to remove.  An empty or
            ``None`` value is a no-op and does not open the database.

    The IDs are bound as SQL parameters in batches, because SQLite rejects a
    statement with more host parameters than its compile-time limit (999 on
    older builds).  All batches run on one connection and are committed
    together, so either every requested row is removed or none is.
    """
    if not ids:
        return

    # Comfortably below the smallest default SQLITE_MAX_VARIABLE_NUMBER.
    batch_size = 500

    conn = sqlite3.connect(_db_path)
    try:
        for start in range(0, len(ids), batch_size):
            batch = ids[start:start + batch_size]
            placeholders = ','.join('?' * len(batch))
            conn.execute(
                f"DELETE FROM file_relations WHERE id IN ({placeholders})", batch
            )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def sync_file_relations():
    """Rebuild the file_relations table from what is on disk.

    Scans ``data/input``, ``data/output`` and ``data/result`` under the
    project root and groups files by stem:

    - input: image files (``.jpg``/``.jpeg``/``.png``/``.bmp``)
    - output: Excel files (``.xls``/``.xlsx``)
    - result: Excel files; a leading ``采购单_`` is stripped before matching,
      files starting with ``合并采购单_`` (merged orders) are ignored, and any
      other result file is matched by its own stem

    Every existing row is deleted and one row per stem is inserted, with
    status ``done`` when a result file exists, ``ocr_done`` when only an
    output file exists, and ``pending`` otherwise.
    """
    data_root = Path(__file__).resolve().parent.parent.parent.parent / 'data'
    input_dir = data_root / 'input'
    output_dir = data_root / 'output'
    result_dir = data_root / 'result'

    image_exts = {'.jpg', '.jpeg', '.png', '.bmp'}
    excel_exts = {'.xls', '.xlsx'}

    # stem -> file name; a later file with the same stem replaces an earlier one
    input_files = {}
    if input_dir.exists():
        input_files = {
            entry.stem: entry.name
            for entry in input_dir.iterdir()
            if entry.is_file() and entry.suffix.lower() in image_exts
        }

    output_files = {}
    if output_dir.exists():
        output_files = {
            entry.stem: entry.name
            for entry in output_dir.iterdir()
            if entry.is_file() and entry.suffix.lower() in excel_exts
        }

    result_files = {}
    if result_dir.exists():
        for entry in result_dir.iterdir():
            if not entry.is_file() or entry.suffix.lower() not in excel_exts:
                continue
            filename = entry.name
            if filename.startswith('采购单_'):
                # Drop the prefix and the extension to recover the source stem.
                key = filename[len('采购单_'):-len(entry.suffix)]
            elif filename.startswith('合并采购单_'):
                continue  # merged orders do not belong to a single source file
            else:
                key = entry.stem
            result_files[key] = filename

    timestamp = datetime.now().isoformat()
    records = []
    for stem in sorted({*input_files, *output_files, *result_files}):
        source = input_files.get(stem)
        table = output_files.get(stem)
        order = result_files.get(stem)
        if order:
            state = 'done'
        elif table:
            state = 'ocr_done'
        else:
            state = 'pending'
        records.append((source, table, order, state, timestamp, timestamp))

    conn = sqlite3.connect(_db_path)
    try:
        # Wipe and re-insert inside one transaction.
        conn.execute("DELETE FROM file_relations")
        conn.executemany(
            "INSERT INTO file_relations (input_image, output_excel, result_purchase, status, created_at, updated_at) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            records,
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def query_file_relations_stats() -> dict:
    """Collect file counters for the dashboard.

    The returned dict contains:

    - ``input_images``: number of image files in ``data/input``
    - ``output_excel``: number of Excel files in ``data/output``
    - ``unprocessed_images``: input stems with no output file of the same stem
    - ``unprocessed_excel``: output stems with no ``采购单_``-prefixed result
    - ``completed_results``: number of ``采购单_``-prefixed Excel files in
      ``data/result``
    - ``total_processed``: file_relations rows whose status is ``done`` or
      ``merged``
    """
    data_root = Path(__file__).resolve().parent.parent.parent.parent / 'data'
    input_dir = data_root / 'input'
    output_dir = data_root / 'output'
    result_dir = data_root / 'result'

    image_exts = {'.jpg', '.jpeg', '.png', '.bmp'}
    excel_exts = {'.xls', '.xlsx'}
    order_prefix = '采购单_'

    # Lists keep duplicates so the counters reflect files, not unique stems.
    image_stems = []
    if input_dir.exists():
        image_stems = [
            entry.stem
            for entry in input_dir.iterdir()
            if entry.is_file() and entry.suffix.lower() in image_exts
        ]

    table_stems = []
    if output_dir.exists():
        table_stems = [
            entry.stem
            for entry in output_dir.iterdir()
            if entry.is_file() and entry.suffix.lower() in excel_exts
        ]

    order_stems = []
    if result_dir.exists():
        order_stems = [
            entry.name[len(order_prefix):-len(entry.suffix)]
            for entry in result_dir.iterdir()
            if entry.is_file()
            and entry.suffix.lower() in excel_exts
            and entry.name.startswith(order_prefix)
        ]

    input_stems = set(image_stems)
    output_stems = set(table_stems)
    result_stems = set(order_stems)

    # The processed total comes from the relations table, not from the disk scan.
    conn = sqlite3.connect(_db_path)
    try:
        counted = conn.execute(
            "SELECT COUNT(*) FROM file_relations WHERE status IN ('done', 'merged')"
        ).fetchone()
        total_processed = counted[0] if counted else 0
    finally:
        conn.close()

    return {
        'input_images': len(image_stems),
        'output_excel': len(table_stems),
        'unprocessed_images': len(input_stems - output_stems),
        'unprocessed_excel': len(output_stems - result_stems),
        'completed_results': len(order_stems),
        'total_processed': total_processed,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -47,10 +47,25 @@ class TaskManager:
|
||||
self._tasks: Dict[str, Task] = {}
|
||||
self._connections: Dict[str, Set] = {}
|
||||
self._db = None # type: ignore
|
||||
self._loop = None # captured event loop
|
||||
|
||||
def set_db_pool(self, db_pool):
    """Attach the DBPool used for persistence and remember the current loop.

    If an event loop is running at call time it is stored in ``self._loop``
    so work can later be handed to it from other threads; when no loop is
    running, ``self._loop`` is left untouched.
    """
    self._db = db_pool
    try:
        running = asyncio.get_running_loop()
    except RuntimeError:
        # Called outside of an event loop: nothing to capture.
        return
    self._loop = running
|
||||
|
||||
def _schedule(self, coro):
    """Schedule a coroutine from either async or worker-thread context.

    Args:
        coro: The awaitable to run.  Its result is not returned.

    Behavior:
        - Called with a running event loop in the current thread: the
          coroutine is scheduled on that loop.
        - Called without one (e.g. from a thread-pool worker): the coroutine
          is submitted thread-safely to the loop stored in ``self._loop``,
          provided that loop is still running.
        - Otherwise the coroutine cannot run and is closed explicitly, so it
          is not left behind as a never-awaited coroutine.
    """
    # Keep the try body to the one call whose RuntimeError means "no loop here".
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = None

    if loop is not None:
        # NOTE(review): the event loop only holds a weak reference to the
        # created task; consider keeping a strong reference if tasks are
        # observed to vanish before completing.
        asyncio.ensure_future(coro, loop=loop)
        return

    # No running loop in this thread; hand the work over to the captured loop.
    if self._loop is not None and self._loop.is_running():
        asyncio.run_coroutine_threadsafe(coro, self._loop)
        return

    # Nowhere to run it: close the coroutine instead of dropping it silently.
    if asyncio.iscoroutine(coro):
        coro.close()
|
||||
|
||||
def create_task(self, name: str) -> Task:
|
||||
task_id = str(uuid.uuid4())[:8]
|
||||
@@ -58,7 +73,7 @@ class TaskManager:
|
||||
self._tasks[task_id] = task
|
||||
self._connections[task_id] = set()
|
||||
if self._db:
|
||||
asyncio.create_task(
|
||||
self._schedule(
|
||||
self._db.execute_write(insert_task, task_id, name, TaskStatus.PENDING.value)
|
||||
)
|
||||
return task
|
||||
@@ -76,13 +91,13 @@ class TaskManager:
|
||||
task.progress = progress
|
||||
task.message = message
|
||||
if self._db:
|
||||
asyncio.create_task(
|
||||
self._schedule(
|
||||
self._db.execute_write(
|
||||
update_task, task_id,
|
||||
status=task.status.value, progress=progress, message=message,
|
||||
)
|
||||
)
|
||||
asyncio.create_task(self._broadcast(task_id))
|
||||
self._schedule(self._broadcast(task_id))
|
||||
|
||||
def add_log(self, task_id: str, line: str):
|
||||
task = self._tasks.get(task_id)
|
||||
@@ -90,13 +105,13 @@ class TaskManager:
|
||||
return
|
||||
task.log_lines.append(line)
|
||||
if self._db:
|
||||
asyncio.create_task(
|
||||
self._schedule(
|
||||
self._db.execute_write(
|
||||
update_task, task_id,
|
||||
log_lines=json.dumps(task.log_lines[-200:]),
|
||||
)
|
||||
)
|
||||
asyncio.create_task(self._broadcast(task_id))
|
||||
self._schedule(self._broadcast(task_id))
|
||||
|
||||
def set_completed(self, task_id: str, result_files: List[str] = None, message: str = ""):
|
||||
task = self._tasks.get(task_id)
|
||||
@@ -109,7 +124,7 @@ class TaskManager:
|
||||
task.result_files = result_files
|
||||
now = datetime.now().isoformat()
|
||||
if self._db:
|
||||
asyncio.create_task(
|
||||
self._schedule(
|
||||
self._db.execute_write(
|
||||
update_task, task_id,
|
||||
status=TaskStatus.COMPLETED.value, progress=100,
|
||||
@@ -118,7 +133,7 @@ class TaskManager:
|
||||
completed_at=now,
|
||||
)
|
||||
)
|
||||
asyncio.create_task(self._broadcast(task_id))
|
||||
self._schedule(self._broadcast(task_id))
|
||||
|
||||
def set_failed(self, task_id: str, error: str):
|
||||
task = self._tasks.get(task_id)
|
||||
@@ -129,14 +144,14 @@ class TaskManager:
|
||||
task.message = f"处理失败: {error}"
|
||||
now = datetime.now().isoformat()
|
||||
if self._db:
|
||||
asyncio.create_task(
|
||||
self._schedule(
|
||||
self._db.execute_write(
|
||||
update_task, task_id,
|
||||
status=TaskStatus.FAILED.value, error=error,
|
||||
message=task.message, completed_at=now,
|
||||
)
|
||||
)
|
||||
asyncio.create_task(self._broadcast(task_id))
|
||||
self._schedule(self._broadcast(task_id))
|
||||
|
||||
def subscribe(self, task_id: str, websocket):
|
||||
if task_id in self._connections:
|
||||
|
||||
Reference in New Issue
Block a user