dedc3b4183
- Full FastAPI backend with JWT auth, file management, processing pipeline, memory CRUD, barcode mappings, config management, cloud sync
- Vue 3 + Element Plus frontend with dashboard, task history, HTTP logs, memory editor, barcode editor, config editor, sync page
- HTTP request logging middleware with SQLite persistence
- Task history tracking with progress and retry support
- File metadata recording for upload/download operations
- WebAuth section in config.ini for bcrypt password storage
- Bug fix: logs.py count query returns tuple not dict

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
251 lines
9.1 KiB
Python
251 lines
9.1 KiB
Python
"""Processing endpoints: OCR, Excel conversion, merge, and full pipeline."""
|
|
|
|
import os
|
|
import sys
|
|
import traceback
|
|
from pathlib import Path
|
|
from typing import Optional, List
|
|
|
|
from fastapi import APIRouter, HTTPException, Depends, Request
|
|
from pydantic import BaseModel
|
|
|
|
from ..auth.dependencies import get_current_user
|
|
from ..services.service_wrapper import ServiceWrapper
|
|
|
|
# Every route declared in this module is served under /api/processing.
router = APIRouter(prefix="/api/processing", tags=["processing"])

# NOTE(review): no endpoint visible in this module references _wrapper;
# confirm whether it is used elsewhere before relying on or removing it.
_wrapper = ServiceWrapper(max_workers=3)

# Four directory levels above this file; presumably the repository root —
# TODO confirm against the actual package layout.
_project_root = Path(__file__).resolve().parent.parent.parent.parent
# Source files scanned by the OCR and Excel endpoints.
_input_dir = _project_root / "data" / "input"
# Target directory handed to the OCR service; also read as Excel input.
_output_dir = _project_root / "data" / "output"
# Target directory handed to the Excel conversion and merge steps.
_result_dir = _project_root / "data" / "result"
|
|
|
|
|
|
# Optional request body of the /excel and /pipeline endpoints. Both fields
# default to None, so an empty body is valid.
class PipelineRequest(BaseModel):
    files: Optional[List[str]] = None  # specific files, or None = all in input/
    supplier: Optional[str] = None  # force supplier type
|
|
|
|
|
|
# Payload returned by the POST endpoints once a background task was created.
class TaskResponse(BaseModel):
    task_id: str  # id of the task created through the task manager
    status: str  # the endpoints in this module always send "accepted"
    message: str  # short confirmation text for the caller
|
|
|
|
|
|
def _get_task_manager(request: Request):
    """Fetch the task manager stored on the state of the current request."""
    state = request.state
    return state.task_manager
|
|
|
|
|
|
def _list_input_files(filter_ext: Optional[List[str]] = None) -> List[Path]:
    """Return the regular files directly inside the input directory.

    Delegates to ``_list_input_files_from`` so that the input and output
    directory listings share one implementation instead of two copies.

    Args:
        filter_ext: Suffixes including the leading dot (for example
            ``[".png", ".jpg"]``). ``None`` disables filtering.

    Returns:
        The matching paths sorted by path, or an empty list when the input
        directory does not exist.
    """
    return _list_input_files_from(_input_dir, filter_ext=filter_ext)
|
|
|
|
|
|
@router.post("/ocr-batch", response_model=TaskResponse)
async def ocr_batch(
    request: Request,
    current_user: dict = Depends(get_current_user),
):
    """Run OCR on all images in input/.

    The request is answered as soon as the background task is scheduled;
    progress and the final result are reported through the task manager.

    Raises:
        HTTPException: 400 when input/ contains no image files.
    """
    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    image_exts = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
    files = _list_input_files(filter_ext=list(image_exts))
    if not files:
        raise HTTPException(400, "input/ 目录中没有图片文件")

    # Create the task only after validation succeeded; otherwise every
    # rejected request would leave behind a task that never starts.
    tm = _get_task_manager(request)
    task = tm.create_task("批量OCR识别")

    def _build_service():
        # Imported lazily so an import failure is reported on the task.
        from app.services.ocr_service import OCRService
        return OCRService()

    async def _run():
        loop = asyncio.get_running_loop()
        # The OCR calls are synchronous. Running them directly inside this
        # coroutine would block the event loop (and with it the status
        # endpoint) for the whole batch. One dedicated worker thread keeps
        # construction and use of the service on the same thread.
        # NOTE(review): jobs from concurrent requests can now overlap instead
        # of being serialized by a blocked loop — confirm the services
        # tolerate that.
        pool = ThreadPoolExecutor(max_workers=1)
        try:
            svc = await loop.run_in_executor(pool, _build_service)
            total = len(files)
            for i, f in enumerate(files):
                tm.update_progress(task.id, int((i / total) * 100), f"正在识别: {f.name}")
                tm.add_log(task.id, f"[OCR] 处理 {f.name}")
                try:
                    await loop.run_in_executor(
                        pool, svc.process_single, str(f), str(_output_dir)
                    )
                    tm.add_log(task.id, f"[OCR] 完成: {f.name}")
                except Exception as e:
                    # Best effort: one bad image must not abort the batch.
                    tm.add_log(task.id, f"[OCR] 失败: {f.name} - {e}")
            result_files = [p.name for p in _output_dir.iterdir() if p.is_file()]
            tm.set_completed(task.id, result_files=result_files, message=f"OCR完成,共处理 {total} 个文件")
        except Exception as e:
            # Keep the traceback in the task log, as the pipeline endpoint does.
            tm.add_log(task.id, f"[错误] {traceback.format_exc()}")
            tm.set_failed(task.id, str(e))
        finally:
            pool.shutdown(wait=False)

    asyncio.create_task(_run())

    return TaskResponse(task_id=task.id, status="accepted", message="OCR任务已创建")
|
|
|
|
|
|
@router.post("/excel", response_model=TaskResponse)
async def process_excel(
    request: Request,
    body: PipelineRequest = PipelineRequest(),
    current_user: dict = Depends(get_current_user),
):
    """Convert OCR output Excel files to standardized format.

    When ``body.files`` is given, only those file names inside the output
    directory are processed. Otherwise the Excel files in the input directory
    are used, falling back to the output directory when there are none.

    Raises:
        HTTPException: 400 when no file to process was found.
    """
    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    # NOTE(review): body.supplier is accepted but never read by this endpoint.
    excel_exts = {'.xls', '.xlsx'}
    if body.files:
        # The names come from the client. Accept bare file names only, so an
        # absolute path or a "../" component cannot escape the output directory.
        files = [
            _output_dir / name
            for name in body.files
            if Path(name).name == name and (_output_dir / name).is_file()
        ]
    else:
        files = _list_input_files(filter_ext=list(excel_exts))
        if not files:
            files = _list_input_files_from(_output_dir, filter_ext=list(excel_exts))

    if not files:
        raise HTTPException(400, "没有找到Excel文件")

    # Create the task only after validation succeeded; otherwise every
    # rejected request would leave behind a task that never starts.
    tm = _get_task_manager(request)
    task = tm.create_task("Excel标准化处理")

    def _build_service():
        # Imported lazily so an import failure is reported on the task.
        from app.services.order_service import OrderService
        return OrderService()

    async def _run():
        loop = asyncio.get_running_loop()
        # The conversion calls are synchronous. Running them directly inside
        # this coroutine would block the event loop for the whole batch. One
        # dedicated worker thread keeps construction and use of the service
        # on the same thread.
        # NOTE(review): jobs from concurrent requests can now overlap —
        # confirm the services tolerate that.
        pool = ThreadPoolExecutor(max_workers=1)
        try:
            svc = await loop.run_in_executor(pool, _build_service)
            total = len(files)
            for i, f in enumerate(files):
                tm.update_progress(task.id, int((i / total) * 100), f"正在处理: {f.name}")
                tm.add_log(task.id, f"[Excel] 处理 {f.name}")
                try:
                    await loop.run_in_executor(
                        pool, svc.process_excel, str(f), str(_result_dir)
                    )
                    tm.add_log(task.id, f"[Excel] 完成: {f.name}")
                except Exception as e:
                    # Best effort: one bad workbook must not abort the batch.
                    tm.add_log(task.id, f"[Excel] 失败: {f.name} - {e}")
            result_files = [p.name for p in _result_dir.iterdir() if p.is_file()]
            tm.set_completed(task.id, result_files=result_files, message=f"Excel处理完成,共 {total} 个文件")
        except Exception as e:
            # Keep the traceback in the task log, as the pipeline endpoint does.
            tm.add_log(task.id, f"[错误] {traceback.format_exc()}")
            tm.set_failed(task.id, str(e))
        finally:
            pool.shutdown(wait=False)

    asyncio.create_task(_run())

    return TaskResponse(task_id=task.id, status="accepted", message="Excel处理任务已创建")
|
|
|
|
|
|
@router.post("/merge", response_model=TaskResponse)
async def merge_orders(
    request: Request,
    current_user: dict = Depends(get_current_user),
):
    """Merge all processed Excel files into a single purchase order.

    The request is answered as soon as the background task is scheduled;
    the outcome is reported through the task manager.
    """
    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    tm = _get_task_manager(request)
    task = tm.create_task("合并采购单")

    def _build_service():
        # Imported lazily so an import failure is reported on the task.
        from app.services.order_service import OrderService
        return OrderService()

    async def _run():
        loop = asyncio.get_running_loop()
        # The merge call is synchronous. Running it directly inside this
        # coroutine would block the event loop until the merge finishes. One
        # dedicated worker thread keeps construction and use of the service
        # on the same thread.
        # NOTE(review): jobs from concurrent requests can now overlap —
        # confirm the services tolerate that.
        pool = ThreadPoolExecutor(max_workers=1)
        try:
            svc = await loop.run_in_executor(pool, _build_service)
            tm.update_progress(task.id, 20, "正在合并采购单...")
            tm.add_log(task.id, "[合并] 开始合并")
            result = await loop.run_in_executor(pool, svc.merge_orders, str(_result_dir))
            tm.add_log(task.id, f"[合并] 完成: {result}")
            tm.set_completed(task.id, result_files=[result] if result else [], message="合并完成")
        except Exception as e:
            # Keep the traceback in the task log, as the pipeline endpoint does.
            tm.add_log(task.id, f"[错误] {traceback.format_exc()}")
            tm.set_failed(task.id, str(e))
        finally:
            pool.shutdown(wait=False)

    asyncio.create_task(_run())

    return TaskResponse(task_id=task.id, status="accepted", message="合并任务已创建")
|
|
|
|
|
|
@router.post("/pipeline", response_model=TaskResponse)
async def full_pipeline(
    request: Request,
    body: PipelineRequest = PipelineRequest(),
    current_user: dict = Depends(get_current_user),
):
    """Run the full pipeline: OCR → Excel → Merge.

    The request is answered as soon as the background task is scheduled.
    Progress is reported in three bands: OCR 0-30, Excel 35-70, merge from 75.
    A failure of a single file or of the merge step is logged and does not
    abort the run.
    """
    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    # NOTE(review): `body` (files / supplier) is accepted but never read by
    # this endpoint; the pipeline always works on the directory contents.
    tm = _get_task_manager(request)
    task = tm.create_task("一键全流程处理")

    def _build_ocr_service():
        # Imported lazily so an import failure is reported on the task.
        from app.services.ocr_service import OCRService
        return OCRService()

    def _build_order_service():
        from app.services.order_service import OrderService
        return OrderService()

    async def _run():
        loop = asyncio.get_running_loop()
        # Every service call below is synchronous. Running them directly
        # inside this coroutine would block the event loop for the whole
        # pipeline. One dedicated worker thread keeps construction and use
        # of the services on the same thread.
        # NOTE(review): jobs from concurrent requests can now overlap —
        # confirm the services tolerate that.
        pool = ThreadPoolExecutor(max_workers=1)
        try:
            # Step 1: OCR
            tm.update_progress(task.id, 0, "步骤 1/3: OCR识别")
            tm.add_log(task.id, "[Pipeline] 开始OCR识别")
            ocr_svc = await loop.run_in_executor(pool, _build_ocr_service)

            image_exts = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
            images = _list_input_files(filter_ext=list(image_exts))
            image_count = len(images)
            for i, f in enumerate(images):
                pct = int((i / image_count) * 30)
                tm.update_progress(task.id, pct, f"OCR: {f.name}")
                try:
                    await loop.run_in_executor(
                        pool, ocr_svc.process_single, str(f), str(_output_dir)
                    )
                    tm.add_log(task.id, f"[OCR] 完成: {f.name}")
                except Exception as e:
                    tm.add_log(task.id, f"[OCR] 失败: {f.name} - {e}")

            # Step 2: Excel conversion
            tm.update_progress(task.id, 35, "步骤 2/3: Excel标准化")
            tm.add_log(task.id, "[Pipeline] 开始Excel处理")
            order_svc = await loop.run_in_executor(pool, _build_order_service)

            # Same discovery helper as the /excel endpoint: sorted and
            # case-insensitive on the suffix, unlike a plain glob.
            excel_files = _list_input_files_from(_output_dir, filter_ext=['.xls', '.xlsx'])
            excel_count = len(excel_files)
            for i, f in enumerate(excel_files):
                pct = 35 + int((i / excel_count) * 35)
                tm.update_progress(task.id, pct, f"Excel: {f.name}")
                try:
                    await loop.run_in_executor(
                        pool, order_svc.process_excel, str(f), str(_result_dir)
                    )
                    tm.add_log(task.id, f"[Excel] 完成: {f.name}")
                except Exception as e:
                    tm.add_log(task.id, f"[Excel] 失败: {f.name} - {e}")

            # Step 3: Merge
            tm.update_progress(task.id, 75, "步骤 3/3: 合并采购单")
            tm.add_log(task.id, "[Pipeline] 开始合并")
            try:
                result = await loop.run_in_executor(
                    pool, order_svc.merge_orders, str(_result_dir)
                )
                tm.add_log(task.id, f"[合并] 完成: {result}")
            except Exception as e:
                # Best effort: the files converted so far are still reported.
                tm.add_log(task.id, f"[合并] 失败: {e}")

            result_files = [p.name for p in _result_dir.iterdir() if p.is_file()]
            tm.set_completed(task.id, result_files=result_files, message="全流程处理完成")
        except Exception as e:
            tb = traceback.format_exc()
            tm.add_log(task.id, f"[错误] {tb}")
            tm.set_failed(task.id, str(e))
        finally:
            pool.shutdown(wait=False)

    asyncio.create_task(_run())

    return TaskResponse(task_id=task.id, status="accepted", message="全流程任务已创建")
|
|
|
|
|
|
@router.get("/status/{task_id}")
async def get_task_status(
    task_id: str,
    request: Request,
    current_user: dict = Depends(get_current_user),
):
    """Return the dictionary form of a task; respond 404 if none is found."""
    found = _get_task_manager(request).get_task(task_id)
    if found:
        return found.to_dict()
    raise HTTPException(404, "任务不存在")
|
|
|
|
|
|
def _list_input_files_from(directory: Path, filter_ext: Optional[List[str]] = None) -> List[Path]:
    """Return the regular files directly inside ``directory``, sorted by path.

    Sub-directories are skipped and the listing is not recursive.

    Args:
        directory: Directory to scan.
        filter_ext: Suffixes including the leading dot (for example
            ``[".xls", ".xlsx"]``). Matching is case-insensitive on both
            sides. ``None`` disables filtering; an empty list matches nothing.

    Returns:
        The matching paths, or an empty list when ``directory`` is missing.
    """
    if not directory.is_dir():
        return []
    # Normalise once so that a filter entry such as ".XLS" can match at all;
    # a set also makes each membership test O(1).
    wanted = None if filter_ext is None else {ext.lower() for ext in filter_ext}
    return [
        entry
        for entry in sorted(directory.iterdir())
        if entry.is_file() and (wanted is None or entry.suffix.lower() in wanted)
    ]
|