Files
orc-order-v2/web/backend/routers/processing.py
T
houhuan dedc3b4183 feat: complete web application — FastAPI backend + Vue 3 SPA frontend
- Full FastAPI backend with JWT auth, file management, processing pipeline,
  memory CRUD, barcode mappings, config management, cloud sync
- Vue 3 + Element Plus frontend with dashboard, task history, HTTP logs,
  memory editor, barcode editor, config editor, sync page
- HTTP request logging middleware with SQLite persistence
- Task history tracking with progress and retry support
- File metadata recording for upload/download operations
- WebAuth section in config.ini for bcrypt password storage
- Bug fix: logs.py count query returns tuple not dict

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-05 11:59:07 +08:00

251 lines
9.1 KiB
Python

"""Processing endpoints: OCR, Excel conversion, merge, and full pipeline."""
import os
import sys
import traceback
from pathlib import Path
from typing import Optional, List
from fastapi import APIRouter, HTTPException, Depends, Request
from pydantic import BaseModel
from ..auth.dependencies import get_current_user
from ..services.service_wrapper import ServiceWrapper
router = APIRouter(prefix="/api/processing", tags=["processing"])
_wrapper = ServiceWrapper(max_workers=3)
_project_root = Path(__file__).resolve().parent.parent.parent.parent
_input_dir = _project_root / "data" / "input"
_output_dir = _project_root / "data" / "output"
_result_dir = _project_root / "data" / "result"
class PipelineRequest(BaseModel):
files: Optional[List[str]] = None # specific files, or None = all in input/
supplier: Optional[str] = None # force supplier type
class TaskResponse(BaseModel):
task_id: str
status: str
message: str
def _get_task_manager(request: Request):
return request.state.task_manager
def _list_input_files(filter_ext: Optional[List[str]] = None) -> List[Path]:
if not _input_dir.is_dir():
return []
files = []
for f in sorted(_input_dir.iterdir()):
if f.is_file():
if filter_ext is None or f.suffix.lower() in filter_ext:
files.append(f)
return files
@router.post("/ocr-batch", response_model=TaskResponse)
async def ocr_batch(
request: Request,
current_user: dict = Depends(get_current_user),
):
"""Run OCR on all images in input/."""
tm = _get_task_manager(request)
task = tm.create_task("批量OCR识别")
image_exts = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
files = _list_input_files(filter_ext=list(image_exts))
if not files:
raise HTTPException(400, "input/ 目录中没有图片文件")
async def _run():
try:
from app.services.ocr_service import OCRService
svc = OCRService()
total = len(files)
for i, f in enumerate(files):
tm.update_progress(task.id, int((i / total) * 100), f"正在识别: {f.name}")
tm.add_log(task.id, f"[OCR] 处理 {f.name}")
try:
svc.process_single(str(f), str(_output_dir))
tm.add_log(task.id, f"[OCR] 完成: {f.name}")
except Exception as e:
tm.add_log(task.id, f"[OCR] 失败: {f.name} - {e}")
result_files = [f.name for f in _output_dir.iterdir() if f.is_file()]
tm.set_completed(task.id, result_files=result_files, message=f"OCR完成,共处理 {total} 个文件")
except Exception as e:
tm.set_failed(task.id, str(e))
import asyncio
asyncio.create_task(_run())
return TaskResponse(task_id=task.id, status="accepted", message="OCR任务已创建")
@router.post("/excel", response_model=TaskResponse)
async def process_excel(
request: Request,
body: PipelineRequest = PipelineRequest(),
current_user: dict = Depends(get_current_user),
):
"""Convert OCR output Excel files to standardized format."""
tm = _get_task_manager(request)
task = tm.create_task("Excel标准化处理")
excel_exts = {'.xls', '.xlsx'}
if body.files:
files = [_output_dir / f for f in body.files if (_output_dir / f).is_file()]
else:
files = _list_input_files(filter_ext=list(excel_exts))
if not files:
files = _list_input_files_from(_output_dir, filter_ext=list(excel_exts))
if not files:
raise HTTPException(400, "没有找到Excel文件")
async def _run():
try:
from app.services.order_service import OrderService
svc = OrderService()
total = len(files)
for i, f in enumerate(files):
tm.update_progress(task.id, int((i / total) * 100), f"正在处理: {f.name}")
tm.add_log(task.id, f"[Excel] 处理 {f.name}")
try:
svc.process_excel(str(f), str(_result_dir))
tm.add_log(task.id, f"[Excel] 完成: {f.name}")
except Exception as e:
tm.add_log(task.id, f"[Excel] 失败: {f.name} - {e}")
result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
tm.set_completed(task.id, result_files=result_files, message=f"Excel处理完成,共 {total} 个文件")
except Exception as e:
tm.set_failed(task.id, str(e))
import asyncio
asyncio.create_task(_run())
return TaskResponse(task_id=task.id, status="accepted", message="Excel处理任务已创建")
@router.post("/merge", response_model=TaskResponse)
async def merge_orders(
request: Request,
current_user: dict = Depends(get_current_user),
):
"""Merge all processed Excel files into a single purchase order."""
tm = _get_task_manager(request)
task = tm.create_task("合并采购单")
async def _run():
try:
from app.services.order_service import OrderService
svc = OrderService()
tm.update_progress(task.id, 20, "正在合并采购单...")
tm.add_log(task.id, "[合并] 开始合并")
result = svc.merge_orders(str(_result_dir))
tm.add_log(task.id, f"[合并] 完成: {result}")
tm.set_completed(task.id, result_files=[result] if result else [], message="合并完成")
except Exception as e:
tm.set_failed(task.id, str(e))
import asyncio
asyncio.create_task(_run())
return TaskResponse(task_id=task.id, status="accepted", message="合并任务已创建")
@router.post("/pipeline", response_model=TaskResponse)
async def full_pipeline(
request: Request,
body: PipelineRequest = PipelineRequest(),
current_user: dict = Depends(get_current_user),
):
"""Run the full pipeline: OCR → Excel → Merge."""
tm = _get_task_manager(request)
task = tm.create_task("一键全流程处理")
async def _run():
try:
# Step 1: OCR
tm.update_progress(task.id, 0, "步骤 1/3: OCR识别")
tm.add_log(task.id, "[Pipeline] 开始OCR识别")
from app.services.ocr_service import OCRService
ocr_svc = OCRService()
image_exts = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
images = _list_input_files(filter_ext=list(image_exts))
for i, f in enumerate(images):
pct = int((i / max(len(images), 1)) * 30)
tm.update_progress(task.id, pct, f"OCR: {f.name}")
try:
ocr_svc.process_single(str(f), str(_output_dir))
tm.add_log(task.id, f"[OCR] 完成: {f.name}")
except Exception as e:
tm.add_log(task.id, f"[OCR] 失败: {f.name} - {e}")
# Step 2: Excel conversion
tm.update_progress(task.id, 35, "步骤 2/3: Excel标准化")
tm.add_log(task.id, "[Pipeline] 开始Excel处理")
from app.services.order_service import OrderService
order_svc = OrderService()
excel_files = list(_output_dir.glob("*.xls")) + list(_output_dir.glob("*.xlsx"))
for i, f in enumerate(excel_files):
pct = 35 + int((i / max(len(excel_files), 1)) * 35)
tm.update_progress(task.id, pct, f"Excel: {f.name}")
try:
order_svc.process_excel(str(f), str(_result_dir))
tm.add_log(task.id, f"[Excel] 完成: {f.name}")
except Exception as e:
tm.add_log(task.id, f"[Excel] 失败: {f.name} - {e}")
# Step 3: Merge
tm.update_progress(task.id, 75, "步骤 3/3: 合并采购单")
tm.add_log(task.id, "[Pipeline] 开始合并")
try:
result = order_svc.merge_orders(str(_result_dir))
tm.add_log(task.id, f"[合并] 完成: {result}")
except Exception as e:
tm.add_log(task.id, f"[合并] 失败: {e}")
result = None
result_files = [f.name for f in _result_dir.iterdir() if f.is_file()]
tm.set_completed(task.id, result_files=result_files, message="全流程处理完成")
except Exception as e:
tb = traceback.format_exc()
tm.add_log(task.id, f"[错误] {tb}")
tm.set_failed(task.id, str(e))
import asyncio
asyncio.create_task(_run())
return TaskResponse(task_id=task.id, status="accepted", message="全流程任务已创建")
@router.get("/status/{task_id}")
async def get_task_status(
task_id: str,
request: Request,
current_user: dict = Depends(get_current_user),
):
tm = _get_task_manager(request)
task = tm.get_task(task_id)
if not task:
raise HTTPException(404, "任务不存在")
return task.to_dict()
def _list_input_files_from(directory: Path, filter_ext: List[str] = None) -> List[Path]:
if not directory.is_dir():
return []
files = []
for f in sorted(directory.iterdir()):
if f.is_file():
if filter_ext is None or f.suffix.lower() in filter_ext:
files.append(f)
return files