feat: 益选 OCR 订单处理系统初始提交

- 智能供应商识别(蓉城易购/烟草/杨碧月/通用)
- 百度 OCR 表格识别集成
- 规则引擎(列映射/数据清洗/单位转换/规格推断)
- 条码映射管理与云端同步(Gitea REST API)
- 云端同步支持:条码映射、供应商配置、商品资料、采购模板
- 拖拽一键处理(图片→OCR→Excel→合并)
- 191 个单元测试
- 移除无用的模板管理功能
- 清理 IDE 产物目录

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-04 19:51:13 +08:00
commit e4d62df7e3
78 changed files with 15257 additions and 0 deletions
View File
+214
View File
@@ -0,0 +1,214 @@
"""
商品资料 SQLite 数据库
将商品资料 (条码/名称/进货价/单位) 存储在 SQLite 中,
支持从 Excel 自动导入和按条码快速查询。
"""
import os
import sqlite3
from datetime import datetime
from typing import Dict, List, Optional
import pandas as pd
from ..utils.log_utils import get_logger
from ..utils.file_utils import smart_read_excel
from ...core.handlers.column_mapper import ColumnMapper
logger = get_logger(__name__)
class ProductDatabase:
    """SQLite-backed store of product master data.

    Every row of the ``products`` table holds a barcode (primary key), a
    name, a purchase price, a unit and the ISO timestamp of the import that
    wrote it.  The database file is created, and seeded from an Excel
    workbook when one is available, the first time it is opened; afterwards
    prices can be looked up by barcode.
    """

    SCHEMA = """
        CREATE TABLE IF NOT EXISTS products (
            barcode TEXT PRIMARY KEY,
            name TEXT DEFAULT '',
            price REAL DEFAULT 0.0,
            unit TEXT DEFAULT '',
            updated_at TEXT
        );
    """

    # Upper bound on bound parameters per statement.  SQLite rejects
    # statements with too many "?" variables (the default limit was 999 in
    # older builds), so bulk lookups are split into chunks of this size.
    _MAX_QUERY_VARS = 500

    def __init__(self, db_path: str, excel_source: str):
        """Open the database, creating and seeding it if the file is missing.

        Args:
            db_path: Path of the SQLite database file.
            excel_source: Path of the Excel workbook holding product data.
        """
        self.db_path = db_path
        self.excel_source = excel_source
        self._ensure_db()

    def _connect(self) -> sqlite3.Connection:
        """Return a new connection to the database file."""
        return sqlite3.connect(self.db_path)

    @staticmethod
    def _normalize_barcode(value: object) -> str:
        """Convert a raw barcode value into its canonical text form.

        Numeric columns containing blanks are loaded by pandas as floats, so
        a barcode may arrive as ``6901028075831.0``; integral floats are
        rendered without the fractional part so that they match the text
        form used for lookups.  ``None``, NaN and the literal ``'nan'`` map
        to the empty string, which callers treat as "no barcode".
        """
        if value is None:
            return ''
        if isinstance(value, float):
            if value != value:  # NaN is the only value not equal to itself
                return ''
            if value.is_integer():
                return str(int(value))
        text = str(value).strip()
        return '' if text == 'nan' else text

    @staticmethod
    def _clean_text(value: object) -> str:
        """Return *value* as stripped text, mapping a missing cell to ''."""
        text = str(value).strip()
        return '' if text == 'nan' else text

    @staticmethod
    def _parse_price(value: object) -> float:
        """Return *value* as a float, or 0.0 when it is blank or not numeric."""
        try:
            if value is not None and str(value).strip() not in ('', 'nan', 'None'):
                return float(value)
        except (ValueError, TypeError):
            # Best effort: an unparsable price must not abort the import.
            pass
        return 0.0

    def _ensure_db(self):
        """Create the database on first use, importing the Excel if present."""
        if os.path.exists(self.db_path):
            return

        # sqlite3 cannot create missing directories; make sure the parent
        # exists on every path.  dirname is '' for a bare file name, which
        # os.makedirs would reject.
        parent = os.path.dirname(self.db_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        if not os.path.exists(self.excel_source):
            logger.warning(f"商品资料 Excel 不存在,跳过导入: {self.excel_source}")
            self._create_empty_db()
            return

        logger.info(f"首次运行,从 Excel 导入商品资料: {self.excel_source}")
        self._create_empty_db()
        count = self.import_from_excel(self.excel_source)
        logger.info(f"商品资料导入完成: {count} 条记录")

    def _create_empty_db(self):
        """Create the ``products`` table if it does not exist yet."""
        conn = self._connect()
        try:
            conn.executescript(self.SCHEMA)
            conn.commit()
        finally:
            conn.close()

    def _parse_excel(self, excel_path: str) -> List[tuple]:
        """Read *excel_path* and return the rows to store.

        Each returned tuple is ``(barcode, name, price, unit, updated_at)``.
        An empty list is returned (and the reason logged) when the workbook
        cannot be read, is empty, has no barcode column, or yields no row
        with a usable barcode.  Nothing is written to the database here.
        """
        df = smart_read_excel(excel_path)
        if df is None or df.empty:
            logger.warning(f"Excel 文件为空或读取失败: {excel_path}")
            return []

        columns = list(df.columns)

        barcode_col = ColumnMapper.find_column(columns, 'barcode')
        if not barcode_col:
            logger.error(f"Excel 中未找到条码列: {list(df.columns)}")
            return []

        price_col = ColumnMapper.find_column(columns, 'unit_price')
        if not price_col:
            # The purchase-price header may not be a registered alias; fall
            # back to the first column whose header contains it.
            price_col = next(
                (col for col in df.columns if '进货价' in str(col).strip()),
                None,
            )

        # Name and unit columns are optional.
        name_col = ColumnMapper.find_column(columns, 'name')
        unit_col = ColumnMapper.find_column(columns, 'unit')

        now = datetime.now().isoformat()
        rows = []
        for _, row in df.iterrows():
            barcode = self._normalize_barcode(row.get(barcode_col, ''))
            if not barcode:
                continue
            price = self._parse_price(row.get(price_col)) if price_col else 0.0
            name = self._clean_text(row.get(name_col, '')) if name_col else ''
            unit = self._clean_text(row.get(unit_col, '')) if unit_col else ''
            rows.append((barcode, name, price, unit, now))

        if not rows:
            logger.warning(f"Excel 中未解析出有效记录: {excel_path}")
        return rows

    def _write_rows(self, rows: List[tuple], replace_all: bool = False) -> None:
        """Upsert *rows* in one transaction, optionally clearing the table first.

        When *replace_all* is true the delete and the inserts are committed
        together, so a failure leaves the previous contents intact.
        """
        conn = self._connect()
        try:
            with conn:  # commits on success, rolls back on any exception
                if replace_all:
                    conn.execute("DELETE FROM products")
                conn.executemany(
                    "INSERT OR REPLACE INTO products (barcode, name, price, unit, updated_at) "
                    "VALUES (?, ?, ?, ?, ?)",
                    rows
                )
        finally:
            conn.close()

    def import_from_excel(self, excel_path: str) -> int:
        """Import product data from an Excel workbook.

        Existing rows with the same barcode are replaced; other rows are
        left untouched.

        Args:
            excel_path: Path of the Excel workbook.

        Returns:
            Number of rows parsed and written (0 if nothing was imported).
        """
        rows = self._parse_excel(excel_path)
        if not rows:
            return 0
        self._write_rows(rows)
        return len(rows)

    def reimport(self) -> int:
        """Replace the whole table with the contents of the source Excel.

        The workbook is parsed before anything is deleted, and the delete
        plus inserts run in a single transaction.  If the workbook cannot be
        read or yields no valid rows, the existing data is kept.

        Returns:
            Number of rows imported (0 if the existing data was kept).
        """
        rows = self._parse_excel(self.excel_source)
        if not rows:
            return 0
        self._write_rows(rows, replace_all=True)
        return len(rows)

    def get_price(self, barcode: str) -> Optional[float]:
        """Look up the purchase price for one barcode.

        Args:
            barcode: Product barcode; surrounding whitespace is ignored.

        Returns:
            The purchase price, or None when the barcode is unknown.
        """
        conn = self._connect()
        try:
            cursor = conn.execute(
                "SELECT price FROM products WHERE barcode = ?",
                (self._normalize_barcode(barcode),)
            )
            row = cursor.fetchone()
            return row[0] if row else None
        finally:
            conn.close()

    def get_prices(self, barcodes: List[str]) -> Dict[str, float]:
        """Look up purchase prices for many barcodes.

        Args:
            barcodes: Barcodes to look up; duplicates are queried once.

        Returns:
            Mapping of barcode to purchase price; unknown barcodes are
            omitted.
        """
        if not barcodes:
            return {}

        # De-duplicate while keeping order, then query in bounded chunks so
        # the statement never exceeds SQLite's bound-parameter limit.
        keys = list(dict.fromkeys(self._normalize_barcode(b) for b in barcodes))
        step = self._MAX_QUERY_VARS
        prices: Dict[str, float] = {}
        conn = self._connect()
        try:
            for start in range(0, len(keys), step):
                chunk = keys[start:start + step]
                placeholders = ','.join('?' * len(chunk))
                cursor = conn.execute(
                    f"SELECT barcode, price FROM products WHERE barcode IN ({placeholders})",
                    chunk
                )
                prices.update((row[0], row[1]) for row in cursor.fetchall())
            return prices
        finally:
            conn.close()

    def count(self) -> int:
        """Return the number of products stored."""
        conn = self._connect()
        try:
            cursor = conn.execute("SELECT COUNT(*) FROM products")
            return cursor.fetchone()[0]
        finally:
            conn.close()