@
feat: 商品记忆库 — 从OCR结果学习,逐步替代OCR识别 - 扩展 product_db.py: schema迁移(specification/source/confidence/usage_count/last_seen) + 学习逻辑(learn_from_product)、置信度系统、批量查询、导入导出、云端同步 - 注入处理管线: processor.py 在提取产品后调用 _apply_memory() 用记忆补全OCR + _is_spec_suspicious() 检测OCR规格质量,处理完后自动学习 - order_service.py 创建共享 ProductDatabase 实例 - dialog_utils.py 新增商品记忆库云端同步条目 - 新建 memory_editor.py: Treeview查看/编辑/搜索/删除/重新导入 - main_window.py 系统设置区新增"商品记忆库"按钮 - build_exe.py 添加 memory_editor 到 hidden_imports @
This commit is contained in:
@@ -40,12 +40,13 @@ class ExcelProcessor:
|
||||
提取条码、单价和数量,并按照采购单模板的格式填充
|
||||
"""
|
||||
|
||||
def __init__(self, config):
|
||||
def __init__(self, config, product_db=None):
|
||||
"""
|
||||
初始化Excel处理器
|
||||
|
||||
|
||||
Args:
|
||||
config: 配置信息
|
||||
product_db: 商品数据库实例(可选,由外部传入以共享)
|
||||
"""
|
||||
self.config = config
|
||||
|
||||
@@ -74,6 +75,18 @@ class ExcelProcessor:
|
||||
|
||||
# 加载单位转换器和配置
|
||||
self.unit_converter = UnitConverter()
|
||||
|
||||
# 商品记忆库
|
||||
if product_db is not None:
|
||||
self.product_db = product_db
|
||||
else:
|
||||
from ..db.product_db import ProductDatabase
|
||||
db_path = config.get_path('Paths', 'product_db', fallback='data/product_cache.db') if hasattr(config, 'get_path') else 'data/product_cache.db'
|
||||
tpl_folder = config.get('Paths', 'template_folder', fallback='templates')
|
||||
item_data = config.get('Templates', 'item_data', fallback='商品资料.xlsx')
|
||||
tpl_path = os.path.join(tpl_folder, item_data)
|
||||
self.product_db = ProductDatabase(db_path, tpl_path)
|
||||
|
||||
logger.info(f"初始化ExcelProcessor完成,模板文件: {self.template_path}")
|
||||
except Exception as e:
|
||||
logger.error(f"初始化ExcelProcessor失败: {e}")
|
||||
@@ -371,14 +384,70 @@ class ExcelProcessor:
|
||||
except Exception as e:
|
||||
logger.warning(f"通过金额和单价计算数量失败: {e}")
|
||||
|
||||
# 应用记忆库补全
|
||||
product = self._apply_memory(product)
|
||||
|
||||
products.append(product)
|
||||
except Exception as e:
|
||||
logger.error(f"提取第{idx+1}行商品信息时出错: {e}", exc_info=True)
|
||||
continue
|
||||
|
||||
|
||||
logger.info(f"提取到 {len(products)} 个商品信息")
|
||||
return products
|
||||
|
||||
|
||||
def _apply_memory(self, product: Dict) -> Dict:
|
||||
"""查记忆库,补全 OCR 缺失/错误的字段"""
|
||||
barcode = product.get('barcode', '')
|
||||
if not barcode:
|
||||
return product
|
||||
|
||||
try:
|
||||
memory = self.product_db.get_memory(barcode)
|
||||
except Exception:
|
||||
return product
|
||||
|
||||
if memory is None or memory.get('confidence', 0) < 80:
|
||||
return product
|
||||
|
||||
# 补全规格
|
||||
ocr_spec = product.get('specification', '')
|
||||
mem_spec = memory.get('specification', '') or ''
|
||||
if mem_spec and (not ocr_spec or self._is_spec_suspicious(ocr_spec)):
|
||||
product['specification'] = mem_spec
|
||||
logger.info(f"记忆修正规格: {barcode} '{ocr_spec}' -> '{mem_spec}'")
|
||||
|
||||
# 补全名称
|
||||
ocr_name = product.get('name', '')
|
||||
mem_name = memory.get('name', '') or ''
|
||||
if mem_name and not ocr_name:
|
||||
product['name'] = mem_name
|
||||
logger.info(f"记忆修正名称: {barcode} -> '{mem_name}'")
|
||||
|
||||
# 补全单位
|
||||
ocr_unit = product.get('unit', '')
|
||||
mem_unit = memory.get('unit', '') or ''
|
||||
if mem_unit and not ocr_unit:
|
||||
product['unit'] = mem_unit
|
||||
logger.info(f"记忆修正单位: {barcode} -> '{mem_unit}'")
|
||||
|
||||
# 不改数量和单价(每单不同)
|
||||
return product
|
||||
|
||||
def _is_spec_suspicious(self, spec: str) -> bool:
|
||||
"""检测规格是否像 OCR 垃圾"""
|
||||
if not spec:
|
||||
return True
|
||||
# IL*12(I 和 1 混淆)
|
||||
if re.search(r'^[Ii][Ll*]', spec):
|
||||
return True
|
||||
# 4.51*4(L 被识别为 1)
|
||||
if re.search(r'\d+\.\d+1\*\d+', spec):
|
||||
return True
|
||||
# 包含非常规字符(排除常见规格字符)
|
||||
if re.search(r'[^\d.*xX\-LlKkGgMm升毫瓶桶盒箱件提\s]', spec):
|
||||
return True
|
||||
return False
|
||||
|
||||
def fill_template(self, products: List[Dict], output_file_path: str) -> bool:
|
||||
"""
|
||||
填充采购单模板
|
||||
@@ -599,6 +668,14 @@ class ExcelProcessor:
|
||||
|
||||
# 填充模板并保存
|
||||
if self.fill_template(products, output_file):
|
||||
# 从处理结果中学习商品记忆
|
||||
try:
|
||||
self.product_db.learn_from_products(products, source='ocr')
|
||||
self.product_db._export_memory_json()
|
||||
logger.info(f"已从处理结果学习 {len(products)} 条商品记忆")
|
||||
except Exception as e:
|
||||
logger.warning(f"学习商品记忆失败: {e}")
|
||||
|
||||
# 记录已处理文件
|
||||
self.processed_files[file_path] = output_file
|
||||
self._save_processed_files()
|
||||
|
||||
Reference in New Issue
Block a user