Files
orc-order-v2/tests/test_rule_engine.py
houhuan e4d62df7e3 feat: 益选 OCR 订单处理系统初始提交
- 智能供应商识别(蓉城易购/烟草/杨碧月/通用)
- 百度 OCR 表格识别集成
- 规则引擎(列映射/数据清洗/单位转换/规格推断)
- 条码映射管理与云端同步(Gitea REST API)
- 云端同步支持:条码映射、供应商配置、商品资料、采购模板
- 拖拽一键处理(图片→OCR→Excel→合并)
- 191 个单元测试
- 移除无用的模板管理功能
- 清理 IDE 产物目录

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-04 19:51:13 +08:00

224 lines
7.9 KiB
Python

"""app.core.handlers.rule_engine 单元测试"""
import pytest
import pandas as pd
from app.core.handlers.rule_engine import (
apply_rules,
_split_quantity_unit,
_extract_spec_from_name,
_normalize_unit,
_compute_quantity_from_total,
_fill_missing,
_mark_gift,
)
@pytest.fixture
def sample_df():
    """Return a three-row order table shared by the ``apply_rules`` tests.

    The frame has the columns ``name``, ``quantity_raw``, ``unit_price``
    and ``total_price``.
    """
    columns = {
        'name': ['农夫山泉550ml*24', '蒙牛纯牛奶', '可口可乐330ml*6'],
        'quantity_raw': ['2箱', '5', '3提'],
        'unit_price': [28.8, 3.5, 10.8],
        'total_price': [57.6, 17.5, 32.4],
    }
    return pd.DataFrame(columns)
class TestSplitQuantityUnit:
    """Tests for ``_split_quantity_unit``."""

    # NOTE(review): several expected unit strings below read as empty ('')
    # in this view; the original characters may have been lost in
    # extraction - confirm against the repository before relying on them.

    def test_split_with_unit(self):
        """Number-plus-unit strings produce numeric ``quantity`` values."""
        frame = pd.DataFrame({'quantity_raw': ['2箱', '5瓶', '3提']})
        out = _split_quantity_unit(frame, 'quantity_raw')
        quantities = list(out['quantity'])
        units = list(out['unit'])
        assert quantities == [2.0, 5.0, 3.0]
        assert units == ['', '', '']

    def test_split_number_only(self):
        """Bare numeric strings are parsed into float quantities."""
        frame = pd.DataFrame({'quantity_raw': ['10', '20']})
        out = _split_quantity_unit(frame, 'quantity_raw')
        quantities = list(out['quantity'])
        assert quantities == [10.0, 20.0]

    def test_split_with_synonyms(self):
        """A dictionary with ``unit_synonyms`` is accepted as third argument."""
        frame = pd.DataFrame({'quantity_raw': ['2件']})
        dictionary = {'unit_synonyms': {'': ''}, 'default_unit': ''}
        out = _split_quantity_unit(frame, 'quantity_raw', dictionary)
        first_unit = out.loc[0, 'unit']
        assert first_unit == ''

    def test_split_missing_column(self):
        """No ``quantity`` column is added when the source column is absent."""
        frame = pd.DataFrame({'other': [1, 2]})
        out = _split_quantity_unit(frame, 'quantity_raw')
        assert 'quantity' not in out.columns

    def test_split_invalid_value(self):
        """A non-numeric raw value yields a quantity of ``0.0``."""
        frame = pd.DataFrame({'quantity_raw': ['abc']})
        out = _split_quantity_unit(frame, 'quantity_raw')
        first_quantity = out.loc[0, 'quantity']
        assert first_quantity == 0.0
class TestExtractSpecFromName:
    """Tests for ``_extract_spec_from_name``."""

    def test_extract_550ml_24(self):
        """A ``550ml*24`` suffix gives a package quantity of 24."""
        frame = pd.DataFrame({'name': ['农夫山泉550ml*24']})
        out = _extract_spec_from_name(frame, 'name')
        package_quantity = out.loc[0, 'package_quantity']
        assert package_quantity == 24

    def test_extract_330ml_6(self):
        """A ``330ml*6`` suffix gives a package quantity of 6."""
        frame = pd.DataFrame({'name': ['可口可乐330ml*6']})
        out = _extract_spec_from_name(frame, 'name')
        package_quantity = out.loc[0, 'package_quantity']
        assert package_quantity == 6

    def test_extract_1_star_pattern(self):
        """A ``1*12`` suffix gives a package quantity of 12."""
        frame = pd.DataFrame({'name': ['啤酒1*12']})
        out = _extract_spec_from_name(frame, 'name')
        package_quantity = out.loc[0, 'package_quantity']
        assert package_quantity == 12

    def test_no_spec(self):
        """A name without a spec leaves ``package_quantity`` as ``None``."""
        frame = pd.DataFrame({'name': ['蒙牛纯牛奶']})
        out = _extract_spec_from_name(frame, 'name')
        # Identity check on purpose: the cell must hold None itself.
        assert out.loc[0, 'package_quantity'] is None

    def test_missing_column(self):
        """No ``package_quantity`` column is added without the source column."""
        frame = pd.DataFrame({'other': ['test']})
        out = _extract_spec_from_name(frame, 'name')
        assert 'package_quantity' not in out.columns

    def test_with_ignore_words(self):
        """The spec is still extracted when an ignore word precedes the name."""
        frame = pd.DataFrame({'name': ['新品 农夫山泉550ml*24']})
        dictionary = {'ignore_words': ['新品'], 'name_patterns': []}
        out = _extract_spec_from_name(frame, 'name', dictionary)
        package_quantity = out.loc[0, 'package_quantity']
        assert package_quantity == 24
class TestNormalizeUnit:
    """Tests for ``_normalize_unit``."""

    # NOTE(review): the unit strings and unit_map keys below read as empty
    # ('') in this view, which makes the dict keys collide; the original
    # characters may have been lost in extraction - confirm before editing.

    def test_map_units(self):
        """Units are rewritten through ``unit_map``."""
        frame = pd.DataFrame({
            'unit': ['', '', '', ''],
            'quantity': [1, 2, 3, 4],
        })
        unit_map = {'': '', '': '', '': ''}
        out = _normalize_unit(frame, 'unit', unit_map)
        # Original author's note: _normalize_unit maps via unit_map, then
        # converts 件 to 瓶 as a packed unit.
        units = list(out['unit'])
        assert units == ['', '', '', '']

    def test_convert_quantity_for_packed_units(self):
        """Quantity is multiplied by ``package_quantity`` when one is given."""
        frame = pd.DataFrame({
            'unit': ['', ''],
            'quantity': [2, 5],
            'package_quantity': [12, None],
        })
        unit_map = {'': ''}
        out = _normalize_unit(frame, 'unit', unit_map)
        assert out.loc[0, 'quantity'] == 24  # 2 * 12
        assert out.loc[1, 'quantity'] == 5  # unchanged

    def test_missing_column(self):
        """No ``unit`` column is added when the target column is absent."""
        frame = pd.DataFrame({'other': [1]})
        out = _normalize_unit(frame, 'unit', {})
        assert 'unit' not in out.columns
class TestComputeQuantityFromTotal:
    """Tests for ``_compute_quantity_from_total``."""

    def test_compute_when_qty_zero(self):
        """A zero quantity is derived from ``total_price / unit_price``."""
        frame = pd.DataFrame({
            'quantity': [0, 5, 0],
            'unit_price': [10.0, 20.0, 0.0],
            'total_price': [50.0, 100.0, 30.0],
        })
        out = _compute_quantity_from_total(frame)
        derived = out.loc[0, 'quantity']
        untouched = out.loc[1, 'quantity']
        assert derived == 5.0  # 50 / 10
        assert untouched == 5  # unchanged
        # The third row (zero quantity and zero unit price) is part of the
        # input but its result is intentionally not asserted here.

    def test_no_compute_when_qty_positive(self):
        """Positive quantities are kept even if they disagree with the total."""
        frame = pd.DataFrame({
            'quantity': [3, 5],
            'unit_price': [10.0, 20.0],
            'total_price': [50.0, 100.0],
        })
        out = _compute_quantity_from_total(frame)
        quantities = list(out['quantity'])
        assert quantities == [3, 5]
class TestFillMissing:
    """Tests for ``_fill_missing``."""

    def test_fill_existing_column(self):
        """Missing cells in existing columns take the per-column fill value."""
        frame = pd.DataFrame({'a': [1, None, 3], 'b': [None, 2, None]})
        fills = {'a': 0, 'b': 99}
        out = _fill_missing(frame, fills)
        assert out.loc[1, 'a'] == 0
        assert out.loc[0, 'b'] == 99

    def test_fill_new_column(self):
        """A fill for an absent column creates it with the default in each row."""
        frame = pd.DataFrame({'a': [1, 2]})
        fills = {'new_col': 'default'}
        out = _fill_missing(frame, fills)
        created = list(out['new_col'])
        assert created == ['default', 'default']
class TestMarkGift:
    """Tests for ``_mark_gift``."""

    # The assertions compare with ``==`` rather than ``is``; presumably the
    # flag cells are numpy booleans, for which identity checks would fail
    # (confirm against the implementation).

    def test_gift_by_zero_price(self):
        """A row with zero unit and total price is flagged as a gift."""
        frame = pd.DataFrame({
            'name': ['商品A', '商品B'],
            'unit_price': [10.0, 0.0],
            'total_price': [20.0, 0.0],
        })
        out = _mark_gift(frame)
        priced_flag = out.loc[0, 'is_gift']
        free_flag = out.loc[1, 'is_gift']
        assert priced_flag == False
        assert free_flag == True

    def test_gift_by_name(self):
        """A zero-priced row whose name starts with 赠品 is flagged as a gift."""
        frame = pd.DataFrame({
            'name': ['赠品-杯子', '商品A'],
            'unit_price': [0.0, 10.0],
            'total_price': [0.0, 20.0],
        })
        out = _mark_gift(frame)
        gift_flag = out.loc[0, 'is_gift']
        regular_flag = out.loc[1, 'is_gift']
        assert gift_flag == True
        assert regular_flag == False

    def test_gift_no_price_columns(self):
        """Without price columns the name alone decides the gift flag."""
        frame = pd.DataFrame({'name': ['赠品', '正常']})
        out = _mark_gift(frame)
        gift_flag = out.loc[0, 'is_gift']
        regular_flag = out.loc[1, 'is_gift']
        assert gift_flag == True
        assert regular_flag == False
class TestApplyRules:
    """Tests for the ``apply_rules`` entry point."""

    # NOTE(review): several unit strings below read as empty ('') in this
    # view; the original characters may have been lost in extraction -
    # confirm against the repository before relying on them.

    def test_multiple_rules(self, sample_df):
        """A chain of rules adds every column the individual rules produce."""
        rules = [
            {'type': 'split_quantity_unit', 'source': 'quantity_raw'},
            {'type': 'extract_spec_from_name', 'source': 'name'},
            {'type': 'mark_gift'},
            {'type': 'fill_missing', 'fills': {'unit': ''}},
        ]
        out = apply_rules(sample_df, rules)
        for column in ('quantity', 'unit', 'package_quantity', 'is_gift'):
            assert column in out.columns

    def test_empty_rules(self, sample_df):
        """An empty rule list keeps the row count unchanged."""
        out = apply_rules(sample_df, [])
        assert len(out) == len(sample_df)

    def test_none_rules(self, sample_df):
        """``None`` as the rule list keeps the row count unchanged."""
        out = apply_rules(sample_df, None)
        assert len(out) == len(sample_df)

    def test_unknown_rule_type(self, sample_df):
        """An unrecognised rule type does not change the row count."""
        rules = [{'type': 'unknown_operation'}]
        out = apply_rules(sample_df, rules)
        assert len(out) == len(sample_df)

    def test_with_dictionary(self):
        """Rules run with a dictionary argument still add quantity and unit."""
        frame = pd.DataFrame({
            'name': ['农夫山泉550ml*24'],
            'quantity_raw': ['2箱'],
        })
        dictionary = {
            'unit_synonyms': {'': ''},
            'default_unit': '',
            'ignore_words': [],
            'name_patterns': [],
            'pack_multipliers': {'': 12},
            'default_package_quantity': 1,
        }
        rules = [
            {'type': 'split_quantity_unit', 'source': 'quantity_raw'},
            {'type': 'extract_spec_from_name', 'source': 'name'},
            {'type': 'normalize_unit', 'target': 'unit', 'map': {'': ''}},
        ]
        out = apply_rules(frame, rules, dictionary)
        for column in ('quantity', 'unit'):
            assert column in out.columns