e4d62df7e3
- 智能供应商识别(蓉城易购/烟草/杨碧月/通用) - 百度 OCR 表格识别集成 - 规则引擎(列映射/数据清洗/单位转换/规格推断) - 条码映射管理与云端同步(Gitea REST API) - 云端同步支持:条码映射、供应商配置、商品资料、采购模板 - 拖拽一键处理(图片→OCR→Excel→合并) - 191 个单元测试 - 移除无用的模板管理功能 - 清理 IDE 产物目录 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
224 lines
7.9 KiB
Python
224 lines
7.9 KiB
Python
"""app.core.handlers.rule_engine 单元测试"""
|
|
|
|
import pytest
|
|
import pandas as pd
|
|
|
|
from app.core.handlers.rule_engine import (
|
|
apply_rules,
|
|
_split_quantity_unit,
|
|
_extract_spec_from_name,
|
|
_normalize_unit,
|
|
_compute_quantity_from_total,
|
|
_fill_missing,
|
|
_mark_gift,
|
|
)
|
|
|
|
|
|
@pytest.fixture
def sample_df():
    """Return a small three-row order frame shared by the ``apply_rules`` tests.

    Columns:
        name: product names; two of the three end in a ``<volume>ml*<count>``
            suffix, one has no such suffix.
        quantity_raw: quantity strings, with or without a trailing unit.
        unit_price / total_price: numeric prices per row.
    """
    columns = {
        'name': ['农夫山泉550ml*24', '蒙牛纯牛奶', '可口可乐330ml*6'],
        'quantity_raw': ['2箱', '5', '3提'],
        'unit_price': [28.8, 3.5, 10.8],
        'total_price': [57.6, 17.5, 32.4],
    }
    return pd.DataFrame(columns)
|
|
|
|
|
|
class TestSplitQuantityUnit:
    """Tests for ``_split_quantity_unit``."""

    def test_split_with_unit(self):
        """Number-plus-unit strings are split into ``quantity`` and ``unit``."""
        frame = pd.DataFrame({'quantity_raw': ['2箱', '5瓶', '3提']})
        expected_quantities = [2.0, 5.0, 3.0]
        expected_units = ['箱', '瓶', '提']

        out = _split_quantity_unit(frame, 'quantity_raw')

        assert list(out['quantity']) == expected_quantities
        assert list(out['unit']) == expected_units

    def test_split_number_only(self):
        """Strings holding only a number still produce numeric quantities."""
        frame = pd.DataFrame({'quantity_raw': ['10', '20']})

        out = _split_quantity_unit(frame, 'quantity_raw')

        assert list(out['quantity']) == [10.0, 20.0]

    def test_split_with_synonyms(self):
        """A unit listed in ``unit_synonyms`` is replaced by its mapped value."""
        frame = pd.DataFrame({'quantity_raw': ['2件']})
        lookup = {'unit_synonyms': {'件': '箱'}, 'default_unit': '瓶'}

        out = _split_quantity_unit(frame, 'quantity_raw', lookup)

        assert out.loc[0, 'unit'] == '箱'

    def test_split_missing_column(self):
        """When the source column is absent, no ``quantity`` column is added."""
        frame = pd.DataFrame({'other': [1, 2]})

        out = _split_quantity_unit(frame, 'quantity_raw')

        assert 'quantity' not in out.columns

    def test_split_invalid_value(self):
        """A value with no number in it results in a quantity of ``0.0``."""
        frame = pd.DataFrame({'quantity_raw': ['abc']})

        out = _split_quantity_unit(frame, 'quantity_raw')

        assert out.loc[0, 'quantity'] == 0.0
|
|
|
|
|
|
class TestExtractSpecFromName:
    """Tests for ``_extract_spec_from_name``."""

    def test_extract_550ml_24(self):
        """``550ml*24`` in the name gives a package quantity of 24."""
        frame = pd.DataFrame({'name': ['农夫山泉550ml*24']})

        out = _extract_spec_from_name(frame, 'name')

        assert out.loc[0, 'package_quantity'] == 24

    def test_extract_330ml_6(self):
        """``330ml*6`` in the name gives a package quantity of 6."""
        frame = pd.DataFrame({'name': ['可口可乐330ml*6']})

        out = _extract_spec_from_name(frame, 'name')

        assert out.loc[0, 'package_quantity'] == 6

    def test_extract_1_star_pattern(self):
        """A ``1*12`` suffix gives a package quantity of 12."""
        frame = pd.DataFrame({'name': ['啤酒1*12']})

        out = _extract_spec_from_name(frame, 'name')

        assert out.loc[0, 'package_quantity'] == 12

    def test_no_spec(self):
        """A name without any spec pattern leaves ``package_quantity`` as None."""
        frame = pd.DataFrame({'name': ['蒙牛纯牛奶']})

        out = _extract_spec_from_name(frame, 'name')

        # Identity check on purpose: the cell must hold None itself.
        assert out.loc[0, 'package_quantity'] is None

    def test_missing_column(self):
        """When the name column is absent, no ``package_quantity`` is added."""
        frame = pd.DataFrame({'other': ['test']})

        out = _extract_spec_from_name(frame, 'name')

        assert 'package_quantity' not in out.columns

    def test_with_ignore_words(self):
        """The spec is still found when ``ignore_words`` is passed in the dictionary."""
        frame = pd.DataFrame({'name': ['新品 农夫山泉550ml*24']})
        lookup = {'ignore_words': ['新品'], 'name_patterns': []}

        out = _extract_spec_from_name(frame, 'name', lookup)

        assert out.loc[0, 'package_quantity'] == 24
|
|
|
|
|
|
class TestNormalizeUnit:
    """Tests for ``_normalize_unit``."""

    def test_map_units(self):
        """Every unit, mapped or not, ends up as ``瓶`` for this input."""
        frame = pd.DataFrame(
            {'unit': ['箱', '提', '盒', '瓶'], 'quantity': [1, 2, 3, 4]}
        )
        mapping = {'箱': '件', '提': '件', '盒': '件'}

        out = _normalize_unit(frame, 'unit', mapping)

        # _normalize_unit maps via unit_map, then converts 件→瓶 as packed unit
        assert list(out['unit']) == ['瓶', '瓶', '瓶', '瓶']

    def test_convert_quantity_for_packed_units(self):
        """A packed row's quantity is multiplied by its ``package_quantity``."""
        frame = pd.DataFrame(
            {
                'unit': ['箱', '瓶'],
                'quantity': [2, 5],
                'package_quantity': [12, None],
            }
        )
        mapping = {'箱': '件'}

        out = _normalize_unit(frame, 'unit', mapping)

        assert out.loc[0, 'quantity'] == 24  # 2 * 12
        assert out.loc[1, 'quantity'] == 5  # unchanged

    def test_missing_column(self):
        """When the unit column is absent, no ``unit`` column is added."""
        frame = pd.DataFrame({'other': [1]})

        out = _normalize_unit(frame, 'unit', {})

        assert 'unit' not in out.columns
|
|
|
|
|
|
class TestComputeQuantityFromTotal:
    """Tests for ``_compute_quantity_from_total``."""

    def test_compute_when_qty_zero(self):
        """A zero quantity is replaced by ``total_price / unit_price``."""
        frame = pd.DataFrame(
            {
                'quantity': [0, 5, 0],
                'unit_price': [10.0, 20.0, 0.0],
                'total_price': [50.0, 100.0, 30.0],
            }
        )

        out = _compute_quantity_from_total(frame)

        assert out.loc[0, 'quantity'] == 5.0  # 50 / 10
        assert out.loc[1, 'quantity'] == 5  # unchanged
        # NOTE(review): the third row (zero quantity AND zero unit price) is
        # never asserted; presumably it is there to exercise a divide-by-zero
        # path -- confirm the expected value and pin it.

    def test_no_compute_when_qty_positive(self):
        """Positive quantities are left as they are."""
        frame = pd.DataFrame(
            {
                'quantity': [3, 5],
                'unit_price': [10.0, 20.0],
                'total_price': [50.0, 100.0],
            }
        )

        out = _compute_quantity_from_total(frame)

        assert list(out['quantity']) == [3, 5]
|
|
|
|
|
|
class TestFillMissing:
    """Tests for ``_fill_missing``."""

    def test_fill_existing_column(self):
        """Missing cells in existing columns receive the per-column fill value."""
        frame = pd.DataFrame({'a': [1, None, 3], 'b': [None, 2, None]})
        fills = {'a': 0, 'b': 99}

        out = _fill_missing(frame, fills)

        assert out.loc[1, 'a'] == 0
        assert out.loc[0, 'b'] == 99

    def test_fill_new_column(self):
        """A fill key with no matching column creates it, set to the fill value."""
        frame = pd.DataFrame({'a': [1, 2]})

        out = _fill_missing(frame, {'new_col': 'default'})

        assert list(out['new_col']) == ['default', 'default']
|
|
|
|
|
|
class TestMarkGift:
    """Tests for ``_mark_gift``, which adds an ``is_gift`` column.

    The flag is asserted as a whole column with ``list(...) == [...]`` rather
    than by comparing single cells to ``True`` / ``False`` with ``==``. That
    avoids the literal-boolean comparison flagged by pycodestyle E712 and
    matches how the other tests in this module check column contents.
    """

    def test_gift_by_zero_price(self):
        """A row whose unit and total price are both zero is flagged."""
        df = pd.DataFrame(
            {
                'name': ['商品A', '商品B'],
                'unit_price': [10.0, 0.0],
                'total_price': [20.0, 0.0],
            }
        )

        result = _mark_gift(df)

        assert list(result['is_gift']) == [False, True]

    def test_gift_by_name(self):
        """A row named ``赠品-杯子`` (also zero-priced here) is flagged."""
        df = pd.DataFrame(
            {
                'name': ['赠品-杯子', '商品A'],
                'unit_price': [0.0, 10.0],
                'total_price': [0.0, 20.0],
            }
        )

        result = _mark_gift(df)

        assert list(result['is_gift']) == [True, False]

    def test_gift_no_price_columns(self):
        """With only a name column, the ``赠品`` row alone is flagged."""
        df = pd.DataFrame({'name': ['赠品', '正常']})

        result = _mark_gift(df)

        assert list(result['is_gift']) == [True, False]
|
|
|
|
|
|
class TestApplyRules:
    """Tests for ``apply_rules``, which runs a list of rule dicts over a frame."""

    def test_multiple_rules(self, sample_df):
        """Four chained rules each contribute their output column."""
        pipeline = [
            {'type': 'split_quantity_unit', 'source': 'quantity_raw'},
            {'type': 'extract_spec_from_name', 'source': 'name'},
            {'type': 'mark_gift'},
            {'type': 'fill_missing', 'fills': {'unit': '瓶'}},
        ]

        out = apply_rules(sample_df, pipeline)

        for column in ('quantity', 'unit', 'package_quantity', 'is_gift'):
            assert column in out.columns

    def test_empty_rules(self, sample_df):
        """An empty rule list keeps the row count."""
        out = apply_rules(sample_df, [])

        assert len(out) == len(sample_df)

    def test_none_rules(self, sample_df):
        """``None`` in place of a rule list keeps the row count."""
        out = apply_rules(sample_df, None)

        assert len(out) == len(sample_df)

    def test_unknown_rule_type(self, sample_df):
        """A rule with an unrecognised ``type`` keeps the row count."""
        pipeline = [{'type': 'unknown_operation'}]

        out = apply_rules(sample_df, pipeline)

        assert len(out) == len(sample_df)

    def test_with_dictionary(self):
        """Rules run with a dictionary still produce ``quantity`` and ``unit``."""
        frame = pd.DataFrame(
            {
                'name': ['农夫山泉550ml*24'],
                'quantity_raw': ['2箱'],
            }
        )
        lookup = {
            'unit_synonyms': {'箱': '件'},
            'default_unit': '瓶',
            'ignore_words': [],
            'name_patterns': [],
            'pack_multipliers': {'件': 12},
            'default_package_quantity': 1,
        }
        pipeline = [
            {'type': 'split_quantity_unit', 'source': 'quantity_raw'},
            {'type': 'extract_spec_from_name', 'source': 'name'},
            {'type': 'normalize_unit', 'target': 'unit', 'map': {'箱': '件'}},
        ]

        out = apply_rules(frame, pipeline, lookup)

        assert 'quantity' in out.columns
        assert 'unit' in out.columns
|