e4d62df7e3
- 智能供应商识别(蓉城易购/烟草/杨碧月/通用) - 百度 OCR 表格识别集成 - 规则引擎(列映射/数据清洗/单位转换/规格推断) - 条码映射管理与云端同步(Gitea REST API) - 云端同步支持:条码映射、供应商配置、商品资料、采购模板 - 拖拽一键处理(图片→OCR→Excel→合并) - 191 个单元测试 - 移除无用的模板管理功能 - 清理 IDE 产物目录 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
155 lines
5.8 KiB
Python
155 lines
5.8 KiB
Python
"""app.core.handlers.column_mapper 单元测试"""
|
|
|
|
import pytest
|
|
import pandas as pd
|
|
from app.core.handlers.column_mapper import ColumnMapper
|
|
|
|
|
|
class TestStandardColumns:
    """Sanity checks on the ``ColumnMapper.STANDARD_COLUMNS`` alias table."""

    def test_has_all_standard_fields(self):
        """The table's keys are exactly the eleven expected standard fields."""
        expected = {
            'barcode', 'name', 'specification', 'quantity', 'unit',
            'unit_price', 'total_price', 'gift_quantity',
            'category', 'brand', 'supplier',
        }
        actual = set(ColumnMapper.STANDARD_COLUMNS.keys())
        assert actual == expected

    def test_no_empty_alias_lists(self):
        """Every standard field maps to at least one alias."""
        table = ColumnMapper.STANDARD_COLUMNS
        for field in table:
            aliases = table[field]
            assert len(aliases) > 0, f"{field} has no aliases"

    def test_barcode_includes_key_names(self):
        """The barcode aliases contain the key Chinese and English names."""
        barcode_aliases = ColumnMapper.STANDARD_COLUMNS['barcode']
        # Checked in the same order as before; the first missing alias fails.
        for required in ('条码', '商品条码', 'barcode'):
            assert required in barcode_aliases

    def test_gift_quantity_includes_common_names(self):
        """The gift_quantity aliases contain the two common Chinese names."""
        gift_aliases = ColumnMapper.STANDARD_COLUMNS['gift_quantity']
        for required in ('赠送量', '赠品数量'):
            assert required in gift_aliases
|
|
|
|
|
|
class TestFindColumn:
    """Column lookup tests for ``ColumnMapper.find_column``."""

    def test_exact_match(self):
        """A header that exactly equals a barcode alias is returned."""
        headers = ['商品条码', '商品名称', '数量', '单价']
        found = ColumnMapper.find_column(headers, 'barcode')
        assert found == '商品条码'

    def test_exact_match_standard_english(self):
        """The English standard name itself is accepted as a header."""
        headers = ['barcode', 'name', 'quantity']
        found = ColumnMapper.find_column(headers, 'barcode')
        assert found == 'barcode'

    def test_whitespace_match(self):
        """Headers containing inner spaces should still match."""
        headers = ['名 称', '数 量']
        name_header = ColumnMapper.find_column(headers, 'name')
        assert name_header == '名 称'
        quantity_header = ColumnMapper.find_column(headers, 'quantity')
        assert quantity_header == '数 量'

    def test_partial_match_substring(self):
        """A header that contains a candidate alias should match."""
        headers = ['商品条码(小条码)', '商品名称']
        found = ColumnMapper.find_column(headers, 'barcode')
        assert found == '商品条码(小条码)'

    def test_not_found_returns_none(self):
        """None is returned when no header corresponds to the field."""
        headers = ['日期', '备注', '编号']
        found = ColumnMapper.find_column(headers, 'barcode')
        assert found is None

    def test_unknown_standard_name_returns_none(self):
        """None is returned for a standard name that is not defined."""
        headers = ['商品条码']
        found = ColumnMapper.find_column(headers, 'nonexistent_field')
        assert found is None

    def test_first_match_wins(self):
        """When several headers could match, the expected result is '条码'."""
        headers = ['条码', '商品条码', 'barcode']
        found = ColumnMapper.find_column(headers, 'barcode')
        assert found == '条码'

    def test_case_insensitive(self):
        """A capitalised header matches the lowercase standard name."""
        headers = ['Barcode', 'Name']
        found = ColumnMapper.find_column(headers, 'barcode')
        assert found == 'Barcode'

    def test_all_fields_matchable(self):
        """Every standard field finds at least one match in a full header row."""
        cols = [
            '商品条码', '商品名称', '规格', '数量', '单位',
            '单价', '金额', '赠送量', '类别', '品牌', '供应商',
        ]
        for std_name in ColumnMapper.STANDARD_COLUMNS:
            matched = ColumnMapper.find_column(cols, std_name)
            assert matched is not None, f"Could not find {std_name} in {cols}"
|
|
|
|
|
|
class TestDetectHeaderRow:
    """Header-row detection tests for ``ColumnMapper.detect_header_row``."""

    def test_header_on_first_row(self):
        """Header cells in row 0 yield index 0."""
        table = {
            'A': ['条码', '123456', '789012'],
            'B': ['数量', '10', '20'],
            'C': ['单价', '5.5', '3.0'],
        }
        df = pd.DataFrame(table)
        header_idx = ColumnMapper.detect_header_row(df, min_matches=2)
        assert header_idx == 0

    def test_header_on_second_row(self):
        """Header cells in row 1, below a non-header row, yield index 1."""
        table = {
            'A': ['备注', '条码', '123456'],
            'B': ['日期', '数量', '10'],
            'C': ['时间', '单价', '5.5'],
        }
        df = pd.DataFrame(table)
        header_idx = ColumnMapper.detect_header_row(df, min_matches=2)
        assert header_idx == 1

    def test_no_header_returns_minus_one(self):
        """A frame with no recognisable header cells yields -1."""
        table = {
            'A': ['aaa', 'bbb', 'ccc'],
            'B': ['ddd', 'eee', 'fff'],
        }
        df = pd.DataFrame(table)
        header_idx = ColumnMapper.detect_header_row(df, min_matches=3)
        assert header_idx == -1

    def test_empty_dataframe(self):
        """An empty DataFrame yields -1."""
        empty = pd.DataFrame()
        assert ColumnMapper.detect_header_row(empty) == -1

    def test_max_rows_limits_scan(self):
        """A header at row index 10 must not be found when max_rows=5."""
        header_cells = ('条码', '数量', '单价')
        data = {}
        for i, cell in enumerate(header_cells):
            # 15 filler rows per column, with the header cell at index 10.
            column = ['x'] * 15
            column[10] = cell
            data[f'col{i}'] = column
        df = pd.DataFrame(data)
        header_idx = ColumnMapper.detect_header_row(df, max_rows=5, min_matches=2)
        assert header_idx == -1
|
|
|
|
|
|
class TestColumnMapperInstance:
    """Tests for ``ColumnMapper`` instance-level behaviour."""

    def test_init_with_no_config(self):
        """Constructing without arguments leaves ``mapping_config`` empty."""
        instance = ColumnMapper()
        assert instance.mapping_config == {}

    def test_init_with_custom_config(self):
        """A custom alias passed at construction appears in ``custom_mappings``."""
        config = {'barcode': ['我的条码']}
        instance = ColumnMapper(mapping_config=config)
        assert '我的条码' in instance.custom_mappings

    def test_map_columns_renames(self):
        """Chinese headers come back under their standard column names."""
        instance = ColumnMapper()
        source = pd.DataFrame({'商品条码': ['123'], '商品名称': ['测试'], '数量': [10]})
        targets = ['barcode', 'name', 'quantity']
        mapped = instance.map_columns(source, target_columns=targets)
        for column in ('barcode', 'name', 'quantity'):
            assert column in mapped.columns

    def test_map_columns_fills_missing(self):
        """A target column absent from the input is added; its first value is 0."""
        instance = ColumnMapper()
        source = pd.DataFrame({'商品条码': ['123']})
        mapped = instance.map_columns(source, target_columns=['barcode', 'quantity'])
        for column in ('barcode', 'quantity'):
            assert column in mapped.columns
        first_quantity = mapped['quantity'].iloc[0]
        assert first_quantity == 0  # value the original test labels as the default

    def test_add_custom_mapping(self):
        """``add_custom_mapping`` registers the alias in ``reverse_mapping``."""
        instance = ColumnMapper()
        alias = '自定义条码列'
        instance.add_custom_mapping('barcode', alias)
        assert alias in instance.reverse_mapping
        assert instance.reverse_mapping[alias] == 'barcode'
|