2059d959
tangwang
feat(eval): 多评估集统...
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
from config.loader import get_app_config
from scripts.evaluation.eval_framework.datasets import resolve_dataset
def test_search_evaluation_registry_contains_expected_datasets() -> None:
    """Verify the search-evaluation config lists both datasets and the default id.

    Reads ``search_evaluation`` from the application config, collects the
    ``dataset_id`` of every entry in its ``datasets`` collection, and checks
    that ``core_queries`` and ``clothing_top771`` are present and that
    ``core_queries`` is the configured default.
    """
    evaluation_cfg = get_app_config().search_evaluation
    registered_ids = [entry.dataset_id for entry in evaluation_cfg.datasets]

    # Checked in the same order as before: core_queries first, then clothing_top771.
    for expected_id in ("core_queries", "clothing_top771"):
        assert expected_id in registered_ids

    assert evaluation_cfg.default_dataset_id == "core_queries"
def test_resolve_dataset_returns_expected_query_counts() -> None:
    """Verify ``resolve_dataset`` yields the expected query counts per dataset id.

    Resolves ``core_queries`` and ``clothing_top771`` by id, then checks that
    the former has at least one query, and that the latter reports exactly
    771 queries and carries the id it was resolved with.
    """
    core_dataset = resolve_dataset(dataset_id="core_queries")
    clothing_dataset = resolve_dataset(dataset_id="clothing_top771")

    # The core set only needs to be non-empty; its exact size is not pinned here.
    assert core_dataset.query_count > 0

    assert clothing_dataset.query_count == 771
    assert clothing_dataset.dataset_id == "clothing_top771"
|