Blame view

scripts/evaluation/eval_framework/constants.py 1.04 KB
c81b0fc1   tangwang   scripts/evaluatio...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
  """Paths and shared constants for search evaluation."""
  
  from pathlib import Path
  
  # Filesystem anchors, all derived from the resolved location of this module.
  # _PKG_DIR: the directory that contains this file.
  _PKG_DIR = Path(__file__).resolve().parent
  # _SCRIPTS_EVAL_DIR: the directory one level above the package directory.
  _SCRIPTS_EVAL_DIR = _PKG_DIR.parent
  # PROJECT_ROOT: two levels above _SCRIPTS_EVAL_DIR (parents[0] is its parent,
  # parents[1] its grandparent). Assuming this module lives at
  # scripts/evaluation/eval_framework/constants.py, that is the repository root.
  PROJECT_ROOT = _SCRIPTS_EVAL_DIR.parents[1]
  
  # Relevance label strings (three grades).
  RELEVANCE_EXACT = "Exact"
  RELEVANCE_PARTIAL = "Partial"
  RELEVANCE_IRRELEVANT = "Irrelevant"
  # Set containing exactly the three label strings above.
  # NOTE(review): presumably used to validate labels returned by the judge -- confirm against callers.
  VALID_LABELS = {RELEVANCE_EXACT, RELEVANCE_PARTIAL, RELEVANCE_IRRELEVANT}
  
  # Default locations, expressed as Path objects built from the anchors above.
  # DEFAULT_ARTIFACT_ROOT: <PROJECT_ROOT>/artifacts/search_evaluation
  DEFAULT_ARTIFACT_ROOT = PROJECT_ROOT / "artifacts" / "search_evaluation"
  # DEFAULT_QUERY_FILE: <_SCRIPTS_EVAL_DIR>/queries/queries.txt
  DEFAULT_QUERY_FILE = _SCRIPTS_EVAL_DIR / "queries" / "queries.txt"
  
  # Version tags for the two judge prompt variants ("simple" and "complex").
  # NOTE(review): presumably stored alongside labels so results from different
  # prompt revisions can be told apart -- confirm against the code that reads them.
  JUDGE_PROMPT_VERSION_SIMPLE = "v3_simple_20260331"
  JUDGE_PROMPT_VERSION_COMPLEX = "v2_structured_20260331"
  # Labeler mode used when none is specified.
  # NOTE(review): "simple" presumably selects JUDGE_PROMPT_VERSION_SIMPLE -- confirm.
  DEFAULT_LABELER_MODE = "simple"
d172c259   tangwang   eval框架
20
21
22
23
24
25
  
  # Rebuild annotation pool (build --force-refresh-labels): search recall + full-corpus rerank + LLM batches
  # Defaults for that rebuild flow. The semantics noted below are inferred from
  # the names only; verify each against the rebuild implementation.
  # Presumably the number of top search results recalled per query -- TODO confirm.
  DEFAULT_SEARCH_RECALL_TOP_K = 500
  # Presumably a rerank-score cutoff above which a document counts as "high" -- TODO confirm.
  DEFAULT_RERANK_HIGH_THRESHOLD = 0.5
  # NOTE(review): exact role not visible here; presumably a count of "high"
  # rerank results that triggers skipping -- confirm with the caller.
  DEFAULT_RERANK_HIGH_SKIP_COUNT = 1000
  # Presumably the number of candidates sent to the LLM per batch -- TODO confirm.
  DEFAULT_REBUILD_LLM_BATCH_SIZE = 50
167f33b4   tangwang   eval框架前端
26
  # Default minimum number of LLM batches for a rebuild.
  # NOTE(review): presumably batches that always run before any early-stop check -- confirm.
  DEFAULT_REBUILD_MIN_LLM_BATCHES = 20
d172c259   tangwang   eval框架
27
28
29
  # Default maximum number of LLM batches for a rebuild.
  DEFAULT_REBUILD_MAX_LLM_BATCHES = 40
  # Early-stop defaults for a rebuild.
  # NOTE(review): presumably labeling stops once the share of "Irrelevant" labels
  # in a batch reaches the ratio for this many consecutive batches -- confirm
  # against the rebuild loop.
  DEFAULT_REBUILD_IRRELEVANT_STOP_RATIO = 0.92
  DEFAULT_REBUILD_IRRELEVANT_STOP_STREAK = 3