Commit bdb65283908f21ee69eadaf078075e4f9140a332

Authored by tangwang
1 parent 167f33b4

标注框架 批量标注

scripts/evaluation/eval_framework/cli.py
@@ -5,6 +5,7 @@ from __future__ import annotations @@ -5,6 +5,7 @@ from __future__ import annotations
5 import argparse 5 import argparse
6 import json 6 import json
7 from pathlib import Path 7 from pathlib import Path
  8 +from typing import Any, Dict
8 9
9 from .constants import ( 10 from .constants import (
10 DEFAULT_LABELER_MODE, 11 DEFAULT_LABELER_MODE,
@@ -23,6 +24,38 @@ from .utils import ensure_dir, utc_now_iso, utc_timestamp @@ -23,6 +24,38 @@ from .utils import ensure_dir, utc_now_iso, utc_timestamp
23 from .web_app import create_web_app 24 from .web_app import create_web_app
24 25
25 26
  27 +def add_judge_llm_args(p: argparse.ArgumentParser) -> None:
  28 + p.add_argument(
  29 + "--judge-model",
  30 + default=None,
  31 + metavar="MODEL",
  32 + help="Judge LLM model (default: eval_framework.constants.DEFAULT_JUDGE_MODEL).",
  33 + )
  34 + p.add_argument(
  35 + "--enable-thinking",
  36 + action=argparse.BooleanOptionalAction,
  37 + default=None,
  38 + help="enable_thinking for DashScope (default: DEFAULT_JUDGE_ENABLE_THINKING).",
  39 + )
  40 + p.add_argument(
  41 + "--dashscope-batch",
  42 + action=argparse.BooleanOptionalAction,
  43 + default=None,
  44 + help="DashScope Batch File API vs sync chat (default: DEFAULT_JUDGE_DASHSCOPE_BATCH).",
  45 + )
  46 +
  47 +
  48 +def framework_kwargs_from_args(args: argparse.Namespace) -> Dict[str, Any]:
  49 + kw: Dict[str, Any] = {}
  50 + if args.judge_model is not None:
  51 + kw["judge_model"] = args.judge_model
  52 + if args.enable_thinking is not None:
  53 + kw["enable_thinking"] = args.enable_thinking
  54 + if args.dashscope_batch is not None:
  55 + kw["use_dashscope_batch"] = args.dashscope_batch
  56 + return kw
  57 +
  58 +
26 def build_cli_parser() -> argparse.ArgumentParser: 59 def build_cli_parser() -> argparse.ArgumentParser:
27 parser = argparse.ArgumentParser(description="Search evaluation annotation builder and web UI") 60 parser = argparse.ArgumentParser(description="Search evaluation annotation builder and web UI")
28 sub = parser.add_subparsers(dest="command", required=True) 61 sub = parser.add_subparsers(dest="command", required=True)
@@ -71,6 +104,7 @@ def build_cli_parser() -> argparse.ArgumentParser: @@ -71,6 +104,7 @@ def build_cli_parser() -> argparse.ArgumentParser:
71 build.add_argument("--force-refresh-rerank", action="store_true") 104 build.add_argument("--force-refresh-rerank", action="store_true")
72 build.add_argument("--force-refresh-labels", action="store_true") 105 build.add_argument("--force-refresh-labels", action="store_true")
73 build.add_argument("--labeler-mode", default=DEFAULT_LABELER_MODE, choices=["simple", "complex"]) 106 build.add_argument("--labeler-mode", default=DEFAULT_LABELER_MODE, choices=["simple", "complex"])
  107 + add_judge_llm_args(build)
74 108
75 batch = sub.add_parser("batch", help="Run batch evaluation against live search") 109 batch = sub.add_parser("batch", help="Run batch evaluation against live search")
76 batch.add_argument("--tenant-id", default="163") 110 batch.add_argument("--tenant-id", default="163")
@@ -79,6 +113,7 @@ def build_cli_parser() -> argparse.ArgumentParser: @@ -79,6 +113,7 @@ def build_cli_parser() -> argparse.ArgumentParser:
79 batch.add_argument("--language", default="en") 113 batch.add_argument("--language", default="en")
80 batch.add_argument("--force-refresh-labels", action="store_true") 114 batch.add_argument("--force-refresh-labels", action="store_true")
81 batch.add_argument("--labeler-mode", default=DEFAULT_LABELER_MODE, choices=["simple", "complex"]) 115 batch.add_argument("--labeler-mode", default=DEFAULT_LABELER_MODE, choices=["simple", "complex"])
  116 + add_judge_llm_args(batch)
82 117
83 audit = sub.add_parser("audit", help="Audit annotation quality for queries") 118 audit = sub.add_parser("audit", help="Audit annotation quality for queries")
84 audit.add_argument("--tenant-id", default="163") 119 audit.add_argument("--tenant-id", default="163")
@@ -88,6 +123,7 @@ def build_cli_parser() -> argparse.ArgumentParser: @@ -88,6 +123,7 @@ def build_cli_parser() -> argparse.ArgumentParser:
88 audit.add_argument("--limit-suspicious", type=int, default=5) 123 audit.add_argument("--limit-suspicious", type=int, default=5)
89 audit.add_argument("--force-refresh-labels", action="store_true") 124 audit.add_argument("--force-refresh-labels", action="store_true")
90 audit.add_argument("--labeler-mode", default=DEFAULT_LABELER_MODE, choices=["simple", "complex"]) 125 audit.add_argument("--labeler-mode", default=DEFAULT_LABELER_MODE, choices=["simple", "complex"])
  126 + add_judge_llm_args(audit)
91 127
92 serve = sub.add_parser("serve", help="Serve evaluation web UI on port 6010") 128 serve = sub.add_parser("serve", help="Serve evaluation web UI on port 6010")
93 serve.add_argument("--tenant-id", default="163") 129 serve.add_argument("--tenant-id", default="163")
@@ -95,12 +131,15 @@ def build_cli_parser() -> argparse.ArgumentParser: @@ -95,12 +131,15 @@ def build_cli_parser() -> argparse.ArgumentParser:
95 serve.add_argument("--host", default="0.0.0.0") 131 serve.add_argument("--host", default="0.0.0.0")
96 serve.add_argument("--port", type=int, default=6010) 132 serve.add_argument("--port", type=int, default=6010)
97 serve.add_argument("--labeler-mode", default=DEFAULT_LABELER_MODE, choices=["simple", "complex"]) 133 serve.add_argument("--labeler-mode", default=DEFAULT_LABELER_MODE, choices=["simple", "complex"])
  134 + add_judge_llm_args(serve)
98 135
99 return parser 136 return parser
100 137
101 138
102 def run_build(args: argparse.Namespace) -> None: 139 def run_build(args: argparse.Namespace) -> None:
103 - framework = SearchEvaluationFramework(tenant_id=args.tenant_id, labeler_mode=args.labeler_mode) 140 + framework = SearchEvaluationFramework(
  141 + tenant_id=args.tenant_id, labeler_mode=args.labeler_mode, **framework_kwargs_from_args(args)
  142 + )
104 queries = framework.queries_from_file(Path(args.queries_file)) 143 queries = framework.queries_from_file(Path(args.queries_file))
105 summary = [] 144 summary = []
106 rebuild_kwargs = {} 145 rebuild_kwargs = {}
@@ -152,7 +191,9 @@ def run_build(args: argparse.Namespace) -> None: @@ -152,7 +191,9 @@ def run_build(args: argparse.Namespace) -> None:
152 191
153 192
154 def run_batch(args: argparse.Namespace) -> None: 193 def run_batch(args: argparse.Namespace) -> None:
155 - framework = SearchEvaluationFramework(tenant_id=args.tenant_id, labeler_mode=args.labeler_mode) 194 + framework = SearchEvaluationFramework(
  195 + tenant_id=args.tenant_id, labeler_mode=args.labeler_mode, **framework_kwargs_from_args(args)
  196 + )
156 queries = framework.queries_from_file(Path(args.queries_file)) 197 queries = framework.queries_from_file(Path(args.queries_file))
157 payload = framework.batch_evaluate( 198 payload = framework.batch_evaluate(
158 queries=queries, 199 queries=queries,
@@ -165,7 +206,9 @@ def run_batch(args: argparse.Namespace) -> None: @@ -165,7 +206,9 @@ def run_batch(args: argparse.Namespace) -> None:
165 206
166 207
167 def run_audit(args: argparse.Namespace) -> None: 208 def run_audit(args: argparse.Namespace) -> None:
168 - framework = SearchEvaluationFramework(tenant_id=args.tenant_id, labeler_mode=args.labeler_mode) 209 + framework = SearchEvaluationFramework(
  210 + tenant_id=args.tenant_id, labeler_mode=args.labeler_mode, **framework_kwargs_from_args(args)
  211 + )
169 queries = framework.queries_from_file(Path(args.queries_file)) 212 queries = framework.queries_from_file(Path(args.queries_file))
170 audit_items = [] 213 audit_items = []
171 for query in queries: 214 for query in queries:
@@ -215,7 +258,9 @@ def run_audit(args: argparse.Namespace) -> None: @@ -215,7 +258,9 @@ def run_audit(args: argparse.Namespace) -> None:
215 258
216 259
217 def run_serve(args: argparse.Namespace) -> None: 260 def run_serve(args: argparse.Namespace) -> None:
218 - framework = SearchEvaluationFramework(tenant_id=args.tenant_id, labeler_mode=args.labeler_mode) 261 + framework = SearchEvaluationFramework(
  262 + tenant_id=args.tenant_id, labeler_mode=args.labeler_mode, **framework_kwargs_from_args(args)
  263 + )
219 app = create_web_app(framework, Path(args.queries_file)) 264 app = create_web_app(framework, Path(args.queries_file))
220 import uvicorn 265 import uvicorn
221 266
scripts/evaluation/eval_framework/clients.py
@@ -2,6 +2,10 @@ @@ -2,6 +2,10 @@
2 2
3 from __future__ import annotations 3 from __future__ import annotations
4 4
  5 +import io
  6 +import json
  7 +import time
  8 +import uuid
5 from typing import Any, Dict, List, Optional, Sequence, Tuple 9 from typing import Any, Dict, List, Optional, Sequence, Tuple
6 10
7 import requests 11 import requests
@@ -60,26 +64,51 @@ class RerankServiceClient: @@ -60,26 +64,51 @@ class RerankServiceClient:
60 64
61 65
62 class DashScopeLabelClient: 66 class DashScopeLabelClient:
63 - def __init__(self, model: str, base_url: str, api_key: str, batch_size: int = 40): 67 + """DashScope OpenAI-compatible chat: synchronous or Batch File API (JSONL job).
  68 +
  69 + Batch flow: https://help.aliyun.com/zh/model-studio/batch-interfaces-compatible-with-openai/
  70 + """
  71 +
  72 + def __init__(
  73 + self,
  74 + model: str,
  75 + base_url: str,
  76 + api_key: str,
  77 + batch_size: int = 40,
  78 + *,
  79 + batch_completion_window: str = "24h",
  80 + batch_poll_interval_sec: float = 10.0,
  81 + enable_thinking: bool = True,
  82 + use_batch: bool = True,
  83 + ):
64 self.model = model 84 self.model = model
65 self.base_url = base_url.rstrip("/") 85 self.base_url = base_url.rstrip("/")
66 self.api_key = api_key 86 self.api_key = api_key
67 self.batch_size = int(batch_size) 87 self.batch_size = int(batch_size)
  88 + self.batch_completion_window = str(batch_completion_window)
  89 + self.batch_poll_interval_sec = float(batch_poll_interval_sec)
  90 + self.enable_thinking = bool(enable_thinking)
  91 + self.use_batch = bool(use_batch)
68 self.session = requests.Session() 92 self.session = requests.Session()
69 93
70 - def _chat(self, prompt: str) -> Tuple[str, str]: 94 + def _auth_headers(self) -> Dict[str, str]:
  95 + return {"Authorization": f"Bearer {self.api_key}"}
  96 +
  97 + def _completion_body(self, prompt: str) -> Dict[str, Any]:
  98 + body: Dict[str, Any] = {
  99 + "model": self.model,
  100 + "messages": [{"role": "user", "content": prompt}],
  101 + "temperature": 0,
  102 + "top_p": 0.1,
  103 + "enable_thinking": self.enable_thinking,
  104 + }
  105 + return body
  106 +
  107 + def _chat_sync(self, prompt: str) -> Tuple[str, str]:
71 response = self.session.post( 108 response = self.session.post(
72 f"{self.base_url}/chat/completions", 109 f"{self.base_url}/chat/completions",
73 - headers={  
74 - "Authorization": f"Bearer {self.api_key}",  
75 - "Content-Type": "application/json",  
76 - },  
77 - json={  
78 - "model": self.model,  
79 - "messages": [{"role": "user", "content": prompt}],  
80 - "temperature": 0,  
81 - "top_p": 0.1,  
82 - }, 110 + headers={**self._auth_headers(), "Content-Type": "application/json"},
  111 + json=self._completion_body(prompt),
83 timeout=180, 112 timeout=180,
84 ) 113 )
85 response.raise_for_status() 114 response.raise_for_status()
@@ -87,6 +116,114 @@ class DashScopeLabelClient: @@ -87,6 +116,114 @@ class DashScopeLabelClient:
87 content = str(((data.get("choices") or [{}])[0].get("message") or {}).get("content") or "").strip() 116 content = str(((data.get("choices") or [{}])[0].get("message") or {}).get("content") or "").strip()
88 return content, safe_json_dumps(data) 117 return content, safe_json_dumps(data)
89 118
  119 + def _chat_batch(self, prompt: str) -> Tuple[str, str]:
  120 + """One chat completion via Batch File API (single-line JSONL job)."""
  121 + custom_id = uuid.uuid4().hex
  122 + body = self._completion_body(prompt)
  123 + line_obj = {
  124 + "custom_id": custom_id,
  125 + "method": "POST",
  126 + "url": "/v1/chat/completions",
  127 + "body": body,
  128 + }
  129 + jsonl = json.dumps(line_obj, ensure_ascii=False, separators=(",", ":")) + "\n"
  130 + auth = self._auth_headers()
  131 +
  132 + up = self.session.post(
  133 + f"{self.base_url}/files",
  134 + headers=auth,
  135 + files={
  136 + "file": (
  137 + "eval_batch_input.jsonl",
  138 + io.BytesIO(jsonl.encode("utf-8")),
  139 + "application/octet-stream",
  140 + )
  141 + },
  142 + data={"purpose": "batch"},
  143 + timeout=300,
  144 + )
  145 + up.raise_for_status()
  146 + file_id = (up.json() or {}).get("id")
  147 + if not file_id:
  148 + raise RuntimeError(f"DashScope file upload returned no id: {up.text!r}")
  149 +
  150 + cr = self.session.post(
  151 + f"{self.base_url}/batches",
  152 + headers={**auth, "Content-Type": "application/json"},
  153 + json={
  154 + "input_file_id": file_id,
  155 + "endpoint": "/v1/chat/completions",
  156 + "completion_window": self.batch_completion_window,
  157 + },
  158 + timeout=120,
  159 + )
  160 + cr.raise_for_status()
  161 + batch_payload = cr.json() or {}
  162 + batch_id = batch_payload.get("id")
  163 + if not batch_id:
  164 + raise RuntimeError(f"DashScope batches.create returned no id: {cr.text!r}")
  165 +
  166 + terminal = frozenset({"completed", "failed", "expired", "cancelled"})
  167 + batch: Dict[str, Any] = dict(batch_payload)
  168 + status = str(batch.get("status") or "")
  169 + while status not in terminal:
  170 + time.sleep(self.batch_poll_interval_sec)
  171 + br = self.session.get(f"{self.base_url}/batches/{batch_id}", headers=auth, timeout=120)
  172 + br.raise_for_status()
  173 + batch = br.json() or {}
  174 + status = str(batch.get("status") or "")
  175 +
  176 + if status != "completed":
  177 + raise RuntimeError(
  178 + f"DashScope batch {batch_id} ended with status={status!r} errors={batch.get('errors')!r}"
  179 + )
  180 +
  181 + out_id = batch.get("output_file_id")
  182 + err_id = batch.get("error_file_id")
  183 +
  184 + row = self._find_batch_line_for_custom_id(out_id, custom_id, auth)
  185 + if row is None:
  186 + err_row = self._find_batch_line_for_custom_id(err_id, custom_id, auth)
  187 + if err_row is not None:
  188 + raise RuntimeError(f"DashScope batch request failed: {err_row!r}")
  189 + raise RuntimeError(f"DashScope batch output missing custom_id={custom_id!r}")
  190 +
  191 + resp = row.get("response") or {}
  192 + sc = resp.get("status_code")
  193 + if sc is not None and int(sc) != 200:
  194 + raise RuntimeError(f"DashScope batch line error: {row!r}")
  195 +
  196 + data = resp.get("body") or {}
  197 + content = str(((data.get("choices") or [{}])[0].get("message") or {}).get("content") or "").strip()
  198 + return content, safe_json_dumps(row)
  199 +
  200 + def _chat(self, prompt: str) -> Tuple[str, str]:
  201 + if self.use_batch:
  202 + return self._chat_batch(prompt)
  203 + return self._chat_sync(prompt)
  204 +
  205 + def _find_batch_line_for_custom_id(
  206 + self,
  207 + file_id: Optional[str],
  208 + custom_id: str,
  209 + auth: Dict[str, str],
  210 + ) -> Optional[Dict[str, Any]]:
  211 + if not file_id or str(file_id) in ("null", ""):
  212 + return None
  213 + r = self.session.get(f"{self.base_url}/files/{file_id}/content", headers=auth, timeout=300)
  214 + r.raise_for_status()
  215 + for raw in r.text.splitlines():
  216 + raw = raw.strip()
  217 + if not raw:
  218 + continue
  219 + try:
  220 + obj = json.loads(raw)
  221 + except json.JSONDecodeError:
  222 + continue
  223 + if str(obj.get("custom_id")) == custom_id:
  224 + return obj
  225 + return None
  226 +
90 def classify_batch_simple( 227 def classify_batch_simple(
91 self, 228 self,
92 query: str, 229 query: str,
scripts/evaluation/eval_framework/constants.py
@@ -17,6 +17,12 @@ DEFAULT_QUERY_FILE = _SCRIPTS_EVAL_DIR / "queries" / "queries.txt" @@ -17,6 +17,12 @@ DEFAULT_QUERY_FILE = _SCRIPTS_EVAL_DIR / "queries" / "queries.txt"
17 JUDGE_PROMPT_VERSION_SIMPLE = "v3_simple_20260331" 17 JUDGE_PROMPT_VERSION_SIMPLE = "v3_simple_20260331"
18 JUDGE_PROMPT_VERSION_COMPLEX = "v2_structured_20260331" 18 JUDGE_PROMPT_VERSION_COMPLEX = "v2_structured_20260331"
19 DEFAULT_LABELER_MODE = "simple" 19 DEFAULT_LABELER_MODE = "simple"
  20 +# Judge LLM (eval_framework only; override via CLI --judge-model / constructor kwargs)
  21 +DEFAULT_JUDGE_MODEL = "qwen3.5-flash"
  22 +DEFAULT_JUDGE_ENABLE_THINKING = True
  23 +DEFAULT_JUDGE_DASHSCOPE_BATCH = True
  24 +DEFAULT_JUDGE_BATCH_COMPLETION_WINDOW = "24h"
  25 +DEFAULT_JUDGE_BATCH_POLL_INTERVAL_SEC = 10.0
20 26
21 # Rebuild annotation pool (build --force-refresh-labels): search recall + full-corpus rerank + LLM batches 27 # Rebuild annotation pool (build --force-refresh-labels): search recall + full-corpus rerank + LLM batches
22 DEFAULT_SEARCH_RECALL_TOP_K = 500 28 DEFAULT_SEARCH_RECALL_TOP_K = 500
scripts/evaluation/eval_framework/framework.py
@@ -16,6 +16,11 @@ from indexer.mapping_generator import get_tenant_index_name @@ -16,6 +16,11 @@ from indexer.mapping_generator import get_tenant_index_name
16 from .clients import DashScopeLabelClient, RerankServiceClient, SearchServiceClient 16 from .clients import DashScopeLabelClient, RerankServiceClient, SearchServiceClient
17 from .constants import ( 17 from .constants import (
18 DEFAULT_ARTIFACT_ROOT, 18 DEFAULT_ARTIFACT_ROOT,
  19 + DEFAULT_JUDGE_BATCH_COMPLETION_WINDOW,
  20 + DEFAULT_JUDGE_BATCH_POLL_INTERVAL_SEC,
  21 + DEFAULT_JUDGE_DASHSCOPE_BATCH,
  22 + DEFAULT_JUDGE_ENABLE_THINKING,
  23 + DEFAULT_JUDGE_MODEL,
19 DEFAULT_LABELER_MODE, 24 DEFAULT_LABELER_MODE,
20 DEFAULT_REBUILD_IRRELEVANT_STOP_RATIO, 25 DEFAULT_REBUILD_IRRELEVANT_STOP_RATIO,
21 DEFAULT_REBUILD_IRRELEVANT_STOP_STREAK, 26 DEFAULT_REBUILD_IRRELEVANT_STOP_STREAK,
@@ -73,6 +78,10 @@ class SearchEvaluationFramework: @@ -73,6 +78,10 @@ class SearchEvaluationFramework:
73 artifact_root: Path = DEFAULT_ARTIFACT_ROOT, 78 artifact_root: Path = DEFAULT_ARTIFACT_ROOT,
74 search_base_url: str = "http://localhost:6002", 79 search_base_url: str = "http://localhost:6002",
75 labeler_mode: str = DEFAULT_LABELER_MODE, 80 labeler_mode: str = DEFAULT_LABELER_MODE,
  81 + *,
  82 + judge_model: str | None = None,
  83 + enable_thinking: bool | None = None,
  84 + use_dashscope_batch: bool | None = None,
76 ): 85 ):
77 init_service(get_app_config().infrastructure.elasticsearch.host) 86 init_service(get_app_config().infrastructure.elasticsearch.host)
78 self.tenant_id = str(tenant_id) 87 self.tenant_id = str(tenant_id)
@@ -89,13 +98,22 @@ class SearchEvaluationFramework: @@ -89,13 +98,22 @@ class SearchEvaluationFramework:
89 api_key = app_cfg.infrastructure.secrets.dashscope_api_key 98 api_key = app_cfg.infrastructure.secrets.dashscope_api_key
90 if not api_key: 99 if not api_key:
91 raise RuntimeError("dashscope_api_key is required for search evaluation annotation") 100 raise RuntimeError("dashscope_api_key is required for search evaluation annotation")
  101 + model = str(judge_model or DEFAULT_JUDGE_MODEL)
  102 + et = DEFAULT_JUDGE_ENABLE_THINKING if enable_thinking is None else enable_thinking
  103 + use_batch = DEFAULT_JUDGE_DASHSCOPE_BATCH if use_dashscope_batch is None else use_dashscope_batch
  104 + batch_window = DEFAULT_JUDGE_BATCH_COMPLETION_WINDOW
  105 + batch_poll = float(DEFAULT_JUDGE_BATCH_POLL_INTERVAL_SEC)
92 self.label_client = DashScopeLabelClient( 106 self.label_client = DashScopeLabelClient(
93 - model=str(llm_cfg["model"]), 107 + model=model,
94 base_url=str(llm_cfg["base_url"]), 108 base_url=str(llm_cfg["base_url"]),
95 api_key=str(api_key), 109 api_key=str(api_key),
  110 + batch_completion_window=batch_window,
  111 + batch_poll_interval_sec=batch_poll,
  112 + enable_thinking=et,
  113 + use_batch=use_batch,
96 ) 114 )
97 self.query_parser = None 115 self.query_parser = None
98 - 116 +
99 def _get_query_parser(self): 117 def _get_query_parser(self):
100 if self.query_parser is None: 118 if self.query_parser is None:
101 self.query_parser = get_query_parser() 119 self.query_parser = get_query_parser()
scripts/evaluation/eval_framework/prompts.py
@@ -96,7 +96,7 @@ The lines must correspond sequentially to the products above. @@ -96,7 +96,7 @@ The lines must correspond sequentially to the products above.
96 Do not output any other information. 96 Do not output any other information.
97 """ 97 """
98 98
99 -_CLASSIFY_BATCH_SIMPLE_TEMPLATE__zh = _CLASSIFY_BATCH_SIMPLE_TEMPLATE_ZH = """你是一个服饰电商搜索系统中的相关性判断助手。 99 +_CLASSIFY_BATCH_SIMPLE_TEMPLATE_ZH = """你是一个服饰电商搜索系统中的相关性判断助手。
100 给定用户查询词以及每个商品的信息，请为每个商品分配一个相关性标签。 100 给定用户查询词以及每个商品的信息，请为每个商品分配一个相关性标签。
101 101
102 ## 相关性标签 102 ## 相关性标签
@@ -108,9 +108,8 @@ _CLASSIFY_BATCH_SIMPLE_TEMPLATE__zh = _CLASSIFY_BATCH_SIMPLE_TEMPLATE_ZH = """浣 @@ -108,9 +108,8 @@ _CLASSIFY_BATCH_SIMPLE_TEMPLATE__zh = _CLASSIFY_BATCH_SIMPLE_TEMPLATE_ZH = """浣
108 - 鏌ヨ浠呭寘鍚骇鍝佺被鍨嬶紝浜у搧鍗充负璇ョ被鍨嬨 108 - 鏌ヨ浠呭寘鍚骇鍝佺被鍨嬶紝浜у搧鍗充负璇ョ被鍨嬨
109 - 鏌ヨ鍖呭惈鈥滀骇鍝佺被鍨 + 灞炴р濓紝浜у搧鍦ㄧ被鍨嬪強鎵鏈夋槑纭睘鎬т笂鍧囩鍚堛 109 - 鏌ヨ鍖呭惈鈥滀骇鍝佺被鍨 + 灞炴р濓紝浜у搧鍦ㄧ被鍨嬪強鎵鏈夋槑纭睘鎬т笂鍧囩鍚堛
110 110
111 -### 部分相关  
112 -浜у搧婊¤冻鐢ㄦ埛鐨勪富瑕佹剰鍥撅紙鏍稿績浜у搧绫诲瀷鍖归厤锛夛紝浣嗘煡璇腑鏄庣‘鐨勯儴鍒嗚姹傛湭浣撶幇锛屾垨瀛樺湪鍋忓樊銆傝櫧鐒舵湁涓嶄竴鑷达紝浣嗕粛灞炰簬鈥滈潪鐩爣浣嗗彲鎺ュ彈鈥濈殑鏇夸唬鍝併  
113 - 111 +### 基本相关 (High Relevant)
  112 +浜у搧婊¤冻鐢ㄦ埛鐨勪富瑕佹剰鍥撅紙鏍稿績浜у搧绫诲瀷鍖归厤锛夛紝浣嗘煡璇腑鏄庣‘鐨勯儴鍒嗚姹傛湭鍦ㄤ骇鍝佷俊鎭腑浣撶幇銆佹棤娉曠‘璁わ紝鎴栧瓨鍦ㄥ苟涓嶄弗閲嶅啿绐佺殑鍋忓樊銆傝鍟嗗搧鏄弧瓒崇敤鎴锋牳蹇冮渶姹傜殑鑹ソ鏇夸唬鍝併
114 鍦ㄤ互涓嬫儏鍐典娇鐢ㄩ儴鍒嗙浉鍏筹細 113 鍦ㄤ互涓嬫儏鍐典娇鐢ㄩ儴鍒嗙浉鍏筹細
115 - 鏍稿績浜у搧绫诲瀷鍖归厤锛屼絾閮ㄥ垎璇锋眰鐨勫睘鎬у湪鍟嗗搧淇℃伅涓己澶便佹湭鎻愬強鎴栨棤娉曠‘璁ゃ 114 - 鏍稿績浜у搧绫诲瀷鍖归厤锛屼絾閮ㄥ垎璇锋眰鐨勫睘鎬у湪鍟嗗搧淇℃伅涓己澶便佹湭鎻愬強鎴栨棤娉曠‘璁ゃ
116 - 鏍稿績浜у搧绫诲瀷鍖归厤锛屼絾鏉愯川銆佺増鍨嬨侀鏍肩瓑娆¤瑕佹眰瀛樺湪鍋忓樊鎴栦笉涓鑷淬 115 - 鏍稿績浜у搧绫诲瀷鍖归厤锛屼絾鏉愯川銆佺増鍨嬨侀鏍肩瓑娆¤瑕佹眰瀛樺湪鍋忓樊鎴栦笉涓鑷淬
@@ -130,45 +129,63 @@ _CLASSIFY_BATCH_SIMPLE_TEMPLATE__zh = _CLASSIFY_BATCH_SIMPLE_TEMPLATE_ZH = """浣 @@ -130,45 +129,63 @@ _CLASSIFY_BATCH_SIMPLE_TEMPLATE__zh = _CLASSIFY_BATCH_SIMPLE_TEMPLATE_ZH = """浣
130 - 鍦ㄧ數鍟嗘悳绱腑锛岀敤鎴蜂粛鍙兘鍥犱负娆惧紡銆佺┛鐫鍦烘櫙鐩歌繎鑰岀偣鍑昏鍟嗗搧銆 129 - 鍦ㄧ數鍟嗘悳绱腑锛岀敤鎴蜂粛鍙兘鍥犱负娆惧紡銆佺┛鐫鍦烘櫙鐩歌繎鑰岀偣鍑昏鍟嗗搧銆
131 - 鍥犳搴斿垽涓洪儴鍒嗙浉鍏筹紝鍗斥滈潪鐩爣浣嗗彲鎺ュ彈鈥濈殑鏇夸唬鍝併 130 - 鍥犳搴斿垽涓洪儴鍒嗙浉鍏筹紝鍗斥滈潪鐩爣浣嗗彲鎺ュ彈鈥濈殑鏇夸唬鍝併
132 131
134 -### 不相关  
134 -浜у搧鏈弧瓒崇敤鎴风殑涓昏璐墿鎰忓浘锛屼富瑕佽〃鐜颁负浠ヤ笅鎯呭舰涔嬩竴锛  
135 -- 鏍稿績浜у搧绫诲瀷涓庢煡璇笉鍖归厤銆  
136 -- 浜у搧铏藉睘澶ц嚧鐩稿叧鐨勫ぇ绫伙紝浣嗕笌鏌ヨ鎸囧畾鐨勫叿浣撳瓙绫讳笉鍙簰鎹€  
137 -- 鏍稿績浜у搧绫诲瀷鍖归厤锛屼絾浜у搧鏄庢樉杩濊儗浜嗘煡璇腑涓涓槑纭笖閲嶈鐨勫睘鎬ц姹傘 132 +璇︾粏妗堜緥锛
  133 +- 鏌ヨ锛氣滈粦鑹蹭腑闀垮崐韬鈥
  134 +- 鍟嗗搧锛氣滄槬绉嬪鏂版瀹芥澗鏄剧槮澶ф憜闀胯纰庤姳鍗婅韩瑁欒ざ鐨辫璁¤鈥
  135 +
  136 +鍒嗘瀽锛
  137 +- 鍝佺被鍖归厤锛氫骇鍝佹槸鈥滃崐韬鈥濓紝鍝佺被绗﹀悎銆
  138 +- 棰滆壊涓嶅尮閰嶏細浜у搧鎻忚堪鏈彁鍙婇粦鑹蹭笖鏄庣‘鍖呭惈鈥滅鑺扁濓紙floral锛夛紝鑺辫壊涓庣函榛戝樊寮傝緝澶с
  139 +- 闀垮害瀛樺湪鍋忓樊锛氱敤鎴疯姹傗滀腑闀库濓紝鑰屼骇鍝佹爣棰樺己璋冣滈暱瑁欌濓紙Long Skirt锛夛紝闀垮害鍋忛暱銆
  140 +- 鏍稿績鍝佺被鈥滃崐韬鈥濆尮閰嶏紝涓斺滄樉鐦︹濃滃ぇ鎽嗏濈瓑椋庢牸鍙兘绗﹀悎閮ㄥ垎鎼滅储鈥滀腑闀垮崐韬鈥濈敤鎴风殑娼滃湪鍋忓ソ锛堝鐗堝瀷鐩镐技锛夛紝鈥滈暱瑁欌濆拰鈥滀腑闀库濇棤涓ラ噸鐭涚浘锛屽睘浜庢牳蹇冨搧绫诲尮閰嶏紝灞炴у瓨鍦ㄤ笉涓ラ噸鍋忓樊鐨勨滃熀鏈浉鍏斥濄
  141 +
  142 +### 弱相关 (Low Relevant)
  143 +浜у搧涓庣敤鎴风殑鏍稿績鎰忓浘瀛樺湪宸窛锛屼富瑕佽〃鐜颁负浠ヤ笅鎯呭舰涔嬩竴锛屼絾浠嶅彲鑳藉洜椋庢牸銆佸満鏅垨鍔熻兘涓婄殑鐩镐技鎬ц岃鐢ㄦ埛鎺ュ彈銆備负鈥滈潪鐩爣浣嗗彲鎺ュ彈鈥濈殑鏇夸唬鍝併
  144 +- **鍏稿瀷鎯呭喌**锛
  145 + - 鏍稿績浜у搧绫诲瀷鏈夊樊寮傦紝浣嗛鏍笺佺┛鐫鍦烘櫙鎴栧姛鑳介潪甯告帴杩戯紝濡傛煡璇⑩滈粦鑹蹭腑闀垮崐韬鈥濓紝鍟嗗搧涓衡滆繛琛h鈥濓紙鍚屽睘瑁欒澶х被锛屾寮忕浉浼硷級銆
  146 + - 鏍稿績浜у搧绫诲瀷鏈夊樊寮傦紝浣嗗湪璐墿鍦烘櫙涓嬪睘浜庣浉杩戝搧绫伙紝鍙媺寮烘浛浠o紝濡傛煡璇⑩滅墰浠旇¥鈥濓紝鍟嗗搧涓衡滀紤闂茶¥鈥濓紙鍧囦负瑁ゅ瓙澶х被锛岄鏍煎彲鑳界浉杩戯級銆
  147 + - 鏍稿績浜у搧绫诲瀷鍖归厤锛屼絾浜у搧鍦ㄥ涓潪鍏抽敭灞炴т笂瀛樺湪鍋忓樊锛屽鑷翠笌鐢ㄦ埛鐞嗘兂鐩爣宸窛杈冨ぇ锛屼絾浠嶄繚鐣欎竴瀹氬叧鑱旀с
  148 +
  149 +典型情况：
  150 +- 鏌ヨ锛氣滈粦鑹蹭腑闀垮崐韬鈥濓紝浜у搧锛氣滄柊娆鹃珮鑵癡棰嗕腑闀挎杩炶。瑁 浼橀泤鍗拌姳榛戣壊鎬ф劅杩炶。瑁欌 鈫 鏍稿績浜у搧绫诲瀷鈥滃崐韬鈥濅笌鈥滆繛琛h鈥濇湁宸紓锛屼絾涓よ呭悓灞炶瑁呭ぇ绫讳笖娆惧紡涓婂潎涓衡滀腑闀挎鈥濓紝鍏锋湁鐩镐技鎬с
  151 +
  152 +### 不相关 (Irrelevant)
  153 +浜у搧鏈弧瓒崇敤鎴风殑涓昏璐墿鎰忓浘锛岀敤鎴风偣鍑诲姩鏈烘瀬浣庛備富瑕佽〃鐜颁负浠ヤ笅鎯呭舰涔嬩竴锛
  154 +- 鏍稿績浜у搧绫诲瀷涓庢煡璇笉鍖归厤锛屼笖涓嶅睘浜庨鏍/鍦烘櫙鐩歌繎鐨勬浛浠e搧銆
  155 +- 浜у搧铏藉睘澶ц嚧鐩稿叧鐨勫ぇ绫伙紝浣嗕笌鏌ヨ鎸囧畾鐨勫叿浣撳瓙绫讳笉鍙簰鎹紝涓旈鏍/鍦烘櫙宸紓澶с
  156 +- 鏍稿績浜у搧绫诲瀷鍖归厤锛屼絾浜у搧鏄庢樉杩濊儗浜嗘煡璇腑涓涓槑纭笖閲嶈鐨勫睘鎬ц姹傦紝涓斾笉瀛樺湪鍙帴鍙楃殑鐞嗙敱銆
138 157
139 典型情况： 158
140 - 鏌ヨ锛氣滆¥瀛愨濓紝浜у搧锛氣滈瀷瀛愨 鈫 浜у搧绫诲瀷閿欒銆 159 - 鏌ヨ锛氣滆¥瀛愨濓紝浜у搧锛氣滈瀷瀛愨 鈫 浜у搧绫诲瀷閿欒銆
141 -- 鏌ヨ锛氣滆繛琛h鈥濓紝浜у搧锛氣滃崐韬鈥 鈫 鍏蜂綋浜у搧绫诲瀷涓嶅悓銆  
142 -- 鏌ヨ锛氣滀慨韬¥鈥濓紝浜у搧锛氣滃鏉鹃様鑵胯¥鈥 鈫 涓庣増鍨嬭姹傛槑鏄惧啿绐併 160 +- 鏌ヨ锛氣滀慨韬¥鈥濓紝浜у搧锛氣滃鏉鹃様鑵胯¥鈥 鈫 涓庣増鍨嬭姹傛槑鏄惧啿绐侊紝鏇夸唬鎬ф瀬浣庛
143 - 鏌ヨ锛氣滄棤琚栬繛琛h鈥濓紝浜у搧锛氣滈暱琚栬繛琛h鈥 鈫 涓庤鍨嬭姹傛槑鏄惧啿绐併 161 - 鏌ヨ锛氣滄棤琚栬繛琛h鈥濓紝浜у搧锛氣滈暱琚栬繛琛h鈥 鈫 涓庤鍨嬭姹傛槑鏄惧啿绐併
144 -  
145 -璇ユ爣绛惧己璋冪敤鎴锋剰鍥剧殑鏄庣‘鎬с傚綋鏌ヨ鎸囧悜鍏蜂綋绫诲瀷鎴栧叧閿睘鎬ф椂锛屽嵆浣夸骇鍝佸湪鏇撮珮灞傜骇绫诲埆涓婄浉鍏筹紝涔熷簲鎸変笉鐩稿叧澶勭悊銆 162 +- 鏌ヨ锛氣滅墰浠旇¥鈥濓紝浜у搧锛氣滆繍鍔ㄨ¥鈥 鈫 鏍稿績鍝佺被涓嶅悓锛堢墰浠旇¥ vs 杩愬姩瑁わ級锛岄鏍煎拰鍦烘櫙宸紓澶с
  163 +- 鏌ヨ锛氣滈澊瀛愨濓紝浜у搧锛氣滆繍鍔ㄩ瀷鈥 鈫 鏍稿績鍝佺被涓嶅悓锛屽姛鑳藉拰閫傜敤鍦烘櫙宸紓澶с
146 164
147 ## 判断原则 165
148 166
149 -1. 浜у搧绫诲瀷鏄渶楂樹紭鍏堢骇鍥犵礌銆  
150 - 濡傛灉鏌ヨ鏄庣‘鎸囧畾浜嗗叿浣撲骇鍝佺被鍨嬶紝閭d箞缁撴灉蹇呴』鍖归厤璇ヤ骇鍝佺被鍨嬶紝鎵嶅彲鑳藉垽涓衡滃畬鍏ㄧ浉鍏斥濇垨鈥滈儴鍒嗙浉鍏斥濄  
151 - 涓嶅悓浜у搧绫诲瀷閫氬父搴斿垽涓衡滀笉鐩稿叧鈥濓紝鑰屼笉鏄滈儴鍒嗙浉鍏斥濄  
152 -  
153 -2. 鐩镐技鎴栫浉鍏崇殑浜у搧绫诲瀷锛屽湪鏌ヨ鏄庣‘鏃堕氬父涓嶅彲浜掓崲銆  
154 - 渚嬪锛  
155 - - 杩炶。瑁 vs 鍗婅韩瑁 vs 杩炰綋瑁  
156 - - 鐗涗粩瑁 vs 瑁ゅ瓙  
157 - - T鎭 vs 琛~/涓婅。  
158 - - 寮琛 vs 姣涜。  
159 - - 闈村瓙 vs 闉嬪瓙  
160 - - 鏂囪兏 vs 涓婅。  
161 - - 鍙岃偐鍖 vs 鍖  
162 - 濡傛灉鐢ㄦ埛鏄庣‘鎼滅储鍏朵腑涓绉嶏紝鍏朵粬绫诲瀷閫氬父搴斿垽涓衡滀笉鐩稿叧鈥濄  
163 -  
164 -3. 褰撴牳蹇冧骇鍝佺被鍨嬪尮閰嶅悗锛屽啀璇勪及灞炴с  
165 - - 鎵鏈夋槑纭睘鎬ч兘鍖归厤 鈫 瀹屽叏鐩稿叧  
166 - - 閮ㄥ垎灞炴х己澶便佹棤娉曠‘璁わ紝鎴栧瓨鍦ㄤ竴瀹氬亸宸紝浣嗕粛鏄彲鎺ュ彈鏇夸唬鍝 鈫 閮ㄥ垎鐩稿叧  
167 - - 鏄庣‘涓旈噸瑕佺殑灞炴ц鏄庢樉杩濊儗锛屼笖涓嶈兘浣滀负鍚堢悊鏇夸唬鍝 鈫 涓嶇浉鍏  
168 -  
169 -4. 瑕佷弗鏍煎尯鍒嗏滄湭鎻愬強/鏃犳硶纭鈥濆拰鈥滄槑纭啿绐佲濄  
170 - - 濡傛灉鏌愬睘鎬ф病鏈夋彁鍙婏紝鎴栨棤娉曢獙璇侊紝浼樺厛鍒や负鈥滈儴鍒嗙浉鍏斥濄  
171 - - 濡傛灉鏌愬睘鎬т笌鏌ヨ瑕佹眰鏄庣‘鐩稿弽锛屽垯鍒や负鈥滀笉鐩稿叧鈥濓紱闄ら潪鍦ㄨ喘鐗╄澧冧笅瀹冧粛鏄庢樉灞炰簬鍙帴鍙楁浛浠e搧銆 167 +1. **浜у搧绫诲瀷鏄渶楂樹紭鍏堢骇鍥犵礌銆**
  168 + 濡傛灉鏌ヨ鏄庣‘鎸囧畾浜嗗叿浣撲骇鍝佺被鍨嬶紝閭d箞缁撴灉蹇呴』鍖归厤璇ヤ骇鍝佺被鍨嬶紝鎵嶅彲鑳藉垽涓衡滃畬鍏ㄧ浉鍏斥濇垨鈥滃熀鏈浉鍏斥濄備笉鍚屼骇鍝佺被鍨嬮氬父搴斿垽涓衡滃急鐩稿叧鈥濇垨鈥滀笉鐩稿叧鈥濄
  169 + - **寮辩浉鍏**锛氫粎褰撲袱绉嶄骇鍝佺被鍨嬮鏍笺佸満鏅佸姛鑳介潪甯告帴杩戯紝鍙兘琚涓哄彲鎺ュ彈鐨勬浛浠e搧鏃朵娇鐢ㄣ
  170 + - **涓嶇浉鍏**锛氬叾浠栨墍鏈変骇鍝佺被鍨嬩笉鍖归厤鐨勬儏鍐点
  171 +
  172 +2. **鐩镐技鎴栫浉鍏崇殑浜у搧绫诲瀷锛屽湪鏌ヨ鏄庣‘鏃堕氬父涓嶅彲浜掓崲锛屼絾闇鏍规嵁鎺ヨ繎绋嬪害鍖哄垎銆**
  173 + 渚嬪锛
  174 + - **椋庢牸/鍦烘櫙楂樺害鎺ヨ繎锛屽彲鍒や负寮辩浉鍏**锛氳繛琛h vs 鍗婅韩瑁欍侀暱瑁 vs 涓暱瑁欍佺墰浠旇¥ vs 浼戦棽瑁ゃ佽繍鍔ㄩ瀷 vs 鏉块瀷銆
  175 + - **椋庢牸/鍦烘櫙宸紓澶э紝鍒や负涓嶇浉鍏**锛氳¥瀛 vs 闉嬪瓙銆乀鎭 vs 甯藉瓙銆侀澊瀛 vs 杩愬姩闉嬨佺墰浠旇¥ vs 瑗胯瑁ゃ佸弻鑲╁寘 vs 鎵嬫彁鍖呫
  176 + 濡傛灉鐢ㄦ埛鏄庣‘鎼滅储鍏朵腑涓绉嶏紝鍏朵粬绫诲瀷鏄惁鍙帴鍙楀彇鍐充簬鍏堕鏍笺佸満鏅殑鎺ヨ繎绋嬪害銆
  177 +
  178 +3. **褰撴牳蹇冧骇鍝佺被鍨嬪尮閰嶅悗锛屽啀璇勪及灞炴с**
  179 + - 鎵鏈夋槑纭睘鎬ч兘鍖归厤 鈫 **瀹屽叏鐩稿叧**
  180 + - 閮ㄥ垎灞炴х己澶便佹棤娉曠‘璁わ紝鎴栧瓨鍦ㄨ緝灏忓亸宸 鈫 **鍩烘湰鐩稿叧**
  181 + - 鏄庣‘涓旈噸瑕佺殑灞炴ц鏄庢樉杩濊儗锛堝淇韩 vs 瀹芥澗锛夛紝浣嗘牳蹇冨搧绫讳粛鍖归厤 鈫 **寮辩浉鍏** 鎴 **涓嶇浉鍏**銆
  182 + - **寮辩浉鍏**锛氬睘鎬ф槑鏄捐繚鑳岋紝浣嗗瓨鍦ㄥ彲琚敤鎴锋帴鍙楃殑寰急鐞嗙敱锛堝鐗堝瀷铏戒笉鍚屼絾椋庢牸绫讳技锛夈
  183 + - **涓嶇浉鍏**锛氬睘鎬ф槑鏄捐繚鑳岋紝涓旀浛浠fф瀬浣庯紝鐢ㄦ埛鏃犵偣鍑诲姩鏈猴紙濡備慨韬 vs 瀹芥澗闃旇吙瑁わ級銆
  184 +
  185 +4. **瑕佷弗鏍煎尯鍒嗏滄湭鎻愬強/鏃犳硶纭鈥濄佲滆緝灏忓亸宸濄佲滄槑纭啿绐佲濄**
  186 + - 濡傛灉鏌愬睘鎬ф病鏈夋彁鍙婏紝鎴栨棤娉曢獙璇侊紝浼樺厛鍒や负鈥**鍩烘湰鐩稿叧**鈥濄
  187 + - 濡傛灉鏌愬睘鎬у瓨鍦ㄨ緝灏忓亸宸紙濡傞鑹蹭笉鍚屻佹潗璐ㄤ笉鍚岋級锛屽垽涓衡**鍩烘湰鐩稿叧**鈥濄
  188 + - 濡傛灉鏌愬睘鎬т笌鏌ヨ瑕佹眰鏄庣‘鐩稿弽锛屽垯闇鏍规嵁鍐茬獊鐨勪弗閲嶆у拰鏇夸唬鎬у垽涓衡**寮辩浉鍏**鈥濇垨鈥**涓嶇浉鍏**鈥濄
172 189
173 鏌ヨ锛歿query} 190 鏌ヨ锛歿query}
174 191