Commit bdb65283908f21ee69eadaf078075e4f9140a332
1 parent
167f33b4
标注框架 批量标注
Showing
5 changed files
with
277 additions
and
54 deletions
Show diff stats
scripts/evaluation/eval_framework/cli.py
| ... | ... | @@ -5,6 +5,7 @@ from __future__ import annotations |
| 5 | 5 | import argparse |
| 6 | 6 | import json |
| 7 | 7 | from pathlib import Path |
| 8 | +from typing import Any, Dict | |
| 8 | 9 | |
| 9 | 10 | from .constants import ( |
| 10 | 11 | DEFAULT_LABELER_MODE, |
| ... | ... | @@ -23,6 +24,38 @@ from .utils import ensure_dir, utc_now_iso, utc_timestamp |
| 23 | 24 | from .web_app import create_web_app |
| 24 | 25 | |
| 25 | 26 | |
| 27 | +def add_judge_llm_args(p: argparse.ArgumentParser) -> None: | |
| 28 | + p.add_argument( | |
| 29 | + "--judge-model", | |
| 30 | + default=None, | |
| 31 | + metavar="MODEL", | |
| 32 | + help="Judge LLM model (default: eval_framework.constants.DEFAULT_JUDGE_MODEL).", | |
| 33 | + ) | |
| 34 | + p.add_argument( | |
| 35 | + "--enable-thinking", | |
| 36 | + action=argparse.BooleanOptionalAction, | |
| 37 | + default=None, | |
| 38 | + help="enable_thinking for DashScope (default: DEFAULT_JUDGE_ENABLE_THINKING).", | |
| 39 | + ) | |
| 40 | + p.add_argument( | |
| 41 | + "--dashscope-batch", | |
| 42 | + action=argparse.BooleanOptionalAction, | |
| 43 | + default=None, | |
| 44 | + help="DashScope Batch File API vs sync chat (default: DEFAULT_JUDGE_DASHSCOPE_BATCH).", | |
| 45 | + ) | |
| 46 | + | |
| 47 | + | |
| 48 | +def framework_kwargs_from_args(args: argparse.Namespace) -> Dict[str, Any]: | |
| 49 | + kw: Dict[str, Any] = {} | |
| 50 | + if args.judge_model is not None: | |
| 51 | + kw["judge_model"] = args.judge_model | |
| 52 | + if args.enable_thinking is not None: | |
| 53 | + kw["enable_thinking"] = args.enable_thinking | |
| 54 | + if args.dashscope_batch is not None: | |
| 55 | + kw["use_dashscope_batch"] = args.dashscope_batch | |
| 56 | + return kw | |
| 57 | + | |
| 58 | + | |
| 26 | 59 | def build_cli_parser() -> argparse.ArgumentParser: |
| 27 | 60 | parser = argparse.ArgumentParser(description="Search evaluation annotation builder and web UI") |
| 28 | 61 | sub = parser.add_subparsers(dest="command", required=True) |
| ... | ... | @@ -71,6 +104,7 @@ def build_cli_parser() -> argparse.ArgumentParser: |
| 71 | 104 | build.add_argument("--force-refresh-rerank", action="store_true") |
| 72 | 105 | build.add_argument("--force-refresh-labels", action="store_true") |
| 73 | 106 | build.add_argument("--labeler-mode", default=DEFAULT_LABELER_MODE, choices=["simple", "complex"]) |
| 107 | + add_judge_llm_args(build) | |
| 74 | 108 | |
| 75 | 109 | batch = sub.add_parser("batch", help="Run batch evaluation against live search") |
| 76 | 110 | batch.add_argument("--tenant-id", default="163") |
| ... | ... | @@ -79,6 +113,7 @@ def build_cli_parser() -> argparse.ArgumentParser: |
| 79 | 113 | batch.add_argument("--language", default="en") |
| 80 | 114 | batch.add_argument("--force-refresh-labels", action="store_true") |
| 81 | 115 | batch.add_argument("--labeler-mode", default=DEFAULT_LABELER_MODE, choices=["simple", "complex"]) |
| 116 | + add_judge_llm_args(batch) | |
| 82 | 117 | |
| 83 | 118 | audit = sub.add_parser("audit", help="Audit annotation quality for queries") |
| 84 | 119 | audit.add_argument("--tenant-id", default="163") |
| ... | ... | @@ -88,6 +123,7 @@ def build_cli_parser() -> argparse.ArgumentParser: |
| 88 | 123 | audit.add_argument("--limit-suspicious", type=int, default=5) |
| 89 | 124 | audit.add_argument("--force-refresh-labels", action="store_true") |
| 90 | 125 | audit.add_argument("--labeler-mode", default=DEFAULT_LABELER_MODE, choices=["simple", "complex"]) |
| 126 | + add_judge_llm_args(audit) | |
| 91 | 127 | |
| 92 | 128 | serve = sub.add_parser("serve", help="Serve evaluation web UI on port 6010") |
| 93 | 129 | serve.add_argument("--tenant-id", default="163") |
| ... | ... | @@ -95,12 +131,15 @@ def build_cli_parser() -> argparse.ArgumentParser: |
| 95 | 131 | serve.add_argument("--host", default="0.0.0.0") |
| 96 | 132 | serve.add_argument("--port", type=int, default=6010) |
| 97 | 133 | serve.add_argument("--labeler-mode", default=DEFAULT_LABELER_MODE, choices=["simple", "complex"]) |
| 134 | + add_judge_llm_args(serve) | |
| 98 | 135 | |
| 99 | 136 | return parser |
| 100 | 137 | |
| 101 | 138 | |
| 102 | 139 | def run_build(args: argparse.Namespace) -> None: |
| 103 | - framework = SearchEvaluationFramework(tenant_id=args.tenant_id, labeler_mode=args.labeler_mode) | |
| 140 | + framework = SearchEvaluationFramework( | |
| 141 | + tenant_id=args.tenant_id, labeler_mode=args.labeler_mode, **framework_kwargs_from_args(args) | |
| 142 | + ) | |
| 104 | 143 | queries = framework.queries_from_file(Path(args.queries_file)) |
| 105 | 144 | summary = [] |
| 106 | 145 | rebuild_kwargs = {} |
| ... | ... | @@ -152,7 +191,9 @@ def run_build(args: argparse.Namespace) -> None: |
| 152 | 191 | |
| 153 | 192 | |
| 154 | 193 | def run_batch(args: argparse.Namespace) -> None: |
| 155 | - framework = SearchEvaluationFramework(tenant_id=args.tenant_id, labeler_mode=args.labeler_mode) | |
| 194 | + framework = SearchEvaluationFramework( | |
| 195 | + tenant_id=args.tenant_id, labeler_mode=args.labeler_mode, **framework_kwargs_from_args(args) | |
| 196 | + ) | |
| 156 | 197 | queries = framework.queries_from_file(Path(args.queries_file)) |
| 157 | 198 | payload = framework.batch_evaluate( |
| 158 | 199 | queries=queries, |
| ... | ... | @@ -165,7 +206,9 @@ def run_batch(args: argparse.Namespace) -> None: |
| 165 | 206 | |
| 166 | 207 | |
| 167 | 208 | def run_audit(args: argparse.Namespace) -> None: |
| 168 | - framework = SearchEvaluationFramework(tenant_id=args.tenant_id, labeler_mode=args.labeler_mode) | |
| 209 | + framework = SearchEvaluationFramework( | |
| 210 | + tenant_id=args.tenant_id, labeler_mode=args.labeler_mode, **framework_kwargs_from_args(args) | |
| 211 | + ) | |
| 169 | 212 | queries = framework.queries_from_file(Path(args.queries_file)) |
| 170 | 213 | audit_items = [] |
| 171 | 214 | for query in queries: |
| ... | ... | @@ -215,7 +258,9 @@ def run_audit(args: argparse.Namespace) -> None: |
| 215 | 258 | |
| 216 | 259 | |
| 217 | 260 | def run_serve(args: argparse.Namespace) -> None: |
| 218 | - framework = SearchEvaluationFramework(tenant_id=args.tenant_id, labeler_mode=args.labeler_mode) | |
| 261 | + framework = SearchEvaluationFramework( | |
| 262 | + tenant_id=args.tenant_id, labeler_mode=args.labeler_mode, **framework_kwargs_from_args(args) | |
| 263 | + ) | |
| 219 | 264 | app = create_web_app(framework, Path(args.queries_file)) |
| 220 | 265 | import uvicorn |
| 221 | 266 | ... | ... |
scripts/evaluation/eval_framework/clients.py
| ... | ... | @@ -2,6 +2,10 @@ |
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | +import io | |
| 6 | +import json | |
| 7 | +import time | |
| 8 | +import uuid | |
| 5 | 9 | from typing import Any, Dict, List, Optional, Sequence, Tuple |
| 6 | 10 | |
| 7 | 11 | import requests |
| ... | ... | @@ -60,26 +64,51 @@ class RerankServiceClient: |
| 60 | 64 | |
| 61 | 65 | |
| 62 | 66 | class DashScopeLabelClient: |
| 63 | - def __init__(self, model: str, base_url: str, api_key: str, batch_size: int = 40): | |
| 67 | + """DashScope OpenAI-compatible chat: synchronous or Batch File API (JSONL job). | |
| 68 | + | |
| 69 | + Batch flow: https://help.aliyun.com/zh/model-studio/batch-interfaces-compatible-with-openai/ | |
| 70 | + """ | |
| 71 | + | |
| 72 | + def __init__( | |
| 73 | + self, | |
| 74 | + model: str, | |
| 75 | + base_url: str, | |
| 76 | + api_key: str, | |
| 77 | + batch_size: int = 40, | |
| 78 | + *, | |
| 79 | + batch_completion_window: str = "24h", | |
| 80 | + batch_poll_interval_sec: float = 10.0, | |
| 81 | + enable_thinking: bool = True, | |
| 82 | + use_batch: bool = True, | |
| 83 | + ): | |
| 64 | 84 | self.model = model |
| 65 | 85 | self.base_url = base_url.rstrip("/") |
| 66 | 86 | self.api_key = api_key |
| 67 | 87 | self.batch_size = int(batch_size) |
| 88 | + self.batch_completion_window = str(batch_completion_window) | |
| 89 | + self.batch_poll_interval_sec = float(batch_poll_interval_sec) | |
| 90 | + self.enable_thinking = bool(enable_thinking) | |
| 91 | + self.use_batch = bool(use_batch) | |
| 68 | 92 | self.session = requests.Session() |
| 69 | 93 | |
| 70 | - def _chat(self, prompt: str) -> Tuple[str, str]: | |
| 94 | + def _auth_headers(self) -> Dict[str, str]: | |
| 95 | + return {"Authorization": f"Bearer {self.api_key}"} | |
| 96 | + | |
| 97 | + def _completion_body(self, prompt: str) -> Dict[str, Any]: | |
| 98 | + body: Dict[str, Any] = { | |
| 99 | + "model": self.model, | |
| 100 | + "messages": [{"role": "user", "content": prompt}], | |
| 101 | + "temperature": 0, | |
| 102 | + "top_p": 0.1, | |
| 103 | + "enable_thinking": self.enable_thinking, | |
| 104 | + } | |
| 105 | + return body | |
| 106 | + | |
| 107 | + def _chat_sync(self, prompt: str) -> Tuple[str, str]: | |
| 71 | 108 | response = self.session.post( |
| 72 | 109 | f"{self.base_url}/chat/completions", |
| 73 | - headers={ | |
| 74 | - "Authorization": f"Bearer {self.api_key}", | |
| 75 | - "Content-Type": "application/json", | |
| 76 | - }, | |
| 77 | - json={ | |
| 78 | - "model": self.model, | |
| 79 | - "messages": [{"role": "user", "content": prompt}], | |
| 80 | - "temperature": 0, | |
| 81 | - "top_p": 0.1, | |
| 82 | - }, | |
| 110 | + headers={**self._auth_headers(), "Content-Type": "application/json"}, | |
| 111 | + json=self._completion_body(prompt), | |
| 83 | 112 | timeout=180, |
| 84 | 113 | ) |
| 85 | 114 | response.raise_for_status() |
| ... | ... | @@ -87,6 +116,114 @@ class DashScopeLabelClient: |
| 87 | 116 | content = str(((data.get("choices") or [{}])[0].get("message") or {}).get("content") or "").strip() |
| 88 | 117 | return content, safe_json_dumps(data) |
| 89 | 118 | |
| 119 | + def _chat_batch(self, prompt: str) -> Tuple[str, str]: | |
| 120 | + """One chat completion via Batch File API (single-line JSONL job).""" | |
| 121 | + custom_id = uuid.uuid4().hex | |
| 122 | + body = self._completion_body(prompt) | |
| 123 | + line_obj = { | |
| 124 | + "custom_id": custom_id, | |
| 125 | + "method": "POST", | |
| 126 | + "url": "/v1/chat/completions", | |
| 127 | + "body": body, | |
| 128 | + } | |
| 129 | + jsonl = json.dumps(line_obj, ensure_ascii=False, separators=(",", ":")) + "\n" | |
| 130 | + auth = self._auth_headers() | |
| 131 | + | |
| 132 | + up = self.session.post( | |
| 133 | + f"{self.base_url}/files", | |
| 134 | + headers=auth, | |
| 135 | + files={ | |
| 136 | + "file": ( | |
| 137 | + "eval_batch_input.jsonl", | |
| 138 | + io.BytesIO(jsonl.encode("utf-8")), | |
| 139 | + "application/octet-stream", | |
| 140 | + ) | |
| 141 | + }, | |
| 142 | + data={"purpose": "batch"}, | |
| 143 | + timeout=300, | |
| 144 | + ) | |
| 145 | + up.raise_for_status() | |
| 146 | + file_id = (up.json() or {}).get("id") | |
| 147 | + if not file_id: | |
| 148 | + raise RuntimeError(f"DashScope file upload returned no id: {up.text!r}") | |
| 149 | + | |
| 150 | + cr = self.session.post( | |
| 151 | + f"{self.base_url}/batches", | |
| 152 | + headers={**auth, "Content-Type": "application/json"}, | |
| 153 | + json={ | |
| 154 | + "input_file_id": file_id, | |
| 155 | + "endpoint": "/v1/chat/completions", | |
| 156 | + "completion_window": self.batch_completion_window, | |
| 157 | + }, | |
| 158 | + timeout=120, | |
| 159 | + ) | |
| 160 | + cr.raise_for_status() | |
| 161 | + batch_payload = cr.json() or {} | |
| 162 | + batch_id = batch_payload.get("id") | |
| 163 | + if not batch_id: | |
| 164 | + raise RuntimeError(f"DashScope batches.create returned no id: {cr.text!r}") | |
| 165 | + | |
| 166 | + terminal = frozenset({"completed", "failed", "expired", "cancelled"}) | |
| 167 | + batch: Dict[str, Any] = dict(batch_payload) | |
| 168 | + status = str(batch.get("status") or "") | |
| 169 | + while status not in terminal: | |
| 170 | + time.sleep(self.batch_poll_interval_sec) | |
| 171 | + br = self.session.get(f"{self.base_url}/batches/{batch_id}", headers=auth, timeout=120) | |
| 172 | + br.raise_for_status() | |
| 173 | + batch = br.json() or {} | |
| 174 | + status = str(batch.get("status") or "") | |
| 175 | + | |
| 176 | + if status != "completed": | |
| 177 | + raise RuntimeError( | |
| 178 | + f"DashScope batch {batch_id} ended with status={status!r} errors={batch.get('errors')!r}" | |
| 179 | + ) | |
| 180 | + | |
| 181 | + out_id = batch.get("output_file_id") | |
| 182 | + err_id = batch.get("error_file_id") | |
| 183 | + | |
| 184 | + row = self._find_batch_line_for_custom_id(out_id, custom_id, auth) | |
| 185 | + if row is None: | |
| 186 | + err_row = self._find_batch_line_for_custom_id(err_id, custom_id, auth) | |
| 187 | + if err_row is not None: | |
| 188 | + raise RuntimeError(f"DashScope batch request failed: {err_row!r}") | |
| 189 | + raise RuntimeError(f"DashScope batch output missing custom_id={custom_id!r}") | |
| 190 | + | |
| 191 | + resp = row.get("response") or {} | |
| 192 | + sc = resp.get("status_code") | |
| 193 | + if sc is not None and int(sc) != 200: | |
| 194 | + raise RuntimeError(f"DashScope batch line error: {row!r}") | |
| 195 | + | |
| 196 | + data = resp.get("body") or {} | |
| 197 | + content = str(((data.get("choices") or [{}])[0].get("message") or {}).get("content") or "").strip() | |
| 198 | + return content, safe_json_dumps(row) | |
| 199 | + | |
| 200 | + def _chat(self, prompt: str) -> Tuple[str, str]: | |
| 201 | + if self.use_batch: | |
| 202 | + return self._chat_batch(prompt) | |
| 203 | + return self._chat_sync(prompt) | |
| 204 | + | |
| 205 | + def _find_batch_line_for_custom_id( | |
| 206 | + self, | |
| 207 | + file_id: Optional[str], | |
| 208 | + custom_id: str, | |
| 209 | + auth: Dict[str, str], | |
| 210 | + ) -> Optional[Dict[str, Any]]: | |
| 211 | + if not file_id or str(file_id) in ("null", ""): | |
| 212 | + return None | |
| 213 | + r = self.session.get(f"{self.base_url}/files/{file_id}/content", headers=auth, timeout=300) | |
| 214 | + r.raise_for_status() | |
| 215 | + for raw in r.text.splitlines(): | |
| 216 | + raw = raw.strip() | |
| 217 | + if not raw: | |
| 218 | + continue | |
| 219 | + try: | |
| 220 | + obj = json.loads(raw) | |
| 221 | + except json.JSONDecodeError: | |
| 222 | + continue | |
| 223 | + if str(obj.get("custom_id")) == custom_id: | |
| 224 | + return obj | |
| 225 | + return None | |
| 226 | + | |
| 90 | 227 | def classify_batch_simple( |
| 91 | 228 | self, |
| 92 | 229 | query: str, | ... | ... |
scripts/evaluation/eval_framework/constants.py
| ... | ... | @@ -17,6 +17,12 @@ DEFAULT_QUERY_FILE = _SCRIPTS_EVAL_DIR / "queries" / "queries.txt" |
| 17 | 17 | JUDGE_PROMPT_VERSION_SIMPLE = "v3_simple_20260331" |
| 18 | 18 | JUDGE_PROMPT_VERSION_COMPLEX = "v2_structured_20260331" |
| 19 | 19 | DEFAULT_LABELER_MODE = "simple" |
| 20 | +# Judge LLM (eval_framework only; override via CLI --judge-model / constructor kwargs) | |
| 21 | +DEFAULT_JUDGE_MODEL = "qwen3.5-flash" | |
| 22 | +DEFAULT_JUDGE_ENABLE_THINKING = True | |
| 23 | +DEFAULT_JUDGE_DASHSCOPE_BATCH = True | |
| 24 | +DEFAULT_JUDGE_BATCH_COMPLETION_WINDOW = "24h" | |
| 25 | +DEFAULT_JUDGE_BATCH_POLL_INTERVAL_SEC = 10.0 | |
| 20 | 26 | |
| 21 | 27 | # Rebuild annotation pool (build --force-refresh-labels): search recall + full-corpus rerank + LLM batches |
| 22 | 28 | DEFAULT_SEARCH_RECALL_TOP_K = 500 | ... | ... |
scripts/evaluation/eval_framework/framework.py
| ... | ... | @@ -16,6 +16,11 @@ from indexer.mapping_generator import get_tenant_index_name |
| 16 | 16 | from .clients import DashScopeLabelClient, RerankServiceClient, SearchServiceClient |
| 17 | 17 | from .constants import ( |
| 18 | 18 | DEFAULT_ARTIFACT_ROOT, |
| 19 | + DEFAULT_JUDGE_BATCH_COMPLETION_WINDOW, | |
| 20 | + DEFAULT_JUDGE_BATCH_POLL_INTERVAL_SEC, | |
| 21 | + DEFAULT_JUDGE_DASHSCOPE_BATCH, | |
| 22 | + DEFAULT_JUDGE_ENABLE_THINKING, | |
| 23 | + DEFAULT_JUDGE_MODEL, | |
| 19 | 24 | DEFAULT_LABELER_MODE, |
| 20 | 25 | DEFAULT_REBUILD_IRRELEVANT_STOP_RATIO, |
| 21 | 26 | DEFAULT_REBUILD_IRRELEVANT_STOP_STREAK, |
| ... | ... | @@ -73,6 +78,10 @@ class SearchEvaluationFramework: |
| 73 | 78 | artifact_root: Path = DEFAULT_ARTIFACT_ROOT, |
| 74 | 79 | search_base_url: str = "http://localhost:6002", |
| 75 | 80 | labeler_mode: str = DEFAULT_LABELER_MODE, |
| 81 | + *, | |
| 82 | + judge_model: str | None = None, | |
| 83 | + enable_thinking: bool | None = None, | |
| 84 | + use_dashscope_batch: bool | None = None, | |
| 76 | 85 | ): |
| 77 | 86 | init_service(get_app_config().infrastructure.elasticsearch.host) |
| 78 | 87 | self.tenant_id = str(tenant_id) |
| ... | ... | @@ -89,13 +98,22 @@ class SearchEvaluationFramework: |
| 89 | 98 | api_key = app_cfg.infrastructure.secrets.dashscope_api_key |
| 90 | 99 | if not api_key: |
| 91 | 100 | raise RuntimeError("dashscope_api_key is required for search evaluation annotation") |
| 101 | + model = str(judge_model or DEFAULT_JUDGE_MODEL) | |
| 102 | + et = DEFAULT_JUDGE_ENABLE_THINKING if enable_thinking is None else enable_thinking | |
| 103 | + use_batch = DEFAULT_JUDGE_DASHSCOPE_BATCH if use_dashscope_batch is None else use_dashscope_batch | |
| 104 | + batch_window = DEFAULT_JUDGE_BATCH_COMPLETION_WINDOW | |
| 105 | + batch_poll = float(DEFAULT_JUDGE_BATCH_POLL_INTERVAL_SEC) | |
| 92 | 106 | self.label_client = DashScopeLabelClient( |
| 93 | - model=str(llm_cfg["model"]), | |
| 107 | + model=model, | |
| 94 | 108 | base_url=str(llm_cfg["base_url"]), |
| 95 | 109 | api_key=str(api_key), |
| 110 | + batch_completion_window=batch_window, | |
| 111 | + batch_poll_interval_sec=batch_poll, | |
| 112 | + enable_thinking=et, | |
| 113 | + use_batch=use_batch, | |
| 96 | 114 | ) |
| 97 | 115 | self.query_parser = None |
| 98 | - | |
| 116 | + | |
| 99 | 117 | def _get_query_parser(self): |
| 100 | 118 | if self.query_parser is None: |
| 101 | 119 | self.query_parser = get_query_parser() | ... | ... |
scripts/evaluation/eval_framework/prompts.py
| ... | ... | @@ -96,7 +96,7 @@ The lines must correspond sequentially to the products above. |
| 96 | 96 | Do not output any other information. |
| 97 | 97 | """ |
| 98 | 98 | |
| 99 | -_CLASSIFY_BATCH_SIMPLE_TEMPLATE__zh = _CLASSIFY_BATCH_SIMPLE_TEMPLATE_ZH = """你是一个服饰电商搜索系统中的相关性判断助手。 | |
| 99 | +_CLASSIFY_BATCH_SIMPLE_TEMPLATE_ZH = """你是一个服饰电商搜索系统中的相关性判断助手。 | |
| 100 | 100 | 给定用户查询词以及每个商品的信息，请为每个商品分配一个相关性标签。 |
| 101 | 101 | |
| 102 | 102 | ## 相关性标签 |
| ... | ... | @@ -108,9 +108,8 @@ _CLASSIFY_BATCH_SIMPLE_TEMPLATE__zh = _CLASSIFY_BATCH_SIMPLE_TEMPLATE_ZH = """你 |
| 108 | 108 | - 查询仅包含产品类型，产品即为该类型。 |
| 109 | 109 | - 查询包含“产品类型 + 属性”，产品在类型及所有明确属性上均符合。 |
| 110 | 110 | |
| 111 | -### 部分相关 | |
| 112 | -产品满足用户的主要意图（核心产品类型匹配），但查询中明确的部分要求未体现，或存在偏差。虽然有不一致，但仍属于“非目标但可接受”的替代品。 | |
| 113 | - | |
| 111 | +### 基本相关 (High Relevant) | |
| 112 | +产品满足用户的主要意图（核心产品类型匹配），但查询中明确的部分要求未在产品信息中体现、无法确认，或存在并不严重冲突的偏差。该商品是满足用户核心需求的良好替代品。 | |
| 114 | 113 | 在以下情况使用部分相关： |
| 115 | 114 | - 核心产品类型匹配，但部分请求的属性在商品信息中缺失、未提及或无法确认。 |
| 116 | 115 | - 核心产品类型匹配，但材质、版型、风格等次要要求存在偏差或不一致。 |
| ... | ... | @@ -130,45 +129,63 @@ _CLASSIFY_BATCH_SIMPLE_TEMPLATE__zh = _CLASSIFY_BATCH_SIMPLE_TEMPLATE_ZH = """你 |
| 130 | 129 | - 在电商搜索中，用户仍可能因为款式、穿着场景相近而点击该商品。 |
| 131 | 130 | - 因此应判为部分相关，即“非目标但可接受”的替代品。 |
| 132 | 131 | |
| 133 | -### 不相关 | |
| 134 | -产品未满足用户的主要购物意图，主要表现为以下情形之一： | |
| 135 | -- 核心产品类型与查询不匹配。 | |
| 136 | -- 产品虽属大致相关的大类，但与查询指定的具体子类不可互换。 | |
| 137 | -- 核心产品类型匹配，但产品明显违背了查询中一个明确且重要的属性要求。 | |
| 132 | +详细案例： | |
| 133 | +- 查询：“黑色中长半身裙” | |
| 134 | +- 鍟嗗搧锛氣滄槬绉嬪鏂版瀹芥澗鏄剧槮澶ф憜闀胯纰庤姳鍗婅韩瑁欒ざ鐨辫璁¤鈥 | |
| 135 | + | |
| 136 | +分析： | |
| 137 | +- 品类匹配：产品是“半身裙”，品类符合。 | |
| 138 | +- 颜色不匹配：产品描述未提及黑色且明确包含“碎花”（floral），花色与纯黑差异较大。 | |
| 139 | +- 长度存在偏差：用户要求“中长”，而产品标题强调“长裙”（Long Skirt），长度偏长。 | |
| 140 | +- 核心品类“半身裙”匹配，且“显瘦”“大摆”等风格可能符合部分搜索“中长半身裙”用户的潜在偏好（如版型相似），“长裙”和“中长”无严重矛盾，属于核心品类匹配，属性存在不严重偏差的“基本相关”。 | |
| 141 | + | |
| 142 | +### 弱相关 (Low Relevant) | |
| 143 | +产品与用户的核心意图存在差距，主要表现为以下情形之一，但仍可能因风格、场景或功能上的相似性而被用户接受。为“非目标但可接受”的替代品。 | |
| 144 | +- **典型情况**： | |
| 145 | + - 核心产品类型有差异，但风格、穿着场景或功能非常接近，如查询“黑色中长半身裙”，商品为“连衣裙”（同属裙装大类，款式相似）。 | |
| 146 | + - 核心产品类型有差异，但在购物场景下属于相近品类，可勉强替代，如查询“牛仔裤”，商品为“休闲裤”（均为裤子大类，风格可能相近）。 | |
| 147 | + - 核心产品类型匹配，但产品在多个非关键属性上存在偏差，导致与用户理想目标差距较大，但仍保留一定关联性。 | |
| 148 | + | |
| 149 | +典型情况： | |
| 150 | +- 查询：“黑色中长半身裙”，产品：“新款高腰V领中长款连衣裙 优雅印花黑色性感连衣裙” → 核心产品类型“半身裙”与“连衣裙”有差异，但两者同属裙装大类且款式上均为“中长款”，具有相似性。 | |
| 151 | + | |
| 152 | +### 不相关 (Irrelevant) | |
| 153 | +产品未满足用户的主要购物意图，用户点击动机极低。主要表现为以下情形之一： | |
| 154 | +- 核心产品类型与查询不匹配，且不属于风格/场景相近的替代品。 | |
| 155 | +- 产品虽属大致相关的大类，但与查询指定的具体子类不可互换，且风格/场景差异大。 | |
| 156 | +- 核心产品类型匹配，但产品明显违背了查询中一个明确且重要的属性要求，且不存在可接受的理由。 | |
| 138 | 157 | |
| 139 | 158 | 典型情况： |
| 140 | 159 | - 查询：“裤子”，产品：“鞋子” → 产品类型错误。 |
| 141 | -- 查询：“连衣裙”，产品：“半身裙” → 具体产品类型不同。 | |
| 142 | -- 查询：“修身裤”，产品：“宽松阔腿裤” → 与版型要求明显冲突。 | |
| 160 | +- 查询：“修身裤”，产品：“宽松阔腿裤” → 与版型要求明显冲突，替代性极低。 | |
| 143 | 161 | - 查询：“无袖连衣裙”，产品：“长袖连衣裙” → 与袖型要求明显冲突。 |
| 144 | - | |
| 145 | -该标签强调用户意图的明确性。当查询指向具体类型或关键属性时，即使产品在更高层级类别上相关，也应按不相关处理。 | |
| 162 | +- 查询：“牛仔裤”，产品：“运动裤” → 核心品类不同（牛仔裤 vs 运动裤），风格和场景差异大。 | |
| 163 | +- 查询：“靴子”，产品：“运动鞋” → 核心品类不同，功能和适用场景差异大。 | |
| 146 | 164 | |
| 147 | 165 | ## 判断原则 |
| 148 | 166 | |
| 149 | -1. 产品类型是最高优先级因素。 | |
| 150 | - 如果查询明确指定了具体产品类型，那么结果必须匹配该产品类型，才可能判为“完全相关”或“部分相关”。 | |
| 151 | - 不同产品类型通常应判为“不相关”，而不是“部分相关”。 | |
| 152 | - | |
| 153 | -2. 相似或相关的产品类型，在查询明确时通常不可互换。 | |
| 154 | - 例如： | |
| 155 | - - 杩炶。瑁 vs 鍗婅韩瑁 vs 杩炰綋瑁 | |
| 156 | - - 牛仔裤 vs 裤子 | |
| 157 | - - T恤 vs 衬衫/上衣 | |
| 158 | - - 开衫 vs 毛衣 | |
| 159 | - - 靴子 vs 鞋子 | |
| 160 | - - 文胸 vs 上衣 | |
| 161 | - - 双肩包 vs 包 | |
| 162 | - 如果用户明确搜索其中一种，其他类型通常应判为“不相关”。 | |
| 163 | - | |
| 164 | -3. 当核心产品类型匹配后，再评估属性。 | |
| 165 | - - 所有明确属性都匹配 → 完全相关 | |
| 166 | - - 部分属性缺失、无法确认，或存在一定偏差，但仍是可接受替代品 → 部分相关 | |
| 167 | - - 明确且重要的属性被明显违背，且不能作为合理替代品 → 不相关 | |
| 168 | - | |
| 169 | -4. 要严格区分“未提及/无法确认”和“明确冲突”。 | |
| 170 | - - 如果某属性没有提及，或无法验证，优先判为“部分相关”。 | |
| 171 | - - 如果某属性与查询要求明确相反，则判为“不相关”；除非在购物语境下它仍明显属于可接受替代品。 | |
| 167 | +1. **产品类型是最高优先级因素。** | |
| 168 | + 如果查询明确指定了具体产品类型，那么结果必须匹配该产品类型，才可能判为“完全相关”或“基本相关”。不同产品类型通常应判为“弱相关”或“不相关”。 | |
| 169 | + - **弱相关**：仅当两种产品类型风格、场景、功能非常接近，可能被视为可接受的替代品时使用。 | |
| 170 | + - **不相关**：其他所有产品类型不匹配的情况。 | |
| 171 | + | |
| 172 | +2. **相似或相关的产品类型，在查询明确时通常不可互换，但需根据接近程度区分。** | |
| 173 | + 例如： | |
| 174 | + - **风格/场景高度接近，可判为弱相关**：连衣裙 vs 半身裙、长裙 vs 中长裙、牛仔裤 vs 休闲裤、运动鞋 vs 板鞋。 | |
| 175 | + - **风格/场景差异大，判为不相关**：裤子 vs 鞋子、T恤 vs 帽子、靴子 vs 运动鞋、牛仔裤 vs 西装裤、双肩包 vs 手提包。 | |
| 176 | + 如果用户明确搜索其中一种，其他类型是否可接受取决于其风格、场景的接近程度。 | |
| 177 | + | |
| 178 | +3. **当核心产品类型匹配后，再评估属性。** | |
| 179 | + - 所有明确属性都匹配 → **完全相关** | |
| 180 | + - 部分属性缺失、无法确认，或存在较小偏差 → **基本相关** | |
| 181 | + - 明确且重要的属性被明显违背（如修身 vs 宽松），但核心品类仍匹配 → **弱相关** 或 **不相关**。 | |
| 182 | + - **弱相关**：属性明显违背，但存在可被用户接受的微弱理由（如版型虽不同但风格类似）。 | |
| 183 | + - **不相关**：属性明显违背，且替代性极低，用户无点击动机（如修身 vs 宽松阔腿裤）。 | |
| 184 | + | |
| 185 | +4. **要严格区分“未提及/无法确认”、“较小偏差”、“明确冲突”。** | |
| 186 | + - 如果某属性没有提及，或无法验证，优先判为“**基本相关**”。 | |
| 187 | + - 如果某属性存在较小偏差（如颜色不同、材质不同），判为“**基本相关**”。 | |
| 188 | + - 如果某属性与查询要求明确相反，则需根据冲突的严重性和替代性判为“**弱相关**”或“**不相关**”。 | |
| 172 | 189 | |
| 173 | 190 | 查询：{query} |
| 174 | 191 | ... | ... |