Commit a99e62ba4e6b6e7493d2d0afc445d5794edaa616

Authored by tangwang
1 parent c51d254f

记录各阶段耗时

config/config.yaml
@@ -188,7 +188,7 @@ services: @@ -188,7 +188,7 @@ services:
188 infer_batch_size: 64 188 infer_batch_size: 64
189 sort_by_doc_length: true 189 sort_by_doc_length: true
190 length_sort_mode: "char" # char | token 190 length_sort_mode: "char" # char | token
191 - instruction: "Given a web search query, retrieve relevant passages that answer the query" 191 + instruction: "Given a shopping query, rank product titles by relevance"
192 192
193 # SPU配置(已启用,使用嵌套skus) 193 # SPU配置(已启用,使用嵌套skus)
194 spu_config: 194 spu_config:
context/request_context.py
@@ -19,7 +19,10 @@ class RequestContextStage(Enum): @@ -19,7 +19,10 @@ class RequestContextStage(Enum):
19 QUERY_PARSING = "query_parsing" 19 QUERY_PARSING = "query_parsing"
20 BOOLEAN_PARSING = "boolean_parsing" 20 BOOLEAN_PARSING = "boolean_parsing"
21 QUERY_BUILDING = "query_building" 21 QUERY_BUILDING = "query_building"
22 - ELASTICSEARCH_SEARCH = "elasticsearch_search" 22 + # ES 主召回查询
  23 + ELASTICSEARCH_SEARCH_PRIMARY = "elasticsearch_search_primary"
  24 + # ES 按 ID 回源分页详情回填
  25 + ELASTICSEARCH_PAGE_FILL = "elasticsearch_page_fill"
23 RESULT_PROCESSING = "result_processing" 26 RESULT_PROCESSING = "result_processing"
24 RERANKING = "reranking" 27 RERANKING = "reranking"
25 28
docs/TEI_SERVICE说明文档.md
@@ -107,7 +107,7 @@ curl -sS http://127.0.0.1:8080/health @@ -107,7 +107,7 @@ curl -sS http://127.0.0.1:8080/health
107 ```bash 107 ```bash
108 curl -sS http://127.0.0.1:8080/embed \ 108 curl -sS http://127.0.0.1:8080/embed \
109 -H "Content-Type: application/json" \ 109 -H "Content-Type: application/json" \
110 - -d '{"inputs":["Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery: What is the capital of China?"]}' 110 + -d '{"inputs":["Instruct: Given a shopping query, rank product titles by relevance\nQuery: What is the capital of China?"]}'
111 ``` 111 ```
112 112
113 返回应为二维数组(每条输入对应一个向量)。 113 返回应为二维数组(每条输入对应一个向量)。
docs/性能测试报告.md
@@ -93,12 +93,6 @@ source activate.sh @@ -93,12 +93,6 @@ source activate.sh
93 ./scripts/service_ctl.sh start embedding translator reranker backend 93 ./scripts/service_ctl.sh start embedding translator reranker backend
94 ``` 94 ```
95 95
96 -如果 `backend` 未成功常驻,可临时手动启动:  
97 -  
98 -```bash  
99 -.venv/bin/python main.py serve --host 0.0.0.0 --port 6002 --es-host http://localhost:9200  
100 -```  
101 -  
102 ### 5.3 健康检查 96 ### 5.3 健康检查
103 97
104 ```bash 98 ```bash
@@ -160,12 +154,38 @@ cd /data/saas-search @@ -160,12 +154,38 @@ cd /data/saas-search
160 154
161 ### 7.4 Reranker(rerank) 155 ### 7.4 Reranker(rerank)
162 156
  157 +测试方法(本节已按新口径重跑):
  158 +- `query` 固定为 `wireless mouse`
  159 +- 每次请求 `docs=386`
  160 +- 从 `1000` 个候选单词中随机采样,先随机句长 `15-40`,再生成每条 doc 句子
  161 +- 并发 `1/5/10/20`,每档 `20s`
  162 +- 结果文件:`perf_reports/2026-03-12/rerank_realistic/rerank_386docs.json`
  163 +
  164 +复现命令:
  165 +
  166 +```bash
  167 +.venv/bin/python scripts/perf_api_benchmark.py \
  168 + --scenario rerank \
  169 + --duration 20 \
  170 + --concurrency-list 1,5,10,20 \
  171 + --timeout 60 \
  172 + --rerank-dynamic-docs \
  173 + --rerank-doc-count 386 \
  174 + --rerank-vocab-size 1000 \
  175 + --rerank-sentence-min-words 15 \
  176 + --rerank-sentence-max-words 40 \
  177 + --rerank-query "wireless mouse" \
  178 + --rerank-seed 20260312 \
  179 + --reranker-base http://127.0.0.1:6007 \
  180 + --output perf_reports/2026-03-12/rerank_realistic/rerank_386docs.json
  181 +```
  182 +
163 | 并发 | 请求数 | 成功率 | 吞吐(RPS) | Avg(ms) | P95(ms) | Max(ms) | 183 | 并发 | 请求数 | 成功率 | 吞吐(RPS) | Avg(ms) | P95(ms) | Max(ms) |
164 |---:|---:|---:|---:|---:|---:|---:| 184 |---:|---:|---:|---:|---:|---:|---:|
165 -| 1 | 802 | 100.0% | 40.06 | 24.87 | 37.45 | 49.63 |  
166 -| 5 | 796 | 100.0% | 39.53 | 125.70 | 190.02 | 218.60 |  
167 -| 10 | 853 | 100.0% | 41.89 | 235.87 | 315.37 | 402.27 |  
168 -| 20 | 836 | 100.0% | 40.92 | 481.98 | 723.56 | 781.81 | 185 +| 1 | 14 | 100.0% | 0.67 | 1498.64 | 1799.25 | 2160.96 |
  186 +| 5 | 15 | 100.0% | 0.62 | 8011.99 | 9725.61 | 9726.02 |
  187 +| 10 | 20 | 100.0% | 0.61 | 16217.12 | 18043.05 | 18050.04 |
  188 +| 20 | 20 | 100.0% | 0.60 | 33252.35 | 33456.74 | 33480.14 |
169 189
170 ## 8. 指标解读与并发建议 190 ## 8. 指标解读与并发建议
171 191
@@ -174,7 +194,7 @@ cd /data/saas-search @@ -174,7 +194,7 @@ cd /data/saas-search
174 - `backend_search`:吞吐约 `8 rps` 平台化,延迟随并发上升明显,属于重链路(检索+向量+重排)特征。 194 - `backend_search`:吞吐约 `8 rps` 平台化,延迟随并发上升明显,属于重链路(检索+向量+重排)特征。
175 - `backend_suggest`:吞吐高且稳定(约 `200+ rps`),对并发更友好。 195 - `backend_suggest`:吞吐高且稳定(约 `200+ rps`),对并发更友好。
176 - `embed_text`:随并发提升吞吐持续增长,延迟平滑上升,扩展性较好。 196 - `embed_text`:随并发提升吞吐持续增长,延迟平滑上升,扩展性较好。
177 -- `rerank`:吞吐在 `~40 rps` 附近平台化,延迟随并发线性抬升,符合模型推理瓶颈特征 197 +- `rerank`:在 `docs=386` 的真实口径下,吞吐约 `0.6 rps`,延迟随并发显著抬升(并发20下 P95 约 `33.5s`),是当前最重瓶颈
178 198
179 ### 8.2 并发压测建议 199 ### 8.2 并发压测建议
180 200
@@ -232,6 +252,7 @@ cd /data/saas-search @@ -232,6 +252,7 @@ cd /data/saas-search
232 - 压测脚本:`scripts/perf_api_benchmark.py` 252 - 压测脚本:`scripts/perf_api_benchmark.py`
233 - 本次结果:`perf_reports/2026-03-12/perf_matrix_report.json` 253 - 本次结果:`perf_reports/2026-03-12/perf_matrix_report.json`
234 - Search 多租户补测:`perf_reports/2026-03-12/search_tenant_matrix/` 254 - Search 多租户补测:`perf_reports/2026-03-12/search_tenant_matrix/`
  255 +- Reranker 386 docs 口径补测:`perf_reports/2026-03-12/rerank_realistic/rerank_386docs.json`
235 256
236 ## 12. Search 多租户补测(2026-03-12) 257 ## 12. Search 多租户补测(2026-03-12)
237 258
reranker/README.md
@@ -54,16 +54,16 @@ services: @@ -54,16 +54,16 @@ services:
54 length_sort_mode: "char" # char | token 54 length_sort_mode: "char" # char | token
55 enable_prefix_caching: true 55 enable_prefix_caching: true
56 enforce_eager: false 56 enforce_eager: false
57 - instruction: "Given a web search query, retrieve relevant passages that answer the query" 57 + instruction: "Given a shopping query, rank product titles by relevance"
58 qwen3_transformers: 58 qwen3_transformers:
59 model_name: "Qwen/Qwen3-Reranker-0.6B" 59 model_name: "Qwen/Qwen3-Reranker-0.6B"
60 - instruction: "Given a web search query, retrieve relevant passages that answer the query" 60 + instruction: "Given a shopping query, rank product titles by relevance"
61 max_length: 8192 61 max_length: 8192
62 batch_size: 64 62 batch_size: 64
63 use_fp16: true 63 use_fp16: true
64 tensor_parallel_size: 1 64 tensor_parallel_size: 1
65 gpu_memory_utilization: 0.8 65 gpu_memory_utilization: 0.8
66 - instruction: "Given a web search query, retrieve relevant passages that answer the query" 66 + instruction: "Given a shopping query, rank product titles by relevance"
67 ``` 67 ```
68 68
69 - 服务端口、请求限制等仍在 `reranker/config.py`(或环境变量 `RERANKER_PORT`、`RERANKER_HOST`)。 69 - 服务端口、请求限制等仍在 `reranker/config.py`(或环境变量 `RERANKER_PORT`、`RERANKER_HOST`)。
reranker/backends/qwen3_transformers.py
@@ -42,7 +42,7 @@ class Qwen3TransformersRerankerBackend: @@ -42,7 +42,7 @@ class Qwen3TransformersRerankerBackend:
42 model_name = str(self._config.get("model_name") or "Qwen/Qwen3-Reranker-0.6B") 42 model_name = str(self._config.get("model_name") or "Qwen/Qwen3-Reranker-0.6B")
43 self._instruction = str( 43 self._instruction = str(
44 self._config.get("instruction") 44 self._config.get("instruction")
45 - or "Given a web search query, retrieve relevant passages that answer the query" 45 + or "Given a shopping query, rank product titles by relevance"
46 ) 46 )
47 max_length = int(self._config.get("max_length", 8192)) 47 max_length = int(self._config.get("max_length", 8192))
48 batch_size = int(self._config.get("batch_size", 64)) 48 batch_size = int(self._config.get("batch_size", 64))
reranker/backends/qwen3_vllm.py
@@ -65,7 +65,7 @@ class Qwen3VLLMRerankerBackend: @@ -65,7 +65,7 @@ class Qwen3VLLMRerankerBackend:
65 dtype = str(self._config.get("dtype", "float16")).strip().lower() 65 dtype = str(self._config.get("dtype", "float16")).strip().lower()
66 self._instruction = str( 66 self._instruction = str(
67 self._config.get("instruction") 67 self._config.get("instruction")
68 - or "Given a web search query, retrieve relevant passages that answer the query" 68 + or "Given a shopping query, rank product titles by relevance"
69 ) 69 )
70 infer_batch_size = os.getenv("RERANK_VLLM_INFER_BATCH_SIZE") or self._config.get("infer_batch_size", 64) 70 infer_batch_size = os.getenv("RERANK_VLLM_INFER_BATCH_SIZE") or self._config.get("infer_batch_size", 64)
71 sort_by_doc_length = os.getenv("RERANK_VLLM_SORT_BY_DOC_LENGTH") 71 sort_by_doc_length = os.getenv("RERANK_VLLM_SORT_BY_DOC_LENGTH")
scripts/perf_api_benchmark.py
@@ -22,6 +22,7 @@ import argparse @@ -22,6 +22,7 @@ import argparse
22 import asyncio 22 import asyncio
23 import json 23 import json
24 import math 24 import math
  25 +import random
25 import statistics 26 import statistics
26 import time 27 import time
27 from dataclasses import dataclass 28 from dataclasses import dataclass
@@ -251,6 +252,7 @@ async def run_single_scenario( @@ -251,6 +252,7 @@ async def run_single_scenario(
251 concurrency: int, 252 concurrency: int,
252 max_requests: int, 253 max_requests: int,
253 max_errors: int, 254 max_errors: int,
  255 + rerank_dynamic_cfg: Optional[Dict[str, Any]] = None,
254 ) -> Dict[str, Any]: 256 ) -> Dict[str, Any]:
255 latencies: List[float] = [] 257 latencies: List[float] = []
256 status_counter: Dict[int, int] = {} 258 status_counter: Dict[int, int] = {}
@@ -267,6 +269,9 @@ async def run_single_scenario( @@ -267,6 +269,9 @@ async def run_single_scenario(
267 async def worker(worker_id: int, client: httpx.AsyncClient) -> None: 269 async def worker(worker_id: int, client: httpx.AsyncClient) -> None:
268 nonlocal total_requests, success_requests, stop_flag 270 nonlocal total_requests, success_requests, stop_flag
269 idx = worker_id % len(scenario.templates) 271 idx = worker_id % len(scenario.templates)
  272 + worker_rng: Optional[random.Random] = None
  273 + if rerank_dynamic_cfg is not None:
  274 + worker_rng = random.Random(int(rerank_dynamic_cfg["seed"]) + worker_id)
270 275
271 while not stop_flag: 276 while not stop_flag:
272 elapsed = time.perf_counter() - start 277 elapsed = time.perf_counter() - start
@@ -287,11 +292,14 @@ async def run_single_scenario( @@ -287,11 +292,14 @@ async def run_single_scenario(
287 status = 0 292 status = 0
288 err = "" 293 err = ""
289 try: 294 try:
  295 + req_json_body = tpl.json_body
  296 + if rerank_dynamic_cfg is not None and worker_rng is not None:
  297 + req_json_body = build_random_rerank_payload(rerank_dynamic_cfg, worker_rng)
290 resp = await client.request( 298 resp = await client.request(
291 method=tpl.method, 299 method=tpl.method,
292 url=tpl.path, 300 url=tpl.path,
293 params=tpl.params, 301 params=tpl.params,
294 - json=tpl.json_body, 302 + json=req_json_body,
295 headers=tpl.headers, 303 headers=tpl.headers,
296 ) 304 )
297 status = int(resp.status_code) 305 status = int(resp.status_code)
@@ -448,9 +456,83 @@ def parse_args() -> argparse.Namespace: @@ -448,9 +456,83 @@ def parse_args() -> argparse.Namespace:
448 default="", 456 default="",
449 help="Comma-separated concurrency list (e.g. 1,5,10,20). If set, overrides --concurrency.", 457 help="Comma-separated concurrency list (e.g. 1,5,10,20). If set, overrides --concurrency.",
450 ) 458 )
  459 + parser.add_argument(
  460 + "--rerank-dynamic-docs",
  461 + action="store_true",
  462 + help="For rerank scenario, generate docs payload dynamically on every request.",
  463 + )
  464 + parser.add_argument("--rerank-doc-count", type=int, default=386, help="Doc count per rerank request when dynamic docs are enabled")
  465 + parser.add_argument("--rerank-vocab-size", type=int, default=1000, help="Word pool size for rerank dynamic docs generation")
  466 + parser.add_argument("--rerank-sentence-min-words", type=int, default=15, help="Minimum words per generated doc sentence")
  467 + parser.add_argument("--rerank-sentence-max-words", type=int, default=40, help="Maximum words per generated doc sentence")
  468 + parser.add_argument("--rerank-query", type=str, default="wireless mouse", help="Fixed query used for rerank dynamic docs mode")
  469 + parser.add_argument("--rerank-seed", type=int, default=20260312, help="Base random seed for rerank dynamic docs mode")
451 return parser.parse_args() 470 return parser.parse_args()
452 471
453 472
  473 +def build_rerank_dynamic_cfg(args: argparse.Namespace) -> Dict[str, Any]:
  474 + min_words = int(args.rerank_sentence_min_words)
  475 + max_words = int(args.rerank_sentence_max_words)
  476 + doc_count = int(args.rerank_doc_count)
  477 + vocab_size = int(args.rerank_vocab_size)
  478 + if doc_count <= 0:
  479 + raise ValueError(f"rerank-doc-count must be > 0, got {doc_count}")
  480 + if vocab_size <= 0:
  481 + raise ValueError(f"rerank-vocab-size must be > 0, got {vocab_size}")
  482 + if min_words <= 0:
  483 + raise ValueError(f"rerank-sentence-min-words must be > 0, got {min_words}")
  484 + if max_words < min_words:
  485 + raise ValueError(
  486 + f"rerank-sentence-max-words must be >= rerank-sentence-min-words, got {max_words} < {min_words}"
  487 + )
  488 + if args.rerank_seed < 0:
  489 + raise ValueError(f"rerank-seed must be >= 0, got {args.rerank_seed}")
  490 +
  491 + # Use deterministic, letter-only pseudo words to avoid long tokenization of numeric strings.
  492 + syllables = [
  493 + "al", "an", "ar", "as", "at", "ba", "be", "bi", "bo", "ca",
  494 + "ce", "ci", "co", "da", "de", "di", "do", "el", "en", "er",
  495 + "fa", "fe", "fi", "fo", "ga", "ge", "gi", "go", "ha", "he",
  496 + "hi", "ho", "ia", "ie", "il", "in", "io", "is", "ka", "ke",
  497 + "ki", "ko", "la", "le", "li", "lo", "ma", "me", "mi", "mo",
  498 + ]
  499 + word_pool: List[str] = []
  500 + for a in syllables:
  501 + for b in syllables:
  502 + word_pool.append(f"{a}{b}")
  503 + if len(word_pool) >= vocab_size:
  504 + break
  505 + if len(word_pool) >= vocab_size:
  506 + break
  507 + if len(word_pool) < vocab_size:
  508 + raise ValueError(f"Unable to generate enough synthetic words: requested={vocab_size}, got={len(word_pool)}")
  509 + return {
  510 + "query": args.rerank_query,
  511 + "doc_count": doc_count,
  512 + "min_words": min_words,
  513 + "max_words": max_words,
  514 + "seed": int(args.rerank_seed),
  515 + "normalize": True,
  516 + "word_pool": word_pool,
  517 + }
  518 +
  519 +
  520 +def build_random_rerank_payload(
  521 + cfg: Dict[str, Any],
  522 + rng: random.Random,
  523 +) -> Dict[str, Any]:
  524 + word_pool: List[str] = cfg["word_pool"]
  525 + docs = []
  526 + for _ in range(cfg["doc_count"]):
  527 + doc_len = rng.randint(cfg["min_words"], cfg["max_words"])
  528 + docs.append(" ".join(rng.choices(word_pool, k=doc_len)))
  529 + return {
  530 + "query": cfg["query"],
  531 + "docs": docs,
  532 + "normalize": bool(cfg.get("normalize", True)),
  533 + }
  534 +
  535 +
454 async def main_async() -> int: 536 async def main_async() -> int:
455 args = parse_args() 537 args = parse_args()
456 scenarios = build_scenarios(args) 538 scenarios = build_scenarios(args)
@@ -474,6 +556,14 @@ async def main_async() -> int: @@ -474,6 +556,14 @@ async def main_async() -> int:
474 print("No scenarios to run.") 556 print("No scenarios to run.")
475 return 2 557 return 2
476 558
  559 + rerank_dynamic_cfg: Optional[Dict[str, Any]] = None
  560 + if args.rerank_dynamic_docs:
  561 + try:
  562 + rerank_dynamic_cfg = build_rerank_dynamic_cfg(args)
  563 + except ValueError as exc:
  564 + print(str(exc))
  565 + return 2
  566 +
477 concurrency_values = [args.concurrency] 567 concurrency_values = [args.concurrency]
478 if args.concurrency_list: 568 if args.concurrency_list:
479 try: 569 try:
@@ -498,6 +588,13 @@ async def main_async() -> int: @@ -498,6 +588,13 @@ async def main_async() -> int:
498 print(f" embedding_base={args.embedding_base}") 588 print(f" embedding_base={args.embedding_base}")
499 print(f" translator_base={args.translator_base}") 589 print(f" translator_base={args.translator_base}")
500 print(f" reranker_base={args.reranker_base}") 590 print(f" reranker_base={args.reranker_base}")
  591 + if args.rerank_dynamic_docs:
  592 + print(" rerank_dynamic_docs=True")
  593 + print(f" rerank_doc_count={args.rerank_doc_count}")
  594 + print(f" rerank_vocab_size={args.rerank_vocab_size}")
  595 + print(f" rerank_sentence_words=[{args.rerank_sentence_min_words},{args.rerank_sentence_max_words}]")
  596 + print(f" rerank_query={args.rerank_query}")
  597 + print(f" rerank_seed={args.rerank_seed}")
501 598
502 results: List[Dict[str, Any]] = [] 599 results: List[Dict[str, Any]] = []
503 total_jobs = len(run_names) * len(concurrency_values) 600 total_jobs = len(run_names) * len(concurrency_values)
@@ -513,6 +610,7 @@ async def main_async() -&gt; int: @@ -513,6 +610,7 @@ async def main_async() -&gt; int:
513 concurrency=c, 610 concurrency=c,
514 max_requests=args.max_requests, 611 max_requests=args.max_requests,
515 max_errors=args.max_errors, 612 max_errors=args.max_errors,
  613 + rerank_dynamic_cfg=rerank_dynamic_cfg if name == "rerank" else None,
516 ) 614 )
517 result["concurrency"] = c 615 result["concurrency"] = c
518 print(format_summary(result)) 616 print(format_summary(result))
@@ -538,6 +636,13 @@ async def main_async() -&gt; int: @@ -538,6 +636,13 @@ async def main_async() -&gt; int:
538 "translator_base": args.translator_base, 636 "translator_base": args.translator_base,
539 "reranker_base": args.reranker_base, 637 "reranker_base": args.reranker_base,
540 "cases_file": args.cases_file or None, 638 "cases_file": args.cases_file or None,
  639 + "rerank_dynamic_docs": args.rerank_dynamic_docs,
  640 + "rerank_doc_count": args.rerank_doc_count,
  641 + "rerank_vocab_size": args.rerank_vocab_size,
  642 + "rerank_sentence_min_words": args.rerank_sentence_min_words,
  643 + "rerank_sentence_max_words": args.rerank_sentence_max_words,
  644 + "rerank_query": args.rerank_query,
  645 + "rerank_seed": args.rerank_seed,
541 }, 646 },
542 "results": results, 647 "results": results,
543 "overall": aggregate_results(results), 648 "overall": aggregate_results(results),
search/searcher.py
@@ -459,8 +459,8 @@ class Searcher: @@ -459,8 +459,8 @@ class Searcher:
459 finally: 459 finally:
460 context.end_stage(RequestContextStage.QUERY_BUILDING) 460 context.end_stage(RequestContextStage.QUERY_BUILDING)
461 461
462 - # Step 4: Elasticsearch search  
463 - context.start_stage(RequestContextStage.ELASTICSEARCH_SEARCH) 462 + # Step 4: Elasticsearch search (primary recall)
  463 + context.start_stage(RequestContextStage.ELASTICSEARCH_SEARCH_PRIMARY)
464 try: 464 try:
465 # Use tenant-specific index name(开启重排且在窗口内时已用 es_fetch_size/es_fetch_from) 465 # Use tenant-specific index name(开启重排且在窗口内时已用 es_fetch_size/es_fetch_from)
466 es_response = self.es_client.search( 466 es_response = self.es_client.search(
@@ -489,7 +489,7 @@ class Searcher: @@ -489,7 +489,7 @@ class Searcher:
489 ) 489 )
490 raise 490 raise
491 finally: 491 finally:
492 - context.end_stage(RequestContextStage.ELASTICSEARCH_SEARCH) 492 + context.end_stage(RequestContextStage.ELASTICSEARCH_SEARCH_PRIMARY)
493 493
494 # Optional Step 4.5: AI reranking(仅当请求范围在重排窗口内时执行) 494 # Optional Step 4.5: AI reranking(仅当请求范围在重排窗口内时执行)
495 if do_rerank and in_rerank_window: 495 if do_rerank and in_rerank_window:
@@ -557,29 +557,33 @@ class Searcher: @@ -557,29 +557,33 @@ class Searcher:
557 extra={'reqid': context.reqid, 'uid': context.uid} 557 extra={'reqid': context.reqid, 'uid': context.uid}
558 ) 558 )
559 else: 559 else:
560 - page_ids = [str(h.get("_id")) for h in sliced if h.get("_id") is not None]  
561 - details_by_id, fill_took = self._fetch_hits_by_ids(  
562 - index_name=index_name,  
563 - doc_ids=page_ids,  
564 - source_spec=response_source_spec,  
565 - )  
566 - filled = 0  
567 - for hit in sliced:  
568 - hid = hit.get("_id")  
569 - if hid is None:  
570 - continue  
571 - detail_hit = details_by_id.get(str(hid))  
572 - if detail_hit is None:  
573 - continue  
574 - if "_source" in detail_hit:  
575 - hit["_source"] = detail_hit.get("_source") or {}  
576 - filled += 1  
577 - if fill_took:  
578 - es_response["took"] = int((es_response.get("took", 0) or 0) + fill_took)  
579 - context.logger.info(  
580 - f"分页详情回填 | ids={len(page_ids)} | filled={filled} | took={fill_took}ms",  
581 - extra={'reqid': context.reqid, 'uid': context.uid}  
582 - ) 560 + context.start_stage(RequestContextStage.ELASTICSEARCH_PAGE_FILL)
  561 + try:
  562 + page_ids = [str(h.get("_id")) for h in sliced if h.get("_id") is not None]
  563 + details_by_id, fill_took = self._fetch_hits_by_ids(
  564 + index_name=index_name,
  565 + doc_ids=page_ids,
  566 + source_spec=response_source_spec,
  567 + )
  568 + filled = 0
  569 + for hit in sliced:
  570 + hid = hit.get("_id")
  571 + if hid is None:
  572 + continue
  573 + detail_hit = details_by_id.get(str(hid))
  574 + if detail_hit is None:
  575 + continue
  576 + if "_source" in detail_hit:
  577 + hit["_source"] = detail_hit.get("_source") or {}
  578 + filled += 1
  579 + if fill_took:
  580 + es_response["took"] = int((es_response.get("took", 0) or 0) + fill_took)
  581 + context.logger.info(
  582 + f"分页详情回填 | ids={len(page_ids)} | filled={filled} | took={fill_took}ms",
  583 + extra={'reqid': context.reqid, 'uid': context.uid}
  584 + )
  585 + finally:
  586 + context.end_stage(RequestContextStage.ELASTICSEARCH_PAGE_FILL)
583 587
584 context.logger.info( 588 context.logger.info(
585 f"重排分页切片 | from={from_}, size={size}, 返回={len(sliced)}条", 589 f"重排分页切片 | from={from_}, size={size}, 返回={len(sliced)}条",
@@ -191,7 +191,7 @@ def temp_config_file() -> Generator[str, None, None]: @@ -191,7 +191,7 @@ def temp_config_file() -> Generator[str, None, None]:
191 "functions": [] 191 "functions": []
192 }, 192 },
193 "rerank": { 193 "rerank": {
194 - "rerank_window": 400 194 + "rerank_window": 386
195 } 195 }
196 } 196 }
197 197