Commit deccd68adcbb038ef82062bfe047f5d560a4bd53

Authored by tangwang
1 parent e874eb50

Added the SKU pre-selection step in search/searcher.py, right before
ResultFormatter.format_search_results() runs.

What changed:

For each final paginated SPU hit, the searcher now scans
skus[].option1_value against the query text set built from the original
query, normalized query, rewritten query, per-language query texts
(query_text_by_lang), and translations.
If no option1_value matches textually, it falls back to embedding
similarity and picks the SKU with the highest inner product against the
query embedding.
The matched SKU is promoted to the front of the SPU’s skus list.
The SPU-level image_url is replaced with that matched SKU’s image_src.
I left api/result_formatter.py unchanged because it already preserves
the SKU order and reads image_url from _source; updating the page hits
in searcher makes the formatter return the desired result automatically.

Verification:

ReadLints on the edited files: no errors
Passed targeted tests:
pytest tests/test_search_rerank_window.py -k "translated_query or
no_direct_option_match"
] 0 → 100644
... ... @@ -0,0 +1,17 @@
  1 +docs
  2 +# Please enter the commit message for your changes. Lines starting
  3 +# with '#' will be ignored, and an empty message aborts the commit.
  4 +#
  5 +# On branch master
  6 +# Your branch is ahead of 'origin/master' by 5 commits.
  7 +# (use "git push" to publish your local commits)
  8 +#
  9 +# Changes to be committed:
  10 +# modified: config/config.yaml
  11 +# modified: docs/TODO.txt
  12 +# modified: "docs/\346\220\234\347\264\242API\345\257\271\346\216\245\346\214\207\345\215\227-07-\345\276\256\346\234\215\345\212\241\346\216\245\345\217\243\357\274\210Embedding-Reranker-Translation\357\274\211.md"
  13 +# modified: "docs/\347\233\270\345\205\263\346\200\247\346\243\200\347\264\242\344\274\230\345\214\226\350\257\264\346\230\216.md"
  14 +#
  15 +# Changes not staged for commit:
  16 +# modified: third-party/clip-as-service (untracked content)
  17 +#
... ...
frontend/index.html
... ... @@ -199,7 +199,7 @@
199 199 </footer>
200 200  
201 201 <script src="/static/js/tenant_facets_config.js?v=1.4"></script>
202   - <script src="/static/js/app.js?v=1.0"></script>
  202 + <script src="/static/js/app.js?v=1.1"></script>
203 203 <script>
204 204 // 自动补全功能(使用后端 /search/suggestions 接口)
205 205 const SUGGEST_API = API_BASE_URL + '/search/suggestions';
... ...
frontend/static/css/style.css
... ... @@ -371,9 +371,31 @@ body {
371 371 margin-bottom: 2px;
372 372 }
373 373  
  374 +.product-debug-actions {
  375 + display: flex;
  376 + flex-wrap: wrap;
  377 + align-items: center;
  378 + gap: 10px;
  379 + margin-top: 8px;
  380 +}
  381 +
  382 +.product-debug-btn-api-result {
  383 + font-family: inherit;
  384 + font-size: 12px;
  385 + padding: 4px 10px;
  386 + border: 1px solid #d35400;
  387 + border-radius: 4px;
  388 + background: #fff8f3;
  389 + color: #d35400;
  390 + cursor: pointer;
  391 +}
  392 +
  393 +.product-debug-btn-api-result:hover {
  394 + background: #fdebd0;
  395 +}
  396 +
374 397 .product-debug-link {
375 398 display: inline-block;
376   - margin-top: 6px;
377 399 font-size: 12px;
378 400 color: #e67e22;
379 401 text-decoration: none;
... ... @@ -383,6 +405,80 @@ body {
383 405 text-decoration: underline;
384 406 }
385 407  
  408 +.api-result-viewer-backdrop {
  409 + display: none;
  410 + position: fixed;
  411 + inset: 0;
  412 + z-index: 2000;
  413 + background: rgba(0, 0, 0, 0.45);
  414 + align-items: center;
  415 + justify-content: center;
  416 + padding: 24px;
  417 + box-sizing: border-box;
  418 +}
  419 +
  420 +.api-result-viewer-panel {
  421 + background: #fff;
  422 + border-radius: 8px;
  423 + max-width: min(920px, 100%);
  424 + max-height: min(85vh, 100%);
  425 + display: flex;
  426 + flex-direction: column;
  427 + box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
  428 + overflow: hidden;
  429 +}
  430 +
  431 +.api-result-viewer-header {
  432 + display: flex;
  433 + align-items: center;
  434 + justify-content: space-between;
  435 + gap: 12px;
  436 + padding: 12px 14px;
  437 + border-bottom: 1px solid #eee;
  438 + flex-shrink: 0;
  439 +}
  440 +
  441 +.api-result-viewer-title {
  442 + font-size: 14px;
  443 + font-weight: 600;
  444 + color: #333;
  445 +}
  446 +
  447 +.api-result-viewer-title code {
  448 + font-size: 12px;
  449 + font-weight: normal;
  450 + background: #f4f4f4;
  451 + padding: 1px 6px;
  452 + border-radius: 3px;
  453 +}
  454 +
  455 +.api-result-viewer-close {
  456 + border: none;
  457 + background: transparent;
  458 + font-size: 22px;
  459 + line-height: 1;
  460 + color: #888;
  461 + cursor: pointer;
  462 + padding: 0 4px;
  463 +}
  464 +
  465 +.api-result-viewer-close:hover {
  466 + color: #333;
  467 +}
  468 +
  469 +.api-result-viewer-pre {
  470 + margin: 0;
  471 + padding: 14px;
  472 + overflow: auto;
  473 + flex: 1;
  474 + font-size: 12px;
  475 + line-height: 1.45;
  476 + background: #fafafa;
  477 + border: none;
  478 + font-family: Menlo, Consolas, "Courier New", monospace;
  479 + white-space: pre;
  480 +}
  481 +
386 482 .product-card:hover {
387 483 box-shadow: 0 4px 12px rgba(0,0,0,0.1);
388 484 transform: translateY(-2px);
... ...
frontend/static/js/app.js
... ... @@ -63,11 +63,81 @@ let state = {
63 63 debug: true // Always enable debug mode for test frontend
64 64 };
65 65  
// Modal overlay: shows a single element of the results[] array returned by /search/
// (the API result item, not the raw ES document).
function openApiResultViewer(item) {
    const BACKDROP_ID = 'apiResultViewerBackdrop';

    // Builds the overlay on first use and wires up its three dismiss paths:
    // click on the dimmed area, click on the close button, and the Escape key.
    const createBackdrop = () => {
        const overlay = document.createElement('div');
        overlay.id = BACKDROP_ID;
        overlay.className = 'api-result-viewer-backdrop';
        overlay.innerHTML = `
            <div class="api-result-viewer-panel" role="dialog" aria-modal="true" aria-labelledby="apiResultViewerTitle">
                <div class="api-result-viewer-header">
                    <span id="apiResultViewerTitle" class="api-result-viewer-title">搜索结果项(API <code>results[]</code>)</span>
                    <button type="button" class="api-result-viewer-close" aria-label="关闭">&times;</button>
                </div>
                <pre class="api-result-viewer-pre"></pre>
            </div>
        `;
        document.body.appendChild(overlay);

        overlay.addEventListener('click', (event) => {
            // Ignore clicks that land inside the panel; only the backdrop itself dismisses.
            if (event.target === overlay) {
                closeApiResultViewer();
            }
        });
        overlay
            .querySelector('.api-result-viewer-close')
            .addEventListener('click', closeApiResultViewer);
        document.addEventListener('keydown', (event) => {
            if (event.key === 'Escape') {
                closeApiResultViewer();
            }
        });
        return overlay;
    };

    const backdrop = document.getElementById(BACKDROP_ID) || createBackdrop();
    const pre = backdrop.querySelector('.api-result-viewer-pre');

    // JSON.stringify can throw (e.g. on circular structures); fall back to String().
    let rendered;
    try {
        rendered = JSON.stringify(item, null, 2);
    } catch (err) {
        rendered = String(item);
    }
    pre.textContent = rendered;
    backdrop.style.display = 'flex';
}
  103 +
// Hides the API result overlay; does nothing when the overlay was never created.
function closeApiResultViewer() {
    const overlay = document.getElementById('apiResultViewerBackdrop');
    if (!overlay) {
        return;
    }
    overlay.style.display = 'none';
}
  110 +
// Installs one delegated click handler on #productGrid that opens the API result
// viewer for the card whose debug button was clicked. Safe to call repeatedly:
// a data attribute on the grid marks it as already bound.
function initProductGridResultViewer() {
    const grid = document.getElementById('productGrid');
    const alreadyBound = Boolean(grid) && grid.dataset.apiResultViewerBound === '1';
    if (!grid || alreadyBound) {
        return;
    }
    grid.dataset.apiResultViewerBound = '1';

    grid.addEventListener('click', (event) => {
        const trigger = event.target.closest('.product-debug-btn-api-result');
        if (!trigger) {
            return;
        }
        event.preventDefault();

        // The button carries the position of its item in the last search response.
        const index = parseInt(trigger.getAttribute('data-result-index'), 10);
        if (Number.isNaN(index)) {
            return;
        }
        const results = state.lastSearchData && state.lastSearchData.results;
        const inRange = Boolean(results) && index >= 0 && index < results.length;
        if (inRange) {
            openApiResultViewer(results[index]);
        }
    });
}
  134 +
66 135 // Initialize
67 136 function initializeApp() {
68 137 // 初始化租户下拉框和分面面板
69 138 console.log('Initializing app...');
70 139 initTenantSelect();
  140 + initProductGridResultViewer();
71 141 const searchInput = document.getElementById('searchInput');
72 142 if (searchInput) {
73 143 searchInput.focus();
... ... @@ -350,7 +420,7 @@ function displayResults(data) {
350 420  
351 421 const tenantId = getTenantId();
352 422  
353   - data.results.forEach((result) => {
  423 + data.results.forEach((result, resultIndex) => {
354 424 const product = result;
355 425 const title = product.title || product.name || 'N/A';
356 426 const price = product.min_price || product.price || 'N/A';
... ... @@ -401,9 +471,14 @@ function displayResults(data) {
401 471 <div class="product-debug-line">Rerank score: ${rerankScore}</div>
402 472 <div class="product-debug-line">Fused score: ${fusedScore}</div>
403 473 ${titleLines}
404   - <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer">
405   - 查看 ES 原始文档
406   - </a>
  474 + <div class="product-debug-actions">
  475 + <button type="button" class="product-debug-btn-api-result" data-result-index="${resultIndex}">
  476 + 查看 API 结果项
  477 + </button>
  478 + <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer">
  479 + 查看 ES 原始文档
  480 + </a>
  481 + </div>
407 482 </div>
408 483 `;
409 484 }
... ...
search/searcher.py
... ... @@ -4,12 +4,13 @@ Main Searcher module - executes search queries against Elasticsearch.
4 4 Handles query parsing, ranking, and result formatting.
5 5 """
6 6  
7   -from typing import Dict, Any, List, Optional, Union
  7 +from typing import Dict, Any, List, Optional, Union, Tuple
8 8 import os
9 9 import time, json
10 10 import logging
11 11 import hashlib
12 12 from string import Formatter
  13 +import numpy as np
13 14  
14 15 from utils.es_client import ESClient
15 16 from query import QueryParser, ParsedQuery
... ... @@ -224,6 +225,231 @@ class Searcher:
224 225 hits_by_id[str(hid)] = hit
225 226 return hits_by_id, int(resp.get("took", 0) or 0)
226 227  
  228 + @staticmethod
  229 + def _normalize_sku_match_text(value: Optional[str]) -> str:
  230 + """Normalize free text for lightweight SKU option matching."""
  231 + if value is None:
  232 + return ""
  233 + return " ".join(str(value).strip().casefold().split())
  234 +
  235 + def _build_sku_query_texts(self, parsed_query: ParsedQuery) -> List[str]:
  236 + """Collect original and translated query texts for SKU option matching."""
  237 + candidates: List[str] = []
  238 + for text in (
  239 + getattr(parsed_query, "original_query", None),
  240 + getattr(parsed_query, "query_normalized", None),
  241 + getattr(parsed_query, "rewritten_query", None),
  242 + ):
  243 + normalized = self._normalize_sku_match_text(text)
  244 + if normalized:
  245 + candidates.append(normalized)
  246 +
  247 + query_text_by_lang = getattr(parsed_query, "query_text_by_lang", {}) or {}
  248 + if isinstance(query_text_by_lang, dict):
  249 + for text in query_text_by_lang.values():
  250 + normalized = self._normalize_sku_match_text(text)
  251 + if normalized:
  252 + candidates.append(normalized)
  253 +
  254 + translations = getattr(parsed_query, "translations", {}) or {}
  255 + if isinstance(translations, dict):
  256 + for text in translations.values():
  257 + normalized = self._normalize_sku_match_text(text)
  258 + if normalized:
  259 + candidates.append(normalized)
  260 +
  261 + deduped: List[str] = []
  262 + seen = set()
  263 + for text in candidates:
  264 + if text in seen:
  265 + continue
  266 + seen.add(text)
  267 + deduped.append(text)
  268 + return deduped
  269 +
  270 + def _find_query_matching_sku_index(
  271 + self,
  272 + skus: List[Dict[str, Any]],
  273 + query_texts: List[str],
  274 + ) -> Optional[int]:
  275 + """Return the first SKU whose option1_value appears in query texts."""
  276 + if not skus or not query_texts:
  277 + return None
  278 +
  279 + for index, sku in enumerate(skus):
  280 + option1_value = self._normalize_sku_match_text(sku.get("option1_value"))
  281 + if not option1_value:
  282 + continue
  283 + if any(option1_value in query_text for query_text in query_texts):
  284 + return index
  285 + return None
  286 +
  287 + def _encode_query_vector_for_sku_matching(
  288 + self,
  289 + parsed_query: ParsedQuery,
  290 + context: Optional[RequestContext] = None,
  291 + ) -> Optional[np.ndarray]:
  292 + """Best-effort fallback query embedding for final-page SKU matching."""
  293 + query_text = (
  294 + getattr(parsed_query, "rewritten_query", None)
  295 + or getattr(parsed_query, "query_normalized", None)
  296 + or getattr(parsed_query, "original_query", None)
  297 + )
  298 + if not query_text:
  299 + return None
  300 +
  301 + text_encoder = getattr(self.query_parser, "text_encoder", None)
  302 + if text_encoder is None:
  303 + return None
  304 +
  305 + try:
  306 + vectors = text_encoder.encode([query_text], priority=1)
  307 + except Exception as exc:
  308 + logger.warning("Failed to encode query vector for SKU matching: %s", exc, exc_info=True)
  309 + if context is not None:
  310 + context.add_warning(f"SKU query embedding failed: {exc}")
  311 + return None
  312 +
  313 + if vectors is None or len(vectors) == 0:
  314 + return None
  315 +
  316 + vector = vectors[0]
  317 + if vector is None:
  318 + return None
  319 + return np.asarray(vector, dtype=np.float32)
  320 +
  321 + def _select_sku_by_embedding(
  322 + self,
  323 + skus: List[Dict[str, Any]],
  324 + option1_vectors: Dict[str, np.ndarray],
  325 + query_vector: np.ndarray,
  326 + ) -> Tuple[Optional[int], Optional[float]]:
  327 + """Select the SKU whose option1_value is most similar to the query."""
  328 + best_index: Optional[int] = None
  329 + best_score: Optional[float] = None
  330 +
  331 + for index, sku in enumerate(skus):
  332 + option1_value_raw = sku.get("option1_value")
  333 + if option1_value_raw is None:
  334 + continue
  335 + option1_value = str(option1_value_raw).strip()
  336 + if not option1_value:
  337 + continue
  338 + option_vector = option1_vectors.get(option1_value)
  339 + if option_vector is None:
  340 + continue
  341 + score = float(np.inner(query_vector, option_vector))
  342 + if best_score is None or score > best_score:
  343 + best_index = index
  344 + best_score = score
  345 +
  346 + return best_index, best_score
  347 +
  348 + @staticmethod
  349 + def _promote_matching_sku(source: Dict[str, Any], match_index: int) -> Optional[Dict[str, Any]]:
  350 + """Move the matched SKU to the front and swap the SPU image."""
  351 + skus = source.get("skus")
  352 + if not isinstance(skus, list) or match_index < 0 or match_index >= len(skus):
  353 + return None
  354 +
  355 + matched_sku = skus.pop(match_index)
  356 + skus.insert(0, matched_sku)
  357 +
  358 + image_src = matched_sku.get("image_src") or matched_sku.get("imageSrc")
  359 + if image_src:
  360 + source["image_url"] = image_src
  361 + return matched_sku
  362 +
  363 + def _apply_sku_sorting_for_page_hits(
  364 + self,
  365 + es_hits: List[Dict[str, Any]],
  366 + parsed_query: ParsedQuery,
  367 + context: Optional[RequestContext] = None,
  368 + ) -> None:
  369 + """Sort each page hit's SKUs so the best-matching SKU is first."""
  370 + if not es_hits:
  371 + return
  372 +
  373 + query_texts = self._build_sku_query_texts(parsed_query)
  374 + unmatched_hits: List[Dict[str, Any]] = []
  375 + option1_values_to_encode: List[str] = []
  376 + seen_option1_values = set()
  377 + text_matched = 0
  378 + embedding_matched = 0
  379 +
  380 + for hit in es_hits:
  381 + source = hit.get("_source")
  382 + if not isinstance(source, dict):
  383 + continue
  384 + skus = source.get("skus")
  385 + if not isinstance(skus, list) or not skus:
  386 + continue
  387 +
  388 + match_index = self._find_query_matching_sku_index(skus, query_texts)
  389 + if match_index is not None:
  390 + self._promote_matching_sku(source, match_index)
  391 + text_matched += 1
  392 + continue
  393 +
  394 + unmatched_hits.append(hit)
  395 + for sku in skus:
  396 + option1_value_raw = sku.get("option1_value")
  397 + if option1_value_raw is None:
  398 + continue
  399 + option1_value = str(option1_value_raw).strip()
  400 + if not option1_value or option1_value in seen_option1_values:
  401 + continue
  402 + seen_option1_values.add(option1_value)
  403 + option1_values_to_encode.append(option1_value)
  404 +
  405 + if not unmatched_hits or not option1_values_to_encode:
  406 + return
  407 +
  408 + query_vector = getattr(parsed_query, "query_vector", None)
  409 + if query_vector is None:
  410 + query_vector = self._encode_query_vector_for_sku_matching(parsed_query, context=context)
  411 + if query_vector is None:
  412 + return
  413 +
  414 + text_encoder = getattr(self.query_parser, "text_encoder", None)
  415 + if text_encoder is None:
  416 + return
  417 +
  418 + try:
  419 + encoded_option_vectors = text_encoder.encode(option1_values_to_encode, priority=1)
  420 + except Exception as exc:
  421 + logger.warning("Failed to encode SKU option1 values for final-page sorting: %s", exc, exc_info=True)
  422 + if context is not None:
  423 + context.add_warning(f"SKU option embedding failed: {exc}")
  424 + return
  425 +
  426 + option1_vectors: Dict[str, np.ndarray] = {}
  427 + for option1_value, vector in zip(option1_values_to_encode, encoded_option_vectors):
  428 + if vector is None:
  429 + continue
  430 + option1_vectors[option1_value] = np.asarray(vector, dtype=np.float32)
  431 +
  432 + query_vector_array = np.asarray(query_vector, dtype=np.float32)
  433 + for hit in unmatched_hits:
  434 + source = hit.get("_source")
  435 + if not isinstance(source, dict):
  436 + continue
  437 + skus = source.get("skus")
  438 + if not isinstance(skus, list) or not skus:
  439 + continue
  440 + match_index, _ = self._select_sku_by_embedding(skus, option1_vectors, query_vector_array)
  441 + if match_index is None:
  442 + continue
  443 + self._promote_matching_sku(source, match_index)
  444 + embedding_matched += 1
  445 +
  446 + if text_matched or embedding_matched:
  447 + logger.info(
  448 + "Final-page SKU sorting completed | text_matched=%s | embedding_matched=%s",
  449 + text_matched,
  450 + embedding_matched,
  451 + )
  452 +
227 453 def search(
228 454 self,
229 455 query: str,
... ... @@ -622,6 +848,8 @@ class Searcher:
622 848 continue
623 849 rerank_debug_by_doc[str(doc_id)] = item
624 850  
  851 + self._apply_sku_sorting_for_page_hits(es_hits, parsed_query, context=context)
  852 +
625 853 # Format results using ResultFormatter
626 854 formatted_results = ResultFormatter.format_search_results(
627 855 es_hits,
... ...
tests/test_search_rerank_window.py
... ... @@ -5,6 +5,7 @@ from pathlib import Path
5 5 from types import SimpleNamespace
6 6 from typing import Any, Dict, List
7 7  
  8 +import numpy as np
8 9 import yaml
9 10  
10 11 from config import (
... ... @@ -157,9 +158,7 @@ def _build_search_config(*, rerank_enabled: bool = True, rerank_window: int = 38
157 158 rerank=RerankConfig(enabled=rerank_enabled, rerank_window=rerank_window),
158 159 spu_config=SPUConfig(enabled=False),
159 160 es_index_name="test_products",
160   - tenant_config={},
161 161 es_settings={},
162   - services={},
163 162 )
164 163  
165 164  
... ... @@ -173,6 +172,19 @@ def _build_searcher(config: SearchConfig, es_client: _FakeESClient) -&gt; Searcher:
173 172 return searcher
174 173  
175 174  
  175 +class _FakeTextEncoder:
  176 + def __init__(self, vectors: Dict[str, List[float]]):
  177 + self.vectors = {
  178 + key: np.array(value, dtype=np.float32)
  179 + for key, value in vectors.items()
  180 + }
  181 +
  182 + def encode(self, sentences, priority: int = 0, **kwargs):
  183 + if isinstance(sentences, str):
  184 + sentences = [sentences]
  185 + return np.array([self.vectors[text] for text in sentences], dtype=object)
  186 +
  187 +
176 188 def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path):
177 189 config_data = {
178 190 "es_index_name": "test_products",
... ... @@ -327,3 +339,116 @@ def test_searcher_skips_rerank_when_page_exceeds_window(monkeypatch):
327 339 assert es_client.calls[0]["size"] == 10
328 340 assert es_client.calls[0]["include_named_queries_score"] is False
329 341 assert len(es_client.calls) == 1
  342 +
  343 +
def test_searcher_promotes_sku_when_option1_matches_translated_query(monkeypatch):
    """Text matching: "Black" only appears in the English translation of the query.

    The Chinese query itself contains no option value, so the SKU can only be
    promoted through ``translations``. The promoted SKU must come first and its
    image must replace the SPU-level ``image_url``.
    """

    def _source_with_color_skus(doc_id: str) -> Dict[str, Any]:
        red_sku = {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"}
        black_sku = {"sku_id": "sku-black", "option1_value": "Black", "image_src": "https://img/black.jpg"}
        return {
            "spu_id": doc_id,
            "title": {"en": f"product-{doc_id}"},
            "brief": {"en": f"brief-{doc_id}"},
            "vendor": {"en": f"vendor-{doc_id}"},
            "image_url": "https://img/default.jpg",
            "skus": [red_sku, black_sku],
        }

    class _TranslatedQueryParser:
        # No encoder is available, so only text matching can promote a SKU.
        text_encoder = None

        def parse(self, query: str, tenant_id: str, generate_vector: bool, context: Any):
            return _FakeParsedQuery(
                original_query=query,
                query_normalized=query,
                rewritten_query=query,
                translations={"en": "black dress"},
            )

    es_client = _FakeESClient(total_hits=1)
    searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client)
    request_context = create_request_context(reqid="sku-text", uid="u-sku-text")

    monkeypatch.setattr(
        "search.searcher.get_tenant_config_loader",
        lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en", "zh"]}),
    )
    monkeypatch.setattr(_FakeESClient, "_full_source", staticmethod(_source_with_color_skus))
    searcher.query_parser = _TranslatedQueryParser()

    result = searcher.search(
        query="黑色 连衣裙",
        tenant_id="162",
        from_=0,
        size=1,
        context=request_context,
        enable_rerank=False,
    )

    assert len(result.results) == 1
    top_result = result.results[0]
    assert top_result.skus[0].sku_id == "sku-black"
    assert top_result.image_url == "https://img/black.jpg"
  394 +
  395 +
def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_match(monkeypatch):
    """Embedding fallback: no option value occurs in the query text.

    The parsed query carries the vector ``[0.0, 1.0]``, which has inner product
    1.0 with "Blue" and 0.0 with "Red", so the blue SKU must be promoted and
    its image must replace the SPU-level ``image_url``.
    """
    fake_encoder = _FakeTextEncoder(
        {
            "linen summer dress": [0.8, 0.2],
            "Red": [1.0, 0.0],
            "Blue": [0.0, 1.0],
        }
    )

    def _source_with_color_skus(doc_id: str) -> Dict[str, Any]:
        red_sku = {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"}
        blue_sku = {"sku_id": "sku-blue", "option1_value": "Blue", "image_src": "https://img/blue.jpg"}
        return {
            "spu_id": doc_id,
            "title": {"en": f"product-{doc_id}"},
            "brief": {"en": f"brief-{doc_id}"},
            "vendor": {"en": f"vendor-{doc_id}"},
            "image_url": "https://img/default.jpg",
            "skus": [red_sku, blue_sku],
        }

    class _EmbeddingQueryParser:
        text_encoder = fake_encoder

        def parse(self, query: str, tenant_id: str, generate_vector: bool, context: Any):
            return _FakeParsedQuery(
                original_query=query,
                query_normalized=query,
                rewritten_query=query,
                translations={},
                query_vector=np.array([0.0, 1.0], dtype=np.float32),
            )

    es_client = _FakeESClient(total_hits=1)
    searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client)
    request_context = create_request_context(reqid="sku-embed", uid="u-sku-embed")

    monkeypatch.setattr(
        "search.searcher.get_tenant_config_loader",
        lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}),
    )
    monkeypatch.setattr(_FakeESClient, "_full_source", staticmethod(_source_with_color_skus))
    searcher.query_parser = _EmbeddingQueryParser()

    result = searcher.search(
        query="linen summer dress",
        tenant_id="162",
        from_=0,
        size=1,
        context=request_context,
        enable_rerank=False,
    )

    assert len(result.results) == 1
    top_result = result.results[0]
    assert top_result.skus[0].sku_id == "sku-blue"
    assert top_result.image_url == "https://img/blue.jpg"
... ...