Commit deccd68adcbb038ef82062bfe047f5d560a4bd53
1 parent
e874eb50
Added the SKU pre-selection step in search/searcher.py right before
ResultFormatter.format_search_results() runs. What changed: for each SPU hit on the final paginated page, the searcher now checks whether any skus[].option1_value appears in one of the query texts built from the original query, normalized query, rewritten query, and translations. If no option1_value matches textually, it falls back to embedding similarity and picks the SKU whose option1_value embedding has the highest inner product with the query embedding. The matched SKU is promoted to the front of the SPU's skus list, and the SPU-level image_url is replaced with that matched SKU's image_src. I left api/result_formatter.py unchanged because it already preserves the SKU order and reads image_url from _source; updating the page hits in the searcher makes the formatter return the desired result automatically. Verification: ReadLints on the edited files reported no errors, and the targeted tests passed: pytest tests/test_search_rerank_window.py -k "translated_query or no_direct_option_match"
Showing
6 changed files
with
550 additions
and
9 deletions
Show diff stats
| ... | ... | @@ -0,0 +1,17 @@ |
| 1 | +docs | |
| 2 | +# Please enter the commit message for your changes. Lines starting | |
| 3 | +# with '#' will be ignored, and an empty message aborts the commit. | |
| 4 | +# | |
| 5 | +# On branch master | |
| 6 | +# Your branch is ahead of 'origin/master' by 5 commits. | |
| 7 | +# (use "git push" to publish your local commits) | |
| 8 | +# | |
| 9 | +# Changes to be committed: | |
| 10 | +# modified: config/config.yaml | |
| 11 | +# modified: docs/TODO.txt | |
| 12 | +# modified: "docs/\346\220\234\347\264\242API\345\257\271\346\216\245\346\214\207\345\215\227-07-\345\276\256\346\234\215\345\212\241\346\216\245\345\217\243\357\274\210Embedding-Reranker-Translation\357\274\211.md" | |
| 13 | +# modified: "docs/\347\233\270\345\205\263\346\200\247\346\243\200\347\264\242\344\274\230\345\214\226\350\257\264\346\230\216.md" | |
| 14 | +# | |
| 15 | +# Changes not staged for commit: | |
| 16 | +# modified: third-party/clip-as-service (untracked content) | |
| 17 | +# | ... | ... |
frontend/index.html
| ... | ... | @@ -199,7 +199,7 @@ |
| 199 | 199 | </footer> |
| 200 | 200 | |
| 201 | 201 | <script src="/static/js/tenant_facets_config.js?v=1.4"></script> |
| 202 | - <script src="/static/js/app.js?v=1.0"></script> | |
| 202 | + <script src="/static/js/app.js?v=1.1"></script> | |
| 203 | 203 | <script> |
| 204 | 204 | // 自动补全功能(使用后端 /search/suggestions 接口) |
| 205 | 205 | const SUGGEST_API = API_BASE_URL + '/search/suggestions'; | ... | ... |
frontend/static/css/style.css
| ... | ... | @@ -371,9 +371,31 @@ body { |
| 371 | 371 | margin-bottom: 2px; |
| 372 | 372 | } |
| 373 | 373 | |
| 374 | +.product-debug-actions { | |
| 375 | + display: flex; | |
| 376 | + flex-wrap: wrap; | |
| 377 | + align-items: center; | |
| 378 | + gap: 10px; | |
| 379 | + margin-top: 8px; | |
| 380 | +} | |
| 381 | + | |
| 382 | +.product-debug-btn-api-result { | |
| 383 | + font-family: inherit; | |
| 384 | + font-size: 12px; | |
| 385 | + padding: 4px 10px; | |
| 386 | + border: 1px solid #d35400; | |
| 387 | + border-radius: 4px; | |
| 388 | + background: #fff8f3; | |
| 389 | + color: #d35400; | |
| 390 | + cursor: pointer; | |
| 391 | +} | |
| 392 | + | |
| 393 | +.product-debug-btn-api-result:hover { | |
| 394 | + background: #fdebd0; | |
| 395 | +} | |
| 396 | + | |
| 374 | 397 | .product-debug-link { |
| 375 | 398 | display: inline-block; |
| 376 | - margin-top: 6px; | |
| 377 | 399 | font-size: 12px; |
| 378 | 400 | color: #e67e22; |
| 379 | 401 | text-decoration: none; |
| ... | ... | @@ -383,6 +405,80 @@ body { |
| 383 | 405 | text-decoration: underline; |
| 384 | 406 | } |
| 385 | 407 | |
| 408 | +.api-result-viewer-backdrop { | |
| 409 | + display: none; | |
| 410 | + position: fixed; | |
| 411 | + inset: 0; | |
| 412 | + z-index: 2000; | |
| 413 | + background: rgba(0, 0, 0, 0.45); | |
| 414 | + align-items: center; | |
| 415 | + justify-content: center; | |
| 416 | + padding: 24px; | |
| 417 | + box-sizing: border-box; | |
| 418 | +} | |
| 419 | + | |
| 420 | +.api-result-viewer-panel { | |
| 421 | + background: #fff; | |
| 422 | + border-radius: 8px; | |
| 423 | + max-width: min(920px, 100%); | |
| 424 | + max-height: min(85vh, 100%); | |
| 425 | + display: flex; | |
| 426 | + flex-direction: column; | |
| 427 | + box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2); | |
| 428 | + overflow: hidden; | |
| 429 | +} | |
| 430 | + | |
| 431 | +.api-result-viewer-header { | |
| 432 | + display: flex; | |
| 433 | + align-items: center; | |
| 434 | + justify-content: space-between; | |
| 435 | + gap: 12px; | |
| 436 | + padding: 12px 14px; | |
| 437 | + border-bottom: 1px solid #eee; | |
| 438 | + flex-shrink: 0; | |
| 439 | +} | |
| 440 | + | |
| 441 | +.api-result-viewer-title { | |
| 442 | + font-size: 14px; | |
| 443 | + font-weight: 600; | |
| 444 | + color: #333; | |
| 445 | +} | |
| 446 | + | |
| 447 | +.api-result-viewer-title code { | |
| 448 | + font-size: 12px; | |
| 449 | + font-weight: normal; | |
| 450 | + background: #f4f4f4; | |
| 451 | + padding: 1px 6px; | |
| 452 | + border-radius: 3px; | |
| 453 | +} | |
| 454 | + | |
| 455 | +.api-result-viewer-close { | |
| 456 | + border: none; | |
| 457 | + background: transparent; | |
| 458 | + font-size: 22px; | |
| 459 | + line-height: 1; | |
| 460 | + color: #888; | |
| 461 | + cursor: pointer; | |
| 462 | + padding: 0 4px; | |
| 463 | +} | |
| 464 | + | |
| 465 | +.api-result-viewer-close:hover { | |
| 466 | + color: #333; | |
| 467 | +} | |
| 468 | + | |
| 469 | +.api-result-viewer-pre { | |
| 470 | + margin: 0; | |
| 471 | + padding: 14px; | |
| 472 | + overflow: auto; | |
| 473 | + flex: 1; | |
| 474 | + font-size: 12px; | |
| 475 | + line-height: 1.45; | |
| 476 | + background: #fafafa; | |
| 477 | + border: none; | |
| 478 | + font-family: Menlo, Consolas, "Courier New", monospace; | |
| 479 | + white-space: pre; | |
| 480 | +} | |
| 481 | + | |
| 386 | 482 | .product-card:hover { |
| 387 | 483 | box-shadow: 0 4px 12px rgba(0,0,0,0.1); |
| 388 | 484 | transform: translateY(-2px); | ... | ... |
frontend/static/js/app.js
| ... | ... | @@ -63,11 +63,81 @@ let state = { |
| 63 | 63 | debug: true // Always enable debug mode for test frontend |
| 64 | 64 | }; |
| 65 | 65 | |
| 66 | +// 弹层:展示 /search/ 返回的 results[] 单条元素(非 ES 原始文档) | |
| 67 | +function openApiResultViewer(item) { | |
| 68 | + let backdrop = document.getElementById('apiResultViewerBackdrop'); | |
| 69 | + if (!backdrop) { | |
| 70 | + backdrop = document.createElement('div'); | |
| 71 | + backdrop.id = 'apiResultViewerBackdrop'; | |
| 72 | + backdrop.className = 'api-result-viewer-backdrop'; | |
| 73 | + backdrop.innerHTML = ` | |
| 74 | + <div class="api-result-viewer-panel" role="dialog" aria-modal="true" aria-labelledby="apiResultViewerTitle"> | |
| 75 | + <div class="api-result-viewer-header"> | |
| 76 | + <span id="apiResultViewerTitle" class="api-result-viewer-title">搜索结果项(API <code>results[]</code>)</span> | |
| 77 | + <button type="button" class="api-result-viewer-close" aria-label="关闭">×</button> | |
| 78 | + </div> | |
| 79 | + <pre class="api-result-viewer-pre"></pre> | |
| 80 | + </div> | |
| 81 | + `; | |
| 82 | + document.body.appendChild(backdrop); | |
| 83 | + backdrop.addEventListener('click', (e) => { | |
| 84 | + if (e.target === backdrop) { | |
| 85 | + closeApiResultViewer(); | |
| 86 | + } | |
| 87 | + }); | |
| 88 | + backdrop.querySelector('.api-result-viewer-close').addEventListener('click', closeApiResultViewer); | |
| 89 | + document.addEventListener('keydown', (e) => { | |
| 90 | + if (e.key === 'Escape') { | |
| 91 | + closeApiResultViewer(); | |
| 92 | + } | |
| 93 | + }); | |
| 94 | + } | |
| 95 | + const pre = backdrop.querySelector('.api-result-viewer-pre'); | |
| 96 | + try { | |
| 97 | + pre.textContent = JSON.stringify(item, null, 2); | |
| 98 | + } catch (err) { | |
| 99 | + pre.textContent = String(item); | |
| 100 | + } | |
| 101 | + backdrop.style.display = 'flex'; | |
| 102 | +} | |
| 103 | + | |
| 104 | +function closeApiResultViewer() { | |
| 105 | + const backdrop = document.getElementById('apiResultViewerBackdrop'); | |
| 106 | + if (backdrop) { | |
| 107 | + backdrop.style.display = 'none'; | |
| 108 | + } | |
| 109 | +} | |
| 110 | + | |
| 111 | +function initProductGridResultViewer() { | |
| 112 | + const grid = document.getElementById('productGrid'); | |
| 113 | + if (!grid || grid.dataset.apiResultViewerBound === '1') { | |
| 114 | + return; | |
| 115 | + } | |
| 116 | + grid.dataset.apiResultViewerBound = '1'; | |
| 117 | + grid.addEventListener('click', (e) => { | |
| 118 | + const btn = e.target.closest('.product-debug-btn-api-result'); | |
| 119 | + if (!btn) { | |
| 120 | + return; | |
| 121 | + } | |
| 122 | + e.preventDefault(); | |
| 123 | + const idx = parseInt(btn.getAttribute('data-result-index'), 10); | |
| 124 | + if (Number.isNaN(idx)) { | |
| 125 | + return; | |
| 126 | + } | |
| 127 | + const results = state.lastSearchData && state.lastSearchData.results; | |
| 128 | + if (!results || idx < 0 || idx >= results.length) { | |
| 129 | + return; | |
| 130 | + } | |
| 131 | + openApiResultViewer(results[idx]); | |
| 132 | + }); | |
| 133 | +} | |
| 134 | + | |
| 66 | 135 | // Initialize |
| 67 | 136 | function initializeApp() { |
| 68 | 137 | // 初始化租户下拉框和分面面板 |
| 69 | 138 | console.log('Initializing app...'); |
| 70 | 139 | initTenantSelect(); |
| 140 | + initProductGridResultViewer(); | |
| 71 | 141 | const searchInput = document.getElementById('searchInput'); |
| 72 | 142 | if (searchInput) { |
| 73 | 143 | searchInput.focus(); |
| ... | ... | @@ -350,7 +420,7 @@ function displayResults(data) { |
| 350 | 420 | |
| 351 | 421 | const tenantId = getTenantId(); |
| 352 | 422 | |
| 353 | - data.results.forEach((result) => { | |
| 423 | + data.results.forEach((result, resultIndex) => { | |
| 354 | 424 | const product = result; |
| 355 | 425 | const title = product.title || product.name || 'N/A'; |
| 356 | 426 | const price = product.min_price || product.price || 'N/A'; |
| ... | ... | @@ -401,9 +471,14 @@ function displayResults(data) { |
| 401 | 471 | <div class="product-debug-line">Rerank score: ${rerankScore}</div> |
| 402 | 472 | <div class="product-debug-line">Fused score: ${fusedScore}</div> |
| 403 | 473 | ${titleLines} |
| 404 | - <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer"> | |
| 405 | - 查看 ES 原始文档 | |
| 406 | - </a> | |
| 474 | + <div class="product-debug-actions"> | |
| 475 | + <button type="button" class="product-debug-btn-api-result" data-result-index="${resultIndex}"> | |
| 476 | + 查看 API 结果项 | |
| 477 | + </button> | |
| 478 | + <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer"> | |
| 479 | + 查看 ES 原始文档 | |
| 480 | + </a> | |
| 481 | + </div> | |
| 407 | 482 | </div> |
| 408 | 483 | `; |
| 409 | 484 | } | ... | ... |
search/searcher.py
| ... | ... | @@ -4,12 +4,13 @@ Main Searcher module - executes search queries against Elasticsearch. |
| 4 | 4 | Handles query parsing, ranking, and result formatting. |
| 5 | 5 | """ |
| 6 | 6 | |
| 7 | -from typing import Dict, Any, List, Optional, Union | |
| 7 | +from typing import Dict, Any, List, Optional, Union, Tuple | |
| 8 | 8 | import os |
| 9 | 9 | import time, json |
| 10 | 10 | import logging |
| 11 | 11 | import hashlib |
| 12 | 12 | from string import Formatter |
| 13 | +import numpy as np | |
| 13 | 14 | |
| 14 | 15 | from utils.es_client import ESClient |
| 15 | 16 | from query import QueryParser, ParsedQuery |
| ... | ... | @@ -224,6 +225,231 @@ class Searcher: |
| 224 | 225 | hits_by_id[str(hid)] = hit |
| 225 | 226 | return hits_by_id, int(resp.get("took", 0) or 0) |
| 226 | 227 | |
| 228 | + @staticmethod | |
| 229 | + def _normalize_sku_match_text(value: Optional[str]) -> str: | |
| 230 | + """Normalize free text for lightweight SKU option matching.""" | |
| 231 | + if value is None: | |
| 232 | + return "" | |
| 233 | + return " ".join(str(value).strip().casefold().split()) | |
| 234 | + | |
| 235 | + def _build_sku_query_texts(self, parsed_query: ParsedQuery) -> List[str]: | |
| 236 | + """Collect original and translated query texts for SKU option matching.""" | |
| 237 | + candidates: List[str] = [] | |
| 238 | + for text in ( | |
| 239 | + getattr(parsed_query, "original_query", None), | |
| 240 | + getattr(parsed_query, "query_normalized", None), | |
| 241 | + getattr(parsed_query, "rewritten_query", None), | |
| 242 | + ): | |
| 243 | + normalized = self._normalize_sku_match_text(text) | |
| 244 | + if normalized: | |
| 245 | + candidates.append(normalized) | |
| 246 | + | |
| 247 | + query_text_by_lang = getattr(parsed_query, "query_text_by_lang", {}) or {} | |
| 248 | + if isinstance(query_text_by_lang, dict): | |
| 249 | + for text in query_text_by_lang.values(): | |
| 250 | + normalized = self._normalize_sku_match_text(text) | |
| 251 | + if normalized: | |
| 252 | + candidates.append(normalized) | |
| 253 | + | |
| 254 | + translations = getattr(parsed_query, "translations", {}) or {} | |
| 255 | + if isinstance(translations, dict): | |
| 256 | + for text in translations.values(): | |
| 257 | + normalized = self._normalize_sku_match_text(text) | |
| 258 | + if normalized: | |
| 259 | + candidates.append(normalized) | |
| 260 | + | |
| 261 | + deduped: List[str] = [] | |
| 262 | + seen = set() | |
| 263 | + for text in candidates: | |
| 264 | + if text in seen: | |
| 265 | + continue | |
| 266 | + seen.add(text) | |
| 267 | + deduped.append(text) | |
| 268 | + return deduped | |
| 269 | + | |
| 270 | + def _find_query_matching_sku_index( | |
| 271 | + self, | |
| 272 | + skus: List[Dict[str, Any]], | |
| 273 | + query_texts: List[str], | |
| 274 | + ) -> Optional[int]: | |
| 275 | + """Return the first SKU whose option1_value appears in query texts.""" | |
| 276 | + if not skus or not query_texts: | |
| 277 | + return None | |
| 278 | + | |
| 279 | + for index, sku in enumerate(skus): | |
| 280 | + option1_value = self._normalize_sku_match_text(sku.get("option1_value")) | |
| 281 | + if not option1_value: | |
| 282 | + continue | |
| 283 | + if any(option1_value in query_text for query_text in query_texts): | |
| 284 | + return index | |
| 285 | + return None | |
| 286 | + | |
| 287 | + def _encode_query_vector_for_sku_matching( | |
| 288 | + self, | |
| 289 | + parsed_query: ParsedQuery, | |
| 290 | + context: Optional[RequestContext] = None, | |
| 291 | + ) -> Optional[np.ndarray]: | |
| 292 | + """Best-effort fallback query embedding for final-page SKU matching.""" | |
| 293 | + query_text = ( | |
| 294 | + getattr(parsed_query, "rewritten_query", None) | |
| 295 | + or getattr(parsed_query, "query_normalized", None) | |
| 296 | + or getattr(parsed_query, "original_query", None) | |
| 297 | + ) | |
| 298 | + if not query_text: | |
| 299 | + return None | |
| 300 | + | |
| 301 | + text_encoder = getattr(self.query_parser, "text_encoder", None) | |
| 302 | + if text_encoder is None: | |
| 303 | + return None | |
| 304 | + | |
| 305 | + try: | |
| 306 | + vectors = text_encoder.encode([query_text], priority=1) | |
| 307 | + except Exception as exc: | |
| 308 | + logger.warning("Failed to encode query vector for SKU matching: %s", exc, exc_info=True) | |
| 309 | + if context is not None: | |
| 310 | + context.add_warning(f"SKU query embedding failed: {exc}") | |
| 311 | + return None | |
| 312 | + | |
| 313 | + if vectors is None or len(vectors) == 0: | |
| 314 | + return None | |
| 315 | + | |
| 316 | + vector = vectors[0] | |
| 317 | + if vector is None: | |
| 318 | + return None | |
| 319 | + return np.asarray(vector, dtype=np.float32) | |
| 320 | + | |
| 321 | + def _select_sku_by_embedding( | |
| 322 | + self, | |
| 323 | + skus: List[Dict[str, Any]], | |
| 324 | + option1_vectors: Dict[str, np.ndarray], | |
| 325 | + query_vector: np.ndarray, | |
| 326 | + ) -> Tuple[Optional[int], Optional[float]]: | |
| 327 | + """Select the SKU whose option1_value is most similar to the query.""" | |
| 328 | + best_index: Optional[int] = None | |
| 329 | + best_score: Optional[float] = None | |
| 330 | + | |
| 331 | + for index, sku in enumerate(skus): | |
| 332 | + option1_value_raw = sku.get("option1_value") | |
| 333 | + if option1_value_raw is None: | |
| 334 | + continue | |
| 335 | + option1_value = str(option1_value_raw).strip() | |
| 336 | + if not option1_value: | |
| 337 | + continue | |
| 338 | + option_vector = option1_vectors.get(option1_value) | |
| 339 | + if option_vector is None: | |
| 340 | + continue | |
| 341 | + score = float(np.inner(query_vector, option_vector)) | |
| 342 | + if best_score is None or score > best_score: | |
| 343 | + best_index = index | |
| 344 | + best_score = score | |
| 345 | + | |
| 346 | + return best_index, best_score | |
| 347 | + | |
| 348 | + @staticmethod | |
| 349 | + def _promote_matching_sku(source: Dict[str, Any], match_index: int) -> Optional[Dict[str, Any]]: | |
| 350 | + """Move the matched SKU to the front and swap the SPU image.""" | |
| 351 | + skus = source.get("skus") | |
| 352 | + if not isinstance(skus, list) or match_index < 0 or match_index >= len(skus): | |
| 353 | + return None | |
| 354 | + | |
| 355 | + matched_sku = skus.pop(match_index) | |
| 356 | + skus.insert(0, matched_sku) | |
| 357 | + | |
| 358 | + image_src = matched_sku.get("image_src") or matched_sku.get("imageSrc") | |
| 359 | + if image_src: | |
| 360 | + source["image_url"] = image_src | |
| 361 | + return matched_sku | |
| 362 | + | |
| 363 | + def _apply_sku_sorting_for_page_hits( | |
| 364 | + self, | |
| 365 | + es_hits: List[Dict[str, Any]], | |
| 366 | + parsed_query: ParsedQuery, | |
| 367 | + context: Optional[RequestContext] = None, | |
| 368 | + ) -> None: | |
| 369 | + """Sort each page hit's SKUs so the best-matching SKU is first.""" | |
| 370 | + if not es_hits: | |
| 371 | + return | |
| 372 | + | |
| 373 | + query_texts = self._build_sku_query_texts(parsed_query) | |
| 374 | + unmatched_hits: List[Dict[str, Any]] = [] | |
| 375 | + option1_values_to_encode: List[str] = [] | |
| 376 | + seen_option1_values = set() | |
| 377 | + text_matched = 0 | |
| 378 | + embedding_matched = 0 | |
| 379 | + | |
| 380 | + for hit in es_hits: | |
| 381 | + source = hit.get("_source") | |
| 382 | + if not isinstance(source, dict): | |
| 383 | + continue | |
| 384 | + skus = source.get("skus") | |
| 385 | + if not isinstance(skus, list) or not skus: | |
| 386 | + continue | |
| 387 | + | |
| 388 | + match_index = self._find_query_matching_sku_index(skus, query_texts) | |
| 389 | + if match_index is not None: | |
| 390 | + self._promote_matching_sku(source, match_index) | |
| 391 | + text_matched += 1 | |
| 392 | + continue | |
| 393 | + | |
| 394 | + unmatched_hits.append(hit) | |
| 395 | + for sku in skus: | |
| 396 | + option1_value_raw = sku.get("option1_value") | |
| 397 | + if option1_value_raw is None: | |
| 398 | + continue | |
| 399 | + option1_value = str(option1_value_raw).strip() | |
| 400 | + if not option1_value or option1_value in seen_option1_values: | |
| 401 | + continue | |
| 402 | + seen_option1_values.add(option1_value) | |
| 403 | + option1_values_to_encode.append(option1_value) | |
| 404 | + | |
| 405 | + if not unmatched_hits or not option1_values_to_encode: | |
| 406 | + return | |
| 407 | + | |
| 408 | + query_vector = getattr(parsed_query, "query_vector", None) | |
| 409 | + if query_vector is None: | |
| 410 | + query_vector = self._encode_query_vector_for_sku_matching(parsed_query, context=context) | |
| 411 | + if query_vector is None: | |
| 412 | + return | |
| 413 | + | |
| 414 | + text_encoder = getattr(self.query_parser, "text_encoder", None) | |
| 415 | + if text_encoder is None: | |
| 416 | + return | |
| 417 | + | |
| 418 | + try: | |
| 419 | + encoded_option_vectors = text_encoder.encode(option1_values_to_encode, priority=1) | |
| 420 | + except Exception as exc: | |
| 421 | + logger.warning("Failed to encode SKU option1 values for final-page sorting: %s", exc, exc_info=True) | |
| 422 | + if context is not None: | |
| 423 | + context.add_warning(f"SKU option embedding failed: {exc}") | |
| 424 | + return | |
| 425 | + | |
| 426 | + option1_vectors: Dict[str, np.ndarray] = {} | |
| 427 | + for option1_value, vector in zip(option1_values_to_encode, encoded_option_vectors): | |
| 428 | + if vector is None: | |
| 429 | + continue | |
| 430 | + option1_vectors[option1_value] = np.asarray(vector, dtype=np.float32) | |
| 431 | + | |
| 432 | + query_vector_array = np.asarray(query_vector, dtype=np.float32) | |
| 433 | + for hit in unmatched_hits: | |
| 434 | + source = hit.get("_source") | |
| 435 | + if not isinstance(source, dict): | |
| 436 | + continue | |
| 437 | + skus = source.get("skus") | |
| 438 | + if not isinstance(skus, list) or not skus: | |
| 439 | + continue | |
| 440 | + match_index, _ = self._select_sku_by_embedding(skus, option1_vectors, query_vector_array) | |
| 441 | + if match_index is None: | |
| 442 | + continue | |
| 443 | + self._promote_matching_sku(source, match_index) | |
| 444 | + embedding_matched += 1 | |
| 445 | + | |
| 446 | + if text_matched or embedding_matched: | |
| 447 | + logger.info( | |
| 448 | + "Final-page SKU sorting completed | text_matched=%s | embedding_matched=%s", | |
| 449 | + text_matched, | |
| 450 | + embedding_matched, | |
| 451 | + ) | |
| 452 | + | |
| 227 | 453 | def search( |
| 228 | 454 | self, |
| 229 | 455 | query: str, |
| ... | ... | @@ -622,6 +848,8 @@ class Searcher: |
| 622 | 848 | continue |
| 623 | 849 | rerank_debug_by_doc[str(doc_id)] = item |
| 624 | 850 | |
| 851 | + self._apply_sku_sorting_for_page_hits(es_hits, parsed_query, context=context) | |
| 852 | + | |
| 625 | 853 | # Format results using ResultFormatter |
| 626 | 854 | formatted_results = ResultFormatter.format_search_results( |
| 627 | 855 | es_hits, | ... | ... |
tests/test_search_rerank_window.py
| ... | ... | @@ -5,6 +5,7 @@ from pathlib import Path |
| 5 | 5 | from types import SimpleNamespace |
| 6 | 6 | from typing import Any, Dict, List |
| 7 | 7 | |
| 8 | +import numpy as np | |
| 8 | 9 | import yaml |
| 9 | 10 | |
| 10 | 11 | from config import ( |
| ... | ... | @@ -157,9 +158,7 @@ def _build_search_config(*, rerank_enabled: bool = True, rerank_window: int = 38 |
| 157 | 158 | rerank=RerankConfig(enabled=rerank_enabled, rerank_window=rerank_window), |
| 158 | 159 | spu_config=SPUConfig(enabled=False), |
| 159 | 160 | es_index_name="test_products", |
| 160 | - tenant_config={}, | |
| 161 | 161 | es_settings={}, |
| 162 | - services={}, | |
| 163 | 162 | ) |
| 164 | 163 | |
| 165 | 164 | |
| ... | ... | @@ -173,6 +172,19 @@ def _build_searcher(config: SearchConfig, es_client: _FakeESClient) -> Searcher: |
| 173 | 172 | return searcher |
| 174 | 173 | |
| 175 | 174 | |
| 175 | +class _FakeTextEncoder: | |
| 176 | + def __init__(self, vectors: Dict[str, List[float]]): | |
| 177 | + self.vectors = { | |
| 178 | + key: np.array(value, dtype=np.float32) | |
| 179 | + for key, value in vectors.items() | |
| 180 | + } | |
| 181 | + | |
| 182 | + def encode(self, sentences, priority: int = 0, **kwargs): | |
| 183 | + if isinstance(sentences, str): | |
| 184 | + sentences = [sentences] | |
| 185 | + return np.array([self.vectors[text] for text in sentences], dtype=object) | |
| 186 | + | |
| 187 | + | |
| 176 | 188 | def test_config_loader_rerank_enabled_defaults_true(tmp_path: Path): |
| 177 | 189 | config_data = { |
| 178 | 190 | "es_index_name": "test_products", |
| ... | ... | @@ -327,3 +339,116 @@ def test_searcher_skips_rerank_when_page_exceeds_window(monkeypatch): |
| 327 | 339 | assert es_client.calls[0]["size"] == 10 |
| 328 | 340 | assert es_client.calls[0]["include_named_queries_score"] is False |
| 329 | 341 | assert len(es_client.calls) == 1 |
| 342 | + | |
| 343 | + | |
| 344 | +def test_searcher_promotes_sku_when_option1_matches_translated_query(monkeypatch): | |
| 345 | + es_client = _FakeESClient(total_hits=1) | |
| 346 | + searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client) | |
| 347 | + context = create_request_context(reqid="sku-text", uid="u-sku-text") | |
| 348 | + | |
| 349 | + monkeypatch.setattr( | |
| 350 | + "search.searcher.get_tenant_config_loader", | |
| 351 | + lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en", "zh"]}), | |
| 352 | + ) | |
| 353 | + | |
| 354 | + class _TranslatedQueryParser: | |
| 355 | + text_encoder = None | |
| 356 | + | |
| 357 | + def parse(self, query: str, tenant_id: str, generate_vector: bool, context: Any): | |
| 358 | + return _FakeParsedQuery( | |
| 359 | + original_query=query, | |
| 360 | + query_normalized=query, | |
| 361 | + rewritten_query=query, | |
| 362 | + translations={"en": "black dress"}, | |
| 363 | + ) | |
| 364 | + | |
| 365 | + searcher.query_parser = _TranslatedQueryParser() | |
| 366 | + | |
| 367 | + def _full_source_with_skus(doc_id: str) -> Dict[str, Any]: | |
| 368 | + return { | |
| 369 | + "spu_id": doc_id, | |
| 370 | + "title": {"en": f"product-{doc_id}"}, | |
| 371 | + "brief": {"en": f"brief-{doc_id}"}, | |
| 372 | + "vendor": {"en": f"vendor-{doc_id}"}, | |
| 373 | + "image_url": "https://img/default.jpg", | |
| 374 | + "skus": [ | |
| 375 | + {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"}, | |
| 376 | + {"sku_id": "sku-black", "option1_value": "Black", "image_src": "https://img/black.jpg"}, | |
| 377 | + ], | |
| 378 | + } | |
| 379 | + | |
| 380 | + monkeypatch.setattr(_FakeESClient, "_full_source", staticmethod(_full_source_with_skus)) | |
| 381 | + | |
| 382 | + result = searcher.search( | |
| 383 | + query="黑色 连衣裙", | |
| 384 | + tenant_id="162", | |
| 385 | + from_=0, | |
| 386 | + size=1, | |
| 387 | + context=context, | |
| 388 | + enable_rerank=False, | |
| 389 | + ) | |
| 390 | + | |
| 391 | + assert len(result.results) == 1 | |
| 392 | + assert result.results[0].skus[0].sku_id == "sku-black" | |
| 393 | + assert result.results[0].image_url == "https://img/black.jpg" | |
| 394 | + | |
| 395 | + | |
| 396 | +def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_match(monkeypatch): | |
| 397 | + es_client = _FakeESClient(total_hits=1) | |
| 398 | + searcher = _build_searcher(_build_search_config(rerank_enabled=False), es_client) | |
| 399 | + context = create_request_context(reqid="sku-embed", uid="u-sku-embed") | |
| 400 | + | |
| 401 | + monkeypatch.setattr( | |
| 402 | + "search.searcher.get_tenant_config_loader", | |
| 403 | + lambda: SimpleNamespace(get_tenant_config=lambda tenant_id: {"index_languages": ["en"]}), | |
| 404 | + ) | |
| 405 | + | |
| 406 | + encoder = _FakeTextEncoder( | |
| 407 | + { | |
| 408 | + "linen summer dress": [0.8, 0.2], | |
| 409 | + "Red": [1.0, 0.0], | |
| 410 | + "Blue": [0.0, 1.0], | |
| 411 | + } | |
| 412 | + ) | |
| 413 | + | |
| 414 | + class _EmbeddingQueryParser: | |
| 415 | + text_encoder = encoder | |
| 416 | + | |
| 417 | + def parse(self, query: str, tenant_id: str, generate_vector: bool, context: Any): | |
| 418 | + return _FakeParsedQuery( | |
| 419 | + original_query=query, | |
| 420 | + query_normalized=query, | |
| 421 | + rewritten_query=query, | |
| 422 | + translations={}, | |
| 423 | + query_vector=np.array([0.0, 1.0], dtype=np.float32), | |
| 424 | + ) | |
| 425 | + | |
| 426 | + searcher.query_parser = _EmbeddingQueryParser() | |
| 427 | + | |
| 428 | + def _full_source_with_skus(doc_id: str) -> Dict[str, Any]: | |
| 429 | + return { | |
| 430 | + "spu_id": doc_id, | |
| 431 | + "title": {"en": f"product-{doc_id}"}, | |
| 432 | + "brief": {"en": f"brief-{doc_id}"}, | |
| 433 | + "vendor": {"en": f"vendor-{doc_id}"}, | |
| 434 | + "image_url": "https://img/default.jpg", | |
| 435 | + "skus": [ | |
| 436 | + {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"}, | |
| 437 | + {"sku_id": "sku-blue", "option1_value": "Blue", "image_src": "https://img/blue.jpg"}, | |
| 438 | + ], | |
| 439 | + } | |
| 440 | + | |
| 441 | + monkeypatch.setattr(_FakeESClient, "_full_source", staticmethod(_full_source_with_skus)) | |
| 442 | + | |
| 443 | + result = searcher.search( | |
| 444 | + query="linen summer dress", | |
| 445 | + tenant_id="162", | |
| 446 | + from_=0, | |
| 447 | + size=1, | |
| 448 | + context=context, | |
| 449 | + enable_rerank=False, | |
| 450 | + ) | |
| 451 | + | |
| 452 | + assert len(result.results) == 1 | |
| 453 | + assert result.results[0].skus[0].sku_id == "sku-blue" | |
| 454 | + assert result.results[0].image_url == "https://img/blue.jpg" | ... | ... |