Commit a7cc907878e7843651ce97fcc6ed1a02f5c91825
1 parent: deccd68a
sku排序
Showing 6 changed files with 200 additions and 172 deletions
Show diff stats
config/config.yaml
| @@ -90,7 +90,34 @@ query_config: | @@ -90,7 +90,34 @@ query_config: | ||
| 90 | 90 | ||
| 91 | # 返回字段配置(_source includes) | 91 | # 返回字段配置(_source includes) |
| 92 | # null表示返回所有字段,[]表示不返回任何字段,列表表示只返回指定字段 | 92 | # null表示返回所有字段,[]表示不返回任何字段,列表表示只返回指定字段 |
| 93 | - source_fields: null | 93 | + # 下列字段与 api/result_formatter.py(SpuResult 填充)及 search/searcher.py(SKU 排序/主图替换)一致 |
| 94 | + source_fields: | ||
| 95 | + - spu_id | ||
| 96 | + - handle | ||
| 97 | + - title | ||
| 98 | + - brief | ||
| 99 | + - description | ||
| 100 | + - vendor | ||
| 101 | + - category_name | ||
| 102 | + - category_name_text | ||
| 103 | + - category_path | ||
| 104 | + - category_id | ||
| 105 | + - category_level | ||
| 106 | + - category1_name | ||
| 107 | + - category2_name | ||
| 108 | + - category3_name | ||
| 109 | + - tags | ||
| 110 | + - min_price | ||
| 111 | + - compare_at_price | ||
| 112 | + - image_url | ||
| 113 | + - sku_prices | ||
| 114 | + - sku_weights | ||
| 115 | + - sku_weight_units | ||
| 116 | + - total_inventory | ||
| 117 | + - option1_name | ||
| 118 | + - option1_values | ||
| 119 | + - specifications | ||
| 120 | + - skus | ||
| 94 | 121 | ||
| 95 | # KNN boost配置(向量召回的boost值) | 122 | # KNN boost配置(向量召回的boost值) |
| 96 | knn_boost: 0.25 # Lower boost for embedding recall | 123 | knn_boost: 0.25 # Lower boost for embedding recall |
frontend/index.html
| @@ -199,7 +199,7 @@ | @@ -199,7 +199,7 @@ | ||
| 199 | </footer> | 199 | </footer> |
| 200 | 200 | ||
| 201 | <script src="/static/js/tenant_facets_config.js?v=1.4"></script> | 201 | <script src="/static/js/tenant_facets_config.js?v=1.4"></script> |
| 202 | - <script src="/static/js/app.js?v=1.1"></script> | 202 | + <script src="/static/js/app.js?v=1.0"></script> |
| 203 | <script> | 203 | <script> |
| 204 | // 自动补全功能(使用后端 /search/suggestions 接口) | 204 | // 自动补全功能(使用后端 /search/suggestions 接口) |
| 205 | const SUGGEST_API = API_BASE_URL + '/search/suggestions'; | 205 | const SUGGEST_API = API_BASE_URL + '/search/suggestions'; |
frontend/static/css/style.css
| @@ -375,108 +375,64 @@ body { | @@ -375,108 +375,64 @@ body { | ||
| 375 | display: flex; | 375 | display: flex; |
| 376 | flex-wrap: wrap; | 376 | flex-wrap: wrap; |
| 377 | align-items: center; | 377 | align-items: center; |
| 378 | - gap: 10px; | 378 | + gap: 10px 14px; |
| 379 | margin-top: 8px; | 379 | margin-top: 8px; |
| 380 | } | 380 | } |
| 381 | 381 | ||
| 382 | -.product-debug-btn-api-result { | 382 | +.product-debug-inline-es-btn { |
| 383 | font-family: inherit; | 383 | font-family: inherit; |
| 384 | font-size: 12px; | 384 | font-size: 12px; |
| 385 | padding: 4px 10px; | 385 | padding: 4px 10px; |
| 386 | - border: 1px solid #d35400; | 386 | + border: 1px solid #ccc; |
| 387 | border-radius: 4px; | 387 | border-radius: 4px; |
| 388 | - background: #fff8f3; | ||
| 389 | - color: #d35400; | 388 | + background: #fafafa; |
| 389 | + color: #333; | ||
| 390 | cursor: pointer; | 390 | cursor: pointer; |
| 391 | } | 391 | } |
| 392 | 392 | ||
| 393 | -.product-debug-btn-api-result:hover { | ||
| 394 | - background: #fdebd0; | ||
| 395 | -} | ||
| 396 | - | ||
| 397 | -.product-debug-link { | ||
| 398 | - display: inline-block; | ||
| 399 | - font-size: 12px; | ||
| 400 | - color: #e67e22; | ||
| 401 | - text-decoration: none; | ||
| 402 | -} | ||
| 403 | - | ||
| 404 | -.product-debug-link:hover { | ||
| 405 | - text-decoration: underline; | ||
| 406 | -} | ||
| 407 | - | ||
| 408 | -.api-result-viewer-backdrop { | ||
| 409 | - display: none; | ||
| 410 | - position: fixed; | ||
| 411 | - inset: 0; | ||
| 412 | - z-index: 2000; | ||
| 413 | - background: rgba(0, 0, 0, 0.45); | ||
| 414 | - align-items: center; | ||
| 415 | - justify-content: center; | ||
| 416 | - padding: 24px; | ||
| 417 | - box-sizing: border-box; | 393 | +.product-debug-inline-es-btn:hover { |
| 394 | + background: #f0f0f0; | ||
| 395 | + border-color: #bbb; | ||
| 418 | } | 396 | } |
| 419 | 397 | ||
| 420 | -.api-result-viewer-panel { | ||
| 421 | - background: #fff; | ||
| 422 | - border-radius: 8px; | ||
| 423 | - max-width: min(920px, 100%); | ||
| 424 | - max-height: min(85vh, 100%); | ||
| 425 | - display: flex; | ||
| 426 | - flex-direction: column; | ||
| 427 | - box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2); | ||
| 428 | - overflow: hidden; | 398 | +.product-debug--es-expanded { |
| 399 | + max-height: min(70vh, 720px); | ||
| 429 | } | 400 | } |
| 430 | 401 | ||
| 431 | -.api-result-viewer-header { | ||
| 432 | - display: flex; | ||
| 433 | - align-items: center; | ||
| 434 | - justify-content: space-between; | ||
| 435 | - gap: 12px; | ||
| 436 | - padding: 12px 14px; | ||
| 437 | - border-bottom: 1px solid #eee; | ||
| 438 | - flex-shrink: 0; | ||
| 439 | -} | ||
| 440 | - | ||
| 441 | -.api-result-viewer-title { | ||
| 442 | - font-size: 14px; | ||
| 443 | - font-weight: 600; | ||
| 444 | - color: #333; | 402 | +.product-es-doc-panel { |
| 403 | + margin-top: 10px; | ||
| 404 | + padding-top: 8px; | ||
| 405 | + border-top: 1px dashed #e8e8e8; | ||
| 445 | } | 406 | } |
| 446 | 407 | ||
| 447 | -.api-result-viewer-title code { | 408 | +.product-es-doc-panel-status { |
| 448 | font-size: 12px; | 409 | font-size: 12px; |
| 449 | - font-weight: normal; | ||
| 450 | - background: #f4f4f4; | ||
| 451 | - padding: 1px 6px; | ||
| 452 | - border-radius: 3px; | ||
| 453 | -} | ||
| 454 | - | ||
| 455 | -.api-result-viewer-close { | ||
| 456 | - border: none; | ||
| 457 | - background: transparent; | ||
| 458 | - font-size: 22px; | ||
| 459 | - line-height: 1; | ||
| 460 | color: #888; | 410 | color: #888; |
| 461 | - cursor: pointer; | ||
| 462 | - padding: 0 4px; | ||
| 463 | } | 411 | } |
| 464 | 412 | ||
| 465 | -.api-result-viewer-close:hover { | ||
| 466 | - color: #333; | 413 | +.product-es-doc-pre { |
| 414 | + margin: 6px 0 0; | ||
| 415 | + padding: 10px; | ||
| 416 | + background: #f5f5f5; | ||
| 417 | + border-radius: 4px; | ||
| 418 | + overflow: auto; | ||
| 419 | + max-height: 50vh; | ||
| 420 | + font-size: 11px; | ||
| 421 | + line-height: 1.35; | ||
| 422 | + white-space: pre-wrap; | ||
| 423 | + word-break: break-word; | ||
| 467 | } | 424 | } |
| 468 | 425 | ||
| 469 | -.api-result-viewer-pre { | ||
| 470 | - margin: 0; | ||
| 471 | - padding: 14px; | ||
| 472 | - overflow: auto; | ||
| 473 | - flex: 1; | 426 | +.product-debug-link { |
| 427 | + display: inline-block; | ||
| 428 | + margin-top: 0; | ||
| 474 | font-size: 12px; | 429 | font-size: 12px; |
| 475 | - line-height: 1.45; | ||
| 476 | - background: #fafafa; | ||
| 477 | - border: none; | ||
| 478 | - font-family: Menlo, Consolas, "Courier New", monospace; | ||
| 479 | - white-space: pre; | 430 | + color: #e67e22; |
| 431 | + text-decoration: none; | ||
| 432 | +} | ||
| 433 | + | ||
| 434 | +.product-debug-link:hover { | ||
| 435 | + text-decoration: underline; | ||
| 480 | } | 436 | } |
| 481 | 437 | ||
| 482 | .product-card:hover { | 438 | .product-card:hover { |
frontend/static/js/app.js
| @@ -63,85 +63,88 @@ let state = { | @@ -63,85 +63,88 @@ let state = { | ||
| 63 | debug: true // Always enable debug mode for test frontend | 63 | debug: true // Always enable debug mode for test frontend |
| 64 | }; | 64 | }; |
| 65 | 65 | ||
| 66 | -// 弹层:展示 /search/ 返回的 results[] 单条元素(非 ES 原始文档) | ||
| 67 | -function openApiResultViewer(item) { | ||
| 68 | - let backdrop = document.getElementById('apiResultViewerBackdrop'); | ||
| 69 | - if (!backdrop) { | ||
| 70 | - backdrop = document.createElement('div'); | ||
| 71 | - backdrop.id = 'apiResultViewerBackdrop'; | ||
| 72 | - backdrop.className = 'api-result-viewer-backdrop'; | ||
| 73 | - backdrop.innerHTML = ` | ||
| 74 | - <div class="api-result-viewer-panel" role="dialog" aria-modal="true" aria-labelledby="apiResultViewerTitle"> | ||
| 75 | - <div class="api-result-viewer-header"> | ||
| 76 | - <span id="apiResultViewerTitle" class="api-result-viewer-title">搜索结果项(API <code>results[]</code>)</span> | ||
| 77 | - <button type="button" class="api-result-viewer-close" aria-label="关闭">×</button> | ||
| 78 | - </div> | ||
| 79 | - <pre class="api-result-viewer-pre"></pre> | ||
| 80 | - </div> | ||
| 81 | - `; | ||
| 82 | - document.body.appendChild(backdrop); | ||
| 83 | - backdrop.addEventListener('click', (e) => { | ||
| 84 | - if (e.target === backdrop) { | ||
| 85 | - closeApiResultViewer(); | ||
| 86 | - } | ||
| 87 | - }); | ||
| 88 | - backdrop.querySelector('.api-result-viewer-close').addEventListener('click', closeApiResultViewer); | ||
| 89 | - document.addEventListener('keydown', (e) => { | ||
| 90 | - if (e.key === 'Escape') { | ||
| 91 | - closeApiResultViewer(); | ||
| 92 | - } | ||
| 93 | - }); | ||
| 94 | - } | ||
| 95 | - const pre = backdrop.querySelector('.api-result-viewer-pre'); | ||
| 96 | - try { | ||
| 97 | - pre.textContent = JSON.stringify(item, null, 2); | ||
| 98 | - } catch (err) { | ||
| 99 | - pre.textContent = String(item); | 66 | +// Initialize |
| 67 | +function initializeApp() { | ||
| 68 | + // 初始化租户下拉框和分面面板 | ||
| 69 | + console.log('Initializing app...'); | ||
| 70 | + initTenantSelect(); | ||
| 71 | + setupProductGridEsDocToggle(); | ||
| 72 | + const searchInput = document.getElementById('searchInput'); | ||
| 73 | + if (searchInput) { | ||
| 74 | + searchInput.focus(); | ||
| 100 | } | 75 | } |
| 101 | - backdrop.style.display = 'flex'; | ||
| 102 | } | 76 | } |
| 103 | 77 | ||
| 104 | -function closeApiResultViewer() { | ||
| 105 | - const backdrop = document.getElementById('apiResultViewerBackdrop'); | ||
| 106 | - if (backdrop) { | ||
| 107 | - backdrop.style.display = 'none'; | 78 | +/** Delegated handler: toggle inline ES raw response under each result card (survives innerHTML refresh on re-search). */ |
| 79 | +function setupProductGridEsDocToggle() { | ||
| 80 | + const grid = document.getElementById('productGrid'); | ||
| 81 | + if (!grid || grid.dataset.esDocToggleBound === '1') { | ||
| 82 | + return; | ||
| 108 | } | 83 | } |
| 84 | + grid.dataset.esDocToggleBound = '1'; | ||
| 85 | + grid.addEventListener('click', onProductGridEsDocToggleClick); | ||
| 109 | } | 86 | } |
| 110 | 87 | ||
| 111 | -function initProductGridResultViewer() { | ||
| 112 | - const grid = document.getElementById('productGrid'); | ||
| 113 | - if (!grid || grid.dataset.apiResultViewerBound === '1') { | 88 | +async function onProductGridEsDocToggleClick(event) { |
| 89 | + const btn = event.target.closest('[data-action="toggle-es-inline-doc"]'); | ||
| 90 | + if (!btn) { | ||
| 91 | + return; | ||
| 92 | + } | ||
| 93 | + event.preventDefault(); | ||
| 94 | + const debugRoot = btn.closest('.product-debug'); | ||
| 95 | + if (!debugRoot) { | ||
| 96 | + return; | ||
| 97 | + } | ||
| 98 | + const panel = debugRoot.querySelector('.product-es-doc-panel'); | ||
| 99 | + const pre = debugRoot.querySelector('.product-es-doc-pre'); | ||
| 100 | + const statusEl = debugRoot.querySelector('.product-es-doc-panel-status'); | ||
| 101 | + if (!panel || !pre || !statusEl) { | ||
| 114 | return; | 102 | return; |
| 115 | } | 103 | } |
| 116 | - grid.dataset.apiResultViewerBound = '1'; | ||
| 117 | - grid.addEventListener('click', (e) => { | ||
| 118 | - const btn = e.target.closest('.product-debug-btn-api-result'); | ||
| 119 | - if (!btn) { | ||
| 120 | - return; | ||
| 121 | - } | ||
| 122 | - e.preventDefault(); | ||
| 123 | - const idx = parseInt(btn.getAttribute('data-result-index'), 10); | ||
| 124 | - if (Number.isNaN(idx)) { | ||
| 125 | - return; | ||
| 126 | - } | ||
| 127 | - const results = state.lastSearchData && state.lastSearchData.results; | ||
| 128 | - if (!results || idx < 0 || idx >= results.length) { | ||
| 129 | - return; | ||
| 130 | - } | ||
| 131 | - openApiResultViewer(results[idx]); | ||
| 132 | - }); | ||
| 133 | -} | ||
| 134 | 104 | ||
| 135 | -// Initialize | ||
| 136 | -function initializeApp() { | ||
| 137 | - // 初始化租户下拉框和分面面板 | ||
| 138 | - console.log('Initializing app...'); | ||
| 139 | - initTenantSelect(); | ||
| 140 | - initProductGridResultViewer(); | ||
| 141 | - const searchInput = document.getElementById('searchInput'); | ||
| 142 | - if (searchInput) { | ||
| 143 | - searchInput.focus(); | 105 | + const spuId = btn.getAttribute('data-spu-id') || ''; |
| 106 | + const tenantId = getTenantId(); | ||
| 107 | + const url = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`; | ||
| 108 | + | ||
| 109 | + if (debugRoot.dataset.esInlineOpen === '1') { | ||
| 110 | + panel.setAttribute('hidden', ''); | ||
| 111 | + debugRoot.classList.remove('product-debug--es-expanded'); | ||
| 112 | + debugRoot.dataset.esInlineOpen = '0'; | ||
| 113 | + btn.textContent = '在结果中显示 ES 文档'; | ||
| 114 | + return; | ||
| 144 | } | 115 | } |
| 116 | + | ||
| 117 | + panel.removeAttribute('hidden'); | ||
| 118 | + debugRoot.classList.add('product-debug--es-expanded'); | ||
| 119 | + debugRoot.dataset.esInlineOpen = '1'; | ||
| 120 | + btn.textContent = '隐藏 ES 文档'; | ||
| 121 | + | ||
| 122 | + if (pre.textContent.length > 0) { | ||
| 123 | + panel.scrollIntoView({ behavior: 'smooth', block: 'nearest' }); | ||
| 124 | + return; | ||
| 125 | + } | ||
| 126 | + | ||
| 127 | + statusEl.style.display = ''; | ||
| 128 | + statusEl.textContent = '加载中…'; | ||
| 129 | + pre.style.display = 'none'; | ||
| 130 | + | ||
| 131 | + try { | ||
| 132 | + const response = await fetch(url); | ||
| 133 | + if (!response.ok) { | ||
| 134 | + const errText = await response.text(); | ||
| 135 | + throw new Error(`HTTP ${response.status}: ${errText.slice(0, 200)}`); | ||
| 136 | + } | ||
| 137 | + const data = await response.json(); | ||
| 138 | + pre.textContent = customStringify(data); | ||
| 139 | + statusEl.style.display = 'none'; | ||
| 140 | + pre.style.display = 'block'; | ||
| 141 | + } catch (err) { | ||
| 142 | + console.error('ES doc fetch failed', err); | ||
| 143 | + statusEl.textContent = `加载失败: ${err.message || err}`; | ||
| 144 | + pre.style.display = 'none'; | ||
| 145 | + } | ||
| 146 | + | ||
| 147 | + panel.scrollIntoView({ behavior: 'smooth', block: 'nearest' }); | ||
| 145 | } | 148 | } |
| 146 | 149 | ||
| 147 | // 在 DOM 加载完成后初始化 | 150 | // 在 DOM 加载完成后初始化 |
| @@ -420,7 +423,7 @@ function displayResults(data) { | @@ -420,7 +423,7 @@ function displayResults(data) { | ||
| 420 | 423 | ||
| 421 | const tenantId = getTenantId(); | 424 | const tenantId = getTenantId(); |
| 422 | 425 | ||
| 423 | - data.results.forEach((result, resultIndex) => { | 426 | + data.results.forEach((result) => { |
| 424 | const product = result; | 427 | const product = result; |
| 425 | const title = product.title || product.name || 'N/A'; | 428 | const title = product.title || product.name || 'N/A'; |
| 426 | const price = product.min_price || product.price || 'N/A'; | 429 | const price = product.min_price || product.price || 'N/A'; |
| @@ -472,13 +475,19 @@ function displayResults(data) { | @@ -472,13 +475,19 @@ function displayResults(data) { | ||
| 472 | <div class="product-debug-line">Fused score: ${fusedScore}</div> | 475 | <div class="product-debug-line">Fused score: ${fusedScore}</div> |
| 473 | ${titleLines} | 476 | ${titleLines} |
| 474 | <div class="product-debug-actions"> | 477 | <div class="product-debug-actions"> |
| 475 | - <button type="button" class="product-debug-btn-api-result" data-result-index="${resultIndex}"> | ||
| 476 | - 查看 API 结果项 | 478 | + <button type="button" class="product-debug-inline-es-btn" |
| 479 | + data-action="toggle-es-inline-doc" | ||
| 480 | + data-spu-id="${escapeAttr(String(spuId || ''))}"> | ||
| 481 | + 在结果中显示 ES 文档 | ||
| 477 | </button> | 482 | </button> |
| 478 | <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer"> | 483 | <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer"> |
| 479 | 查看 ES 原始文档 | 484 | 查看 ES 原始文档 |
| 480 | </a> | 485 | </a> |
| 481 | </div> | 486 | </div> |
| 487 | + <div class="product-es-doc-panel" hidden> | ||
| 488 | + <div class="product-es-doc-panel-status"></div> | ||
| 489 | + <pre class="product-es-doc-pre"></pre> | ||
| 490 | + </div> | ||
| 482 | </div> | 491 | </div> |
| 483 | `; | 492 | `; |
| 484 | } | 493 | } |
search/searcher.py
| @@ -232,6 +232,29 @@ class Searcher: | @@ -232,6 +232,29 @@ class Searcher: | ||
| 232 | return "" | 232 | return "" |
| 233 | return " ".join(str(value).strip().casefold().split()) | 233 | return " ".join(str(value).strip().casefold().split()) |
| 234 | 234 | ||
| 235 | + @staticmethod | ||
| 236 | + def _sku_option1_embedding_key( | ||
| 237 | + sku: Dict[str, Any], | ||
| 238 | + spu_option1_name: Optional[Any] = None, | ||
| 239 | + ) -> Optional[str]: | ||
| 240 | + """ | ||
| 241 | + Text sent to the embedding service for option1 must be "name:value" | ||
| 242 | + (option name from SKU row or SPU-level option1_name). | ||
| 243 | + """ | ||
| 244 | + value_raw = sku.get("option1_value") | ||
| 245 | + if value_raw is None: | ||
| 246 | + return None | ||
| 247 | + value = str(value_raw).strip() | ||
| 248 | + if not value: | ||
| 249 | + return None | ||
| 250 | + name = sku.get("option1_name") | ||
| 251 | + if name is None or not str(name).strip(): | ||
| 252 | + name = spu_option1_name | ||
| 253 | + name_str = str(name).strip() if name is not None and str(name).strip() else "" | ||
| 254 | + if name_str: | ||
| 255 | + value = f"{name_str}:{value}" | ||
| 256 | + return value.casefold() | ||
| 257 | + | ||
| 235 | def _build_sku_query_texts(self, parsed_query: ParsedQuery) -> List[str]: | 258 | def _build_sku_query_texts(self, parsed_query: ParsedQuery) -> List[str]: |
| 236 | """Collect original and translated query texts for SKU option matching.""" | 259 | """Collect original and translated query texts for SKU option matching.""" |
| 237 | candidates: List[str] = [] | 260 | candidates: List[str] = [] |
| @@ -271,8 +294,9 @@ class Searcher: | @@ -271,8 +294,9 @@ class Searcher: | ||
| 271 | self, | 294 | self, |
| 272 | skus: List[Dict[str, Any]], | 295 | skus: List[Dict[str, Any]], |
| 273 | query_texts: List[str], | 296 | query_texts: List[str], |
| 297 | + spu_option1_name: Optional[Any] = None, | ||
| 274 | ) -> Optional[int]: | 298 | ) -> Optional[int]: |
| 275 | - """Return the first SKU whose option1_value appears in query texts.""" | 299 | + """Return the first SKU whose option1_value (or name:value) appears in query texts.""" |
| 276 | if not skus or not query_texts: | 300 | if not skus or not query_texts: |
| 277 | return None | 301 | return None |
| 278 | 302 | ||
| @@ -282,6 +306,13 @@ class Searcher: | @@ -282,6 +306,13 @@ class Searcher: | ||
| 282 | continue | 306 | continue |
| 283 | if any(option1_value in query_text for query_text in query_texts): | 307 | if any(option1_value in query_text for query_text in query_texts): |
| 284 | return index | 308 | return index |
| 309 | + embed_key = self._sku_option1_embedding_key(sku, spu_option1_name) | ||
| 310 | + if embed_key and embed_key != option1_value: | ||
| 311 | + composite_norm = self._normalize_sku_match_text(embed_key.replace(":", " ")) | ||
| 312 | + if any(composite_norm in query_text for query_text in query_texts): | ||
| 313 | + return index | ||
| 314 | + if any(embed_key.casefold() in query_text for query_text in query_texts): | ||
| 315 | + return index | ||
| 285 | return None | 316 | return None |
| 286 | 317 | ||
| 287 | def _encode_query_vector_for_sku_matching( | 318 | def _encode_query_vector_for_sku_matching( |
| @@ -323,19 +354,17 @@ class Searcher: | @@ -323,19 +354,17 @@ class Searcher: | ||
| 323 | skus: List[Dict[str, Any]], | 354 | skus: List[Dict[str, Any]], |
| 324 | option1_vectors: Dict[str, np.ndarray], | 355 | option1_vectors: Dict[str, np.ndarray], |
| 325 | query_vector: np.ndarray, | 356 | query_vector: np.ndarray, |
| 357 | + spu_option1_name: Optional[Any] = None, | ||
| 326 | ) -> Tuple[Optional[int], Optional[float]]: | 358 | ) -> Tuple[Optional[int], Optional[float]]: |
| 327 | - """Select the SKU whose option1_value is most similar to the query.""" | 359 | + """Select the SKU whose option1 embedding key (name:value) is most similar to the query.""" |
| 328 | best_index: Optional[int] = None | 360 | best_index: Optional[int] = None |
| 329 | best_score: Optional[float] = None | 361 | best_score: Optional[float] = None |
| 330 | 362 | ||
| 331 | for index, sku in enumerate(skus): | 363 | for index, sku in enumerate(skus): |
| 332 | - option1_value_raw = sku.get("option1_value") | ||
| 333 | - if option1_value_raw is None: | 364 | + embed_key = self._sku_option1_embedding_key(sku, spu_option1_name) |
| 365 | + if not embed_key: | ||
| 334 | continue | 366 | continue |
| 335 | - option1_value = str(option1_value_raw).strip() | ||
| 336 | - if not option1_value: | ||
| 337 | - continue | ||
| 338 | - option_vector = option1_vectors.get(option1_value) | 367 | + option_vector = option1_vectors.get(embed_key) |
| 339 | if option_vector is None: | 368 | if option_vector is None: |
| 340 | continue | 369 | continue |
| 341 | score = float(np.inner(query_vector, option_vector)) | 370 | score = float(np.inner(query_vector, option_vector)) |
| @@ -385,7 +414,10 @@ class Searcher: | @@ -385,7 +414,10 @@ class Searcher: | ||
| 385 | if not isinstance(skus, list) or not skus: | 414 | if not isinstance(skus, list) or not skus: |
| 386 | continue | 415 | continue |
| 387 | 416 | ||
| 388 | - match_index = self._find_query_matching_sku_index(skus, query_texts) | 417 | + spu_option1_name = source.get("option1_name") |
| 418 | + match_index = self._find_query_matching_sku_index( | ||
| 419 | + skus, query_texts, spu_option1_name=spu_option1_name | ||
| 420 | + ) | ||
| 389 | if match_index is not None: | 421 | if match_index is not None: |
| 390 | self._promote_matching_sku(source, match_index) | 422 | self._promote_matching_sku(source, match_index) |
| 391 | text_matched += 1 | 423 | text_matched += 1 |
| @@ -393,14 +425,11 @@ class Searcher: | @@ -393,14 +425,11 @@ class Searcher: | ||
| 393 | 425 | ||
| 394 | unmatched_hits.append(hit) | 426 | unmatched_hits.append(hit) |
| 395 | for sku in skus: | 427 | for sku in skus: |
| 396 | - option1_value_raw = sku.get("option1_value") | ||
| 397 | - if option1_value_raw is None: | 428 | + embed_key = self._sku_option1_embedding_key(sku, spu_option1_name) |
| 429 | + if not embed_key or embed_key in seen_option1_values: | ||
| 398 | continue | 430 | continue |
| 399 | - option1_value = str(option1_value_raw).strip() | ||
| 400 | - if not option1_value or option1_value in seen_option1_values: | ||
| 401 | - continue | ||
| 402 | - seen_option1_values.add(option1_value) | ||
| 403 | - option1_values_to_encode.append(option1_value) | 431 | + seen_option1_values.add(embed_key) |
| 432 | + option1_values_to_encode.append(embed_key) | ||
| 404 | 433 | ||
| 405 | if not unmatched_hits or not option1_values_to_encode: | 434 | if not unmatched_hits or not option1_values_to_encode: |
| 406 | return | 435 | return |
| @@ -437,7 +466,12 @@ class Searcher: | @@ -437,7 +466,12 @@ class Searcher: | ||
| 437 | skus = source.get("skus") | 466 | skus = source.get("skus") |
| 438 | if not isinstance(skus, list) or not skus: | 467 | if not isinstance(skus, list) or not skus: |
| 439 | continue | 468 | continue |
| 440 | - match_index, _ = self._select_sku_by_embedding(skus, option1_vectors, query_vector_array) | 469 | + match_index, _ = self._select_sku_by_embedding( |
| 470 | + skus, | ||
| 471 | + option1_vectors, | ||
| 472 | + query_vector_array, | ||
| 473 | + spu_option1_name=source.get("option1_name"), | ||
| 474 | + ) | ||
| 441 | if match_index is None: | 475 | if match_index is None: |
| 442 | continue | 476 | continue |
| 443 | self._promote_matching_sku(source, match_index) | 477 | self._promote_matching_sku(source, match_index) |
tests/test_search_rerank_window.py
| @@ -370,6 +370,7 @@ def test_searcher_promotes_sku_when_option1_matches_translated_query(monkeypatch | @@ -370,6 +370,7 @@ def test_searcher_promotes_sku_when_option1_matches_translated_query(monkeypatch | ||
| 370 | "title": {"en": f"product-{doc_id}"}, | 370 | "title": {"en": f"product-{doc_id}"}, |
| 371 | "brief": {"en": f"brief-{doc_id}"}, | 371 | "brief": {"en": f"brief-{doc_id}"}, |
| 372 | "vendor": {"en": f"vendor-{doc_id}"}, | 372 | "vendor": {"en": f"vendor-{doc_id}"}, |
| 373 | + "option1_name": "Color", | ||
| 373 | "image_url": "https://img/default.jpg", | 374 | "image_url": "https://img/default.jpg", |
| 374 | "skus": [ | 375 | "skus": [ |
| 375 | {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"}, | 376 | {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"}, |
| @@ -406,8 +407,8 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc | @@ -406,8 +407,8 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc | ||
| 406 | encoder = _FakeTextEncoder( | 407 | encoder = _FakeTextEncoder( |
| 407 | { | 408 | { |
| 408 | "linen summer dress": [0.8, 0.2], | 409 | "linen summer dress": [0.8, 0.2], |
| 409 | - "Red": [1.0, 0.0], | ||
| 410 | - "Blue": [0.0, 1.0], | 410 | + "color:Red": [1.0, 0.0], |
| 411 | + "color:Blue": [0.0, 1.0], | ||
| 411 | } | 412 | } |
| 412 | ) | 413 | ) |
| 413 | 414 | ||
| @@ -431,6 +432,7 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc | @@ -431,6 +432,7 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc | ||
| 431 | "title": {"en": f"product-{doc_id}"}, | 432 | "title": {"en": f"product-{doc_id}"}, |
| 432 | "brief": {"en": f"brief-{doc_id}"}, | 433 | "brief": {"en": f"brief-{doc_id}"}, |
| 433 | "vendor": {"en": f"vendor-{doc_id}"}, | 434 | "vendor": {"en": f"vendor-{doc_id}"}, |
| 435 | + "option1_name": "Color", | ||
| 434 | "image_url": "https://img/default.jpg", | 436 | "image_url": "https://img/default.jpg", |
| 435 | "skus": [ | 437 | "skus": [ |
| 436 | {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"}, | 438 | {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"}, |