Commit a7cc907878e7843651ce97fcc6ed1a02f5c91825

Authored by tangwang
1 parent deccd68a

sku排序

config/config.yaml
... ... @@ -90,7 +90,34 @@ query_config:
90 90  
91 91 # 返回字段配置(_source includes)
92 92 # null表示返回所有字段,[]表示不返回任何字段,列表表示只返回指定字段
93   - source_fields: null
  93 + # 下列字段与 api/result_formatter.py(SpuResult 填充)及 search/searcher.py(SKU 排序/主图替换)一致
  94 + source_fields:
  95 + - spu_id
  96 + - handle
  97 + - title
  98 + - brief
  99 + - description
  100 + - vendor
  101 + - category_name
  102 + - category_name_text
  103 + - category_path
  104 + - category_id
  105 + - category_level
  106 + - category1_name
  107 + - category2_name
  108 + - category3_name
  109 + - tags
  110 + - min_price
  111 + - compare_at_price
  112 + - image_url
  113 + - sku_prices
  114 + - sku_weights
  115 + - sku_weight_units
  116 + - total_inventory
  117 + - option1_name
  118 + - option1_values
  119 + - specifications
  120 + - skus
94 121  
95 122 # KNN boost配置(向量召回的boost值)
96 123 knn_boost: 0.25 # Lower boost for embedding recall
... ...
frontend/index.html
... ... @@ -199,7 +199,7 @@
199 199 </footer>
200 200  
201 201 <script src="/static/js/tenant_facets_config.js?v=1.4"></script>
202   - <script src="/static/js/app.js?v=1.1"></script>
  202 + <script src="/static/js/app.js?v=1.0"></script>
203 203 <script>
204 204 // 自动补全功能(使用后端 /search/suggestions 接口)
205 205 const SUGGEST_API = API_BASE_URL + '/search/suggestions';
... ...
frontend/static/css/style.css
... ... @@ -375,108 +375,64 @@ body {
375 375 display: flex;
376 376 flex-wrap: wrap;
377 377 align-items: center;
378   - gap: 10px;
  378 + gap: 10px 14px;
379 379 margin-top: 8px;
380 380 }
381 381  
382   -.product-debug-btn-api-result {
  382 +.product-debug-inline-es-btn {
383 383 font-family: inherit;
384 384 font-size: 12px;
385 385 padding: 4px 10px;
386   - border: 1px solid #d35400;
  386 + border: 1px solid #ccc;
387 387 border-radius: 4px;
388   - background: #fff8f3;
389   - color: #d35400;
  388 + background: #fafafa;
  389 + color: #333;
390 390 cursor: pointer;
391 391 }
392 392  
393   -.product-debug-btn-api-result:hover {
394   - background: #fdebd0;
395   -}
396   -
397   -.product-debug-link {
398   - display: inline-block;
399   - font-size: 12px;
400   - color: #e67e22;
401   - text-decoration: none;
402   -}
403   -
404   -.product-debug-link:hover {
405   - text-decoration: underline;
406   -}
407   -
408   -.api-result-viewer-backdrop {
409   - display: none;
410   - position: fixed;
411   - inset: 0;
412   - z-index: 2000;
413   - background: rgba(0, 0, 0, 0.45);
414   - align-items: center;
415   - justify-content: center;
416   - padding: 24px;
417   - box-sizing: border-box;
  393 +.product-debug-inline-es-btn:hover {
  394 + background: #f0f0f0;
  395 + border-color: #bbb;
418 396 }
419 397  
420   -.api-result-viewer-panel {
421   - background: #fff;
422   - border-radius: 8px;
423   - max-width: min(920px, 100%);
424   - max-height: min(85vh, 100%);
425   - display: flex;
426   - flex-direction: column;
427   - box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
428   - overflow: hidden;
  398 +.product-debug--es-expanded {
  399 + max-height: min(70vh, 720px);
429 400 }
430 401  
431   -.api-result-viewer-header {
432   - display: flex;
433   - align-items: center;
434   - justify-content: space-between;
435   - gap: 12px;
436   - padding: 12px 14px;
437   - border-bottom: 1px solid #eee;
438   - flex-shrink: 0;
439   -}
440   -
441   -.api-result-viewer-title {
442   - font-size: 14px;
443   - font-weight: 600;
444   - color: #333;
  402 +.product-es-doc-panel {
  403 + margin-top: 10px;
  404 + padding-top: 8px;
  405 + border-top: 1px dashed #e8e8e8;
445 406 }
446 407  
447   -.api-result-viewer-title code {
  408 +.product-es-doc-panel-status {
448 409 font-size: 12px;
449   - font-weight: normal;
450   - background: #f4f4f4;
451   - padding: 1px 6px;
452   - border-radius: 3px;
453   -}
454   -
455   -.api-result-viewer-close {
456   - border: none;
457   - background: transparent;
458   - font-size: 22px;
459   - line-height: 1;
460 410 color: #888;
461   - cursor: pointer;
462   - padding: 0 4px;
463 411 }
464 412  
465   -.api-result-viewer-close:hover {
466   - color: #333;
  413 +.product-es-doc-pre {
  414 + margin: 6px 0 0;
  415 + padding: 10px;
  416 + background: #f5f5f5;
  417 + border-radius: 4px;
  418 + overflow: auto;
  419 + max-height: 50vh;
  420 + font-size: 11px;
  421 + line-height: 1.35;
  422 + white-space: pre-wrap;
  423 + word-break: break-word;
467 424 }
468 425  
469   -.api-result-viewer-pre {
470   - margin: 0;
471   - padding: 14px;
472   - overflow: auto;
473   - flex: 1;
  426 +.product-debug-link {
  427 + display: inline-block;
  428 + margin-top: 0;
474 429 font-size: 12px;
475   - line-height: 1.45;
476   - background: #fafafa;
477   - border: none;
478   - font-family: Menlo, Consolas, "Courier New", monospace;
479   - white-space: pre;
  430 + color: #e67e22;
  431 + text-decoration: none;
  432 +}
  433 +
  434 +.product-debug-link:hover {
  435 + text-decoration: underline;
480 436 }
481 437  
482 438 .product-card:hover {
... ...
frontend/static/js/app.js
... ... @@ -63,85 +63,88 @@ let state = {
63 63 debug: true // Always enable debug mode for test frontend
64 64 };
65 65  
66   -// 弹层:展示 /search/ 返回的 results[] 单条元素(非 ES 原始文档)
67   -function openApiResultViewer(item) {
68   - let backdrop = document.getElementById('apiResultViewerBackdrop');
69   - if (!backdrop) {
70   - backdrop = document.createElement('div');
71   - backdrop.id = 'apiResultViewerBackdrop';
72   - backdrop.className = 'api-result-viewer-backdrop';
73   - backdrop.innerHTML = `
74   - <div class="api-result-viewer-panel" role="dialog" aria-modal="true" aria-labelledby="apiResultViewerTitle">
75   - <div class="api-result-viewer-header">
76   - <span id="apiResultViewerTitle" class="api-result-viewer-title">搜索结果项(API <code>results[]</code>)</span>
77   - <button type="button" class="api-result-viewer-close" aria-label="关闭">&times;</button>
78   - </div>
79   - <pre class="api-result-viewer-pre"></pre>
80   - </div>
81   - `;
82   - document.body.appendChild(backdrop);
83   - backdrop.addEventListener('click', (e) => {
84   - if (e.target === backdrop) {
85   - closeApiResultViewer();
86   - }
87   - });
88   - backdrop.querySelector('.api-result-viewer-close').addEventListener('click', closeApiResultViewer);
89   - document.addEventListener('keydown', (e) => {
90   - if (e.key === 'Escape') {
91   - closeApiResultViewer();
92   - }
93   - });
94   - }
95   - const pre = backdrop.querySelector('.api-result-viewer-pre');
96   - try {
97   - pre.textContent = JSON.stringify(item, null, 2);
98   - } catch (err) {
99   - pre.textContent = String(item);
  66 +// Initialize
  67 +function initializeApp() {
  68 + // 初始化租户下拉框和分面面板
  69 + console.log('Initializing app...');
  70 + initTenantSelect();
  71 + setupProductGridEsDocToggle();
  72 + const searchInput = document.getElementById('searchInput');
  73 + if (searchInput) {
  74 + searchInput.focus();
100 75 }
101   - backdrop.style.display = 'flex';
102 76 }
103 77  
104   -function closeApiResultViewer() {
105   - const backdrop = document.getElementById('apiResultViewerBackdrop');
106   - if (backdrop) {
107   - backdrop.style.display = 'none';
/**
 * Bind the inline "ES document" toggle handler to the #productGrid container.
 *
 * The listener is attached to the grid itself rather than to individual
 * buttons, so it keeps working after the grid's innerHTML is replaced by a
 * new search. A data attribute on the grid marks it as bound, which makes
 * repeated calls a no-op.
 */
function setupProductGridEsDocToggle() {
    const productGrid = document.getElementById('productGrid');
    // Nothing to do when the grid is absent or the handler is already attached.
    if (productGrid && productGrid.dataset.esDocToggleBound !== '1') {
        productGrid.dataset.esDocToggleBound = '1';
        productGrid.addEventListener('click', onProductGridEsDocToggleClick);
    }
}
110 87  
111   -function initProductGridResultViewer() {
112   - const grid = document.getElementById('productGrid');
113   - if (!grid || grid.dataset.apiResultViewerBound === '1') {
  88 +async function onProductGridEsDocToggleClick(event) {
  89 + const btn = event.target.closest('[data-action="toggle-es-inline-doc"]');
  90 + if (!btn) {
  91 + return;
  92 + }
  93 + event.preventDefault();
  94 + const debugRoot = btn.closest('.product-debug');
  95 + if (!debugRoot) {
  96 + return;
  97 + }
  98 + const panel = debugRoot.querySelector('.product-es-doc-panel');
  99 + const pre = debugRoot.querySelector('.product-es-doc-pre');
  100 + const statusEl = debugRoot.querySelector('.product-es-doc-panel-status');
  101 + if (!panel || !pre || !statusEl) {
114 102 return;
115 103 }
116   - grid.dataset.apiResultViewerBound = '1';
117   - grid.addEventListener('click', (e) => {
118   - const btn = e.target.closest('.product-debug-btn-api-result');
119   - if (!btn) {
120   - return;
121   - }
122   - e.preventDefault();
123   - const idx = parseInt(btn.getAttribute('data-result-index'), 10);
124   - if (Number.isNaN(idx)) {
125   - return;
126   - }
127   - const results = state.lastSearchData && state.lastSearchData.results;
128   - if (!results || idx < 0 || idx >= results.length) {
129   - return;
130   - }
131   - openApiResultViewer(results[idx]);
132   - });
133   -}
134 104  
135   -// Initialize
136   -function initializeApp() {
137   - // 初始化租户下拉框和分面面板
138   - console.log('Initializing app...');
139   - initTenantSelect();
140   - initProductGridResultViewer();
141   - const searchInput = document.getElementById('searchInput');
142   - if (searchInput) {
143   - searchInput.focus();
  105 + const spuId = btn.getAttribute('data-spu-id') || '';
  106 + const tenantId = getTenantId();
  107 + const url = `${API_BASE_URL}/search/es-doc/${encodeURIComponent(spuId)}?tenant_id=${encodeURIComponent(tenantId)}`;
  108 +
  109 + if (debugRoot.dataset.esInlineOpen === '1') {
  110 + panel.setAttribute('hidden', '');
  111 + debugRoot.classList.remove('product-debug--es-expanded');
  112 + debugRoot.dataset.esInlineOpen = '0';
  113 + btn.textContent = '在结果中显示 ES 文档';
  114 + return;
144 115 }
  116 +
  117 + panel.removeAttribute('hidden');
  118 + debugRoot.classList.add('product-debug--es-expanded');
  119 + debugRoot.dataset.esInlineOpen = '1';
  120 + btn.textContent = '隐藏 ES 文档';
  121 +
  122 + if (pre.textContent.length > 0) {
  123 + panel.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
  124 + return;
  125 + }
  126 +
  127 + statusEl.style.display = '';
  128 + statusEl.textContent = '加载中…';
  129 + pre.style.display = 'none';
  130 +
  131 + try {
  132 + const response = await fetch(url);
  133 + if (!response.ok) {
  134 + const errText = await response.text();
  135 + throw new Error(`HTTP ${response.status}: ${errText.slice(0, 200)}`);
  136 + }
  137 + const data = await response.json();
  138 + pre.textContent = customStringify(data);
  139 + statusEl.style.display = 'none';
  140 + pre.style.display = 'block';
  141 + } catch (err) {
  142 + console.error('ES doc fetch failed', err);
  143 + statusEl.textContent = `加载失败: ${err.message || err}`;
  144 + pre.style.display = 'none';
  145 + }
  146 +
  147 + panel.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
145 148 }
146 149  
147 150 // 在 DOM 加载完成后初始化
... ... @@ -420,7 +423,7 @@ function displayResults(data) {
420 423  
421 424 const tenantId = getTenantId();
422 425  
423   - data.results.forEach((result, resultIndex) => {
  426 + data.results.forEach((result) => {
424 427 const product = result;
425 428 const title = product.title || product.name || 'N/A';
426 429 const price = product.min_price || product.price || 'N/A';
... ... @@ -472,13 +475,19 @@ function displayResults(data) {
472 475 <div class="product-debug-line">Fused score: ${fusedScore}</div>
473 476 ${titleLines}
474 477 <div class="product-debug-actions">
475   - <button type="button" class="product-debug-btn-api-result" data-result-index="${resultIndex}">
476   - 查看 API 结果项
  478 + <button type="button" class="product-debug-inline-es-btn"
  479 + data-action="toggle-es-inline-doc"
  480 + data-spu-id="${escapeAttr(String(spuId || ''))}">
  481 + 在结果中显示 ES 文档
477 482 </button>
478 483 <a class="product-debug-link" href="${rawUrl}" target="_blank" rel="noopener noreferrer">
479 484 查看 ES 原始文档
480 485 </a>
481 486 </div>
  487 + <div class="product-es-doc-panel" hidden>
  488 + <div class="product-es-doc-panel-status"></div>
  489 + <pre class="product-es-doc-pre"></pre>
  490 + </div>
482 491 </div>
483 492 `;
484 493 }
... ...
search/searcher.py
... ... @@ -232,6 +232,29 @@ class Searcher:
232 232 return ""
233 233 return " ".join(str(value).strip().casefold().split())
234 234  
  235 + @staticmethod
  236 + def _sku_option1_embedding_key(
  237 + sku: Dict[str, Any],
  238 + spu_option1_name: Optional[Any] = None,
  239 + ) -> Optional[str]:
  240 + """
  241 + Text sent to the embedding service for option1 must be "name:value"
  242 + (option name from SKU row or SPU-level option1_name).
  243 + """
  244 + value_raw = sku.get("option1_value")
  245 + if value_raw is None:
  246 + return None
  247 + value = str(value_raw).strip()
  248 + if not value:
  249 + return None
  250 + name = sku.get("option1_name")
  251 + if name is None or not str(name).strip():
  252 + name = spu_option1_name
  253 + name_str = str(name).strip() if name is not None and str(name).strip() else ""
  254 + if name_str:
  255 + value = f"{name_str}:{value}"
  256 + return value.casefold()
  257 +
235 258 def _build_sku_query_texts(self, parsed_query: ParsedQuery) -> List[str]:
236 259 """Collect original and translated query texts for SKU option matching."""
237 260 candidates: List[str] = []
... ... @@ -271,8 +294,9 @@ class Searcher:
271 294 self,
272 295 skus: List[Dict[str, Any]],
273 296 query_texts: List[str],
  297 + spu_option1_name: Optional[Any] = None,
274 298 ) -> Optional[int]:
275   - """Return the first SKU whose option1_value appears in query texts."""
  299 + """Return the first SKU whose option1_value (or name:value) appears in query texts."""
276 300 if not skus or not query_texts:
277 301 return None
278 302  
... ... @@ -282,6 +306,13 @@ class Searcher:
282 306 continue
283 307 if any(option1_value in query_text for query_text in query_texts):
284 308 return index
  309 + embed_key = self._sku_option1_embedding_key(sku, spu_option1_name)
  310 + if embed_key and embed_key != option1_value:
  311 + composite_norm = self._normalize_sku_match_text(embed_key.replace(":", " "))
  312 + if any(composite_norm in query_text for query_text in query_texts):
  313 + return index
  314 + if any(embed_key.casefold() in query_text for query_text in query_texts):
  315 + return index
285 316 return None
286 317  
287 318 def _encode_query_vector_for_sku_matching(
... ... @@ -323,19 +354,17 @@ class Searcher:
323 354 skus: List[Dict[str, Any]],
324 355 option1_vectors: Dict[str, np.ndarray],
325 356 query_vector: np.ndarray,
  357 + spu_option1_name: Optional[Any] = None,
326 358 ) -> Tuple[Optional[int], Optional[float]]:
327   - """Select the SKU whose option1_value is most similar to the query."""
  359 + """Select the SKU whose option1 embedding key (name:value) is most similar to the query."""
328 360 best_index: Optional[int] = None
329 361 best_score: Optional[float] = None
330 362  
331 363 for index, sku in enumerate(skus):
332   - option1_value_raw = sku.get("option1_value")
333   - if option1_value_raw is None:
  364 + embed_key = self._sku_option1_embedding_key(sku, spu_option1_name)
  365 + if not embed_key:
334 366 continue
335   - option1_value = str(option1_value_raw).strip()
336   - if not option1_value:
337   - continue
338   - option_vector = option1_vectors.get(option1_value)
  367 + option_vector = option1_vectors.get(embed_key)
339 368 if option_vector is None:
340 369 continue
341 370 score = float(np.inner(query_vector, option_vector))
... ... @@ -385,7 +414,10 @@ class Searcher:
385 414 if not isinstance(skus, list) or not skus:
386 415 continue
387 416  
388   - match_index = self._find_query_matching_sku_index(skus, query_texts)
  417 + spu_option1_name = source.get("option1_name")
  418 + match_index = self._find_query_matching_sku_index(
  419 + skus, query_texts, spu_option1_name=spu_option1_name
  420 + )
389 421 if match_index is not None:
390 422 self._promote_matching_sku(source, match_index)
391 423 text_matched += 1
... ... @@ -393,14 +425,11 @@ class Searcher:
393 425  
394 426 unmatched_hits.append(hit)
395 427 for sku in skus:
396   - option1_value_raw = sku.get("option1_value")
397   - if option1_value_raw is None:
  428 + embed_key = self._sku_option1_embedding_key(sku, spu_option1_name)
  429 + if not embed_key or embed_key in seen_option1_values:
398 430 continue
399   - option1_value = str(option1_value_raw).strip()
400   - if not option1_value or option1_value in seen_option1_values:
401   - continue
402   - seen_option1_values.add(option1_value)
403   - option1_values_to_encode.append(option1_value)
  431 + seen_option1_values.add(embed_key)
  432 + option1_values_to_encode.append(embed_key)
404 433  
405 434 if not unmatched_hits or not option1_values_to_encode:
406 435 return
... ... @@ -437,7 +466,12 @@ class Searcher:
437 466 skus = source.get("skus")
438 467 if not isinstance(skus, list) or not skus:
439 468 continue
440   - match_index, _ = self._select_sku_by_embedding(skus, option1_vectors, query_vector_array)
  469 + match_index, _ = self._select_sku_by_embedding(
  470 + skus,
  471 + option1_vectors,
  472 + query_vector_array,
  473 + spu_option1_name=source.get("option1_name"),
  474 + )
441 475 if match_index is None:
442 476 continue
443 477 self._promote_matching_sku(source, match_index)
... ...
tests/test_search_rerank_window.py
... ... @@ -370,6 +370,7 @@ def test_searcher_promotes_sku_when_option1_matches_translated_query(monkeypatch
370 370 "title": {"en": f"product-{doc_id}"},
371 371 "brief": {"en": f"brief-{doc_id}"},
372 372 "vendor": {"en": f"vendor-{doc_id}"},
  373 + "option1_name": "Color",
373 374 "image_url": "https://img/default.jpg",
374 375 "skus": [
375 376 {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"},
... ... @@ -406,8 +407,8 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc
406 407 encoder = _FakeTextEncoder(
407 408 {
408 409 "linen summer dress": [0.8, 0.2],
409   - "Red": [1.0, 0.0],
410   - "Blue": [0.0, 1.0],
  410 + "color:Red": [1.0, 0.0],
  411 + "color:Blue": [0.0, 1.0],
411 412 }
412 413 )
413 414  
... ... @@ -431,6 +432,7 @@ def test_searcher_promotes_sku_by_embedding_when_query_has_no_direct_option_matc
431 432 "title": {"en": f"product-{doc_id}"},
432 433 "brief": {"en": f"brief-{doc_id}"},
433 434 "vendor": {"en": f"vendor-{doc_id}"},
  435 + "option1_name": "Color",
434 436 "image_url": "https://img/default.jpg",
435 437 "skus": [
436 438 {"sku_id": "sku-red", "option1_value": "Red", "image_src": "https://img/red.jpg"},
... ...