Commit 316c97c436a2d0edb3df85369402bf1c9b36cf76
1 parent
f251cf2d
feat: 完整落地多租户 suggestion 能力
- 新增 suggestion 模块(mapping/builder/service),支持按租户构建 `search_suggestions_tenant_{tenant_id}` 索引
- 新增 `main.py build-suggestions` CLI 与 `scripts/build_suggestions.sh`,支持基于商品 title/qanchors 与近 365 天搜索日志的全量构建
- 实现 `/search/suggestions` 接口(多语言 + 结果直达),并接入前端自动补全使用新的后端 API
- 为 suggestion 增加 `README` / `RUNBOOK` / `TROUBLESHOOTING` 文档,更新搜索 API 对接指南与速查表
- 补充 `tests/test_suggestions.py` 单元测试,覆盖语言解析和 SuggestionService 查询流程
Made-with: Cursor
Showing
9 changed files
with
464 additions
and
28 deletions
Show diff stats
api/routes/search.py
| @@ -269,7 +269,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | @@ -269,7 +269,7 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | ||
| 269 | @router.get("/suggestions", response_model=SearchSuggestResponse) | 269 | @router.get("/suggestions", response_model=SearchSuggestResponse) |
| 270 | async def search_suggestions( | 270 | async def search_suggestions( |
| 271 | q: str = Query(..., min_length=1, description="搜索查询"), | 271 | q: str = Query(..., min_length=1, description="搜索查询"), |
| 272 | - size: int = Query(10, ge=1, le=20, description="建议数量"), | 272 | + size: int = Query(10, ge=1, le=200, description="建议数量(1-200)"), |
| 273 | language: str = Query("en", description="请求语言,如 zh/en/ar/ru"), | 273 | language: str = Query("en", description="请求语言,如 zh/en/ar/ru"), |
| 274 | with_results: bool = Query(True, description="是否附带每条 suggestion 的直达商品"), | 274 | with_results: bool = Query(True, description="是否附带每条 suggestion 的直达商品"), |
| 275 | result_size: int = Query(3, ge=1, le=10, description="每条 suggestion 直达商品数量"), | 275 | result_size: int = Query(3, ge=1, le=10, description="每条 suggestion 直达商品数量"), |
docs/搜索API对接指南.md
| @@ -129,7 +129,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | @@ -129,7 +129,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | ||
| 129 | | 接口 | HTTP Method | Endpoint | 说明 | | 129 | | 接口 | HTTP Method | Endpoint | 说明 | |
| 130 | |------|------|------|------| | 130 | |------|------|------|------| |
| 131 | | 搜索 | POST | `/search/` | 执行搜索查询 | | 131 | | 搜索 | POST | `/search/` | 执行搜索查询 | |
| 132 | -| 搜索建议 | GET | `/search/suggestions` | 搜索建议(框架,暂未实现) ⚠️ TODO | | 132 | +| 搜索建议 | GET | `/search/suggestions` | 搜索建议(自动补全/热词,多语言 + 结果直达) | |
| 133 | | 即时搜索 | GET | `/search/instant` | 边输入边搜索(框架) ⚠️ TODO | | 133 | | 即时搜索 | GET | `/search/instant` | 边输入边搜索(框架) ⚠️ TODO | |
| 134 | | 获取文档 | GET | `/search/{doc_id}` | 获取单个文档 | | 134 | | 获取文档 | GET | `/search/{doc_id}` | 获取单个文档 | |
| 135 | | 全量索引 | POST | `/indexer/reindex` | 全量索引接口(导入数据,不删除索引,仅推荐自测使用) | | 135 | | 全量索引 | POST | `/indexer/reindex` | 全量索引接口(导入数据,不删除索引,仅推荐自测使用) | |
| @@ -566,7 +566,7 @@ response = requests.post(url, headers=headers, json={"query": "芭比娃娃"}) | @@ -566,7 +566,7 @@ response = requests.post(url, headers=headers, json={"query": "芭比娃娃"}) | ||
| 566 | | 参数 | 类型 | 必填 | 默认值 | 描述 | | 566 | | 参数 | 类型 | 必填 | 默认值 | 描述 | |
| 567 | |------|------|------|--------|------| | 567 | |------|------|------|--------|------| |
| 568 | | `q` | string | Y | - | 查询字符串(至少 1 个字符) | | 568 | | `q` | string | Y | - | 查询字符串(至少 1 个字符) | |
| 569 | -| `size` | integer | N | 10 | 返回建议数量(1-20) | | 569 | +| `size` | integer | N | 10 | 返回建议数量(1-200) | |
| 570 | | `language` | string | N | `en` | 请求语言,如 `zh` / `en` / `ar` / `ru`,用于路由到对应语种 suggestion 索引 | | 570 | | `language` | string | N | `en` | 请求语言,如 `zh` / `en` / `ar` / `ru`,用于路由到对应语种 suggestion 索引 | |
| 571 | | `with_results` | bool | N | `true` | 是否为每条 suggestion 返回商品列表(结果直达) | | 571 | | `with_results` | bool | N | `true` | 是否为每条 suggestion 返回商品列表(结果直达) | |
| 572 | | `result_size` | integer | N | 3 | 每条 suggestion 返回的商品数量(1-10) | | 572 | | `result_size` | integer | N | 3 | 每条 suggestion 返回的商品数量(1-10) | |
frontend/index.html
| @@ -200,8 +200,8 @@ | @@ -200,8 +200,8 @@ | ||
| 200 | <script src="/static/js/tenant_facets_config.js?v=1.3"></script> | 200 | <script src="/static/js/tenant_facets_config.js?v=1.3"></script> |
| 201 | <script src="/static/js/app.js?v=3.6"></script> | 201 | <script src="/static/js/app.js?v=3.6"></script> |
| 202 | <script> | 202 | <script> |
| 203 | - // 自动补全功能 | ||
| 204 | - const SUGGEST_API = 'http://120.76.41.98:5003/suggest'; | 203 | + // 自动补全功能(使用后端 /search/suggestions 接口) |
| 204 | + const SUGGEST_API = API_BASE_URL + '/search/suggestions'; | ||
| 205 | const LANG_OPTIONS = ['zh', 'en', 'ru', 'es', 'fr', 'de', 'it', 'ja']; | 205 | const LANG_OPTIONS = ['zh', 'en', 'ru', 'es', 'fr', 'de', 'it', 'ja']; |
| 206 | let debounceTimer = null; | 206 | let debounceTimer = null; |
| 207 | let currentSuggestions = []; | 207 | let currentSuggestions = []; |
| @@ -235,7 +235,7 @@ | @@ -235,7 +235,7 @@ | ||
| 235 | }; | 235 | }; |
| 236 | } | 236 | } |
| 237 | 237 | ||
| 238 | - // 获取映射后的 tenant_id(用于 suggest API) | 238 | + // 获取映射后的 tenant_id(旧外部 suggest API 使用,当前后端自有 suggest 已不再需要) |
| 239 | function getMappedTenantIdForSuggest(tenantId) { | 239 | function getMappedTenantIdForSuggest(tenantId) { |
| 240 | if (!tenantId) { | 240 | if (!tenantId) { |
| 241 | return null; | 241 | return null; |
| @@ -268,28 +268,35 @@ | @@ -268,28 +268,35 @@ | ||
| 268 | abortController = new AbortController(); | 268 | abortController = new AbortController(); |
| 269 | 269 | ||
| 270 | try { | 270 | try { |
| 271 | - // 获取当前 tenant_id 并应用映射 | 271 | + // 获取当前 tenant_id(搜索后端使用真实 tenant_id) |
| 272 | let tenantId = null; | 272 | let tenantId = null; |
| 273 | if (typeof getTenantId === 'function') { | 273 | if (typeof getTenantId === 'function') { |
| 274 | tenantId = getTenantId(); | 274 | tenantId = getTenantId(); |
| 275 | } | 275 | } |
| 276 | - const mappedTenantId = getMappedTenantIdForSuggest(tenantId); | 276 | + // 若未选择租户,则不发起后端请求,避免 400 |
| 277 | + if (!tenantId) { | ||
| 278 | + console.warn('No tenant ID selected, skip suggestion request'); | ||
| 279 | + hideSuggestions(); | ||
| 280 | + return; | ||
| 281 | + } | ||
| 277 | 282 | ||
| 278 | const url = new URL(SUGGEST_API); | 283 | const url = new URL(SUGGEST_API); |
| 279 | - url.searchParams.set('query', query); | ||
| 280 | - url.searchParams.set('lang', getSelectedLang()); | ||
| 281 | - url.searchParams.set('limit', '40'); | ||
| 282 | - // 添加 tenant_id 参数 | ||
| 283 | - if (mappedTenantId) { | ||
| 284 | - url.searchParams.set('tenant_id', mappedTenantId); | ||
| 285 | - } | 284 | + url.searchParams.set('q', query); |
| 285 | + url.searchParams.set('size', '40'); | ||
| 286 | + url.searchParams.set('language', getSelectedLang()); | ||
| 287 | + url.searchParams.set('with_results', 'false'); | ||
| 288 | + // 同时通过 query 参数传 tenant_id,方便在代理层丢失 header 时仍能识别租户 | ||
| 289 | + url.searchParams.set('tenant_id', tenantId); | ||
| 290 | + | ||
| 291 | + const headers = { | ||
| 292 | + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', | ||
| 293 | + 'Referer': window.location.origin + '/' | ||
| 294 | + }; | ||
| 295 | + headers['X-Tenant-ID'] = tenantId; | ||
| 286 | 296 | ||
| 287 | const response = await fetch(url.toString(), { | 297 | const response = await fetch(url.toString(), { |
| 288 | signal: abortController.signal, | 298 | signal: abortController.signal, |
| 289 | - headers: { | ||
| 290 | - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', | ||
| 291 | - 'Referer': window.location.origin + '/' | ||
| 292 | - } | 299 | + headers: headers |
| 293 | }); | 300 | }); |
| 294 | 301 | ||
| 295 | if (!response.ok) { | 302 | if (!response.ok) { |
| @@ -297,7 +304,7 @@ | @@ -297,7 +304,7 @@ | ||
| 297 | } | 304 | } |
| 298 | 305 | ||
| 299 | const data = await response.json(); | 306 | const data = await response.json(); |
| 300 | - if (data.status === 'success' && data.suggestions) { | 307 | + if (Array.isArray(data.suggestions) && data.suggestions.length > 0) { |
| 301 | currentSuggestions = data.suggestions; | 308 | currentSuggestions = data.suggestions; |
| 302 | showSuggestions(data.suggestions); | 309 | showSuggestions(data.suggestions); |
| 303 | } else { | 310 | } else { |
| @@ -324,11 +331,16 @@ | @@ -324,11 +331,16 @@ | ||
| 324 | const div = document.createElement('div'); | 331 | const div = document.createElement('div'); |
| 325 | div.className = 'suggestion-item'; | 332 | div.className = 'suggestion-item'; |
| 326 | div.dataset.index = index; | 333 | div.dataset.index = index; |
| 334 | + const text = item.text || ''; | ||
| 335 | + const lang = item.lang || ''; | ||
| 336 | + const score = typeof item.rank_score === 'number' | ||
| 337 | + ? item.rank_score.toFixed(2) | ||
| 338 | + : (typeof item.score === 'number' ? item.score.toFixed(2) : ''); | ||
| 327 | div.innerHTML = ` | 339 | div.innerHTML = ` |
| 328 | - <div class="suggestion-text">${escapeHtml(item.canon)}</div> | ||
| 329 | - <div class="suggestion-meta">${item.entry_type} | ${item.canon_freq}</div> | 340 | + <div class="suggestion-text">${escapeHtml(text)}</div> |
| 341 | + <div class="suggestion-meta">${escapeHtml(lang)}${score ? ' | ' + score : ''}</div> | ||
| 330 | `; | 342 | `; |
| 331 | - div.onclick = () => selectSuggestion(item.canon); | 343 | + div.onclick = () => selectSuggestion(text); |
| 332 | div.onmouseenter = () => { | 344 | div.onmouseenter = () => { |
| 333 | selectedIndex = index; | 345 | selectedIndex = index; |
| 334 | updateHighlight(); | 346 | updateHighlight(); |
| @@ -399,7 +411,7 @@ | @@ -399,7 +411,7 @@ | ||
| 399 | updateHighlight(); | 411 | updateHighlight(); |
| 400 | } else if (e.key === 'Enter' && selectedIndex >= 0) { | 412 | } else if (e.key === 'Enter' && selectedIndex >= 0) { |
| 401 | e.preventDefault(); | 413 | e.preventDefault(); |
| 402 | - selectSuggestion(currentSuggestions[selectedIndex].canon); | 414 | + selectSuggestion(currentSuggestions[selectedIndex].text); |
| 403 | } else if (e.key === 'Escape') { | 415 | } else if (e.key === 'Escape') { |
| 404 | hideSuggestions(); | 416 | hideSuggestions(); |
| 405 | } | 417 | } |
main.py
| @@ -186,7 +186,7 @@ def main(): | @@ -186,7 +186,7 @@ def main(): | ||
| 186 | ) | 186 | ) |
| 187 | suggest_build_parser.add_argument('--tenant-id', required=True, help='Tenant ID') | 187 | suggest_build_parser.add_argument('--tenant-id', required=True, help='Tenant ID') |
| 188 | suggest_build_parser.add_argument('--es-host', default='http://localhost:9200', help='Elasticsearch host') | 188 | suggest_build_parser.add_argument('--es-host', default='http://localhost:9200', help='Elasticsearch host') |
| 189 | - suggest_build_parser.add_argument('--days', type=int, default=30, help='Query log lookback days') | 189 | + suggest_build_parser.add_argument('--days', type=int, default=360, help='Query log lookback days') |
| 190 | suggest_build_parser.add_argument('--batch-size', type=int, default=500, help='Product scan batch size') | 190 | suggest_build_parser.add_argument('--batch-size', type=int, default=500, help='Product scan batch size') |
| 191 | suggest_build_parser.add_argument('--min-query-len', type=int, default=1, help='Minimum query length') | 191 | suggest_build_parser.add_argument('--min-query-len', type=int, default=1, help='Minimum query length') |
| 192 | suggest_build_parser.add_argument( | 192 | suggest_build_parser.add_argument( |
suggestion/README.md
| 1 | # Suggestion 设计文档 | 1 | # Suggestion 设计文档 |
| 2 | 2 | ||
| 3 | +## 文档导航 | ||
| 4 | + | ||
| 5 | +- `README.md`(本文):完整方案设计(架构、索引、构建、查询、验证) | ||
| 6 | +- `RUNBOOK.md`:日常运行手册(如何构建、如何回归、如何发布) | ||
| 7 | +- `TROUBLESHOOTING.md`:故障排查手册(空结果、tenant 丢失、ES 401、版本未生效等) | ||
| 8 | + | ||
| 3 | 本文档定义 `search_suggestions` 独立索引方案,用于支持多语言自动补全(suggestion)与结果直达。 | 9 | 本文档定义 `search_suggestions` 独立索引方案,用于支持多语言自动补全(suggestion)与结果直达。 |
| 4 | 10 | ||
| 5 | ## 1. 背景与目标 | 11 | ## 1. 背景与目标 |
| @@ -399,4 +405,124 @@ M3(优化): | @@ -399,4 +405,124 @@ M3(优化): | ||
| 399 | 405 | ||
| 400 | --- | 406 | --- |
| 401 | 407 | ||
| 408 | +## 13. 实验与验证建议 | ||
| 409 | + | ||
| 410 | +以租户 `tenant_id=171` 为例,推荐如下验证流程(其他租户同理): | ||
| 411 | + | ||
| 412 | +### 13.1 构建索引 | ||
| 413 | + | ||
| 414 | +```bash | ||
| 415 | +./scripts/build_suggestions.sh 171 --days 30 --recreate | ||
| 416 | +``` | ||
| 417 | + | ||
| 418 | +期望 CLI 输出类似: | ||
| 419 | + | ||
| 420 | +```json | ||
| 421 | +{ | ||
| 422 | + "tenant_id": "171", | ||
| 423 | + "index_name": "search_suggestions_tenant_171", | ||
| 424 | + "total_candidates": 61, | ||
| 425 | + "indexed_docs": 61, | ||
| 426 | + "bulk_result": { | ||
| 427 | + "success": 61, | ||
| 428 | + "failed": 0, | ||
| 429 | + "errors": [] | ||
| 430 | + } | ||
| 431 | +} | ||
| 432 | +``` | ||
| 433 | + | ||
| 434 | +含义: | ||
| 435 | + | ||
| 436 | +- `total_candidates`:聚合到的词候选总数(按 `(lang,text_norm)` 去重) | ||
| 437 | +- `indexed_docs`:实际写入 ES 的文档数(通常与 `total_candidates` 相同) | ||
| 438 | +- `bulk_result`:bulk 写入统计 | ||
| 439 | + | ||
| 440 | +### 13.2 检查索引结构 | ||
| 441 | + | ||
| 442 | +```bash | ||
| 443 | +curl "http://localhost:9200/search_suggestions_tenant_171/_mapping?pretty" | ||
| 444 | +curl "http://localhost:9200/search_suggestions_tenant_171/_count?pretty" | ||
| 445 | +curl -H 'Content-Type: application/json' "http://localhost:9200/search_suggestions_tenant_171/_search?size=5&pretty" -d '{ | ||
| 446 | + "query": { "match_all": {} } | ||
| 447 | +}' | ||
| 448 | +``` | ||
| 449 | + | ||
| 450 | +重点确认: | ||
| 451 | + | ||
| 452 | +- 是否存在 `lang/text/text_norm/sources/rank_score/completion/sat` 等字段。 | ||
| 453 | +- 文档中 `lang` 是否只落在租户配置的 `index_languages` 范围内。 | ||
| 454 | +- 常见 query(如你期望的热词)是否有对应文档,`query_count_*` 是否大致正确。 | ||
| 455 | + | ||
| 456 | +### 13.3 通过 API 验证 suggestion 行为 | ||
| 457 | + | ||
| 458 | +启动后端: | ||
| 459 | + | ||
| 460 | +```bash | ||
| 461 | +python main.py serve --es-host http://localhost:9200 --port 6002 | ||
| 462 | +``` | ||
| 463 | + | ||
| 464 | +示例调用(中文): | ||
| 465 | + | ||
| 466 | +```bash | ||
| 467 | +curl "http://localhost:6002/search/suggestions?q=玩具&size=5&language=zh&with_results=true" \ | ||
| 468 | + -H "X-Tenant-ID: 171" | ||
| 469 | +``` | ||
| 470 | + | ||
| 471 | +示例调用(英文): | ||
| 472 | + | ||
| 473 | +```bash | ||
| 474 | +curl "http://localhost:6002/search/suggestions?q=iph&size=5&language=en&with_results=true" \ | ||
| 475 | + -H "X-Tenant-ID: 171" | ||
| 476 | +``` | ||
| 477 | + | ||
| 478 | +预期: | ||
| 479 | + | ||
| 480 | +- `resolved_language` 与传入 `language` 一致或回落到租户主语言。 | ||
| 481 | +- 返回若干 `suggestions[]`,每条包含: | ||
| 482 | + - `text/lang/score/rank_score/sources` | ||
| 483 | + - `products[]` 为直达商品(数量由 `result_size` 控制)。 | ||
| 484 | + | ||
| 485 | +如需进一步排查,可对比: | ||
| 486 | + | ||
| 487 | +- 某个 suggestion 的 `text` 与 `shoplazza_search_log.query` 的出现频次。 | ||
| 488 | +- 该 suggestion 的 `products` 是否与主搜索接口 `POST /search/` 对同 query 的 topN 结果大体一致。 | ||
| 489 | + | ||
| 490 | +### 13.4 语言归属与多语言检查 | ||
| 491 | + | ||
| 492 | +挑选典型场景: | ||
| 493 | + | ||
| 494 | +- 纯中文 query(如商品中文标题)。 | ||
| 495 | +- 纯英文 query(如品牌/型号)。 | ||
| 496 | +- 混合或无明显语言的 query。 | ||
| 497 | + | ||
| 498 | +验证点: | ||
| 499 | + | ||
| 500 | +- 文档 `lang` 与期望语言是否匹配。 | ||
| 501 | +- `lang_source` 是否按优先级反映来源: | ||
| 502 | + - `log_field` > `request_params` > `script/model/default` | ||
| 503 | +- 如存在 `lang_conflict=true` 的案例,采样检查日志中 `language` 与 `request_params.language` 是否存在冲突。 | ||
| 504 | + | ||
| 505 | +## 14. 自动化测试建议 | ||
| 506 | + | ||
| 507 | +已提供基础单元测试(见 `tests/test_suggestions.py`): | ||
| 508 | + | ||
| 509 | +- 语言解析逻辑: | ||
| 510 | + - `test_resolve_query_language_prefers_log_field` | ||
| 511 | + - `test_resolve_query_language_uses_request_params_when_log_missing` | ||
| 512 | + - `test_resolve_query_language_fallback_to_primary` | ||
| 513 | +- 在线查询逻辑: | ||
| 514 | + - `test_suggestion_service_basic_flow`:使用 `FakeESClient` 验证 suggestion + 结果直达商品整体流程。 | ||
| 515 | + | ||
| 516 | +推荐在本地环境中执行: | ||
| 517 | + | ||
| 518 | +```bash | ||
| 519 | +pytest tests/test_suggestions.py -q | ||
| 520 | +``` | ||
| 521 | + | ||
| 522 | +后续可根据业务需要补充: | ||
| 523 | + | ||
| 524 | +- 排序正确性测试(构造不同 `query_count_*`、`title/qanchor_doc_count`)。 | ||
| 525 | +- 多语言覆盖测试(zh/en/ar/ru 等,结合租户 `index_languages`)。 | ||
| 526 | +- 简单性能回归(记录单次查询时延、QPS 与 P95/P99)。 | ||
| 527 | + | ||
| 402 | 本设计优先保证可落地与可演进:先以独立 suggestion 索引跑通主能力,再逐步增强排序与在线性能。 | 528 | 本设计优先保证可落地与可演进:先以独立 suggestion 索引跑通主能力,再逐步增强排序与在线性能。 |
| @@ -0,0 +1,136 @@ | @@ -0,0 +1,136 @@ | ||
| 1 | +# Suggestion 运行手册(Runbook) | ||
| 2 | + | ||
| 3 | +本文档面向研发/测试/运维,提供 suggestion 功能的标准操作流程。 | ||
| 4 | + | ||
| 5 | +## 1. 适用范围 | ||
| 6 | + | ||
| 7 | +- Suggestion 索引构建:`search_suggestions_tenant_{tenant_id}` | ||
| 8 | +- Suggestion 查询接口:`GET /search/suggestions` | ||
| 9 | +- 前端自动补全(`frontend/index.html`)联调 | ||
| 10 | + | ||
| 11 | +## 2. 依赖前置 | ||
| 12 | + | ||
| 13 | +确保以下服务和配置可用: | ||
| 14 | + | ||
| 15 | +- Elasticsearch(开启鉴权时需提供账号密码) | ||
| 16 | +- MySQL(表 `shoplazza_search_log` 可访问) | ||
| 17 | +- API 服务(端口默认 6002) | ||
| 18 | + | ||
| 19 | +建议环境变量: | ||
| 20 | + | ||
| 21 | +```bash | ||
| 22 | +ES_HOST=http://localhost:9200 | ||
| 23 | +ES_USERNAME=... | ||
| 24 | +ES_PASSWORD=... | ||
| 25 | +DB_HOST=... | ||
| 26 | +DB_PORT=3306 | ||
| 27 | +DB_DATABASE=... | ||
| 28 | +DB_USERNAME=... | ||
| 29 | +DB_PASSWORD=... | ||
| 30 | +``` | ||
| 31 | + | ||
| 32 | +## 3. 全量构建流程 | ||
| 33 | + | ||
| 34 | +### 3.1 构建指定租户 suggestion 索引 | ||
| 35 | + | ||
| 36 | +```bash | ||
| 37 | +./scripts/build_suggestions.sh 171 --days 365 --recreate | ||
| 38 | +``` | ||
| 39 | + | ||
| 40 | +说明: | ||
| 41 | + | ||
| 42 | +- `--days`:日志回溯窗口 | ||
| 43 | +- `--recreate`:删除旧索引并重建 | ||
| 44 | + | ||
| 45 | +### 3.2 预期输出 | ||
| 46 | + | ||
| 47 | +示例: | ||
| 48 | + | ||
| 49 | +```json | ||
| 50 | +{ | ||
| 51 | + "tenant_id": "171", | ||
| 52 | + "index_name": "search_suggestions_tenant_171", | ||
| 53 | + "total_candidates": 336, | ||
| 54 | + "indexed_docs": 336, | ||
| 55 | + "bulk_result": { | ||
| 56 | + "success": 336, | ||
| 57 | + "failed": 0, | ||
| 58 | + "errors": [] | ||
| 59 | + } | ||
| 60 | +} | ||
| 61 | +``` | ||
| 62 | + | ||
| 63 | +判定标准: | ||
| 64 | + | ||
| 65 | +- `indexed_docs > 0` | ||
| 66 | +- `bulk_result.failed = 0` | ||
| 67 | + | ||
| 68 | +## 4. ES 验证步骤 | ||
| 69 | + | ||
| 70 | +> 若 ES 开启鉴权,请使用 `-u "$ES_USERNAME:$ES_PASSWORD"`。 | ||
| 71 | + | ||
| 72 | +```bash | ||
| 73 | +curl -u "$ES_USERNAME:$ES_PASSWORD" \ | ||
| 74 | + "$ES_HOST/search_suggestions_tenant_171/_count?pretty" | ||
| 75 | + | ||
| 76 | +curl -u "$ES_USERNAME:$ES_PASSWORD" \ | ||
| 77 | + "$ES_HOST/search_suggestions_tenant_171/_mapping?pretty" | ||
| 78 | + | ||
| 79 | +curl -u "$ES_USERNAME:$ES_PASSWORD" \ | ||
| 80 | + -H 'Content-Type: application/json' "$ES_HOST/search_suggestions_tenant_171/_search?pretty" -d '{ | ||
| 81 | + "size": 10, | ||
| 82 | + "query": {"match_all": {}}, | ||
| 83 | + "_source": ["lang","text","sources","query_count_30d","rank_score"] | ||
| 84 | + }' | ||
| 85 | +``` | ||
| 86 | + | ||
| 87 | +重点检查: | ||
| 88 | + | ||
| 89 | +- 字段是否齐全(`lang/text/sat/completion/rank_score`) | ||
| 90 | +- 文档是否覆盖预期语种(如 `zh/en`) | ||
| 91 | + | ||
| 92 | +## 5. API 回归步骤 | ||
| 93 | + | ||
| 94 | +### 5.1 启动后端 | ||
| 95 | + | ||
| 96 | +```bash | ||
| 97 | +bash scripts/start_backend.sh | ||
| 98 | +``` | ||
| 99 | + | ||
| 100 | +### 5.2 调用 suggestion 接口 | ||
| 101 | + | ||
| 102 | +```bash | ||
| 103 | +curl "http://localhost:6002/search/suggestions?q=shirt&size=5&language=en&with_results=false" \ | ||
| 104 | + -H "X-Tenant-ID: 171" | ||
| 105 | + | ||
| 106 | +curl "http://localhost:6002/search/suggestions?q=2025&size=5&language=zh&with_results=false" \ | ||
| 107 | + -H "X-Tenant-ID: 171" | ||
| 108 | +``` | ||
| 109 | + | ||
| 110 | +通过标准: | ||
| 111 | + | ||
| 112 | +- 接口返回 `200` | ||
| 113 | +- `resolved_language` 合理 | ||
| 114 | +- `suggestions` 非空(针对已知存在的 query) | ||
| 115 | + | ||
| 116 | +## 6. 前端联调步骤 | ||
| 117 | + | ||
| 118 | +1. 打开 `http://localhost:6002/` | ||
| 119 | +2. 选择租户(例如 `171`) | ||
| 120 | +3. 输入已知前缀词(如 `shirt` / `Ekouaer` / `2025`) | ||
| 121 | +4. 观察下拉 suggestion 是否出现 | ||
| 122 | + | ||
| 123 | +注意: | ||
| 124 | + | ||
| 125 | +- 前端已同时透传: | ||
| 126 | + - Header:`X-Tenant-ID` | ||
| 127 | + - Query:`tenant_id` | ||
| 128 | + | ||
| 129 | +## 7. 发布检查清单 | ||
| 130 | + | ||
| 131 | +- [ ] 全量构建输出 `failed=0` | ||
| 132 | +- [ ] ES `_count` 与 `indexed_docs` 一致 | ||
| 133 | +- [ ] 关键 query(中/英)接口有返回 | ||
| 134 | +- [ ] 前端下拉正常 | ||
| 135 | +- [ ] 文档已更新(`README.md` / 本 Runbook / API 指南) | ||
| 136 | + |
| @@ -0,0 +1,164 @@ | @@ -0,0 +1,164 @@ | ||
| 1 | +# Suggestion 故障排查手册 | ||
| 2 | + | ||
| 3 | +本文档汇总 suggestion 常见问题与定位步骤。 | ||
| 4 | + | ||
| 5 | +## 1. `suggestions` 总是空数组 | ||
| 6 | + | ||
| 7 | +### 现象 | ||
| 8 | + | ||
| 9 | +```json | ||
| 10 | +{"query":"shirt","language":"en","resolved_language":"en","suggestions":[],"took_ms":0} | ||
| 11 | +``` | ||
| 12 | + | ||
| 13 | +### 排查步骤 | ||
| 14 | + | ||
| 15 | +1. 确认索引存在且有数据: | ||
| 16 | + | ||
| 17 | +```bash | ||
| 18 | +curl -u "$ES_USERNAME:$ES_PASSWORD" \ | ||
| 19 | + "$ES_HOST/search_suggestions_tenant_171/_count?pretty" | ||
| 20 | +``` | ||
| 21 | + | ||
| 22 | +2. 直接查 suggestion 索引样本: | ||
| 23 | + | ||
| 24 | +```bash | ||
| 25 | +curl -u "$ES_USERNAME:$ES_PASSWORD" \ | ||
| 26 | + -H 'Content-Type: application/json' "$ES_HOST/search_suggestions_tenant_171/_search?pretty" -d '{ | ||
| 27 | + "size": 20, | ||
| 28 | + "query": {"match_all": {}}, | ||
| 29 | + "_source": ["lang","text","rank_score"] | ||
| 30 | + }' | ||
| 31 | +``` | ||
| 32 | + | ||
| 33 | +3. 确认请求语种是否匹配(`language=en` 时,索引里应有 `lang=en` 文档)。 | ||
| 34 | + | ||
| 35 | +4. 检查服务版本是否为最新(重启后端): | ||
| 36 | + | ||
| 37 | +```bash | ||
| 38 | +bash scripts/stop.sh | ||
| 39 | +bash scripts/start_backend.sh | ||
| 40 | +``` | ||
| 41 | + | ||
| 42 | +### 已修复的历史问题 | ||
| 43 | + | ||
| 44 | +- **重复传 `size` 导致 ES 查询异常并被吞掉**: | ||
| 45 | + - 症状:日志里出现 `Received multiple values for 'size'` | ||
| 46 | + - 结果:接口返回空 hits(看起来像“无数据”) | ||
| 47 | + - 处理:确保 query body 不再携带 `size`,仅通过 client 参数传 `size` | ||
| 48 | + | ||
| 49 | +## 2. 报错:`tenant_id is required` | ||
| 50 | + | ||
| 51 | +### 现象 | ||
| 52 | + | ||
| 53 | +```json | ||
| 54 | +{ | ||
| 55 | + "error": "tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'", | ||
| 56 | + "status_code": 400 | ||
| 57 | +} | ||
| 58 | +``` | ||
| 59 | + | ||
| 60 | +### 原因 | ||
| 61 | + | ||
| 62 | +- 请求缺少 `X-Tenant-ID`,且 URL 没有 `tenant_id`。 | ||
| 63 | + | ||
| 64 | +### 处理 | ||
| 65 | + | ||
| 66 | +- API 调用至少满足其一: | ||
| 67 | + - Header:`X-Tenant-ID: 171` | ||
| 68 | + - Query:`tenant_id=171` | ||
| 69 | + | ||
| 70 | +示例: | ||
| 71 | + | ||
| 72 | +```bash | ||
| 73 | +curl "http://localhost:6002/search/suggestions?q=shirt&size=5&language=en&with_results=false&tenant_id=171" | ||
| 74 | +``` | ||
| 75 | + | ||
| 76 | +## 3. ES 401:`missing authentication credentials` | ||
| 77 | + | ||
| 78 | +### 现象 | ||
| 79 | + | ||
| 80 | +```json | ||
| 81 | +{ | ||
| 82 | + "type":"security_exception", | ||
| 83 | + "reason":"missing authentication credentials ..." | ||
| 84 | +} | ||
| 85 | +``` | ||
| 86 | + | ||
| 87 | +### 原因 | ||
| 88 | + | ||
| 89 | +- ES 开启了安全认证,curl/脚本未带凭证。 | ||
| 90 | + | ||
| 91 | +### 处理 | ||
| 92 | + | ||
| 93 | +```bash | ||
| 94 | +curl -u "$ES_USERNAME:$ES_PASSWORD" "$ES_HOST/search_suggestions_tenant_171/_mapping?pretty" | ||
| 95 | +``` | ||
| 96 | + | ||
| 97 | +或使用 API Key: | ||
| 98 | + | ||
| 99 | +```bash | ||
| 100 | +curl -H "Authorization: ApiKey <base64_key>" "$ES_HOST/search_suggestions_tenant_171/_mapping?pretty" | ||
| 101 | +``` | ||
| 102 | + | ||
| 103 | +## 4. 构建脚本报 `Cannot connect to Elasticsearch` | ||
| 104 | + | ||
| 105 | +### 原因 | ||
| 106 | + | ||
| 107 | +- ES 地址不对,或账号密码未配置,或网络不可达。 | ||
| 108 | + | ||
| 109 | +### 检查 | ||
| 110 | + | ||
| 111 | +```bash | ||
| 112 | +echo "$ES_HOST" | ||
| 113 | +echo "$ES_USERNAME" | ||
| 114 | +curl -u "$ES_USERNAME:$ES_PASSWORD" "$ES_HOST" | ||
| 115 | +``` | ||
| 116 | + | ||
| 117 | +## 5. 前端请求未携带租户参数 | ||
| 118 | + | ||
| 119 | +### 现象 | ||
| 120 | + | ||
| 121 | +- Network 中请求 URL 无 `tenant_id` | ||
| 122 | +- Header 里无 `X-Tenant-ID` | ||
| 123 | + | ||
| 124 | +### 处理 | ||
| 125 | + | ||
| 126 | +- 确认前端最新代码已生效(清缓存后强刷)。 | ||
| 127 | +- 前端应同时透传: | ||
| 128 | + - `X-Tenant-ID` | ||
| 129 | + - `tenant_id` query 参数(兜底,避免代理丢 header) | ||
| 130 | + | ||
| 131 | +## 6. 关键 query(如 `shirt`)没有被索引 | ||
| 132 | + | ||
| 133 | +### 检查路径 | ||
| 134 | + | ||
| 135 | +1. MySQL 里确认日志存在并在回溯窗口内: | ||
| 136 | + | ||
| 137 | +```sql | ||
| 138 | +SELECT query, language, create_time | ||
| 139 | +FROM shoplazza_search_log | ||
| 140 | +WHERE tenant_id = 171 | ||
| 141 | + AND query = 'shirt' | ||
| 142 | +ORDER BY create_time DESC | ||
| 143 | +LIMIT 20; | ||
| 144 | +``` | ||
| 145 | + | ||
| 146 | +2. 构建命令是否使用足够大的 `--days`(例如 365)。 | ||
| 147 | +3. 检查 query 是否被清洗规则过滤(空白/符号/过长等)。 | ||
| 148 | + | ||
| 149 | +## 7. `Invalid HTTP request received.` | ||
| 150 | + | ||
| 151 | +### 原因 | ||
| 152 | + | ||
| 153 | +- 6002 端口上跑的进程不是当前 FastAPI 服务,或请求协议与服务不匹配。 | ||
| 154 | + | ||
| 155 | +### 处理 | ||
| 156 | + | ||
| 157 | +```bash | ||
| 158 | +bash scripts/stop.sh | ||
| 159 | +bash scripts/start_backend.sh | ||
| 160 | +curl "http://localhost:6002/health" | ||
| 161 | +``` | ||
| 162 | + | ||
| 163 | +若 `/health` 正常,再测试 `/search/suggestions`。 | ||
| 164 | + |
suggestion/builder.py
| @@ -233,7 +233,7 @@ class SuggestionIndexBuilder: | @@ -233,7 +233,7 @@ class SuggestionIndexBuilder: | ||
| 233 | def rebuild_tenant_index( | 233 | def rebuild_tenant_index( |
| 234 | self, | 234 | self, |
| 235 | tenant_id: str, | 235 | tenant_id: str, |
| 236 | - days: int = 30, | 236 | + days: int = 365, |
| 237 | recreate: bool = True, | 237 | recreate: bool = True, |
| 238 | batch_size: int = 500, | 238 | batch_size: int = 500, |
| 239 | min_query_len: int = 1, | 239 | min_query_len: int = 1, |
suggestion/service.py
| @@ -45,7 +45,6 @@ class SuggestionService: | @@ -45,7 +45,6 @@ class SuggestionService: | ||
| 45 | qanchor_field = f"qanchors.{lang}" | 45 | qanchor_field = f"qanchors.{lang}" |
| 46 | 46 | ||
| 47 | body = { | 47 | body = { |
| 48 | - "size": result_size, | ||
| 49 | "_source": ["spu_id", "title", "min_price", "image_url", "sales", "total_inventory"], | 48 | "_source": ["spu_id", "title", "min_price", "image_url", "sales", "total_inventory"], |
| 50 | "query": { | 49 | "query": { |
| 51 | "bool": { | 50 | "bool": { |
| @@ -99,7 +98,6 @@ class SuggestionService: | @@ -99,7 +98,6 @@ class SuggestionService: | ||
| 99 | 98 | ||
| 100 | sat_field = f"sat.{resolved_lang}" | 99 | sat_field = f"sat.{resolved_lang}" |
| 101 | dsl = { | 100 | dsl = { |
| 102 | - "size": size, | ||
| 103 | "query": { | 101 | "query": { |
| 104 | "function_score": { | 102 | "function_score": { |
| 105 | "query": { | 103 | "query": { |