Commit e8443ea049196ecb9b92ae1ac110710ea05e9203
1 parent
39306492
docs
Showing
3 changed files
with
57 additions
and
7 deletions
Show diff stats
docs/TODO.txt
| 1 | 1 | |
| 2 | 2 | |
| 3 | +增加意图识别模块 | |
| 4 | +是否有: | |
| 5 | +颜色需求 | |
| 6 | +尺码需求 | |
| 7 | + | |
| 8 | +如果有:先做 SKU 筛选,然后把最优的 SKU 拼接到名称中,参与 reranker。 | |
| 9 | + | |
| 10 | + | |
| 11 | + | |
| 12 | +2026-03-21 10:29:23,698 - elastic_transport.transport - INFO - POST http://localhost:9200/search_products_tenant_163/_search?include_named_queries_score=false [status:200 duration:0.009s] | |
| 13 | +2026-03-21 10:29:23,700 - request_context - INFO - 分页详情回填 | ids=20 | filled=20 | took=7ms | |
| 14 | +2026-03-21 10:29:23,700 - request_context - INFO - 重排分页切片 | from=20, size=20, 返回=20条 | |
| 15 | +2026-03-21 10:29:23,720 - embeddings.text_encoder - ERROR - TextEmbeddingEncoder service request failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 16 | +Traceback (most recent call last): | |
| 17 | + File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service | |
| 18 | + response.raise_for_status() | |
| 19 | + File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status | |
| 20 | + raise HTTPError(http_error_msg, response=self) | |
| 21 | +requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 22 | +2026-03-21 10:29:23,720 - search.searcher - WARNING - Failed to encode SKU option1 values for final-page sorting: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 23 | +Traceback (most recent call last): | |
| 24 | + File "/data/saas-search/search/searcher.py", line 448, in _apply_sku_sorting_for_page_hits | |
| 25 | + encoded_option_vectors = text_encoder.encode(option1_values_to_encode, priority=1) | |
| 26 | + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| 27 | + File "/data/saas-search/embeddings/text_encoder.py", line 112, in encode | |
| 28 | + response_data = self._call_service( | |
| 29 | + ^^^^^^^^^^^^^^^^^^^ | |
| 30 | + File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service | |
| 31 | + response.raise_for_status() | |
| 32 | + File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status | |
| 33 | + raise HTTPError(http_error_msg, response=self) | |
| 34 | +requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 35 | +2026-03-21 10:29:23,721 - request_context - WARNING - SKU option embedding failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | |
| 36 | + | |
| 37 | + | |
| 38 | + | |
| 3 | 39 | |
| 4 | 40 | 先阅读文本embedding相关的代码: |
| 5 | 41 | @embeddings/README.md @embeddings/server.py @docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md @embeddings/text_encoder.py |
| ... | ... | @@ -246,12 +282,20 @@ config/environments/<env>.yaml |
| 246 | 282 | |
| 247 | 283 | |
| 248 | 284 | |
| 285 | +属性的筛选: | |
| 286 | +训练一个 BERT/Transformer 多分类模型,分类:颜色、尺寸、材质等。但要注意,一些属性的取值不规范且数量非常多,需要考虑是否做规范化,以及如何规范化。 | |
| 287 | + | |
| 288 | + | |
| 289 | + | |
| 249 | 290 | |
| 250 | 291 | 无结果重查 |
| 251 | 292 | 稀有语言,翻译可能超时(因为zh-en互译之外的翻译耗时更长) |
| 252 | 293 | |
| 253 | 294 | |
| 254 | 295 | |
| 296 | + | |
| 297 | + | |
| 298 | + | |
| 255 | 299 | 检索相关性优化: |
| 256 | 300 | 原始搜索词和翻译的词,都需要有对应的主干分析 |
| 257 | 301 | 这个主干根据词性简单提取名词即可 | ... | ... |
scripts/rebuild_suggestions.sh
| ... | ... | @@ -2,15 +2,16 @@ |
| 2 | 2 | set -euo pipefail |
| 3 | 3 | |
| 4 | 4 | if [ $# -lt 1 ]; then |
| 5 | - echo "Usage: $0 <tenant_id> [sample_query] [sample_language]" | |
| 6 | - echo "Example: $0 162 shi en" | |
| 5 | + echo "Usage: $0 <tenant_id>" | |
| 6 | + echo "Example: $0 162" | |
| 7 | 7 | exit 1 |
| 8 | 8 | fi |
| 9 | 9 | |
| 10 | 10 | ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" |
| 11 | 11 | TENANT_ID="$1" |
| 12 | -SAMPLE_Q="${2:-shi}" | |
| 13 | -SAMPLE_LANG="${3:-en}" | |
| 12 | +# Fixed smoke-test queries and languages (no CLI args). | |
| 13 | +SAMPLE_QUERIES=(s sh dress tshirt) | |
| 14 | +SAMPLE_LANGS=(en zh) | |
| 14 | 15 | API_BASE="${API_BASE_URL:-http://localhost:6002}" |
| 15 | 16 | |
| 16 | 17 | cd "$ROOT_DIR" |
| ... | ... | @@ -76,5 +77,10 @@ curl -sS "${AUTH[@]}" "$ES_HOST/$ALIAS_NAME/_search?pretty" -H 'Content-Type: ap |
| 76 | 77 | echo |
| 77 | 78 | |
| 78 | 79 | echo "[4/4] API smoke test" |
| 79 | -curl -sS "$API_BASE/search/suggestions?q=${SAMPLE_Q}&size=10&language=${SAMPLE_LANG}" -H "X-Tenant-ID: ${TENANT_ID}" | |
| 80 | -echo | |
| 80 | +for lang in "${SAMPLE_LANGS[@]}"; do | |
| 81 | + for q in "${SAMPLE_QUERIES[@]}"; do | |
| 82 | + echo "--- GET /search/suggestions?q=${q}&language=${lang} ---" | |
| 83 | + curl -sS "$API_BASE/search/suggestions?q=${q}&size=10&language=${lang}" -H "X-Tenant-ID: ${TENANT_ID}" | |
| 84 | + echo | |
| 85 | + done | |
| 86 | +done | ... | ... |
suggestion/TROUBLESHOOTING.md
| ... | ... | @@ -131,6 +131,6 @@ curl -u "$ES_USERNAME:$ES_PASSWORD" "$ES_HOST/search_products_tenant_162/_search |
| 131 | 131 | 4. 重建后再测 API: |
| 132 | 132 | |
| 133 | 133 | ```bash |
| 134 | -./scripts/rebuild_suggestions.sh 162 F en | |
| 134 | +./scripts/rebuild_suggestions.sh 162 | |
| 135 | 135 | curl "http://localhost:6002/search/suggestions?q=F&size=40&language=en&tenant_id=162" |
| 136 | 136 | ``` | ... | ... |