Commit e8443ea049196ecb9b92ae1ac110710ea05e9203
1 parent
39306492
docs
Showing 3 changed files with 57 additions and 7 deletions
Show diff stats
docs/TODO.txt
| 1 | 1 | ||
| 2 | 2 | ||
| 3 | +增加意图识别模块 | ||
| 4 | +是否有: | ||
| 5 | +颜色需求 | ||
| 6 | +尺码需求 | ||
| 7 | + | ||
| 8 | +如果有:先做 SKU 筛选,然后把最优的 SKU 拼接到名称中,参与 reranker 重排。 | ||
| 9 | + | ||
| 10 | + | ||
| 11 | + | ||
| 12 | +2026-03-21 10:29:23,698 - elastic_transport.transport - INFO - POST http://localhost:9200/search_products_tenant_163/_search?include_named_queries_score=false [status:200 duration:0.009s] | ||
| 13 | +2026-03-21 10:29:23,700 - request_context - INFO - 分页详情回填 | ids=20 | filled=20 | took=7ms | ||
| 14 | +2026-03-21 10:29:23,700 - request_context - INFO - 重排分页切片 | from=20, size=20, 返回=20条 | ||
| 15 | +2026-03-21 10:29:23,720 - embeddings.text_encoder - ERROR - TextEmbeddingEncoder service request failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | ||
| 16 | +Traceback (most recent call last): | ||
| 17 | + File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service | ||
| 18 | + response.raise_for_status() | ||
| 19 | + File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status | ||
| 20 | + raise HTTPError(http_error_msg, response=self) | ||
| 21 | +requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | ||
| 22 | +2026-03-21 10:29:23,720 - search.searcher - WARNING - Failed to encode SKU option1 values for final-page sorting: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | ||
| 23 | +Traceback (most recent call last): | ||
| 24 | + File "/data/saas-search/search/searcher.py", line 448, in _apply_sku_sorting_for_page_hits | ||
| 25 | + encoded_option_vectors = text_encoder.encode(option1_values_to_encode, priority=1) | ||
| 26 | + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
| 27 | + File "/data/saas-search/embeddings/text_encoder.py", line 112, in encode | ||
| 28 | + response_data = self._call_service( | ||
| 29 | + ^^^^^^^^^^^^^^^^^^^ | ||
| 30 | + File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service | ||
| 31 | + response.raise_for_status() | ||
| 32 | + File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status | ||
| 33 | + raise HTTPError(http_error_msg, response=self) | ||
| 34 | +requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | ||
| 35 | +2026-03-21 10:29:23,721 - request_context - WARNING - SKU option embedding failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 | ||
| 36 | + | ||
| 37 | + | ||
| 38 | + | ||
| 3 | 39 | ||
| 4 | 先阅读文本embedding相关的代码: | 40 | 先阅读文本embedding相关的代码: |
| 5 | @embeddings/README.md @embeddings/server.py @docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md @embeddings/text_encoder.py | 41 | @embeddings/README.md @embeddings/server.py @docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md @embeddings/text_encoder.py |
| @@ -246,12 +282,20 @@ config/environments/<env>.yaml | @@ -246,12 +282,20 @@ config/environments/<env>.yaml | ||
| 246 | 282 | ||
| 247 | 283 | ||
| 248 | 284 | ||
| 285 | +属性的筛选: | ||
| 286 | +训练一个 BERT/Transformer 多分类模型,类别包括:颜色、尺寸、材质等。但要注意部分属性的取值不规范且数量非常多,需要考虑是否做规范化,以及如何规范化。 | ||
| 287 | + | ||
| 288 | + | ||
| 289 | + | ||
| 249 | 290 | ||
| 250 | 无结果重查 | 291 | 无结果重查 |
| 251 | 稀有语言,翻译可能超时(因为zh-en互译之外的翻译耗时更长) | 292 | 稀有语言,翻译可能超时(因为zh-en互译之外的翻译耗时更长) |
| 252 | 293 | ||
| 253 | 294 | ||
| 254 | 295 | ||
| 296 | + | ||
| 297 | + | ||
| 298 | + | ||
| 255 | 检索相关性优化: | 299 | 检索相关性优化: |
| 256 | 原始搜索词和翻译的词,都需要有对应的主干分析 | 300 | 原始搜索词和翻译的词,都需要有对应的主干分析 |
| 257 | 这个主干可以根据词性简单提取名词即可 | 301 | 这个主干可以根据词性简单提取名词即可 |
scripts/rebuild_suggestions.sh
| @@ -2,15 +2,16 @@ | @@ -2,15 +2,16 @@ | ||
| 2 | set -euo pipefail | 2 | set -euo pipefail |
| 3 | 3 | ||
| 4 | if [ $# -lt 1 ]; then | 4 | if [ $# -lt 1 ]; then |
| 5 | - echo "Usage: $0 <tenant_id> [sample_query] [sample_language]" | ||
| 6 | - echo "Example: $0 162 shi en" | 5 | + echo "Usage: $0 <tenant_id>" |
| 6 | + echo "Example: $0 162" | ||
| 7 | exit 1 | 7 | exit 1 |
| 8 | fi | 8 | fi |
| 9 | 9 | ||
| 10 | ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" | 10 | ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" |
| 11 | TENANT_ID="$1" | 11 | TENANT_ID="$1" |
| 12 | -SAMPLE_Q="${2:-shi}" | ||
| 13 | -SAMPLE_LANG="${3:-en}" | 12 | +# Fixed smoke-test queries and languages (no CLI args). |
| 13 | +SAMPLE_QUERIES=(s sh dress tshirt) | ||
| 14 | +SAMPLE_LANGS=(en zh) | ||
| 14 | API_BASE="${API_BASE_URL:-http://localhost:6002}" | 15 | API_BASE="${API_BASE_URL:-http://localhost:6002}" |
| 15 | 16 | ||
| 16 | cd "$ROOT_DIR" | 17 | cd "$ROOT_DIR" |
| @@ -76,5 +77,10 @@ curl -sS "${AUTH[@]}" "$ES_HOST/$ALIAS_NAME/_search?pretty" -H 'Content-Type: ap | @@ -76,5 +77,10 @@ curl -sS "${AUTH[@]}" "$ES_HOST/$ALIAS_NAME/_search?pretty" -H 'Content-Type: ap | ||
| 76 | echo | 77 | echo |
| 77 | 78 | ||
| 78 | echo "[4/4] API smoke test" | 79 | echo "[4/4] API smoke test" |
| 79 | -curl -sS "$API_BASE/search/suggestions?q=${SAMPLE_Q}&size=10&language=${SAMPLE_LANG}" -H "X-Tenant-ID: ${TENANT_ID}" | ||
| 80 | -echo | 80 | +for lang in "${SAMPLE_LANGS[@]}"; do |
| 81 | + for q in "${SAMPLE_QUERIES[@]}"; do | ||
| 82 | + echo "--- GET /search/suggestions?q=${q}&language=${lang} ---" | ||
| 83 | + curl -sS "$API_BASE/search/suggestions?q=${q}&size=10&language=${lang}" -H "X-Tenant-ID: ${TENANT_ID}" | ||
| 84 | + echo | ||
| 85 | + done | ||
| 86 | +done |
suggestion/TROUBLESHOOTING.md
| @@ -131,6 +131,6 @@ curl -u "$ES_USERNAME:$ES_PASSWORD" "$ES_HOST/search_products_tenant_162/_search | @@ -131,6 +131,6 @@ curl -u "$ES_USERNAME:$ES_PASSWORD" "$ES_HOST/search_products_tenant_162/_search | ||
| 131 | 4. 重建后再测 API: | 131 | 4. 重建后再测 API: |
| 132 | 132 | ||
| 133 | ```bash | 133 | ```bash |
| 134 | -./scripts/rebuild_suggestions.sh 162 F en | 134 | +./scripts/rebuild_suggestions.sh 162 |
| 135 | curl "http://localhost:6002/search/suggestions?q=F&size=40&language=en&tenant_id=162" | 135 | curl "http://localhost:6002/search/suggestions?q=F&size=40&language=en&tenant_id=162" |
| 136 | ``` | 136 | ``` |