From e8443ea049196ecb9b92ae1ac110710ea05e9203 Mon Sep 17 00:00:00 2001 From: tangwang Date: Sat, 21 Mar 2026 14:56:12 +0800 Subject: [PATCH] docs --- docs/TODO.txt | 44 ++++++++++++++++++++++++++++++++++++++++++++ scripts/rebuild_suggestions.sh | 18 ++++++++++++------ suggestion/TROUBLESHOOTING.md | 2 +- 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/docs/TODO.txt b/docs/TODO.txt index a66f3a5..f31c378 100644 --- a/docs/TODO.txt +++ b/docs/TODO.txt @@ -1,5 +1,41 @@ +增加意图识别模块 +是否有: +颜色需求 +尺码需求 + +如果有: 先做sku筛选,然后把最优的拼接到名称中,参与reranker。 + + + +2026-03-21 10:29:23,698 - elastic_transport.transport - INFO - POST http://localhost:9200/search_products_tenant_163/_search?include_named_queries_score=false [status:200 duration:0.009s] +2026-03-21 10:29:23,700 - request_context - INFO - 分页详情回填 | ids=20 | filled=20 | took=7ms +2026-03-21 10:29:23,700 - request_context - INFO - 重排分页切片 | from=20, size=20, 返回=20条 +2026-03-21 10:29:23,720 - embeddings.text_encoder - ERROR - TextEmbeddingEncoder service request failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 +Traceback (most recent call last): + File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service + response.raise_for_status() + File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status + raise HTTPError(http_error_msg, response=self) +requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 +2026-03-21 10:29:23,720 - search.searcher - WARNING - Failed to encode SKU option1 values for final-page sorting: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 +Traceback (most recent call last): + File "/data/saas-search/search/searcher.py", line 448, in _apply_sku_sorting_for_page_hits + encoded_option_vectors = text_encoder.encode(option1_values_to_encode, priority=1) + 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/data/saas-search/embeddings/text_encoder.py", line 112, in encode + response_data = self._call_service( + ^^^^^^^^^^^^^^^^^^^ + File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service + response.raise_for_status() + File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status + raise HTTPError(http_error_msg, response=self) +requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 +2026-03-21 10:29:23,721 - request_context - WARNING - SKU option embedding failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1 + + + 先阅读文本embedding相关的代码: @embeddings/README.md @embeddings/server.py @docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md @embeddings/text_encoder.py @@ -246,12 +282,20 @@ config/environments/.yaml +属性的筛选: +训练一个bert/transformer多分类模型,分类: 颜色、尺寸、材质 等等。但是要注意一些属性的值不规范、非常多,要考虑 是不是做规范化,如何规范化。 + + + 无结果重查 稀有语言,翻译可能超时(因为zh-en互译之外的翻译耗时更长) + + + 检索相关性优化: 原始搜索词和翻译的词,都需要有对应的主干分析 这个主干可以根据词性简单提取名词即可 diff --git a/scripts/rebuild_suggestions.sh b/scripts/rebuild_suggestions.sh index 3a62827..d909902 100755 --- a/scripts/rebuild_suggestions.sh +++ b/scripts/rebuild_suggestions.sh @@ -2,15 +2,16 @@ set -euo pipefail if [ $# -lt 1 ]; then - echo "Usage: $0 <tenant_id> [sample_query] [sample_language]" - echo "Example: $0 162 shi en" + echo "Usage: $0 <tenant_id>" + echo "Example: $0 162" exit 1 fi ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" TENANT_ID="$1" -SAMPLE_Q="${2:-shi}" -SAMPLE_LANG="${3:-en}" +# Fixed smoke-test queries and languages (no CLI args). 
+SAMPLE_QUERIES=(s sh dress tshirt) +SAMPLE_LANGS=(en zh) API_BASE="${API_BASE_URL:-http://localhost:6002}" cd "$ROOT_DIR" @@ -76,5 +77,10 @@ curl -sS "${AUTH[@]}" "$ES_HOST/$ALIAS_NAME/_search?pretty" -H 'Content-Type: ap echo echo "[4/4] API smoke test" -curl -sS "$API_BASE/search/suggestions?q=${SAMPLE_Q}&size=10&language=${SAMPLE_LANG}" -H "X-Tenant-ID: ${TENANT_ID}" -echo +for lang in "${SAMPLE_LANGS[@]}"; do + for q in "${SAMPLE_QUERIES[@]}"; do + echo "--- GET /search/suggestions?q=${q}&language=${lang} ---" + curl -sS "$API_BASE/search/suggestions?q=${q}&size=10&language=${lang}" -H "X-Tenant-ID: ${TENANT_ID}" + echo + done +done diff --git a/suggestion/TROUBLESHOOTING.md b/suggestion/TROUBLESHOOTING.md index 577aa54..6079ae4 100644 --- a/suggestion/TROUBLESHOOTING.md +++ b/suggestion/TROUBLESHOOTING.md @@ -131,6 +131,6 @@ curl -u "$ES_USERNAME:$ES_PASSWORD" "$ES_HOST/search_products_tenant_162/_search 4. 重建后再测 API: ```bash -./scripts/rebuild_suggestions.sh 162 F en +./scripts/rebuild_suggestions.sh 162 curl "http://localhost:6002/search/suggestions?q=F&size=40&language=en&tenant_id=162" ``` -- libgit2 0.21.2