Commit e8443ea049196ecb9b92ae1ac110710ea05e9203

Authored by tangwang
1 parent 39306492

docs

1 1
2 2
  3 +增加意图识别模块
  4 +是否有:
  5 +颜色需求
  6 +尺码需求
  7 +
  8 +如果有: 先做sku筛选,然后把最优的拼接到名称中,参与reranker。
  9 +
  10 +
  11 +
  12 +2026-03-21 10:29:23,698 - elastic_transport.transport - INFO - POST http://localhost:9200/search_products_tenant_163/_search?include_named_queries_score=false [status:200 duration:0.009s]
  13 +2026-03-21 10:29:23,700 - request_context - INFO - 分页详情回填 | ids=20 | filled=20 | took=7ms
  14 +2026-03-21 10:29:23,700 - request_context - INFO - 重排分页切片 | from=20, size=20, 返回=20条
  15 +2026-03-21 10:29:23,720 - embeddings.text_encoder - ERROR - TextEmbeddingEncoder service request failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1
  16 +Traceback (most recent call last):
  17 + File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service
  18 + response.raise_for_status()
  19 + File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status
  20 + raise HTTPError(http_error_msg, response=self)
  21 +requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1
  22 +2026-03-21 10:29:23,720 - search.searcher - WARNING - Failed to encode SKU option1 values for final-page sorting: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1
  23 +Traceback (most recent call last):
  24 + File "/data/saas-search/search/searcher.py", line 448, in _apply_sku_sorting_for_page_hits
  25 + encoded_option_vectors = text_encoder.encode(option1_values_to_encode, priority=1)
  26 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  27 + File "/data/saas-search/embeddings/text_encoder.py", line 112, in encode
  28 + response_data = self._call_service(
  29 + ^^^^^^^^^^^^^^^^^^^
  30 + File "/data/saas-search/embeddings/text_encoder.py", line 63, in _call_service
  31 + response.raise_for_status()
  32 + File "/data/saas-search/.venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status
  33 + raise HTTPError(http_error_msg, response=self)
  34 +requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1
  35 +2026-03-21 10:29:23,721 - request_context - WARNING - SKU option embedding failed: 502 Server Error: Bad Gateway for url: http://127.0.0.1:6005/embed/text?normalize=true&priority=1
  36 +
  37 +
  38 +
3 39
4 先阅读文本embedding相关的代码: 40 先阅读文本embedding相关的代码:
5 @embeddings/README.md @embeddings/server.py @docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md @embeddings/text_encoder.py 41 @embeddings/README.md @embeddings/server.py @docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md @embeddings/text_encoder.py
@@ -246,12 +282,20 @@ config/environments/<env>.yaml @@ -246,12 +282,20 @@ config/environments/<env>.yaml
246 282
247 283
248 284
  285 +属性的筛选:
  286 +训练一个bert/transformer多分类模型,分类: 颜色、尺寸、材质等。但要注意部分属性的取值不规范且数量非常多,需要考虑是否做规范化,以及如何规范化。
  287 +
  288 +
  289 +
249 290
250 无结果重查 291 无结果重查
251 稀有语言,翻译可能超时(因为zh-en互译之外的翻译耗时更长) 292 稀有语言,翻译可能超时(因为zh-en互译之外的翻译耗时更长)
252 293
253 294
254 295
  296 +
  297 +
  298 +
255 检索相关性优化: 299 检索相关性优化:
256 原始搜索词和翻译的词,都需要有对应的主干分析 300 原始搜索词和翻译的词,都需要有对应的主干分析
257 这个主干可以根据词性简单提取名词即可 301 这个主干可以根据词性简单提取名词即可
scripts/rebuild_suggestions.sh
@@ -2,15 +2,16 @@ @@ -2,15 +2,16 @@
2 set -euo pipefail 2 set -euo pipefail
3 3
4 if [ $# -lt 1 ]; then 4 if [ $# -lt 1 ]; then
5 - echo "Usage: $0 <tenant_id> [sample_query] [sample_language]"  
6 - echo "Example: $0 162 shi en" 5 + echo "Usage: $0 <tenant_id>"
  6 + echo "Example: $0 162"
7 exit 1 7 exit 1
8 fi 8 fi
9 9
10 ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" 10 ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
11 TENANT_ID="$1" 11 TENANT_ID="$1"
12 -SAMPLE_Q="${2:-shi}"  
13 -SAMPLE_LANG="${3:-en}" 12 +# Fixed smoke-test queries and languages (no CLI args).
  13 +SAMPLE_QUERIES=(s sh dress tshirt)
  14 +SAMPLE_LANGS=(en zh)
14 API_BASE="${API_BASE_URL:-http://localhost:6002}" 15 API_BASE="${API_BASE_URL:-http://localhost:6002}"
15 16
16 cd "$ROOT_DIR" 17 cd "$ROOT_DIR"
@@ -76,5 +77,10 @@ curl -sS "${AUTH[@]}" "$ES_HOST/$ALIAS_NAME/_search?pretty" -H 'Content-Type: ap @@ -76,5 +77,10 @@ curl -sS "${AUTH[@]}" "$ES_HOST/$ALIAS_NAME/_search?pretty" -H 'Content-Type: ap
76 echo 77 echo
77 78
78 echo "[4/4] API smoke test" 79 echo "[4/4] API smoke test"
79 -curl -sS "$API_BASE/search/suggestions?q=${SAMPLE_Q}&size=10&language=${SAMPLE_LANG}" -H "X-Tenant-ID: ${TENANT_ID}"  
80 -echo 80 +for lang in "${SAMPLE_LANGS[@]}"; do
  81 + for q in "${SAMPLE_QUERIES[@]}"; do
  82 + echo "--- GET /search/suggestions?q=${q}&language=${lang} ---"
  83 + curl -sS "$API_BASE/search/suggestions?q=${q}&size=10&language=${lang}" -H "X-Tenant-ID: ${TENANT_ID}"
  84 + echo
  85 + done
  86 +done
suggestion/TROUBLESHOOTING.md
@@ -131,6 +131,6 @@ curl -u "$ES_USERNAME:$ES_PASSWORD" "$ES_HOST/search_products_tenant_162/_search @@ -131,6 +131,6 @@ curl -u "$ES_USERNAME:$ES_PASSWORD" "$ES_HOST/search_products_tenant_162/_search
131 4. 重建后再测 API: 131 4. 重建后再测 API:
132 132
133 ```bash 133 ```bash
134 -./scripts/rebuild_suggestions.sh 162 F en 134 +./scripts/rebuild_suggestions.sh 162
135 curl "http://localhost:6002/search/suggestions?q=F&size=40&language=en&tenant_id=162" 135 curl "http://localhost:6002/search/suggestions?q=F&size=40&language=en&tenant_id=162"
136 ``` 136 ```