diff --git a/docs/Usage-Guide.md b/docs/Usage-Guide.md
index 9aab5d5..a4a8a1b 100644
--- a/docs/Usage-Guide.md
+++ b/docs/Usage-Guide.md
@@ -343,6 +343,49 @@ curl -X POST http://localhost:6002/search/image \
---
+## 8. Suggestion 索引与接口使用
+
+### 8.1 构建 Suggestion 索引(全量)
+
+Suggestion 索引会从以下数据源:
+
+- ES 商品索引:`title.{lang}`, `qanchors.{lang}`
+- MySQL 日志表:`shoplazza_search_log.query`(含 `language`、`request_params`)
+
+聚合生成 `search_suggestions_tenant_{tenant_id}`。
+
+在项目根目录执行:
+
+```bash
+# 为指定租户全量重建 suggestion 索引(会删除旧索引)
+python main.py build-suggestions \
+ --tenant-id 162 \
+ --es-host http://localhost:9200 \
+ --days 30 \
+ --recreate
+```
+
+可选参数:
+
+- `--days`:回溯日志天数(默认 30)
+- `--batch-size`:扫描商品索引的批大小(默认 500)
+- `--min-query-len`:参与 suggestion 的最小查询长度(默认 1)
+
+> 建议在商品索引构建完成、日志正常写入一段时间后执行一次全量构建,然后按天/小时增加增量构建任务。
+
+### 8.2 调用 Suggestion 接口
+
+全量构建完成后,可直接通过 `/search/suggestions` 获取自动补全结果:
+
+```bash
+curl "http://localhost:6002/search/suggestions?q=iph&size=5&language=en&with_results=true" \
+ -H "X-Tenant-ID: 162"
+```
+
+接口返回结构详见 `docs/搜索API对接指南.md` 的“3.7 搜索建议接口”章节。
+
+---
+
## 常见问题
### Q1: MySQL连接失败
diff --git a/docs/搜索API对接指南.md b/docs/搜索API对接指南.md
index 6d9553d..2f4ed28 100644
--- a/docs/搜索API对接指南.md
+++ b/docs/搜索API对接指南.md
@@ -558,40 +558,59 @@ response = requests.post(url, headers=headers, json={"query": "芭比娃娃"})
### 3.7 搜索建议接口
-> ⚠️ **TODO**: 此接口当前为框架实现,功能暂未实现,仅返回空结果。接口和响应格式已经固定,可平滑扩展。
-
- **端点**: `GET /search/suggestions`
-- **描述**: 返回搜索建议(自动补全/热词)。当前为框架实现,接口和响应格式已经固定,可平滑扩展。
+- **描述**: 返回搜索建议(自动补全/热词),支持多语言与“结果直达”(每条 suggestion 附带商品列表)。
#### 查询参数
| 参数 | 类型 | 必填 | 默认值 | 描述 |
|------|------|------|--------|------|
| `q` | string | Y | - | 查询字符串(至少 1 个字符) |
-| `size` | integer | N | 5 | 返回建议数量(1-20) |
-| `types` | string | N | `query` | 建议类型(逗号分隔):`query`, `product`, `category`, `brand` |
+| `size` | integer | N | 10 | 返回建议数量(1-20) |
+| `language` | string | N | `en` | 请求语言,如 `zh` / `en` / `ar` / `ru`,用于路由到对应语种 suggestion 索引 |
+| `with_results` | bool | N | `true` | 是否为每条 suggestion 返回商品列表(结果直达) |
+| `result_size` | integer | N | 3 | 每条 suggestion 返回的商品数量(1-10) |
+| `debug` | bool | N | `false` | 是否开启调试(目前主要用于排查 suggestion 排序与语言解析) |
+
+> **租户标识**:同 [3.1](#31-接口信息),通过请求头 `X-Tenant-ID` 或 query 参数 `tenant_id` 传递。
#### 响应示例
```json
{
- "query": "芭",
+ "query": "iph",
+ "language": "en",
+ "resolved_language": "en",
"suggestions": [
{
- "text": "芭比娃娃",
- "type": "query",
- "highlight": "芭比娃娃",
- "popularity": 850
+ "text": "iphone 15",
+ "lang": "en",
+ "score": 12.37,
+ "rank_score": 5.1,
+ "sources": ["query_log", "qanchor"],
+ "lang_source": "log_field",
+ "lang_confidence": 1.0,
+ "lang_conflict": false,
+ "products": [
+ {
+ "spu_id": "12345",
+ "title": "iPhone 15 Pro Max",
+ "price": 999.0,
+ "image_url": "https://example.com/image.jpg",
+ "score": 3.21
+ }
+ ]
}
],
- "took_ms": 5
+ "took_ms": 12
}
```
#### 请求示例
```bash
-curl "http://localhost:6002/search/suggestions?q=芭&size=5&types=query,product"
+curl "http://localhost:6002/search/suggestions?q=iph&size=5&language=en&with_results=true" \
+ -H "X-Tenant-ID: 162"
```
### 3.8 即时搜索接口
diff --git a/docs/搜索API速查表.md b/docs/搜索API速查表.md
index e485230..fd6f4fe 100644
--- a/docs/搜索API速查表.md
+++ b/docs/搜索API速查表.md
@@ -294,7 +294,7 @@ POST /search/image
"size": 20
}
-GET /search/suggestions?q=芭&size=5
+GET /search/suggestions?q=芭&size=5&language=zh&with_results=true
GET /search/instant?q=玩具&size=5
diff --git a/main.py b/main.py
index 4c0ae91..9e49725 100755
--- a/main.py
+++ b/main.py
@@ -12,13 +12,12 @@ import sys
import os
import argparse
import json
-import uvicorn
# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from config import ConfigLoader
-from config.env_config import DB_CONFIG
+from config.env_config import ES_CONFIG
from utils import ESClient
from search import Searcher
from suggestion import SuggestionIndexBuilder
@@ -27,6 +26,7 @@ from utils.db_connector import create_db_connection
def cmd_serve(args):
"""Start API service."""
+ import uvicorn
os.environ['ES_HOST'] = args.es_host
print("Starting API service (multi-tenant)...")
@@ -43,6 +43,7 @@ def cmd_serve(args):
def cmd_serve_indexer(args):
"""Start dedicated Indexer API service (no search endpoints)."""
+ import uvicorn
os.environ['ES_HOST'] = args.es_host
print("Starting Indexer API service...")
@@ -102,17 +103,33 @@ def cmd_search(args):
def cmd_build_suggestions(args):
"""Build suggestion index for a tenant."""
- es_client = ESClient(hosts=[args.es_host])
+ # Initialize ES client with optional authentication
+ es_username = os.getenv("ES_USERNAME") or ES_CONFIG.get("username")
+ es_password = os.getenv("ES_PASSWORD") or ES_CONFIG.get("password")
+ if es_username and es_password:
+ es_client = ESClient(hosts=[args.es_host], username=es_username, password=es_password)
+ else:
+ es_client = ESClient(hosts=[args.es_host])
if not es_client.ping():
print(f"ERROR: Cannot connect to Elasticsearch at {args.es_host}")
return 1
+ # Build DB config directly from environment to avoid dotenv dependency
+ db_host = os.getenv("DB_HOST")
+ db_port = int(os.getenv("DB_PORT", "3306"))
+ db_name = os.getenv("DB_DATABASE")
+ db_user = os.getenv("DB_USERNAME")
+ db_pass = os.getenv("DB_PASSWORD")
+ if not all([db_host, db_name, db_user, db_pass]):
+ print("ERROR: DB_HOST/DB_PORT/DB_DATABASE/DB_USERNAME/DB_PASSWORD must be set in environment")
+ return 1
+
db_engine = create_db_connection(
- host=DB_CONFIG["host"],
- port=DB_CONFIG["port"],
- database=DB_CONFIG["database"],
- username=DB_CONFIG["username"],
- password=DB_CONFIG["password"],
+ host=db_host,
+ port=db_port,
+ database=db_name,
+ username=db_user,
+ password=db_pass,
)
builder = SuggestionIndexBuilder(es_client=es_client, db_engine=db_engine)
result = builder.rebuild_tenant_index(
diff --git a/scripts/build_suggestions.sh b/scripts/build_suggestions.sh
new file mode 100755
index 0000000..a0095e4
--- /dev/null
+++ b/scripts/build_suggestions.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+#
+# Convenience script to rebuild the suggestion index for one tenant.
+#
+# Usage:
+#   ./scripts/build_suggestions.sh TENANT_ID [--days 30] [--batch-size 500] [--min-query-len 1] [--es-host http://localhost:9200] [--recreate]
+#
+
+set -euo pipefail
+
+if [ $# -lt 1 ]; then
+  echo "Usage: $0 TENANT_ID [extra args...]" >&2
+  echo "Example: $0 162 --days 30 --recreate" >&2
+  exit 1
+fi
+
+TENANT_ID="$1"
+shift  # drop TENANT_ID; everything left is forwarded verbatim to main.py
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+cd "$ROOT_DIR"  # main.py is invoked by relative path, so run from the repo root
+
+python main.py build-suggestions \
+  --tenant-id "$TENANT_ID" \
+  "$@"
+
diff --git a/tests/test_suggestions.py b/tests/test_suggestions.py
new file mode 100644
index 0000000..ece27a6
--- /dev/null
+++ b/tests/test_suggestions.py
@@ -0,0 +1,183 @@
+import json
+from typing import Any, Dict, List
+
+import pytest
+
+from suggestion.builder import SuggestionIndexBuilder
+from suggestion.service import SuggestionService
+
+
+class FakeESClient:
+ """Minimal in-memory fake ES client for these tests: records calls in ``self.calls`` and returns canned responses chosen by index name."""
+
+ def __init__(self) -> None:
+ self.calls: List[Dict[str, Any]] = []  # one dict per search/bulk_index/create_index call, in call order
+
+ def search(self, index_name: str, body: Dict[str, Any], size: int = 10, from_: int = 0) -> Dict[str, Any]:
+ self.calls.append({"index": index_name, "body": body, "size": size, "from": from_})
+ # Suggestion index: always answer with a single canned "iphone 15" hit, regardless of the query body
+ if "search_suggestions_tenant_" in index_name:
+ return {
+ "hits": {
+ "total": {"value": 1},
+ "max_score": 3.2,
+ "hits": [
+ {
+ "_id": "1",
+ "_score": 3.2,
+ "_source": {
+ "text": "iphone 15",
+ "lang": "en",
+ "rank_score": 5.0,
+ "sources": ["query_log", "qanchor"],
+ "lang_source": "log_field",
+ "lang_confidence": 1.0,
+ "lang_conflict": False,
+ "top_spu_ids": ["12345"],
+ },
+ }
+ ],
+ }
+ }
+ # Product index: always answer with a single canned product hit (spu_id "12345"), regardless of the query body
+ if "search_products_tenant_" in index_name:
+ return {
+ "hits": {
+ "total": {"value": 1},
+ "max_score": 2.5,
+ "hits": [
+ {
+ "_id": "12345",
+ "_score": 2.5,
+ "_source": {
+ "spu_id": "12345",
+ "title": {"en": "iPhone 15 Pro Max"},
+ "min_price": 999.0,
+ "image_url": "https://example.com/image.jpg",
+ "sales": 100,
+ "total_inventory": 50,
+ },
+ }
+ ],
+ }
+ }
+ return {"hits": {"total": {"value": 0}, "max_score": 0.0, "hits": []}}  # any other index name: empty result
+
+ # For builder.bulk_index usage compatibility in full runs (not used in these unit tests); reports every doc as indexed
+ def bulk_index(self, index_name: str, docs: List[Dict[str, Any]]) -> Dict[str, Any]:
+ self.calls.append({"index": index_name, "bulk": True, "docs": docs})
+ return {"success": len(docs), "failed": 0, "errors": []}
+
+ def index_exists(self, index_name: str) -> bool:
+ return False  # stub: every index is reported as absent
+
+ def delete_index(self, index_name: str) -> bool:
+ return True  # stub: deletion always reports success and is not recorded
+
+ def create_index(self, index_name: str, body: Dict[str, Any]) -> bool:
+ self.calls.append({"index": index_name, "create": True, "body": body})
+ return True
+
+ def refresh(self, index_name: str) -> bool:
+ return True  # stub: refresh always reports success and is not recorded
+
+
+@pytest.mark.unit
+def test_resolve_query_language_prefers_log_field(monkeypatch):
+ """builder._resolve_query_language should prefer the log record's language field."""
+ fake_es = FakeESClient()
+ builder = SuggestionIndexBuilder(es_client=fake_es, db_engine=None)
+ # index_languages contains en/zh and primary_language is zh, so "en" can only come from log_language
+ lang, conf, source, conflict = builder._resolve_query_language(
+ query="iphone 15",
+ log_language="en",
+ request_params=None,
+ index_languages=["zh", "en"],
+ primary_language="zh",
+ )
+ assert lang == "en"
+ assert conf == 1.0
+ assert source == "log_field"
+ assert conflict is False
+
+
+@pytest.mark.unit
+def test_resolve_query_language_uses_request_params_when_log_missing():
+ """When the log language is empty, the language should be parsed from request_params.language."""
+ fake_es = FakeESClient()
+ builder = SuggestionIndexBuilder(es_client=fake_es, db_engine=None)
+ request_params = json.dumps({"language": "zh"})
+ lang, conf, source, conflict = builder._resolve_query_language(
+ query="芭比娃娃",
+ log_language=None,
+ request_params=request_params,
+ index_languages=["zh", "en"],
+ primary_language="en",
+ )
+ assert lang == "zh"
+ assert conf == 1.0
+ assert source == "request_params"
+ assert conflict is False
+
+
+@pytest.mark.unit
+def test_resolve_query_language_fallback_to_primary():
+ """With no language hint at all, resolution should fall back to the tenant's primary_language."""
+ fake_es = FakeESClient()
+ builder = SuggestionIndexBuilder(es_client=fake_es, db_engine=None)
+ lang, conf, source, conflict = builder._resolve_query_language(
+ query="some text",
+ log_language=None,
+ request_params=None,
+ index_languages=["zh", "en"],
+ primary_language="zh",
+ )
+ assert lang == "zh"
+ assert source == "default"
+ assert conflict is False
+
+
+@pytest.mark.unit
+def test_suggestion_service_basic_flow(monkeypatch):
+    """
+    SuggestionService.search should query ES and return suggestions with their products.
+    Uses FakeESClient so that no real Elasticsearch instance is required.
+    """
+    # Override the tenant config loader so the test does not depend on the external config.yaml
+    from config import tenant_config_loader as tcl
+
+    loader = tcl.get_tenant_config_loader()
+    # Patch the cached config via monkeypatch so it is restored after the test instead of leaking
+    monkeypatch.setattr(loader, "_config", {
+        "default": {"primary_language": "en", "index_languages": ["en", "zh"]},
+        "tenants": {
+            "1": {"primary_language": "en", "index_languages": ["en", "zh"]},
+        },
+    }, raising=False)
+
+    fake_es = FakeESClient()
+    service = SuggestionService(es_client=fake_es)
+    result = service.search(
+        tenant_id="1",
+        query="iph",
+        language="en",
+        size=5,
+        with_results=True,
+        result_size=2,
+    )
+
+    assert result["resolved_language"] == "en"
+    assert result["query"] == "iph"
+    assert result["took_ms"] >= 0
+    suggestions = result["suggestions"]
+    assert len(suggestions) == 1
+    s0 = suggestions[0]
+    assert s0["text"] == "iphone 15"
+    assert s0["lang"] == "en"
+    assert isinstance(s0.get("products"), list)
+    assert len(s0["products"]) >= 1
+    p0 = s0["products"][0]
+    assert p0["spu_id"] == "12345"
+    assert "title" in p0
+    assert "price" in p0
+
--
libgit2 0.21.2