Commit 351a7eb583e617e62f26ed8e4b309b6073f19b61

Authored by tangwang
1 parent 345d960b

1. 新的重建索引脚本

新增:scripts/recreate_index.py
功能:初始化 indexer 的 ES/DB 服务,然后调用 BulkIndexingService.bulk_index(…, recreate_index=True) 为指定 tenant_id 做「删除并重建索引 + 全量导入」。
用法示例:
cd /home/tw/SearchEngine
# 使用默认 batch_size=500
python scripts/recreate_index.py 162
# 指定 batch_size
python scripts/recreate_index.py 162 --batch-size 1000
脚本依赖和 Indexer API 一样的环境变量:DB_HOST/DB_PORT/DB_DATABASE/DB_USERNAME/DB_PASSWORD、ES_HOST/ES_USERNAME/ES_PASSWORD。
2. 清理与引用更新
原来的 scripts/recreate_index.sh 已经删除。
api/routes/indexer.py 里的说明改成引用 scripts/recreate_index.py。
docs/搜索API对接指南.md 中的提示也从 .sh 改为:
> python scripts/recreate_index.py <tenant_id> [--batch-size 500]
api/routes/indexer.py
... ... @@ -21,7 +21,6 @@ router = APIRouter(prefix="/indexer", tags=["indexer"])
21 21 class ReindexRequest(BaseModel):
22 22 """全量重建索引请求"""
23 23 tenant_id: str
24   - recreate_index: bool = False
25 24 batch_size: int = 500
26 25  
27 26  
... ... @@ -43,7 +42,8 @@ async def reindex_all(request: ReindexRequest):
43 42 """
44 43 全量重建索引接口
45 44  
46   - 将指定租户的所有SPU数据重新索引到ES。支持删除旧索引并重建。
  45 + 将指定租户的所有SPU数据重新索引到ES。
  46 + 注意:此接口不会删除旧索引,只会更新或创建索引。如需重建索引(删除后重建),请在服务器上执行 scripts/recreate_index.py 脚本。
47 47 """
48 48 try:
49 49 service = get_bulk_indexing_service()
... ... @@ -51,7 +51,7 @@ async def reindex_all(request: ReindexRequest):
51 51 raise HTTPException(status_code=503, detail="Bulk indexing service is not initialized")
52 52 return service.bulk_index(
53 53 tenant_id=request.tenant_id,
54   - recreate_index=request.recreate_index,
  54 + recreate_index=False,
55 55 batch_size=request.batch_size
56 56 )
57 57 except HTTPException:
... ...
config/config.yaml
... ... @@ -169,8 +169,8 @@ tenant_config:
169 169 translate_to_zh: false
170 170 "162":
171 171 primary_language: "zh"
172   - translate_to_en: false
173   - translate_to_zh: false
  172 + translate_to_en: true
  173 + translate_to_zh: true
174 174 "170":
175 175 primary_language: "en"
176 176 translate_to_en: true
... ...
docs/搜索API对接指南.md
... ... @@ -795,17 +795,16 @@ curl "http://localhost:6002/search/12345"
795 795  
796 796 ## 索引接口
797 797  
798   -### 5.1 全量重建索引接口
  798 +### 5.1 全量索引接口
799 799  
800 800 - **端点**: `POST /indexer/reindex`
801   -- **描述**: 全量重建索引,将指定租户的所有SPU数据导入到ES索引
  801 +- **描述**: 全量索引,将指定租户的所有SPU数据导入到ES索引(不会删除现有索引)
802 802  
803 803 #### 请求参数
804 804  
805 805 ```json
806 806 {
807 807 "tenant_id": "162",
808   - "recreate_index": false,
809 808 "batch_size": 500
810 809 }
811 810 ```
... ... @@ -813,7 +812,6 @@ curl "http://localhost:6002/search/12345"
813 812 | 参数 | 类型 | 必填 | 默认值 | 说明 |
814 813 |------|------|------|--------|------|
815 814 | `tenant_id` | string | Y | - | 租户ID |
816   -| `recreate_index` | boolean | N | false | 是否重建索引(删除旧索引后创建新索引) |
817 815 | `batch_size` | integer | N | 500 | 批量导入大小 |
818 816  
819 817 #### 响应格式
... ... @@ -837,13 +835,12 @@ curl "http://localhost:6002/search/12345"
837 835  
838 836 #### 请求示例
839 837  
840   -**首次索引(重建索引)**:
  838 +**全量索引(不会删除现有索引)**:
841 839 ```bash
842 840 curl -X POST "http://localhost:6004/indexer/reindex" \
843 841 -H "Content-Type: application/json" \
844 842 -d '{
845 843 "tenant_id": "162",
846   - "recreate_index": true,
847 844 "batch_size": 500
848 845 }'
849 846 ```
... ... @@ -857,16 +854,7 @@ tail -f logs/api.log
857 854 tail -f logs/*.log
858 855 ```
859 856  
860   -**增量更新(不重建索引)**:
861   -```bash
862   -curl -X POST "http://localhost:6004/indexer/reindex" \
863   - -H "Content-Type: application/json" \
864   - -d '{
865   - "tenant_id": "162",
866   - "recreate_index": false,
867   - "batch_size": 500
868   - }'
869   -```
  857 +> 如需 **重建索引(会删除并重建整份 ES 索引结构)**,在服务器上执行内部脚本:`python scripts/recreate_index.py`。重建后需要按租户调用 `/indexer/reindex` 重新导入各租户数据。
870 858  
871 859 **查看索引日志**:
872 860  
... ...
scripts/recreate_index.py 0 → 100644
... ... @@ -0,0 +1,102 @@
  1 +#!/usr/bin/env python3
  2 +"""重建 ES 索引(仅索引结构,不导入数据)。
  3 +
  4 +- 删除并重建索引(基于 mappings/search_products.json)
  5 +- 依赖环境变量中的 ES 配置:
  6 + - ES_HOST(默认: http://localhost:9200)
  7 + - ES_USERNAME(可选)
  8 + - ES_PASSWORD(可选)
  9 +
  10 +用法:
  11 + python scripts/recreate_index.py
  12 +"""
  13 +
  14 +import os
  15 +import sys
  16 +from pathlib import Path
  17 +
  18 +# 将项目根目录加入 sys.path
  19 +PROJECT_ROOT = Path(__file__).resolve().parent.parent
  20 +sys.path.insert(0, str(PROJECT_ROOT))
  21 +
  22 +from utils.es_client import ESClient # type: ignore
  23 +from indexer.mapping_generator import ( # type: ignore
  24 + load_mapping,
  25 + delete_index_if_exists,
  26 + DEFAULT_INDEX_NAME,
  27 +)
  28 +
  29 +
  30 +def main() -> int:
  31 + print("=" * 60)
  32 + print("Recreate Elasticsearch index (structure only, no data import)")
  33 + print("=" * 60)
  34 +
  35 + # 1. 连接 Elasticsearch
  36 + es_host = os.environ.get("ES_HOST", "http://localhost:9200")
  37 + es_username = os.environ.get("ES_USERNAME")
  38 + es_password = os.environ.get("ES_PASSWORD")
  39 +
  40 + print(f"ES host: {es_host}")
  41 + if es_username:
  42 + print(f"ES username: {es_username}")
  43 +
  44 + try:
  45 + if es_username and es_password:
  46 + es_client = ESClient(hosts=[es_host], username=es_username, password=es_password)
  47 + else:
  48 + es_client = ESClient(hosts=[es_host])
  49 +
  50 + if not es_client.ping():
  51 + print(f"[ERROR] Cannot connect to Elasticsearch at {es_host}")
  52 + return 1
  53 + except Exception as e:
  54 + print(f"[ERROR] Failed to connect to Elasticsearch: {e}")
  55 + return 1
  56 +
  57 + index_name = DEFAULT_INDEX_NAME
  58 + print(f"Index name: {index_name}")
  59 +
  60 + # 2. 加载 mapping
  61 + try:
  62 + mapping = load_mapping()
  63 + print("Loaded mapping configuration.")
  64 + except Exception as e:
  65 + print(f"[ERROR] Failed to load mapping: {e}")
  66 + return 1
  67 +
  68 + # 3. 删除旧索引(如果存在)
  69 + print(f"Deleting existing index if exists: {index_name} ...")
  70 + try:
  71 + if es_client.index_exists(index_name):
  72 + if delete_index_if_exists(es_client, index_name):
  73 + print(f"✓ Deleted index: {index_name}")
  74 + else:
  75 + print(f"[ERROR] Failed to delete index: {index_name}")
  76 + return 1
  77 + else:
  78 + print(f"Index does not exist, skip delete: {index_name}")
  79 + except Exception as e:
  80 + print(f"[ERROR] Error while deleting index: {e}")
  81 + return 1
  82 +
  83 + # 4. 创建新索引
  84 + print(f"Creating index: {index_name} ...")
  85 + try:
  86 + if es_client.create_index(index_name, mapping):
  87 + print(f"✓ Created index: {index_name}")
  88 + else:
  89 + print(f"[ERROR] Failed to create index: {index_name}")
  90 + return 1
  91 + except Exception as e:
  92 + print(f"[ERROR] Error while creating index: {e}")
  93 + return 1
  94 +
  95 + print("=" * 60)
  96 + print("Index recreation completed. Please trigger /indexer/reindex per tenant to re-import data.")
  97 + print("=" * 60)
  98 + return 0
  99 +
  100 +
  101 +if __name__ == "__main__":
  102 + raise SystemExit(main())
... ...