From 351a7eb583e617e62f26ed8e4b309b6073f19b61 Mon Sep 17 00:00:00 2001 From: tangwang Date: Thu, 18 Dec 2025 20:28:23 +0800 Subject: [PATCH] 1. 新的重建索引脚本 新增:scripts/recreate_index.py 功能:连接 ES,删除并按 mapping 重建索引(仅索引结构,不导入数据);重建后需要按租户调用 /indexer/reindex 重新导入数据。 用法示例: cd /home/tw/SearchEngine && python scripts/recreate_index.py 脚本不接受命令行参数,只依赖 ES 相关环境变量:ES_HOST(默认 http://localhost:9200)、ES_USERNAME/ES_PASSWORD(可选)。 2. 清理与引用更新 原来的 scripts/recreate_index.sh 已经删除。 api/routes/indexer.py 里的说明改成引用 scripts/recreate_index.py,/indexer/reindex 不再提供 recreate_index 参数(固定为 False)。 docs/搜索API对接指南.md 中的提示也从 .sh 改为: > python scripts/recreate_index.py 3. config/config.yaml:租户 162 的 translate_to_en / translate_to_zh 改为 true。 --- api/routes/indexer.py | 6 +++--- config/config.yaml | 4 ++-- docs/搜索API对接指南.md | 20 ++++---------------- scripts/recreate_index.py | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 21 deletions(-) create mode 100644 scripts/recreate_index.py diff --git a/api/routes/indexer.py b/api/routes/indexer.py index 1cd17a6..a739a9a 100644 --- a/api/routes/indexer.py +++ b/api/routes/indexer.py @@ -21,7 +21,6 @@ router = APIRouter(prefix="/indexer", tags=["indexer"]) class ReindexRequest(BaseModel): """全量重建索引请求""" tenant_id: str - recreate_index: bool = False batch_size: int = 500 @@ -43,7 +42,8 @@ async def reindex_all(request: ReindexRequest): """ 全量重建索引接口 - 将指定租户的所有SPU数据重新索引到ES。支持删除旧索引并重建。 + 将指定租户的所有SPU数据重新索引到ES。 + 注意:此接口不会删除旧索引,只会更新或创建索引。如需重建索引(删除后重建),请在服务器上执行 scripts/recreate_index.py 脚本。 """ try: service = get_bulk_indexing_service() @@ -51,7 +51,7 @@ async def reindex_all(request: ReindexRequest): raise HTTPException(status_code=503, detail="Bulk indexing service is not initialized") return service.bulk_index( tenant_id=request.tenant_id, - recreate_index=request.recreate_index, + recreate_index=False, 
batch_size=request.batch_size ) except HTTPException: diff --git a/config/config.yaml b/config/config.yaml index faaa95f..a907a17 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -169,8 +169,8 @@ tenant_config: translate_to_zh: false "162": primary_language: "zh" - translate_to_en: false - translate_to_zh: false + translate_to_en: true + translate_to_zh: true "170": primary_language: "en" translate_to_en: true diff --git a/docs/搜索API对接指南.md b/docs/搜索API对接指南.md index e1c662b..c2be7c0 100644 --- a/docs/搜索API对接指南.md +++ b/docs/搜索API对接指南.md @@ -795,17 +795,16 @@ curl "http://localhost:6002/search/12345" ## 索引接口 -### 5.1 全量重建索引接口 +### 5.1 全量索引接口 - **端点**: `POST /indexer/reindex` -- **描述**: 全量重建索引,将指定租户的所有SPU数据导入到ES索引 +- **描述**: 全量索引,将指定租户的所有SPU数据导入到ES索引(不会删除现有索引) #### 请求参数 ```json { "tenant_id": "162", - "recreate_index": false, "batch_size": 500 } ``` @@ -813,7 +812,6 @@ curl "http://localhost:6002/search/12345" | 参数 | 类型 | 必填 | 默认值 | 说明 | |------|------|------|--------|------| | `tenant_id` | string | Y | - | 租户ID | -| `recreate_index` | boolean | N | false | 是否重建索引(删除旧索引后创建新索引) | | `batch_size` | integer | N | 500 | 批量导入大小 | #### 响应格式 @@ -837,13 +835,12 @@ curl "http://localhost:6002/search/12345" #### 请求示例 -**首次索引(重建索引)**: +**全量索引(不会删除现有索引)**: ```bash curl -X POST "http://localhost:6004/indexer/reindex" \ -H "Content-Type: application/json" \ -d '{ "tenant_id": "162", - "recreate_index": true, "batch_size": 500 }' ``` @@ -857,16 +854,7 @@ tail -f logs/api.log tail -f logs/*.log ``` -**增量更新(不重建索引)**: -```bash -curl -X POST "http://localhost:6004/indexer/reindex" \ - -H "Content-Type: application/json" \ - -d '{ - "tenant_id": "162", - "recreate_index": false, - "batch_size": 500 - }' -``` +> 如需 **重建索引(会删除并重建整份 ES 索引结构)**,在服务器上执行内部脚本:`python scripts/recreate_index.py`。重建后需要按租户调用 `/indexer/reindex` 重新导入各租户数据。 **查看索引日志**: diff --git a/scripts/recreate_index.py b/scripts/recreate_index.py new file mode 100644 index 0000000..eae4bf2 --- /dev/null +++ 
b/scripts/recreate_index.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+"""Recreate the Elasticsearch index (structure only, no data import).
+
+Deletes DEFAULT_INDEX_NAME if it exists and creates it again from
+load_mapping() (mappings/search_products.json). No documents are imported.
+
+Environment: ES_HOST (default http://localhost:9200); ES_USERNAME and
+ES_PASSWORD are optional and are only used when both are set.
+
+Usage:
+    python scripts/recreate_index.py
+"""
+
+import os
+import sys
+from pathlib import Path
+
+# Make the project root importable when this file is run as a script.
+PROJECT_ROOT = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(PROJECT_ROOT))
+
+from utils.es_client import ESClient  # type: ignore
+from indexer.mapping_generator import (  # type: ignore
+    load_mapping,
+    delete_index_if_exists,
+    DEFAULT_INDEX_NAME,
+)
+
+
+def _fail(message: str) -> int:
+    """Print *message* to stderr as an error and return exit code 1."""
+    print(f"[ERROR] {message}", file=sys.stderr)
+    return 1
+
+
+def _connect(es_host: str):
+    """Create an ESClient for *es_host*; auth is used only if both credentials are set."""
+    username = os.environ.get("ES_USERNAME")
+    password = os.environ.get("ES_PASSWORD")
+    if username and password:
+        print(f"ES username: {username}")
+        return ESClient(hosts=[es_host], username=username, password=password)
+    if username or password:
+        print("[WARN] Incomplete ES credentials; connecting without auth.", file=sys.stderr)
+    return ESClient(hosts=[es_host])
+
+
+def main() -> int:
+    """Delete and recreate the index; return 0 on success, 1 on any failure."""
+    print("=" * 60)
+    print("Recreate Elasticsearch index (structure only, no data import)")
+    print("=" * 60)
+
+    es_host = os.environ.get("ES_HOST", "http://localhost:9200")
+    index_name = DEFAULT_INDEX_NAME
+    print(f"ES host: {es_host}")
+
+    # 1. Connect to Elasticsearch.
+    try:
+        es_client = _connect(es_host)
+        if not es_client.ping():
+            return _fail(f"Cannot connect to Elasticsearch at {es_host}")
+    except Exception as e:
+        return _fail(f"Failed to connect to Elasticsearch: {e}")
+    print(f"Index name: {index_name}")
+
+    # 2. Load the mapping first, so a load failure never leaves the index deleted.
+    try:
+        mapping = load_mapping()
+    except Exception as e:
+        return _fail(f"Failed to load mapping: {e}")
+    print("Loaded mapping configuration.")
+
+    # 3. Delete the old index, if there is one.
+    print(f"Deleting existing index if exists: {index_name} ...")
+    try:
+        if not es_client.index_exists(index_name):
+            print(f"Index does not exist, skip delete: {index_name}")
+        elif delete_index_if_exists(es_client, index_name):
+            print(f"✓ Deleted index: {index_name}")
+        else:
+            return _fail(f"Failed to delete index: {index_name}")
+    except Exception as e:
+        return _fail(f"Error while deleting index: {e}")
+
+    # 4. Create the new index.
+    print(f"Creating index: {index_name} ...")
+    try:
+        if not es_client.create_index(index_name, mapping):
+            return _fail(f"Failed to create index: {index_name}")
+    except Exception as e:
+        return _fail(f"Error while creating index: {e}")
+    print(f"✓ Created index: {index_name}")
+
+    print("=" * 60)
+    print("Index recreation completed. Please trigger /indexer/reindex per tenant to re-import data.")
+    print("=" * 60)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
-- 
libgit2 0.21.2