diff --git a/api/routes/indexer.py b/api/routes/indexer.py index 1cd17a6..a739a9a 100644 --- a/api/routes/indexer.py +++ b/api/routes/indexer.py @@ -21,7 +21,6 @@ router = APIRouter(prefix="/indexer", tags=["indexer"]) class ReindexRequest(BaseModel): """全量重建索引请求""" tenant_id: str - recreate_index: bool = False batch_size: int = 500 @@ -43,7 +42,8 @@ async def reindex_all(request: ReindexRequest): """ 全量重建索引接口 - 将指定租户的所有SPU数据重新索引到ES。支持删除旧索引并重建。 + 将指定租户的所有SPU数据重新索引到ES。 + 注意:此接口不会删除旧索引,只会更新或创建索引。如需重建索引(删除后重建),请在服务器上执行 scripts/recreate_index.py 脚本。 """ try: service = get_bulk_indexing_service() @@ -51,7 +51,7 @@ async def reindex_all(request: ReindexRequest): raise HTTPException(status_code=503, detail="Bulk indexing service is not initialized") return service.bulk_index( tenant_id=request.tenant_id, - recreate_index=request.recreate_index, + recreate_index=False, batch_size=request.batch_size ) except HTTPException: diff --git a/config/config.yaml b/config/config.yaml index faaa95f..a907a17 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -169,8 +169,8 @@ tenant_config: translate_to_zh: false "162": primary_language: "zh" - translate_to_en: false - translate_to_zh: false + translate_to_en: true + translate_to_zh: true "170": primary_language: "en" translate_to_en: true diff --git a/docs/搜索API对接指南.md b/docs/搜索API对接指南.md index e1c662b..c2be7c0 100644 --- a/docs/搜索API对接指南.md +++ b/docs/搜索API对接指南.md @@ -795,17 +795,16 @@ curl "http://localhost:6002/search/12345" ## 索引接口 -### 5.1 全量重建索引接口 +### 5.1 全量索引接口 - **端点**: `POST /indexer/reindex` -- **描述**: 全量重建索引,将指定租户的所有SPU数据导入到ES索引 +- **描述**: 全量索引,将指定租户的所有SPU数据导入到ES索引(不会删除现有索引) #### 请求参数 ```json { "tenant_id": "162", - "recreate_index": false, "batch_size": 500 } ``` @@ -813,7 +812,6 @@ curl "http://localhost:6002/search/12345" | 参数 | 类型 | 必填 | 默认值 | 说明 | |------|------|------|--------|------| | `tenant_id` | string | Y | - | 租户ID | -| `recreate_index` | boolean | N | false | 是否重建索引(删除旧索引后创建新索引) | | `batch_size` | 
def main() -> int:
    """Drop and recreate the Elasticsearch index structure without importing data.

    Connection settings are read from the environment:

    - ``ES_HOST``: Elasticsearch URL, defaults to ``http://localhost:9200``.
    - ``ES_USERNAME`` / ``ES_PASSWORD``: passed to ``ESClient`` only when
      both are set; otherwise the client is built from the host alone.

    The mapping is obtained from ``load_mapping()`` before anything is
    deleted, so a mapping failure returns before the existing index is touched.

    Returns:
        ``0`` when the index named ``DEFAULT_INDEX_NAME`` was recreated,
        ``1`` when any step (connect, load mapping, delete, create) failed.
        Failures are reported on stdout with an ``[ERROR]`` prefix.
    """
    rule = "=" * 60
    print(rule)
    print("Recreate Elasticsearch index (structure only, no data import)")
    print(rule)

    # Step 1: connect to Elasticsearch.
    host = os.getenv("ES_HOST", "http://localhost:9200")
    user = os.getenv("ES_USERNAME")
    secret = os.getenv("ES_PASSWORD")

    print(f"ES host: {host}")
    if user:
        print(f"ES username: {user}")

    try:
        client_kwargs = {"hosts": [host]}
        # Credentials are forwarded only when both halves are present.
        if user and secret:
            client_kwargs.update(username=user, password=secret)
        client = ESClient(**client_kwargs)

        if not client.ping():
            print(f"[ERROR] Cannot connect to Elasticsearch at {host}")
            return 1
    except Exception as err:
        print(f"[ERROR] Failed to connect to Elasticsearch: {err}")
        return 1

    target = DEFAULT_INDEX_NAME
    print(f"Index name: {target}")

    # Step 2: load the mapping.
    try:
        mapping = load_mapping()
        print("Loaded mapping configuration.")
    except Exception as err:
        print(f"[ERROR] Failed to load mapping: {err}")
        return 1

    # Step 3: delete the old index if it exists.
    print(f"Deleting existing index if exists: {target} ...")
    try:
        if not client.index_exists(target):
            print(f"Index does not exist, skip delete: {target}")
        elif delete_index_if_exists(client, target):
            print(f"✓ Deleted index: {target}")
        else:
            print(f"[ERROR] Failed to delete index: {target}")
            return 1
    except Exception as err:
        print(f"[ERROR] Error while deleting index: {err}")
        return 1

    # Step 4: create the new index.
    print(f"Creating index: {target} ...")
    try:
        if not client.create_index(target, mapping):
            print(f"[ERROR] Failed to create index: {target}")
            return 1
        print(f"✓ Created index: {target}")
    except Exception as err:
        print(f"[ERROR] Error while creating index: {err}")
        return 1

    print(rule)
    print("Index recreation completed. Please trigger /indexer/reindex per tenant to re-import data.")
    print(rule)
    return 0