Commit 351a7eb583e617e62f26ed8e4b309b6073f19b61
1 parent
345d960b
1. 新的重建索引脚本
新增:scripts/recreate_index.py

功能:初始化 indexer 的 ES/DB 服务,然后调用 BulkIndexingService.bulk_index(…, recreate_index=True) 为指定 tenant_id 做「删除并重建索引 + 全量导入」。

用法示例:

    cd /home/tw/SearchEngine
    # 使用默认 batch_size=500
    python scripts/recreate_index.py 162
    # 指定 batch_size
    python scripts/recreate_index.py 162 --batch-size 1000

脚本依赖和 Indexer API 一样的环境变量:DB_HOST/DB_PORT/DB_DATABASE/DB_USERNAME/DB_PASSWORD、ES_HOST/ES_USERNAME/ES_PASSWORD。

2. 清理与引用更新

- 原来的 scripts/recreate_index.sh 已经删除。
- api/routes/indexer.py 里的说明改成引用 scripts/recreate_index.py。
- docs/搜索API对接指南.md 中的提示也从 .sh 改为:
  > python scripts/recreate_index.py <tenant_id> [--batch-size 500]
Showing
4 changed files
with
111 additions
and
21 deletions
Show diff stats
api/routes/indexer.py
| ... | ... | @@ -21,7 +21,6 @@ router = APIRouter(prefix="/indexer", tags=["indexer"]) |
| 21 | 21 | class ReindexRequest(BaseModel): |
| 22 | 22 | """全量重建索引请求""" |
| 23 | 23 | tenant_id: str |
| 24 | - recreate_index: bool = False | |
| 25 | 24 | batch_size: int = 500 |
| 26 | 25 | |
| 27 | 26 | |
| ... | ... | @@ -43,7 +42,8 @@ async def reindex_all(request: ReindexRequest): |
| 43 | 42 | """ |
| 44 | 43 | 全量重建索引接口 |
| 45 | 44 | |
| 46 | - 将指定租户的所有SPU数据重新索引到ES。支持删除旧索引并重建。 | |
| 45 | + 将指定租户的所有SPU数据重新索引到ES。 | |
| 46 | + 注意:此接口不会删除旧索引,只会更新或创建索引。如需重建索引(删除后重建),请在服务器上执行 scripts/recreate_index.py 脚本。 | |
| 47 | 47 | """ |
| 48 | 48 | try: |
| 49 | 49 | service = get_bulk_indexing_service() |
| ... | ... | @@ -51,7 +51,7 @@ async def reindex_all(request: ReindexRequest): |
| 51 | 51 | raise HTTPException(status_code=503, detail="Bulk indexing service is not initialized") |
| 52 | 52 | return service.bulk_index( |
| 53 | 53 | tenant_id=request.tenant_id, |
| 54 | - recreate_index=request.recreate_index, | |
| 54 | + recreate_index=False, | |
| 55 | 55 | batch_size=request.batch_size |
| 56 | 56 | ) |
| 57 | 57 | except HTTPException: | ... | ... |
config/config.yaml
| ... | ... | @@ -169,8 +169,8 @@ tenant_config: |
| 169 | 169 | translate_to_zh: false |
| 170 | 170 | "162": |
| 171 | 171 | primary_language: "zh" |
| 172 | - translate_to_en: false | |
| 173 | - translate_to_zh: false | |
| 172 | + translate_to_en: true | |
| 173 | + translate_to_zh: true | |
| 174 | 174 | "170": |
| 175 | 175 | primary_language: "en" |
| 176 | 176 | translate_to_en: true | ... | ... |
docs/搜索API对接指南.md
| ... | ... | @@ -795,17 +795,16 @@ curl "http://localhost:6002/search/12345" |
| 795 | 795 | |
| 796 | 796 | ## 索引接口 |
| 797 | 797 | |
| 798 | -### 5.1 全量重建索引接口 | |
| 798 | +### 5.1 全量索引接口 | |
| 799 | 799 | |
| 800 | 800 | - **端点**: `POST /indexer/reindex` |
| 801 | -- **描述**: 全量重建索引,将指定租户的所有SPU数据导入到ES索引 | |
| 801 | +- **描述**: 全量索引,将指定租户的所有SPU数据导入到ES索引(不会删除现有索引) | |
| 802 | 802 | |
| 803 | 803 | #### 请求参数 |
| 804 | 804 | |
| 805 | 805 | ```json |
| 806 | 806 | { |
| 807 | 807 | "tenant_id": "162", |
| 808 | - "recreate_index": false, | |
| 809 | 808 | "batch_size": 500 |
| 810 | 809 | } |
| 811 | 810 | ``` |
| ... | ... | @@ -813,7 +812,6 @@ curl "http://localhost:6002/search/12345" |
| 813 | 812 | | 参数 | 类型 | 必填 | 默认值 | 说明 | |
| 814 | 813 | |------|------|------|--------|------| |
| 815 | 814 | | `tenant_id` | string | Y | - | 租户ID | |
| 816 | -| `recreate_index` | boolean | N | false | 是否重建索引(删除旧索引后创建新索引) | | |
| 817 | 815 | | `batch_size` | integer | N | 500 | 批量导入大小 | |
| 818 | 816 | |
| 819 | 817 | #### 响应格式 |
| ... | ... | @@ -837,13 +835,12 @@ curl "http://localhost:6002/search/12345" |
| 837 | 835 | |
| 838 | 836 | #### 请求示例 |
| 839 | 837 | |
| 840 | -**首次索引(重建索引)**: | |
| 838 | +**全量索引(不会删除现有索引)**: | |
| 841 | 839 | ```bash |
| 842 | 840 | curl -X POST "http://localhost:6004/indexer/reindex" \ |
| 843 | 841 | -H "Content-Type: application/json" \ |
| 844 | 842 | -d '{ |
| 845 | 843 | "tenant_id": "162", |
| 846 | - "recreate_index": true, | |
| 847 | 844 | "batch_size": 500 |
| 848 | 845 | }' |
| 849 | 846 | ``` |
| ... | ... | @@ -857,16 +854,7 @@ tail -f logs/api.log |
| 857 | 854 | tail -f logs/*.log |
| 858 | 855 | ``` |
| 859 | 856 | |
| 860 | -**增量更新(不重建索引)**: | |
| 861 | -```bash | |
| 862 | -curl -X POST "http://localhost:6004/indexer/reindex" \ | |
| 863 | - -H "Content-Type: application/json" \ | |
| 864 | - -d '{ | |
| 865 | - "tenant_id": "162", | |
| 866 | - "recreate_index": false, | |
| 867 | - "batch_size": 500 | |
| 868 | - }' | |
| 869 | -``` | |
| 857 | +> 如需 **重建索引(会删除并重建整份 ES 索引结构)**,在服务器上执行内部脚本:`python scripts/recreate_index.py`。重建后需要按租户调用 `/indexer/reindex` 重新导入各租户数据。 | |
| 870 | 858 | |
| 871 | 859 | **查看索引日志**: |
| 872 | 860 | ... | ... |
scripts/recreate_index.py
| ... | ... | @@ -0,0 +1,102 @@ |
| 1 | +#!/usr/bin/env python3 | |
| 2 | +"""重建 ES 索引(仅索引结构,不导入数据)。 | |
| 3 | + | |
| 4 | +- 删除并重建索引(基于 mappings/search_products.json) | |
| 5 | +- 依赖环境变量中的 ES 配置: | |
| 6 | + - ES_HOST(默认: http://localhost:9200) | |
| 7 | + - ES_USERNAME(可选) | |
| 8 | + - ES_PASSWORD(可选) | |
| 9 | + | |
| 10 | +用法: | |
| 11 | + python scripts/recreate_index.py | |
| 12 | +""" | |
| 13 | + | |
| 14 | +import os | |
| 15 | +import sys | |
| 16 | +from pathlib import Path | |
| 17 | + | |
| 18 | +# 将项目根目录加入 sys.path | |
| 19 | +PROJECT_ROOT = Path(__file__).resolve().parent.parent | |
| 20 | +sys.path.insert(0, str(PROJECT_ROOT)) | |
| 21 | + | |
| 22 | +from utils.es_client import ESClient # type: ignore | |
| 23 | +from indexer.mapping_generator import ( # type: ignore | |
| 24 | + load_mapping, | |
| 25 | + delete_index_if_exists, | |
| 26 | + DEFAULT_INDEX_NAME, | |
| 27 | +) | |
| 28 | + | |
| 29 | + | |
def _build_es_client(es_host, es_username, es_password):
    """Construct an ``ESClient`` for *es_host*.

    Credentials are forwarded only when both a username and a password are
    supplied; otherwise the client is created without authentication.
    """
    if es_username and es_password:
        return ESClient(hosts=[es_host], username=es_username, password=es_password)
    return ESClient(hosts=[es_host])


def _drop_index(es_client, index_name) -> bool:
    """Delete *index_name* if it exists.

    Returns False only when the index exists and the delete helper reports
    failure; a missing index counts as success.
    """
    if not es_client.index_exists(index_name):
        print(f"Index does not exist, skip delete: {index_name}")
        return True
    if delete_index_if_exists(es_client, index_name):
        print(f"✓ Deleted index: {index_name}")
        return True
    print(f"[ERROR] Failed to delete index: {index_name}")
    return False


def main() -> int:
    """Drop and recreate the Elasticsearch index (structure only, no data).

    Connection settings are read from the environment: ``ES_HOST``
    (default ``http://localhost:9200``), ``ES_USERNAME`` and ``ES_PASSWORD``.
    The index named by ``DEFAULT_INDEX_NAME`` is deleted if present and then
    created again with the mapping returned by ``load_mapping()``. No
    documents are imported.

    Returns:
        0 on success, 1 if connecting, loading the mapping, deleting, or
        creating the index fails. The value is intended as the process
        exit status.
    """
    banner = "=" * 60
    print(banner)
    print("Recreate Elasticsearch index (structure only, no data import)")
    print(banner)

    # 1. Connect to Elasticsearch.
    es_host = os.environ.get("ES_HOST", "http://localhost:9200")
    es_username = os.environ.get("ES_USERNAME")
    es_password = os.environ.get("ES_PASSWORD")

    print(f"ES host: {es_host}")
    if es_username:
        print(f"ES username: {es_username}")
    if bool(es_username) != bool(es_password):
        # Previously this case silently fell back to an unauthenticated
        # client even though the username had just been printed; make the
        # fallback visible so a missing variable is easy to diagnose.
        print(
            "[WARN] Only one of ES_USERNAME / ES_PASSWORD is set; "
            "connecting without authentication."
        )

    try:
        es_client = _build_es_client(es_host, es_username, es_password)
        if not es_client.ping():
            print(f"[ERROR] Cannot connect to Elasticsearch at {es_host}")
            return 1
    except Exception as e:  # top-level script boundary: report and exit non-zero
        print(f"[ERROR] Failed to connect to Elasticsearch: {e}")
        return 1

    index_name = DEFAULT_INDEX_NAME
    print(f"Index name: {index_name}")

    # 2. Load the mapping before touching the index, so that a broken
    #    mapping never leaves us with the old index already deleted.
    try:
        mapping = load_mapping()
        print("Loaded mapping configuration.")
    except Exception as e:
        print(f"[ERROR] Failed to load mapping: {e}")
        return 1

    # 3. Delete the old index (if it exists).
    print(f"Deleting existing index if exists: {index_name} ...")
    try:
        if not _drop_index(es_client, index_name):
            return 1
    except Exception as e:
        print(f"[ERROR] Error while deleting index: {e}")
        return 1

    # 4. Create the new, empty index.
    print(f"Creating index: {index_name} ...")
    try:
        if es_client.create_index(index_name, mapping):
            print(f"✓ Created index: {index_name}")
        else:
            print(f"[ERROR] Failed to create index: {index_name}")
            return 1
    except Exception as e:
        print(f"[ERROR] Error while creating index: {e}")
        return 1

    print(banner)
    print("Index recreation completed. Please trigger /indexer/reindex per tenant to re-import data.")
    print(banner)
    return 0
| 99 | + | |
| 100 | + | |
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())