Commit 80f87e57cd0557d592946757d2618800f3ffeaee
1 parent
430ffe48
多语言索引修改 对应的 索引创建、数据灌入脚本、文档 同步修改
Showing
8 changed files
with
118 additions
and
113 deletions
Show diff stats
api/routes/indexer.py
| @@ -44,7 +44,7 @@ async def reindex_all(request: ReindexRequest): | @@ -44,7 +44,7 @@ async def reindex_all(request: ReindexRequest): | ||
| 44 | 全量重建索引接口 | 44 | 全量重建索引接口 |
| 45 | 45 | ||
| 46 | 将指定租户的所有SPU数据重新索引到ES。 | 46 | 将指定租户的所有SPU数据重新索引到ES。 |
| 47 | - 注意:此接口不会删除旧索引,只会更新或创建索引。如需重建索引(删除后重建),请在服务器上执行 scripts/recreate_index.py 脚本。 | 47 | + 注意:此接口不会删除旧索引,只会更新或创建索引。如需重建索引结构(删除后重建),请使用 `scripts/create_tenant_index.sh` 脚本。 |
| 48 | 48 | ||
| 49 | 注意:全量索引是长时间运行的操作,会在线程池中执行,不会阻塞其他请求。 | 49 | 注意:全量索引是长时间运行的操作,会在线程池中执行,不会阻塞其他请求。 |
| 50 | 全量索引和增量索引可以并行执行。 | 50 | 全量索引和增量索引可以并行执行。 |
docs/常用查询 - ES.md
| @@ -6,9 +6,8 @@ | @@ -6,9 +6,8 @@ | ||
| 6 | 6 | ||
| 7 | ### 1. 根据 tenant_id / spu_id 查询 | 7 | ### 1. 根据 tenant_id / spu_id 查询 |
| 8 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | 8 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 9 | -{ | ||
| 10 | "size": 11, | 9 | "size": 11, |
| 11 | - "_source": "*", | 10 | + "_source": ["title"], |
| 12 | "query": { | 11 | "query": { |
| 13 | "bool": { | 12 | "bool": { |
| 14 | "filter": [ | 13 | "filter": [ |
docs/搜索API对接指南.md
| @@ -31,7 +31,8 @@ | @@ -31,7 +31,8 @@ | ||
| 31 | - 4.5 [多语言字段说明](#45-多语言字段说明) | 31 | - 4.5 [多语言字段说明](#45-多语言字段说明) |
| 32 | 32 | ||
| 33 | 5. [索引接口](#索引接口) | 33 | 5. [索引接口](#索引接口) |
| 34 | - - 5.1 [全量重建索引接口](#51-全量重建索引接口) | 34 | + - 5.0 [为租户创建索引](#50-为租户创建索引) |
| 35 | + - 5.1 [全量索引接口](#51-全量索引接口) | ||
| 35 | - 5.2 [增量索引接口](#52-增量索引接口) | 36 | - 5.2 [增量索引接口](#52-增量索引接口) |
| 36 | - 5.3 [查询文档接口](#53-查询文档接口) | 37 | - 5.3 [查询文档接口](#53-查询文档接口) |
| 37 | - 5.4 [索引健康检查接口](#54-索引健康检查接口) | 38 | - 5.4 [索引健康检查接口](#54-索引健康检查接口) |
| @@ -130,7 +131,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | @@ -130,7 +131,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | ||
| 130 | | 搜索建议 | GET | `/search/suggestions` | 搜索建议(框架,暂未实现) ⚠️ TODO | | 131 | | 搜索建议 | GET | `/search/suggestions` | 搜索建议(框架,暂未实现) ⚠️ TODO | |
| 131 | | 即时搜索 | GET | `/search/instant` | 边输入边搜索(框架) ⚠️ TODO | | 132 | | 即时搜索 | GET | `/search/instant` | 边输入边搜索(框架) ⚠️ TODO | |
| 132 | | 获取文档 | GET | `/search/{doc_id}` | 获取单个文档 | | 133 | | 获取文档 | GET | `/search/{doc_id}` | 获取单个文档 | |
| 133 | -| 全量重建索引 | POST | `/indexer/reindex` | 全量重建索引接口 | | 134 | +| 全量索引 | POST | `/indexer/reindex` | 全量索引接口(导入数据,不删除索引) | |
| 134 | | 增量索引 | POST | `/indexer/index` | 增量索引接口(指定SPU ID列表进行索引,支持自动检测删除和显式删除) | | 135 | | 增量索引 | POST | `/indexer/index` | 增量索引接口(指定SPU ID列表进行索引,支持自动检测删除和显式删除) | |
| 135 | | 查询文档 | POST | `/indexer/documents` | 查询SPU文档数据(不写入ES) | | 136 | | 查询文档 | POST | `/indexer/documents` | 查询SPU文档数据(不写入ES) | |
| 136 | | 索引健康检查 | GET | `/indexer/health` | 检查索引服务状态 | | 137 | | 索引健康检查 | GET | `/indexer/health` | 检查索引服务状态 | |
| @@ -797,6 +798,32 @@ curl "http://localhost:6002/search/12345" | @@ -797,6 +798,32 @@ curl "http://localhost:6002/search/12345" | ||
| 797 | 798 | ||
| 798 | ## 索引接口 | 799 | ## 索引接口 |
| 799 | 800 | ||
| 801 | +### 5.0 为租户创建索引 | ||
| 802 | + | ||
| 803 | +为租户创建索引需要两个步骤: | ||
| 804 | + | ||
| 805 | +1. **创建索引结构**(可选,仅在需要更新 mapping 时执行) | ||
| 806 | + - 使用脚本创建 ES 索引结构(基于 `mappings/search_products.json`) | ||
| 807 | + - 如果索引已存在,会提示用户确认(会删除现有数据) | ||
| 808 | + | ||
| 809 | +2. **导入数据**(必需) | ||
| 810 | + - 使用全量索引接口 `/indexer/reindex` 导入数据 | ||
| 811 | + | ||
| 812 | +**创建索引结构**: | ||
| 813 | + | ||
| 814 | +```bash | ||
| 815 | +./scripts/create_tenant_index.sh 170 | ||
| 816 | +``` | ||
| 817 | + | ||
| 818 | +脚本会自动从项目根目录的 `.env` 文件加载 ES 配置。 | ||
| 819 | + | ||
| 820 | +**注意事项**: | ||
| 821 | +- ⚠️ 如果索引已存在,脚本会提示确认,确认后会删除现有数据 | ||
| 822 | +- 创建索引后,**必须**调用 `/indexer/reindex` 导入数据 | ||
| 823 | +- 如果只是更新数据而不需要修改索引结构,直接使用 `/indexer/reindex` 即可 | ||
| 824 | + | ||
| 825 | +--- | ||
| 826 | + | ||
| 800 | ### 5.1 全量索引接口 | 827 | ### 5.1 全量索引接口 |
| 801 | 828 | ||
| 802 | - **端点**: `POST /indexer/reindex` | 829 | - **端点**: `POST /indexer/reindex` |
| @@ -856,7 +883,7 @@ tail -f logs/api.log | @@ -856,7 +883,7 @@ tail -f logs/api.log | ||
| 856 | tail -f logs/*.log | 883 | tail -f logs/*.log |
| 857 | ``` | 884 | ``` |
| 858 | 885 | ||
| 859 | -> 如需 **重建索引(会删除并重建整份 ES 索引结构)**,在服务器上执行内部脚本:`python scripts/recreate_index.py`。重建后需要按租户调用 `/indexer/reindex` 重新导入各租户数据。 | 886 | +> ⚠️ **重要提示**:如需 **创建索引结构**,请参考 [5.0 为租户创建索引](#50-为租户创建索引) 章节,使用 `scripts/recreate_all_tenant_indices.py` 脚本。创建后需要调用 `/indexer/reindex` 导入数据。 |
| 860 | 887 | ||
| 861 | **查看索引日志**: | 888 | **查看索引日志**: |
| 862 | 889 |
indexer/__init__.py
| 1 | """Indexer package initialization.""" | 1 | """Indexer package initialization.""" |
| 2 | 2 | ||
| 3 | -from .mapping_generator import load_mapping, create_index_if_not_exists, delete_index_if_exists, DEFAULT_INDEX_NAME | 3 | +from .mapping_generator import load_mapping, create_index_if_not_exists, delete_index_if_exists, get_tenant_index_name |
| 4 | from .spu_transformer import SPUTransformer | 4 | from .spu_transformer import SPUTransformer |
| 5 | from .bulk_indexer import BulkIndexer | 5 | from .bulk_indexer import BulkIndexer |
| 6 | 6 | ||
| @@ -8,7 +8,7 @@ __all__ = [ | @@ -8,7 +8,7 @@ __all__ = [ | ||
| 8 | 'load_mapping', | 8 | 'load_mapping', |
| 9 | 'create_index_if_not_exists', | 9 | 'create_index_if_not_exists', |
| 10 | 'delete_index_if_exists', | 10 | 'delete_index_if_exists', |
| 11 | - 'DEFAULT_INDEX_NAME', | 11 | + 'get_tenant_index_name', |
| 12 | 'SPUTransformer', | 12 | 'SPUTransformer', |
| 13 | 'BulkIndexer', | 13 | 'BulkIndexer', |
| 14 | ] | 14 | ] |
indexer/bulk_indexer.py
| @@ -7,7 +7,6 @@ Handles batch indexing of documents with progress tracking and error handling. | @@ -7,7 +7,6 @@ Handles batch indexing of documents with progress tracking and error handling. | ||
| 7 | from typing import List, Dict, Any, Optional | 7 | from typing import List, Dict, Any, Optional |
| 8 | from elasticsearch.helpers import bulk, BulkIndexError | 8 | from elasticsearch.helpers import bulk, BulkIndexError |
| 9 | from utils.es_client import ESClient | 9 | from utils.es_client import ESClient |
| 10 | -from indexer.mapping_generator import DEFAULT_INDEX_NAME | ||
| 11 | import time | 10 | import time |
| 12 | 11 | ||
| 13 | 12 |
scripts/check_index_mapping.py
| @@ -11,7 +11,7 @@ from pathlib import Path | @@ -11,7 +11,7 @@ from pathlib import Path | ||
| 11 | sys.path.insert(0, str(Path(__file__).parent.parent)) | 11 | sys.path.insert(0, str(Path(__file__).parent.parent)) |
| 12 | 12 | ||
| 13 | from utils.es_client import get_es_client_from_env | 13 | from utils.es_client import get_es_client_from_env |
| 14 | -from indexer.mapping_generator import DEFAULT_INDEX_NAME | 14 | +from indexer.mapping_generator import get_tenant_index_name |
| 15 | 15 | ||
| 16 | 16 | ||
| 17 | def check_field_mapping(mapping_dict, field_path): | 17 | def check_field_mapping(mapping_dict, field_path): |
| @@ -38,6 +38,12 @@ def check_field_mapping(mapping_dict, field_path): | @@ -38,6 +38,12 @@ def check_field_mapping(mapping_dict, field_path): | ||
| 38 | 38 | ||
| 39 | 39 | ||
| 40 | def main(): | 40 | def main(): |
| 41 | + import argparse | ||
| 42 | + | ||
| 43 | + parser = argparse.ArgumentParser(description="检查 Elasticsearch 索引实际映射配置") | ||
| 44 | + parser.add_argument("--tenant-id", type=str, required=True, help="租户ID") | ||
| 45 | + args = parser.parse_args() | ||
| 46 | + | ||
| 41 | print("=" * 80) | 47 | print("=" * 80) |
| 42 | print("检查 Elasticsearch 索引实际映射配置") | 48 | print("检查 Elasticsearch 索引实际映射配置") |
| 43 | print("=" * 80) | 49 | print("=" * 80) |
| @@ -53,7 +59,7 @@ def main(): | @@ -53,7 +59,7 @@ def main(): | ||
| 53 | print(f"✗ 连接 Elasticsearch 失败: {e}") | 59 | print(f"✗ 连接 Elasticsearch 失败: {e}") |
| 54 | return 1 | 60 | return 1 |
| 55 | 61 | ||
| 56 | - index_name = DEFAULT_INDEX_NAME | 62 | + index_name = get_tenant_index_name(args.tenant_id) |
| 57 | 63 | ||
| 58 | # 检查索引是否存在 | 64 | # 检查索引是否存在 |
| 59 | if not es_client.index_exists(index_name): | 65 | if not es_client.index_exists(index_name): |
| @@ -0,0 +1,76 @@ | @@ -0,0 +1,76 @@ | ||
| 1 | +#!/bin/bash | ||
| 2 | + | ||
| 3 | +# 为租户创建 ES 索引 | ||
| 4 | +# 用法: ./scripts/create_tenant_index.sh <tenant_id> | ||
| 5 | + | ||
| 6 | +# 切换到项目根目录 | ||
| 7 | +cd "$(dirname "$0")/.." | ||
| 8 | + | ||
| 9 | +# 加载 .env 文件(如果存在) | ||
| 10 | +if [ -f .env ]; then | ||
| 11 | + set -a | ||
| 12 | + source .env | ||
| 13 | + set +a | ||
| 14 | +fi | ||
| 15 | + | ||
| 16 | +ES_HOST="${ES_HOST:-http://localhost:9200}" | ||
| 17 | +ES_USERNAME="${ES_USERNAME:-}" | ||
| 18 | +ES_PASSWORD="${ES_PASSWORD:-}" | ||
| 19 | + | ||
| 20 | +# 检查命令行参数 | ||
| 21 | +if [ $# -eq 0 ]; then | ||
| 22 | + echo "用法: $0 <tenant_id>" | ||
| 23 | + echo "示例: $0 170" | ||
| 24 | + exit 1 | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +TENANT_ID="$1" | ||
| 28 | +ES_INDEX="search_products_tenant_${TENANT_ID}" | ||
| 29 | +MAPPING_FILE="mappings/search_products.json" | ||
| 30 | + | ||
| 31 | +# 检查 mapping 文件是否存在 | ||
| 32 | +if [ ! -f "$MAPPING_FILE" ]; then | ||
| 33 | + echo "错误: mapping 文件不存在: $MAPPING_FILE" | ||
| 34 | + exit 1 | ||
| 35 | +fi | ||
| 36 | + | ||
| 37 | +# 手动确认 | ||
| 38 | +echo "创建索引前,将删除已有的同名索引。" | ||
| 39 | +echo "索引名称: $ES_INDEX" | ||
| 40 | +echo "请输入索引名称 '$ES_INDEX' 来确认:" | ||
| 41 | +read -r user_input | ||
| 42 | + | ||
| 43 | +if [ "$user_input" != "$ES_INDEX" ]; then | ||
| 44 | + echo "确认失败,索引名称不匹配。退出操作。" | ||
| 45 | + exit 1 | ||
| 46 | +fi | ||
| 47 | + | ||
| 48 | +echo "确认成功,继续创建索引..." | ||
| 49 | + | ||
| 50 | +# 构建 curl 认证参数 | ||
| 51 | +AUTH_PARAM="" | ||
| 52 | +if [ -n "$ES_USERNAME" ] && [ -n "$ES_PASSWORD" ]; then | ||
| 53 | + AUTH_PARAM="-u ${ES_USERNAME}:${ES_PASSWORD}" | ||
| 54 | +fi | ||
| 55 | + | ||
| 56 | +# 删除已存在的索引(如果存在) | ||
| 57 | +echo | ||
| 58 | +echo "删除索引: $ES_INDEX" | ||
| 59 | +echo | ||
| 60 | +curl -X DELETE "${ES_HOST}/${ES_INDEX}" $AUTH_PARAM -s -o /dev/null -w "HTTP状态码: %{http_code}\n" | ||
| 61 | + | ||
| 62 | +echo | ||
| 63 | +echo "创建索引: $ES_INDEX" | ||
| 64 | +echo | ||
| 65 | + | ||
| 66 | +# 创建索引(使用 mapping 文件) | ||
| 67 | +curl -X PUT "${ES_HOST}/${ES_INDEX}" \ | ||
| 68 | + -H "Content-Type: application/json" \ | ||
| 69 | + $AUTH_PARAM \ | ||
| 70 | + -d @"${MAPPING_FILE}" \ | ||
| 71 | + -w "\nHTTP状态码: %{http_code}\n" | ||
| 72 | + | ||
| 73 | +echo | ||
| 74 | +echo "完成!" | ||
| 75 | +echo "提示: 请调用 /indexer/reindex 接口导入数据" | ||
| 76 | + |
scripts/recreate_index.py deleted
| @@ -1,102 +0,0 @@ | @@ -1,102 +0,0 @@ | ||
| 1 | -#!/usr/bin/env python3 | ||
| 2 | -"""重建 ES 索引(仅索引结构,不导入数据)。 | ||
| 3 | - | ||
| 4 | -- 删除并重建索引(基于 mappings/search_products.json) | ||
| 5 | -- 依赖环境变量中的 ES 配置: | ||
| 6 | - - ES_HOST(默认: http://localhost:9200) | ||
| 7 | - - ES_USERNAME(可选) | ||
| 8 | - - ES_PASSWORD(可选) | ||
| 9 | - | ||
| 10 | -用法: | ||
| 11 | - python scripts/recreate_index.py | ||
| 12 | -""" | ||
| 13 | - | ||
| 14 | -import os | ||
| 15 | -import sys | ||
| 16 | -from pathlib import Path | ||
| 17 | - | ||
| 18 | -# 将项目根目录加入 sys.path | ||
| 19 | -PROJECT_ROOT = Path(__file__).resolve().parent.parent | ||
| 20 | -sys.path.insert(0, str(PROJECT_ROOT)) | ||
| 21 | - | ||
| 22 | -from utils.es_client import ESClient # type: ignore | ||
| 23 | -from indexer.mapping_generator import ( # type: ignore | ||
| 24 | - load_mapping, | ||
| 25 | - delete_index_if_exists, | ||
| 26 | - DEFAULT_INDEX_NAME, | ||
| 27 | -) | ||
| 28 | - | ||
| 29 | - | ||
| 30 | -def main() -> int: | ||
| 31 | - print("=" * 60) | ||
| 32 | - print("Recreate Elasticsearch index (structure only, no data import)") | ||
| 33 | - print("=" * 60) | ||
| 34 | - | ||
| 35 | - # 1. 连接 Elasticsearch | ||
| 36 | - es_host = os.environ.get("ES_HOST", "http://localhost:9200") | ||
| 37 | - es_username = os.environ.get("ES_USERNAME") | ||
| 38 | - es_password = os.environ.get("ES_PASSWORD") | ||
| 39 | - | ||
| 40 | - print(f"ES host: {es_host}") | ||
| 41 | - if es_username: | ||
| 42 | - print(f"ES username: {es_username}") | ||
| 43 | - | ||
| 44 | - try: | ||
| 45 | - if es_username and es_password: | ||
| 46 | - es_client = ESClient(hosts=[es_host], username=es_username, password=es_password) | ||
| 47 | - else: | ||
| 48 | - es_client = ESClient(hosts=[es_host]) | ||
| 49 | - | ||
| 50 | - if not es_client.ping(): | ||
| 51 | - print(f"[ERROR] Cannot connect to Elasticsearch at {es_host}") | ||
| 52 | - return 1 | ||
| 53 | - except Exception as e: | ||
| 54 | - print(f"[ERROR] Failed to connect to Elasticsearch: {e}") | ||
| 55 | - return 1 | ||
| 56 | - | ||
| 57 | - index_name = DEFAULT_INDEX_NAME | ||
| 58 | - print(f"Index name: {index_name}") | ||
| 59 | - | ||
| 60 | - # 2. 加载 mapping | ||
| 61 | - try: | ||
| 62 | - mapping = load_mapping() | ||
| 63 | - print("Loaded mapping configuration.") | ||
| 64 | - except Exception as e: | ||
| 65 | - print(f"[ERROR] Failed to load mapping: {e}") | ||
| 66 | - return 1 | ||
| 67 | - | ||
| 68 | - # 3. 删除旧索引(如果存在) | ||
| 69 | - print(f"Deleting existing index if exists: {index_name} ...") | ||
| 70 | - try: | ||
| 71 | - if es_client.index_exists(index_name): | ||
| 72 | - if delete_index_if_exists(es_client, index_name): | ||
| 73 | - print(f"✓ Deleted index: {index_name}") | ||
| 74 | - else: | ||
| 75 | - print(f"[ERROR] Failed to delete index: {index_name}") | ||
| 76 | - return 1 | ||
| 77 | - else: | ||
| 78 | - print(f"Index does not exist, skip delete: {index_name}") | ||
| 79 | - except Exception as e: | ||
| 80 | - print(f"[ERROR] Error while deleting index: {e}") | ||
| 81 | - return 1 | ||
| 82 | - | ||
| 83 | - # 4. 创建新索引 | ||
| 84 | - print(f"Creating index: {index_name} ...") | ||
| 85 | - try: | ||
| 86 | - if es_client.create_index(index_name, mapping): | ||
| 87 | - print(f"✓ Created index: {index_name}") | ||
| 88 | - else: | ||
| 89 | - print(f"[ERROR] Failed to create index: {index_name}") | ||
| 90 | - return 1 | ||
| 91 | - except Exception as e: | ||
| 92 | - print(f"[ERROR] Error while creating index: {e}") | ||
| 93 | - return 1 | ||
| 94 | - | ||
| 95 | - print("=" * 60) | ||
| 96 | - print("Index recreation completed. Please trigger /indexer/reindex per tenant to re-import data.") | ||
| 97 | - print("=" * 60) | ||
| 98 | - return 0 | ||
| 99 | - | ||
| 100 | - | ||
| 101 | -if __name__ == "__main__": | ||
| 102 | - raise SystemExit(main()) |