Blame view

indexer/mapping_generator.py 4.65 KB
be52af70   tangwang   first commit
1
  """
59b0a342   tangwang   创建手写 mapping JSON
2
  Elasticsearch mapping loader.
be52af70   tangwang   first commit
3
  
59b0a342   tangwang   创建手写 mapping JSON
4
  Loads Elasticsearch index mapping from JSON file.
be52af70   tangwang   first commit
5
6
7
  """
  
  from typing import Dict, Any
59b0a342   tangwang   创建手写 mapping JSON
8
  import json
325eec03   tangwang   1. 日志、配置基础设施,使用优化
9
  import logging
59b0a342   tangwang   创建手写 mapping JSON
10
  from pathlib import Path
2059d959   tangwang   feat(eval): 多评估集统...
11
  import os
be52af70   tangwang   first commit
12
  
86d8358b   tangwang   config optimize
13
  from config.loader import get_app_config
648cb4c2   tangwang   ES docs
14
  
325eec03   tangwang   1. 日志、配置基础设施,使用优化
15
16
  logger = logging.getLogger(__name__)
  
e4a39cc8   tangwang   索引隔离。 不同的tenant_i...
17
  # Default index name.
  # Deprecated: use get_tenant_index_name() to obtain the index name instead.
  DEFAULT_INDEX_NAME = "search_products"

  # Default mapping file path, resolved relative to this module:
  # <directory above this file's directory>/mappings/search_products.json
  DEFAULT_MAPPING_FILE = Path(__file__).parent.parent / "mappings" / "search_products.json"
  
  
e4a39cc8   tangwang   索引隔离。 不同的tenant_i...
24
25
26
  def get_tenant_index_name(tenant_id: str) -> str:
      """
      Generate the Elasticsearch index name for a tenant.

      The default naming rule is::

          {index_namespace}search_products_tenant_{tenant_id}

      where the namespace prefix is read from
      ``get_app_config().runtime.index_namespace`` (a falsy value means no
      prefix). The prefix is intended to separate runtime environments such
      as prod/uat/test.

      Two environment variables can override the default rule. They are
      temporary, non-official hooks for ops/debug and are checked in this
      order:

      - ``ES_INDEX_OVERRIDE_TENANT_<tenant_id>``: absolute index name, returned
        verbatim (no namespace prefix is added).
      - ``ES_INDEX_OVERRIDE``: absolute index name, or a format string that
        may contain ``{tenant_id}``.

      Examples::

          export ES_INDEX_OVERRIDE_TENANT_163="search_products_tenant_163_backup_20260415_1438"
          export ES_INDEX_OVERRIDE="search_products_tenant_{tenant_id}_backup_20260415_1438"

      Args:
          tenant_id: Tenant identifier interpolated into the index name.

      Returns:
          The index name to use for this tenant.
      """
      # An unset or empty environment variable is treated as "no override".
      if tenant_override := os.environ.get(f"ES_INDEX_OVERRIDE_TENANT_{tenant_id}"):
          return tenant_override

      if global_override := os.environ.get("ES_INDEX_OVERRIDE"):
          try:
              return global_override.format(tenant_id=tenant_id)
          except (LookupError, ValueError, AttributeError, TypeError):
              # Best effort: a template that cannot be formatted (unknown
              # placeholder, stray brace, bad attribute/index access) is used
              # verbatim as the index name instead of failing the caller.
              return global_override

      prefix = get_app_config().runtime.index_namespace or ""
      return f"{prefix}search_products_tenant_{tenant_id}"
e4a39cc8   tangwang   索引隔离。 不同的tenant_i...
51
52
  
  
59b0a342   tangwang   创建手写 mapping JSON
53
  def load_mapping(mapping_file: str = None) -> Dict[str, Any]:
      """
      Load Elasticsearch mapping from JSON file.

      Args:
          mapping_file: Path to mapping JSON file. If None, uses
              DEFAULT_MAPPING_FILE.

      Returns:
          Dictionary containing index configuration (settings + mappings),
          exactly as parsed from the JSON file.

      Raises:
          FileNotFoundError: If mapping file doesn't exist
          json.JSONDecodeError: If mapping file is invalid JSON
      """
      mapping_path = Path(DEFAULT_MAPPING_FILE if mapping_file is None else mapping_file)

      # Open directly instead of checking exists() first: this avoids a second
      # filesystem call and the window in which the file could disappear
      # between the check and the open.
      try:
          with mapping_path.open("r", encoding="utf-8") as f:
              mapping = json.load(f)
      except (FileNotFoundError, NotADirectoryError):
          # Re-raise with the module's own message so callers see which
          # mapping file was expected.
          raise FileNotFoundError(f"Mapping file not found: {mapping_path}") from None

      logger.info("Loaded mapping from %s", mapping_path)
      return mapping
  
  
  def create_index_if_not_exists(es_client, index_name: str, mapping: Dict[str, Any] = None) -> bool:
      """
      Create Elasticsearch index if it doesn't exist.

      Args:
          es_client: ESClient instance; must provide ``index_exists(index_name)``
              and ``create_index(index_name, mapping)``.
          index_name: Name of the index to create
          mapping: Index mapping configuration. If None, loads from default
              file via ``load_mapping()``.

      Returns:
          True if index was created. False if it already exists, or if
          ``es_client.create_index`` reported failure (the two cases are
          distinguished only in the log output).
      """
      if es_client.index_exists(index_name):
          logger.info("Index '%s' already exists", index_name)
          return False

      # Load the default mapping only when a creation attempt is really needed.
      if mapping is None:
          mapping = load_mapping()

      if not es_client.create_index(index_name, mapping):
          logger.error("Failed to create index '%s'", index_name)
          return False

      logger.info("Index '%s' created successfully", index_name)
      return True
be52af70   tangwang   first commit
106
107
108
109
110
111
112
  
  
  def delete_index_if_exists(es_client, index_name: str) -> bool:
      """
      Delete Elasticsearch index if it exists.

      Args:
          es_client: ESClient instance; must provide ``index_exists(index_name)``
              and ``delete_index(index_name)``.
          index_name: Name of the index to delete

      Returns:
          True if index was deleted. False if it didn't exist, or if
          ``es_client.delete_index`` reported failure (the two cases are
          distinguished only in the log output).
      """
      if not es_client.index_exists(index_name):
          logger.warning("Index '%s' does not exist", index_name)
          return False

      if not es_client.delete_index(index_name):
          logger.error("Failed to delete index '%s'", index_name)
          return False

      logger.info("Index '%s' deleted successfully", index_name)
      return True
be52af70   tangwang   first commit
129
130
131
132
133
134
135
  
  
  def update_mapping(es_client, index_name: str, new_fields: Dict[str, Any]) -> bool:
      """
      Update mapping for existing index (only adding new fields).

      Args:
          es_client: ESClient instance; must provide ``index_exists(index_name)``
              and ``update_mapping(index_name, mapping)``.
          index_name: Name of the index
          new_fields: New field mappings to add. They are sent to the client
              wrapped as ``{"properties": new_fields}``.

      Returns:
          True if successful. False if the index does not exist, or if
          ``es_client.update_mapping`` reported failure.
      """
      if not es_client.index_exists(index_name):
          logger.error("Index '%s' does not exist", index_name)
          return False

      mapping = {"properties": new_fields}
      if not es_client.update_mapping(index_name, mapping):
          logger.error("Failed to update mapping for index '%s'", index_name)
          return False

      logger.info("Mapping updated for index '%s'", index_name)
      return True