Blame view

indexer/mapping_generator.py 3.14 KB
be52af70   tangwang   first commit
1
  """
59b0a342   tangwang   创建手写 mapping JSON
2
  Elasticsearch mapping loader.
be52af70   tangwang   first commit
3
  
59b0a342   tangwang   创建手写 mapping JSON
4
  Loads the Elasticsearch index mapping from a JSON file and provides helpers to create, delete, and update indices.
be52af70   tangwang   first commit
5
6
7
  """
  
  from typing import Dict, Any
59b0a342   tangwang   创建手写 mapping JSON
8
  import json
325eec03   tangwang   1. 日志、配置基础设施,使用优化
9
  import logging
59b0a342   tangwang   创建手写 mapping JSON
10
  from pathlib import Path
be52af70   tangwang   first commit
11
  
325eec03   tangwang   1. 日志、配置基础设施,使用优化
12
13
  # Module-level logger, named after this module via __name__.
  logger = logging.getLogger(__name__)
  
59b0a342   tangwang   创建手写 mapping JSON
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
  # Default Elasticsearch index name.
  DEFAULT_INDEX_NAME = "search_products"

  # Default mapping file, resolved relative to this module:
  # <parent of this module's directory>/mappings/search_products.json
  DEFAULT_MAPPING_FILE = Path(__file__).parent.parent.joinpath("mappings", "search_products.json")
  
  
  def load_mapping(mapping_file: str = None) -> Dict[str, Any]:
      """
      Load the Elasticsearch index configuration from a JSON file.

      Args:
          mapping_file: Path to the mapping JSON file. If None, the
              module-level DEFAULT_MAPPING_FILE is used.

      Returns:
          The parsed JSON content of the mapping file.

      Raises:
          FileNotFoundError: If the path does not point to an existing
              regular file (this includes a path that is a directory).
          json.JSONDecodeError: If the file content is not valid JSON.
      """
      if mapping_file is None:
          mapping_file = str(DEFAULT_MAPPING_FILE)

      mapping_path = Path(mapping_file)
      # is_file() rather than exists(): a directory "exists" but cannot be
      # opened as a mapping file, and would otherwise surface as an
      # undocumented IsADirectoryError/PermissionError from open().
      if not mapping_path.is_file():
          raise FileNotFoundError(f"Mapping file not found: {mapping_path}")

      with open(mapping_path, 'r', encoding='utf-8') as f:
          mapping = json.load(f)

      logger.info(f"Loaded mapping from {mapping_path}")
      return mapping
  
  
  def create_index_if_not_exists(es_client, index_name: str, mapping: Dict[str, Any] = None) -> bool:
      """
      Create an Elasticsearch index unless one with that name already exists.

      Args:
          es_client: Elasticsearch client instance
          index_name: Name of the index to create
          mapping: Index configuration passed as the request body. If None,
              it is obtained by calling load_mapping() with no arguments.

      Returns:
          True if the index was created, False if it already existed
      """
      already_present = es_client.indices.exists(index=index_name)
      if already_present:
          logger.info(f"Index '{index_name}' already exists")
          return False

      # The mapping is only loaded from disk when a new index is actually needed.
      index_body = load_mapping() if mapping is None else mapping
      es_client.indices.create(index=index_name, body=index_body)
      logger.info(f"Index '{index_name}' created successfully")
      return True
  
  
  def delete_index_if_exists(es_client, index_name: str) -> bool:
      """
      Delete an Elasticsearch index when it is present.

      Args:
          es_client: Elasticsearch client instance
          index_name: Name of the index to delete

      Returns:
          True if the index was deleted, False if there was no such index
      """
      index_present = es_client.indices.exists(index=index_name)
      if index_present:
          es_client.indices.delete(index=index_name)
          logger.info(f"Index '{index_name}' deleted successfully")
          return True

      # Nothing to delete: log a warning and report it through the return value.
      logger.warning(f"Index '{index_name}' does not exist")
      return False
  
  
  def update_mapping(es_client, index_name: str, new_fields: Dict[str, Any]) -> bool:
      """
      Add new field mappings to an existing index.

      Args:
          es_client: Elasticsearch client instance
          index_name: Name of the index
          new_fields: Field mappings to add; sent under the "properties" key

      Returns:
          True if the mapping request was issued, False if the index does
          not exist
      """
      if es_client.indices.exists(index=index_name):
          es_client.indices.put_mapping(index=index_name, body={"properties": new_fields})
          logger.info(f"Mapping updated for index '{index_name}'")
          return True

      # A missing index is logged as an error and reported as False, not raised.
      logger.error(f"Index '{index_name}' does not exist")
      return False