Blame view

indexer/indexing_utils.py 3.95 KB
3c1f8031   tangwang   api/routes/indexe...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
  """
  索引工具函数。
  
  提取公共逻辑,避免代码重复。
  """
  
  import logging
  from typing import Dict, Any, Optional
  from sqlalchemy import Engine, text
  from config import ConfigLoader
  from config.tenant_config_loader import get_tenant_config_loader
  from indexer.document_transformer import SPUDocumentTransformer
  
  # Module-level logger, named after this module's import path.
  logger = logging.getLogger(__name__)
  
  
  def load_category_mapping(db_engine: Engine) -> Dict[str, str]:
      """
      Load the category-ID-to-name mapping (global, shared by all tenants).

      Reads the distinct ``(category_id, category)`` pairs of the non-deleted
      rows of ``shoplazza_product_spu`` whose ``category_id`` is not NULL.

      Loading is best-effort:

      * a row whose ``category_id`` cannot be converted to an integer is
        skipped with a warning instead of aborting the whole load;
      * a row whose category name is empty or blank is skipped with a warning;
      * if connecting or querying fails, the error is logged and whatever was
        collected so far (possibly an empty dict) is returned; nothing is
        raised to the caller.

      Args:
          db_engine: SQLAlchemy database engine.

      Returns:
          Dictionary mapping ``str(int(category_id))`` to the category name.
          If the same ID is returned with several names, the last row read wins.
      """
      query = text("""
          SELECT DISTINCT
              category_id,
              category
          FROM shoplazza_product_spu
          WHERE deleted = 0 AND category_id IS NOT NULL
      """)

      mapping: Dict[str, str] = {}
      try:
          with db_engine.connect() as conn:
              for row in conn.execute(query):
                  # Normalise the ID row by row so that a single malformed value
                  # does not discard every category that follows it.
                  try:
                      category_id = str(int(row.category_id))
                  except (TypeError, ValueError, OverflowError):
                      logger.warning(
                          f"Category ID {row.category_id!r} is not a valid integer, skipping"
                      )
                      continue

                  category_name = row.category
                  if not category_name or not category_name.strip():
                      logger.warning(f"Category ID {category_id} has empty name, skipping")
                      continue

                  mapping[category_id] = category_name
      except Exception as e:
          # Deliberate best-effort: callers receive a (possibly partial) mapping
          # rather than an exception when the database is unavailable.
          logger.error(f"Failed to load category mapping: {e}", exc_info=True)

      return mapping
  
  
  def create_document_transformer(
      category_id_to_name: Dict[str, str],
      tenant_id: str,
      searchable_option_dimensions: Optional[list] = None,
      translator: Optional[Any] = None,
      translation_prompts: Optional[Dict[str, str]] = None
  ) -> SPUDocumentTransformer:
      """
      Build an SPUDocumentTransformer (single, shared initialisation path).

      The tenant configuration is always fetched for ``tenant_id``. If any of
      the three optional arguments is ``None``, the application config is loaded
      through ``ConfigLoader`` and used to fill in only the missing values. If
      that fails, a warning is logged and hard-coded fallbacks are used for the
      option dimensions and the prompts; the translator is left as it is.

      Args:
          category_id_to_name: Mapping from category ID to category name.
          tenant_id: Tenant ID whose tenant configuration is looked up.
          searchable_option_dimensions: Searchable option dimensions; when
              ``None`` they are taken from ``config.spu_config``.
          translator: Translator instance; when ``None`` one is created only if
              ``config.query_config.enable_translation`` is truthy.
          translation_prompts: Translation prompt configuration; when ``None``
              it is taken from ``config.query_config``.

      Returns:
          The configured SPUDocumentTransformer instance.
      """
      # Tenant-specific configuration is needed regardless of the other arguments.
      tenant_config = get_tenant_config_loader().get_tenant_config(tenant_id)

      # The application config is consulted only when a value was not supplied.
      needs_app_config = any(
          value is None
          for value in (searchable_option_dimensions, translator, translation_prompts)
      )

      if needs_app_config:
          try:
              config = ConfigLoader().load_config()

              if searchable_option_dimensions is None:
                  searchable_option_dimensions = config.spu_config.searchable_option_dimensions

              if translator is None:
                  if config.query_config.enable_translation:
                      # Imported lazily, only when a translator actually has to be built.
                      from query.translator import Translator
                      translator = Translator(
                          api_key=config.query_config.translation_api_key,
                          use_cache=True,
                          glossary_id=config.query_config.translation_glossary_id,
                          translation_context=config.query_config.translation_context
                      )

              if translation_prompts is None:
                  translation_prompts = config.query_config.translation_prompts
          except Exception as exc:
              logger.warning(f"Failed to load config, using defaults: {exc}")
              # Fall back only for values that are still unset at this point.
              searchable_option_dimensions = (
                  ['option1', 'option2', 'option3']
                  if searchable_option_dimensions is None
                  else searchable_option_dimensions
              )
              translation_prompts = {} if translation_prompts is None else translation_prompts

      transformer = SPUDocumentTransformer(
          category_id_to_name=category_id_to_name,
          searchable_option_dimensions=searchable_option_dimensions,
          tenant_config=tenant_config,
          translator=translator,
          translation_prompts=translation_prompts
      )
      return transformer