indexing_utils.py
3.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""
索引工具函数。
提取公共逻辑,避免代码重复。
"""
import logging
from typing import Dict, Any, Optional
from sqlalchemy import Engine, text
from config import ConfigLoader
from config.tenant_config_loader import get_tenant_config_loader
from indexer.document_transformer import SPUDocumentTransformer
logger = logging.getLogger(__name__)
def load_category_mapping(db_engine: Engine) -> Dict[str, str]:
"""
加载分类ID到名称的映射(全局,所有租户共享)。
Args:
db_engine: SQLAlchemy database engine
Returns:
Dictionary mapping category_id to category_name
"""
query = text("""
SELECT DISTINCT
category_id,
category
FROM shoplazza_product_spu
WHERE deleted = 0 AND category_id IS NOT NULL
""")
mapping = {}
try:
with db_engine.connect() as conn:
result = conn.execute(query)
for row in result:
category_id = str(int(row.category_id))
category_name = row.category
if not category_name or not category_name.strip():
logger.warning(f"Category ID {category_id} has empty name, skipping")
continue
mapping[category_id] = category_name
except Exception as e:
logger.error(f"Failed to load category mapping: {e}", exc_info=True)
return mapping
def create_document_transformer(
category_id_to_name: Dict[str, str],
tenant_id: str,
searchable_option_dimensions: Optional[list] = None,
translator: Optional[Any] = None,
translation_prompts: Optional[Dict[str, str]] = None
) -> SPUDocumentTransformer:
"""
创建文档转换器(统一初始化逻辑)。
Args:
category_id_to_name: 分类ID到名称的映射
tenant_id: 租户ID
searchable_option_dimensions: 可搜索的option维度列表(如果为None则从配置加载)
translator: 翻译器实例(如果为None则根据配置初始化)
translation_prompts: 翻译提示词配置(如果为None则从配置加载)
Returns:
SPUDocumentTransformer实例
"""
# 加载租户配置
tenant_config_loader = get_tenant_config_loader()
tenant_config = tenant_config_loader.get_tenant_config(tenant_id)
# 加载搜索配置(如果需要)
if searchable_option_dimensions is None or translator is None or translation_prompts is None:
try:
config_loader = ConfigLoader()
config = config_loader.load_config()
if searchable_option_dimensions is None:
searchable_option_dimensions = config.spu_config.searchable_option_dimensions
if translator is None and config.query_config.enable_translation:
from query.translator import Translator
translator = Translator(
api_key=config.query_config.translation_api_key,
use_cache=True,
glossary_id=config.query_config.translation_glossary_id,
translation_context=config.query_config.translation_context
)
if translation_prompts is None:
translation_prompts = config.query_config.translation_prompts
except Exception as e:
logger.warning(f"Failed to load config, using defaults: {e}")
if searchable_option_dimensions is None:
searchable_option_dimensions = ['option1', 'option2', 'option3']
if translation_prompts is None:
translation_prompts = {}
return SPUDocumentTransformer(
category_id_to_name=category_id_to_name,
searchable_option_dimensions=searchable_option_dimensions,
tenant_config=tenant_config,
translator=translator,
translation_prompts=translation_prompts
)