3c1f8031
tangwang
api/routes/indexe...
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
"""
索引工具函数。
提取公共逻辑,避免代码重复。
"""
import logging
from typing import Dict, Any, Optional
from sqlalchemy import Engine, text
from config import ConfigLoader
from config.tenant_config_loader import get_tenant_config_loader
from indexer.document_transformer import SPUDocumentTransformer
logger = logging.getLogger(__name__)
def load_category_mapping(db_engine: Engine) -> Dict[str, str]:
    """
    Load the category-ID -> category-name mapping (global, shared by all tenants).

    Reads the distinct (category_id, category) pairs of non-deleted rows in
    shoplazza_product_spu. Rows whose name is empty or whitespace-only are
    skipped with a warning. Any exception raised while querying or iterating
    is logged and swallowed, so the mapping collected so far (possibly empty)
    is returned.

    Args:
        db_engine: SQLAlchemy database engine

    Returns:
        Dictionary mapping category_id (the integer value rendered as a
        string) to category_name
    """
    category_sql = text("""
SELECT DISTINCT
category_id,
category
FROM shoplazza_product_spu
WHERE deleted = 0 AND category_id IS NOT NULL
""")
    id_to_name: Dict[str, str] = {}
    try:
        with db_engine.connect() as connection:
            for record in connection.execute(category_sql):
                # Go through int() so the key is the plain integer string.
                category_id = str(int(record.category_id))
                name = record.category
                if name and name.strip():
                    id_to_name[category_id] = name
                else:
                    logger.warning(f"Category ID {category_id} has empty name, skipping")
    except Exception as e:
        # Best-effort load: report the failure and fall through to return
        # whatever was collected before it happened.
        logger.error(f"Failed to load category mapping: {e}", exc_info=True)
    return id_to_name
def create_document_transformer(
    category_id_to_name: Dict[str, str],
    tenant_id: str,
    searchable_option_dimensions: Optional[list] = None,
    translator: Optional[Any] = None,
    translation_prompts: Optional[Dict[str, str]] = None,
    encoder: Optional[Any] = None,
    enable_title_embedding: bool = True,
    image_encoder: Optional[Any] = None,
    enable_image_embedding: bool = False,
    config: Optional[Any] = None,
) -> SPUDocumentTransformer:
    """
    Create a document transformer (single place for the initialization logic).

    The tenant configuration is always looked up for tenant_id. Arguments
    left as None are then filled in from the search configuration, which is
    itself loaded through ConfigLoader only when config is None.

    Args:
        category_id_to_name: Mapping from category ID to category name
        tenant_id: Tenant ID
        searchable_option_dimensions: Searchable option dimensions; when None,
            taken from config.spu_config.searchable_option_dimensions
        translator: Translator instance; when None, one is created from
            config.query_config only if the tenant configuration lists more
            than one entry under "index_languages", otherwise it stays None
        translation_prompts: Translation prompt configuration; when None,
            taken from config.query_config.translation_prompts
        encoder: Text encoder instance; when None, a TextEmbeddingEncoder is
            created only if enable_title_embedding is True and
            config.query_config.enable_text_embedding is truthy
        enable_title_embedding: Whether title embedding is enabled (default True)
        image_encoder: Image encoder (optional, expected to implement
            encode_image_urls(urls)); passed through unchanged
        enable_image_embedding: Whether to fill image_embedding (default False)
        config: Pre-loaded search configuration; when None it is loaded with
            ConfigLoader

    Returns:
        SPUDocumentTransformer instance
    """
    # Tenant configuration is needed regardless of which defaults are missing.
    tenant_config = get_tenant_config_loader().get_tenant_config(tenant_id)

    wants_default_encoder = encoder is None and enable_title_embedding
    must_resolve_defaults = (
        config is None
        or searchable_option_dimensions is None
        or translator is None
        or translation_prompts is None
        or wants_default_encoder
    )

    if must_resolve_defaults:
        if config is None:
            config = ConfigLoader().load_config()

        if searchable_option_dimensions is None:
            searchable_option_dimensions = config.spu_config.searchable_option_dimensions

        # A translator is only worth building for multi-language tenants.
        language_count = len(tenant_config.get("index_languages") or [])
        if translator is None and language_count > 1:
            from providers import create_translation_provider

            translator = create_translation_provider(config.query_config)

        if translation_prompts is None:
            translation_prompts = config.query_config.translation_prompts

        # Build a text encoder only when title embedding is requested by the
        # caller and text embedding is enabled in the query configuration.
        if wants_default_encoder and config.query_config.enable_text_embedding:
            from embeddings.text_encoder import TextEmbeddingEncoder

            encoder = TextEmbeddingEncoder()
            logger.info("TextEmbeddingEncoder initialized for title embedding")

    return SPUDocumentTransformer(
        category_id_to_name=category_id_to_name,
        searchable_option_dimensions=searchable_option_dimensions,
        tenant_config=tenant_config,
        translator=translator,
        translation_prompts=translation_prompts,
        encoder=encoder,
        enable_title_embedding=enable_title_embedding,
        image_encoder=image_encoder,
        enable_image_embedding=enable_image_embedding,
    )
|