3c1f8031
tangwang
api/routes/indexe...
|
1
2
3
4
5
6
7
8
9
|
"""
索引工具函数。
提取公共逻辑,避免代码重复。
"""
import logging
from typing import Dict, Any, Optional
from sqlalchemy import Engine, text
|
86d8358b
tangwang
config optimize
|
10
|
from config import get_app_config
|
3c1f8031
tangwang
api/routes/indexe...
|
11
12
|
from config.tenant_config_loader import get_tenant_config_loader
from indexer.document_transformer import SPUDocumentTransformer
|
0fd2f875
tangwang
translate
|
13
|
from translation import create_translation_client
|
3c1f8031
tangwang
api/routes/indexe...
|
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def load_category_mapping(db_engine: Engine) -> Dict[str, str]:
    """
    Load the category-ID-to-name mapping (global, shared by all tenants).

    Reads distinct (category_id, category) pairs from the non-deleted rows of
    ``shoplazza_product_spu`` whose category_id is not NULL. Rows whose
    category name is empty or whitespace-only are skipped with a warning.

    Args:
        db_engine: SQLAlchemy database engine

    Returns:
        Dictionary mapping category_id (integer value rendered as a string)
        to category_name

    Raises:
        RuntimeError: if connecting, querying, or processing a row fails;
            the underlying exception is logged and chained as the cause.
    """
    category_query = text("""
        SELECT DISTINCT
            category_id,
            category
        FROM shoplazza_product_spu
        WHERE deleted = 0 AND category_id IS NOT NULL
    """)

    id_to_name: Dict[str, str] = {}
    try:
        with db_engine.connect() as connection:
            for record in connection.execute(category_query):
                # Normalise the key through int() so e.g. 12 and "12" collapse
                # to the same string key.
                category_id = str(int(record.category_id))
                name = record.category

                if name and name.strip():
                    id_to_name[category_id] = name
                else:
                    logger.warning(f"Category ID {category_id} has empty name, skipping")
    except Exception as e:
        # Fail fast: log with traceback, then surface a single error type.
        logger.error(f"Failed to load category mapping: {e}", exc_info=True)
        raise RuntimeError("Failed to load category mapping from MySQL") from e

    return id_to_name
def create_document_transformer(
    category_id_to_name: Dict[str, str],
    tenant_id: str,
    searchable_option_dimensions: Optional[list] = None,
    translator: Optional[Any] = None,
    encoder: Optional[Any] = None,
    enable_title_embedding: bool = True,
    image_encoder: Optional[Any] = None,
    enable_image_embedding: bool = False,
    config: Optional[Any] = None,
) -> SPUDocumentTransformer:
    """
    Build an SPUDocumentTransformer (single place for the initialisation logic).

    Any collaborator that is not supplied is resolved from configuration
    before the transformer is constructed.

    Args:
        category_id_to_name: Mapping from category ID to category name
        tenant_id: Tenant ID used to look up the tenant configuration
        searchable_option_dimensions: Searchable option dimensions; when None
            they are read from ``config.spu_config.searchable_option_dimensions``
        translator: Translator instance; when None one is created with
            ``create_translation_client()``, but only if the tenant
            configuration lists more than one entry in ``index_languages``
        encoder: Text encoder instance; when None a ``TextEmbeddingEncoder``
            is created, but only if ``enable_title_embedding`` is true and
            ``config.query_config.enable_text_embedding`` is true
        enable_title_embedding: Whether title embedding is enabled (default True)
        image_encoder: Optional image encoder, passed through unchanged
            (expected to implement ``encode_image_urls(urls)``)
        enable_image_embedding: Whether to fill image_embedding (default False)
        config: Search configuration; when None ``get_app_config().search``
            is used

    Returns:
        SPUDocumentTransformer instance
    """
    # Tenant configuration is always required by the transformer.
    tenant_config = get_tenant_config_loader().get_tenant_config(tenant_id)

    encoder_wanted = encoder is None and enable_title_embedding
    must_resolve_defaults = (
        searchable_option_dimensions is None
        or translator is None
        or encoder_wanted
        or config is None
    )

    if must_resolve_defaults:
        # Fall back to the application-wide search configuration.
        if config is None:
            config = get_app_config().search

        if searchable_option_dimensions is None:
            searchable_option_dimensions = config.spu_config.searchable_option_dimensions

        # A translator is only needed when the tenant indexes several languages.
        languages = tenant_config.get("index_languages") or []
        multilingual = len(languages) > 1
        if translator is None and multilingual:
            translator = create_translation_client()

        # Create the text encoder only when title embedding is requested,
        # none was supplied, and the configuration enables text embedding.
        if encoder_wanted and config.query_config.enable_text_embedding:
            from embeddings.text_encoder import TextEmbeddingEncoder
            encoder = TextEmbeddingEncoder()
            logger.info("TextEmbeddingEncoder initialized for title embedding")

    return SPUDocumentTransformer(
        category_id_to_name=category_id_to_name,
        searchable_option_dimensions=searchable_option_dimensions,
        tenant_config=tenant_config,
        translator=translator,
        encoder=encoder,
        enable_title_embedding=enable_title_embedding,
        image_encoder=image_encoder,
        enable_image_embedding=enable_image_embedding,
    )
|