# File: config.yaml
---
# Unified Configuration for Multi-Tenant Search Engine
# 统一配置文件,所有租户共用一套配置
# 注意:索引结构由 mappings/search_products.json 定义,此文件只配置搜索行为
# Elasticsearch index name. It matches the mapping file named in the file
# header (mappings/search_products.json).
es_index_name: "search_products"
# ES index settings (basic settings).
# NOTE(review): number_of_replicas is 0, i.e. no replica copies are requested;
# presumably intended for a single-node deployment - confirm before relying on
# this in production.
es_settings:
  number_of_shards: 1
  number_of_replicas: 0
  refresh_interval: "30s"
# Field weight configuration (per-field boost applied at search time).
# Only weights are configured here; the field structure itself is defined by
# mappings/search_products.json.
field_boosts:
  # Text relevance fields.
  title_zh: 3.0
  brief_zh: 1.5
  description_zh: 1.0
  vendor_zh: 1.5
  title_en: 3.0
  brief_en: 1.5
  description_en: 1.0
  vendor_en: 1.5

  # Category fields.
  category_path_zh: 1.5
  category_name_zh: 1.5
  category_path_en: 1.5
  category_name_en: 1.5

  # Tag and option-value fields.
  tags: 1.0
  option1_values: 0.5
  option2_values: 0.5
  option3_values: 0.5
# Search domain configuration (query domains).
# Each entry defines a search strategy: a named group of fields that are
# searched together, plus a boost for that domain.
# NOTE(review): every domain lists only *_zh fields although query_config also
# lists "en" as a supported language; presumably the *_en counterparts are
# resolved by the search code - confirm.
indexes:
  - name: "default"
    label: "默认搜索"  # "Default search"
    fields:
      - "title_zh"
      - "brief_zh"
      - "description_zh"
      - "vendor_zh"
      - "tags"
      - "category_path_zh"
      - "category_name_zh"
      - "option1_values"
    boost: 1.0

  - name: "title"
    label: "标题搜索"  # "Title search"
    fields:
      - "title_zh"
    boost: 2.0

  - name: "vendor"
    label: "品牌搜索"  # "Brand search"
    fields:
      - "vendor_zh"
    boost: 1.5

  - name: "category"
    label: "类目搜索"  # "Category search"
    fields:
      - "category_path_zh"
      - "category_name_zh"
    boost: 1.5

  - name: "tags"
    label: "标签搜索"  # "Tag search"
    fields:
      - "tags"
    boost: 1.0
# Query configuration.
# NOTE(review): the nesting below was reconstructed from key names and section
# comments; confirm against the config loader - in particular that
# source_fields belongs under query_config rather than at the top level.
query_config:
  # Supported languages.
  supported_languages:
    - "zh"
    - "en"
  default_language: "zh"

  # Feature switches.
  enable_translation: true
  enable_text_embedding: true
  enable_query_rewrite: true
  # Enable multi-language search (cross-language retrieval using translation).
  enable_multilang_search: true

  # Embedding field names.
  text_embedding_field: "title_embedding"
  image_embedding_field: null

  # Embedding disable thresholds (short queries do not use vector search).
  embedding_disable_thresholds:
    chinese_char_limit: 4
    english_word_limit: 3

  # Translation API configuration.
  translation_service: "deepl"
  translation_api_key: null  # Set via environment variable.

  # Translation prompts (used to improve translation quality; passed as the
  # DeepL API "context" parameter).
  translation_prompts:
    # Prompts for translating product titles.
    product_title_zh: "请将原文翻译成中文商品SKU名称,要求:确保精确、完整地传达原文信息的基础上,语言简洁清晰、地道、专业。"
    product_title_en: "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language."
    # Prompts for translating queries.
    query_zh: "电商领域"
    query_en: "e-commerce domain"
    # Default translation prompts.
    default_zh: "电商领域"
    default_en: "e-commerce domain"

  # Returned fields (_source includes).
  # null returns all fields, [] returns no fields, and a list returns only the
  # listed fields.
  source_fields: null
# Ranking configuration: the scoring expression and a human-readable
# description of it.
ranking:
  expression: "bm25() + 0.2*text_embedding_relevance()"
  description: "BM25 text relevance combined with semantic embedding similarity"
# Function score configuration (scoring rules applied at the ES layer).
# No scoring functions are currently defined (empty list).
# NOTE(review): score_mode / boost_mode are presumably passed through to the
# Elasticsearch function_score query - confirm against the query builder.
function_score:
  score_mode: "sum"
  boost_mode: "multiply"
  functions: []
# Rerank configuration (local reranking, currently disabled).
rerank:
  enabled: false
  expression: ""
  description: "Local reranking (disabled, use ES function_score instead)"
# SPU configuration (enabled; uses nested skus).
spu_config:
  enabled: true
  spu_field: "spu_id"
  inner_hits_size: 10
  # Which option dimensions participate in retrieval (both at indexing time
  # and in online search). A list with one or more of option1/option2/option3.
  searchable_option_dimensions: ["option1", "option2", "option3"]
# Tenant configuration.
# Each tenant can configure its primary language and translation options.
tenant_config:
  # Default settings (used by tenants that have no specific entry below).
  default:
    primary_language: "zh"
    translate_to_en: true
    translate_to_zh: false

  # Tenant-specific settings, keyed by tenant id (quoted so ids stay strings).
  # Tenants "1" and "3" currently repeat the default values verbatim.
  tenants:
    "1":
      primary_language: "zh"
      translate_to_en: true
      translate_to_zh: false
    "2":
      primary_language: "en"
      translate_to_en: false
      translate_to_zh: true
    "3":
      primary_language: "zh"
      translate_to_en: true
      translate_to_zh: false
    "162":
      primary_language: "zh"
      translate_to_en: false
      translate_to_zh: false