4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
1
|
# Unified Configuration for Multi-Tenant Search Engine
|
33839b37
tangwang
属性值参与搜索:
|
2
3
|
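# Unified configuration file: all tenants share this single set of settings.
# Note: the index structure is defined by mappings/search_products.json; this file only configures search behavior.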
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
4
5
6
7
|
# Name of the Elasticsearch index used by this configuration.
es_index_name: "search_products"
|
33839b37
tangwang
属性值参与搜索:
|
8
|
# ES index settings (basic settings)
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
9
10
11
12
13
|
# Index-level settings: one shard, no replicas, 30-second refresh interval.
es_settings:
number_of_shards: 1
number_of_replicas: 0
refresh_interval: "30s"
|
33839b37
tangwang
属性值参与搜索:
|
14
|
# Field weight configuration (per-field boost applied at search time)
|
bd96cead
tangwang
1. 动态多语言字段与统一策略配置
|
15
16
|
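# Weights are keyed by field base name; at query time ".{lang}" is appended dynamically for each language in search_langs.
# To tune a single language separately, an explicit key can also be added (for example, title.de: 3.2).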
|
33839b37
tangwang
属性值参与搜索:
|
17
|
field_boosts:
|
bd96cead
tangwang
1. 动态多语言字段与统一策略配置
|
18
19
20
21
22
23
|
# Base names that are also listed under search_fields.multilingual_fields.
title: 3.0
brief: 1.5
description: 1.0
vendor: 1.5
category_path: 1.5
category_name_text: 1.5
|
33839b37
tangwang
属性值参与搜索:
|
24
25
26
27
28
|
# Fields that are also listed under search_fields.shared_fields (no language suffix).
tags: 1.0
option1_values: 0.5
option2_values: 0.5
option3_values: 0.5
|
33839b37
tangwang
属性值参与搜索:
|
29
|
# Query configuration
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
30
|
query_config:
|
33839b37
tangwang
属性值参与搜索:
|
31
|
# Supported languages
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
32
33
34
|
supported_languages:
- "zh"
- "en"
|
2739b281
tangwang
多语言索引调整
|
35
|
default_language: "en"
|
33839b37
tangwang
属性值参与搜索:
|
36
|
|
345d960b
tangwang
1. 删除全局 enable_tr...
|
37
|
# Feature switches (the translation switch is controlled by tenant_config)
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
38
39
|
enable_text_embedding: true
enable_query_rewrite: true
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
40
|
|
bd96cead
tangwang
1. 动态多语言字段与统一策略配置
|
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
# Dynamic multilingual search-field configuration.
# Entries in multilingual_fields are expanded to title.{lang}/brief.{lang}/... form;
# entries in shared_fields are fields without a language suffix.
search_fields:
multilingual_fields:
- "title"
- "brief"
- "description"
- "vendor"
- "category_path"
- "category_name_text"
shared_fields:
- "tags"
- "option1_values"
- "option2_values"
- "option3_values"
# Subset of multilingual_fields (omits description and category_path).
# NOTE(review): how this subset is consumed is not visible in this file — confirm against the query builder.
core_multilingual_fields:
- "title"
- "brief"
- "vendor"
- "category_name_text"
# Unified text-recall strategy (main query + translated query + phrase/keywords).
text_query_strategy:
base_minimum_should_match: "75%"
translation_minimum_should_match: "75%"
translation_boost: 0.4
translation_boost_when_source_missing: 1.0
source_boost_when_missing: 0.6
original_query_fallback_boost_when_translation_missing: 0.2
keywords_boost: 0.1
enable_phrase_query: true
tie_breaker_base_query: 0.9
tie_breaker_keywords: 0.9
|
33839b37
tangwang
属性值参与搜索:
|
76
77
78
|
# Embedding field names; no image embedding field is configured (null).
text_embedding_field: "title_embedding"
image_embedding_field: null
|
325eec03
tangwang
1. 日志、配置基础设施,使用优化
|
79
|
|
33839b37
tangwang
属性值参与搜索:
|
80
|
# Embedding disable thresholds (short queries do not use vector search)
|
9f96d6f3
tangwang
短query不用语义搜索
|
81
|
embedding_disable_thresholds:
|
33839b37
tangwang
属性值参与搜索:
|
82
83
|
# NOTE(review): presumably query-length limits in Chinese characters / English words;
# whether the comparison is inclusive is not shown here — confirm against the consumer.
chinese_char_limit: 4
english_word_limit: 3
|
9f96d6f3
tangwang
短query不用语义搜索
|
84
|
|
42e3aea6
tangwang
tidy
|
85
|
# Translation API configuration (provider/URL live under services.translation)
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
86
|
translation_service: "deepl"
|
33839b37
tangwang
属性值参与搜索:
|
87
|
translation_api_key: null # set via environment variable
|
42e3aea6
tangwang
tidy
|
88
|
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
89
90
91
92
93
94
95
96
97
98
99
100
|
# Translation prompt configuration (used to improve translation quality; passed as the DeepL API context parameter)
translation_prompts:
# Prompts for product-title translation
product_title_zh: "请将原文翻译成中文商品SKU名称,要求:确保精确、完整地传达原文信息的基础上,语言简洁清晰、地道、专业。"
product_title_en: "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language."
# Prompts for query translation
query_zh: "电商领域"
query_en: "e-commerce domain"
# Default translation prompts
default_zh: "电商领域"
default_en: "e-commerce domain"
|
33839b37
tangwang
属性值参与搜索:
|
101
102
103
|
# Returned-field configuration (_source includes).
# null returns all fields, [] returns no fields, and a list returns only the listed fields.
source_fields: null
|
70dab99f
tangwang
add logs
|
104
105
106
|
# KNN boost configuration (boost value applied to vector recall)
knn_boost: 0.25 # Lower boost for embedding recall
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
107
|
|
33839b37
tangwang
属性值参与搜索:
|
108
|
# Ranking configuration
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
109
|
ranking:
|
70dab99f
tangwang
add logs
|
110
|
expression: "bm25() + 0.25*text_embedding_relevance()"
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
111
112
113
114
115
116
|
description: "BM25 text relevance combined with semantic embedding similarity"
# function_score configuration (scoring rules at the ES layer)
function_score:
score_mode: "sum"
boost_mode: "multiply"
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
117
118
|
functions: []
|
42e3aea6
tangwang
tidy
|
119
|
# Rerank configuration (provider/URL live under services.rerank)
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
120
|
rerank:
|
506c39b7
tangwang
feat(search): 统一重...
|
121
|
rerank_window: 1000
|
42e3aea6
tangwang
tidy
|
122
|
timeout_sec: 15.0
|
506c39b7
tangwang
feat(search): 统一重...
|
123
124
|
weight_es: 0.4
weight_ai: 0.6
|
ff32d894
tangwang
rerank
|
125
126
|
rerank_query_template: "{query}"
rerank_doc_template: "{title}"
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
127
|
|
42e3aea6
tangwang
tidy
|
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
|
# Extensible service/provider registry (single source of configuration)
services:
translation:
provider: "direct" # direct | http | google(reserved)
base_url: "http://127.0.0.1:6006"
model: "qwen"
timeout_sec: 10.0
# Per-provider settings, keyed by the provider names listed on the provider line above.
providers:
direct:
model: "qwen"
http:
base_url: "http://127.0.0.1:6006"
model: "qwen"
timeout_sec: 10.0
# Reserved provider; currently disabled with empty project_id and model.
google:
enabled: false
project_id: ""
location: "global"
model: ""
embedding:
|
950a640e
tangwang
embeddings
|
148
|
provider: "http" # http
|
42e3aea6
tangwang
tidy
|
149
150
151
152
|
base_url: "http://127.0.0.1:6005"
providers:
http:
base_url: "http://127.0.0.1:6005"
|
07cf5a93
tangwang
START_EMBEDDING=...
|
153
|
# In-service text backend (read when the embedding process starts)
|
54ccf28c
tangwang
tei
|
154
|
backend: "local_st" # tei | local_st
|
07cf5a93
tangwang
START_EMBEDDING=...
|
155
156
157
158
159
160
161
162
163
164
|
# Settings for each backend named by the backend option (tei, local_st).
# Both entries use the same model_id.
backends:
tei:
base_url: "http://127.0.0.1:8080"
timeout_sec: 60
model_id: "Qwen/Qwen3-Embedding-0.6B"
local_st:
model_id: "Qwen/Qwen3-Embedding-0.6B"
device: "cuda"
batch_size: 32
normalize_embeddings: true
|
42e3aea6
tangwang
tidy
|
165
|
rerank:
|
701ae503
tangwang
docs
|
166
|
provider: "http"
|
42e3aea6
tangwang
tidy
|
167
168
169
170
|
base_url: "http://127.0.0.1:6007"
providers:
http:
base_url: "http://127.0.0.1:6007"
|
701ae503
tangwang
docs
|
171
172
|
service_url: "http://127.0.0.1:6007/rerank"
# In-service backend (read when the reranker process starts)
|
07cf5a93
tangwang
START_EMBEDDING=...
|
173
|
backend: "qwen3_vllm" # bge | qwen3_vllm
|
701ae503
tangwang
docs
|
174
175
176
177
178
179
180
181
182
183
184
185
|
# Settings for each backend named by the backend option (bge, qwen3_vllm).
backends:
bge:
model_name: "BAAI/bge-reranker-v2-m3"
device: null
use_fp16: true
batch_size: 64
max_length: 512
cache_dir: "./model_cache"
enable_warmup: true
qwen3_vllm:
model_name: "Qwen/Qwen3-Reranker-0.6B"
engine: "vllm"
|
07cf5a93
tangwang
START_EMBEDDING=...
|
186
|
max_model_len: 256
|
701ae503
tangwang
docs
|
187
|
tensor_parallel_size: 1
|
07cf5a93
tangwang
START_EMBEDDING=...
|
188
189
|
gpu_memory_utilization: 0.36
dtype: "float16"
|
bc089b43
tangwang
refactor(reranker...
|
190
191
|
enable_prefix_caching: true
enforce_eager: false
|
701ae503
tangwang
docs
|
192
|
instruction: "Given a web search query, retrieve relevant passages that answer the query"
|
42e3aea6
tangwang
tidy
|
193
|
|
cadc77b6
tangwang
索引字段名、变量名、API数据结构...
|
194
|
# SPU configuration (enabled; uses nested skus)
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
195
196
|
spu_config:
enabled: true
|
cadc77b6
tangwang
索引字段名、变量名、API数据结构...
|
197
|
spu_field: "spu_id"
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
198
|
inner_hits_size: 10
|
33839b37
tangwang
属性值参与搜索:
|
199
200
201
|
# Which option dimensions take part in retrieval (written into the index and used in online search).
# Format: a list containing one or more of option1/option2/option3.
searchable_option_dimensions: ['option1', 'option2', 'option3']
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
202
203
|
# Tenant configuration
|
038e4e2f
tangwang
refactor(i18n): t...
|
204
205
|
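# Each tenant can configure a primary language (primary_language) and index languages (index_languages: the main market languages, selectable by the merchant).
# The default index_languages is [en, zh]; it can be set to any subset of SUPPORTED_INDEX_LANGUAGES.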
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
206
|
tenant_config:
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
207
|
default:
|
2739b281
tangwang
多语言索引调整
|
208
|
primary_language: "en"
|
038e4e2f
tangwang
refactor(i18n): t...
|
209
|
index_languages: ["en", "zh"]
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
210
211
212
|
# Per-tenant entries keyed by quoted tenant ID.
# NOTE(review): presumably these take precedence over the default entry — confirm against the config loader.
tenants:
"1":
primary_language: "zh"
|
038e4e2f
tangwang
refactor(i18n): t...
|
213
|
index_languages: ["zh", "en"]
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
214
215
|
"2":
primary_language: "en"
|
038e4e2f
tangwang
refactor(i18n): t...
|
216
|
index_languages: ["en", "zh"]
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
217
218
|
"3":
primary_language: "zh"
|
038e4e2f
tangwang
refactor(i18n): t...
|
219
|
index_languages: ["zh", "en"]
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
220
221
|
"162":
primary_language: "zh"
|
038e4e2f
tangwang
refactor(i18n): t...
|
222
|
index_languages: ["zh", "en"]
|
cff5e86f
tangwang
reindex
|
223
224
|
"170":
primary_language: "en"
|
038e4e2f
tangwang
refactor(i18n): t...
|
225
|
index_languages: ["en", "zh"]
|