4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
1
|
# Unified Configuration for Multi-Tenant Search Engine
|
33839b37
tangwang
属性值参与搜索:
|
2
3
|
# 统一配置文件,所有租户共用一套配置
# 注意:索引结构由 mappings/search_products.json 定义,此文件只配置搜索行为
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
4
5
6
7
|
# Elasticsearch Index
es_index_name: "search_products"
|
33839b37
tangwang
属性值参与搜索:
|
8
|
# ES Index Settings (基础设置)
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
9
10
11
12
13
|
# Index-level settings: one primary shard, no replica copies, and a
# 30-second refresh interval.
# NOTE(review): number_of_replicas is 0, so the index has no redundant copy —
# confirm this is intended for the target cluster.
es_settings:
number_of_shards: 1
number_of_replicas: 0
refresh_interval: "30s"
|
33839b37
tangwang
属性值参与搜索:
|
14
|
# 字段权重配置(用于搜索时的字段boost)
|
bd96cead
tangwang
1. 动态多语言字段与统一策略配置
|
15
16
|
# 统一按“字段基名”配置;查询时按 search_langs 动态拼接 .{lang}。
# 若需要按某个语言单独调权,也可以加显式 key(例如 title.de: 3.2)。
|
33839b37
tangwang
属性值参与搜索:
|
17
|
field_boosts:
|
bd96cead
tangwang
1. 动态多语言字段与统一策略配置
|
18
19
20
21
22
23
|
# Boosts for the language-suffixed base fields (the same six names that are
# listed under multilingual_fields). title carries the highest weight in
# this file.
title: 3.0
brief: 1.5
description: 1.0
vendor: 1.5
category_path: 1.5
category_name_text: 1.5
|
33839b37
tangwang
属性值参与搜索:
|
24
25
26
27
28
|
# Boosts for the fields that carry no language suffix (the same four names
# that are listed under shared_fields). The option values get the lowest
# weight in this file.
tags: 1.0
option1_values: 0.5
option2_values: 0.5
option3_values: 0.5
|
33839b37
tangwang
属性值参与搜索:
|
29
|
# Query Configuration(查询配置)
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
30
|
query_config:
|
33839b37
tangwang
属性值参与搜索:
|
31
|
# 支持的语言
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
32
33
34
|
supported_languages:
- "zh"
- "en"
|
2739b281
tangwang
多语言索引调整
|
35
|
default_language: "en"
|
33839b37
tangwang
属性值参与搜索:
|
36
|
|
345d960b
tangwang
1. 删除全局 enable_tr...
|
37
|
# 功能开关(翻译开关由tenant_config控制)
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
38
39
|
enable_text_embedding: true
enable_query_rewrite: true
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
40
|
|
bd96cead
tangwang
1. 动态多语言字段与统一策略配置
|
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
# Dynamic multilingual search-field configuration.
# Entries in multilingual_fields are expanded to title.{lang}/brief.{lang}/...;
# entries in shared_fields are fields without a language suffix.
search_fields:
multilingual_fields:
- "title"
- "brief"
- "description"
- "vendor"
- "category_path"
- "category_name_text"
shared_fields:
- "tags"
- "option1_values"
- "option2_values"
- "option3_values"
# Every entry below also appears in multilingual_fields; description and
# category_path are the two that are left out.
# NOTE(review): how the "core" subset is used is not visible here — confirm
# against the query builder.
core_multilingual_fields:
- "title"
- "brief"
- "vendor"
- "category_name_text"
# Unified text-recall strategy (main query + translated query + phrase/keywords).
# Both minimum_should_match values are "75%" and both tie_breaker values are 0.9.
# NOTE(review): the *_when_missing / *_fallback_* boosts presumably apply when
# the source-language text or the translation is unavailable — confirm against
# the query builder before tuning.
text_query_strategy:
base_minimum_should_match: "75%"
translation_minimum_should_match: "75%"
translation_boost: 0.4
translation_boost_when_source_missing: 1.0
source_boost_when_missing: 0.6
original_query_fallback_boost_when_translation_missing: 0.2
keywords_boost: 0.1
enable_phrase_query: true
tie_breaker_base_query: 0.9
tie_breaker_keywords: 0.9
|
33839b37
tangwang
属性值参与搜索:
|
76
77
78
|
# Embedding field names.
# image_embedding_field is null, i.e. no image embedding field is configured.
text_embedding_field: "title_embedding"
image_embedding_field: null
|
325eec03
tangwang
1. 日志、配置基础设施,使用优化
|
79
|
|
33839b37
tangwang
属性值参与搜索:
|
80
81
82
|
# Returned-field configuration (_source includes).
# null returns all fields, [] returns no fields, and a list returns only the
# listed fields.
source_fields: null
|
70dab99f
tangwang
add logs
|
83
84
85
|
# KNN boost configuration (the boost value applied to vector recall).
knn_boost: 0.25 # Lower boost for embedding recall
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
86
|
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
87
88
89
90
|
# Function Score configuration (scoring rules applied at the ES layer).
function_score:
score_mode: "sum"
boost_mode: "multiply"
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
91
92
|
functions: []
|
42e3aea6
tangwang
tidy
|
93
|
# 重排配置(provider/URL 在 services.rerank)
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
94
|
rerank:
|
5f7d7f09
tangwang
性能测试报告.md
|
95
|
enabled: true
|
c51d254f
tangwang
性能测试
|
96
|
rerank_window: 384
|
42e3aea6
tangwang
tidy
|
97
|
timeout_sec: 15.0
|
506c39b7
tangwang
feat(search): 统一重...
|
98
99
|
# The two weights add up to 1.0.
# NOTE(review): presumably the ES score and the reranker score are blended
# with these weights — confirm against the consumer.
weight_es: 0.4
weight_ai: 0.6
|
ff32d894
tangwang
rerank
|
100
101
|
rerank_query_template: "{query}"
rerank_doc_template: "{title}"
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
102
|
|
42e3aea6
tangwang
tidy
|
103
104
105
|
# 可扩展服务/provider 注册表(单一配置源)
services:
translation:
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
106
107
108
|
# Address of the translation service and its default model/scene.
# default_model ("llm") matches the llm entry defined further down.
# NOTE(review): presumably these defaults apply when a request does not name
# a model or scene — confirm.
service_url: "http://127.0.0.1:6006"
default_model: "llm"
default_scene: "general"
|
42e3aea6
tangwang
tidy
|
109
|
timeout_sec: 10.0
|
d4cadc13
tangwang
翻译重构
|
110
111
112
113
114
|
# Translation cache settings.
# ttl_seconds: 62208000 s = 720 days (62208000 / 86400).
# NOTE(review): sliding_expiration presumably renews the TTL on access —
# confirm against the cache implementation.
cache:
enabled: true
key_prefix: "trans:v2"
ttl_seconds: 62208000
sliding_expiration: true
|
0fd2f875
tangwang
translate
|
115
|
key_include_scene: true
|
d4cadc13
tangwang
翻译重构
|
116
|
key_include_source_lang: true
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
117
|
capabilities:
|
d4cadc13
tangwang
翻译重构
|
118
|
qwen-mt:
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
119
|
enabled: true
|
0fd2f875
tangwang
translate
|
120
|
backend: "qwen_mt"
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
121
|
model: "qwen-mt-flash"
|
0fd2f875
tangwang
translate
|
122
|
base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
|
42e3aea6
tangwang
tidy
|
123
|
timeout_sec: 10.0
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
124
|
use_cache: true
|
a0a173ae
tangwang
last
|
125
|
llm:
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
126
|
enabled: true
|
0fd2f875
tangwang
translate
|
127
|
backend: "llm"
|
a0a173ae
tangwang
last
|
128
|
model: "qwen-flash"
|
0fd2f875
tangwang
translate
|
129
|
base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
|
a0a173ae
tangwang
last
|
130
|
timeout_sec: 30.0
|
d4cadc13
tangwang
翻译重构
|
131
|
deepl:
|
5e4dc8e4
tangwang
翻译架构按“一个翻译服务 +
|
132
|
enabled: false
|
0fd2f875
tangwang
translate
|
133
134
|
backend: "deepl"
api_url: "https://api.deepl.com/v2/translate"
|
d4cadc13
tangwang
翻译重构
|
135
|
timeout_sec: 10.0
|
d4cadc13
tangwang
翻译重构
|
136
|
glossary_id: ""
|
0fd2f875
tangwang
translate
|
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
|
# Three enabled local model entries. All of them use device "cuda",
# torch_dtype "float16", num_beams 1, and a 256 limit for both
# max_input_length and max_new_tokens.
# Differences: the NLLB entry uses backend "local_nllb" with batch_size 8;
# the two opus-mt entries use backend "local_marian" with batch_size 16.
nllb-200-distilled-600m:
enabled: true
backend: "local_nllb"
model_id: "facebook/nllb-200-distilled-600M"
model_dir: "./models/translation/facebook/nllb-200-distilled-600M"
device: "cuda"
torch_dtype: "float16"
batch_size: 8
max_input_length: 256
max_new_tokens: 256
num_beams: 1
# Chinese -> English (per the model id).
opus-mt-zh-en:
enabled: true
backend: "local_marian"
model_id: "Helsinki-NLP/opus-mt-zh-en"
model_dir: "./models/translation/Helsinki-NLP/opus-mt-zh-en"
device: "cuda"
torch_dtype: "float16"
batch_size: 16
max_input_length: 256
max_new_tokens: 256
num_beams: 1
# English -> Chinese (per the model id).
opus-mt-en-zh:
enabled: true
backend: "local_marian"
model_id: "Helsinki-NLP/opus-mt-en-zh"
model_dir: "./models/translation/Helsinki-NLP/opus-mt-en-zh"
device: "cuda"
torch_dtype: "float16"
batch_size: 16
max_input_length: 256
max_new_tokens: 256
num_beams: 1
|
42e3aea6
tangwang
tidy
|
170
|
embedding:
|
950a640e
tangwang
embeddings
|
171
|
provider: "http" # "http" is the only entry listed under providers below
|
42e3aea6
tangwang
tidy
|
172
173
174
175
|
# NOTE(review): the same URL is written twice — once directly and once under
# providers.http. Keep both values in sync when the address changes.
base_url: "http://127.0.0.1:6005"
providers:
http:
base_url: "http://127.0.0.1:6005"
|
07cf5a93
tangwang
START_EMBEDDING=...
|
176
|
# 服务内文本后端(embedding 进程启动时读取)
|
efd435cf
tangwang
tei性能调优:
|
177
|
backend: "tei" # tei | local_st
|
07cf5a93
tangwang
START_EMBEDDING=...
|
178
179
180
|
backends:
tei:
base_url: "http://127.0.0.1:8080"
|
efd435cf
tangwang
tei性能调优:
|
181
|
timeout_sec: 20
|
07cf5a93
tangwang
START_EMBEDDING=...
|
182
183
184
185
186
187
|
model_id: "Qwen/Qwen3-Embedding-0.6B"
local_st:
model_id: "Qwen/Qwen3-Embedding-0.6B"
device: "cuda"
batch_size: 32
normalize_embeddings: true
|
42e3aea6
tangwang
tidy
|
188
|
rerank:
|
701ae503
tangwang
docs
|
189
|
provider: "http"
|
42e3aea6
tangwang
tidy
|
190
191
192
193
|
# NOTE(review): the same URL is written twice — once directly and once under
# providers.http. Keep both values in sync when the address changes.
base_url: "http://127.0.0.1:6007"
providers:
http:
base_url: "http://127.0.0.1:6007"
|
701ae503
tangwang
docs
|
194
195
|
service_url: "http://127.0.0.1:6007/rerank"
# In-service backend (read when the reranker process starts)
|
22ae00c7
tangwang
product_annotator
|
196
|
backend: "qwen3_vllm" # bge | qwen3_vllm | qwen3_transformers | dashscope_rerank
|
701ae503
tangwang
docs
|
197
198
199
200
201
202
203
204
205
206
207
208
|
# Per-backend settings, one entry per backend name.
backends:
bge:
model_name: "BAAI/bge-reranker-v2-m3"
# NOTE(review): device is null — presumably the backend picks a device on
# its own; confirm.
device: null
use_fp16: true
batch_size: 64
max_length: 512
cache_dir: "./model_cache"
enable_warmup: true
qwen3_vllm:
model_name: "Qwen/Qwen3-Reranker-0.6B"
engine: "vllm"
|
07cf5a93
tangwang
START_EMBEDDING=...
|
209
|
max_model_len: 256
|
701ae503
tangwang
docs
|
210
|
tensor_parallel_size: 1
|
07cf5a93
tangwang
START_EMBEDDING=...
|
211
212
|
gpu_memory_utilization: 0.36
dtype: "float16"
|
bc089b43
tangwang
refactor(reranker...
|
213
214
|
enable_prefix_caching: true
enforce_eager: false
|
9f5994b4
tangwang
reranker
|
215
216
217
|
infer_batch_size: 64
sort_by_doc_length: true
length_sort_mode: "char" # char | token
|
a99e62ba
tangwang
记录各阶段耗时
|
218
|
instruction: "Given a shopping query, rank product titles by relevance"
|
d31c7f65
tangwang
补充云服务reranker
|
219
220
221
222
223
224
225
226
227
228
229
230
231
232
|
qwen3_transformers:
model_name: "Qwen/Qwen3-Reranker-0.6B"
instruction: "Given a shopping query, rank product titles by relevance"
max_length: 8192
batch_size: 64
use_fp16: true
attn_implementation: "flash_attention_2"
dashscope_rerank:
model_name: "qwen3-rerank"
# Choose the endpoint by region:
# China: https://dashscope.aliyuncs.com/compatible-api/v1/reranks
# Singapore: https://dashscope-intl.aliyuncs.com/compatible-api/v1/reranks
# United States: https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks
# The value below is the China endpoint.
endpoint: "https://dashscope.aliyuncs.com/compatible-api/v1/reranks"
|
0d3e73ba
tangwang
rerank mini batch
|
233
234
|
# NOTE(review): judging by the key name, this is the name of an environment
# variable that holds the API key, not the key itself — confirm.
api_key_env: "RERANK_DASHSCOPE_API_KEY_CN"
timeout_sec: 10.0
|
d31c7f65
tangwang
补充云服务reranker
|
235
|
top_n_cap: 0 # 0 means top_n = number of documents in the current request; >0 caps top_n
|
0d3e73ba
tangwang
rerank mini batch
|
236
|
# NOTE(review): this key is spelled "batchsize", while other entries in this
# file use "batch_size" — do not rename without checking the consumer.
batchsize: 64 # 0 disables; >0 enables concurrent mini-batch scheduling (top_n/top_n_cap still apply; results are truncated globally after batching)
|
d31c7f65
tangwang
补充云服务reranker
|
237
238
239
|
# NOTE(review): this key is "instruct", whereas the qwen3_vllm and
# qwen3_transformers entries use "instruction" for the same sentence —
# confirm which name the consumer reads before renaming.
instruct: "Given a shopping query, rank product titles by relevance"
max_retries: 2
retry_backoff_sec: 0.2
|
42e3aea6
tangwang
tidy
|
240
|
|
cadc77b6
tangwang
索引字段名、变量名、API数据结构...
|
241
|
# SPU配置(已启用,使用嵌套skus)
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
242
243
|
spu_config:
enabled: true
|
cadc77b6
tangwang
索引字段名、变量名、API数据结构...
|
244
|
spu_field: "spu_id"
|
4d824a77
tangwang
所有租户共用一套统一配置.tena...
|
245
|
inner_hits_size: 10
|
33839b37
tangwang
属性值参与搜索:
|
246
247
248
|
# Which option dimensions take part in retrieval (both indexing and online search).
# The value is a list containing one or more of option1/option2/option3.
searchable_option_dimensions: ['option1', 'option2', 'option3']
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
249
250
|
# 租户配置(Tenant Configuration)
|
038e4e2f
tangwang
refactor(i18n): t...
|
251
|
# 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选)
|
6f7840cf
tangwang
refactor: rename ...
|
252
|
# 默认 index_languages: [en, zh],可配置为任意 SOURCE_LANG_CODE_MAP.keys() 的子集
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
253
|
tenant_config:
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
254
|
default:
|
2739b281
tangwang
多语言索引调整
|
255
|
primary_language: "en"
|
038e4e2f
tangwang
refactor(i18n): t...
|
256
|
index_languages: ["en", "zh"]
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
257
258
259
|
tenants:
"1":
primary_language: "zh"
|
038e4e2f
tangwang
refactor(i18n): t...
|
260
|
index_languages: ["zh", "en"]
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
261
262
|
"2":
primary_language: "en"
|
038e4e2f
tangwang
refactor(i18n): t...
|
263
|
index_languages: ["en", "zh"]
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
264
265
|
"3":
primary_language: "zh"
|
038e4e2f
tangwang
refactor(i18n): t...
|
266
|
index_languages: ["zh", "en"]
|
0064e946
tangwang
feat: 增量索引服务、租户配置...
|
267
268
|
"162":
primary_language: "zh"
|
038e4e2f
tangwang
refactor(i18n): t...
|
269
|
index_languages: ["zh", "en"]
|
cff5e86f
tangwang
reindex
|
270
271
|
"170":
primary_language: "en"
|
038e4e2f
tangwang
refactor(i18n): t...
|
272
|
index_languages: ["en", "zh"]
|