config.yaml
19.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
# Unified Configuration for Multi-Tenant Search Engine
# 统一配置文件,所有租户共用一套配置
# 注意:索引结构由 mappings/search_products.json 定义,此文件只配置搜索行为
#
# 约定:下列键为必填;进程环境变量可覆盖 infrastructure / runtime 中同名语义项
#(如 ES_HOST、API_PORT 等),未设置环境变量时使用本文件中的值。
# Process environment and bind addresses for each service process.
# The environment variables APP_ENV, RUNTIME_ENV and ES_INDEX_NAMESPACE can
# override the meaning of the first two keys (environment, index_namespace).
runtime:
environment: "prod"
index_namespace: ""
api_host: "0.0.0.0"
api_port: 6002
indexer_host: "0.0.0.0"
indexer_port: 6004
embedding_host: "0.0.0.0"
# NOTE(review): embedding_port and embedding_text_port hold the same value;
# embedding_port looks like a legacy alias of the text port — confirm.
embedding_port: 6005
embedding_text_port: 6005
embedding_image_port: 6008
translator_host: "0.0.0.0"
translator_port: 6006
reranker_host: "0.0.0.0"
reranker_port: 6007
# Infrastructure connections. Sensitive values are read from environment
# variables first: ES_*, REDIS_*, DB_*, DASHSCOPE_API_KEY, DEEPL_AUTH_KEY.
infrastructure:
elasticsearch:
host: "http://localhost:9200"
username: null
password: null
redis:
host: "localhost"
# NOTE(review): 6479 differs from the usual Redis port 6379 — confirm it is
# intentional and not a typo.
port: 6479
snapshot_db: 0
password: null
socket_timeout: 1
socket_connect_timeout: 1
retry_on_timeout: false
cache_expire_days: 720
embedding_cache_prefix: "embedding"
anchor_cache_prefix: "product_anchors"
anchor_cache_expire_days: 30
database:
host: null
port: 3306
database: null
username: null
password: null
# Left null here; per the header above, the API keys are expected to come from
# DASHSCOPE_API_KEY / DEEPL_AUTH_KEY in the environment.
secrets:
dashscope_api_key: null
deepl_auth_key: null
# Name of the Elasticsearch index. Per the file header, the index structure
# itself is defined in mappings/search_products.json, not in this file.
es_index_name: "search_products"
# Search domains / index list (may be an empty list; every field of each
# entry must be given explicitly).
indexes: []
# Config assets: paths to auxiliary files used by the search configuration.
assets:
query_rewrite_dictionary_path: "config/dictionaries/query_rewrite.dict"
# Product content understanding (LLM enrich-content) configuration.
product_enrich:
# NOTE(review): presumably the size of the worker pool used for enrichment
# calls — confirm against the consumer.
max_workers: 40
# ES index settings (basic settings).
# NOTE(review): one shard and zero replicas gives no redundancy; looks like a
# single-node setup — confirm this is intended for environment "prod".
es_settings:
number_of_shards: 1
number_of_replicas: 0
refresh_interval: "30s"
# Field weight configuration (per-field boost applied at search time).
# Boosts are configured uniformly by field base name; at query time the
# suffix .{lang} is appended dynamically for the actual search language.
# To tune one language separately, an explicit key can also be added
# (for example title.de: 3.2).
field_boosts:
title: 3.0
qanchors: 2.5
tags: 2.0
category_name_text: 2.0
category_path: 2.0
brief: 1.5
description: 1.5
vendor: 1.5
option1_values: 1.5
option2_values: 1.5
option3_values: 1.5
# Query configuration.
query_config:
# Supported languages
supported_languages:
- "zh"
- "en"
default_language: "en"
# Feature switches (the translation switch is controlled by tenant_config)
enable_text_embedding: true
enable_query_rewrite: true
# Query translation models (each must match an entry in
# services.translation.capabilities).
# Case 1: the source language is within the tenant's index_languages, so the
# primary recall can hit the source-language fields; the next three keys apply.
# zh_to_en_model: "opus-mt-zh-en"
# en_to_zh_model: "opus-mt-en-zh"
# default_translation_model: "nllb-200-distilled-600m"
zh_to_en_model: "deepl"
en_to_zh_model: "deepl"
default_translation_model: "deepl"
# Case 2: the source language is not in index_languages. Translation matters
# more for retrievable text here, so the models can be set separately
# (when omitted they default to the group above).
zh_to_en_model__source_not_in_index: "deepl"
en_to_zh_model__source_not_in_index: "deepl"
default_translation_model__source_not_in_index: "deepl"
# Query parsing stage: translation and query embedding run concurrently and
# share one wait budget (milliseconds).
# Detected language already in the tenant's index_languages: shorter budget;
# not in the index languages: longer budget (translation matters more for recall).
# NOTE(review): the trailing numbers below look like former values — confirm.
translation_embedding_wait_budget_ms_source_in_index: 500 # 80
translation_embedding_wait_budget_ms_source_not_in_index: 700 # 200
# Style-intent settings: color and size dictionaries plus the alias names
# (English and Chinese) recognised for each dimension.
# NOTE(review): presumably the aliases are matched against product option
# names, and selected_sku_boost is applied to the matching SKU — confirm.
style_intent:
enabled: true
selected_sku_boost: 1.2
color_dictionary_path: "config/dictionaries/style_intent_color.csv"
size_dictionary_path: "config/dictionaries/style_intent_size.csv"
dimension_aliases:
color: ["color", "colors", "colour", "colours", "颜色", "色", "色系"]
size: ["size", "sizes", "sizing", "尺码", "尺寸", "码数", "号码", "码"]
# Product-title exclusion rules loaded from a TSV dictionary.
product_title_exclusion:
enabled: true
dictionary_path: "config/dictionaries/product_title_exclusion.tsv"
# Dynamic multilingual search-field configuration.
# multilingual_fields are expanded to title.{lang}/brief.{lang}/... form;
# shared_fields are fields that carry no language suffix.
search_fields:
  multilingual_fields:
    - "title"
    - "qanchors"
    - "category_path"
    - "category_name_text"
    - "brief"
    - "description"
    - "vendor"
  # No shared fields are searched at present. The empty list is written out
  # explicitly: with every item commented out, a bare `shared_fields:` loads
  # as null rather than as an empty list. To re-enable a field, replace `[]`
  # with a block list built from the entries below.
  shared_fields: []
  # - "tags"
  # - "option1_values"
  # - "option2_values"
  # - "option3_values"
  # NOTE(review): presumably the reduced field set used where only the core
  # text fields should match — confirm against the query builder.
  core_multilingual_fields:
    - "title"
    - "qanchors"
    - "category_name_text"
# Unified text recall strategy (main query + translated query).
text_query_strategy:
# minimum_should_match for the main query and for the translated query.
base_minimum_should_match: "75%"
translation_minimum_should_match: "75%"
# Boost applied to the translated query.
translation_boost: 0.75
tie_breaker_base_query: 0.5
best_fields_boost: 2.0
# Per-field weights for the best_fields and phrase clauses.
best_fields:
title: 4.0
qanchors: 3.0
category_name_text: 2.0
phrase_fields:
title: 5.0
qanchors: 4.0
phrase_match_boost: 3.0
# Embedding field names. image_embedding_field is null, i.e. no image
# embedding field is configured here.
text_embedding_field: "title_embedding"
image_embedding_field: null
# Returned-field configuration (_source includes).
# null returns all fields, [] returns no fields, and a list returns only the
# listed fields.
# The fields below are kept consistent with api/result_formatter.py (SpuResult
# population) and search/searcher.py (SKU sorting / main-image replacement).
source_fields:
- spu_id
- handle
- title
- brief
- description
- vendor
- category_name
- category_name_text
- category_path
- category_id
- category_level
- category1_name
- category2_name
- category3_name
- tags
- min_price
- compare_at_price
- image_url
- sku_prices
- sku_weights
- sku_weight_units
- total_inventory
- option1_name
- option1_values
- option2_name
- option2_values
- option3_name
- option3_values
- specifications
- skus
# KNN boost (boost value applied to vector recall).
knn_boost: 2.0 # Lower boost for embedding recall
# Function score configuration (ES-level scoring rules).
# The functions list is empty, so no scoring functions are configured here.
function_score:
score_mode: "sum"
boost_mode: "multiply"
functions: []
# Rerank configuration (the provider and URL live in services.rerank).
rerank:
enabled: true
# NOTE(review): presumably the number of top ES hits sent to the reranker —
# confirm against the searcher.
rerank_window: 400
timeout_sec: 15.0
weight_es: 0.4
weight_ai: 0.6
# Templates used to render the query text and the document text for the
# reranker; only the product title is sent as the document.
rerank_query_template: "{query}"
rerank_doc_template: "{title}"
# Multiplicative fusion:
#   fused = Π (max(score,0) + bias) ** exponent
# over three terms: rerank / text / knn.
fusion:
rerank_bias: 0.00001
rerank_exponent: 1.0
text_bias: 0.1
text_exponent: 0.35
knn_bias: 0.6
# NOTE(review): with exponent 0.0 the knn factor in the formula above is
# always 1, so the knn score does not affect the fused score — confirm this
# is intended.
knn_exponent: 0.0
# Extensible service/provider registry (single source of configuration).
services:
translation:
service_url: "http://127.0.0.1:6006"
default_model: "nllb-200-distilled-600m"
default_scene: "general"
timeout_sec: 10.0
cache:
# 62208000 s = 720 days, the same duration as
# infrastructure.redis.cache_expire_days.
ttl_seconds: 62208000
sliding_expiration: true
# When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups).
enable_model_quality_tier_cache: true
# Higher tier = better quality. Multiple models may share one tier (same tier).
# A request may reuse Redis keys from models with tier > A or tier == A (not from lower tiers).
model_quality_tiers:
deepl: 30
qwen-mt: 30
llm: 30
nllb-200-distilled-600m: 20
opus-mt-zh-en: 10
opus-mt-en-zh: 10
# Translation capabilities: one entry per model name. The names here are the
# values referenced by query_config.*_model and by model_quality_tiers.
capabilities:
# Remote API-backed models (configured with a base_url / api_url).
qwen-mt:
enabled: true
backend: "qwen_mt"
model: "qwen-mt-flash"
base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
timeout_sec: 10.0
use_cache: true
llm:
enabled: true
backend: "llm"
model: "qwen-flash"
base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
timeout_sec: 30.0
use_cache: true
deepl:
enabled: true
backend: "deepl"
api_url: "https://api.deepl.com/v2/translate"
timeout_sec: 10.0
glossary_id: ""
use_cache: true
# Local models (model_dir on disk, device "cuda"). The ct2_* keys configure
# the CTranslate2 build stored under ct2_model_dir.
nllb-200-distilled-600m:
enabled: true
backend: "local_nllb"
model_id: "facebook/nllb-200-distilled-600M"
model_dir: "./models/translation/facebook/nllb-200-distilled-600M"
ct2_model_dir: "./models/translation/facebook/nllb-200-distilled-600M/ctranslate2-float16"
ct2_compute_type: "float16"
ct2_conversion_quantization: "float16"
ct2_auto_convert: true
ct2_inter_threads: 4
ct2_intra_threads: 0
ct2_max_queued_batches: 32
ct2_batch_type: "examples"
ct2_decoding_length_mode: "source"
ct2_decoding_length_extra: 8
ct2_decoding_length_min: 32
device: "cuda"
torch_dtype: "float16"
batch_size: 64
max_input_length: 256
max_new_tokens: 64
num_beams: 1
use_cache: true
# The two opus-mt entries are identical apart from the translation direction
# (model_id / model_dir / ct2_model_dir).
opus-mt-zh-en:
enabled: true
backend: "local_marian"
model_id: "Helsinki-NLP/opus-mt-zh-en"
model_dir: "./models/translation/Helsinki-NLP/opus-mt-zh-en"
ct2_model_dir: "./models/translation/Helsinki-NLP/opus-mt-zh-en/ctranslate2-float16"
ct2_compute_type: "float16"
ct2_conversion_quantization: "float16"
ct2_auto_convert: true
ct2_inter_threads: 1
ct2_intra_threads: 0
ct2_max_queued_batches: 0
ct2_batch_type: "examples"
device: "cuda"
torch_dtype: "float16"
batch_size: 16
max_input_length: 256
max_new_tokens: 256
num_beams: 1
use_cache: true
opus-mt-en-zh:
enabled: true
backend: "local_marian"
model_id: "Helsinki-NLP/opus-mt-en-zh"
model_dir: "./models/translation/Helsinki-NLP/opus-mt-en-zh"
ct2_model_dir: "./models/translation/Helsinki-NLP/opus-mt-en-zh/ctranslate2-float16"
ct2_compute_type: "float16"
ct2_conversion_quantization: "float16"
ct2_auto_convert: true
ct2_inter_threads: 1
ct2_intra_threads: 0
ct2_max_queued_batches: 0
ct2_batch_type: "examples"
device: "cuda"
torch_dtype: "float16"
batch_size: 16
max_input_length: 256
max_new_tokens: 256
num_beams: 1
use_cache: true
# Embedding service: client-side provider URLs plus the text and image
# backends used inside the embedding process.
embedding:
provider: "http" # http
providers:
http:
# These ports match runtime.embedding_text_port (6005) and
# runtime.embedding_image_port (6008).
text_base_url: "http://127.0.0.1:6005"
image_base_url: "http://127.0.0.1:6008"
# In-service text backend (read when the embedding process starts)
backend: "tei" # tei | local_st
backends:
tei:
base_url: "http://127.0.0.1:8080"
timeout_sec: 20
model_id: "Qwen/Qwen3-Embedding-0.6B"
local_st:
model_id: "Qwen/Qwen3-Embedding-0.6B"
device: "cuda"
batch_size: 32
normalize_embeddings: true
# In-service image backend (read when the embedding process starts)
image_backend: "clip_as_service" # clip_as_service | local_cnclip
image_backends:
clip_as_service:
server: "grpc://127.0.0.1:51000"
model_name: "CN-CLIP/ViT-L-14"
batch_size: 8
normalize_embeddings: true
local_cnclip:
model_name: "ViT-L-14"
device: null
batch_size: 8
normalize_embeddings: true
# Rerank service: client-side provider settings, followed by the backends
# available inside the reranker process.
rerank:
provider: "http"
# NOTE(review): base_url is given both here and under providers.http with the
# same value — confirm which one the client reads.
base_url: "http://127.0.0.1:6007"
providers:
http:
base_url: "http://127.0.0.1:6007"
service_url: "http://127.0.0.1:6007/rerank"
request:
max_docs: 1000
normalize: true
# In-service backend (read when the reranker process starts)
backend: "qwen3_vllm" # bge | qwen3_vllm | qwen3_vllm_score | qwen3_transformers | qwen3_transformers_packed | qwen3_gguf | qwen3_gguf_06b | dashscope_rerank
backends:
bge:
model_name: "BAAI/bge-reranker-v2-m3"
device: null
use_fp16: true
batch_size: 64
max_length: 160
cache_dir: "./model_cache"
enable_warmup: true
# vLLM-based Qwen3 reranker; this is the backend currently selected by the
# rerank `backend` key.
qwen3_vllm:
model_name: "Qwen/Qwen3-Reranker-0.6B"
engine: "vllm"
max_model_len: 160
tensor_parallel_size: 1
gpu_memory_utilization: 0.20
dtype: "float16"
enable_prefix_caching: true
enforce_eager: false
infer_batch_size: 100
sort_by_doc_length: true
# Matches reranker/backends/qwen3_vllm.py:
#   standard = _format_instruction__standard (fixed yes/no system prompt);
#   compact  = _format_instruction (instruction used as the system prompt and
#              repeated as Instruct inside the user turn).
# instruction_format: compact
instruction_format: compact
# Instruction variants that were tried; the active one is the uncommented line.
# instruction: "Given a query, score the product for relevance"
# "rank products by given query" works slightly better than "Given a query, score the product for relevance"
# instruction: "rank products by given query, category match first"
# instruction: "Rank products by query relevance, prioritizing category match"
# instruction: "Rank products by query relevance, prioritizing category and style match"
# instruction: "Rank by query relevance, prioritize category & style"
# instruction: "Relevance ranking: category & style match first"
# instruction: "Score product relevance by query with category & style match prioritized"
instruction: "Rank products by query with category & style match prioritized"
# vLLM LLM.score() (cross-encoder scoring). Uses a separate high-performance
# environment, .venv-reranker-score (pinned to vllm 0.18):
#   ./scripts/setup_reranker_venv.sh qwen3_vllm_score
# Can share the same model_name / HF cache as qwen3_vllm; the venv is separate
# so vLLM can be upgraded without affecting the generate backend.
qwen3_vllm_score:
model_name: "Qwen/Qwen3-Reranker-0.6B"
# The original official Hub checkpoint needs true; set to false when using
# converted seq-cls weights (e.g. tomaarsen/...-seq-cls).
use_original_qwen3_hf_overrides: true
# vLLM 0.18: compute capability < 8 (e.g. T4) automatically defaults to
# TRITON_ATTN; on Ampere+ this can be omitted or set to auto. The environment
# variable RERANK_VLLM_ATTENTION_BACKEND can also be used.
# vllm_attention_backend: "auto"
# Optional: aligned with vLLM; normally keep auto.
# vllm_runner: "auto"
# vllm_convert: "auto"
# Optional: merged with the built-in overrides when
# use_original_qwen3_hf_overrides is true.
# hf_overrides: {}
engine: "vllm"
max_model_len: 160
tensor_parallel_size: 1
gpu_memory_utilization: 0.20
dtype: "float16"
enable_prefix_caching: true
enforce_eager: false
infer_batch_size: 100
sort_by_doc_length: true
# Same semantics as the same-named key in qwen3_vllm; the default, standard,
# matches the prefix of the official vLLM Qwen3 reranker.
# instruction_format: compact
instruction_format: standard
instruction: "Rank products by query with category & style match prioritized"
# Transformers-based Qwen3 reranker (pairwise batching).
qwen3_transformers:
model_name: "Qwen/Qwen3-Reranker-0.6B"
instruction: "rank products by given query"
# instruction: "Score the product’s relevance to the given query"
max_length: 8192
batch_size: 64
use_fp16: true
# sdpa: the default, needs no flash-attn; if flash_attn is installed this can
# be changed to flash_attention_2.
attn_implementation: "sdpa"
# Packed Transformers backend: shared query prefix + custom position_ids/attention_mask.
# For 1 query + many short docs (for example 400 product titles), this usually reduces
# repeated prefix work and padding waste compared with pairwise batching.
qwen3_transformers_packed:
model_name: "Qwen/Qwen3-Reranker-0.6B"
instruction: "Rank products by query with category & style match prioritized"
max_model_len: 4096
max_doc_len: 160
# NOTE(review): 0 presumably means "no explicit limit per pack" — confirm.
max_docs_per_pack: 0
use_fp16: true
sort_by_doc_length: true
# Packed mode relies on a custom 4D attention mask. "eager" is the safest default.
# If your torch/transformers stack validates it, you can benchmark "sdpa".
attn_implementation: "eager"
# GGUF reranker backend using the 4B Qwen3 reranker (Q8_0 quantisation).
qwen3_gguf:
repo_id: "DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF"
filename: "*Q8_0.gguf"
cache_dir: "./model_cache"
local_dir: "./models/reranker/qwen3-reranker-4b-gguf"
instruction: "Rank products by query with category & style match prioritized"
# T4 16GB / performance-first settings: offload all layers; measured to be
# noticeably faster than the conservative settings.
n_ctx: 512
n_batch: 512
n_ubatch: 512
n_gpu_layers: 999
main_gpu: 0
n_threads: 2
n_threads_batch: 4
flash_attn: true
offload_kqv: true
use_mmap: true
use_mlock: false
infer_batch_size: 8
sort_by_doc_length: true
length_sort_mode: "char"
enable_warmup: true
verbose: false
# GGUF reranker backend using the 0.6B Qwen3 reranker (Q8_0 quantisation).
qwen3_gguf_06b:
repo_id: "ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF"
filename: "qwen3-reranker-0.6b-q8_0.gguf"
cache_dir: "./model_cache"
local_dir: "./models/reranker/qwen3-reranker-0.6b-q8_0-gguf"
instruction: "Rank products by query with category & style match prioritized"
# 0.6B GGUF / online rerank baseline:
# Measured at about 265s for a single request of 400 titles, so it is better
# suited as a low-VRAM functional fallback than as the low-latency online
# primary route.
n_ctx: 256
n_batch: 256
n_ubatch: 256
n_gpu_layers: 999
main_gpu: 0
n_threads: 2
n_threads_batch: 4
flash_attn: true
offload_kqv: true
use_mmap: true
use_mlock: false
infer_batch_size: 32
sort_by_doc_length: true
length_sort_mode: "char"
reuse_query_state: false
enable_warmup: true
verbose: false
# Hosted DashScope rerank API backend.
dashscope_rerank:
model_name: "qwen3-rerank"
# Choose the endpoint by region:
# China: https://dashscope.aliyuncs.com/compatible-api/v1/reranks
# Singapore: https://dashscope-intl.aliyuncs.com/compatible-api/v1/reranks
# US: https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks
endpoint: "https://dashscope.aliyuncs.com/compatible-api/v1/reranks"
# Name of the environment variable that holds the API key (not the key itself).
api_key_env: "RERANK_DASHSCOPE_API_KEY_CN"
timeout_sec: 10.0
top_n_cap: 0 # 0 means top_n = number of documents in the current request; >0 caps top_n
batchsize: 64 # 0 disables; >0 enables concurrent small-batch scheduling (top_n/top_n_cap still apply; results are truncated globally after batching)
instruct: "Given a shopping query, rank product titles by relevance"
max_retries: 2
retry_backoff_sec: 0.2
# SPU configuration (enabled; uses nested skus).
spu_config:
enabled: true
spu_field: "spu_id"
inner_hits_size: 10
# Which option dimensions take part in retrieval (both indexing and online
# search). The value is a list; choose one or more of option1/option2/option3.
searchable_option_dimensions: ['option1', 'option2', 'option3']
# Tenant configuration.
# Each tenant can configure a primary language (primary_language) and its
# index languages (index_languages: main-market languages the merchant can
# select).
# Default index_languages: [en, zh]; this may be set to any subset of
# SOURCE_LANG_CODE_MAP.keys().
tenant_config:
# Fallback used for tenants that have no explicit entry below.
default:
primary_language: "en"
index_languages: ["en", "zh"]
# Per-tenant overrides; tenant ids are quoted so they stay strings.
tenants:
"1":
primary_language: "zh"
index_languages: ["zh", "en"]
"2":
primary_language: "en"
index_languages: ["en", "zh"]
"3":
primary_language: "zh"
index_languages: ["zh", "en"]
"162":
primary_language: "zh"
index_languages: ["zh", "en"]
"170":
primary_language: "en"
index_languages: ["en", "zh"]