Blame view

config/config.yaml 23.4 KB
881d338b   tangwang   评估框架
1
2
3
4
5
6
7
8
  # Unified Configuration for Multi-Tenant Search Engine
  # 统一配置文件,所有租户共用一套配置
  # 注意:索引结构由 mappings/search_products.json 定义,此文件只配置搜索行为
  #
  # 约定:下列键为必填;进程环境变量可覆盖 infrastructure / runtime 中同名语义项
  #(如 ES_HOST、API_PORT 等),未设置环境变量时使用本文件中的值。
  
  # Process / bind addresses (环境变量 APP_ENV、RUNTIME_ENV、ES_INDEX_NAMESPACE 可覆盖前两者的语义)
86d0e83d   tangwang   query翻译,根据源语言是否在索...
9
  runtime:
432d1c88   tangwang   评估框架
10
11
12
    environment: prod
    index_namespace: ''
    api_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
13
    api_port: 6002
432d1c88   tangwang   评估框架
14
    indexer_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
15
    indexer_port: 6004
432d1c88   tangwang   评估框架
16
    embedding_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
17
18
19
    embedding_port: 6005
    embedding_text_port: 6005
    embedding_image_port: 6008
432d1c88   tangwang   评估框架
20
    translator_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
21
    translator_port: 6006
432d1c88   tangwang   评估框架
22
    reranker_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
23
    reranker_port: 6007
881d338b   tangwang   评估框架
24
25
  
  # 基础设施连接(敏感项优先读环境变量:ES_*、REDIS_*、DB_*、DASHSCOPE_API_KEY、DEEPL_AUTH_KEY)
86d0e83d   tangwang   query翻译,根据源语言是否在索...
26
27
  infrastructure:
    elasticsearch:
432d1c88   tangwang   评估框架
28
      host: http://localhost:9200
86d0e83d   tangwang   query翻译,根据源语言是否在索...
29
30
31
      username: null
      password: null
    redis:
432d1c88   tangwang   评估框架
32
      host: localhost
86d0e83d   tangwang   query翻译,根据源语言是否在索...
33
34
35
36
37
38
39
      # NOTE(review): 6479 is not the stock Redis port (6379) — confirm this is intentional.
      port: 6479
      snapshot_db: 0
      password: null
      # presumably seconds (usual Redis client convention) — TODO confirm against the client code
      socket_timeout: 1
      socket_connect_timeout: 1
      retry_on_timeout: false
      # 720 days; the same duration as services.translation.cache.ttl_seconds (62208000 s)
      cache_expire_days: 720
432d1c88   tangwang   评估框架
40
      embedding_cache_prefix: embedding
86d0e83d   tangwang   query翻译,根据源语言是否在索...
41
42
43
44
45
46
47
48
49
    database:
      host: null
      port: 3306
      database: null
      username: null
      password: null
    secrets:
      dashscope_api_key: null
      deepl_auth_key: null
881d338b   tangwang   评估框架
50
51
  
  # Elasticsearch Index
432d1c88   tangwang   评估框架
52
  es_index_name: search_products
881d338b   tangwang   评估框架
53
54
  
  # 检索域 / 索引列表(可为空列表;每项字段均需显式给出)
86d0e83d   tangwang   query翻译,根据源语言是否在索...
55
  indexes: []
881d338b   tangwang   评估框架
56
57
  
  # Config assets
86d8358b   tangwang   config optimize
58
  assets:
432d1c88   tangwang   评估框架
59
    query_rewrite_dictionary_path: config/dictionaries/query_rewrite.dict
881d338b   tangwang   评估框架
60
  
331861d5   tangwang   eval框架配置化
61
62
63
64
65
66
67
68
69
70
  # Offline / web relevance evaluation (scripts/evaluation, eval-web).
  # These values are the defaults when the CLI is not given explicit arguments;
  # when search_base_url is not configured it automatically becomes http://127.0.0.1:{runtime.api_port}.
  search_evaluation:
    artifact_root: artifacts/search_evaluation
    queries_file: scripts/evaluation/queries/queries.txt
    eval_log_dir: logs
    default_tenant_id: '163'
    search_base_url: ''
    web_host: 0.0.0.0
    web_port: 6010
984f14f9   tangwang   product_enrich模块迁出
71
    judge_model: qwen3.6-plus
331861d5   tangwang   eval框架配置化
72
73
    judge_enable_thinking: false
    judge_dashscope_batch: false
984f14f9   tangwang   product_enrich模块迁出
74
    intent_model: qwen3.6-plus
331861d5   tangwang   eval框架配置化
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
    intent_enable_thinking: true
    judge_batch_completion_window: 24h
    judge_batch_poll_interval_sec: 10.0
    build_search_depth: 1000
    build_rerank_depth: 10000
    annotate_search_top_k: 120
    annotate_rerank_top_k: 200
    batch_top_k: 100
    audit_top_k: 100
    audit_limit_suspicious: 5
    default_language: en
    search_recall_top_k: 200
    rerank_high_threshold: 0.5
    rerank_high_skip_count: 1000
    rebuild_llm_batch_size: 50
    rebuild_min_llm_batches: 10
    rebuild_max_llm_batches: 40
    rebuild_irrelevant_stop_ratio: 0.799
    rebuild_irrel_low_combined_stop_ratio: 0.959
    rebuild_irrelevant_stop_streak: 3
  
881d338b   tangwang   评估框架
96
  # ES Index Settings (基础设置)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
97
98
99
  es_settings:
    number_of_shards: 1
    number_of_replicas: 0
432d1c88   tangwang   评估框架
100
    refresh_interval: 30s
881d338b   tangwang   评估框架
101
102
103
104
  
  # 字段权重配置(用于搜索时的字段boost)
  # 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang}。
  # 若需要按某个语言单独调权,也可以加显式 key(例如 title.de: 3.2)。
33839b37   tangwang   属性值参与搜索:
105
  field_boosts:
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
106
    title: 3.0
1fdab52d   tangwang   This change adjus...
107
108
109
    # The content of qanchors and enriched_tags is also present in enriched_attributes.value,
    # so their effective weight is their own boost plus the enriched_attributes.value boost.
    qanchors: 1.0
    enriched_tags: 1.0
483a05d9   tangwang   文本搜索权重调整(qanchors...
110
    enriched_attributes.value: 1.5
3abbc95a   tangwang   重构(scripts): 整理sc...
111
    # enriched_taxonomy_attributes.value: 0.3
69881ecb   tangwang   相关性调参、enrich内容解析优化
112
113
    category_name_text: 2.0
    category_path: 2.0
ccbdf870   tangwang   enriched_attribut...
114
115
116
117
118
119
120
121
    keywords: 2.0
    tags: 2.0
    option1_values: 1.7
    option2_values: 1.7
    option3_values: 1.7
    brief: 1.0
    description: 1.0
    vendor: 1.0
881d338b   tangwang   评估框架
122
123
  
  # Query Configuration(查询配置)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
124
  query_config:
881d338b   tangwang   评估框架
125
    # 支持的语言
4d824a77   tangwang   所有租户共用一套统一配置.tena...
126
    supported_languages:
432d1c88   tangwang   评估框架
127
128
129
    - zh
    - en
    default_language: en
881d338b   tangwang   评估框架
130
131
  
    # 功能开关(翻译开关由tenant_config控制)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
132
133
    enable_text_embedding: true
    enable_query_rewrite: true
881d338b   tangwang   评估框架
134
135
136
137
138
  
    # Query translation models (each must match one of the entries under services.translation.capabilities).
    # When the source language is within the tenant's index_languages, primary recall can hit the
    # source-language fields; the following three settings apply in that case.
    zh_to_en_model: nllb-200-distilled-600m  # "opus-mt-zh-en"
    en_to_zh_model: nllb-200-distilled-600m  # "opus-mt-en-zh"
432d1c88   tangwang   评估框架
139
    default_translation_model: nllb-200-distilled-600m
881d338b   tangwang   评估框架
140
141
142
143
    # zh_to_en_model: deepl
    # en_to_zh_model: deepl
    # default_translation_model: deepl
    # 源语种不在 index_languages:翻译对可检索文本更关键,可单独指定(缺省则与上一组相同)
432d1c88   tangwang   评估框架
144
145
146
    zh_to_en_model__source_not_in_index: nllb-200-distilled-600m
    en_to_zh_model__source_not_in_index: nllb-200-distilled-600m
    default_translation_model__source_not_in_index: nllb-200-distilled-600m
881d338b   tangwang   评估框架
147
148
149
150
151
152
    # zh_to_en_model__source_not_in_index: deepl
    # en_to_zh_model__source_not_in_index: deepl
    # default_translation_model__source_not_in_index: deepl
  
    # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒)。
    # 检测语言已在租户 index_languages 内:较短;不在索引语言内:较长(翻译对召回更关键)。
310bb3bc   tangwang   eval tools
153
154
    translation_embedding_wait_budget_ms_source_in_index: 300  # 80
    translation_embedding_wait_budget_ms_source_not_in_index: 400  # 200
cda1cd62   tangwang   意图分析&应用 baseline
155
156
    style_intent:
      enabled: true
87cacb1b   tangwang   融合公式优化。加入意图匹配因子
157
      selected_sku_boost: 1.2
432d1c88   tangwang   评估框架
158
159
      color_dictionary_path: config/dictionaries/style_intent_color.csv
      size_dictionary_path: config/dictionaries/style_intent_size.csv
cda1cd62   tangwang   意图分析&应用 baseline
160
      dimension_aliases:
432d1c88   tangwang   评估框架
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
        # Aliases recognised for the color dimension — presumably matched against
        # option/attribute names by style_intent; verify against the consumer.
        # NOTE(review): an empty "- " item (loads as null) sat between 颜色 and 色系
        # and was removed; if an alias was lost there, restore it as a real string.
        color:
        - color
        - colors
        - colour
        - colours
        - 颜色
        - 色系
        # Aliases recognised for the size dimension. The trailing empty "- " item
        # (null) after 号码 was removed for the same reason.
        size:
        - size
        - sizes
        - sizing
        - 尺码
        - 尺寸
        - 码数
        - 号码
74fdf9bd   tangwang   1.
178
179
    product_title_exclusion:
      enabled: true
432d1c88   tangwang   评估框架
180
      dictionary_path: config/dictionaries/product_title_exclusion.tsv
881d338b   tangwang   评估框架
181
182
183
184
  
    # 动态多语言检索字段配置
    # multilingual_fields 会被拼成 title.{lang}/brief.{lang}/... 形式;
    # shared_fields 为无语言后缀字段。
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
185
186
    search_fields:
      multilingual_fields:
432d1c88   tangwang   评估框架
187
      - title
331861d5   tangwang   eval框架配置化
188
      - keywords
ccbdf870   tangwang   enriched_attribut...
189
      - qanchors
1c2ba48e   tangwang   eval tagger
190
      - enriched_tags
ccbdf870   tangwang   enriched_attribut...
191
      - enriched_attributes.value
3abbc95a   tangwang   重构(scripts): 整理sc...
192
      # - enriched_taxonomy_attributes.value
331861d5   tangwang   eval框架配置化
193
194
195
      - option1_values
      - option2_values
      - option3_values
432d1c88   tangwang   评估框架
196
197
      - category_path
      - category_name_text
ccbdf870   tangwang   enriched_attribut...
198
199
200
      # - brief
      # - description
      # - vendor
881d338b   tangwang   评估框架
201
      # shared_fields: 无语言后缀字段;示例: tags, option1_values, option2_values, option3_values
432d1c88   tangwang   评估框架
202
      shared_fields: null
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
203
      core_multilingual_fields:
432d1c88   tangwang   评估框架
204
205
206
      - title
      - qanchors
      - category_name_text
881d338b   tangwang   评估框架
207
208
  
    # 统一文本召回策略(主查询 + 翻译查询)
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
209
    text_query_strategy:
432d1c88   tangwang   评估框架
210
211
      base_minimum_should_match: 60%
      translation_minimum_should_match: 60%
69881ecb   tangwang   相关性调参、enrich内容解析优化
212
213
      translation_boost: 0.75
      tie_breaker_base_query: 0.5
e756b18e   tangwang   重构了文本召回构建器,现在每个 b...
214
215
216
217
218
219
220
221
222
      best_fields_boost: 2.0
      best_fields:
        title: 4.0
        qanchors: 3.0
        category_name_text: 2.0
      phrase_fields:
        title: 5.0
        qanchors: 4.0
      phrase_match_boost: 3.0
881d338b   tangwang   评估框架
223
224
  
    # Embedding字段名称
432d1c88   tangwang   评估框架
225
226
    text_embedding_field: title_embedding
    image_embedding_field: image_embedding.vector
881d338b   tangwang   评估框架
227
228
229
230
  
    # 返回字段配置(_source includes)
    # null表示返回所有字段,[]表示不返回任何字段,列表表示只返回指定字段
    # 下列字段与 api/result_formatter.py(SpuResult 填充)及 search/searcher.py(SKU 排序/主图替换)一致
a7cc9078   tangwang   sku排序
231
    source_fields:
432d1c88   tangwang   评估框架
232
233
234
235
236
237
238
239
240
241
242
243
244
245
    - spu_id
    - handle
    - title
    - brief
    - description
    - vendor
    - category_name
    - category_name_text
    - category_path
    - category_id
    - category_level
    - category1_name
    - category2_name
    - category3_name
ccbdf870   tangwang   enriched_attribut...
246
247
248
249
250
    # - tags
    # - keywords
    # - qanchors
    # - enriched_tags
    # - enriched_attributes
3abbc95a   tangwang   重构(scripts): 整理sc...
251
    # - enriched_taxonomy_attributes.value
432d1c88   tangwang   评估框架
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
    - min_price
    - compare_at_price
    - image_url
    - sku_prices
    - sku_weights
    - sku_weight_units
    - total_inventory
    - option1_name
    - option1_values
    - option2_name
    - option2_values
    - option3_name
    - option3_values
    - specifications
    - skus
881d338b   tangwang   评估框架
267
268
  
    # KNN:文本向量与多模态(图片)向量各自 boost 与召回(k / num_candidates)
ceaf6d03   tangwang   召回限定:must条件补充主干词命...
269
270
    knn_text_boost: 4
    knn_image_boost: 4
881d338b   tangwang   评估框架
271
272
  
    # knn_text_num_candidates = k * 3.5 (160 -> 560); the *_long and image settings use k * 3 (400 -> 1200)
de98daa3   tangwang   多模态召回优化
273
274
    knn_text_k: 160
    knn_text_num_candidates: 560
de98daa3   tangwang   多模态召回优化
275
276
    knn_text_k_long: 400
    knn_text_num_candidates_long: 1200
de98daa3   tangwang   多模态召回优化
277
278
    knn_image_k: 400
    knn_image_num_candidates: 1200
881d338b   tangwang   评估框架
279
280
  
  # Function Score配置(ES层打分规则)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
281
  function_score:
432d1c88   tangwang   评估框架
282
283
    score_mode: sum
    boost_mode: multiply
4d824a77   tangwang   所有租户共用一套统一配置.tena...
284
    functions: []
881d338b   tangwang   评估框架
285
286
  
  # 粗排配置(仅融合 ES 文本/向量信号,不调用模型)
8c8b9d84   tangwang   ES 拉取 coarse_rank...
287
288
289
290
291
  coarse_rank:
    enabled: true
    input_window: 700
    output_window: 240
    fusion:
4d000c94   tangwang   融合公式调参
292
      es_bias: 10.0
9df421ed   tangwang   基于eval框架开始调参
293
      es_exponent: 0.05
8c8b9d84   tangwang   ES 拉取 coarse_rank...
294
295
      text_bias: 0.1
      text_exponent: 0.35
881d338b   tangwang   评估框架
296
297
      # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合)
      # 因为es的打分已经给了trans进行了折扣,所以这里不再继续折扣
de98daa3   tangwang   多模态召回优化
298
      text_translation_weight: 1.0
8c8b9d84   tangwang   ES 拉取 coarse_rank...
299
      knn_text_weight: 1.0
4d000c94   tangwang   融合公式调参
300
301
      knn_image_weight: 2.0
      knn_tie_breaker: 0.3
8c8b9d84   tangwang   ES 拉取 coarse_rank...
302
      knn_bias: 0.6
4d000c94   tangwang   融合公式调参
303
      knn_exponent: 0.4
881d338b   tangwang   评估框架
304
305
  
  # 精排配置(轻量 reranker)
8c8b9d84   tangwang   ES 拉取 coarse_rank...
306
  fine_rank:
418b6a4a   tangwang   调参
307
308
    enabled: false
    input_window: 160
8c8b9d84   tangwang   ES 拉取 coarse_rank...
309
310
    output_window: 80
    timeout_sec: 10.0
432d1c88   tangwang   评估框架
311
312
313
    rerank_query_template: '{query}'
    rerank_doc_template: '{title}'
    service_profile: fine
881d338b   tangwang   评估框架
314
315
  
  # 重排配置(provider/URL 在 services.rerank)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
316
  rerank:
5f7d7f09   tangwang   性能测试报告.md
317
    enabled: true
418b6a4a   tangwang   调参
318
    rerank_window: 160
42e3aea6   tangwang   tidy
319
    timeout_sec: 15.0
506c39b7   tangwang   feat(search): 统一重...
320
321
    weight_es: 0.4
    weight_ai: 0.6
432d1c88   tangwang   评估框架
322
323
324
    rerank_query_template: '{query}'
    rerank_doc_template: '{title}'
    service_profile: default
881d338b   tangwang   评估框架
325
  
9df421ed   tangwang   基于eval框架开始调参
326
    # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(es / rerank / fine / text / knn)
881d338b   tangwang   评估框架
327
328
329
    # 其中 knn_score 先做一层 dis_max:
    #   max(knn_text_weight * text_knn, knn_image_weight * image_knn)
    #   + knn_tie_breaker * 另一侧较弱信号
814e352b   tangwang   乘法公式配置化
330
    fusion:
4d000c94   tangwang   融合公式调参
331
      es_bias: 10.0
9df421ed   tangwang   基于eval框架开始调参
332
      es_exponent: 0.05
4d000c94   tangwang   融合公式调参
333
      rerank_bias: 0.1
432d1c88   tangwang   评估框架
334
      rerank_exponent: 1.15
4d000c94   tangwang   融合公式调参
335
      fine_bias: 0.1
8c8b9d84   tangwang   ES 拉取 coarse_rank...
336
      fine_exponent: 1.0
814e352b   tangwang   乘法公式配置化
337
      text_bias: 0.1
432d1c88   tangwang   评估框架
338
      text_exponent: 0.25
881d338b   tangwang   评估框架
339
      # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合)
432d1c88   tangwang   评估框架
340
      text_translation_weight: 0.8
24edc208   tangwang   修改_extract_combin...
341
      knn_text_weight: 1.0
4d000c94   tangwang   融合公式调参
342
343
      knn_image_weight: 2.0
      knn_tie_breaker: 0.3
814e352b   tangwang   乘法公式配置化
344
      knn_bias: 0.6
4d000c94   tangwang   融合公式调参
345
      knn_exponent: 0.4
881d338b   tangwang   评估框架
346
347
  
  # 可扩展服务/provider 注册表(单一配置源)
42e3aea6   tangwang   tidy
348
349
  services:
    translation:
432d1c88   tangwang   评估框架
350
      service_url: http://127.0.0.1:6006
881d338b   tangwang   评估框架
351
      # default_model: nllb-200-distilled-600m
432d1c88   tangwang   评估框架
352
353
      default_model: nllb-200-distilled-600m
      default_scene: general
42e3aea6   tangwang   tidy
354
      timeout_sec: 10.0
d4cadc13   tangwang   翻译重构
355
      cache:
d4cadc13   tangwang   翻译重构
356
357
        # 62208000 s = 720 days (720 * 86400)
        ttl_seconds: 62208000
        sliding_expiration: true
881d338b   tangwang   评估框架
358
        # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups).
8140e942   tangwang   translator model ...
359
        enable_model_quality_tier_cache: true
881d338b   tangwang   评估框架
360
361
        # Higher tier = better quality. Multiple models may share one tier (同级).
        # A request for model A may reuse Redis keys written by models whose tier is >= A's tier (never from lower tiers).
8140e942   tangwang   translator model ...
362
363
364
365
366
367
368
        model_quality_tiers:
          deepl: 30
          qwen-mt: 30
          llm: 30
          nllb-200-distilled-600m: 20
          opus-mt-zh-en: 10
          opus-mt-en-zh: 10
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
369
      capabilities:
d4cadc13   tangwang   翻译重构
370
        qwen-mt:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
371
          enabled: true
432d1c88   tangwang   评估框架
372
373
374
          backend: qwen_mt
          model: qwen-mt-flash
          base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
42e3aea6   tangwang   tidy
375
          timeout_sec: 10.0
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
376
          use_cache: true
a0a173ae   tangwang   last
377
        llm:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
378
          enabled: true
432d1c88   tangwang   评估框架
379
380
381
          backend: llm
          model: qwen-flash
          base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
a0a173ae   tangwang   last
382
          timeout_sec: 30.0
cd4ce66d   tangwang   trans logs
383
          use_cache: true
d4cadc13   tangwang   翻译重构
384
        deepl:
cd4ce66d   tangwang   trans logs
385
          enabled: true
432d1c88   tangwang   评估框架
386
387
          backend: deepl
          api_url: https://api.deepl.com/v2/translate
d4cadc13   tangwang   翻译重构
388
          timeout_sec: 10.0
432d1c88   tangwang   评估框架
389
          glossary_id: ''
cd4ce66d   tangwang   trans logs
390
          use_cache: true
0fd2f875   tangwang   translate
391
        nllb-200-distilled-600m:
93be98cb   tangwang   清理过时的文档
392
          enabled: true
432d1c88   tangwang   评估框架
393
394
395
396
397
398
          backend: local_nllb
          model_id: facebook/nllb-200-distilled-600M
          model_dir: ./models/translation/facebook/nllb-200-distilled-600M
          ct2_model_dir: ./models/translation/facebook/nllb-200-distilled-600M/ctranslate2-float16
          ct2_compute_type: float16
          ct2_conversion_quantization: float16
ea293660   tangwang   CTranslate2
399
          ct2_auto_convert: true
46ce858d   tangwang   在NLLB模型的 /data/sa...
400
          ct2_inter_threads: 4
ea293660   tangwang   CTranslate2
401
          ct2_intra_threads: 0
46ce858d   tangwang   在NLLB模型的 /data/sa...
402
          ct2_max_queued_batches: 32
432d1c88   tangwang   评估框架
403
404
          ct2_batch_type: examples
          ct2_decoding_length_mode: source
46ce858d   tangwang   在NLLB模型的 /data/sa...
405
406
          ct2_decoding_length_extra: 8
          ct2_decoding_length_min: 32
432d1c88   tangwang   评估框架
407
408
          device: cuda
          torch_dtype: float16
4747e2f4   tangwang   embedding perform...
409
          batch_size: 64
0fd2f875   tangwang   translate
410
          max_input_length: 256
3eff49b7   tangwang   trans nllb-200-di...
411
          max_new_tokens: 64
0fd2f875   tangwang   translate
412
          num_beams: 1
cd4ce66d   tangwang   trans logs
413
          use_cache: true
0fd2f875   tangwang   translate
414
        opus-mt-zh-en:
f86c5fee   tangwang   reranker性能参数脚本放在:...
415
          enabled: false
432d1c88   tangwang   评估框架
416
417
418
419
420
421
          backend: local_marian
          model_id: Helsinki-NLP/opus-mt-zh-en
          model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en
          ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en/ctranslate2-float16
          ct2_compute_type: float16
          ct2_conversion_quantization: float16
ea293660   tangwang   CTranslate2
422
423
424
425
          ct2_auto_convert: true
          ct2_inter_threads: 1
          ct2_intra_threads: 0
          ct2_max_queued_batches: 0
432d1c88   tangwang   评估框架
426
427
428
          ct2_batch_type: examples
          device: cuda
          torch_dtype: float16
0fd2f875   tangwang   translate
429
430
431
432
          batch_size: 16
          max_input_length: 256
          max_new_tokens: 256
          num_beams: 1
cd4ce66d   tangwang   trans logs
433
          use_cache: true
0fd2f875   tangwang   translate
434
        opus-mt-en-zh:
f86c5fee   tangwang   reranker性能参数脚本放在:...
435
          enabled: false
432d1c88   tangwang   评估框架
436
437
438
439
440
441
          backend: local_marian
          model_id: Helsinki-NLP/opus-mt-en-zh
          model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh
          ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh/ctranslate2-float16
          ct2_compute_type: float16
          ct2_conversion_quantization: float16
ea293660   tangwang   CTranslate2
442
443
444
445
          ct2_auto_convert: true
          ct2_inter_threads: 1
          ct2_intra_threads: 0
          ct2_max_queued_batches: 0
432d1c88   tangwang   评估框架
446
447
448
          ct2_batch_type: examples
          device: cuda
          torch_dtype: float16
0fd2f875   tangwang   translate
449
450
451
452
          batch_size: 16
          max_input_length: 256
          max_new_tokens: 256
          num_beams: 1
cd4ce66d   tangwang   trans logs
453
          use_cache: true
42e3aea6   tangwang   tidy
454
    embedding:
881d338b   tangwang   评估框架
455
      provider: http  # must name an entry under providers (only "http" is defined there)
42e3aea6   tangwang   tidy
456
457
      providers:
        http:
432d1c88   tangwang   评估框架
458
459
          text_base_url: http://127.0.0.1:6005
          image_base_url: http://127.0.0.1:6008
881d338b   tangwang   评估框架
460
461
      # 服务内文本后端(embedding 进程启动时读取)
      backend: tei  # tei | local_st
07cf5a93   tangwang   START_EMBEDDING=...
462
463
      backends:
        tei:
432d1c88   tangwang   评估框架
464
          base_url: http://127.0.0.1:8080
efd435cf   tangwang   tei性能调优:
465
          timeout_sec: 20
432d1c88   tangwang   评估框架
466
          model_id: Qwen/Qwen3-Embedding-0.6B
07cf5a93   tangwang   START_EMBEDDING=...
467
        local_st:
432d1c88   tangwang   评估框架
468
469
          model_id: Qwen/Qwen3-Embedding-0.6B
          device: cuda
07cf5a93   tangwang   START_EMBEDDING=...
470
471
          batch_size: 32
          normalize_embeddings: true
881d338b   tangwang   评估框架
472
473
474
475
      # In-service image backend (read when the embedding process starts; the cnclip gRPC server and
      # the service on port 6008 must use the same model_name).
      # Chinese-CLIP: ViT-H-14 -> 1024 dims, ViT-L-14 -> 768 dims. Must match
      # image_embedding.vector.dims in mappings/search_products.json (the index is currently 1024 -> default ViT-H-14).
      # NOTE(review): the image_backends entries in this file configure ViT-L-14 (768 dims per the line
      # above), which contradicts the 1024-dim / ViT-H-14 statement — confirm which one is current.
      image_backend: clip_as_service  # clip_as_service | local_cnclip
86d8358b   tangwang   config optimize
476
477
      image_backends:
        clip_as_service:
432d1c88   tangwang   评估框架
478
479
          server: grpc://127.0.0.1:51000
          model_name: CN-CLIP/ViT-L-14
86d8358b   tangwang   config optimize
480
481
482
          batch_size: 8
          normalize_embeddings: true
        local_cnclip:
432d1c88   tangwang   评估框架
483
          model_name: ViT-L-14
86d8358b   tangwang   config optimize
484
485
486
          device: null
          batch_size: 8
          normalize_embeddings: true
42e3aea6   tangwang   tidy
487
    rerank:
432d1c88   tangwang   评估框架
488
      provider: http
42e3aea6   tangwang   tidy
489
490
      providers:
        http:
daa2690b   tangwang   漏斗参数调优&呈现优化
491
492
          instances:
            default:
432d1c88   tangwang   评估框架
493
494
              base_url: http://127.0.0.1:6007
              service_url: http://127.0.0.1:6007/rerank
daa2690b   tangwang   漏斗参数调优&呈现优化
495
            fine:
432d1c88   tangwang   评估框架
496
497
              base_url: http://127.0.0.1:6009
              service_url: http://127.0.0.1:6009/rerank
86d8358b   tangwang   config optimize
498
499
500
      request:
        max_docs: 1000
        normalize: true
432d1c88   tangwang   评估框架
501
      default_instance: default
881d338b   tangwang   评估框架
502
      # 命名实例:同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。
daa2690b   tangwang   漏斗参数调优&呈现优化
503
504
      instances:
        default:
432d1c88   tangwang   评估框架
505
          host: 0.0.0.0
daa2690b   tangwang   漏斗参数调优&呈现优化
506
          port: 6007
432d1c88   tangwang   评估框架
507
508
          backend: qwen3_vllm_score
          runtime_dir: ./.runtime/reranker/default
daa2690b   tangwang   漏斗参数调优&呈现优化
509
        fine:
432d1c88   tangwang   评估框架
510
          host: 0.0.0.0
daa2690b   tangwang   漏斗参数调优&呈现优化
511
          port: 6009
432d1c88   tangwang   评估框架
512
513
          backend: bge
          runtime_dir: ./.runtime/reranker/fine
701ae503   tangwang   docs
514
515
      backends:
        bge:
432d1c88   tangwang   评估框架
516
          model_name: BAAI/bge-reranker-v2-m3
701ae503   tangwang   docs
517
518
          device: null
          use_fp16: true
418b6a4a   tangwang   调参
519
          batch_size: 80
00c8ddb9   tangwang   suggest rank opti...
520
          max_length: 160
432d1c88   tangwang   评估框架
521
          cache_dir: ./model_cache
701ae503   tangwang   docs
522
          enable_warmup: true
971a0851   tangwang   补充reranker-jina,探...
523
        jina_reranker_v3:
432d1c88   tangwang   评估框架
524
          model_name: jinaai/jina-reranker-v3
971a0851   tangwang   补充reranker-jina,探...
525
          device: null
432d1c88   tangwang   评估框架
526
          dtype: float16
971a0851   tangwang   补充reranker-jina,探...
527
          batch_size: 64
74116f05   tangwang   jina-reranker-v3性...
528
529
530
          max_doc_length: 160
          max_query_length: 64
          sort_by_doc_length: true
432d1c88   tangwang   评估框架
531
          cache_dir: ./model_cache
971a0851   tangwang   补充reranker-jina,探...
532
          trust_remote_code: true
701ae503   tangwang   docs
533
        qwen3_vllm:
432d1c88   tangwang   评估框架
534
535
          model_name: Qwen/Qwen3-Reranker-0.6B
          engine: vllm
b0972ff9   tangwang   qwen3_vllm_score ...
536
          max_model_len: 256
701ae503   tangwang   docs
537
          tensor_parallel_size: 1
432d1c88   tangwang   评估框架
538
539
          gpu_memory_utilization: 0.2
          dtype: float16
bc089b43   tangwang   refactor(reranker...
540
541
          enable_prefix_caching: true
          enforce_eager: false
00c8ddb9   tangwang   suggest rank opti...
542
          infer_batch_size: 100
9f5994b4   tangwang   reranker
543
          sort_by_doc_length: true
881d338b   tangwang   评估框架
544
545
546
547
548
549
550
551
552
553
554
555
          # standard = _format_instruction__standard (fixed yes/no system prompt);
          # compact = _format_instruction (instruction used as the system prompt and repeated as "Instruct" inside the user turn)
          instruction_format: standard  # compact | standard
          # instruction: "Given a query, score the product for relevance"
          # "rank products by given query" works slightly better than "Given a query, score the product for relevance"
          # instruction: "rank products by given query, category match first"
          # instruction: "Rank products by query relevance, prioritizing category match"
          # instruction: "Rank products by query relevance, prioritizing category and style match"
          # instruction: "Rank by query relevance, prioritize category & style"
          # instruction: "Relevance ranking: category & style match first"
          # instruction: "Score product relevance by query with category & style match prioritized"
          # instruction: "Rank products by query with category & style match prioritized"
          # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query"
432d1c88   tangwang   评估框架
556
          instruction: rank products by given query
881d338b   tangwang   评估框架
557
558
        # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score
        # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。
9de5ef49   tangwang   qwen3_vllm_score ...
559
        qwen3_vllm_score:
432d1c88   tangwang   评估框架
560
          model_name: Qwen/Qwen3-Reranker-0.6B
881d338b   tangwang   评估框架
561
          # 官方 Hub 原版需 true;若改用已转换的 seq-cls 权重(如 tomaarsen/...-seq-cls)则设为 false
9de5ef49   tangwang   qwen3_vllm_score ...
562
          use_original_qwen3_hf_overrides: true
881d338b   tangwang   评估框架
563
564
565
566
          # vllm_runner: "auto"
          # vllm_convert: "auto"
          # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并
          # hf_overrides: {}
432d1c88   tangwang   评估框架
567
          engine: vllm
f86c5fee   tangwang   reranker性能参数脚本放在:...
568
          max_model_len: 172
9de5ef49   tangwang   qwen3_vllm_score ...
569
          tensor_parallel_size: 1
c3425429   tangwang   在以下文件中完成精排/融合清理工作...
570
          gpu_memory_utilization: 0.15
432d1c88   tangwang   评估框架
571
          dtype: float16
9de5ef49   tangwang   qwen3_vllm_score ...
572
573
          enable_prefix_caching: true
          enforce_eager: false
3b35f139   tangwang   search evalution
574
          infer_batch_size: 80
9de5ef49   tangwang   qwen3_vllm_score ...
575
          sort_by_doc_length: true
881d338b   tangwang   评估框架
576
577
578
579
          # The default "standard" format matches the prefix used by the official vLLM Qwen3 reranker example.
          instruction_format: standard  # compact | standard
          # instruction: "Rank products by query with category & style match prioritized"
          # instruction: "Given a shopping query, rank products by relevance"
432d1c88   tangwang   评估框架
580
          instruction: Rank products by query with category & style match prioritized
d31c7f65   tangwang   补充云服务reranker
581
        qwen3_transformers:
432d1c88   tangwang   评估框架
582
583
          model_name: Qwen/Qwen3-Reranker-0.6B
          instruction: rank products by given query
881d338b   tangwang   评估框架
584
          # instruction: "Score the product’s relevance to the given query"
d31c7f65   tangwang   补充云服务reranker
585
586
587
          max_length: 8192
          batch_size: 64
          use_fp16: true
881d338b   tangwang   评估框架
588
          # sdpa:默认无需 flash-attn;若已安装 flash_attn 可改为 flash_attention_2
432d1c88   tangwang   评估框架
589
          attn_implementation: sdpa
881d338b   tangwang   评估框架
590
591
592
        # Packed Transformers backend: shared query prefix + custom position_ids/attention_mask.
        # For 1 query + many short docs (for example 400 product titles), this usually reduces
        # repeated prefix work and padding waste compared with pairwise batching.
4823f463   tangwang   qwen3_vllm_score ...
593
        qwen3_transformers_packed:
432d1c88   tangwang   评估框架
594
595
          model_name: Qwen/Qwen3-Reranker-0.6B
          instruction: Rank products by query with category & style match prioritized
b0972ff9   tangwang   qwen3_vllm_score ...
596
          max_model_len: 256
4823f463   tangwang   qwen3_vllm_score ...
597
598
599
600
          max_doc_len: 160
          max_docs_per_pack: 0
          use_fp16: true
          sort_by_doc_length: true
881d338b   tangwang   评估框架
601
602
          # Packed mode relies on a custom 4D attention mask. "eager" is the safest default.
          # If your torch/transformers stack validates it, you can benchmark "sdpa".
432d1c88   tangwang   评估框架
603
          attn_implementation: eager
3d508beb   tangwang   reranker-4b-gguf
604
        qwen3_gguf:
432d1c88   tangwang   评估框架
605
606
607
608
609
          # GGUF model identified by repo_id + filename (a Q8_0 file of Qwen3-Reranker-4B).
          # filename is quoted because a plain YAML scalar starting with "*" would be read as an alias.
          # NOTE(review): cache_dir / local_dir are relative paths; presumably resolved against the
          # process working directory - confirm.
          repo_id: DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF
          filename: '*Q8_0.gguf'
          cache_dir: ./model_cache
          local_dir: ./models/reranker/qwen3-reranker-4b-gguf
          instruction: Rank products by query with category & style match prioritized
881d338b   tangwang   评估框架
610
          # T4 16GB / performance-first profile: offload all layers; measured to be noticeably
          # faster than the conservative profile.
5c21a485   tangwang   qwen3-reranker-0....
611
612
613
614
          # NOTE(review): n_gpu_layers: 999 appears to be the "offload all layers" setting
          # mentioned above - confirm.
          n_ctx: 512
          n_batch: 512
          n_ubatch: 512
          n_gpu_layers: 999
3d508beb   tangwang   reranker-4b-gguf
615
616
617
618
619
620
621
622
623
          main_gpu: 0
          n_threads: 2
          n_threads_batch: 4
          flash_attn: true
          offload_kqv: true
          use_mmap: true
          use_mlock: false
          infer_batch_size: 8
          sort_by_doc_length: true
432d1c88   tangwang   评估框架
624
          # NOTE(review): presumably selects character count as the sort key used by
          # sort_by_doc_length - confirm.
          length_sort_mode: char
3d508beb   tangwang   reranker-4b-gguf
625
626
          enable_warmup: true
          verbose: false
5c21a485   tangwang   qwen3-reranker-0....
627
        qwen3_gguf_06b:
432d1c88   tangwang   评估框架
628
629
630
631
632
          # GGUF model identified by repo_id + filename (a Q8_0 file of Qwen3-Reranker-0.6B).
          # NOTE(review): cache_dir / local_dir are relative paths; presumably resolved against the
          # process working directory - confirm.
          repo_id: ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF
          filename: qwen3-reranker-0.6b-q8_0.gguf
          cache_dir: ./model_cache
          local_dir: ./models/reranker/qwen3-reranker-0.6b-q8_0-gguf
          instruction: Rank products by query with category & style match prioritized
881d338b   tangwang   评估框架
633
634
          # 0.6B GGUF / online rerank baseline:
          # Measured: a single request with 400 titles takes about 265s, so this backend is better
          # suited as a low-VRAM functional fallback than as the primary low-latency online route.
5c21a485   tangwang   qwen3-reranker-0....
635
636
637
638
639
640
641
642
643
644
645
646
647
          n_ctx: 256
          n_batch: 256
          n_ubatch: 256
          n_gpu_layers: 999
          main_gpu: 0
          n_threads: 2
          n_threads_batch: 4
          flash_attn: true
          offload_kqv: true
          use_mmap: true
          use_mlock: false
          infer_batch_size: 32
          sort_by_doc_length: true
432d1c88   tangwang   评估框架
648
          # NOTE(review): presumably selects character count as the sort key used by
          # sort_by_doc_length - confirm.
          length_sort_mode: char
5c21a485   tangwang   qwen3-reranker-0....
649
650
651
          reuse_query_state: false
          enable_warmup: true
          verbose: false
d31c7f65   tangwang   补充云服务reranker
652
        dashscope_rerank:
432d1c88   tangwang   评估框架
653
          model_name: qwen3-rerank
881d338b   tangwang   评估框架
654
655
656
657
          # Choose the endpoint by region:
          # China:     https://dashscope.aliyuncs.com/compatible-api/v1/reranks
          # Singapore: https://dashscope-intl.aliyuncs.com/compatible-api/v1/reranks
          # US:        https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks
432d1c88   tangwang   评估框架
658
659
660
          # NOTE(review): api_key_env presumably names the environment variable that holds the API key
          # (the key itself is not stored in this file) - confirm.
          endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks
          api_key_env: RERANK_DASHSCOPE_API_KEY_CN
          timeout_sec: 10.0
881d338b   tangwang   评估框架
661
662
          top_n_cap: 0   # 0 means top_n = number of documents in the current request; >0 caps top_n
          batchsize: 64  # 0 disables; >0 enables concurrent small-batch dispatch (top_n/top_n_cap still apply; results are truncated globally after batching)
432d1c88   tangwang   评估框架
663
          # NOTE(review): this backend uses "instruct" and "batchsize", while other backends in this
          # file use "instruction" and "batch_size"; confirm the consumer expects these exact keys
          # before renaming anything.
          instruct: Given a shopping query, rank product titles by relevance
d31c7f65   tangwang   补充云服务reranker
664
665
          max_retries: 2
          retry_backoff_sec: 0.2
881d338b   tangwang   评估框架
666
667
  
  # SPU configuration (enabled; uses nested skus)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
668
669
  spu_config:
    enabled: true
432d1c88   tangwang   评估框架
670
    spu_field: spu_id
4d824a77   tangwang   所有租户共用一套统一配置.tena...
671
    inner_hits_size: 10
881d338b   tangwang   评估框架
672
673
    # Which option dimensions take part in retrieval (both indexing and online search).
    # Format: a list containing one or more of option1/option2/option3.
432d1c88   tangwang   评估框架
674
675
676
677
    searchable_option_dimensions:
    - option1
    - option2
    - option3
881d338b   tangwang   评估框架
678
679
680
681
  
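  # Tenant configuration
  # Each tenant can set a primary language (primary_language) and index languages (index_languages: the main market languages, selectable by the merchant).
  # Default index_languages: [en, zh]; may be set to any subset of SOURCE_LANG_CODE_MAP.keys().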
0064e946   tangwang   feat: 增量索引服务、租户配置...
682
  tenant_config:
0064e946   tangwang   feat: 增量索引服务、租户配置...
683
    default:
432d1c88   tangwang   评估框架
684
685
686
687
      primary_language: en
      index_languages:
      - en
      - zh
0064e946   tangwang   feat: 增量索引服务、租户配置...
688
    tenants:
432d1c88   tangwang   评估框架
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
      '1':
        primary_language: zh
        index_languages:
        - zh
        - en
      '2':
        primary_language: en
        index_languages:
        - en
        - zh
      '3':
        primary_language: zh
        index_languages:
        - zh
        - en
      '162':
        primary_language: zh
        index_languages:
        - zh
        - en
      '170':
        primary_language: en
        index_languages:
        - en
        - zh