Blame view

config/config.yaml 23.5 KB
881d338b   tangwang   评估框架
1
2
3
4
5
6
7
8
  # Unified Configuration for Multi-Tenant Search Engine
  # 统一配置文件,所有租户共用一套配置
  # 注意:索引结构由 mappings/search_products.json 定义,此文件只配置搜索行为
  #
  # 约定:下列键为必填;进程环境变量可覆盖 infrastructure / runtime 中同名语义项
  #(如 ES_HOST、API_PORT 等),未设置环境变量时使用本文件中的值。
  
  # Process / bind addresses (环境变量 APP_ENV、RUNTIME_ENV、ES_INDEX_NAMESPACE 可覆盖前两者的语义)
86d0e83d   tangwang   query翻译,根据源语言是否在索...
9
  runtime:
432d1c88   tangwang   评估框架
10
11
12
    environment: prod
    index_namespace: ''
    api_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
13
    api_port: 6002
432d1c88   tangwang   评估框架
14
    indexer_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
15
    indexer_port: 6004
432d1c88   tangwang   评估框架
16
    embedding_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
17
18
19
    embedding_port: 6005
    embedding_text_port: 6005
    embedding_image_port: 6008
432d1c88   tangwang   评估框架
20
    translator_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
21
    translator_port: 6006
432d1c88   tangwang   评估框架
22
    reranker_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
23
    reranker_port: 6007
881d338b   tangwang   评估框架
24
25
  
  # 基础设施连接(敏感项优先读环境变量:ES_*、REDIS_*、DB_*、DASHSCOPE_API_KEY、DEEPL_AUTH_KEY)
86d0e83d   tangwang   query翻译,根据源语言是否在索...
26
27
  infrastructure:
    elasticsearch:
432d1c88   tangwang   评估框架
28
      host: http://localhost:9200
86d0e83d   tangwang   query翻译,根据源语言是否在索...
29
30
31
      username: null
      password: null
    redis:
432d1c88   tangwang   评估框架
32
      host: localhost
86d0e83d   tangwang   query翻译,根据源语言是否在索...
33
34
35
36
37
38
39
      # NOTE(review): Redis listens on 6379 by default; confirm 6479 is the intended port.
      port: 6479
      snapshot_db: 0
      # Left null here; the section header says sensitive values are read from REDIS_* environment variables first.
      password: null
      # NOTE(review): the timeout units are not stated in this file; presumably seconds. Confirm against the Redis client setup.
      socket_timeout: 1
      socket_connect_timeout: 1
      retry_on_timeout: false
      # 720 days = 62208000 s, the same value as services.translation.cache.ttl_seconds.
      cache_expire_days: 720
432d1c88   tangwang   评估框架
40
41
      embedding_cache_prefix: embedding
      anchor_cache_prefix: product_anchors
86d0e83d   tangwang   query翻译,根据源语言是否在索...
42
43
44
45
46
47
48
49
50
51
      anchor_cache_expire_days: 30
    database:
      host: null
      port: 3306
      database: null
      username: null
      password: null
    secrets:
      dashscope_api_key: null
      deepl_auth_key: null
881d338b   tangwang   评估框架
52
53
  
  # Elasticsearch Index
432d1c88   tangwang   评估框架
54
  es_index_name: search_products
881d338b   tangwang   评估框架
55
56
  
  # 检索域 / 索引列表(可为空列表;每项字段均需显式给出)
86d0e83d   tangwang   query翻译,根据源语言是否在索...
57
  indexes: []
881d338b   tangwang   评估框架
58
59
  
  # Config assets
86d8358b   tangwang   config optimize
60
  assets:
432d1c88   tangwang   评估框架
61
    query_rewrite_dictionary_path: config/dictionaries/query_rewrite.dict
881d338b   tangwang   评估框架
62
63
  
  # Product content understanding (LLM enrich-content) configuration
41f0b2e9   tangwang   product_enrich支持并发
64
65
  product_enrich:
    max_workers: 40
881d338b   tangwang   评估框架
66
  
331861d5   tangwang   eval框架配置化
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
  # Offline / web relevance evaluation (scripts/evaluation, eval-web).
  # These are the defaults used when the CLI does not pass a value explicitly.
  # When search_base_url is not configured it automatically becomes
  # http://127.0.0.1:{runtime.api_port}.
  search_evaluation:
    artifact_root: artifacts/search_evaluation
    queries_file: scripts/evaluation/queries/queries.txt
    eval_log_dir: logs
    # Quoted so the tenant id is loaded as a string rather than an integer.
    default_tenant_id: '163'
    # Empty string: not configured, so the runtime.api_port fallback described above applies.
    search_base_url: ''
    web_host: 0.0.0.0
    web_port: 6010
    judge_model: qwen3.5-plus
    judge_enable_thinking: false
    judge_dashscope_batch: false
    intent_model: qwen3-max
    intent_enable_thinking: true
    judge_batch_completion_window: 24h
    judge_batch_poll_interval_sec: 10.0
    build_search_depth: 1000
    build_rerank_depth: 10000
    annotate_search_top_k: 120
    annotate_rerank_top_k: 200
    batch_top_k: 100
    audit_top_k: 100
    audit_limit_suspicious: 5
    default_language: en
    search_recall_top_k: 200
    rerank_high_threshold: 0.5
    rerank_high_skip_count: 1000
    rebuild_llm_batch_size: 50
    rebuild_min_llm_batches: 10
    rebuild_max_llm_batches: 40
    rebuild_irrelevant_stop_ratio: 0.799
    rebuild_irrel_low_combined_stop_ratio: 0.959
    rebuild_irrelevant_stop_streak: 3
  
881d338b   tangwang   评估框架
102
  # ES Index Settings (基础设置)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
103
104
105
  es_settings:
    number_of_shards: 1
    number_of_replicas: 0
432d1c88   tangwang   评估框架
106
    refresh_interval: 30s
881d338b   tangwang   评估框架
107
108
109
110
  
  # 字段权重配置(用于搜索时的字段boost)
  # 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang}。
  # 若需要按某个语言单独调权,也可以加显式 key(例如 title.de: 3.2)。
33839b37   tangwang   属性值参与搜索:
111
  field_boosts:
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
112
    title: 3.0
1fdab52d   tangwang   This change adjus...
113
114
115
    # The qanchors and enriched_tags content also appears in enriched_attributes.value, so the effective weight of each is its own boost plus the enriched_attributes.value boost.
    qanchors: 1.0
    enriched_tags: 1.0
483a05d9   tangwang   文本搜索权重调整(qanchors...
116
    enriched_attributes.value: 1.5
3abbc95a   tangwang   重构(scripts): 整理sc...
117
    # enriched_taxonomy_attributes.value: 0.3
69881ecb   tangwang   相关性调参、enrich内容解析优化
118
119
    category_name_text: 2.0
    category_path: 2.0
ccbdf870   tangwang   enriched_attribut...
120
121
122
123
124
125
126
127
    keywords: 2.0
    tags: 2.0
    option1_values: 1.7
    option2_values: 1.7
    option3_values: 1.7
    brief: 1.0
    description: 1.0
    vendor: 1.0
881d338b   tangwang   评估框架
128
129
  
  # Query Configuration(查询配置)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
130
  query_config:
881d338b   tangwang   评估框架
131
    # 支持的语言
4d824a77   tangwang   所有租户共用一套统一配置.tena...
132
    supported_languages:
432d1c88   tangwang   评估框架
133
134
135
    - zh
    - en
    default_language: en
881d338b   tangwang   评估框架
136
137
  
    # 功能开关(翻译开关由tenant_config控制)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
138
139
    enable_text_embedding: true
    enable_query_rewrite: true
881d338b   tangwang   评估框架
140
141
142
143
144
  
    # 查询翻译模型(须与 services.translation.capabilities 中某项一致)
    # 源语种在租户 index_languages 内:主召回可打在源语种字段,用下面三项。
    zh_to_en_model: nllb-200-distilled-600m  # "opus-mt-zh-en"
    en_to_zh_model: nllb-200-distilled-600m  # "opus-mt-en-zh"
432d1c88   tangwang   评估框架
145
    default_translation_model: nllb-200-distilled-600m
881d338b   tangwang   评估框架
146
147
148
149
    # zh_to_en_model: deepl
    # en_to_zh_model: deepl
    # default_translation_model: deepl
    # 源语种不在 index_languages:翻译对可检索文本更关键,可单独指定(缺省则与上一组相同)
432d1c88   tangwang   评估框架
150
151
152
    zh_to_en_model__source_not_in_index: nllb-200-distilled-600m
    en_to_zh_model__source_not_in_index: nllb-200-distilled-600m
    default_translation_model__source_not_in_index: nllb-200-distilled-600m
881d338b   tangwang   评估框架
153
154
155
156
157
158
    # zh_to_en_model__source_not_in_index: deepl
    # en_to_zh_model__source_not_in_index: deepl
    # default_translation_model__source_not_in_index: deepl
  
    # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒)。
    # 检测语言已在租户 index_languages 内:较短;不在索引语言内:较长(翻译对召回更关键)。
310bb3bc   tangwang   eval tools
159
160
    translation_embedding_wait_budget_ms_source_in_index: 300  # 80
    translation_embedding_wait_budget_ms_source_not_in_index: 400  # 200
cda1cd62   tangwang   意图分析&应用 baseline
161
162
    style_intent:
      enabled: true
87cacb1b   tangwang   融合公式优化。加入意图匹配因子
163
      selected_sku_boost: 1.2
432d1c88   tangwang   评估框架
164
165
      color_dictionary_path: config/dictionaries/style_intent_color.csv
      size_dictionary_path: config/dictionaries/style_intent_size.csv
cda1cd62   tangwang   意图分析&应用 baseline
166
      dimension_aliases:
432d1c88   tangwang   评估框架
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
        # Aliases for the "color" dimension.
        # NOTE(review): a bare "- " entry (which YAML loads as null, not as a
        # string) used to follow "颜色" and was removed. If it stood for a real
        # alias that was lost, restore that alias as a string.
        color:
        - color
        - colors
        - colour
        - colours
        - 颜色
        - 色系
        # Aliases for the "size" dimension.
        # NOTE(review): a trailing bare "- " (null) entry was removed here as
        # well; restore the intended alias if one was lost.
        size:
        - size
        - sizes
        - sizing
        - 尺码
        - 尺寸
        - 码数
        - 号码
74fdf9bd   tangwang   1.
184
185
    product_title_exclusion:
      enabled: true
432d1c88   tangwang   评估框架
186
      dictionary_path: config/dictionaries/product_title_exclusion.tsv
881d338b   tangwang   评估框架
187
188
189
190
  
    # 动态多语言检索字段配置
    # multilingual_fields 会被拼成 title.{lang}/brief.{lang}/... 形式;
    # shared_fields 为无语言后缀字段。
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
191
192
    search_fields:
      multilingual_fields:
432d1c88   tangwang   评估框架
193
      - title
331861d5   tangwang   eval框架配置化
194
      - keywords
ccbdf870   tangwang   enriched_attribut...
195
      - qanchors
1c2ba48e   tangwang   eval tagger
196
      - enriched_tags
ccbdf870   tangwang   enriched_attribut...
197
      - enriched_attributes.value
3abbc95a   tangwang   重构(scripts): 整理sc...
198
      # - enriched_taxonomy_attributes.value
331861d5   tangwang   eval框架配置化
199
200
201
      - option1_values
      - option2_values
      - option3_values
432d1c88   tangwang   评估框架
202
203
      - category_path
      - category_name_text
ccbdf870   tangwang   enriched_attribut...
204
205
206
      # - brief
      # - description
      # - vendor
881d338b   tangwang   评估框架
207
      # shared_fields: 无语言后缀字段;示例: tags, option1_values, option2_values, option3_values
432d1c88   tangwang   评估框架
208
      shared_fields: null
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
209
      core_multilingual_fields:
432d1c88   tangwang   评估框架
210
211
212
      - title
      - qanchors
      - category_name_text
881d338b   tangwang   评估框架
213
214
  
    # 统一文本召回策略(主查询 + 翻译查询)
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
215
    text_query_strategy:
432d1c88   tangwang   评估框架
216
217
      base_minimum_should_match: 60%
      translation_minimum_should_match: 60%
69881ecb   tangwang   相关性调参、enrich内容解析优化
218
219
      translation_boost: 0.75
      tie_breaker_base_query: 0.5
e756b18e   tangwang   重构了文本召回构建器,现在每个 b...
220
221
222
223
224
225
226
227
228
      best_fields_boost: 2.0
      best_fields:
        title: 4.0
        qanchors: 3.0
        category_name_text: 2.0
      phrase_fields:
        title: 5.0
        qanchors: 4.0
      phrase_match_boost: 3.0
881d338b   tangwang   评估框架
229
230
  
    # Embedding字段名称
432d1c88   tangwang   评估框架
231
232
    text_embedding_field: title_embedding
    image_embedding_field: image_embedding.vector
881d338b   tangwang   评估框架
233
234
235
236
  
    # 返回字段配置(_source includes)
    # null表示返回所有字段,[]表示不返回任何字段,列表表示只返回指定字段
    # 下列字段与 api/result_formatter.py(SpuResult 填充)及 search/searcher.py(SKU 排序/主图替换)一致
a7cc9078   tangwang   sku排序
237
    source_fields:
432d1c88   tangwang   评估框架
238
239
240
241
242
243
244
245
246
247
248
249
250
251
    - spu_id
    - handle
    - title
    - brief
    - description
    - vendor
    - category_name
    - category_name_text
    - category_path
    - category_id
    - category_level
    - category1_name
    - category2_name
    - category3_name
ccbdf870   tangwang   enriched_attribut...
252
253
254
255
256
    # - tags
    # - keywords
    # - qanchors
    # - enriched_tags
    # - enriched_attributes
3abbc95a   tangwang   重构(scripts): 整理sc...
257
    # - enriched_taxonomy_attributes.value
432d1c88   tangwang   评估框架
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
    - min_price
    - compare_at_price
    - image_url
    - sku_prices
    - sku_weights
    - sku_weight_units
    - total_inventory
    - option1_name
    - option1_values
    - option2_name
    - option2_values
    - option3_name
    - option3_values
    - specifications
    - skus
881d338b   tangwang   评估框架
273
274
  
    # KNN:文本向量与多模态(图片)向量各自 boost 与召回(k / num_candidates)
ceaf6d03   tangwang   召回限定:must条件补充主干词命...
275
276
    knn_text_boost: 4
    knn_image_boost: 4
881d338b   tangwang   评估框架
277
278
  
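    # knn_text_num_candidates = k * 3.5 (160 * 3.5 = 560). NOTE(review): this note used to say k * 3.4, which would give 544; confirm 560 is the intended value.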
de98daa3   tangwang   多模态召回优化
279
280
    knn_text_k: 160
    knn_text_num_candidates: 560
de98daa3   tangwang   多模态召回优化
281
282
    knn_text_k_long: 400
    knn_text_num_candidates_long: 1200
de98daa3   tangwang   多模态召回优化
283
284
    knn_image_k: 400
    knn_image_num_candidates: 1200
881d338b   tangwang   评估框架
285
286
  
  # Function Score配置(ES层打分规则)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
287
  function_score:
432d1c88   tangwang   评估框架
288
289
    score_mode: sum
    boost_mode: multiply
4d824a77   tangwang   所有租户共用一套统一配置.tena...
290
    functions: []
881d338b   tangwang   评估框架
291
292
  
  # 粗排配置(仅融合 ES 文本/向量信号,不调用模型)
8c8b9d84   tangwang   ES 拉取 coarse_rank...
293
294
295
296
297
  coarse_rank:
    enabled: true
    input_window: 700
    output_window: 240
    fusion:
4d000c94   tangwang   融合公式调参
298
      es_bias: 10.0
9df421ed   tangwang   基于eval框架开始调参
299
      es_exponent: 0.05
8c8b9d84   tangwang   ES 拉取 coarse_rank...
300
301
      text_bias: 0.1
      text_exponent: 0.35
881d338b   tangwang   评估框架
302
303
      # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合)
      # 因为es的打分已经给了trans进行了折扣,所以这里不再继续折扣
de98daa3   tangwang   多模态召回优化
304
      text_translation_weight: 1.0
8c8b9d84   tangwang   ES 拉取 coarse_rank...
305
      knn_text_weight: 1.0
4d000c94   tangwang   融合公式调参
306
307
      knn_image_weight: 2.0
      knn_tie_breaker: 0.3
8c8b9d84   tangwang   ES 拉取 coarse_rank...
308
      knn_bias: 0.6
4d000c94   tangwang   融合公式调参
309
      knn_exponent: 0.4
881d338b   tangwang   评估框架
310
311
  
  # 精排配置(轻量 reranker)
8c8b9d84   tangwang   ES 拉取 coarse_rank...
312
  fine_rank:
418b6a4a   tangwang   调参
313
314
    enabled: false
    input_window: 160
8c8b9d84   tangwang   ES 拉取 coarse_rank...
315
316
    output_window: 80
    timeout_sec: 10.0
432d1c88   tangwang   评估框架
317
318
319
    rerank_query_template: '{query}'
    rerank_doc_template: '{title}'
    service_profile: fine
881d338b   tangwang   评估框架
320
321
  
  # 重排配置(provider/URL 在 services.rerank)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
322
  rerank:
5f7d7f09   tangwang   性能测试报告.md
323
    enabled: true
418b6a4a   tangwang   调参
324
    rerank_window: 160
42e3aea6   tangwang   tidy
325
    timeout_sec: 15.0
506c39b7   tangwang   feat(search): 统一重...
326
327
    weight_es: 0.4
    weight_ai: 0.6
432d1c88   tangwang   评估框架
328
329
330
    rerank_query_template: '{query}'
    rerank_doc_template: '{title}'
    service_profile: default
881d338b   tangwang   评估框架
331
  
9df421ed   tangwang   基于eval框架开始调参
332
    # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(es / rerank / fine / text / knn)
881d338b   tangwang   评估框架
333
334
335
    # 其中 knn_score 先做一层 dis_max:
    #   max(knn_text_weight * text_knn, knn_image_weight * image_knn)
    #   + knn_tie_breaker * 另一侧较弱信号
814e352b   tangwang   乘法公式配置化
336
    fusion:
4d000c94   tangwang   融合公式调参
337
      es_bias: 10.0
9df421ed   tangwang   基于eval框架开始调参
338
      es_exponent: 0.05
4d000c94   tangwang   融合公式调参
339
      rerank_bias: 0.1
432d1c88   tangwang   评估框架
340
      rerank_exponent: 1.15
4d000c94   tangwang   融合公式调参
341
      fine_bias: 0.1
8c8b9d84   tangwang   ES 拉取 coarse_rank...
342
      fine_exponent: 1.0
814e352b   tangwang   乘法公式配置化
343
      text_bias: 0.1
432d1c88   tangwang   评估框架
344
      text_exponent: 0.25
881d338b   tangwang   评估框架
345
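      # Weight of base_query_trans_* relative to base_query (see the text dis_max fusion in search/rerank_client).
      # NOTE(review): coarse_rank.fusion sets this to 1.0 on the grounds that the ES score already discounts the translated query; confirm the extra discount applied at this stage is intentional.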
432d1c88   tangwang   评估框架
346
      text_translation_weight: 0.8
24edc208   tangwang   修改_extract_combin...
347
      knn_text_weight: 1.0
4d000c94   tangwang   融合公式调参
348
349
      knn_image_weight: 2.0
      knn_tie_breaker: 0.3
814e352b   tangwang   乘法公式配置化
350
      knn_bias: 0.6
4d000c94   tangwang   融合公式调参
351
      knn_exponent: 0.4
881d338b   tangwang   评估框架
352
353
  
  # 可扩展服务/provider 注册表(单一配置源)
42e3aea6   tangwang   tidy
354
355
  services:
    translation:
432d1c88   tangwang   评估框架
356
      service_url: http://127.0.0.1:6006
881d338b   tangwang   评估框架
357
      # default_model: nllb-200-distilled-600m
432d1c88   tangwang   评估框架
358
359
      default_model: nllb-200-distilled-600m
      default_scene: general
42e3aea6   tangwang   tidy
360
      timeout_sec: 10.0
d4cadc13   tangwang   翻译重构
361
      cache:
d4cadc13   tangwang   翻译重构
362
363
        # 62208000 s = 720 days (720 * 86400), the same span as infrastructure.redis.cache_expire_days.
        ttl_seconds: 62208000
        sliding_expiration: true
881d338b   tangwang   评估框架
364
        # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups).
8140e942   tangwang   translator model ...
365
        enable_model_quality_tier_cache: true
881d338b   tangwang   评估框架
366
367
        # Higher tier = better quality. Several models may share the same tier.
        # A request for a model at tier T may reuse Redis cache entries written by models whose tier is >= T (never entries from lower tiers).
8140e942   tangwang   translator model ...
368
369
370
371
372
373
374
        model_quality_tiers:
          deepl: 30
          qwen-mt: 30
          llm: 30
          nllb-200-distilled-600m: 20
          opus-mt-zh-en: 10
          opus-mt-en-zh: 10
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
375
      capabilities:
d4cadc13   tangwang   翻译重构
376
        qwen-mt:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
377
          enabled: true
432d1c88   tangwang   评估框架
378
379
380
          backend: qwen_mt
          model: qwen-mt-flash
          base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
42e3aea6   tangwang   tidy
381
          timeout_sec: 10.0
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
382
          use_cache: true
a0a173ae   tangwang   last
383
        llm:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
384
          enabled: true
432d1c88   tangwang   评估框架
385
386
387
          backend: llm
          model: qwen-flash
          base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
a0a173ae   tangwang   last
388
          timeout_sec: 30.0
cd4ce66d   tangwang   trans logs
389
          use_cache: true
d4cadc13   tangwang   翻译重构
390
        deepl:
cd4ce66d   tangwang   trans logs
391
          enabled: true
432d1c88   tangwang   评估框架
392
393
          backend: deepl
          api_url: https://api.deepl.com/v2/translate
d4cadc13   tangwang   翻译重构
394
          timeout_sec: 10.0
432d1c88   tangwang   评估框架
395
          glossary_id: ''
cd4ce66d   tangwang   trans logs
396
          use_cache: true
0fd2f875   tangwang   translate
397
        nllb-200-distilled-600m:
93be98cb   tangwang   清理过时的文档
398
          enabled: true
432d1c88   tangwang   评估框架
399
400
401
402
403
404
          backend: local_nllb
          model_id: facebook/nllb-200-distilled-600M
          model_dir: ./models/translation/facebook/nllb-200-distilled-600M
          ct2_model_dir: ./models/translation/facebook/nllb-200-distilled-600M/ctranslate2-float16
          ct2_compute_type: float16
          ct2_conversion_quantization: float16
ea293660   tangwang   CTranslate2
405
          ct2_auto_convert: true
46ce858d   tangwang   在NLLB模型的 /data/sa...
406
          ct2_inter_threads: 4
ea293660   tangwang   CTranslate2
407
          ct2_intra_threads: 0
46ce858d   tangwang   在NLLB模型的 /data/sa...
408
          ct2_max_queued_batches: 32
432d1c88   tangwang   评估框架
409
410
          ct2_batch_type: examples
          ct2_decoding_length_mode: source
46ce858d   tangwang   在NLLB模型的 /data/sa...
411
412
          ct2_decoding_length_extra: 8
          ct2_decoding_length_min: 32
432d1c88   tangwang   评估框架
413
414
          device: cuda
          torch_dtype: float16
4747e2f4   tangwang   embedding perform...
415
          batch_size: 64
0fd2f875   tangwang   translate
416
          max_input_length: 256
3eff49b7   tangwang   trans nllb-200-di...
417
          max_new_tokens: 64
0fd2f875   tangwang   translate
418
          num_beams: 1
cd4ce66d   tangwang   trans logs
419
          use_cache: true
0fd2f875   tangwang   translate
420
        opus-mt-zh-en:
f86c5fee   tangwang   reranker性能参数脚本放在:...
421
          enabled: false
432d1c88   tangwang   评估框架
422
423
424
425
426
427
          backend: local_marian
          model_id: Helsinki-NLP/opus-mt-zh-en
          model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en
          ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en/ctranslate2-float16
          ct2_compute_type: float16
          ct2_conversion_quantization: float16
ea293660   tangwang   CTranslate2
428
429
430
431
          ct2_auto_convert: true
          ct2_inter_threads: 1
          ct2_intra_threads: 0
          ct2_max_queued_batches: 0
432d1c88   tangwang   评估框架
432
433
434
          ct2_batch_type: examples
          device: cuda
          torch_dtype: float16
0fd2f875   tangwang   translate
435
436
437
438
          batch_size: 16
          max_input_length: 256
          max_new_tokens: 256
          num_beams: 1
cd4ce66d   tangwang   trans logs
439
          use_cache: true
0fd2f875   tangwang   translate
440
        opus-mt-en-zh:
f86c5fee   tangwang   reranker性能参数脚本放在:...
441
          enabled: false
432d1c88   tangwang   评估框架
442
443
444
445
446
447
          backend: local_marian
          model_id: Helsinki-NLP/opus-mt-en-zh
          model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh
          ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh/ctranslate2-float16
          ct2_compute_type: float16
          ct2_conversion_quantization: float16
ea293660   tangwang   CTranslate2
448
449
450
451
          ct2_auto_convert: true
          ct2_inter_threads: 1
          ct2_intra_threads: 0
          ct2_max_queued_batches: 0
432d1c88   tangwang   评估框架
452
453
454
          ct2_batch_type: examples
          device: cuda
          torch_dtype: float16
0fd2f875   tangwang   translate
455
456
457
458
          batch_size: 16
          max_input_length: 256
          max_new_tokens: 256
          num_beams: 1
cd4ce66d   tangwang   trans logs
459
          use_cache: true
42e3aea6   tangwang   tidy
460
    embedding:
881d338b   tangwang   评估框架
461
      provider: http  # http
42e3aea6   tangwang   tidy
462
463
      providers:
        http:
432d1c88   tangwang   评估框架
464
465
          text_base_url: http://127.0.0.1:6005
          image_base_url: http://127.0.0.1:6008
881d338b   tangwang   评估框架
466
467
      # 服务内文本后端(embedding 进程启动时读取)
      backend: tei  # tei | local_st
07cf5a93   tangwang   START_EMBEDDING=...
468
469
      backends:
        tei:
432d1c88   tangwang   评估框架
470
          base_url: http://127.0.0.1:8080
efd435cf   tangwang   tei性能调优:
471
          timeout_sec: 20
432d1c88   tangwang   评估框架
472
          model_id: Qwen/Qwen3-Embedding-0.6B
07cf5a93   tangwang   START_EMBEDDING=...
473
        local_st:
432d1c88   tangwang   评估框架
474
475
          model_id: Qwen/Qwen3-Embedding-0.6B
          device: cuda
07cf5a93   tangwang   START_EMBEDDING=...
476
477
          batch_size: 32
          normalize_embeddings: true
881d338b   tangwang   评估框架
478
479
480
481
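      # In-service image backend (read when the embedding process starts; the cnclip gRPC server and the service on port 6008 must use the same model_name).
      # Chinese-CLIP: ViT-H-14 → 1024 dims, ViT-L-14 → 768 dims. The model must match
      # image_embedding.vector.dims in mappings/search_products.json (stated here as: current index is 1024 → default ViT-H-14).
      # NOTE(review): both image_backends entries in this file are set to ViT-L-14 (768 dims per the line above), which contradicts the 1024 / ViT-H-14 statement; confirm which one is current.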
      image_backend: clip_as_service  # clip_as_service | local_cnclip
86d8358b   tangwang   config optimize
482
483
      image_backends:
        clip_as_service:
432d1c88   tangwang   评估框架
484
485
          server: grpc://127.0.0.1:51000
          model_name: CN-CLIP/ViT-L-14
86d8358b   tangwang   config optimize
486
487
488
          batch_size: 8
          normalize_embeddings: true
        local_cnclip:
432d1c88   tangwang   评估框架
489
          model_name: ViT-L-14
86d8358b   tangwang   config optimize
490
491
492
          device: null
          batch_size: 8
          normalize_embeddings: true
42e3aea6   tangwang   tidy
493
    rerank:
432d1c88   tangwang   评估框架
494
      provider: http
42e3aea6   tangwang   tidy
495
496
      providers:
        http:
daa2690b   tangwang   漏斗参数调优&呈现优化
497
498
          instances:
            default:
432d1c88   tangwang   评估框架
499
500
              base_url: http://127.0.0.1:6007
              service_url: http://127.0.0.1:6007/rerank
daa2690b   tangwang   漏斗参数调优&呈现优化
501
            fine:
432d1c88   tangwang   评估框架
502
503
              base_url: http://127.0.0.1:6009
              service_url: http://127.0.0.1:6009/rerank
86d8358b   tangwang   config optimize
504
505
506
      request:
        max_docs: 1000
        normalize: true
432d1c88   tangwang   评估框架
507
      default_instance: default
881d338b   tangwang   评估框架
508
      # 命名实例:同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。
daa2690b   tangwang   漏斗参数调优&呈现优化
509
510
      instances:
        default:
432d1c88   tangwang   评估框架
511
          host: 0.0.0.0
daa2690b   tangwang   漏斗参数调优&呈现优化
512
          port: 6007
432d1c88   tangwang   评估框架
513
514
          backend: qwen3_vllm_score
          runtime_dir: ./.runtime/reranker/default
daa2690b   tangwang   漏斗参数调优&呈现优化
515
        fine:
432d1c88   tangwang   评估框架
516
          host: 0.0.0.0
daa2690b   tangwang   漏斗参数调优&呈现优化
517
          port: 6009
432d1c88   tangwang   评估框架
518
519
          backend: bge
          runtime_dir: ./.runtime/reranker/fine
701ae503   tangwang   docs
520
521
      backends:
        bge:
432d1c88   tangwang   评估框架
522
          model_name: BAAI/bge-reranker-v2-m3
701ae503   tangwang   docs
523
524
          device: null
          use_fp16: true
418b6a4a   tangwang   调参
525
          batch_size: 80
00c8ddb9   tangwang   suggest rank opti...
526
          max_length: 160
432d1c88   tangwang   评估框架
527
          cache_dir: ./model_cache
701ae503   tangwang   docs
528
          enable_warmup: true
971a0851   tangwang   补充reranker-jina,探...
529
        jina_reranker_v3:
432d1c88   tangwang   评估框架
530
          model_name: jinaai/jina-reranker-v3
971a0851   tangwang   补充reranker-jina,探...
531
          device: null
432d1c88   tangwang   评估框架
532
          dtype: float16
971a0851   tangwang   补充reranker-jina,探...
533
          batch_size: 64
74116f05   tangwang   jina-reranker-v3性...
534
535
536
          max_doc_length: 160
          max_query_length: 64
          sort_by_doc_length: true
432d1c88   tangwang   评估框架
537
          cache_dir: ./model_cache
971a0851   tangwang   补充reranker-jina,探...
538
          trust_remote_code: true
701ae503   tangwang   docs
539
        qwen3_vllm:
432d1c88   tangwang   评估框架
540
541
          model_name: Qwen/Qwen3-Reranker-0.6B
          engine: vllm
b0972ff9   tangwang   qwen3_vllm_score ...
542
          max_model_len: 256
701ae503   tangwang   docs
543
          tensor_parallel_size: 1
432d1c88   tangwang   评估框架
544
545
          gpu_memory_utilization: 0.2
          dtype: float16
bc089b43   tangwang   refactor(reranker...
546
547
          enable_prefix_caching: true
          enforce_eager: false
00c8ddb9   tangwang   suggest rank opti...
548
          infer_batch_size: 100
9f5994b4   tangwang   reranker
549
          sort_by_doc_length: true
881d338b   tangwang   评估框架
550
551
552
553
554
555
556
557
558
559
560
561
          # standard = _format_instruction__standard (fixed yes/no system prompt);
          # compact = _format_instruction (the instruction is used as the system prompt and repeated as "Instruct" inside the user message).
          instruction_format: standard  # one of: compact | standard
          # The commented-out lines below are alternative instruction wordings that were tried.
          # instruction: "Given a query, score the product for relevance"
          # "rank products by given query" works slightly better than "Given a query, score the product for relevance".
          # instruction: "rank products by given query, category match first"
          # instruction: "Rank products by query relevance, prioritizing category match"
          # instruction: "Rank products by query relevance, prioritizing category and style match"
          # instruction: "Rank by query relevance, prioritize category & style"
          # instruction: "Relevance ranking: category & style match first"
          # instruction: "Score product relevance by query with category & style match prioritized"
          # instruction: "Rank products by query with category & style match prioritized"
          # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query"
432d1c88   tangwang   评估框架
562
          instruction: rank products by given query
881d338b   tangwang   评估框架
563
564
        # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score
        # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。
9de5ef49   tangwang   qwen3_vllm_score ...
565
        qwen3_vllm_score:
432d1c88   tangwang   评估框架
566
          model_name: Qwen/Qwen3-Reranker-0.6B
881d338b   tangwang   评估框架
567
          # 官方 Hub 原版需 true;若改用已转换的 seq-cls 权重(如 tomaarsen/...-seq-cls)则设为 false
9de5ef49   tangwang   qwen3_vllm_score ...
568
          use_original_qwen3_hf_overrides: true
881d338b   tangwang   评估框架
569
570
571
572
          # vllm_runner: "auto"
          # vllm_convert: "auto"
          # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并
          # hf_overrides: {}
432d1c88   tangwang   评估框架
573
          engine: vllm
f86c5fee   tangwang   reranker性能参数脚本放在:...
574
          max_model_len: 172
9de5ef49   tangwang   qwen3_vllm_score ...
575
          tensor_parallel_size: 1
c3425429   tangwang   在以下文件中完成精排/融合清理工作...
576
          gpu_memory_utilization: 0.15
432d1c88   tangwang   评估框架
577
          dtype: float16
9de5ef49   tangwang   qwen3_vllm_score ...
578
579
          enable_prefix_caching: true
          enforce_eager: false
3b35f139   tangwang   search evalution
580
          infer_batch_size: 80
9de5ef49   tangwang   qwen3_vllm_score ...
581
          sort_by_doc_length: true
881d338b   tangwang   评估框架
582
583
584
585
          # The default, "standard", matches the prefix used by the official vLLM Qwen3 reranker.
          instruction_format: standard  # one of: compact | standard
          # instruction: "Rank products by query with category & style match prioritized"
          # instruction: "Given a shopping query, rank products by relevance"
432d1c88   tangwang   评估框架
586
          instruction: Rank products by query with category & style match prioritized
d31c7f65   tangwang   补充云服务reranker
587
        qwen3_transformers:
432d1c88   tangwang   评估框架
588
589
          model_name: Qwen/Qwen3-Reranker-0.6B
          instruction: rank products by given query
881d338b   tangwang   评估框架
590
          # instruction: "Score the product’s relevance to the given query"
d31c7f65   tangwang   补充云服务reranker
591
592
593
          max_length: 8192
          batch_size: 64
          use_fp16: true
881d338b   tangwang   评估框架
594
          # sdpa:默认无需 flash-attn;若已安装 flash_attn 可改为 flash_attention_2
432d1c88   tangwang   评估框架
595
          attn_implementation: sdpa
881d338b   tangwang   评估框架
596
597
598
        # Packed Transformers backend: shared query prefix + custom position_ids/attention_mask.
        # For 1 query + many short docs (for example 400 product titles), this usually reduces
        # repeated prefix work and padding waste compared with pairwise batching.
4823f463   tangwang   qwen3_vllm_score ...
599
        qwen3_transformers_packed:
432d1c88   tangwang   评估框架
600
601
          model_name: Qwen/Qwen3-Reranker-0.6B
          instruction: Rank products by query with category & style match prioritized
b0972ff9   tangwang   qwen3_vllm_score ...
602
          max_model_len: 256
4823f463   tangwang   qwen3_vllm_score ...
603
604
605
606
          max_doc_len: 160
          max_docs_per_pack: 0
          use_fp16: true
          sort_by_doc_length: true
881d338b   tangwang   评估框架
607
608
          # Packed mode relies on a custom 4D attention mask. "eager" is the safest default.
          # If your torch/transformers stack validates it, you can benchmark "sdpa".
432d1c88   tangwang   评估框架
609
          attn_implementation: eager
3d508beb   tangwang   reranker-4b-gguf
610
        qwen3_gguf:
432d1c88   tangwang   评估框架
611
612
613
614
615
          repo_id: DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF
          filename: '*Q8_0.gguf'
          cache_dir: ./model_cache
          local_dir: ./models/reranker/qwen3-reranker-4b-gguf
          instruction: Rank products by query with category & style match prioritized
881d338b   tangwang   评估框架
616
          # T4 16GB / performance-first configuration: offload all layers; measured to be noticeably faster than the conservative configuration.
5c21a485   tangwang   qwen3-reranker-0....
617
618
619
620
          # NOTE(review): n_ctx / n_batch / n_ubatch / n_gpu_layers look like llama.cpp-style
          # loader options; confirm that the backend forwards them unchanged.
          n_ctx: 512
          n_batch: 512
          n_ubatch: 512
          # NOTE(review): 999 is presumably a sentinel above the model's layer count, giving the
          # full-layer offload this backend is tuned for; confirm.
          n_gpu_layers: 999
3d508beb   tangwang   reranker-4b-gguf
621
622
623
624
625
626
627
628
629
          # NOTE(review): main_gpu through use_mlock look like llama.cpp-style runtime options;
          # confirm that the backend forwards them unchanged.
          main_gpu: 0
          n_threads: 2
          n_threads_batch: 4
          flash_attn: true
          offload_kqv: true
          use_mmap: true
          use_mlock: false
          # NOTE(review): infer_batch_size and sort_by_doc_length appear to be service-level
          # batching knobs rather than model-loader options; confirm.
          infer_batch_size: 8
          sort_by_doc_length: true
432d1c88   tangwang   评估框架
630
          length_sort_mode: char
3d508beb   tangwang   reranker-4b-gguf
631
632
          enable_warmup: true
          verbose: false
5c21a485   tangwang   qwen3-reranker-0....
633
        qwen3_gguf_06b:
432d1c88   tangwang   评估框架
634
635
636
637
638
          # Model source for the 0.6B GGUF backend.
          repo_id: ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF
          # An exact file name here, whereas qwen3_gguf uses a quoted wildcard pattern.
          filename: qwen3-reranker-0.6b-q8_0.gguf
          cache_dir: ./model_cache
          local_dir: ./models/reranker/qwen3-reranker-0.6b-q8_0-gguf
          # Same instruction text as the qwen3_transformers_packed and qwen3_gguf backends.
          instruction: Rank products by query with category & style match prioritized
881d338b   tangwang   评估框架
639
640
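          # 0.6B GGUF / online rerank baseline:
          # Measured at about 265 s per request for 400 titles, so it is better suited as a low-VRAM functional fallback than as the primary low-latency online route.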
5c21a485   tangwang   qwen3-reranker-0....
641
642
643
644
645
646
647
648
649
650
651
652
653
          # These keys mirror the ones set for the qwen3_gguf backend. The values that differ are
          # n_ctx / n_batch / n_ubatch (256 here, 512 there) and infer_batch_size (32 here, 8 there).
          n_ctx: 256
          n_batch: 256
          n_ubatch: 256
          n_gpu_layers: 999
          main_gpu: 0
          n_threads: 2
          n_threads_batch: 4
          flash_attn: true
          offload_kqv: true
          use_mmap: true
          use_mlock: false
          infer_batch_size: 32
          sort_by_doc_length: true
432d1c88   tangwang   评估框架
654
          length_sort_mode: char
5c21a485   tangwang   qwen3-reranker-0....
655
656
657
          # reuse_query_state appears only on this backend; qwen3_gguf does not set it.
          # NOTE(review): its effect is not visible in this file; confirm before enabling.
          reuse_query_state: false
          enable_warmup: true
          verbose: false
d31c7f65   tangwang   补充云服务reranker
658
        dashscope_rerank:
432d1c88   tangwang   评估框架
659
          model_name: qwen3-rerank
881d338b   tangwang   评估框架
660
661
662
663
          # Choose the endpoint by region:
          # China:     https://dashscope.aliyuncs.com/compatible-api/v1/reranks
          # Singapore: https://dashscope-intl.aliyuncs.com/compatible-api/v1/reranks
          # US:        https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks
432d1c88   tangwang   评估框架
664
665
666
          # Set to the China-region URL (dashscope.aliyuncs.com).
          endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks
          # NOTE(review): presumably the name of the environment variable holding the API key;
          # the _CN suffix suggests it pairs with the China endpoint. Confirm, and revisit it
          # if the endpoint region is changed.
          api_key_env: RERANK_DASHSCOPE_API_KEY_CN
          timeout_sec: 10.0
881d338b   tangwang   评估框架
667
668
          top_n_cap: 0   # 0 means top_n = number of documents in the current request; >0 caps top_n
          # NOTE(review): spelled `batchsize` here, while another backend in this file uses `batch_size`;
          # confirm which spelling the consumer reads before renaming.
          batchsize: 64  # 0 disables; >0 enables concurrent small-batch scheduling (top_n/top_n_cap still apply; results are truncated globally after splitting)
432d1c88   tangwang   评估框架
669
          # This backend uses the key `instruct`; the local Qwen3 backends in this file use `instruction`.
          # NOTE(review): presumably mirrors the remote API's field name; confirm before renaming.
          instruct: Given a shopping query, rank product titles by relevance
d31c7f65   tangwang   补充云服务reranker
670
671
          max_retries: 2
          retry_backoff_sec: 0.2
881d338b   tangwang   评估框架
672
673
  
  # SPU configuration (enabled; uses nested skus)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
674
675
  spu_config:
    enabled: true
432d1c88   tangwang   评估框架
676
    spu_field: spu_id
4d824a77   tangwang   所有租户共用一套统一配置.tena...
677
    inner_hits_size: 10
881d338b   tangwang   评估框架
678
679
    # Selects which option dimensions take part in retrieval (both indexing and online search).
    # Format: a list containing one or more of option1/option2/option3.
432d1c88   tangwang   评估框架
680
681
682
683
    searchable_option_dimensions:
    - option1
    - option2
    - option3
881d338b   tangwang   评估框架
684
685
686
687
  
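  # Tenant configuration
  # Each tenant can set a primary language (primary_language) and index languages (index_languages: the main market languages, selectable by the merchant).
  # Default index_languages: [en, zh]; it may be set to any subset of SOURCE_LANG_CODE_MAP.keys().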
0064e946   tangwang   feat: 增量索引服务、租户配置...
688
  tenant_config:
0064e946   tangwang   feat: 增量索引服务、租户配置...
689
    default:
432d1c88   tangwang   评估框架
690
691
692
693
      # Default language settings; index_languages here is the [en, zh] default
      # described in the tenant_config header comment.
      primary_language: en
      index_languages:
      - en
      - zh
0064e946   tangwang   feat: 增量索引服务、租户配置...
694
    tenants:
432d1c88   tangwang   评估框架
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
      # Tenant IDs are quoted, which makes them string keys (an unquoted 1 would load as an integer).
      # Tenants '1', '3' and '162' use zh as primary language with index languages [zh, en].
      '1':
        primary_language: zh
        index_languages:
        - zh
        - en
      # Same values as tenant_config.default (en primary, [en, zh]).
      '2':
        primary_language: en
        index_languages:
        - en
        - zh
      '3':
        primary_language: zh
        index_languages:
        - zh
        - en
      '162':
        primary_language: zh
        index_languages:
        - zh
        - en
      # Same values as tenant_config.default (en primary, [en, zh]).
      '170':
        primary_language: en
        index_languages:
        - en
        - zh