Blame view

config/config.yaml 23.5 KB
881d338b   tangwang   评估框架
1
2
3
4
5
6
7
8
  # Unified Configuration for Multi-Tenant Search Engine
  # 统一配置文件,所有租户共用一套配置
  # 注意:索引结构由 mappings/search_products.json 定义,此文件只配置搜索行为
  #
  # 约定:下列键为必填;进程环境变量可覆盖 infrastructure / runtime 中同名语义项
  #(如 ES_HOST、API_PORT 等),未设置环境变量时使用本文件中的值。
  
  # Process / bind addresses (环境变量 APP_ENV、RUNTIME_ENV、ES_INDEX_NAMESPACE 可覆盖前两者的语义)
86d0e83d   tangwang   query翻译,根据源语言是否在索...
9
  runtime:
432d1c88   tangwang   评估框架
10
11
12
    environment: prod
    index_namespace: ''
    api_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
13
    api_port: 6002
432d1c88   tangwang   评估框架
14
    indexer_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
15
    indexer_port: 6004
432d1c88   tangwang   评估框架
16
    embedding_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
17
18
19
    embedding_port: 6005
    embedding_text_port: 6005
    embedding_image_port: 6008
432d1c88   tangwang   评估框架
20
    translator_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
21
    translator_port: 6006
432d1c88   tangwang   评估框架
22
    reranker_host: 0.0.0.0
86d0e83d   tangwang   query翻译,根据源语言是否在索...
23
    reranker_port: 6007
881d338b   tangwang   评估框架
24
25
  
  # 基础设施连接(敏感项优先读环境变量:ES_*、REDIS_*、DB_*、DASHSCOPE_API_KEY、DEEPL_AUTH_KEY)
86d0e83d   tangwang   query翻译,根据源语言是否在索...
26
27
  infrastructure:
    elasticsearch:
432d1c88   tangwang   评估框架
28
      host: http://localhost:9200
86d0e83d   tangwang   query翻译,根据源语言是否在索...
29
30
31
      username: null
      password: null
    redis:
432d1c88   tangwang   评估框架
32
      host: localhost
86d0e83d   tangwang   query翻译,根据源语言是否在索...
33
34
35
36
37
38
39
      port: 6479
      snapshot_db: 0
      password: null
      socket_timeout: 1
      socket_connect_timeout: 1
      retry_on_timeout: false
      cache_expire_days: 720
432d1c88   tangwang   评估框架
40
41
      embedding_cache_prefix: embedding
      anchor_cache_prefix: product_anchors
86d0e83d   tangwang   query翻译,根据源语言是否在索...
42
43
44
45
46
47
48
49
50
51
      anchor_cache_expire_days: 30
    database:
      host: null
      port: 3306
      database: null
      username: null
      password: null
    secrets:
      dashscope_api_key: null
      deepl_auth_key: null
881d338b   tangwang   评估框架
52
53
  
  # Elasticsearch Index
432d1c88   tangwang   评估框架
54
  es_index_name: search_products
881d338b   tangwang   评估框架
55
56
  
  # 检索域 / 索引列表(可为空列表;每项字段均需显式给出)
86d0e83d   tangwang   query翻译,根据源语言是否在索...
57
  indexes: []
881d338b   tangwang   评估框架
58
59
  
  # Config assets
86d8358b   tangwang   config optimize
60
  assets:
432d1c88   tangwang   评估框架
61
    query_rewrite_dictionary_path: config/dictionaries/query_rewrite.dict
881d338b   tangwang   评估框架
62
63
  
  # Product content understanding (LLM enrich-content) configuration
41f0b2e9   tangwang   product_enrich支持并发
64
65
  product_enrich:
    max_workers: 40
881d338b   tangwang   评估框架
66
  
331861d5   tangwang   eval框架配置化
67
68
69
70
71
72
73
74
75
76
  # Offline / web relevance evaluation (scripts/evaluation, eval-web).
  # The values here are the defaults used when the CLI does not pass an argument explicitly;
  # when search_base_url is not configured it is set automatically to
  # http://127.0.0.1:{runtime.api_port}.
  search_evaluation:
    artifact_root: artifacts/search_evaluation
    queries_file: scripts/evaluation/queries/queries.txt
    eval_log_dir: logs
    default_tenant_id: '163'
    search_base_url: ''
    web_host: 0.0.0.0
    web_port: 6010
984f14f9   tangwang   product_enrich模块迁出
77
    judge_model: qwen3.6-plus
331861d5   tangwang   eval框架配置化
78
79
    judge_enable_thinking: false
    judge_dashscope_batch: false
984f14f9   tangwang   product_enrich模块迁出
80
    intent_model: qwen3.6-plus
331861d5   tangwang   eval框架配置化
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
    intent_enable_thinking: true
    judge_batch_completion_window: 24h
    judge_batch_poll_interval_sec: 10.0
    build_search_depth: 1000
    build_rerank_depth: 10000
    annotate_search_top_k: 120
    annotate_rerank_top_k: 200
    batch_top_k: 100
    audit_top_k: 100
    audit_limit_suspicious: 5
    default_language: en
    search_recall_top_k: 200
    rerank_high_threshold: 0.5
    rerank_high_skip_count: 1000
    rebuild_llm_batch_size: 50
    rebuild_min_llm_batches: 10
    rebuild_max_llm_batches: 40
    rebuild_irrelevant_stop_ratio: 0.799
    rebuild_irrel_low_combined_stop_ratio: 0.959
    rebuild_irrelevant_stop_streak: 3
  
881d338b   tangwang   评估框架
102
  # ES Index Settings (基础设置)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
103
104
105
  es_settings:
    number_of_shards: 1
    number_of_replicas: 0
432d1c88   tangwang   评估框架
106
    refresh_interval: 30s
881d338b   tangwang   评估框架
107
108
109
110
  
  # 字段权重配置(用于搜索时的字段boost)
  # 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang}。
  # 若需要按某个语言单独调权,也可以加显式 key(例如 title.de: 3.2)。
33839b37   tangwang   属性值参与搜索:
111
  field_boosts:
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
112
    title: 3.0
1fdab52d   tangwang   This change adjus...
113
114
115
    # The content of qanchors and enriched_tags is also present in enriched_attributes.value,
    # so the effective weight of each is its own boost plus the enriched_attributes.value boost.
    qanchors: 1.0
    enriched_tags: 1.0
483a05d9   tangwang   文本搜索权重调整(qanchors...
116
    enriched_attributes.value: 1.5
3abbc95a   tangwang   重构(scripts): 整理sc...
117
    # enriched_taxonomy_attributes.value: 0.3
69881ecb   tangwang   相关性调参、enrich内容解析优化
118
119
    category_name_text: 2.0
    category_path: 2.0
ccbdf870   tangwang   enriched_attribut...
120
121
122
123
124
125
126
127
    keywords: 2.0
    tags: 2.0
    option1_values: 1.7
    option2_values: 1.7
    option3_values: 1.7
    brief: 1.0
    description: 1.0
    vendor: 1.0
881d338b   tangwang   评估框架
128
129
  
  # Query Configuration(查询配置)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
130
  query_config:
881d338b   tangwang   评估框架
131
    # 支持的语言
4d824a77   tangwang   所有租户共用一套统一配置.tena...
132
    supported_languages:
432d1c88   tangwang   评估框架
133
134
135
    - zh
    - en
    default_language: en
881d338b   tangwang   评估框架
136
137
  
    # 功能开关(翻译开关由tenant_config控制)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
138
139
    enable_text_embedding: true
    enable_query_rewrite: true
881d338b   tangwang   评估框架
140
141
142
143
144
  
    # Query translation models (each value must match an entry under
    # services.translation.capabilities).
    # Source language is within the tenant's index_languages: primary recall can hit the
    # source-language fields; this case uses the three keys zh_to_en_model, en_to_zh_model
    # and default_translation_model.
    zh_to_en_model: nllb-200-distilled-600m  # "opus-mt-zh-en"
    en_to_zh_model: nllb-200-distilled-600m  # "opus-mt-en-zh"
432d1c88   tangwang   评估框架
145
    default_translation_model: nllb-200-distilled-600m
881d338b   tangwang   评估框架
146
147
148
149
    # zh_to_en_model: deepl
    # en_to_zh_model: deepl
    # default_translation_model: deepl
    # 源语种不在 index_languages:翻译对可检索文本更关键,可单独指定(缺省则与上一组相同)
432d1c88   tangwang   评估框架
150
151
152
    zh_to_en_model__source_not_in_index: nllb-200-distilled-600m
    en_to_zh_model__source_not_in_index: nllb-200-distilled-600m
    default_translation_model__source_not_in_index: nllb-200-distilled-600m
881d338b   tangwang   评估框架
153
154
155
156
157
158
    # zh_to_en_model__source_not_in_index: deepl
    # en_to_zh_model__source_not_in_index: deepl
    # default_translation_model__source_not_in_index: deepl
  
    # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒)。
    # 检测语言已在租户 index_languages 内:较短;不在索引语言内:较长(翻译对召回更关键)。
310bb3bc   tangwang   eval tools
159
160
    translation_embedding_wait_budget_ms_source_in_index: 300  # 80
    translation_embedding_wait_budget_ms_source_not_in_index: 400  # 200
cda1cd62   tangwang   意图分析&应用 baseline
161
162
    style_intent:
      enabled: true
87cacb1b   tangwang   融合公式优化。加入意图匹配因子
163
      selected_sku_boost: 1.2
432d1c88   tangwang   评估框架
164
165
      color_dictionary_path: config/dictionaries/style_intent_color.csv
      size_dictionary_path: config/dictionaries/style_intent_size.csv
cda1cd62   tangwang   意图分析&应用 baseline
166
      dimension_aliases:
432d1c88   tangwang   评估框架
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
        # Alias names per style dimension. Every item is a non-empty string:
        # a bare "-" item would load as null instead of a string.
        color:
        - color
        - colors
        - colour
        - colours
        - 颜色
        - 色系
        size:
        - size
        - sizes
        - sizing
        - 尺码
        - 尺寸
        - 码数
        - 号码
74fdf9bd   tangwang   1.
184
185
    product_title_exclusion:
      enabled: true
432d1c88   tangwang   评估框架
186
      dictionary_path: config/dictionaries/product_title_exclusion.tsv
881d338b   tangwang   评估框架
187
188
189
190
  
    # 动态多语言检索字段配置
    # multilingual_fields 会被拼成 title.{lang}/brief.{lang}/... 形式;
    # shared_fields 为无语言后缀字段。
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
191
192
    search_fields:
      multilingual_fields:
432d1c88   tangwang   评估框架
193
      - title
331861d5   tangwang   eval框架配置化
194
      - keywords
ccbdf870   tangwang   enriched_attribut...
195
      - qanchors
1c2ba48e   tangwang   eval tagger
196
      - enriched_tags
ccbdf870   tangwang   enriched_attribut...
197
      - enriched_attributes.value
3abbc95a   tangwang   重构(scripts): 整理sc...
198
      # - enriched_taxonomy_attributes.value
331861d5   tangwang   eval框架配置化
199
200
201
      - option1_values
      - option2_values
      - option3_values
432d1c88   tangwang   评估框架
202
203
      - category_path
      - category_name_text
ccbdf870   tangwang   enriched_attribut...
204
205
206
      # - brief
      # - description
      # - vendor
881d338b   tangwang   评估框架
207
      # shared_fields: 无语言后缀字段;示例: tags, option1_values, option2_values, option3_values
432d1c88   tangwang   评估框架
208
      shared_fields: null
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
209
      core_multilingual_fields:
432d1c88   tangwang   评估框架
210
211
212
      - title
      - qanchors
      - category_name_text
881d338b   tangwang   评估框架
213
214
  
    # 统一文本召回策略(主查询 + 翻译查询)
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
215
    text_query_strategy:
432d1c88   tangwang   评估框架
216
217
      base_minimum_should_match: 60%
      translation_minimum_should_match: 60%
69881ecb   tangwang   相关性调参、enrich内容解析优化
218
219
      translation_boost: 0.75
      tie_breaker_base_query: 0.5
e756b18e   tangwang   重构了文本召回构建器,现在每个 b...
220
221
222
223
224
225
226
227
228
      best_fields_boost: 2.0
      best_fields:
        title: 4.0
        qanchors: 3.0
        category_name_text: 2.0
      phrase_fields:
        title: 5.0
        qanchors: 4.0
      phrase_match_boost: 3.0
881d338b   tangwang   评估框架
229
230
  
    # Embedding字段名称
432d1c88   tangwang   评估框架
231
232
    text_embedding_field: title_embedding
    image_embedding_field: image_embedding.vector
881d338b   tangwang   评估框架
233
234
235
236
  
    # 返回字段配置(_source includes)
    # null表示返回所有字段,[]表示不返回任何字段,列表表示只返回指定字段
    # 下列字段与 api/result_formatter.py(SpuResult 填充)及 search/searcher.py(SKU 排序/主图替换)一致
a7cc9078   tangwang   sku排序
237
    source_fields:
432d1c88   tangwang   评估框架
238
239
240
241
242
243
244
245
246
247
248
249
250
251
    - spu_id
    - handle
    - title
    - brief
    - description
    - vendor
    - category_name
    - category_name_text
    - category_path
    - category_id
    - category_level
    - category1_name
    - category2_name
    - category3_name
ccbdf870   tangwang   enriched_attribut...
252
253
254
255
256
    # - tags
    # - keywords
    # - qanchors
    # - enriched_tags
    # - enriched_attributes
3abbc95a   tangwang   重构(scripts): 整理sc...
257
    # - enriched_taxonomy_attributes.value
432d1c88   tangwang   评估框架
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
    - min_price
    - compare_at_price
    - image_url
    - sku_prices
    - sku_weights
    - sku_weight_units
    - total_inventory
    - option1_name
    - option1_values
    - option2_name
    - option2_values
    - option3_name
    - option3_values
    - specifications
    - skus
881d338b   tangwang   评估框架
273
274
  
    # KNN:文本向量与多模态(图片)向量各自 boost 与召回(k / num_candidates)
ceaf6d03   tangwang   召回限定:must条件补充主干词命...
275
276
    knn_text_boost: 4
    knn_image_boost: 4
881d338b   tangwang   评估框架
277
278
  
    # knn_text_num_candidates = k * 3.5 (160 → 560); the *_long and image variants below use k * 3 (400 → 1200)
de98daa3   tangwang   多模态召回优化
279
280
    knn_text_k: 160
    knn_text_num_candidates: 560
de98daa3   tangwang   多模态召回优化
281
282
    knn_text_k_long: 400
    knn_text_num_candidates_long: 1200
de98daa3   tangwang   多模态召回优化
283
284
    knn_image_k: 400
    knn_image_num_candidates: 1200
881d338b   tangwang   评估框架
285
286
  
  # Function Score配置(ES层打分规则)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
287
  function_score:
432d1c88   tangwang   评估框架
288
289
    score_mode: sum
    boost_mode: multiply
4d824a77   tangwang   所有租户共用一套统一配置.tena...
290
    functions: []
881d338b   tangwang   评估框架
291
292
  
  # 粗排配置(仅融合 ES 文本/向量信号,不调用模型)
8c8b9d84   tangwang   ES 拉取 coarse_rank...
293
294
295
296
297
  coarse_rank:
    enabled: true
    input_window: 700
    output_window: 240
    fusion:
4d000c94   tangwang   融合公式调参
298
      es_bias: 10.0
9df421ed   tangwang   基于eval框架开始调参
299
      es_exponent: 0.05
8c8b9d84   tangwang   ES 拉取 coarse_rank...
300
301
      text_bias: 0.1
      text_exponent: 0.35
881d338b   tangwang   评估框架
302
303
      # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合)
      # 因为es的打分已经给了trans进行了折扣,所以这里不再继续折扣
de98daa3   tangwang   多模态召回优化
304
      text_translation_weight: 1.0
8c8b9d84   tangwang   ES 拉取 coarse_rank...
305
      knn_text_weight: 1.0
4d000c94   tangwang   融合公式调参
306
307
      knn_image_weight: 2.0
      knn_tie_breaker: 0.3
8c8b9d84   tangwang   ES 拉取 coarse_rank...
308
      knn_bias: 0.6
4d000c94   tangwang   融合公式调参
309
      knn_exponent: 0.4
881d338b   tangwang   评估框架
310
311
  
  # 精排配置(轻量 reranker)
8c8b9d84   tangwang   ES 拉取 coarse_rank...
312
  fine_rank:
418b6a4a   tangwang   调参
313
314
    enabled: false
    input_window: 160
8c8b9d84   tangwang   ES 拉取 coarse_rank...
315
316
    output_window: 80
    timeout_sec: 10.0
432d1c88   tangwang   评估框架
317
318
319
    rerank_query_template: '{query}'
    rerank_doc_template: '{title}'
    service_profile: fine
881d338b   tangwang   评估框架
320
321
  
  # 重排配置(provider/URL 在 services.rerank)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
322
  rerank:
5f7d7f09   tangwang   性能测试报告.md
323
    enabled: true
418b6a4a   tangwang   调参
324
    rerank_window: 160
42e3aea6   tangwang   tidy
325
    timeout_sec: 15.0
506c39b7   tangwang   feat(search): 统一重...
326
327
    weight_es: 0.4
    weight_ai: 0.6
432d1c88   tangwang   评估框架
328
329
330
    rerank_query_template: '{query}'
    rerank_doc_template: '{title}'
    service_profile: default
881d338b   tangwang   评估框架
331
  
9df421ed   tangwang   基于eval框架开始调参
332
    # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(es / rerank / fine / text / knn)
881d338b   tangwang   评估框架
333
334
335
    # 其中 knn_score 先做一层 dis_max:
    #   max(knn_text_weight * text_knn, knn_image_weight * image_knn)
    #   + knn_tie_breaker * 另一侧较弱信号
814e352b   tangwang   乘法公式配置化
336
    fusion:
4d000c94   tangwang   融合公式调参
337
      es_bias: 10.0
9df421ed   tangwang   基于eval框架开始调参
338
      es_exponent: 0.05
4d000c94   tangwang   融合公式调参
339
      rerank_bias: 0.1
432d1c88   tangwang   评估框架
340
      rerank_exponent: 1.15
4d000c94   tangwang   融合公式调参
341
      fine_bias: 0.1
8c8b9d84   tangwang   ES 拉取 coarse_rank...
342
      fine_exponent: 1.0
814e352b   tangwang   乘法公式配置化
343
      text_bias: 0.1
432d1c88   tangwang   评估框架
344
      text_exponent: 0.25
881d338b   tangwang   评估框架
345
      # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合)
432d1c88   tangwang   评估框架
346
      text_translation_weight: 0.8
24edc208   tangwang   修改_extract_combin...
347
      knn_text_weight: 1.0
4d000c94   tangwang   融合公式调参
348
349
      knn_image_weight: 2.0
      knn_tie_breaker: 0.3
814e352b   tangwang   乘法公式配置化
350
      knn_bias: 0.6
4d000c94   tangwang   融合公式调参
351
      knn_exponent: 0.4
881d338b   tangwang   评估框架
352
353
  
  # 可扩展服务/provider 注册表(单一配置源)
42e3aea6   tangwang   tidy
354
355
  services:
    translation:
432d1c88   tangwang   评估框架
356
      service_url: http://127.0.0.1:6006
881d338b   tangwang   评估框架
357
      # default_model: nllb-200-distilled-600m
432d1c88   tangwang   评估框架
358
359
      default_model: nllb-200-distilled-600m
      default_scene: general
42e3aea6   tangwang   tidy
360
      timeout_sec: 10.0
d4cadc13   tangwang   翻译重构
361
      cache:
d4cadc13   tangwang   翻译重构
362
363
        ttl_seconds: 62208000
        sliding_expiration: true
881d338b   tangwang   评估框架
364
        # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups).
8140e942   tangwang   translator model ...
365
        enable_model_quality_tier_cache: true
881d338b   tangwang   评估框架
366
367
        # Higher tier = better quality. Multiple models may share one tier (同级).
        # A request may reuse Redis keys from models with tier > A or tier == A (not from lower tiers).
8140e942   tangwang   translator model ...
368
369
370
371
372
373
374
        model_quality_tiers:
          deepl: 30
          qwen-mt: 30
          llm: 30
          nllb-200-distilled-600m: 20
          opus-mt-zh-en: 10
          opus-mt-en-zh: 10
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
375
      capabilities:
d4cadc13   tangwang   翻译重构
376
        qwen-mt:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
377
          enabled: true
432d1c88   tangwang   评估框架
378
379
380
          backend: qwen_mt
          model: qwen-mt-flash
          base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
42e3aea6   tangwang   tidy
381
          timeout_sec: 10.0
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
382
          use_cache: true
a0a173ae   tangwang   last
383
        llm:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
384
          enabled: true
432d1c88   tangwang   评估框架
385
386
387
          backend: llm
          model: qwen-flash
          base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
a0a173ae   tangwang   last
388
          timeout_sec: 30.0
cd4ce66d   tangwang   trans logs
389
          use_cache: true
d4cadc13   tangwang   翻译重构
390
        deepl:
cd4ce66d   tangwang   trans logs
391
          enabled: true
432d1c88   tangwang   评估框架
392
393
          backend: deepl
          api_url: https://api.deepl.com/v2/translate
d4cadc13   tangwang   翻译重构
394
          timeout_sec: 10.0
432d1c88   tangwang   评估框架
395
          glossary_id: ''
cd4ce66d   tangwang   trans logs
396
          use_cache: true
0fd2f875   tangwang   translate
397
        nllb-200-distilled-600m:
93be98cb   tangwang   清理过时的文档
398
          enabled: true
432d1c88   tangwang   评估框架
399
400
401
402
403
404
          backend: local_nllb
          model_id: facebook/nllb-200-distilled-600M
          model_dir: ./models/translation/facebook/nllb-200-distilled-600M
          ct2_model_dir: ./models/translation/facebook/nllb-200-distilled-600M/ctranslate2-float16
          ct2_compute_type: float16
          ct2_conversion_quantization: float16
ea293660   tangwang   CTranslate2
405
          ct2_auto_convert: true
46ce858d   tangwang   在NLLB模型的 /data/sa...
406
          ct2_inter_threads: 4
ea293660   tangwang   CTranslate2
407
          ct2_intra_threads: 0
46ce858d   tangwang   在NLLB模型的 /data/sa...
408
          ct2_max_queued_batches: 32
432d1c88   tangwang   评估框架
409
410
          ct2_batch_type: examples
          ct2_decoding_length_mode: source
46ce858d   tangwang   在NLLB模型的 /data/sa...
411
412
          ct2_decoding_length_extra: 8
          ct2_decoding_length_min: 32
432d1c88   tangwang   评估框架
413
414
          device: cuda
          torch_dtype: float16
4747e2f4   tangwang   embedding perform...
415
          batch_size: 64
0fd2f875   tangwang   translate
416
          max_input_length: 256
3eff49b7   tangwang   trans nllb-200-di...
417
          max_new_tokens: 64
0fd2f875   tangwang   translate
418
          num_beams: 1
cd4ce66d   tangwang   trans logs
419
          use_cache: true
0fd2f875   tangwang   translate
420
        opus-mt-zh-en:
f86c5fee   tangwang   reranker性能参数脚本放在:...
421
          enabled: false
432d1c88   tangwang   评估框架
422
423
424
425
426
427
          backend: local_marian
          model_id: Helsinki-NLP/opus-mt-zh-en
          model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en
          ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en/ctranslate2-float16
          ct2_compute_type: float16
          ct2_conversion_quantization: float16
ea293660   tangwang   CTranslate2
428
429
430
431
          ct2_auto_convert: true
          ct2_inter_threads: 1
          ct2_intra_threads: 0
          ct2_max_queued_batches: 0
432d1c88   tangwang   评估框架
432
433
434
          ct2_batch_type: examples
          device: cuda
          torch_dtype: float16
0fd2f875   tangwang   translate
435
436
437
438
          batch_size: 16
          max_input_length: 256
          max_new_tokens: 256
          num_beams: 1
cd4ce66d   tangwang   trans logs
439
          use_cache: true
0fd2f875   tangwang   translate
440
        opus-mt-en-zh:
f86c5fee   tangwang   reranker性能参数脚本放在:...
441
          enabled: false
432d1c88   tangwang   评估框架
442
443
444
445
446
447
          backend: local_marian
          model_id: Helsinki-NLP/opus-mt-en-zh
          model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh
          ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh/ctranslate2-float16
          ct2_compute_type: float16
          ct2_conversion_quantization: float16
ea293660   tangwang   CTranslate2
448
449
450
451
          ct2_auto_convert: true
          ct2_inter_threads: 1
          ct2_intra_threads: 0
          ct2_max_queued_batches: 0
432d1c88   tangwang   评估框架
452
453
454
          ct2_batch_type: examples
          device: cuda
          torch_dtype: float16
0fd2f875   tangwang   translate
455
456
457
458
          batch_size: 16
          max_input_length: 256
          max_new_tokens: 256
          num_beams: 1
cd4ce66d   tangwang   trans logs
459
          use_cache: true
42e3aea6   tangwang   tidy
460
    embedding:
881d338b   tangwang   评估框架
461
      provider: http  # http
42e3aea6   tangwang   tidy
462
463
      providers:
        http:
432d1c88   tangwang   评估框架
464
465
          text_base_url: http://127.0.0.1:6005
          image_base_url: http://127.0.0.1:6008
881d338b   tangwang   评估框架
466
467
      # 服务内文本后端(embedding 进程启动时读取)
      backend: tei  # tei | local_st
07cf5a93   tangwang   START_EMBEDDING=...
468
469
      backends:
        tei:
432d1c88   tangwang   评估框架
470
          base_url: http://127.0.0.1:8080
efd435cf   tangwang   tei性能调优:
471
          timeout_sec: 20
432d1c88   tangwang   评估框架
472
          model_id: Qwen/Qwen3-Embedding-0.6B
07cf5a93   tangwang   START_EMBEDDING=...
473
        local_st:
432d1c88   tangwang   评估框架
474
475
          model_id: Qwen/Qwen3-Embedding-0.6B
          device: cuda
07cf5a93   tangwang   START_EMBEDDING=...
476
477
          batch_size: 32
          normalize_embeddings: true
881d338b   tangwang   评估框架
478
479
480
481
      # In-service image backend (read when the embedding process starts; the cnclip gRPC
      # server and the 6008 service must use the same model_name).
      # Chinese-CLIP: ViT-H-14 -> 1024 dims, ViT-L-14 -> 768 dims. Must match
      # image_embedding.vector.dims in mappings/search_products.json
      # (stated here as: current index is 1024 -> default ViT-H-14).
      # NOTE(review): both image_backends entries below set a ViT-L-14 model_name, which by the
      # dimensions listed above is 768, not 1024 — confirm whether this comment or the
      # configured model_name is stale.
      image_backend: clip_as_service  # clip_as_service | local_cnclip
86d8358b   tangwang   config optimize
482
483
      image_backends:
        clip_as_service:
432d1c88   tangwang   评估框架
484
485
          server: grpc://127.0.0.1:51000
          model_name: CN-CLIP/ViT-L-14
86d8358b   tangwang   config optimize
486
487
488
          batch_size: 8
          normalize_embeddings: true
        local_cnclip:
432d1c88   tangwang   评估框架
489
          model_name: ViT-L-14
86d8358b   tangwang   config optimize
490
491
492
          device: null
          batch_size: 8
          normalize_embeddings: true
42e3aea6   tangwang   tidy
493
    rerank:
432d1c88   tangwang   评估框架
494
      provider: http
42e3aea6   tangwang   tidy
495
496
      providers:
        http:
daa2690b   tangwang   漏斗参数调优&呈现优化
497
498
          instances:
            default:
432d1c88   tangwang   评估框架
499
500
              base_url: http://127.0.0.1:6007
              service_url: http://127.0.0.1:6007/rerank
daa2690b   tangwang   漏斗参数调优&呈现优化
501
            fine:
432d1c88   tangwang   评估框架
502
503
              base_url: http://127.0.0.1:6009
              service_url: http://127.0.0.1:6009/rerank
86d8358b   tangwang   config optimize
504
505
506
      request:
        max_docs: 1000
        normalize: true
432d1c88   tangwang   评估框架
507
      default_instance: default
881d338b   tangwang   评估框架
508
      # 命名实例:同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。
daa2690b   tangwang   漏斗参数调优&呈现优化
509
510
      instances:
        default:
432d1c88   tangwang   评估框架
511
          host: 0.0.0.0
daa2690b   tangwang   漏斗参数调优&呈现优化
512
          port: 6007
432d1c88   tangwang   评估框架
513
514
          backend: qwen3_vllm_score
          runtime_dir: ./.runtime/reranker/default
daa2690b   tangwang   漏斗参数调优&呈现优化
515
        fine:
432d1c88   tangwang   评估框架
516
          host: 0.0.0.0
daa2690b   tangwang   漏斗参数调优&呈现优化
517
          port: 6009
432d1c88   tangwang   评估框架
518
519
          backend: bge
          runtime_dir: ./.runtime/reranker/fine
701ae503   tangwang   docs
520
521
      backends:
        bge:
432d1c88   tangwang   评估框架
522
          model_name: BAAI/bge-reranker-v2-m3
701ae503   tangwang   docs
523
524
          device: null
          use_fp16: true
418b6a4a   tangwang   调参
525
          batch_size: 80
00c8ddb9   tangwang   suggest rank opti...
526
          max_length: 160
432d1c88   tangwang   评估框架
527
          cache_dir: ./model_cache
701ae503   tangwang   docs
528
          enable_warmup: true
971a0851   tangwang   补充reranker-jina,探...
529
        jina_reranker_v3:
432d1c88   tangwang   评估框架
530
          model_name: jinaai/jina-reranker-v3
971a0851   tangwang   补充reranker-jina,探...
531
          device: null
432d1c88   tangwang   评估框架
532
          dtype: float16
971a0851   tangwang   补充reranker-jina,探...
533
          batch_size: 64
74116f05   tangwang   jina-reranker-v3性...
534
535
536
          max_doc_length: 160
          max_query_length: 64
          sort_by_doc_length: true
432d1c88   tangwang   评估框架
537
          cache_dir: ./model_cache
971a0851   tangwang   补充reranker-jina,探...
538
          trust_remote_code: true
701ae503   tangwang   docs
539
        qwen3_vllm:
432d1c88   tangwang   评估框架
540
541
          model_name: Qwen/Qwen3-Reranker-0.6B
          engine: vllm
b0972ff9   tangwang   qwen3_vllm_score ...
542
          max_model_len: 256
701ae503   tangwang   docs
543
          tensor_parallel_size: 1
432d1c88   tangwang   评估框架
544
545
          gpu_memory_utilization: 0.2
          dtype: float16
bc089b43   tangwang   refactor(reranker...
546
547
          enable_prefix_caching: true
          enforce_eager: false
00c8ddb9   tangwang   suggest rank opti...
548
          infer_batch_size: 100
9f5994b4   tangwang   reranker
549
          sort_by_doc_length: true
881d338b   tangwang   评估框架
550
551
552
553
554
555
556
557
558
559
560
561
          # standard=_format_instruction__standard(固定 yes/no system);compact=_format_instruction(instruction 作 system 且 user 内重复 Instruct)
          instruction_format: standard  # compact standard
          # instruction: "Given a query, score the product for relevance"
          # "rank products by given query" 比 “Given a query, score the product for relevance” 更好点
          # instruction: "rank products by given query, category match first"
          # instruction: "Rank products by query relevance, prioritizing category match"
          # instruction: "Rank products by query relevance, prioritizing category and style match"
          # instruction: "Rank by query relevance, prioritize category & style"
          # instruction: "Relevance ranking: category & style match first"
          # instruction: "Score product relevance by query with category & style match prioritized"
          # instruction: "Rank products by query with category & style match prioritized"
          # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query"
432d1c88   tangwang   评估框架
562
          instruction: rank products by given query
881d338b   tangwang   评估框架
563
564
        # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score
        # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。
9de5ef49   tangwang   qwen3_vllm_score ...
565
        qwen3_vllm_score:
432d1c88   tangwang   评估框架
566
          model_name: Qwen/Qwen3-Reranker-0.6B
881d338b   tangwang   评估框架
567
          # 官方 Hub 原版需 true;若改用已转换的 seq-cls 权重(如 tomaarsen/...-seq-cls)则设为 false
9de5ef49   tangwang   qwen3_vllm_score ...
568
          use_original_qwen3_hf_overrides: true
881d338b   tangwang   评估框架
569
570
571
572
          # vllm_runner: "auto"
          # vllm_convert: "auto"
          # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并
          # hf_overrides: {}
432d1c88   tangwang   评估框架
573
          engine: vllm
f86c5fee   tangwang   reranker性能参数脚本放在:...
574
          max_model_len: 172
9de5ef49   tangwang   qwen3_vllm_score ...
575
          tensor_parallel_size: 1
c3425429   tangwang   在以下文件中完成精排/融合清理工作...
576
          gpu_memory_utilization: 0.15
432d1c88   tangwang   评估框架
577
          dtype: float16
9de5ef49   tangwang   qwen3_vllm_score ...
578
579
          enable_prefix_caching: true
          enforce_eager: false
3b35f139   tangwang   search evalution
580
          infer_batch_size: 80
9de5ef49   tangwang   qwen3_vllm_score ...
581
          sort_by_doc_length: true
881d338b   tangwang   评估框架
582
583
584
585
          # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致
          instruction_format: standard  # compact standard
          # instruction: "Rank products by query with category & style match prioritized"
          # instruction: "Given a shopping query, rank products by relevance"
432d1c88   tangwang   评估框架
586
          instruction: Rank products by query with category & style match prioritized
d31c7f65   tangwang   补充云服务reranker
587
        qwen3_transformers:
432d1c88   tangwang   评估框架
588
589
          model_name: Qwen/Qwen3-Reranker-0.6B
          instruction: rank products by given query
881d338b   tangwang   评估框架
590
          # instruction: "Score the product’s relevance to the given query"
d31c7f65   tangwang   补充云服务reranker
591
592
593
          max_length: 8192
          batch_size: 64
          use_fp16: true
881d338b   tangwang   评估框架
594
          # sdpa:默认无需 flash-attn;若已安装 flash_attn 可改为 flash_attention_2
432d1c88   tangwang   评估框架
595
          attn_implementation: sdpa
881d338b   tangwang   评估框架
596
597
598
        # Packed Transformers backend: shared query prefix + custom position_ids/attention_mask.
        # For 1 query + many short docs (for example 400 product titles), this usually reduces
        # repeated prefix work and padding waste compared with pairwise batching.
4823f463   tangwang   qwen3_vllm_score ...
599
        qwen3_transformers_packed:
432d1c88   tangwang   评估框架
600
601
          model_name: Qwen/Qwen3-Reranker-0.6B
          instruction: Rank products by query with category & style match prioritized
b0972ff9   tangwang   qwen3_vllm_score ...
602
          max_model_len: 256
4823f463   tangwang   qwen3_vllm_score ...
603
604
605
606
          max_doc_len: 160
          max_docs_per_pack: 0
          use_fp16: true
          sort_by_doc_length: true
881d338b   tangwang   评估框架
607
608
          # Packed mode relies on a custom 4D attention mask. "eager" is the safest default.
          # If your torch/transformers stack validates it, you can benchmark "sdpa".
432d1c88   tangwang   评估框架
609
          attn_implementation: eager
3d508beb   tangwang   reranker-4b-gguf
610
        # Settings block keyed "qwen3_gguf" (GGUF build of Qwen3-Reranker-4B, Q8_0 file).
        qwen3_gguf:
432d1c88   tangwang   评估框架
611
612
613
614
615
          repo_id: DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF
          # Glob pattern; it must stay quoted because a plain YAML scalar that starts
          # with "*" is parsed as an alias.
          filename: '*Q8_0.gguf'
          cache_dir: ./model_cache
          local_dir: ./models/reranker/qwen3-reranker-4b-gguf
          instruction: Rank products by query with category & style match prioritized
881d338b   tangwang   评估框架
616
          # T4 16GB / performance-first configuration: offload all layers; measured to be
          # noticeably faster than the conservative configuration.
5c21a485   tangwang   qwen3-reranker-0....
617
618
619
620
          n_ctx: 512
          n_batch: 512
          n_ubatch: 512
          # NOTE(review): 999 is presumably chosen to exceed the model's layer count so
          # that every layer is offloaded, per the note above -- confirm.
          n_gpu_layers: 999
3d508beb   tangwang   reranker-4b-gguf
621
622
623
624
625
626
627
628
629
          main_gpu: 0
          n_threads: 2
          n_threads_batch: 4
          flash_attn: true
          offload_kqv: true
          use_mmap: true
          use_mlock: false
          infer_batch_size: 8
          sort_by_doc_length: true
432d1c88   tangwang   评估框架
630
          length_sort_mode: char
3d508beb   tangwang   reranker-4b-gguf
631
632
          enable_warmup: true
          verbose: false
5c21a485   tangwang   qwen3-reranker-0....
633
        # Settings block keyed "qwen3_gguf_06b" (GGUF build of Qwen3-Reranker-0.6B, Q8_0 file).
        qwen3_gguf_06b:
432d1c88   tangwang   评估框架
634
635
636
637
638
          repo_id: ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF
          filename: qwen3-reranker-0.6b-q8_0.gguf
          cache_dir: ./model_cache
          local_dir: ./models/reranker/qwen3-reranker-0.6b-q8_0-gguf
          instruction: Rank products by query with category & style match prioritized
881d338b   tangwang   评估框架
639
640
          # 0.6B GGUF / online rerank baseline:
          # measured at roughly 265s for a single request of 400 titles, so it is better suited
          # as a low-VRAM functional fallback than as the low-latency online primary route.
5c21a485   tangwang   qwen3-reranker-0....
641
642
643
644
645
646
647
648
649
650
651
652
653
          n_ctx: 256
          n_batch: 256
          n_ubatch: 256
          # NOTE(review): 999 is presumably chosen to exceed the model's layer count so
          # that every layer is offloaded -- confirm.
          n_gpu_layers: 999
          main_gpu: 0
          n_threads: 2
          n_threads_batch: 4
          flash_attn: true
          offload_kqv: true
          use_mmap: true
          use_mlock: false
          infer_batch_size: 32
          sort_by_doc_length: true
432d1c88   tangwang   评估框架
654
          length_sort_mode: char
5c21a485   tangwang   qwen3-reranker-0....
655
656
657
          # NOTE(review): this key is set here but absent from the sibling qwen3_gguf
          # block; whether that block relies on a default is not visible in this file.
          reuse_query_state: false
          enable_warmup: true
          verbose: false
d31c7f65   tangwang   补充云服务reranker
658
        # Settings block keyed "dashscope_rerank" (remote rerank endpoint, model qwen3-rerank).
        dashscope_rerank:
432d1c88   tangwang   评估框架
659
          model_name: qwen3-rerank
881d338b   tangwang   评估框架
660
661
662
663
          # Choose the endpoint by region:
          # China:         https://dashscope.aliyuncs.com/compatible-api/v1/reranks
          # Singapore:     https://dashscope-intl.aliyuncs.com/compatible-api/v1/reranks
          # United States: https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks
432d1c88   tangwang   评估框架
664
665
666
          # The China endpoint from the list above is the one currently selected.
          endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks
          # NOTE(review): presumably the NAME of the environment variable that holds the
          # API key (not the key itself) -- confirm against the consumer.
          api_key_env: RERANK_DASHSCOPE_API_KEY_CN
          timeout_sec: 10.0
881d338b   tangwang   评估框架
667
668
          top_n_cap: 0   # 0 means top_n = number of documents in the current request; >0 caps top_n
          batchsize: 64  # 0 disables; >0 enables concurrent small-batch dispatch (top_n/top_n_cap still apply; results are truncated globally after batching)
432d1c88   tangwang   评估框架
669
          # NOTE(review): this block spells its keys "batchsize" and "instruct", while other
          # backends in this file use "batch_size" and "instruction". Do not rename without
          # checking the code that reads this block.
          instruct: Given a shopping query, rank product titles by relevance
d31c7f65   tangwang   补充云服务reranker
670
671
          max_retries: 2
          retry_backoff_sec: 0.2
881d338b   tangwang   评估框架
672
673
  
  # SPU configuration (enabled; uses nested skus)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
674
675
  spu_config:
    enabled: true
432d1c88   tangwang   评估框架
676
    spu_field: spu_id
4d824a77   tangwang   所有租户共用一套统一配置.tena...
677
    inner_hits_size: 10
881d338b   tangwang   评估框架
678
679
    # Selects which option dimensions take part in retrieval (both indexing and online search).
    # Format: a list containing one or more of option1/option2/option3.
432d1c88   tangwang   评估框架
680
681
682
683
    searchable_option_dimensions:
    - option1
    - option2
    - option3
881d338b   tangwang   评估框架
684
685
686
687
  
  # Tenant Configuration
  # Each tenant can set a primary language (primary_language) and its index languages
  # (index_languages: the main market languages, selectable by the merchant).
  # Default index_languages: [en, zh]; may be set to any subset of SOURCE_LANG_CODE_MAP.keys().
0064e946   tangwang   feat: 增量索引服务、租户配置...
688
  tenant_config:
0064e946   tangwang   feat: 增量索引服务、租户配置...
689
    # NOTE(review): presumably the fallback used for tenants not listed under
    # "tenants" below -- confirm against the loader.
    default:
432d1c88   tangwang   评估框架
690
691
692
693
      primary_language: en
      index_languages:
      - en
      - zh
0064e946   tangwang   feat: 增量索引服务、租户配置...
694
    # Per-tenant entries keyed by tenant ID. The IDs are quoted so that YAML keeps
    # them as strings instead of parsing them as integers.
    tenants:
432d1c88   tangwang   评估框架
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
      '1':
        primary_language: zh
        index_languages:
        - zh
        - en
      '2':
        primary_language: en
        index_languages:
        - en
        - zh
      '3':
        primary_language: zh
        index_languages:
        - zh
        - en
      '162':
        primary_language: zh
        index_languages:
        - zh
        - en
      '170':
        primary_language: en
        index_languages:
        - en
        - zh