Blame view

config/config.yaml 22 KB
4d824a77   tangwang   所有租户共用一套统一配置.tena...
1
  # Unified Configuration for Multi-Tenant Search Engine
33839b37   tangwang   属性值参与搜索:
2
3
  # 统一配置文件,所有租户共用一套配置
  # 注意:索引结构由 mappings/search_products.json 定义,此文件只配置搜索行为
86d0e83d   tangwang   query翻译,根据源语言是否在索...
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
  #
  # 约定:下列键为必填;进程环境变量可覆盖 infrastructure / runtime 中同名语义项
  #(如 ES_HOST、API_PORT 等),未设置环境变量时使用本文件中的值。
  
  # Process / bind addresses.
  # The APP_ENV, RUNTIME_ENV and ES_INDEX_NAMESPACE environment variables can
  # override the meaning of the first two keys below.
  runtime:
    environment: "prod"
    index_namespace: ""
    api_host: "0.0.0.0"
    api_port: 6002
    indexer_host: "0.0.0.0"
    indexer_port: 6004
    embedding_host: "0.0.0.0"
    # embedding_port and embedding_text_port hold the same value (6005).
    # NOTE(review): presumably embedding_port is kept as an older alias of the
    # text port — confirm before changing either one.
    embedding_port: 6005
    embedding_text_port: 6005
    embedding_image_port: 6008
cda1cd62   tangwang   意图分析&应用 baseline
20
    translator_host: "0.0.0.0"
86d0e83d   tangwang   query翻译,根据源语言是否在索...
21
    translator_port: 6006
cda1cd62   tangwang   意图分析&应用 baseline
22
    reranker_host: "0.0.0.0"
86d0e83d   tangwang   query翻译,根据源语言是否在索...
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
    reranker_port: 6007
  
  # Infrastructure connections. Sensitive items are read from environment
  # variables first: ES_*, REDIS_*, DB_*, DASHSCOPE_API_KEY, DEEPL_AUTH_KEY.
  # Values left as null here carry no default in this file.
  infrastructure:
    elasticsearch:
      host: "http://localhost:9200"
      username: null
      password: null
    redis:
      host: "localhost"
      # NOTE(review): 6479 is not the stock Redis port (6379) — confirm it is intentional.
      port: 6479
      snapshot_db: 0
      password: null
      socket_timeout: 1
      socket_connect_timeout: 1
      retry_on_timeout: false
      cache_expire_days: 720
      embedding_cache_prefix: "embedding"
      anchor_cache_prefix: "product_anchors"
      anchor_cache_expire_days: 30
    database:
      host: null
      port: 3306
      database: null
      username: null
      password: null
    secrets:
      dashscope_api_key: null
      deepl_auth_key: null
4d824a77   tangwang   所有租户共用一套统一配置.tena...
52
53
54
55
  
  # Elasticsearch Index
  es_index_name: "search_products"
  
86d0e83d   tangwang   query翻译,根据源语言是否在索...
56
57
58
  # Search domains / index list (may be an empty list; every field of each
  # entry must be given explicitly).
  indexes: []
  
86d8358b   tangwang   config optimize
59
60
61
62
  # Config assets
  assets:
    query_rewrite_dictionary_path: "config/dictionaries/query_rewrite.dict"
  
41f0b2e9   tangwang   product_enrich支持并发
63
64
65
66
  # Product content understanding (LLM enrich-content) configuration
  product_enrich:
    max_workers: 40
  
33839b37   tangwang   属性值参与搜索:
67
  # ES Index Settings (基础设置)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
68
69
70
71
72
  # Base Elasticsearch index settings.
  # NOTE(review): number_of_replicas is 0 (no replica copies) while
  # runtime.environment is "prod" — confirm an unreplicated single shard is intended.
  es_settings:
    number_of_shards: 1
    number_of_replicas: 0
    refresh_interval: "30s"
  
33839b37   tangwang   属性值参与搜索:
73
  # 字段权重配置(用于搜索时的字段boost)
0536222c   tangwang   query parser优化
74
  # 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang}。
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
75
  # 若需要按某个语言单独调权,也可以加显式 key(例如 title.de: 3.2)。
33839b37   tangwang   属性值参与搜索:
76
  field_boosts:
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
77
    title: 3.0
69881ecb   tangwang   相关性调参、enrich内容解析优化
78
79
80
81
    qanchors: 2.5
    tags: 2.0
    category_name_text: 2.0
    category_path: 2.0
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
82
    brief: 1.5
69881ecb   tangwang   相关性调参、enrich内容解析优化
83
    description: 1.5
e756b18e   tangwang   重构了文本召回构建器,现在每个 b...
84
    vendor: 1.5
69881ecb   tangwang   相关性调参、enrich内容解析优化
85
86
87
    option1_values: 1.5
    option2_values: 1.5
    option3_values: 1.5
33839b37   tangwang   属性值参与搜索:
88
  
33839b37   tangwang   属性值参与搜索:
89
  # Query Configuration(查询配置)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
90
  query_config:
33839b37   tangwang   属性值参与搜索:
91
    # 支持的语言
4d824a77   tangwang   所有租户共用一套统一配置.tena...
92
93
94
    supported_languages:
      - "zh"
      - "en"
2739b281   tangwang   多语言索引调整
95
    default_language: "en"
33839b37   tangwang   属性值参与搜索:
96
    
345d960b   tangwang   1. 删除全局 enable_tr...
97
    # 功能开关(翻译开关由tenant_config控制)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
98
99
    enable_text_embedding: true
    enable_query_rewrite: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
100
  
86d0e83d   tangwang   query翻译,根据源语言是否在索...
101
102
    # 查询翻译模型(须与 services.translation.capabilities 中某项一致)
    # 源语种在租户 index_languages 内:主召回可打在源语种字段,用下面三项。
ceaf6d03   tangwang   召回限定:must条件补充主干词命...
103
104
105
106
107
108
    zh_to_en_model: "nllb-200-distilled-600m" #  "opus-mt-zh-en"
    en_to_zh_model: "nllb-200-distilled-600m" #  "opus-mt-en-zh"
    default_translation_model: "nllb-200-distilled-600m"
    # zh_to_en_model: "deepl"
    # en_to_zh_model: "deepl"
    # default_translation_model: "deepl"
86d0e83d   tangwang   query翻译,根据源语言是否在索...
109
    # 源语种不在 index_languages:翻译对可检索文本更关键,可单独指定(缺省则与上一组相同)
ceaf6d03   tangwang   召回限定:must条件补充主干词命...
110
111
112
113
114
115
    zh_to_en_model__source_not_in_index: "nllb-200-distilled-600m"
    en_to_zh_model__source_not_in_index: "nllb-200-distilled-600m"
    default_translation_model__source_not_in_index: "nllb-200-distilled-600m"
    # zh_to_en_model__source_not_in_index: "deepl"
    # en_to_zh_model__source_not_in_index: "deepl"
    # default_translation_model__source_not_in_index: "deepl"
86d0e83d   tangwang   query翻译,根据源语言是否在索...
116
  
1556989b   tangwang   query翻译等待超时逻辑
117
118
    # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒)。
    # 检测语言已在租户 index_languages 内:较短;不在索引语言内:较长(翻译对召回更关键)。
86d0e83d   tangwang   query翻译,根据源语言是否在索...
119
    translation_embedding_wait_budget_ms_source_in_index: 500 # 80
837d5d76   tangwang   sku筛选匹配规则优化,按 tok...
120
    translation_embedding_wait_budget_ms_source_not_in_index: 700 #200
1556989b   tangwang   query翻译等待超时逻辑
121
  
cda1cd62   tangwang   意图分析&应用 baseline
122
123
    style_intent:
      enabled: true
87cacb1b   tangwang   融合公式优化。加入意图匹配因子
124
      selected_sku_boost: 1.2
cda1cd62   tangwang   意图分析&应用 baseline
125
126
127
128
129
130
      color_dictionary_path: "config/dictionaries/style_intent_color.csv"
      size_dictionary_path: "config/dictionaries/style_intent_size.csv"
      # Alias spellings (English and Chinese) listed per style dimension.
      dimension_aliases:
        color:
          - "color"
          - "colors"
          - "colour"
          - "colours"
          - "颜色"
          - "色"
          - "色系"
        size:
          - "size"
          - "sizes"
          - "sizing"
          - "尺码"
          - "尺寸"
          - "码数"
          - "号码"
          - "码"
  
74fdf9bd   tangwang   1.
131
132
133
134
    product_title_exclusion:
      enabled: true
      dictionary_path: "config/dictionaries/product_title_exclusion.tsv"
  
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
135
136
137
138
139
140
    # 动态多语言检索字段配置
    # multilingual_fields 会被拼成 title.{lang}/brief.{lang}/... 形式;
    # shared_fields 为无语言后缀字段。
    search_fields:
      multilingual_fields:
        - "title"
69881ecb   tangwang   相关性调参、enrich内容解析优化
141
        - "qanchors"
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
142
143
        - "category_path"
        - "category_name_text"
69881ecb   tangwang   相关性调参、enrich内容解析优化
144
145
        - "brief"
        - "description"
e756b18e   tangwang   重构了文本召回构建器,现在每个 b...
146
        - "vendor"
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
147
      # No language-agnostic fields are currently enabled (the candidates below are
      # commented out). An explicit empty list is used because a bare `shared_fields:`
      # key loads as null rather than as an empty sequence.
      shared_fields: []
445496cd   tangwang   fix last up: 每个翻译...
148
149
150
151
        # - "tags"
        # - "option1_values"
        # - "option2_values"
        # - "option3_values"
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
152
153
      core_multilingual_fields:
        - "title"
e756b18e   tangwang   重构了文本召回构建器,现在每个 b...
154
        - "qanchors"
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
155
156
        - "category_name_text"
  
0536222c   tangwang   query parser优化
157
    # 统一文本召回策略(主查询 + 翻译查询)
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
158
    text_query_strategy:
ceaf6d03   tangwang   召回限定:must条件补充主干词命...
159
160
      base_minimum_should_match: "60%"
      translation_minimum_should_match: "60%"
69881ecb   tangwang   相关性调参、enrich内容解析优化
161
162
      translation_boost: 0.75
      tie_breaker_base_query: 0.5
e756b18e   tangwang   重构了文本召回构建器,现在每个 b...
163
164
165
166
167
168
169
170
171
      best_fields_boost: 2.0
      best_fields:
        title: 4.0
        qanchors: 3.0
        category_name_text: 2.0
      phrase_fields:
        title: 5.0
        qanchors: 4.0
      phrase_match_boost: 3.0
bd96cead   tangwang   1. 动态多语言字段与统一策略配置
172
  
33839b37   tangwang   属性值参与搜索:
173
174
    # Embedding字段名称
    text_embedding_field: "title_embedding"
24edc208   tangwang   修改_extract_combin...
175
    image_embedding_field: "image_embedding.vector"
325eec03   tangwang   1. 日志、配置基础设施,使用优化
176
  
33839b37   tangwang   属性值参与搜索:
177
178
    # 返回字段配置(_source includes)
    # null表示返回所有字段,[]表示不返回任何字段,列表表示只返回指定字段
a7cc9078   tangwang   sku排序
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
    # 下列字段与 api/result_formatter.py(SpuResult 填充)及 search/searcher.py(SKU 排序/主图替换)一致
    source_fields:
      - spu_id
      - handle
      - title
      - brief
      - description
      - vendor
      - category_name
      - category_name_text
      - category_path
      - category_id
      - category_level
      - category1_name
      - category2_name
      - category3_name
      - tags
      - min_price
      - compare_at_price
      - image_url
      - sku_prices
      - sku_weights
      - sku_weight_units
      - total_inventory
      - option1_name
      - option1_values
dad3c867   tangwang   configs
205
206
207
208
      - option2_name
      - option2_values
      - option3_name
      - option3_values
a7cc9078   tangwang   sku排序
209
210
      - specifications
      - skus
70dab99f   tangwang   add logs
211
    
ed13851c   tangwang   图片文本两个knn召回相关参数配置
212
    # KNN:文本向量与多模态(图片)向量各自 boost 与召回(k / num_candidates)
ceaf6d03   tangwang   召回限定:must条件补充主干词命...
213
214
    knn_text_boost: 4
    knn_image_boost: 4
ed13851c   tangwang   图片文本两个knn召回相关参数配置
215
  
de98daa3   tangwang   多模态召回优化
216
217
218
    # knn_text_num_candidates = k * 3.5
    knn_text_k: 160
    knn_text_num_candidates: 560
ed13851c   tangwang   图片文本两个knn召回相关参数配置
219
  
de98daa3   tangwang   多模态召回优化
220
221
    knn_text_k_long: 400
    knn_text_num_candidates_long: 1200
ed13851c   tangwang   图片文本两个knn召回相关参数配置
222
  
de98daa3   tangwang   多模态召回优化
223
224
    knn_image_k: 400
    knn_image_num_candidates: 1200
4d824a77   tangwang   所有租户共用一套统一配置.tena...
225
  
4d824a77   tangwang   所有租户共用一套统一配置.tena...
226
227
228
229
  # Function Score配置(ES层打分规则)
  function_score:
    score_mode: "sum"
    boost_mode: "multiply"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
230
231
    functions: []
  
8c8b9d84   tangwang   ES 拉取 coarse_rank...
232
233
234
235
236
237
238
239
  # 粗排配置(仅融合 ES 文本/向量信号,不调用模型)
  coarse_rank:
    enabled: true
    input_window: 700
    output_window: 240
    fusion:
      text_bias: 0.1
      text_exponent: 0.35
de98daa3   tangwang   多模态召回优化
240
241
242
      # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合)
      # 因为es的打分已经给了trans进行了折扣,所以这里不再继续折扣
      text_translation_weight: 1.0
8c8b9d84   tangwang   ES 拉取 coarse_rank...
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
      knn_text_weight: 1.0
      knn_image_weight: 1.0
      knn_tie_breaker: 0.1
      knn_bias: 0.6
      knn_exponent: 0.0
  
  # Fine-ranking stage (lightweight reranker).
  fine_rank:
    enabled: true
    # Same value as coarse_rank.output_window (240).
    input_window: 240
    # Same value as rerank.rerank_window (80).
    output_window: 80
    timeout_sec: 10.0
    rerank_query_template: "{query}"
    rerank_doc_template: "{title}"
    # NOTE(review): presumably selects the "fine" instance declared under
    # services.rerank — confirm against the loader.
    service_profile: "fine"
  
42e3aea6   tangwang   tidy
259
  # 重排配置(provider/URL 在 services.rerank)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
260
  rerank:
5f7d7f09   tangwang   性能测试报告.md
261
    enabled: true
8c8b9d84   tangwang   ES 拉取 coarse_rank...
262
    rerank_window: 80
42e3aea6   tangwang   tidy
263
    timeout_sec: 15.0
506c39b7   tangwang   feat(search): 统一重...
264
265
    weight_es: 0.4
    weight_ai: 0.6
ff32d894   tangwang   rerank
266
267
    rerank_query_template: "{query}"
    rerank_doc_template: "{title}"
8c8b9d84   tangwang   ES 拉取 coarse_rank...
268
    service_profile: "default"
814e352b   tangwang   乘法公式配置化
269
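    # Multiplicative fusion: fused = Π (max(score, 0) + bias) ** exponent, described here
    # as three terms (rerank / text / knn).
    # NOTE(review): the fusion block below also sets fine_bias / fine_exponent, so a
    # fourth (fine) term presumably participates — confirm and update this formula.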
24edc208   tangwang   修改_extract_combin...
270
271
272
    # 其中 knn_score 先做一层 dis_max:
    #   max(knn_text_weight * text_knn, knn_image_weight * image_knn)
    #   + knn_tie_breaker * 另一侧较弱信号
814e352b   tangwang   乘法公式配置化
273
274
275
    fusion:
      rerank_bias: 0.00001
      rerank_exponent: 1.0
8c8b9d84   tangwang   ES 拉取 coarse_rank...
276
277
      fine_bias: 0.00001
      fine_exponent: 1.0
814e352b   tangwang   乘法公式配置化
278
279
      text_bias: 0.1
      text_exponent: 0.35
de98daa3   tangwang   多模态召回优化
280
281
      # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合)
      text_translation_weight: 1.0
24edc208   tangwang   修改_extract_combin...
282
283
284
      knn_text_weight: 1.0
      knn_image_weight: 1.0
      knn_tie_breaker: 0.1
814e352b   tangwang   乘法公式配置化
285
      knn_bias: 0.6
ceaf6d03   tangwang   召回限定:must条件补充主干词命...
286
      knn_exponent: 0.0
4d824a77   tangwang   所有租户共用一套统一配置.tena...
287
  
42e3aea6   tangwang   tidy
288
289
290
  # 可扩展服务/provider 注册表(单一配置源)
  services:
    translation:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
291
      service_url: "http://127.0.0.1:6006"
f86c5fee   tangwang   reranker性能参数脚本放在:...
292
      # default_model: "nllb-200-distilled-600m"
971a0851   tangwang   补充reranker-jina,探...
293
      default_model: "nllb-200-distilled-600m"
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
294
      default_scene: "general"
42e3aea6   tangwang   tidy
295
      timeout_sec: 10.0
d4cadc13   tangwang   翻译重构
296
      cache:
d4cadc13   tangwang   翻译重构
297
298
        ttl_seconds: 62208000
        sliding_expiration: true
8140e942   tangwang   translator model ...
299
300
301
302
303
304
305
306
307
308
309
        # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups).
        enable_model_quality_tier_cache: true
        # Higher tier = better quality. Multiple models may share one tier (same level).
        # A request whose model has tier A may reuse Redis keys from models with
        # tier > A or tier == A (not from lower tiers).
        # Keys are the model names declared under services.translation.capabilities.
        model_quality_tiers:
          deepl: 30
          qwen-mt: 30
          llm: 30
          nllb-200-distilled-600m: 20
          opus-mt-zh-en: 10
          opus-mt-en-zh: 10
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
310
      capabilities:
d4cadc13   tangwang   翻译重构
311
        qwen-mt:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
312
          enabled: true
0fd2f875   tangwang   translate
313
          backend: "qwen_mt"
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
314
          model: "qwen-mt-flash"
0fd2f875   tangwang   translate
315
          base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
42e3aea6   tangwang   tidy
316
          timeout_sec: 10.0
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
317
          use_cache: true
a0a173ae   tangwang   last
318
        llm:
5e4dc8e4   tangwang   翻译架构按“一个翻译服务 +
319
          enabled: true
0fd2f875   tangwang   translate
320
          backend: "llm"
a0a173ae   tangwang   last
321
          model: "qwen-flash"
0fd2f875   tangwang   translate
322
          base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
a0a173ae   tangwang   last
323
          timeout_sec: 30.0
cd4ce66d   tangwang   trans logs
324
          use_cache: true
d4cadc13   tangwang   翻译重构
325
        deepl:
cd4ce66d   tangwang   trans logs
326
          enabled: true
0fd2f875   tangwang   translate
327
328
          backend: "deepl"
          api_url: "https://api.deepl.com/v2/translate"
d4cadc13   tangwang   翻译重构
329
          timeout_sec: 10.0
d4cadc13   tangwang   翻译重构
330
          glossary_id: ""
cd4ce66d   tangwang   trans logs
331
          use_cache: true
0fd2f875   tangwang   translate
332
        nllb-200-distilled-600m:
93be98cb   tangwang   清理过时的文档
333
          enabled: true
0fd2f875   tangwang   translate
334
335
336
          backend: "local_nllb"
          model_id: "facebook/nllb-200-distilled-600M"
          model_dir: "./models/translation/facebook/nllb-200-distilled-600M"
ea293660   tangwang   CTranslate2
337
338
339
340
          ct2_model_dir: "./models/translation/facebook/nllb-200-distilled-600M/ctranslate2-float16"
          ct2_compute_type: "float16"
          ct2_conversion_quantization: "float16"
          ct2_auto_convert: true
46ce858d   tangwang   在NLLB模型的 /data/sa...
341
          ct2_inter_threads: 4
ea293660   tangwang   CTranslate2
342
          ct2_intra_threads: 0
46ce858d   tangwang   在NLLB模型的 /data/sa...
343
          ct2_max_queued_batches: 32
ea293660   tangwang   CTranslate2
344
          ct2_batch_type: "examples"
46ce858d   tangwang   在NLLB模型的 /data/sa...
345
346
347
          ct2_decoding_length_mode: "source"
          ct2_decoding_length_extra: 8
          ct2_decoding_length_min: 32
0fd2f875   tangwang   translate
348
349
          device: "cuda"
          torch_dtype: "float16"
4747e2f4   tangwang   embedding perform...
350
          batch_size: 64
0fd2f875   tangwang   translate
351
          max_input_length: 256
3eff49b7   tangwang   trans nllb-200-di...
352
          max_new_tokens: 64
0fd2f875   tangwang   translate
353
          num_beams: 1
cd4ce66d   tangwang   trans logs
354
          use_cache: true
0fd2f875   tangwang   translate
355
        opus-mt-zh-en:
f86c5fee   tangwang   reranker性能参数脚本放在:...
356
          enabled: false
0fd2f875   tangwang   translate
357
358
359
          backend: "local_marian"
          model_id: "Helsinki-NLP/opus-mt-zh-en"
          model_dir: "./models/translation/Helsinki-NLP/opus-mt-zh-en"
ea293660   tangwang   CTranslate2
360
361
362
363
364
365
366
367
          ct2_model_dir: "./models/translation/Helsinki-NLP/opus-mt-zh-en/ctranslate2-float16"
          ct2_compute_type: "float16"
          ct2_conversion_quantization: "float16"
          ct2_auto_convert: true
          ct2_inter_threads: 1
          ct2_intra_threads: 0
          ct2_max_queued_batches: 0
          ct2_batch_type: "examples"
0fd2f875   tangwang   translate
368
369
370
371
372
373
          device: "cuda"
          torch_dtype: "float16"
          batch_size: 16
          max_input_length: 256
          max_new_tokens: 256
          num_beams: 1
cd4ce66d   tangwang   trans logs
374
          use_cache: true
0fd2f875   tangwang   translate
375
        opus-mt-en-zh:
f86c5fee   tangwang   reranker性能参数脚本放在:...
376
          enabled: false
0fd2f875   tangwang   translate
377
378
379
          backend: "local_marian"
          model_id: "Helsinki-NLP/opus-mt-en-zh"
          model_dir: "./models/translation/Helsinki-NLP/opus-mt-en-zh"
ea293660   tangwang   CTranslate2
380
381
382
383
384
385
386
387
          ct2_model_dir: "./models/translation/Helsinki-NLP/opus-mt-en-zh/ctranslate2-float16"
          ct2_compute_type: "float16"
          ct2_conversion_quantization: "float16"
          ct2_auto_convert: true
          ct2_inter_threads: 1
          ct2_intra_threads: 0
          ct2_max_queued_batches: 0
          ct2_batch_type: "examples"
0fd2f875   tangwang   translate
388
389
390
391
392
393
          device: "cuda"
          torch_dtype: "float16"
          batch_size: 16
          max_input_length: 256
          max_new_tokens: 256
          num_beams: 1
cd4ce66d   tangwang   trans logs
394
          use_cache: true
42e3aea6   tangwang   tidy
395
    embedding:
950a640e   tangwang   embeddings
396
      provider: "http"  # http
42e3aea6   tangwang   tidy
397
398
      providers:
        http:
7214c2e7   tangwang   mplemented**
399
400
          text_base_url: "http://127.0.0.1:6005"
          image_base_url: "http://127.0.0.1:6008"
07cf5a93   tangwang   START_EMBEDDING=...
401
      # 服务内文本后端(embedding 进程启动时读取)
efd435cf   tangwang   tei性能调优:
402
      backend: "tei"  # tei | local_st
07cf5a93   tangwang   START_EMBEDDING=...
403
404
405
      backends:
        tei:
          base_url: "http://127.0.0.1:8080"
efd435cf   tangwang   tei性能调优:
406
          timeout_sec: 20
07cf5a93   tangwang   START_EMBEDDING=...
407
408
409
410
411
412
          model_id: "Qwen/Qwen3-Embedding-0.6B"
        local_st:
          model_id: "Qwen/Qwen3-Embedding-0.6B"
          device: "cuda"
          batch_size: 32
          normalize_embeddings: true
6d71d8e0   tangwang   多模态模型配置
413
414
415
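      # In-service image backend (read when the embedding process starts; the cnclip gRPC
      # service and the 6008 service must use the same model_name).
      # Chinese-CLIP: ViT-H-14 → 1024 dims, ViT-L-14 → 768 dims. The model must match
      # image_embedding.vector.dims in mappings/search_products.json.
      # NOTE(review): this comment previously stated that the index is 1024-dim (ViT-H-14 as
      # the default), but both image backends below are configured with ViT-L-14 (768 dims) —
      # confirm which is current and align the mapping or model_name accordingly.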
86d8358b   tangwang   config optimize
416
417
418
419
      image_backend: "clip_as_service"  # clip_as_service | local_cnclip
      image_backends:
        clip_as_service:
          server: "grpc://127.0.0.1:51000"
1681a135   tangwang   image_embeddin si...
420
          model_name: "CN-CLIP/ViT-L-14"
86d8358b   tangwang   config optimize
421
422
423
          batch_size: 8
          normalize_embeddings: true
        local_cnclip:
1681a135   tangwang   image_embeddin si...
424
          model_name: "ViT-L-14"
86d8358b   tangwang   config optimize
425
426
427
          device: null
          batch_size: 8
          normalize_embeddings: true
42e3aea6   tangwang   tidy
428
    rerank:
701ae503   tangwang   docs
429
      provider: "http"
42e3aea6   tangwang   tidy
430
431
      providers:
        http:
daa2690b   tangwang   漏斗参数调优&呈现优化
432
433
434
435
436
437
438
          instances:
            default:
              base_url: "http://127.0.0.1:6007"
              service_url: "http://127.0.0.1:6007/rerank"
            fine:
              base_url: "http://127.0.0.1:6009"
              service_url: "http://127.0.0.1:6009/rerank"
86d8358b   tangwang   config optimize
439
440
441
      request:
        max_docs: 1000
        normalize: true
daa2690b   tangwang   漏斗参数调优&呈现优化
442
443
444
445
446
447
448
449
450
451
452
453
454
      default_instance: "default"
      # Named instances: the same reranker code reads a different port / backend /
      # runtime directory per instance name.
      # Ports 6007 / 6009 are the ones used in providers.http.instances URLs, and each
      # backend value names an entry under services.rerank.backends.
      instances:
        default:
          host: "0.0.0.0"
          port: 6007
          backend: "qwen3_vllm_score"
          runtime_dir: "./.runtime/reranker/default"
        fine:
          host: "0.0.0.0"
          port: 6009
          backend: "bge"
          runtime_dir: "./.runtime/reranker/fine"
701ae503   tangwang   docs
455
456
457
458
459
460
      backends:
        bge:
          model_name: "BAAI/bge-reranker-v2-m3"
          device: null
          use_fp16: true
          batch_size: 64
00c8ddb9   tangwang   suggest rank opti...
461
          max_length: 160
701ae503   tangwang   docs
462
463
          cache_dir: "./model_cache"
          enable_warmup: true
971a0851   tangwang   补充reranker-jina,探...
464
465
466
        jina_reranker_v3:
          model_name: "jinaai/jina-reranker-v3"
          device: null
74116f05   tangwang   jina-reranker-v3性...
467
          dtype: "float16"
971a0851   tangwang   补充reranker-jina,探...
468
          batch_size: 64
74116f05   tangwang   jina-reranker-v3性...
469
470
471
          max_doc_length: 160
          max_query_length: 64
          sort_by_doc_length: true
971a0851   tangwang   补充reranker-jina,探...
472
473
          cache_dir: "./model_cache"
          trust_remote_code: true
701ae503   tangwang   docs
474
475
476
        qwen3_vllm:
          model_name: "Qwen/Qwen3-Reranker-0.6B"
          engine: "vllm"
b0972ff9   tangwang   qwen3_vllm_score ...
477
          max_model_len: 256
701ae503   tangwang   docs
478
          tensor_parallel_size: 1
ef5baa86   tangwang   混杂语言处理
479
          gpu_memory_utilization: 0.20
07cf5a93   tangwang   START_EMBEDDING=...
480
          dtype: "float16"
bc089b43   tangwang   refactor(reranker...
481
482
          enable_prefix_caching: true
          enforce_eager: false
00c8ddb9   tangwang   suggest rank opti...
483
          infer_batch_size: 100
9f5994b4   tangwang   reranker
484
          sort_by_doc_length: true
b0972ff9   tangwang   qwen3_vllm_score ...
485
486
          # standard=_format_instruction__standard(固定 yes/no system);compact=_format_instruction(instruction 作 system 且 user 内重复 Instruct)
          instruction_format: standard # compact standard
fb973d19   tangwang   configs
487
          # instruction: "Given a query, score the product for relevance"
6adbf18a   tangwang   reranker提示词优化
488
489
490
491
492
493
494
          # "rank products by given query" 比 “Given a query, score the product for relevance” 更好点
          # instruction: "rank products by given query, category match first" 
          # instruction: "Rank products by query relevance, prioritizing category match"
          # instruction: "Rank products by query relevance, prioritizing category and style match"
          # instruction: "Rank by query relevance, prioritize category & style"
          # instruction: "Relevance ranking: category & style match first"
          # instruction: "Score product relevance by query with category & style match prioritized"
e38dc1be   tangwang   融合公式参数调整、以及展示信息优化
495
496
497
          # instruction: "Rank products by query with category & style match prioritized"
          # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query"
          instruction: "rank products by given query"
4823f463   tangwang   qwen3_vllm_score ...
498
499
        # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score
        # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。
9de5ef49   tangwang   qwen3_vllm_score ...
500
501
502
503
        qwen3_vllm_score:
          model_name: "Qwen/Qwen3-Reranker-0.6B"
          # 官方 Hub 原版需 true;若改用已转换的 seq-cls 权重(如 tomaarsen/...-seq-cls)则设为 false
          use_original_qwen3_hf_overrides: true
9de5ef49   tangwang   qwen3_vllm_score ...
504
505
506
507
508
          # vllm_runner: "auto"
          # vllm_convert: "auto"
          # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并
          # hf_overrides: {}
          engine: "vllm"
f86c5fee   tangwang   reranker性能参数脚本放在:...
509
          max_model_len: 172
9de5ef49   tangwang   qwen3_vllm_score ...
510
          tensor_parallel_size: 1
c3425429   tangwang   在以下文件中完成精排/融合清理工作...
511
          gpu_memory_utilization: 0.15
9de5ef49   tangwang   qwen3_vllm_score ...
512
513
514
          dtype: "float16"
          enable_prefix_caching: true
          enforce_eager: false
f86c5fee   tangwang   reranker性能参数脚本放在:...
515
          infer_batch_size: 84
9de5ef49   tangwang   qwen3_vllm_score ...
516
          sort_by_doc_length: true
b0972ff9   tangwang   qwen3_vllm_score ...
517
518
          # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致
          instruction_format: standard # compact standard
e38dc1be   tangwang   融合公式参数调整、以及展示信息优化
519
          # instruction: "Rank products by query with category & style match prioritized"
9de5ef49   tangwang   qwen3_vllm_score ...
520
          instruction: "Rank products by query with category & style match prioritized"
e38dc1be   tangwang   融合公式参数调整、以及展示信息优化
521
          # instruction: "Given a shopping query, rank products by relevance"
d31c7f65   tangwang   补充云服务reranker
522
523
        qwen3_transformers:
          model_name: "Qwen/Qwen3-Reranker-0.6B"
77bfa7e3   tangwang   query translate
524
          instruction: "rank products by given query"
fb973d19   tangwang   configs
525
          # instruction: "Score the product’s relevance to the given query"
d31c7f65   tangwang   补充云服务reranker
526
527
528
          max_length: 8192
          batch_size: 64
          use_fp16: true
fb973d19   tangwang   configs
529
530
          # sdpa:默认无需 flash-attn;若已安装 flash_attn 可改为 flash_attention_2
          attn_implementation: "sdpa"
4823f463   tangwang   qwen3_vllm_score ...
531
532
533
534
535
536
        # Packed Transformers backend: shared query prefix + custom position_ids/attention_mask.
        # For 1 query + many short docs (for example 400 product titles), this usually reduces
        # repeated prefix work and padding waste compared with pairwise batching.
        qwen3_transformers_packed:
          model_name: "Qwen/Qwen3-Reranker-0.6B"
          instruction: "Rank products by query with category & style match prioritized"
b0972ff9   tangwang   qwen3_vllm_score ...
537
          max_model_len: 256
4823f463   tangwang   qwen3_vllm_score ...
538
539
540
541
542
543
544
          max_doc_len: 160
          max_docs_per_pack: 0
          use_fp16: true
          sort_by_doc_length: true
          # Packed mode relies on a custom 4D attention mask. "eager" is the safest default.
          # If your torch/transformers stack validates it, you can benchmark "sdpa".
          attn_implementation: "eager"
3d508beb   tangwang   reranker-4b-gguf
545
546
547
548
549
550
        qwen3_gguf:
          repo_id: "DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF"
          filename: "*Q8_0.gguf"
          cache_dir: "./model_cache"
          local_dir: "./models/reranker/qwen3-reranker-4b-gguf"
          instruction: "Rank products by query with category & style match prioritized"
5c21a485   tangwang   qwen3-reranker-0....
551
552
553
554
555
          # T4 16GB / 性能优先配置:全量层 offload,实测比保守配置明显更快
          n_ctx: 512
          n_batch: 512
          n_ubatch: 512
          n_gpu_layers: 999
3d508beb   tangwang   reranker-4b-gguf
556
557
558
559
560
561
562
563
564
565
566
567
          main_gpu: 0
          n_threads: 2
          n_threads_batch: 4
          flash_attn: true
          offload_kqv: true
          use_mmap: true
          use_mlock: false
          infer_batch_size: 8
          sort_by_doc_length: true
          length_sort_mode: "char"
          enable_warmup: true
          verbose: false
5c21a485   tangwang   qwen3-reranker-0....
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
        # GGUF (Q8_0) build of Qwen3-Reranker-0.6B.
        qwen3_gguf_06b:
          repo_id: "ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF"
          filename: "qwen3-reranker-0.6b-q8_0.gguf"
          cache_dir: "./model_cache"
          local_dir: "./models/reranker/qwen3-reranker-0.6b-q8_0-gguf"
          instruction: "Rank products by query with category & style match prioritized"
          # 0.6B GGUF / online rerank baseline:
          # measured at roughly 265 s for a single request of 400 titles, so it is better
          # suited as a low-VRAM functional fallback than as the low-latency online primary route.
          n_ctx: 256
          n_batch: 256
          n_ubatch: 256
          n_gpu_layers: 999
          main_gpu: 0
          n_threads: 2
          n_threads_batch: 4
          flash_attn: true
          offload_kqv: true
          use_mmap: true
          use_mlock: false
          infer_batch_size: 32
          sort_by_doc_length: true
          length_sort_mode: "char"
          reuse_query_state: false
          enable_warmup: true
          verbose: false
d31c7f65   tangwang   补充云服务reranker
593
594
595
596
597
598
599
        dashscope_rerank:
          model_name: "qwen3-rerank"
          # Choose the endpoint by region:
          # China:     https://dashscope.aliyuncs.com/compatible-api/v1/reranks
          # Singapore: https://dashscope-intl.aliyuncs.com/compatible-api/v1/reranks
          # US:        https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks
          # The value below is the China endpoint.
          endpoint: "https://dashscope.aliyuncs.com/compatible-api/v1/reranks"
0d3e73ba   tangwang   rerank mini batch
600
601
          api_key_env: "RERANK_DASHSCOPE_API_KEY_CN"
          timeout_sec: 10.0 # 
d31c7f65   tangwang   补充云服务reranker
602
          top_n_cap: 0   # 0 表示 top_n=当前请求文档数;>0 则限制 top_n 上限
0d3e73ba   tangwang   rerank mini batch
603
          batchsize: 64 # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断)
d31c7f65   tangwang   补充云服务reranker
604
605
606
          instruct: "Given a shopping query, rank product titles by relevance"
          max_retries: 2
          retry_backoff_sec: 0.2
42e3aea6   tangwang   tidy
607
  
cadc77b6   tangwang   索引字段名、变量名、API数据结构...
608
  # SPU配置(已启用,使用嵌套skus)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
609
610
  spu_config:
    enabled: true
cadc77b6   tangwang   索引字段名、变量名、API数据结构...
611
    spu_field: "spu_id"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
612
    inner_hits_size: 10
33839b37   tangwang   属性值参与搜索:
613
614
615
    # Which option dimensions take part in retrieval (both indexing and online search).
    # A list holding one or more of option1 / option2 / option3.
    searchable_option_dimensions:
      - "option1"
      - "option2"
      - "option3"
0064e946   tangwang   feat: 增量索引服务、租户配置...
616
617
  
  # 租户配置(Tenant Configuration)
038e4e2f   tangwang   refactor(i18n): t...
618
  # 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选)
6f7840cf   tangwang   refactor: rename ...
619
  # 默认 index_languages: [en, zh],可配置为任意 SOURCE_LANG_CODE_MAP.keys() 的子集
0064e946   tangwang   feat: 增量索引服务、租户配置...
620
  tenant_config:
0064e946   tangwang   feat: 增量索引服务、租户配置...
621
    default:
2739b281   tangwang   多语言索引调整
622
      primary_language: "en"
038e4e2f   tangwang   refactor(i18n): t...
623
      index_languages: ["en", "zh"]
0064e946   tangwang   feat: 增量索引服务、租户配置...
624
625
626
    tenants:
      "1":
        primary_language: "zh"
038e4e2f   tangwang   refactor(i18n): t...
627
        index_languages: ["zh", "en"]
0064e946   tangwang   feat: 增量索引服务、租户配置...
628
629
      "2":
        primary_language: "en"
038e4e2f   tangwang   refactor(i18n): t...
630
        index_languages: ["en", "zh"]
0064e946   tangwang   feat: 增量索引服务、租户配置...
631
632
      "3":
        primary_language: "zh"
038e4e2f   tangwang   refactor(i18n): t...
633
        index_languages: ["zh", "en"]
0064e946   tangwang   feat: 增量索引服务、租户配置...
634
635
      "162":
        primary_language: "zh"
038e4e2f   tangwang   refactor(i18n): t...
636
        index_languages: ["zh", "en"]
cff5e86f   tangwang   reindex
637
638
      "170":
        primary_language: "en"
038e4e2f   tangwang   refactor(i18n): t...
639
        index_languages: ["en", "zh"]