Commit 628ff04f9cbe3a7bbcaa9f3e5f92d378b0b2774d
1 parent
b2dff38f
config-with-reranker.yaml
Showing
3 changed files
with
932 additions
and
0 deletions
Show diff stats
artifacts/search_evaluation/build_launches/clothing_top771_build_resume_direct_20260421T034611Z.pid
0 → 100644
| ... | ... | @@ -0,0 +1 @@ |
| 1 | +2900923 | ... | ... |
| ... | ... | @@ -0,0 +1,673 @@ |
| 1 | +runtime: | |
| 2 | + environment: prod | |
| 3 | + index_namespace: '' | |
| 4 | + api_host: 0.0.0.0 | |
| 5 | + api_port: 6002 | |
| 6 | + indexer_host: 0.0.0.0 | |
| 7 | + indexer_port: 6004 | |
| 8 | + embedding_host: 0.0.0.0 | |
| 9 | + embedding_port: 6005 | |
| 10 | + embedding_text_port: 6005 | |
| 11 | + embedding_image_port: 6008 | |
| 12 | + translator_host: 0.0.0.0 | |
| 13 | + translator_port: 6006 | |
| 14 | + reranker_host: 0.0.0.0 | |
| 15 | + reranker_port: 6007 | |
| 16 | +infrastructure: | |
| 17 | + elasticsearch: | |
| 18 | + host: http://localhost:9200 | |
| 19 | + username: null | |
| 20 | + password: null | |
| 21 | + redis: | |
| 22 | + host: localhost | |
| 23 | + port: 6479 | |
| 24 | + snapshot_db: 0 | |
| 25 | + password: null | |
| 26 | + socket_timeout: 1 | |
| 27 | + socket_connect_timeout: 1 | |
| 28 | + retry_on_timeout: false | |
| 29 | + cache_expire_days: 720 | |
| 30 | + embedding_cache_prefix: embedding | |
| 31 | + anchor_cache_prefix: product_anchors | |
| 32 | + anchor_cache_expire_days: 30 | |
| 33 | + database: | |
| 34 | + host: null | |
| 35 | + port: 3306 | |
| 36 | + database: null | |
| 37 | + username: null | |
| 38 | + password: null | |
| 39 | + secrets: | |
| 40 | + dashscope_api_key: null | |
| 41 | + deepl_auth_key: null | |
| 42 | +es_index_name: search_products | |
| 43 | +indexes: [] | |
| 44 | +assets: | |
| 45 | + query_rewrite_dictionary_path: config/dictionaries/query_rewrite.dict | |
| 46 | +product_enrich: | |
| 47 | + max_workers: 40 | |
| 48 | +suggestion: | |
| 49 | + sat_recall_min: 40 | |
| 50 | + sat_recall_cap: 100 | |
| 51 | +search_evaluation: | |
| 52 | + artifact_root: artifacts/search_evaluation | |
| 53 | + queries_file: scripts/evaluation/queries/queries.txt | |
| 54 | + default_dataset_id: core_queries | |
| 55 | + datasets: | |
| 56 | + - dataset_id: core_queries | |
| 57 | + display_name: Core Queries | |
| 58 | + description: Legacy baseline evaluation set from queries.txt | |
| 59 | + query_file: scripts/evaluation/queries/queries.txt | |
| 60 | + tenant_id: '163' | |
| 61 | + language: en | |
| 62 | + enabled: true | |
| 63 | + - dataset_id: clothing_top771 | |
| 64 | + display_name: Clothing Filtered 771 | |
| 65 | + description: 771 clothing / shoes / accessories queries filtered from top1k | |
| 66 | + query_file: scripts/evaluation/queries/all_keywords.txt.top1w.shuf.top1k.clothing_filtered | |
| 67 | + tenant_id: '163' | |
| 68 | + language: en | |
| 69 | + enabled: true | |
| 70 | + eval_log_dir: logs | |
| 71 | + default_tenant_id: '163' | |
| 72 | + search_base_url: '' | |
| 73 | + web_host: 0.0.0.0 | |
| 74 | + web_port: 6010 | |
| 75 | + judge_model: qwen3.6-plus | |
| 76 | + judge_enable_thinking: false | |
| 77 | + judge_dashscope_batch: false | |
| 78 | + intent_model: qwen3.6-plus | |
| 79 | + intent_enable_thinking: true | |
| 80 | + judge_batch_completion_window: 24h | |
| 81 | + judge_batch_poll_interval_sec: 10.0 | |
| 82 | + build_search_depth: 1000 | |
| 83 | + build_rerank_depth: 10000 | |
| 84 | + annotate_search_top_k: 120 | |
| 85 | + annotate_rerank_top_k: 200 | |
| 86 | + batch_top_k: 100 | |
| 87 | + audit_top_k: 100 | |
| 88 | + audit_limit_suspicious: 5 | |
| 89 | + default_language: en | |
| 90 | + search_recall_top_k: 200 | |
| 91 | + rerank_high_threshold: 0.5 | |
| 92 | + rerank_high_skip_count: 1000 | |
| 93 | + rebuild_llm_batch_size: 50 | |
| 94 | + rebuild_min_llm_batches: 10 | |
| 95 | + rebuild_max_llm_batches: 40 | |
| 96 | + rebuild_irrelevant_stop_ratio: 0.799 | |
| 97 | + rebuild_irrel_low_combined_stop_ratio: 0.959 | |
| 98 | + rebuild_irrelevant_stop_streak: 3 | |
| 99 | +es_settings: | |
| 100 | + number_of_shards: 1 | |
| 101 | + number_of_replicas: 0 | |
| 102 | + refresh_interval: 30s | |
| 103 | + | |
| 104 | +# 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang} | |
| 105 | +field_boosts: | |
| 106 | + title: 3.0 | |
| 107 | + # qanchors enriched_tags 在 enriched_attributes.value中也存在,所以其实他的权重为自身权重+enriched_attributes.value的权重 | |
| 108 | + qanchors: 1.0 | |
| 109 | + enriched_tags: 1.0 | |
| 110 | + enriched_attributes.value: 1.5 | |
| 111 | + category_name_text: 2.0 | |
| 112 | + category_path: 2.0 | |
| 113 | + keywords: 2.0 | |
| 114 | + tags: 2.0 | |
| 115 | + option1_values: 1.7 | |
| 116 | + option2_values: 1.7 | |
| 117 | + option3_values: 1.7 | |
| 118 | + brief: 1.0 | |
| 119 | + description: 1.0 | |
| 120 | + vendor: 1.0 | |
| 121 | + | |
| 122 | +query_config: | |
| 123 | + supported_languages: | |
| 124 | + - zh | |
| 125 | + - en | |
| 126 | + default_language: en | |
| 127 | + enable_text_embedding: true | |
| 128 | + enable_query_rewrite: true | |
| 129 | + | |
| 130 | + zh_to_en_model: nllb-200-distilled-600m # nllb-200-distilled-600m deepl opus-mt-zh-en / opus-mt-en-zh | |
| 131 | + en_to_zh_model: nllb-200-distilled-600m | |
| 132 | + default_translation_model: nllb-200-distilled-600m | |
| 133 | + # 源语种不在 index_languages时翻译质量比较重要,因此单独配置 | |
| 134 | + zh_to_en_model__source_not_in_index: deepl | |
| 135 | + en_to_zh_model__source_not_in_index: deepl | |
| 136 | + default_translation_model__source_not_in_index: deepl | |
| 137 | + | |
| 138 | + # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒) | |
| 139 | + translation_embedding_wait_budget_ms_source_in_index: 300 | |
| 140 | + translation_embedding_wait_budget_ms_source_not_in_index: 400 | |
| 141 | + style_intent: | |
| 142 | + enabled: true | |
| 143 | + selected_sku_boost: 1.2 | |
| 144 | + color_dictionary_path: config/dictionaries/style_intent_color.csv | |
| 145 | + size_dictionary_path: config/dictionaries/style_intent_size.csv | |
| 146 | + dimension_aliases: | |
| 147 | + color: | |
| 148 | + - color | |
| 149 | + - colors | |
| 150 | + - colour | |
| 151 | + - colours | |
| 152 | + - 颜色 | |
| 153 | + - 色 | |
| 154 | + - 色系 | |
| 155 | + size: | |
| 156 | + - size | |
| 157 | + - sizes | |
| 158 | + - sizing | |
| 159 | + - 尺码 | |
| 160 | + - 尺寸 | |
| 161 | + - 码数 | |
| 162 | + - 号码 | |
| 163 | + - 码 | |
| 164 | + product_title_exclusion: | |
| 165 | + enabled: true | |
| 166 | + dictionary_path: config/dictionaries/product_title_exclusion.tsv | |
| 167 | + search_fields: | |
| 168 | + # 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang} | |
| 169 | + multilingual_fields: | |
| 170 | + - title | |
| 171 | + - keywords | |
| 172 | + - qanchors | |
| 173 | + - enriched_tags | |
| 174 | + - enriched_attributes.value | |
| 175 | + # - enriched_taxonomy_attributes.value | |
| 176 | + - option1_values | |
| 177 | + - option2_values | |
| 178 | + - option3_values | |
| 179 | + - category_path | |
| 180 | + - category_name_text | |
| 181 | + # - brief | |
| 182 | + # - description | |
| 183 | + # - vendor | |
| 184 | + # shared_fields: 无语言后缀字段;示例: tags, option1_values, option2_values, option3_values | |
| 185 | + | |
| 186 | + shared_fields: null | |
| 187 | + core_multilingual_fields: | |
| 188 | + - title | |
| 189 | + - qanchors | |
| 190 | + - category_name_text | |
| 191 | + | |
| 192 | + # 文本召回(主查询 + 翻译查询) | |
| 193 | + text_query_strategy: | |
| 194 | + base_minimum_should_match: 60% | |
| 195 | + translation_minimum_should_match: 60% | |
| 196 | + translation_boost: 0.75 | |
| 197 | + tie_breaker_base_query: 0.5 | |
| 198 | + best_fields_boost: 2.0 | |
| 199 | + best_fields: | |
| 200 | + title: 4.0 | |
| 201 | + qanchors: 3.0 | |
| 202 | + category_name_text: 2.0 | |
| 203 | + phrase_fields: | |
| 204 | + title: 5.0 | |
| 205 | + qanchors: 4.0 | |
| 206 | + phrase_match_boost: 3.0 | |
| 207 | + text_embedding_field: title_embedding | |
| 208 | + image_embedding_field: image_embedding.vector | |
| 209 | + | |
| 210 | + # null表示返回所有字段,[]表示不返回任何字段 | |
| 211 | + source_fields: | |
| 212 | + - spu_id | |
| 213 | + - handle | |
| 214 | + - title | |
| 215 | + - brief | |
| 216 | + - description | |
| 217 | + - vendor | |
| 218 | + - category_name | |
| 219 | + - category_name_text | |
| 220 | + - category_path | |
| 221 | + - category_id | |
| 222 | + - category_level | |
| 223 | + - category1_name | |
| 224 | + - category2_name | |
| 225 | + - category3_name | |
| 226 | + # - tags | |
| 227 | + # - keywords | |
| 228 | + # - qanchors | |
| 229 | + # - enriched_tags | |
| 230 | + - enriched_attributes | |
| 231 | + - enriched_taxonomy_attributes | |
| 232 | + | |
| 233 | + - min_price | |
| 234 | + - compare_at_price | |
| 235 | + - image_url | |
| 236 | + - sku_prices | |
| 237 | + - sku_weights | |
| 238 | + - sku_weight_units | |
| 239 | + - total_inventory | |
| 240 | + - option1_name | |
| 241 | + - option1_values | |
| 242 | + - option2_name | |
| 243 | + - option2_values | |
| 244 | + - option3_name | |
| 245 | + - option3_values | |
| 246 | + - specifications | |
| 247 | + - skus | |
| 248 | + | |
| 249 | + # KNN:文本向量与多模态(图片)向量各自 boost 与召回(k / num_candidates) | |
| 250 | + knn_text_boost: 4 | |
| 251 | + knn_image_boost: 4 | |
| 252 | + knn_text_k: 160 | |
| 253 | + knn_text_num_candidates: 560 # k * 3.5 | |
| 254 | + knn_text_k_long: 400 | |
| 255 | + knn_text_num_candidates_long: 1200 | |
| 256 | + knn_image_k: 400 | |
| 257 | + knn_image_num_candidates: 1200 | |
| 258 | + | |
| 259 | +function_score: | |
| 260 | + score_mode: sum | |
| 261 | + boost_mode: multiply | |
| 262 | + functions: [] | |
| 263 | +coarse_rank: | |
| 264 | + enabled: true | |
| 265 | + input_window: 480 | |
| 266 | + output_window: 160 | |
| 267 | + fusion: | |
| 268 | + es_bias: 10.0 | |
| 269 | + es_exponent: 0.05 | |
| 270 | + text_bias: 0.1 | |
| 271 | + text_exponent: 0.35 | |
| 272 | + # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) | |
| 273 | + # 因为es的打分已经给了trans进行了折扣,所以这里不再继续折扣 | |
| 274 | + text_translation_weight: 0.8 | |
| 275 | + knn_text_weight: 1.0 | |
| 276 | + knn_image_weight: 2.0 | |
| 277 | + knn_tie_breaker: 0.3 | |
| 278 | + knn_bias: 0.6 | |
| 279 | + knn_exponent: 0.4 | |
| 280 | + knn_text_bias: 0.2 | |
| 281 | + knn_text_exponent: 0.0 | |
| 282 | + knn_image_bias: 0.2 | |
| 283 | + knn_image_exponent: 0.0 | |
| 284 | +fine_rank: | |
| 285 | + enabled: false # false 时保序透传 | |
| 286 | + input_window: 160 | |
| 287 | + output_window: 80 | |
| 288 | + timeout_sec: 10.0 | |
| 289 | + rerank_query_template: '{query}' | |
| 290 | + rerank_doc_template: '{title}' | |
| 291 | + service_profile: fine | |
| 292 | +rerank: | |
| 293 | + enabled: true # false 时保序透传 | |
| 294 | + rerank_window: 160 | |
| 295 | + exact_knn_rescore_enabled: true | |
| 296 | + exact_knn_rescore_window: 160 | |
| 297 | + timeout_sec: 15.0 | |
| 298 | + weight_es: 0.4 | |
| 299 | + weight_ai: 0.6 | |
| 300 | + rerank_query_template: '{query}' | |
| 301 | + rerank_doc_template: '{title}' | |
| 302 | + service_profile: default | |
| 303 | + # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(es / rerank / fine / text / knn) | |
| 304 | + # 其中 knn_score 先做一层 dis_max: | |
| 305 | + # max(knn_text_weight * text_knn, knn_image_weight * image_knn) | |
| 306 | + # + knn_tie_breaker * 另一侧较弱信号 | |
| 307 | + fusion: | |
| 308 | + es_bias: 10.0 | |
| 309 | + es_exponent: 0.05 | |
| 310 | + rerank_bias: 0.1 | |
| 311 | + rerank_exponent: 1.15 | |
| 312 | + fine_bias: 0.1 | |
| 313 | + fine_exponent: 1.0 | |
| 314 | + text_bias: 0.1 | |
| 315 | + # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) | |
| 316 | + text_exponent: 0.25 | |
| 317 | + text_translation_weight: 0.8 | |
| 318 | + knn_text_weight: 1.0 | |
| 319 | + knn_image_weight: 2.0 | |
| 320 | + knn_tie_breaker: 0.3 | |
| 321 | + knn_bias: 0.6 | |
| 322 | + knn_exponent: 0.4 | |
| 323 | + | |
| 324 | +services: | |
| 325 | + translation: | |
| 326 | + service_url: http://127.0.0.1:6006 | |
| 327 | + default_model: nllb-200-distilled-600m | |
| 328 | + default_scene: general | |
| 329 | + timeout_sec: 10.0 | |
| 330 | + cache: | |
| 331 | + ttl_seconds: 62208000 | |
| 332 | + sliding_expiration: true | |
| 333 | + # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups) | |
| 334 | + # Higher tier = better quality. Multiple models may share one tier (同级). | |
| 335 | + # A request for a model in tier A may reuse Redis keys written by models with tier >= A (never from lower tiers). | |
| 336 | + enable_model_quality_tier_cache: true | |
| 337 | + model_quality_tiers: | |
| 338 | + deepl: 30 | |
| 339 | + qwen-mt: 30 | |
| 340 | + llm: 30 | |
| 341 | + nllb-200-distilled-600m: 20 | |
| 342 | + opus-mt-zh-en: 10 | |
| 343 | + opus-mt-en-zh: 10 | |
| 344 | + capabilities: | |
| 345 | + qwen-mt: | |
| 346 | + enabled: true | |
| 347 | + backend: qwen_mt | |
| 348 | + model: qwen-mt-flash | |
| 349 | + base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1 | |
| 350 | + timeout_sec: 10.0 | |
| 351 | + use_cache: true | |
| 352 | + llm: | |
| 353 | + enabled: true | |
| 354 | + backend: llm | |
| 355 | + model: qwen-flash | |
| 356 | + base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1 | |
| 357 | + timeout_sec: 30.0 | |
| 358 | + use_cache: true | |
| 359 | + deepl: | |
| 360 | + enabled: true | |
| 361 | + backend: deepl | |
| 362 | + api_url: https://api.deepl.com/v2/translate | |
| 363 | + timeout_sec: 10.0 | |
| 364 | + glossary_id: '' | |
| 365 | + use_cache: true | |
| 366 | + nllb-200-distilled-600m: | |
| 367 | + enabled: true | |
| 368 | + backend: local_nllb | |
| 369 | + model_id: facebook/nllb-200-distilled-600M | |
| 370 | + model_dir: ./models/translation/facebook/nllb-200-distilled-600M | |
| 371 | + ct2_model_dir: ./models/translation/facebook/nllb-200-distilled-600M/ctranslate2-float16 | |
| 372 | + ct2_compute_type: float16 | |
| 373 | + ct2_conversion_quantization: float16 | |
| 374 | + ct2_auto_convert: true | |
| 375 | + ct2_inter_threads: 4 | |
| 376 | + ct2_intra_threads: 0 | |
| 377 | + ct2_max_queued_batches: 32 | |
| 378 | + ct2_batch_type: examples | |
| 379 | + ct2_decoding_length_mode: source | |
| 380 | + ct2_decoding_length_extra: 16 | |
| 381 | + ct2_decoding_length_min: 32 | |
| 382 | + device: cuda | |
| 383 | + torch_dtype: float16 | |
| 384 | + batch_size: 64 | |
| 385 | + max_input_length: 256 | |
| 386 | + max_new_tokens: 96 | |
| 387 | + num_beams: 4 | |
| 388 | + use_cache: true | |
| 389 | + opus-mt-zh-en: | |
| 390 | + enabled: false | |
| 391 | + backend: local_marian | |
| 392 | + model_id: Helsinki-NLP/opus-mt-zh-en | |
| 393 | + model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en | |
| 394 | + ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en/ctranslate2-float16 | |
| 395 | + ct2_compute_type: float16 | |
| 396 | + ct2_conversion_quantization: float16 | |
| 397 | + ct2_auto_convert: true | |
| 398 | + ct2_inter_threads: 1 | |
| 399 | + ct2_intra_threads: 0 | |
| 400 | + ct2_max_queued_batches: 0 | |
| 401 | + ct2_batch_type: examples | |
| 402 | + device: cuda | |
| 403 | + torch_dtype: float16 | |
| 404 | + batch_size: 16 | |
| 405 | + max_input_length: 256 | |
| 406 | + max_new_tokens: 256 | |
| 407 | + num_beams: 1 | |
| 408 | + use_cache: true | |
| 409 | + opus-mt-en-zh: | |
| 410 | + enabled: false | |
| 411 | + backend: local_marian | |
| 412 | + model_id: Helsinki-NLP/opus-mt-en-zh | |
| 413 | + model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh | |
| 414 | + ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh/ctranslate2-float16 | |
| 415 | + ct2_compute_type: float16 | |
| 416 | + ct2_conversion_quantization: float16 | |
| 417 | + ct2_auto_convert: true | |
| 418 | + ct2_inter_threads: 1 | |
| 419 | + ct2_intra_threads: 0 | |
| 420 | + ct2_max_queued_batches: 0 | |
| 421 | + ct2_batch_type: examples | |
| 422 | + device: cuda | |
| 423 | + torch_dtype: float16 | |
| 424 | + batch_size: 16 | |
| 425 | + max_input_length: 256 | |
| 426 | + max_new_tokens: 256 | |
| 427 | + num_beams: 1 | |
| 428 | + use_cache: true | |
| 429 | + embedding: | |
| 430 | + provider: http | |
| 431 | + providers: | |
| 432 | + http: | |
| 433 | + text_base_url: http://127.0.0.1:6005 | |
| 434 | + image_base_url: http://127.0.0.1:6008 | |
| 435 | + backend: tei | |
| 436 | + backends: | |
| 437 | + tei: | |
| 438 | + base_url: http://127.0.0.1:8080 | |
| 439 | + timeout_sec: 20 | |
| 440 | + model_id: Qwen/Qwen3-Embedding-0.6B | |
| 441 | + local_st: | |
| 442 | + model_id: Qwen/Qwen3-Embedding-0.6B | |
| 443 | + device: cuda | |
| 444 | + batch_size: 32 | |
| 445 | + normalize_embeddings: true | |
| 446 | + # 服务内图片后端(embedding 进程启动时读取;cnclip gRPC 与 6008 须同一 model_name) | |
| 447 | + # Chinese-CLIP: ViT-H-14 → 1024 dims, ViT-L-14 → 768 dims. Must match image_embedding.vector.dims in | |
| 448 | + # mappings/search_products.json (index noted as 1024 → ViT-H-14; model_name below is ViT-L-14 — TODO confirm). | |
| 449 | + image_backend: clip_as_service # clip_as_service | local_cnclip | |
| 450 | + image_backends: | |
| 451 | + clip_as_service: | |
| 452 | + server: grpc://127.0.0.1:51000 | |
| 453 | + model_name: CN-CLIP/ViT-L-14 | |
| 454 | + batch_size: 8 | |
| 455 | + normalize_embeddings: true | |
| 456 | + local_cnclip: | |
| 457 | + model_name: ViT-L-14 | |
| 458 | + device: null | |
| 459 | + batch_size: 8 | |
| 460 | + normalize_embeddings: true | |
| 461 | + rerank: | |
| 462 | + provider: http | |
| 463 | + providers: | |
| 464 | + http: | |
| 465 | + instances: | |
| 466 | + default: | |
| 467 | + base_url: http://127.0.0.1:6007 | |
| 468 | + service_url: http://127.0.0.1:6007/rerank | |
| 469 | + fine: | |
| 470 | + base_url: http://127.0.0.1:6009 | |
| 471 | + service_url: http://127.0.0.1:6009/rerank | |
| 472 | + request: | |
| 473 | + max_docs: 1000 | |
| 474 | + normalize: true | |
| 475 | + # 命名实例:同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。 | |
| 476 | + default_instance: default | |
| 477 | + instances: | |
| 478 | + default: | |
| 479 | + host: 0.0.0.0 | |
| 480 | + port: 6007 | |
| 481 | + backend: bge | |
| 482 | + runtime_dir: ./.runtime/reranker/default | |
| 483 | + fine: | |
| 484 | + host: 0.0.0.0 | |
| 485 | + port: 6009 | |
| 486 | + backend: bge | |
| 487 | + runtime_dir: ./.runtime/reranker/fine | |
| 488 | + backends: | |
| 489 | + bge: | |
| 490 | + model_name: BAAI/bge-reranker-v2-m3 | |
| 491 | + device: null | |
| 492 | + use_fp16: true | |
| 493 | + batch_size: 80 | |
| 494 | + max_length: 160 | |
| 495 | + cache_dir: ./model_cache | |
| 496 | + enable_warmup: true | |
| 497 | + jina_reranker_v3: | |
| 498 | + model_name: jinaai/jina-reranker-v3 | |
| 499 | + device: null | |
| 500 | + dtype: float16 | |
| 501 | + batch_size: 64 | |
| 502 | + max_doc_length: 160 | |
| 503 | + max_query_length: 64 | |
| 504 | + sort_by_doc_length: true | |
| 505 | + cache_dir: ./model_cache | |
| 506 | + trust_remote_code: true | |
| 507 | + qwen3_vllm: | |
| 508 | + model_name: Qwen/Qwen3-Reranker-0.6B | |
| 509 | + engine: vllm | |
| 510 | + max_model_len: 256 | |
| 511 | + tensor_parallel_size: 1 | |
| 512 | + gpu_memory_utilization: 0.2 | |
| 513 | + dtype: float16 | |
| 514 | + enable_prefix_caching: true | |
| 515 | + enforce_eager: false | |
| 516 | + infer_batch_size: 100 | |
| 517 | + sort_by_doc_length: true | |
| 518 | + | |
| 519 | + # standard=_format_instruction__standard(固定 yes/no system);compact=_format_instruction(instruction 作 system 且 user 内重复 Instruct) | |
| 520 | + instruction_format: standard # compact standard | |
| 521 | + # instruction: "Given a query, score the product for relevance" | |
| 522 | + # "rank products by given query" 比 “Given a query, score the product for relevance” 更好点 | |
| 523 | + # instruction: "rank products by given query, category match first" | |
| 524 | + # instruction: "Rank products by query relevance, prioritizing category match" | |
| 525 | + # instruction: "Rank products by query relevance, prioritizing category and style match" | |
| 526 | + # instruction: "Rank by query relevance, prioritize category & style" | |
| 527 | + # instruction: "Relevance ranking: category & style match first" | |
| 528 | + # instruction: "Score product relevance by query with category & style match prioritized" | |
| 529 | + # instruction: "Rank products by query with category & style match prioritized" | |
| 530 | + # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query" | |
| 531 | + instruction: rank products by given query | |
| 532 | + | |
| 533 | + # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score | |
| 534 | + # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。 | |
| 535 | + qwen3_vllm_score: | |
| 536 | + model_name: Qwen/Qwen3-Reranker-0.6B | |
| 537 | + # 官方 Hub 原版需 true;若改用已转换的 seq-cls 权重(如 tomaarsen/...-seq-cls)则设为 false | |
| 538 | + use_original_qwen3_hf_overrides: true | |
| 539 | + # vllm_runner: "auto" | |
| 540 | + # vllm_convert: "auto" | |
| 541 | + # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并 | |
| 542 | + # hf_overrides: {} | |
| 543 | + engine: vllm | |
| 544 | + max_model_len: 172 | |
| 545 | + tensor_parallel_size: 1 | |
| 546 | + gpu_memory_utilization: 0.15 | |
| 547 | + dtype: float16 | |
| 548 | + enable_prefix_caching: true | |
| 549 | + enforce_eager: false | |
| 550 | + infer_batch_size: 80 | |
| 551 | + sort_by_doc_length: true | |
| 552 | + # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致 | |
| 553 | + instruction_format: standard # compact standard | |
| 554 | + # instruction: "Rank products by query with category & style match prioritized" | |
| 555 | + # instruction: "Given a shopping query, rank products by relevance" | |
| 556 | + instruction: Rank products by query with category & style match prioritized | |
| 557 | + qwen3_transformers: | |
| 558 | + model_name: Qwen/Qwen3-Reranker-0.6B | |
| 559 | + instruction: rank products by given query | |
| 560 | + max_length: 8192 | |
| 561 | + batch_size: 64 | |
| 562 | + use_fp16: true | |
| 563 | + attn_implementation: sdpa | |
| 564 | + qwen3_transformers_packed: | |
| 565 | + model_name: Qwen/Qwen3-Reranker-0.6B | |
| 566 | + instruction: Rank products by query with category & style match prioritized | |
| 567 | + max_model_len: 256 | |
| 568 | + max_doc_len: 160 | |
| 569 | + max_docs_per_pack: 0 | |
| 570 | + use_fp16: true | |
| 571 | + sort_by_doc_length: true | |
| 572 | + attn_implementation: eager | |
| 573 | + qwen3_gguf: | |
| 574 | + repo_id: DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF | |
| 575 | + filename: '*Q8_0.gguf' | |
| 576 | + cache_dir: ./model_cache | |
| 577 | + local_dir: ./models/reranker/qwen3-reranker-4b-gguf | |
| 578 | + instruction: Rank products by query with category & style match prioritized | |
| 579 | + n_ctx: 512 | |
| 580 | + n_batch: 512 | |
| 581 | + n_ubatch: 512 | |
| 582 | + n_gpu_layers: 999 | |
| 583 | + main_gpu: 0 | |
| 584 | + n_threads: 2 | |
| 585 | + n_threads_batch: 4 | |
| 586 | + flash_attn: true | |
| 587 | + offload_kqv: true | |
| 588 | + use_mmap: true | |
| 589 | + use_mlock: false | |
| 590 | + infer_batch_size: 8 | |
| 591 | + sort_by_doc_length: true | |
| 592 | + length_sort_mode: char | |
| 593 | + enable_warmup: true | |
| 594 | + verbose: false | |
| 595 | + qwen3_gguf_06b: | |
| 596 | + repo_id: ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF | |
| 597 | + filename: qwen3-reranker-0.6b-q8_0.gguf | |
| 598 | + cache_dir: ./model_cache | |
| 599 | + local_dir: ./models/reranker/qwen3-reranker-0.6b-q8_0-gguf | |
| 600 | + instruction: Rank products by query with category & style match prioritized | |
| 601 | + n_ctx: 256 | |
| 602 | + n_batch: 256 | |
| 603 | + n_ubatch: 256 | |
| 604 | + n_gpu_layers: 999 | |
| 605 | + main_gpu: 0 | |
| 606 | + n_threads: 2 | |
| 607 | + n_threads_batch: 4 | |
| 608 | + flash_attn: true | |
| 609 | + offload_kqv: true | |
| 610 | + use_mmap: true | |
| 611 | + use_mlock: false | |
| 612 | + infer_batch_size: 32 | |
| 613 | + sort_by_doc_length: true | |
| 614 | + length_sort_mode: char | |
| 615 | + reuse_query_state: false | |
| 616 | + enable_warmup: true | |
| 617 | + verbose: false | |
| 618 | + dashscope_rerank: | |
| 619 | + model_name: qwen3-rerank | |
| 620 | + endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks | |
| 621 | + api_key_env: RERANK_DASHSCOPE_API_KEY_CN | |
| 622 | + timeout_sec: 10.0 | |
| 623 | + top_n_cap: 0 # 0 表示 top_n=当前请求文档数 | |
| 624 | + batchsize: 64 # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断) | |
| 625 | + instruct: Given a shopping query, rank product titles by relevance | |
| 626 | + max_retries: 2 | |
| 627 | + retry_backoff_sec: 0.2 | |
| 628 | + | |
| 629 | +spu_config: | |
| 630 | + enabled: true | |
| 631 | + spu_field: spu_id | |
| 632 | + inner_hits_size: 10 | |
| 633 | + # 配置哪些option维度参与检索(进索引、以及在线搜索) | |
| 634 | + # 格式为list,选择option1/option2/option3中的一个或多个 | |
| 635 | + searchable_option_dimensions: | |
| 636 | + - option1 | |
| 637 | + - option2 | |
| 638 | + - option3 | |
| 639 | + | |
| 640 | +# 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选) | |
| 641 | +# 默认 index_languages: [en, zh],可配置为任意 SOURCE_LANG_CODE_MAP.keys() 的子集 | |
| 642 | +tenant_config: | |
| 643 | + default: | |
| 644 | + primary_language: en | |
| 645 | + index_languages: | |
| 646 | + - en | |
| 647 | + - zh | |
| 648 | + tenants: | |
| 649 | + '1': | |
| 650 | + primary_language: zh | |
| 651 | + index_languages: | |
| 652 | + - zh | |
| 653 | + - en | |
| 654 | + '2': | |
| 655 | + primary_language: en | |
| 656 | + index_languages: | |
| 657 | + - en | |
| 658 | + - zh | |
| 659 | + '3': | |
| 660 | + primary_language: zh | |
| 661 | + index_languages: | |
| 662 | + - zh | |
| 663 | + - en | |
| 664 | + '162': | |
| 665 | + primary_language: zh | |
| 666 | + index_languages: | |
| 667 | + - zh | |
| 668 | + - en | |
| 669 | + '170': | |
| 670 | + primary_language: en | |
| 671 | + index_languages: | |
| 672 | + - en | |
| 673 | + - zh | ... | ... |
| ... | ... | @@ -0,0 +1,258 @@ |
| 1 | +import argparse | |
| 2 | +import json | |
| 3 | +import os | |
| 4 | +import random | |
| 5 | +import statistics | |
| 6 | +import time | |
| 7 | +from dataclasses import dataclass | |
| 8 | + | |
| 9 | +import requests | |
| 10 | + | |
| 11 | + | |
| 12 | +def _now() -> float: | |
| 13 | + return time.perf_counter() | |
| 14 | + | |
| 15 | + | |
| 16 | +def _mask(s: str, keep: int = 3) -> str: | |
| 17 | + if not s: | |
| 18 | + return "" | |
| 19 | + if len(s) <= keep * 2: | |
| 20 | + return "*" * len(s) | |
| 21 | + return f"{s[:keep]}***{s[-keep:]}" | |
| 22 | + | |
| 23 | + | |
def get_access_token(api_key: str, secret_key: str, timeout_s: float) -> str:
    """Exchange an API key / secret key pair for an OAuth access token.

    Issues a ``client_credentials`` POST to the Baidu token endpoint and
    returns the ``access_token`` field of the JSON reply.

    Args:
        api_key: Sent as ``client_id``.
        secret_key: Sent as ``client_secret``.
        timeout_s: HTTP timeout in seconds passed to ``requests``.

    Returns:
        The access token as a string.

    Raises:
        requests.HTTPError: If the HTTP status indicates failure.
        RuntimeError: If the reply contains no (or an empty) ``access_token``.
    """
    response = requests.post(
        "https://aip.baidubce.com/oauth/2.0/token",
        params={
            "grant_type": "client_credentials",
            "client_id": api_key,
            "client_secret": secret_key,
        },
        timeout=timeout_s,
    )
    response.raise_for_status()

    data = response.json()
    access_token = data.get("access_token")
    if access_token:
        return str(access_token)
    raise RuntimeError(f"no access_token in response: {data}")
| 38 | + | |
| 39 | + | |
def translate_one(
    *,
    access_token: str,
    q: str,
    from_lang: str,
    to_lang: str,
    timeout_s: float,
) -> dict:
    """Translate a single text through the Baidu ``texttrans`` endpoint.

    Args:
        access_token: OAuth token, sent as the ``access_token`` query parameter.
        q: Text to translate.
        from_lang: Source language code.
        to_lang: Target language code.
        timeout_s: HTTP timeout in seconds passed to ``requests``.

    Returns:
        The decoded JSON response body, unmodified.

    Raises:
        requests.HTTPError: If the HTTP status indicates failure.
    """
    # Pass the token via ``params`` so requests URL-encodes it, instead of
    # splicing the raw value into the URL by string concatenation.
    url = "https://aip.baidubce.com/rpc/2.0/mt/texttrans/v1"
    params = {"access_token": access_token}
    # ensure_ascii=False keeps non-ASCII text as raw UTF-8 in the request body.
    payload = json.dumps({"from": from_lang, "to": to_lang, "q": q}, ensure_ascii=False)
    headers = {"Content-Type": "application/json", "Accept": "application/json"}
    r = requests.post(
        url,
        params=params,
        headers=headers,
        data=payload.encode("utf-8"),
        timeout=timeout_s,
    )
    r.raise_for_status()
    return r.json()
| 58 | + | |
| 59 | + | |
| 60 | +def random_english_samples(n: int, seed: int | None) -> list[str]: | |
| 61 | + rng = random.Random(seed) | |
| 62 | + | |
| 63 | + starters = [ | |
| 64 | + "Hello", | |
| 65 | + "Please", | |
| 66 | + "Could you", | |
| 67 | + "I wonder if you can", | |
| 68 | + "Let's", | |
| 69 | + "We should", | |
| 70 | + "It's important to", | |
| 71 | + "Don't forget to", | |
| 72 | + "I need to", | |
| 73 | + "Can we", | |
| 74 | + ] | |
| 75 | + verbs = [ | |
| 76 | + "check", | |
| 77 | + "compare", | |
| 78 | + "translate", | |
| 79 | + "summarize", | |
| 80 | + "review", | |
| 81 | + "optimize", | |
| 82 | + "measure", | |
| 83 | + "confirm", | |
| 84 | + "fix", | |
| 85 | + "verify", | |
| 86 | + ] | |
| 87 | + objects = [ | |
| 88 | + "the latest order status", | |
| 89 | + "this short sentence", | |
| 90 | + "the product title and description", | |
| 91 | + "our search results", | |
| 92 | + "the API response payload", | |
| 93 | + "the translation quality", | |
| 94 | + "the page load time", | |
| 95 | + "a few random examples", | |
| 96 | + "the error message", | |
| 97 | + "the performance metrics", | |
| 98 | + ] | |
| 99 | + endings = [ | |
| 100 | + "today.", | |
| 101 | + "right now.", | |
| 102 | + "before we ship.", | |
| 103 | + "in the next minute.", | |
| 104 | + "without changing the meaning.", | |
| 105 | + "as soon as possible.", | |
| 106 | + "for a quick smoke test.", | |
| 107 | + "with a clear output.", | |
| 108 | + "and report the timing.", | |
| 109 | + "to ensure everything works.", | |
| 110 | + ] | |
| 111 | + | |
| 112 | + samples: list[str] = [] | |
| 113 | + while len(samples) < n: | |
| 114 | + s = f"{rng.choice(starters)} {rng.choice(verbs)} {rng.choice(objects)} {rng.choice(endings)}" | |
| 115 | + # Add a little variation | |
| 116 | + if rng.random() < 0.25: | |
| 117 | + s = s.replace("the ", "our ", 1) | |
| 118 | + if rng.random() < 0.20: | |
| 119 | + s = s.replace(".", "!") | |
| 120 | + samples.append(s) | |
| 121 | + return samples | |
| 122 | + | |
| 123 | + | |
| 124 | +@dataclass(frozen=True) | |
| 125 | +class Timing: | |
| 126 | + text: str | |
| 127 | + seconds: float | |
| 128 | + ok: bool | |
| 129 | + error: str | None | |
| 130 | + sample_out: str | None | |
| 131 | + | |
| 132 | + | |
def _api_error(out: object) -> str | None:
    """Describe an API-level error carried in a response body, or return None."""
    # NOTE(review): assumes failures are reported as "error_code"/"error_msg"
    # in an HTTP 200 body -- confirm against the texttrans API reference.
    if isinstance(out, dict) and out.get("error_code"):
        return f"API error {out.get('error_code')}: {out.get('error_msg')}"
    return None


def _extract_translation(out: object) -> str | None:
    """Return the translated text found in a texttrans response, or None."""
    if not isinstance(out, dict):
        return None
    result = out.get("result")
    if not isinstance(result, dict):
        return None
    translated = result.get("trans_result")
    # The usual shape is a list of dicts with dst/src; take the first "dst".
    if isinstance(translated, list) and translated and isinstance(translated[0], dict):
        translated = translated[0].get("dst")
    return translated if isinstance(translated, str) else None


def main() -> int:
    """Benchmark Baidu texttrans latency over randomly generated sentences.

    Parses the command line, reads ``BAIDU_API_KEY`` / ``BAIDU_SECRET_KEY``
    from the environment, acquires an access token, translates every sample
    sequentially while timing each request, then prints summary statistics,
    the first few translations and the first few errors.

    A request counts as failed when it raises OR when the response body
    carries an API-level error, so failed calls never enter the latency
    statistics.

    Returns:
        0 when every request succeeded, 2 when at least one failed.

    Raises:
        SystemExit: If ``--n`` is not positive or the credentials are missing.
    """
    p = argparse.ArgumentParser(
        description="Benchmark Baidu texttrans latency with random samples."
    )
    p.add_argument("--n", type=int, default=15, help="Number of samples (10-20 suggested).")
    p.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility.")
    p.add_argument("--from", dest="from_lang", default="en", help="Source language code.")
    p.add_argument("--to", dest="to_lang", default="zh", help="Target language code.")
    p.add_argument("--timeout", type=float, default=20.0, help="HTTP timeout seconds.")
    p.add_argument(
        "--print-first",
        type=int,
        default=5,
        help="Print first N translations for sanity check.",
    )
    args = p.parse_args()

    if args.n <= 0:
        raise SystemExit("--n must be positive")

    api_key = os.environ.get("BAIDU_API_KEY", "").strip()
    secret_key = os.environ.get("BAIDU_SECRET_KEY", "").strip()
    if not api_key or not secret_key:
        raise SystemExit(
            "Missing BAIDU_API_KEY / BAIDU_SECRET_KEY env vars. "
            "Example:\n"
            " export BAIDU_API_KEY='...'\n"
            " export BAIDU_SECRET_KEY='...'\n"
        )

    print("Baidu credentials:")
    print(f" BAIDU_API_KEY={_mask(api_key)}")
    print(f" BAIDU_SECRET_KEY={_mask(secret_key)}")
    print()

    t0 = _now()
    token = get_access_token(api_key, secret_key, args.timeout)
    token_s = _now() - t0
    print(f"Access token acquired in {token_s:.3f}s")
    print()

    samples = random_english_samples(args.n, args.seed)
    timings: list[Timing] = []

    for i, text in enumerate(samples, start=1):
        start = _now()
        try:
            out = translate_one(
                access_token=token,
                q=text,
                from_lang=args.from_lang,
                to_lang=args.to_lang,
                timeout_s=args.timeout,
            )
        except Exception as e:  # noqa: BLE001 - benchmark script, keep it simple
            elapsed = _now() - start
            # HTTP error messages embed the request URL, which carries the
            # access token; mask it so the error report does not leak it.
            message = str(e).replace(token, _mask(token))
            timings.append(Timing(text=text, seconds=elapsed, ok=False, error=message, sample_out=None))
        else:
            elapsed = _now() - start
            api_error = _api_error(out)
            timings.append(
                Timing(
                    text=text,
                    seconds=elapsed,
                    ok=api_error is None,
                    error=api_error,
                    sample_out=_extract_translation(out) if api_error is None else None,
                )
            )

        print(f"[{i:02d}/{len(samples):02d}] {timings[-1].seconds:.3f}s {'OK' if timings[-1].ok else 'ERR'}")

    ok = [t for t in timings if t.ok]
    errs = [t for t in timings if not t.ok]
    latencies = [t.seconds for t in ok]

    print()
    print("=== Summary ===")
    print(f"token_time_s: {token_s:.3f}")
    print(f"requests_total: {len(timings)}")
    print(f"requests_ok: {len(ok)}")
    print(f"requests_err: {len(errs)}")
    if latencies:
        print(f"total_time_s: {sum(latencies):.3f}")
        print(f"avg_s: {statistics.mean(latencies):.3f}")
        print(f"p50_s: {statistics.median(latencies):.3f}")
        print(f"min_s: {min(latencies):.3f}")
        print(f"max_s: {max(latencies):.3f}")
    else:
        print("No successful requests; cannot compute latency stats.")

    if args.print_first > 0:
        print()
        print("=== Samples (first N) ===")
        for t in ok[: args.print_first]:
            print(f"- IN: {t.text}")
            if t.sample_out:
                print(f" OUT: {t.sample_out}")
            else:
                print(" OUT: (could not parse translation field; see raw JSON by editing script if needed)")

    if errs:
        print()
        print("=== Errors (first 3) ===")
        for t in errs[:3]:
            print(f"- {t.seconds:.3f}s: {t.error}")

    return 0 if not errs else 2
| 256 | + | |
| 257 | +if __name__ == "__main__": | |
| 258 | + raise SystemExit(main()) | ... | ... |