Commit 628ff04f9cbe3a7bbcaa9f3e5f92d378b0b2774d
1 parent
b2dff38f
config-with-reranker.yaml
Showing
3 changed files
with
932 additions
and
0 deletions
Show diff stats
artifacts/search_evaluation/build_launches/clothing_top771_build_resume_direct_20260421T034611Z.pid
0 → 100644
| @@ -0,0 +1 @@ | @@ -0,0 +1 @@ | ||
| 1 | +2900923 |
| @@ -0,0 +1,673 @@ | @@ -0,0 +1,673 @@ | ||
| 1 | +runtime: | ||
| 2 | + environment: prod | ||
| 3 | + index_namespace: '' | ||
| 4 | + api_host: 0.0.0.0 | ||
| 5 | + api_port: 6002 | ||
| 6 | + indexer_host: 0.0.0.0 | ||
| 7 | + indexer_port: 6004 | ||
| 8 | + embedding_host: 0.0.0.0 | ||
| 9 | + embedding_port: 6005 | ||
| 10 | + embedding_text_port: 6005 | ||
| 11 | + embedding_image_port: 6008 | ||
| 12 | + translator_host: 0.0.0.0 | ||
| 13 | + translator_port: 6006 | ||
| 14 | + reranker_host: 0.0.0.0 | ||
| 15 | + reranker_port: 6007 | ||
| 16 | +infrastructure: | ||
| 17 | + elasticsearch: | ||
| 18 | + host: http://localhost:9200 | ||
| 19 | + username: null | ||
| 20 | + password: null | ||
| 21 | + redis: | ||
| 22 | + host: localhost | ||
| 23 | + port: 6479 | ||
| 24 | + snapshot_db: 0 | ||
| 25 | + password: null | ||
| 26 | + socket_timeout: 1 | ||
| 27 | + socket_connect_timeout: 1 | ||
| 28 | + retry_on_timeout: false | ||
| 29 | + cache_expire_days: 720 | ||
| 30 | + embedding_cache_prefix: embedding | ||
| 31 | + anchor_cache_prefix: product_anchors | ||
| 32 | + anchor_cache_expire_days: 30 | ||
| 33 | + database: | ||
| 34 | + host: null | ||
| 35 | + port: 3306 | ||
| 36 | + database: null | ||
| 37 | + username: null | ||
| 38 | + password: null | ||
| 39 | + secrets: | ||
| 40 | + dashscope_api_key: null | ||
| 41 | + deepl_auth_key: null | ||
| 42 | +es_index_name: search_products | ||
| 43 | +indexes: [] | ||
| 44 | +assets: | ||
| 45 | + query_rewrite_dictionary_path: config/dictionaries/query_rewrite.dict | ||
| 46 | +product_enrich: | ||
| 47 | + max_workers: 40 | ||
| 48 | +suggestion: | ||
| 49 | + sat_recall_min: 40 | ||
| 50 | + sat_recall_cap: 100 | ||
| 51 | +search_evaluation: | ||
| 52 | + artifact_root: artifacts/search_evaluation | ||
| 53 | + queries_file: scripts/evaluation/queries/queries.txt | ||
| 54 | + default_dataset_id: core_queries | ||
| 55 | + datasets: | ||
| 56 | + - dataset_id: core_queries | ||
| 57 | + display_name: Core Queries | ||
| 58 | + description: Legacy baseline evaluation set from queries.txt | ||
| 59 | + query_file: scripts/evaluation/queries/queries.txt | ||
| 60 | + tenant_id: '163' | ||
| 61 | + language: en | ||
| 62 | + enabled: true | ||
| 63 | + - dataset_id: clothing_top771 | ||
| 64 | + display_name: Clothing Filtered 771 | ||
| 65 | + description: 771 clothing / shoes / accessories queries filtered from top1k | ||
| 66 | + query_file: scripts/evaluation/queries/all_keywords.txt.top1w.shuf.top1k.clothing_filtered | ||
| 67 | + tenant_id: '163' | ||
| 68 | + language: en | ||
| 69 | + enabled: true | ||
| 70 | + eval_log_dir: logs | ||
| 71 | + default_tenant_id: '163' | ||
| 72 | + search_base_url: '' | ||
| 73 | + web_host: 0.0.0.0 | ||
| 74 | + web_port: 6010 | ||
| 75 | + judge_model: qwen3.6-plus | ||
| 76 | + judge_enable_thinking: false | ||
| 77 | + judge_dashscope_batch: false | ||
| 78 | + intent_model: qwen3.6-plus | ||
| 79 | + intent_enable_thinking: true | ||
| 80 | + judge_batch_completion_window: 24h | ||
| 81 | + judge_batch_poll_interval_sec: 10.0 | ||
| 82 | + build_search_depth: 1000 | ||
| 83 | + build_rerank_depth: 10000 | ||
| 84 | + annotate_search_top_k: 120 | ||
| 85 | + annotate_rerank_top_k: 200 | ||
| 86 | + batch_top_k: 100 | ||
| 87 | + audit_top_k: 100 | ||
| 88 | + audit_limit_suspicious: 5 | ||
| 89 | + default_language: en | ||
| 90 | + search_recall_top_k: 200 | ||
| 91 | + rerank_high_threshold: 0.5 | ||
| 92 | + rerank_high_skip_count: 1000 | ||
| 93 | + rebuild_llm_batch_size: 50 | ||
| 94 | + rebuild_min_llm_batches: 10 | ||
| 95 | + rebuild_max_llm_batches: 40 | ||
| 96 | + rebuild_irrelevant_stop_ratio: 0.799 | ||
| 97 | + rebuild_irrel_low_combined_stop_ratio: 0.959 | ||
| 98 | + rebuild_irrelevant_stop_streak: 3 | ||
| 99 | +es_settings: | ||
| 100 | + number_of_shards: 1 | ||
| 101 | + number_of_replicas: 0 | ||
| 102 | + refresh_interval: 30s | ||
| 103 | + | ||
| 104 | +# 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang} | ||
| 105 | +field_boosts: | ||
| 106 | + title: 3.0 | ||
| 107 | + # qanchors enriched_tags 在 enriched_attributes.value中也存在,所以其实他的权重为自身权重+enriched_attributes.value的权重 | ||
| 108 | + qanchors: 1.0 | ||
| 109 | + enriched_tags: 1.0 | ||
| 110 | + enriched_attributes.value: 1.5 | ||
| 111 | + category_name_text: 2.0 | ||
| 112 | + category_path: 2.0 | ||
| 113 | + keywords: 2.0 | ||
| 114 | + tags: 2.0 | ||
| 115 | + option1_values: 1.7 | ||
| 116 | + option2_values: 1.7 | ||
| 117 | + option3_values: 1.7 | ||
| 118 | + brief: 1.0 | ||
| 119 | + description: 1.0 | ||
| 120 | + vendor: 1.0 | ||
| 121 | + | ||
| 122 | +query_config: | ||
| 123 | + supported_languages: | ||
| 124 | + - zh | ||
| 125 | + - en | ||
| 126 | + default_language: en | ||
| 127 | + enable_text_embedding: true | ||
| 128 | + enable_query_rewrite: true | ||
| 129 | + | ||
| 130 | + zh_to_en_model: nllb-200-distilled-600m # nllb-200-distilled-600m deepl opus-mt-zh-en / opus-mt-en-zh | ||
| 131 | + en_to_zh_model: nllb-200-distilled-600m | ||
| 132 | + default_translation_model: nllb-200-distilled-600m | ||
| 133 | + # 源语种不在 index_languages时翻译质量比较重要,因此单独配置 | ||
| 134 | + zh_to_en_model__source_not_in_index: deepl | ||
| 135 | + en_to_zh_model__source_not_in_index: deepl | ||
| 136 | + default_translation_model__source_not_in_index: deepl | ||
| 137 | + | ||
| 138 | + # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒) | ||
| 139 | + translation_embedding_wait_budget_ms_source_in_index: 300 | ||
| 140 | + translation_embedding_wait_budget_ms_source_not_in_index: 400 | ||
| 141 | + style_intent: | ||
| 142 | + enabled: true | ||
| 143 | + selected_sku_boost: 1.2 | ||
| 144 | + color_dictionary_path: config/dictionaries/style_intent_color.csv | ||
| 145 | + size_dictionary_path: config/dictionaries/style_intent_size.csv | ||
| 146 | + dimension_aliases: | ||
| 147 | + color: | ||
| 148 | + - color | ||
| 149 | + - colors | ||
| 150 | + - colour | ||
| 151 | + - colours | ||
| 152 | + - 颜色 | ||
| 153 | + - 色 | ||
| 154 | + - 色系 | ||
| 155 | + size: | ||
| 156 | + - size | ||
| 157 | + - sizes | ||
| 158 | + - sizing | ||
| 159 | + - 尺码 | ||
| 160 | + - 尺寸 | ||
| 161 | + - 码数 | ||
| 162 | + - 号码 | ||
| 163 | + - 码 | ||
| 164 | + product_title_exclusion: | ||
| 165 | + enabled: true | ||
| 166 | + dictionary_path: config/dictionaries/product_title_exclusion.tsv | ||
| 167 | + search_fields: | ||
| 168 | + # 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang} | ||
| 169 | + multilingual_fields: | ||
| 170 | + - title | ||
| 171 | + - keywords | ||
| 172 | + - qanchors | ||
| 173 | + - enriched_tags | ||
| 174 | + - enriched_attributes.value | ||
| 175 | + # - enriched_taxonomy_attributes.value | ||
| 176 | + - option1_values | ||
| 177 | + - option2_values | ||
| 178 | + - option3_values | ||
| 179 | + - category_path | ||
| 180 | + - category_name_text | ||
| 181 | + # - brief | ||
| 182 | + # - description | ||
| 183 | + # - vendor | ||
| 184 | + # shared_fields: 无语言后缀字段;示例: tags, option1_values, option2_values, option3_values | ||
| 185 | + | ||
| 186 | + shared_fields: null | ||
| 187 | + core_multilingual_fields: | ||
| 188 | + - title | ||
| 189 | + - qanchors | ||
| 190 | + - category_name_text | ||
| 191 | + | ||
| 192 | + # 文本召回(主查询 + 翻译查询) | ||
| 193 | + text_query_strategy: | ||
| 194 | + base_minimum_should_match: 60% | ||
| 195 | + translation_minimum_should_match: 60% | ||
| 196 | + translation_boost: 0.75 | ||
| 197 | + tie_breaker_base_query: 0.5 | ||
| 198 | + best_fields_boost: 2.0 | ||
| 199 | + best_fields: | ||
| 200 | + title: 4.0 | ||
| 201 | + qanchors: 3.0 | ||
| 202 | + category_name_text: 2.0 | ||
| 203 | + phrase_fields: | ||
| 204 | + title: 5.0 | ||
| 205 | + qanchors: 4.0 | ||
| 206 | + phrase_match_boost: 3.0 | ||
| 207 | + text_embedding_field: title_embedding | ||
| 208 | + image_embedding_field: image_embedding.vector | ||
| 209 | + | ||
| 210 | + # null表示返回所有字段,[]表示不返回任何字段 | ||
| 211 | + source_fields: | ||
| 212 | + - spu_id | ||
| 213 | + - handle | ||
| 214 | + - title | ||
| 215 | + - brief | ||
| 216 | + - description | ||
| 217 | + - vendor | ||
| 218 | + - category_name | ||
| 219 | + - category_name_text | ||
| 220 | + - category_path | ||
| 221 | + - category_id | ||
| 222 | + - category_level | ||
| 223 | + - category1_name | ||
| 224 | + - category2_name | ||
| 225 | + - category3_name | ||
| 226 | + # - tags | ||
| 227 | + # - keywords | ||
| 228 | + # - qanchors | ||
| 229 | + # - enriched_tags | ||
| 230 | + - enriched_attributes | ||
| 231 | + - enriched_taxonomy_attributes | ||
| 232 | + | ||
| 233 | + - min_price | ||
| 234 | + - compare_at_price | ||
| 235 | + - image_url | ||
| 236 | + - sku_prices | ||
| 237 | + - sku_weights | ||
| 238 | + - sku_weight_units | ||
| 239 | + - total_inventory | ||
| 240 | + - option1_name | ||
| 241 | + - option1_values | ||
| 242 | + - option2_name | ||
| 243 | + - option2_values | ||
| 244 | + - option3_name | ||
| 245 | + - option3_values | ||
| 246 | + - specifications | ||
| 247 | + - skus | ||
| 248 | + | ||
| 249 | + # KNN:文本向量与多模态(图片)向量各自 boost 与召回(k / num_candidates) | ||
| 250 | + knn_text_boost: 4 | ||
| 251 | + knn_image_boost: 4 | ||
| 252 | + knn_text_k: 160 | ||
| 253 | + knn_text_num_candidates: 560 # k * 3.5 | ||
| 254 | + knn_text_k_long: 400 | ||
| 255 | + knn_text_num_candidates_long: 1200 | ||
| 256 | + knn_image_k: 400 | ||
| 257 | + knn_image_num_candidates: 1200 | ||
| 258 | + | ||
| 259 | +function_score: | ||
| 260 | + score_mode: sum | ||
| 261 | + boost_mode: multiply | ||
| 262 | + functions: [] | ||
| 263 | +coarse_rank: | ||
| 264 | + enabled: true | ||
| 265 | + input_window: 480 | ||
| 266 | + output_window: 160 | ||
| 267 | + fusion: | ||
| 268 | + es_bias: 10.0 | ||
| 269 | + es_exponent: 0.05 | ||
| 270 | + text_bias: 0.1 | ||
| 271 | + text_exponent: 0.35 | ||
| 272 | + # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) | ||
| 273 | + # 因为es的打分已经给了trans进行了折扣,所以这里不再继续折扣 | ||
| 274 | + text_translation_weight: 0.8 | ||
| 275 | + knn_text_weight: 1.0 | ||
| 276 | + knn_image_weight: 2.0 | ||
| 277 | + knn_tie_breaker: 0.3 | ||
| 278 | + knn_bias: 0.6 | ||
| 279 | + knn_exponent: 0.4 | ||
| 280 | + knn_text_bias: 0.2 | ||
| 281 | + knn_text_exponent: 0.0 | ||
| 282 | + knn_image_bias: 0.2 | ||
| 283 | + knn_image_exponent: 0.0 | ||
| 284 | +fine_rank: | ||
| 285 | + enabled: false # false 时保序透传 | ||
| 286 | + input_window: 160 | ||
| 287 | + output_window: 80 | ||
| 288 | + timeout_sec: 10.0 | ||
| 289 | + rerank_query_template: '{query}' | ||
| 290 | + rerank_doc_template: '{title}' | ||
| 291 | + service_profile: fine | ||
| 292 | +rerank: | ||
| 293 | + enabled: true # false 时保序透传 | ||
| 294 | + rerank_window: 160 | ||
| 295 | + exact_knn_rescore_enabled: true | ||
| 296 | + exact_knn_rescore_window: 160 | ||
| 297 | + timeout_sec: 15.0 | ||
| 298 | + weight_es: 0.4 | ||
| 299 | + weight_ai: 0.6 | ||
| 300 | + rerank_query_template: '{query}' | ||
| 301 | + rerank_doc_template: '{title}' | ||
| 302 | + service_profile: default | ||
| 303 | + # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(es / rerank / fine / text / knn) | ||
| 304 | + # 其中 knn_score 先做一层 dis_max: | ||
| 305 | + # max(knn_text_weight * text_knn, knn_image_weight * image_knn) | ||
| 306 | + # + knn_tie_breaker * 另一侧较弱信号 | ||
| 307 | + fusion: | ||
| 308 | + es_bias: 10.0 | ||
| 309 | + es_exponent: 0.05 | ||
| 310 | + rerank_bias: 0.1 | ||
| 311 | + rerank_exponent: 1.15 | ||
| 312 | + fine_bias: 0.1 | ||
| 313 | + fine_exponent: 1.0 | ||
| 314 | + text_bias: 0.1 | ||
| 315 | + # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) | ||
| 316 | + text_exponent: 0.25 | ||
| 317 | + text_translation_weight: 0.8 | ||
| 318 | + knn_text_weight: 1.0 | ||
| 319 | + knn_image_weight: 2.0 | ||
| 320 | + knn_tie_breaker: 0.3 | ||
| 321 | + knn_bias: 0.6 | ||
| 322 | + knn_exponent: 0.4 | ||
| 323 | + | ||
| 324 | +services: | ||
| 325 | + translation: | ||
| 326 | + service_url: http://127.0.0.1:6006 | ||
| 327 | + default_model: nllb-200-distilled-600m | ||
| 328 | + default_scene: general | ||
| 329 | + timeout_sec: 10.0 | ||
| 330 | + cache: | ||
| 331 | + ttl_seconds: 62208000 | ||
| 332 | + sliding_expiration: true | ||
| 333 | + # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups) | ||
| 334 | + # Higher tier = better quality. Multiple models may share one tier (同级). | ||
| 335 | + # A request may reuse Redis keys from models with tier > A or tier == A (not from lower tiers). | ||
| 336 | + enable_model_quality_tier_cache: true | ||
| 337 | + model_quality_tiers: | ||
| 338 | + deepl: 30 | ||
| 339 | + qwen-mt: 30 | ||
| 340 | + llm: 30 | ||
| 341 | + nllb-200-distilled-600m: 20 | ||
| 342 | + opus-mt-zh-en: 10 | ||
| 343 | + opus-mt-en-zh: 10 | ||
| 344 | + capabilities: | ||
| 345 | + qwen-mt: | ||
| 346 | + enabled: true | ||
| 347 | + backend: qwen_mt | ||
| 348 | + model: qwen-mt-flash | ||
| 349 | + base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1 | ||
| 350 | + timeout_sec: 10.0 | ||
| 351 | + use_cache: true | ||
| 352 | + llm: | ||
| 353 | + enabled: true | ||
| 354 | + backend: llm | ||
| 355 | + model: qwen-flash | ||
| 356 | + base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1 | ||
| 357 | + timeout_sec: 30.0 | ||
| 358 | + use_cache: true | ||
| 359 | + deepl: | ||
| 360 | + enabled: true | ||
| 361 | + backend: deepl | ||
| 362 | + api_url: https://api.deepl.com/v2/translate | ||
| 363 | + timeout_sec: 10.0 | ||
| 364 | + glossary_id: '' | ||
| 365 | + use_cache: true | ||
| 366 | + nllb-200-distilled-600m: | ||
| 367 | + enabled: true | ||
| 368 | + backend: local_nllb | ||
| 369 | + model_id: facebook/nllb-200-distilled-600M | ||
| 370 | + model_dir: ./models/translation/facebook/nllb-200-distilled-600M | ||
| 371 | + ct2_model_dir: ./models/translation/facebook/nllb-200-distilled-600M/ctranslate2-float16 | ||
| 372 | + ct2_compute_type: float16 | ||
| 373 | + ct2_conversion_quantization: float16 | ||
| 374 | + ct2_auto_convert: true | ||
| 375 | + ct2_inter_threads: 4 | ||
| 376 | + ct2_intra_threads: 0 | ||
| 377 | + ct2_max_queued_batches: 32 | ||
| 378 | + ct2_batch_type: examples | ||
| 379 | + ct2_decoding_length_mode: source | ||
| 380 | + ct2_decoding_length_extra: 16 | ||
| 381 | + ct2_decoding_length_min: 32 | ||
| 382 | + device: cuda | ||
| 383 | + torch_dtype: float16 | ||
| 384 | + batch_size: 64 | ||
| 385 | + max_input_length: 256 | ||
| 386 | + max_new_tokens: 96 | ||
| 387 | + num_beams: 4 | ||
| 388 | + use_cache: true | ||
| 389 | + opus-mt-zh-en: | ||
| 390 | + enabled: false | ||
| 391 | + backend: local_marian | ||
| 392 | + model_id: Helsinki-NLP/opus-mt-zh-en | ||
| 393 | + model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en | ||
| 394 | + ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en/ctranslate2-float16 | ||
| 395 | + ct2_compute_type: float16 | ||
| 396 | + ct2_conversion_quantization: float16 | ||
| 397 | + ct2_auto_convert: true | ||
| 398 | + ct2_inter_threads: 1 | ||
| 399 | + ct2_intra_threads: 0 | ||
| 400 | + ct2_max_queued_batches: 0 | ||
| 401 | + ct2_batch_type: examples | ||
| 402 | + device: cuda | ||
| 403 | + torch_dtype: float16 | ||
| 404 | + batch_size: 16 | ||
| 405 | + max_input_length: 256 | ||
| 406 | + max_new_tokens: 256 | ||
| 407 | + num_beams: 1 | ||
| 408 | + use_cache: true | ||
| 409 | + opus-mt-en-zh: | ||
| 410 | + enabled: false | ||
| 411 | + backend: local_marian | ||
| 412 | + model_id: Helsinki-NLP/opus-mt-en-zh | ||
| 413 | + model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh | ||
| 414 | + ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh/ctranslate2-float16 | ||
| 415 | + ct2_compute_type: float16 | ||
| 416 | + ct2_conversion_quantization: float16 | ||
| 417 | + ct2_auto_convert: true | ||
| 418 | + ct2_inter_threads: 1 | ||
| 419 | + ct2_intra_threads: 0 | ||
| 420 | + ct2_max_queued_batches: 0 | ||
| 421 | + ct2_batch_type: examples | ||
| 422 | + device: cuda | ||
| 423 | + torch_dtype: float16 | ||
| 424 | + batch_size: 16 | ||
| 425 | + max_input_length: 256 | ||
| 426 | + max_new_tokens: 256 | ||
| 427 | + num_beams: 1 | ||
| 428 | + use_cache: true | ||
| 429 | + embedding: | ||
| 430 | + provider: http | ||
| 431 | + providers: | ||
| 432 | + http: | ||
| 433 | + text_base_url: http://127.0.0.1:6005 | ||
| 434 | + image_base_url: http://127.0.0.1:6008 | ||
| 435 | + backend: tei | ||
| 436 | + backends: | ||
| 437 | + tei: | ||
| 438 | + base_url: http://127.0.0.1:8080 | ||
| 439 | + timeout_sec: 20 | ||
| 440 | + model_id: Qwen/Qwen3-Embedding-0.6B | ||
| 441 | + local_st: | ||
| 442 | + model_id: Qwen/Qwen3-Embedding-0.6B | ||
| 443 | + device: cuda | ||
| 444 | + batch_size: 32 | ||
| 445 | + normalize_embeddings: true | ||
| 446 | + # 服务内图片后端(embedding 进程启动时读取;cnclip gRPC 与 6008 须同一 model_name) | ||
| 447 | + # Chinese-CLIP:ViT-H-14 → 1024 维,ViT-L-14 → 768 维。须与 mappings/search_products.json 中 | ||
| 448 | + # image_embedding.vector.dims 一致(当前索引为 1024 → 默认 ViT-H-14)。 | ||
| 449 | + image_backend: clip_as_service # clip_as_service | local_cnclip | ||
| 450 | + image_backends: | ||
| 451 | + clip_as_service: | ||
| 452 | + server: grpc://127.0.0.1:51000 | ||
| 453 | + model_name: CN-CLIP/ViT-L-14 | ||
| 454 | + batch_size: 8 | ||
| 455 | + normalize_embeddings: true | ||
| 456 | + local_cnclip: | ||
| 457 | + model_name: ViT-L-14 | ||
| 458 | + device: null | ||
| 459 | + batch_size: 8 | ||
| 460 | + normalize_embeddings: true | ||
| 461 | + rerank: | ||
| 462 | + provider: http | ||
| 463 | + providers: | ||
| 464 | + http: | ||
| 465 | + instances: | ||
| 466 | + default: | ||
| 467 | + base_url: http://127.0.0.1:6007 | ||
| 468 | + service_url: http://127.0.0.1:6007/rerank | ||
| 469 | + fine: | ||
| 470 | + base_url: http://127.0.0.1:6009 | ||
| 471 | + service_url: http://127.0.0.1:6009/rerank | ||
| 472 | + request: | ||
| 473 | + max_docs: 1000 | ||
| 474 | + normalize: true | ||
| 475 | + # 命名实例:同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。 | ||
| 476 | + default_instance: default | ||
| 477 | + instances: | ||
| 478 | + default: | ||
| 479 | + host: 0.0.0.0 | ||
| 480 | + port: 6007 | ||
| 481 | + backend: bge | ||
| 482 | + runtime_dir: ./.runtime/reranker/default | ||
| 483 | + fine: | ||
| 484 | + host: 0.0.0.0 | ||
| 485 | + port: 6009 | ||
| 486 | + backend: bge | ||
| 487 | + runtime_dir: ./.runtime/reranker/fine | ||
| 488 | + backends: | ||
| 489 | + bge: | ||
| 490 | + model_name: BAAI/bge-reranker-v2-m3 | ||
| 491 | + device: null | ||
| 492 | + use_fp16: true | ||
| 493 | + batch_size: 80 | ||
| 494 | + max_length: 160 | ||
| 495 | + cache_dir: ./model_cache | ||
| 496 | + enable_warmup: true | ||
| 497 | + jina_reranker_v3: | ||
| 498 | + model_name: jinaai/jina-reranker-v3 | ||
| 499 | + device: null | ||
| 500 | + dtype: float16 | ||
| 501 | + batch_size: 64 | ||
| 502 | + max_doc_length: 160 | ||
| 503 | + max_query_length: 64 | ||
| 504 | + sort_by_doc_length: true | ||
| 505 | + cache_dir: ./model_cache | ||
| 506 | + trust_remote_code: true | ||
| 507 | + qwen3_vllm: | ||
| 508 | + model_name: Qwen/Qwen3-Reranker-0.6B | ||
| 509 | + engine: vllm | ||
| 510 | + max_model_len: 256 | ||
| 511 | + tensor_parallel_size: 1 | ||
| 512 | + gpu_memory_utilization: 0.2 | ||
| 513 | + dtype: float16 | ||
| 514 | + enable_prefix_caching: true | ||
| 515 | + enforce_eager: false | ||
| 516 | + infer_batch_size: 100 | ||
| 517 | + sort_by_doc_length: true | ||
| 518 | + | ||
| 519 | + # standard=_format_instruction__standard(固定 yes/no system);compact=_format_instruction(instruction 作 system 且 user 内重复 Instruct) | ||
| 520 | + instruction_format: standard # compact standard | ||
| 521 | + # instruction: "Given a query, score the product for relevance" | ||
| 522 | + # "rank products by given query" 比 “Given a query, score the product for relevance” 更好点 | ||
| 523 | + # instruction: "rank products by given query, category match first" | ||
| 524 | + # instruction: "Rank products by query relevance, prioritizing category match" | ||
| 525 | + # instruction: "Rank products by query relevance, prioritizing category and style match" | ||
| 526 | + # instruction: "Rank by query relevance, prioritize category & style" | ||
| 527 | + # instruction: "Relevance ranking: category & style match first" | ||
| 528 | + # instruction: "Score product relevance by query with category & style match prioritized" | ||
| 529 | + # instruction: "Rank products by query with category & style match prioritized" | ||
| 530 | + # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query" | ||
| 531 | + instruction: rank products by given query | ||
| 532 | + | ||
| 533 | + # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score | ||
| 534 | + # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。 | ||
| 535 | + qwen3_vllm_score: | ||
| 536 | + model_name: Qwen/Qwen3-Reranker-0.6B | ||
| 537 | + # 官方 Hub 原版需 true;若改用已转换的 seq-cls 权重(如 tomaarsen/...-seq-cls)则设为 false | ||
| 538 | + use_original_qwen3_hf_overrides: true | ||
| 539 | + # vllm_runner: "auto" | ||
| 540 | + # vllm_convert: "auto" | ||
| 541 | + # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并 | ||
| 542 | + # hf_overrides: {} | ||
| 543 | + engine: vllm | ||
| 544 | + max_model_len: 172 | ||
| 545 | + tensor_parallel_size: 1 | ||
| 546 | + gpu_memory_utilization: 0.15 | ||
| 547 | + dtype: float16 | ||
| 548 | + enable_prefix_caching: true | ||
| 549 | + enforce_eager: false | ||
| 550 | + infer_batch_size: 80 | ||
| 551 | + sort_by_doc_length: true | ||
| 552 | + # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致 | ||
| 553 | + instruction_format: standard # compact standard | ||
| 554 | + # instruction: "Rank products by query with category & style match prioritized" | ||
| 555 | + # instruction: "Given a shopping query, rank products by relevance" | ||
| 556 | + instruction: Rank products by query with category & style match prioritized | ||
| 557 | + qwen3_transformers: | ||
| 558 | + model_name: Qwen/Qwen3-Reranker-0.6B | ||
| 559 | + instruction: rank products by given query | ||
| 560 | + max_length: 8192 | ||
| 561 | + batch_size: 64 | ||
| 562 | + use_fp16: true | ||
| 563 | + attn_implementation: sdpa | ||
| 564 | + qwen3_transformers_packed: | ||
| 565 | + model_name: Qwen/Qwen3-Reranker-0.6B | ||
| 566 | + instruction: Rank products by query with category & style match prioritized | ||
| 567 | + max_model_len: 256 | ||
| 568 | + max_doc_len: 160 | ||
| 569 | + max_docs_per_pack: 0 | ||
| 570 | + use_fp16: true | ||
| 571 | + sort_by_doc_length: true | ||
| 572 | + attn_implementation: eager | ||
| 573 | + qwen3_gguf: | ||
| 574 | + repo_id: DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF | ||
| 575 | + filename: '*Q8_0.gguf' | ||
| 576 | + cache_dir: ./model_cache | ||
| 577 | + local_dir: ./models/reranker/qwen3-reranker-4b-gguf | ||
| 578 | + instruction: Rank products by query with category & style match prioritized | ||
| 579 | + n_ctx: 512 | ||
| 580 | + n_batch: 512 | ||
| 581 | + n_ubatch: 512 | ||
| 582 | + n_gpu_layers: 999 | ||
| 583 | + main_gpu: 0 | ||
| 584 | + n_threads: 2 | ||
| 585 | + n_threads_batch: 4 | ||
| 586 | + flash_attn: true | ||
| 587 | + offload_kqv: true | ||
| 588 | + use_mmap: true | ||
| 589 | + use_mlock: false | ||
| 590 | + infer_batch_size: 8 | ||
| 591 | + sort_by_doc_length: true | ||
| 592 | + length_sort_mode: char | ||
| 593 | + enable_warmup: true | ||
| 594 | + verbose: false | ||
| 595 | + qwen3_gguf_06b: | ||
| 596 | + repo_id: ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF | ||
| 597 | + filename: qwen3-reranker-0.6b-q8_0.gguf | ||
| 598 | + cache_dir: ./model_cache | ||
| 599 | + local_dir: ./models/reranker/qwen3-reranker-0.6b-q8_0-gguf | ||
| 600 | + instruction: Rank products by query with category & style match prioritized | ||
| 601 | + n_ctx: 256 | ||
| 602 | + n_batch: 256 | ||
| 603 | + n_ubatch: 256 | ||
| 604 | + n_gpu_layers: 999 | ||
| 605 | + main_gpu: 0 | ||
| 606 | + n_threads: 2 | ||
| 607 | + n_threads_batch: 4 | ||
| 608 | + flash_attn: true | ||
| 609 | + offload_kqv: true | ||
| 610 | + use_mmap: true | ||
| 611 | + use_mlock: false | ||
| 612 | + infer_batch_size: 32 | ||
| 613 | + sort_by_doc_length: true | ||
| 614 | + length_sort_mode: char | ||
| 615 | + reuse_query_state: false | ||
| 616 | + enable_warmup: true | ||
| 617 | + verbose: false | ||
| 618 | + dashscope_rerank: | ||
| 619 | + model_name: qwen3-rerank | ||
| 620 | + endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks | ||
| 621 | + api_key_env: RERANK_DASHSCOPE_API_KEY_CN | ||
| 622 | + timeout_sec: 10.0 | ||
| 623 | + top_n_cap: 0 # 0 表示 top_n=当前请求文档数 | ||
| 624 | + batchsize: 64 # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断) | ||
| 625 | + instruct: Given a shopping query, rank product titles by relevance | ||
| 626 | + max_retries: 2 | ||
| 627 | + retry_backoff_sec: 0.2 | ||
| 628 | + | ||
| 629 | +spu_config: | ||
| 630 | + enabled: true | ||
| 631 | + spu_field: spu_id | ||
| 632 | + inner_hits_size: 10 | ||
| 633 | + # 配置哪些option维度参与检索(进索引、以及在线搜索) | ||
| 634 | + # 格式为list,选择option1/option2/option3中的一个或多个 | ||
| 635 | + searchable_option_dimensions: | ||
| 636 | + - option1 | ||
| 637 | + - option2 | ||
| 638 | + - option3 | ||
| 639 | + | ||
| 640 | +# 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选) | ||
| 641 | +# 默认 index_languages: [en, zh],可配置为任意 SOURCE_LANG_CODE_MAP.keys() 的子集 | ||
| 642 | +tenant_config: | ||
| 643 | + default: | ||
| 644 | + primary_language: en | ||
| 645 | + index_languages: | ||
| 646 | + - en | ||
| 647 | + - zh | ||
| 648 | + tenants: | ||
| 649 | + '1': | ||
| 650 | + primary_language: zh | ||
| 651 | + index_languages: | ||
| 652 | + - zh | ||
| 653 | + - en | ||
| 654 | + '2': | ||
| 655 | + primary_language: en | ||
| 656 | + index_languages: | ||
| 657 | + - en | ||
| 658 | + - zh | ||
| 659 | + '3': | ||
| 660 | + primary_language: zh | ||
| 661 | + index_languages: | ||
| 662 | + - zh | ||
| 663 | + - en | ||
| 664 | + '162': | ||
| 665 | + primary_language: zh | ||
| 666 | + index_languages: | ||
| 667 | + - zh | ||
| 668 | + - en | ||
| 669 | + '170': | ||
| 670 | + primary_language: en | ||
| 671 | + index_languages: | ||
| 672 | + - en | ||
| 673 | + - zh |
| @@ -0,0 +1,258 @@ | @@ -0,0 +1,258 @@ | ||
| 1 | +import argparse | ||
| 2 | +import json | ||
| 3 | +import os | ||
| 4 | +import random | ||
| 5 | +import statistics | ||
| 6 | +import time | ||
| 7 | +from dataclasses import dataclass | ||
| 8 | + | ||
| 9 | +import requests | ||
| 10 | + | ||
| 11 | + | ||
| 12 | +def _now() -> float: | ||
| 13 | + return time.perf_counter() | ||
| 14 | + | ||
| 15 | + | ||
| 16 | +def _mask(s: str, keep: int = 3) -> str: | ||
| 17 | + if not s: | ||
| 18 | + return "" | ||
| 19 | + if len(s) <= keep * 2: | ||
| 20 | + return "*" * len(s) | ||
| 21 | + return f"{s[:keep]}***{s[-keep:]}" | ||
| 22 | + | ||
| 23 | + | ||
def get_access_token(api_key: str, secret_key: str, timeout_s: float) -> str:
    """Request an OAuth access token from the Baidu token endpoint.

    Args:
        api_key: Sent as the ``client_id`` query parameter.
        secret_key: Sent as the ``client_secret`` query parameter.
        timeout_s: Timeout, in seconds, passed to ``requests.post``.

    Returns:
        The ``access_token`` value of the JSON response, converted to ``str``.

    Raises:
        requests.HTTPError: If ``raise_for_status()`` rejects the response.
        RuntimeError: If the JSON body has no (or an empty) ``access_token``.
    """
    response = requests.post(
        "https://aip.baidubce.com/oauth/2.0/token",
        params={
            "grant_type": "client_credentials",
            "client_id": api_key,
            "client_secret": secret_key,
        },
        timeout=timeout_s,
    )
    response.raise_for_status()

    body = response.json()
    access_token = body.get("access_token")
    if access_token:
        return str(access_token)
    raise RuntimeError(f"no access_token in response: {body}")
| 38 | + | ||
| 39 | + | ||
def translate_one(
    *,
    access_token: str,
    q: str,
    from_lang: str,
    to_lang: str,
    timeout_s: float,
) -> dict:
    """Send one text to the Baidu ``texttrans`` endpoint and return its JSON.

    Args:
        access_token: Token appended to the URL as ``access_token``.
        q: Text placed in the ``q`` field of the request body.
        from_lang: Value of the ``from`` field of the request body.
        to_lang: Value of the ``to`` field of the request body.
        timeout_s: Timeout, in seconds, passed to ``requests.post``.

    Returns:
        The decoded JSON body of the response, unmodified.

    Raises:
        requests.HTTPError: If ``raise_for_status()`` rejects the response.
    """
    request_url = "".join(
        (
            "https://aip.baidubce.com/rpc/2.0/mt/texttrans/v1",
            "?access_token=",
            access_token,
        )
    )
    # Serialise by hand with ensure_ascii=False so non-ASCII text is sent as
    # raw UTF-8 bytes rather than as \uXXXX escapes.
    body = json.dumps(
        {"from": from_lang, "to": to_lang, "q": q},
        ensure_ascii=False,
    ).encode("utf-8")
    response = requests.post(
        request_url,
        headers={"Content-Type": "application/json", "Accept": "application/json"},
        data=body,
        timeout=timeout_s,
    )
    response.raise_for_status()
    return response.json()
| 58 | + | ||
| 59 | + | ||
| 60 | +def random_english_samples(n: int, seed: int | None) -> list[str]: | ||
| 61 | + rng = random.Random(seed) | ||
| 62 | + | ||
| 63 | + starters = [ | ||
| 64 | + "Hello", | ||
| 65 | + "Please", | ||
| 66 | + "Could you", | ||
| 67 | + "I wonder if you can", | ||
| 68 | + "Let's", | ||
| 69 | + "We should", | ||
| 70 | + "It's important to", | ||
| 71 | + "Don't forget to", | ||
| 72 | + "I need to", | ||
| 73 | + "Can we", | ||
| 74 | + ] | ||
| 75 | + verbs = [ | ||
| 76 | + "check", | ||
| 77 | + "compare", | ||
| 78 | + "translate", | ||
| 79 | + "summarize", | ||
| 80 | + "review", | ||
| 81 | + "optimize", | ||
| 82 | + "measure", | ||
| 83 | + "confirm", | ||
| 84 | + "fix", | ||
| 85 | + "verify", | ||
| 86 | + ] | ||
| 87 | + objects = [ | ||
| 88 | + "the latest order status", | ||
| 89 | + "this short sentence", | ||
| 90 | + "the product title and description", | ||
| 91 | + "our search results", | ||
| 92 | + "the API response payload", | ||
| 93 | + "the translation quality", | ||
| 94 | + "the page load time", | ||
| 95 | + "a few random examples", | ||
| 96 | + "the error message", | ||
| 97 | + "the performance metrics", | ||
| 98 | + ] | ||
| 99 | + endings = [ | ||
| 100 | + "today.", | ||
| 101 | + "right now.", | ||
| 102 | + "before we ship.", | ||
| 103 | + "in the next minute.", | ||
| 104 | + "without changing the meaning.", | ||
| 105 | + "as soon as possible.", | ||
| 106 | + "for a quick smoke test.", | ||
| 107 | + "with a clear output.", | ||
| 108 | + "and report the timing.", | ||
| 109 | + "to ensure everything works.", | ||
| 110 | + ] | ||
| 111 | + | ||
| 112 | + samples: list[str] = [] | ||
| 113 | + while len(samples) < n: | ||
| 114 | + s = f"{rng.choice(starters)} {rng.choice(verbs)} {rng.choice(objects)} {rng.choice(endings)}" | ||
| 115 | + # Add a little variation | ||
| 116 | + if rng.random() < 0.25: | ||
| 117 | + s = s.replace("the ", "our ", 1) | ||
| 118 | + if rng.random() < 0.20: | ||
| 119 | + s = s.replace(".", "!") | ||
| 120 | + samples.append(s) | ||
| 121 | + return samples | ||
| 122 | + | ||
| 123 | + | ||
| 124 | +@dataclass(frozen=True) | ||
| 125 | +class Timing: | ||
| 126 | + text: str | ||
| 127 | + seconds: float | ||
| 128 | + ok: bool | ||
| 129 | + error: str | None | ||
| 130 | + sample_out: str | None | ||
| 131 | + | ||
| 132 | + | ||
| 133 | +def main() -> int: | ||
| 134 | + p = argparse.ArgumentParser( | ||
| 135 | + description="Benchmark Baidu texttrans latency with random samples." | ||
| 136 | + ) | ||
| 137 | + p.add_argument("--n", type=int, default=15, help="Number of samples (10-20 suggested).") | ||
| 138 | + p.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility.") | ||
| 139 | + p.add_argument("--from", dest="from_lang", default="en", help="Source language code.") | ||
| 140 | + p.add_argument("--to", dest="to_lang", default="zh", help="Target language code.") | ||
| 141 | + p.add_argument("--timeout", type=float, default=20.0, help="HTTP timeout seconds.") | ||
| 142 | + p.add_argument( | ||
| 143 | + "--print-first", | ||
| 144 | + type=int, | ||
| 145 | + default=5, | ||
| 146 | + help="Print first N translations for sanity check.", | ||
| 147 | + ) | ||
| 148 | + args = p.parse_args() | ||
| 149 | + | ||
| 150 | + if args.n <= 0: | ||
| 151 | + raise SystemExit("--n must be positive") | ||
| 152 | + | ||
| 153 | + api_key = os.environ.get("BAIDU_API_KEY", "").strip() | ||
| 154 | + secret_key = os.environ.get("BAIDU_SECRET_KEY", "").strip() | ||
| 155 | + if not api_key or not secret_key: | ||
| 156 | + raise SystemExit( | ||
| 157 | + "Missing BAIDU_API_KEY / BAIDU_SECRET_KEY env vars. " | ||
| 158 | + "Example:\n" | ||
| 159 | + " export BAIDU_API_KEY='...'\n" | ||
| 160 | + " export BAIDU_SECRET_KEY='...'\n" | ||
| 161 | + ) | ||
| 162 | + | ||
| 163 | + print("Baidu credentials:") | ||
| 164 | + print(f" BAIDU_API_KEY={_mask(api_key)}") | ||
| 165 | + print(f" BAIDU_SECRET_KEY={_mask(secret_key)}") | ||
| 166 | + print() | ||
| 167 | + | ||
| 168 | + t0 = _now() | ||
| 169 | + token = get_access_token(api_key, secret_key, args.timeout) | ||
| 170 | + token_s = _now() - t0 | ||
| 171 | + print(f"Access token acquired in {token_s:.3f}s") | ||
| 172 | + print() | ||
| 173 | + | ||
| 174 | + samples = random_english_samples(args.n, args.seed) | ||
| 175 | + timings: list[Timing] = [] | ||
| 176 | + | ||
| 177 | + for i, text in enumerate(samples, start=1): | ||
| 178 | + start = _now() | ||
| 179 | + try: | ||
| 180 | + out = translate_one( | ||
| 181 | + access_token=token, | ||
| 182 | + q=text, | ||
| 183 | + from_lang=args.from_lang, | ||
| 184 | + to_lang=args.to_lang, | ||
| 185 | + timeout_s=args.timeout, | ||
| 186 | + ) | ||
| 187 | + elapsed = _now() - start | ||
| 188 | + | ||
| 189 | + # Try to extract a plausible translation field, but keep whole JSON as fallback. | ||
| 190 | + translated: str | None = None | ||
| 191 | + if isinstance(out, dict): | ||
| 192 | + result = out.get("result") | ||
| 193 | + if isinstance(result, dict): | ||
| 194 | + translated = result.get("trans_result") | ||
| 195 | + if isinstance(translated, list) and translated: | ||
| 196 | + # Baidu often returns list of dicts with dst/src | ||
| 197 | + first = translated[0] | ||
| 198 | + if isinstance(first, dict): | ||
| 199 | + translated = first.get("dst") | ||
| 200 | + timings.append( | ||
| 201 | + Timing( | ||
| 202 | + text=text, | ||
| 203 | + seconds=elapsed, | ||
| 204 | + ok=True, | ||
| 205 | + error=None, | ||
| 206 | + sample_out=translated if isinstance(translated, str) else None, | ||
| 207 | + ) | ||
| 208 | + ) | ||
| 209 | + except Exception as e: # noqa: BLE001 - benchmark script, keep it simple | ||
| 210 | + elapsed = _now() - start | ||
| 211 | + timings.append(Timing(text=text, seconds=elapsed, ok=False, error=str(e), sample_out=None)) | ||
| 212 | + | ||
| 213 | + print(f"[{i:02d}/{len(samples):02d}] {timings[-1].seconds:.3f}s {'OK' if timings[-1].ok else 'ERR'}") | ||
| 214 | + | ||
| 215 | + ok = [t for t in timings if t.ok] | ||
| 216 | + errs = [t for t in timings if not t.ok] | ||
| 217 | + latencies = [t.seconds for t in ok] | ||
| 218 | + | ||
| 219 | + print() | ||
| 220 | + print("=== Summary ===") | ||
| 221 | + print(f"token_time_s: {token_s:.3f}") | ||
| 222 | + print(f"requests_total: {len(timings)}") | ||
| 223 | + print(f"requests_ok: {len(ok)}") | ||
| 224 | + print(f"requests_err: {len(errs)}") | ||
| 225 | + if latencies: | ||
| 226 | + print(f"total_time_s: {sum(latencies):.3f}") | ||
| 227 | + print(f"avg_s: {statistics.mean(latencies):.3f}") | ||
| 228 | + print(f"p50_s: {statistics.median(latencies):.3f}") | ||
| 229 | + print(f"min_s: {min(latencies):.3f}") | ||
| 230 | + print(f"max_s: {max(latencies):.3f}") | ||
| 231 | + else: | ||
| 232 | + print("No successful requests; cannot compute latency stats.") | ||
| 233 | + | ||
| 234 | + if args.print_first > 0: | ||
| 235 | + print() | ||
| 236 | + print("=== Samples (first N) ===") | ||
| 237 | + shown = 0 | ||
| 238 | + for t in ok: | ||
| 239 | + print(f"- IN: {t.text}") | ||
| 240 | + if t.sample_out: | ||
| 241 | + print(f" OUT: {t.sample_out}") | ||
| 242 | + else: | ||
| 243 | + print(" OUT: (could not parse translation field; see raw JSON by editing script if needed)") | ||
| 244 | + shown += 1 | ||
| 245 | + if shown >= args.print_first: | ||
| 246 | + break | ||
| 247 | + | ||
| 248 | + if errs: | ||
| 249 | + print() | ||
| 250 | + print("=== Errors (first 3) ===") | ||
| 251 | + for t in errs[:3]: | ||
| 252 | + print(f"- {t.seconds:.3f}s: {t.error}") | ||
| 253 | + | ||
| 254 | + return 0 if not errs else 2 | ||
| 255 | + | ||
| 256 | + | ||
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_status = main()
    raise SystemExit(exit_status)