Commit 628ff04f9cbe3a7bbcaa9f3e5f92d378b0b2774d

Authored by tangwang
1 parent b2dff38f

config-with-reranker.yaml

artifacts/search_evaluation/build_launches/clothing_top771_build_resume_direct_20260421T034611Z.pid 0 → 100644
... ... @@ -0,0 +1 @@
  1 +2900923
... ...
config/config-with-reranker.yaml 0 → 100644
... ... @@ -0,0 +1,673 @@
  1 +runtime:
  2 + environment: prod
  3 + index_namespace: ''
  4 + api_host: 0.0.0.0
  5 + api_port: 6002
  6 + indexer_host: 0.0.0.0
  7 + indexer_port: 6004
  8 + embedding_host: 0.0.0.0
  9 + embedding_port: 6005
  10 + embedding_text_port: 6005
  11 + embedding_image_port: 6008
  12 + translator_host: 0.0.0.0
  13 + translator_port: 6006
  14 + reranker_host: 0.0.0.0
  15 + reranker_port: 6007
  16 +infrastructure:
  17 + elasticsearch:
  18 + host: http://localhost:9200
  19 + username: null
  20 + password: null
  21 + redis:
  22 + host: localhost
  23 + port: 6479
  24 + snapshot_db: 0
  25 + password: null
  26 + socket_timeout: 1
  27 + socket_connect_timeout: 1
  28 + retry_on_timeout: false
  29 + cache_expire_days: 720
  30 + embedding_cache_prefix: embedding
  31 + anchor_cache_prefix: product_anchors
  32 + anchor_cache_expire_days: 30
  33 + database:
  34 + host: null
  35 + port: 3306
  36 + database: null
  37 + username: null
  38 + password: null
  39 + secrets:
  40 + dashscope_api_key: null
  41 + deepl_auth_key: null
  42 +es_index_name: search_products
  43 +indexes: []
  44 +assets:
  45 + query_rewrite_dictionary_path: config/dictionaries/query_rewrite.dict
  46 +product_enrich:
  47 + max_workers: 40
  48 +suggestion:
  49 + sat_recall_min: 40
  50 + sat_recall_cap: 100
  51 +search_evaluation:
  52 + artifact_root: artifacts/search_evaluation
  53 + queries_file: scripts/evaluation/queries/queries.txt
  54 + default_dataset_id: core_queries
  55 + datasets:
  56 + - dataset_id: core_queries
  57 + display_name: Core Queries
  58 + description: Legacy baseline evaluation set from queries.txt
  59 + query_file: scripts/evaluation/queries/queries.txt
  60 + tenant_id: '163'
  61 + language: en
  62 + enabled: true
  63 + - dataset_id: clothing_top771
  64 + display_name: Clothing Filtered 771
  65 + description: 771 clothing / shoes / accessories queries filtered from top1k
  66 + query_file: scripts/evaluation/queries/all_keywords.txt.top1w.shuf.top1k.clothing_filtered
  67 + tenant_id: '163'
  68 + language: en
  69 + enabled: true
  70 + eval_log_dir: logs
  71 + default_tenant_id: '163'
  72 + search_base_url: ''
  73 + web_host: 0.0.0.0
  74 + web_port: 6010
  75 + judge_model: qwen3.6-plus
  76 + judge_enable_thinking: false
  77 + judge_dashscope_batch: false
  78 + intent_model: qwen3.6-plus
  79 + intent_enable_thinking: true
  80 + judge_batch_completion_window: 24h
  81 + judge_batch_poll_interval_sec: 10.0
  82 + build_search_depth: 1000
  83 + build_rerank_depth: 10000
  84 + annotate_search_top_k: 120
  85 + annotate_rerank_top_k: 200
  86 + batch_top_k: 100
  87 + audit_top_k: 100
  88 + audit_limit_suspicious: 5
  89 + default_language: en
  90 + search_recall_top_k: 200
  91 + rerank_high_threshold: 0.5
  92 + rerank_high_skip_count: 1000
  93 + rebuild_llm_batch_size: 50
  94 + rebuild_min_llm_batches: 10
  95 + rebuild_max_llm_batches: 40
  96 + rebuild_irrelevant_stop_ratio: 0.799
  97 + rebuild_irrel_low_combined_stop_ratio: 0.959
  98 + rebuild_irrelevant_stop_streak: 3
  99 +es_settings:
  100 + number_of_shards: 1
  101 + number_of_replicas: 0
  102 + refresh_interval: 30s
  103 +
  104 +# 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang}
  105 +field_boosts:
  106 + title: 3.0
  107 + # qanchors enriched_tags 在 enriched_attributes.value中也存在,所以其实他的权重为自身权重+enriched_attributes.value的权重
  108 + qanchors: 1.0
  109 + enriched_tags: 1.0
  110 + enriched_attributes.value: 1.5
  111 + category_name_text: 2.0
  112 + category_path: 2.0
  113 + keywords: 2.0
  114 + tags: 2.0
  115 + option1_values: 1.7
  116 + option2_values: 1.7
  117 + option3_values: 1.7
  118 + brief: 1.0
  119 + description: 1.0
  120 + vendor: 1.0
  121 +
  122 +query_config:
  123 + supported_languages:
  124 + - zh
  125 + - en
  126 + default_language: en
  127 + enable_text_embedding: true
  128 + enable_query_rewrite: true
  129 +
  130 + zh_to_en_model: nllb-200-distilled-600m # nllb-200-distilled-600m deepl opus-mt-zh-en / opus-mt-en-zh
  131 + en_to_zh_model: nllb-200-distilled-600m
  132 + default_translation_model: nllb-200-distilled-600m
  133 + # 源语种不在 index_languages时翻译质量比较重要,因此单独配置
  134 + zh_to_en_model__source_not_in_index: deepl
  135 + en_to_zh_model__source_not_in_index: deepl
  136 + default_translation_model__source_not_in_index: deepl
  137 +
  138 + # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒)
  139 + translation_embedding_wait_budget_ms_source_in_index: 300
  140 + translation_embedding_wait_budget_ms_source_not_in_index: 400
  141 + style_intent:
  142 + enabled: true
  143 + selected_sku_boost: 1.2
  144 + color_dictionary_path: config/dictionaries/style_intent_color.csv
  145 + size_dictionary_path: config/dictionaries/style_intent_size.csv
  146 + dimension_aliases:
  147 + color:
  148 + - color
  149 + - colors
  150 + - colour
  151 + - colours
  152 + - 颜色
  153 + - 色
  154 + - 色系
  155 + size:
  156 + - size
  157 + - sizes
  158 + - sizing
  159 + - 尺码
  160 + - 尺寸
  161 + - 码数
  162 + - 号码
  163 + - 码
  164 + product_title_exclusion:
  165 + enabled: true
  166 + dictionary_path: config/dictionaries/product_title_exclusion.tsv
  167 + search_fields:
  168 + # 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang}
  169 + multilingual_fields:
  170 + - title
  171 + - keywords
  172 + - qanchors
  173 + - enriched_tags
  174 + - enriched_attributes.value
  175 + # - enriched_taxonomy_attributes.value
  176 + - option1_values
  177 + - option2_values
  178 + - option3_values
  179 + - category_path
  180 + - category_name_text
  181 + # - brief
  182 + # - description
  183 + # - vendor
  184 + # shared_fields: 无语言后缀字段;示例: tags, option1_values, option2_values, option3_values
  185 +
  186 + shared_fields: null
  187 + core_multilingual_fields:
  188 + - title
  189 + - qanchors
  190 + - category_name_text
  191 +
  192 + # 文本召回(主查询 + 翻译查询)
  193 + text_query_strategy:
  194 + base_minimum_should_match: 60%
  195 + translation_minimum_should_match: 60%
  196 + translation_boost: 0.75
  197 + tie_breaker_base_query: 0.5
  198 + best_fields_boost: 2.0
  199 + best_fields:
  200 + title: 4.0
  201 + qanchors: 3.0
  202 + category_name_text: 2.0
  203 + phrase_fields:
  204 + title: 5.0
  205 + qanchors: 4.0
  206 + phrase_match_boost: 3.0
  207 + text_embedding_field: title_embedding
  208 + image_embedding_field: image_embedding.vector
  209 +
  210 + # null表示返回所有字段,[]表示不返回任何字段
  211 + source_fields:
  212 + - spu_id
  213 + - handle
  214 + - title
  215 + - brief
  216 + - description
  217 + - vendor
  218 + - category_name
  219 + - category_name_text
  220 + - category_path
  221 + - category_id
  222 + - category_level
  223 + - category1_name
  224 + - category2_name
  225 + - category3_name
  226 + # - tags
  227 + # - keywords
  228 + # - qanchors
  229 + # - enriched_tags
  230 + - enriched_attributes
  231 + - enriched_taxonomy_attributes
  232 +
  233 + - min_price
  234 + - compare_at_price
  235 + - image_url
  236 + - sku_prices
  237 + - sku_weights
  238 + - sku_weight_units
  239 + - total_inventory
  240 + - option1_name
  241 + - option1_values
  242 + - option2_name
  243 + - option2_values
  244 + - option3_name
  245 + - option3_values
  246 + - specifications
  247 + - skus
  248 +
  249 + # KNN:文本向量与多模态(图片)向量各自 boost 与召回(k / num_candidates)
  250 + knn_text_boost: 4
  251 + knn_image_boost: 4
  252 + knn_text_k: 160
  253 + knn_text_num_candidates: 560 # k * 3.5
  254 + knn_text_k_long: 400
  255 + knn_text_num_candidates_long: 1200
  256 + knn_image_k: 400
  257 + knn_image_num_candidates: 1200
  258 +
  259 +function_score:
  260 + score_mode: sum
  261 + boost_mode: multiply
  262 + functions: []
  263 +coarse_rank:
  264 + enabled: true
  265 + input_window: 480
  266 + output_window: 160
  267 + fusion:
  268 + es_bias: 10.0
  269 + es_exponent: 0.05
  270 + text_bias: 0.1
  271 + text_exponent: 0.35
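  272 + # text_translation_weight: weight of base_query_trans_* relative to base_query (see the text dismax fusion in search/rerank_client)
  273 + # ES scoring already discounts the translated query (presumably via translation_boost), so no further discount is intended here. NOTE(review): the 0.8 below still applies one — confirm whether it should be 1.0.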
  274 + text_translation_weight: 0.8
  275 + knn_text_weight: 1.0
  276 + knn_image_weight: 2.0
  277 + knn_tie_breaker: 0.3
  278 + knn_bias: 0.6
  279 + knn_exponent: 0.4
  280 + knn_text_bias: 0.2
  281 + knn_text_exponent: 0.0
  282 + knn_image_bias: 0.2
  283 + knn_image_exponent: 0.0
  284 +fine_rank:
  285 + enabled: false # false 时保序透传
  286 + input_window: 160
  287 + output_window: 80
  288 + timeout_sec: 10.0
  289 + rerank_query_template: '{query}'
  290 + rerank_doc_template: '{title}'
  291 + service_profile: fine
  292 +rerank:
  293 + enabled: true # false 时保序透传
  294 + rerank_window: 160
  295 + exact_knn_rescore_enabled: true
  296 + exact_knn_rescore_window: 160
  297 + timeout_sec: 15.0
  298 + weight_es: 0.4
  299 + weight_ai: 0.6
  300 + rerank_query_template: '{query}'
  301 + rerank_doc_template: '{title}'
  302 + service_profile: default
  303 + # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(es / rerank / fine / text / knn)
  304 + # 其中 knn_score 先做一层 dis_max:
  305 + # max(knn_text_weight * text_knn, knn_image_weight * image_knn)
  306 + # + knn_tie_breaker * 另一侧较弱信号
  307 + fusion:
  308 + es_bias: 10.0
  309 + es_exponent: 0.05
  310 + rerank_bias: 0.1
  311 + rerank_exponent: 1.15
  312 + fine_bias: 0.1
  313 + fine_exponent: 1.0
  314 + text_bias: 0.1
  315 + # text_translation_weight (two lines below): weight of base_query_trans_* relative to base_query (see the text dismax fusion in search/rerank_client)
  316 + text_exponent: 0.25
  317 + text_translation_weight: 0.8
  318 + knn_text_weight: 1.0
  319 + knn_image_weight: 2.0
  320 + knn_tie_breaker: 0.3
  321 + knn_bias: 0.6
  322 + knn_exponent: 0.4
  323 +
  324 +services:
  325 + translation:
  326 + service_url: http://127.0.0.1:6006
  327 + default_model: nllb-200-distilled-600m
  328 + default_scene: general
  329 + timeout_sec: 10.0
  330 + cache:
  331 + ttl_seconds: 62208000
  332 + sliding_expiration: true
  333 + # enable_model_quality_tier_cache: when false, cache lookups match only the requested model's own keys (model_quality_tiers is ignored).
  334 + # Higher tier = better quality. Several models may share the same tier.
  335 + # When true, a request for a model at tier A may reuse cached entries from any model at tier >= A (never from a lower tier).
  336 + enable_model_quality_tier_cache: true
  337 + model_quality_tiers:
  338 + deepl: 30
  339 + qwen-mt: 30
  340 + llm: 30
  341 + nllb-200-distilled-600m: 20
  342 + opus-mt-zh-en: 10
  343 + opus-mt-en-zh: 10
  344 + capabilities:
  345 + qwen-mt:
  346 + enabled: true
  347 + backend: qwen_mt
  348 + model: qwen-mt-flash
  349 + base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
  350 + timeout_sec: 10.0
  351 + use_cache: true
  352 + llm:
  353 + enabled: true
  354 + backend: llm
  355 + model: qwen-flash
  356 + base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
  357 + timeout_sec: 30.0
  358 + use_cache: true
  359 + deepl:
  360 + enabled: true
  361 + backend: deepl
  362 + api_url: https://api.deepl.com/v2/translate
  363 + timeout_sec: 10.0
  364 + glossary_id: ''
  365 + use_cache: true
  366 + nllb-200-distilled-600m:
  367 + enabled: true
  368 + backend: local_nllb
  369 + model_id: facebook/nllb-200-distilled-600M
  370 + model_dir: ./models/translation/facebook/nllb-200-distilled-600M
  371 + ct2_model_dir: ./models/translation/facebook/nllb-200-distilled-600M/ctranslate2-float16
  372 + ct2_compute_type: float16
  373 + ct2_conversion_quantization: float16
  374 + ct2_auto_convert: true
  375 + ct2_inter_threads: 4
  376 + ct2_intra_threads: 0
  377 + ct2_max_queued_batches: 32
  378 + ct2_batch_type: examples
  379 + ct2_decoding_length_mode: source
  380 + ct2_decoding_length_extra: 16
  381 + ct2_decoding_length_min: 32
  382 + device: cuda
  383 + torch_dtype: float16
  384 + batch_size: 64
  385 + max_input_length: 256
  386 + max_new_tokens: 96
  387 + num_beams: 4
  388 + use_cache: true
  389 + opus-mt-zh-en:
  390 + enabled: false
  391 + backend: local_marian
  392 + model_id: Helsinki-NLP/opus-mt-zh-en
  393 + model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en
  394 + ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en/ctranslate2-float16
  395 + ct2_compute_type: float16
  396 + ct2_conversion_quantization: float16
  397 + ct2_auto_convert: true
  398 + ct2_inter_threads: 1
  399 + ct2_intra_threads: 0
  400 + ct2_max_queued_batches: 0
  401 + ct2_batch_type: examples
  402 + device: cuda
  403 + torch_dtype: float16
  404 + batch_size: 16
  405 + max_input_length: 256
  406 + max_new_tokens: 256
  407 + num_beams: 1
  408 + use_cache: true
  409 + opus-mt-en-zh:
  410 + enabled: false
  411 + backend: local_marian
  412 + model_id: Helsinki-NLP/opus-mt-en-zh
  413 + model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh
  414 + ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh/ctranslate2-float16
  415 + ct2_compute_type: float16
  416 + ct2_conversion_quantization: float16
  417 + ct2_auto_convert: true
  418 + ct2_inter_threads: 1
  419 + ct2_intra_threads: 0
  420 + ct2_max_queued_batches: 0
  421 + ct2_batch_type: examples
  422 + device: cuda
  423 + torch_dtype: float16
  424 + batch_size: 16
  425 + max_input_length: 256
  426 + max_new_tokens: 256
  427 + num_beams: 1
  428 + use_cache: true
  429 + embedding:
  430 + provider: http
  431 + providers:
  432 + http:
  433 + text_base_url: http://127.0.0.1:6005
  434 + image_base_url: http://127.0.0.1:6008
  435 + backend: tei
  436 + backends:
  437 + tei:
  438 + base_url: http://127.0.0.1:8080
  439 + timeout_sec: 20
  440 + model_id: Qwen/Qwen3-Embedding-0.6B
  441 + local_st:
  442 + model_id: Qwen/Qwen3-Embedding-0.6B
  443 + device: cuda
  444 + batch_size: 32
  445 + normalize_embeddings: true
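  446 + # In-service image backend (read when the embedding process starts; the cnclip gRPC server and the 6008 service must use the same model_name).
  447 + # Chinese-CLIP: ViT-H-14 → 1024 dims, ViT-L-14 → 768 dims. Must match image_embedding.vector.dims in
  448 + # mappings/search_products.json. NOTE(review): this comment says the index is 1024-dim (ViT-H-14), but both backends below are set to ViT-L-14 (768-dim) — confirm which is correct.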
  449 + image_backend: clip_as_service # clip_as_service | local_cnclip
  450 + image_backends:
  451 + clip_as_service:
  452 + server: grpc://127.0.0.1:51000
  453 + model_name: CN-CLIP/ViT-L-14
  454 + batch_size: 8
  455 + normalize_embeddings: true
  456 + local_cnclip:
  457 + model_name: ViT-L-14
  458 + device: null
  459 + batch_size: 8
  460 + normalize_embeddings: true
  461 + rerank:
  462 + provider: http
  463 + providers:
  464 + http:
  465 + instances:
  466 + default:
  467 + base_url: http://127.0.0.1:6007
  468 + service_url: http://127.0.0.1:6007/rerank
  469 + fine:
  470 + base_url: http://127.0.0.1:6009
  471 + service_url: http://127.0.0.1:6009/rerank
  472 + request:
  473 + max_docs: 1000
  474 + normalize: true
  475 + # 命名实例:同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。
  476 + default_instance: default
  477 + instances:
  478 + default:
  479 + host: 0.0.0.0
  480 + port: 6007
  481 + backend: bge
  482 + runtime_dir: ./.runtime/reranker/default
  483 + fine:
  484 + host: 0.0.0.0
  485 + port: 6009
  486 + backend: bge
  487 + runtime_dir: ./.runtime/reranker/fine
  488 + backends:
  489 + bge:
  490 + model_name: BAAI/bge-reranker-v2-m3
  491 + device: null
  492 + use_fp16: true
  493 + batch_size: 80
  494 + max_length: 160
  495 + cache_dir: ./model_cache
  496 + enable_warmup: true
  497 + jina_reranker_v3:
  498 + model_name: jinaai/jina-reranker-v3
  499 + device: null
  500 + dtype: float16
  501 + batch_size: 64
  502 + max_doc_length: 160
  503 + max_query_length: 64
  504 + sort_by_doc_length: true
  505 + cache_dir: ./model_cache
  506 + trust_remote_code: true
  507 + qwen3_vllm:
  508 + model_name: Qwen/Qwen3-Reranker-0.6B
  509 + engine: vllm
  510 + max_model_len: 256
  511 + tensor_parallel_size: 1
  512 + gpu_memory_utilization: 0.2
  513 + dtype: float16
  514 + enable_prefix_caching: true
  515 + enforce_eager: false
  516 + infer_batch_size: 100
  517 + sort_by_doc_length: true
  518 +
  519 + # standard=_format_instruction__standard(固定 yes/no system);compact=_format_instruction(instruction 作 system 且 user 内重复 Instruct)
  520 + instruction_format: standard # compact standard
  521 + # instruction: "Given a query, score the product for relevance"
  522 + # "rank products by given query" 比 “Given a query, score the product for relevance” 更好点
  523 + # instruction: "rank products by given query, category match first"
  524 + # instruction: "Rank products by query relevance, prioritizing category match"
  525 + # instruction: "Rank products by query relevance, prioritizing category and style match"
  526 + # instruction: "Rank by query relevance, prioritize category & style"
  527 + # instruction: "Relevance ranking: category & style match first"
  528 + # instruction: "Score product relevance by query with category & style match prioritized"
  529 + # instruction: "Rank products by query with category & style match prioritized"
  530 + # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query"
  531 + instruction: rank products by given query
  532 +
  533 + # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score
  534 + # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。
  535 + qwen3_vllm_score:
  536 + model_name: Qwen/Qwen3-Reranker-0.6B
  537 + # 官方 Hub 原版需 true;若改用已转换的 seq-cls 权重(如 tomaarsen/...-seq-cls)则设为 false
  538 + use_original_qwen3_hf_overrides: true
  539 + # vllm_runner: "auto"
  540 + # vllm_convert: "auto"
  541 + # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并
  542 + # hf_overrides: {}
  543 + engine: vllm
  544 + max_model_len: 172
  545 + tensor_parallel_size: 1
  546 + gpu_memory_utilization: 0.15
  547 + dtype: float16
  548 + enable_prefix_caching: true
  549 + enforce_eager: false
  550 + infer_batch_size: 80
  551 + sort_by_doc_length: true
  552 + # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致
  553 + instruction_format: standard # compact standard
  554 + # instruction: "Rank products by query with category & style match prioritized"
  555 + # instruction: "Given a shopping query, rank products by relevance"
  556 + instruction: Rank products by query with category & style match prioritized
  557 + qwen3_transformers:
  558 + model_name: Qwen/Qwen3-Reranker-0.6B
  559 + instruction: rank products by given query
  560 + max_length: 8192
  561 + batch_size: 64
  562 + use_fp16: true
  563 + attn_implementation: sdpa
  564 + qwen3_transformers_packed:
  565 + model_name: Qwen/Qwen3-Reranker-0.6B
  566 + instruction: Rank products by query with category & style match prioritized
  567 + max_model_len: 256
  568 + max_doc_len: 160
  569 + max_docs_per_pack: 0
  570 + use_fp16: true
  571 + sort_by_doc_length: true
  572 + attn_implementation: eager
  573 + qwen3_gguf:
  574 + repo_id: DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF
  575 + filename: '*Q8_0.gguf'
  576 + cache_dir: ./model_cache
  577 + local_dir: ./models/reranker/qwen3-reranker-4b-gguf
  578 + instruction: Rank products by query with category & style match prioritized
  579 + n_ctx: 512
  580 + n_batch: 512
  581 + n_ubatch: 512
  582 + n_gpu_layers: 999
  583 + main_gpu: 0
  584 + n_threads: 2
  585 + n_threads_batch: 4
  586 + flash_attn: true
  587 + offload_kqv: true
  588 + use_mmap: true
  589 + use_mlock: false
  590 + infer_batch_size: 8
  591 + sort_by_doc_length: true
  592 + length_sort_mode: char
  593 + enable_warmup: true
  594 + verbose: false
  595 + qwen3_gguf_06b:
  596 + repo_id: ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF
  597 + filename: qwen3-reranker-0.6b-q8_0.gguf
  598 + cache_dir: ./model_cache
  599 + local_dir: ./models/reranker/qwen3-reranker-0.6b-q8_0-gguf
  600 + instruction: Rank products by query with category & style match prioritized
  601 + n_ctx: 256
  602 + n_batch: 256
  603 + n_ubatch: 256
  604 + n_gpu_layers: 999
  605 + main_gpu: 0
  606 + n_threads: 2
  607 + n_threads_batch: 4
  608 + flash_attn: true
  609 + offload_kqv: true
  610 + use_mmap: true
  611 + use_mlock: false
  612 + infer_batch_size: 32
  613 + sort_by_doc_length: true
  614 + length_sort_mode: char
  615 + reuse_query_state: false
  616 + enable_warmup: true
  617 + verbose: false
  618 + dashscope_rerank:
  619 + model_name: qwen3-rerank
  620 + endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks
  621 + api_key_env: RERANK_DASHSCOPE_API_KEY_CN
  622 + timeout_sec: 10.0
  623 + top_n_cap: 0 # 0 表示 top_n=当前请求文档数
  624 + batchsize: 64 # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断)
  625 + instruct: Given a shopping query, rank product titles by relevance
  626 + max_retries: 2
  627 + retry_backoff_sec: 0.2
  628 +
  629 +spu_config:
  630 + enabled: true
  631 + spu_field: spu_id
  632 + inner_hits_size: 10
  633 + # 配置哪些option维度参与检索(进索引、以及在线搜索)
  634 + # 格式为list,选择option1/option2/option3中的一个或多个
  635 + searchable_option_dimensions:
  636 + - option1
  637 + - option2
  638 + - option3
  639 +
  640 +# 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选)
  641 +# 默认 index_languages: [en, zh],可配置为任意 SOURCE_LANG_CODE_MAP.keys() 的子集
  642 +tenant_config:
  643 + default:
  644 + primary_language: en
  645 + index_languages:
  646 + - en
  647 + - zh
  648 + tenants:
  649 + '1':
  650 + primary_language: zh
  651 + index_languages:
  652 + - zh
  653 + - en
  654 + '2':
  655 + primary_language: en
  656 + index_languages:
  657 + - en
  658 + - zh
  659 + '3':
  660 + primary_language: zh
  661 + index_languages:
  662 + - zh
  663 + - en
  664 + '162':
  665 + primary_language: zh
  666 + index_languages:
  667 + - zh
  668 + - en
  669 + '170':
  670 + primary_language: en
  671 + index_languages:
  672 + - en
  673 + - zh
... ...
scripts/manual/baidu_translate_benchmark.py 0 → 100644
... ... @@ -0,0 +1,258 @@
  1 +import argparse
  2 +import json
  3 +import os
  4 +import random
  5 +import statistics
  6 +import time
  7 +from dataclasses import dataclass
  8 +
  9 +import requests
  10 +
  11 +
  12 +def _now() -> float:
  13 + return time.perf_counter()
  14 +
  15 +
  16 +def _mask(s: str, keep: int = 3) -> str:
  17 + if not s:
  18 + return ""
  19 + if len(s) <= keep * 2:
  20 + return "*" * len(s)
  21 + return f"{s[:keep]}***{s[-keep:]}"
  22 +
  23 +
def get_access_token(api_key: str, secret_key: str, timeout_s: float) -> str:
    """Exchange the API key pair for an OAuth access token.

    Sends a ``client_credentials`` POST to the Baidu token endpoint with the
    credentials as URL query parameters.

    Args:
        api_key: Sent as ``client_id``.
        secret_key: Sent as ``client_secret``.
        timeout_s: HTTP timeout in seconds passed to ``requests``.

    Returns:
        The ``access_token`` field of the JSON response, as a string.

    Raises:
        requests.RequestException: If the request cannot be completed.
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        RuntimeError: If the response carries no ``access_token``.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {
        "grant_type": "client_credentials",
        "client_id": api_key,
        "client_secret": secret_key,
    }
    try:
        r = requests.post(url, params=params, timeout=timeout_s)
    except requests.RequestException as exc:
        # The original exception text contains the full request URL, i.e. the
        # client_secret. Re-raise without it (and without chaining) so the
        # secret never reaches a traceback or log.
        raise requests.RequestException(
            f"token request failed: {type(exc).__name__} "
            "(details omitted because they contain the credential query string)"
        ) from None
    if not r.ok:
        # Same reason as above: raise_for_status() would embed the URL.
        raise requests.HTTPError(
            f"token request failed: HTTP {r.status_code} {r.reason}", response=r
        )
    data = r.json()
    token = data.get("access_token")
    if not token:
        raise RuntimeError(f"no access_token in response: {data}")
    return str(token)
  38 +
  39 +
def translate_one(
    *,
    access_token: str,
    q: str,
    from_lang: str,
    to_lang: str,
    timeout_s: float,
) -> dict:
    """Send one text to the Baidu ``texttrans`` endpoint and return its reply.

    All arguments are keyword-only. The access token is appended to the URL
    as the ``access_token`` query parameter; the body is the JSON object
    ``{"from", "to", "q"}`` serialised without ASCII escaping and encoded as
    UTF-8.

    Returns:
        The decoded JSON response.

    Raises:
        requests.HTTPError: Via ``raise_for_status`` on a 4xx/5xx status.
    """
    endpoint = "".join(
        (
            "https://aip.baidubce.com/rpc/2.0/mt/texttrans/v1",
            "?access_token=",
            access_token,
        )
    )
    request_fields = {"from": from_lang, "to": to_lang, "q": q}
    body = json.dumps(request_fields, ensure_ascii=False).encode("utf-8")
    response = requests.post(
        endpoint,
        headers={"Content-Type": "application/json", "Accept": "application/json"},
        data=body,
        timeout=timeout_s,
    )
    response.raise_for_status()
    return response.json()
  58 +
  59 +
  60 +def random_english_samples(n: int, seed: int | None) -> list[str]:
  61 + rng = random.Random(seed)
  62 +
  63 + starters = [
  64 + "Hello",
  65 + "Please",
  66 + "Could you",
  67 + "I wonder if you can",
  68 + "Let's",
  69 + "We should",
  70 + "It's important to",
  71 + "Don't forget to",
  72 + "I need to",
  73 + "Can we",
  74 + ]
  75 + verbs = [
  76 + "check",
  77 + "compare",
  78 + "translate",
  79 + "summarize",
  80 + "review",
  81 + "optimize",
  82 + "measure",
  83 + "confirm",
  84 + "fix",
  85 + "verify",
  86 + ]
  87 + objects = [
  88 + "the latest order status",
  89 + "this short sentence",
  90 + "the product title and description",
  91 + "our search results",
  92 + "the API response payload",
  93 + "the translation quality",
  94 + "the page load time",
  95 + "a few random examples",
  96 + "the error message",
  97 + "the performance metrics",
  98 + ]
  99 + endings = [
  100 + "today.",
  101 + "right now.",
  102 + "before we ship.",
  103 + "in the next minute.",
  104 + "without changing the meaning.",
  105 + "as soon as possible.",
  106 + "for a quick smoke test.",
  107 + "with a clear output.",
  108 + "and report the timing.",
  109 + "to ensure everything works.",
  110 + ]
  111 +
  112 + samples: list[str] = []
  113 + while len(samples) < n:
  114 + s = f"{rng.choice(starters)} {rng.choice(verbs)} {rng.choice(objects)} {rng.choice(endings)}"
  115 + # Add a little variation
  116 + if rng.random() < 0.25:
  117 + s = s.replace("the ", "our ", 1)
  118 + if rng.random() < 0.20:
  119 + s = s.replace(".", "!")
  120 + samples.append(s)
  121 + return samples
  122 +
  123 +
  124 +@dataclass(frozen=True)
  125 +class Timing:
  126 + text: str
  127 + seconds: float
  128 + ok: bool
  129 + error: str | None
  130 + sample_out: str | None
  131 +
  132 +
def _api_error(out: object) -> str | None:
    """Describe an API-level failure reported inside a response body, if any."""
    if not isinstance(out, dict):
        return None
    code = out.get("error_code")
    # A missing or zero-like code is treated as "no error".
    if code in (None, 0, "0", ""):
        return None
    return f"API error {code}: {out.get('error_msg')}"


def _extract_translation(out: object) -> str | None:
    """Return the first translated string found in a response, else ``None``."""
    if not isinstance(out, dict):
        return None
    result = out.get("result")
    if not isinstance(result, dict):
        return None
    items = result.get("trans_result")
    # Usual shape: a list of dicts carrying "dst" / "src".
    if isinstance(items, list) and items and isinstance(items[0], dict):
        dst = items[0].get("dst")
        return dst if isinstance(dst, str) else None
    return items if isinstance(items, str) else None


def main() -> int:
    """Run the latency benchmark from the command line.

    Parses the CLI options, reads ``BAIDU_API_KEY`` / ``BAIDU_SECRET_KEY``
    from the environment, obtains an access token, translates ``--n``
    generated sentences one at a time while timing each call, then prints a
    summary, the first ``--print-first`` translations and up to three errors.

    A request counts as failed when it raises or when the response body
    carries a non-zero ``error_code``; failed requests are excluded from the
    latency statistics.

    Returns:
        0 when every request succeeded, 2 when at least one failed.

    Raises:
        SystemExit: If ``--n`` is not positive or a credential variable is
            missing.
    """
    p = argparse.ArgumentParser(
        description="Benchmark Baidu texttrans latency with random samples."
    )
    p.add_argument("--n", type=int, default=15, help="Number of samples (10-20 suggested).")
    p.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility.")
    p.add_argument("--from", dest="from_lang", default="en", help="Source language code.")
    p.add_argument("--to", dest="to_lang", default="zh", help="Target language code.")
    p.add_argument("--timeout", type=float, default=20.0, help="HTTP timeout seconds.")
    p.add_argument(
        "--print-first",
        type=int,
        default=5,
        help="Print first N translations for sanity check.",
    )
    args = p.parse_args()

    if args.n <= 0:
        raise SystemExit("--n must be positive")

    api_key = os.environ.get("BAIDU_API_KEY", "").strip()
    secret_key = os.environ.get("BAIDU_SECRET_KEY", "").strip()
    if not api_key or not secret_key:
        raise SystemExit(
            "Missing BAIDU_API_KEY / BAIDU_SECRET_KEY env vars. "
            "Example:\n"
            "  export BAIDU_API_KEY='...'\n"
            "  export BAIDU_SECRET_KEY='...'\n"
        )

    print("Baidu credentials:")
    print(f"  BAIDU_API_KEY={_mask(api_key)}")
    print(f"  BAIDU_SECRET_KEY={_mask(secret_key)}")
    print()

    t0 = _now()
    token = get_access_token(api_key, secret_key, args.timeout)
    token_s = _now() - t0
    print(f"Access token acquired in {token_s:.3f}s")
    print()

    samples = random_english_samples(args.n, args.seed)
    timings: list[Timing] = []

    for i, text in enumerate(samples, start=1):
        translated: str | None = None
        error: str | None = None
        start = _now()
        try:
            out = translate_one(
                access_token=token,
                q=text,
                from_lang=args.from_lang,
                to_lang=args.to_lang,
                timeout_s=args.timeout,
            )
        except Exception as e:  # noqa: BLE001 - benchmark script, keep it simple
            elapsed = _now() - start
            # Exception text from the HTTP layer embeds the request URL, which
            # contains the access token; redact it before it is stored/printed.
            error = str(e).replace(token, _mask(token))
        else:
            # Stop the clock before parsing so only the request is timed.
            elapsed = _now() - start
            # A body with error_code is a failure even though the call returned.
            error = _api_error(out)
            if error is None:
                translated = _extract_translation(out)

        timings.append(
            Timing(
                text=text,
                seconds=elapsed,
                ok=error is None,
                error=error,
                sample_out=translated,
            )
        )
        print(f"[{i:02d}/{len(samples):02d}] {timings[-1].seconds:.3f}s {'OK' if timings[-1].ok else 'ERR'}")

    ok = [t for t in timings if t.ok]
    errs = [t for t in timings if not t.ok]
    latencies = [t.seconds for t in ok]

    print()
    print("=== Summary ===")
    print(f"token_time_s: {token_s:.3f}")
    print(f"requests_total: {len(timings)}")
    print(f"requests_ok: {len(ok)}")
    print(f"requests_err: {len(errs)}")
    if latencies:
        print(f"total_time_s: {sum(latencies):.3f}")
        print(f"avg_s: {statistics.mean(latencies):.3f}")
        print(f"p50_s: {statistics.median(latencies):.3f}")
        print(f"min_s: {min(latencies):.3f}")
        print(f"max_s: {max(latencies):.3f}")
    else:
        print("No successful requests; cannot compute latency stats.")

    if args.print_first > 0:
        print()
        print("=== Samples (first N) ===")
        for t in ok[: args.print_first]:
            print(f"- IN:  {t.text}")
            if t.sample_out:
                print(f"  OUT: {t.sample_out}")
            else:
                print("  OUT: (could not parse translation field; see raw JSON by editing script if needed)")

    if errs:
        print()
        print("=== Errors (first 3) ===")
        for t in errs[:3]:
            print(f"- {t.seconds:.3f}s: {t.error}")

    return 0 if not errs else 2


if __name__ == "__main__":
    raise SystemExit(main())
... ...