config-with-reranker.yaml

tangwang
1 parent b2dff38f
Showing 3 changed files with 932 additions and 0 deletions Show diff stats
artifacts/search_evaluation/build_launches/clothing_top771_build_resume_direct_20260421T034611Z.pid
config/config-with-reranker.yaml
scripts/manual/baidu_translate_benchmark.py
@@ -0,0 +1 @@
+2900923
@@ -0,0 +1,673 @@
+runtime:
+  environment: prod
+  index_namespace: ''
+  api_host: 0.0.0.0
+  api_port: 6002
+  indexer_host: 0.0.0.0
+  indexer_port: 6004
+  embedding_host: 0.0.0.0
+  embedding_port: 6005
+  embedding_text_port: 6005
+  embedding_image_port: 6008
+  translator_host: 0.0.0.0
+  translator_port: 6006
+  reranker_host: 0.0.0.0
+  reranker_port: 6007
+infrastructure:
+  elasticsearch:
+    host: http://localhost:9200
+    username: null
+    password: null
+  redis:
+    host: localhost
+    port: 6479
+    snapshot_db: 0
+    password: null
+    socket_timeout: 1
+    socket_connect_timeout: 1
+    retry_on_timeout: false
+    cache_expire_days: 720
+    embedding_cache_prefix: embedding
+    anchor_cache_prefix: product_anchors
+    anchor_cache_expire_days: 30
+  database:
+    host: null
+    port: 3306
+    database: null
+    username: null
+    password: null
+  secrets:
+    dashscope_api_key: null
+    deepl_auth_key: null
+es_index_name: search_products
+indexes: []
+assets:
+  query_rewrite_dictionary_path: config/dictionaries/query_rewrite.dict
+product_enrich:
+  max_workers: 40
+suggestion:
+  sat_recall_min: 40
+  sat_recall_cap: 100
+search_evaluation:
+  artifact_root: artifacts/search_evaluation
+  queries_file: scripts/evaluation/queries/queries.txt
+  default_dataset_id: core_queries
+  datasets:
+  - dataset_id: core_queries
+    display_name: Core Queries
+    description: Legacy baseline evaluation set from queries.txt
+    query_file: scripts/evaluation/queries/queries.txt
+    tenant_id: '163'
+    language: en
+    enabled: true
+  - dataset_id: clothing_top771
+    display_name: Clothing Filtered 771
+    description: 771 clothing / shoes / accessories queries filtered from top1k
+    query_file: scripts/evaluation/queries/all_keywords.txt.top1w.shuf.top1k.clothing_filtered
+    tenant_id: '163'
+    language: en
+    enabled: true
+  eval_log_dir: logs
+  default_tenant_id: '163'
+  search_base_url: ''
+  web_host: 0.0.0.0
+  web_port: 6010
+  judge_model: qwen3.6-plus
+  judge_enable_thinking: false
+  judge_dashscope_batch: false
+  intent_model: qwen3.6-plus
+  intent_enable_thinking: true
+  judge_batch_completion_window: 24h
+  judge_batch_poll_interval_sec: 10.0
+  build_search_depth: 1000
+  build_rerank_depth: 10000
+  annotate_search_top_k: 120
+  annotate_rerank_top_k: 200
+  batch_top_k: 100
+  audit_top_k: 100
+  audit_limit_suspicious: 5
+  default_language: en
+  search_recall_top_k: 200
+  rerank_high_threshold: 0.5
+  rerank_high_skip_count: 1000
+  rebuild_llm_batch_size: 50
+  rebuild_min_llm_batches: 10
+  rebuild_max_llm_batches: 40
+  rebuild_irrelevant_stop_ratio: 0.799
+  rebuild_irrel_low_combined_stop_ratio: 0.959
+  rebuild_irrelevant_stop_streak: 3
+es_settings:
+  number_of_shards: 1
+  number_of_replicas: 0
+  refresh_interval: 30s
+
+# 统一按“字段基名”配置；查询时按实际检索语言动态拼接 .{lang}
+field_boosts:
+  title: 3.0
+  # qanchors and enriched_tags values also appear in enriched_attributes.value, so their effective weight is their own boost plus the enriched_attributes.value boost.
+  qanchors: 1.0
+  enriched_tags: 1.0
+  enriched_attributes.value: 1.5
+  category_name_text: 2.0
+  category_path: 2.0
+  keywords: 2.0
+  tags: 2.0
+  option1_values: 1.7
+  option2_values: 1.7
+  option3_values: 1.7
+  brief: 1.0
+  description: 1.0
+  vendor: 1.0
+
+query_config:
+  supported_languages:
+  - zh
+  - en
+  default_language: en
+  enable_text_embedding: true
+  enable_query_rewrite: true
+
+  zh_to_en_model: nllb-200-distilled-600m # nllb-200-distilled-600m deepl opus-mt-zh-en / opus-mt-en-zh
+  en_to_zh_model: nllb-200-distilled-600m
+  default_translation_model: nllb-200-distilled-600m
+  # 源语种不在 index_languages时翻译质量比较重要，因此单独配置
+  zh_to_en_model__source_not_in_index: deepl
+  en_to_zh_model__source_not_in_index: deepl
+  default_translation_model__source_not_in_index: deepl
+
+  # 查询解析阶段：翻译与 query 向量并发执行，共用同一等待预算（毫秒）
+  translation_embedding_wait_budget_ms_source_in_index: 300
+  translation_embedding_wait_budget_ms_source_not_in_index: 400
+  style_intent:
+    enabled: true
+    selected_sku_boost: 1.2
+    color_dictionary_path: config/dictionaries/style_intent_color.csv
+    size_dictionary_path: config/dictionaries/style_intent_size.csv
+    dimension_aliases:
+      color:
+      - color
+      - colors
+      - colour
+      - colours
+      - 颜色
+      - 色
+      - 色系
+      size:
+      - size
+      - sizes
+      - sizing
+      - 尺码
+      - 尺寸
+      - 码数
+      - 号码
+      - 码
+  product_title_exclusion:
+    enabled: true
+    dictionary_path: config/dictionaries/product_title_exclusion.tsv
+  search_fields:
+    # 统一按“字段基名”配置；查询时按实际检索语言动态拼接 .{lang}
+    multilingual_fields:
+    - title
+    - keywords
+    - qanchors
+    - enriched_tags
+    - enriched_attributes.value
+    # - enriched_taxonomy_attributes.value
+    - option1_values
+    - option2_values
+    - option3_values
+    - category_path
+    - category_name_text
+    # - brief
+    # - description
+    # - vendor
+    # shared_fields: 无语言后缀字段；示例: tags, option1_values, option2_values, option3_values
+
+    shared_fields: null
+    core_multilingual_fields:
+    - title
+    - qanchors
+    - category_name_text
+
+  # 文本召回（主查询 + 翻译查询）
+  text_query_strategy:
+    base_minimum_should_match: 60%
+    translation_minimum_should_match: 60%
+    translation_boost: 0.75
+    tie_breaker_base_query: 0.5
+    best_fields_boost: 2.0
+    best_fields:
+      title: 4.0
+      qanchors: 3.0
+      category_name_text: 2.0
+    phrase_fields:
+      title: 5.0
+      qanchors: 4.0
+    phrase_match_boost: 3.0
+  text_embedding_field: title_embedding
+  image_embedding_field: image_embedding.vector
+
+  # null表示返回所有字段，[]表示不返回任何字段
+  source_fields:
+  - spu_id
+  - handle
+  - title
+  - brief
+  - description
+  - vendor
+  - category_name
+  - category_name_text
+  - category_path
+  - category_id
+  - category_level
+  - category1_name
+  - category2_name
+  - category3_name
+  # - tags
+  # - keywords
+  # - qanchors
+  # - enriched_tags
+  - enriched_attributes
+  - enriched_taxonomy_attributes
+
+  - min_price
+  - compare_at_price
+  - image_url
+  - sku_prices
+  - sku_weights
+  - sku_weight_units
+  - total_inventory
+  - option1_name
+  - option1_values
+  - option2_name
+  - option2_values
+  - option3_name
+  - option3_values
+  - specifications
+  - skus
+
+  # KNN：文本向量与多模态（图片）向量各自 boost 与召回（k / num_candidates）
+  knn_text_boost: 4
+  knn_image_boost: 4
+  knn_text_k: 160
+  knn_text_num_candidates: 560  # knn_text_k * 3.5
+  knn_text_k_long: 400
+  knn_text_num_candidates_long: 1200
+  knn_image_k: 400
+  knn_image_num_candidates: 1200
+
+function_score:
+  score_mode: sum
+  boost_mode: multiply
+  functions: []
+coarse_rank:
+  enabled: true
+  input_window: 480
+  output_window: 160
+  fusion:
+    es_bias: 10.0
+    es_exponent: 0.05
+    text_bias: 0.1
+    text_exponent: 0.35
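+    # text_translation_weight: weight of base_query_trans_* relative to base_query (see the text dis_max fusion in search/rerank_client).
+    # The ES score already discounts translated clauses, so the stated intent is not to discount them again here.
+    # NOTE(review): the value below is 0.8, which is still a discount — confirm whether 1.0 was intended or this note is stale.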
+    text_translation_weight: 0.8
+    knn_text_weight: 1.0
+    knn_image_weight: 2.0
+    knn_tie_breaker: 0.3
+    knn_bias: 0.6
+    knn_exponent: 0.4
+    knn_text_bias: 0.2
+    knn_text_exponent: 0.0
+    knn_image_bias: 0.2
+    knn_image_exponent: 0.0
+fine_rank:
+  enabled: false # false 时保序透传
+  input_window: 160
+  output_window: 80
+  timeout_sec: 10.0
+  rerank_query_template: '{query}'
+  rerank_doc_template: '{title}'
+  service_profile: fine
+rerank:
+  enabled: true # false 时保序透传
+  rerank_window: 160
+  exact_knn_rescore_enabled: true
+  exact_knn_rescore_window: 160
+  timeout_sec: 15.0
+  weight_es: 0.4
+  weight_ai: 0.6
+  rerank_query_template: '{query}'
+  rerank_doc_template: '{title}'
+  service_profile: default
+  # 乘法融合：fused = Π (max(score,0) + bias) ** exponent（es / rerank / fine / text / knn）
+  # 其中 knn_score 先做一层 dis_max：
+  #   max(knn_text_weight * text_knn, knn_image_weight * image_knn)
+  #   + knn_tie_breaker * 另一侧较弱信号
+  fusion:
+    es_bias: 10.0
+    es_exponent: 0.05
+    rerank_bias: 0.1
+    rerank_exponent: 1.15
+    fine_bias: 0.1
+    fine_exponent: 1.0
+    text_bias: 0.1
+    # text_translation_weight (two lines below): weight of base_query_trans_* relative to base_query (see the text dis_max fusion in search/rerank_client).
+    text_exponent: 0.25
+    text_translation_weight: 0.8
+    knn_text_weight: 1.0
+    knn_image_weight: 2.0
+    knn_tie_breaker: 0.3
+    knn_bias: 0.6
+    knn_exponent: 0.4
+
+services:
+  translation:
+    service_url: http://127.0.0.1:6006
+    default_model: nllb-200-distilled-600m
+    default_scene: general
+    timeout_sec: 10.0
+    cache:
+      ttl_seconds: 62208000
+      sliding_expiration: true
+      # enable_model_quality_tier_cache: when false, cache lookups match only the requested model's own keys (model_quality_tiers is ignored).
+      # Higher tier = better quality. Multiple models may share one tier.
+      # A request for a model at tier A may reuse Redis entries written by models at tier >= A, never from lower tiers.
+      enable_model_quality_tier_cache: true
+      model_quality_tiers:
+        deepl: 30
+        qwen-mt: 30
+        llm: 30
+        nllb-200-distilled-600m: 20
+        opus-mt-zh-en: 10
+        opus-mt-en-zh: 10
+    capabilities:
+      qwen-mt:
+        enabled: true
+        backend: qwen_mt
+        model: qwen-mt-flash
+        base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
+        timeout_sec: 10.0
+        use_cache: true
+      llm:
+        enabled: true
+        backend: llm
+        model: qwen-flash
+        base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
+        timeout_sec: 30.0
+        use_cache: true
+      deepl:
+        enabled: true
+        backend: deepl
+        api_url: https://api.deepl.com/v2/translate
+        timeout_sec: 10.0
+        glossary_id: ''
+        use_cache: true
+      nllb-200-distilled-600m:
+        enabled: true
+        backend: local_nllb
+        model_id: facebook/nllb-200-distilled-600M
+        model_dir: ./models/translation/facebook/nllb-200-distilled-600M
+        ct2_model_dir: ./models/translation/facebook/nllb-200-distilled-600M/ctranslate2-float16
+        ct2_compute_type: float16
+        ct2_conversion_quantization: float16
+        ct2_auto_convert: true
+        ct2_inter_threads: 4
+        ct2_intra_threads: 0
+        ct2_max_queued_batches: 32
+        ct2_batch_type: examples
+        ct2_decoding_length_mode: source
+        ct2_decoding_length_extra: 16
+        ct2_decoding_length_min: 32
+        device: cuda
+        torch_dtype: float16
+        batch_size: 64
+        max_input_length: 256
+        max_new_tokens: 96
+        num_beams: 4
+        use_cache: true
+      opus-mt-zh-en:
+        enabled: false
+        backend: local_marian
+        model_id: Helsinki-NLP/opus-mt-zh-en
+        model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en
+        ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en/ctranslate2-float16
+        ct2_compute_type: float16
+        ct2_conversion_quantization: float16
+        ct2_auto_convert: true
+        ct2_inter_threads: 1
+        ct2_intra_threads: 0
+        ct2_max_queued_batches: 0
+        ct2_batch_type: examples
+        device: cuda
+        torch_dtype: float16
+        batch_size: 16
+        max_input_length: 256
+        max_new_tokens: 256
+        num_beams: 1
+        use_cache: true
+      opus-mt-en-zh:
+        enabled: false
+        backend: local_marian
+        model_id: Helsinki-NLP/opus-mt-en-zh
+        model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh
+        ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh/ctranslate2-float16
+        ct2_compute_type: float16
+        ct2_conversion_quantization: float16
+        ct2_auto_convert: true
+        ct2_inter_threads: 1
+        ct2_intra_threads: 0
+        ct2_max_queued_batches: 0
+        ct2_batch_type: examples
+        device: cuda
+        torch_dtype: float16
+        batch_size: 16
+        max_input_length: 256
+        max_new_tokens: 256
+        num_beams: 1
+        use_cache: true
+  embedding:
+    provider: http
+    providers:
+      http:
+        text_base_url: http://127.0.0.1:6005
+        image_base_url: http://127.0.0.1:6008
+    backend: tei
+    backends:
+      tei:
+        base_url: http://127.0.0.1:8080
+        timeout_sec: 20
+        model_id: Qwen/Qwen3-Embedding-0.6B
+      local_st:
+        model_id: Qwen/Qwen3-Embedding-0.6B
+        device: cuda
+        batch_size: 32
+        normalize_embeddings: true
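+    # In-service image backend (read when the embedding process starts; the cnclip gRPC server and the 6008 service must use the same model_name).
+    # Chinese-CLIP: ViT-H-14 → 1024 dims, ViT-L-14 → 768 dims. Must match image_embedding.vector.dims in
+    # mappings/search_products.json (the index is stated to be 1024 → ViT-H-14 by default).
+    # NOTE(review): both image_backends entries below set ViT-L-14 (768 dims), which contradicts the 1024-dim index stated here — confirm which is current.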
+    image_backend: clip_as_service  # clip_as_service | local_cnclip
+    image_backends:
+      clip_as_service:
+        server: grpc://127.0.0.1:51000
+        model_name: CN-CLIP/ViT-L-14
+        batch_size: 8
+        normalize_embeddings: true
+      local_cnclip:
+        model_name: ViT-L-14
+        device: null
+        batch_size: 8
+        normalize_embeddings: true
+  rerank:
+    provider: http
+    providers:
+      http:
+        instances:
+          default:
+            base_url: http://127.0.0.1:6007
+            service_url: http://127.0.0.1:6007/rerank
+          fine:
+            base_url: http://127.0.0.1:6009
+            service_url: http://127.0.0.1:6009/rerank
+    request:
+      max_docs: 1000
+      normalize: true
+    # 命名实例：同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。
+    default_instance: default
+    instances:
+      default:
+        host: 0.0.0.0
+        port: 6007
+        backend: bge
+        runtime_dir: ./.runtime/reranker/default
+      fine:
+        host: 0.0.0.0
+        port: 6009
+        backend: bge
+        runtime_dir: ./.runtime/reranker/fine
+    backends:
+      bge:
+        model_name: BAAI/bge-reranker-v2-m3
+        device: null
+        use_fp16: true
+        batch_size: 80
+        max_length: 160
+        cache_dir: ./model_cache
+        enable_warmup: true
+      jina_reranker_v3:
+        model_name: jinaai/jina-reranker-v3
+        device: null
+        dtype: float16
+        batch_size: 64
+        max_doc_length: 160
+        max_query_length: 64
+        sort_by_doc_length: true
+        cache_dir: ./model_cache
+        trust_remote_code: true
+      qwen3_vllm:
+        model_name: Qwen/Qwen3-Reranker-0.6B
+        engine: vllm
+        max_model_len: 256
+        tensor_parallel_size: 1
+        gpu_memory_utilization: 0.2
+        dtype: float16
+        enable_prefix_caching: true
+        enforce_eager: false
+        infer_batch_size: 100
+        sort_by_doc_length: true
+
+        # standard=_format_instruction__standard（固定 yes/no system）；compact=_format_instruction（instruction 作 system 且 user 内重复 Instruct）
+        instruction_format: standard  # compact standard
+        # instruction: "Given a query, score the product for relevance"
+        # "rank products by given query" 比 “Given a query, score the product for relevance” 更好点
+        # instruction: "rank products by given query, category match first"
+        # instruction: "Rank products by query relevance, prioritizing category match"
+        # instruction: "Rank products by query relevance, prioritizing category and style match"
+        # instruction: "Rank by query relevance, prioritize category & style"
+        # instruction: "Relevance ranking: category & style match first"
+        # instruction: "Score product relevance by query with category & style match prioritized"
+        # instruction: "Rank products by query with category & style match prioritized"
+        # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query"
+        instruction: rank products by given query
+
+      # vLLM LLM.score()（跨编码打分）。独立高性能环境 .venv-reranker-score（vllm 0.18 固定版）：./scripts/setup_reranker_venv.sh qwen3_vllm_score
+      # 与 qwen3_vllm 可共用同一 model_name / HF 缓存；venv 分离以便升级 vLLM 而不影响 generate 后端。
+      qwen3_vllm_score:
+        model_name: Qwen/Qwen3-Reranker-0.6B
+        # 官方 Hub 原版需 true；若改用已转换的 seq-cls 权重（如 tomaarsen/...-seq-cls）则设为 false
+        use_original_qwen3_hf_overrides: true
+        # vllm_runner: "auto"
+        # vllm_convert: "auto"
+        # 可选：在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并
+        # hf_overrides: {}
+        engine: vllm
+        max_model_len: 172
+        tensor_parallel_size: 1
+        gpu_memory_utilization: 0.15
+        dtype: float16
+        enable_prefix_caching: true
+        enforce_eager: false
+        infer_batch_size: 80
+        sort_by_doc_length: true
+        # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致
+        instruction_format: standard  # compact standard
+        # instruction: "Rank products by query with category & style match prioritized"
+        # instruction: "Given a shopping query, rank products by relevance"
+        instruction: Rank products by query with category & style match prioritized
+      qwen3_transformers:
+        model_name: Qwen/Qwen3-Reranker-0.6B
+        instruction: rank products by given query
+        max_length: 8192
+        batch_size: 64
+        use_fp16: true
+        attn_implementation: sdpa
+      qwen3_transformers_packed:
+        model_name: Qwen/Qwen3-Reranker-0.6B
+        instruction: Rank products by query with category & style match prioritized
+        max_model_len: 256
+        max_doc_len: 160
+        max_docs_per_pack: 0
+        use_fp16: true
+        sort_by_doc_length: true
+        attn_implementation: eager
+      qwen3_gguf:
+        repo_id: DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF
+        filename: '*Q8_0.gguf'
+        cache_dir: ./model_cache
+        local_dir: ./models/reranker/qwen3-reranker-4b-gguf
+        instruction: Rank products by query with category & style match prioritized
+        n_ctx: 512
+        n_batch: 512
+        n_ubatch: 512
+        n_gpu_layers: 999
+        main_gpu: 0
+        n_threads: 2
+        n_threads_batch: 4
+        flash_attn: true
+        offload_kqv: true
+        use_mmap: true
+        use_mlock: false
+        infer_batch_size: 8
+        sort_by_doc_length: true
+        length_sort_mode: char
+        enable_warmup: true
+        verbose: false
+      qwen3_gguf_06b:
+        repo_id: ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF
+        filename: qwen3-reranker-0.6b-q8_0.gguf
+        cache_dir: ./model_cache
+        local_dir: ./models/reranker/qwen3-reranker-0.6b-q8_0-gguf
+        instruction: Rank products by query with category & style match prioritized
+        n_ctx: 256
+        n_batch: 256
+        n_ubatch: 256
+        n_gpu_layers: 999
+        main_gpu: 0
+        n_threads: 2
+        n_threads_batch: 4
+        flash_attn: true
+        offload_kqv: true
+        use_mmap: true
+        use_mlock: false
+        infer_batch_size: 32
+        sort_by_doc_length: true
+        length_sort_mode: char
+        reuse_query_state: false
+        enable_warmup: true
+        verbose: false
+      dashscope_rerank:
+        model_name: qwen3-rerank
+        endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks
+        api_key_env: RERANK_DASHSCOPE_API_KEY_CN
+        timeout_sec: 10.0
+        top_n_cap: 0 # 0 表示 top_n=当前请求文档数
+        batchsize: 64 # 0 关闭；>0 启用并发小包调度（top_n/top_n_cap 仍生效，分包后全局截断）
+        instruct: Given a shopping query, rank product titles by relevance
+        max_retries: 2
+        retry_backoff_sec: 0.2
+
+spu_config:
+  enabled: true
+  spu_field: spu_id
+  inner_hits_size: 10
+  # 配置哪些option维度参与检索（进索引、以及在线搜索）
+  # 格式为list，选择option1/option2/option3中的一个或多个
+  searchable_option_dimensions:
+  - option1
+  - option2
+  - option3
+
+# 每个租户可配置主语言 primary_language 与索引语言 index_languages（主市场语言，商家可勾选）
+# 默认 index_languages: [en, zh]，可配置为任意 SOURCE_LANG_CODE_MAP.keys() 的子集
+tenant_config:
+  default:
+    primary_language: en
+    index_languages:
+    - en
+    - zh
+  tenants:
+    '1':
+      primary_language: zh
+      index_languages:
+      - zh
+      - en
+    '2':
+      primary_language: en
+      index_languages:
+      - en
+      - zh
+    '3':
+      primary_language: zh
+      index_languages:
+      - zh
+      - en
+    '162':
+      primary_language: zh
+      index_languages:
+      - zh
+      - en
+    '170':
+      primary_language: en
+      index_languages:
+      - en
+      - zh
@@ -0,0 +1,258 @@
+import argparse
+import json
+import os
+import random
+import statistics
+import time
+from dataclasses import dataclass
+
+import requests
+
+
+def _now() -> float:
+    """Return the current performance-counter reading in fractional seconds.
+
+    Only differences between two readings are meaningful; callers subtract
+    a start reading from an end reading to get an elapsed duration.
+    """
+    reading = time.perf_counter()
+    return reading
+
+
+def _mask(s: str, keep: int = 3) -> str:
+    """Mask a secret for display, revealing at most `keep` characters at each end.
+
+    Args:
+        s: The secret to mask. An empty or falsy value yields "".
+        keep: How many leading and trailing characters to leave visible.
+            Values <= 0 reveal nothing.
+
+    Returns:
+        "" for empty input; a run of "*" as long as `s` when the string is too
+        short to reveal both ends safely (len(s) <= 2 * keep) or when
+        `keep` <= 0; otherwise "<first keep>***<last keep>".
+    """
+    if not s:
+        return ""
+    # keep <= 0 must be handled before slicing: s[-0:] is the WHOLE string and
+    # a negative keep makes both slices overlap, so either would leak the secret.
+    if keep <= 0 or len(s) <= keep * 2:
+        return "*" * len(s)
+    return f"{s[:keep]}***{s[-keep:]}"
+
+
+def get_access_token(api_key: str, secret_key: str, timeout_s: float) -> str:
+    """Exchange a Baidu API key / secret key pair for an OAuth access token.
+
+    Sends a client-credentials POST to the Baidu OAuth token endpoint with the
+    credentials as query parameters.
+
+    Args:
+        api_key: Sent as `client_id`.
+        secret_key: Sent as `client_secret`.
+        timeout_s: HTTP timeout in seconds passed to `requests.post`.
+
+    Returns:
+        The `access_token` field of the JSON response, as a string.
+
+    Raises:
+        requests.HTTPError: The endpoint answered with an error status.
+        RuntimeError: The response decoded but has no (or an empty)
+            `access_token`.
+    """
+    response = requests.post(
+        "https://aip.baidubce.com/oauth/2.0/token",
+        params={
+            "grant_type": "client_credentials",
+            "client_id": api_key,
+            "client_secret": secret_key,
+        },
+        timeout=timeout_s,
+    )
+    response.raise_for_status()
+    body = response.json()
+    granted = body.get("access_token")
+    if granted:
+        return str(granted)
+    raise RuntimeError(f"no access_token in response: {body}")
+
+
+def translate_one(
+    *,
+    access_token: str,
+    q: str,
+    from_lang: str,
+    to_lang: str,
+    timeout_s: float,
+) -> dict:
+    """Translate one text through Baidu's texttrans endpoint.
+
+    Args:
+        access_token: OAuth token, sent as the `access_token` query parameter.
+        q: Text to translate.
+        from_lang: Source language code.
+        to_lang: Target language code.
+        timeout_s: HTTP timeout in seconds passed to `requests.post`.
+
+    Returns:
+        The decoded JSON response.
+
+    Raises:
+        requests.HTTPError: The endpoint answered with an error status.
+        RuntimeError: The response body carries a non-zero `error_code`.
+            Baidu reports API-level failures (e.g. an invalid token or quota
+            exhaustion) this way, so without this check a failed translation
+            would be timed and counted as a success.
+    """
+    url = "https://aip.baidubce.com/rpc/2.0/mt/texttrans/v1"
+    # Serialize by hand with ensure_ascii=False so non-ASCII text goes out as
+    # raw UTF-8 rather than \uXXXX escapes.
+    payload = json.dumps({"from": from_lang, "to": to_lang, "q": q}, ensure_ascii=False)
+    headers = {"Content-Type": "application/json", "Accept": "application/json"}
+    r = requests.post(
+        url,
+        # Let requests URL-encode the token instead of concatenating it raw.
+        params={"access_token": access_token},
+        headers=headers,
+        data=payload.encode("utf-8"),
+        timeout=timeout_s,
+    )
+    r.raise_for_status()
+    data = r.json()
+    if isinstance(data, dict):
+        code = data.get("error_code")
+        # Treat a missing or zero error_code as success.
+        if code not in (None, 0, "0"):
+            raise RuntimeError(f"Baidu texttrans error {code}: {data.get('error_msg')}")
+    return data
+
+
+def random_english_samples(n: int, seed: int | None) -> list[str]:
+    """Build `n` pseudo-random English sentences for use as translation input.
+
+    Each sentence is "<starter> <verb> <object> <ending>", with the parts drawn
+    from fixed word lists. Afterwards, with probability 0.25 the first "the "
+    becomes "our ", and with probability 0.20 every "." becomes "!".
+
+    Args:
+        n: Number of sentences to produce; a non-positive value yields [].
+        seed: Seed for a private `random.Random`; the same seed reproduces the
+            same sentences. None seeds from system entropy.
+
+    Returns:
+        A list of `n` sentences.
+    """
+    rng = random.Random(seed)
+
+    starters = (
+        "Hello",
+        "Please",
+        "Could you",
+        "I wonder if you can",
+        "Let's",
+        "We should",
+        "It's important to",
+        "Don't forget to",
+        "I need to",
+        "Can we",
+    )
+    verbs = (
+        "check",
+        "compare",
+        "translate",
+        "summarize",
+        "review",
+        "optimize",
+        "measure",
+        "confirm",
+        "fix",
+        "verify",
+    )
+    objects = (
+        "the latest order status",
+        "this short sentence",
+        "the product title and description",
+        "our search results",
+        "the API response payload",
+        "the translation quality",
+        "the page load time",
+        "a few random examples",
+        "the error message",
+        "the performance metrics",
+    )
+    endings = (
+        "today.",
+        "right now.",
+        "before we ship.",
+        "in the next minute.",
+        "without changing the meaning.",
+        "as soon as possible.",
+        "for a quick smoke test.",
+        "with a clear output.",
+        "and report the timing.",
+        "to ensure everything works.",
+    )
+    # Slot order fixes the order of RNG draws, which keeps seeded output stable.
+    slots = (starters, verbs, objects, endings)
+
+    sentences: list[str] = []
+    for _ in range(n):
+        sentence = " ".join(rng.choice(slot) for slot in slots)
+        # Both variation draws happen for every sentence, in this order.
+        swap_article = rng.random() < 0.25
+        exclaim = rng.random() < 0.20
+        if swap_article:
+            sentence = sentence.replace("the ", "our ", 1)
+        if exclaim:
+            sentence = sentence.replace(".", "!")
+        sentences.append(sentence)
+    return sentences
+
+
+@dataclass(frozen=True)
+class Timing:
+    """Immutable record of one benchmarked translation request."""
+
+    # Input sentence that was sent for translation.
+    text: str
+    # Elapsed time of the request, as a difference of two _now() readings.
+    seconds: float
+    # True when the request completed without raising.
+    ok: bool
+    # str() of the raised exception when ok is False; otherwise None.
+    error: str | None
+    # Translated text parsed from the response, or None if it failed or could not be parsed.
+    sample_out: str | None
+
+
+def main() -> int:
+    """Run the Baidu texttrans latency benchmark from the command line.
+
+    Reads credentials from BAIDU_API_KEY / BAIDU_SECRET_KEY, obtains an access
+    token, translates `--n` random English sentences one at a time, then prints
+    per-request timings, a latency summary, the first few translations and the
+    first few errors.
+
+    Returns:
+        0 when every request succeeded, 2 when at least one failed.
+
+    Raises:
+        SystemExit: `--n` is not positive, or a credential env var is missing.
+    """
+    parser = argparse.ArgumentParser(
+        description="Benchmark Baidu texttrans latency with random samples."
+    )
+    parser.add_argument("--n", type=int, default=15, help="Number of samples (10-20 suggested).")
+    parser.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility.")
+    parser.add_argument("--from", dest="from_lang", default="en", help="Source language code.")
+    parser.add_argument("--to", dest="to_lang", default="zh", help="Target language code.")
+    parser.add_argument("--timeout", type=float, default=20.0, help="HTTP timeout seconds.")
+    parser.add_argument(
+        "--print-first",
+        type=int,
+        default=5,
+        help="Print first N translations for sanity check.",
+    )
+    opts = parser.parse_args()
+
+    if opts.n <= 0:
+        raise SystemExit("--n must be positive")
+
+    api_key = os.environ.get("BAIDU_API_KEY", "").strip()
+    secret_key = os.environ.get("BAIDU_SECRET_KEY", "").strip()
+    if not (api_key and secret_key):
+        raise SystemExit(
+            "Missing BAIDU_API_KEY / BAIDU_SECRET_KEY env vars. "
+            "Example:\n"
+            "  export BAIDU_API_KEY='...'\n"
+            "  export BAIDU_SECRET_KEY='...'\n"
+        )
+
+    # Only masked credentials are ever printed.
+    print("Baidu credentials:")
+    print(f"  BAIDU_API_KEY={_mask(api_key)}")
+    print(f"  BAIDU_SECRET_KEY={_mask(secret_key)}")
+    print()
+
+    token_started = _now()
+    token = get_access_token(api_key, secret_key, opts.timeout)
+    token_s = _now() - token_started
+    print(f"Access token acquired in {token_s:.3f}s")
+    print()
+
+    def _pick_translation(payload) -> str | None:
+        # Best-effort lookup of result.trans_result[0].dst; None when the
+        # response does not have that shape.
+        if not isinstance(payload, dict):
+            return None
+        result = payload.get("result")
+        candidate = result.get("trans_result") if isinstance(result, dict) else None
+        if isinstance(candidate, list) and candidate:
+            head = candidate[0]
+            if isinstance(head, dict):
+                candidate = head.get("dst")
+        return candidate if isinstance(candidate, str) else None
+
+    samples = random_english_samples(opts.n, opts.seed)
+    total = len(samples)
+    timings: list[Timing] = []
+
+    for idx, text in enumerate(samples, start=1):
+        began = _now()
+        try:
+            payload = translate_one(
+                access_token=token,
+                q=text,
+                from_lang=opts.from_lang,
+                to_lang=opts.to_lang,
+                timeout_s=opts.timeout,
+            )
+            # Stop the clock before parsing so only the request is timed.
+            took = _now() - began
+            record = Timing(
+                text=text,
+                seconds=took,
+                ok=True,
+                error=None,
+                sample_out=_pick_translation(payload),
+            )
+        except Exception as exc:  # noqa: BLE001 - benchmark script, keep it simple
+            took = _now() - began
+            record = Timing(text=text, seconds=took, ok=False, error=str(exc), sample_out=None)
+
+        timings.append(record)
+        status = "OK" if record.ok else "ERR"
+        print(f"[{idx:02d}/{total:02d}] {record.seconds:.3f}s {status}")
+
+    succeeded = [t for t in timings if t.ok]
+    failed = [t for t in timings if not t.ok]
+    latencies = [t.seconds for t in succeeded]
+
+    print()
+    print("=== Summary ===")
+    print(f"token_time_s: {token_s:.3f}")
+    print(f"requests_total: {len(timings)}")
+    print(f"requests_ok: {len(succeeded)}")
+    print(f"requests_err: {len(failed)}")
+    if not latencies:
+        print("No successful requests; cannot compute latency stats.")
+    else:
+        # Latency stats cover successful requests only.
+        print(f"total_time_s: {sum(latencies):.3f}")
+        print(f"avg_s: {statistics.mean(latencies):.3f}")
+        print(f"p50_s: {statistics.median(latencies):.3f}")
+        print(f"min_s: {min(latencies):.3f}")
+        print(f"max_s: {max(latencies):.3f}")
+
+    if opts.print_first > 0:
+        print()
+        print("=== Samples (first N) ===")
+        for t in succeeded[: opts.print_first]:
+            print(f"- IN:  {t.text}")
+            if t.sample_out:
+                print(f"  OUT: {t.sample_out}")
+            else:
+                print("  OUT: (could not parse translation field; see raw JSON by editing script if needed)")
+
+    if failed:
+        print()
+        print("=== Errors (first 3) ===")
+        for t in failed[:3]:
+            print(f"- {t.seconds:.3f}s: {t.error}")
+
+    return 2 if failed else 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())