diff --git a/artifacts/search_evaluation/build_launches/clothing_top771_build_resume_direct_20260421T034611Z.pid b/artifacts/search_evaluation/build_launches/clothing_top771_build_resume_direct_20260421T034611Z.pid new file mode 100644 index 0000000..cf43bba --- /dev/null +++ b/artifacts/search_evaluation/build_launches/clothing_top771_build_resume_direct_20260421T034611Z.pid @@ -0,0 +1 @@ +2900923 diff --git a/config/config-with-reranker.yaml b/config/config-with-reranker.yaml new file mode 100644 index 0000000..eb7e182 --- /dev/null +++ b/config/config-with-reranker.yaml @@ -0,0 +1,673 @@ +runtime: + environment: prod + index_namespace: '' + api_host: 0.0.0.0 + api_port: 6002 + indexer_host: 0.0.0.0 + indexer_port: 6004 + embedding_host: 0.0.0.0 + embedding_port: 6005 + embedding_text_port: 6005 + embedding_image_port: 6008 + translator_host: 0.0.0.0 + translator_port: 6006 + reranker_host: 0.0.0.0 + reranker_port: 6007 +infrastructure: + elasticsearch: + host: http://localhost:9200 + username: null + password: null + redis: + host: localhost + port: 6479 + snapshot_db: 0 + password: null + socket_timeout: 1 + socket_connect_timeout: 1 + retry_on_timeout: false + cache_expire_days: 720 + embedding_cache_prefix: embedding + anchor_cache_prefix: product_anchors + anchor_cache_expire_days: 30 + database: + host: null + port: 3306 + database: null + username: null + password: null + secrets: + dashscope_api_key: null + deepl_auth_key: null +es_index_name: search_products +indexes: [] +assets: + query_rewrite_dictionary_path: config/dictionaries/query_rewrite.dict +product_enrich: + max_workers: 40 +suggestion: + sat_recall_min: 40 + sat_recall_cap: 100 +search_evaluation: + artifact_root: artifacts/search_evaluation + queries_file: scripts/evaluation/queries/queries.txt + default_dataset_id: core_queries + datasets: + - dataset_id: core_queries + display_name: Core Queries + description: Legacy baseline evaluation set from queries.txt + query_file: scripts/evaluation/queries/queries.txt + tenant_id: '163' + language: en + enabled: true + - dataset_id: clothing_top771 + display_name: Clothing Filtered 771 + description: 771 clothing / shoes / accessories queries filtered from top1k + query_file: scripts/evaluation/queries/all_keywords.txt.top1w.shuf.top1k.clothing_filtered + tenant_id: '163' + language: en + enabled: true + eval_log_dir: logs + default_tenant_id: '163' + search_base_url: '' + web_host: 0.0.0.0 + web_port: 6010 + judge_model: qwen3.6-plus + judge_enable_thinking: false + judge_dashscope_batch: false + intent_model: qwen3.6-plus + intent_enable_thinking: true + judge_batch_completion_window: 24h + judge_batch_poll_interval_sec: 10.0 + build_search_depth: 1000 + build_rerank_depth: 10000 + annotate_search_top_k: 120 + annotate_rerank_top_k: 200 + batch_top_k: 100 + audit_top_k: 100 + audit_limit_suspicious: 5 + default_language: en + search_recall_top_k: 200 + rerank_high_threshold: 0.5 + rerank_high_skip_count: 1000 + rebuild_llm_batch_size: 50 + rebuild_min_llm_batches: 10 + rebuild_max_llm_batches: 40 + rebuild_irrelevant_stop_ratio: 0.799 + rebuild_irrel_low_combined_stop_ratio: 0.959 + rebuild_irrelevant_stop_streak: 3 +es_settings: + number_of_shards: 1 + number_of_replicas: 0 + refresh_interval: 30s + +# 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang} +field_boosts: + title: 3.0 + # qanchors enriched_tags 在 enriched_attributes.value中也存在,所以其实他的权重为自身权重+enriched_attributes.value的权重 + qanchors: 1.0 + enriched_tags: 1.0 + enriched_attributes.value: 1.5 + category_name_text: 2.0 + category_path: 2.0 + keywords: 2.0 + tags: 2.0 + option1_values: 1.7 + option2_values: 1.7 + option3_values: 1.7 + brief: 1.0 + description: 1.0 + vendor: 1.0 + +query_config: + supported_languages: + - zh + - en + default_language: en + enable_text_embedding: true + enable_query_rewrite: true + + zh_to_en_model: nllb-200-distilled-600m # nllb-200-distilled-600m deepl opus-mt-zh-en / opus-mt-en-zh + en_to_zh_model: nllb-200-distilled-600m + default_translation_model: nllb-200-distilled-600m + # 源语种不在 index_languages时翻译质量比较重要,因此单独配置 + zh_to_en_model__source_not_in_index: deepl + en_to_zh_model__source_not_in_index: deepl + default_translation_model__source_not_in_index: deepl + + # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒) + translation_embedding_wait_budget_ms_source_in_index: 300 + translation_embedding_wait_budget_ms_source_not_in_index: 400 + style_intent: + enabled: true + selected_sku_boost: 1.2 + color_dictionary_path: config/dictionaries/style_intent_color.csv + size_dictionary_path: config/dictionaries/style_intent_size.csv + dimension_aliases: + color: + - color + - colors + - colour + - colours + - 颜色 + - 色 + - 色系 + size: + - size + - sizes + - sizing + - 尺码 + - 尺寸 + - 码数 + - 号码 + - 码 + product_title_exclusion: + enabled: true + dictionary_path: config/dictionaries/product_title_exclusion.tsv + search_fields: + # 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang} + multilingual_fields: + - title + - keywords + - qanchors + - enriched_tags + - enriched_attributes.value + # - enriched_taxonomy_attributes.value + - option1_values + - option2_values + - option3_values + - category_path + - category_name_text + # - brief + # - description + # - vendor + # shared_fields: 无语言后缀字段;示例: tags, option1_values, option2_values, option3_values + + shared_fields: null + core_multilingual_fields: + - title + - qanchors + - category_name_text + + # 文本召回(主查询 + 翻译查询) + text_query_strategy: + base_minimum_should_match: 60% + translation_minimum_should_match: 60% + translation_boost: 0.75 + tie_breaker_base_query: 0.5 + best_fields_boost: 2.0 + best_fields: + title: 4.0 + qanchors: 3.0 + category_name_text: 2.0 + phrase_fields: + title: 5.0 + qanchors: 4.0 + phrase_match_boost: 3.0 + text_embedding_field: title_embedding + image_embedding_field: image_embedding.vector + + # null表示返回所有字段,[]表示不返回任何字段 + source_fields: + - spu_id + - handle + - title + - brief + - description + - vendor + - category_name + - category_name_text + - category_path + - category_id + - category_level + - category1_name + - category2_name + - category3_name + # - tags + # - keywords + # - qanchors + # - enriched_tags + - enriched_attributes + - enriched_taxonomy_attributes + + - min_price + - compare_at_price + - image_url + - sku_prices + - sku_weights + - sku_weight_units + - total_inventory + - option1_name + - option1_values + - option2_name + - option2_values + - option3_name + - option3_values + - specifications + - skus + + # KNN:文本向量与多模态(图片)向量各自 boost 与召回(k / num_candidates) + knn_text_boost: 4 + knn_image_boost: 4 + knn_text_k: 160 + knn_text_num_candidates: 560 # k * 3.4 + knn_text_k_long: 400 + knn_text_num_candidates_long: 1200 + knn_image_k: 400 + knn_image_num_candidates: 1200 + +function_score: + score_mode: sum + boost_mode: multiply + functions: [] +coarse_rank: + enabled: true + input_window: 480 + output_window: 160 + fusion: + es_bias: 10.0 + es_exponent: 0.05 + text_bias: 0.1 + text_exponent: 0.35 + # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) + # 因为es的打分已经给了trans进行了折扣,所以这里不再继续折扣 + text_translation_weight: 0.8 + knn_text_weight: 1.0 + knn_image_weight: 2.0 + knn_tie_breaker: 0.3 + knn_bias: 0.6 + knn_exponent: 0.4 + knn_text_bias: 0.2 + knn_text_exponent: 0.0 + knn_image_bias: 0.2 + knn_image_exponent: 0.0 +fine_rank: + enabled: false # false 时保序透传 + input_window: 160 + output_window: 80 + timeout_sec: 10.0 + rerank_query_template: '{query}' + rerank_doc_template: '{title}' + service_profile: fine +rerank: + enabled: true # false 时保序透传 + rerank_window: 160 + exact_knn_rescore_enabled: true + exact_knn_rescore_window: 160 + timeout_sec: 15.0 + weight_es: 0.4 + weight_ai: 0.6 + rerank_query_template: '{query}' + rerank_doc_template: '{title}' + service_profile: default + # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(es / rerank / fine / text / knn) + # 其中 knn_score 先做一层 dis_max: + # max(knn_text_weight * text_knn, knn_image_weight * image_knn) + # + knn_tie_breaker * 另一侧较弱信号 + fusion: + es_bias: 10.0 + es_exponent: 0.05 + rerank_bias: 0.1 + rerank_exponent: 1.15 + fine_bias: 0.1 + fine_exponent: 1.0 + text_bias: 0.1 + # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合) + text_exponent: 0.25 + text_translation_weight: 0.8 + knn_text_weight: 1.0 + knn_image_weight: 2.0 + knn_tie_breaker: 0.3 + knn_bias: 0.6 + knn_exponent: 0.4 + +services: + translation: + service_url: http://127.0.0.1:6006 + default_model: nllb-200-distilled-600m + default_scene: general + timeout_sec: 10.0 + cache: + ttl_seconds: 62208000 + sliding_expiration: true + # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups) + # Higher tier = better quality. Multiple models may share one tier (同级). + # A request may reuse Redis keys from models with tier > A or tier == A (not from lower tiers). + enable_model_quality_tier_cache: true + model_quality_tiers: + deepl: 30 + qwen-mt: 30 + llm: 30 + nllb-200-distilled-600m: 20 + opus-mt-zh-en: 10 + opus-mt-en-zh: 10 + capabilities: + qwen-mt: + enabled: true + backend: qwen_mt + model: qwen-mt-flash + base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1 + timeout_sec: 10.0 + use_cache: true + llm: + enabled: true + backend: llm + model: qwen-flash + base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1 + timeout_sec: 30.0 + use_cache: true + deepl: + enabled: true + backend: deepl + api_url: https://api.deepl.com/v2/translate + timeout_sec: 10.0 + glossary_id: '' + use_cache: true + nllb-200-distilled-600m: + enabled: true + backend: local_nllb + model_id: facebook/nllb-200-distilled-600M + model_dir: ./models/translation/facebook/nllb-200-distilled-600M + ct2_model_dir: ./models/translation/facebook/nllb-200-distilled-600M/ctranslate2-float16 + ct2_compute_type: float16 + ct2_conversion_quantization: float16 + ct2_auto_convert: true + ct2_inter_threads: 4 + ct2_intra_threads: 0 + ct2_max_queued_batches: 32 + ct2_batch_type: examples + ct2_decoding_length_mode: source + ct2_decoding_length_extra: 16 + ct2_decoding_length_min: 32 + device: cuda + torch_dtype: float16 + batch_size: 64 + max_input_length: 256 + max_new_tokens: 96 + num_beams: 4 + use_cache: true + opus-mt-zh-en: + enabled: false + backend: local_marian + model_id: Helsinki-NLP/opus-mt-zh-en + model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en + ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en/ctranslate2-float16 + ct2_compute_type: float16 + ct2_conversion_quantization: float16 + ct2_auto_convert: true + ct2_inter_threads: 1 + ct2_intra_threads: 0 + ct2_max_queued_batches: 0 + ct2_batch_type: examples + device: cuda + torch_dtype: float16 + batch_size: 16 + max_input_length: 256 + max_new_tokens: 256 + num_beams: 1 + use_cache: true + opus-mt-en-zh: + enabled: false + backend: local_marian + model_id: Helsinki-NLP/opus-mt-en-zh + model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh + ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh/ctranslate2-float16 + ct2_compute_type: float16 + ct2_conversion_quantization: float16 + ct2_auto_convert: true + ct2_inter_threads: 1 + ct2_intra_threads: 0 + ct2_max_queued_batches: 0 + ct2_batch_type: examples + device: cuda + torch_dtype: float16 + batch_size: 16 + max_input_length: 256 + max_new_tokens: 256 + num_beams: 1 + use_cache: true + embedding: + provider: http + providers: + http: + text_base_url: http://127.0.0.1:6005 + image_base_url: http://127.0.0.1:6008 + backend: tei + backends: + tei: + base_url: http://127.0.0.1:8080 + timeout_sec: 20 + model_id: Qwen/Qwen3-Embedding-0.6B + local_st: + model_id: Qwen/Qwen3-Embedding-0.6B + device: cuda + batch_size: 32 + normalize_embeddings: true + # 服务内图片后端(embedding 进程启动时读取;cnclip gRPC 与 6008 须同一 model_name) + # Chinese-CLIP:ViT-H-14 → 1024 维,ViT-L-14 → 768 维。须与 mappings/search_products.json 中 + # image_embedding.vector.dims 一致(当前索引为 1024 → 默认 ViT-H-14)。 + image_backend: clip_as_service # clip_as_service | local_cnclip + image_backends: + clip_as_service: + server: grpc://127.0.0.1:51000 + model_name: CN-CLIP/ViT-L-14 + batch_size: 8 + normalize_embeddings: true + local_cnclip: + model_name: ViT-L-14 + device: null + batch_size: 8 + normalize_embeddings: true + rerank: + provider: http + providers: + http: + instances: + default: + base_url: http://127.0.0.1:6007 + service_url: http://127.0.0.1:6007/rerank + fine: + base_url: http://127.0.0.1:6009 + service_url: http://127.0.0.1:6009/rerank + request: + max_docs: 1000 + normalize: true + # 命名实例:同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。 + default_instance: default + instances: + default: + host: 0.0.0.0 + port: 6007 + backend: bge + runtime_dir: ./.runtime/reranker/default + fine: + host: 0.0.0.0 + port: 6009 + backend: bge + runtime_dir: ./.runtime/reranker/fine + backends: + bge: + model_name: BAAI/bge-reranker-v2-m3 + device: null + use_fp16: true + batch_size: 80 + max_length: 160 + cache_dir: ./model_cache + enable_warmup: true + jina_reranker_v3: + model_name: jinaai/jina-reranker-v3 + device: null + dtype: float16 + batch_size: 64 + max_doc_length: 160 + max_query_length: 64 + sort_by_doc_length: true + cache_dir: ./model_cache + trust_remote_code: true + qwen3_vllm: + model_name: Qwen/Qwen3-Reranker-0.6B + engine: vllm + max_model_len: 256 + tensor_parallel_size: 1 + gpu_memory_utilization: 0.2 + dtype: float16 + enable_prefix_caching: true + enforce_eager: false + infer_batch_size: 100 + sort_by_doc_length: true + + # standard=_format_instruction__standard(固定 yes/no system);compact=_format_instruction(instruction 作 system 且 user 内重复 Instruct) + instruction_format: standard # compact standard + # instruction: "Given a query, score the product for relevance" + # "rank products by given query" 比 “Given a query, score the product for relevance” 更好点 + # instruction: "rank products by given query, category match first" + # instruction: "Rank products by query relevance, prioritizing category match" + # instruction: "Rank products by query relevance, prioritizing category and style match" + # instruction: "Rank by query relevance, prioritize category & style" + # instruction: "Relevance ranking: category & style match first" + # instruction: "Score product relevance by query with category & style match prioritized" + # instruction: "Rank products by query with category & style match prioritized" + # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query" + instruction: rank products by given query + + # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score + # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。 + qwen3_vllm_score: + model_name: Qwen/Qwen3-Reranker-0.6B + # 官方 Hub 原版需 true;若改用已转换的 seq-cls 权重(如 tomaarsen/...-seq-cls)则设为 false + use_original_qwen3_hf_overrides: true + # vllm_runner: "auto" + # vllm_convert: "auto" + # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并 + # hf_overrides: {} + engine: vllm + max_model_len: 172 + tensor_parallel_size: 1 + gpu_memory_utilization: 0.15 + dtype: float16 + enable_prefix_caching: true + enforce_eager: false + infer_batch_size: 80 + sort_by_doc_length: true + # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致 + instruction_format: standard # compact standard + # instruction: "Rank products by query with category & style match prioritized" + # instruction: "Given a shopping query, rank products by relevance" + instruction: Rank products by query with category & style match prioritized + qwen3_transformers: + model_name: Qwen/Qwen3-Reranker-0.6B + instruction: rank products by given query + max_length: 8192 + batch_size: 64 + use_fp16: true + attn_implementation: sdpa + qwen3_transformers_packed: + model_name: Qwen/Qwen3-Reranker-0.6B + instruction: Rank products by query with category & style match prioritized + max_model_len: 256 + max_doc_len: 160 + max_docs_per_pack: 0 + use_fp16: true + sort_by_doc_length: true + attn_implementation: eager + qwen3_gguf: + repo_id: DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF + filename: '*Q8_0.gguf' + cache_dir: ./model_cache + local_dir: ./models/reranker/qwen3-reranker-4b-gguf + instruction: Rank products by query with category & style match prioritized + n_ctx: 512 + n_batch: 512 + n_ubatch: 512 + n_gpu_layers: 999 + main_gpu: 0 + n_threads: 2 + n_threads_batch: 4 + flash_attn: true + offload_kqv: true + use_mmap: true + use_mlock: false + infer_batch_size: 8 + sort_by_doc_length: true + length_sort_mode: char + enable_warmup: true + verbose: false + qwen3_gguf_06b: + repo_id: ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF + filename: qwen3-reranker-0.6b-q8_0.gguf + cache_dir: ./model_cache + local_dir: ./models/reranker/qwen3-reranker-0.6b-q8_0-gguf + instruction: Rank products by query with category & style match prioritized + n_ctx: 256 + n_batch: 256 + n_ubatch: 256 + n_gpu_layers: 999 + main_gpu: 0 + n_threads: 2 + n_threads_batch: 4 + flash_attn: true + offload_kqv: true + use_mmap: true + use_mlock: false + infer_batch_size: 32 + sort_by_doc_length: true + length_sort_mode: char + reuse_query_state: false + enable_warmup: true + verbose: false + dashscope_rerank: + model_name: qwen3-rerank + endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks + api_key_env: RERANK_DASHSCOPE_API_KEY_CN + timeout_sec: 10.0 + top_n_cap: 0 # 0 表示 top_n=当前请求文档数 + batchsize: 64 # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断) + instruct: Given a shopping query, rank product titles by relevance + max_retries: 2 + retry_backoff_sec: 0.2 + +spu_config: + enabled: true + spu_field: spu_id + inner_hits_size: 10 + # 配置哪些option维度参与检索(进索引、以及在线搜索) + # 格式为list,选择option1/option2/option3中的一个或多个 + searchable_option_dimensions: + - option1 + - option2 + - option3 + +# 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选) +# 默认 index_languages: [en, zh],可配置为任意 SOURCE_LANG_CODE_MAP.keys() 的子集 +tenant_config: + default: + primary_language: en + index_languages: + - en + - zh + tenants: + '1': + primary_language: zh + index_languages: + - zh + - en + '2': + primary_language: en + index_languages: + - en + - zh + '3': + primary_language: zh + index_languages: + - zh + - en + '162': + primary_language: zh + index_languages: + - zh + - en + '170': + primary_language: en + index_languages: + - en + - zh diff --git a/scripts/manual/baidu_translate_benchmark.py b/scripts/manual/baidu_translate_benchmark.py new file mode 100644 index 0000000..5de9eb3 --- /dev/null +++ b/scripts/manual/baidu_translate_benchmark.py @@ -0,0 +1,258 @@ +import argparse +import json +import os +import random +import statistics +import time +from dataclasses import dataclass + +import requests + + +def _now() -> float: + return time.perf_counter() + + +def _mask(s: str, keep: int = 3) -> str: + if not s: + return "" + if len(s) <= keep * 2: + return "*" * len(s) + return f"{s[:keep]}***{s[-keep:]}" + + +def get_access_token(api_key: str, secret_key: str, timeout_s: float) -> str: + url = "https://aip.baidubce.com/oauth/2.0/token" + params = { + "grant_type": "client_credentials", + "client_id": api_key, + "client_secret": secret_key, + } + r = requests.post(url, params=params, timeout=timeout_s) + r.raise_for_status() + data = r.json() + token = data.get("access_token") + if not token: + raise RuntimeError(f"no access_token in response: {data}") + return str(token) + + +def translate_one( + *, + access_token: str, + q: str, + from_lang: str, + to_lang: str, + timeout_s: float, +) -> dict: + url = ( + "https://aip.baidubce.com/rpc/2.0/mt/texttrans/v1" + + "?access_token=" + + access_token + ) + payload = json.dumps({"from": from_lang, "to": to_lang, "q": q}, ensure_ascii=False) + headers = {"Content-Type": "application/json", "Accept": "application/json"} + r = requests.post(url, headers=headers, data=payload.encode("utf-8"), timeout=timeout_s) + r.raise_for_status() + return r.json() + + +def random_english_samples(n: int, seed: int | None) -> list[str]: + rng = random.Random(seed) + + starters = [ + "Hello", + "Please", + "Could you", + "I wonder if you can", + "Let's", + "We should", + "It's important to", + "Don't forget to", + "I need to", + "Can we", + ] + verbs = [ + "check", + "compare", + "translate", + "summarize", + "review", + "optimize", + "measure", + "confirm", + "fix", + "verify", + ] + objects = [ + "the latest order status", + "this short sentence", + "the product title and description", + "our search results", + "the API response payload", + "the translation quality", + "the page load time", + "a few random examples", + "the error message", + "the performance metrics", + ] + endings = [ + "today.", + "right now.", + "before we ship.", + "in the next minute.", + "without changing the meaning.", + "as soon as possible.", + "for a quick smoke test.", + "with a clear output.", + "and report the timing.", + "to ensure everything works.", + ] + + samples: list[str] = [] + while len(samples) < n: + s = f"{rng.choice(starters)} {rng.choice(verbs)} {rng.choice(objects)} {rng.choice(endings)}" + # Add a little variation + if rng.random() < 0.25: + s = s.replace("the ", "our ", 1) + if rng.random() < 0.20: + s = s.replace(".", "!") + samples.append(s) + return samples + + +@dataclass(frozen=True) +class Timing: + text: str + seconds: float + ok: bool + error: str | None + sample_out: str | None + + +def main() -> int: + p = argparse.ArgumentParser( + description="Benchmark Baidu texttrans latency with random samples." + ) + p.add_argument("--n", type=int, default=15, help="Number of samples (10-20 suggested).") + p.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility.") + p.add_argument("--from", dest="from_lang", default="en", help="Source language code.") + p.add_argument("--to", dest="to_lang", default="zh", help="Target language code.") + p.add_argument("--timeout", type=float, default=20.0, help="HTTP timeout seconds.") + p.add_argument( + "--print-first", + type=int, + default=5, + help="Print first N translations for sanity check.", + ) + args = p.parse_args() + + if args.n <= 0: + raise SystemExit("--n must be positive") + + api_key = os.environ.get("BAIDU_API_KEY", "").strip() + secret_key = os.environ.get("BAIDU_SECRET_KEY", "").strip() + if not api_key or not secret_key: + raise SystemExit( + "Missing BAIDU_API_KEY / BAIDU_SECRET_KEY env vars. " + "Example:\n" + " export BAIDU_API_KEY='...'\n" + " export BAIDU_SECRET_KEY='...'\n" + ) + + print("Baidu credentials:") + print(f" BAIDU_API_KEY={_mask(api_key)}") + print(f" BAIDU_SECRET_KEY={_mask(secret_key)}") + print() + + t0 = _now() + token = get_access_token(api_key, secret_key, args.timeout) + token_s = _now() - t0 + print(f"Access token acquired in {token_s:.3f}s") + print() + + samples = random_english_samples(args.n, args.seed) + timings: list[Timing] = [] + + for i, text in enumerate(samples, start=1): + start = _now() + try: + out = translate_one( + access_token=token, + q=text, + from_lang=args.from_lang, + to_lang=args.to_lang, + timeout_s=args.timeout, + ) + elapsed = _now() - start + + # Try to extract a plausible translation field, but keep whole JSON as fallback. + translated: str | None = None + if isinstance(out, dict): + result = out.get("result") + if isinstance(result, dict): + translated = result.get("trans_result") + if isinstance(translated, list) and translated: + # Baidu often returns list of dicts with dst/src + first = translated[0] + if isinstance(first, dict): + translated = first.get("dst") + timings.append( + Timing( + text=text, + seconds=elapsed, + ok=True, + error=None, + sample_out=translated if isinstance(translated, str) else None, + ) + ) + except Exception as e: # noqa: BLE001 - benchmark script, keep it simple + elapsed = _now() - start + timings.append(Timing(text=text, seconds=elapsed, ok=False, error=str(e), sample_out=None)) + + print(f"[{i:02d}/{len(samples):02d}] {timings[-1].seconds:.3f}s {'OK' if timings[-1].ok else 'ERR'}") + + ok = [t for t in timings if t.ok] + errs = [t for t in timings if not t.ok] + latencies = [t.seconds for t in ok] + + print() + print("=== Summary ===") + print(f"token_time_s: {token_s:.3f}") + print(f"requests_total: {len(timings)}") + print(f"requests_ok: {len(ok)}") + print(f"requests_err: {len(errs)}") + if latencies: + print(f"total_time_s: {sum(latencies):.3f}") + print(f"avg_s: {statistics.mean(latencies):.3f}") + print(f"p50_s: {statistics.median(latencies):.3f}") + print(f"min_s: {min(latencies):.3f}") + print(f"max_s: {max(latencies):.3f}") + else: + print("No successful requests; cannot compute latency stats.") + + if args.print_first > 0: + print() + print("=== Samples (first N) ===") + shown = 0 + for t in ok: + print(f"- IN: {t.text}") + if t.sample_out: + print(f" OUT: {t.sample_out}") + else: + print(" OUT: (could not parse translation field; see raw JSON by editing script if needed)") + shown += 1 + if shown >= args.print_first: + break + + if errs: + print() + print("=== Errors (first 3) ===") + for t in errs[:3]: + print(f"- {t.seconds:.3f}s: {t.error}") + + return 0 if not errs else 2 + + +if __name__ == "__main__": + raise SystemExit(main()) -- libgit2 0.21.2