# Unified service configuration: runtime ports, infrastructure endpoints,
# evaluation tooling, query/ranking parameters, model services and tenants.
#
# NOTE(review): the block structure below was restored from a copy whose line
# breaks and indentation had been collapsed. Nesting was inferred from key
# names; please confirm against the config loader, in particular:
#   - infrastructure.secrets
#   - the sub-sections placed under query_config (style_intent,
#     product_title_exclusion, search_fields, text_query_strategy,
#     source_fields, knn_*)
#   - services.translation.cache.* and services.rerank.request

runtime:
  environment: prod
  index_namespace: ''
  api_host: 0.0.0.0
  api_port: 6002
  indexer_host: 0.0.0.0
  indexer_port: 6004
  embedding_host: 0.0.0.0
  embedding_port: 6005
  embedding_text_port: 6005
  embedding_image_port: 6008
  translator_host: 0.0.0.0
  translator_port: 6006
  reranker_host: 0.0.0.0
  reranker_port: 6007

infrastructure:
  elasticsearch:
    host: http://localhost:9200
    username: null
    password: null
  redis:
    host: localhost
    port: 6479
    snapshot_db: 0
    password: null
    socket_timeout: 1
    socket_connect_timeout: 1
    retry_on_timeout: false
    cache_expire_days: 720
    embedding_cache_prefix: embedding
    anchor_cache_prefix: product_anchors
    anchor_cache_expire_days: 30
  database:
    host: null
    port: 3306
    database: null
    username: null
    password: null
  secrets:
    dashscope_api_key: null
    deepl_auth_key: null

es_index_name: search_products

indexes: []

assets:
  query_rewrite_dictionary_path: config/dictionaries/query_rewrite.dict

product_enrich:
  max_workers: 40

search_evaluation:
  artifact_root: artifacts/search_evaluation
  queries_file: scripts/evaluation/queries/queries.txt
  default_dataset_id: core_queries
  datasets:
    - dataset_id: core_queries
      display_name: Core Queries
      description: Legacy baseline evaluation set from queries.txt
      query_file: scripts/evaluation/queries/queries.txt
      tenant_id: '163'
      language: en
      enabled: true
    - dataset_id: clothing_top771
      display_name: Clothing Filtered 771
      description: 771 clothing / shoes / accessories queries filtered from top1k
      query_file: scripts/evaluation/queries/all_keywords.txt.top1w.shuf.top1k.clothing_filtered
      tenant_id: '163'
      language: en
      enabled: true
  eval_log_dir: logs
  default_tenant_id: '163'
  search_base_url: ''
  web_host: 0.0.0.0
  web_port: 6010
  judge_model: qwen3.6-plus
  judge_enable_thinking: false
  judge_dashscope_batch: false
  intent_model: qwen3.6-plus
  intent_enable_thinking: true
  judge_batch_completion_window: 24h
  judge_batch_poll_interval_sec: 10.0
  build_search_depth: 1000
  build_rerank_depth: 10000
  annotate_search_top_k: 120
  annotate_rerank_top_k: 200
  batch_top_k: 100
  audit_top_k: 100
  audit_limit_suspicious: 5
  default_language: en
  search_recall_top_k: 200
  rerank_high_threshold: 0.5
  rerank_high_skip_count: 1000
  rebuild_llm_batch_size: 50
  rebuild_min_llm_batches: 10
  rebuild_max_llm_batches: 40
  rebuild_irrelevant_stop_ratio: 0.799
  rebuild_irrel_low_combined_stop_ratio: 0.959
  rebuild_irrelevant_stop_streak: 3

es_settings:
  number_of_shards: 1
  number_of_replicas: 0
  refresh_interval: 30s

# Configured by field base name; the `.{lang}` suffix is appended at query time
# according to the actual retrieval language.
field_boosts:
  title: 3.0
  # qanchors and enriched_tags also appear in enriched_attributes.value, so
  # their effective weight is their own weight plus that of
  # enriched_attributes.value.
  qanchors: 1.0
  enriched_tags: 1.0
  enriched_attributes.value: 1.5
  category_name_text: 2.0
  category_path: 2.0
  keywords: 2.0
  tags: 2.0
  option1_values: 1.7
  option2_values: 1.7
  option3_values: 1.7
  brief: 1.0
  description: 1.0
  vendor: 1.0

query_config:
  supported_languages:
    - zh
    - en
  default_language: en
  enable_text_embedding: true
  enable_query_rewrite: true
  zh_to_en_model: deepl  # alternative: nllb-200-distilled-600m
  en_to_zh_model: deepl
  default_translation_model: deepl
  # Translation quality matters more when the source language is not in
  # index_languages, so these models are configured separately.
  zh_to_en_model__source_not_in_index: deepl
  en_to_zh_model__source_not_in_index: deepl
  default_translation_model__source_not_in_index: deepl
  # Query parsing stage: translation and query embedding run concurrently and
  # share one wait budget (milliseconds).
  translation_embedding_wait_budget_ms_source_in_index: 300
  translation_embedding_wait_budget_ms_source_not_in_index: 400
  style_intent:
    enabled: true
    selected_sku_boost: 1.2
    color_dictionary_path: config/dictionaries/style_intent_color.csv
    size_dictionary_path: config/dictionaries/style_intent_size.csv
    dimension_aliases:
      color:
        - color
        - colors
        - colour
        - colours
        - 颜色
        - 色
        - 色系
      size:
        - size
        - sizes
        - sizing
        - 尺码
        - 尺寸
        - 码数
        - 号码
        - 码
  product_title_exclusion:
    enabled: true
    dictionary_path: config/dictionaries/product_title_exclusion.tsv
  search_fields:
    # Configured by field base name; the `.{lang}` suffix is appended at query
    # time according to the actual retrieval language.
    multilingual_fields:
      - title
      - keywords
      - qanchors
      - enriched_tags
      - enriched_attributes.value
      # - enriched_taxonomy_attributes.value
      - option1_values
      - option2_values
      - option3_values
      - category_path
      - category_name_text
      # - brief
      # - description
      # - vendor
    # shared_fields: fields without a language suffix; examples: tags,
    # option1_values, option2_values, option3_values
    shared_fields: null
    core_multilingual_fields:
      - title
      - qanchors
      - category_name_text
  # Text recall (primary query + translated query).
  text_query_strategy:
    base_minimum_should_match: 60%
    translation_minimum_should_match: 60%
    translation_boost: 0.75
    tie_breaker_base_query: 0.5
    best_fields_boost: 2.0
    best_fields:
      title: 4.0
      qanchors: 3.0
      category_name_text: 2.0
    phrase_fields:
      title: 5.0
      qanchors: 4.0
    phrase_match_boost: 3.0
  text_embedding_field: title_embedding
  image_embedding_field: image_embedding.vector
  # null returns all fields; [] returns no fields.
  source_fields:
    - spu_id
    - handle
    - title
    - brief
    - description
    - vendor
    - category_name
    - category_name_text
    - category_path
    - category_id
    - category_level
    - category1_name
    - category2_name
    - category3_name
    # - tags
    # - keywords
    # - qanchors
    # - enriched_tags
    # - enriched_attributes
    # - enriched_taxonomy_attributes.value
    - min_price
    - compare_at_price
    - image_url
    - sku_prices
    - sku_weights
    - sku_weight_units
    - total_inventory
    - option1_name
    - option1_values
    - option2_name
    - option2_values
    - option3_name
    - option3_values
    - specifications
    - skus
  # KNN: separate boost and recall settings (k / num_candidates) for the text
  # vector and the multimodal (image) vector.
  knn_text_boost: 4
  knn_image_boost: 4
  knn_text_k: 160
  knn_text_num_candidates: 560  # k * 3.5
  knn_text_k_long: 400
  knn_text_num_candidates_long: 1200
  knn_image_k: 400
  knn_image_num_candidates: 1200

function_score:
  score_mode: sum
  boost_mode: multiply
  functions: []

coarse_rank:
  enabled: true
  input_window: 480
  output_window: 160
  fusion:
    es_bias: 10.0
    es_exponent: 0.05
    text_bias: 0.1
    text_exponent: 0.35
    # Weight of base_query_trans_* relative to base_query (see the text dismax
    # fusion in search/rerank_client). The ES score already discounts the
    # translated query, so no further discount is applied here.
    text_translation_weight: 1.0
    knn_text_weight: 1.0
    knn_image_weight: 2.0
    knn_tie_breaker: 0.3
    knn_bias: 0.2
    knn_exponent: 5.6
    knn_text_bias: 0.2
    knn_text_exponent: 0.0
    knn_image_bias: 0.2
    knn_image_exponent: 0.0

fine_rank:
  enabled: false  # when false, candidates pass through with order preserved
  input_window: 160
  output_window: 80
  timeout_sec: 10.0
  rerank_query_template: '{query}'
  rerank_doc_template: '{title}'
  service_profile: fine

rerank:
  enabled: false  # when false, candidates pass through with order preserved
  rerank_window: 160
  exact_knn_rescore_enabled: true
  exact_knn_rescore_window: 160
  timeout_sec: 15.0
  weight_es: 0.4
  weight_ai: 0.6
  rerank_query_template: '{query}'
  rerank_doc_template: '{title}'
  service_profile: default
  # Multiplicative fusion:
  #   fused = Π (max(score, 0) + bias) ** exponent
  #   over es / rerank / fine / text / knn.
  # knn_score is first combined with a dis_max step:
  #   max(knn_text_weight * text_knn, knn_image_weight * image_knn)
  #   + knn_tie_breaker * (the weaker of the two signals)
  fusion:
    es_bias: 10.0
    es_exponent: 0.05
    rerank_bias: 0.1
    rerank_exponent: 1.15
    fine_bias: 0.1
    fine_exponent: 1.0
    text_bias: 0.1
    text_exponent: 0.25
    # Weight of base_query_trans_* relative to base_query (see the text dismax
    # fusion in search/rerank_client).
    text_translation_weight: 0.8
    knn_text_weight: 1.0
    knn_image_weight: 2.0
    knn_tie_breaker: 0.3
    knn_bias: 0.0
    knn_exponent: 5.6

services:
  translation:
    service_url: http://127.0.0.1:6006
    default_model: nllb-200-distilled-600m
    default_scene: general
    timeout_sec: 10.0
    cache:
      ttl_seconds: 62208000
      sliding_expiration: true
      # When false, cache keys are exact-match per request model only (ignores
      # model_quality_tiers for lookups).
      # Higher tier = better quality. Multiple models may share one tier.
      # A request for a model at tier A may reuse Redis keys written by models
      # with tier >= A (never from lower tiers).
      enable_model_quality_tier_cache: true
      model_quality_tiers:
        deepl: 30
        qwen-mt: 30
        llm: 30
        nllb-200-distilled-600m: 20
        opus-mt-zh-en: 10
        opus-mt-en-zh: 10
    capabilities:
      qwen-mt:
        enabled: true
        backend: qwen_mt
        model: qwen-mt-flash
        base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
        timeout_sec: 10.0
        use_cache: true
      llm:
        enabled: true
        backend: llm
        model: qwen-flash
        base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
        timeout_sec: 30.0
        use_cache: true
      deepl:
        enabled: true
        backend: deepl
        api_url: https://api.deepl.com/v2/translate
        timeout_sec: 10.0
        glossary_id: ''
        use_cache: true
      nllb-200-distilled-600m:
        enabled: true
        backend: local_nllb
        model_id: facebook/nllb-200-distilled-600M
        model_dir: ./models/translation/facebook/nllb-200-distilled-600M
        ct2_model_dir: ./models/translation/facebook/nllb-200-distilled-600M/ctranslate2-float16
        ct2_compute_type: float16
        ct2_conversion_quantization: float16
        ct2_auto_convert: true
        ct2_inter_threads: 4
        ct2_intra_threads: 0
        ct2_max_queued_batches: 32
        ct2_batch_type: examples
        ct2_decoding_length_mode: source
        ct2_decoding_length_extra: 8
        ct2_decoding_length_min: 32
        device: cuda
        torch_dtype: float16
        batch_size: 64
        max_input_length: 256
        max_new_tokens: 64
        num_beams: 1
        use_cache: true
      opus-mt-zh-en:
        enabled: false
        backend: local_marian
        model_id: Helsinki-NLP/opus-mt-zh-en
        model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en
        ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en/ctranslate2-float16
        ct2_compute_type: float16
        ct2_conversion_quantization: float16
        ct2_auto_convert: true
        ct2_inter_threads: 1
        ct2_intra_threads: 0
        ct2_max_queued_batches: 0
        ct2_batch_type: examples
        device: cuda
        torch_dtype: float16
        batch_size: 16
        max_input_length: 256
        max_new_tokens: 256
        num_beams: 1
        use_cache: true
      opus-mt-en-zh:
        enabled: false
        backend: local_marian
        model_id: Helsinki-NLP/opus-mt-en-zh
        model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh
        ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh/ctranslate2-float16
        ct2_compute_type: float16
        ct2_conversion_quantization: float16
        ct2_auto_convert: true
        ct2_inter_threads: 1
        ct2_intra_threads: 0
        ct2_max_queued_batches: 0
        ct2_batch_type: examples
        device: cuda
        torch_dtype: float16
        batch_size: 16
        max_input_length: 256
        max_new_tokens: 256
        num_beams: 1
        use_cache: true

  embedding:
    provider: http
    providers:
      http:
        text_base_url: http://127.0.0.1:6005
        image_base_url: http://127.0.0.1:6008
    backend: tei
    backends:
      tei:
        base_url: http://127.0.0.1:8080
        timeout_sec: 20
        model_id: Qwen/Qwen3-Embedding-0.6B
      local_st:
        model_id: Qwen/Qwen3-Embedding-0.6B
        device: cuda
        batch_size: 32
        normalize_embeddings: true
    # In-service image backend (read when the embedding process starts; the
    # cnclip gRPC server and the service on port 6008 must use the same
    # model_name).
    # Chinese-CLIP: ViT-H-14 -> 1024 dims, ViT-L-14 -> 768 dims. Must match
    # image_embedding.vector.dims in mappings/search_products.json (the current
    # index is 1024 -> default ViT-H-14).
    # NOTE(review): both image backends below are set to ViT-L-14, which the
    # line above says is 768 dims, while the index is said to be 1024 dims.
    # Confirm which of the two (this comment or the model_name values) is stale.
    image_backend: clip_as_service  # clip_as_service | local_cnclip
    image_backends:
      clip_as_service:
        server: grpc://127.0.0.1:51000
        model_name: CN-CLIP/ViT-L-14
        batch_size: 8
        normalize_embeddings: true
      local_cnclip:
        model_name: ViT-L-14
        device: null
        batch_size: 8
        normalize_embeddings: true

  rerank:
    provider: http
    providers:
      http:
        instances:
          default:
            base_url: http://127.0.0.1:6007
            service_url: http://127.0.0.1:6007/rerank
          fine:
            base_url: http://127.0.0.1:6009
            service_url: http://127.0.0.1:6009/rerank
    request:
      max_docs: 1000
      normalize: true
    # Named instances: the same reranker code reads a different port / backend /
    # runtime directory per instance name.
    default_instance: default
    instances:
      default:
        host: 0.0.0.0
        port: 6007
        backend: bge
        runtime_dir: ./.runtime/reranker/default
      fine:
        host: 0.0.0.0
        port: 6009
        backend: bge
        runtime_dir: ./.runtime/reranker/fine
    backends:
      bge:
        model_name: BAAI/bge-reranker-v2-m3
        device: null
        use_fp16: true
        batch_size: 80
        max_length: 160
        cache_dir: ./model_cache
        enable_warmup: true
      jina_reranker_v3:
        model_name: jinaai/jina-reranker-v3
        device: null
        dtype: float16
        batch_size: 64
        max_doc_length: 160
        max_query_length: 64
        sort_by_doc_length: true
        cache_dir: ./model_cache
        trust_remote_code: true
      qwen3_vllm:
        model_name: Qwen/Qwen3-Reranker-0.6B
        engine: vllm
        max_model_len: 256
        tensor_parallel_size: 1
        gpu_memory_utilization: 0.2
        dtype: float16
        enable_prefix_caching: true
        enforce_eager: false
        infer_batch_size: 100
        sort_by_doc_length: true
        # standard = _format_instruction__standard (fixed yes/no system prompt);
        # compact = _format_instruction (instruction used as the system prompt
        # and repeated as Instruct inside the user turn).
        instruction_format: standard  # one of: compact, standard
        # "rank products by given query" works slightly better than
        # "Given a query, score the product for relevance".
        # instruction: "Given a query, score the product for relevance"
        # instruction: "rank products by given query, category match first"
        # instruction: "Rank products by query relevance, prioritizing category match"
        # instruction: "Rank products by query relevance, prioritizing category and style match"
        # instruction: "Rank by query relevance, prioritize category & style"
        # instruction: "Relevance ranking: category & style match first"
        # instruction: "Score product relevance by query with category & style match prioritized"
        # instruction: "Rank products by query with category & style match prioritized"
        # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query"
        instruction: rank products by given query
      # vLLM LLM.score() (cross-encoder scoring). Uses a dedicated
      # high-performance environment, .venv-reranker-score (pinned to vllm 0.18):
      #   ./scripts/setup_reranker_venv.sh qwen3_vllm_score
      # It may share model_name / HF cache with qwen3_vllm; the venv is separate
      # so vLLM can be upgraded without affecting the generate backend.
      qwen3_vllm_score:
        model_name: Qwen/Qwen3-Reranker-0.6B
        # Must be true for the original official Hub weights; set to false when
        # using converted seq-cls weights (e.g. tomaarsen/...-seq-cls).
        use_original_qwen3_hf_overrides: true
        # vllm_runner: "auto"
        # vllm_convert: "auto"
        # Optional: merged with the built-in overrides when
        # use_original_qwen3_hf_overrides is true.
        # hf_overrides: {}
        engine: vllm
        max_model_len: 172
        tensor_parallel_size: 1
        gpu_memory_utilization: 0.15
        dtype: float16
        enable_prefix_caching: true
        enforce_eager: false
        infer_batch_size: 80
        sort_by_doc_length: true
        # The default, standard, matches the official vLLM Qwen3 reranker prefix.
        instruction_format: standard  # one of: compact, standard
        # instruction: "Rank products by query with category & style match prioritized"
        # instruction: "Given a shopping query, rank products by relevance"
        instruction: Rank products by query with category & style match prioritized
      qwen3_transformers:
        model_name: Qwen/Qwen3-Reranker-0.6B
        instruction: rank products by given query
        max_length: 8192
        batch_size: 64
        use_fp16: true
        attn_implementation: sdpa
      qwen3_transformers_packed:
        model_name: Qwen/Qwen3-Reranker-0.6B
        instruction: Rank products by query with category & style match prioritized
        max_model_len: 256
        max_doc_len: 160
        max_docs_per_pack: 0
        use_fp16: true
        sort_by_doc_length: true
        attn_implementation: eager
      qwen3_gguf:
        repo_id: DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF
        filename: '*Q8_0.gguf'
        cache_dir: ./model_cache
        local_dir: ./models/reranker/qwen3-reranker-4b-gguf
        instruction: Rank products by query with category & style match prioritized
        n_ctx: 512
        n_batch: 512
        n_ubatch: 512
        n_gpu_layers: 999
        main_gpu: 0
        n_threads: 2
        n_threads_batch: 4
        flash_attn: true
        offload_kqv: true
        use_mmap: true
        use_mlock: false
        infer_batch_size: 8
        sort_by_doc_length: true
        length_sort_mode: char
        enable_warmup: true
        verbose: false
      qwen3_gguf_06b:
        repo_id: ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF
        filename: qwen3-reranker-0.6b-q8_0.gguf
        cache_dir: ./model_cache
        local_dir: ./models/reranker/qwen3-reranker-0.6b-q8_0-gguf
        instruction: Rank products by query with category & style match prioritized
        n_ctx: 256
        n_batch: 256
        n_ubatch: 256
        n_gpu_layers: 999
        main_gpu: 0
        n_threads: 2
        n_threads_batch: 4
        flash_attn: true
        offload_kqv: true
        use_mmap: true
        use_mlock: false
        infer_batch_size: 32
        sort_by_doc_length: true
        length_sort_mode: char
        reuse_query_state: false
        enable_warmup: true
        verbose: false
      dashscope_rerank:
        model_name: qwen3-rerank
        endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks
        api_key_env: RERANK_DASHSCOPE_API_KEY_CN
        timeout_sec: 10.0
        top_n_cap: 0  # 0 means top_n = number of documents in the current request
        # 0 disables; >0 enables concurrent small-batch scheduling (top_n /
        # top_n_cap still apply; results are truncated globally after batching).
        batchsize: 64
        instruct: Given a shopping query, rank product titles by relevance
        max_retries: 2
        retry_backoff_sec: 0.2

spu_config:
  enabled: true
  spu_field: spu_id
  inner_hits_size: 10
  # Which option dimensions take part in retrieval (both indexing and online
  # search). A list containing one or more of option1 / option2 / option3.
  searchable_option_dimensions:
    - option1
    - option2
    - option3

# Each tenant can configure a primary language (primary_language) and index
# languages (index_languages: the main market languages, selectable by the
# merchant). The default index_languages is [en, zh]; any subset of
# SOURCE_LANG_CODE_MAP.keys() may be configured.
tenant_config:
  default:
    primary_language: en
    index_languages:
      - en
      - zh
  tenants:
    '1':
      primary_language: zh
      index_languages:
        - zh
        - en
    '2':
      primary_language: en
      index_languages:
        - en
        - zh
    '3':
      primary_language: zh
      index_languages:
        - zh
        - en
    '162':
      primary_language: zh
      index_languages:
        - zh
        - en
    '170':
      primary_language: en
      index_languages:
        - en
        - zh