# Unified Configuration for Multi-Tenant Search Engine
# 统一配置文件,所有租户共用一套配置
# 注意:索引结构由 mappings/search_products.json 定义,此文件只配置搜索行为
#
# 约定:下列键为必填;进程环境变量可覆盖 infrastructure / runtime 中同名语义项
#(如 ES_HOST、API_PORT 等),未设置环境变量时使用本文件中的值。

# Process / bind addresses (环境变量 APP_ENV、RUNTIME_ENV、ES_INDEX_NAMESPACE 可覆盖前两者的语义)
runtime:
  environment: prod
  index_namespace: ''
  api_host: 0.0.0.0
  api_port: 6002
  indexer_host: 0.0.0.0
  indexer_port: 6004
  embedding_host: 0.0.0.0
  embedding_port: 6005
  embedding_text_port: 6005
  embedding_image_port: 6008
  translator_host: 0.0.0.0
  translator_port: 6006
  reranker_host: 0.0.0.0
  reranker_port: 6007

# 基础设施连接(敏感项优先读环境变量:ES_*、REDIS_*、DB_*、DASHSCOPE_API_KEY、DEEPL_AUTH_KEY)
infrastructure:
  elasticsearch:
    host: http://localhost:9200
    username: null
    password: null
  redis:
    host: localhost
    port: 6479
    snapshot_db: 0
    password: null
    socket_timeout: 1
    socket_connect_timeout: 1
    retry_on_timeout: false
    cache_expire_days: 720
    embedding_cache_prefix: embedding
  database:
    host: null
    port: 3306
    database: null
    username: null
    password: null
  secrets:
    dashscope_api_key: null
    deepl_auth_key: null

# Elasticsearch Index
es_index_name: search_products

# 检索域 / 索引列表(可为空列表;每项字段均需显式给出)
indexes: []

# Config assets
assets:
  query_rewrite_dictionary_path: config/dictionaries/query_rewrite.dict

# 离线 / Web 相关性评估(scripts/evaluation、eval-web)
# CLI 未显式传参时使用此处默认值;search_base_url 未配置时自动为 http://127.0.0.1:{runtime.api_port}
search_evaluation:
  artifact_root: artifacts/search_evaluation
  queries_file: scripts/evaluation/queries/queries.txt
  eval_log_dir: logs
  default_tenant_id: '163'
  search_base_url: ''
  web_host: 0.0.0.0
  web_port: 6010
  judge_model: qwen3.6-plus
  judge_enable_thinking: false
  judge_dashscope_batch: false
  intent_model: qwen3.6-plus
  intent_enable_thinking: true
  judge_batch_completion_window: 24h
  judge_batch_poll_interval_sec: 10.0
  build_search_depth: 1000
  build_rerank_depth: 10000
  annotate_search_top_k: 120
  annotate_rerank_top_k: 200
  batch_top_k: 100
  audit_top_k: 100
  audit_limit_suspicious: 5
  default_language: en
  search_recall_top_k: 200
  rerank_high_threshold: 0.5
  rerank_high_skip_count: 1000
  rebuild_llm_batch_size: 50
  rebuild_min_llm_batches: 10
  rebuild_max_llm_batches: 40
  rebuild_irrelevant_stop_ratio: 0.799
  rebuild_irrel_low_combined_stop_ratio: 0.959
  rebuild_irrelevant_stop_streak: 3

# ES Index Settings (基础设置)
es_settings:
  number_of_shards: 1
  number_of_replicas: 0
  refresh_interval: 30s

# 字段权重配置(用于搜索时的字段boost)
# 统一按“字段基名”配置;查询时按实际检索语言动态拼接 .{lang}。
# 若需要按某个语言单独调权,也可以加显式 key(例如 title.de: 3.2)。
field_boosts:
  title: 3.0
  # qanchors enriched_tags 在 enriched_attributes.value中也存在,所以其实他的权重为自身权重+enriched_attributes.value的权重
  qanchors: 1.0
  enriched_tags: 1.0
  enriched_attributes.value: 1.5
  # enriched_taxonomy_attributes.value: 0.3
  category_name_text: 2.0
  category_path: 2.0
  keywords: 2.0
  tags: 2.0
  option1_values: 1.7
  option2_values: 1.7
  option3_values: 1.7
  brief: 1.0
  description: 1.0
  vendor: 1.0

# Query Configuration(查询配置)
query_config:
  # 支持的语言
  supported_languages:
    - zh
    - en
  default_language: en
  # 功能开关(翻译开关由tenant_config控制)
  enable_text_embedding: true
  enable_query_rewrite: true
  # 查询翻译模型(须与 services.translation.capabilities 中某项一致)
  # 源语种在租户 index_languages 内:主召回可打在源语种字段,用下面三项。
  zh_to_en_model: nllb-200-distilled-600m  # "opus-mt-zh-en"
  en_to_zh_model: nllb-200-distilled-600m  # "opus-mt-en-zh"
  default_translation_model: nllb-200-distilled-600m
  # zh_to_en_model: deepl
  # en_to_zh_model: deepl
  # default_translation_model: deepl
  # 源语种不在 index_languages:翻译对可检索文本更关键,可单独指定(缺省则与上一组相同)
  zh_to_en_model__source_not_in_index: nllb-200-distilled-600m
  en_to_zh_model__source_not_in_index: nllb-200-distilled-600m
  default_translation_model__source_not_in_index: nllb-200-distilled-600m
  # zh_to_en_model__source_not_in_index: deepl
  # en_to_zh_model__source_not_in_index: deepl
  # default_translation_model__source_not_in_index: deepl
  # 查询解析阶段:翻译与 query 向量并发执行,共用同一等待预算(毫秒)。
  # 检测语言已在租户 index_languages 内:较短;不在索引语言内:较长(翻译对召回更关键)。
  translation_embedding_wait_budget_ms_source_in_index: 300  # 80
  translation_embedding_wait_budget_ms_source_not_in_index: 400  # 200

style_intent:
  enabled: true
  selected_sku_boost: 1.2
  color_dictionary_path: config/dictionaries/style_intent_color.csv
  size_dictionary_path: config/dictionaries/style_intent_size.csv
  dimension_aliases:
    color:
      - color
      - colors
      - colour
      - colours
      - 颜色
      - 色
      - 色系
    size:
      - size
      - sizes
      - sizing
      - 尺码
      - 尺寸
      - 码数
      - 号码
      - 码

product_title_exclusion:
  enabled: true
  dictionary_path: config/dictionaries/product_title_exclusion.tsv

# 动态多语言检索字段配置
# multilingual_fields 会被拼成 title.{lang}/brief.{lang}/... 形式;
# shared_fields 为无语言后缀字段。
search_fields:
  multilingual_fields:
    - title
    - keywords
    - qanchors
    - enriched_tags
    - enriched_attributes.value
    # - enriched_taxonomy_attributes.value
    - option1_values
    - option2_values
    - option3_values
    - category_path
    - category_name_text
    # - brief
    # - description
    # - vendor
  # shared_fields: 无语言后缀字段;示例: tags, option1_values, option2_values, option3_values
  shared_fields: null
  core_multilingual_fields:
    - title
    - qanchors
    - category_name_text

# 统一文本召回策略(主查询 + 翻译查询)
text_query_strategy:
  base_minimum_should_match: 60%
  translation_minimum_should_match: 60%
  translation_boost: 0.75
  tie_breaker_base_query: 0.5
  best_fields_boost: 2.0
  best_fields:
    title: 4.0
    qanchors: 3.0
    category_name_text: 2.0
  phrase_fields:
    title: 5.0
    qanchors: 4.0
  phrase_match_boost: 3.0

# Embedding字段名称
text_embedding_field: title_embedding
image_embedding_field: image_embedding.vector

# 返回字段配置(_source includes)
# null表示返回所有字段,[]表示不返回任何字段,列表表示只返回指定字段
# 下列字段与 api/result_formatter.py(SpuResult 填充)及 search/searcher.py(SKU 排序/主图替换)一致
source_fields:
  - spu_id
  - handle
  - title
  - brief
  - description
  - vendor
  - category_name
  - category_name_text
  - category_path
  - category_id
  - category_level
  - category1_name
  - category2_name
  - category3_name
  # - tags
  # - keywords
  # - qanchors
  # - enriched_tags
  # - enriched_attributes
  # - enriched_taxonomy_attributes.value
  - min_price
  - compare_at_price
  - image_url
  - sku_prices
  - sku_weights
  - sku_weight_units
  - total_inventory
  - option1_name
  - option1_values
  - option2_name
  - option2_values
  - option3_name
  - option3_values
  - specifications
  - skus

# KNN:文本向量与多模态(图片)向量各自 boost 与召回(k / num_candidates)
knn_text_boost: 4
knn_image_boost: 4
# knn_text_num_candidates = k * 3.4
knn_text_k: 160
knn_text_num_candidates: 560
knn_text_k_long: 400
knn_text_num_candidates_long: 1200
knn_image_k: 400
knn_image_num_candidates: 1200

# Function Score配置(ES层打分规则)
function_score:
  score_mode: sum
  boost_mode: multiply
  functions: []

# 粗排配置(仅融合 ES 文本/向量信号,不调用模型)
coarse_rank:
  enabled: true
  input_window: 700
  output_window: 240
  fusion:
    es_bias: 10.0
    es_exponent: 0.05
    text_bias: 0.1
    text_exponent: 0.35
    # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合)
    # 因为es的打分已经给了trans进行了折扣,所以这里不再继续折扣
    text_translation_weight: 1.0
    knn_text_weight: 1.0
    knn_image_weight: 2.0
    knn_tie_breaker: 0.3
    knn_bias: 0.6
    knn_exponent: 0.4

# 精排配置(轻量 reranker)
fine_rank:
  enabled: false
  input_window: 160
  output_window: 80
  timeout_sec: 10.0
  rerank_query_template: '{query}'
  rerank_doc_template: '{title}'
  service_profile: fine

# 重排配置(provider/URL 在 services.rerank)
rerank:
  enabled: true
  rerank_window: 160
  timeout_sec: 15.0
  weight_es: 0.4
  weight_ai: 0.6
  rerank_query_template: '{query}'
  rerank_doc_template: '{title}'
  service_profile: default
  # 乘法融合:fused = Π (max(score,0) + bias) ** exponent(es / rerank / fine / text / knn)
  # 其中 knn_score 先做一层 dis_max:
  #   max(knn_text_weight * text_knn, knn_image_weight * image_knn)
  #   + knn_tie_breaker * 另一侧较弱信号
  fusion:
    es_bias: 10.0
    es_exponent: 0.05
    rerank_bias: 0.1
    rerank_exponent: 1.15
    fine_bias: 0.1
    fine_exponent: 1.0
    text_bias: 0.1
    text_exponent: 0.25
    # base_query_trans_* 相对 base_query 的权重(见 search/rerank_client 中文本 dismax 融合)
    text_translation_weight: 0.8
    knn_text_weight: 1.0
    knn_image_weight: 2.0
    knn_tie_breaker: 0.3
    knn_bias: 0.6
    knn_exponent: 0.4

# 可扩展服务/provider 注册表(单一配置源)
services:
  translation:
    service_url: http://127.0.0.1:6006
    # default_model: nllb-200-distilled-600m
    default_model: nllb-200-distilled-600m
    default_scene: general
    timeout_sec: 10.0
    cache:
      ttl_seconds: 62208000
      sliding_expiration: true
      # When false, cache keys are exact-match per request model only (ignores model_quality_tiers for lookups).
      enable_model_quality_tier_cache: true
      # Higher tier = better quality. Multiple models may share one tier (同级).
      # A request may reuse Redis keys from models with tier > A or tier == A (not from lower tiers).
      model_quality_tiers:
        deepl: 30
        qwen-mt: 30
        llm: 30
        nllb-200-distilled-600m: 20
        opus-mt-zh-en: 10
        opus-mt-en-zh: 10
    capabilities:
      qwen-mt:
        enabled: true
        backend: qwen_mt
        model: qwen-mt-flash
        base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
        timeout_sec: 10.0
        use_cache: true
      llm:
        enabled: true
        backend: llm
        model: qwen-flash
        base_url: https://dashscope-us.aliyuncs.com/compatible-mode/v1
        timeout_sec: 30.0
        use_cache: true
      deepl:
        enabled: true
        backend: deepl
        api_url: https://api.deepl.com/v2/translate
        timeout_sec: 10.0
        glossary_id: ''
        use_cache: true
      nllb-200-distilled-600m:
        enabled: true
        backend: local_nllb
        model_id: facebook/nllb-200-distilled-600M
        model_dir: ./models/translation/facebook/nllb-200-distilled-600M
        ct2_model_dir: ./models/translation/facebook/nllb-200-distilled-600M/ctranslate2-float16
        ct2_compute_type: float16
        ct2_conversion_quantization: float16
        ct2_auto_convert: true
        ct2_inter_threads: 4
        ct2_intra_threads: 0
        ct2_max_queued_batches: 32
        ct2_batch_type: examples
        ct2_decoding_length_mode: source
        ct2_decoding_length_extra: 8
        ct2_decoding_length_min: 32
        device: cuda
        torch_dtype: float16
        batch_size: 64
        max_input_length: 256
        max_new_tokens: 64
        num_beams: 1
        use_cache: true
      opus-mt-zh-en:
        enabled: false
        backend: local_marian
        model_id: Helsinki-NLP/opus-mt-zh-en
        model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en
        ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-zh-en/ctranslate2-float16
        ct2_compute_type: float16
        ct2_conversion_quantization: float16
        ct2_auto_convert: true
        ct2_inter_threads: 1
        ct2_intra_threads: 0
        ct2_max_queued_batches: 0
        ct2_batch_type: examples
        device: cuda
        torch_dtype: float16
        batch_size: 16
        max_input_length: 256
        max_new_tokens: 256
        num_beams: 1
        use_cache: true
      opus-mt-en-zh:
        enabled: false
        backend: local_marian
        model_id: Helsinki-NLP/opus-mt-en-zh
        model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh
        ct2_model_dir: ./models/translation/Helsinki-NLP/opus-mt-en-zh/ctranslate2-float16
        ct2_compute_type: float16
        ct2_conversion_quantization: float16
        ct2_auto_convert: true
        ct2_inter_threads: 1
        ct2_intra_threads: 0
        ct2_max_queued_batches: 0
        ct2_batch_type: examples
        device: cuda
        torch_dtype: float16
        batch_size: 16
        max_input_length: 256
        max_new_tokens: 256
        num_beams: 1
        use_cache: true
  embedding:
    provider: http  # http
    providers:
      http:
        text_base_url: http://127.0.0.1:6005
        image_base_url: http://127.0.0.1:6008
    # 服务内文本后端(embedding 进程启动时读取)
    backend: tei  # tei | local_st
    backends:
      tei:
        base_url: http://127.0.0.1:8080
        timeout_sec: 20
        model_id: Qwen/Qwen3-Embedding-0.6B
      local_st:
        model_id: Qwen/Qwen3-Embedding-0.6B
        device: cuda
        batch_size: 32
        normalize_embeddings: true
    # 服务内图片后端(embedding 进程启动时读取;cnclip gRPC 与 6008 须同一 model_name)
    # Chinese-CLIP:ViT-H-14 → 1024 维,ViT-L-14 → 768 维。须与 mappings/search_products.json 中
    # image_embedding.vector.dims 一致(当前索引为 1024 → 默认 ViT-H-14)。
    image_backend: clip_as_service  # clip_as_service | local_cnclip
    image_backends:
      clip_as_service:
        server: grpc://127.0.0.1:51000
        model_name: CN-CLIP/ViT-L-14
        batch_size: 8
        normalize_embeddings: true
      local_cnclip:
        model_name: ViT-L-14
        device: null
        batch_size: 8
        normalize_embeddings: true
  rerank:
    provider: http
    providers:
      http:
        instances:
          default:
            base_url: http://127.0.0.1:6007
            service_url: http://127.0.0.1:6007/rerank
          fine:
            base_url: http://127.0.0.1:6009
            service_url: http://127.0.0.1:6009/rerank
        request:
          max_docs: 1000
          normalize: true
    default_instance: default
    # 命名实例:同一套 reranker 代码按实例名读取不同端口 / 后端 / runtime 目录。
    instances:
      default:
        host: 0.0.0.0
        port: 6007
        backend: qwen3_vllm_score
        runtime_dir: ./.runtime/reranker/default
      fine:
        host: 0.0.0.0
        port: 6009
        backend: bge
        runtime_dir: ./.runtime/reranker/fine
    backends:
      bge:
        model_name: BAAI/bge-reranker-v2-m3
        device: null
        use_fp16: true
        batch_size: 80
        max_length: 160
        cache_dir: ./model_cache
        enable_warmup: true
      jina_reranker_v3:
        model_name: jinaai/jina-reranker-v3
        device: null
        dtype: float16
        batch_size: 64
        max_doc_length: 160
        max_query_length: 64
        sort_by_doc_length: true
        cache_dir: ./model_cache
        trust_remote_code: true
      qwen3_vllm:
        model_name: Qwen/Qwen3-Reranker-0.6B
        engine: vllm
        max_model_len: 256
        tensor_parallel_size: 1
        gpu_memory_utilization: 0.2
        dtype: float16
        enable_prefix_caching: true
        enforce_eager: false
        infer_batch_size: 100
        sort_by_doc_length: true
        # standard=_format_instruction__standard(固定 yes/no system);compact=_format_instruction(instruction 作 system 且 user 内重复 Instruct)
        instruction_format: standard  # compact standard
        # instruction: "Given a query, score the product for relevance"
        # "rank products by given query" 比 “Given a query, score the product for relevance” 更好点
        # instruction: "rank products by given query, category match first"
        # instruction: "Rank products by query relevance, prioritizing category match"
        # instruction: "Rank products by query relevance, prioritizing category and style match"
        # instruction: "Rank by query relevance, prioritize category & style"
        # instruction: "Relevance ranking: category & style match first"
        # instruction: "Score product relevance by query with category & style match prioritized"
        # instruction: "Rank products by query with category & style match prioritized"
        # instruction: "Given a fashion shopping query, retrieve relevant products that answer the query"
        instruction: rank products by given query
      # vLLM LLM.score()(跨编码打分)。独立高性能环境 .venv-reranker-score(vllm 0.18 固定版):./scripts/setup_reranker_venv.sh qwen3_vllm_score
      # 与 qwen3_vllm 可共用同一 model_name / HF 缓存;venv 分离以便升级 vLLM 而不影响 generate 后端。
      qwen3_vllm_score:
        model_name: Qwen/Qwen3-Reranker-0.6B
        # 官方 Hub 原版需 true;若改用已转换的 seq-cls 权重(如 tomaarsen/...-seq-cls)则设为 false
        use_original_qwen3_hf_overrides: true
        # vllm_runner: "auto"
        # vllm_convert: "auto"
        # 可选:在 use_original_qwen3_hf_overrides 为 true 时与内置 overrides 合并
        # hf_overrides: {}
        engine: vllm
        max_model_len: 172
        tensor_parallel_size: 1
        gpu_memory_utilization: 0.15
        dtype: float16
        enable_prefix_caching: true
        enforce_eager: false
        infer_batch_size: 80
        sort_by_doc_length: true
        # 默认 standard 与 vLLM 官方 Qwen3 reranker 前缀一致
        instruction_format: standard  # compact standard
        # instruction: "Rank products by query with category & style match prioritized"
        # instruction: "Given a shopping query, rank products by relevance"
        instruction: Rank products by query with category & style match prioritized
      qwen3_transformers:
        model_name: Qwen/Qwen3-Reranker-0.6B
        instruction: rank products by given query
        # instruction: "Score the product’s relevance to the given query"
        max_length: 8192
        batch_size: 64
        use_fp16: true
        # sdpa:默认无需 flash-attn;若已安装 flash_attn 可改为 flash_attention_2
        attn_implementation: sdpa
      # Packed Transformers backend: shared query prefix + custom position_ids/attention_mask.
      # For 1 query + many short docs (for example 400 product titles), this usually reduces
      # repeated prefix work and padding waste compared with pairwise batching.
      qwen3_transformers_packed:
        model_name: Qwen/Qwen3-Reranker-0.6B
        instruction: Rank products by query with category & style match prioritized
        max_model_len: 256
        max_doc_len: 160
        max_docs_per_pack: 0
        use_fp16: true
        sort_by_doc_length: true
        # Packed mode relies on a custom 4D attention mask. "eager" is the safest default.
        # If your torch/transformers stack validates it, you can benchmark "sdpa".
        attn_implementation: eager
      qwen3_gguf:
        repo_id: DevQuasar/Qwen.Qwen3-Reranker-4B-GGUF
        filename: '*Q8_0.gguf'
        cache_dir: ./model_cache
        local_dir: ./models/reranker/qwen3-reranker-4b-gguf
        instruction: Rank products by query with category & style match prioritized
        # T4 16GB / 性能优先配置:全量层 offload,实测比保守配置明显更快
        n_ctx: 512
        n_batch: 512
        n_ubatch: 512
        n_gpu_layers: 999
        main_gpu: 0
        n_threads: 2
        n_threads_batch: 4
        flash_attn: true
        offload_kqv: true
        use_mmap: true
        use_mlock: false
        infer_batch_size: 8
        sort_by_doc_length: true
        length_sort_mode: char
        enable_warmup: true
        verbose: false
      qwen3_gguf_06b:
        repo_id: ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF
        filename: qwen3-reranker-0.6b-q8_0.gguf
        cache_dir: ./model_cache
        local_dir: ./models/reranker/qwen3-reranker-0.6b-q8_0-gguf
        instruction: Rank products by query with category & style match prioritized
        # 0.6B GGUF / online rerank baseline:
        # 实测 400 titles 单请求约 265s,因此它更适合作为低显存功能后备,不适合在线低延迟主路由。
        n_ctx: 256
        n_batch: 256
        n_ubatch: 256
        n_gpu_layers: 999
        main_gpu: 0
        n_threads: 2
        n_threads_batch: 4
        flash_attn: true
        offload_kqv: true
        use_mmap: true
        use_mlock: false
        infer_batch_size: 32
        sort_by_doc_length: true
        length_sort_mode: char
        reuse_query_state: false
        enable_warmup: true
        verbose: false
      dashscope_rerank:
        model_name: qwen3-rerank
        # 按地域选择 endpoint:
        # 中国: https://dashscope.aliyuncs.com/compatible-api/v1/reranks
        # 新加坡: https://dashscope-intl.aliyuncs.com/compatible-api/v1/reranks
        # 美国: https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks
        endpoint: https://dashscope.aliyuncs.com/compatible-api/v1/reranks
        api_key_env: RERANK_DASHSCOPE_API_KEY_CN
        timeout_sec: 10.0
        top_n_cap: 0  # 0 表示 top_n=当前请求文档数;>0 则限制 top_n 上限
        batchsize: 64  # 0 关闭;>0 启用并发小包调度(top_n/top_n_cap 仍生效,分包后全局截断)
        instruct: Given a shopping query, rank product titles by relevance
        max_retries: 2
        retry_backoff_sec: 0.2

# SPU配置(已启用,使用嵌套skus)
spu_config:
  enabled: true
  spu_field: spu_id
  inner_hits_size: 10

# 配置哪些option维度参与检索(进索引、以及在线搜索)
# 格式为list,选择option1/option2/option3中的一个或多个
searchable_option_dimensions:
  - option1
  - option2
  - option3

# 租户配置(Tenant Configuration)
# 每个租户可配置主语言 primary_language 与索引语言 index_languages(主市场语言,商家可勾选)
# 默认 index_languages: [en, zh],可配置为任意 SOURCE_LANG_CODE_MAP.keys() 的子集
tenant_config:
  default:
    primary_language: en
    index_languages:
      - en
      - zh
  tenants:
    '1':
      primary_language: zh
      index_languages:
        - zh
        - en
    '2':
      primary_language: en
      index_languages:
        - en
        - zh
    '3':
      primary_language: zh
      index_languages:
        - zh
        - en
    '162':
      primary_language: zh
      index_languages:
        - zh
        - en
    '170':
      primary_language: en
      index_languages:
        - en
        - zh