---
# Unified Configuration for Multi-Tenant Search Engine
# Single configuration file shared by all tenants.
# Note: the index structure is defined by mappings/search_products.json;
# this file only configures search behavior.
#
# Convention: the keys below are required. Process environment variables can
# override the semantically equivalent items under infrastructure / runtime
# (e.g. ES_HOST, API_PORT); when an environment variable is not set, the value
# in this file is used.

# Process / bind addresses (environment variables APP_ENV, RUNTIME_ENV and
# ES_INDEX_NAMESPACE can override the semantics of the first two keys)
runtime:
  environment: "prod"
  index_namespace: ""
  api_host: "0.0.0.0"
  api_port: 6002
  indexer_host: "0.0.0.0"
  indexer_port: 6004
  embedding_host: "0.0.0.0"
  embedding_port: 6005
  embedding_text_port: 6005
  embedding_image_port: 6008
  translator_host: "0.0.0.0"
  translator_port: 6006
  reranker_host: "0.0.0.0"
  reranker_port: 6007

# Infrastructure connections (sensitive items are read from environment
# variables first: ES_*, REDIS_*, DB_*, DASHSCOPE_API_KEY, DEEPL_AUTH_KEY)
infrastructure:
  elasticsearch:
    host: "http://localhost:9200"
    username: null
    password: null
  redis:
    host: "localhost"
    port: 6479
    snapshot_db: 0
    password: null
    socket_timeout: 1
    socket_connect_timeout: 1
    retry_on_timeout: false
    cache_expire_days: 720
    embedding_cache_prefix: "embedding"
    anchor_cache_prefix: "product_anchors"
    anchor_cache_expire_days: 30
  database:
    host: null
    port: 3306
    database: null
    username: null
    password: null
  secrets:
    dashscope_api_key: null
    deepl_auth_key: null

# Elasticsearch Index
es_index_name: "search_products"

# Search domains / index list (may be an empty list; every field of each item
# must be given explicitly)
indexes: []

# Config assets
assets:
  query_rewrite_dictionary_path: "config/dictionaries/query_rewrite.dict"

# Product content understanding (LLM enrich-content) configuration
product_enrich:
  max_workers: 40

# ES Index Settings (basic settings)
es_settings:
  number_of_shards: 1
  number_of_replicas: 0
  refresh_interval: "30s"

# Field weight configuration (field boosts applied at search time).
# Configured uniformly by field base name; .{lang} is appended dynamically at
# query time according to the actual search language.
# To tune one language separately, an explicit key may also be added
# (e.g. title.de: 3.2).
field_boosts:
  title: 3.0
  qanchors: 2.5
  tags: 2.0
  category_name_text: 2.0
  category_path: 2.0
  brief: 1.5
  description: 1.5
  vendor: 1.5
  option1_values: 1.5
  option2_values: 1.5
  option3_values: 1.5

# Query Configuration
query_config:
  # Supported languages
  supported_languages:
    - "zh"
    - "en"
  default_language: "en"

  # Feature switches (the translation switch is controlled by tenant_config)
  enable_text_embedding: true
  enable_query_rewrite: true

  # Query translation models (each must match an entry in
  # services.translation.capabilities).
  # Source language is within the tenant's index_languages: primary recall can
  # hit the source-language fields; the next three items are used.
  # zh_to_en_model: "opus-mt-zh-en"
  # en_to_zh_model: "opus-mt-en-zh"
  # default_translation_model: "nllb-200-distilled-600m"
  zh_to_en_model: "deepl"
  en_to_zh_model: "deepl"
  default_translation_model: "deepl"
  # Source language is NOT in index_languages: translation matters more for
  # the searchable text, so it can be specified separately (when omitted, the
  # group above is used).
  zh_to_en_model__source_not_in_index: "deepl"
  en_to_zh_model__source_not_in_index: "deepl"
  default_translation_model__source_not_in_index: "deepl"

  # Query parsing stage: translation and query embedding run concurrently and
  # share one wait budget (milliseconds).
  # Detected language within the tenant's index_languages: shorter budget;
  # not within the index languages: longer (translation matters more for recall).
  # NOTE(review): both budgets are currently 500 although the text above says
  # the in-index budget is shorter; the trailing numbers (80 / 200) are notes
  # kept from the original file, presumably earlier values - confirm.
  translation_embedding_wait_budget_ms_source_in_index: 500  # 80
  translation_embedding_wait_budget_ms_source_not_in_index: 500  # 200

  style_intent:
    enabled: true
    color_dictionary_path: "config/dictionaries/style_intent_color.csv"
    size_dictionary_path: "config/dictionaries/style_intent_size.csv"
    dimension_aliases:
      color: ["color", "colors", "colour", "colours", "颜色", "色", "色系"]
      size: ["size", "sizes", "sizing", "尺码", "尺寸", "码数", "号码", "码"]

  product_title_exclusion:
    enabled: true
    dictionary_path: "config/dictionaries/product_title_exclusion.tsv"

  # Dynamic multilingual search field configuration.
  # multilingual_fields are expanded to title.{lang}/brief.{lang}/... form;
  # shared_fields are fields without a language suffix.
  search_fields:
    multilingual_fields:
      - "title"
      - "qanchors"
      - "category_path"
      - "category_name_text"
      - "brief"
      - "description"
      - "vendor"
    # Explicit empty list: with every item commented out, a bare
    # `shared_fields:` would parse as null rather than an empty list.
    # To enable shared fields, replace [] with a block list of the items below.
    shared_fields: []
    #   - "tags"
    #   - "option1_values"
    #   - "option2_values"
    #   - "option3_values"
    core_multilingual_fields:
      - "title"
      - "qanchors"
      - "category_name_text"

  # Unified text recall strategy (main query + translated query)
  text_query_strategy:
    base_minimum_should_match: "75%"
    translation_minimum_should_match: "75%"
    translation_boost: 0.75
    tie_breaker_base_query: 0.5
    best_fields_boost: 2.0
    best_fields:
      title: 4.0
      qanchors: 3.0
      category_name_text: 2.0
    phrase_fields:
      title: 5.0
      qanchors: 4.0
    phrase_match_boost: 3.0

  # Embedding field names
  text_embedding_field: "title_embedding"
  image_embedding_field: null

  # Returned fields (_source includes).
  # null returns all fields, [] returns no fields, a list returns only the
  # listed fields.
  # The fields below are kept in line with api/result_formatter.py (SpuResult
  # population) and search/searcher.py (SKU sorting / main image replacement).
  source_fields:
    - spu_id
    - handle
    - title
    - brief
    - description
    - vendor
    - category_name
    - category_name_text
    - category_path
    - category_id
    - category_level
    - category1_name
    - category2_name
    - category3_name
    - tags
    - min_price
    - compare_at_price
    - image_url
    - sku_prices
    - sku_weights
    - sku_weight_units
    - total_inventory
    - option1_name
    - option1_values
    - option2_name
    - option2_values
    - option3_name
    - option3_values
    - specifications
    - skus

  # KNN boost (boost value for vector recall)
  knn_boost: 2.0  # Lower boost for embedding recall

# Function Score configuration (ES-level scoring rules)
function_score:
  score_mode: "sum"
  boost_mode: "multiply"
  functions: []

# Rerank configuration (provider/URL are configured in services.rerank)
rerank:
  enabled: true
  rerank_window: 400
  timeout_sec: 15.0
  weight_es: 0.4
  weight_ai: 0.6
  rerank_query_template: "{query}"
  rerank_doc_template: "{title}"
  # Multiplicative fusion:
  #   fused = product of (max(score, 0) + bias) ** exponent
  # over the three terms rerank / text / knn.
  fusion:
    rerank_bias: 0.00001
    rerank_exponent: 1.0
    text_bias: 0.1
    text_exponent: 0.35
    knn_bias: 0.6
    knn_exponent: 0.2

# Extensible service/provider registry (single source of configuration)
services:
  translation:
    service_url: "http://127.0.0.1:6006"
    default_model: "nllb-200-distilled-600m"
    default_scene: "general"
    timeout_sec: 10.0
    cache:
      ttl_seconds: 62208000
      sliding_expiration: true
      # When false, cache keys are exact-match per request model only
      # (ignores model_quality_tiers for lookups).
      enable_model_quality_tier_cache: true
      # Higher tier = better quality. Multiple models may share one tier.
      # A request may reuse Redis keys from models with tier > A or tier == A
      # (not from lower tiers).
      model_quality_tiers:
        deepl: 30
        qwen-mt: 30
        llm: 30
        nllb-200-distilled-600m: 20
        opus-mt-zh-en: 10
        opus-mt-en-zh: 10
    capabilities:
      qwen-mt:
        enabled: true
        backend: "qwen_mt"
        model: "qwen-mt-flash"
        base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
        timeout_sec: 10.0
        use_cache: true
      llm:
        enabled: true
        backend: "llm"
        model: "qwen-flash"
        base_url: "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
        timeout_sec: 30.0
        use_cache: true
      deepl:
        enabled: true
        backend: "deepl"
        api_url: "https://api.deepl.com/v2/translate"
        timeout_sec: 10.0
        glossary_id: ""
        use_cache: true
      nllb-200-distilled-600m:
        enabled: true
        backend: "local_nllb"
        model_id: "facebook/nllb-200-distilled-600M"
        model_dir: "./models/translation/facebook/nllb-200-distilled-600M"
        ct2_model_dir: "./models/translation/facebook/nllb-200-distilled-600M/ctranslate2-float16"
        ct2_compute_type: "float16"
        ct2_conversion_quantization: "float16"
        ct2_auto_convert: true
        ct2_inter_threads: 4
        ct2_intra_threads: 0
        ct2_max_queued_batches: 32
        ct2_batch_type: "examples"
        ct2_decoding_length_mode: "source"
        ct2_decoding_length_extra: 8
        ct2_decoding_length_min: 32
        device: "cuda"
        torch_dtype: "float16"
        batch_size: 64
        max_input_length: 256
        max_new_tokens: 64
        num_beams: 1
        use_cache: true
      opus-mt-zh-en:
        enabled: true
        backend: "local_marian"
        model_id: "Helsinki-NLP/opus-mt-zh-en"
        model_dir: "./models/translation/Helsinki-NLP/opus-mt-zh-en"
        ct2_model_dir: "./models/translation/Helsinki-NLP/opus-mt-zh-en/ctranslate2-float16"
        ct2_compute_type: "float16"
        ct2_conversion_quantization: "float16"
        ct2_auto_convert: true
        ct2_inter_threads: 1
        ct2_intra_threads: 0
        ct2_max_queued_batches: 0
        ct2_batch_type: "examples"
        device: "cuda"
        torch_dtype: "float16"
        batch_size: 16
        max_input_length: 256
        max_new_tokens: 256
        num_beams: 1
        use_cache: true
      opus-mt-en-zh:
        enabled: true
        backend: "local_marian"
        model_id: "Helsinki-NLP/opus-mt-en-zh"
        model_dir: "./models/translation/Helsinki-NLP/opus-mt-en-zh"
        ct2_model_dir: "./models/translation/Helsinki-NLP/opus-mt-en-zh/ctranslate2-float16"
        ct2_compute_type: "float16"
        ct2_conversion_quantization: "float16"
        ct2_auto_convert: true
        ct2_inter_threads: 1
        ct2_intra_threads: 0
        ct2_max_queued_batches: 0
        ct2_batch_type: "examples"
        device: "cuda"
        torch_dtype: "float16"
        batch_size: 16
        max_input_length: 256
        max_new_tokens: 256
        num_beams: 1
        use_cache: true

  embedding:
    provider: "http"  # http
    providers:
      http:
        text_base_url: "http://127.0.0.1:6005"
        image_base_url: "http://127.0.0.1:6008"
    # In-service text backend (read when the embedding process starts)
    backend: "tei"  # tei | local_st
    backends:
      tei:
        base_url: "http://127.0.0.1:8080"
        timeout_sec: 20
        model_id: "Qwen/Qwen3-Embedding-0.6B"
      local_st:
        model_id: "Qwen/Qwen3-Embedding-0.6B"
        device: "cuda"
        batch_size: 32
        normalize_embeddings: true
    # In-service image backend (read when the embedding process starts)
    image_backend: "clip_as_service"  # clip_as_service | local_cnclip
    image_backends:
      clip_as_service:
        server: "grpc://127.0.0.1:51000"
        model_name: "CN-CLIP/ViT-L-14"
        batch_size: 8
        normalize_embeddings: true
      local_cnclip:
        model_name: "ViT-L-14"
        device: null
        batch_size: 8
        normalize_embeddings: true

  rerank:
    provider: "http"
    base_url: "http://127.0.0.1:6007"
    providers:
      http:
        base_url: "http://127.0.0.1:6007"
        service_url: "http://127.0.0.1:6007/rerank"
    request:
      max_docs: 1000
      normalize: true
    # In-service backend (read when the reranker process starts)
    backend: "bge"  # bge | qwen3_vllm | qwen3_transformers | dashscope_rerank
    backends:
      bge:
        model_name: "BAAI/bge-reranker-v2-m3"
        device: null
        use_fp16: true
        batch_size: 64
        max_length: 160
        cache_dir: "./model_cache"
        enable_warmup: true
      qwen3_vllm:
        model_name: "Qwen/Qwen3-Reranker-0.6B"
        engine: "vllm"
        max_model_len: 160
        tensor_parallel_size: 1
        gpu_memory_utilization: 0.20
        dtype: "float16"
        enable_prefix_caching: true
        enforce_eager: false
        infer_batch_size: 100
        sort_by_doc_length: true
        # Instruction candidates tried so far (the active one is the last,
        # uncommented line). Per the original author's note,
        # "rank products by given query" works slightly better than
        # "Given a query, score the product for relevance".
        # instruction: "Given a query, score the product for relevance"
        # instruction: "rank products by given query, category match first"
        # instruction: "Rank products by query relevance, prioritizing category match"
        # instruction: "Rank products by query relevance, prioritizing category and style match"
        # instruction: "Rank by query relevance, prioritize category & style"
        # instruction: "Relevance ranking: category & style match first"
        # instruction: "Score product relevance by query with category & style match prioritized"
        instruction: "Rank products by query with category & style match prioritized"
      qwen3_transformers:
        model_name: "Qwen/Qwen3-Reranker-0.6B"
        instruction: "rank products by given query"
        # instruction: "Score the product's relevance to the given query"
        max_length: 8192
        batch_size: 64
        use_fp16: true
        # sdpa: the default, does not require flash-attn; if flash_attn is
        # installed this can be changed to flash_attention_2.
        attn_implementation: "sdpa"
      dashscope_rerank:
        model_name: "qwen3-rerank"
        # Choose the endpoint by region:
        #   China:     https://dashscope.aliyuncs.com/compatible-api/v1/reranks
        #   Singapore: https://dashscope-intl.aliyuncs.com/compatible-api/v1/reranks
        #   US:        https://dashscope-us.aliyuncs.com/compatible-api/v1/reranks
        endpoint: "https://dashscope.aliyuncs.com/compatible-api/v1/reranks"
        api_key_env: "RERANK_DASHSCOPE_API_KEY_CN"
        timeout_sec: 10.0
        # top_n_cap: 0  # 0 means top_n = number of docs in the request; >0 caps top_n
        # batchsize: 0 disables; >0 enables concurrent small-batch scheduling
        # (top_n / top_n_cap still apply; results are truncated globally after
        # batching).
        batchsize: 64
        instruct: "Given a shopping query, rank product titles by relevance"
        max_retries: 2
        retry_backoff_sec: 0.2

# SPU configuration (enabled; uses nested skus)
spu_config:
  enabled: true
  spu_field: "spu_id"
  inner_hits_size: 10
  # Which option dimensions take part in search (both indexing and online
  # search). A list containing one or more of option1/option2/option3.
  searchable_option_dimensions: ["option1", "option2", "option3"]

# Tenant Configuration
# Each tenant can configure its primary language (primary_language) and its
# index languages (index_languages: main market languages, selectable by the
# merchant).
# Default index_languages: [en, zh]; may be set to any subset of
# SOURCE_LANG_CODE_MAP.keys().
tenant_config:
  default:
    primary_language: "en"
    index_languages: ["en", "zh"]
  tenants:
    "1":
      primary_language: "zh"
      index_languages: ["zh", "en"]
    "2":
      primary_language: "en"
      index_languages: ["en", "zh"]
    "3":
      primary_language: "zh"
      index_languages: ["zh", "en"]
    "162":
      primary_language: "zh"
      index_languages: ["zh", "en"]
    "170":
      primary_language: "en"
      index_languages: ["en", "zh"]