---
# Unified Configuration for Multi-Tenant Search Engine
# Single configuration file shared by all tenants.
# NOTE: the index structure is defined in mappings/search_products.json;
# this file only configures search behavior.

# Elasticsearch index
es_index_name: "search_products"

# ES index settings (basic settings)
es_settings:
  number_of_shards: 1
  number_of_replicas: 0
  refresh_interval: "30s"

# Field weights (per-field boost applied at search time).
# Only weights are configured here, not field structure (the structure is
# defined in mappings/search_products.json).
field_boosts:
  # Text relevance fields
  "title.zh": 3.0
  "brief.zh": 1.5
  "description.zh": 1.0
  "vendor.zh": 1.5
  "title.en": 3.0
  "brief.en": 1.5
  "description.en": 1.0
  "vendor.en": 1.5

  # Category fields
  "category_path.zh": 1.5
  "category_name_text.zh": 1.5
  "category_path.en": 1.5
  "category_name_text.en": 1.5

  # Tag and option-value fields
  tags: 1.0
  option1_values: 0.5
  option2_values: 0.5
  option3_values: 0.5

# Search domains (query domains).
# Defines the different search strategies by naming which fields are
# searched together.
indexes:
  - name: "default"
    label: "默认搜索"
    fields:
      - "title.zh"
      - "brief.zh"
      - "description.zh"
      - "vendor.zh"
      - "tags"
      - "category_path.zh"
      - "category_name_text.zh"
      - "option1_values"
    boost: 1.0

  - name: "title"
    label: "标题搜索"
    fields:
      - "title.zh"
    boost: 2.0

  - name: "vendor"
    label: "品牌搜索"
    fields:
      - "vendor.zh"
    boost: 1.5

  - name: "category"
    label: "类目搜索"
    fields:
      - "category_path.zh"
      - "category_name_text.zh"
    boost: 1.5

  - name: "tags"
    label: "标签搜索"
    fields:
      - "tags"
    boost: 1.0

# Query configuration
query_config:
  # Supported languages
  supported_languages:
    - "zh"
    - "en"
  default_language: "en"

  # Feature switches (the translation switch is controlled by tenant_config)
  enable_text_embedding: true
  enable_query_rewrite: true

  # Embedding field names
  text_embedding_field: "title_embedding"
  image_embedding_field: null

  # Thresholds for disabling embeddings (short queries skip vector search)
  embedding_disable_thresholds:
    chinese_char_limit: 4
    english_word_limit: 3

  # Translation API settings (provider/URL live in services.translation)
  translation_service: "deepl"
  translation_api_key: null  # set via environment variable

  # Translation prompts (used to improve translation quality; passed as the
  # DeepL API `context` parameter)
  translation_prompts:
    # Prompts for product-title translation
    product_title_zh: "请将原文翻译成中文商品SKU名称,要求:确保精确、完整地传达原文信息的基础上,语言简洁清晰、地道、专业。"
    product_title_en: "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language."
    # Prompts for query translation
    query_zh: "电商领域"
    query_en: "e-commerce domain"
    # Default translation prompts
    default_zh: "电商领域"
    default_en: "e-commerce domain"

  # NOTE(review): source_fields and knn_boost are placed under query_config
  # based on their position in the file — confirm against the config loader.

  # Returned fields (_source includes).
  # null returns all fields, [] returns no fields, a list returns only the
  # listed fields.
  source_fields: null

  # KNN boost (boost value for vector recall)
  knn_boost: 0.25  # Lower boost for embedding recall

# Ranking configuration
ranking:
  expression: "bm25() + 0.25*text_embedding_relevance()"
  description: "BM25 text relevance combined with semantic embedding similarity"

# Function score (scoring rules at the ES layer)
function_score:
  score_mode: "sum"
  boost_mode: "multiply"
  functions: []

# Rerank settings (provider/URL live in services.rerank)
rerank:
  rerank_window: 1000
  timeout_sec: 15.0
  weight_es: 0.4
  weight_ai: 0.6
  rerank_query_template: "{query}"
  rerank_doc_template: "{title}"

# Extensible service/provider registry (single source of configuration)
services:
  translation:
    provider: "direct"  # direct | http | google(reserved)
    base_url: "http://127.0.0.1:6006"
    model: "qwen"
    timeout_sec: 10.0
    providers:
      direct:
        model: "qwen"
      http:
        base_url: "http://127.0.0.1:6006"
        model: "qwen"
        timeout_sec: 10.0
      google:
        enabled: false
        project_id: ""
        location: "global"
        model: ""

  embedding:
    provider: "http"  # http
    base_url: "http://127.0.0.1:6005"
    providers:
      http:
        base_url: "http://127.0.0.1:6005"
    # In-service text backend (read when the embedding process starts)
    backend: "local_st"  # tei | local_st
    backends:
      tei:
        base_url: "http://127.0.0.1:8080"
        timeout_sec: 60
        model_id: "Qwen/Qwen3-Embedding-0.6B"
      local_st:
        model_id: "Qwen/Qwen3-Embedding-0.6B"
        device: "cuda"
        batch_size: 32
        normalize_embeddings: true

  rerank:
    provider: "http"
    base_url: "http://127.0.0.1:6007"
    providers:
      http:
        base_url: "http://127.0.0.1:6007"
        # NOTE(review): service_url is assumed to belong to the http provider
        # entry — confirm against the rerank client.
        service_url: "http://127.0.0.1:6007/rerank"
    # In-service backend (read when the reranker process starts)
    backend: "qwen3_vllm"  # bge | qwen3_vllm
    backends:
      bge:
        model_name: "BAAI/bge-reranker-v2-m3"
        device: null
        use_fp16: true
        batch_size: 64
        max_length: 512
        cache_dir: "./model_cache"
        enable_warmup: true
      qwen3_vllm:
        model_name: "Qwen/Qwen3-Reranker-0.6B"
        engine: "vllm"
        max_model_len: 256
        tensor_parallel_size: 1
        gpu_memory_utilization: 0.36
        dtype: "float16"
        enable_prefix_caching: true
        enforce_eager: false
        instruction: "Given a web search query, retrieve relevant passages that answer the query"

# SPU settings (enabled; uses nested skus)
spu_config:
  enabled: true
  spu_field: "spu_id"
  inner_hits_size: 10
  # Which option dimensions take part in retrieval (both indexing and online
  # search). A list containing one or more of option1/option2/option3.
  searchable_option_dimensions: ["option1", "option2", "option3"]

# Tenant configuration
# Each tenant can set its primary language (primary_language) and its index
# languages (index_languages: main-market languages the merchant can select).
# Default index_languages: [en, zh]; may be any subset of
# SUPPORTED_INDEX_LANGUAGES.
tenant_config:
  default:
    primary_language: "en"
    index_languages: ["en", "zh"]
  tenants:
    "1":
      primary_language: "zh"
      index_languages: ["zh", "en"]
    "2":
      primary_language: "en"
      index_languages: ["en", "zh"]
    "3":
      primary_language: "zh"
      index_languages: ["zh", "en"]
    "162":
      primary_language: "zh"
      index_languages: ["zh", "en"]
    "170":
      primary_language: "en"
      index_languages: ["en", "zh"]