# Unified Configuration for Multi-Tenant Search Engine
# Unified configuration file: all tenants share this single configuration.
# Note: the index structure is defined in mappings/search_products.json;
# this file only configures search behavior.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose line breaks and indentation had been lost. Confirm it against the
# config loader -- in particular that source_fields and knn_boost belong
# under query_config, and searchable_option_dimensions under spu_config.

# Elasticsearch index
es_index_name: "search_products"

# ES index settings (basic settings)
es_settings:
  number_of_shards: 1
  number_of_replicas: 0
  refresh_interval: "30s"

# Field weight configuration (per-field boost applied at search time).
# Only weights are configured here, not field structure (field structure is
# defined in mappings/search_products.json).
field_boosts:
  # Text relevance fields
  "title.zh": 3.0
  "brief.zh": 1.5
  "description.zh": 1.0
  "vendor.zh": 1.5
  "title.en": 3.0
  "brief.en": 1.5
  "description.en": 1.0
  "vendor.en": 1.5

  # Category fields
  "category_path.zh": 1.5
  "category_name_text.zh": 1.5
  "category_path.en": 1.5
  "category_name_text.en": 1.5

  # Tag and option-value fields
  tags: 1.0
  option1_values: 0.5
  option2_values: 0.5
  option3_values: 0.5

# Search domain configuration (Query Domains).
# Defines the different search strategies, i.e. which fields are searched
# together.
indexes:
  - name: "default"
    label: "默认搜索"
    fields:
      - "title.zh"
      - "brief.zh"
      - "description.zh"
      - "vendor.zh"
      - "tags"
      - "category_path.zh"
      - "category_name_text.zh"
      - "option1_values"
    boost: 1.0

  - name: "title"
    label: "标题搜索"
    fields:
      - "title.zh"
    boost: 2.0

  - name: "vendor"
    label: "品牌搜索"
    fields:
      - "vendor.zh"
    boost: 1.5

  - name: "category"
    label: "类目搜索"
    fields:
      - "category_path.zh"
      - "category_name_text.zh"
    boost: 1.5

  - name: "tags"
    label: "标签搜索"
    fields:
      - "tags"
    boost: 1.0

# Query configuration
query_config:
  # Supported languages
  supported_languages:
    - "zh"
    - "en"
  default_language: "en"

  # Feature switches (the translation switch is controlled by tenant_config)
  enable_text_embedding: true
  enable_query_rewrite: true
  # Enable multilingual search (cross-language retrieval via translation)
  enable_multilang_search: true

  # Embedding field names
  text_embedding_field: "title_embedding"
  image_embedding_field: null

  # Embedding disable thresholds (short queries skip vector search)
  embedding_disable_thresholds:
    chinese_char_limit: 4
    english_word_limit: 3

  # Translation API configuration
  translation_service: "deepl"
  translation_api_key: null  # set via environment variable

  # Translation prompts (used to improve translation quality; passed as the
  # DeepL API `context` parameter)
  translation_prompts:
    # Product-title translation prompts
    product_title_zh: "请将原文翻译成中文商品SKU名称,要求:确保精确、完整地传达原文信息的基础上,语言简洁清晰、地道、专业。"
    # Folded scalar: the lines below are joined with single spaces and the
    # trailing newline is stripped, giving one single-line prompt string.
    product_title_en: >-
      Translate the original text into an English product SKU name.
      Requirements: Ensure accurate and complete transmission of the
      original information, with concise, clear, authentic, and
      professional language.
    # Query translation prompts
    query_zh: "电商领域"
    query_en: "e-commerce domain"
    # Default translation prompts
    default_zh: "电商领域"
    default_en: "e-commerce domain"

  # Returned-field configuration (_source includes).
  # null returns all fields, [] returns no fields, and a list returns only
  # the listed fields.
  source_fields: null

  # KNN boost configuration (boost value for vector recall)
  knn_boost: 0.25  # lower boost for embedding recall

# Ranking configuration
ranking:
  expression: "bm25() + 0.25*text_embedding_relevance()"
  description: "BM25 text relevance combined with semantic embedding similarity"

# Function score configuration (scoring rules at the ES layer)
function_score:
  score_mode: "sum"
  boost_mode: "multiply"
  functions: []

# Rerank configuration (sole implementation: an external BGE rerank service;
# whether it runs is controlled by the request parameter enable_rerank).
# When enable_rerank is set and from+size <= rerank_window: fetch the top
# rerank_window hits from ES, rerank them, then paginate by from/size.
rerank:
  rerank_window: 1000
  # service_url: "http://127.0.0.1:6007/rerank"  # optional; defaults to port 6007 when omitted
  # Reranking is slow when there are many documents; increase as needed.
  timeout_sec: 15.0
  weight_es: 0.4
  weight_ai: 0.6
  # Templates used to assemble the search request / document fields into the
  # rerank service input:
  # - rerank_query_template: supports {query}
  # - rerank_doc_template: supports {title} {brief} {vendor} {description} {category_path}
  rerank_query_template: "{query}"
  rerank_doc_template: "{title}"

# SPU configuration (enabled; uses nested skus)
spu_config:
  enabled: true
  spu_field: "spu_id"
  inner_hits_size: 10
  # Which option dimensions take part in retrieval (both indexing and online
  # search). Format: a list containing one or more of option1/option2/option3.
  searchable_option_dimensions: ["option1", "option2", "option3"]

# Tenant configuration.
# Each tenant can configure a primary language (primary_language) and index
# languages (index_languages: main-market languages, selectable by the
# merchant).
# Default index_languages: [en, zh]; may be any subset of
# SUPPORTED_INDEX_LANGUAGES.
tenant_config:
  default:
    primary_language: "en"
    index_languages: ["en", "zh"]
  tenants:
    # Tenant IDs are quoted so they stay strings rather than integers.
    "1":
      primary_language: "zh"
      index_languages: ["zh", "en"]
    "2":
      primary_language: "en"
      index_languages: ["en", "zh"]
    "3":
      primary_language: "zh"
      index_languages: ["zh", "en"]
    "162":
      primary_language: "zh"
      index_languages: ["zh", "en"]
    "170":
      primary_language: "en"
      index_languages: ["en", "zh"]