# config.yaml
# Unified Configuration for Multi-Tenant Search Engine
# 统一配置文件,所有租户共用一套配置
# 注意:索引结构由 mappings/search_products.json 定义,此文件只配置搜索行为

# Name of the Elasticsearch index used by the search engine.
es_index_name: "search_products"

# ES index settings (basic settings).
# NOTE(review): one shard with zero replicas means there is no replica copy
# of the index — confirm this is intended for every environment that loads
# this file.
es_settings:
  number_of_shards: 1
  number_of_replicas: 0
  refresh_interval: "30s"

# Field weight configuration (per-field boost applied at search time).
# Only weights are configured here, not the field structure (the field
# structure is defined in mappings/search_products.json).
field_boosts:
  # Text relevance fields
  title_zh: 3.0
  brief_zh: 1.5
  description_zh: 1.0
  vendor_zh: 1.5
  title_en: 3.0
  brief_en: 1.5
  description_en: 1.0
  vendor_en: 1.5

  # Category fields
  category_path_zh: 1.5
  category_name_zh: 1.5
  category_path_en: 1.5
  category_name_en: 1.5

  # Tag and option-value fields
  tags: 1.0
  option1_values: 0.5
  option2_values: 0.5
  option3_values: 0.5

# Search domain configuration (query domains).
# Defines the different search strategies, i.e. which fields are grouped and
# searched together. Each entry carries a name, a display label, the list of
# fields it covers, and a domain-level boost.
# NOTE(review): language-specific fields are listed only in their *_zh form,
# although field_boosts also defines *_en variants — presumably the English
# counterparts are resolved by the consumer at query time; confirm.
# NOTE(review): only option1_values is listed in the default domain, while
# spu_config.searchable_option_dimensions lists option1, option2 and option3
# — confirm that option2_values / option3_values are intentionally omitted.
indexes:
  # Default search domain: title, brief, description, vendor, tags,
  # category and option1 fields.
  - name: "default"
    label: "默认搜索"
    fields:
      - "title_zh"
      - "brief_zh"
      - "description_zh"
      - "vendor_zh"
      - "tags"
      - "category_path_zh"
      - "category_name_zh"
      - "option1_values"
    boost: 1.0

  # Title search domain
  - name: "title"
    label: "标题搜索"
    fields:
      - "title_zh"
    boost: 2.0

  # Brand (vendor) search domain
  - name: "vendor"
    label: "品牌搜索"
    fields:
      - "vendor_zh"
    boost: 1.5

  # Category search domain
  - name: "category"
    label: "类目搜索"
    fields:
      - "category_path_zh"
      - "category_name_zh"
    boost: 1.5

  # Tag search domain
  - name: "tags"
    label: "标签搜索"
    fields:
      - "tags"
    boost: 1.0

# Query configuration
query_config:
  # Supported languages
  supported_languages:
    - "zh"
    - "en"
  default_language: "zh"

  # Feature switches
  enable_translation: true
  enable_text_embedding: true
  enable_query_rewrite: true
  # Enable multilingual search (cross-language retrieval via translation)
  enable_multilang_search: true

  # Embedding field names
  text_embedding_field: "title_embedding"
  image_embedding_field: null

  # Embedding disable thresholds (short queries do not use vector search).
  # NOTE(review): whether each limit is inclusive is decided by the consumer
  # of this file — confirm against the query parser.
  embedding_disable_thresholds:
    chinese_char_limit: 4
    english_word_limit: 3

  # Translation API configuration
  translation_service: "deepl"
  translation_api_key: null  # Set via environment variable

  # Translation prompt configuration (used to improve translation quality;
  # passed as the context parameter of the DeepL API)
  translation_prompts:
    # Product title translation prompts
    product_title_zh: "请将原文翻译成中文商品SKU名称,要求:确保精确、完整地传达原文信息的基础上,语言简洁清晰、地道、专业。"
    product_title_en: "Translate the original text into an English product SKU name. Requirements: Ensure accurate and complete transmission of the original information, with concise, clear, authentic, and professional language."
    # Query translation prompts
    query_zh: "电商领域"
    query_en: "e-commerce domain"
    # Default translation prompts
    default_zh: "电商领域"
    default_en: "e-commerce domain"

  # Returned fields configuration (_source includes).
  # null returns all fields, [] returns no fields, and a list returns only
  # the listed fields.
  source_fields: null

# Ranking configuration.
# NOTE(review): bm25() and text_embedding_relevance() are not defined in this
# file — presumably they are resolved by the application's ranking-expression
# evaluator; confirm.
ranking:
  # Sum of the bm25() term and the embedding relevance term weighted by 0.2.
  expression: "bm25() + 0.2*text_embedding_relevance()"
  description: "BM25 text relevance combined with semantic embedding similarity"

# Function score configuration (scoring rules applied at the ES layer).
function_score:
  score_mode: "sum"
  boost_mode: "multiply"
  # Empty list: no scoring functions are configured here.
  functions: []

# Rerank configuration (local reranking; currently disabled).
rerank:
  enabled: false
  # No rerank expression is set.
  expression: ""
  description: "Local reranking (disabled, use ES function_score instead)"

# SPU configuration (enabled; uses nested skus).
spu_config:
  enabled: true
  spu_field: "spu_id"
  inner_hits_size: 10
  # Option dimensions that take part in retrieval (both when indexing and in
  # online search). A list containing one or more of option1 / option2 /
  # option3.
  searchable_option_dimensions:
    - "option1"
    - "option2"
    - "option3"

# Tenant configuration.
# Each tenant can configure its primary language and translation options.
tenant_config:
  # Default configuration (used by tenants that have no entry of their own).
  default:
    primary_language: "zh"
    translate_to_en: true
    translate_to_zh: false
  # Tenant-specific configuration.
  # Keys are quoted, so they are parsed as strings rather than integers.
  tenants:
    # Same values as `default`.
    "1":
      primary_language: "zh"
      translate_to_en: true
      translate_to_zh: false
    # English primary language; only translation to Chinese is enabled.
    "2":
      primary_language: "en"
      translate_to_en: false
      translate_to_zh: true
    # Same values as `default`.
    "3":
      primary_language: "zh"
      translate_to_en: true
      translate_to_zh: false
    # Chinese primary language; neither translation direction is enabled.
    "162":
      primary_language: "zh"
      translate_to_en: false
      translate_to_zh: false