# config.yaml
# Unified Configuration for Multi-Tenant Search Engine
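# Unified configuration file: all tenants share a single set of index configuration.
# Note: this file contains no MySQL-related settings, only ES search-related settings.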

# Name of the Elasticsearch index that this configuration describes
es_index_name: 'search_products'

# ES index settings: one shard, zero replicas, 30-second refresh interval.
# NOTE(review): with zero replicas there is no redundant copy of the index —
# confirm this is intended for the target environment.
es_settings:
  number_of_shards: 1
  number_of_replicas: 0
  refresh_interval: '30s'

# Field definitions (SPU level; only fields that are useful for search)
fields:
  # Tenant isolation field (required)
  - name: 'tenant_id'
    type: 'KEYWORD'
    required: true
    index: true
    store: true

  # Product identifier fields
  # product_id is marked required; handle is not.
  - name: 'product_id'
    type: 'KEYWORD'
    required: true
    index: true
    store: true

  - name: 'handle'
    type: 'KEYWORD'
    index: true
    store: true

  # Text search fields
  # All three use the chinese_ecommerce analyzer; boosts descend
  # title (3.0) > brief (1.5) > description (1.0).
  - name: 'title'
    type: 'TEXT'
    analyzer: 'chinese_ecommerce'
    boost: 3.0
    index: true
    store: true

  - name: 'brief'
    type: 'TEXT'
    analyzer: 'chinese_ecommerce'
    boost: 1.5
    index: true
    store: true

  - name: 'description'
    type: 'TEXT'
    analyzer: 'chinese_ecommerce'
    boost: 1.0
    index: true
    store: true

  # SEO fields (used to improve relevance)
  - name: 'seo_title'
    type: 'TEXT'
    analyzer: 'chinese_ecommerce'
    boost: 2.0
    index: true
    store: true

  - name: 'seo_description'
    type: 'TEXT'
    analyzer: 'chinese_ecommerce'
    boost: 1.5
    index: true
    store: true

  - name: 'seo_keywords'
    type: 'TEXT'
    analyzer: 'chinese_ecommerce'
    boost: 2.0
    index: true
    store: true

  # Category and tag fields (dual-indexed: TEXT + KEYWORD)
  # Each analyzed TEXT field is followed by a "<name>_keyword" KEYWORD field
  # that is indexed but not stored.

  # Vendor
  - name: 'vendor'
    type: 'TEXT'
    analyzer: 'chinese_ecommerce'
    boost: 1.5
    index: true
    store: true

  - name: 'vendor_keyword'
    type: 'KEYWORD'
    index: true
    store: false

  # Product type
  - name: 'product_type'
    type: 'TEXT'
    analyzer: 'chinese_ecommerce'
    boost: 1.5
    index: true
    store: true

  - name: 'product_type_keyword'
    type: 'KEYWORD'
    index: true
    store: false

  # Tags
  - name: 'tags'
    type: 'TEXT'
    analyzer: 'chinese_ecommerce'
    boost: 1.0
    index: true
    store: true

  - name: 'tags_keyword'
    type: 'KEYWORD'
    index: true
    store: false

  # Category
  - name: 'category'
    type: 'TEXT'
    analyzer: 'chinese_ecommerce'
    boost: 1.5
    index: true
    store: true

  - name: 'category_keyword'
    type: 'KEYWORD'
    index: true
    store: false

  # Price fields (flattened onto the top-level document)
  - name: 'min_price'
    type: 'FLOAT'
    index: true
    store: true

  - name: 'max_price'
    type: 'FLOAT'
    index: true
    store: true

  - name: 'compare_at_price'
    type: 'FLOAT'
    index: true
    store: true

  # Image field (for display only; does not take part in search,
  # hence index: false with store: true)
  - name: 'image_url'
    type: 'KEYWORD'
    index: false
    store: true

  # Text embedding field (for semantic search): 1024-dimensional vector,
  # compared with dot-product similarity; indexed but not stored.
  # NOTE(review): Elasticsearch's dot_product similarity expects unit-length
  # vectors — confirm the embedding model's output is normalized.
  - name: 'name_embedding'
    type: 'TEXT_EMBEDDING'
    embedding_dims: 1024
    embedding_similarity: 'dot_product'
    index: true
    store: false

  # Nested variants field
  # Sub-properties are declared under nested_properties.
  # NOTE(review): the nested property types are lowercase ('keyword', 'text',
  # ...) while top-level field types are uppercase ('KEYWORD', 'TEXT', ...) —
  # confirm the config loader expects this difference.
  - name: 'variants'
    type: 'JSON'
    nested: true
    nested_properties:
      # Variant identifier
      variant_id:
        type: 'keyword'
        index: true
        store: true
      # Variant title, analyzed with the same analyzer as the top-level text fields
      title:
        type: 'text'
        analyzer: 'chinese_ecommerce'
        index: true
        store: true
      # Per-variant prices
      price:
        type: 'float'
        index: true
        store: true
      compare_at_price:
        type: 'float'
        index: true
        store: true
      sku:
        type: 'keyword'
        index: true
        store: true
      stock:
        type: 'long'
        index: true
        store: true
      # Free-form object of variant options
      options:
        type: 'object'
        enabled: true

# Index structure (query domains)
# Each domain names a group of fields from the field definitions, together
# with an analyzer and a domain-level boost.
indexes:
  # Default domain: every analyzed text field
  - name: 'default'
    label: '默认索引'
    fields:
      - 'title'
      - 'brief'
      - 'description'
      - 'seo_title'
      - 'seo_description'
      - 'seo_keywords'
      - 'vendor'
      - 'product_type'
      - 'tags'
      - 'category'
    analyzer: 'chinese_ecommerce'
    boost: 1.0

  # Title domain
  - name: 'title'
    label: '标题索引'
    fields:
      - 'title'
      - 'seo_title'
    analyzer: 'chinese_ecommerce'
    boost: 2.0

  # Vendor (brand) domain
  - name: 'vendor'
    label: '品牌索引'
    fields:
      - 'vendor'
    analyzer: 'chinese_ecommerce'
    boost: 1.5

  # Category domain
  - name: 'category'
    label: '类目索引'
    fields:
      - 'category'
    analyzer: 'chinese_ecommerce'
    boost: 1.5

  # Tags domain (also covers seo_keywords)
  - name: 'tags'
    label: '标签索引'
    fields:
      - 'tags'
      - 'seo_keywords'
    analyzer: 'chinese_ecommerce'
    boost: 1.0

# Query configuration: supported languages plus feature toggles for
# translation, text embedding and query rewrite (all enabled).
query_config:
  supported_languages:
    - 'zh'
    - 'en'
  default_language: 'zh'
  enable_translation: true
  enable_text_embedding: true
  enable_query_rewrite: true

  # Embedding field names (if not set, will auto-detect from fields)
  text_embedding_field: 'name_embedding'  # Field name for text embeddings
  image_embedding_field: null  # Field name for image embeddings (if not set, will auto-detect)

  # Translation API (DeepL)
  translation_service: 'deepl'
  translation_api_key: null  # Set via environment variable
  # translation_glossary_id: null  # Optional: DeepL glossary ID for custom terminology (e.g., "车" -> "car")
  # translation_context: "e-commerce product search"  # Context hint for better translation disambiguation

# Ranking configuration
# The expression adds the BM25 term to 0.2 times the text-embedding relevance term.
ranking:
  expression: 'bm25() + 0.2*text_embedding_relevance()'
  description: 'BM25 text relevance combined with semantic embedding similarity'

# Function Score configuration (ES-level scoring rules)
# In Elasticsearch's function_score query, score_mode controls how the
# individual function scores are combined and boost_mode controls how that
# combined value is merged with the query score.
function_score:
  score_mode: 'sum'
  boost_mode: 'multiply'

  # No scoring functions are configured.
  functions: []

# Rerank configuration (local reranking, currently disabled)
# The expression is left empty while enabled is false.
rerank:
  enabled: false
  expression: ''
  description: 'Local reranking (disabled, use ES function_score instead)'

# SPU configuration (enabled; uses the nested variants field)
# NOTE(review): inner_hits_size presumably caps the nested variant hits
# returned per SPU — confirm against the query builder.
spu_config:
  enabled: true
  spu_field: 'product_id'
  inner_hits_size: 10