# config.yaml
# Customer1 Configuration
# Test customer for cross-border e-commerce search

# Human-readable label for this configuration instance.
customer_name: "Customer1 Test Instance"

# MySQL Database Configuration
# Connection settings for the MySQL database (presumably the one holding
# `main_table` and `extension_table` below - confirm).
# WARNING(review): the password is stored here in plaintext, next to a
# non-private IP address. If this file is (or ever was) under version control,
# the credential should be rotated and supplied from the environment or a
# secret store, as the comment on `translation_api_key` in this file describes
# for the API key.
# TODO confirm whether the loader supports an environment override for these
# keys before changing the value.
mysql_config:
  host: "120.79.247.228"
  # NOTE(review): MySQL's default port is 3306; confirm 3316 is intentional.
  port: 3316
  database: "saas"
  username: "saas"
  password: "P89cZHS5d7dFyc9R"

# Table Configuration
# The two tables that field definitions draw from.
# NOTE(review): entries under `fields` use `source_table: "main"` and
# `source_table: "extension"`; presumably those aliases resolve to these two
# keys - confirm against the loader.
main_table: "shoplazza_product_sku"
extension_table: "customer1_extension"

# Elasticsearch Index
# Name of the Elasticsearch index used for this customer.
es_index_name: "search_customer1"

# ES Index Settings
# NOTE(review): these key names match Elasticsearch index settings and are
# presumably passed through when the index is created - confirm in the loader.
es_settings:
  number_of_shards: 1
  # Zero replicas means no redundant copy of the shard; fine for a test
  # instance, worth revisiting before any production use.
  number_of_replicas: 0
  # If applied as the ES refresh interval, newly indexed documents can take up
  # to 30s to become visible to search.
  refresh_interval: "30s"

# Field Definitions
# One entry per index field. Every entry gives a `name`, a `type`, the
# `source_table`/`source_column` the value is read from, and an `index` flag;
# most also carry a `store` flag. Text fields additionally carry `analyzer`
# and `boost`; embedding fields carry `embedding_dims` and
# `embedding_similarity`.
fields:
  # Primary Key
  # The only field read from the "main" table (column `id`) and the only one
  # marked `required: true`.
  - name: "skuId"
    type: "LONG"
    source_table: "main"
    source_column: "id"
    required: true
    index: true
    store: true

  # Text Fields - Multi-language
  # Analyzed TEXT fields, each with its own analyzer and boost.
  # NOTE(review): "chinese_ecommerce" is not one of Elasticsearch's built-in
  # analyzer names, so it is presumably a custom analyzer defined outside this
  # file - confirm it exists on the target index.
  - name: "name"
    type: "TEXT"
    source_table: "extension"
    source_column: "name"
    analyzer: "chinese_ecommerce"
    boost: 2.0
    index: true
    store: true

  # Judging by the column name, a pinyin rendering of `name`. It is the only
  # text field in this group with `store: false` and uses the "standard"
  # analyzer.
  - name: "name_pinyin"
    type: "TEXT"
    source_table: "extension"
    source_column: "name_pinyin"
    analyzer: "standard"
    boost: 1.5
    index: true
    store: false

  # Russian-language text, analyzed with the "russian" analyzer.
  - name: "ruSkuName"
    type: "TEXT"
    source_table: "extension"
    source_column: "ruSkuName"
    analyzer: "russian"
    boost: 2.0
    index: true
    store: true

  # English-language text, analyzed with the "english" analyzer.
  - name: "enSpuName"
    type: "TEXT"
    source_table: "extension"
    source_column: "enSpuName"
    analyzer: "english"
    boost: 2.0
    index: true
    store: true

  # Category, Brand and Supplier (analyzed text)
  # All three use the "chinese_ecommerce" analyzer. Each source column is also
  # exposed as a KEYWORD field named `<field>_keyword` in the "Keyword Fields"
  # section of this file.
  - name: "categoryName"
    type: "TEXT"
    source_table: "extension"
    source_column: "categoryName"
    analyzer: "chinese_ecommerce"
    boost: 1.5
    index: true
    store: true

  - name: "brandName"
    type: "TEXT"
    source_table: "extension"
    source_column: "brandName"
    analyzer: "chinese_ecommerce"
    boost: 1.5
    index: true
    store: true

  # Lowest boost (1.0) of the three; not referenced by any entry under
  # `indexes` in this file.
  - name: "supplierName"
    type: "TEXT"
    source_table: "extension"
    source_column: "supplierName"
    analyzer: "chinese_ecommerce"
    boost: 1.0
    index: true
    store: true

  # Keyword Fields
  # KEYWORD-typed fields read from the same source columns as the analyzed
  # `categoryName`, `brandName` and `supplierName` text fields. All three are
  # indexed but not stored.
  # NOTE(review): presumably intended for exact-match filtering or
  # aggregation - how they are queried is not visible in this file.
  - name: "categoryName_keyword"
    type: "KEYWORD"
    source_table: "extension"
    source_column: "categoryName"
    index: true
    store: false

  - name: "brandName_keyword"
    type: "KEYWORD"
    source_table: "extension"
    source_column: "brandName"
    index: true
    store: false

  - name: "supplierName_keyword"
    type: "KEYWORD"
    source_table: "extension"
    source_column: "supplierName"
    index: true
    store: false

  # Price Fields
  # Numeric price read from the `price` column of the extension table.
  # NOTE(review): currency and unit are not stated anywhere in this file.
  - name: "price"
    type: "DOUBLE"
    source_table: "extension"
    source_column: "price"
    index: true
    store: true

  # Text Embedding Fields
  # 1024-dimensional vector fields whose source columns are the text columns
  # `name` and `enSpuName`. There is no corresponding entry for `ruSkuName`.
  # No `store` key is given here, so the loader's default applies.
  # NOTE(review): if `embedding_similarity` maps to the Elasticsearch
  # dense_vector `similarity` option, "dot_product" requires unit-length
  # vectors - confirm the embedding model output is normalized.
  - name: "name_embedding"
    type: "TEXT_EMBEDDING"
    source_table: "extension"
    source_column: "name"
    embedding_dims: 1024
    embedding_similarity: "dot_product"
    index: true

  - name: "enSpuName_embedding"
    type: "TEXT_EMBEDDING"
    source_table: "extension"
    source_column: "enSpuName"
    embedding_dims: 1024
    embedding_similarity: "dot_product"
    index: true

  # Image Fields
  # `imageUrl` keeps the raw URL as a stored KEYWORD; `image_embedding` is a
  # 1024-dimensional vector field whose source column is the same `imageUrl`.
  - name: "imageUrl"
    type: "KEYWORD"
    source_table: "extension"
    source_column: "imageUrl"
    index: true
    store: true

  # NOTE(review): `nested: false` presumably means a single vector per
  # document rather than a nested list of vectors - confirm in the loader.
  # The same normalization caveat as for the text embeddings applies to
  # "dot_product" here.
  - name: "image_embedding"
    type: "IMAGE_EMBEDDING"
    source_table: "extension"
    source_column: "imageUrl"
    embedding_dims: 1024
    embedding_similarity: "dot_product"
    nested: false
    index: true

  # Metadata Fields
  - name: "create_time"
    type: "DATE"
    source_table: "extension"
    source_column: "create_time"
    index: true
    store: true

  # Integer day count read from the extension table; the range filters under
  # `function_score` in this file key off this field.
  # NOTE(review): a precomputed day counter only stays accurate if the
  # documents are re-indexed regularly - confirm the refresh schedule.
  - name: "days_since_last_update"
    type: "INT"
    source_table: "extension"
    source_column: "days_since_last_update"
    index: true
    store: true

# Index Structure (Query Domains)
# Each entry groups field names from `fields` under a domain name; the
# `example` values show the domain name used as a prefix in the query string.
# `language_field_mapping`, where present, lists the fields per language code.
indexes:
  # Default domain: name, category and brand fields across all three
  # languages.
  # NOTE(review): the domain-level analyzer is "chinese_ecommerce" although
  # the group also contains English and Russian fields - confirm how it
  # interacts with the per-field analyzers.
  - name: "default"
    label: "默认索引"  # "Default index"
    fields:
      - "name"
      - "enSpuName"
      - "ruSkuName"
      - "categoryName"
      - "brandName"
    analyzer: "chinese_ecommerce"
    boost: 1.0
    example: 'query=default:"消防套"'
    language_field_mapping:
      zh:
        - "name"
        - "categoryName"
        - "brandName"
      en:
        - "enSpuName"
      ru:
        - "ruSkuName"

  # Title domain: only the three name fields, one per language; carries the
  # highest domain boost in this file (2.0).
  - name: "title"
    label: "标题索引"  # "Title index"
    fields:
      - "name"
      - "enSpuName"
      - "ruSkuName"
    analyzer: "chinese_ecommerce"
    boost: 2.0
    example: 'query=title:"芭比娃娃"'
    language_field_mapping:
      zh:
        - "name"
      en:
        - "enSpuName"
      ru:
        - "ruSkuName"

  # Category domain: the single field `categoryName`.
  # NOTE(review): unlike "default" and "title", this entry has no
  # `language_field_mapping` - confirm how translated (en/ru) queries are
  # handled for this domain.
  - name: "category"
    label: "类目索引"  # "Category index"
    fields:
      - "categoryName"
    analyzer: "chinese_ecommerce"
    boost: 1.5
    example: 'query=category:"玩具"'

  # Brand domain: the single field `brandName`.
  # NOTE(review): unlike "default" and "title", this entry has no
  # `language_field_mapping` - confirm how translated (en/ru) queries are
  # handled for this domain.
  - name: "brand"
    label: "品牌索引"  # "Brand index"
    fields:
      - "brandName"
    analyzer: "chinese_ecommerce"
    boost: 1.5
    example: 'query=brand:"ZHU LIN"'

# Query Configuration
# Language handling and feature switches for query processing.
query_config:
  # The same language codes appear as keys of `language_field_mapping` under
  # `indexes`.
  supported_languages:
    - "zh"
    - "en"
    - "ru"
  default_language: "zh"
  # Feature switches; all three are currently on.
  enable_translation: true
  enable_text_embedding: true
  enable_query_rewrite: true

  # Translation API (DeepL)
  translation_service: "deepl"
  translation_api_key: null  # Set via environment variable

# Ranking Configuration (deprecated; kept for documentation purposes)
# NOTE(review): per the note above this block is no longer authoritative;
# scoring rules now live under `function_score` in this file.
ranking:
  expression: "bm25() + 0.2*text_embedding_relevance()"
  description: "BM25 text relevance combined with semantic embedding similarity"

# Function Score configuration (scoring rules applied at the ES layer)
# Convention: function_score is a required part of the query structure.
#
# NOTE(review): assuming these keys map directly onto the Elasticsearch
# function_score query, `score_mode` combines the outputs of the matching
# functions and `boost_mode` combines that result with the query score.
# A document with days_since_last_update <= 7 satisfies BOTH range filters
# below, so with score_mode "sum" it receives 1.3 + 1.15 = 2.45 before the
# multiply. Weights this close to 1.0 look like multiplicative boosts -
# confirm that "sum" (rather than "multiply") is the intended mode.
function_score:
  score_mode: "sum"       # multiply, sum, avg, first, max, min
  boost_mode: "multiply"  # multiply, replace, sum, avg, max, min

  functions:
    # 1. Filter + Weight (conditional weight) - boost documents that match a
    #    condition.
    - type: "filter_weight"
      name: "7天新品提权"  # "7-day new-product boost"
      filter:
        range:
          days_since_last_update:
            lte: 7
      weight: 1.3

    # Overlaps with the 7-day filter above: every document matching that one
    # also matches this one.
    - type: "filter_weight"
      name: "30天新品提权"  # "30-day new-product boost"
      filter:
        range:
          days_since_last_update:
            lte: 30
      weight: 1.15

    # NOTE(review): `is_video` is not declared under `fields` in this file.
    # Unless the field is mapped and populated elsewhere, this filter cannot
    # match any document - TODO confirm.
    - type: "filter_weight"
      name: "有视频提权"  # "Has-video boost"
      filter:
        term:
          is_video: true
      weight: 1.05

    # Example: boost a specific label
    # - type: "filter_weight"
    #   name: "特定标签提权"
    #   filter:
    #     term:
    #       labelId_by_skuId_essa_3: 165
    #   weight: 1.1

    # Example: boost the main price band
    # - type: "filter_weight"
    #   name: "主力价格段"
    #   filter:
    #     range:
    #       price:
    #         gte: 50
    #         lte: 200
    #   weight: 1.1

    # 2. Field Value Factor (field value mapping) - turn a numeric field into
    #    a scoring factor.
    # Example: days on sale
    # - type: "field_value_factor"
    #   name: "在售天数因子"
    #   field: "on_sell_days_boost"
    #   factor: 1.0
    #   modifier: "none"  # none, log, log1p, log2p, ln, ln1p, ln2p, square, sqrt, reciprocal
    #   missing: 1.0

    # Example: sales-volume factor
    # - type: "field_value_factor"
    #   name: "销量因子"
    #   field: "sales_count"
    #   factor: 0.01
    #   modifier: "log1p"  # logarithmic mapping; dampens the effect of extreme values
    #   missing: 1.0

    # Example: rating factor
    # - type: "field_value_factor"
    #   name: "评分因子"
    #   field: "rating"
    #   factor: 0.5
    #   modifier: "sqrt"
    #   missing: 1.0

    # 3. Decay Functions - the farther a value is from the origin, the lower
    #    the score.
    # Example: time decay
    # - type: "decay"
    #   name: "时间衰减"
    #   function: "gauss"  # gauss, exp, linear
    #   field: "create_time"
    #   origin: "now"
    #   scale: "30d"
    #   offset: "0d"
    #   decay: 0.5

    # Example: price decay
    # - type: "decay"
    #   name: "价格衰减"
    #   function: "linear"
    #   field: "price"
    #   origin: "100"
    #   scale: "50"
    #   decay: 0.5

# Rerank configuration (local re-ranking; currently disabled)
# With `enabled: false` the expression is left empty; the description points
# to ES function_score as the scoring mechanism in use.
rerank:
  enabled: false
  expression: ""
  description: "Local reranking (disabled, use ES function_score instead)"

# SPU Aggregation (disabled for customer1)
# `spu_field` is null while the feature is off.
# NOTE(review): `inner_hits_size` is still set; presumably it is ignored while
# `enabled` is false - confirm in the loader.
spu_config:
  enabled: false
  spu_field: null
  inner_hits_size: 3