Blame view

config/config.yaml 9.05 KB
4d824a77   tangwang   所有租户共用一套统一配置.tena...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
  # Unified Configuration for Multi-Tenant Search Engine
  # Unified configuration file: all tenants share a single index configuration.
  # Note: this file contains no MySQL-related settings, only ES search settings.
  
  # Elasticsearch Index
  # Name of the one index described by this file.
  es_index_name: "search_products"
  
  # ES Index Settings
  # NOTE(review): the key names match Elasticsearch index settings; presumably
  # they are passed through when the index is created -- confirm in the loader.
  es_settings:
    # A single primary shard.
    number_of_shards: 1
    # Zero replicas, i.e. no redundant copy of the shard.
    # TODO confirm this is intended outside development/test environments.
    number_of_replicas: 0
    # With a 30s refresh, newly indexed documents may take up to that long to
    # become searchable (assuming standard Elasticsearch semantics).
    refresh_interval: "30s"
  
  # Field Definitions (SPU level; only fields that help search are included)
  fields:
    # Tenant isolation field (required)
    - name: "tenant_id"
      type: "KEYWORD"
      required: true
      index: true
      store: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
22
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
23
24
  
    # Product identifier fields
cadc77b6   tangwang   索引字段名、变量名、API数据结构...
25
    - name: "spu_id"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
26
27
28
29
      type: "KEYWORD"
      required: true
      index: true
      store: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
30
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
31
  
5dcddc06   tangwang   索引重构
32
33
    # Text-relevance fields (bilingual: Chinese and English)
    - name: "title_zh"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
34
      type: "TEXT"
5dcddc06   tangwang   索引重构
35
36
      analyzer: "hanlp_index"
      search_analyzer: "hanlp_standard"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
37
38
39
      boost: 3.0
      index: true
      store: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
40
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
41
  
5dcddc06   tangwang   索引重构
42
    - name: "brief_zh"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
43
      type: "TEXT"
5dcddc06   tangwang   索引重构
44
45
      analyzer: "hanlp_index"
      search_analyzer: "hanlp_standard"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
46
47
48
      boost: 1.5
      index: true
      store: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
49
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
50
  
5dcddc06   tangwang   索引重构
51
    - name: "description_zh"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
52
      type: "TEXT"
5dcddc06   tangwang   索引重构
53
54
      analyzer: "hanlp_index"
      search_analyzer: "hanlp_standard"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
55
56
57
      boost: 1.0
      index: true
      store: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
58
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
59
  
5dcddc06   tangwang   索引重构
60
    - name: "vendor_zh"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
61
      type: "TEXT"
5dcddc06   tangwang   索引重构
62
63
      analyzer: "hanlp_index"
      search_analyzer: "hanlp_standard"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
64
65
66
      boost: 1.5
      index: true
      store: true
5dcddc06   tangwang   索引重构
67
68
69
      return_in_source: true
      keyword_subfield: true
      keyword_normalizer: "lowercase"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
70
  
5dcddc06   tangwang   索引重构
71
    - name: "title_en"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
72
      type: "TEXT"
5dcddc06   tangwang   索引重构
73
74
75
      analyzer: "english"
      search_analyzer: "english"
      boost: 3.0
4d824a77   tangwang   所有租户共用一套统一配置.tena...
76
77
      index: true
      store: true
5dcddc06   tangwang   索引重构
78
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
79
  
5dcddc06   tangwang   索引重构
80
81
82
83
    - name: "brief_en"
      type: "TEXT"
      analyzer: "english"
      search_analyzer: "english"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
84
85
86
      boost: 1.5
      index: true
      store: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
87
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
88
  
5dcddc06   tangwang   索引重构
89
90
91
92
    - name: "description_en"
      type: "TEXT"
      analyzer: "english"
      search_analyzer: "english"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
93
94
95
      boost: 1.0
      index: true
      store: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
96
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
97
  
5dcddc06   tangwang   索引重构
98
99
100
101
    - name: "vendor_en"
      type: "TEXT"
      analyzer: "english"
      search_analyzer: "english"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
102
103
104
      boost: 1.5
      index: true
      store: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
105
      return_in_source: true
5dcddc06   tangwang   索引重构
106
107
108
109
110
111
112
113
      keyword_subfield: true
      keyword_normalizer: "lowercase"
  
    - name: "tags"
      type: "KEYWORD"
      index: true
      store: true
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
114
  
4d824a77   tangwang   所有租户共用一套统一配置.tena...
115
116
117
118
119
    # Price fields (flattened)
    - name: "min_price"
      type: "FLOAT"
      index: true
      store: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
120
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
121
122
123
124
125
  
    - name: "max_price"
      type: "FLOAT"
      index: true
      store: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
126
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
127
128
129
130
131
  
    - name: "compare_at_price"
      type: "FLOAT"
      index: true
      store: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
132
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
133
  
5dcddc06   tangwang   索引重构
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
    # Flattened SKU-level values kept directly on the product document.
    # NOTE(review): presumably each holds one entry per SKU -- confirm against
    # the indexer that fills these fields.
    - name: "sku_prices"
      type: "FLOAT"
      index: true
      store: true
      return_in_source: true
  
    # FLOAT (was LONG) so this field agrees with the nested skus.weight
    # property, which this file declares as "float". An integer mapping would
    # drop the fractional part of weights such as 0.5.
    # Changing the type of a field in an already-created index requires a
    # reindex.
    - name: "sku_weights"
      type: "FLOAT"
      index: true
      store: true
      return_in_source: true
  
    # Unit strings for the weights above, kept as exact-match keywords.
    - name: "sku_weight_units"
      type: "KEYWORD"
      index: true
      store: true
      return_in_source: true
  
    # Inventory count stored as an integer (LONG).
    - name: "total_inventory"
      type: "LONG"
      index: true
      store: true
      return_in_source: true
  
4d824a77   tangwang   所有租户共用一套统一配置.tena...
158
159
160
161
162
    # Image fields (for display only; not used in search)
    - name: "image_url"
      type: "KEYWORD"
      index: false
      store: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
163
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
164
  
5dcddc06   tangwang   索引重构
165
    # Semantic embedding vectors
b73baf85   tangwang   撰写接口文档
166
    - name: "title_embedding"
325eec03   tangwang   1. 日志、配置基础设施,使用优化
167
168
169
170
171
      type: "TEXT_EMBEDDING"
      embedding_dims: 1024
      embedding_similarity: "dot_product"
      index: true
      store: false
cd3799c6   tangwang   tenant2 1w测试数据 mo...
172
173
      return_in_source: false  # Embedding vectors usually do not need to be returned in results
  
5dcddc06   tangwang   索引重构
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
    # Image embedding vector: 1024 dimensions, declared nested, indexed, not
    # stored, and excluded from the returned source.
    # NOTE(review): if embedding_similarity maps to the Elasticsearch
    # dense_vector `similarity` option, "dot_product" expects unit-length
    # vectors -- confirm the embedding pipeline normalizes them.
    - name: "image_embedding"
      type: "IMAGE_EMBEDDING"
      embedding_dims: 1024
      embedding_similarity: "dot_product"
      nested: true
      index: true
      store: false
      return_in_source: false
  
    # Category-related fields
    - name: "category_path_zh"
      type: "TEXT"
      analyzer: "hanlp_index"
      search_analyzer: "hanlp_standard"
      boost: 1.5
cd3799c6   tangwang   tenant2 1w测试数据 mo...
189
190
191
192
      index: true
      store: true
      return_in_source: true
  
5dcddc06   tangwang   索引重构
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
    # English category path: the "english" analyzer is named for both
    # analyzer and search_analyzer; boost 1.5.
    - name: "category_path_en"
      type: "TEXT"
      analyzer: "english"
      search_analyzer: "english"
      boost: 1.5
      index: true
      store: true
      return_in_source: true
  
    # Chinese category name: uses a different analyzer for `analyzer`
    # ("hanlp_index") than for `search_analyzer` ("hanlp_standard"); boost 1.5.
    # NOTE(review): presumably index-time vs. query-time analysis, as in an
    # Elasticsearch text mapping -- confirm in the mapping generator.
    - name: "category_name_zh"
      type: "TEXT"
      analyzer: "hanlp_index"
      search_analyzer: "hanlp_standard"
      boost: 1.5
      index: true
      store: true
      return_in_source: true
  
    - name: "category_name_en"
      type: "TEXT"
      analyzer: "english"
      search_analyzer: "english"
      boost: 1.5
cd3799c6   tangwang   tenant2 1w测试数据 mo...
216
217
218
219
      index: true
      store: true
      return_in_source: true
  
5dcddc06   tangwang   索引重构
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
    # Exact-match category fields. All are indexed, stored, and returned in
    # the source; none declares an analyzer.
    - name: "category_id"
      type: "KEYWORD"
      index: true
      store: true
      return_in_source: true
  
    - name: "category_name"
      type: "KEYWORD"
      index: true
      store: true
      return_in_source: true
  
    # The only integer-typed field in this group.
    - name: "category_level"
      type: "INT"
      index: true
      store: true
      return_in_source: true
  
    # NOTE(review): category1/2/3_name presumably hold the category name at
    # hierarchy levels 1-3 -- confirm against the indexer.
    - name: "category1_name"
      type: "KEYWORD"
      index: true
      store: true
      return_in_source: true
  
    - name: "category2_name"
      type: "KEYWORD"
      index: true
      store: true
      return_in_source: true
  
    - name: "category3_name"
      type: "KEYWORD"
      index: true
      store: true
      return_in_source: true
  
    # SKU variants / child-SKU attributes
    # Nested JSON field whose entries carry three keyword properties:
    # sku_id, name, and value. Unlike the scalar fields, the entry itself
    # declares no index/store flags; they are set per nested property.
    # Nested property types are written in lowercase ("keyword"), whereas
    # top-level field types in this file are uppercase ("KEYWORD").
    - name: "specifications"
      type: "JSON"
      nested: true
      return_in_source: true
      nested_properties:
        sku_id:
          type: "keyword"
          index: true
          store: true
        name:
          type: "keyword"
          index: true
          store: true
        value:
          type: "keyword"
          index: true
          store: true
  
    # Names of up to three options, kept as exact-match keywords.
    # NOTE(review): presumably these pair with option1_value..option3_value in
    # the nested skus field of this file -- confirm against the indexer.
    - name: "option1_name"
      type: "KEYWORD"
      index: true
      store: true
      return_in_source: true
  
    - name: "option2_name"
      type: "KEYWORD"
      index: true
      store: true
      return_in_source: true
  
    - name: "option3_name"
      type: "KEYWORD"
      index: true
      store: true
      return_in_source: true
  
    # Timestamp fields
    - name: "create_time"
cd3799c6   tangwang   tenant2 1w测试数据 mo...
295
296
297
      type: "DATE"
      index: true
      store: true
5dcddc06   tangwang   索引重构
298
      return_in_source: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
299
  
5dcddc06   tangwang   索引重构
300
    - name: "update_time"
cd3799c6   tangwang   tenant2 1w测试数据 mo...
301
302
303
      type: "DATE"
      index: true
      store: true
5dcddc06   tangwang   索引重构
304
      return_in_source: true
325eec03   tangwang   1. 日志、配置基础设施,使用优化
305
  
cadc77b6   tangwang   索引字段名、变量名、API数据结构...
306
307
    # Nested skus field
    - name: "skus"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
308
309
      type: "JSON"
      nested: true
cd3799c6   tangwang   tenant2 1w测试数据 mo...
310
      return_in_source: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
311
      nested_properties:
cadc77b6   tangwang   索引字段名、变量名、API数据结构...
312
        sku_id:
4d824a77   tangwang   所有租户共用一套统一配置.tena...
313
314
315
          type: "keyword"
          index: true
          store: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
316
317
318
319
320
321
322
323
        price:
          type: "float"
          index: true
          store: true
        compare_at_price:
          type: "float"
          index: true
          store: true
5dcddc06   tangwang   索引重构
324
        sku_code:
4d824a77   tangwang   所有租户共用一套统一配置.tena...
325
326
327
328
329
330
331
          type: "keyword"
          index: true
          store: true
        stock:
          type: "long"
          index: true
          store: true
5dcddc06   tangwang   索引重构
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
        # Weight as a float, with its unit in the following keyword property.
        weight:
          type: "float"
          index: true
          store: true
        weight_unit:
          type: "keyword"
          index: true
          store: true
        # Values for up to three options, kept as exact-match keywords.
        option1_value:
          type: "keyword"
          index: true
          store: true
        option2_value:
          type: "keyword"
          index: true
          store: true
        option3_value:
          type: "keyword"
          index: true
          store: true
        # The only property here with index: false; it is still stored.
        image_src:
          type: "keyword"
          index: false
          store: true
4d824a77   tangwang   所有租户共用一套统一配置.tena...
356
357
358
359
360
361
  
  # Index Structure (Query Domains)
  # Each entry names a query domain, gives it a display label, and lists the
  # field names (from `fields` above) that belong to it, plus an analyzer and
  # a boost.
  # NOTE(review): every domain uses analyzer "chinese_ecommerce", which no
  # field definition in this file uses (fields use "hanlp_index",
  # "hanlp_standard", or "english") -- confirm it is defined by the consumer.
  # NOTE(review): the domains list only *_zh fields and "tags"; no *_en field
  # appears although query_config lists "en" as a supported language --
  # confirm the English fields are reached some other way.
  indexes:
    - name: "default"
      label: "默认索引"
      fields:
5dcddc06   tangwang   索引重构
362
363
364
365
        - "title_zh"
        - "brief_zh"
        - "description_zh"
        - "vendor_zh"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
366
        - "tags"
5dcddc06   tangwang   索引重构
367
368
        - "category_path_zh"
        - "category_name_zh"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
369
370
371
372
373
374
      analyzer: "chinese_ecommerce"
      boost: 1.0
  
    - name: "title"
      label: "标题索引"
      fields:
5dcddc06   tangwang   索引重构
375
        - "title_zh"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
376
377
378
379
380
381
      analyzer: "chinese_ecommerce"
      boost: 2.0
  
    - name: "vendor"
      label: "品牌索引"
      fields:
5dcddc06   tangwang   索引重构
382
        - "vendor_zh"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
383
384
385
386
387
388
      analyzer: "chinese_ecommerce"
      boost: 1.5
  
    - name: "category"
      label: "类目索引"
      fields:
5dcddc06   tangwang   索引重构
389
390
        - "category_path_zh"
        - "category_name_zh"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
391
392
393
394
395
396
397
      analyzer: "chinese_ecommerce"
      boost: 1.5
  
    - name: "tags"
      label: "标签索引"
      fields:
        - "tags"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
398
399
400
401
402
403
404
405
406
407
408
409
410
      analyzer: "chinese_ecommerce"
      boost: 1.0
  
  # Query Configuration
  query_config:
    # Query languages handled by the engine: Chinese and English.
    supported_languages:
      - "zh"
      - "en"
    # One of the supported languages above.
    default_language: "zh"
    # Feature toggles for query processing; all three are switched on here.
    enable_translation: true
    enable_text_embedding: true
    enable_query_rewrite: true
  
325eec03   tangwang   1. 日志、配置基础设施,使用优化
411
    # Embedding field names (if not set, will auto-detect from fields)
b73baf85   tangwang   撰写接口文档
412
    text_embedding_field: "title_embedding"  # Field name for text embeddings
325eec03   tangwang   1. 日志、配置基础设施,使用优化
413
414
    image_embedding_field: null  # Field name for image embeddings (if not set, will auto-detect)
  
4d824a77   tangwang   所有租户共用一套统一配置.tena...
415
416
417
    # Translation API (DeepL)
    translation_service: "deepl"
    translation_api_key: null  # Set via environment variable
522a3964   tangwang   多语言搜索翻译的优化(deepL添...
418
419
    # translation_glossary_id: null  # Optional: DeepL glossary ID for custom terminology (e.g., "车" -> "car")
    # translation_context: "e-commerce product search"  # Context hint for better translation disambiguation
4d824a77   tangwang   所有租户共用一套统一配置.tena...
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
  
  # Ranking Configuration
  # The expression adds the BM25 text score to the text-embedding relevance
  # weighted by 0.2. bm25() and text_embedding_relevance() are resolved by the
  # consumer of this file, not defined here.
  ranking:
    expression: "bm25() + 0.2*text_embedding_relevance()"
    description: "BM25 text relevance combined with semantic embedding similarity"
  
  # Function Score configuration (scoring rules applied at the ES layer)
  # NOTE(review): score_mode/boost_mode presumably map to the options of the
  # same name on the Elasticsearch function_score query -- confirm in the
  # query builder.
  function_score:
    score_mode: "sum"
    boost_mode: "multiply"
    
    # No scoring functions are configured.
    functions: []
  
  # Rerank configuration (local reranking, currently disabled)
  rerank:
    enabled: false
    # Empty expression while reranking is disabled.
    expression: ""
    description: "Local reranking (disabled, use ES function_score instead)"
  
cadc77b6   tangwang   索引字段名、变量名、API数据结构...
439
  # SPU configuration (enabled; uses nested skus)
4d824a77   tangwang   所有租户共用一套统一配置.tena...
440
441
  spu_config:
    enabled: true
cadc77b6   tangwang   索引字段名、变量名、API数据结构...
442
    spu_field: "spu_id"
4d824a77   tangwang   所有租户共用一套统一配置.tena...
443
    inner_hits_size: 10