Commit 2d17b98e22711ff4e7a3deb35f64a9d3f602ebdc

Authored by tangwang
1 parent 1cca75c8

sugg

docs/suggestion索引构建.md
@@ -34,24 +34,24 @@ @@ -34,24 +34,24 @@
34 - 用于记录每个租户的元信息(`_id = tenant_id`): 34 - 用于记录每个租户的元信息(`_id = tenant_id`):
35 该索引全局创建一次,每新增一个租户,插入一行,每次为一个租户做完全量,如果成功,更新一下对应的信息。 35 该索引全局创建一次,每新增一个租户,插入一行,每次为一个租户做完全量,如果成功,更新一下对应的信息。
36 ``` 36 ```
37 -{  
38 -"settings": {  
39 - "number_of_shards": 1,  
40 - "number_of_replicas": 0,  
41 - "refresh_interval": "1s",  
42 -},  
43 -"mappings": {  
44 - "properties": {  
45 - "tenant_id": {"type": "keyword"},  
46 - "active_alias": {"type": "keyword"},  
47 - "active_index": {"type": "keyword"},  
48 - "last_full_build_at": {"type": "date"},  
49 - "last_incremental_build_at": {"type": "date"},  
50 - "last_incremental_watermark": {"type": "date"},  
51 - "updated_at": {"type": "date"}, 37 + ```json
  38 + "settings": {
  39 + "number_of_shards": 1,
  40 + "number_of_replicas": 0,
  41 + "refresh_interval": "1s"
  42 + },
  43 + "mappings": {
  44 + "properties": {
  45 + "tenant_id": {"type": "keyword"},
  46 + "active_alias": {"type": "keyword"},
  47 + "active_index": {"type": "keyword"},
  48 + "last_full_build_at": {"type": "date"},
  49 + "last_incremental_build_at": {"type": "date"},
  50 + "last_incremental_watermark": {"type": "date"},
  51 + "updated_at": {"type": "date"}
  52 + }
52 } 53 }
53 -}  
54 -``` 54 + ```
55 - `active_alias`(当前 alias 名) 55 - `active_alias`(当前 alias 名)
56 - `active_index`(当前实际索引名) 56 - `active_index`(当前实际索引名)
57 - `last_full_build_at` 57 - `last_full_build_at`
@@ -62,7 +62,7 @@ @@ -62,7 +62,7 @@
62 62
63 #### 2. suggestion 索引 mapping 结构 63 #### 2. suggestion 索引 mapping 结构
64 64
65 -出自 `suggestion/mapping.py` → `build_suggestion_mapping(index_languages)`: 65 +见 [search_suggestions.json](http://gitlab.essa.top:88/ai-saas/saas-search/blob/master/mappings/search_suggestions.json)
66 66
67 - **settings** 67 - **settings**
68 68
mappings/search_suggestions.json 0 → 100644
@@ -0,0 +1,116 @@ @@ -0,0 +1,116 @@
  1 +{
  2 + "settings": {
  3 + "number_of_shards": 1,
  4 + "number_of_replicas": 0,
  5 + "refresh_interval": "30s",
  6 + "analysis": {
  7 + "analyzer": {
  8 + "index_ik": {
  9 + "type": "custom",
  10 + "tokenizer": "ik_max_word",
  11 + "filter": ["lowercase", "asciifolding"]
  12 + },
  13 + "query_ik": {
  14 + "type": "custom",
  15 + "tokenizer": "ik_smart",
  16 + "filter": ["lowercase", "asciifolding"]
  17 + }
  18 + }
  19 + }
  20 + },
  21 + "mappings": {
  22 + "properties": {
  23 + "tenant_id": { "type": "keyword" },
  24 + "lang": { "type": "keyword" },
  25 + "text": { "type": "keyword" },
  26 + "text_norm": { "type": "keyword" },
  27 + "sources": { "type": "keyword" },
  28 + "title_doc_count": { "type": "integer" },
  29 + "qanchor_doc_count": { "type": "integer" },
  30 + "query_count_7d": { "type": "integer" },
  31 + "query_count_30d": { "type": "integer" },
  32 + "rank_score": { "type": "float" },
  33 + "lang_confidence": { "type": "float" },
  34 + "lang_source": { "type": "keyword" },
  35 + "lang_conflict": { "type": "boolean" },
  36 + "status": { "type": "byte" },
  37 + "updated_at": { "type": "date" },
  38 + "completion": {
  39 + "properties": {
  40 + "zh": {
  41 + "type": "completion",
  42 + "analyzer": "index_ik",
  43 + "search_analyzer": "query_ik"
  44 + },
  45 + "en": { "type": "completion", "analyzer": "english" },
  46 + "ar": { "type": "completion", "analyzer": "arabic" },
  47 + "hy": { "type": "completion", "analyzer": "armenian" },
  48 + "eu": { "type": "completion", "analyzer": "basque" },
  49 + "pt_br": { "type": "completion", "analyzer": "brazilian" },
  50 + "bg": { "type": "completion", "analyzer": "bulgarian" },
  51 + "ca": { "type": "completion", "analyzer": "catalan" },
  52 + "cjk": { "type": "completion", "analyzer": "cjk" },
  53 + "cs": { "type": "completion", "analyzer": "czech" },
  54 + "da": { "type": "completion", "analyzer": "danish" },
  55 + "nl": { "type": "completion", "analyzer": "dutch" },
  56 + "fi": { "type": "completion", "analyzer": "finnish" },
  57 + "fr": { "type": "completion", "analyzer": "french" },
  58 + "gl": { "type": "completion", "analyzer": "galician" },
  59 + "de": { "type": "completion", "analyzer": "german" },
  60 + "el": { "type": "completion", "analyzer": "greek" },
  61 + "hi": { "type": "completion", "analyzer": "hindi" },
  62 + "hu": { "type": "completion", "analyzer": "hungarian" },
  63 + "id": { "type": "completion", "analyzer": "indonesian" },
  64 + "it": { "type": "completion", "analyzer": "italian" },
  65 + "no": { "type": "completion", "analyzer": "norwegian" },
  66 + "fa": { "type": "completion", "analyzer": "persian" },
  67 + "pt": { "type": "completion", "analyzer": "portuguese" },
  68 + "ro": { "type": "completion", "analyzer": "romanian" },
  69 + "ru": { "type": "completion", "analyzer": "russian" },
  70 + "es": { "type": "completion", "analyzer": "spanish" },
  71 + "sv": { "type": "completion", "analyzer": "swedish" },
  72 + "tr": { "type": "completion", "analyzer": "turkish" },
  73 + "th": { "type": "completion", "analyzer": "thai" }
  74 + }
  75 + },
  76 + "sat": {
  77 + "properties": {
  78 + "zh": {
  79 + "type": "search_as_you_type",
  80 + "analyzer": "index_ik"
  81 + },
  82 + "en": { "type": "search_as_you_type", "analyzer": "english" },
  83 + "ar": { "type": "search_as_you_type", "analyzer": "arabic" },
  84 + "hy": { "type": "search_as_you_type", "analyzer": "armenian" },
  85 + "eu": { "type": "search_as_you_type", "analyzer": "basque" },
  86 + "pt_br": { "type": "search_as_you_type", "analyzer": "brazilian" },
  87 + "bg": { "type": "search_as_you_type", "analyzer": "bulgarian" },
  88 + "ca": { "type": "search_as_you_type", "analyzer": "catalan" },
  89 + "cjk": { "type": "search_as_you_type", "analyzer": "cjk" },
  90 + "cs": { "type": "search_as_you_type", "analyzer": "czech" },
  91 + "da": { "type": "search_as_you_type", "analyzer": "danish" },
  92 + "nl": { "type": "search_as_you_type", "analyzer": "dutch" },
  93 + "fi": { "type": "search_as_you_type", "analyzer": "finnish" },
  94 + "fr": { "type": "search_as_you_type", "analyzer": "french" },
  95 + "gl": { "type": "search_as_you_type", "analyzer": "galician" },
  96 + "de": { "type": "search_as_you_type", "analyzer": "german" },
  97 + "el": { "type": "search_as_you_type", "analyzer": "greek" },
  98 + "hi": { "type": "search_as_you_type", "analyzer": "hindi" },
  99 + "hu": { "type": "search_as_you_type", "analyzer": "hungarian" },
  100 + "id": { "type": "search_as_you_type", "analyzer": "indonesian" },
  101 + "it": { "type": "search_as_you_type", "analyzer": "italian" },
  102 + "no": { "type": "search_as_you_type", "analyzer": "norwegian" },
  103 + "fa": { "type": "search_as_you_type", "analyzer": "persian" },
  104 + "pt": { "type": "search_as_you_type", "analyzer": "portuguese" },
  105 + "ro": { "type": "search_as_you_type", "analyzer": "romanian" },
  106 + "ru": { "type": "search_as_you_type", "analyzer": "russian" },
  107 + "es": { "type": "search_as_you_type", "analyzer": "spanish" },
  108 + "sv": { "type": "search_as_you_type", "analyzer": "swedish" },
  109 + "tr": { "type": "search_as_you_type", "analyzer": "turkish" },
  110 + "th": { "type": "search_as_you_type", "analyzer": "thai" }
  111 + }
  112 + }
  113 + }
  114 + }
  115 +}
  116 +