Commit 2d17b98e22711ff4e7a3deb35f64a9d3f602ebdc
1 parent
1cca75c8
sugg
Showing
2 changed files
with
134 additions
and
18 deletions
Show diff stats
docs/suggestion索引构建.md
| @@ -34,24 +34,24 @@ | @@ -34,24 +34,24 @@ | ||
| 34 | - 用于记录每个租户的元信息(`_id = tenant_id`): | 34 | - 用于记录每个租户的元信息(`_id = tenant_id`): |
| 35 | 该索引全局创建一次,每新增一个租户,插入一行,每次为一个租户做完全量,如果成功,更新一下对应的信息。 | 35 | 该索引全局创建一次,每新增一个租户,插入一行,每次为一个租户做完全量,如果成功,更新一下对应的信息。 |
| 36 | ``` | 36 | ``` |
| 37 | -{ | ||
| 38 | -"settings": { | ||
| 39 | - "number_of_shards": 1, | ||
| 40 | - "number_of_replicas": 0, | ||
| 41 | - "refresh_interval": "1s", | ||
| 42 | -}, | ||
| 43 | -"mappings": { | ||
| 44 | - "properties": { | ||
| 45 | - "tenant_id": {"type": "keyword"}, | ||
| 46 | - "active_alias": {"type": "keyword"}, | ||
| 47 | - "active_index": {"type": "keyword"}, | ||
| 48 | - "last_full_build_at": {"type": "date"}, | ||
| 49 | - "last_incremental_build_at": {"type": "date"}, | ||
| 50 | - "last_incremental_watermark": {"type": "date"}, | ||
| 51 | - "updated_at": {"type": "date"}, | 37 | + ```json |
| 38 | + "settings": { | ||
| 39 | + "number_of_shards": 1, | ||
| 40 | + "number_of_replicas": 0, | ||
| 41 | + "refresh_interval": "1s" | ||
| 42 | + }, | ||
| 43 | + "mappings": { | ||
| 44 | + "properties": { | ||
| 45 | + "tenant_id": {"type": "keyword"}, | ||
| 46 | + "active_alias": {"type": "keyword"}, | ||
| 47 | + "active_index": {"type": "keyword"}, | ||
| 48 | + "last_full_build_at": {"type": "date"}, | ||
| 49 | + "last_incremental_build_at": {"type": "date"}, | ||
| 50 | + "last_incremental_watermark": {"type": "date"}, | ||
| 51 | + "updated_at": {"type": "date"} | ||
| 52 | + } | ||
| 52 | } | 53 | } |
| 53 | -} | ||
| 54 | -``` | 54 | + ``` |
| 55 | - `active_alias`(当前 alias 名) | 55 | - `active_alias`(当前 alias 名) |
| 56 | - `active_index`(当前实际索引名) | 56 | - `active_index`(当前实际索引名) |
| 57 | - `last_full_build_at` | 57 | - `last_full_build_at` |
| @@ -62,7 +62,7 @@ | @@ -62,7 +62,7 @@ | ||
| 62 | 62 | ||
| 63 | #### 2. suggestion 索引 mapping 结构 | 63 | #### 2. suggestion 索引 mapping 结构 |
| 64 | 64 | ||
| 65 | -出自 `suggestion/mapping.py` → `build_suggestion_mapping(index_languages)`: | 65 | +见 [search_suggestions.json](http://gitlab.essa.top:88/ai-saas/saas-search/blob/master/mappings/search_suggestions.json) |
| 66 | 66 | ||
| 67 | - **settings** | 67 | - **settings** |
| 68 | 68 |
| @@ -0,0 +1,116 @@ | @@ -0,0 +1,116 @@ | ||
| 1 | +{ | ||
| 2 | + "settings": { | ||
| 3 | + "number_of_shards": 1, | ||
| 4 | + "number_of_replicas": 0, | ||
| 5 | + "refresh_interval": "30s", | ||
| 6 | + "analysis": { | ||
| 7 | + "analyzer": { | ||
| 8 | + "index_ik": { | ||
| 9 | + "type": "custom", | ||
| 10 | + "tokenizer": "ik_max_word", | ||
| 11 | + "filter": ["lowercase", "asciifolding"] | ||
| 12 | + }, | ||
| 13 | + "query_ik": { | ||
| 14 | + "type": "custom", | ||
| 15 | + "tokenizer": "ik_smart", | ||
| 16 | + "filter": ["lowercase", "asciifolding"] | ||
| 17 | + } | ||
| 18 | + } | ||
| 19 | + } | ||
| 20 | + }, | ||
| 21 | + "mappings": { | ||
| 22 | + "properties": { | ||
| 23 | + "tenant_id": { "type": "keyword" }, | ||
| 24 | + "lang": { "type": "keyword" }, | ||
| 25 | + "text": { "type": "keyword" }, | ||
| 26 | + "text_norm": { "type": "keyword" }, | ||
| 27 | + "sources": { "type": "keyword" }, | ||
| 28 | + "title_doc_count": { "type": "integer" }, | ||
| 29 | + "qanchor_doc_count": { "type": "integer" }, | ||
| 30 | + "query_count_7d": { "type": "integer" }, | ||
| 31 | + "query_count_30d": { "type": "integer" }, | ||
| 32 | + "rank_score": { "type": "float" }, | ||
| 33 | + "lang_confidence": { "type": "float" }, | ||
| 34 | + "lang_source": { "type": "keyword" }, | ||
| 35 | + "lang_conflict": { "type": "boolean" }, | ||
| 36 | + "status": { "type": "byte" }, | ||
| 37 | + "updated_at": { "type": "date" }, | ||
| 38 | + "completion": { | ||
| 39 | + "properties": { | ||
| 40 | + "zh": { | ||
| 41 | + "type": "completion", | ||
| 42 | + "analyzer": "index_ik", | ||
| 43 | + "search_analyzer": "query_ik" | ||
| 44 | + }, | ||
| 45 | + "en": { "type": "completion", "analyzer": "english" }, | ||
| 46 | + "ar": { "type": "completion", "analyzer": "arabic" }, | ||
| 47 | + "hy": { "type": "completion", "analyzer": "armenian" }, | ||
| 48 | + "eu": { "type": "completion", "analyzer": "basque" }, | ||
| 49 | + "pt_br": { "type": "completion", "analyzer": "brazilian" }, | ||
| 50 | + "bg": { "type": "completion", "analyzer": "bulgarian" }, | ||
| 51 | + "ca": { "type": "completion", "analyzer": "catalan" }, | ||
| 52 | + "cjk": { "type": "completion", "analyzer": "cjk" }, | ||
| 53 | + "cs": { "type": "completion", "analyzer": "czech" }, | ||
| 54 | + "da": { "type": "completion", "analyzer": "danish" }, | ||
| 55 | + "nl": { "type": "completion", "analyzer": "dutch" }, | ||
| 56 | + "fi": { "type": "completion", "analyzer": "finnish" }, | ||
| 57 | + "fr": { "type": "completion", "analyzer": "french" }, | ||
| 58 | + "gl": { "type": "completion", "analyzer": "galician" }, | ||
| 59 | + "de": { "type": "completion", "analyzer": "german" }, | ||
| 60 | + "el": { "type": "completion", "analyzer": "greek" }, | ||
| 61 | + "hi": { "type": "completion", "analyzer": "hindi" }, | ||
| 62 | + "hu": { "type": "completion", "analyzer": "hungarian" }, | ||
| 63 | + "id": { "type": "completion", "analyzer": "indonesian" }, | ||
| 64 | + "it": { "type": "completion", "analyzer": "italian" }, | ||
| 65 | + "no": { "type": "completion", "analyzer": "norwegian" }, | ||
| 66 | + "fa": { "type": "completion", "analyzer": "persian" }, | ||
| 67 | + "pt": { "type": "completion", "analyzer": "portuguese" }, | ||
| 68 | + "ro": { "type": "completion", "analyzer": "romanian" }, | ||
| 69 | + "ru": { "type": "completion", "analyzer": "russian" }, | ||
| 70 | + "es": { "type": "completion", "analyzer": "spanish" }, | ||
| 71 | + "sv": { "type": "completion", "analyzer": "swedish" }, | ||
| 72 | + "tr": { "type": "completion", "analyzer": "turkish" }, | ||
| 73 | + "th": { "type": "completion", "analyzer": "thai" } | ||
| 74 | + } | ||
| 75 | + }, | ||
| 76 | + "sat": { | ||
| 77 | + "properties": { | ||
| 78 | + "zh": { | ||
| 79 | + "type": "search_as_you_type", | ||
| 80 | + "analyzer": "index_ik" | ||
| 81 | + }, | ||
| 82 | + "en": { "type": "search_as_you_type", "analyzer": "english" }, | ||
| 83 | + "ar": { "type": "search_as_you_type", "analyzer": "arabic" }, | ||
| 84 | + "hy": { "type": "search_as_you_type", "analyzer": "armenian" }, | ||
| 85 | + "eu": { "type": "search_as_you_type", "analyzer": "basque" }, | ||
| 86 | + "pt_br": { "type": "search_as_you_type", "analyzer": "brazilian" }, | ||
| 87 | + "bg": { "type": "search_as_you_type", "analyzer": "bulgarian" }, | ||
| 88 | + "ca": { "type": "search_as_you_type", "analyzer": "catalan" }, | ||
| 89 | + "cjk": { "type": "search_as_you_type", "analyzer": "cjk" }, | ||
| 90 | + "cs": { "type": "search_as_you_type", "analyzer": "czech" }, | ||
| 91 | + "da": { "type": "search_as_you_type", "analyzer": "danish" }, | ||
| 92 | + "nl": { "type": "search_as_you_type", "analyzer": "dutch" }, | ||
| 93 | + "fi": { "type": "search_as_you_type", "analyzer": "finnish" }, | ||
| 94 | + "fr": { "type": "search_as_you_type", "analyzer": "french" }, | ||
| 95 | + "gl": { "type": "search_as_you_type", "analyzer": "galician" }, | ||
| 96 | + "de": { "type": "search_as_you_type", "analyzer": "german" }, | ||
| 97 | + "el": { "type": "search_as_you_type", "analyzer": "greek" }, | ||
| 98 | + "hi": { "type": "search_as_you_type", "analyzer": "hindi" }, | ||
| 99 | + "hu": { "type": "search_as_you_type", "analyzer": "hungarian" }, | ||
| 100 | + "id": { "type": "search_as_you_type", "analyzer": "indonesian" }, | ||
| 101 | + "it": { "type": "search_as_you_type", "analyzer": "italian" }, | ||
| 102 | + "no": { "type": "search_as_you_type", "analyzer": "norwegian" }, | ||
| 103 | + "fa": { "type": "search_as_you_type", "analyzer": "persian" }, | ||
| 104 | + "pt": { "type": "search_as_you_type", "analyzer": "portuguese" }, | ||
| 105 | + "ro": { "type": "search_as_you_type", "analyzer": "romanian" }, | ||
| 106 | + "ru": { "type": "search_as_you_type", "analyzer": "russian" }, | ||
| 107 | + "es": { "type": "search_as_you_type", "analyzer": "spanish" }, | ||
| 108 | + "sv": { "type": "search_as_you_type", "analyzer": "swedish" }, | ||
| 109 | + "tr": { "type": "search_as_you_type", "analyzer": "turkish" }, | ||
| 110 | + "th": { "type": "search_as_you_type", "analyzer": "thai" } | ||
| 111 | + } | ||
| 112 | + } | ||
| 113 | + } | ||
| 114 | + } | ||
| 115 | +} | ||
| 116 | + |