Commit 2d17b98e22711ff4e7a3deb35f64a9d3f602ebdc
1 parent
1cca75c8
sugg
Showing
2 changed files
with
134 additions
and
18 deletions
Show diff stats
docs/suggestion索引构建.md
| ... | ... | @@ -34,24 +34,24 @@ |
| 34 | 34 | - 用于记录每个租户的元信息(`_id = tenant_id`): |
| 35 | 35 | 该索引全局创建一次,每新增一个租户,插入一行,每次为一个租户做完全量,如果成功,更新一下对应的信息。 |
| 36 | 36 | ``` |
| 37 | -{ | |
| 38 | -"settings": { | |
| 39 | - "number_of_shards": 1, | |
| 40 | - "number_of_replicas": 0, | |
| 41 | - "refresh_interval": "1s", | |
| 42 | -}, | |
| 43 | -"mappings": { | |
| 44 | - "properties": { | |
| 45 | - "tenant_id": {"type": "keyword"}, | |
| 46 | - "active_alias": {"type": "keyword"}, | |
| 47 | - "active_index": {"type": "keyword"}, | |
| 48 | - "last_full_build_at": {"type": "date"}, | |
| 49 | - "last_incremental_build_at": {"type": "date"}, | |
| 50 | - "last_incremental_watermark": {"type": "date"}, | |
| 51 | - "updated_at": {"type": "date"}, | |
| 37 | + ```json | |
| 38 | + "settings": { | |
| 39 | + "number_of_shards": 1, | |
| 40 | + "number_of_replicas": 0, | |
| 41 | + "refresh_interval": "1s" | |
| 42 | + }, | |
| 43 | + "mappings": { | |
| 44 | + "properties": { | |
| 45 | + "tenant_id": {"type": "keyword"}, | |
| 46 | + "active_alias": {"type": "keyword"}, | |
| 47 | + "active_index": {"type": "keyword"}, | |
| 48 | + "last_full_build_at": {"type": "date"}, | |
| 49 | + "last_incremental_build_at": {"type": "date"}, | |
| 50 | + "last_incremental_watermark": {"type": "date"}, | |
| 51 | + "updated_at": {"type": "date"} | |
| 52 | + } | |
| 52 | 53 | } |
| 53 | -} | |
| 54 | -``` | |
| 54 | + ``` | |
| 55 | 55 | - `active_alias`(当前 alias 名) |
| 56 | 56 | - `active_index`(当前实际索引名) |
| 57 | 57 | - `last_full_build_at` |
| ... | ... | @@ -62,7 +62,7 @@ |
| 62 | 62 | |
| 63 | 63 | #### 2. suggestion 索引 mapping 结构 |
| 64 | 64 | |
| 65 | -出自 `suggestion/mapping.py` → `build_suggestion_mapping(index_languages)`: | |
| 65 | +见 [search_suggestions.json](http://gitlab.essa.top:88/ai-saas/saas-search/blob/master/mappings/search_suggestions.json) | |
| 66 | 66 | |
| 67 | 67 | - **settings** |
| 68 | 68 | ... | ... |
| ... | ... | @@ -0,0 +1,116 @@ |
| 1 | +{ | |
| 2 | + "settings": { | |
| 3 | + "number_of_shards": 1, | |
| 4 | + "number_of_replicas": 0, | |
| 5 | + "refresh_interval": "30s", | |
| 6 | + "analysis": { | |
| 7 | + "analyzer": { | |
| 8 | + "index_ik": { | |
| 9 | + "type": "custom", | |
| 10 | + "tokenizer": "ik_max_word", | |
| 11 | + "filter": ["lowercase", "asciifolding"] | |
| 12 | + }, | |
| 13 | + "query_ik": { | |
| 14 | + "type": "custom", | |
| 15 | + "tokenizer": "ik_smart", | |
| 16 | + "filter": ["lowercase", "asciifolding"] | |
| 17 | + } | |
| 18 | + } | |
| 19 | + } | |
| 20 | + }, | |
| 21 | + "mappings": { | |
| 22 | + "properties": { | |
| 23 | + "tenant_id": { "type": "keyword" }, | |
| 24 | + "lang": { "type": "keyword" }, | |
| 25 | + "text": { "type": "keyword" }, | |
| 26 | + "text_norm": { "type": "keyword" }, | |
| 27 | + "sources": { "type": "keyword" }, | |
| 28 | + "title_doc_count": { "type": "integer" }, | |
| 29 | + "qanchor_doc_count": { "type": "integer" }, | |
| 30 | + "query_count_7d": { "type": "integer" }, | |
| 31 | + "query_count_30d": { "type": "integer" }, | |
| 32 | + "rank_score": { "type": "float" }, | |
| 33 | + "lang_confidence": { "type": "float" }, | |
| 34 | + "lang_source": { "type": "keyword" }, | |
| 35 | + "lang_conflict": { "type": "boolean" }, | |
| 36 | + "status": { "type": "byte" }, | |
| 37 | + "updated_at": { "type": "date" }, | |
| 38 | + "completion": { | |
| 39 | + "properties": { | |
| 40 | + "zh": { | |
| 41 | + "type": "completion", | |
| 42 | + "analyzer": "index_ik", | |
| 43 | + "search_analyzer": "query_ik" | |
| 44 | + }, | |
| 45 | + "en": { "type": "completion", "analyzer": "english" }, | |
| 46 | + "ar": { "type": "completion", "analyzer": "arabic" }, | |
| 47 | + "hy": { "type": "completion", "analyzer": "armenian" }, | |
| 48 | + "eu": { "type": "completion", "analyzer": "basque" }, | |
| 49 | + "pt_br": { "type": "completion", "analyzer": "brazilian" }, | |
| 50 | + "bg": { "type": "completion", "analyzer": "bulgarian" }, | |
| 51 | + "ca": { "type": "completion", "analyzer": "catalan" }, | |
| 52 | + "cjk": { "type": "completion", "analyzer": "cjk" }, | |
| 53 | + "cs": { "type": "completion", "analyzer": "czech" }, | |
| 54 | + "da": { "type": "completion", "analyzer": "danish" }, | |
| 55 | + "nl": { "type": "completion", "analyzer": "dutch" }, | |
| 56 | + "fi": { "type": "completion", "analyzer": "finnish" }, | |
| 57 | + "fr": { "type": "completion", "analyzer": "french" }, | |
| 58 | + "gl": { "type": "completion", "analyzer": "galician" }, | |
| 59 | + "de": { "type": "completion", "analyzer": "german" }, | |
| 60 | + "el": { "type": "completion", "analyzer": "greek" }, | |
| 61 | + "hi": { "type": "completion", "analyzer": "hindi" }, | |
| 62 | + "hu": { "type": "completion", "analyzer": "hungarian" }, | |
| 63 | + "id": { "type": "completion", "analyzer": "indonesian" }, | |
| 64 | + "it": { "type": "completion", "analyzer": "italian" }, | |
| 65 | + "no": { "type": "completion", "analyzer": "norwegian" }, | |
| 66 | + "fa": { "type": "completion", "analyzer": "persian" }, | |
| 67 | + "pt": { "type": "completion", "analyzer": "portuguese" }, | |
| 68 | + "ro": { "type": "completion", "analyzer": "romanian" }, | |
| 69 | + "ru": { "type": "completion", "analyzer": "russian" }, | |
| 70 | + "es": { "type": "completion", "analyzer": "spanish" }, | |
| 71 | + "sv": { "type": "completion", "analyzer": "swedish" }, | |
| 72 | + "tr": { "type": "completion", "analyzer": "turkish" }, | |
| 73 | + "th": { "type": "completion", "analyzer": "thai" } | |
| 74 | + } | |
| 75 | + }, | |
| 76 | + "sat": { | |
| 77 | + "properties": { | |
| 78 | + "zh": { | |
| 79 | + "type": "search_as_you_type", | |
| 80 | + "analyzer": "index_ik" | |
| 81 | + }, | |
| 82 | + "en": { "type": "search_as_you_type", "analyzer": "english" }, | |
| 83 | + "ar": { "type": "search_as_you_type", "analyzer": "arabic" }, | |
| 84 | + "hy": { "type": "search_as_you_type", "analyzer": "armenian" }, | |
| 85 | + "eu": { "type": "search_as_you_type", "analyzer": "basque" }, | |
| 86 | + "pt_br": { "type": "search_as_you_type", "analyzer": "brazilian" }, | |
| 87 | + "bg": { "type": "search_as_you_type", "analyzer": "bulgarian" }, | |
| 88 | + "ca": { "type": "search_as_you_type", "analyzer": "catalan" }, | |
| 89 | + "cjk": { "type": "search_as_you_type", "analyzer": "cjk" }, | |
| 90 | + "cs": { "type": "search_as_you_type", "analyzer": "czech" }, | |
| 91 | + "da": { "type": "search_as_you_type", "analyzer": "danish" }, | |
| 92 | + "nl": { "type": "search_as_you_type", "analyzer": "dutch" }, | |
| 93 | + "fi": { "type": "search_as_you_type", "analyzer": "finnish" }, | |
| 94 | + "fr": { "type": "search_as_you_type", "analyzer": "french" }, | |
| 95 | + "gl": { "type": "search_as_you_type", "analyzer": "galician" }, | |
| 96 | + "de": { "type": "search_as_you_type", "analyzer": "german" }, | |
| 97 | + "el": { "type": "search_as_you_type", "analyzer": "greek" }, | |
| 98 | + "hi": { "type": "search_as_you_type", "analyzer": "hindi" }, | |
| 99 | + "hu": { "type": "search_as_you_type", "analyzer": "hungarian" }, | |
| 100 | + "id": { "type": "search_as_you_type", "analyzer": "indonesian" }, | |
| 101 | + "it": { "type": "search_as_you_type", "analyzer": "italian" }, | |
| 102 | + "no": { "type": "search_as_you_type", "analyzer": "norwegian" }, | |
| 103 | + "fa": { "type": "search_as_you_type", "analyzer": "persian" }, | |
| 104 | + "pt": { "type": "search_as_you_type", "analyzer": "portuguese" }, | |
| 105 | + "ro": { "type": "search_as_you_type", "analyzer": "romanian" }, | |
| 106 | + "ru": { "type": "search_as_you_type", "analyzer": "russian" }, | |
| 107 | + "es": { "type": "search_as_you_type", "analyzer": "spanish" }, | |
| 108 | + "sv": { "type": "search_as_you_type", "analyzer": "swedish" }, | |
| 109 | + "tr": { "type": "search_as_you_type", "analyzer": "turkish" }, | |
| 110 | + "th": { "type": "search_as_you_type", "analyzer": "thai" } | |
| 111 | + } | |
| 112 | + } | |
| 113 | + } | |
| 114 | + } | |
| 115 | +} | |
| 116 | + | ... | ... |