From 2d17b98e22711ff4e7a3deb35f64a9d3f602ebdc Mon Sep 17 00:00:00 2001
From: tangwang
Date: Mon, 16 Mar 2026 17:38:34 +0800
Subject: [PATCH] sugg

---
 docs/suggestion索引构建.md       |  41 +++++++++++++++++++++--------------------
 mappings/search_suggestions.json | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 137 insertions(+), 20 deletions(-)
 create mode 100644 mappings/search_suggestions.json

diff --git a/docs/suggestion索引构建.md b/docs/suggestion索引构建.md
index 0dad214..5b56890 100644
--- a/docs/suggestion索引构建.md
+++ b/docs/suggestion索引构建.md
@@ -34,24 +34,25 @@
 - 用于记录每个租户的元信息(`_id = tenant_id`):
 该索引全局创建一次,每新增一个租户,插入一行,每次为一个租户做完全量,如果成功,更新一下对应的信息。
-```
-{
-"settings": {
-  "number_of_shards": 1,
-  "number_of_replicas": 0,
-  "refresh_interval": "1s",
-},
-"mappings": {
-  "properties": {
-    "tenant_id": {"type": "keyword"},
-    "active_alias": {"type": "keyword"},
-    "active_index": {"type": "keyword"},
-    "last_full_build_at": {"type": "date"},
-    "last_incremental_build_at": {"type": "date"},
-    "last_incremental_watermark": {"type": "date"},
-    "updated_at": {"type": "date"},
-  }
-}
-```
+  ```json
+  {
+    "settings": {
+      "number_of_shards": 1,
+      "number_of_replicas": 0,
+      "refresh_interval": "1s"
+    },
+    "mappings": {
+      "properties": {
+        "tenant_id": {"type": "keyword"},
+        "active_alias": {"type": "keyword"},
+        "active_index": {"type": "keyword"},
+        "last_full_build_at": {"type": "date"},
+        "last_incremental_build_at": {"type": "date"},
+        "last_incremental_watermark": {"type": "date"},
+        "updated_at": {"type": "date"}
+      }
+    }
+  }
+  ```
   - `active_alias`(当前 alias 名)
   - `active_index`(当前实际索引名)
   - `last_full_build_at`
@@ -62,7 +63,7 @@
 
 #### 2. suggestion 索引 mapping 结构
 
-出自 `suggestion/mapping.py` → `build_suggestion_mapping(index_languages)`:
+见 [search_suggestions.json](http://gitlab.essa.top:88/ai-saas/saas-search/blob/master/mappings/search_suggestions.json)
 
 - **settings**
 
diff --git a/mappings/search_suggestions.json b/mappings/search_suggestions.json
new file mode 100644
index 0000000..7d52daf
--- /dev/null
+++ b/mappings/search_suggestions.json
@@ -0,0 +1,116 @@
+{
+  "settings": {
+    "number_of_shards": 1,
+    "number_of_replicas": 0,
+    "refresh_interval": "30s",
+    "analysis": {
+      "analyzer": {
+        "index_ik": {
+          "type": "custom",
+          "tokenizer": "ik_max_word",
+          "filter": ["lowercase", "asciifolding"]
+        },
+        "query_ik": {
+          "type": "custom",
+          "tokenizer": "ik_smart",
+          "filter": ["lowercase", "asciifolding"]
+        }
+      }
+    }
+  },
+  "mappings": {
+    "properties": {
+      "tenant_id": { "type": "keyword" },
+      "lang": { "type": "keyword" },
+      "text": { "type": "keyword" },
+      "text_norm": { "type": "keyword" },
+      "sources": { "type": "keyword" },
+      "title_doc_count": { "type": "integer" },
+      "qanchor_doc_count": { "type": "integer" },
+      "query_count_7d": { "type": "integer" },
+      "query_count_30d": { "type": "integer" },
+      "rank_score": { "type": "float" },
+      "lang_confidence": { "type": "float" },
+      "lang_source": { "type": "keyword" },
+      "lang_conflict": { "type": "boolean" },
+      "status": { "type": "byte" },
+      "updated_at": { "type": "date" },
+      "completion": {
+        "properties": {
+          "zh": {
+            "type": "completion",
+            "analyzer": "index_ik",
+            "search_analyzer": "query_ik"
+          },
+          "en": { "type": "completion", "analyzer": "english" },
+          "ar": { "type": "completion", "analyzer": "arabic" },
+          "hy": { "type": "completion", "analyzer": "armenian" },
+          "eu": { "type": "completion", "analyzer": "basque" },
+          "pt_br": { "type": "completion", "analyzer": "brazilian" },
+          "bg": { "type": "completion", "analyzer": "bulgarian" },
+          "ca": { "type": "completion", "analyzer": "catalan" },
+          "cjk": { "type": "completion", "analyzer": "cjk" },
+          "cs": { "type": "completion", "analyzer": "czech" },
+          "da": { "type": "completion", "analyzer": "danish" },
+          "nl": { "type": "completion", "analyzer": "dutch" },
+          "fi": { "type": "completion", "analyzer": "finnish" },
+          "fr": { "type": "completion", "analyzer": "french" },
+          "gl": { "type": "completion", "analyzer": "galician" },
+          "de": { "type": "completion", "analyzer": "german" },
+          "el": { "type": "completion", "analyzer": "greek" },
+          "hi": { "type": "completion", "analyzer": "hindi" },
+          "hu": { "type": "completion", "analyzer": "hungarian" },
+          "id": { "type": "completion", "analyzer": "indonesian" },
+          "it": { "type": "completion", "analyzer": "italian" },
+          "no": { "type": "completion", "analyzer": "norwegian" },
+          "fa": { "type": "completion", "analyzer": "persian" },
+          "pt": { "type": "completion", "analyzer": "portuguese" },
+          "ro": { "type": "completion", "analyzer": "romanian" },
+          "ru": { "type": "completion", "analyzer": "russian" },
+          "es": { "type": "completion", "analyzer": "spanish" },
+          "sv": { "type": "completion", "analyzer": "swedish" },
+          "tr": { "type": "completion", "analyzer": "turkish" },
+          "th": { "type": "completion", "analyzer": "thai" }
+        }
+      },
+      "sat": {
+        "properties": {
+          "zh": {
+            "type": "search_as_you_type",
+            "analyzer": "index_ik"
+          },
+          "en": { "type": "search_as_you_type", "analyzer": "english" },
+          "ar": { "type": "search_as_you_type", "analyzer": "arabic" },
+          "hy": { "type": "search_as_you_type", "analyzer": "armenian" },
+          "eu": { "type": "search_as_you_type", "analyzer": "basque" },
+          "pt_br": { "type": "search_as_you_type", "analyzer": "brazilian" },
+          "bg": { "type": "search_as_you_type", "analyzer": "bulgarian" },
+          "ca": { "type": "search_as_you_type", "analyzer": "catalan" },
+          "cjk": { "type": "search_as_you_type", "analyzer": "cjk" },
+          "cs": { "type": "search_as_you_type", "analyzer": "czech" },
+          "da": { "type": "search_as_you_type", "analyzer": "danish" },
+          "nl": { "type": "search_as_you_type", "analyzer": "dutch" },
+          "fi": { "type": "search_as_you_type", "analyzer": "finnish" },
+          "fr": { "type": "search_as_you_type", "analyzer": "french" },
+          "gl": { "type": "search_as_you_type", "analyzer": "galician" },
+          "de": { "type": "search_as_you_type", "analyzer": "german" },
+          "el": { "type": "search_as_you_type", "analyzer": "greek" },
+          "hi": { "type": "search_as_you_type", "analyzer": "hindi" },
+          "hu": { "type": "search_as_you_type", "analyzer": "hungarian" },
+          "id": { "type": "search_as_you_type", "analyzer": "indonesian" },
+          "it": { "type": "search_as_you_type", "analyzer": "italian" },
+          "no": { "type": "search_as_you_type", "analyzer": "norwegian" },
+          "fa": { "type": "search_as_you_type", "analyzer": "persian" },
+          "pt": { "type": "search_as_you_type", "analyzer": "portuguese" },
+          "ro": { "type": "search_as_you_type", "analyzer": "romanian" },
+          "ru": { "type": "search_as_you_type", "analyzer": "russian" },
+          "es": { "type": "search_as_you_type", "analyzer": "spanish" },
+          "sv": { "type": "search_as_you_type", "analyzer": "swedish" },
+          "tr": { "type": "search_as_you_type", "analyzer": "turkish" },
+          "th": { "type": "search_as_you_type", "analyzer": "thai" }
+        }
+      }
+    }
+  }
+}
+
-- 
libgit2 0.21.2