Blame view

suggestion/mapping.py 2.8 KB
ded6f29e   tangwang   补充suggestion模块
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
  """
  Mapping generator for suggestion indices.
  """
  
  from typing import Dict, Any, List
  
  
  # Maps a language code to the analyzer name used when building that
  # language's per-language sub-fields. Codes that are missing from this table
  # fall back to "standard" in _completion_field and _sat_field.
  # NOTE(review): "index_ansj" is presumably provided by a custom/plugin
  # analyzer that must exist on the target cluster, while the remaining values
  # look like built-in language analyzer names -- confirm against the cluster.
  ANALYZER_BY_LANG: Dict[str, str] = {
      "zh": "index_ansj",
      "en": "english",
      "ar": "arabic",
      "hy": "armenian",
      "eu": "basque",
      "pt_br": "brazilian",
      "bg": "bulgarian",
      "ca": "catalan",
      "cjk": "cjk",
      "cs": "czech",
      "da": "danish",
      "nl": "dutch",
      "fi": "finnish",
      "fr": "french",
      "gl": "galician",
      "de": "german",
      "el": "greek",
      "hi": "hindi",
      "hu": "hungarian",
      "id": "indonesian",
      "it": "italian",
      "no": "norwegian",
      "fa": "persian",
      "pt": "portuguese",
      "ro": "romanian",
      "ru": "russian",
      "es": "spanish",
      "sv": "swedish",
      "tr": "turkish",
      "th": "thai",
  }
  
  
  def _completion_field(lang: str) -> Dict[str, Any]:
      """Return the ``completion`` field definition for one language.

      The analyzer is looked up in ``ANALYZER_BY_LANG``; codes not present
      there use ``"standard"``. For ``"zh"`` the definition additionally
      carries ``"search_analyzer": "query_ansj"``.

      Args:
          lang: Language code used as the lookup key.

      Returns:
          A new dict with ``type`` and ``analyzer`` keys, plus
          ``search_analyzer`` when ``lang`` is ``"zh"``.
      """
      field: Dict[str, Any] = {
          "type": "completion",
          "analyzer": ANALYZER_BY_LANG.get(lang, "standard"),
      }
      # Only Chinese gets a separate query-time analyzer.
      if lang == "zh":
          field["search_analyzer"] = "query_ansj"
      return field
  
  
  def _sat_field(lang: str) -> Dict[str, Any]:
      """Return the ``search_as_you_type`` field definition for one language.

      Args:
          lang: Language code looked up in ``ANALYZER_BY_LANG``; unknown codes
              use the ``"standard"`` analyzer.

      Returns:
          A new dict with ``type`` and ``analyzer`` keys.
      """
      return {
          "type": "search_as_you_type",
          "analyzer": ANALYZER_BY_LANG.get(lang, "standard"),
      }
  
  
  def build_suggestion_mapping(index_languages: List[str]) -> Dict[str, Any]:
      """Assemble the settings and mappings body for a suggestion index.

      Falsy entries of ``index_languages`` are discarded. When nothing is left
      (or the argument itself is falsy) the languages default to
      ``["en", "zh"]``. Every remaining language contributes one sub-field
      under ``completion`` and one under ``sat``.

      Args:
          index_languages: Language codes to create per-language fields for.

      Returns:
          A dict with a ``settings`` section and a ``mappings`` section whose
          ``properties`` hold the fixed fields plus the per-language ones.
      """
      languages = list(filter(None, index_languages or [])) or ["en", "zh"]

      per_lang_completion: Dict[str, Any] = {
          code: _completion_field(code) for code in languages
      }
      per_lang_sat: Dict[str, Any] = {code: _sat_field(code) for code in languages}

      # Fixed fields as (name, type) pairs, kept in their declared order.
      fixed_fields = (
          ("tenant_id", "keyword"),
          ("lang", "keyword"),
          ("text", "keyword"),
          ("text_norm", "keyword"),
          ("sources", "keyword"),
          ("title_doc_count", "integer"),
          ("qanchor_doc_count", "integer"),
          ("query_count_7d", "integer"),
          ("query_count_30d", "integer"),
          ("rank_score", "float"),
          ("lang_confidence", "float"),
          ("lang_source", "keyword"),
          ("lang_conflict", "boolean"),
          ("top_spu_ids", "keyword"),
          ("status", "byte"),
          ("updated_at", "date"),
      )
      properties: Dict[str, Any] = {
          name: {"type": field_type} for name, field_type in fixed_fields
      }
      # Per-language object fields go last, after all fixed fields.
      properties["completion"] = {"properties": per_lang_completion}
      properties["sat"] = {"properties": per_lang_sat}

      settings: Dict[str, Any] = {
          "number_of_shards": 1,
          "number_of_replicas": 0,
          "refresh_interval": "30s",
      }
      return {"settings": settings, "mappings": {"properties": properties}}