Blame view

api/result_formatter.py 10.8 KB
1f6d15fa   tangwang   重构:SPU级别索引、统一索引架构...
1
2
3
4
5
  """
  Result formatter for converting ES internal format to external-friendly format.
  """
  
  from typing import List, Dict, Any, Optional
cadc77b6   tangwang   索引字段名、变量名、API数据结构...
6
  from .models import SpuResult, SkuResult, FacetResult, FacetValue
1f6d15fa   tangwang   重构:SPU级别索引、统一索引架构...
7
8
9
10
11
12
13
14
  
  
  class ResultFormatter:
      """Formats ES search results to external-friendly format.

      Every method is static; the class only acts as a namespace.
      """

      # Suffix carried by facet aggregation names; stripped to obtain the field
      # name reported back to the caller.
      _FACET_SUFFIX = "_facet"
      # Prefix of aggregation names that target one specification name,
      # e.g. "specifications_<name>_facet".
      _SPEC_PREFIX = "specifications_"
      # Languages for which *_zh / *_en document fields are read.
      _SUPPORTED_LANGUAGES = ("zh", "en")

      @staticmethod
      def format_search_results(
          es_hits: List[Dict[str, Any]],
          max_score: float = 1.0,
          language: str = "zh"
      ) -> List[SpuResult]:
          """
          Convert ES hits to SpuResult list.

          Args:
              es_hits: List of ES hit dictionaries (with _id, _score, _source).
                  None is treated as an empty list.
              max_score: Maximum score (unused, kept for compatibility)
              language: Preferred language for multi-language fields, "zh" or
                  "en" (case-insensitive). An empty/None value means "zh"; any
                  other value falls back to "en".

          Returns:
              List of SpuResult objects, one per hit, in input order
          """
          lang = ResultFormatter._normalize_language(language)
          pick = ResultFormatter._pick_lang_field
          results = []

          for hit in es_hits or []:
              # `_source` may be absent or explicitly null; both are handled as
              # an empty document instead of raising on `.get`.
              source = hit.get('_source') or {}

              # Use original ES score directly (no normalization)
              relevance_score = ResultFormatter._coerce_score(hit.get('_score'))

              # The category name feeds both `category` and `category_name`.
              category_name = pick(source, "category_name", lang)

              skus = ResultFormatter._build_skus(source.get('skus'))

              # In stock when any SKU has stock > 0; a product without SKU data
              # is reported as in stock.
              in_stock = any(sku.stock > 0 for sku in skus) if skus else True

              results.append(SpuResult(
                  spu_id=str(source.get('spu_id', '')),
                  title=pick(source, "title", lang),
                  brief=pick(source, "brief", lang),
                  handle=source.get('handle'),
                  description=pick(source, "description", lang),
                  vendor=pick(source, "vendor", lang),
                  category=category_name,
                  category_path=pick(source, "category_path", lang),
                  category_name=category_name,
                  category_id=source.get('category_id'),
                  category_level=source.get('category_level'),
                  category1_name=source.get('category1_name'),
                  category2_name=source.get('category2_name'),
                  category3_name=source.get('category3_name'),
                  tags=source.get('tags'),
                  price=source.get('min_price'),
                  compare_at_price=source.get('compare_at_price'),
                  currency="USD",  # Default currency
                  image_url=source.get('image_url'),
                  in_stock=in_stock,
                  sku_prices=source.get('sku_prices'),
                  sku_weights=source.get('sku_weights'),
                  sku_weight_units=source.get('sku_weight_units'),
                  total_inventory=source.get('total_inventory'),
                  option1_name=source.get('option1_name'),
                  option2_name=source.get('option2_name'),
                  option3_name=source.get('option3_name'),
                  specifications=source.get('specifications'),
                  skus=skus,
                  relevance_score=relevance_score
              ))

          return results

      @staticmethod
      def _normalize_language(language: Optional[str]) -> str:
          """Lower-case the requested language; default to "zh", else fall back to "en"."""
          lang = (language or "zh").lower()
          if lang not in ResultFormatter._SUPPORTED_LANGUAGES:
              lang = "en"
          return lang

      @staticmethod
      def _pick_lang_field(src: Dict[str, Any], base: str, lang: str) -> Optional[str]:
          """Pick `<base>_zh` or `<base>_en` by language, falling back to the other when falsy."""
          zh_val = src.get(f"{base}_zh")
          en_val = src.get(f"{base}_en")
          if lang == "zh":
              return zh_val or en_val
          return en_val or zh_val

      @staticmethod
      def _coerce_score(score: Any) -> float:
          """Return the ES score as a float, or 0.0 when it is None or not numeric."""
          # A None score can happen with certain query types or when the score
          # is explicitly null.
          if score is None:
              return 0.0
          try:
              return float(score)
          except (ValueError, TypeError):
              return 0.0

      @staticmethod
      def _build_skus(skus_data: Any) -> List[SkuResult]:
          """Build SkuResult objects from a document's raw `skus` value (non-lists yield [])."""
          if not isinstance(skus_data, list):
              return []

          skus = []
          for sku_entry in skus_data:
              if not isinstance(sku_entry, dict):
                  # Malformed entry: there is nothing to read from it.
                  continue
              skus.append(SkuResult(
                  sku_id=str(sku_entry.get('sku_id', '')),
                  title=sku_entry.get('title'),
                  price=sku_entry.get('price'),
                  compare_at_price=sku_entry.get('compare_at_price'),
                  sku=sku_entry.get('sku'),
                  # A missing or null stock counts as 0, so the caller's
                  # in-stock check never compares None with an int.
                  stock=sku_entry.get('stock') or 0,
                  options=sku_entry.get('options')
              ))
          return skus

      @staticmethod
      def _bucket_values(
          buckets: List[Dict[str, Any]],
          prefer_key_as_string: bool = False
      ) -> List[FacetValue]:
          """Turn aggregation buckets into unselected FacetValue objects.

          The label is `str(key)`, or the bucket's `key_as_string` when present
          and `prefer_key_as_string` is True.
          """
          values = []
          for bucket in buckets:
              key = bucket['key']
              if prefer_key_as_string:
                  label = bucket.get('key_as_string', str(key))
              else:
                  label = str(key)
              values.append(FacetValue(
                  value=key,
                  label=label,
                  count=bucket['doc_count'],
                  selected=False
              ))
          return values

      @staticmethod
      def format_facets(
          es_aggregations: Dict[str, Any],
          facet_configs: Optional[List[Any]] = None
      ) -> List[FacetResult]:
          """
          Format ES aggregations to FacetResult list.

          Supported aggregation shapes:
          1. Plain terms aggregations
          2. Range aggregations (buckets carrying `from` and/or `to`)
          3. Nested specifications aggregations (grouped by name, then
             aggregated by value)

          Aggregations matching none of these shapes are skipped.

          Args:
              es_aggregations: ES aggregations response. None or empty yields
                  an empty list.
              facet_configs: Facet configurations (optional, currently unused)

          Returns:
              List of FacetResult objects
          """
          facets = []
          if not es_aggregations:
              return facets

          suffix = ResultFormatter._FACET_SUFFIX
          spec_prefix = ResultFormatter._SPEC_PREFIX
          bucket_values = ResultFormatter._bucket_values

          for field_name, agg_data in es_aggregations.items():
              if not isinstance(agg_data, dict):
                  # Not an aggregation body; the key lookups below would fail.
                  continue

              # Nested specifications facet covering every specification name:
              # one FacetResult is emitted per name bucket.
              if field_name == "specifications_facet" and 'by_name' in agg_data:
                  by_name_agg = agg_data['by_name']
                  for name_bucket in by_name_agg.get('buckets', []):
                      name = name_bucket['key']
                      value_counts = name_bucket.get('value_counts', {})
                      facets.append(FacetResult(
                          field=f"specifications.{name}",
                          label=str(name),  # the spec name itself is the label
                          type="terms",
                          values=bucket_values(value_counts.get('buckets', [])),
                          total_count=name_bucket['doc_count']
                      ))
                  continue

              # Nested specifications facet restricted to a single name, which
              # is encoded in the aggregation name between prefix and suffix.
              if (
                  field_name.startswith(spec_prefix)
                  and field_name.endswith(suffix)
                  and 'filter_by_name' in agg_data
              ):
                  name = field_name[len(spec_prefix):-len(suffix)]
                  filter_by_name_agg = agg_data.get('filter_by_name', {})
                  value_counts = filter_by_name_agg.get('value_counts', {})
                  facets.append(FacetResult(
                      field=f"specifications.{name}",
                      label=str(name),
                      type="terms",
                      values=bucket_values(value_counts.get('buckets', [])),
                      total_count=filter_by_name_agg.get('doc_count', 0)
                  ))
                  continue

              buckets = agg_data.get('buckets')
              if buckets is None:
                  # Neither a terms nor a range aggregation.
                  continue

              if field_name.endswith(suffix):
                  display_field = field_name[:-len(suffix)]
              else:
                  display_field = field_name

              # Range must be tested before terms: both shapes expose
              # `buckets`, so testing terms first would make the range branch
              # unreachable and label range facets as "terms".
              if any('from' in b or 'to' in b for b in buckets):
                  values = []
                  for bucket in buckets:
                      range_key = bucket.get('key', '')
                      values.append(FacetValue(
                          value=range_key,
                          label=range_key,
                          count=bucket['doc_count'],
                          selected=False
                      ))
                  facets.append(FacetResult(
                      field=display_field,
                      label=display_field,
                      type="range",
                      values=values
                  ))
              else:
                  values = bucket_values(buckets, prefer_key_as_string=True)
                  facets.append(FacetResult(
                      field=display_field,
                      label=display_field,  # Can be enhanced with field labels
                      type="terms",
                      values=values,
                      # NOTE(review): this adds a document count
                      # (sum_other_doc_count) to a bucket count (len(values));
                      # kept as-is, confirm the intended meaning of total_count.
                      total_count=agg_data.get('sum_other_doc_count', 0) + len(values)
                  ))

          return facets

      @staticmethod
      def generate_suggestions(
          query: str,
          results: List[SpuResult]
      ) -> List[str]:
          """
          Generate search suggestions.

          Args:
              query: Original search query
              results: Search results

          Returns:
              List of suggestion strings (currently returns empty list)
          """
          # TODO: Implement suggestion generation logic
          return []

      @staticmethod
      def generate_related_searches(
          query: str,
          results: List[SpuResult]
      ) -> List[str]:
          """
          Generate related searches.

          Args:
              query: Original search query
              results: Search results

          Returns:
              List of related search strings (currently returns empty list)
          """
          # TODO: Implement related search generation logic
          return []