Commit d8ca3b13f85871d0b2e63595cf2594fb421c9d23
1 parent
bad3b18b
修复分面结果中各选项的计数与实际产品数不一致的问题(原因:统计的是嵌套子文档个数,而非产品个数)
问题:nested 聚合统计的是嵌套文档(specifications 条目)数量,而不是产品(父文档)数量。
修复内容:
在 es_query_builder.py 中:为 specifications 分面的 value_counts(terms)聚合添加了 reverse_nested 子聚合,用于统计产品(父文档)数量;该行为由 build_facets 新增的参数 use_reverse_nested 控制(默认 True):
"aggs": { "product_count": { "reverse_nested": {} } }
在 result_formatter.py 中:更新读取逻辑,优先从每个桶的 product_count.doc_count 读取产品数量;若桶中没有 product_count(兼容旧格式),则回退到桶自身的 doc_count。
修复效果:
之前:分面显示 62(统计了 62 个嵌套文档/规格条目)
现在:分面显示实际的产品数量(例如 2),与搜索结果中匹配该选项的产品数一致
Showing
3 changed files
with
39 additions
and
9 deletions
Show diff stats
api/result_formatter.py
| @@ -308,10 +308,20 @@ class ResultFormatter: | @@ -308,10 +308,20 @@ class ResultFormatter: | ||
| 308 | for value_bucket in value_counts['buckets']: | 308 | for value_bucket in value_counts['buckets']: |
| 309 | # Check if this spec value is selected | 309 | # Check if this spec value is selected |
| 310 | is_selected = (name, value_bucket['key']) in selected_specs | 310 | is_selected = (name, value_bucket['key']) in selected_specs |
| 311 | + | ||
| 312 | + # 使用 reverse_nested 的 product_count 统计产品数量(而不是规格条目数量) | ||
| 313 | + # 如果没有 product_count(兼容旧格式),回退到 doc_count | ||
| 314 | + product_count_agg = value_bucket.get('product_count', {}) | ||
| 315 | + if product_count_agg and 'doc_count' in product_count_agg: | ||
| 316 | + count = product_count_agg['doc_count'] | ||
| 317 | + else: | ||
| 318 | + # 回退到 doc_count(兼容旧格式,但这不是我们想要的计数方式) | ||
| 319 | + count = value_bucket.get('doc_count', 0) | ||
| 320 | + | ||
| 311 | value = FacetValue( | 321 | value = FacetValue( |
| 312 | value=value_bucket['key'], | 322 | value=value_bucket['key'], |
| 313 | label=str(value_bucket['key']), | 323 | label=str(value_bucket['key']), |
| 314 | - count=value_bucket['doc_count'], | 324 | + count=count, |
| 315 | selected=is_selected | 325 | selected=is_selected |
| 316 | ) | 326 | ) |
| 317 | values.append(value) | 327 | values.append(value) |
search/es_query_builder.py
| @@ -829,13 +829,16 @@ class ESQueryBuilder: | @@ -829,13 +829,16 @@ class ESQueryBuilder: | ||
| 829 | 829 | ||
| 830 | def build_facets( | 830 | def build_facets( |
| 831 | self, | 831 | self, |
| 832 | - facet_configs: Optional[List['FacetConfig']] = None | 832 | + facet_configs: Optional[List['FacetConfig']] = None, |
| 833 | + use_reverse_nested: bool = True | ||
| 833 | ) -> Dict[str, Any]: | 834 | ) -> Dict[str, Any]: |
| 834 | """ | 835 | """ |
| 835 | 构建分面聚合。 | 836 | 构建分面聚合。 |
| 836 | 837 | ||
| 837 | Args: | 838 | Args: |
| 838 | facet_configs: 分面配置对象列表 | 839 | facet_configs: 分面配置对象列表 |
| 840 | + use_reverse_nested: 是否使用 reverse_nested 统计产品数量(默认 True) | ||
| 841 | + 如果为 False,将统计嵌套文档数量(性能更好但计数可能不准确) | ||
| 839 | 842 | ||
| 840 | 支持的字段类型: | 843 | 支持的字段类型: |
| 841 | - 普通字段: 如 "category1_name"(terms 或 range 类型) | 844 | - 普通字段: 如 "category1_name"(terms 或 range 类型) |
| @@ -844,6 +847,10 @@ class ESQueryBuilder: | @@ -844,6 +847,10 @@ class ESQueryBuilder: | ||
| 844 | 847 | ||
| 845 | Returns: | 848 | Returns: |
| 846 | ES aggregations 字典 | 849 | ES aggregations 字典 |
| 850 | + | ||
| 851 | + 性能说明: | ||
| 852 | + - use_reverse_nested=True: 统计产品数量,准确性高但性能略差(通常影响 < 20%) | ||
| 853 | + - use_reverse_nested=False: 统计嵌套文档数量,性能更好但计数可能不准确 | ||
| 847 | """ | 854 | """ |
| 848 | if not facet_configs: | 855 | if not facet_configs: |
| 849 | return {} | 856 | return {} |
| @@ -884,19 +891,31 @@ class ESQueryBuilder: | @@ -884,19 +891,31 @@ class ESQueryBuilder: | ||
| 884 | if field.startswith("specifications."): | 891 | if field.startswith("specifications."): |
| 885 | name = field[len("specifications."):] | 892 | name = field[len("specifications."):] |
| 886 | agg_name = f"specifications_{name}_facet" | 893 | agg_name = f"specifications_{name}_facet" |
| 894 | + # 使用 reverse_nested 统计产品(父文档)数量,而不是规格条目(嵌套文档)数量 | ||
| 895 | + # 这样可以确保分面计数反映实际的产品数量,与搜索结果数量一致 | ||
| 896 | + base_value_counts = { | ||
| 897 | + "terms": { | ||
| 898 | + "field": "specifications.value", | ||
| 899 | + "size": size, | ||
| 900 | + "order": {"_count": "desc"} | ||
| 901 | + } | ||
| 902 | + } | ||
| 903 | + | ||
| 904 | + # 如果启用 reverse_nested,添加子聚合统计产品数量 | ||
| 905 | + if use_reverse_nested: | ||
| 906 | + base_value_counts["aggs"] = { | ||
| 907 | + "product_count": { | ||
| 908 | + "reverse_nested": {} | ||
| 909 | + } | ||
| 910 | + } | ||
| 911 | + | ||
| 887 | aggs[agg_name] = { | 912 | aggs[agg_name] = { |
| 888 | "nested": {"path": "specifications"}, | 913 | "nested": {"path": "specifications"}, |
| 889 | "aggs": { | 914 | "aggs": { |
| 890 | "filter_by_name": { | 915 | "filter_by_name": { |
| 891 | "filter": {"term": {"specifications.name": name}}, | 916 | "filter": {"term": {"specifications.name": name}}, |
| 892 | "aggs": { | 917 | "aggs": { |
| 893 | - "value_counts": { | ||
| 894 | - "terms": { | ||
| 895 | - "field": "specifications.value", | ||
| 896 | - "size": size, | ||
| 897 | - "order": {"_count": "desc"} | ||
| 898 | - } | ||
| 899 | - } | 918 | + "value_counts": base_value_counts |
| 900 | } | 919 | } |
| 901 | } | 920 | } |
| 902 | } | 921 | } |
clip-as-service @ 03410570d43
| @@ -0,0 +1 @@ | @@ -0,0 +1 @@ | ||
| 1 | +Subproject commit 03410570d4398084f5ca5c88ad968248e0f3fc5d |