From d8ca3b13f85871d0b2e63595cf2594fb421c9d23 Mon Sep 17 00:00:00 2001 From: tangwang Date: Thu, 25 Dec 2025 13:12:34 +0800 Subject: [PATCH] 修复 分面结果 各个选项结果数 和实际不一致的问题(因为统计的是子文档个数) 问题:nested 聚合统计的是嵌套文档(specifications 条目)数量,而不是产品(父文档)数量。 修复内容: 在 es_query_builder.py 中:为 specifications 分面的聚合添加了 reverse_nested 子聚合,用于统计产品数量: "aggs": { "product_count": { "reverse_nested": {} } } 在 result_formatter.py 中:更新读取逻辑,从 product_count.doc_count 读取产品数量,而不是直接使用 doc_count。 修复效果: 之前:分面显示 62(统计了 62 个嵌套文档/规格条目) 现在:分面显示实际的产品数量(例如 2),与搜索结果数量一致 --- api/result_formatter.py | 12 +++++++++++- search/es_query_builder.py | 35 +++++++++++++++++++++++++++-------- third-party/clip-as-service | 1 + 3 files changed, 39 insertions(+), 9 deletions(-) create mode 160000 third-party/clip-as-service diff --git a/api/result_formatter.py b/api/result_formatter.py index 7770cf2..f3fd9f7 100644 --- a/api/result_formatter.py +++ b/api/result_formatter.py @@ -308,10 +308,20 @@ class ResultFormatter: for value_bucket in value_counts['buckets']: # Check if this spec value is selected is_selected = (name, value_bucket['key']) in selected_specs + + # 使用 reverse_nested 的 product_count 统计产品数量(而不是规格条目数量) + # 如果没有 product_count(兼容旧格式),回退到 doc_count + product_count_agg = value_bucket.get('product_count', {}) + if product_count_agg and 'doc_count' in product_count_agg: + count = product_count_agg['doc_count'] + else: + # 回退到 doc_count(兼容旧格式,但这不是我们想要的计数方式) + count = value_bucket.get('doc_count', 0) + value = FacetValue( value=value_bucket['key'], label=str(value_bucket['key']), - count=value_bucket['doc_count'], + count=count, selected=is_selected ) values.append(value) diff --git a/search/es_query_builder.py b/search/es_query_builder.py index dbe44fa..7b6bd77 100644 --- a/search/es_query_builder.py +++ b/search/es_query_builder.py @@ -829,13 +829,16 @@ class ESQueryBuilder: def build_facets( self, - facet_configs: Optional[List['FacetConfig']] = None + facet_configs: Optional[List['FacetConfig']] = None, + use_reverse_nested: bool = True ) -> Dict[str, Any]: """ 构建分面聚合。 Args: facet_configs: 分面配置对象列表 + use_reverse_nested: 是否使用 reverse_nested 统计产品数量(默认 True) + 如果为 False,将统计嵌套文档数量(性能更好但计数可能不准确) 支持的字段类型: - 普通字段: 如 "category1_name"(terms 或 range 类型) @@ -844,6 +847,10 @@ class ESQueryBuilder: Returns: ES aggregations 字典 + + 性能说明: + - use_reverse_nested=True: 统计产品数量,准确性高但性能略差(通常影响 < 20%) + - use_reverse_nested=False: 统计嵌套文档数量,性能更好但计数可能不准确 """ if not facet_configs: return {} @@ -884,19 +891,31 @@ class ESQueryBuilder: if field.startswith("specifications."): name = field[len("specifications."):] agg_name = f"specifications_{name}_facet" + # 使用 reverse_nested 统计产品(父文档)数量,而不是规格条目(嵌套文档)数量 + # 这样可以确保分面计数反映实际的产品数量,与搜索结果数量一致 + base_value_counts = { + "terms": { + "field": "specifications.value", + "size": size, + "order": {"_count": "desc"} + } + } + + # 如果启用 reverse_nested,添加子聚合统计产品数量 + if use_reverse_nested: + base_value_counts["aggs"] = { + "product_count": { + "reverse_nested": {} + } + } + aggs[agg_name] = { "nested": {"path": "specifications"}, "aggs": { "filter_by_name": { "filter": {"term": {"specifications.name": name}}, "aggs": { - "value_counts": { - "terms": { - "field": "specifications.value", - "size": size, - "order": {"_count": "desc"} - } - } + "value_counts": base_value_counts } } } diff --git a/third-party/clip-as-service b/third-party/clip-as-service new file mode 160000 index 0000000..0341057 --- /dev/null +++ b/third-party/clip-as-service @@ -0,0 +1 @@ +Subproject commit 03410570d4398084f5ca5c88ad968248e0f3fc5d -- libgit2 0.21.2