Commit 85f08823178f2780894165ec44e85917cb1fa079
1 parent
a10a89a3
过滤逻辑
不同维度(不同的 name):求交集;相同维度(相同的 name):求并集
Showing
5 changed files
with
139 additions
and
39 deletions
Show diff stats
docs/搜索API对接指南.md
| ... | ... | @@ -136,7 +136,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ |
| 136 | 136 | |
| 137 | 137 | #### 1. 精确匹配过滤器 (filters) |
| 138 | 138 | |
| 139 | -用于精确匹配或多值匹配(OR 逻辑)。 | |
| 139 | +用于精确匹配或多值匹配。对于普通字段,数组表示 OR 逻辑(匹配任意一个值);对于 specifications 字段,按维度分组处理(见下文)。 | |
| 140 | 140 | |
| 141 | 141 | **格式**: |
| 142 | 142 | ```json |
| ... | ... | @@ -181,7 +181,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ |
| 181 | 181 | ``` |
| 182 | 182 | 查询规格名称为"color"且值为"white"的商品。 |
| 183 | 183 | |
| 184 | -**多个规格过滤(OR 逻辑)**: | |
| 184 | +**多个规格过滤(按维度分组)**: | |
| 185 | 185 | ```json |
| 186 | 186 | { |
| 187 | 187 | "filters": { |
| ... | ... | @@ -192,7 +192,26 @@ curl -X POST "http://120.76.41.98:6002/search/" \ |
| 192 | 192 | } |
| 193 | 193 | } |
| 194 | 194 | ``` |
| 195 | -查询满足任意一个规格的商品(color=white **或** size=256GB)。 | |
| 195 | +查询同时满足所有规格的商品(color=white **且** size=256GB)。 | |
| 196 | + | |
| 197 | +**相同维度的多个值(OR 逻辑)**: | |
| 198 | +```json | |
| 199 | +{ | |
| 200 | + "filters": { | |
| 201 | + "specifications": [ | |
| 202 | + {"name": "size", "value": "3"}, | |
| 203 | + {"name": "size", "value": "4"}, | |
| 204 | + {"name": "size", "value": "5"}, | |
| 205 | + {"name": "color", "value": "green"} | |
| 206 | + ] | |
| 207 | + } | |
| 208 | +} | |
| 209 | +``` | |
| 210 | +查询满足 (size=3 **或** size=4 **或** size=5) **且** color=green 的商品。 | |
| 211 | + | |
| 212 | +**过滤逻辑说明**: | |
| 213 | +- **不同维度**(不同的 `name`)之间是 **AND** 关系(求交集) | |
| 214 | +- **相同维度**(相同的 `name`)的多个值之间是 **OR** 关系(求并集) | |
| 196 | 215 | |
| 197 | 216 | **常用过滤字段**: |
| 198 | 217 | - `category_name`: 类目名称 |
| ... | ... | @@ -707,9 +726,9 @@ curl -X POST "http://120.76.41.98:6002/search/" \ |
| 707 | 726 | } |
| 708 | 727 | ``` |
| 709 | 728 | |
| 710 | -### 场景7:多个规格过滤(OR逻辑) | |
| 729 | +### 场景7:多个规格过滤(不同维度AND,相同维度OR) | |
| 711 | 730 | |
| 712 | -**需求**: 搜索"手机",筛选color为"white"或size为"256GB"的商品 | |
| 731 | +**需求**: 搜索"手机",筛选color为"white"且size为"256GB"的商品 | |
| 713 | 732 | |
| 714 | 733 | ```json |
| 715 | 734 | { |
| ... | ... | @@ -725,6 +744,24 @@ curl -X POST "http://120.76.41.98:6002/search/" \ |
| 725 | 744 | } |
| 726 | 745 | ``` |
| 727 | 746 | |
| 747 | +**需求**: 搜索"手机",筛选size为"3"、"4"或"5",且color为"green"的商品 | |
| 748 | + | |
| 749 | +```json | |
| 750 | +{ | |
| 751 | + "query": "手机", | |
| 752 | + "size": 20, | |
| 753 | + "language": "zh", | |
| 754 | + "filters": { | |
| 755 | + "specifications": [ | |
| 756 | + {"name": "size", "value": "3"}, | |
| 757 | + {"name": "size", "value": "4"}, | |
| 758 | + {"name": "size", "value": "5"}, | |
| 759 | + {"name": "color", "value": "green"} | |
| 760 | + ] | |
| 761 | + } | |
| 762 | +} | |
| 763 | +``` | |
| 764 | + | |
| 728 | 765 | ### 场景8:规格分面搜索 |
| 729 | 766 | |
| 730 | 767 | **需求**: 搜索"手机",获取所有规格的分面统计 | ... | ... |
docs/搜索API速查表.md
docs/系统设计文档.md
| ... | ... | @@ -480,7 +480,8 @@ laptop AND (gaming OR professional) ANDNOT cheap |
| 480 | 480 | - 范围过滤:`{"min_price": {"gte": 50, "lte": 200}}` |
| 481 | 481 | - **Specifications嵌套过滤**: |
| 482 | 482 | - 单个规格:`{"specifications": {"name": "color", "value": "white"}}` |
| 483 | - - 多个规格(OR):`{"specifications": [{"name": "color", "value": "white"}, {"name": "size", "value": "256GB"}]}` | |
| 483 | + - 多个规格:`{"specifications": [{"name": "color", "value": "white"}, {"name": "size", "value": "256GB"}]}` | |
| 484 | + - 过滤逻辑:不同维度(不同name)是AND关系,相同维度(相同name)的多个值是OR关系 | |
| 484 | 485 | - 使用ES的`nested`查询实现 |
| 485 | 486 | - **text_recall**: 文本相关性召回 |
| 486 | 487 | - 同时搜索中英文字段(`title_zh/en`, `brief_zh/en`, `description_zh/en`, `vendor_zh/en`, `category_path_zh/en`, `category_name_zh/en`, `tags`) |
| ... | ... | @@ -593,7 +594,7 @@ ranking: |
| 593 | 594 | - ✅ 语义搜索(KNN 检索) |
| 594 | 595 | - ✅ 相关性排序(BM25 + 向量相似度) |
| 595 | 596 | - ✅ 结果聚合(Faceted Search) |
| 596 | -- ✅ Specifications嵌套过滤(单个和多个规格,OR逻辑) | |
| 597 | +- ✅ Specifications嵌套过滤(单个和多个规格,按维度分组:不同维度AND,相同维度OR) | |
| 597 | 598 | - ✅ Specifications嵌套分面(所有规格名称和指定规格名称) |
| 598 | 599 | - ✅ SKU筛选(按维度过滤,应用层实现) |
| 599 | 600 | |
| ... | ... | @@ -784,7 +785,7 @@ class RangeFilter(BaseModel): |
| 784 | 785 | } |
| 785 | 786 | ``` |
| 786 | 787 | |
| 787 | -**多个规格过滤(OR逻辑)**: | |
| 788 | +**多个规格过滤(按维度分组)**: | |
| 788 | 789 | ```json |
| 789 | 790 | { |
| 790 | 791 | "specifications": [ |
| ... | ... | @@ -799,7 +800,7 @@ class RangeFilter(BaseModel): |
| 799 | 800 | 2. Searcher 层:透传 `filters` 字典 |
| 800 | 801 | 3. ES Query Builder:检测 `specifications` 键,构建ES `nested` 查询 |
| 801 | 802 | - 单个规格:构建单个 `nested` 查询 |
| 802 | - - 多个规格:构建多个 `nested` 查询,使用 `should` 组合(OR逻辑) | |
| 803 | + - 多个规格:按 name 维度分组,相同维度内的多个值使用 `should` 组合(OR逻辑,`minimum_should_match: 1`);每个维度生成一个独立的 `nested` 子句并加入 `filter` 列表,不同维度之间为 AND 逻辑 | |
| 803 | 804 | 4. 输出:ES nested 查询(`nested.path=specifications` + `bool.must=[term(name), term(value)]`) |
| 804 | 805 | |
| 805 | 806 | #### 8.3.4 响应 Facets 数据流 | ... | ... |
docs/索引字段说明v2.md
| ... | ... | @@ -132,7 +132,7 @@ |
| 132 | 132 | } |
| 133 | 133 | ``` |
| 134 | 134 | |
| 135 | -**多个规格过滤(OR逻辑)**: | |
| 135 | +**多个规格过滤(按维度分组)**: | |
| 136 | 136 | ```json |
| 137 | 137 | { |
| 138 | 138 | "query": "手机", |
| ... | ... | @@ -144,21 +144,57 @@ |
| 144 | 144 | } |
| 145 | 145 | } |
| 146 | 146 | ``` |
| 147 | +说明:不同维度(不同name)是AND关系,相同维度(相同name)的多个值是OR关系。 | |
| 148 | + | |
| 149 | +**示例:相同维度的多个值(OR)**: | |
| 150 | +```json | |
| 151 | +{ | |
| 152 | + "query": "手机", | |
| 153 | + "filters": { | |
| 154 | + "specifications": [ | |
| 155 | + {"name": "size", "value": "3"}, | |
| 156 | + {"name": "size", "value": "4"}, | |
| 157 | + {"name": "size", "value": "5"}, | |
| 158 | + {"name": "color", "value": "green"} | |
| 159 | + ] | |
| 160 | + } | |
| 161 | +} | |
| 162 | +``` | |
| 163 | +生成查询:(size=3 OR size=4 OR size=5) AND color=green | |
| 147 | 164 | |
| 148 | 165 | **ES 查询结构**(后端自动生成): |
| 149 | 166 | ```json |
| 150 | 167 | { |
| 151 | - "nested": { | |
| 152 | - "path": "specifications", | |
| 153 | - "query": { | |
| 154 | - "bool": { | |
| 155 | - "must": [ | |
| 156 | - { "term": { "specifications.name": "color" } }, | |
| 157 | - { "term": { "specifications.value": "white" } } | |
| 158 | - ] | |
| 168 | + "filter": [ | |
| 169 | + { | |
| 170 | + "nested": { | |
| 171 | + "path": "specifications", | |
| 172 | + "query": { | |
| 173 | + "bool": { | |
| 174 | + "should": [ | |
| 175 | + {"bool": {"must": [{"term": {"specifications.name": "size"}}, {"term": {"specifications.value": "3"}}]}}, | |
| 176 | + {"bool": {"must": [{"term": {"specifications.name": "size"}}, {"term": {"specifications.value": "4"}}]}}, | |
| 177 | + {"bool": {"must": [{"term": {"specifications.name": "size"}}, {"term": {"specifications.value": "5"}}]}} | |
| 178 | + ], | |
| 179 | + "minimum_should_match": 1 | |
| 180 | + } | |
| 181 | + } | |
| 182 | + } | |
| 183 | + }, | |
| 184 | + { | |
| 185 | + "nested": { | |
| 186 | + "path": "specifications", | |
| 187 | + "query": { | |
| 188 | + "bool": { | |
| 189 | + "must": [ | |
| 190 | + {"term": {"specifications.name": "color"}}, | |
| 191 | + {"term": {"specifications.value": "green"}} | |
| 192 | + ] | |
| 193 | + } | |
| 194 | + } | |
| 159 | 195 | } |
| 160 | 196 | } |
| 161 | - } | |
| 197 | + ] | |
| 162 | 198 | } |
| 163 | 199 | ``` |
| 164 | 200 | ... | ... |
search/es_query_builder.py
| ... | ... | @@ -373,33 +373,58 @@ class ESQueryBuilder: |
| 373 | 373 | } |
| 374 | 374 | }) |
| 375 | 375 | elif isinstance(value, list): |
| 376 | - # 多个规格过滤(OR逻辑):[{"name": "color", "value": "green"}, ...] | |
| 377 | - should_clauses = [] | |
| 376 | + # 多个规格过滤:按 name 分组,相同维度 OR,不同维度 AND | |
| 377 | + # 例如:[{"name": "size", "value": "3"}, {"name": "size", "value": "4"}, {"name": "color", "value": "green"}] | |
| 378 | + # 应该生成:(size=3 OR size=4) AND color=green | |
| 379 | + from collections import defaultdict | |
| 380 | + specs_by_name = defaultdict(list) | |
| 378 | 381 | for spec in value: |
| 379 | 382 | if isinstance(spec, dict): |
| 380 | 383 | name = spec.get("name") |
| 381 | 384 | spec_value = spec.get("value") |
| 382 | 385 | if name and spec_value: |
| 383 | - should_clauses.append({ | |
| 384 | - "nested": { | |
| 385 | - "path": "specifications", | |
| 386 | - "query": { | |
| 387 | - "bool": { | |
| 388 | - "must": [ | |
| 389 | - {"term": {"specifications.name": name}}, | |
| 390 | - {"term": {"specifications.value": spec_value}} | |
| 391 | - ] | |
| 392 | - } | |
| 386 | + specs_by_name[name].append(spec_value) | |
| 387 | + | |
| 388 | + # 为每个 name 维度生成一个过滤子句 | |
| 389 | + for name, values in specs_by_name.items(): | |
| 390 | + if len(values) == 1: | |
| 391 | + # 单个值,直接生成 term 查询 | |
| 392 | + filter_clauses.append({ | |
| 393 | + "nested": { | |
| 394 | + "path": "specifications", | |
| 395 | + "query": { | |
| 396 | + "bool": { | |
| 397 | + "must": [ | |
| 398 | + {"term": {"specifications.name": name}}, | |
| 399 | + {"term": {"specifications.value": values[0]}} | |
| 400 | + ] | |
| 393 | 401 | } |
| 394 | 402 | } |
| 403 | + } | |
| 404 | + }) | |
| 405 | + else: | |
| 406 | + # 多个值,使用 should (OR) 连接 | |
| 407 | + should_clauses = [] | |
| 408 | + for spec_value in values: | |
| 409 | + should_clauses.append({ | |
| 410 | + "bool": { | |
| 411 | + "must": [ | |
| 412 | + {"term": {"specifications.name": name}}, | |
| 413 | + {"term": {"specifications.value": spec_value}} | |
| 414 | + ] | |
| 415 | + } | |
| 395 | 416 | }) |
| 396 | - if should_clauses: | |
| 397 | - filter_clauses.append({ | |
| 398 | - "bool": { | |
| 399 | - "should": should_clauses, | |
| 400 | - "minimum_should_match": 1 | |
| 401 | - } | |
| 402 | - }) | |
| 417 | + filter_clauses.append({ | |
| 418 | + "nested": { | |
| 419 | + "path": "specifications", | |
| 420 | + "query": { | |
| 421 | + "bool": { | |
| 422 | + "should": should_clauses, | |
| 423 | + "minimum_should_match": 1 | |
| 424 | + } | |
| 425 | + } | |
| 426 | + } | |
| 427 | + }) | |
| 403 | 428 | continue |
| 404 | 429 | |
| 405 | 430 | # 普通字段过滤 | ... | ... |