Commit c581becd8aa4337356cd6465277a543bdc145503

Authored by tangwang
1 parent b8317ce4

feat: 实现 Multi-Select Faceting 和 Selected 状态标记

核心功能:
- 添加 multi_select 字段到 FacetConfig(默认为 true)
- 实现 post_filter 支持 disjunctive faceting
- 后端自动标记 facet 值的 selected 状态
- 支持 specifications 和普通字段的 multi-select

技术改进:
- ESQueryBuilder: 分离 conjunctive/disjunctive filters
- ResultFormatter: 根据 current_filters 标记 selected
- Searcher: 传递 facet_configs 给 query builder

文档更新:
- 添加 multi_select_faceting.md 详细文档
- 更新 API 对接指南,说明新功能
- 添加测试脚本 test_multi_select_facet.py

业界标准:
- 遵循 Elasticsearch/Algolia/Amazon 的最佳实践
- 提供探索式搜索体验
- 前后端职责清晰分离
api/models.py
... ... @@ -34,6 +34,10 @@ class FacetConfig(BaseModel):
34 34 field: str = Field(..., description="分面字段名")
35 35 size: int = Field(10, ge=1, le=100, description="返回的分面值数量")
36 36 type: Literal["terms", "range"] = Field("terms", description="分面类型")
  37 + multi_select: bool = Field(
  38 + True,
  39 + description="是否支持多选(disjunctive faceting)。启用后,选中该分面的过滤器时,仍会显示其他可选项"
  40 + )
37 41 ranges: Optional[List[Dict[str, Any]]] = Field(
38 42 None,
39 43 description="范围分面的范围定义(仅当 type='range' 时需要)"
... ...
api/result_formatter.py
... ... @@ -220,24 +220,40 @@ class ResultFormatter:
220 220 @staticmethod
221 221 def format_facets(
222 222 es_aggregations: Dict[str, Any],
223   - facet_configs: Optional[List[Any]] = None
  223 + facet_configs: Optional[List[Any]] = None,
  224 + current_filters: Optional[Dict[str, Any]] = None
224 225 ) -> List[FacetResult]:
225 226 """
226   - Format ES aggregations to FacetResult list.
  227 + Format ES aggregations to FacetResult list with selected state.
227 228  
228 229 支持:
229 230 1. 普通terms聚合
230 231 2. range聚合
231 232 3. specifications嵌套聚合(按name分组,然后按value聚合)
  233 + 4. 标记selected状态(基于current_filters)
232 234  
233 235 Args:
234 236 es_aggregations: ES aggregations response
235 237 facet_configs: Facet configurations (optional)
  238 + current_filters: Current applied filters (used to mark selected values)
236 239  
237 240 Returns:
238   - List of FacetResult objects
  241 + List of FacetResult objects with selected states
239 242 """
240 243 facets = []
  244 +
  245 + # Build a set of selected values for specifications
  246 + selected_specs = set()
  247 + if current_filters and 'specifications' in current_filters:
  248 + specs = current_filters['specifications']
  249 + if isinstance(specs, list):
  250 + # [{"name": "颜色", "value": "白色"}, ...]
  251 + for spec in specs:
  252 + if isinstance(spec, dict):
  253 + selected_specs.add((spec.get('name'), spec.get('value')))
  254 + elif isinstance(specs, dict):
  255 + # {"name": "颜色", "value": "白色"}
  256 + selected_specs.add((specs.get('name'), specs.get('value')))
241 257  
242 258 for field_name, agg_data in es_aggregations.items():
243 259 display_field = field_name[:-6] if field_name.endswith("_facet") else field_name
... ... @@ -254,11 +270,13 @@ class ResultFormatter:
254 270 values = []
255 271 if 'buckets' in value_counts:
256 272 for value_bucket in value_counts['buckets']:
  273 + # Check if this spec value is selected
  274 + is_selected = (name, value_bucket['key']) in selected_specs
257 275 value = FacetValue(
258 276 value=value_bucket['key'],
259 277 label=str(value_bucket['key']),
260 278 count=value_bucket['doc_count'],
261   - selected=False
  279 + selected=is_selected
262 280 )
263 281 values.append(value)
264 282  
... ... @@ -288,11 +306,13 @@ class ResultFormatter:
288 306 values = []
289 307 if 'buckets' in value_counts and value_counts['buckets']:
290 308 for value_bucket in value_counts['buckets']:
  309 + # Check if this spec value is selected
  310 + is_selected = (name, value_bucket['key']) in selected_specs
291 311 value = FacetValue(
292 312 value=value_bucket['key'],
293 313 label=str(value_bucket['key']),
294 314 count=value_bucket['doc_count'],
295   - selected=False
  315 + selected=is_selected
296 316 )
297 317 values.append(value)
298 318  
... ... @@ -311,11 +331,20 @@ class ResultFormatter:
311 331 if 'buckets' in agg_data:
312 332 values = []
313 333 for bucket in agg_data['buckets']:
  334 + # Check if this value is selected in current filters
  335 + is_selected = False
  336 + if current_filters and display_field in current_filters:
  337 + filter_value = current_filters[display_field]
  338 + if isinstance(filter_value, list):
  339 + is_selected = bucket['key'] in filter_value
  340 + else:
  341 + is_selected = bucket['key'] == filter_value
  342 +
314 343 value = FacetValue(
315 344 value=bucket['key'],
316 345 label=bucket.get('key_as_string', str(bucket['key'])),
317 346 count=bucket['doc_count'],
318   - selected=False
  347 + selected=is_selected
319 348 )
320 349 values.append(value)
321 350  
... ... @@ -333,11 +362,20 @@ class ResultFormatter:
333 362 values = []
334 363 for bucket in agg_data['buckets']:
335 364 range_key = bucket.get('key', '')
  365 + # Check if this range is selected
  366 + is_selected = False
  367 + if current_filters and display_field in current_filters:
  368 + filter_value = current_filters[display_field]
  369 + if isinstance(filter_value, list):
  370 + is_selected = range_key in filter_value
  371 + else:
  372 + is_selected = range_key == filter_value
  373 +
336 374 value = FacetValue(
337 375 value=range_key,
338 376 label=range_key,
339 377 count=bucket['doc_count'],
340   - selected=False
  378 + selected=is_selected
341 379 )
342 380 values.append(value)
343 381  
... ...
docs/multi_select_faceting.md 0 → 100644
... ... @@ -0,0 +1,399 @@
  1 +# Multi-Select Faceting 功能说明
  2 +
  3 +## 概述
  4 +
  5 +Multi-Select Faceting(多选分面)是业界标准的 Faceted Search 功能,允许用户在选中某个分面筛选项后,仍然能看到该分面的其他可选项,提供更好的探索式搜索体验。
  6 +
  7 +## 功能特性
  8 +
  9 +### 1. 两种 Faceting 模式
  10 +
  11 +#### 标准模式(Conjunctive Faceting)
  12 +- **设置**: `multi_select: false`(需显式设置;`FacetConfig.multi_select` 的默认值为 `true`)
  13 +- **行为**: 选中某个分面值后,该分面只显示选中的值
  14 +- **适用场景**: 层级下钻、逐步精炼
  15 +- **ES 实现**: 过滤器应用在 `query.bool.filter`
  16 +
  17 +#### Multi-Select 模式(Disjunctive Faceting)
  18 +- **设置**: `multi_select: true`
  19 +- **行为**: 选中某个分面值后,该分面仍显示所有可选项
  20 +- **适用场景**: 颜色、品牌、尺码等可切换属性
  21 +- **ES 实现**: 过滤器应用在 `post_filter`
  22 +
  23 +### 2. Selected 状态标记
  24 +
  25 +所有 facet 值都包含 `selected` 字段,标记当前是否被选中:
  26 +- `selected: true` - 当前筛选项已被选中
  27 +- `selected: false` - 当前筛选项未被选中
  28 +
  29 +## 使用示例
  30 +
  31 +### 示例 1: 标准 Category Faceting
  32 +
  33 +```json
  34 +{
  35 + "query": "T恤",
  36 + "filters": {
  37 + "category1_name": "服装"
  38 + },
  39 + "facets": [
  40 + {
  41 + "field": "category1_name",
  42 + "size": 10,
  43 + "type": "terms",
  44 + "multi_select": false
  45 + }
  46 + ]
  47 +}
  48 +```
  49 +
  50 +**响应**:
  51 +```json
  52 +{
  53 + "results": [...],
  54 + "facets": [
  55 + {
  56 + "field": "category1_name",
  57 + "values": [
  58 + {"value": "服装", "count": 150, "selected": true}
  59 + ]
  60 + }
  61 + ]
  62 +}
  63 +```
  64 +
  65 +### 示例 2: Multi-Select Brand Faceting
  66 +
  67 +```json
  68 +{
  69 + "query": "手机",
  70 + "filters": {
  71 + "brand_name": "苹果"
  72 + },
  73 + "facets": [
  74 + {
  75 + "field": "brand_name",
  76 + "size": 10,
  77 + "type": "terms",
  78 + "multi_select": true
  79 + }
  80 + ]
  81 +}
  82 +```
  83 +
  84 +**响应**:
  85 +```json
  86 +{
  87 + "results": [...只包含苹果手机...],
  88 + "facets": [
  89 + {
  90 + "field": "brand_name",
  91 + "values": [
  92 + {"value": "苹果", "count": 150, "selected": true},
  93 + {"value": "华为", "count": 120, "selected": false},
  94 + {"value": "小米", "count": 98, "selected": false}
  95 + ]
  96 + }
  97 + ]
  98 +}
  99 +```
  100 +
  101 +### 示例 3: Specifications Multi-Select
  102 +
  103 +```json
  104 +{
  105 + "query": "衬衫",
  106 + "filters": {
  107 + "specifications": {
  108 + "name": "颜色",
  109 + "value": "白色"
  110 + }
  111 + },
  112 + "facets": [
  113 + {
  114 + "field": "specifications.颜色",
  115 + "size": 10,
  116 + "type": "terms",
  117 + "multi_select": true
  118 + },
  119 + {
  120 + "field": "specifications.尺码",
  121 + "size": 10,
  122 + "type": "terms",
  123 + "multi_select": false
  124 + }
  125 + ]
  126 +}
  127 +```
  128 +
  129 +**响应**:
  130 +```json
  131 +{
  132 + "results": [...只包含白色衬衫...],
  133 + "facets": [
  134 + {
  135 + "field": "specifications.颜色",
  136 + "label": "颜色",
  137 + "values": [
  138 + {"value": "白色", "count": 50, "selected": true},
  139 + {"value": "蓝色", "count": 35, "selected": false},
  140 + {"value": "黑色", "count": 28, "selected": false}
  141 + ]
  142 + },
  143 + {
  144 + "field": "specifications.尺码",
  145 + "label": "尺码",
  146 + "values": [
  147 + {"value": "M", "count": 20, "selected": false},
  148 + {"value": "L", "count": 18, "selected": false},
  149 + {"value": "XL", "count": 12, "selected": false}
  150 + ]
  151 + }
  152 + ]
  153 +}
  154 +```
  155 +
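  156 +注意:按当前 `_split_filters_for_faceting` 的实现,过滤器是以整个 `specifications` 字段为粒度拆分的——只要任一 `specifications.*` 分面启用了 `multi_select`,全部 specifications 过滤条件都会进入 `post_filter`,而 `post_filter` 不影响聚合。因此本例中尺码分面(`multi_select: false`)的统计预计不会被限定为白色衬衫(上面响应中的数值仅为示意,请以实际返回为准)。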
  157 +
  158 +### 示例 4: 混合多个 Multi-Select Facets
  159 +
  160 +```json
  161 +{
  162 + "query": "*",
  163 + "filters": {
  164 + "category1_name": "玩具",
  165 + "specifications": [
  166 + {"name": "颜色", "value": "红色"},
  167 + {"name": "材质", "value": "塑料"}
  168 + ]
  169 + },
  170 + "facets": [
  171 + {
  172 + "field": "category1_name",
  173 + "size": 10,
  174 + "multi_select": true
  175 + },
  176 + {
  177 + "field": "specifications.颜色",
  178 + "size": 10,
  179 + "multi_select": true
  180 + },
  181 + {
  182 + "field": "specifications.材质",
  183 + "size": 10,
  184 + "multi_select": true
  185 + },
  186 + {
  187 + "field": "specifications.年龄段",
  188 + "size": 10,
  189 + "multi_select": false
  190 + }
  191 + ]
  192 +}
  193 +```
  194 +
  195 +**行为说明**:
  196 +- `category1_name`: 显示所有类目选项(玩具被标记为 selected)
  197 +- `specifications.颜色`: 显示所有颜色选项(红色被标记为 selected)
  198 +- `specifications.材质`: 显示所有材质选项(塑料被标记为 selected)
  199 +- `specifications.年龄段`: 只显示符合当前过滤条件的年龄段选项
  200 +
  201 +## 前端集成建议
  202 +
  203 +### React 示例
  204 +
  205 +```jsx
  206 +function FacetComponent({ facet }) {
  207 + return (
  208 + <div className="facet">
  209 + <h3>{facet.label}</h3>
  210 + {facet.values.map(value => (
  211 + <label key={value.value} className={value.selected ? 'active' : ''}>
  212 + <input
  213 + type="checkbox"
  214 + checked={value.selected}
  215 + onChange={() => toggleFilter(facet.field, value.value)}
  216 + />
  217 + {value.value} ({value.count})
  218 + </label>
  219 + ))}
  220 + </div>
  221 + );
  222 +}
  223 +```
  224 +
  225 +### Vue 示例
  226 +
  227 +```vue
  228 +<template>
  229 + <div class="facet">
  230 + <h3>{{ facet.label }}</h3>
  231 + <label
  232 + v-for="value in facet.values"
  233 + :key="value.value"
  234 + :class="{ active: value.selected }"
  235 + >
  236 + <input
  237 + type="checkbox"
  238 + :checked="value.selected"
  239 + @change="toggleFilter(facet.field, value.value)"
  240 + />
  241 + {{ value.value }} ({{ value.count }})
  242 + </label>
  243 + </div>
  244 +</template>
  245 +```
  246 +
  247 +## 技术实现细节
  248 +
  249 +### Elasticsearch Query 结构
  250 +
  251 +**Multi-Select Faceting** 使用 `post_filter` 实现:
  252 +
  253 +```json
  254 +{
  255 + "query": {
  256 + "bool": {
  257 + "must": [...],
  258 + "filter": [
  259 + // 只包含 multi_select=false 的过滤器
  260 + {"term": {"category2_name": "短袖T恤"}}
  261 + ]
  262 + }
  263 + },
  264 + "post_filter": {
  265 + "bool": {
  266 + "filter": [
  267 + // 包含 multi_select=true 的过滤器
  268 + {"term": {"brand_name": "苹果"}},
  269 + {
  270 + "nested": {
  271 + "path": "specifications",
  272 + "query": {
  273 + "bool": {
  274 + "must": [
  275 + {"term": {"specifications.name": "颜色"}},
  276 + {"term": {"specifications.value": "白色"}}
  277 + ]
  278 + }
  279 + }
  280 + }
  281 + }
  282 + ]
  283 + }
  284 + },
  285 + "aggs": {
  286 + // 所有聚合都基于 query 的结果(不受 post_filter 影响)
  287 + "brand_name_facet": {...},
  288 + "specifications_颜色_facet": {...}
  289 + }
  290 +}
  291 +```
  292 +
  293 +**关键点**:
  294 +- `query.bool.filter`: 影响结果和聚合
  295 +- `post_filter`: 只影响结果,不影响聚合
  296 +- 聚合统计基于 `query` 的结果,因此 multi-select facet 可以显示多个选项
  297 +
  298 +## 最佳实践
  299 +
  300 +### 1. 何时使用 Multi-Select
  301 +
  302 +| Facet 类型 | 推荐模式 | 原因 |
  303 +|-----------|---------|------|
  304 +| 颜色 | `multi_select: true` | 用户需要切换颜色 |
  305 +| 品牌 | `multi_select: true` | 用户需要比较不同品牌 |
  306 +| 尺码 | `multi_select: true` | 用户需要查看其他尺码 |
  307 +| 类目 | `multi_select: false` | 层级下钻 |
  308 +| 价格区间 | `multi_select: false` | 互斥选择 |
  309 +| 是否有货 | `multi_select: false` | 布尔值筛选 |
  310 +
  311 +### 2. 性能考虑
  312 +
  313 +- **过多 Multi-Select**: 会增加 ES 查询复杂度
  314 +- **建议**: 最多 3-5 个 multi-select facets
  315 +- **优化**: 对于不常用的属性使用标准模式
  316 +
  317 +### 3. UI 设计建议
  318 +
  319 +- **Multi-Select Facets**: 使用复选框(Checkbox)
  320 +- **Standard Facets**: 使用单选框(Radio)或链接
  321 +- **Selected 状态**: 使用不同颜色或图标标识
  322 +
  323 +## API 变更说明
  324 +
  325 +### 新增字段
  326 +
  327 +**FacetConfig**:
  328 +```json
  329 +{
  330 + "field": "brand_name",
  331 + "size": 10,
  332 + "type": "terms",
  333 + "multi_select": true // 新增字段
  334 +}
  335 +```
  336 +
  337 +**FacetValue**:
  338 +```json
  339 +{
  340 + "value": "苹果",
  341 + "count": 150,
  342 + "selected": true // 现在由后端返回真实状态
  343 +}
  344 +```
  345 +
  346 +### 兼容性
  347 +
  348 +- `multi_select` 默认为 `true`:未显式传入该字段的 FacetConfig 会按 Multi-Select 模式处理(对应过滤器进入 `post_filter`)
  349 +- 旧版 API 调用仍然有效;如需保持原有的标准模式行为,请显式设置 `multi_select: false`
  350 +
  351 +## 测试验证
  352 +
  353 +运行测试脚本:
  354 +```bash
  355 +python test_multi_select_facet.py
  356 +```
  357 +
  358 +测试覆盖:
  359 +1. ✓ 标准 Faceting (multi_select=false)
  360 +2. ✓ Multi-Select Faceting (multi_select=true)
  361 +3. ✓ Specifications Multi-Select
  362 +4. ✓ ES Query 结构验证
  363 +
  364 +## 故障排查
  365 +
  366 +### 问题 1: Multi-Select 不生效
  367 +
  368 +**症状**: 设置了 `multi_select: true`,但仍然只返回一个值
  369 +
  370 +**检查**:
  371 +1. 确认 `multi_select` 字段在请求中正确设置
  372 +2. 检查 ES query 是否包含 `post_filter`(开启 `debug: true`)
  373 +3. 验证 Elasticsearch 版本支持 `post_filter`
  374 +
  375 +### 问题 2: Selected 标记不正确
  376 +
  377 +**症状**: `selected` 字段没有正确标记
  378 +
  379 +**检查**:
  380 +1. 确认 `filters` 中的字段名与 facet 字段名一致
  381 +2. 对于 specifications,检查 `name` 和 `value` 是否匹配
  382 +3. 检查 `filters` 的值类型(字符串、数组等)
  383 +
  384 +### 问题 3: 性能问题
  385 +
  386 +**症状**: 启用 Multi-Select 后查询变慢
  387 +
  388 +**优化**:
  389 +1. 减少 multi-select facets 数量
  390 +2. 降低 facet `size` 参数
  391 +3. 考虑使用缓存
  392 +4. 为常用字段建立索引
  393 +
  394 +## 参考资料
  395 +
  396 +- [Elasticsearch Post Filter](https://www.elastic.co/guide/en/elasticsearch/reference/current/filter-search-results.html#post-filter)
  397 +- [Algolia Disjunctive Faceting](https://www.algolia.com/doc/guides/managing-results/refine-results/faceting/#conjunctive-and-disjunctive-facets)
  398 +- [Amazon Product Search](https://www.amazon.com) - 业界最佳实践示例
  399 +
... ...
docs/搜索API对接指南.md
... ... @@ -275,17 +275,20 @@ curl -X POST "http://120.76.41.98:6002/search/" \
275 275 {
276 276 "field": "category1_name",
277 277 "size": 15,
278   - "type": "terms"
  278 + "type": "terms",
  279 + "multi_select": false
279 280 },
280 281 {
281   - "field": "category2_name",
  282 + "field": "brand_name",
282 283 "size": 10,
283   - "type": "terms"
  284 + "type": "terms",
  285 + "multi_select": true
284 286 },
285 287 {
286 288 "field": "specifications.color",
287 289 "size": 20,
288   - "type": "terms"
  290 + "type": "terms",
  291 + "multi_select": true
289 292 },
290 293 {
291 294 "field": "min_price",
... ... @@ -301,6 +304,62 @@ curl -X POST "http://120.76.41.98:6002/search/" \
301 304 }
302 305 ```
303 306  
  307 +**Facet 字段说明**:
  308 +
  309 +| 字段 | 类型 | 必填 | 默认值 | 说明 |
  310 +|------|------|------|--------|------|
  311 +| `field` | string | 是 | - | 分面字段名 |
  312 +| `size` | int | 否 | 10 | 返回的分面值数量(1-100) |
  313 +| `type` | string | 否 | "terms" | 分面类型:`terms`(词条聚合)或 `range`(范围聚合) |
  314 +| `multi_select` | bool | 否 | true | **是否支持多选模式**(见下文详细说明) |
  315 +| `ranges` | array | 否 | null | 范围配置(仅 `type="range"` 时需要) |
  316 +
  317 +#### 🆕 Multi-Select Faceting(多选分面)
  318 +
  319 +**重要特性**: `multi_select` 字段控制分面的行为模式。
  320 +
  321 +##### 标准模式 (multi_select: false)
  322 +- **行为**: 选中某个分面值后,该分面只显示选中的值
  323 +- **适用场景**: 层级类目、互斥选择
  324 +- **示例**: 类目下钻(玩具 > 娃娃 > 芭比)
  325 +
  326 +```json
  327 +{
  328 + "filters": {"category1_name": "玩具"},
  329 + "facets": [
  330 + {"field": "category1_name", "size": 10, "multi_select": false}
  331 + ]
  332 +}
  333 +```
  334 +**响应**: 只返回 "玩具" 一个选项
  335 +
  336 +##### Multi-Select 模式 (multi_select: true) ⭐
  337 +- **行为**: 选中某个分面值后,该分面仍显示所有可选项
  338 +- **适用场景**: 颜色、品牌、尺码等可切换属性
  339 +- **示例**: 选择了"红色"后,仍能看到"蓝色"、"绿色"等选项
  340 +
  341 +```json
  342 +{
  343 + "filters": {
  344 + "specifications": {"name": "颜色", "value": "红色"}
  345 + },
  346 + "facets": [
  347 + {"field": "specifications.颜色", "size": 10, "multi_select": true}
  348 + ]
  349 +}
  350 +```
  351 +**响应**: 返回所有颜色选项,"红色" 被标记为 `selected: true`
  352 +
  353 +##### 推荐配置
  354 +
  355 +| 分面类型 | multi_select | 原因 |
  356 +|---------|-------------|------|
  357 +| 颜色 | `true` | 用户需要切换颜色 |
  358 +| 品牌 | `true` | 用户需要比较品牌 |
  359 +| 尺码 | `true` | 用户需要查看其他尺码 |
  360 +| 类目 | `false` | 层级下钻 |
  361 +| 价格区间 | `false` | 互斥选择 |
  362 +
304 363 **规格分面说明**:
305 364  
306 365 `specifications` 是嵌套字段,支持两种分面模式:
... ... @@ -326,12 +385,14 @@ curl -X POST "http://120.76.41.98:6002/search/" \
326 385 {
327 386 "field": "specifications.color",
328 387 "size": 20,
329   - "type": "terms"
  388 + "type": "terms",
  389 + "multi_select": true
330 390 },
331 391 {
332 392 "field": "specifications.size",
333 393 "size": 15,
334   - "type": "terms"
  394 + "type": "terms",
  395 + "multi_select": true
335 396 }
336 397 ]
337 398 }
... ... @@ -347,8 +408,9 @@ curl -X POST "http://120.76.41.98:6002/search/" \
347 408 "label": "color",
348 409 "type": "terms",
349 410 "values": [
350   - {"value": "white", "count": 50, "selected": false},
351   - {"value": "black", "count": 30, "selected": false}
  411 + {"value": "white", "count": 50, "selected": true}, // ✓ selected 字段由后端标记
  412 + {"value": "black", "count": 30, "selected": false},
  413 + {"value": "red", "count": 20, "selected": false}
352 414 ]
353 415 },
354 416 {
... ...
search/es_query_builder.py
... ... @@ -44,6 +44,54 @@ class ESQueryBuilder:
44 44 self.source_fields = source_fields
45 45 self.function_score_config = function_score_config
46 46  
  47 + def _split_filters_for_faceting(
  48 + self,
  49 + filters: Optional[Dict[str, Any]],
  50 + facet_configs: Optional[List[Any]]
  51 + ) -> tuple:
  52 + """
  53 + Split filters into conjunctive (query) and disjunctive (post_filter) based on facet configs.
  54 +
  55 + Disjunctive filters (multi-select facets):
  56 + - Applied via post_filter (affects results but not aggregations)
  57 + - Allows showing other options in the same facet even when filtered
  58 +
  59 + Conjunctive filters (standard facets):
  60 + - Applied in query.bool.filter (affects both results and aggregations)
  61 + - Standard drill-down behavior
  62 +
  63 + Args:
  64 + filters: All filters from request
  65 + facet_configs: Facet configurations with multi_select flags
  66 +
  67 + Returns:
  68 + (conjunctive_filters, disjunctive_filters)
  69 + """
  70 + if not filters or not facet_configs:
  71 + return filters or {}, {}
  72 +
  73 + # Get fields that support multi-select
  74 + multi_select_fields = set()
  75 + for fc in facet_configs:
  76 + if getattr(fc, 'multi_select', False):
  77 + # Handle specifications.xxx format
  78 + if fc.field.startswith('specifications.'):
  79 + multi_select_fields.add('specifications')
  80 + else:
  81 + multi_select_fields.add(fc.field)
  82 +
  83 + # Split filters
  84 + conjunctive = {}
  85 + disjunctive = {}
  86 +
  87 + for field, value in filters.items():
  88 + if field in multi_select_fields:
  89 + disjunctive[field] = value
  90 + else:
  91 + conjunctive[field] = value
  92 +
  93 + return conjunctive, disjunctive
  94 +
47 95 def build_query(
48 96 self,
49 97 query_text: str,
... ... @@ -51,6 +99,7 @@ class ESQueryBuilder:
51 99 query_node: Optional[QueryNode] = None,
52 100 filters: Optional[Dict[str, Any]] = None,
53 101 range_filters: Optional[Dict[str, Any]] = None,
  102 + facet_configs: Optional[List[Any]] = None,
54 103 size: int = 10,
55 104 from_: int = 0,
56 105 enable_knn: bool = True,
... ... @@ -59,10 +108,11 @@ class ESQueryBuilder:
59 108 min_score: Optional[float] = None
60 109 ) -> Dict[str, Any]:
61 110 """
62   - Build complete ES query (简化版).
  111 + Build complete ES query with post_filter support for multi-select faceting.
63 112  
64   - 结构:filters and (text_recall or embedding_recall)
65   - - filters: 前端传递的过滤条件永远起作用
  113 + 结构:filters and (text_recall or embedding_recall) + post_filter
  114 + - conjunctive_filters: 应用在 query.bool.filter(影响结果和聚合)
  115 + - disjunctive_filters: 应用在 post_filter(只影响结果,不影响聚合)
66 116 - text_recall: 文本相关性召回(中英文字段都用)
67 117 - embedding_recall: 向量召回(KNN)
68 118 - function_score: 包装召回部分,支持提权字段
... ... @@ -71,8 +121,9 @@ class ESQueryBuilder:
71 121 query_text: Query text for BM25 matching
72 122 query_vector: Query embedding for KNN search
73 123 query_node: Parsed boolean expression tree
74   - filters: Exact match filters (always applied)
75   - range_filters: Range filters for numeric fields (always applied)
  124 + filters: Exact match filters
  125 + range_filters: Range filters for numeric fields (always applied in query)
  126 + facet_configs: Facet configurations (used to identify multi-select facets)
76 127 size: Number of results
77 128 from_: Offset for pagination
78 129 enable_knn: Whether to use KNN search
... ... @@ -110,8 +161,13 @@ class ESQueryBuilder:
110 161 # Embedding recall (KNN - separate from query, handled below)
111 162 has_embedding = enable_knn and query_vector is not None and self.text_embedding_field
112 163  
113   - # 2. Build filter clauses (always applied)
114   - filter_clauses = self._build_filters(filters, range_filters)
  164 + # 2. Split filters for multi-select faceting
  165 + conjunctive_filters, disjunctive_filters = self._split_filters_for_faceting(
  166 + filters, facet_configs
  167 + )
  168 +
  169 + # Build filter clauses for query (conjunctive filters + range filters)
  170 + filter_clauses = self._build_filters(conjunctive_filters, range_filters)
115 171  
116 172 # 3. Build main query structure: filters and recall
117 173 if recall_clauses:
... ... @@ -162,7 +218,18 @@ class ESQueryBuilder:
162 218 }
163 219 es_query["knn"] = knn_clause
164 220  
165   - # 5. Add minimum score filter
  221 + # 5. Add post_filter for disjunctive (multi-select) filters
  222 + if disjunctive_filters:
  223 + post_filter_clauses = self._build_filters(disjunctive_filters, None)
  224 + if post_filter_clauses:
  225 + if len(post_filter_clauses) == 1:
  226 + es_query["post_filter"] = post_filter_clauses[0]
  227 + else:
  228 + es_query["post_filter"] = {
  229 + "bool": {"filter": post_filter_clauses}
  230 + }
  231 +
  232 + # 6. Add minimum score filter
166 233 if min_score is not None:
167 234 es_query["min_score"] = min_score
168 235  
... ...
search/searcher.py
... ... @@ -275,6 +275,7 @@ class Searcher:
275 275 query_node=query_node,
276 276 filters=filters,
277 277 range_filters=range_filters,
  278 + facet_configs=facets,
278 279 size=size,
279 280 from_=from_,
280 281 enable_knn=enable_embedding and parsed_query.query_vector is not None,
... ... @@ -381,7 +382,8 @@ class Searcher:
381 382 if facets:
382 383 standardized_facets = ResultFormatter.format_facets(
383 384 es_response.get('aggregations', {}),
384   - facets
  385 + facets,
  386 + filters
385 387 )
386 388  
387 389 # Generate suggestions and related searches
... ...
test_multi_select_facet.py 0 → 100644
... ... @@ -0,0 +1,288 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +测试 Multi-Select Faceting 和 Selected 标记功能
  4 +
  5 +验证:
  6 +1. multi_select=False 时,选中某个 facet 值后,该 facet 只返回一个值(标准模式)
  7 +2. multi_select=True 时,选中某个 facet 值后,该 facet 仍返回多个值(disjunctive 模式)
  8 +3. selected 字段正确标记
  9 +"""
  10 +
  11 +import requests
  12 +import json
  13 +
  14 +API_URL = "http://localhost:8000/api/search"
  15 +TENANT_ID = "test_tenant"
  16 +
  17 +def test_standard_faceting():
  18 + """测试标准 faceting(multi_select=False)"""
  19 + print("\n" + "="*80)
  20 + print("测试 1: 标准 Faceting (multi_select=False)")
  21 + print("="*80)
  22 +
  23 + # 选择 category1_name = "玩具"
  24 + payload = {
  25 + "query": "*",
  26 + "size": 5,
  27 + "filters": {
  28 + "category1_name": "玩具"
  29 + },
  30 + "facets": [
  31 + {
  32 + "field": "category1_name",
  33 + "size": 10,
  34 + "type": "terms",
  35 + "multi_select": False # 标准模式
  36 + }
  37 + ]
  38 + }
  39 +
  40 + headers = {"X-Tenant-ID": TENANT_ID}
  41 +
  42 + try:
  43 + response = requests.post(API_URL, json=payload, headers=headers)
  44 + response.raise_for_status()
  45 + result = response.json()
  46 +
  47 + print(f"\n✓ 请求成功 (HTTP {response.status_code})")
  48 + print(f" 总结果数: {result.get('total', 0)}")
  49 +
  50 + if result.get('facets'):
  51 + for facet in result['facets']:
  52 + if facet['field'] == 'category1_name':
  53 + print(f"\n Facet: {facet['field']}")
  54 + print(f" Values 数量: {len(facet['values'])}")
  55 + for val in facet['values']:
  56 + selected_mark = "✓" if val.get('selected') else " "
  57 + print(f" [{selected_mark}] {val['value']}: {val['count']}")
  58 +
  59 + # 验证
  60 + if len(facet['values']) == 1:
  61 + print("\n ✓ 验证通过: multi_select=False 时,只返回选中的值")
  62 + else:
  63 + print("\n ⚠ 警告: multi_select=False 时应只返回一个值")
  64 +
  65 + selected_values = [v['value'] for v in facet['values'] if v.get('selected')]
  66 + if selected_values == ['玩具']:
  67 + print(" ✓ 验证通过: selected 字段正确标记")
  68 + else:
  69 + print(f" ✗ 错误: selected 标记不正确,期望 ['玩具'],实际 {selected_values}")
  70 + else:
  71 + print("\n ⚠ 警告: 没有返回 facets")
  72 +
  73 + except requests.exceptions.RequestException as e:
  74 + print(f"\n✗ 请求失败: {e}")
  75 + except Exception as e:
  76 + print(f"\n✗ 错误: {e}")
  77 +
  78 +
  79 +def test_multi_select_faceting():
  80 + """测试 Multi-Select Faceting (multi_select=True)"""
  81 + print("\n" + "="*80)
  82 + print("测试 2: Multi-Select Faceting (multi_select=True)")
  83 + print("="*80)
  84 +
  85 + # 选择 category1_name = "玩具"
  86 + payload = {
  87 + "query": "*",
  88 + "size": 5,
  89 + "filters": {
  90 + "category1_name": "玩具"
  91 + },
  92 + "facets": [
  93 + {
  94 + "field": "category1_name",
  95 + "size": 10,
  96 + "type": "terms",
  97 + "multi_select": True # Multi-select 模式
  98 + }
  99 + ]
  100 + }
  101 +
  102 + headers = {"X-Tenant-ID": TENANT_ID}
  103 +
  104 + try:
  105 + response = requests.post(API_URL, json=payload, headers=headers)
  106 + response.raise_for_status()
  107 + result = response.json()
  108 +
  109 + print(f"\n✓ 请求成功 (HTTP {response.status_code})")
  110 + print(f" 总结果数: {result.get('total', 0)}")
  111 +
  112 + if result.get('facets'):
  113 + for facet in result['facets']:
  114 + if facet['field'] == 'category1_name':
  115 + print(f"\n Facet: {facet['field']}")
  116 + print(f" Values 数量: {len(facet['values'])}")
  117 + for val in facet['values'][:5]: # 只显示前5个
  118 + selected_mark = "✓" if val.get('selected') else " "
  119 + print(f" [{selected_mark}] {val['value']}: {val['count']}")
  120 +
  121 + # 验证
  122 + if len(facet['values']) > 1:
  123 + print(f"\n ✓ 验证通过: multi_select=True 时,返回多个值 ({len(facet['values'])} 个)")
  124 + else:
  125 + print("\n ✗ 错误: multi_select=True 时应返回多个值")
  126 +
  127 + selected_values = [v['value'] for v in facet['values'] if v.get('selected')]
  128 + if selected_values == ['玩具']:
  129 + print(" ✓ 验证通过: selected 字段正确标记")
  130 + else:
  131 + print(f" ⚠ 警告: selected 标记可能不正确,期望 ['玩具'],实际 {selected_values}")
  132 + else:
  133 + print("\n ⚠ 警告: 没有返回 facets")
  134 +
  135 + except requests.exceptions.RequestException as e:
  136 + print(f"\n✗ 请求失败: {e}")
  137 + except Exception as e:
  138 + print(f"\n✗ 错误: {e}")
  139 +
  140 +
  141 +def test_specifications_multi_select():
  142 + """测试 Specifications 的 Multi-Select Faceting"""
  143 + print("\n" + "="*80)
  144 + print("测试 3: Specifications Multi-Select Faceting")
  145 + print("="*80)
  146 +
  147 + # 选择 specifications.颜色 = "白色"
  148 + payload = {
  149 + "query": "*",
  150 + "size": 5,
  151 + "filters": {
  152 + "specifications": {
  153 + "name": "颜色",
  154 + "value": "白色"
  155 + }
  156 + },
  157 + "facets": [
  158 + {
  159 + "field": "specifications.颜色",
  160 + "size": 10,
  161 + "type": "terms",
  162 + "multi_select": True
  163 + },
  164 + {
  165 + "field": "specifications.尺寸",
  166 + "size": 10,
  167 + "type": "terms",
  168 + "multi_select": False
  169 + }
  170 + ]
  171 + }
  172 +
  173 + headers = {"X-Tenant-ID": TENANT_ID}
  174 +
  175 + try:
  176 + response = requests.post(API_URL, json=payload, headers=headers)
  177 + response.raise_for_status()
  178 + result = response.json()
  179 +
  180 + print(f"\n✓ 请求成功 (HTTP {response.status_code})")
  181 + print(f" 总结果数: {result.get('total', 0)}")
  182 +
  183 + if result.get('facets'):
  184 + for facet in result['facets']:
  185 + print(f"\n Facet: {facet['field']} (multi_select={facet.get('multi_select', 'N/A')})")
  186 + print(f" Values 数量: {len(facet['values'])}")
  187 + for val in facet['values'][:5]:
  188 + selected_mark = "✓" if val.get('selected') else " "
  189 + print(f" [{selected_mark}] {val['value']}: {val['count']}")
  190 +
  191 + # 验证 specifications.颜色
  192 + if facet['field'] == 'specifications.颜色':
  193 + if len(facet['values']) > 1:
  194 + print(f" ✓ 验证通过: multi_select=True,返回多个颜色选项")
  195 + selected_values = [v['value'] for v in facet['values'] if v.get('selected')]
  196 + if '白色' in selected_values:
  197 + print(" ✓ 验证通过: '白色' 被正确标记为 selected")
  198 +
  199 + # 验证 specifications.尺寸(基于白色商品的尺寸分布)
  200 + if facet['field'] == 'specifications.尺寸':
  201 + print(f" ℹ 尺寸分布基于已选的颜色过滤器")
  202 + else:
  203 + print("\n ⚠ 警告: 没有返回 facets")
  204 +
  205 + except requests.exceptions.RequestException as e:
  206 + print(f"\n✗ 请求失败: {e}")
  207 + except Exception as e:
  208 + print(f"\n✗ 错误: {e}")
  209 +
  210 +
  211 +def test_es_query_structure():
  212 + """测试 ES Query 结构(需要 debug=True)"""
  213 + print("\n" + "="*80)
  214 + print("测试 4: ES Query 结构验证 (debug=True)")
  215 + print("="*80)
  216 +
  217 + payload = {
  218 + "query": "手机",
  219 + "size": 1,
  220 + "filters": {
  221 + "category1_name": "电子产品",
  222 + "specifications": {"name": "颜色", "value": "白色"}
  223 + },
  224 + "facets": [
  225 + {
  226 + "field": "category1_name",
  227 + "size": 5,
  228 + "multi_select": True
  229 + },
  230 + {
  231 + "field": "specifications.颜色",
  232 + "size": 5,
  233 + "multi_select": True
  234 + }
  235 + ],
  236 + "debug": True
  237 + }
  238 +
  239 + headers = {"X-Tenant-ID": TENANT_ID}
  240 +
  241 + try:
  242 + response = requests.post(API_URL, json=payload, headers=headers)
  243 + response.raise_for_status()
  244 + result = response.json()
  245 +
  246 + print(f"\n✓ 请求成功")
  247 +
  248 + if result.get('debug_info') and result['debug_info'].get('es_query'):
  249 + es_query = result['debug_info']['es_query']
  250 +
  251 + # 检查 post_filter
  252 + if 'post_filter' in es_query:
  253 + print("\n ✓ ES Query 包含 post_filter:")
  254 + print(f" {json.dumps(es_query['post_filter'], indent=4, ensure_ascii=False)[:200]}...")
  255 + else:
  256 + print("\n ℹ ES Query 不包含 post_filter(可能没有 multi-select 过滤器)")
  257 +
  258 + # 检查 query.bool.filter
  259 + if 'query' in es_query and 'bool' in es_query['query']:
  260 + filters = es_query['query']['bool'].get('filter', [])
  261 + print(f"\n ✓ Query filters 数量: {len(filters)}")
  262 +
  263 + else:
  264 + print("\n ⚠ 警告: debug_info 中没有 es_query")
  265 +
  266 + except requests.exceptions.RequestException as e:
  267 + print(f"\n✗ 请求失败: {e}")
  268 + except Exception as e:
  269 + print(f"\n✗ 错误: {e}")
  270 +
  271 +
  272 +if __name__ == "__main__":
  273 + print("\n" + "="*80)
  274 + print("Multi-Select Faceting 功能测试")
  275 + print("="*80)
  276 + print(f"\nAPI URL: {API_URL}")
  277 + print(f"Tenant ID: {TENANT_ID}")
  278 +
  279 + # 运行测试
  280 + test_standard_faceting()
  281 + test_multi_select_faceting()
  282 + test_specifications_multi_select()
  283 + test_es_query_structure()
  284 +
  285 + print("\n" + "="*80)
  286 + print("测试完成")
  287 + print("="*80)
  288 +
... ...