diff --git a/.cursor/plans/api-interface-analysis-42918612.plan.0.md b/.cursor/plans/api-interface-analysis-42918612.plan.0.md
new file mode 100644
index 0000000..688cc5d
--- /dev/null
+++ b/.cursor/plans/api-interface-analysis-42918612.plan.0.md
@@ -0,0 +1,1222 @@
+
+# 搜索引擎 API 接口重构实施计划
+
+## 第一部分:现状分析
+
+### 1. 当前实现存在的问题
+
+#### 问题 1:硬编码的价格范围过滤
+
+**位置**:`search/es_query_builder.py` 第 205-233 行
+
+**问题描述**:
+
+```python
+if field == 'price_ranges':
+ # 硬编码特定字符串值
+ if price_range == '0-50':
+ price_ranges.append({"lt": 50})
+ elif price_range == '50-100':
+ price_ranges.append({"gte": 50, "lt": 100})
+ # ...
+```
+
+**影响**:
+
+- 只支持 `price` 字段,无法扩展到其他数值字段
+- 范围值硬编码,无法根据业务需求调整
+- 不符合 SaaS 系统的通用性要求
+
+#### 问题 2:聚合参数直接暴露 ES DSL
+
+**位置**:
+
+- `api/models.py` 第 17 行:`aggregations: Optional[Dict[str, Any]]`
+- `search/es_query_builder.py` 第 298-319 行:`add_dynamic_aggregations`
+- `frontend/static/js/app.js` 第 57-87 行:前端硬编码 ES DSL
+
+**问题描述**:
+
+前端需要了解 Elasticsearch 的聚合语法:
+
+```javascript
+const aggregations = {
+ "category_stats": {
+ "terms": {
+ "field": "categoryName_keyword",
+ "size": 15
+ }
+ },
+ "price_ranges": {
+ "range": {
+ "field": "price",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ // ...
+ ]
+ }
+ }
+};
+```
+
+**影响**:
+
+- 前端需要了解 ES 语法,增加集成难度
+- 不符合 SaaS 产品易用性原则
+- 难以进行参数验证和文档生成
+
+#### 问题 3:分面搜索结果格式不统一
+
+**位置**:`frontend/static/js/app.js` 第 208-258 行
+
+**问题描述**:
+
+- 直接返回 ES 原始格式(`buckets` 结构)
+- 前端需要知道不同聚合类型的响应结构
+- 没有统一的分面结果模型
+
+**影响**:
+
+- 前端解析逻辑复杂
+- 不同类型的聚合处理方式不一致
+- 难以扩展新的聚合类型
+
+#### 问题 4:缺少搜索建议功能
+
+**当前状态**:完全没有实现
+
+**需求**:
+
+- 自动补全(Autocomplete)
+- 搜索建议(Suggestions)
+- 搜索即时反馈(Instant Search)
+
+### 2. 依赖关系分析
+
+**影响范围**:
+
+1. **后端模型层**:`api/models.py`
+2. **查询构建层**:`search/es_query_builder.py`
+3. **搜索执行层**:`search/searcher.py`
+4. **API 路由层**:`api/routes/search.py`
+5. **前端代码**:`frontend/static/js/app.js`
+6. **测试代码**:`test_aggregation_api.py`, `test_complete_search.py`, `tests/integration/test_aggregation_api.py`, `tests/unit/test_searcher.py`
+
+## 第二部分:优化方案设计
+
+### 方案概述
+
+采用**结构化过滤参数方案(方案 A 的简化版)**:
+
+- 分离 `filters`(精确匹配)和 `range_filters`(范围过滤)
+- **不支持单字段多个不连续范围**,简化设计
+- 标准化聚合参数,使用简化的接口
+- 统一分面搜索响应格式
+
+### 1. 新的请求模型设计
+
+#### 1.1 核心模型定义
+
+**文件**:`api/models.py`
+
+```python
+from pydantic import BaseModel, Field, model_validator
+from typing import List, Dict, Any, Optional, Union, Literal
+
+
+class RangeFilter(BaseModel):
+    """数值范围过滤器"""
+    gte: Optional[float] = Field(None, description="大于等于 (>=)")
+    gt: Optional[float] = Field(None, description="大于 (>)")
+    lte: Optional[float] = Field(None, description="小于等于 (<=)")
+    lt: Optional[float] = Field(None, description="小于 (<)")
+
+    @model_validator(mode='after')
+    def check_at_least_one(self):
+        """确保至少指定一个边界"""
+        # 必须用模型级校验:字段级校验器不会对未传入的默认值执行,且 info.data 只含此前已校验的字段
+        # 用 is None 判断而非真值判断,保证边界值 0 合法
+        if all(bound is None for bound in (self.gte, self.gt, self.lte, self.lt)):
+            raise ValueError('至少需要指定一个范围边界')
+        return self
+
+    class Config:
+        json_schema_extra = {
+            "examples": [
+                {"gte": 50, "lte": 200},
+                {"gt": 100},
+                {"lt": 50}
+            ]
+        }
+
+
+class FacetConfig(BaseModel):
+ """分面配置(简化版)"""
+ field: str = Field(..., description="分面字段名")
+ size: int = Field(10, ge=1, le=100, description="返回的分面值数量")
+ type: Literal["terms", "range"] = Field("terms", description="分面类型")
+ ranges: Optional[List[Dict[str, Any]]] = Field(
+ None,
+ description="范围分面的范围定义(仅当 type='range' 时需要)"
+ )
+
+ class Config:
+ json_schema_extra = {
+ "examples": [
+ {
+ "field": "categoryName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "price",
+ "size": 4,
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100},
+ {"key": "100-200", "from": 100, "to": 200},
+ {"key": "200+", "from": 200}
+ ]
+ }
+ ]
+ }
+
+
+class SearchRequest(BaseModel):
+ """搜索请求模型(重构版)"""
+
+ # 基础搜索参数
+ query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)")
+ size: int = Field(10, ge=1, le=100, description="返回结果数量")
+ from_: int = Field(0, ge=0, alias="from", description="分页偏移量")
+
+ # 过滤器 - 精确匹配和多值匹配
+ filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field(
+ None,
+ description="精确匹配过滤器。单值表示精确匹配,数组表示 OR 匹配(匹配任意一个值)",
+ json_schema_extra={
+ "examples": [
+ {
+ "categoryName_keyword": ["玩具", "益智玩具"],
+ "brandName_keyword": "乐高",
+ "in_stock": True
+ }
+ ]
+ }
+ )
+
+ # 范围过滤器 - 数值范围
+ range_filters: Optional[Dict[str, RangeFilter]] = Field(
+ None,
+ description="数值范围过滤器。支持 gte, gt, lte, lt 操作符",
+ json_schema_extra={
+ "examples": [
+ {
+ "price": {"gte": 50, "lte": 200},
+ "days_since_last_update": {"lte": 30}
+ }
+ ]
+ }
+ )
+
+ # 排序
+ sort_by: Optional[str] = Field(None, description="排序字段名(如 'price', 'create_time')")
+ sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)")
+
+ # 分面搜索 - 简化接口
+ facets: Optional[List[Union[str, FacetConfig]]] = Field(
+ None,
+ description="分面配置。可以是字段名列表(使用默认配置)或详细的分面配置对象",
+ json_schema_extra={
+ "examples": [
+ # 简单模式:只指定字段名,使用默认配置
+ ["categoryName_keyword", "brandName_keyword"],
+ # 高级模式:详细配置
+ [
+ {"field": "categoryName_keyword", "size": 15},
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100}
+ ]
+ }
+ ]
+ ]
+ }
+ )
+
+ # 高级选项
+ min_score: Optional[float] = Field(None, ge=0, description="最小相关性分数阈值")
+ highlight: bool = Field(False, description="是否高亮搜索关键词(暂不实现)")
+ debug: bool = Field(False, description="是否返回调试信息")
+
+ # 个性化参数(预留)
+ user_id: Optional[str] = Field(None, description="用户ID,用于个性化搜索和推荐")
+ session_id: Optional[str] = Field(None, description="会话ID,用于搜索分析")
+
+
+class ImageSearchRequest(BaseModel):
+ """图片搜索请求模型"""
+ image_url: str = Field(..., description="查询图片的 URL")
+ size: int = Field(10, ge=1, le=100, description="返回结果数量")
+ filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = None
+ range_filters: Optional[Dict[str, RangeFilter]] = None
+
+
+class SearchSuggestRequest(BaseModel):
+ """搜索建议请求模型(框架,暂不实现)"""
+ query: str = Field(..., min_length=1, description="搜索查询字符串")
+ size: int = Field(5, ge=1, le=20, description="返回建议数量")
+ types: List[Literal["query", "product", "category", "brand"]] = Field(
+ ["query"],
+ description="建议类型:query(查询建议), product(商品建议), category(类目建议), brand(品牌建议)"
+ )
+```
+
+#### 1.2 响应模型定义
+
+```python
+class FacetValue(BaseModel):
+ """分面值"""
+ value: Union[str, int, float] = Field(..., description="分面值")
+ label: Optional[str] = Field(None, description="显示标签(如果与 value 不同)")
+ count: int = Field(..., description="匹配的文档数量")
+ selected: bool = Field(False, description="是否已选中(当前过滤器中)")
+
+
+class FacetResult(BaseModel):
+ """分面结果(标准化格式)"""
+ field: str = Field(..., description="字段名")
+ label: str = Field(..., description="分面显示名称")
+ type: Literal["terms", "range"] = Field(..., description="分面类型")
+ values: List[FacetValue] = Field(..., description="分面值列表")
+ total_count: Optional[int] = Field(None, description="该字段的总文档数")
+
+
+class SearchResponse(BaseModel):
+ """搜索响应模型(重构版)"""
+
+ # 核心结果
+ hits: List[Dict[str, Any]] = Field(..., description="搜索结果列表")
+ total: int = Field(..., description="匹配的总文档数")
+ max_score: float = Field(..., description="最高相关性分数")
+
+ # 分面搜索结果(标准化格式)
+ facets: Optional[List[FacetResult]] = Field(
+ None,
+ description="分面统计结果(标准化格式)"
+ )
+
+ # 查询信息
+ query_info: Dict[str, Any] = Field(
+ default_factory=dict,
+ description="查询处理信息(原始查询、改写、语言检测、翻译等)"
+ )
+
+ # 推荐与建议(预留)
+ related_queries: Optional[List[str]] = Field(None, description="相关搜索查询")
+
+ # 性能指标
+ took_ms: int = Field(..., description="搜索总耗时(毫秒)")
+ performance_info: Optional[Dict[str, Any]] = Field(None, description="详细性能信息")
+
+ # 调试信息
+ debug_info: Optional[Dict[str, Any]] = Field(None, description="调试信息(仅当 debug=True)")
+
+
+class SearchSuggestResponse(BaseModel):
+ """搜索建议响应模型(框架,暂不实现)"""
+ query: str = Field(..., description="原始查询")
+ suggestions: List[Dict[str, Any]] = Field(..., description="建议列表")
+ took_ms: int = Field(..., description="耗时(毫秒)")
+```
+
+### 2. 查询构建器重构
+
+#### 2.1 移除硬编码的 price_ranges 逻辑
+
+**文件**:`search/es_query_builder.py`
+
+**需要修改的方法**:`_build_filters(self, filters, range_filters)`
+
+**改进点**:
+
+1. 移除 `if field == 'price_ranges'` 的特殊处理
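+2. 分离 filters 和 range_filters 的处理逻辑(注意:`range_filters` 需以 dict 形式传入;请求模型中的 `RangeFilter` 对象须先经 `model_dump(exclude_none=True)` 转换,否则会被下方的 `isinstance(range_spec, dict)` 判断静默跳过)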
+3. 添加字段类型验证(利用配置系统)
+
+**新的实现逻辑**:
+
+```python
+def _build_filters(
+ self,
+ filters: Optional[Dict[str, Any]] = None,
+ range_filters: Optional[Dict[str, Any]] = None
+) -> List[Dict[str, Any]]:
+ """
+ 构建过滤子句(重构版)。
+
+ Args:
+ filters: 精确匹配过滤器字典
+ range_filters: 范围过滤器字典
+
+ Returns:
+ ES filter子句列表
+ """
+ filter_clauses = []
+
+ # 1. 处理精确匹配过滤
+ if filters:
+ for field, value in filters.items():
+ if isinstance(value, list):
+ # 多值匹配(OR)
+ filter_clauses.append({
+ "terms": {field: value}
+ })
+ else:
+ # 单值精确匹配
+ filter_clauses.append({
+ "term": {field: value}
+ })
+
+ # 2. 处理范围过滤
+ if range_filters:
+ for field, range_spec in range_filters.items():
+ # 验证字段是否为数值类型(可选,基于配置)
+ # TODO: 添加字段类型验证
+
+ # 构建范围查询
+ range_conditions = {}
+ if isinstance(range_spec, dict):
+ for op in ['gte', 'gt', 'lte', 'lt']:
+ if op in range_spec and range_spec[op] is not None:
+ range_conditions[op] = range_spec[op]
+
+ if range_conditions:
+ filter_clauses.append({
+ "range": {field: range_conditions}
+ })
+
+ return filter_clauses
+```
+
+#### 2.2 优化聚合参数接口
+
+**新增方法**:`build_facets(self, facet_configs)`
+
+**改进点**:
+
+1. 移除 `add_dynamic_aggregations`(直接暴露 ES DSL)
+2. 重构 `add_aggregations` 为更通用的 `build_facets`
+3. 支持简化配置和高级配置两种模式(注意:高级配置须以 dict 形式传入;请求模型中的 `FacetConfig` 对象需先经 `model_dump()` 转换,否则既不是 `str` 也不是 `dict`,会被静默忽略)
+
+**新的实现逻辑**:
+
+```python
+def build_facets(
+ self,
+ facet_configs: Optional[List[Union[str, Dict[str, Any]]]] = None
+) -> Dict[str, Any]:
+ """
+ 构建分面聚合(重构版)。
+
+ Args:
+ facet_configs: 分面配置列表。可以是:
+ - 字符串列表:字段名,使用默认配置
+ - 配置对象列表:详细的分面配置
+
+ Returns:
+ ES aggregations字典
+ """
+ if not facet_configs:
+ return {}
+
+ aggs = {}
+
+ for config in facet_configs:
+ # 1. 简单模式:只有字段名
+ if isinstance(config, str):
+ field = config
+ agg_name = f"{field}_facet"
+ aggs[agg_name] = {
+ "terms": {
+ "field": field,
+ "size": 10, # 默认大小
+ "order": {"_count": "desc"}
+ }
+ }
+
+ # 2. 高级模式:详细配置对象
+ elif isinstance(config, dict):
+ field = config['field']
+ facet_type = config.get('type', 'terms')
+ size = config.get('size', 10)
+ agg_name = f"{field}_facet"
+
+ if facet_type == 'terms':
+ # Terms 聚合(分组统计)
+ aggs[agg_name] = {
+ "terms": {
+ "field": field,
+ "size": size,
+ "order": {"_count": "desc"}
+ }
+ }
+
+ elif facet_type == 'range':
+ # Range 聚合(范围统计)
+ ranges = config.get('ranges', [])
+ if ranges:
+ aggs[agg_name] = {
+ "range": {
+ "field": field,
+ "ranges": ranges
+ }
+ }
+
+ return aggs
+```
+
+#### 2.3 更新主查询构建方法
+
+**修改方法签名**:`build_query()`
+
+```python
+def build_query(
+ self,
+ query_text: str,
+ query_vector: Optional[np.ndarray] = None,
+ query_node: Optional[QueryNode] = None,
+ filters: Optional[Dict[str, Any]] = None,
+ range_filters: Optional[Dict[str, Any]] = None, # 新增
+ size: int = 10,
+ from_: int = 0,
+ enable_knn: bool = True,
+ knn_k: int = 50,
+ knn_num_candidates: int = 200,
+ min_score: Optional[float] = None
+) -> Dict[str, Any]:
+ """构建完整的 ES 查询(重构版)"""
+ # ... 实现
+
+ # 添加过滤器
+ if filters or range_filters:
+ filter_clauses = self._build_filters(filters, range_filters)
+ if filter_clauses:
+ es_query["query"] = {
+ "bool": {
+ "must": [query_clause],
+ "filter": filter_clauses
+ }
+ }
+```
+
+### 3. 搜索执行层重构
+
+**文件**:`search/searcher.py`
+
+**需要修改的方法**:`search()`
+
+**改进点**:
+
+1. 更新方法签名,接受 `range_filters` 参数
+2. 使用新的 `build_facets` 方法替代旧的聚合逻辑
+3. 标准化分面搜索结果
+
+**关键代码片段**:
+
+```python
+def search(
+ self,
+ query: str,
+ size: int = 10,
+ from_: int = 0,
+ filters: Optional[Dict[str, Any]] = None,
+ range_filters: Optional[Dict[str, Any]] = None, # 新增
+ facets: Optional[List[Union[str, Dict]]] = None, # 替代 aggregations
+ min_score: Optional[float] = None,
+ sort_by: Optional[str] = None,
+ sort_order: Optional[str] = "desc",
+ debug: bool = False,
+ context: Optional[RequestContext] = None
+) -> SearchResult:
+ """执行搜索(重构版)"""
+
+ # ... 查询解析 ...
+
+ # 构建 ES 查询
+ es_query = self.query_builder.build_multilang_query(
+ parsed_query=parsed_query,
+ query_vector=parsed_query.query_vector,
+ query_node=query_node,
+ filters=filters,
+ range_filters=range_filters, # 新增
+ size=size,
+ from_=from_,
+ enable_knn=enable_embedding,
+ min_score=min_score
+ )
+
+ # 添加分面聚合
+ if facets:
+ facet_aggs = self.query_builder.build_facets(facets)
+ if facet_aggs:
+ if "aggs" not in es_query:
+ es_query["aggs"] = {}
+ es_query["aggs"].update(facet_aggs)
+
+ # ... 执行搜索 ...
+
+ # 标准化分面结果
+ standardized_facets = self._standardize_facets(
+ es_response.get('aggregations', {}),
+ facets,
+ filters
+ )
+
+ return SearchResult(
+ hits=hits,
+ total=total_value,
+ max_score=max_score,
+ took_ms=int(total_duration),
+ facets=standardized_facets, # 标准化格式
+ query_info=parsed_query.to_dict(),
+ debug_info=debug_info
+ )
+```
+
+**新增辅助方法**:
+
+```python
+def _standardize_facets(
+ self,
+ es_aggregations: Dict[str, Any],
+ facet_configs: Optional[List[Union[str, Dict]]],
+ current_filters: Optional[Dict[str, Any]]
+) -> Optional[List[Dict[str, Any]]]:
+ """
+ 将 ES 聚合结果转换为标准化的分面格式。
+
+ Args:
+ es_aggregations: ES 原始聚合结果
+ facet_configs: 分面配置列表
+ current_filters: 当前应用的过滤器
+
+ Returns:
+ 标准化的分面结果列表
+ """
+ if not es_aggregations or not facet_configs:
+ return None
+
+ standardized_facets = []
+
+ for config in facet_configs:
+ # 解析配置
+ if isinstance(config, str):
+ field = config
+ facet_type = "terms"
+ else:
+ field = config['field']
+ facet_type = config.get('type', 'terms')
+
+ agg_name = f"{field}_facet"
+
+ if agg_name not in es_aggregations:
+ continue
+
+ agg_result = es_aggregations[agg_name]
+
+ # 构建标准化分面结果
+ facet = {
+ "field": field,
+ "label": self._get_field_label(field), # 从配置获取
+ "type": facet_type,
+ "values": []
+ }
+
+ # 获取当前字段的选中值
+ selected_values = set()
+ if current_filters and field in current_filters:
+ filter_value = current_filters[field]
+ if isinstance(filter_value, list):
+ selected_values = set(filter_value)
+ else:
+ selected_values = {filter_value}
+
+ # 转换 buckets
+ if 'buckets' in agg_result:
+ for bucket in agg_result['buckets']:
+ value = bucket.get('key')
+ count = bucket.get('doc_count', 0)
+
+ facet['values'].append({
+ "value": value,
+ "label": str(value), # 可以从配置映射
+ "count": count,
+ "selected": value in selected_values
+ })
+
+ standardized_facets.append(facet)
+
+ return standardized_facets
+
+
+def _get_field_label(self, field: str) -> str:
+ """获取字段的显示标签"""
+ # 从配置中获取字段标签
+ for field_config in self.config.fields:
+ if field_config.name == field:
+ # 假设配置中有 label 字段
+ return getattr(field_config, 'label', field)
+ return field
+```
+
+### 4. API 路由层更新
+
+**文件**:`api/routes/search.py`
+
+**改进点**:
+
+1. 接受新的请求模型参数
+2. 添加搜索建议端点(框架)
+
+**新增端点**:
+
+```python
+@router.get("/suggestions", response_model=SearchSuggestResponse)
+async def search_suggestions(
+ q: str = Query(..., min_length=1, description="搜索查询"),
+ size: int = Query(5, ge=1, le=20, description="建议数量"),
+ types: str = Query("query", description="建议类型(逗号分隔)")
+):
+ """
+ 获取搜索建议(自动补全)。
+
+ 功能说明:
+ - 查询建议(query):基于历史搜索和热门搜索
+ - 商品建议(product):匹配的商品
+ - 类目建议(category):匹配的类目
+ - 品牌建议(brand):匹配的品牌
+
+ 注意:此功能暂未实现,仅返回框架响应。
+ """
+ import time
+ start_time = time.time()
+
+ # TODO: 实现搜索建议逻辑
+ # 1. 从搜索历史中获取建议
+ # 2. 从商品标题中匹配前缀
+ # 3. 从类目、品牌中匹配
+
+ # 临时返回空结果
+ suggestions = []
+
+ # 示例结构(暂不实现)
+ # suggestions = [
+ # {
+ # "text": "芭比娃娃",
+ # "type": "query",
+ # "highlight": "芭比娃娃",
+ # "popularity": 850
+ # }
+ # ]
+
+ took_ms = int((time.time() - start_time) * 1000)
+
+ return SearchSuggestResponse(
+ query=q,
+ suggestions=suggestions,
+ took_ms=took_ms
+ )
+
+
+@router.get("/instant", response_model=SearchResponse)
+async def instant_search(
+ q: str = Query(..., min_length=2, description="搜索查询"),
+ size: int = Query(5, ge=1, le=20, description="结果数量")
+):
+ """
+ 即时搜索(Instant Search)。
+
+ 功能说明:
+ - 边输入边搜索,无需点击搜索按钮
+ - 返回简化的搜索结果
+ - 性能优化:缓存、限流
+
+ 注意:此功能暂未实现,调用标准搜索接口。
+ """
+ # TODO: 优化即时搜索性能
+ # 1. 添加防抖/节流
+ # 2. 实现结果缓存
+ # 3. 简化返回字段
+
+ # 临时使用标准搜索接口
+ from api.app import get_searcher
+ searcher = get_searcher()
+
+ result = searcher.search(
+ query=q,
+ size=size,
+ from_=0
+ )
+
+ return SearchResponse(
+ hits=result.hits,
+ total=result.total,
+ max_score=result.max_score,
+ took_ms=result.took_ms,
+ query_info=result.query_info
+ )
+```
+
+### 5. 前端适配
+
+**文件**:`frontend/static/js/app.js`
+
+**需要修改的地方**:
+
+1. **聚合参数改用简化配置**(第 57-87 行):
+```javascript
+// 旧的方式(直接 ES DSL)
+const aggregations = {
+ "category_stats": {
+ "terms": {
+ "field": "categoryName_keyword",
+ "size": 15
+ }
+ }
+};
+
+// 新的方式(简化配置)
+const facets = [
+ {
+ "field": "categoryName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "brandName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100},
+ {"key": "100-200", "from": 100, "to": 200},
+ {"key": "200+", "from": 200}
+ ]
+ }
+];
+```
+
+2. **过滤器使用新格式**(第 103 行):
+```javascript
+// 旧的方式
+filters: {
+ "price_ranges": ["0-50", "50-100"] // 硬编码
+}
+
+// 新的方式
+filters: {
+ "categoryName_keyword": ["玩具"],
+ "in_stock": true
+},
+range_filters: {
+ "price": {"gte": 50, "lte": 100}
+}
+```
+
+3. **解析标准化的分面结果**(第 208-258 行):
+```javascript
+// 旧的方式(直接访问 ES 结构)
+if (aggregations.category_stats && aggregations.category_stats.buckets) {
+ aggregations.category_stats.buckets.forEach(bucket => {
+ // ...
+ });
+}
+
+// 新的方式(标准化格式)
+if (data.facets) {
+ data.facets.forEach(facet => {
+ if (facet.field === 'categoryName_keyword') {
+ facet.values.forEach(facetValue => {
+ const value = facetValue.value;
+ const count = facetValue.count;
+ const selected = facetValue.selected;
+ // ...
+ });
+ }
+ });
+}
+```
+
+
+### 6. 测试代码更新
+
+**文件**:`test_aggregation_api.py`
+
+**需要修改的地方**:
+
+1. 移除 `price_ranges` 硬编码测试(第 93 行)
+2. 使用新的 `range_filters` 格式
+3. 使用新的 `facets` 配置
+
+**新的测试代码**:
+
+```python
+def test_search_with_filters():
+ """测试新的过滤器格式"""
+ test_request = {
+ "query": "玩具",
+ "size": 5,
+ "filters": {
+ "categoryName_keyword": ["玩具"]
+ },
+ "range_filters": {
+ "price": {"gte": 50, "lte": 100}
+ }
+ }
+ # ...
+
+def test_search_with_facets():
+ """测试新的分面配置"""
+ test_request = {
+ "query": "玩具",
+ "size": 20,
+ "facets": [
+ {
+ "field": "categoryName_keyword",
+ "size": 15
+ },
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100}
+ ]
+ }
+ ]
+ }
+ # ...
+```
+
+## 第三部分:实施步骤
+
+### 阶段 1:后端模型层重构(高优先级)
+
+**任务清单**:
+
+- [ ] 更新 `api/models.py`
+ - [ ] 定义 `RangeFilter` 模型
+ - [ ] 定义 `FacetConfig` 模型
+ - [ ] 更新 `SearchRequest`,添加 `range_filters` 和 `facets`
+ - [ ] 移除 `aggregations` 参数
+ - [ ] 定义 `FacetValue` 和 `FacetResult` 模型
+ - [ ] 更新 `SearchResponse`,使用标准化分面格式
+ - [ ] 添加 `SearchSuggestRequest` 和 `SearchSuggestResponse`(框架)
+
+**验证方式**:
+
+- 运行 Pydantic 模型验证
+- 检查 API 文档(`/docs`)是否正确生成
+
+### 阶段 2:查询构建器重构(高优先级)
+
+**任务清单**:
+
+- [ ] 重构 `search/es_query_builder.py`
+ - [ ] 移除 `price_ranges` 硬编码逻辑(第 205-233 行)
+ - [ ] 重构 `_build_filters` 方法,支持 `range_filters`
+ - [ ] 移除 `add_dynamic_aggregations` 方法
+ - [ ] 重构 `add_aggregations` 为 `build_facets`
+ - [ ] 更新 `build_query` 方法签名
+- [ ] 更新 `search/multilang_query_builder.py`(必需:`searcher.search()` 调用的 `build_multilang_query` 需同步接受并应用 `range_filters` 参数;请确认该方法所在文件)
+
+**验证方式**:
+
+- 编写单元测试验证过滤器构建逻辑
+- 打印生成的 ES DSL,检查正确性
+
+### 阶段 3:搜索执行层重构(高优先级)
+
+**任务清单**:
+
+- [ ] 更新 `search/searcher.py`
+ - [ ] 更新 `search()` 方法签名
+ - [ ] 使用新的 `build_facets` 方法
+ - [ ] 实现 `_standardize_facets()` 辅助方法
+ - [ ] 实现 `_get_field_label()` 辅助方法
+ - [ ] 更新 `SearchResult` 类,使用标准化分面格式
+
+**验证方式**:
+
+- 编写集成测试
+- 手动测试搜索功能
+
+### 阶段 4:API 路由层更新(中优先级)
+
+**任务清单**:
+
+- [ ] 更新 `api/routes/search.py`
+ - [ ] 更新 `/search/` 端点,接受新的请求参数
+ - [ ] 添加 `/search/suggestions` 端点(框架,返回空结果)
+ - [ ] 添加 `/search/instant` 端点(框架,调用标准搜索)
+ - [ ] 添加端点文档和示例
+
+**验证方式**:
+
+- 使用 Swagger UI 测试端点
+- 检查 API 文档完整性
+
+### 阶段 5:前端适配(中优先级)
+
+**任务清单**:
+
+- [ ] 更新 `frontend/static/js/app.js`
+ - [ ] 修改聚合参数为 `facets` 简化配置
+ - [ ] 修改过滤器参数,分离 `filters` 和 `range_filters`
+ - [ ] 更新 `displayAggregations()` 方法,解析标准化分面结果
+ - [ ] 添加范围过滤器 UI(如价格滑块)
+ - [ ] 移除硬编码的 `price_ranges`
+
+**验证方式**:
+
+- 浏览器测试前端功能
+- 检查网络请求和响应格式
+
+### 阶段 6:测试代码更新(低优先级)
+
+**任务清单**:
+
+- [ ] 更新 `test_aggregation_api.py`
+ - [ ] 移除 `price_ranges` 测试
+ - [ ] 添加 `range_filters` 测试
+ - [ ] 添加新的 `facets` 测试
+- [ ] 更新 `test_complete_search.py`
+- [ ] 更新 `tests/integration/test_aggregation_api.py`
+- [ ] 更新 `tests/unit/test_searcher.py`
+
+**验证方式**:
+
+- 运行所有测试,确保通过
+- 检查测试覆盖率
+
+### 阶段 7:文档更新(低优先级)
+
+**任务清单**:
+
+- [ ] 撰写完整的 API 接口文档
+- [ ] 更新 `README.md`
+- [ ] 更新 `USER_GUIDE.md`
+- [ ] 添加接口使用示例
+- [ ] 添加迁移指南(旧接口 → 新接口)
+
+## 第四部分:API 使用示例
+
+### 示例 1:简单搜索
+
+```bash
+POST /search/
+{
+ "query": "芭比娃娃",
+ "size": 20
+}
+```
+
+### 示例 2:带过滤器的搜索
+
+```bash
+POST /search/
+{
+ "query": "玩具",
+ "size": 20,
+ "filters": {
+ "categoryName_keyword": ["玩具", "益智玩具"],
+ "in_stock": true
+ },
+ "range_filters": {
+ "price": {"gte": 50, "lte": 200}
+ }
+}
+```
+
+### 示例 3:带分面搜索的请求
+
+```bash
+POST /search/
+{
+ "query": "玩具",
+ "size": 20,
+ "facets": [
+ {
+ "field": "categoryName_keyword",
+ "size": 15
+ },
+ {
+ "field": "brandName_keyword",
+ "size": 15
+ },
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100},
+ {"key": "100-200", "from": 100, "to": 200},
+ {"key": "200+", "from": 200}
+ ]
+ }
+ ]
+}
+```
+
+**响应示例**(标准化分面格式):
+
+```json
+{
+ "hits": [...],
+ "total": 118,
+ "max_score": 8.5,
+ "took_ms": 45,
+ "facets": [
+ {
+ "field": "categoryName_keyword",
+ "label": "商品类目",
+ "type": "terms",
+ "values": [
+ {"value": "玩具", "label": "玩具", "count": 85, "selected": false},
+ {"value": "益智玩具", "label": "益智玩具", "count": 33, "selected": false}
+ ]
+ },
+ {
+ "field": "price",
+ "label": "价格区间",
+ "type": "range",
+ "values": [
+ {"value": "0-50", "label": "0-50元", "count": 23, "selected": false},
+ {"value": "50-100", "label": "50-100元", "count": 45, "selected": false},
+ {"value": "100-200", "label": "100-200元", "count": 38, "selected": false},
+ {"value": "200+", "label": "200元以上", "count": 12, "selected": false}
+ ]
+ }
+ ]
+}
+```
+
+### 示例 4:搜索建议(框架;下方为功能实现后的目标响应格式,当前端点返回空的 `suggestions` 列表)
+
+```bash
+GET /search/suggestions?q=芭&size=5
+
+{
+ "query": "芭",
+ "suggestions": [
+ {
+ "text": "芭比娃娃",
+ "type": "query",
+ "highlight": "芭比娃娃",
+ "popularity": 850
+ },
+ {
+ "text": "芭比娃娃屋",
+ "type": "query",
+ "highlight": "芭比娃娃屋",
+ "popularity": 320
+ }
+ ],
+ "took_ms": 5
+}
+```
+
+## 第五部分:向后兼容性
+
+### 兼容策略
+
+为保持向后兼容,在过渡期(1-2 个版本)内:
+
+1. **同时支持旧参数和新参数**:
+```python
+class SearchRequest(BaseModel):
+ # 新参数
+ range_filters: Optional[Dict[str, RangeFilter]] = None
+ facets: Optional[List[Union[str, FacetConfig]]] = None
+
+ # 旧参数(标记为废弃)
+ aggregations: Optional[Dict[str, Any]] = Field(
+ None,
+ deprecated=True,
+ description="已废弃。请使用 'facets' 参数"
+ )
+```
+
+2. **在后端自动转换旧格式**:
+```python
+# 在 searcher.py 中
+if request.aggregations and not request.facets:
+ # 将旧的 aggregations 转换为新的 facets
+ request.facets = self._convert_legacy_aggregations(request.aggregations)
+```
+
+3. **在响应中提供迁移提示**:
+```python
+if request.aggregations:
+ warnings.append({
+ "type": "deprecation",
+ "message": "'aggregations' 参数已废弃,请使用 'facets' 参数",
+ "migration_guide": "https://docs.example.com/migration"
+ })
+```
+
+
+### 迁移时间线
+
+- **v3.0**(当前版本):发布新接口,旧接口标记为废弃
+- **v3.1**(1 个月后):移除旧接口的自动转换
+- **v4.0**(3 个月后):完全移除旧接口
+
+## 第六部分:风险评估与缓解
+
+### 风险点
+
+1. **破坏性变更风险**:
+
+ - 风险:现有客户代码可能依赖旧接口
+ - 缓解:提供向后兼容层,发布详细迁移指南
+
+2. **性能影响风险**:
+
+ - 风险:新的标准化处理可能增加延迟
+ - 缓解:添加性能测试,优化关键路径
+
+3. **测试覆盖不足风险**:
+
+ - 风险:重构可能引入新 bug
+ - 缓解:全面的单元测试和集成测试
+
+### 验收标准
+
+- [ ] 所有单元测试通过
+- [ ] 所有集成测试通过
+- [ ] API 文档完整且准确
+- [ ] 性能无明显下降(< 10% 延迟增加)
+- [ ] 前端功能正常工作
+- [ ] 提供完整的迁移指南
+
+## 总结
+
+本计划通过系统性的重构,将搜索 API 从硬编码、暴露 ES 细节的实现,转变为灵活、通用、易用的 SaaS 产品接口。关键改进包括:
+
+1. ✅ 移除硬编码的 price_ranges 逻辑
+2. ✅ 实现结构化的过滤参数(filters + range_filters)
+3. ✅ 简化聚合参数接口,不暴露 ES DSL
+4. ✅ 标准化分面搜索响应格式
+5. ✅ 添加搜索建议功能框架(暂不实现)
+
+通过这些改进,系统将具备更好的通用性、可维护性和可扩展性,为未来功能扩展奠定基础。
\ No newline at end of file
diff --git a/.cursor/plans/api-interface-analysis-42918612.plan.1.md b/.cursor/plans/api-interface-analysis-42918612.plan.1.md
new file mode 100644
index 0000000..88164b3
--- /dev/null
+++ b/.cursor/plans/api-interface-analysis-42918612.plan.1.md
@@ -0,0 +1,1222 @@
+
+# 搜索引擎 API 接口重构实施计划
+
+## 第一部分:现状分析
+
+### 1. 当前实现存在的问题
+
+#### 问题 1:硬编码的价格范围过滤
+
+**位置**:`search/es_query_builder.py` 第 205-233 行
+
+**问题描述**:
+
+```python
+if field == 'price_ranges':
+ # 硬编码特定字符串值
+ if price_range == '0-50':
+ price_ranges.append({"lt": 50})
+ elif price_range == '50-100':
+ price_ranges.append({"gte": 50, "lt": 100})
+ # ...
+```
+
+**影响**:
+
+- 只支持 `price` 字段,无法扩展到其他数值字段
+- 范围值硬编码,无法根据业务需求调整
+- 不符合 SaaS 系统的通用性要求
+
+#### 问题 2:聚合参数直接暴露 ES DSL
+
+**位置**:
+
+- `api/models.py` 第 17 行:`aggregations: Optional[Dict[str, Any]]`
+- `search/es_query_builder.py` 第 298-319 行:`add_dynamic_aggregations`
+- `frontend/static/js/app.js` 第 57-87 行:前端硬编码 ES DSL
+
+**问题描述**:
+
+前端需要了解 Elasticsearch 的聚合语法:
+
+```javascript
+const aggregations = {
+ "category_stats": {
+ "terms": {
+ "field": "categoryName_keyword",
+ "size": 15
+ }
+ },
+ "price_ranges": {
+ "range": {
+ "field": "price",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ // ...
+ ]
+ }
+ }
+};
+```
+
+**影响**:
+
+- 前端需要了解 ES 语法,增加集成难度
+- 不符合 SaaS 产品易用性原则
+- 难以进行参数验证和文档生成
+
+#### 问题 3:分面搜索结果格式不统一
+
+**位置**:`frontend/static/js/app.js` 第 208-258 行
+
+**问题描述**:
+
+- 直接返回 ES 原始格式(`buckets` 结构)
+- 前端需要知道不同聚合类型的响应结构
+- 没有统一的分面结果模型
+
+**影响**:
+
+- 前端解析逻辑复杂
+- 不同类型的聚合处理方式不一致
+- 难以扩展新的聚合类型
+
+#### 问题 4:缺少搜索建议功能
+
+**当前状态**:完全没有实现
+
+**需求**:
+
+- 自动补全(Autocomplete)
+- 搜索建议(Suggestions)
+- 搜索即时反馈(Instant Search)
+
+### 2. 依赖关系分析
+
+**影响范围**:
+
+1. **后端模型层**:`api/models.py`
+2. **查询构建层**:`search/es_query_builder.py`
+3. **搜索执行层**:`search/searcher.py`
+4. **API 路由层**:`api/routes/search.py`
+5. **前端代码**:`frontend/static/js/app.js`
+6. **测试代码**:`test_aggregation_api.py`, `test_complete_search.py`
+
+## 第二部分:优化方案设计
+
+### 方案概述
+
+采用**结构化过滤参数方案(方案 A 的简化版)**:
+
+- 分离 `filters`(精确匹配)和 `range_filters`(范围过滤)
+- **不支持单字段多个不连续范围**,简化设计
+- 标准化聚合参数,使用简化的接口
+- 统一分面搜索响应格式
+
+### 1. 新的请求模型设计
+
+#### 1.1 核心模型定义
+
+**文件**:`api/models.py`
+
+```python
+from pydantic import BaseModel, Field, model_validator
+from typing import List, Dict, Any, Optional, Union, Literal
+
+
+class RangeFilter(BaseModel):
+    """数值范围过滤器"""
+    gte: Optional[float] = Field(None, description="大于等于 (>=)")
+    gt: Optional[float] = Field(None, description="大于 (>)")
+    lte: Optional[float] = Field(None, description="小于等于 (<=)")
+    lt: Optional[float] = Field(None, description="小于 (<)")
+
+    @model_validator(mode='after')
+    def check_at_least_one(self):
+        """确保至少指定一个边界"""
+        # 必须用模型级校验:字段级校验器不会对未传入的默认值执行,且 info.data 只含此前已校验的字段
+        # 用 is None 判断而非真值判断,保证边界值 0 合法
+        if all(bound is None for bound in (self.gte, self.gt, self.lte, self.lt)):
+            raise ValueError('至少需要指定一个范围边界')
+        return self
+
+    class Config:
+        json_schema_extra = {
+            "examples": [
+                {"gte": 50, "lte": 200},
+                {"gt": 100},
+                {"lt": 50}
+            ]
+        }
+
+
+class FacetConfig(BaseModel):
+ """分面配置(简化版)"""
+ field: str = Field(..., description="分面字段名")
+ size: int = Field(10, ge=1, le=100, description="返回的分面值数量")
+ type: Literal["terms", "range"] = Field("terms", description="分面类型")
+ ranges: Optional[List[Dict[str, Any]]] = Field(
+ None,
+ description="范围分面的范围定义(仅当 type='range' 时需要)"
+ )
+
+ class Config:
+ json_schema_extra = {
+ "examples": [
+ {
+ "field": "categoryName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "price",
+ "size": 4,
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100},
+ {"key": "100-200", "from": 100, "to": 200},
+ {"key": "200+", "from": 200}
+ ]
+ }
+ ]
+ }
+
+
+class SearchRequest(BaseModel):
+ """搜索请求模型(重构版)"""
+
+ # 基础搜索参数
+ query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)")
+ size: int = Field(10, ge=1, le=100, description="返回结果数量")
+ from_: int = Field(0, ge=0, alias="from", description="分页偏移量")
+
+ # 过滤器 - 精确匹配和多值匹配
+ filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field(
+ None,
+ description="精确匹配过滤器。单值表示精确匹配,数组表示 OR 匹配(匹配任意一个值)",
+ json_schema_extra={
+ "examples": [
+ {
+ "categoryName_keyword": ["玩具", "益智玩具"],
+ "brandName_keyword": "乐高",
+ "in_stock": True
+ }
+ ]
+ }
+ )
+
+ # 范围过滤器 - 数值范围
+ range_filters: Optional[Dict[str, RangeFilter]] = Field(
+ None,
+ description="数值范围过滤器。支持 gte, gt, lte, lt 操作符",
+ json_schema_extra={
+ "examples": [
+ {
+ "price": {"gte": 50, "lte": 200},
+ "days_since_last_update": {"lte": 30}
+ }
+ ]
+ }
+ )
+
+ # 排序
+ sort_by: Optional[str] = Field(None, description="排序字段名(如 'price', 'create_time')")
+ sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)")
+
+ # 分面搜索 - 简化接口
+ facets: Optional[List[Union[str, FacetConfig]]] = Field(
+ None,
+ description="分面配置。可以是字段名列表(使用默认配置)或详细的分面配置对象",
+ json_schema_extra={
+ "examples": [
+ # 简单模式:只指定字段名,使用默认配置
+ ["categoryName_keyword", "brandName_keyword"],
+ # 高级模式:详细配置
+ [
+ {"field": "categoryName_keyword", "size": 15},
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100}
+ ]
+ }
+ ]
+ ]
+ }
+ )
+
+ # 高级选项
+ min_score: Optional[float] = Field(None, ge=0, description="最小相关性分数阈值")
+ highlight: bool = Field(False, description="是否高亮搜索关键词(暂不实现)")
+ debug: bool = Field(False, description="是否返回调试信息")
+
+ # 个性化参数(预留)
+ user_id: Optional[str] = Field(None, description="用户ID,用于个性化搜索和推荐")
+ session_id: Optional[str] = Field(None, description="会话ID,用于搜索分析")
+
+
+class ImageSearchRequest(BaseModel):
+ """图片搜索请求模型"""
+ image_url: str = Field(..., description="查询图片的 URL")
+ size: int = Field(10, ge=1, le=100, description="返回结果数量")
+ filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = None
+ range_filters: Optional[Dict[str, RangeFilter]] = None
+
+
+class SearchSuggestRequest(BaseModel):
+ """搜索建议请求模型(框架,暂不实现)"""
+ query: str = Field(..., min_length=1, description="搜索查询字符串")
+ size: int = Field(5, ge=1, le=20, description="返回建议数量")
+ types: List[Literal["query", "product", "category", "brand"]] = Field(
+ ["query"],
+ description="建议类型:query(查询建议), product(商品建议), category(类目建议), brand(品牌建议)"
+ )
+```
+
+#### 1.2 响应模型定义
+
+```python
+class FacetValue(BaseModel):
+ """分面值"""
+ value: Union[str, int, float] = Field(..., description="分面值")
+ label: Optional[str] = Field(None, description="显示标签(如果与 value 不同)")
+ count: int = Field(..., description="匹配的文档数量")
+ selected: bool = Field(False, description="是否已选中(当前过滤器中)")
+
+
+class FacetResult(BaseModel):
+ """分面结果(标准化格式)"""
+ field: str = Field(..., description="字段名")
+ label: str = Field(..., description="分面显示名称")
+ type: Literal["terms", "range"] = Field(..., description="分面类型")
+ values: List[FacetValue] = Field(..., description="分面值列表")
+ total_count: Optional[int] = Field(None, description="该字段的总文档数")
+
+
+class SearchResponse(BaseModel):
+ """搜索响应模型(重构版)"""
+
+ # 核心结果
+ hits: List[Dict[str, Any]] = Field(..., description="搜索结果列表")
+ total: int = Field(..., description="匹配的总文档数")
+ max_score: float = Field(..., description="最高相关性分数")
+
+ # 分面搜索结果(标准化格式)
+ facets: Optional[List[FacetResult]] = Field(
+ None,
+ description="分面统计结果(标准化格式)"
+ )
+
+ # 查询信息
+ query_info: Dict[str, Any] = Field(
+ default_factory=dict,
+ description="查询处理信息(原始查询、改写、语言检测、翻译等)"
+ )
+
+ # 推荐与建议(预留)
+ related_queries: Optional[List[str]] = Field(None, description="相关搜索查询")
+
+ # 性能指标
+ took_ms: int = Field(..., description="搜索总耗时(毫秒)")
+ performance_info: Optional[Dict[str, Any]] = Field(None, description="详细性能信息")
+
+ # 调试信息
+ debug_info: Optional[Dict[str, Any]] = Field(None, description="调试信息(仅当 debug=True)")
+
+
+class SearchSuggestResponse(BaseModel):
+ """搜索建议响应模型(框架,暂不实现)"""
+ query: str = Field(..., description="原始查询")
+ suggestions: List[Dict[str, Any]] = Field(..., description="建议列表")
+ took_ms: int = Field(..., description="耗时(毫秒)")
+```
+
+### 2. 查询构建器重构
+
+#### 2.1 移除硬编码的 price_ranges 逻辑
+
+**文件**:`search/es_query_builder.py`
+
+**需要修改的方法**:`_build_filters(self, filters, range_filters)`
+
+**改进点**:
+
+1. 移除 `if field == 'price_ranges'` 的特殊处理
+2. 分离 filters 和 range_filters 的处理逻辑(`range_filters` 的每个值须为 dict;若传入的是 `RangeFilter` 模型对象,需先转换为 dict,否则会被静默忽略)
+3. 添加字段类型验证(利用配置系统;下方示例代码中仅以 TODO 占位,尚未实现)
+
+**新的实现逻辑**:
+
+```python
+def _build_filters(
+ self,
+ filters: Optional[Dict[str, Any]] = None,
+ range_filters: Optional[Dict[str, Any]] = None
+) -> List[Dict[str, Any]]:
+ """
+ 构建过滤子句(重构版)。
+
+ Args:
+ filters: 精确匹配过滤器字典
+ range_filters: 范围过滤器字典
+
+ Returns:
+ ES filter子句列表
+ """
+ filter_clauses = []
+
+ # 1. 处理精确匹配过滤
+ if filters:
+ for field, value in filters.items():
+ if isinstance(value, list):
+ # 多值匹配(OR)
+ filter_clauses.append({
+ "terms": {field: value}
+ })
+ else:
+ # 单值精确匹配
+ filter_clauses.append({
+ "term": {field: value}
+ })
+
+ # 2. 处理范围过滤
+ if range_filters:
+ for field, range_spec in range_filters.items():
+ # 验证字段是否为数值类型(可选,基于配置)
+ # TODO: 添加字段类型验证
+
+ # 构建范围查询
+ range_conditions = {}
+ if isinstance(range_spec, dict):
+ for op in ['gte', 'gt', 'lte', 'lt']:
+ if op in range_spec and range_spec[op] is not None:
+ range_conditions[op] = range_spec[op]
+
+ if range_conditions:
+ filter_clauses.append({
+ "range": {field: range_conditions}
+ })
+
+ return filter_clauses
+```
+
+#### 2.2 优化聚合参数接口
+
+**新增方法**:`build_facets(self, facet_configs)`
+
+**改进点**:
+
+1. 移除 `add_dynamic_aggregations`(直接暴露 ES DSL)
+2. 重构 `add_aggregations` 为更通用的 `build_facets`
+3. 支持简化配置(字段名字符串)和高级配置(dict)两种模式;`FacetConfig` 模型对象需先转换为 dict 再传入,否则会被忽略
+
+**新的实现逻辑**:
+
+```python
+def build_facets(
+ self,
+ facet_configs: Optional[List[Union[str, Dict[str, Any]]]] = None
+) -> Dict[str, Any]:
+ """
+ 构建分面聚合(重构版)。
+
+ Args:
+ facet_configs: 分面配置列表。可以是:
+ - 字符串列表:字段名,使用默认配置
+ - 配置对象列表:详细的分面配置
+
+ Returns:
+ ES aggregations字典
+ """
+ if not facet_configs:
+ return {}
+
+ aggs = {}
+
+ for config in facet_configs:
+ # 1. 简单模式:只有字段名
+ if isinstance(config, str):
+ field = config
+ agg_name = f"{field}_facet"
+ aggs[agg_name] = {
+ "terms": {
+ "field": field,
+ "size": 10, # 默认大小
+ "order": {"_count": "desc"}
+ }
+ }
+
+ # 2. 高级模式:详细配置对象
+ elif isinstance(config, dict):
+ field = config['field']
+ facet_type = config.get('type', 'terms')
+ size = config.get('size', 10)
+ agg_name = f"{field}_facet"
+
+ if facet_type == 'terms':
+ # Terms 聚合(分组统计)
+ aggs[agg_name] = {
+ "terms": {
+ "field": field,
+ "size": size,
+ "order": {"_count": "desc"}
+ }
+ }
+
+ elif facet_type == 'range':
+ # Range 聚合(范围统计)
+ ranges = config.get('ranges', [])
+ if ranges:
+ aggs[agg_name] = {
+ "range": {
+ "field": field,
+ "ranges": ranges
+ }
+ }
+
+ return aggs
+```
+
+#### 2.3 更新主查询构建方法
+
+**修改方法签名**:`build_query()`(`search()` 实际调用的 `build_multilang_query()` 也需同步增加 `range_filters` 参数)
+
+```python
+def build_query(
+ self,
+ query_text: str,
+ query_vector: Optional[np.ndarray] = None,
+ query_node: Optional[QueryNode] = None,
+ filters: Optional[Dict[str, Any]] = None,
+ range_filters: Optional[Dict[str, Any]] = None, # 新增
+ size: int = 10,
+ from_: int = 0,
+ enable_knn: bool = True,
+ knn_k: int = 50,
+ knn_num_candidates: int = 200,
+ min_score: Optional[float] = None
+) -> Dict[str, Any]:
+ """构建完整的 ES 查询(重构版)"""
+ # ... 实现
+
+ # 添加过滤器
+ if filters or range_filters:
+ filter_clauses = self._build_filters(filters, range_filters)
+ if filter_clauses:
+ es_query["query"] = {
+ "bool": {
+ "must": [query_clause],
+ "filter": filter_clauses
+ }
+ }
+```
+
+### 3. 搜索执行层重构
+
+**文件**:`search/searcher.py`
+
+**需要修改的方法**:`search()`
+
+**改进点**:
+
+1. 更新方法签名,接受 `range_filters` 参数
+2. 使用新的 `build_facets` 方法替代旧的聚合逻辑
+3. 标准化分面搜索结果
+
+**关键代码片段**:
+
+```python
+def search(
+ self,
+ query: str,
+ size: int = 10,
+ from_: int = 0,
+ filters: Optional[Dict[str, Any]] = None,
+ range_filters: Optional[Dict[str, Any]] = None, # 新增
+ facets: Optional[List[Union[str, Dict]]] = None, # 替代 aggregations
+ min_score: Optional[float] = None,
+ sort_by: Optional[str] = None,
+ sort_order: Optional[str] = "desc",
+ debug: bool = False,
+ context: Optional[RequestContext] = None
+) -> SearchResult:
+ """执行搜索(重构版)"""
+
+ # ... 查询解析 ...
+
+ # 构建 ES 查询
+ es_query = self.query_builder.build_multilang_query(
+ parsed_query=parsed_query,
+ query_vector=parsed_query.query_vector,
+ query_node=query_node,
+ filters=filters,
+ range_filters=range_filters, # 新增
+ size=size,
+ from_=from_,
+ enable_knn=enable_embedding,
+ min_score=min_score
+ )
+
+ # 添加分面聚合
+ if facets:
+ facet_aggs = self.query_builder.build_facets(facets)
+ if facet_aggs:
+ if "aggs" not in es_query:
+ es_query["aggs"] = {}
+ es_query["aggs"].update(facet_aggs)
+
+ # ... 执行搜索 ...
+
+ # 标准化分面结果
+ standardized_facets = self._standardize_facets(
+ es_response.get('aggregations', {}),
+ facets,
+ filters
+ )
+
+ return SearchResult(
+ hits=hits,
+ total=total_value,
+ max_score=max_score,
+ took_ms=int(total_duration),
+ facets=standardized_facets, # 标准化格式
+ query_info=parsed_query.to_dict(),
+ debug_info=debug_info
+ )
+```
+
+**新增辅助方法**:
+
+```python
+def _standardize_facets(
+ self,
+ es_aggregations: Dict[str, Any],
+ facet_configs: Optional[List[Union[str, Dict]]],
+ current_filters: Optional[Dict[str, Any]]
+) -> Optional[List[Dict[str, Any]]]:
+ """
+ 将 ES 聚合结果转换为标准化的分面格式。
+
+ Args:
+ es_aggregations: ES 原始聚合结果
+ facet_configs: 分面配置列表
+ current_filters: 当前应用的过滤器
+
+ Returns:
+ 标准化的分面结果列表
+ """
+ if not es_aggregations or not facet_configs:
+ return None
+
+ standardized_facets = []
+
+ for config in facet_configs:
+ # 解析配置
+ if isinstance(config, str):
+ field = config
+ facet_type = "terms"
+ else:
+ field = config['field']
+ facet_type = config.get('type', 'terms')
+
+ agg_name = f"{field}_facet"
+
+ if agg_name not in es_aggregations:
+ continue
+
+ agg_result = es_aggregations[agg_name]
+
+ # 构建标准化分面结果
+ facet = {
+ "field": field,
+ "label": self._get_field_label(field), # 从配置获取
+ "type": facet_type,
+ "values": []
+ }
+
+ # 获取当前字段的选中值
+ selected_values = set()
+ if current_filters and field in current_filters:
+ filter_value = current_filters[field]
+ if isinstance(filter_value, list):
+ selected_values = set(filter_value)
+ else:
+ selected_values = {filter_value}
+
+ # 转换 buckets
+ if 'buckets' in agg_result:
+ for bucket in agg_result['buckets']:
+ value = bucket.get('key')
+ count = bucket.get('doc_count', 0)
+
+ facet['values'].append({
+ "value": value,
+ "label": str(value), # 可以从配置映射
+ "count": count,
+ "selected": value in selected_values
+ })
+
+ standardized_facets.append(facet)
+
+ return standardized_facets
+
+
+def _get_field_label(self, field: str) -> str:
+ """获取字段的显示标签"""
+ # 从配置中获取字段标签
+ for field_config in self.config.fields:
+ if field_config.name == field:
+ # 假设配置中有 label 字段
+ return getattr(field_config, 'label', field)
+ return field
+```
+
+### 4. API 路由层更新
+
+**文件**:`api/routes/search.py`
+
+**改进点**:
+
+1. 接受新的请求模型参数
+2. 添加搜索建议端点 `/suggestions` 和即时搜索端点 `/instant`(均为框架)
+
+**新增端点**:
+
+```python
+@router.get("/suggestions", response_model=SearchSuggestResponse)
+async def search_suggestions(
+ q: str = Query(..., min_length=1, description="搜索查询"),
+ size: int = Query(5, ge=1, le=20, description="建议数量"),
+ types: str = Query("query", description="建议类型(逗号分隔)")
+):
+ """
+ 获取搜索建议(自动补全)。
+
+ 功能说明:
+ - 查询建议(query):基于历史搜索和热门搜索
+ - 商品建议(product):匹配的商品
+ - 类目建议(category):匹配的类目
+ - 品牌建议(brand):匹配的品牌
+
+ 注意:此功能暂未实现,仅返回框架响应。
+ """
+ import time
+ start_time = time.time()
+
+ # TODO: 实现搜索建议逻辑
+ # 1. 从搜索历史中获取建议
+ # 2. 从商品标题中匹配前缀
+ # 3. 从类目、品牌中匹配
+
+ # 临时返回空结果
+ suggestions = []
+
+ # 示例结构(暂不实现)
+ # suggestions = [
+ # {
+ # "text": "芭比娃娃",
+ # "type": "query",
+ # "highlight": "芭比娃娃",
+ # "popularity": 850
+ # }
+ # ]
+
+ took_ms = int((time.time() - start_time) * 1000)
+
+ return SearchSuggestResponse(
+ query=q,
+ suggestions=suggestions,
+ took_ms=took_ms
+ )
+
+
+@router.get("/instant", response_model=SearchResponse)
+async def instant_search(
+ q: str = Query(..., min_length=2, description="搜索查询"),
+ size: int = Query(5, ge=1, le=20, description="结果数量")
+):
+ """
+ 即时搜索(Instant Search)。
+
+ 功能说明:
+ - 边输入边搜索,无需点击搜索按钮
+ - 返回简化的搜索结果
+ - 性能优化:缓存、限流
+
+ 注意:此功能暂未实现,调用标准搜索接口。
+ """
+ # TODO: 优化即时搜索性能
+ # 1. 添加防抖/节流
+ # 2. 实现结果缓存
+ # 3. 简化返回字段
+
+ # 临时使用标准搜索接口
+ from api.app import get_searcher
+ searcher = get_searcher()
+
+ result = searcher.search(
+ query=q,
+ size=size,
+ from_=0
+ )
+
+ return SearchResponse(
+ hits=result.hits,
+ total=result.total,
+ max_score=result.max_score,
+ took_ms=result.took_ms,
+ query_info=result.query_info
+ )
+```
+
+### 5. 前端适配
+
+**文件**:`frontend/static/js/app.js`
+
+**需要修改的地方**:
+
+1. **聚合参数改用简化配置**(第 57-87 行):
+```javascript
+// 旧的方式(直接 ES DSL)
+const aggregations = {
+ "category_stats": {
+ "terms": {
+ "field": "categoryName_keyword",
+ "size": 15
+ }
+ }
+};
+
+// 新的方式(简化配置)
+const facets = [
+ {
+ "field": "categoryName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "brandName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100},
+ {"key": "100-200", "from": 100, "to": 200},
+ {"key": "200+", "from": 200}
+ ]
+ }
+];
+```
+
+2. **过滤器使用新格式**(第 103 行):
+```javascript
+// 旧的方式
+filters: {
+ "price_ranges": ["0-50", "50-100"] // 硬编码
+}
+
+// 新的方式
+filters: {
+ "categoryName_keyword": ["玩具"],
+ "in_stock": true
+},
+range_filters: {
+ "price": {"gte": 50, "lte": 100}
+}
+```
+
+3. **解析标准化的分面结果**(第 208-258 行):
+```javascript
+// 旧的方式(直接访问 ES 结构)
+if (aggregations.category_stats && aggregations.category_stats.buckets) {
+ aggregations.category_stats.buckets.forEach(bucket => {
+ // ...
+ });
+}
+
+// 新的方式(标准化格式)
+if (data.facets) {
+ data.facets.forEach(facet => {
+ if (facet.field === 'categoryName_keyword') {
+ facet.values.forEach(facetValue => {
+ const value = facetValue.value;
+ const count = facetValue.count;
+ const selected = facetValue.selected;
+ // ...
+ });
+ }
+ });
+}
+```
+
+
+### 6. 测试代码更新
+
+**文件**:`test_aggregation_api.py`
+
+**需要修改的地方**:
+
+1. 移除 `price_ranges` 硬编码测试(第 93 行)
+2. 使用新的 `range_filters` 格式
+3. 使用新的 `facets` 配置
+
+**新的测试代码**:
+
+```python
+def test_search_with_filters():
+ """测试新的过滤器格式"""
+ test_request = {
+ "query": "玩具",
+ "size": 5,
+ "filters": {
+ "categoryName_keyword": ["玩具"]
+ },
+ "range_filters": {
+ "price": {"gte": 50, "lte": 100}
+ }
+ }
+ # ...
+
+def test_search_with_facets():
+ """测试新的分面配置"""
+ test_request = {
+ "query": "玩具",
+ "size": 20,
+ "facets": [
+ {
+ "field": "categoryName_keyword",
+ "size": 15
+ },
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100}
+ ]
+ }
+ ]
+ }
+ # ...
+```
+
+## 第三部分:实施步骤
+
+### 阶段 1:后端模型层重构(高优先级)
+
+**任务清单**:
+
+- [ ] 更新 `api/models.py`
+ - [ ] 定义 `RangeFilter` 模型
+ - [ ] 定义 `FacetConfig` 模型
+ - [ ] 更新 `SearchRequest`,添加 `range_filters` 和 `facets`
+ - [ ] 移除 `aggregations` 参数
+ - [ ] 定义 `FacetValue` 和 `FacetResult` 模型
+ - [ ] 更新 `SearchResponse`,使用标准化分面格式
+ - [ ] 添加 `SearchSuggestRequest` 和 `SearchSuggestResponse`(框架)
+
+**验证方式**:
+
+- 运行 Pydantic 模型验证
+- 检查 API 文档(`/docs`)是否正确生成
+
+### 阶段 2:查询构建器重构(高优先级)
+
+**任务清单**:
+
+- [ ] 重构 `search/es_query_builder.py`
+ - [ ] 移除 `price_ranges` 硬编码逻辑(第 205-233 行)
+ - [ ] 重构 `_build_filters` 方法,支持 `range_filters`
+ - [ ] 移除 `add_dynamic_aggregations` 方法
+ - [ ] 重构 `add_aggregations` 为 `build_facets`
+ - [ ] 更新 `build_query` 方法签名
+- [ ] 更新 `search/multilang_query_builder.py`(如 `build_multilang_query` 定义于此,需同步接受 `range_filters` 参数)
+
+**验证方式**:
+
+- 编写单元测试验证过滤器构建逻辑
+- 打印生成的 ES DSL,检查正确性
+
+### 阶段 3:搜索执行层重构(高优先级)
+
+**任务清单**:
+
+- [ ] 更新 `search/searcher.py`
+ - [ ] 更新 `search()` 方法签名
+ - [ ] 使用新的 `build_facets` 方法
+ - [ ] 实现 `_standardize_facets()` 辅助方法
+ - [ ] 实现 `_get_field_label()` 辅助方法
+ - [ ] 更新 `SearchResult` 类,使用标准化分面格式
+
+**验证方式**:
+
+- 编写集成测试
+- 手动测试搜索功能
+
+### 阶段 4:API 路由层更新(中优先级)
+
+**任务清单**:
+
+- [ ] 更新 `api/routes/search.py`
+ - [ ] 更新 `/search/` 端点,接受新的请求参数
+ - [ ] 添加 `/search/suggestions` 端点(框架,返回空结果)
+ - [ ] 添加 `/search/instant` 端点(框架,调用标准搜索)
+ - [ ] 添加端点文档和示例
+
+**验证方式**:
+
+- 使用 Swagger UI 测试端点
+- 检查 API 文档完整性
+
+### 阶段 5:前端适配(中优先级)
+
+**任务清单**:
+
+- [ ] 更新 `frontend/static/js/app.js`
+ - [ ] 修改聚合参数为 `facets` 简化配置
+ - [ ] 修改过滤器参数,分离 `filters` 和 `range_filters`
+ - [ ] 更新 `displayAggregations()` 方法,解析标准化分面结果
+ - [ ] 添加范围过滤器 UI(如价格滑块)
+ - [ ] 移除硬编码的 `price_ranges`
+
+**验证方式**:
+
+- 浏览器测试前端功能
+- 检查网络请求和响应格式
+
+### 阶段 6:测试代码更新(低优先级)
+
+**任务清单**:
+
+- [ ] 更新 `test_aggregation_api.py`
+ - [ ] 移除 `price_ranges` 测试
+ - [ ] 添加 `range_filters` 测试
+ - [ ] 添加新的 `facets` 测试
+- [ ] 更新 `test_complete_search.py`
+- [ ] 更新 `tests/integration/test_aggregation_api.py`
+- [ ] 更新 `tests/unit/test_searcher.py`
+
+**验证方式**:
+
+- 运行所有测试,确保通过
+- 检查测试覆盖率
+
+### 阶段 7:文档更新(低优先级)
+
+**任务清单**:
+
+- [ ] 撰写完整的 API 接口文档
+- [ ] 更新 `README.md`
+- [ ] 更新 `USER_GUIDE.md`
+- [ ] 添加接口使用示例
+- [ ] 添加迁移指南(旧接口 → 新接口)
+
+## 第四部分:API 使用示例
+
+### 示例 1:简单搜索
+
+```bash
+POST /search/
+{
+ "query": "芭比娃娃",
+ "size": 20
+}
+```
+
+### 示例 2:带过滤器的搜索
+
+```bash
+POST /search/
+{
+ "query": "玩具",
+ "size": 20,
+ "filters": {
+ "categoryName_keyword": ["玩具", "益智玩具"],
+ "in_stock": true
+ },
+ "range_filters": {
+ "price": {"gte": 50, "lte": 200}
+ }
+}
+```
+
+### 示例 3:带分面搜索的请求
+
+```bash
+POST /search/
+{
+ "query": "玩具",
+ "size": 20,
+ "facets": [
+ {
+ "field": "categoryName_keyword",
+ "size": 15
+ },
+ {
+ "field": "brandName_keyword",
+ "size": 15
+ },
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100},
+ {"key": "100-200", "from": 100, "to": 200},
+ {"key": "200+", "from": 200}
+ ]
+ }
+ ]
+}
+```
+
+**响应示例**(标准化分面格式):
+
+```json
+{
+ "hits": [...],
+ "total": 118,
+ "max_score": 8.5,
+ "took_ms": 45,
+ "facets": [
+ {
+ "field": "categoryName_keyword",
+ "label": "商品类目",
+ "type": "terms",
+ "values": [
+ {"value": "玩具", "label": "玩具", "count": 85, "selected": false},
+ {"value": "益智玩具", "label": "益智玩具", "count": 33, "selected": false}
+ ]
+ },
+ {
+ "field": "price",
+ "label": "价格区间",
+ "type": "range",
+ "values": [
+ {"value": "0-50", "label": "0-50元", "count": 23, "selected": false},
+ {"value": "50-100", "label": "50-100元", "count": 45, "selected": false},
+ {"value": "100-200", "label": "100-200元", "count": 38, "selected": false},
+ {"value": "200+", "label": "200元以上", "count": 12, "selected": false}
+ ]
+ }
+ ]
+}
+```
+
+### 示例 4:搜索建议(框架;以下为功能实现后的预期响应,当前端点返回空的 `suggestions` 列表)
+
+```bash
+GET /search/suggestions?q=芭&size=5
+
+{
+ "query": "芭",
+ "suggestions": [
+ {
+ "text": "芭比娃娃",
+ "type": "query",
+ "highlight": "芭比娃娃",
+ "popularity": 850
+ },
+ {
+ "text": "芭比娃娃屋",
+ "type": "query",
+ "highlight": "芭比娃娃屋",
+ "popularity": 320
+ }
+ ],
+ "took_ms": 5
+}
+```
+
+## 第五部分:向后兼容性
+
+### 兼容策略
+
+为保持向后兼容,在过渡期(1-2 个版本)内:
+
+1. **同时支持旧参数和新参数**:
+```python
+class SearchRequest(BaseModel):
+ # 新参数
+ range_filters: Optional[Dict[str, RangeFilter]] = None
+ facets: Optional[List[Union[str, FacetConfig]]] = None
+
+ # 旧参数(标记为废弃)
+ aggregations: Optional[Dict[str, Any]] = Field(
+ None,
+ deprecated=True,
+ description="已废弃。请使用 'facets' 参数"
+ )
+```
+
+2. **在后端自动转换旧格式**:
+```python
+# 在 searcher.py 中
+if request.aggregations and not request.facets:
+ # 将旧的 aggregations 转换为新的 facets
+ request.facets = self._convert_legacy_aggregations(request.aggregations)
+```
+
+3. **在响应中提供迁移提示**:
+```python
+if request.aggregations:
+ warnings.append({
+ "type": "deprecation",
+ "message": "'aggregations' 参数已废弃,请使用 'facets' 参数",
+ "migration_guide": "https://docs.example.com/migration"
+ })
+```
+
+
+### 迁移时间线
+
+- **v3.0**(当前版本):发布新接口,旧接口标记为废弃
+- **v3.1**(1 个月后):移除旧接口的自动转换
+- **v4.0**(3 个月后):完全移除旧接口
+
+## 第六部分:风险评估与缓解
+
+### 风险点
+
+1. **破坏性变更风险**:
+
+ - 风险:现有客户代码可能依赖旧接口
+ - 缓解:提供向后兼容层,发布详细迁移指南
+
+2. **性能影响风险**:
+
+ - 风险:新的标准化处理可能增加延迟
+ - 缓解:添加性能测试,优化关键路径
+
+3. **测试覆盖不足风险**:
+
+ - 风险:重构可能引入新 bug
+ - 缓解:全面的单元测试和集成测试
+
+### 验收标准
+
+- [ ] 所有单元测试通过
+- [ ] 所有集成测试通过
+- [ ] API 文档完整且准确
+- [ ] 性能无明显下降(< 10% 延迟增加)
+- [ ] 前端功能正常工作
+- [ ] 提供完整的迁移指南
+
+## 总结
+
+本计划通过系统性的重构,将搜索 API 从硬编码、暴露 ES 细节的实现,转变为灵活、通用、易用的 SaaS 产品接口。关键改进包括:
+
+1. ✅ 移除硬编码的 price_ranges 逻辑
+2. ✅ 实现结构化的过滤参数(filters + range_filters)
+3. ✅ 简化聚合参数接口,不暴露 ES DSL
+4. ✅ 标准化分面搜索响应格式
+5. ✅ 添加搜索建议功能框架(暂不实现)
+
+通过这些改进,系统将具备更好的通用性、可维护性和可扩展性,为未来功能扩展奠定基础。
\ No newline at end of file
diff --git a/.cursor/plans/api-interface-analysis-42918612.plan.2.md b/.cursor/plans/api-interface-analysis-42918612.plan.2.md
new file mode 100644
index 0000000..61ab5f5
--- /dev/null
+++ b/.cursor/plans/api-interface-analysis-42918612.plan.2.md
@@ -0,0 +1,1222 @@
+
+# 搜索引擎 API 接口重构实施计划
+
+## 第一部分:现状分析
+
+### 1. 当前实现存在的问题
+
+#### 问题 1:硬编码的价格范围过滤
+
+**位置**:`search/es_query_builder.py` 第 205-233 行
+
+**问题描述**:
+
+```python
+if field == 'price_ranges':
+ # 硬编码特定字符串值
+ if price_range == '0-50':
+ price_ranges.append({"lt": 50})
+ elif price_range == '50-100':
+ price_ranges.append({"gte": 50, "lt": 100})
+ # ...
+```
+
+**影响**:
+
+- 只支持 `price` 字段,无法扩展到其他数值字段
+- 范围值硬编码,无法根据业务需求调整
+- 不符合 SaaS 系统的通用性要求
+
+#### 问题 2:聚合参数直接暴露 ES DSL
+
+**位置**:
+
+- `api/models.py` 第 17 行:`aggregations: Optional[Dict[str, Any]]`
+- `search/es_query_builder.py` 第 298-319 行:`add_dynamic_aggregations`
+- `frontend/static/js/app.js` 第 57-87 行:前端硬编码 ES DSL
+
+**问题描述**:
+
+前端需要了解 Elasticsearch 的聚合语法:
+
+```javascript
+const aggregations = {
+ "category_stats": {
+ "terms": {
+ "field": "categoryName_keyword",
+ "size": 15
+ }
+ },
+ "price_ranges": {
+ "range": {
+ "field": "price",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ // ...
+ ]
+ }
+ }
+};
+```
+
+**影响**:
+
+- 前端需要了解 ES 语法,增加集成难度
+- 不符合 SaaS 产品易用性原则
+- 难以进行参数验证和文档生成
+
+#### 问题 3:分面搜索结果格式不统一
+
+**位置**:`frontend/static/js/app.js` 第 208-258 行
+
+**问题描述**:
+
+- 直接返回 ES 原始格式(`buckets` 结构)
+- 前端需要知道不同聚合类型的响应结构
+- 没有统一的分面结果模型
+
+**影响**:
+
+- 前端解析逻辑复杂
+- 不同类型的聚合处理方式不一致
+- 难以扩展新的聚合类型
+
+#### 问题 4:缺少搜索建议功能
+
+**当前状态**:完全没有实现
+
+**需求**:
+
+- 自动补全(Autocomplete)
+- 搜索建议(Suggestions)
+- 搜索即时反馈(Instant Search)
+
+### 2. 依赖关系分析
+
+**影响范围**:
+
+1. **后端模型层**:`api/models.py`
+2. **查询构建层**:`search/es_query_builder.py`
+3. **搜索执行层**:`search/searcher.py`
+4. **API 路由层**:`api/routes/search.py`
+5. **前端代码**:`frontend/static/js/app.js`
+6. **测试代码**:`test_aggregation_api.py`, `test_complete_search.py`
+
+## 第二部分:优化方案设计
+
+### 方案概述
+
+采用**结构化过滤参数方案(方案 A 的简化版)**:
+
+- 分离 `filters`(精确匹配)和 `range_filters`(范围过滤)
+- **不支持单字段多个不连续范围**,简化设计
+- 标准化聚合参数,使用简化的接口
+- 统一分面搜索响应格式
+
+### 1. 新的请求模型设计
+
+#### 1.1 核心模型定义
+
+**文件**:`api/models.py`
+
+```python
+from pydantic import BaseModel, Field, field_validator
+from typing import List, Dict, Any, Optional, Union, Literal
+
+
+class RangeFilter(BaseModel):
+ """数值范围过滤器"""
+ gte: Optional[float] = Field(None, description="大于等于 (>=)")
+ gt: Optional[float] = Field(None, description="大于 (>)")
+ lte: Optional[float] = Field(None, description="小于等于 (<=)")
+ lt: Optional[float] = Field(None, description="小于 (<)")
+
+ @field_validator('*')
+ def check_at_least_one(cls, v, info):
+ """确保至少指定一个边界"""
+ values = info.data
+ if not any([values.get('gte'), values.get('gt'),
+ values.get('lte'), values.get('lt')]):
+ raise ValueError('至少需要指定一个范围边界')
+ return v
+
+ class Config:
+ json_schema_extra = {
+ "examples": [
+ {"gte": 50, "lte": 200},
+ {"gt": 100},
+ {"lt": 50}
+ ]
+ }
+
+
+class FacetConfig(BaseModel):
+ """分面配置(简化版)"""
+ field: str = Field(..., description="分面字段名")
+ size: int = Field(10, ge=1, le=100, description="返回的分面值数量")
+ type: Literal["terms", "range"] = Field("terms", description="分面类型")
+ ranges: Optional[List[Dict[str, Any]]] = Field(
+ None,
+ description="范围分面的范围定义(仅当 type='range' 时需要)"
+ )
+
+ class Config:
+ json_schema_extra = {
+ "examples": [
+ {
+ "field": "categoryName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "price",
+ "size": 4,
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100},
+ {"key": "100-200", "from": 100, "to": 200},
+ {"key": "200+", "from": 200}
+ ]
+ }
+ ]
+ }
+
+
+class SearchRequest(BaseModel):
+ """搜索请求模型(重构版)"""
+
+ # 基础搜索参数
+ query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)")
+ size: int = Field(10, ge=1, le=100, description="返回结果数量")
+ from_: int = Field(0, ge=0, alias="from", description="分页偏移量")
+
+ # 过滤器 - 精确匹配和多值匹配
+ filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field(
+ None,
+ description="精确匹配过滤器。单值表示精确匹配,数组表示 OR 匹配(匹配任意一个值)",
+ json_schema_extra={
+ "examples": [
+ {
+ "categoryName_keyword": ["玩具", "益智玩具"],
+ "brandName_keyword": "乐高",
+ "in_stock": True
+ }
+ ]
+ }
+ )
+
+ # 范围过滤器 - 数值范围
+ range_filters: Optional[Dict[str, RangeFilter]] = Field(
+ None,
+ description="数值范围过滤器。支持 gte, gt, lte, lt 操作符",
+ json_schema_extra={
+ "examples": [
+ {
+ "price": {"gte": 50, "lte": 200},
+ "days_since_last_update": {"lte": 30}
+ }
+ ]
+ }
+ )
+
+ # 排序
+ sort_by: Optional[str] = Field(None, description="排序字段名(如 'price', 'create_time')")
+ sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)")
+
+ # 分面搜索 - 简化接口
+ facets: Optional[List[Union[str, FacetConfig]]] = Field(
+ None,
+ description="分面配置。可以是字段名列表(使用默认配置)或详细的分面配置对象",
+ json_schema_extra={
+ "examples": [
+ # 简单模式:只指定字段名,使用默认配置
+ ["categoryName_keyword", "brandName_keyword"],
+ # 高级模式:详细配置
+ [
+ {"field": "categoryName_keyword", "size": 15},
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100}
+ ]
+ }
+ ]
+ ]
+ }
+ )
+
+ # 高级选项
+ min_score: Optional[float] = Field(None, ge=0, description="最小相关性分数阈值")
+ highlight: bool = Field(False, description="是否高亮搜索关键词(暂不实现)")
+ debug: bool = Field(False, description="是否返回调试信息")
+
+ # 个性化参数(预留)
+ user_id: Optional[str] = Field(None, description="用户ID,用于个性化搜索和推荐")
+ session_id: Optional[str] = Field(None, description="会话ID,用于搜索分析")
+
+
+class ImageSearchRequest(BaseModel):
+ """图片搜索请求模型"""
+ image_url: str = Field(..., description="查询图片的 URL")
+ size: int = Field(10, ge=1, le=100, description="返回结果数量")
+ filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = None
+ range_filters: Optional[Dict[str, RangeFilter]] = None
+
+
+class SearchSuggestRequest(BaseModel):
+ """搜索建议请求模型(框架,暂不实现)"""
+ query: str = Field(..., min_length=1, description="搜索查询字符串")
+ size: int = Field(5, ge=1, le=20, description="返回建议数量")
+ types: List[Literal["query", "product", "category", "brand"]] = Field(
+ ["query"],
+ description="建议类型:query(查询建议), product(商品建议), category(类目建议), brand(品牌建议)"
+ )
+```
+
+#### 1.2 响应模型定义
+
+```python
+class FacetValue(BaseModel):
+ """分面值"""
+ value: Union[str, int, float] = Field(..., description="分面值")
+ label: Optional[str] = Field(None, description="显示标签(如果与 value 不同)")
+ count: int = Field(..., description="匹配的文档数量")
+ selected: bool = Field(False, description="是否已选中(当前过滤器中)")
+
+
+class FacetResult(BaseModel):
+ """分面结果(标准化格式)"""
+ field: str = Field(..., description="字段名")
+ label: str = Field(..., description="分面显示名称")
+ type: Literal["terms", "range"] = Field(..., description="分面类型")
+ values: List[FacetValue] = Field(..., description="分面值列表")
+ total_count: Optional[int] = Field(None, description="该字段的总文档数")
+
+
+class SearchResponse(BaseModel):
+ """搜索响应模型(重构版)"""
+
+ # 核心结果
+ hits: List[Dict[str, Any]] = Field(..., description="搜索结果列表")
+ total: int = Field(..., description="匹配的总文档数")
+ max_score: float = Field(..., description="最高相关性分数")
+
+ # 分面搜索结果(标准化格式)
+ facets: Optional[List[FacetResult]] = Field(
+ None,
+ description="分面统计结果(标准化格式)"
+ )
+
+ # 查询信息
+ query_info: Dict[str, Any] = Field(
+ default_factory=dict,
+ description="查询处理信息(原始查询、改写、语言检测、翻译等)"
+ )
+
+ # 推荐与建议(预留)
+ related_queries: Optional[List[str]] = Field(None, description="相关搜索查询")
+
+ # 性能指标
+ took_ms: int = Field(..., description="搜索总耗时(毫秒)")
+ performance_info: Optional[Dict[str, Any]] = Field(None, description="详细性能信息")
+
+ # 调试信息
+ debug_info: Optional[Dict[str, Any]] = Field(None, description="调试信息(仅当 debug=True)")
+
+
+class SearchSuggestResponse(BaseModel):
+ """搜索建议响应模型(框架,暂不实现)"""
+ query: str = Field(..., description="原始查询")
+ suggestions: List[Dict[str, Any]] = Field(..., description="建议列表")
+ took_ms: int = Field(..., description="耗时(毫秒)")
+```
+
+### 2. 查询构建器重构
+
+#### 2.1 移除硬编码的 price_ranges 逻辑
+
+**文件**:`search/es_query_builder.py`
+
+**需要修改的方法**:`_build_filters(self, filters, range_filters)`
+
+**改进点**:
+
+1. 移除 `if field == 'price_ranges'` 的特殊处理
+2. 分离 filters 和 range_filters 的处理逻辑(`range_filters` 的每个值须为 dict;若传入的是 `RangeFilter` 模型对象,需先转换为 dict,否则会被静默忽略)
+3. 添加字段类型验证(利用配置系统;下方示例代码中仅以 TODO 占位,尚未实现)
+
+**新的实现逻辑**:
+
+```python
+def _build_filters(
+ self,
+ filters: Optional[Dict[str, Any]] = None,
+ range_filters: Optional[Dict[str, Any]] = None
+) -> List[Dict[str, Any]]:
+ """
+ 构建过滤子句(重构版)。
+
+ Args:
+ filters: 精确匹配过滤器字典
+ range_filters: 范围过滤器字典
+
+ Returns:
+ ES filter子句列表
+ """
+ filter_clauses = []
+
+ # 1. 处理精确匹配过滤
+ if filters:
+ for field, value in filters.items():
+ if isinstance(value, list):
+ # 多值匹配(OR)
+ filter_clauses.append({
+ "terms": {field: value}
+ })
+ else:
+ # 单值精确匹配
+ filter_clauses.append({
+ "term": {field: value}
+ })
+
+ # 2. 处理范围过滤
+ if range_filters:
+ for field, range_spec in range_filters.items():
+ # 验证字段是否为数值类型(可选,基于配置)
+ # TODO: 添加字段类型验证
+
+ # 构建范围查询
+ range_conditions = {}
+ if isinstance(range_spec, dict):
+ for op in ['gte', 'gt', 'lte', 'lt']:
+ if op in range_spec and range_spec[op] is not None:
+ range_conditions[op] = range_spec[op]
+
+ if range_conditions:
+ filter_clauses.append({
+ "range": {field: range_conditions}
+ })
+
+ return filter_clauses
+```
+
+#### 2.2 优化聚合参数接口
+
+**新增方法**:`build_facets(self, facet_configs)`
+
+**改进点**:
+
+1. 移除 `add_dynamic_aggregations`(直接暴露 ES DSL)
+2. 重构 `add_aggregations` 为更通用的 `build_facets`
+3. 支持简化配置(字段名字符串)和高级配置(dict)两种模式;`FacetConfig` 模型对象需先转换为 dict 再传入,否则会被忽略
+
+**新的实现逻辑**:
+
+```python
+def build_facets(
+ self,
+ facet_configs: Optional[List[Union[str, Dict[str, Any]]]] = None
+) -> Dict[str, Any]:
+ """
+ 构建分面聚合(重构版)。
+
+ Args:
+ facet_configs: 分面配置列表。可以是:
+ - 字符串列表:字段名,使用默认配置
+ - 配置对象列表:详细的分面配置
+
+ Returns:
+ ES aggregations字典
+ """
+ if not facet_configs:
+ return {}
+
+ aggs = {}
+
+ for config in facet_configs:
+ # 1. 简单模式:只有字段名
+ if isinstance(config, str):
+ field = config
+ agg_name = f"{field}_facet"
+ aggs[agg_name] = {
+ "terms": {
+ "field": field,
+ "size": 10, # 默认大小
+ "order": {"_count": "desc"}
+ }
+ }
+
+ # 2. 高级模式:详细配置对象
+ elif isinstance(config, dict):
+ field = config['field']
+ facet_type = config.get('type', 'terms')
+ size = config.get('size', 10)
+ agg_name = f"{field}_facet"
+
+ if facet_type == 'terms':
+ # Terms 聚合(分组统计)
+ aggs[agg_name] = {
+ "terms": {
+ "field": field,
+ "size": size,
+ "order": {"_count": "desc"}
+ }
+ }
+
+ elif facet_type == 'range':
+ # Range 聚合(范围统计)
+ ranges = config.get('ranges', [])
+ if ranges:
+ aggs[agg_name] = {
+ "range": {
+ "field": field,
+ "ranges": ranges
+ }
+ }
+
+ return aggs
+```
+
+#### 2.3 更新主查询构建方法
+
+**修改方法签名**:`build_query()`(`search()` 实际调用的 `build_multilang_query()` 也需同步增加 `range_filters` 参数)
+
+```python
+def build_query(
+ self,
+ query_text: str,
+ query_vector: Optional[np.ndarray] = None,
+ query_node: Optional[QueryNode] = None,
+ filters: Optional[Dict[str, Any]] = None,
+ range_filters: Optional[Dict[str, Any]] = None, # 新增
+ size: int = 10,
+ from_: int = 0,
+ enable_knn: bool = True,
+ knn_k: int = 50,
+ knn_num_candidates: int = 200,
+ min_score: Optional[float] = None
+) -> Dict[str, Any]:
+ """构建完整的 ES 查询(重构版)"""
+ # ... 实现
+
+ # 添加过滤器
+ if filters or range_filters:
+ filter_clauses = self._build_filters(filters, range_filters)
+ if filter_clauses:
+ es_query["query"] = {
+ "bool": {
+ "must": [query_clause],
+ "filter": filter_clauses
+ }
+ }
+```
+
+### 3. 搜索执行层重构
+
+**文件**:`search/searcher.py`
+
+**需要修改的方法**:`search()`
+
+**改进点**:
+
+1. 更新方法签名,接受 `range_filters` 参数
+2. 使用新的 `build_facets` 方法替代旧的聚合逻辑
+3. 标准化分面搜索结果
+
+**关键代码片段**:
+
+```python
+def search(
+ self,
+ query: str,
+ size: int = 10,
+ from_: int = 0,
+ filters: Optional[Dict[str, Any]] = None,
+ range_filters: Optional[Dict[str, Any]] = None, # 新增
+ facets: Optional[List[Union[str, Dict]]] = None, # 替代 aggregations
+ min_score: Optional[float] = None,
+ sort_by: Optional[str] = None,
+ sort_order: Optional[str] = "desc",
+ debug: bool = False,
+ context: Optional[RequestContext] = None
+) -> SearchResult:
+ """执行搜索(重构版)"""
+
+ # ... 查询解析 ...
+
+ # 构建 ES 查询
+ es_query = self.query_builder.build_multilang_query(
+ parsed_query=parsed_query,
+ query_vector=parsed_query.query_vector,
+ query_node=query_node,
+ filters=filters,
+ range_filters=range_filters, # 新增
+ size=size,
+ from_=from_,
+ enable_knn=enable_embedding,
+ min_score=min_score
+ )
+
+ # 添加分面聚合
+ if facets:
+ facet_aggs = self.query_builder.build_facets(facets)
+ if facet_aggs:
+ if "aggs" not in es_query:
+ es_query["aggs"] = {}
+ es_query["aggs"].update(facet_aggs)
+
+ # ... 执行搜索 ...
+
+ # 标准化分面结果
+ standardized_facets = self._standardize_facets(
+ es_response.get('aggregations', {}),
+ facets,
+ filters
+ )
+
+ return SearchResult(
+ hits=hits,
+ total=total_value,
+ max_score=max_score,
+ took_ms=int(total_duration),
+ facets=standardized_facets, # 标准化格式
+ query_info=parsed_query.to_dict(),
+ debug_info=debug_info
+ )
+```
+
+**新增辅助方法**:
+
+```python
+def _standardize_facets(
+ self,
+ es_aggregations: Dict[str, Any],
+ facet_configs: Optional[List[Union[str, Dict]]],
+ current_filters: Optional[Dict[str, Any]]
+) -> Optional[List[Dict[str, Any]]]:
+ """
+ 将 ES 聚合结果转换为标准化的分面格式。
+
+ Args:
+ es_aggregations: ES 原始聚合结果
+ facet_configs: 分面配置列表
+ current_filters: 当前应用的过滤器
+
+ Returns:
+ 标准化的分面结果列表
+ """
+ if not es_aggregations or not facet_configs:
+ return None
+
+ standardized_facets = []
+
+ for config in facet_configs:
+ # 解析配置
+ if isinstance(config, str):
+ field = config
+ facet_type = "terms"
+ else:
+ field = config['field']
+ facet_type = config.get('type', 'terms')
+
+ agg_name = f"{field}_facet"
+
+ if agg_name not in es_aggregations:
+ continue
+
+ agg_result = es_aggregations[agg_name]
+
+ # 构建标准化分面结果
+ facet = {
+ "field": field,
+ "label": self._get_field_label(field), # 从配置获取
+ "type": facet_type,
+ "values": []
+ }
+
+ # 获取当前字段的选中值
+ selected_values = set()
+ if current_filters and field in current_filters:
+ filter_value = current_filters[field]
+ if isinstance(filter_value, list):
+ selected_values = set(filter_value)
+ else:
+ selected_values = {filter_value}
+
+ # 转换 buckets
+ if 'buckets' in agg_result:
+ for bucket in agg_result['buckets']:
+ value = bucket.get('key')
+ count = bucket.get('doc_count', 0)
+
+ facet['values'].append({
+ "value": value,
+ "label": str(value), # 可以从配置映射
+ "count": count,
+ "selected": value in selected_values
+ })
+
+ standardized_facets.append(facet)
+
+ return standardized_facets
+
+
+def _get_field_label(self, field: str) -> str:
+ """获取字段的显示标签"""
+ # 从配置中获取字段标签
+ for field_config in self.config.fields:
+ if field_config.name == field:
+ # 假设配置中有 label 字段
+ return getattr(field_config, 'label', field)
+ return field
+```
+
+### 4. API 路由层更新
+
+**文件**:`api/routes/search.py`
+
+**改进点**:
+
+1. 接受新的请求模型参数
+2. 添加搜索建议端点 `/suggestions` 和即时搜索端点 `/instant`(均为框架)
+
+**新增端点**:
+
+```python
+@router.get("/suggestions", response_model=SearchSuggestResponse)
+async def search_suggestions(
+ q: str = Query(..., min_length=1, description="搜索查询"),
+ size: int = Query(5, ge=1, le=20, description="建议数量"),
+ types: str = Query("query", description="建议类型(逗号分隔)")
+):
+ """
+ 获取搜索建议(自动补全)。
+
+ 功能说明:
+ - 查询建议(query):基于历史搜索和热门搜索
+ - 商品建议(product):匹配的商品
+ - 类目建议(category):匹配的类目
+ - 品牌建议(brand):匹配的品牌
+
+ 注意:此功能暂未实现,仅返回框架响应。
+ """
+ import time
+ start_time = time.time()
+
+ # TODO: 实现搜索建议逻辑
+ # 1. 从搜索历史中获取建议
+ # 2. 从商品标题中匹配前缀
+ # 3. 从类目、品牌中匹配
+
+ # 临时返回空结果
+ suggestions = []
+
+ # 示例结构(暂不实现)
+ # suggestions = [
+ # {
+ # "text": "芭比娃娃",
+ # "type": "query",
+ # "highlight": "芭比娃娃",
+ # "popularity": 850
+ # }
+ # ]
+
+ took_ms = int((time.time() - start_time) * 1000)
+
+ return SearchSuggestResponse(
+ query=q,
+ suggestions=suggestions,
+ took_ms=took_ms
+ )
+
+
+@router.get("/instant", response_model=SearchResponse)
+async def instant_search(
+ q: str = Query(..., min_length=2, description="搜索查询"),
+ size: int = Query(5, ge=1, le=20, description="结果数量")
+):
+ """
+ 即时搜索(Instant Search)。
+
+ 功能说明:
+ - 边输入边搜索,无需点击搜索按钮
+ - 返回简化的搜索结果
+ - 性能优化:缓存、限流
+
+ 注意:此功能暂未实现,调用标准搜索接口。
+ """
+ # TODO: 优化即时搜索性能
+ # 1. 添加防抖/节流
+ # 2. 实现结果缓存
+ # 3. 简化返回字段
+
+ # 临时使用标准搜索接口
+ from api.app import get_searcher
+ searcher = get_searcher()
+
+ result = searcher.search(
+ query=q,
+ size=size,
+ from_=0
+ )
+
+ return SearchResponse(
+ hits=result.hits,
+ total=result.total,
+ max_score=result.max_score,
+ took_ms=result.took_ms,
+ query_info=result.query_info
+ )
+```
+
+### 5. 前端适配
+
+**文件**:`frontend/static/js/app.js`
+
+**需要修改的地方**:
+
+1. **聚合参数改用简化配置**(第 57-87 行):
+```javascript
+// 旧的方式(直接 ES DSL)
+const aggregations = {
+ "category_stats": {
+ "terms": {
+ "field": "categoryName_keyword",
+ "size": 15
+ }
+ }
+};
+
+// 新的方式(简化配置)
+const facets = [
+ {
+ "field": "categoryName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "brandName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100},
+ {"key": "100-200", "from": 100, "to": 200},
+ {"key": "200+", "from": 200}
+ ]
+ }
+];
+```
+
+2. **过滤器使用新格式**(第 103 行):
+```javascript
+// 旧的方式
+filters: {
+ "price_ranges": ["0-50", "50-100"] // 硬编码
+}
+
+// 新的方式
+filters: {
+ "categoryName_keyword": ["玩具"],
+ "in_stock": true
+},
+range_filters: {
+ "price": {"gte": 50, "lte": 100}
+}
+```
+
+3. **解析标准化的分面结果**(第 208-258 行):
+```javascript
+// 旧的方式(直接访问 ES 结构)
+if (aggregations.category_stats && aggregations.category_stats.buckets) {
+ aggregations.category_stats.buckets.forEach(bucket => {
+ // ...
+ });
+}
+
+// 新的方式(标准化格式)
+if (data.facets) {
+ data.facets.forEach(facet => {
+ if (facet.field === 'categoryName_keyword') {
+ facet.values.forEach(facetValue => {
+ const value = facetValue.value;
+ const count = facetValue.count;
+ const selected = facetValue.selected;
+ // ...
+ });
+ }
+ });
+}
+```
+
+
+### 6. 测试代码更新
+
+**文件**:`test_aggregation_api.py`
+
+**需要修改的地方**:
+
+1. 移除 `price_ranges` 硬编码测试(第 93 行)
+2. 使用新的 `range_filters` 格式
+3. 使用新的 `facets` 配置
+
+**新的测试代码**:
+
+```python
+def test_search_with_filters():
+ """测试新的过滤器格式"""
+ test_request = {
+ "query": "玩具",
+ "size": 5,
+ "filters": {
+ "categoryName_keyword": ["玩具"]
+ },
+ "range_filters": {
+ "price": {"gte": 50, "lte": 100}
+ }
+ }
+ # ...
+
+def test_search_with_facets():
+ """测试新的分面配置"""
+ test_request = {
+ "query": "玩具",
+ "size": 20,
+ "facets": [
+ {
+ "field": "categoryName_keyword",
+ "size": 15
+ },
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100}
+ ]
+ }
+ ]
+ }
+ # ...
+```
+
+## 第三部分:实施步骤
+
+### 阶段 1:后端模型层重构(高优先级)
+
+**任务清单**:
+
+- [ ] 更新 `api/models.py`
+ - [ ] 定义 `RangeFilter` 模型
+ - [ ] 定义 `FacetConfig` 模型
+ - [ ] 更新 `SearchRequest`,添加 `range_filters` 和 `facets`
+  - [ ] 将 `aggregations` 参数标记为废弃(过渡期内保留,v4.0 完全移除,详见第五部分"向后兼容性")
+ - [ ] 定义 `FacetValue` 和 `FacetResult` 模型
+ - [ ] 更新 `SearchResponse`,使用标准化分面格式
+ - [ ] 添加 `SearchSuggestRequest` 和 `SearchSuggestResponse`(框架)
+
+**验证方式**:
+
+- 运行 Pydantic 模型验证
+- 检查 API 文档(`/docs`)是否正确生成
+
+### 阶段 2:查询构建器重构(高优先级)
+
+**任务清单**:
+
+- [ ] 重构 `search/es_query_builder.py`
+ - [ ] 移除 `price_ranges` 硬编码逻辑(第 205-233 行)
+ - [ ] 重构 `_build_filters` 方法,支持 `range_filters`
+ - [ ] 移除 `add_dynamic_aggregations` 方法
+ - [ ] 重构 `add_aggregations` 为 `build_facets`
+ - [ ] 更新 `build_query` 方法签名
+- [ ] 更新 `search/multilang_query_builder.py`(如果需要)
+
+**验证方式**:
+
+- 编写单元测试验证过滤器构建逻辑
+- 打印生成的 ES DSL,检查正确性
+
+### 阶段 3:搜索执行层重构(高优先级)
+
+**任务清单**:
+
+- [ ] 更新 `search/searcher.py`
+ - [ ] 更新 `search()` 方法签名
+ - [ ] 使用新的 `build_facets` 方法
+ - [ ] 实现 `_standardize_facets()` 辅助方法
+ - [ ] 实现 `_get_field_label()` 辅助方法
+ - [ ] 更新 `SearchResult` 类,使用标准化分面格式
+
+**验证方式**:
+
+- 编写集成测试
+- 手动测试搜索功能
+
+### 阶段 4:API 路由层更新(中优先级)
+
+**任务清单**:
+
+- [ ] 更新 `api/routes/search.py`
+ - [ ] 更新 `/search/` 端点,接受新的请求参数
+ - [ ] 添加 `/search/suggestions` 端点(框架,返回空结果)
+ - [ ] 添加 `/search/instant` 端点(框架,调用标准搜索)
+ - [ ] 添加端点文档和示例
+
+**验证方式**:
+
+- 使用 Swagger UI 测试端点
+- 检查 API 文档完整性
+
+### 阶段 5:前端适配(中优先级)
+
+**任务清单**:
+
+- [ ] 更新 `frontend/static/js/app.js`
+ - [ ] 修改聚合参数为 `facets` 简化配置
+ - [ ] 修改过滤器参数,分离 `filters` 和 `range_filters`
+ - [ ] 更新 `displayAggregations()` 方法,解析标准化分面结果
+ - [ ] 添加范围过滤器 UI(如价格滑块)
+ - [ ] 移除硬编码的 `price_ranges`
+
+**验证方式**:
+
+- 浏览器测试前端功能
+- 检查网络请求和响应格式
+
+### 阶段 6:测试代码更新(低优先级)
+
+**任务清单**:
+
+- [ ] 更新 `test_aggregation_api.py`
+ - [ ] 移除 `price_ranges` 测试
+ - [ ] 添加 `range_filters` 测试
+ - [ ] 添加新的 `facets` 测试
+- [ ] 更新 `test_complete_search.py`
+- [ ] 更新 `tests/integration/test_aggregation_api.py`
+- [ ] 更新 `tests/unit/test_searcher.py`
+
+**验证方式**:
+
+- 运行所有测试,确保通过
+- 检查测试覆盖率
+
+### 阶段 7:文档更新(低优先级)
+
+**任务清单**:
+
+- [ ] 撰写完整的 API 接口文档
+- [ ] 更新 `README.md`
+- [ ] 更新 `USER_GUIDE.md`
+- [ ] 添加接口使用示例
+- [ ] 添加迁移指南(旧接口 → 新接口)
+
+## 第四部分:API 使用示例
+
+### 示例 1:简单搜索
+
+```bash
+POST /search/
+{
+ "query": "芭比娃娃",
+ "size": 20
+}
+```
+
+### 示例 2:带过滤器的搜索
+
+```bash
+POST /search/
+{
+ "query": "玩具",
+ "size": 20,
+ "filters": {
+ "categoryName_keyword": ["玩具", "益智玩具"],
+ "in_stock": true
+ },
+ "range_filters": {
+ "price": {"gte": 50, "lte": 200}
+ }
+}
+```
+
+### 示例 3:带分面搜索的请求
+
+```bash
+POST /search/
+{
+ "query": "玩具",
+ "size": 20,
+ "facets": [
+ {
+ "field": "categoryName_keyword",
+ "size": 15
+ },
+ {
+ "field": "brandName_keyword",
+ "size": 15
+ },
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100},
+ {"key": "100-200", "from": 100, "to": 200},
+ {"key": "200+", "from": 200}
+ ]
+ }
+ ]
+}
+```
+
+**响应示例**(标准化分面格式):
+
+```json
+{
+ "hits": [...],
+ "total": 118,
+ "max_score": 8.5,
+ "took_ms": 45,
+ "facets": [
+ {
+ "field": "categoryName_keyword",
+ "label": "商品类目",
+ "type": "terms",
+ "values": [
+ {"value": "玩具", "label": "玩具", "count": 85, "selected": false},
+ {"value": "益智玩具", "label": "益智玩具", "count": 33, "selected": false}
+ ]
+ },
+ {
+ "field": "price",
+ "label": "价格区间",
+ "type": "range",
+ "values": [
+ {"value": "0-50", "label": "0-50元", "count": 23, "selected": false},
+ {"value": "50-100", "label": "50-100元", "count": 45, "selected": false},
+ {"value": "100-200", "label": "100-200元", "count": 38, "selected": false},
+ {"value": "200+", "label": "200元以上", "count": 12, "selected": false}
+ ]
+ }
+ ]
+}
+```
+
+### 示例 4:搜索建议(框架)
+
+> 说明:下面的响应是功能实现后的目标格式示例;当前框架实现始终返回空的 `suggestions` 列表。
+
+```bash
+GET /search/suggestions?q=芭&size=5
+
+{
+ "query": "芭",
+ "suggestions": [
+ {
+ "text": "芭比娃娃",
+ "type": "query",
+ "highlight": "芭比娃娃",
+ "popularity": 850
+ },
+ {
+ "text": "芭比娃娃屋",
+ "type": "query",
+ "highlight": "芭比娃娃屋",
+ "popularity": 320
+ }
+ ],
+ "took_ms": 5
+}
+```
+
+## 第五部分:向后兼容性
+
+### 兼容策略
+
+为保持向后兼容,在过渡期(1-2 个版本)内:
+
+1. **同时支持旧参数和新参数**:
+```python
+class SearchRequest(BaseModel):
+ # 新参数
+ range_filters: Optional[Dict[str, RangeFilter]] = None
+ facets: Optional[List[Union[str, FacetConfig]]] = None
+
+ # 旧参数(标记为废弃)
+ aggregations: Optional[Dict[str, Any]] = Field(
+ None,
+ deprecated=True,
+ description="已废弃。请使用 'facets' 参数"
+ )
+```
+
+2. **在后端自动转换旧格式**:
+```python
+# 在 searcher.py 中
+if request.aggregations and not request.facets:
+ # 将旧的 aggregations 转换为新的 facets
+ request.facets = self._convert_legacy_aggregations(request.aggregations)
+```
+
+3. **在响应中提供迁移提示**:
+```python
+if request.aggregations:
+ warnings.append({
+ "type": "deprecation",
+ "message": "'aggregations' 参数已废弃,请使用 'facets' 参数",
+ "migration_guide": "https://docs.example.com/migration"
+ })
+```
+
+
+### 迁移时间线
+
+- **v3.0**(当前版本):发布新接口,旧接口标记为废弃
+- **v3.1**(1 个月后):移除旧接口的自动转换
+- **v4.0**(3 个月后):完全移除旧接口
+
+## 第六部分:风险评估与缓解
+
+### 风险点
+
+1. **破坏性变更风险**:
+
+ - 风险:现有客户代码可能依赖旧接口
+ - 缓解:提供向后兼容层,发布详细迁移指南
+
+2. **性能影响风险**:
+
+ - 风险:新的标准化处理可能增加延迟
+ - 缓解:添加性能测试,优化关键路径
+
+3. **测试覆盖不足风险**:
+
+ - 风险:重构可能引入新 bug
+ - 缓解:全面的单元测试和集成测试
+
+### 验收标准
+
+- [ ] 所有单元测试通过
+- [ ] 所有集成测试通过
+- [ ] API 文档完整且准确
+- [ ] 性能无明显下降(< 10% 延迟增加)
+- [ ] 前端功能正常工作
+- [ ] 提供完整的迁移指南
+
+## 总结
+
+本计划通过系统性的重构,将搜索 API 从硬编码、暴露 ES 细节的实现,转变为灵活、通用、易用的 SaaS 产品接口。关键改进包括:
+
+1. ✅ 移除硬编码的 price_ranges 逻辑
+2. ✅ 实现结构化的过滤参数(filters + range_filters)
+3. ✅ 简化聚合参数接口,不暴露 ES DSL
+4. ✅ 标准化分面搜索响应格式
+5. ✅ 添加搜索建议功能框架(暂不实现)
+
+通过这些改进,系统将具备更好的通用性、可维护性和可扩展性,为未来功能扩展奠定基础。
\ No newline at end of file
diff --git a/.cursor/plans/api-interface-analysis-42918612.plan.3.最终执行.md b/.cursor/plans/api-interface-analysis-42918612.plan.3.最终执行.md
new file mode 100644
index 0000000..58bec89
--- /dev/null
+++ b/.cursor/plans/api-interface-analysis-42918612.plan.3.最终执行.md
@@ -0,0 +1,1402 @@
+
+# 搜索引擎 API 接口重构实施计划
+
+## 第一部分:现状分析
+
+### 1. 当前实现存在的问题
+
+#### 问题 1:硬编码的价格范围过滤
+
+**位置**:`search/es_query_builder.py` 第 205-233 行
+
+**问题描述**:
+
+```python
+if field == 'price_ranges':
+ # 硬编码特定字符串值
+ if price_range == '0-50':
+ price_ranges.append({"lt": 50})
+ elif price_range == '50-100':
+ price_ranges.append({"gte": 50, "lt": 100})
+ # ...
+```
+
+**影响**:
+
+- 只支持 `price` 字段,无法扩展到其他数值字段
+- 范围值硬编码,无法根据业务需求调整
+- 不符合 SaaS 系统的通用性要求
+
+#### 问题 2:聚合参数直接暴露 ES DSL
+
+**位置**:
+
+- `api/models.py` 第 17 行:`aggregations: Optional[Dict[str, Any]]`
+- `search/es_query_builder.py` 第 298-319 行:`add_dynamic_aggregations`
+- `frontend/static/js/app.js` 第 57-87 行:前端硬编码 ES DSL
+
+**问题描述**:
+
+前端需要了解 Elasticsearch 的聚合语法:
+
+```javascript
+const aggregations = {
+ "category_stats": {
+ "terms": {
+ "field": "categoryName_keyword",
+ "size": 15
+ }
+ },
+ "price_ranges": {
+ "range": {
+ "field": "price",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ // ...
+ ]
+ }
+ }
+};
+```
+
+**影响**:
+
+- 前端需要了解 ES 语法,增加集成难度
+- 不符合 SaaS 产品易用性原则
+- 难以进行参数验证和文档生成
+
+#### 问题 3:分面搜索结果格式不统一
+
+**位置**:`frontend/static/js/app.js` 第 208-258 行
+
+**问题描述**:
+
+- 直接返回 ES 原始格式(`buckets` 结构)
+- 前端需要知道不同聚合类型的响应结构
+- 没有统一的分面结果模型
+
+**影响**:
+
+- 前端解析逻辑复杂
+- 不同类型的聚合处理方式不一致
+- 难以扩展新的聚合类型
+
+#### 问题 4:缺少搜索建议功能
+
+**当前状态**:完全没有实现
+
+**需求**:
+
+- 自动补全(Autocomplete)
+- 搜索建议(Suggestions)
+- 搜索即时反馈(Instant Search)
+
+### 2. 依赖关系分析
+
+**影响范围**:
+
+1. **后端模型层**:`api/models.py`
+2. **查询构建层**:`search/es_query_builder.py`
+3. **搜索执行层**:`search/searcher.py`
+4. **API 路由层**:`api/routes/search.py`
+5. **前端代码**:`frontend/static/js/app.js`
+
+## 第二部分:优化方案设计
+
+### 方案概述
+
+采用**结构化过滤参数方案(方案 A 的简化版)**:
+
+- 分离 `filters`(精确匹配)和 `range_filters`(范围过滤)
+- **不支持单字段多个不连续范围**,简化设计
+- 标准化聚合参数,使用简化的接口
+- 统一分面搜索响应格式
+- **完全重构,不保留旧接口和兼容代码**
+
+### 1. 新的请求模型设计
+
+#### 1.1 核心模型定义
+
+**文件**:`api/models.py`
+
+```python
+from pydantic import BaseModel, Field, field_validator, model_validator
+from typing import List, Dict, Any, Optional, Union, Literal
+
+
+class RangeFilter(BaseModel):
+    """数值范围过滤器"""
+    gte: Optional[float] = Field(None, description="大于等于 (>=)")
+    gt: Optional[float] = Field(None, description="大于 (>)")
+    lte: Optional[float] = Field(None, description="小于等于 (<=)")
+    lt: Optional[float] = Field(None, description="小于 (<)")
+
+    @model_validator(mode='after')
+    def check_at_least_one(self):
+        """确保至少指定一个边界"""
+        # 必须在所有字段解析完成后整体校验:字段级校验器只能看到之前已校验的字段,
+        # 会导致合法请求被拒绝;用 is None 判断,避免把边界值 0 当作"未指定"
+        if all(bound is None for bound in (self.gte, self.gt, self.lte, self.lt)):
+            raise ValueError('至少需要指定一个范围边界')
+        return self
+
+ class Config:
+ json_schema_extra = {
+ "examples": [
+ {"gte": 50, "lte": 200},
+ {"gt": 100},
+ {"lt": 50}
+ ]
+ }
+
+
+class FacetConfig(BaseModel):
+ """分面配置(简化版)"""
+ field: str = Field(..., description="分面字段名")
+ size: int = Field(10, ge=1, le=100, description="返回的分面值数量")
+ type: Literal["terms", "range"] = Field("terms", description="分面类型")
+ ranges: Optional[List[Dict[str, Any]]] = Field(
+ None,
+ description="范围分面的范围定义(仅当 type='range' 时需要)"
+ )
+
+ class Config:
+ json_schema_extra = {
+ "examples": [
+ {
+ "field": "categoryName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "price",
+ "size": 4,
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100},
+ {"key": "100-200", "from": 100, "to": 200},
+ {"key": "200+", "from": 200}
+ ]
+ }
+ ]
+ }
+
+
+class SearchRequest(BaseModel):
+ """搜索请求模型(重构版)"""
+
+ # 基础搜索参数
+ query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)")
+ size: int = Field(10, ge=1, le=100, description="返回结果数量")
+ from_: int = Field(0, ge=0, alias="from", description="分页偏移量")
+
+ # 过滤器 - 精确匹配和多值匹配
+ filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field(
+ None,
+ description="精确匹配过滤器。单值表示精确匹配,数组表示 OR 匹配(匹配任意一个值)",
+ json_schema_extra={
+ "examples": [
+ {
+ "categoryName_keyword": ["玩具", "益智玩具"],
+ "brandName_keyword": "乐高",
+ "in_stock": True
+ }
+ ]
+ }
+ )
+
+ # 范围过滤器 - 数值范围
+ range_filters: Optional[Dict[str, RangeFilter]] = Field(
+ None,
+ description="数值范围过滤器。支持 gte, gt, lte, lt 操作符",
+ json_schema_extra={
+ "examples": [
+ {
+ "price": {"gte": 50, "lte": 200},
+ "days_since_last_update": {"lte": 30}
+ }
+ ]
+ }
+ )
+
+ # 排序
+ sort_by: Optional[str] = Field(None, description="排序字段名(如 'price', 'create_time')")
+ sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)")
+
+ # 分面搜索 - 简化接口
+ facets: Optional[List[Union[str, FacetConfig]]] = Field(
+ None,
+ description="分面配置。可以是字段名列表(使用默认配置)或详细的分面配置对象",
+ json_schema_extra={
+ "examples": [
+ # 简单模式:只指定字段名,使用默认配置
+ ["categoryName_keyword", "brandName_keyword"],
+ # 高级模式:详细配置
+ [
+ {"field": "categoryName_keyword", "size": 15},
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100}
+ ]
+ }
+ ]
+ ]
+ }
+ )
+
+ # 高级选项
+ min_score: Optional[float] = Field(None, ge=0, description="最小相关性分数阈值")
+ highlight: bool = Field(False, description="是否高亮搜索关键词(暂不实现)")
+ debug: bool = Field(False, description="是否返回调试信息")
+
+ # 个性化参数(预留)
+ user_id: Optional[str] = Field(None, description="用户ID,用于个性化搜索和推荐")
+ session_id: Optional[str] = Field(None, description="会话ID,用于搜索分析")
+
+
+class ImageSearchRequest(BaseModel):
+ """图片搜索请求模型"""
+ image_url: str = Field(..., description="查询图片的 URL")
+ size: int = Field(10, ge=1, le=100, description="返回结果数量")
+ filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = None
+ range_filters: Optional[Dict[str, RangeFilter]] = None
+
+
+class SearchSuggestRequest(BaseModel):
+ """搜索建议请求模型(框架,暂不实现)"""
+ query: str = Field(..., min_length=1, description="搜索查询字符串")
+ size: int = Field(5, ge=1, le=20, description="返回建议数量")
+ types: List[Literal["query", "product", "category", "brand"]] = Field(
+ ["query"],
+ description="建议类型:query(查询建议), product(商品建议), category(类目建议), brand(品牌建议)"
+ )
+```
+
+#### 1.2 响应模型定义
+
+```python
+class FacetValue(BaseModel):
+ """分面值"""
+ value: Union[str, int, float] = Field(..., description="分面值")
+ label: Optional[str] = Field(None, description="显示标签(如果与 value 不同)")
+ count: int = Field(..., description="匹配的文档数量")
+ selected: bool = Field(False, description="是否已选中(当前过滤器中)")
+
+
+class FacetResult(BaseModel):
+ """分面结果(标准化格式)"""
+ field: str = Field(..., description="字段名")
+ label: str = Field(..., description="分面显示名称")
+ type: Literal["terms", "range"] = Field(..., description="分面类型")
+ values: List[FacetValue] = Field(..., description="分面值列表")
+ total_count: Optional[int] = Field(None, description="该字段的总文档数")
+
+
+class SearchResponse(BaseModel):
+ """搜索响应模型(重构版)"""
+
+ # 核心结果
+ hits: List[Dict[str, Any]] = Field(..., description="搜索结果列表")
+ total: int = Field(..., description="匹配的总文档数")
+ max_score: float = Field(..., description="最高相关性分数")
+
+ # 分面搜索结果(标准化格式)
+ facets: Optional[List[FacetResult]] = Field(
+ None,
+ description="分面统计结果(标准化格式)"
+ )
+
+ # 查询信息
+ query_info: Dict[str, Any] = Field(
+ default_factory=dict,
+ description="查询处理信息(原始查询、改写、语言检测、翻译等)"
+ )
+
+ # 推荐与建议(预留)
+ related_queries: Optional[List[str]] = Field(None, description="相关搜索查询")
+
+ # 性能指标
+ took_ms: int = Field(..., description="搜索总耗时(毫秒)")
+ performance_info: Optional[Dict[str, Any]] = Field(None, description="详细性能信息")
+
+ # 调试信息
+ debug_info: Optional[Dict[str, Any]] = Field(None, description="调试信息(仅当 debug=True)")
+
+
+class SearchSuggestResponse(BaseModel):
+ """搜索建议响应模型(框架,暂不实现)"""
+ query: str = Field(..., description="原始查询")
+ suggestions: List[Dict[str, Any]] = Field(..., description="建议列表")
+ took_ms: int = Field(..., description="耗时(毫秒)")
+```
+
+### 2. 查询构建器重构
+
+#### 2.1 移除硬编码的 price_ranges 逻辑
+
+**文件**:`search/es_query_builder.py`
+
+**需要修改的方法**:`_build_filters(self, filters, range_filters)`
+
+**改进点**:
+
+1. **完全移除** `if field == 'price_ranges'` 的特殊处理代码
+2. 分离 filters 和 range_filters 的处理逻辑
+3. 添加字段类型验证(利用配置系统)
+
+**新的实现逻辑**:
+
+```python
+def _build_filters(
+ self,
+ filters: Optional[Dict[str, Any]] = None,
+ range_filters: Optional[Dict[str, Any]] = None
+) -> List[Dict[str, Any]]:
+ """
+ 构建过滤子句(重构版)。
+
+ Args:
+ filters: 精确匹配过滤器字典
+ range_filters: 范围过滤器字典
+
+ Returns:
+ ES filter子句列表
+ """
+ filter_clauses = []
+
+ # 1. 处理精确匹配过滤
+ if filters:
+ for field, value in filters.items():
+ if isinstance(value, list):
+ # 多值匹配(OR)
+ filter_clauses.append({
+ "terms": {field: value}
+ })
+ else:
+ # 单值精确匹配
+ filter_clauses.append({
+ "term": {field: value}
+ })
+
+ # 2. 处理范围过滤
+ if range_filters:
+ for field, range_spec in range_filters.items():
+ # 构建范围查询
+ range_conditions = {}
+ if isinstance(range_spec, dict):
+ for op in ['gte', 'gt', 'lte', 'lt']:
+ if op in range_spec and range_spec[op] is not None:
+ range_conditions[op] = range_spec[op]
+
+ if range_conditions:
+ filter_clauses.append({
+ "range": {field: range_conditions}
+ })
+
+ return filter_clauses
+```
+
+#### 2.2 优化聚合参数接口
+
+**需要完全移除的方法**:
+
+- `add_dynamic_aggregations` - 直接暴露 ES DSL,完全删除
+
+**需要重构的方法**:
+
+- `add_aggregations` → 重构为 `build_facets`
+
+**新增方法**:`build_facets(self, facet_configs)`
+
+**新的实现逻辑**:
+
+```python
+def build_facets(
+ self,
+ facet_configs: Optional[List[Union[str, Dict[str, Any]]]] = None
+) -> Dict[str, Any]:
+ """
+ 构建分面聚合(重构版)。
+
+ Args:
+ facet_configs: 分面配置列表。可以是:
+ - 字符串列表:字段名,使用默认配置
+ - 配置对象列表:详细的分面配置
+
+ Returns:
+ ES aggregations字典
+ """
+ if not facet_configs:
+ return {}
+
+ aggs = {}
+
+ for config in facet_configs:
+ # 1. 简单模式:只有字段名
+ if isinstance(config, str):
+ field = config
+ agg_name = f"{field}_facet"
+ aggs[agg_name] = {
+ "terms": {
+ "field": field,
+ "size": 10, # 默认大小
+ "order": {"_count": "desc"}
+ }
+ }
+
+ # 2. 高级模式:详细配置对象
+ elif isinstance(config, dict):
+ field = config['field']
+ facet_type = config.get('type', 'terms')
+ size = config.get('size', 10)
+ agg_name = f"{field}_facet"
+
+ if facet_type == 'terms':
+ # Terms 聚合(分组统计)
+ aggs[agg_name] = {
+ "terms": {
+ "field": field,
+ "size": size,
+ "order": {"_count": "desc"}
+ }
+ }
+
+ elif facet_type == 'range':
+ # Range 聚合(范围统计)
+ ranges = config.get('ranges', [])
+ if ranges:
+ aggs[agg_name] = {
+ "range": {
+ "field": field,
+ "ranges": ranges
+ }
+ }
+
+ return aggs
+```
+
+#### 2.3 更新主查询构建方法
+
+**修改方法签名**:`build_query()`
+
+```python
+def build_query(
+ self,
+ query_text: str,
+ query_vector: Optional[np.ndarray] = None,
+ query_node: Optional[QueryNode] = None,
+ filters: Optional[Dict[str, Any]] = None,
+ range_filters: Optional[Dict[str, Any]] = None, # 新增
+ size: int = 10,
+ from_: int = 0,
+ enable_knn: bool = True,
+ knn_k: int = 50,
+ knn_num_candidates: int = 200,
+ min_score: Optional[float] = None
+) -> Dict[str, Any]:
+ """构建完整的 ES 查询(重构版)"""
+ # ... 实现
+
+ # 添加过滤器
+ if filters or range_filters:
+ filter_clauses = self._build_filters(filters, range_filters)
+ if filter_clauses:
+ es_query["query"] = {
+ "bool": {
+ "must": [query_clause],
+ "filter": filter_clauses
+ }
+ }
+```
+
+### 3. 搜索执行层重构
+
+**文件**:`search/searcher.py`
+
+**需要修改的方法**:`search()`
+
+**改进点**:
+
+1. 更新方法签名,接受 `range_filters` 和 `facets` 参数
+2. **完全移除** `aggregations` 参数支持
+3. 使用新的 `build_facets` 方法替代旧的聚合逻辑
+4. 标准化分面搜索结果
+
+**关键代码片段**:
+
+```python
+def search(
+ self,
+ query: str,
+ size: int = 10,
+ from_: int = 0,
+ filters: Optional[Dict[str, Any]] = None,
+ range_filters: Optional[Dict[str, Any]] = None, # 新增
+ facets: Optional[List[Union[str, Dict]]] = None, # 替代 aggregations
+ min_score: Optional[float] = None,
+ sort_by: Optional[str] = None,
+ sort_order: Optional[str] = "desc",
+ debug: bool = False,
+ context: Optional[RequestContext] = None
+) -> SearchResult:
+ """执行搜索(重构版)"""
+
+ # ... 查询解析 ...
+
+ # 构建 ES 查询
+ es_query = self.query_builder.build_multilang_query(
+ parsed_query=parsed_query,
+ query_vector=parsed_query.query_vector,
+ query_node=query_node,
+ filters=filters,
+ range_filters=range_filters, # 新增
+ size=size,
+ from_=from_,
+ enable_knn=enable_embedding,
+ min_score=min_score
+ )
+
+ # 添加分面聚合(完全替代旧的 aggregations 逻辑)
+ if facets:
+ facet_aggs = self.query_builder.build_facets(facets)
+ if facet_aggs:
+ if "aggs" not in es_query:
+ es_query["aggs"] = {}
+ es_query["aggs"].update(facet_aggs)
+
+ # ... 执行搜索 ...
+
+ # 标准化分面结果
+ standardized_facets = self._standardize_facets(
+ es_response.get('aggregations', {}),
+ facets,
+ filters
+ )
+
+ return SearchResult(
+ hits=hits,
+ total=total_value,
+ max_score=max_score,
+ took_ms=int(total_duration),
+ facets=standardized_facets, # 标准化格式
+ query_info=parsed_query.to_dict(),
+ debug_info=debug_info
+ )
+```
+
+**新增辅助方法**:
+
+```python
+def _standardize_facets(
+ self,
+ es_aggregations: Dict[str, Any],
+ facet_configs: Optional[List[Union[str, Dict]]],
+ current_filters: Optional[Dict[str, Any]]
+) -> Optional[List[Dict[str, Any]]]:
+ """
+ 将 ES 聚合结果转换为标准化的分面格式。
+
+ Args:
+ es_aggregations: ES 原始聚合结果
+ facet_configs: 分面配置列表
+ current_filters: 当前应用的过滤器
+
+ Returns:
+ 标准化的分面结果列表
+ """
+ if not es_aggregations or not facet_configs:
+ return None
+
+ standardized_facets = []
+
+ for config in facet_configs:
+ # 解析配置
+ if isinstance(config, str):
+ field = config
+ facet_type = "terms"
+ else:
+ field = config['field']
+ facet_type = config.get('type', 'terms')
+
+ agg_name = f"{field}_facet"
+
+ if agg_name not in es_aggregations:
+ continue
+
+ agg_result = es_aggregations[agg_name]
+
+ # 构建标准化分面结果
+ facet = {
+ "field": field,
+ "label": self._get_field_label(field),
+ "type": facet_type,
+ "values": []
+ }
+
+ # 获取当前字段的选中值
+ selected_values = set()
+ if current_filters and field in current_filters:
+ filter_value = current_filters[field]
+ if isinstance(filter_value, list):
+ selected_values = set(filter_value)
+ else:
+ selected_values = {filter_value}
+
+ # 转换 buckets
+ if 'buckets' in agg_result:
+ for bucket in agg_result['buckets']:
+ value = bucket.get('key')
+ count = bucket.get('doc_count', 0)
+
+ facet['values'].append({
+ "value": value,
+ "label": str(value),
+ "count": count,
+ "selected": value in selected_values
+ })
+
+ standardized_facets.append(facet)
+
+ return standardized_facets
+
+
+def _get_field_label(self, field: str) -> str:
+ """获取字段的显示标签"""
+ # 从配置中获取字段标签
+ for field_config in self.config.fields:
+ if field_config.name == field:
+ return getattr(field_config, 'label', field)
+ return field
+```
+
+### 4. API 路由层更新
+
+**文件**:`api/routes/search.py`
+
+**改进点**:
+
+1. 接受新的请求模型参数
+2. 添加搜索建议端点(框架)
+3. **完全移除**旧的 `aggregations` 参数支持
+
+**主搜索端点更新**:
+
+```python
+@router.post("/", response_model=SearchResponse)
+async def search(request: SearchRequest, http_request: Request):
+ """
+ 执行文本搜索。
+
+ 支持:
+ - 布尔表达式(AND, OR, RANK, ANDNOT)
+ - 精确匹配过滤器
+ - 范围过滤器
+ - 分面搜索
+ - 自定义排序
+ """
+ # ... 实现使用新的参数
+ result = searcher.search(
+ query=request.query,
+ size=request.size,
+ from_=request.from_,
+ filters=request.filters,
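+        # 查询构建器和分面标准化逻辑按 dict 处理参数,
+        # 直接传入 Pydantic 模型对象会被静默忽略或报错,因此在此处先转换为 dict
+        range_filters={
+            field: spec.model_dump(exclude_none=True)
+            for field, spec in request.range_filters.items()
+        } if request.range_filters else None,  # 新增
+        facets=[
+            f if isinstance(f, str) else f.model_dump(exclude_none=True)
+            for f in request.facets
+        ] if request.facets else None,  # 替代 aggregations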
+ min_score=request.min_score,
+ sort_by=request.sort_by,
+ sort_order=request.sort_order,
+ debug=request.debug,
+ context=context
+ )
+```
+
+**新增端点**:
+
+```python
+@router.get("/suggestions", response_model=SearchSuggestResponse)
+async def search_suggestions(
+ q: str = Query(..., min_length=1, description="搜索查询"),
+ size: int = Query(5, ge=1, le=20, description="建议数量"),
+ types: str = Query("query", description="建议类型(逗号分隔)")
+):
+ """
+ 获取搜索建议(自动补全)。
+
+ 功能说明:
+ - 查询建议(query):基于历史搜索和热门搜索
+ - 商品建议(product):匹配的商品
+ - 类目建议(category):匹配的类目
+ - 品牌建议(brand):匹配的品牌
+
+ 注意:此功能暂未实现,仅返回框架响应。
+ """
+ import time
+ start_time = time.time()
+
+ # TODO: 实现搜索建议逻辑
+ suggestions = []
+ took_ms = int((time.time() - start_time) * 1000)
+
+ return SearchSuggestResponse(
+ query=q,
+ suggestions=suggestions,
+ took_ms=took_ms
+ )
+
+
+@router.get("/instant", response_model=SearchResponse)
+async def instant_search(
+ q: str = Query(..., min_length=2, description="搜索查询"),
+ size: int = Query(5, ge=1, le=20, description="结果数量")
+):
+ """
+ 即时搜索(Instant Search)。
+
+ 功能说明:
+ - 边输入边搜索,无需点击搜索按钮
+ - 返回简化的搜索结果
+
+ 注意:此功能暂未实现,调用标准搜索接口。
+ """
+ from api.app import get_searcher
+ searcher = get_searcher()
+
+ result = searcher.search(
+ query=q,
+ size=size,
+ from_=0
+ )
+
+ return SearchResponse(
+ hits=result.hits,
+ total=result.total,
+ max_score=result.max_score,
+ took_ms=result.took_ms,
+ query_info=result.query_info
+ )
+```
+
+### 5. 前端适配
+
+**文件**:`frontend/static/js/app.js`
+
+**需要完全重写的地方**:
+
+1. **聚合参数改用简化配置**(第 57-87 行):
+```javascript
+// 旧的方式(ES DSL)- 完全删除
+const aggregations = {
+ "category_stats": {...} // 删除
+};
+
+// 新的方式(简化配置)
+const facets = [
+ {
+ "field": "categoryName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "brandName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "supplierName_keyword",
+ "size": 10,
+ "type": "terms"
+ },
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100},
+ {"key": "100-200", "from": 100, "to": 200},
+ {"key": "200+", "from": 200}
+ ]
+ }
+];
+```
+
+2. **过滤器使用新格式**(第 103 行):
+```javascript
+// 旧的方式 - 完全删除
+filters: {
+ "price_ranges": ["0-50", "50-100"] // 删除
+}
+
+// 新的方式
+filters: Object.keys(state.filters).length > 0 ? state.filters : null,
+range_filters: state.rangeFilters ? state.rangeFilters : null
+```
+
+3. **解析标准化的分面结果**(第 208-258 行):
+```javascript
+// 旧的方式(ES 结构)- 完全删除并重写
+function displayAggregations(aggregations) {
+ if (!aggregations) return;
+ // 旧的代码全部删除
+}
+
+// 新的方式(标准化格式)
+function displayFacets(facets) {
+ if (!facets) return;
+
+ facets.forEach(facet => {
+ if (facet.field === 'categoryName_keyword') {
+ const container = document.getElementById('categoryTags');
+ let html = '';
+
+ facet.values.forEach(facetValue => {
+ const value = facetValue.value;
+ const count = facetValue.count;
+ const selected = facetValue.selected;
+
+ html += `
+
+ ${escapeHtml(value)} (${count})
+
+ `;
+ });
+
+ container.innerHTML = html;
+ }
+ // 处理其他分面...
+ });
+}
+```
+
+
+## 第三部分:实施步骤
+
+### 阶段 1:后端模型层重构
+
+**任务清单**:
+
+- [ ] 更新 `api/models.py`
+ - [ ] 定义 `RangeFilter` 模型
+ - [ ] 定义 `FacetConfig` 模型
+ - [ ] 更新 `SearchRequest`,添加 `range_filters` 和 `facets`
+ - [ ] **完全移除** `aggregations` 参数
+ - [ ] 定义 `FacetValue` 和 `FacetResult` 模型
+ - [ ] 更新 `SearchResponse`,使用标准化分面格式
+ - [ ] 更新 `ImageSearchRequest`,添加 `range_filters`
+ - [ ] 添加 `SearchSuggestRequest` 和 `SearchSuggestResponse`(框架)
+
+**验证方式**:
+
+- 运行 Pydantic 模型验证
+- 检查 API 文档(`/docs`)是否正确生成
+- 确认旧的 `aggregations` 参数已完全移除
+
+### 阶段 2:查询构建器重构
+
+**任务清单**:
+
+- [ ] 重构 `search/es_query_builder.py`
+ - [ ] **完全删除** `price_ranges` 硬编码逻辑(第 205-233 行)
+ - [ ] 重构 `_build_filters` 方法,支持 `range_filters`
+ - [ ] **完全删除** `add_dynamic_aggregations` 方法
+ - [ ] **完全删除或重命名** `add_aggregations` 方法
+ - [ ] 新增 `build_facets` 方法
+ - [ ] 更新 `build_query` 方法签名,添加 `range_filters`
+- [ ] 更新 `search/multilang_query_builder.py`
+ - [ ] 更新 `build_multilang_query` 方法签名
+ - [ ] 确保正确传递 `range_filters` 参数
+
+**验证方式**:
+
+- 打印生成的 ES DSL,检查正确性
+- 确认旧的硬编码逻辑和方法已完全移除
+- 手动测试不同的过滤器组合
+
+### 阶段 3:搜索执行层重构
+
+**任务清单**:
+
+- [ ] 更新 `search/searcher.py`
+ - [ ] 更新 `search()` 方法签名,添加 `range_filters` 和 `facets`
+ - [ ] **完全移除** `aggregations` 参数支持
+ - [ ] 使用新的 `build_facets` 方法
+ - [ ] 实现 `_standardize_facets()` 辅助方法
+ - [ ] 实现 `_get_field_label()` 辅助方法
+ - [ ] 更新 `SearchResult` 类,使用标准化分面格式
+ - [ ] 更新 `search_by_image()` 方法,支持 `range_filters`
+
+**验证方式**:
+
+- 手动测试各种搜索场景
+- 检查分面结果格式是否标准化
+- 确认旧的聚合逻辑已完全移除
+
+### 阶段 4:API 路由层更新
+
+**任务清单**:
+
+- [ ] 更新 `api/routes/search.py`
+ - [ ] 更新 `/search/` 端点,使用新的请求参数
+ - [ ] **确认完全移除**对旧 `aggregations` 的支持
+ - [ ] 添加 `/search/suggestions` 端点(框架,返回空结果)
+ - [ ] 添加 `/search/instant` 端点(框架,调用标准搜索)
+ - [ ] 更新 `/search/image` 端点,支持 `range_filters`
+ - [ ] 添加完整的端点文档和示例
+
+**验证方式**:
+
+- 使用 Swagger UI(`/docs`)测试所有端点
+- 检查请求和响应格式
+- 确认旧参数不再被接受
+
+### 阶段 5:前端适配
+
+**任务清单**:
+
+- [ ] 完全重构 `frontend/static/js/app.js`
+ - [ ] **删除**所有 ES DSL 聚合代码(第 57-87 行)
+ - [ ] 使用新的 `facets` 简化配置
+ - [ ] 修改过滤器参数,分离 `filters` 和 `range_filters`
+ - [ ] **完全重写** `displayAggregations()` 为 `displayFacets()`
+ - [ ] 解析标准化的分面结果格式
+ - [ ] **删除**所有 `price_ranges` 硬编码
+ - [ ] 添加范围过滤器 UI 组件
+ - [ ] 更新状态管理,支持 `rangeFilters`
+
+**验证方式**:
+
+- 浏览器测试所有前端功能
+- 检查网络请求格式
+- 检查分面结果显示
+- 确认旧的代码已完全删除
+
+### 阶段 6:文档更新与示例
+
+**任务清单**:
+
+- [ ] 撰写完整的 API 接口文档
+ - [ ] 搜索接口文档
+ - [ ] 过滤器使用说明
+ - [ ] 分面搜索使用说明
+ - [ ] 搜索建议接口文档(标注为框架)
+- [ ] 更新项目文档
+ - [ ] 更新 `README.md`
+ - [ ] 更新 `USER_GUIDE.md`
+ - [ ] 更新 `CHANGES.md` 或 `CHANGELOG.md`
+- [ ] 添加完整的使用示例
+ - [ ] 简单搜索示例
+ - [ ] 过滤器搜索示例
+ - [ ] 分面搜索示例
+ - [ ] cURL 命令示例
+ - [ ] Python 代码示例
+ - [ ] JavaScript 代码示例
+- [ ] 更新 API 文档注释
+ - [ ] 所有模型的文档字符串
+ - [ ] 所有端点的文档字符串
+ - [ ] 参数说明和示例
+
+**验证方式**:
+
+- 文档审查
+- 示例代码验证
+- 确保文档与实现一致
+
+## 第四部分:API 使用示例
+
+### 示例 1:简单搜索
+
+```bash
+POST /search/
+Content-Type: application/json
+
+{
+ "query": "芭比娃娃",
+ "size": 20
+}
+```
+
+**响应**:
+
+```json
+{
+ "hits": [
+ {
+ "_id": "12345",
+ "_score": 8.5,
+ "_source": {
+ "name": "芭比时尚娃娃",
+ "price": 89.99,
+ "categoryName": "玩具"
+ }
+ }
+ ],
+ "total": 118,
+ "max_score": 8.5,
+ "took_ms": 45,
+ "query_info": {
+ "original_query": "芭比娃娃",
+ "detected_language": "zh"
+ }
+}
+```
+
+### 示例 2:带过滤器的搜索
+
+```bash
+POST /search/
+Content-Type: application/json
+
+{
+ "query": "玩具",
+ "size": 20,
+ "filters": {
+ "categoryName_keyword": ["玩具", "益智玩具"],
+ "in_stock": true
+ },
+ "range_filters": {
+ "price": {
+ "gte": 50,
+ "lte": 200
+ }
+ }
+}
+```
+
+**响应**:
+
+```json
+{
+ "hits": [...],
+ "total": 45,
+ "max_score": 7.2,
+ "took_ms": 38
+}
+```
+
+### 示例 3:带分面搜索的请求(简单模式)
+
+```bash
+POST /search/
+Content-Type: application/json
+
+{
+ "query": "玩具",
+ "size": 20,
+ "facets": [
+ "categoryName_keyword",
+ "brandName_keyword"
+ ]
+}
+```
+
+**响应**:
+
+```json
+{
+ "hits": [...],
+ "total": 118,
+ "max_score": 8.5,
+ "took_ms": 45,
+ "facets": [
+ {
+ "field": "categoryName_keyword",
+ "label": "商品类目",
+ "type": "terms",
+ "values": [
+ {
+ "value": "玩具",
+ "label": "玩具",
+ "count": 85,
+ "selected": false
+ },
+ {
+ "value": "益智玩具",
+ "label": "益智玩具",
+ "count": 33,
+ "selected": false
+ }
+ ]
+ },
+ {
+ "field": "brandName_keyword",
+ "label": "品牌",
+ "type": "terms",
+ "values": [
+ {
+ "value": "乐高",
+ "label": "乐高",
+ "count": 42,
+ "selected": false
+ }
+ ]
+ }
+ ]
+}
+```
+
+### 示例 4:带分面搜索的请求(高级模式)
+
+```bash
+POST /search/
+Content-Type: application/json
+
+{
+ "query": "玩具",
+ "size": 20,
+ "facets": [
+ {
+ "field": "categoryName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "brandName_keyword",
+ "size": 15,
+ "type": "terms"
+ },
+ {
+ "field": "price",
+ "type": "range",
+ "ranges": [
+ {"key": "0-50", "to": 50},
+ {"key": "50-100", "from": 50, "to": 100},
+ {"key": "100-200", "from": 100, "to": 200},
+ {"key": "200+", "from": 200}
+ ]
+ }
+ ]
+}
+```
+
+**响应**:
+
+```json
+{
+ "hits": [...],
+ "total": 118,
+ "max_score": 8.5,
+ "took_ms": 45,
+ "facets": [
+ {
+ "field": "categoryName_keyword",
+ "label": "商品类目",
+ "type": "terms",
+ "values": [...]
+ },
+ {
+ "field": "price",
+ "label": "价格区间",
+ "type": "range",
+ "values": [
+ {
+ "value": "0-50",
+ "label": "0-50",
+ "count": 23,
+ "selected": false
+ },
+ {
+ "value": "50-100",
+ "label": "50-100",
+ "count": 45,
+ "selected": false
+ },
+ {
+ "value": "100-200",
+ "label": "100-200",
+ "count": 38,
+ "selected": false
+ },
+ {
+ "value": "200+",
+ "label": "200+",
+ "count": 12,
+ "selected": false
+ }
+ ]
+ }
+ ]
+}
+```
+
+### 示例 5:复杂搜索(过滤+分面+排序)
+
+```bash
+POST /search/
+Content-Type: application/json
+
+{
+ "query": "玩具 AND (乐高 OR 芭比)",
+ "size": 20,
+ "from": 0,
+ "filters": {
+ "categoryName_keyword": "玩具"
+ },
+ "range_filters": {
+ "price": {
+ "gte": 50,
+ "lte": 200
+ },
+ "days_since_last_update": {
+ "lte": 30
+ }
+ },
+ "facets": [
+ {"field": "brandName_keyword", "size": 15},
+ {"field": "supplierName_keyword", "size": 10}
+ ],
+ "sort_by": "price",
+ "sort_order": "asc",
+ "debug": false
+}
+```
+
+### 示例 6:搜索建议(框架)
+
+```bash
+GET /search/suggestions?q=芭&size=5
+
+{
+ "query": "芭",
+ "suggestions": [],
+ "took_ms": 2
+}
+```
+
+### 示例 7:即时搜索(框架)
+
+```bash
+GET /search/instant?q=玩具&size=5
+
+{
+ "hits": [...],
+ "total": 118,
+ "max_score": 8.5,
+ "took_ms": 25,
+ "query_info": {...}
+}
+```
+
+### Python 示例代码
+
+```python
+import requests
+
+API_URL = "http://localhost:6002/search/"
+
+# 简单搜索
+response = requests.post(API_URL, json={
+ "query": "芭比娃娃",
+ "size": 20
+})
+print(response.json())
+
+# 带过滤器和分面的搜索
+response = requests.post(API_URL, json={
+ "query": "玩具",
+ "size": 20,
+ "filters": {
+ "categoryName_keyword": ["玩具", "益智玩具"]
+ },
+ "range_filters": {
+ "price": {"gte": 50, "lte": 200}
+ },
+ "facets": [
+ {"field": "brandName_keyword", "size": 15},
+ {"field": "categoryName_keyword", "size": 15}
+ ],
+ "sort_by": "price",
+ "sort_order": "asc"
+})
+result = response.json()
+
+# 处理分面结果
+for facet in result.get('facets') or []:  # 未请求分面时 facets 为 null
+ print(f"\n{facet['label']}:")
+ for value in facet['values']:
+ print(f" - {value['label']}: {value['count']}")
+```
+
+### JavaScript 示例代码
+
+```javascript
+// 搜索函数
+async function searchProducts(query, filters, rangeFilters, facets) {
+ const response = await fetch('http://localhost:6002/search/', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json'
+ },
+ body: JSON.stringify({
+ query: query,
+ size: 20,
+ filters: filters,
+ range_filters: rangeFilters,
+ facets: facets
+ })
+ });
+
+ const data = await response.json();
+ return data;
+}
+
+// 使用示例
+const result = await searchProducts(
+ "玩具",
+ { categoryName_keyword: ["玩具"] },
+ { price: { gte: 50, lte: 200 } },
+ [
+ { field: "brandName_keyword", size: 15 },
+ { field: "categoryName_keyword", size: 15 }
+ ]
+);
+
+// 显示分面结果
+(result.facets || []).forEach(facet => {
+ console.log(`${facet.label}:`);
+ facet.values.forEach(value => {
+ console.log(` - ${value.label}: ${value.count}`);
+ });
+});
+```
+
+## 第五部分:验收标准
+
+### 功能验收
+
+- [ ] 所有硬编码逻辑已完全移除
+- [ ] 新的过滤器参数正常工作
+- [ ] 分面搜索返回标准化格式
+- [ ] API 文档完整准确
+- [ ] 前端功能正常工作
+- [ ] 搜索建议端点已添加(框架)
+
+### 代码质量验收
+
+- [ ] 代码中不再有 `price_ranges` 字符串
+- [ ] 代码中不再有 `add_dynamic_aggregations` 方法
+- [ ] 代码中不再有 `aggregations` 参数(除了文档说明)
+- [ ] 所有方法签名已更新
+- [ ] 所有文档字符串已更新
+
+### 性能验收
+
+- [ ] 搜索响应时间无明显增加
+- [ ] 分面查询性能正常
+- [ ] 过滤器性能正常
+
+## 总结
+
+本计划通过系统性的重构,将搜索 API 从硬编码、暴露 ES 细节的实现,转变为灵活、通用、易用的 SaaS 产品接口。关键改进包括:
+
+1. ✅ **完全移除**硬编码的 price_ranges 逻辑
+2. ✅ 实现结构化的过滤参数(filters + range_filters)
+3. ✅ **完全移除** aggregations 参数,使用简化的 facets 接口
+4. ✅ 标准化分面搜索响应格式
+5. ✅ 添加搜索建议功能框架(暂不实现)
+6. ✅ **不保留向后兼容代码**,完全重构
+
+通过这些改进,系统将具备更好的通用性、可维护性和可扩展性,代码更加精简和统一。
\ No newline at end of file
diff --git a/CLAUDE.md b/CLAUDE.md
index 58fe2e3..40fb693 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -96,7 +96,6 @@ The `searcher` supports:
- Extracts product data from MySQL
- Maps images from filebank database
- Creates inverted index (URL → SKU list)
- - Configurable year range via `--years` parameter
## Key Implementation Notes
diff --git a/query/query_rewriter.py b/query/query_rewriter.py
index d5ef9ad..b460770 100644
--- a/query/query_rewriter.py
+++ b/query/query_rewriter.py
@@ -7,21 +7,26 @@ import re
class QueryRewriter:
- """Rewrites queries based on configured dictionary rules."""
+ """Rewrites queries based on exact whole-query matching against configured dictionary rules.
+
+ Only performs whole-query matching - no partial matching or substring replacement.
+ The entire query must exactly match a key in the rewrite dictionary to be rewritten.
+ """
def __init__(self, rewrite_dict: Dict[str, str] = None):
"""
- Initialize query rewriter.
+ Initialize query rewriter for exact whole-query matching only.
Args:
- rewrite_dict: Dictionary mapping query patterns to rewrite expressions
+ rewrite_dict: Dictionary mapping exact query terms to rewrite expressions
e.g., {"芭比": "brand:芭比 OR name:芭比娃娃"}
+ Only a query that exactly equals a key is rewritten, no partial matching.
"""
self.rewrite_dict = rewrite_dict or {}
def rewrite(self, query: str) -> str:
"""
- Rewrite query based on dictionary rules.
+ Rewrite query based on dictionary rules using exact whole-query matching only.
Args:
query: Original query string
@@ -32,21 +37,13 @@ class QueryRewriter:
if not query or not query.strip():
return query
- # Check for exact matches first
+ # Only check for exact matches - no partial matching
if query in self.rewrite_dict:
rewritten = self.rewrite_dict[query]
print(f"[QueryRewriter] Exact match: '{query}' -> '{rewritten}'")
return rewritten
- # Check for partial matches (query contains a rewrite key)
- for pattern, replacement in self.rewrite_dict.items():
- if pattern in query:
- # Replace the pattern
- rewritten = query.replace(pattern, replacement)
- print(f"[QueryRewriter] Partial match: '{query}' -> '{rewritten}'")
- return rewritten
-
- # No rewrite needed
+ # No rewrite needed - no partial matching
return query
def add_rule(self, pattern: str, replacement: str) -> None:
diff --git a/simple_server.py b/simple_server.py
deleted file mode 100644
index 25fda66..0000000
--- a/simple_server.py
+++ /dev/null
@@ -1,340 +0,0 @@
-#!/usr/bin/env python3
-"""
-Simple API server for testing aggregation functionality without external dependencies.
-"""
-
-import json
-import time
-import random
-from http.server import HTTPServer, BaseHTTPRequestHandler
-from urllib.parse import urlparse, parse_qs
-import threading
-
-class SearchAPIHandler(BaseHTTPRequestHandler):
- """Simple API handler for search requests."""
-
- def do_OPTIONS(self):
- """Handle CORS preflight requests."""
- self.send_response(200)
- self.send_header('Access-Control-Allow-Origin', '*')
- self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
- self.send_header('Access-Control-Allow-Headers', 'Content-Type')
- self.end_headers()
-
- def do_POST(self):
- """Handle POST requests."""
- if self.path == '/':
- self.handle_search()
- elif self.path == '/search/':
- self.handle_search()
- else:
- self.send_response(404)
- self.end_headers()
-
- def handle_search(self):
- """Handle search requests with aggregations."""
- try:
- # Read request body
- content_length = int(self.headers['Content-Length'])
- post_data = self.rfile.read(content_length)
- request_data = json.loads(post_data.decode('utf-8'))
-
- query = request_data.get('query', '')
- size = request_data.get('size', 10)
- sort_by = request_data.get('sort_by', 'relevance')
- aggregations = request_data.get('aggregations', {})
- filters = request_data.get('filters', {})
-
- print(f"Search request: query='{query}', size={size}, sort_by={sort_by}")
- print(f"Aggregations: {list(aggregations.keys()) if aggregations else 'None'}")
- print(f"Filters: {filters if filters else 'None'}")
-
- # Simulate processing time
- time.sleep(0.1)
-
- # Generate mock search results
- results = self.generate_mock_results(query, size, sort_by, filters)
-
- # Generate mock aggregations
- aggregation_results = self.generate_mock_aggregations(aggregations, filters)
-
- # Build response
- response = {
- "hits": results,
- "total": len(results) + random.randint(10, 100),
- "max_score": round(random.uniform(1.5, 3.5), 3),
- "took_ms": random.randint(15, 45),
- "aggregations": aggregation_results,
- "query_info": {
- "original_query": query,
- "rewritten_query": query,
- "detected_language": "zh" if any('\u4e00' <= char <= '\u9fff' for char in query) else "en",
- "domain": "default",
- "translations": {},
- "has_vector": False
- }
- }
-
- # Send response
- self.send_response(200)
- self.send_header('Content-Type', 'application/json')
- self.send_header('Access-Control-Allow-Origin', '*')
- self.end_headers()
-
- response_json = json.dumps(response, ensure_ascii=False, indent=2)
- self.wfile.write(response_json.encode('utf-8'))
-
- print(f"Response sent with {len(results)} results and {len(aggregation_results)} aggregations")
-
- except Exception as e:
- print(f"Error handling request: {e}")
- self.send_response(500)
- self.send_header('Content-Type', 'application/json')
- self.send_header('Access-Control-Allow-Origin', '*')
- self.end_headers()
-
- error_response = {
- "error": str(e),
- "detail": "Internal server error"
- }
-
- self.wfile.write(json.dumps(error_response).encode('utf-8'))
-
- def generate_mock_results(self, query, size, sort_by, filters):
- """Generate mock search results."""
-
- # Sample product data
- sample_products = [
- {
- "skuId": 1001,
- "name": "芭比娃娃梦幻套装",
- "enSpuName": "Barbie Dream House Playset",
- "ruSkuName": "Кукла Барби Мечтательный домик",
- "categoryName": "芭比",
- "brandName": "美泰",
- "supplierName": "义乌玩具厂",
- "price": 89.99,
- "imageUrl": "https://picsum.photos/seed/barbie1/200/200.jpg",
- "create_time": "2024-01-15T10:30:00Z",
- "days_since_last_update": 45
- },
- {
- "skuId": 1002,
- "name": "芭比娃娃时尚系列",
- "enSpuName": "Barbie Fashion Doll Collection",
- "ruSkuName": "Кукла Барби Модная коллекция",
- "categoryName": "芭比",
- "brandName": "美泰",
- "supplierName": "汕头玩具公司",
- "price": 45.50,
- "imageUrl": "https://picsum.photos/seed/barbie2/200/200.jpg",
- "create_time": "2024-02-20T14:15:00Z",
- "days_since_last_update": 30
- },
- {
- "skuId": 1003,
- "name": "儿童积木套装",
- "enSpuName": "Kids Building Blocks Set",
- "ruSkuName": "Детский строительный набор",
- "categoryName": "积木",
- "brandName": "乐高",
- "supplierName": "深圳塑胶制品厂",
- "price": 158.00,
- "imageUrl": "https://picsum.photos/seed/blocks1/200/200.jpg",
- "create_time": "2024-01-10T09:20:00Z",
- "days_since_last_update": 60
- },
- {
- "skuId": 1004,
- "name": "消防车玩具模型",
- "enSpuName": "Fire Truck Toy Model",
- "ruSkuName": "Модель пожарной машины",
- "categoryName": "小汽车",
- "brandName": "多美卡",
- "supplierName": "东莞玩具制造厂",
- "price": 78.50,
- "imageUrl": "https://picsum.photos/seed/firetruck1/200/200.jpg",
- "create_time": "2024-03-05T16:45:00Z",
- "days_since_last_update": 15
- },
- {
- "skuId": 1005,
- "name": "婴儿毛绒玩具",
- "enSpuName": "Baby Plush Toy",
- "ruSkuName": "Детская плюшевая игрушка",
- "categoryName": "婴儿娃娃",
- "brandName": "迪士尼",
- "supplierName": "上海礼品公司",
- "price": 32.00,
- "imageUrl": "https://picsum.photos/seed/plush1/200/200.jpg",
- "create_time": "2024-02-14T11:30:00Z",
- "days_since_last_update": 25
- }
- ]
-
- # Apply filters if any
- if filters:
- filtered_products = []
- for product in sample_products:
- include = True
-
- # Check category filter
- if 'category_name' in filters:
- if product['categoryName'] not in filters['category_name']:
- include = False
-
- # Check brand filter
- if 'brand_name' in filters:
- if product['brandName'] not in filters['brand_name']:
- include = False
-
- # Check price range filter
- if 'price_ranges' in filters:
- price = product['price']
- in_range = False
- for price_range in filters['price_ranges']:
- if price_range == '0-50' and price <= 50:
- in_range = True
- elif price_range == '50-100' and 50 < price <= 100:
- in_range = True
- elif price_range == '100-200' and 100 < price <= 200:
- in_range = True
- elif price_range == '200+' and price > 200:
- in_range = True
- if not in_range:
- include = False
-
- if include:
- filtered_products.append(product)
- sample_products = filtered_products
-
- # Apply sorting
- if sort_by == 'price_asc':
- sample_products.sort(key=lambda x: x.get('price', 0))
- elif sort_by == 'price_desc':
- sample_products.sort(key=lambda x: x.get('price', 0), reverse=True)
- elif sort_by == 'time_desc':
- sample_products.sort(key=lambda x: x.get('create_time', ''), reverse=True)
-
- # Convert to API response format
- results = []
- for i, product in enumerate(sample_products[:size]):
- hit = {
- "_id": str(product['skuId']),
- "_score": round(random.uniform(1.5, 3.5), 3),
- "_source": product
- }
- results.append(hit)
-
- return results
-
- def generate_mock_aggregations(self, aggregations, filters):
- """Generate mock aggregation results."""
- if not aggregations:
- return {}
-
- result = {}
-
- for agg_name, agg_spec in aggregations.items():
- agg_type = agg_spec.get('type', 'terms')
-
- if agg_type == 'terms':
- # Generate mock terms aggregation
- if agg_name == 'category_name':
- buckets = [
- {"key": "芭比", "doc_count": random.randint(15, 35)},
- {"key": "儿童娃娃", "doc_count": random.randint(8, 20)},
- {"key": "积木", "doc_count": random.randint(5, 15)},
- {"key": "小汽车", "doc_count": random.randint(3, 12)},
- {"key": "婴儿娃娃", "doc_count": random.randint(4, 10)},
- {"key": "人物", "doc_count": random.randint(6, 18)}
- ]
- elif agg_name == 'brand_name':
- buckets = [
- {"key": "美泰", "doc_count": random.randint(20, 40)},
- {"key": "乐高", "doc_count": random.randint(10, 25)},
- {"key": "迪士尼", "doc_count": random.randint(8, 20)},
- {"key": "多美卡", "doc_count": random.randint(5, 15)},
- {"key": "孩之宝", "doc_count": random.randint(6, 18)},
- {"key": "万代", "doc_count": random.randint(3, 10)}
- ]
- elif agg_name == 'material_type':
- buckets = [
- {"key": "塑料", "doc_count": random.randint(40, 80)},
- {"key": "布绒", "doc_count": random.randint(8, 20)},
- {"key": "金属", "doc_count": random.randint(5, 15)},
- {"key": "木质", "doc_count": random.randint(3, 12)}
- ]
- else:
- # Generic terms aggregation
- buckets = [
- {"key": f"选项{i+1}", "doc_count": random.randint(5, 25)}
- for i in range(5)
- ]
-
- result[agg_name] = {
- "doc_count_error_upper_bound": 0,
- "sum_other_doc_count": random.randint(10, 50),
- "buckets": buckets
- }
-
- elif agg_type == 'range':
- # Generate mock range aggregation (usually for price)
- if agg_name == 'price_ranges':
- ranges = agg_spec.get('ranges', [])
- buckets = []
- for range_spec in ranges:
- key = range_spec.get('key', 'unknown')
- count = random.randint(5, 30)
- bucket_data = {"key": key, "doc_count": count}
-
- # Add range bounds
- if 'to' in range_spec:
- bucket_data['to'] = range_spec['to']
- if 'from' in range_spec:
- bucket_data['from'] = range_spec['from']
-
- buckets.append(bucket_data)
-
- result[agg_name] = {"buckets": buckets}
-
- return result
-
- def log_message(self, format, *args):
- """Override to reduce log noise."""
- pass
-
-def run_server():
- """Run the API server."""
- server_address = ('', 6002)
- httpd = HTTPServer(server_address, SearchAPIHandler)
- print("🚀 Simple Search API Server started!")
- print("📍 API: http://localhost:6002")
- print("🔍 Search endpoint: http://localhost:6002/search/")
- print("🌐 Frontend should connect to: http://localhost:6002")
- print("⏹️ Press Ctrl+C to stop")
-
- try:
- httpd.serve_forever()
- except KeyboardInterrupt:
- print("\n🛑 Server stopped")
- httpd.server_close()
-
-def run_server():
- """Run the API server - main entry point."""
- server_address = ('', 6002)
- httpd = HTTPServer(server_address, SearchAPIHandler)
- print("🚀 Simple Search API Server started!")
- print("📍 API: http://localhost:6002")
- print("🔍 Search endpoint: http://localhost:6002/search/")
- print("🌐 Frontend should connect to: http://localhost:6002")
- print("⏹️ Press Ctrl+C to stop")
-
- try:
- httpd.serve_forever()
- except KeyboardInterrupt:
- print("\n🛑 Server stopped")
- httpd.server_close()
-
-if __name__ == '__main__':
- run_server()
\ No newline at end of file
diff --git a/test_aggregation_api.py b/test_aggregation_api.py
deleted file mode 100644
index 1974042..0000000
--- a/test_aggregation_api.py
+++ /dev/null
@@ -1,166 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for aggregation functionality
-"""
-
-import requests
-import json
-
-API_BASE_URL = 'http://120.76.41.98:6002'
-
-def test_search_with_aggregations():
- """Test search with aggregations"""
-
- # Test data
- test_query = {
- "query": "玩具",
- "size": 5,
- "aggregations": {
- "category_stats": {
- "terms": {
- "field": "categoryName_keyword",
- "size": 10
- }
- },
- "brand_stats": {
- "terms": {
- "field": "brandName_keyword",
- "size": 10
- }
- },
- "price_ranges": {
- "range": {
- "field": "price",
- "ranges": [
- {"key": "0-50", "to": 50},
- {"key": "50-100", "from": 50, "to": 100},
- {"key": "100-200", "from": 100, "to": 200},
- {"key": "200+", "from": 200}
- ]
- }
- }
- }
- }
-
- print("Testing search with aggregations...")
- print(f"Query: {json.dumps(test_query, indent=2, ensure_ascii=False)}")
-
- try:
- response = requests.post(f"{API_BASE_URL}/search/",
- json=test_query,
- headers={'Content-Type': 'application/json'})
-
- print(f"Status Code: {response.status_code}")
-
- if response.ok:
- data = response.json()
- print(f"Found {data['total']} results in {data['took_ms']}ms")
- print(f"Max Score: {data['max_score']}")
-
- # Print aggregations
- if data.get('aggregations'):
- print("\nAggregations:")
- for agg_name, agg_result in data['aggregations'].items():
- print(f"\n{agg_name}:")
- if 'buckets' in agg_result:
- for bucket in agg_result['buckets'][:5]: # Show first 5 buckets
- print(f" - {bucket['key']}: {bucket['doc_count']}")
-
- # Print first few results
- print(f"\nFirst 3 results:")
- for i, hit in enumerate(data['hits'][:3]):
- source = hit['_source']
- print(f"\n{i+1}. {source.get('name', 'N/A')}")
- print(f" Category: {source.get('categoryName', 'N/A')}")
- print(f" Brand: {source.get('brandName', 'N/A')}")
- print(f" Price: {source.get('price', 'N/A')}")
- print(f" Score: {hit['_score']:.4f}")
- else:
- print(f"Error: {response.status_code}")
- print(f"Response: {response.text}")
-
- except Exception as e:
- print(f"Request failed: {e}")
-
-def test_search_with_filters():
- """Test search with filters"""
-
- test_filters = {
- "query": "玩具",
- "size": 5,
- "filters": {
- "categoryName_keyword": ["玩具"],
- "price_ranges": ["0-50", "50-100"]
- }
- }
-
- print("\n\nTesting search with filters...")
- print(f"Query: {json.dumps(test_filters, indent=2, ensure_ascii=False)}")
-
- try:
- response = requests.post(f"{API_BASE_URL}/search/",
- json=test_filters,
- headers={'Content-Type': 'application/json'})
-
- print(f"Status Code: {response.status_code}")
-
- if response.ok:
- data = response.json()
- print(f"Found {data['total']} results in {data['took_ms']}ms")
-
- print(f"\nFirst 3 results:")
- for i, hit in enumerate(data['hits'][:3]):
- source = hit['_source']
- print(f"\n{i+1}. {source.get('name', 'N/A')}")
- print(f" Category: {source.get('categoryName', 'N/A')}")
- print(f" Brand: {source.get('brandName', 'N/A')}")
- print(f" Price: {source.get('price', 'N/A')}")
- print(f" Score: {hit['_score']:.4f}")
- else:
- print(f"Error: {response.status_code}")
- print(f"Response: {response.text}")
-
- except Exception as e:
- print(f"Request failed: {e}")
-
-def test_search_with_sorting():
- """Test search with sorting"""
-
- test_sort = {
- "query": "玩具",
- "size": 5,
- "sort_by": "price",
- "sort_order": "asc"
- }
-
- print("\n\nTesting search with sorting (price ascending)...")
- print(f"Query: {json.dumps(test_sort, indent=2, ensure_ascii=False)}")
-
- try:
- response = requests.post(f"{API_BASE_URL}/search/",
- json=test_sort,
- headers={'Content-Type': 'application/json'})
-
- print(f"Status Code: {response.status_code}")
-
- if response.ok:
- data = response.json()
- print(f"Found {data['total']} results in {data['took_ms']}ms")
-
- print(f"\nFirst 3 results (sorted by price):")
- for i, hit in enumerate(data['hits'][:3]):
- source = hit['_source']
- print(f"\n{i+1}. {source.get('name', 'N/A')}")
- print(f" Price: {source.get('price', 'N/A')}")
- print(f" Score: {hit['_score']:.4f}")
- else:
- print(f"Error: {response.status_code}")
- print(f"Response: {response.text}")
-
- except Exception as e:
- print(f"Request failed: {e}")
-
-if __name__ == "__main__":
- test_search_with_aggregations()
- test_search_with_filters()
- test_search_with_sorting()
\ No newline at end of file
diff --git a/test_aggregation_functionality.py b/test_aggregation_functionality.py
deleted file mode 100644
index 5adb5fb..0000000
--- a/test_aggregation_functionality.py
+++ /dev/null
@@ -1,236 +0,0 @@
-#!/usr/bin/env python3
-"""
-Simple test script to verify aggregation functionality without external dependencies.
-"""
-
-import sys
-import os
-import inspect
-
-# Add the project root to the Python path
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-def test_es_query_builder_aggregations():
- """Test the ES query builder aggregation methods."""
- print("Testing ES Query Builder Aggregation Methods...")
-
- # Import the query builder
- try:
- from search.es_query_builder import ESQueryBuilder
- print("✓ ESQueryBuilder imported successfully")
- except ImportError as e:
- print(f"✗ Failed to import ESQueryBuilder: {e}")
- return False
-
- # Create a query builder instance
- builder = ESQueryBuilder(
- index_name="test_index",
- match_fields=["name", "description"]
- )
-
- # Test basic aggregation
- es_query = {"query": {"match_all": {}}}
-
- # Test add_dynamic_aggregations
- aggregations = {
- "category_name": {
- "type": "terms",
- "field": "categoryName_keyword",
- "size": 10
- },
- "price_ranges": {
- "type": "range",
- "field": "price",
- "ranges": [
- {"key": "0-50", "to": 50},
- {"key": "50-100", "from": 50, "to": 100}
- ]
- }
- }
-
- result_query = builder.add_dynamic_aggregations(es_query, aggregations)
-
- if "aggs" in result_query:
- print("✓ Aggregations added to query")
-
- # Check category aggregation
- if "category_name" in result_query["aggs"]:
- category_agg = result_query["aggs"]["category_name"]
- if "terms" in category_agg and category_agg["terms"]["field"] == "categoryName_keyword":
- print("✓ Category aggregation correctly configured")
- else:
- print("✗ Category aggregation incorrectly configured")
- return False
-
- # Check price range aggregation
- if "price_ranges" in result_query["aggs"]:
- price_agg = result_query["aggs"]["price_ranges"]
- if "range" in price_agg and price_agg["range"]["field"] == "price":
- print("✓ Price range aggregation correctly configured")
- else:
- print("✗ Price range aggregation incorrectly configured")
- return False
- else:
- print("✗ No aggregations added to query")
- return False
-
- # Test sorting
- result_query_asc = builder.add_sorting({}, "price_asc")
- if "sort" in result_query_asc:
- print("✓ Price ascending sort added")
- else:
- print("✗ Price ascending sort not added")
- return False
-
- result_query_desc = builder.add_sorting({}, "price_desc")
- if "sort" in result_query_desc:
- print("✓ Price descending sort added")
- else:
- print("✗ Price descending sort not added")
- return False
-
- result_query_time = builder.add_sorting({}, "time_desc")
- if "sort" in result_query_time:
- print("✓ Time descending sort added")
- else:
- print("✗ Time descending sort not added")
- return False
-
- return True
-
-
-def test_searcher_integration():
- """Test searcher integration with new parameters."""
- print("\nTesting Searcher Integration...")
-
- try:
- from search.searcher import Searcher
- print("✓ Searcher imported successfully")
- except ImportError as e:
- print(f"✗ Failed to import Searcher: {e}")
- return False
-
- # We can't easily test the full searcher without ES, but we can check the method signature
- search_method = getattr(Searcher, 'search', None)
-
- if search_method:
- sig = inspect.signature(search_method)
- params = list(sig.parameters.keys())
-
- expected_params = ['query', 'size', 'from_', 'filters', 'min_score', 'aggregations', 'sort_by', 'context']
- for param in expected_params:
- if param in params:
- print(f"✓ Parameter '{param}' found in search method")
- else:
- print(f"✗ Parameter '{param}' missing from search method")
- return False
- else:
- print("✗ Search method not found in Searcher class")
- return False
-
- return True
-
-
-def test_api_route_integration():
- """Test API route integration."""
- print("\nTesting API Route Integration...")
-
- try:
- from api.routes.search import router
- print("✓ Search router imported successfully")
- except ImportError as e:
- print(f"✗ Failed to import search router: {e}")
- return False
-
- # Check if the route exists
- routes = [route.path for route in router.routes]
- if "/" in routes:
- print("✓ Main search route found")
- else:
- print("✗ Main search route not found")
- return False
-
- return True
-
-
-def test_configuration():
- """Test configuration parsing."""
- print("\nTesting Configuration...")
-
- try:
- from config import CustomerConfig
- print("✓ CustomerConfig imported successfully")
- except ImportError as e:
- print(f"✗ Failed to import CustomerConfig: {e}")
- return False
-
- # Try to load the customer1 config
- try:
- config = CustomerConfig.load_from_file("config/schema/customer1_config.yaml")
- print("✓ Customer1 configuration loaded successfully")
-
- # Check if price field is in the configuration
- field_names = [field.name for field in config.fields]
- if "price" in field_names:
- print("✓ Price field found in configuration")
- else:
- print("✗ Price field not found in configuration")
- return False
-
- # Check keyword fields for aggregations
- if "categoryName_keyword" in field_names:
- print("✓ Category keyword field found")
- else:
- print("✗ Category keyword field not found")
- return False
-
- if "brandName_keyword" in field_names:
- print("✓ Brand keyword field found")
- else:
- print("✗ Brand keyword field not found")
- return False
-
- except Exception as e:
- print(f"✗ Failed to load configuration: {e}")
- return False
-
- return True
-
-
-def main():
- """Run all tests."""
- print("=== Search Engine Aggregation Functionality Tests ===\n")
-
- tests = [
- test_es_query_builder_aggregations,
- test_searcher_integration,
- test_api_route_integration,
- test_configuration
- ]
-
- passed = 0
- total = len(tests)
-
- for test in tests:
- try:
- if test():
- passed += 1
- print(f"✓ {test.__name__} PASSED")
- else:
- print(f"✗ {test.__name__} FAILED")
- except Exception as e:
- print(f"✗ {test.__name__} ERROR: {e}")
-
- print(f"\n=== Test Results: {passed}/{total} tests passed ===")
-
- if passed == total:
- print("🎉 All tests passed! Aggregation functionality is ready.")
- return True
- else:
- print("❌ Some tests failed. Please check the implementation.")
- return False
-
-
-if __name__ == "__main__":
- success = main()
- sys.exit(0 if success else 1)
\ No newline at end of file
diff --git a/test_complete_search.py b/test_complete_search.py
deleted file mode 100644
index ded765f..0000000
--- a/test_complete_search.py
+++ /dev/null
@@ -1,211 +0,0 @@
-#!/usr/bin/env python3
-"""
-Complete test script simulating frontend search interaction
-"""
-
-import requests
-import json
-
-API_BASE_URL = 'http://120.76.41.98:6002'
-
-def test_complete_search_workflow():
- """Test complete search workflow similar to frontend"""
-
- print("=" * 60)
- print("完整搜索流程测试")
- print("=" * 60)
-
- # Step 1: Initial search with aggregations
- print("\n1️⃣ 初始搜索(带聚合功能)")
- print("-" * 30)
-
- search_request = {
- "query": "芭比娃娃",
- "size": 10,
- "aggregations": {
- "category_stats": {
- "terms": {
- "field": "categoryName_keyword",
- "size": 10
- }
- },
- "brand_stats": {
- "terms": {
- "field": "brandName_keyword",
- "size": 10
- }
- },
- "price_ranges": {
- "range": {
- "field": "price",
- "ranges": [
- {"key": "0-50", "to": 50},
- {"key": "50-100", "from": 50, "to": 100},
- {"key": "100-200", "from": 100, "to": 200},
- {"key": "200+", "from": 200}
- ]
- }
- }
- }
- }
-
- try:
- response = requests.post(f"{API_BASE_URL}/search/", json=search_request)
-
- if response.ok:
- data = response.json()
- print(f"✅ 找到 {data['total']} 个结果,耗时 {data['took_ms']}ms")
-
- # Show aggregations results
- if data.get('aggregations'):
- print("\n📊 聚合结果:")
-
- # Category aggregations
- if 'category_stats' in data['aggregations']:
- print(" 🏷️ 分类统计:")
- for bucket in data['aggregations']['category_stats']['buckets'][:3]:
- print(f" - {bucket['key']}: {bucket['doc_count']} 个商品")
-
- # Brand aggregations
- if 'brand_stats' in data['aggregations']:
- print(" 🏢 品牌统计:")
- for bucket in data['aggregations']['brand_stats']['buckets'][:3]:
- print(f" - {bucket['key']}: {bucket['doc_count']} 个商品")
-
- # Price ranges
- if 'price_ranges' in data['aggregations']:
- print(" 💰 价格分布:")
- for bucket in data['aggregations']['price_ranges']['buckets']:
- print(f" - {bucket['key']}: {bucket['doc_count']} 个商品")
-
- # Show sample results
- print(f"\n🔍 前3个搜索结果:")
- for i, hit in enumerate(data['hits'][:3]):
- source = hit['_source']
- price = source.get('price', 'N/A')
- category = source.get('categoryName', 'N/A')
- brand = source.get('brandName', 'N/A')
- print(f" {i+1}. {source.get('name', 'N/A')}")
- print(f" 💰 价格: {price}")
- print(f" 📁 分类: {category}")
- print(f" 🏷️ 品牌: {brand}")
- print(f" ⭐ 评分: {hit['_score']:.3f}")
- print()
-
- else:
- print(f"❌ 搜索失败: {response.status_code}")
- print(f"错误信息: {response.text}")
-
- except Exception as e:
- print(f"❌ 请求异常: {e}")
-
- # Step 2: Search with filters
- print("\n2️⃣ 带过滤条件的搜索")
- print("-" * 30)
-
- filtered_search = {
- "query": "芭比娃娃",
- "size": 5,
- "filters": {
- "brandName_keyword": ["美泰"],
- "price_ranges": ["50-100", "100-200"]
- }
- }
-
- try:
- response = requests.post(f"{API_BASE_URL}/search/", json=filtered_search)
-
- if response.ok:
- data = response.json()
- print(f"✅ 过滤后找到 {data['total']} 个结果,耗时 {data['took_ms']}ms")
- print(" 🎯 过滤条件: 品牌=美泰, 价格=¥50-200")
-
- print(f"\n💫 前3个过滤结果:")
- for i, hit in enumerate(data['hits'][:3]):
- source = hit['_source']
- price = source.get('price', 'N/A')
- category = source.get('categoryName', 'N/A')
- brand = source.get('brandName', 'N/A')
- print(f" {i+1}. {source.get('name', 'N/A')}")
- print(f" 💰 ¥{price} | 📁 {category} | 🏷️ {brand}")
- print(f" ⭐ 评分: {hit['_score']:.3f}")
-
- else:
- print(f"❌ 过滤搜索失败: {response.status_code}")
-
- except Exception as e:
- print(f"❌ 请求异常: {e}")
-
- # Step 3: Search with sorting
- print("\n3️⃣ 排序搜索")
- print("-" * 30)
-
- # Test price ascending
- price_asc_search = {
- "query": "芭比娃娃",
- "size": 3,
- "sort_by": "price",
- "sort_order": "asc"
- }
-
- try:
- response = requests.post(f"{API_BASE_URL}/search/", json=price_asc_search)
-
- if response.ok:
- data = response.json()
- print(f"✅ 价格升序排序,找到 {data['total']} 个结果")
- print(" 📈 排序方式: 价格从低到高")
-
- print(f"\n💵 价格排序结果:")
- for i, hit in enumerate(data['hits']):
- source = hit['_source']
- price = source.get('price', 'N/A')
- name = source.get('name', 'N/A')
- print(f" {i+1}. ¥{price} - {name}")
-
- else:
- print(f"❌ 排序搜索失败: {response.status_code}")
-
- except Exception as e:
- print(f"❌ 请求异常: {e}")
-
- # Step 4: Test time sorting
- print("\n4️⃣ 时间排序测试")
- print("-" * 30)
-
- time_sort_search = {
- "query": "芭比娃娃",
- "size": 3,
- "sort_by": "create_time",
- "sort_order": "desc"
- }
-
- try:
- response = requests.post(f"{API_BASE_URL}/search/", json=time_sort_search)
-
- if response.ok:
- data = response.json()
- print(f"✅ 时间降序排序,找到 {data['total']} 个结果")
- print(" 📅 排序方式: 上架时间从新到旧")
-
- print(f"\n🕐 时间排序结果:")
- for i, hit in enumerate(data['hits']):
- source = hit['_source']
- create_time = source.get('create_time', 'N/A')
- name = source.get('name', 'N/A')
- print(f" {i+1}. {create_time} - {name}")
-
- else:
- print(f"❌ 时间排序失败: {response.status_code}")
-
- except Exception as e:
- print(f"❌ 请求异常: {e}")
-
- print("\n" + "=" * 60)
- print("🎉 搜索功能测试完成!")
- print("✨ 前端访问地址: http://localhost:8080")
- print("🔧 后端API地址: http://120.76.41.98:6002")
- print("=" * 60)
-
-if __name__ == "__main__":
- test_complete_search_workflow()
\ No newline at end of file
diff --git a/test_minimal_sort.py b/test_minimal_sort.py
deleted file mode 100644
index 946058c..0000000
--- a/test_minimal_sort.py
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/env python3
-"""
-Minimal test to isolate sort issue
-"""
-
-import requests
-import json
-
-def test_minimal_sort():
- """Test minimal sort case"""
-
- base_url = "http://120.76.41.98:6002"
-
- # Test 1: No sort parameters
- print("Test 1: No sort parameters")
- response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1})
- print(f"Status: {response.status_code}")
- print(f"Response: {response.text[:200]}...")
-
- # Test 2: Empty sort_by
- print("\nTest 2: Empty sort_by")
- response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1, "sort_by": ""})
- print(f"Status: {response.status_code}")
- print(f"Response: {response.text[:200]}...")
-
- # Test 3: sort_by only (no sort_order)
- print("\nTest 3: sort_by only")
- response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1, "sort_by": "create_time"})
- print(f"Status: {response.status_code}")
- print(f"Response: {response.text[:200]}...")
-
- # Test 4: sort_order only (no sort_by)
- print("\nTest 4: sort_order only")
- response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1, "sort_order": "desc"})
- print(f"Status: {response.status_code}")
- print(f"Response: {response.text[:200]}...")
-
- # Test 5: Both parameters with None values
- print("\nTest 5: Both parameters with null values")
- response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1, "sort_by": None, "sort_order": None})
- print(f"Status: {response.status_code}")
- print(f"Response: {response.text[:200]}...")
-
-if __name__ == "__main__":
- test_minimal_sort()
\ No newline at end of file
diff --git a/支持多语言查询.md b/支持多语言查询.md
index fe93dea..a40d477 100644
--- a/支持多语言查询.md
+++ b/支持多语言查询.md
@@ -18,6 +18,7 @@ index 397a9f7..3e728c9 100644
暂时具体实现为 bm25()+0.2*text_embedding_relevence(也就是knn检索表达式的打分)
+bm25() 包括多语言的打分:内部需要根据配置将查询翻译为多种语言(可配置若干目标语言,默认中文、英文,并为每种语言设置对应的检索域),然后分别到对应的字段搜索:中文查询到配置的中文 title 字段搜索,英文查询到对应的英文 title 字段搜索。
+```text
bm25打分(base_query):
"multi_match": {
"query": search_query,
@@ -186,11 +187,5 @@ index a7088ec..0a798ed 100644
def get_document(self, doc_id: str) -> Optional[Dict[str, Any]]:
"""
Get single document by ID.
-
-
-
-
-
-
-
+```
--
libgit2 0.21.2