diff --git a/.cursor/plans/api-interface-analysis-42918612.plan.0.md b/.cursor/plans/api-interface-analysis-42918612.plan.0.md new file mode 100644 index 0000000..688cc5d --- /dev/null +++ b/.cursor/plans/api-interface-analysis-42918612.plan.0.md @@ -0,0 +1,1222 @@ + +# 搜索引擎 API 接口重构实施计划 + +## 第一部分:现状分析 + +### 1. 当前实现存在的问题 + +#### 问题 1:硬编码的价格范围过滤 + +**位置**:`search/es_query_builder.py` 第 205-233 行 + +**问题描述**: + +```python +if field == 'price_ranges': + # 硬编码特定字符串值 + if price_range == '0-50': + price_ranges.append({"lt": 50}) + elif price_range == '50-100': + price_ranges.append({"gte": 50, "lt": 100}) + # ... +``` + +**影响**: + +- 只支持 `price` 字段,无法扩展到其他数值字段 +- 范围值硬编码,无法根据业务需求调整 +- 不符合 SaaS 系统的通用性要求 + +#### 问题 2:聚合参数直接暴露 ES DSL + +**位置**: + +- `api/models.py` 第 17 行:`aggregations: Optional[Dict[str, Any]]` +- `search/es_query_builder.py` 第 298-319 行:`add_dynamic_aggregations` +- `frontend/static/js/app.js` 第 57-87 行:前端硬编码 ES DSL + +**问题描述**: + +前端需要了解 Elasticsearch 的聚合语法: + +```javascript +const aggregations = { + "category_stats": { + "terms": { + "field": "categoryName_keyword", + "size": 15 + } + }, + "price_ranges": { + "range": { + "field": "price", + "ranges": [ + {"key": "0-50", "to": 50}, + // ... + ] + } + } +}; +``` + +**影响**: + +- 前端需要了解 ES 语法,增加集成难度 +- 不符合 SaaS 产品易用性原则 +- 难以进行参数验证和文档生成 + +#### 问题 3:分面搜索结果格式不统一 + +**位置**:`frontend/static/js/app.js` 第 208-258 行 + +**问题描述**: + +- 直接返回 ES 原始格式(`buckets` 结构) +- 前端需要知道不同聚合类型的响应结构 +- 没有统一的分面结果模型 + +**影响**: + +- 前端解析逻辑复杂 +- 不同类型的聚合处理方式不一致 +- 难以扩展新的聚合类型 + +#### 问题 4:缺少搜索建议功能 + +**当前状态**:完全没有实现 + +**需求**: + +- 自动补全(Autocomplete) +- 搜索建议(Suggestions) +- 搜索即时反馈(Instant Search) + +### 2. 依赖关系分析 + +**影响范围**: + +1. **后端模型层**:`api/models.py` +2. **查询构建层**:`search/es_query_builder.py` +3. **搜索执行层**:`search/searcher.py` +4. **API 路由层**:`api/routes/search.py` +5. **前端代码**:`frontend/static/js/app.js` +6. 
**测试代码**:`test_aggregation_api.py`, `test_complete_search.py` + +## 第二部分:优化方案设计 + +### 方案概述 + +采用**结构化过滤参数方案(方案 A 的简化版)**: + +- 分离 `filters`(精确匹配)和 `range_filters`(范围过滤) +- **不支持单字段多个不连续范围**,简化设计 +- 标准化聚合参数,使用简化的接口 +- 统一分面搜索响应格式 + +### 1. 新的请求模型设计 + +#### 1.1 核心模型定义 + +**文件**:`api/models.py` + +```python +from pydantic import BaseModel, Field, field_validator +from typing import List, Dict, Any, Optional, Union, Literal + + +class RangeFilter(BaseModel): + """数值范围过滤器""" + gte: Optional[float] = Field(None, description="大于等于 (>=)") + gt: Optional[float] = Field(None, description="大于 (>)") + lte: Optional[float] = Field(None, description="小于等于 (<=)") + lt: Optional[float] = Field(None, description="小于 (<)") + + @field_validator('*') + def check_at_least_one(cls, v, info): + """确保至少指定一个边界""" + values = info.data + if not any([values.get('gte'), values.get('gt'), + values.get('lte'), values.get('lt')]): + raise ValueError('至少需要指定一个范围边界') + return v + + class Config: + json_schema_extra = { + "examples": [ + {"gte": 50, "lte": 200}, + {"gt": 100}, + {"lt": 50} + ] + } + + +class FacetConfig(BaseModel): + """分面配置(简化版)""" + field: str = Field(..., description="分面字段名") + size: int = Field(10, ge=1, le=100, description="返回的分面值数量") + type: Literal["terms", "range"] = Field("terms", description="分面类型") + ranges: Optional[List[Dict[str, Any]]] = Field( + None, + description="范围分面的范围定义(仅当 type='range' 时需要)" + ) + + class Config: + json_schema_extra = { + "examples": [ + { + "field": "categoryName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "price", + "size": 4, + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100}, + {"key": "100-200", "from": 100, "to": 200}, + {"key": "200+", "from": 200} + ] + } + ] + } + + +class SearchRequest(BaseModel): + """搜索请求模型(重构版)""" + + # 基础搜索参数 + query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)") + size: int = Field(10, ge=1, le=100, 
description="返回结果数量") + from_: int = Field(0, ge=0, alias="from", description="分页偏移量") + + # 过滤器 - 精确匹配和多值匹配 + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field( + None, + description="精确匹配过滤器。单值表示精确匹配,数组表示 OR 匹配(匹配任意一个值)", + json_schema_extra={ + "examples": [ + { + "categoryName_keyword": ["玩具", "益智玩具"], + "brandName_keyword": "乐高", + "in_stock": True + } + ] + } + ) + + # 范围过滤器 - 数值范围 + range_filters: Optional[Dict[str, RangeFilter]] = Field( + None, + description="数值范围过滤器。支持 gte, gt, lte, lt 操作符", + json_schema_extra={ + "examples": [ + { + "price": {"gte": 50, "lte": 200}, + "days_since_last_update": {"lte": 30} + } + ] + } + ) + + # 排序 + sort_by: Optional[str] = Field(None, description="排序字段名(如 'price', 'create_time')") + sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)") + + # 分面搜索 - 简化接口 + facets: Optional[List[Union[str, FacetConfig]]] = Field( + None, + description="分面配置。可以是字段名列表(使用默认配置)或详细的分面配置对象", + json_schema_extra={ + "examples": [ + # 简单模式:只指定字段名,使用默认配置 + ["categoryName_keyword", "brandName_keyword"], + # 高级模式:详细配置 + [ + {"field": "categoryName_keyword", "size": 15}, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100} + ] + } + ] + ] + } + ) + + # 高级选项 + min_score: Optional[float] = Field(None, ge=0, description="最小相关性分数阈值") + highlight: bool = Field(False, description="是否高亮搜索关键词(暂不实现)") + debug: bool = Field(False, description="是否返回调试信息") + + # 个性化参数(预留) + user_id: Optional[str] = Field(None, description="用户ID,用于个性化搜索和推荐") + session_id: Optional[str] = Field(None, description="会话ID,用于搜索分析") + + +class ImageSearchRequest(BaseModel): + """图片搜索请求模型""" + image_url: str = Field(..., description="查询图片的 URL") + size: int = Field(10, ge=1, le=100, description="返回结果数量") + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = None + range_filters: Optional[Dict[str, RangeFilter]] = None + + 
+class SearchSuggestRequest(BaseModel): + """搜索建议请求模型(框架,暂不实现)""" + query: str = Field(..., min_length=1, description="搜索查询字符串") + size: int = Field(5, ge=1, le=20, description="返回建议数量") + types: List[Literal["query", "product", "category", "brand"]] = Field( + ["query"], + description="建议类型:query(查询建议), product(商品建议), category(类目建议), brand(品牌建议)" + ) +``` + +#### 1.2 响应模型定义 + +```python +class FacetValue(BaseModel): + """分面值""" + value: Union[str, int, float] = Field(..., description="分面值") + label: Optional[str] = Field(None, description="显示标签(如果与 value 不同)") + count: int = Field(..., description="匹配的文档数量") + selected: bool = Field(False, description="是否已选中(当前过滤器中)") + + +class FacetResult(BaseModel): + """分面结果(标准化格式)""" + field: str = Field(..., description="字段名") + label: str = Field(..., description="分面显示名称") + type: Literal["terms", "range"] = Field(..., description="分面类型") + values: List[FacetValue] = Field(..., description="分面值列表") + total_count: Optional[int] = Field(None, description="该字段的总文档数") + + +class SearchResponse(BaseModel): + """搜索响应模型(重构版)""" + + # 核心结果 + hits: List[Dict[str, Any]] = Field(..., description="搜索结果列表") + total: int = Field(..., description="匹配的总文档数") + max_score: float = Field(..., description="最高相关性分数") + + # 分面搜索结果(标准化格式) + facets: Optional[List[FacetResult]] = Field( + None, + description="分面统计结果(标准化格式)" + ) + + # 查询信息 + query_info: Dict[str, Any] = Field( + default_factory=dict, + description="查询处理信息(原始查询、改写、语言检测、翻译等)" + ) + + # 推荐与建议(预留) + related_queries: Optional[List[str]] = Field(None, description="相关搜索查询") + + # 性能指标 + took_ms: int = Field(..., description="搜索总耗时(毫秒)") + performance_info: Optional[Dict[str, Any]] = Field(None, description="详细性能信息") + + # 调试信息 + debug_info: Optional[Dict[str, Any]] = Field(None, description="调试信息(仅当 debug=True)") + + +class SearchSuggestResponse(BaseModel): + """搜索建议响应模型(框架,暂不实现)""" + query: str = Field(..., description="原始查询") + suggestions: List[Dict[str, Any]] = Field(..., 
description="建议列表") + took_ms: int = Field(..., description="耗时(毫秒)") +``` + +### 2. 查询构建器重构 + +#### 2.1 移除硬编码的 price_ranges 逻辑 + +**文件**:`search/es_query_builder.py` + +**需要修改的方法**:`_build_filters(self, filters, range_filters)` + +**改进点**: + +1. 移除 `if field == 'price_ranges'` 的特殊处理 +2. 分离 filters 和 range_filters 的处理逻辑 +3. 添加字段类型验证(利用配置系统) + +**新的实现逻辑**: + +```python +def _build_filters( + self, + filters: Optional[Dict[str, Any]] = None, + range_filters: Optional[Dict[str, Any]] = None +) -> List[Dict[str, Any]]: + """ + 构建过滤子句(重构版)。 + + Args: + filters: 精确匹配过滤器字典 + range_filters: 范围过滤器字典 + + Returns: + ES filter子句列表 + """ + filter_clauses = [] + + # 1. 处理精确匹配过滤 + if filters: + for field, value in filters.items(): + if isinstance(value, list): + # 多值匹配(OR) + filter_clauses.append({ + "terms": {field: value} + }) + else: + # 单值精确匹配 + filter_clauses.append({ + "term": {field: value} + }) + + # 2. 处理范围过滤 + if range_filters: + for field, range_spec in range_filters.items(): + # 验证字段是否为数值类型(可选,基于配置) + # TODO: 添加字段类型验证 + + # 构建范围查询 + range_conditions = {} + if isinstance(range_spec, dict): + for op in ['gte', 'gt', 'lte', 'lt']: + if op in range_spec and range_spec[op] is not None: + range_conditions[op] = range_spec[op] + + if range_conditions: + filter_clauses.append({ + "range": {field: range_conditions} + }) + + return filter_clauses +``` + +#### 2.2 优化聚合参数接口 + +**新增方法**:`build_facets(self, facet_configs)` + +**改进点**: + +1. 移除 `add_dynamic_aggregations`(直接暴露 ES DSL) +2. 重构 `add_aggregations` 为更通用的 `build_facets` +3. 支持简化配置和高级配置两种模式 + +**新的实现逻辑**: + +```python +def build_facets( + self, + facet_configs: Optional[List[Union[str, Dict[str, Any]]]] = None +) -> Dict[str, Any]: + """ + 构建分面聚合(重构版)。 + + Args: + facet_configs: 分面配置列表。可以是: + - 字符串列表:字段名,使用默认配置 + - 配置对象列表:详细的分面配置 + + Returns: + ES aggregations字典 + """ + if not facet_configs: + return {} + + aggs = {} + + for config in facet_configs: + # 1. 
简单模式:只有字段名 + if isinstance(config, str): + field = config + agg_name = f"{field}_facet" + aggs[agg_name] = { + "terms": { + "field": field, + "size": 10, # 默认大小 + "order": {"_count": "desc"} + } + } + + # 2. 高级模式:详细配置对象 + elif isinstance(config, dict): + field = config['field'] + facet_type = config.get('type', 'terms') + size = config.get('size', 10) + agg_name = f"{field}_facet" + + if facet_type == 'terms': + # Terms 聚合(分组统计) + aggs[agg_name] = { + "terms": { + "field": field, + "size": size, + "order": {"_count": "desc"} + } + } + + elif facet_type == 'range': + # Range 聚合(范围统计) + ranges = config.get('ranges', []) + if ranges: + aggs[agg_name] = { + "range": { + "field": field, + "ranges": ranges + } + } + + return aggs +``` + +#### 2.3 更新主查询构建方法 + +**修改方法签名**:`build_query()` + +```python +def build_query( + self, + query_text: str, + query_vector: Optional[np.ndarray] = None, + query_node: Optional[QueryNode] = None, + filters: Optional[Dict[str, Any]] = None, + range_filters: Optional[Dict[str, Any]] = None, # 新增 + size: int = 10, + from_: int = 0, + enable_knn: bool = True, + knn_k: int = 50, + knn_num_candidates: int = 200, + min_score: Optional[float] = None +) -> Dict[str, Any]: + """构建完整的 ES 查询(重构版)""" + # ... 实现 + + # 添加过滤器 + if filters or range_filters: + filter_clauses = self._build_filters(filters, range_filters) + if filter_clauses: + es_query["query"] = { + "bool": { + "must": [query_clause], + "filter": filter_clauses + } + } +``` + +### 3. 搜索执行层重构 + +**文件**:`search/searcher.py` + +**需要修改的方法**:`search()` + +**改进点**: + +1. 更新方法签名,接受 `range_filters` 参数 +2. 使用新的 `build_facets` 方法替代旧的聚合逻辑 +3. 
标准化分面搜索结果 + +**关键代码片段**: + +```python +def search( + self, + query: str, + size: int = 10, + from_: int = 0, + filters: Optional[Dict[str, Any]] = None, + range_filters: Optional[Dict[str, Any]] = None, # 新增 + facets: Optional[List[Union[str, Dict]]] = None, # 替代 aggregations + min_score: Optional[float] = None, + sort_by: Optional[str] = None, + sort_order: Optional[str] = "desc", + debug: bool = False, + context: Optional[RequestContext] = None +) -> SearchResult: + """执行搜索(重构版)""" + + # ... 查询解析 ... + + # 构建 ES 查询 + es_query = self.query_builder.build_multilang_query( + parsed_query=parsed_query, + query_vector=parsed_query.query_vector, + query_node=query_node, + filters=filters, + range_filters=range_filters, # 新增 + size=size, + from_=from_, + enable_knn=enable_embedding, + min_score=min_score + ) + + # 添加分面聚合 + if facets: + facet_aggs = self.query_builder.build_facets(facets) + if facet_aggs: + if "aggs" not in es_query: + es_query["aggs"] = {} + es_query["aggs"].update(facet_aggs) + + # ... 执行搜索 ... 
+ + # 标准化分面结果 + standardized_facets = self._standardize_facets( + es_response.get('aggregations', {}), + facets, + filters + ) + + return SearchResult( + hits=hits, + total=total_value, + max_score=max_score, + took_ms=int(total_duration), + facets=standardized_facets, # 标准化格式 + query_info=parsed_query.to_dict(), + debug_info=debug_info + ) +``` + +**新增辅助方法**: + +```python +def _standardize_facets( + self, + es_aggregations: Dict[str, Any], + facet_configs: Optional[List[Union[str, Dict]]], + current_filters: Optional[Dict[str, Any]] +) -> Optional[List[Dict[str, Any]]]: + """ + 将 ES 聚合结果转换为标准化的分面格式。 + + Args: + es_aggregations: ES 原始聚合结果 + facet_configs: 分面配置列表 + current_filters: 当前应用的过滤器 + + Returns: + 标准化的分面结果列表 + """ + if not es_aggregations or not facet_configs: + return None + + standardized_facets = [] + + for config in facet_configs: + # 解析配置 + if isinstance(config, str): + field = config + facet_type = "terms" + else: + field = config['field'] + facet_type = config.get('type', 'terms') + + agg_name = f"{field}_facet" + + if agg_name not in es_aggregations: + continue + + agg_result = es_aggregations[agg_name] + + # 构建标准化分面结果 + facet = { + "field": field, + "label": self._get_field_label(field), # 从配置获取 + "type": facet_type, + "values": [] + } + + # 获取当前字段的选中值 + selected_values = set() + if current_filters and field in current_filters: + filter_value = current_filters[field] + if isinstance(filter_value, list): + selected_values = set(filter_value) + else: + selected_values = {filter_value} + + # 转换 buckets + if 'buckets' in agg_result: + for bucket in agg_result['buckets']: + value = bucket.get('key') + count = bucket.get('doc_count', 0) + + facet['values'].append({ + "value": value, + "label": str(value), # 可以从配置映射 + "count": count, + "selected": value in selected_values + }) + + standardized_facets.append(facet) + + return standardized_facets + + +def _get_field_label(self, field: str) -> str: + """获取字段的显示标签""" + # 从配置中获取字段标签 + for field_config in 
self.config.fields: + if field_config.name == field: + # 假设配置中有 label 字段 + return getattr(field_config, 'label', field) + return field +``` + +### 4. API 路由层更新 + +**文件**:`api/routes/search.py` + +**改进点**: + +1. 接受新的请求模型参数 +2. 添加搜索建议端点(框架) + +**新增端点**: + +```python +@router.get("/suggestions", response_model=SearchSuggestResponse) +async def search_suggestions( + q: str = Query(..., min_length=1, description="搜索查询"), + size: int = Query(5, ge=1, le=20, description="建议数量"), + types: str = Query("query", description="建议类型(逗号分隔)") +): + """ + 获取搜索建议(自动补全)。 + + 功能说明: + - 查询建议(query):基于历史搜索和热门搜索 + - 商品建议(product):匹配的商品 + - 类目建议(category):匹配的类目 + - 品牌建议(brand):匹配的品牌 + + 注意:此功能暂未实现,仅返回框架响应。 + """ + import time + start_time = time.time() + + # TODO: 实现搜索建议逻辑 + # 1. 从搜索历史中获取建议 + # 2. 从商品标题中匹配前缀 + # 3. 从类目、品牌中匹配 + + # 临时返回空结果 + suggestions = [] + + # 示例结构(暂不实现) + # suggestions = [ + # { + # "text": "芭比娃娃", + # "type": "query", + # "highlight": "比娃娃", + # "popularity": 850 + # } + # ] + + took_ms = int((time.time() - start_time) * 1000) + + return SearchSuggestResponse( + query=q, + suggestions=suggestions, + took_ms=took_ms + ) + + +@router.get("/instant", response_model=SearchResponse) +async def instant_search( + q: str = Query(..., min_length=2, description="搜索查询"), + size: int = Query(5, ge=1, le=20, description="结果数量") +): + """ + 即时搜索(Instant Search)。 + + 功能说明: + - 边输入边搜索,无需点击搜索按钮 + - 返回简化的搜索结果 + - 性能优化:缓存、限流 + + 注意:此功能暂未实现,调用标准搜索接口。 + """ + # TODO: 优化即时搜索性能 + # 1. 添加防抖/节流 + # 2. 实现结果缓存 + # 3. 简化返回字段 + + # 临时使用标准搜索接口 + from api.app import get_searcher + searcher = get_searcher() + + result = searcher.search( + query=q, + size=size, + from_=0 + ) + + return SearchResponse( + hits=result.hits, + total=result.total, + max_score=result.max_score, + took_ms=result.took_ms, + query_info=result.query_info + ) +``` + +### 5. 前端适配 + +**文件**:`frontend/static/js/app.js` + +**需要修改的地方**: + +1. 
**聚合参数改用简化配置**(第 57-87 行): +```javascript +// 旧的方式(直接 ES DSL) +const aggregations = { + "category_stats": { + "terms": { + "field": "categoryName_keyword", + "size": 15 + } + } +}; + +// 新的方式(简化配置) +const facets = [ + { + "field": "categoryName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "brandName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100}, + {"key": "100-200", "from": 100, "to": 200}, + {"key": "200+", "from": 200} + ] + } +]; +``` + +2. **过滤器使用新格式**(第 103 行): +```javascript +// 旧的方式 +filters: { + "price_ranges": ["0-50", "50-100"] // 硬编码 +} + +// 新的方式 +filters: { + "categoryName_keyword": ["玩具"], + "in_stock": true +}, +range_filters: { + "price": {"gte": 50, "lte": 100} +} +``` + +3. **解析标准化的分面结果**(第 208-258 行): +```javascript +// 旧的方式(直接访问 ES 结构) +if (aggregations.category_stats && aggregations.category_stats.buckets) { + aggregations.category_stats.buckets.forEach(bucket => { + // ... + }); +} + +// 新的方式(标准化格式) +if (data.facets) { + data.facets.forEach(facet => { + if (facet.field === 'categoryName_keyword') { + facet.values.forEach(facetValue => { + const value = facetValue.value; + const count = facetValue.count; + const selected = facetValue.selected; + // ... + }); + } + }); +} +``` + + +### 6. 测试代码更新 + +**文件**:`test_aggregation_api.py` + +**需要修改的地方**: + +1. 移除 `price_ranges` 硬编码测试(第 93 行) +2. 使用新的 `range_filters` 格式 +3. 使用新的 `facets` 配置 + +**新的测试代码**: + +```python +def test_search_with_filters(): + """测试新的过滤器格式""" + test_request = { + "query": "玩具", + "size": 5, + "filters": { + "categoryName_keyword": ["玩具"] + }, + "range_filters": { + "price": {"gte": 50, "lte": 100} + } + } + # ... 
+ +def test_search_with_facets(): + """测试新的分面配置""" + test_request = { + "query": "玩具", + "size": 20, + "facets": [ + { + "field": "categoryName_keyword", + "size": 15 + }, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100} + ] + } + ] + } + # ... +``` + +## 第三部分:实施步骤 + +### 阶段 1:后端模型层重构(高优先级) + +**任务清单**: + +- [ ] 更新 `api/models.py` + - [ ] 定义 `RangeFilter` 模型 + - [ ] 定义 `FacetConfig` 模型 + - [ ] 更新 `SearchRequest`,添加 `range_filters` 和 `facets` + - [ ] 移除 `aggregations` 参数 + - [ ] 定义 `FacetValue` 和 `FacetResult` 模型 + - [ ] 更新 `SearchResponse`,使用标准化分面格式 + - [ ] 添加 `SearchSuggestRequest` 和 `SearchSuggestResponse`(框架) + +**验证方式**: + +- 运行 Pydantic 模型验证 +- 检查 API 文档(`/docs`)是否正确生成 + +### 阶段 2:查询构建器重构(高优先级) + +**任务清单**: + +- [ ] 重构 `search/es_query_builder.py` + - [ ] 移除 `price_ranges` 硬编码逻辑(第 205-233 行) + - [ ] 重构 `_build_filters` 方法,支持 `range_filters` + - [ ] 移除 `add_dynamic_aggregations` 方法 + - [ ] 重构 `add_aggregations` 为 `build_facets` + - [ ] 更新 `build_query` 方法签名 +- [ ] 更新 `search/multilang_query_builder.py`:`build_multilang_query` 需同步接受并传递 `range_filters` 参数(`searcher.search()` 实际调用的是该方法) + +**验证方式**: + +- 编写单元测试验证过滤器构建逻辑 +- 打印生成的 ES DSL,检查正确性 + +### 阶段 3:搜索执行层重构(高优先级) + +**任务清单**: + +- [ ] 更新 `search/searcher.py` + - [ ] 更新 `search()` 方法签名 + - [ ] 使用新的 `build_facets` 方法 + - [ ] 实现 `_standardize_facets()` 辅助方法 + - [ ] 实现 `_get_field_label()` 辅助方法 + - [ ] 更新 `SearchResult` 类,使用标准化分面格式 + +**验证方式**: + +- 编写集成测试 +- 手动测试搜索功能 + +### 阶段 4:API 路由层更新(中优先级) + +**任务清单**: + +- [ ] 更新 `api/routes/search.py` + - [ ] 更新 `/search/` 端点,接受新的请求参数 + - [ ] 添加 `/search/suggestions` 端点(框架,返回空结果) + - [ ] 添加 `/search/instant` 端点(框架,调用标准搜索) + - [ ] 添加端点文档和示例 + +**验证方式**: + +- 使用 Swagger UI 测试端点 +- 检查 API 文档完整性 + +### 阶段 5:前端适配(中优先级) + +**任务清单**: + +- [ ] 更新 `frontend/static/js/app.js` + - [ ] 修改聚合参数为 `facets` 简化配置 + - [ ] 修改过滤器参数,分离 `filters` 和 `range_filters` + - [ ] 更新 `displayAggregations()` 方法,解析标准化分面结果 + - [ ] 添加范围过滤器 UI(如价格滑块) + - [ ] 移除硬编码的 `price_ranges` + +**验证方式**: + +- 
浏览器测试前端功能 +- 检查网络请求和响应格式 + +### 阶段 6:测试代码更新(低优先级) + +**任务清单**: + +- [ ] 更新 `test_aggregation_api.py` + - [ ] 移除 `price_ranges` 测试 + - [ ] 添加 `range_filters` 测试 + - [ ] 添加新的 `facets` 测试 +- [ ] 更新 `test_complete_search.py` +- [ ] 更新 `tests/integration/test_aggregation_api.py` +- [ ] 更新 `tests/unit/test_searcher.py` + +**验证方式**: + +- 运行所有测试,确保通过 +- 检查测试覆盖率 + +### 阶段 7:文档更新(低优先级) + +**任务清单**: + +- [ ] 撰写完整的 API 接口文档 +- [ ] 更新 `README.md` +- [ ] 更新 `USER_GUIDE.md` +- [ ] 添加接口使用示例 +- [ ] 添加迁移指南(旧接口 → 新接口) + +## 第四部分:API 使用示例 + +### 示例 1:简单搜索 + +```bash +POST /search/ +{ + "query": "芭比娃娃", + "size": 20 +} +``` + +### 示例 2:带过滤器的搜索 + +```bash +POST /search/ +{ + "query": "玩具", + "size": 20, + "filters": { + "categoryName_keyword": ["玩具", "益智玩具"], + "in_stock": true + }, + "range_filters": { + "price": {"gte": 50, "lte": 200} + } +} +``` + +### 示例 3:带分面搜索的请求 + +```bash +POST /search/ +{ + "query": "玩具", + "size": 20, + "facets": [ + { + "field": "categoryName_keyword", + "size": 15 + }, + { + "field": "brandName_keyword", + "size": 15 + }, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100}, + {"key": "100-200", "from": 100, "to": 200}, + {"key": "200+", "from": 200} + ] + } + ] +} +``` + +**响应示例**(标准化分面格式): + +```json +{ + "hits": [...], + "total": 118, + "max_score": 8.5, + "took_ms": 45, + "facets": [ + { + "field": "categoryName_keyword", + "label": "商品类目", + "type": "terms", + "values": [ + {"value": "玩具", "label": "玩具", "count": 85, "selected": false}, + {"value": "益智玩具", "label": "益智玩具", "count": 33, "selected": false} + ] + }, + { + "field": "price", + "label": "价格区间", + "type": "range", + "values": [ + {"value": "0-50", "label": "0-50元", "count": 23, "selected": false}, + {"value": "50-100", "label": "50-100元", "count": 45, "selected": false}, + {"value": "100-200", "label": "100-200元", "count": 38, "selected": false}, + {"value": "200+", "label": "200元以上", "count": 12, "selected": false} + 
] + } + ] +} +``` + +### 示例 4:搜索建议(框架;以下为目标响应格式,当前框架实现返回空的 `suggestions` 列表) + +```bash +GET /search/suggestions?q=芭&size=5 + +{ + "query": "芭", + "suggestions": [ + { + "text": "芭比娃娃", + "type": "query", + "highlight": "比娃娃", + "popularity": 850 + }, + { + "text": "芭比娃娃屋", + "type": "query", + "highlight": "比娃娃屋", + "popularity": 320 + } + ], + "took_ms": 5 +} +``` + +## 第五部分:向后兼容性 + +### 兼容策略 + +为保持向后兼容,在过渡期(1-2 个版本)内: + +1. **同时支持旧参数和新参数**: +```python +class SearchRequest(BaseModel): + # 新参数 + range_filters: Optional[Dict[str, RangeFilter]] = None + facets: Optional[List[Union[str, FacetConfig]]] = None + + # 旧参数(标记为废弃) + aggregations: Optional[Dict[str, Any]] = Field( + None, + deprecated=True, + description="已废弃。请使用 'facets' 参数" + ) +``` + +2. **在后端自动转换旧格式**: +```python +# 在 searcher.py 中 +if request.aggregations and not request.facets: + # 将旧的 aggregations 转换为新的 facets + request.facets = self._convert_legacy_aggregations(request.aggregations) +``` + +3. **在响应中提供迁移提示**: +```python +if request.aggregations: + warnings.append({ + "type": "deprecation", + "message": "'aggregations' 参数已废弃,请使用 'facets' 参数", + "migration_guide": "https://docs.example.com/migration" + }) +``` + + +### 迁移时间线 + +- **v3.0**(当前版本):发布新接口,旧接口标记为废弃 +- **v3.1**(1 个月后):移除旧接口的自动转换 +- **v4.0**(3 个月后):完全移除旧接口 + +## 第六部分:风险评估与缓解 + +### 风险点 + +1. **破坏性变更风险**: + + - 风险:现有客户代码可能依赖旧接口 + - 缓解:提供向后兼容层,发布详细迁移指南 + +2. **性能影响风险**: + + - 风险:新的标准化处理可能增加延迟 + - 缓解:添加性能测试,优化关键路径 + +3. **测试覆盖不足风险**: + + - 风险:重构可能引入新 bug + - 缓解:全面的单元测试和集成测试 + +### 验收标准 + +- [ ] 所有单元测试通过 +- [ ] 所有集成测试通过 +- [ ] API 文档完整且准确 +- [ ] 性能无明显下降(< 10% 延迟增加) +- [ ] 前端功能正常工作 +- [ ] 提供完整的迁移指南 + +## 总结 + +本计划通过系统性的重构,将搜索 API 从硬编码、暴露 ES 细节的实现,转变为灵活、通用、易用的 SaaS 产品接口。关键改进包括: + +1. ✅ 移除硬编码的 price_ranges 逻辑 +2. ✅ 实现结构化的过滤参数(filters + range_filters) +3. ✅ 简化聚合参数接口,不暴露 ES DSL +4. ✅ 标准化分面搜索响应格式 +5. 
✅ 添加搜索建议功能框架(暂不实现) + +通过这些改进,系统将具备更好的通用性、可维护性和可扩展性,为未来功能扩展奠定基础。 \ No newline at end of file diff --git a/.cursor/plans/api-interface-analysis-42918612.plan.1.md b/.cursor/plans/api-interface-analysis-42918612.plan.1.md new file mode 100644 index 0000000..88164b3 --- /dev/null +++ b/.cursor/plans/api-interface-analysis-42918612.plan.1.md @@ -0,0 +1,1222 @@ + +# 搜索引擎 API 接口重构实施计划 + +## 第一部分:现状分析 + +### 1. 当前实现存在的问题 + +#### 问题 1:硬编码的价格范围过滤 + +**位置**:`search/es_query_builder.py` 第 205-233 行 + +**问题描述**: + +```python +if field == 'price_ranges': + # 硬编码特定字符串值 + if price_range == '0-50': + price_ranges.append({"lt": 50}) + elif price_range == '50-100': + price_ranges.append({"gte": 50, "lt": 100}) + # ... +``` + +**影响**: + +- 只支持 `price` 字段,无法扩展到其他数值字段 +- 范围值硬编码,无法根据业务需求调整 +- 不符合 SaaS 系统的通用性要求 + +#### 问题 2:聚合参数直接暴露 ES DSL + +**位置**: + +- `api/models.py` 第 17 行:`aggregations: Optional[Dict[str, Any]]` +- `search/es_query_builder.py` 第 298-319 行:`add_dynamic_aggregations` +- `frontend/static/js/app.js` 第 57-87 行:前端硬编码 ES DSL + +**问题描述**: + +前端需要了解 Elasticsearch 的聚合语法: + +```javascript +const aggregations = { + "category_stats": { + "terms": { + "field": "categoryName_keyword", + "size": 15 + } + }, + "price_ranges": { + "range": { + "field": "price", + "ranges": [ + {"key": "0-50", "to": 50}, + // ... + ] + } + } +}; +``` + +**影响**: + +- 前端需要了解 ES 语法,增加集成难度 +- 不符合 SaaS 产品易用性原则 +- 难以进行参数验证和文档生成 + +#### 问题 3:分面搜索结果格式不统一 + +**位置**:`frontend/static/js/app.js` 第 208-258 行 + +**问题描述**: + +- 直接返回 ES 原始格式(`buckets` 结构) +- 前端需要知道不同聚合类型的响应结构 +- 没有统一的分面结果模型 + +**影响**: + +- 前端解析逻辑复杂 +- 不同类型的聚合处理方式不一致 +- 难以扩展新的聚合类型 + +#### 问题 4:缺少搜索建议功能 + +**当前状态**:完全没有实现 + +**需求**: + +- 自动补全(Autocomplete) +- 搜索建议(Suggestions) +- 搜索即时反馈(Instant Search) + +### 2. 依赖关系分析 + +**影响范围**: + +1. **后端模型层**:`api/models.py` +2. **查询构建层**:`search/es_query_builder.py` +3. **搜索执行层**:`search/searcher.py` +4. **API 路由层**:`api/routes/search.py` +5. **前端代码**:`frontend/static/js/app.js` +6. 
**测试代码**:`test_aggregation_api.py`, `test_complete_search.py` + +## 第二部分:优化方案设计 + +### 方案概述 + +采用**结构化过滤参数方案(方案 A 的简化版)**: + +- 分离 `filters`(精确匹配)和 `range_filters`(范围过滤) +- **不支持单字段多个不连续范围**,简化设计 +- 标准化聚合参数,使用简化的接口 +- 统一分面搜索响应格式 + +### 1. 新的请求模型设计 + +#### 1.1 核心模型定义 + +**文件**:`api/models.py` + +```python +from pydantic import BaseModel, Field, field_validator +from typing import List, Dict, Any, Optional, Union, Literal + + +class RangeFilter(BaseModel): + """数值范围过滤器""" + gte: Optional[float] = Field(None, description="大于等于 (>=)") + gt: Optional[float] = Field(None, description="大于 (>)") + lte: Optional[float] = Field(None, description="小于等于 (<=)") + lt: Optional[float] = Field(None, description="小于 (<)") + + @field_validator('*') + def check_at_least_one(cls, v, info): + """确保至少指定一个边界""" + values = info.data + if not any([values.get('gte'), values.get('gt'), + values.get('lte'), values.get('lt')]): + raise ValueError('至少需要指定一个范围边界') + return v + + class Config: + json_schema_extra = { + "examples": [ + {"gte": 50, "lte": 200}, + {"gt": 100}, + {"lt": 50} + ] + } + + +class FacetConfig(BaseModel): + """分面配置(简化版)""" + field: str = Field(..., description="分面字段名") + size: int = Field(10, ge=1, le=100, description="返回的分面值数量") + type: Literal["terms", "range"] = Field("terms", description="分面类型") + ranges: Optional[List[Dict[str, Any]]] = Field( + None, + description="范围分面的范围定义(仅当 type='range' 时需要)" + ) + + class Config: + json_schema_extra = { + "examples": [ + { + "field": "categoryName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "price", + "size": 4, + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100}, + {"key": "100-200", "from": 100, "to": 200}, + {"key": "200+", "from": 200} + ] + } + ] + } + + +class SearchRequest(BaseModel): + """搜索请求模型(重构版)""" + + # 基础搜索参数 + query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)") + size: int = Field(10, ge=1, le=100, 
description="返回结果数量") + from_: int = Field(0, ge=0, alias="from", description="分页偏移量") + + # 过滤器 - 精确匹配和多值匹配 + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field( + None, + description="精确匹配过滤器。单值表示精确匹配,数组表示 OR 匹配(匹配任意一个值)", + json_schema_extra={ + "examples": [ + { + "categoryName_keyword": ["玩具", "益智玩具"], + "brandName_keyword": "乐高", + "in_stock": True + } + ] + } + ) + + # 范围过滤器 - 数值范围 + range_filters: Optional[Dict[str, RangeFilter]] = Field( + None, + description="数值范围过滤器。支持 gte, gt, lte, lt 操作符", + json_schema_extra={ + "examples": [ + { + "price": {"gte": 50, "lte": 200}, + "days_since_last_update": {"lte": 30} + } + ] + } + ) + + # 排序 + sort_by: Optional[str] = Field(None, description="排序字段名(如 'price', 'create_time')") + sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)") + + # 分面搜索 - 简化接口 + facets: Optional[List[Union[str, FacetConfig]]] = Field( + None, + description="分面配置。可以是字段名列表(使用默认配置)或详细的分面配置对象", + json_schema_extra={ + "examples": [ + # 简单模式:只指定字段名,使用默认配置 + ["categoryName_keyword", "brandName_keyword"], + # 高级模式:详细配置 + [ + {"field": "categoryName_keyword", "size": 15}, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100} + ] + } + ] + ] + } + ) + + # 高级选项 + min_score: Optional[float] = Field(None, ge=0, description="最小相关性分数阈值") + highlight: bool = Field(False, description="是否高亮搜索关键词(暂不实现)") + debug: bool = Field(False, description="是否返回调试信息") + + # 个性化参数(预留) + user_id: Optional[str] = Field(None, description="用户ID,用于个性化搜索和推荐") + session_id: Optional[str] = Field(None, description="会话ID,用于搜索分析") + + +class ImageSearchRequest(BaseModel): + """图片搜索请求模型""" + image_url: str = Field(..., description="查询图片的 URL") + size: int = Field(10, ge=1, le=100, description="返回结果数量") + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = None + range_filters: Optional[Dict[str, RangeFilter]] = None + + 
+class SearchSuggestRequest(BaseModel): + """搜索建议请求模型(框架,暂不实现)""" + query: str = Field(..., min_length=1, description="搜索查询字符串") + size: int = Field(5, ge=1, le=20, description="返回建议数量") + types: List[Literal["query", "product", "category", "brand"]] = Field( + ["query"], + description="建议类型:query(查询建议), product(商品建议), category(类目建议), brand(品牌建议)" + ) +``` + +#### 1.2 响应模型定义 + +```python +class FacetValue(BaseModel): + """分面值""" + value: Union[str, int, float] = Field(..., description="分面值") + label: Optional[str] = Field(None, description="显示标签(如果与 value 不同)") + count: int = Field(..., description="匹配的文档数量") + selected: bool = Field(False, description="是否已选中(当前过滤器中)") + + +class FacetResult(BaseModel): + """分面结果(标准化格式)""" + field: str = Field(..., description="字段名") + label: str = Field(..., description="分面显示名称") + type: Literal["terms", "range"] = Field(..., description="分面类型") + values: List[FacetValue] = Field(..., description="分面值列表") + total_count: Optional[int] = Field(None, description="该字段的总文档数") + + +class SearchResponse(BaseModel): + """搜索响应模型(重构版)""" + + # 核心结果 + hits: List[Dict[str, Any]] = Field(..., description="搜索结果列表") + total: int = Field(..., description="匹配的总文档数") + max_score: float = Field(..., description="最高相关性分数") + + # 分面搜索结果(标准化格式) + facets: Optional[List[FacetResult]] = Field( + None, + description="分面统计结果(标准化格式)" + ) + + # 查询信息 + query_info: Dict[str, Any] = Field( + default_factory=dict, + description="查询处理信息(原始查询、改写、语言检测、翻译等)" + ) + + # 推荐与建议(预留) + related_queries: Optional[List[str]] = Field(None, description="相关搜索查询") + + # 性能指标 + took_ms: int = Field(..., description="搜索总耗时(毫秒)") + performance_info: Optional[Dict[str, Any]] = Field(None, description="详细性能信息") + + # 调试信息 + debug_info: Optional[Dict[str, Any]] = Field(None, description="调试信息(仅当 debug=True)") + + +class SearchSuggestResponse(BaseModel): + """搜索建议响应模型(框架,暂不实现)""" + query: str = Field(..., description="原始查询") + suggestions: List[Dict[str, Any]] = Field(..., 
description="建议列表") + took_ms: int = Field(..., description="耗时(毫秒)") +``` + +### 2. 查询构建器重构 + +#### 2.1 移除硬编码的 price_ranges 逻辑 + +**文件**:`search/es_query_builder.py` + +**需要修改的方法**:`_build_filters(self, filters, range_filters)` + +**改进点**: + +1. 移除 `if field == 'price_ranges'` 的特殊处理 +2. 分离 filters 和 range_filters 的处理逻辑 +3. 添加字段类型验证(利用配置系统) + +**新的实现逻辑**: + +```python +def _build_filters( + self, + filters: Optional[Dict[str, Any]] = None, + range_filters: Optional[Dict[str, Any]] = None +) -> List[Dict[str, Any]]: + """ + 构建过滤子句(重构版)。 + + Args: + filters: 精确匹配过滤器字典 + range_filters: 范围过滤器字典 + + Returns: + ES filter子句列表 + """ + filter_clauses = [] + + # 1. 处理精确匹配过滤 + if filters: + for field, value in filters.items(): + if isinstance(value, list): + # 多值匹配(OR) + filter_clauses.append({ + "terms": {field: value} + }) + else: + # 单值精确匹配 + filter_clauses.append({ + "term": {field: value} + }) + + # 2. 处理范围过滤 + if range_filters: + for field, range_spec in range_filters.items(): + # 验证字段是否为数值类型(可选,基于配置) + # TODO: 添加字段类型验证 + + # 构建范围查询 + range_conditions = {} + if isinstance(range_spec, dict): + for op in ['gte', 'gt', 'lte', 'lt']: + if op in range_spec and range_spec[op] is not None: + range_conditions[op] = range_spec[op] + + if range_conditions: + filter_clauses.append({ + "range": {field: range_conditions} + }) + + return filter_clauses +``` + +#### 2.2 优化聚合参数接口 + +**新增方法**:`build_facets(self, facet_configs)` + +**改进点**: + +1. 移除 `add_dynamic_aggregations`(直接暴露 ES DSL) +2. 重构 `add_aggregations` 为更通用的 `build_facets` +3. 支持简化配置和高级配置两种模式 + +**新的实现逻辑**: + +```python +def build_facets( + self, + facet_configs: Optional[List[Union[str, Dict[str, Any]]]] = None +) -> Dict[str, Any]: + """ + 构建分面聚合(重构版)。 + + Args: + facet_configs: 分面配置列表。可以是: + - 字符串列表:字段名,使用默认配置 + - 配置对象列表:详细的分面配置 + + Returns: + ES aggregations字典 + """ + if not facet_configs: + return {} + + aggs = {} + + for config in facet_configs: + # 1. 
简单模式:只有字段名 + if isinstance(config, str): + field = config + agg_name = f"{field}_facet" + aggs[agg_name] = { + "terms": { + "field": field, + "size": 10, # 默认大小 + "order": {"_count": "desc"} + } + } + + # 2. 高级模式:详细配置对象 + elif isinstance(config, dict): + field = config['field'] + facet_type = config.get('type', 'terms') + size = config.get('size', 10) + agg_name = f"{field}_facet" + + if facet_type == 'terms': + # Terms 聚合(分组统计) + aggs[agg_name] = { + "terms": { + "field": field, + "size": size, + "order": {"_count": "desc"} + } + } + + elif facet_type == 'range': + # Range 聚合(范围统计) + ranges = config.get('ranges', []) + if ranges: + aggs[agg_name] = { + "range": { + "field": field, + "ranges": ranges + } + } + + return aggs +``` + +#### 2.3 更新主查询构建方法 + +**修改方法签名**:`build_query()` + +```python +def build_query( + self, + query_text: str, + query_vector: Optional[np.ndarray] = None, + query_node: Optional[QueryNode] = None, + filters: Optional[Dict[str, Any]] = None, + range_filters: Optional[Dict[str, Any]] = None, # 新增 + size: int = 10, + from_: int = 0, + enable_knn: bool = True, + knn_k: int = 50, + knn_num_candidates: int = 200, + min_score: Optional[float] = None +) -> Dict[str, Any]: + """构建完整的 ES 查询(重构版)""" + # ... 实现 + + # 添加过滤器 + if filters or range_filters: + filter_clauses = self._build_filters(filters, range_filters) + if filter_clauses: + es_query["query"] = { + "bool": { + "must": [query_clause], + "filter": filter_clauses + } + } +``` + +### 3. 搜索执行层重构 + +**文件**:`search/searcher.py` + +**需要修改的方法**:`search()` + +**改进点**: + +1. 更新方法签名,接受 `range_filters` 参数 +2. 使用新的 `build_facets` 方法替代旧的聚合逻辑 +3. 
标准化分面搜索结果 + +**关键代码片段**: + +```python +def search( + self, + query: str, + size: int = 10, + from_: int = 0, + filters: Optional[Dict[str, Any]] = None, + range_filters: Optional[Dict[str, Any]] = None, # 新增 + facets: Optional[List[Union[str, Dict]]] = None, # 替代 aggregations + min_score: Optional[float] = None, + sort_by: Optional[str] = None, + sort_order: Optional[str] = "desc", + debug: bool = False, + context: Optional[RequestContext] = None +) -> SearchResult: + """执行搜索(重构版)""" + + # ... 查询解析 ... + + # 构建 ES 查询 + es_query = self.query_builder.build_multilang_query( + parsed_query=parsed_query, + query_vector=parsed_query.query_vector, + query_node=query_node, + filters=filters, + range_filters=range_filters, # 新增 + size=size, + from_=from_, + enable_knn=enable_embedding, + min_score=min_score + ) + + # 添加分面聚合 + if facets: + facet_aggs = self.query_builder.build_facets(facets) + if facet_aggs: + if "aggs" not in es_query: + es_query["aggs"] = {} + es_query["aggs"].update(facet_aggs) + + # ... 执行搜索 ... 
+ + # 标准化分面结果 + standardized_facets = self._standardize_facets( + es_response.get('aggregations', {}), + facets, + filters + ) + + return SearchResult( + hits=hits, + total=total_value, + max_score=max_score, + took_ms=int(total_duration), + facets=standardized_facets, # 标准化格式 + query_info=parsed_query.to_dict(), + debug_info=debug_info + ) +``` + +**新增辅助方法**: + +```python +def _standardize_facets( + self, + es_aggregations: Dict[str, Any], + facet_configs: Optional[List[Union[str, Dict]]], + current_filters: Optional[Dict[str, Any]] +) -> Optional[List[Dict[str, Any]]]: + """ + 将 ES 聚合结果转换为标准化的分面格式。 + + Args: + es_aggregations: ES 原始聚合结果 + facet_configs: 分面配置列表 + current_filters: 当前应用的过滤器 + + Returns: + 标准化的分面结果列表 + """ + if not es_aggregations or not facet_configs: + return None + + standardized_facets = [] + + for config in facet_configs: + # 解析配置 + if isinstance(config, str): + field = config + facet_type = "terms" + else: + field = config['field'] + facet_type = config.get('type', 'terms') + + agg_name = f"{field}_facet" + + if agg_name not in es_aggregations: + continue + + agg_result = es_aggregations[agg_name] + + # 构建标准化分面结果 + facet = { + "field": field, + "label": self._get_field_label(field), # 从配置获取 + "type": facet_type, + "values": [] + } + + # 获取当前字段的选中值 + selected_values = set() + if current_filters and field in current_filters: + filter_value = current_filters[field] + if isinstance(filter_value, list): + selected_values = set(filter_value) + else: + selected_values = {filter_value} + + # 转换 buckets + if 'buckets' in agg_result: + for bucket in agg_result['buckets']: + value = bucket.get('key') + count = bucket.get('doc_count', 0) + + facet['values'].append({ + "value": value, + "label": str(value), # 可以从配置映射 + "count": count, + "selected": value in selected_values + }) + + standardized_facets.append(facet) + + return standardized_facets + + +def _get_field_label(self, field: str) -> str: + """获取字段的显示标签""" + # 从配置中获取字段标签 + for field_config in 
self.config.fields: + if field_config.name == field: + # 假设配置中有 label 字段 + return getattr(field_config, 'label', field) + return field +``` + +### 4. API 路由层更新 + +**文件**:`api/routes/search.py` + +**改进点**: + +1. 接受新的请求模型参数 +2. 添加搜索建议端点(框架) + +**新增端点**: + +```python +@router.get("/suggestions", response_model=SearchSuggestResponse) +async def search_suggestions( + q: str = Query(..., min_length=1, description="搜索查询"), + size: int = Query(5, ge=1, le=20, description="建议数量"), + types: str = Query("query", description="建议类型(逗号分隔)") +): + """ + 获取搜索建议(自动补全)。 + + 功能说明: + - 查询建议(query):基于历史搜索和热门搜索 + - 商品建议(product):匹配的商品 + - 类目建议(category):匹配的类目 + - 品牌建议(brand):匹配的品牌 + + 注意:此功能暂未实现,仅返回框架响应。 + """ + import time + start_time = time.time() + + # TODO: 实现搜索建议逻辑 + # 1. 从搜索历史中获取建议 + # 2. 从商品标题中匹配前缀 + # 3. 从类目、品牌中匹配 + + # 临时返回空结果 + suggestions = [] + + # 示例结构(暂不实现) + # suggestions = [ + # { + # "text": "芭比娃娃", + # "type": "query", + # "highlight": "比娃娃", + # "popularity": 850 + # } + # ] + + took_ms = int((time.time() - start_time) * 1000) + + return SearchSuggestResponse( + query=q, + suggestions=suggestions, + took_ms=took_ms + ) + + +@router.get("/instant", response_model=SearchResponse) +async def instant_search( + q: str = Query(..., min_length=2, description="搜索查询"), + size: int = Query(5, ge=1, le=20, description="结果数量") +): + """ + 即时搜索(Instant Search)。 + + 功能说明: + - 边输入边搜索,无需点击搜索按钮 + - 返回简化的搜索结果 + - 性能优化:缓存、限流 + + 注意:此功能暂未实现,调用标准搜索接口。 + """ + # TODO: 优化即时搜索性能 + # 1. 添加防抖/节流 + # 2. 实现结果缓存 + # 3. 简化返回字段 + + # 临时使用标准搜索接口 + from api.app import get_searcher + searcher = get_searcher() + + result = searcher.search( + query=q, + size=size, + from_=0 + ) + + return SearchResponse( + hits=result.hits, + total=result.total, + max_score=result.max_score, + took_ms=result.took_ms, + query_info=result.query_info + ) +``` + +### 5. 前端适配 + +**文件**:`frontend/static/js/app.js` + +**需要修改的地方**: + +1. 
**聚合参数改用简化配置**(第 57-87 行): +```javascript +// 旧的方式(直接 ES DSL) +const aggregations = { + "category_stats": { + "terms": { + "field": "categoryName_keyword", + "size": 15 + } + } +}; + +// 新的方式(简化配置) +const facets = [ + { + "field": "categoryName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "brandName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100}, + {"key": "100-200", "from": 100, "to": 200}, + {"key": "200+", "from": 200} + ] + } +]; +``` + +2. **过滤器使用新格式**(第 103 行): +```javascript +// 旧的方式 +filters: { + "price_ranges": ["0-50", "50-100"] // 硬编码 +} + +// 新的方式 +filters: { + "categoryName_keyword": ["玩具"], + "in_stock": true +}, +range_filters: { + "price": {"gte": 50, "lte": 100} +} +``` + +3. **解析标准化的分面结果**(第 208-258 行): +```javascript +// 旧的方式(直接访问 ES 结构) +if (aggregations.category_stats && aggregations.category_stats.buckets) { + aggregations.category_stats.buckets.forEach(bucket => { + // ... + }); +} + +// 新的方式(标准化格式) +if (data.facets) { + data.facets.forEach(facet => { + if (facet.field === 'categoryName_keyword') { + facet.values.forEach(facetValue => { + const value = facetValue.value; + const count = facetValue.count; + const selected = facetValue.selected; + // ... + }); + } + }); +} +``` + + +### 6. 测试代码更新 + +**文件**:`test_aggregation_api.py` + +**需要修改的地方**: + +1. 移除 `price_ranges` 硬编码测试(第 93 行) +2. 使用新的 `range_filters` 格式 +3. 使用新的 `facets` 配置 + +**新的测试代码**: + +```python +def test_search_with_filters(): + """测试新的过滤器格式""" + test_request = { + "query": "玩具", + "size": 5, + "filters": { + "categoryName_keyword": ["玩具"] + }, + "range_filters": { + "price": {"gte": 50, "lte": 100} + } + } + # ... 
+ +def test_search_with_facets(): + """测试新的分面配置""" + test_request = { + "query": "玩具", + "size": 20, + "facets": [ + { + "field": "categoryName_keyword", + "size": 15 + }, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100} + ] + } + ] + } + # ... +``` + +## 第三部分:实施步骤 + +### 阶段 1:后端模型层重构(高优先级) + +**任务清单**: + +- [ ] 更新 `api/models.py` + - [ ] 定义 `RangeFilter` 模型 + - [ ] 定义 `FacetConfig` 模型 + - [ ] 更新 `SearchRequest`,添加 `range_filters` 和 `facets` + - [ ] 将 `aggregations` 参数标记为废弃(过渡期内保留,v4.0 移除,见第五部分) + - [ ] 定义 `FacetValue` 和 `FacetResult` 模型 + - [ ] 更新 `SearchResponse`,使用标准化分面格式 + - [ ] 添加 `SearchSuggestRequest` 和 `SearchSuggestResponse`(框架) + +**验证方式**: + +- 运行 Pydantic 模型验证 +- 检查 API 文档(`/docs`)是否正确生成 + +### 阶段 2:查询构建器重构(高优先级) + +**任务清单**: + +- [ ] 重构 `search/es_query_builder.py` + - [ ] 移除 `price_ranges` 硬编码逻辑(第 205-233 行) + - [ ] 重构 `_build_filters` 方法,支持 `range_filters` + - [ ] 移除 `add_dynamic_aggregations` 方法 + - [ ] 重构 `add_aggregations` 为 `build_facets` + - [ ] 更新 `build_query` 方法签名 +- [ ] 更新 `search/multilang_query_builder.py`(如果需要) + +**验证方式**: + +- 编写单元测试验证过滤器构建逻辑 +- 打印生成的 ES DSL,检查正确性 + +### 阶段 3:搜索执行层重构(高优先级) + +**任务清单**: + +- [ ] 更新 `search/searcher.py` + - [ ] 更新 `search()` 方法签名 + - [ ] 使用新的 `build_facets` 方法 + - [ ] 实现 `_standardize_facets()` 辅助方法 + - [ ] 实现 `_get_field_label()` 辅助方法 + - [ ] 更新 `SearchResult` 类,使用标准化分面格式 + +**验证方式**: + +- 编写集成测试 +- 手动测试搜索功能 + +### 阶段 4:API 路由层更新(中优先级) + +**任务清单**: + +- [ ] 更新 `api/routes/search.py` + - [ ] 更新 `/search/` 端点,接受新的请求参数 + - [ ] 添加 `/search/suggestions` 端点(框架,返回空结果) + - [ ] 添加 `/search/instant` 端点(框架,调用标准搜索) + - [ ] 添加端点文档和示例 + +**验证方式**: + +- 使用 Swagger UI 测试端点 +- 检查 API 文档完整性 + +### 阶段 5:前端适配(中优先级) + +**任务清单**: + +- [ ] 更新 `frontend/static/js/app.js` + - [ ] 修改聚合参数为 `facets` 简化配置 + - [ ] 修改过滤器参数,分离 `filters` 和 `range_filters` + - [ ] 更新 `displayAggregations()` 方法,解析标准化分面结果 + - [ ] 添加范围过滤器 UI(如价格滑块) + - [ ] 移除硬编码的 `price_ranges` + +**验证方式**: + +- 
浏览器测试前端功能 +- 检查网络请求和响应格式 + +### 阶段 6:测试代码更新(低优先级) + +**任务清单**: + +- [ ] 更新 `test_aggregation_api.py` + - [ ] 移除 `price_ranges` 测试 + - [ ] 添加 `range_filters` 测试 + - [ ] 添加新的 `facets` 测试 +- [ ] 更新 `test_complete_search.py` +- [ ] 更新 `tests/integration/test_aggregation_api.py` +- [ ] 更新 `tests/unit/test_searcher.py` + +**验证方式**: + +- 运行所有测试,确保通过 +- 检查测试覆盖率 + +### 阶段 7:文档更新(低优先级) + +**任务清单**: + +- [ ] 撰写完整的 API 接口文档 +- [ ] 更新 `README.md` +- [ ] 更新 `USER_GUIDE.md` +- [ ] 添加接口使用示例 +- [ ] 添加迁移指南(旧接口 → 新接口) + +## 第四部分:API 使用示例 + +### 示例 1:简单搜索 + +```bash +POST /search/ +{ + "query": "芭比娃娃", + "size": 20 +} +``` + +### 示例 2:带过滤器的搜索 + +```bash +POST /search/ +{ + "query": "玩具", + "size": 20, + "filters": { + "categoryName_keyword": ["玩具", "益智玩具"], + "in_stock": true + }, + "range_filters": { + "price": {"gte": 50, "lte": 200} + } +} +``` + +### 示例 3:带分面搜索的请求 + +```bash +POST /search/ +{ + "query": "玩具", + "size": 20, + "facets": [ + { + "field": "categoryName_keyword", + "size": 15 + }, + { + "field": "brandName_keyword", + "size": 15 + }, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100}, + {"key": "100-200", "from": 100, "to": 200}, + {"key": "200+", "from": 200} + ] + } + ] +} +``` + +**响应示例**(标准化分面格式): + +```json +{ + "hits": [...], + "total": 118, + "max_score": 8.5, + "took_ms": 45, + "facets": [ + { + "field": "categoryName_keyword", + "label": "商品类目", + "type": "terms", + "values": [ + {"value": "玩具", "label": "玩具", "count": 85, "selected": false}, + {"value": "益智玩具", "label": "益智玩具", "count": 33, "selected": false} + ] + }, + { + "field": "price", + "label": "价格区间", + "type": "range", + "values": [ + {"value": "0-50", "label": "0-50元", "count": 23, "selected": false}, + {"value": "50-100", "label": "50-100元", "count": 45, "selected": false}, + {"value": "100-200", "label": "100-200元", "count": 38, "selected": false}, + {"value": "200+", "label": "200元以上", "count": 12, "selected": false} + 
] + } + ] +} +``` + +### 示例 4:搜索建议(框架) + +```bash +GET /search/suggestions?q=芭&size=5 + +{ + "query": "芭", + "suggestions": [ + { + "text": "芭比娃娃", + "type": "query", + "highlight": "比娃娃", + "popularity": 850 + }, + { + "text": "芭比娃娃屋", + "type": "query", + "highlight": "比娃娃屋", + "popularity": 320 + } + ], + "took_ms": 5 +} +``` + +## 第五部分:向后兼容性 + +### 兼容策略 + +为保持向后兼容,在过渡期(1-2 个版本)内: + +1. **同时支持旧参数和新参数**: +```python +class SearchRequest(BaseModel): + # 新参数 + range_filters: Optional[Dict[str, RangeFilter]] = None + facets: Optional[List[Union[str, FacetConfig]]] = None + + # 旧参数(标记为废弃) + aggregations: Optional[Dict[str, Any]] = Field( + None, + deprecated=True, + description="已废弃。请使用 'facets' 参数" + ) +``` + +2. **在后端自动转换旧格式**: +```python +# 在 searcher.py 中 +if request.aggregations and not request.facets: + # 将旧的 aggregations 转换为新的 facets + request.facets = self._convert_legacy_aggregations(request.aggregations) +``` + +3. **在响应中提供迁移提示**: +```python +if request.aggregations: + warnings.append({ + "type": "deprecation", + "message": "'aggregations' 参数已废弃,请使用 'facets' 参数", + "migration_guide": "https://docs.example.com/migration" + }) +``` + + +### 迁移时间线 + +- **v3.0**(当前版本):发布新接口,旧接口标记为废弃 +- **v3.1**(1 个月后):移除旧接口的自动转换 +- **v4.0**(3 个月后):完全移除旧接口 + +## 第六部分:风险评估与缓解 + +### 风险点 + +1. **破坏性变更风险**: + + - 风险:现有客户代码可能依赖旧接口 + - 缓解:提供向后兼容层,发布详细迁移指南 + +2. **性能影响风险**: + + - 风险:新的标准化处理可能增加延迟 + - 缓解:添加性能测试,优化关键路径 + +3. **测试覆盖不足风险**: + + - 风险:重构可能引入新 bug + - 缓解:全面的单元测试和集成测试 + +### 验收标准 + +- [ ] 所有单元测试通过 +- [ ] 所有集成测试通过 +- [ ] API 文档完整且准确 +- [ ] 性能无明显下降(< 10% 延迟增加) +- [ ] 前端功能正常工作 +- [ ] 提供完整的迁移指南 + +## 总结 + +本计划通过系统性的重构,将搜索 API 从硬编码、暴露 ES 细节的实现,转变为灵活、通用、易用的 SaaS 产品接口。关键改进包括: + +1. ✅ 移除硬编码的 price_ranges 逻辑 +2. ✅ 实现结构化的过滤参数(filters + range_filters) +3. ✅ 简化聚合参数接口,不暴露 ES DSL +4. ✅ 标准化分面搜索响应格式 +5. 
✅ 添加搜索建议功能框架(暂不实现) + +通过这些改进,系统将具备更好的通用性、可维护性和可扩展性,为未来功能扩展奠定基础。 \ No newline at end of file diff --git a/.cursor/plans/api-interface-analysis-42918612.plan.2.md b/.cursor/plans/api-interface-analysis-42918612.plan.2.md new file mode 100644 index 0000000..61ab5f5 --- /dev/null +++ b/.cursor/plans/api-interface-analysis-42918612.plan.2.md @@ -0,0 +1,1222 @@ + +# 搜索引擎 API 接口重构实施计划 + +## 第一部分:现状分析 + +### 1. 当前实现存在的问题 + +#### 问题 1:硬编码的价格范围过滤 + +**位置**:`search/es_query_builder.py` 第 205-233 行 + +**问题描述**: + +```python +if field == 'price_ranges': + # 硬编码特定字符串值 + if price_range == '0-50': + price_ranges.append({"lt": 50}) + elif price_range == '50-100': + price_ranges.append({"gte": 50, "lt": 100}) + # ... +``` + +**影响**: + +- 只支持 `price` 字段,无法扩展到其他数值字段 +- 范围值硬编码,无法根据业务需求调整 +- 不符合 SaaS 系统的通用性要求 + +#### 问题 2:聚合参数直接暴露 ES DSL + +**位置**: + +- `api/models.py` 第 17 行:`aggregations: Optional[Dict[str, Any]]` +- `search/es_query_builder.py` 第 298-319 行:`add_dynamic_aggregations` +- `frontend/static/js/app.js` 第 57-87 行:前端硬编码 ES DSL + +**问题描述**: + +前端需要了解 Elasticsearch 的聚合语法: + +```javascript +const aggregations = { + "category_stats": { + "terms": { + "field": "categoryName_keyword", + "size": 15 + } + }, + "price_ranges": { + "range": { + "field": "price", + "ranges": [ + {"key": "0-50", "to": 50}, + // ... + ] + } + } +}; +``` + +**影响**: + +- 前端需要了解 ES 语法,增加集成难度 +- 不符合 SaaS 产品易用性原则 +- 难以进行参数验证和文档生成 + +#### 问题 3:分面搜索结果格式不统一 + +**位置**:`frontend/static/js/app.js` 第 208-258 行 + +**问题描述**: + +- 直接返回 ES 原始格式(`buckets` 结构) +- 前端需要知道不同聚合类型的响应结构 +- 没有统一的分面结果模型 + +**影响**: + +- 前端解析逻辑复杂 +- 不同类型的聚合处理方式不一致 +- 难以扩展新的聚合类型 + +#### 问题 4:缺少搜索建议功能 + +**当前状态**:完全没有实现 + +**需求**: + +- 自动补全(Autocomplete) +- 搜索建议(Suggestions) +- 搜索即时反馈(Instant Search) + +### 2. 依赖关系分析 + +**影响范围**: + +1. **后端模型层**:`api/models.py` +2. **查询构建层**:`search/es_query_builder.py` +3. **搜索执行层**:`search/searcher.py` +4. **API 路由层**:`api/routes/search.py` +5. **前端代码**:`frontend/static/js/app.js` +6. 
**测试代码**:`test_aggregation_api.py`, `test_complete_search.py` + +## 第二部分:优化方案设计 + +### 方案概述 + +采用**结构化过滤参数方案(方案 A 的简化版)**: + +- 分离 `filters`(精确匹配)和 `range_filters`(范围过滤) +- **不支持单字段多个不连续范围**,简化设计 +- 标准化聚合参数,使用简化的接口 +- 统一分面搜索响应格式 + +### 1. 新的请求模型设计 + +#### 1.1 核心模型定义 + +**文件**:`api/models.py` + +```python +from pydantic import BaseModel, Field, field_validator +from typing import List, Dict, Any, Optional, Union, Literal + + +class RangeFilter(BaseModel): + """数值范围过滤器""" + gte: Optional[float] = Field(None, description="大于等于 (>=)") + gt: Optional[float] = Field(None, description="大于 (>)") + lte: Optional[float] = Field(None, description="小于等于 (<=)") + lt: Optional[float] = Field(None, description="小于 (<)") + + @field_validator('*') + def check_at_least_one(cls, v, info): + """确保至少指定一个边界""" + values = info.data + if not any([values.get('gte'), values.get('gt'), + values.get('lte'), values.get('lt')]): + raise ValueError('至少需要指定一个范围边界') + return v + + class Config: + json_schema_extra = { + "examples": [ + {"gte": 50, "lte": 200}, + {"gt": 100}, + {"lt": 50} + ] + } + + +class FacetConfig(BaseModel): + """分面配置(简化版)""" + field: str = Field(..., description="分面字段名") + size: int = Field(10, ge=1, le=100, description="返回的分面值数量") + type: Literal["terms", "range"] = Field("terms", description="分面类型") + ranges: Optional[List[Dict[str, Any]]] = Field( + None, + description="范围分面的范围定义(仅当 type='range' 时需要)" + ) + + class Config: + json_schema_extra = { + "examples": [ + { + "field": "categoryName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "price", + "size": 4, + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100}, + {"key": "100-200", "from": 100, "to": 200}, + {"key": "200+", "from": 200} + ] + } + ] + } + + +class SearchRequest(BaseModel): + """搜索请求模型(重构版)""" + + # 基础搜索参数 + query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)") + size: int = Field(10, ge=1, le=100, 
description="返回结果数量") + from_: int = Field(0, ge=0, alias="from", description="分页偏移量") + + # 过滤器 - 精确匹配和多值匹配 + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field( + None, + description="精确匹配过滤器。单值表示精确匹配,数组表示 OR 匹配(匹配任意一个值)", + json_schema_extra={ + "examples": [ + { + "categoryName_keyword": ["玩具", "益智玩具"], + "brandName_keyword": "乐高", + "in_stock": True + } + ] + } + ) + + # 范围过滤器 - 数值范围 + range_filters: Optional[Dict[str, RangeFilter]] = Field( + None, + description="数值范围过滤器。支持 gte, gt, lte, lt 操作符", + json_schema_extra={ + "examples": [ + { + "price": {"gte": 50, "lte": 200}, + "days_since_last_update": {"lte": 30} + } + ] + } + ) + + # 排序 + sort_by: Optional[str] = Field(None, description="排序字段名(如 'price', 'create_time')") + sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)") + + # 分面搜索 - 简化接口 + facets: Optional[List[Union[str, FacetConfig]]] = Field( + None, + description="分面配置。可以是字段名列表(使用默认配置)或详细的分面配置对象", + json_schema_extra={ + "examples": [ + # 简单模式:只指定字段名,使用默认配置 + ["categoryName_keyword", "brandName_keyword"], + # 高级模式:详细配置 + [ + {"field": "categoryName_keyword", "size": 15}, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100} + ] + } + ] + ] + } + ) + + # 高级选项 + min_score: Optional[float] = Field(None, ge=0, description="最小相关性分数阈值") + highlight: bool = Field(False, description="是否高亮搜索关键词(暂不实现)") + debug: bool = Field(False, description="是否返回调试信息") + + # 个性化参数(预留) + user_id: Optional[str] = Field(None, description="用户ID,用于个性化搜索和推荐") + session_id: Optional[str] = Field(None, description="会话ID,用于搜索分析") + + +class ImageSearchRequest(BaseModel): + """图片搜索请求模型""" + image_url: str = Field(..., description="查询图片的 URL") + size: int = Field(10, ge=1, le=100, description="返回结果数量") + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = None + range_filters: Optional[Dict[str, RangeFilter]] = None + + 
+class SearchSuggestRequest(BaseModel): + """搜索建议请求模型(框架,暂不实现)""" + query: str = Field(..., min_length=1, description="搜索查询字符串") + size: int = Field(5, ge=1, le=20, description="返回建议数量") + types: List[Literal["query", "product", "category", "brand"]] = Field( + ["query"], + description="建议类型:query(查询建议), product(商品建议), category(类目建议), brand(品牌建议)" + ) +``` + +#### 1.2 响应模型定义 + +```python +class FacetValue(BaseModel): + """分面值""" + value: Union[str, int, float] = Field(..., description="分面值") + label: Optional[str] = Field(None, description="显示标签(如果与 value 不同)") + count: int = Field(..., description="匹配的文档数量") + selected: bool = Field(False, description="是否已选中(当前过滤器中)") + + +class FacetResult(BaseModel): + """分面结果(标准化格式)""" + field: str = Field(..., description="字段名") + label: str = Field(..., description="分面显示名称") + type: Literal["terms", "range"] = Field(..., description="分面类型") + values: List[FacetValue] = Field(..., description="分面值列表") + total_count: Optional[int] = Field(None, description="该字段的总文档数") + + +class SearchResponse(BaseModel): + """搜索响应模型(重构版)""" + + # 核心结果 + hits: List[Dict[str, Any]] = Field(..., description="搜索结果列表") + total: int = Field(..., description="匹配的总文档数") + max_score: float = Field(..., description="最高相关性分数") + + # 分面搜索结果(标准化格式) + facets: Optional[List[FacetResult]] = Field( + None, + description="分面统计结果(标准化格式)" + ) + + # 查询信息 + query_info: Dict[str, Any] = Field( + default_factory=dict, + description="查询处理信息(原始查询、改写、语言检测、翻译等)" + ) + + # 推荐与建议(预留) + related_queries: Optional[List[str]] = Field(None, description="相关搜索查询") + + # 性能指标 + took_ms: int = Field(..., description="搜索总耗时(毫秒)") + performance_info: Optional[Dict[str, Any]] = Field(None, description="详细性能信息") + + # 调试信息 + debug_info: Optional[Dict[str, Any]] = Field(None, description="调试信息(仅当 debug=True)") + + +class SearchSuggestResponse(BaseModel): + """搜索建议响应模型(框架,暂不实现)""" + query: str = Field(..., description="原始查询") + suggestions: List[Dict[str, Any]] = Field(..., 
description="建议列表") + took_ms: int = Field(..., description="耗时(毫秒)") +``` + +### 2. 查询构建器重构 + +#### 2.1 移除硬编码的 price_ranges 逻辑 + +**文件**:`search/es_query_builder.py` + +**需要修改的方法**:`_build_filters(self, filters, range_filters)` + +**改进点**: + +1. 移除 `if field == 'price_ranges'` 的特殊处理 +2. 分离 filters 和 range_filters 的处理逻辑 +3. 添加字段类型验证(利用配置系统) + +**新的实现逻辑**: + +```python +def _build_filters( + self, + filters: Optional[Dict[str, Any]] = None, + range_filters: Optional[Dict[str, Any]] = None +) -> List[Dict[str, Any]]: + """ + 构建过滤子句(重构版)。 + + Args: + filters: 精确匹配过滤器字典 + range_filters: 范围过滤器字典 + + Returns: + ES filter子句列表 + """ + filter_clauses = [] + + # 1. 处理精确匹配过滤 + if filters: + for field, value in filters.items(): + if isinstance(value, list): + # 多值匹配(OR) + filter_clauses.append({ + "terms": {field: value} + }) + else: + # 单值精确匹配 + filter_clauses.append({ + "term": {field: value} + }) + + # 2. 处理范围过滤 + if range_filters: + for field, range_spec in range_filters.items(): + # 验证字段是否为数值类型(可选,基于配置) + # TODO: 添加字段类型验证 + + # 构建范围查询 + range_conditions = {} + if isinstance(range_spec, dict): + for op in ['gte', 'gt', 'lte', 'lt']: + if op in range_spec and range_spec[op] is not None: + range_conditions[op] = range_spec[op] + + if range_conditions: + filter_clauses.append({ + "range": {field: range_conditions} + }) + + return filter_clauses +``` + +#### 2.2 优化聚合参数接口 + +**新增方法**:`build_facets(self, facet_configs)` + +**改进点**: + +1. 移除 `add_dynamic_aggregations`(直接暴露 ES DSL) +2. 重构 `add_aggregations` 为更通用的 `build_facets` +3. 支持简化配置和高级配置两种模式 + +**新的实现逻辑**: + +```python +def build_facets( + self, + facet_configs: Optional[List[Union[str, Dict[str, Any]]]] = None +) -> Dict[str, Any]: + """ + 构建分面聚合(重构版)。 + + Args: + facet_configs: 分面配置列表。可以是: + - 字符串列表:字段名,使用默认配置 + - 配置对象列表:详细的分面配置 + + Returns: + ES aggregations字典 + """ + if not facet_configs: + return {} + + aggs = {} + + for config in facet_configs: + # 1. 
简单模式:只有字段名 + if isinstance(config, str): + field = config + agg_name = f"{field}_facet" + aggs[agg_name] = { + "terms": { + "field": field, + "size": 10, # 默认大小 + "order": {"_count": "desc"} + } + } + + # 2. 高级模式:详细配置对象 + elif isinstance(config, dict): + field = config['field'] + facet_type = config.get('type', 'terms') + size = config.get('size', 10) + agg_name = f"{field}_facet" + + if facet_type == 'terms': + # Terms 聚合(分组统计) + aggs[agg_name] = { + "terms": { + "field": field, + "size": size, + "order": {"_count": "desc"} + } + } + + elif facet_type == 'range': + # Range 聚合(范围统计) + ranges = config.get('ranges', []) + if ranges: + aggs[agg_name] = { + "range": { + "field": field, + "ranges": ranges + } + } + + return aggs +``` + +#### 2.3 更新主查询构建方法 + +**修改方法签名**:`build_query()` + +```python +def build_query( + self, + query_text: str, + query_vector: Optional[np.ndarray] = None, + query_node: Optional[QueryNode] = None, + filters: Optional[Dict[str, Any]] = None, + range_filters: Optional[Dict[str, Any]] = None, # 新增 + size: int = 10, + from_: int = 0, + enable_knn: bool = True, + knn_k: int = 50, + knn_num_candidates: int = 200, + min_score: Optional[float] = None +) -> Dict[str, Any]: + """构建完整的 ES 查询(重构版)""" + # ... 实现 + + # 添加过滤器 + if filters or range_filters: + filter_clauses = self._build_filters(filters, range_filters) + if filter_clauses: + es_query["query"] = { + "bool": { + "must": [query_clause], + "filter": filter_clauses + } + } +``` + +### 3. 搜索执行层重构 + +**文件**:`search/searcher.py` + +**需要修改的方法**:`search()` + +**改进点**: + +1. 更新方法签名,接受 `range_filters` 参数 +2. 使用新的 `build_facets` 方法替代旧的聚合逻辑 +3. 
标准化分面搜索结果 + +**关键代码片段**: + +```python +def search( + self, + query: str, + size: int = 10, + from_: int = 0, + filters: Optional[Dict[str, Any]] = None, + range_filters: Optional[Dict[str, Any]] = None, # 新增 + facets: Optional[List[Union[str, Dict]]] = None, # 替代 aggregations + min_score: Optional[float] = None, + sort_by: Optional[str] = None, + sort_order: Optional[str] = "desc", + debug: bool = False, + context: Optional[RequestContext] = None +) -> SearchResult: + """执行搜索(重构版)""" + + # ... 查询解析 ... + + # 构建 ES 查询 + es_query = self.query_builder.build_multilang_query( + parsed_query=parsed_query, + query_vector=parsed_query.query_vector, + query_node=query_node, + filters=filters, + range_filters=range_filters, # 新增 + size=size, + from_=from_, + enable_knn=enable_embedding, + min_score=min_score + ) + + # 添加分面聚合 + if facets: + facet_aggs = self.query_builder.build_facets(facets) + if facet_aggs: + if "aggs" not in es_query: + es_query["aggs"] = {} + es_query["aggs"].update(facet_aggs) + + # ... 执行搜索 ... 
+ + # 标准化分面结果 + standardized_facets = self._standardize_facets( + es_response.get('aggregations', {}), + facets, + filters + ) + + return SearchResult( + hits=hits, + total=total_value, + max_score=max_score, + took_ms=int(total_duration), + facets=standardized_facets, # 标准化格式 + query_info=parsed_query.to_dict(), + debug_info=debug_info + ) +``` + +**新增辅助方法**: + +```python +def _standardize_facets( + self, + es_aggregations: Dict[str, Any], + facet_configs: Optional[List[Union[str, Dict]]], + current_filters: Optional[Dict[str, Any]] +) -> Optional[List[Dict[str, Any]]]: + """ + 将 ES 聚合结果转换为标准化的分面格式。 + + Args: + es_aggregations: ES 原始聚合结果 + facet_configs: 分面配置列表 + current_filters: 当前应用的过滤器 + + Returns: + 标准化的分面结果列表 + """ + if not es_aggregations or not facet_configs: + return None + + standardized_facets = [] + + for config in facet_configs: + # 解析配置 + if isinstance(config, str): + field = config + facet_type = "terms" + else: + field = config['field'] + facet_type = config.get('type', 'terms') + + agg_name = f"{field}_facet" + + if agg_name not in es_aggregations: + continue + + agg_result = es_aggregations[agg_name] + + # 构建标准化分面结果 + facet = { + "field": field, + "label": self._get_field_label(field), # 从配置获取 + "type": facet_type, + "values": [] + } + + # 获取当前字段的选中值 + selected_values = set() + if current_filters and field in current_filters: + filter_value = current_filters[field] + if isinstance(filter_value, list): + selected_values = set(filter_value) + else: + selected_values = {filter_value} + + # 转换 buckets + if 'buckets' in agg_result: + for bucket in agg_result['buckets']: + value = bucket.get('key') + count = bucket.get('doc_count', 0) + + facet['values'].append({ + "value": value, + "label": str(value), # 可以从配置映射 + "count": count, + "selected": value in selected_values + }) + + standardized_facets.append(facet) + + return standardized_facets + + +def _get_field_label(self, field: str) -> str: + """获取字段的显示标签""" + # 从配置中获取字段标签 + for field_config in 
self.config.fields: + if field_config.name == field: + # 假设配置中有 label 字段 + return getattr(field_config, 'label', field) + return field +``` + +### 4. API 路由层更新 + +**文件**:`api/routes/search.py` + +**改进点**: + +1. 接受新的请求模型参数 +2. 添加搜索建议端点(框架) + +**新增端点**: + +```python +@router.get("/suggestions", response_model=SearchSuggestResponse) +async def search_suggestions( + q: str = Query(..., min_length=1, description="搜索查询"), + size: int = Query(5, ge=1, le=20, description="建议数量"), + types: str = Query("query", description="建议类型(逗号分隔)") +): + """ + 获取搜索建议(自动补全)。 + + 功能说明: + - 查询建议(query):基于历史搜索和热门搜索 + - 商品建议(product):匹配的商品 + - 类目建议(category):匹配的类目 + - 品牌建议(brand):匹配的品牌 + + 注意:此功能暂未实现,仅返回框架响应。 + """ + import time + start_time = time.time() + + # TODO: 实现搜索建议逻辑 + # 1. 从搜索历史中获取建议 + # 2. 从商品标题中匹配前缀 + # 3. 从类目、品牌中匹配 + + # 临时返回空结果 + suggestions = [] + + # 示例结构(暂不实现) + # suggestions = [ + # { + # "text": "芭比娃娃", + # "type": "query", + # "highlight": "比娃娃", + # "popularity": 850 + # } + # ] + + took_ms = int((time.time() - start_time) * 1000) + + return SearchSuggestResponse( + query=q, + suggestions=suggestions, + took_ms=took_ms + ) + + +@router.get("/instant", response_model=SearchResponse) +async def instant_search( + q: str = Query(..., min_length=2, description="搜索查询"), + size: int = Query(5, ge=1, le=20, description="结果数量") +): + """ + 即时搜索(Instant Search)。 + + 功能说明: + - 边输入边搜索,无需点击搜索按钮 + - 返回简化的搜索结果 + - 性能优化:缓存、限流 + + 注意:此功能暂未实现,调用标准搜索接口。 + """ + # TODO: 优化即时搜索性能 + # 1. 添加防抖/节流 + # 2. 实现结果缓存 + # 3. 简化返回字段 + + # 临时使用标准搜索接口 + from api.app import get_searcher + searcher = get_searcher() + + result = searcher.search( + query=q, + size=size, + from_=0 + ) + + return SearchResponse( + hits=result.hits, + total=result.total, + max_score=result.max_score, + took_ms=result.took_ms, + query_info=result.query_info + ) +``` + +### 5. 前端适配 + +**文件**:`frontend/static/js/app.js` + +**需要修改的地方**: + +1. 
**聚合参数改用简化配置**(第 57-87 行): +```javascript +// 旧的方式(直接 ES DSL) +const aggregations = { + "category_stats": { + "terms": { + "field": "categoryName_keyword", + "size": 15 + } + } +}; + +// 新的方式(简化配置) +const facets = [ + { + "field": "categoryName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "brandName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100}, + {"key": "100-200", "from": 100, "to": 200}, + {"key": "200+", "from": 200} + ] + } +]; +``` + +2. **过滤器使用新格式**(第 103 行): +```javascript +// 旧的方式 +filters: { + "price_ranges": ["0-50", "50-100"] // 硬编码 +} + +// 新的方式 +filters: { + "categoryName_keyword": ["玩具"], + "in_stock": true +}, +range_filters: { + "price": {"gte": 50, "lte": 100} +} +``` + +3. **解析标准化的分面结果**(第 208-258 行): +```javascript +// 旧的方式(直接访问 ES 结构) +if (aggregations.category_stats && aggregations.category_stats.buckets) { + aggregations.category_stats.buckets.forEach(bucket => { + // ... + }); +} + +// 新的方式(标准化格式) +if (data.facets) { + data.facets.forEach(facet => { + if (facet.field === 'categoryName_keyword') { + facet.values.forEach(facetValue => { + const value = facetValue.value; + const count = facetValue.count; + const selected = facetValue.selected; + // ... + }); + } + }); +} +``` + + +### 6. 测试代码更新 + +**文件**:`test_aggregation_api.py` + +**需要修改的地方**: + +1. 移除 `price_ranges` 硬编码测试(第 93 行) +2. 使用新的 `range_filters` 格式 +3. 使用新的 `facets` 配置 + +**新的测试代码**: + +```python +def test_search_with_filters(): + """测试新的过滤器格式""" + test_request = { + "query": "玩具", + "size": 5, + "filters": { + "categoryName_keyword": ["玩具"] + }, + "range_filters": { + "price": {"gte": 50, "lte": 100} + } + } + # ... 
+ +def test_search_with_facets(): + """测试新的分面配置""" + test_request = { + "query": "玩具", + "size": 20, + "facets": [ + { + "field": "categoryName_keyword", + "size": 15 + }, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100} + ] + } + ] + } + # ... +``` + +## 第三部分:实施步骤 + +### 阶段 1:后端模型层重构(高优先级) + +**任务清单**: + +- [ ] 更新 `api/models.py` + - [ ] 定义 `RangeFilter` 模型 + - [ ] 定义 `FacetConfig` 模型 + - [ ] 更新 `SearchRequest`,添加 `range_filters` 和 `facets` + - [ ] 移除 `aggregations` 参数 + - [ ] 定义 `FacetValue` 和 `FacetResult` 模型 + - [ ] 更新 `SearchResponse`,使用标准化分面格式 + - [ ] 添加 `SearchSuggestRequest` 和 `SearchSuggestResponse`(框架) + +**验证方式**: + +- 运行 Pydantic 模型验证 +- 检查 API 文档(`/docs`)是否正确生成 + +### 阶段 2:查询构建器重构(高优先级) + +**任务清单**: + +- [ ] 重构 `search/es_query_builder.py` + - [ ] 移除 `price_ranges` 硬编码逻辑(第 205-233 行) + - [ ] 重构 `_build_filters` 方法,支持 `range_filters` + - [ ] 移除 `add_dynamic_aggregations` 方法 + - [ ] 重构 `add_aggregations` 为 `build_facets` + - [ ] 更新 `build_query` 方法签名 +- [ ] 更新 `search/multilang_query_builder.py`(如果需要) + +**验证方式**: + +- 编写单元测试验证过滤器构建逻辑 +- 打印生成的 ES DSL,检查正确性 + +### 阶段 3:搜索执行层重构(高优先级) + +**任务清单**: + +- [ ] 更新 `search/searcher.py` + - [ ] 更新 `search()` 方法签名 + - [ ] 使用新的 `build_facets` 方法 + - [ ] 实现 `_standardize_facets()` 辅助方法 + - [ ] 实现 `_get_field_label()` 辅助方法 + - [ ] 更新 `SearchResult` 类,使用标准化分面格式 + +**验证方式**: + +- 编写集成测试 +- 手动测试搜索功能 + +### 阶段 4:API 路由层更新(中优先级) + +**任务清单**: + +- [ ] 更新 `api/routes/search.py` + - [ ] 更新 `/search/` 端点,接受新的请求参数 + - [ ] 添加 `/search/suggestions` 端点(框架,返回空结果) + - [ ] 添加 `/search/instant` 端点(框架,调用标准搜索) + - [ ] 添加端点文档和示例 + +**验证方式**: + +- 使用 Swagger UI 测试端点 +- 检查 API 文档完整性 + +### 阶段 5:前端适配(中优先级) + +**任务清单**: + +- [ ] 更新 `frontend/static/js/app.js` + - [ ] 修改聚合参数为 `facets` 简化配置 + - [ ] 修改过滤器参数,分离 `filters` 和 `range_filters` + - [ ] 更新 `displayAggregations()` 方法,解析标准化分面结果 + - [ ] 添加范围过滤器 UI(如价格滑块) + - [ ] 移除硬编码的 `price_ranges` + +**验证方式**: + +- 
浏览器测试前端功能 +- 检查网络请求和响应格式 + +### 阶段 6:测试代码更新(低优先级) + +**任务清单**: + +- [ ] 更新 `test_aggregation_api.py` + - [ ] 移除 `price_ranges` 测试 + - [ ] 添加 `range_filters` 测试 + - [ ] 添加新的 `facets` 测试 +- [ ] 更新 `test_complete_search.py` +- [ ] 更新 `tests/integration/test_aggregation_api.py` +- [ ] 更新 `tests/unit/test_searcher.py` + +**验证方式**: + +- 运行所有测试,确保通过 +- 检查测试覆盖率 + +### 阶段 7:文档更新(低优先级) + +**任务清单**: + +- [ ] 撰写完整的 API 接口文档 +- [ ] 更新 `README.md` +- [ ] 更新 `USER_GUIDE.md` +- [ ] 添加接口使用示例 +- [ ] 添加迁移指南(旧接口 → 新接口) + +## 第四部分:API 使用示例 + +### 示例 1:简单搜索 + +```bash +POST /search/ +{ + "query": "芭比娃娃", + "size": 20 +} +``` + +### 示例 2:带过滤器的搜索 + +```bash +POST /search/ +{ + "query": "玩具", + "size": 20, + "filters": { + "categoryName_keyword": ["玩具", "益智玩具"], + "in_stock": true + }, + "range_filters": { + "price": {"gte": 50, "lte": 200} + } +} +``` + +### 示例 3:带分面搜索的请求 + +```bash +POST /search/ +{ + "query": "玩具", + "size": 20, + "facets": [ + { + "field": "categoryName_keyword", + "size": 15 + }, + { + "field": "brandName_keyword", + "size": 15 + }, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100}, + {"key": "100-200", "from": 100, "to": 200}, + {"key": "200+", "from": 200} + ] + } + ] +} +``` + +**响应示例**(标准化分面格式): + +```json +{ + "hits": [...], + "total": 118, + "max_score": 8.5, + "took_ms": 45, + "facets": [ + { + "field": "categoryName_keyword", + "label": "商品类目", + "type": "terms", + "values": [ + {"value": "玩具", "label": "玩具", "count": 85, "selected": false}, + {"value": "益智玩具", "label": "益智玩具", "count": 33, "selected": false} + ] + }, + { + "field": "price", + "label": "价格区间", + "type": "range", + "values": [ + {"value": "0-50", "label": "0-50元", "count": 23, "selected": false}, + {"value": "50-100", "label": "50-100元", "count": 45, "selected": false}, + {"value": "100-200", "label": "100-200元", "count": 38, "selected": false}, + {"value": "200+", "label": "200元以上", "count": 12, "selected": false} + 
] + } + ] +} +``` + +### 示例 4:搜索建议(框架) + +```bash +GET /search/suggestions?q=芭&size=5 + +{ + "query": "芭", + "suggestions": [ + { + "text": "芭比娃娃", + "type": "query", + "highlight": "<em>芭</em>比娃娃", + "popularity": 850 + }, + { + "text": "芭比娃娃屋", + "type": "query", + "highlight": "<em>芭</em>比娃娃屋", + "popularity": 320 + } + ], + "took_ms": 5 +} +``` + +## 第五部分:向后兼容性 + +### 兼容策略 + +为保持向后兼容,在过渡期(1-2 个版本)内: + +1. **同时支持旧参数和新参数**: +```python +class SearchRequest(BaseModel): + # 新参数 + range_filters: Optional[Dict[str, RangeFilter]] = None + facets: Optional[List[Union[str, FacetConfig]]] = None + + # 旧参数(标记为废弃) + aggregations: Optional[Dict[str, Any]] = Field( + None, + deprecated=True, + description="已废弃。请使用 'facets' 参数" + ) +``` + +2. **在后端自动转换旧格式**: +```python +# 在 searcher.py 中 +if request.aggregations and not request.facets: + # 将旧的 aggregations 转换为新的 facets + request.facets = self._convert_legacy_aggregations(request.aggregations) +``` + +3. **在响应中提供迁移提示**: +```python +if request.aggregations: + warnings.append({ + "type": "deprecation", + "message": "'aggregations' 参数已废弃,请使用 'facets' 参数", + "migration_guide": "https://docs.example.com/migration" + }) +``` + + +### 迁移时间线 + +- **v3.0**(当前版本):发布新接口,旧接口标记为废弃 +- **v3.1**(1 个月后):移除旧接口的自动转换 +- **v4.0**(3 个月后):完全移除旧接口 + +## 第六部分:风险评估与缓解 + +### 风险点 + +1. **破坏性变更风险**: + + - 风险:现有客户代码可能依赖旧接口 + - 缓解:提供向后兼容层,发布详细迁移指南 + +2. **性能影响风险**: + + - 风险:新的标准化处理可能增加延迟 + - 缓解:添加性能测试,优化关键路径 + +3. **测试覆盖不足风险**: + + - 风险:重构可能引入新 bug + - 缓解:全面的单元测试和集成测试 + +### 验收标准 + +- [ ] 所有单元测试通过 +- [ ] 所有集成测试通过 +- [ ] API 文档完整且准确 +- [ ] 性能无明显下降(< 10% 延迟增加) +- [ ] 前端功能正常工作 +- [ ] 提供完整的迁移指南 + +## 总结 + +本计划通过系统性的重构,将搜索 API 从硬编码、暴露 ES 细节的实现,转变为灵活、通用、易用的 SaaS 产品接口。关键改进包括: + +1. ✅ 移除硬编码的 price_ranges 逻辑 +2. ✅ 实现结构化的过滤参数(filters + range_filters) +3. ✅ 简化聚合参数接口,不暴露 ES DSL +4. ✅ 标准化分面搜索响应格式 +5. 
✅ 添加搜索建议功能框架(暂不实现) + +通过这些改进,系统将具备更好的通用性、可维护性和可扩展性,为未来功能扩展奠定基础。 \ No newline at end of file diff --git a/.cursor/plans/api-interface-analysis-42918612.plan.3.最终执行.md b/.cursor/plans/api-interface-analysis-42918612.plan.3.最终执行.md new file mode 100644 index 0000000..58bec89 --- /dev/null +++ b/.cursor/plans/api-interface-analysis-42918612.plan.3.最终执行.md @@ -0,0 +1,1402 @@ + +# 搜索引擎 API 接口重构实施计划 + +## 第一部分:现状分析 + +### 1. 当前实现存在的问题 + +#### 问题 1:硬编码的价格范围过滤 + +**位置**:`search/es_query_builder.py` 第 205-233 行 + +**问题描述**: + +```python +if field == 'price_ranges': + # 硬编码特定字符串值 + if price_range == '0-50': + price_ranges.append({"lt": 50}) + elif price_range == '50-100': + price_ranges.append({"gte": 50, "lt": 100}) + # ... +``` + +**影响**: + +- 只支持 `price` 字段,无法扩展到其他数值字段 +- 范围值硬编码,无法根据业务需求调整 +- 不符合 SaaS 系统的通用性要求 + +#### 问题 2:聚合参数直接暴露 ES DSL + +**位置**: + +- `api/models.py` 第 17 行:`aggregations: Optional[Dict[str, Any]]` +- `search/es_query_builder.py` 第 298-319 行:`add_dynamic_aggregations` +- `frontend/static/js/app.js` 第 57-87 行:前端硬编码 ES DSL + +**问题描述**: + +前端需要了解 Elasticsearch 的聚合语法: + +```javascript +const aggregations = { + "category_stats": { + "terms": { + "field": "categoryName_keyword", + "size": 15 + } + }, + "price_ranges": { + "range": { + "field": "price", + "ranges": [ + {"key": "0-50", "to": 50}, + // ... + ] + } + } +}; +``` + +**影响**: + +- 前端需要了解 ES 语法,增加集成难度 +- 不符合 SaaS 产品易用性原则 +- 难以进行参数验证和文档生成 + +#### 问题 3:分面搜索结果格式不统一 + +**位置**:`frontend/static/js/app.js` 第 208-258 行 + +**问题描述**: + +- 直接返回 ES 原始格式(`buckets` 结构) +- 前端需要知道不同聚合类型的响应结构 +- 没有统一的分面结果模型 + +**影响**: + +- 前端解析逻辑复杂 +- 不同类型的聚合处理方式不一致 +- 难以扩展新的聚合类型 + +#### 问题 4:缺少搜索建议功能 + +**当前状态**:完全没有实现 + +**需求**: + +- 自动补全(Autocomplete) +- 搜索建议(Suggestions) +- 搜索即时反馈(Instant Search) + +### 2. 依赖关系分析 + +**影响范围**: + +1. **后端模型层**:`api/models.py` +2. **查询构建层**:`search/es_query_builder.py` +3. **搜索执行层**:`search/searcher.py` +4. **API 路由层**:`api/routes/search.py` +5. 
**前端代码**:`frontend/static/js/app.js` + +## 第二部分:优化方案设计 + +### 方案概述 + +采用**结构化过滤参数方案(方案 A 的简化版)**: + +- 分离 `filters`(精确匹配)和 `range_filters`(范围过滤) +- **不支持单字段多个不连续范围**,简化设计 +- 标准化聚合参数,使用简化的接口 +- 统一分面搜索响应格式 +- **完全重构,不保留旧接口和兼容代码** + +### 1. 新的请求模型设计 + +#### 1.1 核心模型定义 + +**文件**:`api/models.py` + +```python +from pydantic import BaseModel, Field, field_validator +from typing import List, Dict, Any, Optional, Union, Literal + + +class RangeFilter(BaseModel): + """数值范围过滤器""" + gte: Optional[float] = Field(None, description="大于等于 (>=)") + gt: Optional[float] = Field(None, description="大于 (>)") + lte: Optional[float] = Field(None, description="小于等于 (<=)") + lt: Optional[float] = Field(None, description="小于 (<)") + + @field_validator('*') + def check_at_least_one(cls, v, info): + """确保至少指定一个边界""" + values = info.data + if not any([values.get('gte'), values.get('gt'), + values.get('lte'), values.get('lt')]): + raise ValueError('至少需要指定一个范围边界') + return v + + class Config: + json_schema_extra = { + "examples": [ + {"gte": 50, "lte": 200}, + {"gt": 100}, + {"lt": 50} + ] + } + + +class FacetConfig(BaseModel): + """分面配置(简化版)""" + field: str = Field(..., description="分面字段名") + size: int = Field(10, ge=1, le=100, description="返回的分面值数量") + type: Literal["terms", "range"] = Field("terms", description="分面类型") + ranges: Optional[List[Dict[str, Any]]] = Field( + None, + description="范围分面的范围定义(仅当 type='range' 时需要)" + ) + + class Config: + json_schema_extra = { + "examples": [ + { + "field": "categoryName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "price", + "size": 4, + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100}, + {"key": "100-200", "from": 100, "to": 200}, + {"key": "200+", "from": 200} + ] + } + ] + } + + +class SearchRequest(BaseModel): + """搜索请求模型(重构版)""" + + # 基础搜索参数 + query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)") + size: int = Field(10, ge=1, le=100, 
description="返回结果数量") + from_: int = Field(0, ge=0, alias="from", description="分页偏移量") + + # 过滤器 - 精确匹配和多值匹配 + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field( + None, + description="精确匹配过滤器。单值表示精确匹配,数组表示 OR 匹配(匹配任意一个值)", + json_schema_extra={ + "examples": [ + { + "categoryName_keyword": ["玩具", "益智玩具"], + "brandName_keyword": "乐高", + "in_stock": True + } + ] + } + ) + + # 范围过滤器 - 数值范围 + range_filters: Optional[Dict[str, RangeFilter]] = Field( + None, + description="数值范围过滤器。支持 gte, gt, lte, lt 操作符", + json_schema_extra={ + "examples": [ + { + "price": {"gte": 50, "lte": 200}, + "days_since_last_update": {"lte": 30} + } + ] + } + ) + + # 排序 + sort_by: Optional[str] = Field(None, description="排序字段名(如 'price', 'create_time')") + sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)") + + # 分面搜索 - 简化接口 + facets: Optional[List[Union[str, FacetConfig]]] = Field( + None, + description="分面配置。可以是字段名列表(使用默认配置)或详细的分面配置对象", + json_schema_extra={ + "examples": [ + # 简单模式:只指定字段名,使用默认配置 + ["categoryName_keyword", "brandName_keyword"], + # 高级模式:详细配置 + [ + {"field": "categoryName_keyword", "size": 15}, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100} + ] + } + ] + ] + } + ) + + # 高级选项 + min_score: Optional[float] = Field(None, ge=0, description="最小相关性分数阈值") + highlight: bool = Field(False, description="是否高亮搜索关键词(暂不实现)") + debug: bool = Field(False, description="是否返回调试信息") + + # 个性化参数(预留) + user_id: Optional[str] = Field(None, description="用户ID,用于个性化搜索和推荐") + session_id: Optional[str] = Field(None, description="会话ID,用于搜索分析") + + +class ImageSearchRequest(BaseModel): + """图片搜索请求模型""" + image_url: str = Field(..., description="查询图片的 URL") + size: int = Field(10, ge=1, le=100, description="返回结果数量") + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = None + range_filters: Optional[Dict[str, RangeFilter]] = None + + 
+class SearchSuggestRequest(BaseModel): + """搜索建议请求模型(框架,暂不实现)""" + query: str = Field(..., min_length=1, description="搜索查询字符串") + size: int = Field(5, ge=1, le=20, description="返回建议数量") + types: List[Literal["query", "product", "category", "brand"]] = Field( + ["query"], + description="建议类型:query(查询建议), product(商品建议), category(类目建议), brand(品牌建议)" + ) +``` + +#### 1.2 响应模型定义 + +```python +class FacetValue(BaseModel): + """分面值""" + value: Union[str, int, float] = Field(..., description="分面值") + label: Optional[str] = Field(None, description="显示标签(如果与 value 不同)") + count: int = Field(..., description="匹配的文档数量") + selected: bool = Field(False, description="是否已选中(当前过滤器中)") + + +class FacetResult(BaseModel): + """分面结果(标准化格式)""" + field: str = Field(..., description="字段名") + label: str = Field(..., description="分面显示名称") + type: Literal["terms", "range"] = Field(..., description="分面类型") + values: List[FacetValue] = Field(..., description="分面值列表") + total_count: Optional[int] = Field(None, description="该字段的总文档数") + + +class SearchResponse(BaseModel): + """搜索响应模型(重构版)""" + + # 核心结果 + hits: List[Dict[str, Any]] = Field(..., description="搜索结果列表") + total: int = Field(..., description="匹配的总文档数") + max_score: float = Field(..., description="最高相关性分数") + + # 分面搜索结果(标准化格式) + facets: Optional[List[FacetResult]] = Field( + None, + description="分面统计结果(标准化格式)" + ) + + # 查询信息 + query_info: Dict[str, Any] = Field( + default_factory=dict, + description="查询处理信息(原始查询、改写、语言检测、翻译等)" + ) + + # 推荐与建议(预留) + related_queries: Optional[List[str]] = Field(None, description="相关搜索查询") + + # 性能指标 + took_ms: int = Field(..., description="搜索总耗时(毫秒)") + performance_info: Optional[Dict[str, Any]] = Field(None, description="详细性能信息") + + # 调试信息 + debug_info: Optional[Dict[str, Any]] = Field(None, description="调试信息(仅当 debug=True)") + + +class SearchSuggestResponse(BaseModel): + """搜索建议响应模型(框架,暂不实现)""" + query: str = Field(..., description="原始查询") + suggestions: List[Dict[str, Any]] = Field(..., 
description="建议列表") + took_ms: int = Field(..., description="耗时(毫秒)") +``` + +### 2. 查询构建器重构 + +#### 2.1 移除硬编码的 price_ranges 逻辑 + +**文件**:`search/es_query_builder.py` + +**需要修改的方法**:`_build_filters(self, filters, range_filters)` + +**改进点**: + +1. **完全移除** `if field == 'price_ranges'` 的特殊处理代码 +2. 分离 filters 和 range_filters 的处理逻辑 +3. 添加字段类型验证(利用配置系统) + +**新的实现逻辑**: + +```python +def _build_filters( + self, + filters: Optional[Dict[str, Any]] = None, + range_filters: Optional[Dict[str, Any]] = None +) -> List[Dict[str, Any]]: + """ + 构建过滤子句(重构版)。 + + Args: + filters: 精确匹配过滤器字典 + range_filters: 范围过滤器字典 + + Returns: + ES filter子句列表 + """ + filter_clauses = [] + + # 1. 处理精确匹配过滤 + if filters: + for field, value in filters.items(): + if isinstance(value, list): + # 多值匹配(OR) + filter_clauses.append({ + "terms": {field: value} + }) + else: + # 单值精确匹配 + filter_clauses.append({ + "term": {field: value} + }) + + # 2. 处理范围过滤 + if range_filters: + for field, range_spec in range_filters.items(): + # 构建范围查询 + range_conditions = {} + if isinstance(range_spec, dict): + for op in ['gte', 'gt', 'lte', 'lt']: + if op in range_spec and range_spec[op] is not None: + range_conditions[op] = range_spec[op] + + if range_conditions: + filter_clauses.append({ + "range": {field: range_conditions} + }) + + return filter_clauses +``` + +#### 2.2 优化聚合参数接口 + +**需要完全移除的方法**: + +- `add_dynamic_aggregations` - 直接暴露 ES DSL,完全删除 + +**需要重构的方法**: + +- `add_aggregations` → 重构为 `build_facets` + +**新增方法**:`build_facets(self, facet_configs)` + +**新的实现逻辑**: + +```python +def build_facets( + self, + facet_configs: Optional[List[Union[str, Dict[str, Any]]]] = None +) -> Dict[str, Any]: + """ + 构建分面聚合(重构版)。 + + Args: + facet_configs: 分面配置列表。可以是: + - 字符串列表:字段名,使用默认配置 + - 配置对象列表:详细的分面配置 + + Returns: + ES aggregations字典 + """ + if not facet_configs: + return {} + + aggs = {} + + for config in facet_configs: + # 1. 
简单模式:只有字段名 + if isinstance(config, str): + field = config + agg_name = f"{field}_facet" + aggs[agg_name] = { + "terms": { + "field": field, + "size": 10, # 默认大小 + "order": {"_count": "desc"} + } + } + + # 2. 高级模式:详细配置对象 + elif isinstance(config, dict): + field = config['field'] + facet_type = config.get('type', 'terms') + size = config.get('size', 10) + agg_name = f"{field}_facet" + + if facet_type == 'terms': + # Terms 聚合(分组统计) + aggs[agg_name] = { + "terms": { + "field": field, + "size": size, + "order": {"_count": "desc"} + } + } + + elif facet_type == 'range': + # Range 聚合(范围统计) + ranges = config.get('ranges', []) + if ranges: + aggs[agg_name] = { + "range": { + "field": field, + "ranges": ranges + } + } + + return aggs +``` + +#### 2.3 更新主查询构建方法 + +**修改方法签名**:`build_query()` + +```python +def build_query( + self, + query_text: str, + query_vector: Optional[np.ndarray] = None, + query_node: Optional[QueryNode] = None, + filters: Optional[Dict[str, Any]] = None, + range_filters: Optional[Dict[str, Any]] = None, # 新增 + size: int = 10, + from_: int = 0, + enable_knn: bool = True, + knn_k: int = 50, + knn_num_candidates: int = 200, + min_score: Optional[float] = None +) -> Dict[str, Any]: + """构建完整的 ES 查询(重构版)""" + # ... 实现 + + # 添加过滤器 + if filters or range_filters: + filter_clauses = self._build_filters(filters, range_filters) + if filter_clauses: + es_query["query"] = { + "bool": { + "must": [query_clause], + "filter": filter_clauses + } + } +``` + +### 3. 搜索执行层重构 + +**文件**:`search/searcher.py` + +**需要修改的方法**:`search()` + +**改进点**: + +1. 更新方法签名,接受 `range_filters` 和 `facets` 参数 +2. **完全移除** `aggregations` 参数支持 +3. 使用新的 `build_facets` 方法替代旧的聚合逻辑 +4. 
标准化分面搜索结果 + +**关键代码片段**: + +```python +def search( + self, + query: str, + size: int = 10, + from_: int = 0, + filters: Optional[Dict[str, Any]] = None, + range_filters: Optional[Dict[str, Any]] = None, # 新增 + facets: Optional[List[Union[str, Dict]]] = None, # 替代 aggregations + min_score: Optional[float] = None, + sort_by: Optional[str] = None, + sort_order: Optional[str] = "desc", + debug: bool = False, + context: Optional[RequestContext] = None +) -> SearchResult: + """执行搜索(重构版)""" + + # ... 查询解析 ... + + # 构建 ES 查询 + es_query = self.query_builder.build_multilang_query( + parsed_query=parsed_query, + query_vector=parsed_query.query_vector, + query_node=query_node, + filters=filters, + range_filters=range_filters, # 新增 + size=size, + from_=from_, + enable_knn=enable_embedding, + min_score=min_score + ) + + # 添加分面聚合(完全替代旧的 aggregations 逻辑) + if facets: + facet_aggs = self.query_builder.build_facets(facets) + if facet_aggs: + if "aggs" not in es_query: + es_query["aggs"] = {} + es_query["aggs"].update(facet_aggs) + + # ... 执行搜索 ... 
+ + # 标准化分面结果 + standardized_facets = self._standardize_facets( + es_response.get('aggregations', {}), + facets, + filters + ) + + return SearchResult( + hits=hits, + total=total_value, + max_score=max_score, + took_ms=int(total_duration), + facets=standardized_facets, # 标准化格式 + query_info=parsed_query.to_dict(), + debug_info=debug_info + ) +``` + +**新增辅助方法**: + +```python +def _standardize_facets( + self, + es_aggregations: Dict[str, Any], + facet_configs: Optional[List[Union[str, Dict]]], + current_filters: Optional[Dict[str, Any]] +) -> Optional[List[Dict[str, Any]]]: + """ + 将 ES 聚合结果转换为标准化的分面格式。 + + Args: + es_aggregations: ES 原始聚合结果 + facet_configs: 分面配置列表 + current_filters: 当前应用的过滤器 + + Returns: + 标准化的分面结果列表 + """ + if not es_aggregations or not facet_configs: + return None + + standardized_facets = [] + + for config in facet_configs: + # 解析配置 + if isinstance(config, str): + field = config + facet_type = "terms" + else: + field = config['field'] + facet_type = config.get('type', 'terms') + + agg_name = f"{field}_facet" + + if agg_name not in es_aggregations: + continue + + agg_result = es_aggregations[agg_name] + + # 构建标准化分面结果 + facet = { + "field": field, + "label": self._get_field_label(field), + "type": facet_type, + "values": [] + } + + # 获取当前字段的选中值 + selected_values = set() + if current_filters and field in current_filters: + filter_value = current_filters[field] + if isinstance(filter_value, list): + selected_values = set(filter_value) + else: + selected_values = {filter_value} + + # 转换 buckets + if 'buckets' in agg_result: + for bucket in agg_result['buckets']: + value = bucket.get('key') + count = bucket.get('doc_count', 0) + + facet['values'].append({ + "value": value, + "label": str(value), + "count": count, + "selected": value in selected_values + }) + + standardized_facets.append(facet) + + return standardized_facets + + +def _get_field_label(self, field: str) -> str: + """获取字段的显示标签""" + # 从配置中获取字段标签 + for field_config in self.config.fields: + if 
field_config.name == field: + return getattr(field_config, 'label', field) + return field +``` + +### 4. API 路由层更新 + +**文件**:`api/routes/search.py` + +**改进点**: + +1. 接受新的请求模型参数 +2. 添加搜索建议端点(框架) +3. **完全移除**旧的 `aggregations` 参数支持 + +**主搜索端点更新**: + +```python +@router.post("/", response_model=SearchResponse) +async def search(request: SearchRequest, http_request: Request): + """ + 执行文本搜索。 + + 支持: + - 布尔表达式(AND, OR, RANK, ANDNOT) + - 精确匹配过滤器 + - 范围过滤器 + - 分面搜索 + - 自定义排序 + """ + # ... 实现使用新的参数 + result = searcher.search( + query=request.query, + size=request.size, + from_=request.from_, + filters=request.filters, + range_filters=request.range_filters, # 新增 + facets=request.facets, # 替代 aggregations + min_score=request.min_score, + sort_by=request.sort_by, + sort_order=request.sort_order, + debug=request.debug, + context=context + ) +``` + +**新增端点**: + +```python +@router.get("/suggestions", response_model=SearchSuggestResponse) +async def search_suggestions( + q: str = Query(..., min_length=1, description="搜索查询"), + size: int = Query(5, ge=1, le=20, description="建议数量"), + types: str = Query("query", description="建议类型(逗号分隔)") +): + """ + 获取搜索建议(自动补全)。 + + 功能说明: + - 查询建议(query):基于历史搜索和热门搜索 + - 商品建议(product):匹配的商品 + - 类目建议(category):匹配的类目 + - 品牌建议(brand):匹配的品牌 + + 注意:此功能暂未实现,仅返回框架响应。 + """ + import time + start_time = time.time() + + # TODO: 实现搜索建议逻辑 + suggestions = [] + took_ms = int((time.time() - start_time) * 1000) + + return SearchSuggestResponse( + query=q, + suggestions=suggestions, + took_ms=took_ms + ) + + +@router.get("/instant", response_model=SearchResponse) +async def instant_search( + q: str = Query(..., min_length=2, description="搜索查询"), + size: int = Query(5, ge=1, le=20, description="结果数量") +): + """ + 即时搜索(Instant Search)。 + + 功能说明: + - 边输入边搜索,无需点击搜索按钮 + - 返回简化的搜索结果 + + 注意:此功能暂未实现,调用标准搜索接口。 + """ + from api.app import get_searcher + searcher = get_searcher() + + result = searcher.search( + query=q, + size=size, + from_=0 + ) + + return SearchResponse( + 
hits=result.hits, + total=result.total, + max_score=result.max_score, + took_ms=result.took_ms, + query_info=result.query_info + ) +``` + +### 5. 前端适配 + +**文件**:`frontend/static/js/app.js` + +**需要完全重写的地方**: + +1. **聚合参数改用简化配置**(第 57-87 行): +```javascript +// 旧的方式(ES DSL)- 完全删除 +const aggregations = { + "category_stats": {...} // 删除 +}; + +// 新的方式(简化配置) +const facets = [ + { + "field": "categoryName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "brandName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "supplierName_keyword", + "size": 10, + "type": "terms" + }, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100}, + {"key": "100-200", "from": 100, "to": 200}, + {"key": "200+", "from": 200} + ] + } +]; +``` + +2. **过滤器使用新格式**(第 103 行): +```javascript +// 旧的方式 - 完全删除 +filters: { + "price_ranges": ["0-50", "50-100"] // 删除 +} + +// 新的方式 +filters: Object.keys(state.filters).length > 0 ? state.filters : null, +range_filters: state.rangeFilters ? state.rangeFilters : null +``` + +3. **解析标准化的分面结果**(第 208-258 行): +```javascript +// 旧的方式(ES 结构)- 完全删除并重写 +function displayAggregations(aggregations) { + if (!aggregations) return; + // 旧的代码全部删除 +} + +// 新的方式(标准化格式) +function displayFacets(facets) { + if (!facets) return; + + facets.forEach(facet => { + if (facet.field === 'categoryName_keyword') { + const container = document.getElementById('categoryTags'); + let html = ''; + + facet.values.forEach(facetValue => { + const value = facetValue.value; + const count = facetValue.count; + const selected = facetValue.selected; + + html += ` + + ${escapeHtml(value)} (${count}) + + `; + }); + + container.innerHTML = html; + } + // 处理其他分面... 
+ }); +} +``` + + +## 第三部分:实施步骤 + +### 阶段 1:后端模型层重构 + +**任务清单**: + +- [ ] 更新 `api/models.py` + - [ ] 定义 `RangeFilter` 模型 + - [ ] 定义 `FacetConfig` 模型 + - [ ] 更新 `SearchRequest`,添加 `range_filters` 和 `facets` + - [ ] **完全移除** `aggregations` 参数 + - [ ] 定义 `FacetValue` 和 `FacetResult` 模型 + - [ ] 更新 `SearchResponse`,使用标准化分面格式 + - [ ] 更新 `ImageSearchRequest`,添加 `range_filters` + - [ ] 添加 `SearchSuggestRequest` 和 `SearchSuggestResponse`(框架) + +**验证方式**: + +- 运行 Pydantic 模型验证 +- 检查 API 文档(`/docs`)是否正确生成 +- 确认旧的 `aggregations` 参数已完全移除 + +### 阶段 2:查询构建器重构 + +**任务清单**: + +- [ ] 重构 `search/es_query_builder.py` + - [ ] **完全删除** `price_ranges` 硬编码逻辑(第 205-233 行) + - [ ] 重构 `_build_filters` 方法,支持 `range_filters` + - [ ] **完全删除** `add_dynamic_aggregations` 方法 + - [ ] **完全删除或重命名** `add_aggregations` 方法 + - [ ] 新增 `build_facets` 方法 + - [ ] 更新 `build_query` 方法签名,添加 `range_filters` +- [ ] 更新 `search/multilang_query_builder.py` + - [ ] 更新 `build_multilang_query` 方法签名 + - [ ] 确保正确传递 `range_filters` 参数 + +**验证方式**: + +- 打印生成的 ES DSL,检查正确性 +- 确认旧的硬编码逻辑和方法已完全移除 +- 手动测试不同的过滤器组合 + +### 阶段 3:搜索执行层重构 + +**任务清单**: + +- [ ] 更新 `search/searcher.py` + - [ ] 更新 `search()` 方法签名,添加 `range_filters` 和 `facets` + - [ ] **完全移除** `aggregations` 参数支持 + - [ ] 使用新的 `build_facets` 方法 + - [ ] 实现 `_standardize_facets()` 辅助方法 + - [ ] 实现 `_get_field_label()` 辅助方法 + - [ ] 更新 `SearchResult` 类,使用标准化分面格式 + - [ ] 更新 `search_by_image()` 方法,支持 `range_filters` + +**验证方式**: + +- 手动测试各种搜索场景 +- 检查分面结果格式是否标准化 +- 确认旧的聚合逻辑已完全移除 + +### 阶段 4:API 路由层更新 + +**任务清单**: + +- [ ] 更新 `api/routes/search.py` + - [ ] 更新 `/search/` 端点,使用新的请求参数 + - [ ] **确认完全移除**对旧 `aggregations` 的支持 + - [ ] 添加 `/search/suggestions` 端点(框架,返回空结果) + - [ ] 添加 `/search/instant` 端点(框架,调用标准搜索) + - [ ] 更新 `/search/image` 端点,支持 `range_filters` + - [ ] 添加完整的端点文档和示例 + +**验证方式**: + +- 使用 Swagger UI(`/docs`)测试所有端点 +- 检查请求和响应格式 +- 确认旧参数不再被接受 + +### 阶段 5:前端适配 + +**任务清单**: + +- [ ] 完全重构 `frontend/static/js/app.js` + - [ ] **删除**所有 ES DSL 聚合代码(第 57-87 行) + - [ ] 使用新的 
`facets` 简化配置 + - [ ] 修改过滤器参数,分离 `filters` 和 `range_filters` + - [ ] **完全重写** `displayAggregations()` 为 `displayFacets()` + - [ ] 解析标准化的分面结果格式 + - [ ] **删除**所有 `price_ranges` 硬编码 + - [ ] 添加范围过滤器 UI 组件 + - [ ] 更新状态管理,支持 `rangeFilters` + +**验证方式**: + +- 浏览器测试所有前端功能 +- 检查网络请求格式 +- 检查分面结果显示 +- 确认旧的代码已完全删除 + +### 阶段 6:文档更新与示例 + +**任务清单**: + +- [ ] 撰写完整的 API 接口文档 + - [ ] 搜索接口文档 + - [ ] 过滤器使用说明 + - [ ] 分面搜索使用说明 + - [ ] 搜索建议接口文档(标注为框架) +- [ ] 更新项目文档 + - [ ] 更新 `README.md` + - [ ] 更新 `USER_GUIDE.md` + - [ ] 更新 `CHANGES.md` 或 `CHANGELOG.md` +- [ ] 添加完整的使用示例 + - [ ] 简单搜索示例 + - [ ] 过滤器搜索示例 + - [ ] 分面搜索示例 + - [ ] cURL 命令示例 + - [ ] Python 代码示例 + - [ ] JavaScript 代码示例 +- [ ] 更新 API 文档注释 + - [ ] 所有模型的文档字符串 + - [ ] 所有端点的文档字符串 + - [ ] 参数说明和示例 + +**验证方式**: + +- 文档审查 +- 示例代码验证 +- 确保文档与实现一致 + +## 第四部分:API 使用示例 + +### 示例 1:简单搜索 + +```bash +POST /search/ +Content-Type: application/json + +{ + "query": "芭比娃娃", + "size": 20 +} +``` + +**响应**: + +```json +{ + "hits": [ + { + "_id": "12345", + "_score": 8.5, + "_source": { + "name": "芭比时尚娃娃", + "price": 89.99, + "categoryName": "玩具" + } + } + ], + "total": 118, + "max_score": 8.5, + "took_ms": 45, + "query_info": { + "original_query": "芭比娃娃", + "detected_language": "zh" + } +} +``` + +### 示例 2:带过滤器的搜索 + +```bash +POST /search/ +Content-Type: application/json + +{ + "query": "玩具", + "size": 20, + "filters": { + "categoryName_keyword": ["玩具", "益智玩具"], + "in_stock": true + }, + "range_filters": { + "price": { + "gte": 50, + "lte": 200 + } + } +} +``` + +**响应**: + +```json +{ + "hits": [...], + "total": 45, + "max_score": 7.2, + "took_ms": 38 +} +``` + +### 示例 3:带分面搜索的请求(简单模式) + +```bash +POST /search/ +Content-Type: application/json + +{ + "query": "玩具", + "size": 20, + "facets": [ + "categoryName_keyword", + "brandName_keyword" + ] +} +``` + +**响应**: + +```json +{ + "hits": [...], + "total": 118, + "max_score": 8.5, + "took_ms": 45, + "facets": [ + { + "field": "categoryName_keyword", + "label": "商品类目", + "type": "terms", + "values": [ + { + 
"value": "玩具", + "label": "玩具", + "count": 85, + "selected": false + }, + { + "value": "益智玩具", + "label": "益智玩具", + "count": 33, + "selected": false + } + ] + }, + { + "field": "brandName_keyword", + "label": "品牌", + "type": "terms", + "values": [ + { + "value": "乐高", + "label": "乐高", + "count": 42, + "selected": false + } + ] + } + ] +} +``` + +### 示例 4:带分面搜索的请求(高级模式) + +```bash +POST /search/ +Content-Type: application/json + +{ + "query": "玩具", + "size": 20, + "facets": [ + { + "field": "categoryName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "brandName_keyword", + "size": 15, + "type": "terms" + }, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100}, + {"key": "100-200", "from": 100, "to": 200}, + {"key": "200+", "from": 200} + ] + } + ] +} +``` + +**响应**: + +```json +{ + "hits": [...], + "total": 118, + "max_score": 8.5, + "took_ms": 45, + "facets": [ + { + "field": "categoryName_keyword", + "label": "商品类目", + "type": "terms", + "values": [...] 
+ }, + { + "field": "price", + "label": "价格区间", + "type": "range", + "values": [ + { + "value": "0-50", + "label": "0-50", + "count": 23, + "selected": false + }, + { + "value": "50-100", + "label": "50-100", + "count": 45, + "selected": false + }, + { + "value": "100-200", + "label": "100-200", + "count": 38, + "selected": false + }, + { + "value": "200+", + "label": "200+", + "count": 12, + "selected": false + } + ] + } + ] +} +``` + +### 示例 5:复杂搜索(过滤+分面+排序) + +```bash +POST /search/ +Content-Type: application/json + +{ + "query": "玩具 AND (乐高 OR 芭比)", + "size": 20, + "from": 0, + "filters": { + "categoryName_keyword": "玩具" + }, + "range_filters": { + "price": { + "gte": 50, + "lte": 200 + }, + "days_since_last_update": { + "lte": 30 + } + }, + "facets": [ + {"field": "brandName_keyword", "size": 15}, + {"field": "supplierName_keyword", "size": 10} + ], + "sort_by": "price", + "sort_order": "asc", + "debug": false +} +``` + +### 示例 6:搜索建议(框架) + +```bash +GET /search/suggestions?q=芭&size=5 + +{ + "query": "芭", + "suggestions": [], + "took_ms": 2 +} +``` + +### 示例 7:即时搜索(框架) + +```bash +GET /search/instant?q=玩具&size=5 + +{ + "hits": [...], + "total": 118, + "max_score": 8.5, + "took_ms": 25, + "query_info": {...} +} +``` + +### Python 示例代码 + +```python +import requests + +API_URL = "http://localhost:6002/search/" + +# 简单搜索 +response = requests.post(API_URL, json={ + "query": "芭比娃娃", + "size": 20 +}) +print(response.json()) + +# 带过滤器和分面的搜索 +response = requests.post(API_URL, json={ + "query": "玩具", + "size": 20, + "filters": { + "categoryName_keyword": ["玩具", "益智玩具"] + }, + "range_filters": { + "price": {"gte": 50, "lte": 200} + }, + "facets": [ + {"field": "brandName_keyword", "size": 15}, + {"field": "categoryName_keyword", "size": 15} + ], + "sort_by": "price", + "sort_order": "asc" +}) +result = response.json() + +# 处理分面结果 +for facet in result.get('facets', []): + print(f"\n{facet['label']}:") + for value in facet['values']: + print(f" - {value['label']}: 
{value['count']}") +``` + +### JavaScript 示例代码 + +```javascript +// 搜索函数 +async function searchProducts(query, filters, rangeFilters, facets) { + const response = await fetch('http://localhost:6002/search/', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + query: query, + size: 20, + filters: filters, + range_filters: rangeFilters, + facets: facets + }) + }); + + const data = await response.json(); + return data; +} + +// 使用示例 +const result = await searchProducts( + "玩具", + { categoryName_keyword: ["玩具"] }, + { price: { gte: 50, lte: 200 } }, + [ + { field: "brandName_keyword", size: 15 }, + { field: "categoryName_keyword", size: 15 } + ] +); + +// 显示分面结果 +result.facets.forEach(facet => { + console.log(`${facet.label}:`); + facet.values.forEach(value => { + console.log(` - ${value.label}: ${value.count}`); + }); +}); +``` + +## 第五部分:验收标准 + +### 功能验收 + +- [ ] 所有硬编码逻辑已完全移除 +- [ ] 新的过滤器参数正常工作 +- [ ] 分面搜索返回标准化格式 +- [ ] API 文档完整准确 +- [ ] 前端功能正常工作 +- [ ] 搜索建议端点已添加(框架) + +### 代码质量验收 + +- [ ] 代码中不再有 `price_ranges` 字符串 +- [ ] 代码中不再有 `add_dynamic_aggregations` 方法 +- [ ] 代码中不再有 `aggregations` 参数(除了文档说明) +- [ ] 所有方法签名已更新 +- [ ] 所有文档字符串已更新 + +### 性能验收 + +- [ ] 搜索响应时间无明显增加 +- [ ] 分面查询性能正常 +- [ ] 过滤器性能正常 + +## 总结 + +本计划通过系统性的重构,将搜索 API 从硬编码、暴露 ES 细节的实现,转变为灵活、通用、易用的 SaaS 产品接口。关键改进包括: + +1. ✅ **完全移除**硬编码的 price_ranges 逻辑 +2. ✅ 实现结构化的过滤参数(filters + range_filters) +3. ✅ **完全移除** aggregations 参数,使用简化的 facets 接口 +4. ✅ 标准化分面搜索响应格式 +5. ✅ 添加搜索建议功能框架(暂不实现) +6. 
✅ **不保留向后兼容代码**,完全重构 + +通过这些改进,系统将具备更好的通用性、可维护性和可扩展性,代码更加精简和统一。 \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index 58fe2e3..40fb693 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -96,7 +96,6 @@ The `searcher` supports: - Extracts product data from MySQL - Maps images from filebank database - Creates inverted index (URL → SKU list) - - Configurable year range via `--years` parameter ## Key Implementation Notes diff --git a/query/query_rewriter.py b/query/query_rewriter.py index d5ef9ad..b460770 100644 --- a/query/query_rewriter.py +++ b/query/query_rewriter.py @@ -7,21 +7,26 @@ import re class QueryRewriter: - """Rewrites queries based on configured dictionary rules.""" + """Rewrites queries based on exact word matching with configured dictionary rules. + + Only performs full word matching - no partial matching or substring replacement. + The entire query must exactly match a key in the rewrite dictionary to be rewritten. + """ def __init__(self, rewrite_dict: Dict[str, str] = None): """ - Initialize query rewriter. + Initialize query rewriter for exact word matching only. Args: - rewrite_dict: Dictionary mapping query patterns to rewrite expressions + rewrite_dict: Dictionary mapping exact query terms to rewrite expressions e.g., {"芭比": "brand:芭比 OR name:芭比娃娃"} + Only full word matches will be rewritten, no partial matching. """ self.rewrite_dict = rewrite_dict or {} def rewrite(self, query: str) -> str: """ - Rewrite query based on dictionary rules. + Rewrite query based on dictionary rules using exact word matching only. 
Args: query: Original query string @@ -32,21 +37,13 @@ class QueryRewriter: if not query or not query.strip(): return query - # Check for exact matches first + # Only check for exact matches - no partial matching if query in self.rewrite_dict: rewritten = self.rewrite_dict[query] print(f"[QueryRewriter] Exact match: '{query}' -> '{rewritten}'") return rewritten - # Check for partial matches (query contains a rewrite key) - for pattern, replacement in self.rewrite_dict.items(): - if pattern in query: - # Replace the pattern - rewritten = query.replace(pattern, replacement) - print(f"[QueryRewriter] Partial match: '{query}' -> '{rewritten}'") - return rewritten - - # No rewrite needed + # No rewrite needed - no partial matching return query def add_rule(self, pattern: str, replacement: str) -> None: diff --git a/simple_server.py b/simple_server.py deleted file mode 100644 index 25fda66..0000000 --- a/simple_server.py +++ /dev/null @@ -1,340 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple API server for testing aggregation functionality without external dependencies. 
-""" - -import json -import time -import random -from http.server import HTTPServer, BaseHTTPRequestHandler -from urllib.parse import urlparse, parse_qs -import threading - -class SearchAPIHandler(BaseHTTPRequestHandler): - """Simple API handler for search requests.""" - - def do_OPTIONS(self): - """Handle CORS preflight requests.""" - self.send_response(200) - self.send_header('Access-Control-Allow-Origin', '*') - self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS') - self.send_header('Access-Control-Allow-Headers', 'Content-Type') - self.end_headers() - - def do_POST(self): - """Handle POST requests.""" - if self.path == '/': - self.handle_search() - elif self.path == '/search/': - self.handle_search() - else: - self.send_response(404) - self.end_headers() - - def handle_search(self): - """Handle search requests with aggregations.""" - try: - # Read request body - content_length = int(self.headers['Content-Length']) - post_data = self.rfile.read(content_length) - request_data = json.loads(post_data.decode('utf-8')) - - query = request_data.get('query', '') - size = request_data.get('size', 10) - sort_by = request_data.get('sort_by', 'relevance') - aggregations = request_data.get('aggregations', {}) - filters = request_data.get('filters', {}) - - print(f"Search request: query='{query}', size={size}, sort_by={sort_by}") - print(f"Aggregations: {list(aggregations.keys()) if aggregations else 'None'}") - print(f"Filters: {filters if filters else 'None'}") - - # Simulate processing time - time.sleep(0.1) - - # Generate mock search results - results = self.generate_mock_results(query, size, sort_by, filters) - - # Generate mock aggregations - aggregation_results = self.generate_mock_aggregations(aggregations, filters) - - # Build response - response = { - "hits": results, - "total": len(results) + random.randint(10, 100), - "max_score": round(random.uniform(1.5, 3.5), 3), - "took_ms": random.randint(15, 45), - "aggregations": aggregation_results, - 
"query_info": { - "original_query": query, - "rewritten_query": query, - "detected_language": "zh" if any('\u4e00' <= char <= '\u9fff' for char in query) else "en", - "domain": "default", - "translations": {}, - "has_vector": False - } - } - - # Send response - self.send_response(200) - self.send_header('Content-Type', 'application/json') - self.send_header('Access-Control-Allow-Origin', '*') - self.end_headers() - - response_json = json.dumps(response, ensure_ascii=False, indent=2) - self.wfile.write(response_json.encode('utf-8')) - - print(f"Response sent with {len(results)} results and {len(aggregation_results)} aggregations") - - except Exception as e: - print(f"Error handling request: {e}") - self.send_response(500) - self.send_header('Content-Type', 'application/json') - self.send_header('Access-Control-Allow-Origin', '*') - self.end_headers() - - error_response = { - "error": str(e), - "detail": "Internal server error" - } - - self.wfile.write(json.dumps(error_response).encode('utf-8')) - - def generate_mock_results(self, query, size, sort_by, filters): - """Generate mock search results.""" - - # Sample product data - sample_products = [ - { - "skuId": 1001, - "name": "芭比娃娃梦幻套装", - "enSpuName": "Barbie Dream House Playset", - "ruSkuName": "Кукла Барби Мечтательный домик", - "categoryName": "芭比", - "brandName": "美泰", - "supplierName": "义乌玩具厂", - "price": 89.99, - "imageUrl": "https://picsum.photos/seed/barbie1/200/200.jpg", - "create_time": "2024-01-15T10:30:00Z", - "days_since_last_update": 45 - }, - { - "skuId": 1002, - "name": "芭比娃娃时尚系列", - "enSpuName": "Barbie Fashion Doll Collection", - "ruSkuName": "Кукла Барби Модная коллекция", - "categoryName": "芭比", - "brandName": "美泰", - "supplierName": "汕头玩具公司", - "price": 45.50, - "imageUrl": "https://picsum.photos/seed/barbie2/200/200.jpg", - "create_time": "2024-02-20T14:15:00Z", - "days_since_last_update": 30 - }, - { - "skuId": 1003, - "name": "儿童积木套装", - "enSpuName": "Kids Building Blocks Set", - 
"ruSkuName": "Детский строительный набор", - "categoryName": "积木", - "brandName": "乐高", - "supplierName": "深圳塑胶制品厂", - "price": 158.00, - "imageUrl": "https://picsum.photos/seed/blocks1/200/200.jpg", - "create_time": "2024-01-10T09:20:00Z", - "days_since_last_update": 60 - }, - { - "skuId": 1004, - "name": "消防车玩具模型", - "enSpuName": "Fire Truck Toy Model", - "ruSkuName": "Модель пожарной машины", - "categoryName": "小汽车", - "brandName": "多美卡", - "supplierName": "东莞玩具制造厂", - "price": 78.50, - "imageUrl": "https://picsum.photos/seed/firetruck1/200/200.jpg", - "create_time": "2024-03-05T16:45:00Z", - "days_since_last_update": 15 - }, - { - "skuId": 1005, - "name": "婴儿毛绒玩具", - "enSpuName": "Baby Plush Toy", - "ruSkuName": "Детская плюшевая игрушка", - "categoryName": "婴儿娃娃", - "brandName": "迪士尼", - "supplierName": "上海礼品公司", - "price": 32.00, - "imageUrl": "https://picsum.photos/seed/plush1/200/200.jpg", - "create_time": "2024-02-14T11:30:00Z", - "days_since_last_update": 25 - } - ] - - # Apply filters if any - if filters: - filtered_products = [] - for product in sample_products: - include = True - - # Check category filter - if 'category_name' in filters: - if product['categoryName'] not in filters['category_name']: - include = False - - # Check brand filter - if 'brand_name' in filters: - if product['brandName'] not in filters['brand_name']: - include = False - - # Check price range filter - if 'price_ranges' in filters: - price = product['price'] - in_range = False - for price_range in filters['price_ranges']: - if price_range == '0-50' and price <= 50: - in_range = True - elif price_range == '50-100' and 50 < price <= 100: - in_range = True - elif price_range == '100-200' and 100 < price <= 200: - in_range = True - elif price_range == '200+' and price > 200: - in_range = True - if not in_range: - include = False - - if include: - filtered_products.append(product) - sample_products = filtered_products - - # Apply sorting - if sort_by == 'price_asc': - 
sample_products.sort(key=lambda x: x.get('price', 0)) - elif sort_by == 'price_desc': - sample_products.sort(key=lambda x: x.get('price', 0), reverse=True) - elif sort_by == 'time_desc': - sample_products.sort(key=lambda x: x.get('create_time', ''), reverse=True) - - # Convert to API response format - results = [] - for i, product in enumerate(sample_products[:size]): - hit = { - "_id": str(product['skuId']), - "_score": round(random.uniform(1.5, 3.5), 3), - "_source": product - } - results.append(hit) - - return results - - def generate_mock_aggregations(self, aggregations, filters): - """Generate mock aggregation results.""" - if not aggregations: - return {} - - result = {} - - for agg_name, agg_spec in aggregations.items(): - agg_type = agg_spec.get('type', 'terms') - - if agg_type == 'terms': - # Generate mock terms aggregation - if agg_name == 'category_name': - buckets = [ - {"key": "芭比", "doc_count": random.randint(15, 35)}, - {"key": "儿童娃娃", "doc_count": random.randint(8, 20)}, - {"key": "积木", "doc_count": random.randint(5, 15)}, - {"key": "小汽车", "doc_count": random.randint(3, 12)}, - {"key": "婴儿娃娃", "doc_count": random.randint(4, 10)}, - {"key": "人物", "doc_count": random.randint(6, 18)} - ] - elif agg_name == 'brand_name': - buckets = [ - {"key": "美泰", "doc_count": random.randint(20, 40)}, - {"key": "乐高", "doc_count": random.randint(10, 25)}, - {"key": "迪士尼", "doc_count": random.randint(8, 20)}, - {"key": "多美卡", "doc_count": random.randint(5, 15)}, - {"key": "孩之宝", "doc_count": random.randint(6, 18)}, - {"key": "万代", "doc_count": random.randint(3, 10)} - ] - elif agg_name == 'material_type': - buckets = [ - {"key": "塑料", "doc_count": random.randint(40, 80)}, - {"key": "布绒", "doc_count": random.randint(8, 20)}, - {"key": "金属", "doc_count": random.randint(5, 15)}, - {"key": "木质", "doc_count": random.randint(3, 12)} - ] - else: - # Generic terms aggregation - buckets = [ - {"key": f"选项{i+1}", "doc_count": random.randint(5, 25)} - for i in range(5) - ] - - 
result[agg_name] = { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": random.randint(10, 50), - "buckets": buckets - } - - elif agg_type == 'range': - # Generate mock range aggregation (usually for price) - if agg_name == 'price_ranges': - ranges = agg_spec.get('ranges', []) - buckets = [] - for range_spec in ranges: - key = range_spec.get('key', 'unknown') - count = random.randint(5, 30) - bucket_data = {"key": key, "doc_count": count} - - # Add range bounds - if 'to' in range_spec: - bucket_data['to'] = range_spec['to'] - if 'from' in range_spec: - bucket_data['from'] = range_spec['from'] - - buckets.append(bucket_data) - - result[agg_name] = {"buckets": buckets} - - return result - - def log_message(self, format, *args): - """Override to reduce log noise.""" - pass - -def run_server(): - """Run the API server.""" - server_address = ('', 6002) - httpd = HTTPServer(server_address, SearchAPIHandler) - print("🚀 Simple Search API Server started!") - print("📍 API: http://localhost:6002") - print("🔍 Search endpoint: http://localhost:6002/search/") - print("🌐 Frontend should connect to: http://localhost:6002") - print("⏹️ Press Ctrl+C to stop") - - try: - httpd.serve_forever() - except KeyboardInterrupt: - print("\n🛑 Server stopped") - httpd.server_close() - -def run_server(): - """Run the API server - main entry point.""" - server_address = ('', 6002) - httpd = HTTPServer(server_address, SearchAPIHandler) - print("🚀 Simple Search API Server started!") - print("📍 API: http://localhost:6002") - print("🔍 Search endpoint: http://localhost:6002/search/") - print("🌐 Frontend should connect to: http://localhost:6002") - print("⏹️ Press Ctrl+C to stop") - - try: - httpd.serve_forever() - except KeyboardInterrupt: - print("\n🛑 Server stopped") - httpd.server_close() - -if __name__ == '__main__': - run_server() \ No newline at end of file diff --git a/test_aggregation_api.py b/test_aggregation_api.py deleted file mode 100644 index 1974042..0000000 --- 
a/test_aggregation_api.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script for aggregation functionality -""" - -import requests -import json - -API_BASE_URL = 'http://120.76.41.98:6002' - -def test_search_with_aggregations(): - """Test search with aggregations""" - - # Test data - test_query = { - "query": "玩具", - "size": 5, - "aggregations": { - "category_stats": { - "terms": { - "field": "categoryName_keyword", - "size": 10 - } - }, - "brand_stats": { - "terms": { - "field": "brandName_keyword", - "size": 10 - } - }, - "price_ranges": { - "range": { - "field": "price", - "ranges": [ - {"key": "0-50", "to": 50}, - {"key": "50-100", "from": 50, "to": 100}, - {"key": "100-200", "from": 100, "to": 200}, - {"key": "200+", "from": 200} - ] - } - } - } - } - - print("Testing search with aggregations...") - print(f"Query: {json.dumps(test_query, indent=2, ensure_ascii=False)}") - - try: - response = requests.post(f"{API_BASE_URL}/search/", - json=test_query, - headers={'Content-Type': 'application/json'}) - - print(f"Status Code: {response.status_code}") - - if response.ok: - data = response.json() - print(f"Found {data['total']} results in {data['took_ms']}ms") - print(f"Max Score: {data['max_score']}") - - # Print aggregations - if data.get('aggregations'): - print("\nAggregations:") - for agg_name, agg_result in data['aggregations'].items(): - print(f"\n{agg_name}:") - if 'buckets' in agg_result: - for bucket in agg_result['buckets'][:5]: # Show first 5 buckets - print(f" - {bucket['key']}: {bucket['doc_count']}") - - # Print first few results - print(f"\nFirst 3 results:") - for i, hit in enumerate(data['hits'][:3]): - source = hit['_source'] - print(f"\n{i+1}. 
{source.get('name', 'N/A')}") - print(f" Category: {source.get('categoryName', 'N/A')}") - print(f" Brand: {source.get('brandName', 'N/A')}") - print(f" Price: {source.get('price', 'N/A')}") - print(f" Score: {hit['_score']:.4f}") - else: - print(f"Error: {response.status_code}") - print(f"Response: {response.text}") - - except Exception as e: - print(f"Request failed: {e}") - -def test_search_with_filters(): - """Test search with filters""" - - test_filters = { - "query": "玩具", - "size": 5, - "filters": { - "categoryName_keyword": ["玩具"], - "price_ranges": ["0-50", "50-100"] - } - } - - print("\n\nTesting search with filters...") - print(f"Query: {json.dumps(test_filters, indent=2, ensure_ascii=False)}") - - try: - response = requests.post(f"{API_BASE_URL}/search/", - json=test_filters, - headers={'Content-Type': 'application/json'}) - - print(f"Status Code: {response.status_code}") - - if response.ok: - data = response.json() - print(f"Found {data['total']} results in {data['took_ms']}ms") - - print(f"\nFirst 3 results:") - for i, hit in enumerate(data['hits'][:3]): - source = hit['_source'] - print(f"\n{i+1}. 
{source.get('name', 'N/A')}") - print(f" Category: {source.get('categoryName', 'N/A')}") - print(f" Brand: {source.get('brandName', 'N/A')}") - print(f" Price: {source.get('price', 'N/A')}") - print(f" Score: {hit['_score']:.4f}") - else: - print(f"Error: {response.status_code}") - print(f"Response: {response.text}") - - except Exception as e: - print(f"Request failed: {e}") - -def test_search_with_sorting(): - """Test search with sorting""" - - test_sort = { - "query": "玩具", - "size": 5, - "sort_by": "price", - "sort_order": "asc" - } - - print("\n\nTesting search with sorting (price ascending)...") - print(f"Query: {json.dumps(test_sort, indent=2, ensure_ascii=False)}") - - try: - response = requests.post(f"{API_BASE_URL}/search/", - json=test_sort, - headers={'Content-Type': 'application/json'}) - - print(f"Status Code: {response.status_code}") - - if response.ok: - data = response.json() - print(f"Found {data['total']} results in {data['took_ms']}ms") - - print(f"\nFirst 3 results (sorted by price):") - for i, hit in enumerate(data['hits'][:3]): - source = hit['_source'] - print(f"\n{i+1}. {source.get('name', 'N/A')}") - print(f" Price: {source.get('price', 'N/A')}") - print(f" Score: {hit['_score']:.4f}") - else: - print(f"Error: {response.status_code}") - print(f"Response: {response.text}") - - except Exception as e: - print(f"Request failed: {e}") - -if __name__ == "__main__": - test_search_with_aggregations() - test_search_with_filters() - test_search_with_sorting() \ No newline at end of file diff --git a/test_aggregation_functionality.py b/test_aggregation_functionality.py deleted file mode 100644 index 5adb5fb..0000000 --- a/test_aggregation_functionality.py +++ /dev/null @@ -1,236 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple test script to verify aggregation functionality without external dependencies. 
-""" - -import sys -import os -import inspect - -# Add the project root to the Python path -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - -def test_es_query_builder_aggregations(): - """Test the ES query builder aggregation methods.""" - print("Testing ES Query Builder Aggregation Methods...") - - # Import the query builder - try: - from search.es_query_builder import ESQueryBuilder - print("✓ ESQueryBuilder imported successfully") - except ImportError as e: - print(f"✗ Failed to import ESQueryBuilder: {e}") - return False - - # Create a query builder instance - builder = ESQueryBuilder( - index_name="test_index", - match_fields=["name", "description"] - ) - - # Test basic aggregation - es_query = {"query": {"match_all": {}}} - - # Test add_dynamic_aggregations - aggregations = { - "category_name": { - "type": "terms", - "field": "categoryName_keyword", - "size": 10 - }, - "price_ranges": { - "type": "range", - "field": "price", - "ranges": [ - {"key": "0-50", "to": 50}, - {"key": "50-100", "from": 50, "to": 100} - ] - } - } - - result_query = builder.add_dynamic_aggregations(es_query, aggregations) - - if "aggs" in result_query: - print("✓ Aggregations added to query") - - # Check category aggregation - if "category_name" in result_query["aggs"]: - category_agg = result_query["aggs"]["category_name"] - if "terms" in category_agg and category_agg["terms"]["field"] == "categoryName_keyword": - print("✓ Category aggregation correctly configured") - else: - print("✗ Category aggregation incorrectly configured") - return False - - # Check price range aggregation - if "price_ranges" in result_query["aggs"]: - price_agg = result_query["aggs"]["price_ranges"] - if "range" in price_agg and price_agg["range"]["field"] == "price": - print("✓ Price range aggregation correctly configured") - else: - print("✗ Price range aggregation incorrectly configured") - return False - else: - print("✗ No aggregations added to query") - return False - - # Test sorting - 
result_query_asc = builder.add_sorting({}, "price_asc") - if "sort" in result_query_asc: - print("✓ Price ascending sort added") - else: - print("✗ Price ascending sort not added") - return False - - result_query_desc = builder.add_sorting({}, "price_desc") - if "sort" in result_query_desc: - print("✓ Price descending sort added") - else: - print("✗ Price descending sort not added") - return False - - result_query_time = builder.add_sorting({}, "time_desc") - if "sort" in result_query_time: - print("✓ Time descending sort added") - else: - print("✗ Time descending sort not added") - return False - - return True - - -def test_searcher_integration(): - """Test searcher integration with new parameters.""" - print("\nTesting Searcher Integration...") - - try: - from search.searcher import Searcher - print("✓ Searcher imported successfully") - except ImportError as e: - print(f"✗ Failed to import Searcher: {e}") - return False - - # We can't easily test the full searcher without ES, but we can check the method signature - search_method = getattr(Searcher, 'search', None) - - if search_method: - sig = inspect.signature(search_method) - params = list(sig.parameters.keys()) - - expected_params = ['query', 'size', 'from_', 'filters', 'min_score', 'aggregations', 'sort_by', 'context'] - for param in expected_params: - if param in params: - print(f"✓ Parameter '{param}' found in search method") - else: - print(f"✗ Parameter '{param}' missing from search method") - return False - else: - print("✗ Search method not found in Searcher class") - return False - - return True - - -def test_api_route_integration(): - """Test API route integration.""" - print("\nTesting API Route Integration...") - - try: - from api.routes.search import router - print("✓ Search router imported successfully") - except ImportError as e: - print(f"✗ Failed to import search router: {e}") - return False - - # Check if the route exists - routes = [route.path for route in router.routes] - if "/" in routes: - 
print("✓ Main search route found") - else: - print("✗ Main search route not found") - return False - - return True - - -def test_configuration(): - """Test configuration parsing.""" - print("\nTesting Configuration...") - - try: - from config import CustomerConfig - print("✓ CustomerConfig imported successfully") - except ImportError as e: - print(f"✗ Failed to import CustomerConfig: {e}") - return False - - # Try to load the customer1 config - try: - config = CustomerConfig.load_from_file("config/schema/customer1_config.yaml") - print("✓ Customer1 configuration loaded successfully") - - # Check if price field is in the configuration - field_names = [field.name for field in config.fields] - if "price" in field_names: - print("✓ Price field found in configuration") - else: - print("✗ Price field not found in configuration") - return False - - # Check keyword fields for aggregations - if "categoryName_keyword" in field_names: - print("✓ Category keyword field found") - else: - print("✗ Category keyword field not found") - return False - - if "brandName_keyword" in field_names: - print("✓ Brand keyword field found") - else: - print("✗ Brand keyword field not found") - return False - - except Exception as e: - print(f"✗ Failed to load configuration: {e}") - return False - - return True - - -def main(): - """Run all tests.""" - print("=== Search Engine Aggregation Functionality Tests ===\n") - - tests = [ - test_es_query_builder_aggregations, - test_searcher_integration, - test_api_route_integration, - test_configuration - ] - - passed = 0 - total = len(tests) - - for test in tests: - try: - if test(): - passed += 1 - print(f"✓ {test.__name__} PASSED") - else: - print(f"✗ {test.__name__} FAILED") - except Exception as e: - print(f"✗ {test.__name__} ERROR: {e}") - - print(f"\n=== Test Results: {passed}/{total} tests passed ===") - - if passed == total: - print("🎉 All tests passed! Aggregation functionality is ready.") - return True - else: - print("❌ Some tests failed. 
Please check the implementation.") - return False - - -if __name__ == "__main__": - success = main() - sys.exit(0 if success else 1) \ No newline at end of file diff --git a/test_complete_search.py b/test_complete_search.py deleted file mode 100644 index ded765f..0000000 --- a/test_complete_search.py +++ /dev/null @@ -1,211 +0,0 @@ -#!/usr/bin/env python3 -""" -Complete test script simulating frontend search interaction -""" - -import requests -import json - -API_BASE_URL = 'http://120.76.41.98:6002' - -def test_complete_search_workflow(): - """Test complete search workflow similar to frontend""" - - print("=" * 60) - print("完整搜索流程测试") - print("=" * 60) - - # Step 1: Initial search with aggregations - print("\n1️⃣ 初始搜索(带聚合功能)") - print("-" * 30) - - search_request = { - "query": "芭比娃娃", - "size": 10, - "aggregations": { - "category_stats": { - "terms": { - "field": "categoryName_keyword", - "size": 10 - } - }, - "brand_stats": { - "terms": { - "field": "brandName_keyword", - "size": 10 - } - }, - "price_ranges": { - "range": { - "field": "price", - "ranges": [ - {"key": "0-50", "to": 50}, - {"key": "50-100", "from": 50, "to": 100}, - {"key": "100-200", "from": 100, "to": 200}, - {"key": "200+", "from": 200} - ] - } - } - } - } - - try: - response = requests.post(f"{API_BASE_URL}/search/", json=search_request) - - if response.ok: - data = response.json() - print(f"✅ 找到 {data['total']} 个结果,耗时 {data['took_ms']}ms") - - # Show aggregations results - if data.get('aggregations'): - print("\n📊 聚合结果:") - - # Category aggregations - if 'category_stats' in data['aggregations']: - print(" 🏷️ 分类统计:") - for bucket in data['aggregations']['category_stats']['buckets'][:3]: - print(f" - {bucket['key']}: {bucket['doc_count']} 个商品") - - # Brand aggregations - if 'brand_stats' in data['aggregations']: - print(" 🏢 品牌统计:") - for bucket in data['aggregations']['brand_stats']['buckets'][:3]: - print(f" - {bucket['key']}: {bucket['doc_count']} 个商品") - - # Price ranges - if 'price_ranges' 
in data['aggregations']: - print(" 💰 价格分布:") - for bucket in data['aggregations']['price_ranges']['buckets']: - print(f" - {bucket['key']}: {bucket['doc_count']} 个商品") - - # Show sample results - print(f"\n🔍 前3个搜索结果:") - for i, hit in enumerate(data['hits'][:3]): - source = hit['_source'] - price = source.get('price', 'N/A') - category = source.get('categoryName', 'N/A') - brand = source.get('brandName', 'N/A') - print(f" {i+1}. {source.get('name', 'N/A')}") - print(f" 💰 价格: {price}") - print(f" 📁 分类: {category}") - print(f" 🏷️ 品牌: {brand}") - print(f" ⭐ 评分: {hit['_score']:.3f}") - print() - - else: - print(f"❌ 搜索失败: {response.status_code}") - print(f"错误信息: {response.text}") - - except Exception as e: - print(f"❌ 请求异常: {e}") - - # Step 2: Search with filters - print("\n2️⃣ 带过滤条件的搜索") - print("-" * 30) - - filtered_search = { - "query": "芭比娃娃", - "size": 5, - "filters": { - "brandName_keyword": ["美泰"], - "price_ranges": ["50-100", "100-200"] - } - } - - try: - response = requests.post(f"{API_BASE_URL}/search/", json=filtered_search) - - if response.ok: - data = response.json() - print(f"✅ 过滤后找到 {data['total']} 个结果,耗时 {data['took_ms']}ms") - print(" 🎯 过滤条件: 品牌=美泰, 价格=¥50-200") - - print(f"\n💫 前3个过滤结果:") - for i, hit in enumerate(data['hits'][:3]): - source = hit['_source'] - price = source.get('price', 'N/A') - category = source.get('categoryName', 'N/A') - brand = source.get('brandName', 'N/A') - print(f" {i+1}. 
{source.get('name', 'N/A')}") - print(f" 💰 ¥{price} | 📁 {category} | 🏷️ {brand}") - print(f" ⭐ 评分: {hit['_score']:.3f}") - - else: - print(f"❌ 过滤搜索失败: {response.status_code}") - - except Exception as e: - print(f"❌ 请求异常: {e}") - - # Step 3: Search with sorting - print("\n3️⃣ 排序搜索") - print("-" * 30) - - # Test price ascending - price_asc_search = { - "query": "芭比娃娃", - "size": 3, - "sort_by": "price", - "sort_order": "asc" - } - - try: - response = requests.post(f"{API_BASE_URL}/search/", json=price_asc_search) - - if response.ok: - data = response.json() - print(f"✅ 价格升序排序,找到 {data['total']} 个结果") - print(" 📈 排序方式: 价格从低到高") - - print(f"\n💵 价格排序结果:") - for i, hit in enumerate(data['hits']): - source = hit['_source'] - price = source.get('price', 'N/A') - name = source.get('name', 'N/A') - print(f" {i+1}. ¥{price} - {name}") - - else: - print(f"❌ 排序搜索失败: {response.status_code}") - - except Exception as e: - print(f"❌ 请求异常: {e}") - - # Step 4: Test time sorting - print("\n4️⃣ 时间排序测试") - print("-" * 30) - - time_sort_search = { - "query": "芭比娃娃", - "size": 3, - "sort_by": "create_time", - "sort_order": "desc" - } - - try: - response = requests.post(f"{API_BASE_URL}/search/", json=time_sort_search) - - if response.ok: - data = response.json() - print(f"✅ 时间降序排序,找到 {data['total']} 个结果") - print(" 📅 排序方式: 上架时间从新到旧") - - print(f"\n🕐 时间排序结果:") - for i, hit in enumerate(data['hits']): - source = hit['_source'] - create_time = source.get('create_time', 'N/A') - name = source.get('name', 'N/A') - print(f" {i+1}. 
{create_time} - {name}") - - else: - print(f"❌ 时间排序失败: {response.status_code}") - - except Exception as e: - print(f"❌ 请求异常: {e}") - - print("\n" + "=" * 60) - print("🎉 搜索功能测试完成!") - print("✨ 前端访问地址: http://localhost:8080") - print("🔧 后端API地址: http://120.76.41.98:6002") - print("=" * 60) - -if __name__ == "__main__": - test_complete_search_workflow() \ No newline at end of file diff --git a/test_minimal_sort.py b/test_minimal_sort.py deleted file mode 100644 index 946058c..0000000 --- a/test_minimal_sort.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python3 -""" -Minimal test to isolate sort issue -""" - -import requests -import json - -def test_minimal_sort(): - """Test minimal sort case""" - - base_url = "http://120.76.41.98:6002" - - # Test 1: No sort parameters - print("Test 1: No sort parameters") - response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1}) - print(f"Status: {response.status_code}") - print(f"Response: {response.text[:200]}...") - - # Test 2: Empty sort_by - print("\nTest 2: Empty sort_by") - response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1, "sort_by": ""}) - print(f"Status: {response.status_code}") - print(f"Response: {response.text[:200]}...") - - # Test 3: sort_by only (no sort_order) - print("\nTest 3: sort_by only") - response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1, "sort_by": "create_time"}) - print(f"Status: {response.status_code}") - print(f"Response: {response.text[:200]}...") - - # Test 4: sort_order only (no sort_by) - print("\nTest 4: sort_order only") - response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1, "sort_order": "desc"}) - print(f"Status: {response.status_code}") - print(f"Response: {response.text[:200]}...") - - # Test 5: Both parameters with None values - print("\nTest 5: Both parameters with null values") - response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1, "sort_by": 
None, "sort_order": None}) - print(f"Status: {response.status_code}") - print(f"Response: {response.text[:200]}...") - -if __name__ == "__main__": - test_minimal_sort() \ No newline at end of file diff --git a/支持多语言查询.md b/支持多语言查询.md index fe93dea..a40d477 100644 --- a/支持多语言查询.md +++ b/支持多语言查询.md @@ -18,6 +18,7 @@ index 397a9f7..3e728c9 100644 暂时具体实现为 bm25()+0.2*text_embedding_relevence(也就是knn检索表达式的打分) +bm25() 包括多语言的打分:内部需要通过配置翻译为多种语言(配置几种目标语言 默认中文、英文,并且设置对应的检索域),然后分别到对应的字段搜索,中文字段到配置的中文title搜索,英文到对应的英文title搜索。 +```text bm25打分(base_query): "multi_match": { "query": search_query, @@ -186,11 +187,5 @@ index a7088ec..0a798ed 100644 def get_document(self, doc_id: str) -> Optional[Dict[str, Any]]: """ Get single document by ID. - - - - - - - +``` -- libgit2 0.21.2