Commit bb52dba655cf6c3d9b2e9b8e331b987fea4e7dc3
1 parent
a77693fe
API接口设计优化:
Showing
12 changed files
with
5081 additions
and
1020 deletions
Show diff stats
.cursor/plans/api-interface-analysis-42918612.plan.0.md
0 → 100644
| @@ -0,0 +1,1222 @@ | @@ -0,0 +1,1222 @@ | ||
| 1 | +<!-- 42918612-89ae-45a6-9ccb-e2b0df0a7aec 4ab292c1-f296-4621-89e3-872de57cac11 --> | ||
| 2 | +# 搜索引擎 API 接口重构实施计划 | ||
| 3 | + | ||
| 4 | +## 第一部分:现状分析 | ||
| 5 | + | ||
| 6 | +### 1. 当前实现存在的问题 | ||
| 7 | + | ||
| 8 | +#### 问题 1:硬编码的价格范围过滤 | ||
| 9 | + | ||
| 10 | +**位置**:`search/es_query_builder.py` 第 205-233 行 | ||
| 11 | + | ||
| 12 | +**问题描述**: | ||
| 13 | + | ||
| 14 | +```python | ||
| 15 | +if field == 'price_ranges': | ||
| 16 | + # 硬编码特定字符串值 | ||
| 17 | + if price_range == '0-50': | ||
| 18 | + price_ranges.append({"lt": 50}) | ||
| 19 | + elif price_range == '50-100': | ||
| 20 | + price_ranges.append({"gte": 50, "lt": 100}) | ||
| 21 | + # ... | ||
| 22 | +``` | ||
| 23 | + | ||
| 24 | +**影响**: | ||
| 25 | + | ||
| 26 | +- 只支持 `price` 字段,无法扩展到其他数值字段 | ||
| 27 | +- 范围值硬编码,无法根据业务需求调整 | ||
| 28 | +- 不符合 SaaS 系统的通用性要求 | ||
| 29 | + | ||
| 30 | +#### 问题 2:聚合参数直接暴露 ES DSL | ||
| 31 | + | ||
| 32 | +**位置**: | ||
| 33 | + | ||
| 34 | +- `api/models.py` 第 17 行:`aggregations: Optional[Dict[str, Any]]` | ||
| 35 | +- `search/es_query_builder.py` 第 298-319 行:`add_dynamic_aggregations` | ||
| 36 | +- `frontend/static/js/app.js` 第 57-87 行:前端硬编码 ES DSL | ||
| 37 | + | ||
| 38 | +**问题描述**: | ||
| 39 | + | ||
| 40 | +前端需要了解 Elasticsearch 的聚合语法: | ||
| 41 | + | ||
| 42 | +```javascript | ||
| 43 | +const aggregations = { | ||
| 44 | + "category_stats": { | ||
| 45 | + "terms": { | ||
| 46 | + "field": "categoryName_keyword", | ||
| 47 | + "size": 15 | ||
| 48 | + } | ||
| 49 | + }, | ||
| 50 | + "price_ranges": { | ||
| 51 | + "range": { | ||
| 52 | + "field": "price", | ||
| 53 | + "ranges": [ | ||
| 54 | + {"key": "0-50", "to": 50}, | ||
| 55 | + // ... | ||
| 56 | + ] | ||
| 57 | + } | ||
| 58 | + } | ||
| 59 | +}; | ||
| 60 | +``` | ||
| 61 | + | ||
| 62 | +**影响**: | ||
| 63 | + | ||
| 64 | +- 前端需要了解 ES 语法,增加集成难度 | ||
| 65 | +- 不符合 SaaS 产品易用性原则 | ||
| 66 | +- 难以进行参数验证和文档生成 | ||
| 67 | + | ||
| 68 | +#### 问题 3:分面搜索结果格式不统一 | ||
| 69 | + | ||
| 70 | +**位置**:`frontend/static/js/app.js` 第 208-258 行 | ||
| 71 | + | ||
| 72 | +**问题描述**: | ||
| 73 | + | ||
| 74 | +- 后端直接返回 ES 原始聚合格式(`buckets` 结构),未做标准化处理 | ||
| 75 | +- 前端需要知道不同聚合类型的响应结构 | ||
| 76 | +- 没有统一的分面结果模型 | ||
| 77 | + | ||
| 78 | +**影响**: | ||
| 79 | + | ||
| 80 | +- 前端解析逻辑复杂 | ||
| 81 | +- 不同类型的聚合处理方式不一致 | ||
| 82 | +- 难以扩展新的聚合类型 | ||
| 83 | + | ||
| 84 | +#### 问题 4:缺少搜索建议功能 | ||
| 85 | + | ||
| 86 | +**当前状态**:完全没有实现 | ||
| 87 | + | ||
| 88 | +**需求**: | ||
| 89 | + | ||
| 90 | +- 自动补全(Autocomplete) | ||
| 91 | +- 搜索建议(Suggestions) | ||
| 92 | +- 搜索即时反馈(Instant Search) | ||
| 93 | + | ||
| 94 | +### 2. 依赖关系分析 | ||
| 95 | + | ||
| 96 | +**影响范围**: | ||
| 97 | + | ||
| 98 | +1. **后端模型层**:`api/models.py` | ||
| 99 | +2. **查询构建层**:`search/es_query_builder.py` | ||
| 100 | +3. **搜索执行层**:`search/searcher.py` | ||
| 101 | +4. **API 路由层**:`api/routes/search.py` | ||
| 102 | +5. **前端代码**:`frontend/static/js/app.js` | ||
| 103 | +6. **测试代码**:`test_aggregation_api.py`, `test_complete_search.py` | ||
| 104 | + | ||
| 105 | +## 第二部分:优化方案设计 | ||
| 106 | + | ||
| 107 | +### 方案概述 | ||
| 108 | + | ||
| 109 | +采用**结构化过滤参数方案(方案 A 的简化版)**: | ||
| 110 | + | ||
| 111 | +- 分离 `filters`(精确匹配)和 `range_filters`(范围过滤) | ||
| 112 | +- **不支持单字段多个不连续范围**(例如无法同时选择 0-50 和 100-200 两个价格区间),以简化设计 | ||
| 113 | +- 标准化聚合参数,使用简化的接口 | ||
| 114 | +- 统一分面搜索响应格式 | ||
| 115 | + | ||
| 116 | +### 1. 新的请求模型设计 | ||
| 117 | + | ||
| 118 | +#### 1.1 核心模型定义 | ||
| 119 | + | ||
| 120 | +**文件**:`api/models.py` | ||
| 121 | + | ||
| 122 | +```python | ||
| 123 | +from pydantic import BaseModel, Field, field_validator | ||
| 124 | +from typing import List, Dict, Any, Optional, Union, Literal | ||
| 125 | + | ||
| 126 | + | ||
| 127 | +class RangeFilter(BaseModel): | ||
| 128 | + """数值范围过滤器""" | ||
| 129 | + gte: Optional[float] = Field(None, description="大于等于 (>=)") | ||
| 130 | + gt: Optional[float] = Field(None, description="大于 (>)") | ||
| 131 | + lte: Optional[float] = Field(None, description="小于等于 (<=)") | ||
| 132 | + lt: Optional[float] = Field(None, description="小于 (<)") | ||
| 133 | + | ||
| 134 | + @field_validator('*') | ||
| 135 | + def check_at_least_one(cls, v, info): | ||
| 136 | + """确保至少指定一个边界""" | ||
| 137 | + values = info.data | ||
| 138 | + if not any([values.get('gte'), values.get('gt'), | ||
| 139 | + values.get('lte'), values.get('lt')]): | ||
| 140 | + raise ValueError('至少需要指定一个范围边界') | ||
| 141 | + return v | ||
| 142 | + | ||
| 143 | + class Config: | ||
| 144 | + json_schema_extra = { | ||
| 145 | + "examples": [ | ||
| 146 | + {"gte": 50, "lte": 200}, | ||
| 147 | + {"gt": 100}, | ||
| 148 | + {"lt": 50} | ||
| 149 | + ] | ||
| 150 | + } | ||
| 151 | + | ||
| 152 | + | ||
| 153 | +class FacetConfig(BaseModel): | ||
| 154 | + """分面配置(简化版)""" | ||
| 155 | + field: str = Field(..., description="分面字段名") | ||
| 156 | + size: int = Field(10, ge=1, le=100, description="返回的分面值数量") | ||
| 157 | + type: Literal["terms", "range"] = Field("terms", description="分面类型") | ||
| 158 | + ranges: Optional[List[Dict[str, Any]]] = Field( | ||
| 159 | + None, | ||
| 160 | + description="范围分面的范围定义(仅当 type='range' 时需要)" | ||
| 161 | + ) | ||
| 162 | + | ||
| 163 | + class Config: | ||
| 164 | + json_schema_extra = { | ||
| 165 | + "examples": [ | ||
| 166 | + { | ||
| 167 | + "field": "categoryName_keyword", | ||
| 168 | + "size": 15, | ||
| 169 | + "type": "terms" | ||
| 170 | + }, | ||
| 171 | + { | ||
| 172 | + "field": "price", | ||
| 173 | + "size": 4, | ||
| 174 | + "type": "range", | ||
| 175 | + "ranges": [ | ||
| 176 | + {"key": "0-50", "to": 50}, | ||
| 177 | + {"key": "50-100", "from": 50, "to": 100}, | ||
| 178 | + {"key": "100-200", "from": 100, "to": 200}, | ||
| 179 | + {"key": "200+", "from": 200} | ||
| 180 | + ] | ||
| 181 | + } | ||
| 182 | + ] | ||
| 183 | + } | ||
| 184 | + | ||
| 185 | + | ||
| 186 | +class SearchRequest(BaseModel): | ||
| 187 | + """搜索请求模型(重构版)""" | ||
| 188 | + | ||
| 189 | + # 基础搜索参数 | ||
| 190 | + query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)") | ||
| 191 | + size: int = Field(10, ge=1, le=100, description="返回结果数量") | ||
| 192 | + from_: int = Field(0, ge=0, alias="from", description="分页偏移量") | ||
| 193 | + | ||
| 194 | + # 过滤器 - 精确匹配和多值匹配 | ||
| 195 | + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field( | ||
| 196 | + None, | ||
| 197 | + description="精确匹配过滤器。单值表示精确匹配,数组表示 OR 匹配(匹配任意一个值)", | ||
| 198 | + json_schema_extra={ | ||
| 199 | + "examples": [ | ||
| 200 | + { | ||
| 201 | + "categoryName_keyword": ["玩具", "益智玩具"], | ||
| 202 | + "brandName_keyword": "乐高", | ||
| 203 | + "in_stock": True | ||
| 204 | + } | ||
| 205 | + ] | ||
| 206 | + } | ||
| 207 | + ) | ||
| 208 | + | ||
| 209 | + # 范围过滤器 - 数值范围 | ||
| 210 | + range_filters: Optional[Dict[str, RangeFilter]] = Field( | ||
| 211 | + None, | ||
| 212 | + description="数值范围过滤器。支持 gte, gt, lte, lt 操作符", | ||
| 213 | + json_schema_extra={ | ||
| 214 | + "examples": [ | ||
| 215 | + { | ||
| 216 | + "price": {"gte": 50, "lte": 200}, | ||
| 217 | + "days_since_last_update": {"lte": 30} | ||
| 218 | + } | ||
| 219 | + ] | ||
| 220 | + } | ||
| 221 | + ) | ||
| 222 | + | ||
| 223 | + # 排序 | ||
| 224 | + sort_by: Optional[str] = Field(None, description="排序字段名(如 'price', 'create_time')") | ||
| 225 | + sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)") | ||
| 226 | + | ||
| 227 | + # 分面搜索 - 简化接口 | ||
| 228 | + facets: Optional[List[Union[str, FacetConfig]]] = Field( | ||
| 229 | + None, | ||
| 230 | + description="分面配置。可以是字段名列表(使用默认配置)或详细的分面配置对象", | ||
| 231 | + json_schema_extra={ | ||
| 232 | + "examples": [ | ||
| 233 | + # 简单模式:只指定字段名,使用默认配置 | ||
| 234 | + ["categoryName_keyword", "brandName_keyword"], | ||
| 235 | + # 高级模式:详细配置 | ||
| 236 | + [ | ||
| 237 | + {"field": "categoryName_keyword", "size": 15}, | ||
| 238 | + { | ||
| 239 | + "field": "price", | ||
| 240 | + "type": "range", | ||
| 241 | + "ranges": [ | ||
| 242 | + {"key": "0-50", "to": 50}, | ||
| 243 | + {"key": "50-100", "from": 50, "to": 100} | ||
| 244 | + ] | ||
| 245 | + } | ||
| 246 | + ] | ||
| 247 | + ] | ||
| 248 | + } | ||
| 249 | + ) | ||
| 250 | + | ||
| 251 | + # 高级选项 | ||
| 252 | + min_score: Optional[float] = Field(None, ge=0, description="最小相关性分数阈值") | ||
| 253 | + highlight: bool = Field(False, description="是否高亮搜索关键词(暂不实现)") | ||
| 254 | + debug: bool = Field(False, description="是否返回调试信息") | ||
| 255 | + | ||
| 256 | + # 个性化参数(预留) | ||
| 257 | + user_id: Optional[str] = Field(None, description="用户ID,用于个性化搜索和推荐") | ||
| 258 | + session_id: Optional[str] = Field(None, description="会话ID,用于搜索分析") | ||
| 259 | + | ||
| 260 | + | ||
| 261 | +class ImageSearchRequest(BaseModel): | ||
| 262 | + """图片搜索请求模型""" | ||
| 263 | + image_url: str = Field(..., description="查询图片的 URL") | ||
| 264 | + size: int = Field(10, ge=1, le=100, description="返回结果数量") | ||
| 265 | + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = None | ||
| 266 | + range_filters: Optional[Dict[str, RangeFilter]] = None | ||
| 267 | + | ||
| 268 | + | ||
| 269 | +class SearchSuggestRequest(BaseModel): | ||
| 270 | + """搜索建议请求模型(框架,暂不实现)""" | ||
| 271 | + query: str = Field(..., min_length=1, description="搜索查询字符串") | ||
| 272 | + size: int = Field(5, ge=1, le=20, description="返回建议数量") | ||
| 273 | + types: List[Literal["query", "product", "category", "brand"]] = Field( | ||
| 274 | + ["query"], | ||
| 275 | + description="建议类型:query(查询建议), product(商品建议), category(类目建议), brand(品牌建议)" | ||
| 276 | + ) | ||
| 277 | +``` | ||
| 278 | + | ||
| 279 | +#### 1.2 响应模型定义 | ||
| 280 | + | ||
| 281 | +```python | ||
| 282 | +class FacetValue(BaseModel): | ||
| 283 | + """分面值""" | ||
| 284 | + value: Union[str, int, float] = Field(..., description="分面值") | ||
| 285 | + label: Optional[str] = Field(None, description="显示标签(如果与 value 不同)") | ||
| 286 | + count: int = Field(..., description="匹配的文档数量") | ||
| 287 | + selected: bool = Field(False, description="是否已选中(当前过滤器中)") | ||
| 288 | + | ||
| 289 | + | ||
| 290 | +class FacetResult(BaseModel): | ||
| 291 | + """分面结果(标准化格式)""" | ||
| 292 | + field: str = Field(..., description="字段名") | ||
| 293 | + label: str = Field(..., description="分面显示名称") | ||
| 294 | + type: Literal["terms", "range"] = Field(..., description="分面类型") | ||
| 295 | + values: List[FacetValue] = Field(..., description="分面值列表") | ||
| 296 | + total_count: Optional[int] = Field(None, description="该字段的总文档数") | ||
| 297 | + | ||
| 298 | + | ||
| 299 | +class SearchResponse(BaseModel): | ||
| 300 | + """搜索响应模型(重构版)""" | ||
| 301 | + | ||
| 302 | + # 核心结果 | ||
| 303 | + hits: List[Dict[str, Any]] = Field(..., description="搜索结果列表") | ||
| 304 | + total: int = Field(..., description="匹配的总文档数") | ||
| 305 | + max_score: float = Field(..., description="最高相关性分数") | ||
| 306 | + | ||
| 307 | + # 分面搜索结果(标准化格式) | ||
| 308 | + facets: Optional[List[FacetResult]] = Field( | ||
| 309 | + None, | ||
| 310 | + description="分面统计结果(标准化格式)" | ||
| 311 | + ) | ||
| 312 | + | ||
| 313 | + # 查询信息 | ||
| 314 | + query_info: Dict[str, Any] = Field( | ||
| 315 | + default_factory=dict, | ||
| 316 | + description="查询处理信息(原始查询、改写、语言检测、翻译等)" | ||
| 317 | + ) | ||
| 318 | + | ||
| 319 | + # 推荐与建议(预留) | ||
| 320 | + related_queries: Optional[List[str]] = Field(None, description="相关搜索查询") | ||
| 321 | + | ||
| 322 | + # 性能指标 | ||
| 323 | + took_ms: int = Field(..., description="搜索总耗时(毫秒)") | ||
| 324 | + performance_info: Optional[Dict[str, Any]] = Field(None, description="详细性能信息") | ||
| 325 | + | ||
| 326 | + # 调试信息 | ||
| 327 | + debug_info: Optional[Dict[str, Any]] = Field(None, description="调试信息(仅当 debug=True)") | ||
| 328 | + | ||
| 329 | + | ||
| 330 | +class SearchSuggestResponse(BaseModel): | ||
| 331 | + """搜索建议响应模型(框架,暂不实现)""" | ||
| 332 | + query: str = Field(..., description="原始查询") | ||
| 333 | + suggestions: List[Dict[str, Any]] = Field(..., description="建议列表") | ||
| 334 | + took_ms: int = Field(..., description="耗时(毫秒)") | ||
| 335 | +``` | ||
| 336 | + | ||
| 337 | +### 2. 查询构建器重构 | ||
| 338 | + | ||
| 339 | +#### 2.1 移除硬编码的 price_ranges 逻辑 | ||
| 340 | + | ||
| 341 | +**文件**:`search/es_query_builder.py` | ||
| 342 | + | ||
| 343 | +**需要修改的方法**:`_build_filters(self, filters, range_filters)` | ||
| 344 | + | ||
| 345 | +**改进点**: | ||
| 346 | + | ||
| 347 | +1. 移除 `if field == 'price_ranges'` 的特殊处理 | ||
| 348 | +2. 分离 filters 和 range_filters 的处理逻辑 | ||
| 349 | +3. 添加字段类型验证(利用配置系统) | ||
| 350 | + | ||
| 351 | +**新的实现逻辑**: | ||
| 352 | + | ||
| 353 | +```python | ||
| 354 | +def _build_filters( | ||
| 355 | + self, | ||
| 356 | + filters: Optional[Dict[str, Any]] = None, | ||
| 357 | + range_filters: Optional[Dict[str, Any]] = None | ||
| 358 | +) -> List[Dict[str, Any]]: | ||
| 359 | + """ | ||
| 360 | + 构建过滤子句(重构版)。 | ||
| 361 | + | ||
| 362 | + Args: | ||
| 363 | + filters: 精确匹配过滤器字典 | ||
| 364 | + range_filters: 范围过滤器字典 | ||
| 365 | + | ||
| 366 | + Returns: | ||
| 367 | + ES filter子句列表 | ||
| 368 | + """ | ||
| 369 | + filter_clauses = [] | ||
| 370 | + | ||
| 371 | + # 1. 处理精确匹配过滤 | ||
| 372 | + if filters: | ||
| 373 | + for field, value in filters.items(): | ||
| 374 | + if isinstance(value, list): | ||
| 375 | + # 多值匹配(OR) | ||
| 376 | + filter_clauses.append({ | ||
| 377 | + "terms": {field: value} | ||
| 378 | + }) | ||
| 379 | + else: | ||
| 380 | + # 单值精确匹配 | ||
| 381 | + filter_clauses.append({ | ||
| 382 | + "term": {field: value} | ||
| 383 | + }) | ||
| 384 | + | ||
| 385 | + # 2. 处理范围过滤 | ||
| 386 | + if range_filters: | ||
| 387 | + for field, range_spec in range_filters.items(): | ||
| 388 | + # 验证字段是否为数值类型(可选,基于配置) | ||
| 389 | + # TODO: 添加字段类型验证 | ||
| 390 | + | ||
| 391 | + # 构建范围查询 | ||
| 392 | + range_conditions = {} | ||
| 393 | + if isinstance(range_spec, dict): | ||
| 394 | + for op in ['gte', 'gt', 'lte', 'lt']: | ||
| 395 | + if op in range_spec and range_spec[op] is not None: | ||
| 396 | + range_conditions[op] = range_spec[op] | ||
| 397 | + | ||
| 398 | + if range_conditions: | ||
| 399 | + filter_clauses.append({ | ||
| 400 | + "range": {field: range_conditions} | ||
| 401 | + }) | ||
| 402 | + | ||
| 403 | + return filter_clauses | ||
| 404 | +``` | ||
| 405 | + | ||
| 406 | +#### 2.2 优化聚合参数接口 | ||
| 407 | + | ||
| 408 | +**新增方法**:`build_facets(self, facet_configs)` | ||
| 409 | + | ||
| 410 | +**改进点**: | ||
| 411 | + | ||
| 412 | +1. 移除 `add_dynamic_aggregations`(直接暴露 ES DSL) | ||
| 413 | +2. 重构 `add_aggregations` 为更通用的 `build_facets` | ||
| 414 | +3. 支持简化配置和高级配置两种模式 | ||
| 415 | + | ||
| 416 | +**新的实现逻辑**: | ||
| 417 | + | ||
| 418 | +```python | ||
| 419 | +def build_facets( | ||
| 420 | + self, | ||
| 421 | + facet_configs: Optional[List[Union[str, Dict[str, Any]]]] = None | ||
| 422 | +) -> Dict[str, Any]: | ||
| 423 | + """ | ||
| 424 | + 构建分面聚合(重构版)。 | ||
| 425 | + | ||
| 426 | + Args: | ||
| 427 | + facet_configs: 分面配置列表。可以是: | ||
| 428 | + - 字符串列表:字段名,使用默认配置 | ||
| 429 | + - 配置对象列表:详细的分面配置 | ||
| 430 | + | ||
| 431 | + Returns: | ||
| 432 | + ES aggregations字典 | ||
| 433 | + """ | ||
| 434 | + if not facet_configs: | ||
| 435 | + return {} | ||
| 436 | + | ||
| 437 | + aggs = {} | ||
| 438 | + | ||
| 439 | + for config in facet_configs: | ||
| 440 | + # 1. 简单模式:只有字段名 | ||
| 441 | + if isinstance(config, str): | ||
| 442 | + field = config | ||
| 443 | + agg_name = f"{field}_facet" | ||
| 444 | + aggs[agg_name] = { | ||
| 445 | + "terms": { | ||
| 446 | + "field": field, | ||
| 447 | + "size": 10, # 默认大小 | ||
| 448 | + "order": {"_count": "desc"} | ||
| 449 | + } | ||
| 450 | + } | ||
| 451 | + | ||
| 452 | + # 2. 高级模式:详细配置对象 | ||
| 453 | + elif isinstance(config, dict): | ||
| 454 | + field = config['field'] | ||
| 455 | + facet_type = config.get('type', 'terms') | ||
| 456 | + size = config.get('size', 10) | ||
| 457 | + agg_name = f"{field}_facet" | ||
| 458 | + | ||
| 459 | + if facet_type == 'terms': | ||
| 460 | + # Terms 聚合(分组统计) | ||
| 461 | + aggs[agg_name] = { | ||
| 462 | + "terms": { | ||
| 463 | + "field": field, | ||
| 464 | + "size": size, | ||
| 465 | + "order": {"_count": "desc"} | ||
| 466 | + } | ||
| 467 | + } | ||
| 468 | + | ||
| 469 | + elif facet_type == 'range': | ||
| 470 | + # Range 聚合(范围统计) | ||
| 471 | + ranges = config.get('ranges', []) | ||
| 472 | + if ranges: | ||
| 473 | + aggs[agg_name] = { | ||
| 474 | + "range": { | ||
| 475 | + "field": field, | ||
| 476 | + "ranges": ranges | ||
| 477 | + } | ||
| 478 | + } | ||
| 479 | + | ||
| 480 | + return aggs | ||
| 481 | +``` | ||
| 482 | + | ||
| 483 | +#### 2.3 更新主查询构建方法 | ||
| 484 | + | ||
| 485 | +**修改方法签名**:`build_query()` | ||
| 486 | + | ||
| 487 | +```python | ||
| 488 | +def build_query( | ||
| 489 | + self, | ||
| 490 | + query_text: str, | ||
| 491 | + query_vector: Optional[np.ndarray] = None, | ||
| 492 | + query_node: Optional[QueryNode] = None, | ||
| 493 | + filters: Optional[Dict[str, Any]] = None, | ||
| 494 | + range_filters: Optional[Dict[str, Any]] = None, # 新增 | ||
| 495 | + size: int = 10, | ||
| 496 | + from_: int = 0, | ||
| 497 | + enable_knn: bool = True, | ||
| 498 | + knn_k: int = 50, | ||
| 499 | + knn_num_candidates: int = 200, | ||
| 500 | + min_score: Optional[float] = None | ||
| 501 | +) -> Dict[str, Any]: | ||
| 502 | + """构建完整的 ES 查询(重构版)""" | ||
| 503 | + # ... 实现 | ||
| 504 | + | ||
| 505 | + # 添加过滤器 | ||
| 506 | + if filters or range_filters: | ||
| 507 | + filter_clauses = self._build_filters(filters, range_filters) | ||
| 508 | + if filter_clauses: | ||
| 509 | + es_query["query"] = { | ||
| 510 | + "bool": { | ||
| 511 | + "must": [query_clause], | ||
| 512 | + "filter": filter_clauses | ||
| 513 | + } | ||
| 514 | + } | ||
| 515 | +``` | ||
| 516 | + | ||
| 517 | +### 3. 搜索执行层重构 | ||
| 518 | + | ||
| 519 | +**文件**:`search/searcher.py` | ||
| 520 | + | ||
| 521 | +**需要修改的方法**:`search()` | ||
| 522 | + | ||
| 523 | +**改进点**: | ||
| 524 | + | ||
| 525 | +1. 更新方法签名,接受 `range_filters` 参数 | ||
| 526 | +2. 使用新的 `build_facets` 方法替代旧的聚合逻辑 | ||
| 527 | +3. 标准化分面搜索结果 | ||
| 528 | + | ||
| 529 | +**关键代码片段**: | ||
| 530 | + | ||
| 531 | +```python | ||
| 532 | +def search( | ||
| 533 | + self, | ||
| 534 | + query: str, | ||
| 535 | + size: int = 10, | ||
| 536 | + from_: int = 0, | ||
| 537 | + filters: Optional[Dict[str, Any]] = None, | ||
| 538 | + range_filters: Optional[Dict[str, Any]] = None, # 新增 | ||
| 539 | + facets: Optional[List[Union[str, Dict]]] = None, # 替代 aggregations | ||
| 540 | + min_score: Optional[float] = None, | ||
| 541 | + sort_by: Optional[str] = None, | ||
| 542 | + sort_order: Optional[str] = "desc", | ||
| 543 | + debug: bool = False, | ||
| 544 | + context: Optional[RequestContext] = None | ||
| 545 | +) -> SearchResult: | ||
| 546 | + """执行搜索(重构版)""" | ||
| 547 | + | ||
| 548 | + # ... 查询解析 ... | ||
| 549 | + | ||
| 550 | + # 构建 ES 查询 | ||
| 551 | + es_query = self.query_builder.build_multilang_query( | ||
| 552 | + parsed_query=parsed_query, | ||
| 553 | + query_vector=parsed_query.query_vector, | ||
| 554 | + query_node=query_node, | ||
| 555 | + filters=filters, | ||
| 556 | + range_filters=range_filters, # 新增 | ||
| 557 | + size=size, | ||
| 558 | + from_=from_, | ||
| 559 | + enable_knn=enable_embedding, | ||
| 560 | + min_score=min_score | ||
| 561 | + ) | ||
| 562 | + | ||
| 563 | + # 添加分面聚合 | ||
| 564 | + if facets: | ||
| 565 | + facet_aggs = self.query_builder.build_facets(facets) | ||
| 566 | + if facet_aggs: | ||
| 567 | + if "aggs" not in es_query: | ||
| 568 | + es_query["aggs"] = {} | ||
| 569 | + es_query["aggs"].update(facet_aggs) | ||
| 570 | + | ||
| 571 | + # ... 执行搜索 ... | ||
| 572 | + | ||
| 573 | + # 标准化分面结果 | ||
| 574 | + standardized_facets = self._standardize_facets( | ||
| 575 | + es_response.get('aggregations', {}), | ||
| 576 | + facets, | ||
| 577 | + filters | ||
| 578 | + ) | ||
| 579 | + | ||
| 580 | + return SearchResult( | ||
| 581 | + hits=hits, | ||
| 582 | + total=total_value, | ||
| 583 | + max_score=max_score, | ||
| 584 | + took_ms=int(total_duration), | ||
| 585 | + facets=standardized_facets, # 标准化格式 | ||
| 586 | + query_info=parsed_query.to_dict(), | ||
| 587 | + debug_info=debug_info | ||
| 588 | + ) | ||
| 589 | +``` | ||
| 590 | + | ||
| 591 | +**新增辅助方法**: | ||
| 592 | + | ||
| 593 | +```python | ||
| 594 | +def _standardize_facets( | ||
| 595 | + self, | ||
| 596 | + es_aggregations: Dict[str, Any], | ||
| 597 | + facet_configs: Optional[List[Union[str, Dict]]], | ||
| 598 | + current_filters: Optional[Dict[str, Any]] | ||
| 599 | +) -> Optional[List[Dict[str, Any]]]: | ||
| 600 | + """ | ||
| 601 | + 将 ES 聚合结果转换为标准化的分面格式。 | ||
| 602 | + | ||
| 603 | + Args: | ||
| 604 | + es_aggregations: ES 原始聚合结果 | ||
| 605 | + facet_configs: 分面配置列表 | ||
| 606 | + current_filters: 当前应用的过滤器 | ||
| 607 | + | ||
| 608 | + Returns: | ||
| 609 | + 标准化的分面结果列表 | ||
| 610 | + """ | ||
| 611 | + if not es_aggregations or not facet_configs: | ||
| 612 | + return None | ||
| 613 | + | ||
| 614 | + standardized_facets = [] | ||
| 615 | + | ||
| 616 | + for config in facet_configs: | ||
| 617 | + # 解析配置 | ||
| 618 | + if isinstance(config, str): | ||
| 619 | + field = config | ||
| 620 | + facet_type = "terms" | ||
| 621 | + else: | ||
| 622 | + field = config['field'] | ||
| 623 | + facet_type = config.get('type', 'terms') | ||
| 624 | + | ||
| 625 | + agg_name = f"{field}_facet" | ||
| 626 | + | ||
| 627 | + if agg_name not in es_aggregations: | ||
| 628 | + continue | ||
| 629 | + | ||
| 630 | + agg_result = es_aggregations[agg_name] | ||
| 631 | + | ||
| 632 | + # 构建标准化分面结果 | ||
| 633 | + facet = { | ||
| 634 | + "field": field, | ||
| 635 | + "label": self._get_field_label(field), # 从配置获取 | ||
| 636 | + "type": facet_type, | ||
| 637 | + "values": [] | ||
| 638 | + } | ||
| 639 | + | ||
| 640 | + # 获取当前字段的选中值 | ||
| 641 | + selected_values = set() | ||
| 642 | + if current_filters and field in current_filters: | ||
| 643 | + filter_value = current_filters[field] | ||
| 644 | + if isinstance(filter_value, list): | ||
| 645 | + selected_values = set(filter_value) | ||
| 646 | + else: | ||
| 647 | + selected_values = {filter_value} | ||
| 648 | + | ||
| 649 | + # 转换 buckets | ||
| 650 | + if 'buckets' in agg_result: | ||
| 651 | + for bucket in agg_result['buckets']: | ||
| 652 | + value = bucket.get('key') | ||
| 653 | + count = bucket.get('doc_count', 0) | ||
| 654 | + | ||
| 655 | + facet['values'].append({ | ||
| 656 | + "value": value, | ||
| 657 | + "label": str(value), # 可以从配置映射 | ||
| 658 | + "count": count, | ||
| 659 | + "selected": value in selected_values | ||
| 660 | + }) | ||
| 661 | + | ||
| 662 | + standardized_facets.append(facet) | ||
| 663 | + | ||
| 664 | + return standardized_facets | ||
| 665 | + | ||
| 666 | + | ||
| 667 | +def _get_field_label(self, field: str) -> str: | ||
| 668 | + """获取字段的显示标签""" | ||
| 669 | + # 从配置中获取字段标签 | ||
| 670 | + for field_config in self.config.fields: | ||
| 671 | + if field_config.name == field: | ||
| 672 | + # 假设配置中有 label 字段 | ||
| 673 | + return getattr(field_config, 'label', field) | ||
| 674 | + return field | ||
| 675 | +``` | ||
| 676 | + | ||
| 677 | +### 4. API 路由层更新 | ||
| 678 | + | ||
| 679 | +**文件**:`api/routes/search.py` | ||
| 680 | + | ||
| 681 | +**改进点**: | ||
| 682 | + | ||
| 683 | +1. 接受新的请求模型参数 | ||
| 684 | +2. 添加搜索建议端点(框架) | ||
| 685 | + | ||
| 686 | +**新增端点**: | ||
| 687 | + | ||
| 688 | +```python | ||
| 689 | +@router.get("/suggestions", response_model=SearchSuggestResponse) | ||
| 690 | +async def search_suggestions( | ||
| 691 | + q: str = Query(..., min_length=1, description="搜索查询"), | ||
| 692 | + size: int = Query(5, ge=1, le=20, description="建议数量"), | ||
| 693 | + types: str = Query("query", description="建议类型(逗号分隔)") | ||
| 694 | +): | ||
| 695 | + """ | ||
| 696 | + 获取搜索建议(自动补全)。 | ||
| 697 | + | ||
| 698 | + 功能说明: | ||
| 699 | + - 查询建议(query):基于历史搜索和热门搜索 | ||
| 700 | + - 商品建议(product):匹配的商品 | ||
| 701 | + - 类目建议(category):匹配的类目 | ||
| 702 | + - 品牌建议(brand):匹配的品牌 | ||
| 703 | + | ||
| 704 | + 注意:此功能暂未实现,仅返回框架响应。 | ||
| 705 | + """ | ||
| 706 | + import time | ||
| 707 | + start_time = time.time() | ||
| 708 | + | ||
| 709 | + # TODO: 实现搜索建议逻辑 | ||
| 710 | + # 1. 从搜索历史中获取建议 | ||
| 711 | + # 2. 从商品标题中匹配前缀 | ||
| 712 | + # 3. 从类目、品牌中匹配 | ||
| 713 | + | ||
| 714 | + # 临时返回空结果 | ||
| 715 | + suggestions = [] | ||
| 716 | + | ||
| 717 | + # 示例结构(暂不实现) | ||
| 718 | + # suggestions = [ | ||
| 719 | + # { | ||
| 720 | + # "text": "芭比娃娃", | ||
| 721 | + # "type": "query", | ||
| 722 | + # "highlight": "<em>芭</em>比娃娃", | ||
| 723 | + # "popularity": 850 | ||
| 724 | + # } | ||
| 725 | + # ] | ||
| 726 | + | ||
| 727 | + took_ms = int((time.time() - start_time) * 1000) | ||
| 728 | + | ||
| 729 | + return SearchSuggestResponse( | ||
| 730 | + query=q, | ||
| 731 | + suggestions=suggestions, | ||
| 732 | + took_ms=took_ms | ||
| 733 | + ) | ||
| 734 | + | ||
| 735 | + | ||
| 736 | +@router.get("/instant", response_model=SearchResponse) | ||
| 737 | +async def instant_search( | ||
| 738 | + q: str = Query(..., min_length=2, description="搜索查询"), | ||
| 739 | + size: int = Query(5, ge=1, le=20, description="结果数量") | ||
| 740 | +): | ||
| 741 | + """ | ||
| 742 | + 即时搜索(Instant Search)。 | ||
| 743 | + | ||
| 744 | + 功能说明: | ||
| 745 | + - 边输入边搜索,无需点击搜索按钮 | ||
| 746 | + - 返回简化的搜索结果 | ||
| 747 | + - 性能优化:缓存、限流 | ||
| 748 | + | ||
| 749 | + 注意:此功能暂未实现,调用标准搜索接口。 | ||
| 750 | + """ | ||
| 751 | + # TODO: 优化即时搜索性能 | ||
| 752 | + # 1. 添加防抖/节流 | ||
| 753 | + # 2. 实现结果缓存 | ||
| 754 | + # 3. 简化返回字段 | ||
| 755 | + | ||
| 756 | + # 临时使用标准搜索接口 | ||
| 757 | + from api.app import get_searcher | ||
| 758 | + searcher = get_searcher() | ||
| 759 | + | ||
| 760 | + result = searcher.search( | ||
| 761 | + query=q, | ||
| 762 | + size=size, | ||
| 763 | + from_=0 | ||
| 764 | + ) | ||
| 765 | + | ||
| 766 | + return SearchResponse( | ||
| 767 | + hits=result.hits, | ||
| 768 | + total=result.total, | ||
| 769 | + max_score=result.max_score, | ||
| 770 | + took_ms=result.took_ms, | ||
| 771 | + query_info=result.query_info | ||
| 772 | + ) | ||
| 773 | +``` | ||
| 774 | + | ||
| 775 | +### 5. 前端适配 | ||
| 776 | + | ||
| 777 | +**文件**:`frontend/static/js/app.js` | ||
| 778 | + | ||
| 779 | +**需要修改的地方**: | ||
| 780 | + | ||
| 781 | +1. **聚合参数改用简化配置**(第 57-87 行): | ||
| 782 | +```javascript | ||
| 783 | +// 旧的方式(直接 ES DSL) | ||
| 784 | +const aggregations = { | ||
| 785 | + "category_stats": { | ||
| 786 | + "terms": { | ||
| 787 | + "field": "categoryName_keyword", | ||
| 788 | + "size": 15 | ||
| 789 | + } | ||
| 790 | + } | ||
| 791 | +}; | ||
| 792 | + | ||
| 793 | +// 新的方式(简化配置) | ||
| 794 | +const facets = [ | ||
| 795 | + { | ||
| 796 | + "field": "categoryName_keyword", | ||
| 797 | + "size": 15, | ||
| 798 | + "type": "terms" | ||
| 799 | + }, | ||
| 800 | + { | ||
| 801 | + "field": "brandName_keyword", | ||
| 802 | + "size": 15, | ||
| 803 | + "type": "terms" | ||
| 804 | + }, | ||
| 805 | + { | ||
| 806 | + "field": "price", | ||
| 807 | + "type": "range", | ||
| 808 | + "ranges": [ | ||
| 809 | + {"key": "0-50", "to": 50}, | ||
| 810 | + {"key": "50-100", "from": 50, "to": 100}, | ||
| 811 | + {"key": "100-200", "from": 100, "to": 200}, | ||
| 812 | + {"key": "200+", "from": 200} | ||
| 813 | + ] | ||
| 814 | + } | ||
| 815 | +]; | ||
| 816 | +``` | ||
| 817 | + | ||
| 818 | +2. **过滤器使用新格式**(第 103 行): | ||
| 819 | +```javascript | ||
| 820 | +// 旧的方式 | ||
| 821 | +filters: { | ||
| 822 | + "price_ranges": ["0-50", "50-100"] // 硬编码 | ||
| 823 | +} | ||
| 824 | + | ||
| 825 | +// 新的方式 | ||
| 826 | +filters: { | ||
| 827 | + "categoryName_keyword": ["玩具"], | ||
| 828 | + "in_stock": true | ||
| 829 | +}, | ||
| 830 | +range_filters: { | ||
| 831 | + "price": {"gte": 50, "lte": 100} | ||
| 832 | +} | ||
| 833 | +``` | ||
| 834 | + | ||
| 835 | +3. **解析标准化的分面结果**(第 208-258 行): | ||
| 836 | +```javascript | ||
| 837 | +// 旧的方式(直接访问 ES 结构) | ||
| 838 | +if (aggregations.category_stats && aggregations.category_stats.buckets) { | ||
| 839 | + aggregations.category_stats.buckets.forEach(bucket => { | ||
| 840 | + // ... | ||
| 841 | + }); | ||
| 842 | +} | ||
| 843 | + | ||
| 844 | +// 新的方式(标准化格式) | ||
| 845 | +if (data.facets) { | ||
| 846 | + data.facets.forEach(facet => { | ||
| 847 | + if (facet.field === 'categoryName_keyword') { | ||
| 848 | + facet.values.forEach(facetValue => { | ||
| 849 | + const value = facetValue.value; | ||
| 850 | + const count = facetValue.count; | ||
| 851 | + const selected = facetValue.selected; | ||
| 852 | + // ... | ||
| 853 | + }); | ||
| 854 | + } | ||
| 855 | + }); | ||
| 856 | +} | ||
| 857 | +``` | ||
| 858 | + | ||
| 859 | + | ||
| 860 | +### 6. 测试代码更新 | ||
| 861 | + | ||
| 862 | +**文件**:`test_aggregation_api.py` | ||
| 863 | + | ||
| 864 | +**需要修改的地方**: | ||
| 865 | + | ||
| 866 | +1. 移除 `price_ranges` 硬编码测试(第 93 行) | ||
| 867 | +2. 使用新的 `range_filters` 格式 | ||
| 868 | +3. 使用新的 `facets` 配置 | ||
| 869 | + | ||
| 870 | +**新的测试代码**: | ||
| 871 | + | ||
| 872 | +```python | ||
| 873 | +def test_search_with_filters(): | ||
| 874 | + """测试新的过滤器格式""" | ||
| 875 | + test_request = { | ||
| 876 | + "query": "玩具", | ||
| 877 | + "size": 5, | ||
| 878 | + "filters": { | ||
| 879 | + "categoryName_keyword": ["玩具"] | ||
| 880 | + }, | ||
| 881 | + "range_filters": { | ||
| 882 | + "price": {"gte": 50, "lte": 100} | ||
| 883 | + } | ||
| 884 | + } | ||
| 885 | + # ... | ||
| 886 | + | ||
| 887 | +def test_search_with_facets(): | ||
| 888 | + """测试新的分面配置""" | ||
| 889 | + test_request = { | ||
| 890 | + "query": "玩具", | ||
| 891 | + "size": 20, | ||
| 892 | + "facets": [ | ||
| 893 | + { | ||
| 894 | + "field": "categoryName_keyword", | ||
| 895 | + "size": 15 | ||
| 896 | + }, | ||
| 897 | + { | ||
| 898 | + "field": "price", | ||
| 899 | + "type": "range", | ||
| 900 | + "ranges": [ | ||
| 901 | + {"key": "0-50", "to": 50}, | ||
| 902 | + {"key": "50-100", "from": 50, "to": 100} | ||
| 903 | + ] | ||
| 904 | + } | ||
| 905 | + ] | ||
| 906 | + } | ||
| 907 | + # ... | ||
| 908 | +``` | ||
| 909 | + | ||
| 910 | +## 第三部分:实施步骤 | ||
| 911 | + | ||
| 912 | +### 阶段 1:后端模型层重构(高优先级) | ||
| 913 | + | ||
| 914 | +**任务清单**: | ||
| 915 | + | ||
| 916 | +- [ ] 更新 `api/models.py` | ||
| 917 | + - [ ] 定义 `RangeFilter` 模型 | ||
| 918 | + - [ ] 定义 `FacetConfig` 模型 | ||
| 919 | + - [ ] 更新 `SearchRequest`,添加 `range_filters` 和 `facets` | ||
| 920 | + - [ ] 移除 `aggregations` 参数 | ||
| 921 | + - [ ] 定义 `FacetValue` 和 `FacetResult` 模型 | ||
| 922 | + - [ ] 更新 `SearchResponse`,使用标准化分面格式 | ||
| 923 | + - [ ] 添加 `SearchSuggestRequest` 和 `SearchSuggestResponse`(框架) | ||
| 924 | + | ||
| 925 | +**验证方式**: | ||
| 926 | + | ||
| 927 | +- 运行 Pydantic 模型验证 | ||
| 928 | +- 检查 API 文档(`/docs`)是否正确生成 | ||
| 929 | + | ||
| 930 | +### 阶段 2:查询构建器重构(高优先级) | ||
| 931 | + | ||
| 932 | +**任务清单**: | ||
| 933 | + | ||
| 934 | +- [ ] 重构 `search/es_query_builder.py` | ||
| 935 | + - [ ] 移除 `price_ranges` 硬编码逻辑(第 205-233 行) | ||
| 936 | + - [ ] 重构 `_build_filters` 方法,支持 `range_filters` | ||
| 937 | + - [ ] 移除 `add_dynamic_aggregations` 方法 | ||
| 938 | + - [ ] 重构 `add_aggregations` 为 `build_facets` | ||
| 939 | + - [ ] 更新 `build_query` 方法签名 | ||
| 940 | +- [ ] 更新 `search/multilang_query_builder.py`(`search()` 会向 `build_multilang_query` 传入 `range_filters`;若该方法定义于此文件,需同步支持该参数) | ||
| 941 | + | ||
| 942 | +**验证方式**: | ||
| 943 | + | ||
| 944 | +- 编写单元测试验证过滤器构建逻辑 | ||
| 945 | +- 打印生成的 ES DSL,检查正确性 | ||
| 946 | + | ||
| 947 | +### 阶段 3:搜索执行层重构(高优先级) | ||
| 948 | + | ||
| 949 | +**任务清单**: | ||
| 950 | + | ||
| 951 | +- [ ] 更新 `search/searcher.py` | ||
| 952 | + - [ ] 更新 `search()` 方法签名 | ||
| 953 | + - [ ] 使用新的 `build_facets` 方法 | ||
| 954 | + - [ ] 实现 `_standardize_facets()` 辅助方法 | ||
| 955 | + - [ ] 实现 `_get_field_label()` 辅助方法 | ||
| 956 | + - [ ] 更新 `SearchResult` 类,使用标准化分面格式 | ||
| 957 | + | ||
| 958 | +**验证方式**: | ||
| 959 | + | ||
| 960 | +- 编写集成测试 | ||
| 961 | +- 手动测试搜索功能 | ||
| 962 | + | ||
| 963 | +### 阶段 4:API 路由层更新(中优先级) | ||
| 964 | + | ||
| 965 | +**任务清单**: | ||
| 966 | + | ||
| 967 | +- [ ] 更新 `api/routes/search.py` | ||
| 968 | + - [ ] 更新 `/search/` 端点,接受新的请求参数 | ||
| 969 | + - [ ] 添加 `/search/suggestions` 端点(框架,返回空结果) | ||
| 970 | + - [ ] 添加 `/search/instant` 端点(框架,调用标准搜索) | ||
| 971 | + - [ ] 添加端点文档和示例 | ||
| 972 | + | ||
| 973 | +**验证方式**: | ||
| 974 | + | ||
| 975 | +- 使用 Swagger UI 测试端点 | ||
| 976 | +- 检查 API 文档完整性 | ||
| 977 | + | ||
| 978 | +### 阶段 5:前端适配(中优先级) | ||
| 979 | + | ||
| 980 | +**任务清单**: | ||
| 981 | + | ||
| 982 | +- [ ] 更新 `frontend/static/js/app.js` | ||
| 983 | + - [ ] 修改聚合参数为 `facets` 简化配置 | ||
| 984 | + - [ ] 修改过滤器参数,分离 `filters` 和 `range_filters` | ||
| 985 | + - [ ] 更新 `displayAggregations()` 方法,解析标准化分面结果 | ||
| 986 | + - [ ] 添加范围过滤器 UI(如价格滑块) | ||
| 987 | + - [ ] 移除硬编码的 `price_ranges` | ||
| 988 | + | ||
| 989 | +**验证方式**: | ||
| 990 | + | ||
| 991 | +- 浏览器测试前端功能 | ||
| 992 | +- 检查网络请求和响应格式 | ||
| 993 | + | ||
| 994 | +### 阶段 6:测试代码更新(低优先级) | ||
| 995 | + | ||
| 996 | +**任务清单**: | ||
| 997 | + | ||
| 998 | +- [ ] 更新 `test_aggregation_api.py` | ||
| 999 | + - [ ] 移除 `price_ranges` 测试 | ||
| 1000 | + - [ ] 添加 `range_filters` 测试 | ||
| 1001 | + - [ ] 添加新的 `facets` 测试 | ||
| 1002 | +- [ ] 更新 `test_complete_search.py` | ||
| 1003 | +- [ ] 更新 `tests/integration/test_aggregation_api.py` | ||
| 1004 | +- [ ] 更新 `tests/unit/test_searcher.py` | ||
| 1005 | + | ||
| 1006 | +**验证方式**: | ||
| 1007 | + | ||
| 1008 | +- 运行所有测试,确保通过 | ||
| 1009 | +- 检查测试覆盖率 | ||
| 1010 | + | ||
| 1011 | +### 阶段 7:文档更新(低优先级) | ||
| 1012 | + | ||
| 1013 | +**任务清单**: | ||
| 1014 | + | ||
| 1015 | +- [ ] 撰写完整的 API 接口文档 | ||
| 1016 | +- [ ] 更新 `README.md` | ||
| 1017 | +- [ ] 更新 `USER_GUIDE.md` | ||
| 1018 | +- [ ] 添加接口使用示例 | ||
| 1019 | +- [ ] 添加迁移指南(旧接口 → 新接口) | ||
| 1020 | + | ||
| 1021 | +## 第四部分:API 使用示例 | ||
| 1022 | + | ||
| 1023 | +### 示例 1:简单搜索 | ||
| 1024 | + | ||
| 1025 | +```bash | ||
| 1026 | +POST /search/ | ||
| 1027 | +{ | ||
| 1028 | + "query": "芭比娃娃", | ||
| 1029 | + "size": 20 | ||
| 1030 | +} | ||
| 1031 | +``` | ||
| 1032 | + | ||
| 1033 | +### 示例 2:带过滤器的搜索 | ||
| 1034 | + | ||
| 1035 | +```bash | ||
| 1036 | +POST /search/ | ||
| 1037 | +{ | ||
| 1038 | + "query": "玩具", | ||
| 1039 | + "size": 20, | ||
| 1040 | + "filters": { | ||
| 1041 | + "categoryName_keyword": ["玩具", "益智玩具"], | ||
| 1042 | + "in_stock": true | ||
| 1043 | + }, | ||
| 1044 | + "range_filters": { | ||
| 1045 | + "price": {"gte": 50, "lte": 200} | ||
| 1046 | + } | ||
| 1047 | +} | ||
| 1048 | +``` | ||
| 1049 | + | ||
| 1050 | +### 示例 3:带分面搜索的请求 | ||
| 1051 | + | ||
| 1052 | +```bash | ||
| 1053 | +POST /search/ | ||
| 1054 | +{ | ||
| 1055 | + "query": "玩具", | ||
| 1056 | + "size": 20, | ||
| 1057 | + "facets": [ | ||
| 1058 | + { | ||
| 1059 | + "field": "categoryName_keyword", | ||
| 1060 | + "size": 15 | ||
| 1061 | + }, | ||
| 1062 | + { | ||
| 1063 | + "field": "brandName_keyword", | ||
| 1064 | + "size": 15 | ||
| 1065 | + }, | ||
| 1066 | + { | ||
| 1067 | + "field": "price", | ||
| 1068 | + "type": "range", | ||
| 1069 | + "ranges": [ | ||
| 1070 | + {"key": "0-50", "to": 50}, | ||
| 1071 | + {"key": "50-100", "from": 50, "to": 100}, | ||
| 1072 | + {"key": "100-200", "from": 100, "to": 200}, | ||
| 1073 | + {"key": "200+", "from": 200} | ||
| 1074 | + ] | ||
| 1075 | + } | ||
| 1076 | + ] | ||
| 1077 | +} | ||
| 1078 | +``` | ||
| 1079 | + | ||
| 1080 | +**响应示例**(标准化分面格式): | ||
| 1081 | + | ||
| 1082 | +```json | ||
| 1083 | +{ | ||
| 1084 | + "hits": [...], | ||
| 1085 | + "total": 118, | ||
| 1086 | + "max_score": 8.5, | ||
| 1087 | + "took_ms": 45, | ||
| 1088 | + "facets": [ | ||
| 1089 | + { | ||
| 1090 | + "field": "categoryName_keyword", | ||
| 1091 | + "label": "商品类目", | ||
| 1092 | + "type": "terms", | ||
| 1093 | + "values": [ | ||
| 1094 | + {"value": "玩具", "label": "玩具", "count": 85, "selected": false}, | ||
| 1095 | + {"value": "益智玩具", "label": "益智玩具", "count": 33, "selected": false} | ||
| 1096 | + ] | ||
| 1097 | + }, | ||
| 1098 | + { | ||
| 1099 | + "field": "price", | ||
| 1100 | + "label": "价格区间", | ||
| 1101 | + "type": "range", | ||
| 1102 | + "values": [ | ||
| 1103 | + {"value": "0-50", "label": "0-50元", "count": 23, "selected": false}, | ||
| 1104 | + {"value": "50-100", "label": "50-100元", "count": 45, "selected": false}, | ||
| 1105 | + {"value": "100-200", "label": "100-200元", "count": 38, "selected": false}, | ||
| 1106 | + {"value": "200+", "label": "200元以上", "count": 12, "selected": false} | ||
| 1107 | + ] | ||
| 1108 | + } | ||
| 1109 | + ] | ||
| 1110 | +} | ||
| 1111 | +``` | ||
| 1112 | + | ||
| 1113 | +### 示例 4:搜索建议(框架) | ||
| 1114 | + | ||
| 1115 | +```bash | ||
| 1116 | +GET /search/suggestions?q=芭&size=5 | ||
| 1117 | +# 响应: | ||
| 1118 | +{ | ||
| 1119 | + "query": "芭", | ||
| 1120 | + "suggestions": [ | ||
| 1121 | + { | ||
| 1122 | + "text": "芭比娃娃", | ||
| 1123 | + "type": "query", | ||
| 1124 | + "highlight": "<em>芭</em>比娃娃", | ||
| 1125 | + "popularity": 850 | ||
| 1126 | + }, | ||
| 1127 | + { | ||
| 1128 | + "text": "芭比娃娃屋", | ||
| 1129 | + "type": "query", | ||
| 1130 | + "highlight": "<em>芭</em>比娃娃屋", | ||
| 1131 | + "popularity": 320 | ||
| 1132 | + } | ||
| 1133 | + ], | ||
| 1134 | + "took_ms": 5 | ||
| 1135 | +} | ||
| 1136 | +``` | ||
| 1137 | + | ||
| 1138 | +## 第五部分:向后兼容性 | ||
| 1139 | + | ||
| 1140 | +### 兼容策略 | ||
| 1141 | + | ||
| 1142 | +为保持向后兼容,在过渡期(1-2 个版本)内: | ||
| 1143 | + | ||
| 1144 | +1. **同时支持旧参数和新参数**: | ||
| 1145 | +```python | ||
| 1146 | +class SearchRequest(BaseModel): | ||
| 1147 | + # 新参数 | ||
| 1148 | + range_filters: Optional[Dict[str, RangeFilter]] = None | ||
| 1149 | + facets: Optional[List[Union[str, FacetConfig]]] = None | ||
| 1150 | + | ||
| 1151 | + # 旧参数(标记为废弃) | ||
| 1152 | + aggregations: Optional[Dict[str, Any]] = Field( | ||
| 1153 | + None, | ||
| 1154 | + deprecated=True, | ||
| 1155 | + description="已废弃。请使用 'facets' 参数" | ||
| 1156 | + ) | ||
| 1157 | +``` | ||
| 1158 | + | ||
| 1159 | +2. **在后端自动转换旧格式**: | ||
| 1160 | +```python | ||
| 1161 | +# 在 searcher.py 中 | ||
| 1162 | +if request.aggregations and not request.facets: | ||
| 1163 | + # 将旧的 aggregations 转换为新的 facets | ||
| 1164 | + request.facets = self._convert_legacy_aggregations(request.aggregations) | ||
| 1165 | +``` | ||
| 1166 | + | ||
| 1167 | +3. **在响应中提供迁移提示**: | ||
| 1168 | +```python | ||
| 1169 | +if request.aggregations: | ||
| 1170 | + warnings.append({ | ||
| 1171 | + "type": "deprecation", | ||
| 1172 | + "message": "'aggregations' 参数已废弃,请使用 'facets' 参数", | ||
| 1173 | + "migration_guide": "https://docs.example.com/migration" | ||
| 1174 | + }) | ||
| 1175 | +``` | ||
| 1176 | + | ||
| 1177 | + | ||
| 1178 | +### 迁移时间线 | ||
| 1179 | + | ||
| 1180 | +- **v3.0**(当前版本):发布新接口,旧接口标记为废弃 | ||
| 1181 | +- **v3.1**(1 个月后):移除旧接口的自动转换 | ||
| 1182 | +- **v4.0**(3 个月后):完全移除旧接口 | ||
| 1183 | + | ||
| 1184 | +## 第六部分:风险评估与缓解 | ||
| 1185 | + | ||
| 1186 | +### 风险点 | ||
| 1187 | + | ||
| 1188 | +1. **破坏性变更风险**: | ||
| 1189 | + | ||
| 1190 | + - 风险:现有客户代码可能依赖旧接口 | ||
| 1191 | + - 缓解:提供向后兼容层,发布详细迁移指南 | ||
| 1192 | + | ||
| 1193 | +2. **性能影响风险**: | ||
| 1194 | + | ||
| 1195 | + - 风险:新的标准化处理可能增加延迟 | ||
| 1196 | + - 缓解:添加性能测试,优化关键路径 | ||
| 1197 | + | ||
| 1198 | +3. **测试覆盖不足风险**: | ||
| 1199 | + | ||
| 1200 | + - 风险:重构可能引入新 bug | ||
| 1201 | + - 缓解:全面的单元测试和集成测试 | ||
| 1202 | + | ||
| 1203 | +### 验收标准 | ||
| 1204 | + | ||
| 1205 | +- [ ] 所有单元测试通过 | ||
| 1206 | +- [ ] 所有集成测试通过 | ||
| 1207 | +- [ ] API 文档完整且准确 | ||
| 1208 | +- [ ] 性能无明显下降(< 10% 延迟增加) | ||
| 1209 | +- [ ] 前端功能正常工作 | ||
| 1210 | +- [ ] 提供完整的迁移指南 | ||
| 1211 | + | ||
| 1212 | +## 总结 | ||
| 1213 | + | ||
| 1214 | +本计划通过系统性的重构,将搜索 API 从硬编码、暴露 ES 细节的实现,转变为灵活、通用、易用的 SaaS 产品接口。关键改进包括: | ||
| 1215 | + | ||
| 1216 | +1. ✅ 移除硬编码的 price_ranges 逻辑 | ||
| 1217 | +2. ✅ 实现结构化的过滤参数(filters + range_filters) | ||
| 1218 | +3. ✅ 简化聚合参数接口,不暴露 ES DSL | ||
| 1219 | +4. ✅ 标准化分面搜索响应格式 | ||
| 1220 | +5. ✅ 添加搜索建议功能框架(暂不实现) | ||
| 1221 | + | ||
| 1222 | +通过这些改进,系统将具备更好的通用性、可维护性和可扩展性,为未来功能扩展奠定基础。 | ||
| 0 | \ No newline at end of file | 1223 | \ No newline at end of file |
.cursor/plans/api-interface-analysis-42918612.plan.1.md
0 → 100644
| @@ -0,0 +1,1222 @@ | @@ -0,0 +1,1222 @@ | ||
| 1 | +<!-- 42918612-89ae-45a6-9ccb-e2b0df0a7aec 16a7805b-d679-466c-8887-3f6a8cc80493 --> | ||
| 2 | +# 搜索引擎 API 接口重构实施计划 | ||
| 3 | + | ||
| 4 | +## 第一部分:现状分析 | ||
| 5 | + | ||
| 6 | +### 1. 当前实现存在的问题 | ||
| 7 | + | ||
| 8 | +#### 问题 1:硬编码的价格范围过滤 | ||
| 9 | + | ||
| 10 | +**位置**:`search/es_query_builder.py` 第 205-233 行 | ||
| 11 | + | ||
| 12 | +**问题描述**: | ||
| 13 | + | ||
| 14 | +```python | ||
| 15 | +if field == 'price_ranges': | ||
| 16 | + # 硬编码特定字符串值 | ||
| 17 | + if price_range == '0-50': | ||
| 18 | + price_ranges.append({"lt": 50}) | ||
| 19 | + elif price_range == '50-100': | ||
| 20 | + price_ranges.append({"gte": 50, "lt": 100}) | ||
| 21 | + # ... | ||
| 22 | +``` | ||
| 23 | + | ||
| 24 | +**影响**: | ||
| 25 | + | ||
| 26 | +- 只支持 `price` 字段,无法扩展到其他数值字段 | ||
| 27 | +- 范围值硬编码,无法根据业务需求调整 | ||
| 28 | +- 不符合 SaaS 系统的通用性要求 | ||
| 29 | + | ||
| 30 | +#### 问题 2:聚合参数直接暴露 ES DSL | ||
| 31 | + | ||
| 32 | +**位置**: | ||
| 33 | + | ||
| 34 | +- `api/models.py` 第 17 行:`aggregations: Optional[Dict[str, Any]]` | ||
| 35 | +- `search/es_query_builder.py` 第 298-319 行:`add_dynamic_aggregations` | ||
| 36 | +- `frontend/static/js/app.js` 第 57-87 行:前端硬编码 ES DSL | ||
| 37 | + | ||
| 38 | +**问题描述**: | ||
| 39 | + | ||
| 40 | +前端需要了解 Elasticsearch 的聚合语法: | ||
| 41 | + | ||
| 42 | +```javascript | ||
| 43 | +const aggregations = { | ||
| 44 | + "category_stats": { | ||
| 45 | + "terms": { | ||
| 46 | + "field": "categoryName_keyword", | ||
| 47 | + "size": 15 | ||
| 48 | + } | ||
| 49 | + }, | ||
| 50 | + "price_ranges": { | ||
| 51 | + "range": { | ||
| 52 | + "field": "price", | ||
| 53 | + "ranges": [ | ||
| 54 | + {"key": "0-50", "to": 50}, | ||
| 55 | + // ... | ||
| 56 | + ] | ||
| 57 | + } | ||
| 58 | + } | ||
| 59 | +}; | ||
| 60 | +``` | ||
| 61 | + | ||
| 62 | +**影响**: | ||
| 63 | + | ||
| 64 | +- 前端需要了解 ES 语法,增加集成难度 | ||
| 65 | +- 不符合 SaaS 产品易用性原则 | ||
| 66 | +- 难以进行参数验证和文档生成 | ||
| 67 | + | ||
| 68 | +#### 问题 3:分面搜索结果格式不统一 | ||
| 69 | + | ||
| 70 | +**位置**:`frontend/static/js/app.js` 第 208-258 行 | ||
| 71 | + | ||
| 72 | +**问题描述**: | ||
| 73 | + | ||
| 74 | +- 直接返回 ES 原始格式(`buckets` 结构) | ||
| 75 | +- 前端需要知道不同聚合类型的响应结构 | ||
| 76 | +- 没有统一的分面结果模型 | ||
| 77 | + | ||
| 78 | +**影响**: | ||
| 79 | + | ||
| 80 | +- 前端解析逻辑复杂 | ||
| 81 | +- 不同类型的聚合处理方式不一致 | ||
| 82 | +- 难以扩展新的聚合类型 | ||
| 83 | + | ||
| 84 | +#### 问题 4:缺少搜索建议功能 | ||
| 85 | + | ||
| 86 | +**当前状态**:完全没有实现 | ||
| 87 | + | ||
| 88 | +**需求**: | ||
| 89 | + | ||
| 90 | +- 自动补全(Autocomplete) | ||
| 91 | +- 搜索建议(Suggestions) | ||
| 92 | +- 搜索即时反馈(Instant Search) | ||
| 93 | + | ||
| 94 | +### 2. 依赖关系分析 | ||
| 95 | + | ||
| 96 | +**影响范围**: | ||
| 97 | + | ||
| 98 | +1. **后端模型层**:`api/models.py` | ||
| 99 | +2. **查询构建层**:`search/es_query_builder.py` | ||
| 100 | +3. **搜索执行层**:`search/searcher.py` | ||
| 101 | +4. **API 路由层**:`api/routes/search.py` | ||
| 102 | +5. **前端代码**:`frontend/static/js/app.js` | ||
| 103 | +6. **测试代码**:`test_aggregation_api.py`, `test_complete_search.py` | ||
| 104 | + | ||
| 105 | +## 第二部分:优化方案设计 | ||
| 106 | + | ||
| 107 | +### 方案概述 | ||
| 108 | + | ||
| 109 | +采用**结构化过滤参数方案(方案 A 的简化版)**: | ||
| 110 | + | ||
| 111 | +- 分离 `filters`(精确匹配)和 `range_filters`(范围过滤) | ||
| 112 | +- **不支持单字段多个不连续范围**,简化设计 | ||
| 113 | +- 标准化聚合参数,使用简化的接口 | ||
| 114 | +- 统一分面搜索响应格式 | ||
| 115 | + | ||
| 116 | +### 1. 新的请求模型设计 | ||
| 117 | + | ||
| 118 | +#### 1.1 核心模型定义 | ||
| 119 | + | ||
| 120 | +**文件**:`api/models.py` | ||
| 121 | + | ||
| 122 | +```python | ||
| 123 | +from pydantic import BaseModel, Field, field_validator, model_validator | ||
| 124 | +from typing import List, Dict, Any, Optional, Union, Literal | ||
| 125 | + | ||
| 126 | + | ||
| 127 | +class RangeFilter(BaseModel): | ||
| 128 | + """数值范围过滤器""" | ||
| 129 | + gte: Optional[float] = Field(None, description="大于等于 (>=)") | ||
| 130 | + gt: Optional[float] = Field(None, description="大于 (>)") | ||
| 131 | + lte: Optional[float] = Field(None, description="小于等于 (<=)") | ||
| 132 | + lt: Optional[float] = Field(None, description="小于 (<)") | ||
| 133 | + | ||
| 134 | + # 使用模型级校验:字段级校验器只能看到已校验的字段,且未传入的字段不会触发校验 | ||
| 135 | + @model_validator(mode='after') | ||
| 136 | + def check_at_least_one(self): | ||
| 137 | + """确保至少指定一个边界(0 是合法边界,因此与 None 比较而非判断真值)""" | ||
| 138 | + bounds = (self.gte, self.gt, self.lte, self.lt) | ||
| 139 | + if all(bound is None for bound in bounds): | ||
| 140 | + raise ValueError('至少需要指定一个范围边界') | ||
| 141 | + return self | ||
| 142 | + | ||
| 143 | + class Config: | ||
| 144 | + json_schema_extra = { | ||
| 145 | + "examples": [ | ||
| 146 | + {"gte": 50, "lte": 200}, | ||
| 147 | + {"gt": 100}, | ||
| 148 | + {"lt": 50} | ||
| 149 | + ] | ||
| 150 | + } | ||
| 151 | + | ||
| 152 | + | ||
| 153 | +class FacetConfig(BaseModel): | ||
| 154 | + """分面配置(简化版)""" | ||
| 155 | + field: str = Field(..., description="分面字段名") | ||
| 156 | + size: int = Field(10, ge=1, le=100, description="返回的分面值数量") | ||
| 157 | + type: Literal["terms", "range"] = Field("terms", description="分面类型") | ||
| 158 | + ranges: Optional[List[Dict[str, Any]]] = Field( | ||
| 159 | + None, | ||
| 160 | + description="范围分面的范围定义(仅当 type='range' 时需要)" | ||
| 161 | + ) | ||
| 162 | + | ||
| 163 | + class Config: | ||
| 164 | + json_schema_extra = { | ||
| 165 | + "examples": [ | ||
| 166 | + { | ||
| 167 | + "field": "categoryName_keyword", | ||
| 168 | + "size": 15, | ||
| 169 | + "type": "terms" | ||
| 170 | + }, | ||
| 171 | + { | ||
| 172 | + "field": "price", | ||
| 173 | + "size": 4, | ||
| 174 | + "type": "range", | ||
| 175 | + "ranges": [ | ||
| 176 | + {"key": "0-50", "to": 50}, | ||
| 177 | + {"key": "50-100", "from": 50, "to": 100}, | ||
| 178 | + {"key": "100-200", "from": 100, "to": 200}, | ||
| 179 | + {"key": "200+", "from": 200} | ||
| 180 | + ] | ||
| 181 | + } | ||
| 182 | + ] | ||
| 183 | + } | ||
| 184 | + | ||
| 185 | + | ||
| 186 | +class SearchRequest(BaseModel): | ||
| 187 | + """搜索请求模型(重构版)""" | ||
| 188 | + | ||
| 189 | + # 基础搜索参数 | ||
| 190 | + query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)") | ||
| 191 | + size: int = Field(10, ge=1, le=100, description="返回结果数量") | ||
| 192 | + from_: int = Field(0, ge=0, alias="from", description="分页偏移量") | ||
| 193 | + | ||
| 194 | + # 过滤器 - 精确匹配和多值匹配 | ||
| 195 | + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field( | ||
| 196 | + None, | ||
| 197 | + description="精确匹配过滤器。单值表示精确匹配,数组表示 OR 匹配(匹配任意一个值)", | ||
| 198 | + json_schema_extra={ | ||
| 199 | + "examples": [ | ||
| 200 | + { | ||
| 201 | + "categoryName_keyword": ["玩具", "益智玩具"], | ||
| 202 | + "brandName_keyword": "乐高", | ||
| 203 | + "in_stock": True | ||
| 204 | + } | ||
| 205 | + ] | ||
| 206 | + } | ||
| 207 | + ) | ||
| 208 | + | ||
| 209 | + # 范围过滤器 - 数值范围 | ||
| 210 | + range_filters: Optional[Dict[str, RangeFilter]] = Field( | ||
| 211 | + None, | ||
| 212 | + description="数值范围过滤器。支持 gte, gt, lte, lt 操作符", | ||
| 213 | + json_schema_extra={ | ||
| 214 | + "examples": [ | ||
| 215 | + { | ||
| 216 | + "price": {"gte": 50, "lte": 200}, | ||
| 217 | + "days_since_last_update": {"lte": 30} | ||
| 218 | + } | ||
| 219 | + ] | ||
| 220 | + } | ||
| 221 | + ) | ||
| 222 | + | ||
| 223 | + # 排序 | ||
| 224 | + sort_by: Optional[str] = Field(None, description="排序字段名(如 'price', 'create_time')") | ||
| 225 | + sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)") | ||
| 226 | + | ||
| 227 | + # 分面搜索 - 简化接口 | ||
| 228 | + facets: Optional[List[Union[str, FacetConfig]]] = Field( | ||
| 229 | + None, | ||
| 230 | + description="分面配置。可以是字段名列表(使用默认配置)或详细的分面配置对象", | ||
| 231 | + json_schema_extra={ | ||
| 232 | + "examples": [ | ||
| 233 | + # 简单模式:只指定字段名,使用默认配置 | ||
| 234 | + ["categoryName_keyword", "brandName_keyword"], | ||
| 235 | + # 高级模式:详细配置 | ||
| 236 | + [ | ||
| 237 | + {"field": "categoryName_keyword", "size": 15}, | ||
| 238 | + { | ||
| 239 | + "field": "price", | ||
| 240 | + "type": "range", | ||
| 241 | + "ranges": [ | ||
| 242 | + {"key": "0-50", "to": 50}, | ||
| 243 | + {"key": "50-100", "from": 50, "to": 100} | ||
| 244 | + ] | ||
| 245 | + } | ||
| 246 | + ] | ||
| 247 | + ] | ||
| 248 | + } | ||
| 249 | + ) | ||
| 250 | + | ||
| 251 | + # 高级选项 | ||
| 252 | + min_score: Optional[float] = Field(None, ge=0, description="最小相关性分数阈值") | ||
| 253 | + highlight: bool = Field(False, description="是否高亮搜索关键词(暂不实现)") | ||
| 254 | + debug: bool = Field(False, description="是否返回调试信息") | ||
| 255 | + | ||
| 256 | + # 个性化参数(预留) | ||
| 257 | + user_id: Optional[str] = Field(None, description="用户ID,用于个性化搜索和推荐") | ||
| 258 | + session_id: Optional[str] = Field(None, description="会话ID,用于搜索分析") | ||
| 259 | + | ||
| 260 | + | ||
| 261 | +class ImageSearchRequest(BaseModel): | ||
| 262 | + """图片搜索请求模型""" | ||
| 263 | + image_url: str = Field(..., description="查询图片的 URL") | ||
| 264 | + size: int = Field(10, ge=1, le=100, description="返回结果数量") | ||
| 265 | + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = None | ||
| 266 | + range_filters: Optional[Dict[str, RangeFilter]] = None | ||
| 267 | + | ||
| 268 | + | ||
| 269 | +class SearchSuggestRequest(BaseModel): | ||
| 270 | + """搜索建议请求模型(框架,暂不实现)""" | ||
| 271 | + query: str = Field(..., min_length=1, description="搜索查询字符串") | ||
| 272 | + size: int = Field(5, ge=1, le=20, description="返回建议数量") | ||
| 273 | + types: List[Literal["query", "product", "category", "brand"]] = Field( | ||
| 274 | + ["query"], | ||
| 275 | + description="建议类型:query(查询建议), product(商品建议), category(类目建议), brand(品牌建议)" | ||
| 276 | + ) | ||
| 277 | +``` | ||
| 278 | + | ||
| 279 | +#### 1.2 响应模型定义 | ||
| 280 | + | ||
| 281 | +```python | ||
| 282 | +class FacetValue(BaseModel): | ||
| 283 | + """分面值""" | ||
| 284 | + value: Union[str, int, float] = Field(..., description="分面值") | ||
| 285 | + label: Optional[str] = Field(None, description="显示标签(如果与 value 不同)") | ||
| 286 | + count: int = Field(..., description="匹配的文档数量") | ||
| 287 | + selected: bool = Field(False, description="是否已选中(当前过滤器中)") | ||
| 288 | + | ||
| 289 | + | ||
| 290 | +class FacetResult(BaseModel): | ||
| 291 | + """分面结果(标准化格式)""" | ||
| 292 | + field: str = Field(..., description="字段名") | ||
| 293 | + label: str = Field(..., description="分面显示名称") | ||
| 294 | + type: Literal["terms", "range"] = Field(..., description="分面类型") | ||
| 295 | + values: List[FacetValue] = Field(..., description="分面值列表") | ||
| 296 | + total_count: Optional[int] = Field(None, description="该字段的总文档数") | ||
| 297 | + | ||
| 298 | + | ||
| 299 | +class SearchResponse(BaseModel): | ||
| 300 | + """搜索响应模型(重构版)""" | ||
| 301 | + | ||
| 302 | + # 核心结果 | ||
| 303 | + hits: List[Dict[str, Any]] = Field(..., description="搜索结果列表") | ||
| 304 | + total: int = Field(..., description="匹配的总文档数") | ||
| 305 | + max_score: float = Field(..., description="最高相关性分数") | ||
| 306 | + | ||
| 307 | + # 分面搜索结果(标准化格式) | ||
| 308 | + facets: Optional[List[FacetResult]] = Field( | ||
| 309 | + None, | ||
| 310 | + description="分面统计结果(标准化格式)" | ||
| 311 | + ) | ||
| 312 | + | ||
| 313 | + # 查询信息 | ||
| 314 | + query_info: Dict[str, Any] = Field( | ||
| 315 | + default_factory=dict, | ||
| 316 | + description="查询处理信息(原始查询、改写、语言检测、翻译等)" | ||
| 317 | + ) | ||
| 318 | + | ||
| 319 | + # 推荐与建议(预留) | ||
| 320 | + related_queries: Optional[List[str]] = Field(None, description="相关搜索查询") | ||
| 321 | + | ||
| 322 | + # 性能指标 | ||
| 323 | + took_ms: int = Field(..., description="搜索总耗时(毫秒)") | ||
| 324 | + performance_info: Optional[Dict[str, Any]] = Field(None, description="详细性能信息") | ||
| 325 | + | ||
| 326 | + # 调试信息 | ||
| 327 | + debug_info: Optional[Dict[str, Any]] = Field(None, description="调试信息(仅当 debug=True)") | ||
| 328 | + | ||
| 329 | + | ||
| 330 | +class SearchSuggestResponse(BaseModel): | ||
| 331 | + """搜索建议响应模型(框架,暂不实现)""" | ||
| 332 | + query: str = Field(..., description="原始查询") | ||
| 333 | + suggestions: List[Dict[str, Any]] = Field(..., description="建议列表") | ||
| 334 | + took_ms: int = Field(..., description="耗时(毫秒)") | ||
| 335 | +``` | ||
| 336 | + | ||
| 337 | +### 2. 查询构建器重构 | ||
| 338 | + | ||
| 339 | +#### 2.1 移除硬编码的 price_ranges 逻辑 | ||
| 340 | + | ||
| 341 | +**文件**:`search/es_query_builder.py` | ||
| 342 | + | ||
| 343 | +**需要修改的方法**:`_build_filters(self, filters, range_filters)` | ||
| 344 | + | ||
| 345 | +**改进点**: | ||
| 346 | + | ||
| 347 | +1. 移除 `if field == 'price_ranges'` 的特殊处理 | ||
| 348 | +2. 分离 filters 和 range_filters 的处理逻辑 | ||
| 349 | +3. 添加字段类型验证(利用配置系统) | ||
| 350 | + | ||
| 351 | +**新的实现逻辑**: | ||
| 352 | + | ||
| 353 | +```python | ||
| 354 | +def _build_filters( | ||
| 355 | + self, | ||
| 356 | + filters: Optional[Dict[str, Any]] = None, | ||
| 357 | + range_filters: Optional[Dict[str, Any]] = None | ||
| 358 | +) -> List[Dict[str, Any]]: | ||
| 359 | + """ | ||
| 360 | + 构建过滤子句(重构版)。 | ||
| 361 | + | ||
| 362 | + Args: | ||
| 363 | + filters: 精确匹配过滤器字典 | ||
| 364 | + range_filters: 范围过滤器字典 | ||
| 365 | + | ||
| 366 | + Returns: | ||
| 367 | + ES filter子句列表 | ||
| 368 | + """ | ||
| 369 | + filter_clauses = [] | ||
| 370 | + | ||
| 371 | + # 1. 处理精确匹配过滤 | ||
| 372 | + if filters: | ||
| 373 | + for field, value in filters.items(): | ||
| 374 | + if isinstance(value, list): | ||
| 375 | + # 多值匹配(OR) | ||
| 376 | + filter_clauses.append({ | ||
| 377 | + "terms": {field: value} | ||
| 378 | + }) | ||
| 379 | + else: | ||
| 380 | + # 单值精确匹配 | ||
| 381 | + filter_clauses.append({ | ||
| 382 | + "term": {field: value} | ||
| 383 | + }) | ||
| 384 | + | ||
| 385 | + # 2. 处理范围过滤 | ||
| 386 | + if range_filters: | ||
| 387 | + for field, range_spec in range_filters.items(): | ||
| 388 | + # 验证字段是否为数值类型(可选,基于配置) | ||
| 389 | + # TODO: 添加字段类型验证 | ||
| 390 | + | ||
| 391 | + # 构建范围查询 | ||
| 392 | + range_conditions = {} | ||
| 393 | + if isinstance(range_spec, dict): | ||
| 394 | + for op in ['gte', 'gt', 'lte', 'lt']: | ||
| 395 | + if op in range_spec and range_spec[op] is not None: | ||
| 396 | + range_conditions[op] = range_spec[op] | ||
| 397 | + | ||
| 398 | + if range_conditions: | ||
| 399 | + filter_clauses.append({ | ||
| 400 | + "range": {field: range_conditions} | ||
| 401 | + }) | ||
| 402 | + | ||
| 403 | + return filter_clauses | ||
| 404 | +``` | ||
| 405 | + | ||
| 406 | +#### 2.2 优化聚合参数接口 | ||
| 407 | + | ||
| 408 | +**新增方法**:`build_facets(self, facet_configs)` | ||
| 409 | + | ||
| 410 | +**改进点**: | ||
| 411 | + | ||
| 412 | +1. 移除 `add_dynamic_aggregations`(直接暴露 ES DSL) | ||
| 413 | +2. 重构 `add_aggregations` 为更通用的 `build_facets` | ||
| 414 | +3. 支持简化配置和高级配置两种模式 | ||
| 415 | + | ||
| 416 | +**新的实现逻辑**: | ||
| 417 | + | ||
| 418 | +```python | ||
| 419 | +def build_facets( | ||
| 420 | + self, | ||
| 421 | + facet_configs: Optional[List[Union[str, Dict[str, Any]]]] = None | ||
| 422 | +) -> Dict[str, Any]: | ||
| 423 | + """ | ||
| 424 | + 构建分面聚合(重构版)。 | ||
| 425 | + | ||
| 426 | + Args: | ||
| 427 | + facet_configs: 分面配置列表。可以是: | ||
| 428 | + - 字符串列表:字段名,使用默认配置 | ||
| 429 | + - 配置对象列表:详细的分面配置 | ||
| 430 | + | ||
| 431 | + Returns: | ||
| 432 | + ES aggregations字典 | ||
| 433 | + """ | ||
| 434 | + if not facet_configs: | ||
| 435 | + return {} | ||
| 436 | + | ||
| 437 | + aggs = {} | ||
| 438 | + | ||
| 439 | + for config in facet_configs: | ||
| 440 | + # 1. 简单模式:只有字段名 | ||
| 441 | + if isinstance(config, str): | ||
| 442 | + field = config | ||
| 443 | + agg_name = f"{field}_facet" | ||
| 444 | + aggs[agg_name] = { | ||
| 445 | + "terms": { | ||
| 446 | + "field": field, | ||
| 447 | + "size": 10, # 默认大小 | ||
| 448 | + "order": {"_count": "desc"} | ||
| 449 | + } | ||
| 450 | + } | ||
| 451 | + | ||
| 452 | + # 2. 高级模式:详细配置对象 | ||
| 453 | + elif isinstance(config, dict): | ||
| 454 | + field = config['field'] | ||
| 455 | + facet_type = config.get('type', 'terms') | ||
| 456 | + size = config.get('size', 10) | ||
| 457 | + agg_name = f"{field}_facet" | ||
| 458 | + | ||
| 459 | + if facet_type == 'terms': | ||
| 460 | + # Terms 聚合(分组统计) | ||
| 461 | + aggs[agg_name] = { | ||
| 462 | + "terms": { | ||
| 463 | + "field": field, | ||
| 464 | + "size": size, | ||
| 465 | + "order": {"_count": "desc"} | ||
| 466 | + } | ||
| 467 | + } | ||
| 468 | + | ||
| 469 | + elif facet_type == 'range': | ||
| 470 | + # Range 聚合(范围统计) | ||
| 471 | + ranges = config.get('ranges', []) | ||
| 472 | + if ranges: | ||
| 473 | + aggs[agg_name] = { | ||
| 474 | + "range": { | ||
| 475 | + "field": field, | ||
| 476 | + "ranges": ranges | ||
| 477 | + } | ||
| 478 | + } | ||
| 479 | + | ||
| 480 | + return aggs | ||
| 481 | +``` | ||
| 482 | + | ||
| 483 | +#### 2.3 更新主查询构建方法 | ||
| 484 | + | ||
| 485 | +**修改方法签名**:`build_query()` | ||
| 486 | + | ||
| 487 | +```python | ||
| 488 | +def build_query( | ||
| 489 | + self, | ||
| 490 | + query_text: str, | ||
| 491 | + query_vector: Optional[np.ndarray] = None, | ||
| 492 | + query_node: Optional[QueryNode] = None, | ||
| 493 | + filters: Optional[Dict[str, Any]] = None, | ||
| 494 | + range_filters: Optional[Dict[str, Any]] = None, # 新增 | ||
| 495 | + size: int = 10, | ||
| 496 | + from_: int = 0, | ||
| 497 | + enable_knn: bool = True, | ||
| 498 | + knn_k: int = 50, | ||
| 499 | + knn_num_candidates: int = 200, | ||
| 500 | + min_score: Optional[float] = None | ||
| 501 | +) -> Dict[str, Any]: | ||
| 502 | + """构建完整的 ES 查询(重构版)""" | ||
| 503 | + # ... 实现 | ||
| 504 | + | ||
| 505 | + # 添加过滤器 | ||
| 506 | + if filters or range_filters: | ||
| 507 | + filter_clauses = self._build_filters(filters, range_filters) | ||
| 508 | + if filter_clauses: | ||
| 509 | + es_query["query"] = { | ||
| 510 | + "bool": { | ||
| 511 | + "must": [query_clause], | ||
| 512 | + "filter": filter_clauses | ||
| 513 | + } | ||
| 514 | + } | ||
| 515 | +``` | ||
| 516 | + | ||
| 517 | +### 3. 搜索执行层重构 | ||
| 518 | + | ||
| 519 | +**文件**:`search/searcher.py` | ||
| 520 | + | ||
| 521 | +**需要修改的方法**:`search()` | ||
| 522 | + | ||
| 523 | +**改进点**: | ||
| 524 | + | ||
| 525 | +1. 更新方法签名,接受 `range_filters` 参数 | ||
| 526 | +2. 使用新的 `build_facets` 方法替代旧的聚合逻辑 | ||
| 527 | +3. 标准化分面搜索结果 | ||
| 528 | + | ||
| 529 | +**关键代码片段**: | ||
| 530 | + | ||
| 531 | +```python | ||
| 532 | +def search( | ||
| 533 | + self, | ||
| 534 | + query: str, | ||
| 535 | + size: int = 10, | ||
| 536 | + from_: int = 0, | ||
| 537 | + filters: Optional[Dict[str, Any]] = None, | ||
| 538 | + range_filters: Optional[Dict[str, Any]] = None, # 新增 | ||
| 539 | + facets: Optional[List[Union[str, Dict]]] = None, # 替代 aggregations | ||
| 540 | + min_score: Optional[float] = None, | ||
| 541 | + sort_by: Optional[str] = None, | ||
| 542 | + sort_order: Optional[str] = "desc", | ||
| 543 | + debug: bool = False, | ||
| 544 | + context: Optional[RequestContext] = None | ||
| 545 | +) -> SearchResult: | ||
| 546 | + """执行搜索(重构版)""" | ||
| 547 | + | ||
| 548 | + # ... 查询解析 ... | ||
| 549 | + | ||
| 550 | + # 构建 ES 查询 | ||
| 551 | + es_query = self.query_builder.build_multilang_query( | ||
| 552 | + parsed_query=parsed_query, | ||
| 553 | + query_vector=parsed_query.query_vector, | ||
| 554 | + query_node=query_node, | ||
| 555 | + filters=filters, | ||
| 556 | + range_filters=range_filters, # 新增 | ||
| 557 | + size=size, | ||
| 558 | + from_=from_, | ||
| 559 | + enable_knn=enable_embedding, | ||
| 560 | + min_score=min_score | ||
| 561 | + ) | ||
| 562 | + | ||
| 563 | + # 添加分面聚合 | ||
| 564 | + if facets: | ||
| 565 | + facet_aggs = self.query_builder.build_facets(facets) | ||
| 566 | + if facet_aggs: | ||
| 567 | + if "aggs" not in es_query: | ||
| 568 | + es_query["aggs"] = {} | ||
| 569 | + es_query["aggs"].update(facet_aggs) | ||
| 570 | + | ||
| 571 | + # ... 执行搜索 ... | ||
| 572 | + | ||
| 573 | + # 标准化分面结果 | ||
| 574 | + standardized_facets = self._standardize_facets( | ||
| 575 | + es_response.get('aggregations', {}), | ||
| 576 | + facets, | ||
| 577 | + filters | ||
| 578 | + ) | ||
| 579 | + | ||
| 580 | + return SearchResult( | ||
| 581 | + hits=hits, | ||
| 582 | + total=total_value, | ||
| 583 | + max_score=max_score, | ||
| 584 | + took_ms=int(total_duration), | ||
| 585 | + facets=standardized_facets, # 标准化格式 | ||
| 586 | + query_info=parsed_query.to_dict(), | ||
| 587 | + debug_info=debug_info | ||
| 588 | + ) | ||
| 589 | +``` | ||
| 590 | + | ||
| 591 | +**新增辅助方法**: | ||
| 592 | + | ||
| 593 | +```python | ||
| 594 | +def _standardize_facets( | ||
| 595 | + self, | ||
| 596 | + es_aggregations: Dict[str, Any], | ||
| 597 | + facet_configs: Optional[List[Union[str, Dict]]], | ||
| 598 | + current_filters: Optional[Dict[str, Any]] | ||
| 599 | +) -> Optional[List[Dict[str, Any]]]: | ||
| 600 | + """ | ||
| 601 | + 将 ES 聚合结果转换为标准化的分面格式。 | ||
| 602 | + | ||
| 603 | + Args: | ||
| 604 | + es_aggregations: ES 原始聚合结果 | ||
| 605 | + facet_configs: 分面配置列表 | ||
| 606 | + current_filters: 当前应用的过滤器 | ||
| 607 | + | ||
| 608 | + Returns: | ||
| 609 | + 标准化的分面结果列表 | ||
| 610 | + """ | ||
| 611 | + if not es_aggregations or not facet_configs: | ||
| 612 | + return None | ||
| 613 | + | ||
| 614 | + standardized_facets = [] | ||
| 615 | + | ||
| 616 | + for config in facet_configs: | ||
| 617 | + # 解析配置 | ||
| 618 | + if isinstance(config, str): | ||
| 619 | + field = config | ||
| 620 | + facet_type = "terms" | ||
| 621 | + else: | ||
| 622 | + field = config['field'] | ||
| 623 | + facet_type = config.get('type', 'terms') | ||
| 624 | + | ||
| 625 | + agg_name = f"{field}_facet" | ||
| 626 | + | ||
| 627 | + if agg_name not in es_aggregations: | ||
| 628 | + continue | ||
| 629 | + | ||
| 630 | + agg_result = es_aggregations[agg_name] | ||
| 631 | + | ||
| 632 | + # 构建标准化分面结果 | ||
| 633 | + facet = { | ||
| 634 | + "field": field, | ||
| 635 | + "label": self._get_field_label(field), # 从配置获取 | ||
| 636 | + "type": facet_type, | ||
| 637 | + "values": [] | ||
| 638 | + } | ||
| 639 | + | ||
| 640 | + # 获取当前字段的选中值 | ||
| 641 | + selected_values = set() | ||
| 642 | + if current_filters and field in current_filters: | ||
| 643 | + filter_value = current_filters[field] | ||
| 644 | + if isinstance(filter_value, list): | ||
| 645 | + selected_values = set(filter_value) | ||
| 646 | + else: | ||
| 647 | + selected_values = {filter_value} | ||
| 648 | + | ||
| 649 | + # 转换 buckets | ||
| 650 | + if 'buckets' in agg_result: | ||
| 651 | + for bucket in agg_result['buckets']: | ||
| 652 | + value = bucket.get('key') | ||
| 653 | + count = bucket.get('doc_count', 0) | ||
| 654 | + | ||
| 655 | + facet['values'].append({ | ||
| 656 | + "value": value, | ||
| 657 | + "label": str(value), # 可以从配置映射 | ||
| 658 | + "count": count, | ||
| 659 | + "selected": value in selected_values | ||
| 660 | + }) | ||
| 661 | + | ||
| 662 | + standardized_facets.append(facet) | ||
| 663 | + | ||
| 664 | + return standardized_facets | ||
| 665 | + | ||
| 666 | + | ||
| 667 | +def _get_field_label(self, field: str) -> str: | ||
| 668 | + """获取字段的显示标签""" | ||
| 669 | + # 从配置中获取字段标签 | ||
| 670 | + for field_config in self.config.fields: | ||
| 671 | + if field_config.name == field: | ||
| 672 | + # 假设配置中有 label 字段 | ||
| 673 | + return getattr(field_config, 'label', field) | ||
| 674 | + return field | ||
| 675 | +``` | ||
| 676 | + | ||
| 677 | +### 4. API 路由层更新 | ||
| 678 | + | ||
| 679 | +**文件**:`api/routes/search.py` | ||
| 680 | + | ||
| 681 | +**改进点**: | ||
| 682 | + | ||
| 683 | +1. 接受新的请求模型参数 | ||
| 684 | +2. 添加搜索建议端点(框架) | ||
| 685 | + | ||
| 686 | +**新增端点**: | ||
| 687 | + | ||
| 688 | +```python | ||
| 689 | +@router.get("/suggestions", response_model=SearchSuggestResponse) | ||
| 690 | +async def search_suggestions( | ||
| 691 | + q: str = Query(..., min_length=1, description="搜索查询"), | ||
| 692 | + size: int = Query(5, ge=1, le=20, description="建议数量"), | ||
| 693 | + types: str = Query("query", description="建议类型(逗号分隔)") | ||
| 694 | +): | ||
| 695 | + """ | ||
| 696 | + 获取搜索建议(自动补全)。 | ||
| 697 | + | ||
| 698 | + 功能说明: | ||
| 699 | + - 查询建议(query):基于历史搜索和热门搜索 | ||
| 700 | + - 商品建议(product):匹配的商品 | ||
| 701 | + - 类目建议(category):匹配的类目 | ||
| 702 | + - 品牌建议(brand):匹配的品牌 | ||
| 703 | + | ||
| 704 | + 注意:此功能暂未实现,仅返回框架响应。 | ||
| 705 | + """ | ||
| 706 | + import time | ||
| 707 | + start_time = time.time() | ||
| 708 | + | ||
| 709 | + # TODO: 实现搜索建议逻辑 | ||
| 710 | + # 1. 从搜索历史中获取建议 | ||
| 711 | + # 2. 从商品标题中匹配前缀 | ||
| 712 | + # 3. 从类目、品牌中匹配 | ||
| 713 | + | ||
| 714 | + # 临时返回空结果 | ||
| 715 | + suggestions = [] | ||
| 716 | + | ||
| 717 | + # 示例结构(暂不实现) | ||
| 718 | + # suggestions = [ | ||
| 719 | + # { | ||
| 720 | + # "text": "芭比娃娃", | ||
| 721 | + # "type": "query", | ||
| 722 | + # "highlight": "<em>芭</em>比娃娃", | ||
| 723 | + # "popularity": 850 | ||
| 724 | + # } | ||
| 725 | + # ] | ||
| 726 | + | ||
| 727 | + took_ms = int((time.time() - start_time) * 1000) | ||
| 728 | + | ||
| 729 | + return SearchSuggestResponse( | ||
| 730 | + query=q, | ||
| 731 | + suggestions=suggestions, | ||
| 732 | + took_ms=took_ms | ||
| 733 | + ) | ||
| 734 | + | ||
| 735 | + | ||
| 736 | +@router.get("/instant", response_model=SearchResponse) | ||
| 737 | +async def instant_search( | ||
| 738 | + q: str = Query(..., min_length=2, description="搜索查询"), | ||
| 739 | + size: int = Query(5, ge=1, le=20, description="结果数量") | ||
| 740 | +): | ||
| 741 | + """ | ||
| 742 | + 即时搜索(Instant Search)。 | ||
| 743 | + | ||
| 744 | + 功能说明: | ||
| 745 | + - 边输入边搜索,无需点击搜索按钮 | ||
| 746 | + - 返回简化的搜索结果 | ||
| 747 | + - 性能优化:缓存、限流 | ||
| 748 | + | ||
| 749 | + 注意:此功能暂未实现,调用标准搜索接口。 | ||
| 750 | + """ | ||
| 751 | + # TODO: 优化即时搜索性能 | ||
| 752 | + # 1. 添加防抖/节流 | ||
| 753 | + # 2. 实现结果缓存 | ||
| 754 | + # 3. 简化返回字段 | ||
| 755 | + | ||
| 756 | + # 临时使用标准搜索接口 | ||
| 757 | + from api.app import get_searcher | ||
| 758 | + searcher = get_searcher() | ||
| 759 | + | ||
| 760 | + result = searcher.search( | ||
| 761 | + query=q, | ||
| 762 | + size=size, | ||
| 763 | + from_=0 | ||
| 764 | + ) | ||
| 765 | + | ||
| 766 | + return SearchResponse( | ||
| 767 | + hits=result.hits, | ||
| 768 | + total=result.total, | ||
| 769 | + max_score=result.max_score, | ||
| 770 | + took_ms=result.took_ms, | ||
| 771 | + query_info=result.query_info | ||
| 772 | + ) | ||
| 773 | +``` | ||
| 774 | + | ||
| 775 | +### 5. 前端适配 | ||
| 776 | + | ||
| 777 | +**文件**:`frontend/static/js/app.js` | ||
| 778 | + | ||
| 779 | +**需要修改的地方**: | ||
| 780 | + | ||
| 781 | +1. **聚合参数改用简化配置**(第 57-87 行): | ||
| 782 | +```javascript | ||
| 783 | +// 旧的方式(直接 ES DSL) | ||
| 784 | +const aggregations = { | ||
| 785 | + "category_stats": { | ||
| 786 | + "terms": { | ||
| 787 | + "field": "categoryName_keyword", | ||
| 788 | + "size": 15 | ||
| 789 | + } | ||
| 790 | + } | ||
| 791 | +}; | ||
| 792 | + | ||
| 793 | +// 新的方式(简化配置) | ||
| 794 | +const facets = [ | ||
| 795 | + { | ||
| 796 | + "field": "categoryName_keyword", | ||
| 797 | + "size": 15, | ||
| 798 | + "type": "terms" | ||
| 799 | + }, | ||
| 800 | + { | ||
| 801 | + "field": "brandName_keyword", | ||
| 802 | + "size": 15, | ||
| 803 | + "type": "terms" | ||
| 804 | + }, | ||
| 805 | + { | ||
| 806 | + "field": "price", | ||
| 807 | + "type": "range", | ||
| 808 | + "ranges": [ | ||
| 809 | + {"key": "0-50", "to": 50}, | ||
| 810 | + {"key": "50-100", "from": 50, "to": 100}, | ||
| 811 | + {"key": "100-200", "from": 100, "to": 200}, | ||
| 812 | + {"key": "200+", "from": 200} | ||
| 813 | + ] | ||
| 814 | + } | ||
| 815 | +]; | ||
| 816 | +``` | ||
| 817 | + | ||
| 818 | +2. **过滤器使用新格式**(第 103 行): | ||
| 819 | +```javascript | ||
| 820 | +// 旧的方式 | ||
| 821 | +filters: { | ||
| 822 | + "price_ranges": ["0-50", "50-100"] // 硬编码 | ||
| 823 | +} | ||
| 824 | + | ||
| 825 | +// 新的方式 | ||
| 826 | +filters: { | ||
| 827 | + "categoryName_keyword": ["玩具"], | ||
| 828 | + "in_stock": true | ||
| 829 | +}, | ||
| 830 | +range_filters: { | ||
| 831 | + "price": {"gte": 50, "lte": 100} | ||
| 832 | +} | ||
| 833 | +``` | ||
| 834 | + | ||
| 835 | +3. **解析标准化的分面结果**(第 208-258 行): | ||
| 836 | +```javascript | ||
| 837 | +// 旧的方式(直接访问 ES 结构) | ||
| 838 | +if (aggregations.category_stats && aggregations.category_stats.buckets) { | ||
| 839 | + aggregations.category_stats.buckets.forEach(bucket => { | ||
| 840 | + // ... | ||
| 841 | + }); | ||
| 842 | +} | ||
| 843 | + | ||
| 844 | +// 新的方式(标准化格式) | ||
| 845 | +if (data.facets) { | ||
| 846 | + data.facets.forEach(facet => { | ||
| 847 | + if (facet.field === 'categoryName_keyword') { | ||
| 848 | + facet.values.forEach(facetValue => { | ||
| 849 | + const value = facetValue.value; | ||
| 850 | + const count = facetValue.count; | ||
| 851 | + const selected = facetValue.selected; | ||
| 852 | + // ... | ||
| 853 | + }); | ||
| 854 | + } | ||
| 855 | + }); | ||
| 856 | +} | ||
| 857 | +``` | ||
| 858 | + | ||
| 859 | + | ||
| 860 | +### 6. 测试代码更新 | ||
| 861 | + | ||
| 862 | +**文件**:`test_aggregation_api.py` | ||
| 863 | + | ||
| 864 | +**需要修改的地方**: | ||
| 865 | + | ||
| 866 | +1. 移除 `price_ranges` 硬编码测试(第 93 行) | ||
| 867 | +2. 使用新的 `range_filters` 格式 | ||
| 868 | +3. 使用新的 `facets` 配置 | ||
| 869 | + | ||
| 870 | +**新的测试代码**: | ||
| 871 | + | ||
| 872 | +```python | ||
| 873 | +def test_search_with_filters(): | ||
| 874 | + """测试新的过滤器格式""" | ||
| 875 | + test_request = { | ||
| 876 | + "query": "玩具", | ||
| 877 | + "size": 5, | ||
| 878 | + "filters": { | ||
| 879 | + "categoryName_keyword": ["玩具"] | ||
| 880 | + }, | ||
| 881 | + "range_filters": { | ||
| 882 | + "price": {"gte": 50, "lte": 100} | ||
| 883 | + } | ||
| 884 | + } | ||
| 885 | + # ... | ||
| 886 | + | ||
| 887 | +def test_search_with_facets(): | ||
| 888 | + """测试新的分面配置""" | ||
| 889 | + test_request = { | ||
| 890 | + "query": "玩具", | ||
| 891 | + "size": 20, | ||
| 892 | + "facets": [ | ||
| 893 | + { | ||
| 894 | + "field": "categoryName_keyword", | ||
| 895 | + "size": 15 | ||
| 896 | + }, | ||
| 897 | + { | ||
| 898 | + "field": "price", | ||
| 899 | + "type": "range", | ||
| 900 | + "ranges": [ | ||
| 901 | + {"key": "0-50", "to": 50}, | ||
| 902 | + {"key": "50-100", "from": 50, "to": 100} | ||
| 903 | + ] | ||
| 904 | + } | ||
| 905 | + ] | ||
| 906 | + } | ||
| 907 | + # ... | ||
| 908 | +``` | ||
| 909 | + | ||
| 910 | +## 第三部分:实施步骤 | ||
| 911 | + | ||
| 912 | +### 阶段 1:后端模型层重构(高优先级) | ||
| 913 | + | ||
| 914 | +**任务清单**: | ||
| 915 | + | ||
| 916 | +- [ ] 更新 `api/models.py` | ||
| 917 | + - [ ] 定义 `RangeFilter` 模型 | ||
| 918 | + - [ ] 定义 `FacetConfig` 模型 | ||
| 919 | + - [ ] 更新 `SearchRequest`,添加 `range_filters` 和 `facets` | ||
| 920 | + - [ ] 将 `aggregations` 参数标记为废弃(过渡期内保留,详见第五部分“向后兼容性”) | ||
| 921 | + - [ ] 定义 `FacetValue` 和 `FacetResult` 模型 | ||
| 922 | + - [ ] 更新 `SearchResponse`,使用标准化分面格式 | ||
| 923 | + - [ ] 添加 `SearchSuggestRequest` 和 `SearchSuggestResponse`(框架) | ||
| 924 | + | ||
| 925 | +**验证方式**: | ||
| 926 | + | ||
| 927 | +- 运行 Pydantic 模型验证 | ||
| 928 | +- 检查 API 文档(`/docs`)是否正确生成 | ||
| 929 | + | ||
| 930 | +### 阶段 2:查询构建器重构(高优先级) | ||
| 931 | + | ||
| 932 | +**任务清单**: | ||
| 933 | + | ||
| 934 | +- [ ] 重构 `search/es_query_builder.py` | ||
| 935 | + - [ ] 移除 `price_ranges` 硬编码逻辑(第 205-233 行) | ||
| 936 | + - [ ] 重构 `_build_filters` 方法,支持 `range_filters` | ||
| 937 | + - [ ] 移除 `add_dynamic_aggregations` 方法 | ||
| 938 | + - [ ] 重构 `add_aggregations` 为 `build_facets` | ||
| 939 | + - [ ] 更新 `build_query` 方法签名 | ||
| 940 | +- [ ] 更新 `search/multilang_query_builder.py`(如果需要) | ||
| 941 | + | ||
| 942 | +**验证方式**: | ||
| 943 | + | ||
| 944 | +- 编写单元测试验证过滤器构建逻辑 | ||
| 945 | +- 打印生成的 ES DSL,检查正确性 | ||
| 946 | + | ||
| 947 | +### 阶段 3:搜索执行层重构(高优先级) | ||
| 948 | + | ||
| 949 | +**任务清单**: | ||
| 950 | + | ||
| 951 | +- [ ] 更新 `search/searcher.py` | ||
| 952 | + - [ ] 更新 `search()` 方法签名 | ||
| 953 | + - [ ] 使用新的 `build_facets` 方法 | ||
| 954 | + - [ ] 实现 `_standardize_facets()` 辅助方法 | ||
| 955 | + - [ ] 实现 `_get_field_label()` 辅助方法 | ||
| 956 | + - [ ] 更新 `SearchResult` 类,使用标准化分面格式 | ||
| 957 | + | ||
| 958 | +**验证方式**: | ||
| 959 | + | ||
| 960 | +- 编写集成测试 | ||
| 961 | +- 手动测试搜索功能 | ||
| 962 | + | ||
| 963 | +### 阶段 4:API 路由层更新(中优先级) | ||
| 964 | + | ||
| 965 | +**任务清单**: | ||
| 966 | + | ||
| 967 | +- [ ] 更新 `api/routes/search.py` | ||
| 968 | + - [ ] 更新 `/search/` 端点,接受新的请求参数 | ||
| 969 | + - [ ] 添加 `/search/suggestions` 端点(框架,返回空结果) | ||
| 970 | + - [ ] 添加 `/search/instant` 端点(框架,调用标准搜索) | ||
| 971 | + - [ ] 添加端点文档和示例 | ||
| 972 | + | ||
| 973 | +**验证方式**: | ||
| 974 | + | ||
| 975 | +- 使用 Swagger UI 测试端点 | ||
| 976 | +- 检查 API 文档完整性 | ||
| 977 | + | ||
| 978 | +### 阶段 5:前端适配(中优先级) | ||
| 979 | + | ||
| 980 | +**任务清单**: | ||
| 981 | + | ||
| 982 | +- [ ] 更新 `frontend/static/js/app.js` | ||
| 983 | + - [ ] 修改聚合参数为 `facets` 简化配置 | ||
| 984 | + - [ ] 修改过滤器参数,分离 `filters` 和 `range_filters` | ||
| 985 | + - [ ] 更新 `displayAggregations()` 方法,解析标准化分面结果 | ||
| 986 | + - [ ] 添加范围过滤器 UI(如价格滑块) | ||
| 987 | + - [ ] 移除硬编码的 `price_ranges` | ||
| 988 | + | ||
| 989 | +**验证方式**: | ||
| 990 | + | ||
| 991 | +- 浏览器测试前端功能 | ||
| 992 | +- 检查网络请求和响应格式 | ||
| 993 | + | ||
| 994 | +### 阶段 6:测试代码更新(低优先级) | ||
| 995 | + | ||
| 996 | +**任务清单**: | ||
| 997 | + | ||
| 998 | +- [ ] 更新 `test_aggregation_api.py` | ||
| 999 | + - [ ] 移除 `price_ranges` 测试 | ||
| 1000 | + - [ ] 添加 `range_filters` 测试 | ||
| 1001 | + - [ ] 添加新的 `facets` 测试 | ||
| 1002 | +- [ ] 更新 `test_complete_search.py` | ||
| 1003 | +- [ ] 更新 `tests/integration/test_aggregation_api.py` | ||
| 1004 | +- [ ] 更新 `tests/unit/test_searcher.py` | ||
| 1005 | + | ||
| 1006 | +**验证方式**: | ||
| 1007 | + | ||
| 1008 | +- 运行所有测试,确保通过 | ||
| 1009 | +- 检查测试覆盖率 | ||
| 1010 | + | ||
| 1011 | +### 阶段 7:文档更新(低优先级) | ||
| 1012 | + | ||
| 1013 | +**任务清单**: | ||
| 1014 | + | ||
| 1015 | +- [ ] 撰写完整的 API 接口文档 | ||
| 1016 | +- [ ] 更新 `README.md` | ||
| 1017 | +- [ ] 更新 `USER_GUIDE.md` | ||
| 1018 | +- [ ] 添加接口使用示例 | ||
| 1019 | +- [ ] 添加迁移指南(旧接口 → 新接口) | ||
| 1020 | + | ||
| 1021 | +## 第四部分:API 使用示例 | ||
| 1022 | + | ||
| 1023 | +### 示例 1:简单搜索 | ||
| 1024 | + | ||
| 1025 | +```bash | ||
| 1026 | +POST /search/ | ||
| 1027 | +{ | ||
| 1028 | + "query": "芭比娃娃", | ||
| 1029 | + "size": 20 | ||
| 1030 | +} | ||
| 1031 | +``` | ||
| 1032 | + | ||
| 1033 | +### 示例 2:带过滤器的搜索 | ||
| 1034 | + | ||
| 1035 | +```bash | ||
| 1036 | +POST /search/ | ||
| 1037 | +{ | ||
| 1038 | + "query": "玩具", | ||
| 1039 | + "size": 20, | ||
| 1040 | + "filters": { | ||
| 1041 | + "categoryName_keyword": ["玩具", "益智玩具"], | ||
| 1042 | + "in_stock": true | ||
| 1043 | + }, | ||
| 1044 | + "range_filters": { | ||
| 1045 | + "price": {"gte": 50, "lte": 200} | ||
| 1046 | + } | ||
| 1047 | +} | ||
| 1048 | +``` | ||
| 1049 | + | ||
| 1050 | +### 示例 3:带分面搜索的请求 | ||
| 1051 | + | ||
| 1052 | +```bash | ||
| 1053 | +POST /search/ | ||
| 1054 | +{ | ||
| 1055 | + "query": "玩具", | ||
| 1056 | + "size": 20, | ||
| 1057 | + "facets": [ | ||
| 1058 | + { | ||
| 1059 | + "field": "categoryName_keyword", | ||
| 1060 | + "size": 15 | ||
| 1061 | + }, | ||
| 1062 | + { | ||
| 1063 | + "field": "brandName_keyword", | ||
| 1064 | + "size": 15 | ||
| 1065 | + }, | ||
| 1066 | + { | ||
| 1067 | + "field": "price", | ||
| 1068 | + "type": "range", | ||
| 1069 | + "ranges": [ | ||
| 1070 | + {"key": "0-50", "to": 50}, | ||
| 1071 | + {"key": "50-100", "from": 50, "to": 100}, | ||
| 1072 | + {"key": "100-200", "from": 100, "to": 200}, | ||
| 1073 | + {"key": "200+", "from": 200} | ||
| 1074 | + ] | ||
| 1075 | + } | ||
| 1076 | + ] | ||
| 1077 | +} | ||
| 1078 | +``` | ||
| 1079 | + | ||
| 1080 | +**响应示例**(标准化分面格式): | ||
| 1081 | + | ||
| 1082 | +```json | ||
| 1083 | +{ | ||
| 1084 | + "hits": [...], | ||
| 1085 | + "total": 118, | ||
| 1086 | + "max_score": 8.5, | ||
| 1087 | + "took_ms": 45, | ||
| 1088 | + "facets": [ | ||
| 1089 | + { | ||
| 1090 | + "field": "categoryName_keyword", | ||
| 1091 | + "label": "商品类目", | ||
| 1092 | + "type": "terms", | ||
| 1093 | + "values": [ | ||
| 1094 | + {"value": "玩具", "label": "玩具", "count": 85, "selected": false}, | ||
| 1095 | + {"value": "益智玩具", "label": "益智玩具", "count": 33, "selected": false} | ||
| 1096 | + ] | ||
| 1097 | + }, | ||
| 1098 | + { | ||
| 1099 | + "field": "price", | ||
| 1100 | + "label": "价格区间", | ||
| 1101 | + "type": "range", | ||
| 1102 | + "values": [ | ||
| 1103 | + {"value": "0-50", "label": "0-50元", "count": 23, "selected": false}, | ||
| 1104 | + {"value": "50-100", "label": "50-100元", "count": 45, "selected": false}, | ||
| 1105 | + {"value": "100-200", "label": "100-200元", "count": 38, "selected": false}, | ||
| 1106 | + {"value": "200+", "label": "200元以上", "count": 12, "selected": false} | ||
| 1107 | + ] | ||
| 1108 | + } | ||
| 1109 | + ] | ||
| 1110 | +} | ||
| 1111 | +``` | ||
| 1112 | + | ||
| 1113 | +### 示例 4:搜索建议(框架;以下为目标响应格式,当前实现返回空的 `suggestions` 列表) | ||
| 1114 | + | ||
| 1115 | +```bash | ||
| 1116 | +GET /search/suggestions?q=芭&size=5 | ||
| 1117 | + | ||
| 1118 | +{ | ||
| 1119 | + "query": "芭", | ||
| 1120 | + "suggestions": [ | ||
| 1121 | + { | ||
| 1122 | + "text": "芭比娃娃", | ||
| 1123 | + "type": "query", | ||
| 1124 | + "highlight": "<em>芭</em>比娃娃", | ||
| 1125 | + "popularity": 850 | ||
| 1126 | + }, | ||
| 1127 | + { | ||
| 1128 | + "text": "芭比娃娃屋", | ||
| 1129 | + "type": "query", | ||
| 1130 | + "highlight": "<em>芭</em>比娃娃屋", | ||
| 1131 | + "popularity": 320 | ||
| 1132 | + } | ||
| 1133 | + ], | ||
| 1134 | + "took_ms": 5 | ||
| 1135 | +} | ||
| 1136 | +``` | ||
| 1137 | + | ||
| 1138 | +## 第五部分:向后兼容性 | ||
| 1139 | + | ||
| 1140 | +### 兼容策略 | ||
| 1141 | + | ||
| 1142 | +为保持向后兼容,在过渡期(1-2 个版本)内: | ||
| 1143 | + | ||
| 1144 | +1. **同时支持旧参数和新参数**: | ||
| 1145 | +```python | ||
| 1146 | +class SearchRequest(BaseModel): | ||
| 1147 | + # 新参数 | ||
| 1148 | + range_filters: Optional[Dict[str, RangeFilter]] = None | ||
| 1149 | + facets: Optional[List[Union[str, FacetConfig]]] = None | ||
| 1150 | + | ||
| 1151 | + # 旧参数(标记为废弃) | ||
| 1152 | + aggregations: Optional[Dict[str, Any]] = Field( | ||
| 1153 | + None, | ||
| 1154 | + deprecated=True, | ||
| 1155 | + description="已废弃。请使用 'facets' 参数" | ||
| 1156 | + ) | ||
| 1157 | +``` | ||
| 1158 | + | ||
| 1159 | +2. **在后端自动转换旧格式**: | ||
| 1160 | +```python | ||
| 1161 | +# 在 searcher.py 中 | ||
| 1162 | +if request.aggregations and not request.facets: | ||
| 1163 | + # 将旧的 aggregations 转换为新的 facets | ||
| 1164 | + request.facets = self._convert_legacy_aggregations(request.aggregations) | ||
| 1165 | +``` | ||
| 1166 | + | ||
| 1167 | +3. **在响应中提供迁移提示**: | ||
| 1168 | +```python | ||
| 1169 | +if request.aggregations: | ||
| 1170 | + warnings.append({ | ||
| 1171 | + "type": "deprecation", | ||
| 1172 | + "message": "'aggregations' 参数已废弃,请使用 'facets' 参数", | ||
| 1173 | + "migration_guide": "https://docs.example.com/migration" | ||
| 1174 | + }) | ||
| 1175 | +``` | ||
| 1176 | + | ||
| 1177 | + | ||
| 1178 | +### 迁移时间线 | ||
| 1179 | + | ||
| 1180 | +- **v3.0**(当前版本):发布新接口,旧接口标记为废弃 | ||
| 1181 | +- **v3.1**(1 个月后):移除旧接口的自动转换 | ||
| 1182 | +- **v4.0**(3 个月后):完全移除旧接口 | ||
| 1183 | + | ||
| 1184 | +## 第六部分:风险评估与缓解 | ||
| 1185 | + | ||
| 1186 | +### 风险点 | ||
| 1187 | + | ||
| 1188 | +1. **破坏性变更风险**: | ||
| 1189 | + | ||
| 1190 | + - 风险:现有客户代码可能依赖旧接口 | ||
| 1191 | + - 缓解:提供向后兼容层,发布详细迁移指南 | ||
| 1192 | + | ||
| 1193 | +2. **性能影响风险**: | ||
| 1194 | + | ||
| 1195 | + - 风险:新的标准化处理可能增加延迟 | ||
| 1196 | + - 缓解:添加性能测试,优化关键路径 | ||
| 1197 | + | ||
| 1198 | +3. **测试覆盖不足风险**: | ||
| 1199 | + | ||
| 1200 | + - 风险:重构可能引入新 bug | ||
| 1201 | + - 缓解:全面的单元测试和集成测试 | ||
| 1202 | + | ||
| 1203 | +### 验收标准 | ||
| 1204 | + | ||
| 1205 | +- [ ] 所有单元测试通过 | ||
| 1206 | +- [ ] 所有集成测试通过 | ||
| 1207 | +- [ ] API 文档完整且准确 | ||
| 1208 | +- [ ] 性能无明显下降(< 10% 延迟增加) | ||
| 1209 | +- [ ] 前端功能正常工作 | ||
| 1210 | +- [ ] 提供完整的迁移指南 | ||
| 1211 | + | ||
| 1212 | +## 总结 | ||
| 1213 | + | ||
| 1214 | +本计划通过系统性的重构,将搜索 API 从硬编码、暴露 ES 细节的实现,转变为灵活、通用、易用的 SaaS 产品接口。关键改进包括: | ||
| 1215 | + | ||
| 1216 | +1. ✅ 移除硬编码的 price_ranges 逻辑 | ||
| 1217 | +2. ✅ 实现结构化的过滤参数(filters + range_filters) | ||
| 1218 | +3. ✅ 简化聚合参数接口,不暴露 ES DSL | ||
| 1219 | +4. ✅ 标准化分面搜索响应格式 | ||
| 1220 | +5. ✅ 添加搜索建议功能框架(暂不实现) | ||
| 1221 | + | ||
| 1222 | +通过这些改进,系统将具备更好的通用性、可维护性和可扩展性,为未来功能扩展奠定基础。 | ||
| 0 | \ No newline at end of file | 1223 | \ No newline at end of file |
.cursor/plans/api-interface-analysis-42918612.plan.2.md
0 → 100644
| @@ -0,0 +1,1222 @@ | @@ -0,0 +1,1222 @@ | ||
| 1 | +<!-- 42918612-89ae-45a6-9ccb-e2b0df0a7aec c0a3c6ad-c0f3-447c-8809-bb4c2ef6a845 --> | ||
| 2 | +# 搜索引擎 API 接口重构实施计划 | ||
| 3 | + | ||
| 4 | +## 第一部分:现状分析 | ||
| 5 | + | ||
| 6 | +### 1. 当前实现存在的问题 | ||
| 7 | + | ||
| 8 | +#### 问题 1:硬编码的价格范围过滤 | ||
| 9 | + | ||
| 10 | +**位置**:`search/es_query_builder.py` 第 205-233 行 | ||
| 11 | + | ||
| 12 | +**问题描述**: | ||
| 13 | + | ||
| 14 | +```python | ||
| 15 | +if field == 'price_ranges': | ||
| 16 | + # 硬编码特定字符串值 | ||
| 17 | + if price_range == '0-50': | ||
| 18 | + price_ranges.append({"lt": 50}) | ||
| 19 | + elif price_range == '50-100': | ||
| 20 | + price_ranges.append({"gte": 50, "lt": 100}) | ||
| 21 | + # ... | ||
| 22 | +``` | ||
| 23 | + | ||
| 24 | +**影响**: | ||
| 25 | + | ||
| 26 | +- 只支持 `price` 字段,无法扩展到其他数值字段 | ||
| 27 | +- 范围值硬编码,无法根据业务需求调整 | ||
| 28 | +- 不符合 SaaS 系统的通用性要求 | ||
| 29 | + | ||
| 30 | +#### 问题 2:聚合参数直接暴露 ES DSL | ||
| 31 | + | ||
| 32 | +**位置**: | ||
| 33 | + | ||
| 34 | +- `api/models.py` 第 17 行:`aggregations: Optional[Dict[str, Any]]` | ||
| 35 | +- `search/es_query_builder.py` 第 298-319 行:`add_dynamic_aggregations` | ||
| 36 | +- `frontend/static/js/app.js` 第 57-87 行:前端硬编码 ES DSL | ||
| 37 | + | ||
| 38 | +**问题描述**: | ||
| 39 | + | ||
| 40 | +前端需要了解 Elasticsearch 的聚合语法: | ||
| 41 | + | ||
| 42 | +```javascript | ||
| 43 | +const aggregations = { | ||
| 44 | + "category_stats": { | ||
| 45 | + "terms": { | ||
| 46 | + "field": "categoryName_keyword", | ||
| 47 | + "size": 15 | ||
| 48 | + } | ||
| 49 | + }, | ||
| 50 | + "price_ranges": { | ||
| 51 | + "range": { | ||
| 52 | + "field": "price", | ||
| 53 | + "ranges": [ | ||
| 54 | + {"key": "0-50", "to": 50}, | ||
| 55 | + // ... | ||
| 56 | + ] | ||
| 57 | + } | ||
| 58 | + } | ||
| 59 | +}; | ||
| 60 | +``` | ||
| 61 | + | ||
| 62 | +**影响**: | ||
| 63 | + | ||
| 64 | +- 前端需要了解 ES 语法,增加集成难度 | ||
| 65 | +- 不符合 SaaS 产品易用性原则 | ||
| 66 | +- 难以进行参数验证和文档生成 | ||
| 67 | + | ||
| 68 | +#### 问题 3:分面搜索结果格式不统一 | ||
| 69 | + | ||
| 70 | +**位置**:`frontend/static/js/app.js` 第 208-258 行 | ||
| 71 | + | ||
| 72 | +**问题描述**: | ||
| 73 | + | ||
| 74 | +- 直接返回 ES 原始格式(`buckets` 结构) | ||
| 75 | +- 前端需要知道不同聚合类型的响应结构 | ||
| 76 | +- 没有统一的分面结果模型 | ||
| 77 | + | ||
| 78 | +**影响**: | ||
| 79 | + | ||
| 80 | +- 前端解析逻辑复杂 | ||
| 81 | +- 不同类型的聚合处理方式不一致 | ||
| 82 | +- 难以扩展新的聚合类型 | ||
| 83 | + | ||
| 84 | +#### 问题 4:缺少搜索建议功能 | ||
| 85 | + | ||
| 86 | +**当前状态**:完全没有实现 | ||
| 87 | + | ||
| 88 | +**需求**: | ||
| 89 | + | ||
| 90 | +- 自动补全(Autocomplete) | ||
| 91 | +- 搜索建议(Suggestions) | ||
| 92 | +- 搜索即时反馈(Instant Search) | ||
| 93 | + | ||
| 94 | +### 2. 依赖关系分析 | ||
| 95 | + | ||
| 96 | +**影响范围**: | ||
| 97 | + | ||
| 98 | +1. **后端模型层**:`api/models.py` | ||
| 99 | +2. **查询构建层**:`search/es_query_builder.py` | ||
| 100 | +3. **搜索执行层**:`search/searcher.py` | ||
| 101 | +4. **API 路由层**:`api/routes/search.py` | ||
| 102 | +5. **前端代码**:`frontend/static/js/app.js` | ||
| 103 | +6. **测试代码**:`test_aggregation_api.py`, `test_complete_search.py` | ||
| 104 | + | ||
| 105 | +## 第二部分:优化方案设计 | ||
| 106 | + | ||
| 107 | +### 方案概述 | ||
| 108 | + | ||
| 109 | +采用**结构化过滤参数方案(方案 A 的简化版)**: | ||
| 110 | + | ||
| 111 | +- 分离 `filters`(精确匹配)和 `range_filters`(范围过滤) | ||
| 112 | +- 为简化设计,**不支持对同一字段指定多个不连续的范围**(每个字段只能有一个范围条件) | ||
| 113 | +- 标准化聚合参数,使用简化的接口 | ||
| 114 | +- 统一分面搜索响应格式 | ||
| 115 | + | ||
| 116 | +### 1. 新的请求模型设计 | ||
| 117 | + | ||
| 118 | +#### 1.1 核心模型定义 | ||
| 119 | + | ||
| 120 | +**文件**:`api/models.py` | ||
| 121 | + | ||
| 122 | +```python | ||
| 123 | +from pydantic import BaseModel, Field, model_validator | ||
| 124 | +from typing import List, Dict, Any, Optional, Union, Literal | ||
| 125 | + | ||
| 126 | + | ||
| 127 | +class RangeFilter(BaseModel): | ||
| 128 | + """数值范围过滤器""" | ||
| 129 | + gte: Optional[float] = Field(None, description="大于等于 (>=)") | ||
| 130 | + gt: Optional[float] = Field(None, description="大于 (>)") | ||
| 131 | + lte: Optional[float] = Field(None, description="小于等于 (<=)") | ||
| 132 | + lt: Optional[float] = Field(None, description="小于 (<)") | ||
| 133 | + | ||
| 134 | + @model_validator(mode='after') | ||
| 135 | + def check_at_least_one(self): | ||
| 136 | + """确保至少指定一个边界(在所有字段解析完成后整体校验)""" | ||
| 137 | + bounds = (self.gte, self.gt, self.lte, self.lt) | ||
| 138 | + # 用 is None 判断,避免把 0 这样的合法边界当作“未指定” | ||
| 139 | + if all(bound is None for bound in bounds): | ||
| 140 | + raise ValueError('至少需要指定一个范围边界') | ||
| 141 | + return self | ||
| 142 | + | ||
| 143 | + class Config: | ||
| 144 | + json_schema_extra = { | ||
| 145 | + "examples": [ | ||
| 146 | + {"gte": 50, "lte": 200}, | ||
| 147 | + {"gt": 100}, | ||
| 148 | + {"lt": 50} | ||
| 149 | + ] | ||
| 150 | + } | ||
| 151 | + | ||
| 152 | + | ||
| 153 | +class FacetConfig(BaseModel): | ||
| 154 | + """分面配置(简化版)""" | ||
| 155 | + field: str = Field(..., description="分面字段名") | ||
| 156 | + size: int = Field(10, ge=1, le=100, description="返回的分面值数量") | ||
| 157 | + type: Literal["terms", "range"] = Field("terms", description="分面类型") | ||
| 158 | + ranges: Optional[List[Dict[str, Any]]] = Field( | ||
| 159 | + None, | ||
| 160 | + description="范围分面的范围定义(仅当 type='range' 时需要)" | ||
| 161 | + ) | ||
| 162 | + | ||
| 163 | + class Config: | ||
| 164 | + json_schema_extra = { | ||
| 165 | + "examples": [ | ||
| 166 | + { | ||
| 167 | + "field": "categoryName_keyword", | ||
| 168 | + "size": 15, | ||
| 169 | + "type": "terms" | ||
| 170 | + }, | ||
| 171 | + { | ||
| 172 | + "field": "price", | ||
| 173 | + "size": 4, | ||
| 174 | + "type": "range", | ||
| 175 | + "ranges": [ | ||
| 176 | + {"key": "0-50", "to": 50}, | ||
| 177 | + {"key": "50-100", "from": 50, "to": 100}, | ||
| 178 | + {"key": "100-200", "from": 100, "to": 200}, | ||
| 179 | + {"key": "200+", "from": 200} | ||
| 180 | + ] | ||
| 181 | + } | ||
| 182 | + ] | ||
| 183 | + } | ||
| 184 | + | ||
| 185 | + | ||
| 186 | +class SearchRequest(BaseModel): | ||
| 187 | + """搜索请求模型(重构版)""" | ||
| 188 | + | ||
| 189 | + # 基础搜索参数 | ||
| 190 | + query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)") | ||
| 191 | + size: int = Field(10, ge=1, le=100, description="返回结果数量") | ||
| 192 | + from_: int = Field(0, ge=0, alias="from", description="分页偏移量") | ||
| 193 | + | ||
| 194 | + # 过滤器 - 精确匹配和多值匹配 | ||
| 195 | + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field( | ||
| 196 | + None, | ||
| 197 | + description="精确匹配过滤器。单值表示精确匹配,数组表示 OR 匹配(匹配任意一个值)", | ||
| 198 | + json_schema_extra={ | ||
| 199 | + "examples": [ | ||
| 200 | + { | ||
| 201 | + "categoryName_keyword": ["玩具", "益智玩具"], | ||
| 202 | + "brandName_keyword": "乐高", | ||
| 203 | + "in_stock": True | ||
| 204 | + } | ||
| 205 | + ] | ||
| 206 | + } | ||
| 207 | + ) | ||
| 208 | + | ||
| 209 | + # 范围过滤器 - 数值范围 | ||
| 210 | + range_filters: Optional[Dict[str, RangeFilter]] = Field( | ||
| 211 | + None, | ||
| 212 | + description="数值范围过滤器。支持 gte, gt, lte, lt 操作符", | ||
| 213 | + json_schema_extra={ | ||
| 214 | + "examples": [ | ||
| 215 | + { | ||
| 216 | + "price": {"gte": 50, "lte": 200}, | ||
| 217 | + "days_since_last_update": {"lte": 30} | ||
| 218 | + } | ||
| 219 | + ] | ||
| 220 | + } | ||
| 221 | + ) | ||
| 222 | + | ||
| 223 | + # 排序 | ||
| 224 | + sort_by: Optional[str] = Field(None, description="排序字段名(如 'price', 'create_time')") | ||
| 225 | + sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)") | ||
| 226 | + | ||
| 227 | + # 分面搜索 - 简化接口 | ||
| 228 | + facets: Optional[List[Union[str, FacetConfig]]] = Field( | ||
| 229 | + None, | ||
| 230 | + description="分面配置。可以是字段名列表(使用默认配置)或详细的分面配置对象", | ||
| 231 | + json_schema_extra={ | ||
| 232 | + "examples": [ | ||
| 233 | + # 简单模式:只指定字段名,使用默认配置 | ||
| 234 | + ["categoryName_keyword", "brandName_keyword"], | ||
| 235 | + # 高级模式:详细配置 | ||
| 236 | + [ | ||
| 237 | + {"field": "categoryName_keyword", "size": 15}, | ||
| 238 | + { | ||
| 239 | + "field": "price", | ||
| 240 | + "type": "range", | ||
| 241 | + "ranges": [ | ||
| 242 | + {"key": "0-50", "to": 50}, | ||
| 243 | + {"key": "50-100", "from": 50, "to": 100} | ||
| 244 | + ] | ||
| 245 | + } | ||
| 246 | + ] | ||
| 247 | + ] | ||
| 248 | + } | ||
| 249 | + ) | ||
| 250 | + | ||
| 251 | + # 高级选项 | ||
| 252 | + min_score: Optional[float] = Field(None, ge=0, description="最小相关性分数阈值") | ||
| 253 | + highlight: bool = Field(False, description="是否高亮搜索关键词(暂不实现)") | ||
| 254 | + debug: bool = Field(False, description="是否返回调试信息") | ||
| 255 | + | ||
| 256 | + # 个性化参数(预留) | ||
| 257 | + user_id: Optional[str] = Field(None, description="用户ID,用于个性化搜索和推荐") | ||
| 258 | + session_id: Optional[str] = Field(None, description="会话ID,用于搜索分析") | ||
| 259 | + | ||
| 260 | + | ||
| 261 | +class ImageSearchRequest(BaseModel): | ||
| 262 | + """图片搜索请求模型""" | ||
| 263 | + image_url: str = Field(..., description="查询图片的 URL") | ||
| 264 | + size: int = Field(10, ge=1, le=100, description="返回结果数量") | ||
| 265 | + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = None | ||
| 266 | + range_filters: Optional[Dict[str, RangeFilter]] = None | ||
| 267 | + | ||
| 268 | + | ||
| 269 | +class SearchSuggestRequest(BaseModel): | ||
| 270 | + """搜索建议请求模型(框架,暂不实现)""" | ||
| 271 | + query: str = Field(..., min_length=1, description="搜索查询字符串") | ||
| 272 | + size: int = Field(5, ge=1, le=20, description="返回建议数量") | ||
| 273 | + types: List[Literal["query", "product", "category", "brand"]] = Field( | ||
| 274 | + ["query"], | ||
| 275 | + description="建议类型:query(查询建议), product(商品建议), category(类目建议), brand(品牌建议)" | ||
| 276 | + ) | ||
| 277 | +``` | ||
| 278 | + | ||
| 279 | +#### 1.2 响应模型定义 | ||
| 280 | + | ||
| 281 | +```python | ||
| 282 | +class FacetValue(BaseModel): | ||
| 283 | + """分面值""" | ||
| 284 | + value: Union[str, int, float] = Field(..., description="分面值") | ||
| 285 | + label: Optional[str] = Field(None, description="显示标签(如果与 value 不同)") | ||
| 286 | + count: int = Field(..., description="匹配的文档数量") | ||
| 287 | + selected: bool = Field(False, description="是否已选中(当前过滤器中)") | ||
| 288 | + | ||
| 289 | + | ||
| 290 | +class FacetResult(BaseModel): | ||
| 291 | + """分面结果(标准化格式)""" | ||
| 292 | + field: str = Field(..., description="字段名") | ||
| 293 | + label: str = Field(..., description="分面显示名称") | ||
| 294 | + type: Literal["terms", "range"] = Field(..., description="分面类型") | ||
| 295 | + values: List[FacetValue] = Field(..., description="分面值列表") | ||
| 296 | + total_count: Optional[int] = Field(None, description="该字段的总文档数") | ||
| 297 | + | ||
| 298 | + | ||
| 299 | +class SearchResponse(BaseModel): | ||
| 300 | + """搜索响应模型(重构版)""" | ||
| 301 | + | ||
| 302 | + # 核心结果 | ||
| 303 | + hits: List[Dict[str, Any]] = Field(..., description="搜索结果列表") | ||
| 304 | + total: int = Field(..., description="匹配的总文档数") | ||
| 305 | + max_score: float = Field(..., description="最高相关性分数") | ||
| 306 | + | ||
| 307 | + # 分面搜索结果(标准化格式) | ||
| 308 | + facets: Optional[List[FacetResult]] = Field( | ||
| 309 | + None, | ||
| 310 | + description="分面统计结果(标准化格式)" | ||
| 311 | + ) | ||
| 312 | + | ||
| 313 | + # 查询信息 | ||
| 314 | + query_info: Dict[str, Any] = Field( | ||
| 315 | + default_factory=dict, | ||
| 316 | + description="查询处理信息(原始查询、改写、语言检测、翻译等)" | ||
| 317 | + ) | ||
| 318 | + | ||
| 319 | + # 推荐与建议(预留) | ||
| 320 | + related_queries: Optional[List[str]] = Field(None, description="相关搜索查询") | ||
| 321 | + | ||
| 322 | + # 性能指标 | ||
| 323 | + took_ms: int = Field(..., description="搜索总耗时(毫秒)") | ||
| 324 | + performance_info: Optional[Dict[str, Any]] = Field(None, description="详细性能信息") | ||
| 325 | + | ||
| 326 | + # 调试信息 | ||
| 327 | + debug_info: Optional[Dict[str, Any]] = Field(None, description="调试信息(仅当 debug=True)") | ||
| 328 | + | ||
| 329 | + | ||
| 330 | +class SearchSuggestResponse(BaseModel): | ||
| 331 | + """搜索建议响应模型(框架,暂不实现)""" | ||
| 332 | + query: str = Field(..., description="原始查询") | ||
| 333 | + suggestions: List[Dict[str, Any]] = Field(..., description="建议列表") | ||
| 334 | + took_ms: int = Field(..., description="耗时(毫秒)") | ||
| 335 | +``` | ||
| 336 | + | ||
| 337 | +### 2. 查询构建器重构 | ||
| 338 | + | ||
| 339 | +#### 2.1 移除硬编码的 price_ranges 逻辑 | ||
| 340 | + | ||
| 341 | +**文件**:`search/es_query_builder.py` | ||
| 342 | + | ||
| 343 | +**需要修改的方法**:`_build_filters(self, filters, range_filters)` | ||
| 344 | + | ||
| 345 | +**改进点**: | ||
| 346 | + | ||
| 347 | +1. 移除 `if field == 'price_ranges'` 的特殊处理 | ||
| 348 | +2. 分离 filters 和 range_filters 的处理逻辑 | ||
| 349 | +3. 添加字段类型验证(利用配置系统) | ||
| 350 | + | ||
| 351 | +**新的实现逻辑**: | ||
| 352 | + | ||
| 353 | +```python | ||
| 354 | +def _build_filters( | ||
| 355 | + self, | ||
| 356 | + filters: Optional[Dict[str, Any]] = None, | ||
| 357 | + range_filters: Optional[Dict[str, Any]] = None | ||
| 358 | +) -> List[Dict[str, Any]]: | ||
| 359 | + """ | ||
| 360 | + 构建过滤子句(重构版)。 | ||
| 361 | + | ||
| 362 | + Args: | ||
| 363 | + filters: 精确匹配过滤器字典 | ||
| 364 | + range_filters: 范围过滤器字典 | ||
| 365 | + | ||
| 366 | + Returns: | ||
| 367 | + ES filter子句列表 | ||
| 368 | + """ | ||
| 369 | + filter_clauses = [] | ||
| 370 | + | ||
| 371 | + # 1. 处理精确匹配过滤 | ||
| 372 | + if filters: | ||
| 373 | + for field, value in filters.items(): | ||
| 374 | + if isinstance(value, list): | ||
| 375 | + # 多值匹配(OR) | ||
| 376 | + filter_clauses.append({ | ||
| 377 | + "terms": {field: value} | ||
| 378 | + }) | ||
| 379 | + else: | ||
| 380 | + # 单值精确匹配 | ||
| 381 | + filter_clauses.append({ | ||
| 382 | + "term": {field: value} | ||
| 383 | + }) | ||
| 384 | + | ||
| 385 | + # 2. 处理范围过滤 | ||
| 386 | + if range_filters: | ||
| 387 | + for field, range_spec in range_filters.items(): | ||
| 388 | + # 验证字段是否为数值类型(可选,基于配置) | ||
| 389 | + # TODO: 添加字段类型验证 | ||
| 390 | + | ||
| 391 | + # 构建范围查询 | ||
| 392 | + range_conditions = {} | ||
| 393 | + if isinstance(range_spec, dict): | ||
| 394 | + for op in ['gte', 'gt', 'lte', 'lt']: | ||
| 395 | + if op in range_spec and range_spec[op] is not None: | ||
| 396 | + range_conditions[op] = range_spec[op] | ||
| 397 | + | ||
| 398 | + if range_conditions: | ||
| 399 | + filter_clauses.append({ | ||
| 400 | + "range": {field: range_conditions} | ||
| 401 | + }) | ||
| 402 | + | ||
| 403 | + return filter_clauses | ||
| 404 | +``` | ||
| 405 | + | ||
| 406 | +#### 2.2 优化聚合参数接口 | ||
| 407 | + | ||
| 408 | +**新增方法**:`build_facets(self, facet_configs)` | ||
| 409 | + | ||
| 410 | +**改进点**: | ||
| 411 | + | ||
| 412 | +1. 移除 `add_dynamic_aggregations`(直接暴露 ES DSL) | ||
| 413 | +2. 重构 `add_aggregations` 为更通用的 `build_facets` | ||
| 414 | +3. 支持简化配置和高级配置两种模式 | ||
| 415 | + | ||
| 416 | +**新的实现逻辑**: | ||
| 417 | + | ||
| 418 | +```python | ||
| 419 | +def build_facets( | ||
| 420 | + self, | ||
| 421 | + facet_configs: Optional[List[Union[str, Dict[str, Any]]]] = None | ||
| 422 | +) -> Dict[str, Any]: | ||
| 423 | + """ | ||
| 424 | + 构建分面聚合(重构版)。 | ||
| 425 | + | ||
| 426 | + Args: | ||
| 427 | + facet_configs: 分面配置列表。可以是: | ||
| 428 | + - 字符串列表:字段名,使用默认配置 | ||
| 429 | + - 配置对象列表:详细的分面配置 | ||
| 430 | + | ||
| 431 | + Returns: | ||
| 432 | + ES aggregations字典 | ||
| 433 | + """ | ||
| 434 | + if not facet_configs: | ||
| 435 | + return {} | ||
| 436 | + | ||
| 437 | + aggs = {} | ||
| 438 | + | ||
| 439 | + for config in facet_configs: | ||
| 440 | + # 1. 简单模式:只有字段名 | ||
| 441 | + if isinstance(config, str): | ||
| 442 | + field = config | ||
| 443 | + agg_name = f"{field}_facet" | ||
| 444 | + aggs[agg_name] = { | ||
| 445 | + "terms": { | ||
| 446 | + "field": field, | ||
| 447 | + "size": 10, # 默认大小 | ||
| 448 | + "order": {"_count": "desc"} | ||
| 449 | + } | ||
| 450 | + } | ||
| 451 | + | ||
| 452 | + # 2. 高级模式:详细配置对象 | ||
| 453 | + elif isinstance(config, dict): | ||
| 454 | + field = config['field'] | ||
| 455 | + facet_type = config.get('type', 'terms') | ||
| 456 | + size = config.get('size', 10) | ||
| 457 | + agg_name = f"{field}_facet" | ||
| 458 | + | ||
| 459 | + if facet_type == 'terms': | ||
| 460 | + # Terms 聚合(分组统计) | ||
| 461 | + aggs[agg_name] = { | ||
| 462 | + "terms": { | ||
| 463 | + "field": field, | ||
| 464 | + "size": size, | ||
| 465 | + "order": {"_count": "desc"} | ||
| 466 | + } | ||
| 467 | + } | ||
| 468 | + | ||
| 469 | + elif facet_type == 'range': | ||
| 470 | + # Range 聚合(范围统计) | ||
| 471 | + ranges = config.get('ranges', []) | ||
| 472 | + if ranges: | ||
| 473 | + aggs[agg_name] = { | ||
| 474 | + "range": { | ||
| 475 | + "field": field, | ||
| 476 | + "ranges": ranges | ||
| 477 | + } | ||
| 478 | + } | ||
| 479 | + | ||
| 480 | + return aggs | ||
| 481 | +``` | ||
| 482 | + | ||
| 483 | +#### 2.3 更新主查询构建方法 | ||
| 484 | + | ||
| 485 | +**修改方法签名**:`build_query()` | ||
| 486 | + | ||
| 487 | +```python | ||
| 488 | +def build_query( | ||
| 489 | + self, | ||
| 490 | + query_text: str, | ||
| 491 | + query_vector: Optional[np.ndarray] = None, | ||
| 492 | + query_node: Optional[QueryNode] = None, | ||
| 493 | + filters: Optional[Dict[str, Any]] = None, | ||
| 494 | + range_filters: Optional[Dict[str, Any]] = None, # 新增 | ||
| 495 | + size: int = 10, | ||
| 496 | + from_: int = 0, | ||
| 497 | + enable_knn: bool = True, | ||
| 498 | + knn_k: int = 50, | ||
| 499 | + knn_num_candidates: int = 200, | ||
| 500 | + min_score: Optional[float] = None | ||
| 501 | +) -> Dict[str, Any]: | ||
| 502 | + """构建完整的 ES 查询(重构版)""" | ||
| 503 | + # ... 实现 | ||
| 504 | + | ||
| 505 | + # 添加过滤器 | ||
| 506 | + if filters or range_filters: | ||
| 507 | + filter_clauses = self._build_filters(filters, range_filters) | ||
| 508 | + if filter_clauses: | ||
| 509 | + es_query["query"] = { | ||
| 510 | + "bool": { | ||
| 511 | + "must": [query_clause], | ||
| 512 | + "filter": filter_clauses | ||
| 513 | + } | ||
| 514 | + } | ||
| 515 | +``` | ||
| 516 | + | ||
| 517 | +### 3. 搜索执行层重构 | ||
| 518 | + | ||
| 519 | +**文件**:`search/searcher.py` | ||
| 520 | + | ||
| 521 | +**需要修改的方法**:`search()` | ||
| 522 | + | ||
| 523 | +**改进点**: | ||
| 524 | + | ||
| 525 | +1. 更新方法签名,接受 `range_filters` 参数 | ||
| 526 | +2. 使用新的 `build_facets` 方法替代旧的聚合逻辑 | ||
| 527 | +3. 标准化分面搜索结果 | ||
| 528 | + | ||
| 529 | +**关键代码片段**: | ||
| 530 | + | ||
| 531 | +```python | ||
| 532 | +def search( | ||
| 533 | + self, | ||
| 534 | + query: str, | ||
| 535 | + size: int = 10, | ||
| 536 | + from_: int = 0, | ||
| 537 | + filters: Optional[Dict[str, Any]] = None, | ||
| 538 | + range_filters: Optional[Dict[str, Any]] = None, # 新增 | ||
| 539 | + facets: Optional[List[Union[str, Dict]]] = None, # 替代 aggregations | ||
| 540 | + min_score: Optional[float] = None, | ||
| 541 | + sort_by: Optional[str] = None, | ||
| 542 | + sort_order: Optional[str] = "desc", | ||
| 543 | + debug: bool = False, | ||
| 544 | + context: Optional[RequestContext] = None | ||
| 545 | +) -> SearchResult: | ||
| 546 | + """执行搜索(重构版)""" | ||
| 547 | + | ||
| 548 | + # ... 查询解析 ... | ||
| 549 | + | ||
| 550 | + # 构建 ES 查询 | ||
| 551 | + es_query = self.query_builder.build_multilang_query( | ||
| 552 | + parsed_query=parsed_query, | ||
| 553 | + query_vector=parsed_query.query_vector, | ||
| 554 | + query_node=query_node, | ||
| 555 | + filters=filters, | ||
| 556 | + range_filters=range_filters, # 新增 | ||
| 557 | + size=size, | ||
| 558 | + from_=from_, | ||
| 559 | + enable_knn=enable_embedding, | ||
| 560 | + min_score=min_score | ||
| 561 | + ) | ||
| 562 | + | ||
| 563 | + # 添加分面聚合 | ||
| 564 | + if facets: | ||
| 565 | + facet_aggs = self.query_builder.build_facets(facets) | ||
| 566 | + if facet_aggs: | ||
| 567 | + if "aggs" not in es_query: | ||
| 568 | + es_query["aggs"] = {} | ||
| 569 | + es_query["aggs"].update(facet_aggs) | ||
| 570 | + | ||
| 571 | + # ... 执行搜索 ... | ||
| 572 | + | ||
| 573 | + # 标准化分面结果 | ||
| 574 | + standardized_facets = self._standardize_facets( | ||
| 575 | + es_response.get('aggregations', {}), | ||
| 576 | + facets, | ||
| 577 | + filters | ||
| 578 | + ) | ||
| 579 | + | ||
| 580 | + return SearchResult( | ||
| 581 | + hits=hits, | ||
| 582 | + total=total_value, | ||
| 583 | + max_score=max_score, | ||
| 584 | + took_ms=int(total_duration), | ||
| 585 | + facets=standardized_facets, # 标准化格式 | ||
| 586 | + query_info=parsed_query.to_dict(), | ||
| 587 | + debug_info=debug_info | ||
| 588 | + ) | ||
| 589 | +``` | ||
| 590 | + | ||
| 591 | +**新增辅助方法**: | ||
| 592 | + | ||
| 593 | +```python | ||
| 594 | +def _standardize_facets( | ||
| 595 | + self, | ||
| 596 | + es_aggregations: Dict[str, Any], | ||
| 597 | + facet_configs: Optional[List[Union[str, Dict]]], | ||
| 598 | + current_filters: Optional[Dict[str, Any]] | ||
| 599 | +) -> Optional[List[Dict[str, Any]]]: | ||
| 600 | + """ | ||
| 601 | + 将 ES 聚合结果转换为标准化的分面格式。 | ||
| 602 | + | ||
| 603 | + Args: | ||
| 604 | + es_aggregations: ES 原始聚合结果 | ||
| 605 | + facet_configs: 分面配置列表 | ||
| 606 | + current_filters: 当前应用的过滤器 | ||
| 607 | + | ||
| 608 | + Returns: | ||
| 609 | + 标准化的分面结果列表 | ||
| 610 | + """ | ||
| 611 | + if not es_aggregations or not facet_configs: | ||
| 612 | + return None | ||
| 613 | + | ||
| 614 | + standardized_facets = [] | ||
| 615 | + | ||
| 616 | + for config in facet_configs: | ||
| 617 | + # 解析配置 | ||
| 618 | + if isinstance(config, str): | ||
| 619 | + field = config | ||
| 620 | + facet_type = "terms" | ||
| 621 | + else: | ||
| 622 | + field = config['field'] | ||
| 623 | + facet_type = config.get('type', 'terms') | ||
| 624 | + | ||
| 625 | + agg_name = f"{field}_facet" | ||
| 626 | + | ||
| 627 | + if agg_name not in es_aggregations: | ||
| 628 | + continue | ||
| 629 | + | ||
| 630 | + agg_result = es_aggregations[agg_name] | ||
| 631 | + | ||
| 632 | + # 构建标准化分面结果 | ||
| 633 | + facet = { | ||
| 634 | + "field": field, | ||
| 635 | + "label": self._get_field_label(field), # 从配置获取 | ||
| 636 | + "type": facet_type, | ||
| 637 | + "values": [] | ||
| 638 | + } | ||
| 639 | + | ||
| 640 | + # 获取当前字段的选中值 | ||
| 641 | + selected_values = set() | ||
| 642 | + if current_filters and field in current_filters: | ||
| 643 | + filter_value = current_filters[field] | ||
| 644 | + if isinstance(filter_value, list): | ||
| 645 | + selected_values = set(filter_value) | ||
| 646 | + else: | ||
| 647 | + selected_values = {filter_value} | ||
| 648 | + | ||
| 649 | + # 转换 buckets | ||
| 650 | + if 'buckets' in agg_result: | ||
| 651 | + for bucket in agg_result['buckets']: | ||
| 652 | + value = bucket.get('key') | ||
| 653 | + count = bucket.get('doc_count', 0) | ||
| 654 | + | ||
| 655 | + facet['values'].append({ | ||
| 656 | + "value": value, | ||
| 657 | + "label": str(value), # 可以从配置映射 | ||
| 658 | + "count": count, | ||
| 659 | + "selected": value in selected_values | ||
| 660 | + }) | ||
| 661 | + | ||
| 662 | + standardized_facets.append(facet) | ||
| 663 | + | ||
| 664 | + return standardized_facets | ||
| 665 | + | ||
| 666 | + | ||
| 667 | +def _get_field_label(self, field: str) -> str: | ||
| 668 | + """获取字段的显示标签""" | ||
| 669 | + # 从配置中获取字段标签 | ||
| 670 | + for field_config in self.config.fields: | ||
| 671 | + if field_config.name == field: | ||
| 672 | + # 假设配置中有 label 字段 | ||
| 673 | + return getattr(field_config, 'label', field) | ||
| 674 | + return field | ||
| 675 | +``` | ||
| 676 | + | ||
| 677 | +### 4. API 路由层更新 | ||
| 678 | + | ||
| 679 | +**文件**:`api/routes/search.py` | ||
| 680 | + | ||
| 681 | +**改进点**: | ||
| 682 | + | ||
| 683 | +1. 接受新的请求模型参数 | ||
| 684 | +2. 添加搜索建议端点(框架) | ||
| 685 | + | ||
| 686 | +**新增端点**: | ||
| 687 | + | ||
| 688 | +```python | ||
| 689 | +@router.get("/suggestions", response_model=SearchSuggestResponse) | ||
| 690 | +async def search_suggestions( | ||
| 691 | + q: str = Query(..., min_length=1, description="搜索查询"), | ||
| 692 | + size: int = Query(5, ge=1, le=20, description="建议数量"), | ||
| 693 | + types: str = Query("query", description="建议类型(逗号分隔)") | ||
| 694 | +): | ||
| 695 | + """ | ||
| 696 | + 获取搜索建议(自动补全)。 | ||
| 697 | + | ||
| 698 | + 功能说明: | ||
| 699 | + - 查询建议(query):基于历史搜索和热门搜索 | ||
| 700 | + - 商品建议(product):匹配的商品 | ||
| 701 | + - 类目建议(category):匹配的类目 | ||
| 702 | + - 品牌建议(brand):匹配的品牌 | ||
| 703 | + | ||
| 704 | + 注意:此功能暂未实现,仅返回框架响应。 | ||
| 705 | + """ | ||
| 706 | + import time | ||
| 707 | + start_time = time.time() | ||
| 708 | + | ||
| 709 | + # TODO: 实现搜索建议逻辑 | ||
| 710 | + # 1. 从搜索历史中获取建议 | ||
| 711 | + # 2. 从商品标题中匹配前缀 | ||
| 712 | + # 3. 从类目、品牌中匹配 | ||
| 713 | + | ||
| 714 | + # 临时返回空结果 | ||
| 715 | + suggestions = [] | ||
| 716 | + | ||
| 717 | + # 示例结构(暂不实现) | ||
| 718 | + # suggestions = [ | ||
| 719 | + # { | ||
| 720 | + # "text": "芭比娃娃", | ||
| 721 | + # "type": "query", | ||
| 722 | + # "highlight": "<em>芭</em>比娃娃", | ||
| 723 | + # "popularity": 850 | ||
| 724 | + # } | ||
| 725 | + # ] | ||
| 726 | + | ||
| 727 | + took_ms = int((time.time() - start_time) * 1000) | ||
| 728 | + | ||
| 729 | + return SearchSuggestResponse( | ||
| 730 | + query=q, | ||
| 731 | + suggestions=suggestions, | ||
| 732 | + took_ms=took_ms | ||
| 733 | + ) | ||
| 734 | + | ||
| 735 | + | ||
| 736 | +@router.get("/instant", response_model=SearchResponse) | ||
| 737 | +async def instant_search( | ||
| 738 | + q: str = Query(..., min_length=2, description="搜索查询"), | ||
| 739 | + size: int = Query(5, ge=1, le=20, description="结果数量") | ||
| 740 | +): | ||
| 741 | + """ | ||
| 742 | + 即时搜索(Instant Search)。 | ||
| 743 | + | ||
| 744 | + 功能说明: | ||
| 745 | + - 边输入边搜索,无需点击搜索按钮 | ||
| 746 | + - 返回简化的搜索结果 | ||
| 747 | + - 性能优化:缓存、限流 | ||
| 748 | + | ||
| 749 | + 注意:此功能暂未实现,调用标准搜索接口。 | ||
| 750 | + """ | ||
| 751 | + # TODO: 优化即时搜索性能 | ||
| 752 | + # 1. 添加防抖/节流 | ||
| 753 | + # 2. 实现结果缓存 | ||
| 754 | + # 3. 简化返回字段 | ||
| 755 | + | ||
| 756 | + # 临时使用标准搜索接口 | ||
| 757 | + from api.app import get_searcher | ||
| 758 | + searcher = get_searcher() | ||
| 759 | + | ||
| 760 | + result = searcher.search( | ||
| 761 | + query=q, | ||
| 762 | + size=size, | ||
| 763 | + from_=0 | ||
| 764 | + ) | ||
| 765 | + | ||
| 766 | + return SearchResponse( | ||
| 767 | + hits=result.hits, | ||
| 768 | + total=result.total, | ||
| 769 | + max_score=result.max_score, | ||
| 770 | + took_ms=result.took_ms, | ||
| 771 | + query_info=result.query_info | ||
| 772 | + ) | ||
| 773 | +``` | ||
| 774 | + | ||
| 775 | +### 5. 前端适配 | ||
| 776 | + | ||
| 777 | +**文件**:`frontend/static/js/app.js` | ||
| 778 | + | ||
| 779 | +**需要修改的地方**: | ||
| 780 | + | ||
| 781 | +1. **聚合参数改用简化配置**(第 57-87 行): | ||
| 782 | +```javascript | ||
| 783 | +// 旧的方式(直接 ES DSL) | ||
| 784 | +const aggregations = { | ||
| 785 | + "category_stats": { | ||
| 786 | + "terms": { | ||
| 787 | + "field": "categoryName_keyword", | ||
| 788 | + "size": 15 | ||
| 789 | + } | ||
| 790 | + } | ||
| 791 | +}; | ||
| 792 | + | ||
| 793 | +// 新的方式(简化配置) | ||
| 794 | +const facets = [ | ||
| 795 | + { | ||
| 796 | + "field": "categoryName_keyword", | ||
| 797 | + "size": 15, | ||
| 798 | + "type": "terms" | ||
| 799 | + }, | ||
| 800 | + { | ||
| 801 | + "field": "brandName_keyword", | ||
| 802 | + "size": 15, | ||
| 803 | + "type": "terms" | ||
| 804 | + }, | ||
| 805 | + { | ||
| 806 | + "field": "price", | ||
| 807 | + "type": "range", | ||
| 808 | + "ranges": [ | ||
| 809 | + {"key": "0-50", "to": 50}, | ||
| 810 | + {"key": "50-100", "from": 50, "to": 100}, | ||
| 811 | + {"key": "100-200", "from": 100, "to": 200}, | ||
| 812 | + {"key": "200+", "from": 200} | ||
| 813 | + ] | ||
| 814 | + } | ||
| 815 | +]; | ||
| 816 | +``` | ||
| 817 | + | ||
| 818 | +2. **过滤器使用新格式**(第 103 行): | ||
| 819 | +```javascript | ||
| 820 | +// 旧的方式 | ||
| 821 | +filters: { | ||
| 822 | + "price_ranges": ["0-50", "50-100"] // 硬编码 | ||
| 823 | +} | ||
| 824 | + | ||
| 825 | +// 新的方式 | ||
| 826 | +filters: { | ||
| 827 | + "categoryName_keyword": ["玩具"], | ||
| 828 | + "in_stock": true | ||
| 829 | +}, | ||
| 830 | +range_filters: { | ||
| 831 | + "price": {"gte": 50, "lte": 100} | ||
| 832 | +} | ||
| 833 | +``` | ||
| 834 | + | ||
| 835 | +3. **解析标准化的分面结果**(第 208-258 行): | ||
| 836 | +```javascript | ||
| 837 | +// 旧的方式(直接访问 ES 结构) | ||
| 838 | +if (aggregations.category_stats && aggregations.category_stats.buckets) { | ||
| 839 | + aggregations.category_stats.buckets.forEach(bucket => { | ||
| 840 | + // ... | ||
| 841 | + }); | ||
| 842 | +} | ||
| 843 | + | ||
| 844 | +// 新的方式(标准化格式) | ||
| 845 | +if (data.facets) { | ||
| 846 | + data.facets.forEach(facet => { | ||
| 847 | + if (facet.field === 'categoryName_keyword') { | ||
| 848 | + facet.values.forEach(facetValue => { | ||
| 849 | + const value = facetValue.value; | ||
| 850 | + const count = facetValue.count; | ||
| 851 | + const selected = facetValue.selected; | ||
| 852 | + // ... | ||
| 853 | + }); | ||
| 854 | + } | ||
| 855 | + }); | ||
| 856 | +} | ||
| 857 | +``` | ||
| 858 | + | ||
| 859 | + | ||
| 860 | +### 6. 测试代码更新 | ||
| 861 | + | ||
| 862 | +**文件**:`test_aggregation_api.py` | ||
| 863 | + | ||
| 864 | +**需要修改的地方**: | ||
| 865 | + | ||
| 866 | +1. 移除 `price_ranges` 硬编码测试(第 93 行) | ||
| 867 | +2. 使用新的 `range_filters` 格式 | ||
| 868 | +3. 使用新的 `facets` 配置 | ||
| 869 | + | ||
| 870 | +**新的测试代码**: | ||
| 871 | + | ||
| 872 | +```python | ||
| 873 | +def test_search_with_filters(): | ||
| 874 | + """测试新的过滤器格式""" | ||
| 875 | + test_request = { | ||
| 876 | + "query": "玩具", | ||
| 877 | + "size": 5, | ||
| 878 | + "filters": { | ||
| 879 | + "categoryName_keyword": ["玩具"] | ||
| 880 | + }, | ||
| 881 | + "range_filters": { | ||
| 882 | + "price": {"gte": 50, "lte": 100} | ||
| 883 | + } | ||
| 884 | + } | ||
| 885 | + # ... | ||
| 886 | + | ||
| 887 | +def test_search_with_facets(): | ||
| 888 | + """测试新的分面配置""" | ||
| 889 | + test_request = { | ||
| 890 | + "query": "玩具", | ||
| 891 | + "size": 20, | ||
| 892 | + "facets": [ | ||
| 893 | + { | ||
| 894 | + "field": "categoryName_keyword", | ||
| 895 | + "size": 15 | ||
| 896 | + }, | ||
| 897 | + { | ||
| 898 | + "field": "price", | ||
| 899 | + "type": "range", | ||
| 900 | + "ranges": [ | ||
| 901 | + {"key": "0-50", "to": 50}, | ||
| 902 | + {"key": "50-100", "from": 50, "to": 100} | ||
| 903 | + ] | ||
| 904 | + } | ||
| 905 | + ] | ||
| 906 | + } | ||
| 907 | + # ... | ||
| 908 | +``` | ||
| 909 | + | ||
| 910 | +## 第三部分:实施步骤 | ||
| 911 | + | ||
| 912 | +### 阶段 1:后端模型层重构(高优先级) | ||
| 913 | + | ||
| 914 | +**任务清单**: | ||
| 915 | + | ||
| 916 | +- [ ] 更新 `api/models.py` | ||
| 917 | + - [ ] 定义 `RangeFilter` 模型 | ||
| 918 | + - [ ] 定义 `FacetConfig` 模型 | ||
| 919 | + - [ ] 更新 `SearchRequest`,添加 `range_filters` 和 `facets` | ||
| 920 | + - [ ] 移除 `aggregations` 参数 | ||
| 921 | + - [ ] 定义 `FacetValue` 和 `FacetResult` 模型 | ||
| 922 | + - [ ] 更新 `SearchResponse`,使用标准化分面格式 | ||
| 923 | + - [ ] 添加 `SearchSuggestRequest` 和 `SearchSuggestResponse`(框架) | ||
| 924 | + | ||
| 925 | +**验证方式**: | ||
| 926 | + | ||
| 927 | +- 运行 Pydantic 模型验证 | ||
| 928 | +- 检查 API 文档(`/docs`)是否正确生成 | ||
| 929 | + | ||
| 930 | +### 阶段 2:查询构建器重构(高优先级) | ||
| 931 | + | ||
| 932 | +**任务清单**: | ||
| 933 | + | ||
| 934 | +- [ ] 重构 `search/es_query_builder.py` | ||
| 935 | + - [ ] 移除 `price_ranges` 硬编码逻辑(第 205-233 行) | ||
| 936 | + - [ ] 重构 `_build_filters` 方法,支持 `range_filters` | ||
| 937 | + - [ ] 移除 `add_dynamic_aggregations` 方法 | ||
| 938 | + - [ ] 重构 `add_aggregations` 为 `build_facets` | ||
| 939 | + - [ ] 更新 `build_query` 方法签名 | ||
| 940 | +- [ ] 更新 `search/multilang_query_builder.py`(如果需要) | ||
| 941 | + | ||
| 942 | +**验证方式**: | ||
| 943 | + | ||
| 944 | +- 编写单元测试验证过滤器构建逻辑 | ||
| 945 | +- 打印生成的 ES DSL,检查正确性 | ||
| 946 | + | ||
| 947 | +### 阶段 3:搜索执行层重构(高优先级) | ||
| 948 | + | ||
| 949 | +**任务清单**: | ||
| 950 | + | ||
| 951 | +- [ ] 更新 `search/searcher.py` | ||
| 952 | + - [ ] 更新 `search()` 方法签名 | ||
| 953 | + - [ ] 使用新的 `build_facets` 方法 | ||
| 954 | + - [ ] 实现 `_standardize_facets()` 辅助方法 | ||
| 955 | + - [ ] 实现 `_get_field_label()` 辅助方法 | ||
| 956 | + - [ ] 更新 `SearchResult` 类,使用标准化分面格式 | ||
| 957 | + | ||
| 958 | +**验证方式**: | ||
| 959 | + | ||
| 960 | +- 编写集成测试 | ||
| 961 | +- 手动测试搜索功能 | ||
| 962 | + | ||
| 963 | +### 阶段 4:API 路由层更新(中优先级) | ||
| 964 | + | ||
| 965 | +**任务清单**: | ||
| 966 | + | ||
| 967 | +- [ ] 更新 `api/routes/search.py` | ||
| 968 | + - [ ] 更新 `/search/` 端点,接受新的请求参数 | ||
| 969 | + - [ ] 添加 `/search/suggestions` 端点(框架,返回空结果) | ||
| 970 | + - [ ] 添加 `/search/instant` 端点(框架,调用标准搜索) | ||
| 971 | + - [ ] 添加端点文档和示例 | ||
| 972 | + | ||
| 973 | +**验证方式**: | ||
| 974 | + | ||
| 975 | +- 使用 Swagger UI 测试端点 | ||
| 976 | +- 检查 API 文档完整性 | ||
| 977 | + | ||
| 978 | +### 阶段 5:前端适配(中优先级) | ||
| 979 | + | ||
| 980 | +**任务清单**: | ||
| 981 | + | ||
| 982 | +- [ ] 更新 `frontend/static/js/app.js` | ||
| 983 | + - [ ] 修改聚合参数为 `facets` 简化配置 | ||
| 984 | + - [ ] 修改过滤器参数,分离 `filters` 和 `range_filters` | ||
| 985 | + - [ ] 更新 `displayAggregations()` 方法,解析标准化分面结果 | ||
| 986 | + - [ ] 添加范围过滤器 UI(如价格滑块) | ||
| 987 | + - [ ] 移除硬编码的 `price_ranges` | ||
| 988 | + | ||
| 989 | +**验证方式**: | ||
| 990 | + | ||
| 991 | +- 浏览器测试前端功能 | ||
| 992 | +- 检查网络请求和响应格式 | ||
| 993 | + | ||
| 994 | +### 阶段 6:测试代码更新(低优先级) | ||
| 995 | + | ||
| 996 | +**任务清单**: | ||
| 997 | + | ||
| 998 | +- [ ] 更新 `test_aggregation_api.py` | ||
| 999 | + - [ ] 移除 `price_ranges` 测试 | ||
| 1000 | + - [ ] 添加 `range_filters` 测试 | ||
| 1001 | + - [ ] 添加新的 `facets` 测试 | ||
| 1002 | +- [ ] 更新 `test_complete_search.py` | ||
| 1003 | +- [ ] 更新 `tests/integration/test_aggregation_api.py` | ||
| 1004 | +- [ ] 更新 `tests/unit/test_searcher.py` | ||
| 1005 | + | ||
| 1006 | +**验证方式**: | ||
| 1007 | + | ||
| 1008 | +- 运行所有测试,确保通过 | ||
| 1009 | +- 检查测试覆盖率 | ||
| 1010 | + | ||
| 1011 | +### 阶段 7:文档更新(低优先级) | ||
| 1012 | + | ||
| 1013 | +**任务清单**: | ||
| 1014 | + | ||
| 1015 | +- [ ] 撰写完整的 API 接口文档 | ||
| 1016 | +- [ ] 更新 `README.md` | ||
| 1017 | +- [ ] 更新 `USER_GUIDE.md` | ||
| 1018 | +- [ ] 添加接口使用示例 | ||
| 1019 | +- [ ] 添加迁移指南(旧接口 → 新接口) | ||
| 1020 | + | ||
| 1021 | +## 第四部分:API 使用示例 | ||
| 1022 | + | ||
| 1023 | +### 示例 1:简单搜索 | ||
| 1024 | + | ||
| 1025 | +```bash | ||
| 1026 | +POST /search/ | ||
| 1027 | +{ | ||
| 1028 | + "query": "芭比娃娃", | ||
| 1029 | + "size": 20 | ||
| 1030 | +} | ||
| 1031 | +``` | ||
| 1032 | + | ||
| 1033 | +### 示例 2:带过滤器的搜索 | ||
| 1034 | + | ||
| 1035 | +```bash | ||
| 1036 | +POST /search/ | ||
| 1037 | +{ | ||
| 1038 | + "query": "玩具", | ||
| 1039 | + "size": 20, | ||
| 1040 | + "filters": { | ||
| 1041 | + "categoryName_keyword": ["玩具", "益智玩具"], | ||
| 1042 | + "in_stock": true | ||
| 1043 | + }, | ||
| 1044 | + "range_filters": { | ||
| 1045 | + "price": {"gte": 50, "lte": 200} | ||
| 1046 | + } | ||
| 1047 | +} | ||
| 1048 | +``` | ||
| 1049 | + | ||
| 1050 | +### 示例 3:带分面搜索的请求 | ||
| 1051 | + | ||
| 1052 | +```bash | ||
| 1053 | +POST /search/ | ||
| 1054 | +{ | ||
| 1055 | + "query": "玩具", | ||
| 1056 | + "size": 20, | ||
| 1057 | + "facets": [ | ||
| 1058 | + { | ||
| 1059 | + "field": "categoryName_keyword", | ||
| 1060 | + "size": 15 | ||
| 1061 | + }, | ||
| 1062 | + { | ||
| 1063 | + "field": "brandName_keyword", | ||
| 1064 | + "size": 15 | ||
| 1065 | + }, | ||
| 1066 | + { | ||
| 1067 | + "field": "price", | ||
| 1068 | + "type": "range", | ||
| 1069 | + "ranges": [ | ||
| 1070 | + {"key": "0-50", "to": 50}, | ||
| 1071 | + {"key": "50-100", "from": 50, "to": 100}, | ||
| 1072 | + {"key": "100-200", "from": 100, "to": 200}, | ||
| 1073 | + {"key": "200+", "from": 200} | ||
| 1074 | + ] | ||
| 1075 | + } | ||
| 1076 | + ] | ||
| 1077 | +} | ||
| 1078 | +``` | ||
| 1079 | + | ||
| 1080 | +**响应示例**(标准化分面格式): | ||
| 1081 | + | ||
| 1082 | +```json | ||
| 1083 | +{ | ||
| 1084 | + "hits": [...], | ||
| 1085 | + "total": 118, | ||
| 1086 | + "max_score": 8.5, | ||
| 1087 | + "took_ms": 45, | ||
| 1088 | + "facets": [ | ||
| 1089 | + { | ||
| 1090 | + "field": "categoryName_keyword", | ||
| 1091 | + "label": "商品类目", | ||
| 1092 | + "type": "terms", | ||
| 1093 | + "values": [ | ||
| 1094 | + {"value": "玩具", "label": "玩具", "count": 85, "selected": false}, | ||
| 1095 | + {"value": "益智玩具", "label": "益智玩具", "count": 33, "selected": false} | ||
| 1096 | + ] | ||
| 1097 | + }, | ||
| 1098 | + { | ||
| 1099 | + "field": "price", | ||
| 1100 | + "label": "价格区间", | ||
| 1101 | + "type": "range", | ||
| 1102 | + "values": [ | ||
| 1103 | + {"value": "0-50", "label": "0-50元", "count": 23, "selected": false}, | ||
| 1104 | + {"value": "50-100", "label": "50-100元", "count": 45, "selected": false}, | ||
| 1105 | + {"value": "100-200", "label": "100-200元", "count": 38, "selected": false}, | ||
| 1106 | + {"value": "200+", "label": "200元以上", "count": 12, "selected": false} | ||
| 1107 | + ] | ||
| 1108 | + } | ||
| 1109 | + ] | ||
| 1110 | +} | ||
| 1111 | +``` | ||
| 1112 | + | ||
| 1113 | +### 示例 4:搜索建议(框架) | ||
| 1114 | + | ||
| 1115 | +```bash | ||
| 1116 | +GET /search/suggestions?q=芭&size=5 | ||
| 1117 | + | ||
| 1118 | +{ | ||
| 1119 | + "query": "芭", | ||
| 1120 | + "suggestions": [ | ||
| 1121 | + { | ||
| 1122 | + "text": "芭比娃娃", | ||
| 1123 | + "type": "query", | ||
| 1124 | + "highlight": "<em>芭</em>比娃娃", | ||
| 1125 | + "popularity": 850 | ||
| 1126 | + }, | ||
| 1127 | + { | ||
| 1128 | + "text": "芭比娃娃屋", | ||
| 1129 | + "type": "query", | ||
| 1130 | + "highlight": "<em>芭</em>比娃娃屋", | ||
| 1131 | + "popularity": 320 | ||
| 1132 | + } | ||
| 1133 | + ], | ||
| 1134 | + "took_ms": 5 | ||
| 1135 | +} | ||
| 1136 | +``` | ||
| 1137 | + | ||
| 1138 | +## 第五部分:向后兼容性 | ||
| 1139 | + | ||
| 1140 | +### 兼容策略 | ||
| 1141 | + | ||
| 1142 | +为保持向后兼容,在过渡期(1-2 个版本)内: | ||
| 1143 | + | ||
| 1144 | +1. **同时支持旧参数和新参数**: | ||
| 1145 | +```python | ||
| 1146 | +class SearchRequest(BaseModel): | ||
| 1147 | + # 新参数 | ||
| 1148 | + range_filters: Optional[Dict[str, RangeFilter]] = None | ||
| 1149 | + facets: Optional[List[Union[str, FacetConfig]]] = None | ||
| 1150 | + | ||
| 1151 | + # 旧参数(标记为废弃) | ||
| 1152 | + aggregations: Optional[Dict[str, Any]] = Field( | ||
| 1153 | + None, | ||
| 1154 | + deprecated=True, | ||
| 1155 | + description="已废弃。请使用 'facets' 参数" | ||
| 1156 | + ) | ||
| 1157 | +``` | ||
| 1158 | + | ||
| 1159 | +2. **在后端自动转换旧格式**: | ||
| 1160 | +```python | ||
| 1161 | +# 在 searcher.py 中 | ||
| 1162 | +if request.aggregations and not request.facets: | ||
| 1163 | + # 将旧的 aggregations 转换为新的 facets | ||
| 1164 | + request.facets = self._convert_legacy_aggregations(request.aggregations) | ||
| 1165 | +``` | ||
| 1166 | + | ||
| 1167 | +3. **在响应中提供迁移提示**: | ||
| 1168 | +```python | ||
| 1169 | +if request.aggregations: | ||
| 1170 | + warnings.append({ | ||
| 1171 | + "type": "deprecation", | ||
| 1172 | + "message": "'aggregations' 参数已废弃,请使用 'facets' 参数", | ||
| 1173 | + "migration_guide": "https://docs.example.com/migration" | ||
| 1174 | + }) | ||
| 1175 | +``` | ||
| 1176 | + | ||
| 1177 | + | ||
| 1178 | +### 迁移时间线 | ||
| 1179 | + | ||
| 1180 | +- **v3.0**(当前版本):发布新接口,旧接口标记为废弃 | ||
| 1181 | +- **v3.1**(1 个月后):移除旧接口的自动转换 | ||
| 1182 | +- **v4.0**(3 个月后):完全移除旧接口 | ||
| 1183 | + | ||
| 1184 | +## 第六部分:风险评估与缓解 | ||
| 1185 | + | ||
| 1186 | +### 风险点 | ||
| 1187 | + | ||
| 1188 | +1. **破坏性变更风险**: | ||
| 1189 | + | ||
| 1190 | + - 风险:现有客户代码可能依赖旧接口 | ||
| 1191 | + - 缓解:提供向后兼容层,发布详细迁移指南 | ||
| 1192 | + | ||
| 1193 | +2. **性能影响风险**: | ||
| 1194 | + | ||
| 1195 | + - 风险:新的标准化处理可能增加延迟 | ||
| 1196 | + - 缓解:添加性能测试,优化关键路径 | ||
| 1197 | + | ||
| 1198 | +3. **测试覆盖不足风险**: | ||
| 1199 | + | ||
| 1200 | + - 风险:重构可能引入新 bug | ||
| 1201 | + - 缓解:全面的单元测试和集成测试 | ||
| 1202 | + | ||
| 1203 | +### 验收标准 | ||
| 1204 | + | ||
| 1205 | +- [ ] 所有单元测试通过 | ||
| 1206 | +- [ ] 所有集成测试通过 | ||
| 1207 | +- [ ] API 文档完整且准确 | ||
| 1208 | +- [ ] 性能无明显下降(延迟增加 < 10%) | ||
| 1209 | +- [ ] 前端功能正常工作 | ||
| 1210 | +- [ ] 提供完整的迁移指南 | ||
| 1211 | + | ||
| 1212 | +## 总结 | ||
| 1213 | + | ||
| 1214 | +本计划通过系统性的重构,将搜索 API 从硬编码、暴露 ES 细节的实现,转变为灵活、通用、易用的 SaaS 产品接口。关键改进包括: | ||
| 1215 | + | ||
| 1216 | +1. ✅ 移除硬编码的 price_ranges 逻辑 | ||
| 1217 | +2. ✅ 实现结构化的过滤参数(filters + range_filters) | ||
| 1218 | +3. ✅ 简化聚合参数接口,不暴露 ES DSL | ||
| 1219 | +4. ✅ 标准化分面搜索响应格式 | ||
| 1220 | +5. ✅ 添加搜索建议功能框架(暂不实现) | ||
| 1221 | + | ||
| 1222 | +通过这些改进,系统将具备更好的通用性、可维护性和可扩展性,为未来功能扩展奠定基础。 | ||
| 0 | \ No newline at end of file | 1223 | \ No newline at end of file |
.cursor/plans/api-interface-analysis-42918612.plan.3.最终执行.md
0 → 100644
| @@ -0,0 +1,1402 @@ | @@ -0,0 +1,1402 @@ | ||
| 1 | +<!-- 42918612-89ae-45a6-9ccb-e2b0df0a7aec 6bb705f4-0105-4c72-8a0b-a067d2e83dc0 --> | ||
| 2 | +# 搜索引擎 API 接口重构实施计划 | ||
| 3 | + | ||
| 4 | +## 第一部分:现状分析 | ||
| 5 | + | ||
| 6 | +### 1. 当前实现存在的问题 | ||
| 7 | + | ||
| 8 | +#### 问题 1:硬编码的价格范围过滤 | ||
| 9 | + | ||
| 10 | +**位置**:`search/es_query_builder.py` 第 205-233 行 | ||
| 11 | + | ||
| 12 | +**问题描述**: | ||
| 13 | + | ||
| 14 | +```python | ||
| 15 | +if field == 'price_ranges': | ||
| 16 | + # 硬编码特定字符串值 | ||
| 17 | + if price_range == '0-50': | ||
| 18 | + price_ranges.append({"lt": 50}) | ||
| 19 | + elif price_range == '50-100': | ||
| 20 | + price_ranges.append({"gte": 50, "lt": 100}) | ||
| 21 | + # ... | ||
| 22 | +``` | ||
| 23 | + | ||
| 24 | +**影响**: | ||
| 25 | + | ||
| 26 | +- 只支持 `price` 字段,无法扩展到其他数值字段 | ||
| 27 | +- 范围值硬编码,无法根据业务需求调整 | ||
| 28 | +- 不符合 SaaS 系统的通用性要求 | ||
| 29 | + | ||
| 30 | +#### 问题 2:聚合参数直接暴露 ES DSL | ||
| 31 | + | ||
| 32 | +**位置**: | ||
| 33 | + | ||
| 34 | +- `api/models.py` 第 17 行:`aggregations: Optional[Dict[str, Any]]` | ||
| 35 | +- `search/es_query_builder.py` 第 298-319 行:`add_dynamic_aggregations` | ||
| 36 | +- `frontend/static/js/app.js` 第 57-87 行:前端硬编码 ES DSL | ||
| 37 | + | ||
| 38 | +**问题描述**: | ||
| 39 | + | ||
| 40 | +前端需要了解 Elasticsearch 的聚合语法: | ||
| 41 | + | ||
| 42 | +```javascript | ||
| 43 | +const aggregations = { | ||
| 44 | + "category_stats": { | ||
| 45 | + "terms": { | ||
| 46 | + "field": "categoryName_keyword", | ||
| 47 | + "size": 15 | ||
| 48 | + } | ||
| 49 | + }, | ||
| 50 | + "price_ranges": { | ||
| 51 | + "range": { | ||
| 52 | + "field": "price", | ||
| 53 | + "ranges": [ | ||
| 54 | + {"key": "0-50", "to": 50}, | ||
| 55 | + // ... | ||
| 56 | + ] | ||
| 57 | + } | ||
| 58 | + } | ||
| 59 | +}; | ||
| 60 | +``` | ||
| 61 | + | ||
| 62 | +**影响**: | ||
| 63 | + | ||
| 64 | +- 前端需要了解 ES 语法,增加集成难度 | ||
| 65 | +- 不符合 SaaS 产品易用性原则 | ||
| 66 | +- 难以进行参数验证和文档生成 | ||
| 67 | + | ||
| 68 | +#### 问题 3:分面搜索结果格式不统一 | ||
| 69 | + | ||
| 70 | +**位置**:`frontend/static/js/app.js` 第 208-258 行 | ||
| 71 | + | ||
| 72 | +**问题描述**: | ||
| 73 | + | ||
| 74 | +- 直接返回 ES 原始格式(`buckets` 结构) | ||
| 75 | +- 前端需要知道不同聚合类型的响应结构 | ||
| 76 | +- 没有统一的分面结果模型 | ||
| 77 | + | ||
| 78 | +**影响**: | ||
| 79 | + | ||
| 80 | +- 前端解析逻辑复杂 | ||
| 81 | +- 不同类型的聚合处理方式不一致 | ||
| 82 | +- 难以扩展新的聚合类型 | ||
| 83 | + | ||
| 84 | +#### 问题 4:缺少搜索建议功能 | ||
| 85 | + | ||
| 86 | +**当前状态**:完全没有实现 | ||
| 87 | + | ||
| 88 | +**需求**: | ||
| 89 | + | ||
| 90 | +- 自动补全(Autocomplete) | ||
| 91 | +- 搜索建议(Suggestions) | ||
| 92 | +- 搜索即时反馈(Instant Search) | ||
| 93 | + | ||
| 94 | +### 2. 依赖关系分析 | ||
| 95 | + | ||
| 96 | +**影响范围**: | ||
| 97 | + | ||
| 98 | +1. **后端模型层**:`api/models.py` | ||
| 99 | +2. **查询构建层**:`search/es_query_builder.py` | ||
| 100 | +3. **搜索执行层**:`search/searcher.py` | ||
| 101 | +4. **API 路由层**:`api/routes/search.py` | ||
| 102 | +5. **前端代码**:`frontend/static/js/app.js` | ||
| 103 | + | ||
| 104 | +## 第二部分:优化方案设计 | ||
| 105 | + | ||
| 106 | +### 方案概述 | ||
| 107 | + | ||
| 108 | +采用**结构化过滤参数方案(方案 A 的简化版)**: | ||
| 109 | + | ||
| 110 | +- 分离 `filters`(精确匹配)和 `range_filters`(范围过滤) | ||
| 111 | +- **不支持单字段多个不连续范围**,以简化设计 | ||
| 112 | +- 标准化聚合参数,使用简化的接口 | ||
| 113 | +- 统一分面搜索响应格式 | ||
| 114 | +- **完全重构,不保留旧接口和兼容代码** | ||
| 115 | + | ||
| 116 | +### 1. 新的请求模型设计 | ||
| 117 | + | ||
| 118 | +#### 1.1 核心模型定义 | ||
| 119 | + | ||
| 120 | +**文件**:`api/models.py` | ||
| 121 | + | ||
| 122 | +```python | ||
| 123 | +from pydantic import BaseModel, Field, field_validator | ||
| 124 | +from typing import List, Dict, Any, Optional, Union, Literal | ||
| 125 | + | ||
| 126 | + | ||
| 127 | +class RangeFilter(BaseModel): | ||
| 128 | + """数值范围过滤器""" | ||
| 129 | + gte: Optional[float] = Field(None, description="大于等于 (>=)") | ||
| 130 | + gt: Optional[float] = Field(None, description="大于 (>)") | ||
| 131 | + lte: Optional[float] = Field(None, description="小于等于 (<=)") | ||
| 132 | + lt: Optional[float] = Field(None, description="小于 (<)") | ||
| 133 | + | ||
| 134 | + @field_validator('*') | ||
| 135 | + def check_at_least_one(cls, v, info): | ||
| 136 | + """确保至少指定一个边界""" | ||
| 137 | + values = info.data | ||
| 138 | + if not any([values.get('gte'), values.get('gt'), | ||
| 139 | + values.get('lte'), values.get('lt')]): | ||
| 140 | + raise ValueError('至少需要指定一个范围边界') | ||
| 141 | + return v | ||
| 142 | + | ||
| 143 | + class Config: | ||
| 144 | + json_schema_extra = { | ||
| 145 | + "examples": [ | ||
| 146 | + {"gte": 50, "lte": 200}, | ||
| 147 | + {"gt": 100}, | ||
| 148 | + {"lt": 50} | ||
| 149 | + ] | ||
| 150 | + } | ||
| 151 | + | ||
| 152 | + | ||
| 153 | +class FacetConfig(BaseModel): | ||
| 154 | + """分面配置(简化版)""" | ||
| 155 | + field: str = Field(..., description="分面字段名") | ||
| 156 | + size: int = Field(10, ge=1, le=100, description="返回的分面值数量") | ||
| 157 | + type: Literal["terms", "range"] = Field("terms", description="分面类型") | ||
| 158 | + ranges: Optional[List[Dict[str, Any]]] = Field( | ||
| 159 | + None, | ||
| 160 | + description="范围分面的范围定义(仅当 type='range' 时需要)" | ||
| 161 | + ) | ||
| 162 | + | ||
| 163 | + class Config: | ||
| 164 | + json_schema_extra = { | ||
| 165 | + "examples": [ | ||
| 166 | + { | ||
| 167 | + "field": "categoryName_keyword", | ||
| 168 | + "size": 15, | ||
| 169 | + "type": "terms" | ||
| 170 | + }, | ||
| 171 | + { | ||
| 172 | + "field": "price", | ||
| 173 | + "size": 4, | ||
| 174 | + "type": "range", | ||
| 175 | + "ranges": [ | ||
| 176 | + {"key": "0-50", "to": 50}, | ||
| 177 | + {"key": "50-100", "from": 50, "to": 100}, | ||
| 178 | + {"key": "100-200", "from": 100, "to": 200}, | ||
| 179 | + {"key": "200+", "from": 200} | ||
| 180 | + ] | ||
| 181 | + } | ||
| 182 | + ] | ||
| 183 | + } | ||
| 184 | + | ||
| 185 | + | ||
| 186 | +class SearchRequest(BaseModel): | ||
| 187 | + """搜索请求模型(重构版)""" | ||
| 188 | + | ||
| 189 | + # 基础搜索参数 | ||
| 190 | + query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)") | ||
| 191 | + size: int = Field(10, ge=1, le=100, description="返回结果数量") | ||
| 192 | + from_: int = Field(0, ge=0, alias="from", description="分页偏移量") | ||
| 193 | + | ||
| 194 | + # 过滤器 - 精确匹配和多值匹配 | ||
| 195 | + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = Field( | ||
| 196 | + None, | ||
| 197 | + description="精确匹配过滤器。单值表示精确匹配,数组表示 OR 匹配(匹配任意一个值)", | ||
| 198 | + json_schema_extra={ | ||
| 199 | + "examples": [ | ||
| 200 | + { | ||
| 201 | + "categoryName_keyword": ["玩具", "益智玩具"], | ||
| 202 | + "brandName_keyword": "乐高", | ||
| 203 | + "in_stock": True | ||
| 204 | + } | ||
| 205 | + ] | ||
| 206 | + } | ||
| 207 | + ) | ||
| 208 | + | ||
| 209 | + # 范围过滤器 - 数值范围 | ||
| 210 | + range_filters: Optional[Dict[str, RangeFilter]] = Field( | ||
| 211 | + None, | ||
| 212 | + description="数值范围过滤器。支持 gte, gt, lte, lt 操作符", | ||
| 213 | + json_schema_extra={ | ||
| 214 | + "examples": [ | ||
| 215 | + { | ||
| 216 | + "price": {"gte": 50, "lte": 200}, | ||
| 217 | + "days_since_last_update": {"lte": 30} | ||
| 218 | + } | ||
| 219 | + ] | ||
| 220 | + } | ||
| 221 | + ) | ||
| 222 | + | ||
| 223 | + # 排序 | ||
| 224 | + sort_by: Optional[str] = Field(None, description="排序字段名(如 'price', 'create_time')") | ||
| 225 | + sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)") | ||
| 226 | + | ||
| 227 | + # 分面搜索 - 简化接口 | ||
| 228 | + facets: Optional[List[Union[str, FacetConfig]]] = Field( | ||
| 229 | + None, | ||
| 230 | + description="分面配置。可以是字段名列表(使用默认配置)或详细的分面配置对象", | ||
| 231 | + json_schema_extra={ | ||
| 232 | + "examples": [ | ||
| 233 | + # 简单模式:只指定字段名,使用默认配置 | ||
| 234 | + ["categoryName_keyword", "brandName_keyword"], | ||
| 235 | + # 高级模式:详细配置 | ||
| 236 | + [ | ||
| 237 | + {"field": "categoryName_keyword", "size": 15}, | ||
| 238 | + { | ||
| 239 | + "field": "price", | ||
| 240 | + "type": "range", | ||
| 241 | + "ranges": [ | ||
| 242 | + {"key": "0-50", "to": 50}, | ||
| 243 | + {"key": "50-100", "from": 50, "to": 100} | ||
| 244 | + ] | ||
| 245 | + } | ||
| 246 | + ] | ||
| 247 | + ] | ||
| 248 | + } | ||
| 249 | + ) | ||
| 250 | + | ||
| 251 | + # 高级选项 | ||
| 252 | + min_score: Optional[float] = Field(None, ge=0, description="最小相关性分数阈值") | ||
| 253 | + highlight: bool = Field(False, description="是否高亮搜索关键词(暂不实现)") | ||
| 254 | + debug: bool = Field(False, description="是否返回调试信息") | ||
| 255 | + | ||
| 256 | + # 个性化参数(预留) | ||
| 257 | + user_id: Optional[str] = Field(None, description="用户ID,用于个性化搜索和推荐") | ||
| 258 | + session_id: Optional[str] = Field(None, description="会话ID,用于搜索分析") | ||
| 259 | + | ||
| 260 | + | ||
| 261 | +class ImageSearchRequest(BaseModel): | ||
| 262 | + """图片搜索请求模型""" | ||
| 263 | + image_url: str = Field(..., description="查询图片的 URL") | ||
| 264 | + size: int = Field(10, ge=1, le=100, description="返回结果数量") | ||
| 265 | + filters: Optional[Dict[str, Union[str, int, bool, List[Union[str, int]]]]] = None | ||
| 266 | + range_filters: Optional[Dict[str, RangeFilter]] = None | ||
| 267 | + | ||
| 268 | + | ||
| 269 | +class SearchSuggestRequest(BaseModel): | ||
| 270 | + """搜索建议请求模型(框架,暂不实现)""" | ||
| 271 | + query: str = Field(..., min_length=1, description="搜索查询字符串") | ||
| 272 | + size: int = Field(5, ge=1, le=20, description="返回建议数量") | ||
| 273 | + types: List[Literal["query", "product", "category", "brand"]] = Field( | ||
| 274 | + ["query"], | ||
| 275 | + description="建议类型:query(查询建议), product(商品建议), category(类目建议), brand(品牌建议)" | ||
| 276 | + ) | ||
| 277 | +``` | ||
| 278 | + | ||
| 279 | +#### 1.2 响应模型定义 | ||
| 280 | + | ||
| 281 | +```python | ||
| 282 | +class FacetValue(BaseModel): | ||
| 283 | + """分面值""" | ||
| 284 | + value: Union[str, int, float] = Field(..., description="分面值") | ||
| 285 | + label: Optional[str] = Field(None, description="显示标签(如果与 value 不同)") | ||
| 286 | + count: int = Field(..., description="匹配的文档数量") | ||
| 287 | + selected: bool = Field(False, description="是否已选中(当前过滤器中)") | ||
| 288 | + | ||
| 289 | + | ||
| 290 | +class FacetResult(BaseModel): | ||
| 291 | + """分面结果(标准化格式)""" | ||
| 292 | + field: str = Field(..., description="字段名") | ||
| 293 | + label: str = Field(..., description="分面显示名称") | ||
| 294 | + type: Literal["terms", "range"] = Field(..., description="分面类型") | ||
| 295 | + values: List[FacetValue] = Field(..., description="分面值列表") | ||
| 296 | + total_count: Optional[int] = Field(None, description="该字段的总文档数") | ||
| 297 | + | ||
| 298 | + | ||
| 299 | +class SearchResponse(BaseModel): | ||
| 300 | + """搜索响应模型(重构版)""" | ||
| 301 | + | ||
| 302 | + # 核心结果 | ||
| 303 | + hits: List[Dict[str, Any]] = Field(..., description="搜索结果列表") | ||
| 304 | + total: int = Field(..., description="匹配的总文档数") | ||
| 305 | + max_score: float = Field(..., description="最高相关性分数") | ||
| 306 | + | ||
| 307 | + # 分面搜索结果(标准化格式) | ||
| 308 | + facets: Optional[List[FacetResult]] = Field( | ||
| 309 | + None, | ||
| 310 | + description="分面统计结果(标准化格式)" | ||
| 311 | + ) | ||
| 312 | + | ||
| 313 | + # 查询信息 | ||
| 314 | + query_info: Dict[str, Any] = Field( | ||
| 315 | + default_factory=dict, | ||
| 316 | + description="查询处理信息(原始查询、改写、语言检测、翻译等)" | ||
| 317 | + ) | ||
| 318 | + | ||
| 319 | + # 推荐与建议(预留) | ||
| 320 | + related_queries: Optional[List[str]] = Field(None, description="相关搜索查询") | ||
| 321 | + | ||
| 322 | + # 性能指标 | ||
| 323 | + took_ms: int = Field(..., description="搜索总耗时(毫秒)") | ||
| 324 | + performance_info: Optional[Dict[str, Any]] = Field(None, description="详细性能信息") | ||
| 325 | + | ||
| 326 | + # 调试信息 | ||
| 327 | + debug_info: Optional[Dict[str, Any]] = Field(None, description="调试信息(仅当 debug=True)") | ||
| 328 | + | ||
| 329 | + | ||
| 330 | +class SearchSuggestResponse(BaseModel): | ||
| 331 | + """搜索建议响应模型(框架,暂不实现)""" | ||
| 332 | + query: str = Field(..., description="原始查询") | ||
| 333 | + suggestions: List[Dict[str, Any]] = Field(..., description="建议列表") | ||
| 334 | + took_ms: int = Field(..., description="耗时(毫秒)") | ||
| 335 | +``` | ||
| 336 | + | ||
| 337 | +### 2. 查询构建器重构 | ||
| 338 | + | ||
| 339 | +#### 2.1 移除硬编码的 price_ranges 逻辑 | ||
| 340 | + | ||
| 341 | +**文件**:`search/es_query_builder.py` | ||
| 342 | + | ||
| 343 | +**需要修改的方法**:`_build_filters(self, filters, range_filters)` | ||
| 344 | + | ||
| 345 | +**改进点**: | ||
| 346 | + | ||
| 347 | +1. **完全移除** `if field == 'price_ranges'` 的特殊处理代码 | ||
| 348 | +2. 分离 filters 和 range_filters 的处理逻辑 | ||
| 349 | +3. 添加字段类型验证(利用配置系统;注意下方示例实现尚未包含该校验逻辑,需在落地时补充) | ||
| 350 | + | ||
| 351 | +**新的实现逻辑**: | ||
| 352 | + | ||
| 353 | +```python | ||
| 354 | +def _build_filters( | ||
| 355 | + self, | ||
| 356 | + filters: Optional[Dict[str, Any]] = None, | ||
| 357 | + range_filters: Optional[Dict[str, Any]] = None | ||
| 358 | +) -> List[Dict[str, Any]]: | ||
| 359 | + """ | ||
| 360 | + 构建过滤子句(重构版)。 | ||
| 361 | + | ||
| 362 | + Args: | ||
| 363 | + filters: 精确匹配过滤器字典 | ||
| 364 | + range_filters: 范围过滤器字典 | ||
| 365 | + | ||
| 366 | + Returns: | ||
| 367 | + ES filter子句列表 | ||
| 368 | + """ | ||
| 369 | + filter_clauses = [] | ||
| 370 | + | ||
| 371 | + # 1. 处理精确匹配过滤 | ||
| 372 | + if filters: | ||
| 373 | + for field, value in filters.items(): | ||
| 374 | + if isinstance(value, list): | ||
| 375 | + # 多值匹配(OR) | ||
| 376 | + filter_clauses.append({ | ||
| 377 | + "terms": {field: value} | ||
| 378 | + }) | ||
| 379 | + else: | ||
| 380 | + # 单值精确匹配 | ||
| 381 | + filter_clauses.append({ | ||
| 382 | + "term": {field: value} | ||
| 383 | + }) | ||
| 384 | + | ||
| 385 | + # 2. 处理范围过滤 | ||
| 386 | + if range_filters: | ||
| 387 | + for field, range_spec in range_filters.items(): | ||
| 388 | + # 构建范围查询 | ||
| 389 | + range_conditions = {} | ||
| 390 | + if isinstance(range_spec, dict): | ||
| 391 | + for op in ['gte', 'gt', 'lte', 'lt']: | ||
| 392 | + if op in range_spec and range_spec[op] is not None: | ||
| 393 | + range_conditions[op] = range_spec[op] | ||
| 394 | + | ||
| 395 | + if range_conditions: | ||
| 396 | + filter_clauses.append({ | ||
| 397 | + "range": {field: range_conditions} | ||
| 398 | + }) | ||
| 399 | + | ||
| 400 | + return filter_clauses | ||
| 401 | +``` | ||
| 402 | + | ||
| 403 | +#### 2.2 优化聚合参数接口 | ||
| 404 | + | ||
| 405 | +**需要完全移除的方法**: | ||
| 406 | + | ||
| 407 | +- `add_dynamic_aggregations` - 直接暴露 ES DSL,完全删除 | ||
| 408 | + | ||
| 409 | +**需要重构的方法**: | ||
| 410 | + | ||
| 411 | +- `add_aggregations` → 重构为 `build_facets` | ||
| 412 | + | ||
| 413 | +**新增方法**:`build_facets(self, facet_configs)` | ||
| 414 | + | ||
| 415 | +**新的实现逻辑**: | ||
| 416 | + | ||
| 417 | +```python | ||
| 418 | +def build_facets( | ||
| 419 | + self, | ||
| 420 | + facet_configs: Optional[List[Union[str, Dict[str, Any]]]] = None | ||
| 421 | +) -> Dict[str, Any]: | ||
| 422 | + """ | ||
| 423 | + 构建分面聚合(重构版)。 | ||
| 424 | + | ||
| 425 | + Args: | ||
| 426 | + facet_configs: 分面配置列表。可以是: | ||
| 427 | + - 字符串列表:字段名,使用默认配置 | ||
| 428 | + - 配置对象列表:详细的分面配置 | ||
| 429 | + | ||
| 430 | + Returns: | ||
| 431 | + ES aggregations字典 | ||
| 432 | + """ | ||
| 433 | + if not facet_configs: | ||
| 434 | + return {} | ||
| 435 | + | ||
| 436 | + aggs = {} | ||
| 437 | + | ||
| 438 | + for config in facet_configs: | ||
| 439 | + # 1. 简单模式:只有字段名 | ||
| 440 | + if isinstance(config, str): | ||
| 441 | + field = config | ||
| 442 | + agg_name = f"{field}_facet" | ||
| 443 | + aggs[agg_name] = { | ||
| 444 | + "terms": { | ||
| 445 | + "field": field, | ||
| 446 | + "size": 10, # 默认大小 | ||
| 447 | + "order": {"_count": "desc"} | ||
| 448 | + } | ||
| 449 | + } | ||
| 450 | + | ||
| 451 | + # 2. 高级模式:详细配置对象 | ||
| 452 | + elif isinstance(config, dict): | ||
| 453 | + field = config['field'] | ||
| 454 | + facet_type = config.get('type', 'terms') | ||
| 455 | + size = config.get('size', 10) | ||
| 456 | + agg_name = f"{field}_facet" | ||
| 457 | + | ||
| 458 | + if facet_type == 'terms': | ||
| 459 | + # Terms 聚合(分组统计) | ||
| 460 | + aggs[agg_name] = { | ||
| 461 | + "terms": { | ||
| 462 | + "field": field, | ||
| 463 | + "size": size, | ||
| 464 | + "order": {"_count": "desc"} | ||
| 465 | + } | ||
| 466 | + } | ||
| 467 | + | ||
| 468 | + elif facet_type == 'range': | ||
| 469 | + # Range 聚合(范围统计) | ||
| 470 | + ranges = config.get('ranges', []) | ||
| 471 | + if ranges: | ||
| 472 | + aggs[agg_name] = { | ||
| 473 | + "range": { | ||
| 474 | + "field": field, | ||
| 475 | + "ranges": ranges | ||
| 476 | + } | ||
| 477 | + } | ||
| 478 | + | ||
| 479 | + return aggs | ||
| 480 | +``` | ||
| 481 | + | ||
| 482 | +#### 2.3 更新主查询构建方法 | ||
| 483 | + | ||
| 484 | +**修改方法签名**:`build_query()` | ||
| 485 | + | ||
| 486 | +```python | ||
| 487 | +def build_query( | ||
| 488 | + self, | ||
| 489 | + query_text: str, | ||
| 490 | + query_vector: Optional[np.ndarray] = None, | ||
| 491 | + query_node: Optional[QueryNode] = None, | ||
| 492 | + filters: Optional[Dict[str, Any]] = None, | ||
| 493 | + range_filters: Optional[Dict[str, Any]] = None, # 新增 | ||
| 494 | + size: int = 10, | ||
| 495 | + from_: int = 0, | ||
| 496 | + enable_knn: bool = True, | ||
| 497 | + knn_k: int = 50, | ||
| 498 | + knn_num_candidates: int = 200, | ||
| 499 | + min_score: Optional[float] = None | ||
| 500 | +) -> Dict[str, Any]: | ||
| 501 | + """构建完整的 ES 查询(重构版)""" | ||
| 502 | + # ... 实现 | ||
| 503 | + | ||
| 504 | + # 添加过滤器 | ||
| 505 | + if filters or range_filters: | ||
| 506 | + filter_clauses = self._build_filters(filters, range_filters) | ||
| 507 | + if filter_clauses: | ||
| 508 | + es_query["query"] = { | ||
| 509 | + "bool": { | ||
| 510 | + "must": [query_clause], | ||
| 511 | + "filter": filter_clauses | ||
| 512 | + } | ||
| 513 | + } | ||
| 514 | +``` | ||
| 515 | + | ||
| 516 | +### 3. 搜索执行层重构 | ||
| 517 | + | ||
| 518 | +**文件**:`search/searcher.py` | ||
| 519 | + | ||
| 520 | +**需要修改的方法**:`search()` | ||
| 521 | + | ||
| 522 | +**改进点**: | ||
| 523 | + | ||
| 524 | +1. 更新方法签名,接受 `range_filters` 和 `facets` 参数 | ||
| 525 | +2. **完全移除** `aggregations` 参数支持 | ||
| 526 | +3. 使用新的 `build_facets` 方法替代旧的聚合逻辑 | ||
| 527 | +4. 标准化分面搜索结果 | ||
| 528 | + | ||
| 529 | +**关键代码片段**: | ||
| 530 | + | ||
| 531 | +```python | ||
| 532 | +def search( | ||
| 533 | + self, | ||
| 534 | + query: str, | ||
| 535 | + size: int = 10, | ||
| 536 | + from_: int = 0, | ||
| 537 | + filters: Optional[Dict[str, Any]] = None, | ||
| 538 | + range_filters: Optional[Dict[str, Any]] = None, # 新增 | ||
| 539 | + facets: Optional[List[Union[str, Dict]]] = None, # 替代 aggregations | ||
| 540 | + min_score: Optional[float] = None, | ||
| 541 | + sort_by: Optional[str] = None, | ||
| 542 | + sort_order: Optional[str] = "desc", | ||
| 543 | + debug: bool = False, | ||
| 544 | + context: Optional[RequestContext] = None | ||
| 545 | +) -> SearchResult: | ||
| 546 | + """执行搜索(重构版)""" | ||
| 547 | + | ||
| 548 | + # ... 查询解析 ... | ||
| 549 | + | ||
| 550 | + # 构建 ES 查询 | ||
| 551 | + es_query = self.query_builder.build_multilang_query( | ||
| 552 | + parsed_query=parsed_query, | ||
| 553 | + query_vector=parsed_query.query_vector, | ||
| 554 | + query_node=query_node, | ||
| 555 | + filters=filters, | ||
| 556 | + range_filters=range_filters, # 新增 | ||
| 557 | + size=size, | ||
| 558 | + from_=from_, | ||
| 559 | + enable_knn=enable_embedding, | ||
| 560 | + min_score=min_score | ||
| 561 | + ) | ||
| 562 | + | ||
| 563 | + # 添加分面聚合(完全替代旧的 aggregations 逻辑) | ||
| 564 | + if facets: | ||
| 565 | + facet_aggs = self.query_builder.build_facets(facets) | ||
| 566 | + if facet_aggs: | ||
| 567 | + if "aggs" not in es_query: | ||
| 568 | + es_query["aggs"] = {} | ||
| 569 | + es_query["aggs"].update(facet_aggs) | ||
| 570 | + | ||
| 571 | + # ... 执行搜索 ... | ||
| 572 | + | ||
| 573 | + # 标准化分面结果 | ||
| 574 | + standardized_facets = self._standardize_facets( | ||
| 575 | + es_response.get('aggregations', {}), | ||
| 576 | + facets, | ||
| 577 | + filters | ||
| 578 | + ) | ||
| 579 | + | ||
| 580 | + return SearchResult( | ||
| 581 | + hits=hits, | ||
| 582 | + total=total_value, | ||
| 583 | + max_score=max_score, | ||
| 584 | + took_ms=int(total_duration), | ||
| 585 | + facets=standardized_facets, # 标准化格式 | ||
| 586 | + query_info=parsed_query.to_dict(), | ||
| 587 | + debug_info=debug_info | ||
| 588 | + ) | ||
| 589 | +``` | ||
| 590 | + | ||
| 591 | +**新增辅助方法**: | ||
| 592 | + | ||
| 593 | +```python | ||
| 594 | +def _standardize_facets( | ||
| 595 | + self, | ||
| 596 | + es_aggregations: Dict[str, Any], | ||
| 597 | + facet_configs: Optional[List[Union[str, Dict]]], | ||
| 598 | + current_filters: Optional[Dict[str, Any]] | ||
| 599 | +) -> Optional[List[Dict[str, Any]]]: | ||
| 600 | + """ | ||
| 601 | + 将 ES 聚合结果转换为标准化的分面格式。 | ||
| 602 | + | ||
| 603 | + Args: | ||
| 604 | + es_aggregations: ES 原始聚合结果 | ||
| 605 | + facet_configs: 分面配置列表 | ||
| 606 | + current_filters: 当前应用的过滤器 | ||
| 607 | + | ||
| 608 | + Returns: | ||
| 609 | + 标准化的分面结果列表 | ||
| 610 | + """ | ||
| 611 | + if not es_aggregations or not facet_configs: | ||
| 612 | + return None | ||
| 613 | + | ||
| 614 | + standardized_facets = [] | ||
| 615 | + | ||
| 616 | + for config in facet_configs: | ||
| 617 | + # 解析配置 | ||
| 618 | + if isinstance(config, str): | ||
| 619 | + field = config | ||
| 620 | + facet_type = "terms" | ||
| 621 | + else: | ||
| 622 | + field = config['field'] | ||
| 623 | + facet_type = config.get('type', 'terms') | ||
| 624 | + | ||
| 625 | + agg_name = f"{field}_facet" | ||
| 626 | + | ||
| 627 | + if agg_name not in es_aggregations: | ||
| 628 | + continue | ||
| 629 | + | ||
| 630 | + agg_result = es_aggregations[agg_name] | ||
| 631 | + | ||
| 632 | + # 构建标准化分面结果 | ||
| 633 | + facet = { | ||
| 634 | + "field": field, | ||
| 635 | + "label": self._get_field_label(field), | ||
| 636 | + "type": facet_type, | ||
| 637 | + "values": [] | ||
| 638 | + } | ||
| 639 | + | ||
| 640 | + # 获取当前字段的选中值 | ||
| 641 | + selected_values = set() | ||
| 642 | + if current_filters and field in current_filters: | ||
| 643 | + filter_value = current_filters[field] | ||
| 644 | + if isinstance(filter_value, list): | ||
| 645 | + selected_values = set(filter_value) | ||
| 646 | + else: | ||
| 647 | + selected_values = {filter_value} | ||
| 648 | + | ||
| 649 | + # 转换 buckets | ||
| 650 | + if 'buckets' in agg_result: | ||
| 651 | + for bucket in agg_result['buckets']: | ||
| 652 | + value = bucket.get('key') | ||
| 653 | + count = bucket.get('doc_count', 0) | ||
| 654 | + | ||
| 655 | + facet['values'].append({ | ||
| 656 | + "value": value, | ||
| 657 | + "label": str(value), | ||
| 658 | + "count": count, | ||
| 659 | + "selected": value in selected_values | ||
| 660 | + }) | ||
| 661 | + | ||
| 662 | + standardized_facets.append(facet) | ||
| 663 | + | ||
| 664 | + return standardized_facets | ||
| 665 | + | ||
| 666 | + | ||
| 667 | +def _get_field_label(self, field: str) -> str: | ||
| 668 | + """获取字段的显示标签""" | ||
| 669 | + # 从配置中获取字段标签 | ||
| 670 | + for field_config in self.config.fields: | ||
| 671 | + if field_config.name == field: | ||
| 672 | + return getattr(field_config, 'label', field) | ||
| 673 | + return field | ||
| 674 | +``` | ||
| 675 | + | ||
| 676 | +### 4. API 路由层更新 | ||
| 677 | + | ||
| 678 | +**文件**:`api/routes/search.py` | ||
| 679 | + | ||
| 680 | +**改进点**: | ||
| 681 | + | ||
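| 682 | +1. 接受新的请求模型参数;调用 `searcher.search()` 前需将 `RangeFilter` / `FacetConfig` 模型对象转换为 dict(如 `model_dump()`),因为 `_build_filters`、`build_facets` 和 `_standardize_facets` 均按 dict 处理,直接传入模型对象会导致范围过滤和分面配置被忽略或报错 | ||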
| 683 | +2. 添加搜索建议端点 `/suggestions` 和即时搜索端点 `/instant`(框架) | ||
| 684 | +3. **完全移除**旧的 `aggregations` 参数支持 | ||
| 685 | + | ||
| 686 | +**主搜索端点更新**: | ||
| 687 | + | ||
| 688 | +```python | ||
| 689 | +@router.post("/", response_model=SearchResponse) | ||
| 690 | +async def search(request: SearchRequest, http_request: Request): | ||
| 691 | + """ | ||
| 692 | + 执行文本搜索。 | ||
| 693 | + | ||
| 694 | + 支持: | ||
| 695 | + - 布尔表达式(AND, OR, RANK, ANDNOT) | ||
| 696 | + - 精确匹配过滤器 | ||
| 697 | + - 范围过滤器 | ||
| 698 | + - 分面搜索 | ||
| 699 | + - 自定义排序 | ||
| 700 | + """ | ||
| 701 | + # ... 实现使用新的参数 | ||
| 702 | + result = searcher.search( | ||
| 703 | + query=request.query, | ||
| 704 | + size=request.size, | ||
| 705 | + from_=request.from_, | ||
| 706 | + filters=request.filters, | ||
| 707 | + range_filters=request.range_filters, # 新增 | ||
| 708 | + facets=request.facets, # 替代 aggregations | ||
| 709 | + min_score=request.min_score, | ||
| 710 | + sort_by=request.sort_by, | ||
| 711 | + sort_order=request.sort_order, | ||
| 712 | + debug=request.debug, | ||
| 713 | + context=context | ||
| 714 | + ) | ||
| 715 | +``` | ||
| 716 | + | ||
| 717 | +**新增端点**: | ||
| 718 | + | ||
| 719 | +```python | ||
| 720 | +@router.get("/suggestions", response_model=SearchSuggestResponse) | ||
| 721 | +async def search_suggestions( | ||
| 722 | + q: str = Query(..., min_length=1, description="搜索查询"), | ||
| 723 | + size: int = Query(5, ge=1, le=20, description="建议数量"), | ||
| 724 | + types: str = Query("query", description="建议类型(逗号分隔)") | ||
| 725 | +): | ||
| 726 | + """ | ||
| 727 | + 获取搜索建议(自动补全)。 | ||
| 728 | + | ||
| 729 | + 功能说明: | ||
| 730 | + - 查询建议(query):基于历史搜索和热门搜索 | ||
| 731 | + - 商品建议(product):匹配的商品 | ||
| 732 | + - 类目建议(category):匹配的类目 | ||
| 733 | + - 品牌建议(brand):匹配的品牌 | ||
| 734 | + | ||
| 735 | + 注意:此功能暂未实现,仅返回框架响应。 | ||
| 736 | + """ | ||
| 737 | + import time | ||
| 738 | + start_time = time.time() | ||
| 739 | + | ||
| 740 | + # TODO: 实现搜索建议逻辑 | ||
| 741 | + suggestions = [] | ||
| 742 | + took_ms = int((time.time() - start_time) * 1000) | ||
| 743 | + | ||
| 744 | + return SearchSuggestResponse( | ||
| 745 | + query=q, | ||
| 746 | + suggestions=suggestions, | ||
| 747 | + took_ms=took_ms | ||
| 748 | + ) | ||
| 749 | + | ||
| 750 | + | ||
| 751 | +@router.get("/instant", response_model=SearchResponse) | ||
| 752 | +async def instant_search( | ||
| 753 | + q: str = Query(..., min_length=2, description="搜索查询"), | ||
| 754 | + size: int = Query(5, ge=1, le=20, description="结果数量") | ||
| 755 | +): | ||
| 756 | + """ | ||
| 757 | + 即时搜索(Instant Search)。 | ||
| 758 | + | ||
| 759 | + 功能说明: | ||
| 760 | + - 边输入边搜索,无需点击搜索按钮 | ||
| 761 | + - 返回简化的搜索结果 | ||
| 762 | + | ||
| 763 | + 注意:此功能暂未实现,调用标准搜索接口。 | ||
| 764 | + """ | ||
| 765 | + from api.app import get_searcher | ||
| 766 | + searcher = get_searcher() | ||
| 767 | + | ||
| 768 | + result = searcher.search( | ||
| 769 | + query=q, | ||
| 770 | + size=size, | ||
| 771 | + from_=0 | ||
| 772 | + ) | ||
| 773 | + | ||
| 774 | + return SearchResponse( | ||
| 775 | + hits=result.hits, | ||
| 776 | + total=result.total, | ||
| 777 | + max_score=result.max_score, | ||
| 778 | + took_ms=result.took_ms, | ||
| 779 | + query_info=result.query_info | ||
| 780 | + ) | ||
| 781 | +``` | ||
| 782 | + | ||
| 783 | +### 5. 前端适配 | ||
| 784 | + | ||
| 785 | +**文件**:`frontend/static/js/app.js` | ||
| 786 | + | ||
| 787 | +**需要完全重写的地方**: | ||
| 788 | + | ||
| 789 | +1. **聚合参数改用简化配置**(第 57-87 行): | ||
| 790 | +```javascript | ||
| 791 | +// 旧的方式(ES DSL)- 完全删除 | ||
| 792 | +const aggregations = { | ||
| 793 | + "category_stats": {...} // 删除 | ||
| 794 | +}; | ||
| 795 | + | ||
| 796 | +// 新的方式(简化配置) | ||
| 797 | +const facets = [ | ||
| 798 | + { | ||
| 799 | + "field": "categoryName_keyword", | ||
| 800 | + "size": 15, | ||
| 801 | + "type": "terms" | ||
| 802 | + }, | ||
| 803 | + { | ||
| 804 | + "field": "brandName_keyword", | ||
| 805 | + "size": 15, | ||
| 806 | + "type": "terms" | ||
| 807 | + }, | ||
| 808 | + { | ||
| 809 | + "field": "supplierName_keyword", | ||
| 810 | + "size": 10, | ||
| 811 | + "type": "terms" | ||
| 812 | + }, | ||
| 813 | + { | ||
| 814 | + "field": "price", | ||
| 815 | + "type": "range", | ||
| 816 | + "ranges": [ | ||
| 817 | + {"key": "0-50", "to": 50}, | ||
| 818 | + {"key": "50-100", "from": 50, "to": 100}, | ||
| 819 | + {"key": "100-200", "from": 100, "to": 200}, | ||
| 820 | + {"key": "200+", "from": 200} | ||
| 821 | + ] | ||
| 822 | + } | ||
| 823 | +]; | ||
| 824 | +``` | ||
| 825 | + | ||
| 826 | +2. **过滤器使用新格式**(第 103 行): | ||
| 827 | +```javascript | ||
| 828 | +// 旧的方式 - 完全删除 | ||
| 829 | +filters: { | ||
| 830 | + "price_ranges": ["0-50", "50-100"] // 删除 | ||
| 831 | +} | ||
| 832 | + | ||
| 833 | +// 新的方式 | ||
| 834 | +filters: Object.keys(state.filters).length > 0 ? state.filters : null, | ||
| 835 | +range_filters: state.rangeFilters ? state.rangeFilters : null | ||
| 836 | +``` | ||
| 837 | + | ||
| 838 | +3. **解析标准化的分面结果**(第 208-258 行): | ||
| 839 | +```javascript | ||
| 840 | +// 旧的方式(ES 结构)- 完全删除并重写 | ||
| 841 | +function displayAggregations(aggregations) { | ||
| 842 | + if (!aggregations) return; | ||
| 843 | + // 旧的代码全部删除 | ||
| 844 | +} | ||
| 845 | + | ||
| 846 | +// 新的方式(标准化格式) | ||
| 847 | +function displayFacets(facets) { | ||
| 848 | + if (!facets) return; | ||
| 849 | + | ||
| 850 | + facets.forEach(facet => { | ||
| 851 | + if (facet.field === 'categoryName_keyword') { | ||
| 852 | + const container = document.getElementById('categoryTags'); | ||
| 853 | + let html = ''; | ||
| 854 | + | ||
| 855 | + facet.values.forEach(facetValue => { | ||
| 856 | + const value = facetValue.value; | ||
| 857 | + const count = facetValue.count; | ||
| 858 | + const selected = facetValue.selected; | ||
| 859 | + | ||
| 860 | + html += ` | ||
| 861 | + <span class="filter-tag ${selected ? 'active' : ''}" | ||
| 862 | + onclick="toggleFilter('${escapeAttr(facet.field)}', '${escapeAttr(value)}')"> | ||
| 863 | + ${escapeHtml(value)} (${count}) | ||
| 864 | + </span> | ||
| 865 | + `; | ||
| 866 | + }); | ||
| 867 | + | ||
| 868 | + container.innerHTML = html; | ||
| 869 | + } | ||
| 870 | + // 处理其他分面... | ||
| 871 | + }); | ||
| 872 | +} | ||
| 873 | +``` | ||
| 874 | + | ||
| 875 | + | ||
| 876 | +## 第三部分:实施步骤 | ||
| 877 | + | ||
| 878 | +### 阶段 1:后端模型层重构 | ||
| 879 | + | ||
| 880 | +**任务清单**: | ||
| 881 | + | ||
| 882 | +- [ ] 更新 `api/models.py` | ||
| 883 | + - [ ] 定义 `RangeFilter` 模型 | ||
| 884 | + - [ ] 定义 `FacetConfig` 模型 | ||
| 885 | + - [ ] 更新 `SearchRequest`,添加 `range_filters` 和 `facets` | ||
| 886 | + - [ ] **完全移除** `aggregations` 参数 | ||
| 887 | + - [ ] 定义 `FacetValue` 和 `FacetResult` 模型 | ||
| 888 | + - [ ] 更新 `SearchResponse`,使用标准化分面格式 | ||
| 889 | + - [ ] 更新 `ImageSearchRequest`,添加 `range_filters` | ||
| 890 | + - [ ] 添加 `SearchSuggestRequest` 和 `SearchSuggestResponse`(框架) | ||
| 891 | + | ||
| 892 | +**验证方式**: | ||
| 893 | + | ||
| 894 | +- 运行 Pydantic 模型验证 | ||
| 895 | +- 检查 API 文档(`/docs`)是否正确生成 | ||
| 896 | +- 确认旧的 `aggregations` 参数已完全移除 | ||
| 897 | + | ||
| 898 | +### 阶段 2:查询构建器重构 | ||
| 899 | + | ||
| 900 | +**任务清单**: | ||
| 901 | + | ||
| 902 | +- [ ] 重构 `search/es_query_builder.py` | ||
| 903 | + - [ ] **完全删除** `price_ranges` 硬编码逻辑(第 205-233 行) | ||
| 904 | + - [ ] 重构 `_build_filters` 方法,支持 `range_filters` | ||
| 905 | + - [ ] **完全删除** `add_dynamic_aggregations` 方法 | ||
| 906 | + - [ ] **完全删除或重命名** `add_aggregations` 方法 | ||
| 907 | + - [ ] 新增 `build_facets` 方法 | ||
| 908 | + - [ ] 更新 `build_query` 方法签名,添加 `range_filters` | ||
| 909 | +- [ ] 更新 `search/multilang_query_builder.py` | ||
| 910 | + - [ ] 更新 `build_multilang_query` 方法签名 | ||
| 911 | + - [ ] 确保正确传递 `range_filters` 参数 | ||
| 912 | + | ||
| 913 | +**验证方式**: | ||
| 914 | + | ||
| 915 | +- 打印生成的 ES DSL,检查正确性 | ||
| 916 | +- 确认旧的硬编码逻辑和方法已完全移除 | ||
| 917 | +- 手动测试不同的过滤器组合 | ||
| 918 | + | ||
| 919 | +### 阶段 3:搜索执行层重构 | ||
| 920 | + | ||
| 921 | +**任务清单**: | ||
| 922 | + | ||
| 923 | +- [ ] 更新 `search/searcher.py` | ||
| 924 | + - [ ] 更新 `search()` 方法签名,添加 `range_filters` 和 `facets` | ||
| 925 | + - [ ] **完全移除** `aggregations` 参数支持 | ||
| 926 | + - [ ] 使用新的 `build_facets` 方法 | ||
| 927 | + - [ ] 实现 `_standardize_facets()` 辅助方法 | ||
| 928 | + - [ ] 实现 `_get_field_label()` 辅助方法 | ||
| 929 | + - [ ] 更新 `SearchResult` 类,使用标准化分面格式 | ||
| 930 | + - [ ] 更新 `search_by_image()` 方法,支持 `range_filters` | ||
| 931 | + | ||
| 932 | +**验证方式**: | ||
| 933 | + | ||
| 934 | +- 手动测试各种搜索场景 | ||
| 935 | +- 检查分面结果格式是否标准化 | ||
| 936 | +- 确认旧的聚合逻辑已完全移除 | ||
| 937 | + | ||
| 938 | +### 阶段 4:API 路由层更新 | ||
| 939 | + | ||
| 940 | +**任务清单**: | ||
| 941 | + | ||
| 942 | +- [ ] 更新 `api/routes/search.py` | ||
| 943 | + - [ ] 更新 `/search/` 端点,使用新的请求参数 | ||
| 944 | + - [ ] **确认完全移除**对旧 `aggregations` 的支持 | ||
| 945 | + - [ ] 添加 `/search/suggestions` 端点(框架,返回空结果) | ||
| 946 | + - [ ] 添加 `/search/instant` 端点(框架,调用标准搜索) | ||
| 947 | + - [ ] 更新 `/search/image` 端点,支持 `range_filters` | ||
| 948 | + - [ ] 添加完整的端点文档和示例 | ||
| 949 | + | ||
| 950 | +**验证方式**: | ||
| 951 | + | ||
| 952 | +- 使用 Swagger UI(`/docs`)测试所有端点 | ||
| 953 | +- 检查请求和响应格式 | ||
| 954 | +- 确认旧参数不再生效(注意:Pydantic 默认忽略未知字段,旧的 `aggregations` 参数会被静默丢弃而非报错;如需明确拒绝,需在模型上配置 `extra="forbid"`) | ||
| 955 | + | ||
| 956 | +### 阶段 5:前端适配 | ||
| 957 | + | ||
| 958 | +**任务清单**: | ||
| 959 | + | ||
| 960 | +- [ ] 完全重构 `frontend/static/js/app.js` | ||
| 961 | + - [ ] **删除**所有 ES DSL 聚合代码(第 57-87 行) | ||
| 962 | + - [ ] 使用新的 `facets` 简化配置 | ||
| 963 | + - [ ] 修改过滤器参数,分离 `filters` 和 `range_filters` | ||
| 964 | + - [ ] **完全重写** `displayAggregations()` 为 `displayFacets()` | ||
| 965 | + - [ ] 解析标准化的分面结果格式 | ||
| 966 | + - [ ] **删除**所有 `price_ranges` 硬编码 | ||
| 967 | + - [ ] 添加范围过滤器 UI 组件 | ||
| 968 | + - [ ] 更新状态管理,支持 `rangeFilters` | ||
| 969 | + | ||
| 970 | +**验证方式**: | ||
| 971 | + | ||
| 972 | +- 浏览器测试所有前端功能 | ||
| 973 | +- 检查网络请求格式 | ||
| 974 | +- 检查分面结果显示 | ||
| 975 | +- 确认旧的代码已完全删除 | ||
| 976 | + | ||
| 977 | +### 阶段 6:文档更新与示例 | ||
| 978 | + | ||
| 979 | +**任务清单**: | ||
| 980 | + | ||
| 981 | +- [ ] 撰写完整的 API 接口文档 | ||
| 982 | + - [ ] 搜索接口文档 | ||
| 983 | + - [ ] 过滤器使用说明 | ||
| 984 | + - [ ] 分面搜索使用说明 | ||
| 985 | + - [ ] 搜索建议接口文档(标注为框架) | ||
| 986 | +- [ ] 更新项目文档 | ||
| 987 | + - [ ] 更新 `README.md` | ||
| 988 | + - [ ] 更新 `USER_GUIDE.md` | ||
| 989 | + - [ ] 更新 `CHANGES.md` 或 `CHANGELOG.md` | ||
| 990 | +- [ ] 添加完整的使用示例 | ||
| 991 | + - [ ] 简单搜索示例 | ||
| 992 | + - [ ] 过滤器搜索示例 | ||
| 993 | + - [ ] 分面搜索示例 | ||
| 994 | + - [ ] cURL 命令示例 | ||
| 995 | + - [ ] Python 代码示例 | ||
| 996 | + - [ ] JavaScript 代码示例 | ||
| 997 | +- [ ] 更新 API 文档注释 | ||
| 998 | + - [ ] 所有模型的文档字符串 | ||
| 999 | + - [ ] 所有端点的文档字符串 | ||
| 1000 | + - [ ] 参数说明和示例 | ||
| 1001 | + | ||
| 1002 | +**验证方式**: | ||
| 1003 | + | ||
| 1004 | +- 文档审查 | ||
| 1005 | +- 示例代码验证 | ||
| 1006 | +- 确保文档与实现一致 | ||
| 1007 | + | ||
| 1008 | +## 第四部分:API 使用示例 | ||
| 1009 | + | ||
| 1010 | +### 示例 1:简单搜索 | ||
| 1011 | + | ||
| 1012 | +```bash | ||
| 1013 | +POST /search/ | ||
| 1014 | +Content-Type: application/json | ||
| 1015 | + | ||
| 1016 | +{ | ||
| 1017 | + "query": "芭比娃娃", | ||
| 1018 | + "size": 20 | ||
| 1019 | +} | ||
| 1020 | +``` | ||
| 1021 | + | ||
| 1022 | +**响应**: | ||
| 1023 | + | ||
| 1024 | +```json | ||
| 1025 | +{ | ||
| 1026 | + "hits": [ | ||
| 1027 | + { | ||
| 1028 | + "_id": "12345", | ||
| 1029 | + "_score": 8.5, | ||
| 1030 | + "_source": { | ||
| 1031 | + "name": "芭比时尚娃娃", | ||
| 1032 | + "price": 89.99, | ||
| 1033 | + "categoryName": "玩具" | ||
| 1034 | + } | ||
| 1035 | + } | ||
| 1036 | + ], | ||
| 1037 | + "total": 118, | ||
| 1038 | + "max_score": 8.5, | ||
| 1039 | + "took_ms": 45, | ||
| 1040 | + "query_info": { | ||
| 1041 | + "original_query": "芭比娃娃", | ||
| 1042 | + "detected_language": "zh" | ||
| 1043 | + } | ||
| 1044 | +} | ||
| 1045 | +``` | ||
| 1046 | + | ||
| 1047 | +### 示例 2:带过滤器的搜索 | ||
| 1048 | + | ||
| 1049 | +```bash | ||
| 1050 | +POST /search/ | ||
| 1051 | +Content-Type: application/json | ||
| 1052 | + | ||
| 1053 | +{ | ||
| 1054 | + "query": "玩具", | ||
| 1055 | + "size": 20, | ||
| 1056 | + "filters": { | ||
| 1057 | + "categoryName_keyword": ["玩具", "益智玩具"], | ||
| 1058 | + "in_stock": true | ||
| 1059 | + }, | ||
| 1060 | + "range_filters": { | ||
| 1061 | + "price": { | ||
| 1062 | + "gte": 50, | ||
| 1063 | + "lte": 200 | ||
| 1064 | + } | ||
| 1065 | + } | ||
| 1066 | +} | ||
| 1067 | +``` | ||
| 1068 | + | ||
| 1069 | +**响应**: | ||
| 1070 | + | ||
| 1071 | +```json | ||
| 1072 | +{ | ||
| 1073 | + "hits": [...], | ||
| 1074 | + "total": 45, | ||
| 1075 | + "max_score": 7.2, | ||
| 1076 | + "took_ms": 38 | ||
| 1077 | +} | ||
| 1078 | +``` | ||
| 1079 | + | ||
| 1080 | +### 示例 3:带分面搜索的请求(简单模式) | ||
| 1081 | + | ||
| 1082 | +```bash | ||
| 1083 | +POST /search/ | ||
| 1084 | +Content-Type: application/json | ||
| 1085 | + | ||
| 1086 | +{ | ||
| 1087 | + "query": "玩具", | ||
| 1088 | + "size": 20, | ||
| 1089 | + "facets": [ | ||
| 1090 | + "categoryName_keyword", | ||
| 1091 | + "brandName_keyword" | ||
| 1092 | + ] | ||
| 1093 | +} | ||
| 1094 | +``` | ||
| 1095 | + | ||
| 1096 | +**响应**: | ||
| 1097 | + | ||
| 1098 | +```json | ||
| 1099 | +{ | ||
| 1100 | + "hits": [...], | ||
| 1101 | + "total": 118, | ||
| 1102 | + "max_score": 8.5, | ||
| 1103 | + "took_ms": 45, | ||
| 1104 | + "facets": [ | ||
| 1105 | + { | ||
| 1106 | + "field": "categoryName_keyword", | ||
| 1107 | + "label": "商品类目", | ||
| 1108 | + "type": "terms", | ||
| 1109 | + "values": [ | ||
| 1110 | + { | ||
| 1111 | + "value": "玩具", | ||
| 1112 | + "label": "玩具", | ||
| 1113 | + "count": 85, | ||
| 1114 | + "selected": false | ||
| 1115 | + }, | ||
| 1116 | + { | ||
| 1117 | + "value": "益智玩具", | ||
| 1118 | + "label": "益智玩具", | ||
| 1119 | + "count": 33, | ||
| 1120 | + "selected": false | ||
| 1121 | + } | ||
| 1122 | + ] | ||
| 1123 | + }, | ||
| 1124 | + { | ||
| 1125 | + "field": "brandName_keyword", | ||
| 1126 | + "label": "品牌", | ||
| 1127 | + "type": "terms", | ||
| 1128 | + "values": [ | ||
| 1129 | + { | ||
| 1130 | + "value": "乐高", | ||
| 1131 | + "label": "乐高", | ||
| 1132 | + "count": 42, | ||
| 1133 | + "selected": false | ||
| 1134 | + } | ||
| 1135 | + ] | ||
| 1136 | + } | ||
| 1137 | + ] | ||
| 1138 | +} | ||
| 1139 | +``` | ||
| 1140 | + | ||
| 1141 | +### 示例 4:带分面搜索的请求(高级模式) | ||
| 1142 | + | ||
| 1143 | +```bash | ||
| 1144 | +POST /search/ | ||
| 1145 | +Content-Type: application/json | ||
| 1146 | + | ||
| 1147 | +{ | ||
| 1148 | + "query": "玩具", | ||
| 1149 | + "size": 20, | ||
| 1150 | + "facets": [ | ||
| 1151 | + { | ||
| 1152 | + "field": "categoryName_keyword", | ||
| 1153 | + "size": 15, | ||
| 1154 | + "type": "terms" | ||
| 1155 | + }, | ||
| 1156 | + { | ||
| 1157 | + "field": "brandName_keyword", | ||
| 1158 | + "size": 15, | ||
| 1159 | + "type": "terms" | ||
| 1160 | + }, | ||
| 1161 | + { | ||
| 1162 | + "field": "price", | ||
| 1163 | + "type": "range", | ||
| 1164 | + "ranges": [ | ||
| 1165 | + {"key": "0-50", "to": 50}, | ||
| 1166 | + {"key": "50-100", "from": 50, "to": 100}, | ||
| 1167 | + {"key": "100-200", "from": 100, "to": 200}, | ||
| 1168 | + {"key": "200+", "from": 200} | ||
| 1169 | + ] | ||
| 1170 | + } | ||
| 1171 | + ] | ||
| 1172 | +} | ||
| 1173 | +``` | ||
| 1174 | + | ||
| 1175 | +**响应**: | ||
| 1176 | + | ||
| 1177 | +```json | ||
| 1178 | +{ | ||
| 1179 | + "hits": [...], | ||
| 1180 | + "total": 118, | ||
| 1181 | + "max_score": 8.5, | ||
| 1182 | + "took_ms": 45, | ||
| 1183 | + "facets": [ | ||
| 1184 | + { | ||
| 1185 | + "field": "categoryName_keyword", | ||
| 1186 | + "label": "商品类目", | ||
| 1187 | + "type": "terms", | ||
| 1188 | + "values": [...] | ||
| 1189 | + }, | ||
| 1190 | + { | ||
| 1191 | + "field": "price", | ||
| 1192 | + "label": "价格区间", | ||
| 1193 | + "type": "range", | ||
| 1194 | + "values": [ | ||
| 1195 | + { | ||
| 1196 | + "value": "0-50", | ||
| 1197 | + "label": "0-50", | ||
| 1198 | + "count": 23, | ||
| 1199 | + "selected": false | ||
| 1200 | + }, | ||
| 1201 | + { | ||
| 1202 | + "value": "50-100", | ||
| 1203 | + "label": "50-100", | ||
| 1204 | + "count": 45, | ||
| 1205 | + "selected": false | ||
| 1206 | + }, | ||
| 1207 | + { | ||
| 1208 | + "value": "100-200", | ||
| 1209 | + "label": "100-200", | ||
| 1210 | + "count": 38, | ||
| 1211 | + "selected": false | ||
| 1212 | + }, | ||
| 1213 | + { | ||
| 1214 | + "value": "200+", | ||
| 1215 | + "label": "200+", | ||
| 1216 | + "count": 12, | ||
| 1217 | + "selected": false | ||
| 1218 | + } | ||
| 1219 | + ] | ||
| 1220 | + } | ||
| 1221 | + ] | ||
| 1222 | +} | ||
| 1223 | +``` | ||
| 1224 | + | ||
| 1225 | +### 示例 5:复杂搜索(过滤+分面+排序) | ||
| 1226 | + | ||
| 1227 | +```bash | ||
| 1228 | +POST /search/ | ||
| 1229 | +Content-Type: application/json | ||
| 1230 | + | ||
| 1231 | +{ | ||
| 1232 | + "query": "玩具 AND (乐高 OR 芭比)", | ||
| 1233 | + "size": 20, | ||
| 1234 | + "from": 0, | ||
| 1235 | + "filters": { | ||
| 1236 | + "categoryName_keyword": "玩具" | ||
| 1237 | + }, | ||
| 1238 | + "range_filters": { | ||
| 1239 | + "price": { | ||
| 1240 | + "gte": 50, | ||
| 1241 | + "lte": 200 | ||
| 1242 | + }, | ||
| 1243 | + "days_since_last_update": { | ||
| 1244 | + "lte": 30 | ||
| 1245 | + } | ||
| 1246 | + }, | ||
| 1247 | + "facets": [ | ||
| 1248 | + {"field": "brandName_keyword", "size": 15}, | ||
| 1249 | + {"field": "supplierName_keyword", "size": 10} | ||
| 1250 | + ], | ||
| 1251 | + "sort_by": "price", | ||
| 1252 | + "sort_order": "asc", | ||
| 1253 | + "debug": false | ||
| 1254 | +} | ||
| 1255 | +``` | ||
| 1256 | + | ||
| 1257 | +### 示例 6:搜索建议(框架) | ||
| 1258 | + | ||
| 1259 | +```bash | ||
| 1260 | +GET /search/suggestions?q=芭&size=5 | ||
| 1261 | + | ||
| 1262 | +{ | ||
| 1263 | + "query": "芭", | ||
| 1264 | + "suggestions": [], | ||
| 1265 | + "took_ms": 2 | ||
| 1266 | +} | ||
| 1267 | +``` | ||
| 1268 | + | ||
| 1269 | +### 示例 7:即时搜索(框架) | ||
| 1270 | + | ||
| 1271 | +```bash | ||
| 1272 | +GET /search/instant?q=玩具&size=5 | ||
| 1273 | + | ||
| 1274 | +{ | ||
| 1275 | + "hits": [...], | ||
| 1276 | + "total": 118, | ||
| 1277 | + "max_score": 8.5, | ||
| 1278 | + "took_ms": 25, | ||
| 1279 | + "query_info": {...} | ||
| 1280 | +} | ||
| 1281 | +``` | ||
| 1282 | + | ||
| 1283 | +### Python 示例代码 | ||
| 1284 | + | ||
| 1285 | +```python | ||
| 1286 | +import requests | ||
| 1287 | + | ||
| 1288 | +API_URL = "http://localhost:6002/search/" | ||
| 1289 | + | ||
| 1290 | +# 简单搜索 | ||
| 1291 | +response = requests.post(API_URL, json={ | ||
| 1292 | + "query": "芭比娃娃", | ||
| 1293 | + "size": 20 | ||
| 1294 | +}) | ||
| 1295 | +print(response.json()) | ||
| 1296 | + | ||
| 1297 | +# 带过滤器和分面的搜索 | ||
| 1298 | +response = requests.post(API_URL, json={ | ||
| 1299 | + "query": "玩具", | ||
| 1300 | + "size": 20, | ||
| 1301 | + "filters": { | ||
| 1302 | + "categoryName_keyword": ["玩具", "益智玩具"] | ||
| 1303 | + }, | ||
| 1304 | + "range_filters": { | ||
| 1305 | + "price": {"gte": 50, "lte": 200} | ||
| 1306 | + }, | ||
| 1307 | + "facets": [ | ||
| 1308 | + {"field": "brandName_keyword", "size": 15}, | ||
| 1309 | + {"field": "categoryName_keyword", "size": 15} | ||
| 1310 | + ], | ||
| 1311 | + "sort_by": "price", | ||
| 1312 | + "sort_order": "asc" | ||
| 1313 | +}) | ||
| 1314 | +result = response.json() | ||
| 1315 | + | ||
| 1316 | +# 处理分面结果 | ||
| 1317 | +for facet in result.get('facets', []): | ||
| 1318 | + print(f"\n{facet['label']}:") | ||
| 1319 | + for value in facet['values']: | ||
| 1320 | + print(f" - {value['label']}: {value['count']}") | ||
| 1321 | +``` | ||
| 1322 | + | ||
| 1323 | +### JavaScript 示例代码 | ||
| 1324 | + | ||
| 1325 | +```javascript | ||
| 1326 | +// 搜索函数 | ||
| 1327 | +async function searchProducts(query, filters, rangeFilters, facets) { | ||
| 1328 | + const response = await fetch('http://localhost:6002/search/', { | ||
| 1329 | + method: 'POST', | ||
| 1330 | + headers: { | ||
| 1331 | + 'Content-Type': 'application/json' | ||
| 1332 | + }, | ||
| 1333 | + body: JSON.stringify({ | ||
| 1334 | + query: query, | ||
| 1335 | + size: 20, | ||
| 1336 | + filters: filters, | ||
| 1337 | + range_filters: rangeFilters, | ||
| 1338 | + facets: facets | ||
| 1339 | + }) | ||
| 1340 | + }); | ||
| 1341 | + | ||
| 1342 | + const data = await response.json(); | ||
| 1343 | + return data; | ||
| 1344 | +} | ||
| 1345 | + | ||
| 1346 | +// 使用示例 | ||
| 1347 | +const result = await searchProducts( | ||
| 1348 | + "玩具", | ||
| 1349 | + { categoryName_keyword: ["玩具"] }, | ||
| 1350 | + { price: { gte: 50, lte: 200 } }, | ||
| 1351 | + [ | ||
| 1352 | + { field: "brandName_keyword", size: 15 }, | ||
| 1353 | + { field: "categoryName_keyword", size: 15 } | ||
| 1354 | + ] | ||
| 1355 | +); | ||
| 1356 | + | ||
| 1357 | +// 显示分面结果 | ||
| 1358 | +result.facets.forEach(facet => { | ||
| 1359 | + console.log(`${facet.label}:`); | ||
| 1360 | + facet.values.forEach(value => { | ||
| 1361 | + console.log(` - ${value.label}: ${value.count}`); | ||
| 1362 | + }); | ||
| 1363 | +}); | ||
| 1364 | +``` | ||
| 1365 | + | ||
| 1366 | +## 第五部分:验收标准 | ||
| 1367 | + | ||
| 1368 | +### 功能验收 | ||
| 1369 | + | ||
| 1370 | +- [ ] 所有硬编码逻辑已完全移除 | ||
| 1371 | +- [ ] 新的过滤器参数正常工作 | ||
| 1372 | +- [ ] 分面搜索返回标准化格式 | ||
| 1373 | +- [ ] API 文档完整准确 | ||
| 1374 | +- [ ] 前端功能正常工作 | ||
| 1375 | +- [ ] 搜索建议端点已添加(框架) | ||
| 1376 | + | ||
| 1377 | +### 代码质量验收 | ||
| 1378 | + | ||
| 1379 | +- [ ] 代码中不再有 `price_ranges` 字符串 | ||
| 1380 | +- [ ] 代码中不再有 `add_dynamic_aggregations` 方法 | ||
| 1381 | +- [ ] 代码中不再有 `aggregations` 参数(除了文档说明) | ||
| 1382 | +- [ ] 所有方法签名已更新 | ||
| 1383 | +- [ ] 所有文档字符串已更新 | ||
| 1384 | + | ||
| 1385 | +### 性能验收 | ||
| 1386 | + | ||
| 1387 | +- [ ] 搜索响应时间无明显增加 | ||
| 1388 | +- [ ] 分面查询性能正常 | ||
| 1389 | +- [ ] 过滤器性能正常 | ||
| 1390 | + | ||
| 1391 | +## 总结 | ||
| 1392 | + | ||
| 1393 | +本计划通过系统性的重构,将搜索 API 从硬编码、暴露 ES 细节的实现,转变为灵活、通用、易用的 SaaS 产品接口。关键改进包括: | ||
| 1394 | + | ||
| 1395 | +1. ✅ **完全移除**硬编码的 price_ranges 逻辑 | ||
| 1396 | +2. ✅ 实现结构化的过滤参数(filters + range_filters) | ||
| 1397 | +3. ✅ **完全移除** aggregations 参数,使用简化的 facets 接口 | ||
| 1398 | +4. ✅ 标准化分面搜索响应格式 | ||
| 1399 | +5. ✅ 添加搜索建议功能框架(暂不实现) | ||
| 1400 | +6. ✅ **不保留向后兼容代码**,完全重构 | ||
| 1401 | + | ||
| 1402 | +通过这些改进,系统将具备更好的通用性、可维护性和可扩展性,代码更加精简和统一。 | ||
| 0 | \ No newline at end of file | 1403 | \ No newline at end of file |
CLAUDE.md
| @@ -96,7 +96,6 @@ The `searcher` supports: | @@ -96,7 +96,6 @@ The `searcher` supports: | ||
| 96 | - Extracts product data from MySQL | 96 | - Extracts product data from MySQL |
| 97 | - Maps images from filebank database | 97 | - Maps images from filebank database |
| 98 | - Creates inverted index (URL → SKU list) | 98 | - Creates inverted index (URL → SKU list) |
| 99 | - - Configurable year range via `--years` parameter | ||
| 100 | 99 | ||
| 101 | ## Key Implementation Notes | 100 | ## Key Implementation Notes |
| 102 | 101 |
query/query_rewriter.py
| @@ -7,21 +7,26 @@ import re | @@ -7,21 +7,26 @@ import re | ||
| 7 | 7 | ||
| 8 | 8 | ||
| 9 | class QueryRewriter: | 9 | class QueryRewriter: |
| 10 | - """Rewrites queries based on configured dictionary rules.""" | 10 | + """Rewrites queries based on exact word matching with configured dictionary rules. |
| 11 | + | ||
| 12 | + Only performs whole-query exact matching - no partial matching or substring replacement. | ||
| 13 | + The entire query must exactly match a key in the rewrite dictionary to be rewritten. | ||
| 14 | + """ | ||
| 11 | 15 | ||
| 12 | def __init__(self, rewrite_dict: Dict[str, str] = None): | 16 | def __init__(self, rewrite_dict: Dict[str, str] = None): |
| 13 | """ | 17 | """ |
| 14 | - Initialize query rewriter. | 18 | + Initialize query rewriter for exact word matching only. |
| 15 | 19 | ||
| 16 | Args: | 20 | Args: |
| 17 | - rewrite_dict: Dictionary mapping query patterns to rewrite expressions | 21 | + rewrite_dict: Dictionary mapping exact query terms to rewrite expressions |
| 18 | e.g., {"芭比": "brand:芭比 OR name:芭比娃娃"} | 22 | e.g., {"芭比": "brand:芭比 OR name:芭比娃娃"} |
| 23 | + Only a query that equals a key in full is rewritten; no partial matching. | ||
| 19 | """ | 24 | """ |
| 20 | self.rewrite_dict = rewrite_dict or {} | 25 | self.rewrite_dict = rewrite_dict or {} |
| 21 | 26 | ||
| 22 | def rewrite(self, query: str) -> str: | 27 | def rewrite(self, query: str) -> str: |
| 23 | """ | 28 | """ |
| 24 | - Rewrite query based on dictionary rules. | 29 | + Rewrite query based on dictionary rules using exact word matching only. |
| 25 | 30 | ||
| 26 | Args: | 31 | Args: |
| 27 | query: Original query string | 32 | query: Original query string |
| @@ -32,21 +37,13 @@ class QueryRewriter: | @@ -32,21 +37,13 @@ class QueryRewriter: | ||
| 32 | if not query or not query.strip(): | 37 | if not query or not query.strip(): |
| 33 | return query | 38 | return query |
| 34 | 39 | ||
| 35 | - # Check for exact matches first | 40 | + # Only check for exact matches - no partial matching |
| 36 | if query in self.rewrite_dict: | 41 | if query in self.rewrite_dict: |
| 37 | rewritten = self.rewrite_dict[query] | 42 | rewritten = self.rewrite_dict[query] |
| 38 | print(f"[QueryRewriter] Exact match: '{query}' -> '{rewritten}'") | 43 | print(f"[QueryRewriter] Exact match: '{query}' -> '{rewritten}'") |
| 39 | return rewritten | 44 | return rewritten |
| 40 | 45 | ||
| 41 | - # Check for partial matches (query contains a rewrite key) | ||
| 42 | - for pattern, replacement in self.rewrite_dict.items(): | ||
| 43 | - if pattern in query: | ||
| 44 | - # Replace the pattern | ||
| 45 | - rewritten = query.replace(pattern, replacement) | ||
| 46 | - print(f"[QueryRewriter] Partial match: '{query}' -> '{rewritten}'") | ||
| 47 | - return rewritten | ||
| 48 | - | ||
| 49 | - # No rewrite needed | 46 | + # No rewrite needed - no partial matching |
| 50 | return query | 47 | return query |
| 51 | 48 | ||
| 52 | def add_rule(self, pattern: str, replacement: str) -> None: | 49 | def add_rule(self, pattern: str, replacement: str) -> None: |
simple_server.py deleted
| @@ -1,340 +0,0 @@ | @@ -1,340 +0,0 @@ | ||
| 1 | -#!/usr/bin/env python3 | ||
| 2 | -""" | ||
| 3 | -Simple API server for testing aggregation functionality without external dependencies. | ||
| 4 | -""" | ||
| 5 | - | ||
| 6 | -import json | ||
| 7 | -import time | ||
| 8 | -import random | ||
| 9 | -from http.server import HTTPServer, BaseHTTPRequestHandler | ||
| 10 | -from urllib.parse import urlparse, parse_qs | ||
| 11 | -import threading | ||
| 12 | - | ||
| 13 | -class SearchAPIHandler(BaseHTTPRequestHandler): | ||
| 14 | - """Simple API handler for search requests.""" | ||
| 15 | - | ||
| 16 | - def do_OPTIONS(self): | ||
| 17 | - """Handle CORS preflight requests.""" | ||
| 18 | - self.send_response(200) | ||
| 19 | - self.send_header('Access-Control-Allow-Origin', '*') | ||
| 20 | - self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS') | ||
| 21 | - self.send_header('Access-Control-Allow-Headers', 'Content-Type') | ||
| 22 | - self.end_headers() | ||
| 23 | - | ||
| 24 | - def do_POST(self): | ||
| 25 | - """Handle POST requests.""" | ||
| 26 | - if self.path == '/': | ||
| 27 | - self.handle_search() | ||
| 28 | - elif self.path == '/search/': | ||
| 29 | - self.handle_search() | ||
| 30 | - else: | ||
| 31 | - self.send_response(404) | ||
| 32 | - self.end_headers() | ||
| 33 | - | ||
| 34 | - def handle_search(self): | ||
| 35 | - """Handle search requests with aggregations.""" | ||
| 36 | - try: | ||
| 37 | - # Read request body | ||
| 38 | - content_length = int(self.headers['Content-Length']) | ||
| 39 | - post_data = self.rfile.read(content_length) | ||
| 40 | - request_data = json.loads(post_data.decode('utf-8')) | ||
| 41 | - | ||
| 42 | - query = request_data.get('query', '') | ||
| 43 | - size = request_data.get('size', 10) | ||
| 44 | - sort_by = request_data.get('sort_by', 'relevance') | ||
| 45 | - aggregations = request_data.get('aggregations', {}) | ||
| 46 | - filters = request_data.get('filters', {}) | ||
| 47 | - | ||
| 48 | - print(f"Search request: query='{query}', size={size}, sort_by={sort_by}") | ||
| 49 | - print(f"Aggregations: {list(aggregations.keys()) if aggregations else 'None'}") | ||
| 50 | - print(f"Filters: {filters if filters else 'None'}") | ||
| 51 | - | ||
| 52 | - # Simulate processing time | ||
| 53 | - time.sleep(0.1) | ||
| 54 | - | ||
| 55 | - # Generate mock search results | ||
| 56 | - results = self.generate_mock_results(query, size, sort_by, filters) | ||
| 57 | - | ||
| 58 | - # Generate mock aggregations | ||
| 59 | - aggregation_results = self.generate_mock_aggregations(aggregations, filters) | ||
| 60 | - | ||
| 61 | - # Build response | ||
| 62 | - response = { | ||
| 63 | - "hits": results, | ||
| 64 | - "total": len(results) + random.randint(10, 100), | ||
| 65 | - "max_score": round(random.uniform(1.5, 3.5), 3), | ||
| 66 | - "took_ms": random.randint(15, 45), | ||
| 67 | - "aggregations": aggregation_results, | ||
| 68 | - "query_info": { | ||
| 69 | - "original_query": query, | ||
| 70 | - "rewritten_query": query, | ||
| 71 | - "detected_language": "zh" if any('\u4e00' <= char <= '\u9fff' for char in query) else "en", | ||
| 72 | - "domain": "default", | ||
| 73 | - "translations": {}, | ||
| 74 | - "has_vector": False | ||
| 75 | - } | ||
| 76 | - } | ||
| 77 | - | ||
| 78 | - # Send response | ||
| 79 | - self.send_response(200) | ||
| 80 | - self.send_header('Content-Type', 'application/json') | ||
| 81 | - self.send_header('Access-Control-Allow-Origin', '*') | ||
| 82 | - self.end_headers() | ||
| 83 | - | ||
| 84 | - response_json = json.dumps(response, ensure_ascii=False, indent=2) | ||
| 85 | - self.wfile.write(response_json.encode('utf-8')) | ||
| 86 | - | ||
| 87 | - print(f"Response sent with {len(results)} results and {len(aggregation_results)} aggregations") | ||
| 88 | - | ||
| 89 | - except Exception as e: | ||
| 90 | - print(f"Error handling request: {e}") | ||
| 91 | - self.send_response(500) | ||
| 92 | - self.send_header('Content-Type', 'application/json') | ||
| 93 | - self.send_header('Access-Control-Allow-Origin', '*') | ||
| 94 | - self.end_headers() | ||
| 95 | - | ||
| 96 | - error_response = { | ||
| 97 | - "error": str(e), | ||
| 98 | - "detail": "Internal server error" | ||
| 99 | - } | ||
| 100 | - | ||
| 101 | - self.wfile.write(json.dumps(error_response).encode('utf-8')) | ||
| 102 | - | ||
| 103 | - def generate_mock_results(self, query, size, sort_by, filters): | ||
| 104 | - """Generate mock search results.""" | ||
| 105 | - | ||
| 106 | - # Sample product data | ||
| 107 | - sample_products = [ | ||
| 108 | - { | ||
| 109 | - "skuId": 1001, | ||
| 110 | - "name": "芭比娃娃梦幻套装", | ||
| 111 | - "enSpuName": "Barbie Dream House Playset", | ||
| 112 | - "ruSkuName": "Кукла Барби Мечтательный домик", | ||
| 113 | - "categoryName": "芭比", | ||
| 114 | - "brandName": "美泰", | ||
| 115 | - "supplierName": "义乌玩具厂", | ||
| 116 | - "price": 89.99, | ||
| 117 | - "imageUrl": "https://picsum.photos/seed/barbie1/200/200.jpg", | ||
| 118 | - "create_time": "2024-01-15T10:30:00Z", | ||
| 119 | - "days_since_last_update": 45 | ||
| 120 | - }, | ||
| 121 | - { | ||
| 122 | - "skuId": 1002, | ||
| 123 | - "name": "芭比娃娃时尚系列", | ||
| 124 | - "enSpuName": "Barbie Fashion Doll Collection", | ||
| 125 | - "ruSkuName": "Кукла Барби Модная коллекция", | ||
| 126 | - "categoryName": "芭比", | ||
| 127 | - "brandName": "美泰", | ||
| 128 | - "supplierName": "汕头玩具公司", | ||
| 129 | - "price": 45.50, | ||
| 130 | - "imageUrl": "https://picsum.photos/seed/barbie2/200/200.jpg", | ||
| 131 | - "create_time": "2024-02-20T14:15:00Z", | ||
| 132 | - "days_since_last_update": 30 | ||
| 133 | - }, | ||
| 134 | - { | ||
| 135 | - "skuId": 1003, | ||
| 136 | - "name": "儿童积木套装", | ||
| 137 | - "enSpuName": "Kids Building Blocks Set", | ||
| 138 | - "ruSkuName": "Детский строительный набор", | ||
| 139 | - "categoryName": "积木", | ||
| 140 | - "brandName": "乐高", | ||
| 141 | - "supplierName": "深圳塑胶制品厂", | ||
| 142 | - "price": 158.00, | ||
| 143 | - "imageUrl": "https://picsum.photos/seed/blocks1/200/200.jpg", | ||
| 144 | - "create_time": "2024-01-10T09:20:00Z", | ||
| 145 | - "days_since_last_update": 60 | ||
| 146 | - }, | ||
| 147 | - { | ||
| 148 | - "skuId": 1004, | ||
| 149 | - "name": "消防车玩具模型", | ||
| 150 | - "enSpuName": "Fire Truck Toy Model", | ||
| 151 | - "ruSkuName": "Модель пожарной машины", | ||
| 152 | - "categoryName": "小汽车", | ||
| 153 | - "brandName": "多美卡", | ||
| 154 | - "supplierName": "东莞玩具制造厂", | ||
| 155 | - "price": 78.50, | ||
| 156 | - "imageUrl": "https://picsum.photos/seed/firetruck1/200/200.jpg", | ||
| 157 | - "create_time": "2024-03-05T16:45:00Z", | ||
| 158 | - "days_since_last_update": 15 | ||
| 159 | - }, | ||
| 160 | - { | ||
| 161 | - "skuId": 1005, | ||
| 162 | - "name": "婴儿毛绒玩具", | ||
| 163 | - "enSpuName": "Baby Plush Toy", | ||
| 164 | - "ruSkuName": "Детская плюшевая игрушка", | ||
| 165 | - "categoryName": "婴儿娃娃", | ||
| 166 | - "brandName": "迪士尼", | ||
| 167 | - "supplierName": "上海礼品公司", | ||
| 168 | - "price": 32.00, | ||
| 169 | - "imageUrl": "https://picsum.photos/seed/plush1/200/200.jpg", | ||
| 170 | - "create_time": "2024-02-14T11:30:00Z", | ||
| 171 | - "days_since_last_update": 25 | ||
| 172 | - } | ||
| 173 | - ] | ||
| 174 | - | ||
| 175 | - # Apply filters if any | ||
| 176 | - if filters: | ||
| 177 | - filtered_products = [] | ||
| 178 | - for product in sample_products: | ||
| 179 | - include = True | ||
| 180 | - | ||
| 181 | - # Check category filter | ||
| 182 | - if 'category_name' in filters: | ||
| 183 | - if product['categoryName'] not in filters['category_name']: | ||
| 184 | - include = False | ||
| 185 | - | ||
| 186 | - # Check brand filter | ||
| 187 | - if 'brand_name' in filters: | ||
| 188 | - if product['brandName'] not in filters['brand_name']: | ||
| 189 | - include = False | ||
| 190 | - | ||
| 191 | - # Check price range filter | ||
| 192 | - if 'price_ranges' in filters: | ||
| 193 | - price = product['price'] | ||
| 194 | - in_range = False | ||
| 195 | - for price_range in filters['price_ranges']: | ||
| 196 | - if price_range == '0-50' and price <= 50: | ||
| 197 | - in_range = True | ||
| 198 | - elif price_range == '50-100' and 50 < price <= 100: | ||
| 199 | - in_range = True | ||
| 200 | - elif price_range == '100-200' and 100 < price <= 200: | ||
| 201 | - in_range = True | ||
| 202 | - elif price_range == '200+' and price > 200: | ||
| 203 | - in_range = True | ||
| 204 | - if not in_range: | ||
| 205 | - include = False | ||
| 206 | - | ||
| 207 | - if include: | ||
| 208 | - filtered_products.append(product) | ||
| 209 | - sample_products = filtered_products | ||
| 210 | - | ||
| 211 | - # Apply sorting | ||
| 212 | - if sort_by == 'price_asc': | ||
| 213 | - sample_products.sort(key=lambda x: x.get('price', 0)) | ||
| 214 | - elif sort_by == 'price_desc': | ||
| 215 | - sample_products.sort(key=lambda x: x.get('price', 0), reverse=True) | ||
| 216 | - elif sort_by == 'time_desc': | ||
| 217 | - sample_products.sort(key=lambda x: x.get('create_time', ''), reverse=True) | ||
| 218 | - | ||
| 219 | - # Convert to API response format | ||
| 220 | - results = [] | ||
| 221 | - for i, product in enumerate(sample_products[:size]): | ||
| 222 | - hit = { | ||
| 223 | - "_id": str(product['skuId']), | ||
| 224 | - "_score": round(random.uniform(1.5, 3.5), 3), | ||
| 225 | - "_source": product | ||
| 226 | - } | ||
| 227 | - results.append(hit) | ||
| 228 | - | ||
| 229 | - return results | ||
| 230 | - | ||
| 231 | - def generate_mock_aggregations(self, aggregations, filters): | ||
| 232 | - """Generate mock aggregation results.""" | ||
| 233 | - if not aggregations: | ||
| 234 | - return {} | ||
| 235 | - | ||
| 236 | - result = {} | ||
| 237 | - | ||
| 238 | - for agg_name, agg_spec in aggregations.items(): | ||
| 239 | - agg_type = agg_spec.get('type', 'terms') | ||
| 240 | - | ||
| 241 | - if agg_type == 'terms': | ||
| 242 | - # Generate mock terms aggregation | ||
| 243 | - if agg_name == 'category_name': | ||
| 244 | - buckets = [ | ||
| 245 | - {"key": "芭比", "doc_count": random.randint(15, 35)}, | ||
| 246 | - {"key": "儿童娃娃", "doc_count": random.randint(8, 20)}, | ||
| 247 | - {"key": "积木", "doc_count": random.randint(5, 15)}, | ||
| 248 | - {"key": "小汽车", "doc_count": random.randint(3, 12)}, | ||
| 249 | - {"key": "婴儿娃娃", "doc_count": random.randint(4, 10)}, | ||
| 250 | - {"key": "人物", "doc_count": random.randint(6, 18)} | ||
| 251 | - ] | ||
| 252 | - elif agg_name == 'brand_name': | ||
| 253 | - buckets = [ | ||
| 254 | - {"key": "美泰", "doc_count": random.randint(20, 40)}, | ||
| 255 | - {"key": "乐高", "doc_count": random.randint(10, 25)}, | ||
| 256 | - {"key": "迪士尼", "doc_count": random.randint(8, 20)}, | ||
| 257 | - {"key": "多美卡", "doc_count": random.randint(5, 15)}, | ||
| 258 | - {"key": "孩之宝", "doc_count": random.randint(6, 18)}, | ||
| 259 | - {"key": "万代", "doc_count": random.randint(3, 10)} | ||
| 260 | - ] | ||
| 261 | - elif agg_name == 'material_type': | ||
| 262 | - buckets = [ | ||
| 263 | - {"key": "塑料", "doc_count": random.randint(40, 80)}, | ||
| 264 | - {"key": "布绒", "doc_count": random.randint(8, 20)}, | ||
| 265 | - {"key": "金属", "doc_count": random.randint(5, 15)}, | ||
| 266 | - {"key": "木质", "doc_count": random.randint(3, 12)} | ||
| 267 | - ] | ||
| 268 | - else: | ||
| 269 | - # Generic terms aggregation | ||
| 270 | - buckets = [ | ||
| 271 | - {"key": f"选项{i+1}", "doc_count": random.randint(5, 25)} | ||
| 272 | - for i in range(5) | ||
| 273 | - ] | ||
| 274 | - | ||
| 275 | - result[agg_name] = { | ||
| 276 | - "doc_count_error_upper_bound": 0, | ||
| 277 | - "sum_other_doc_count": random.randint(10, 50), | ||
| 278 | - "buckets": buckets | ||
| 279 | - } | ||
| 280 | - | ||
| 281 | - elif agg_type == 'range': | ||
| 282 | - # Generate mock range aggregation (usually for price) | ||
| 283 | - if agg_name == 'price_ranges': | ||
| 284 | - ranges = agg_spec.get('ranges', []) | ||
| 285 | - buckets = [] | ||
| 286 | - for range_spec in ranges: | ||
| 287 | - key = range_spec.get('key', 'unknown') | ||
| 288 | - count = random.randint(5, 30) | ||
| 289 | - bucket_data = {"key": key, "doc_count": count} | ||
| 290 | - | ||
| 291 | - # Add range bounds | ||
| 292 | - if 'to' in range_spec: | ||
| 293 | - bucket_data['to'] = range_spec['to'] | ||
| 294 | - if 'from' in range_spec: | ||
| 295 | - bucket_data['from'] = range_spec['from'] | ||
| 296 | - | ||
| 297 | - buckets.append(bucket_data) | ||
| 298 | - | ||
| 299 | - result[agg_name] = {"buckets": buckets} | ||
| 300 | - | ||
| 301 | - return result | ||
| 302 | - | ||
| 303 | - def log_message(self, format, *args): | ||
| 304 | - """Override to reduce log noise.""" | ||
| 305 | - pass | ||
| 306 | - | ||
| 307 | -def run_server(): | ||
| 308 | - """Run the API server.""" | ||
| 309 | - server_address = ('', 6002) | ||
| 310 | - httpd = HTTPServer(server_address, SearchAPIHandler) | ||
| 311 | - print("🚀 Simple Search API Server started!") | ||
| 312 | - print("📍 API: http://localhost:6002") | ||
| 313 | - print("🔍 Search endpoint: http://localhost:6002/search/") | ||
| 314 | - print("🌐 Frontend should connect to: http://localhost:6002") | ||
| 315 | - print("⏹️ Press Ctrl+C to stop") | ||
| 316 | - | ||
| 317 | - try: | ||
| 318 | - httpd.serve_forever() | ||
| 319 | - except KeyboardInterrupt: | ||
| 320 | - print("\n🛑 Server stopped") | ||
| 321 | - httpd.server_close() | ||
| 322 | - | ||
| 323 | -def run_server(): | ||
| 324 | - """Run the API server - main entry point.""" | ||
| 325 | - server_address = ('', 6002) | ||
| 326 | - httpd = HTTPServer(server_address, SearchAPIHandler) | ||
| 327 | - print("🚀 Simple Search API Server started!") | ||
| 328 | - print("📍 API: http://localhost:6002") | ||
| 329 | - print("🔍 Search endpoint: http://localhost:6002/search/") | ||
| 330 | - print("🌐 Frontend should connect to: http://localhost:6002") | ||
| 331 | - print("⏹️ Press Ctrl+C to stop") | ||
| 332 | - | ||
| 333 | - try: | ||
| 334 | - httpd.serve_forever() | ||
| 335 | - except KeyboardInterrupt: | ||
| 336 | - print("\n🛑 Server stopped") | ||
| 337 | - httpd.server_close() | ||
| 338 | - | ||
| 339 | -if __name__ == '__main__': | ||
| 340 | - run_server() | ||
| 341 | \ No newline at end of file | 0 | \ No newline at end of file |
test_aggregation_api.py deleted
| @@ -1,166 +0,0 @@ | @@ -1,166 +0,0 @@ | ||
| 1 | -#!/usr/bin/env python3 | ||
| 2 | -""" | ||
| 3 | -Test script for aggregation functionality | ||
| 4 | -""" | ||
| 5 | - | ||
| 6 | -import requests | ||
| 7 | -import json | ||
| 8 | - | ||
| 9 | -API_BASE_URL = 'http://120.76.41.98:6002' | ||
| 10 | - | ||
| 11 | -def test_search_with_aggregations(): | ||
| 12 | - """Test search with aggregations""" | ||
| 13 | - | ||
| 14 | - # Test data | ||
| 15 | - test_query = { | ||
| 16 | - "query": "玩具", | ||
| 17 | - "size": 5, | ||
| 18 | - "aggregations": { | ||
| 19 | - "category_stats": { | ||
| 20 | - "terms": { | ||
| 21 | - "field": "categoryName_keyword", | ||
| 22 | - "size": 10 | ||
| 23 | - } | ||
| 24 | - }, | ||
| 25 | - "brand_stats": { | ||
| 26 | - "terms": { | ||
| 27 | - "field": "brandName_keyword", | ||
| 28 | - "size": 10 | ||
| 29 | - } | ||
| 30 | - }, | ||
| 31 | - "price_ranges": { | ||
| 32 | - "range": { | ||
| 33 | - "field": "price", | ||
| 34 | - "ranges": [ | ||
| 35 | - {"key": "0-50", "to": 50}, | ||
| 36 | - {"key": "50-100", "from": 50, "to": 100}, | ||
| 37 | - {"key": "100-200", "from": 100, "to": 200}, | ||
| 38 | - {"key": "200+", "from": 200} | ||
| 39 | - ] | ||
| 40 | - } | ||
| 41 | - } | ||
| 42 | - } | ||
| 43 | - } | ||
| 44 | - | ||
| 45 | - print("Testing search with aggregations...") | ||
| 46 | - print(f"Query: {json.dumps(test_query, indent=2, ensure_ascii=False)}") | ||
| 47 | - | ||
| 48 | - try: | ||
| 49 | - response = requests.post(f"{API_BASE_URL}/search/", | ||
| 50 | - json=test_query, | ||
| 51 | - headers={'Content-Type': 'application/json'}) | ||
| 52 | - | ||
| 53 | - print(f"Status Code: {response.status_code}") | ||
| 54 | - | ||
| 55 | - if response.ok: | ||
| 56 | - data = response.json() | ||
| 57 | - print(f"Found {data['total']} results in {data['took_ms']}ms") | ||
| 58 | - print(f"Max Score: {data['max_score']}") | ||
| 59 | - | ||
| 60 | - # Print aggregations | ||
| 61 | - if data.get('aggregations'): | ||
| 62 | - print("\nAggregations:") | ||
| 63 | - for agg_name, agg_result in data['aggregations'].items(): | ||
| 64 | - print(f"\n{agg_name}:") | ||
| 65 | - if 'buckets' in agg_result: | ||
| 66 | - for bucket in agg_result['buckets'][:5]: # Show first 5 buckets | ||
| 67 | - print(f" - {bucket['key']}: {bucket['doc_count']}") | ||
| 68 | - | ||
| 69 | - # Print first few results | ||
| 70 | - print(f"\nFirst 3 results:") | ||
| 71 | - for i, hit in enumerate(data['hits'][:3]): | ||
| 72 | - source = hit['_source'] | ||
| 73 | - print(f"\n{i+1}. {source.get('name', 'N/A')}") | ||
| 74 | - print(f" Category: {source.get('categoryName', 'N/A')}") | ||
| 75 | - print(f" Brand: {source.get('brandName', 'N/A')}") | ||
| 76 | - print(f" Price: {source.get('price', 'N/A')}") | ||
| 77 | - print(f" Score: {hit['_score']:.4f}") | ||
| 78 | - else: | ||
| 79 | - print(f"Error: {response.status_code}") | ||
| 80 | - print(f"Response: {response.text}") | ||
| 81 | - | ||
| 82 | - except Exception as e: | ||
| 83 | - print(f"Request failed: {e}") | ||
| 84 | - | ||
| 85 | -def test_search_with_filters(): | ||
| 86 | - """Test search with filters""" | ||
| 87 | - | ||
| 88 | - test_filters = { | ||
| 89 | - "query": "玩具", | ||
| 90 | - "size": 5, | ||
| 91 | - "filters": { | ||
| 92 | - "categoryName_keyword": ["玩具"], | ||
| 93 | - "price_ranges": ["0-50", "50-100"] | ||
| 94 | - } | ||
| 95 | - } | ||
| 96 | - | ||
| 97 | - print("\n\nTesting search with filters...") | ||
| 98 | - print(f"Query: {json.dumps(test_filters, indent=2, ensure_ascii=False)}") | ||
| 99 | - | ||
| 100 | - try: | ||
| 101 | - response = requests.post(f"{API_BASE_URL}/search/", | ||
| 102 | - json=test_filters, | ||
| 103 | - headers={'Content-Type': 'application/json'}) | ||
| 104 | - | ||
| 105 | - print(f"Status Code: {response.status_code}") | ||
| 106 | - | ||
| 107 | - if response.ok: | ||
| 108 | - data = response.json() | ||
| 109 | - print(f"Found {data['total']} results in {data['took_ms']}ms") | ||
| 110 | - | ||
| 111 | - print(f"\nFirst 3 results:") | ||
| 112 | - for i, hit in enumerate(data['hits'][:3]): | ||
| 113 | - source = hit['_source'] | ||
| 114 | - print(f"\n{i+1}. {source.get('name', 'N/A')}") | ||
| 115 | - print(f" Category: {source.get('categoryName', 'N/A')}") | ||
| 116 | - print(f" Brand: {source.get('brandName', 'N/A')}") | ||
| 117 | - print(f" Price: {source.get('price', 'N/A')}") | ||
| 118 | - print(f" Score: {hit['_score']:.4f}") | ||
| 119 | - else: | ||
| 120 | - print(f"Error: {response.status_code}") | ||
| 121 | - print(f"Response: {response.text}") | ||
| 122 | - | ||
| 123 | - except Exception as e: | ||
| 124 | - print(f"Request failed: {e}") | ||
| 125 | - | ||
| 126 | -def test_search_with_sorting(): | ||
| 127 | - """Test search with sorting""" | ||
| 128 | - | ||
| 129 | - test_sort = { | ||
| 130 | - "query": "玩具", | ||
| 131 | - "size": 5, | ||
| 132 | - "sort_by": "price", | ||
| 133 | - "sort_order": "asc" | ||
| 134 | - } | ||
| 135 | - | ||
| 136 | - print("\n\nTesting search with sorting (price ascending)...") | ||
| 137 | - print(f"Query: {json.dumps(test_sort, indent=2, ensure_ascii=False)}") | ||
| 138 | - | ||
| 139 | - try: | ||
| 140 | - response = requests.post(f"{API_BASE_URL}/search/", | ||
| 141 | - json=test_sort, | ||
| 142 | - headers={'Content-Type': 'application/json'}) | ||
| 143 | - | ||
| 144 | - print(f"Status Code: {response.status_code}") | ||
| 145 | - | ||
| 146 | - if response.ok: | ||
| 147 | - data = response.json() | ||
| 148 | - print(f"Found {data['total']} results in {data['took_ms']}ms") | ||
| 149 | - | ||
| 150 | - print(f"\nFirst 3 results (sorted by price):") | ||
| 151 | - for i, hit in enumerate(data['hits'][:3]): | ||
| 152 | - source = hit['_source'] | ||
| 153 | - print(f"\n{i+1}. {source.get('name', 'N/A')}") | ||
| 154 | - print(f" Price: {source.get('price', 'N/A')}") | ||
| 155 | - print(f" Score: {hit['_score']:.4f}") | ||
| 156 | - else: | ||
| 157 | - print(f"Error: {response.status_code}") | ||
| 158 | - print(f"Response: {response.text}") | ||
| 159 | - | ||
| 160 | - except Exception as e: | ||
| 161 | - print(f"Request failed: {e}") | ||
| 162 | - | ||
| 163 | -if __name__ == "__main__": | ||
| 164 | - test_search_with_aggregations() | ||
| 165 | - test_search_with_filters() | ||
| 166 | - test_search_with_sorting() | ||
| 167 | \ No newline at end of file | 0 | \ No newline at end of file |
test_aggregation_functionality.py deleted
| @@ -1,236 +0,0 @@ | @@ -1,236 +0,0 @@ | ||
| 1 | -#!/usr/bin/env python3 | ||
| 2 | -""" | ||
| 3 | -Simple test script to verify aggregation functionality without external dependencies. | ||
| 4 | -""" | ||
| 5 | - | ||
| 6 | -import sys | ||
| 7 | -import os | ||
| 8 | -import inspect | ||
| 9 | - | ||
| 10 | -# Add the project root to the Python path | ||
| 11 | -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) | ||
| 12 | - | ||
| 13 | -def test_es_query_builder_aggregations(): | ||
| 14 | - """Test the ES query builder aggregation methods.""" | ||
| 15 | - print("Testing ES Query Builder Aggregation Methods...") | ||
| 16 | - | ||
| 17 | - # Import the query builder | ||
| 18 | - try: | ||
| 19 | - from search.es_query_builder import ESQueryBuilder | ||
| 20 | - print("✓ ESQueryBuilder imported successfully") | ||
| 21 | - except ImportError as e: | ||
| 22 | - print(f"✗ Failed to import ESQueryBuilder: {e}") | ||
| 23 | - return False | ||
| 24 | - | ||
| 25 | - # Create a query builder instance | ||
| 26 | - builder = ESQueryBuilder( | ||
| 27 | - index_name="test_index", | ||
| 28 | - match_fields=["name", "description"] | ||
| 29 | - ) | ||
| 30 | - | ||
| 31 | - # Test basic aggregation | ||
| 32 | - es_query = {"query": {"match_all": {}}} | ||
| 33 | - | ||
| 34 | - # Test add_dynamic_aggregations | ||
| 35 | - aggregations = { | ||
| 36 | - "category_name": { | ||
| 37 | - "type": "terms", | ||
| 38 | - "field": "categoryName_keyword", | ||
| 39 | - "size": 10 | ||
| 40 | - }, | ||
| 41 | - "price_ranges": { | ||
| 42 | - "type": "range", | ||
| 43 | - "field": "price", | ||
| 44 | - "ranges": [ | ||
| 45 | - {"key": "0-50", "to": 50}, | ||
| 46 | - {"key": "50-100", "from": 50, "to": 100} | ||
| 47 | - ] | ||
| 48 | - } | ||
| 49 | - } | ||
| 50 | - | ||
| 51 | - result_query = builder.add_dynamic_aggregations(es_query, aggregations) | ||
| 52 | - | ||
| 53 | - if "aggs" in result_query: | ||
| 54 | - print("✓ Aggregations added to query") | ||
| 55 | - | ||
| 56 | - # Check category aggregation | ||
| 57 | - if "category_name" in result_query["aggs"]: | ||
| 58 | - category_agg = result_query["aggs"]["category_name"] | ||
| 59 | - if "terms" in category_agg and category_agg["terms"]["field"] == "categoryName_keyword": | ||
| 60 | - print("✓ Category aggregation correctly configured") | ||
| 61 | - else: | ||
| 62 | - print("✗ Category aggregation incorrectly configured") | ||
| 63 | - return False | ||
| 64 | - | ||
| 65 | - # Check price range aggregation | ||
| 66 | - if "price_ranges" in result_query["aggs"]: | ||
| 67 | - price_agg = result_query["aggs"]["price_ranges"] | ||
| 68 | - if "range" in price_agg and price_agg["range"]["field"] == "price": | ||
| 69 | - print("✓ Price range aggregation correctly configured") | ||
| 70 | - else: | ||
| 71 | - print("✗ Price range aggregation incorrectly configured") | ||
| 72 | - return False | ||
| 73 | - else: | ||
| 74 | - print("✗ No aggregations added to query") | ||
| 75 | - return False | ||
| 76 | - | ||
| 77 | - # Test sorting | ||
| 78 | - result_query_asc = builder.add_sorting({}, "price_asc") | ||
| 79 | - if "sort" in result_query_asc: | ||
| 80 | - print("✓ Price ascending sort added") | ||
| 81 | - else: | ||
| 82 | - print("✗ Price ascending sort not added") | ||
| 83 | - return False | ||
| 84 | - | ||
| 85 | - result_query_desc = builder.add_sorting({}, "price_desc") | ||
| 86 | - if "sort" in result_query_desc: | ||
| 87 | - print("✓ Price descending sort added") | ||
| 88 | - else: | ||
| 89 | - print("✗ Price descending sort not added") | ||
| 90 | - return False | ||
| 91 | - | ||
| 92 | - result_query_time = builder.add_sorting({}, "time_desc") | ||
| 93 | - if "sort" in result_query_time: | ||
| 94 | - print("✓ Time descending sort added") | ||
| 95 | - else: | ||
| 96 | - print("✗ Time descending sort not added") | ||
| 97 | - return False | ||
| 98 | - | ||
| 99 | - return True | ||
| 100 | - | ||
| 101 | - | ||
| 102 | -def test_searcher_integration(): | ||
| 103 | - """Test searcher integration with new parameters.""" | ||
| 104 | - print("\nTesting Searcher Integration...") | ||
| 105 | - | ||
| 106 | - try: | ||
| 107 | - from search.searcher import Searcher | ||
| 108 | - print("✓ Searcher imported successfully") | ||
| 109 | - except ImportError as e: | ||
| 110 | - print(f"✗ Failed to import Searcher: {e}") | ||
| 111 | - return False | ||
| 112 | - | ||
| 113 | - # We can't easily test the full searcher without ES, but we can check the method signature | ||
| 114 | - search_method = getattr(Searcher, 'search', None) | ||
| 115 | - | ||
| 116 | - if search_method: | ||
| 117 | - sig = inspect.signature(search_method) | ||
| 118 | - params = list(sig.parameters.keys()) | ||
| 119 | - | ||
| 120 | - expected_params = ['query', 'size', 'from_', 'filters', 'min_score', 'aggregations', 'sort_by', 'context'] | ||
| 121 | - for param in expected_params: | ||
| 122 | - if param in params: | ||
| 123 | - print(f"✓ Parameter '{param}' found in search method") | ||
| 124 | - else: | ||
| 125 | - print(f"✗ Parameter '{param}' missing from search method") | ||
| 126 | - return False | ||
| 127 | - else: | ||
| 128 | - print("✗ Search method not found in Searcher class") | ||
| 129 | - return False | ||
| 130 | - | ||
| 131 | - return True | ||
| 132 | - | ||
| 133 | - | ||
| 134 | -def test_api_route_integration(): | ||
| 135 | - """Test API route integration.""" | ||
| 136 | - print("\nTesting API Route Integration...") | ||
| 137 | - | ||
| 138 | - try: | ||
| 139 | - from api.routes.search import router | ||
| 140 | - print("✓ Search router imported successfully") | ||
| 141 | - except ImportError as e: | ||
| 142 | - print(f"✗ Failed to import search router: {e}") | ||
| 143 | - return False | ||
| 144 | - | ||
| 145 | - # Check if the route exists | ||
| 146 | - routes = [route.path for route in router.routes] | ||
| 147 | - if "/" in routes: | ||
| 148 | - print("✓ Main search route found") | ||
| 149 | - else: | ||
| 150 | - print("✗ Main search route not found") | ||
| 151 | - return False | ||
| 152 | - | ||
| 153 | - return True | ||
| 154 | - | ||
| 155 | - | ||
| 156 | -def test_configuration(): | ||
| 157 | - """Test configuration parsing.""" | ||
| 158 | - print("\nTesting Configuration...") | ||
| 159 | - | ||
| 160 | - try: | ||
| 161 | - from config import CustomerConfig | ||
| 162 | - print("✓ CustomerConfig imported successfully") | ||
| 163 | - except ImportError as e: | ||
| 164 | - print(f"✗ Failed to import CustomerConfig: {e}") | ||
| 165 | - return False | ||
| 166 | - | ||
| 167 | - # Try to load the customer1 config | ||
| 168 | - try: | ||
| 169 | - config = CustomerConfig.load_from_file("config/schema/customer1_config.yaml") | ||
| 170 | - print("✓ Customer1 configuration loaded successfully") | ||
| 171 | - | ||
| 172 | - # Check if price field is in the configuration | ||
| 173 | - field_names = [field.name for field in config.fields] | ||
| 174 | - if "price" in field_names: | ||
| 175 | - print("✓ Price field found in configuration") | ||
| 176 | - else: | ||
| 177 | - print("✗ Price field not found in configuration") | ||
| 178 | - return False | ||
| 179 | - | ||
| 180 | - # Check keyword fields for aggregations | ||
| 181 | - if "categoryName_keyword" in field_names: | ||
| 182 | - print("✓ Category keyword field found") | ||
| 183 | - else: | ||
| 184 | - print("✗ Category keyword field not found") | ||
| 185 | - return False | ||
| 186 | - | ||
| 187 | - if "brandName_keyword" in field_names: | ||
| 188 | - print("✓ Brand keyword field found") | ||
| 189 | - else: | ||
| 190 | - print("✗ Brand keyword field not found") | ||
| 191 | - return False | ||
| 192 | - | ||
| 193 | - except Exception as e: | ||
| 194 | - print(f"✗ Failed to load configuration: {e}") | ||
| 195 | - return False | ||
| 196 | - | ||
| 197 | - return True | ||
| 198 | - | ||
| 199 | - | ||
| 200 | -def main(): | ||
| 201 | - """Run all tests.""" | ||
| 202 | - print("=== Search Engine Aggregation Functionality Tests ===\n") | ||
| 203 | - | ||
| 204 | - tests = [ | ||
| 205 | - test_es_query_builder_aggregations, | ||
| 206 | - test_searcher_integration, | ||
| 207 | - test_api_route_integration, | ||
| 208 | - test_configuration | ||
| 209 | - ] | ||
| 210 | - | ||
| 211 | - passed = 0 | ||
| 212 | - total = len(tests) | ||
| 213 | - | ||
| 214 | - for test in tests: | ||
| 215 | - try: | ||
| 216 | - if test(): | ||
| 217 | - passed += 1 | ||
| 218 | - print(f"✓ {test.__name__} PASSED") | ||
| 219 | - else: | ||
| 220 | - print(f"✗ {test.__name__} FAILED") | ||
| 221 | - except Exception as e: | ||
| 222 | - print(f"✗ {test.__name__} ERROR: {e}") | ||
| 223 | - | ||
| 224 | - print(f"\n=== Test Results: {passed}/{total} tests passed ===") | ||
| 225 | - | ||
| 226 | - if passed == total: | ||
| 227 | - print("🎉 All tests passed! Aggregation functionality is ready.") | ||
| 228 | - return True | ||
| 229 | - else: | ||
| 230 | - print("❌ Some tests failed. Please check the implementation.") | ||
| 231 | - return False | ||
| 232 | - | ||
| 233 | - | ||
| 234 | -if __name__ == "__main__": | ||
| 235 | - success = main() | ||
| 236 | - sys.exit(0 if success else 1) | ||
| 237 | \ No newline at end of file | 0 | \ No newline at end of file |
test_complete_search.py deleted
| @@ -1,211 +0,0 @@ | @@ -1,211 +0,0 @@ | ||
| 1 | -#!/usr/bin/env python3 | ||
| 2 | -""" | ||
| 3 | -Complete test script simulating frontend search interaction | ||
| 4 | -""" | ||
| 5 | - | ||
| 6 | -import requests | ||
| 7 | -import json | ||
| 8 | - | ||
| 9 | -API_BASE_URL = 'http://120.76.41.98:6002' | ||
| 10 | - | ||
| 11 | -def test_complete_search_workflow(): | ||
| 12 | - """Test complete search workflow similar to frontend""" | ||
| 13 | - | ||
| 14 | - print("=" * 60) | ||
| 15 | - print("完整搜索流程测试") | ||
| 16 | - print("=" * 60) | ||
| 17 | - | ||
| 18 | - # Step 1: Initial search with aggregations | ||
| 19 | - print("\n1️⃣ 初始搜索(带聚合功能)") | ||
| 20 | - print("-" * 30) | ||
| 21 | - | ||
| 22 | - search_request = { | ||
| 23 | - "query": "芭比娃娃", | ||
| 24 | - "size": 10, | ||
| 25 | - "aggregations": { | ||
| 26 | - "category_stats": { | ||
| 27 | - "terms": { | ||
| 28 | - "field": "categoryName_keyword", | ||
| 29 | - "size": 10 | ||
| 30 | - } | ||
| 31 | - }, | ||
| 32 | - "brand_stats": { | ||
| 33 | - "terms": { | ||
| 34 | - "field": "brandName_keyword", | ||
| 35 | - "size": 10 | ||
| 36 | - } | ||
| 37 | - }, | ||
| 38 | - "price_ranges": { | ||
| 39 | - "range": { | ||
| 40 | - "field": "price", | ||
| 41 | - "ranges": [ | ||
| 42 | - {"key": "0-50", "to": 50}, | ||
| 43 | - {"key": "50-100", "from": 50, "to": 100}, | ||
| 44 | - {"key": "100-200", "from": 100, "to": 200}, | ||
| 45 | - {"key": "200+", "from": 200} | ||
| 46 | - ] | ||
| 47 | - } | ||
| 48 | - } | ||
| 49 | - } | ||
| 50 | - } | ||
| 51 | - | ||
| 52 | - try: | ||
| 53 | - response = requests.post(f"{API_BASE_URL}/search/", json=search_request) | ||
| 54 | - | ||
| 55 | - if response.ok: | ||
| 56 | - data = response.json() | ||
| 57 | - print(f"✅ 找到 {data['total']} 个结果,耗时 {data['took_ms']}ms") | ||
| 58 | - | ||
| 59 | - # Show aggregations results | ||
| 60 | - if data.get('aggregations'): | ||
| 61 | - print("\n📊 聚合结果:") | ||
| 62 | - | ||
| 63 | - # Category aggregations | ||
| 64 | - if 'category_stats' in data['aggregations']: | ||
| 65 | - print(" 🏷️ 分类统计:") | ||
| 66 | - for bucket in data['aggregations']['category_stats']['buckets'][:3]: | ||
| 67 | - print(f" - {bucket['key']}: {bucket['doc_count']} 个商品") | ||
| 68 | - | ||
| 69 | - # Brand aggregations | ||
| 70 | - if 'brand_stats' in data['aggregations']: | ||
| 71 | - print(" 🏢 品牌统计:") | ||
| 72 | - for bucket in data['aggregations']['brand_stats']['buckets'][:3]: | ||
| 73 | - print(f" - {bucket['key']}: {bucket['doc_count']} 个商品") | ||
| 74 | - | ||
| 75 | - # Price ranges | ||
| 76 | - if 'price_ranges' in data['aggregations']: | ||
| 77 | - print(" 💰 价格分布:") | ||
| 78 | - for bucket in data['aggregations']['price_ranges']['buckets']: | ||
| 79 | - print(f" - {bucket['key']}: {bucket['doc_count']} 个商品") | ||
| 80 | - | ||
| 81 | - # Show sample results | ||
| 82 | - print(f"\n🔍 前3个搜索结果:") | ||
| 83 | - for i, hit in enumerate(data['hits'][:3]): | ||
| 84 | - source = hit['_source'] | ||
| 85 | - price = source.get('price', 'N/A') | ||
| 86 | - category = source.get('categoryName', 'N/A') | ||
| 87 | - brand = source.get('brandName', 'N/A') | ||
| 88 | - print(f" {i+1}. {source.get('name', 'N/A')}") | ||
| 89 | - print(f" 💰 价格: {price}") | ||
| 90 | - print(f" 📁 分类: {category}") | ||
| 91 | - print(f" 🏷️ 品牌: {brand}") | ||
| 92 | - print(f" ⭐ 评分: {hit['_score']:.3f}") | ||
| 93 | - print() | ||
| 94 | - | ||
| 95 | - else: | ||
| 96 | - print(f"❌ 搜索失败: {response.status_code}") | ||
| 97 | - print(f"错误信息: {response.text}") | ||
| 98 | - | ||
| 99 | - except Exception as e: | ||
| 100 | - print(f"❌ 请求异常: {e}") | ||
| 101 | - | ||
| 102 | - # Step 2: Search with filters | ||
| 103 | - print("\n2️⃣ 带过滤条件的搜索") | ||
| 104 | - print("-" * 30) | ||
| 105 | - | ||
| 106 | - filtered_search = { | ||
| 107 | - "query": "芭比娃娃", | ||
| 108 | - "size": 5, | ||
| 109 | - "filters": { | ||
| 110 | - "brandName_keyword": ["美泰"], | ||
| 111 | - "price_ranges": ["50-100", "100-200"] | ||
| 112 | - } | ||
| 113 | - } | ||
| 114 | - | ||
| 115 | - try: | ||
| 116 | - response = requests.post(f"{API_BASE_URL}/search/", json=filtered_search) | ||
| 117 | - | ||
| 118 | - if response.ok: | ||
| 119 | - data = response.json() | ||
| 120 | - print(f"✅ 过滤后找到 {data['total']} 个结果,耗时 {data['took_ms']}ms") | ||
| 121 | - print(" 🎯 过滤条件: 品牌=美泰, 价格=¥50-200") | ||
| 122 | - | ||
| 123 | - print(f"\n💫 前3个过滤结果:") | ||
| 124 | - for i, hit in enumerate(data['hits'][:3]): | ||
| 125 | - source = hit['_source'] | ||
| 126 | - price = source.get('price', 'N/A') | ||
| 127 | - category = source.get('categoryName', 'N/A') | ||
| 128 | - brand = source.get('brandName', 'N/A') | ||
| 129 | - print(f" {i+1}. {source.get('name', 'N/A')}") | ||
| 130 | - print(f" 💰 ¥{price} | 📁 {category} | 🏷️ {brand}") | ||
| 131 | - print(f" ⭐ 评分: {hit['_score']:.3f}") | ||
| 132 | - | ||
| 133 | - else: | ||
| 134 | - print(f"❌ 过滤搜索失败: {response.status_code}") | ||
| 135 | - | ||
| 136 | - except Exception as e: | ||
| 137 | - print(f"❌ 请求异常: {e}") | ||
| 138 | - | ||
| 139 | - # Step 3: Search with sorting | ||
| 140 | - print("\n3️⃣ 排序搜索") | ||
| 141 | - print("-" * 30) | ||
| 142 | - | ||
| 143 | - # Test price ascending | ||
| 144 | - price_asc_search = { | ||
| 145 | - "query": "芭比娃娃", | ||
| 146 | - "size": 3, | ||
| 147 | - "sort_by": "price", | ||
| 148 | - "sort_order": "asc" | ||
| 149 | - } | ||
| 150 | - | ||
| 151 | - try: | ||
| 152 | - response = requests.post(f"{API_BASE_URL}/search/", json=price_asc_search) | ||
| 153 | - | ||
| 154 | - if response.ok: | ||
| 155 | - data = response.json() | ||
| 156 | - print(f"✅ 价格升序排序,找到 {data['total']} 个结果") | ||
| 157 | - print(" 📈 排序方式: 价格从低到高") | ||
| 158 | - | ||
| 159 | - print(f"\n💵 价格排序结果:") | ||
| 160 | - for i, hit in enumerate(data['hits']): | ||
| 161 | - source = hit['_source'] | ||
| 162 | - price = source.get('price', 'N/A') | ||
| 163 | - name = source.get('name', 'N/A') | ||
| 164 | - print(f" {i+1}. ¥{price} - {name}") | ||
| 165 | - | ||
| 166 | - else: | ||
| 167 | - print(f"❌ 排序搜索失败: {response.status_code}") | ||
| 168 | - | ||
| 169 | - except Exception as e: | ||
| 170 | - print(f"❌ 请求异常: {e}") | ||
| 171 | - | ||
| 172 | - # Step 4: Test time sorting | ||
| 173 | - print("\n4️⃣ 时间排序测试") | ||
| 174 | - print("-" * 30) | ||
| 175 | - | ||
| 176 | - time_sort_search = { | ||
| 177 | - "query": "芭比娃娃", | ||
| 178 | - "size": 3, | ||
| 179 | - "sort_by": "create_time", | ||
| 180 | - "sort_order": "desc" | ||
| 181 | - } | ||
| 182 | - | ||
| 183 | - try: | ||
| 184 | - response = requests.post(f"{API_BASE_URL}/search/", json=time_sort_search) | ||
| 185 | - | ||
| 186 | - if response.ok: | ||
| 187 | - data = response.json() | ||
| 188 | - print(f"✅ 时间降序排序,找到 {data['total']} 个结果") | ||
| 189 | - print(" 📅 排序方式: 上架时间从新到旧") | ||
| 190 | - | ||
| 191 | - print(f"\n🕐 时间排序结果:") | ||
| 192 | - for i, hit in enumerate(data['hits']): | ||
| 193 | - source = hit['_source'] | ||
| 194 | - create_time = source.get('create_time', 'N/A') | ||
| 195 | - name = source.get('name', 'N/A') | ||
| 196 | - print(f" {i+1}. {create_time} - {name}") | ||
| 197 | - | ||
| 198 | - else: | ||
| 199 | - print(f"❌ 时间排序失败: {response.status_code}") | ||
| 200 | - | ||
| 201 | - except Exception as e: | ||
| 202 | - print(f"❌ 请求异常: {e}") | ||
| 203 | - | ||
| 204 | - print("\n" + "=" * 60) | ||
| 205 | - print("🎉 搜索功能测试完成!") | ||
| 206 | - print("✨ 前端访问地址: http://localhost:8080") | ||
| 207 | - print("🔧 后端API地址: http://120.76.41.98:6002") | ||
| 208 | - print("=" * 60) | ||
| 209 | - | ||
| 210 | -if __name__ == "__main__": | ||
| 211 | - test_complete_search_workflow() | ||
| 212 | \ No newline at end of file | 0 | \ No newline at end of file |
test_minimal_sort.py deleted
| @@ -1,45 +0,0 @@ | @@ -1,45 +0,0 @@ | ||
| 1 | -#!/usr/bin/env python3 | ||
| 2 | -""" | ||
| 3 | -Minimal test to isolate sort issue | ||
| 4 | -""" | ||
| 5 | - | ||
| 6 | -import requests | ||
| 7 | -import json | ||
| 8 | - | ||
| 9 | -def test_minimal_sort(): | ||
| 10 | - """Test minimal sort case""" | ||
| 11 | - | ||
| 12 | - base_url = "http://120.76.41.98:6002" | ||
| 13 | - | ||
| 14 | - # Test 1: No sort parameters | ||
| 15 | - print("Test 1: No sort parameters") | ||
| 16 | - response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1}) | ||
| 17 | - print(f"Status: {response.status_code}") | ||
| 18 | - print(f"Response: {response.text[:200]}...") | ||
| 19 | - | ||
| 20 | - # Test 2: Empty sort_by | ||
| 21 | - print("\nTest 2: Empty sort_by") | ||
| 22 | - response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1, "sort_by": ""}) | ||
| 23 | - print(f"Status: {response.status_code}") | ||
| 24 | - print(f"Response: {response.text[:200]}...") | ||
| 25 | - | ||
| 26 | - # Test 3: sort_by only (no sort_order) | ||
| 27 | - print("\nTest 3: sort_by only") | ||
| 28 | - response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1, "sort_by": "create_time"}) | ||
| 29 | - print(f"Status: {response.status_code}") | ||
| 30 | - print(f"Response: {response.text[:200]}...") | ||
| 31 | - | ||
| 32 | - # Test 4: sort_order only (no sort_by) | ||
| 33 | - print("\nTest 4: sort_order only") | ||
| 34 | - response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1, "sort_order": "desc"}) | ||
| 35 | - print(f"Status: {response.status_code}") | ||
| 36 | - print(f"Response: {response.text[:200]}...") | ||
| 37 | - | ||
| 38 | - # Test 5: Both parameters with None values | ||
| 39 | - print("\nTest 5: Both parameters with null values") | ||
| 40 | - response = requests.post(f"{base_url}/search/", json={"query": "test", "size": 1, "sort_by": None, "sort_order": None}) | ||
| 41 | - print(f"Status: {response.status_code}") | ||
| 42 | - print(f"Response: {response.text[:200]}...") | ||
| 43 | - | ||
| 44 | -if __name__ == "__main__": | ||
| 45 | - test_minimal_sort() | ||
| 46 | \ No newline at end of file | 0 | \ No newline at end of file |
支持多语言查询.md
| @@ -18,6 +18,7 @@ index 397a9f7..3e728c9 100644 | @@ -18,6 +18,7 @@ index 397a9f7..3e728c9 100644 | ||
| 18 | 18 | ||
| 19 | 暂时具体实现为 bm25()+0.2*text_embedding_relevence(也就是knn检索表达式的打分) | 19 | 暂时具体实现为 bm25()+0.2*text_embedding_relevence(也就是knn检索表达式的打分) |
| 20 | +bm25() 包括多语言的打分:内部需要通过配置翻译为多种语言(配置几种目标语言 默认中文、英文,并且设置对应的检索域),然后分别到对应的字段搜索,中文字段到配置的中文title搜索,英文到对应的英文title搜索。 | 20 | +bm25() 包括多语言的打分:内部需要通过配置翻译为多种语言(配置几种目标语言 默认中文、英文,并且设置对应的检索域),然后分别到对应的字段搜索,中文字段到配置的中文title搜索,英文到对应的英文title搜索。 |
| 21 | +```text | ||
| 21 | bm25打分(base_query): | 22 | bm25打分(base_query): |
| 22 | "multi_match": { | 23 | "multi_match": { |
| 23 | "query": search_query, | 24 | "query": search_query, |
| @@ -186,11 +187,5 @@ index a7088ec..0a798ed 100644 | @@ -186,11 +187,5 @@ index a7088ec..0a798ed 100644 | ||
| 186 | def get_document(self, doc_id: str) -> Optional[Dict[str, Any]]: | 187 | def get_document(self, doc_id: str) -> Optional[Dict[str, Any]]: |
| 187 | """ | 188 | """ |
| 188 | Get single document by ID. | 189 | Get single document by ID. |
| 189 | - | ||
| 190 | - | ||
| 191 | - | ||
| 192 | - | ||
| 193 | - | ||
| 194 | - | ||
| 195 | - | 190 | +``` |
| 196 | 191 |