Compare View

switch
from
...
to
 
Commits (3)
  • 2. mysql->ES数据灌入脚本优化。修改了多个字段的处理方式,完善日志,为以后抽出来服务供java全量增量调用做准备
    tangwang
     
  • tangwang
     
  • {
      "facets": [
        {
          "field": "category1_name",
          "size": 15,
          "type": "terms"
        },
        "specifications.color",
        "specifications.size"
      ]
    }
    
    {
      "facets": [
        {"field": "category1_name", "size": 15, "type": "terms"},
        {"field": "specifications.color", "size": 10, "type": "terms"},
        {"field": "specifications.size", "size": 10, "type": "terms"}
      ]
    }
    
    之前采用上面的接口形式,主要是出于属性分面的考虑:因为款式的取值数量有限,不需要设定 "size": 10, "type": "terms" 这些参数。
    
    但是从接口设计层面来看,最好采用下面这种形式:这样 specifications.color 和 category1_name 的组装格式完全一致,前端不需要感知属性分面与类目等其他字段分面之间的差异。
    tangwang
     
.cursor/plans/api-interface-analysis-42918612.plan.0.md
... ... @@ -632,7 +632,7 @@ def _standardize_facets(
632 632 # 构建标准化分面结果
633 633 facet = {
634 634 "field": field,
635   - "label": self._get_field_label(field), # 从配置获取
  635 + "label": field, # 从配置获取
636 636 "type": facet_type,
637 637 "values": []
638 638 }
... ...
.cursor/plans/api-interface-analysis-42918612.plan.1.md
... ... @@ -632,7 +632,7 @@ def _standardize_facets(
632 632 # 构建标准化分面结果
633 633 facet = {
634 634 "field": field,
635   - "label": self._get_field_label(field), # 从配置获取
  635 + "label": field, # 从配置获取
636 636 "type": facet_type,
637 637 "values": []
638 638 }
... ...
.cursor/plans/api-interface-analysis-42918612.plan.2.md
... ... @@ -632,7 +632,7 @@ def _standardize_facets(
632 632 # 构建标准化分面结果
633 633 facet = {
634 634 "field": field,
635   - "label": self._get_field_label(field), # 从配置获取
  635 + "label": field, # 从配置获取
636 636 "type": facet_type,
637 637 "values": []
638 638 }
... ...
.cursor/plans/api-interface-analysis-42918612.plan.3.最终执行.md
... ... @@ -632,7 +632,7 @@ def _standardize_facets(
632 632 # 构建标准化分面结果
633 633 facet = {
634 634 "field": field,
635   - "label": self._get_field_label(field),
  635 + "label": field,
636 636 "type": facet_type,
637 637 "values": []
638 638 }
... ...
activate.sh 0 → 100644
... ... @@ -0,0 +1,12 @@
  1 +#!/bin/bash
  2 +source /home/tw/miniconda3/etc/profile.d/conda.sh
  3 +conda activate searchengine
  4 +
  5 +# 如果需要加载 .env 中的环境变量
  6 +if [ -f .env ]; then
  7 + set -a # 自动导出所有变量
  8 + source <(grep -v '^#' .env | grep -v '^$' | sed 's/#.*$//' | sed 's/\r$//')
  9 + set +a # 关闭自动导出
  10 +fi
  11 +
  12 +echo "Environment activated: searchengine"
... ...
api/models.py
... ... @@ -67,7 +67,7 @@ class SearchRequest(BaseModel):
67 67  
68 68 # 基础搜索参数
69 69 query: str = Field(..., description="搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT)")
70   - size: int = Field(10, ge=1, le=100, description="返回结果数量")
  70 + size: int = Field(10, ge=1, le=1000, description="返回结果数量")
71 71 from_: int = Field(0, ge=0, alias="from", description="分页偏移量")
72 72 language: Literal["zh", "en"] = Field(
73 73 "zh",
... ... @@ -110,31 +110,34 @@ class SearchRequest(BaseModel):
110 110 )
111 111  
112 112 # 排序
113   - sort_by: Optional[str] = Field(None, description="排序字段名(如 'min_price', 'max_price', 'title')")
114   - sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)")
  113 + sort_by: Optional[str] = Field(None, description="排序字段名。支持:'price'(价格,自动根据sort_order选择min_price或max_price)、'sales'(销量)、'create_time'(创建时间)、'update_time'(更新时间)")
  114 + sort_order: Optional[str] = Field("desc", description="排序方向: 'asc'(升序)或 'desc'(降序)。注意:price+asc=价格从低到高,price+desc=价格从高到低")
115 115  
116   - # 分面搜索 - 简化接口
117   - facets: Optional[List[Union[str, FacetConfig]]] = Field(
  116 + # 分面搜索
  117 + facets: Optional[List[FacetConfig]] = Field(
118 118 None,
119   - description="分面配置。可以是字段名列表(使用默认配置)或详细的分面配置对象。支持 specifications 分面:\"specifications\"(所有name)或 \"specifications.color\"(指定name)",
  119 + description="分面配置对象列表。支持 specifications 分面:field=\"specifications\"(所有规格名称)或 field=\"specifications.color\"(指定规格名称)",
120 120 json_schema_extra={
121 121 "examples": [
122   - # 简单模式:只指定字段名,使用默认配置
123   - ["category1_name", "category2_name", "specifications"],
124   - # 指定specifications的某个name
125   - ["specifications.颜色", "specifications.尺寸"],
126   - # 高级模式:详细配置
127 122 [
128   - {"field": "category1_name", "size": 15},
  123 + {"field": "category1_name", "size": 15, "type": "terms"},
  124 + {"field": "category2_name", "size": 10, "type": "terms"},
  125 + {"field": "specifications.color", "size": 20, "type": "terms"},
  126 + {"field": "specifications.size", "size": 15, "type": "terms"}
  127 + ],
  128 + [
  129 + {"field": "category1_name", "size": 15, "type": "terms"},
129 130 {
130 131 "field": "min_price",
131 132 "type": "range",
132 133 "ranges": [
133 134 {"key": "0-50", "to": 50},
134   - {"key": "50-100", "from": 50, "to": 100}
  135 + {"key": "50-100", "from": 50, "to": 100},
  136 + {"key": "100-200", "from": 100, "to": 200},
  137 + {"key": "200+", "from": 200}
135 138 ]
136 139 },
137   - "specifications" # 所有specifications name的分面
  140 + {"field": "specifications", "size": 10, "type": "terms"}
138 141 ]
139 142 ]
140 143 }
... ...
docs/常用查询 - sql.sql
... ... @@ -361,4 +361,191 @@ SELECT
361 361 FROM shoplazza_product_option
362 362 WHERE tenant_id = 162 AND deleted = 0
363 363 GROUP BY position, name
364   -ORDER BY position, spu_count DESC;
365 364 \ No newline at end of file
  365 +ORDER BY position, spu_count DESC;
  366 +
  367 +
  368 +
  369 +-- ======================================
  370 +-- 10. SPU原始数据完整查询
  371 +-- ======================================
  372 +
  373 +-- 10.1 查询SPU表所有字段的原始数据
  374 +-- 用于全面检查数据质量
  375 +SELECT
  376 + id,
  377 + shop_id,
  378 + shoplazza_id,
  379 + handle,
  380 + title,
  381 + brief,
  382 + description,
  383 + spu,
  384 + vendor,
  385 + vendor_url,
  386 + seo_title,
  387 + seo_description,
  388 + seo_keywords,
  389 + image_src,
  390 + image_width,
  391 + image_height,
  392 + image_path,
  393 + image_alt,
  394 + inventory_policy,
  395 + inventory_quantity,
  396 + inventory_tracking,
  397 + published,
  398 + published_at,
  399 + requires_shipping,
  400 + taxable,
  401 + fake_sales,
  402 + display_fake_sales,
  403 + mixed_wholesale,
  404 + need_variant_image,
  405 + has_only_default_variant,
  406 + tags,
  407 + note,
  408 + category,
  409 + category_id,
  410 + category_google_id,
  411 + category_level,
  412 + category_path,
  413 + shoplazza_created_at,
  414 + shoplazza_updated_at,
  415 + tenant_id,
  416 + creator,
  417 + create_time,
  418 + updater,
  419 + update_time,
  420 + deleted
  421 +FROM shoplazza_product_spu
  422 +WHERE tenant_id = 162
  423 + AND deleted = 0
  424 +ORDER BY id DESC
  425 +LIMIT 10;
  426 +
  427 +-- 10.2 查询指定SPU的完整关联数据
  428 +-- 包含SPU、SKU、Option的完整信息
  429 +SELECT
  430 + '=== SPU基本信息 ===' AS section,
  431 + spu.id AS spu_id,
  432 + spu.title,
  433 + spu.category,
  434 + spu.category_path,
  435 + spu.category_id,
  436 + spu.category_level,
  437 + spu.vendor,
  438 + spu.tags,
  439 + NULL AS sku_id,
  440 + NULL AS option1,
  441 + NULL AS option2,
  442 + NULL AS option3,
  443 + NULL AS opt_position,
  444 + NULL AS opt_name
  445 +FROM shoplazza_product_spu spu
  446 +WHERE spu.id = 64001 -- 替换为实际的spu_id
  447 + AND spu.deleted = 0
  448 +
  449 +UNION ALL
  450 +
  451 +SELECT
  452 + '=== SKU信息 ===' AS section,
  453 + sku.spu_id,
  454 + NULL AS title,
  455 + NULL AS category,
  456 + NULL AS category_path,
  457 + NULL AS category_id,
  458 + NULL AS category_level,
  459 + NULL AS vendor,
  460 + NULL AS tags,
  461 + sku.id AS sku_id,
  462 + sku.option1,
  463 + sku.option2,
  464 + sku.option3,
  465 + NULL AS opt_position,
  466 + NULL AS opt_name
  467 +FROM shoplazza_product_sku sku
  468 +WHERE sku.spu_id = 64001 -- 替换为实际的spu_id
  469 + AND sku.deleted = 0
  470 +
  471 +UNION ALL
  472 +
  473 +SELECT
  474 + '=== Option定义 ===' AS section,
  475 + opt.spu_id,
  476 + NULL AS title,
  477 + NULL AS category,
  478 + NULL AS category_path,
  479 + NULL AS category_id,
  480 + NULL AS category_level,
  481 + NULL AS vendor,
  482 + NULL AS tags,
  483 + NULL AS sku_id,
  484 + NULL AS option1,
  485 + NULL AS option2,
  486 + NULL AS option3,
  487 + opt.position AS opt_position,
  488 + opt.name AS opt_name
  489 +FROM shoplazza_product_option opt
  490 +WHERE opt.spu_id = 64001 -- 替换为实际的spu_id
  491 + AND opt.deleted = 0;
  492 +
  493 +-- 10.3 导出SPU数据用于分析(简化版)
  494 +-- 适合导出到Excel进行分析
  495 +SELECT
  496 + id AS 'SPU_ID',
  497 + tenant_id AS '租户ID',
  498 + title AS '商品标题',
  499 + category AS '类目字段',
  500 + category_path AS '类目路径',
  501 + category_id AS '类目ID',
  502 + category_level AS '类目层级',
  503 + vendor AS '品牌',
  504 + tags AS '标签',
  505 + published AS '是否发布',
  506 + published_at AS '发布时间',
  507 + create_time AS '创建时间',
  508 + update_time AS '更新时间',
  509 + CASE
  510 + WHEN category REGEXP '^[0-9]+$' THEN 'YES-需要修复'
  511 + ELSE 'NO-正常'
  512 + END AS 'Category是数字ID'
  513 +FROM shoplazza_product_spu
  514 +WHERE tenant_id = 162
  515 + AND deleted = 0
  516 +ORDER BY id DESC
  517 +LIMIT 100;
  518 +
  519 +-- ======================================
  520 +-- 11. 租户数据概览
  521 +-- ======================================
  522 +
  523 +-- 11.1 所有租户的数据统计
  524 +SELECT
  525 + tenant_id,
  526 + COUNT(*) AS total_spu,
  527 + SUM(CASE WHEN category IS NOT NULL THEN 1 ELSE 0 END) AS has_category,
  528 + SUM(CASE WHEN category_path IS NOT NULL THEN 1 ELSE 0 END) AS has_category_path,
  529 + SUM(CASE WHEN category REGEXP '^[0-9]+$' THEN 1 ELSE 0 END) AS category_is_numeric,
  530 + MIN(create_time) AS earliest_data,
  531 + MAX(create_time) AS latest_data
  532 +FROM shoplazza_product_spu
  533 +WHERE deleted = 0
  534 +GROUP BY tenant_id
  535 +ORDER BY tenant_id;
  536 +
  537 +-- 11.2 查询某个租户的所有SPU(分页查看)
  538 +-- 用于逐条检查数据
  539 +SELECT
  540 + id,
  541 + title,
  542 + category,
  543 + category_path,
  544 + vendor,
  545 + tags,
  546 + create_time
  547 +FROM shoplazza_product_spu
  548 +WHERE tenant_id = 162
  549 + AND deleted = 0
  550 +ORDER BY id
  551 +LIMIT 50 OFFSET 0; -- 修改OFFSET查看不同页
  552 +
... ...
docs/店匠相关资料/SHOPLAZZA_INTEGRATION_GUIDE.md
... ... @@ -3111,18 +3111,6 @@ search:
3111 3111 url: http://localhost:6002
3112 3112 timeout: 30000
3113 3113  
3114   -# 向量服务配置
3115   -embedding:
3116   - service:
3117   - url: http://localhost:6003
3118   - timeout: 60000
3119   -
3120   -# Elasticsearch 配置
3121   -elasticsearch:
3122   - hosts: localhost:9200
3123   - username: elastic
3124   - password: changeme
3125   -
3126 3114 # 数据同步配置
3127 3115 sync:
3128 3116 enabled: true
... ... @@ -3133,67 +3121,6 @@ sync:
3133 3121 tenants: "0 0 4 * * ?" # 每天凌晨4点
3134 3122 ```
3135 3123  
3136   -### 12.4 故障排查
3137   -
3138   -#### 12.4.1 OAuth 认证失败
3139   -
3140   -**问题:** 授权回调时报错 "Invalid redirect_uri"
3141   -
3142   -**解决:**
3143   -1. 检查 Partner 后台配置的 Redirect URI 是否与代码中一致
3144   -2. 确保 Redirect URI 使用 HTTPS 协议
3145   -3. 确保 Redirect URI 可公网访问
3146   -
3147   -#### 12.4.2 Token 过期
3148   -
3149   -**问题:** API 调用返回 401 Unauthorized
3150   -
3151   -**解决:**
3152   -1. 检查数据库中的 `token_expires_at` 字段
3153   -2. 使用 Refresh Token 刷新 Access Token
3154   -3. 更新数据库中的 Token 信息
3155   -
3156   -#### 12.4.3 API 调用速率限制
3157   -
3158   -**问题:** API 返回 429 Too Many Requests
3159   -
3160   -**解决:**
3161   -1. 降低请求频率
3162   -2. 实现指数退避重试
3163   -3. 解析响应头中的 `X-RateLimit-Reset` 字段,等待到指定时间后再重试
3164   -
3165   -#### 12.4.4 Webhook 接收失败
3166   -
3167   -**问题:** Webhook 事件未收到或签名验证失败
3168   -
3169   -**解决:**
3170   -1. 检查 Webhook 地址是否可公网访问
3171   -2. 检查签名验证逻辑是否正确使用 Client Secret
3172   -3. 查看店匠后台的 Webhook 日志,确认发送状态
3173   -4. 确保 Webhook 处理在 3 秒内返回 200 响应
3174   -
3175   -#### 12.4.5 商品搜索无结果
3176   -
3177   -**问题:** 搜索返回空结果
3178   -
3179   -**解决:**
3180   -1. 检查 ES 索引是否存在:`GET /shoplazza_products_1/_count`
3181   -2. 检查商品是否已索引:`GET /shoplazza_products_1/_search`
3182   -3. 检查租户隔离参数是否正确
3183   -4. 查看搜索服务日志,确认查询语句
3184   -
3185   -#### 12.4.6 向量生成失败
3186   -
3187   -**问题:** 图片或文本向量生成失败
3188   -
3189   -**解决:**
3190   -1. 检查向量服务是否正常运行
3191   -2. 检查向量服务的 GPU/CPU 资源是否充足
3192   -3. 检查图片 URL 是否可访问
3193   -4. 查看向量服务日志
3194   -
3195   ----
3196   -
3197 3124 ## 13. 参考资料
3198 3125  
3199 3126 ### 13.1 官方文档
... ...
docs/店匠相关资料/店匠官方参考文档.md
... ... @@ -7,6 +7,19 @@
7 7  
8 8 店匠API文档: https://www.shoplazza.dev/reference/overview-23;
9 9 店匠应用市场: https://appstore.shoplazza.com/
  10 +通过主题装修集成的话,我们有相关的cli 和 主题扩展
  11 +https://www.shoplazza.dev/v2025.06/docs/app-command-usage-guide
  12 +主题扩展的相关文档
  13 +https://www.shoplazza.dev/v2025.06/docs/theme-extension-2
  14 +
  15 +[Quick Construction of Public Apps](https://www.shoplazza.dev/reference/quick-construction-of-public-apps)
  16 +[1.Create a developer platform login account and register as a partner](https://www.shoplazza.dev/reference/1create-a-developer-platform-login-account-and-register-as-a-partner)
  17 +[2.Create A Public APP](https://www.shoplazza.dev/reference/2create-a-public-app)
  18 +[3.Develop your application service](https://www.shoplazza.dev/reference/3develop-your-application-service)
  19 +[4.Test your app](https://www.shoplazza.dev/reference/4test-your-app)
  20 +[5.Submit App for review](https://www.shoplazza.dev/reference/5submit-app-for-review)
  21 +[Quick Construction of Private Apps](https://www.shoplazza.dev/reference/quick-construction-of-private-apps)
  22 +
10 23  
11 24 ### 13.2 技术栈文档
12 25  
... ...
docs/搜索API对接指南.md
... ... @@ -50,7 +50,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \
50 50 "gte": "2020-01-01T00:00:00Z"
51 51 }
52 52 },
53   - "sort_by": "min_price",
  53 + "sort_by": "price",
54 54 "sort_order": "asc"
55 55 }'
56 56 ```
... ... @@ -63,7 +63,11 @@ curl -X POST "http://120.76.41.98:6002/search/" \
63 63 -H "X-Tenant-ID: 162" \
64 64 -d '{
65 65 "query": "芭比娃娃",
66   - "facets": ["category1_name", "specifications.color", "specifications.size", "specifications.material"],
  66 + "facets": [
  67 + {"field": "category1_name", "size": 10, "type": "terms"},
  68 + {"field": "specifications.color", "size": 10, "type": "terms"},
  69 + {"field": "specifications.size", "size": 10, "type": "terms"}
  70 + ],
67 71 "min_score": 0.2
68 72 }'
69 73 ```
... ... @@ -124,8 +128,8 @@ curl -X POST "http://120.76.41.98:6002/search/" \
124 128 | `filters` | object | N | null | 精确匹配过滤器(见下文) |
125 129 | `range_filters` | object | N | null | 数值范围过滤器(见下文) |
126 130 | `facets` | array | N | null | 分面配置(见下文) |
127   -| `sort_by` | string | N | null | 排序字段名(如 `min_price`, `max_price`) |
128   -| `sort_order` | string | N | "desc" | 排序方向:`asc`(升序)或 `desc`(降序) |
  131 +| `sort_by` | string | N | null | 排序字段名。支持:`price`(价格)、`sales`(销量)、`create_time`(创建时间)、`update_time`(更新时间)。默认按相关性排序 |
  132 +| `sort_order` | string | N | "desc" | 排序方向:`asc`(升序)或 `desc`(降序)。注意:`price`+`asc`=价格从低到高,`price`+`desc`=价格从高到低(后端自动映射为min_price或max_price) |
129 133 | `min_score` | float | N | null | 最小相关性分数阈值 |
130 134 | `sku_filter_dimension` | array[string] | N | null | 子SKU筛选维度列表(店铺配置)。指定后,每个SPU下的SKU将按这些维度的组合进行分组,每个组合只返回第一个SKU。支持的值:`option1`、`option2`、`option3` 或选项名称(如 `color`、`size`)。详见下文说明 |
131 135 | `debug` | boolean | N | false | 是否返回调试信息 |
... ... @@ -263,14 +267,8 @@ curl -X POST "http://120.76.41.98:6002/search/" \
263 267  
264 268 用于生成分面统计(分组聚合),常用于构建筛选器UI。
265 269  
266   -**简单模式**(字符串数组):
267   -```json
268   -{
269   - "facets": ["category1_name", "category2_name", "category3_name", "specifications"]
270   -}
271   -```
  270 +**配置格式**(对象数组):
272 271  
273   -**高级模式**(配置对象数组):
274 272 ```json
275 273 {
276 274 "facets": [
... ... @@ -280,6 +278,16 @@ curl -X POST "http://120.76.41.98:6002/search/" \
280 278 "type": "terms"
281 279 },
282 280 {
  281 + "field": "category2_name",
  282 + "size": 10,
  283 + "type": "terms"
  284 + },
  285 + {
  286 + "field": "specifications.color",
  287 + "size": 20,
  288 + "type": "terms"
  289 + },
  290 + {
283 291 "field": "min_price",
284 292 "type": "range",
285 293 "ranges": [
... ... @@ -288,8 +296,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \
288 296 {"key": "100-200", "from": 100, "to": 200},
289 297 {"key": "200+", "from": 200}
290 298 ]
291   - },
292   - "specifications" // 规格分面(特殊处理:嵌套聚合,按name分组,然后按value聚合)
  299 + }
293 300 ]
294 301 }
295 302 ```
... ... @@ -298,18 +305,35 @@ curl -X POST "http://120.76.41.98:6002/search/" \
298 305  
299 306 `specifications` 是嵌套字段,支持两种分面模式:
300 307  
301   -**模式1:所有规格名称的分面** (`"specifications"`):
  308 +**模式1:所有规格名称的分面**:
302 309 ```json
303 310 {
304   - "facets": ["specifications"]
  311 + "facets": [
  312 + {
  313 + "field": "specifications",
  314 + "size": 10,
  315 + "type": "terms"
  316 + }
  317 + ]
305 318 }
306 319 ```
307 320 返回所有规格名称(name)及其对应的值(value)列表。每个 name 会生成一个独立的分面结果。
308 321  
309   -**模式2:指定规格名称的分面** (`"specifications.color"`):
  322 +**模式2:指定规格名称的分面**:
310 323 ```json
311 324 {
312   - "facets": ["specifications.color", "specifications.size", "specifications.material"]
  325 + "facets": [
  326 + {
  327 + "field": "specifications.color",
  328 + "size": 20,
  329 + "type": "terms"
  330 + },
  331 + {
  332 + "field": "specifications.size",
  333 + "size": 15,
  334 + "type": "terms"
  335 + }
  336 + ]
313 337 }
314 338 ```
315 339 只返回指定规格名称的值列表。格式:`specifications.{name}`,其中 `{name}` 是规格名称(如"color"、"size"、"material")。
... ... @@ -564,6 +588,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \
564 588 | `sku_weights` | array[integer] | 所有SKU重量列表 |
565 589 | `sku_weight_units` | array[string] | 所有SKU重量单位列表 |
566 590 | `total_inventory` | integer | 总库存 |
  591 +| `sales` | integer | 销量(展示销量) |
567 592 | `option1_name` | string | 选项1名称(如"color") |
568 593 | `option2_name` | string | 选项2名称(如"size") |
569 594 | `option3_name` | string | 选项3名称 |
... ... @@ -605,11 +630,45 @@ curl -X POST "http://120.76.41.98:6002/search/" \
605 630 "query": "玩具",
606 631 "size": 20,
607 632 "from": 0,
608   - "sort_by": "min_price",
  633 + "sort_by": "price",
609 634 "sort_order": "asc"
610 635 }
611 636 ```
612 637  
  638 +**需求**: 搜索"玩具",按价格从高到低排序
  639 +
  640 +```json
  641 +{
  642 + "query": "玩具",
  643 + "size": 20,
  644 + "from": 0,
  645 + "sort_by": "price",
  646 + "sort_order": "desc"
  647 +}
  648 +```
  649 +
  650 +**需求**: 搜索"玩具",按销量从高到低排序
  651 +
  652 +```json
  653 +{
  654 + "query": "玩具",
  655 + "size": 20,
  656 + "from": 0,
  657 + "sort_by": "sales",
  658 + "sort_order": "desc"
  659 +}
  660 +```
  661 +
  662 +**需求**: 搜索"玩具",按默认(相关性)排序
  663 +
  664 +```json
  665 +{
  666 + "query": "玩具",
  667 + "size": 20,
  668 + "from": 0
  669 +}
  670 +```
  671 +
613 672 ### 场景2:SKU筛选(按维度过滤)
614 673  
615 674 **需求**: 搜索"芭比娃娃",每个SPU下按颜色筛选,每种颜色只显示一个SKU
... ... @@ -650,7 +709,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \
650 709  
651 710 ### 场景4:带分面的商品搜索
652 711  
653   -**需求**: 搜索"玩具",获取类目和品牌的分面统计,用于构建筛选器
  712 +**需求**: 搜索"玩具",获取类目和规格的分面统计,用于构建筛选器
654 713  
655 714 ```json
656 715 {
... ... @@ -658,9 +717,9 @@ curl -X POST "http://120.76.41.98:6002/search/" \
658 717 "size": 20,
659 718 "language": "zh",
660 719 "facets": [
661   - "category1_name",
662   - "category2_name",
663   - "specifications"
  720 + {"field": "category1_name", "size": 15, "type": "terms"},
  721 + {"field": "category2_name", "size": 10, "type": "terms"},
  722 + {"field": "specifications", "size": 10, "type": "terms"}
664 723 ]
665 724 }
666 725 ```
... ... @@ -686,7 +745,8 @@ curl -X POST "http://120.76.41.98:6002/search/" \
686 745 "facets": [
687 746 {
688 747 "field": "category1_name",
689   - "size": 15
  748 + "size": 15,
  749 + "type": "terms"
690 750 },
691 751 {
692 752 "field": "min_price",
... ... @@ -698,7 +758,11 @@ curl -X POST "http://120.76.41.98:6002/search/" \
698 758 {"key": "200+", "from": 200}
699 759 ]
700 760 },
701   - "specifications"
  761 + {
  762 + "field": "specifications",
  763 + "size": 10,
  764 + "type": "terms"
  765 + }
702 766 ],
703 767 "sort_by": "min_price",
704 768 "sort_order": "asc"
... ... @@ -768,18 +832,23 @@ curl -X POST "http://120.76.41.98:6002/search/" \
768 832 "query": "手机",
769 833 "size": 20,
770 834 "language": "zh",
771   - "facets": ["specifications"]
  835 + "facets": [
  836 + {"field": "specifications", "size": 10, "type": "terms"}
  837 + ]
772 838 }
773 839 ```
774 840  
775   -**需求**: 只获取"color"规格的分面统计
  841 +**需求**: 只获取"color"和"size"规格的分面统计
776 842  
777 843 ```json
778 844 {
779 845 "query": "手机",
780 846 "size": 20,
781 847 "language": "zh",
782   - "facets": ["specifications.color", "specifications.size"]
  848 + "facets": [
  849 + {"field": "specifications.color", "size": 20, "type": "terms"},
  850 + {"field": "specifications.size", "size": 15, "type": "terms"}
  851 + ]
783 852 }
784 853 ```
785 854  
... ... @@ -800,10 +869,10 @@ curl -X POST "http://120.76.41.98:6002/search/" \
800 869 }
801 870 },
802 871 "facets": [
803   - "category1_name",
804   - "category2_name",
805   - "specifications.color",
806   - "specifications.size"
  872 + {"field": "category1_name", "size": 15, "type": "terms"},
  873 + {"field": "category2_name", "size": 10, "type": "terms"},
  874 + {"field": "specifications.color", "size": 20, "type": "terms"},
  875 + {"field": "specifications.size", "size": 15, "type": "terms"}
807 876 ]
808 877 }
809 878 ```
... ... @@ -1006,6 +1075,7 @@ curl "http://localhost:6002/search/12345"
1006 1075 | `sku_weights` | long | SKU重量列表(数组) |
1007 1076 | `sku_weight_units` | keyword | SKU重量单位列表(数组) |
1008 1077 | `total_inventory` | long | 总库存 |
  1078 +| `sales` | long | 销量(展示销量) |
1009 1079 | `skus` | nested | SKU详细信息(嵌套对象数组) |
1010 1080 | `create_time`, `update_time` | date | 创建/更新时间 |
1011 1081 | `title_embedding` | dense_vector | 标题向量(1024维,仅用于搜索) |
... ... @@ -1038,11 +1108,15 @@ curl "http://localhost:6002/search/12345"
1038 1108  
1039 1109 #### 排序字段
1040 1110  
1041   -- `min_price`: 最低价格
1042   -- `max_price`: 最高价格
  1111 +- `price`: 价格(后端自动根据sort_order映射:asc→min_price,desc→max_price)
  1112 +- `sales`: 销量
1043 1113 - `create_time`: 创建时间
1044 1114 - `update_time`: 更新时间
1045   -- `relevance_score`: 相关性分数(默认)
  1115 +- `relevance_score`: 相关性分数(默认,不指定sort_by时使用)
  1116 +
  1117 +**注意**: 前端只需传 `price`,后端会自动处理:
  1118 +- `sort_by: "price"` + `sort_order: "asc"` → 按 `min_price` 升序(价格从低到高)
  1119 +- `sort_by: "price"` + `sort_order: "desc"` → 按 `max_price` 降序(价格从高到低)
1046 1120  
1047 1121 ### 支持的分析器
1048 1122  
... ...
docs/竞品参考文档/prefixbox_api.md 0 → 100644
... ... @@ -0,0 +1,700 @@
  1 +# Prefixbox Search API 完整文档
  2 +
  3 +*基于公开信息整理的 Prefixbox Search API 技术文档*
  4 +
  5 +*官方接口文档*
  6 +https://api-docs.prefixbox.com/#search-api
  7 +
  8 +
  9 +---
  10 +
  11 +## 概述
  12 +
  13 +Prefixbox Search API 是一个基于 AI 的电商搜索解决方案,提供全文搜索、向量搜索和混合搜索功能。该 API 支持自动完成、搜索建议、商品过滤、排序等核心电商搜索功能。
  14 +
  15 +**核心特性:**
  16 +- 混合搜索(文本 + 向量 + GPT 技术)
  17 +- 自然语言查询理解
  18 +- 动态重排序
  19 +- 同义词管理
  20 +- 多语言支持
  21 +
  22 +---
  23 +
  24 +## 1. 基础信息
  25 +
  26 +### 1.1 API 端点
  27 +
  28 +```
  29 +基础 URL: https://api.prefixbox.com
  30 +版本: v1/v2 (根据集成方式不同)
  31 +```
  32 +
  33 +### 1.2 认证方式
  34 +
  35 +**API Key 认证**
  36 +- 在 Prefixbox API Portal 的 Profile 页面获取 Search API Key
  37 +- 所有请求必须在 Header 中包含:`Authorization: Bearer YOUR_SEARCH_API_KEY`
  38 +- 同时需要提供 Search Engine Identifier
  39 +
  40 +**Header 示例:**
  41 +```http
  42 +Authorization: Bearer pb_live_abc123xyz...
  43 +X-Search-Engine-Id: your-engine-identifier
  44 +```
  45 +
  46 +---
  47 +
  48 +## 2. 核心 API 端点
  49 +
  50 +### 2.1 自动完成 API (Autocomplete API)
  51 +
  52 +**端点:** `GET /autocomplete`
  53 +
  54 +**功能:** 当用户在搜索框输入时提供实时查询建议、分类建议和商品建议。
  55 +
  56 +**请求参数:**
  57 +
  58 +| 参数 | 类型 | 必填 | 说明 |
  59 +|------|------|------|------|
  60 +| `q` | string | 是 | 用户输入的查询字符串 |
  61 +| `limit` | integer | 否 | 返回建议的最大数量 (默认: 10) |
  62 +| `type` | string | 否 | 建议类型: `all`, `queries`, `categories`, `products` (默认: all) |
  63 +| `lang` | string | 否 | ISO 639-1 语言代码 (如: en, de, fr) |
  64 +
  65 +**请求示例:**
  66 +```http
  67 +GET https://api.prefixbox.com/autocomplete?q=apple&limit=8&lang=en
  68 +Authorization: Bearer pb_live_abc123xyz...
  69 +X-Search-Engine-Id: your-engine-identifier
  70 +```
  71 +
  72 +**响应格式:**
  73 +```json
  74 +{
  75 + "query": "apple",
  76 + "suggestions": {
  77 + "queries": [
  78 + {"text": "apple iphone", "count": 1250},
  79 + {"text": "apple watch", "count": 890},
  80 + {"text": "apple airpods", "count": 650}
  81 + ],
  82 + "categories": [
  83 + {"id": "cat_001", "name": "Smartphones", "path": "Electronics/Mobile"},
  84 + {"id": "cat_002", "name": "Accessories", "path": "Electronics/Accessories"}
  85 + ],
  86 + "products": [
  87 + {
  88 + "id": "prod_12345",
  89 + "name": "Apple iPhone 15 Pro",
  90 + "image": "https://cdn.example.com/iphone15.jpg",
  91 + "price": 999.99,
  92 + "currency": "USD",
  93 + "url": "https://store.example.com/products/iphone-15-pro"
  94 + }
  95 + ]
  96 + },
  97 + "latency_ms": 45
  98 +}
  99 +```
  100 +
  101 +---
  102 +
  103 +### 2.2 搜索 API (Search API)
  104 +
  105 +**端点:** `GET /search`
  106 +
  107 +**功能:** 执行完整搜索并返回商品结果、过滤器、排序选项等。
  108 +
  109 +**请求参数:**
  110 +
  111 +| 参数 | 类型 | 必填 | 说明 |
  112 +|------|------|------|------|
  113 +| `q` | string | 是 | 搜索查询字符串 |
  114 +| `page` | integer | 否 | 页码 (默认: 1) |
  115 +| `page_size` | integer | 否 | 每页商品数量 (默认: 24, 最大: 100) |
  116 +| `sort` | string | 否 | 排序方式: `relevance`, `price_asc`, `price_desc`, `newest`, `popularity` |
  117 +| `filters` | object | 否 | 过滤器对象 `{"brand": ["Apple", "Samsung"], "price_range": {"min": 500, "max": 1000}}` |
  118 +| `lang` | string | 否 | 语言代码 |
  119 +| `user_id` | string | 否 | 用户ID(用于个性化)|
  120 +| `cdp_data` | object | 否 | 客户数据平台偏好数据 |
  121 +
  122 +**请求示例:**
  123 +```http
  124 +GET https://api.prefixbox.com/search?q=running+shoes&page=1&page_size=24&sort=relevance
  125 +Authorization: Bearer pb_live_abc123xyz...
  126 +X-Search-Engine-Id: your-engine-identifier
  127 +```
  128 +
  129 +**完整带过滤器的请求:**
  130 +```http
  131 +GET https://api.prefixbox.com/search?q=running+shoes&filters={"brand":["Nike","Adidas"],"price_range":{"min":50,"max":200},"size":["8","9","10"]}
  132 +Authorization: Bearer pb_live_abc123xyz...
  133 +X-Search-Engine-Id: your-engine-identifier
  134 +```
  135 +
  136 +**响应格式:**
  137 +```json
  138 +{
  139 + "search_id": "search_abc123xyz789",
  140 + "query": "running shoes",
  141 + "total_results": 1456,
  142 + "page": 1,
  143 + "page_size": 24,
  144 + "total_pages": 61,
  145 + "latency_ms": 120,
  146 + "products": [
  147 + {
  148 + "id": "prod_67890",
  149 + "name": "Nike Air Zoom Pegasus",
  150 + "description": "Responsive running shoes with Zoom Air cushioning",
  151 + "image": "https://cdn.example.com/nike-pegasus.jpg",
  152 + "price": 129.99,
  153 + "original_price": 159.99,
  154 + "currency": "USD",
  155 + "url": "https://store.example.com/products/nike-air-zoom-pegasus",
  156 + "availability": "in_stock",
  157 + "brand": "Nike",
  158 + "category": "Running Shoes",
  159 + "score": 0.987
  160 + }
  161 + ],
  162 + "filters": [
  163 + {
  164 + "name": "brand",
  165 + "type": "multiselect",
  166 + "values": [
  167 + {"value": "Nike", "count": 456},
  168 + {"value": "Adidas", "count": 389},
  169 + {"value": "Asics", "count": 234}
  170 + ]
  171 + },
  172 + {
  173 + "name": "price",
  174 + "type": "range",
  175 + "min": 29.99,
  176 + "max": 299.99
  177 + },
  178 + {
  179 + "name": "size",
  180 + "type": "multiselect",
  181 + "values": [
  182 + {"value": "7", "count": 156},
  183 + {"value": "8", "count": 234},
  184 + {"value": "9", "count": 289}
  185 + ]
  186 + }
  187 + ],
  188 + "sort_options": [
  189 + {"value": "relevance", "label": "Relevance"},
  190 + {"value": "price_asc", "label": "Price: Low to High"},
  191 + {"value": "price_desc", "label": "Price: High to Low"},
  192 + {"value": "newest", "label": "Newest First"},
  193 + {"value": "popularity", "label": "Most Popular"}
  194 + ],
  195 + "related_keywords": ["trail running", "marathon shoes", "lightweight trainers"],
  196 + "related_categories": [
  197 + {"id": "cat_sports", "name": "Sports & Outdoors"},
  198 + {"id": "cat_footwear", "name": "Footwear"}
  199 + ],
  200 + "personalized": false,
  201 + "ai_reranking": true
  202 +}
  203 +```
  204 +
  205 +---
  206 +
  207 +### 2.3 商品详情 API (Product Details API)
  208 +
  209 +**端点:** `GET /products/{product_id}`
  210 +
  211 +**功能:** 获取单个商品的详细信息。
  212 +
  213 +**请求示例:**
  214 +```http
  215 +GET https://api.prefixbox.com/products/prod_67890
  216 +Authorization: Bearer pb_live_abc123xyz...
  217 +X-Search-Engine-Id: your-engine-identifier
  218 +```
  219 +
  220 +**响应格式:**
  221 +```json
  222 +{
  223 + "id": "prod_67890",
  224 + "name": "Nike Air Zoom Pegasus",
  225 + "description": "Responsive running shoes with Zoom Air cushioning",
  226 + "long_description": "Detailed product description...",
  227 + "images": [
  228 + "https://cdn.example.com/nike-pegasus-1.jpg",
  229 + "https://cdn.example.com/nike-pegasus-2.jpg"
  230 + ],
  231 + "price": 129.99,
  232 + "original_price": 159.99,
  233 + "currency": "USD",
  234 + "url": "https://store.example.com/products/nike-air-zoom-pegasus",
  235 + "availability": "in_stock",
  236 + "stock_quantity": 45,
  237 + "brand": "Nike",
  238 + "category": "Running Shoes",
  239 + "attributes": {
  240 + "color": ["Black", "White", "Blue"],
  241 + "size": ["7", "8", "9", "10", "11", "12"],
  242 + "material": "Mesh and Rubber",
  243 + "weight": "280g"
  244 + },
  245 + "reviews": {
  246 + "average_rating": 4.5,
  247 + "count": 234
  248 + },
  249 + "tags": ["marathon", "neutral", "cushioned"]
  250 +}
  251 +```
  252 +
  253 +---
  254 +
  255 +### 2.4 分类页面 API (Collection API)
  256 +
  257 +**端点:** `GET /collections/{collection_id}`
  258 +
  259 +**功能:** 获取分类页面的商品列表。
  260 +
  261 +**请求参数:**
  262 +
  263 +| 参数 | 类型 | 说明 |
  264 +|------|------|------|
  265 +| `collection_id` | string | 分类ID |
  266 +| `page` | integer | 页码 |
  267 +| `page_size` | integer | 每页数量 |
  268 +| `sort` | string | 排序方式 |
  269 +| `filters` | object | 过滤器 |
  270 +
  271 +**响应格式:** 与 Search API 类似,但返回的是特定分类的商品
  272 +
  273 +---
  274 +
  275 +## 3. 高级功能 API
  276 +
  277 +### 3.1 个性化搜索 API
  278 +
  279 +**端点:** `GET /search/personalized`
  280 +
  281 +**功能:** 基于用户偏好数据返回个性化排序结果。
  282 +
  283 +**请求参数:**
  284 +```json
  285 +{
  286 + "q": "running shoes",
  287 + "user_id": "user_12345",
  288 + "cdp_data": {
  289 + "preferred_brands": ["Nike", "Adidas"],
  290 + "preferred_size": "9",
  291 + "preferred_color": "Black",
  292 + "past_purchases": ["prod_111", "prod_222"]
  293 + }
  294 +}
  295 +```
  296 +
  297 +---
  298 +
  299 +### 3.2 AI 重排序 API
  300 +
  301 +**端点:** `POST /search/rerank`
  302 +
  303 +**功能:** 基于用户行为数据动态重排搜索结果。
  304 +
  305 +**请求参数:**
  306 +```json
  307 +{
  308 + "search_id": "search_abc123xyz789",
  309 + "user_interactions": {
  310 + "clicks": ["prod_67890", "prod_67891"],
  311 + "hover_time_ms": 2500,
  312 + "previous_searches": ["trail running"]
  313 + }
  314 +}
  315 +```
  316 +
  317 +---
  318 +
  319 +## 4. 推荐 API (AI Recommend)
  320 +
  321 +### 4.1 相关产品推荐
  322 +
  323 +**端点:** `GET /recommendations/related`
  324 +
  325 +**请求参数:**
  326 +```http
  327 +GET /recommendations/related?product_id=prod_67890&limit=6
  328 +```
  329 +
  330 +**响应格式:**
  331 +```json
  332 +{
  333 + "product_id": "prod_67890",
  334 + "recommendations": [
  335 + {
  336 + "id": "prod_67891",
  337 + "name": "Nike Dri-FIT Running Socks",
  338 + "reason": "Frequently bought together"
  339 + },
  340 + {
  341 + "id": "prod_67892",
  342 + "name": "Nike Running Cap",
  343 + "reason": "Similar category"
  344 + }
  345 + ]
  346 +}
  347 +```
  348 +
  349 +---
  350 +
  351 +### 4.2 热门商品推荐
  352 +
  353 +**端点:** `GET /recommendations/trending`
  354 +
  355 +```http
  356 +GET /recommendations/trending?category=running&limit=8
  357 +```
  358 +
  359 +---
  360 +
  361 +## 5. 分析追踪 API
  362 +
  363 +### 5.1 搜索事件追踪
  364 +
  365 +**端点:** `POST /analytics/search-event`
  366 +
  367 +**功能:** 追踪用户搜索行为以优化搜索相关性。
  368 +
  369 +**事件类型:**
  370 +- `search_impression` - 搜索结果展示
  371 +- `search_click` - 用户点击结果
  372 +- `search_add_to_cart` - 添加商品到购物车
  373 +- `search_purchase` - 完成购买
  374 +
  375 +**请求示例:**
  376 +```json
  377 +{
  378 + "search_id": "search_abc123xyz789",
  379 + "event_type": "search_click",
  380 + "user_id": "user_12345",
  381 + "product_id": "prod_67890",
  382 + "position": 1,
  383 + "timestamp": "2025-01-15T10:30:00Z"
  384 +}
  385 +```
  386 +
  387 +---
  388 +
  389 +## 6. 实时目录更新 API
  390 +
  391 +**端点:** `POST /catalog/update`
  392 +
  393 +**功能:** 实时更新商品目录数据。
  394 +
  395 +**请求格式:**
  396 +```json
  397 +{
  398 + "updates": [
  399 + {
  400 + "product_id": "prod_67890",
  401 + "field": "stock_quantity",
  402 + "value": 23,
  403 + "action": "update"
  404 + },
  405 + {
  406 + "product_id": "prod_67891",
  407 + "action": "delete"
  408 + }
  409 + ]
  410 +}
  411 +```
  412 +
  413 +---
  414 +
  415 +## 7. 错误处理
  416 +
  417 +### 7.1 HTTP 状态码
  418 +
  419 +| 状态码 | 说明 |
  420 +|--------|------|
  421 +| `200` | 请求成功 |
  422 +| `400` | 请求参数错误 |
  423 +| `401` | 认证失败 - API Key 无效或缺失 |
  424 +| `403` | 权限不足 |
  425 +| `404` | 资源未找到 |
  426 +| `429` | 请求频率限制 |
  427 +| `500` | 服务器内部错误 |
  428 +
  429 +### 7.2 错误响应格式
  430 +
  431 +```json
  432 +{
  433 + "error": {
  434 + "code": "INVALID_API_KEY",
  435 + "message": "The provided API key is invalid or expired.",
  436 + "status": 401,
  437 + "request_id": "req_123456789"
  438 + }
  439 +}
  440 +```
  441 +
  442 +---
  443 +
  444 +## 8. 代码集成示例
  445 +
  446 +### 8.1 JavaScript/Frontend 集成
  447 +
  448 +```javascript
  449 +// 初始化 Prefixbox 客户端
  450 +const prefixbox = new PrefixboxClient({
  451 + apiKey: 'pb_live_abc123xyz...',
  452 + searchEngineId: 'your-engine-identifier',
  453 + language: 'en'
  454 +});
  455 +
  456 +// 自动完成
  457 +const suggestions = await prefixbox.autocomplete({
  458 + query: 'run',
  459 + limit: 8
  460 +});
  461 +
  462 +// 搜索
  463 +const searchResults = await prefixbox.search({
  464 + query: 'running shoes',
  465 + page: 1,
  466 + pageSize: 24,
  467 + filters: {
  468 + brand: ['Nike', 'Adidas'],
  469 + priceRange: { min: 50, max: 200 }
  470 + }
  471 +});
  472 +
  473 +// 追踪事件
  474 +prefixbox.trackEvent('search_click', {
  475 + searchId: searchResults.searchId,
  476 + productId: 'prod_67890',
  477 + position: 1
  478 +});
  479 +```
  480 +
  481 +### 8.2 cURL 示例
  482 +
  483 +```bash
  484 +# 自动完成
  485 +curl -X GET "https://api.prefixbox.com/autocomplete?q=apple&limit=5" \
  486 + -H "Authorization: Bearer pb_live_abc123xyz..." \
  487 + -H "X-Search-Engine-Id: your-engine-identifier"
  488 +
  489 +# 搜索
  490 +curl -X GET "https://api.prefixbox.com/search?q=laptop&page=1&page_size=24" \
  491 + -H "Authorization: Bearer pb_live_abc123xyz..." \
  492 + -H "X-Search-Engine-Id: your-engine-identifier"
  493 +```
  494 +
  495 +### 8.3 Python 集成示例
  496 +
  497 +```python
  498 +import requests
  499 +
  500 +class PrefixboxClient:
  501 + def __init__(self, api_key, search_engine_id):
  502 + self.base_url = "https://api.prefixbox.com"
  503 + self.headers = {
  504 + "Authorization": f"Bearer {api_key}",
  505 + "X-Search-Engine-Id": search_engine_id,
  506 + "Content-Type": "application/json"
  507 + }
  508 +
  509 + def autocomplete(self, query, limit=10):
  510 + endpoint = f"{self.base_url}/autocomplete"
  511 + params = {"q": query, "limit": limit}
  512 + response = requests.get(endpoint, headers=self.headers, params=params)
  513 + return response.json()
  514 +
  515 + def search(self, query, page=1, page_size=24, filters=None):
  516 + endpoint = f"{self.base_url}/search"
  517 + params = {
  518 + "q": query,
  519 + "page": page,
  520 + "page_size": page_size
  521 + }
  522 + if filters:
  523 + params["filters"] = filters
  524 +
  525 + response = requests.get(endpoint, headers=self.headers, params=params)
  526 + return response.json()
  527 +
  528 +# 使用示例
  529 +client = PrefixboxClient(
  530 + api_key="pb_live_abc123xyz...",
  531 + search_engine_id="your-engine-identifier"
  532 +)
  533 +
  534 +# 执行搜索
  535 +results = client.search(
  536 + query="wireless headphones",
  537 + filters={"brand": ["Sony", "Bose"], "price_range": {"min": 100, "max": 300}}
  538 +)
  539 +
  540 +print(f"找到 {results['total_results']} 个商品")
  541 +for product in results['products']:
  542 + print(f"- {product['name']}: ${product['price']}")
  543 +```
  544 +
  545 +---
  546 +
  547 +## 9. SDK 和库
  548 +
  549 +Prefixbox 提供以下官方 SDK:
  550 +
  551 +- **JavaScript SDK**: `npm install @prefixbox/search-js`
  552 +- **React SDK**: `npm install @prefixbox/react-search`
  553 +- **Python SDK**: `pip install prefixbox-search`
  554 +- **PHP SDK**: `composer require prefixbox/search`
  555 +
  556 +---
  557 +
  558 +## 10. 集成方式
  559 +
  560 +### 10.1 API 集成
  561 +
  562 +**适用场景:** 需要完全自定义 UI 和用户体验
  563 +
  564 +**客户职责:**
  565 +- 提供商品 Feed URL
  566 +- 实现与 Prefixbox API 的通信
  567 +- 实现搜索 UX 界面
  568 +- 实现用户行为追踪
  569 +- Bug 修复和发布
  570 +
  571 +**Prefixbox 职责:**
  572 +- 在 Admin Portal 配置搜索模块
  573 +- 定制功能需求
  574 +- 测试和报告 Bug
  575 +- 运行 A/B 测试
  576 +
  577 +### 10.2 前端 JavaScript 集成
  578 +
  579 +**适用场景:** 快速部署,最小化开发工作量
  580 +
  581 +**客户职责:**
  582 +- 提供带 header/footer 的空搜索结果页
  583 +- 提供商品 Feed URL
  584 +- 在网站中插入 Prefixbox JavaScript
  585 +
  586 +**Prefixbox 职责:**
  587 +- 配置搜索模块
  588 +- 定制外观和功能
  589 +- 测试、预览和修复 Bug
  590 +- 发布到生产环境
  591 +
  592 +---
  593 +
  594 +## 11. 商品 Feed 格式
  595 +
  596 +Prefixbox 要求提供兼容的商品 Feed:
  597 +
  598 +**必需字段:**
  599 +- `id` - 商品唯一标识符
  600 +- `name` - 商品名称
  601 +- `url` - 商品页面 URL
  602 +- `price` - 价格
  603 +- `currency` - 货币代码
  604 +- `availability` - 库存状态
  605 +
  606 +**推荐字段:**
  607 +- `description` - 商品描述
  608 +- `image` - 商品图片 URL
  609 +- `brand` - 品牌
  610 +- `category` - 分类
  611 +- `attributes` - 商品属性(颜色、尺寸等)
  612 +- `created_at` - 创建日期
  613 +- `popularity_score` - 热门度分数
  614 +
  615 +**支持格式:** JSON, XML, CSV
  616 +
  617 +---
  618 +
  619 +## 12. 限制和配额
  620 +
  621 +### 12.1 API 请求限制
  622 +
  623 +| 套餐 | 月请求配额 | 每分钟限制 |
  624 +|------|-----------|-----------|
  625 +| Starter | 20,000 | 100 |
  626 +| Growth | 200,000 | 500 |
  627 +| Grow+ | 600,000-900,000 | 1,000 |
  628 +| Enterprise | 自定义 | 自定义 |
  629 +
  630 +### 12.2 什么算作 API 请求?
  631 +
  632 +- **自动完成:** 每个字符输入 = 1 次请求
  633 +- **搜索:** 每次搜索 = 1 次请求
  634 +- **过滤/排序:** 每次操作 = 1 次请求
  635 +- **分页:** 每次翻页 = 1 次请求
  636 +
  637 +**示例:** 用户输入 "apple"(5 个字符 = 5 次自动完成请求)+ 执行搜索(1 次)+ 过滤(1 次)+ 翻页(1 次)= **共 8 次 API 请求**
  638 +
  639 +---
  640 +
  641 +## 13. 支持和资源
  642 +
  643 +### 13.1 官方资源
  644 +
  645 +- **API 文档门户:** https://api-docs.prefixbox.com
  646 +- **开发者中心:** https://developers.prefixbox.com
  647 +- **技术文档:** https://www.prefixbox.com/en-us/technical
  648 +- **集成指南:** https://www.prefixbox.com/en-us/technical/integration
  649 +
  650 +### 13.2 支持级别
  651 +
  652 +| 支持级别 | 可用套餐 | 响应时间 |
  653 +|---------|---------|---------|
  654 +| 标准支持 | Starter, Growth | 24-48 小时 |
  655 +| 高级支持 | Grow+ | 4 小时 |
  656 +| 专属支持 | Enterprise | 1 小时 |
  657 +
  658 +---
  659 +
  660 +## 14. 最佳实践
  661 +
  662 +### 14.1 性能优化
  663 +
  664 +1. **缓存自动完成结果** - 减少重复请求
  665 +2. **延迟搜索** - 用户停止输入 300ms 后再发送请求
  666 +3. **分页加载** - 使用无限滚动或分页
  667 +4. **图片优化** - 使用 CDN 和适当尺寸的图片
  668 +
  669 +### 14.2 提升搜索相关性
  670 +
  671 +1. **配置同义词** - 连接不同词汇(如 "sneakers" = "trainers")
  672 +2. **设置字段权重** - 提升重要字段的权重(如 name > description)
  673 +3. **使用 A/B 测试** - 测试不同配置对转化率的影响
  674 +4. **分析零结果搜索** - 识别缺失商品或查询问题
  675 +
  676 +### 14.3 用户体验
  677 +
  678 +1. **显示搜索建议** - 帮助用户快速找到相关查询
  679 +2. **保留搜索上下文** - 支持返回搜索结果
  680 +3. **移动端优化** - 确保触摸友好的界面
  681 +4. **加载状态** - 提供清晰的加载指示
  682 +
  683 +---
  684 +
  685 +## 15. 常见问题
  686 +
  687 +### Q: API 返回 429 错误怎么办?
  688 +A: 表示达到速率限制。实现指数退避重试策略,或升级到更高配额的套餐。
  689 +
  690 +### Q: 如何处理多语言搜索?
  691 +A: 在请求中使用 `lang` 参数,并确保商品 Feed 包含翻译内容。
  692 +
  693 +### Q: 可以自定义搜索算法吗?
  694 +A: 可以在 Prefixbox Admin Portal 中调整字段权重和同义词规则。
  695 +
  696 +### Q: 支持语音搜索吗?
  697 +A: 是的,Prefixbox AI Agent 支持自然语言查询和语音搜索集成。
  698 +
  699 +### Q: 如何集成到移动应用?
  700 +A: 通过 API 集成方式,使用 iOS/Android 原生开发或 React Native 调用 REST API。
... ...
docs/系统设计文档.md
... ... @@ -699,45 +699,44 @@ Elasticsearch
699 699  
700 700 #### 8.3.2 Facets 配置数据流
701 701  
702   -**输入格式**:`List[Union[str, FacetConfig]]`
  702 +**输入格式**:`List[FacetConfig]`
703 703  
704   -- **简单模式**:字符串列表(字段名),使用默认配置
705   - ```json
706   - ["category1_name", "category2_name", "specifications"]
707   - ```
708   -
709   -- **Specifications分面**:
710   - - 所有规格名称:`"specifications"` - 返回所有name及其value列表
711   - - 指定规格名称:`"specifications.color"` - 只返回指定name的value列表
  704 +**配置对象列表**:所有分面配置必须使用 FacetConfig 对象
  705 +```json
  706 +[
  707 + {
  708 + "field": "category1_name",
  709 + "size": 15,
  710 + "type": "terms"
  711 + },
  712 + {
  713 + "field": "specifications.color",
  714 + "size": 20,
  715 + "type": "terms"
  716 + },
  717 + {
  718 + "field": "min_price",
  719 + "type": "range",
  720 + "ranges": [
  721 + {"key": "0-50", "to": 50},
  722 + {"key": "50-100", "from": 50, "to": 100}
  723 + ]
  724 + }
  725 +]
  726 +```
712 727  
713   -- **高级模式**:FacetConfig 对象列表,支持自定义配置
714   - ```json
715   - [
716   - {
717   - "field": "category1_name",
718   - "size": 15,
719   - "type": "terms"
720   - },
721   - {
722   - "field": "min_price",
723   - "type": "range",
724   - "ranges": [
725   - {"key": "0-50", "to": 50},
726   - {"key": "50-100", "from": 50, "to": 100}
727   - ]
728   - },
729   - "specifications.color" // 指定规格名称的分面
730   - ]
731   - ```
  728 +**Specifications 分面支持**:
  729 +- 所有规格名称:`field: "specifications"` - 返回所有 name 及其 value 列表
  730 +- 指定规格名称:`field: "specifications.color"` - 只返回指定 name 的 value 列表
732 731  
733 732 **数据流**:
734   -1. API 层:接收 `List[Union[str, FacetConfig]]`
735   -2. Searcher 层:透传,不做转换
736   -3. ES Query Builder:只接受 `str` 或 `FacetConfig`,自动处理两种格式
  733 +1. API 层:接收 `List[FacetConfig]`,Pydantic 验证参数
  734 +2. Searcher 层:透传 FacetConfig 对象列表
  735 +3. ES Query Builder:解析 FacetConfig 对象
737 736 - 检测 `"specifications"` 或 `"specifications.{name}"` 格式
738   - - 构建对应的嵌套聚合查询
739   -4. 输出:转换为 ES 聚合查询(包括specifications嵌套聚合)
740   -5. Result Formatter:格式化ES聚合结果,处理specifications嵌套结构
  737 + - 构建对应的嵌套聚合查询或普通聚合查询
  738 +4. 输出:转换为 ES 聚合查询(包括 specifications 嵌套聚合)
  739 +5. Result Formatter:格式化 ES 聚合结果,处理 specifications 嵌套结构
741 740  
742 741 #### 8.3.3 Range Filters 数据流
743 742  
... ...
docs/索引字段说明v2-参考表结构.md
  1 +
  2 +
  3 +类目表 product_category
  4 +id bigint(20) NO PRI auto_increment
  5 +parent_id bigint(20) NO
  6 +name varchar(255) NO
  7 +pic_url varchar(255) NO
  8 +sort int(11) YES 0
  9 +status tinyint(4) NO
  10 +creator varchar(64) YES
  11 +create_time datetime NO CURRENT_TIMESTAMP
  12 +updater varchar(64) YES
  13 +update_time datetime NO CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP
  14 +deleted bit(1) NO b'0'
  15 +tenant_id bigint(20) NO 0
  16 +
  17 +
  18 +
1 19 spu表 shoplazza_product_spu 全部字段
2 20 "Field" "Type" "Null" "Key" "Default" "Extra"
3 21 "id" "bigint(20)" "NO" "PRI" "auto_increment"
... ...
docs/索引字段说明v2.md
... ... @@ -284,11 +284,12 @@
284 284 | `sku_weights` | long | 所有 SKU 重量列表(数组) | 从所有 SKU 重量汇总 |
285 285 | `sku_weight_units` | keyword | 所有 SKU 重量单位列表(数组) | 从所有 SKU 重量单位汇总 |
286 286  
287   -### 9. 库存字段
  287 +### 9. 库存与销量字段
288 288  
289 289 | 字段名 | ES类型 | 说明 | 数据来源 |
290 290 |--------|--------|------|----------|
291 291 | `total_inventory` | long | 总库存(所有 SKU 库存之和) | 从所有 SKU 库存汇总 |
  292 +| `sales` | long | 销量(展示销量) | MySQL: `shoplazza_product_spu.fake_sales` |
292 293  
293 294 ### 10. SKU 嵌套字段
294 295  
... ... @@ -363,6 +364,16 @@
363 364 - `vendor_zh.keyword`, `vendor_en.keyword`
364 365 - `specifications` (嵌套查询)
365 366 - `min_price`, `max_price` (范围过滤)
  367 +- `sales` (范围过滤)
  368 +- `total_inventory` (范围过滤)
  369 +
  370 +### 排序字段
  371 +
  372 +- `price`: 价格(前端传入,后端自动映射:asc→min_price,desc→max_price)
  373 +- `sales`: 销量
  374 +- `create_time`: 创建时间
  375 +- `update_time`: 更新时间
  376 +- `relevance_score`: 相关性分数(默认)
366 377  
367 378 ### 分面字段(聚合统计)
368 379  
... ...
frontend/index.html
... ... @@ -94,11 +94,18 @@
94 94 <span class="arrow-down" data-field="create_time" data-order="asc" onclick="sortByField('create_time', 'asc')">▼</span>
95 95 </span>
96 96 </button>
97   - <button class="sort-btn" data-sort="min_price">
  97 + <button class="sort-btn" data-sort="price">
98 98 By Price
99 99 <span class="sort-arrows">
100   - <span class="arrow-up" data-field="min_price" data-order="asc" onclick="sortByField('min_price', 'asc')">▲</span>
101   - <span class="arrow-down" data-field="min_price" data-order="desc" onclick="sortByField('min_price', 'desc')">▼</span>
  100 + <span class="arrow-up" data-field="price" data-order="asc" onclick="sortByField('price', 'asc')">▲</span>
  101 + <span class="arrow-down" data-field="price" data-order="desc" onclick="sortByField('price', 'desc')">▼</span>
  102 + </span>
  103 + </button>
  104 + <button class="sort-btn" data-sort="sales">
  105 + By Sales
  106 + <span class="sort-arrows">
  107 + <span class="arrow-up" data-field="sales" data-order="desc" onclick="sortByField('sales', 'desc')">▲</span>
  108 + <span class="arrow-down" data-field="sales" data-order="asc" onclick="sortByField('sales', 'asc')">▼</span>
102 109 </span>
103 110 </button>
104 111  
... ... @@ -106,8 +113,8 @@
106 113 <select id="resultSize" onchange="performSearch()">
107 114 <option value="20">20 per page</option>
108 115 <option value="50" selected>50 per page</option>
109   - <option value="100">50 per page</option>
110   - <option value="200">50 per page</option>
  116 + <option value="100">100 per page</option>
  117 + <option value="200">200 per page</option>
111 118 </select>
112 119 </div>
113 120 </div>
... ... @@ -135,6 +142,6 @@
135 142 <p>SearchEngine © 2025 | API: <span id="apiUrl">Loading...</span></p>
136 143 </footer>
137 144  
138   - <script src="/static/js/app.js?v=3.4"></script>
  145 + <script src="/static/js/app.js?v=3.7"></script>
139 146 </body>
140 147 </html>
... ...
frontend/static/js/app.js
... ... @@ -86,10 +86,10 @@ async function performSearch(page = 1) {
86 86  
87 87 // Define facets (一级分类 + 三个属性分面)
88 88 const facets = [
89   - "category1_name", // 一级分类
90   - "specifications.color", // 颜色属性
91   - "specifications.size", // 尺寸属性
92   - "specifications.material" // 材质属性
  89 + { field: "category1_name", size: 15, type: "terms" }, // 一级分类
  90 + { field: "specifications.color", size: 20, type: "terms" }, // 颜色属性
  91 + { field: "specifications.size", size: 15, type: "terms" }, // 尺寸属性
  92 + { field: "specifications.material", size: 10, type: "terms" } // 材质属性
93 93 ];
94 94  
95 95 // Show loading
... ...
indexer/spu_transformer.py
... ... @@ -6,11 +6,15 @@ Transforms SPU and SKU data from MySQL into SPU-level ES documents with nested s
6 6  
7 7 import pandas as pd
8 8 import numpy as np
  9 +import logging
9 10 from typing import Dict, Any, List, Optional
10 11 from sqlalchemy import create_engine, text
11 12 from utils.db_connector import create_db_connection
12 13 from config import ConfigLoader
13 14  
  15 +# Configure logger
  16 +logger = logging.getLogger(__name__)
  17 +
14 18  
15 19 class SPUTransformer:
16 20 """Transform SPU and SKU data into SPU-level ES documents."""
... ... @@ -38,7 +42,48 @@ class SPUTransformer:
38 42 except Exception as e:
39 43 print(f"Warning: Failed to load config, using default searchable_option_dimensions: {e}")
40 44 self.searchable_option_dimensions = ['option1', 'option2', 'option3']
  45 +
  46 + # Load category ID to name mapping
  47 + self.category_id_to_name = self._load_category_mapping()
41 48  
  49 + def _load_category_mapping(self) -> Dict[str, str]:
  50 + """
  51 + Load category ID to name mapping from database.
  52 +
  53 + Returns:
  54 + Dictionary mapping category_id to category_name
  55 + """
  56 + query = text("""
  57 + SELECT DISTINCT
  58 + category_id,
  59 + category
  60 + FROM shoplazza_product_spu
  61 + WHERE deleted = 0 AND category_id IS NOT NULL
  62 + """)
  63 +
  64 + mapping = {}
  65 + with self.db_engine.connect() as conn:
  66 + result = conn.execute(query)
  67 + for row in result:
  68 + category_id = str(int(row.category_id))
  69 + category_name = row.category
  70 +
  71 + if not category_name or not category_name.strip():
  72 + logger.warning(f"Category ID {category_id} has empty name, skipping")
  73 + continue
  74 +
  75 + mapping[category_id] = category_name
  76 +
  77 + logger.info(f"Loaded {len(mapping)} category ID to name mappings")
  78 +
  79 + # Log all category mappings for debugging
  80 + if mapping:
  81 + logger.debug("Category ID mappings:")
  82 + for cid, name in sorted(mapping.items()):
  83 + logger.debug(f" {cid} -> {name}")
  84 +
  85 + return mapping
  86 +
42 87 def load_spu_data(self) -> pd.DataFrame:
43 88 """
44 89 Load SPU data from MySQL.
... ... @@ -53,6 +98,7 @@ class SPUTransformer:
53 98 image_src, image_width, image_height, image_path, image_alt,
54 99 tags, note, category, category_id, category_google_id,
55 100 category_level, category_path,
  101 + fake_sales, display_fake_sales,
56 102 tenant_id, creator, create_time, updater, update_time, deleted
57 103 FROM shoplazza_product_spu
58 104 WHERE tenant_id = :tenant_id AND deleted = 0
... ... @@ -61,8 +107,26 @@ class SPUTransformer:
61 107 with self.db_engine.connect() as conn:
62 108 df = pd.read_sql(query, conn, params={"tenant_id": self.tenant_id})
63 109  
64   - # Debug: Check if there's any data for this tenant_id
65   - if len(df) == 0:
  110 + logger.info(f"Loaded {len(df)} SPU records for tenant_id={self.tenant_id}")
  111 +
  112 + # Statistics
  113 + if len(df) > 0:
  114 + has_category_path = df['category_path'].notna().sum()
  115 + has_category = df['category'].notna().sum()
  116 + has_title = df['title'].notna().sum()
  117 +
  118 + logger.info(f"SPU data statistics:")
  119 + logger.info(f" - Has title: {has_title}/{len(df)} ({100*has_title/len(df):.1f}%)")
  120 + logger.info(f" - Has category_path: {has_category_path}/{len(df)} ({100*has_category_path/len(df):.1f}%)")
  121 + logger.info(f" - Has category: {has_category}/{len(df)} ({100*has_category/len(df):.1f}%)")
  122 +
  123 + # Warn if too many SPUs don't have category_path
  124 + if has_category_path < len(df) * 0.5:
  125 + logger.warning(f"Only {100*has_category_path/len(df):.1f}% of SPUs have category_path, data quality may be low")
  126 + else:
  127 + logger.warning(f"No SPU data found for tenant_id={self.tenant_id}")
  128 +
  129 + # Debug: Check if there's any data for this tenant_id
66 130 debug_query = text("""
67 131 SELECT
68 132 COUNT(*) as total_count,
... ... @@ -77,7 +141,7 @@ class SPUTransformer:
77 141 total = debug_df.iloc[0]['total_count']
78 142 active = debug_df.iloc[0]['active_count']
79 143 deleted = debug_df.iloc[0]['deleted_count']
80   - print(f"DEBUG: tenant_id={self.tenant_id}: total={total}, active={active}, deleted={deleted}")
  144 + logger.debug(f"tenant_id={self.tenant_id}: total={total}, active={active}, deleted={deleted}")
81 145  
82 146 # Check what tenant_ids exist in the table
83 147 tenant_check_query = text("""
... ... @@ -90,9 +154,9 @@ class SPUTransformer:
90 154 with self.db_engine.connect() as conn:
91 155 tenant_df = pd.read_sql(tenant_check_query, conn)
92 156 if not tenant_df.empty:
93   - print(f"DEBUG: Available tenant_ids in shoplazza_product_spu:")
  157 + logger.debug(f"Available tenant_ids in shoplazza_product_spu:")
94 158 for _, row in tenant_df.iterrows():
95   - print(f" tenant_id={row['tenant_id']}: total={row['count']}, active={row['active']}")
  159 + logger.debug(f" tenant_id={row['tenant_id']}: total={row['count']}, active={row['active']}")
96 160  
97 161 return df
98 162  
... ... @@ -120,7 +184,26 @@ class SPUTransformer:
120 184 with self.db_engine.connect() as conn:
121 185 df = pd.read_sql(query, conn, params={"tenant_id": self.tenant_id})
122 186  
123   - print(f"DEBUG: Loaded {len(df)} SKU records for tenant_id={self.tenant_id}")
  187 + logger.info(f"Loaded {len(df)} SKU records for tenant_id={self.tenant_id}")
  188 +
  189 + # Statistics
  190 + if len(df) > 0:
  191 + has_price = df['price'].notna().sum()
  192 + has_inventory = df['inventory_quantity'].notna().sum()
  193 + has_option1 = df['option1'].notna().sum()
  194 + has_option2 = df['option2'].notna().sum()
  195 + has_option3 = df['option3'].notna().sum()
  196 +
  197 + logger.info(f"SKU data statistics:")
  198 + logger.info(f" - Has price: {has_price}/{len(df)} ({100*has_price/len(df):.1f}%)")
  199 + logger.info(f" - Has inventory: {has_inventory}/{len(df)} ({100*has_inventory/len(df):.1f}%)")
  200 + logger.info(f" - Has option1: {has_option1}/{len(df)} ({100*has_option1/len(df):.1f}%)")
  201 + logger.info(f" - Has option2: {has_option2}/{len(df)} ({100*has_option2/len(df):.1f}%)")
  202 + logger.info(f" - Has option3: {has_option3}/{len(df)} ({100*has_option3/len(df):.1f}%)")
  203 +
  204 + # Warn about data quality issues
  205 + if has_price < len(df) * 0.95:
  206 + logger.warning(f"Only {100*has_price/len(df):.1f}% of SKUs have price")
124 207  
125 208 return df
126 209  
... ... @@ -144,7 +227,21 @@ class SPUTransformer:
144 227 with self.db_engine.connect() as conn:
145 228 df = pd.read_sql(query, conn, params={"tenant_id": self.tenant_id})
146 229  
147   - print(f"DEBUG: Loaded {len(df)} option records for tenant_id={self.tenant_id}")
  230 + logger.info(f"Loaded {len(df)} option records for tenant_id={self.tenant_id}")
  231 +
  232 + # Statistics
  233 + if len(df) > 0:
  234 + unique_spus_with_options = df['spu_id'].nunique()
  235 + has_name = df['name'].notna().sum()
  236 +
  237 + logger.info(f"Option data statistics:")
  238 + logger.info(f" - Unique SPUs with options: {unique_spus_with_options}")
  239 + logger.info(f" - Has name: {has_name}/{len(df)} ({100*has_name/len(df):.1f}%)")
  240 +
  241 + # Warn about missing option names
  242 + if has_name < len(df):
  243 + missing = len(df) - has_name
  244 + logger.warning(f"{missing} option records are missing names")
148 245  
149 246 return df
150 247  
... ... @@ -155,35 +252,61 @@ class SPUTransformer:
155 252 Returns:
156 253 List of SPU-level ES documents
157 254 """
  255 + logger.info(f"Starting data transformation for tenant_id={self.tenant_id}")
  256 +
158 257 # Load data
159 258 spu_df = self.load_spu_data()
160 259 sku_df = self.load_sku_data()
161 260 option_df = self.load_option_data()
162 261  
163 262 if spu_df.empty:
  263 + logger.warning("No SPU data to transform")
164 264 return []
165 265  
166 266 # Group SKUs by SPU
167 267 sku_groups = sku_df.groupby('spu_id')
  268 + logger.info(f"Grouped SKUs into {len(sku_groups)} SPU groups")
168 269  
169 270 # Group options by SPU
170 271 option_groups = option_df.groupby('spu_id') if not option_df.empty else None
  272 + if option_groups:
  273 + logger.info(f"Grouped options into {len(option_groups)} SPU groups")
171 274  
172 275 documents = []
173   - for _, spu_row in spu_df.iterrows():
  276 + skipped_count = 0
  277 + error_count = 0
  278 +
  279 + for idx, spu_row in spu_df.iterrows():
174 280 spu_id = spu_row['id']
175 281  
176   - # Get SKUs for this SPU
177   - skus = sku_groups.get_group(spu_id) if spu_id in sku_groups.groups else pd.DataFrame()
178   -
179   - # Get options for this SPU
180   - options = option_groups.get_group(spu_id) if option_groups and spu_id in option_groups.groups else pd.DataFrame()
181   -
182   - # Transform to ES document
183   - doc = self._transform_spu_to_doc(spu_row, skus, options)
184   - if doc:
185   - documents.append(doc)
186   -
  282 + try:
  283 + # Get SKUs for this SPU
  284 + skus = sku_groups.get_group(spu_id) if spu_id in sku_groups.groups else pd.DataFrame()
  285 +
  286 + # Get options for this SPU
  287 + options = option_groups.get_group(spu_id) if option_groups and spu_id in option_groups.groups else pd.DataFrame()
  288 +
  289 + # Warn if SPU has no SKUs
  290 + if skus.empty:
  291 + logger.warning(f"SPU {spu_id} (title: {spu_row.get('title', 'N/A')}) has no SKUs")
  292 +
  293 + # Transform to ES document
  294 + doc = self._transform_spu_to_doc(spu_row, skus, options)
  295 + if doc:
  296 + documents.append(doc)
  297 + else:
  298 + skipped_count += 1
  299 + logger.warning(f"SPU {spu_id} transformation returned None, skipped")
  300 + except Exception as e:
  301 + error_count += 1
  302 + logger.error(f"Error transforming SPU {spu_id}: {e}", exc_info=True)
  303 +
  304 + logger.info(f"Transformation complete:")
  305 + logger.info(f" - Total SPUs: {len(spu_df)}")
  306 + logger.info(f" - Successfully transformed: {len(documents)}")
  307 + logger.info(f" - Skipped: {skipped_count}")
  308 + logger.info(f" - Errors: {error_count}")
  309 +
187 310 return documents
188 311  
189 312 def _transform_spu_to_doc(
... ... @@ -209,7 +332,12 @@ class SPUTransformer:
209 332 doc['tenant_id'] = str(self.tenant_id)
210 333  
211 334 # SPU ID
212   - doc['spu_id'] = str(spu_row['id'])
  335 + spu_id = spu_row['id']
  336 + doc['spu_id'] = str(spu_id)
  337 +
  338 + # Validate required fields
  339 + if pd.isna(spu_row.get('title')) or not str(spu_row['title']).strip():
  340 + logger.error(f"SPU {spu_id} has no title, this may cause search issues")
213 341  
214 342 # 文本相关性相关字段(中英文双语,暂时只填充中文)
215 343 if pd.notna(spu_row.get('title')):
... ... @@ -237,17 +365,35 @@ class SPUTransformer:
237 365 # Category相关字段
238 366 if pd.notna(spu_row.get('category_path')):
239 367 category_path = str(spu_row['category_path'])
240   - doc['category_path_zh'] = category_path
241   - doc['category_path_en'] = None # 暂时设为空
242 368  
243   - # 解析category_path获取多层级分类名称
244   - path_parts = category_path.split('/')
245   - if len(path_parts) > 0:
246   - doc['category1_name'] = path_parts[0].strip()
247   - if len(path_parts) > 1:
248   - doc['category2_name'] = path_parts[1].strip()
249   - if len(path_parts) > 2:
250   - doc['category3_name'] = path_parts[2].strip()
  369 + # 解析category_path - 这是逗号分隔的类目ID列表
  370 + category_ids = [cid.strip() for cid in category_path.split(',') if cid.strip()]
  371 +
  372 + # 将ID映射为名称
  373 + category_names = []
  374 + missing_category_ids = []
  375 + for cid in category_ids:
  376 + if cid in self.category_id_to_name:
  377 + category_names.append(self.category_id_to_name[cid])
  378 + else:
  379 + # 如果找不到映射,记录错误并使用ID作为备选
  380 + logger.error(f"Category ID {cid} not found in mapping for SPU {spu_row['id']} (title: {spu_row.get('title', 'N/A')}), category_path={category_path}")
  381 + missing_category_ids.append(cid)
  382 + category_names.append(cid) # 使用ID作为备选
  383 +
  384 + # 构建类目路径字符串(用于搜索)
  385 + if category_names:
  386 + category_path_str = '/'.join(category_names)
  387 + doc['category_path_zh'] = category_path_str
  388 + doc['category_path_en'] = None # 暂时设为空
  389 +
  390 + # 填充分层类目名称
  391 + if len(category_names) > 0:
  392 + doc['category1_name'] = category_names[0]
  393 + if len(category_names) > 1:
  394 + doc['category2_name'] = category_names[1]
  395 + if len(category_names) > 2:
  396 + doc['category3_name'] = category_names[2]
251 397 elif pd.notna(spu_row.get('category')):
252 398 # 如果category_path为空,使用category字段作为category1_name的备选
253 399 category = str(spu_row['category'])
... ... @@ -302,6 +448,15 @@ class SPUTransformer:
302 448 image_src = f"//{image_src}" if image_src.startswith('//') else image_src
303 449 doc['image_url'] = image_src
304 450  
  451 + # Sales (fake_sales)
  452 + if pd.notna(spu_row.get('fake_sales')):
  453 + try:
  454 + doc['sales'] = int(spu_row['fake_sales'])
  455 + except (ValueError, TypeError):
  456 + doc['sales'] = 0
  457 + else:
  458 + doc['sales'] = 0
  459 +
305 460 # Process SKUs and build specifications
306 461 skus_list = []
307 462 prices = []
... ...
search/es_query_builder.py
... ... @@ -519,7 +519,7 @@ class ESQueryBuilder:
519 519  
520 520 Args:
521 521 es_query: Existing ES query
522   - sort_by: Field name for sorting
  522 + sort_by: Field name for sorting (支持 'price' 自动映射)
523 523 sort_order: Sort order: 'asc' or 'desc'
524 524  
525 525 Returns:
... ... @@ -531,6 +531,13 @@ class ESQueryBuilder:
531 531 if not sort_order:
532 532 sort_order = "desc"
533 533  
  534 + # Auto-map 'price' to 'min_price' or 'max_price' based on sort_order
  535 + if sort_by == "price":
  536 + if sort_order.lower() == "asc":
  537 + sort_by = "min_price" # 价格从低到高
  538 + else:
  539 + sort_by = "max_price" # 价格从高到低
  540 +
534 541 if "sort" not in es_query:
535 542 es_query["sort"] = []
536 543  
... ... @@ -546,20 +553,18 @@ class ESQueryBuilder:
546 553  
547 554 def build_facets(
548 555 self,
549   - facet_configs: Optional[List[Union[str, 'FacetConfig']]] = None
  556 + facet_configs: Optional[List['FacetConfig']] = None
550 557 ) -> Dict[str, Any]:
551 558 """
552 559 构建分面聚合。
553 560  
554   - 支持:
555   - 1. 分类分面:category1_name, category2_name, category3_name, category_name
556   - 2. specifications分面:嵌套聚合,按name聚合,然后按value聚合
557   -
558 561 Args:
559   - facet_configs: 分面配置列表(标准格式):
560   - - str: 字段名,使用默认 terms 配置
561   - - FacetConfig: 详细的分面配置对象
562   - - 特殊值 "specifications": 构建specifications嵌套分面
  562 + facet_configs: 分面配置对象列表
  563 +
  564 + 支持的字段类型:
  565 + - 普通字段: 如 "category1_name"(terms 或 range 类型)
  566 + - specifications: "specifications"(返回所有规格名称及其值)
  567 + - specifications.{name}: 如 "specifications.color"(返回指定规格名称的值)
563 568  
564 569 Returns:
565 570 ES aggregations 字典
... ... @@ -570,99 +575,76 @@ class ESQueryBuilder:
570 575 aggs = {}
571 576  
572 577 for config in facet_configs:
573   - # 特殊处理:specifications嵌套分面
574   - if isinstance(config, str):
575   - # 格式1: "specifications" - 返回所有name的分面
576   - if config == "specifications":
577   - aggs["specifications_facet"] = {
578   - "nested": {
579   - "path": "specifications"
580   - },
581   - "aggs": {
582   - "by_name": {
583   - "terms": {
584   - "field": "specifications.name",
585   - "size": 20,
586   - "order": {"_count": "desc"}
587   - },
588   - "aggs": {
589   - "value_counts": {
590   - "terms": {
591   - "field": "specifications.value",
592   - "size": 10,
593   - "order": {"_count": "desc"}
594   - }
  578 + field = config.field
  579 + size = config.size
  580 + facet_type = config.type
  581 +
  582 + # 处理 specifications(所有规格名称)
  583 + if field == "specifications":
  584 + aggs["specifications_facet"] = {
  585 + "nested": {"path": "specifications"},
  586 + "aggs": {
  587 + "by_name": {
  588 + "terms": {
  589 + "field": "specifications.name",
  590 + "size": 20,
  591 + "order": {"_count": "desc"}
  592 + },
  593 + "aggs": {
  594 + "value_counts": {
  595 + "terms": {
  596 + "field": "specifications.value",
  597 + "size": size,
  598 + "order": {"_count": "desc"}
595 599 }
596 600 }
597 601 }
598 602 }
599 603 }
600   - continue
601   -
602   - # 格式2: "specifications.color" 或 "specifications.颜色" - 只返回指定name的value列表
603   - if config.startswith("specifications."):
604   - name = config[len("specifications."):]
605   - agg_name = f"specifications_{name}_facet"
606   - aggs[agg_name] = {
607   - "nested": {
608   - "path": "specifications"
609   - },
610   - "aggs": {
611   - "filter_by_name": {
612   - "filter": {
613   - "term": {"specifications.name": name}
614   - },
615   - "aggs": {
616   - "value_counts": {
617   - "terms": {
618   - "field": "specifications.value",
619   - "size": 10,
620   - "order": {"_count": "desc"}
621   - }
  604 + }
  605 + continue
  606 +
  607 + # 处理 specifications.{name}(指定规格名称)
  608 + if field.startswith("specifications."):
  609 + name = field[len("specifications."):]
  610 + agg_name = f"specifications_{name}_facet"
  611 + aggs[agg_name] = {
  612 + "nested": {"path": "specifications"},
  613 + "aggs": {
  614 + "filter_by_name": {
  615 + "filter": {"term": {"specifications.name": name}},
  616 + "aggs": {
  617 + "value_counts": {
  618 + "terms": {
  619 + "field": "specifications.value",
  620 + "size": size,
  621 + "order": {"_count": "desc"}
622 622 }
623 623 }
624 624 }
625 625 }
626 626 }
627   - continue
  627 + }
  628 + continue
  629 +
  630 + # 处理普通字段
  631 + agg_name = f"{field}_facet"
628 632  
629   - # 简单模式:只有字段名(字符串,非specifications)
630   - if isinstance(config, str):
631   - field = config
632   - agg_name = f"{field}_facet"
  633 + if facet_type == 'terms':
633 634 aggs[agg_name] = {
634 635 "terms": {
635 636 "field": field,
636   - "size": 10,
  637 + "size": size,
637 638 "order": {"_count": "desc"}
638 639 }
639 640 }
640   - continue
641   -
642   - # 高级模式:FacetConfig 对象
643   - else:
644   - # 此时 config 应该是 FacetConfig 对象
645   - field = config.field
646   - facet_type = config.type
647   - size = config.size
648   - agg_name = f"{field}_facet"
649   -
650   - if facet_type == 'terms':
  641 + elif facet_type == 'range':
  642 + if config.ranges:
651 643 aggs[agg_name] = {
652   - "terms": {
  644 + "range": {
653 645 "field": field,
654   - "size": size,
655   - "order": {"_count": "desc"}
  646 + "ranges": config.ranges
656 647 }
657 648 }
658   -
659   - elif facet_type == 'range':
660   - if config.ranges:
661   - aggs[agg_name] = {
662   - "range": {
663   - "field": field,
664   - "ranges": config.ranges
665   - }
666   - }
667 649  
668 650 return aggs
... ...
search/searcher.py
... ... @@ -17,7 +17,7 @@ from .rerank_engine import RerankEngine
17 17 from config import SearchConfig
18 18 from config.utils import get_match_fields_for_index
19 19 from context.request_context import RequestContext, RequestContextStage, create_request_context
20   -from api.models import FacetResult, FacetValue
  20 +from api.models import FacetResult, FacetValue, FacetConfig
21 21 from api.result_formatter import ResultFormatter
22 22  
23 23 logger = logging.getLogger(__name__)
... ... @@ -123,7 +123,7 @@ class Searcher:
123 123 from_: int = 0,
124 124 filters: Optional[Dict[str, Any]] = None,
125 125 range_filters: Optional[Dict[str, Any]] = None,
126   - facets: Optional[List[Any]] = None,
  126 + facets: Optional[List[FacetConfig]] = None,
127 127 min_score: Optional[float] = None,
128 128 context: Optional[RequestContext] = None,
129 129 sort_by: Optional[str] = None,
... ... @@ -651,7 +651,7 @@ class Searcher:
651 651 # 构建 FacetResult 对象
652 652 facet_result = FacetResult(
653 653 field=field,
654   - label=self._get_field_label(field),
  654 + label=field,
655 655 type=facet_type,
656 656 values=facet_values
657 657 )
... ... @@ -659,14 +659,3 @@ class Searcher:
659 659 standardized_facets.append(facet_result)
660 660  
661 661 return standardized_facets if standardized_facets else None
662   -
663   - def _get_field_label(self, field: str) -> str:
664   - """获取字段的显示标签"""
665   - # 字段标签映射(简化版,不再从配置读取)
666   - field_labels = {
667   - "category1_name": "一级分类",
668   - "category2_name": "二级分类",
669   - "category3_name": "三级分类",
670   - "specifications": "规格"
671   - }
672   - return field_labels.get(field, field)
... ...