Commit a10a89a395b0189e3f533aeb48ce9d1df068ccd4

Authored by tangwang
1 parent acf1349c

构造测试数据用于测试分类 和 三种属性的分面。

... ... @@ -20,7 +20,7 @@ DB_HOST=120.79.247.228
20 20 DB_PORT=3316
21 21 DB_DATABASE=saas
22 22 DB_USERNAME=saas
23   -DB_PASSWORD=P89cZHS5d7dFyc9R
  23 +DB_PASSWORD=pcjY7iwX1C6le1oz
24 24  
25 25 # Model Directories
26 26 TEXT_MODEL_DIR=/data/tw/models/bge-m3 # 已经改为web请求了,不使用本地模型
... ...
README.md
... ... @@ -2,10 +2,31 @@
2 2  
3 3 一个针对跨境独立站(店匠 Shoplazza 等)的多租户可配置搜索平台。README 作为项目导航入口,帮助你在不同阶段定位到更详细的文档。
4 4  
  5 +
5 6 ## 项目环境
6 7 source /home/tw/miniconda3/etc/profile.d/conda.sh
7 8 conda activate searchengine
8 9  
  10 +## 测试pipeline
  11 +
  12 +用 fake 数据生成商品导入文件(Excel),并提交到店匠的店铺:
  13 +cd /home/tw/SearchEngine && source /home/tw/miniconda3/etc/profile.d/conda.sh && conda activate searchengine && python scripts/csv_to_excel_multi_variant.py --output with_colors.xlsx
  14 +
  15 +提交后,数据会自动同步到 MySQL。
  16 +然后将 MySQL 中的数据导入 ES:
  17 +
  18 +python scripts/recreate_and_import.py \
  19 + --tenant-id 162 \
  20 + --db-host <mysql_host> \
  21 + --db-database saas \
  22 + --db-username saas \
  23 + --db-password <password> \
  24 + --es-host http://localhost:9200
  25 +
  26 +构造查询:
  27 +参考 docs/Search-API-Examples.md(其中的示例请求使用 X-Tenant-ID: 162)
  28 +
  29 +
9 30 ## 核心能力速览
10 31  
11 32 - **多语言 + 自动翻译**:中文、英文、俄文等语言检测与路由(BGE-M3、DeepL)
... ...
api/result_formatter.py
... ... @@ -268,33 +268,38 @@ class ResultFormatter:
268 268 facets.append(facet)
269 269 continue
270 270  
271   - # 处理specifications嵌套分面(指定name)
272   - if field_name.startswith("specifications_") and field_name.endswith("_facet") and 'filter_by_name' in agg_data:
273   - # 提取name(从 "specifications_颜色_facet" 提取 "颜色")
  271 + # 处理specifications嵌套分面(指定name,如 specifications.color)
  272 + if field_name.startswith("specifications_") and field_name.endswith("_facet"):
  273 + # 提取name(从 "specifications_color_facet" 提取 "color")
274 274 name = field_name[len("specifications_"):-len("_facet")]
275   - filter_by_name_agg = agg_data.get('filter_by_name', {})
276   - value_counts = filter_by_name_agg.get('value_counts', {})
277 275  
278   - values = []
279   - if 'buckets' in value_counts:
280   - for value_bucket in value_counts['buckets']:
281   - value = FacetValue(
282   - value=value_bucket['key'],
283   - label=str(value_bucket['key']),
284   - count=value_bucket['doc_count'],
285   - selected=False
286   - )
287   - values.append(value)
  276 + # ES nested聚合返回结构: { "doc_count": N, "filter_by_name": { ... } }
  277 + # filter_by_name应该在agg_data的第一层
  278 + filter_by_name_agg = agg_data.get('filter_by_name')
288 279  
289   - # 创建分面结果
290   - facet = FacetResult(
291   - field=f"specifications.{name}",
292   - label=str(name),
293   - type="terms",
294   - values=values,
295   - total_count=filter_by_name_agg.get('doc_count', 0)
296   - )
297   - facets.append(facet)
  280 + if filter_by_name_agg:
  281 + value_counts = filter_by_name_agg.get('value_counts', {})
  282 +
  283 + values = []
  284 + if 'buckets' in value_counts and value_counts['buckets']:
  285 + for value_bucket in value_counts['buckets']:
  286 + value = FacetValue(
  287 + value=value_bucket['key'],
  288 + label=str(value_bucket['key']),
  289 + count=value_bucket['doc_count'],
  290 + selected=False
  291 + )
  292 + values.append(value)
  293 +
  294 + # 创建分面结果
  295 + facet = FacetResult(
  296 + field=f"specifications.{name}",
  297 + label=str(name),
  298 + type="terms",
  299 + values=values,
  300 + total_count=filter_by_name_agg.get('doc_count', 0)
  301 + )
  302 + facets.append(facet)
298 303 continue
299 304  
300 305 # Handle terms aggregation
... ...
database-config-analysis.md 0 → 100644
... ... @@ -0,0 +1,246 @@
  1 +# 数据库配置分析与建议
  2 +
  3 +## 当前配置分析
  4 +
  5 +从提供的 YAML 配置文件中,发现以下情况:
  6 +
  7 +### 1. 缺失的数据库配置
  8 +
  9 +**当前配置文件中缺少直接的数据源配置**,需要添加以下配置:
  10 +
  11 +```yaml
  12 +spring:
  13 + datasource:
  14 + # 主数据源配置
  15 + master:
  16 + url: jdbc:mysql://localhost:3306/saas
  17 + username: saas
  18 + password: ${DB_PASSWORD}
  19 + driver-class: com.mysql.cj.jdbc.Driver
  20 + type: com.zaxxer.hikari.HikariDataSource
  21 + hikari:
  22 + maximum-pool-size: 20
  23 + minimum-idle: 5
  24 + connection-timeout: 30000
  25 + idle-timeout: 600000
  26 + max-lifetime: 1800000
  27 +
  28 + # 动态数据源配置(多租户支持)
  29 + dynamic:
  30 + enabled: true
  31 + primary: master
  32 + strict: false
  33 + datasource:
  34 + # 店匠生产数据库
  35 + shoplazza:
  36 + url: jdbc:mysql://120.79.247.228:3316/saas
  37 + username: saas
  38 + password: ${DB_PASSWORD}
  39 + driver-class: com.mysql.cj.jdbc.Driver
  40 + type: com.zaxxer.hikari.HikariDataSource
  41 +```
  42 +
  43 +### 2. 当前已有的相关配置
  44 +
  45 +#### Redis 配置
  46 +```yaml
  47 +spring:
  48 + data:
  49 + redis:
  50 + host: 127.0.0.1
  51 + port: 6379
  52 + database: 0
  53 + timeout: 5000ms
  54 + lettuce:
  55 + pool:
  56 + max-active: 200
  57 + max-idle: 20
  58 + min-idle: 5
  59 + max-wait: -1ms
  60 +```
  61 +
  62 +#### MyBatis Plus 配置
  63 +```yaml
  64 +mybatis-plus:
  65 + configuration:
  66 + map-underscore-to-camel-case: true
  67 + global-config:
  68 + db-config:
  69 + id-type: NONE
  70 + logic-delete-value: 1
  71 + logic-not-delete-value: 0
  72 +```
  73 +
  74 +### 3. 从项目结构推断的数据库配置
  75 +
  76 +基于之前分析的项目文件,完整的数据库配置应该包括:
  77 +
  78 +#### 3.1 连接池配置
  79 +```yaml
  80 +spring:
  81 + datasource:
  82 + master:
  83 + hikari:
  84 + # 连接池最大连接数
  85 + maximum-pool-size: 20
  86 + # 连接池最小空闲连接数
  87 + minimum-idle: 5
  88 + # 连接超时时间(毫秒)
  89 + connection-timeout: 30000
  90 + # 空闲连接超时时间(毫秒)
  91 + idle-timeout: 600000
  92 + # 连接最大生命周期(毫秒)
  93 + max-lifetime: 1800000
  94 + # 连接测试查询
  95 + connection-test-query: SELECT 1
  96 +```
  97 +
  98 +#### 3.2 多数据源配置
  99 +```yaml
  100 +spring:
  101 + datasource:
  102 + dynamic:
  103 + enabled: true
  104 + primary: master
  105 + strict: false
  106 + datasource:
  107 + # 主库(读写)
  108 + master:
  109 + url: jdbc:mysql://120.79.247.228:3316/saas
  110 + username: saas
  111 + password: ${DB_PASSWORD}
  112 + driver-class: com.mysql.cj.jdbc.Driver
  113 +
  114 + # 从库(只读)- 可选配置
  115 + slave:
  116 + url: jdbc:mysql://slave-host:3306/saas
  117 + username: saas_readonly
  118 + password: readonly_password
  119 + driver-class: com.mysql.cj.jdbc.Driver
  120 +```
  121 +
  122 +#### 3.3 JPA/Hibernate 配置
  123 +```yaml
  124 +spring:
  125 + jpa:
  126 + show-sql: false
  127 + hibernate:
  128 + ddl-auto: none
  129 + naming:
  130 + physical-strategy: org.springframework.boot.orm.jpa.hibernate.SpringPhysicalNamingStrategy
  131 + properties:
  132 + hibernate:
  133 + dialect: org.hibernate.dialect.MySQL8Dialect
  134 + format_sql: true
  135 + use_sql_comments: true
  136 + jdbc:
  137 + batch_size: 50
  138 + order_inserts: true
  139 + order_updates: true
  140 +```
  141 +
  142 +### 4. 环境配置建议
  143 +
  144 +#### 开发环境(application-dev.yml)
  145 +```yaml
  146 +spring:
  147 + profiles:
  148 + active: dev
  149 +
  150 + datasource:
  151 + master:
  152 + url: jdbc:mysql://localhost:3306/saas_dev
  153 + username: root
  154 + password: root
  155 + driver-class: com.mysql.cj.jdbc.Driver
  156 + hikari:
  157 + maximum-pool-size: 10
  158 + minimum-idle: 2
  159 + connection-timeout: 30000
  160 +
  161 +# 开发环境 SQL 输出
  162 +logging:
  163 + level:
  164 + com.hsyl.saas.mapper: DEBUG
  165 + org.springframework.jdbc.core: DEBUG
  166 +```
  167 +
  168 +#### 生产环境(application-prod.yml)
  169 +```yaml
  170 +spring:
  171 + profiles:
  172 + active: prod
  173 +
  174 + datasource:
  175 + master:
  176 + url: jdbc:mysql://120.79.247.228:3316/saas
  177 + username: saas
  178 + password: ${DB_PASSWORD}
  179 + driver-class: com.mysql.cj.jdbc.Driver
  180 + hikari:
  181 + maximum-pool-size: 50
  182 + minimum-idle: 10
  183 + connection-timeout: 60000
  184 + max-lifetime: 3600000
  185 +
  186 +# 生产环境 SQL 监控
  187 +management:
  188 + endpoints:
  189 + web:
  190 + exposure:
  191 + include: health,info,metrics,datasource
  192 +```
  193 +
  194 +### 5. 数据库连接信息汇总
  195 +
  196 +| 环境 | 主机 | 端口 | 数据库 | 用户名 | 密码 |
  197 +|------|------|------|--------|--------|------|
  198 +| 本地开发 | localhost | 3306 | saas | saas | `<password>` |
  199 +| 生产环境 | 120.79.247.228 | 3316 | saas | saas | `<password>` |
  200 +
  201 +### 6. 必需的依赖项
  202 +
  203 +确保 `pom.xml` 或 `build.gradle` 包含以下依赖:
  204 +
  205 +```xml
  206 +<!-- MySQL 连接器 -->
  207 +<dependency>
  208 + <groupId>mysql</groupId>
  209 + <artifactId>mysql-connector-java</artifactId>
  210 + <version>8.0.33</version>
  211 +</dependency>
  212 +
  213 +<!-- HikariCP 连接池 -->
  214 +<dependency>
  215 + <groupId>com.zaxxer</groupId>
  216 + <artifactId>HikariCP</artifactId>
  217 + <version>5.0.1</version>
  218 +</dependency>
  219 +
  220 +<!-- 动态数据源 -->
  221 +<dependency>
  222 + <groupId>com.baomidou</groupId>
  223 + <artifactId>dynamic-datasource-spring-boot-starter</artifactId>
  224 + <version>3.6.1</version>
  225 +</dependency>
  226 +```
  227 +
  228 +### 7. 测试数据库连接
  229 +
  230 +```bash
  231 +# 测试本地数据库连接(-p 后不带密码,执行时按提示输入,避免密码出现在 shell 历史和进程列表中)
  232 +mysql -h localhost -P 3306 -u saas -p saas
  233 +
  234 +# 测试生产数据库连接(同样按提示输入密码)
  235 +mysql -h 120.79.247.228 -P 3316 -u saas -p saas
  236 +```
  237 +
  238 +## 结论
  239 +
  240 +当前配置文件缺少完整的数据库配置,需要补充:
  241 +1. **数据源连接信息**(URL、用户名、密码)
  242 +2. **连接池配置**(HikariCP 参数)
  243 +3. **多数据源配置**(如需多租户支持)
  244 +4. **JPA/Hibernate 配置**(数据库方言、DDL 策略等)
  245 +
  246 +建议根据环境(开发/测试/生产)分别配置不同的数据库连接参数。
0 247 \ No newline at end of file
... ...
docs/Search-API-Examples.md
... ... @@ -23,7 +23,7 @@
23 23 ```bash
24 24 curl -X POST "http://localhost:6002/search/" \
25 25 -H "Content-Type: application/json" \
26   - -H "X-Tenant-ID: 2" \
  26 + -H "X-Tenant-ID: 162" \
27 27 -d '{
28 28 "query": "芭比娃娃"
29 29 }'
... ... @@ -49,7 +49,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
49 49 ```bash
50 50 curl -X POST "http://localhost:6002/search/" \
51 51 -H "Content-Type: application/json" \
52   - -H "X-Tenant-ID: 2" \
  52 + -H "X-Tenant-ID: 162" \
53 53 -d '{
54 54 "query": "手机",
55 55 "language": "zh",
... ... @@ -63,7 +63,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
63 63 # 第1页(0-19)
64 64 curl -X POST "http://localhost:6002/search/" \
65 65 -H "Content-Type: application/json" \
66   - -H "X-Tenant-ID: 2" \
  66 + -H "X-Tenant-ID: 162" \
67 67 -d '{
68 68 "query": "手机",
69 69 "language": "zh",
... ... @@ -74,7 +74,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
74 74 # 第2页(20-39)
75 75 curl -X POST "http://localhost:6002/search/" \
76 76 -H "Content-Type: application/json" \
77   - -H "X-Tenant-ID: 2" \
  77 + -H "X-Tenant-ID: 162" \
78 78 -d '{
79 79 "query": "手机",
80 80 "language": "zh",
... ... @@ -94,7 +94,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
94 94 ```bash
95 95 curl -X POST "http://localhost:6002/search/" \
96 96 -H "Content-Type: application/json" \
97   - -H "X-Tenant-ID: 2" \
  97 + -H "X-Tenant-ID: 162" \
98 98 -d '{
99 99 "query": "手机",
100 100 "language": "zh",
... ... @@ -109,7 +109,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
109 109 ```bash
110 110 curl -X POST "http://localhost:6002/search/" \
111 111 -H "Content-Type: application/json" \
112   - -H "X-Tenant-ID: 2" \
  112 + -H "X-Tenant-ID: 162" \
113 113 -d '{
114 114 "query": "手机",
115 115 "language": "zh",
... ... @@ -126,7 +126,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
126 126 ```bash
127 127 curl -X POST "http://localhost:6002/search/" \
128 128 -H "Content-Type: application/json" \
129   - -H "X-Tenant-ID: 2" \
  129 + -H "X-Tenant-ID: 162" \
130 130 -d '{
131 131 "query": "手机",
132 132 "language": "zh",
... ... @@ -144,7 +144,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
144 144 ```bash
145 145 curl -X POST "http://localhost:6002/search/" \
146 146 -H "Content-Type: application/json" \
147   - -H "X-Tenant-ID: 2" \
  147 + -H "X-Tenant-ID: 162" \
148 148 -d '{
149 149 "query": "手机",
150 150 "language": "zh",
... ... @@ -164,7 +164,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
164 164 ```bash
165 165 curl -X POST "http://localhost:6002/search/" \
166 166 -H "Content-Type: application/json" \
167   - -H "X-Tenant-ID: 2" \
  167 + -H "X-Tenant-ID: 162" \
168 168 -d '{
169 169 "query": "手机",
170 170 "language": "zh",
... ... @@ -184,7 +184,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
184 184 ```bash
185 185 curl -X POST "http://localhost:6002/search/" \
186 186 -H "Content-Type: application/json" \
187   - -H "X-Tenant-ID: 2" \
  187 + -H "X-Tenant-ID: 162" \
188 188 -d '{
189 189 "query": "手机",
190 190 "language": "zh",
... ... @@ -207,7 +207,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
207 207 ```bash
208 208 curl -X POST "http://localhost:6002/search/" \
209 209 -H "Content-Type: application/json" \
210   - -H "X-Tenant-ID: 2" \
  210 + -H "X-Tenant-ID: 162" \
211 211 -d '{
212 212 "query": "手机",
213 213 "language": "zh",
... ... @@ -227,7 +227,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
227 227 ```bash
228 228 curl -X POST "http://localhost:6002/search/" \
229 229 -H "Content-Type: application/json" \
230   - -H "X-Tenant-ID: 2" \
  230 + -H "X-Tenant-ID: 162" \
231 231 -d '{
232 232 "query": "手机",
233 233 "language": "zh",
... ... @@ -246,7 +246,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
246 246 ```bash
247 247 curl -X POST "http://localhost:6002/search/" \
248 248 -H "Content-Type: application/json" \
249   - -H "X-Tenant-ID: 2" \
  249 + -H "X-Tenant-ID: 162" \
250 250 -d '{
251 251 "query": "手机",
252 252 "language": "zh",
... ... @@ -265,7 +265,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
265 265 ```bash
266 266 curl -X POST "http://localhost:6002/search/" \
267 267 -H "Content-Type: application/json" \
268   - -H "X-Tenant-ID: 2" \
  268 + -H "X-Tenant-ID: 162" \
269 269 -d '{
270 270 "query": "手机",
271 271 "language": "zh",
... ... @@ -288,7 +288,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
288 288 ```bash
289 289 curl -X POST "http://localhost:6002/search/" \
290 290 -H "Content-Type: application/json" \
291   - -H "X-Tenant-ID: 2" \
  291 + -H "X-Tenant-ID: 162" \
292 292 -d '{
293 293 "query": "手机",
294 294 "language": "zh",
... ... @@ -318,7 +318,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
318 318 ```bash
319 319 curl -X POST "http://localhost:6002/search/" \
320 320 -H "Content-Type: application/json" \
321   - -H "X-Tenant-ID: 2" \
  321 + -H "X-Tenant-ID: 162" \
322 322 -d '{
323 323 "query": "手机",
324 324 "language": "zh",
... ... @@ -369,7 +369,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
369 369 ```bash
370 370 curl -X POST "http://localhost:6002/search/" \
371 371 -H "Content-Type: application/json" \
372   - -H "X-Tenant-ID: 2" \
  372 + -H "X-Tenant-ID: 162" \
373 373 -d '{
374 374 "query": "手机",
375 375 "language": "zh",
... ... @@ -384,7 +384,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
384 384 ```bash
385 385 curl -X POST "http://localhost:6002/search/" \
386 386 -H "Content-Type: application/json" \
387   - -H "X-Tenant-ID: 2" \
  387 + -H "X-Tenant-ID: 162" \
388 388 -d '{
389 389 "query": "手机",
390 390 "language": "zh",
... ... @@ -401,7 +401,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
401 401 ```bash
402 402 curl -X POST "http://localhost:6002/search/" \
403 403 -H "Content-Type: application/json" \
404   - -H "X-Tenant-ID: 2" \
  404 + -H "X-Tenant-ID: 162" \
405 405 -d '{
406 406 "query": "手机",
407 407 "language": "zh",
... ... @@ -425,7 +425,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
425 425 ```bash
426 426 curl -X POST "http://localhost:6002/search/" \
427 427 -H "Content-Type: application/json" \
428   - -H "X-Tenant-ID: 2" \
  428 + -H "X-Tenant-ID: 162" \
429 429 -d '{
430 430 "query": "手机",
431 431 "language": "zh",
... ... @@ -468,7 +468,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
468 468 ```bash
469 469 curl -X POST "http://localhost:6002/search/" \
470 470 -H "Content-Type: application/json" \
471   - -H "X-Tenant-ID: 2" \
  471 + -H "X-Tenant-ID: 162" \
472 472 -d '{
473 473 "query": "手机",
474 474 "language": "zh",
... ... @@ -497,7 +497,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
497 497 ```bash
498 498 curl -X POST "http://localhost:6002/search/" \
499 499 -H "Content-Type: application/json" \
500   - -H "X-Tenant-ID: 2" \
  500 + -H "X-Tenant-ID: 162" \
501 501 -d '{
502 502 "query": "手机",
503 503 "language": "zh",
... ... @@ -512,7 +512,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
512 512 ```bash
513 513 curl -X POST "http://localhost:6002/search/" \
514 514 -H "Content-Type: application/json" \
515   - -H "X-Tenant-ID: 2" \
  515 + -H "X-Tenant-ID: 162" \
516 516 -d '{
517 517 "query": "手机",
518 518 "language": "zh",
... ... @@ -527,7 +527,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
527 527 ```bash
528 528 curl -X POST "http://localhost:6002/search/" \
529 529 -H "Content-Type: application/json" \
530   - -H "X-Tenant-ID: 2" \
  530 + -H "X-Tenant-ID: 162" \
531 531 -d '{
532 532 "query": "手机",
533 533 "language": "zh",
... ... @@ -548,7 +548,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
548 548 ```bash
549 549 curl -X POST "http://localhost:6002/search/image" \
550 550 -H "Content-Type: application/json" \
551   - -H "X-Tenant-ID: 2" \
  551 + -H "X-Tenant-ID: 162" \
552 552 -d '{
553 553 "image_url": "https://example.com/barbie.jpg",
554 554 "size": 20
... ... @@ -560,7 +560,7 @@ curl -X POST &quot;http://localhost:6002/search/image&quot; \
560 560 ```bash
561 561 curl -X POST "http://localhost:6002/search/image" \
562 562 -H "Content-Type: application/json" \
563   - -H "X-Tenant-ID: 2" \
  563 + -H "X-Tenant-ID: 162" \
564 564 -d '{
565 565 "image_url": "https://example.com/barbie.jpg",
566 566 "size": 20,
... ... @@ -584,7 +584,7 @@ curl -X POST &quot;http://localhost:6002/search/image&quot; \
584 584 ```bash
585 585 curl -X POST "http://localhost:6002/search/" \
586 586 -H "Content-Type: application/json" \
587   - -H "X-Tenant-ID: 2" \
  587 + -H "X-Tenant-ID: 162" \
588 588 -d '{
589 589 "query": "玩具 AND 乐高"
590 590 }'
... ... @@ -597,7 +597,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
597 597 ```bash
598 598 curl -X POST "http://localhost:6002/search/" \
599 599 -H "Content-Type: application/json" \
600   - -H "X-Tenant-ID: 2" \
  600 + -H "X-Tenant-ID: 162" \
601 601 -d '{
602 602 "query": "芭比 OR 娃娃"
603 603 }'
... ... @@ -610,7 +610,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
610 610 ```bash
611 611 curl -X POST "http://localhost:6002/search/" \
612 612 -H "Content-Type: application/json" \
613   - -H "X-Tenant-ID: 2" \
  613 + -H "X-Tenant-ID: 162" \
614 614 -d '{
615 615 "query": "玩具 ANDNOT 电动"
616 616 }'
... ... @@ -623,7 +623,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
623 623 ```bash
624 624 curl -X POST "http://localhost:6002/search/" \
625 625 -H "Content-Type: application/json" \
626   - -H "X-Tenant-ID: 2" \
  626 + -H "X-Tenant-ID: 162" \
627 627 -d '{
628 628 "query": "玩具 AND (乐高 OR 芭比) ANDNOT 电动"
629 629 }'
... ... @@ -636,7 +636,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
636 636 ```bash
637 637 curl -X POST "http://localhost:6002/search/" \
638 638 -H "Content-Type: application/json" \
639   - -H "X-Tenant-ID: 2" \
  639 + -H "X-Tenant-ID: 162" \
640 640 -d '{
641 641 "query": "brand:乐高"
642 642 }'
... ... @@ -961,7 +961,7 @@ const SearchComponent = {
961 961 ```bash
962 962 curl -X POST "http://localhost:6002/search/" \
963 963 -H "Content-Type: application/json" \
964   - -H "X-Tenant-ID: 2" \
  964 + -H "X-Tenant-ID: 162" \
965 965 -d '{
966 966 "query": "手机",
967 967 "language": "zh",
... ... @@ -1000,7 +1000,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
1000 1000 ```bash
1001 1001 curl -X POST "http://localhost:6002/search/" \
1002 1002 -H "Content-Type: application/json" \
1003   - -H "X-Tenant-ID: 2" \
  1003 + -H "X-Tenant-ID: 162" \
1004 1004 -d '{
1005 1005 "query": "手机",
1006 1006 "language": "zh",
... ... @@ -1020,7 +1020,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
1020 1020 # 显示某个类目下的所有商品,按价格排序,提供品牌筛选
1021 1021 curl -X POST "http://localhost:6002/search/" \
1022 1022 -H "Content-Type: application/json" \
1023   - -H "X-Tenant-ID: 2" \
  1023 + -H "X-Tenant-ID: 162" \
1024 1024 -d '{
1025 1025 "query": "*",
1026 1026 "filters": {
... ... @@ -1051,7 +1051,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
1051 1051 # 用户搜索关键词,提供筛选和排序(包含规格分面)
1052 1052 curl -X POST "http://localhost:6002/search/" \
1053 1053 -H "Content-Type: application/json" \
1054   - -H "X-Tenant-ID: 2" \
  1054 + -H "X-Tenant-ID: 162" \
1055 1055 -d '{
1056 1056 "query": "手机",
1057 1057 "language": "zh",
... ... @@ -1064,9 +1064,9 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
1064 1064 "field": "min_price",
1065 1065 "type": "range",
1066 1066 "ranges": [
1067   - {"key": "0-50", "to": 50},
1068   - {"key": "50-100", "from": 50, "to": 100},
1069   - {"key": "100+", "from": 100}
  1067 + {"key": "0-50", "to": 50},
  1068 + {"key": "50-100", "from": 50, "to": 100},
  1069 + {"key": "100+", "from": 100}
1070 1070 ]
1071 1071 }
1072 1072 ],
... ... @@ -1080,7 +1080,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
1080 1080 # 用户搜索并选择了规格筛选条件
1081 1081 curl -X POST "http://localhost:6002/search/" \
1082 1082 -H "Content-Type: application/json" \
1083   - -H "X-Tenant-ID: 2" \
  1083 + -H "X-Tenant-ID: 162" \
1084 1084 -d '{
1085 1085 "query": "手机",
1086 1086 "language": "zh",
... ... @@ -1106,7 +1106,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
1106 1106 # 显示特定价格区间的商品
1107 1107 curl -X POST "http://localhost:6002/search/" \
1108 1108 -H "Content-Type: application/json" \
1109   - -H "X-Tenant-ID: 2" \
  1109 + -H "X-Tenant-ID: 162" \
1110 1110 -d '{
1111 1111 "query": "*",
1112 1112 "range_filters": {
... ... @@ -1128,7 +1128,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
1128 1128 # 最近更新的商品
1129 1129 curl -X POST "http://localhost:6002/search/" \
1130 1130 -H "Content-Type: application/json" \
1131   - -H "X-Tenant-ID: 2" \
  1131 + -H "X-Tenant-ID: 162" \
1132 1132 -d '{
1133 1133 "query": "*",
1134 1134 "range_filters": {
... ... @@ -1152,7 +1152,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
1152 1152 # 错误:range_filters 缺少操作符
1153 1153 curl -X POST "http://localhost:6002/search/" \
1154 1154 -H "Content-Type: application/json" \
1155   - -H "X-Tenant-ID: 2" \
  1155 + -H "X-Tenant-ID: 162" \
1156 1156 -d '{
1157 1157 "query": "手机",
1158 1158 "language": "zh",
... ... @@ -1177,7 +1177,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
1177 1177 # 错误:query 为空
1178 1178 curl -X POST "http://localhost:6002/search/" \
1179 1179 -H "Content-Type: application/json" \
1180   - -H "X-Tenant-ID: 2" \
  1180 + -H "X-Tenant-ID: 162" \
1181 1181 -d '{
1182 1182 "query": ""
1183 1183 }'
... ... @@ -1255,7 +1255,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
1255 1255 # 使用通配符查询 + 分面
1256 1256 curl -X POST "http://localhost:6002/search/" \
1257 1257 -H "Content-Type: application/json" \
1258   - -H "X-Tenant-ID: 2" \
  1258 + -H "X-Tenant-ID: 162" \
1259 1259 -d '{
1260 1260 "query": "*",
1261 1261 "size": 0,
... ... @@ -1270,7 +1270,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
1270 1270 ```bash
1271 1271 curl -X POST "http://localhost:6002/search/" \
1272 1272 -H "Content-Type: application/json" \
1273   - -H "X-Tenant-ID: 2" \
  1273 + -H "X-Tenant-ID: 162" \
1274 1274 -d '{
1275 1275 "query": "手机",
1276 1276 "language": "zh",
... ... @@ -1297,7 +1297,7 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
1297 1297 # 布尔表达式 + 过滤器 + 分面 + 排序
1298 1298 curl -X POST "http://localhost:6002/search/" \
1299 1299 -H "Content-Type: application/json" \
1300   - -H "X-Tenant-ID: 2" \
  1300 + -H "X-Tenant-ID: 162" \
1301 1301 -d '{
1302 1302 "query": "(玩具 OR 游戏) AND 儿童 ANDNOT 电子",
1303 1303 "filters": {
... ... @@ -1326,19 +1326,19 @@ curl -X POST &quot;http://localhost:6002/search/&quot; \
1326 1326 # 测试类目:玩具
1327 1327 curl -X POST "http://localhost:6002/search/" \
1328 1328 -H "Content-Type: application/json" \
1329   - -H "X-Tenant-ID: 2" \
  1329 + -H "X-Tenant-ID: 162" \
1330 1330 -d '{"query": "玩具", "size": 5}'
1331 1331  
1332 1332 # 测试品牌:乐高
1333 1333 curl -X POST "http://localhost:6002/search/" \
1334 1334 -H "Content-Type: application/json" \
1335   - -H "X-Tenant-ID: 2" \
  1335 + -H "X-Tenant-ID: 162" \
1336 1336 -d '{"query": "brand:乐高", "size": 5}'
1337 1337  
1338 1338 # 测试布尔表达式
1339 1339 curl -X POST "http://localhost:6002/search/" \
1340 1340 -H "Content-Type: application/json" \
1341   - -H "X-Tenant-ID: 2" \
  1341 + -H "X-Tenant-ID: 162" \
1342 1342 -d '{"query": "玩具 AND 乐高", "size": 5}'
1343 1343 ```
1344 1344  
... ...
docs/Usage-Guide.md
... ... @@ -271,7 +271,7 @@ curl http://localhost:6002/admin/stats
271 271 ```bash
272 272 curl -X POST http://localhost:6002/search/ \
273 273 -H "Content-Type: application/json" \
274   - -H "X-Tenant-ID: 2" \
  274 + -H "X-Tenant-ID: 162" \
275 275 -d '{
276 276 "query": "玩具",
277 277 "size": 10
... ... @@ -294,7 +294,7 @@ curl -X POST &quot;http://localhost:6002/search/?tenant_id=2&quot; \
294 294 ```bash
295 295 curl -X POST http://localhost:6002/search/ \
296 296 -H "Content-Type: application/json" \
297   - -H "X-Tenant-ID: 2" \
  297 + -H "X-Tenant-ID: 162" \
298 298 -d '{
299 299 "query": "玩具",
300 300 "size": 10,
... ... @@ -312,7 +312,7 @@ curl -X POST http://localhost:6002/search/ \
312 312 ```bash
313 313 curl -X POST http://localhost:6002/search/ \
314 314 -H "Content-Type: application/json" \
315   - -H "X-Tenant-ID: 2" \
  315 + -H "X-Tenant-ID: 162" \
316 316 -d '{
317 317 "query": "玩具",
318 318 "size": 10,
... ... @@ -328,7 +328,7 @@ curl -X POST http://localhost:6002/search/ \
328 328 ```bash
329 329 curl -X POST http://localhost:6002/search/image \
330 330 -H "Content-Type: application/json" \
331   - -H "X-Tenant-ID: 2" \
  331 + -H "X-Tenant-ID: 162" \
332 332 -d '{
333 333 "image_url": "https://oss.essa.cn/example.jpg",
334 334 "size": 10
... ... @@ -403,7 +403,7 @@ curl http://localhost:9200/search_products/_count
403 403 # 检查tenant_id过滤是否正确
404 404 curl -X POST http://localhost:6002/search/ \
405 405 -H "Content-Type: application/json" \
406   - -H "X-Tenant-ID: 2" \
  406 + -H "X-Tenant-ID: 162" \
407 407 -d '{"query": "*", "size": 10, "debug": true}'
408 408 ```
409 409  
... ...
docs/分面数据问题完整分析.md 0 → 100644
... ... @@ -0,0 +1,188 @@
  1 +# 分面数据问题完整分析报告
  2 +
  3 +## 问题现象
  4 +
  5 +前端显示的分面结果都是空的:
  6 +- Category: 空
  7 +- Color: 空
  8 +- Size: 空
  9 +- Material: 空
  10 +
  11 +ES的聚合查询结果也是空的。
  12 +
  13 +## 诊断结果分析
  14 +
  15 +### MySQL数据检查结果
  16 +
  17 +1. **category_path字段**:
  18 + - 总SPU数:11254
  19 + - 有category_path的SPU:只有1个
  20 + - 该值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式,逗号分隔)
  21 +
  22 +2. **option表数据**:
  23 + - 总option记录数:2658
  24 + - 有option定义的SPU数量:886个
  25 + - **position=1, name='color'**: 885个SPU ✅
  26 + - **position=2, name='size'**: 885个SPU ✅
  27 + - **position=3, name='material'**: 885个SPU ✅
  28 +
  29 +3. **SKU数据**:
  30 + - 总SKU数:43109
  31 + - 应该有option1/2/3值
  32 +
  33 +### ES数据检查结果
  34 +
  35 +1. **category1_name字段**:
  36 + - 总文档数:10000(注意:MySQL 中共有 11254 个 SPU,而 10000 恰好是 ES `hits.total` 的默认统计上限,该数字可能被截断;实际文档数建议用 `_count` API 确认)
  37 + - 有category1_name的文档:只有1个
  38 + - 该值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式)
  39 +
  40 +2. **specifications字段**:
  41 + - ES聚合查询显示**有数据**:
  42 + - specifications.color: Beige: 1226, Khaki: 1176, Red: 1168等
  43 + - specifications.size: 1: 1234, 12: 1234等
  44 + - specifications.material: 塑料英文包装: 17277等
  45 +
  46 +## 问题根源
  47 +
  48 +### 问题1:category1_name 几乎都为空 ✅ 已找到原因
  49 +
  50 +**原因**:
  51 +1. MySQL的`category_path`字段几乎都是空的(只有1个,而且是ID列表格式)
  52 +2. 当`category_path`为空时,代码会使用`category`字段作为备选(代码已修复)
  53 +3. 但需要检查MySQL的`category`字段是否有值
  54 +
  55 +**数据流转**:
  56 +- Excel "专辑名称" → 店匠系统 → MySQL `category` 或 `category_path` 字段
  57 +- 如果Excel导入时"专辑名称"没有正确映射,或者`category`字段也为空,就会导致`category1_name`为空
  58 +
  59 +### 问题2:为什么specifications分面查询无结果
  60 +
  61 +**ES聚合查询显示有数据**,但前端显示为空,可能原因:
  62 +
  63 +1. **前端搜索时有查询条件**:
  64 + - 如果有查询条件(如`query="手机"`),ES会先过滤文档
  65 + - 过滤后的文档如果没有specifications数据,聚合结果就会为空
  66 + - 但这不应该导致所有分面都为空
  67 +
  68 +2. **分面聚合构建问题**:
  69 + - 前端请求:`["category1_name", "specifications.color", "specifications.size", "specifications.material"]`
  70 + - ES构建的聚合名称:`category1_name_facet`, `specifications_color_facet`等
  71 + - 可能聚合构建或解析有问题
  72 +
  73 +3. **tenant_id过滤问题**:
  74 + - 如果搜索时tenant_id不匹配,可能导致没有匹配的文档
  75 +
  76 +## 需要检查的关键点
  77 +
  78 +### 1. MySQL的category字段是否有值
  79 +
  80 +**需要运行SQL查询**:
  81 +```sql
  82 +SELECT
  83 + COUNT(*) as total,
  84 + COUNT(category) as has_category,
  85 + COUNT(*) - COUNT(category) as null_category
  86 +FROM shoplazza_product_spu
  87 +WHERE tenant_id = 162 AND deleted = 0;
  88 +```
  89 +
  90 +**如果category字段也为空**:
  91 +- 说明Excel导入时"专辑名称"字段没有正确映射到MySQL的`category`字段
  92 +- 需要检查店匠系统的字段映射配置
  93 +
  94 +### 2. SKU的option1/2/3字段是否有值
  95 +
  96 +**需要运行SQL查询**:
  97 +```sql
  98 +SELECT
  99 + COUNT(*) as total_skus,
  100 + COUNT(option1) as has_option1,
  101 + COUNT(option2) as has_option2,
  102 + COUNT(option3) as has_option3
  103 +FROM shoplazza_product_sku
  104 +WHERE tenant_id = 162 AND deleted = 0;
  105 +```
  106 +
  107 +### 3. 检查ES聚合查询
  108 +
  109 +**运行检查脚本**:
  110 +```bash
  111 +python scripts/check_es_data.py --tenant-id 162
  112 +```
  113 +
  114 +查看:
  115 +- 是否有category1_name数据
  116 +- specifications聚合是否有数据
  117 +
  118 +## 解决方案
  119 +
  120 +### 方案1:修复category1_name字段生成(代码已修复)
  121 +
  122 +**已修复的代码**(`indexer/spu_transformer.py`第241-259行):
  123 +- 如果`category_path`为空,使用`category`字段作为备选
  124 +- 从`category`字段解析多级分类
  125 +
  126 +**但需要确保**:
  127 +1. MySQL的`category`字段有值
  128 +2. 重新导入数据到ES
  129 +
  130 +### 方案2:检查并修复MySQL数据
  131 +
  132 +如果MySQL的`category`字段也为空:
  133 +
  134 +1. **检查Excel导入映射**:
  135 + - 确认"专辑名称"字段是否正确映射到MySQL的`category`字段
  136 + - 如果不正确,需要修复映射或重新导入
  137 +
  138 +2. **如果category字段有值但category1_name仍为空**:
  139 + - 说明数据导入时使用的是旧代码
  140 + - 需要重新导入数据到ES
  141 +
  142 +### 方案3:验证specifications分面查询
  143 +
  144 +虽然ES聚合查询显示有数据,但需要验证:
  145 +
  146 +1. **检查前端搜索请求**:
  147 + - 确认分面请求是否正确发送
  148 + - 确认tenant_id是否正确
  149 +
  150 +2. **检查ES聚合结果解析**:
  151 + - 确认`format_facets`函数是否正确解析specifications分面
  152 + - 确认字段名匹配是否正确(`specifications.color` vs `specifications_color_facet`)
  153 +
  154 +## 立即执行的操作
  155 +
  156 +### 步骤1:检查MySQL的category字段
  157 +
  158 +更新诊断脚本,添加category字段检查:
  159 +```bash
  160 +# 需要手动运行SQL或更新诊断脚本
  161 +```
  162 +
  163 +### 步骤2:重新导入数据到ES
  164 +
  165 +修复代码后,重新导入数据:
  166 +```bash
  167 +python scripts/recreate_and_import.py \
  168 + --tenant-id 162 \
  169 + --db-host <host> \
  170 + --db-database saas \
  171 + --db-username saas \
  172 + --db-password <password> \
  173 + --es-host http://localhost:9200
  174 +```
  175 +
  176 +### 步骤3:验证ES数据
  177 +
  178 +运行ES数据检查脚本:
  179 +```bash
  180 +python scripts/check_es_data.py --tenant-id 162
  181 +```
  182 +
  183 +## 关键发现
  184 +
  185 +1. **specifications数据是存在的**:ES聚合查询能正常返回color/size/material的分面数据
  186 +2. **category1_name几乎都是空的**:这是因为`category_path`为空,需要从`category`字段生成
  187 +3. **需要重新导入数据**:修复代码后,需要重新导入数据到ES才能生效
  188 +
... ...
docs/分面数据问题根源分析.md 0 → 100644
... ... @@ -0,0 +1,125 @@
  1 +# 分面数据问题根源分析
  2 +
  3 +## ES数据检查结果
  4 +
  5 +从ES索引数据检查结果可以看到:
  6 +
  7 +### 1. category1_name 分面问题
  8 +
  9 +**检查结果**:
  10 +- 总文档数:10000(注意:10000 恰好是 ES `hits.total` 的默认统计上限,该数字可能被截断;实际文档数建议用 `_count` API 确认)
  11 +- 有category1_name的文档:只有1个
  12 +- 该文档的category1_name值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式,不是分类名称)
  13 +
  14 +**问题原因**:
  15 +- MySQL中`category_path`字段几乎都是空的(只有1个,而且那个是ID列表格式,不是路径格式如"服装/男装")
  16 +- MySQL中`category`字段可能也为空
  17 +- 导致ES索引中的`category1_name`字段几乎都是空的
  18 +
  19 +**解决方案**:
  20 +代码已修复(`indexer/spu_transformer.py`第241-259行),支持从`category`字段生成`category1_name`,但需要:
  21 +1. 确保MySQL的`category`字段有值
  22 +2. 重新导入数据到ES
  23 +
  24 +### 2. specifications 分面问题
  25 +
  26 +**检查结果**(从ES聚合查询):
  27 +- specifications.color 分面:有数据(Beige: 1226, Khaki: 1176等)
  28 +- specifications.size 分面:有数据(1: 1234, 12: 1234等)
  29 +- specifications.material 分面:有数据(塑料英文包装: 17277等)
  30 +
  31 +**说明**:ES中确实有specifications数据,而且聚合查询能正常返回结果。
  32 +
  33 +## 问题根源
  34 +
  35 +### 问题1:category1_name 几乎都为空
  36 +
  37 +**MySQL数据情况**:
  38 +- `category_path` 字段:11253个SPU为空,只有1个有值(但那个值是ID列表格式)
  39 +- `category` 字段:需要检查是否有值
  40 +
  41 +**ES索引情况**:
  42 +- `category1_name` 字段:几乎都是None
  43 +- 导致category分面查询结果为空
  44 +
  45 +### 问题2:为什么specifications分面查询无结果
  46 +
  47 +虽然ES聚合查询显示有数据,但前端显示为空,可能原因:
  48 +
  49 +1. **分面聚合结构不匹配**:
  50 + - 前端请求:`["category1_name", "specifications.color", "specifications.size", "specifications.material"]`
  51 + - ES构建的聚合名称:`category1_name_facet`, `specifications_color_facet`, `specifications_size_facet`, `specifications_material_facet`
  52 + - 前端解析时的字段匹配可能有问题
  53 +
  54 +2. **ES聚合结果格式**:
  55 + - specifications.color分面的聚合名称是`specifications_color_facet`
  56 + - 但前端期望的field是`specifications.color`
  57 + - 需要在`format_facets`中正确匹配
  58 +
  59 +## 解决方案
  60 +
  61 +### 方案1:修复category1_name字段(必需)
  62 +
  63 +**问题**:MySQL的`category_path`为空,需要从`category`字段生成
  64 +
  65 +**已修复代码**(`indexer/spu_transformer.py`):
  66 +- 如果`category_path`为空,使用`category`字段作为备选
  67 +- 从`category`字段解析多级分类(如果包含"/")
  68 +- 如果`category`不包含"/",直接作为`category1_name`
  69 +
  70 +**但需要**:
  71 +1. 检查MySQL的`category`字段是否有值
  72 +2. 如果`category`也为空,需要检查Excel导入时"专辑名称"字段是否正确映射
  73 +
  74 +### 方案2:验证specifications分面查询
  75 +
  76 +虽然ES聚合查询显示有数据,但需要:
  77 +1. 检查前端是否正确发送分面请求
  78 +2. 检查ES返回的聚合结果格式
  79 +3. 检查`format_facets`函数是否正确解析specifications分面
  80 +
  81 +## 下一步操作
  82 +
  83 +### 步骤1:检查MySQL的category字段
  84 +
  85 +```sql
  86 +SELECT
  87 + COUNT(*) as total,
  88 + COUNT(category) as has_category,
  89 + COUNT(*) - COUNT(category) as null_category
  90 +FROM shoplazza_product_spu
  91 +WHERE tenant_id = 162 AND deleted = 0;
  92 +```
  93 +
  94 +### 步骤2:检查Excel导入映射
  95 +
  96 +确认Excel的"专辑名称"字段是否正确映射到MySQL的`category`字段:
  97 +- 如果映射到`category`字段,应该有值
  98 +- 如果映射到`category_path`字段,但值是ID列表格式,需要修复
  99 +
  100 +### 步骤3:重新导入数据到ES
  101 +
  102 +修复后,重新导入数据:
  103 +```bash
  104 +python scripts/recreate_and_import.py \
  105 + --tenant-id 162 \
  106 + --db-host <host> \
  107 + --db-database saas \
  108 + --db-username saas \
  109 + --db-password <password> \
  110 + --es-host http://localhost:9200
  111 +```
  112 +
  113 +### 步骤4:验证ES数据
  114 +
  115 +检查ES索引中的文档是否包含:
  116 +- `category1_name`字段(应该有值)
  117 +- `specifications`字段(应该已经有数据)
  118 +
  119 +## 关键发现
  120 +
  121 +从ES检查结果看:
  122 +1. **specifications数据是有的**,ES聚合查询能正常返回color/size/material的分面数据
  123 +2. **category1_name几乎都是空的**,这是导致category分面为空的原因
  124 +3. **如果specifications分面也显示为空**,可能是前端解析ES聚合结果的问题,而不是ES数据的问题
  125 +
... ...
docs/分面数据问题根源和解决方案.md 0 → 100644
... ... @@ -0,0 +1,180 @@
  1 +# 分面数据问题根源和解决方案
  2 +
  3 +## 📊 诊断结果总结
  4 +
  5 +### MySQL数据情况
  6 +- **总SPU数**:11254
  7 +- **category_path字段**:只有1个有值(ID列表格式),11253个为空
  8 +- **option表数据**:
  9 + - 有option定义的SPU:886个
  10 + - position=1, name='color': 885个 ✅
  11 + - position=2, name='size': 885个 ✅
  12 + - position=3, name='material': 885个 ✅
  13 +
  14 +### ES索引数据情况
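  15 +- **总文档数**:10000(注意:该数值恰好等于ES `hits.total` 的默认统计上限,而MySQL中有11254个SPU;实际文档数可能更多,建议使用 `track_total_hits: true` 或 `_count` 接口确认)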
  16 +- **category1_name字段**:只有1个有值(ID列表格式),其他都是None ❌
  17 +- **specifications聚合查询**:有数据 ✅
  18 + - specifications.color: Beige: 1226, Khaki: 1176等
  19 + - specifications.size: 1: 1234, 12: 1234等
  20 + - specifications.material: 塑料英文包装: 17277等
  21 +
  22 +## 🔍 问题根源
  23 +
  24 +### 问题1:category1_name 几乎都为空
  25 +
  26 +**数据流分析**:
  27 +
  28 +1. **Excel生成阶段**(`csv_to_excel_multi_variant.py`):
  29 + - Excel字段:`'专辑名称': csv_data['categoryName']`
  30 + - 从CSV的`categoryName`字段读取,应该有值
  31 +
  32 +2. **Excel导入店匠 → MySQL**:
  33 + - Excel的"专辑名称"字段 → 可能映射到MySQL的`category`或`category_path`字段
  34 + - **问题**:店匠系统可能将"专辑名称"映射到`category`字段,而不是`category_path`
  35 + - 诊断结果显示:`category_path`几乎都是空的
  36 +
  37 +3. **MySQL → ES转换**(`spu_transformer.py`):
  38 + - 原逻辑:只从`category_path`解析`category1_name`
  39 + - 如果`category_path`为空,`category1_name`不会被设置
  40 + - **已修复**:如果`category_path`为空,使用`category`字段作为备选(第241-259行)
  41 +
  42 +**关键检查点**:
  43 +- MySQL的`category`字段是否有值?
  44 +- 如果`category`字段也为空,说明Excel导入时"专辑名称"没有正确映射
  45 +
  46 +### 问题2:specifications分面查询无结果
  47 +
  48 +**奇怪的现象**:
  49 +- ES聚合查询显示有数据(Beige: 1226, Khaki: 1176等)
  50 +- 但前端显示为空
  51 +
  52 +**可能原因**:
  53 +
  54 +1. **前端搜索时有查询条件**:
  55 + - 如果搜索时添加了查询条件(如`query="手机"`),ES会先过滤文档
  56 + - 过滤后的文档可能没有specifications数据,导致聚合结果为空
  57 + - **需要验证**:不带查询条件的搜索,分面是否有数据
  58 +
  59 +2. **分面聚合构建或解析问题**:
  60 + - 前端请求:`["category1_name", "specifications.color", "specifications.size", "specifications.material"]`
  61 + - ES构建的聚合名称:`specifications_color_facet`
  62 + - 前端解析时的字段匹配:`specifications.color`
  63 + - **需要验证**:`format_facets`函数是否正确匹配
  64 +
  65 +3. **tenant_id过滤问题**:
  66 + - 如果tenant_id不匹配,会导致没有匹配的文档
  67 +
  68 +## ✅ 已实施的修复
  69 +
  70 +### 修复1:支持从category字段生成category1_name
  71 +
  72 +**文件**:`indexer/spu_transformer.py`(第241-259行)
  73 +
  74 +**修改内容**:
  75 +```python
  76 +elif pd.notna(spu_row.get('category')):
  77 + # 如果category_path为空,使用category字段作为category1_name的备选
  78 + category = str(spu_row['category'])
  79 + # 从category字段解析多级分类
  80 + if '/' in category:
  81 + path_parts = category.split('/')
  82 + if len(path_parts) > 0:
  83 + doc['category1_name'] = path_parts[0].strip()
  84 + else:
  85 + # 直接作为category1_name
  86 + doc['category1_name'] = category.strip()
  87 +```
  88 +
  89 +**说明**:如果MySQL的`category`字段有值,修复后的代码应该能生成`category1_name`
  90 +
  91 +## 🔧 需要执行的操作
  92 +
  93 +### 步骤1:检查MySQL的category字段
  94 +
  95 +**更新诊断脚本**(已更新):`scripts/check_data_source.py`
  96 +
  97 +**运行检查**:
  98 +```bash
  99 +python scripts/check_data_source.py --tenant-id 162 --db-host <host> ...
  100 +```
  101 +
  102 +**关键检查**:
  103 +- `category`字段是否有值
  104 +- 如果有值,值的格式是什么(是否包含"/")
  105 +- 如果也为空,说明Excel导入映射有问题
  106 +
  107 +### 步骤2:重新导入数据到ES
  108 +
  109 +**修复代码后,需要重新导入数据**:
  110 +```bash
  111 +python scripts/recreate_and_import.py \
  112 + --tenant-id 162 \
  113 + --db-host <host> \
  114 + --db-database saas \
  115 + --db-username saas \
  116 + --db-password <password> \
  117 + --es-host http://localhost:9200
  118 +```
  119 +
  120 +### 步骤3:验证ES数据
  121 +
  122 +**运行ES数据检查脚本**:
  123 +```bash
  124 +python scripts/check_es_data.py --tenant-id 162
  125 +```
  126 +
  127 +**检查内容**:
  128 +- `category1_name`字段是否有值
  129 +- `specifications`字段是否有数据
  130 +- 分面聚合查询是否有结果
  131 +
  132 +## 📝 数据流程说明
  133 +
  134 +### Excel生成 → MySQL
  135 +
  136 +**Excel字段**(`csv_to_excel_multi_variant.py`):
  137 +- `'专辑名称': csv_data['categoryName']` - 分类信息
  138 +- `'款式1': 'color'`(M行)- 选项名称
  139 +- `'款式2': 'size'`(M行)- 选项名称
  140 +- `'款式3': 'material'`(M行)- 选项名称
  141 +- `'款式1': 'Red'`(P行)- 选项值
  142 +- `'款式2': '5'`(P行)- 选项值
  143 +- `'款式3': '塑料'`(P行)- 选项值
  144 +
  145 +**Excel导入店匠 → MySQL映射**(需要确认):
  146 +- `'专辑名称'` → `shoplazza_product_spu.category` 或 `category_path`
  147 +- `'款式1/2/3'`(M行)→ `shoplazza_product_option.name` + `position`
  148 +- `'款式1/2/3'`(P行)→ `shoplazza_product_sku.option1/2/3`
  149 +
  150 +### MySQL → ES转换
  151 +
  152 +**当前逻辑**(`spu_transformer.py`):
  153 +
  154 +1. **category1_name生成**:
  155 + - 优先从`category_path`解析(第228-240行)
  156 + - 如果`category_path`为空,从`category`字段解析(第241-259行)✅ 已修复
  157 +
  158 +2. **specifications生成**(第351-370行):
  159 + - 从`option表`获取name(position → name映射)
  160 + - 从`SKU表`获取option1/2/3值
  161 + - 构建`specifications`数组
  162 +
  163 +**关键点**:
  164 +- 需要确保MySQL的`category`字段有值
  165 +- 需要确保`option表`有数据且`name`是英文(color/size/material)
  166 +- 需要确保SKU的`option1/2/3`字段有值
  167 +
  168 +## 🎯 关键发现
  169 +
  170 +1. **specifications数据是存在的**:ES聚合查询能正常返回color/size/material的分面数据
  171 +2. **category1_name几乎都是空的**:这是因为`category_path`为空,需要从`category`字段生成
  172 +3. **需要重新导入数据**:修复代码后,需要重新导入数据到ES才能生效
  173 +
  174 +## 🔄 下一步
  175 +
  176 +1. ✅ **代码已修复**:支持从`category`字段生成`category1_name`
  177 +2. ⏳ **需要检查MySQL数据**:确认`category`字段是否有值
  178 +3. ⏳ **需要重新导入数据**:将修复后的数据导入ES
  179 +4. ⏳ **需要验证**:检查ES数据是否正确,分面是否能正常显示
  180 +
... ...
docs/分面数据问题诊断.md 0 → 100644
... ... @@ -0,0 +1,282 @@
  1 +# 分面数据问题诊断报告
  2 +
  3 +## 问题描述
  4 +
  5 +前端显示的分面结果都是空的:
  6 +- Category: 空
  7 +- Color: 空
  8 +- Size: 空
  9 +- Material: 空
  10 +
  11 +ES的聚合查询结果也是空的。
  12 +
  13 +## 数据流程分析
  14 +
  15 +### 1. 数据生成阶段(csv_to_excel_multi_variant.py)
  16 +
  17 +**生成的数据**:
  18 +
  19 +#### 分类信息:
  20 +- Excel字段:`'专辑名称': csv_data['categoryName']`
  21 +- 示例值:`"电子产品"` 或 `"服装/男装"`(从CSV的categoryName字段读取)
  22 +
  23 +#### 属性信息(M+P类型商品):
  24 +- Excel字段(M行主商品):
  25 + - `'款式1': 'color'`(选项名称)
  26 + - `'款式2': 'size'`(选项名称)
  27 + - `'款式3': 'material'`(选项名称)
  28 +- Excel字段(P行子款式):
  29 + - `'款式1': 'Red'`(选项值,从COLORS列表随机选择)
  30 + - `'款式2': '5'`(选项值,1-30随机选择)
  31 + - `'款式3': '塑料'`(选项值,从商品标题提取)
  32 +
  33 +### 2. Excel导入店匠系统 → MySQL
  34 +
  35 +**预期映射**:
  36 +
  37 +#### 分类字段:
  38 +- Excel `'专辑名称'` → MySQL `shoplazza_product_spu.category_path` **或** `category` 字段
  39 +- **问题**:店匠系统可能将"专辑名称"映射到`category`字段,而不是`category_path`字段
  40 +
  41 +#### 属性字段:
  42 +- Excel `'款式1/2/3'`(M行)→ MySQL `shoplazza_product_option.name` 和 `position`
  43 +- Excel `'款式1/2/3'`(P行)→ MySQL `shoplazza_product_sku.option1/2/3`
  44 +
  45 +### 3. MySQL → ES转换阶段(spu_transformer.py)
  46 +
  47 +#### category1_name 构建逻辑(第228-240行):
  48 +
  49 +```python
  50 +if pd.notna(spu_row.get('category_path')):
  51 + category_path = str(spu_row['category_path'])
  52 + # 解析category_path获取多层级分类名称
  53 + path_parts = category_path.split('/')
  54 + if len(path_parts) > 0:
  55 + doc['category1_name'] = path_parts[0].strip()
  56 +```
  57 +
  58 +**问题**:如果MySQL中的`category_path`字段为空,`category1_name`不会被设置!
  59 +
  60 +#### specifications 构建逻辑(第328-347行):
  61 +
  62 +```python
  63 +# 构建option名称映射(position -> name)
  64 +option_name_map = {}
  65 +if not options.empty:
  66 + for _, opt_row in options.iterrows():
  67 + position = opt_row.get('position')
  68 + name = opt_row.get('name')
  69 + if pd.notna(position) and pd.notna(name):
  70 + option_name_map[int(position)] = str(name)
  71 +
  72 +# 构建specifications
  73 +if pd.notna(sku_row.get('option1')) and 1 in option_name_map:
  74 + specifications.append({
  75 + 'sku_id': sku_id,
  76 + 'name': option_name_map[1], # 使用option表的name字段
  77 + 'value': str(sku_row['option1'])
  78 + })
  79 +```
  80 +
  81 +**问题**:如果`shoplazza_product_option`表中没有记录,或者`name`字段值不是预期的英文名称(如"color"),会导致:
  82 +1. `option_name_map`为空,无法构建specifications
  83 +2. 即使有值,如果name不是"color"/"size"/"material",前端也无法正确匹配
  84 +
  85 +## 问题根源
  86 +
  87 +### 问题1:category1_name 为空
  88 +
  89 +**原因**:
  90 +1. MySQL的`category_path`字段可能为空
  91 +2. Excel的"专辑名称"可能被映射到`category`字段而不是`category_path`
  92 +3. 如果`category_path`为空,`category1_name`不会被设置
  93 +
  94 +**验证方法**:
  95 +```sql
  96 +SELECT COUNT(*) as total,
  97 + COUNT(category_path) as has_category_path,
  98 + COUNT(category) as has_category
  99 +FROM shoplazza_product_spu
  100 +WHERE tenant_id = 162 AND deleted = 0;
  101 +```
  102 +
  103 +### 问题2:specifications 为空
  104 +
  105 +**原因**:
  106 +1. `shoplazza_product_option`表可能没有数据
  107 +2. option表的`name`字段值可能不是英文(不是"color"、"size"、"material")
  108 +
  109 +**验证方法**:
  110 +```sql
  111 +SELECT DISTINCT name, position, COUNT(*) as count
  112 +FROM shoplazza_product_option
  113 +WHERE tenant_id = 162 AND deleted = 0
  114 +GROUP BY name, position
  115 +ORDER BY position, name;
  116 +```
  117 +
  118 +## 解决方案
  119 +
  120 +### 方案1:修复 spu_transformer.py - 支持从category字段生成category1_name
  121 +
  122 +修改`indexer/spu_transformer.py`的`_transform_spu_to_doc`方法,如果`category_path`为空,使用`category`字段作为备选:
  123 +
  124 +```python
  125 +# Category相关字段
  126 +if pd.notna(spu_row.get('category_path')):
  127 + category_path = str(spu_row['category_path'])
  128 + doc['category_path_zh'] = category_path
  129 + doc['category_path_en'] = None
  130 +
  131 + # 解析category_path获取多层级分类名称
  132 + path_parts = category_path.split('/')
  133 + if len(path_parts) > 0:
  134 + doc['category1_name'] = path_parts[0].strip()
  135 + if len(path_parts) > 1:
  136 + doc['category2_name'] = path_parts[1].strip()
  137 + if len(path_parts) > 2:
  138 + doc['category3_name'] = path_parts[2].strip()
  139 +elif pd.notna(spu_row.get('category')):
  140 + # 如果category_path为空,使用category字段作为category1_name
  141 + category = str(spu_row['category'])
  142 + doc['category1_name'] = category.strip()
  143 + # 如果category包含"/",也尝试解析
  144 + if '/' in category:
  145 + path_parts = category.split('/')
  146 + if len(path_parts) > 0:
  147 + doc['category1_name'] = path_parts[0].strip()
  148 + if len(path_parts) > 1:
  149 + doc['category2_name'] = path_parts[1].strip()
  150 + if len(path_parts) > 2:
  151 + doc['category3_name'] = path_parts[2].strip()
  152 +```
  153 +
  154 +### 方案2:检查并修复 option 表的 name 字段值
  155 +
  156 +需要确保`shoplazza_product_option`表的`name`字段值是英文:
  157 +- position=1 的name应该是 `"color"`
  158 +- position=2 的name应该是 `"size"`
  159 +- position=3 的name应该是 `"material"`
  160 +
  161 +如果值不对,需要更新:
  162 +
  163 +```sql
  164 +-- 查看当前的name值
  165 +SELECT DISTINCT name, position
  166 +FROM shoplazza_product_option
  167 +WHERE tenant_id = 162 AND deleted = 0
  168 +ORDER BY position;
  169 +
  170 +-- 如果需要更新(示例)
  171 +-- UPDATE shoplazza_product_option
  172 +-- SET name = CASE position
  173 +-- WHEN 1 THEN 'color'
  174 +-- WHEN 2 THEN 'size'
  175 +-- WHEN 3 THEN 'material'
  176 +-- END
  177 +-- WHERE tenant_id = 162 AND deleted = 0;
  178 +```
  179 +
  180 +### 方案3:验证数据完整性
  181 +
  182 +使用诊断脚本检查数据:
  183 +
  184 +```bash
  185 +python scripts/check_data_source.py \
  186 + --tenant-id 162 \
  187 + --db-host <mysql_host> \
  188 + --db-port 3316 \
  189 + --db-database saas \
  190 + --db-username saas \
  191 + --db-password <password>
  192 +```
  193 +
  194 +## 诊断步骤
  195 +
  196 +### 步骤1:检查MySQL数据
  197 +
  198 +运行诊断脚本:
  199 +```bash
  200 +cd /home/tw/SearchEngine
  201 +source /home/tw/miniconda3/etc/profile.d/conda.sh
  202 +conda activate searchengine
  203 +python scripts/check_data_source.py --tenant-id 162 --db-host <host> --db-database saas --db-username saas --db-password <password>
  204 +```
  205 +
  206 +### 步骤2:根据检查结果修复
  207 +
  208 +#### 如果 category_path 为空:
  209 +- 使用方案1:修改`spu_transformer.py`支持从`category`字段生成`category1_name`
  210 +
  211 +#### 如果 option 表没有数据或name值不对:
  212 +- 检查Excel导入是否正确
  213 +- 如果需要,手动更新option表的name字段值
  214 +
  215 +### 步骤3:重新导入数据到ES
  216 +
  217 +```bash
  218 +python scripts/recreate_and_import.py \
  219 + --tenant-id 162 \
  220 + --db-host <host> \
  221 + --db-database saas \
  222 + --db-username saas \
  223 + --db-password <password> \
  224 + --es-host http://localhost:9200
  225 +```
  226 +
  227 +### 步骤4:验证ES数据
  228 +
  229 +检查ES索引中的文档:
  230 +
  231 +```bash
  232 +curl -X GET "http://localhost:9200/search_products/_search?pretty" -H 'Content-Type: application/json' -d'
  233 +{
  234 + "query": {
  235 + "term": {
  236 + "tenant_id": "162"
  237 + }
  238 + },
  239 + "size": 1,
  240 + "_source": ["spu_id", "title_zh", "category1_name", "specifications", "option1_name"]
  241 +}'
  242 +```
  243 +
  244 +## 预期结果
  245 +
  246 +修复后,ES文档应该包含:
  247 +
  248 +1. **category1_name字段**:
  249 + ```json
  250 + {
  251 + "category1_name": "电子产品"
  252 + }
  253 + ```
  254 +
  255 +2. **specifications字段**:
  256 + ```json
  257 + {
  258 + "specifications": [
  259 + {"sku_id": "123", "name": "color", "value": "Red"},
  260 + {"sku_id": "123", "name": "size", "value": "5"},
  261 + {"sku_id": "123", "name": "material", "value": "塑料"}
  262 + ]
  263 + }
  264 + ```
  265 +
  266 +3. **option1_name/2_name/3_name字段**:
  267 + ```json
  268 + {
  269 + "option1_name": "color",
  270 + "option2_name": "size",
  271 + "option3_name": "material"
  272 + }
  273 + ```
  274 +
  275 +## 总结
  276 +
  277 +问题可能出现在:
  278 +1. **MySQL数据层面**:`category_path`字段为空,或者`shoplazza_product_option`表没有正确的数据
  279 +2. **数据转换层面**:`spu_transformer.py`没有处理`category_path`为空的情况
  280 +
  281 +建议先运行诊断脚本检查MySQL数据,然后根据检查结果进行修复。
  282 +
... ...
docs/分面问题修复总结.md 0 → 100644
... ... @@ -0,0 +1,177 @@
  1 +# 分面数据问题修复总结
  2 +
  3 +## 问题现象
  4 +
  5 +前端显示的分面结果都是空的:
  6 +- Category: 空
  7 +- Color: 空
  8 +- Size: 空
  9 +- Material: 空
  10 +
  11 +ES的聚合查询结果也是空的。
  12 +
  13 +## 问题分析
  14 +
  15 +### 数据流程
  16 +
  17 +1. **数据生成**(csv_to_excel_multi_variant.py):
  18 + - 生成Excel文件,包含"专辑名称"(分类)和"款式1/2/3"(属性名称和值)
  19 +
  20 +2. **Excel导入店匠** → MySQL:
  21 + - "专辑名称" → 可能映射到 `category` 或 `category_path` 字段
  22 + - "款式1/2/3"(M行)→ `shoplazza_product_option.name`
  23 + - "款式1/2/3"(P行)→ `shoplazza_product_sku.option1/2/3`
  24 +
  25 +3. **MySQL → ES转换**(spu_transformer.py):
  26 + - `category1_name` 从 `category_path` 解析
  27 + - `specifications` 从 `option表.name` + `sku表.option1/2/3` 构建
  28 +
  29 +### 根本原因
  30 +
  31 +1. **category1_name 为空**:
  32 + - MySQL的`category_path`字段可能为空
  33 + - Excel的"专辑名称"可能被映射到`category`字段而不是`category_path`
  34 + - 原代码只从`category_path`解析,如果为空则`category1_name`不会被设置
  35 +
  36 +2. **specifications 为空**:
  37 + - `shoplazza_product_option`表可能没有数据
  38 + - 或`name`字段值不是英文(不是"color"、"size"、"material")
  39 +
  40 +## 已实施的修复
  41 +
  42 +### 修复1:支持从category字段生成category1_name
  43 +
  44 +**文件**: `indexer/spu_transformer.py`
  45 +
  46 +**修改内容**:
  47 +- 如果`category_path`为空,使用`category`字段作为备选
  48 +- 从`category`字段解析多级分类(如果包含"/")
  49 +- 如果`category`不包含"/",直接作为`category1_name`
  50 +
  51 +**代码位置**:第241-259行
  52 +
  53 +```python
  54 +elif pd.notna(spu_row.get('category')):
  55 + # 如果category_path为空,使用category字段作为category1_name的备选
  56 + category = str(spu_row['category'])
  57 + doc['category_name_zh'] = category
  58 + doc['category_name_en'] = None
  59 + doc['category_name'] = category
  60 +
  61 + # 尝试从category字段解析多级分类
  62 + if '/' in category:
  63 + path_parts = category.split('/')
  64 + if len(path_parts) > 0:
  65 + doc['category1_name'] = path_parts[0].strip()
  66 + if len(path_parts) > 1:
  67 + doc['category2_name'] = path_parts[1].strip()
  68 + if len(path_parts) > 2:
  69 + doc['category3_name'] = path_parts[2].strip()
  70 + else:
  71 + # 如果category不包含"/",直接作为category1_name
  72 + doc['category1_name'] = category.strip()
  73 +```
  74 +
  75 +## 诊断工具
  76 +
  77 +已创建诊断脚本:`scripts/check_data_source.py`
  78 +
  79 +**使用方法**:
  80 +```bash
  81 +cd /home/tw/SearchEngine
  82 +source /home/tw/miniconda3/etc/profile.d/conda.sh
  83 +conda activate searchengine
  84 +python scripts/check_data_source.py \
  85 + --tenant-id 162 \
  86 + --db-host <mysql_host> \
  87 + --db-port 3316 \
  88 + --db-database saas \
  89 + --db-username saas \
  90 + --db-password <password>
  91 +```
  92 +
  93 +**检查内容**:
  94 +1. SPU汇总信息
  95 +2. category_path 字段是否有值
  96 +3. option 表的 name 字段值
  97 +4. SKU 表的 option1/2/3 字段值
  98 +
  99 +## 下一步操作
  100 +
  101 +### 步骤1:运行诊断脚本检查MySQL数据
  102 +
  103 +```bash
  104 +python scripts/check_data_source.py --tenant-id 162 --db-host <host> ...
  105 +```
  106 +
  107 +### 步骤2:根据检查结果修复数据
  108 +
  109 +#### 如果 option 表的 name 值不对:
  110 +
  111 +检查option表的name字段值:
  112 +```sql
  113 +SELECT DISTINCT name, position
  114 +FROM shoplazza_product_option
  115 +WHERE tenant_id = 162 AND deleted = 0
  116 +ORDER BY position;
  117 +```
  118 +
  119 +如果需要,更新为英文:
  120 +- position=1 的 name 应该是 "color"
  121 +- position=2 的 name 应该是 "size"
  122 +- position=3 的 name 应该是 "material"
  123 +
  124 +### 步骤3:重新导入数据到ES
  125 +
  126 +```bash
  127 +python scripts/recreate_and_import.py \
  128 + --tenant-id 162 \
  129 + --db-host <host> \
  130 + --db-database saas \
  131 + --db-username saas \
  132 + --db-password <password> \
  133 + --es-host http://localhost:9200
  134 +```
  135 +
  136 +### 步骤4:验证ES数据
  137 +
  138 +检查ES索引中的文档是否包含:
  139 +- `category1_name` 字段
  140 +- `specifications` 字段(包含color、size、material)
  141 +- `option1_name`、`option2_name`、`option3_name` 字段
  142 +
  143 +```bash
  144 +curl -X GET "http://localhost:9200/search_products/_search?pretty" -H 'Content-Type: application/json' -d'
  145 +{
  146 + "query": {
  147 + "term": {
  148 + "tenant_id": "162"
  149 + }
  150 + },
  151 + "size": 1,
  152 + "_source": ["spu_id", "title_zh", "category1_name", "specifications", "option1_name", "option2_name", "option3_name"]
  153 +}'
  154 +```
  155 +
  156 +## 预期结果
  157 +
  158 +修复后,ES文档应该包含:
  159 +
  160 +```json
  161 +{
  162 + "spu_id": "123",
  163 + "title_zh": "商品标题",
  164 + "category1_name": "电子产品",
  165 + "specifications": [
  166 + {"sku_id": "456", "name": "color", "value": "Red"},
  167 + {"sku_id": "456", "name": "size", "value": "5"},
  168 + {"sku_id": "456", "name": "material", "value": "塑料"}
  169 + ],
  170 + "option1_name": "color",
  171 + "option2_name": "size",
  172 + "option3_name": "material"
  173 +}
  174 +```
  175 +
  176 +前端分面应该能正常显示分类和属性值。
  177 +
... ...
docs/分面问题最终诊断.md 0 → 100644
... ... @@ -0,0 +1,115 @@
  1 +# 分面问题最终诊断报告
  2 +
  3 +## ES数据检查结果
  4 +
  5 +根据ES索引检查结果:
  6 +
  7 +### ✅ specifications 分面有数据
  8 +ES聚合查询显示:
  9 +- **specifications.color**: 有数据(Beige: 1226, Khaki: 1176, Red: 1168等)
  10 +- **specifications.size**: 有数据(1: 1234, 12: 1234等)
  11 +- **specifications.material**: 有数据(塑料英文包装: 17277等)
  12 +
  13 +**结论**:ES中确实有specifications数据,聚合查询能正常返回结果。
  14 +
  15 +### ❌ category1_name 几乎都为空
  16 +- 总文档数:10000
  17 +- 有category1_name的文档:只有1个
  18 +- 该文档的category1_name值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式,不是分类名称)
  19 +
  20 +**结论**:category1_name字段几乎都是空的,导致category分面为空。
  21 +
  22 +## 问题根源分析
  23 +
  24 +### 问题1:category1_name 为什么为空
  25 +
  26 +**MySQL数据情况**(从诊断脚本结果):
  27 +- `category_path`字段:11253个SPU为空,只有1个有值
  28 +- 该唯一值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式,不是路径格式)
  29 +
  30 +**当前代码逻辑**(`spu_transformer.py`第228-240行):
  31 +```python
  32 +if pd.notna(spu_row.get('category_path')):
  33 + category_path = str(spu_row['category_path'])
  34 + # 直接按"/"分割,但ID列表格式是逗号分隔的
  35 + path_parts = category_path.split('/')
  36 + # 如果category_path是ID列表,path_parts只有一个元素(整个ID列表)
  37 +```
  38 +
  39 +**问题**:
  40 +1. 对于ID列表格式的`category_path`(如`593389466647815326,593389582007954165,593389582008019701`),按"/"分割后只有一个元素,会被错误地作为`category1_name`
  41 +2. 对于空的`category_path`,会进入`elif`分支,使用`category`字段作为备选;但如果`category`字段也为空,`category1_name`仍然不会被设置
  42 +
  43 +**需要检查**:
  44 +- MySQL的`category`字段是否有值?如果有值,应该能生成`category1_name`
  45 +- 如果`category`字段也为空,说明Excel导入时"专辑名称"没有正确映射
  46 +
  47 +### 问题2:specifications 分面查询为什么为空
  48 +
  49 +虽然ES聚合查询显示有数据,但前端显示为空,可能原因:
  50 +
  51 +1. **前端分面请求格式**:
  52 + - 前端请求:`["category1_name", "specifications.color", "specifications.size", "specifications.material"]`
  53 + - ES构建的聚合名称:`specifications_color_facet`(注意:是下划线,不是点号)
  54 + - 字段匹配可能有问题
  55 +
  56 +2. **ES聚合结果解析**:
  57 + - ES返回的聚合字段名:`specifications_color_facet`
  58 + - 前端期望的field:`specifications.color`
  59 + - `format_facets`函数需要正确匹配
  60 +
  61 +## 具体数据说明
  62 +
  63 +### MySQL数据情况
  64 +- **总SPU数**:11254
  65 +- **有category_path的SPU**:1个(值是ID列表格式)
  66 +- **有option定义的SPU**:886个
  67 + - position=1, name='color': 885个
  68 + - position=2, name='size': 885个
  69 + - position=3, name='material': 885个
  70 +- **总SKU数**:43109个
  71 +
  72 +### ES数据情况
  73 +- **specifications数据**:有数据,能够正常聚合
  74 +- **category1_name数据**:几乎都是空的(只有1个,而且是ID列表格式)
  75 +
  76 +## 解决方案
  77 +
  78 +### 立即执行的操作
  79 +
  80 +1. **检查MySQL的category字段**:
  81 + - 运行诊断脚本检查`category`字段是否有值
  82 + - 如果`category`有值,修复后的代码应该能生成`category1_name`
  83 + - 如果`category`也为空,需要检查Excel导入映射
  84 +
  85 +2. **重新导入数据到ES**:
  86 + ```bash
  87 + python scripts/recreate_and_import.py \
  88 + --tenant-id 162 \
  89 + --db-host <host> \
  90 + --db-database saas \
  91 + --db-username saas \
  92 + --db-password <password> \
  93 + --es-host http://localhost:9200
  94 + ```
  95 +
  96 +3. **验证ES数据**:
  97 + - 检查`category1_name`字段是否有值
  98 + - 检查`specifications`字段是否有数据
  99 +
  100 +### 如果category字段也为空
  101 +
  102 +需要检查Excel导入到店匠系统时,"专辑名称"字段是否正确映射到MySQL的`category`字段。
  103 +
  104 +## 关键发现
  105 +
  106 +1. **specifications数据是存在的**:ES聚合查询能正常返回color/size/material的分面数据
  107 +2. **category1_name几乎都是空的**:这是因为`category_path`为空,而且可能`category`字段也为空
  108 +3. **需要从category字段生成category1_name**:代码已修复,但需要确保MySQL的`category`字段有值
  109 +
  110 +## 下一步
  111 +
  112 +1. 检查MySQL的`category`字段是否有值
  113 +2. 如果有值,重新导入数据到ES
  114 +3. 如果也为空,需要检查Excel导入映射或修复数据
  115 +
... ...
docs/分面问题诊断和修复指南.md 0 → 100644
... ... @@ -0,0 +1,203 @@
  1 +# 分面数据问题诊断和修复指南
  2 +
  3 +## 问题现象
  4 +
  5 +前端显示的分面结果都是空的:
  6 +- Category: 空
  7 +- Color: 空
  8 +- Size: 空
  9 +- Material: 空
  10 +
  11 +搜索请求对应的ES聚合查询结果也是空的(注意:直接对所有文档执行聚合查询时,specifications是有数据的,见下文"问题2")。
  12 +
  13 +## 诊断结果分析
  14 +
  15 +### MySQL数据情况
  16 +
  17 +| 字段/表 | 有数据的数量 | 说明 |
  18 +|---------|-------------|------|
  19 +| 总SPU数 | 11254 | - |
  20 +| category_path有值 | 1个 | 该值是ID列表格式(不是路径格式) |
  21 +| category字段 | 需要检查 | 可能是空的 |
  22 +| option表记录 | 2658条 | 886个SPU有option定义 |
  23 +| position=1, name='color' | 885个SPU | ✅ 数量足够 |
  24 +| position=2, name='size' | 885个SPU | ✅ 数量足够 |
  25 +| position=3, name='material' | 885个SPU | ✅ 数量足够 |
  26 +| 总SKU数 | 43109 | option1/2/3字段需要检查 |
  27 +
  28 +### ES索引数据情况
  29 +
  30 +| 字段 | 有数据的数量 | 说明 |
  31 +|------|-------------|------|
  32 +| 总文档数 | 10000 | - |
  33 +| category1_name有值 | 1个 | 该值是ID列表格式 ❌ |
  34 +| specifications聚合查询 | 有数据 | ✅ color/size/material都有数据 |
  35 +
  36 +## 问题根源
  37 +
  38 +### 问题1:category1_name 几乎都为空 ❌
  39 +
  40 +**原因分析**:
  41 +
  42 +1. **MySQL数据层面**:
  43 + - `category_path`字段几乎都是空的(只有1个,且是ID列表格式)
  44 + - 需要检查`category`字段是否有值
  45 +
  46 +2. **数据转换层面**:
  47 + - 原代码只从`category_path`解析`category1_name`
  48 + - 如果`category_path`为空,`category1_name`不会被设置
  49 + - ✅ **已修复**:如果`category_path`为空,使用`category`字段作为备选(`spu_transformer.py`第241-259行)
  50 +
  51 +3. **Excel导入映射**:
  52 + - Excel的"专辑名称"字段可能映射到MySQL的`category`字段
  53 + - 需要确认映射关系
  54 +
  55 +### 问题2:specifications分面查询无结果
  56 +
  57 +**奇怪现象**:
  58 +- ES聚合查询(查询所有文档)显示有数据
  59 +- 但前端显示为空
  60 +
  61 +**可能原因**:
  62 +1. 前端搜索时有查询条件,过滤后没有匹配的文档
  63 +2. 分面聚合构建或解析有问题
  64 +3. tenant_id不匹配
  65 +
  66 +## 数据流程分析
  67 +
  68 +### 1. Excel生成阶段
  69 +
  70 +**脚本**:`scripts/csv_to_excel_multi_variant.py`
  71 +
  72 +**生成的数据**:
  73 +- `'专辑名称': csv_data['categoryName']` - 从CSV的categoryName字段读取
  74 +- `'款式1': 'color'`(M行主商品)- 选项名称
  75 +- `'款式2': 'size'`(M行主商品)- 选项名称
  76 +- `'款式3': 'material'`(M行主商品)- 选项名称
  77 +- `'款式1': 'Red'`(P行子款式)- 选项值(从COLORS列表随机选择)
  78 +- `'款式2': '5'`(P行子款式)- 选项值(1-30随机选择)
  79 +- `'款式3': '塑料'`(P行子款式)- 选项值(从商品标题提取)
  80 +
  81 +### 2. Excel导入店匠 → MySQL
  82 +
  83 +**映射关系**(需要确认):
  84 +- Excel `'专辑名称'` → MySQL `shoplazza_product_spu.category` 或 `category_path`
  85 +- Excel `'款式1/2/3'`(M行)→ MySQL `shoplazza_product_option.name` + `position`
  86 +- Excel `'款式1/2/3'`(P行)→ MySQL `shoplazza_product_sku.option1/2/3`
  87 +
  88 +**当前情况**:
  89 +- ✅ option表有数据:885个SPU有color/size/material选项名称
  90 +- ❓ category字段:需要检查是否有值
  91 +
  92 +### 3. MySQL → ES转换
  93 +
  94 +**代码逻辑**(`indexer/spu_transformer.py`):
  95 +
  96 +1. **category1_name生成**(第228-259行):
  97 + ```python
  98 + if pd.notna(spu_row.get('category_path')):
  99 + # 从category_path解析
  100 + path_parts = category_path.split('/')
  101 + doc['category1_name'] = path_parts[0].strip()
  102 + elif pd.notna(spu_row.get('category')):
  103 + # 从category字段解析(已修复)
  104 + doc['category1_name'] = category.strip()
  105 + ```
  106 +
  107 +2. **specifications生成**(第351-370行):
  108 + ```python
  109 + # 从option表获取name映射
  110 + option_name_map = {position: name}
  111 + # 从SKU表获取option值
  112 + if pd.notna(sku_row.get('option1')) and 1 in option_name_map:
  113 + specifications.append({
  114 + 'name': option_name_map[1], # 'color'
  115 + 'value': str(sku_row['option1']) # 'Red'
  116 + })
  117 + ```
  118 +
  119 +## 解决方案
  120 +
  121 +### 步骤1:检查MySQL的category字段
  122 +
  123 +**运行更新后的诊断脚本**:
  124 +```bash
  125 +cd /home/tw/SearchEngine
  126 +source /home/tw/miniconda3/etc/profile.d/conda.sh
  127 +conda activate searchengine
  128 +python scripts/check_data_source.py --tenant-id 162 --db-host <host> ...
  129 +```
  130 +
  131 +**关键检查**:
  132 +- `category`字段是否有值
  133 +- 如果有值,值的格式是什么(是否包含"/")
  134 +
  135 +**如果category字段也为空**:
  136 +- 说明Excel导入时"专辑名称"没有正确映射到MySQL
  137 +- 需要检查店匠系统的字段映射配置
  138 +
  139 +### 步骤2:重新导入数据到ES
  140 +
  141 +**修复代码后,必须重新导入数据才能生效**:
  142 +```bash
  143 +python scripts/recreate_and_import.py \
  144 + --tenant-id 162 \
  145 + --db-host <host> \
  146 + --db-database saas \
  147 + --db-username saas \
  148 + --db-password <password> \
  149 + --es-host http://localhost:9200
  150 +```
  151 +
  152 +### 步骤3:验证ES数据
  153 +
  154 +**运行ES数据检查脚本**:
  155 +```bash
  156 +python scripts/check_es_data.py --tenant-id 162
  157 +```
  158 +
  159 +**检查内容**:
  160 +- `category1_name`字段是否有值
  161 +- `specifications`字段是否有数据
  162 +- 分面聚合查询是否有结果
  163 +
  164 +## 预期结果
  165 +
  166 +修复后,ES文档应该包含:
  167 +
  168 +```json
  169 +{
  170 + "spu_id": "123",
  171 + "title_zh": "商品标题",
  172 + "category1_name": "电子产品", // 从category字段生成
  173 + "specifications": [
  174 + {"sku_id": "456", "name": "color", "value": "Red"},
  175 + {"sku_id": "456", "name": "size", "value": "5"},
  176 + {"sku_id": "456", "name": "material", "value": "塑料"}
  177 + ],
  178 + "option1_name": "color",
  179 + "option2_name": "size",
  180 + "option3_name": "material"
  181 +}
  182 +```
  183 +
  184 +## 关键检查点
  185 +
  186 +### 1. MySQL数据检查
  187 +
  188 +- [ ] `category`字段是否有值
  189 +- [ ] `category_path`字段是否为空
  190 +- [ ] `option表`的`name`字段是否是英文(color/size/material)
  191 +- [ ] SKU表的`option1/2/3`字段是否有值
  192 +
  193 +### 2. ES数据检查
  194 +
  195 +- [ ] `category1_name`字段是否有值
  196 +- [ ] `specifications`字段是否有数据
  197 +- [ ] 分面聚合查询是否有结果
  198 +
  199 +### 3. 数据导入验证
  200 +
  201 +- [ ] 重新导入数据后,检查ES文档是否正确
  202 +- [ ] 验证分面查询是否能正常返回结果
  203 +
... ...
docs/ES常用表达式.md renamed to docs/常用查询 - ES.md
docs/常用查询 - sql.sql 0 → 100644
... ... @@ -0,0 +1,254 @@
  1 +-- 查询今天入库的SPU和SKU商品数据
  2 +-- 用于查询当天新增的商品信息
  3 +
  4 +-- ======================================
  5 +-- 1. 查询今天入库的SPU商品
  6 +-- ======================================
  7 +
  8 +-- 查询今天创建的SPU商品(SPU级别)
  9 +SELECT
  10 + spu.id AS spu_id,
  11 + spu.tenant_id,
  12 + spu.shop_id,
  13 + spu.shoplazza_id AS shoplazza_product_id,
  14 + spu.title AS product_title,
  15 + spu.description AS product_description,
  16 + spu.brief AS product_brief,
  17 + spu.vendor AS brand_name,
  18 + spu.category AS product_category,
  19 + spu.category_path AS category_path,
  20 + spu.handle AS product_handle,
  21 + spu.tags AS product_tags,
  22 + spu.published AS product_published,
  23 + spu.published_at AS publish_time,
  24 + spu.image_src AS main_image_url,
  25 + spu.image_width AS main_image_width,
  26 + spu.image_height AS main_image_height,
  27 + spu.create_time AS spu_create_time,
  28 + spu.update_time AS spu_update_time,
  29 + CASE
  30 + WHEN spu.deleted = 1 THEN '已删除'
  31 + ELSE '正常'
  32 + END AS spu_status
  33 +FROM shoplazza_product_spu spu
  34 +WHERE DATE(spu.create_time) = CURDATE() -- 今天的日期
  35 + AND spu.deleted = 0 -- 未删除的商品
  36 +ORDER BY spu.create_time DESC;
  37 +
  38 +-- ======================================
  39 +-- 2. 查询今天入库的SKU商品
  40 +-- ======================================
  41 +
  42 +-- 查询今天创建的SKU商品(SKU级别)
  43 +SELECT
  44 + sku.id AS sku_id,
  45 + sku.tenant_id,
  46 + sku.shop_id,
  47 + sku.spu_id,
  48 + sku.shoplazza_id AS variant_id,
  49 + sku.shoplazza_product_id AS shoplazza_product_id,
  50 + sku.sku AS sku_code,
  51 + sku.title AS sku_title,
  52 + sku.price AS sku_price,
  53 + sku.compare_at_price AS compare_price,
  54 + sku.cost_price AS cost_price,
  55 + sku.inventory_quantity AS stock_quantity,
  56 + sku.weight AS product_weight,
  57 + sku.weight_unit AS weight_unit,
  58 + sku.option1 AS color_option,
  59 + sku.option2 AS size_option,
  60 + sku.option3 AS material_option,
  61 + sku.image_src AS sku_image_url,
  62 + sku.barcode AS barcode,
  63 + sku.position AS variant_position,
  64 + sku.create_time AS sku_create_time,
  65 + sku.update_time AS sku_update_time,
  66 + CASE
  67 + WHEN sku.deleted = 1 THEN '已删除'
  68 + ELSE '正常'
  69 + END AS sku_status
  70 +FROM shoplazza_product_sku sku
  71 +WHERE DATE(sku.create_time) = CURDATE() -- 今天的日期
  72 + AND sku.deleted = 0 -- 未删除的商品
  73 +ORDER BY sku.create_time DESC;
  74 +
  75 +-- ======================================
  76 +-- 3. 关联查询今天入库的SPU及其对应的SKU
  77 +-- ======================================
  78 +
  79 +-- 查询今天创建的SPU及其关联的SKU信息
  80 +SELECT
  81 + spu.id AS spu_id,
  82 + spu.tenant_id,
  83 + spu.shop_id,
  84 + spu.shoplazza_id AS shoplazza_product_id,
  85 + spu.title AS product_title,
  86 + spu.vendor AS brand_name,
  87 + spu.tags AS product_tags,
  88 + spu.published AS product_published,
  89 + spu.create_time AS spu_create_time,
  90 +
  91 + -- 聚合SKU信息
  92 + COUNT(sku.id) AS sku_count,
  93 + COALESCE(MIN(sku.price), 0) AS min_price,
  94 + COALESCE(MAX(sku.price), 0) AS max_price,
  95 + COALESCE(SUM(sku.inventory_quantity), 0) AS total_stock,
  96 + GROUP_CONCAT(DISTINCT sku.option1 ORDER BY sku.option1 SEPARATOR ', ') AS available_colors,
  97 + GROUP_CONCAT(DISTINCT sku.option2 ORDER BY sku.option2 SEPARATOR ', ') AS available_sizes,
  98 + GROUP_CONCAT(DISTINCT sku.option3 ORDER BY sku.option3 SEPARATOR ', ') AS available_materials
  99 +
  100 +FROM shoplazza_product_spu spu
  101 +LEFT JOIN shoplazza_product_sku sku ON spu.id = sku.spu_id
  102 + AND spu.tenant_id = sku.tenant_id
  103 + AND sku.deleted = 0
  104 +WHERE DATE(spu.create_time) = CURDATE() -- 今天创建的SPU
  105 + AND spu.deleted = 0 -- 未删除的SPU
  106 +GROUP BY spu.id, spu.tenant_id, spu.shop_id, spu.shoplazza_id,
  107 + spu.title, spu.vendor, spu.tags, spu.published, spu.create_time
  108 +ORDER BY spu.create_time DESC;
  109 +
  110 +-- ======================================
  111 +-- 4. 查询今天入库商品的数量统计
  112 +-- ======================================
  113 +
  114 +-- 统计今天入库的商品数量
  115 +SELECT
  116 + 'SPU商品' AS data_type,
  117 + COUNT(*) AS today_count,
  118 + DATE(CURDATE()) AS statistics_date
  119 +FROM shoplazza_product_spu
  120 +WHERE DATE(create_time) = CURDATE()
  121 + AND deleted = 0
  122 +
  123 +UNION ALL
  124 +
  125 +SELECT
  126 + 'SKU商品' AS data_type,
  127 + COUNT(*) AS today_count,
  128 + DATE(CURDATE()) AS statistics_date
  129 +FROM shoplazza_product_sku
  130 +WHERE DATE(create_time) = CURDATE()
  131 + AND deleted = 0
  132 +
  133 +UNION ALL
  134 +
  135 +SELECT
  136 + '活跃店铺' AS data_type,
  137 + COUNT(DISTINCT shop_id) AS today_count,
  138 + DATE(CURDATE()) AS statistics_date
  139 +FROM shoplazza_product_spu
  140 +WHERE DATE(create_time) = CURDATE()
  141 + AND deleted = 0
  142 +
  143 +UNION ALL
  144 +
  145 +SELECT
  146 + '活跃租户' AS data_type,
  147 + COUNT(DISTINCT tenant_id) AS today_count,
  148 + DATE(CURDATE()) AS statistics_date
  149 +FROM shoplazza_product_spu
  150 +WHERE DATE(create_time) = CURDATE()
  151 + AND deleted = 0;
  152 +
  153 +-- ======================================
  154 +-- 5. 按租户统计今天入库的商品
  155 +-- ======================================
  156 +
  157 +-- 按租户统计今天入库的商品分布
  158 +SELECT
  159 + spu.tenant_id,
  160 + COUNT(DISTINCT spu.id) AS spu_count,
  161 + COUNT(DISTINCT sku.id) AS sku_count,
  162 + COUNT(DISTINCT spu.shop_id) AS shop_count,
  163 + COALESCE(SUM(sku.inventory_quantity), 0) AS total_inventory,
  164 + COALESCE(AVG(sku.price), 0) AS avg_price
  165 +FROM shoplazza_product_spu spu
  166 +LEFT JOIN shoplazza_product_sku sku ON spu.id = sku.spu_id
  167 + AND spu.tenant_id = sku.tenant_id
  168 + AND sku.deleted = 0
  169 +WHERE DATE(spu.create_time) = CURDATE() -- 今天的日期
  170 + AND spu.deleted = 0 -- 未删除的SPU
  171 +GROUP BY spu.tenant_id
  172 +ORDER BY spu_count DESC;
  173 +
  174 +-- ======================================
  175 +-- 6. 查询今天入库商品的图片信息
  176 +-- ======================================
  177 +
  178 +-- 查询今天入库商品的主图信息(从SPU表获取)
  179 +SELECT
  180 + spu.tenant_id,
  181 + spu.shop_id,
  182 + spu.shoplazza_id AS shoplazza_product_id,
  183 + spu.image_src AS image_url,
  184 + spu.image_width AS image_width,
  185 + spu.image_height AS image_height,
  186 + spu.image_path AS image_path,
  187 + spu.image_alt AS image_alt,
  188 + spu.create_time AS product_create_time,
  189 + CASE
  190 + WHEN spu.deleted = 1 THEN '已删除'
  191 + ELSE '正常'
  192 + END AS image_status
  193 +FROM shoplazza_product_spu spu
  194 +WHERE DATE(spu.create_time) = CURDATE() -- 今天入库的商品
  195 + AND spu.deleted = 0 -- 未删除的商品
  196 + AND spu.image_src IS NOT NULL -- 有图片的商品
  197 +ORDER BY spu.tenant_id, spu.shop_id, spu.shoplazza_id;
  198 +
  199 +-- ======================================
  200 +-- 7. 查询今天入库商品的详细信息(含图片)
  201 +-- ======================================
  202 +
  203 +-- 完整的今天入库商品信息(包含图片)
  204 +SELECT
  205 + spu.id AS spu_id,
  206 + spu.tenant_id,
  207 + spu.shop_id,
  208 + spu.shoplazza_id AS shoplazza_product_id,
  209 + spu.title AS product_title,
  210 + spu.description AS product_description,
  211 + spu.brief AS product_brief,
  212 + spu.vendor AS brand_name,
  213 + spu.category AS product_category,
  214 + spu.category_path AS category_path,
  215 + spu.handle AS product_handle,
  216 + spu.tags AS product_tags,
  217 + spu.published AS product_published,
  218 + spu.published_at AS publish_time,
  219 + spu.create_time AS spu_create_time,
  220 +
  221 + -- SKU信息聚合
  222 + COALESCE(sku_summary.sku_count, 0) AS variant_count,
  223 + COALESCE(sku_summary.min_price, 0) AS min_price,
  224 + COALESCE(sku_summary.max_price, 0) AS max_price,
  225 + COALESCE(sku_summary.total_stock, 0) AS total_inventory,
  226 +
  227 + -- 主图信息(从SPU表直接获取)
  228 + COALESCE(spu.image_src, '') AS main_image_url,
  229 + COALESCE(spu.image_width, 0) AS main_image_width,
  230 + COALESCE(spu.image_height, 0) AS main_image_height,
  231 + COALESCE(spu.image_path, '') AS main_image_path,
  232 + COALESCE(spu.image_alt, '') AS main_image_alt
  233 +
  234 +FROM shoplazza_product_spu spu
  235 +
  236 +-- 关联SKU统计信息
  237 +LEFT JOIN (
  238 + SELECT
  239 + spu_id,
  240 + tenant_id,
  241 + COUNT(*) AS sku_count,
  242 + MIN(price) AS min_price,
  243 + MAX(price) AS max_price,
  244 + SUM(inventory_quantity) AS total_stock
  245 + FROM shoplazza_product_sku
  246 + WHERE DATE(create_time) = CURDATE() -- 今天的SKU
  247 + AND deleted = 0
  248 + GROUP BY spu_id, tenant_id
  249 +) sku_summary ON spu.id = sku_summary.spu_id
  250 + AND spu.tenant_id = sku_summary.tenant_id
  251 +
  252 +WHERE DATE(spu.create_time) = CURDATE() -- 今天的SPU
  253 + AND spu.deleted = 0 -- 未删除的SPU
  254 +ORDER BY spu.create_time DESC;
0 255 \ No newline at end of file
... ...
docs/搜索API对接指南.md
... ... @@ -27,7 +27,7 @@
27 27 ```bash
28 28 curl -X POST "http://120.76.41.98:6002/search/" \
29 29 -H "Content-Type: application/json" \
30   - -H "X-Tenant-ID: 2" \
  30 + -H "X-Tenant-ID: 162" \
31 31 -d '{"query": "芭比娃娃"}'
32 32 ```
33 33  
... ... @@ -36,7 +36,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \
36 36 ```bash
37 37 curl -X POST "http://120.76.41.98:6002/search/" \
38 38 -H "Content-Type: application/json" \
39   - -H "X-Tenant-ID: 2" \
  39 + -H "X-Tenant-ID: 162" \
40 40 -d '{
41 41 "query": "芭比娃娃",
42 42 "size": 5,
... ... @@ -60,11 +60,10 @@ curl -X POST "http://120.76.41.98:6002/search/" \
60 60 ```bash
61 61 curl -X POST "http://120.76.41.98:6002/search/" \
62 62 -H "Content-Type: application/json" \
63   - -H "X-Tenant-ID: 2" \
  63 + -H "X-Tenant-ID: 162" \
64 64 -d '{
65   - "tenant_id": "demo-tenant",
66 65 "query": "芭比娃娃",
67   - "facets": ["category.keyword", "specifications.color", "specifications.size"],
  66 + "facets": ["category1_name", "specifications.color", "specifications.size", "specifications.material"],
68 67 "min_score": 0.2
69 68 }'
70 69 ```
... ... @@ -291,10 +290,10 @@ curl -X POST "http://120.76.41.98:6002/search/" \
291 290 **模式2:指定规格名称的分面** (`"specifications.color"`):
292 291 ```json
293 292 {
294   - "facets": ["specifications.color", "specifications.size"]
  293 + "facets": ["specifications.color", "specifications.size", "specifications.material"]
295 294 }
296 295 ```
297   -只返回指定规格名称的值列表。格式:`specifications.{name}`,其中 `{name}` 是规格名称(如"color"、"size")。
  296 +只返回指定规格名称的值列表。格式:`specifications.{name}`,其中 `{name}` 是规格名称(如"color"、"size"、"material")。
298 297  
299 298 **返回格式示例**:
300 299 ```json
... ... @@ -879,9 +878,9 @@ curl "http://localhost:6002/search/instant?q=玩具&size=5"
879 878 {
880 879 "id": "12345",
881 880 "source": {
882   - "title": "芭比时尚娃娃",
  881 + "title_zh": "芭比时尚娃娃",
883 882 "min_price": 89.99,
884   - "category.keyword": "玩具"
  883 + "category1_name": "玩具"
885 884 }
886 885 }
887 886 ```
... ...
docs/搜索API速查表.md
... ... @@ -60,7 +60,7 @@ POST /search/
60 60 ```bash
61 61 {
62 62 "range_filters": {
63   - "price": {
  63 + "min_price": {
64 64 "gte": 50, // >=
65 65 "lte": 200 // <=
66 66 }
... ... @@ -94,7 +94,7 @@ POST /search/
94 94 **指定规格名称**:
95 95 ```bash
96 96 {
97   - "facets": ["specifications.color", "specifications.size"] // 只返回指定name的value列表
  97 + "facets": ["specifications.color", "specifications.size", "specifications.material"] // 只返回指定name的value列表
98 98 }
99 99 ```
100 100  
... ... @@ -114,7 +114,8 @@ POST /search/
114 114 },
115 115 "specifications", // 所有规格名称
116 116 "specifications.color", // 指定规格名称
117   - "specifications.size"
  117 + "specifications.size",
  118 + "specifications.material"
118 119 ]
119 120 }
120 121 ```
... ... @@ -190,7 +191,7 @@ POST /search/
190 191  
191 192 ```bash
192 193 POST /search/
193   -Headers: X-Tenant-ID: 2
  194 +Headers: X-Tenant-ID: 162
194 195 {
195 196 "query": "手机",
196 197 "size": 20,
... ...
docs/索引字段说明v2-参考表结构.md
1   -spu表全部字段
  1 +spu表 shoplazza_product_spu 全部字段
2 2 "Field" "Type" "Null" "Key" "Default" "Extra"
3 3 "id" "bigint(20)" "NO" "PRI" "auto_increment"
4 4 "shop_id" "bigint(20)" "NO" "MUL" ""
... ... @@ -46,7 +46,7 @@ spu表全部字段
46 46 "update_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "on update CURRENT_TIMESTAMP"
47 47 "deleted" "bit(1)" "NO" "" "b'0'" ""
48 48  
49   -sku全部字段
  49 +shoplazza_product_sku 全部字段
50 50 "Field" "Type" "Null" "Key" "Default" "Extra"
51 51 "id" "bigint(20)" "NO" "PRI" "auto_increment"
52 52 "spu_id" "bigint(20)" "NO" "MUL" ""
... ...
docs/索引字段说明v2.md
... ... @@ -124,7 +124,7 @@
124 124 {
125 125 "query": "手机",
126 126 "filters": {
127   - "specifications": {
  127 + "specifications": {
128 128 "name": "color",
129 129 "value": "white"
130 130 }
... ... @@ -141,21 +141,21 @@
141 141 {"name": "color", "value": "white"},
142 142 {"name": "size", "value": "256GB"}
143 143 ]
144   - }
  144 + }
145 145 }
146 146 ```
147 147  
148 148 **ES 查询结构**(后端自动生成):
149 149 ```json
150 150 {
151   - "nested": {
152   - "path": "specifications",
153   - "query": {
154   - "bool": {
155   - "must": [
  151 + "nested": {
  152 + "path": "specifications",
  153 + "query": {
  154 + "bool": {
  155 + "must": [
156 156 { "term": { "specifications.name": "color" } },
157 157 { "term": { "specifications.value": "white" } }
158   - ]
  158 + ]
159 159 }
160 160 }
161 161 }
... ... @@ -202,7 +202,7 @@
202 202 }
203 203 }
204 204 ```
205   -
  205 +
206 206 指定规格名称:
207 207 ```json
208 208 {
... ... @@ -402,3 +402,4 @@ filters AND (text_recall OR embedding_recall)
402 402 3. **多语言支持**: 文本字段支持中英文,后端根据 `language` 参数自动选择
403 403 4. **规格分面**: `specifications` 使用嵌套聚合,按 `name` 分组,然后按 `value` 聚合
404 404 5. **向量字段**: `title_embedding` 和 `image_embedding` 仅用于搜索,不返回给前端
  405 +
405 406 \ No newline at end of file
... ...
frontend/index.html
... ... @@ -32,22 +32,28 @@
32 32  
33 33 <!-- Filter Section -->
34 34 <div class="filter-section" id="filterSection">
35   - <!-- Category Filter -->
  35 + <!-- Category Filter (一级分类) -->
36 36 <div class="filter-row">
37   - <div class="filter-label">Categories:</div>
38   - <div class="filter-tags" id="categoryTags"></div>
  37 + <div class="filter-label">Category:</div>
  38 + <div class="filter-tags" id="category1Tags"></div>
39 39 </div>
40 40  
41   - <!-- Brand Filter -->
  41 + <!-- Color Filter -->
42 42 <div class="filter-row">
43   - <div class="filter-label">Brand:</div>
44   - <div class="filter-tags" id="brandTags"></div>
  43 + <div class="filter-label">Color:</div>
  44 + <div class="filter-tags" id="colorTags"></div>
45 45 </div>
46 46  
47   - <!-- Supplier Filter -->
  47 + <!-- Size Filter -->
48 48 <div class="filter-row">
49   - <div class="filter-label">Supplier:</div>
50   - <div class="filter-tags" id="supplierTags"></div>
  49 + <div class="filter-label">Size:</div>
  50 + <div class="filter-tags" id="sizeTags"></div>
  51 + </div>
  52 +
  53 + <!-- Material Filter -->
  54 + <div class="filter-row">
  55 + <div class="filter-label">Material:</div>
  56 + <div class="filter-tags" id="materialTags"></div>
51 57 </div>
52 58  
53 59 <!-- Dropdown Filters -->
... ... @@ -124,6 +130,6 @@
124 130 <p>SearchEngine © 2025 | API: <span id="apiUrl">Loading...</span></p>
125 131 </footer>
126 132  
127   - <script src="/static/js/app.js?v=3.1"></script>
  133 + <script src="/static/js/app.js?v=3.2"></script>
128 134 </body>
129 135 </html>
... ...
frontend/static/js/app.js
... ... @@ -31,9 +31,6 @@ let state = {
31 31  
32 32 // Initialize
33 33 document.addEventListener('DOMContentLoaded', function() {
34   - console.log('SearchEngine loaded');
35   - console.log('Debug mode: always enabled (test frontend)');
36   -
37 34 document.getElementById('searchInput').focus();
38 35 });
39 36  
... ... @@ -71,33 +68,12 @@ async function performSearch(page = 1) {
71 68  
72 69 const from = (page - 1) * state.pageSize;
73 70  
74   - // Define facets (简化配置)
  71 + // Define facets (一级分类 + 三个属性分面)
75 72 const facets = [
76   - {
77   - "field": "category.keyword",
78   - "size": 15,
79   - "type": "terms"
80   - },
81   - {
82   - "field": "vendor.keyword",
83   - "size": 15,
84   - "type": "terms"
85   - },
86   - {
87   - "field": "tags.keyword",
88   - "size": 10,
89   - "type": "terms"
90   - },
91   - {
92   - "field": "min_price",
93   - "type": "range",
94   - "ranges": [
95   - {"key": "0-50", "to": 50},
96   - {"key": "50-100", "from": 50, "to": 100},
97   - {"key": "100-200", "from": 100, "to": 200},
98   - {"key": "200+", "from": 200}
99   - ]
100   - }
  73 + "category1_name", // 一级分类
  74 + "specifications.color", // 颜色属性
  75 + "specifications.size", // 尺寸属性
  76 + "specifications.material" // 材质属性
101 77 ];
102 78  
103 79 // Show loading
... ... @@ -210,43 +186,91 @@ function displayResults(data) {
210 186 grid.innerHTML = html;
211 187 }
212 188  
213   -// Display facets as filter tags (重构版 - 标准化格式)
  189 +// Display facets as filter tags (一级分类 + 三个属性分面)
214 190 function displayFacets(facets) {
215   - if (!facets) return;
  191 + if (!facets || !Array.isArray(facets)) {
  192 + return;
  193 + }
216 194  
217   - facets.forEach(facet => {
  195 + facets.forEach((facet) => {
218 196 // 根据字段名找到对应的容器
219 197 let containerId = null;
220 198 let maxDisplay = 10;
221 199  
222   - if (facet.field === 'category.keyword') {
223   - containerId = 'categoryTags';
  200 + // 一级分类
  201 + if (facet.field === 'category1_name') {
  202 + containerId = 'category1Tags';
224 203 maxDisplay = 10;
225   - } else if (facet.field === 'vendor.keyword') {
226   - containerId = 'brandTags';
  204 + }
  205 + // 颜色属性分面 (specifications.color)
  206 + else if (facet.field === 'specifications.color') {
  207 + containerId = 'colorTags';
  208 + maxDisplay = 10;
  209 + }
  210 + // 尺寸属性分面 (specifications.size)
  211 + else if (facet.field === 'specifications.size') {
  212 + containerId = 'sizeTags';
  213 + maxDisplay = 10;
  214 + }
  215 + // 材质属性分面 (specifications.material)
  216 + else if (facet.field === 'specifications.material') {
  217 + containerId = 'materialTags';
227 218 maxDisplay = 10;
228   - } else if (facet.field === 'tags.keyword') {
229   - containerId = 'supplierTags';
230   - maxDisplay = 8;
231 219 }
232 220  
233   - if (!containerId) return;
  221 + if (!containerId) {
  222 + return;
  223 + }
234 224  
235 225 const container = document.getElementById(containerId);
236   - if (!container) return;
  226 + if (!container) {
  227 + return;
  228 + }
  229 +
  230 + // 检查values是否存在且是数组
  231 + if (!facet.values || !Array.isArray(facet.values) || facet.values.length === 0) {
  232 + container.innerHTML = '';
  233 + return;
  234 + }
237 235  
238 236 let html = '';
239 237  
240 238 // 渲染分面值
241   - facet.values.slice(0, maxDisplay).forEach(facetValue => {
  239 + facet.values.slice(0, maxDisplay).forEach((facetValue) => {
  240 + if (!facetValue || typeof facetValue !== 'object') {
  241 + return;
  242 + }
  243 +
242 244 const value = facetValue.value;
243 245 const count = facetValue.count;
244   - const selected = facetValue.selected;
  246 +
  247 + // 允许value为0或空字符串,但不允许undefined/null
  248 + if (value === undefined || value === null) {
  249 + return;
  250 + }
  251 +
  252 + // 检查是否已选中
  253 + let selected = false;
  254 + if (facet.field.startsWith('specifications.')) {
  255 + // 检查specifications过滤
  256 + const specName = facet.field.split('.')[1];
  257 + if (state.filters.specifications) {
  258 + const specs = Array.isArray(state.filters.specifications)
  259 + ? state.filters.specifications
  260 + : [state.filters.specifications];
  261 + selected = specs.some(spec => spec && spec.name === specName && spec.value === value);
  262 + }
  263 + } else {
  264 + // 检查普通字段过滤
  265 + if (state.filters[facet.field]) {
  266 + selected = state.filters[facet.field].includes(value);
  267 + }
  268 + }
245 269  
246 270 html += `
247 271 <span class="filter-tag ${selected ? 'active' : ''}"
248   - onclick="toggleFilter('${escapeAttr(facet.field)}', '${escapeAttr(value)}')">
249   - ${escapeHtml(value)} (${count})
  272 + onclick="toggleFilter('${escapeAttr(facet.field)}', '${escapeAttr(String(value))}')">
  273 + ${escapeHtml(String(value))} (${count || 0})
250 274 </span>
251 275 `;
252 276 });
... ... @@ -255,20 +279,56 @@ function displayFacets(facets) {
255 279 });
256 280 }
257 281  
258   -// Toggle filter
  282 +// Toggle filter (支持specifications嵌套过滤)
259 283 function toggleFilter(field, value) {
260   - if (!state.filters[field]) {
261   - state.filters[field] = [];
262   - }
263   -
264   - const index = state.filters[field].indexOf(value);
265   - if (index > -1) {
266   - state.filters[field].splice(index, 1);
267   - if (state.filters[field].length === 0) {
268   - delete state.filters[field];
  284 + // 处理specifications属性过滤 (specifications.color, specifications.size, specifications.material)
  285 + if (field.startsWith('specifications.')) {
  286 + const specName = field.split('.')[1]; // 提取name (color, size, material)
  287 +
  288 + // 初始化specifications过滤
  289 + if (!state.filters.specifications) {
  290 + state.filters.specifications = [];
  291 + }
  292 +
  293 + // 确保是数组格式
  294 + if (!Array.isArray(state.filters.specifications)) {
  295 + // 如果已经是单个对象,转换为数组
  296 + state.filters.specifications = [state.filters.specifications];
  297 + }
  298 +
  299 + // 查找是否已存在相同的name和value组合
  300 + const existingIndex = state.filters.specifications.findIndex(
  301 + spec => spec.name === specName && spec.value === value
  302 + );
  303 +
  304 + if (existingIndex > -1) {
  305 + // 移除
  306 + state.filters.specifications.splice(existingIndex, 1);
  307 + if (state.filters.specifications.length === 0) {
  308 + delete state.filters.specifications;
  309 + } else if (state.filters.specifications.length === 1) {
  310 + // 如果只剩一个,可以保持为数组,或转换为单个对象(API都支持)
  311 + // 这里保持为数组,更一致
  312 + }
  313 + } else {
  314 + // 添加
  315 + state.filters.specifications.push({ name: specName, value: value });
269 316 }
270 317 } else {
271   - state.filters[field].push(value);
  318 + // 处理普通字段过滤 (category1_name等)
  319 + if (!state.filters[field]) {
  320 + state.filters[field] = [];
  321 + }
  322 +
  323 + const index = state.filters[field].indexOf(value);
  324 + if (index > -1) {
  325 + state.filters[field].splice(index, 1);
  326 + if (state.filters[field].length === 0) {
  327 + delete state.filters[field];
  328 + }
  329 + } else {
  330 + state.filters[field].push(value);
  331 + }
272 332 }
273 333  
274 334 performSearch(1); // Reset to page 1
... ...
indexer/mapping_generator.py
... ... @@ -19,13 +19,13 @@ DEFAULT_MAPPING_FILE = Path(__file__).parent.parent / "mappings" / "search_produ
19 19  
20 20  
21 21 def load_mapping(mapping_file: str = None) -> Dict[str, Any]:
22   - """
  22 + """
23 23 Load Elasticsearch mapping from JSON file.
24 24  
25   - Args:
  25 + Args:
26 26 mapping_file: Path to mapping JSON file. If None, uses default.
27 27  
28   - Returns:
  28 + Returns:
29 29 Dictionary containing index configuration (settings + mappings)
30 30  
31 31 Raises:
... ... @@ -66,8 +66,8 @@ def create_index_if_not_exists(es_client, index_name: str, mapping: Dict[str, An
66 66 mapping = load_mapping()
67 67  
68 68 if es_client.create_index(index_name, mapping):
69   - logger.info(f"Index '{index_name}' created successfully")
70   - return True
  69 + logger.info(f"Index '{index_name}' created successfully")
  70 + return True
71 71 else:
72 72 logger.error(f"Failed to create index '{index_name}'")
73 73 return False
... ... @@ -89,8 +89,8 @@ def delete_index_if_exists(es_client, index_name: str) -> bool:
89 89 return False
90 90  
91 91 if es_client.delete_index(index_name):
92   - logger.info(f"Index '{index_name}' deleted successfully")
93   - return True
  92 + logger.info(f"Index '{index_name}' deleted successfully")
  93 + return True
94 94 else:
95 95 logger.error(f"Failed to delete index '{index_name}'")
96 96 return False
... ... @@ -114,8 +114,8 @@ def update_mapping(es_client, index_name: str, new_fields: Dict[str, Any]) -> bo
114 114  
115 115 mapping = {"properties": new_fields}
116 116 if es_client.update_mapping(index_name, mapping):
117   - logger.info(f"Mapping updated for index '{index_name}'")
118   - return True
  117 + logger.info(f"Mapping updated for index '{index_name}'")
  118 + return True
119 119 else:
120 120 logger.error(f"Failed to update mapping for index '{index_name}'")
121 121 return False
... ...
indexer/spu_transformer.py
... ... @@ -238,12 +238,35 @@ class SPUTransformer:
238 238 doc['category2_name'] = path_parts[1].strip()
239 239 if len(path_parts) > 2:
240 240 doc['category3_name'] = path_parts[2].strip()
  241 + elif pd.notna(spu_row.get('category')):
  242 + # 如果category_path为空,使用category字段作为category1_name的备选
  243 + category = str(spu_row['category'])
  244 + doc['category_name_zh'] = category
  245 + doc['category_name_en'] = None
  246 + doc['category_name'] = category
  247 +
  248 + # 尝试从category字段解析多级分类
  249 + if '/' in category:
  250 + path_parts = category.split('/')
  251 + if len(path_parts) > 0:
  252 + doc['category1_name'] = path_parts[0].strip()
  253 + if len(path_parts) > 1:
  254 + doc['category2_name'] = path_parts[1].strip()
  255 + if len(path_parts) > 2:
  256 + doc['category3_name'] = path_parts[2].strip()
  257 + else:
  258 + # 如果category不包含"/",直接作为category1_name
  259 + doc['category1_name'] = category.strip()
241 260  
242 261 if pd.notna(spu_row.get('category')):
  262 + # 确保category相关字段都被设置(如果前面没有设置)
243 263 category_name = str(spu_row['category'])
244   - doc['category_name_zh'] = category_name
245   - doc['category_name_en'] = None
246   - doc['category_name'] = category_name
  264 + if 'category_name_zh' not in doc:
  265 + doc['category_name_zh'] = category_name
  266 + if 'category_name_en' not in doc:
  267 + doc['category_name_en'] = None
  268 + if 'category_name' not in doc:
  269 + doc['category_name'] = category_name
247 270  
248 271 if pd.notna(spu_row.get('category_id')):
249 272 doc['category_id'] = str(int(spu_row['category_id']))
... ... @@ -459,7 +482,7 @@ class SPUTransformer:
459 482 sku_data['option2_value'] = str(sku_row['option2'])
460 483 if pd.notna(sku_row.get('option3')):
461 484 sku_data['option3_value'] = str(sku_row['option3'])
462   -
  485 +
463 486 # Image src
464 487 if pd.notna(sku_row.get('image_src')):
465 488 sku_data['image_src'] = str(sku_row['image_src'])
... ...
scripts/check_data_source.py 0 → 100755
... ... @@ -0,0 +1,301 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +诊断脚本:检查MySQL数据源中分类和规格信息是否正确
  4 +
  5 +检查:
  6 +1. category_path 字段是否有值
  7 +2. category_path 格式是否正确(应该能被解析为 category1_name)
  8 +3. shoplazza_product_option 表的 name 字段是否有值(应该是 "color", "size", "material")
  9 +4. shoplazza_product_sku 表的 option1/2/3 字段是否有值
  10 +"""
  11 +
  12 +import sys
  13 +import argparse
  14 +from pathlib import Path
  15 +from sqlalchemy import create_engine, text
  16 +
  17 +# Add parent directory to path
  18 +sys.path.insert(0, str(Path(__file__).parent.parent))
  19 +
  20 +from utils.db_connector import create_db_connection
  21 +
  22 +
  23 +def check_category_path(db_engine, tenant_id: str):
  24 + """检查 category_path 和 category 字段"""
  25 + print("\n" + "="*60)
  26 + print("1. 检查 category_path 和 category 字段")
  27 + print("="*60)
  28 +
  29 + query = text("""
  30 + SELECT
  31 + COUNT(*) as total,
  32 + COUNT(category_path) as has_category_path,
  33 + COUNT(*) - COUNT(category_path) as null_category_path,
  34 + COUNT(category) as has_category,
  35 + COUNT(*) - COUNT(category) as null_category
  36 + FROM shoplazza_product_spu
  37 + WHERE tenant_id = :tenant_id AND deleted = 0
  38 + """)
  39 +
  40 + with db_engine.connect() as conn:
  41 + result = conn.execute(query, {"tenant_id": tenant_id}).fetchone()
  42 + total = result[0]
  43 + has_category_path = result[1]
  44 + null_category_path = result[2]
  45 + has_category = result[3]
  46 + null_category = result[4]
  47 +
  48 + print(f"总SPU数: {total}")
  49 + print(f"有 category_path 的SPU: {has_category_path}")
  50 + print(f"category_path 为空的SPU: {null_category_path}")
  51 + print(f"有 category 的SPU: {has_category}")
  52 + print(f"category 为空的SPU: {null_category}")
  53 +
  54 + # 查看category字段的示例
  55 + if has_category > 0:
  56 + sample_query = text("""
  57 + SELECT id, title, category_path, category, category_id, category_level
  58 + FROM shoplazza_product_spu
  59 + WHERE tenant_id = :tenant_id
  60 + AND deleted = 0
  61 + AND category IS NOT NULL
  62 + LIMIT 5
  63 + """)
  64 + samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall()
  65 + print(f"\n示例数据(前5条有 category 的记录):")
  66 + for row in samples:
  67 + print(f" SPU ID: {row[0]}, Title: {row[1][:50] if row[1] else ''}")
  68 + print(f" category_path: {row[2]}")
  69 + print(f" category: '{row[3]}'")
  70 + print(f" category_id: {row[4]}, category_level: {row[5]}")
  71 +
  72 + # 解析 category 字段(用于生成 category1_name)
  73 + if row[3]:
  74 + category = str(row[3])
  75 + if '/' in category:
  76 + path_parts = category.split('/')
  77 + print(f" 解析后(按'/'分割): {path_parts}")
  78 + if len(path_parts) > 0:
  79 + print(f" → category1_name: '{path_parts[0].strip()}'")
  80 + else:
  81 + print(f" → category1_name: '{category.strip()}'(直接作为category1_name)")
  82 + else:
  83 + print("\n⚠️ 警告: 没有SPU有 category 值!")
  84 +
  85 + # 查看category_path的示例(如果有)
  86 + if has_category_path > 0:
  87 + sample_query = text("""
  88 + SELECT id, title, category_path, category
  89 + FROM shoplazza_product_spu
  90 + WHERE tenant_id = :tenant_id
  91 + AND deleted = 0
  92 + AND category_path IS NOT NULL
  93 + LIMIT 3
  94 + """)
  95 + samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall()
  96 + print(f"\n示例数据(有 category_path 的记录):")
  97 + for row in samples:
  98 + print(f" SPU ID: {row[0]}, Title: {row[1][:50] if row[1] else ''}")
  99 + print(f" category_path: '{row[2]}'")
  100 + print(f" category: '{row[3]}'")
  101 +
  102 + # 检查是否是ID列表格式
  103 + if row[2] and ',' in str(row[2]) and not '/' in str(row[2]):
  104 + print(f" ⚠️ 注意: category_path是ID列表格式(逗号分隔),不是路径格式")
  105 +
  106 +
  107 +def check_options(db_engine, tenant_id: str):
  108 + """检查 option 表的 name 字段"""
  109 + print("\n" + "="*60)
  110 + print("2. 检查 shoplazza_product_option 表的 name 字段")
  111 + print("="*60)
  112 +
  113 + query = text("""
  114 + SELECT
  115 + COUNT(*) as total_options,
  116 + COUNT(DISTINCT name) as distinct_names,
  117 + COUNT(DISTINCT spu_id) as spus_with_options
  118 + FROM shoplazza_product_option
  119 + WHERE tenant_id = :tenant_id AND deleted = 0
  120 + """)
  121 +
  122 + with db_engine.connect() as conn:
  123 + result = conn.execute(query, {"tenant_id": tenant_id}).fetchone()
  124 + total_options = result[0]
  125 + distinct_names = result[1]
  126 + spus_with_options = result[2]
  127 +
  128 + print(f"总 option 记录数: {total_options}")
  129 + print(f"不同的 name 数量: {distinct_names}")
  130 + print(f"有 option 定义的 SPU 数量: {spus_with_options}")
  131 +
  132 + if total_options > 0:
  133 + # 查看不同的 name 值
  134 + name_query = text("""
  135 + SELECT DISTINCT name, position, COUNT(*) as count
  136 + FROM shoplazza_product_option
  137 + WHERE tenant_id = :tenant_id AND deleted = 0
  138 + GROUP BY name, position
  139 + ORDER BY position, name
  140 + """)
  141 + names = conn.execute(name_query, {"tenant_id": tenant_id}).fetchall()
  142 + print(f"\n不同的 name 值:")
  143 + for row in names:
  144 + print(f" position={row[1]}, name='{row[0]}', count={row[2]}")
  145 +
  146 + # 查看一些示例
  147 + sample_query = text("""
  148 + SELECT spu_id, position, name, `values`
  149 + FROM shoplazza_product_option
  150 + WHERE tenant_id = :tenant_id AND deleted = 0
  151 + ORDER BY spu_id, position
  152 + LIMIT 10
  153 + """)
  154 + samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall()
  155 + print(f"\n示例数据(前10条 option 记录):")
  156 + for row in samples:
  157 + print(f" SPU ID: {row[0]}, position: {row[1]}, name: '{row[2]}', values: {row[3]}")
  158 + else:
  159 + print("\n⚠️ 警告: 没有 option 记录!")
  160 +
  161 +
def check_sku_options(db_engine, tenant_id: str):
    """Report how the option1/option2/option3 columns of the SKU table are filled.

    Runs one aggregate query against ``shoplazza_product_sku`` for the given
    tenant (non-deleted rows only), prints the counts, and then prints up to
    ten sample rows so the option values can be inspected by eye.

    Args:
        db_engine: Object exposing ``connect()`` as a context manager whose
            connection offers ``execute(statement, params)``.
        tenant_id: Tenant whose SKU rows are inspected.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("3. 检查 shoplazza_product_sku 表的 option1/2/3 字段")
    print(banner)

    params = {"tenant_id": tenant_id}

    # COUNT(column) only counts rows where the column is non-NULL.
    stats_sql = text("""
        SELECT
            COUNT(*) as total_skus,
            COUNT(option1) as has_option1,
            COUNT(option2) as has_option2,
            COUNT(option3) as has_option3,
            COUNT(DISTINCT spu_id) as distinct_spus
        FROM shoplazza_product_sku
        WHERE tenant_id = :tenant_id AND deleted = 0
    """)

    # Labels in the same order as the columns selected above.
    labels = (
        "总 SKU 数",
        "有 option1 的 SKU",
        "有 option2 的 SKU",
        "有 option3 的 SKU",
        "不同的 SPU 数量",
    )

    with db_engine.connect() as conn:
        stats = conn.execute(stats_sql, params).fetchone()
        for column, label in enumerate(labels):
            print(f"{label}: {stats[column]}")

        if not stats[0] > 0:
            print("\n⚠️ 警告: 没有 SKU 记录!")
            return

        # Show a handful of concrete rows.
        sample_sql = text("""
            SELECT spu_id, id, option1, option2, option3
            FROM shoplazza_product_sku
            WHERE tenant_id = :tenant_id AND deleted = 0
            ORDER BY spu_id, id
            LIMIT 10
        """)
        sample_rows = conn.execute(sample_sql, params).fetchall()
        print("\n示例数据(前10条 SKU 记录):")
        for spu_id, sku_id, opt1, opt2, opt3 in (tuple(r[k] for k in range(5)) for r in sample_rows):
            print(f" SPU ID: {spu_id}, SKU ID: {sku_id}")
            print(f" option1: '{opt1}', option2: '{opt2}', option3: '{opt3}'")
  209 +
  210 +
def check_spu_summary(db_engine, tenant_id: str):
    """Print overall SPU / SKU / option counts for one tenant.

    A single query joins the SPU table with the SKU and option tables
    (non-deleted rows of the tenant only) and prints five totals.

    Args:
        db_engine: Object exposing ``connect()`` as a context manager whose
            connection offers ``execute(statement, params)``.
        tenant_id: Tenant whose rows are summarised.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("4. SPU 汇总信息")
    print(banner)

    # Both LEFT JOINs fan out per SPU (SKUs x options); every figure is a
    # COUNT(DISTINCT ...), so the fan-out does not inflate the totals.
    summary_sql = text("""
        SELECT
            COUNT(DISTINCT spu.id) as total_spus,
            COUNT(DISTINCT sku.id) as total_skus,
            COUNT(DISTINCT opt.id) as total_options,
            COUNT(DISTINCT CASE WHEN spu.category_path IS NOT NULL THEN spu.id END) as spus_with_category_path,
            COUNT(DISTINCT opt.spu_id) as spus_with_options
        FROM shoplazza_product_spu spu
        LEFT JOIN shoplazza_product_sku sku ON spu.id = sku.spu_id AND sku.tenant_id = :tenant_id AND sku.deleted = 0
        LEFT JOIN shoplazza_product_option opt ON spu.id = opt.spu_id AND opt.tenant_id = :tenant_id AND opt.deleted = 0
        WHERE spu.tenant_id = :tenant_id AND spu.deleted = 0
    """)

    # Labels in the same order as the columns selected above.
    labels = (
        "总 SPU 数",
        "总 SKU 数",
        "总 option 记录数",
        "有 category_path 的 SPU",
        "有 option 定义的 SPU",
    )

    with db_engine.connect() as conn:
        summary = conn.execute(summary_sql, {"tenant_id": tenant_id}).fetchone()
        for column, label in enumerate(labels):
            print(f"{label}: {summary[column]}")
  243 +
  244 +
def main():
    """Command-line entry point: connect to MySQL and run all data-source checks.

    Connection settings come from the command line, falling back to the
    ``DB_HOST`` / ``DB_PORT`` / ``DB_DATABASE`` / ``DB_USERNAME`` /
    ``DB_PASSWORD`` environment variables (port defaults to 3306).

    Returns:
        0 when the checks were run, 1 when connection settings are
        incomplete or the connection could not be created.
    """
    cli = argparse.ArgumentParser(description='检查MySQL数据源中的分类和规格信息')
    cli.add_argument('--tenant-id', required=True, help='Tenant ID')
    cli.add_argument('--db-host', help='MySQL host (或使用环境变量 DB_HOST)')
    cli.add_argument('--db-port', type=int, help='MySQL port (或使用环境变量 DB_PORT, 默认: 3306)')
    cli.add_argument('--db-database', help='MySQL database (或使用环境变量 DB_DATABASE)')
    cli.add_argument('--db-username', help='MySQL username (或使用环境变量 DB_USERNAME)')
    cli.add_argument('--db-password', help='MySQL password (或使用环境变量 DB_PASSWORD)')
    args = cli.parse_args()

    # Resolve the connection settings: CLI value first, environment second.
    import os
    env = os.environ
    db_host = args.db_host or env.get('DB_HOST')
    db_port = args.db_port or int(env.get('DB_PORT', 3306))
    db_database = args.db_database or env.get('DB_DATABASE')
    db_username = args.db_username or env.get('DB_USERNAME')
    db_password = args.db_password or env.get('DB_PASSWORD')

    if not (db_host and db_database and db_username and db_password):
        print("错误: MySQL连接参数不完整")
        print("请提供 --db-host, --db-database, --db-username, --db-password")
        print("或设置环境变量: DB_HOST, DB_DATABASE, DB_USERNAME, DB_PASSWORD")
        return 1

    print(f"连接MySQL: {db_host}:{db_port}/{db_database}")
    print(f"Tenant ID: {args.tenant_id}")

    try:
        db_engine = create_db_connection(
            host=db_host,
            port=db_port,
            database=db_database,
            username=db_username,
            password=db_password,
        )
    except Exception as exc:
        print(f"✗ 连接MySQL失败: {exc}")
        return 1
    else:
        print("✓ MySQL连接成功\n")

    # Run the checks; the summary is printed first.
    check_spu_summary(db_engine, args.tenant_id)
    check_category_path(db_engine, args.tenant_id)
    check_options(db_engine, args.tenant_id)
    check_sku_options(db_engine, args.tenant_id)

    banner = "=" * 60
    print("\n" + banner)
    print("检查完成")
    print(banner)

    return 0
  297 +
  298 +
# Script entry point: use main()'s return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
  301 +
... ...
scripts/check_es_data.py 0 → 100755
... ... @@ -0,0 +1,266 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +检查ES索引中的实际数据,看分面字段是否有值
  4 +"""
  5 +
  6 +import sys
  7 +import os
  8 +import argparse
  9 +from pathlib import Path
  10 +
  11 +sys.path.insert(0, str(Path(__file__).parent.parent))
  12 +
  13 +from utils.es_client import ESClient
  14 +
  15 +
def check_es_facet_fields(es_client, tenant_id: str, size: int = 5):
    """Print the facet-related fields of a few sample documents of one tenant.

    Queries the ``search_products`` index for documents of ``tenant_id`` and
    prints, per document, the category fields, the option names and up to
    three ``specifications`` entries. Any error raised while querying or
    printing is reported on stdout/stderr instead of being propagated.

    Args:
        es_client: Object whose ``client`` attribute exposes
            ``search(index=..., body=...)`` and returns a mapping-like
            response.
        tenant_id: Tenant whose documents are sampled.
        size: Number of documents to fetch and print.
    """
    print("\n" + "="*60)
    print("检查ES索引中的分面字段数据")
    print("="*60)

    query = {
        "query": {
            "term": {
                "tenant_id": tenant_id
            }
        },
        "size": size,
        "_source": [
            "spu_id",
            "title_zh",
            "category1_name",
            "category2_name",
            "category3_name",
            "category_name",
            "category_path_zh",
            "specifications",
            "option1_name",
            "option2_name",
            "option3_name"
        ]
    }

    try:
        response = es_client.client.search(index="search_products", body=query)
        hits_section = response.get('hits', {})
        hits = hits_section.get('hits', [])

        # hits.total is an object ({"value": N, ...}) by default but a plain
        # integer when the server is asked to return totals as ints.
        total = hits_section.get('total', 0)
        if isinstance(total, dict):
            total = total.get('value', 0)

        print(f"\n总文档数: {total}")
        print(f"检查前 {len(hits)} 个文档:\n")

        for i, hit in enumerate(hits, 1):
            source = hit.get('_source') or {}
            # The field may be present with an explicit null; slicing None
            # would abort the report for every remaining document.
            title = source.get('title_zh') or ''

            print(f"文档 {i}:")
            print(f" spu_id: {source.get('spu_id')}")
            print(f" title_zh: {title[:50]}")
            print(f" category1_name: {source.get('category1_name')}")
            print(f" category2_name: {source.get('category2_name')}")
            print(f" category3_name: {source.get('category3_name')}")
            print(f" category_name: {source.get('category_name')}")
            print(f" category_path_zh: {source.get('category_path_zh')}")
            print(f" option1_name: {source.get('option1_name')}")
            print(f" option2_name: {source.get('option2_name')}")
            print(f" option3_name: {source.get('option3_name')}")

            specs = source.get('specifications') or []
            # A single specification may come back as one object rather
            # than a list of objects; normalise so slicing works.
            if isinstance(specs, dict):
                specs = [specs]

            if specs:
                print(f" specifications 数量: {len(specs)}")
                # Show only the first three specifications.
                for spec in specs[:3]:
                    print(f" - name: {spec.get('name')}, value: {spec.get('value')}")
            else:
                print(f" specifications: 空")
            print()

    except Exception as e:
        # Diagnostic script: report the problem and let the caller continue
        # with the remaining checks.
        print(f"错误: {e}")
        import traceback
        traceback.print_exc()
  80 +
  81 +
def _spec_value_agg(spec_name: str, size: int = 10) -> dict:
    """Build the nested aggregation counting values of one specification name."""
    return {
        "nested": {
            "path": "specifications"
        },
        "aggs": {
            "filter_by_name": {
                "filter": {
                    "term": {
                        "specifications.name": spec_name
                    }
                },
                "aggs": {
                    "value_counts": {
                        "terms": {
                            "field": "specifications.value",
                            "size": size
                        }
                    }
                }
            }
        }
    }


def _print_buckets(buckets) -> None:
    """Print one ``key: doc_count`` line per bucket, or a note when there are none."""
    if buckets:
        for bucket in buckets:
            print(f" {bucket['key']}: {bucket['doc_count']}")
    else:
        print(" 空(没有数据)")


def check_facet_aggregations(es_client, tenant_id: str):
    """Run the facet aggregations for one tenant and print their buckets.

    Sends a single ``size: 0`` search to the ``search_products`` index with a
    terms aggregation on ``category1_name`` and one nested aggregation per
    specification name (``color``, ``size``, ``material``), then prints the
    buckets of each. Any error raised while querying or printing is reported
    on stdout/stderr instead of being propagated.

    Args:
        es_client: Object whose ``client`` attribute exposes
            ``search(index=..., body=...)`` and returns a mapping-like
            response.
        tenant_id: Tenant whose documents are aggregated.
    """
    print("\n" + "="*60)
    print("检查分面聚合查询结果")
    print("="*60)

    spec_names = ("color", "size", "material")

    aggs_body = {
        "category1_facet": {
            "terms": {
                "field": "category1_name",
                "size": 10
            }
        }
    }
    # One identically shaped nested aggregation per specification name.
    for spec_name in spec_names:
        aggs_body[f"{spec_name}_facet"] = _spec_value_agg(spec_name)

    query = {
        "query": {
            "term": {
                "tenant_id": tenant_id
            }
        },
        "size": 0,
        "aggs": aggs_body
    }

    try:
        response = es_client.client.search(index="search_products", body=query)
        aggs = response.get('aggregations', {})

        print("\n1. category1_name 分面:")
        _print_buckets(aggs.get('category1_facet', {}).get('buckets', []))

        # Sections 2..4: the nested result sits under filter_by_name -> value_counts.
        for number, spec_name in enumerate(spec_names, 2):
            print(f"\n{number}. specifications.{spec_name} 分面:")
            spec_agg = aggs.get(f"{spec_name}_facet", {})
            value_counts = spec_agg.get('filter_by_name', {}).get('value_counts', {})
            _print_buckets(value_counts.get('buckets', []))

    except Exception as e:
        # Diagnostic script: report the problem and let the caller continue.
        print(f"错误: {e}")
        import traceback
        traceback.print_exc()
  221 +
  222 +
def main():
    """Command-line entry point: connect to Elasticsearch and run both facet checks.

    The host comes from ``--es-host`` or the ``ES_HOST`` environment variable
    (default ``http://localhost:9200``); credentials are read from
    ``ES_USERNAME`` / ``ES_PASSWORD`` and used only when both are set.

    Returns:
        0 when the checks were run, 1 when Elasticsearch could not be reached.
    """
    cli = argparse.ArgumentParser(description='检查ES索引中的分面字段数据')
    cli.add_argument('--tenant-id', required=True, help='Tenant ID')
    cli.add_argument('--es-host', help='Elasticsearch host (或使用环境变量 ES_HOST, 默认: http://localhost:9200)')
    cli.add_argument('--size', type=int, default=5, help='检查的文档数量 (默认: 5)')
    args = cli.parse_args()

    # Resolve the connection settings.
    env = os.environ
    es_host = args.es_host or env.get('ES_HOST', 'http://localhost:9200')
    es_username = env.get('ES_USERNAME')
    es_password = env.get('ES_PASSWORD')

    print(f"连接Elasticsearch: {es_host}")
    print(f"Tenant ID: {args.tenant_id}\n")

    try:
        # Pass credentials only when both parts are available.
        client_kwargs = {"hosts": [es_host]}
        if es_username and es_password:
            client_kwargs["username"] = es_username
            client_kwargs["password"] = es_password
        es_client = ESClient(**client_kwargs)

        if not es_client.ping():
            print(f"✗ 无法连接到Elasticsearch: {es_host}")
            return 1
        print("✓ Elasticsearch连接成功\n")
    except Exception as exc:
        print(f"✗ 连接Elasticsearch失败: {exc}")
        return 1

    # Inspect sample documents, then the aggregation results.
    check_es_facet_fields(es_client, args.tenant_id, args.size)
    check_facet_aggregations(es_client, args.tenant_id)

    banner = "=" * 60
    print("\n" + banner)
    print("检查完成")
    print(banner)

    return 0
  262 +
  263 +
# Script entry point: use main()'s return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
  266 +
... ...
scripts/test_facet_api.py 0 → 100755
... ... @@ -0,0 +1,131 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +测试脚本:模拟前端请求,检查后端返回的分面结果
  4 +"""
  5 +
  6 +import sys
  7 +import json
  8 +import requests
  9 +import argparse
  10 +from pathlib import Path
  11 +
  12 +sys.path.insert(0, str(Path(__file__).parent.parent))
  13 +
  14 +
def main():
    """Send one faceted search request to the API and print the facets it returns.

    Posts an empty-query search asking for the ``category1_name`` facet and
    three ``specifications.*`` facets, prints a summary of every returned
    facet, looks specifically for ``specifications.color``, and finally dumps
    the first 500 characters of the facet JSON.

    Returns:
        0 on success; 1 when the API answers with a non-200 status, returns
        an empty facet list, or the request raises.
    """
    cli = argparse.ArgumentParser(description='测试分面API')
    cli.add_argument('--api-url', type=str, default='http://localhost:6002/search/', help='API URL')
    cli.add_argument('--tenant-id', type=str, required=True, help='Tenant ID')
    args = cli.parse_args()

    divider = "=" * 60

    # Facet request modelled on the frontend's; per the original author it
    # matches frontend/static/js/app.js.
    payload = {
        "query": "",  # empty query: match everything
        "size": 10,
        "from": 0,
        "facets": [
            "category1_name",
            "specifications.color",
            "specifications.size",
            "specifications.material",
        ],
    }
    http_headers = {
        "Content-Type": "application/json",
        "X-Tenant-ID": args.tenant_id,
    }

    try:
        print(f"发送请求到: {args.api_url}")
        print(f"Tenant ID: {args.tenant_id}")
        print("请求数据:")
        print(json.dumps(payload, indent=2, ensure_ascii=False))
        print("\n" + divider)

        response = requests.post(args.api_url, json=payload, headers=http_headers, timeout=30)
        if response.status_code != 200:
            print(f"API错误: {response.status_code}")
            print(response.text)
            return 1

        data = response.json()

        print("API响应:")
        print(f" 总结果数: {data.get('total', 0)}")
        print(f" 返回结果数: {len(data.get('results', []))}")

        facets = data.get('facets', [])
        print(f"\n分面数量: {len(facets)}")
        if not facets:
            print("\n⚠ 分面列表为空!")
            return 1

        print("\n" + divider)
        print("分面详情:")
        print(divider)

        for position, facet in enumerate(facets, 1):
            print(f"\n{position}. {facet.get('field')}")
            print(f" 标签: {facet.get('label')}")
            print(f" 类型: {facet.get('type')}")
            facet_values = facet.get('values', [])
            print(f" 值数量: {len(facet_values)}")
            print(f" 总计数: {facet.get('total_count', 0)}")

            if not facet_values:
                print(" ⚠ 值列表为空!")
                continue
            print(" 前5个值:")
            for entry in facet_values[:5]:
                print(f" - {entry.get('value')}: {entry.get('count')}")

        # Look specifically at the specifications.color facet.
        print("\n" + divider)
        print("检查specifications.color分面:")
        print(divider)

        color_facet = next(
            (candidate for candidate in facets if candidate.get('field') == 'specifications.color'),
            None,
        )
        if not color_facet:
            print("✗ 未找到specifications.color分面")
            print(f" 可用分面字段: {[f.get('field') for f in facets]}")
        else:
            color_values = color_facet.get('values', [])
            print("✓ 找到specifications.color分面")
            print(f" 值数量: {len(color_values)}")
            if color_values:
                print(" 前10个值:")
                for entry in color_values[:10]:
                    print(f" {entry.get('value')}: {entry.get('count')}")
            else:
                print(" ⚠ 值列表为空!")

        # Dump the start of the raw facet JSON to help debugging.
        print("\n" + divider)
        print("完整分面JSON(前500字符):")
        print(divider)
        print(json.dumps(facets, indent=2, ensure_ascii=False)[:500])

    except requests.exceptions.ConnectionError:
        print(f"\n连接错误: 无法连接到API服务器 {args.api_url}")
        print("请确保后端服务正在运行")
        return 1
    except Exception as exc:
        print(f"\n错误: {exc}")
        import traceback
        traceback.print_exc()
        return 1

    return 0
  127 +
  128 +
# Script entry point: use main()'s return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
  131 +
... ...