Commit a10a89a395b0189e3f533aeb48ce9d1df068ccd4
1 parent
acf1349c
构造测试数据用于测试分类 和 三种属性的分面。
Showing
26 changed files
with
2766 additions
and
182 deletions
Show diff stats
README.md
| ... | ... | @@ -2,10 +2,31 @@ |
| 2 | 2 | |
| 3 | 3 | 一个针对跨境独立站(店匠 Shoplazza 等)的多租户可配置搜索平台。README 作为项目导航入口,帮助你在不同阶段定位到更详细的文档。 |
| 4 | 4 | |
| 5 | + | |
| 5 | 6 | ## 项目环境 |
| 6 | 7 | source /home/tw/miniconda3/etc/profile.d/conda.sh |
| 7 | 8 | conda activate searchengine |
| 8 | 9 | |
| 10 | +## 测试pipeline | |
| 11 | + | |
| 12 | +使用 fake 数据生成商品导入文件(Excel),并提交到店匠的店铺: | |
| 13 | +cd /home/tw/SearchEngine && source /home/tw/miniconda3/etc/profile.d/conda.sh && conda activate searchengine && python scripts/csv_to_excel_multi_variant.py --output with_colors.xlsx | |
| 14 | + | |
| 15 | +提交后,商品数据会自动同步到 MySQL。 | |
| 16 | +再将 MySQL 中的数据导入 ES: | |
| 17 | + | |
| 18 | +python scripts/recreate_and_import.py \ | |
| 19 | + --tenant-id 162 \ | |
| 20 | + --db-host <mysql_host> \ | |
| 21 | + --db-database saas \ | |
| 22 | + --db-username saas \ | |
| 23 | + --db-password <password> \ | |
| 24 | + --es-host http://localhost:9200 | |
| 25 | + | |
| 26 | +构造查询: | |
| 27 | +参考 @ | |
| 28 | + | |
| 29 | + | |
| 9 | 30 | ## 核心能力速览 |
| 10 | 31 | |
| 11 | 32 | - **多语言 + 自动翻译**:中文、英文、俄文等语言检测与路由(BGE-M3、DeepL) | ... | ... |
api/result_formatter.py
| ... | ... | @@ -268,33 +268,38 @@ class ResultFormatter: |
| 268 | 268 | facets.append(facet) |
| 269 | 269 | continue |
| 270 | 270 | |
| 271 | - # 处理specifications嵌套分面(指定name) | |
| 272 | - if field_name.startswith("specifications_") and field_name.endswith("_facet") and 'filter_by_name' in agg_data: | |
| 273 | - # 提取name(从 "specifications_颜色_facet" 提取 "颜色") | |
| 271 | + # 处理specifications嵌套分面(指定name,如 specifications.color) | |
| 272 | + if field_name.startswith("specifications_") and field_name.endswith("_facet"): | |
| 273 | + # 提取name(从 "specifications_color_facet" 提取 "color") | |
| 274 | 274 | name = field_name[len("specifications_"):-len("_facet")] |
| 275 | - filter_by_name_agg = agg_data.get('filter_by_name', {}) | |
| 276 | - value_counts = filter_by_name_agg.get('value_counts', {}) | |
| 277 | 275 | |
| 278 | - values = [] | |
| 279 | - if 'buckets' in value_counts: | |
| 280 | - for value_bucket in value_counts['buckets']: | |
| 281 | - value = FacetValue( | |
| 282 | - value=value_bucket['key'], | |
| 283 | - label=str(value_bucket['key']), | |
| 284 | - count=value_bucket['doc_count'], | |
| 285 | - selected=False | |
| 286 | - ) | |
| 287 | - values.append(value) | |
| 276 | + # ES nested聚合返回结构: { "doc_count": N, "filter_by_name": { ... } } | |
| 277 | + # filter_by_name应该在agg_data的第一层 | |
| 278 | + filter_by_name_agg = agg_data.get('filter_by_name') | |
| 288 | 279 | |
| 289 | - # 创建分面结果 | |
| 290 | - facet = FacetResult( | |
| 291 | - field=f"specifications.{name}", | |
| 292 | - label=str(name), | |
| 293 | - type="terms", | |
| 294 | - values=values, | |
| 295 | - total_count=filter_by_name_agg.get('doc_count', 0) | |
| 296 | - ) | |
| 297 | - facets.append(facet) | |
| 280 | + if filter_by_name_agg: | |
| 281 | + value_counts = filter_by_name_agg.get('value_counts', {}) | |
| 282 | + | |
| 283 | + values = [] | |
| 284 | + if 'buckets' in value_counts and value_counts['buckets']: | |
| 285 | + for value_bucket in value_counts['buckets']: | |
| 286 | + value = FacetValue( | |
| 287 | + value=value_bucket['key'], | |
| 288 | + label=str(value_bucket['key']), | |
| 289 | + count=value_bucket['doc_count'], | |
| 290 | + selected=False | |
| 291 | + ) | |
| 292 | + values.append(value) | |
| 293 | + | |
| 294 | + # 创建分面结果 | |
| 295 | + facet = FacetResult( | |
| 296 | + field=f"specifications.{name}", | |
| 297 | + label=str(name), | |
| 298 | + type="terms", | |
| 299 | + values=values, | |
| 300 | + total_count=filter_by_name_agg.get('doc_count', 0) | |
| 301 | + ) | |
| 302 | + facets.append(facet) | |
| 298 | 303 | continue |
| 299 | 304 | |
| 300 | 305 | # Handle terms aggregation | ... | ... |
| ... | ... | @@ -0,0 +1,246 @@ |
| 1 | +# 数据库配置分析与建议 | |
| 2 | + | |
| 3 | +## 当前配置分析 | |
| 4 | + | |
| 5 | +从提供的 YAML 配置文件中,发现以下情况: | |
| 6 | + | |
| 7 | +### 1. 缺失的数据库配置 | |
| 8 | + | |
| 9 | +**当前配置文件中缺少直接的数据源配置**,需要添加以下配置: | |
| 10 | + | |
| 11 | +```yaml | |
| 12 | +spring: | |
| 13 | + datasource: | |
| 14 | + # 主数据源配置 | |
| 15 | + master: | |
| 16 | + url: jdbc:mysql://localhost:3306/saas | |
| 17 | + username: saas | |
| 18 | +      password: <password> | |
| 19 | +      driver-class-name: com.mysql.cj.jdbc.Driver | |
| 20 | + type: com.zaxxer.hikari.HikariDataSource | |
| 21 | + hikari: | |
| 22 | + maximum-pool-size: 20 | |
| 23 | + minimum-idle: 5 | |
| 24 | + connection-timeout: 30000 | |
| 25 | + idle-timeout: 600000 | |
| 26 | + max-lifetime: 1800000 | |
| 27 | + | |
| 28 | + # 动态数据源配置(多租户支持) | |
| 29 | + dynamic: | |
| 30 | + enabled: true | |
| 31 | + primary: master | |
| 32 | + strict: false | |
| 33 | + datasource: | |
| 34 | + # 店匠生产数据库 | |
| 35 | + shoplazza: | |
| 36 | + url: jdbc:mysql://120.79.247.228:3316/saas | |
| 37 | + username: saas | |
| 38 | +        password: <password> | |
| 39 | +        driver-class-name: com.mysql.cj.jdbc.Driver | |
| 40 | + type: com.zaxxer.hikari.HikariDataSource | |
| 41 | +``` | |
| 42 | + | |
| 43 | +### 2. 当前已有的相关配置 | |
| 44 | + | |
| 45 | +#### Redis 配置 | |
| 46 | +```yaml | |
| 47 | +spring: | |
| 48 | + data: | |
| 49 | + redis: | |
| 50 | + host: 127.0.0.1 | |
| 51 | + port: 6379 | |
| 52 | + database: 0 | |
| 53 | + timeout: 5000ms | |
| 54 | + lettuce: | |
| 55 | + pool: | |
| 56 | + max-active: 200 | |
| 57 | + max-idle: 20 | |
| 58 | + min-idle: 5 | |
| 59 | + max-wait: -1ms | |
| 60 | +``` | |
| 61 | + | |
| 62 | +#### MyBatis Plus 配置 | |
| 63 | +```yaml | |
| 64 | +mybatis-plus: | |
| 65 | + configuration: | |
| 66 | + map-underscore-to-camel-case: true | |
| 67 | + global-config: | |
| 68 | + db-config: | |
| 69 | + id-type: NONE | |
| 70 | + logic-delete-value: 1 | |
| 71 | + logic-not-delete-value: 0 | |
| 72 | +``` | |
| 73 | + | |
| 74 | +### 3. 从项目结构推断的数据库配置 | |
| 75 | + | |
| 76 | +基于之前分析的项目文件,完整的数据库配置应该包括: | |
| 77 | + | |
| 78 | +#### 3.1 连接池配置 | |
| 79 | +```yaml | |
| 80 | +spring: | |
| 81 | + datasource: | |
| 82 | + master: | |
| 83 | + hikari: | |
| 84 | + # 连接池最大连接数 | |
| 85 | + maximum-pool-size: 20 | |
| 86 | + # 连接池最小空闲连接数 | |
| 87 | + minimum-idle: 5 | |
| 88 | + # 连接超时时间(毫秒) | |
| 89 | + connection-timeout: 30000 | |
| 90 | + # 空闲连接超时时间(毫秒) | |
| 91 | + idle-timeout: 600000 | |
| 92 | + # 连接最大生命周期(毫秒) | |
| 93 | + max-lifetime: 1800000 | |
| 94 | + # 连接测试查询 | |
| 95 | + connection-test-query: SELECT 1 | |
| 96 | +``` | |
| 97 | + | |
| 98 | +#### 3.2 多数据源配置 | |
| 99 | +```yaml | |
| 100 | +spring: | |
| 101 | + datasource: | |
| 102 | + dynamic: | |
| 103 | + enabled: true | |
| 104 | + primary: master | |
| 105 | + strict: false | |
| 106 | + datasource: | |
| 107 | + # 主库(读写) | |
| 108 | + master: | |
| 109 | + url: jdbc:mysql://120.79.247.228:3316/saas | |
| 110 | + username: saas | |
| 111 | +          password: <password> | |
| 112 | +          driver-class-name: com.mysql.cj.jdbc.Driver | |
| 113 | + | |
| 114 | + # 从库(只读)- 可选配置 | |
| 115 | + slave: | |
| 116 | + url: jdbc:mysql://slave-host:3306/saas | |
| 117 | + username: saas_readonly | |
| 118 | + password: readonly_password | |
| 119 | +          driver-class-name: com.mysql.cj.jdbc.Driver | |
| 120 | +``` | |
| 121 | + | |
| 122 | +#### 3.3 JPA/Hibernate 配置 | |
| 123 | +```yaml | |
| 124 | +spring: | |
| 125 | + jpa: | |
| 126 | + show-sql: false | |
| 127 | + hibernate: | |
| 128 | + ddl-auto: none | |
| 129 | + naming: | |
| 130 | + physical-strategy: org.springframework.boot.orm.jpa.hibernate.SpringPhysicalNamingStrategy | |
| 131 | + properties: | |
| 132 | + hibernate: | |
| 133 | + dialect: org.hibernate.dialect.MySQL8Dialect | |
| 134 | + format_sql: true | |
| 135 | + use_sql_comments: true | |
| 136 | + jdbc: | |
| 137 | + batch_size: 50 | |
| 138 | + order_inserts: true | |
| 139 | + order_updates: true | |
| 140 | +``` | |
| 141 | + | |
| 142 | +### 4. 环境配置建议 | |
| 143 | + | |
| 144 | +#### 开发环境(application-dev.yml) | |
| 145 | +```yaml | |
| 146 | +spring: | |
| 147 | + profiles: | |
| 148 | + active: dev | |
| 149 | + | |
| 150 | + datasource: | |
| 151 | + master: | |
| 152 | + url: jdbc:mysql://localhost:3306/saas_dev | |
| 153 | + username: root | |
| 154 | + password: root | |
| 155 | +      driver-class-name: com.mysql.cj.jdbc.Driver | |
| 156 | + hikari: | |
| 157 | + maximum-pool-size: 10 | |
| 158 | + minimum-idle: 2 | |
| 159 | + connection-timeout: 30000 | |
| 160 | + | |
| 161 | +# 开发环境 SQL 输出 | |
| 162 | +logging: | |
| 163 | + level: | |
| 164 | + com.hsyl.saas.mapper: DEBUG | |
| 165 | + org.springframework.jdbc.core: DEBUG | |
| 166 | +``` | |
| 167 | + | |
| 168 | +#### 生产环境(application-prod.yml) | |
| 169 | +```yaml | |
| 170 | +spring: | |
| 171 | + profiles: | |
| 172 | + active: prod | |
| 173 | + | |
| 174 | + datasource: | |
| 175 | + master: | |
| 176 | + url: jdbc:mysql://120.79.247.228:3316/saas | |
| 177 | + username: saas | |
| 178 | +      password: <password> | |
| 179 | +      driver-class-name: com.mysql.cj.jdbc.Driver | |
| 180 | + hikari: | |
| 181 | + maximum-pool-size: 50 | |
| 182 | + minimum-idle: 10 | |
| 183 | + connection-timeout: 60000 | |
| 184 | + max-lifetime: 3600000 | |
| 185 | + | |
| 186 | +# 生产环境 SQL 监控 | |
| 187 | +management: | |
| 188 | + endpoints: | |
| 189 | + web: | |
| 190 | + exposure: | |
| 191 | + include: health,info,metrics,datasource | |
| 192 | +``` | |
| 193 | + | |
| 194 | +### 5. 数据库连接信息汇总 | |
| 195 | + | |
| 196 | +| 环境 | 主机 | 端口 | 数据库 | 用户名 | 密码 | | |
| 197 | +|------|------|------|--------|--------|------| | |
| 198 | +| 本地开发 | localhost | 3306 | saas | saas | <password> | | |
| 199 | +| 生产环境 | 120.79.247.228 | 3316 | saas | saas | <password> | | |
| 200 | + | |
| 201 | +### 6. 必需的依赖项 | |
| 202 | + | |
| 203 | +确保 `pom.xml` 或 `build.gradle` 包含以下依赖: | |
| 204 | + | |
| 205 | +```xml | |
| 206 | +<!-- MySQL 连接器 --> | |
| 207 | +<dependency> | |
| 208 | + <groupId>mysql</groupId> | |
| 209 | + <artifactId>mysql-connector-java</artifactId> | |
| 210 | + <version>8.0.33</version> | |
| 211 | +</dependency> | |
| 212 | + | |
| 213 | +<!-- HikariCP 连接池 --> | |
| 214 | +<dependency> | |
| 215 | + <groupId>com.zaxxer</groupId> | |
| 216 | + <artifactId>HikariCP</artifactId> | |
| 217 | + <version>5.0.1</version> | |
| 218 | +</dependency> | |
| 219 | + | |
| 220 | +<!-- 动态数据源 --> | |
| 221 | +<dependency> | |
| 222 | + <groupId>com.baomidou</groupId> | |
| 223 | + <artifactId>dynamic-datasource-spring-boot-starter</artifactId> | |
| 224 | + <version>3.6.1</version> | |
| 225 | +</dependency> | |
| 226 | +``` | |
| 227 | + | |
| 228 | +### 7. 测试数据库连接 | |
| 229 | + | |
| 230 | +```bash | |
| 231 | +# 测试本地数据库连接 | |
| 232 | +mysql -h localhost -P 3306 -u saas -p saas | |
| 233 | + | |
| 234 | +# 测试生产数据库连接 | |
| 235 | +mysql -h 120.79.247.228 -P 3316 -u saas -p saas | |
| 236 | +``` | |
| 237 | + | |
| 238 | +## 结论 | |
| 239 | + | |
| 240 | +当前配置文件缺少完整的数据库配置,需要补充: | |
| 241 | +1. **数据源连接信息**(URL、用户名、密码) | |
| 242 | +2. **连接池配置**(HikariCP 参数) | |
| 243 | +3. **多数据源配置**(如需多租户支持) | |
| 244 | +4. **JPA/Hibernate 配置**(数据库方言、DDL 策略等) | |
| 245 | + | |
| 246 | +建议根据环境(开发/测试/生产)分别配置不同的数据库连接参数。 | |
| 0 | 247 | \ No newline at end of file | ... | ... |
docs/Search-API-Examples.md
| ... | ... | @@ -23,7 +23,7 @@ |
| 23 | 23 | ```bash |
| 24 | 24 | curl -X POST "http://localhost:6002/search/" \ |
| 25 | 25 | -H "Content-Type: application/json" \ |
| 26 | - -H "X-Tenant-ID: 2" \ | |
| 26 | + -H "X-Tenant-ID: 162" \ | |
| 27 | 27 | -d '{ |
| 28 | 28 | "query": "芭比娃娃" |
| 29 | 29 | }' |
| ... | ... | @@ -49,7 +49,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 49 | 49 | ```bash |
| 50 | 50 | curl -X POST "http://localhost:6002/search/" \ |
| 51 | 51 | -H "Content-Type: application/json" \ |
| 52 | - -H "X-Tenant-ID: 2" \ | |
| 52 | + -H "X-Tenant-ID: 162" \ | |
| 53 | 53 | -d '{ |
| 54 | 54 | "query": "手机", |
| 55 | 55 | "language": "zh", |
| ... | ... | @@ -63,7 +63,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 63 | 63 | # 第1页(0-19) |
| 64 | 64 | curl -X POST "http://localhost:6002/search/" \ |
| 65 | 65 | -H "Content-Type: application/json" \ |
| 66 | - -H "X-Tenant-ID: 2" \ | |
| 66 | + -H "X-Tenant-ID: 162" \ | |
| 67 | 67 | -d '{ |
| 68 | 68 | "query": "手机", |
| 69 | 69 | "language": "zh", |
| ... | ... | @@ -74,7 +74,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 74 | 74 | # 第2页(20-39) |
| 75 | 75 | curl -X POST "http://localhost:6002/search/" \ |
| 76 | 76 | -H "Content-Type: application/json" \ |
| 77 | - -H "X-Tenant-ID: 2" \ | |
| 77 | + -H "X-Tenant-ID: 162" \ | |
| 78 | 78 | -d '{ |
| 79 | 79 | "query": "手机", |
| 80 | 80 | "language": "zh", |
| ... | ... | @@ -94,7 +94,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 94 | 94 | ```bash |
| 95 | 95 | curl -X POST "http://localhost:6002/search/" \ |
| 96 | 96 | -H "Content-Type: application/json" \ |
| 97 | - -H "X-Tenant-ID: 2" \ | |
| 97 | + -H "X-Tenant-ID: 162" \ | |
| 98 | 98 | -d '{ |
| 99 | 99 | "query": "手机", |
| 100 | 100 | "language": "zh", |
| ... | ... | @@ -109,7 +109,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 109 | 109 | ```bash |
| 110 | 110 | curl -X POST "http://localhost:6002/search/" \ |
| 111 | 111 | -H "Content-Type: application/json" \ |
| 112 | - -H "X-Tenant-ID: 2" \ | |
| 112 | + -H "X-Tenant-ID: 162" \ | |
| 113 | 113 | -d '{ |
| 114 | 114 | "query": "手机", |
| 115 | 115 | "language": "zh", |
| ... | ... | @@ -126,7 +126,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 126 | 126 | ```bash |
| 127 | 127 | curl -X POST "http://localhost:6002/search/" \ |
| 128 | 128 | -H "Content-Type: application/json" \ |
| 129 | - -H "X-Tenant-ID: 2" \ | |
| 129 | + -H "X-Tenant-ID: 162" \ | |
| 130 | 130 | -d '{ |
| 131 | 131 | "query": "手机", |
| 132 | 132 | "language": "zh", |
| ... | ... | @@ -144,7 +144,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 144 | 144 | ```bash |
| 145 | 145 | curl -X POST "http://localhost:6002/search/" \ |
| 146 | 146 | -H "Content-Type: application/json" \ |
| 147 | - -H "X-Tenant-ID: 2" \ | |
| 147 | + -H "X-Tenant-ID: 162" \ | |
| 148 | 148 | -d '{ |
| 149 | 149 | "query": "手机", |
| 150 | 150 | "language": "zh", |
| ... | ... | @@ -164,7 +164,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 164 | 164 | ```bash |
| 165 | 165 | curl -X POST "http://localhost:6002/search/" \ |
| 166 | 166 | -H "Content-Type: application/json" \ |
| 167 | - -H "X-Tenant-ID: 2" \ | |
| 167 | + -H "X-Tenant-ID: 162" \ | |
| 168 | 168 | -d '{ |
| 169 | 169 | "query": "手机", |
| 170 | 170 | "language": "zh", |
| ... | ... | @@ -184,7 +184,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 184 | 184 | ```bash |
| 185 | 185 | curl -X POST "http://localhost:6002/search/" \ |
| 186 | 186 | -H "Content-Type: application/json" \ |
| 187 | - -H "X-Tenant-ID: 2" \ | |
| 187 | + -H "X-Tenant-ID: 162" \ | |
| 188 | 188 | -d '{ |
| 189 | 189 | "query": "手机", |
| 190 | 190 | "language": "zh", |
| ... | ... | @@ -207,7 +207,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 207 | 207 | ```bash |
| 208 | 208 | curl -X POST "http://localhost:6002/search/" \ |
| 209 | 209 | -H "Content-Type: application/json" \ |
| 210 | - -H "X-Tenant-ID: 2" \ | |
| 210 | + -H "X-Tenant-ID: 162" \ | |
| 211 | 211 | -d '{ |
| 212 | 212 | "query": "手机", |
| 213 | 213 | "language": "zh", |
| ... | ... | @@ -227,7 +227,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 227 | 227 | ```bash |
| 228 | 228 | curl -X POST "http://localhost:6002/search/" \ |
| 229 | 229 | -H "Content-Type: application/json" \ |
| 230 | - -H "X-Tenant-ID: 2" \ | |
| 230 | + -H "X-Tenant-ID: 162" \ | |
| 231 | 231 | -d '{ |
| 232 | 232 | "query": "手机", |
| 233 | 233 | "language": "zh", |
| ... | ... | @@ -246,7 +246,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 246 | 246 | ```bash |
| 247 | 247 | curl -X POST "http://localhost:6002/search/" \ |
| 248 | 248 | -H "Content-Type: application/json" \ |
| 249 | - -H "X-Tenant-ID: 2" \ | |
| 249 | + -H "X-Tenant-ID: 162" \ | |
| 250 | 250 | -d '{ |
| 251 | 251 | "query": "手机", |
| 252 | 252 | "language": "zh", |
| ... | ... | @@ -265,7 +265,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 265 | 265 | ```bash |
| 266 | 266 | curl -X POST "http://localhost:6002/search/" \ |
| 267 | 267 | -H "Content-Type: application/json" \ |
| 268 | - -H "X-Tenant-ID: 2" \ | |
| 268 | + -H "X-Tenant-ID: 162" \ | |
| 269 | 269 | -d '{ |
| 270 | 270 | "query": "手机", |
| 271 | 271 | "language": "zh", |
| ... | ... | @@ -288,7 +288,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 288 | 288 | ```bash |
| 289 | 289 | curl -X POST "http://localhost:6002/search/" \ |
| 290 | 290 | -H "Content-Type: application/json" \ |
| 291 | - -H "X-Tenant-ID: 2" \ | |
| 291 | + -H "X-Tenant-ID: 162" \ | |
| 292 | 292 | -d '{ |
| 293 | 293 | "query": "手机", |
| 294 | 294 | "language": "zh", |
| ... | ... | @@ -318,7 +318,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 318 | 318 | ```bash |
| 319 | 319 | curl -X POST "http://localhost:6002/search/" \ |
| 320 | 320 | -H "Content-Type: application/json" \ |
| 321 | - -H "X-Tenant-ID: 2" \ | |
| 321 | + -H "X-Tenant-ID: 162" \ | |
| 322 | 322 | -d '{ |
| 323 | 323 | "query": "手机", |
| 324 | 324 | "language": "zh", |
| ... | ... | @@ -369,7 +369,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 369 | 369 | ```bash |
| 370 | 370 | curl -X POST "http://localhost:6002/search/" \ |
| 371 | 371 | -H "Content-Type: application/json" \ |
| 372 | - -H "X-Tenant-ID: 2" \ | |
| 372 | + -H "X-Tenant-ID: 162" \ | |
| 373 | 373 | -d '{ |
| 374 | 374 | "query": "手机", |
| 375 | 375 | "language": "zh", |
| ... | ... | @@ -384,7 +384,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 384 | 384 | ```bash |
| 385 | 385 | curl -X POST "http://localhost:6002/search/" \ |
| 386 | 386 | -H "Content-Type: application/json" \ |
| 387 | - -H "X-Tenant-ID: 2" \ | |
| 387 | + -H "X-Tenant-ID: 162" \ | |
| 388 | 388 | -d '{ |
| 389 | 389 | "query": "手机", |
| 390 | 390 | "language": "zh", |
| ... | ... | @@ -401,7 +401,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 401 | 401 | ```bash |
| 402 | 402 | curl -X POST "http://localhost:6002/search/" \ |
| 403 | 403 | -H "Content-Type: application/json" \ |
| 404 | - -H "X-Tenant-ID: 2" \ | |
| 404 | + -H "X-Tenant-ID: 162" \ | |
| 405 | 405 | -d '{ |
| 406 | 406 | "query": "手机", |
| 407 | 407 | "language": "zh", |
| ... | ... | @@ -425,7 +425,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 425 | 425 | ```bash |
| 426 | 426 | curl -X POST "http://localhost:6002/search/" \ |
| 427 | 427 | -H "Content-Type: application/json" \ |
| 428 | - -H "X-Tenant-ID: 2" \ | |
| 428 | + -H "X-Tenant-ID: 162" \ | |
| 429 | 429 | -d '{ |
| 430 | 430 | "query": "手机", |
| 431 | 431 | "language": "zh", |
| ... | ... | @@ -468,7 +468,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 468 | 468 | ```bash |
| 469 | 469 | curl -X POST "http://localhost:6002/search/" \ |
| 470 | 470 | -H "Content-Type: application/json" \ |
| 471 | - -H "X-Tenant-ID: 2" \ | |
| 471 | + -H "X-Tenant-ID: 162" \ | |
| 472 | 472 | -d '{ |
| 473 | 473 | "query": "手机", |
| 474 | 474 | "language": "zh", |
| ... | ... | @@ -497,7 +497,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 497 | 497 | ```bash |
| 498 | 498 | curl -X POST "http://localhost:6002/search/" \ |
| 499 | 499 | -H "Content-Type: application/json" \ |
| 500 | - -H "X-Tenant-ID: 2" \ | |
| 500 | + -H "X-Tenant-ID: 162" \ | |
| 501 | 501 | -d '{ |
| 502 | 502 | "query": "手机", |
| 503 | 503 | "language": "zh", |
| ... | ... | @@ -512,7 +512,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 512 | 512 | ```bash |
| 513 | 513 | curl -X POST "http://localhost:6002/search/" \ |
| 514 | 514 | -H "Content-Type: application/json" \ |
| 515 | - -H "X-Tenant-ID: 2" \ | |
| 515 | + -H "X-Tenant-ID: 162" \ | |
| 516 | 516 | -d '{ |
| 517 | 517 | "query": "手机", |
| 518 | 518 | "language": "zh", |
| ... | ... | @@ -527,7 +527,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 527 | 527 | ```bash |
| 528 | 528 | curl -X POST "http://localhost:6002/search/" \ |
| 529 | 529 | -H "Content-Type: application/json" \ |
| 530 | - -H "X-Tenant-ID: 2" \ | |
| 530 | + -H "X-Tenant-ID: 162" \ | |
| 531 | 531 | -d '{ |
| 532 | 532 | "query": "手机", |
| 533 | 533 | "language": "zh", |
| ... | ... | @@ -548,7 +548,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 548 | 548 | ```bash |
| 549 | 549 | curl -X POST "http://localhost:6002/search/image" \ |
| 550 | 550 | -H "Content-Type: application/json" \ |
| 551 | - -H "X-Tenant-ID: 2" \ | |
| 551 | + -H "X-Tenant-ID: 162" \ | |
| 552 | 552 | -d '{ |
| 553 | 553 | "image_url": "https://example.com/barbie.jpg", |
| 554 | 554 | "size": 20 |
| ... | ... | @@ -560,7 +560,7 @@ curl -X POST "http://localhost:6002/search/image" \ |
| 560 | 560 | ```bash |
| 561 | 561 | curl -X POST "http://localhost:6002/search/image" \ |
| 562 | 562 | -H "Content-Type: application/json" \ |
| 563 | - -H "X-Tenant-ID: 2" \ | |
| 563 | + -H "X-Tenant-ID: 162" \ | |
| 564 | 564 | -d '{ |
| 565 | 565 | "image_url": "https://example.com/barbie.jpg", |
| 566 | 566 | "size": 20, |
| ... | ... | @@ -584,7 +584,7 @@ curl -X POST "http://localhost:6002/search/image" \ |
| 584 | 584 | ```bash |
| 585 | 585 | curl -X POST "http://localhost:6002/search/" \ |
| 586 | 586 | -H "Content-Type: application/json" \ |
| 587 | - -H "X-Tenant-ID: 2" \ | |
| 587 | + -H "X-Tenant-ID: 162" \ | |
| 588 | 588 | -d '{ |
| 589 | 589 | "query": "玩具 AND 乐高" |
| 590 | 590 | }' |
| ... | ... | @@ -597,7 +597,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 597 | 597 | ```bash |
| 598 | 598 | curl -X POST "http://localhost:6002/search/" \ |
| 599 | 599 | -H "Content-Type: application/json" \ |
| 600 | - -H "X-Tenant-ID: 2" \ | |
| 600 | + -H "X-Tenant-ID: 162" \ | |
| 601 | 601 | -d '{ |
| 602 | 602 | "query": "芭比 OR 娃娃" |
| 603 | 603 | }' |
| ... | ... | @@ -610,7 +610,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 610 | 610 | ```bash |
| 611 | 611 | curl -X POST "http://localhost:6002/search/" \ |
| 612 | 612 | -H "Content-Type: application/json" \ |
| 613 | - -H "X-Tenant-ID: 2" \ | |
| 613 | + -H "X-Tenant-ID: 162" \ | |
| 614 | 614 | -d '{ |
| 615 | 615 | "query": "玩具 ANDNOT 电动" |
| 616 | 616 | }' |
| ... | ... | @@ -623,7 +623,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 623 | 623 | ```bash |
| 624 | 624 | curl -X POST "http://localhost:6002/search/" \ |
| 625 | 625 | -H "Content-Type: application/json" \ |
| 626 | - -H "X-Tenant-ID: 2" \ | |
| 626 | + -H "X-Tenant-ID: 162" \ | |
| 627 | 627 | -d '{ |
| 628 | 628 | "query": "玩具 AND (乐高 OR 芭比) ANDNOT 电动" |
| 629 | 629 | }' |
| ... | ... | @@ -636,7 +636,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 636 | 636 | ```bash |
| 637 | 637 | curl -X POST "http://localhost:6002/search/" \ |
| 638 | 638 | -H "Content-Type: application/json" \ |
| 639 | - -H "X-Tenant-ID: 2" \ | |
| 639 | + -H "X-Tenant-ID: 162" \ | |
| 640 | 640 | -d '{ |
| 641 | 641 | "query": "brand:乐高" |
| 642 | 642 | }' |
| ... | ... | @@ -961,7 +961,7 @@ const SearchComponent = { |
| 961 | 961 | ```bash |
| 962 | 962 | curl -X POST "http://localhost:6002/search/" \ |
| 963 | 963 | -H "Content-Type: application/json" \ |
| 964 | - -H "X-Tenant-ID: 2" \ | |
| 964 | + -H "X-Tenant-ID: 162" \ | |
| 965 | 965 | -d '{ |
| 966 | 966 | "query": "手机", |
| 967 | 967 | "language": "zh", |
| ... | ... | @@ -1000,7 +1000,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1000 | 1000 | ```bash |
| 1001 | 1001 | curl -X POST "http://localhost:6002/search/" \ |
| 1002 | 1002 | -H "Content-Type: application/json" \ |
| 1003 | - -H "X-Tenant-ID: 2" \ | |
| 1003 | + -H "X-Tenant-ID: 162" \ | |
| 1004 | 1004 | -d '{ |
| 1005 | 1005 | "query": "手机", |
| 1006 | 1006 | "language": "zh", |
| ... | ... | @@ -1020,7 +1020,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1020 | 1020 | # 显示某个类目下的所有商品,按价格排序,提供品牌筛选 |
| 1021 | 1021 | curl -X POST "http://localhost:6002/search/" \ |
| 1022 | 1022 | -H "Content-Type: application/json" \ |
| 1023 | - -H "X-Tenant-ID: 2" \ | |
| 1023 | + -H "X-Tenant-ID: 162" \ | |
| 1024 | 1024 | -d '{ |
| 1025 | 1025 | "query": "*", |
| 1026 | 1026 | "filters": { |
| ... | ... | @@ -1051,7 +1051,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1051 | 1051 | # 用户搜索关键词,提供筛选和排序(包含规格分面) |
| 1052 | 1052 | curl -X POST "http://localhost:6002/search/" \ |
| 1053 | 1053 | -H "Content-Type: application/json" \ |
| 1054 | - -H "X-Tenant-ID: 2" \ | |
| 1054 | + -H "X-Tenant-ID: 162" \ | |
| 1055 | 1055 | -d '{ |
| 1056 | 1056 | "query": "手机", |
| 1057 | 1057 | "language": "zh", |
| ... | ... | @@ -1064,9 +1064,9 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1064 | 1064 | "field": "min_price", |
| 1065 | 1065 | "type": "range", |
| 1066 | 1066 | "ranges": [ |
| 1067 | - {"key": "0-50", "to": 50}, | |
| 1068 | - {"key": "50-100", "from": 50, "to": 100}, | |
| 1069 | - {"key": "100+", "from": 100} | |
| 1067 | + {"key": "0-50", "to": 50}, | |
| 1068 | + {"key": "50-100", "from": 50, "to": 100}, | |
| 1069 | + {"key": "100+", "from": 100} | |
| 1070 | 1070 | ] |
| 1071 | 1071 | } |
| 1072 | 1072 | ], |
| ... | ... | @@ -1080,7 +1080,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1080 | 1080 | # 用户搜索并选择了规格筛选条件 |
| 1081 | 1081 | curl -X POST "http://localhost:6002/search/" \ |
| 1082 | 1082 | -H "Content-Type: application/json" \ |
| 1083 | - -H "X-Tenant-ID: 2" \ | |
| 1083 | + -H "X-Tenant-ID: 162" \ | |
| 1084 | 1084 | -d '{ |
| 1085 | 1085 | "query": "手机", |
| 1086 | 1086 | "language": "zh", |
| ... | ... | @@ -1106,7 +1106,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1106 | 1106 | # 显示特定价格区间的商品 |
| 1107 | 1107 | curl -X POST "http://localhost:6002/search/" \ |
| 1108 | 1108 | -H "Content-Type: application/json" \ |
| 1109 | - -H "X-Tenant-ID: 2" \ | |
| 1109 | + -H "X-Tenant-ID: 162" \ | |
| 1110 | 1110 | -d '{ |
| 1111 | 1111 | "query": "*", |
| 1112 | 1112 | "range_filters": { |
| ... | ... | @@ -1128,7 +1128,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1128 | 1128 | # 最近更新的商品 |
| 1129 | 1129 | curl -X POST "http://localhost:6002/search/" \ |
| 1130 | 1130 | -H "Content-Type: application/json" \ |
| 1131 | - -H "X-Tenant-ID: 2" \ | |
| 1131 | + -H "X-Tenant-ID: 162" \ | |
| 1132 | 1132 | -d '{ |
| 1133 | 1133 | "query": "*", |
| 1134 | 1134 | "range_filters": { |
| ... | ... | @@ -1152,7 +1152,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1152 | 1152 | # 错误:range_filters 缺少操作符 |
| 1153 | 1153 | curl -X POST "http://localhost:6002/search/" \ |
| 1154 | 1154 | -H "Content-Type: application/json" \ |
| 1155 | - -H "X-Tenant-ID: 2" \ | |
| 1155 | + -H "X-Tenant-ID: 162" \ | |
| 1156 | 1156 | -d '{ |
| 1157 | 1157 | "query": "手机", |
| 1158 | 1158 | "language": "zh", |
| ... | ... | @@ -1177,7 +1177,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1177 | 1177 | # 错误:query 为空 |
| 1178 | 1178 | curl -X POST "http://localhost:6002/search/" \ |
| 1179 | 1179 | -H "Content-Type: application/json" \ |
| 1180 | - -H "X-Tenant-ID: 2" \ | |
| 1180 | + -H "X-Tenant-ID: 162" \ | |
| 1181 | 1181 | -d '{ |
| 1182 | 1182 | "query": "" |
| 1183 | 1183 | }' |
| ... | ... | @@ -1255,7 +1255,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1255 | 1255 | # 使用通配符查询 + 分面 |
| 1256 | 1256 | curl -X POST "http://localhost:6002/search/" \ |
| 1257 | 1257 | -H "Content-Type: application/json" \ |
| 1258 | - -H "X-Tenant-ID: 2" \ | |
| 1258 | + -H "X-Tenant-ID: 162" \ | |
| 1259 | 1259 | -d '{ |
| 1260 | 1260 | "query": "*", |
| 1261 | 1261 | "size": 0, |
| ... | ... | @@ -1270,7 +1270,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1270 | 1270 | ```bash |
| 1271 | 1271 | curl -X POST "http://localhost:6002/search/" \ |
| 1272 | 1272 | -H "Content-Type: application/json" \ |
| 1273 | - -H "X-Tenant-ID: 2" \ | |
| 1273 | + -H "X-Tenant-ID: 162" \ | |
| 1274 | 1274 | -d '{ |
| 1275 | 1275 | "query": "手机", |
| 1276 | 1276 | "language": "zh", |
| ... | ... | @@ -1297,7 +1297,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1297 | 1297 | # 布尔表达式 + 过滤器 + 分面 + 排序 |
| 1298 | 1298 | curl -X POST "http://localhost:6002/search/" \ |
| 1299 | 1299 | -H "Content-Type: application/json" \ |
| 1300 | - -H "X-Tenant-ID: 2" \ | |
| 1300 | + -H "X-Tenant-ID: 162" \ | |
| 1301 | 1301 | -d '{ |
| 1302 | 1302 | "query": "(玩具 OR 游戏) AND 儿童 ANDNOT 电子", |
| 1303 | 1303 | "filters": { |
| ... | ... | @@ -1326,19 +1326,19 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1326 | 1326 | # 测试类目:玩具 |
| 1327 | 1327 | curl -X POST "http://localhost:6002/search/" \ |
| 1328 | 1328 | -H "Content-Type: application/json" \ |
| 1329 | - -H "X-Tenant-ID: 2" \ | |
| 1329 | + -H "X-Tenant-ID: 162" \ | |
| 1330 | 1330 | -d '{"query": "玩具", "size": 5}' |
| 1331 | 1331 | |
| 1332 | 1332 | # 测试品牌:乐高 |
| 1333 | 1333 | curl -X POST "http://localhost:6002/search/" \ |
| 1334 | 1334 | -H "Content-Type: application/json" \ |
| 1335 | - -H "X-Tenant-ID: 2" \ | |
| 1335 | + -H "X-Tenant-ID: 162" \ | |
| 1336 | 1336 | -d '{"query": "brand:乐高", "size": 5}' |
| 1337 | 1337 | |
| 1338 | 1338 | # 测试布尔表达式 |
| 1339 | 1339 | curl -X POST "http://localhost:6002/search/" \ |
| 1340 | 1340 | -H "Content-Type: application/json" \ |
| 1341 | - -H "X-Tenant-ID: 2" \ | |
| 1341 | + -H "X-Tenant-ID: 162" \ | |
| 1342 | 1342 | -d '{"query": "玩具 AND 乐高", "size": 5}' |
| 1343 | 1343 | ``` |
| 1344 | 1344 | ... | ... |
docs/Usage-Guide.md
| ... | ... | @@ -271,7 +271,7 @@ curl http://localhost:6002/admin/stats |
| 271 | 271 | ```bash |
| 272 | 272 | curl -X POST http://localhost:6002/search/ \ |
| 273 | 273 | -H "Content-Type: application/json" \ |
| 274 | - -H "X-Tenant-ID: 2" \ | |
| 274 | + -H "X-Tenant-ID: 162" \ | |
| 275 | 275 | -d '{ |
| 276 | 276 | "query": "玩具", |
| 277 | 277 | "size": 10 |
| ... | ... | @@ -294,7 +294,7 @@ curl -X POST "http://localhost:6002/search/?tenant_id=2" \ |
| 294 | 294 | ```bash |
| 295 | 295 | curl -X POST http://localhost:6002/search/ \ |
| 296 | 296 | -H "Content-Type: application/json" \ |
| 297 | - -H "X-Tenant-ID: 2" \ | |
| 297 | + -H "X-Tenant-ID: 162" \ | |
| 298 | 298 | -d '{ |
| 299 | 299 | "query": "玩具", |
| 300 | 300 | "size": 10, |
| ... | ... | @@ -312,7 +312,7 @@ curl -X POST http://localhost:6002/search/ \ |
| 312 | 312 | ```bash |
| 313 | 313 | curl -X POST http://localhost:6002/search/ \ |
| 314 | 314 | -H "Content-Type: application/json" \ |
| 315 | - -H "X-Tenant-ID: 2" \ | |
| 315 | + -H "X-Tenant-ID: 162" \ | |
| 316 | 316 | -d '{ |
| 317 | 317 | "query": "玩具", |
| 318 | 318 | "size": 10, |
| ... | ... | @@ -328,7 +328,7 @@ curl -X POST http://localhost:6002/search/ \ |
| 328 | 328 | ```bash |
| 329 | 329 | curl -X POST http://localhost:6002/search/image \ |
| 330 | 330 | -H "Content-Type: application/json" \ |
| 331 | - -H "X-Tenant-ID: 2" \ | |
| 331 | + -H "X-Tenant-ID: 162" \ | |
| 332 | 332 | -d '{ |
| 333 | 333 | "image_url": "https://oss.essa.cn/example.jpg", |
| 334 | 334 | "size": 10 |
| ... | ... | @@ -403,7 +403,7 @@ curl http://localhost:9200/search_products/_count |
| 403 | 403 | # 检查tenant_id过滤是否正确 |
| 404 | 404 | curl -X POST http://localhost:6002/search/ \ |
| 405 | 405 | -H "Content-Type: application/json" \ |
| 406 | - -H "X-Tenant-ID: 2" \ | |
| 406 | + -H "X-Tenant-ID: 162" \ | |
| 407 | 407 | -d '{"query": "*", "size": 10, "debug": true}' |
| 408 | 408 | ``` |
| 409 | 409 | ... | ... |
| ... | ... | @@ -0,0 +1,188 @@ |
| 1 | +# 分面数据问题完整分析报告 | |
| 2 | + | |
| 3 | +## 问题现象 | |
| 4 | + | |
| 5 | +前端显示的分面结果都是空的: | |
| 6 | +- Category: 空 | |
| 7 | +- Color: 空 | |
| 8 | +- Size: 空 | |
| 9 | +- Material: 空 | |
| 10 | + | |
| 11 | +ES的聚合查询结果也是空的。 | |
| 12 | + | |
| 13 | +## 诊断结果分析 | |
| 14 | + | |
| 15 | +### MySQL数据检查结果 | |
| 16 | + | |
| 17 | +1. **category_path字段**: | |
| 18 | + - 总SPU数:11254 | |
| 19 | + - 有category_path的SPU:只有1个 | |
| 20 | + - 该值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式,逗号分隔) | |
| 21 | + | |
| 22 | +2. **option表数据**: | |
| 23 | + - 总option记录数:2658 | |
| 24 | + - 有option定义的SPU数量:886个 | |
| 25 | + - **position=1, name='color'**: 885个SPU ✅ | |
| 26 | + - **position=2, name='size'**: 885个SPU ✅ | |
| 27 | + - **position=3, name='material'**: 885个SPU ✅ | |
| 28 | + | |
| 29 | +3. **SKU数据**: | |
| 30 | + - 总SKU数:43109 | |
| 31 | + - 应该有option1/2/3值 | |
| 32 | + | |
| 33 | +### ES数据检查结果 | |
| 34 | + | |
| 35 | +1. **category1_name字段**: | |
| 36 | +   - 总文档数:10000(注意:ES默认`track_total_hits`上限为10000,该数字可能表示"至少10000"而非实际文档数;MySQL中SPU总数为11254,需确认) | |
| 37 | + - 有category1_name的文档:只有1个 | |
| 38 | + - 该值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式) | |
| 39 | + | |
| 40 | +2. **specifications字段**: | |
| 41 | + - ES聚合查询显示**有数据**: | |
| 42 | + - specifications.color: Beige: 1226, Khaki: 1176, Red: 1168等 | |
| 43 | + - specifications.size: 1: 1234, 12: 1234等 | |
| 44 | + - specifications.material: 塑料英文包装: 17277等 | |
| 45 | + | |
| 46 | +## 问题根源 | |
| 47 | + | |
| 48 | +### 问题1:category1_name 几乎都为空 ✅ 已找到原因 | |
| 49 | + | |
| 50 | +**原因**: | |
| 51 | +1. MySQL的`category_path`字段几乎都是空的(只有1个,而且是ID列表格式) | |
| 52 | +2. 当`category_path`为空时,代码会使用`category`字段作为备选(代码已修复) | |
| 53 | +3. 但需要检查MySQL的`category`字段是否有值 | |
| 54 | + | |
| 55 | +**数据流转**: | |
| 56 | +- Excel "专辑名称" → 店匠系统 → MySQL `category` 或 `category_path` 字段 | |
| 57 | +- 如果Excel导入时"专辑名称"没有正确映射,或者`category`字段也为空,就会导致`category1_name`为空 | |
| 58 | + | |
| 59 | +### 问题2:为什么specifications分面查询无结果 | |
| 60 | + | |
| 61 | +**ES聚合查询显示有数据**,但前端显示为空,可能原因: | |
| 62 | + | |
| 63 | +1. **前端搜索时有查询条件**: | |
| 64 | + - 如果有查询条件(如`query="手机"`),ES会先过滤文档 | |
| 65 | + - 过滤后的文档如果没有specifications数据,聚合结果就会为空 | |
| 66 | + - 但这不应该导致所有分面都为空 | |
| 67 | + | |
| 68 | +2. **分面聚合构建问题**: | |
| 69 | + - 前端请求:`["category1_name", "specifications.color", "specifications.size", "specifications.material"]` | |
| 70 | + - ES构建的聚合名称:`category1_name_facet`, `specifications_color_facet`等 | |
| 71 | + - 可能聚合构建或解析有问题 | |
| 72 | + | |
| 73 | +3. **tenant_id过滤问题**: | |
| 74 | + - 如果搜索时tenant_id不匹配,可能导致没有匹配的文档 | |
| 75 | + | |
| 76 | +## 需要检查的关键点 | |
| 77 | + | |
| 78 | +### 1. MySQL的category字段是否有值 | |
| 79 | + | |
| 80 | +**需要运行SQL查询**(注意:`COUNT(category)`只排除NULL,空字符串仍会被计入`has_category`): | |
| 81 | +```sql | |
| 82 | +SELECT | |
| 83 | + COUNT(*) as total, | |
| 84 | + COUNT(category) as has_category, | |
| 85 | + COUNT(*) - COUNT(category) as null_category | |
| 86 | +FROM shoplazza_product_spu | |
| 87 | +WHERE tenant_id = 162 AND deleted = 0; | |
| 88 | +``` | |
| 89 | + | |
| 90 | +**如果category字段也为空**: | |
| 91 | +- 说明Excel导入时"专辑名称"字段没有正确映射到MySQL的`category`字段 | |
| 92 | +- 需要检查店匠系统的字段映射配置 | |
| 93 | + | |
| 94 | +### 2. SKU的option1/2/3字段是否有值 | |
| 95 | + | |
| 96 | +**需要运行SQL查询**: | |
| 97 | +```sql | |
| 98 | +SELECT | |
| 99 | + COUNT(*) as total_skus, | |
| 100 | + COUNT(option1) as has_option1, | |
| 101 | + COUNT(option2) as has_option2, | |
| 102 | + COUNT(option3) as has_option3 | |
| 103 | +FROM shoplazza_product_sku | |
| 104 | +WHERE tenant_id = 162 AND deleted = 0; | |
| 105 | +``` | |
| 106 | + | |
| 107 | +### 3. 检查ES聚合查询 | |
| 108 | + | |
| 109 | +**运行检查脚本**: | |
| 110 | +```bash | |
| 111 | +python scripts/check_es_data.py --tenant-id 162 | |
| 112 | +``` | |
| 113 | + | |
| 114 | +查看: | |
| 115 | +- 是否有category1_name数据 | |
| 116 | +- specifications聚合是否有数据 | |
| 117 | + | |
| 118 | +## 解决方案 | |
| 119 | + | |
| 120 | +### 方案1:修复category1_name字段生成(代码已修复) | |
| 121 | + | |
| 122 | +**已修复的代码**(`indexer/spu_transformer.py`第241-259行): | |
| 123 | +- 如果`category_path`为空,使用`category`字段作为备选 | |
| 124 | +- 从`category`字段解析多级分类 | |
| 125 | + | |
| 126 | +**但需要确保**: | |
| 127 | +1. MySQL的`category`字段有值 | |
| 128 | +2. 重新导入数据到ES | |
| 129 | + | |
| 130 | +### 方案2:检查并修复MySQL数据 | |
| 131 | + | |
| 132 | +根据MySQL的`category`字段是否有值,分两种情况处理: | |
| 133 | + | |
| 134 | +1. **如果category字段也为空,检查Excel导入映射**: | |
| 135 | + - 确认"专辑名称"字段是否正确映射到MySQL的`category`字段 | |
| 136 | + - 如果不正确,需要修复映射或重新导入 | |
| 137 | + | |
| 138 | +2. **如果category字段有值但category1_name仍为空**: | |
| 139 | + - 说明数据导入时使用的是旧代码 | |
| 140 | + - 需要重新导入数据到ES | |
| 141 | + | |
| 142 | +### 方案3:验证specifications分面查询 | |
| 143 | + | |
| 144 | +虽然ES聚合查询显示有数据,但需要验证: | |
| 145 | + | |
| 146 | +1. **检查前端搜索请求**: | |
| 147 | + - 确认分面请求是否正确发送 | |
| 148 | + - 确认tenant_id是否正确 | |
| 149 | + | |
| 150 | +2. **检查ES聚合结果解析**: | |
| 151 | + - 确认`format_facets`函数是否正确解析specifications分面 | |
| 152 | + - 确认字段名匹配是否正确(`specifications.color` vs `specifications_color_facet`) | |
| 153 | + | |
| 154 | +## 立即执行的操作 | |
| 155 | + | |
| 156 | +### 步骤1:检查MySQL的category字段 | |
| 157 | + | |
| 158 | +更新诊断脚本,添加category字段检查: | |
| 159 | +```bash | |
| 160 | +# 需要手动运行SQL或更新诊断脚本 | |
| 161 | +``` | |
| 162 | + | |
| 163 | +### 步骤2:重新导入数据到ES | |
| 164 | + | |
| 165 | +修复代码后,重新导入数据: | |
| 166 | +```bash | |
| 167 | +python scripts/recreate_and_import.py \ | |
| 168 | + --tenant-id 162 \ | |
| 169 | + --db-host <host> \ | |
| 170 | + --db-database saas \ | |
| 171 | + --db-username saas \ | |
| 172 | + --db-password <password> \ | |
| 173 | + --es-host http://localhost:9200 | |
| 174 | +``` | |
| 175 | + | |
| 176 | +### 步骤3:验证ES数据 | |
| 177 | + | |
| 178 | +运行ES数据检查脚本: | |
| 179 | +```bash | |
| 180 | +python scripts/check_es_data.py --tenant-id 162 | |
| 181 | +``` | |
| 182 | + | |
| 183 | +## 关键发现 | |
| 184 | + | |
| 185 | +1. **specifications数据是存在的**:ES聚合查询能正常返回color/size/material的分面数据 | |
| 186 | +2. **category1_name几乎都是空的**:这是因为`category_path`为空,需要从`category`字段生成 | |
| 187 | +3. **需要重新导入数据**:修复代码后,需要重新导入数据到ES才能生效 | |
| 188 | + | ... | ... |
| ... | ... | @@ -0,0 +1,125 @@ |
| 1 | +# 分面数据问题根源分析 | |
| 2 | + | |
| 3 | +## ES数据检查结果 | |
| 4 | + | |
| 5 | +从ES索引数据检查结果可以看到: | |
| 6 | + | |
| 7 | +### 1. category1_name 分面问题 | |
| 8 | + | |
| 9 | +**检查结果**: | |
| 10 | +- 总文档数:10000 | |
| 11 | +- 有category1_name的文档:只有1个 | |
| 12 | +- 该文档的category1_name值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式,不是分类名称) | |
| 13 | + | |
| 14 | +**问题原因**: | |
| 15 | +- MySQL中`category_path`字段几乎都是空的(只有1个,而且那个是ID列表格式,不是路径格式如"服装/男装") | |
| 16 | +- MySQL中`category`字段可能也为空 | |
| 17 | +- 导致ES索引中的`category1_name`字段几乎都是空的 | |
| 18 | + | |
| 19 | +**解决方案**: | |
| 20 | +代码已修复(`indexer/spu_transformer.py`第241-259行),支持从`category`字段生成`category1_name`,但需要: | |
| 21 | +1. 确保MySQL的`category`字段有值 | |
| 22 | +2. 重新导入数据到ES | |
| 23 | + | |
| 24 | +### 2. specifications 分面问题 | |
| 25 | + | |
| 26 | +**检查结果**(从ES聚合查询): | |
| 27 | +- specifications.color 分面:有数据(Beige: 1226, Khaki: 1176等) | |
| 28 | +- specifications.size 分面:有数据(1: 1234, 12: 1234等) | |
| 29 | +- specifications.material 分面:有数据(塑料英文包装: 17277等) | |
| 30 | + | |
| 31 | +**说明**:ES中确实有specifications数据,而且聚合查询能正常返回结果。 | |
| 32 | + | |
| 33 | +## 问题根源 | |
| 34 | + | |
| 35 | +### 问题1:category1_name 几乎都为空 | |
| 36 | + | |
| 37 | +**MySQL数据情况**: | |
| 38 | +- `category_path` 字段:11253个SPU为空,只有1个有值(但那个值是ID列表格式) | |
| 39 | +- `category` 字段:需要检查是否有值 | |
| 40 | + | |
| 41 | +**ES索引情况**: | |
| 42 | +- `category1_name` 字段:几乎都是None | |
| 43 | +- 导致category分面查询结果为空 | |
| 44 | + | |
| 45 | +### 问题2:为什么specifications分面查询无结果 | |
| 46 | + | |
| 47 | +虽然ES聚合查询显示有数据,但前端显示为空,可能原因: | |
| 48 | + | |
| 49 | +1. **分面聚合结构不匹配**: | |
| 50 | + - 前端请求:`["category1_name", "specifications.color", "specifications.size", "specifications.material"]` | |
| 51 | + - ES构建的聚合名称:`category1_name_facet`, `specifications_color_facet`, `specifications_size_facet`, `specifications_material_facet` | |
| 52 | + - 前端解析时的字段匹配可能有问题 | |
| 53 | + | |
| 54 | +2. **ES聚合结果格式**: | |
| 55 | + - specifications.color分面的聚合名称是`specifications_color_facet` | |
| 56 | + - 但前端期望的field是`specifications.color` | |
| 57 | + - 需要在`format_facets`中正确匹配 | |
| 58 | + | |
| 59 | +## 解决方案 | |
| 60 | + | |
| 61 | +### 方案1:修复category1_name字段(必需) | |
| 62 | + | |
| 63 | +**问题**:MySQL的`category_path`为空,需要从`category`字段生成 | |
| 64 | + | |
| 65 | +**已修复代码**(`indexer/spu_transformer.py`): | |
| 66 | +- 如果`category_path`为空,使用`category`字段作为备选 | |
| 67 | +- 从`category`字段解析多级分类(如果包含"/") | |
| 68 | +- 如果`category`不包含"/",直接作为`category1_name` | |
| 69 | + | |
| 70 | +**但需要**: | |
| 71 | +1. 检查MySQL的`category`字段是否有值 | |
| 72 | +2. 如果`category`也为空,需要检查Excel导入时"专辑名称"字段是否正确映射 | |
| 73 | + | |
| 74 | +### 方案2:验证specifications分面查询 | |
| 75 | + | |
| 76 | +虽然ES聚合查询显示有数据,但需要: | |
| 77 | +1. 检查前端是否正确发送分面请求 | |
| 78 | +2. 检查ES返回的聚合结果格式 | |
| 79 | +3. 检查`format_facets`函数是否正确解析specifications分面 | |
| 80 | + | |
| 81 | +## 下一步操作 | |
| 82 | + | |
| 83 | +### 步骤1:检查MySQL的category字段 | |
| 84 | + | |
| 85 | +```sql | |
| 86 | +SELECT | |
| 87 | + COUNT(*) as total, | |
| 88 | + COUNT(category) as has_category, | |
| 89 | + COUNT(*) - COUNT(category) as null_category | |
| 90 | +FROM shoplazza_product_spu | |
| 91 | +WHERE tenant_id = 162 AND deleted = 0; | |
| 92 | +``` | |
| 93 | + | |
| 94 | +### 步骤2:检查Excel导入映射 | |
| 95 | + | |
| 96 | +确认Excel的"专辑名称"字段是否正确映射到MySQL的`category`字段: | |
| 97 | +- 如果映射到`category`字段,应该有值 | |
| 98 | +- 如果映射到`category_path`字段,但值是ID列表格式,需要修复 | |
| 99 | + | |
| 100 | +### 步骤3:重新导入数据到ES | |
| 101 | + | |
| 102 | +修复后,重新导入数据: | |
| 103 | +```bash | |
| 104 | +python scripts/recreate_and_import.py \ | |
| 105 | + --tenant-id 162 \ | |
| 106 | + --db-host <host> \ | |
| 107 | + --db-database saas \ | |
| 108 | + --db-username saas \ | |
| 109 | + --db-password <password> \ | |
| 110 | + --es-host http://localhost:9200 | |
| 111 | +``` | |
| 112 | + | |
| 113 | +### 步骤4:验证ES数据 | |
| 114 | + | |
| 115 | +检查ES索引中的文档是否包含: | |
| 116 | +- `category1_name`字段(应该有值) | |
| 117 | +- `specifications`字段(应该已经有数据) | |
| 118 | + | |
| 119 | +## 关键发现 | |
| 120 | + | |
| 121 | +从ES检查结果看: | |
| 122 | +1. **specifications数据是有的**,ES聚合查询能正常返回color/size/material的分面数据 | |
| 123 | +2. **category1_name几乎都是空的**,这是导致category分面为空的原因 | |
| 124 | +3. **如果specifications分面也显示为空**,可能是前端解析ES聚合结果的问题,而不是ES数据的问题 | |
| 125 | + | ... | ... |
| ... | ... | @@ -0,0 +1,180 @@ |
| 1 | +# 分面数据问题根源和解决方案 | |
| 2 | + | |
| 3 | +## 📊 诊断结果总结 | |
| 4 | + | |
| 5 | +### MySQL数据情况 | |
| 6 | +- **总SPU数**:11254 | |
| 7 | +- **category_path字段**:只有1个有值(ID列表格式),11253个为空 | |
| 8 | +- **option表数据**: | |
| 9 | + - 有option定义的SPU:886个 | |
| 10 | + - position=1, name='color': 885个 ✅ | |
| 11 | + - position=2, name='size': 885个 ✅ | |
| 12 | + - position=3, name='material': 885个 ✅ | |
| 13 | + | |
| 14 | +### ES索引数据情况 | |
| 15 | +- **总文档数**:10000 | |
| 16 | +- **category1_name字段**:只有1个有值(ID列表格式),其他都是None ❌ | |
| 17 | +- **specifications聚合查询**:有数据 ✅ | |
| 18 | + - specifications.color: Beige: 1226, Khaki: 1176等 | |
| 19 | + - specifications.size: 1: 1234, 12: 1234等 | |
| 20 | + - specifications.material: 塑料英文包装: 17277等 | |
| 21 | + | |
| 22 | +## 🔍 问题根源 | |
| 23 | + | |
| 24 | +### 问题1:category1_name 几乎都为空 | |
| 25 | + | |
| 26 | +**数据流分析**: | |
| 27 | + | |
| 28 | +1. **Excel生成阶段**(`csv_to_excel_multi_variant.py`): | |
| 29 | + - Excel字段:`'专辑名称': csv_data['categoryName']` | |
| 30 | + - 从CSV的`categoryName`字段读取,应该有值 | |
| 31 | + | |
| 32 | +2. **Excel导入店匠 → MySQL**: | |
| 33 | + - Excel的"专辑名称"字段 → 可能映射到MySQL的`category`或`category_path`字段 | |
| 34 | + - **问题**:店匠系统可能将"专辑名称"映射到`category`字段,而不是`category_path` | |
| 35 | + - 诊断结果显示:`category_path`几乎都是空的 | |
| 36 | + | |
| 37 | +3. **MySQL → ES转换**(`spu_transformer.py`): | |
| 38 | + - 原逻辑:只从`category_path`解析`category1_name` | |
| 39 | + - 如果`category_path`为空,`category1_name`不会被设置 | |
| 40 | + - **已修复**:如果`category_path`为空,使用`category`字段作为备选(第241-259行) | |
| 41 | + | |
| 42 | +**关键检查点**: | |
| 43 | +- MySQL的`category`字段是否有值? | |
| 44 | +- 如果`category`字段也为空,说明Excel导入时"专辑名称"没有正确映射 | |
| 45 | + | |
| 46 | +### 问题2:specifications分面查询无结果 | |
| 47 | + | |
| 48 | +**奇怪的现象**: | |
| 49 | +- ES聚合查询显示有数据(Beige: 1226, Khaki: 1176等) | |
| 50 | +- 但前端显示为空 | |
| 51 | + | |
| 52 | +**可能原因**: | |
| 53 | + | |
| 54 | +1. **前端搜索时有查询条件**: | |
| 55 | + - 如果搜索时添加了查询条件(如`query="手机"`),ES会先过滤文档 | |
| 56 | + - 过滤后的文档可能没有specifications数据,导致聚合结果为空 | |
| 57 | + - **需要验证**:不带查询条件的搜索,分面是否有数据 | |
| 58 | + | |
| 59 | +2. **分面聚合构建或解析问题**: | |
| 60 | + - 前端请求:`["category1_name", "specifications.color", "specifications.size", "specifications.material"]` | |
| 61 | + - ES构建的聚合名称:`specifications_color_facet` | |
| 62 | + - 前端解析时的字段匹配:`specifications.color` | |
| 63 | + - **需要验证**:`format_facets`函数是否正确匹配 | |
| 64 | + | |
| 65 | +3. **tenant_id过滤问题**: | |
| 66 | + - 如果tenant_id不匹配,会导致没有匹配的文档 | |
| 67 | + | |
| 68 | +## ✅ 已实施的修复 | |
| 69 | + | |
| 70 | +### 修复1:支持从category字段生成category1_name | |
| 71 | + | |
| 72 | +**文件**:`indexer/spu_transformer.py`(第241-259行) | |
| 73 | + | |
| 74 | +**修改内容**: | |
| 75 | +```python | |
| 76 | +elif pd.notna(spu_row.get('category')): | |
| 77 | + # 如果category_path为空,使用category字段作为category1_name的备选 | |
| 78 | + category = str(spu_row['category']) | |
| 79 | + # 从category字段解析多级分类 | |
| 80 | + if '/' in category: | |
| 81 | + path_parts = category.split('/') | |
| 82 | + if len(path_parts) > 0: | |
| 83 | + doc['category1_name'] = path_parts[0].strip() | |
| 84 | + else: | |
| 85 | + # 直接作为category1_name | |
| 86 | + doc['category1_name'] = category.strip() | |
| 87 | +``` | |
| 88 | + | |
| 89 | +**说明**:如果MySQL的`category`字段有值,修复后的代码应该能生成`category1_name` | |
| 90 | + | |
| 91 | +## 🔧 需要执行的操作 | |
| 92 | + | |
| 93 | +### 步骤1:检查MySQL的category字段 | |
| 94 | + | |
| 95 | +**更新诊断脚本**(已更新):`scripts/check_data_source.py` | |
| 96 | + | |
| 97 | +**运行检查**: | |
| 98 | +```bash | |
| 99 | +python scripts/check_data_source.py --tenant-id 162 --db-host <host> ... | |
| 100 | +``` | |
| 101 | + | |
| 102 | +**关键检查**: | |
| 103 | +- `category`字段是否有值 | |
| 104 | +- 如果有值,值的格式是什么(是否包含"/") | |
| 105 | +- 如果也为空,说明Excel导入映射有问题 | |
| 106 | + | |
| 107 | +### 步骤2:重新导入数据到ES | |
| 108 | + | |
| 109 | +**修复代码后,需要重新导入数据**: | |
| 110 | +```bash | |
| 111 | +python scripts/recreate_and_import.py \ | |
| 112 | + --tenant-id 162 \ | |
| 113 | + --db-host <host> \ | |
| 114 | + --db-database saas \ | |
| 115 | + --db-username saas \ | |
| 116 | + --db-password <password> \ | |
| 117 | + --es-host http://localhost:9200 | |
| 118 | +``` | |
| 119 | + | |
| 120 | +### 步骤3:验证ES数据 | |
| 121 | + | |
| 122 | +**运行ES数据检查脚本**: | |
| 123 | +```bash | |
| 124 | +python scripts/check_es_data.py --tenant-id 162 | |
| 125 | +``` | |
| 126 | + | |
| 127 | +**检查内容**: | |
| 128 | +- `category1_name`字段是否有值 | |
| 129 | +- `specifications`字段是否有数据 | |
| 130 | +- 分面聚合查询是否有结果 | |
| 131 | + | |
| 132 | +## 📝 数据流程说明 | |
| 133 | + | |
| 134 | +### Excel生成 → MySQL | |
| 135 | + | |
| 136 | +**Excel字段**(`csv_to_excel_multi_variant.py`): | |
| 137 | +- `'专辑名称': csv_data['categoryName']` - 分类信息 | |
| 138 | +- `'款式1': 'color'`(M行)- 选项名称 | |
| 139 | +- `'款式2': 'size'`(M行)- 选项名称 | |
| 140 | +- `'款式3': 'material'`(M行)- 选项名称 | |
| 141 | +- `'款式1': 'Red'`(P行)- 选项值 | |
| 142 | +- `'款式2': '5'`(P行)- 选项值 | |
| 143 | +- `'款式3': '塑料'`(P行)- 选项值 | |
| 144 | + | |
| 145 | +**Excel导入店匠 → MySQL映射**(需要确认): | |
| 146 | +- `'专辑名称'` → `shoplazza_product_spu.category` 或 `category_path` | |
| 147 | +- `'款式1/2/3'`(M行)→ `shoplazza_product_option.name` + `position` | |
| 148 | +- `'款式1/2/3'`(P行)→ `shoplazza_product_sku.option1/2/3` | |
| 149 | + | |
| 150 | +### MySQL → ES转换 | |
| 151 | + | |
| 152 | +**当前逻辑**(`spu_transformer.py`): | |
| 153 | + | |
| 154 | +1. **category1_name生成**: | |
| 155 | + - 优先从`category_path`解析(第228-240行) | |
| 156 | + - 如果`category_path`为空,从`category`字段解析(第241-259行)✅ 已修复 | |
| 157 | + | |
| 158 | +2. **specifications生成**(第351-370行): | |
| 159 | + - 从`option表`获取name(position → name映射) | |
| 160 | + - 从`SKU表`获取option1/2/3值 | |
| 161 | + - 构建`specifications`数组 | |
| 162 | + | |
| 163 | +**关键点**: | |
| 164 | +- 需要确保MySQL的`category`字段有值 | |
| 165 | +- 需要确保`option表`有数据且`name`是英文(color/size/material) | |
| 166 | +- 需要确保SKU的`option1/2/3`字段有值 | |
| 167 | + | |
| 168 | +## 🎯 关键发现 | |
| 169 | + | |
| 170 | +1. **specifications数据是存在的**:ES聚合查询能正常返回color/size/material的分面数据 | |
| 171 | +2. **category1_name几乎都是空的**:这是因为`category_path`为空,需要从`category`字段生成 | |
| 172 | +3. **需要重新导入数据**:修复代码后,需要重新导入数据到ES才能生效 | |
| 173 | + | |
| 174 | +## 🔄 下一步 | |
| 175 | + | |
| 176 | +1. ✅ **代码已修复**:支持从`category`字段生成`category1_name` | |
| 177 | +2. ⏳ **需要检查MySQL数据**:确认`category`字段是否有值 | |
| 178 | +3. ⏳ **需要重新导入数据**:将修复后的数据导入ES | |
| 179 | +4. ⏳ **需要验证**:检查ES数据是否正确,分面是否能正常显示 | |
| 180 | + | ... | ... |
| ... | ... | @@ -0,0 +1,282 @@ |
| 1 | +# 分面数据问题诊断报告 | |
| 2 | + | |
| 3 | +## 问题描述 | |
| 4 | + | |
| 5 | +前端显示的分面结果都是空的: | |
| 6 | +- Category: 空 | |
| 7 | +- Color: 空 | |
| 8 | +- Size: 空 | |
| 9 | +- Material: 空 | |
| 10 | + | |
| 11 | +ES的聚合查询结果也是空的。 | |
| 12 | + | |
| 13 | +## 数据流程分析 | |
| 14 | + | |
| 15 | +### 1. 数据生成阶段(csv_to_excel_multi_variant.py) | |
| 16 | + | |
| 17 | +**生成的数据**: | |
| 18 | + | |
| 19 | +#### 分类信息: | |
| 20 | +- Excel字段:`'专辑名称': csv_data['categoryName']` | |
| 21 | +- 示例值:`"电子产品"` 或 `"服装/男装"`(从CSV的categoryName字段读取) | |
| 22 | + | |
| 23 | +#### 属性信息(M+P类型商品): | |
| 24 | +- Excel字段(M行主商品): | |
| 25 | + - `'款式1': 'color'`(选项名称) | |
| 26 | + - `'款式2': 'size'`(选项名称) | |
| 27 | + - `'款式3': 'material'`(选项名称) | |
| 28 | +- Excel字段(P行子款式): | |
| 29 | + - `'款式1': 'Red'`(选项值,从COLORS列表随机选择) | |
| 30 | + - `'款式2': '5'`(选项值,1-30随机选择) | |
| 31 | + - `'款式3': '塑料'`(选项值,从商品标题提取) | |
| 32 | + | |
| 33 | +### 2. Excel导入店匠系统 → MySQL | |
| 34 | + | |
| 35 | +**预期映射**: | |
| 36 | + | |
| 37 | +#### 分类字段: | |
| 38 | +- Excel `'专辑名称'` → MySQL `shoplazza_product_spu.category_path` **或** `category` 字段 | |
| 39 | +- **问题**:店匠系统可能将"专辑名称"映射到`category`字段,而不是`category_path`字段 | |
| 40 | + | |
| 41 | +#### 属性字段: | |
| 42 | +- Excel `'款式1/2/3'`(M行)→ MySQL `shoplazza_product_option.name` 和 `position` | |
| 43 | +- Excel `'款式1/2/3'`(P行)→ MySQL `shoplazza_product_sku.option1/2/3` | |
| 44 | + | |
| 45 | +### 3. MySQL → ES转换阶段(spu_transformer.py) | |
| 46 | + | |
| 47 | +#### category1_name 构建逻辑(第228-240行): | |
| 48 | + | |
| 49 | +```python | |
| 50 | +if pd.notna(spu_row.get('category_path')): | |
| 51 | + category_path = str(spu_row['category_path']) | |
| 52 | + # 解析category_path获取多层级分类名称 | |
| 53 | + path_parts = category_path.split('/') | |
| 54 | + if len(path_parts) > 0: | |
| 55 | + doc['category1_name'] = path_parts[0].strip() | |
| 56 | +``` | |
| 57 | + | |
| 58 | +**问题**:如果MySQL中的`category_path`字段为空,`category1_name`不会被设置! | |
| 59 | + | |
| 60 | +#### specifications 构建逻辑(第328-347行): | |
| 61 | + | |
| 62 | +```python | |
| 63 | +# 构建option名称映射(position -> name) | |
| 64 | +option_name_map = {} | |
| 65 | +if not options.empty: | |
| 66 | + for _, opt_row in options.iterrows(): | |
| 67 | + position = opt_row.get('position') | |
| 68 | + name = opt_row.get('name') | |
| 69 | + if pd.notna(position) and pd.notna(name): | |
| 70 | + option_name_map[int(position)] = str(name) | |
| 71 | + | |
| 72 | +# 构建specifications | |
| 73 | +if pd.notna(sku_row.get('option1')) and 1 in option_name_map: | |
| 74 | + specifications.append({ | |
| 75 | + 'sku_id': sku_id, | |
| 76 | + 'name': option_name_map[1], # 使用option表的name字段 | |
| 77 | + 'value': str(sku_row['option1']) | |
| 78 | + }) | |
| 79 | +``` | |
| 80 | + | |
| 81 | +**问题**:如果`shoplazza_product_option`表中没有记录,或者`name`字段值不是预期的英文名称(即不是"color"、"size"、"material"),会导致: | |
| 82 | +1. `option_name_map`为空,无法构建specifications | |
| 83 | +2. 即使有值,如果name不是"color"/"size"/"material",前端也无法正确匹配 | |
| 84 | + | |
| 85 | +## 问题根源 | |
| 86 | + | |
| 87 | +### 问题1:category1_name 为空 | |
| 88 | + | |
| 89 | +**原因**: | |
| 90 | +1. MySQL的`category_path`字段可能为空 | |
| 91 | +2. Excel的"专辑名称"可能被映射到`category`字段而不是`category_path` | |
| 92 | +3. 如果`category_path`为空,`category1_name`不会被设置 | |
| 93 | + | |
| 94 | +**验证方法**: | |
| 95 | +```sql | |
| 96 | +SELECT COUNT(*) as total, | |
| 97 | + COUNT(category_path) as has_category_path, | |
| 98 | + COUNT(category) as has_category | |
| 99 | +FROM shoplazza_product_spu | |
| 100 | +WHERE tenant_id = 162 AND deleted = 0; | |
| 101 | +``` | |
| 102 | + | |
| 103 | +### 问题2:specifications 为空 | |
| 104 | + | |
| 105 | +**原因**: | |
| 106 | +1. `shoplazza_product_option`表可能没有数据 | |
| 107 | +2. option表的`name`字段值可能不是英文(不是"color"、"size"、"material") | |
| 108 | + | |
| 109 | +**验证方法**: | |
| 110 | +```sql | |
| 111 | +SELECT DISTINCT name, position, COUNT(*) as count | |
| 112 | +FROM shoplazza_product_option | |
| 113 | +WHERE tenant_id = 162 AND deleted = 0 | |
| 114 | +GROUP BY name, position | |
| 115 | +ORDER BY position, name; | |
| 116 | +``` | |
| 117 | + | |
| 118 | +## 解决方案 | |
| 119 | + | |
| 120 | +### 方案1:修复 spu_transformer.py - 支持从category字段生成category1_name | |
| 121 | + | |
| 122 | +修改`indexer/spu_transformer.py`的`_transform_spu_to_doc`方法,如果`category_path`为空,使用`category`字段作为备选: | |
| 123 | + | |
| 124 | +```python | |
| 125 | +# Category相关字段 | |
| 126 | +if pd.notna(spu_row.get('category_path')): | |
| 127 | + category_path = str(spu_row['category_path']) | |
| 128 | + doc['category_path_zh'] = category_path | |
| 129 | + doc['category_path_en'] = None | |
| 130 | + | |
| 131 | + # 解析category_path获取多层级分类名称 | |
| 132 | + path_parts = category_path.split('/') | |
| 133 | + if len(path_parts) > 0: | |
| 134 | + doc['category1_name'] = path_parts[0].strip() | |
| 135 | + if len(path_parts) > 1: | |
| 136 | + doc['category2_name'] = path_parts[1].strip() | |
| 137 | + if len(path_parts) > 2: | |
| 138 | + doc['category3_name'] = path_parts[2].strip() | |
| 139 | +elif pd.notna(spu_row.get('category')): | |
| 140 | + # 如果category_path为空,使用category字段作为category1_name | |
| 141 | + category = str(spu_row['category']) | |
| 142 | + doc['category1_name'] = category.strip() | |
| 143 | + # 如果category包含"/",也尝试解析 | |
| 144 | + if '/' in category: | |
| 145 | + path_parts = category.split('/') | |
| 146 | + if len(path_parts) > 0: | |
| 147 | + doc['category1_name'] = path_parts[0].strip() | |
| 148 | + if len(path_parts) > 1: | |
| 149 | + doc['category2_name'] = path_parts[1].strip() | |
| 150 | + if len(path_parts) > 2: | |
| 151 | + doc['category3_name'] = path_parts[2].strip() | |
| 152 | +``` | |
| 153 | + | |
| 154 | +### 方案2:检查并修复 option 表的 name 字段值 | |
| 155 | + | |
| 156 | +需要确保`shoplazza_product_option`表的`name`字段值是英文: | |
| 157 | +- position=1 的name应该是 `"color"` | |
| 158 | +- position=2 的name应该是 `"size"` | |
| 159 | +- position=3 的name应该是 `"material"` | |
| 160 | + | |
| 161 | +如果值不对,需要更新: | |
| 162 | + | |
| 163 | +```sql | |
| 164 | +-- 查看当前的name值 | |
| 165 | +SELECT DISTINCT name, position | |
| 166 | +FROM shoplazza_product_option | |
| 167 | +WHERE tenant_id = 162 AND deleted = 0 | |
| 168 | +ORDER BY position; | |
| 169 | + | |
| 170 | +-- 如果需要更新(示例) | |
| 171 | +-- UPDATE shoplazza_product_option | |
| 172 | +-- SET name = CASE position | |
| 173 | +-- WHEN 1 THEN 'color' | |
| 174 | +-- WHEN 2 THEN 'size' | |
| 175 | +-- WHEN 3 THEN 'material' | |
| 176 | +-- END | |
| 177 | +-- WHERE tenant_id = 162 AND deleted = 0; | |
| 178 | +``` | |
| 179 | + | |
| 180 | +### 方案3:验证数据完整性 | |
| 181 | + | |
| 182 | +使用诊断脚本检查数据: | |
| 183 | + | |
| 184 | +```bash | |
| 185 | +python scripts/check_data_source.py \ | |
| 186 | + --tenant-id 162 \ | |
| 187 | + --db-host <mysql_host> \ | |
| 188 | + --db-port 3316 \ | |
| 189 | + --db-database saas \ | |
| 190 | + --db-username saas \ | |
| 191 | + --db-password <password> | |
| 192 | +``` | |
| 193 | + | |
| 194 | +## 诊断步骤 | |
| 195 | + | |
| 196 | +### 步骤1:检查MySQL数据 | |
| 197 | + | |
| 198 | +运行诊断脚本: | |
| 199 | +```bash | |
| 200 | +cd /home/tw/SearchEngine | |
| 201 | +source /home/tw/miniconda3/etc/profile.d/conda.sh | |
| 202 | +conda activate searchengine | |
| 203 | +python scripts/check_data_source.py --tenant-id 162 --db-host <host> --db-database saas --db-username saas --db-password <password> | |
| 204 | +``` | |
| 205 | + | |
| 206 | +### 步骤2:根据检查结果修复 | |
| 207 | + | |
| 208 | +#### 如果 category_path 为空: | |
| 209 | +- 使用方案1:修改`spu_transformer.py`支持从`category`字段生成`category1_name` | |
| 210 | + | |
| 211 | +#### 如果 option 表没有数据或name值不对: | |
| 212 | +- 检查Excel导入是否正确 | |
| 213 | +- 如果需要,手动更新option表的name字段值 | |
| 214 | + | |
| 215 | +### 步骤3:重新导入数据到ES | |
| 216 | + | |
| 217 | +```bash | |
| 218 | +python scripts/recreate_and_import.py \ | |
| 219 | + --tenant-id 162 \ | |
| 220 | + --db-host <host> \ | |
| 221 | + --db-database saas \ | |
| 222 | + --db-username saas \ | |
| 223 | + --db-password <password> \ | |
| 224 | + --es-host http://localhost:9200 | |
| 225 | +``` | |
| 226 | + | |
| 227 | +### 步骤4:验证ES数据 | |
| 228 | + | |
| 229 | +检查ES索引中的文档: | |
| 230 | + | |
| 231 | +```bash | |
| 232 | +curl -X GET "http://localhost:9200/search_products/_search?pretty" -H 'Content-Type: application/json' -d' | |
| 233 | +{ | |
| 234 | + "query": { | |
| 235 | + "term": { | |
| 236 | + "tenant_id": "162" | |
| 237 | + } | |
| 238 | + }, | |
| 239 | + "size": 1, | |
| 240 | + "_source": ["spu_id", "title_zh", "category1_name", "specifications", "option1_name"] | |
| 241 | +}' | |
| 242 | +``` | |
| 243 | + | |
| 244 | +## 预期结果 | |
| 245 | + | |
| 246 | +修复后,ES文档应该包含: | |
| 247 | + | |
| 248 | +1. **category1_name字段**: | |
| 249 | + ```json | |
| 250 | + { | |
| 251 | + "category1_name": "电子产品" | |
| 252 | + } | |
| 253 | + ``` | |
| 254 | + | |
| 255 | +2. **specifications字段**: | |
| 256 | + ```json | |
| 257 | + { | |
| 258 | + "specifications": [ | |
| 259 | + {"sku_id": "123", "name": "color", "value": "Red"}, | |
| 260 | + {"sku_id": "123", "name": "size", "value": "5"}, | |
| 261 | + {"sku_id": "123", "name": "material", "value": "塑料"} | |
| 262 | + ] | |
| 263 | + } | |
| 264 | + ``` | |
| 265 | + | |
| 266 | +3. **option1_name/option2_name/option3_name字段**: | |
| 267 | + ```json | |
| 268 | + { | |
| 269 | + "option1_name": "color", | |
| 270 | + "option2_name": "size", | |
| 271 | + "option3_name": "material" | |
| 272 | + } | |
| 273 | + ``` | |
| 274 | + | |
| 275 | +## 总结 | |
| 276 | + | |
| 277 | +问题可能出现在: | |
| 278 | +1. **MySQL数据层面**:`category_path`字段为空,或者`shoplazza_product_option`表没有正确的数据 | |
| 279 | +2. **数据转换层面**:`spu_transformer.py`没有处理`category_path`为空的情况 | |
| 280 | + | |
| 281 | +建议先运行诊断脚本检查MySQL数据,然后根据检查结果进行修复。 | |
| 282 | + | ... | ... |
| ... | ... | @@ -0,0 +1,177 @@ |
| 1 | +# 分面数据问题修复总结 | |
| 2 | + | |
| 3 | +## 问题现象 | |
| 4 | + | |
| 5 | +前端显示的分面结果都是空的: | |
| 6 | +- Category: 空 | |
| 7 | +- Color: 空 | |
| 8 | +- Size: 空 | |
| 9 | +- Material: 空 | |
| 10 | + | |
| 11 | +ES的聚合查询结果也是空的。 | |
| 12 | + | |
| 13 | +## 问题分析 | |
| 14 | + | |
| 15 | +### 数据流程 | |
| 16 | + | |
| 17 | +1. **数据生成**(csv_to_excel_multi_variant.py): | |
| 18 | + - 生成Excel文件,包含"专辑名称"(分类)和"款式1/2/3"(属性名称和值) | |
| 19 | + | |
| 20 | +2. **Excel导入店匠** → MySQL: | |
| 21 | + - "专辑名称" → 可能映射到 `category` 或 `category_path` 字段 | |
| 22 | + - "款式1/2/3"(M行)→ `shoplazza_product_option.name` | |
| 23 | + - "款式1/2/3"(P行)→ `shoplazza_product_sku.option1/2/3` | |
| 24 | + | |
| 25 | +3. **MySQL → ES转换**(spu_transformer.py): | |
| 26 | + - `category1_name` 从 `category_path` 解析 | |
| 27 | + - `specifications` 从 `option表.name` + `sku表.option1/2/3` 构建 | |
| 28 | + | |
| 29 | +### 根本原因 | |
| 30 | + | |
| 31 | +1. **category1_name 为空**: | |
| 32 | + - MySQL的`category_path`字段可能为空 | |
| 33 | + - Excel的"专辑名称"可能被映射到`category`字段而不是`category_path` | |
| 34 | + - 原代码只从`category_path`解析,如果为空则`category1_name`不会被设置 | |
| 35 | + | |
| 36 | +2. **specifications 为空**: | |
| 37 | + - `shoplazza_product_option`表可能没有数据 | |
| 38 | + - 或`name`字段值不是英文(不是"color"、"size"、"material") | |
| 39 | + | |
| 40 | +## 已实施的修复 | |
| 41 | + | |
| 42 | +### 修复1:支持从category字段生成category1_name | |
| 43 | + | |
| 44 | +**文件**: `indexer/spu_transformer.py` | |
| 45 | + | |
| 46 | +**修改内容**: | |
| 47 | +- 如果`category_path`为空,使用`category`字段作为备选 | |
| 48 | +- 从`category`字段解析多级分类(如果包含"/") | |
| 49 | +- 如果`category`不包含"/",直接作为`category1_name` | |
| 50 | + | |
| 51 | +**代码位置**:第241-259行 | |
| 52 | + | |
| 53 | +```python | |
| 54 | +elif pd.notna(spu_row.get('category')): | |
| 55 | + # 如果category_path为空,使用category字段作为category1_name的备选 | |
| 56 | + category = str(spu_row['category']) | |
| 57 | + doc['category_name_zh'] = category | |
| 58 | + doc['category_name_en'] = None | |
| 59 | + doc['category_name'] = category | |
| 60 | + | |
| 61 | + # 尝试从category字段解析多级分类 | |
| 62 | + if '/' in category: | |
| 63 | + path_parts = category.split('/') | |
| 64 | + if len(path_parts) > 0: | |
| 65 | + doc['category1_name'] = path_parts[0].strip() | |
| 66 | + if len(path_parts) > 1: | |
| 67 | + doc['category2_name'] = path_parts[1].strip() | |
| 68 | + if len(path_parts) > 2: | |
| 69 | + doc['category3_name'] = path_parts[2].strip() | |
| 70 | + else: | |
| 71 | + # 如果category不包含"/",直接作为category1_name | |
| 72 | + doc['category1_name'] = category.strip() | |
| 73 | +``` | |
| 74 | + | |
| 75 | +## 诊断工具 | |
| 76 | + | |
| 77 | +已创建诊断脚本:`scripts/check_data_source.py` | |
| 78 | + | |
| 79 | +**使用方法**: | |
| 80 | +```bash | |
| 81 | +cd /home/tw/SearchEngine | |
| 82 | +source /home/tw/miniconda3/etc/profile.d/conda.sh | |
| 83 | +conda activate searchengine | |
| 84 | +python scripts/check_data_source.py \ | |
| 85 | + --tenant-id 162 \ | |
| 86 | + --db-host <mysql_host> \ | |
| 87 | + --db-port 3316 \ | |
| 88 | + --db-database saas \ | |
| 89 | + --db-username saas \ | |
| 90 | + --db-password <password> | |
| 91 | +``` | |
| 92 | + | |
| 93 | +**检查内容**: | |
| 94 | +1. SPU汇总信息 | |
| 95 | +2. category_path 字段是否有值 | |
| 96 | +3. option 表的 name 字段值 | |
| 97 | +4. SKU 表的 option1/2/3 字段值 | |
| 98 | + | |
| 99 | +## 下一步操作 | |
| 100 | + | |
| 101 | +### 步骤1:运行诊断脚本检查MySQL数据 | |
| 102 | + | |
| 103 | +```bash | |
| 104 | +python scripts/check_data_source.py --tenant-id 162 --db-host <host> ... | |
| 105 | +``` | |
| 106 | + | |
| 107 | +### 步骤2:根据检查结果修复数据 | |
| 108 | + | |
| 109 | +#### 如果 option 表的 name 值不对: | |
| 110 | + | |
| 111 | +检查option表的name字段值: | |
| 112 | +```sql | |
| 113 | +SELECT DISTINCT name, position | |
| 114 | +FROM shoplazza_product_option | |
| 115 | +WHERE tenant_id = 162 AND deleted = 0 | |
| 116 | +ORDER BY position; | |
| 117 | +``` | |
| 118 | + | |
| 119 | +如果需要,更新为英文: | |
| 120 | +- position=1 的 name 应该是 "color" | |
| 121 | +- position=2 的 name 应该是 "size" | |
| 122 | +- position=3 的 name 应该是 "material" | |
| 123 | + | |
| 124 | +### 步骤3:重新导入数据到ES | |
| 125 | + | |
| 126 | +```bash | |
| 127 | +python scripts/recreate_and_import.py \ | |
| 128 | + --tenant-id 162 \ | |
| 129 | + --db-host <host> \ | |
| 130 | + --db-database saas \ | |
| 131 | + --db-username saas \ | |
| 132 | + --db-password <password> \ | |
| 133 | + --es-host http://localhost:9200 | |
| 134 | +``` | |
| 135 | + | |
| 136 | +### 步骤4:验证ES数据 | |
| 137 | + | |
| 138 | +检查ES索引中的文档是否包含: | |
| 139 | +- `category1_name` 字段 | |
| 140 | +- `specifications` 字段(包含color、size、material) | |
| 141 | +- `option1_name`、`option2_name`、`option3_name` 字段 | |
| 142 | + | |
| 143 | +```bash | |
| 144 | +curl -X GET "http://localhost:9200/search_products/_search?pretty" -H 'Content-Type: application/json' -d' | |
| 145 | +{ | |
| 146 | + "query": { | |
| 147 | + "term": { | |
| 148 | + "tenant_id": "162" | |
| 149 | + } | |
| 150 | + }, | |
| 151 | + "size": 1, | |
| 152 | + "_source": ["spu_id", "title_zh", "category1_name", "specifications", "option1_name", "option2_name", "option3_name"] | |
| 153 | +}' | |
| 154 | +``` | |
| 155 | + | |
| 156 | +## 预期结果 | |
| 157 | + | |
| 158 | +修复后,ES文档应该包含: | |
| 159 | + | |
| 160 | +```json | |
| 161 | +{ | |
| 162 | + "spu_id": "123", | |
| 163 | + "title_zh": "商品标题", | |
| 164 | + "category1_name": "电子产品", | |
| 165 | + "specifications": [ | |
| 166 | + {"sku_id": "456", "name": "color", "value": "Red"}, | |
| 167 | + {"sku_id": "456", "name": "size", "value": "5"}, | |
| 168 | + {"sku_id": "456", "name": "material", "value": "塑料"} | |
| 169 | + ], | |
| 170 | + "option1_name": "color", | |
| 171 | + "option2_name": "size", | |
| 172 | + "option3_name": "material" | |
| 173 | +} | |
| 174 | +``` | |
| 175 | + | |
| 176 | +前端分面应该能正常显示分类和属性值。 | |
| 177 | + | ... | ... |
| ... | ... | @@ -0,0 +1,115 @@ |
| 1 | +# 分面问题最终诊断报告 | |
| 2 | + | |
| 3 | +## ES数据检查结果 | |
| 4 | + | |
| 5 | +根据ES索引检查结果: | |
| 6 | + | |
| 7 | +### ✅ specifications 分面有数据 | |
| 8 | +ES聚合查询显示: | |
| 9 | +- **specifications.color**: 有数据(Beige: 1226, Khaki: 1176, Red: 1168等) | |
| 10 | +- **specifications.size**: 有数据(1: 1234, 12: 1234等) | |
| 11 | +- **specifications.material**: 有数据(塑料英文包装: 17277等) | |
| 12 | + | |
| 13 | +**结论**:ES中确实有specifications数据,聚合查询能正常返回结果。 | |
| 14 | + | |
| 15 | +### ❌ category1_name 几乎都为空 | |
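| 16 | +- 总文档数:10000(注意:ES 默认 `track_total_hits` 上限为 10000,该值可能只是下限而非精确值;MySQL 中共有 11254 个 SPU,精确文档数需用 `_count` 接口或 `track_total_hits: true` 确认) | |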
| 17 | +- 有category1_name的文档:只有1个 | |
| 18 | +- 该文档的category1_name值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式,不是分类名称) | |
| 19 | + | |
| 20 | +**结论**:category1_name字段几乎都是空的,导致category分面为空。 | |
| 21 | + | |
| 22 | +## 问题根源分析 | |
| 23 | + | |
| 24 | +### 问题1:category1_name 为什么为空 | |
| 25 | + | |
| 26 | +**MySQL数据情况**(从诊断脚本结果): | |
| 27 | +- `category_path`字段:11253个SPU为空,只有1个有值 | |
| 28 | +- 该唯一值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式,不是路径格式) | |
| 29 | + | |
| 30 | +**当前代码逻辑**(`spu_transformer.py`第228-240行): | |
| 31 | +```python | |
| 32 | +if pd.notna(spu_row.get('category_path')): | |
| 33 | + category_path = str(spu_row['category_path']) | |
| 34 | + # 直接按"/"分割,但ID列表格式是逗号分隔的 | |
| 35 | + path_parts = category_path.split('/') | |
| 36 | + # 如果category_path是ID列表,path_parts只有一个元素(整个ID列表) | |
| 37 | +``` | |
| 38 | + | |
| 39 | +**问题**: | |
| 40 | +1. 对于ID列表格式的`category_path`(如`593389466647815326,593389582007954165,593389582008019701`),按"/"分割后只有一个元素,会被错误地作为`category1_name` | |
| 41 | +2. 对于空的`category_path`,会进入`elif`分支,使用`category`字段作为备选 | |
| 42 | + | |
| 43 | +**需要检查**: | |
| 44 | +- MySQL的`category`字段是否有值?如果有值,应该能生成`category1_name` | |
| 45 | +- 如果`category`字段也为空,说明Excel导入时"专辑名称"没有正确映射 | |
| 46 | + | |
| 47 | +### 问题2:specifications 分面查询为什么为空 | |
| 48 | + | |
| 49 | +虽然ES聚合查询显示有数据,但前端显示为空,可能原因: | |
| 50 | + | |
| 51 | +1. **前端分面请求格式**: | |
| 52 | + - 前端请求:`["category1_name", "specifications.color", "specifications.size", "specifications.material"]` | |
| 53 | + - ES构建的聚合名称:`specifications_color_facet`(注意:是下划线,不是点号) | |
| 54 | + - 字段匹配可能有问题 | |
| 55 | + | |
| 56 | +2. **ES聚合结果解析**: | |
| 57 | + - ES返回的聚合字段名:`specifications_color_facet` | |
| 58 | + - 前端期望的field:`specifications.color` | |
| 59 | + - `format_facets`函数需要正确匹配 | |
| 60 | + | |
| 61 | +## 具体数据说明 | |
| 62 | + | |
| 63 | +### MySQL数据情况 | |
| 64 | +- **总SPU数**:11254 | |
| 65 | +- **有category_path的SPU**:1个(值是ID列表格式) | |
| 66 | +- **有option定义的SPU**:886个 | |
| 67 | + - position=1, name='color': 885个 | |
| 68 | + - position=2, name='size': 885个 | |
| 69 | + - position=3, name='material': 885个 | |
| 70 | +- **总SKU数**:43109个 | |
| 71 | + | |
| 72 | +### ES数据情况 | |
| 73 | +- **specifications数据**:有数据,能够正常聚合 | |
| 74 | +- **category1_name数据**:几乎都是空的(只有1个,而且是ID列表格式) | |
| 75 | + | |
| 76 | +## 解决方案 | |
| 77 | + | |
| 78 | +### 立即执行的操作 | |
| 79 | + | |
| 80 | +1. **检查MySQL的category字段**: | |
| 81 | + - 运行诊断脚本检查`category`字段是否有值 | |
| 82 | + - 如果`category`有值,修复后的代码应该能生成`category1_name` | |
| 83 | + - 如果`category`也为空,需要检查Excel导入映射 | |
| 84 | + | |
| 85 | +2. **重新导入数据到ES**: | |
| 86 | + ```bash | |
| 87 | + python scripts/recreate_and_import.py \ | |
| 88 | + --tenant-id 162 \ | |
| 89 | + --db-host <host> \ | |
| 90 | + --db-database saas \ | |
| 91 | + --db-username saas \ | |
| 92 | + --db-password <password> \ | |
| 93 | + --es-host http://localhost:9200 | |
| 94 | + ``` | |
| 95 | + | |
| 96 | +3. **验证ES数据**: | |
| 97 | + - 检查`category1_name`字段是否有值 | |
| 98 | + - 检查`specifications`字段是否有数据 | |
| 99 | + | |
| 100 | +### 如果category字段也为空 | |
| 101 | + | |
| 102 | +需要检查Excel导入到店匠系统时,"专辑名称"字段是否正确映射到MySQL的`category`字段。 | |
| 103 | + | |
| 104 | +## 关键发现 | |
| 105 | + | |
| 106 | +1. **specifications数据是存在的**:ES聚合查询能正常返回color/size/material的分面数据 | |
| 107 | +2. **category1_name几乎都是空的**:这是因为`category_path`为空,而且可能`category`字段也为空 | |
| 108 | +3. **需要从category字段生成category1_name**:代码已修复,但需要确保MySQL的`category`字段有值 | |
| 109 | + | |
| 110 | +## 下一步 | |
| 111 | + | |
| 112 | +1. 检查MySQL的`category`字段是否有值 | |
| 113 | +2. 如果有值,重新导入数据到ES | |
| 114 | +3. 如果也为空,需要检查Excel导入映射或修复数据 | |
| 115 | + | ... | ... |
| ... | ... | @@ -0,0 +1,203 @@ |
| 1 | +# 分面数据问题诊断和修复指南 | |
| 2 | + | |
| 3 | +## 问题现象 | |
| 4 | + | |
| 5 | +前端显示的分面结果都是空的: | |
| 6 | +- Category: 空 | |
| 7 | +- Color: 空 | |
| 8 | +- Size: 空 | |
| 9 | +- Material: 空 | |
| 10 | + | |
| 11 | +搜索接口返回的ES聚合结果也是空的(直接对ES执行不带查询条件的聚合时,specifications 有数据,详见下文"ES索引数据情况")。 | |
| 12 | + | |
| 13 | +## 诊断结果分析 | |
| 14 | + | |
| 15 | +### MySQL数据情况 | |
| 16 | + | |
| 17 | +| 字段/表 | 有数据的数量 | 说明 | | |
| 18 | +|---------|-------------|------| | |
| 19 | +| 总SPU数 | 11254 | - | | |
| 20 | +| category_path有值 | 1个 | 该值是ID列表格式(不是路径格式) | | |
| 21 | +| category字段 | 需要检查 | 可能是空的 | | |
| 22 | +| option表记录 | 2658条 | 886个SPU有option定义 | | |
| 23 | +| position=1, name='color' | 885个SPU | ✅ 数量足够 | | |
| 24 | +| position=2, name='size' | 885个SPU | ✅ 数量足够 | | |
| 25 | +| position=3, name='material' | 885个SPU | ✅ 数量足够 | | |
| 26 | +| 总SKU数 | 43109 | option1/2/3字段需要检查 | | |
| 27 | + | |
| 28 | +### ES索引数据情况 | |
| 29 | + | |
| 30 | +| 字段 | 有数据的数量 | 说明 | | |
| 31 | +|------|-------------|------| | |
| 32 | +| 总文档数 | 10000 | 可能受ES默认 `track_total_hits` 上限(10000)截断,精确值需用 `_count` 确认 | |
| 33 | +| category1_name有值 | 1个 | 该值是ID列表格式 ❌ | | |
| 34 | +| specifications聚合查询 | 有数据 | ✅ color/size/material都有数据 | | |
| 35 | + | |
| 36 | +## 问题根源 | |
| 37 | + | |
| 38 | +### 问题1:category1_name 几乎都为空 ❌ | |
| 39 | + | |
| 40 | +**原因分析**: | |
| 41 | + | |
| 42 | +1. **MySQL数据层面**: | |
| 43 | + - `category_path`字段几乎都是空的(只有1个,且是ID列表格式) | |
| 44 | + - 需要检查`category`字段是否有值 | |
| 45 | + | |
| 46 | +2. **数据转换层面**: | |
| 47 | + - 原代码只从`category_path`解析`category1_name` | |
| 48 | + - 如果`category_path`为空,`category1_name`不会被设置 | |
| 49 | + - ✅ **已修复**:如果`category_path`为空,使用`category`字段作为备选(`spu_transformer.py`第241-259行) | |
| 50 | + | |
| 51 | +3. **Excel导入映射**: | |
| 52 | + - Excel的"专辑名称"字段可能映射到MySQL的`category`字段 | |
| 53 | + - 需要确认映射关系 | |
| 54 | + | |
| 55 | +### 问题2:specifications分面查询无结果 | |
| 56 | + | |
| 57 | +**奇怪现象**: | |
| 58 | +- ES聚合查询(查询所有文档)显示有数据 | |
| 59 | +- 但前端显示为空 | |
| 60 | + | |
| 61 | +**可能原因**: | |
| 62 | +1. 前端搜索时有查询条件,过滤后没有匹配的文档 | |
| 63 | +2. 分面聚合构建或解析有问题 | |
| 64 | +3. tenant_id不匹配 | |
| 65 | + | |
| 66 | +## 数据流程分析 | |
| 67 | + | |
| 68 | +### 1. Excel生成阶段 | |
| 69 | + | |
| 70 | +**脚本**:`scripts/csv_to_excel_multi_variant.py` | |
| 71 | + | |
| 72 | +**生成的数据**: | |
| 73 | +- `'专辑名称': csv_data['categoryName']` - 从CSV的categoryName字段读取 | |
| 74 | +- `'款式1': 'color'`(M行主商品)- 选项名称 | |
| 75 | +- `'款式2': 'size'`(M行主商品)- 选项名称 | |
| 76 | +- `'款式3': 'material'`(M行主商品)- 选项名称 | |
| 77 | +- `'款式1': 'Red'`(P行子款式)- 选项值(从COLORS列表随机选择) | |
| 78 | +- `'款式2': '5'`(P行子款式)- 选项值(1-30随机选择) | |
| 79 | +- `'款式3': '塑料'`(P行子款式)- 选项值(从商品标题提取) | |
| 80 | + | |
| 81 | +### 2. Excel导入店匠 → MySQL | |
| 82 | + | |
| 83 | +**映射关系**(需要确认): | |
| 84 | +- Excel `'专辑名称'` → MySQL `shoplazza_product_spu.category` 或 `category_path` | |
| 85 | +- Excel `'款式1/2/3'`(M行)→ MySQL `shoplazza_product_option.name` + `position` | |
| 86 | +- Excel `'款式1/2/3'`(P行)→ MySQL `shoplazza_product_sku.option1/2/3` | |
| 87 | + | |
| 88 | +**当前情况**: | |
| 89 | +- ✅ option表有数据:885个SPU有color/size/material选项名称 | |
| 90 | +- ❓ category字段:需要检查是否有值 | |
| 91 | + | |
| 92 | +### 3. MySQL → ES转换 | |
| 93 | + | |
| 94 | +**代码逻辑**(`indexer/spu_transformer.py`): | |
| 95 | + | |
| 96 | +1. **category1_name生成**(第228-259行): | |
| 97 | + ```python | |
| 98 | + if pd.notna(spu_row.get('category_path')): | |
| 99 | + # 从category_path解析 | |
| 100 | + path_parts = category_path.split('/') | |
| 101 | + doc['category1_name'] = path_parts[0].strip() | |
| 102 | + elif pd.notna(spu_row.get('category')): | |
| 103 | + # 从category字段解析(已修复) | |
| 104 | + doc['category1_name'] = category.strip() | |
| 105 | + ``` | |
| 106 | + | |
| 107 | +2. **specifications生成**(第351-370行): | |
| 108 | + ```python | |
| 109 | + # 从option表获取name映射 | |
| 110 | + option_name_map = {position: name} | |
| 111 | + # 从SKU表获取option值 | |
| 112 | + if pd.notna(sku_row.get('option1')) and 1 in option_name_map: | |
| 113 | + specifications.append({ | |
| 114 | + 'name': option_name_map[1], # 'color' | |
| 115 | + 'value': str(sku_row['option1']) # 'Red' | |
| 116 | + }) | |
| 117 | + ``` | |
| 118 | + | |
| 119 | +## 解决方案 | |
| 120 | + | |
| 121 | +### 步骤1:检查MySQL的category字段 | |
| 122 | + | |
| 123 | +**运行更新后的诊断脚本**: | |
| 124 | +```bash | |
| 125 | +cd /home/tw/SearchEngine | |
| 126 | +source /home/tw/miniconda3/etc/profile.d/conda.sh | |
| 127 | +conda activate searchengine | |
| 128 | +python scripts/check_data_source.py --tenant-id 162 --db-host <host> ... | |
| 129 | +``` | |
| 130 | + | |
| 131 | +**关键检查**: | |
| 132 | +- `category`字段是否有值 | |
| 133 | +- 如果有值,值的格式是什么(是否包含"/") | |
| 134 | + | |
| 135 | +**如果category字段也为空**: | |
| 136 | +- 说明Excel导入时"专辑名称"没有正确映射到MySQL | |
| 137 | +- 需要检查店匠系统的字段映射配置 | |
| 138 | + | |
| 139 | +### 步骤2:重新导入数据到ES | |
| 140 | + | |
| 141 | +**修复代码后,必须重新导入数据才能生效**: | |
| 142 | +```bash | |
| 143 | +python scripts/recreate_and_import.py \ | |
| 144 | + --tenant-id 162 \ | |
| 145 | + --db-host <host> \ | |
| 146 | + --db-database saas \ | |
| 147 | + --db-username saas \ | |
| 148 | + --db-password <password> \ | |
| 149 | + --es-host http://localhost:9200 | |
| 150 | +``` | |
| 151 | + | |
| 152 | +### 步骤3:验证ES数据 | |
| 153 | + | |
| 154 | +**运行ES数据检查脚本**: | |
| 155 | +```bash | |
| 156 | +python scripts/check_es_data.py --tenant-id 162 | |
| 157 | +``` | |
| 158 | + | |
| 159 | +**检查内容**: | |
| 160 | +- `category1_name`字段是否有值 | |
| 161 | +- `specifications`字段是否有数据 | |
| 162 | +- 分面聚合查询是否有结果 | |
| 163 | + | |
| 164 | +## 预期结果 | |
| 165 | + | |
| 166 | +修复后,ES文档应该包含(其中 `category1_name` 从 `category` 字段生成): | |
| 167 | + | |
| 168 | +```json | |
| 169 | +{ | |
| 170 | + "spu_id": "123", | |
| 171 | + "title_zh": "商品标题", | |
| 172 | + "category1_name": "电子产品", | |
| 173 | + "specifications": [ | |
| 174 | + {"sku_id": "456", "name": "color", "value": "Red"}, | |
| 175 | + {"sku_id": "456", "name": "size", "value": "5"}, | |
| 176 | + {"sku_id": "456", "name": "material", "value": "塑料"} | |
| 177 | + ], | |
| 178 | + "option1_name": "color", | |
| 179 | + "option2_name": "size", | |
| 180 | + "option3_name": "material" | |
| 181 | +} | |
| 182 | +``` | |
| 183 | + | |
| 184 | +## 关键检查点 | |
| 185 | + | |
| 186 | +### 1. MySQL数据检查 | |
| 187 | + | |
| 188 | +- [ ] `category`字段是否有值 | |
| 189 | +- [ ] `category_path`字段是否为空 | |
| 190 | +- [ ] `shoplazza_product_option`表的`name`字段是否是英文(color/size/material) | |
| 191 | +- [ ] SKU表的`option1/2/3`字段是否有值 | |
| 192 | + | |
| 193 | +### 2. ES数据检查 | |
| 194 | + | |
| 195 | +- [ ] `category1_name`字段是否有值 | |
| 196 | +- [ ] `specifications`字段是否有数据 | |
| 197 | +- [ ] 分面聚合查询是否有结果 | |
| 198 | + | |
| 199 | +### 3. 数据导入验证 | |
| 200 | + | |
| 201 | +- [ ] 重新导入数据后,检查ES文档是否正确 | |
| 202 | +- [ ] 验证分面查询是否能正常返回结果 | |
| 203 | + | ... | ... |
docs/ES常用表达式.md renamed to docs/常用查询 - ES.md
| ... | ... | @@ -0,0 +1,254 @@ |
| 1 | +-- 查询今天入库的SPU和SKU商品数据 | |
| 2 | +-- 用于查询当天新增的商品信息 | |
| 3 | + | |
| 4 | +-- ====================================== | |
| 5 | +-- 1. 查询今天入库的SPU商品 | |
| 6 | +-- ====================================== | |
| 7 | + | |
| 8 | +-- 查询今天创建的SPU商品(SPU级别) | |
| 9 | +SELECT | |
| 10 | + spu.id AS spu_id, | |
| 11 | + spu.tenant_id, | |
| 12 | + spu.shop_id, | |
| 13 | + spu.shoplazza_id AS shoplazza_product_id, | |
| 14 | + spu.title AS product_title, | |
| 15 | + spu.description AS product_description, | |
| 16 | + spu.brief AS product_brief, | |
| 17 | + spu.vendor AS brand_name, | |
| 18 | + spu.category AS product_category, | |
| 19 | + spu.category_path AS category_path, | |
| 20 | + spu.handle AS product_handle, | |
| 21 | + spu.tags AS product_tags, | |
| 22 | + spu.published AS product_published, | |
| 23 | + spu.published_at AS publish_time, | |
| 24 | + spu.image_src AS main_image_url, | |
| 25 | + spu.image_width AS main_image_width, | |
| 26 | + spu.image_height AS main_image_height, | |
| 27 | + spu.create_time AS spu_create_time, | |
| 28 | + spu.update_time AS spu_update_time, | |
| 29 | + CASE | |
| 30 | + WHEN spu.deleted = 1 THEN '已删除' | |
| 31 | + ELSE '正常' | |
| 32 | + END AS spu_status | |
| 33 | +FROM shoplazza_product_spu spu | |
| 34 | +WHERE DATE(spu.create_time) = CURDATE() -- 今天的日期 | |
| 35 | + AND spu.deleted = 0 -- 未删除的商品 | |
| 36 | +ORDER BY spu.create_time DESC; | |
| 37 | + | |
| 38 | +-- ====================================== | |
| 39 | +-- 2. 查询今天入库的SKU商品 | |
| 40 | +-- ====================================== | |
| 41 | + | |
| 42 | +-- 查询今天创建的SKU商品(SKU级别) | |
| 43 | +SELECT | |
| 44 | + sku.id AS sku_id, | |
| 45 | + sku.tenant_id, | |
| 46 | + sku.shop_id, | |
| 47 | + sku.spu_id, | |
| 48 | + sku.shoplazza_id AS variant_id, | |
| 49 | + sku.shoplazza_product_id AS shoplazza_product_id, | |
| 50 | + sku.sku AS sku_code, | |
| 51 | + sku.title AS sku_title, | |
| 52 | + sku.price AS sku_price, | |
| 53 | + sku.compare_at_price AS compare_price, | |
| 54 | + sku.cost_price AS cost_price, | |
| 55 | + sku.inventory_quantity AS stock_quantity, | |
| 56 | + sku.weight AS product_weight, | |
| 57 | + sku.weight_unit AS weight_unit, | |
| 58 | + sku.option1 AS color_option, | |
| 59 | + sku.option2 AS size_option, | |
| 60 | + sku.option3 AS material_option, | |
| 61 | + sku.image_src AS sku_image_url, | |
| 62 | + sku.barcode AS barcode, | |
| 63 | + sku.position AS variant_position, | |
| 64 | + sku.create_time AS sku_create_time, | |
| 65 | + sku.update_time AS sku_update_time, | |
| 66 | + CASE | |
| 67 | + WHEN sku.deleted = 1 THEN '已删除' | |
| 68 | + ELSE '正常' | |
| 69 | + END AS sku_status | |
| 70 | +FROM shoplazza_product_sku sku | |
| 71 | +WHERE DATE(sku.create_time) = CURDATE() -- 今天的日期 | |
| 72 | + AND sku.deleted = 0 -- 未删除的商品 | |
| 73 | +ORDER BY sku.create_time DESC; | |
| 74 | + | |
| 75 | +-- ====================================== | |
| 76 | +-- 3. 关联查询今天入库的SPU及其对应的SKU | |
| 77 | +-- ====================================== | |
| 78 | + | |
| 79 | +-- 查询今天创建的SPU及其关联的SKU信息 | |
| 80 | +SELECT | |
| 81 | + spu.id AS spu_id, | |
| 82 | + spu.tenant_id, | |
| 83 | + spu.shop_id, | |
| 84 | + spu.shoplazza_id AS shoplazza_product_id, | |
| 85 | + spu.title AS product_title, | |
| 86 | + spu.vendor AS brand_name, | |
| 87 | + spu.tags AS product_tags, | |
| 88 | + spu.published AS product_published, | |
| 89 | + spu.create_time AS spu_create_time, | |
| 90 | + | |
| 91 | + -- 聚合SKU信息 | |
| 92 | + COUNT(sku.id) AS sku_count, | |
| 93 | + COALESCE(MIN(sku.price), 0) AS min_price, | |
| 94 | + COALESCE(MAX(sku.price), 0) AS max_price, | |
| 95 | + COALESCE(SUM(sku.inventory_quantity), 0) AS total_stock, | |
| 96 | + GROUP_CONCAT(DISTINCT sku.option1 ORDER BY sku.option1 SEPARATOR ', ') AS available_colors, | |
| 97 | + GROUP_CONCAT(DISTINCT sku.option2 ORDER BY sku.option2 SEPARATOR ', ') AS available_sizes, | |
| 98 | + GROUP_CONCAT(DISTINCT sku.option3 ORDER BY sku.option3 SEPARATOR ', ') AS available_materials | |
| 99 | + | |
| 100 | +FROM shoplazza_product_spu spu | |
| 101 | +LEFT JOIN shoplazza_product_sku sku ON spu.id = sku.spu_id | |
| 102 | + AND spu.tenant_id = sku.tenant_id | |
| 103 | + AND sku.deleted = 0 | |
| 104 | +WHERE DATE(spu.create_time) = CURDATE() -- 今天创建的SPU | |
| 105 | + AND spu.deleted = 0 -- 未删除的SPU | |
| 106 | +GROUP BY spu.id, spu.tenant_id, spu.shop_id, spu.shoplazza_id, | |
| 107 | + spu.title, spu.vendor, spu.tags, spu.published, spu.create_time | |
| 108 | +ORDER BY spu.create_time DESC; | |
| 109 | + | |
| 110 | +-- ====================================== | |
| 111 | +-- 4. 查询今天入库商品的数量统计 | |
| 112 | +-- ====================================== | |
| 113 | + | |
| 114 | +-- 统计今天入库的商品数量 | |
| 115 | +SELECT | |
| 116 | + 'SPU商品' AS data_type, | |
| 117 | + COUNT(*) AS today_count, | |
| 118 | + DATE(CURDATE()) AS statistics_date | |
| 119 | +FROM shoplazza_product_spu | |
| 120 | +WHERE DATE(create_time) = CURDATE() | |
| 121 | + AND deleted = 0 | |
| 122 | + | |
| 123 | +UNION ALL | |
| 124 | + | |
| 125 | +SELECT | |
| 126 | + 'SKU商品' AS data_type, | |
| 127 | + COUNT(*) AS today_count, | |
| 128 | + DATE(CURDATE()) AS statistics_date | |
| 129 | +FROM shoplazza_product_sku | |
| 130 | +WHERE DATE(create_time) = CURDATE() | |
| 131 | + AND deleted = 0 | |
| 132 | + | |
| 133 | +UNION ALL | |
| 134 | + | |
| 135 | +SELECT | |
| 136 | + '活跃店铺' AS data_type, | |
| 137 | + COUNT(DISTINCT shop_id) AS today_count, | |
| 138 | + DATE(CURDATE()) AS statistics_date | |
| 139 | +FROM shoplazza_product_spu | |
| 140 | +WHERE DATE(create_time) = CURDATE() | |
| 141 | + AND deleted = 0 | |
| 142 | + | |
| 143 | +UNION ALL | |
| 144 | + | |
| 145 | +SELECT | |
| 146 | + '活跃租户' AS data_type, | |
| 147 | + COUNT(DISTINCT tenant_id) AS today_count, | |
| 148 | + DATE(CURDATE()) AS statistics_date | |
| 149 | +FROM shoplazza_product_spu | |
| 150 | +WHERE DATE(create_time) = CURDATE() | |
| 151 | + AND deleted = 0; | |
| 152 | + | |
| 153 | +-- ====================================== | |
| 154 | +-- 5. 按租户统计今天入库的商品 | |
| 155 | +-- ====================================== | |
| 156 | + | |
| 157 | +-- 按租户统计今天入库的商品分布 | |
| 158 | +SELECT | |
| 159 | + spu.tenant_id, | |
| 160 | + COUNT(DISTINCT spu.id) AS spu_count, | |
| 161 | + COUNT(DISTINCT sku.id) AS sku_count, | |
| 162 | + COUNT(DISTINCT spu.shop_id) AS shop_count, | |
| 163 | + COALESCE(SUM(sku.inventory_quantity), 0) AS total_inventory, | |
| 164 | + COALESCE(AVG(sku.price), 0) AS avg_price | |
| 165 | +FROM shoplazza_product_spu spu | |
| 166 | +LEFT JOIN shoplazza_product_sku sku ON spu.id = sku.spu_id | |
| 167 | + AND spu.tenant_id = sku.tenant_id | |
| 168 | + AND sku.deleted = 0 | |
| 169 | +WHERE DATE(spu.create_time) = CURDATE() -- 今天的日期 | |
| 170 | + AND spu.deleted = 0 -- 未删除的SPU | |
| 171 | +GROUP BY spu.tenant_id | |
| 172 | +ORDER BY spu_count DESC; | |
| 173 | + | |
| 174 | +-- ====================================== | |
| 175 | +-- 6. 查询今天入库商品的图片信息 | |
| 176 | +-- ====================================== | |
| 177 | + | |
| 178 | +-- 查询今天入库商品的主图信息(从SPU表获取) | |
| 179 | +SELECT | |
| 180 | + spu.tenant_id, | |
| 181 | + spu.shop_id, | |
| 182 | + spu.shoplazza_id AS shoplazza_product_id, | |
| 183 | + spu.image_src AS image_url, | |
| 184 | + spu.image_width AS image_width, | |
| 185 | + spu.image_height AS image_height, | |
| 186 | + spu.image_path AS image_path, | |
| 187 | + spu.image_alt AS image_alt, | |
| 188 | + spu.create_time AS product_create_time, | |
| 189 | + CASE | |
| 190 | + WHEN spu.deleted = 1 THEN '已删除' | |
| 191 | + ELSE '正常' | |
| 192 | + END AS image_status | |
| 193 | +FROM shoplazza_product_spu spu | |
| 194 | +WHERE DATE(spu.create_time) = CURDATE() -- 今天入库的商品 | |
| 195 | + AND spu.deleted = 0 -- 未删除的商品 | |
| 196 | + AND spu.image_src IS NOT NULL -- 有图片的商品 | |
| 197 | +ORDER BY spu.tenant_id, spu.shop_id, spu.shoplazza_id; | |
| 198 | + | |
| 199 | +-- ====================================== | |
| 200 | +-- 7. 查询今天入库商品的详细信息(含图片) | |
| 201 | +-- ====================================== | |
| 202 | + | |
| 203 | +-- 完整的今天入库商品信息(包含图片) | |
| 204 | +SELECT | |
| 205 | + spu.id AS spu_id, | |
| 206 | + spu.tenant_id, | |
| 207 | + spu.shop_id, | |
| 208 | + spu.shoplazza_id AS shoplazza_product_id, | |
| 209 | + spu.title AS product_title, | |
| 210 | + spu.description AS product_description, | |
| 211 | + spu.brief AS product_brief, | |
| 212 | + spu.vendor AS brand_name, | |
| 213 | + spu.category AS product_category, | |
| 214 | + spu.category_path AS category_path, | |
| 215 | + spu.handle AS product_handle, | |
| 216 | + spu.tags AS product_tags, | |
| 217 | + spu.published AS product_published, | |
| 218 | + spu.published_at AS publish_time, | |
| 219 | + spu.create_time AS spu_create_time, | |
| 220 | + | |
| 221 | + -- SKU信息聚合 | |
| 222 | + COALESCE(sku_summary.sku_count, 0) AS variant_count, | |
| 223 | + COALESCE(sku_summary.min_price, 0) AS min_price, | |
| 224 | + COALESCE(sku_summary.max_price, 0) AS max_price, | |
| 225 | + COALESCE(sku_summary.total_stock, 0) AS total_inventory, | |
| 226 | + | |
| 227 | + -- 主图信息(从SPU表直接获取) | |
| 228 | + COALESCE(spu.image_src, '') AS main_image_url, | |
| 229 | + COALESCE(spu.image_width, 0) AS main_image_width, | |
| 230 | + COALESCE(spu.image_height, 0) AS main_image_height, | |
| 231 | + COALESCE(spu.image_path, '') AS main_image_path, | |
| 232 | + COALESCE(spu.image_alt, '') AS main_image_alt | |
| 233 | + | |
| 234 | +FROM shoplazza_product_spu spu | |
| 235 | + | |
| 236 | +-- 关联SKU统计信息 | |
| 237 | +LEFT JOIN ( | |
| 238 | + SELECT | |
| 239 | + spu_id, | |
| 240 | + tenant_id, | |
| 241 | + COUNT(*) AS sku_count, | |
| 242 | + MIN(price) AS min_price, | |
| 243 | + MAX(price) AS max_price, | |
| 244 | + SUM(inventory_quantity) AS total_stock | |
| 245 | + FROM shoplazza_product_sku | |
| 246 | + WHERE DATE(create_time) = CURDATE() -- 今天的SKU | |
| 247 | + AND deleted = 0 | |
| 248 | + GROUP BY spu_id, tenant_id | |
| 249 | +) sku_summary ON spu.id = sku_summary.spu_id | |
| 250 | + AND spu.tenant_id = sku_summary.tenant_id | |
| 251 | + | |
| 252 | +WHERE DATE(spu.create_time) = CURDATE() -- 今天的SPU | |
| 253 | + AND spu.deleted = 0 -- 未删除的SPU | |
| 254 | +ORDER BY spu.create_time DESC; | |
| 0 | 255 | \ No newline at end of file | ... | ... |
docs/搜索API对接指南.md
| ... | ... | @@ -27,7 +27,7 @@ |
| 27 | 27 | ```bash |
| 28 | 28 | curl -X POST "http://120.76.41.98:6002/search/" \ |
| 29 | 29 | -H "Content-Type: application/json" \ |
| 30 | - -H "X-Tenant-ID: 2" \ | |
| 30 | + -H "X-Tenant-ID: 162" \ | |
| 31 | 31 | -d '{"query": "芭比娃娃"}' |
| 32 | 32 | ``` |
| 33 | 33 | |
| ... | ... | @@ -36,7 +36,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ |
| 36 | 36 | ```bash |
| 37 | 37 | curl -X POST "http://120.76.41.98:6002/search/" \ |
| 38 | 38 | -H "Content-Type: application/json" \ |
| 39 | - -H "X-Tenant-ID: 2" \ | |
| 39 | + -H "X-Tenant-ID: 162" \ | |
| 40 | 40 | -d '{ |
| 41 | 41 | "query": "芭比娃娃", |
| 42 | 42 | "size": 5, |
| ... | ... | @@ -60,11 +60,10 @@ curl -X POST "http://120.76.41.98:6002/search/" \ |
| 60 | 60 | ```bash |
| 61 | 61 | curl -X POST "http://120.76.41.98:6002/search/" \ |
| 62 | 62 | -H "Content-Type: application/json" \ |
| 63 | - -H "X-Tenant-ID: 2" \ | |
| 63 | + -H "X-Tenant-ID: 162" \ | |
| 64 | 64 | -d '{ |
| 65 | - "tenant_id": "demo-tenant", | |
| 66 | 65 | "query": "芭比娃娃", |
| 67 | - "facets": ["category.keyword", "specifications.color", "specifications.size"], | |
| 66 | + "facets": ["category1_name", "specifications.color", "specifications.size", "specifications.material"], | |
| 68 | 67 | "min_score": 0.2 |
| 69 | 68 | }' |
| 70 | 69 | ``` |
| ... | ... | @@ -291,10 +290,10 @@ curl -X POST "http://120.76.41.98:6002/search/" \ |
| 291 | 290 | **模式2:指定规格名称的分面** (`"specifications.color"`): |
| 292 | 291 | ```json |
| 293 | 292 | { |
| 294 | - "facets": ["specifications.color", "specifications.size"] | |
| 293 | + "facets": ["specifications.color", "specifications.size", "specifications.material"] | |
| 295 | 294 | } |
| 296 | 295 | ``` |
| 297 | -只返回指定规格名称的值列表。格式:`specifications.{name}`,其中 `{name}` 是规格名称(如"color"、"size")。 | |
| 296 | +只返回指定规格名称的值列表。格式:`specifications.{name}`,其中 `{name}` 是规格名称(如"color"、"size"、"material")。 | |
| 298 | 297 | |
| 299 | 298 | **返回格式示例**: |
| 300 | 299 | ```json |
| ... | ... | @@ -879,9 +878,9 @@ curl "http://localhost:6002/search/instant?q=玩具&size=5" |
| 879 | 878 | { |
| 880 | 879 | "id": "12345", |
| 881 | 880 | "source": { |
| 882 | - "title": "芭比时尚娃娃", | |
| 881 | + "title_zh": "芭比时尚娃娃", | |
| 883 | 882 | "min_price": 89.99, |
| 884 | - "category.keyword": "玩具" | |
| 883 | + "category1_name": "玩具" | |
| 885 | 884 | } |
| 886 | 885 | } |
| 887 | 886 | ``` | ... | ... |
docs/搜索API速查表.md
| ... | ... | @@ -60,7 +60,7 @@ POST /search/ |
| 60 | 60 | ```bash |
| 61 | 61 | { |
| 62 | 62 | "range_filters": { |
| 63 | - "price": { | |
| 63 | + "min_price": { | |
| 64 | 64 | "gte": 50, // >= |
| 65 | 65 | "lte": 200 // <= |
| 66 | 66 | } |
| ... | ... | @@ -94,7 +94,7 @@ POST /search/ |
| 94 | 94 | **指定规格名称**: |
| 95 | 95 | ```bash |
| 96 | 96 | { |
| 97 | - "facets": ["specifications.color", "specifications.size"] // 只返回指定name的value列表 | |
| 97 | + "facets": ["specifications.color", "specifications.size", "specifications.material"] // 只返回指定name的value列表 | |
| 98 | 98 | } |
| 99 | 99 | ``` |
| 100 | 100 | |
| ... | ... | @@ -114,7 +114,8 @@ POST /search/ |
| 114 | 114 | }, |
| 115 | 115 | "specifications", // 所有规格名称 |
| 116 | 116 | "specifications.color", // 指定规格名称 |
| 117 | - "specifications.size" | |
| 117 | + "specifications.size", | |
| 118 | + "specifications.material" | |
| 118 | 119 | ] |
| 119 | 120 | } |
| 120 | 121 | ``` |
| ... | ... | @@ -190,7 +191,7 @@ POST /search/ |
| 190 | 191 | |
| 191 | 192 | ```bash |
| 192 | 193 | POST /search/ |
| 193 | -Headers: X-Tenant-ID: 2 | |
| 194 | +Headers: X-Tenant-ID: 162 | |
| 194 | 195 | { |
| 195 | 196 | "query": "手机", |
| 196 | 197 | "size": 20, | ... | ... |
docs/索引字段说明v2-参考表结构.md
| 1 | -spu表全部字段 | |
| 1 | +spu表 shoplazza_product_spu 全部字段 | |
| 2 | 2 | "Field" "Type" "Null" "Key" "Default" "Extra" |
| 3 | 3 | "id" "bigint(20)" "NO" "PRI" "auto_increment" |
| 4 | 4 | "shop_id" "bigint(20)" "NO" "MUL" "" |
| ... | ... | @@ -46,7 +46,7 @@ spu表全部字段 |
| 46 | 46 | "update_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "on update CURRENT_TIMESTAMP" |
| 47 | 47 | "deleted" "bit(1)" "NO" "" "b'0'" "" |
| 48 | 48 | |
| 49 | -sku全部字段 | |
| 49 | +shoplazza_product_sku 全部字段 | |
| 50 | 50 | "Field" "Type" "Null" "Key" "Default" "Extra" |
| 51 | 51 | "id" "bigint(20)" "NO" "PRI" "auto_increment" |
| 52 | 52 | "spu_id" "bigint(20)" "NO" "MUL" "" | ... | ... |
docs/索引字段说明v2.md
| ... | ... | @@ -124,7 +124,7 @@ |
| 124 | 124 | { |
| 125 | 125 | "query": "手机", |
| 126 | 126 | "filters": { |
| 127 | - "specifications": { | |
| 127 | + "specifications": { | |
| 128 | 128 | "name": "color", |
| 129 | 129 | "value": "white" |
| 130 | 130 | } |
| ... | ... | @@ -141,21 +141,21 @@ |
| 141 | 141 | {"name": "color", "value": "white"}, |
| 142 | 142 | {"name": "size", "value": "256GB"} |
| 143 | 143 | ] |
| 144 | - } | |
| 144 | + } | |
| 145 | 145 | } |
| 146 | 146 | ``` |
| 147 | 147 | |
| 148 | 148 | **ES 查询结构**(后端自动生成): |
| 149 | 149 | ```json |
| 150 | 150 | { |
| 151 | - "nested": { | |
| 152 | - "path": "specifications", | |
| 153 | - "query": { | |
| 154 | - "bool": { | |
| 155 | - "must": [ | |
| 151 | + "nested": { | |
| 152 | + "path": "specifications", | |
| 153 | + "query": { | |
| 154 | + "bool": { | |
| 155 | + "must": [ | |
| 156 | 156 | { "term": { "specifications.name": "color" } }, |
| 157 | 157 | { "term": { "specifications.value": "white" } } |
| 158 | - ] | |
| 158 | + ] | |
| 159 | 159 | } |
| 160 | 160 | } |
| 161 | 161 | } |
| ... | ... | @@ -202,7 +202,7 @@ |
| 202 | 202 | } |
| 203 | 203 | } |
| 204 | 204 | ``` |
| 205 | - | |
| 205 | + | |
| 206 | 206 | 指定规格名称: |
| 207 | 207 | ```json |
| 208 | 208 | { |
| ... | ... | @@ -402,3 +402,4 @@ filters AND (text_recall OR embedding_recall) |
| 402 | 402 | 3. **多语言支持**: 文本字段支持中英文,后端根据 `language` 参数自动选择 |
| 403 | 403 | 4. **规格分面**: `specifications` 使用嵌套聚合,按 `name` 分组,然后按 `value` 聚合 |
| 404 | 404 | 5. **向量字段**: `title_embedding` 和 `image_embedding` 仅用于搜索,不返回给前端 |
| 405 | + | |
| 405 | 406 | \ No newline at end of file | ... | ... |
frontend/index.html
| ... | ... | @@ -32,22 +32,28 @@ |
| 32 | 32 | |
| 33 | 33 | <!-- Filter Section --> |
| 34 | 34 | <div class="filter-section" id="filterSection"> |
| 35 | - <!-- Category Filter --> | |
| 35 | + <!-- Category Filter (一级分类) --> | |
| 36 | 36 | <div class="filter-row"> |
| 37 | - <div class="filter-label">Categories:</div> | |
| 38 | - <div class="filter-tags" id="categoryTags"></div> | |
| 37 | + <div class="filter-label">Category:</div> | |
| 38 | + <div class="filter-tags" id="category1Tags"></div> | |
| 39 | 39 | </div> |
| 40 | 40 | |
| 41 | - <!-- Brand Filter --> | |
| 41 | + <!-- Color Filter --> | |
| 42 | 42 | <div class="filter-row"> |
| 43 | - <div class="filter-label">Brand:</div> | |
| 44 | - <div class="filter-tags" id="brandTags"></div> | |
| 43 | + <div class="filter-label">Color:</div> | |
| 44 | + <div class="filter-tags" id="colorTags"></div> | |
| 45 | 45 | </div> |
| 46 | 46 | |
| 47 | - <!-- Supplier Filter --> | |
| 47 | + <!-- Size Filter --> | |
| 48 | 48 | <div class="filter-row"> |
| 49 | - <div class="filter-label">Supplier:</div> | |
| 50 | - <div class="filter-tags" id="supplierTags"></div> | |
| 49 | + <div class="filter-label">Size:</div> | |
| 50 | + <div class="filter-tags" id="sizeTags"></div> | |
| 51 | + </div> | |
| 52 | + | |
| 53 | + <!-- Material Filter --> | |
| 54 | + <div class="filter-row"> | |
| 55 | + <div class="filter-label">Material:</div> | |
| 56 | + <div class="filter-tags" id="materialTags"></div> | |
| 51 | 57 | </div> |
| 52 | 58 | |
| 53 | 59 | <!-- Dropdown Filters --> |
| ... | ... | @@ -124,6 +130,6 @@ |
| 124 | 130 | <p>SearchEngine © 2025 | API: <span id="apiUrl">Loading...</span></p> |
| 125 | 131 | </footer> |
| 126 | 132 | |
| 127 | - <script src="/static/js/app.js?v=3.1"></script> | |
| 133 | + <script src="/static/js/app.js?v=3.2"></script> | |
| 128 | 134 | </body> |
| 129 | 135 | </html> | ... | ... |
frontend/static/js/app.js
| ... | ... | @@ -31,9 +31,6 @@ let state = { |
| 31 | 31 | |
| 32 | 32 | // Initialize |
| 33 | 33 | document.addEventListener('DOMContentLoaded', function() { |
| 34 | - console.log('SearchEngine loaded'); | |
| 35 | - console.log('Debug mode: always enabled (test frontend)'); | |
| 36 | - | |
| 37 | 34 | document.getElementById('searchInput').focus(); |
| 38 | 35 | }); |
| 39 | 36 | |
| ... | ... | @@ -71,33 +68,12 @@ async function performSearch(page = 1) { |
| 71 | 68 | |
| 72 | 69 | const from = (page - 1) * state.pageSize; |
| 73 | 70 | |
| 74 | - // Define facets (简化配置) | |
| 71 | + // Define facets (一级分类 + 三个属性分面) | |
| 75 | 72 | const facets = [ |
| 76 | - { | |
| 77 | - "field": "category.keyword", | |
| 78 | - "size": 15, | |
| 79 | - "type": "terms" | |
| 80 | - }, | |
| 81 | - { | |
| 82 | - "field": "vendor.keyword", | |
| 83 | - "size": 15, | |
| 84 | - "type": "terms" | |
| 85 | - }, | |
| 86 | - { | |
| 87 | - "field": "tags.keyword", | |
| 88 | - "size": 10, | |
| 89 | - "type": "terms" | |
| 90 | - }, | |
| 91 | - { | |
| 92 | - "field": "min_price", | |
| 93 | - "type": "range", | |
| 94 | - "ranges": [ | |
| 95 | - {"key": "0-50", "to": 50}, | |
| 96 | - {"key": "50-100", "from": 50, "to": 100}, | |
| 97 | - {"key": "100-200", "from": 100, "to": 200}, | |
| 98 | - {"key": "200+", "from": 200} | |
| 99 | - ] | |
| 100 | - } | |
| 73 | + "category1_name", // 一级分类 | |
| 74 | + "specifications.color", // 颜色属性 | |
| 75 | + "specifications.size", // 尺寸属性 | |
| 76 | + "specifications.material" // 材质属性 | |
| 101 | 77 | ]; |
| 102 | 78 | |
| 103 | 79 | // Show loading |
| ... | ... | @@ -210,43 +186,91 @@ function displayResults(data) { |
| 210 | 186 | grid.innerHTML = html; |
| 211 | 187 | } |
| 212 | 188 | |
| 213 | -// Display facets as filter tags (重构版 - 标准化格式) | |
| 189 | +// Display facets as filter tags (一级分类 + 三个属性分面) | |
| 214 | 190 | function displayFacets(facets) { |
| 215 | - if (!facets) return; | |
| 191 | + if (!facets || !Array.isArray(facets)) { | |
| 192 | + return; | |
| 193 | + } | |
| 216 | 194 | |
| 217 | - facets.forEach(facet => { | |
| 195 | + facets.forEach((facet) => { | |
| 218 | 196 | // 根据字段名找到对应的容器 |
| 219 | 197 | let containerId = null; |
| 220 | 198 | let maxDisplay = 10; |
| 221 | 199 | |
| 222 | - if (facet.field === 'category.keyword') { | |
| 223 | - containerId = 'categoryTags'; | |
| 200 | + // 一级分类 | |
| 201 | + if (facet.field === 'category1_name') { | |
| 202 | + containerId = 'category1Tags'; | |
| 224 | 203 | maxDisplay = 10; |
| 225 | - } else if (facet.field === 'vendor.keyword') { | |
| 226 | - containerId = 'brandTags'; | |
| 204 | + } | |
| 205 | + // 颜色属性分面 (specifications.color) | |
| 206 | + else if (facet.field === 'specifications.color') { | |
| 207 | + containerId = 'colorTags'; | |
| 208 | + maxDisplay = 10; | |
| 209 | + } | |
| 210 | + // 尺寸属性分面 (specifications.size) | |
| 211 | + else if (facet.field === 'specifications.size') { | |
| 212 | + containerId = 'sizeTags'; | |
| 213 | + maxDisplay = 10; | |
| 214 | + } | |
| 215 | + // 材质属性分面 (specifications.material) | |
| 216 | + else if (facet.field === 'specifications.material') { | |
| 217 | + containerId = 'materialTags'; | |
| 227 | 218 | maxDisplay = 10; |
| 228 | - } else if (facet.field === 'tags.keyword') { | |
| 229 | - containerId = 'supplierTags'; | |
| 230 | - maxDisplay = 8; | |
| 231 | 219 | } |
| 232 | 220 | |
| 233 | - if (!containerId) return; | |
| 221 | + if (!containerId) { | |
| 222 | + return; | |
| 223 | + } | |
| 234 | 224 | |
| 235 | 225 | const container = document.getElementById(containerId); |
| 236 | - if (!container) return; | |
| 226 | + if (!container) { | |
| 227 | + return; | |
| 228 | + } | |
| 229 | + | |
| 230 | + // 检查values是否存在且是数组 | |
| 231 | + if (!facet.values || !Array.isArray(facet.values) || facet.values.length === 0) { | |
| 232 | + container.innerHTML = ''; | |
| 233 | + return; | |
| 234 | + } | |
| 237 | 235 | |
| 238 | 236 | let html = ''; |
| 239 | 237 | |
| 240 | 238 | // 渲染分面值 |
| 241 | - facet.values.slice(0, maxDisplay).forEach(facetValue => { | |
| 239 | + facet.values.slice(0, maxDisplay).forEach((facetValue) => { | |
| 240 | + if (!facetValue || typeof facetValue !== 'object') { | |
| 241 | + return; | |
| 242 | + } | |
| 243 | + | |
| 242 | 244 | const value = facetValue.value; |
| 243 | 245 | const count = facetValue.count; |
| 244 | - const selected = facetValue.selected; | |
| 246 | + | |
| 247 | + // 允许value为0或空字符串,但不允许undefined/null | |
| 248 | + if (value === undefined || value === null) { | |
| 249 | + return; | |
| 250 | + } | |
| 251 | + | |
| 252 | + // 检查是否已选中 | |
| 253 | + let selected = false; | |
| 254 | + if (facet.field.startsWith('specifications.')) { | |
| 255 | + // 检查specifications过滤 | |
| 256 | + const specName = facet.field.split('.')[1]; | |
| 257 | + if (state.filters.specifications) { | |
| 258 | + const specs = Array.isArray(state.filters.specifications) | |
| 259 | + ? state.filters.specifications | |
| 260 | + : [state.filters.specifications]; | |
| 261 | + selected = specs.some(spec => spec && spec.name === specName && spec.value === value); | |
| 262 | + } | |
| 263 | + } else { | |
| 264 | + // 检查普通字段过滤 | |
| 265 | + if (state.filters[facet.field]) { | |
| 266 | + selected = state.filters[facet.field].includes(value); | |
| 267 | + } | |
| 268 | + } | |
| 245 | 269 | |
| 246 | 270 | html += ` |
| 247 | 271 | <span class="filter-tag ${selected ? 'active' : ''}" |
| 248 | - onclick="toggleFilter('${escapeAttr(facet.field)}', '${escapeAttr(value)}')"> | |
| 249 | - ${escapeHtml(value)} (${count}) | |
| 272 | + onclick="toggleFilter('${escapeAttr(facet.field)}', '${escapeAttr(String(value))}')"> | |
| 273 | + ${escapeHtml(String(value))} (${count || 0}) | |
| 250 | 274 | </span> |
| 251 | 275 | `; |
| 252 | 276 | }); |
| ... | ... | @@ -255,20 +279,56 @@ function displayFacets(facets) { |
| 255 | 279 | }); |
| 256 | 280 | } |
| 257 | 281 | |
| 258 | -// Toggle filter | |
| 282 | +// Toggle filter (支持specifications嵌套过滤) | |
| 259 | 283 | function toggleFilter(field, value) { |
| 260 | - if (!state.filters[field]) { | |
| 261 | - state.filters[field] = []; | |
| 262 | - } | |
| 263 | - | |
| 264 | - const index = state.filters[field].indexOf(value); | |
| 265 | - if (index > -1) { | |
| 266 | - state.filters[field].splice(index, 1); | |
| 267 | - if (state.filters[field].length === 0) { | |
| 268 | - delete state.filters[field]; | |
| 284 | + // 处理specifications属性过滤 (specifications.color, specifications.size, specifications.material) | |
| 285 | + if (field.startsWith('specifications.')) { | |
| 286 | + const specName = field.split('.')[1]; // 提取name (color, size, material) | |
| 287 | + | |
| 288 | + // 初始化specifications过滤 | |
| 289 | + if (!state.filters.specifications) { | |
| 290 | + state.filters.specifications = []; | |
| 291 | + } | |
| 292 | + | |
| 293 | + // 确保是数组格式 | |
| 294 | + if (!Array.isArray(state.filters.specifications)) { | |
| 295 | + // 如果已经是单个对象,转换为数组 | |
| 296 | + state.filters.specifications = [state.filters.specifications]; | |
| 297 | + } | |
| 298 | + | |
| 299 | + // 查找是否已存在相同的name和value组合 | |
| 300 | + const existingIndex = state.filters.specifications.findIndex( | |
| 301 | + spec => spec.name === specName && spec.value === value | |
| 302 | + ); | |
| 303 | + | |
| 304 | + if (existingIndex > -1) { | |
| 305 | + // 移除 | |
| 306 | + state.filters.specifications.splice(existingIndex, 1); | |
| 307 | + if (state.filters.specifications.length === 0) { | |
| 308 | + delete state.filters.specifications; | |
| 309 | + } else if (state.filters.specifications.length === 1) { | |
| 310 | + // 如果只剩一个,可以保持为数组,或转换为单个对象(API都支持) | |
| 311 | + // 这里保持为数组,更一致 | |
| 312 | + } | |
| 313 | + } else { | |
| 314 | + // 添加 | |
| 315 | + state.filters.specifications.push({ name: specName, value: value }); | |
| 269 | 316 | } |
| 270 | 317 | } else { |
| 271 | - state.filters[field].push(value); | |
| 318 | + // 处理普通字段过滤 (category1_name等) | |
| 319 | + if (!state.filters[field]) { | |
| 320 | + state.filters[field] = []; | |
| 321 | + } | |
| 322 | + | |
| 323 | + const index = state.filters[field].indexOf(value); | |
| 324 | + if (index > -1) { | |
| 325 | + state.filters[field].splice(index, 1); | |
| 326 | + if (state.filters[field].length === 0) { | |
| 327 | + delete state.filters[field]; | |
| 328 | + } | |
| 329 | + } else { | |
| 330 | + state.filters[field].push(value); | |
| 331 | + } | |
| 272 | 332 | } |
| 273 | 333 | |
| 274 | 334 | performSearch(1); // Reset to page 1 | ... | ... |
indexer/mapping_generator.py
| ... | ... | @@ -19,13 +19,13 @@ DEFAULT_MAPPING_FILE = Path(__file__).parent.parent / "mappings" / "search_produ |
| 19 | 19 | |
| 20 | 20 | |
| 21 | 21 | def load_mapping(mapping_file: str = None) -> Dict[str, Any]: |
| 22 | - """ | |
| 22 | + """ | |
| 23 | 23 | Load Elasticsearch mapping from JSON file. |
| 24 | 24 | |
| 25 | - Args: | |
| 25 | + Args: | |
| 26 | 26 | mapping_file: Path to mapping JSON file. If None, uses default. |
| 27 | 27 | |
| 28 | - Returns: | |
| 28 | + Returns: | |
| 29 | 29 | Dictionary containing index configuration (settings + mappings) |
| 30 | 30 | |
| 31 | 31 | Raises: |
| ... | ... | @@ -66,8 +66,8 @@ def create_index_if_not_exists(es_client, index_name: str, mapping: Dict[str, An |
| 66 | 66 | mapping = load_mapping() |
| 67 | 67 | |
| 68 | 68 | if es_client.create_index(index_name, mapping): |
| 69 | - logger.info(f"Index '{index_name}' created successfully") | |
| 70 | - return True | |
| 69 | + logger.info(f"Index '{index_name}' created successfully") | |
| 70 | + return True | |
| 71 | 71 | else: |
| 72 | 72 | logger.error(f"Failed to create index '{index_name}'") |
| 73 | 73 | return False |
| ... | ... | @@ -89,8 +89,8 @@ def delete_index_if_exists(es_client, index_name: str) -> bool: |
| 89 | 89 | return False |
| 90 | 90 | |
| 91 | 91 | if es_client.delete_index(index_name): |
| 92 | - logger.info(f"Index '{index_name}' deleted successfully") | |
| 93 | - return True | |
| 92 | + logger.info(f"Index '{index_name}' deleted successfully") | |
| 93 | + return True | |
| 94 | 94 | else: |
| 95 | 95 | logger.error(f"Failed to delete index '{index_name}'") |
| 96 | 96 | return False |
| ... | ... | @@ -114,8 +114,8 @@ def update_mapping(es_client, index_name: str, new_fields: Dict[str, Any]) -> bo |
| 114 | 114 | |
| 115 | 115 | mapping = {"properties": new_fields} |
| 116 | 116 | if es_client.update_mapping(index_name, mapping): |
| 117 | - logger.info(f"Mapping updated for index '{index_name}'") | |
| 118 | - return True | |
| 117 | + logger.info(f"Mapping updated for index '{index_name}'") | |
| 118 | + return True | |
| 119 | 119 | else: |
| 120 | 120 | logger.error(f"Failed to update mapping for index '{index_name}'") |
| 121 | 121 | return False | ... | ... |
indexer/spu_transformer.py
| ... | ... | @@ -238,12 +238,35 @@ class SPUTransformer: |
| 238 | 238 | doc['category2_name'] = path_parts[1].strip() |
| 239 | 239 | if len(path_parts) > 2: |
| 240 | 240 | doc['category3_name'] = path_parts[2].strip() |
| 241 | + elif pd.notna(spu_row.get('category')): | |
| 242 | + # 如果category_path为空,使用category字段作为category1_name的备选 | |
| 243 | + category = str(spu_row['category']) | |
| 244 | + doc['category_name_zh'] = category | |
| 245 | + doc['category_name_en'] = None | |
| 246 | + doc['category_name'] = category | |
| 247 | + | |
| 248 | + # 尝试从category字段解析多级分类 | |
| 249 | + if '/' in category: | |
| 250 | + path_parts = category.split('/') | |
| 251 | + if len(path_parts) > 0: | |
| 252 | + doc['category1_name'] = path_parts[0].strip() | |
| 253 | + if len(path_parts) > 1: | |
| 254 | + doc['category2_name'] = path_parts[1].strip() | |
| 255 | + if len(path_parts) > 2: | |
| 256 | + doc['category3_name'] = path_parts[2].strip() | |
| 257 | + else: | |
| 258 | + # 如果category不包含"/",直接作为category1_name | |
| 259 | + doc['category1_name'] = category.strip() | |
| 241 | 260 | |
| 242 | 261 | if pd.notna(spu_row.get('category')): |
| 262 | + # 确保category相关字段都被设置(如果前面没有设置) | |
| 243 | 263 | category_name = str(spu_row['category']) |
| 244 | - doc['category_name_zh'] = category_name | |
| 245 | - doc['category_name_en'] = None | |
| 246 | - doc['category_name'] = category_name | |
| 264 | + if 'category_name_zh' not in doc: | |
| 265 | + doc['category_name_zh'] = category_name | |
| 266 | + if 'category_name_en' not in doc: | |
| 267 | + doc['category_name_en'] = None | |
| 268 | + if 'category_name' not in doc: | |
| 269 | + doc['category_name'] = category_name | |
| 247 | 270 | |
| 248 | 271 | if pd.notna(spu_row.get('category_id')): |
| 249 | 272 | doc['category_id'] = str(int(spu_row['category_id'])) |
| ... | ... | @@ -459,7 +482,7 @@ class SPUTransformer: |
| 459 | 482 | sku_data['option2_value'] = str(sku_row['option2']) |
| 460 | 483 | if pd.notna(sku_row.get('option3')): |
| 461 | 484 | sku_data['option3_value'] = str(sku_row['option3']) |
| 462 | - | |
| 485 | + | |
| 463 | 486 | # Image src |
| 464 | 487 | if pd.notna(sku_row.get('image_src')): |
| 465 | 488 | sku_data['image_src'] = str(sku_row['image_src']) | ... | ... |
| ... | ... | @@ -0,0 +1,301 @@ |
| 1 | +#!/usr/bin/env python3 | |
| 2 | +""" | |
| 3 | +诊断脚本:检查MySQL数据源中分类和规格信息是否正确 | |
| 4 | + | |
| 5 | +检查: | |
| 6 | +1. category_path 字段是否有值 | |
| 7 | +2. category_path 格式是否正确(应该能被解析为 category1_name) | |
| 8 | +3. shoplazza_product_option 表的 name 字段是否有值(应该是 "color", "size", "material") | |
| 9 | +4. shoplazza_product_sku 表的 option1/2/3 字段是否有值 | |
| 10 | +""" | |
| 11 | + | |
| 12 | +import sys | |
| 13 | +import argparse | |
| 14 | +from pathlib import Path | |
| 15 | +from sqlalchemy import create_engine, text | |
| 16 | + | |
| 17 | +# Add parent directory to path | |
| 18 | +sys.path.insert(0, str(Path(__file__).parent.parent)) | |
| 19 | + | |
| 20 | +from utils.db_connector import create_db_connection | |
| 21 | + | |
| 22 | + | |
| 23 | +def check_category_path(db_engine, tenant_id: str): | |
| 24 | + """检查 category_path 和 category 字段""" | |
| 25 | + print("\n" + "="*60) | |
| 26 | + print("1. 检查 category_path 和 category 字段") | |
| 27 | + print("="*60) | |
| 28 | + | |
| 29 | + query = text(""" | |
| 30 | + SELECT | |
| 31 | + COUNT(*) as total, | |
| 32 | + COUNT(category_path) as has_category_path, | |
| 33 | + COUNT(*) - COUNT(category_path) as null_category_path, | |
| 34 | + COUNT(category) as has_category, | |
| 35 | + COUNT(*) - COUNT(category) as null_category | |
| 36 | + FROM shoplazza_product_spu | |
| 37 | + WHERE tenant_id = :tenant_id AND deleted = 0 | |
| 38 | + """) | |
| 39 | + | |
| 40 | + with db_engine.connect() as conn: | |
| 41 | + result = conn.execute(query, {"tenant_id": tenant_id}).fetchone() | |
| 42 | + total = result[0] | |
| 43 | + has_category_path = result[1] | |
| 44 | + null_category_path = result[2] | |
| 45 | + has_category = result[3] | |
| 46 | + null_category = result[4] | |
| 47 | + | |
| 48 | + print(f"总SPU数: {total}") | |
| 49 | + print(f"有 category_path 的SPU: {has_category_path}") | |
| 50 | + print(f"category_path 为空的SPU: {null_category_path}") | |
| 51 | + print(f"有 category 的SPU: {has_category}") | |
| 52 | + print(f"category 为空的SPU: {null_category}") | |
| 53 | + | |
| 54 | + # 查看category字段的示例 | |
| 55 | + if has_category > 0: | |
| 56 | + sample_query = text(""" | |
| 57 | + SELECT id, title, category_path, category, category_id, category_level | |
| 58 | + FROM shoplazza_product_spu | |
| 59 | + WHERE tenant_id = :tenant_id | |
| 60 | + AND deleted = 0 | |
| 61 | + AND category IS NOT NULL | |
| 62 | + LIMIT 5 | |
| 63 | + """) | |
| 64 | + samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall() | |
| 65 | + print(f"\n示例数据(前5条有 category 的记录):") | |
| 66 | + for row in samples: | |
| 67 | + print(f" SPU ID: {row[0]}, Title: {row[1][:50] if row[1] else ''}") | |
| 68 | + print(f" category_path: {row[2]}") | |
| 69 | + print(f" category: '{row[3]}'") | |
| 70 | + print(f" category_id: {row[4]}, category_level: {row[5]}") | |
| 71 | + | |
| 72 | + # 解析 category 字段(用于生成 category1_name) | |
| 73 | + if row[3]: | |
| 74 | + category = str(row[3]) | |
| 75 | + if '/' in category: | |
| 76 | + path_parts = category.split('/') | |
| 77 | + print(f" 解析后(按'/'分割): {path_parts}") | |
| 78 | + if len(path_parts) > 0: | |
| 79 | + print(f" → category1_name: '{path_parts[0].strip()}'") | |
| 80 | + else: | |
| 81 | + print(f" → category1_name: '{category.strip()}'(直接作为category1_name)") | |
| 82 | + else: | |
| 83 | + print("\n⚠️ 警告: 没有SPU有 category 值!") | |
| 84 | + | |
| 85 | + # 查看category_path的示例(如果有) | |
| 86 | + if has_category_path > 0: | |
| 87 | + sample_query = text(""" | |
| 88 | + SELECT id, title, category_path, category | |
| 89 | + FROM shoplazza_product_spu | |
| 90 | + WHERE tenant_id = :tenant_id | |
| 91 | + AND deleted = 0 | |
| 92 | + AND category_path IS NOT NULL | |
| 93 | + LIMIT 3 | |
| 94 | + """) | |
| 95 | + samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall() | |
| 96 | + print(f"\n示例数据(有 category_path 的记录):") | |
| 97 | + for row in samples: | |
| 98 | + print(f" SPU ID: {row[0]}, Title: {row[1][:50] if row[1] else ''}") | |
| 99 | + print(f" category_path: '{row[2]}'") | |
| 100 | + print(f" category: '{row[3]}'") | |
| 101 | + | |
| 102 | + # 检查是否是ID列表格式 | |
| 103 | + if row[2] and ',' in str(row[2]) and not '/' in str(row[2]): | |
| 104 | + print(f" ⚠️ 注意: category_path是ID列表格式(逗号分隔),不是路径格式") | |
| 105 | + | |
| 106 | + | |
| 107 | +def check_options(db_engine, tenant_id: str): | |
| 108 | + """检查 option 表的 name 字段""" | |
| 109 | + print("\n" + "="*60) | |
| 110 | + print("2. 检查 shoplazza_product_option 表的 name 字段") | |
| 111 | + print("="*60) | |
| 112 | + | |
| 113 | + query = text(""" | |
| 114 | + SELECT | |
| 115 | + COUNT(*) as total_options, | |
| 116 | + COUNT(DISTINCT name) as distinct_names, | |
| 117 | + COUNT(DISTINCT spu_id) as spus_with_options | |
| 118 | + FROM shoplazza_product_option | |
| 119 | + WHERE tenant_id = :tenant_id AND deleted = 0 | |
| 120 | + """) | |
| 121 | + | |
| 122 | + with db_engine.connect() as conn: | |
| 123 | + result = conn.execute(query, {"tenant_id": tenant_id}).fetchone() | |
| 124 | + total_options = result[0] | |
| 125 | + distinct_names = result[1] | |
| 126 | + spus_with_options = result[2] | |
| 127 | + | |
| 128 | + print(f"总 option 记录数: {total_options}") | |
| 129 | + print(f"不同的 name 数量: {distinct_names}") | |
| 130 | + print(f"有 option 定义的 SPU 数量: {spus_with_options}") | |
| 131 | + | |
| 132 | + if total_options > 0: | |
| 133 | + # 查看不同的 name 值 | |
| 134 | + name_query = text(""" | |
| 135 | + SELECT DISTINCT name, position, COUNT(*) as count | |
| 136 | + FROM shoplazza_product_option | |
| 137 | + WHERE tenant_id = :tenant_id AND deleted = 0 | |
| 138 | + GROUP BY name, position | |
| 139 | + ORDER BY position, name | |
| 140 | + """) | |
| 141 | + names = conn.execute(name_query, {"tenant_id": tenant_id}).fetchall() | |
| 142 | + print(f"\n不同的 name 值:") | |
| 143 | + for row in names: | |
| 144 | + print(f" position={row[1]}, name='{row[0]}', count={row[2]}") | |
| 145 | + | |
| 146 | + # 查看一些示例 | |
| 147 | + sample_query = text(""" | |
| 148 | + SELECT spu_id, position, name, `values` | |
| 149 | + FROM shoplazza_product_option | |
| 150 | + WHERE tenant_id = :tenant_id AND deleted = 0 | |
| 151 | + ORDER BY spu_id, position | |
| 152 | + LIMIT 10 | |
| 153 | + """) | |
| 154 | + samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall() | |
| 155 | + print(f"\n示例数据(前10条 option 记录):") | |
| 156 | + for row in samples: | |
| 157 | + print(f" SPU ID: {row[0]}, position: {row[1]}, name: '{row[2]}', values: {row[3]}") | |
| 158 | + else: | |
| 159 | + print("\n⚠️ 警告: 没有 option 记录!") | |
| 160 | + | |
| 161 | + | |
| 162 | +def check_sku_options(db_engine, tenant_id: str): | |
| 163 | + """检查 SKU 表的 option1/2/3 字段""" | |
| 164 | + print("\n" + "="*60) | |
| 165 | + print("3. 检查 shoplazza_product_sku 表的 option1/2/3 字段") | |
| 166 | + print("="*60) | |
| 167 | + | |
| 168 | + query = text(""" | |
| 169 | + SELECT | |
| 170 | + COUNT(*) as total_skus, | |
| 171 | + COUNT(option1) as has_option1, | |
| 172 | + COUNT(option2) as has_option2, | |
| 173 | + COUNT(option3) as has_option3, | |
| 174 | + COUNT(DISTINCT spu_id) as distinct_spus | |
| 175 | + FROM shoplazza_product_sku | |
| 176 | + WHERE tenant_id = :tenant_id AND deleted = 0 | |
| 177 | + """) | |
| 178 | + | |
| 179 | + with db_engine.connect() as conn: | |
| 180 | + result = conn.execute(query, {"tenant_id": tenant_id}).fetchone() | |
| 181 | + total_skus = result[0] | |
| 182 | + has_option1 = result[1] | |
| 183 | + has_option2 = result[2] | |
| 184 | + has_option3 = result[3] | |
| 185 | + distinct_spus = result[4] | |
| 186 | + | |
| 187 | + print(f"总 SKU 数: {total_skus}") | |
| 188 | + print(f"有 option1 的 SKU: {has_option1}") | |
| 189 | + print(f"有 option2 的 SKU: {has_option2}") | |
| 190 | + print(f"有 option3 的 SKU: {has_option3}") | |
| 191 | + print(f"不同的 SPU 数量: {distinct_spus}") | |
| 192 | + | |
| 193 | + if total_skus > 0: | |
| 194 | + # 查看一些示例 | |
| 195 | + sample_query = text(""" | |
| 196 | + SELECT spu_id, id, option1, option2, option3 | |
| 197 | + FROM shoplazza_product_sku | |
| 198 | + WHERE tenant_id = :tenant_id AND deleted = 0 | |
| 199 | + ORDER BY spu_id, id | |
| 200 | + LIMIT 10 | |
| 201 | + """) | |
| 202 | + samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall() | |
| 203 | + print(f"\n示例数据(前10条 SKU 记录):") | |
| 204 | + for row in samples: | |
| 205 | + print(f" SPU ID: {row[0]}, SKU ID: {row[1]}") | |
| 206 | + print(f" option1: '{row[2]}', option2: '{row[3]}', option3: '{row[4]}'") | |
| 207 | + else: | |
| 208 | + print("\n⚠️ 警告: 没有 SKU 记录!") | |
| 209 | + | |
| 210 | + | |
| 211 | +def check_spu_summary(db_engine, tenant_id: str): | |
| 212 | + """检查 SPU 汇总信息""" | |
| 213 | + print("\n" + "="*60) | |
| 214 | + print("4. SPU 汇总信息") | |
| 215 | + print("="*60) | |
| 216 | + | |
| 217 | + query = text(""" | |
| 218 | + SELECT | |
| 219 | + COUNT(DISTINCT spu.id) as total_spus, | |
| 220 | + COUNT(DISTINCT sku.id) as total_skus, | |
| 221 | + COUNT(DISTINCT opt.id) as total_options, | |
| 222 | + COUNT(DISTINCT CASE WHEN spu.category_path IS NOT NULL THEN spu.id END) as spus_with_category_path, | |
| 223 | + COUNT(DISTINCT opt.spu_id) as spus_with_options | |
| 224 | + FROM shoplazza_product_spu spu | |
| 225 | + LEFT JOIN shoplazza_product_sku sku ON spu.id = sku.spu_id AND sku.tenant_id = :tenant_id AND sku.deleted = 0 | |
| 226 | + LEFT JOIN shoplazza_product_option opt ON spu.id = opt.spu_id AND opt.tenant_id = :tenant_id AND opt.deleted = 0 | |
| 227 | + WHERE spu.tenant_id = :tenant_id AND spu.deleted = 0 | |
| 228 | + """) | |
| 229 | + | |
| 230 | + with db_engine.connect() as conn: | |
| 231 | + result = conn.execute(query, {"tenant_id": tenant_id}).fetchone() | |
| 232 | + total_spus = result[0] | |
| 233 | + total_skus = result[1] | |
| 234 | + total_options = result[2] | |
| 235 | + spus_with_category_path = result[3] | |
| 236 | + spus_with_options = result[4] | |
| 237 | + | |
| 238 | + print(f"总 SPU 数: {total_spus}") | |
| 239 | + print(f"总 SKU 数: {total_skus}") | |
| 240 | + print(f"总 option 记录数: {total_options}") | |
| 241 | + print(f"有 category_path 的 SPU: {spus_with_category_path}") | |
| 242 | + print(f"有 option 定义的 SPU: {spus_with_options}") | |
| 243 | + | |
| 244 | + | |
| 245 | +def main(): | |
| 246 | + parser = argparse.ArgumentParser(description='检查MySQL数据源中的分类和规格信息') | |
| 247 | + parser.add_argument('--tenant-id', required=True, help='Tenant ID') | |
| 248 | + parser.add_argument('--db-host', help='MySQL host (或使用环境变量 DB_HOST)') | |
| 249 | + parser.add_argument('--db-port', type=int, help='MySQL port (或使用环境变量 DB_PORT, 默认: 3306)') | |
| 250 | + parser.add_argument('--db-database', help='MySQL database (或使用环境变量 DB_DATABASE)') | |
| 251 | + parser.add_argument('--db-username', help='MySQL username (或使用环境变量 DB_USERNAME)') | |
| 252 | + parser.add_argument('--db-password', help='MySQL password (或使用环境变量 DB_PASSWORD)') | |
| 253 | + | |
| 254 | + args = parser.parse_args() | |
| 255 | + | |
| 256 | + # 连接数据库 | |
| 257 | + import os | |
| 258 | + db_host = args.db_host or os.environ.get('DB_HOST') | |
| 259 | + db_port = args.db_port or int(os.environ.get('DB_PORT', 3306)) | |
| 260 | + db_database = args.db_database or os.environ.get('DB_DATABASE') | |
| 261 | + db_username = args.db_username or os.environ.get('DB_USERNAME') | |
| 262 | + db_password = args.db_password or os.environ.get('DB_PASSWORD') | |
| 263 | + | |
| 264 | + if not all([db_host, db_database, db_username, db_password]): | |
| 265 | + print("错误: MySQL连接参数不完整") | |
| 266 | + print("请提供 --db-host, --db-database, --db-username, --db-password") | |
| 267 | + print("或设置环境变量: DB_HOST, DB_DATABASE, DB_USERNAME, DB_PASSWORD") | |
| 268 | + return 1 | |
| 269 | + | |
| 270 | + print(f"连接MySQL: {db_host}:{db_port}/{db_database}") | |
| 271 | + print(f"Tenant ID: {args.tenant_id}") | |
| 272 | + | |
| 273 | + try: | |
| 274 | + db_engine = create_db_connection( | |
| 275 | + host=db_host, | |
| 276 | + port=db_port, | |
| 277 | + database=db_database, | |
| 278 | + username=db_username, | |
| 279 | + password=db_password | |
| 280 | + ) | |
| 281 | + print("✓ MySQL连接成功\n") | |
| 282 | + except Exception as e: | |
| 283 | + print(f"✗ 连接MySQL失败: {e}") | |
| 284 | + return 1 | |
| 285 | + | |
| 286 | + # 执行检查 | |
| 287 | + check_spu_summary(db_engine, args.tenant_id) | |
| 288 | + check_category_path(db_engine, args.tenant_id) | |
| 289 | + check_options(db_engine, args.tenant_id) | |
| 290 | + check_sku_options(db_engine, args.tenant_id) | |
| 291 | + | |
| 292 | + print("\n" + "="*60) | |
| 293 | + print("检查完成") | |
| 294 | + print("="*60) | |
| 295 | + | |
| 296 | + return 0 | |
| 297 | + | |
| 298 | + | |
| 299 | +if __name__ == '__main__': | |
| 300 | + sys.exit(main()) | |
| 301 | + | ... | ... |
| ... | ... | @@ -0,0 +1,266 @@ |
| 1 | +#!/usr/bin/env python3 | |
| 2 | +""" | |
| 3 | +检查ES索引中的实际数据,看分面字段是否有值 | |
| 4 | +""" | |
| 5 | + | |
| 6 | +import sys | |
| 7 | +import os | |
| 8 | +import argparse | |
| 9 | +from pathlib import Path | |
| 10 | + | |
| 11 | +sys.path.insert(0, str(Path(__file__).parent.parent)) | |
| 12 | + | |
| 13 | +from utils.es_client import ESClient | |
| 14 | + | |
| 15 | + | |
| 16 | +def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): | |
| 17 | + """检查ES中的分面相关字段""" | |
| 18 | + print("\n" + "="*60) | |
| 19 | + print("检查ES索引中的分面字段数据") | |
| 20 | + print("="*60) | |
| 21 | + | |
| 22 | + query = { | |
| 23 | + "query": { | |
| 24 | + "term": { | |
| 25 | + "tenant_id": tenant_id | |
| 26 | + } | |
| 27 | + }, | |
| 28 | + "size": size, | |
| 29 | + "_source": [ | |
| 30 | + "spu_id", | |
| 31 | + "title_zh", | |
| 32 | + "category1_name", | |
| 33 | + "category2_name", | |
| 34 | + "category3_name", | |
| 35 | + "category_name", | |
| 36 | + "category_path_zh", | |
| 37 | + "specifications", | |
| 38 | + "option1_name", | |
| 39 | + "option2_name", | |
| 40 | + "option3_name" | |
| 41 | + ] | |
| 42 | + } | |
| 43 | + | |
| 44 | + try: | |
| 45 | + response = es_client.client.search(index="search_products", body=query) | |
| 46 | + hits = response.get('hits', {}).get('hits', []) | |
| 47 | + total = response.get('hits', {}).get('total', {}).get('value', 0) | |
| 48 | + | |
| 49 | + print(f"\n总文档数: {total}") | |
| 50 | + print(f"检查前 {len(hits)} 个文档:\n") | |
| 51 | + | |
| 52 | + for i, hit in enumerate(hits, 1): | |
| 53 | + source = hit.get('_source', {}) | |
| 54 | + print(f"文档 {i}:") | |
| 55 | + print(f" spu_id: {source.get('spu_id')}") | |
| 56 | + print(f" title_zh: {source.get('title_zh', '')[:50]}") | |
| 57 | + print(f" category1_name: {source.get('category1_name')}") | |
| 58 | + print(f" category2_name: {source.get('category2_name')}") | |
| 59 | + print(f" category3_name: {source.get('category3_name')}") | |
| 60 | + print(f" category_name: {source.get('category_name')}") | |
| 61 | + print(f" category_path_zh: {source.get('category_path_zh')}") | |
| 62 | + print(f" option1_name: {source.get('option1_name')}") | |
| 63 | + print(f" option2_name: {source.get('option2_name')}") | |
| 64 | + print(f" option3_name: {source.get('option3_name')}") | |
| 65 | + | |
| 66 | + specs = source.get('specifications', []) | |
| 67 | + if specs: | |
| 68 | + print(f" specifications 数量: {len(specs)}") | |
| 69 | + # 显示前3个specifications | |
| 70 | + for spec in specs[:3]: | |
| 71 | + print(f" - name: {spec.get('name')}, value: {spec.get('value')}") | |
| 72 | + else: | |
| 73 | + print(f" specifications: 空") | |
| 74 | + print() | |
| 75 | + | |
| 76 | + except Exception as e: | |
| 77 | + print(f"错误: {e}") | |
| 78 | + import traceback | |
| 79 | + traceback.print_exc() | |
| 80 | + | |
| 81 | + | |
| 82 | +def check_facet_aggregations(es_client, tenant_id: str): | |
| 83 | + """检查分面聚合查询""" | |
| 84 | + print("\n" + "="*60) | |
| 85 | + print("检查分面聚合查询结果") | |
| 86 | + print("="*60) | |
| 87 | + | |
| 88 | + query = { | |
| 89 | + "query": { | |
| 90 | + "term": { | |
| 91 | + "tenant_id": tenant_id | |
| 92 | + } | |
| 93 | + }, | |
| 94 | + "size": 0, | |
| 95 | + "aggs": { | |
| 96 | + "category1_facet": { | |
| 97 | + "terms": { | |
| 98 | + "field": "category1_name", | |
| 99 | + "size": 10 | |
| 100 | + } | |
| 101 | + }, | |
| 102 | + "color_facet": { | |
| 103 | + "nested": { | |
| 104 | + "path": "specifications" | |
| 105 | + }, | |
| 106 | + "aggs": { | |
| 107 | + "filter_by_name": { | |
| 108 | + "filter": { | |
| 109 | + "term": { | |
| 110 | + "specifications.name": "color" | |
| 111 | + } | |
| 112 | + }, | |
| 113 | + "aggs": { | |
| 114 | + "value_counts": { | |
| 115 | + "terms": { | |
| 116 | + "field": "specifications.value", | |
| 117 | + "size": 10 | |
| 118 | + } | |
| 119 | + } | |
| 120 | + } | |
| 121 | + } | |
| 122 | + } | |
| 123 | + }, | |
| 124 | + "size_facet": { | |
| 125 | + "nested": { | |
| 126 | + "path": "specifications" | |
| 127 | + }, | |
| 128 | + "aggs": { | |
| 129 | + "filter_by_name": { | |
| 130 | + "filter": { | |
| 131 | + "term": { | |
| 132 | + "specifications.name": "size" | |
| 133 | + } | |
| 134 | + }, | |
| 135 | + "aggs": { | |
| 136 | + "value_counts": { | |
| 137 | + "terms": { | |
| 138 | + "field": "specifications.value", | |
| 139 | + "size": 10 | |
| 140 | + } | |
| 141 | + } | |
| 142 | + } | |
| 143 | + } | |
| 144 | + } | |
| 145 | + }, | |
| 146 | + "material_facet": { | |
| 147 | + "nested": { | |
| 148 | + "path": "specifications" | |
| 149 | + }, | |
| 150 | + "aggs": { | |
| 151 | + "filter_by_name": { | |
| 152 | + "filter": { | |
| 153 | + "term": { | |
| 154 | + "specifications.name": "material" | |
| 155 | + } | |
| 156 | + }, | |
| 157 | + "aggs": { | |
| 158 | + "value_counts": { | |
| 159 | + "terms": { | |
| 160 | + "field": "specifications.value", | |
| 161 | + "size": 10 | |
| 162 | + } | |
| 163 | + } | |
| 164 | + } | |
| 165 | + } | |
| 166 | + } | |
| 167 | + } | |
| 168 | + } | |
| 169 | + } | |
| 170 | + | |
| 171 | + try: | |
| 172 | + response = es_client.client.search(index="search_products", body=query) | |
| 173 | + aggs = response.get('aggregations', {}) | |
| 174 | + | |
| 175 | + print("\n1. category1_name 分面:") | |
| 176 | + category1 = aggs.get('category1_facet', {}) | |
| 177 | + buckets = category1.get('buckets', []) | |
| 178 | + if buckets: | |
| 179 | + for bucket in buckets: | |
| 180 | + print(f" {bucket['key']}: {bucket['doc_count']}") | |
| 181 | + else: | |
| 182 | + print(" 空(没有数据)") | |
| 183 | + | |
| 184 | + print("\n2. specifications.color 分面:") | |
| 185 | + color_agg = aggs.get('color_facet', {}) | |
| 186 | + color_filter = color_agg.get('filter_by_name', {}) | |
| 187 | + color_values = color_filter.get('value_counts', {}) | |
| 188 | + color_buckets = color_values.get('buckets', []) | |
| 189 | + if color_buckets: | |
| 190 | + for bucket in color_buckets: | |
| 191 | + print(f" {bucket['key']}: {bucket['doc_count']}") | |
| 192 | + else: | |
| 193 | + print(" 空(没有数据)") | |
| 194 | + | |
| 195 | + print("\n3. specifications.size 分面:") | |
| 196 | + size_agg = aggs.get('size_facet', {}) | |
| 197 | + size_filter = size_agg.get('filter_by_name', {}) | |
| 198 | + size_values = size_filter.get('value_counts', {}) | |
| 199 | + size_buckets = size_values.get('buckets', []) | |
| 200 | + if size_buckets: | |
| 201 | + for bucket in size_buckets: | |
| 202 | + print(f" {bucket['key']}: {bucket['doc_count']}") | |
| 203 | + else: | |
| 204 | + print(" 空(没有数据)") | |
| 205 | + | |
| 206 | + print("\n4. specifications.material 分面:") | |
| 207 | + material_agg = aggs.get('material_facet', {}) | |
| 208 | + material_filter = material_agg.get('filter_by_name', {}) | |
| 209 | + material_values = material_filter.get('value_counts', {}) | |
| 210 | + material_buckets = material_values.get('buckets', []) | |
| 211 | + if material_buckets: | |
| 212 | + for bucket in material_buckets: | |
| 213 | + print(f" {bucket['key']}: {bucket['doc_count']}") | |
| 214 | + else: | |
| 215 | + print(" 空(没有数据)") | |
| 216 | + | |
| 217 | + except Exception as e: | |
| 218 | + print(f"错误: {e}") | |
| 219 | + import traceback | |
| 220 | + traceback.print_exc() | |
| 221 | + | |
| 222 | + | |
def main():
    """Run the Elasticsearch facet-field checks for one tenant.

    Command-line options:
        --tenant-id  (required) tenant whose data is inspected.
        --es-host    Elasticsearch URL; falls back to the ES_HOST environment
                     variable and then to http://localhost:9200.
        --size       number of documents to inspect (default 5).

    ES_USERNAME / ES_PASSWORD are read from the environment and are only
    passed to the client when both are set.

    Returns:
        1 when the client cannot be created or the ping fails, otherwise 0.
    """
    cli = argparse.ArgumentParser(description='检查ES索引中的分面字段数据')
    cli.add_argument('--tenant-id', required=True, help='Tenant ID')
    cli.add_argument('--es-host', help='Elasticsearch host (或使用环境变量 ES_HOST, 默认: http://localhost:9200)')
    cli.add_argument('--size', type=int, default=5, help='检查的文档数量 (默认: 5)')
    args = cli.parse_args()

    # Resolve connection settings: explicit flag first, then environment.
    es_host = args.es_host or os.environ.get('ES_HOST', 'http://localhost:9200')
    es_username = os.environ.get('ES_USERNAME')
    es_password = os.environ.get('ES_PASSWORD')

    print(f"连接Elasticsearch: {es_host}")
    print(f"Tenant ID: {args.tenant_id}\n")

    # Credentials are forwarded only when both halves are present.
    client_kwargs = {'hosts': [es_host]}
    if es_username and es_password:
        client_kwargs.update(username=es_username, password=es_password)

    try:
        es_client = ESClient(**client_kwargs)
        if es_client.ping():
            print("✓ Elasticsearch连接成功\n")
        else:
            print(f"✗ 无法连接到Elasticsearch: {es_host}")
            return 1
    except Exception as exc:
        # Top-level boundary of a diagnostic script: report and exit non-zero.
        print(f"✗ 连接Elasticsearch失败: {exc}")
        return 1

    # Inspect the indexed documents, then the facet aggregations.
    check_es_facet_fields(es_client, args.tenant_id, args.size)
    check_facet_aggregations(es_client, args.tenant_id)

    rule = "=" * 60
    print("\n" + rule)
    print("检查完成")
    print(rule)

    return 0


if __name__ == '__main__':
    sys.exit(main())
| 266 | + | ... | ... |
| ... | ... | @@ -0,0 +1,131 @@ |
| 1 | +#!/usr/bin/env python3 | |
| 2 | +""" | |
| 3 | +测试脚本:模拟前端请求,检查后端返回的分面结果 | |
| 4 | +""" | |
| 5 | + | |
| 6 | +import sys | |
| 7 | +import json | |
| 8 | +import requests | |
| 9 | +import argparse | |
| 10 | +from pathlib import Path | |
| 11 | + | |
| 12 | +sys.path.insert(0, str(Path(__file__).parent.parent)) | |
| 13 | + | |
| 14 | + | |
def _print_facet_overview(position, facet):
    """Print one facet's field, label, type, counts and its first five values."""
    # `or []` also covers a JSON null, which dict.get's default does not.
    values = facet.get('values') or []
    print(f"\n{position}. {facet.get('field')}")
    print(f" 标签: {facet.get('label')}")
    print(f" 类型: {facet.get('type')}")
    print(f" 值数量: {len(values)}")
    print(f" 总计数: {facet.get('total_count', 0)}")

    if values:
        print(" 前5个值:")
        for item in values[:5]:
            print(f" - {item.get('value')}: {item.get('count')}")
    else:
        print(" ⚠ 值列表为空!")


def _print_color_facet_check(facets):
    """Report whether a 'specifications.color' facet exists and show up to ten values."""
    color_facet = next(
        (facet for facet in facets if facet.get('field') == 'specifications.color'),
        None,
    )

    if color_facet:
        color_values = color_facet.get('values') or []
        print("✓ 找到specifications.color分面")
        print(f" 值数量: {len(color_values)}")
        if color_values:
            print(" 前10个值:")
            for item in color_values[:10]:
                print(f" {item.get('value')}: {item.get('count')}")
        else:
            print(" ⚠ 值列表为空!")
    else:
        print("✗ 未找到specifications.color分面")
        print(f" 可用分面字段: {[f.get('field') for f in facets]}")


def main():
    """Send a facet search request to the API and print the returned facets.

    Command-line options:
        --api-url    search endpoint (default http://localhost:6002/search/).
        --tenant-id  (required) value sent in the X-Tenant-ID header.

    Returns:
        0 when the API answers 200 with a non-empty facet list; 1 on a
        non-200 status, an empty/missing facet list, or any request or
        processing error.
    """
    parser = argparse.ArgumentParser(description='测试分面API')
    parser.add_argument('--api-url', type=str, default='http://localhost:6002/search/', help='API URL')
    parser.add_argument('--tenant-id', type=str, required=True, help='Tenant ID')
    args = parser.parse_args()

    # Mirrors the facet request issued by the frontend (frontend/static/js/app.js).
    request_data = {
        "query": "",  # empty query: fetch all data
        "size": 10,
        "from": 0,
        "facets": [
            "category1_name",
            "specifications.color",
            "specifications.size",
            "specifications.material"
        ]
    }

    headers = {
        "Content-Type": "application/json",
        "X-Tenant-ID": args.tenant_id
    }

    timeout_seconds = 30
    separator = "=" * 60

    try:
        print(f"发送请求到: {args.api_url}")
        print(f"Tenant ID: {args.tenant_id}")
        print("请求数据:")
        print(json.dumps(request_data, indent=2, ensure_ascii=False))
        print("\n" + separator)

        response = requests.post(
            args.api_url, json=request_data, headers=headers, timeout=timeout_seconds
        )

        if response.status_code != 200:
            print(f"API错误: {response.status_code}")
            print(response.text)
            return 1

        data = response.json()

        # A field that is present but null would make len() fail, so
        # normalise missing and null alike to an empty list.
        results = data.get('results') or []
        facets = data.get('facets') or []

        print("API响应:")
        print(f" 总结果数: {data.get('total', 0)}")
        print(f" 返回结果数: {len(results)}")
        print(f"\n分面数量: {len(facets)}")

        if not facets:
            print("\n⚠ 分面列表为空!")
            return 1

        print("\n" + separator)
        print("分面详情:")
        print(separator)

        for position, facet in enumerate(facets, 1):
            _print_facet_overview(position, facet)

        # Dedicated check for the specifications.color facet.
        print("\n" + separator)
        print("检查specifications.color分面:")
        print(separator)
        _print_color_facet_check(facets)

        # Truncated JSON dump of the facets for debugging.
        print("\n" + separator)
        print("完整分面JSON(前500字符):")
        print(separator)
        facets_json = json.dumps(facets, indent=2, ensure_ascii=False)
        print(facets_json[:500])

    except requests.exceptions.ConnectionError:
        # Listed first so connect timeouts keep this message.
        print(f"\n连接错误: 无法连接到API服务器 {args.api_url}")
        print("请确保后端服务正在运行")
        return 1
    except requests.exceptions.Timeout:
        # The request sets an explicit timeout, so report it without a traceback.
        print(f"\n请求超时: API服务器 {args.api_url} 在 {timeout_seconds} 秒内未响应")
        return 1
    except Exception as e:
        # Top-level boundary of a diagnostic script: show the full traceback.
        print(f"\n错误: {e}")
        import traceback
        traceback.print_exc()
        return 1

    return 0


if __name__ == '__main__':
    sys.exit(main())
| 131 | + | ... | ... |