diff --git a/.env b/.env index 24111f6..5f4d9ca 100644 --- a/.env +++ b/.env @@ -20,7 +20,7 @@ DB_HOST=120.79.247.228 DB_PORT=3316 DB_DATABASE=saas DB_USERNAME=saas -DB_PASSWORD=P89cZHS5d7dFyc9R +DB_PASSWORD=pcjY7iwX1C6le1oz # Model Directories TEXT_MODEL_DIR=/data/tw/models/bge-m3 # 已经改为web请求了,不使用本地模型 diff --git a/README.md b/README.md index 036025d..b28f0b2 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,31 @@ 一个针对跨境独立站(店匠 Shoplazza 等)的多租户可配置搜索平台。README 作为项目导航入口,帮助你在不同阶段定位到更详细的文档。 + ## 项目环境 source /home/tw/miniconda3/etc/profile.d/conda.sh conda activate searchengine +## 测试pipeline + +fake数据 生成商品导入数据 提交到店匠的店铺: +cd /home/tw/SearchEngine && source /home/tw/miniconda3/etc/profile.d/conda.sh && conda activate searchengine && python scripts/csv_to_excel_multi_variant.py --output with_colors.xlsx + +自动同步到mysql +mysql到ES: + +python scripts/recreate_and_import.py \ + --tenant-id 162 \ + --db-host \ + --db-database saas \ + --db-username saas \ + --db-password \ + --es-host http://localhost:9200 + +构造查询: +参考 @ + + ## 核心能力速览 - **多语言 + 自动翻译**:中文、英文、俄文等语言检测与路由(BGE-M3、DeepL) diff --git a/api/result_formatter.py b/api/result_formatter.py index 790aac1..2300325 100644 --- a/api/result_formatter.py +++ b/api/result_formatter.py @@ -268,33 +268,38 @@ class ResultFormatter: facets.append(facet) continue - # 处理specifications嵌套分面(指定name) - if field_name.startswith("specifications_") and field_name.endswith("_facet") and 'filter_by_name' in agg_data: - # 提取name(从 "specifications_颜色_facet" 提取 "颜色") + # 处理specifications嵌套分面(指定name,如 specifications.color) + if field_name.startswith("specifications_") and field_name.endswith("_facet"): + # 提取name(从 "specifications_color_facet" 提取 "color") name = field_name[len("specifications_"):-len("_facet")] - filter_by_name_agg = agg_data.get('filter_by_name', {}) - value_counts = filter_by_name_agg.get('value_counts', {}) - values = [] - if 'buckets' in value_counts: - for value_bucket in value_counts['buckets']: - value = FacetValue( - 
value=value_bucket['key'], - label=str(value_bucket['key']), - count=value_bucket['doc_count'], - selected=False - ) - values.append(value) + # ES nested聚合返回结构: { "doc_count": N, "filter_by_name": { ... } } + # filter_by_name应该在agg_data的第一层 + filter_by_name_agg = agg_data.get('filter_by_name') - # 创建分面结果 - facet = FacetResult( - field=f"specifications.{name}", - label=str(name), - type="terms", - values=values, - total_count=filter_by_name_agg.get('doc_count', 0) - ) - facets.append(facet) + if filter_by_name_agg: + value_counts = filter_by_name_agg.get('value_counts', {}) + + values = [] + if 'buckets' in value_counts and value_counts['buckets']: + for value_bucket in value_counts['buckets']: + value = FacetValue( + value=value_bucket['key'], + label=str(value_bucket['key']), + count=value_bucket['doc_count'], + selected=False + ) + values.append(value) + + # 创建分面结果 + facet = FacetResult( + field=f"specifications.{name}", + label=str(name), + type="terms", + values=values, + total_count=filter_by_name_agg.get('doc_count', 0) + ) + facets.append(facet) continue # Handle terms aggregation diff --git a/database-config-analysis.md b/database-config-analysis.md new file mode 100644 index 0000000..8c619ce --- /dev/null +++ b/database-config-analysis.md @@ -0,0 +1,246 @@ +# 数据库配置分析与建议 + +## 当前配置分析 + +从提供的 YAML 配置文件中,发现以下情况: + +### 1. 
缺失的数据库配置 + +**当前配置文件中缺少直接的数据源配置**,需要添加以下配置: + +```yaml +spring: + datasource: + # 主数据源配置 + master: + url: jdbc:mysql://localhost:3306/saas + username: saas + password: P89cZHS5d7dFyc9R + driver-class: com.mysql.cj.jdbc.Driver + type: com.zaxxer.hikari.HikariDataSource + hikari: + maximum-pool-size: 20 + minimum-idle: 5 + connection-timeout: 30000 + idle-timeout: 600000 + max-lifetime: 1800000 + + # 动态数据源配置(多租户支持) + dynamic: + enabled: true + primary: master + strict: false + datasource: + # 店匠生产数据库 + shoplazza: + url: jdbc:mysql://120.79.247.228:3316/saas + username: saas + password: P89cZHS5d7dFyc9R + driver-class: com.mysql.cj.jdbc.Driver + type: com.zaxxer.hikari.HikariDataSource +``` + +### 2. 当前已有的相关配置 + +#### Redis 配置 +```yaml +spring: + data: + redis: + host: 127.0.0.1 + port: 6379 + database: 0 + timeout: 5000ms + lettuce: + pool: + max-active: 200 + max-idle: 20 + min-idle: 5 + max-wait: -1ms +``` + +#### MyBatis Plus 配置 +```yaml +mybatis-plus: + configuration: + map-underscore-to-camel-case: true + global-config: + db-config: + id-type: NONE + logic-delete-value: 1 + logic-not-delete-value: 0 +``` + +### 3. 
从项目结构推断的数据库配置 + +基于之前分析的项目文件,完整的数据库配置应该包括: + +#### 3.1 连接池配置 +```yaml +spring: + datasource: + master: + hikari: + # 连接池最大连接数 + maximum-pool-size: 20 + # 连接池最小空闲连接数 + minimum-idle: 5 + # 连接超时时间(毫秒) + connection-timeout: 30000 + # 空闲连接超时时间(毫秒) + idle-timeout: 600000 + # 连接最大生命周期(毫秒) + max-lifetime: 1800000 + # 连接测试查询 + connection-test-query: SELECT 1 +``` + +#### 3.2 多数据源配置 +```yaml +spring: + datasource: + dynamic: + enabled: true + primary: master + strict: false + datasource: + # 主库(读写) + master: + url: jdbc:mysql://120.79.247.228:3316/saas + username: saas + password: P89cZHS5d7dFyc9R + driver-class: com.mysql.cj.jdbc.Driver + + # 从库(只读)- 可选配置 + slave: + url: jdbc:mysql://slave-host:3306/saas + username: saas_readonly + password: readonly_password + driver-class: com.mysql.cj.jdbc.Driver +``` + +#### 3.3 JPA/Hibernate 配置 +```yaml +spring: + jpa: + show-sql: false + hibernate: + ddl-auto: none + naming: + physical-strategy: org.springframework.boot.orm.jpa.hibernate.SpringPhysicalNamingStrategy + properties: + hibernate: + dialect: org.hibernate.dialect.MySQL8Dialect + format_sql: true + use_sql_comments: true + jdbc: + batch_size: 50 + order_inserts: true + order_updates: true +``` + +### 4. 
环境配置建议 + +#### 开发环境(application-dev.yml) +```yaml +spring: + profiles: + active: dev + + datasource: + master: + url: jdbc:mysql://localhost:3306/saas_dev + username: root + password: root + driver-class: com.mysql.cj.jdbc.Driver + hikari: + maximum-pool-size: 10 + minimum-idle: 2 + connection-timeout: 30000 + +# 开发环境 SQL 输出 +logging: + level: + com.hsyl.saas.mapper: DEBUG + org.springframework.jdbc.core: DEBUG +``` + +#### 生产环境(application-prod.yml) +```yaml +spring: + profiles: + active: prod + + datasource: + master: + url: jdbc:mysql://120.79.247.228:3316/saas + username: saas + password: P89cZHS5d7dFyc9R + driver-class: com.mysql.cj.jdbc.Driver + hikari: + maximum-pool-size: 50 + minimum-idle: 10 + connection-timeout: 60000 + max-lifetime: 3600000 + +# 生产环境 SQL 监控 +management: + endpoints: + web: + exposure: + include: health,info,metrics,datasource +``` + +### 5. 数据库连接信息汇总 + +| 环境 | 主机 | 端口 | 数据库 | 用户名 | 密码 | +|------|------|------|--------|--------|------| +| 本地开发 | localhost | 3306 | saas | saas | P89cZHS5d7dFyc9R | +| 生产环境 | 120.79.247.228 | 3316 | saas | saas | P89cZHS5d7dFyc9R | + +### 6. 必需的依赖项 + +确保 `pom.xml` 或 `build.gradle` 包含以下依赖: + +```xml + +<dependency> + <groupId>mysql</groupId> + <artifactId>mysql-connector-java</artifactId> + <version>8.0.33</version> +</dependency> + + +<dependency> + <groupId>com.zaxxer</groupId> + <artifactId>HikariCP</artifactId> + <version>5.0.1</version> +</dependency> + + +<dependency> + <groupId>com.baomidou</groupId> + <artifactId>dynamic-datasource-spring-boot-starter</artifactId> + <version>3.6.1</version> +</dependency> +``` + +### 7. 测试数据库连接 + +```bash +# 测试本地数据库连接 +mysql -h localhost -P 3306 -u saas -pP89cZHS5d7dFyc9R saas + +# 测试生产数据库连接 +mysql -h 120.79.247.228 -P 3316 -u saas -pP89cZHS5d7dFyc9R saas +``` + +## 结论 + +当前配置文件缺少完整的数据库配置,需要补充: +1. **数据源连接信息**(URL、用户名、密码) +2. **连接池配置**(HikariCP 参数) +3. **多数据源配置**(如需多租户支持) +4. 
**JPA/Hibernate 配置**(数据库方言、DDL 策略等) + +建议根据环境(开发/测试/生产)分别配置不同的数据库连接参数。 \ No newline at end of file diff --git a/docs/ES常用表达式.md b/docs/ES常用表达式.md deleted file mode 100644 index 2c9c0fe..0000000 --- a/docs/ES常用表达式.md +++ /dev/null @@ -1,20 +0,0 @@ -GET /search_products/_search -{ - "query": { - "term": { - "tenant_id": "2" - } - } -} - - -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ - "size": 5, - "query": { - "bool": { - "filter": [ - { "term": { "tenant_id": "162" } } - ] - } - } - }' \ No newline at end of file diff --git a/docs/Search-API-Examples.md b/docs/Search-API-Examples.md index 3f0f7d0..c8efa67 100644 --- a/docs/Search-API-Examples.md +++ b/docs/Search-API-Examples.md @@ -23,7 +23,7 @@ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "芭比娃娃" }' @@ -49,7 +49,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -63,7 +63,7 @@ curl -X POST "http://localhost:6002/search/" \ # 第1页(0-19) curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -74,7 +74,7 @@ curl -X POST "http://localhost:6002/search/" \ # 第2页(20-39) curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -94,7 +94,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -109,7 +109,7 @@ curl -X POST 
"http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -126,7 +126,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -144,7 +144,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -164,7 +164,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -184,7 +184,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -207,7 +207,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -227,7 +227,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -246,7 +246,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -265,7 +265,7 @@ curl -X POST 
"http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -288,7 +288,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -318,7 +318,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -369,7 +369,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -384,7 +384,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -401,7 +401,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -425,7 +425,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -468,7 +468,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -497,7 +497,7 @@ curl -X POST 
"http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -512,7 +512,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -527,7 +527,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -548,7 +548,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/image" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "image_url": "https://example.com/barbie.jpg", "size": 20 @@ -560,7 +560,7 @@ curl -X POST "http://localhost:6002/search/image" \ ```bash curl -X POST "http://localhost:6002/search/image" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "image_url": "https://example.com/barbie.jpg", "size": 20, @@ -584,7 +584,7 @@ curl -X POST "http://localhost:6002/search/image" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "玩具 AND 乐高" }' @@ -597,7 +597,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "芭比 OR 娃娃" }' @@ -610,7 +610,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "玩具 ANDNOT 
电动" }' @@ -623,7 +623,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "玩具 AND (乐高 OR 芭比) ANDNOT 电动" }' @@ -636,7 +636,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "brand:乐高" }' @@ -961,7 +961,7 @@ const SearchComponent = { ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -1000,7 +1000,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -1020,7 +1020,7 @@ curl -X POST "http://localhost:6002/search/" \ # 显示某个类目下的所有商品,按价格排序,提供品牌筛选 curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "*", "filters": { @@ -1051,7 +1051,7 @@ curl -X POST "http://localhost:6002/search/" \ # 用户搜索关键词,提供筛选和排序(包含规格分面) curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -1064,9 +1064,9 @@ curl -X POST "http://localhost:6002/search/" \ "field": "min_price", "type": "range", "ranges": [ - {"key": "0-50", "to": 50}, - {"key": "50-100", "from": 50, "to": 100}, - {"key": "100+", "from": 100} + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100}, + {"key": "100+", "from": 100} ] } ], @@ -1080,7 +1080,7 @@ curl -X POST "http://localhost:6002/search/" \ # 用户搜索并选择了规格筛选条件 curl -X POST "http://localhost:6002/search/" \ -H 
"Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -1106,7 +1106,7 @@ curl -X POST "http://localhost:6002/search/" \ # 显示特定价格区间的商品 curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "*", "range_filters": { @@ -1128,7 +1128,7 @@ curl -X POST "http://localhost:6002/search/" \ # 最近更新的商品 curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "*", "range_filters": { @@ -1152,7 +1152,7 @@ curl -X POST "http://localhost:6002/search/" \ # 错误:range_filters 缺少操作符 curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -1177,7 +1177,7 @@ curl -X POST "http://localhost:6002/search/" \ # 错误:query 为空 curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "" }' @@ -1255,7 +1255,7 @@ curl -X POST "http://localhost:6002/search/" \ # 使用通配符查询 + 分面 curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "*", "size": 0, @@ -1270,7 +1270,7 @@ curl -X POST "http://localhost:6002/search/" \ ```bash curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "手机", "language": "zh", @@ -1297,7 +1297,7 @@ curl -X POST "http://localhost:6002/search/" \ # 布尔表达式 + 过滤器 + 分面 + 排序 curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "(玩具 OR 游戏) AND 儿童 ANDNOT 电子", "filters": { @@ -1326,19 +1326,19 @@ curl -X POST "http://localhost:6002/search/" \ 
# 测试类目:玩具 curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{"query": "玩具", "size": 5}' # 测试品牌:乐高 curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{"query": "brand:乐高", "size": 5}' # 测试布尔表达式 curl -X POST "http://localhost:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{"query": "玩具 AND 乐高", "size": 5}' ``` diff --git a/docs/Usage-Guide.md b/docs/Usage-Guide.md index b3cbb53..9aab5d5 100644 --- a/docs/Usage-Guide.md +++ b/docs/Usage-Guide.md @@ -271,7 +271,7 @@ curl http://localhost:6002/admin/stats ```bash curl -X POST http://localhost:6002/search/ \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "玩具", "size": 10 @@ -294,7 +294,7 @@ curl -X POST "http://localhost:6002/search/?tenant_id=2" \ ```bash curl -X POST http://localhost:6002/search/ \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "玩具", "size": 10, @@ -312,7 +312,7 @@ curl -X POST http://localhost:6002/search/ \ ```bash curl -X POST http://localhost:6002/search/ \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "玩具", "size": 10, @@ -328,7 +328,7 @@ curl -X POST http://localhost:6002/search/ \ ```bash curl -X POST http://localhost:6002/search/image \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "image_url": "https://oss.essa.cn/example.jpg", "size": 10 @@ -403,7 +403,7 @@ curl http://localhost:9200/search_products/_count # 检查tenant_id过滤是否正确 curl -X POST http://localhost:6002/search/ \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{"query": "*", "size": 10, "debug": true}' ``` diff --git a/docs/分面数据问题完整分析.md 
b/docs/分面数据问题完整分析.md new file mode 100644 index 0000000..9ee545a --- /dev/null +++ b/docs/分面数据问题完整分析.md @@ -0,0 +1,188 @@ +# 分面数据问题完整分析报告 + +## 问题现象 + +前端显示的分面结果都是空的: +- Category: 空 +- Color: 空 +- Size: 空 +- Material: 空 + +ES的聚合查询结果也是空的。 + +## 诊断结果分析 + +### MySQL数据检查结果 + +1. **category_path字段**: + - 总SPU数:11254 + - 有category_path的SPU:只有1个 + - 该值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式,逗号分隔) + +2. **option表数据**: + - 总option记录数:2658 + - 有option定义的SPU数量:886个 + - **position=1, name='color'**: 885个SPU ✅ + - **position=2, name='size'**: 885个SPU ✅ + - **position=3, name='material'**: 885个SPU ✅ + +3. **SKU数据**: + - 总SKU数:43109 + - 应该有option1/2/3值 + +### ES数据检查结果 + +1. **category1_name字段**: + - 总文档数:10000 + - 有category1_name的文档:只有1个 + - 该值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式) + +2. **specifications字段**: + - ES聚合查询显示**有数据**: + - specifications.color: Beige: 1226, Khaki: 1176, Red: 1168等 + - specifications.size: 1: 1234, 12: 1234等 + - specifications.material: 塑料英文包装: 17277等 + +## 问题根源 + +### 问题1:category1_name 几乎都为空 ✅ 已找到原因 + +**原因**: +1. MySQL的`category_path`字段几乎都是空的(只有1个,而且是ID列表格式) +2. 当`category_path`为空时,代码会使用`category`字段作为备选(代码已修复) +3. 但需要检查MySQL的`category`字段是否有值 + +**数据流转**: +- Excel "专辑名称" → 店匠系统 → MySQL `category` 或 `category_path` 字段 +- 如果Excel导入时"专辑名称"没有正确映射,或者`category`字段也为空,就会导致`category1_name`为空 + +### 问题2:为什么specifications分面查询无结果 + +**ES聚合查询显示有数据**,但前端显示为空,可能原因: + +1. **前端搜索时有查询条件**: + - 如果有查询条件(如`query="手机"`),ES会先过滤文档 + - 过滤后的文档如果没有specifications数据,聚合结果就会为空 + - 但这不应该导致所有分面都为空 + +2. **分面聚合构建问题**: + - 前端请求:`["category1_name", "specifications.color", "specifications.size", "specifications.material"]` + - ES构建的聚合名称:`category1_name_facet`, `specifications_color_facet`等 + - 可能聚合构建或解析有问题 + +3. **tenant_id过滤问题**: + - 如果搜索时tenant_id不匹配,可能导致没有匹配的文档 + +## 需要检查的关键点 + +### 1. 
MySQL的category字段是否有值 + +**需要运行SQL查询**: +```sql +SELECT + COUNT(*) as total, + COUNT(category) as has_category, + COUNT(*) - COUNT(category) as null_category +FROM shoplazza_product_spu +WHERE tenant_id = 162 AND deleted = 0; +``` + +**如果category字段也为空**: +- 说明Excel导入时"专辑名称"字段没有正确映射到MySQL的`category`字段 +- 需要检查店匠系统的字段映射配置 + +### 2. SKU的option1/2/3字段是否有值 + +**需要运行SQL查询**: +```sql +SELECT + COUNT(*) as total_skus, + COUNT(option1) as has_option1, + COUNT(option2) as has_option2, + COUNT(option3) as has_option3 +FROM shoplazza_product_sku +WHERE tenant_id = 162 AND deleted = 0; +``` + +### 3. 检查ES聚合查询 + +**运行检查脚本**: +```bash +python scripts/check_es_data.py --tenant-id 162 +``` + +查看: +- 是否有category1_name数据 +- specifications聚合是否有数据 + +## 解决方案 + +### 方案1:修复category1_name字段生成(代码已修复) + +**已修复的代码**(`indexer/spu_transformer.py`第241-259行): +- 如果`category_path`为空,使用`category`字段作为备选 +- 从`category`字段解析多级分类 + +**但需要确保**: +1. MySQL的`category`字段有值 +2. 重新导入数据到ES + +### 方案2:检查并修复MySQL数据 + +如果MySQL的`category`字段也为空: + +1. **检查Excel导入映射**: + - 确认"专辑名称"字段是否正确映射到MySQL的`category`字段 + - 如果不正确,需要修复映射或重新导入 + +2. **如果category字段有值但category1_name仍为空**: + - 说明数据导入时使用的是旧代码 + - 需要重新导入数据到ES + +### 方案3:验证specifications分面查询 + +虽然ES聚合查询显示有数据,但需要验证: + +1. **检查前端搜索请求**: + - 确认分面请求是否正确发送 + - 确认tenant_id是否正确 + +2. **检查ES聚合结果解析**: + - 确认`format_facets`函数是否正确解析specifications分面 + - 确认字段名匹配是否正确(`specifications.color` vs `specifications_color_facet`) + +## 立即执行的操作 + +### 步骤1:检查MySQL的category字段 + +更新诊断脚本,添加category字段检查: +```bash +# 需要手动运行SQL或更新诊断脚本 +``` + +### 步骤2:重新导入数据到ES + +修复代码后,重新导入数据: +```bash +python scripts/recreate_and_import.py \ + --tenant-id 162 \ + --db-host <db-host> \ + --db-database saas \ + --db-username saas \ + --db-password <db-password> \ + --es-host http://localhost:9200 +``` + +### 步骤3:验证ES数据 + +运行ES数据检查脚本: +```bash +python scripts/check_es_data.py --tenant-id 162 +``` + +## 关键发现 + +1. **specifications数据是存在的**:ES聚合查询能正常返回color/size/material的分面数据 +2. 
**category1_name几乎都是空的**:这是因为`category_path`为空,需要从`category`字段生成 +3. **需要重新导入数据**:修复代码后,需要重新导入数据到ES才能生效 + diff --git a/docs/分面数据问题根源分析.md b/docs/分面数据问题根源分析.md new file mode 100644 index 0000000..88c48af --- /dev/null +++ b/docs/分面数据问题根源分析.md @@ -0,0 +1,125 @@ +# 分面数据问题根源分析 + +## ES数据检查结果 + +从ES索引数据检查结果可以看到: + +### 1. category1_name 分面问题 + +**检查结果**: +- 总文档数:10000 +- 有category1_name的文档:只有1个 +- 该文档的category1_name值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式,不是分类名称) + +**问题原因**: +- MySQL中`category_path`字段几乎都是空的(只有1个,而且那个是ID列表格式,不是路径格式如"服装/男装") +- MySQL中`category`字段可能也为空 +- 导致ES索引中的`category1_name`字段几乎都是空的 + +**解决方案**: +代码已修复(`indexer/spu_transformer.py`第241-259行),支持从`category`字段生成`category1_name`,但需要: +1. 确保MySQL的`category`字段有值 +2. 重新导入数据到ES + +### 2. specifications 分面问题 + +**检查结果**(从ES聚合查询): +- specifications.color 分面:有数据(Beige: 1226, Khaki: 1176等) +- specifications.size 分面:有数据(1: 1234, 12: 1234等) +- specifications.material 分面:有数据(塑料英文包装: 17277等) + +**说明**:ES中确实有specifications数据,而且聚合查询能正常返回结果。 + +## 问题根源 + +### 问题1:category1_name 几乎都为空 + +**MySQL数据情况**: +- `category_path` 字段:11253个SPU为空,只有1个有值(但那个值是ID列表格式) +- `category` 字段:需要检查是否有值 + +**ES索引情况**: +- `category1_name` 字段:几乎都是None +- 导致category分面查询结果为空 + +### 问题2:为什么specifications分面查询无结果 + +虽然ES聚合查询显示有数据,但前端显示为空,可能原因: + +1. **分面聚合结构不匹配**: + - 前端请求:`["category1_name", "specifications.color", "specifications.size", "specifications.material"]` + - ES构建的聚合名称:`category1_name_facet`, `specifications_color_facet`, `specifications_size_facet`, `specifications_material_facet` + - 前端解析时的字段匹配可能有问题 + +2. 
**ES聚合结果格式**: + - specifications.color分面的聚合名称是`specifications_color_facet` + - 但前端期望的field是`specifications.color` + - 需要在`format_facets`中正确匹配 + +## 解决方案 + +### 方案1:修复category1_name字段(必需) + +**问题**:MySQL的`category_path`为空,需要从`category`字段生成 + +**已修复代码**(`indexer/spu_transformer.py`): +- 如果`category_path`为空,使用`category`字段作为备选 +- 从`category`字段解析多级分类(如果包含"/") +- 如果`category`不包含"/",直接作为`category1_name` + +**但需要**: +1. 检查MySQL的`category`字段是否有值 +2. 如果`category`也为空,需要检查Excel导入时"专辑名称"字段是否正确映射 + +### 方案2:验证specifications分面查询 + +虽然ES聚合查询显示有数据,但需要: +1. 检查前端是否正确发送分面请求 +2. 检查ES返回的聚合结果格式 +3. 检查`format_facets`函数是否正确解析specifications分面 + +## 下一步操作 + +### 步骤1:检查MySQL的category字段 + +```sql +SELECT + COUNT(*) as total, + COUNT(category) as has_category, + COUNT(*) - COUNT(category) as null_category +FROM shoplazza_product_spu +WHERE tenant_id = 162 AND deleted = 0; +``` + +### 步骤2:检查Excel导入映射 + +确认Excel的"专辑名称"字段是否正确映射到MySQL的`category`字段: +- 如果映射到`category`字段,应该有值 +- 如果映射到`category_path`字段,但值是ID列表格式,需要修复 + +### 步骤3:重新导入数据到ES + +修复后,重新导入数据: +```bash +python scripts/recreate_and_import.py \ + --tenant-id 162 \ + --db-host <db-host> \ + --db-database saas \ + --db-username saas \ + --db-password <db-password> \ + --es-host http://localhost:9200 +``` + +### 步骤4:验证ES数据 + +检查ES索引中的文档是否包含: +- `category1_name`字段(应该有值) +- `specifications`字段(应该已经有数据) + +## 关键发现 + +从ES检查结果看: +1. **specifications数据是有的**,ES聚合查询能正常返回color/size/material的分面数据 +2. **category1_name几乎都是空的**,这是导致category分面为空的原因 +3. 
**如果specifications分面也显示为空**,可能是前端解析ES聚合结果的问题,而不是ES数据的问题 + diff --git a/docs/分面数据问题根源和解决方案.md b/docs/分面数据问题根源和解决方案.md new file mode 100644 index 0000000..8a0b90f --- /dev/null +++ b/docs/分面数据问题根源和解决方案.md @@ -0,0 +1,180 @@ +# 分面数据问题根源和解决方案 + +## 📊 诊断结果总结 + +### MySQL数据情况 +- **总SPU数**:11254 +- **category_path字段**:只有1个有值(ID列表格式),11253个为空 +- **option表数据**: + - 有option定义的SPU:886个 + - position=1, name='color': 885个 ✅ + - position=2, name='size': 885个 ✅ + - position=3, name='material': 885个 ✅ + +### ES索引数据情况 +- **总文档数**:10000 +- **category1_name字段**:只有1个有值(ID列表格式),其他都是None ❌ +- **specifications聚合查询**:有数据 ✅ + - specifications.color: Beige: 1226, Khaki: 1176等 + - specifications.size: 1: 1234, 12: 1234等 + - specifications.material: 塑料英文包装: 17277等 + +## 🔍 问题根源 + +### 问题1:category1_name 几乎都为空 + +**数据流分析**: + +1. **Excel生成阶段**(`csv_to_excel_multi_variant.py`): + - Excel字段:`'专辑名称': csv_data['categoryName']` + - 从CSV的`categoryName`字段读取,应该有值 + +2. **Excel导入店匠 → MySQL**: + - Excel的"专辑名称"字段 → 可能映射到MySQL的`category`或`category_path`字段 + - **问题**:店匠系统可能将"专辑名称"映射到`category`字段,而不是`category_path` + - 诊断结果显示:`category_path`几乎都是空的 + +3. **MySQL → ES转换**(`spu_transformer.py`): + - 原逻辑:只从`category_path`解析`category1_name` + - 如果`category_path`为空,`category1_name`不会被设置 + - **已修复**:如果`category_path`为空,使用`category`字段作为备选(第241-259行) + +**关键检查点**: +- MySQL的`category`字段是否有值? +- 如果`category`字段也为空,说明Excel导入时"专辑名称"没有正确映射 + +### 问题2:specifications分面查询无结果 + +**奇怪的现象**: +- ES聚合查询显示有数据(Beige: 1226, Khaki: 1176等) +- 但前端显示为空 + +**可能原因**: + +1. **前端搜索时有查询条件**: + - 如果搜索时添加了查询条件(如`query="手机"`),ES会先过滤文档 + - 过滤后的文档可能没有specifications数据,导致聚合结果为空 + - **需要验证**:不带查询条件的搜索,分面是否有数据 + +2. **分面聚合构建或解析问题**: + - 前端请求:`["category1_name", "specifications.color", "specifications.size", "specifications.material"]` + - ES构建的聚合名称:`specifications_color_facet` + - 前端解析时的字段匹配:`specifications.color` + - **需要验证**:`format_facets`函数是否正确匹配 + +3. 
**tenant_id过滤问题**: + - 如果tenant_id不匹配,会导致没有匹配的文档 + +## ✅ 已实施的修复 + +### 修复1:支持从category字段生成category1_name + +**文件**:`indexer/spu_transformer.py`(第241-259行) + +**修改内容**: +```python +elif pd.notna(spu_row.get('category')): + # 如果category_path为空,使用category字段作为category1_name的备选 + category = str(spu_row['category']) + # 从category字段解析多级分类 + if '/' in category: + path_parts = category.split('/') + if len(path_parts) > 0: + doc['category1_name'] = path_parts[0].strip() + else: + # 直接作为category1_name + doc['category1_name'] = category.strip() +``` + +**说明**:如果MySQL的`category`字段有值,修复后的代码应该能生成`category1_name` + +## 🔧 需要执行的操作 + +### 步骤1:检查MySQL的category字段 + +**更新诊断脚本**(已更新):`scripts/check_data_source.py` + +**运行检查**: +```bash +python scripts/check_data_source.py --tenant-id 162 --db-host ... +``` + +**关键检查**: +- `category`字段是否有值 +- 如果有值,值的格式是什么(是否包含"/") +- 如果也为空,说明Excel导入映射有问题 + +### 步骤2:重新导入数据到ES + +**修复代码后,需要重新导入数据**: +```bash +python scripts/recreate_and_import.py \ + --tenant-id 162 \ + --db-host <db-host> \ + --db-database saas \ + --db-username saas \ + --db-password <db-password> \ + --es-host http://localhost:9200 +``` + +### 步骤3:验证ES数据 + +**运行ES数据检查脚本**: +```bash +python scripts/check_es_data.py --tenant-id 162 +``` + +**检查内容**: +- `category1_name`字段是否有值 +- `specifications`字段是否有数据 +- 分面聚合查询是否有结果 + +## 📝 数据流程说明 + +### Excel生成 → MySQL + +**Excel字段**(`csv_to_excel_multi_variant.py`): +- `'专辑名称': csv_data['categoryName']` - 分类信息 +- `'款式1': 'color'`(M行)- 选项名称 +- `'款式2': 'size'`(M行)- 选项名称 +- `'款式3': 'material'`(M行)- 选项名称 +- `'款式1': 'Red'`(P行)- 选项值 +- `'款式2': '5'`(P行)- 选项值 +- `'款式3': '塑料'`(P行)- 选项值 + +**Excel导入店匠 → MySQL映射**(需要确认): +- `'专辑名称'` → `shoplazza_product_spu.category` 或 `category_path` +- `'款式1/2/3'`(M行)→ `shoplazza_product_option.name` + `position` +- `'款式1/2/3'`(P行)→ `shoplazza_product_sku.option1/2/3` + +### MySQL → ES转换 + +**当前逻辑**(`spu_transformer.py`): + +1. **category1_name生成**: + - 优先从`category_path`解析(第228-240行) + - 如果`category_path`为空,从`category`字段解析(第241-259行)✅ 已修复 + +2. 
**specifications生成**(第351-370行): + - 从`option表`获取name(position → name映射) + - 从`SKU表`获取option1/2/3值 + - 构建`specifications`数组 + +**关键点**: +- 需要确保MySQL的`category`字段有值 +- 需要确保`option表`有数据且`name`是英文(color/size/material) +- 需要确保SKU的`option1/2/3`字段有值 + +## 🎯 关键发现 + +1. **specifications数据是存在的**:ES聚合查询能正常返回color/size/material的分面数据 +2. **category1_name几乎都是空的**:这是因为`category_path`为空,需要从`category`字段生成 +3. **需要重新导入数据**:修复代码后,需要重新导入数据到ES才能生效 + +## 🔄 下一步 + +1. ✅ **代码已修复**:支持从`category`字段生成`category1_name` +2. ⏳ **需要检查MySQL数据**:确认`category`字段是否有值 +3. ⏳ **需要重新导入数据**:将修复后的数据导入ES +4. ⏳ **需要验证**:检查ES数据是否正确,分面是否能正常显示 + diff --git a/docs/分面数据问题诊断.md b/docs/分面数据问题诊断.md new file mode 100644 index 0000000..b355c46 --- /dev/null +++ b/docs/分面数据问题诊断.md @@ -0,0 +1,282 @@ +# 分面数据问题诊断报告 + +## 问题描述 + +前端显示的分面结果都是空的: +- Category: 空 +- Color: 空 +- Size: 空 +- Material: 空 + +ES的聚合查询结果也是空的。 + +## 数据流程分析 + +### 1. 数据生成阶段(csv_to_excel_multi_variant.py) + +**生成的数据**: + +#### 分类信息: +- Excel字段:`'专辑名称': csv_data['categoryName']` +- 示例值:`"电子产品"` 或 `"服装/男装"`(从CSV的categoryName字段读取) + +#### 属性信息(M+P类型商品): +- Excel字段(M行主商品): + - `'款式1': 'color'`(选项名称) + - `'款式2': 'size'`(选项名称) + - `'款式3': 'material'`(选项名称) +- Excel字段(P行子款式): + - `'款式1': 'Red'`(选项值,从COLORS列表随机选择) + - `'款式2': '5'`(选项值,1-30随机选择) + - `'款式3': '塑料'`(选项值,从商品标题提取) + +### 2. Excel导入店匠系统 → MySQL + +**预期映射**: + +#### 分类字段: +- Excel `'专辑名称'` → MySQL `shoplazza_product_spu.category_path` **或** `category` 字段 +- **问题**:店匠系统可能将"专辑名称"映射到`category`字段,而不是`category_path`字段 + +#### 属性字段: +- Excel `'款式1/2/3'`(M行)→ MySQL `shoplazza_product_option.name` 和 `position` +- Excel `'款式1/2/3'`(P行)→ MySQL `shoplazza_product_sku.option1/2/3` + +### 3. 
MySQL → ES转换阶段(spu_transformer.py) + +#### category1_name 构建逻辑(第228-240行): + +```python +if pd.notna(spu_row.get('category_path')): + category_path = str(spu_row['category_path']) + # 解析category_path获取多层级分类名称 + path_parts = category_path.split('/') + if len(path_parts) > 0: + doc['category1_name'] = path_parts[0].strip() +``` + +**问题**:如果MySQL中的`category_path`字段为空,`category1_name`不会被设置! + +#### specifications 构建逻辑(第328-347行): + +```python +# 构建option名称映射(position -> name) +option_name_map = {} +if not options.empty: + for _, opt_row in options.iterrows(): + position = opt_row.get('position') + name = opt_row.get('name') + if pd.notna(position) and pd.notna(name): + option_name_map[int(position)] = str(name) + +# 构建specifications +if pd.notna(sku_row.get('option1')) and 1 in option_name_map: + specifications.append({ + 'sku_id': sku_id, + 'name': option_name_map[1], # 使用option表的name字段 + 'value': str(sku_row['option1']) + }) +``` + +**问题**:如果`shoplazza_product_option`表中没有记录,或者`name`字段值不是英文(如"color"),会导致: +1. `option_name_map`为空,无法构建specifications +2. 即使有值,如果name不是"color"/"size"/"material",前端也无法正确匹配 + +## 问题根源 + +### 问题1:category1_name 为空 + +**原因**: +1. MySQL的`category_path`字段可能为空 +2. Excel的"专辑名称"可能被映射到`category`字段而不是`category_path` +3. 如果`category_path`为空,`category1_name`不会被设置 + +**验证方法**: +```sql +SELECT COUNT(*) as total, + COUNT(category_path) as has_category_path, + COUNT(category) as has_category +FROM shoplazza_product_spu +WHERE tenant_id = 162 AND deleted = 0; +``` + +### 问题2:specifications 为空 + +**原因**: +1. `shoplazza_product_option`表可能没有数据 +2. 
option表的`name`字段值可能不是英文(不是"color"、"size"、"material") + +**验证方法**: +```sql +SELECT DISTINCT name, position, COUNT(*) as count +FROM shoplazza_product_option +WHERE tenant_id = 162 AND deleted = 0 +GROUP BY name, position +ORDER BY position, name; +``` + +## 解决方案 + +### 方案1:修复 spu_transformer.py - 支持从category字段生成category1_name + +修改`indexer/spu_transformer.py`的`_transform_spu_to_doc`方法,如果`category_path`为空,使用`category`字段作为备选: + +```python +# Category相关字段 +if pd.notna(spu_row.get('category_path')): + category_path = str(spu_row['category_path']) + doc['category_path_zh'] = category_path + doc['category_path_en'] = None + + # 解析category_path获取多层级分类名称 + path_parts = category_path.split('/') + if len(path_parts) > 0: + doc['category1_name'] = path_parts[0].strip() + if len(path_parts) > 1: + doc['category2_name'] = path_parts[1].strip() + if len(path_parts) > 2: + doc['category3_name'] = path_parts[2].strip() +elif pd.notna(spu_row.get('category')): + # 如果category_path为空,使用category字段作为category1_name + category = str(spu_row['category']) + doc['category1_name'] = category.strip() + # 如果category包含"/",也尝试解析 + if '/' in category: + path_parts = category.split('/') + if len(path_parts) > 0: + doc['category1_name'] = path_parts[0].strip() + if len(path_parts) > 1: + doc['category2_name'] = path_parts[1].strip() + if len(path_parts) > 2: + doc['category3_name'] = path_parts[2].strip() +``` + +### 方案2:检查并修复 option 表的 name 字段值 + +需要确保`shoplazza_product_option`表的`name`字段值是英文: +- position=1 的name应该是 `"color"` +- position=2 的name应该是 `"size"` +- position=3 的name应该是 `"material"` + +如果值不对,需要更新: + +```sql +-- 查看当前的name值 +SELECT DISTINCT name, position +FROM shoplazza_product_option +WHERE tenant_id = 162 AND deleted = 0 +ORDER BY position; + +-- 如果需要更新(示例) +-- UPDATE shoplazza_product_option +-- SET name = CASE position +-- WHEN 1 THEN 'color' +-- WHEN 2 THEN 'size' +-- WHEN 3 THEN 'material' +-- END +-- WHERE tenant_id = 162 AND deleted = 0; +``` + +### 方案3:验证数据完整性 + +使用诊断脚本检查数据: + 
+```bash +python scripts/check_data_source.py \ + --tenant-id 162 \ + --db-host <DB_HOST> \ + --db-port 3316 \ + --db-database saas \ + --db-username saas \ + --db-password <DB_PASSWORD> +``` + +## 诊断步骤 + +### 步骤1:检查MySQL数据 + +运行诊断脚本: +```bash +cd /home/tw/SearchEngine +source /home/tw/miniconda3/etc/profile.d/conda.sh +conda activate searchengine +python scripts/check_data_source.py --tenant-id 162 --db-host <DB_HOST> --db-database saas --db-username saas --db-password <DB_PASSWORD> +``` + +### 步骤2:根据检查结果修复 + +#### 如果 category_path 为空: +- 使用方案1:修改`spu_transformer.py`支持从`category`字段生成`category1_name` + +#### 如果 option 表没有数据或name值不对: +- 检查Excel导入是否正确 +- 如果需要,手动更新option表的name字段值 + +### 步骤3:重新导入数据到ES + +```bash +python scripts/recreate_and_import.py \ + --tenant-id 162 \ + --db-host <DB_HOST> \ + --db-database saas \ + --db-username saas \ + --db-password <DB_PASSWORD> \ + --es-host http://localhost:9200 +``` + +### 步骤4:验证ES数据 + +检查ES索引中的文档: + +```bash +curl -X GET "http://localhost:9200/search_products/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "term": { + "tenant_id": "162" + } + }, + "size": 1, + "_source": ["spu_id", "title_zh", "category1_name", "specifications", "option1_name"] +}' +``` + +## 预期结果 + +修复后,ES文档应该包含: + +1. **category1_name字段**: + ```json + { + "category1_name": "电子产品" + } + ``` + +2. **specifications字段**: + ```json + { + "specifications": [ + {"sku_id": "123", "name": "color", "value": "Red"}, + {"sku_id": "123", "name": "size", "value": "5"}, + {"sku_id": "123", "name": "material", "value": "塑料"} + ] + } + ``` + +3. **option1_name/2_name/3_name字段**: + ```json + { + "option1_name": "color", + "option2_name": "size", + "option3_name": "material" + } + ``` + +## 总结 + +问题可能出现在: +1. **MySQL数据层面**:`category_path`字段为空,或者`shoplazza_product_option`表没有正确的数据 +2. 
**数据转换层面**:`spu_transformer.py`没有处理`category_path`为空的情况 + +建议先运行诊断脚本检查MySQL数据,然后根据检查结果进行修复。 + diff --git a/docs/分面问题修复总结.md b/docs/分面问题修复总结.md new file mode 100644 index 0000000..7afed76 --- /dev/null +++ b/docs/分面问题修复总结.md @@ -0,0 +1,177 @@ +# 分面数据问题修复总结 + +## 问题现象 + +前端显示的分面结果都是空的: +- Category: 空 +- Color: 空 +- Size: 空 +- Material: 空 + +ES的聚合查询结果也是空的。 + +## 问题分析 + +### 数据流程 + +1. **数据生成**(csv_to_excel_multi_variant.py): + - 生成Excel文件,包含"专辑名称"(分类)和"款式1/2/3"(属性名称和值) + +2. **Excel导入店匠** → MySQL: + - "专辑名称" → 可能映射到 `category` 或 `category_path` 字段 + - "款式1/2/3"(M行)→ `shoplazza_product_option.name` + - "款式1/2/3"(P行)→ `shoplazza_product_sku.option1/2/3` + +3. **MySQL → ES转换**(spu_transformer.py): + - `category1_name` 从 `category_path` 解析 + - `specifications` 从 `option表.name` + `sku表.option1/2/3` 构建 + +### 根本原因 + +1. **category1_name 为空**: + - MySQL的`category_path`字段可能为空 + - Excel的"专辑名称"可能被映射到`category`字段而不是`category_path` + - 原代码只从`category_path`解析,如果为空则`category1_name`不会被设置 + +2. **specifications 为空**: + - `shoplazza_product_option`表可能没有数据 + - 或`name`字段值不是英文(不是"color"、"size"、"material") + +## 已实施的修复 + +### 修复1:支持从category字段生成category1_name + +**文件**: `indexer/spu_transformer.py` + +**修改内容**: +- 如果`category_path`为空,使用`category`字段作为备选 +- 从`category`字段解析多级分类(如果包含"/") +- 如果`category`不包含"/",直接作为`category1_name` + +**代码位置**:第241-259行 + +```python +elif pd.notna(spu_row.get('category')): + # 如果category_path为空,使用category字段作为category1_name的备选 + category = str(spu_row['category']) + doc['category_name_zh'] = category + doc['category_name_en'] = None + doc['category_name'] = category + + # 尝试从category字段解析多级分类 + if '/' in category: + path_parts = category.split('/') + if len(path_parts) > 0: + doc['category1_name'] = path_parts[0].strip() + if len(path_parts) > 1: + doc['category2_name'] = path_parts[1].strip() + if len(path_parts) > 2: + doc['category3_name'] = path_parts[2].strip() + else: + # 如果category不包含"/",直接作为category1_name + doc['category1_name'] = category.strip() +``` + 
+## 诊断工具 + +已创建诊断脚本:`scripts/check_data_source.py` + +**使用方法**: +```bash +cd /home/tw/SearchEngine +source /home/tw/miniconda3/etc/profile.d/conda.sh +conda activate searchengine +python scripts/check_data_source.py \ + --tenant-id 162 \ + --db-host <DB_HOST> \ + --db-port 3316 \ + --db-database saas \ + --db-username saas \ + --db-password <DB_PASSWORD> +``` + +**检查内容**: +1. SPU汇总信息 +2. category_path 字段是否有值 +3. option 表的 name 字段值 +4. SKU 表的 option1/2/3 字段值 + +## 下一步操作 + +### 步骤1:运行诊断脚本检查MySQL数据 + +```bash +python scripts/check_data_source.py --tenant-id 162 --db-host ... +``` + +### 步骤2:根据检查结果修复数据 + +#### 如果 option 表的 name 值不对: + +检查option表的name字段值: +```sql +SELECT DISTINCT name, position +FROM shoplazza_product_option +WHERE tenant_id = 162 AND deleted = 0 +ORDER BY position; +``` + +如果需要,更新为英文: +- position=1 的 name 应该是 "color" +- position=2 的 name 应该是 "size" +- position=3 的 name 应该是 "material" + +### 步骤3:重新导入数据到ES + +```bash +python scripts/recreate_and_import.py \ + --tenant-id 162 \ + --db-host <DB_HOST> \ + --db-database saas \ + --db-username saas \ + --db-password <DB_PASSWORD> \ + --es-host http://localhost:9200 +``` + +### 步骤4:验证ES数据 + +检查ES索引中的文档是否包含: +- `category1_name` 字段 +- `specifications` 字段(包含color、size、material) +- `option1_name`、`option2_name`、`option3_name` 字段 + +```bash +curl -X GET "http://localhost:9200/search_products/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "term": { + "tenant_id": "162" + } + }, + "size": 1, + "_source": ["spu_id", "title_zh", "category1_name", "specifications", "option1_name", "option2_name", "option3_name"] +}' +``` + +## 预期结果 + +修复后,ES文档应该包含: + +```json +{ + "spu_id": "123", + "title_zh": "商品标题", + "category1_name": "电子产品", + "specifications": [ + {"sku_id": "456", "name": "color", "value": "Red"}, + {"sku_id": "456", "name": "size", "value": "5"}, + {"sku_id": "456", "name": "material", "value": "塑料"} + ], + "option1_name": "color", + "option2_name": "size", + "option3_name": "material" +} +``` + +前端分面应该能正常显示分类和属性值。 + diff --git 
a/docs/分面问题最终诊断.md b/docs/分面问题最终诊断.md new file mode 100644 index 0000000..5b1ec85 --- /dev/null +++ b/docs/分面问题最终诊断.md @@ -0,0 +1,115 @@ +# 分面问题最终诊断报告 + +## ES数据检查结果 + +根据ES索引检查结果: + +### ✅ specifications 分面有数据 +ES聚合查询显示: +- **specifications.color**: 有数据(Beige: 1226, Khaki: 1176, Red: 1168等) +- **specifications.size**: 有数据(1: 1234, 12: 1234等) +- **specifications.material**: 有数据(塑料英文包装: 17277等) + +**结论**:ES中确实有specifications数据,聚合查询能正常返回结果。 + +### ❌ category1_name 几乎都为空 +- 总文档数:10000 +- 有category1_name的文档:只有1个 +- 该文档的category1_name值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式,不是分类名称) + +**结论**:category1_name字段几乎都是空的,导致category分面为空。 + +## 问题根源分析 + +### 问题1:category1_name 为什么为空 + +**MySQL数据情况**(从诊断脚本结果): +- `category_path`字段:11253个SPU为空,只有1个有值 +- 该唯一值:`593389466647815326,593389582007954165,593389582008019701`(ID列表格式,不是路径格式) + +**当前代码逻辑**(`spu_transformer.py`第228-240行): +```python +if pd.notna(spu_row.get('category_path')): + category_path = str(spu_row['category_path']) + # 直接按"/"分割,但ID列表格式是逗号分隔的 + path_parts = category_path.split('/') + # 如果category_path是ID列表,path_parts只有一个元素(整个ID列表) +``` + +**问题**: +1. 对于ID列表格式的`category_path`(如`593389466647815326,593389582007954165,593389582008019701`),按"/"分割后只有一个元素,会被错误地作为`category1_name` +2. 对于空的`category_path`,会进入`elif`分支,使用`category`字段作为备选 + +**需要检查**: +- MySQL的`category`字段是否有值?如果有值,应该能生成`category1_name` +- 如果`category`字段也为空,说明Excel导入时"专辑名称"没有正确映射 + +### 问题2:specifications 分面查询为什么为空 + +虽然ES聚合查询显示有数据,但前端显示为空,可能原因: + +1. **前端分面请求格式**: + - 前端请求:`["category1_name", "specifications.color", "specifications.size", "specifications.material"]` + - ES构建的聚合名称:`specifications_color_facet`(注意:是下划线,不是点号) + - 字段匹配可能有问题 + +2. 
**ES聚合结果解析**: + - ES返回的聚合字段名:`specifications_color_facet` + - 前端期望的field:`specifications.color` + - `format_facets`函数需要正确匹配 + +## 具体数据说明 + +### MySQL数据情况 +- **总SPU数**:11254 +- **有category_path的SPU**:1个(值是ID列表格式) +- **有option定义的SPU**:886个 + - position=1, name='color': 885个 + - position=2, name='size': 885个 + - position=3, name='material': 885个 +- **总SKU数**:43109个 + +### ES数据情况 +- **specifications数据**:有数据,能够正常聚合 +- **category1_name数据**:几乎都是空的(只有1个,而且是ID列表格式) + +## 解决方案 + +### 立即执行的操作 + +1. **检查MySQL的category字段**: + - 运行诊断脚本检查`category`字段是否有值 + - 如果`category`有值,修复后的代码应该能生成`category1_name` + - 如果`category`也为空,需要检查Excel导入映射 + +2. **重新导入数据到ES**: + ```bash + python scripts/recreate_and_import.py \ + --tenant-id 162 \ + --db-host <DB_HOST> \ + --db-database saas \ + --db-username saas \ + --db-password <DB_PASSWORD> \ + --es-host http://localhost:9200 + ``` + +3. **验证ES数据**: + - 检查`category1_name`字段是否有值 + - 检查`specifications`字段是否有数据 + +### 如果category字段也为空 + +需要检查Excel导入到店匠系统时,"专辑名称"字段是否正确映射到MySQL的`category`字段。 + +## 关键发现 + +1. **specifications数据是存在的**:ES聚合查询能正常返回color/size/material的分面数据 +2. **category1_name几乎都是空的**:这是因为`category_path`为空,而且可能`category`字段也为空 +3. **需要从category字段生成category1_name**:代码已修复,但需要确保MySQL的`category`字段有值 + +## 下一步 + +1. 检查MySQL的`category`字段是否有值 +2. 如果有值,重新导入数据到ES +3. 
如果也为空,需要检查Excel导入映射或修复数据 + diff --git a/docs/分面问题诊断和修复指南.md b/docs/分面问题诊断和修复指南.md new file mode 100644 index 0000000..ccedf6c --- /dev/null +++ b/docs/分面问题诊断和修复指南.md @@ -0,0 +1,203 @@ +# 分面数据问题诊断和修复指南 + +## 问题现象 + +前端显示的分面结果都是空的: +- Category: 空 +- Color: 空 +- Size: 空 +- Material: 空 + +ES的聚合查询结果也是空的。 + +## 诊断结果分析 + +### MySQL数据情况 + +| 字段/表 | 有数据的数量 | 说明 | +|---------|-------------|------| +| 总SPU数 | 11254 | - | +| category_path有值 | 1个 | 该值是ID列表格式(不是路径格式) | +| category字段 | 需要检查 | 可能是空的 | +| option表记录 | 2658条 | 886个SPU有option定义 | +| position=1, name='color' | 885个SPU | ✅ 数量足够 | +| position=2, name='size' | 885个SPU | ✅ 数量足够 | +| position=3, name='material' | 885个SPU | ✅ 数量足够 | +| 总SKU数 | 43109 | option1/2/3字段需要检查 | + +### ES索引数据情况 + +| 字段 | 有数据的数量 | 说明 | +|------|-------------|------| +| 总文档数 | 10000 | - | +| category1_name有值 | 1个 | 该值是ID列表格式 ❌ | +| specifications聚合查询 | 有数据 | ✅ color/size/material都有数据 | + +## 问题根源 + +### 问题1:category1_name 几乎都为空 ❌ + +**原因分析**: + +1. **MySQL数据层面**: + - `category_path`字段几乎都是空的(只有1个,且是ID列表格式) + - 需要检查`category`字段是否有值 + +2. **数据转换层面**: + - 原代码只从`category_path`解析`category1_name` + - 如果`category_path`为空,`category1_name`不会被设置 + - ✅ **已修复**:如果`category_path`为空,使用`category`字段作为备选(`spu_transformer.py`第241-259行) + +3. **Excel导入映射**: + - Excel的"专辑名称"字段可能映射到MySQL的`category`字段 + - 需要确认映射关系 + +### 问题2:specifications分面查询无结果 + +**奇怪现象**: +- ES聚合查询(查询所有文档)显示有数据 +- 但前端显示为空 + +**可能原因**: +1. 前端搜索时有查询条件,过滤后没有匹配的文档 +2. 分面聚合构建或解析有问题 +3. tenant_id不匹配 + +## 数据流程分析 + +### 1. Excel生成阶段 + +**脚本**:`scripts/csv_to_excel_multi_variant.py` + +**生成的数据**: +- `'专辑名称': csv_data['categoryName']` - 从CSV的categoryName字段读取 +- `'款式1': 'color'`(M行主商品)- 选项名称 +- `'款式2': 'size'`(M行主商品)- 选项名称 +- `'款式3': 'material'`(M行主商品)- 选项名称 +- `'款式1': 'Red'`(P行子款式)- 选项值(从COLORS列表随机选择) +- `'款式2': '5'`(P行子款式)- 选项值(1-30随机选择) +- `'款式3': '塑料'`(P行子款式)- 选项值(从商品标题提取) + +### 2. 
Excel导入店匠 → MySQL + +**映射关系**(需要确认): +- Excel `'专辑名称'` → MySQL `shoplazza_product_spu.category` 或 `category_path` +- Excel `'款式1/2/3'`(M行)→ MySQL `shoplazza_product_option.name` + `position` +- Excel `'款式1/2/3'`(P行)→ MySQL `shoplazza_product_sku.option1/2/3` + +**当前情况**: +- ✅ option表有数据:885个SPU有color/size/material选项名称 +- ❓ category字段:需要检查是否有值 + +### 3. MySQL → ES转换 + +**代码逻辑**(`indexer/spu_transformer.py`): + +1. **category1_name生成**(第228-259行): + ```python + if pd.notna(spu_row.get('category_path')): + # 从category_path解析 + path_parts = category_path.split('/') + doc['category1_name'] = path_parts[0].strip() + elif pd.notna(spu_row.get('category')): + # 从category字段解析(已修复) + doc['category1_name'] = category.strip() + ``` + +2. **specifications生成**(第351-370行): + ```python + # 从option表获取name映射 + option_name_map = {position: name} + # 从SKU表获取option值 + if pd.notna(sku_row.get('option1')) and 1 in option_name_map: + specifications.append({ + 'name': option_name_map[1], # 'color' + 'value': str(sku_row['option1']) # 'Red' + }) + ``` + +## 解决方案 + +### 步骤1:检查MySQL的category字段 + +**运行更新后的诊断脚本**: +```bash +cd /home/tw/SearchEngine +source /home/tw/miniconda3/etc/profile.d/conda.sh +conda activate searchengine +python scripts/check_data_source.py --tenant-id 162 --db-host ... 
+``` + +**关键检查**: +- `category`字段是否有值 +- 如果有值,值的格式是什么(是否包含"/") + +**如果category字段也为空**: +- 说明Excel导入时"专辑名称"没有正确映射到MySQL +- 需要检查店匠系统的字段映射配置 + +### 步骤2:重新导入数据到ES + +**修复代码后,必须重新导入数据才能生效**: +```bash +python scripts/recreate_and_import.py \ + --tenant-id 162 \ + --db-host <DB_HOST> \ + --db-database saas \ + --db-username saas \ + --db-password <DB_PASSWORD> \ + --es-host http://localhost:9200 +``` + +### 步骤3:验证ES数据 + +**运行ES数据检查脚本**: +```bash +python scripts/check_es_data.py --tenant-id 162 +``` + +**检查内容**: +- `category1_name`字段是否有值 +- `specifications`字段是否有数据 +- 分面聚合查询是否有结果 + +## 预期结果 + +修复后,ES文档应该包含: + +```json +{ + "spu_id": "123", + "title_zh": "商品标题", + "category1_name": "电子产品", // 从category字段生成 + "specifications": [ + {"sku_id": "456", "name": "color", "value": "Red"}, + {"sku_id": "456", "name": "size", "value": "5"}, + {"sku_id": "456", "name": "material", "value": "塑料"} + ], + "option1_name": "color", + "option2_name": "size", + "option3_name": "material" +} +``` + +## 关键检查点 + +### 1. MySQL数据检查 + +- [ ] `category`字段是否有值 +- [ ] `category_path`字段是否为空 +- [ ] `option表`的`name`字段是否是英文(color/size/material) +- [ ] SKU表的`option1/2/3`字段是否有值 + +### 2. ES数据检查 + +- [ ] `category1_name`字段是否有值 +- [ ] `specifications`字段是否有数据 +- [ ] 分面聚合查询是否有结果 + +### 3. 
数据导入验证 + +- [ ] 重新导入数据后,检查ES文档是否正确 +- [ ] 验证分面查询是否能正常返回结果 + diff --git a/docs/常用查询 - ES.md b/docs/常用查询 - ES.md new file mode 100644 index 0000000..2c9c0fe --- /dev/null +++ b/docs/常用查询 - ES.md @@ -0,0 +1,20 @@ +GET /search_products/_search +{ + "query": { + "term": { + "tenant_id": "2" + } + } +} + + +curl -u '<ES_USERNAME>:<ES_PASSWORD>' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ + "size": 5, + "query": { + "bool": { + "filter": [ + { "term": { "tenant_id": "162" } } + ] + } + } + }' \ No newline at end of file diff --git a/docs/常用查询 - sql.sql b/docs/常用查询 - sql.sql new file mode 100644 index 0000000..c564dbc --- /dev/null +++ b/docs/常用查询 - sql.sql @@ -0,0 +1,254 @@ +-- 查询今天入库的SPU和SKU商品数据 +-- 用于查询当天新增的商品信息 + +-- ====================================== +-- 1. 
查询今天入库的SKU商品 +-- ====================================== + +-- 查询今天创建的SKU商品(SKU级别) +SELECT + sku.id AS sku_id, + sku.tenant_id, + sku.shop_id, + sku.spu_id, + sku.shoplazza_id AS variant_id, + sku.shoplazza_product_id AS shoplazza_product_id, + sku.sku AS sku_code, + sku.title AS sku_title, + sku.price AS sku_price, + sku.compare_at_price AS compare_price, + sku.cost_price AS cost_price, + sku.inventory_quantity AS stock_quantity, + sku.weight AS product_weight, + sku.weight_unit AS weight_unit, + sku.option1 AS color_option, + sku.option2 AS size_option, + sku.option3 AS material_option, + sku.image_src AS sku_image_url, + sku.barcode AS barcode, + sku.position AS variant_position, + sku.create_time AS sku_create_time, + sku.update_time AS sku_update_time, + CASE + WHEN sku.deleted = 1 THEN '已删除' + ELSE '正常' + END AS sku_status +FROM shoplazza_product_sku sku +WHERE DATE(sku.create_time) = CURDATE() -- 今天的日期 + AND sku.deleted = 0 -- 未删除的商品 +ORDER BY sku.create_time DESC; + +-- ====================================== +-- 3. 
关联查询今天入库的SPU及其对应的SKU +-- ====================================== + +-- 查询今天创建的SPU及其关联的SKU信息 +SELECT + spu.id AS spu_id, + spu.tenant_id, + spu.shop_id, + spu.shoplazza_id AS shoplazza_product_id, + spu.title AS product_title, + spu.vendor AS brand_name, + spu.tags AS product_tags, + spu.published AS product_published, + spu.create_time AS spu_create_time, + + -- 聚合SKU信息 + COUNT(sku.id) AS sku_count, + COALESCE(MIN(sku.price), 0) AS min_price, + COALESCE(MAX(sku.price), 0) AS max_price, + COALESCE(SUM(sku.inventory_quantity), 0) AS total_stock, + GROUP_CONCAT(DISTINCT sku.option1 ORDER BY sku.option1 SEPARATOR ', ') AS available_colors, + GROUP_CONCAT(DISTINCT sku.option2 ORDER BY sku.option2 SEPARATOR ', ') AS available_sizes, + GROUP_CONCAT(DISTINCT sku.option3 ORDER BY sku.option3 SEPARATOR ', ') AS available_materials + +FROM shoplazza_product_spu spu +LEFT JOIN shoplazza_product_sku sku ON spu.id = sku.spu_id + AND spu.tenant_id = sku.tenant_id + AND sku.deleted = 0 +WHERE DATE(spu.create_time) = CURDATE() -- 今天创建的SPU + AND spu.deleted = 0 -- 未删除的SPU +GROUP BY spu.id, spu.tenant_id, spu.shop_id, spu.shoplazza_id, + spu.title, spu.vendor, spu.tags, spu.published, spu.create_time +ORDER BY spu.create_time DESC; + +-- ====================================== +-- 4. 
查询今天入库商品的数量统计 +-- ====================================== + +-- 统计今天入库的商品数量 +SELECT + 'SPU商品' AS data_type, + COUNT(*) AS today_count, + DATE(CURDATE()) AS statistics_date +FROM shoplazza_product_spu +WHERE DATE(create_time) = CURDATE() + AND deleted = 0 + +UNION ALL + +SELECT + 'SKU商品' AS data_type, + COUNT(*) AS today_count, + DATE(CURDATE()) AS statistics_date +FROM shoplazza_product_sku +WHERE DATE(create_time) = CURDATE() + AND deleted = 0 + +UNION ALL + +SELECT + '活跃店铺' AS data_type, + COUNT(DISTINCT shop_id) AS today_count, + DATE(CURDATE()) AS statistics_date +FROM shoplazza_product_spu +WHERE DATE(create_time) = CURDATE() + AND deleted = 0 + +UNION ALL + +SELECT + '活跃租户' AS data_type, + COUNT(DISTINCT tenant_id) AS today_count, + DATE(CURDATE()) AS statistics_date +FROM shoplazza_product_spu +WHERE DATE(create_time) = CURDATE() + AND deleted = 0; + +-- ====================================== +-- 5. 按租户统计今天入库的商品 +-- ====================================== + +-- 按租户统计今天入库的商品分布 +SELECT + spu.tenant_id, + COUNT(DISTINCT spu.id) AS spu_count, + COUNT(DISTINCT sku.id) AS sku_count, + COUNT(DISTINCT spu.shop_id) AS shop_count, + COALESCE(SUM(sku.inventory_quantity), 0) AS total_inventory, + COALESCE(AVG(sku.price), 0) AS avg_price +FROM shoplazza_product_spu spu +LEFT JOIN shoplazza_product_sku sku ON spu.id = sku.spu_id + AND spu.tenant_id = sku.tenant_id + AND sku.deleted = 0 +WHERE DATE(spu.create_time) = CURDATE() -- 今天的日期 + AND spu.deleted = 0 -- 未删除的SPU +GROUP BY spu.tenant_id +ORDER BY spu_count DESC; + +-- ====================================== +-- 6. 
查询今天入库商品的图片信息 +-- ====================================== + +-- 查询今天入库商品的主图信息(从SPU表获取) +SELECT + spu.tenant_id, + spu.shop_id, + spu.shoplazza_id AS shoplazza_product_id, + spu.image_src AS image_url, + spu.image_width AS image_width, + spu.image_height AS image_height, + spu.image_path AS image_path, + spu.image_alt AS image_alt, + spu.create_time AS product_create_time, + CASE + WHEN spu.deleted = 1 THEN '已删除' + ELSE '正常' + END AS image_status +FROM shoplazza_product_spu spu +WHERE DATE(spu.create_time) = CURDATE() -- 今天入库的商品 + AND spu.deleted = 0 -- 未删除的商品 + AND spu.image_src IS NOT NULL -- 有图片的商品 +ORDER BY spu.tenant_id, spu.shop_id, spu.shoplazza_id; + +-- ====================================== +-- 7. 查询今天入库商品的详细信息(含图片) +-- ====================================== + +-- 完整的今天入库商品信息(包含图片) +SELECT + spu.id AS spu_id, + spu.tenant_id, + spu.shop_id, + spu.shoplazza_id AS shoplazza_product_id, + spu.title AS product_title, + spu.description AS product_description, + spu.brief AS product_brief, + spu.vendor AS brand_name, + spu.category AS product_category, + spu.category_path AS category_path, + spu.handle AS product_handle, + spu.tags AS product_tags, + spu.published AS product_published, + spu.published_at AS publish_time, + spu.create_time AS spu_create_time, + + -- SKU信息聚合 + COALESCE(sku_summary.sku_count, 0) AS variant_count, + COALESCE(sku_summary.min_price, 0) AS min_price, + COALESCE(sku_summary.max_price, 0) AS max_price, + COALESCE(sku_summary.total_stock, 0) AS total_inventory, + + -- 主图信息(从SPU表直接获取) + COALESCE(spu.image_src, '') AS main_image_url, + COALESCE(spu.image_width, 0) AS main_image_width, + COALESCE(spu.image_height, 0) AS main_image_height, + COALESCE(spu.image_path, '') AS main_image_path, + COALESCE(spu.image_alt, '') AS main_image_alt + +FROM shoplazza_product_spu spu + +-- 关联SKU统计信息 +LEFT JOIN ( + SELECT + spu_id, + tenant_id, + COUNT(*) AS sku_count, + MIN(price) AS min_price, + MAX(price) AS max_price, + SUM(inventory_quantity) AS 
total_stock + FROM shoplazza_product_sku + WHERE DATE(create_time) = CURDATE() -- 今天的SKU + AND deleted = 0 + GROUP BY spu_id, tenant_id +) sku_summary ON spu.id = sku_summary.spu_id + AND spu.tenant_id = sku_summary.tenant_id + +WHERE DATE(spu.create_time) = CURDATE() -- 今天的SPU + AND spu.deleted = 0 -- 未删除的SPU +ORDER BY spu.create_time DESC; \ No newline at end of file diff --git a/docs/搜索API对接指南.md b/docs/搜索API对接指南.md index 95225f1..342c9b8 100644 --- a/docs/搜索API对接指南.md +++ b/docs/搜索API对接指南.md @@ -27,7 +27,7 @@ ```bash curl -X POST "http://120.76.41.98:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{"query": "芭比娃娃"}' ``` @@ -36,7 +36,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ ```bash curl -X POST "http://120.76.41.98:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ "query": "芭比娃娃", "size": 5, @@ -60,11 +60,10 @@ curl -X POST "http://120.76.41.98:6002/search/" \ ```bash curl -X POST "http://120.76.41.98:6002/search/" \ -H "Content-Type: application/json" \ - -H "X-Tenant-ID: 2" \ + -H "X-Tenant-ID: 162" \ -d '{ - "tenant_id": "demo-tenant", "query": "芭比娃娃", - "facets": ["category.keyword", "specifications.color", "specifications.size"], + "facets": ["category1_name", "specifications.color", "specifications.size", "specifications.material"], "min_score": 0.2 }' ``` @@ -291,10 +290,10 @@ curl -X POST "http://120.76.41.98:6002/search/" \ **模式2:指定规格名称的分面** (`"specifications.color"`): ```json { - "facets": ["specifications.color", "specifications.size"] + "facets": ["specifications.color", "specifications.size", "specifications.material"] } ``` -只返回指定规格名称的值列表。格式:`specifications.{name}`,其中 `{name}` 是规格名称(如"color"、"size")。 +只返回指定规格名称的值列表。格式:`specifications.{name}`,其中 `{name}` 是规格名称(如"color"、"size"、"material")。 **返回格式示例**: ```json @@ -879,9 +878,9 @@ curl "http://localhost:6002/search/instant?q=玩具&size=5" { "id": "12345", "source": { - 
"title": "芭比时尚娃娃", + "title_zh": "芭比时尚娃娃", "min_price": 89.99, - "category.keyword": "玩具" + "category1_name": "玩具" } } ``` diff --git a/docs/搜索API速查表.md b/docs/搜索API速查表.md index e40a189..40e6529 100644 --- a/docs/搜索API速查表.md +++ b/docs/搜索API速查表.md @@ -60,7 +60,7 @@ POST /search/ ```bash { "range_filters": { - "price": { + "min_price": { "gte": 50, // >= "lte": 200 // <= } @@ -94,7 +94,7 @@ POST /search/ **指定规格名称**: ```bash { - "facets": ["specifications.color", "specifications.size"] // 只返回指定name的value列表 + "facets": ["specifications.color", "specifications.size", "specifications.material"] // 只返回指定name的value列表 } ``` @@ -114,7 +114,8 @@ POST /search/ }, "specifications", // 所有规格名称 "specifications.color", // 指定规格名称 - "specifications.size" + "specifications.size", + "specifications.material" ] } ``` @@ -190,7 +191,7 @@ POST /search/ ```bash POST /search/ -Headers: X-Tenant-ID: 2 +Headers: X-Tenant-ID: 162 { "query": "手机", "size": 20, diff --git a/docs/索引字段说明v2-参考表结构.md b/docs/索引字段说明v2-参考表结构.md index 7dba98b..f538dc4 100644 --- a/docs/索引字段说明v2-参考表结构.md +++ b/docs/索引字段说明v2-参考表结构.md @@ -1,4 +1,4 @@ -spu表全部字段 +spu表 shoplazza_product_spu 全部字段 "Field" "Type" "Null" "Key" "Default" "Extra" "id" "bigint(20)" "NO" "PRI" "auto_increment" "shop_id" "bigint(20)" "NO" "MUL" "" @@ -46,7 +46,7 @@ spu表全部字段 "update_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "on update CURRENT_TIMESTAMP" "deleted" "bit(1)" "NO" "" "b'0'" "" -sku全部字段 +shoplazza_product_sku 全部字段 "Field" "Type" "Null" "Key" "Default" "Extra" "id" "bigint(20)" "NO" "PRI" "auto_increment" "spu_id" "bigint(20)" "NO" "MUL" "" diff --git a/docs/索引字段说明v2.md b/docs/索引字段说明v2.md index 01f9fb4..04d9331 100644 --- a/docs/索引字段说明v2.md +++ b/docs/索引字段说明v2.md @@ -124,7 +124,7 @@ { "query": "手机", "filters": { - "specifications": { + "specifications": { "name": "color", "value": "white" } @@ -141,21 +141,21 @@ {"name": "color", "value": "white"}, {"name": "size", "value": "256GB"} ] - } + } } ``` **ES 查询结构**(后端自动生成): ```json { - 
"nested": { - "path": "specifications", - "query": { - "bool": { - "must": [ + "nested": { + "path": "specifications", + "query": { + "bool": { + "must": [ { "term": { "specifications.name": "color" } }, { "term": { "specifications.value": "white" } } - ] + ] } } } @@ -202,7 +202,7 @@ } } ``` - + 指定规格名称: ```json { @@ -402,3 +402,4 @@ filters AND (text_recall OR embedding_recall) 3. **多语言支持**: 文本字段支持中英文,后端根据 `language` 参数自动选择 4. **规格分面**: `specifications` 使用嵌套聚合,按 `name` 分组,然后按 `value` 聚合 5. **向量字段**: `title_embedding` 和 `image_embedding` 仅用于搜索,不返回给前端 + \ No newline at end of file diff --git a/frontend/index.html b/frontend/index.html index ed20b40..bca1767 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -32,22 +32,28 @@
- +
-
Categories:
-
+
Category:
+
- +
-
Brand:
-
+
Color:
+
- +
-
Supplier:
-
+
Size:
+
+
+ + +
+
Material:
+
@@ -124,6 +130,6 @@

SearchEngine © 2025 | API: Loading...

- + diff --git a/frontend/static/js/app.js b/frontend/static/js/app.js index 2e86148..94f5f87 100644 --- a/frontend/static/js/app.js +++ b/frontend/static/js/app.js @@ -31,9 +31,6 @@ let state = { // Initialize document.addEventListener('DOMContentLoaded', function() { - console.log('SearchEngine loaded'); - console.log('Debug mode: always enabled (test frontend)'); - document.getElementById('searchInput').focus(); }); @@ -71,33 +68,12 @@ async function performSearch(page = 1) { const from = (page - 1) * state.pageSize; - // Define facets (简化配置) + // Define facets (一级分类 + 三个属性分面) const facets = [ - { - "field": "category.keyword", - "size": 15, - "type": "terms" - }, - { - "field": "vendor.keyword", - "size": 15, - "type": "terms" - }, - { - "field": "tags.keyword", - "size": 10, - "type": "terms" - }, - { - "field": "min_price", - "type": "range", - "ranges": [ - {"key": "0-50", "to": 50}, - {"key": "50-100", "from": 50, "to": 100}, - {"key": "100-200", "from": 100, "to": 200}, - {"key": "200+", "from": 200} - ] - } + "category1_name", // 一级分类 + "specifications.color", // 颜色属性 + "specifications.size", // 尺寸属性 + "specifications.material" // 材质属性 ]; // Show loading @@ -210,43 +186,91 @@ function displayResults(data) { grid.innerHTML = html; } -// Display facets as filter tags (重构版 - 标准化格式) +// Display facets as filter tags (一级分类 + 三个属性分面) function displayFacets(facets) { - if (!facets) return; + if (!facets || !Array.isArray(facets)) { + return; + } - facets.forEach(facet => { + facets.forEach((facet) => { // 根据字段名找到对应的容器 let containerId = null; let maxDisplay = 10; - if (facet.field === 'category.keyword') { - containerId = 'categoryTags'; + // 一级分类 + if (facet.field === 'category1_name') { + containerId = 'category1Tags'; maxDisplay = 10; - } else if (facet.field === 'vendor.keyword') { - containerId = 'brandTags'; + } + // 颜色属性分面 (specifications.color) + else if (facet.field === 'specifications.color') { + containerId = 'colorTags'; + maxDisplay = 10; + } + // 
尺寸属性分面 (specifications.size) + else if (facet.field === 'specifications.size') { + containerId = 'sizeTags'; + maxDisplay = 10; + } + // 材质属性分面 (specifications.material) + else if (facet.field === 'specifications.material') { + containerId = 'materialTags'; maxDisplay = 10; - } else if (facet.field === 'tags.keyword') { - containerId = 'supplierTags'; - maxDisplay = 8; } - if (!containerId) return; + if (!containerId) { + return; + } const container = document.getElementById(containerId); - if (!container) return; + if (!container) { + return; + } + + // 检查values是否存在且是数组 + if (!facet.values || !Array.isArray(facet.values) || facet.values.length === 0) { + container.innerHTML = ''; + return; + } let html = ''; // 渲染分面值 - facet.values.slice(0, maxDisplay).forEach(facetValue => { + facet.values.slice(0, maxDisplay).forEach((facetValue) => { + if (!facetValue || typeof facetValue !== 'object') { + return; + } + const value = facetValue.value; const count = facetValue.count; - const selected = facetValue.selected; + + // 允许value为0或空字符串,但不允许undefined/null + if (value === undefined || value === null) { + return; + } + + // 检查是否已选中 + let selected = false; + if (facet.field.startsWith('specifications.')) { + // 检查specifications过滤 + const specName = facet.field.split('.')[1]; + if (state.filters.specifications) { + const specs = Array.isArray(state.filters.specifications) + ? 
state.filters.specifications + : [state.filters.specifications]; + selected = specs.some(spec => spec && spec.name === specName && spec.value === value); + } + } else { + // 检查普通字段过滤 + if (state.filters[facet.field]) { + selected = state.filters[facet.field].includes(value); + } + } html += ` - ${escapeHtml(value)} (${count}) + onclick="toggleFilter('${escapeAttr(facet.field)}', '${escapeAttr(String(value))}')"> + ${escapeHtml(String(value))} (${count || 0}) `; }); @@ -255,20 +279,56 @@ function displayFacets(facets) { }); } -// Toggle filter +// Toggle filter (支持specifications嵌套过滤) function toggleFilter(field, value) { - if (!state.filters[field]) { - state.filters[field] = []; - } - - const index = state.filters[field].indexOf(value); - if (index > -1) { - state.filters[field].splice(index, 1); - if (state.filters[field].length === 0) { - delete state.filters[field]; + // 处理specifications属性过滤 (specifications.color, specifications.size, specifications.material) + if (field.startsWith('specifications.')) { + const specName = field.split('.')[1]; // 提取name (color, size, material) + + // 初始化specifications过滤 + if (!state.filters.specifications) { + state.filters.specifications = []; + } + + // 确保是数组格式 + if (!Array.isArray(state.filters.specifications)) { + // 如果已经是单个对象,转换为数组 + state.filters.specifications = [state.filters.specifications]; + } + + // 查找是否已存在相同的name和value组合 + const existingIndex = state.filters.specifications.findIndex( + spec => spec.name === specName && spec.value === value + ); + + if (existingIndex > -1) { + // 移除 + state.filters.specifications.splice(existingIndex, 1); + if (state.filters.specifications.length === 0) { + delete state.filters.specifications; + } else if (state.filters.specifications.length === 1) { + // 如果只剩一个,可以保持为数组,或转换为单个对象(API都支持) + // 这里保持为数组,更一致 + } + } else { + // 添加 + state.filters.specifications.push({ name: specName, value: value }); } } else { - state.filters[field].push(value); + // 处理普通字段过滤 (category1_name等) + if 
(!state.filters[field]) { + state.filters[field] = []; + } + + const index = state.filters[field].indexOf(value); + if (index > -1) { + state.filters[field].splice(index, 1); + if (state.filters[field].length === 0) { + delete state.filters[field]; + } + } else { + state.filters[field].push(value); + } } performSearch(1); // Reset to page 1 diff --git a/indexer/mapping_generator.py b/indexer/mapping_generator.py index dbd5e28..09b8f31 100644 --- a/indexer/mapping_generator.py +++ b/indexer/mapping_generator.py @@ -19,13 +19,13 @@ DEFAULT_MAPPING_FILE = Path(__file__).parent.parent / "mappings" / "search_produ def load_mapping(mapping_file: str = None) -> Dict[str, Any]: - """ + """ Load Elasticsearch mapping from JSON file. - Args: + Args: mapping_file: Path to mapping JSON file. If None, uses default. - Returns: + Returns: Dictionary containing index configuration (settings + mappings) Raises: @@ -66,8 +66,8 @@ def create_index_if_not_exists(es_client, index_name: str, mapping: Dict[str, An mapping = load_mapping() if es_client.create_index(index_name, mapping): - logger.info(f"Index '{index_name}' created successfully") - return True + logger.info(f"Index '{index_name}' created successfully") + return True else: logger.error(f"Failed to create index '{index_name}'") return False @@ -89,8 +89,8 @@ def delete_index_if_exists(es_client, index_name: str) -> bool: return False if es_client.delete_index(index_name): - logger.info(f"Index '{index_name}' deleted successfully") - return True + logger.info(f"Index '{index_name}' deleted successfully") + return True else: logger.error(f"Failed to delete index '{index_name}'") return False @@ -114,8 +114,8 @@ def update_mapping(es_client, index_name: str, new_fields: Dict[str, Any]) -> bo mapping = {"properties": new_fields} if es_client.update_mapping(index_name, mapping): - logger.info(f"Mapping updated for index '{index_name}'") - return True + logger.info(f"Mapping updated for index '{index_name}'") + return True else: 
logger.error(f"Failed to update mapping for index '{index_name}'") return False diff --git a/indexer/spu_transformer.py b/indexer/spu_transformer.py index 631fad3..5b1c481 100644 --- a/indexer/spu_transformer.py +++ b/indexer/spu_transformer.py @@ -238,12 +238,35 @@ class SPUTransformer: doc['category2_name'] = path_parts[1].strip() if len(path_parts) > 2: doc['category3_name'] = path_parts[2].strip() + elif pd.notna(spu_row.get('category')): + # 如果category_path为空,使用category字段作为category1_name的备选 + category = str(spu_row['category']) + doc['category_name_zh'] = category + doc['category_name_en'] = None + doc['category_name'] = category + + # 尝试从category字段解析多级分类 + if '/' in category: + path_parts = category.split('/') + if len(path_parts) > 0: + doc['category1_name'] = path_parts[0].strip() + if len(path_parts) > 1: + doc['category2_name'] = path_parts[1].strip() + if len(path_parts) > 2: + doc['category3_name'] = path_parts[2].strip() + else: + # 如果category不包含"/",直接作为category1_name + doc['category1_name'] = category.strip() if pd.notna(spu_row.get('category')): + # 确保category相关字段都被设置(如果前面没有设置) category_name = str(spu_row['category']) - doc['category_name_zh'] = category_name - doc['category_name_en'] = None - doc['category_name'] = category_name + if 'category_name_zh' not in doc: + doc['category_name_zh'] = category_name + if 'category_name_en' not in doc: + doc['category_name_en'] = None + if 'category_name' not in doc: + doc['category_name'] = category_name if pd.notna(spu_row.get('category_id')): doc['category_id'] = str(int(spu_row['category_id'])) @@ -459,7 +482,7 @@ class SPUTransformer: sku_data['option2_value'] = str(sku_row['option2']) if pd.notna(sku_row.get('option3')): sku_data['option3_value'] = str(sku_row['option3']) - + # Image src if pd.notna(sku_row.get('image_src')): sku_data['image_src'] = str(sku_row['image_src']) diff --git a/scripts/check_data_source.py b/scripts/check_data_source.py new file mode 100755 index 0000000..58fe105 --- /dev/null 
+++ b/scripts/check_data_source.py @@ -0,0 +1,301 @@ +#!/usr/bin/env python3 +""" +诊断脚本:检查MySQL数据源中分类和规格信息是否正确 + +检查: +1. category_path 字段是否有值 +2. category_path 格式是否正确(应该能被解析为 category1_name) +3. shoplazza_product_option 表的 name 字段是否有值(应该是 "color", "size", "material") +4. shoplazza_product_sku 表的 option1/2/3 字段是否有值 +""" + +import sys +import argparse +from pathlib import Path +from sqlalchemy import create_engine, text + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from utils.db_connector import create_db_connection + + +def check_category_path(db_engine, tenant_id: str): + """检查 category_path 和 category 字段""" + print("\n" + "="*60) + print("1. 检查 category_path 和 category 字段") + print("="*60) + + query = text(""" + SELECT + COUNT(*) as total, + COUNT(category_path) as has_category_path, + COUNT(*) - COUNT(category_path) as null_category_path, + COUNT(category) as has_category, + COUNT(*) - COUNT(category) as null_category + FROM shoplazza_product_spu + WHERE tenant_id = :tenant_id AND deleted = 0 + """) + + with db_engine.connect() as conn: + result = conn.execute(query, {"tenant_id": tenant_id}).fetchone() + total = result[0] + has_category_path = result[1] + null_category_path = result[2] + has_category = result[3] + null_category = result[4] + + print(f"总SPU数: {total}") + print(f"有 category_path 的SPU: {has_category_path}") + print(f"category_path 为空的SPU: {null_category_path}") + print(f"有 category 的SPU: {has_category}") + print(f"category 为空的SPU: {null_category}") + + # 查看category字段的示例 + if has_category > 0: + sample_query = text(""" + SELECT id, title, category_path, category, category_id, category_level + FROM shoplazza_product_spu + WHERE tenant_id = :tenant_id + AND deleted = 0 + AND category IS NOT NULL + LIMIT 5 + """) + samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall() + print(f"\n示例数据(前5条有 category 的记录):") + for row in samples: + print(f" SPU ID: {row[0]}, Title: {row[1][:50] if row[1] 
else ''}") + print(f" category_path: {row[2]}") + print(f" category: '{row[3]}'") + print(f" category_id: {row[4]}, category_level: {row[5]}") + + # 解析 category 字段(用于生成 category1_name) + if row[3]: + category = str(row[3]) + if '/' in category: + path_parts = category.split('/') + print(f" 解析后(按'/'分割): {path_parts}") + if len(path_parts) > 0: + print(f" → category1_name: '{path_parts[0].strip()}'") + else: + print(f" → category1_name: '{category.strip()}'(直接作为category1_name)") + else: + print("\n⚠️ 警告: 没有SPU有 category 值!") + + # 查看category_path的示例(如果有) + if has_category_path > 0: + sample_query = text(""" + SELECT id, title, category_path, category + FROM shoplazza_product_spu + WHERE tenant_id = :tenant_id + AND deleted = 0 + AND category_path IS NOT NULL + LIMIT 3 + """) + samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall() + print(f"\n示例数据(有 category_path 的记录):") + for row in samples: + print(f" SPU ID: {row[0]}, Title: {row[1][:50] if row[1] else ''}") + print(f" category_path: '{row[2]}'") + print(f" category: '{row[3]}'") + + # 检查是否是ID列表格式 + if row[2] and ',' in str(row[2]) and not '/' in str(row[2]): + print(f" ⚠️ 注意: category_path是ID列表格式(逗号分隔),不是路径格式") + + +def check_options(db_engine, tenant_id: str): + """检查 option 表的 name 字段""" + print("\n" + "="*60) + print("2. 
检查 shoplazza_product_option 表的 name 字段") + print("="*60) + + query = text(""" + SELECT + COUNT(*) as total_options, + COUNT(DISTINCT name) as distinct_names, + COUNT(DISTINCT spu_id) as spus_with_options + FROM shoplazza_product_option + WHERE tenant_id = :tenant_id AND deleted = 0 + """) + + with db_engine.connect() as conn: + result = conn.execute(query, {"tenant_id": tenant_id}).fetchone() + total_options = result[0] + distinct_names = result[1] + spus_with_options = result[2] + + print(f"总 option 记录数: {total_options}") + print(f"不同的 name 数量: {distinct_names}") + print(f"有 option 定义的 SPU 数量: {spus_with_options}") + + if total_options > 0: + # 查看不同的 name 值 + name_query = text(""" + SELECT DISTINCT name, position, COUNT(*) as count + FROM shoplazza_product_option + WHERE tenant_id = :tenant_id AND deleted = 0 + GROUP BY name, position + ORDER BY position, name + """) + names = conn.execute(name_query, {"tenant_id": tenant_id}).fetchall() + print(f"\n不同的 name 值:") + for row in names: + print(f" position={row[1]}, name='{row[0]}', count={row[2]}") + + # 查看一些示例 + sample_query = text(""" + SELECT spu_id, position, name, `values` + FROM shoplazza_product_option + WHERE tenant_id = :tenant_id AND deleted = 0 + ORDER BY spu_id, position + LIMIT 10 + """) + samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall() + print(f"\n示例数据(前10条 option 记录):") + for row in samples: + print(f" SPU ID: {row[0]}, position: {row[1]}, name: '{row[2]}', values: {row[3]}") + else: + print("\n⚠️ 警告: 没有 option 记录!") + + +def check_sku_options(db_engine, tenant_id: str): + """检查 SKU 表的 option1/2/3 字段""" + print("\n" + "="*60) + print("3. 
检查 shoplazza_product_sku 表的 option1/2/3 字段") + print("="*60) + + query = text(""" + SELECT + COUNT(*) as total_skus, + COUNT(option1) as has_option1, + COUNT(option2) as has_option2, + COUNT(option3) as has_option3, + COUNT(DISTINCT spu_id) as distinct_spus + FROM shoplazza_product_sku + WHERE tenant_id = :tenant_id AND deleted = 0 + """) + + with db_engine.connect() as conn: + result = conn.execute(query, {"tenant_id": tenant_id}).fetchone() + total_skus = result[0] + has_option1 = result[1] + has_option2 = result[2] + has_option3 = result[3] + distinct_spus = result[4] + + print(f"总 SKU 数: {total_skus}") + print(f"有 option1 的 SKU: {has_option1}") + print(f"有 option2 的 SKU: {has_option2}") + print(f"有 option3 的 SKU: {has_option3}") + print(f"不同的 SPU 数量: {distinct_spus}") + + if total_skus > 0: + # 查看一些示例 + sample_query = text(""" + SELECT spu_id, id, option1, option2, option3 + FROM shoplazza_product_sku + WHERE tenant_id = :tenant_id AND deleted = 0 + ORDER BY spu_id, id + LIMIT 10 + """) + samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall() + print(f"\n示例数据(前10条 SKU 记录):") + for row in samples: + print(f" SPU ID: {row[0]}, SKU ID: {row[1]}") + print(f" option1: '{row[2]}', option2: '{row[3]}', option3: '{row[4]}'") + else: + print("\n⚠️ 警告: 没有 SKU 记录!") + + +def check_spu_summary(db_engine, tenant_id: str): + """检查 SPU 汇总信息""" + print("\n" + "="*60) + print("4. 
SPU 汇总信息") + print("="*60) + + query = text(""" + SELECT + COUNT(DISTINCT spu.id) as total_spus, + COUNT(DISTINCT sku.id) as total_skus, + COUNT(DISTINCT opt.id) as total_options, + COUNT(DISTINCT CASE WHEN spu.category_path IS NOT NULL THEN spu.id END) as spus_with_category_path, + COUNT(DISTINCT opt.spu_id) as spus_with_options + FROM shoplazza_product_spu spu + LEFT JOIN shoplazza_product_sku sku ON spu.id = sku.spu_id AND sku.tenant_id = :tenant_id AND sku.deleted = 0 + LEFT JOIN shoplazza_product_option opt ON spu.id = opt.spu_id AND opt.tenant_id = :tenant_id AND opt.deleted = 0 + WHERE spu.tenant_id = :tenant_id AND spu.deleted = 0 + """) + + with db_engine.connect() as conn: + result = conn.execute(query, {"tenant_id": tenant_id}).fetchone() + total_spus = result[0] + total_skus = result[1] + total_options = result[2] + spus_with_category_path = result[3] + spus_with_options = result[4] + + print(f"总 SPU 数: {total_spus}") + print(f"总 SKU 数: {total_skus}") + print(f"总 option 记录数: {total_options}") + print(f"有 category_path 的 SPU: {spus_with_category_path}") + print(f"有 option 定义的 SPU: {spus_with_options}") + + +def main(): + parser = argparse.ArgumentParser(description='检查MySQL数据源中的分类和规格信息') + parser.add_argument('--tenant-id', required=True, help='Tenant ID') + parser.add_argument('--db-host', help='MySQL host (或使用环境变量 DB_HOST)') + parser.add_argument('--db-port', type=int, help='MySQL port (或使用环境变量 DB_PORT, 默认: 3306)') + parser.add_argument('--db-database', help='MySQL database (或使用环境变量 DB_DATABASE)') + parser.add_argument('--db-username', help='MySQL username (或使用环境变量 DB_USERNAME)') + parser.add_argument('--db-password', help='MySQL password (或使用环境变量 DB_PASSWORD)') + + args = parser.parse_args() + + # 连接数据库 + import os + db_host = args.db_host or os.environ.get('DB_HOST') + db_port = args.db_port or int(os.environ.get('DB_PORT', 3306)) + db_database = args.db_database or os.environ.get('DB_DATABASE') + db_username = args.db_username or 
os.environ.get('DB_USERNAME') + db_password = args.db_password or os.environ.get('DB_PASSWORD') + + if not all([db_host, db_database, db_username, db_password]): + print("错误: MySQL连接参数不完整") + print("请提供 --db-host, --db-database, --db-username, --db-password") + print("或设置环境变量: DB_HOST, DB_DATABASE, DB_USERNAME, DB_PASSWORD") + return 1 + + print(f"连接MySQL: {db_host}:{db_port}/{db_database}") + print(f"Tenant ID: {args.tenant_id}") + + try: + db_engine = create_db_connection( + host=db_host, + port=db_port, + database=db_database, + username=db_username, + password=db_password + ) + print("✓ MySQL连接成功\n") + except Exception as e: + print(f"✗ 连接MySQL失败: {e}") + return 1 + + # 执行检查 + check_spu_summary(db_engine, args.tenant_id) + check_category_path(db_engine, args.tenant_id) + check_options(db_engine, args.tenant_id) + check_sku_options(db_engine, args.tenant_id) + + print("\n" + "="*60) + print("检查完成") + print("="*60) + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/scripts/check_es_data.py b/scripts/check_es_data.py new file mode 100755 index 0000000..ef9c735 --- /dev/null +++ b/scripts/check_es_data.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python3 +""" +检查ES索引中的实际数据,看分面字段是否有值 +""" + +import sys +import os +import argparse +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from utils.es_client import ESClient + + +def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): + """检查ES中的分面相关字段""" + print("\n" + "="*60) + print("检查ES索引中的分面字段数据") + print("="*60) + + query = { + "query": { + "term": { + "tenant_id": tenant_id + } + }, + "size": size, + "_source": [ + "spu_id", + "title_zh", + "category1_name", + "category2_name", + "category3_name", + "category_name", + "category_path_zh", + "specifications", + "option1_name", + "option2_name", + "option3_name" + ] + } + + try: + response = es_client.client.search(index="search_products", body=query) + hits = response.get('hits', {}).get('hits', []) + 
total = response.get('hits', {}).get('total', {}).get('value', 0) + + print(f"\n总文档数: {total}") + print(f"检查前 {len(hits)} 个文档:\n") + + for i, hit in enumerate(hits, 1): + source = hit.get('_source', {}) + print(f"文档 {i}:") + print(f" spu_id: {source.get('spu_id')}") + print(f" title_zh: {source.get('title_zh', '')[:50]}") + print(f" category1_name: {source.get('category1_name')}") + print(f" category2_name: {source.get('category2_name')}") + print(f" category3_name: {source.get('category3_name')}") + print(f" category_name: {source.get('category_name')}") + print(f" category_path_zh: {source.get('category_path_zh')}") + print(f" option1_name: {source.get('option1_name')}") + print(f" option2_name: {source.get('option2_name')}") + print(f" option3_name: {source.get('option3_name')}") + + specs = source.get('specifications', []) + if specs: + print(f" specifications 数量: {len(specs)}") + # 显示前3个specifications + for spec in specs[:3]: + print(f" - name: {spec.get('name')}, value: {spec.get('value')}") + else: + print(f" specifications: 空") + print() + + except Exception as e: + print(f"错误: {e}") + import traceback + traceback.print_exc() + + +def check_facet_aggregations(es_client, tenant_id: str): + """检查分面聚合查询""" + print("\n" + "="*60) + print("检查分面聚合查询结果") + print("="*60) + + query = { + "query": { + "term": { + "tenant_id": tenant_id + } + }, + "size": 0, + "aggs": { + "category1_facet": { + "terms": { + "field": "category1_name", + "size": 10 + } + }, + "color_facet": { + "nested": { + "path": "specifications" + }, + "aggs": { + "filter_by_name": { + "filter": { + "term": { + "specifications.name": "color" + } + }, + "aggs": { + "value_counts": { + "terms": { + "field": "specifications.value", + "size": 10 + } + } + } + } + } + }, + "size_facet": { + "nested": { + "path": "specifications" + }, + "aggs": { + "filter_by_name": { + "filter": { + "term": { + "specifications.name": "size" + } + }, + "aggs": { + "value_counts": { + "terms": { + "field": 
"specifications.value", + "size": 10 + } + } + } + } + } + }, + "material_facet": { + "nested": { + "path": "specifications" + }, + "aggs": { + "filter_by_name": { + "filter": { + "term": { + "specifications.name": "material" + } + }, + "aggs": { + "value_counts": { + "terms": { + "field": "specifications.value", + "size": 10 + } + } + } + } + } + } + } + } + + try: + response = es_client.client.search(index="search_products", body=query) + aggs = response.get('aggregations', {}) + + print("\n1. category1_name 分面:") + category1 = aggs.get('category1_facet', {}) + buckets = category1.get('buckets', []) + if buckets: + for bucket in buckets: + print(f" {bucket['key']}: {bucket['doc_count']}") + else: + print(" 空(没有数据)") + + print("\n2. specifications.color 分面:") + color_agg = aggs.get('color_facet', {}) + color_filter = color_agg.get('filter_by_name', {}) + color_values = color_filter.get('value_counts', {}) + color_buckets = color_values.get('buckets', []) + if color_buckets: + for bucket in color_buckets: + print(f" {bucket['key']}: {bucket['doc_count']}") + else: + print(" 空(没有数据)") + + print("\n3. specifications.size 分面:") + size_agg = aggs.get('size_facet', {}) + size_filter = size_agg.get('filter_by_name', {}) + size_values = size_filter.get('value_counts', {}) + size_buckets = size_values.get('buckets', []) + if size_buckets: + for bucket in size_buckets: + print(f" {bucket['key']}: {bucket['doc_count']}") + else: + print(" 空(没有数据)") + + print("\n4. 
specifications.material 分面:") + material_agg = aggs.get('material_facet', {}) + material_filter = material_agg.get('filter_by_name', {}) + material_values = material_filter.get('value_counts', {}) + material_buckets = material_values.get('buckets', []) + if material_buckets: + for bucket in material_buckets: + print(f" {bucket['key']}: {bucket['doc_count']}") + else: + print(" 空(没有数据)") + + except Exception as e: + print(f"错误: {e}") + import traceback + traceback.print_exc() + + +def main(): + parser = argparse.ArgumentParser(description='检查ES索引中的分面字段数据') + parser.add_argument('--tenant-id', required=True, help='Tenant ID') + parser.add_argument('--es-host', help='Elasticsearch host (或使用环境变量 ES_HOST, 默认: http://localhost:9200)') + parser.add_argument('--size', type=int, default=5, help='检查的文档数量 (默认: 5)') + + args = parser.parse_args() + + # 连接ES + es_host = args.es_host or os.environ.get('ES_HOST', 'http://localhost:9200') + es_username = os.environ.get('ES_USERNAME') + es_password = os.environ.get('ES_PASSWORD') + + print(f"连接Elasticsearch: {es_host}") + print(f"Tenant ID: {args.tenant_id}\n") + + try: + if es_username and es_password: + es_client = ESClient(hosts=[es_host], username=es_username, password=es_password) + else: + es_client = ESClient(hosts=[es_host]) + + if not es_client.ping(): + print(f"✗ 无法连接到Elasticsearch: {es_host}") + return 1 + print("✓ Elasticsearch连接成功\n") + except Exception as e: + print(f"✗ 连接Elasticsearch失败: {e}") + return 1 + + # 检查ES数据 + check_es_facet_fields(es_client, args.tenant_id, args.size) + check_facet_aggregations(es_client, args.tenant_id) + + print("\n" + "="*60) + print("检查完成") + print("="*60) + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/scripts/test_facet_api.py b/scripts/test_facet_api.py new file mode 100755 index 0000000..e6c7f55 --- /dev/null +++ b/scripts/test_facet_api.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +测试脚本:模拟前端请求,检查后端返回的分面结果 +""" + +import sys +import json +import 
requests +import argparse +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent)) + + +def main(): + parser = argparse.ArgumentParser(description='测试分面API') + parser.add_argument('--api-url', type=str, default='http://localhost:6002/search/', help='API URL') + parser.add_argument('--tenant-id', type=str, required=True, help='Tenant ID') + args = parser.parse_args() + + # 模拟前端的分面请求(与frontend/static/js/app.js一致) + request_data = { + "query": "", # 空查询,获取所有数据 + "size": 10, + "from": 0, + "facets": [ + "category1_name", + "specifications.color", + "specifications.size", + "specifications.material" + ] + } + + headers = { + "Content-Type": "application/json", + "X-Tenant-ID": args.tenant_id + } + + try: + print(f"发送请求到: {args.api_url}") + print(f"Tenant ID: {args.tenant_id}") + print(f"请求数据:") + print(json.dumps(request_data, indent=2, ensure_ascii=False)) + print("\n" + "="*60) + + response = requests.post(args.api_url, json=request_data, headers=headers, timeout=30) + + if response.status_code != 200: + print(f"API错误: {response.status_code}") + print(response.text) + return 1 + + data = response.json() + + print("API响应:") + print(f" 总结果数: {data.get('total', 0)}") + print(f" 返回结果数: {len(data.get('results', []))}") + + facets = data.get('facets', []) + print(f"\n分面数量: {len(facets)}") + + if not facets: + print("\n⚠ 分面列表为空!") + return 1 + + print("\n" + "="*60) + print("分面详情:") + print("="*60) + + for i, facet in enumerate(facets, 1): + print(f"\n{i}. 
{facet.get('field')}") + print(f" 标签: {facet.get('label')}") + print(f" 类型: {facet.get('type')}") + print(f" 值数量: {len(facet.get('values', []))}") + print(f" 总计数: {facet.get('total_count', 0)}") + + values = facet.get('values', []) + if values: + print(f" 前5个值:") + for v in values[:5]: + print(f" - {v.get('value')}: {v.get('count')}") + else: + print(f" ⚠ 值列表为空!") + + # 检查specifications.color分面 + print("\n" + "="*60) + print("检查specifications.color分面:") + print("="*60) + + color_facet = None + for facet in facets: + if facet.get('field') == 'specifications.color': + color_facet = facet + break + + if color_facet: + print("✓ 找到specifications.color分面") + print(f" 值数量: {len(color_facet.get('values', []))}") + if color_facet.get('values'): + print(" 前10个值:") + for v in color_facet.get('values', [])[:10]: + print(f" {v.get('value')}: {v.get('count')}") + else: + print(" ⚠ 值列表为空!") + else: + print("✗ 未找到specifications.color分面") + print(f" 可用分面字段: {[f.get('field') for f in facets]}") + + # 输出完整JSON(便于调试) + print("\n" + "="*60) + print("完整分面JSON(前500字符):") + print("="*60) + facets_json = json.dumps(facets, indent=2, ensure_ascii=False) + print(facets_json[:500]) + + except requests.exceptions.ConnectionError as e: + print(f"\n连接错误: 无法连接到API服务器 {args.api_url}") + print("请确保后端服务正在运行") + return 1 + except Exception as e: + print(f"\n错误: {e}") + import traceback + traceback.print_exc() + return 1 + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) + -- libgit2 0.21.2