Commit 2e48a32da4decd6f951be72a91cdb60e959cedcf
1 parent
cd6d887e
doc
Showing
7 changed files
with
40 additions
and
345 deletions
Show diff stats
docs/MySQL到ES字段映射说明-业务版.md
| @@ -52,7 +52,7 @@ | @@ -52,7 +52,7 @@ | ||
| 52 | 52 | ||
| 53 | | ES 字段 | 数据来源表 | 表中字段 | 转换说明 | | 53 | | ES 字段 | 数据来源表 | 表中字段 | 转换说明 | |
| 54 | |---------|-----------|----------|----------| | 54 | |---------|-----------|----------|----------| |
| 55 | -| `image_url` | SPU 表 | `image_src` | 直接使用,如果 URL 不以 `http` 开头则添加 `//` 前缀 | | 55 | +| `image_url` | SPU 表 | `image_src` | | |
| 56 | 56 | ||
| 57 | ### 2.5 销量字段 | 57 | ### 2.5 销量字段 |
| 58 | 58 |
docs/索引字段说明v2-mapping结构.md deleted
| @@ -1,231 +0,0 @@ | @@ -1,231 +0,0 @@ | ||
| 1 | -{ | ||
| 2 | - "mappings": { | ||
| 3 | - "properties": { | ||
| 4 | - "tenant_id": { | ||
| 5 | - "type": "keyword" | ||
| 6 | - }, | ||
| 7 | - "spu_id": { | ||
| 8 | - "type": "keyword" | ||
| 9 | - }, | ||
| 10 | - | ||
| 11 | - "create_time": { | ||
| 12 | - "type": "date" | ||
| 13 | - }, | ||
| 14 | - "update_time": { | ||
| 15 | - "type": "date" | ||
| 16 | - }, | ||
| 17 | - | ||
| 18 | - // 文本相关性相关字段 | ||
| 19 | - "title.zh": { | ||
| 20 | - "type": "text", | ||
| 21 | - "analyzer": "hanlp_index", | ||
| 22 | - "search_analyzer": "hanlp_standard" | ||
| 23 | - }, | ||
| 24 | - "brief.zh": { | ||
| 25 | - "type": "text", | ||
| 26 | - "analyzer": "hanlp_index", | ||
| 27 | - "search_analyzer": "hanlp_standard" | ||
| 28 | - }, | ||
| 29 | - "description.zh": { | ||
| 30 | - "type": "text", | ||
| 31 | - "analyzer": "hanlp_index", | ||
| 32 | - "search_analyzer": "hanlp_standard" | ||
| 33 | - }, | ||
| 34 | - "vendor.zh": { | ||
| 35 | - "type": "text", | ||
| 36 | - "analyzer": "hanlp_index", | ||
| 37 | - "search_analyzer": "hanlp_standard", | ||
| 38 | - "fields": { | ||
| 39 | - "keyword": { | ||
| 40 | - "type": "keyword", | ||
| 41 | - "normalizer": "lowercase" | ||
| 42 | - } | ||
| 43 | - } | ||
| 44 | - }, | ||
| 45 | - | ||
| 46 | - "title.en": { | ||
| 47 | - "type": "text", | ||
| 48 | - "analyzer": "english", | ||
| 49 | - "search_analyzer": "english", | ||
| 50 | - }, | ||
| 51 | - "brief.en": { | ||
| 52 | - "type": "text", | ||
| 53 | - "analyzer": "english", | ||
| 54 | - "search_analyzer": "english", | ||
| 55 | - | ||
| 56 | - }, | ||
| 57 | - "description.en": { | ||
| 58 | - "type": "text", | ||
| 59 | - "analyzer": "english", | ||
| 60 | - "search_analyzer": "english", | ||
| 61 | - }, | ||
| 62 | - "vendor.en": { | ||
| 63 | - "type": "text", | ||
| 64 | - "analyzer": "english", | ||
| 65 | - "search_analyzer": "english", | ||
| 66 | - "fields": { | ||
| 67 | - "keyword": { | ||
| 68 | - "type": "keyword", | ||
| 69 | - "normalizer": "lowercase" | ||
| 70 | - } | ||
| 71 | - } | ||
| 72 | - }, | ||
| 73 | - | ||
| 74 | - "tags": { | ||
| 75 | - "type": "keyword", | ||
| 76 | - }, | ||
| 77 | - | ||
| 78 | - "image_url": { | ||
| 79 | - "type": "keyword", | ||
| 80 | - "index": false | ||
| 81 | - }, | ||
| 82 | - | ||
| 83 | - // 语义向量 | ||
| 84 | - "title_embedding": { | ||
| 85 | - "type": "dense_vector", | ||
| 86 | - "dims": 1024, | ||
| 87 | - "index": true, | ||
| 88 | - "similarity": "dot_product" | ||
| 89 | - }, | ||
| 90 | - "image_embedding": { | ||
| 91 | - "type": "nested", | ||
| 92 | - "properties": { | ||
| 93 | - "vector": { | ||
| 94 | - "type": "dense_vector", | ||
| 95 | - "dims": 1024, | ||
| 96 | - "index": true, | ||
| 97 | - "similarity": "dot_product" | ||
| 98 | - }, | ||
| 99 | - "url": { | ||
| 100 | - "type": "text" | ||
| 101 | - } | ||
| 102 | - } | ||
| 103 | - }, | ||
| 104 | - | ||
| 105 | - // 分类相关 | ||
| 106 | - "category_path.zh": { // 提供模糊查询功能,辅助相关性计算 | ||
| 107 | - "type": "text", | ||
| 108 | - "analyzer": "hanlp_index", | ||
| 109 | - "search_analyzer": "hanlp_standard" | ||
| 110 | - }, | ||
| 111 | - "category_path.en": { // 提供模糊查询功能,辅助相关性计算 | ||
| 112 | - "type": "text", | ||
| 113 | - "analyzer": "english", | ||
| 114 | - "search_analyzer": "english" | ||
| 115 | - }, | ||
| 116 | - "category_name_text.zh": { // 提供模糊查询功能,辅助相关性计算 | ||
| 117 | - "type": "text", | ||
| 118 | - "analyzer": "hanlp_index", | ||
| 119 | - "search_analyzer": "hanlp_standard" | ||
| 120 | - }, | ||
| 121 | - "category_name_text.en": { // 提供模糊查询功能,辅助相关性计算 | ||
| 122 | - "type": "text", | ||
| 123 | - "analyzer": "english", | ||
| 124 | - "search_analyzer": "english" | ||
| 125 | - }, | ||
| 126 | - | ||
| 127 | - "category_id": { | ||
| 128 | - "type": "keyword" | ||
| 129 | - }, | ||
| 130 | - "category_name": { | ||
| 131 | - "type": "keyword" | ||
| 132 | - }, | ||
| 133 | - "category_level": { | ||
| 134 | - "type": "integer" | ||
| 135 | - }, | ||
| 136 | - "category1_name": { // 不同层级下 可能有同名的情况,因此提供一二三级分开的查询方式 | ||
| 137 | - "type": "keyword" | ||
| 138 | - }, | ||
| 139 | - "category2_name": { | ||
| 140 | - "type": "keyword" | ||
| 141 | - }, | ||
| 142 | - "category3_name": { | ||
| 143 | - "type": "keyword" | ||
| 144 | - }, | ||
| 145 | - | ||
| 146 | - // sku款式、子sku属性 | ||
| 147 | - "specifications": { | ||
| 148 | - "type": "nested", | ||
| 149 | - "properties": { | ||
| 150 | - "sku_id": { "type": "keyword" }, | ||
| 151 | - "name": { "type": "keyword" }, // "颜色", "容量" | ||
| 152 | - "value": { "type": "keyword" } // "白色", "256GB" | ||
| 153 | - } | ||
| 154 | - }, | ||
| 155 | - | ||
| 156 | - "option1_name": { | ||
| 157 | - "type": "keyword" | ||
| 158 | - }, | ||
| 159 | - "option2_name": { | ||
| 160 | - "type": "keyword" | ||
| 161 | - }, | ||
| 162 | - "option3_name": { | ||
| 163 | - "type": "keyword" | ||
| 164 | - }, | ||
| 165 | - | ||
| 166 | - "min_price": { | ||
| 167 | - "type": "float" | ||
| 168 | - }, | ||
| 169 | - "max_price": { | ||
| 170 | - "type": "float" | ||
| 171 | - }, | ||
| 172 | - "compare_at_price": { | ||
| 173 | - "type": "float" | ||
| 174 | - }, | ||
| 175 | - "sku_prices": { | ||
| 176 | - "type": "float" | ||
| 177 | - }, | ||
| 178 | - "sku_weights": { | ||
| 179 | - "type": "long" | ||
| 180 | - }, | ||
| 181 | - "sku_weight_units": { | ||
| 182 | - "type": "keyword" | ||
| 183 | - }, | ||
| 184 | - "total_inventory": { | ||
| 185 | - "type": "long" | ||
| 186 | - }, | ||
| 187 | - | ||
| 188 | - "skus": { | ||
| 189 | - "type": "nested", | ||
| 190 | - "properties": { | ||
| 191 | - "sku_id": { | ||
| 192 | - "type": "keyword" | ||
| 193 | - }, | ||
| 194 | - "price": { | ||
| 195 | - "type": "float" | ||
| 196 | - }, | ||
| 197 | - "compare_at_price": { | ||
| 198 | - "type": "float" | ||
| 199 | - }, | ||
| 200 | - "sku_code": { | ||
| 201 | - "type": "keyword" | ||
| 202 | - }, | ||
| 203 | - "stock": { | ||
| 204 | - "type": "long" | ||
| 205 | - }, | ||
| 206 | - "weight": { | ||
| 207 | - "type": "float" | ||
| 208 | - }, | ||
| 209 | - "weight_unit": { | ||
| 210 | - "type": "keyword" | ||
| 211 | - }, | ||
| 212 | - "option1_value": { | ||
| 213 | - "type": "keyword" | ||
| 214 | - }, | ||
| 215 | - "option2_value": { | ||
| 216 | - "type": "keyword" | ||
| 217 | - }, | ||
| 218 | - "option3_value": { | ||
| 219 | - "type": "keyword" | ||
| 220 | - }, | ||
| 221 | - "image_src": { | ||
| 222 | - "type": "keyword", | ||
| 223 | - "index": false | ||
| 224 | - } | ||
| 225 | - } | ||
| 226 | - } | ||
| 227 | - } | ||
| 228 | - } | ||
| 229 | -} | ||
| 230 | - | ||
| 231 | - |
docs/索引字段说明v2-参考表结构.md
| @@ -1,99 +0,0 @@ | @@ -1,99 +0,0 @@ | ||
| 1 | - | ||
| 2 | - | ||
| 3 | -类目表 product_category | ||
| 4 | -id bigint(20) NO PRI auto_increment | ||
| 5 | -parent_id bigint(20) NO | ||
| 6 | -name varchar(255) NO | ||
| 7 | -pic_url varchar(255) NO | ||
| 8 | -sort int(11) YES 0 | ||
| 9 | -status tinyint(4) NO | ||
| 10 | -creator varchar(64) YES | ||
| 11 | -create_time datetime NO CURRENT_TIMESTAMP | ||
| 12 | -updater varchar(64) YES | ||
| 13 | -update_time datetime NO CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP | ||
| 14 | -deleted bit(1) NO b'0' | ||
| 15 | -tenant_id bigint(20) NO 0 | ||
| 16 | - | ||
| 17 | - | ||
| 18 | - | ||
| 19 | -spu表 shoplazza_product_spu 全部字段 | ||
| 20 | -"Field" "Type" "Null" "Key" "Default" "Extra" | ||
| 21 | -"id" "bigint(20)" "NO" "PRI" "auto_increment" | ||
| 22 | -"shop_id" "bigint(20)" "NO" "MUL" "" | ||
| 23 | -"shoplazza_id" "varchar(64)" "NO" "" "" | ||
| 24 | -"handle" "varchar(255)" "YES" "MUL" "" | ||
| 25 | -"title" "varchar(500)" "NO" "" "" | ||
| 26 | -"brief" "varchar(1000)" "YES" "" "" | ||
| 27 | -"description" "text" "YES" "" "" | ||
| 28 | -"spu" "varchar(100)" "YES" "" "" | ||
| 29 | -"vendor" "varchar(255)" "YES" "" "" | ||
| 30 | -"vendor_url" "varchar(500)" "YES" "" "" | ||
| 31 | -"seo_title" "varchar(500)" "YES" "" "" | ||
| 32 | -"seo_description" "text" "YES" "" "" | ||
| 33 | -"seo_keywords" "text" "YES" "" "" | ||
| 34 | -"image_src" "varchar(500)" "YES" "" "" | ||
| 35 | -"image_width" "int(11)" "YES" "" "" | ||
| 36 | -"image_height" "int(11)" "YES" "" "" | ||
| 37 | -"image_path" "varchar(255)" "YES" "" "" | ||
| 38 | -"image_alt" "varchar(500)" "YES" "" "" | ||
| 39 | -"inventory_policy" "varchar(50)" "YES" "" "" | ||
| 40 | -"inventory_quantity" "int(11)" "YES" "" "0" "" | ||
| 41 | -"inventory_tracking" "tinyint(1)" "YES" "" "0" "" | ||
| 42 | -"published" "tinyint(1)" "YES" "" "0" "" | ||
| 43 | -"published_at" "datetime" "YES" "MUL" "" | ||
| 44 | -"requires_shipping" "tinyint(1)" "YES" "" "1" "" | ||
| 45 | -"taxable" "tinyint(1)" "YES" "" "0" "" | ||
| 46 | -"fake_sales" "int(11)" "YES" "" "0" "" | ||
| 47 | -"display_fake_sales" "tinyint(1)" "YES" "" "0" "" | ||
| 48 | -"mixed_wholesale" "tinyint(1)" "YES" "" "0" "" | ||
| 49 | -"need_variant_image" "tinyint(1)" "YES" "" "0" "" | ||
| 50 | -"has_only_default_variant" "tinyint(1)" "YES" "" "0" "" | ||
| 51 | -"tags" "text" "YES" "" "" | ||
| 52 | -"note" "text" "YES" "" "" | ||
| 53 | -"category" "varchar(255)" "YES" "" "" | ||
| 54 | -"category_id" "bigint(20)" "YES" "" "" | ||
| 55 | -"category_google_id" "bigint(20)" "YES" "" "" | ||
| 56 | -"category_level" "int(11)" "YES" "" "" | ||
| 57 | -"category_path" "varchar(500)" "YES" "" "" | ||
| 58 | -"shoplazza_created_at" "datetime" "YES" "" "" | ||
| 59 | -"shoplazza_updated_at" "datetime" "YES" "MUL" "" | ||
| 60 | -"tenant_id" "bigint(20)" "NO" "MUL" "" | ||
| 61 | -"creator" "varchar(64)" "YES" "" "" "" | ||
| 62 | -"create_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "" | ||
| 63 | -"updater" "varchar(64)" "YES" "" "" "" | ||
| 64 | -"update_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "on update CURRENT_TIMESTAMP" | ||
| 65 | -"deleted" "bit(1)" "NO" "" "b'0'" "" | ||
| 66 | - | ||
| 67 | -shoplazza_product_sku 全部字段 | ||
| 68 | -"Field" "Type" "Null" "Key" "Default" "Extra" | ||
| 69 | -"id" "bigint(20)" "NO" "PRI" "auto_increment" | ||
| 70 | -"spu_id" "bigint(20)" "NO" "MUL" "" | ||
| 71 | -"shop_id" "bigint(20)" "NO" "MUL" "" | ||
| 72 | -"shoplazza_id" "varchar(64)" "NO" "" "" | ||
| 73 | -"shoplazza_product_id" "varchar(64)" "NO" "MUL" "" | ||
| 74 | -"shoplazza_image_id" "varchar(64)" "YES" "" "" | ||
| 75 | -"title" "varchar(500)" "YES" "" "" | ||
| 76 | -"sku" "varchar(100)" "YES" "MUL" "" | ||
| 77 | -"barcode" "varchar(100)" "YES" "" "" | ||
| 78 | -"position" "int(11)" "YES" "" "0" "" | ||
| 79 | -"price" "decimal(10,2)" "YES" "" "" | ||
| 80 | -"compare_at_price" "decimal(10,2)" "YES" "" "" | ||
| 81 | -"cost_price" "decimal(10,2)" "YES" "" "" | ||
| 82 | -"option1" "varchar(255)" "YES" "" "" | ||
| 83 | -"option2" "varchar(255)" "YES" "" "" | ||
| 84 | -"option3" "varchar(255)" "YES" "" "" | ||
| 85 | -"inventory_quantity" "int(11)" "YES" "" "0" "" | ||
| 86 | -"weight" "decimal(10,2)" "YES" "" "" | ||
| 87 | -"weight_unit" "varchar(10)" "YES" "" "" | ||
| 88 | -"image_src" "varchar(500)" "YES" "" "" | ||
| 89 | -"wholesale_price" "json" "YES" "" "" | ||
| 90 | -"note" "text" "YES" "" "" | ||
| 91 | -"extend" "json" "YES" "" "" | ||
| 92 | -"shoplazza_created_at" "datetime" "YES" "" "" | ||
| 93 | -"shoplazza_updated_at" "datetime" "YES" "" "" | ||
| 94 | -"tenant_id" "bigint(20)" "NO" "MUL" "" | ||
| 95 | -"creator" "varchar(64)" "YES" "" "" "" | ||
| 96 | -"create_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "" | ||
| 97 | -"updater" "varchar(64)" "YES" "" "" "" | ||
| 98 | -"update_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "on update CURRENT_TIMESTAMP" | ||
| 99 | -"deleted" "bit(1)" "NO" "" "b'0'" "" |
docs/索引字段说明v2.md
| 1 | -# 索引字段说明 v2 | 1 | +# 索引字段说明 |
| 2 | 2 | ||
| 3 | -本文档详细说明 `search_products` 索引的字段结构、类型、数据来源和用途。 | 3 | +本文档详细说明商品搜索所用 Elasticsearch 索引的字段结构、类型、数据来源和用途。 |
| 4 | 4 | ||
| 5 | ## 索引概述 | 5 | ## 索引概述 |
| 6 | 6 | ||
| 7 | -- **索引名称**: `search_products` | 7 | +- **索引命名**: 按租户独立索引,名称为 **`search_products_tenant_{tenant_id}`**(例如 `search_products_tenant_162`)。由代码中 `get_tenant_index_name(tenant_id)` 生成。 |
| 8 | - **索引维度**: SPU(Standard Product Unit)级别 | 8 | - **索引维度**: SPU(Standard Product Unit)级别 |
| 9 | -- **多租户隔离**: 通过 `tenant_id` 字段实现 | ||
| 10 | -- **Mapping 文件**: `mappings/search_products.json` | 9 | +- **多租户隔离**: **索引层面隔离**——每个租户一个独立索引,查询与写入时直接使用该索引名,无需在查询条件中再按 `tenant_id` 过滤。 |
| 10 | +- **Mapping 模板**: `mappings/search_products.json`(新建每个租户索引时复用同一份 mapping) | ||
| 11 | + | ||
| 12 | +## ES 索引在代码中的使用 | ||
| 13 | + | ||
| 14 | +索引名由 **`indexer/mapping_generator.py`** 中的 `get_tenant_index_name(tenant_id)` 生成,格式为 `search_products_tenant_{tenant_id}`。 | ||
| 15 | + | ||
| 16 | +| 场景 | 位置 | 说明 | | ||
| 17 | +|------|------|------| | ||
| 18 | +| 搜索请求 | `search/searcher.py` | 构建 ES 查询时通过 `get_tenant_index_name(tenant_id)` 得到 `index_name`,调用 `es_client.search(index_name=..., body=...)`,不再在 query 中加 `tenant_id` 过滤。 | | ||
| 19 | +| 全量写入 | `indexer/bulk_indexing_service.py` | 按 `tenant_id` 调用 `get_tenant_index_name(tenant_id)` 作为写入目标索引,建索引与 bulk 写入均使用该名。 | | ||
| 20 | +| 增量写入 | `indexer/incremental_service.py` | 同上,索引名由 `get_tenant_index_name(tenant_id)` 得到。 | | ||
| 21 | +| 建索引 / 删索引 | `indexer/mapping_generator.py` | `create_index_if_not_exists`、`delete_index_if_exists` 等均按传入的 `index_name`(即租户索引名)操作。 | | ||
| 22 | + | ||
| 23 | +因此:每个租户对应一个独立 ES 索引,查询与写入都直接指定该索引名,实现索引级多租户隔离。 | ||
| 11 | 24 | ||
| 12 | ## 字段分类 | 25 | ## 字段分类 |
| 13 | 26 | ||
| @@ -15,7 +28,7 @@ | @@ -15,7 +28,7 @@ | ||
| 15 | 28 | ||
| 16 | | 字段名 | ES类型 | 说明 | 数据来源 | | 29 | | 字段名 | ES类型 | 说明 | 数据来源 | |
| 17 | |--------|--------|------|----------| | 30 | |--------|--------|------|----------| |
| 18 | -| `tenant_id` | keyword | 租户ID,用于多租户隔离 | MySQL: `shoplazza_product_spu.tenant_id` | | 31 | +| `tenant_id` | keyword | 租户ID(文档级标识,便于排查与统计;隔离已由索引名保证,查询无需再按此过滤) | MySQL: `shoplazza_product_spu.tenant_id` | |
| 19 | | `spu_id` | keyword | SPU唯一标识 | MySQL: `shoplazza_product_spu.id` | | 32 | | `spu_id` | keyword | SPU唯一标识 | MySQL: `shoplazza_product_spu.id` | |
| 20 | | `create_time` | date | 创建时间 | MySQL: `shoplazza_product_spu.create_time` | | 33 | | `create_time` | date | 创建时间 | MySQL: `shoplazza_product_spu.create_time` | |
| 21 | | `update_time` | date | 更新时间 | MySQL: `shoplazza_product_spu.update_time` | | 34 | | `update_time` | date | 更新时间 | MySQL: `shoplazza_product_spu.update_time` | |
| @@ -359,7 +372,7 @@ | @@ -359,7 +372,7 @@ | ||
| 359 | 372 | ||
| 360 | ### 过滤字段(精确匹配) | 373 | ### 过滤字段(精确匹配) |
| 361 | 374 | ||
| 362 | -- `tenant_id` (必需,多租户隔离) | 375 | +- `tenant_id`(文档中保留,便于排查与统计;**多租户隔离已由索引名保证,查询时无需再按 tenant_id 过滤**) |
| 363 | - `category_id`, `category_name`, `category1_name`, `category2_name`, `category3_name` | 376 | - `category_id`, `category_name`, `category1_name`, `category2_name`, `category3_name` |
| 364 | - `vendor.zh.keyword`, `vendor.en.keyword` | 377 | - `vendor.zh.keyword`, `vendor.en.keyword` |
| 365 | - `specifications` (嵌套查询) | 378 | - `specifications` (嵌套查询) |
| @@ -445,7 +458,7 @@ filters AND (text_recall OR embedding_recall) | @@ -445,7 +458,7 @@ filters AND (text_recall OR embedding_recall) | ||
| 445 | ## 注意事项 | 458 | ## 注意事项 |
| 446 | 459 | ||
| 447 | 1. **索引维度**: 所有数据以 SPU 为单位索引,SKU 信息作为嵌套字段存储 | 460 | 1. **索引维度**: 所有数据以 SPU 为单位索引,SKU 信息作为嵌套字段存储 |
| 448 | -2. **多租户隔离**: 所有查询必须包含 `tenant_id` 过滤条件 | 461 | +2. **多租户隔离**: 按租户使用独立索引(`search_products_tenant_{tenant_id}`),查询时通过 `get_tenant_index_name(tenant_id)` 指定索引名即可,无需在 query 中加 `tenant_id` 过滤 |
| 449 | 3. **多语言支持**: 文本字段支持中英文,后端根据 `language` 参数自动选择 | 462 | 3. **多语言支持**: 文本字段支持中英文,后端根据 `language` 参数自动选择 |
| 450 | 4. **规格分面**: `specifications` 使用嵌套聚合,按 `name` 分组,然后按 `value` 聚合 | 463 | 4. **规格分面**: `specifications` 使用嵌套聚合,按 `name` 分组,然后按 `value` 聚合 |
| 451 | 5. **向量字段**: `title_embedding` 和 `image_embedding` 仅用于搜索,不返回给前端 | 464 | 5. **向量字段**: `title_embedding` 和 `image_embedding` 仅用于搜索,不返回给前端 |
indexer/document_transformer.py
| @@ -2,6 +2,10 @@ | @@ -2,6 +2,10 @@ | ||
| 2 | SPU文档转换器 - 公共转换逻辑。 | 2 | SPU文档转换器 - 公共转换逻辑。 |
| 3 | 3 | ||
| 4 | 提取全量和增量索引共用的文档转换逻辑,避免代码冗余。 | 4 | 提取全量和增量索引共用的文档转换逻辑,避免代码冗余。 |
| 5 | +输出文档结构与 mappings/search_products.json 及 索引字段说明v2 一致, | ||
| 6 | +供 search/searcher 与 search/es_query_builder 使用。 | ||
| 7 | +- 多语言字段:title, brief, description, vendor, category_path, category_name_text | ||
| 8 | +- 嵌套:specifications, skus;向量:title_embedding(image_embedding 由 Java 索引或外部填充) | ||
| 5 | """ | 9 | """ |
| 6 | 10 | ||
| 7 | import pandas as pd | 11 | import pandas as pd |
| @@ -137,7 +141,13 @@ class SPUDocumentTransformer: | @@ -137,7 +141,13 @@ class SPUDocumentTransformer: | ||
| 137 | doc['min_price'] = 0.0 | 141 | doc['min_price'] = 0.0 |
| 138 | doc['max_price'] = 0.0 | 142 | doc['max_price'] = 0.0 |
| 139 | 143 | ||
| 140 | - if compare_prices: | 144 | + # 优先使用 SPU 级 compare_at_price(与索引字段说明v2一致),否则取 SKU 最大值 |
| 145 | + if pd.notna(spu_row.get('compare_at_price')): | ||
| 146 | + try: | ||
| 147 | + doc['compare_at_price'] = float(spu_row['compare_at_price']) | ||
| 148 | + except (ValueError, TypeError): | ||
| 149 | + doc['compare_at_price'] = float(max(compare_prices)) if compare_prices else None | ||
| 150 | + elif compare_prices: | ||
| 141 | doc['compare_at_price'] = float(max(compare_prices)) | 151 | doc['compare_at_price'] = float(max(compare_prices)) |
| 142 | else: | 152 | else: |
| 143 | doc['compare_at_price'] = None | 153 | doc['compare_at_price'] = None |
| @@ -290,6 +300,8 @@ class SPUDocumentTransformer: | @@ -290,6 +300,8 @@ class SPUDocumentTransformer: | ||
| 290 | if category_names: | 300 | if category_names: |
| 291 | category_path_str = '/'.join(category_names) | 301 | category_path_str = '/'.join(category_names) |
| 292 | doc['category_path'] = {primary_lang: category_path_str} | 302 | doc['category_path'] = {primary_lang: category_path_str} |
| 303 | + # 与查询使用的 category_name_text.zh/en 对齐,便于类目搜索 | ||
| 304 | + doc['category_name_text'] = {primary_lang: category_path_str} | ||
| 293 | 305 | ||
| 294 | # 填充分层类目名称 | 306 | # 填充分层类目名称 |
| 295 | if len(category_names) > 0: | 307 | if len(category_names) > 0: |
indexer/incremental_service.py
| @@ -176,7 +176,7 @@ class IncrementalIndexerService: | @@ -176,7 +176,7 @@ class IncrementalIndexerService: | ||
| 176 | image_src, image_width, image_height, image_path, image_alt, | 176 | image_src, image_width, image_height, image_path, image_alt, |
| 177 | tags, note, category, category_id, category_google_id, | 177 | tags, note, category, category_id, category_google_id, |
| 178 | category_level, category_path, | 178 | category_level, category_path, |
| 179 | - fake_sales, display_fake_sales, | 179 | + compare_at_price, fake_sales, display_fake_sales, |
| 180 | tenant_id, creator, create_time, updater, update_time, deleted | 180 | tenant_id, creator, create_time, updater, update_time, deleted |
| 181 | FROM shoplazza_product_spu | 181 | FROM shoplazza_product_spu |
| 182 | WHERE tenant_id = :tenant_id AND id = :spu_id | 182 | WHERE tenant_id = :tenant_id AND id = :spu_id |
| @@ -191,7 +191,7 @@ class IncrementalIndexerService: | @@ -191,7 +191,7 @@ class IncrementalIndexerService: | ||
| 191 | image_src, image_width, image_height, image_path, image_alt, | 191 | image_src, image_width, image_height, image_path, image_alt, |
| 192 | tags, note, category, category_id, category_google_id, | 192 | tags, note, category, category_id, category_google_id, |
| 193 | category_level, category_path, | 193 | category_level, category_path, |
| 194 | - fake_sales, display_fake_sales, | 194 | + compare_at_price, fake_sales, display_fake_sales, |
| 195 | tenant_id, creator, create_time, updater, update_time, deleted | 195 | tenant_id, creator, create_time, updater, update_time, deleted |
| 196 | FROM shoplazza_product_spu | 196 | FROM shoplazza_product_spu |
| 197 | WHERE tenant_id = :tenant_id AND id = :spu_id AND deleted = 0 | 197 | WHERE tenant_id = :tenant_id AND id = :spu_id AND deleted = 0 |
| @@ -243,7 +243,7 @@ class IncrementalIndexerService: | @@ -243,7 +243,7 @@ class IncrementalIndexerService: | ||
| 243 | image_src, image_width, image_height, image_path, image_alt, | 243 | image_src, image_width, image_height, image_path, image_alt, |
| 244 | tags, note, category, category_id, category_google_id, | 244 | tags, note, category, category_id, category_google_id, |
| 245 | category_level, category_path, | 245 | category_level, category_path, |
| 246 | - fake_sales, display_fake_sales, | 246 | + compare_at_price, fake_sales, display_fake_sales, |
| 247 | tenant_id, creator, create_time, updater, update_time, deleted | 247 | tenant_id, creator, create_time, updater, update_time, deleted |
| 248 | FROM shoplazza_product_spu | 248 | FROM shoplazza_product_spu |
| 249 | WHERE tenant_id = :tenant_id AND id IN :spu_ids | 249 | WHERE tenant_id = :tenant_id AND id IN :spu_ids |
| @@ -258,7 +258,7 @@ class IncrementalIndexerService: | @@ -258,7 +258,7 @@ class IncrementalIndexerService: | ||
| 258 | image_src, image_width, image_height, image_path, image_alt, | 258 | image_src, image_width, image_height, image_path, image_alt, |
| 259 | tags, note, category, category_id, category_google_id, | 259 | tags, note, category, category_id, category_google_id, |
| 260 | category_level, category_path, | 260 | category_level, category_path, |
| 261 | - fake_sales, display_fake_sales, | 261 | + compare_at_price, fake_sales, display_fake_sales, |
| 262 | tenant_id, creator, create_time, updater, update_time, deleted | 262 | tenant_id, creator, create_time, updater, update_time, deleted |
| 263 | FROM shoplazza_product_spu | 263 | FROM shoplazza_product_spu |
| 264 | WHERE tenant_id = :tenant_id AND deleted = 0 AND id IN :spu_ids | 264 | WHERE tenant_id = :tenant_id AND deleted = 0 AND id IN :spu_ids |
indexer/spu_transformer.py
| @@ -45,7 +45,7 @@ class SPUTransformer: | @@ -45,7 +45,7 @@ class SPUTransformer: | ||
| 45 | image_src, image_width, image_height, image_path, image_alt, | 45 | image_src, image_width, image_height, image_path, image_alt, |
| 46 | tags, note, category, category_id, category_google_id, | 46 | tags, note, category, category_id, category_google_id, |
| 47 | category_level, category_path, | 47 | category_level, category_path, |
| 48 | - fake_sales, display_fake_sales, | 48 | + compare_at_price, fake_sales, display_fake_sales, |
| 49 | tenant_id, creator, create_time, updater, update_time, deleted | 49 | tenant_id, creator, create_time, updater, update_time, deleted |
| 50 | FROM shoplazza_product_spu | 50 | FROM shoplazza_product_spu |
| 51 | WHERE tenant_id = :tenant_id AND deleted = 0 | 51 | WHERE tenant_id = :tenant_id AND deleted = 0 |