Commit 5dcddc06eabb865f4f59af2f73e384ea34148c15
1 parent
39e63ad1
索引重构
主要是对分类、属性、子sku 等重要字段的处理。
参考文档:《@docs/索引字段说明v2-mapping结构.md》《@docs/索引字段说明v2.md》

feat:
1. 更新 field_types.py
   - 添加 hanlp_index/hanlp_standard 分析器映射(映射到 CHINESE_ECOMMERCE/CHINESE_ECOMMERCE_QUERY)
   - 支持 keyword_normalizer 配置(用于 vendor.keyword 的 lowercase normalizer)
   - 更新 get_default_analyzers() 添加 hanlp 分析器和 lowercase normalizer
   - 修复 image_embedding 的 url 字段类型为 text
2. 更新 config.yaml(32-207行)
   - 移除无用字段:handle, seo_title, seo_description, seo_keywords, shoplazza_created_at, shoplazza_updated_at
   - 添加中英文字段:title_zh, title_en, brief_zh, brief_en, description_zh, description_en, vendor_zh, vendor_en
   - 添加 category 多层级字段:category_path_zh, category_path_en, category_name_zh, category_name_en, category_id, category_name, category_level, category1_name, category2_name, category3_name
   - 添加 specifications 嵌套字段
   - 添加 option 名称字段:option1_name, option2_name, option3_name
   - 添加 SKU 扁平化字段:sku_prices, sku_weights, sku_weight_units, total_inventory
   - 更新 skus 嵌套结构以匹配目标 mapping
   - 添加 image_embedding 嵌套字段
   - 更新 indexes 配置以使用新字段名
3. 更新 config_loader.py
   - 添加 keyword_normalizer 字段支持
4. 重构 spu_transformer.py
   - 添加 load_option_data() 方法从 option 表加载数据
   - 更新 transform_batch() 加载 option 数据
   - 重构 _transform_spu_to_doc():
     - 实现中英文字段映射(暂时只填充中文)
     - 实现 category 多层级字段映射和 category_path 解析
     - 实现 specifications 构建(从 option 表获取 name,从 SKU 获取 value)
     - 实现 option 名称字段映射
     - 实现 SKU 扁平化字段计算
     - 更新 skus 嵌套结构
   - 重构 _transform_sku_row() 以匹配新的 SKU 结构
   - 移除 SEO 和 handle 字段的处理
Showing
6 changed files
with
735 additions
and
385 deletions
Show diff stats
config/config.yaml
| @@ -29,86 +29,88 @@ fields: | @@ -29,86 +29,88 @@ fields: | ||
| 29 | store: true | 29 | store: true |
| 30 | return_in_source: true | 30 | return_in_source: true |
| 31 | 31 | ||
| 32 | - - name: "handle" | ||
| 33 | - type: "KEYWORD" | ||
| 34 | - index: true | ||
| 35 | - store: true | ||
| 36 | - return_in_source: true | ||
| 37 | - | ||
| 38 | - # 文本搜索字段 | ||
| 39 | - - name: "title" | 32 | + # 文本相关性相关字段(中英文双语) |
| 33 | + - name: "title_zh" | ||
| 40 | type: "TEXT" | 34 | type: "TEXT" |
| 41 | - analyzer: "chinese_ecommerce" | 35 | + analyzer: "hanlp_index" |
| 36 | + search_analyzer: "hanlp_standard" | ||
| 42 | boost: 3.0 | 37 | boost: 3.0 |
| 43 | index: true | 38 | index: true |
| 44 | store: true | 39 | store: true |
| 45 | return_in_source: true | 40 | return_in_source: true |
| 46 | 41 | ||
| 47 | - - name: "brief" | 42 | + - name: "brief_zh" |
| 48 | type: "TEXT" | 43 | type: "TEXT" |
| 49 | - analyzer: "chinese_ecommerce" | 44 | + analyzer: "hanlp_index" |
| 45 | + search_analyzer: "hanlp_standard" | ||
| 50 | boost: 1.5 | 46 | boost: 1.5 |
| 51 | index: true | 47 | index: true |
| 52 | store: true | 48 | store: true |
| 53 | return_in_source: true | 49 | return_in_source: true |
| 54 | 50 | ||
| 55 | - - name: "description" | 51 | + - name: "description_zh" |
| 56 | type: "TEXT" | 52 | type: "TEXT" |
| 57 | - analyzer: "chinese_ecommerce" | 53 | + analyzer: "hanlp_index" |
| 54 | + search_analyzer: "hanlp_standard" | ||
| 58 | boost: 1.0 | 55 | boost: 1.0 |
| 59 | index: true | 56 | index: true |
| 60 | store: true | 57 | store: true |
| 61 | return_in_source: true | 58 | return_in_source: true |
| 62 | 59 | ||
| 63 | - # SEO字段(提升相关性) | ||
| 64 | - - name: "seo_title" | 60 | + - name: "vendor_zh" |
| 65 | type: "TEXT" | 61 | type: "TEXT" |
| 66 | - analyzer: "chinese_ecommerce" | ||
| 67 | - boost: 2.0 | ||
| 68 | - index: true | ||
| 69 | - store: true | ||
| 70 | - return_in_source: false # SEO字段通常不需要在结果中返回 | ||
| 71 | - | ||
| 72 | - - name: "seo_description" | ||
| 73 | - type: "TEXT" | ||
| 74 | - analyzer: "chinese_ecommerce" | 62 | + analyzer: "hanlp_index" |
| 63 | + search_analyzer: "hanlp_standard" | ||
| 75 | boost: 1.5 | 64 | boost: 1.5 |
| 76 | index: true | 65 | index: true |
| 77 | store: true | 66 | store: true |
| 78 | - return_in_source: false | 67 | + return_in_source: true |
| 68 | + keyword_subfield: true | ||
| 69 | + keyword_normalizer: "lowercase" | ||
| 79 | 70 | ||
| 80 | - - name: "seo_keywords" | 71 | + - name: "title_en" |
| 81 | type: "TEXT" | 72 | type: "TEXT" |
| 82 | - analyzer: "chinese_ecommerce" | ||
| 83 | - boost: 2.0 | 73 | + analyzer: "english" |
| 74 | + search_analyzer: "english" | ||
| 75 | + boost: 3.0 | ||
| 84 | index: true | 76 | index: true |
| 85 | store: true | 77 | store: true |
| 86 | - return_in_source: false | 78 | + return_in_source: true |
| 87 | 79 | ||
| 88 | - # 分类和标签字段(TEXT + KEYWORD双重索引) | ||
| 89 | - - name: "vendor" | ||
| 90 | - type: "HKText" | ||
| 91 | - analyzer: "chinese_ecommerce" | 80 | + - name: "brief_en" |
| 81 | + type: "TEXT" | ||
| 82 | + analyzer: "english" | ||
| 83 | + search_analyzer: "english" | ||
| 92 | boost: 1.5 | 84 | boost: 1.5 |
| 93 | index: true | 85 | index: true |
| 94 | store: true | 86 | store: true |
| 95 | return_in_source: true | 87 | return_in_source: true |
| 96 | 88 | ||
| 97 | - - name: "tags" | ||
| 98 | - type: "HKText" | ||
| 99 | - analyzer: "chinese_ecommerce" | 89 | + - name: "description_en" |
| 90 | + type: "TEXT" | ||
| 91 | + analyzer: "english" | ||
| 92 | + search_analyzer: "english" | ||
| 100 | boost: 1.0 | 93 | boost: 1.0 |
| 101 | index: true | 94 | index: true |
| 102 | store: true | 95 | store: true |
| 103 | return_in_source: true | 96 | return_in_source: true |
| 104 | 97 | ||
| 105 | - - name: "category" | ||
| 106 | - type: "HKText" | ||
| 107 | - analyzer: "chinese_ecommerce" | 98 | + - name: "vendor_en" |
| 99 | + type: "TEXT" | ||
| 100 | + analyzer: "english" | ||
| 101 | + search_analyzer: "english" | ||
| 108 | boost: 1.5 | 102 | boost: 1.5 |
| 109 | index: true | 103 | index: true |
| 110 | store: true | 104 | store: true |
| 111 | return_in_source: true | 105 | return_in_source: true |
| 106 | + keyword_subfield: true | ||
| 107 | + keyword_normalizer: "lowercase" | ||
| 108 | + | ||
| 109 | + - name: "tags" | ||
| 110 | + type: "KEYWORD" | ||
| 111 | + index: true | ||
| 112 | + store: true | ||
| 113 | + return_in_source: true | ||
| 112 | 114 | ||
| 113 | # 价格字段(扁平化) | 115 | # 价格字段(扁平化) |
| 114 | - name: "min_price" | 116 | - name: "min_price" |
| @@ -129,6 +131,30 @@ fields: | @@ -129,6 +131,30 @@ fields: | ||
| 129 | store: true | 131 | store: true |
| 130 | return_in_source: true | 132 | return_in_source: true |
| 131 | 133 | ||
| 134 | + - name: "sku_prices" | ||
| 135 | + type: "FLOAT" | ||
| 136 | + index: true | ||
| 137 | + store: true | ||
| 138 | + return_in_source: true | ||
| 139 | + | ||
| 140 | + - name: "sku_weights" | ||
| 141 | + type: "LONG" | ||
| 142 | + index: true | ||
| 143 | + store: true | ||
| 144 | + return_in_source: true | ||
| 145 | + | ||
| 146 | + - name: "sku_weight_units" | ||
| 147 | + type: "KEYWORD" | ||
| 148 | + index: true | ||
| 149 | + store: true | ||
| 150 | + return_in_source: true | ||
| 151 | + | ||
| 152 | + - name: "total_inventory" | ||
| 153 | + type: "LONG" | ||
| 154 | + index: true | ||
| 155 | + store: true | ||
| 156 | + return_in_source: true | ||
| 157 | + | ||
| 132 | # 图片字段(用于显示,不参与搜索) | 158 | # 图片字段(用于显示,不参与搜索) |
| 133 | - name: "image_url" | 159 | - name: "image_url" |
| 134 | type: "KEYWORD" | 160 | type: "KEYWORD" |
| @@ -136,7 +162,7 @@ fields: | @@ -136,7 +162,7 @@ fields: | ||
| 136 | store: true | 162 | store: true |
| 137 | return_in_source: true | 163 | return_in_source: true |
| 138 | 164 | ||
| 139 | - # 文本嵌入字段(用于语义搜索) | 165 | + # 语义向量 |
| 140 | - name: "title_embedding" | 166 | - name: "title_embedding" |
| 141 | type: "TEXT_EMBEDDING" | 167 | type: "TEXT_EMBEDDING" |
| 142 | embedding_dims: 1024 | 168 | embedding_dims: 1024 |
| @@ -145,30 +171,137 @@ fields: | @@ -145,30 +171,137 @@ fields: | ||
| 145 | store: false | 171 | store: false |
| 146 | return_in_source: false # 嵌入向量通常不需要在结果中返回 | 172 | return_in_source: false # 嵌入向量通常不需要在结果中返回 |
| 147 | 173 | ||
| 148 | - # 时间字段 | ||
| 149 | - - name: "create_time" | ||
| 150 | - type: "DATE" | 174 | + - name: "image_embedding" |
| 175 | + type: "IMAGE_EMBEDDING" | ||
| 176 | + embedding_dims: 1024 | ||
| 177 | + embedding_similarity: "dot_product" | ||
| 178 | + nested: true | ||
| 179 | + index: true | ||
| 180 | + store: false | ||
| 181 | + return_in_source: false | ||
| 182 | + | ||
| 183 | + # 分类相关字段 | ||
| 184 | + - name: "category_path_zh" | ||
| 185 | + type: "TEXT" | ||
| 186 | + analyzer: "hanlp_index" | ||
| 187 | + search_analyzer: "hanlp_standard" | ||
| 188 | + boost: 1.5 | ||
| 151 | index: true | 189 | index: true |
| 152 | store: true | 190 | store: true |
| 153 | return_in_source: true | 191 | return_in_source: true |
| 154 | 192 | ||
| 155 | - - name: "update_time" | ||
| 156 | - type: "DATE" | 193 | + - name: "category_path_en" |
| 194 | + type: "TEXT" | ||
| 195 | + analyzer: "english" | ||
| 196 | + search_analyzer: "english" | ||
| 197 | + boost: 1.5 | ||
| 198 | + index: true | ||
| 199 | + store: true | ||
| 200 | + return_in_source: true | ||
| 201 | + | ||
| 202 | + - name: "category_name_zh" | ||
| 203 | + type: "TEXT" | ||
| 204 | + analyzer: "hanlp_index" | ||
| 205 | + search_analyzer: "hanlp_standard" | ||
| 206 | + boost: 1.5 | ||
| 207 | + index: true | ||
| 208 | + store: true | ||
| 209 | + return_in_source: true | ||
| 210 | + | ||
| 211 | + - name: "category_name_en" | ||
| 212 | + type: "TEXT" | ||
| 213 | + analyzer: "english" | ||
| 214 | + search_analyzer: "english" | ||
| 215 | + boost: 1.5 | ||
| 157 | index: true | 216 | index: true |
| 158 | store: true | 217 | store: true |
| 159 | return_in_source: true | 218 | return_in_source: true |
| 160 | 219 | ||
| 161 | - - name: "shoplazza_created_at" | 220 | + - name: "category_id" |
| 221 | + type: "KEYWORD" | ||
| 222 | + index: true | ||
| 223 | + store: true | ||
| 224 | + return_in_source: true | ||
| 225 | + | ||
| 226 | + - name: "category_name" | ||
| 227 | + type: "KEYWORD" | ||
| 228 | + index: true | ||
| 229 | + store: true | ||
| 230 | + return_in_source: true | ||
| 231 | + | ||
| 232 | + - name: "category_level" | ||
| 233 | + type: "INT" | ||
| 234 | + index: true | ||
| 235 | + store: true | ||
| 236 | + return_in_source: true | ||
| 237 | + | ||
| 238 | + - name: "category1_name" | ||
| 239 | + type: "KEYWORD" | ||
| 240 | + index: true | ||
| 241 | + store: true | ||
| 242 | + return_in_source: true | ||
| 243 | + | ||
| 244 | + - name: "category2_name" | ||
| 245 | + type: "KEYWORD" | ||
| 246 | + index: true | ||
| 247 | + store: true | ||
| 248 | + return_in_source: true | ||
| 249 | + | ||
| 250 | + - name: "category3_name" | ||
| 251 | + type: "KEYWORD" | ||
| 252 | + index: true | ||
| 253 | + store: true | ||
| 254 | + return_in_source: true | ||
| 255 | + | ||
| 256 | + # SKU款式、子sku属性 | ||
| 257 | + - name: "specifications" | ||
| 258 | + type: "JSON" | ||
| 259 | + nested: true | ||
| 260 | + return_in_source: true | ||
| 261 | + nested_properties: | ||
| 262 | + sku_id: | ||
| 263 | + type: "keyword" | ||
| 264 | + index: true | ||
| 265 | + store: true | ||
| 266 | + name: | ||
| 267 | + type: "keyword" | ||
| 268 | + index: true | ||
| 269 | + store: true | ||
| 270 | + value: | ||
| 271 | + type: "keyword" | ||
| 272 | + index: true | ||
| 273 | + store: true | ||
| 274 | + | ||
| 275 | + - name: "option1_name" | ||
| 276 | + type: "KEYWORD" | ||
| 277 | + index: true | ||
| 278 | + store: true | ||
| 279 | + return_in_source: true | ||
| 280 | + | ||
| 281 | + - name: "option2_name" | ||
| 282 | + type: "KEYWORD" | ||
| 283 | + index: true | ||
| 284 | + store: true | ||
| 285 | + return_in_source: true | ||
| 286 | + | ||
| 287 | + - name: "option3_name" | ||
| 288 | + type: "KEYWORD" | ||
| 289 | + index: true | ||
| 290 | + store: true | ||
| 291 | + return_in_source: true | ||
| 292 | + | ||
| 293 | + # 时间字段 | ||
| 294 | + - name: "create_time" | ||
| 162 | type: "DATE" | 295 | type: "DATE" |
| 163 | index: true | 296 | index: true |
| 164 | store: true | 297 | store: true |
| 165 | - return_in_source: false # 通常不需要返回 | 298 | + return_in_source: true |
| 166 | 299 | ||
| 167 | - - name: "shoplazza_updated_at" | 300 | + - name: "update_time" |
| 168 | type: "DATE" | 301 | type: "DATE" |
| 169 | index: true | 302 | index: true |
| 170 | store: true | 303 | store: true |
| 171 | - return_in_source: false # 通常不需要返回 | 304 | + return_in_source: true |
| 172 | 305 | ||
| 173 | # 嵌套skus字段 | 306 | # 嵌套skus字段 |
| 174 | - name: "skus" | 307 | - name: "skus" |
| @@ -180,11 +313,6 @@ fields: | @@ -180,11 +313,6 @@ fields: | ||
| 180 | type: "keyword" | 313 | type: "keyword" |
| 181 | index: true | 314 | index: true |
| 182 | store: true | 315 | store: true |
| 183 | - title: | ||
| 184 | - type: "text" | ||
| 185 | - analyzer: "chinese_ecommerce" | ||
| 186 | - index: true | ||
| 187 | - store: true | ||
| 188 | price: | 316 | price: |
| 189 | type: "float" | 317 | type: "float" |
| 190 | index: true | 318 | index: true |
| @@ -193,7 +321,7 @@ fields: | @@ -193,7 +321,7 @@ fields: | ||
| 193 | type: "float" | 321 | type: "float" |
| 194 | index: true | 322 | index: true |
| 195 | store: true | 323 | store: true |
| 196 | - sku: | 324 | + sku_code: |
| 197 | type: "keyword" | 325 | type: "keyword" |
| 198 | index: true | 326 | index: true |
| 199 | store: true | 327 | store: true |
| @@ -201,46 +329,65 @@ fields: | @@ -201,46 +329,65 @@ fields: | ||
| 201 | type: "long" | 329 | type: "long" |
| 202 | index: true | 330 | index: true |
| 203 | store: true | 331 | store: true |
| 204 | - options: | ||
| 205 | - type: "object" | ||
| 206 | - enabled: true | 332 | + weight: |
| 333 | + type: "float" | ||
| 334 | + index: true | ||
| 335 | + store: true | ||
| 336 | + weight_unit: | ||
| 337 | + type: "keyword" | ||
| 338 | + index: true | ||
| 339 | + store: true | ||
| 340 | + option1_value: | ||
| 341 | + type: "keyword" | ||
| 342 | + index: true | ||
| 343 | + store: true | ||
| 344 | + option2_value: | ||
| 345 | + type: "keyword" | ||
| 346 | + index: true | ||
| 347 | + store: true | ||
| 348 | + option3_value: | ||
| 349 | + type: "keyword" | ||
| 350 | + index: true | ||
| 351 | + store: true | ||
| 352 | + image_src: | ||
| 353 | + type: "keyword" | ||
| 354 | + index: false | ||
| 355 | + store: true | ||
| 207 | 356 | ||
| 208 | # Index Structure (Query Domains) | 357 | # Index Structure (Query Domains) |
| 209 | indexes: | 358 | indexes: |
| 210 | - name: "default" | 359 | - name: "default" |
| 211 | label: "默认索引" | 360 | label: "默认索引" |
| 212 | fields: | 361 | fields: |
| 213 | - - "title" | ||
| 214 | - - "brief" | ||
| 215 | - - "description" | ||
| 216 | - - "seo_title" | ||
| 217 | - - "seo_description" | ||
| 218 | - - "seo_keywords" | ||
| 219 | - - "vendor" | 362 | + - "title_zh" |
| 363 | + - "brief_zh" | ||
| 364 | + - "description_zh" | ||
| 365 | + - "vendor_zh" | ||
| 220 | - "tags" | 366 | - "tags" |
| 221 | - - "category" | 367 | + - "category_path_zh" |
| 368 | + - "category_name_zh" | ||
| 222 | analyzer: "chinese_ecommerce" | 369 | analyzer: "chinese_ecommerce" |
| 223 | boost: 1.0 | 370 | boost: 1.0 |
| 224 | 371 | ||
| 225 | - name: "title" | 372 | - name: "title" |
| 226 | label: "标题索引" | 373 | label: "标题索引" |
| 227 | fields: | 374 | fields: |
| 228 | - - "title" | ||
| 229 | - - "seo_title" | 375 | + - "title_zh" |
| 230 | analyzer: "chinese_ecommerce" | 376 | analyzer: "chinese_ecommerce" |
| 231 | boost: 2.0 | 377 | boost: 2.0 |
| 232 | 378 | ||
| 233 | - name: "vendor" | 379 | - name: "vendor" |
| 234 | label: "品牌索引" | 380 | label: "品牌索引" |
| 235 | fields: | 381 | fields: |
| 236 | - - "vendor" | 382 | + - "vendor_zh" |
| 237 | analyzer: "chinese_ecommerce" | 383 | analyzer: "chinese_ecommerce" |
| 238 | boost: 1.5 | 384 | boost: 1.5 |
| 239 | 385 | ||
| 240 | - name: "category" | 386 | - name: "category" |
| 241 | label: "类目索引" | 387 | label: "类目索引" |
| 242 | fields: | 388 | fields: |
| 243 | - - "category" | 389 | + - "category_path_zh" |
| 390 | + - "category_name_zh" | ||
| 244 | analyzer: "chinese_ecommerce" | 391 | analyzer: "chinese_ecommerce" |
| 245 | boost: 1.5 | 392 | boost: 1.5 |
| 246 | 393 | ||
| @@ -248,7 +395,6 @@ indexes: | @@ -248,7 +395,6 @@ indexes: | ||
| 248 | label: "标签索引" | 395 | label: "标签索引" |
| 249 | fields: | 396 | fields: |
| 250 | - "tags" | 397 | - "tags" |
| 251 | - - "seo_keywords" | ||
| 252 | analyzer: "chinese_ecommerce" | 398 | analyzer: "chinese_ecommerce" |
| 253 | boost: 1.0 | 399 | boost: 1.0 |
| 254 | 400 |
config/config_loader.py
| @@ -313,7 +313,8 @@ class ConfigLoader: | @@ -313,7 +313,8 @@ class ConfigLoader: | ||
| 313 | nested=field_data.get("nested", False), | 313 | nested=field_data.get("nested", False), |
| 314 | nested_properties=field_data.get("nested_properties"), | 314 | nested_properties=field_data.get("nested_properties"), |
| 315 | keyword_subfield=field_data.get("keyword_subfield", is_hktext), | 315 | keyword_subfield=field_data.get("keyword_subfield", is_hktext), |
| 316 | - keyword_ignore_above=field_data.get("keyword_ignore_above", 256) | 316 | + keyword_ignore_above=field_data.get("keyword_ignore_above", 256), |
| 317 | + keyword_normalizer=field_data.get("keyword_normalizer") | ||
| 317 | ) | 318 | ) |
| 318 | 319 | ||
| 319 | def _parse_index_config(self, index_data: Dict[str, Any]) -> IndexConfig: | 320 | def _parse_index_config(self, index_data: Dict[str, Any]) -> IndexConfig: |
config/field_types.py
| @@ -75,6 +75,7 @@ class FieldConfig: | @@ -75,6 +75,7 @@ class FieldConfig: | ||
| 75 | # Hybrid Keyword Text (HKText) support | 75 | # Hybrid Keyword Text (HKText) support |
| 76 | keyword_subfield: bool = False | 76 | keyword_subfield: bool = False |
| 77 | keyword_ignore_above: int = 256 | 77 | keyword_ignore_above: int = 256 |
| 78 | + keyword_normalizer: Optional[str] = None # For keyword subfield normalizer (e.g., "lowercase") | ||
| 78 | 79 | ||
| 79 | 80 | ||
| 80 | def get_es_mapping_for_field(field_config: FieldConfig) -> Dict[str, Any]: | 81 | def get_es_mapping_for_field(field_config: FieldConfig) -> Dict[str, Any]: |
| @@ -100,18 +101,28 @@ def get_es_mapping_for_field(field_config: FieldConfig) -> Dict[str, Any]: | @@ -100,18 +101,28 @@ def get_es_mapping_for_field(field_config: FieldConfig) -> Dict[str, Any]: | ||
| 100 | if field_config.analyzer == AnalyzerType.CHINESE_ECOMMERCE: | 101 | if field_config.analyzer == AnalyzerType.CHINESE_ECOMMERCE: |
| 101 | mapping["analyzer"] = "index_ansj" | 102 | mapping["analyzer"] = "index_ansj" |
| 102 | mapping["search_analyzer"] = "query_ansj" | 103 | mapping["search_analyzer"] = "query_ansj" |
| 104 | + elif field_config.analyzer == AnalyzerType.CHINESE_ECOMMERCE_QUERY: | ||
| 105 | + # analyzer itself is CHINESE_ECOMMERCE_QUERY (e.g. configured as "hanlp_standard" or "query_ansj"): use the same ansj index/query analyzer pair | ||
| 106 | + mapping["analyzer"] = "index_ansj" | ||
| 107 | + mapping["search_analyzer"] = "query_ansj" | ||
| 103 | else: | 108 | else: |
| 104 | mapping["analyzer"] = field_config.analyzer.value | 109 | mapping["analyzer"] = field_config.analyzer.value |
| 105 | 110 | ||
| 106 | if field_config.search_analyzer: | 111 | if field_config.search_analyzer: |
| 107 | - mapping["search_analyzer"] = field_config.search_analyzer.value | 112 | + if field_config.search_analyzer == AnalyzerType.CHINESE_ECOMMERCE_QUERY: |
| 113 | + mapping["search_analyzer"] = "query_ansj" | ||
| 114 | + else: | ||
| 115 | + mapping["search_analyzer"] = field_config.search_analyzer.value | ||
| 108 | 116 | ||
| 109 | if field_config.keyword_subfield: | 117 | if field_config.keyword_subfield: |
| 110 | mapping.setdefault("fields", {}) | 118 | mapping.setdefault("fields", {}) |
| 111 | - mapping["fields"]["keyword"] = { | 119 | + keyword_field = { |
| 112 | "type": "keyword", | 120 | "type": "keyword", |
| 113 | "ignore_above": field_config.keyword_ignore_above | 121 | "ignore_above": field_config.keyword_ignore_above |
| 114 | } | 122 | } |
| 123 | + if field_config.keyword_normalizer: | ||
| 124 | + keyword_field["normalizer"] = field_config.keyword_normalizer | ||
| 125 | + mapping["fields"]["keyword"] = keyword_field | ||
| 115 | 126 | ||
| 116 | elif field_config.field_type == FieldType.KEYWORD: | 127 | elif field_config.field_type == FieldType.KEYWORD: |
| 117 | mapping = { | 128 | mapping = { |
| @@ -140,7 +151,7 @@ def get_es_mapping_for_field(field_config: FieldConfig) -> Dict[str, Any]: | @@ -140,7 +151,7 @@ def get_es_mapping_for_field(field_config: FieldConfig) -> Dict[str, Any]: | ||
| 140 | "similarity": field_config.embedding_similarity | 151 | "similarity": field_config.embedding_similarity |
| 141 | }, | 152 | }, |
| 142 | "url": { | 153 | "url": { |
| 143 | - "type": "keyword" | 154 | + "type": "text" |
| 144 | } | 155 | } |
| 145 | } | 156 | } |
| 146 | } | 157 | } |
| @@ -239,6 +250,22 @@ def get_default_analyzers() -> Dict[str, Any]: | @@ -239,6 +250,22 @@ def get_default_analyzers() -> Dict[str, Any]: | ||
| 239 | "type": "custom", | 250 | "type": "custom", |
| 240 | "tokenizer": "standard", | 251 | "tokenizer": "standard", |
| 241 | "filter": ["lowercase", "asciifolding"] | 252 | "filter": ["lowercase", "asciifolding"] |
| 253 | + }, | ||
| 254 | + "hanlp_index": { | ||
| 255 | + "type": "custom", | ||
| 256 | + "tokenizer": "standard", | ||
| 257 | + "filter": ["lowercase", "asciifolding"] | ||
| 258 | + }, | ||
| 259 | + "hanlp_standard": { | ||
| 260 | + "type": "custom", | ||
| 261 | + "tokenizer": "standard", | ||
| 262 | + "filter": ["lowercase", "asciifolding"] | ||
| 263 | + } | ||
| 264 | + }, | ||
| 265 | + "normalizer": { | ||
| 266 | + "lowercase": { | ||
| 267 | + "type": "custom", | ||
| 268 | + "filter": ["lowercase"] | ||
| 242 | } | 269 | } |
| 243 | } | 270 | } |
| 244 | } | 271 | } |
| @@ -300,6 +327,9 @@ ANALYZER_MAP = { | @@ -300,6 +327,9 @@ ANALYZER_MAP = { | ||
| 300 | "chinese": AnalyzerType.CHINESE_ECOMMERCE, | 327 | "chinese": AnalyzerType.CHINESE_ECOMMERCE, |
| 301 | "chinese_ecommerce": AnalyzerType.CHINESE_ECOMMERCE, | 328 | "chinese_ecommerce": AnalyzerType.CHINESE_ECOMMERCE, |
| 302 | "index_ansj": AnalyzerType.CHINESE_ECOMMERCE, | 329 | "index_ansj": AnalyzerType.CHINESE_ECOMMERCE, |
| 330 | + "hanlp_index": AnalyzerType.CHINESE_ECOMMERCE, # Alias for index_ansj | ||
| 331 | + "hanlp_standard": AnalyzerType.CHINESE_ECOMMERCE_QUERY, # Alias for query_ansj | ||
| 332 | + "query_ansj": AnalyzerType.CHINESE_ECOMMERCE_QUERY, | ||
| 303 | "english": AnalyzerType.ENGLISH, | 333 | "english": AnalyzerType.ENGLISH, |
| 304 | "arabic": AnalyzerType.ARABIC, | 334 | "arabic": AnalyzerType.ARABIC, |
| 305 | "spanish": AnalyzerType.SPANISH, | 335 | "spanish": AnalyzerType.SPANISH, |
| @@ -0,0 +1,231 @@ | @@ -0,0 +1,231 @@ | ||
| 1 | +{ | ||
| 2 | + "mappings": { | ||
| 3 | + "properties": { | ||
| 4 | + "tenant_id": { | ||
| 5 | + "type": "keyword" | ||
| 6 | + }, | ||
| 7 | + "spu_id": { | ||
| 8 | + "type": "keyword" | ||
| 9 | + }, | ||
| 10 | + | ||
| 11 | + "create_time": { | ||
| 12 | + "type": "date" | ||
| 13 | + }, | ||
| 14 | + "update_time": { | ||
| 15 | + "type": "date" | ||
| 16 | + }, | ||
| 17 | + | ||
| 18 | + // 文本相关性相关字段 | ||
| 19 | + "title_zh": { | ||
| 20 | + "type": "text", | ||
| 21 | + "analyzer": "hanlp_index", | ||
| 22 | + "search_analyzer": "hanlp_standard" | ||
| 23 | + }, | ||
| 24 | + "brief_zh": { | ||
| 25 | + "type": "text", | ||
| 26 | + "analyzer": "hanlp_index", | ||
| 27 | + "search_analyzer": "hanlp_standard" | ||
| 28 | + }, | ||
| 29 | + "description_zh": { | ||
| 30 | + "type": "text", | ||
| 31 | + "analyzer": "hanlp_index", | ||
| 32 | + "search_analyzer": "hanlp_standard" | ||
| 33 | + }, | ||
| 34 | + "vendor_zh": { | ||
| 35 | + "type": "text", | ||
| 36 | + "analyzer": "hanlp_index", | ||
| 37 | + "search_analyzer": "hanlp_standard", | ||
| 38 | + "fields": { | ||
| 39 | + "keyword": { | ||
| 40 | + "type": "keyword", | ||
| 41 | + "normalizer": "lowercase" | ||
| 42 | + } | ||
| 43 | + } | ||
| 44 | + }, | ||
| 45 | + | ||
| 46 | + "title_en": { | ||
| 47 | + "type": "text", | ||
| 48 | + "analyzer": "english", | ||
| 49 | + "search_analyzer": "english", | ||
| 50 | + }, | ||
| 51 | + "brief_en": { | ||
| 52 | + "type": "text", | ||
| 53 | + "analyzer": "english", | ||
| 54 | + "search_analyzer": "english", | ||
| 55 | + | ||
| 56 | + }, | ||
| 57 | + "description_en": { | ||
| 58 | + "type": "text", | ||
| 59 | + "analyzer": "english", | ||
| 60 | + "search_analyzer": "english", | ||
| 61 | + }, | ||
| 62 | + "vendor_en": { | ||
| 63 | + "type": "text", | ||
| 64 | + "analyzer": "english", | ||
| 65 | + "search_analyzer": "english", | ||
| 66 | + "fields": { | ||
| 67 | + "keyword": { | ||
| 68 | + "type": "keyword", | ||
| 69 | + "normalizer": "lowercase" | ||
| 70 | + } | ||
| 71 | + } | ||
| 72 | + }, | ||
| 73 | + | ||
| 74 | + "tags": { | ||
| 75 | + "type": "keyword", | ||
| 76 | + }, | ||
| 77 | + | ||
| 78 | + "image_url": { | ||
| 79 | + "type": "keyword", | ||
| 80 | + "index": false | ||
| 81 | + }, | ||
| 82 | + | ||
| 83 | + // 语义向量 | ||
| 84 | + "title_embedding": { | ||
| 85 | + "type": "dense_vector", | ||
| 86 | + "dims": 1024, | ||
| 87 | + "index": true, | ||
| 88 | + "similarity": "dot_product" | ||
| 89 | + }, | ||
| 90 | + "image_embedding": { | ||
| 91 | + "type": "nested", | ||
| 92 | + "properties": { | ||
| 93 | + "vector": { | ||
| 94 | + "type": "dense_vector", | ||
| 95 | + "dims": 1024, | ||
| 96 | + "index": true, | ||
| 97 | + "similarity": "dot_product" | ||
| 98 | + }, | ||
| 99 | + "url": { | ||
| 100 | + "type": "text" | ||
| 101 | + } | ||
| 102 | + } | ||
| 103 | + }, | ||
| 104 | + | ||
| 105 | + // 分类相关 | ||
| 106 | + "category_path_zh": { // 提供模糊查询功能,辅助相关性计算 | ||
| 107 | + "type": "text", | ||
| 108 | + "analyzer": "hanlp_index", | ||
| 109 | + "search_analyzer": "hanlp_standard" | ||
| 110 | + }, | ||
| 111 | + "category_path_en": { // 提供模糊查询功能,辅助相关性计算 | ||
| 112 | + "type": "text", | ||
| 113 | + "analyzer": "english", | ||
| 114 | + "search_analyzer": "english" | ||
| 115 | + }, | ||
| 116 | + "category_name_zh": { // 提供模糊查询功能,辅助相关性计算 | ||
| 117 | + "type": "text", | ||
| 118 | + "analyzer": "hanlp_index", | ||
| 119 | + "search_analyzer": "hanlp_standard" | ||
| 120 | + }, | ||
| 121 | + "category_name_en": { // 提供模糊查询功能,辅助相关性计算 | ||
| 122 | + "type": "text", | ||
| 123 | + "analyzer": "english", | ||
| 124 | + "search_analyzer": "english" | ||
| 125 | + }, | ||
| 126 | + | ||
| 127 | + "category_id": { | ||
| 128 | + "type": "keyword" | ||
| 129 | + }, | ||
| 130 | + "category_name": { | ||
| 131 | + "type": "keyword" | ||
| 132 | + }, | ||
| 133 | + "category_level": { | ||
| 134 | + "type": "integer" | ||
| 135 | + }, | ||
| 136 | + "category1_name": { // 不同层级下 可能有同名的情况,因此提供一二三级分开的查询方式 | ||
| 137 | + "type": "keyword" | ||
| 138 | + }, | ||
| 139 | + "category2_name": { | ||
| 140 | + "type": "keyword" | ||
| 141 | + }, | ||
| 142 | + "category3_name": { | ||
| 143 | + "type": "keyword" | ||
| 144 | + }, | ||
| 145 | + | ||
| 146 | + // sku款式、子sku属性 | ||
| 147 | + "specifications": { | ||
| 148 | + "type": "nested", | ||
| 149 | + "properties": { | ||
| 150 | + "sku_id": { "type": "keyword" }, | ||
| 151 | + "name": { "type": "keyword" }, // "颜色", "容量" | ||
| 152 | + "value": { "type": "keyword" } // "白色", "256GB" | ||
| 153 | + } | ||
| 154 | + }, | ||
| 155 | + | ||
| 156 | + "option1_name": { | ||
| 157 | + "type": "keyword" | ||
| 158 | + }, | ||
| 159 | + "option2_name": { | ||
| 160 | + "type": "keyword" | ||
| 161 | + }, | ||
| 162 | + "option3_name": { | ||
| 163 | + "type": "keyword" | ||
| 164 | + }, | ||
| 165 | + | ||
| 166 | + "min_price": { | ||
| 167 | + "type": "float" | ||
| 168 | + }, | ||
| 169 | + "max_price": { | ||
| 170 | + "type": "float" | ||
| 171 | + }, | ||
| 172 | + "compare_at_price": { | ||
| 173 | + "type": "float" | ||
| 174 | + }, | ||
| 175 | + "sku_prices": { | ||
| 176 | + "type": "float" | ||
| 177 | + }, | ||
| 178 | + "sku_weights": { | ||
| 179 | + "type": "long" | ||
| 180 | + }, | ||
| 181 | + "sku_weight_units": { | ||
| 182 | + "type": "keyword" | ||
| 183 | + }, | ||
| 184 | + "total_inventory": { | ||
| 185 | + "type": "long" | ||
| 186 | + }, | ||
| 187 | + | ||
| 188 | + "skus": { | ||
| 189 | + "type": "nested", | ||
| 190 | + "properties": { | ||
| 191 | + "sku_id": { | ||
| 192 | + "type": "keyword" | ||
| 193 | + }, | ||
| 194 | + "price": { | ||
| 195 | + "type": "float" | ||
| 196 | + }, | ||
| 197 | + "compare_at_price": { | ||
| 198 | + "type": "float" | ||
| 199 | + }, | ||
| 200 | + "sku_code": { | ||
| 201 | + "type": "keyword" | ||
| 202 | + }, | ||
| 203 | + "stock": { | ||
| 204 | + "type": "long" | ||
| 205 | + }, | ||
| 206 | + "weight": { | ||
| 207 | + "type": "float" | ||
| 208 | + }, | ||
| 209 | + "weight_unit": { | ||
| 210 | + "type": "keyword" | ||
| 211 | + }, | ||
| 212 | + "option1_value": { | ||
| 213 | + "type": "keyword" | ||
| 214 | + }, | ||
| 215 | + "option2_value": { | ||
| 216 | + "type": "keyword" | ||
| 217 | + }, | ||
| 218 | + "option3_value": { | ||
| 219 | + "type": "keyword" | ||
| 220 | + }, | ||
| 221 | + "image_src": { | ||
| 222 | + "type": "keyword", | ||
| 223 | + "index": false | ||
| 224 | + } | ||
| 225 | + } | ||
| 226 | + } | ||
| 227 | + } | ||
| 228 | + } | ||
| 229 | +} | ||
| 230 | + | ||
| 231 | + |
docs/索引字段说明v2.md
| @@ -4,247 +4,34 @@ SPU-SKU索引方案选型 | @@ -4,247 +4,34 @@ SPU-SKU索引方案选型 | ||
| 4 | 除了title, brief description seo相关 cate tags vendor所有影响相关性的字段都在spu。 sku只有款式、价格、重量、库存等相关属性。所以,可以以spu为单位建立索引。 | 4 | 除了title, brief description seo相关 cate tags vendor所有影响相关性的字段都在spu。 sku只有款式、价格、重量、库存等相关属性。所以,可以以spu为单位建立索引。 |
| 5 | sku中需要参与搜索的属性(比如价格、库存)展开到spu。 | 5 | sku中需要参与搜索的属性(比如价格、库存)展开到spu。 |
| 6 | sku的所有需要返回的字段作为nested字段,仅用于返回。 | 6 | sku的所有需要返回的字段作为nested字段,仅用于返回。 |
| 7 | -灌入数据准备 | ||
| 8 | -def build_product_document(product, skus): | ||
| 9 | - # 提取价格列表(转换为float,保留两位小数) | ||
| 10 | - price_list = [float(sku.price) for sku in skus if sku.price is not None] | ||
| 11 | - | ||
| 12 | - # 提取重量信息(重量转为int,单位统一为克;重量+单位拼接为字符串) | ||
| 13 | - weight_list = [int(float(sku.weight) * 1000) for sku in skus if sku.weight is not None] # 转为整数克 | ||
| 14 | - weight_with_unit_list = [f"{sku.weight}{sku.weight_unit}" for sku in skus if sku.weight and sku.weight_unit] | ||
| 15 | - | ||
| 16 | - # 计算库存总和 | ||
| 17 | - total_stock = sum([sku.inventory_quantity for sku in skus if sku.inventory_quantity is not None]) | ||
| 18 | - | ||
| 19 | - # 计算价格区间 | ||
| 20 | - min_price = min(price_list) if price_list else 0.0 | ||
| 21 | - max_price = max(price_list) if price_list else 0.0 | ||
| 22 | - | 7 | +# 写入 spu 级别索引 |
| 8 | +def build_product_document(product, variants): | ||
| 23 | return { | 9 | return { |
| 24 | "spu_id": str(product.id), | 10 | "spu_id": str(product.id), |
| 25 | "title": product.title, | 11 | "title": product.title, |
| 26 | 12 | ||
| 27 | - # SPU级别的选项名称定义(如:颜色、尺码、材质) | ||
| 28 | - "option1_name": getattr(product, 'option1', None), | ||
| 29 | - "option2_name": getattr(product, 'option2', None), | ||
| 30 | - "option3_name": getattr(product, 'option3', None), | ||
| 31 | - | ||
| 32 | - # SKU搜索字段(展开) | 13 | + # Variant搜索字段(展开) |
| 33 | # 价格(int)、重量(int)、重量单位拼接重量(keyword),都以list形式灌入 | 14 | # 价格(int)、重量(int)、重量单位拼接重量(keyword),都以list形式灌入 |
| 34 | - "sku_prices": price_list, # 所有SKU价格列表,用于范围聚合 | ||
| 35 | - "sku_weights": weight_list, # 重量数值列表(转换为整数克) | ||
| 36 | - "sku_weight_units": weight_with_unit_list, # 重量+单位字符串列表 | ||
| 37 | - | ||
| 38 | - # 库存总和 将SKU的库存加起来作为一个值灌入 | ||
| 39 | - "total_inventory": total_stock, # SKU库存总和 | ||
| 40 | - | ||
| 41 | - # 售价,灌入3个字段:SKU价格列表、最高价、最低价 | ||
| 42 | - "min_price": min_price, # 最低售价 | ||
| 43 | - "max_price": max_price, # 最高售价 | ||
| 44 | - "price_range": { # 价格区间对象,便于范围查询 | ||
| 45 | - "gte": min_price, | ||
| 46 | - "lte": max_price | ||
| 47 | - }, | ||
| 48 | - | ||
| 49 | - # SKU详细信息(nested结构,仅用于返回) | ||
| 50 | - "skus": [ | ||
| 51 | - { | ||
| 52 | - "sku_id": str(sku.id), | ||
| 53 | - "price": float(sku.price) if sku.price else 0.0, | ||
| 54 | - "compare_at_price": float(sku.compare_at_price) if sku.compare_at_price else None, | ||
| 55 | - "sku_code": sku.sku, | ||
| 56 | - "stock": sku.inventory_quantity, | ||
| 57 | - "weight": float(sku.weight) if sku.weight else None, | ||
| 58 | - "weight_unit": sku.weight_unit, | ||
| 59 | - | ||
| 60 | - # SKU级别的选项值(对应SPU的选项名称) | ||
| 61 | - "option1_value": sku.option1, | ||
| 62 | - "option2_value": sku.option2, | ||
| 63 | - "option3_value": sku.option3, | 15 | + # TODO 按要求补充 |
| 16 | + | ||
| 17 | + # 库存总和 将sku的库存加起来作为一个值灌入 | ||
| 18 | + # 售价,灌入3个字段,一个 sku价格 以list形式灌入,一个最高价一个最低价 | ||
| 19 | + # TODO 按要求补充 | ||
| 64 | 20 | ||
| 65 | - "image_src": sku.image_src | 21 | + # Variant详细信息(用于返回) |
| 22 | + "variants": [ | ||
| 23 | + { | ||
| 24 | + "sku_id": str(v.id), | ||
| 25 | + "price": float(v.price), | ||
| 26 | + "options": v.options | ||
| 66 | } | 27 | } |
| 67 | - for sku in skus | 28 | + for v in variants |
| 68 | ], | 29 | ], |
| 69 | - | ||
| 70 | - # 其他SPU级别字段(根据索引文档补充) | ||
| 71 | - "tenant_id": str(product.tenant_id), | ||
| 72 | - "brief": product.brief, | ||
| 73 | - "description": product.description, | ||
| 74 | - "vendor": product.vendor, | ||
| 75 | - "category": product.category, | ||
| 76 | - "tags": product.tags.split(',') if product.tags else [], | ||
| 77 | - "seo_title": product.seo_title, | ||
| 78 | - "seo_description": product.seo_description, | ||
| 79 | - "seo_keywords": product.seo_keywords.split(',') if product.seo_keywords else [], | ||
| 80 | - "image_url": product.image_src, | ||
| 81 | - "create_time": product.create_time.isoformat() if product.create_time else None, | ||
| 82 | - "update_time": product.update_time.isoformat() if product.update_time else None | ||
| 83 | - } | ||
| 84 | - 索引定义 | ||
| 85 | -{ | ||
| 86 | - "mappings": { | ||
| 87 | - "properties": { | ||
| 88 | - "tenant_id": { | ||
| 89 | - "type": "keyword" | ||
| 90 | - }, | ||
| 91 | - "spu_id": { | ||
| 92 | - "type": "keyword" | ||
| 93 | - }, | ||
| 94 | - // 文本相关性相关字段 | ||
| 95 | - "title_zh": { | ||
| 96 | - "type": "text", | ||
| 97 | - "analyzer": "hanlp_index", | ||
| 98 | - "search_analyzer": "hanlp_standard" | ||
| 99 | - }, | ||
| 100 | - "brief_zh": { | ||
| 101 | - "type": "text", | ||
| 102 | - "analyzer": "hanlp_index", | ||
| 103 | - "search_analyzer": "hanlp_standard" | ||
| 104 | - }, | ||
| 105 | - "description_zh": { | ||
| 106 | - "type": "text", | ||
| 107 | - "analyzer": "hanlp_index", | ||
| 108 | - "search_analyzer": "hanlp_standard" | ||
| 109 | - }, | ||
| 110 | - "vendor_zh": { | ||
| 111 | - "type": "text", | ||
| 112 | - "analyzer": "hanlp_index", | ||
| 113 | - "search_analyzer": "hanlp_standard", | ||
| 114 | - "fields": { | ||
| 115 | - "keyword": { | ||
| 116 | - "type": "keyword", | ||
| 117 | - "normalizer": "lowercase" | ||
| 118 | - } | ||
| 119 | - } | ||
| 120 | - }, | ||
| 121 | - | ||
| 122 | - "title_en": { | ||
| 123 | - "type": "text", | ||
| 124 | - "analyzer": "english", | ||
| 125 | - "search_analyzer": "english", | ||
| 126 | - }, | ||
| 127 | - "brief_en": { | ||
| 128 | - "type": "text", | ||
| 129 | - "analyzer": "english", | ||
| 130 | - "search_analyzer": "english", | ||
| 131 | 30 | ||
| 132 | - }, | ||
| 133 | - "description_en": { | ||
| 134 | - "type": "text", | ||
| 135 | - "analyzer": "english", | ||
| 136 | - "search_analyzer": "english", | ||
| 137 | - }, | ||
| 138 | - "vendor_en": { | ||
| 139 | - "type": "text", | ||
| 140 | - "analyzer": "english", | ||
| 141 | - "search_analyzer": "english", | ||
| 142 | - "fields": { | ||
| 143 | - "keyword": { | ||
| 144 | - "type": "keyword", | ||
| 145 | - "normalizer": "lowercase" | ||
| 146 | - } | ||
| 147 | - } | ||
| 148 | - }, | ||
| 149 | - | ||
| 150 | - "tags": { | ||
| 151 | - "type": "keyword", | ||
| 152 | - }, | ||
| 153 | - | ||
| 154 | - | ||
| 155 | - "min_price": { | ||
| 156 | - "type": "float" | ||
| 157 | - }, | ||
| 158 | - "max_price": { | ||
| 159 | - "type": "float" | ||
| 160 | - }, | ||
| 161 | - "compare_at_price": { | ||
| 162 | - "type": "float" | ||
| 163 | - }, | ||
| 164 | - "sku_prices": { | ||
| 165 | - "type": "float" | ||
| 166 | - }, | ||
| 167 | - "sku_weights": { | ||
| 168 | - "type": "long" | ||
| 169 | - }, | ||
| 170 | - "sku_weight_units": { | ||
| 171 | - "type": "keyword" | ||
| 172 | - }, | ||
| 173 | - "total_inventory": { | ||
| 174 | - "type": "long" | ||
| 175 | - }, | ||
| 176 | - | ||
| 177 | - "image_url": { | ||
| 178 | - "type": "keyword", | ||
| 179 | - "index": false | ||
| 180 | - }, | ||
| 181 | - | ||
| 182 | - "title_embedding": { | ||
| 183 | - "type": "dense_vector", | ||
| 184 | - "dims": 1024, | ||
| 185 | - "index": true, | ||
| 186 | - "similarity": "dot_product" | ||
| 187 | - }, | ||
| 188 | - | ||
| 189 | - "create_time": { | ||
| 190 | - "type": "date" | ||
| 191 | - }, | ||
| 192 | - "update_time": { | ||
| 193 | - "type": "date" | ||
| 194 | - }, | ||
| 195 | - | ||
| 196 | - "option1_name": { | ||
| 197 | - "type": "keyword" | ||
| 198 | - }, | ||
| 199 | - "option2_name": { | ||
| 200 | - "type": "keyword" | ||
| 201 | - }, | ||
| 202 | - "option3_name": { | ||
| 203 | - "type": "keyword" | ||
| 204 | - }, | ||
| 205 | - | ||
| 206 | - "skus": { | ||
| 207 | - "type": "nested", | ||
| 208 | - "properties": { | ||
| 209 | - "sku_id": { | ||
| 210 | - "type": "keyword" | ||
| 211 | - }, | ||
| 212 | - "price": { | ||
| 213 | - "type": "float" | ||
| 214 | - }, | ||
| 215 | - "compare_at_price": { | ||
| 216 | - "type": "float" | ||
| 217 | - }, | ||
| 218 | - "sku_code": { | ||
| 219 | - "type": "keyword" | ||
| 220 | - }, | ||
| 221 | - "stock": { | ||
| 222 | - "type": "long" | ||
| 223 | - }, | ||
| 224 | - "weight": { | ||
| 225 | - "type": "float" | ||
| 226 | - }, | ||
| 227 | - "weight_unit": { | ||
| 228 | - "type": "keyword" | ||
| 229 | - }, | ||
| 230 | - "option1_value": { | ||
| 231 | - "type": "keyword" | ||
| 232 | - }, | ||
| 233 | - "option2_value": { | ||
| 234 | - "type": "keyword" | ||
| 235 | - }, | ||
| 236 | - "option3_value": { | ||
| 237 | - "type": "keyword" | ||
| 238 | - }, | ||
| 239 | - "image_src": { | ||
| 240 | - "type": "keyword", | ||
| 241 | - "index": false | ||
| 242 | - } | ||
| 243 | - } | ||
| 244 | - } | 31 | + |
| 32 | + "min_price": min(v.price for v in variants), | ||
| 33 | + "max_price": max(v.price for v in variants) | ||
| 245 | } | 34 | } |
| 246 | - } | ||
| 247 | -} | ||
| 248 | 1.2 查询方案 | 35 | 1.2 查询方案 |
| 249 | 对数组字段使用 dis_max,只取最高分,避免累加。 | 36 | 对数组字段使用 dis_max,只取最高分,避免累加。 |
| 250 | 其他重点字段 | 37 | 其他重点字段 |
| @@ -333,26 +120,56 @@ S red | @@ -333,26 +120,56 @@ S red | ||
| 333 | 1. API 在 SPU 的维度直接返回3个属性定义,存储在 shoplazza_product_option 中: | 120 | 1. API 在 SPU 的维度直接返回3个属性定义,存储在 shoplazza_product_option 中: |
| 334 | 1. API在 SKU的维度直接返回3个属性值,存储在 shoplazza_product_sku 表的 option 相关的字段中: | 121 | 1. API在 SKU的维度直接返回3个属性值,存储在 shoplazza_product_sku 表的 option 相关的字段中: |
| 335 | 5.3 ES索引 | 122 | 5.3 ES索引 |
| 336 | -5.3.1 | ||
| 337 | - 3nested,支持超过3个属性(动态)。只用作返回,不能查询。节省索引空间 | 123 | + |
| 338 | "specifications": { | 124 | "specifications": { |
| 339 | "type": "nested", | 125 | "type": "nested", |
| 340 | "properties": { | 126 | "properties": { |
| 341 | - "name": { "type": "keyword","index": false }, | ||
| 342 | - "value": { "type": "keyword","index": false } | 127 | + "name": { "type": "keyword" }, // "颜色", "容量" |
| 128 | + "value": { "type": "keyword" } // "白色", "256GB" | ||
| 343 | } | 129 | } |
| 344 | }, | 130 | }, |
| 345 | 131 | ||
| 346 | -6. SEO相关字段 | ||
| 347 | -6.1 数据源 | ||
| 348 | -SEO标题 SEO描述 SEO URL Handle SEO URL 重定向 SEO关键词 | ||
| 349 | -最多5000字符 最多5000字符 "最多支持输入255字符 | ||
| 350 | - (SEO URL handle只对SEO URL的「URL参数」部分进行更改,即“products/”后的内容,如:products/「URL参数」 | ||
| 351 | - )" "创建URL重定向,访问修改前链接可跳转到修改后的新链接页面 | ||
| 352 | -「Y」:TRUE | ||
| 353 | -「N」:FALSE " 多个关键词请用「英文逗号」隔开 | ||
| 354 | - | ||
| 355 | -6.2 Mysql | ||
| 356 | -6.3 ES索引 | ||
| 357 | -6.3.1 输入数据 | ||
| 358 | -6.3.2 索引方法 | ||
| 359 | \ No newline at end of file | 132 | \ No newline at end of file |
| 133 | + 另外还需要包含一个单独的字段,main_option (即店铺主题装修里面配置的 颜色切换 - 变体名称,也就是列表页商品的子sku显示维度) | ||
| 134 | + "main_option": { "type": "keyword" } | ||
| 135 | +查询指定款式 | ||
| 136 | +{ | ||
| 137 | + "query": { | ||
| 138 | + "nested": { | ||
| 139 | + "path": "specifications", | ||
| 140 | + "query": { | ||
| 141 | + "bool": { | ||
| 142 | + "must": [ | ||
| 143 | + { "term": { "specifications.name": "颜色" } }, | ||
| 144 | + { "term": { "specifications.value": "绿色" } } | ||
| 145 | + ] | ||
| 146 | + } | ||
| 147 | + } | ||
| 148 | + } | ||
| 149 | + } | ||
| 150 | +} | ||
| 151 | +按 name 做分面搜索(聚合) | ||
| 152 | + | ||
| 153 | +{ | ||
| 154 | + "aggs": { | ||
| 155 | + "specs": { | ||
| 156 | + "nested": { "path": "specifications" }, | ||
| 157 | + "aggs": { | ||
| 158 | + "by_name": { | ||
| 159 | + "terms": { | ||
| 160 | + "field": "specifications.name", | ||
| 161 | + "size": 20 | ||
| 162 | + }, | ||
| 163 | + "aggs": { | ||
| 164 | + "value_counts": { | ||
| 165 | + "terms": { | ||
| 166 | + "field": "specifications.value", | ||
| 167 | + "size": 10 | ||
| 168 | + } | ||
| 169 | + } | ||
| 170 | + } | ||
| 171 | + } | ||
| 172 | + } | ||
| 173 | + } | ||
| 174 | + } | ||
| 175 | +} | ||
| 176 | + | ||
| 360 | \ No newline at end of file | 177 | \ No newline at end of file |
indexer/spu_transformer.py
| @@ -38,12 +38,12 @@ class SPUTransformer: | @@ -38,12 +38,12 @@ class SPUTransformer: | ||
| 38 | """ | 38 | """ |
| 39 | query = text(""" | 39 | query = text(""" |
| 40 | SELECT | 40 | SELECT |
| 41 | - id, shop_id, shoplazza_id, handle, title, brief, description, | ||
| 42 | - spu, vendor, vendor_url, seo_title, seo_description, seo_keywords, | 41 | + id, shop_id, shoplazza_id, title, brief, description, |
| 42 | + spu, vendor, vendor_url, | ||
| 43 | image_src, image_width, image_height, image_path, image_alt, | 43 | image_src, image_width, image_height, image_path, image_alt, |
| 44 | - tags, note, category, | ||
| 45 | - shoplazza_created_at, shoplazza_updated_at, tenant_id, | ||
| 46 | - creator, create_time, updater, update_time, deleted | 44 | + tags, note, category, category_id, category_google_id, |
| 45 | + category_level, category_path, | ||
| 46 | + tenant_id, creator, create_time, updater, update_time, deleted | ||
| 47 | FROM shoplazza_product_spu | 47 | FROM shoplazza_product_spu |
| 48 | WHERE tenant_id = :tenant_id AND deleted = 0 | 48 | WHERE tenant_id = :tenant_id AND deleted = 0 |
| 49 | """) | 49 | """) |
| @@ -114,6 +114,30 @@ class SPUTransformer: | @@ -114,6 +114,30 @@ class SPUTransformer: | ||
| 114 | 114 | ||
| 115 | return df | 115 | return df |
| 116 | 116 | ||
| 117 | + def load_option_data(self) -> pd.DataFrame: | ||
| 118 | + """ | ||
| 119 | + Load option data from MySQL. | ||
| 120 | + | ||
| 121 | + Returns: | ||
| 122 | + DataFrame with option data (name, position for each SPU) | ||
| 123 | + """ | ||
| 124 | + query = text(""" | ||
| 125 | + SELECT | ||
| 126 | + id, spu_id, shop_id, shoplazza_id, shoplazza_product_id, | ||
| 127 | + position, name, values, tenant_id, | ||
| 128 | + creator, create_time, updater, update_time, deleted | ||
| 129 | + FROM shoplazza_product_option | ||
| 130 | + WHERE tenant_id = :tenant_id AND deleted = 0 | ||
| 131 | + ORDER BY spu_id, position | ||
| 132 | + """) | ||
| 133 | + | ||
| 134 | + with self.db_engine.connect() as conn: | ||
| 135 | + df = pd.read_sql(query, conn, params={"tenant_id": self.tenant_id}) | ||
| 136 | + | ||
| 137 | + print(f"DEBUG: Loaded {len(df)} option records for tenant_id={self.tenant_id}") | ||
| 138 | + | ||
| 139 | + return df | ||
| 140 | + | ||
| 117 | def transform_batch(self) -> List[Dict[str, Any]]: | 141 | def transform_batch(self) -> List[Dict[str, Any]]: |
| 118 | """ | 142 | """ |
| 119 | Transform SPU and SKU data into ES documents. | 143 | Transform SPU and SKU data into ES documents. |
| @@ -124,12 +148,16 @@ class SPUTransformer: | @@ -124,12 +148,16 @@ class SPUTransformer: | ||
| 124 | # Load data | 148 | # Load data |
| 125 | spu_df = self.load_spu_data() | 149 | spu_df = self.load_spu_data() |
| 126 | sku_df = self.load_sku_data() | 150 | sku_df = self.load_sku_data() |
| 151 | + option_df = self.load_option_data() | ||
| 127 | 152 | ||
| 128 | if spu_df.empty: | 153 | if spu_df.empty: |
| 129 | return [] | 154 | return [] |
| 130 | 155 | ||
| 131 | # Group SKUs by SPU | 156 | # Group SKUs by SPU |
| 132 | sku_groups = sku_df.groupby('spu_id') | 157 | sku_groups = sku_df.groupby('spu_id') |
| 158 | + | ||
| 159 | + # Group options by SPU | ||
| 160 | + option_groups = option_df.groupby('spu_id') if not option_df.empty else None | ||
| 133 | 161 | ||
| 134 | documents = [] | 162 | documents = [] |
| 135 | for _, spu_row in spu_df.iterrows(): | 163 | for _, spu_row in spu_df.iterrows(): |
| @@ -138,8 +166,11 @@ class SPUTransformer: | @@ -138,8 +166,11 @@ class SPUTransformer: | ||
| 138 | # Get SKUs for this SPU | 166 | # Get SKUs for this SPU |
| 139 | skus = sku_groups.get_group(spu_id) if spu_id in sku_groups.groups else pd.DataFrame() | 167 | skus = sku_groups.get_group(spu_id) if spu_id in sku_groups.groups else pd.DataFrame() |
| 140 | 168 | ||
| 169 | + # Get options for this SPU | ||
| 170 | + options = option_groups.get_group(spu_id) if option_groups and spu_id in option_groups.groups else pd.DataFrame() | ||
| 171 | + | ||
| 141 | # Transform to ES document | 172 | # Transform to ES document |
| 142 | - doc = self._transform_spu_to_doc(spu_row, skus) | 173 | + doc = self._transform_spu_to_doc(spu_row, skus, options) |
| 143 | if doc: | 174 | if doc: |
| 144 | documents.append(doc) | 175 | documents.append(doc) |
| 145 | 176 | ||
| @@ -148,7 +179,8 @@ class SPUTransformer: | @@ -148,7 +179,8 @@ class SPUTransformer: | ||
| 148 | def _transform_spu_to_doc( | 179 | def _transform_spu_to_doc( |
| 149 | self, | 180 | self, |
| 150 | spu_row: pd.Series, | 181 | spu_row: pd.Series, |
| 151 | - skus: pd.DataFrame | 182 | + skus: pd.DataFrame, |
| 183 | + options: pd.DataFrame | ||
| 152 | ) -> Optional[Dict[str, Any]]: | 184 | ) -> Optional[Dict[str, Any]]: |
| 153 | """ | 185 | """ |
| 154 | Transform a single SPU row and its SKUs into an ES document. | 186 | Transform a single SPU row and its SKUs into an ES document. |
| @@ -156,6 +188,7 @@ class SPUTransformer: | @@ -156,6 +188,7 @@ class SPUTransformer: | ||
| 156 | Args: | 188 | Args: |
| 157 | spu_row: SPU row from database | 189 | spu_row: SPU row from database |
| 158 | skus: DataFrame with SKUs for this SPU | 190 | skus: DataFrame with SKUs for this SPU |
| 191 | + options: DataFrame with options for this SPU | ||
| 159 | 192 | ||
| 160 | Returns: | 193 | Returns: |
| 161 | ES document or None if transformation fails | 194 | ES document or None if transformation fails |
| @@ -168,41 +201,66 @@ class SPUTransformer: | @@ -168,41 +201,66 @@ class SPUTransformer: | ||
| 168 | # SPU ID | 201 | # SPU ID |
| 169 | doc['spu_id'] = str(spu_row['id']) | 202 | doc['spu_id'] = str(spu_row['id']) |
| 170 | 203 | ||
| 171 | - # Handle | ||
| 172 | - if pd.notna(spu_row.get('handle')): | ||
| 173 | - doc['handle'] = str(spu_row['handle']) | ||
| 174 | - | ||
| 175 | - # Title | 204 | + # 文本相关性相关字段(中英文双语,暂时只填充中文) |
| 176 | if pd.notna(spu_row.get('title')): | 205 | if pd.notna(spu_row.get('title')): |
| 177 | - doc['title'] = str(spu_row['title']) | 206 | + doc['title_zh'] = str(spu_row['title']) |
| 207 | + doc['title_en'] = None # 暂时设为空 | ||
| 178 | 208 | ||
| 179 | - # Brief | ||
| 180 | if pd.notna(spu_row.get('brief')): | 209 | if pd.notna(spu_row.get('brief')): |
| 181 | - doc['brief'] = str(spu_row['brief']) | 210 | + doc['brief_zh'] = str(spu_row['brief']) |
| 211 | + doc['brief_en'] = None | ||
| 182 | 212 | ||
| 183 | - # Description | ||
| 184 | if pd.notna(spu_row.get('description')): | 213 | if pd.notna(spu_row.get('description')): |
| 185 | - doc['description'] = str(spu_row['description']) | 214 | + doc['description_zh'] = str(spu_row['description']) |
| 215 | + doc['description_en'] = None | ||
| 186 | 216 | ||
| 187 | - # SEO fields | ||
| 188 | - if pd.notna(spu_row.get('seo_title')): | ||
| 189 | - doc['seo_title'] = str(spu_row['seo_title']) | ||
| 190 | - if pd.notna(spu_row.get('seo_description')): | ||
| 191 | - doc['seo_description'] = str(spu_row['seo_description']) | ||
| 192 | - if pd.notna(spu_row.get('seo_keywords')): | ||
| 193 | - doc['seo_keywords'] = str(spu_row['seo_keywords']) | ||
| 194 | - | ||
| 195 | - # Vendor | ||
| 196 | if pd.notna(spu_row.get('vendor')): | 217 | if pd.notna(spu_row.get('vendor')): |
| 197 | - doc['vendor'] = str(spu_row['vendor']) | 218 | + doc['vendor_zh'] = str(spu_row['vendor']) |
| 219 | + doc['vendor_en'] = None | ||
| 198 | 220 | ||
| 199 | # Tags | 221 | # Tags |
| 200 | if pd.notna(spu_row.get('tags')): | 222 | if pd.notna(spu_row.get('tags')): |
| 201 | - doc['tags'] = str(spu_row['tags']) | 223 | + # Tags是逗号分隔的字符串,需要转换为数组 |
| 224 | + tags_str = str(spu_row['tags']) | ||
| 225 | + doc['tags'] = [tag.strip() for tag in tags_str.split(',') if tag.strip()] | ||
| 226 | + | ||
| 227 | + # Category相关字段 | ||
| 228 | + if pd.notna(spu_row.get('category_path')): | ||
| 229 | + category_path = str(spu_row['category_path']) | ||
| 230 | + doc['category_path_zh'] = category_path | ||
| 231 | + doc['category_path_en'] = None # 暂时设为空 | ||
| 232 | + | ||
| 233 | + # 解析category_path获取多层级分类名称 | ||
| 234 | + path_parts = category_path.split('/') | ||
| 235 | + if len(path_parts) > 0: | ||
| 236 | + doc['category1_name'] = path_parts[0].strip() | ||
| 237 | + if len(path_parts) > 1: | ||
| 238 | + doc['category2_name'] = path_parts[1].strip() | ||
| 239 | + if len(path_parts) > 2: | ||
| 240 | + doc['category3_name'] = path_parts[2].strip() | ||
| 202 | 241 | ||
| 203 | - # Category | ||
| 204 | if pd.notna(spu_row.get('category')): | 242 | if pd.notna(spu_row.get('category')): |
| 205 | - doc['category'] = str(spu_row['category']) | 243 | + category_name = str(spu_row['category']) |
| 244 | + doc['category_name_zh'] = category_name | ||
| 245 | + doc['category_name_en'] = None | ||
| 246 | + doc['category_name'] = category_name | ||
| 247 | + | ||
| 248 | + if pd.notna(spu_row.get('category_id')): | ||
| 249 | + doc['category_id'] = str(int(spu_row['category_id'])) | ||
| 250 | + | ||
| 251 | + if pd.notna(spu_row.get('category_level')): | ||
| 252 | + doc['category_level'] = int(spu_row['category_level']) | ||
| 253 | + | ||
| 254 | + # Option名称(从option表获取) | ||
| 255 | + if not options.empty: | ||
| 256 | + # 按position排序获取option名称 | ||
| 257 | + sorted_options = options.sort_values('position') | ||
| 258 | + if len(sorted_options) > 0 and pd.notna(sorted_options.iloc[0].get('name')): | ||
| 259 | + doc['option1_name'] = str(sorted_options.iloc[0]['name']) | ||
| 260 | + if len(sorted_options) > 1 and pd.notna(sorted_options.iloc[1].get('name')): | ||
| 261 | + doc['option2_name'] = str(sorted_options.iloc[1]['name']) | ||
| 262 | + if len(sorted_options) > 2 and pd.notna(sorted_options.iloc[2].get('name')): | ||
| 263 | + doc['option3_name'] = str(sorted_options.iloc[2]['name']) | ||
| 206 | 264 | ||
| 207 | # Image URL | 265 | # Image URL |
| 208 | if pd.notna(spu_row.get('image_src')): | 266 | if pd.notna(spu_row.get('image_src')): |
| @@ -211,27 +269,85 @@ class SPUTransformer: | @@ -211,27 +269,85 @@ class SPUTransformer: | ||
| 211 | image_src = f"//{image_src}" if image_src.startswith('//') else image_src | 269 | image_src = f"//{image_src}" if image_src.startswith('//') else image_src |
| 212 | doc['image_url'] = image_src | 270 | doc['image_url'] = image_src |
| 213 | 271 | ||
| 214 | - # Process SKUs | 272 | + # Process SKUs and build specifications |
| 215 | skus_list = [] | 273 | skus_list = [] |
| 216 | prices = [] | 274 | prices = [] |
| 217 | compare_prices = [] | 275 | compare_prices = [] |
| 276 | + sku_prices = [] | ||
| 277 | + sku_weights = [] | ||
| 278 | + sku_weight_units = [] | ||
| 279 | + total_inventory = 0 | ||
| 280 | + specifications = [] | ||
| 281 | + | ||
| 282 | + # 构建option名称映射(position -> name) | ||
| 283 | + option_name_map = {} | ||
| 284 | + if not options.empty: | ||
| 285 | + for _, opt_row in options.iterrows(): | ||
| 286 | + position = opt_row.get('position') | ||
| 287 | + name = opt_row.get('name') | ||
| 288 | + if pd.notna(position) and pd.notna(name): | ||
| 289 | + option_name_map[int(position)] = str(name) | ||
| 218 | 290 | ||
| 219 | for _, sku_row in skus.iterrows(): | 291 | for _, sku_row in skus.iterrows(): |
| 220 | - sku_data = self._transform_sku_row(sku_row) | 292 | + sku_data = self._transform_sku_row(sku_row, option_name_map) |
| 221 | if sku_data: | 293 | if sku_data: |
| 222 | skus_list.append(sku_data) | 294 | skus_list.append(sku_data) |
| 295 | + | ||
| 296 | + # 收集价格信息 | ||
| 223 | if 'price' in sku_data and sku_data['price'] is not None: | 297 | if 'price' in sku_data and sku_data['price'] is not None: |
| 224 | try: | 298 | try: |
| 225 | - prices.append(float(sku_data['price'])) | 299 | + price_val = float(sku_data['price']) |
| 300 | + prices.append(price_val) | ||
| 301 | + sku_prices.append(price_val) | ||
| 226 | except (ValueError, TypeError): | 302 | except (ValueError, TypeError): |
| 227 | pass | 303 | pass |
| 304 | + | ||
| 228 | if 'compare_at_price' in sku_data and sku_data['compare_at_price'] is not None: | 305 | if 'compare_at_price' in sku_data and sku_data['compare_at_price'] is not None: |
| 229 | try: | 306 | try: |
| 230 | compare_prices.append(float(sku_data['compare_at_price'])) | 307 | compare_prices.append(float(sku_data['compare_at_price'])) |
| 231 | except (ValueError, TypeError): | 308 | except (ValueError, TypeError): |
| 232 | pass | 309 | pass |
| 310 | + | ||
| 311 | + # 收集重量信息 | ||
| 312 | + if 'weight' in sku_data and sku_data['weight'] is not None: | ||
| 313 | + try: | ||
| 314 | + sku_weights.append(int(float(sku_data['weight']))) | ||
| 315 | + except (ValueError, TypeError): | ||
| 316 | + pass | ||
| 317 | + | ||
| 318 | + if 'weight_unit' in sku_data and sku_data['weight_unit']: | ||
| 319 | + sku_weight_units.append(str(sku_data['weight_unit'])) | ||
| 320 | + | ||
| 321 | + # 收集库存信息 | ||
| 322 | + if 'stock' in sku_data and sku_data['stock'] is not None: | ||
| 323 | + try: | ||
| 324 | + total_inventory += int(sku_data['stock']) | ||
| 325 | + except (ValueError, TypeError): | ||
| 326 | + pass | ||
| 327 | + | ||
| 328 | + # 构建specifications(从SKU的option值和option表的name) | ||
| 329 | + sku_id = str(sku_row['id']) | ||
| 330 | + if pd.notna(sku_row.get('option1')) and 1 in option_name_map: | ||
| 331 | + specifications.append({ | ||
| 332 | + 'sku_id': sku_id, | ||
| 333 | + 'name': option_name_map[1], | ||
| 334 | + 'value': str(sku_row['option1']) | ||
| 335 | + }) | ||
| 336 | + if pd.notna(sku_row.get('option2')) and 2 in option_name_map: | ||
| 337 | + specifications.append({ | ||
| 338 | + 'sku_id': sku_id, | ||
| 339 | + 'name': option_name_map[2], | ||
| 340 | + 'value': str(sku_row['option2']) | ||
| 341 | + }) | ||
| 342 | + if pd.notna(sku_row.get('option3')) and 3 in option_name_map: | ||
| 343 | + specifications.append({ | ||
| 344 | + 'sku_id': sku_id, | ||
| 345 | + 'name': option_name_map[3], | ||
| 346 | + 'value': str(sku_row['option3']) | ||
| 347 | + }) | ||
| 233 | 348 | ||
| 234 | doc['skus'] = skus_list | 349 | doc['skus'] = skus_list |
| 350 | + doc['specifications'] = specifications | ||
| 235 | 351 | ||
| 236 | # Calculate price ranges | 352 | # Calculate price ranges |
| 237 | if prices: | 353 | if prices: |
| @@ -246,6 +362,19 @@ class SPUTransformer: | @@ -246,6 +362,19 @@ class SPUTransformer: | ||
| 246 | else: | 362 | else: |
| 247 | doc['compare_at_price'] = None | 363 | doc['compare_at_price'] = None |
| 248 | 364 | ||
| 365 | + # SKU扁平化字段 | ||
| 366 | + doc['sku_prices'] = sku_prices | ||
| 367 | + doc['sku_weights'] = sku_weights | ||
| 368 | + doc['sku_weight_units'] = list(set(sku_weight_units)) # 去重 | ||
| 369 | + doc['total_inventory'] = total_inventory | ||
| 370 | + | ||
| 371 | + # Image URL | ||
| 372 | + if pd.notna(spu_row.get('image_src')): | ||
| 373 | + image_src = str(spu_row['image_src']) | ||
| 374 | + if not image_src.startswith('http'): | ||
| 375 | + image_src = f"//{image_src}" if image_src.startswith('//') else image_src | ||
| 376 | + doc['image_url'] = image_src | ||
| 377 | + | ||
| 249 | # Time fields - convert datetime to ISO format string for ES DATE type | 378 | # Time fields - convert datetime to ISO format string for ES DATE type |
| 250 | if pd.notna(spu_row.get('create_time')): | 379 | if pd.notna(spu_row.get('create_time')): |
| 251 | create_time = spu_row['create_time'] | 380 | create_time = spu_row['create_time'] |
| @@ -260,29 +389,16 @@ class SPUTransformer: | @@ -260,29 +389,16 @@ class SPUTransformer: | ||
| 260 | doc['update_time'] = update_time.isoformat() | 389 | doc['update_time'] = update_time.isoformat() |
| 261 | else: | 390 | else: |
| 262 | doc['update_time'] = str(update_time) | 391 | doc['update_time'] = str(update_time) |
| 263 | - | ||
| 264 | - if pd.notna(spu_row.get('shoplazza_created_at')): | ||
| 265 | - shoplazza_created_at = spu_row['shoplazza_created_at'] | ||
| 266 | - if hasattr(shoplazza_created_at, 'isoformat'): | ||
| 267 | - doc['shoplazza_created_at'] = shoplazza_created_at.isoformat() | ||
| 268 | - else: | ||
| 269 | - doc['shoplazza_created_at'] = str(shoplazza_created_at) | ||
| 270 | - | ||
| 271 | - if pd.notna(spu_row.get('shoplazza_updated_at')): | ||
| 272 | - shoplazza_updated_at = spu_row['shoplazza_updated_at'] | ||
| 273 | - if hasattr(shoplazza_updated_at, 'isoformat'): | ||
| 274 | - doc['shoplazza_updated_at'] = shoplazza_updated_at.isoformat() | ||
| 275 | - else: | ||
| 276 | - doc['shoplazza_updated_at'] = str(shoplazza_updated_at) | ||
| 277 | 392 | ||
| 278 | return doc | 393 | return doc |
| 279 | 394 | ||
| 280 | - def _transform_sku_row(self, sku_row: pd.Series) -> Optional[Dict[str, Any]]: | 395 | + def _transform_sku_row(self, sku_row: pd.Series, option_name_map: Dict[int, str] = None) -> Optional[Dict[str, Any]]: |
| 281 | """ | 396 | """ |
| 282 | Transform a SKU row into a SKU object. | 397 | Transform a SKU row into a SKU object. |
| 283 | 398 | ||
| 284 | Args: | 399 | Args: |
| 285 | sku_row: SKU row from database | 400 | sku_row: SKU row from database |
| 401 | + option_name_map: Mapping from position to option name | ||
| 286 | 402 | ||
| 287 | Returns: | 403 | Returns: |
| 288 | SKU dictionary or None | 404 | SKU dictionary or None |
| @@ -292,10 +408,6 @@ class SPUTransformer: | @@ -292,10 +408,6 @@ class SPUTransformer: | ||
| 292 | # SKU ID | 408 | # SKU ID |
| 293 | sku_data['sku_id'] = str(sku_row['id']) | 409 | sku_data['sku_id'] = str(sku_row['id']) |
| 294 | 410 | ||
| 295 | - # Title | ||
| 296 | - if pd.notna(sku_row.get('title')): | ||
| 297 | - sku_data['title'] = str(sku_row['title']) | ||
| 298 | - | ||
| 299 | # Price | 411 | # Price |
| 300 | if pd.notna(sku_row.get('price')): | 412 | if pd.notna(sku_row.get('price')): |
| 301 | try: | 413 | try: |
| @@ -314,9 +426,9 @@ class SPUTransformer: | @@ -314,9 +426,9 @@ class SPUTransformer: | ||
| 314 | else: | 426 | else: |
| 315 | sku_data['compare_at_price'] = None | 427 | sku_data['compare_at_price'] = None |
| 316 | 428 | ||
| 317 | - # SKU | 429 | + # SKU Code |
| 318 | if pd.notna(sku_row.get('sku')): | 430 | if pd.notna(sku_row.get('sku')): |
| 319 | - sku_data['sku'] = str(sku_row['sku']) | 431 | + sku_data['sku_code'] = str(sku_row['sku']) |
| 320 | 432 | ||
| 321 | # Stock | 433 | # Stock |
| 322 | if pd.notna(sku_row.get('inventory_quantity')): | 434 | if pd.notna(sku_row.get('inventory_quantity')): |
| @@ -327,17 +439,30 @@ class SPUTransformer: | @@ -327,17 +439,30 @@ class SPUTransformer: | ||
| 327 | else: | 439 | else: |
| 328 | sku_data['stock'] = 0 | 440 | sku_data['stock'] = 0 |
| 329 | 441 | ||
| 330 | - # Options (from option1, option2, option3) | ||
| 331 | - options = {} | 442 | + # Weight |
| 443 | + if pd.notna(sku_row.get('weight')): | ||
| 444 | + try: | ||
| 445 | + sku_data['weight'] = float(sku_row['weight']) | ||
| 446 | + except (ValueError, TypeError): | ||
| 447 | + sku_data['weight'] = None | ||
| 448 | + else: | ||
| 449 | + sku_data['weight'] = None | ||
| 450 | + | ||
| 451 | + # Weight unit | ||
| 452 | + if pd.notna(sku_row.get('weight_unit')): | ||
| 453 | + sku_data['weight_unit'] = str(sku_row['weight_unit']) | ||
| 454 | + | ||
| 455 | + # Option values | ||
| 332 | if pd.notna(sku_row.get('option1')): | 456 | if pd.notna(sku_row.get('option1')): |
| 333 | - options['option1'] = str(sku_row['option1']) | 457 | + sku_data['option1_value'] = str(sku_row['option1']) |
| 334 | if pd.notna(sku_row.get('option2')): | 458 | if pd.notna(sku_row.get('option2')): |
| 335 | - options['option2'] = str(sku_row['option2']) | 459 | + sku_data['option2_value'] = str(sku_row['option2']) |
| 336 | if pd.notna(sku_row.get('option3')): | 460 | if pd.notna(sku_row.get('option3')): |
| 337 | - options['option3'] = str(sku_row['option3']) | ||
| 338 | - | ||
| 339 | - if options: | ||
| 340 | - sku_data['options'] = options | 461 | + sku_data['option3_value'] = str(sku_row['option3']) |
| 462 | + | ||
| 463 | + # Image src | ||
| 464 | + if pd.notna(sku_row.get('image_src')): | ||
| 465 | + sku_data['image_src'] = str(sku_row['image_src']) | ||
| 341 | 466 | ||
| 342 | return sku_data | 467 | return sku_data |
| 343 | 468 |