diff --git a/docs/MySQL到ES字段映射说明-业务版.md b/docs/MySQL到ES字段映射说明-业务版.md index 7ca7260..1e70df2 100644 --- a/docs/MySQL到ES字段映射说明-业务版.md +++ b/docs/MySQL到ES字段映射说明-业务版.md @@ -52,7 +52,7 @@ | ES 字段 | 数据来源表 | 表中字段 | 转换说明 | |---------|-----------|----------|----------| -| `image_url` | SPU 表 | `image_src` | 直接使用,如果 URL 不以 `http` 开头则添加 `//` 前缀 | +| `image_url` | SPU 表 | `image_src` | | ### 2.5 销量字段 diff --git a/docs/索引字段说明v2-mapping结构.md b/docs/索引字段说明v2-mapping结构.md deleted file mode 100644 index 1c5598e..0000000 --- a/docs/索引字段说明v2-mapping结构.md +++ /dev/null @@ -1,231 +0,0 @@ -{ - "mappings": { - "properties": { - "tenant_id": { - "type": "keyword" - }, - "spu_id": { - "type": "keyword" - }, - - "create_time": { - "type": "date" - }, - "update_time": { - "type": "date" - }, - - // 文本相关性相关字段 - "title.zh": { - "type": "text", - "analyzer": "hanlp_index", - "search_analyzer": "hanlp_standard" - }, - "brief.zh": { - "type": "text", - "analyzer": "hanlp_index", - "search_analyzer": "hanlp_standard" - }, - "description.zh": { - "type": "text", - "analyzer": "hanlp_index", - "search_analyzer": "hanlp_standard" - }, - "vendor.zh": { - "type": "text", - "analyzer": "hanlp_index", - "search_analyzer": "hanlp_standard", - "fields": { - "keyword": { - "type": "keyword", - "normalizer": "lowercase" - } - } - }, - - "title.en": { - "type": "text", - "analyzer": "english", - "search_analyzer": "english", - }, - "brief.en": { - "type": "text", - "analyzer": "english", - "search_analyzer": "english", - - }, - "description.en": { - "type": "text", - "analyzer": "english", - "search_analyzer": "english", - }, - "vendor.en": { - "type": "text", - "analyzer": "english", - "search_analyzer": "english", - "fields": { - "keyword": { - "type": "keyword", - "normalizer": "lowercase" - } - } - }, - - "tags": { - "type": "keyword", - }, - - "image_url": { - "type": "keyword", - "index": false - }, - - // 语义向量 - "title_embedding": { - "type": "dense_vector", - "dims": 1024, - "index": true, - "similarity": "dot_product" - }, - "image_embedding": { - "type": "nested", - "properties": { - "vector": { - "type": "dense_vector", - "dims": 1024, - "index": true, - "similarity": "dot_product" - }, - "url": { - "type": "text" - } - } - }, - - // 分类相关 - "category_path.zh": { // 提供模糊查询功能,辅助相关性计算 - "type": "text", - "analyzer": "hanlp_index", - "search_analyzer": "hanlp_standard" - }, - "category_path.en": { // 提供模糊查询功能,辅助相关性计算 - "type": "text", - "analyzer": "english", - "search_analyzer": "english" - }, - "category_name_text.zh": { // 提供模糊查询功能,辅助相关性计算 - "type": "text", - "analyzer": "hanlp_index", - "search_analyzer": "hanlp_standard" - }, - "category_name_text.en": { // 提供模糊查询功能,辅助相关性计算 - "type": "text", - "analyzer": "english", - "search_analyzer": "english" - }, - - "category_id": { - "type": "keyword" - }, - "category_name": { - "type": "keyword" - }, - "category_level": { - "type": "integer" - }, - "category1_name": { // 不同层级下 可能有同名的情况,因此提供一二三级分开的查询方式 - "type": "keyword" - }, - "category2_name": { - "type": "keyword" - }, - "category3_name": { - "type": "keyword" - }, - - // sku款式、子sku属性 - "specifications": { - "type": "nested", - "properties": { - "sku_id": { "type": "keyword" }, - "name": { "type": "keyword" }, // "颜色", "容量" - "value": { "type": "keyword" } // "白色", "256GB" - } - }, - - "option1_name": { - "type": "keyword" - }, - "option2_name": { - "type": "keyword" - }, - "option3_name": { - "type": "keyword" - }, - - "min_price": { - "type": "float" - }, - "max_price": { - "type": "float" - }, - "compare_at_price": { - "type": "float" - }, - "sku_prices": { - "type": "float" - }, - "sku_weights": { - "type": "long" - }, - "sku_weight_units": { - "type": "keyword" - }, - "total_inventory": { - "type": "long" - }, - - "skus": { - "type": "nested", - "properties": { - "sku_id": { - "type": "keyword" - }, - "price": { - "type": "float" - }, - "compare_at_price": { - "type": "float" - }, - "sku_code": { - "type": "keyword" - }, - "stock": { - "type": "long" - }, - "weight": { - "type": "float" - }, - "weight_unit": { - "type": "keyword" - }, - "option1_value": { - "type": "keyword" - }, - "option2_value": { - "type": "keyword" - }, - "option3_value": { - "type": "keyword" - }, - "image_src": { - "type": "keyword", - "index": false - } - } - } - } - } -} - - diff --git a/docs/索引字段说明v2-参考表结构.md b/docs/索引字段说明v2-参考表结构.md index e327777..e69de29 100644 --- a/docs/索引字段说明v2-参考表结构.md +++ b/docs/索引字段说明v2-参考表结构.md @@ -1,99 +0,0 @@ - - -类目表 product_category -id bigint(20) NO PRI auto_increment -parent_id bigint(20) NO -name varchar(255) NO -pic_url varchar(255) NO -sort int(11) YES 0 -status tinyint(4) NO -creator varchar(64) YES -create_time datetime NO CURRENT_TIMESTAMP -updater varchar(64) YES -update_time datetime NO CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP -deleted bit(1) NO b'0' -tenant_id bigint(20) NO 0 - - - -spu表 shoplazza_product_spu 全部字段 -"Field" "Type" "Null" "Key" "Default" "Extra" -"id" "bigint(20)" "NO" "PRI" "auto_increment" -"shop_id" "bigint(20)" "NO" "MUL" "" -"shoplazza_id" "varchar(64)" "NO" "" "" -"handle" "varchar(255)" "YES" "MUL" "" -"title" "varchar(500)" "NO" "" "" -"brief" "varchar(1000)" "YES" "" "" -"description" "text" "YES" "" "" -"spu" "varchar(100)" "YES" "" "" -"vendor" "varchar(255)" "YES" "" "" -"vendor_url" "varchar(500)" "YES" "" "" -"seo_title" "varchar(500)" "YES" "" "" -"seo_description" "text" "YES" "" "" -"seo_keywords" "text" "YES" "" "" -"image_src" "varchar(500)" "YES" "" "" -"image_width" "int(11)" "YES" "" "" -"image_height" "int(11)" "YES" "" "" -"image_path" "varchar(255)" "YES" "" "" -"image_alt" "varchar(500)" "YES" "" "" -"inventory_policy" "varchar(50)" "YES" "" "" -"inventory_quantity" "int(11)" "YES" "" "0" "" -"inventory_tracking" "tinyint(1)" "YES" "" "0" "" -"published" "tinyint(1)" "YES" "" "0" "" -"published_at" "datetime" "YES" "MUL" "" -"requires_shipping" "tinyint(1)" "YES" "" "1" "" -"taxable" "tinyint(1)" "YES" "" "0" "" -"fake_sales" "int(11)" "YES" "" "0" "" -"display_fake_sales" "tinyint(1)" "YES" "" "0" "" -"mixed_wholesale" "tinyint(1)" "YES" "" "0" "" -"need_variant_image" "tinyint(1)" "YES" "" "0" "" -"has_only_default_variant" "tinyint(1)" "YES" "" "0" "" -"tags" "text" "YES" "" "" -"note" "text" "YES" "" "" -"category" "varchar(255)" "YES" "" "" -"category_id" "bigint(20)" "YES" "" "" -"category_google_id" "bigint(20)" "YES" "" "" -"category_level" "int(11)" "YES" "" "" -"category_path" "varchar(500)" "YES" "" "" -"shoplazza_created_at" "datetime" "YES" "" "" -"shoplazza_updated_at" "datetime" "YES" "MUL" "" -"tenant_id" "bigint(20)" "NO" "MUL" "" -"creator" "varchar(64)" "YES" "" "" "" -"create_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "" -"updater" "varchar(64)" "YES" "" "" "" -"update_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "on update CURRENT_TIMESTAMP" -"deleted" "bit(1)" "NO" "" "b'0'" "" - -shoplazza_product_sku 全部字段 -"Field" "Type" "Null" "Key" "Default" "Extra" -"id" "bigint(20)" "NO" "PRI" "auto_increment" -"spu_id" "bigint(20)" "NO" "MUL" "" -"shop_id" "bigint(20)" "NO" "MUL" "" -"shoplazza_id" "varchar(64)" "NO" "" "" -"shoplazza_product_id" "varchar(64)" "NO" "MUL" "" -"shoplazza_image_id" "varchar(64)" "YES" "" "" -"title" "varchar(500)" "YES" "" "" -"sku" "varchar(100)" "YES" "MUL" "" -"barcode" "varchar(100)" "YES" "" "" -"position" "int(11)" "YES" "" "0" "" -"price" "decimal(10,2)" "YES" "" "" -"compare_at_price" "decimal(10,2)" "YES" "" "" -"cost_price" "decimal(10,2)" "YES" "" "" -"option1" "varchar(255)" "YES" "" "" -"option2" "varchar(255)" "YES" "" "" -"option3" "varchar(255)" "YES" "" "" -"inventory_quantity" "int(11)" "YES" "" "0" "" -"weight" "decimal(10,2)" "YES" "" "" -"weight_unit" "varchar(10)" "YES" "" "" -"image_src" "varchar(500)" "YES" "" "" -"wholesale_price" "json" "YES" "" "" -"note" "text" "YES" "" "" -"extend" "json" "YES" "" "" -"shoplazza_created_at" "datetime" "YES" "" "" -"shoplazza_updated_at" "datetime" "YES" "" "" -"tenant_id" "bigint(20)" "NO" "MUL" "" -"creator" "varchar(64)" "YES" "" "" "" -"create_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "" -"updater" "varchar(64)" "YES" "" "" "" -"update_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "on update CURRENT_TIMESTAMP" -"deleted" "bit(1)" "NO" "" "b'0'" "" diff --git a/docs/索引字段说明v2.md b/docs/索引字段说明v2.md index 6da30f4..bcfe6f2 100644 --- a/docs/索引字段说明v2.md +++ b/docs/索引字段说明v2.md @@ -1,13 +1,26 @@ -# 索引字段说明 v2 +# 索引字段说明 -本文档详细说明 `search_products` 索引的字段结构、类型、数据来源和用途。 +本文档详细说明商品搜索所用 Elasticsearch 索引的字段结构、类型、数据来源和用途。 ## 索引概述 -- **索引名称**: `search_products` +- **索引命名**: 按租户独立索引,名称为 **`search_products_tenant_{tenant_id}`**(例如 `search_products_tenant_162`)。由代码中 `get_tenant_index_name(tenant_id)` 生成。 - **索引维度**: SPU(Standard Product Unit)级别 -- **多租户隔离**: 通过 `tenant_id` 字段实现 -- **Mapping 文件**: `mappings/search_products.json` +- **多租户隔离**: **索引层面隔离**——每个租户一个独立索引,查询与写入时直接使用该索引名,无需在查询条件中再按 `tenant_id` 过滤。 +- **Mapping 模板**: `mappings/search_products.json`(新建每个租户索引时复用同一份 mapping) + +## ES 索引在代码中的使用 + +索引名由 **`indexer/mapping_generator.py`** 中的 `get_tenant_index_name(tenant_id)` 生成,格式为 `search_products_tenant_{tenant_id}`。 + +| 场景 | 位置 | 说明 | +|------|------|------| +| 搜索请求 | `search/searcher.py` | 构建 ES 查询时通过 `get_tenant_index_name(tenant_id)` 得到 `index_name`,调用 `es_client.search(index_name=..., body=...)`,不再在 query 中加 `tenant_id` 过滤。 | +| 全量写入 | `indexer/bulk_indexing_service.py` | 按 `tenant_id` 调用 `get_tenant_index_name(tenant_id)` 作为写入目标索引,建索引与 bulk 写入均使用该名。 | +| 增量写入 | `indexer/incremental_service.py` | 同上,索引名由 `get_tenant_index_name(tenant_id)` 得到。 | +| 建索引 / 删索引 | `indexer/mapping_generator.py` | `create_index_if_not_exists`、`delete_index_if_exists` 等均按传入的 `index_name`(即租户索引名)操作。 | + +因此:每个租户对应一个独立 ES 索引,查询与写入都直接指定该索引名,实现索引级多租户隔离。 ## 字段分类 @@ -15,7 +28,7 @@ | 字段名 | ES类型 | 说明 | 数据来源 | |--------|--------|------|----------| -| `tenant_id` | keyword | 租户ID,用于多租户隔离 | MySQL: `shoplazza_product_spu.tenant_id` | +| `tenant_id` | keyword | 租户ID(文档级标识,便于排查与统计;隔离已由索引名保证,查询无需再按此过滤) | MySQL: `shoplazza_product_spu.tenant_id` | | `spu_id` | keyword | SPU唯一标识 | MySQL: `shoplazza_product_spu.id` | | `create_time` | date | 创建时间 | MySQL: `shoplazza_product_spu.created_at` | | `update_time` | date | 更新时间 | MySQL: `shoplazza_product_spu.updated_at` | @@ -359,7 +372,7 @@ ### 过滤字段(精确匹配) -- `tenant_id` (必需,多租户隔离) +- `tenant_id`(文档中保留,便于排查与统计;**多租户隔离已由索引名保证,查询时无需再按 tenant_id 过滤**) - `category_id`, `category_name`, `category1_name`, `category2_name`, `category3_name` - `vendor.zh.keyword`, `vendor.en.keyword` - `specifications` (嵌套查询) @@ -445,7 +458,7 @@ filters AND (text_recall OR embedding_recall) ## 注意事项 1. **索引维度**: 所有数据以 SPU 为单位索引,SKU 信息作为嵌套字段存储 -2. **多租户隔离**: 所有查询必须包含 `tenant_id` 过滤条件 +2. **多租户隔离**: 按租户使用独立索引(`search_products_tenant_{tenant_id}`),查询时通过 `get_tenant_index_name(tenant_id)` 指定索引名即可,无需在 query 中加 `tenant_id` 过滤 3. **多语言支持**: 文本字段支持中英文,后端根据 `language` 参数自动选择 4. **规格分面**: `specifications` 使用嵌套聚合,按 `name` 分组,然后按 `value` 聚合 5. **向量字段**: `title_embedding` 和 `image_embedding` 仅用于搜索,不返回给前端 diff --git a/indexer/document_transformer.py b/indexer/document_transformer.py index 271f944..83cf27e 100644 --- a/indexer/document_transformer.py +++ b/indexer/document_transformer.py @@ -2,6 +2,10 @@ SPU文档转换器 - 公共转换逻辑。 提取全量和增量索引共用的文档转换逻辑,避免代码冗余。 +输出文档结构与 mappings/search_products.json 及 索引字段说明v2 一致, +供 search/searcher 与 search/es_query_builder 使用。 +- 多语言字段:title, brief, description, vendor, category_path, category_name_text +- 嵌套:specifications, skus;向量:title_embedding(image_embedding 由 Java 索引或外部填充) """ import pandas as pd @@ -137,7 +141,13 @@ class SPUDocumentTransformer: doc['min_price'] = 0.0 doc['max_price'] = 0.0 - if compare_prices: + # 优先使用 SPU 级 compare_at_price(与索引字段说明v2一致),否则取 SKU 最大值 + if pd.notna(spu_row.get('compare_at_price')): + try: + doc['compare_at_price'] = float(spu_row['compare_at_price']) + except (ValueError, TypeError): + doc['compare_at_price'] = float(max(compare_prices)) if compare_prices else None + elif compare_prices: doc['compare_at_price'] = float(max(compare_prices)) else: doc['compare_at_price'] = None @@ -290,6 +300,8 @@ class SPUDocumentTransformer: if category_names: category_path_str = '/'.join(category_names) doc['category_path'] = {primary_lang: category_path_str} + # 与查询使用的 category_name_text.zh/en 对齐,便于类目搜索 + doc['category_name_text'] = {primary_lang: category_path_str} # 填充分层类目名称 if len(category_names) > 0: diff --git a/indexer/incremental_service.py b/indexer/incremental_service.py index cd9f49b..84f3a9f 100644 --- a/indexer/incremental_service.py +++ b/indexer/incremental_service.py @@ -176,7 +176,7 @@ class IncrementalIndexerService: image_src, image_width, image_height, image_path, image_alt, tags, note, category, category_id, category_google_id, category_level, category_path, - fake_sales, display_fake_sales, + compare_at_price, fake_sales, display_fake_sales, tenant_id, creator, create_time, updater, update_time, deleted FROM shoplazza_product_spu WHERE tenant_id = :tenant_id AND id = :spu_id @@ -191,7 +191,7 @@ class IncrementalIndexerService: image_src, image_width, image_height, image_path, image_alt, tags, note, category, category_id, category_google_id, category_level, category_path, - fake_sales, display_fake_sales, + compare_at_price, fake_sales, display_fake_sales, tenant_id, creator, create_time, updater, update_time, deleted FROM shoplazza_product_spu WHERE tenant_id = :tenant_id AND id = :spu_id AND deleted = 0 @@ -243,7 +243,7 @@ class IncrementalIndexerService: image_src, image_width, image_height, image_path, image_alt, tags, note, category, category_id, category_google_id, category_level, category_path, - fake_sales, display_fake_sales, + compare_at_price, fake_sales, display_fake_sales, tenant_id, creator, create_time, updater, update_time, deleted FROM shoplazza_product_spu WHERE tenant_id = :tenant_id AND id IN :spu_ids @@ -258,7 +258,7 @@ class IncrementalIndexerService: image_src, image_width, image_height, image_path, image_alt, tags, note, category, category_id, category_google_id, category_level, category_path, - fake_sales, display_fake_sales, + compare_at_price, fake_sales, display_fake_sales, tenant_id, creator, create_time, updater, update_time, deleted FROM shoplazza_product_spu WHERE tenant_id = :tenant_id AND deleted = 0 AND id IN :spu_ids diff --git a/indexer/spu_transformer.py b/indexer/spu_transformer.py index 5c78b34..fdd5624 100644 --- a/indexer/spu_transformer.py +++ b/indexer/spu_transformer.py @@ -45,7 +45,7 @@ class SPUTransformer: image_src, image_width, image_height, image_path, image_alt, tags, note, category, category_id, category_google_id, category_level, category_path, - fake_sales, display_fake_sales, + compare_at_price, fake_sales, display_fake_sales, tenant_id, creator, create_time, updater, update_time, deleted FROM shoplazza_product_spu WHERE tenant_id = :tenant_id AND deleted = 0 -- libgit2 0.21.2