Commit 2e48a32da4decd6f951be72a91cdb60e959cedcf

Authored by tangwang
1 parent cd6d887e

doc

docs/MySQL到ES字段映射说明-业务版.md
... ... @@ -52,7 +52,7 @@
52 52  
53 53 | ES 字段 | 数据来源表 | 表中字段 | 转换说明 |
54 54 |---------|-----------|----------|----------|
55   -| `image_url` | SPU 表 | `image_src` | 直接使用,如果 URL 不以 `http` 开头则添加 `//` 前缀 |
  55 +| `image_url` | SPU 表 | `image_src` | 直接使用 |
56 56  
57 57 ### 2.5 销量字段
58 58  
... ...
docs/索引字段说明v2-mapping结构.md deleted
... ... @@ -1,231 +0,0 @@
1   -{
2   - "mappings": {
3   - "properties": {
4   - "tenant_id": {
5   - "type": "keyword"
6   - },
7   - "spu_id": {
8   - "type": "keyword"
9   - },
10   -
11   - "create_time": {
12   - "type": "date"
13   - },
14   - "update_time": {
15   - "type": "date"
16   - },
17   -
18   - // 文本相关性相关字段
19   - "title.zh": {
20   - "type": "text",
21   - "analyzer": "hanlp_index",
22   - "search_analyzer": "hanlp_standard"
23   - },
24   - "brief.zh": {
25   - "type": "text",
26   - "analyzer": "hanlp_index",
27   - "search_analyzer": "hanlp_standard"
28   - },
29   - "description.zh": {
30   - "type": "text",
31   - "analyzer": "hanlp_index",
32   - "search_analyzer": "hanlp_standard"
33   - },
34   - "vendor.zh": {
35   - "type": "text",
36   - "analyzer": "hanlp_index",
37   - "search_analyzer": "hanlp_standard",
38   - "fields": {
39   - "keyword": {
40   - "type": "keyword",
41   - "normalizer": "lowercase"
42   - }
43   - }
44   - },
45   -
46   - "title.en": {
47   - "type": "text",
48   - "analyzer": "english",
49   - "search_analyzer": "english",
50   - },
51   - "brief.en": {
52   - "type": "text",
53   - "analyzer": "english",
54   - "search_analyzer": "english",
55   -
56   - },
57   - "description.en": {
58   - "type": "text",
59   - "analyzer": "english",
60   - "search_analyzer": "english",
61   - },
62   - "vendor.en": {
63   - "type": "text",
64   - "analyzer": "english",
65   - "search_analyzer": "english",
66   - "fields": {
67   - "keyword": {
68   - "type": "keyword",
69   - "normalizer": "lowercase"
70   - }
71   - }
72   - },
73   -
74   - "tags": {
75   - "type": "keyword",
76   - },
77   -
78   - "image_url": {
79   - "type": "keyword",
80   - "index": false
81   - },
82   -
83   - // 语义向量
84   - "title_embedding": {
85   - "type": "dense_vector",
86   - "dims": 1024,
87   - "index": true,
88   - "similarity": "dot_product"
89   - },
90   - "image_embedding": {
91   - "type": "nested",
92   - "properties": {
93   - "vector": {
94   - "type": "dense_vector",
95   - "dims": 1024,
96   - "index": true,
97   - "similarity": "dot_product"
98   - },
99   - "url": {
100   - "type": "text"
101   - }
102   - }
103   - },
104   -
105   - // 分类相关
106   - "category_path.zh": { // 提供模糊查询功能,辅助相关性计算
107   - "type": "text",
108   - "analyzer": "hanlp_index",
109   - "search_analyzer": "hanlp_standard"
110   - },
111   - "category_path.en": { // 提供模糊查询功能,辅助相关性计算
112   - "type": "text",
113   - "analyzer": "english",
114   - "search_analyzer": "english"
115   - },
116   - "category_name_text.zh": { // 提供模糊查询功能,辅助相关性计算
117   - "type": "text",
118   - "analyzer": "hanlp_index",
119   - "search_analyzer": "hanlp_standard"
120   - },
121   - "category_name_text.en": { // 提供模糊查询功能,辅助相关性计算
122   - "type": "text",
123   - "analyzer": "english",
124   - "search_analyzer": "english"
125   - },
126   -
127   - "category_id": {
128   - "type": "keyword"
129   - },
130   - "category_name": {
131   - "type": "keyword"
132   - },
133   - "category_level": {
134   - "type": "integer"
135   - },
136   - "category1_name": { // 不同层级下 可能有同名的情况,因此提供一二三级分开的查询方式
137   - "type": "keyword"
138   - },
139   - "category2_name": {
140   - "type": "keyword"
141   - },
142   - "category3_name": {
143   - "type": "keyword"
144   - },
145   -
146   - // sku款式、子sku属性
147   - "specifications": {
148   - "type": "nested",
149   - "properties": {
150   - "sku_id": { "type": "keyword" },
151   - "name": { "type": "keyword" }, // "颜色", "容量"
152   - "value": { "type": "keyword" } // "白色", "256GB"
153   - }
154   - },
155   -
156   - "option1_name": {
157   - "type": "keyword"
158   - },
159   - "option2_name": {
160   - "type": "keyword"
161   - },
162   - "option3_name": {
163   - "type": "keyword"
164   - },
165   -
166   - "min_price": {
167   - "type": "float"
168   - },
169   - "max_price": {
170   - "type": "float"
171   - },
172   - "compare_at_price": {
173   - "type": "float"
174   - },
175   - "sku_prices": {
176   - "type": "float"
177   - },
178   - "sku_weights": {
179   - "type": "long"
180   - },
181   - "sku_weight_units": {
182   - "type": "keyword"
183   - },
184   - "total_inventory": {
185   - "type": "long"
186   - },
187   -
188   - "skus": {
189   - "type": "nested",
190   - "properties": {
191   - "sku_id": {
192   - "type": "keyword"
193   - },
194   - "price": {
195   - "type": "float"
196   - },
197   - "compare_at_price": {
198   - "type": "float"
199   - },
200   - "sku_code": {
201   - "type": "keyword"
202   - },
203   - "stock": {
204   - "type": "long"
205   - },
206   - "weight": {
207   - "type": "float"
208   - },
209   - "weight_unit": {
210   - "type": "keyword"
211   - },
212   - "option1_value": {
213   - "type": "keyword"
214   - },
215   - "option2_value": {
216   - "type": "keyword"
217   - },
218   - "option3_value": {
219   - "type": "keyword"
220   - },
221   - "image_src": {
222   - "type": "keyword",
223   - "index": false
224   - }
225   - }
226   - }
227   - }
228   - }
229   -}
230   -
231   -
docs/索引字段说明v2-参考表结构.md
... ... @@ -1,99 +0,0 @@
1   -
2   -
3   -类目表 product_category
4   -id bigint(20) NO PRI auto_increment
5   -parent_id bigint(20) NO
6   -name varchar(255) NO
7   -pic_url varchar(255) NO
8   -sort int(11) YES 0
9   -status tinyint(4) NO
10   -creator varchar(64) YES
11   -create_time datetime NO CURRENT_TIMESTAMP
12   -updater varchar(64) YES
13   -update_time datetime NO CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP
14   -deleted bit(1) NO b'0'
15   -tenant_id bigint(20) NO 0
16   -
17   -
18   -
19   -spu表 shoplazza_product_spu 全部字段
20   -"Field" "Type" "Null" "Key" "Default" "Extra"
21   -"id" "bigint(20)" "NO" "PRI" "auto_increment"
22   -"shop_id" "bigint(20)" "NO" "MUL" ""
23   -"shoplazza_id" "varchar(64)" "NO" "" ""
24   -"handle" "varchar(255)" "YES" "MUL" ""
25   -"title" "varchar(500)" "NO" "" ""
26   -"brief" "varchar(1000)" "YES" "" ""
27   -"description" "text" "YES" "" ""
28   -"spu" "varchar(100)" "YES" "" ""
29   -"vendor" "varchar(255)" "YES" "" ""
30   -"vendor_url" "varchar(500)" "YES" "" ""
31   -"seo_title" "varchar(500)" "YES" "" ""
32   -"seo_description" "text" "YES" "" ""
33   -"seo_keywords" "text" "YES" "" ""
34   -"image_src" "varchar(500)" "YES" "" ""
35   -"image_width" "int(11)" "YES" "" ""
36   -"image_height" "int(11)" "YES" "" ""
37   -"image_path" "varchar(255)" "YES" "" ""
38   -"image_alt" "varchar(500)" "YES" "" ""
39   -"inventory_policy" "varchar(50)" "YES" "" ""
40   -"inventory_quantity" "int(11)" "YES" "" "0" ""
41   -"inventory_tracking" "tinyint(1)" "YES" "" "0" ""
42   -"published" "tinyint(1)" "YES" "" "0" ""
43   -"published_at" "datetime" "YES" "MUL" ""
44   -"requires_shipping" "tinyint(1)" "YES" "" "1" ""
45   -"taxable" "tinyint(1)" "YES" "" "0" ""
46   -"fake_sales" "int(11)" "YES" "" "0" ""
47   -"display_fake_sales" "tinyint(1)" "YES" "" "0" ""
48   -"mixed_wholesale" "tinyint(1)" "YES" "" "0" ""
49   -"need_variant_image" "tinyint(1)" "YES" "" "0" ""
50   -"has_only_default_variant" "tinyint(1)" "YES" "" "0" ""
51   -"tags" "text" "YES" "" ""
52   -"note" "text" "YES" "" ""
53   -"category" "varchar(255)" "YES" "" ""
54   -"category_id" "bigint(20)" "YES" "" ""
55   -"category_google_id" "bigint(20)" "YES" "" ""
56   -"category_level" "int(11)" "YES" "" ""
57   -"category_path" "varchar(500)" "YES" "" ""
58   -"shoplazza_created_at" "datetime" "YES" "" ""
59   -"shoplazza_updated_at" "datetime" "YES" "MUL" ""
60   -"tenant_id" "bigint(20)" "NO" "MUL" ""
61   -"creator" "varchar(64)" "YES" "" "" ""
62   -"create_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" ""
63   -"updater" "varchar(64)" "YES" "" "" ""
64   -"update_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "on update CURRENT_TIMESTAMP"
65   -"deleted" "bit(1)" "NO" "" "b'0'" ""
66   -
67   -shoplazza_product_sku 全部字段
68   -"Field" "Type" "Null" "Key" "Default" "Extra"
69   -"id" "bigint(20)" "NO" "PRI" "auto_increment"
70   -"spu_id" "bigint(20)" "NO" "MUL" ""
71   -"shop_id" "bigint(20)" "NO" "MUL" ""
72   -"shoplazza_id" "varchar(64)" "NO" "" ""
73   -"shoplazza_product_id" "varchar(64)" "NO" "MUL" ""
74   -"shoplazza_image_id" "varchar(64)" "YES" "" ""
75   -"title" "varchar(500)" "YES" "" ""
76   -"sku" "varchar(100)" "YES" "MUL" ""
77   -"barcode" "varchar(100)" "YES" "" ""
78   -"position" "int(11)" "YES" "" "0" ""
79   -"price" "decimal(10,2)" "YES" "" ""
80   -"compare_at_price" "decimal(10,2)" "YES" "" ""
81   -"cost_price" "decimal(10,2)" "YES" "" ""
82   -"option1" "varchar(255)" "YES" "" ""
83   -"option2" "varchar(255)" "YES" "" ""
84   -"option3" "varchar(255)" "YES" "" ""
85   -"inventory_quantity" "int(11)" "YES" "" "0" ""
86   -"weight" "decimal(10,2)" "YES" "" ""
87   -"weight_unit" "varchar(10)" "YES" "" ""
88   -"image_src" "varchar(500)" "YES" "" ""
89   -"wholesale_price" "json" "YES" "" ""
90   -"note" "text" "YES" "" ""
91   -"extend" "json" "YES" "" ""
92   -"shoplazza_created_at" "datetime" "YES" "" ""
93   -"shoplazza_updated_at" "datetime" "YES" "" ""
94   -"tenant_id" "bigint(20)" "NO" "MUL" ""
95   -"creator" "varchar(64)" "YES" "" "" ""
96   -"create_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" ""
97   -"updater" "varchar(64)" "YES" "" "" ""
98   -"update_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "on update CURRENT_TIMESTAMP"
99   -"deleted" "bit(1)" "NO" "" "b'0'" ""
docs/索引字段说明v2.md
1   -# 索引字段说明 v2
  1 +# 索引字段说明
2 2  
3   -本文档详细说明 `search_products` 索引的字段结构、类型、数据来源和用途。
  3 +本文档详细说明商品搜索所用 Elasticsearch 索引的字段结构、类型、数据来源和用途。
4 4  
5 5 ## 索引概述
6 6  
7   -- **索引名称**: `search_products`
  7 +- **索引命名**: 按租户独立索引,名称为 **`search_products_tenant_{tenant_id}`**(例如 `search_products_tenant_162`)。由代码中 `get_tenant_index_name(tenant_id)` 生成。
8 8 - **索引维度**: SPU(Standard Product Unit)级别
9   -- **多租户隔离**: 通过 `tenant_id` 字段实现
10   -- **Mapping 文件**: `mappings/search_products.json`
  9 +- **多租户隔离**: **索引层面隔离**——每个租户一个独立索引,查询与写入时直接使用该索引名,无需在查询条件中再按 `tenant_id` 过滤。
  10 +- **Mapping 模板**: `mappings/search_products.json`(新建每个租户索引时复用同一份 mapping)
  11 +
  12 +## ES 索引在代码中的使用
  13 +
  14 +索引名由 **`indexer/mapping_generator.py`** 中的 `get_tenant_index_name(tenant_id)` 生成,格式为 `search_products_tenant_{tenant_id}`。
  15 +
  16 +| 场景 | 位置 | 说明 |
  17 +|------|------|------|
  18 +| 搜索请求 | `search/searcher.py` | 构建 ES 查询时通过 `get_tenant_index_name(tenant_id)` 得到 `index_name`,调用 `es_client.search(index_name=..., body=...)`,不再在 query 中加 `tenant_id` 过滤。 |
  19 +| 全量写入 | `indexer/bulk_indexing_service.py` | 按 `tenant_id` 调用 `get_tenant_index_name(tenant_id)` 作为写入目标索引,建索引与 bulk 写入均使用该名。 |
  20 +| 增量写入 | `indexer/incremental_service.py` | 同上,索引名由 `get_tenant_index_name(tenant_id)` 得到。 |
  21 +| 建索引 / 删索引 | `indexer/mapping_generator.py` | `create_index_if_not_exists`、`delete_index_if_exists` 等均按传入的 `index_name`(即租户索引名)操作。 |
  22 +
  23 +因此:每个租户对应一个独立 ES 索引,查询与写入都直接指定该索引名,实现索引级多租户隔离。
11 24  
12 25 ## 字段分类
13 26  
... ... @@ -15,7 +28,7 @@
15 28  
16 29 | 字段名 | ES类型 | 说明 | 数据来源 |
17 30 |--------|--------|------|----------|
18   -| `tenant_id` | keyword | 租户ID,用于多租户隔离 | MySQL: `shoplazza_product_spu.tenant_id` |
  31 +| `tenant_id` | keyword | 租户ID(文档级标识,便于排查与统计;隔离已由索引名保证,查询无需再按此过滤) | MySQL: `shoplazza_product_spu.tenant_id` |
19 32 | `spu_id` | keyword | SPU唯一标识 | MySQL: `shoplazza_product_spu.id` |
20 33 | `create_time` | date | 创建时间 | MySQL: `shoplazza_product_spu.create_time` |
21 34 | `update_time` | date | 更新时间 | MySQL: `shoplazza_product_spu.update_time` |
... ... @@ -359,7 +372,7 @@
359 372  
360 373 ### 过滤字段(精确匹配)
361 374  
362   -- `tenant_id` (必需,多租户隔离)
  375 +- `tenant_id`(文档中保留,便于排查与统计;**多租户隔离已由索引名保证,查询时无需再按 tenant_id 过滤**)
363 376 - `category_id`, `category_name`, `category1_name`, `category2_name`, `category3_name`
364 377 - `vendor.zh.keyword`, `vendor.en.keyword`
365 378 - `specifications` (嵌套查询)
... ... @@ -445,7 +458,7 @@ filters AND (text_recall OR embedding_recall)
445 458 ## 注意事项
446 459  
447 460 1. **索引维度**: 所有数据以 SPU 为单位索引,SKU 信息作为嵌套字段存储
448   -2. **多租户隔离**: 所有查询必须包含 `tenant_id` 过滤条件
  461 +2. **多租户隔离**: 按租户使用独立索引(`search_products_tenant_{tenant_id}`),查询时通过 `get_tenant_index_name(tenant_id)` 指定索引名即可,无需在 query 中加 `tenant_id` 过滤
449 462 3. **多语言支持**: 文本字段支持中英文,后端根据 `language` 参数自动选择
450 463 4. **规格分面**: `specifications` 使用嵌套聚合,按 `name` 分组,然后按 `value` 聚合
451 464 5. **向量字段**: `title_embedding` 和 `image_embedding` 仅用于搜索,不返回给前端
... ...
indexer/document_transformer.py
... ... @@ -2,6 +2,10 @@
2 2 SPU文档转换器 - 公共转换逻辑。
3 3  
4 4 提取全量和增量索引共用的文档转换逻辑,避免代码冗余。
  5 +输出文档结构与 mappings/search_products.json 及 索引字段说明v2 一致,
  6 +供 search/searcher 与 search/es_query_builder 使用。
  7 +- 多语言字段:title, brief, description, vendor, category_path, category_name_text
  8 +- 嵌套:specifications, skus;向量:title_embedding(image_embedding 由 Java 索引或外部填充)
5 9 """
6 10  
7 11 import pandas as pd
... ... @@ -137,7 +141,13 @@ class SPUDocumentTransformer:
137 141 doc['min_price'] = 0.0
138 142 doc['max_price'] = 0.0
139 143  
140   - if compare_prices:
  144 + # 优先使用 SPU 级 compare_at_price(与索引字段说明v2一致),否则取 SKU 最大值
  145 + if pd.notna(spu_row.get('compare_at_price')):
  146 + try:
  147 + doc['compare_at_price'] = float(spu_row['compare_at_price'])
  148 + except (ValueError, TypeError):
  149 + doc['compare_at_price'] = float(max(compare_prices)) if compare_prices else None
  150 + elif compare_prices:
141 151 doc['compare_at_price'] = float(max(compare_prices))
142 152 else:
143 153 doc['compare_at_price'] = None
... ... @@ -290,6 +300,8 @@ class SPUDocumentTransformer:
290 300 if category_names:
291 301 category_path_str = '/'.join(category_names)
292 302 doc['category_path'] = {primary_lang: category_path_str}
  303 + # 与查询使用的 category_name_text.zh/en 对齐,便于类目搜索
  304 + doc['category_name_text'] = {primary_lang: category_path_str}
293 305  
294 306 # 填充分层类目名称
295 307 if len(category_names) > 0:
... ...
indexer/incremental_service.py
... ... @@ -176,7 +176,7 @@ class IncrementalIndexerService:
176 176 image_src, image_width, image_height, image_path, image_alt,
177 177 tags, note, category, category_id, category_google_id,
178 178 category_level, category_path,
179   - fake_sales, display_fake_sales,
  179 + compare_at_price, fake_sales, display_fake_sales,
180 180 tenant_id, creator, create_time, updater, update_time, deleted
181 181 FROM shoplazza_product_spu
182 182 WHERE tenant_id = :tenant_id AND id = :spu_id
... ... @@ -191,7 +191,7 @@ class IncrementalIndexerService:
191 191 image_src, image_width, image_height, image_path, image_alt,
192 192 tags, note, category, category_id, category_google_id,
193 193 category_level, category_path,
194   - fake_sales, display_fake_sales,
  194 + compare_at_price, fake_sales, display_fake_sales,
195 195 tenant_id, creator, create_time, updater, update_time, deleted
196 196 FROM shoplazza_product_spu
197 197 WHERE tenant_id = :tenant_id AND id = :spu_id AND deleted = 0
... ... @@ -243,7 +243,7 @@ class IncrementalIndexerService:
243 243 image_src, image_width, image_height, image_path, image_alt,
244 244 tags, note, category, category_id, category_google_id,
245 245 category_level, category_path,
246   - fake_sales, display_fake_sales,
  246 + compare_at_price, fake_sales, display_fake_sales,
247 247 tenant_id, creator, create_time, updater, update_time, deleted
248 248 FROM shoplazza_product_spu
249 249 WHERE tenant_id = :tenant_id AND id IN :spu_ids
... ... @@ -258,7 +258,7 @@ class IncrementalIndexerService:
258 258 image_src, image_width, image_height, image_path, image_alt,
259 259 tags, note, category, category_id, category_google_id,
260 260 category_level, category_path,
261   - fake_sales, display_fake_sales,
  261 + compare_at_price, fake_sales, display_fake_sales,
262 262 tenant_id, creator, create_time, updater, update_time, deleted
263 263 FROM shoplazza_product_spu
264 264 WHERE tenant_id = :tenant_id AND deleted = 0 AND id IN :spu_ids
... ...
indexer/spu_transformer.py
... ... @@ -45,7 +45,7 @@ class SPUTransformer:
45 45 image_src, image_width, image_height, image_path, image_alt,
46 46 tags, note, category, category_id, category_google_id,
47 47 category_level, category_path,
48   - fake_sales, display_fake_sales,
  48 + compare_at_price, fake_sales, display_fake_sales,
49 49 tenant_id, creator, create_time, updater, update_time, deleted
50 50 FROM shoplazza_product_spu
51 51 WHERE tenant_id = :tenant_id AND deleted = 0
... ...