Commit cadc77b6bfe6292708ce8c0db83554b133886510
1 parent
3cf1c64d
索引字段名、变量名、API数据结构字段名都对齐spu/sku表
Showing
23 changed files
with
1128 additions
and
369 deletions
Show diff stats
api/models.py
| ... | ... | @@ -43,7 +43,7 @@ class FacetConfig(BaseModel): |
| 43 | 43 | json_schema_extra = { |
| 44 | 44 | "examples": [ |
| 45 | 45 | { |
| 46 | - "field": "categoryName_keyword", | |
| 46 | + "field": "category.keyword", | |
| 47 | 47 | "size": 15, |
| 48 | 48 | "type": "terms" |
| 49 | 49 | }, |
| ... | ... | @@ -77,8 +77,8 @@ class SearchRequest(BaseModel): |
| 77 | 77 | json_schema_extra={ |
| 78 | 78 | "examples": [ |
| 79 | 79 | { |
| 80 | - "categoryName_keyword": ["玩具", "益智玩具"], | |
| 81 | - "brandName_keyword": "乐高", | |
| 80 | + "category.keyword": ["玩具", "益智玩具"], | |
| 81 | + "vendor.keyword": "乐高", | |
| 82 | 82 | "in_stock": True |
| 83 | 83 | } |
| 84 | 84 | ] |
| ... | ... | @@ -110,10 +110,10 @@ class SearchRequest(BaseModel): |
| 110 | 110 | json_schema_extra={ |
| 111 | 111 | "examples": [ |
| 112 | 112 | # 简单模式:只指定字段名,使用默认配置 |
| 113 | - ["categoryName_keyword", "brandName_keyword"], | |
| 113 | + ["category.keyword", "vendor.keyword"], | |
| 114 | 114 | # 高级模式:详细配置 |
| 115 | 115 | [ |
| 116 | - {"field": "categoryName_keyword", "size": 15}, | |
| 116 | + {"field": "category.keyword", "size": 15}, | |
| 117 | 117 | { |
| 118 | 118 | "field": "price", |
| 119 | 119 | "type": "range", |
| ... | ... | @@ -172,10 +172,10 @@ class FacetResult(BaseModel): |
| 172 | 172 | total_count: Optional[int] = Field(None, description="该字段的总文档数") |
| 173 | 173 | |
| 174 | 174 | |
| 175 | -class VariantResult(BaseModel): | |
| 176 | - """商品变体结果""" | |
| 177 | - variant_id: str = Field(..., description="变体ID") | |
| 178 | - title: Optional[str] = Field(None, description="变体标题") | |
| 175 | +class SkuResult(BaseModel): | |
| 176 | + """SKU 结果""" | |
| 177 | + sku_id: str = Field(..., description="SKU ID") | |
| 178 | + title: Optional[str] = Field(None, description="SKU标题") | |
| 179 | 179 | price: Optional[float] = Field(None, description="价格") |
| 180 | 180 | compare_at_price: Optional[float] = Field(None, description="原价") |
| 181 | 181 | sku: Optional[str] = Field(None, description="SKU编码") |
| ... | ... | @@ -183,21 +183,21 @@ class VariantResult(BaseModel): |
| 183 | 183 | options: Optional[Dict[str, Any]] = Field(None, description="选项(颜色、尺寸等)") |
| 184 | 184 | |
| 185 | 185 | |
| 186 | -class ProductResult(BaseModel): | |
| 187 | - """商品搜索结果""" | |
| 188 | - product_id: str = Field(..., description="商品ID") | |
| 186 | +class SpuResult(BaseModel): | |
| 187 | + """SPU 搜索结果""" | |
| 188 | + spu_id: str = Field(..., description="SPU ID") | |
| 189 | 189 | title: Optional[str] = Field(None, description="商品标题") |
| 190 | 190 | handle: Optional[str] = Field(None, description="商品handle") |
| 191 | 191 | description: Optional[str] = Field(None, description="商品描述") |
| 192 | 192 | vendor: Optional[str] = Field(None, description="供应商/品牌") |
| 193 | - product_type: Optional[str] = Field(None, description="商品类型") | |
| 193 | + category: Optional[str] = Field(None, description="类目") | |
| 194 | 194 | tags: Optional[str] = Field(None, description="标签") |
| 195 | 195 | price: Optional[float] = Field(None, description="价格(min_price)") |
| 196 | 196 | compare_at_price: Optional[float] = Field(None, description="原价") |
| 197 | 197 | currency: str = Field("USD", description="货币单位") |
| 198 | 198 | image_url: Optional[str] = Field(None, description="主图URL") |
| 199 | 199 | in_stock: bool = Field(True, description="是否有库存") |
| 200 | - variants: List[VariantResult] = Field(default_factory=list, description="变体列表") | |
| 200 | + skus: List[SkuResult] = Field(default_factory=list, description="SKU列表") | |
| 201 | 201 | relevance_score: float = Field(..., ge=0.0, description="相关性分数(ES原始分数)") |
| 202 | 202 | |
| 203 | 203 | |
| ... | ... | @@ -205,7 +205,7 @@ class SearchResponse(BaseModel): |
| 205 | 205 | """搜索响应模型(外部友好格式)""" |
| 206 | 206 | |
| 207 | 207 | # 核心结果 |
| 208 | - results: List[ProductResult] = Field(..., description="搜索结果列表") | |
| 208 | + results: List[SpuResult] = Field(..., description="搜索结果列表") | |
| 209 | 209 | total: int = Field(..., description="匹配的总文档数") |
| 210 | 210 | max_score: float = Field(..., description="最高相关性分数") |
| 211 | 211 | ... | ... |
api/result_formatter.py
| ... | ... | @@ -3,7 +3,7 @@ Result formatter for converting ES internal format to external-friendly format. |
| 3 | 3 | """ |
| 4 | 4 | |
| 5 | 5 | from typing import List, Dict, Any, Optional |
| 6 | -from .models import ProductResult, VariantResult, FacetResult, FacetValue | |
| 6 | +from .models import SpuResult, SkuResult, FacetResult, FacetValue | |
| 7 | 7 | |
| 8 | 8 | |
| 9 | 9 | class ResultFormatter: |
| ... | ... | @@ -13,16 +13,16 @@ class ResultFormatter: |
| 13 | 13 | def format_search_results( |
| 14 | 14 | es_hits: List[Dict[str, Any]], |
| 15 | 15 | max_score: float = 1.0 |
| 16 | - ) -> List[ProductResult]: | |
| 16 | + ) -> List[SpuResult]: | |
| 17 | 17 | """ |
| 18 | - Convert ES hits to ProductResult list. | |
| 18 | + Convert ES hits to SpuResult list. | |
| 19 | 19 | |
| 20 | 20 | Args: |
| 21 | 21 | es_hits: List of ES hit dictionaries (with _id, _score, _source) |
| 22 | 22 | max_score: Maximum score (unused, kept for compatibility) |
| 23 | 23 | |
| 24 | 24 | Returns: |
| 25 | - List of ProductResult objects | |
| 25 | + List of SpuResult objects | |
| 26 | 26 | """ |
| 27 | 27 | results = [] |
| 28 | 28 | |
| ... | ... | @@ -40,44 +40,44 @@ class ResultFormatter: |
| 40 | 40 | except (ValueError, TypeError): |
| 41 | 41 | relevance_score = 0.0 |
| 42 | 42 | |
| 43 | - # Extract variants | |
| 44 | - variants = [] | |
| 45 | - variants_data = source.get('variants', []) | |
| 46 | - if isinstance(variants_data, list): | |
| 47 | - for variant_data in variants_data: | |
| 48 | - variant = VariantResult( | |
| 49 | - variant_id=str(variant_data.get('variant_id', '')), | |
| 50 | - title=variant_data.get('title'), | |
| 51 | - price=variant_data.get('price'), | |
| 52 | - compare_at_price=variant_data.get('compare_at_price'), | |
| 53 | - sku=variant_data.get('sku'), | |
| 54 | - stock=variant_data.get('stock', 0), | |
| 55 | - options=variant_data.get('options') | |
| 43 | + # Extract SKUs | |
| 44 | + skus = [] | |
| 45 | + skus_data = source.get('skus', []) | |
| 46 | + if isinstance(skus_data, list): | |
| 47 | + for sku_entry in skus_data: | |
| 48 | + sku = SkuResult( | |
| 49 | + sku_id=str(sku_entry.get('sku_id', '')), | |
| 50 | + title=sku_entry.get('title'), | |
| 51 | + price=sku_entry.get('price'), | |
| 52 | + compare_at_price=sku_entry.get('compare_at_price'), | |
| 53 | + sku=sku_entry.get('sku'), | |
| 54 | + stock=sku_entry.get('stock', 0), | |
| 55 | + options=sku_entry.get('options') | |
| 56 | 56 | ) |
| 57 | - variants.append(variant) | |
| 57 | + skus.append(sku) | |
| 58 | 58 | |
| 59 | - # Determine in_stock (any variant has stock > 0) | |
| 60 | - in_stock = any(v.stock > 0 for v in variants) if variants else True | |
| 59 | + # Determine in_stock (any sku has stock > 0) | |
| 60 | + in_stock = any(sku.stock > 0 for sku in skus) if skus else True | |
| 61 | 61 | |
| 62 | - # Build ProductResult | |
| 63 | - product = ProductResult( | |
| 64 | - product_id=str(source.get('product_id', '')), | |
| 62 | + # Build SpuResult | |
| 63 | + spu = SpuResult( | |
| 64 | + spu_id=str(source.get('spu_id', '')), | |
| 65 | 65 | title=source.get('title'), |
| 66 | 66 | handle=source.get('handle'), |
| 67 | 67 | description=source.get('description'), |
| 68 | 68 | vendor=source.get('vendor'), |
| 69 | - product_type=source.get('product_type'), | |
| 69 | + category=source.get('category'), | |
| 70 | 70 | tags=source.get('tags'), |
| 71 | 71 | price=source.get('min_price'), |
| 72 | 72 | compare_at_price=source.get('compare_at_price'), |
| 73 | 73 | currency="USD", # Default currency |
| 74 | 74 | image_url=source.get('image_url'), |
| 75 | 75 | in_stock=in_stock, |
| 76 | - variants=variants, | |
| 76 | + skus=skus, | |
| 77 | 77 | relevance_score=relevance_score |
| 78 | 78 | ) |
| 79 | 79 | |
| 80 | - results.append(product) | |
| 80 | + results.append(spu) | |
| 81 | 81 | |
| 82 | 82 | return results |
| 83 | 83 | |
| ... | ... | @@ -99,6 +99,7 @@ class ResultFormatter: |
| 99 | 99 | facets = [] |
| 100 | 100 | |
| 101 | 101 | for field_name, agg_data in es_aggregations.items(): |
| 102 | + display_field = field_name[:-6] if field_name.endswith("_facet") else field_name | |
| 102 | 103 | # Handle terms aggregation |
| 103 | 104 | if 'buckets' in agg_data: |
| 104 | 105 | values = [] |
| ... | ... | @@ -112,8 +113,8 @@ class ResultFormatter: |
| 112 | 113 | values.append(value) |
| 113 | 114 | |
| 114 | 115 | facet = FacetResult( |
| 115 | - field=field_name, | |
| 116 | - label=field_name, # Can be enhanced with field labels | |
| 116 | + field=display_field, | |
| 117 | + label=display_field, # Can be enhanced with field labels | |
| 117 | 118 | type="terms", |
| 118 | 119 | values=values, |
| 119 | 120 | total_count=agg_data.get('sum_other_doc_count', 0) + len(values) |
| ... | ... | @@ -134,8 +135,8 @@ class ResultFormatter: |
| 134 | 135 | values.append(value) |
| 135 | 136 | |
| 136 | 137 | facet = FacetResult( |
| 137 | - field=field_name, | |
| 138 | - label=field_name, | |
| 138 | + field=display_field, | |
| 139 | + label=display_field, | |
| 139 | 140 | type="range", |
| 140 | 141 | values=values |
| 141 | 142 | ) |
| ... | ... | @@ -146,7 +147,7 @@ class ResultFormatter: |
| 146 | 147 | @staticmethod |
| 147 | 148 | def generate_suggestions( |
| 148 | 149 | query: str, |
| 149 | - results: List[ProductResult] | |
| 150 | + results: List[SpuResult] | |
| 150 | 151 | ) -> List[str]: |
| 151 | 152 | """ |
| 152 | 153 | Generate search suggestions. |
| ... | ... | @@ -164,7 +165,7 @@ class ResultFormatter: |
| 164 | 165 | @staticmethod |
| 165 | 166 | def generate_related_searches( |
| 166 | 167 | query: str, |
| 167 | - results: List[ProductResult] | |
| 168 | + results: List[SpuResult] | |
| 168 | 169 | ) -> List[str]: |
| 169 | 170 | """ |
| 170 | 171 | Generate related searches. | ... | ... |
config/config.yaml
| ... | ... | @@ -22,7 +22,7 @@ fields: |
| 22 | 22 | return_in_source: true |
| 23 | 23 | |
| 24 | 24 | # 商品标识字段 |
| 25 | - - name: "product_id" | |
| 25 | + - name: "spu_id" | |
| 26 | 26 | type: "KEYWORD" |
| 27 | 27 | required: true |
| 28 | 28 | index: true |
| ... | ... | @@ -87,61 +87,29 @@ fields: |
| 87 | 87 | |
| 88 | 88 | # 分类和标签字段(TEXT + KEYWORD双重索引) |
| 89 | 89 | - name: "vendor" |
| 90 | - type: "TEXT" | |
| 90 | + type: "HKText" | |
| 91 | 91 | analyzer: "chinese_ecommerce" |
| 92 | 92 | boost: 1.5 |
| 93 | 93 | index: true |
| 94 | 94 | store: true |
| 95 | 95 | return_in_source: true |
| 96 | 96 | |
| 97 | - - name: "vendor_keyword" | |
| 98 | - type: "KEYWORD" | |
| 99 | - index: true | |
| 100 | - store: false | |
| 101 | - return_in_source: false # keyword字段通常只用于过滤,不需要返回 | |
| 102 | - | |
| 103 | - - name: "product_type" | |
| 104 | - type: "TEXT" | |
| 105 | - analyzer: "chinese_ecommerce" | |
| 106 | - boost: 1.5 | |
| 107 | - index: true | |
| 108 | - store: true | |
| 109 | - return_in_source: true | |
| 110 | - | |
| 111 | - - name: "product_type_keyword" | |
| 112 | - type: "KEYWORD" | |
| 113 | - index: true | |
| 114 | - store: false | |
| 115 | - return_in_source: false | |
| 116 | - | |
| 117 | 97 | - name: "tags" |
| 118 | - type: "TEXT" | |
| 98 | + type: "HKText" | |
| 119 | 99 | analyzer: "chinese_ecommerce" |
| 120 | 100 | boost: 1.0 |
| 121 | 101 | index: true |
| 122 | 102 | store: true |
| 123 | 103 | return_in_source: true |
| 124 | 104 | |
| 125 | - - name: "tags_keyword" | |
| 126 | - type: "KEYWORD" | |
| 127 | - index: true | |
| 128 | - store: false | |
| 129 | - return_in_source: false | |
| 130 | - | |
| 131 | 105 | - name: "category" |
| 132 | - type: "TEXT" | |
| 106 | + type: "HKText" | |
| 133 | 107 | analyzer: "chinese_ecommerce" |
| 134 | 108 | boost: 1.5 |
| 135 | 109 | index: true |
| 136 | 110 | store: true |
| 137 | 111 | return_in_source: true |
| 138 | 112 | |
| 139 | - - name: "category_keyword" | |
| 140 | - type: "KEYWORD" | |
| 141 | - index: true | |
| 142 | - store: false | |
| 143 | - return_in_source: false | |
| 144 | - | |
| 145 | 113 | # 价格字段(扁平化) |
| 146 | 114 | - name: "min_price" |
| 147 | 115 | type: "FLOAT" |
| ... | ... | @@ -202,13 +170,13 @@ fields: |
| 202 | 170 | store: true |
| 203 | 171 | return_in_source: false # 通常不需要返回 |
| 204 | 172 | |
| 205 | - # 嵌套variants字段 | |
| 206 | - - name: "variants" | |
| 173 | + # 嵌套skus字段 | |
| 174 | + - name: "skus" | |
| 207 | 175 | type: "JSON" |
| 208 | 176 | nested: true |
| 209 | 177 | return_in_source: true |
| 210 | 178 | nested_properties: |
| 211 | - variant_id: | |
| 179 | + sku_id: | |
| 212 | 180 | type: "keyword" |
| 213 | 181 | index: true |
| 214 | 182 | store: true |
| ... | ... | @@ -249,7 +217,6 @@ indexes: |
| 249 | 217 | - "seo_description" |
| 250 | 218 | - "seo_keywords" |
| 251 | 219 | - "vendor" |
| 252 | - - "product_type" | |
| 253 | 220 | - "tags" |
| 254 | 221 | - "category" |
| 255 | 222 | analyzer: "chinese_ecommerce" |
| ... | ... | @@ -323,9 +290,9 @@ rerank: |
| 323 | 290 | expression: "" |
| 324 | 291 | description: "Local reranking (disabled, use ES function_score instead)" |
| 325 | 292 | |
| 326 | -# SPU配置(已启用,使用嵌套variants) | |
| 293 | +# SPU配置(已启用,使用嵌套skus) | |
| 327 | 294 | spu_config: |
| 328 | 295 | enabled: true |
| 329 | - spu_field: "product_id" | |
| 296 | + spu_field: "spu_id" | |
| 330 | 297 | inner_hits_size: 10 |
| 331 | 298 | ... | ... |
config/config_loader.py
| ... | ... | @@ -277,11 +277,13 @@ class ConfigLoader: |
| 277 | 277 | """Parse field configuration from dictionary.""" |
| 278 | 278 | name = field_data["name"] |
| 279 | 279 | field_type_str = field_data["type"] |
| 280 | + field_type_raw = field_type_str | |
| 280 | 281 | |
| 281 | 282 | # Map field type string to enum |
| 282 | 283 | if field_type_str not in FIELD_TYPE_MAP: |
| 283 | 284 | raise ConfigurationError(f"Unknown field type: {field_type_str}") |
| 284 | 285 | field_type = FIELD_TYPE_MAP[field_type_str] |
| 286 | + is_hktext = field_type_str.lower() == "hktext" | |
| 285 | 287 | |
| 286 | 288 | # Map analyzer string to enum (if provided) |
| 287 | 289 | analyzer = None |
| ... | ... | @@ -309,7 +311,9 @@ class ConfigLoader: |
| 309 | 311 | embedding_dims=field_data.get("embedding_dims", 1024), |
| 310 | 312 | embedding_similarity=field_data.get("embedding_similarity", "dot_product"), |
| 311 | 313 | nested=field_data.get("nested", False), |
| 312 | - nested_properties=field_data.get("nested_properties") | |
| 314 | + nested_properties=field_data.get("nested_properties"), | |
| 315 | + keyword_subfield=field_data.get("keyword_subfield", is_hktext), | |
| 316 | + keyword_ignore_above=field_data.get("keyword_ignore_above", 256) | |
| 313 | 317 | ) |
| 314 | 318 | |
| 315 | 319 | def _parse_index_config(self, index_data: Dict[str, Any]) -> IndexConfig: | ... | ... |
config/field_types.py
| ... | ... | @@ -72,6 +72,10 @@ class FieldConfig: |
| 72 | 72 | nested: bool = False |
| 73 | 73 | nested_properties: Optional[Dict[str, Any]] = None |
| 74 | 74 | |
| 75 | + # Hybrid Keyword Text (HKText) support | |
| 76 | + keyword_subfield: bool = False | |
| 77 | + keyword_ignore_above: int = 256 | |
| 78 | + | |
| 75 | 79 | |
| 76 | 80 | def get_es_mapping_for_field(field_config: FieldConfig) -> Dict[str, Any]: |
| 77 | 81 | """ |
| ... | ... | @@ -102,6 +106,13 @@ def get_es_mapping_for_field(field_config: FieldConfig) -> Dict[str, Any]: |
| 102 | 106 | if field_config.search_analyzer: |
| 103 | 107 | mapping["search_analyzer"] = field_config.search_analyzer.value |
| 104 | 108 | |
| 109 | + if field_config.keyword_subfield: | |
| 110 | + mapping.setdefault("fields", {}) | |
| 111 | + mapping["fields"]["keyword"] = { | |
| 112 | + "type": "keyword", | |
| 113 | + "ignore_above": field_config.keyword_ignore_above | |
| 114 | + } | |
| 115 | + | |
| 105 | 116 | elif field_config.field_type == FieldType.KEYWORD: |
| 106 | 117 | mapping = { |
| 107 | 118 | "type": "keyword", |
| ... | ... | @@ -256,6 +267,9 @@ def get_default_similarity() -> Dict[str, Any]: |
| 256 | 267 | FIELD_TYPE_MAP = { |
| 257 | 268 | "text": FieldType.TEXT, |
| 258 | 269 | "TEXT": FieldType.TEXT, |
| 270 | + "HKText": FieldType.TEXT, | |
| 271 | + "hktext": FieldType.TEXT, | |
| 272 | + "HKTEXT": FieldType.TEXT, | |
| 259 | 273 | "keyword": FieldType.KEYWORD, |
| 260 | 274 | "KEYWORD": FieldType.KEYWORD, |
| 261 | 275 | "LITERAL": FieldType.KEYWORD, | ... | ... |
config/query_rewrite.dict
docs/INDEX_FIELDS_DOCUMENTATION.md
| ... | ... | @@ -6,7 +6,7 @@ |
| 6 | 6 | |
| 7 | 7 | - **索引名称**: `search_products` |
| 8 | 8 | - **索引级别**: SPU级别(商品级别) |
| 9 | -- **数据结构**: SPU文档包含嵌套的variants(SKU)数组 | |
| 9 | +- **数据结构**: SPU文档包含嵌套的skus(SKU)数组 | |
| 10 | 10 | |
| 11 | 11 | ## 字段说明表 |
| 12 | 12 | |
| ... | ... | @@ -15,7 +15,7 @@ |
| 15 | 15 | | 索引字段名 | ES字段类型 | 是否索引 | 索引方式 | 数据来源表 | 表中字段名 | 表中字段类型 | 说明 | |
| 16 | 16 | |-----------|-----------|---------|---------|-----------|-----------|-------------|------| |
| 17 | 17 | | tenant_id | keyword | 是 | 精确匹配 | SPU表 | tenant_id | BIGINT | 租户ID,用于多租户隔离 | |
| 18 | -| product_id | keyword | 是 | 精确匹配 | SPU表 | id | BIGINT | 商品ID(SPU ID) | | |
| 18 | +| spu_id | keyword | 是 | 精确匹配 | SPU表 | id | BIGINT | 商品ID(SPU ID) | | |
| 19 | 19 | | handle | keyword | 是 | 精确匹配 | SPU表 | handle | VARCHAR(255) | 商品URL handle | |
| 20 | 20 | |
| 21 | 21 | ### 文本搜索字段 |
| ... | ... | @@ -39,13 +39,13 @@ |
| 39 | 39 | | 索引字段名 | ES字段类型 | 是否索引 | 索引方式 | 数据来源表 | 表中字段名 | 表中字段类型 | Boost权重 | 是否返回 | 说明 | |
| 40 | 40 | |-----------|-----------|---------|---------|-----------|-----------|-------------|-----------|---------|------| |
| 41 | 41 | | vendor | TEXT | 是 | english | SPU表 | vendor | VARCHAR(255) | 1.5 | 是 | 供应商/品牌(文本搜索) | |
| 42 | -| vendor_keyword | keyword | 是 | 精确匹配 | SPU表 | vendor | VARCHAR(255) | - | 否 | 供应商/品牌(精确匹配,用于过滤) | | |
| 42 | +| vendor.keyword | keyword | 是 | 精确匹配 | SPU表 | vendor | VARCHAR(255) | - | 否 | 供应商/品牌(精确匹配,用于过滤) | | |
| 43 | 43 | | product_type | TEXT | 是 | english | SPU表 | category | VARCHAR(255) | 1.5 | 是 | 商品类型(文本搜索) | |
| 44 | 44 | | product_type_keyword | keyword | 是 | 精确匹配 | SPU表 | category | VARCHAR(255) | - | 否 | 商品类型(精确匹配,用于过滤) | |
| 45 | 45 | | tags | TEXT | 是 | english | SPU表 | tags | VARCHAR(1024) | 1.0 | 是 | 标签(文本搜索) | |
| 46 | -| tags_keyword | keyword | 是 | 精确匹配 | SPU表 | tags | VARCHAR(1024) | - | 否 | 标签(精确匹配,用于过滤) | | |
| 46 | +| tags.keyword | keyword | 是 | 精确匹配 | SPU表 | tags | VARCHAR(1024) | - | 否 | 标签(精确匹配,用于过滤) | | |
| 47 | 47 | | category | TEXT | 是 | english | SPU表 | category | VARCHAR(255) | 1.5 | 是 | 类目(文本搜索) | |
| 48 | -| category_keyword | keyword | 是 | 精确匹配 | SPU表 | category | VARCHAR(255) | - | 否 | 类目(精确匹配,用于过滤) | | |
| 48 | +| category.keyword | keyword | 是 | 精确匹配 | SPU表 | category | VARCHAR(255) | - | 否 | 类目(精确匹配,用于过滤) | | |
| 49 | 49 | |
| 50 | 50 | ### 价格字段 |
| 51 | 51 | |
| ... | ... | @@ -86,26 +86,26 @@ |
| 86 | 86 | | shoplazza_created_at | DATE | 是 | 日期范围 | SPU表 | shoplazza_created_at | DATETIME | 否 | 店匠系统创建时间 | |
| 87 | 87 | | shoplazza_updated_at | DATE | 是 | 日期范围 | SPU表 | shoplazza_updated_at | DATETIME | 否 | 店匠系统更新时间 | |
| 88 | 88 | |
| 89 | -### 嵌套Variants字段(SKU级别) | |
| 89 | +### 嵌套Skus字段(SKU级别) | |
| 90 | 90 | |
| 91 | 91 | | 索引字段名 | ES字段类型 | 是否索引 | 索引方式 | 数据来源表 | 表中字段名 | 表中字段类型 | 说明 | |
| 92 | 92 | |-----------|-----------|---------|---------|-----------|-----------|-------------|------| |
| 93 | -| variants | JSON (nested) | 是 | 嵌套对象 | SKU表 | - | - | 商品变体数组(嵌套结构) | | |
| 93 | +| skus | JSON (nested) | 是 | 嵌套对象 | SKU表 | - | - | 商品变体数组(嵌套结构) | | |
| 94 | 94 | |
| 95 | -#### Variants子字段 | |
| 95 | +#### Skus子字段 | |
| 96 | 96 | |
| 97 | 97 | | 索引字段名 | ES字段类型 | 是否索引 | 索引方式 | 数据来源表 | 表中字段名 | 表中字段类型 | 说明 | |
| 98 | 98 | |-----------|-----------|---------|---------|-----------|-----------|-------------|------| |
| 99 | -| variants.variant_id | keyword | 是 | 精确匹配 | SKU表 | id | BIGINT | 变体ID(SKU ID) | | |
| 100 | -| variants.title | text | 是 | english | SKU表 | title | VARCHAR(500) | 变体标题 | | |
| 101 | -| variants.price | float | 是 | float | SKU表 | price | DECIMAL(10,2) | 变体价格 | | |
| 102 | -| variants.compare_at_price | float | 是 | float | SKU表 | compare_at_price | DECIMAL(10,2) | 变体原价 | | |
| 103 | -| variants.sku | keyword | 是 | 精确匹配 | SKU表 | sku | VARCHAR(100) | SKU编码 | | |
| 104 | -| variants.stock | long | 是 | float | SKU表 | inventory_quantity | INT(11) | 库存数量 | | |
| 105 | -| variants.options | object | 是 | 对象 | SKU表 | option1/option2/option3 | VARCHAR(255) | 选项(颜色、尺寸等) | | |
| 106 | - | |
| 107 | -**Variants结构说明**: | |
| 108 | -- `variants` 是一个嵌套对象数组,每个元素代表一个SKU | |
| 99 | +| skus.sku_id | keyword | 是 | 精确匹配 | SKU表 | id | BIGINT | 变体ID(SKU ID) | | |
| 100 | +| skus.title | text | 是 | english | SKU表 | title | VARCHAR(500) | 变体标题 | | |
| 101 | +| skus.price | float | 是 | float | SKU表 | price | DECIMAL(10,2) | 变体价格 | | |
| 102 | +| skus.compare_at_price | float | 是 | float | SKU表 | compare_at_price | DECIMAL(10,2) | 变体原价 | | |
| 103 | +| skus.sku | keyword | 是 | 精确匹配 | SKU表 | sku | VARCHAR(100) | SKU编码 | | |
| 104 | +| skus.stock | long | 是 | float | SKU表 | inventory_quantity | INT(11) | 库存数量 | | |
| 105 | +| skus.options | object | 是 | 对象 | SKU表 | option1/option2/option3 | VARCHAR(255) | 选项(颜色、尺寸等) | | |
| 106 | + | |
| 107 | +**Skus结构说明**: | |
| 108 | +- `skus` 是一个嵌套对象数组,每个元素代表一个SKU | |
| 109 | 109 | - 使用ES的nested类型,支持对嵌套字段进行独立查询和过滤 |
| 110 | 110 | - `options` 对象包含 `option1`、`option2`、`option3` 三个字段,分别对应SKU表中的选项值 |
| 111 | 111 | |
| ... | ... | @@ -165,7 +165,7 @@ |
| 165 | 165 | |
| 166 | 166 | ### 数据类型转换 |
| 167 | 167 | |
| 168 | -1. **BIGINT → keyword**: 数字ID转换为字符串(如 `product_id`, `variant_id`) | |
| 168 | +1. **BIGINT → keyword**: 数字ID转换为字符串(如 `spu_id`, `sku_id`) | |
| 169 | 169 | 2. **DECIMAL → FLOAT**: 价格字段从DECIMAL转换为FLOAT |
| 170 | 170 | 3. **INT → LONG**: 库存数量从INT转换为LONG |
| 171 | 171 | 4. **DATETIME → DATE**: 时间字段转换为ISO格式字符串 |
| ... | ... | @@ -179,7 +179,7 @@ |
| 179 | 179 | ## 注意事项 |
| 180 | 180 | |
| 181 | 181 | 1. **多租户隔离**: 所有查询必须包含 `tenant_id` 过滤条件 |
| 182 | -2. **嵌套查询**: 查询variants字段时需要使用nested查询语法 | |
| 182 | +2. **嵌套查询**: 查询skus字段时需要使用nested查询语法 | |
| 183 | 183 | 3. **字段命名**: 用于过滤的字段应使用 `.keyword` 子字段(如 `category.keyword`、`vendor.keyword`) |
| 184 | 184 | 4. **向量搜索**: title_embedding字段用于语义搜索,需要配合文本查询使用 |
| 185 | 185 | 5. **Boost权重**: 不同字段的boost权重影响搜索结果的相关性排序 |
| ... | ... | @@ -210,7 +210,7 @@ |
| 210 | 210 | ### SKU表(shoplazza_product_sku) |
| 211 | 211 | |
| 212 | 212 | 主要字段: |
| 213 | -- `id`: BIGINT - 主键ID(对应variant_id) | |
| 213 | +- `id`: BIGINT - 主键ID(对应sku_id) | |
| 214 | 214 | - `spu_id`: BIGINT - SPU ID(关联字段) |
| 215 | 215 | - `title`: VARCHAR(500) - 变体标题 |
| 216 | 216 | - `price`: DECIMAL(10,2) - 价格 | ... | ... |
docs/Search-API-Examples.md
| ... | ... | @@ -90,7 +90,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 90 | 90 | -d '{ |
| 91 | 91 | "query": "玩具", |
| 92 | 92 | "filters": { |
| 93 | - "categoryName_keyword": "玩具" | |
| 93 | + "category.keyword": "玩具" | |
| 94 | 94 | } |
| 95 | 95 | }' |
| 96 | 96 | ``` |
| ... | ... | @@ -103,7 +103,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 103 | 103 | -d '{ |
| 104 | 104 | "query": "娃娃", |
| 105 | 105 | "filters": { |
| 106 | - "categoryName_keyword": ["玩具", "益智玩具", "儿童玩具"] | |
| 106 | + "category.keyword": ["玩具", "益智玩具", "儿童玩具"] | |
| 107 | 107 | } |
| 108 | 108 | }' |
| 109 | 109 | ``` |
| ... | ... | @@ -118,8 +118,8 @@ curl -X POST "http://localhost:6002/search/" \ |
| 118 | 118 | -d '{ |
| 119 | 119 | "query": "娃娃", |
| 120 | 120 | "filters": { |
| 121 | - "categoryName_keyword": "玩具", | |
| 122 | - "brandName_keyword": "美泰" | |
| 121 | + "category.keyword": "玩具", | |
| 122 | + "vendor.keyword": "美泰" | |
| 123 | 123 | } |
| 124 | 124 | }' |
| 125 | 125 | ``` |
| ... | ... | @@ -209,8 +209,8 @@ curl -X POST "http://localhost:6002/search/" \ |
| 209 | 209 | -d '{ |
| 210 | 210 | "query": "玩具", |
| 211 | 211 | "filters": { |
| 212 | - "categoryName_keyword": ["玩具", "益智玩具"], | |
| 213 | - "brandName_keyword": "乐高" | |
| 212 | + "category.keyword": ["玩具", "益智玩具"], | |
| 213 | + "vendor.keyword": "乐高" | |
| 214 | 214 | }, |
| 215 | 215 | "range_filters": { |
| 216 | 216 | "price": { |
| ... | ... | @@ -237,7 +237,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 237 | 237 | -d '{ |
| 238 | 238 | "query": "玩具", |
| 239 | 239 | "size": 20, |
| 240 | - "facets": ["categoryName_keyword", "brandName_keyword"] | |
| 240 | + "facets": ["category.keyword", "vendor.keyword"] | |
| 241 | 241 | }' |
| 242 | 242 | ``` |
| 243 | 243 | |
| ... | ... | @@ -248,8 +248,8 @@ curl -X POST "http://localhost:6002/search/" \ |
| 248 | 248 | "total": 118, |
| 249 | 249 | "facets": [ |
| 250 | 250 | { |
| 251 | - "field": "categoryName_keyword", | |
| 252 | - "label": "categoryName_keyword", | |
| 251 | + "field": "category.keyword", | |
| 252 | + "label": "category.keyword", | |
| 253 | 253 | "type": "terms", |
| 254 | 254 | "values": [ |
| 255 | 255 | {"value": "玩具", "count": 85, "selected": false}, |
| ... | ... | @@ -257,8 +257,8 @@ curl -X POST "http://localhost:6002/search/" \ |
| 257 | 257 | ] |
| 258 | 258 | }, |
| 259 | 259 | { |
| 260 | - "field": "brandName_keyword", | |
| 261 | - "label": "brandName_keyword", | |
| 260 | + "field": "vendor.keyword", | |
| 261 | + "label": "vendor.keyword", | |
| 262 | 262 | "type": "terms", |
| 263 | 263 | "values": [ |
| 264 | 264 | {"value": "乐高", "count": 42, "selected": false}, |
| ... | ... | @@ -280,12 +280,12 @@ curl -X POST "http://localhost:6002/search/" \ |
| 280 | 280 | "query": "玩具", |
| 281 | 281 | "facets": [ |
| 282 | 282 | { |
| 283 | - "field": "categoryName_keyword", | |
| 283 | + "field": "category.keyword", | |
| 284 | 284 | "size": 20, |
| 285 | 285 | "type": "terms" |
| 286 | 286 | }, |
| 287 | 287 | { |
| 288 | - "field": "brandName_keyword", | |
| 288 | + "field": "vendor.keyword", | |
| 289 | 289 | "size": 30, |
| 290 | 290 | "type": "terms" |
| 291 | 291 | } |
| ... | ... | @@ -342,8 +342,8 @@ curl -X POST "http://localhost:6002/search/" \ |
| 342 | 342 | -d '{ |
| 343 | 343 | "query": "玩具", |
| 344 | 344 | "facets": [ |
| 345 | - {"field": "categoryName_keyword", "size": 15}, | |
| 346 | - {"field": "brandName_keyword", "size": 15}, | |
| 345 | + {"field": "category.keyword", "size": 15}, | |
| 346 | + {"field": "vendor.keyword", "size": 15}, | |
| 347 | 347 | { |
| 348 | 348 | "field": "price", |
| 349 | 349 | "type": "range", |
| ... | ... | @@ -395,7 +395,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 395 | 395 | -d '{ |
| 396 | 396 | "query": "玩具", |
| 397 | 397 | "filters": { |
| 398 | - "categoryName_keyword": "益智玩具" | |
| 398 | + "category.keyword": "益智玩具" | |
| 399 | 399 | }, |
| 400 | 400 | "sort_by": "min_price", |
| 401 | 401 | "sort_order": "asc" |
| ... | ... | @@ -426,7 +426,7 @@ curl -X POST "http://localhost:6002/search/image" \ |
| 426 | 426 | "image_url": "https://example.com/barbie.jpg", |
| 427 | 427 | "size": 20, |
| 428 | 428 | "filters": { |
| 429 | - "categoryName_keyword": "玩具" | |
| 429 | + "category.keyword": "玩具" | |
| 430 | 430 | }, |
| 431 | 431 | "range_filters": { |
| 432 | 432 | "price": { |
| ... | ... | @@ -560,14 +560,14 @@ result = search_products( |
| 560 | 560 | query="玩具", |
| 561 | 561 | size=20, |
| 562 | 562 | filters={ |
| 563 | - "categoryName_keyword": ["玩具", "益智玩具"] | |
| 563 | + "category.keyword": ["玩具", "益智玩具"] | |
| 564 | 564 | }, |
| 565 | 565 | range_filters={ |
| 566 | 566 | "price": {"gte": 50, "lte": 200} |
| 567 | 567 | }, |
| 568 | 568 | facets=[ |
| 569 | - {"field": "brandName_keyword", "size": 15}, | |
| 570 | - {"field": "categoryName_keyword", "size": 15}, | |
| 569 | + {"field": "vendor.keyword", "size": 15}, | |
| 570 | + {"field": "category.keyword", "size": 15}, | |
| 571 | 571 | { |
| 572 | 572 | "field": "price", |
| 573 | 573 | "type": "range", |
| ... | ... | @@ -697,14 +697,14 @@ const result2 = await client.search({ |
| 697 | 697 | query: "玩具", |
| 698 | 698 | size: 20, |
| 699 | 699 | filters: { |
| 700 | - categoryName_keyword: ["玩具", "益智玩具"] | |
| 700 | + "category.keyword": ["玩具", "益智玩具"] | |
| 701 | 701 | }, |
| 702 | 702 | rangeFilters: { |
| 703 | 703 | price: { gte: 50, lte: 200 } |
| 704 | 704 | }, |
| 705 | 705 | facets: [ |
| 706 | - { field: "brandName_keyword", size: 15 }, | |
| 707 | - { field: "categoryName_keyword", size: 15 } | |
| 706 | + { field: "vendor.keyword", size: 15 }, | |
| 707 | + { field: "category.keyword", size: 15 } | |
| 708 | 708 | ], |
| 709 | 709 | sortBy: "price", |
| 710 | 710 | sortOrder: "asc" |
| ... | ... | @@ -755,8 +755,8 @@ const SearchComponent = { |
| 755 | 755 | filters: this.filters, |
| 756 | 756 | range_filters: this.rangeFilters, |
| 757 | 757 | facets: [ |
| 758 | - { field: 'categoryName_keyword', size: 15 }, | |
| 759 | - { field: 'brandName_keyword', size: 15 } | |
| 758 | + { field: 'category.keyword', size: 15 }, | |
| 759 | + { field: 'vendor.keyword', size: 15 } | |
| 760 | 760 | ] |
| 761 | 761 | }) |
| 762 | 762 | }); |
| ... | ... | @@ -868,10 +868,10 @@ curl -X POST "http://localhost:6002/search/" \ |
| 868 | 868 | -d '{ |
| 869 | 869 | "query": "*", |
| 870 | 870 | "filters": { |
| 871 | - "categoryName_keyword": "玩具" | |
| 871 | + "category.keyword": "玩具" | |
| 872 | 872 | }, |
| 873 | 873 | "facets": [ |
| 874 | - {"field": "brandName_keyword", "size": 20}, | |
| 874 | + {"field": "vendor.keyword", "size": 20}, | |
| 875 | 875 | { |
| 876 | 876 | "field": "price", |
| 877 | 877 | "type": "range", |
| ... | ... | @@ -898,8 +898,8 @@ curl -X POST "http://localhost:6002/search/" \ |
| 898 | 898 | -d '{ |
| 899 | 899 | "query": "芭比娃娃", |
| 900 | 900 | "facets": [ |
| 901 | - {"field": "categoryName_keyword", "size": 10}, | |
| 902 | - {"field": "brandName_keyword", "size": 10}, | |
| 901 | + {"field": "category.keyword", "size": 10}, | |
| 902 | + {"field": "vendor.keyword", "size": 10}, | |
| 903 | 903 | {"field": "price", "type": "range", "ranges": [ |
| 904 | 904 | {"key": "0-50", "to": 50}, |
| 905 | 905 | {"key": "50-100", "from": 50, "to": 100}, |
| ... | ... | @@ -924,7 +924,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 924 | 924 | "lte": 100 |
| 925 | 925 | } |
| 926 | 926 | }, |
| 927 | - "facets": ["categoryName_keyword", "brandName_keyword"], | |
| 927 | + "facets": ["category.keyword", "vendor.keyword"], | |
| 928 | 928 | "sort_by": "min_price", |
| 929 | 929 | "sort_order": "asc", |
| 930 | 930 | "size": 50 |
| ... | ... | @@ -1017,8 +1017,8 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1017 | 1017 | # ✅ 推荐:只请求必要的分面 |
| 1018 | 1018 | { |
| 1019 | 1019 | "facets": [ |
| 1020 | - {"field": "categoryName_keyword", "size": 15}, | |
| 1021 | - {"field": "brandName_keyword", "size": 15} | |
| 1020 | + {"field": "category.keyword", "size": 15}, | |
| 1021 | + {"field": "vendor.keyword", "size": 15} | |
| 1022 | 1022 | ] |
| 1023 | 1023 | } |
| 1024 | 1024 | ``` |
| ... | ... | @@ -1045,7 +1045,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1045 | 1045 | { |
| 1046 | 1046 | "query": "玩具", |
| 1047 | 1047 | "filters": { |
| 1048 | - "categoryName_keyword": "玩具" | |
| 1048 | + "category.keyword": "玩具" | |
| 1049 | 1049 | } |
| 1050 | 1050 | } |
| 1051 | 1051 | ``` |
| ... | ... | @@ -1064,7 +1064,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1064 | 1064 | "query": "*", |
| 1065 | 1065 | "size": 0, |
| 1066 | 1066 | "facets": [ |
| 1067 | - {"field": "categoryName_keyword", "size": 100} | |
| 1067 | + {"field": "category.keyword", "size": 100} | |
| 1068 | 1068 | ] |
| 1069 | 1069 | }' |
| 1070 | 1070 | ``` |
| ... | ... | @@ -1102,14 +1102,14 @@ curl -X POST "http://localhost:6002/search/" \ |
| 1102 | 1102 | -d '{ |
| 1103 | 1103 | "query": "(玩具 OR 游戏) AND 儿童 ANDNOT 电子", |
| 1104 | 1104 | "filters": { |
| 1105 | - "categoryName_keyword": ["玩具", "益智玩具"] | |
| 1105 | + "category.keyword": ["玩具", "益智玩具"] | |
| 1106 | 1106 | }, |
| 1107 | 1107 | "range_filters": { |
| 1108 | 1108 | "price": {"gte": 20, "lte": 100}, |
| 1109 | 1109 | "days_since_last_update": {"lte": 30} |
| 1110 | 1110 | }, |
| 1111 | 1111 | "facets": [ |
| 1112 | - {"field": "brandName_keyword", "size": 20} | |
| 1112 | + {"field": "vendor.keyword", "size": 20} | |
| 1113 | 1113 | ], |
| 1114 | 1114 | "sort_by": "min_price", |
| 1115 | 1115 | "sort_order": "asc", | ... | ... |
docs/Usage-Guide.md
| ... | ... | @@ -299,7 +299,7 @@ curl -X POST http://localhost:6002/search/ \ |
| 299 | 299 | "query": "玩具", |
| 300 | 300 | "size": 10, |
| 301 | 301 | "filters": { |
| 302 | - "categoryName_keyword": ["玩具", "益智玩具"] | |
| 302 | + "category.keyword": ["玩具", "益智玩具"] | |
| 303 | 303 | }, |
| 304 | 304 | "range_filters": { |
| 305 | 305 | "price": {"gte": 50, "lte": 200} |
| ... | ... | @@ -317,8 +317,8 @@ curl -X POST http://localhost:6002/search/ \ |
| 317 | 317 | "query": "玩具", |
| 318 | 318 | "size": 10, |
| 319 | 319 | "facets": [ |
| 320 | - {"field": "categoryName_keyword", "size": 15}, | |
| 321 | - {"field": "brandName_keyword", "size": 15} | |
| 320 | + {"field": "category.keyword", "size": 15}, | |
| 321 | + {"field": "vendor.keyword", "size": 15} | |
| 322 | 322 | ] |
| 323 | 323 | }' |
| 324 | 324 | ``` | ... | ... |
docs/基础配置指南.md
| ... | ... | @@ -8,7 +8,7 @@ Base配置是店匠(Shoplazza)通用配置,适用于所有使用店匠标 |
| 8 | 8 | |
| 9 | 9 | ## 核心特性 |
| 10 | 10 | |
| 11 | -- **SPU级别索引**:每个ES文档代表一个SPU,包含嵌套的variants数组 | |
| 11 | +- **SPU级别索引**:每个ES文档代表一个SPU,包含嵌套的skus数组 | |
| 12 | 12 | - **统一索引**:所有客户共享`search_products`索引 |
| 13 | 13 | - **租户隔离**:通过`tenant_id`字段实现数据隔离 |
| 14 | 14 | - **配置简化**:配置只包含ES搜索相关配置,不包含MySQL数据源配置 |
| ... | ... | @@ -41,12 +41,12 @@ Base配置**只包含**: |
| 41 | 41 | |
| 42 | 42 | ### 主要字段 |
| 43 | 43 | |
| 44 | -- `product_id` - 商品ID | |
| 44 | +- `spu_id` - SPU ID | |
| 45 | 45 | - `title`, `brief`, `description` - 文本搜索字段 |
| 46 | 46 | - `seo_title`, `seo_description`, `seo_keywords` - SEO字段 |
| 47 | -- `vendor`, `product_type`, `tags`, `category` - 分类和标签字段 | |
| 47 | +- `vendor`, `tags`, `category` - 分类和标签字段(HKText,支持 `.keyword` 精确匹配) | |
| 48 | 48 | - `min_price`, `max_price`, `compare_at_price` - 价格字段 |
| 49 | -- `variants` (nested) - 嵌套变体数组 | |
| 49 | +- `skus` (nested) - 嵌套SKU数组 | |
| 50 | 50 | |
| 51 | 51 | ## 数据导入流程 |
| 52 | 52 | |
| 52 | 52 | |
| ... | ... | @@ -109,9 +109,9 @@ Content-Type: application/json |
| 109 | 109 | "size": 10, |
| 110 | 110 | "from": 0, |
| 111 | 111 | "filters": { |
| 112 | - "category_keyword": "电子产品" | |
| 112 | + "category.keyword": "电子产品" | |
| 113 | 113 | }, |
| 114 | - "facets": ["category_keyword", "vendor_keyword"] | |
| 114 | + "facets": ["category.keyword", "vendor.keyword"] | |
| 115 | 115 | } |
| 116 | 116 | ``` |
| 117 | 117 | |
| ... | ... | @@ -120,20 +120,20 @@ Content-Type: application/json |
| 120 | 120 | { |
| 121 | 121 | "results": [ |
| 122 | 122 | { |
| 123 | - "product_id": "1", | |
| 123 | + "spu_id": "1", | |
| 124 | 124 | "title": "蓝牙耳机 Sony", |
| 125 | 125 | "handle": "product-1", |
| 126 | 126 | "description": "高品质无线蓝牙耳机", |
| 127 | 127 | "vendor": "Sony", |
| 128 | - "product_type": "电子产品", | |
| 128 | + "category": "电子产品", | |
| 129 | 129 | "price": 199.99, |
| 130 | 130 | "compare_at_price": 299.99, |
| 131 | 131 | "currency": "USD", |
| 132 | 132 | "image_url": "//cdn.example.com/products/1.jpg", |
| 133 | 133 | "in_stock": true, |
| 134 | - "variants": [ | |
| 134 | + "skus": [ | |
| 135 | 135 | { |
| 136 | - "variant_id": "1", | |
| 136 | + "sku_id": "1", | |
| 137 | 137 | "title": "黑色", |
| 138 | 138 | "price": 199.99, |
| 139 | 139 | "compare_at_price": 299.99, |
| ... | ... | @@ -151,8 +151,8 @@ Content-Type: application/json |
| 151 | 151 | "max_score": 1.0, |
| 152 | 152 | "facets": [ |
| 153 | 153 | { |
| 154 | - "field": "category_keyword", | |
| 155 | - "label": "category_keyword", | |
| 154 | + "field": "category.keyword", | |
| 155 | + "label": "category.keyword", | |
| 156 | 156 | "type": "terms", |
| 157 | 157 | "values": [ |
| 158 | 158 | { |
| ... | ... | @@ -176,31 +176,31 @@ Content-Type: application/json |
| 176 | 176 | #### 主要变化 |
| 177 | 177 | |
| 178 | 178 | 1. **`results`替代`hits`**:返回字段从`hits`改为`results` |
| 179 | -2. **结构化结果**:每个结果包含`product_id`, `title`, `variants`, `relevance_score`等字段 | |
| 179 | +2. **结构化结果**:每个结果包含`spu_id`, `title`, `skus`, `relevance_score`等字段 | |
| 180 | 180 | 3. **无ES内部字段**:不包含`_id`, `_score`, `_source`等ES内部字段 |
| 181 | -4. **嵌套variants**:每个商品包含variants数组,每个variant包含完整的变体信息 | |
| 181 | +4. **嵌套skus**:每个商品包含skus数组,每个sku包含完整的变体信息 | |
| 182 | 182 | 5. **相关性分数**:`relevance_score`是ES原始分数(不进行归一化) |
| 183 | 183 | |
| 184 | -#### ProductResult字段 | |
| 184 | +#### SpuResult字段 | |
| 185 | 185 | |
| 186 | -- `product_id` - 商品ID | |
| 186 | +- `spu_id` - SPU ID | |
| 187 | 187 | - `title` - 商品标题 |
| 188 | 188 | - `handle` - 商品handle |
| 189 | 189 | - `description` - 商品描述 |
| 190 | 190 | - `vendor` - 供应商/品牌 |
| 191 | -- `product_type` - 商品类型 | |
| 191 | +- `category` - 类目 | |
| 192 | 192 | - `tags` - 标签 |
| 193 | 193 | - `price` - 最低价格(min_price) |
| 194 | 194 | - `compare_at_price` - 原价 |
| 195 | 195 | - `currency` - 货币单位(默认USD) |
| 196 | 196 | - `image_url` - 主图URL |
| 197 | 197 | - `in_stock` - 是否有库存 |
| 198 | -- `variants` - 变体列表 | |
| 198 | +- `skus` - SKU列表 | |
| 199 | 199 | - `relevance_score` - 相关性分数(ES原始分数) |
| 200 | 200 | |
| 201 | -#### VariantResult字段 | |
| 201 | +#### SkuResult字段 | |
| 202 | 202 | |
| 203 | -- `variant_id` - 变体ID | |
| 203 | +- `sku_id` - SKU ID | |
| 204 | 204 | - `title` - 变体标题 |
| 205 | 205 | - `price` - 价格 |
| 206 | 206 | - `compare_at_price` - 原价 |
| ... | ... | @@ -242,7 +242,7 @@ A: 使用`test_base.py`脚本,指定两个不同的`--tenant-id`,检查搜索 |
| 242 | 242 | |
| 243 | 243 | ### Q: API返回格式中为什么没有`_id`和`_score`? |
| 244 | 244 | |
| 245 | -A: 为了提供外部友好的API格式,我们移除了ES内部字段,使用`product_id`和`relevance_score`替代。 | |
| 245 | +A: 为了提供外部友好的API格式,我们移除了ES内部字段,使用`spu_id`和`relevance_score`替代。 | |
| 246 | 246 | |
| 247 | 247 | ### Q: 如何添加新的搜索字段? |
| 248 | 248 | ... | ... |
docs/搜索API对接指南.md
| ... | ... | @@ -43,7 +43,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 43 | 43 | "size": 5, |
| 44 | 44 | "from": 10, |
| 45 | 45 | "filters": { |
| 46 | - "vendor_keyword": ["乐高", "孩之宝"] | |
| 46 | + "vendor.keyword": ["乐高", "孩之宝"] | |
| 47 | 47 | }, |
| 48 | 48 | "sort_by": "min_price", |
| 49 | 49 | "sort_order": "asc" |
| ... | ... | @@ -58,7 +58,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 58 | 58 | -d '{ |
| 59 | 59 | "tenant_id": "demo-tenant", |
| 60 | 60 | "query": "芭比娃娃", |
| 61 | - "facets": ["category_keyword", "vendor_keyword"], | |
| 61 | + "facets": ["category.keyword", "vendor.keyword"], | |
| 62 | 62 | "min_score": 0.2, |
| 63 | 63 | "debug": true |
| 64 | 64 | }' |
| ... | ... | @@ -134,9 +134,9 @@ curl -X POST "http://localhost:6002/search/" \ |
| 134 | 134 | ```json |
| 135 | 135 | { |
| 136 | 136 | "filters": { |
| 137 | - "category_keyword": "玩具", // 单值:精确匹配 | |
| 138 | - "vendor_keyword": ["乐高", "孩之宝"], // 数组:匹配任意值(OR) | |
| 139 | - "product_type_keyword": "益智玩具" // 单值:精确匹配 | |
| 137 | + "category.keyword": "玩具", // 单值:精确匹配 | |
| 138 | + "vendor.keyword": ["乐高", "孩之宝"], // 数组:匹配任意值(OR) | |
| 139 | + "tags.keyword": "益智玩具" // 单值:精确匹配 | |
| 140 | 140 | } |
| 141 | 141 | } |
| 142 | 142 | ``` |
| ... | ... | @@ -148,10 +148,9 @@ curl -X POST "http://localhost:6002/search/" \ |
| 148 | 148 | - 数组:匹配任意值(OR 逻辑) |
| 149 | 149 | |
| 150 | 150 | **常用过滤字段**: |
| 151 | -- `category_keyword`: 类目 | |
| 152 | -- `vendor_keyword`: 品牌/供应商 | |
| 153 | -- `product_type_keyword`: 商品类型 | |
| 154 | -- `tags_keyword`: 标签 | |
| 151 | +- `category.keyword`: 类目 | |
| 152 | +- `vendor.keyword`: 品牌/供应商 | |
| 153 | +- `tags.keyword`: 标签 | |
| 155 | 154 | |
| 156 | 155 | #### 2. 范围过滤器 (range_filters) |
| 157 | 156 | |
| ... | ... | @@ -197,7 +196,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 197 | 196 | **简单模式**(字符串数组): |
| 198 | 197 | ```json |
| 199 | 198 | { |
| 200 | - "facets": ["category_keyword", "vendor_keyword"] | |
| 199 | + "facets": ["category.keyword", "vendor.keyword"] | |
| 201 | 200 | } |
| 202 | 201 | ``` |
| 203 | 202 | |
| ... | ... | @@ -206,7 +205,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 206 | 205 | { |
| 207 | 206 | "facets": [ |
| 208 | 207 | { |
| 209 | - "field": "category_keyword", | |
| 208 | + "field": "category.keyword", | |
| 210 | 209 | "size": 15, |
| 211 | 210 | "type": "terms" |
| 212 | 211 | }, |
| ... | ... | @@ -270,21 +269,21 @@ curl -X POST "http://localhost:6002/search/" \ |
| 270 | 269 | { |
| 271 | 270 | "results": [ |
| 272 | 271 | { |
| 273 | - "product_id": "12345", | |
| 272 | + "spu_id": "12345", | |
| 274 | 273 | "title": "芭比时尚娃娃", |
| 275 | 274 | "handle": "barbie-doll", |
| 276 | 275 | "description": "高品质芭比娃娃", |
| 277 | 276 | "vendor": "美泰", |
| 278 | - "product_type": "玩具", | |
| 277 | + "category": "玩具", | |
| 279 | 278 | "tags": "娃娃, 玩具, 女孩", |
| 280 | 279 | "price": 89.99, |
| 281 | 280 | "compare_at_price": 129.99, |
| 282 | 281 | "currency": "USD", |
| 283 | 282 | "image_url": "https://example.com/image.jpg", |
| 284 | 283 | "in_stock": true, |
| 285 | - "variants": [ | |
| 284 | + "skus": [ | |
| 286 | 285 | { |
| 287 | - "variant_id": "67890", | |
| 286 | + "sku_id": "67890", | |
| 288 | 287 | "title": "粉色款", |
| 289 | 288 | "price": 89.99, |
| 290 | 289 | "compare_at_price": 129.99, |
| ... | ... | @@ -303,8 +302,8 @@ curl -X POST "http://localhost:6002/search/" \ |
| 303 | 302 | "max_score": 8.5, |
| 304 | 303 | "facets": [ |
| 305 | 304 | { |
| 306 | - "field": "category_keyword", | |
| 307 | - "label": "category_keyword", | |
| 305 | + "field": "category.keyword", | |
| 306 | + "label": "category.keyword", | |
| 308 | 307 | "type": "terms", |
| 309 | 308 | "values": [ |
| 310 | 309 | { |
| ... | ... | @@ -335,11 +334,11 @@ curl -X POST "http://localhost:6002/search/" \ |
| 335 | 334 | |
| 336 | 335 | | 字段 | 类型 | 说明 | |
| 337 | 336 | |------|------|------| |
| 338 | -| `results` | array | 搜索结果列表(ProductResult对象数组) | | |
| 339 | -| `results[].product_id` | string | 商品ID | | |
| 337 | +| `results` | array | 搜索结果列表(SpuResult对象数组) | | |
| 338 | +| `results[].spu_id` | string | SPU ID | | |
| 340 | 339 | | `results[].title` | string | 商品标题 | |
| 341 | 340 | | `results[].price` | float | 价格(min_price) | |
| 342 | -| `results[].variants` | array | 变体列表(SKU列表) | | |
| 341 | +| `results[].skus` | array | SKU列表 | | |
| 343 | 342 | | `results[].relevance_score` | float | 相关性分数 | |
| 344 | 343 | | `total` | integer | 匹配的总文档数 | |
| 345 | 344 | | `max_score` | float | 最高相关性分数 | |
| ... | ... | @@ -347,31 +346,31 @@ curl -X POST "http://localhost:6002/search/" \ |
| 347 | 346 | | `query_info` | object | 查询处理信息 | |
| 348 | 347 | | `took_ms` | integer | 搜索耗时(毫秒) | |
| 349 | 348 | |
| 350 | -### ProductResult字段说明 | |
| 349 | +### SpuResult字段说明 | |
| 351 | 350 | |
| 352 | 351 | | 字段 | 类型 | 说明 | |
| 353 | 352 | |------|------|------| |
| 354 | -| `product_id` | string | 商品ID(SPU ID) | | |
| 353 | +| `spu_id` | string | SPU ID | | |
| 355 | 354 | | `title` | string | 商品标题 | |
| 356 | 355 | | `handle` | string | 商品URL handle | |
| 357 | 356 | | `description` | string | 商品描述 | |
| 358 | 357 | | `vendor` | string | 供应商/品牌 | |
| 359 | -| `product_type` | string | 商品类型 | | |
| 358 | +| `category` | string | 类目 | | |
| 360 | 359 | | `tags` | string | 标签 | |
| 361 | 360 | | `price` | float | 价格(min_price) | |
| 362 | 361 | | `compare_at_price` | float | 原价 | |
| 363 | 362 | | `currency` | string | 货币单位(默认USD) | |
| 364 | 363 | | `image_url` | string | 主图URL | |
| 365 | -| `in_stock` | boolean | 是否有库存(任意变体有库存即为true) | | |
| 366 | -| `variants` | array | 变体列表 | | |
| 364 | +| `in_stock` | boolean | 是否有库存(任意SKU有库存即为true) | | |
| 365 | +| `skus` | array | SKU 列表 | | |
| 367 | 366 | | `relevance_score` | float | 相关性分数 | |
| 368 | 367 | |
| 369 | -### VariantResult字段说明 | |
| 368 | +### SkuResult字段说明 | |
| 370 | 369 | |
| 371 | 370 | | 字段 | 类型 | 说明 | |
| 372 | 371 | |------|------|------| |
| 373 | -| `variant_id` | string | 变体ID(SKU ID) | | |
| 374 | -| `title` | string | 变体标题 | | |
| 372 | +| `sku_id` | string | SKU ID | | |
| 373 | +| `title` | string | SKU标题 | | |
| 375 | 374 | | `price` | float | 价格 | |
| 376 | 375 | | `compare_at_price` | float | 原价 | |
| 377 | 376 | | `sku` | string | SKU编码 | |
| ... | ... | @@ -405,7 +404,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 405 | 404 | "query": "玩具", |
| 406 | 405 | "size": 20, |
| 407 | 406 | "filters": { |
| 408 | - "category_keyword": "益智玩具" | |
| 407 | + "category.keyword": "益智玩具" | |
| 409 | 408 | }, |
| 410 | 409 | "range_filters": { |
| 411 | 410 | "min_price": { |
| ... | ... | @@ -425,8 +424,8 @@ curl -X POST "http://localhost:6002/search/" \ |
| 425 | 424 | "query": "玩具", |
| 426 | 425 | "size": 20, |
| 427 | 426 | "facets": [ |
| 428 | - "category_keyword", | |
| 429 | - "vendor_keyword" | |
| 427 | + "category.keyword", | |
| 428 | + "vendor.keyword" | |
| 430 | 429 | ] |
| 431 | 430 | } |
| 432 | 431 | ``` |
| ... | ... | @@ -440,7 +439,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 440 | 439 | "query": "玩具", |
| 441 | 440 | "size": 20, |
| 442 | 441 | "filters": { |
| 443 | - "vendor_keyword": ["乐高", "孩之宝", "美泰"] | |
| 442 | + "vendor.keyword": ["乐高", "孩之宝", "美泰"] | |
| 444 | 443 | }, |
| 445 | 444 | "range_filters": { |
| 446 | 445 | "min_price": { |
| ... | ... | @@ -450,7 +449,7 @@ curl -X POST "http://localhost:6002/search/" \ |
| 450 | 449 | }, |
| 451 | 450 | "facets": [ |
| 452 | 451 | { |
| 453 | - "field": "category_keyword", | |
| 452 | + "field": "category.keyword", | |
| 454 | 453 | "size": 15 |
| 455 | 454 | }, |
| 456 | 455 | { |
| ... | ... | @@ -575,7 +574,7 @@ curl "http://localhost:6002/search/instant?q=玩具&size=5" |
| 575 | 574 | "source": { |
| 576 | 575 | "title": "芭比时尚娃娃", |
| 577 | 576 | "min_price": 89.99, |
| 578 | - "category_keyword": "玩具" | |
| 577 | + "category": "玩具" | |
| 579 | 578 | } |
| 580 | 579 | } |
| 581 | 580 | ``` |
| ... | ... | @@ -646,15 +645,15 @@ curl "http://localhost:6002/search/12345" |
| 646 | 645 | |
| 647 | 646 | | 字段名 | 类型 | 描述 | |
| 648 | 647 | |--------|------|------| |
| 649 | -| `product_id` | keyword | 商品 ID(SPU) | | |
| 648 | +| `spu_id` | keyword | SPU ID | | |
| 650 | 649 | | `sku_id` | keyword/long | SKU ID(主键) | |
| 651 | 650 | | `title` | text | 商品名称(中文) | |
| 652 | 651 | | `en_title` | text | 商品名称(英文) | |
| 653 | 652 | | `ru_title` | text | 商品名称(俄文) | |
| 654 | -| `category_keyword` | keyword | 类目(精确匹配) | | |
| 655 | -| `vendor_keyword` | keyword | 品牌/供应商(精确匹配) | | |
| 656 | -| `product_type_keyword` | keyword | 商品类型 | | |
| 657 | -| `tags_keyword` | keyword | 标签 | | |
| 653 | +| `category.keyword` | keyword | 类目(精确匹配) | | |
| 654 | +| `vendor.keyword` | keyword | 品牌/供应商(精确匹配) | | |
| 655 | +| `category` | HKText | 类目(支持 `category.keyword` 精确匹配) | | |
| 656 | +| `tags.keyword` | keyword | 标签 | | |
| 658 | 657 | | `min_price` | double | 最低价格 | |
| 659 | 658 | | `max_price` | double | 最高价格 | |
| 660 | 659 | | `compare_at_price` | double | 原价 | |
| ... | ... | @@ -664,7 +663,7 @@ curl "http://localhost:6002/search/12345" |
| 664 | 663 | | `text_embedding` | dense_vector | 文本向量(1024 维) | |
| 665 | 664 | | `image_embedding` | dense_vector | 图片向量(1024 维) | |
| 666 | 665 | |
| 667 | -> 不同租户可自定义字段名称,但最佳实践是对可过滤字段建立 `*_keyword` 版本,对可排序字段显式建 keyword/数值映射。 | |
| 666 | +> 不同租户可自定义字段名称。推荐将可过滤的文本字段配置为 HKText,这样即可同时支持全文检索和 `field.keyword` 精确过滤;数值字段单独建索引以用于排序/Range。 | |
| 668 | 667 | |
| 669 | 668 | --- |
| 670 | 669 | |
| ... | ... | @@ -672,12 +671,11 @@ curl "http://localhost:6002/search/12345" |
| 672 | 671 | |
| 673 | 672 | ### 常用字段列表 |
| 674 | 673 | |
| 675 | -#### 过滤字段(使用 `*_keyword` 后缀) | |
| 674 | +#### 过滤字段(使用 HKText 的 keyword 子字段) | |
| 676 | 675 | |
| 677 | -- `category_keyword`: 类目 | |
| 678 | -- `vendor_keyword`: 品牌/供应商 | |
| 679 | -- `product_type_keyword`: 商品类型 | |
| 680 | -- `tags_keyword`: 标签 | |
| 676 | +- `category.keyword`: 类目 | |
| 677 | +- `vendor.keyword`: 品牌/供应商 | |
| 678 | +- `tags.keyword`: 标签 | |
| 681 | 679 | |
| 682 | 680 | #### 范围字段 |
| 683 | 681 | ... | ... |
docs/搜索API速查表.md
| ... | ... | @@ -17,8 +17,8 @@ POST /search/ |
| 17 | 17 | ```bash |
| 18 | 18 | { |
| 19 | 19 | "filters": { |
| 20 | - "categoryName_keyword": "玩具", // 单值 | |
| 21 | - "brandName_keyword": ["乐高", "美泰"] // 多值(OR) | |
| 20 | + "category.keyword": "玩具", // 单值 | |
| 21 | + "vendor.keyword": ["乐高", "美泰"] // 多值(OR) | |
| 22 | 22 | } |
| 23 | 23 | } |
| 24 | 24 | ``` |
| ... | ... | @@ -48,7 +48,7 @@ POST /search/ |
| 48 | 48 | |
| 49 | 49 | ```bash |
| 50 | 50 | { |
| 51 | - "facets": ["categoryName_keyword", "brandName_keyword"] | |
| 51 | + "facets": ["category.keyword", "vendor.keyword"] | |
| 52 | 52 | } |
| 53 | 53 | ``` |
| 54 | 54 | |
| ... | ... | @@ -57,7 +57,7 @@ POST /search/ |
| 57 | 57 | ```bash |
| 58 | 58 | { |
| 59 | 59 | "facets": [ |
| 60 | - {"field": "categoryName_keyword", "size": 15}, | |
| 60 | + {"field": "category.keyword", "size": 15}, | |
| 61 | 61 | { |
| 62 | 62 | "field": "price", |
| 63 | 63 | "type": "range", |
| ... | ... | @@ -115,14 +115,14 @@ POST /search/ |
| 115 | 115 | "size": 20, |
| 116 | 116 | "from": 0, |
| 117 | 117 | "filters": { |
| 118 | - "categoryName_keyword": ["玩具", "益智玩具"] | |
| 118 | + "category.keyword": ["玩具", "益智玩具"] | |
| 119 | 119 | }, |
| 120 | 120 | "range_filters": { |
| 121 | 121 | "price": {"gte": 50, "lte": 200} |
| 122 | 122 | }, |
| 123 | 123 | "facets": [ |
| 124 | - {"field": "brandName_keyword", "size": 15}, | |
| 125 | - {"field": "categoryName_keyword", "size": 15} | |
| 124 | + {"field": "vendor.keyword", "size": 15}, | |
| 125 | + {"field": "category.keyword", "size": 15} | |
| 126 | 126 | ], |
| 127 | 127 | "sort_by": "min_price", |
| 128 | 128 | "sort_order": "asc" |
| ... | ... | @@ -147,7 +147,7 @@ POST /search/ |
| 147 | 147 | "took_ms": 45, |
| 148 | 148 | "facets": [ |
| 149 | 149 | { |
| 150 | - "field": "categoryName_keyword", | |
| 150 | + "field": "category.keyword", | |
| 151 | 151 | "label": "商品类目", |
| 152 | 152 | "type": "terms", |
| 153 | 153 | "values": [ |
| ... | ... | @@ -194,9 +194,9 @@ import requests |
| 194 | 194 | |
| 195 | 195 | result = requests.post('http://localhost:6002/search/', json={ |
| 196 | 196 | "query": "玩具", |
| 197 | - "filters": {"categoryName_keyword": "玩具"}, | |
| 197 | + "filters": {"category.keyword": "玩具"}, | |
| 198 | 198 | "range_filters": {"price": {"gte": 50, "lte": 200}}, |
| 199 | - "facets": ["brandName_keyword"], | |
| 199 | + "facets": ["vendor.keyword"], | |
| 200 | 200 | "sort_by": "min_price", |
| 201 | 201 | "sort_order": "asc" |
| 202 | 202 | }).json() |
| ... | ... | @@ -214,9 +214,9 @@ const result = await fetch('http://localhost:6002/search/', { |
| 214 | 214 | headers: {'Content-Type': 'application/json'}, |
| 215 | 215 | body: JSON.stringify({ |
| 216 | 216 | query: "玩具", |
| 217 | - filters: {categoryName_keyword: "玩具"}, | |
| 217 | + filters: {"category.keyword": "玩具"}, | |
| 218 | 218 | range_filters: {price: {gte: 50, lte: 200}}, |
| 219 | - facets: ["brandName_keyword"], | |
| 219 | + facets: ["vendor.keyword"], | |
| 220 | 220 | sort_by: "min_price", |
| 221 | 221 | sort_order: "asc" |
| 222 | 222 | }) | ... | ... |
docs/系统设计文档.md
| ... | ... | @@ -24,17 +24,17 @@ |
| 24 | 24 | - 所有客户共享同一个Elasticsearch索引:`search_products` |
| 25 | 25 | - 索引粒度:SPU级别(每个文档代表一个SPU) |
| 26 | 26 | - 数据隔离:通过`tenant_id`字段实现租户隔离 |
| 27 | -- 嵌套结构:每个SPU文档包含嵌套的`variants`数组(SKU变体) | |
| 27 | +- 嵌套结构:每个SPU文档包含嵌套的`skus`数组 | |
| 28 | 28 | |
| 29 | 29 | **索引文档结构**: |
| 30 | 30 | ```json |
| 31 | 31 | { |
| 32 | 32 | "tenant_id": "1", |
| 33 | - "product_id": "123", | |
| 33 | + "spu_id": "123", | |
| 34 | 34 | "title": "蓝牙耳机", |
| 35 | - "variants": [ | |
| 35 | + "skus": [ | |
| 36 | 36 | { |
| 37 | - "variant_id": "456", | |
| 37 | + "sku_id": "456", | |
| 38 | 38 | "title": "黑色", |
| 39 | 39 | "price": 199.99, |
| 40 | 40 | "sku": "SKU-123-1", |
| ... | ... | @@ -108,7 +108,7 @@ fields: |
| 108 | 108 | store: true |
| 109 | 109 | |
| 110 | 110 | # 商品标识字段 |
| 111 | - - name: "product_id" | |
| 111 | + - name: "spu_id" | |
| 112 | 112 | type: "KEYWORD" |
| 113 | 113 | required: true |
| 114 | 114 | index: true |
| ... | ... | @@ -129,12 +129,12 @@ fields: |
| 129 | 129 | index: true |
| 130 | 130 | store: true |
| 131 | 131 | |
| 132 | - # 嵌套variants字段 | |
| 133 | - - name: "variants" | |
| 132 | + # 嵌套skus字段 | |
| 133 | + - name: "skus" | |
| 134 | 134 | type: "JSON" |
| 135 | 135 | nested: true |
| 136 | 136 | nested_properties: |
| 137 | - variant_id: | |
| 137 | + sku_id: | |
| 138 | 138 | type: "keyword" |
| 139 | 139 | price: |
| 140 | 140 | type: "float" |
| ... | ... | @@ -246,7 +246,7 @@ indexes: |
| 246 | 246 | 1. **数据加载**:从MySQL读取`shoplazza_product_spu`和`shoplazza_product_sku`表 |
| 247 | 247 | 2. **数据转换**(`indexer/spu_transformer.py`): |
| 248 | 248 | - 按`spu_id`和`tenant_id`关联SPU和SKU数据 |
| 249 | - - 将SKU数据聚合为嵌套的`variants`数组 | |
| 249 | + - 将SKU数据聚合为嵌套的`skus`数组 | |
| 250 | 250 | - 计算扁平化价格字段(`min_price`, `max_price`, `compare_at_price`) |
| 251 | 251 | - 字段映射(写死在代码中,不依赖配置) |
| 252 | 252 | - 注入`tenant_id`字段 |
| ... | ... | @@ -518,7 +518,7 @@ ranking: |
| 518 | 518 | |
| 519 | 519 | ### 6.6 Base閰嶇疆锛堝簵鍖犻氱敤锛 |
| 520 | 520 | - 鉁 SPU绾у埆绱㈠紩缁撴瀯 |
| 521 | -- 鉁 宓屽variants瀛楁 | |
| 521 | +- 鉁 宓屽skus瀛楁 | |
| 522 | 522 | - 鉁 缁熶竴绱㈠紩锛坰earch_products锛 |
| 523 | 523 | - 鉁 绉熸埛闅旂锛坱enant_id锛 |
| 524 | 524 | - 鉁 閰嶇疆绠鍖栵紙绉婚櫎MySQL鐩稿叧閰嶇疆锛 |
| ... | ... | @@ -548,11 +548,11 @@ API杩斿洖鏍煎紡涓嶅寘鍚獷S鍐呴儴瀛楁锛坄_id`, `_score`, `_source`锛夛紝浣跨敤 |
| 548 | 548 | { |
| 549 | 549 | "results": [ |
| 550 | 550 | { |
| 551 | - "product_id": "123", | |
| 551 | + "spu_id": "123", | |
| 552 | 552 | "title": "蓝牙耳机", |
| 553 | - "variants": [ | |
| 553 | + "skus": [ | |
| 554 | 554 | { |
| 555 | - "variant_id": "456", | |
| 555 | + "sku_id": "456", | |
| 556 | 556 | "price": 199.99, |
| 557 | 557 | "sku": "SKU-123-1", |
| 558 | 558 | "stock": 50 |
| ... | ... | @@ -569,8 +569,8 @@ API返回格式不包含ES内部字段(`_id`, `_score`, `_source`),使用 |
| 569 | 569 | ``` |
| 570 | 570 | |
| 571 | 571 | **主要变化**: |
| 572 | -- 结构化结果(`ProductResult`和`VariantResult`) | |
| 573 | -- 嵌套variants数组 | |
| 572 | +- 结构化结果(`SpuResult`和`SkuResult`) | |
| 573 | +- 嵌套skus数组 | |
| 574 | 574 | - 无ES内部字段 |
| 575 | 575 | |
| 576 | 576 | ### 8.2 租户隔离 |
| ... | ... | @@ -610,14 +610,14 @@ Elasticsearch |
| 610 | 610 | |
| 611 | 611 | - **简单模式**:字符串列表(字段名),使用默认配置 |
| 612 | 612 | ```json |
| 613 | - ["categoryName_keyword", "brandName_keyword"] | |
| 613 | + ["category.keyword", "vendor.keyword"] | |
| 614 | 614 | ``` |
| 615 | 615 | |
| 616 | 616 | - **高级模式**:FacetConfig 对象列表,支持自定义配置 |
| 617 | 617 | ```json |
| 618 | 618 | [ |
| 619 | 619 | { |
| 620 | - "field": "categoryName_keyword", | |
| 620 | + "field": "category.keyword", | |
| 621 | 621 | "size": 15, |
| 622 | 622 | "type": "terms" |
| 623 | 623 | }, |
| ... | ... | @@ -714,11 +714,3 @@ class FacetResult(BaseModel): |
| 714 | 714 | **其他客户配置**:`config/schema/tenant1/config.yaml` |
| 715 | 715 | |
| 716 | 716 | --- |
| 717 | - | |
| 718 | -## 9. 相关文档 | |
| 719 | - | |
| 720 | -- `MULTILANG_FEATURE.md` - 多语言功能详细说明 | |
| 721 | -- `QUICKSTART.md` - 快速开始指南 | |
| 722 | -- `HighLevelDesign.md` - 高层设计文档 | |
| 723 | -- `IMPLEMENTATION_SUMMARY.md` - 实现总结 | |
| 724 | -- `商品数据源入ES配置规范.md` - 数据源配置规范 | ... | ... |
docs/索引字段说明.md
| ... | ... | @@ -11,7 +11,7 @@ |
| 11 | 11 | - 原始数据与用户环境均为多语言,需根据语言路由到不同分析器/索引方式,在线搜索时也要考虑多语言的适配。 |
| 12 | 12 | 4. **搜索接口适配** |
| 13 | 13 | - 接口简单,自动为多语言的数据源和 query 适配最优检索策略。 |
| 14 | - - 返回的结果格式约定为店匠系列的 SPU/SKU(products/variants)嵌套结构。 | |
| 14 | +   - 返回的结果格式约定为店匠系列的 SPU/SKU 嵌套结构。 | |
| 15 | 15 | - 支撑 facet/过滤/排序业务需求:用户可以选择任何一个 keyword 或 HKText 类型的字段做筛选、聚合;也可以选择任何一个数值型字段做 Range 过滤或排序。 |
| 16 | 16 | |
| 17 | 17 | 本文档详细说明了 Elasticsearch 索引中所有字段的类型、索引方式、数据来源等信息。 |
| ... | ... | @@ -20,7 +20,7 @@ |
| 20 | 20 | |
| 21 | 21 | - **索引名称**: `search_products` |
| 22 | 22 | - **索引级别**: SPU级别(商品级别) |
| 23 | -- **数据结构**: SPU文档包含嵌套的variants(SKU)数组 | |
| 23 | +- **数据结构**: SPU文档包含嵌套的skus数组 | |
| 24 | 24 | |
| 25 | 25 | ## 索引类型与处理说明 |
| 26 | 26 | |
| ... | ... | @@ -131,7 +131,7 @@ |
| 131 | 131 | | 索引字段名 | ES字段类型 | 是否索引 | 数据来源表 | 表中字段名 | 表中字段类型 | 数据预处理 | 说明 | |
| 132 | 132 | |-----------|-----------|---------|-----------|-----------|-------------|-------------|------| |
| 133 | 133 | | tenant_id | KEYWORD | 是 | SPU表 | tenant_id | BIGINT | BIGINT转字符串 | 租户ID,用于多租户隔离 | |
| 134 | -| product_id | KEYWORD | 是 | SPU表 | id | BIGINT | BIGINT转字符串 | 商品ID(SPU ID) | | |
| 134 | +| spu_id | KEYWORD | 是 | SPU表 | id | BIGINT | BIGINT转字符串 | SPU ID(主键) | | |
| 135 | 135 | | handle | KEYWORD | 是 | SPU表 | handle | VARCHAR(255) | | 商品URL handle | |
| 136 | 136 | |
| 137 | 137 | 数据预处理列留空表示该字段无需额外处理。 |
| ... | ... | @@ -156,14 +156,9 @@ |
| 156 | 156 | |
| 157 | 157 | | 索引字段名 | ES字段类型 | 是否索引 | 数据来源表 | 表中字段名 | 表中字段类型 | Boost权重 | 是否返回 | 数据预处理 | 说明 | |
| 158 | 158 | |-----------|-----------|---------|-----------|-----------|-------------|-----------|---------|-------------|------| |
| 159 | -| vendor | TEXT | 是 | SPU表 | vendor | VARCHAR(255) | 1.5 | 是 | | 供应商/品牌(文本搜索) | | |
| 160 | -| vendor_keyword | HKText | 是 | SPU表 | vendor | VARCHAR(255) | - | 否 | 按逗号分割为list,去除空白项 | 供应商/品牌(HKText类型,keyword子字段用于过滤) | | |
| 161 | -| product_type | TEXT | 是 | SPU表 | category | VARCHAR(255) | 1.5 | 是 | | 商品类型(文本搜索) | | |
| 162 | -| product_type_keyword | HKText | 是 | SPU表 | category | VARCHAR(255) | - | 否 | 按逗号分割为list,去除空白项 | 商品类型(HKText类型) | | |
| 163 | -| tags | HKText | 是 | SPU表 | tags | VARCHAR(1024) | 1.0 | 是 | | 标签(HKText类型,支持搜索+过滤) | | |
| 164 | -| tags_keyword | HKText | 是 | SPU表 | tags | VARCHAR(1024) | - | 否 | 按逗号分割为list,去除空白项 | 标签keyword别名(HKText类型) | | |
| 165 | -| category | TEXT | 是 | SPU表 | category | VARCHAR(255) | 1.5 | 是 | | 类目(文本搜索) | | |
| 166 | -| category_keyword | HKText | 是 | SPU表 | category | VARCHAR(255) | - | 否 | 按逗号分割为list,去除空白项 | 类目(HKText类型) | | |
| 159 | +| vendor | HKText | 是 | SPU表 | vendor | VARCHAR(255) | 1.5 | 是 | | 供应商/品牌,HKText字段自动提供 `vendor.keyword` 用于过滤、聚合 | | |
| 160 | +| tags | HKText | 是 | SPU表 | tags | VARCHAR(1024) | 1.0 | 是 | | 标签字段,支持模糊搜索;使用 `tags.keyword` 进行精确过滤 | | |
| 161 | +| category | HKText | 是 | SPU表 | category | VARCHAR(255) | 1.5 | 是 | | 类目字段,使用 `category.keyword` 进行过滤/分面 | | |
| 167 | 162 | |
| 168 | 163 | ### 价格字段 |
| 169 | 164 | |
| ... | ... | @@ -204,23 +199,23 @@ |
| 204 | 199 | | shoplazza_created_at | DATE | 是 | SPU表 | shoplazza_created_at | DATETIME | 否 | 转换为UTC ISO8601字符串 | 店匠系统创建时间 | |
| 205 | 200 | | shoplazza_updated_at | DATE | 是 | SPU表 | shoplazza_updated_at | DATETIME | 否 | 转换为UTC ISO8601字符串 | 店匠系统更新时间 | |
| 206 | 201 | |
| 207 | -### 嵌套Variants字段(SKU级别) | |
| 202 | +### 嵌套SKUs字段(SKU级别) | |
| 208 | 203 | |
| 209 | 204 | | 索引字段名 | ES字段类型 | 是否索引 | 数据来源表 | 表中字段名 | 表中字段类型 | 数据预处理 | 说明 | |
| 210 | 205 | |-----------|-----------|---------|-----------|-----------|-------------|-------------|------| |
| 211 | -| variants | JSON (nested) | 是 | SKU表 | - | - | 汇总同SPU下SKU记录,构建nested数组 | 商品变体数组(嵌套结构) | | |
| 206 | +| skus | JSON (nested) | 是 | SKU表 | - | - | 汇总同SPU下SKU记录,构建nested数组 | SKU数组(嵌套结构) | | |
| 212 | 207 | |
| 213 | -#### Variants子字段 | |
| 208 | +#### SKUs子字段 | |
| 214 | 209 | |
| 215 | 210 | | 索引字段名 | ES字段类型 | 是否索引 | 数据来源表 | 表中字段名 | 表中字段类型 | 数据预处理 | 说明 | |
| 216 | 211 | |-----------|-----------|---------|-----------|-----------|-------------|-------------|------| |
| 217 | -| variants.variant_id | keyword | 是 | SKU表 | id | BIGINT | BIGINT转字符串 | 变体ID(SKU ID) | | |
| 218 | -| variants.title | text | 是 | SKU表 | title | VARCHAR(500) | | 变体标题 | | |
| 219 | -| variants.price | float | 是 | SKU表 | price | DECIMAL(10,2) | DECIMAL转FLOAT | 变体价格 | | |
| 220 | -| variants.compare_at_price | float | 是 | SKU表 | compare_at_price | DECIMAL(10,2) | DECIMAL转FLOAT | 变体原价 | | |
| 221 | -| variants.sku | keyword | 是 | SKU表 | sku | VARCHAR(100) | | SKU编码 | | |
| 222 | -| variants.stock | long | 是 | SKU表 | inventory_quantity | INT(11) | INT转LONG | 库存数量 | | |
| 223 | -| variants.options | object | 是 | SKU表 | option1/option2/option3 | VARCHAR(255) | 合并option1/2/3并去除空值 | 选项(颜色、尺寸等) | | |
| 212 | +| skus.sku_id | keyword | 是 | SKU表 | id | BIGINT | BIGINT转字符串 | SKU ID | | |
| 213 | +| skus.title | text | 是 | SKU表 | title | VARCHAR(500) | | SKU标题 | | |
| 214 | +| skus.price | float | 是 | SKU表 | price | DECIMAL(10,2) | DECIMAL转FLOAT | SKU价格 | | |
| 215 | +| skus.compare_at_price | float | 是 | SKU表 | compare_at_price | DECIMAL(10,2) | DECIMAL转FLOAT | 原价 | | |
| 216 | +| skus.sku | keyword | 是 | SKU表 | sku | VARCHAR(100) | | SKU编码 | | |
| 217 | +| skus.stock | long | 是 | SKU表 | inventory_quantity | INT(11) | INT转LONG | 库存数量 | | |
| 218 | +| skus.options | object | 是 | SKU表 | option1/option2/option3 | VARCHAR(255) | 合并option1/2/3并去除空值 | 选项(颜色、尺寸等) | | |
| 224 | 219 | |
| 225 | 220 | **SKUs结构说明**: |
| 226 | 221 | - `skus` 是一个嵌套对象数组,每个元素代表一个SKU |
| ... | ... | @@ -285,7 +280,7 @@ |
| 285 | 280 | |
| 286 | 281 | ### 数据类型转换 |
| 287 | 282 | |
| 288 | -1. **BIGINT → KEYWORD**: 数字ID转换为字符串(如 `product_id`, `variant_id`) | |
| 283 | +1. **BIGINT → KEYWORD**: 数字ID转换为字符串(如 `spu_id`, `sku_id`) | |
| 289 | 284 | 2. **DECIMAL → FLOAT**: 价格字段从DECIMAL转换为FLOAT |
| 290 | 285 | 3. **INT → LONG**: 库存数量从INT转换为LONG |
| 291 | 286 | 4. **DATETIME → DATE**: 时间字段转换为ISO格式字符串 |
| ... | ... | @@ -327,6 +322,53 @@ |
| 327 | 322 | - `shoplazza_created_at`: DATETIME - 店匠创建时间 |
| 328 | 323 | - `shoplazza_updated_at`: DATETIME - 店匠更新时间 |
| 329 | 324 | |
| 325 | +spu表全部字段 | |
| 326 | +"Field" "Type" "Null" "Key" "Default" "Extra" | |
| 327 | +"id" "bigint(20)" "NO" "PRI" "auto_increment" | |
| 328 | +"shop_id" "bigint(20)" "NO" "MUL" "" | |
| 329 | +"shoplazza_id" "varchar(64)" "NO" "" "" | |
| 330 | +"handle" "varchar(255)" "YES" "MUL" "" | |
| 331 | +"title" "varchar(500)" "NO" "" "" | |
| 332 | +"brief" "varchar(1000)" "YES" "" "" | |
| 333 | +"description" "text" "YES" "" "" | |
| 334 | +"spu" "varchar(100)" "YES" "" "" | |
| 335 | +"vendor" "varchar(255)" "YES" "" "" | |
| 336 | +"vendor_url" "varchar(500)" "YES" "" "" | |
| 337 | +"seo_title" "varchar(500)" "YES" "" "" | |
| 338 | +"seo_description" "text" "YES" "" "" | |
| 339 | +"seo_keywords" "text" "YES" "" "" | |
| 340 | +"image_src" "varchar(500)" "YES" "" "" | |
| 341 | +"image_width" "int(11)" "YES" "" "" | |
| 342 | +"image_height" "int(11)" "YES" "" "" | |
| 343 | +"image_path" "varchar(255)" "YES" "" "" | |
| 344 | +"image_alt" "varchar(500)" "YES" "" "" | |
| 345 | +"inventory_policy" "varchar(50)" "YES" "" "" | |
| 346 | +"inventory_quantity" "int(11)" "YES" "" "0" "" | |
| 347 | +"inventory_tracking" "tinyint(1)" "YES" "" "0" "" | |
| 348 | +"published" "tinyint(1)" "YES" "" "0" "" | |
| 349 | +"published_at" "datetime" "YES" "MUL" "" | |
| 350 | +"requires_shipping" "tinyint(1)" "YES" "" "1" "" | |
| 351 | +"taxable" "tinyint(1)" "YES" "" "0" "" | |
| 352 | +"fake_sales" "int(11)" "YES" "" "0" "" | |
| 353 | +"display_fake_sales" "tinyint(1)" "YES" "" "0" "" | |
| 354 | +"mixed_wholesale" "tinyint(1)" "YES" "" "0" "" | |
| 355 | +"need_variant_image" "tinyint(1)" "YES" "" "0" "" | |
| 356 | +"has_only_default_variant" "tinyint(1)" "YES" "" "0" "" | |
| 357 | +"tags" "text" "YES" "" "" | |
| 358 | +"note" "text" "YES" "" "" | |
| 359 | +"category" "varchar(255)" "YES" "" "" | |
| 360 | +"shoplazza_created_at" "datetime" "YES" "" "" | |
| 361 | +"shoplazza_updated_at" "datetime" "YES" "MUL" "" | |
| 362 | +"tenant_id" "bigint(20)" "NO" "MUL" "" | |
| 363 | +"creator" "varchar(64)" "YES" "" "" "" | |
| 364 | +"create_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "" | |
| 365 | +"updater" "varchar(64)" "YES" "" "" "" | |
| 366 | +"update_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "on update CURRENT_TIMESTAMP" | |
| 367 | +"deleted" "bit(1)" "NO" "" "b'0'" "" | |
| 368 | + | |
| 369 | + | |
| 370 | + | |
| 371 | + | |
| 330 | 372 | ### SKU表(shoplazza_product_sku) |
| 331 | 373 | |
| 332 | 374 | 主要字段: |
| ... | ... | @@ -341,6 +383,40 @@ |
| 341 | 383 | - `option2`: VARCHAR(255) - 选项2 |
| 342 | 384 | - `option3`: VARCHAR(255) - 选项3 |
| 343 | 385 | |
| 386 | +sku全部字段 | |
| 387 | +"Field" "Type" "Null" "Key" "Default" "Extra" | |
| 388 | +"id" "bigint(20)" "NO" "PRI" "auto_increment" | |
| 389 | +"spu_id" "bigint(20)" "NO" "MUL" "" | |
| 390 | +"shop_id" "bigint(20)" "NO" "MUL" "" | |
| 391 | +"shoplazza_id" "varchar(64)" "NO" "" "" | |
| 392 | +"shoplazza_product_id" "varchar(64)" "NO" "MUL" "" | |
| 393 | +"shoplazza_image_id" "varchar(64)" "YES" "" "" | |
| 394 | +"title" "varchar(500)" "YES" "" "" | |
| 395 | +"sku" "varchar(100)" "YES" "MUL" "" | |
| 396 | +"barcode" "varchar(100)" "YES" "" "" | |
| 397 | +"position" "int(11)" "YES" "" "0" "" | |
| 398 | +"price" "decimal(10,2)" "YES" "" "" | |
| 399 | +"compare_at_price" "decimal(10,2)" "YES" "" "" | |
| 400 | +"cost_price" "decimal(10,2)" "YES" "" "" | |
| 401 | +"option1" "varchar(255)" "YES" "" "" | |
| 402 | +"option2" "varchar(255)" "YES" "" "" | |
| 403 | +"option3" "varchar(255)" "YES" "" "" | |
| 404 | +"inventory_quantity" "int(11)" "YES" "" "0" "" | |
| 405 | +"weight" "decimal(10,2)" "YES" "" "" | |
| 406 | +"weight_unit" "varchar(10)" "YES" "" "" | |
| 407 | +"image_src" "varchar(500)" "YES" "" "" | |
| 408 | +"wholesale_price" "json" "YES" "" "" | |
| 409 | +"note" "text" "YES" "" "" | |
| 410 | +"extend" "json" "YES" "" "" | |
| 411 | +"shoplazza_created_at" "datetime" "YES" "" "" | |
| 412 | +"shoplazza_updated_at" "datetime" "YES" "" "" | |
| 413 | +"tenant_id" "bigint(20)" "NO" "MUL" "" | |
| 414 | +"creator" "varchar(64)" "YES" "" "" "" | |
| 415 | +"create_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "" | |
| 416 | +"updater" "varchar(64)" "YES" "" "" "" | |
| 417 | +"update_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "on update CURRENT_TIMESTAMP" | |
| 418 | +"deleted" "bit(1)" "NO" "" "b'0'" "" | |
| 419 | + | |
| 344 | 420 | |
| 345 | 421 | ## TODO |
| 346 | 422 | 多语言问题。 | ... | ... |
| ... | ... | @@ -0,0 +1,716 @@ |
| 1 | +# 搜索引擎通用化开发进度 | |
| 2 | + | |
| 3 | +## 项目概述 | |
| 4 | + | |
| 5 | +对后端搜索技术 做通用化。 | |
| 6 | +通用化的本质 是 对于各种业务数据、各种检索需求,都可以 用少量定制+配置化 来实现效果。 | |
| 7 | + | |
| 8 | + | |
| 9 | +**通用化的本质**:对于各种业务数据、各种检索需求,都可以用少量定制+配置化来实现效果。 | |
| 10 | + | |
| 11 | +--- | |
| 12 | + | |
| 13 | +## 1. 原始数据层的约定 | |
| 14 | + | |
| 15 | +### 1.1 店匠主表 | |
| 16 | + | |
| 17 | +所有租户共用以下主表: | |
| 18 | +- `shoplazza_product_sku` - SKU级别商品数据 | |
| 19 | +- `shoplazza_product_spu` - SPU级别商品数据 | |
| 20 | + | |
| 21 | +### 1.2 索引结构(SPU维度) | |
| 22 | + | |
| 23 | +**统一索引架构**: | |
| 24 | +- 所有客户共享同一个Elasticsearch索引:`search_products` | |
| 25 | +- 索引粒度:SPU级别(每个文档代表一个SPU) | |
| 26 | +- 数据隔离:通过`tenant_id`字段实现租户隔离 | |
| 27 | +- 嵌套结构:每个SPU文档包含嵌套的`skus`数组(SKU变体) | |
| 28 | + | |
| 29 | +**索引文档结构**: | |
| 30 | +```json | |
| 31 | +{ | |
| 32 | + "tenant_id": "1", | |
| 33 | + "spu_id": "123", | |
| 34 | + "title": "蓝牙耳机", | |
| 35 | + "skus": [ | |
| 36 | + { | |
| 37 | + "sku_id": "456", | |
| 38 | + "title": "黑色", | |
| 39 | + "price": 199.99, | |
| 40 | + "sku": "SKU-123-1", | |
| 41 | + "stock": 50 | |
| 42 | + } | |
| 43 | + ], | |
| 44 | + "min_price": 199.99, | |
| 45 | + "max_price": 299.99 | |
| 46 | +} | |
| 47 | +``` | |
| 48 | + | |
| 49 | +### 1.3 配置化方案 | |
| 50 | + | |
| 51 | +**配置分离原则**: | |
| 52 | +- **搜索配置**:只包含ES字段定义、查询域、排序规则等搜索相关配置 | |
| 53 | +- **数据源配置**:不在搜索配置中,由Pipeline层(脚本)决定 | |
| 54 | +- **数据导入流程**:写死的脚本,不依赖配置 | |
| 55 | + | |
| 56 | +统一通过配置文件定义: | |
| 57 | +1. ES 字段定义(字段类型、分析器、boost等) | |
| 58 | +2. ES mapping 结构生成 | |
| 59 | +3. 查询域配置(indexes) | |
| 60 | +4. 排序和打分配置(function_score) | |
| 61 | + | |
| 62 | +**注意**:配置中**不包含**以下内容: | |
| 63 | +- `mysql_config` - MySQL数据库配置 | |
| 64 | +- `main_table` / `extension_table` - 数据表配置 | |
| 65 | +- `source_table` / `source_column` - 字段数据源映射 | |
| 66 | + | |
| 67 | +--- | |
| 68 | + | |
| 69 | +## 2. 配置系统实现 | |
| 70 | + | |
| 71 | +### 2.1 应用结构配置(字段定义) | |
| 72 | + | |
| 73 | +**配置文件位置**:`config/schema/{tenant_id}_config.yaml` | |
| 74 | + | |
| 75 | +**配置内容**:定义了 ES 的输入数据有哪些字段(字段类型、分析器、boost 等);字段与 MySQL 列的关联关系不在配置中,由 Pipeline 层决定。 | |
| 76 | + | |
| 77 | +**实现情况**: | |
| 78 | + | |
| 79 | +#### 字段类型支持 | |
| 80 | +- **TEXT**:文本字段,支持多语言分析器 | |
| 81 | +- **KEYWORD**:关键词字段,用于精确匹配和聚合 | |
| 82 | +- **TEXT_EMBEDDING**:文本向量字段(1024维,dot_product相似度) | |
| 83 | +- **IMAGE_EMBEDDING**:图片向量字段(1024维,dot_product相似度) | |
| 84 | +- **INT/LONG**:整数类型 | |
| 85 | +- **FLOAT/DOUBLE**:浮点数类型 | |
| 86 | +- **DATE**:日期类型 | |
| 87 | +- **BOOLEAN**:布尔类型 | |
| 88 | + | |
| 89 | +#### 分析器支持 | |
| 90 | +- **chinese_ecommerce**:中文电商分词器(index_ansj/query_ansj) | |
| 91 | +- **english**:英文分析器 | |
| 92 | +- **russian**:俄文分析器 | |
| 93 | +- **arabic**:阿拉伯文分析器 | |
| 94 | +- **spanish**:西班牙文分析器 | |
| 95 | +- **japanese**:日文分析器 | |
| 96 | +- **standard**:标准分析器 | |
| 97 | +- **keyword**:关键词分析器 | |
| 98 | + | |
| 99 | +#### 字段配置示例(Base配置) | |
| 100 | + | |
| 101 | +```yaml | |
| 102 | +fields: | |
| 103 | + # 租户隔离字段(必需) | |
| 104 | + - name: "tenant_id" | |
| 105 | + type: "KEYWORD" | |
| 106 | + required: true | |
| 107 | + index: true | |
| 108 | + store: true | |
| 109 | + | |
| 110 | + # 商品标识字段 | |
| 111 | + - name: "spu_id" | |
| 112 | + type: "KEYWORD" | |
| 113 | + required: true | |
| 114 | + index: true | |
| 115 | + store: true | |
| 116 | + | |
| 117 | + # 文本搜索字段 | |
| 118 | + - name: "title" | |
| 119 | + type: "TEXT" | |
| 120 | + analyzer: "chinese_ecommerce" | |
| 121 | + boost: 3.0 | |
| 122 | + index: true | |
| 123 | + store: true | |
| 124 | + | |
| 125 | + - name: "seo_keywords" | |
| 126 | + type: "TEXT" | |
| 127 | + analyzer: "chinese_ecommerce" | |
| 128 | + boost: 2.0 | |
| 129 | + index: true | |
| 130 | + store: true | |
| 131 | + | |
| 132 | + # 嵌套skus字段 | |
| 133 | + - name: "skus" | |
| 134 | + type: "JSON" | |
| 135 | + nested: true | |
| 136 | + nested_properties: | |
| 137 | + sku_id: | |
| 138 | + type: "keyword" | |
| 139 | + price: | |
| 140 | + type: "float" | |
| 141 | + sku: | |
| 142 | + type: "keyword" | |
| 143 | +``` | |
| 144 | + | |
| 145 | +**注意**:配置中**不包含**`source_table`和`source_column`,数据源映射由Pipeline层决定。 | |
| 146 | + | |
| 147 | +**实现模块**: | |
| 148 | +- `config/config_loader.py` - 配置加载器 | |
| 149 | +- `config/field_types.py` - 字段类型定义 | |
| 150 | +- `indexer/mapping_generator.py` - ES mapping 生成器 | |
| 151 | +- `indexer/data_transformer.py` - 数据转换器 | |
| 152 | + | |
| 153 | +### 2.2 索引结构配置(查询域配置) | |
| 154 | + | |
| 155 | +**配置内容**:定义了 ES 的字段索引 mapping 配置,支持各个域的查询,包括默认域的查询。 | |
| 156 | + | |
| 157 | +**实现情况**: | |
| 158 | + | |
| 159 | +#### 域(Domain)配置 | |
| 160 | +每个域定义了: | |
| 161 | +- 域名称(如 `default`, `title`, `category`, `brand`) | |
| 162 | +- 域标签(中文描述) | |
| 163 | +- 搜索字段列表 | |
| 164 | +- 默认分析器 | |
| 165 | +- 权重(boost) | |
| 166 | +- **多语言字段映射**(`language_field_mapping`) | |
| 167 | + | |
| 168 | +#### 多语言字段映射 | |
| 169 | + | |
| 170 | +支持将不同语言的查询路由到对应的字段: | |
| 171 | + | |
| 172 | +```yaml | |
| 173 | +indexes: | |
| 174 | + - name: "default" | |
| 175 | + label: "默认索引" | |
| 176 | + fields: | |
| 177 | + - "name" | |
| 178 | + - "enSpuName" | |
| 179 | + - "ruSkuName" | |
| 180 | + - "categoryName" | |
| 181 | + - "brandName" | |
| 182 | + analyzer: "chinese_ecommerce" | |
| 183 | + boost: 1.0 | |
| 184 | + language_field_mapping: | |
| 185 | + zh: | |
| 186 | + - "name" | |
| 187 | + - "categoryName" | |
| 188 | + - "brandName" | |
| 189 | + en: | |
| 190 | + - "enSpuName" | |
| 191 | + ru: | |
| 192 | + - "ruSkuName" | |
| 193 | + | |
| 194 | + - name: "title" | |
| 195 | + label: "标题索引" | |
| 196 | + fields: | |
| 197 | + - "name" | |
| 198 | + - "enSpuName" | |
| 199 | + - "ruSkuName" | |
| 200 | + analyzer: "chinese_ecommerce" | |
| 201 | + boost: 2.0 | |
| 202 | + language_field_mapping: | |
| 203 | + zh: | |
| 204 | + - "name" | |
| 205 | + en: | |
| 206 | + - "enSpuName" | |
| 207 | + ru: | |
| 208 | + - "ruSkuName" | |
| 209 | +``` | |
| 210 | + | |
| 211 | +**工作原理**: | |
| 212 | +1. 检测查询语言(中文、英文、俄文等) | |
| 213 | +2. 如果查询语言在 `language_field_mapping` 中,使用原始查询搜索对应语言的字段 | |
| 214 | +3. 将查询翻译到其他支持的语言,分别搜索对应语言的字段 | |
| 215 | +4. 组合多个语言查询的结果,提高召回率 | |
| 216 | + | |
| 217 | +**实现模块**: | |
| 218 | +- `search/multilang_query_builder.py` - 多语言查询构建器 | |
| 219 | +- `query/query_parser.py` - 查询解析器(支持语言检测和翻译) | |
| 220 | + | |
| 221 | +--- | |
| 222 | + | |
| 223 | +## 3. 数据导入流程 | |
| 224 | + | |
| 225 | +### 3.1 数据源 | |
| 226 | + | |
| 227 | +**店匠标准表**(Base配置使用): | |
| 228 | +- `shoplazza_product_spu` - SPU级别商品数据 | |
| 229 | +- `shoplazza_product_sku` - SKU级别商品数据 | |
| 230 | + | |
| 231 | +**其他客户表**(tenant1等): | |
| 232 | +- 使用各自的数据源表和扩展表 | |
| 233 | + | |
| 234 | +### 3.2 数据导入方式 | |
| 235 | + | |
| 236 | +**Pipeline层决定数据源**: | |
| 237 | +- 数据导入流程是写死的脚本,不依赖配置 | |
| 238 | +- 配置只关注ES搜索相关的内容 | |
| 239 | +- 数据源映射逻辑写死在转换器代码中 | |
| 240 | + | |
| 241 | +#### Base配置数据导入(店匠通用) | |
| 242 | + | |
| 243 | +**脚本**:`scripts/ingest_shoplazza.py` | |
| 244 | + | |
| 245 | +**数据流程**: | |
| 246 | +1. **数据加载**:从MySQL读取`shoplazza_product_spu`和`shoplazza_product_sku`表 | |
| 247 | +2. **数据转换**(`indexer/spu_transformer.py`): | |
| 248 | + - 按`spu_id`和`tenant_id`关联SPU和SKU数据 | |
| 249 | + - 将SKU数据聚合为嵌套的`skus`数组 | |
| 250 | + - 计算扁平化价格字段(`min_price`, `max_price`, `compare_at_price`) | |
| 251 | + - 字段映射(写死在代码中,不依赖配置) | |
| 252 | + - 注入`tenant_id`字段 | |
| 253 | +3. **索引创建**: | |
| 254 | + - 根据配置生成ES mapping | |
| 255 | + - 创建或更新`search_products`索引 | |
| 256 | +4. **批量入库**: | |
| 257 | + - 批量写入ES(默认每批500条) | |
| 258 | + - 错误处理和重试机制 | |
| 259 | + | |
| 260 | +**命令行工具**: | |
| 261 | +```bash | |
| 262 | +python scripts/ingest_shoplazza.py \ | |
| 263 | + --db-host localhost \ | |
| 264 | + --db-port 3306 \ | |
| 265 | + --db-database saas \ | |
| 266 | + --db-username root \ | |
| 267 | + --db-password password \ | |
| 268 | + --tenant-id "1" \ | |
| 269 | + --config base \ | |
| 270 | + --es-host http://localhost:9200 \ | |
| 271 | + --recreate \ | |
| 272 | + --batch-size 500 | |
| 273 | +``` | |
| 274 | + | |
| 275 | +#### 其他客户数据导入 | |
| 276 | + | |
| 277 | +- 使用各自的数据转换器(如`indexer/data_transformer.py`) | |
| 278 | +- 数据源映射逻辑写死在各自的转换器中 | |
| 279 | +- 共享`search_products`索引,通过`tenant_id`隔离 | |
| 280 | + | |
| 281 | +**实现模块**: | |
| 282 | +- `indexer/spu_transformer.py` - SPU数据转换器(Base配置) | |
| 283 | +- `indexer/data_transformer.py` - 通用数据转换器(其他客户) | |
| 284 | +- `indexer/bulk_indexer.py` - 批量索引器 | |
| 285 | +- `scripts/ingest_shoplazza.py` - 店匠数据导入脚本 | |
| 286 | + | |
| 287 | +--- | |
| 288 | + | |
| 289 | +## 4. QueryParser 实现 | |
| 290 | + | |
| 291 | + | |
| 292 | +### 4.1 查询改写(Query Rewriting) | |
| 293 | + | |
| 294 | +配置词典的key是query,value是改写后的查询表达式。比如品牌词可以改写为 brand|query OR name|query,类别词、标签词等也都可以放进去。纠错、规范化、查询改写等都可以通过这个词典来配置。 | |
| 295 | +**实现情况**: | |
| 296 | + | |
| 297 | +#### 配置方式 | |
| 298 | +在 `query_config.rewrite_dictionary` 中配置查询改写规则: | |
| 299 | + | |
| 300 | +```yaml | |
| 301 | +query_config: | |
| 302 | + enable_query_rewrite: true | |
| 303 | + rewrite_dictionary: | |
| 304 | + "芭比": "brand:芭比 OR name:芭比娃娃" | |
| 305 | + "玩具": "category:玩具" | |
| 306 | + "消防": "category:消防 OR name:消防" | |
| 307 | +``` | |
| 308 | + | |
| 309 | +#### 功能特性 | |
| 310 | +- **精确匹配**:查询完全匹配词典 key 时,替换为 value | |
| 311 | +- **部分匹配**:查询包含词典 key 时,替换该部分 | |
| 312 | +- **支持布尔表达式**:value 可以是复杂的布尔表达式(AND, OR, 域查询等) | |
| 313 | + | |
| 314 | +#### 实现模块 | |
| 315 | +- `query/query_rewriter.py` - 查询改写器 | |
| 316 | +- `query/query_parser.py` - 查询解析器(集成改写功能) | |
| 317 | + | |
| 318 | +### 4.2 翻译(Translation) | |
| 319 | + | |
| 320 | +**实现情况**: | |
| 321 | + | |
| 322 | +#### 配置方式 | |
| 323 | +```yaml | |
| 324 | +query_config: | |
| 325 | + supported_languages: | |
| 326 | + - "zh" | |
| 327 | + - "en" | |
| 328 | + - "ru" | |
| 329 | + default_language: "zh" | |
| 330 | + enable_translation: true | |
| 331 | + translation_service: "deepl" | |
| 332 | + translation_api_key: null # 通过环境变量设置 | |
| 333 | +``` | |
| 334 | + | |
| 335 | +#### 功能特性 | |
| 336 | +1. **语言检测**:自动检测查询语言 | |
| 337 | +2. **智能翻译**: | |
| 338 | + - 如果查询是中文,翻译为英文、俄文 | |
| 339 | + - 如果查询是英文,翻译为中文、俄文 | |
| 340 | + - 如果查询是其他语言,翻译为所有支持的语言 | |
| 341 | +3. **域感知翻译**: | |
| 342 | + - 如果域有 `language_field_mapping`,只翻译到映射中存在的语言 | |
| 343 | + - 避免不必要的翻译,提高效率 | |
| 344 | +4. **翻译缓存**:缓存翻译结果,避免重复调用 API | |
| 345 | + | |
| 346 | +#### 工作流程 | |
| 347 | +``` | |
| 348 | +查询输入 → 语言检测 → 确定目标语言 → 翻译 → 多语言查询构建 | |
| 349 | +``` | |
| 350 | + | |
| 351 | +#### 实现模块 | |
| 352 | +- `query/language_detector.py` - 语言检测器 | |
| 353 | +- `query/translator.py` - 翻译器(DeepL API) | |
| 354 | +- `query/query_parser.py` - 查询解析器(集成翻译功能) | |
| 355 | + | |
| 356 | +### 4.3 文本向量化(Text Embedding) | |
| 357 | + | |
| 358 | +如果配置打开了text_embedding查询,并且query 包含了default域的查询,那么要把default域的查询词转向量,后面searcher会用这个向量参与查询。 | |
| 359 | + | |
| 360 | +**实现情况**: | |
| 361 | + | |
| 362 | +#### 配置方式 | |
| 363 | +```yaml | |
| 364 | +query_config: | |
| 365 | + enable_text_embedding: true | |
| 366 | +``` | |
| 367 | + | |
| 368 | +#### 功能特性 | |
| 369 | +1. **条件生成**: | |
| 370 | + - 仅当 `enable_text_embedding=true` 时生成向量 | |
| 371 | + - 仅对 `default` 域查询生成向量 | |
| 372 | +2. **向量模型**:BGE-M3 模型(1024维向量) | |
| 373 | +3. **用途**:用于语义搜索(KNN 检索) | |
| 374 | + | |
| 375 | +#### 实现模块 | |
| 376 | +- `embeddings/bge_encoder.py` - BGE 文本编码器 | |
| 377 | +- `query/query_parser.py` - 查询解析器(集成向量生成) | |
| 378 | + | |
| 379 | +--- | |
| 380 | + | |
| 381 | +## 5. Searcher 实现 | |
| 382 | + | |
| 383 | +参考opensearch,他们自己定义的一套索引结构配置、支持自定义的一套检索表达式、排序表达式,这是各个客户进行配置化的基础,包括索引结构配置、排序策略配置。 | |
| 384 | +比如各种业务过滤策略 可以简单的通过表达式满足,比如brand|耐克 AND cate2|xxx。指定字段排序可以通过排序的表达式实现。 | |
| 385 | + | |
| 386 | +查询默认在default域,系统也会对这个域的查询做一些相关性的重点优化,包括融合语义相关性、多语言相关性(可以基于配置将查询翻译到指定语言,并在对应语言的字段进行查询),来弥补传统查询分析手段(比如查询改写、纠错、词权重等)的不足;也支持通过配置一些词表转为泛查询模式来优化相关性。 | |
| 387 | + | |
| 388 | +### 5.1 布尔表达式解析 | |
| 389 | + | |
| 390 | +**实现情况**: | |
| 391 | + | |
| 392 | +#### 支持的运算符 | |
| 393 | +- **AND**:所有项必须匹配 | |
| 394 | +- **OR**:任意项匹配 | |
| 395 | +- **RANK**:排序增强(类似 OR 但影响排序) | |
| 396 | +- **ANDNOT**:排除(第一项匹配,第二项不匹配) | |
| 397 | +- **()**:括号分组 | |
| 398 | + | |
| 399 | +#### 优先级(从高到低) | |
| 400 | +1. `()` - 括号 | |
| 401 | +2. `ANDNOT` - 排除 | |
| 402 | +3. `AND` - 与 | |
| 403 | +4. `OR` - 或 | |
| 404 | +5. `RANK` - 排序 | |
| 405 | + | |
| 406 | +#### 示例 | |
| 407 | +``` | |
| 408 | +laptop AND (gaming OR professional) ANDNOT cheap | |
| 409 | +``` | |
| 410 | + | |
| 411 | +#### 实现模块 | |
| 412 | +- `search/boolean_parser.py` - 布尔表达式解析器 | |
| 413 | +- `search/searcher.py` - 搜索器(集成布尔解析) | |
| 414 | + | |
| 415 | +### 5.2 多语言搜索 | |
| 416 | + | |
| 417 | +**实现情况**: | |
| 418 | + | |
| 419 | +#### 工作原理 | |
| 420 | +1. **查询解析**: | |
| 421 | + - 提取域(如 `title:查询` → 域=`title`,查询=`查询`) | |
| 422 | + - 检测查询语言 | |
| 423 | + - 生成翻译 | |
| 424 | +2. **多语言查询构建**: | |
| 425 | + - 如果域有 `language_field_mapping`: | |
| 426 | + - 使用检测到的语言查询对应字段(boost * 1.5) | |
| 427 | + - 使用翻译后的查询搜索其他语言字段(boost * 1.0) | |
| 428 | + - 如果域没有 `language_field_mapping`: | |
| 429 | + - 使用所有字段进行搜索 | |
| 430 | +3. **查询组合**: | |
| 431 | + - 多个语言查询组合为 `should` 子句 | |
| 432 | + - 提高召回率 | |
| 433 | + | |
| 434 | +#### 示例 | |
| 435 | +``` | |
| 436 | +查询: "芭比娃娃" | |
| 437 | +域: default | |
| 438 | +检测语言: zh | |
| 439 | + | |
| 440 | +生成的查询: | |
| 441 | +- 中文查询 "芭比娃娃" → 搜索 name, categoryName, brandName (boost * 1.5) | |
| 442 | +- 英文翻译 "Barbie doll" → 搜索 enSpuName (boost * 1.0) | |
| 443 | +- 俄文翻译 "Кукла Барби" → 搜索 ruSkuName (boost * 1.0) | |
| 444 | +``` | |
| 445 | + | |
| 446 | +#### 实现模块 | |
| 447 | +- `search/multilang_query_builder.py` - 多语言查询构建器 | |
| 448 | +- `search/searcher.py` - 搜索器(使用多语言构建器) | |
| 449 | + | |
| 450 | +### 5.3 相关性计算(Ranking) | |
| 451 | + | |
| 452 | +**实现情况**: | |
| 453 | + | |
| 454 | +#### 当前实现 | |
| 455 | +**公式**:`bm25() + 0.2 * text_embedding_relevance()` | |
| 456 | + | |
| 457 | +- **bm25()**:BM25 文本相关性得分 | |
| 458 | + - 包括多语言打分 | |
| 459 | + - 内部通过配置翻译为多种语言 | |
| 460 | + - 分别到对应的字段搜索 | |
| 461 | + - 中文字段使用中文分词器,英文字段使用英文分词器 | |
| 462 | +- **text_embedding_relevance()**:文本向量相关性得分(KNN 检索的打分) | |
| 463 | + - 权重:0.2 | |
| 464 | + | |
| 465 | +#### 配置方式 | |
| 466 | +```yaml | |
| 467 | +ranking: | |
| 468 | + expression: "bm25() + 0.2*text_embedding_relevance()" | |
| 469 | + description: "BM25 text relevance combined with semantic embedding similarity" | |
| 470 | +``` | |
| 471 | + | |
| 472 | +#### 扩展性 | |
| 473 | +- 支持表达式配置(未来可扩展) | |
| 474 | +- 支持自定义函数(如 `timeliness()`, `field_value()`) | |
| 475 | + | |
| 476 | +#### 实现模块 | |
| 477 | +- `search/ranking_engine.py` - 排序引擎 | |
| 478 | +- `search/searcher.py` - 搜索器(集成排序功能) | |
| 479 | + | |
| 480 | +--- | |
| 481 | + | |
| 482 | +## 6. 已完成功能总结 | |
| 483 | + | |
| 484 | +### 6.1 配置系统 | |
| 485 | +- ✅ 字段定义配置(类型、分析器、boost;数据源映射不在配置中,由Pipeline层决定) | |
| 486 | +- ✅ 索引域配置(多域查询、多语言映射) | |
| 487 | +- ✅ 查询配置(改写词典、翻译配置) | |
| 488 | +- ✅ 排序配置(表达式配置) | |
| 489 | +- ✅ 配置验证(字段存在性、类型检查、分析器匹配) | |
| 490 | + | |
| 491 | +### 6.2 数据索引 | |
| 492 | +- ✅ 数据转换(字段映射、类型转换) | |
| 493 | +- ✅ 向量生成(文本向量、图片向量) | |
| 494 | +- ✅ 向量缓存(避免重复计算) | |
| 495 | +- ✅ 批量索引(错误处理、重试机制) | |
| 496 | +- ✅ ES mapping 自动生成 | |
| 497 | + | |
| 498 | +### 6.3 查询处理 | |
| 499 | +- ✅ 查询改写(词典配置) | |
| 500 | +- ✅ 语言检测 | |
| 501 | +- ✅ 多语言翻译(DeepL API) | |
| 502 | +- ✅ 文本向量化(BGE-M3) | |
| 503 | +- ✅ 域提取(支持 `domain:query` 语法) | |
| 504 | + | |
| 505 | +### 6.4 搜索功能 | |
| 506 | +- ✅ 布尔表达式解析(AND, OR, RANK, ANDNOT, 括号) | |
| 507 | +- ✅ 多语言查询构建(语言路由、字段映射) | |
| 508 | +- ✅ 语义搜索(KNN 检索) | |
| 509 | +- ✅ 相关性排序(BM25 + 向量相似度) | |
| 510 | +- ✅ 结果聚合(Faceted Search) | |
| 511 | + | |
| 512 | +### 6.5 API 服务 | |
| 513 | +- ✅ RESTful API(FastAPI) | |
| 514 | +- ✅ 搜索接口(文本搜索、图片搜索) | |
| 515 | +- ✅ 文档查询接口 | |
| 516 | +- ✅ 前端界面(HTML + JavaScript) | |
| 517 | +- ✅ 租户隔离(tenant_id过滤) | |
| 518 | + | |
| 519 | +### 6.6 Base配置(店匠通用) | |
| 520 | +- ✅ SPU级别索引结构 | |
| 521 | +- ✅ 嵌套skus字段 | |
| 522 | +- ✅ 统一索引(search_products) | |
| 523 | +- ✅ 租户隔离(tenant_id) | |
| 524 | +- ✅ 配置简化(移除MySQL相关配置) | |
| 525 | + | |
| 526 | +--- | |
| 527 | + | |
| 528 | +## 7. 技术栈 | |
| 529 | + | |
| 530 | +- **后端**:Python 3.6+ | |
| 531 | +- **搜索引擎**:Elasticsearch | |
| 532 | +- **数据库**:MySQL(Shoplazza) | |
| 533 | +- **向量模型**:BGE-M3(文本)、CN-CLIP(图片) | |
| 534 | +- **翻译服务**:DeepL API | |
| 535 | +- **API 框架**:FastAPI | |
| 536 | +- **前端**:HTML + JavaScript | |
| 537 | + | |
| 538 | +--- | |
| 539 | + | |
| 540 | +## 8. API响应格式 | |
| 541 | + | |
| 542 | +### 8.1 外部友好格式 | |
| 543 | + | |
| 544 | +API返回格式不包含ES内部字段(`_id`, `_score`, `_source`),使用外部友好的格式: | |
| 545 | + | |
| 546 | +**响应结构**: | |
| 547 | +```json | |
| 548 | +{ | |
| 549 | + "results": [ | |
| 550 | + { | |
| 551 | + "spu_id": "123", | |
| 552 | + "title": "蓝牙耳机", | |
| 553 | + "skus": [ | |
| 554 | + { | |
| 555 | + "sku_id": "456", | |
| 556 | + "price": 199.99, | |
| 557 | + "sku": "SKU-123-1", | |
| 558 | + "stock": 50 | |
| 559 | + } | |
| 560 | + ], | |
| 561 | + "relevance_score": 0.95 | |
| 562 | + } | |
| 563 | + ], | |
| 564 | + "total": 10, | |
| 565 | + "facets": [...], | |
| 566 | + "suggestions": [], | |
| 567 | + "related_searches": [] | |
| 568 | +} | |
| 569 | +``` | |
| 570 | + | |
| 571 | +**主要变化**: | |
| 572 | +- 结构化结果(`SpuResult`和`SkuResult`) | |
| 573 | +- 嵌套skus数组 | |
| 574 | +- 无ES内部字段 | |
| 575 | + | |
| 576 | +### 8.2 租户隔离 | |
| 577 | + | |
| 578 | +所有API请求必须提供`tenant_id`: | |
| 579 | +- 请求头:`X-Tenant-ID: 1` | |
| 580 | +- 或查询参数:`?tenant_id=1` | |
| 581 | + | |
| 582 | +搜索时自动添加`tenant_id`过滤,确保数据隔离。 | |
| 583 | + | |
| 584 | +### 8.3 数据接口约定 | |
| 585 | + | |
| 586 | +**统一的数据约定格式**:所有API接口使用 Pydantic 模型进行数据验证和序列化。 | |
| 587 | + | |
| 588 | +#### 8.3.1 数据流模式 | |
| 589 | + | |
| 590 | +系统采用统一的数据流模式,确保数据在各层之间的一致性: | |
| 591 | + | |
| 592 | +**数据流转路径**: | |
| 593 | +``` | |
| 594 | +API Request (JSON) | |
| 595 | + ↓ | |
| 596 | +Pydantic 验证 → 结构化模型(RangeFilter, FacetConfig 等) | |
| 597 | + ↓ | |
| 598 | +Searcher(透传) | |
| 599 | + ↓ | |
| 600 | +ES Query Builder → model_dump() 转换为字典 | |
| 601 | + ↓ | |
| 602 | +ES Query (字典) | |
| 603 | + ↓ | |
| 604 | +Elasticsearch | |
| 605 | +``` | |
| 606 | + | |
| 607 | +#### 8.3.2 Facets 配置数据流 | |
| 608 | + | |
| 609 | +**输入格式**:`List[Union[str, FacetConfig]]` | |
| 610 | + | |
| 611 | +- **简单模式**:字符串列表(字段名),使用默认配置 | |
| 612 | + ```json | |
| 613 | + ["category.keyword", "vendor.keyword"] | |
| 614 | + ``` | |
| 615 | + | |
| 616 | +- **高级模式**:FacetConfig 对象列表,支持自定义配置 | |
| 617 | + ```json | |
| 618 | + [ | |
| 619 | + { | |
| 620 | + "field": "category.keyword", | |
| 621 | + "size": 15, | |
| 622 | + "type": "terms" | |
| 623 | + }, | |
| 624 | + { | |
| 625 | + "field": "price", | |
| 626 | + "type": "range", | |
| 627 | + "ranges": [ | |
| 628 | + {"key": "0-50", "to": 50}, | |
| 629 | + {"key": "50-100", "from": 50, "to": 100} | |
| 630 | + ] | |
| 631 | + } | |
| 632 | + ] | |
| 633 | + ``` | |
| 634 | + | |
| 635 | +**数据流**: | |
| 636 | +1. API 层:接收 `List[Union[str, FacetConfig]]` | |
| 637 | +2. Searcher 层:透传,不做转换 | |
| 638 | +3. ES Query Builder:只接受 `str` 或 `FacetConfig`,自动处理两种格式 | |
| 639 | +4. 输出:转换为 ES 聚合查询 | |
| 640 | + | |
| 641 | +#### 8.3.3 Range Filters 数据流 | |
| 642 | + | |
| 643 | +**输入格式**:`Dict[str, RangeFilter]` | |
| 644 | + | |
| 645 | +**RangeFilter 模型**: | |
| 646 | +```python | |
| 647 | +class RangeFilter(BaseModel): | |
| 648 | + gte: Optional[Union[float, str]] # 大于等于 | |
| 649 | + gt: Optional[Union[float, str]] # 大于 | |
| 650 | + lte: Optional[Union[float, str]] # 小于等于 | |
| 651 | + lt: Optional[Union[float, str]] # 小于 | |
| 652 | +``` | |
| 653 | + | |
| 654 | +**示例**: | |
| 655 | +```json | |
| 656 | +{ | |
| 657 | + "price": {"gte": 50, "lte": 200}, | |
| 658 | + "created_at": {"gte": "2023-01-01T00:00:00Z"} | |
| 659 | +} | |
| 660 | +``` | |
| 661 | + | |
| 662 | +**数据流**: | |
| 663 | +1. API 层:接收 `Dict[str, RangeFilter]`,Pydantic 自动验证 | |
| 664 | +2. Searcher 层:透传 `Dict[str, RangeFilter]` | |
| 665 | +3. ES Query Builder:调用 `range_filter.model_dump()` 转换为字典 | |
| 666 | +4. 输出:ES range 查询(支持数值和日期) | |
| 667 | + | |
| 668 | +**特性**: | |
| 669 | +- 自动验证:确保至少指定一个边界值(gte, gt, lte, lt) | |
| 670 | +- 类型支持:支持数值(float)和日期时间字符串(ISO 格式) | |
| 671 | +- 统一约定:所有范围过滤都使用 RangeFilter 模型 | |
| 672 | + | |
| 673 | +#### 8.3.4 响应 Facets 数据流 | |
| 674 | + | |
| 675 | +**输出格式**:`List[FacetResult]` | |
| 676 | + | |
| 677 | +**FacetResult 模型**: | |
| 678 | +```python | |
| 679 | +class FacetResult(BaseModel): | |
| 680 | + field: str # 字段名 | |
| 681 | + label: str # 显示标签 | |
| 682 | + type: Literal["terms", "range"] # 分面类型 | |
| 683 | + values: List[FacetValue] # 分面值列表 | |
| 684 | + total_count: Optional[int] # 总文档数 | |
| 685 | +``` | |
| 686 | + | |
| 687 | +**数据流**: | |
| 688 | +1. ES Response:返回聚合结果(字典格式) | |
| 689 | +2. Searcher 层:构建 `List[FacetResult]` 对象 | |
| 690 | +3. API 层:直接返回 `List[FacetResult]`(Pydantic 自动序列化为 JSON) | |
| 691 | + | |
| 692 | +**优势**: | |
| 693 | +- 类型安全:使用 Pydantic 模型确保数据结构一致性 | |
| 694 | +- 自动序列化:模型自动转换为 JSON,无需手动处理 | |
| 695 | +- 统一约定:所有响应都使用标准化的 Pydantic 模型 | |
| 696 | + | |
| 697 | +#### 8.3.5 统一约定的好处 | |
| 698 | + | |
| 699 | +1. **类型安全**:使用 Pydantic 模型提供运行时类型检查和验证 | |
| 700 | +2. **代码一致性**:所有层使用相同的数据模型,减少转换错误 | |
| 701 | +3. **自动文档**:FastAPI 自动生成 API 文档(基于 Pydantic 模型) | |
| 702 | +4. **易于维护**:修改数据结构只需更新模型定义 | |
| 703 | +5. **数据验证**:自动验证输入数据,减少错误处理代码 | |
| 704 | + | |
| 705 | +**实现模块**: | |
| 706 | +- `api/models.py` - 所有 Pydantic 模型定义 | |
| 707 | +- `api/result_formatter.py` - 结果格式化器(ES 响应 → Pydantic 模型) | |
| 708 | +- `search/es_query_builder.py` - ES 查询构建器(Pydantic 模型 → ES 查询) | |
| 709 | + | |
| 710 | +## 9. 配置文件示例 | |
| 711 | + | |
| 712 | +**Base配置**(店匠通用):`config/schema/base/config.yaml` | |
| 713 | + | |
| 714 | +**其他客户配置**:`config/schema/tenant1/config.yaml` | |
| 715 | + | |
| 716 | +--- | ... | ... |
frontend/README.md
| ... | ... | @@ -105,7 +105,7 @@ POST http://120.76.41.98:6002/search/ |
| 105 | 105 | "size": 20, // 每页结果数 |
| 106 | 106 | "from": 0, // 偏移量(分页) |
| 107 | 107 | "filters": { // 筛选条件 |
| 108 | - "categoryName_keyword": ["玩具"], | |
| 108 | + "category.keyword": ["玩具"], | |
| 109 | 109 | "price": {"from": 50, "to": 100} |
| 110 | 110 | }, |
| 111 | 111 | "aggregations": {...}, // 聚合配置 | ... | ... |
frontend/static/js/app.js
| ... | ... | @@ -74,17 +74,17 @@ async function performSearch(page = 1) { |
| 74 | 74 | // Define facets (简化配置) |
| 75 | 75 | const facets = [ |
| 76 | 76 | { |
| 77 | - "field": "category_keyword", | |
| 77 | + "field": "category.keyword", | |
| 78 | 78 | "size": 15, |
| 79 | 79 | "type": "terms" |
| 80 | 80 | }, |
| 81 | 81 | { |
| 82 | - "field": "vendor_keyword", | |
| 82 | + "field": "vendor.keyword", | |
| 83 | 83 | "size": 15, |
| 84 | 84 | "type": "terms" |
| 85 | 85 | }, |
| 86 | 86 | { |
| 87 | - "field": "tags_keyword", | |
| 87 | + "field": "tags.keyword", | |
| 88 | 88 | "size": 10, |
| 89 | 89 | "type": "terms" |
| 90 | 90 | }, |
| ... | ... | @@ -219,13 +219,13 @@ function displayFacets(facets) { |
| 219 | 219 | let containerId = null; |
| 220 | 220 | let maxDisplay = 10; |
| 221 | 221 | |
| 222 | - if (facet.field === 'category_keyword') { | |
| 222 | + if (facet.field === 'category.keyword') { | |
| 223 | 223 | containerId = 'categoryTags'; |
| 224 | 224 | maxDisplay = 10; |
| 225 | - } else if (facet.field === 'vendor_keyword') { | |
| 225 | + } else if (facet.field === 'vendor.keyword') { | |
| 226 | 226 | containerId = 'brandTags'; |
| 227 | 227 | maxDisplay = 10; |
| 228 | - } else if (facet.field === 'tags_keyword') { | |
| 228 | + } else if (facet.field === 'tags.keyword') { | |
| 229 | 229 | containerId = 'supplierTags'; |
| 230 | 230 | maxDisplay = 8; |
| 231 | 231 | } |
| ... | ... | @@ -351,7 +351,7 @@ function updateClearFiltersButton() { |
| 351 | 351 | |
| 352 | 352 | // Update product count |
| 353 | 353 | function updateProductCount(total) { |
| 354 | - document.getElementById('productCount').textContent = `${total.toLocaleString()} products found`; | |
| 354 | + document.getElementById('productCount').textContent = `${total.toLocaleString()} SPUs found`; | |
| 355 | 355 | } |
| 356 | 356 | |
| 357 | 357 | // Sort functions | ... | ... |
frontend/static/js/app_base.js
| ... | ... | @@ -72,17 +72,17 @@ async function performSearch(page = 1) { |
| 72 | 72 | // Define facets (简化配置) |
| 73 | 73 | const facets = [ |
| 74 | 74 | { |
| 75 | - "field": "category_keyword", | |
| 75 | + "field": "category.keyword", | |
| 76 | 76 | "size": 15, |
| 77 | 77 | "type": "terms" |
| 78 | 78 | }, |
| 79 | 79 | { |
| 80 | - "field": "vendor_keyword", | |
| 80 | + "field": "vendor.keyword", | |
| 81 | 81 | "size": 15, |
| 82 | 82 | "type": "terms" |
| 83 | 83 | }, |
| 84 | 84 | { |
| 85 | - "field": "tags_keyword", | |
| 85 | + "field": "tags.keyword", | |
| 86 | 86 | "size": 10, |
| 87 | 87 | "type": "terms" |
| 88 | 88 | }, |
| ... | ... | @@ -168,14 +168,14 @@ function displayResults(data) { |
| 168 | 168 | |
| 169 | 169 | let html = ''; |
| 170 | 170 | |
| 171 | - data.results.forEach((product) => { | |
| 172 | - const score = product.relevance_score; | |
| 171 | + data.results.forEach((spu) => { | |
| 172 | + const score = spu.relevance_score; | |
| 173 | 173 | html += ` |
| 174 | 174 | <div class="product-card"> |
| 175 | 175 | <div class="product-image-wrapper"> |
| 176 | - ${product.image_url ? ` | |
| 177 | - <img src="${escapeHtml(product.image_url)}" | |
| 178 | - alt="${escapeHtml(product.title)}" | |
| 176 | + ${spu.image_url ? ` | |
| 177 | + <img src="${escapeHtml(spu.image_url)}" | |
| 178 | + alt="${escapeHtml(spu.title)}" | |
| 179 | 179 | class="product-image" |
| 180 | 180 | onerror="this.src='data:image/svg+xml,%3Csvg xmlns=%22http://www.w3.org/2000/svg%22 width=%22100%22 height=%22100%22%3E%3Crect fill=%22%23f0f0f0%22 width=%22100%22 height=%22100%22/%3E%3Ctext x=%2250%25%22 y=%2250%25%22 font-size=%2214%22 text-anchor=%22middle%22 dy=%22.3em%22 fill=%22%23999%22%3ENo Image%3C/text%3E%3C/svg%3E'"> |
| 181 | 181 | ` : ` |
| ... | ... | @@ -184,21 +184,21 @@ function displayResults(data) { |
| 184 | 184 | </div> |
| 185 | 185 | |
| 186 | 186 | <div class="product-price"> |
| 187 | - ${product.price ? `$${product.price.toFixed(2)}` : 'N/A'}${product.compare_at_price && product.compare_at_price > product.price ? `<span style="text-decoration: line-through; color: #999; font-size: 0.9em; margin-left: 8px;">$${product.compare_at_price.toFixed(2)}</span>` : ''} | |
| 187 | + ${spu.price ? `$${spu.price.toFixed(2)}` : 'N/A'}${spu.compare_at_price && spu.compare_at_price > spu.price ? `<span style="text-decoration: line-through; color: #999; font-size: 0.9em; margin-left: 8px;">$${spu.compare_at_price.toFixed(2)}</span>` : ''} | |
| 188 | 188 | </div> |
| 189 | 189 | |
| 190 | 190 | <div class="product-stock"> |
| 191 | - ${product.in_stock ? '<span style="color: green;">In Stock</span>' : '<span style="color: red;">Out of Stock</span>'} | |
| 192 | - ${product.variants && product.variants.length > 0 ? `<span style="color: #666; font-size: 0.9em;">(${product.variants.length} variants)</span>` : ''} | |
| 191 | + ${spu.in_stock ? '<span style="color: green;">In Stock</span>' : '<span style="color: red;">Out of Stock</span>'} | |
| 192 | + ${spu.skus && spu.skus.length > 0 ? `<span style="color: #666; font-size: 0.9em;">(${spu.skus.length} skus)</span>` : ''} | |
| 193 | 193 | </div> |
| 194 | 194 | |
| 195 | 195 | <div class="product-title"> |
| 196 | - ${escapeHtml(product.title || 'N/A')} | |
| 196 | + ${escapeHtml(spu.title || 'N/A')} | |
| 197 | 197 | </div> |
| 198 | 198 | |
| 199 | - <div class="product-meta">${product.vendor ? escapeHtml(product.vendor) : ''}${product.product_type ? ' | ' + escapeHtml(product.product_type) : ''}${product.category ? ' | ' + escapeHtml(product.category) : ''} ${product.tags ? ` | |
| 199 | + <div class="product-meta">${spu.vendor ? escapeHtml(spu.vendor) : ''}${spu.category ? ' | ' + escapeHtml(spu.category) : ''} ${spu.tags ? ` | |
| 200 | 200 | <div class="product-tags"> |
| 201 | - Tags: ${escapeHtml(product.tags)} | |
| 201 | + Tags: ${escapeHtml(spu.tags)} | |
| 202 | 202 | </div> |
| 203 | 203 | ` : ''} |
| 204 | 204 | </div> |
| ... | ... | @@ -217,13 +217,13 @@ function displayFacets(facets) { |
| 217 | 217 | let containerId = null; |
| 218 | 218 | let maxDisplay = 10; |
| 219 | 219 | |
| 220 | - if (facet.field === 'category_keyword') { | |
| 220 | + if (facet.field === 'category.keyword') { | |
| 221 | 221 | containerId = 'categoryTags'; |
| 222 | 222 | maxDisplay = 10; |
| 223 | - } else if (facet.field === 'vendor_keyword') { | |
| 223 | + } else if (facet.field === 'vendor.keyword') { | |
| 224 | 224 | containerId = 'brandTags'; |
| 225 | 225 | maxDisplay = 10; |
| 226 | - } else if (facet.field === 'tags_keyword') { | |
| 226 | + } else if (facet.field === 'tags.keyword') { | |
| 227 | 227 | containerId = 'supplierTags'; |
| 228 | 228 | maxDisplay = 8; |
| 229 | 229 | } |
| ... | ... | @@ -349,7 +349,7 @@ function updateClearFiltersButton() { |
| 349 | 349 | |
| 350 | 350 | // Update product count |
| 351 | 351 | function updateProductCount(total) { |
| 352 | - document.getElementById('productCount').textContent = `${total.toLocaleString()} products found`; | |
| 352 | + document.getElementById('productCount').textContent = `${total.toLocaleString()} SPUs found`; | |
| 353 | 353 | } |
| 354 | 354 | |
| 355 | 355 | // Sort functions | ... | ... |
indexer/spu_transformer.py
| 1 | 1 | """ |
| 2 | 2 | SPU data transformer for Shoplazza products. |
| 3 | 3 | |
| 4 | -Transforms SPU and SKU data from MySQL into SPU-level ES documents with nested variants. | |
| 4 | +Transforms SPU and SKU data from MySQL into SPU-level ES documents with nested skus. | |
| 5 | 5 | """ |
| 6 | 6 | |
| 7 | 7 | import pandas as pd |
| ... | ... | @@ -165,8 +165,8 @@ class SPUTransformer: |
| 165 | 165 | # Tenant ID (required) |
| 166 | 166 | doc['tenant_id'] = str(self.tenant_id) |
| 167 | 167 | |
| 168 | - # Product ID | |
| 169 | - doc['product_id'] = str(spu_row['id']) | |
| 168 | + # SPU ID | |
| 169 | + doc['spu_id'] = str(spu_row['id']) | |
| 170 | 170 | |
| 171 | 171 | # Handle |
| 172 | 172 | if pd.notna(spu_row.get('handle')): |
| ... | ... | @@ -195,23 +195,14 @@ class SPUTransformer: |
| 195 | 195 | # Vendor |
| 196 | 196 | if pd.notna(spu_row.get('vendor')): |
| 197 | 197 | doc['vendor'] = str(spu_row['vendor']) |
| 198 | - doc['vendor_keyword'] = str(spu_row['vendor']) | |
| 199 | - | |
| 200 | - # Product type (from category or tags) | |
| 201 | - if pd.notna(spu_row.get('category')): | |
| 202 | - doc['product_type'] = str(spu_row['category']) | |
| 203 | - doc['product_type_keyword'] = str(spu_row['category']) | |
| 204 | 198 | |
| 205 | 199 | # Tags |
| 206 | 200 | if pd.notna(spu_row.get('tags')): |
| 207 | - tags_str = str(spu_row['tags']) | |
| 208 | - doc['tags'] = tags_str | |
| 209 | - doc['tags_keyword'] = tags_str | |
| 201 | + doc['tags'] = str(spu_row['tags']) | |
| 210 | 202 | |
| 211 | 203 | # Category |
| 212 | 204 | if pd.notna(spu_row.get('category')): |
| 213 | 205 | doc['category'] = str(spu_row['category']) |
| 214 | - doc['category_keyword'] = str(spu_row['category']) | |
| 215 | 206 | |
| 216 | 207 | # Image URL |
| 217 | 208 | if pd.notna(spu_row.get('image_src')): |
| ... | ... | @@ -220,27 +211,27 @@ class SPUTransformer: |
| 220 | 211 | image_src = f"//{image_src}" if image_src.startswith('//') else image_src |
| 221 | 212 | doc['image_url'] = image_src |
| 222 | 213 | |
| 223 | - # Process variants | |
| 224 | - variants = [] | |
| 214 | + # Process SKUs | |
| 215 | + skus_list = [] | |
| 225 | 216 | prices = [] |
| 226 | 217 | compare_prices = [] |
| 227 | 218 | |
| 228 | 219 | for _, sku_row in skus.iterrows(): |
| 229 | - variant = self._transform_sku_to_variant(sku_row) | |
| 230 | - if variant: | |
| 231 | - variants.append(variant) | |
| 232 | - if 'price' in variant and variant['price'] is not None: | |
| 220 | + sku_data = self._transform_sku_row(sku_row) | |
| 221 | + if sku_data: | |
| 222 | + skus_list.append(sku_data) | |
| 223 | + if 'price' in sku_data and sku_data['price'] is not None: | |
| 233 | 224 | try: |
| 234 | - prices.append(float(variant['price'])) | |
| 225 | + prices.append(float(sku_data['price'])) | |
| 235 | 226 | except (ValueError, TypeError): |
| 236 | 227 | pass |
| 237 | - if 'compare_at_price' in variant and variant['compare_at_price'] is not None: | |
| 228 | + if 'compare_at_price' in sku_data and sku_data['compare_at_price'] is not None: | |
| 238 | 229 | try: |
| 239 | - compare_prices.append(float(variant['compare_at_price'])) | |
| 230 | + compare_prices.append(float(sku_data['compare_at_price'])) | |
| 240 | 231 | except (ValueError, TypeError): |
| 241 | 232 | pass |
| 242 | 233 | |
| 243 | - doc['variants'] = variants | |
| 234 | + doc['skus'] = skus_list | |
| 244 | 235 | |
| 245 | 236 | # Calculate price ranges |
| 246 | 237 | if prices: |
| ... | ... | @@ -286,55 +277,55 @@ class SPUTransformer: |
| 286 | 277 | |
| 287 | 278 | return doc |
| 288 | 279 | |
| 289 | - def _transform_sku_to_variant(self, sku_row: pd.Series) -> Optional[Dict[str, Any]]: | |
| 280 | + def _transform_sku_row(self, sku_row: pd.Series) -> Optional[Dict[str, Any]]: | |
| 290 | 281 | """ |
| 291 | - Transform a SKU row into a variant object. | |
| 282 | + Transform a SKU row into a SKU object. | |
| 292 | 283 | |
| 293 | 284 | Args: |
| 294 | 285 | sku_row: SKU row from database |
| 295 | 286 | |
| 296 | 287 | Returns: |
| 297 | - Variant dictionary or None | |
| 288 | + SKU dictionary or None | |
| 298 | 289 | """ |
| 299 | - variant = {} | |
| 290 | + sku_data = {} | |
| 300 | 291 | |
| 301 | - # Variant ID | |
| 302 | - variant['variant_id'] = str(sku_row['id']) | |
| 292 | + # SKU ID | |
| 293 | + sku_data['sku_id'] = str(sku_row['id']) | |
| 303 | 294 | |
| 304 | 295 | # Title |
| 305 | 296 | if pd.notna(sku_row.get('title')): |
| 306 | - variant['title'] = str(sku_row['title']) | |
| 297 | + sku_data['title'] = str(sku_row['title']) | |
| 307 | 298 | |
| 308 | 299 | # Price |
| 309 | 300 | if pd.notna(sku_row.get('price')): |
| 310 | 301 | try: |
| 311 | - variant['price'] = float(sku_row['price']) | |
| 302 | + sku_data['price'] = float(sku_row['price']) | |
| 312 | 303 | except (ValueError, TypeError): |
| 313 | - variant['price'] = None | |
| 304 | + sku_data['price'] = None | |
| 314 | 305 | else: |
| 315 | - variant['price'] = None | |
| 306 | + sku_data['price'] = None | |
| 316 | 307 | |
| 317 | 308 | # Compare at price |
| 318 | 309 | if pd.notna(sku_row.get('compare_at_price')): |
| 319 | 310 | try: |
| 320 | - variant['compare_at_price'] = float(sku_row['compare_at_price']) | |
| 311 | + sku_data['compare_at_price'] = float(sku_row['compare_at_price']) | |
| 321 | 312 | except (ValueError, TypeError): |
| 322 | - variant['compare_at_price'] = None | |
| 313 | + sku_data['compare_at_price'] = None | |
| 323 | 314 | else: |
| 324 | - variant['compare_at_price'] = None | |
| 315 | + sku_data['compare_at_price'] = None | |
| 325 | 316 | |
| 326 | 317 | # SKU |
| 327 | 318 | if pd.notna(sku_row.get('sku')): |
| 328 | - variant['sku'] = str(sku_row['sku']) | |
| 319 | + sku_data['sku'] = str(sku_row['sku']) | |
| 329 | 320 | |
| 330 | 321 | # Stock |
| 331 | 322 | if pd.notna(sku_row.get('inventory_quantity')): |
| 332 | 323 | try: |
| 333 | - variant['stock'] = int(sku_row['inventory_quantity']) | |
| 324 | + sku_data['stock'] = int(sku_row['inventory_quantity']) | |
| 334 | 325 | except (ValueError, TypeError): |
| 335 | - variant['stock'] = 0 | |
| 326 | + sku_data['stock'] = 0 | |
| 336 | 327 | else: |
| 337 | - variant['stock'] = 0 | |
| 328 | + sku_data['stock'] = 0 | |
| 338 | 329 | |
| 339 | 330 | # Options (from option1, option2, option3) |
| 340 | 331 | options = {} |
| ... | ... | @@ -346,7 +337,7 @@ class SPUTransformer: |
| 346 | 337 | options['option3'] = str(sku_row['option3']) |
| 347 | 338 | |
| 348 | 339 | if options: |
| 349 | - variant['options'] = options | |
| 340 | + sku_data['options'] = options | |
| 350 | 341 | |
| 351 | - return variant | |
| 342 | + return sku_data | |
| 352 | 343 | ... | ... |
scripts/generate_test_data.py
| ... | ... | @@ -2,7 +2,7 @@ |
| 2 | 2 | """ |
| 3 | 3 | Generate test data for Shoplazza SPU and SKU tables. |
| 4 | 4 | |
| 5 | -Generates 100 SPU records with 1-5 SKU variants each. | |
| 5 | +Generates 100 SPU records with 1-5 SKUs each. | |
| 6 | 6 | """ |
| 7 | 7 | |
| 8 | 8 | import sys |
| ... | ... | @@ -145,15 +145,15 @@ def generate_sku_data(spus: list, start_sku_id: int = 1): |
| 145 | 145 | |
| 146 | 146 | for spu in spus: |
| 147 | 147 | spu_id = spu['id'] |
| 148 | - num_variants = random.randint(1, 5) | |
| 148 | + num_skus = random.randint(1, 5) | |
| 149 | 149 | |
| 150 | 150 | # Base price |
| 151 | 151 | base_price = random.uniform(50, 500) |
| 152 | 152 | |
| 153 | - for i in range(num_variants): | |
| 153 | + for i in range(num_skus): | |
| 154 | 154 | # Generate SKU options |
| 155 | - color = random.choice(colors) if num_variants > 1 else None | |
| 156 | - size = random.choice(sizes) if num_variants > 2 else None | |
| 155 | + color = random.choice(colors) if num_skus > 1 else None | |
| 156 | + size = random.choice(sizes) if num_skus > 2 else None | |
| 157 | 157 | |
| 158 | 158 | # Generate title |
| 159 | 159 | title_parts = [] |
| ... | ... | @@ -400,7 +400,7 @@ def main(): |
| 400 | 400 | start_sku_id = 1 |
| 401 | 401 | print(f"Using start SPU ID: {start_spu_id}, start SKU ID: {start_sku_id}") |
| 402 | 402 | |
| 403 | - print(f"Generating {args.num_spus} SPUs with variants...") | |
| 403 | + print(f"Generating {args.num_spus} SPUs with skus...") | |
| 404 | 404 | |
| 405 | 405 | # Generate SPU data |
| 406 | 406 | spus = generate_spu_data(args.num_spus, args.tenant_id, start_spu_id) | ... | ... |
scripts/ingest_shoplazza.py
| ... | ... | @@ -141,7 +141,7 @@ def main(): |
| 141 | 141 | indexer = BulkIndexer(es_client, index_name, batch_size=args.batch_size) |
| 142 | 142 | |
| 143 | 143 | try: |
| 144 | - results = indexer.index_documents(documents, id_field="product_id", show_progress=True) | |
| 144 | + results = indexer.index_documents(documents, id_field="spu_id", show_progress=True) | |
| 145 | 145 | print(f"\nIngestion complete:") |
| 146 | 146 | print(f" Success: {results['success']}") |
| 147 | 147 | print(f" Failed: {results['failed']}") | ... | ... |
scripts/test_base.py
| ... | ... | @@ -85,7 +85,7 @@ def validate_response_format(data: dict): |
| 85 | 85 | # Validate first result structure |
| 86 | 86 | if results: |
| 87 | 87 | result = results[0] |
| 88 | - required_fields = ['product_id', 'title', 'variants', 'relevance_score'] | |
| 88 | + required_fields = ['spu_id', 'title', 'skus', 'relevance_score'] | |
| 89 | 89 | for field in required_fields: |
| 90 | 90 | if field not in result: |
| 91 | 91 | errors.append(f"Result missing required field: {field}") |
| ... | ... | @@ -96,17 +96,17 @@ def validate_response_format(data: dict): |
| 96 | 96 | if field in result: |
| 97 | 97 | errors.append(f"Result contains ES internal field: {field}") |
| 98 | 98 | |
| 99 | - # Validate variants | |
| 100 | - if 'variants' in result: | |
| 101 | - variants = result['variants'] | |
| 102 | - if not isinstance(variants, list): | |
| 103 | - errors.append("'variants' should be a list") | |
| 104 | - elif variants: | |
| 105 | - variant = variants[0] | |
| 106 | - variant_required = ['variant_id', 'price', 'sku', 'stock'] | |
| 107 | - for field in variant_required: | |
| 108 | - if field not in variant: | |
| 109 | - errors.append(f"Variant missing required field: {field}") | |
| 99 | + # Validate skus | |
| 100 | + if 'skus' in result: | |
| 101 | + skus = result['skus'] | |
| 102 | + if not isinstance(skus, list): | |
| 103 | + errors.append("'skus' should be a list") | |
| 104 | + elif skus: | |
| 105 | + sku = skus[0] | |
| 106 | + sku_required = ['sku_id', 'price', 'sku', 'stock'] | |
| 107 | + for field in sku_required: | |
| 108 | + if field not in sku: | |
| 109 | + errors.append(f"SKU missing required field: {field}") | |
| 110 | 110 | |
| 111 | 111 | # Check for suggestions and related_searches |
| 112 | 112 | if 'suggestions' not in data: |
| ... | ... | @@ -136,7 +136,7 @@ def test_facets(base_url: str, tenant_id: str): |
| 136 | 136 | payload = { |
| 137 | 137 | "query": "商品", |
| 138 | 138 | "size": 10, |
| 139 | - "facets": ["category_keyword", "vendor_keyword"] | |
| 139 | + "facets": ["category.keyword", "vendor.keyword"] | |
| 140 | 140 | } |
| 141 | 141 | |
| 142 | 142 | print(f"\nTesting facets:") |
| ... | ... | @@ -179,8 +179,8 @@ def test_tenant_isolation(base_url: str, tenant_id_1: str, tenant_id_2: str): |
| 179 | 179 | data2 = test_search_api(base_url, tenant_id_2, "商品") |
| 180 | 180 | |
| 181 | 181 | if data1 and data2: |
| 182 | - results1 = set(r.get('product_id') for r in data1.get('results', [])) | |
| 183 | - results2 = set(r.get('product_id') for r in data2.get('results', [])) | |
| 182 | + results1 = set(r.get('spu_id') for r in data1.get('results', [])) | |
| 183 | + results2 = set(r.get('spu_id') for r in data2.get('results', [])) | |
| 184 | 184 | |
| 185 | 185 | overlap = results1 & results2 |
| 186 | 186 | if overlap: | ... | ... |
search/searcher.py