From cadc77b6bfe6292708ce8c0db83554b133886510 Mon Sep 17 00:00:00 2001 From: tangwang Date: Fri, 14 Nov 2025 18:51:31 +0800 Subject: [PATCH] 索引字段名、变量名、API数据结构字段名都对齐spu/sku表 --- api/models.py | 30 +++++++++++++++--------------- api/result_formatter.py | 65 +++++++++++++++++++++++++++++++++-------------------------------- config/config.yaml | 51 +++++++++------------------------------------------ config/config_loader.py | 6 +++++- config/field_types.py | 14 ++++++++++++++ config/query_rewrite.dict | 6 +++--- docs/INDEX_FIELDS_DOCUMENTATION.md | 42 +++++++++++++++++++++--------------------- docs/Search-API-Examples.md | 72 ++++++++++++++++++++++++++++++++++++------------------------------------ docs/Usage-Guide.md | 6 +++--- docs/基础配置指南.md | 42 +++++++++++++++++++++--------------------- docs/搜索API对接指南.md | 88 +++++++++++++++++++++++++++++++++++++++++++--------------------------------------------- docs/搜索API速查表.md | 24 ++++++++++++------------ docs/系统设计文档.md | 42 +++++++++++++++++------------------------- docs/索引字段说明.md | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------- docs/设计文档.md | 716 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ frontend/README.md | 2 +- frontend/static/js/app.js | 14 +++++++------- frontend/static/js/app_base.js | 
36 ++++++++++++++++++------------------ indexer/spu_transformer.py | 75 +++++++++++++++++++++++++++++++++------------------------------------------ scripts/generate_test_data.py | 12 ++++++------ scripts/ingest_shoplazza.py | 2 +- scripts/test_base.py | 30 +++++++++++++++--------------- search/searcher.py | 2 +- 23 files changed, 1128 insertions(+), 369 deletions(-) create mode 100644 docs/设计文档.md diff --git a/api/models.py b/api/models.py index 9f23626..e632eb8 100644 --- a/api/models.py +++ b/api/models.py @@ -43,7 +43,7 @@ class FacetConfig(BaseModel): json_schema_extra = { "examples": [ { - "field": "categoryName_keyword", + "field": "category.keyword", "size": 15, "type": "terms" }, @@ -77,8 +77,8 @@ class SearchRequest(BaseModel): json_schema_extra={ "examples": [ { - "categoryName_keyword": ["玩具", "益智玩具"], - "brandName_keyword": "乐高", + "category.keyword": ["玩具", "益智玩具"], + "vendor.keyword": "乐高", "in_stock": True } ] @@ -110,10 +110,10 @@ class SearchRequest(BaseModel): json_schema_extra={ "examples": [ # 简单模式:只指定字段名,使用默认配置 - ["categoryName_keyword", "brandName_keyword"], + ["category.keyword", "vendor.keyword"], # 高级模式:详细配置 [ - {"field": "categoryName_keyword", "size": 15}, + {"field": "category.keyword", "size": 15}, { "field": "price", "type": "range", @@ -172,10 +172,10 @@ class FacetResult(BaseModel): total_count: Optional[int] = Field(None, description="该字段的总文档数") -class VariantResult(BaseModel): - """商品变体结果""" - variant_id: str = Field(..., description="变体ID") - title: Optional[str] = Field(None, description="变体标题") +class SkuResult(BaseModel): + """SKU 结果""" + sku_id: str = Field(..., description="SKU ID") + title: Optional[str] = Field(None, description="SKU标题") price: Optional[float] = Field(None, description="价格") compare_at_price: Optional[float] = Field(None, description="原价") sku: Optional[str] = Field(None, description="SKU编码") @@ -183,21 +183,21 @@ class VariantResult(BaseModel): options: Optional[Dict[str, Any]] = Field(None, 
description="选项(颜色、尺寸等)") -class ProductResult(BaseModel): - """商品搜索结果""" - product_id: str = Field(..., description="商品ID") +class SpuResult(BaseModel): + """SPU 搜索结果""" + spu_id: str = Field(..., description="SPU ID") title: Optional[str] = Field(None, description="商品标题") handle: Optional[str] = Field(None, description="商品handle") description: Optional[str] = Field(None, description="商品描述") vendor: Optional[str] = Field(None, description="供应商/品牌") - product_type: Optional[str] = Field(None, description="商品类型") + category: Optional[str] = Field(None, description="类目") tags: Optional[str] = Field(None, description="标签") price: Optional[float] = Field(None, description="价格(min_price)") compare_at_price: Optional[float] = Field(None, description="原价") currency: str = Field("USD", description="货币单位") image_url: Optional[str] = Field(None, description="主图URL") in_stock: bool = Field(True, description="是否有库存") - variants: List[VariantResult] = Field(default_factory=list, description="变体列表") + skus: List[SkuResult] = Field(default_factory=list, description="SKU列表") relevance_score: float = Field(..., ge=0.0, description="相关性分数(ES原始分数)") @@ -205,7 +205,7 @@ class SearchResponse(BaseModel): """搜索响应模型(外部友好格式)""" # 核心结果 - results: List[ProductResult] = Field(..., description="搜索结果列表") + results: List[SpuResult] = Field(..., description="搜索结果列表") total: int = Field(..., description="匹配的总文档数") max_score: float = Field(..., description="最高相关性分数") diff --git a/api/result_formatter.py b/api/result_formatter.py index b2f6e70..10dff47 100644 --- a/api/result_formatter.py +++ b/api/result_formatter.py @@ -3,7 +3,7 @@ Result formatter for converting ES internal format to external-friendly format. 
""" from typing import List, Dict, Any, Optional -from .models import ProductResult, VariantResult, FacetResult, FacetValue +from .models import SpuResult, SkuResult, FacetResult, FacetValue class ResultFormatter: @@ -13,16 +13,16 @@ class ResultFormatter: def format_search_results( es_hits: List[Dict[str, Any]], max_score: float = 1.0 - ) -> List[ProductResult]: + ) -> List[SpuResult]: """ - Convert ES hits to ProductResult list. + Convert ES hits to SpuResult list. Args: es_hits: List of ES hit dictionaries (with _id, _score, _source) max_score: Maximum score (unused, kept for compatibility) Returns: - List of ProductResult objects + List of SpuResult objects """ results = [] @@ -40,44 +40,44 @@ class ResultFormatter: except (ValueError, TypeError): relevance_score = 0.0 - # Extract variants - variants = [] - variants_data = source.get('variants', []) - if isinstance(variants_data, list): - for variant_data in variants_data: - variant = VariantResult( - variant_id=str(variant_data.get('variant_id', '')), - title=variant_data.get('title'), - price=variant_data.get('price'), - compare_at_price=variant_data.get('compare_at_price'), - sku=variant_data.get('sku'), - stock=variant_data.get('stock', 0), - options=variant_data.get('options') + # Extract SKUs + skus = [] + skus_data = source.get('skus', []) + if isinstance(skus_data, list): + for sku_entry in skus_data: + sku = SkuResult( + sku_id=str(sku_entry.get('sku_id', '')), + title=sku_entry.get('title'), + price=sku_entry.get('price'), + compare_at_price=sku_entry.get('compare_at_price'), + sku=sku_entry.get('sku'), + stock=sku_entry.get('stock', 0), + options=sku_entry.get('options') ) - variants.append(variant) + skus.append(sku) - # Determine in_stock (any variant has stock > 0) - in_stock = any(v.stock > 0 for v in variants) if variants else True + # Determine in_stock (any sku has stock > 0) + in_stock = any(sku.stock > 0 for sku in skus) if skus else True - # Build ProductResult - product = ProductResult( - 
product_id=str(source.get('product_id', '')), + # Build SpuResult + spu = SpuResult( + spu_id=str(source.get('spu_id', '')), title=source.get('title'), handle=source.get('handle'), description=source.get('description'), vendor=source.get('vendor'), - product_type=source.get('product_type'), + category=source.get('category'), tags=source.get('tags'), price=source.get('min_price'), compare_at_price=source.get('compare_at_price'), currency="USD", # Default currency image_url=source.get('image_url'), in_stock=in_stock, - variants=variants, + skus=skus, relevance_score=relevance_score ) - results.append(product) + results.append(spu) return results @@ -99,6 +99,7 @@ class ResultFormatter: facets = [] for field_name, agg_data in es_aggregations.items(): + display_field = field_name[:-6] if field_name.endswith("_facet") else field_name # Handle terms aggregation if 'buckets' in agg_data: values = [] @@ -112,8 +113,8 @@ class ResultFormatter: values.append(value) facet = FacetResult( - field=field_name, - label=field_name, # Can be enhanced with field labels + field=display_field, + label=display_field, # Can be enhanced with field labels type="terms", values=values, total_count=agg_data.get('sum_other_doc_count', 0) + len(values) @@ -134,8 +135,8 @@ class ResultFormatter: values.append(value) facet = FacetResult( - field=field_name, - label=field_name, + field=display_field, + label=display_field, type="range", values=values ) @@ -146,7 +147,7 @@ class ResultFormatter: @staticmethod def generate_suggestions( query: str, - results: List[ProductResult] + results: List[SpuResult] ) -> List[str]: """ Generate search suggestions. @@ -164,7 +165,7 @@ class ResultFormatter: @staticmethod def generate_related_searches( query: str, - results: List[ProductResult] + results: List[SpuResult] ) -> List[str]: """ Generate related searches. 
diff --git a/config/config.yaml b/config/config.yaml index 5c3de46..6065e8d 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -22,7 +22,7 @@ fields: return_in_source: true # 商品标识字段 - - name: "product_id" + - name: "spu_id" type: "KEYWORD" required: true index: true @@ -87,61 +87,29 @@ fields: # 分类和标签字段(TEXT + KEYWORD双重索引) - name: "vendor" - type: "TEXT" + type: "HKText" analyzer: "chinese_ecommerce" boost: 1.5 index: true store: true return_in_source: true - - name: "vendor_keyword" - type: "KEYWORD" - index: true - store: false - return_in_source: false # keyword字段通常只用于过滤,不需要返回 - - - name: "product_type" - type: "TEXT" - analyzer: "chinese_ecommerce" - boost: 1.5 - index: true - store: true - return_in_source: true - - - name: "product_type_keyword" - type: "KEYWORD" - index: true - store: false - return_in_source: false - - name: "tags" - type: "TEXT" + type: "HKText" analyzer: "chinese_ecommerce" boost: 1.0 index: true store: true return_in_source: true - - name: "tags_keyword" - type: "KEYWORD" - index: true - store: false - return_in_source: false - - name: "category" - type: "TEXT" + type: "HKText" analyzer: "chinese_ecommerce" boost: 1.5 index: true store: true return_in_source: true - - name: "category_keyword" - type: "KEYWORD" - index: true - store: false - return_in_source: false - # 价格字段(扁平化) - name: "min_price" type: "FLOAT" @@ -202,13 +170,13 @@ fields: store: true return_in_source: false # 通常不需要返回 - # 嵌套variants字段 - - name: "variants" + # 嵌套skus字段 + - name: "skus" type: "JSON" nested: true return_in_source: true nested_properties: - variant_id: + sku_id: type: "keyword" index: true store: true @@ -249,7 +217,6 @@ indexes: - "seo_description" - "seo_keywords" - "vendor" - - "product_type" - "tags" - "category" analyzer: "chinese_ecommerce" @@ -323,9 +290,9 @@ rerank: expression: "" description: "Local reranking (disabled, use ES function_score instead)" -# SPU配置(已启用,使用嵌套variants) +# SPU配置(已启用,使用嵌套skus) spu_config: enabled: true - spu_field: 
"product_id" + spu_field: "spu_id" inner_hits_size: 10 diff --git a/config/config_loader.py b/config/config_loader.py index 3cfee9e..f6c9892 100644 --- a/config/config_loader.py +++ b/config/config_loader.py @@ -277,11 +277,13 @@ class ConfigLoader: """Parse field configuration from dictionary.""" name = field_data["name"] field_type_str = field_data["type"] + field_type_raw = field_type_str # Map field type string to enum if field_type_str not in FIELD_TYPE_MAP: raise ConfigurationError(f"Unknown field type: {field_type_str}") field_type = FIELD_TYPE_MAP[field_type_str] + is_hktext = field_type_str.lower() == "hktext" # Map analyzer string to enum (if provided) analyzer = None @@ -309,7 +311,9 @@ class ConfigLoader: embedding_dims=field_data.get("embedding_dims", 1024), embedding_similarity=field_data.get("embedding_similarity", "dot_product"), nested=field_data.get("nested", False), - nested_properties=field_data.get("nested_properties") + nested_properties=field_data.get("nested_properties"), + keyword_subfield=field_data.get("keyword_subfield", is_hktext), + keyword_ignore_above=field_data.get("keyword_ignore_above", 256) ) def _parse_index_config(self, index_data: Dict[str, Any]) -> IndexConfig: diff --git a/config/field_types.py b/config/field_types.py index 6d84141..c1d3703 100644 --- a/config/field_types.py +++ b/config/field_types.py @@ -72,6 +72,10 @@ class FieldConfig: nested: bool = False nested_properties: Optional[Dict[str, Any]] = None + # Hybrid Keyword Text (HKText) support + keyword_subfield: bool = False + keyword_ignore_above: int = 256 + def get_es_mapping_for_field(field_config: FieldConfig) -> Dict[str, Any]: """ @@ -102,6 +106,13 @@ def get_es_mapping_for_field(field_config: FieldConfig) -> Dict[str, Any]: if field_config.search_analyzer: mapping["search_analyzer"] = field_config.search_analyzer.value + if field_config.keyword_subfield: + mapping.setdefault("fields", {}) + mapping["fields"]["keyword"] = { + "type": "keyword", + 
"ignore_above": field_config.keyword_ignore_above + } + elif field_config.field_type == FieldType.KEYWORD: mapping = { "type": "keyword", @@ -256,6 +267,9 @@ def get_default_similarity() -> Dict[str, Any]: FIELD_TYPE_MAP = { "text": FieldType.TEXT, "TEXT": FieldType.TEXT, + "HKText": FieldType.TEXT, + "hktext": FieldType.TEXT, + "HKTEXT": FieldType.TEXT, "keyword": FieldType.KEYWORD, "KEYWORD": FieldType.KEYWORD, "LITERAL": FieldType.KEYWORD, diff --git a/config/query_rewrite.dict b/config/query_rewrite.dict index 2bc9202..7fb87fe 100644 --- a/config/query_rewrite.dict +++ b/config/query_rewrite.dict @@ -1,4 +1,4 @@ -芭比 vendor_keyword:芭比 OR title:芭比娃娃 -玩具 category_keyword:玩具 OR title:玩具 -消防 category_keyword:消防 OR title:消防 +芭比 vendor.keyword:芭比 OR title:芭比娃娃 +玩具 category.keyword:玩具 OR title:玩具 +消防 category.keyword:消防 OR title:消防 diff --git a/docs/INDEX_FIELDS_DOCUMENTATION.md b/docs/INDEX_FIELDS_DOCUMENTATION.md index feaaf76..0fdd156 100644 --- a/docs/INDEX_FIELDS_DOCUMENTATION.md +++ b/docs/INDEX_FIELDS_DOCUMENTATION.md @@ -6,7 +6,7 @@ - **索引名称**: `search_products` - **索引级别**: SPU级别(商品级别) -- **数据结构**: SPU文档包含嵌套的variants(SKU)数组 +- **数据结构**: SPU文档包含嵌套的skus(SKU)数组 ## 字段说明表 @@ -15,7 +15,7 @@ | 索引字段名 | ES字段类型 | 是否索引 | 索引方式 | 数据来源表 | 表中字段名 | 表中字段类型 | 说明 | |-----------|-----------|---------|---------|-----------|-----------|-------------|------| | tenant_id | keyword | 是 | 精确匹配 | SPU表 | tenant_id | BIGINT | 租户ID,用于多租户隔离 | -| product_id | keyword | 是 | 精确匹配 | SPU表 | id | BIGINT | 商品ID(SPU ID) | +| spu_id | keyword | 是 | 精确匹配 | SPU表 | id | BIGINT | 商品ID(SPU ID) | | handle | keyword | 是 | 精确匹配 | SPU表 | handle | VARCHAR(255) | 商品URL handle | ### 文本搜索字段 @@ -39,13 +39,13 @@ | 索引字段名 | ES字段类型 | 是否索引 | 索引方式 | 数据来源表 | 表中字段名 | 表中字段类型 | Boost权重 | 是否返回 | 说明 | |-----------|-----------|---------|---------|-----------|-----------|-------------|-----------|---------|------| | vendor | TEXT | 是 | english | SPU表 | vendor | VARCHAR(255) | 1.5 | 是 | 供应商/品牌(文本搜索) | -| vendor_keyword | keyword 
| 是 | 精确匹配 | SPU表 | vendor | VARCHAR(255) | - | 否 | 供应商/品牌(精确匹配,用于过滤) | +| vendor.keyword | keyword | 是 | 精确匹配 | SPU表 | vendor | VARCHAR(255) | - | 否 | 供应商/品牌(精确匹配,用于过滤) | | product_type | TEXT | 是 | english | SPU表 | category | VARCHAR(255) | 1.5 | 是 | 商品类型(文本搜索) | | product_type_keyword | keyword | 是 | 精确匹配 | SPU表 | category | VARCHAR(255) | - | 否 | 商品类型(精确匹配,用于过滤) | | tags | TEXT | 是 | english | SPU表 | tags | VARCHAR(1024) | 1.0 | 是 | 标签(文本搜索) | -| tags_keyword | keyword | 是 | 精确匹配 | SPU表 | tags | VARCHAR(1024) | - | 否 | 标签(精确匹配,用于过滤) | +| tags.keyword | keyword | 是 | 精确匹配 | SPU表 | tags | VARCHAR(1024) | - | 否 | 标签(精确匹配,用于过滤) | | category | TEXT | 是 | english | SPU表 | category | VARCHAR(255) | 1.5 | 是 | 类目(文本搜索) | -| category_keyword | keyword | 是 | 精确匹配 | SPU表 | category | VARCHAR(255) | - | 否 | 类目(精确匹配,用于过滤) | +| category.keyword | keyword | 是 | 精确匹配 | SPU表 | category | VARCHAR(255) | - | 否 | 类目(精确匹配,用于过滤) | ### 价格字段 @@ -86,26 +86,26 @@ | shoplazza_created_at | DATE | 是 | 日期范围 | SPU表 | shoplazza_created_at | DATETIME | 否 | 店匠系统创建时间 | | shoplazza_updated_at | DATE | 是 | 日期范围 | SPU表 | shoplazza_updated_at | DATETIME | 否 | 店匠系统更新时间 | -### 嵌套Variants字段(SKU级别) +### 嵌套Skus字段(SKU级别) | 索引字段名 | ES字段类型 | 是否索引 | 索引方式 | 数据来源表 | 表中字段名 | 表中字段类型 | 说明 | |-----------|-----------|---------|---------|-----------|-----------|-------------|------| -| variants | JSON (nested) | 是 | 嵌套对象 | SKU表 | - | - | 商品变体数组(嵌套结构) | +| skus | JSON (nested) | 是 | 嵌套对象 | SKU表 | - | - | 商品变体数组(嵌套结构) | -#### Variants子字段 +#### Skus子字段 | 索引字段名 | ES字段类型 | 是否索引 | 索引方式 | 数据来源表 | 表中字段名 | 表中字段类型 | 说明 | |-----------|-----------|---------|---------|-----------|-----------|-------------|------| -| variants.variant_id | keyword | 是 | 精确匹配 | SKU表 | id | BIGINT | 变体ID(SKU ID) | -| variants.title | text | 是 | english | SKU表 | title | VARCHAR(500) | 变体标题 | -| variants.price | float | 是 | float | SKU表 | price | DECIMAL(10,2) | 变体价格 | -| variants.compare_at_price | float | 是 | float | SKU表 | compare_at_price | 
DECIMAL(10,2) | 变体原价 | -| variants.sku | keyword | 是 | 精确匹配 | SKU表 | sku | VARCHAR(100) | SKU编码 | -| variants.stock | long | 是 | float | SKU表 | inventory_quantity | INT(11) | 库存数量 | -| variants.options | object | 是 | 对象 | SKU表 | option1/option2/option3 | VARCHAR(255) | 选项(颜色、尺寸等) | - -**Variants结构说明**: -- `variants` 是一个嵌套对象数组,每个元素代表一个SKU +| skus.sku_id | keyword | 是 | 精确匹配 | SKU表 | id | BIGINT | 变体ID(SKU ID) | +| skus.title | text | 是 | english | SKU表 | title | VARCHAR(500) | 变体标题 | +| skus.price | float | 是 | float | SKU表 | price | DECIMAL(10,2) | 变体价格 | +| skus.compare_at_price | float | 是 | float | SKU表 | compare_at_price | DECIMAL(10,2) | 变体原价 | +| skus.sku | keyword | 是 | 精确匹配 | SKU表 | sku | VARCHAR(100) | SKU编码 | +| skus.stock | long | 是 | float | SKU表 | inventory_quantity | INT(11) | 库存数量 | +| skus.options | object | 是 | 对象 | SKU表 | option1/option2/option3 | VARCHAR(255) | 选项(颜色、尺寸等) | + +**Skus结构说明**: +- `skus` 是一个嵌套对象数组,每个元素代表一个SKU - 使用ES的nested类型,支持对嵌套字段进行独立查询和过滤 - `options` 对象包含 `option1`、`option2`、`option3` 三个字段,分别对应SKU表中的选项值 @@ -165,7 +165,7 @@ ### 数据类型转换 -1. **BIGINT → keyword**: 数字ID转换为字符串(如 `product_id`, `variant_id`) +1. **BIGINT → keyword**: 数字ID转换为字符串(如 `spu_id`, `sku_id`) 2. **DECIMAL → FLOAT**: 价格字段从DECIMAL转换为FLOAT 3. **INT → LONG**: 库存数量从INT转换为LONG 4. **DATETIME → DATE**: 时间字段转换为ISO格式字符串 @@ -179,7 +179,7 @@ ## 注意事项 1. **多租户隔离**: 所有查询必须包含 `tenant_id` 过滤条件 -2. **嵌套查询**: 查询variants字段时需要使用nested查询语法 +2. **嵌套查询**: 查询skus字段时需要使用nested查询语法 3. **字段命名**: 用于过滤的字段应使用 `*_keyword` 后缀的字段 4. **向量搜索**: title_embedding字段用于语义搜索,需要配合文本查询使用 5. 
**Boost权重**: 不同字段的boost权重影响搜索结果的相关性排序 @@ -210,7 +210,7 @@ ### SKU表(shoplazza_product_sku) 主要字段: -- `id`: BIGINT - 主键ID(对应variant_id) +- `id`: BIGINT - 主键ID(对应sku_id) - `spu_id`: BIGINT - SPU ID(关联字段) - `title`: VARCHAR(500) - 变体标题 - `price`: DECIMAL(10,2) - 价格 diff --git a/docs/Search-API-Examples.md b/docs/Search-API-Examples.md index c262fa2..35f7e7f 100644 --- a/docs/Search-API-Examples.md +++ b/docs/Search-API-Examples.md @@ -90,7 +90,7 @@ curl -X POST "http://localhost:6002/search/" \ -d '{ "query": "玩具", "filters": { - "categoryName_keyword": "玩具" + "category.keyword": "玩具" } }' ``` @@ -103,7 +103,7 @@ curl -X POST "http://localhost:6002/search/" \ -d '{ "query": "娃娃", "filters": { - "categoryName_keyword": ["玩具", "益智玩具", "儿童玩具"] + "category.keyword": ["玩具", "益智玩具", "儿童玩具"] } }' ``` @@ -118,8 +118,8 @@ curl -X POST "http://localhost:6002/search/" \ -d '{ "query": "娃娃", "filters": { - "categoryName_keyword": "玩具", - "brandName_keyword": "美泰" + "category.keyword": "玩具", + "vendor.keyword": "美泰" } }' ``` @@ -209,8 +209,8 @@ curl -X POST "http://localhost:6002/search/" \ -d '{ "query": "玩具", "filters": { - "categoryName_keyword": ["玩具", "益智玩具"], - "brandName_keyword": "乐高" + "category.keyword": ["玩具", "益智玩具"], + "vendor.keyword": "乐高" }, "range_filters": { "price": { @@ -237,7 +237,7 @@ curl -X POST "http://localhost:6002/search/" \ -d '{ "query": "玩具", "size": 20, - "facets": ["categoryName_keyword", "brandName_keyword"] + "facets": ["category.keyword", "vendor.keyword"] }' ``` @@ -248,8 +248,8 @@ curl -X POST "http://localhost:6002/search/" \ "total": 118, "facets": [ { - "field": "categoryName_keyword", - "label": "categoryName_keyword", + "field": "category.keyword", + "label": "category.keyword", "type": "terms", "values": [ {"value": "玩具", "count": 85, "selected": false}, @@ -257,8 +257,8 @@ curl -X POST "http://localhost:6002/search/" \ ] }, { - "field": "brandName_keyword", - "label": "brandName_keyword", + "field": "vendor.keyword", + "label": 
"vendor.keyword", "type": "terms", "values": [ {"value": "乐高", "count": 42, "selected": false}, @@ -280,12 +280,12 @@ curl -X POST "http://localhost:6002/search/" \ "query": "玩具", "facets": [ { - "field": "categoryName_keyword", + "field": "category.keyword", "size": 20, "type": "terms" }, { - "field": "brandName_keyword", + "field": "vendor.keyword", "size": 30, "type": "terms" } @@ -342,8 +342,8 @@ curl -X POST "http://localhost:6002/search/" \ -d '{ "query": "玩具", "facets": [ - {"field": "categoryName_keyword", "size": 15}, - {"field": "brandName_keyword", "size": 15}, + {"field": "category.keyword", "size": 15}, + {"field": "vendor.keyword", "size": 15}, { "field": "price", "type": "range", @@ -395,7 +395,7 @@ curl -X POST "http://localhost:6002/search/" \ -d '{ "query": "玩具", "filters": { - "categoryName_keyword": "益智玩具" + "category.keyword": "益智玩具" }, "sort_by": "min_price", "sort_order": "asc" @@ -426,7 +426,7 @@ curl -X POST "http://localhost:6002/search/image" \ "image_url": "https://example.com/barbie.jpg", "size": 20, "filters": { - "categoryName_keyword": "玩具" + "category.keyword": "玩具" }, "range_filters": { "price": { @@ -560,14 +560,14 @@ result = search_products( query="玩具", size=20, filters={ - "categoryName_keyword": ["玩具", "益智玩具"] + "category.keyword": ["玩具", "益智玩具"] }, range_filters={ "price": {"gte": 50, "lte": 200} }, facets=[ - {"field": "brandName_keyword", "size": 15}, - {"field": "categoryName_keyword", "size": 15}, + {"field": "vendor.keyword", "size": 15}, + {"field": "category.keyword", "size": 15}, { "field": "price", "type": "range", @@ -697,14 +697,14 @@ const result2 = await client.search({ query: "玩具", size: 20, filters: { - categoryName_keyword: ["玩具", "益智玩具"] + "category.keyword": ["玩具", "益智玩具"] }, rangeFilters: { price: { gte: 50, lte: 200 } }, facets: [ - { field: "brandName_keyword", size: 15 }, - { field: "categoryName_keyword", size: 15 } + { field: "vendor.keyword", size: 15 }, + { field: "category.keyword", size: 15 } ], 
sortBy: "price", sortOrder: "asc" @@ -755,8 +755,8 @@ const SearchComponent = { filters: this.filters, range_filters: this.rangeFilters, facets: [ - { field: 'categoryName_keyword', size: 15 }, - { field: 'brandName_keyword', size: 15 } + { field: 'category.keyword', size: 15 }, + { field: 'vendor.keyword', size: 15 } ] }) }); @@ -868,10 +868,10 @@ curl -X POST "http://localhost:6002/search/" \ -d '{ "query": "*", "filters": { - "categoryName_keyword": "玩具" + "category.keyword": "玩具" }, "facets": [ - {"field": "brandName_keyword", "size": 20}, + {"field": "vendor.keyword", "size": 20}, { "field": "price", "type": "range", @@ -898,8 +898,8 @@ curl -X POST "http://localhost:6002/search/" \ -d '{ "query": "芭比娃娃", "facets": [ - {"field": "categoryName_keyword", "size": 10}, - {"field": "brandName_keyword", "size": 10}, + {"field": "category.keyword", "size": 10}, + {"field": "vendor.keyword", "size": 10}, {"field": "price", "type": "range", "ranges": [ {"key": "0-50", "to": 50}, {"key": "50-100", "from": 50, "to": 100}, @@ -924,7 +924,7 @@ curl -X POST "http://localhost:6002/search/" \ "lte": 100 } }, - "facets": ["categoryName_keyword", "brandName_keyword"], + "facets": ["category.keyword", "vendor.keyword"], "sort_by": "min_price", "sort_order": "asc", "size": 50 @@ -1017,8 +1017,8 @@ curl -X POST "http://localhost:6002/search/" \ # ✅ 推荐:只请求必要的分面 { "facets": [ - {"field": "categoryName_keyword", "size": 15}, - {"field": "brandName_keyword", "size": 15} + {"field": "category.keyword", "size": 15}, + {"field": "vendor.keyword", "size": 15} ] } ``` @@ -1045,7 +1045,7 @@ curl -X POST "http://localhost:6002/search/" \ { "query": "玩具", "filters": { - "categoryName_keyword": "玩具" + "category.keyword": "玩具" } } ``` @@ -1064,7 +1064,7 @@ curl -X POST "http://localhost:6002/search/" \ "query": "*", "size": 0, "facets": [ - {"field": "categoryName_keyword", "size": 100} + {"field": "category.keyword", "size": 100} ] }' ``` @@ -1102,14 +1102,14 @@ curl -X POST 
"http://localhost:6002/search/" \ -d '{ "query": "(玩具 OR 游戏) AND 儿童 ANDNOT 电子", "filters": { - "categoryName_keyword": ["玩具", "益智玩具"] + "category.keyword": ["玩具", "益智玩具"] }, "range_filters": { "price": {"gte": 20, "lte": 100}, "days_since_last_update": {"lte": 30} }, "facets": [ - {"field": "brandName_keyword", "size": 20} + {"field": "vendor.keyword", "size": 20} ], "sort_by": "min_price", "sort_order": "asc", diff --git a/docs/Usage-Guide.md b/docs/Usage-Guide.md index 8305050..b3cbb53 100644 --- a/docs/Usage-Guide.md +++ b/docs/Usage-Guide.md @@ -299,7 +299,7 @@ curl -X POST http://localhost:6002/search/ \ "query": "玩具", "size": 10, "filters": { - "categoryName_keyword": ["玩具", "益智玩具"] + "category.keyword": ["玩具", "益智玩具"] }, "range_filters": { "price": {"gte": 50, "lte": 200} @@ -317,8 +317,8 @@ curl -X POST http://localhost:6002/search/ \ "query": "玩具", "size": 10, "facets": [ - {"field": "categoryName_keyword", "size": 15}, - {"field": "brandName_keyword", "size": 15} + {"field": "category.keyword", "size": 15}, + {"field": "vendor.keyword", "size": 15} ] }' ``` diff --git a/docs/基础配置指南.md b/docs/基础配置指南.md index 013ec98..2feed7d 100644 --- a/docs/基础配置指南.md +++ b/docs/基础配置指南.md @@ -8,7 +8,7 @@ Base配置是店匠(Shoplazza)通用配置,适用于所有使用店匠标 ## 核心特性 -- **SPU级别索引**:每个ES文档代表一个SPU,包含嵌套的variants数组 +- **SPU级别索引**:每个ES文档代表一个SPU,包含嵌套的skus数组 - **统一索引**:所有客户共享`search_products`索引 - **租户隔离**:通过`tenant_id`字段实现数据隔离 - **配置简化**:配置只包含ES搜索相关配置,不包含MySQL数据源配置 @@ -41,12 +41,12 @@ Base配置**只包含**: ### 主要字段 -- `product_id` - 商品ID +- `spu_id` - SPU ID - `title`, `brief`, `description` - 文本搜索字段 - `seo_title`, `seo_description`, `seo_keywords` - SEO字段 -- `vendor`, `product_type`, `tags`, `category` - 分类和标签字段 +- `vendor`, `tags`, `category` - 分类和标签字段(HKText,支持 `.keyword` 精确匹配) - `min_price`, `max_price`, `compare_at_price` - 价格字段 -- `variants` (nested) - 嵌套变体数组 +- `skus` (nested) - 嵌套SKU数组 ## 数据导入流程 @@ -109,9 +109,9 @@ Content-Type: application/json "size": 10, "from": 0, "filters": { - 
"category_keyword": "电子产品" + "category.keyword": "电子产品" }, - "facets": ["category_keyword", "vendor_keyword"] + "facets": ["category.keyword", "vendor.keyword"] } ``` @@ -120,20 +120,20 @@ Content-Type: application/json { "results": [ { - "product_id": "1", + "spu_id": "1", "title": "蓝牙耳机 Sony", "handle": "product-1", "description": "高品质无线蓝牙耳机", "vendor": "Sony", - "product_type": "电子产品", + "category": "电子产品", "price": 199.99, "compare_at_price": 299.99, "currency": "USD", "image_url": "//cdn.example.com/products/1.jpg", "in_stock": true, - "variants": [ + "skus": [ { - "variant_id": "1", + "sku_id": "1", "title": "黑色", "price": 199.99, "compare_at_price": 299.99, @@ -151,8 +151,8 @@ Content-Type: application/json "max_score": 1.0, "facets": [ { - "field": "category_keyword", - "label": "category_keyword", + "field": "category.keyword", + "label": "category.keyword", "type": "terms", "values": [ { @@ -176,31 +176,31 @@ Content-Type: application/json #### 主要变化 1. **`results`替代`hits`**:返回字段从`hits`改为`results` -2. **结构化结果**:每个结果包含`product_id`, `title`, `variants`, `relevance_score`等字段 +2. **结构化结果**:每个结果包含`spu_id`, `title`, `skus`, `relevance_score`等字段 3. **无ES内部字段**:不包含`_id`, `_score`, `_source`等ES内部字段 -4. **嵌套variants**:每个商品包含variants数组,每个variant包含完整的变体信息 +4. **嵌套skus**:每个商品包含skus数组,每个sku包含完整的变体信息 5. 
**相关性分数**:`relevance_score`是ES原始分数(不进行归一化) -#### ProductResult字段 +#### SpuResult字段 -- `product_id` - 商品ID +- `spu_id` - SPU ID - `title` - 商品标题 - `handle` - 商品handle - `description` - 商品描述 - `vendor` - 供应商/品牌 -- `product_type` - 商品类型 +- `category` - 类目 - `tags` - 标签 - `price` - 最低价格(min_price) - `compare_at_price` - 原价 - `currency` - 货币单位(默认USD) - `image_url` - 主图URL - `in_stock` - 是否有库存 -- `variants` - 变体列表 +- `skus` - SKU列表 - `relevance_score` - 相关性分数(ES原始分数) -#### VariantResult字段 +#### SkuResult字段 -- `variant_id` - 变体ID +- `sku_id` - SKU ID - `title` - 变体标题 - `price` - 价格 - `compare_at_price` - 原价 @@ -242,7 +242,7 @@ A: 使用`test_base.py`脚本,指定两个不同的`--tenant-id`,检查搜 ### Q: API返回格式中为什么没有`_id`和`_score`? -A: 为了提供外部友好的API格式,我们移除了ES内部字段,使用`product_id`和`relevance_score`替代。 +A: 为了提供外部友好的API格式,我们移除了ES内部字段,使用`spu_id`和`relevance_score`替代。 ### Q: 如何添加新的搜索字段? diff --git a/docs/搜索API对接指南.md b/docs/搜索API对接指南.md index 12144f6..be6be65 100644 --- a/docs/搜索API对接指南.md +++ b/docs/搜索API对接指南.md @@ -43,7 +43,7 @@ curl -X POST "http://localhost:6002/search/" \ "size": 5, "from": 10, "filters": { - "vendor_keyword": ["乐高", "孩之宝"] + "vendor.keyword": ["乐高", "孩之宝"] }, "sort_by": "min_price", "sort_order": "asc" @@ -58,7 +58,7 @@ curl -X POST "http://localhost:6002/search/" \ -d '{ "tenant_id": "demo-tenant", "query": "芭比娃娃", - "facets": ["category_keyword", "vendor_keyword"], + "facets": ["category.keyword", "vendor.keyword"], "min_score": 0.2, "debug": true }' @@ -134,9 +134,9 @@ curl -X POST "http://localhost:6002/search/" \ ```json { "filters": { - "category_keyword": "玩具", // 单值:精确匹配 - "vendor_keyword": ["乐高", "孩之宝"], // 数组:匹配任意值(OR) - "product_type_keyword": "益智玩具" // 单值:精确匹配 + "category.keyword": "玩具", // 单值:精确匹配 + "vendor.keyword": ["乐高", "孩之宝"], // 数组:匹配任意值(OR) + "tags.keyword": "益智玩具" // 单值:精确匹配 } } ``` @@ -148,10 +148,9 @@ curl -X POST "http://localhost:6002/search/" \ - 数组:匹配任意值(OR 逻辑) **常用过滤字段**: -- `category_keyword`: 类目 -- `vendor_keyword`: 品牌/供应商 -- `product_type_keyword`: 
商品类型 -- `tags_keyword`: 标签 +- `category.keyword`: 类目 +- `vendor.keyword`: 品牌/供应商 +- `tags.keyword`: 标签 #### 2. 范围过滤器 (range_filters) @@ -197,7 +196,7 @@ curl -X POST "http://localhost:6002/search/" \ **简单模式**(字符串数组): ```json { - "facets": ["category_keyword", "vendor_keyword"] + "facets": ["category.keyword", "vendor.keyword"] } ``` @@ -206,7 +205,7 @@ curl -X POST "http://localhost:6002/search/" \ { "facets": [ { - "field": "category_keyword", + "field": "category.keyword", "size": 15, "type": "terms" }, @@ -270,21 +269,21 @@ curl -X POST "http://localhost:6002/search/" \ { "results": [ { - "product_id": "12345", + "spu_id": "12345", "title": "芭比时尚娃娃", "handle": "barbie-doll", "description": "高品质芭比娃娃", "vendor": "美泰", - "product_type": "玩具", + "category": "玩具", "tags": "娃娃, 玩具, 女孩", "price": 89.99, "compare_at_price": 129.99, "currency": "USD", "image_url": "https://example.com/image.jpg", "in_stock": true, - "variants": [ + "skus": [ { - "variant_id": "67890", + "sku_id": "67890", "title": "粉色款", "price": 89.99, "compare_at_price": 129.99, @@ -303,8 +302,8 @@ curl -X POST "http://localhost:6002/search/" \ "max_score": 8.5, "facets": [ { - "field": "category_keyword", - "label": "category_keyword", + "field": "category.keyword", + "label": "category.keyword", "type": "terms", "values": [ { @@ -335,11 +334,11 @@ curl -X POST "http://localhost:6002/search/" \ | 字段 | 类型 | 说明 | |------|------|------| -| `results` | array | 搜索结果列表(ProductResult对象数组) | -| `results[].product_id` | string | 商品ID | +| `results` | array | 搜索结果列表(SpuResult对象数组) | +| `results[].spu_id` | string | SPU ID | | `results[].title` | string | 商品标题 | | `results[].price` | float | 价格(min_price) | -| `results[].variants` | array | 变体列表(SKU列表) | +| `results[].skus` | array | SKU列表 | | `results[].relevance_score` | float | 相关性分数 | | `total` | integer | 匹配的总文档数 | | `max_score` | float | 最高相关性分数 | @@ -347,31 +346,31 @@ curl -X POST "http://localhost:6002/search/" \ | `query_info` | object | 查询处理信息 | | 
`took_ms` | integer | 搜索耗时(毫秒) | -### ProductResult字段说明 +### SpuResult字段说明 | 字段 | 类型 | 说明 | |------|------|------| -| `product_id` | string | 商品ID(SPU ID) | +| `spu_id` | string | SPU ID | | `title` | string | 商品标题 | | `handle` | string | 商品URL handle | | `description` | string | 商品描述 | | `vendor` | string | 供应商/品牌 | -| `product_type` | string | 商品类型 | +| `category` | string | 类目 | | `tags` | string | 标签 | | `price` | float | 价格(min_price) | | `compare_at_price` | float | 原价 | | `currency` | string | 货币单位(默认USD) | | `image_url` | string | 主图URL | -| `in_stock` | boolean | 是否有库存(任意变体有库存即为true) | -| `variants` | array | 变体列表 | +| `in_stock` | boolean | 是否有库存(任意SKU有库存即为true) | +| `skus` | array | SKU 列表 | | `relevance_score` | float | 相关性分数 | -### VariantResult字段说明 +### SkuResult字段说明 | 字段 | 类型 | 说明 | |------|------|------| -| `variant_id` | string | 变体ID(SKU ID) | -| `title` | string | 变体标题 | +| `sku_id` | string | SKU ID | +| `title` | string | SKU标题 | | `price` | float | 价格 | | `compare_at_price` | float | 原价 | | `sku` | string | SKU编码 | @@ -405,7 +404,7 @@ curl -X POST "http://localhost:6002/search/" \ "query": "玩具", "size": 20, "filters": { - "category_keyword": "益智玩具" + "category.keyword": "益智玩具" }, "range_filters": { "min_price": { @@ -425,8 +424,8 @@ curl -X POST "http://localhost:6002/search/" \ "query": "玩具", "size": 20, "facets": [ - "category_keyword", - "vendor_keyword" + "category.keyword", + "vendor.keyword" ] } ``` @@ -440,7 +439,7 @@ curl -X POST "http://localhost:6002/search/" \ "query": "玩具", "size": 20, "filters": { - "vendor_keyword": ["乐高", "孩之宝", "美泰"] + "vendor.keyword": ["乐高", "孩之宝", "美泰"] }, "range_filters": { "min_price": { @@ -450,7 +449,7 @@ curl -X POST "http://localhost:6002/search/" \ }, "facets": [ { - "field": "category_keyword", + "field": "category.keyword", "size": 15 }, { @@ -575,7 +574,7 @@ curl "http://localhost:6002/search/instant?q=玩具&size=5" "source": { "title": "芭比时尚娃娃", "min_price": 89.99, - "category_keyword": "玩具" + 
"category.keyword": "玩具" } } ``` @@ -646,15 +645,15 @@ curl "http://localhost:6002/search/12345" | 字段名 | 类型 | 描述 | |--------|------|------| -| `product_id` | keyword | 商品 ID(SPU) | +| `spu_id` | keyword | SPU ID | | `sku_id` | keyword/long | SKU ID(主键) | | `title` | text | 商品名称(中文) | | `en_title` | text | 商品名称(英文) | | `ru_title` | text | 商品名称(俄文) | -| `category_keyword` | keyword | 类目(精确匹配) | -| `vendor_keyword` | keyword | 品牌/供应商(精确匹配) | -| `product_type_keyword` | keyword | 商品类型 | -| `tags_keyword` | keyword | 标签 | +| `category.keyword` | keyword | 类目(精确匹配) | +| `vendor.keyword` | keyword | 品牌/供应商(精确匹配) | +| `category` | HKText | 类目(支持 `category.keyword` 精确匹配) | +| `tags.keyword` | keyword | 标签 | | `min_price` | double | 最低价格 | | `max_price` | double | 最高价格 | | `compare_at_price` | double | 原价 | @@ -664,7 +663,7 @@ curl "http://localhost:6002/search/12345" | `text_embedding` | dense_vector | 文本向量(1024 维) | | `image_embedding` | dense_vector | 图片向量(1024 维) | -> 不同租户可自定义字段名称,但最佳实践是对可过滤字段建立 `*_keyword` 版本,对可排序字段显式建 keyword/数值映射。 +> 不同租户可自定义字段名称。推荐将可过滤的文本字段配置为 HKText,这样即可同时支持全文检索和 `field.keyword` 精确过滤;数值字段单独建索引以用于排序/Range。 --- @@ -672,12 +671,11 @@ curl "http://localhost:6002/search/12345" ### 常用字段列表 -#### 过滤字段(使用 `*_keyword` 后缀) +#### 过滤字段(使用 HKText 的 keyword 子字段) -- `category_keyword`: 类目 -- `vendor_keyword`: 品牌/供应商 -- `product_type_keyword`: 商品类型 -- `tags_keyword`: 标签 +- `category.keyword`: 类目 +- `vendor.keyword`: 品牌/供应商 +- `tags.keyword`: 标签 #### 范围字段 diff --git a/docs/搜索API速查表.md b/docs/搜索API速查表.md index 76c462b..980adb7 100644 --- a/docs/搜索API速查表.md +++ b/docs/搜索API速查表.md @@ -17,8 +17,8 @@ POST /search/ ```bash { "filters": { - "categoryName_keyword": "玩具", // 单值 - "brandName_keyword": ["乐高", "美泰"] // 多值(OR) + "category.keyword": "玩具", // 单值 + "vendor.keyword": ["乐高", "美泰"] // 多值(OR) } } ``` @@ -48,7 +48,7 @@ POST /search/ ```bash { - "facets": ["categoryName_keyword", "brandName_keyword"] + "facets": ["category.keyword", "vendor.keyword"] } ``` @@ -57,7 +57,7 
@@ POST /search/ ```bash { "facets": [ - {"field": "categoryName_keyword", "size": 15}, + {"field": "category.keyword", "size": 15}, { "field": "price", "type": "range", @@ -115,14 +115,14 @@ POST /search/ "size": 20, "from": 0, "filters": { - "categoryName_keyword": ["玩具", "益智玩具"] + "category.keyword": ["玩具", "益智玩具"] }, "range_filters": { "price": {"gte": 50, "lte": 200} }, "facets": [ - {"field": "brandName_keyword", "size": 15}, - {"field": "categoryName_keyword", "size": 15} + {"field": "vendor.keyword", "size": 15}, + {"field": "category.keyword", "size": 15} ], "sort_by": "min_price", "sort_order": "asc" @@ -147,7 +147,7 @@ POST /search/ "took_ms": 45, "facets": [ { - "field": "categoryName_keyword", + "field": "category.keyword", "label": "商品类目", "type": "terms", "values": [ @@ -194,9 +194,9 @@ import requests result = requests.post('http://localhost:6002/search/', json={ "query": "玩具", - "filters": {"categoryName_keyword": "玩具"}, + "filters": {"category.keyword": "玩具"}, "range_filters": {"price": {"gte": 50, "lte": 200}}, - "facets": ["brandName_keyword"], + "facets": ["vendor.keyword"], "sort_by": "min_price", "sort_order": "asc" }).json() @@ -214,9 +214,9 @@ const result = await fetch('http://localhost:6002/search/', { headers: {'Content-Type': 'application/json'}, body: JSON.stringify({ query: "玩具", - filters: {categoryName_keyword: "玩具"}, + filters: {"category.keyword": "玩具"}, range_filters: {price: {gte: 50, lte: 200}}, - facets: ["brandName_keyword"], + facets: ["vendor.keyword"], sort_by: "min_price", sort_order: "asc" }) diff --git a/docs/系统设计文档.md b/docs/系统设计文档.md index df25f92..6493f41 100644 --- a/docs/系统设计文档.md +++ b/docs/系统设计文档.md @@ -24,17 +24,17 @@ - 所有客户共享同一个Elasticsearch索引:`search_products` - 索引粒度:SPU级别(每个文档代表一个SPU) - 数据隔离:通过`tenant_id`字段实现租户隔离 -- 嵌套结构:每个SPU文档包含嵌套的`variants`数组(SKU变体) +- 嵌套结构:每个SPU文档包含嵌套的`skus`数组 **索引文档结构**: ```json { "tenant_id": "1", - "product_id": "123", + "spu_id": "123", "title": "蓝牙耳机", - "variants": [ + "skus": [ { -
"variant_id": "456", + "sku_id": "456", "title": "黑色", "price": 199.99, "sku": "SKU-123-1", @@ -108,7 +108,7 @@ fields: store: true # 商品标识字段 - - name: "product_id" + - name: "spu_id" type: "KEYWORD" required: true index: true @@ -129,12 +129,12 @@ fields: index: true store: true - # 嵌套variants字段 - - name: "variants" + # 嵌套skus字段 + - name: "skus" type: "JSON" nested: true nested_properties: - variant_id: + sku_id: type: "keyword" price: type: "float" @@ -246,7 +246,7 @@ indexes: 1. **数据加载**:从MySQL读取`shoplazza_product_spu`和`shoplazza_product_sku`表 2. **数据转换**(`indexer/spu_transformer.py`): - 按`spu_id`和`tenant_id`关联SPU和SKU数据 - - 将SKU数据聚合为嵌套的`variants`数组 + - 将SKU数据聚合为嵌套的`skus`数组 - 计算扁平化价格字段(`min_price`, `max_price`, `compare_at_price`) - 字段映射(写死在代码中,不依赖配置) - 注入`tenant_id`字段 @@ -518,7 +518,7 @@ ranking: ### 6.6 Base配置(店匠通用) - ✅ SPU级别索引结构 -- ✅ 嵌套variants字段 +- ✅ 嵌套skus字段 - ✅ 统一索引(search_products) - ✅ 租户隔离(tenant_id) - ✅ 配置简化(移除MySQL相关配置) @@ -548,11 +548,11 @@ API返回格式不包含ES内部字段(`_id`, `_score`, `_source`),使用 { "results": [ { - "product_id": "123", + "spu_id": "123", "title": "蓝牙耳机", - "variants": [ + "skus": [ { - "variant_id": "456", + "sku_id": "456", "price": 199.99, "sku": "SKU-123-1", "stock": 50 @@ -569,8 +569,8 @@ API返回格式不包含ES内部字段(`_id`, `_score`, `_source`),使用 ``` **主要变化**: -- 结构化结果(`ProductResult`和`VariantResult`) -- 嵌套variants数组 +- 结构化结果(`SpuResult`和`SkuResult`) +- 嵌套skus数组 - 无ES内部字段 ### 8.2 租户隔离 @@ -610,14 +610,14 @@ Elasticsearch - **简单模式**:字符串列表(字段名),使用默认配置 ```json - ["categoryName_keyword", "brandName_keyword"] + ["category.keyword", "vendor.keyword"] ``` - **高级模式**:FacetConfig 对象列表,支持自定义配置 ```json [ { - "field": "categoryName_keyword", + "field": "category.keyword", "size": 15, "type": "terms" }, @@ -714,11 +714,3 @@ class FacetResult(BaseModel): **其他客户配置**:`config/schema/tenant1/config.yaml` --- - -## 9. 
相关文档 - -- `MULTILANG_FEATURE.md` - 多语言功能详细说明 -- `QUICKSTART.md` - 快速开始指南 -- `HighLevelDesign.md` - 高层设计文档 -- `IMPLEMENTATION_SUMMARY.md` - 实现总结 -- `商品数据源入ES配置规范.md` - 数据源配置规范 diff --git a/docs/索引字段说明.md b/docs/索引字段说明.md index 2f2ab35..ba0b688 100644 --- a/docs/索引字段说明.md +++ b/docs/索引字段说明.md @@ -11,7 +11,7 @@ - 原始数据与用户环境均为多语言,需根据语言路由到不同分析器/索引方式,在线搜索时也要考虑多语言的适配。 4. **搜索接口适配** - 接口简单,自动为多语言的数据源和 query 适配最优检索策略。 - - 返回的结果格式约定为店匠系列的 SPU/SKU(products/variants)嵌套结构。 +- 返回的结果格式约定为店匠系列的 SPU/SKU嵌套结构。 - 支撑 facet/过滤/排序业务需求:用户可以选择任何一个 keyword 或 HKText 类型的字段做筛选、聚合;也可以选择任何一个数值型字段做 Range 过滤或排序。 本文档详细说明了 Elasticsearch 索引中所有字段的类型、索引方式、数据来源等信息。 @@ -20,7 +20,7 @@ - **索引名称**: `search_products` - **索引级别**: SPU级别(商品级别) -- **数据结构**: SPU文档包含嵌套的variants(SKU)数组 +- **数据结构**: SPU文档包含嵌套的skus数组 ## 索引类型与处理说明 @@ -131,7 +131,7 @@ | 索引字段名 | ES字段类型 | 是否索引 | 数据来源表 | 表中字段名 | 表中字段类型 | 数据预处理 | 说明 | |-----------|-----------|---------|-----------|-----------|-------------|-------------|------| | tenant_id | KEYWORD | 是 | SPU表 | tenant_id | BIGINT | BIGINT转字符串 | 租户ID,用于多租户隔离 | -| product_id | KEYWORD | 是 | SPU表 | id | BIGINT | BIGINT转字符串 | 商品ID(SPU ID) | +| spu_id | KEYWORD | 是 | SPU表 | id | BIGINT | BIGINT转字符串 | SPU ID(主键) | | handle | KEYWORD | 是 | SPU表 | handle | VARCHAR(255) | | 商品URL handle | 数据预处理列留空表示该字段无需额外处理。 @@ -156,14 +156,9 @@ | 索引字段名 | ES字段类型 | 是否索引 | 数据来源表 | 表中字段名 | 表中字段类型 | Boost权重 | 是否返回 | 数据预处理 | 说明 | |-----------|-----------|---------|-----------|-----------|-------------|-----------|---------|-------------|------| -| vendor | TEXT | 是 | SPU表 | vendor | VARCHAR(255) | 1.5 | 是 | | 供应商/品牌(文本搜索) | -| vendor_keyword | HKText | 是 | SPU表 | vendor | VARCHAR(255) | - | 否 | 按逗号分割为list,去除空白项 | 供应商/品牌(HKText类型,keyword子字段用于过滤) | -| product_type | TEXT | 是 | SPU表 | category | VARCHAR(255) | 1.5 | 是 | | 商品类型(文本搜索) | -| product_type_keyword | HKText | 是 | SPU表 | category | VARCHAR(255) | - | 否 | 按逗号分割为list,去除空白项 | 商品类型(HKText类型) | -| tags | HKText | 是 | SPU表 | tags | VARCHAR(1024) | 1.0 | 是 | | 
标签(HKText类型,支持搜索+过滤) | -| tags_keyword | HKText | 是 | SPU表 | tags | VARCHAR(1024) | - | 否 | 按逗号分割为list,去除空白项 | 标签keyword别名(HKText类型) | -| category | TEXT | 是 | SPU表 | category | VARCHAR(255) | 1.5 | 是 | | 类目(文本搜索) | -| category_keyword | HKText | 是 | SPU表 | category | VARCHAR(255) | - | 否 | 按逗号分割为list,去除空白项 | 类目(HKText类型) | +| vendor | HKText | 是 | SPU表 | vendor | VARCHAR(255) | 1.5 | 是 | | 供应商/品牌,HKText字段自动提供 `vendor.keyword` 用于过滤、聚合 | +| tags | HKText | 是 | SPU表 | tags | VARCHAR(1024) | 1.0 | 是 | | 标签字段,支持模糊搜索;使用 `tags.keyword` 进行精确过滤 | +| category | HKText | 是 | SPU表 | category | VARCHAR(255) | 1.5 | 是 | | 类目字段,使用 `category.keyword` 进行过滤/分面 | ### 价格字段 @@ -204,23 +199,23 @@ | shoplazza_created_at | DATE | 是 | SPU表 | shoplazza_created_at | DATETIME | 否 | 转换为UTC ISO8601字符串 | 店匠系统创建时间 | | shoplazza_updated_at | DATE | 是 | SPU表 | shoplazza_updated_at | DATETIME | 否 | 转换为UTC ISO8601字符串 | 店匠系统更新时间 | -### 嵌套Variants字段(SKU级别) +### 嵌套SKUs字段(SKU级别) | 索引字段名 | ES字段类型 | 是否索引 | 数据来源表 | 表中字段名 | 表中字段类型 | 数据预处理 | 说明 | |-----------|-----------|---------|-----------|-----------|-------------|-------------|------| -| variants | JSON (nested) | 是 | SKU表 | - | - | 汇总同SPU下SKU记录,构建nested数组 | 商品变体数组(嵌套结构) | +| skus | JSON (nested) | 是 | SKU表 | - | - | 汇总同SPU下SKU记录,构建nested数组 | SKU数组(嵌套结构) | -#### Variants子字段 +#### SKUs子字段 | 索引字段名 | ES字段类型 | 是否索引 | 数据来源表 | 表中字段名 | 表中字段类型 | 数据预处理 | 说明 | |-----------|-----------|---------|-----------|-----------|-------------|-------------|------| -| variants.variant_id | keyword | 是 | SKU表 | id | BIGINT | BIGINT转字符串 | 变体ID(SKU ID) | -| variants.title | text | 是 | SKU表 | title | VARCHAR(500) | | 变体标题 | -| variants.price | float | 是 | SKU表 | price | DECIMAL(10,2) | DECIMAL转FLOAT | 变体价格 | -| variants.compare_at_price | float | 是 | SKU表 | compare_at_price | DECIMAL(10,2) | DECIMAL转FLOAT | 变体原价 | -| variants.sku | keyword | 是 | SKU表 | sku | VARCHAR(100) | | SKU编码 | -| variants.stock | long | 是 | SKU表 | inventory_quantity | INT(11) | INT转LONG | 库存数量 | -| 
variants.options | object | 是 | SKU表 | option1/option2/option3 | VARCHAR(255) | 合并option1/2/3并去除空值 | 选项(颜色、尺寸等) | +| skus.sku_id | keyword | 是 | SKU表 | id | BIGINT | BIGINT转字符串 | SKU ID | +| skus.title | text | 是 | SKU表 | title | VARCHAR(500) | | SKU标题 | +| skus.price | float | 是 | SKU表 | price | DECIMAL(10,2) | DECIMAL转FLOAT | SKU价格 | +| skus.compare_at_price | float | 是 | SKU表 | compare_at_price | DECIMAL(10,2) | DECIMAL转FLOAT | 原价 | +| skus.sku | keyword | 是 | SKU表 | sku | VARCHAR(100) | | SKU编码 | +| skus.stock | long | 是 | SKU表 | inventory_quantity | INT(11) | INT转LONG | 库存数量 | +| skus.options | object | 是 | SKU表 | option1/option2/option3 | VARCHAR(255) | 合并option1/2/3并去除空值 | 选项(颜色、尺寸等) | **Variants结构说明**: - `variants` 是一个嵌套对象数组,每个元素代表一个SKU @@ -285,7 +280,7 @@ ### 数据类型转换 -1. **BIGINT → KEYWORD**: 数字ID转换为字符串(如 `product_id`, `variant_id`) +1. **BIGINT → KEYWORD**: 数字ID转换为字符串(如 `spu_id`, `sku_id`) 2. **DECIMAL → FLOAT**: 价格字段从DECIMAL转换为FLOAT 3. **INT → LONG**: 库存数量从INT转换为LONG 4. **DATETIME → DATE**: 时间字段转换为ISO格式字符串 @@ -327,6 +322,53 @@ - `shoplazza_created_at`: DATETIME - 店匠创建时间 - `shoplazza_updated_at`: DATETIME - 店匠更新时间 +spu表全部字段 +"Field" "Type" "Null" "Key" "Default" "Extra" +"id" "bigint(20)" "NO" "PRI" "auto_increment" +"shop_id" "bigint(20)" "NO" "MUL" "" +"shoplazza_id" "varchar(64)" "NO" "" "" +"handle" "varchar(255)" "YES" "MUL" "" +"title" "varchar(500)" "NO" "" "" +"brief" "varchar(1000)" "YES" "" "" +"description" "text" "YES" "" "" +"spu" "varchar(100)" "YES" "" "" +"vendor" "varchar(255)" "YES" "" "" +"vendor_url" "varchar(500)" "YES" "" "" +"seo_title" "varchar(500)" "YES" "" "" +"seo_description" "text" "YES" "" "" +"seo_keywords" "text" "YES" "" "" +"image_src" "varchar(500)" "YES" "" "" +"image_width" "int(11)" "YES" "" "" +"image_height" "int(11)" "YES" "" "" +"image_path" "varchar(255)" "YES" "" "" +"image_alt" "varchar(500)" "YES" "" "" +"inventory_policy" "varchar(50)" "YES" "" "" +"inventory_quantity" "int(11)" "YES" "" "0" "" 
+"inventory_tracking" "tinyint(1)" "YES" "" "0" "" +"published" "tinyint(1)" "YES" "" "0" "" +"published_at" "datetime" "YES" "MUL" "" +"requires_shipping" "tinyint(1)" "YES" "" "1" "" +"taxable" "tinyint(1)" "YES" "" "0" "" +"fake_sales" "int(11)" "YES" "" "0" "" +"display_fake_sales" "tinyint(1)" "YES" "" "0" "" +"mixed_wholesale" "tinyint(1)" "YES" "" "0" "" +"need_variant_image" "tinyint(1)" "YES" "" "0" "" +"has_only_default_variant" "tinyint(1)" "YES" "" "0" "" +"tags" "text" "YES" "" "" +"note" "text" "YES" "" "" +"category" "varchar(255)" "YES" "" "" +"shoplazza_created_at" "datetime" "YES" "" "" +"shoplazza_updated_at" "datetime" "YES" "MUL" "" +"tenant_id" "bigint(20)" "NO" "MUL" "" +"creator" "varchar(64)" "YES" "" "" "" +"create_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "" +"updater" "varchar(64)" "YES" "" "" "" +"update_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "on update CURRENT_TIMESTAMP" +"deleted" "bit(1)" "NO" "" "b'0'" "" + + + + ### SKU表(shoplazza_product_sku) 主要字段: @@ -341,6 +383,40 @@ - `option2`: VARCHAR(255) - 选项2 - `option3`: VARCHAR(255) - 选项3 +sku全部字段 +"Field" "Type" "Null" "Key" "Default" "Extra" +"id" "bigint(20)" "NO" "PRI" "auto_increment" +"spu_id" "bigint(20)" "NO" "MUL" "" +"shop_id" "bigint(20)" "NO" "MUL" "" +"shoplazza_id" "varchar(64)" "NO" "" "" +"shoplazza_product_id" "varchar(64)" "NO" "MUL" "" +"shoplazza_image_id" "varchar(64)" "YES" "" "" +"title" "varchar(500)" "YES" "" "" +"sku" "varchar(100)" "YES" "MUL" "" +"barcode" "varchar(100)" "YES" "" "" +"position" "int(11)" "YES" "" "0" "" +"price" "decimal(10,2)" "YES" "" "" +"compare_at_price" "decimal(10,2)" "YES" "" "" +"cost_price" "decimal(10,2)" "YES" "" "" +"option1" "varchar(255)" "YES" "" "" +"option2" "varchar(255)" "YES" "" "" +"option3" "varchar(255)" "YES" "" "" +"inventory_quantity" "int(11)" "YES" "" "0" "" +"weight" "decimal(10,2)" "YES" "" "" +"weight_unit" "varchar(10)" "YES" "" "" +"image_src" "varchar(500)" "YES" "" "" +"wholesale_price" "json" "YES" 
"" "" +"note" "text" "YES" "" "" +"extend" "json" "YES" "" "" +"shoplazza_created_at" "datetime" "YES" "" "" +"shoplazza_updated_at" "datetime" "YES" "" "" +"tenant_id" "bigint(20)" "NO" "MUL" "" +"creator" "varchar(64)" "YES" "" "" "" +"create_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "" +"updater" "varchar(64)" "YES" "" "" "" +"update_time" "datetime" "NO" "" "CURRENT_TIMESTAMP" "on update CURRENT_TIMESTAMP" +"deleted" "bit(1)" "NO" "" "b'0'" "" + ## TODO 多语言问题。 diff --git a/docs/设计文档.md b/docs/设计文档.md new file mode 100644 index 0000000..a790b97 --- /dev/null +++ b/docs/设计文档.md @@ -0,0 +1,716 @@ +# 搜索引擎通用化开发进度 + +## 项目概述 + +对后端搜索技术 做通用化。 +通用化的本质 是 对于各种业务数据、各种检索需求,都可以 用少量定制+配置化 来实现效果。 + + +**通用化的本质**:对于各种业务数据、各种检索需求,都可以用少量定制+配置化来实现效果。 + +--- + +## 1. 原始数据层的约定 + +### 1.1 店匠主表 + +所有租户共用以下主表: +- `shoplazza_product_sku` - SKU级别商品数据 +- `shoplazza_product_spu` - SPU级别商品数据 + +### 1.2 索引结构(SPU维度) + +**统一索引架构**: +- 所有客户共享同一个Elasticsearch索引:`search_products` +- 索引粒度:SPU级别(每个文档代表一个SPU) +- 数据隔离:通过`tenant_id`字段实现租户隔离 +- 嵌套结构:每个SPU文档包含嵌套的`skus`数组(SKU变体) + +**索引文档结构**: +```json +{ + "tenant_id": "1", + "spu_id": "123", + "title": "蓝牙耳机", + "skus": [ + { + "sku_id": "456", + "title": "黑色", + "price": 199.99, + "sku": "SKU-123-1", + "stock": 50 + } + ], + "min_price": 199.99, + "max_price": 299.99 +} +``` + +### 1.3 配置化方案 + +**配置分离原则**: +- **搜索配置**:只包含ES字段定义、查询域、排序规则等搜索相关配置 +- **数据源配置**:不在搜索配置中,由Pipeline层(脚本)决定 +- **数据导入流程**:写死的脚本,不依赖配置 + +统一通过配置文件定义: +1. ES 字段定义(字段类型、分析器、boost等) +2. ES mapping 结构生成 +3. 查询域配置(indexes) +4. 排序和打分配置(function_score) + +**注意**:配置中**不包含**以下内容: +- `mysql_config` - MySQL数据库配置 +- `main_table` / `extension_table` - 数据表配置 +- `source_table` / `source_column` - 字段数据源映射 + +--- + +## 2. 
配置系统实现 + +### 2.1 应用结构配置(字段定义) + +**配置文件位置**:`config/schema/{tenant_id}_config.yaml` + +**配置内容**:定义了 ES 的输入数据有哪些字段、关联 MySQL 的哪些字段。 + +**实现情况**: + +#### 字段类型支持 +- **TEXT**:文本字段,支持多语言分析器 +- **KEYWORD**:关键词字段,用于精确匹配和聚合 +- **TEXT_EMBEDDING**:文本向量字段(1024维,dot_product相似度) +- **IMAGE_EMBEDDING**:图片向量字段(1024维,dot_product相似度) +- **INT/LONG**:整数类型 +- **FLOAT/DOUBLE**:浮点数类型 +- **DATE**:日期类型 +- **BOOLEAN**:布尔类型 + +#### 分析器支持 +- **chinese_ecommerce**:中文电商分词器(index_ansj/query_ansj) +- **english**:英文分析器 +- **russian**:俄文分析器 +- **arabic**:阿拉伯文分析器 +- **spanish**:西班牙文分析器 +- **japanese**:日文分析器 +- **standard**:标准分析器 +- **keyword**:关键词分析器 + +#### 字段配置示例(Base配置) + +```yaml +fields: + # 租户隔离字段(必需) + - name: "tenant_id" + type: "KEYWORD" + required: true + index: true + store: true + + # 商品标识字段 + - name: "spu_id" + type: "KEYWORD" + required: true + index: true + store: true + + # 文本搜索字段 + - name: "title" + type: "TEXT" + analyzer: "chinese_ecommerce" + boost: 3.0 + index: true + store: true + + - name: "seo_keywords" + type: "TEXT" + analyzer: "chinese_ecommerce" + boost: 2.0 + index: true + store: true + + # 嵌套skus字段 + - name: "skus" + type: "JSON" + nested: true + nested_properties: + sku_id: + type: "keyword" + price: + type: "float" + sku: + type: "keyword" +``` + +**注意**:配置中**不包含**`source_table`和`source_column`,数据源映射由Pipeline层决定。 + +**实现模块**: +- `config/config_loader.py` - 配置加载器 +- `config/field_types.py` - 字段类型定义 +- `indexer/mapping_generator.py` - ES mapping 生成器 +- `indexer/data_transformer.py` - 数据转换器 + +### 2.2 索引结构配置(查询域配置) + +**配置内容**:定义了 ES 的字段索引 mapping 配置,支持各个域的查询,包括默认域的查询。 + +**实现情况**: + +#### 域(Domain)配置 +每个域定义了: +- 域名称(如 `default`, `title`, `category`, `brand`) +- 域标签(中文描述) +- 搜索字段列表 +- 默认分析器 +- 权重(boost) +- **多语言字段映射**(`language_field_mapping`) + +#### 多语言字段映射 + +支持将不同语言的查询路由到对应的字段: + +```yaml +indexes: + - name: "default" + label: "默认索引" + fields: + - "name" + - "enSpuName" + - "ruSkuName" + - "categoryName" + - "brandName" + analyzer: "chinese_ecommerce" + boost: 1.0 
+ language_field_mapping: + zh: + - "name" + - "categoryName" + - "brandName" + en: + - "enSpuName" + ru: + - "ruSkuName" + + - name: "title" + label: "标题索引" + fields: + - "name" + - "enSpuName" + - "ruSkuName" + analyzer: "chinese_ecommerce" + boost: 2.0 + language_field_mapping: + zh: + - "name" + en: + - "enSpuName" + ru: + - "ruSkuName" +``` + +**工作原理**: +1. 检测查询语言(中文、英文、俄文等) +2. 如果查询语言在 `language_field_mapping` 中,使用原始查询搜索对应语言的字段 +3. 将查询翻译到其他支持的语言,分别搜索对应语言的字段 +4. 组合多个语言查询的结果,提高召回率 + +**实现模块**: +- `search/multilang_query_builder.py` - 多语言查询构建器 +- `query/query_parser.py` - 查询解析器(支持语言检测和翻译) + +--- + +## 3. 数据导入流程 + +### 3.1 数据源 + +**店匠标准表**(Base配置使用): +- `shoplazza_product_spu` - SPU级别商品数据 +- `shoplazza_product_sku` - SKU级别商品数据 + +**其他客户表**(tenant1等): +- 使用各自的数据源表和扩展表 + +### 3.2 数据导入方式 + +**Pipeline层决定数据源**: +- 数据导入流程是写死的脚本,不依赖配置 +- 配置只关注ES搜索相关的内容 +- 数据源映射逻辑写死在转换器代码中 + +#### Base配置数据导入(店匠通用) + +**脚本**:`scripts/ingest_shoplazza.py` + +**数据流程**: +1. **数据加载**:从MySQL读取`shoplazza_product_spu`和`shoplazza_product_sku`表 +2. **数据转换**(`indexer/spu_transformer.py`): + - 按`spu_id`和`tenant_id`关联SPU和SKU数据 + - 将SKU数据聚合为嵌套的`skus`数组 + - 计算扁平化价格字段(`min_price`, `max_price`, `compare_at_price`) + - 字段映射(写死在代码中,不依赖配置) + - 注入`tenant_id`字段 +3. **索引创建**: + - 根据配置生成ES mapping + - 创建或更新`search_products`索引 +4. **批量入库**: + - 批量写入ES(默认每批500条) + - 错误处理和重试机制 + +**命令行工具**: +```bash +python scripts/ingest_shoplazza.py \ + --db-host localhost \ + --db-port 3306 \ + --db-database saas \ + --db-username root \ + --db-password password \ + --tenant-id "1" \ + --config base \ + --es-host http://localhost:9200 \ + --recreate \ + --batch-size 500 +``` + +#### 其他客户数据导入 + +- 使用各自的数据转换器(如`indexer/data_transformer.py`) +- 数据源映射逻辑写死在各自的转换器中 +- 共享`search_products`索引,通过`tenant_id`隔离 + +**实现模块**: +- `indexer/spu_transformer.py` - SPU数据转换器(Base配置) +- `indexer/data_transformer.py` - 通用数据转换器(其他客户) +- `indexer/bulk_indexer.py` - 批量索引器 +- `scripts/ingest_shoplazza.py` - 店匠数据导入脚本 + +--- + +## 4. 
QueryParser 实现 + + +### 4.1 查询改写(Query Rewriting) + +配置词典的key是query,value是改写后的查询表达式。比如品牌词可以改写为 brand|query OR name|query,类别词、标签词等也都可以放进去。纠错、规范化、查询改写等都可以通过这个词典来配置。 +**实现情况**: + +#### 配置方式 +在 `query_config.rewrite_dictionary` 中配置查询改写规则: + +```yaml +query_config: + enable_query_rewrite: true + rewrite_dictionary: + "芭比": "brand:芭比 OR name:芭比娃娃" + "玩具": "category:玩具" + "消防": "category:消防 OR name:消防" +``` + +#### 功能特性 +- **精确匹配**:查询完全匹配词典 key 时,替换为 value +- **部分匹配**:查询包含词典 key 时,替换该部分 +- **支持布尔表达式**:value 可以是复杂的布尔表达式(AND, OR, 域查询等) + +#### 实现模块 +- `query/query_rewriter.py` - 查询改写器 +- `query/query_parser.py` - 查询解析器(集成改写功能) + +### 4.2 翻译(Translation) + +**实现情况**: + +#### 配置方式 +```yaml +query_config: + supported_languages: + - "zh" + - "en" + - "ru" + default_language: "zh" + enable_translation: true + translation_service: "deepl" + translation_api_key: null # 通过环境变量设置 +``` + +#### 功能特性 +1. **语言检测**:自动检测查询语言 +2. **智能翻译**: + - 如果查询是中文,翻译为英文、俄文 + - 如果查询是英文,翻译为中文、俄文 + - 如果查询是其他语言,翻译为所有支持的语言 +3. **域感知翻译**: + - 如果域有 `language_field_mapping`,只翻译到映射中存在的语言 + - 避免不必要的翻译,提高效率 +4. **翻译缓存**:缓存翻译结果,避免重复调用 API + +#### 工作流程 +``` +查询输入 → 语言检测 → 确定目标语言 → 翻译 → 多语言查询构建 +``` + +#### 实现模块 +- `query/language_detector.py` - 语言检测器 +- `query/translator.py` - 翻译器(DeepL API) +- `query/query_parser.py` - 查询解析器(集成翻译功能) + +### 4.3 文本向量化(Text Embedding) + +如果配置打开了text_embedding查询,并且query 包含了default域的查询,那么要把default域的查询词转向量,后面searcher会用这个向量参与查询。 + +**实现情况**: + +#### 配置方式 +```yaml +query_config: + enable_text_embedding: true +``` + +#### 功能特性 +1. **条件生成**: + - 仅当 `enable_text_embedding=true` 时生成向量 + - 仅对 `default` 域查询生成向量 +2. **向量模型**:BGE-M3 模型(1024维向量) +3. **用途**:用于语义搜索(KNN 检索) + +#### 实现模块 +- `embeddings/bge_encoder.py` - BGE 文本编码器 +- `query/query_parser.py` - 查询解析器(集成向量生成) + +--- + +## 5.
Searcher 实现 + +参考opensearch,他们自己定义的一套索引结构配置、支持自定义的一套检索表达式、排序表达式,这是各个客户进行配置化的基础,包括索引结构配置、排序策略配置。 +比如各种业务过滤策略 可以简单的通过表达式满足,比如brand|耐克 AND cate2|xxx。指定字段排序可以通过排序的表达式实现。 + +查询默认在default域,系统也会对这个域的查询做一些相关性的重点优化,包括融合语义相关性、多语言相关性(可以基于配置 将查询翻译到指定语言并在对应的语言的字段进行查询)来弥补传统查询分析手段(比如查询改写 纠错 词权重等)的不足,也支持通过配置一些词表转为泛查询模式来优化相关性。 + +### 5.1 布尔表达式解析 + +**实现情况**: + +#### 支持的运算符 +- **AND**:所有项必须匹配 +- **OR**:任意项匹配 +- **RANK**:排序增强(类似 OR 但影响排序) +- **ANDNOT**:排除(第一项匹配,第二项不匹配) +- **()**:括号分组 + +#### 优先级(从高到低) +1. `()` - 括号 +2. `ANDNOT` - 排除 +3. `AND` - 与 +4. `OR` - 或 +5. `RANK` - 排序 + +#### 示例 +``` +laptop AND (gaming OR professional) ANDNOT cheap +``` + +#### 实现模块 +- `search/boolean_parser.py` - 布尔表达式解析器 +- `search/searcher.py` - 搜索器(集成布尔解析) + +### 5.2 多语言搜索 + +**实现情况**: + +#### 工作原理 +1. **查询解析**: + - 提取域(如 `title:查询` → 域=`title`,查询=`查询`) + - 检测查询语言 + - 生成翻译 +2. **多语言查询构建**: + - 如果域有 `language_field_mapping`: + - 使用检测到的语言查询对应字段(boost * 1.5) + - 使用翻译后的查询搜索其他语言字段(boost * 1.0) + - 如果域没有 `language_field_mapping`: + - 使用所有字段进行搜索 +3.
**查询组合**: + - 多个语言查询组合为 `should` 子句 + - 提高召回率 + +#### 示例 +``` +查询: "芭比娃娃" +域: default +检测语言: zh + +生成的查询: +- 中文查询 "芭比娃娃" → 搜索 name, categoryName, brandName (boost * 1.5) +- 英文翻译 "Barbie doll" → 搜索 enSpuName (boost * 1.0) +- 俄文翻译 "Кукла Барби" → 搜索 ruSkuName (boost * 1.0) +``` + +#### 实现模块 +- `search/multilang_query_builder.py` - 多语言查询构建器 +- `search/searcher.py` - 搜索器(使用多语言构建器) + +### 5.3 相关性计算(Ranking) + +**实现情况**: + +#### 当前实现 +**公式**:`bm25() + 0.2 * text_embedding_relevance()` + +- **bm25()**:BM25 文本相关性得分 + - 包括多语言打分 + - 内部通过配置翻译为多种语言 + - 分别到对应的字段搜索 + - 中文字段使用中文分词器,英文字段使用英文分词器 +- **text_embedding_relevance()**:文本向量相关性得分(KNN 检索的打分) + - 权重:0.2 + +#### 配置方式 +```yaml +ranking: + expression: "bm25() + 0.2*text_embedding_relevance()" + description: "BM25 text relevance combined with semantic embedding similarity" +``` + +#### 扩展性 +- 支持表达式配置(未来可扩展) +- 支持自定义函数(如 `timeliness()`, `field_value()`) + +#### 实现模块 +- `search/ranking_engine.py` - 排序引擎 +- `search/searcher.py` - 搜索器(集成排序功能) + +--- + +## 6. 已完成功能总结 + +### 6.1 配置系统 +- ✅ 字段定义配置(类型、分析器、来源表/列) +- ✅ 索引域配置(多域查询、多语言映射) +- ✅ 查询配置(改写词典、翻译配置) +- ✅ 排序配置(表达式配置) +- ✅ 配置验证(字段存在性、类型检查、分析器匹配) + +### 6.2 数据索引 +- ✅ 数据转换(字段映射、类型转换) +- ✅ 向量生成(文本向量、图片向量) +- ✅ 向量缓存(避免重复计算) +- ✅ 批量索引(错误处理、重试机制) +- ✅ ES mapping 自动生成 + +### 6.3 查询处理 +- ✅ 查询改写(词典配置) +- ✅ 语言检测 +- ✅ 多语言翻译(DeepL API) +- ✅ 文本向量化(BGE-M3) +- ✅ 域提取(支持 `domain:query` 语法) + +### 6.4 搜索功能 +- ✅ 布尔表达式解析(AND, OR, RANK, ANDNOT, 括号) +- ✅ 多语言查询构建(语言路由、字段映射) +- ✅ 语义搜索(KNN 检索) +- ✅ 相关性排序(BM25 + 向量相似度) +- ✅ 结果聚合(Faceted Search) + +### 6.5 API 服务 +- ✅ RESTful API(FastAPI) +- ✅ 搜索接口(文本搜索、图片搜索) +- ✅ 文档查询接口 +- ✅ 前端界面(HTML + JavaScript) +- ✅ 租户隔离(tenant_id过滤) + +### 6.6 Base配置(店匠通用) +- ✅ SPU级别索引结构 +- ✅ 嵌套skus字段 +- ✅ 统一索引(search_products) +- ✅ 租户隔离(tenant_id) +- ✅ 配置简化(移除MySQL相关配置) + +--- + +## 7. 技术栈 + +- **后端**:Python 3.6+ +- **搜索引擎**:Elasticsearch +- **数据库**:MySQL(Shoplazza) +- **向量模型**:BGE-M3(文本)、CN-CLIP(图片) +- **翻译服务**:DeepL API +- **API 框架**:FastAPI +- **前端**:HTML + JavaScript + +--- + +## 8. 
API响应格式 + +### 8.1 外部友好格式 + +API返回格式不包含ES内部字段(`_id`, `_score`, `_source`),使用外部友好的格式: + +**响应结构**: +```json +{ + "results": [ + { + "spu_id": "123", + "title": "蓝牙耳机", + "skus": [ + { + "sku_id": "456", + "price": 199.99, + "sku": "SKU-123-1", + "stock": 50 + } + ], + "relevance_score": 0.95 + } + ], + "total": 10, + "facets": [...], + "suggestions": [], + "related_searches": [] +} +``` + +**主要变化**: +- 结构化结果(`SpuResult`和`SkuResult`) +- 嵌套skus数组 +- 无ES内部字段 + +### 8.2 租户隔离 + +所有API请求必须提供`tenant_id`: +- 请求头:`X-Tenant-ID: 1` +- 或查询参数:`?tenant_id=1` + +搜索时自动添加`tenant_id`过滤,确保数据隔离。 + +### 8.3 数据接口约定 + +**统一的数据约定格式**:所有API接口使用 Pydantic 模型进行数据验证和序列化。 + +#### 8.3.1 数据流模式 + +系统采用统一的数据流模式,确保数据在各层之间的一致性: + +**数据流转路径**: +``` +API Request (JSON) + ↓ +Pydantic 验证 → 结构化模型(RangeFilter, FacetConfig 等) + ↓ +Searcher(透传) + ↓ +ES Query Builder → model_dump() 转换为字典 + ↓ +ES Query (字典) + ↓ +Elasticsearch +``` + +#### 8.3.2 Facets 配置数据流 + +**输入格式**:`List[Union[str, FacetConfig]]` + +- **简单模式**:字符串列表(字段名),使用默认配置 + ```json + ["category.keyword", "vendor.keyword"] + ``` + +- **高级模式**:FacetConfig 对象列表,支持自定义配置 + ```json + [ + { + "field": "category.keyword", + "size": 15, + "type": "terms" + }, + { + "field": "price", + "type": "range", + "ranges": [ + {"key": "0-50", "to": 50}, + {"key": "50-100", "from": 50, "to": 100} + ] + } + ] + ``` + +**数据流**: +1. API 层:接收 `List[Union[str, FacetConfig]]` +2. Searcher 层:透传,不做转换 +3. ES Query Builder:只接受 `str` 或 `FacetConfig`,自动处理两种格式 +4. 输出:转换为 ES 聚合查询 + +#### 8.3.3 Range Filters 数据流 + +**输入格式**:`Dict[str, RangeFilter]` + +**RangeFilter 模型**: +```python +class RangeFilter(BaseModel): + gte: Optional[Union[float, str]] # 大于等于 + gt: Optional[Union[float, str]] # 大于 + lte: Optional[Union[float, str]] # 小于等于 + lt: Optional[Union[float, str]] # 小于 +``` + +**示例**: +```json +{ + "price": {"gte": 50, "lte": 200}, + "created_at": {"gte": "2023-01-01T00:00:00Z"} +} +``` + +**数据流**: +1. API 层:接收 `Dict[str, RangeFilter]`,Pydantic 自动验证 +2.
Searcher 层:透传 `Dict[str, RangeFilter]` +3. ES Query Builder:调用 `range_filter.model_dump()` 转换为字典 +4. 输出:ES range 查询(支持数值和日期) + +**特性**: +- 自动验证:确保至少指定一个边界值(gte, gt, lte, lt) +- 类型支持:支持数值(float)和日期时间字符串(ISO 格式) +- 统一约定:所有范围过滤都使用 RangeFilter 模型 + +#### 8.3.4 响应 Facets 数据流 + +**输出格式**:`List[FacetResult]` + +**FacetResult 模型**: +```python +class FacetResult(BaseModel): + field: str # 字段名 + label: str # 显示标签 + type: Literal["terms", "range"] # 分面类型 + values: List[FacetValue] # 分面值列表 + total_count: Optional[int] # 总文档数 +``` + +**数据流**: +1. ES Response:返回聚合结果(字典格式) +2. Searcher 层:构建 `List[FacetResult]` 对象 +3. API 层:直接返回 `List[FacetResult]`(Pydantic 自动序列化为 JSON) + +**优势**: +- 类型安全:使用 Pydantic 模型确保数据结构一致性 +- 自动序列化:模型自动转换为 JSON,无需手动处理 +- 统一约定:所有响应都使用标准化的 Pydantic 模型 + +#### 8.3.5 统一约定的好处 + +1. **类型安全**:使用 Pydantic 模型提供运行时类型检查和验证 +2. **代码一致性**:所有层使用相同的数据模型,减少转换错误 +3. **自动文档**:FastAPI 自动生成 API 文档(基于 Pydantic 模型) +4. **易于维护**:修改数据结构只需更新模型定义 +5. **数据验证**:自动验证输入数据,减少错误处理代码 + +**实现模块**: +- `api/models.py` - 所有 Pydantic 模型定义 +- `api/result_formatter.py` - 结果格式化器(ES 响应 → Pydantic 模型) +- `search/es_query_builder.py` - ES 查询构建器(Pydantic 模型 → ES 查询) + +## 9. 
配置文件示例 + +**Base配置**(店匠通用):`config/schema/base/config.yaml` + +**其他客户配置**:`config/schema/tenant1/config.yaml` + +--- diff --git a/frontend/README.md b/frontend/README.md index 4ca45ed..8eac773 100644 --- a/frontend/README.md +++ b/frontend/README.md @@ -105,7 +105,7 @@ POST http://120.76.41.98:6002/search/ "size": 20, // 每页结果数 "from": 0, // 偏移量(分页) "filters": { // 筛选条件 - "categoryName_keyword": ["玩具"], + "category.keyword": ["玩具"], "price": {"from": 50, "to": 100} }, "aggregations": {...}, // 聚合配置 diff --git a/frontend/static/js/app.js b/frontend/static/js/app.js index 0718480..2e86148 100644 --- a/frontend/static/js/app.js +++ b/frontend/static/js/app.js @@ -74,17 +74,17 @@ async function performSearch(page = 1) { // Define facets (简化配置) const facets = [ { - "field": "category_keyword", + "field": "category.keyword", "size": 15, "type": "terms" }, { - "field": "vendor_keyword", + "field": "vendor.keyword", "size": 15, "type": "terms" }, { - "field": "tags_keyword", + "field": "tags.keyword", "size": 10, "type": "terms" }, @@ -219,13 +219,13 @@ function displayFacets(facets) { let containerId = null; let maxDisplay = 10; - if (facet.field === 'category_keyword') { + if (facet.field === 'category.keyword') { containerId = 'categoryTags'; maxDisplay = 10; - } else if (facet.field === 'vendor_keyword') { + } else if (facet.field === 'vendor.keyword') { containerId = 'brandTags'; maxDisplay = 10; - } else if (facet.field === 'tags_keyword') { + } else if (facet.field === 'tags.keyword') { containerId = 'supplierTags'; maxDisplay = 8; } @@ -351,7 +351,7 @@ function updateClearFiltersButton() { // Update product count function updateProductCount(total) { - document.getElementById('productCount').textContent = `${total.toLocaleString()} products found`; + document.getElementById('productCount').textContent = `${total.toLocaleString()} SPUs found`; } // Sort functions diff --git a/frontend/static/js/app_base.js b/frontend/static/js/app_base.js index 2b9de67..c72bbc5 100644 
--- a/frontend/static/js/app_base.js +++ b/frontend/static/js/app_base.js @@ -72,17 +72,17 @@ async function performSearch(page = 1) { // Define facets (简化配置) const facets = [ { - "field": "category_keyword", + "field": "category.keyword", "size": 15, "type": "terms" }, { - "field": "vendor_keyword", + "field": "vendor.keyword", "size": 15, "type": "terms" }, { - "field": "tags_keyword", + "field": "tags.keyword", "size": 10, "type": "terms" }, @@ -168,14 +168,14 @@ function displayResults(data) { let html = ''; - data.results.forEach((product) => { - const score = product.relevance_score; + data.results.forEach((spu) => { + const score = spu.relevance_score; html += `
- ${product.image_url ? ` - ${escapeHtml(product.title)} ` : ` @@ -184,21 +184,21 @@ function displayResults(data) {
- ${product.price ? `$${product.price.toFixed(2)}` : 'N/A'}${product.compare_at_price && product.compare_at_price > product.price ? `$${product.compare_at_price.toFixed(2)}` : ''} + ${spu.price ? `$${spu.price.toFixed(2)}` : 'N/A'}${spu.compare_at_price && spu.compare_at_price > spu.price ? `$${spu.compare_at_price.toFixed(2)}` : ''}
- ${product.in_stock ? 'In Stock' : 'Out of Stock'} - ${product.variants && product.variants.length > 0 ? `(${product.variants.length} variants)` : ''} + ${spu.in_stock ? 'In Stock' : 'Out of Stock'} + ${spu.skus && spu.skus.length > 0 ? `(${spu.skus.length} skus)` : ''}
- ${escapeHtml(product.title || 'N/A')} + ${escapeHtml(spu.title || 'N/A')}
-
${product.vendor ? escapeHtml(product.vendor) : ''}${product.product_type ? ' | ' + escapeHtml(product.product_type) : ''}${product.category ? ' | ' + escapeHtml(product.category) : ''} ${product.tags ? ` +
${spu.vendor ? escapeHtml(spu.vendor) : ''}${spu.category ? ' | ' + escapeHtml(spu.category) : ''} ${spu.tags ? `
- Tags: ${escapeHtml(product.tags)} + Tags: ${escapeHtml(spu.tags)}
` : ''}
@@ -217,13 +217,13 @@ function displayFacets(facets) { let containerId = null; let maxDisplay = 10; - if (facet.field === 'category_keyword') { + if (facet.field === 'category.keyword') { containerId = 'categoryTags'; maxDisplay = 10; - } else if (facet.field === 'vendor_keyword') { + } else if (facet.field === 'vendor.keyword') { containerId = 'brandTags'; maxDisplay = 10; - } else if (facet.field === 'tags_keyword') { + } else if (facet.field === 'tags.keyword') { containerId = 'supplierTags'; maxDisplay = 8; } @@ -349,7 +349,7 @@ function updateClearFiltersButton() { // Update product count function updateProductCount(total) { - document.getElementById('productCount').textContent = `${total.toLocaleString()} products found`; + document.getElementById('productCount').textContent = `${total.toLocaleString()} SPUs found`; } // Sort functions diff --git a/indexer/spu_transformer.py b/indexer/spu_transformer.py index f72c75a..aa4fcba 100644 --- a/indexer/spu_transformer.py +++ b/indexer/spu_transformer.py @@ -1,7 +1,7 @@ """ SPU data transformer for Shoplazza products. -Transforms SPU and SKU data from MySQL into SPU-level ES documents with nested variants. +Transforms SPU and SKU data from MySQL into SPU-level ES documents with nested skus. 
""" import pandas as pd @@ -165,8 +165,8 @@ class SPUTransformer: # Tenant ID (required) doc['tenant_id'] = str(self.tenant_id) - # Product ID - doc['product_id'] = str(spu_row['id']) + # SPU ID + doc['spu_id'] = str(spu_row['id']) # Handle if pd.notna(spu_row.get('handle')): @@ -195,23 +195,14 @@ class SPUTransformer: # Vendor if pd.notna(spu_row.get('vendor')): doc['vendor'] = str(spu_row['vendor']) - doc['vendor_keyword'] = str(spu_row['vendor']) - - # Product type (from category or tags) - if pd.notna(spu_row.get('category')): - doc['product_type'] = str(spu_row['category']) - doc['product_type_keyword'] = str(spu_row['category']) # Tags if pd.notna(spu_row.get('tags')): - tags_str = str(spu_row['tags']) - doc['tags'] = tags_str - doc['tags_keyword'] = tags_str + doc['tags'] = str(spu_row['tags']) # Category if pd.notna(spu_row.get('category')): doc['category'] = str(spu_row['category']) - doc['category_keyword'] = str(spu_row['category']) # Image URL if pd.notna(spu_row.get('image_src')): @@ -220,27 +211,27 @@ class SPUTransformer: image_src = f"//{image_src}" if image_src.startswith('//') else image_src doc['image_url'] = image_src - # Process variants - variants = [] + # Process SKUs + skus_list = [] prices = [] compare_prices = [] for _, sku_row in skus.iterrows(): - variant = self._transform_sku_to_variant(sku_row) - if variant: - variants.append(variant) - if 'price' in variant and variant['price'] is not None: + sku_data = self._transform_sku_row(sku_row) + if sku_data: + skus_list.append(sku_data) + if 'price' in sku_data and sku_data['price'] is not None: try: - prices.append(float(variant['price'])) + prices.append(float(sku_data['price'])) except (ValueError, TypeError): pass - if 'compare_at_price' in variant and variant['compare_at_price'] is not None: + if 'compare_at_price' in sku_data and sku_data['compare_at_price'] is not None: try: - compare_prices.append(float(variant['compare_at_price'])) + 
compare_prices.append(float(sku_data['compare_at_price'])) except (ValueError, TypeError): pass - doc['variants'] = variants + doc['skus'] = skus_list # Calculate price ranges if prices: @@ -286,55 +277,55 @@ class SPUTransformer: return doc - def _transform_sku_to_variant(self, sku_row: pd.Series) -> Optional[Dict[str, Any]]: + def _transform_sku_row(self, sku_row: pd.Series) -> Optional[Dict[str, Any]]: """ - Transform a SKU row into a variant object. + Transform a SKU row into a SKU object. Args: sku_row: SKU row from database Returns: - Variant dictionary or None + SKU dictionary or None """ - variant = {} + sku_data = {} - # Variant ID - variant['variant_id'] = str(sku_row['id']) + # SKU ID + sku_data['sku_id'] = str(sku_row['id']) # Title if pd.notna(sku_row.get('title')): - variant['title'] = str(sku_row['title']) + sku_data['title'] = str(sku_row['title']) # Price if pd.notna(sku_row.get('price')): try: - variant['price'] = float(sku_row['price']) + sku_data['price'] = float(sku_row['price']) except (ValueError, TypeError): - variant['price'] = None + sku_data['price'] = None else: - variant['price'] = None + sku_data['price'] = None # Compare at price if pd.notna(sku_row.get('compare_at_price')): try: - variant['compare_at_price'] = float(sku_row['compare_at_price']) + sku_data['compare_at_price'] = float(sku_row['compare_at_price']) except (ValueError, TypeError): - variant['compare_at_price'] = None + sku_data['compare_at_price'] = None else: - variant['compare_at_price'] = None + sku_data['compare_at_price'] = None # SKU if pd.notna(sku_row.get('sku')): - variant['sku'] = str(sku_row['sku']) + sku_data['sku'] = str(sku_row['sku']) # Stock if pd.notna(sku_row.get('inventory_quantity')): try: - variant['stock'] = int(sku_row['inventory_quantity']) + sku_data['stock'] = int(sku_row['inventory_quantity']) except (ValueError, TypeError): - variant['stock'] = 0 + sku_data['stock'] = 0 else: - variant['stock'] = 0 + sku_data['stock'] = 0 # Options (from 
option1, option2, option3) options = {} @@ -346,7 +337,7 @@ class SPUTransformer: options['option3'] = str(sku_row['option3']) if options: - variant['options'] = options + sku_data['options'] = options - return variant + return sku_data diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py index 54ec74d..ecf1067 100644 --- a/scripts/generate_test_data.py +++ b/scripts/generate_test_data.py @@ -2,7 +2,7 @@ """ Generate test data for Shoplazza SPU and SKU tables. -Generates 100 SPU records with 1-5 SKU variants each. +Generates 100 SPU records with 1-5 SKUs each. """ import sys @@ -145,15 +145,15 @@ def generate_sku_data(spus: list, start_sku_id: int = 1): for spu in spus: spu_id = spu['id'] - num_variants = random.randint(1, 5) + num_skus = random.randint(1, 5) # Base price base_price = random.uniform(50, 500) - for i in range(num_variants): + for i in range(num_skus): # Generate variant options - color = random.choice(colors) if num_variants > 1 else None - size = random.choice(sizes) if num_variants > 2 else None + color = random.choice(colors) if num_skus > 1 else None + size = random.choice(sizes) if num_skus > 2 else None # Generate title title_parts = [] @@ -400,7 +400,7 @@ def main(): start_sku_id = 1 print(f"Using start SPU ID: {start_spu_id}, start SKU ID: {start_sku_id}") - print(f"Generating {args.num_spus} SPUs with variants...") + print(f"Generating {args.num_spus} SPUs with skus...") # Generate SPU data spus = generate_spu_data(args.num_spus, args.tenant_id, start_spu_id) diff --git a/scripts/ingest_shoplazza.py b/scripts/ingest_shoplazza.py index 2debe72..c8a8924 100644 --- a/scripts/ingest_shoplazza.py +++ b/scripts/ingest_shoplazza.py @@ -141,7 +141,7 @@ def main(): indexer = BulkIndexer(es_client, index_name, batch_size=args.batch_size) try: - results = indexer.index_documents(documents, id_field="product_id", show_progress=True) + results = indexer.index_documents(documents, id_field="spu_id", show_progress=True) 
print(f"\nIngestion complete:") print(f" Success: {results['success']}") print(f" Failed: {results['failed']}") diff --git a/scripts/test_base.py b/scripts/test_base.py index 5e42b28..3e80dcd 100644 --- a/scripts/test_base.py +++ b/scripts/test_base.py @@ -85,7 +85,7 @@ def validate_response_format(data: dict): # Validate first result structure if results: result = results[0] - required_fields = ['product_id', 'title', 'variants', 'relevance_score'] + required_fields = ['spu_id', 'title', 'skus', 'relevance_score'] for field in required_fields: if field not in result: errors.append(f"Result missing required field: {field}") @@ -96,17 +96,17 @@ def validate_response_format(data: dict): if field in result: errors.append(f"Result contains ES internal field: {field}") - # Validate variants - if 'variants' in result: - variants = result['variants'] - if not isinstance(variants, list): - errors.append("'variants' should be a list") - elif variants: - variant = variants[0] - variant_required = ['variant_id', 'price', 'sku', 'stock'] - for field in variant_required: - if field not in variant: - errors.append(f"Variant missing required field: {field}") + # Validate skus + if 'skus' in result: + skus = result['skus'] + if not isinstance(skus, list): + errors.append("'skus' should be a list") + elif skus: + sku = skus[0] + sku_required = ['sku_id', 'price', 'sku', 'stock'] + for field in sku_required: + if field not in sku: + errors.append(f"SKU missing required field: {field}") # Check for suggestions and related_searches if 'suggestions' not in data: @@ -136,7 +136,7 @@ def test_facets(base_url: str, tenant_id: str): payload = { "query": "商品", "size": 10, - "facets": ["category_keyword", "vendor_keyword"] + "facets": ["category.keyword", "vendor.keyword"] } print(f"\nTesting facets:") @@ -179,8 +179,8 @@ def test_tenant_isolation(base_url: str, tenant_id_1: str, tenant_id_2: str): data2 = test_search_api(base_url, tenant_id_2, "商品") if data1 and data2: - results1 = 
set(r.get('product_id') for r in data1.get('results', [])) - results2 = set(r.get('product_id') for r in data2.get('results', [])) + results1 = set(r.get('spu_id') for r in data1.get('results', [])) + results2 = set(r.get('spu_id') for r in data2.get('results', [])) overlap = results1 & results2 if overlap: diff --git a/search/searcher.py b/search/searcher.py index ee0443f..49d1fab 100644 --- a/search/searcher.py +++ b/search/searcher.py @@ -29,7 +29,7 @@ class SearchResult: def __init__( self, - results: List[Any], # List[ProductResult] + results: List[Any], # List[SpuResult] total: int, max_score: float, took_ms: int, -- libgit2 0.21.2