diff --git a/indexer/Untitled b/indexer/Untitled new file mode 100644 index 0000000..9527fa1 --- /dev/null +++ b/indexer/Untitled @@ -0,0 +1 @@ +taxonomy \ No newline at end of file diff --git a/indexer/taxonomy.md b/indexer/taxonomy.md new file mode 100644 index 0000000..1da8cee --- /dev/null +++ b/indexer/taxonomy.md @@ -0,0 +1,128 @@ + +服装大类的taxonomy + +### A. Product Classification + +| 一级层级 | 中文列名 | English Column Name | +| ------------------------- | ---- | ------------------- | +| A. Product Classification | 品类 | Product Type | +| A. Product Classification | 目标性别 | Target Gender | +| A. Product Classification | 年龄段 | Age Group | +| A. Product Classification | 适用季节 | Season | + +### B. Garment Design + +| 一级层级 | 中文列名 | English Column Name | +| ----------------- | ---- | ------------------- | +| B. Garment Design | 版型 | Fit | +| B. Garment Design | 廓形 | Silhouette | +| B. Garment Design | 领型 | Neckline | +| B. Garment Design | 袖长 | Sleeve Length Type | +| B. Garment Design | 袖型 | Sleeve Style | +| B. Garment Design | 肩带设计 | Strap Type | +| B. Garment Design | 腰型 | Rise / Waistline | +| B. Garment Design | 裤型 | Leg Shape | +| B. Garment Design | 裙型 | Skirt Shape | +| B. Garment Design | 长度 | Length Type | +| B. Garment Design | 闭合方式 | Closure Type | +| B. Garment Design | 设计细节 | Design Details | + +### C. Material & Performance + +| 一级层级 | 中文列名 | English Column Name | +| ------------------------- | ----------- | -------------------- | +| C. Material & Performance | 面料 | Fabric | +| C. Material & Performance | 成分 | Material Composition | +| C. Material & Performance | 面料特性 | Fabric Properties | +| C. Material & Performance | 服装特征 / 功能细节 | Clothing Features | +| C. Material & Performance | 功能 | Functional Benefits | + +### D. Merchandising Attributes + +| 一级层级 | 中文列名 | English Column Name | +| --------------------------- | ------- | ------------------- | +| D. Merchandising Attributes | 主颜色 | Color | +| D. Merchandising Attributes | 色系 | Color Family | +| D. 
Merchandising Attributes | 印花 / 图案 | Print / Pattern | +| D. Merchandising Attributes | 适用场景 | Occasion / End Use | +| D. Merchandising Attributes | 风格 | Style Aesthetic | + + + +根据这个产生 +enriched_taxonomy_attributes + +```text +Product Type +Target Gender +Age Group +Season +Fit +Silhouette +Neckline +Sleeve Length Type +Sleeve Style +Strap Type +Rise / Waistline +Leg Shape +Skirt Shape +Length Type +Closure Type +Design Details +Fabric +Material Composition +Fabric Properties +Clothing Features +Functional Benefits +Color +Color Family +Print / Pattern +Occasion / End Use +Style Aesthetic +``` + +提示词: + +```python +SHARED_ANALYSIS_INSTRUCTION = """ +Analyze each input product text and fill the columns below using an apparel attribute taxonomy. + +Output columns: +1. Product Type: concise ecommerce apparel category label, not a full marketing title +2. Target Gender: intended gender only if clearly implied +3. Age Group: only if clearly implied, e.g. adults, kids, teens, toddlers, babies +4. Season: season(s) or all-season suitability only if supported +5. Fit: body closeness, e.g. slim, regular, relaxed, oversized, fitted +6. Silhouette: overall garment shape, e.g. straight, A-line, boxy, tapered, bodycon, wide-leg +7. Neckline: neckline type when applicable, e.g. crew neck, V-neck, hooded, collared, square neck +8. Sleeve Length Type: sleeve length only, e.g. sleeveless, short sleeve, long sleeve, three-quarter sleeve +9. Sleeve Style: sleeve design only, e.g. puff sleeve, raglan sleeve, batwing sleeve, bell sleeve +10. Strap Type: strap design when applicable, e.g. spaghetti strap, wide strap, halter strap, adjustable strap +11. Rise / Waistline: waist placement when applicable, e.g. high rise, mid rise, low rise, empire waist +12. Leg Shape: for bottoms only, e.g. straight leg, wide leg, flare leg, tapered leg, skinny leg +13. Skirt Shape: for skirts only, e.g. A-line, pleated, pencil, mermaid +14. Length Type: design length only, not size, e.g. 
cropped, regular, longline, mini, midi, maxi, ankle length, full length +15. Closure Type: fastening method when applicable, e.g. zipper, button, drawstring, elastic waist, hook-and-loop +16. Design Details: construction or visual details, e.g. ruched, ruffled, pleated, cut-out, layered, distressed, split hem +17. Fabric: fabric type only, e.g. denim, knit, chiffon, jersey, fleece, cotton twill +18. Material Composition: fiber content or blend only if stated, e.g. cotton, polyester, spandex, linen blend, 95% cotton 5% elastane +19. Fabric Properties: inherent fabric traits, e.g. stretch, breathable, lightweight, soft-touch, water-resistant +20. Clothing Features: product features, e.g. lined, reversible, hooded, packable, padded, pocketed +21. Functional Benefits: wearer benefits, e.g. moisture-wicking, thermal insulation, UV protection, easy care, supportive compression +22. Color: specific color name when available +23. Color Family: normalized broad retail color group, e.g. black, white, blue, green, red, pink, beige, brown, gray +24. Print / Pattern: surface pattern when applicable, e.g. solid, striped, plaid, floral, graphic, animal print +25. Occasion / End Use: likely use occasion only if supported, e.g. office, casual wear, streetwear, lounge, workout, outdoor +26. Style Aesthetic: overall style only if supported, e.g. minimalist, streetwear, athleisure, smart casual, romantic, playful + +Rules: +- Keep the same row order and row count as input. +- Infer only from the provided product text. +- Leave blank if not applicable or not reasonably supported. +- Use concise, standardized English ecommerce wording. +- Do not combine different attribute dimensions in one field. +- If multiple values are needed, use the delimiter required by the localization setting. 
+ +Input product list: +""" +``` diff --git a/mappings/README.md b/mappings/README.md index 929b615..c4286c3 100644 --- a/mappings/README.md +++ b/mappings/README.md @@ -68,6 +68,7 @@ - `option2_values` - `option3_values` - `enriched_attributes.value` +- `enriched_taxonomy_attributes.value` - `specifications.value_text` 以 `category_path` 和 `option*_values` 为例,核心语言灌入结果应至少包含: diff --git a/mappings/generate_search_products_mapping.py b/mappings/generate_search_products_mapping.py index 5ed7e98..4327c41 100644 --- a/mappings/generate_search_products_mapping.py +++ b/mappings/generate_search_products_mapping.py @@ -214,6 +214,11 @@ FIELD_SPECS = [ scalar_field("name", "keyword"), text_field("value", "core_language_text_with_keyword"), ), + nested_field( + "enriched_taxonomy_attributes", + scalar_field("name", "keyword"), + text_field("value", "core_language_text_with_keyword"), + ), scalar_field("option1_name", "keyword"), scalar_field("option2_name", "keyword"), scalar_field("option3_name", "keyword"), diff --git a/mappings/search_products.json b/mappings/search_products.json index d7c0ed1..6038dd8 100644 --- a/mappings/search_products.json +++ b/mappings/search_products.json @@ -34,8 +34,8 @@ "similarity": { "default": { "type": "BM25", - "b": 0.1, - "k1": 0.3 + "b": 0.0, + "k1": 0.0 } } }, @@ -2116,6 +2116,40 @@ } } }, + "enriched_taxonomy_attributes": { + "type": "nested", + "properties": { + "name": { + "type": "keyword" + }, + "value": { + "type": "object", + "properties": { + "zh": { + "type": "text", + "analyzer": "index_ik", + "search_analyzer": "query_ik", + "fields": { + "keyword": { + "type": "keyword", + "normalizer": "lowercase" + } + } + }, + "en": { + "type": "text", + "analyzer": "english", + "fields": { + "keyword": { + "type": "keyword", + "normalizer": "lowercase" + } + } + } + } + } + } + }, "option1_name": { "type": "keyword" }, -- libgit2 0.21.2