Commit 36cf0ef91e3ae77c0c5f29d459962df70cbda623
1 parent
c3425429
es索引结果修改
Showing
10 changed files
with
429 additions
and
229 deletions
Show diff stats
api/routes/indexer.py
| ... | ... | @@ -88,7 +88,7 @@ class EnrichContentItem(BaseModel): |
| 88 | 88 | |
| 89 | 89 | class EnrichContentRequest(BaseModel): |
| 90 | 90 | """ |
| 91 | - 内容理解字段生成请求:根据商品标题批量生成 qanchors、semantic_attributes、tags。 | |
| 91 | + 内容理解字段生成请求:根据商品标题批量生成 qanchors、enriched_attributes、tags。 | |
| 92 | 92 | 供外部 indexer 在自行组织 doc 时调用,与翻译、向量化等微服务并列。 |
| 93 | 93 | """ |
| 94 | 94 | tenant_id: str = Field(..., description="租户 ID,用于请求路由与结果归属,不参与缓存键") |
| ... | ... | @@ -447,7 +447,7 @@ async def build_docs_from_db(request: BuildDocsFromDbRequest): |
| 447 | 447 | def _run_enrich_content(tenant_id: str, items: List[Dict[str, str]], languages: List[str]) -> List[Dict[str, Any]]: |
| 448 | 448 | """ |
| 449 | 449 | 同步执行内容理解:调用 product_enrich.analyze_products,按语言批量跑 LLM, |
| 450 | - 再聚合成每 SPU 的 qanchors、semantic_attributes、tags。供 run_in_executor 调用。 | |
| 450 | + 再聚合成每 SPU 的 qanchors、enriched_attributes、tags。供 run_in_executor 调用。 | |
| 451 | 451 | """ |
| 452 | 452 | from indexer.product_enrich import analyze_products, split_multi_value_field |
| 453 | 453 | |
| ... | ... | @@ -473,11 +473,11 @@ def _run_enrich_content(tenant_id: str, items: List[Dict[str, str]], languages: |
| 473 | 473 | "features", |
| 474 | 474 | ] |
| 475 | 475 | |
| 476 | - # 按 spu_id 聚合:qanchors[lang], semantic_attributes[], tags[] | |
| 476 | + # 按 spu_id 聚合:qanchors[lang], enriched_attributes[], tags[] | |
| 477 | 477 | by_spu: Dict[str, Dict[str, Any]] = {} |
| 478 | 478 | for it in items: |
| 479 | 479 | sid = str(it["spu_id"]) |
| 480 | - by_spu[sid] = {"qanchors": {}, "semantic_attributes": [], "tags": []} | |
| 480 | + by_spu[sid] = {"qanchors": {}, "enriched_attributes": [], "tags": []} | |
| 481 | 481 | |
| 482 | 482 | for lang in llm_langs: |
| 483 | 483 | try: |
| ... | ... | @@ -511,7 +511,7 @@ def _run_enrich_content(tenant_id: str, items: List[Dict[str, str]], languages: |
| 511 | 511 | if not raw: |
| 512 | 512 | continue |
| 513 | 513 | for value in split_multi_value_field(str(raw)): |
| 514 | - rec["semantic_attributes"].append({"lang": lang, "name": name, "value": value}) | |
| 514 | + rec["enriched_attributes"].append({"lang": lang, "name": name, "value": value}) | |
| 515 | 515 | if name == "tags": |
| 516 | 516 | rec["tags"].append(value) |
| 517 | 517 | |
| ... | ... | @@ -524,7 +524,7 @@ def _run_enrich_content(tenant_id: str, items: List[Dict[str, str]], languages: |
| 524 | 524 | out.append({ |
| 525 | 525 | "spu_id": sid, |
| 526 | 526 | "qanchors": rec["qanchors"], |
| 527 | - "semantic_attributes": rec["semantic_attributes"], | |
| 527 | + "enriched_attributes": rec["enriched_attributes"], | |
| 528 | 528 | "tags": tags, |
| 529 | 529 | **({"error": rec["error"]} if rec.get("error") else {}), |
| 530 | 530 | }) |
| ... | ... | @@ -534,7 +534,7 @@ def _run_enrich_content(tenant_id: str, items: List[Dict[str, str]], languages: |
| 534 | 534 | @router.post("/enrich-content") |
| 535 | 535 | async def enrich_content(request: EnrichContentRequest): |
| 536 | 536 | """ |
| 537 | - 内容理解字段生成接口:根据商品标题批量生成 qanchors、semantic_attributes、tags。 | |
| 537 | + 内容理解字段生成接口:根据商品标题批量生成 qanchors、enriched_attributes、tags。 | |
| 538 | 538 | |
| 539 | 539 | 使用场景: |
| 540 | 540 | - 外部 indexer 采用「微服务组合」方式自己组织 doc 时,可调用本接口获取 LLM 生成的 | ... | ... |
docs/工作总结-微服务性能优化与架构.md
| ... | ... | @@ -95,10 +95,10 @@ instruction: "Given a shopping query, rank product titles by relevance" |
| 95 | 95 | |
| 96 | 96 | ### 5. 内容理解字段(支撑 Suggest) |
| 97 | 97 | |
| 98 | -**能力**:支持根据商品标题批量生成 **qanchors**(锚文本)、**semantic_attributes**、**tags**,供索引与 suggest 使用。 | |
| 98 | +**能力**:支持根据商品标题批量生成 **qanchors**(锚文本)、**enriched_attributes**、**tags**,供索引与 suggest 使用。 | |
| 99 | 99 | |
| 100 | 100 | **具体内容**: |
| 101 | -- **接口**:`POST /indexer/enrich-content`(Indexer 服务端口 **6004**)。请求体为 `items` 数组,每项含 `spu_id`、`title`(必填)及可选多语言标题等;单次请求最多 **50 条**,建议批量调用。响应 `results` 与 `items` 一一对应,每项含 `spu_id`、`qanchors`(按语言键,如 `qanchors.zh`、`qanchors.en`,逗号分隔短语)、`semantic_attributes`、`tags`。 | |
| 101 | +- **接口**:`POST /indexer/enrich-content`(Indexer 服务端口 **6004**)。请求体为 `items` 数组,每项含 `spu_id`、`title`(必填)及可选多语言标题等;单次请求最多 **50 条**,建议批量调用。响应 `results` 与 `items` 一一对应,每项含 `spu_id`、`qanchors`(按语言键,如 `qanchors.zh`、`qanchors.en`,逗号分隔短语)、`enriched_attributes`、`tags`。 | |
| 102 | 102 | - **索引侧**:微服务组合方式下,调用方先拿不含 qanchors/tags 的 doc,再调用本接口补齐后写入 ES 的 `qanchors.{lang}` 等字段;索引 transformer(`indexer/document_transformer.py`、`indexer/product_enrich.py`)内也可在构建 doc 时调用内容理解逻辑,写入 `qanchors.{lang}`。 |
| 103 | 103 | - **Suggest 侧**:`suggestion/builder.py` 从 ES 商品索引读取 `_source: ["id", "spu_id", "title", "qanchors"]`,对 `qanchors.{lang}` 用 `_split_qanchors` 拆成词条,以 `source="qanchor"` 加入候选,排序时 `qanchor` 权重大于纯 title(`add_product("qanchor", ...)`);suggest 配置中 `sources: ["query_log", "qanchor"]` 表示候选来源包含 qanchor。 |
| 104 | 104 | - **实现与依赖**:内容理解内部使用大模型(需 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存(如 `product_anchors`);逻辑与 `indexer/product_enrich` 一致。 | ... | ... |
docs/常用查询 - ES.md
| ... | ... | @@ -664,4 +664,20 @@ curl -u 'saas:4hOaLaf41y2VuI8y' -X POST \ |
| 664 | 664 | } |
| 665 | 665 | } |
| 666 | 666 | }' |
| 667 | + | |
| 668 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X POST \ | |
| 669 | +'http://localhost:9200/search_products_tenant_163/_count' \ | |
| 670 | +-H 'Content-Type: application/json' \ | |
| 671 | +-d '{ | |
| 672 | +"query": { | |
| 673 | +"nested": { | |
| 674 | + "path": "image_embedding", | |
| 675 | + "query": { | |
| 676 | + "exists": { | |
| 677 | + "field": "image_embedding.vector" | |
| 678 | + } | |
| 679 | + } | |
| 680 | +} | |
| 681 | +} | |
| 682 | +}' | |
| 667 | 683 | ``` |
| 668 | 684 | \ No newline at end of file | ... | ... |
docs/搜索API对接指南-00-总览与快速开始.md
| ... | ... | @@ -90,7 +90,7 @@ curl -X POST "http://43.166.252.75:6002/search/" \ |
| 90 | 90 | | 查询文档 | POST | `/indexer/documents` | 查询SPU文档数据(不写入ES) | |
| 91 | 91 | | 构建ES文档(正式对接) | POST | `/indexer/build-docs` | 基于上游提供的 MySQL 行数据构建 ES doc,不写入 ES,供 Java 等调用后自行写入 | |
| 92 | 92 | | 构建ES文档(测试用) | POST | `/indexer/build-docs-from-db` | 仅在测试/调试时使用,根据 `tenant_id + spu_ids` 内部查库并构建 ES doc | |
| 93 | -| 内容理解字段生成 | POST | `/indexer/enrich-content` | 根据商品标题批量生成 qanchors、semantic_attributes、tags,供微服务组合方式使用 | | |
| 93 | +| 内容理解字段生成 | POST | `/indexer/enrich-content` | 根据商品标题批量生成 qanchors、enriched_attributes、tags,供微服务组合方式使用 | | |
| 94 | 94 | | 索引健康检查 | GET | `/indexer/health` | 检查索引服务状态 | |
| 95 | 95 | | 健康检查 | GET | `/admin/health` | 服务健康检查 | |
| 96 | 96 | | 获取配置 | GET | `/admin/config` | 获取租户配置 | | ... | ... |
docs/搜索API对接指南-05-索引接口(Indexer).md
| ... | ... | @@ -649,7 +649,7 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ |
| 649 | 649 | ### 5.8 内容理解字段生成接口 |
| 650 | 650 | |
| 651 | 651 | - **端点**: `POST /indexer/enrich-content` |
| 652 | -- **描述**: 根据商品内容信息批量生成 **qanchors**(锚文本)、**semantic_attributes**(语义属性)、**tags**(细分标签),供外部 indexer 在「微服务组合」方式下自行拼装 doc 时使用。请求以 `items[]` 传入商品内容字段(必填/可选见下表)。内部逻辑与 `indexer.product_enrich` 一致,支持多语言与 Redis 缓存;单次请求在线程池中执行,避免阻塞其他接口。 | |
| 652 | +- **描述**: 根据商品内容信息批量生成 **qanchors**(锚文本)、**enriched_attributes**(语义属性)、**tags**(细分标签),供外部 indexer 在「微服务组合」方式下自行拼装 doc 时使用。请求以 `items[]` 传入商品内容字段(必填/可选见下表)。内部逻辑与 `indexer.product_enrich` 一致,支持多语言与 Redis 缓存;单次请求在线程池中执行,避免阻塞其他接口。 | |
| 653 | 653 | |
| 654 | 654 | #### 请求参数 |
| 655 | 655 | |
| ... | ... | @@ -714,7 +714,7 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ |
| 714 | 714 | "zh": "短袖T恤,纯棉,男装,夏季", |
| 715 | 715 | "en": "cotton t-shirt, short sleeve, men, summer" |
| 716 | 716 | }, |
| 717 | - "semantic_attributes": [ | |
| 717 | + "enriched_attributes": [ | |
| 718 | 718 | { "lang": "zh", "name": "tags", "value": "纯棉" }, |
| 719 | 719 | { "lang": "zh", "name": "usage_scene", "value": "日常" }, |
| 720 | 720 | { "lang": "en", "name": "tags", "value": "cotton" } |
| ... | ... | @@ -724,7 +724,7 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ |
| 724 | 724 | { |
| 725 | 725 | "spu_id": "223168", |
| 726 | 726 | "qanchors": { "en": "dolls, toys, 12pcs" }, |
| 727 | - "semantic_attributes": [], | |
| 727 | + "enriched_attributes": [], | |
| 728 | 728 | "tags": ["dolls", "toys"] |
| 729 | 729 | } |
| 730 | 730 | ] |
| ... | ... | @@ -733,9 +733,9 @@ curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ |
| 733 | 733 | |
| 734 | 734 | | 字段 | 类型 | 说明 | |
| 735 | 735 | |------|------|------| |
| 736 | -| `results` | array | 与请求 `items` 一一对应,每项含 `spu_id`、`qanchors`、`semantic_attributes`、`tags` | | |
| 736 | +| `results` | array | 与请求 `items` 一一对应,每项含 `spu_id`、`qanchors`、`enriched_attributes`、`tags` | | |
| 737 | 737 | | `results[].qanchors` | object | 按语言键的锚文本(逗号分隔短语),可写入 ES 文档的 `qanchors.{lang}` | |
| 738 | -| `results[].semantic_attributes` | array | 语义属性列表,每项为 `{ "lang", "name", "value" }`,可写入 ES 的 `semantic_attributes` nested 字段 | | |
| 738 | +| `results[].enriched_attributes` | array | 语义属性列表,每项为 `{ "lang", "name", "value" }`,可写入 ES 的 `enriched_attributes` nested 字段 | | |
| 739 | 739 | | `results[].tags` | array | 从语义属性中抽取的 `name=tags` 的 value 集合,可与业务原有 `tags` 合并后写入 ES 的 `tags` 字段 | |
| 740 | 740 | | `results[].error` | string | 若该条处理失败(如 LLM 异常),会在此字段返回错误信息 | |
| 741 | 741 | |
| ... | ... | @@ -753,7 +753,7 @@ curl -X POST "http://localhost:6004/indexer/enrich-content" \ |
| 753 | 753 | "items": [ |
| 754 | 754 | { |
| 755 | 755 | "spu_id": "223167", |
| 756 | - "title": "纯棉短袖T恤 夏季男装", | |
| 756 | + "title": "纯棉短袖T恤 夏季男装", | |
| 757 | 757 | "brief": "夏季透气纯棉短袖,舒适亲肤", |
| 758 | 758 | "description": "100%棉,圆领版型,适合日常通勤与休闲穿搭。", |
| 759 | 759 | "image_url": "https://example.com/images/223167.jpg" | ... | ... |
docs/搜索API对接指南-07-微服务接口(Embedding-Reranker-Translation).md
| ... | ... | @@ -444,7 +444,7 @@ curl "http://localhost:6006/health" |
| 444 | 444 | |
| 445 | 445 | - **Base URL**: Indexer 服务地址,如 `http://localhost:6004` |
| 446 | 446 | - **路径**: `POST /indexer/enrich-content` |
| 447 | -- **说明**: 根据商品标题批量生成 `qanchors`、`semantic_attributes`、`tags`,用于拼装 ES 文档。内部使用大模型(需配置 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存;单次最多 50 条,建议批量调用以提升效率。 | |
| 447 | +- **说明**: 根据商品标题批量生成 `qanchors`、`enriched_attributes`、`tags`,用于拼装 ES 文档。内部使用大模型(需配置 `DASHSCOPE_API_KEY`),支持多语言与 Redis 缓存;单次最多 50 条,建议批量调用以提升效率。 | |
| 448 | 448 | |
| 449 | 449 | 请求/响应格式、示例及错误码见 [-05-索引接口(Indexer)](./搜索API对接指南-05-索引接口(Indexer).md#58-内容理解字段生成接口)。 |
| 450 | 450 | ... | ... |
indexer/ANCHORS_AND_SEMANTIC_ATTRIBUTES.md
| 1 | -## qanchors 与 semantic_attributes 设计与索引逻辑说明 | |
| 1 | +## qanchors 与 enriched_attributes 设计与索引逻辑说明 | |
| 2 | 2 | |
| 3 | 3 | 本文档详细说明: |
| 4 | 4 | |
| 5 | 5 | - **锚文本字段 `qanchors.{lang}` 的作用与来源** |
| 6 | -- **语义属性字段 `semantic_attributes` 的结构、用途与写入流程** | |
| 6 | +- **语义属性字段 `enriched_attributes` 的结构、用途与写入流程** | |
| 7 | 7 | - **多语言支持策略(zh / en / de / ru / fr)** |
| 8 | 8 | - **索引阶段与 LLM 调用的集成方式** |
| 9 | 9 | |
| ... | ... | @@ -43,13 +43,13 @@ |
| 43 | 43 | - 主搜索:作为额外的全文字段参与 BM25 召回与打分(可在 `search/query_config.py` 中给一定权重); |
| 44 | 44 | - Suggestion:`suggestion/builder.py` 会从 `qanchors.{lang}` 中拆分词条作为候选(`source="qanchor"`,权重大于 `title`)。 |
| 45 | 45 | |
| 46 | -#### 1.2 `semantic_attributes`:面向过滤/分面的通用语义属性 | |
| 46 | +#### 1.2 `enriched_attributes`:面向过滤/分面的通用语义属性 | |
| 47 | 47 | |
| 48 | 48 | - **Mapping 位置**:`mappings/search_products.json`,追加的 nested 字段。 |
| 49 | 49 | - **结构**: |
| 50 | 50 | |
| 51 | 51 | ```1392:1410:/home/tw/saas-search/mappings/search_products.json |
| 52 | -"semantic_attributes": { | |
| 52 | +"enriched_attributes": { | |
| 53 | 53 | "type": "nested", |
| 54 | 54 | "properties": { |
| 55 | 55 | "lang": { "type": "keyword" }, // 语言:zh / en / de / ru / fr |
| ... | ... | @@ -199,7 +199,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = "zh") -> st |
| 199 | 199 | self._fill_llm_attributes(doc, spu_row) |
| 200 | 200 | ``` |
| 201 | 201 | |
| 202 | -也就是说,**每个 SPU 文档默认会尝试补充 qanchors 与 semantic_attributes**。 | |
| 202 | +也就是说,**每个 SPU 文档默认会尝试补充 qanchors 与 enriched_attributes**。 | |
| 203 | 203 | |
| 204 | 204 | #### 3.2 语言选择策略 |
| 205 | 205 | |
| ... | ... | @@ -237,7 +237,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = "zh") -> st |
| 237 | 237 | if not spu_id or not title: |
| 238 | 238 | return |
| 239 | 239 | |
| 240 | - semantic_list = doc.get("semantic_attributes") or [] | |
| 240 | + semantic_list = doc.get("enriched_attributes") or [] | |
| 241 | 241 | qanchors_obj = doc.get("qanchors") or {} |
| 242 | 242 | |
| 243 | 243 | dim_keys = [ |
| ... | ... | @@ -291,7 +291,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = "zh") -> st |
| 291 | 291 | if qanchors_obj: |
| 292 | 292 | doc["qanchors"] = qanchors_obj |
| 293 | 293 | if semantic_list: |
| 294 | - doc["semantic_attributes"] = semantic_list | |
| 294 | + doc["enriched_attributes"] = semantic_list | |
| 295 | 295 | ``` |
| 296 | 296 | |
| 297 | 297 | 要点: |
| ... | ... | @@ -307,7 +307,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = "zh") -> st |
| 307 | 307 | - 没有 `title`; |
| 308 | 308 | - 或者 `tenant_config.index_languages` 与 `SUPPORTED_LANGS` 没有交集; |
| 309 | 309 | - 或 `DASHSCOPE_API_KEY` 未配置 / LLM 请求报错; |
| 310 | -- 则 `_fill_llm_attributes` 会在日志中输出 `warning`,**不会抛异常**,索引流程继续,只是该 SPU 在这一轮不会得到 `qanchors` / `semantic_attributes`。 | |
| 310 | +- 则 `_fill_llm_attributes` 会在日志中输出 `warning`,**不会抛异常**,索引流程继续,只是该 SPU 在这一轮不会得到 `qanchors` / `enriched_attributes`。 | |
| 311 | 311 | |
| 312 | 312 | 这保证了整个索引服务在 LLM 不可用时表现为一个普通的“传统索引”,而不会中断。 |
| 313 | 313 | |
| ... | ... | @@ -344,13 +344,13 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = "zh") -> st |
| 344 | 344 | ```json |
| 345 | 345 | { |
| 346 | 346 | "nested": { |
| 347 | - "path": "semantic_attributes", | |
| 347 | + "path": "enriched_attributes", | |
| 348 | 348 | "query": { |
| 349 | 349 | "bool": { |
| 350 | 350 | "must": [ |
| 351 | - { "term": { "semantic_attributes.lang": "zh" } }, | |
| 352 | - { "term": { "semantic_attributes.name": "usage_scene" } }, | |
| 353 | - { "term": { "semantic_attributes.value": "通勤" } } | |
| 351 | + { "term": { "enriched_attributes.lang": "zh" } }, | |
| 352 | + { "term": { "enriched_attributes.name": "usage_scene" } }, | |
| 353 | + { "term": { "enriched_attributes.value": "通勤" } } | |
| 354 | 354 | ] |
| 355 | 355 | } |
| 356 | 356 | } |
| ... | ... | @@ -400,7 +400,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = "zh") -> st |
| 400 | 400 | |
| 401 | 401 | 1. **功能定位**: |
| 402 | 402 | - `qanchors.{lang}`:更好地贴近用户真实查询词,用于召回与 suggestion; |
| 403 | - - `semantic_attributes`:以结构化形式承载 LLM 抽取的语义维度,用于 filter / facet。 | |
| 403 | + - `enriched_attributes`:以结构化形式承载 LLM 抽取的语义维度,用于 filter / facet。 | |
| 404 | 404 | 2. **多语言对齐**: |
| 405 | 405 | - 完全复用租户级 `index_languages` 配置; |
| 406 | 406 | - 对每种语言单独生成锚文本与语义属性,不互相混用。 |
| ... | ... | @@ -409,7 +409,7 @@ def create_prompt(products: List[Dict[str, str]], target_lang: str = "zh") -> st |
| 409 | 409 | - 当 LLM/配置异常时,只是“缺少增强特征”,不影响基础搜索能力。 |
| 410 | 410 | 4. **未来扩展**: |
| 411 | 411 | - 可以在 `dim_keys` 中新增维度名(如 `style`, `benefit` 等),只要在 prompt 与解析逻辑中增加对应列即可; |
| 412 | - - 可以为 `semantic_attributes` 增加额外字段(如 `confidence`、`source`),用于更精细的控制(当前 mapping 为简单版)。 | |
| 412 | + - 可以为 `enriched_attributes` 增加额外字段(如 `confidence`、`source`),用于更精细的控制(当前 mapping 为简单版)。 | |
| 413 | 413 | |
| 414 | -如需在查询层面增加基于 `semantic_attributes` 的统一 DSL(类似 `specifications` 的过滤/分面规则),推荐在 `docs/搜索API对接指南.md` 中新增一节,并在 `search/es_query_builder.py` 里封装构造逻辑,避免前端直接拼 nested 查询。 | |
| 414 | +如需在查询层面增加基于 `enriched_attributes` 的统一 DSL(类似 `specifications` 的过滤/分面规则),推荐在 `docs/搜索API对接指南.md` 中新增一节,并在 `search/es_query_builder.py` 里封装构造逻辑,避免前端直接拼 nested 查询。 | |
| 415 | 415 | ... | ... |
indexer/document_transformer.py
| ... | ... | @@ -202,7 +202,7 @@ class SPUDocumentTransformer: |
| 202 | 202 | """ |
| 203 | 203 | 批量调用 LLM,为一批 doc 填充: |
| 204 | 204 | - qanchors.{lang} |
| 205 | - - semantic_attributes (lang/name/value) | |
| 205 | + - enriched_attributes (lang/name/value) | |
| 206 | 206 | |
| 207 | 207 | 设计目标: |
| 208 | 208 | - 尽可能攒批调用 LLM; |
| ... | ... | @@ -271,7 +271,7 @@ class SPUDocumentTransformer: |
| 271 | 271 | if row.get("error"): |
| 272 | 272 | return |
| 273 | 273 | |
| 274 | - semantic_list = doc.get("semantic_attributes") or [] | |
| 274 | + semantic_list = doc.get("enriched_attributes") or [] | |
| 275 | 275 | qanchors_obj = doc.get("qanchors") or {} |
| 276 | 276 | |
| 277 | 277 | anchor_text = str(row.get("anchor_text") or "").strip() |
| ... | ... | @@ -288,7 +288,7 @@ class SPUDocumentTransformer: |
| 288 | 288 | if qanchors_obj: |
| 289 | 289 | doc["qanchors"] = qanchors_obj |
| 290 | 290 | if semantic_list: |
| 291 | - doc["semantic_attributes"] = semantic_list | |
| 291 | + doc["enriched_attributes"] = semantic_list | |
| 292 | 292 | except Exception as e: |
| 293 | 293 | logger.warning("Failed to apply LLM row to doc (spu_id=%s, lang=%s): %s", doc.get("spu_id"), lang, e) |
| 294 | 294 | |
| ... | ... | @@ -638,7 +638,7 @@ class SPUDocumentTransformer: |
| 638 | 638 | """ |
| 639 | 639 | 调用 indexer.product_enrich.analyze_products,为当前 SPU 填充: |
| 640 | 640 | - qanchors.{lang} |
| 641 | - - semantic_attributes (lang/name/value) | |
| 641 | + - enriched_attributes (lang/name/value) | |
| 642 | 642 | """ |
| 643 | 643 | try: |
| 644 | 644 | index_langs = self.tenant_config.get("index_languages") or ["en", "zh"] |
| ... | ... | @@ -653,7 +653,7 @@ class SPUDocumentTransformer: |
| 653 | 653 | if not spu_id or not title: |
| 654 | 654 | return |
| 655 | 655 | |
| 656 | - semantic_list = doc.get("semantic_attributes") or [] | |
| 656 | + semantic_list = doc.get("enriched_attributes") or [] | |
| 657 | 657 | qanchors_obj = doc.get("qanchors") or {} |
| 658 | 658 | |
| 659 | 659 | dim_keys = [ |
| ... | ... | @@ -711,7 +711,7 @@ class SPUDocumentTransformer: |
| 711 | 711 | if qanchors_obj: |
| 712 | 712 | doc["qanchors"] = qanchors_obj |
| 713 | 713 | if semantic_list: |
| 714 | - doc["semantic_attributes"] = semantic_list | |
| 714 | + doc["enriched_attributes"] = semantic_list | |
| 715 | 715 | |
| 716 | 716 | def _transform_sku_row(self, sku_row: pd.Series, option_name_map: Dict[int, str] = None) -> Optional[Dict[str, Any]]: |
| 717 | 717 | """ | ... | ... |
mappings/search_products.json
| ... | ... | @@ -179,132 +179,6 @@ |
| 179 | 179 | } |
| 180 | 180 | } |
| 181 | 181 | }, |
| 182 | - "qanchors": { | |
| 183 | - "type": "object", | |
| 184 | - "properties": { | |
| 185 | - "zh": { | |
| 186 | - "type": "text", | |
| 187 | - "analyzer": "index_ik", | |
| 188 | - "search_analyzer": "query_ik" | |
| 189 | - }, | |
| 190 | - "en": { | |
| 191 | - "type": "text", | |
| 192 | - "analyzer": "english" | |
| 193 | - }, | |
| 194 | - "ar": { | |
| 195 | - "type": "text", | |
| 196 | - "analyzer": "arabic" | |
| 197 | - }, | |
| 198 | - "hy": { | |
| 199 | - "type": "text", | |
| 200 | - "analyzer": "armenian" | |
| 201 | - }, | |
| 202 | - "eu": { | |
| 203 | - "type": "text", | |
| 204 | - "analyzer": "basque" | |
| 205 | - }, | |
| 206 | - "pt_br": { | |
| 207 | - "type": "text", | |
| 208 | - "analyzer": "brazilian" | |
| 209 | - }, | |
| 210 | - "bg": { | |
| 211 | - "type": "text", | |
| 212 | - "analyzer": "bulgarian" | |
| 213 | - }, | |
| 214 | - "ca": { | |
| 215 | - "type": "text", | |
| 216 | - "analyzer": "catalan" | |
| 217 | - }, | |
| 218 | - "cjk": { | |
| 219 | - "type": "text", | |
| 220 | - "analyzer": "cjk" | |
| 221 | - }, | |
| 222 | - "cs": { | |
| 223 | - "type": "text", | |
| 224 | - "analyzer": "czech" | |
| 225 | - }, | |
| 226 | - "da": { | |
| 227 | - "type": "text", | |
| 228 | - "analyzer": "danish" | |
| 229 | - }, | |
| 230 | - "nl": { | |
| 231 | - "type": "text", | |
| 232 | - "analyzer": "dutch" | |
| 233 | - }, | |
| 234 | - "fi": { | |
| 235 | - "type": "text", | |
| 236 | - "analyzer": "finnish" | |
| 237 | - }, | |
| 238 | - "fr": { | |
| 239 | - "type": "text", | |
| 240 | - "analyzer": "french" | |
| 241 | - }, | |
| 242 | - "gl": { | |
| 243 | - "type": "text", | |
| 244 | - "analyzer": "galician" | |
| 245 | - }, | |
| 246 | - "de": { | |
| 247 | - "type": "text", | |
| 248 | - "analyzer": "german" | |
| 249 | - }, | |
| 250 | - "el": { | |
| 251 | - "type": "text", | |
| 252 | - "analyzer": "greek" | |
| 253 | - }, | |
| 254 | - "hi": { | |
| 255 | - "type": "text", | |
| 256 | - "analyzer": "hindi" | |
| 257 | - }, | |
| 258 | - "hu": { | |
| 259 | - "type": "text", | |
| 260 | - "analyzer": "hungarian" | |
| 261 | - }, | |
| 262 | - "id": { | |
| 263 | - "type": "text", | |
| 264 | - "analyzer": "indonesian" | |
| 265 | - }, | |
| 266 | - "it": { | |
| 267 | - "type": "text", | |
| 268 | - "analyzer": "italian" | |
| 269 | - }, | |
| 270 | - "no": { | |
| 271 | - "type": "text", | |
| 272 | - "analyzer": "norwegian" | |
| 273 | - }, | |
| 274 | - "fa": { | |
| 275 | - "type": "text", | |
| 276 | - "analyzer": "persian" | |
| 277 | - }, | |
| 278 | - "pt": { | |
| 279 | - "type": "text", | |
| 280 | - "analyzer": "portuguese" | |
| 281 | - }, | |
| 282 | - "ro": { | |
| 283 | - "type": "text", | |
| 284 | - "analyzer": "romanian" | |
| 285 | - }, | |
| 286 | - "ru": { | |
| 287 | - "type": "text", | |
| 288 | - "analyzer": "russian" | |
| 289 | - }, | |
| 290 | - "es": { | |
| 291 | - "type": "text", | |
| 292 | - "analyzer": "spanish" | |
| 293 | - }, | |
| 294 | - "sv": { | |
| 295 | - "type": "text", | |
| 296 | - "analyzer": "swedish" | |
| 297 | - }, | |
| 298 | - "tr": { | |
| 299 | - "type": "text", | |
| 300 | - "analyzer": "turkish" | |
| 301 | - }, | |
| 302 | - "th": { | |
| 303 | - "type": "text", | |
| 304 | - "analyzer": "thai" | |
| 305 | - } | |
| 306 | - } | |
| 307 | - }, | |
| 308 | 182 | "keywords": { |
| 309 | 183 | "type": "object", |
| 310 | 184 | "properties": { |
| ... | ... | @@ -315,119 +189,293 @@ |
| 315 | 189 | }, |
| 316 | 190 | "en": { |
| 317 | 191 | "type": "text", |
| 318 | - "analyzer": "english" | |
| 192 | + "analyzer": "english", | |
| 193 | + "fields": { | |
| 194 | + "keyword": { | |
| 195 | + "type": "keyword", | |
| 196 | + "normalizer": "lowercase" | |
| 197 | + } | |
| 198 | + } | |
| 319 | 199 | }, |
| 320 | 200 | "ar": { |
| 321 | 201 | "type": "text", |
| 322 | - "analyzer": "arabic" | |
| 202 | + "analyzer": "arabic", | |
| 203 | + "fields": { | |
| 204 | + "keyword": { | |
| 205 | + "type": "keyword", | |
| 206 | + "normalizer": "lowercase" | |
| 207 | + } | |
| 208 | + } | |
| 323 | 209 | }, |
| 324 | 210 | "hy": { |
| 325 | 211 | "type": "text", |
| 326 | - "analyzer": "armenian" | |
| 212 | + "analyzer": "armenian", | |
| 213 | + "fields": { | |
| 214 | + "keyword": { | |
| 215 | + "type": "keyword", | |
| 216 | + "normalizer": "lowercase" | |
| 217 | + } | |
| 218 | + } | |
| 327 | 219 | }, |
| 328 | 220 | "eu": { |
| 329 | 221 | "type": "text", |
| 330 | - "analyzer": "basque" | |
| 222 | + "analyzer": "basque", | |
| 223 | + "fields": { | |
| 224 | + "keyword": { | |
| 225 | + "type": "keyword", | |
| 226 | + "normalizer": "lowercase" | |
| 227 | + } | |
| 228 | + } | |
| 331 | 229 | }, |
| 332 | 230 | "pt_br": { |
| 333 | 231 | "type": "text", |
| 334 | - "analyzer": "brazilian" | |
| 232 | + "analyzer": "brazilian", | |
| 233 | + "fields": { | |
| 234 | + "keyword": { | |
| 235 | + "type": "keyword", | |
| 236 | + "normalizer": "lowercase" | |
| 237 | + } | |
| 238 | + } | |
| 335 | 239 | }, |
| 336 | 240 | "bg": { |
| 337 | 241 | "type": "text", |
| 338 | - "analyzer": "bulgarian" | |
| 242 | + "analyzer": "bulgarian", | |
| 243 | + "fields": { | |
| 244 | + "keyword": { | |
| 245 | + "type": "keyword", | |
| 246 | + "normalizer": "lowercase" | |
| 247 | + } | |
| 248 | + } | |
| 339 | 249 | }, |
| 340 | 250 | "ca": { |
| 341 | 251 | "type": "text", |
| 342 | - "analyzer": "catalan" | |
| 252 | + "analyzer": "catalan", | |
| 253 | + "fields": { | |
| 254 | + "keyword": { | |
| 255 | + "type": "keyword", | |
| 256 | + "normalizer": "lowercase" | |
| 257 | + } | |
| 258 | + } | |
| 343 | 259 | }, |
| 344 | 260 | "cjk": { |
| 345 | 261 | "type": "text", |
| 346 | - "analyzer": "cjk" | |
| 262 | + "analyzer": "cjk", | |
| 263 | + "fields": { | |
| 264 | + "keyword": { | |
| 265 | + "type": "keyword", | |
| 266 | + "normalizer": "lowercase" | |
| 267 | + } | |
| 268 | + } | |
| 347 | 269 | }, |
| 348 | 270 | "cs": { |
| 349 | 271 | "type": "text", |
| 350 | - "analyzer": "czech" | |
| 272 | + "analyzer": "czech", | |
| 273 | + "fields": { | |
| 274 | + "keyword": { | |
| 275 | + "type": "keyword", | |
| 276 | + "normalizer": "lowercase" | |
| 277 | + } | |
| 278 | + } | |
| 351 | 279 | }, |
| 352 | 280 | "da": { |
| 353 | 281 | "type": "text", |
| 354 | - "analyzer": "danish" | |
| 282 | + "analyzer": "danish", | |
| 283 | + "fields": { | |
| 284 | + "keyword": { | |
| 285 | + "type": "keyword", | |
| 286 | + "normalizer": "lowercase" | |
| 287 | + } | |
| 288 | + } | |
| 355 | 289 | }, |
| 356 | 290 | "nl": { |
| 357 | 291 | "type": "text", |
| 358 | - "analyzer": "dutch" | |
| 292 | + "analyzer": "dutch", | |
| 293 | + "fields": { | |
| 294 | + "keyword": { | |
| 295 | + "type": "keyword", | |
| 296 | + "normalizer": "lowercase" | |
| 297 | + } | |
| 298 | + } | |
| 359 | 299 | }, |
| 360 | 300 | "fi": { |
| 361 | 301 | "type": "text", |
| 362 | - "analyzer": "finnish" | |
| 302 | + "analyzer": "finnish", | |
| 303 | + "fields": { | |
| 304 | + "keyword": { | |
| 305 | + "type": "keyword", | |
| 306 | + "normalizer": "lowercase" | |
| 307 | + } | |
| 308 | + } | |
| 363 | 309 | }, |
| 364 | 310 | "fr": { |
| 365 | 311 | "type": "text", |
| 366 | - "analyzer": "french" | |
| 312 | + "analyzer": "french", | |
| 313 | + "fields": { | |
| 314 | + "keyword": { | |
| 315 | + "type": "keyword", | |
| 316 | + "normalizer": "lowercase" | |
| 317 | + } | |
| 318 | + } | |
| 367 | 319 | }, |
| 368 | 320 | "gl": { |
| 369 | 321 | "type": "text", |
| 370 | - "analyzer": "galician" | |
| 322 | + "analyzer": "galician", | |
| 323 | + "fields": { | |
| 324 | + "keyword": { | |
| 325 | + "type": "keyword", | |
| 326 | + "normalizer": "lowercase" | |
| 327 | + } | |
| 328 | + } | |
| 371 | 329 | }, |
| 372 | 330 | "de": { |
| 373 | 331 | "type": "text", |
| 374 | - "analyzer": "german" | |
| 332 | + "analyzer": "german", | |
| 333 | + "fields": { | |
| 334 | + "keyword": { | |
| 335 | + "type": "keyword", | |
| 336 | + "normalizer": "lowercase" | |
| 337 | + } | |
| 338 | + } | |
| 375 | 339 | }, |
| 376 | 340 | "el": { |
| 377 | 341 | "type": "text", |
| 378 | - "analyzer": "greek" | |
| 342 | + "analyzer": "greek", | |
| 343 | + "fields": { | |
| 344 | + "keyword": { | |
| 345 | + "type": "keyword", | |
| 346 | + "normalizer": "lowercase" | |
| 347 | + } | |
| 348 | + } | |
| 379 | 349 | }, |
| 380 | 350 | "hi": { |
| 381 | 351 | "type": "text", |
| 382 | - "analyzer": "hindi" | |
| 352 | + "analyzer": "hindi", | |
| 353 | + "fields": { | |
| 354 | + "keyword": { | |
| 355 | + "type": "keyword", | |
| 356 | + "normalizer": "lowercase" | |
| 357 | + } | |
| 358 | + } | |
| 383 | 359 | }, |
| 384 | 360 | "hu": { |
| 385 | 361 | "type": "text", |
| 386 | - "analyzer": "hungarian" | |
| 362 | + "analyzer": "hungarian", | |
| 363 | + "fields": { | |
| 364 | + "keyword": { | |
| 365 | + "type": "keyword", | |
| 366 | + "normalizer": "lowercase" | |
| 367 | + } | |
| 368 | + } | |
| 387 | 369 | }, |
| 388 | 370 | "id": { |
| 389 | 371 | "type": "text", |
| 390 | - "analyzer": "indonesian" | |
| 372 | + "analyzer": "indonesian", | |
| 373 | + "fields": { | |
| 374 | + "keyword": { | |
| 375 | + "type": "keyword", | |
| 376 | + "normalizer": "lowercase" | |
| 377 | + } | |
| 378 | + } | |
| 391 | 379 | }, |
| 392 | 380 | "it": { |
| 393 | 381 | "type": "text", |
| 394 | - "analyzer": "italian" | |
| 382 | + "analyzer": "italian", | |
| 383 | + "fields": { | |
| 384 | + "keyword": { | |
| 385 | + "type": "keyword", | |
| 386 | + "normalizer": "lowercase" | |
| 387 | + } | |
| 388 | + } | |
| 395 | 389 | }, |
| 396 | 390 | "no": { |
| 397 | 391 | "type": "text", |
| 398 | - "analyzer": "norwegian" | |
| 392 | + "analyzer": "norwegian", | |
| 393 | + "fields": { | |
| 394 | + "keyword": { | |
| 395 | + "type": "keyword", | |
| 396 | + "normalizer": "lowercase" | |
| 397 | + } | |
| 398 | + } | |
| 399 | 399 | }, |
| 400 | 400 | "fa": { |
| 401 | 401 | "type": "text", |
| 402 | - "analyzer": "persian" | |
| 402 | + "analyzer": "persian", | |
| 403 | + "fields": { | |
| 404 | + "keyword": { | |
| 405 | + "type": "keyword", | |
| 406 | + "normalizer": "lowercase" | |
| 407 | + } | |
| 408 | + } | |
| 403 | 409 | }, |
| 404 | 410 | "pt": { |
| 405 | 411 | "type": "text", |
| 406 | - "analyzer": "portuguese" | |
| 412 | + "analyzer": "portuguese", | |
| 413 | + "fields": { | |
| 414 | + "keyword": { | |
| 415 | + "type": "keyword", | |
| 416 | + "normalizer": "lowercase" | |
| 417 | + } | |
| 418 | + } | |
| 407 | 419 | }, |
| 408 | 420 | "ro": { |
| 409 | 421 | "type": "text", |
| 410 | - "analyzer": "romanian" | |
| 422 | + "analyzer": "romanian", | |
| 423 | + "fields": { | |
| 424 | + "keyword": { | |
| 425 | + "type": "keyword", | |
| 426 | + "normalizer": "lowercase" | |
| 427 | + } | |
| 428 | + } | |
| 411 | 429 | }, |
| 412 | 430 | "ru": { |
| 413 | 431 | "type": "text", |
| 414 | - "analyzer": "russian" | |
| 432 | + "analyzer": "russian", | |
| 433 | + "fields": { | |
| 434 | + "keyword": { | |
| 435 | + "type": "keyword", | |
| 436 | + "normalizer": "lowercase" | |
| 437 | + } | |
| 438 | + } | |
| 415 | 439 | }, |
| 416 | 440 | "es": { |
| 417 | 441 | "type": "text", |
| 418 | - "analyzer": "spanish" | |
| 442 | + "analyzer": "spanish", | |
| 443 | + "fields": { | |
| 444 | + "keyword": { | |
| 445 | + "type": "keyword", | |
| 446 | + "normalizer": "lowercase" | |
| 447 | + } | |
| 448 | + } | |
| 419 | 449 | }, |
| 420 | 450 | "sv": { |
| 421 | 451 | "type": "text", |
| 422 | - "analyzer": "swedish" | |
| 452 | + "analyzer": "swedish", | |
| 453 | + "fields": { | |
| 454 | + "keyword": { | |
| 455 | + "type": "keyword", | |
| 456 | + "normalizer": "lowercase" | |
| 457 | + } | |
| 458 | + } | |
| 423 | 459 | }, |
| 424 | 460 | "tr": { |
| 425 | 461 | "type": "text", |
| 426 | - "analyzer": "turkish" | |
| 462 | + "analyzer": "turkish", | |
| 463 | + "fields": { | |
| 464 | + "keyword": { | |
| 465 | + "type": "keyword", | |
| 466 | + "normalizer": "lowercase" | |
| 467 | + } | |
| 468 | + } | |
| 427 | 469 | }, |
| 428 | 470 | "th": { |
| 429 | 471 | "type": "text", |
| 430 | - "analyzer": "thai" | |
| 472 | + "analyzer": "thai", | |
| 473 | + "fields": { | |
| 474 | + "keyword": { | |
| 475 | + "type": "keyword", | |
| 476 | + "normalizer": "lowercase" | |
| 477 | + } | |
| 478 | + } | |
| 431 | 479 | } |
| 432 | 480 | } |
| 433 | 481 | }, |
| ... | ... | @@ -983,9 +1031,6 @@ |
| 983 | 1031 | } |
| 984 | 1032 | } |
| 985 | 1033 | }, |
| 986 | - "tags": { | |
| 987 | - "type": "keyword" | |
| 988 | - }, | |
| 989 | 1034 | "image_url": { |
| 990 | 1035 | "type": "keyword", |
| 991 | 1036 | "index": false |
| ... | ... | @@ -1012,21 +1057,6 @@ |
| 1012 | 1057 | } |
| 1013 | 1058 | } |
| 1014 | 1059 | }, |
| 1015 | - "image_embedding_512": { | |
| 1016 | - "type": "nested", | |
| 1017 | - "properties": { | |
| 1018 | - "vector": { | |
| 1019 | - "type": "dense_vector", | |
| 1020 | - "dims": 512, | |
| 1021 | - "index": true, | |
| 1022 | - "similarity": "dot_product", | |
| 1023 | - "element_type": "bfloat16" | |
| 1024 | - }, | |
| 1025 | - "url": { | |
| 1026 | - "type": "text" | |
| 1027 | - } | |
| 1028 | - } | |
| 1029 | - }, | |
| 1030 | 1060 | "category_path": { |
| 1031 | 1061 | "type": "object", |
| 1032 | 1062 | "properties": { |
| ... | ... | @@ -1279,6 +1309,46 @@ |
| 1279 | 1309 | } |
| 1280 | 1310 | } |
| 1281 | 1311 | }, |
| 1312 | + "qanchors": { | |
| 1313 | + "type": "object", | |
| 1314 | + "properties": { | |
| 1315 | + "zh": { | |
| 1316 | + "type": "text", | |
| 1317 | + "analyzer": "index_ik", | |
| 1318 | + "search_analyzer": "query_ik" | |
| 1319 | + }, | |
| 1320 | + "en": { | |
| 1321 | + "type": "text", | |
| 1322 | + "analyzer": "english" | |
| 1323 | + } | |
| 1324 | + } | |
| 1325 | + }, | |
| 1326 | + "tags": { | |
| 1327 | + "type": "object", | |
| 1328 | + "properties": { | |
| 1329 | + "zh": { | |
| 1330 | + "type": "text", | |
| 1331 | + "analyzer": "index_ik", | |
| 1332 | + "search_analyzer": "query_ik", | |
| 1333 | + "fields": { | |
| 1334 | + "keyword": { | |
| 1335 | + "type": "keyword", | |
| 1336 | + "normalizer": "lowercase" | |
| 1337 | + } | |
| 1338 | + } | |
| 1339 | + }, | |
| 1340 | + "en": { | |
| 1341 | + "type": "text", | |
| 1342 | + "analyzer": "english", | |
| 1343 | + "fields": { | |
| 1344 | + "keyword": { | |
| 1345 | + "type": "keyword", | |
| 1346 | + "normalizer": "lowercase" | |
| 1347 | + } | |
| 1348 | + } | |
| 1349 | + } | |
| 1350 | + } | |
| 1351 | + }, | |
| 1282 | 1352 | "category_id": { |
| 1283 | 1353 | "type": "keyword" |
| 1284 | 1354 | }, |
| ... | ... | @@ -1307,7 +1377,64 @@ |
| 1307 | 1377 | "type": "keyword" |
| 1308 | 1378 | }, |
| 1309 | 1379 | "value": { |
| 1380 | + "type": "object", | |
| 1381 | + "properties": { | |
| 1382 | + "zh": { | |
| 1383 | + "type": "text", | |
| 1384 | + "analyzer": "index_ik", | |
| 1385 | + "search_analyzer": "query_ik", | |
| 1386 | + "fields": { | |
| 1387 | + "keyword": { | |
| 1388 | + "type": "keyword", | |
| 1389 | + "normalizer": "lowercase" | |
| 1390 | + } | |
| 1391 | + } | |
| 1392 | + }, | |
| 1393 | + "en": { | |
| 1394 | + "type": "text", | |
| 1395 | + "analyzer": "english", | |
| 1396 | + "fields": { | |
| 1397 | + "keyword": { | |
| 1398 | + "type": "keyword", | |
| 1399 | + "normalizer": "lowercase" | |
| 1400 | + } | |
| 1401 | + } | |
| 1402 | + } | |
| 1403 | + } | |
| 1404 | + } | |
| 1405 | + } | |
| 1406 | + }, | |
| 1407 | + "enriched_attributes": { | |
| 1408 | + "type": "nested", | |
| 1409 | + "properties": { | |
| 1410 | + "name": { | |
| 1310 | 1411 | "type": "keyword" |
| 1412 | + }, | |
| 1413 | + "value": { | |
| 1414 | + "type": "object", | |
| 1415 | + "properties": { | |
| 1416 | + "zh": { | |
| 1417 | + "type": "text", | |
| 1418 | + "analyzer": "index_ik", | |
| 1419 | + "search_analyzer": "query_ik", | |
| 1420 | + "fields": { | |
| 1421 | + "keyword": { | |
| 1422 | + "type": "keyword", | |
| 1423 | + "normalizer": "lowercase" | |
| 1424 | + } | |
| 1425 | + } | |
| 1426 | + }, | |
| 1427 | + "en": { | |
| 1428 | + "type": "text", | |
| 1429 | + "analyzer": "english", | |
| 1430 | + "fields": { | |
| 1431 | + "keyword": { | |
| 1432 | + "type": "keyword", | |
| 1433 | + "normalizer": "lowercase" | |
| 1434 | + } | |
| 1435 | + } | |
| 1436 | + } | |
| 1437 | + } | |
| 1311 | 1438 | } |
| 1312 | 1439 | } |
| 1313 | 1440 | }, |
| ... | ... | @@ -1321,13 +1448,82 @@ |
| 1321 | 1448 | "type": "keyword" |
| 1322 | 1449 | }, |
| 1323 | 1450 | "option1_values": { |
| 1324 | - "type": "keyword" | |
| 1451 | + "type": "object", | |
| 1452 | + "properties": { | |
| 1453 | + "zh": { | |
| 1454 | + "type": "text", | |
| 1455 | + "analyzer": "index_ik", | |
| 1456 | + "search_analyzer": "query_ik", | |
| 1457 | + "fields": { | |
| 1458 | + "keyword": { | |
| 1459 | + "type": "keyword", | |
| 1460 | + "normalizer": "lowercase" | |
| 1461 | + } | |
| 1462 | + } | |
| 1463 | + }, | |
| 1464 | + "en": { | |
| 1465 | + "type": "text", | |
| 1466 | + "analyzer": "english", | |
| 1467 | + "fields": { | |
| 1468 | + "keyword": { | |
| 1469 | + "type": "keyword", | |
| 1470 | + "normalizer": "lowercase" | |
| 1471 | + } | |
| 1472 | + } | |
| 1473 | + } | |
| 1474 | + } | |
| 1325 | 1475 | }, |
| 1326 | 1476 | "option2_values": { |
| 1327 | - "type": "keyword" | |
| 1477 | + "type": "object", | |
| 1478 | + "properties": { | |
| 1479 | + "zh": { | |
| 1480 | + "type": "text", | |
| 1481 | + "analyzer": "index_ik", | |
| 1482 | + "search_analyzer": "query_ik", | |
| 1483 | + "fields": { | |
| 1484 | + "keyword": { | |
| 1485 | + "type": "keyword", | |
| 1486 | + "normalizer": "lowercase" | |
| 1487 | + } | |
| 1488 | + } | |
| 1489 | + }, | |
| 1490 | + "en": { | |
| 1491 | + "type": "text", | |
| 1492 | + "analyzer": "english", | |
| 1493 | + "fields": { | |
| 1494 | + "keyword": { | |
| 1495 | + "type": "keyword", | |
| 1496 | + "normalizer": "lowercase" | |
| 1497 | + } | |
| 1498 | + } | |
| 1499 | + } | |
| 1500 | + } | |
| 1328 | 1501 | }, |
| 1329 | 1502 | "option3_values": { |
| 1330 | - "type": "keyword" | |
| 1503 | + "type": "object", | |
| 1504 | + "properties": { | |
| 1505 | + "zh": { | |
| 1506 | + "type": "text", | |
| 1507 | + "analyzer": "index_ik", | |
| 1508 | + "search_analyzer": "query_ik", | |
| 1509 | + "fields": { | |
| 1510 | + "keyword": { | |
| 1511 | + "type": "keyword", | |
| 1512 | + "normalizer": "lowercase" | |
| 1513 | + } | |
| 1514 | + } | |
| 1515 | + }, | |
| 1516 | + "en": { | |
| 1517 | + "type": "text", | |
| 1518 | + "analyzer": "english", | |
| 1519 | + "fields": { | |
| 1520 | + "keyword": { | |
| 1521 | + "type": "keyword", | |
| 1522 | + "normalizer": "lowercase" | |
| 1523 | + } | |
| 1524 | + } | |
| 1525 | + } | |
| 1526 | + } | |
| 1331 | 1527 | }, |
| 1332 | 1528 | "min_price": { |
| 1333 | 1529 | "type": "float" |
| ... | ... | @@ -1391,20 +1587,6 @@ |
| 1391 | 1587 | "index": false |
| 1392 | 1588 | } |
| 1393 | 1589 | } |
| 1394 | - }, | |
| 1395 | - "semantic_attributes": { | |
| 1396 | - "type": "nested", | |
| 1397 | - "properties": { | |
| 1398 | - "lang": { | |
| 1399 | - "type": "keyword" | |
| 1400 | - }, | |
| 1401 | - "name": { | |
| 1402 | - "type": "keyword" | |
| 1403 | - }, | |
| 1404 | - "value": { | |
| 1405 | - "type": "keyword" | |
| 1406 | - } | |
| 1407 | - } | |
| 1408 | 1590 | } |
| 1409 | 1591 | } |
| 1410 | 1592 | } | ... | ... |
scripts/es_debug_search.py
| ... | ... | @@ -279,7 +279,9 @@ def _run_es( |
| 279 | 279 | body: Dict[str, Any], |
| 280 | 280 | size: int, |
| 281 | 281 | ) -> List[Dict[str, Any]]: |
| 282 | - resp = es.search(index=index_name, body=body, size=size) | |
| 282 | + # Avoid passing size= alongside body= (deprecated in elasticsearch-py). | |
| 283 | + payload = {**body, "size": size} | |
| 284 | + resp = es.search(index=index_name, body=payload) | |
| 283 | 285 | if hasattr(resp, "body"): |
| 284 | 286 | payload = resp.body |
| 285 | 287 | else: | ... | ... |