Blame view

tests/test_llm_enrichment_batch_fill.py 2.53 KB
be3f0d46   tangwang   /indexer/enrich-c...
1
2
3
4
5
6
7
8
9
  from __future__ import annotations
  
  from typing import Any, Dict, List
  
  import pandas as pd
  
  from indexer.document_transformer import SPUDocumentTransformer
  
  
d350861f   tangwang   索引结构修改
10
  def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch):
      """Check that ``fill_llm_attributes_batch`` goes through the enrich helper.

      ``build_index_content_fields`` in ``indexer.document_transformer`` is
      swapped for a recording stub. The test then expects:

      * a single helper call covering all 45 items, carrying tenant id ``"162"``
        and the ``"apparel"`` category taxonomy profile;
      * the stub's ``qanchors``, ``enriched_tags``, ``enriched_attributes`` and
        ``enriched_taxonomy_attributes`` values to be present on the first doc.
      """
      import indexer.document_transformer as transformer_module

      recorded_calls: List[Dict[str, Any]] = []

      def _stub_build_index_content_fields(items, tenant_id=None, category_taxonomy_profile=None):
          # Keep only the call shape; it is compared against the expectation below.
          recorded_calls.append(
              dict(
                  n=len(items),
                  tenant_id=tenant_id,
                  category_taxonomy_profile=category_taxonomy_profile,
              )
          )

          enriched: List[Dict[str, Any]] = []
          for entry in items:
              entry_id = entry["id"]
              enriched.append(
                  {
                      "id": entry_id,
                      "qanchors": {
                          "zh": [f"zh-anchor-{entry_id}"],
                          "en": [f"en-anchor-{entry_id}"],
                      },
                      "enriched_tags": {"zh": ["t1", "t2"], "en": ["t1", "t2"]},
                      "enriched_attributes": [
                          {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}},
                      ],
                      "enriched_taxonomy_attributes": [
                          {"name": "Product Type", "value": {"zh": ["连衣裙"], "en": ["dress"]}},
                      ],
                  }
              )
          return enriched

      monkeypatch.setattr(
          transformer_module,
          "build_index_content_fields",
          _stub_build_index_content_fields,
      )

      # Translation and embedding collaborators are all switched off.
      transformer_kwargs: Dict[str, Any] = {
          "category_id_to_name": {},
          "searchable_option_dimensions": [],
          "tenant_config": {"index_languages": ["zh", "en"], "primary_language": "zh"},
          "translator": None,
          "encoder": None,
          "enable_title_embedding": False,
          "image_encoder": None,
          "enable_image_embedding": False,
      }
      transformer = SPUDocumentTransformer(**transformer_kwargs)

      indices = range(45)
      docs: List[Dict[str, Any]] = [{"tenant_id": "162", "spu_id": str(i)} for i in indices]
      rows: List[pd.Series] = [pd.Series({"id": i, "title": f"title-{i}"}) for i in indices]

      transformer.fill_llm_attributes_batch(docs, rows)

      expected_call = {"n": 45, "tenant_id": "162", "category_taxonomy_profile": "apparel"}
      assert recorded_calls == [expected_call]

      first_doc = docs[0]

      anchors = first_doc["qanchors"]
      assert anchors["zh"] == ["zh-anchor-0"]
      assert anchors["en"] == ["en-anchor-0"]

      tags = first_doc["enriched_tags"]
      assert tags["zh"] == ["t1", "t2"]
      assert tags["en"] == ["t1", "t2"]

      expected_attribute = {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}}
      assert expected_attribute in first_doc["enriched_attributes"]

      expected_taxonomy_attribute = {
          "name": "Product Type",
          "value": {"zh": ["连衣裙"], "en": ["dress"]},
      }
      assert expected_taxonomy_attribute in first_doc["enriched_taxonomy_attributes"]