Blame view

tests/test_process_products_batching.py 3.24 KB
be3f0d46   tangwang   /indexer/enrich-c...
1
2
3
4
  from __future__ import annotations
  
  from typing import Any, Dict, List
  
6f7840cf   tangwang   refactor: rename ...
5
  import indexer.product_enrich as process_products
be3f0d46   tangwang   /indexer/enrich-c...
6
7
8
9
10
11
12
13
14
15
  
  
  def _mk_products(n: int) -> List[Dict[str, str]]:
      return [{"id": str(i), "title": f"title-{i}"} for i in range(n)]
  
  
  def test_analyze_products_caps_batch_size_to_20(monkeypatch):
      """An oversized ``batch_size`` must be capped at 20 items per batch.

      45 products are submitted with ``batch_size=200``. The stubbed
      ``process_batch`` records how many items each call receives, and the
      recorded sizes are expected to be two batches of 20 plus one of 5.
      """
      # Result fields the stub leaves empty, in the order they appear in a row.
      blank_fields = (
          "title",
          "category_path",
          "tags",
          "target_audience",
          "usage_scene",
          "season",
          "key_attributes",
          "material",
          "features",
          "anchor_text",
      )
      observed_sizes: List[int] = []

      def _stub_process_batch(
          batch_data: List[Dict[str, str]],
          batch_num: int,
          target_lang: str = "zh",
          analysis_kind: str = "content",
          category_taxonomy_profile=None,
      ):
          # Guard: this test only expects content analysis without a taxonomy profile.
          assert analysis_kind == "content"
          assert category_taxonomy_profile is None
          observed_sizes.append(len(batch_data))

          rows = []
          for item in batch_data:
              row = {
                  "id": item["id"],
                  "lang": target_lang,
                  "title_input": item["title"],
              }
              row.update(dict.fromkeys(blank_fields, ""))
              rows.append(row)
          return rows

      def _skip_cache_write(*args, **kwargs):
          # Stand-in for the cache writer: accepts anything, does nothing.
          return None

      # Dummy key; presumably analyze_products needs one configured to proceed.
      monkeypatch.setattr(process_products, "API_KEY", "fake-key")
      monkeypatch.setattr(process_products, "process_batch", _stub_process_batch)
      monkeypatch.setattr(process_products, "_set_cached_analysis_result", _skip_cache_write)

      products = _mk_products(45)
      out = process_products.analyze_products(
          products=products,
          target_lang="zh",
          batch_size=200,
          tenant_id="162",
      )

      assert len(out) == 45
      # Batches may run concurrently, so the call order can vary; compare the
      # sorted batch sizes rather than the exact call sequence.
      assert sorted(observed_sizes) == [5, 20, 20]
be3f0d46   tangwang   /indexer/enrich-c...
58
59
60
61
62
63
  
  
  def test_analyze_products_uses_min_batch_size_1(monkeypatch):
      """A ``batch_size`` of 0 must be raised to a minimum of one item per batch.

      Three products are submitted with ``batch_size=0``. The stubbed
      ``process_batch`` records the size of every batch it receives, and three
      single-item batches are expected.
      """
      # Result fields the stub leaves empty, in the order they appear in a row.
      blank_fields = (
          "title",
          "category_path",
          "tags",
          "target_audience",
          "usage_scene",
          "season",
          "key_attributes",
          "material",
          "features",
          "anchor_text",
      )
      observed_sizes: List[int] = []

      def _stub_process_batch(
          batch_data: List[Dict[str, str]],
          batch_num: int,
          target_lang: str = "zh",
          analysis_kind: str = "content",
          category_taxonomy_profile=None,
      ):
          # Guard: this test only expects content analysis without a taxonomy profile.
          assert analysis_kind == "content"
          assert category_taxonomy_profile is None
          observed_sizes.append(len(batch_data))

          rows = []
          for item in batch_data:
              row = {
                  "id": item["id"],
                  "lang": target_lang,
                  "title_input": item["title"],
              }
              row.update(dict.fromkeys(blank_fields, ""))
              rows.append(row)
          return rows

      def _skip_cache_write(*args, **kwargs):
          # Stand-in for the cache writer: accepts anything, does nothing.
          return None

      # Dummy key; presumably analyze_products needs one configured to proceed.
      monkeypatch.setattr(process_products, "API_KEY", "fake-key")
      monkeypatch.setattr(process_products, "process_batch", _stub_process_batch)
      monkeypatch.setattr(process_products, "_set_cached_analysis_result", _skip_cache_write)

      products = _mk_products(3)
      out = process_products.analyze_products(
          products=products,
          target_lang="zh",
          batch_size=0,
          tenant_id="162",
      )

      assert len(out) == 3
      # Every batch has the same size, so call order does not affect this check.
      assert observed_sizes == [1, 1, 1]