be3f0d46
tangwang
/indexer/enrich-c...
|
1
2
3
4
5
6
7
8
9
|
from __future__ import annotations
from typing import Any, Dict, List
import pandas as pd
from indexer.document_transformer import SPUDocumentTransformer
|
d350861f
tangwang
索引结构修改
|
10
|
def test_fill_llm_attributes_batch_uses_product_enrich_helper(monkeypatch):
    """Check that ``fill_llm_attributes_batch`` routes through the enrich helper.

    ``build_index_content_fields`` on ``indexer.document_transformer`` is
    replaced with a recording stub.  The test then feeds 45 docs/rows to the
    transformer and asserts that:

    * the stub was invoked exactly once, with all 45 items, tenant ``"162"``
      and taxonomy profile ``"apparel"``;
    * the stub's per-item payload (qanchors, enriched tags, enriched
      attributes, enriched taxonomy attributes) is visible on the first doc.
    """
    import indexer.document_transformer as doc_tr

    batch_size = 45
    recorded_calls: List[Dict[str, Any]] = []

    def _enrichment_for(item):
        # A fresh dict (and fresh nested containers) is built for every item
        # so that no two results share mutable state.
        item_id = item["id"]
        return {
            "id": item_id,
            "qanchors": {
                "zh": [f"zh-anchor-{item_id}"],
                "en": [f"en-anchor-{item_id}"],
            },
            "enriched_tags": {"zh": ["t1", "t2"], "en": ["t1", "t2"]},
            "enriched_attributes": [
                {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}},
            ],
            "enriched_taxonomy_attributes": [
                {"name": "Product Type", "value": {"zh": ["连衣裙"], "en": ["dress"]}},
            ],
        }

    def _stub_build_index_content_fields(items, tenant_id=None, category_taxonomy_profile=None):
        # Parameter names are kept as-is: the code under test may pass them
        # by keyword.
        call_record = {
            "n": len(items),
            "tenant_id": tenant_id,
            "category_taxonomy_profile": category_taxonomy_profile,
        }
        recorded_calls.append(call_record)

        results = []
        for item in items:
            results.append(_enrichment_for(item))
        return results

    monkeypatch.setattr(doc_tr, "build_index_content_fields", _stub_build_index_content_fields)

    tenant_config = {"index_languages": ["zh", "en"], "primary_language": "zh"}
    transformer = SPUDocumentTransformer(
        category_id_to_name={},
        searchable_option_dimensions=[],
        tenant_config=tenant_config,
        translator=None,
        encoder=None,
        enable_title_embedding=False,
        image_encoder=None,
        enable_image_embedding=False,
    )

    docs: List[Dict[str, Any]] = [
        {"tenant_id": "162", "spu_id": str(idx)} for idx in range(batch_size)
    ]
    rows: List[pd.Series] = [
        pd.Series({"id": idx, "title": f"title-{idx}"}) for idx in range(batch_size)
    ]

    transformer.fill_llm_attributes_batch(docs, rows)

    # Exactly one helper call covering the whole batch.
    expected_calls = [
        {"n": 45, "tenant_id": "162", "category_taxonomy_profile": "apparel"},
    ]
    assert recorded_calls == expected_calls

    first_doc = docs[0]

    assert first_doc["qanchors"]["zh"] == ["zh-anchor-0"]
    assert first_doc["qanchors"]["en"] == ["en-anchor-0"]

    assert first_doc["enriched_tags"]["zh"] == ["t1", "t2"]
    assert first_doc["enriched_tags"]["en"] == ["t1", "t2"]

    expected_attribute = {"name": "tags", "value": {"zh": ["t1"], "en": ["t1"]}}
    assert expected_attribute in first_doc["enriched_attributes"]

    expected_taxonomy_attribute = {
        "name": "Product Type",
        "value": {"zh": ["连衣裙"], "en": ["dress"]},
    }
    assert expected_taxonomy_attribute in first_doc["enriched_taxonomy_attributes"]
|