Commit 76e1f08890671791eb5c646c9f1e1c47c9f1e9dd

Authored by tangwang
1 parent a73a751f

1. 去掉 selling_points 一列。大模型输出有时会把最后两列(selling_points 与 anchor_text)混淆,因此干脆去掉其中一列,并将卖点信息并入 anchor_text。

2. 优化缓存:缓存粒度改为商品级,每次只对 batch 中未命中缓存的商品重新计算;缓存 key 使用每个商品输入的 hash。
indexer/ANCHORS_AND_SEMANTIC_ATTRIBUTES.md
@@ -135,7 +135,6 @@ SUPPORTED_LANGS = set(LANG_LABELS.keys()) @@ -135,7 +135,6 @@ SUPPORTED_LANGS = set(LANG_LABELS.keys())
135 "key_attributes": "<逗号分隔的关键属性>", 135 "key_attributes": "<逗号分隔的关键属性>",
136 "material": "<逗号分隔的材质说明>", 136 "material": "<逗号分隔的材质说明>",
137 "features": "<逗号分隔的功能特点>", 137 "features": "<逗号分隔的功能特点>",
138 - "selling_points": "<一句话卖点>",  
139 "anchor_text": "<逗号分隔的锚文本短语>", 138 "anchor_text": "<逗号分隔的锚文本短语>",
140 # 若发生错误,还会附带: 139 # 若发生错误,还会附带:
141 # "error": "<异常信息>" 140 # "error": "<异常信息>"
indexer/product_enrich.py
@@ -425,8 +425,7 @@ def parse_markdown_table(markdown_content: str) -> List[Dict[str, str]]: @@ -425,8 +425,7 @@ def parse_markdown_table(markdown_content: str) -> List[Dict[str, str]]:
425 "key_attributes": parts[7] if len(parts) > 7 else "", # 关键属性 425 "key_attributes": parts[7] if len(parts) > 7 else "", # 关键属性
426 "material": parts[8] if len(parts) > 8 else "", # 材质说明 426 "material": parts[8] if len(parts) > 8 else "", # 材质说明
427 "features": parts[9] if len(parts) > 9 else "", # 功能特点 427 "features": parts[9] if len(parts) > 9 else "", # 功能特点
428 - "selling_points": parts[10] if len(parts) > 10 else "", # 商品卖点  
429 - "anchor_text": parts[11] if len(parts) > 11 else "", # 锚文本 428 + "anchor_text": parts[10] if len(parts) > 10 else "", # 锚文本
430 } 429 }
431 data.append(row) 430 data.append(row)
432 431
@@ -503,7 +502,6 @@ def process_batch( @@ -503,7 +502,6 @@ def process_batch(
503 "key_attributes": "", 502 "key_attributes": "",
504 "material": "", 503 "material": "",
505 "features": "", 504 "features": "",
506 - "selling_points": "",  
507 "anchor_text": "", 505 "anchor_text": "",
508 "error": f"prompt_creation_failed: unsupported target_lang={target_lang}", 506 "error": f"prompt_creation_failed: unsupported target_lang={target_lang}",
509 } 507 }
@@ -544,7 +542,6 @@ def process_batch( @@ -544,7 +542,6 @@ def process_batch(
544 "key_attributes": parsed_item.get("key_attributes", ""), # 关键属性 542 "key_attributes": parsed_item.get("key_attributes", ""), # 关键属性
545 "material": parsed_item.get("material", ""), # 材质说明 543 "material": parsed_item.get("material", ""), # 材质说明
546 "features": parsed_item.get("features", ""), # 功能特点 544 "features": parsed_item.get("features", ""), # 功能特点
547 - "selling_points": parsed_item.get("selling_points", ""), # 商品卖点  
548 "anchor_text": parsed_item.get("anchor_text", ""), # 锚文本 545 "anchor_text": parsed_item.get("anchor_text", ""), # 锚文本
549 } 546 }
550 results_with_ids.append(result) 547 results_with_ids.append(result)
@@ -586,7 +583,6 @@ def process_batch( @@ -586,7 +583,6 @@ def process_batch(
586 "key_attributes": "", 583 "key_attributes": "",
587 "material": "", 584 "material": "",
588 "features": "", 585 "features": "",
589 - "selling_points": "",  
590 "anchor_text": "", 586 "anchor_text": "",
591 "error": str(e), 587 "error": str(e),
592 } 588 }
@@ -614,39 +610,48 @@ def analyze_products( @@ -614,39 +610,48 @@ def analyze_products(
614 if not products: 610 if not products:
615 return [] 611 return []
616 612
617 - # 简单路径:索引阶段通常 batch_size=1,这里优先做单条缓存命中  
618 - if len(products) == 1:  
619 - p = products[0]  
620 - title = str(p.get("title") or "").strip()  
621 - if title:  
622 - cached = _get_cached_anchor_result(title, target_lang, tenant_id=tenant_id)  
623 - if cached:  
624 - logger.info(  
625 - f"[analyze_products] Cache hit for title='{title[:50]}...', "  
626 - f"lang={target_lang}, tenant_id={tenant_id or 'global'}"  
627 - )  
628 - return [cached] 613 + results_by_index: List[Optional[Dict[str, Any]]] = [None] * len(products)
  614 + uncached_items: List[Tuple[int, Dict[str, str]]] = []
  615 +
  616 + for idx, product in enumerate(products):
  617 + title = str(product.get("title") or "").strip()
  618 + if not title:
  619 + uncached_items.append((idx, product))
  620 + continue
  621 +
  622 + cached = _get_cached_anchor_result(title, target_lang, tenant_id=tenant_id)
  623 + if cached:
  624 + logger.info(
  625 + f"[analyze_products] Cache hit for title='{title[:50]}...', "
  626 + f"lang={target_lang}, tenant_id={tenant_id or 'global'}"
  627 + )
  628 + results_by_index[idx] = cached
  629 + continue
  630 +
  631 + uncached_items.append((idx, product))
  632 +
  633 + if not uncached_items:
  634 + return [item for item in results_by_index if item is not None]
629 635
630 # call_llm 一次处理上限固定为 BATCH_SIZE(默认 20): 636 # call_llm 一次处理上限固定为 BATCH_SIZE(默认 20):
631 # - 尽可能攒批处理; 637 # - 尽可能攒批处理;
632 # - 即便调用方传入更大的 batch_size,也会自动按上限拆批。 638 # - 即便调用方传入更大的 batch_size,也会自动按上限拆批。
633 req_bs = BATCH_SIZE if batch_size is None else int(batch_size) 639 req_bs = BATCH_SIZE if batch_size is None else int(batch_size)
634 bs = max(1, min(req_bs, BATCH_SIZE)) 640 bs = max(1, min(req_bs, BATCH_SIZE))
635 - all_results: List[Dict[str, Any]] = []  
636 - total_batches = (len(products) + bs - 1) // bs 641 + total_batches = (len(uncached_items) + bs - 1) // bs
637 642
638 - for i in range(0, len(products), bs): 643 + for i in range(0, len(uncached_items), bs):
639 batch_num = i // bs + 1 644 batch_num = i // bs + 1
640 - batch = products[i : i + bs] 645 + batch_slice = uncached_items[i : i + bs]
  646 + batch = [item for _, item in batch_slice]
641 logger.info( 647 logger.info(
642 f"[analyze_products] Processing batch {batch_num}/{total_batches}, " 648 f"[analyze_products] Processing batch {batch_num}/{total_batches}, "
643 f"size={len(batch)}, target_lang={target_lang}" 649 f"size={len(batch)}, target_lang={target_lang}"
644 ) 650 )
645 batch_results = process_batch(batch, batch_num=batch_num, target_lang=target_lang) 651 batch_results = process_batch(batch, batch_num=batch_num, target_lang=target_lang)
646 - all_results.extend(batch_results)  
647 652
648 - # 写入缓存  
649 - for item in batch_results: 653 + for (original_idx, _), item in zip(batch_slice, batch_results):
  654 + results_by_index[original_idx] = item
650 title_input = str(item.get("title_input") or "").strip() 655 title_input = str(item.get("title_input") or "").strip()
651 if not title_input: 656 if not title_input:
652 continue 657 continue
@@ -659,4 +664,4 @@ def analyze_products( @@ -659,4 +664,4 @@ def analyze_products(
659 # 已在内部记录 warning 664 # 已在内部记录 warning
660 pass 665 pass
661 666
662 - return all_results 667 + return [item for item in results_by_index if item is not None]
indexer/product_enrich_prompts.py
@@ -19,8 +19,7 @@ SHARED_ANALYSIS_INSTRUCTION = """Analyze each input product title and fill these @@ -19,8 +19,7 @@ SHARED_ANALYSIS_INSTRUCTION = """Analyze each input product title and fill these
19 7. Key attributes 19 7. Key attributes
20 8. Material description 20 8. Material description
21 9. Functional features 21 9. Functional features
22 -10. Selling point: one concise core selling phrase  
23 -11. Anchor text: a set of search-oriented words or phrases covering category, attributes, scenes, and demand 22 +10. Anchor text: a search-focused set of keywords, selling points, and phrases covering categories, attributes, usage scenarios, and user intent
24 23
25 Rules: 24 Rules:
26 - Keep the input order and row count exactly the same. 25 - Keep the input order and row count exactly the same.
@@ -46,7 +45,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -46,7 +45,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
46 "Key attributes", 45 "Key attributes",
47 "Material", 46 "Material",
48 "Features", 47 "Features",
49 - "Selling point",  
50 "Anchor text" 48 "Anchor text"
51 ], 49 ],
52 "zh": [ 50 "zh": [
@@ -60,7 +58,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -60,7 +58,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
60 "关键属性", 58 "关键属性",
61 "材质说明", 59 "材质说明",
62 "功能特点", 60 "功能特点",
63 - "商品卖点",  
64 "锚文本" 61 "锚文本"
65 ], 62 ],
66 "zh_tw": [ 63 "zh_tw": [
@@ -74,7 +71,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -74,7 +71,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
74 "關鍵屬性", 71 "關鍵屬性",
75 "材質說明", 72 "材質說明",
76 "功能特點", 73 "功能特點",
77 - "商品賣點",  
78 "錨文本" 74 "錨文本"
79 ], 75 ],
80 "ru": [ 76 "ru": [
@@ -88,7 +84,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -88,7 +84,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
88 "Ключевые атрибуты", 84 "Ключевые атрибуты",
89 "Материал", 85 "Материал",
90 "Особенности", 86 "Особенности",
91 - "Преимущество товара",  
92 "Анкорный текст" 87 "Анкорный текст"
93 ], 88 ],
94 "ja": [ 89 "ja": [
@@ -102,7 +97,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -102,7 +97,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
102 "主要属性", 97 "主要属性",
103 "素材", 98 "素材",
104 "機能特徴", 99 "機能特徴",
105 - "商品の訴求点",  
106 "アンカーテキスト" 100 "アンカーテキスト"
107 ], 101 ],
108 "ko": [ 102 "ko": [
@@ -116,7 +110,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -116,7 +110,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
116 "핵심 속성", 110 "핵심 속성",
117 "소재", 111 "소재",
118 "기능 특징", 112 "기능 특징",
119 - "상품 포인트",  
120 "앵커 텍스트" 113 "앵커 텍스트"
121 ], 114 ],
122 "es": [ 115 "es": [
@@ -130,7 +123,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -130,7 +123,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
130 "Atributos clave", 123 "Atributos clave",
131 "Material", 124 "Material",
132 "Caracteristicas", 125 "Caracteristicas",
133 - "Punto de venta",  
134 "Texto ancla" 126 "Texto ancla"
135 ], 127 ],
136 "fr": [ 128 "fr": [
@@ -144,7 +136,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -144,7 +136,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
144 "Attributs cles", 136 "Attributs cles",
145 "Matiere", 137 "Matiere",
146 "Caracteristiques", 138 "Caracteristiques",
147 - "Argument de vente",  
148 "Texte d'ancrage" 139 "Texte d'ancrage"
149 ], 140 ],
150 "pt": [ 141 "pt": [
@@ -158,7 +149,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -158,7 +149,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
158 "Atributos principais", 149 "Atributos principais",
159 "Material", 150 "Material",
160 "Caracteristicas", 151 "Caracteristicas",
161 - "Ponto de venda",  
162 "Texto ancora" 152 "Texto ancora"
163 ], 153 ],
164 "de": [ 154 "de": [
@@ -172,7 +162,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -172,7 +162,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
172 "Wichtige Attribute", 162 "Wichtige Attribute",
173 "Material", 163 "Material",
174 "Funktionen", 164 "Funktionen",
175 - "Verkaufsargument",  
176 "Ankertext" 165 "Ankertext"
177 ], 166 ],
178 "it": [ 167 "it": [
@@ -186,7 +175,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -186,7 +175,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
186 "Attributi chiave", 175 "Attributi chiave",
187 "Materiale", 176 "Materiale",
188 "Caratteristiche", 177 "Caratteristiche",
189 - "Punto di forza",  
190 "Testo ancora" 178 "Testo ancora"
191 ], 179 ],
192 "th": [ 180 "th": [
@@ -200,7 +188,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -200,7 +188,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
200 "คุณสมบัติสำคัญ", 188 "คุณสมบัติสำคัญ",
201 "วัสดุ", 189 "วัสดุ",
202 "คุณสมบัติการใช้งาน", 190 "คุณสมบัติการใช้งาน",
203 - "จุดขายสินค้า",  
204 "แองเคอร์เท็กซ์" 191 "แองเคอร์เท็กซ์"
205 ], 192 ],
206 "vi": [ 193 "vi": [
@@ -214,7 +201,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -214,7 +201,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
214 "Thuoc tinh chinh", 201 "Thuoc tinh chinh",
215 "Chat lieu", 202 "Chat lieu",
216 "Tinh nang", 203 "Tinh nang",
217 - "Diem ban hang",  
218 "Van ban neo" 204 "Van ban neo"
219 ], 205 ],
220 "id": [ 206 "id": [
@@ -228,7 +214,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -228,7 +214,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
228 "Atribut utama", 214 "Atribut utama",
229 "Bahan", 215 "Bahan",
230 "Fitur", 216 "Fitur",
231 - "Nilai jual",  
232 "Teks jangkar" 217 "Teks jangkar"
233 ], 218 ],
234 "ms": [ 219 "ms": [
@@ -242,7 +227,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -242,7 +227,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
242 "Atribut utama", 227 "Atribut utama",
243 "Bahan", 228 "Bahan",
244 "Ciri-ciri", 229 "Ciri-ciri",
245 - "Nilai jual",  
246 "Teks sauh" 230 "Teks sauh"
247 ], 231 ],
248 "ar": [ 232 "ar": [
@@ -256,7 +240,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -256,7 +240,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
256 "السمات الرئيسية", 240 "السمات الرئيسية",
257 "المادة", 241 "المادة",
258 "الميزات", 242 "الميزات",
259 - "نقطة البيع",  
260 "نص الربط" 243 "نص الربط"
261 ], 244 ],
262 "hi": [ 245 "hi": [
@@ -270,7 +253,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -270,7 +253,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
270 "मुख्य गुण", 253 "मुख्य गुण",
271 "सामग्री", 254 "सामग्री",
272 "विशेषताएं", 255 "विशेषताएं",
273 - "बिक्री बिंदु",  
274 "एंकर टेक्स्ट" 256 "एंकर टेक्स्ट"
275 ], 257 ],
276 "he": [ 258 "he": [
@@ -284,7 +266,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -284,7 +266,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
284 "מאפיינים מרכזיים", 266 "מאפיינים מרכזיים",
285 "חומר", 267 "חומר",
286 "תכונות", 268 "תכונות",
287 - "נקודת מכירה",  
288 "טקסט עוגן" 269 "טקסט עוגן"
289 ], 270 ],
290 "my": [ 271 "my": [
@@ -298,7 +279,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -298,7 +279,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
298 "အဓိကဂုဏ်သတ္တိများ", 279 "အဓိကဂုဏ်သတ္တိများ",
299 "ပစ္စည်း", 280 "ပစ္စည်း",
300 "လုပ်ဆောင်ချက်များ", 281 "လုပ်ဆောင်ချက်များ",
301 - "အရောင်းထူးခြားချက်",  
302 "အန်ကာစာသား" 282 "အန်ကာစာသား"
303 ], 283 ],
304 "ta": [ 284 "ta": [
@@ -312,7 +292,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -312,7 +292,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
312 "முக்கிய பண்புகள்", 292 "முக்கிய பண்புகள்",
313 "பொருள்", 293 "பொருள்",
314 "அம்சங்கள்", 294 "அம்சங்கள்",
315 - "விற்பனை அம்சம்",  
316 "ஆங்கர் உரை" 295 "ஆங்கர் உரை"
317 ], 296 ],
318 "ur": [ 297 "ur": [
@@ -326,7 +305,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -326,7 +305,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
326 "کلیدی خصوصیات", 305 "کلیدی خصوصیات",
327 "مواد", 306 "مواد",
328 "فیچرز", 307 "فیچرز",
329 - "فروختی نقطہ",  
330 "اینکر ٹیکسٹ" 308 "اینکر ٹیکسٹ"
331 ], 309 ],
332 "bn": [ 310 "bn": [
@@ -340,7 +318,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -340,7 +318,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
340 "মূল বৈশিষ্ট্য", 318 "মূল বৈশিষ্ট্য",
341 "উপাদান", 319 "উপাদান",
342 "ফিচার", 320 "ফিচার",
343 - "বিক্রয় পয়েন্ট",  
344 "অ্যাঙ্কর টেক্সট" 321 "অ্যাঙ্কর টেক্সট"
345 ], 322 ],
346 "pl": [ 323 "pl": [
@@ -354,7 +331,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -354,7 +331,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
354 "Kluczowe atrybuty", 331 "Kluczowe atrybuty",
355 "Material", 332 "Material",
356 "Cechy", 333 "Cechy",
357 - "Atut sprzedazowy",  
358 "Tekst kotwicy" 334 "Tekst kotwicy"
359 ], 335 ],
360 "nl": [ 336 "nl": [
@@ -368,7 +344,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -368,7 +344,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
368 "Belangrijke kenmerken", 344 "Belangrijke kenmerken",
369 "Materiaal", 345 "Materiaal",
370 "Functies", 346 "Functies",
371 - "Verkooppunt",  
372 "Ankertekst" 347 "Ankertekst"
373 ], 348 ],
374 "ro": [ 349 "ro": [
@@ -382,7 +357,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -382,7 +357,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
382 "Atribute cheie", 357 "Atribute cheie",
383 "Material", 358 "Material",
384 "Caracteristici", 359 "Caracteristici",
385 - "Punct de vanzare",  
386 "Text ancora" 360 "Text ancora"
387 ], 361 ],
388 "tr": [ 362 "tr": [
@@ -396,7 +370,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -396,7 +370,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
396 "Temel ozellikler", 370 "Temel ozellikler",
397 "Malzeme", 371 "Malzeme",
398 "Ozellikler", 372 "Ozellikler",
399 - "Satis noktasi",  
400 "Capa metni" 373 "Capa metni"
401 ], 374 ],
402 "km": [ 375 "km": [
@@ -410,7 +383,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -410,7 +383,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
410 "លក្ខណៈសម្បត្តិសំខាន់", 383 "លក្ខណៈសម្បត្តិសំខាន់",
411 "សម្ភារៈ", 384 "សម្ភារៈ",
412 "មុខងារ", 385 "មុខងារ",
413 - "ចំណុចលក់",  
414 "អត្ថបទអង់ក័រ" 386 "អត្ថបទអង់ក័រ"
415 ], 387 ],
416 "lo": [ 388 "lo": [
@@ -424,7 +396,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -424,7 +396,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
424 "ຄຸນລັກສະນະສຳຄັນ", 396 "ຄຸນລັກສະນະສຳຄັນ",
425 "ວັດສະດຸ", 397 "ວັດສະດຸ",
426 "ຄຸນສົມບັດ", 398 "ຄຸນສົມບັດ",
427 - "ຈຸດຂາຍ",  
428 "ຂໍ້ຄວາມອັງເຄີ" 399 "ຂໍ້ຄວາມອັງເຄີ"
429 ], 400 ],
430 "yue": [ 401 "yue": [
@@ -438,7 +409,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -438,7 +409,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
438 "關鍵屬性", 409 "關鍵屬性",
439 "材質說明", 410 "材質說明",
440 "功能特點", 411 "功能特點",
441 - "商品賣點",  
442 "錨文本" 412 "錨文本"
443 ], 413 ],
444 "cs": [ 414 "cs": [
@@ -452,7 +422,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -452,7 +422,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
452 "Klicove atributy", 422 "Klicove atributy",
453 "Material", 423 "Material",
454 "Vlastnosti", 424 "Vlastnosti",
455 - "Prodejni argument",  
456 "Kotvici text" 425 "Kotvici text"
457 ], 426 ],
458 "el": [ 427 "el": [
@@ -466,7 +435,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -466,7 +435,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
466 "Βασικά χαρακτηριστικά", 435 "Βασικά χαρακτηριστικά",
467 "Υλικό", 436 "Υλικό",
468 "Λειτουργίες", 437 "Λειτουργίες",
469 - "Σημείο πώλησης",  
470 "Κείμενο άγκυρας" 438 "Κείμενο άγκυρας"
471 ], 439 ],
472 "sv": [ 440 "sv": [
@@ -480,7 +448,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -480,7 +448,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
480 "Viktiga attribut", 448 "Viktiga attribut",
481 "Material", 449 "Material",
482 "Funktioner", 450 "Funktioner",
483 - "Saljpunkt",  
484 "Ankartext" 451 "Ankartext"
485 ], 452 ],
486 "hu": [ 453 "hu": [
@@ -494,7 +461,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -494,7 +461,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
494 "Fo jellemzok", 461 "Fo jellemzok",
495 "Anyag", 462 "Anyag",
496 "Funkciok", 463 "Funkciok",
497 - "Ertekesitesi elony",  
498 "Horgonyszoveg" 464 "Horgonyszoveg"
499 ], 465 ],
500 "da": [ 466 "da": [
@@ -508,7 +474,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -508,7 +474,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
508 "Nogleattributter", 474 "Nogleattributter",
509 "Materiale", 475 "Materiale",
510 "Funktioner", 476 "Funktioner",
511 - "Salgsargument",  
512 "Ankertekst" 477 "Ankertekst"
513 ], 478 ],
514 "fi": [ 479 "fi": [
@@ -522,7 +487,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -522,7 +487,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
522 "Keskeiset ominaisuudet", 487 "Keskeiset ominaisuudet",
523 "Materiaali", 488 "Materiaali",
524 "Ominaisuudet", 489 "Ominaisuudet",
525 - "Myyntivaltti",  
526 "Ankkuriteksti" 490 "Ankkuriteksti"
527 ], 491 ],
528 "uk": [ 492 "uk": [
@@ -536,7 +500,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -536,7 +500,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
536 "Ключові атрибути", 500 "Ключові атрибути",
537 "Матеріал", 501 "Матеріал",
538 "Особливості", 502 "Особливості",
539 - "Продаюча перевага",  
540 "Анкорний текст" 503 "Анкорний текст"
541 ], 504 ],
542 "bg": [ 505 "bg": [
@@ -550,7 +513,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = { @@ -550,7 +513,6 @@ LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, Dict[str, Any]] = {
550 "Ключови атрибути", 513 "Ключови атрибути",
551 "Материал", 514 "Материал",
552 "Характеристики", 515 "Характеристики",
553 - "Търговско предимство",  
554 "Анкор текст" 516 "Анкор текст"
555 ] 517 ]
556 } 518 }
557 \ No newline at end of file 519 \ No newline at end of file
tests/ci/test_service_api_contracts.py
@@ -366,7 +366,6 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch @@ -366,7 +366,6 @@ def test_indexer_enrich_content_contract(indexer_client: TestClient, monkeypatch
366 "key_attributes": "", 366 "key_attributes": "",
367 "material": "", 367 "material": "",
368 "features": "", 368 "features": "",
369 - "selling_points": "",  
370 "anchor_text": f"{target_lang}-anchor-{p['id']}", 369 "anchor_text": f"{target_lang}-anchor-{p['id']}",
371 } 370 }
372 for p in products 371 for p in products
tests/test_process_products_batching.py
@@ -29,7 +29,6 @@ def test_analyze_products_caps_batch_size_to_20(monkeypatch): @@ -29,7 +29,6 @@ def test_analyze_products_caps_batch_size_to_20(monkeypatch):
29 "key_attributes": "", 29 "key_attributes": "",
30 "material": "", 30 "material": "",
31 "features": "", 31 "features": "",
32 - "selling_points": "",  
33 "anchor_text": "", 32 "anchor_text": "",
34 } 33 }
35 for item in batch_data 34 for item in batch_data
@@ -69,7 +68,6 @@ def test_analyze_products_uses_min_batch_size_1(monkeypatch): @@ -69,7 +68,6 @@ def test_analyze_products_uses_min_batch_size_1(monkeypatch):
69 "key_attributes": "", 68 "key_attributes": "",
70 "material": "", 69 "material": "",
71 "features": "", 70 "features": "",
72 - "selling_points": "",  
73 "anchor_text": "", 71 "anchor_text": "",
74 } 72 }
75 for item in batch_data 73 for item in batch_data
tests/test_product_enrich_partial_mode.py
@@ -195,9 +195,9 @@ def test_call_llm_logs_shared_context_once_and_verbose_contains_full_requests(): @@ -195,9 +195,9 @@ def test_call_llm_logs_shared_context_once_and_verbose_contains_full_requests():
195 195
196 196
197 def test_process_batch_reads_result_and_validates_expected_fields(): 197 def test_process_batch_reads_result_and_validates_expected_fields():
198 - merged_markdown = """| 序号 | 商品标题 | 品类路径 | 细分标签 | 适用人群 | 使用场景 | 适用季节 | 关键属性 | 材质说明 | 功能特点 | 商品卖点 | 锚文本 |  
199 -|----|----|----|----|----|----|----|----|----|----|----|----|  
200 -| 1 | 法式连衣裙 | 女装>连衣裙 | 法式,收腰 | 年轻女性 | 通勤,约会 | 春季,夏季 | 中长款 | 聚酯纤维 | 透气 | 修身显瘦 | 法式收腰连衣裙 | 198 + merged_markdown = """| 序号 | 商品标题 | 品类路径 | 细分标签 | 适用人群 | 使用场景 | 适用季节 | 关键属性 | 材质说明 | 功能特点 | 锚文本 |
  199 +|----|----|----|----|----|----|----|----|----|----|----|
  200 +| 1 | 法式连衣裙 | 女装>连衣裙 | 法式,收腰 | 年轻女性 | 通勤,约会 | 春季,夏季 | 中长款 | 聚酯纤维 | 透气 | 法式收腰连衣裙 |
201 """ 201 """
202 202
203 with mock.patch.object( 203 with mock.patch.object(
@@ -225,5 +225,89 @@ def test_process_batch_reads_result_and_validates_expected_fields(): @@ -225,5 +225,89 @@ def test_process_batch_reads_result_and_validates_expected_fields():
225 assert row["key_attributes"] == "中长款" 225 assert row["key_attributes"] == "中长款"
226 assert row["material"] == "聚酯纤维" 226 assert row["material"] == "聚酯纤维"
227 assert row["features"] == "透气" 227 assert row["features"] == "透气"
228 - assert row["selling_points"] == "修身显瘦"  
229 assert row["anchor_text"] == "法式收腰连衣裙" 228 assert row["anchor_text"] == "法式收腰连衣裙"
  229 +
  230 +
  231 +def test_analyze_products_uses_product_level_cache_across_batch_requests():
  232 + cache_store = {}
  233 + process_calls = []
  234 +
  235 + def fake_get_cached_anchor_result(title, target_lang, tenant_id=None):
  236 + return cache_store.get((tenant_id, target_lang, title))
  237 +
  238 + def fake_set_cached_anchor_result(title, target_lang, result, tenant_id=None):
  239 + cache_store[(tenant_id, target_lang, title)] = result
  240 +
  241 + def fake_process_batch(batch_data, batch_num, target_lang="zh"):
  242 + process_calls.append(
  243 + {
  244 + "batch_num": batch_num,
  245 + "target_lang": target_lang,
  246 + "titles": [item["title"] for item in batch_data],
  247 + }
  248 + )
  249 + return [
  250 + {
  251 + "id": item["id"],
  252 + "lang": target_lang,
  253 + "title_input": item["title"],
  254 + "title": f"normalized:{item['title']}",
  255 + "category_path": "cat",
  256 + "tags": "tags",
  257 + "target_audience": "audience",
  258 + "usage_scene": "scene",
  259 + "season": "season",
  260 + "key_attributes": "attrs",
  261 + "material": "material",
  262 + "features": "features",
  263 + "anchor_text": f"anchor:{item['title']}",
  264 + }
  265 + for item in batch_data
  266 + ]
  267 +
  268 + products = [
  269 + {"id": "1", "title": "dress"},
  270 + {"id": "2", "title": "shirt"},
  271 + ]
  272 +
  273 + with mock.patch.object(product_enrich, "API_KEY", "fake-key"), mock.patch.object(
  274 + product_enrich,
  275 + "_get_cached_anchor_result",
  276 + side_effect=fake_get_cached_anchor_result,
  277 + ), mock.patch.object(
  278 + product_enrich,
  279 + "_set_cached_anchor_result",
  280 + side_effect=fake_set_cached_anchor_result,
  281 + ), mock.patch.object(
  282 + product_enrich,
  283 + "process_batch",
  284 + side_effect=fake_process_batch,
  285 + ):
  286 + first = product_enrich.analyze_products(
  287 + [products[0]],
  288 + target_lang="zh",
  289 + tenant_id="170",
  290 + )
  291 + second = product_enrich.analyze_products(
  292 + products,
  293 + target_lang="zh",
  294 + tenant_id="170",
  295 + )
  296 + third = product_enrich.analyze_products(
  297 + products,
  298 + target_lang="zh",
  299 + tenant_id="170",
  300 + )
  301 +
  302 + assert [row["title_input"] for row in first] == ["dress"]
  303 + assert [row["title_input"] for row in second] == ["dress", "shirt"]
  304 + assert [row["title_input"] for row in third] == ["dress", "shirt"]
  305 +
  306 + assert process_calls == [
  307 + {"batch_num": 1, "target_lang": "zh", "titles": ["dress"]},
  308 + {"batch_num": 1, "target_lang": "zh", "titles": ["shirt"]},
  309 + ]
  310 + assert second[0]["anchor_text"] == "anchor:dress"
  311 + assert second[1]["anchor_text"] == "anchor:shirt"
  312 + assert third[0]["anchor_text"] == "anchor:dress"
  313 + assert third[1]["anchor_text"] == "anchor:shirt"