Commit a73a751f2d90d4544a7146eea38b8b68a736c98b

Authored by tangwang
1 parent e56fbdc1

enrich

config/config_loader.py
... ... @@ -301,7 +301,12 @@ class ConfigLoader:
301 301  
302 302 # Parse tenant config
303 303 tenant_config_data = config_data.get("tenant_config", {})
304   -
  304 +
  305 + # Parse extensible services/provider registry
  306 + services_data = config_data.get("services", {}) or {}
  307 + if not isinstance(services_data, dict):
  308 + raise ConfigurationError("services must be a dictionary if provided")
  309 +
305 310 return SearchConfig(
306 311 field_boosts=field_boosts,
307 312 indexes=indexes,
... ...
indexer/product_enrich.py
... ... @@ -11,6 +11,7 @@ import json
11 11 import logging
12 12 import time
13 13 import hashlib
  14 +from collections import OrderedDict
14 15 from datetime import datetime
15 16 from typing import List, Dict, Tuple, Any, Optional
16 17  
... ... @@ -20,6 +21,12 @@ from pathlib import Path
20 21  
21 22 from config.env_config import REDIS_CONFIG
22 23 from config.tenant_config_loader import SOURCE_LANG_CODE_MAP
  24 +from indexer.product_enrich_prompts import (
  25 + SYSTEM_MESSAGE,
  26 + USER_INSTRUCTION_TEMPLATE,
  27 + LANGUAGE_MARKDOWN_TABLE_HEADERS,
  28 + SHARED_ANALYSIS_INSTRUCTION,
  29 +)
23 30  
24 31 # 配置
25 32 BATCH_SIZE = 20
... ... @@ -32,6 +39,7 @@ API_KEY = os.environ.get("DASHSCOPE_API_KEY")
32 39 MAX_RETRIES = 3
33 40 RETRY_DELAY = 5 # 秒
34 41 REQUEST_TIMEOUT = 180 # 秒
  42 +LOGGED_SHARED_CONTEXT_CACHE_SIZE = 256
35 43  
36 44 # 日志路径
37 45 OUTPUT_DIR = Path("output_logs")
... ... @@ -42,6 +50,7 @@ LOG_DIR.mkdir(parents=True, exist_ok=True)
42 50 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
43 51 log_file = LOG_DIR / f"product_enrich_{timestamp}.log"
44 52 verbose_log_file = LOG_DIR / "product_enrich_verbose.log"
  53 +_logged_shared_context_keys: "OrderedDict[str, None]" = OrderedDict()
45 54  
46 55 # 主日志 logger:执行流程、批次信息等
47 56 logger = logging.getLogger("product_enrich")
... ... @@ -96,16 +105,11 @@ except Exception as e:
96 105 logger.warning(f"Failed to initialize Redis for anchors cache: {e}")
97 106 _anchor_redis = None
98 107  
99   -# 中文版本提示词(请勿删除):
100   -# "你是一名电商平台的商品标注员,你的工作是对输入的每个商品进行理解、分析和标注,"
101   -# "并按要求格式返回 Markdown 表格。所有输出内容必须为中文。"
102   -
103   -SYSTEM_MESSAGES = (
104   - "You are a product annotator for an e-commerce platform. "
105   - "For each input product, you must understand, analyze and label it, "
106   - "and return a Markdown table strictly following the requested format. "
107   - "All output must be in English."
108   -)
  108 +_missing_prompt_langs = sorted(set(SOURCE_LANG_CODE_MAP) - set(LANGUAGE_MARKDOWN_TABLE_HEADERS))
  109 +if _missing_prompt_langs:
  110 + raise RuntimeError(
  111 + f"Missing product_enrich prompt config for languages: {_missing_prompt_langs}"
  112 + )
109 113  
110 114  
111 115 def _make_anchor_cache_key(
... ... @@ -153,108 +157,109 @@ def _set_cached_anchor_result(
153 157 logger.warning(f"Failed to set anchor cache: {e}")
154 158  
155 159  
156   -def create_prompt(products: List[Dict[str, str]], target_lang: str = "zh") -> str:
157   - """根据目标语言创建 LLM 提示词和表头说明。
  160 +def _build_assistant_prefix(headers: List[str]) -> str:
  161 + header_line = "| " + " | ".join(headers) + " |"
  162 + separator_line = "|" + "----|" * len(headers)
  163 + return f"{header_line}\n{separator_line}\n"
158 164  
159   - 约定:
160   - - 提示词始终使用英文;
161   - - 当 target_lang == "en" 时,直接要求用英文分析并输出英文表头;
162   - - 当 target_lang 为其他语言时,视作“多轮对话”的后续轮次:
163   - * 默认上一轮已经用英文完成了分析;
164   - * 当前轮只需要在保持结构和含义不变的前提下,将整张表格翻译为目标语言,
165   - 包含表头与所有单元格内容。
166   - """
167   - lang_name = SOURCE_LANG_CODE_MAP.get(target_lang, target_lang)
168   -
169   -# 中文版本提示词(请勿删除)
170   -# prompt = """请对输入的每条商品标题,分析并提取以下信息:
171   -
172   -# 1. 商品标题:将输入商品名称翻译为自然、完整的中文商品标题
173   -# 2. 品类路径:从大类到细分品类,用">"分隔(例如:服装>女装>裤子>工装裤)
174   -# 3. 细分标签:商品的风格、特点、功能等(例如:碎花,收腰,法式)
175   -# 4. 适用人群:性别/年龄段等(例如:年轻女性)
176   -# 5. 使用场景
177   -# 6. 适用季节
178   -# 7. 关键属性
179   -# 8. 材质说明
180   -# 9. 功能特点
181   -# 10. 商品卖点:分析和提取一句话核心卖点,用于推荐理由
182   -# 11. 锚文本:生成一组能够代表该商品、并可能被用户用于搜索的词语或短语。这些词语应覆盖用户需求的各个维度,如品类、细分标签、功能特性、需求场景等等。
183   -
184   -# 输入商品列表:
185   -
186   -# """
187   -# prompt_tail = """
188   -# 请严格按照以下markdown表格格式返回,每列内部的多值内容都用逗号分隔,不要添加任何其他说明:
189   -
190   -# | 序号 | 商品标题 | 品类路径 | 细分标签 | 适用人群 | 使用场景 | 适用季节 | 关键属性 | 材质说明 | 功能特点 | 商品卖点 | 锚文本 |
191   -# |----|----|----|----|----|----|----|----|----|----|----|----|
192   -# """
193   -
194   - prompt = """Please analyze each input product title and extract the following information:
195   -
196   -1. Product title: a natural English product name derived from the input title
197   -2. Category path: from broad to fine-grained category, separated by ">" (e.g. Clothing>Women>Dresses>Work Dress)
198   -3. Fine-grained tags: style / features / attributes (e.g. floral, waist-cinching, French style)
199   -4. Target audience: gender / age group, etc. (e.g. young women)
200   -5. Usage scene
201   -6. Applicable season
202   -7. Key attributes
203   -8. Material description
204   -9. Functional features
205   -10. Selling point: one concise key selling sentence for recommendation
206   -11. Anchor text: a set of words or phrases that could be used by users as search queries for this product, covering category, fine-grained tags, functional attributes, usage scenes, etc.
207   -
208   -Input product list:
209   -
210   -"""
211 165  
  166 +def _build_shared_context(products: List[Dict[str, str]]) -> str:
  167 + shared_context = SHARED_ANALYSIS_INSTRUCTION
212 168 for idx, product in enumerate(products, 1):
213   - prompt += f'{idx}. {product["title"]}\n'
  169 + shared_context += f'{idx}. {product["title"]}\n'
  170 + return shared_context
214 171  
215   - if target_lang == "en":
216   - # 英文首轮:直接要求英文表头 + 英文内容
217   - prompt += """
218   -Please strictly return a Markdown table in the following format. For any column that can contain multiple values, separate values with commas. Do not add any other explanations:
219 172  
220   -| No. | Product title | Category path | Fine-grained tags | Target audience | Usage scene | Season | Key attributes | Material | Features | Selling point | Anchor text |
221   -|----|----|----|----|----|----|----|----|----|----|----|----|
222   -"""
223   - else:
224   - # 非英文语言:视作“下一轮对话”,只做翻译,要求表头与内容全部用目标语言
225   - prompt += f"""
226   -Now we will output the same table in {lang_name}.
227   -
228   -IMPORTANT:
229   -- Assume you have already generated the full table in English in a previous round.
230   -- In this round, you must output exactly the same table structure and content,
231   - but fully translated into {lang_name}, including ALL column headers and ALL cell values.
232   -- Do NOT change the meaning, fields, or the number/order of rows and columns.
233   -- Keep valid Markdown table syntax.
234   -
235   -Please return ONLY the Markdown table in {lang_name}, without any extra explanations.
236   -"""
  173 +def _hash_text(text: str) -> str:
  174 + return hashlib.md5((text or "").encode("utf-8")).hexdigest()[:12]
  175 +
  176 +
  177 +def _mark_shared_context_logged_once(shared_context_key: str) -> bool:
  178 + if shared_context_key in _logged_shared_context_keys:
  179 + _logged_shared_context_keys.move_to_end(shared_context_key)
  180 + return False
  181 +
  182 + _logged_shared_context_keys[shared_context_key] = None
  183 + if len(_logged_shared_context_keys) > LOGGED_SHARED_CONTEXT_CACHE_SIZE:
  184 + _logged_shared_context_keys.popitem(last=False)
  185 + return True
237 186  
238   - return prompt
239 187  
  188 +def reset_logged_shared_context_keys() -> None:
  189 + """测试辅助:清理已记录的共享 prompt key。"""
  190 + _logged_shared_context_keys.clear()
240 191  
241   -def call_llm(prompt: str, target_lang: str = "zh") -> Tuple[str, str]:
242   - """调用大模型API(带重试机制),按目标语言选择系统提示词。"""
  192 +
  193 +def create_prompt(
  194 + products: List[Dict[str, str]],
  195 + target_lang: str = "zh",
  196 +) -> Tuple[Optional[str], Optional[str], Optional[str]]:
  197 + """根据目标语言创建共享上下文、本地化输出要求和 Partial Mode assistant 前缀。"""
  198 + markdown_table_headers = LANGUAGE_MARKDOWN_TABLE_HEADERS.get(target_lang)
  199 + if not markdown_table_headers:
  200 + logger.warning(
  201 + "Unsupported target_lang for markdown table headers: %s",
  202 + target_lang,
  203 + )
  204 + return None, None, None
  205 + shared_context = _build_shared_context(products)
  206 + language_label = SOURCE_LANG_CODE_MAP.get(target_lang, target_lang)
  207 + user_prompt = USER_INSTRUCTION_TEMPLATE.format(language=language_label).strip()
  208 + assistant_prefix = _build_assistant_prefix(markdown_table_headers)
  209 + return shared_context, user_prompt, assistant_prefix
  210 +
  211 +
  212 +def _merge_partial_response(assistant_prefix: str, generated_content: str) -> str:
  213 + """将 Partial Mode 的 assistant 前缀与补全文本拼成完整 markdown。"""
  214 + generated = (generated_content or "").lstrip()
  215 + prefix_lines = [line.strip() for line in assistant_prefix.strip().splitlines()]
  216 + generated_lines = generated.splitlines()
  217 +
  218 + if generated_lines:
  219 + first_line = generated_lines[0].strip()
  220 + if prefix_lines and first_line == prefix_lines[0]:
  221 + generated_lines = generated_lines[1:]
  222 + if generated_lines and len(prefix_lines) > 1 and generated_lines[0].strip() == prefix_lines[1]:
  223 + generated_lines = generated_lines[1:]
  224 + elif len(prefix_lines) > 1 and first_line == prefix_lines[1]:
  225 + generated_lines = generated_lines[1:]
  226 +
  227 + suffix = "\n".join(generated_lines).lstrip("\n")
  228 + if suffix:
  229 + return f"{assistant_prefix}{suffix}"
  230 + return assistant_prefix
  231 +
  232 +
  233 +def call_llm(
  234 + shared_context: str,
  235 + user_prompt: str,
  236 + assistant_prefix: str,
  237 + target_lang: str = "zh",
  238 +) -> Tuple[str, str]:
  239 + """调用大模型 API(带重试机制),使用 Partial Mode 强制 markdown 表格前缀。"""
243 240 headers = {
244 241 "Authorization": f"Bearer {API_KEY}",
245 242 "Content-Type": "application/json",
246 243 }
  244 + shared_context_key = _hash_text(shared_context)
  245 + localized_tail_key = _hash_text(f"{target_lang}\n{user_prompt}\n{assistant_prefix}")
  246 + combined_user_prompt = f"{shared_context.rstrip()}\n\n{user_prompt.strip()}"
247 247  
248 248 payload = {
249 249 "model": MODEL_NAME,
250 250 "messages": [
251 251 {
252 252 "role": "system",
253   - "content": SYSTEM_MESSAGES,
  253 + "content": SYSTEM_MESSAGE,
254 254 },
255 255 {
256 256 "role": "user",
257   - "content": prompt,
  257 + "content": combined_user_prompt,
  258 + },
  259 + {
  260 + "role": "assistant",
  261 + "content": assistant_prefix,
  262 + "partial": True,
258 263 },
259 264 ],
260 265 "temperature": 0.3,
... ... @@ -266,16 +271,41 @@ def call_llm(prompt: str, target_lang: str = "zh") -> Tuple[str, str]:
266 271 "payload": payload,
267 272 }
268 273  
269   - # 主日志 + 详尽日志:LLM Request
270   - logger.info(f"\n{'=' * 80}")
271   - logger.info(f"LLM Request (Model: {MODEL_NAME}):")
272   - logger.info(json.dumps(request_data, ensure_ascii=False, indent=2))
273   - logger.info(f"\nPrompt:\n{prompt}")
  274 + if _mark_shared_context_logged_once(shared_context_key):
  275 + logger.info(f"\n{'=' * 80}")
  276 + logger.info(
  277 + "LLM Shared Context [model=%s, shared_key=%s, chars=%s] (logged once per process key)",
  278 + MODEL_NAME,
  279 + shared_context_key,
  280 + len(shared_context),
  281 + )
  282 + logger.info("\nSystem Message:\n%s", SYSTEM_MESSAGE)
  283 + logger.info("\nShared Context:\n%s", shared_context)
274 284  
275 285 verbose_logger.info(f"\n{'=' * 80}")
276   - verbose_logger.info(f"LLM Request (Model: {MODEL_NAME}):")
  286 + verbose_logger.info(
  287 + "LLM Request [model=%s, lang=%s, shared_key=%s, tail_key=%s]:",
  288 + MODEL_NAME,
  289 + target_lang,
  290 + shared_context_key,
  291 + localized_tail_key,
  292 + )
277 293 verbose_logger.info(json.dumps(request_data, ensure_ascii=False, indent=2))
278   - verbose_logger.info(f"\nPrompt:\n{prompt}")
  294 + verbose_logger.info(f"\nCombined User Prompt:\n{combined_user_prompt}")
  295 + verbose_logger.info(f"\nShared Context:\n{shared_context}")
  296 + verbose_logger.info(f"\nLocalized Requirement:\n{user_prompt}")
  297 + verbose_logger.info(f"\nAssistant Prefix:\n{assistant_prefix}")
  298 +
  299 + logger.info(
  300 + "\nLLM Request Variant [lang=%s, shared_key=%s, tail_key=%s, prompt_chars=%s, prefix_chars=%s]",
  301 + target_lang,
  302 + shared_context_key,
  303 + localized_tail_key,
  304 + len(user_prompt),
  305 + len(assistant_prefix),
  306 + )
  307 + logger.info("\nLocalized Requirement:\n%s", user_prompt)
  308 + logger.info("\nAssistant Prefix:\n%s", assistant_prefix)
279 309  
280 310 # 创建session,禁用代理
281 311 session = requests.Session()
... ... @@ -295,19 +325,37 @@ def call_llm(prompt: str, target_lang: str = "zh") -> Tuple[str, str]:
295 325  
296 326 response.raise_for_status()
297 327 result = response.json()
  328 + usage = result.get("usage") or {}
  329 +
  330 + verbose_logger.info(
  331 + "\nLLM Response [model=%s, lang=%s, shared_key=%s, tail_key=%s]:",
  332 + MODEL_NAME,
  333 + target_lang,
  334 + shared_context_key,
  335 + localized_tail_key,
  336 + )
  337 + verbose_logger.info(json.dumps(result, ensure_ascii=False, indent=2))
298 338  
299   - # 主日志 + 详尽日志:LLM Response
300   - logger.info(f"\nLLM Response:")
301   - logger.info(json.dumps(result, ensure_ascii=False, indent=2))
  339 + generated_content = result["choices"][0]["message"]["content"]
  340 + full_markdown = _merge_partial_response(assistant_prefix, generated_content)
302 341  
303   - verbose_logger.info(f"\nLLM Response:")
304   - verbose_logger.info(json.dumps(result, ensure_ascii=False, indent=2))
  342 + logger.info(
  343 + "\nLLM Response Summary [lang=%s, shared_key=%s, tail_key=%s, generated_chars=%s, completion_tokens=%s, prompt_tokens=%s, total_tokens=%s]",
  344 + target_lang,
  345 + shared_context_key,
  346 + localized_tail_key,
  347 + len(generated_content or ""),
  348 + usage.get("completion_tokens"),
  349 + usage.get("prompt_tokens"),
  350 + usage.get("total_tokens"),
  351 + )
  352 + logger.info("\nGenerated Content:\n%s", generated_content)
  353 + logger.info("\nMerged Markdown:\n%s", full_markdown)
305 354  
306   - content = result["choices"][0]["message"]["content"]
307   - logger.info(f"\nExtracted Content:\n{content}")
308   - verbose_logger.info(f"\nExtracted Content:\n{content}")
  355 + verbose_logger.info(f"\nGenerated Content:\n{generated_content}")
  356 + verbose_logger.info(f"\nMerged Markdown:\n{full_markdown}")
309 357  
310   - return content, json.dumps(result, ensure_ascii=False)
  358 + return full_markdown, json.dumps(result, ensure_ascii=False)
311 359  
312 360 except requests.exceptions.ProxyError as e:
313 361 logger.warning(f"Attempt {attempt + 1}/{MAX_RETRIES}: Proxy error - {str(e)}")
... ... @@ -385,6 +433,39 @@ def parse_markdown_table(markdown_content: str) -> List[Dict[str, str]]:
385 433 return data
386 434  
387 435  
  436 +def _log_parsed_result_quality(
  437 + batch_data: List[Dict[str, str]],
  438 + parsed_results: List[Dict[str, str]],
  439 + target_lang: str,
  440 + batch_num: int,
  441 +) -> None:
  442 + expected = len(batch_data)
  443 + actual = len(parsed_results)
  444 + if actual != expected:
  445 + logger.warning(
  446 + "Parsed row count mismatch for batch=%s lang=%s: expected=%s actual=%s",
  447 + batch_num,
  448 + target_lang,
  449 + expected,
  450 + actual,
  451 + )
  452 +
  453 + missing_anchor = sum(1 for item in parsed_results if not str(item.get("anchor_text") or "").strip())
  454 + missing_category = sum(1 for item in parsed_results if not str(item.get("category_path") or "").strip())
  455 + missing_title = sum(1 for item in parsed_results if not str(item.get("title") or "").strip())
  456 +
  457 + logger.info(
  458 + "Parsed Quality Summary [batch=%s, lang=%s]: rows=%s/%s, missing_title=%s, missing_category=%s, missing_anchor=%s",
  459 + batch_num,
  460 + target_lang,
  461 + actual,
  462 + expected,
  463 + missing_title,
  464 + missing_category,
  465 + missing_anchor,
  466 + )
  467 +
  468 +
388 469 def process_batch(
389 470 batch_data: List[Dict[str, str]],
390 471 batch_num: int,
... ... @@ -395,14 +476,52 @@ def process_batch(
395 476 logger.info(f"Processing Batch {batch_num} ({len(batch_data)} items)")
396 477  
397 478 # 创建提示词
398   - prompt = create_prompt(batch_data, target_lang=target_lang)
  479 + shared_context, user_prompt, assistant_prefix = create_prompt(
  480 + batch_data,
  481 + target_lang=target_lang,
  482 + )
  483 +
  484 + # 如果提示词创建失败(例如不支持的 target_lang),本次批次整体失败,不再继续调用 LLM
  485 + if shared_context is None or user_prompt is None or assistant_prefix is None:
  486 + logger.error(
  487 + "Failed to create prompt for batch %s, target_lang=%s; "
  488 + "marking entire batch as failed without calling LLM",
  489 + batch_num,
  490 + target_lang,
  491 + )
  492 + return [
  493 + {
  494 + "id": item["id"],
  495 + "lang": target_lang,
  496 + "title_input": item.get("title", ""),
  497 + "title": "",
  498 + "category_path": "",
  499 + "tags": "",
  500 + "target_audience": "",
  501 + "usage_scene": "",
  502 + "season": "",
  503 + "key_attributes": "",
  504 + "material": "",
  505 + "features": "",
  506 + "selling_points": "",
  507 + "anchor_text": "",
  508 + "error": f"prompt_creation_failed: unsupported target_lang={target_lang}",
  509 + }
  510 + for item in batch_data
  511 + ]
399 512  
400 513 # 调用LLM
401 514 try:
402   - raw_response, full_response_json = call_llm(prompt, target_lang=target_lang)
  515 + raw_response, full_response_json = call_llm(
  516 + shared_context,
  517 + user_prompt,
  518 + assistant_prefix,
  519 + target_lang=target_lang,
  520 + )
403 521  
404 522 # 解析结果
405 523 parsed_results = parse_markdown_table(raw_response)
  524 + _log_parsed_result_quality(batch_data, parsed_results, target_lang, batch_num)
406 525  
407 526 logger.info(f"\nParsed Results ({len(parsed_results)} items):")
408 527 logger.info(json.dumps(parsed_results, ensure_ascii=False, indent=2))
... ... @@ -541,4 +660,3 @@ def analyze_products(
541 660 pass
542 661  
543 662 return all_results
544   -
... ...
indexer/product_enrich_prompts.py 0 → 100644
... ... @@ -0,0 +1,556 @@
  1 +#!/usr/bin/env python3
  2 +
  3 +from typing import Any, Dict
  4 +
  5 +SYSTEM_MESSAGE = (
  6 + "You are an e-commerce product annotator. "
  7 + "Continue the provided assistant Markdown table prefix. "
  8 + "Do not repeat or modify the prefix, and do not add explanations outside the table."
  9 +)
  10 +
  11 +SHARED_ANALYSIS_INSTRUCTION = """Analyze each input product title and fill these columns:
  12 +
  13 +1. Product title: a natural localized product name derived from the input title
  14 +2. Category path: broad to fine-grained category, separated by ">"
  15 +3. Fine-grained tags: style, features, functions, or notable attributes
  16 +4. Target audience: gender, age group, or suitable users
  17 +5. Usage scene
  18 +6. Applicable season
  19 +7. Key attributes
  20 +8. Material description
  21 +9. Functional features
  22 +10. Selling point: one concise core selling phrase
  23 +11. Anchor text: a set of search-oriented words or phrases covering category, attributes, scenes, and demand
  24 +
  25 +Rules:
  26 +- Keep the input order and row count exactly the same.
  27 +- Infer from the title only; if uncertain, prefer concise and broadly correct ecommerce wording.
  28 +- Keep category paths concise and use ">" as the separator.
  29 +- For columns with multiple values, the localized output requirement will define the delimiter.
  30 +
  31 +Input product list:
  32 +"""
  33 +
  34 +USER_INSTRUCTION_TEMPLATE = """Please strictly return a Markdown table following the given columns in the specified language. For any column containing multiple values, separate them with commas. Do not add any other explanation.
  35 +Language: {language}"""
  36 +
  37 +LANGUAGE_MARKDOWN_TABLE_HEADERS: Dict[str, list] = {
  38 + "en": [
  39 + "No.",
  40 + "Product title",
  41 + "Category path",
  42 + "Fine-grained tags",
  43 + "Target audience",
  44 + "Usage scene",
  45 + "Season",
  46 + "Key attributes",
  47 + "Material",
  48 + "Features",
  49 + "Selling point",
  50 + "Anchor text"
  51 + ],
  52 + "zh": [
  53 + "序号",
  54 + "商品标题",
  55 + "品类路径",
  56 + "细分标签",
  57 + "适用人群",
  58 + "使用场景",
  59 + "适用季节",
  60 + "关键属性",
  61 + "材质说明",
  62 + "功能特点",
  63 + "商品卖点",
  64 + "锚文本"
  65 + ],
  66 + "zh_tw": [
  67 + "序號",
  68 + "商品標題",
  69 + "品類路徑",
  70 + "細分標籤",
  71 + "適用人群",
  72 + "使用場景",
  73 + "適用季節",
  74 + "關鍵屬性",
  75 + "材質說明",
  76 + "功能特點",
  77 + "商品賣點",
  78 + "錨文本"
  79 + ],
  80 + "ru": [
  81 + "№",
  82 + "Название товара",
  83 + "Путь категории",
  84 + "Детализированные теги",
  85 + "Целевая аудитория",
  86 + "Сценарий использования",
  87 + "Сезон",
  88 + "Ключевые атрибуты",
  89 + "Материал",
  90 + "Особенности",
  91 + "Преимущество товара",
  92 + "Анкорный текст"
  93 + ],
  94 + "ja": [
  95 + "番号",
  96 + "商品タイトル",
  97 + "カテゴリパス",
  98 + "詳細タグ",
  99 + "対象ユーザー",
  100 + "利用シーン",
  101 + "季節",
  102 + "主要属性",
  103 + "素材",
  104 + "機能特徴",
  105 + "商品の訴求点",
  106 + "アンカーテキスト"
  107 + ],
  108 + "ko": [
  109 + "번호",
  110 + "상품 제목",
  111 + "카테고리 경로",
  112 + "세부 태그",
  113 + "대상 고객",
  114 + "사용 장면",
  115 + "계절",
  116 + "핵심 속성",
  117 + "소재",
  118 + "기능 특징",
  119 + "상품 포인트",
  120 + "앵커 텍스트"
  121 + ],
  122 + "es": [
  123 + "N.º",
  124 + "Titulo del producto",
  125 + "Ruta de categoria",
  126 + "Etiquetas detalladas",
  127 + "Publico objetivo",
  128 + "Escenario de uso",
  129 + "Temporada",
  130 + "Atributos clave",
  131 + "Material",
  132 + "Caracteristicas",
  133 + "Punto de venta",
  134 + "Texto ancla"
  135 + ],
  136 + "fr": [
  137 + "N°",
  138 + "Titre du produit",
  139 + "Chemin de categorie",
  140 + "Etiquettes detaillees",
  141 + "Public cible",
  142 + "Scenario d'utilisation",
  143 + "Saison",
  144 + "Attributs cles",
  145 + "Matiere",
  146 + "Caracteristiques",
  147 + "Argument de vente",
  148 + "Texte d'ancrage"
  149 + ],
  150 + "pt": [
  151 + "Nº",
  152 + "Titulo do produto",
  153 + "Caminho da categoria",
  154 + "Tags detalhadas",
  155 + "Publico-alvo",
  156 + "Cenario de uso",
  157 + "Estacao",
  158 + "Atributos principais",
  159 + "Material",
  160 + "Caracteristicas",
  161 + "Ponto de venda",
  162 + "Texto ancora"
  163 + ],
  164 + "de": [
  165 + "Nr.",
  166 + "Produkttitel",
  167 + "Kategoriepfad",
  168 + "Detaillierte Tags",
  169 + "Zielgruppe",
  170 + "Nutzungsszenario",
  171 + "Saison",
  172 + "Wichtige Attribute",
  173 + "Material",
  174 + "Funktionen",
  175 + "Verkaufsargument",
  176 + "Ankertext"
  177 + ],
  178 + "it": [
  179 + "N.",
  180 + "Titolo del prodotto",
  181 + "Percorso categoria",
  182 + "Tag dettagliati",
  183 + "Pubblico target",
  184 + "Scenario d'uso",
  185 + "Stagione",
  186 + "Attributi chiave",
  187 + "Materiale",
  188 + "Caratteristiche",
  189 + "Punto di forza",
  190 + "Testo ancora"
  191 + ],
  192 + "th": [
  193 + "ลำดับ",
  194 + "ชื่อสินค้า",
  195 + "เส้นทางหมวดหมู่",
  196 + "แท็กย่อย",
  197 + "กลุ่มเป้าหมาย",
  198 + "สถานการณ์การใช้งาน",
  199 + "ฤดูกาล",
  200 + "คุณสมบัติสำคัญ",
  201 + "วัสดุ",
  202 + "คุณสมบัติการใช้งาน",
  203 + "จุดขายสินค้า",
  204 + "แองเคอร์เท็กซ์"
  205 + ],
  206 + "vi": [
  207 + "STT",
  208 + "Tieu de san pham",
  209 + "Duong dan danh muc",
  210 + "The chi tiet",
  211 + "Doi tuong phu hop",
  212 + "Boi canh su dung",
  213 + "Mua phu hop",
  214 + "Thuoc tinh chinh",
  215 + "Chat lieu",
  216 + "Tinh nang",
  217 + "Diem ban hang",
  218 + "Van ban neo"
  219 + ],
  220 + "id": [
  221 + "No.",
  222 + "Judul produk",
  223 + "Jalur kategori",
  224 + "Tag terperinci",
  225 + "Target pengguna",
  226 + "Skenario penggunaan",
  227 + "Musim",
  228 + "Atribut utama",
  229 + "Bahan",
  230 + "Fitur",
  231 + "Nilai jual",
  232 + "Teks jangkar"
  233 + ],
  234 + "ms": [
  235 + "No.",
  236 + "Tajuk produk",
  237 + "Laluan kategori",
  238 + "Tag terperinci",
  239 + "Sasaran pengguna",
  240 + "Senario penggunaan",
  241 + "Musim",
  242 + "Atribut utama",
  243 + "Bahan",
  244 + "Ciri-ciri",
  245 + "Nilai jual",
  246 + "Teks sauh"
  247 + ],
  248 + "ar": [
  249 + "الرقم",
  250 + "عنوان المنتج",
  251 + "مسار الفئة",
  252 + "الوسوم التفصيلية",
  253 + "الفئة المستهدفة",
  254 + "سيناريو الاستخدام",
  255 + "الموسم",
  256 + "السمات الرئيسية",
  257 + "المادة",
  258 + "الميزات",
  259 + "نقطة البيع",
  260 + "نص الربط"
  261 + ],
  262 + "hi": [
  263 + "क्रमांक",
  264 + "उत्पाद शीर्षक",
  265 + "श्रेणी पथ",
  266 + "विस्तृत टैग",
  267 + "लक्षित उपभोक्ता",
  268 + "उपयोग परिदृश्य",
  269 + "मौसम",
  270 + "मुख्य गुण",
  271 + "सामग्री",
  272 + "विशेषताएं",
  273 + "बिक्री बिंदु",
  274 + "एंकर टेक्स्ट"
  275 + ],
  276 + "he": [
  277 + "מס׳",
  278 + "כותרת המוצר",
  279 + "נתיב קטגוריה",
  280 + "תגיות מפורטות",
  281 + "קהל יעד",
  282 + "תרחיש שימוש",
  283 + "עונה",
  284 + "מאפיינים מרכזיים",
  285 + "חומר",
  286 + "תכונות",
  287 + "נקודת מכירה",
  288 + "טקסט עוגן"
  289 + ],
  290 + "my": [
  291 + "အမှတ်စဉ်",
  292 + "ကုန်ပစ္စည်းခေါင်းစဉ်",
  293 + "အမျိုးအစားလမ်းကြောင်း",
  294 + "အသေးစိတ်တဂ်များ",
  295 + "ပစ်မှတ်အသုံးပြုသူ",
  296 + "အသုံးပြုမှုအခြေအနေ",
  297 + "ရာသီ",
  298 + "အဓိကဂုဏ်သတ္တိများ",
  299 + "ပစ္စည်း",
  300 + "လုပ်ဆောင်ချက်များ",
  301 + "အရောင်းထူးခြားချက်",
  302 + "အန်ကာစာသား"
  303 + ],
  304 + "ta": [
  305 + "எண்",
  306 + "தயாரிப்பு தலைப்பு",
  307 + "வகை பாதை",
  308 + "விரிவான குறிச்சொற்கள்",
  309 + "இலக்கு பயனர்கள்",
  310 + "பயன்பாட்டு நிலை",
  311 + "பருவம்",
  312 + "முக்கிய பண்புகள்",
  313 + "பொருள்",
  314 + "அம்சங்கள்",
  315 + "விற்பனை அம்சம்",
  316 + "ஆங்கர் உரை"
  317 + ],
  318 + "ur": [
  319 + "نمبر",
  320 + "پروڈکٹ عنوان",
  321 + "زمرہ راستہ",
  322 + "تفصیلی ٹیگز",
  323 + "ہدف صارفین",
  324 + "استعمال کا منظر",
  325 + "موسم",
  326 + "کلیدی خصوصیات",
  327 + "مواد",
  328 + "فیچرز",
  329 + "فروختی نقطہ",
  330 + "اینکر ٹیکسٹ"
  331 + ],
  332 + "bn": [
  333 + "ক্রম",
  334 + "পণ্যের শিরোনাম",
  335 + "শ্রেণি পথ",
  336 + "বিস্তারিত ট্যাগ",
  337 + "লক্ষ্য ব্যবহারকারী",
  338 + "ব্যবহারের দৃশ্য",
  339 + "মৌসুম",
  340 + "মূল বৈশিষ্ট্য",
  341 + "উপাদান",
  342 + "ফিচার",
  343 + "বিক্রয় পয়েন্ট",
  344 + "অ্যাঙ্কর টেক্সট"
  345 + ],
  346 + "pl": [
  347 + "Nr",
  348 + "Tytul produktu",
  349 + "Sciezka kategorii",
  350 + "Szczegolowe tagi",
  351 + "Grupa docelowa",
  352 + "Scenariusz uzycia",
  353 + "Sezon",
  354 + "Kluczowe atrybuty",
  355 + "Material",
  356 + "Cechy",
  357 + "Atut sprzedazowy",
  358 + "Tekst kotwicy"
  359 + ],
  360 + "nl": [
  361 + "Nr.",
  362 + "Producttitel",
  363 + "Categoriepad",
  364 + "Gedetailleerde tags",
  365 + "Doelgroep",
  366 + "Gebruikscontext",
  367 + "Seizoen",
  368 + "Belangrijke kenmerken",
  369 + "Materiaal",
  370 + "Functies",
  371 + "Verkooppunt",
  372 + "Ankertekst"
  373 + ],
  374 + "ro": [
  375 + "Nr.",
  376 + "Titlul produsului",
  377 + "Calea categoriei",
  378 + "Etichete detaliate",
  379 + "Public tinta",
  380 + "Scenariu de utilizare",
  381 + "Sezon",
  382 + "Atribute cheie",
  383 + "Material",
  384 + "Caracteristici",
  385 + "Punct de vanzare",
  386 + "Text ancora"
  387 + ],
  388 + "tr": [
  389 + "No.",
  390 + "Urun basligi",
  391 + "Kategori yolu",
  392 + "Ayrintili etiketler",
  393 + "Hedef kitle",
  394 + "Kullanim senaryosu",
  395 + "Sezon",
  396 + "Temel ozellikler",
  397 + "Malzeme",
  398 + "Ozellikler",
  399 + "Satis noktasi",
  400 + "Capa metni"
  401 + ],
  402 + "km": [
  403 + "ល.រ",
  404 + "ចំណងជើងផលិតផល",
  405 + "ផ្លូវប្រភេទ",
  406 + "ស្លាកលម្អិត",
  407 + "ក្រុមអ្នកប្រើគោលដៅ",
  408 + "សេណារីយ៉ូប្រើប្រាស់",
  409 + "រដូវកាល",
  410 + "លក្ខណៈសម្បត្តិសំខាន់",
  411 + "សម្ភារៈ",
  412 + "មុខងារ",
  413 + "ចំណុចលក់",
  414 + "អត្ថបទអង់ក័រ"
  415 + ],
  416 + "lo": [
  417 + "ລຳດັບ",
  418 + "ຊື່ສິນຄ້າ",
  419 + "ເສັ້ນທາງໝວດໝູ່",
  420 + "ແທັກລະອຽດ",
  421 + "ກຸ່ມເປົ້າໝາຍ",
  422 + "ສະຖານະການໃຊ້ງານ",
  423 + "ລະດູການ",
  424 + "ຄຸນລັກສະນະສຳຄັນ",
  425 + "ວັດສະດຸ",
  426 + "ຄຸນສົມບັດ",
  427 + "ຈຸດຂາຍ",
  428 + "ຂໍ້ຄວາມອັງເຄີ"
  429 + ],
  430 + "yue": [
  431 + "序號",
  432 + "商品標題",
  433 + "品類路徑",
  434 + "細分類標籤",
  435 + "適用人群",
  436 + "使用場景",
  437 + "適用季節",
  438 + "關鍵屬性",
  439 + "材質說明",
  440 + "功能特點",
  441 + "商品賣點",
  442 + "錨文本"
  443 + ],
  444 + "cs": [
  445 + "C.",
  446 + "Nazev produktu",
  447 + "Cesta kategorie",
  448 + "Podrobne stitky",
  449 + "Cilova skupina",
  450 + "Scenar pouziti",
  451 + "Sezona",
  452 + "Klicove atributy",
  453 + "Material",
  454 + "Vlastnosti",
  455 + "Prodejni argument",
  456 + "Kotvici text"
  457 + ],
  458 + "el": [
  459 + "Α/Α",
  460 + "Τίτλος προϊόντος",
  461 + "Διαδρομή κατηγορίας",
  462 + "Αναλυτικές ετικέτες",
  463 + "Κοινό-στόχος",
  464 + "Σενάριο χρήσης",
  465 + "Εποχή",
  466 + "Βασικά χαρακτηριστικά",
  467 + "Υλικό",
  468 + "Λειτουργίες",
  469 + "Σημείο πώλησης",
  470 + "Κείμενο άγκυρας"
  471 + ],
  472 + "sv": [
  473 + "Nr",
  474 + "Produkttitel",
  475 + "Kategorisokvag",
  476 + "Detaljerade taggar",
  477 + "Malgrupp",
  478 + "Anvandningsscenario",
  479 + "Sasong",
  480 + "Viktiga attribut",
  481 + "Material",
  482 + "Funktioner",
  483 + "Saljpunkt",
  484 + "Ankartext"
  485 + ],
  486 + "hu": [
  487 + "Sorszam",
  488 + "Termekcim",
  489 + "Kategoriavonal",
  490 + "Reszletes cimkek",
  491 + "Celcsoport",
  492 + "Hasznalati helyzet",
  493 + "Evszak",
  494 + "Fo jellemzok",
  495 + "Anyag",
  496 + "Funkciok",
  497 + "Ertekesitesi elony",
  498 + "Horgonyszoveg"
  499 + ],
  500 + "da": [
  501 + "Nr.",
  502 + "Produkttitel",
  503 + "Kategoristi",
  504 + "Detaljerede tags",
  505 + "Malgruppe",
  506 + "Brugsscenarie",
  507 + "Saeson",
  508 + "Nogleattributter",
  509 + "Materiale",
  510 + "Funktioner",
  511 + "Salgsargument",
  512 + "Ankertekst"
  513 + ],
  514 + "fi": [
  515 + "Nro",
  516 + "Tuotteen nimi",
  517 + "Kategoriapolku",
  518 + "Yksityiskohtaiset tunnisteet",
  519 + "Kohdeyleiso",
  520 + "Kayttotilanne",
  521 + "Kausi",
  522 + "Keskeiset ominaisuudet",
  523 + "Materiaali",
  524 + "Ominaisuudet",
  525 + "Myyntivaltti",
  526 + "Ankkuriteksti"
  527 + ],
  528 + "uk": [
  529 + "№",
  530 + "Назва товару",
  531 + "Шлях категорії",
  532 + "Детальні теги",
  533 + "Цільова аудиторія",
  534 + "Сценарій використання",
  535 + "Сезон",
  536 + "Ключові атрибути",
  537 + "Матеріал",
  538 + "Особливості",
  539 + "Продаюча перевага",
  540 + "Анкорний текст"
  541 + ],
  542 + "bg": [
  543 + "№",
  544 + "Заглавие на продукта",
  545 + "Път на категорията",
  546 + "Подробни тагове",
  547 + "Целева аудитория",
  548 + "Сценарий на употреба",
  549 + "Сезон",
  550 + "Ключови атрибути",
  551 + "Материал",
  552 + "Характеристики",
  553 + "Търговско предимство",
  554 + "Анкор текст"
  555 + ]
  556 +}
0 557 \ No newline at end of file
... ...
tests/test_product_enrich_partial_mode.py 0 → 100644
... ... @@ -0,0 +1,229 @@
  1 +from __future__ import annotations
  2 +
  3 +import importlib.util
  4 +import io
  5 +import json
  6 +import logging
  7 +import sys
  8 +import types
  9 +from pathlib import Path
  10 +from unittest import mock
  11 +
  12 +
def _load_product_enrich_module():
    """Import ``indexer/product_enrich.py`` with third-party deps stubbed out.

    Installs fake ``dotenv`` and ``redis`` modules into ``sys.modules`` (only if
    real ones are absent) so the module under test can be imported without those
    packages installed, then loads it from its file path under a private module
    name so it does not collide with a real ``indexer.product_enrich`` import.

    Returns:
        The freshly executed module object.
    """
    if "dotenv" not in sys.modules:
        fake_dotenv = types.ModuleType("dotenv")
        # load_dotenv is the only attribute product_enrich uses at import time.
        fake_dotenv.load_dotenv = lambda *args, **kwargs: None
        sys.modules["dotenv"] = fake_dotenv

    if "redis" not in sys.modules:
        fake_redis = types.ModuleType("redis")

        class _FakeRedisClient:
            def __init__(self, *args, **kwargs):
                pass

            def ping(self):
                # Pretend the connection check succeeds.
                return True

        fake_redis.Redis = _FakeRedisClient
        sys.modules["redis"] = fake_redis

    # Make repo-root absolute imports (config.*, indexer.*) resolvable.
    repo_root = Path(__file__).resolve().parents[1]
    if str(repo_root) not in sys.path:
        sys.path.insert(0, str(repo_root))

    module_path = repo_root / "indexer" / "product_enrich.py"
    spec = importlib.util.spec_from_file_location("product_enrich_under_test", module_path)
    # Validate the spec BEFORE building the module: module_from_spec raises a
    # confusing TypeError on a None spec, so fail with a clear assertion first.
    assert spec is not None and spec.loader is not None
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
  42 +
  43 +
# Import the module under test exactly once; every test below shares this instance.
product_enrich = _load_product_enrich_module()
  45 +
  46 +
  47 +def _attach_stream(logger_obj: logging.Logger):
  48 + stream = io.StringIO()
  49 + handler = logging.StreamHandler(stream)
  50 + handler.setFormatter(logging.Formatter("%(message)s"))
  51 + logger_obj.addHandler(handler)
  52 + return stream, handler
  53 +
  54 +
def test_create_prompt_splits_shared_context_and_localized_tail():
    """The shared context is language-independent; only the tail and table prefix vary."""
    sample_products = [
        {"id": "1", "title": "dress"},
        {"id": "2", "title": "linen shirt"},
    ]

    prompts = {
        lang: product_enrich.create_prompt(sample_products, target_lang=lang)
        for lang in ("zh", "en")
    }
    shared_zh, user_zh, prefix_zh = prompts["zh"]
    shared_en, user_en, prefix_en = prompts["en"]

    # Same shared context for every language, and it carries the product list.
    assert shared_zh == shared_en
    for fragment in ("Analyze each input product title", "1. dress", "2. linen shirt"):
        assert fragment in shared_zh

    # The localized tail must not repeat the product list, only the language ask.
    for localized_tail in (user_zh, user_en):
        assert "Product list" not in localized_tail
    assert "specified language" in user_zh
    assert "Language: Chinese" in user_zh
    assert "Language: English" in user_en

    # The assistant prefix is the localized markdown table header.
    assert prefix_zh.startswith("| 序号 | 商品标题 | 品类路径 |")
    assert prefix_en.startswith("| No. | Product title | Category path |")
  75 +
  76 +
def test_call_llm_logs_shared_context_once_and_verbose_contains_full_requests():
    """call_llm must log the shared context only once across language variants,
    while the verbose log records every full request and response.

    Two calls (zh, en) share one context; the main log should contain one
    "LLM Shared Context" entry and two "LLM Request Variant" entries, and the
    verbose log should carry both complete payloads and bodies.
    """
    payloads = []  # every JSON body POSTed by call_llm, in call order
    # Canned API bodies, served in order: first call gets zh, second gets en.
    response_bodies = [
        {
            "choices": [
                {
                    "message": {
                        "content": (
                            "| 1 | 连衣裙 | 女装>连衣裙 | 法式,收腰 | 年轻女性 | "
                            "通勤,约会 | 春季,夏季 | 中长款 | 聚酯纤维 | 透气 | "
                            "修身显瘦 | 法式收腰连衣裙 |\n"
                        )
                    }
                }
            ],
            "usage": {"prompt_tokens": 120, "completion_tokens": 45, "total_tokens": 165},
        },
        {
            "choices": [
                {
                    "message": {
                        "content": (
                            "| 1 | Dress | Women>Dress | French,Waisted | Young women | "
                            "Commute,Date | Spring,Summer | Midi | Polyester | Breathable | "
                            "Slim fit | French waisted dress |\n"
                        )
                    }
                }
            ],
            "usage": {"prompt_tokens": 118, "completion_tokens": 43, "total_tokens": 161},
        },
    ]

    class _FakeResponse:
        """Minimal stand-in for requests.Response: raise_for_status + json."""

        def __init__(self, body):
            self.body = body

        def raise_for_status(self):
            return None

        def json(self):
            return self.body

    class _FakeSession:
        """Captures each POST payload and serves the canned bodies in order."""

        trust_env = True

        # NOTE: the `json` parameter deliberately shadows the json module here
        # to mirror the requests.Session.post keyword API.
        def post(self, url, headers=None, json=None, timeout=None, proxies=None):
            del url, headers, timeout, proxies
            payloads.append(json)
            return _FakeResponse(response_bodies[len(payloads) - 1])

        def close(self):
            return None

    # Clear the shared-context dedupe cache so the "logged once" count below
    # is deterministic regardless of earlier tests.
    product_enrich.reset_logged_shared_context_keys()
    main_stream, main_handler = _attach_stream(product_enrich.logger)
    verbose_stream, verbose_handler = _attach_stream(product_enrich.verbose_logger)

    try:
        with mock.patch.object(product_enrich, "API_KEY", "fake-key"), mock.patch.object(
            product_enrich.requests,
            "Session",
            lambda: _FakeSession(),
        ):
            zh_shared, zh_user, zh_prefix = product_enrich.create_prompt(
                [{"id": "1", "title": "dress"}],
                target_lang="zh",
            )
            en_shared, en_user, en_prefix = product_enrich.create_prompt(
                [{"id": "1", "title": "dress"}],
                target_lang="en",
            )

            zh_markdown, zh_raw = product_enrich.call_llm(
                zh_shared,
                zh_user,
                zh_prefix,
                target_lang="zh",
            )
            en_markdown, en_raw = product_enrich.call_llm(
                en_shared,
                en_user,
                en_prefix,
                target_lang="en",
            )
    finally:
        # Always detach the capture handlers so other tests are unaffected.
        product_enrich.logger.removeHandler(main_handler)
        product_enrich.verbose_logger.removeHandler(verbose_handler)

    assert zh_shared == en_shared
    assert len(payloads) == 2
    # Request shape: system + combined user prompt + assistant prefix message.
    assert len(payloads[0]["messages"]) == 3
    assert payloads[0]["messages"][1]["role"] == "user"
    assert "1. dress" in payloads[0]["messages"][1]["content"]
    assert "Language: Chinese" in payloads[0]["messages"][1]["content"]
    assert "Language: English" in payloads[1]["messages"][1]["content"]
    # Last message carries partial=True — presumably DashScope's
    # prefix-completion ("partial mode") flag; confirm against API docs.
    assert payloads[0]["messages"][-1]["partial"] is True
    assert payloads[1]["messages"][-1]["partial"] is True

    main_log = main_stream.getvalue()
    verbose_log = verbose_stream.getvalue()

    # Shared context logged once; one variant entry per language.
    assert main_log.count("LLM Shared Context") == 1
    assert main_log.count("LLM Request Variant") == 2
    assert "Localized Requirement" in main_log
    assert "Shared Context" in main_log

    # Verbose log has one full request and one full response per call.
    assert verbose_log.count("LLM Request [model=") == 2
    assert verbose_log.count("LLM Response [model=") == 2
    assert '"partial": true' in verbose_log
    assert "Combined User Prompt" in verbose_log
    assert "French waisted dress" in verbose_log
    assert "法式收腰连衣裙" in verbose_log

    # Returned markdown is the assistant prefix plus the model's continuation.
    assert zh_markdown.startswith(zh_prefix)
    assert en_markdown.startswith(en_prefix)
    assert json.loads(zh_raw)["usage"]["total_tokens"] == 165
    assert json.loads(en_raw)["usage"]["total_tokens"] == 161
  195 +
  196 +
def test_process_batch_reads_result_and_validates_expected_fields():
    """process_batch should map each markdown row back onto the source product."""
    merged_markdown = """| 序号 | 商品标题 | 品类路径 | 细分标签 | 适用人群 | 使用场景 | 适用季节 | 关键属性 | 材质说明 | 功能特点 | 商品卖点 | 锚文本 |
|----|----|----|----|----|----|----|----|----|----|----|----|
| 1 | 法式连衣裙 | 女装>连衣裙 | 法式,收腰 | 年轻女性 | 通勤,约会 | 春季,夏季 | 中长款 | 聚酯纤维 | 透气 | 修身显瘦 | 法式收腰连衣裙 |
"""

    stub_raw = json.dumps({"choices": [{"message": {"content": "stub"}}]})
    with mock.patch.object(
        product_enrich,
        "call_llm",
        return_value=(merged_markdown, stub_raw),
    ):
        results = product_enrich.process_batch(
            [{"id": "sku-1", "title": "dress"}],
            batch_num=1,
            target_lang="zh",
        )

    assert len(results) == 1
    row = results[0]
    # Every table column must land in the matching result field.
    expected_fields = {
        "id": "sku-1",
        "lang": "zh",
        "title_input": "dress",
        "title": "法式连衣裙",
        "category_path": "女装>连衣裙",
        "tags": "法式,收腰",
        "target_audience": "年轻女性",
        "usage_scene": "通勤,约会",
        "season": "春季,夏季",
        "key_attributes": "中长款",
        "material": "聚酯纤维",
        "features": "透气",
        "selling_points": "修身显瘦",
        "anchor_text": "法式收腰连衣裙",
    }
    for field_name, expected_value in expected_fields.items():
        assert row[field_name] == expected_value
... ...