enrich

tangwang
1 parent 702ba3aa
Showing 3 changed files with 13 additions and 8 deletions Show diff stats
app/agents/shopping_agent.py
offline/product_understanding/graphRAG.md
offline/product_understanding/process_products.py
@@ -59,7 +59,9 @@ SYSTEM_PROMPT = f"""  角色定义
 3. 在最终回复中使用 [SEARCH_RESULTS_REF:ref_id] 内联引用搜索结果：
   1. 搜索工具会返回一个结果引用标识[SEARCH_RESULTS_REF:ref_id]，撰写最终答复的时候请直接引用 [SEARCH_RESULTS_REF:ref_id] ，系统会自动在该位置渲染对应的商品卡片列表，无需复述搜索结果。
   2. 因为系统会自动将[SEARCH_RESULTS_REF:ref_id]渲染为搜索结果，所以[SEARCH_RESULTS_REF:ref_id]必须独占一行，且只在需要渲染该query完整的搜索结果时才进行引用，同一个结果不要重复引用。
-4. 今天是{datetime.now().strftime("%Y-%m-%d")}，所有与当前时间（比如天气、最新或即将发生的事件）相关的问题，都要使用web_search工具）。
+4. 所有与当前时间（比如天气、最新或即将发生的事件）相关的问题，都要使用web_search工具）。
+
+当前日期: {datetime.now().strftime("%Y-%m-%d")} 星期{datetime.now().strftime("%w")}
 """
  
  
@@ -137,7 +137,7 @@ graphRAG在商品搜索中如何使用？我想将他用于，对商品的模糊
  
 材质层：
 - material
-- fabric_texture  （棉麻感 / 轻薄 / 垂坠）
+- fabric_texture
  
 视觉层：
 - main_color
@@ -65,6 +65,7 @@ def create_prompt(products: List[Dict[str, str]]) -> str:
 8. 材质说明 
 9. 功能特点
 10. 商品卖点：分析和提取一句话核心卖点，用于推荐理由
+11. 锚文本：生成一组能够代表该商品、并可能被用户用于搜索的词语或短语。这些词语应覆盖用户需求的各个维度，如品类、细分标签、功能特性、需求场景等等。
  
 输入商品列表：
  
@@ -73,8 +74,8 @@ def create_prompt(products: List[Dict[str, str]]) -> str:
     prompt_tail = """
 请严格按照以下markdown表格格式返回，每列内部的多值内容都用逗号分隔，不要添加任何其他说明：
  
-| 序号 | 商品中文标题 | 品类路径 | 细分标签 | 适用人群 | 使用场景 | 适用季节 | 关键属性 | 材质说明 | 功能特点 | 商品卖点 |
-|----|----|----|----|----|----|----|----|----|----|----|
+| 序号 | 商品中文标题 | 品类路径 | 细分标签 | 适用人群 | 使用场景 | 适用季节 | 关键属性 | 材质说明 | 功能特点 | 商品卖点 | 锚文本 |
+|----|----|----|----|----|----|----|----|----|----|----|----|
 """
  
     for idx, product in enumerate(products, 1):
@@ -213,7 +214,8 @@ def parse_markdown_table(markdown_content: str) -> List[Dict[str, str]]:
                     "key_attributes": parts[7] if len(parts) > 7 else "",  # 关键属性
                     "material": parts[8] if len(parts) > 8 else "",  # 材质说明
                     "features": parts[9] if len(parts) > 9 else "",  # 功能特点
-                    "selling_points": parts[10] if len(parts) > 10 else ""  # 商品卖点
+                    "selling_points": parts[10] if len(parts) > 10 else "",  # 商品卖点
+                    "anchor_text": parts[11] if len(parts) > 11 else ""  # 锚文本
                 }
                 data.append(row)
  
@@ -255,7 +257,8 @@ def process_batch(batch_data: List[Dict[str, str]], batch_num: int) -> List[Dict
                     "key_attributes": parsed_item.get("key_attributes", ""),  # 关键属性
                     "material": parsed_item.get("material", ""),  # 材质说明
                     "features": parsed_item.get("features", ""),  # 功能特点
-                    "selling_points": parsed_item.get("selling_points", "")  # 商品卖点
+                    "selling_points": parsed_item.get("selling_points", ""),  # 商品卖点
+                    "anchor_text": parsed_item.get("anchor_text", "")  # 锚文本
                 }
                 results_with_ids.append(result)
                 logger.info(f"Mapped: seq={parsed_item['seq_no']} -> original_id={original_id}")
@@ -285,7 +288,7 @@ def process_batch(batch_data: List[Dict[str, str]], batch_num: int) -> List[Dict
         return [{"id": item["id"], "title": item["title"],
                  "title_cn": "", "category_path": "", "tags": "", "target_audience": "",
                  "usage_scene": "", "season": "", "key_attributes": "",
-                 "material": "", "features": "", "selling_points": "",
+                 "material": "", "features": "", "selling_points": "", "anchor_text": "",
                  "error": str(e)} for item in batch_data]
  
  
@@ -308,7 +311,7 @@ def write_results(results: List[Dict[str, str]], output_file: Path):
  
     fieldnames = ["id", "title", "title_cn", "category_path", "tags",
                   "target_audience", "usage_scene", "season",
-                  "key_attributes", "material", "features", "selling_points"]
+                  "key_attributes", "material", "features", "selling_points", "anchor_text"]
  
     with open(output_file, 'w', encoding='utf-8', newline='') as f:
         writer = csv.DictWriter(f, fieldnames=fieldnames)