amazon data

tangwang
1 parent 6087131a
Showing 1 changed file with 12 additions and 14 deletions. (Show diff stats)
scripts/amazon_xlsx_to_shoplazza_xlsx.py
@@ -28,6 +28,7 @@ import os
 import re
 import sys
 import argparse
+import random
 from datetime import datetime
 from collections import defaultdict, Counter
 from pathlib import Path
@@ -459,37 +460,34 @@ def main():
     group_rows_list = []  # List[List[dict]]
     spu_count = 0
     next_product_id = 1  # 用于填充商品ID，全局自增
+    # 将SPU顺序打乱，避免过于依赖输入文件的顺序
+    spu_items = list(groups.items())
+    random.shuffle(spu_items)
-    for spu_id, variants in groups.items():
+    for spu_id, variants in spu_items:
         if not variants:
             continue
-        # 过滤掉标题与主商品不一致的变体
+        # 统一变体标题：如果与主商品不一致，改为与主商品一致
         main_title = variants[0].get("商品标题") or ""
-        filtered = []
         for v in variants:
             title = v.get("商品标题") or ""
             if main_title and title and title != main_title:
                 print(
-                    f"SKIP variant due to title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "
-                    f"main_title='{main_title}', variant_title='{title}'",
+                    f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "
+                    f"main_title='{main_title}', variant_title='{title}' -> using main_title",
                     flush=True,
                 )
-                continue
-            filtered.append(v)
-
-        if not filtered:
-            # 整个SPU都被过滤掉
-            continue
+                v["商品标题"] = main_title  # 统一为主商品标题
         spu_count += 1
         if args.max_products is not None and spu_count > int(args.max_products):
             break
-        if len(filtered) == 1:
-            rows = [build_s_row(filtered[0])]
+        if len(variants) == 1:
+            rows = [build_s_row(variants[0])]
         else:
-            rows = build_m_p_rows(filtered)
+            rows = build_m_p_rows(variants)
         # 填充商品ID（从1开始全局递增）
         for r in rows: