Commit 8b1425bb35f7964ea254f784ca7a1fbd431d48a0

Authored by tangwang
1 parent 6087131a

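amazon data: shuffle SPU order and unify mismatched variant titles to the main product title instead of skipping those variants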

Showing 1 changed file with 12 additions and 14 deletions   Show diff stats
scripts/amazon_xlsx_to_shoplazza_xlsx.py
@@ -28,6 +28,7 @@ import os @@ -28,6 +28,7 @@ import os
28 import re 28 import re
29 import sys 29 import sys
30 import argparse 30 import argparse
  31 +import random
31 from datetime import datetime 32 from datetime import datetime
32 from collections import defaultdict, Counter 33 from collections import defaultdict, Counter
33 from pathlib import Path 34 from pathlib import Path
@@ -459,37 +460,34 @@ def main(): @@ -459,37 +460,34 @@ def main():
459 group_rows_list = [] # List[List[dict]] 460 group_rows_list = [] # List[List[dict]]
460 spu_count = 0 461 spu_count = 0
461 next_product_id = 1 # 用于填充商品ID,全局自增 462 next_product_id = 1 # 用于填充商品ID,全局自增
  463 + # 将SPU顺序打乱,避免过于依赖输入文件的顺序
  464 + spu_items = list(groups.items())
  465 + random.shuffle(spu_items)
462 466
463 - for spu_id, variants in groups.items(): 467 + for spu_id, variants in spu_items:
464 if not variants: 468 if not variants:
465 continue 469 continue
466 470
467 - # 过滤掉标题与主商品不一致的变体 471 + # 统一变体标题:如果与主商品不一致,改为与主商品一致
468 main_title = variants[0].get("商品标题") or "" 472 main_title = variants[0].get("商品标题") or ""
469 - filtered = []  
470 for v in variants: 473 for v in variants:
471 title = v.get("商品标题") or "" 474 title = v.get("商品标题") or ""
472 if main_title and title and title != main_title: 475 if main_title and title and title != main_title:
473 print( 476 print(
474 - f"SKIP variant due to title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "  
475 - f"main_title='{main_title}', variant_title='{title}'", 477 + f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "
  478 + f"main_title='{main_title}', variant_title='{title}' -> using main_title",
476 flush=True, 479 flush=True,
477 ) 480 )
478 - continue  
479 - filtered.append(v)  
480 -  
481 - if not filtered:  
482 - # 整个SPU都被过滤掉  
483 - continue 481 + v["商品标题"] = main_title # 统一为主商品标题
484 482
485 spu_count += 1 483 spu_count += 1
486 if args.max_products is not None and spu_count > int(args.max_products): 484 if args.max_products is not None and spu_count > int(args.max_products):
487 break 485 break
488 486
489 - if len(filtered) == 1:  
490 - rows = [build_s_row(filtered[0])] 487 + if len(variants) == 1:
  488 + rows = [build_s_row(variants[0])]
491 else: 489 else:
492 - rows = build_m_p_rows(filtered) 490 + rows = build_m_p_rows(variants)
493 491
494 # 填充商品ID(从1开始全局递增) 492 # 填充商品ID(从1开始全局递增)
495 for r in rows: 493 for r in rows: