Commit 8b1425bb35f7964ea254f784ca7a1fbd431d48a0

Authored by tangwang
1 parent 6087131a

amazon data

Showing 1 changed file with 12 additions and 14 deletions   Show diff stats
scripts/amazon_xlsx_to_shoplazza_xlsx.py
... ... @@ -28,6 +28,7 @@ import os
28 28 import re
29 29 import sys
30 30 import argparse
  31 +import random
31 32 from datetime import datetime
32 33 from collections import defaultdict, Counter
33 34 from pathlib import Path
... ... @@ -459,37 +460,34 @@ def main():
459 460 group_rows_list = [] # List[List[dict]]
460 461 spu_count = 0
461 462 next_product_id = 1 # 用于填充商品ID,全局自增
  463 + # 将SPU顺序打乱,避免过于依赖输入文件的顺序
  464 + spu_items = list(groups.items())
  465 + random.shuffle(spu_items)
462 466  
463   - for spu_id, variants in groups.items():
  467 + for spu_id, variants in spu_items:
464 468 if not variants:
465 469 continue
466 470  
467   - # 过滤掉标题与主商品不一致的变体
  471 + # 统一变体标题:如果与主商品不一致,改为与主商品一致
468 472 main_title = variants[0].get("商品标题") or ""
469   - filtered = []
470 473 for v in variants:
471 474 title = v.get("商品标题") or ""
472 475 if main_title and title and title != main_title:
473 476 print(
474   - f"SKIP variant due to title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "
475   - f"main_title='{main_title}', variant_title='{title}'",
  477 + f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "
  478 + f"main_title='{main_title}', variant_title='{title}' -> using main_title",
476 479 flush=True,
477 480 )
478   - continue
479   - filtered.append(v)
480   -
481   - if not filtered:
482   - # 整个SPU都被过滤掉
483   - continue
  481 + v["商品标题"] = main_title # 统一为主商品标题
484 482  
485 483 spu_count += 1
486 484 if args.max_products is not None and spu_count > int(args.max_products):
487 485 break
488 486  
489   - if len(filtered) == 1:
490   - rows = [build_s_row(filtered[0])]
  487 + if len(variants) == 1:
  488 + rows = [build_s_row(variants[0])]
491 489 else:
492   - rows = build_m_p_rows(filtered)
  490 + rows = build_m_p_rows(variants)
493 491  
494 492 # 填充商品ID(从1开始全局递增)
495 493 for r in rows:
... ...