Commit 8b1425bb35f7964ea254f784ca7a1fbd431d48a0
1 parent
6087131a
amazon data
Showing
1 changed file
with
12 additions
and
14 deletions
Show diff stats
scripts/amazon_xlsx_to_shoplazza_xlsx.py
| @@ -28,6 +28,7 @@ import os | @@ -28,6 +28,7 @@ import os | ||
| 28 | import re | 28 | import re |
| 29 | import sys | 29 | import sys |
| 30 | import argparse | 30 | import argparse |
| 31 | +import random | ||
| 31 | from datetime import datetime | 32 | from datetime import datetime |
| 32 | from collections import defaultdict, Counter | 33 | from collections import defaultdict, Counter |
| 33 | from pathlib import Path | 34 | from pathlib import Path |
| @@ -459,37 +460,34 @@ def main(): | @@ -459,37 +460,34 @@ def main(): | ||
| 459 | group_rows_list = [] # List[List[dict]] | 460 | group_rows_list = [] # List[List[dict]] |
| 460 | spu_count = 0 | 461 | spu_count = 0 |
| 461 | next_product_id = 1 # 用于填充商品ID,全局自增 | 462 | next_product_id = 1 # 用于填充商品ID,全局自增 |
| 463 | + # 将SPU顺序打乱,避免过于依赖输入文件的顺序 | ||
| 464 | + spu_items = list(groups.items()) | ||
| 465 | + random.shuffle(spu_items) | ||
| 462 | 466 | ||
| 463 | - for spu_id, variants in groups.items(): | 467 | + for spu_id, variants in spu_items: |
| 464 | if not variants: | 468 | if not variants: |
| 465 | continue | 469 | continue |
| 466 | 470 | ||
| 467 | - # 过滤掉标题与主商品不一致的变体 | 471 | + # 统一变体标题:如果与主商品不一致,改为与主商品一致 |
| 468 | main_title = variants[0].get("商品标题") or "" | 472 | main_title = variants[0].get("商品标题") or "" |
| 469 | - filtered = [] | ||
| 470 | for v in variants: | 473 | for v in variants: |
| 471 | title = v.get("商品标题") or "" | 474 | title = v.get("商品标题") or "" |
| 472 | if main_title and title and title != main_title: | 475 | if main_title and title and title != main_title: |
| 473 | print( | 476 | print( |
| 474 | - f"SKIP variant due to title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, " | ||
| 475 | - f"main_title='{main_title}', variant_title='{title}'", | 477 | + f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, " |
| 478 | + f"main_title='{main_title}', variant_title='{title}' -> using main_title", | ||
| 476 | flush=True, | 479 | flush=True, |
| 477 | ) | 480 | ) |
| 478 | - continue | ||
| 479 | - filtered.append(v) | ||
| 480 | - | ||
| 481 | - if not filtered: | ||
| 482 | - # 整个SPU都被过滤掉 | ||
| 483 | - continue | 481 | + v["商品标题"] = main_title # 统一为主商品标题 |
| 484 | 482 | ||
| 485 | spu_count += 1 | 483 | spu_count += 1 |
| 486 | if args.max_products is not None and spu_count > int(args.max_products): | 484 | if args.max_products is not None and spu_count > int(args.max_products): |
| 487 | break | 485 | break |
| 488 | 486 | ||
| 489 | - if len(filtered) == 1: | ||
| 490 | - rows = [build_s_row(filtered[0])] | 487 | + if len(variants) == 1: |
| 488 | + rows = [build_s_row(variants[0])] | ||
| 491 | else: | 489 | else: |
| 492 | - rows = build_m_p_rows(filtered) | 490 | + rows = build_m_p_rows(variants) |
| 493 | 491 | ||
| 494 | # 填充商品ID(从1开始全局递增) | 492 | # 填充商品ID(从1开始全局递增) |
| 495 | for r in rows: | 493 | for r in rows: |