Commit b735cced058c29de83bb4e15ff84f7368a780fe3
1 parent
148ab60d
scripts/amazon_xlsx_to_shoplazza_xlsx.py
1. 添加了两个开关参数:
   - --keep-spu-if-parent-missing:保留父ASIN不在变体列表中的SPU(默认:丢弃整个SPU)
   - --fix-sku-if-title-mismatch:修正标题不一致的SKU而不是丢弃(默认:丢弃标题不一致的SKU)
2. 实现了相关逻辑:
   - 父ASIN缺失处理:
     - 默认:当父ASIN不在变体列表中时,打印警告并丢弃整个SPU
     - 使用 --keep-spu-if-parent-missing:保留SPU,使用第一个变体作为主商品
   - 标题不一致处理:
     - 默认:当变体标题与主商品不一致时,打印日志并丢弃该SKU
     - 使用 --fix-sku-if-title-mismatch:修正变体标题为主商品标题
Showing 1 changed file with 40 additions and 8 deletions
Show diff stats
scripts/amazon_xlsx_to_shoplazza_xlsx.py
| @@ -420,6 +420,9 @@ def main(): | @@ -420,6 +420,9 @@ def main(): | ||
| 420 | parser.add_argument("--max-files", type=int, default=None, help="Limit number of xlsx files to read (for testing)") | 420 | parser.add_argument("--max-files", type=int, default=None, help="Limit number of xlsx files to read (for testing)") |
| 421 | parser.add_argument("--max-rows-per-output", type=int, default=40000, help="Max total Excel rows per output file (including模板头部行,默认40000)") | 421 | parser.add_argument("--max-rows-per-output", type=int, default=40000, help="Max total Excel rows per output file (including模板头部行,默认40000)") |
| 422 | parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)") | 422 | parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)") |
| 423 | + # 默认行为:丢弃不符合要求的数据 | ||
| 424 | + parser.add_argument("--keep-spu-if-parent-missing", action="store_false", dest="skip_spu_if_parent_missing", default=True, help="Keep SPU even if parent ASIN not found in variants (default: skip entire SPU)") | ||
| 425 | + parser.add_argument("--fix-sku-if-title-mismatch", action="store_false", dest="skip_sku_if_title_mismatch", default=True, help="Fix SKU title to match parent instead of skipping (default: skip SKU with mismatched title)") | ||
| 423 | args = parser.parse_args() | 426 | args = parser.parse_args() |
| 424 | 427 | ||
| 425 | if not os.path.isdir(args.input_dir): | 428 | if not os.path.isdir(args.input_dir): |
| @@ -481,24 +484,53 @@ def main(): | @@ -481,24 +484,53 @@ def main(): | ||
| 481 | if parent_variant: | 484 | if parent_variant: |
| 482 | variants = [parent_variant] + other_variants | 485 | variants = [parent_variant] + other_variants |
| 483 | else: | 486 | else: |
| 484 | - # 如果找不到父ASIN对应的变体,打印错误日志 | 487 | + # 如果找不到父ASIN对应的变体 |
| 485 | print( | 488 | print( |
| 486 | f"WARN: Parent ASIN not found in variants: SPU={spu_id}, " | 489 | f"WARN: Parent ASIN not found in variants: SPU={spu_id}, " |
| 487 | f"variant_count={len(variants)}, first_ASIN={variants[0].get('ASIN') if variants else 'N/A'}", | 490 | f"variant_count={len(variants)}, first_ASIN={variants[0].get('ASIN') if variants else 'N/A'}", |
| 488 | flush=True, | 491 | flush=True, |
| 489 | ) | 492 | ) |
| 493 | + # 根据开关决定是否丢弃整个SPU | ||
| 494 | + if args.skip_spu_if_parent_missing: | ||
| 495 | + print( | ||
| 496 | + f"SKIP entire SPU due to missing parent ASIN: SPU={spu_id}", | ||
| 497 | + flush=True, | ||
| 498 | + ) | ||
| 499 | + continue | ||
| 490 | 500 | ||
| 491 | - # 统一变体标题:如果与主商品不一致,改为与主商品一致 | 501 | + # 处理变体标题:如果与主商品不一致,根据开关决定修正或丢弃 |
| 492 | main_title = variants[0].get("商品标题") or "" | 502 | main_title = variants[0].get("商品标题") or "" |
| 503 | + filtered_variants = [] | ||
| 493 | for v in variants: | 504 | for v in variants: |
| 494 | title = v.get("商品标题") or "" | 505 | title = v.get("商品标题") or "" |
| 495 | if main_title and title and title != main_title: | 506 | if main_title and title and title != main_title: |
| 496 | - print( | ||
| 497 | - f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, " | ||
| 498 | - f"main_title='{main_title}', variant_title='{title}' -> using main_title", | ||
| 499 | - flush=True, | ||
| 500 | - ) | ||
| 501 | - v["商品标题"] = main_title # 统一为主商品标题 | 507 | + if args.skip_sku_if_title_mismatch: |
| 508 | + # 丢弃标题不一致的SKU | ||
| 509 | + print( | ||
| 510 | + f"SKIP SKU due to title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, " | ||
| 511 | + f"main_title='{main_title}', variant_title='{title}'", | ||
| 512 | + flush=True, | ||
| 513 | + ) | ||
| 514 | + continue | ||
| 515 | + else: | ||
| 516 | + # 修正标题 | ||
| 517 | + print( | ||
| 518 | + f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, " | ||
| 519 | + f"main_title='{main_title}', variant_title='{title}' -> using main_title", | ||
| 520 | + flush=True, | ||
| 521 | + ) | ||
| 522 | + v["商品标题"] = main_title # 统一为主商品标题 | ||
| 523 | + filtered_variants.append(v) | ||
| 524 | + | ||
| 525 | + # 如果所有变体都被过滤掉,跳过整个SPU | ||
| 526 | + if not filtered_variants: | ||
| 527 | + print( | ||
| 528 | + f"SKIP entire SPU: all variants filtered out, SPU={spu_id}", | ||
| 529 | + flush=True, | ||
| 530 | + ) | ||
| 531 | + continue | ||
| 532 | + | ||
| 533 | + variants = filtered_variants | ||
| 502 | 534 | ||
| 503 | spu_count += 1 | 535 | spu_count += 1 |
| 504 | if args.max_products is not None and spu_count > int(args.max_products): | 536 | if args.max_products is not None and spu_count > int(args.max_products): |