Commit b735cced058c29de83bb4e15ff84f7368a780fe3

Authored by tangwang
1 parent 148ab60d

scripts/amazon_xlsx_to_shoplazza_xlsx.py

1. 添加了两个开关参数
--keep-spu-if-parent-missing:保留父ASIN不在变体列表中的SPU(默认:丢弃整个SPU)
--fix-sku-if-title-mismatch:修正标题不一致的SKU而不是丢弃(默认:丢弃标题不一致的SKU)
2. 实现了相关逻辑
父ASIN缺失处理:
默认:当父ASIN不在变体列表中时,打印警告并丢弃整个SPU
使用 --keep-spu-if-parent-missing:仍会打印警告,但保留该SPU,并以变体列表中的第一个变体作为主商品
标题不一致处理:
默认:当变体标题与主商品标题均非空且不一致时,打印日志并丢弃该SKU
使用 --fix-sku-if-title-mismatch:打印日志,并将变体标题修正为主商品标题(不丢弃该SKU)
Showing 1 changed file with 40 additions and 8 deletions   Show diff stats
scripts/amazon_xlsx_to_shoplazza_xlsx.py
... ... @@ -420,6 +420,9 @@ def main():
420 420 parser.add_argument("--max-files", type=int, default=None, help="Limit number of xlsx files to read (for testing)")
421 421 parser.add_argument("--max-rows-per-output", type=int, default=40000, help="Max total Excel rows per output file (including模板头部行,默认40000)")
422 422 parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)")
  423 + # 默认行为:丢弃不符合要求的数据
  424 + parser.add_argument("--keep-spu-if-parent-missing", action="store_false", dest="skip_spu_if_parent_missing", default=True, help="Keep SPU even if parent ASIN not found in variants (default: skip entire SPU)")
  425 + parser.add_argument("--fix-sku-if-title-mismatch", action="store_false", dest="skip_sku_if_title_mismatch", default=True, help="Fix SKU title to match parent instead of skipping (default: skip SKU with mismatched title)")
423 426 args = parser.parse_args()
424 427  
425 428 if not os.path.isdir(args.input_dir):
... ... @@ -481,24 +484,53 @@ def main():
481 484 if parent_variant:
482 485 variants = [parent_variant] + other_variants
483 486 else:
484   - # 如果找不到父ASIN对应的变体,打印错误日志
  487 + # 如果找不到父ASIN对应的变体
485 488 print(
486 489 f"WARN: Parent ASIN not found in variants: SPU={spu_id}, "
487 490 f"variant_count={len(variants)}, first_ASIN={variants[0].get('ASIN') if variants else 'N/A'}",
488 491 flush=True,
489 492 )
  493 + # 根据开关决定是否丢弃整个SPU
  494 + if args.skip_spu_if_parent_missing:
  495 + print(
  496 + f"SKIP entire SPU due to missing parent ASIN: SPU={spu_id}",
  497 + flush=True,
  498 + )
  499 + continue
490 500  
491   - # 统一变体标题:如果与主商品不一致,改为与主商品一致
  501 + # 处理变体标题:如果与主商品不一致,根据开关决定修正或丢弃
492 502 main_title = variants[0].get("商品标题") or ""
  503 + filtered_variants = []
493 504 for v in variants:
494 505 title = v.get("商品标题") or ""
495 506 if main_title and title and title != main_title:
496   - print(
497   - f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "
498   - f"main_title='{main_title}', variant_title='{title}' -> using main_title",
499   - flush=True,
500   - )
501   - v["商品标题"] = main_title # 统一为主商品标题
  507 + if args.skip_sku_if_title_mismatch:
  508 + # 丢弃标题不一致的SKU
  509 + print(
  510 + f"SKIP SKU due to title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "
  511 + f"main_title='{main_title}', variant_title='{title}'",
  512 + flush=True,
  513 + )
  514 + continue
  515 + else:
  516 + # 修正标题
  517 + print(
  518 + f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "
  519 + f"main_title='{main_title}', variant_title='{title}' -> using main_title",
  520 + flush=True,
  521 + )
  522 + v["商品标题"] = main_title # 统一为主商品标题
  523 + filtered_variants.append(v)
  524 +
  525 + # 如果所有变体都被过滤掉,跳过整个SPU
  526 + if not filtered_variants:
  527 + print(
  528 + f"SKIP entire SPU: all variants filtered out, SPU={spu_id}",
  529 + flush=True,
  530 + )
  531 + continue
  532 +
  533 + variants = filtered_variants
502 534  
503 535 spu_count += 1
504 536 if args.max_products is not None and spu_count > int(args.max_products):
... ...