Commit b735cced058c29de83bb4e15ff84f7368a780fe3

Authored by tangwang
1 parent 148ab60d

scripts/amazon_xlsx_to_shoplazza_xlsx.py

1. 添加了两个开关参数
   - `--keep-spu-if-parent-missing`:保留父 ASIN 不在变体列表中的 SPU(默认:丢弃整个 SPU)
   - `--fix-sku-if-title-mismatch`:修正标题与主商品不一致的 SKU,而不是丢弃(默认:丢弃标题不一致的 SKU)
2. 实现了相关逻辑
   - 父 ASIN 缺失处理:
     - 默认:当父 ASIN 不在变体列表中时,打印警告并丢弃整个 SPU
     - 使用 `--keep-spu-if-parent-missing`:保留 SPU,并以变体列表中的第一个变体作为主商品
   - 标题不一致处理:
     - 默认:当变体标题与主商品标题不一致时,打印日志并丢弃该 SKU
     - 使用 `--fix-sku-if-title-mismatch`:将变体标题修正为主商品标题
Showing 1 changed file with 40 additions and 8 deletions   Show diff stats
scripts/amazon_xlsx_to_shoplazza_xlsx.py
@@ -420,6 +420,9 @@ def main(): @@ -420,6 +420,9 @@ def main():
420 parser.add_argument("--max-files", type=int, default=None, help="Limit number of xlsx files to read (for testing)") 420 parser.add_argument("--max-files", type=int, default=None, help="Limit number of xlsx files to read (for testing)")
421 parser.add_argument("--max-rows-per-output", type=int, default=40000, help="Max total Excel rows per output file (including模板头部行,默认40000)") 421 parser.add_argument("--max-rows-per-output", type=int, default=40000, help="Max total Excel rows per output file (including模板头部行,默认40000)")
422 parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)") 422 parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)")
  423 + # 默认行为:丢弃不符合要求的数据
  424 + parser.add_argument("--keep-spu-if-parent-missing", action="store_false", dest="skip_spu_if_parent_missing", default=True, help="Keep SPU even if parent ASIN not found in variants (default: skip entire SPU)")
  425 + parser.add_argument("--fix-sku-if-title-mismatch", action="store_false", dest="skip_sku_if_title_mismatch", default=True, help="Fix SKU title to match parent instead of skipping (default: skip SKU with mismatched title)")
423 args = parser.parse_args() 426 args = parser.parse_args()
424 427
425 if not os.path.isdir(args.input_dir): 428 if not os.path.isdir(args.input_dir):
@@ -481,24 +484,53 @@ def main(): @@ -481,24 +484,53 @@ def main():
481 if parent_variant: 484 if parent_variant:
482 variants = [parent_variant] + other_variants 485 variants = [parent_variant] + other_variants
483 else: 486 else:
484 - # 如果找不到父ASIN对应的变体,打印错误日志 487 + # 如果找不到父ASIN对应的变体
485 print( 488 print(
486 f"WARN: Parent ASIN not found in variants: SPU={spu_id}, " 489 f"WARN: Parent ASIN not found in variants: SPU={spu_id}, "
487 f"variant_count={len(variants)}, first_ASIN={variants[0].get('ASIN') if variants else 'N/A'}", 490 f"variant_count={len(variants)}, first_ASIN={variants[0].get('ASIN') if variants else 'N/A'}",
488 flush=True, 491 flush=True,
489 ) 492 )
  493 + # 根据开关决定是否丢弃整个SPU
  494 + if args.skip_spu_if_parent_missing:
  495 + print(
  496 + f"SKIP entire SPU due to missing parent ASIN: SPU={spu_id}",
  497 + flush=True,
  498 + )
  499 + continue
490 500
491 - # 统一变体标题:如果与主商品不一致,改为与主商品一致 501 + # 处理变体标题:如果与主商品不一致,根据开关决定修正或丢弃
492 main_title = variants[0].get("商品标题") or "" 502 main_title = variants[0].get("商品标题") or ""
  503 + filtered_variants = []
493 for v in variants: 504 for v in variants:
494 title = v.get("商品标题") or "" 505 title = v.get("商品标题") or ""
495 if main_title and title and title != main_title: 506 if main_title and title and title != main_title:
496 - print(  
497 - f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "  
498 - f"main_title='{main_title}', variant_title='{title}' -> using main_title",  
499 - flush=True,  
500 - )  
501 - v["商品标题"] = main_title # 统一为主商品标题 507 + if args.skip_sku_if_title_mismatch:
  508 + # 丢弃标题不一致的SKU
  509 + print(
  510 + f"SKIP SKU due to title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "
  511 + f"main_title='{main_title}', variant_title='{title}'",
  512 + flush=True,
  513 + )
  514 + continue
  515 + else:
  516 + # 修正标题
  517 + print(
  518 + f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "
  519 + f"main_title='{main_title}', variant_title='{title}' -> using main_title",
  520 + flush=True,
  521 + )
  522 + v["商品标题"] = main_title # 统一为主商品标题
  523 + filtered_variants.append(v)
  524 +
  525 + # 如果所有变体都被过滤掉,跳过整个SPU
  526 + if not filtered_variants:
  527 + print(
  528 + f"SKIP entire SPU: all variants filtered out, SPU={spu_id}",
  529 + flush=True,
  530 + )
  531 + continue
  532 +
  533 + variants = filtered_variants
502 534
503 spu_count += 1 535 spu_count += 1
504 if args.max_products is not None and spu_count > int(args.max_products): 536 if args.max_products is not None and spu_count > int(args.max_products):