From b735cced058c29de83bb4e15ff84f7368a780fe3 Mon Sep 17 00:00:00 2001 From: tangwang Date: Thu, 18 Dec 2025 08:50:29 +0800 Subject: [PATCH] scripts/amazon_xlsx_to_shoplazza_xlsx.py --- scripts/amazon_xlsx_to_shoplazza_xlsx.py | 48 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/scripts/amazon_xlsx_to_shoplazza_xlsx.py b/scripts/amazon_xlsx_to_shoplazza_xlsx.py index be37a5c..fbb9122 100644 --- a/scripts/amazon_xlsx_to_shoplazza_xlsx.py +++ b/scripts/amazon_xlsx_to_shoplazza_xlsx.py @@ -420,6 +420,9 @@ def main(): parser.add_argument("--max-files", type=int, default=None, help="Limit number of xlsx files to read (for testing)") parser.add_argument("--max-rows-per-output", type=int, default=40000, help="Max total Excel rows per output file (including模板头部行,默认40000)") parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)") + # 默认行为:丢弃不符合要求的数据 + parser.add_argument("--keep-spu-if-parent-missing", action="store_false", dest="skip_spu_if_parent_missing", default=True, help="Keep SPU even if parent ASIN not found in variants (default: skip entire SPU)") + parser.add_argument("--fix-sku-if-title-mismatch", action="store_false", dest="skip_sku_if_title_mismatch", default=True, help="Fix SKU title to match parent instead of skipping (default: skip SKU with mismatched title)") args = parser.parse_args() if not os.path.isdir(args.input_dir): @@ -481,24 +484,53 @@ def main(): if parent_variant: variants = [parent_variant] + other_variants else: - # 如果找不到父ASIN对应的变体,打印错误日志 + # 如果找不到父ASIN对应的变体 print( f"WARN: Parent ASIN not found in variants: SPU={spu_id}, " f"variant_count={len(variants)}, first_ASIN={variants[0].get('ASIN') if variants else 'N/A'}", flush=True, ) + # 根据开关决定是否丢弃整个SPU + if args.skip_spu_if_parent_missing: + print( + f"SKIP entire SPU due to missing parent ASIN: SPU={spu_id}", + flush=True, + ) + continue - # 统一变体标题:如果与主商品不一致,改为与主商品一致 + # 处理变体标题:如果与主商品不一致,根据开关决定修正或丢弃 main_title = variants[0].get("商品标题") or "" + filtered_variants = [] for v in variants: title = v.get("商品标题") or "" if main_title and title and title != main_title: - print( - f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, " - f"main_title='{main_title}', variant_title='{title}' -> using main_title", - flush=True, - ) - v["商品标题"] = main_title # 统一为主商品标题 + if args.skip_sku_if_title_mismatch: + # 丢弃标题不一致的SKU + print( + f"SKIP SKU due to title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, " + f"main_title='{main_title}', variant_title='{title}'", + flush=True, + ) + continue + else: + # 修正标题 + print( + f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, " + f"main_title='{main_title}', variant_title='{title}' -> using main_title", + flush=True, + ) + v["商品标题"] = main_title # 统一为主商品标题 + filtered_variants.append(v) + + # 如果所有变体都被过滤掉,跳过整个SPU + if not filtered_variants: + print( + f"SKIP entire SPU: all variants filtered out, SPU={spu_id}", + flush=True, + ) + continue + + variants = filtered_variants spu_count += 1 if args.max_products is not None and spu_count > int(args.max_products): -- libgit2 0.21.2