From cd29428b86b6a3c1f2c6b3663c57e750decf660c Mon Sep 17 00:00:00 2001 From: tangwang Date: Wed, 17 Dec 2025 14:48:26 +0800 Subject: [PATCH] 亚马逊数据导入店匠店铺 - 数据处理 --- docs/亚马逊格式数据转店匠商品导入模板.md | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/amazon_xlsx_to_shoplazza_xlsx.py | 480 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/competitor_xlsx_to_shoplazza_xlsx.py | 35 ++++++++++++----------------------- 3 files changed, 628 insertions(+), 23 deletions(-) create mode 100644 docs/亚马逊格式数据转店匠商品导入模板.md create mode 100644 scripts/amazon_xlsx_to_shoplazza_xlsx.py diff --git a/docs/亚马逊格式数据转店匠商品导入模板.md b/docs/亚马逊格式数据转店匠商品导入模板.md new file mode 100644 index 0000000..11c24d3 --- /dev/null +++ b/docs/亚马逊格式数据转店匠商品导入模板.md @@ -0,0 +1,136 @@ +## 亚马逊格式数据 → 店匠(Shoplazza)商品导入模板:转换说明 + +本仓库支持把 `data/mai_jia_jing_ling/products_data/*.xlsx`(**亚马逊格式导出**)转换为店匠后台可导入的 `docs/商品导入模板.xlsx` 格式。 + +对应脚本: +- **主入口**:`scripts/amazon_xlsx_to_shoplazza_xlsx.py` +- **历史兼容**:`scripts/competitor_xlsx_to_shoplazza_xlsx.py`(仅名称过时,逻辑一致) +- **模板写入复用工具**:`scripts/shoplazza_excel_template.py` + +--- + +## 一、输入数据(亚马逊格式 xlsx)的关键字段 + +以 `Competitor-US-Last-30-days-363464.xlsx` 为例(文件名不影响:内容是亚马逊维度字段): + +- **ASIN**:变体 id(我们视为 `sku_id`,会写入模板的 `商品SKU`) +- **父ASIN**:父商品 id(我们视为 `spu_id`/`product_id`,会写入模板的 `商品spu`,并用于分组 M/P) +- **商品标题**:商品标题(写入 `商品标题*`、SEO标题等) +- **SKU**:亚马逊变体描述字符串(关键:解析出多款式维度) + - 示例:`Size: One Size | Color: Black` +- **商品主图**:图片 URL(用于 `商品图片*` / `商品主图`) +- **价格($)** / 
**prime价格($)**:价格(用于 `商品售价*` / `商品原价`) +- **详细参数**:详情参数串(用于拼接 `商品描述`) +- **上架时间**:用于 `创建时间` +- **类目路径/大类目/小类目/品牌/商品详情页链接/品牌链接**:用于专辑、标签、SEO、供应商URL、备注等 +- **商品重量(单位换算)/商品重量/商品尺寸**:用于 `商品重量/重量单位/尺寸信息` + +> 注意:该数据源通常**没有库存**,脚本默认给每个变体一个固定库存(当前默认 100),以满足导入后的可用性。 + +--- + +## 二、输出数据(店匠导入模板)的核心规则(M / P / S) + +店匠模板在 `docs/商品导入模板说明.md` 中定义了三种商品属性(`商品属性*`): + +- **S(单一款式)**:一个商品只有一个变体(只有 1 个 ASIN) + - 输出 **1 行** +- **M(主商品)+ P(子款式)**:一个父商品(父ASIN)包含多个变体(多个 ASIN) + - 输出 **1 行 M + N 行 P** + - 且 **同一商品的 P 行必须紧跟在 M 行后面**(模板导入强约束) + +本仓库的转换策略: +- 对每个 `父ASIN` 分组: + - **分组 size = 1** → 生成 `S` + - **分组 size > 1** → 生成 `M` + 多个 `P` + +--- + +## 三、多款式(变体)是如何构造的(最关键部分) + +### 1)为什么 “SKU” 列是关键 + +亚马逊格式里,变体的“颜色/尺码”等信息往往并不拆成多个列,而是集中在 `SKU` 字符串里,例如: + +- `Size: One Size | Color: Black` +- `Color: Red | Style: 2-Pack` + +店匠模板的多款式需要: +- **M 行**:`款式1/款式2/款式3` 写“维度名”(例如 Size / Color / Material) +- **P 行**:`款式1/款式2/款式3` 写“维度值”(例如 One Size / Black / Cotton) + +### 2)脚本如何从 SKU 解析出维度(key/value) + +脚本会把 `SKU` 以 `|` 分割,再用 `:` 拆成 key/value: + +- 输入:`Size: One Size | Color: Black` +- 解析结果:`{ "Size": "One Size", "Color": "Black" }` + +### 3)如何从多个变体里选出 “最多3个维度” + +店匠模板只提供 `款式1~3` 三个维度,因此脚本会在一个 `父ASIN` 组内统计 key 的出现频次,并按优先级挑选最多 3 个维度: + +- 优先级大致为:`Size`、`Color`、`Style`、`Pattern`、`Material` …… +- 如果一个组里解析不到任何 key/value,则退化为单维度:`Variant` + - M 行 `款式1 = Variant` + - P 行 `款式1 = ASIN` + +### 4)M 行与 P 行分别填什么(避免导入报错) + +根据模板说明,脚本遵循以下分工: + +- **M 行(主商品)**: + - 填:标题/描述/SEO/专辑/标签/主图/款式维度名 + - 不填:价格、库存、重量等 SKU 级字段(保持为空更安全) +- **P 行(子款式)**: + - 填:款式维度值、价格、商品SKU(ASIN)、库存、重量、尺寸、(可选)子款式图 + - 不填:描述/SEO/专辑/供应商等 SPU 级字段(保持为空) + +--- + +## 四、字段映射总览(高频字段) + +- **商品spu** ← `父ASIN`(无父ASIN则用 ASIN) +- **商品SKU** ← `ASIN` +- **商品标题\*** ← `商品标题` +- **商品图片\*** / **商品主图** ← `商品主图` +- **商品售价\*** ← `prime价格($)` 优先,否则 `价格($)` +- **创建时间** ← `上架时间`(仅日期时补齐为 `YYYY-MM-DD 00:00:00`) +- **商品描述** ← `商品标题` + `详细参数`(以 HTML 拼接) +- **专辑名称** ← `大类目`(无则取 `类目路径` 第一段) +- **标签** ← `品牌,大类目,小类目` +- **商品重量/重量单位** ← 优先解析 `商品重量(单位换算)`(如 `68.04 g`) +- 
**尺寸信息** ← 解析 `商品尺寸` 前三段数字(英寸)拼成 `L,W,H` + +--- + +## 五、如何运行(生成导入文件) + +### 1)先小批量验证(推荐) + +```bash +python scripts/amazon_xlsx_to_shoplazza_xlsx.py \ + --input-dir data/mai_jia_jing_ling/products_data \ + --template docs/商品导入模板.xlsx \ + --output data/mai_jia_jing_ling/amazon_shoplazza_import_SAMPLE.xlsx \ + --max-files 1 --max-rows-per-file 2000 --max-products 50 +``` + +### 2)生成全量 + +```bash +python scripts/amazon_xlsx_to_shoplazza_xlsx.py \ + --input-dir data/mai_jia_jing_ling/products_data \ + --template docs/商品导入模板.xlsx \ + --output data/mai_jia_jing_ling/amazon_shoplazza_import_ALL.xlsx +``` + +--- + +## 六、可扩展点(后续常见需求) + +- **库存/上架/收税策略参数化**:目前是脚本默认值(Y/N/100),可按目标店铺规则改为命令行参数。 +- **更强的多款式解析**:如果未来亚马逊格式 `SKU` 不规范,可补充从 `详细参数` 里挖出 `Color/Size`。 +- **图片策略**:目前 P 行用各自 `商品主图`;也可改为 M 行合并多图(逗号拼接)。 + + diff --git a/scripts/amazon_xlsx_to_shoplazza_xlsx.py b/scripts/amazon_xlsx_to_shoplazza_xlsx.py new file mode 100644 index 0000000..90f2e90 --- /dev/null +++ b/scripts/amazon_xlsx_to_shoplazza_xlsx.py @@ -0,0 +1,480 @@ +#!/usr/bin/env python3 +""" +Convert Amazon-format Excel exports (with Parent/Child ASIN structure) into +Shoplazza (店匠) product import Excel format based on `docs/商品导入模板.xlsx`. + +Data source: +- Directory with multiple `*.xlsx` files under `products_data/`. +- Each file contains a main sheet + "Notes" sheet. +- Column meanings (sample): + - ASIN: variant id (sku_id) + - 父ASIN: parent product id (spu_id) + +Output: +- For each 父ASIN group: + - If only 1 ASIN: generate one "S" row + - Else: generate one "M" row + multiple "P" rows + +Multi-variant (M/P) key point: +- Variant dimensions are parsed primarily from the `SKU` column, e.g. + "Size: One Size | Color: Black", and mapped into 款式1/2/3. +""" + +# NOTE: This file is intentionally the same implementation as +# `competitor_xlsx_to_shoplazza_xlsx.py`, but renamed to reflect the correct +# data source (Amazon-format exports). Keep the logic in sync. 
import argparse
import html
import os
import re
import sys
from collections import Counter, defaultdict
from datetime import datetime
from itertools import islice
from pathlib import Path

# Allow running as `python scripts/xxx.py` without installing as a package.
# The third-party / sibling imports (openpyxl, shoplazza_excel_template) are
# deferred to the functions that need them so the pure parsing helpers below
# stay importable without the Excel stack installed.
sys.path.insert(0, str(Path(__file__).resolve().parent))


# Option names that are preferred (in this order) when a parent ASIN exposes
# more variant dimensions than the three slots the template offers.
PREFERRED_OPTION_KEYS = [
    "Size", "Color", "Style", "Pattern", "Material", "Flavor", "Scent",
    "Pack", "Pack of", "Number of Items", "Count", "Capacity", "Length",
    "Width", "Height", "Model", "Configuration",
]

# Columns every input sheet must provide; checked once per file.
REQUIRED_COLUMNS = [
    "ASIN", "父ASIN", "商品标题", "商品主图", "SKU", "详细参数", "价格($)", "prime价格($)",
    "上架时间", "类目路径", "大类目", "小类目", "品牌", "品牌链接", "商品详情页链接",
    "商品重量(单位换算)", "商品重量", "商品尺寸",
]

# Raw-string patterns use single backslashes: inside r"..." a doubled
# backslash would match a literal backslash instead of whitespace / a dot.
_NUMBER_RE = re.compile(r"[0-9]+(?:\.[0-9]+)?")
_WEIGHT_RE = re.compile(r"([0-9]+(?:\.[0-9]+)?)\s*([a-zA-Z]+)")
_HANDLE_STRIP_RE = re.compile(r"[^a-z0-9\s-]")
_HANDLE_DASH_RE = re.compile(r"[-\s]+")

# Spelled-out unit -> unit code accepted by the template.
_WEIGHT_UNITS = {
    "g": "g", "gram": "g", "grams": "g",
    "kg": "kg", "kilogram": "kg", "kilograms": "kg",
    "lb": "lb", "lbs": "lb", "pound": "lb", "pounds": "lb",
    "oz": "oz", "ounce": "oz", "ounces": "oz",
}

_TEMPLATE_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
_INPUT_DATE_FORMATS = ("%Y-%m-%d", "%Y/%m/%d", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S")


def clean_str(v):
    """Return ``v`` as a stripped string; ``None`` becomes the empty string."""
    if v is None:
        return ""
    return str(v).strip()


def html_escape(s):
    """Escape ``s`` for safe embedding in HTML text and attribute values.

    Uses the standard library escaper so ``&``, ``<``, ``>`` and both quote
    characters are converted to entities.
    """
    return html.escape(clean_str(s), quote=True)


def generate_handle(title):
    """
    Generate URL-friendly handle from title (ASCII only).
    Keep consistent with existing scripts.

    Non ``[a-z0-9]`` characters are dropped, runs of whitespace/dashes are
    collapsed into a single dash, and the result is capped at 255 chars.
    Returns ``"product"`` when nothing usable is left.
    """
    handle = _HANDLE_STRIP_RE.sub("", clean_str(title).lower())
    handle = _HANDLE_DASH_RE.sub("-", handle).strip("-")
    return handle[:255] or "product"


def parse_date_to_template(dt_value):
    """
    Template expects: YYYY-MM-DD HH:MM:SS
    Input could be "2018-05-09" or datetime/date.

    Returns an empty string for empty or unrecognised input.
    """
    if dt_value is None or dt_value == "":
        return ""
    if isinstance(dt_value, datetime):
        return dt_value.strftime(_TEMPLATE_DATE_FORMAT)
    # `date` objects and strings both go through their string form.
    text = clean_str(dt_value)
    for fmt in _INPUT_DATE_FORMATS:
        try:
            return datetime.strptime(text, fmt).strftime(_TEMPLATE_DATE_FORMAT)
        except ValueError:
            continue
    return ""


def parse_weight(weight_conv, weight_raw):
    """
    Return (weight_value, unit) where unit in {kg, lb, g, oz}.
    Prefer '商品重量(单位换算)' like '68.04 g'.
    Fallback to '商品重量' like '0.15 pounds'.

    Returns ``("", "")`` when no number+unit pair with a known unit is found.
    """
    text = clean_str(weight_conv) or clean_str(weight_raw)
    if not text:
        return ("", "")
    match = _WEIGHT_RE.search(text)
    if not match:
        return ("", "")
    unit = _WEIGHT_UNITS.get(match.group(2).lower())
    if unit is None:
        return ("", "")
    return (float(match.group(1)), unit)


def parse_dimensions_inches(dim_raw):
    """
    Template '尺寸信息': 'L,W,H' in inches.
    Input example: '7.9 x 7.9 x 2 inches'

    Returns the first three numbers joined by commas, or "" when fewer than
    three numbers are present.
    """
    nums = _NUMBER_RE.findall(clean_str(dim_raw))
    if len(nums) < 3:
        return ""
    return "{},{},{}".format(nums[0], nums[1], nums[2])


def parse_sku_options(sku_text):
    """
    Parse 'SKU' column into {key: value}.
    Example:
      'Size: One Size | Color: Black' -> {'Size':'One Size','Color':'Black'}

    Segments without a colon, or with an empty key or value, are ignored.
    """
    options = {}
    for segment in clean_str(sku_text).split("|"):
        key, sep, value = segment.partition(":")
        key, value = key.strip(), value.strip()
        if sep and key and value:
            options[key] = value
    return options


def choose_option_keys(variant_dicts, max_keys=3):
    """Pick up to ``max_keys`` option names to use as variant dimensions.

    Keys listed in ``PREFERRED_OPTION_KEYS`` come first (in that order); the
    rest are ordered by how many variants carry them, then alphabetically.
    Returns an empty list when no variant has any non-empty option.
    """
    freq = Counter(k for d in variant_dicts for k, v in d.items() if v)
    if not freq:
        return []
    preferred_rank = {k: i for i, k in enumerate(PREFERRED_OPTION_KEYS)}

    def key_sort(k):
        return (preferred_rank.get(k, 10 ** 6), -freq[k], k.lower())

    return sorted(freq, key=key_sort)[:max_keys]


def build_description_html(title, details, product_url):
    """Build the HTML product description.

    The description is a heading with the title, a bullet list with at most
    30 ``|``-separated entries of ``details``, and a source link. All dynamic
    text is HTML-escaped. Empty inputs simply omit their section.
    """
    parts = []
    if title:
        parts.append("<h2>{}</h2>".format(html_escape(title)))
    detail_items = [x.strip() for x in clean_str(details).split("|") if x.strip()]
    if detail_items:
        li = "".join("<li>{}</li>".format(html_escape(x)) for x in detail_items[:30])
        parts.append("<ul>{}</ul>".format(li))
    if product_url:
        parts.append('<p>Source: <a href="{0}">{0}</a></p>'.format(html_escape(product_url)))
    return "".join(parts)


def amazon_sheet(ws):
    """Map header names in row 1 of ``ws`` to their 1-based column index.

    The header is read through ``iter_rows`` so the function does not depend
    on ``ws.max_column``. Blank header cells are skipped; for duplicated
    names the right-most column wins.
    """
    header = next(ws.iter_rows(min_row=1, max_row=1, values_only=True), ())
    return {name: col
            for col, name in enumerate((clean_str(v) for v in header), start=1)
            if name}


def _row_value(values, col):
    """Return the value at 1-based column ``col`` of a row tuple, or None if the row is short."""
    return values[col - 1] if col - 1 < len(values) else None


def read_amazon_rows_from_file(xlsx_path, max_rows=None):
    """Read variant rows from the first non-"Notes" sheet of ``xlsx_path``.

    Args:
        xlsx_path: path of the Amazon-format workbook.
        max_rows: optional cap on the number of data rows (after the header)
            to scan.

    Returns:
        A list of dicts keyed by the source column names. Rows without an
        ASIN are skipped; a missing parent ASIN falls back to the ASIN.
        Price cells are kept raw, every other cell is normalised to a
        stripped string.

    Raises:
        RuntimeError: when a required column is missing from the sheet.
    """
    from openpyxl import load_workbook  # deferred: only needed when reading

    wb = load_workbook(xlsx_path, read_only=True, data_only=True)
    try:
        sheet_name = next((n for n in wb.sheetnames if str(n).lower() != "notes"), None)
        if sheet_name is None:
            return []
        ws = wb[sheet_name]
        idx = amazon_sheet(ws)

        for k in REQUIRED_COLUMNS:
            if k not in idx:
                raise RuntimeError("Missing column '{}' in {} sheet {}".format(k, xlsx_path, sheet_name))

        last_row = None if max_rows is None else 1 + int(max_rows)
        if last_row is not None and last_row < 2:
            return []

        raw_columns = ("价格($)", "prime价格($)")
        text_columns = [c for c in REQUIRED_COLUMNS
                        if c not in raw_columns and c not in ("ASIN", "父ASIN")]

        rows = []
        # Stream rows once; per-cell random access re-reads the sheet in
        # read-only mode.
        for values in ws.iter_rows(min_row=2, max_row=last_row, values_only=True):
            asin = clean_str(_row_value(values, idx["ASIN"]))
            if not asin:
                continue
            record = {
                "ASIN": asin,
                "父ASIN": clean_str(_row_value(values, idx["父ASIN"])) or asin,
            }
            for col in text_columns:
                record[col] = clean_str(_row_value(values, idx[col]))
            for col in raw_columns:
                record[col] = _row_value(values, idx[col])
            rows.append(record)
        return rows
    finally:
        # Read-only workbooks keep the file handle open until closed.
        wb.close()


def to_price(v):
    """Convert a price cell to float.

    Accepts numbers and numeric strings; for other text (e.g. ``"$12.50"``)
    the first number found is used. Returns None for empty/unparseable input.
    """
    if v is None or v == "":
        return None
    try:
        return float(v)
    except (TypeError, ValueError):
        match = _NUMBER_RE.search(clean_str(v))
        return float(match.group(0)) if match else None


def build_common_fields(base_row, spu_id):
    """Build the template fields shared by S, M and P rows.

    Args:
        base_row: source row dict (as produced by
            ``read_amazon_rows_from_file``).
        spu_id: parent ASIN used as the template's SPU identifier.

    Returns:
        Dict keyed by template column names. Inventory is a fixed default of
        100 because the source data carries no stock information.
    """
    spu_id = clean_str(spu_id)
    title = base_row.get("商品标题") or "Product"
    brand = base_row.get("品牌") or ""
    big_cat = base_row.get("大类目") or ""
    small_cat = base_row.get("小类目") or ""
    cat_path = base_row.get("类目路径") or ""
    detail_url = base_row.get("商品详情页链接")

    # generate_handle never returns an empty string nor a "/" so the prefix
    # is always applied exactly once.
    handle = "products/{}".format(generate_handle(title))

    seo_description = " ".join(x for x in [brand, title, big_cat] if x)[:5000]
    seo_keywords = ",".join(x for x in [title, brand, big_cat, small_cat] if x)[:5000]
    tags = ",".join(x for x in [brand, big_cat, small_cat] if x)

    weight_val, weight_unit = parse_weight(base_row.get("商品重量(单位换算)"), base_row.get("商品重量"))
    album = big_cat or (cat_path.split(":")[0] if cat_path else "")

    return {
        "商品ID": "",
        "创建时间": parse_date_to_template(base_row.get("上架时间")),
        "商品标题*": title[:255],
        "商品副标题": "{} {}".format(brand, big_cat).strip()[:600],
        "商品描述": build_description_html(title, base_row.get("详细参数"), detail_url),
        "SEO标题": title[:5000],
        "SEO描述": seo_description,
        "SEO URL Handle": handle,
        "SEO URL 重定向": "N",
        "SEO关键词": seo_keywords,
        "商品上架": "Y",
        "需要物流": "Y",
        "商品收税": "N",
        "商品spu": spu_id[:100],
        "启用虚拟销量": "N",
        "虚拟销量值": "",
        "跟踪库存": "Y",
        "库存规则*": "1",
        "专辑名称": album,
        "标签": tags,
        "供应商名称": "Amazon",
        "供应商URL": detail_url or base_row.get("品牌链接") or "",
        "商品重量": weight_val,
        "重量单位": weight_unit,
        "商品库存": 100,
        "尺寸信息": parse_dimensions_inches(base_row.get("商品尺寸")),
        "原产地国别": "",
        "HS(协调制度)代码": "",
        "商品备注": "ASIN:{}; ParentASIN:{}; CategoryPath:{}".format(
            base_row.get("ASIN", ""), spu_id, (cat_path[:200] if cat_path else "")
        )[:500],
        "款式备注": "",
    }


def _variant_price(row):
    """Prime price if present, else list price, else the 9.99 placeholder."""
    return to_price(row.get("prime价格($)")) or to_price(row.get("价格($)")) or 9.99


def build_s_row(base_row):
    """Build the single "S" (single-variant) template row for ``base_row``."""
    spu_id = base_row.get("父ASIN") or base_row.get("ASIN")
    price = _variant_price(base_row)
    image = base_row.get("商品主图") or ""
    row = build_common_fields(base_row, spu_id=spu_id)
    row.update({
        "商品属性*": "S",
        "款式1": "",
        "款式2": "",
        "款式3": "",
        "商品售价*": price,
        "商品原价": price,
        "成本价": "",
        "商品SKU": base_row.get("ASIN") or "",
        "商品条形码": "",
        "商品图片*": image,
        "商品主图": image,
    })
    return row


def build_m_p_rows(variant_rows):
    """Build one "M" row followed by one "P" row per variant.

    The M row carries the SPU-level content and the option *names*; each P
    row carries the option *values* and SKU-level data (price, SKU, stock,
    weight, size, image). When no option can be parsed from any SKU string,
    a single ``Variant`` dimension is used with the ASIN as its value.
    """
    base = variant_rows[0]
    spu_id = base.get("父ASIN") or base.get("ASIN")

    option_dicts = [parse_sku_options(v.get("SKU")) for v in variant_rows]
    option_keys = choose_option_keys(option_dicts, max_keys=3) or ["Variant"]
    use_asin_as_option = option_keys == ["Variant"]
    # Pad to exactly three slots so the 款式1..3 columns can be filled uniformly.
    key_slots = (option_keys + ["", "", ""])[:3]

    base_image = base.get("商品主图") or ""
    m = build_common_fields(base, spu_id=spu_id)
    m.update({
        "商品属性*": "M",
        "款式1": key_slots[0],
        "款式2": key_slots[1],
        "款式3": key_slots[2],
        "商品售价*": "",
        "商品原价": "",
        "成本价": "",
        "商品SKU": "",
        "商品条形码": "",
        "商品图片*": base_image,
        "商品主图": base_image,
        # SKU-level fields stay empty on the M row.
        "商品重量": "",
        "重量单位": "",
        "商品库存": "",
        "尺寸信息": "",
    })

    rows = [m]

    # SPU-level fields that must stay empty on P rows.
    spu_only_blank = dict.fromkeys([
        "商品副标题", "商品描述", "SEO标题", "SEO描述", "SEO URL Handle",
        "SEO URL 重定向", "SEO关键词", "专辑名称", "标签", "供应商名称",
        "供应商URL", "商品备注",
    ], "")

    for v, opt in zip(variant_rows, option_dicts):
        if use_asin_as_option:
            opt_vals = [v.get("ASIN")]
        else:
            opt_vals = [opt.get(k, "") for k in option_keys]
        value_slots = (opt_vals + ["", "", ""])[:3]
        price = _variant_price(v)

        p = build_common_fields(v, spu_id=spu_id)
        p.update(spu_only_blank)
        p.update({
            "商品属性*": "P",
            "款式1": value_slots[0],
            "款式2": value_slots[1],
            "款式3": value_slots[2],
            "商品售价*": price,
            "商品原价": price,
            "成本价": "",
            "商品SKU": v.get("ASIN") or "",
            "商品条形码": "",
            "商品图片*": v.get("商品主图") or "",
            "商品主图": "",
        })
        rows.append(p)

    return rows


def main():
    """CLI entry point: read all input workbooks, group by parent ASIN, write the import file.

    Files that fail to load are reported and skipped (best effort). ASINs
    seen more than once across files are only taken the first time.
    """
    parser = argparse.ArgumentParser(description="Convert Amazon-format xlsx files to Shoplazza import xlsx")
    parser.add_argument("--input-dir", default="data/mai_jia_jing_ling/products_data", help="Directory containing Amazon-format xlsx files")
    parser.add_argument("--template", default="docs/商品导入模板.xlsx", help="Shoplazza import template xlsx")
    parser.add_argument("--output", default="amazon_shoplazza_import.xlsx", help="Output xlsx file path")
    parser.add_argument("--max-files", type=int, default=None, help="Limit number of xlsx files to read (for testing)")
    parser.add_argument("--max-rows-per-file", type=int, default=None, help="Limit rows per xlsx file (for testing)")
    parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)")
    args = parser.parse_args()

    if not os.path.isdir(args.input_dir):
        raise RuntimeError("input-dir not found: {}".format(args.input_dir))
    if not os.path.exists(args.template):
        raise RuntimeError("template not found: {}".format(args.template))

    # "~$" files are Excel lock files, not real workbooks.
    files = sorted(
        os.path.join(args.input_dir, f)
        for f in os.listdir(args.input_dir)
        if f.lower().endswith(".xlsx") and not f.startswith("~$")
    )
    if args.max_files is not None:
        files = files[: int(args.max_files)]

    print("Reading Amazon-format files: {} (from {})".format(len(files), args.input_dir), flush=True)

    groups = defaultdict(list)  # spu_id -> [variant rows], insertion-ordered
    seen_asin = set()

    for fp in files:
        print(" - loading: {}".format(fp), flush=True)
        try:
            rows = read_amazon_rows_from_file(fp, max_rows=args.max_rows_per_file)
        except Exception as e:  # best effort: one bad file must not abort the batch
            print("WARN: failed to read {}: {}".format(fp, e))
            continue
        print("   loaded rows: {}".format(len(rows)), flush=True)

        for r in rows:
            asin = r.get("ASIN")
            if asin in seen_asin:
                continue
            seen_asin.add(asin)
            groups[r.get("父ASIN") or asin].append(r)

    print("Collected variants: {}, SPU groups: {}".format(len(seen_asin), len(groups)), flush=True)

    limit = None if args.max_products is None else max(int(args.max_products), 0)
    excel_rows = []
    output_groups = 0

    for variants in islice(groups.values(), limit):
        if not variants:
            continue
        output_groups += 1
        if len(variants) == 1:
            excel_rows.append(build_s_row(variants[0]))
        else:
            # P rows are emitted right after their M row, as the template requires.
            excel_rows.extend(build_m_p_rows(variants))

    print("Generated Excel rows: {} (SPU groups output: {})".format(len(excel_rows), output_groups), flush=True)

    from shoplazza_excel_template import create_excel_from_template  # deferred: sibling script module

    create_excel_from_template(args.template, args.output, excel_rows)


if __name__ == "__main__":
    main()
+DEPRECATED NAME (kept for backward compatibility). + +The input `products_data/*.xlsx` files are **Amazon-format exports** (with Parent/Child ASIN), +not “competitor data”. Please use: + + - `scripts/amazon_xlsx_to_shoplazza_xlsx.py` + +This script keeps the same logic but updates user-facing naming gradually. """ import os @@ -457,10 +446,10 @@ def build_m_p_rows(variant_rows): def main(): - parser = argparse.ArgumentParser(description="Convert competitor xlsx files to Shoplazza import xlsx") - parser.add_argument("--input-dir", default="data/mai_jia_jing_ling/products_data", help="Directory containing competitor xlsx files") + parser = argparse.ArgumentParser(description="Convert Amazon-format xlsx files to Shoplazza import xlsx (deprecated script name)") + parser.add_argument("--input-dir", default="data/mai_jia_jing_ling/products_data", help="Directory containing Amazon-format xlsx files") parser.add_argument("--template", default="docs/商品导入模板.xlsx", help="Shoplazza import template xlsx") - parser.add_argument("--output", default="competitor_shoplazza_import.xlsx", help="Output xlsx file path") + parser.add_argument("--output", default="amazon_shoplazza_import.xlsx", help="Output xlsx file path") parser.add_argument("--max-files", type=int, default=None, help="Limit number of xlsx files to read (for testing)") parser.add_argument("--max-rows-per-file", type=int, default=None, help="Limit rows per xlsx file (for testing)") parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)") @@ -477,7 +466,7 @@ def main(): if args.max_files is not None: files = files[: int(args.max_files)] - print("Reading competitor files: {} (from {})".format(len(files), input_dir), flush=True) + print("Reading Amazon-format files: {} (from {})".format(len(files), input_dir), flush=True) groups = defaultdict(list) # spu_id -> [variant rows] seen_asin = set() -- libgit2 0.21.2