Commit f3c11fef37a249640c5e2a576582747c3f1fc90e

Authored by tangwang
1 parent 19d2d90f

亚马逊格式数据 导入店匠

Showing 53 changed files with 723 additions and 122 deletions   Show diff stats
README.md
... ... @@ -14,11 +14,12 @@ source .env
14 14  
15 15 ## 测试pipeline
16 16  
17   -fake数据 生成商品导入数据 提交到店匠的店铺:
  17 +1. fake数据 生成商品导入数据 提交到店匠的店铺:
18 18 cd /home/tw/SearchEngine && source /home/tw/miniconda3/etc/profile.d/conda.sh && conda activate searchengine && python scripts/csv_to_excel_multi_variant.py --output with_colors.xlsx
19 19  
20   -自动同步到mysql
21   -mysql到ES:
  20 +2. 后端:自动同步到mysql
  21 +
  22 +3. mysql到ES:
22 23  
23 24 python scripts/recreate_and_import.py \
24 25 --tenant-id 162 \
... ...
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363464.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363499.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363533.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363560.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363598.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363625.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363662.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363709.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363746.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363778.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363802.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363824.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363854.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363884.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363899.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363924.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363943.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363956.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363985.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364002.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364039.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364076.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364113.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364149.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364182.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364202.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364223.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364237.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364253.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364272.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364293.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364309.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364335.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364361.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364384.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364409.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364471.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364496.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364519.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364538.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364557.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364581.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364605.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364628.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364644.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364667.xlsx 0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364690.xlsx 0 → 100644
No preview for this file type
scripts/competitor_xlsx_to_shoplazza_xlsx.py 0 → 100644
... ... @@ -0,0 +1,525 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +Convert competitor Excel exports (with Parent/Child ASIN structure) into
  4 +Shoplazza (店匠) product import Excel format based on `docs/商品导入模板.xlsx`.
  5 +
  6 +Data source:
  7 +- Directory with multiple `Competitor-*.xlsx` files.
  8 +- Each file contains a main sheet + "Notes" sheet.
  9 +- Column meanings (sample):
  10 + - ASIN: variant id (sku_id)
  11 + - 父ASIN: product id (spu_id)
  12 +
  13 +Output:
  14 +- For each 父ASIN group:
  15 + - If only 1 ASIN: generate one "S" row
  16 + - Else: generate one "M" row + multiple "P" rows
  17 +
  18 +Important:
  19 +- Variant dimensions are parsed primarily from the `SKU` column:
  20 + "Size: One Size | Color: Black"
  21 + and mapped into 款式1/2/3.
  22 +"""
  23 +
  24 +import os
  25 +import re
  26 +import sys
  27 +import argparse
  28 +from datetime import datetime
  29 +from collections import defaultdict, Counter
  30 +from pathlib import Path
  31 +
  32 +from openpyxl import load_workbook
  33 +
  34 +# Allow running as `python scripts/xxx.py` without installing as a package
  35 +sys.path.insert(0, str(Path(__file__).resolve().parent))
  36 +from shoplazza_excel_template import create_excel_from_template
  37 +
  38 +
# Option names that are ranked first (in this order) when choosing which SKU
# options become the 款式1/2/3 columns; names not listed here sort after them.
PREFERRED_OPTION_KEYS = [
    "Size",
    "Color",
    "Style",
    "Pattern",
    "Material",
    "Flavor",
    "Scent",
    "Pack",
    "Pack of",
    "Number of Items",
    "Count",
    "Capacity",
    "Length",
    "Width",
    "Height",
    "Model",
    "Configuration",
]
  44 +
  45 +
def clean_str(v):
    """Return ``v`` converted to ``str`` with surrounding whitespace removed.

    ``None`` maps to the empty string; every other value goes through ``str()``.
    """
    return "" if v is None else str(v).strip()
  50 +
  51 +
def html_escape(s):
    """
    Escape a value for embedding in the generated HTML description.

    The value is normalised with ``clean_str`` first, so ``None`` yields "".
    ``&`` is replaced before the other characters so the entities produced for
    them are not escaped a second time. Double quotes are escaped as well
    because the result is also placed inside a double-quoted ``href`` attribute.
    """
    s = clean_str(s)
    return (s.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace('"', "&quot;"))
  57 +
  58 +
def generate_handle(title):
    """
    Generate a URL-friendly handle from a title (ASCII only).

    The title is lower-cased, every character other than ``a-z``, ``0-9``,
    whitespace and ``-`` is dropped, and runs of whitespace/hyphens collapse
    into a single ``-``. The result is capped at 255 characters and falls back
    to ``"product"`` when nothing usable remains.
    """
    handle = clean_str(title).lower()
    # Single backslashes: inside a raw string `\\s` would mean "a literal
    # backslash or the letter s", which deletes spaces and mangles every "s".
    handle = re.sub(r"[^a-z0-9\s-]", "", handle)
    handle = re.sub(r"[-\s]+", "-", handle).strip("-")
    if len(handle) > 255:
        handle = handle[:255]
    return handle or "product"
  70 +
  71 +
def parse_date_to_template(dt_value):
    """
    Render a date-like value in the template format ``YYYY-MM-DD HH:MM:SS``.

    ``datetime`` instances are formatted directly. Anything else is converted
    to text and tried against a few known date layouts. ``None``, an empty
    string, or text matching none of the layouts yields "".
    """
    if dt_value is None or dt_value == "":
        return ""

    template_format = "%Y-%m-%d %H:%M:%S"
    if isinstance(dt_value, datetime):
        return dt_value.strftime(template_format)

    text = clean_str(dt_value)
    known_layouts = ("%Y-%m-%d", "%Y/%m/%d", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S")
    for layout in known_layouts:
        try:
            return datetime.strptime(text, layout).strftime(template_format)
        except Exception:
            # Layout did not match; try the next one.
            continue
    return ""
  90 +
  91 +
def parse_weight(weight_conv, weight_raw):
    """
    Return ``(weight_value, unit)`` where unit is one of ``kg``, ``lb``, ``g``, ``oz``.

    ``weight_conv`` (e.g. ``'68.04 g'``) is preferred; ``weight_raw``
    (e.g. ``'0.15 pounds'``) is used when the first value is empty or cannot
    be parsed. Returns ``("", "")`` when neither yields a number followed by
    a recognised unit.
    """
    unit_aliases = {
        "g": "g", "gram": "g", "grams": "g",
        "kg": "kg", "kilogram": "kg", "kilograms": "kg",
        "lb": "lb", "lbs": "lb", "pound": "lb", "pounds": "lb",
        "oz": "oz", "ounce": "oz", "ounces": "oz",
    }
    for candidate in (weight_conv, weight_raw):
        s = clean_str(candidate)
        if not s:
            continue
        # Single backslashes: the previous `\\.` / `\\s` required a literal
        # backslash in the text, so the pattern could never match.
        m = re.search(r"([0-9]+(?:\.[0-9]+)?)\s*([a-zA-Z]+)", s)
        if not m:
            continue
        unit = unit_aliases.get(m.group(2).lower())
        if unit is not None:
            return (float(m.group(1)), unit)
    return ("", "")
  115 +
  116 +
def parse_dimensions_inches(dim_raw):
    """
    Build the template '尺寸信息' value ``'L,W,H'`` from a dimension string.

    Input example: ``'7.9 x 7.9 x 2 inches'`` -> ``'7.9,7.9,2'``.
    The first three numbers are used in the order they appear; no unit
    conversion is performed. Returns "" when fewer than three numbers exist.
    """
    s = clean_str(dim_raw)
    if not s:
        return ""
    # `\.` (single backslash) keeps decimals such as "7.9" in one piece; the
    # previous `\\.` split them into separate integers.
    nums = re.findall(r"[0-9]+(?:\.[0-9]+)?", s)
    if len(nums) < 3:
        return ""
    return "{},{},{}".format(nums[0], nums[1], nums[2])
  130 +
  131 +
def parse_sku_options(sku_text):
    """
    Split the 'SKU' column into an ``{option name: option value}`` dict.

    Segments are separated by ``|`` and each segment is split at its first
    ``:``. Segments without a colon, or with an empty name or value, are
    ignored. Example:
        'Size: One Size | Color: Black' -> {'Size': 'One Size', 'Color': 'Black'}
    """
    options = {}
    for segment in clean_str(sku_text).split("|"):
        name, colon, value = segment.partition(":")
        if not colon:
            continue
        name = name.strip()
        value = value.strip()
        if name and value:
            options[name] = value
    return options
  152 +
  153 +
def choose_option_keys(variant_dicts, max_keys=3):
    """
    Pick at most ``max_keys`` option names for a product group.

    Names listed in ``PREFERRED_OPTION_KEYS`` come first, in that list's
    order. The rest are ordered by how many variants carry a non-empty value
    for them (most frequent first), then alphabetically, ignoring case.
    Returns an empty list when no variant has any non-empty option.
    """
    usage = Counter(
        name
        for options in variant_dicts
        for name, value in options.items()
        if value
    )
    if not usage:
        return []

    rank = {name: position for position, name in enumerate(PREFERRED_OPTION_KEYS)}
    not_preferred = 10 ** 6

    ordered = sorted(
        usage,
        key=lambda name: (rank.get(name, not_preferred), -usage[name], name.lower()),
    )
    return ordered[:max_keys]
  174 +
  175 +
def build_description_html(title, details, product_url):
    """
    Assemble the HTML product description.

    The output is, in order: the title as a paragraph, up to 30 ``|``-separated
    detail entries as a bullet list, and a "Source" link to ``product_url``.
    Each part is omitted when its input is empty; all text goes through
    ``html_escape``.
    """
    fragments = []

    if title:
        fragments.append("<p>{}</p>".format(html_escape(title)))

    bullets = [piece.strip() for piece in clean_str(details).split("|")]
    bullets = [piece for piece in bullets if piece]
    if bullets:
        items = "".join("<li>{}</li>".format(html_escape(piece)) for piece in bullets[:30])
        fragments.append("<ul>{}</ul>".format(items))

    if product_url:
        fragments.append('<p>Source: <a href="{0}">{0}</a></p>'.format(html_escape(product_url)))

    return "".join(fragments)
  187 +
  188 +
def competitor_sheet(ws):
    """
    Map each header text of a competitor sheet to its 1-based column index.

    The header is assumed to be row 1. Blank header cells are skipped; when a
    header text occurs more than once, the right-most column wins.
    """
    header_to_col = {}
    for col in range(1, ws.max_column + 1):
        header = clean_str(ws.cell(1, col).value)
        if header:
            header_to_col[header] = col
    return header_to_col
  200 +
  201 +
def read_competitor_rows_from_file(xlsx_path, max_rows=None):
    """
    Read variant rows from the first non-"Notes" sheet of a competitor workbook.

    Args:
        xlsx_path: Path of the competitor xlsx file.
        max_rows: Optional cap on the number of data rows scanned (the header
            row is not counted).

    Returns:
        A list of dicts keyed by the required column headers. Rows without an
        ASIN are skipped, and an empty 父ASIN falls back to the row's ASIN.
        The two price columns keep their raw cell value; every other column
        is normalised with ``clean_str``. Returns ``[]`` when the workbook has
        no sheet other than "Notes".

    Raises:
        RuntimeError: If a required column is missing from the header row.
    """
    required = ["ASIN", "父ASIN", "商品标题", "商品主图", "SKU", "详细参数", "价格($)", "prime价格($)",
                "上架时间", "类目路径", "大类目", "小类目", "品牌", "品牌链接", "商品详情页链接",
                "商品重量(单位换算)", "商品重量", "商品尺寸"]
    # Prices are parsed later by to_price(), so they are kept as raw cell values.
    raw_columns = ("价格($)", "prime价格($)")

    wb = load_workbook(xlsx_path, read_only=True, data_only=True)
    try:
        # pick first non-Notes sheet
        sheet_name = None
        for name in wb.sheetnames:
            if str(name).lower() != "notes":
                sheet_name = name
                break
        if sheet_name is None:
            return []
        ws = wb[sheet_name]
        idx = competitor_sheet(ws)

        for k in required:
            if k not in idx:
                raise RuntimeError("Missing column '{}' in {} sheet {}".format(k, xlsx_path, sheet_name))

        # 0-based tuple positions of the required columns.
        positions = {k: idx[k] - 1 for k in required}
        last_col = max(positions.values()) + 1
        last_row = None if max_rows is None else 1 + int(max_rows)

        def value_of(values, key):
            # Guard against rows shorter than the header row.
            pos = positions[key]
            return values[pos] if pos < len(values) else None

        rows = []
        # Stream rows instead of addressing single cells: read-only worksheets
        # are lazily loaded, and this also works when the sheet does not
        # report its dimensions (ws.max_row is None).
        for values in ws.iter_rows(min_row=2, max_row=last_row, max_col=last_col, values_only=True):
            asin = clean_str(value_of(values, "ASIN"))
            if not asin:
                continue
            row = {}
            for k in required:
                v = value_of(values, k)
                row[k] = v if k in raw_columns else clean_str(v)
            row["ASIN"] = asin
            row["父ASIN"] = row["父ASIN"] or asin
            rows.append(row)
        return rows
    finally:
        # Read-only workbooks keep the source file open until closed explicitly.
        wb.close()
  255 +
  256 +
def to_price(v):
    """
    Convert a price cell value to ``float``.

    Numbers and plain numeric strings are converted directly. Otherwise the
    first number found in the text (e.g. ``'$12.99'`` -> ``12.99``) is used.
    Returns ``None`` for ``None``, "", or text containing no number.
    """
    if v is None or v == "":
        return None
    try:
        return float(v)
    except (TypeError, ValueError):
        s = clean_str(v)
        # `\.` (single backslash) so the decimal part is captured; the
        # previous `\\.` truncated "12.99" to 12.
        m = re.search(r"([0-9]+(?:\.[0-9]+)?)", s)
        if not m:
            return None
        return float(m.group(1))
  268 +
  269 +
def build_common_fields(base_row, spu_id):
    """
    Build the template columns shared by the S, M and P rows of one product.

    Args:
        base_row: Competitor row dict supplying title, brand, categories, etc.
        spu_id: Product-level id written to the '商品spu' column and the remark.

    Returns:
        Dict keyed by template header. Row-type specific columns (商品属性*,
        款式1-3, prices, SKU, images) are not included here.
    """
    def text(column):
        # Missing, None and empty values all collapse to "".
        return base_row.get(column) or ""

    title = base_row.get("商品标题") or "Product"
    brand = text("品牌")
    big_cat = text("大类目")
    small_cat = text("小类目")
    cat_path = text("类目路径")
    detail_url = base_row.get("商品详情页链接")

    handle = generate_handle(title)
    if handle and not handle.startswith("products/"):
        handle = "products/" + handle

    seo_description = " ".join(part for part in (brand, title, big_cat) if part)[:5000]
    seo_keywords = ",".join(part for part in (title, brand, big_cat, small_cat) if part)
    tags = ",".join(part for part in (brand, big_cat, small_cat) if part)

    weight_val, weight_unit = parse_weight(base_row.get("商品重量(单位换算)"), base_row.get("商品重量"))

    # Album is the top-level category, else the first segment of the category path.
    if big_cat:
        album = big_cat
    elif cat_path:
        album = cat_path.split(":")[0]
    else:
        album = ""

    remark = "ASIN:{}; ParentASIN:{}; CategoryPath:{}".format(
        base_row.get("ASIN", ""), spu_id, cat_path[:200]
    )[:500]

    return {
        "商品ID": "",
        "创建时间": parse_date_to_template(base_row.get("上架时间")),
        "商品标题*": title[:255],
        "商品副标题": "{} {}".format(brand, big_cat).strip()[:600],
        "商品描述": build_description_html(
            title=title,
            details=base_row.get("详细参数"),
            product_url=detail_url,
        ),
        "SEO标题": title[:5000],
        "SEO描述": seo_description,
        "SEO URL Handle": handle,
        "SEO URL 重定向": "N",
        "SEO关键词": seo_keywords[:5000],
        "商品上架": "Y",
        "需要物流": "Y",
        "商品收税": "N",
        "商品spu": spu_id[:100],
        "启用虚拟销量": "N",
        "虚拟销量值": "",
        "跟踪库存": "Y",
        "库存规则*": "1",
        "专辑名称": album,
        "标签": tags,
        "供应商名称": "Amazon",
        "供应商URL": detail_url or base_row.get("品牌链接") or "",
        "商品重量": weight_val,
        "重量单位": weight_unit,
        # The data source carries no stock figure, so a fixed default is used.
        "商品库存": 100,
        "尺寸信息": parse_dimensions_inches(base_row.get("商品尺寸")),
        "原产地国别": "",
        "HS(协调制度)代码": "",
        "商品备注": remark,
        "款式备注": "",
    }
  346 +
  347 +
def build_s_row(base_row):
    """
    Build the single "S" row for a product that has exactly one variant.

    The price is the prime price if usable, else the regular price, else 9.99.
    The same price fills both the sale and original price columns, and the
    ASIN is used as the SKU.
    """
    spu_id = base_row.get("父ASIN") or base_row.get("ASIN")
    row = dict(build_common_fields(base_row, spu_id=spu_id))

    price = to_price(base_row.get("prime价格($)")) or to_price(base_row.get("价格($)")) or 9.99
    image = base_row.get("商品主图") or ""

    row["商品属性*"] = "S"
    for option_column in ("款式1", "款式2", "款式3"):
        row[option_column] = ""
    row["商品售价*"] = price
    row["商品原价"] = price
    row["成本价"] = ""
    row["商品SKU"] = base_row.get("ASIN") or ""
    row["商品条形码"] = ""
    row["商品图片*"] = image
    row["商品主图"] = image
    return row
  370 +
  371 +
def build_m_p_rows(variant_rows):
    """
    Build one "M" row followed by one "P" row per variant.

    Args:
        variant_rows: List[dict] of competitor rows sharing the same 父ASIN.
            The first entry supplies the product-level fields of the M row.

    Returns:
        ``[m_row, p_row_1, ..., p_row_n]``. The M row carries the option names
        in 款式1-3; each P row carries that variant's option values. When no
        option can be parsed from any SKU, a single option named "Variant" is
        used with the variant ASIN as its value.
    """
    blanks = ["", "", ""]

    first = variant_rows[0]
    spu_id = first.get("父ASIN") or first.get("ASIN")

    parsed_options = [parse_sku_options(variant.get("SKU")) for variant in variant_rows]
    option_keys = choose_option_keys(parsed_options, max_keys=3) or ["Variant"]
    asin_as_option = option_keys == ["Variant"]

    # M row: product-level data; SKU-level columns stay empty per template guidance.
    option_names = (option_keys + blanks)[:3]
    main_image = first.get("商品主图") or ""
    m_row = dict(build_common_fields(first, spu_id=spu_id))
    m_row["商品属性*"] = "M"
    m_row["款式1"], m_row["款式2"], m_row["款式3"] = option_names
    for empty_column in ("商品售价*", "商品原价", "成本价", "商品SKU", "商品条形码"):
        m_row[empty_column] = ""
    m_row["商品图片*"] = main_image
    m_row["商品主图"] = main_image
    for sku_level_column in ("商品重量", "重量单位", "商品库存", "尺寸信息"):
        m_row[sku_level_column] = ""

    result = [m_row]

    # Product-level (SPU-only) columns that are blanked on every P row.
    spu_only_columns = (
        "商品副标题", "商品描述", "SEO标题", "SEO描述", "SEO URL Handle",
        "SEO URL 重定向", "SEO关键词", "专辑名称", "标签", "供应商名称",
        "供应商URL", "商品备注",
    )

    # P rows
    for variant, options in zip(variant_rows, parsed_options):
        p_row = dict(build_common_fields(variant, spu_id=spu_id))
        for column in spu_only_columns:
            p_row[column] = ""

        if asin_as_option:
            values = [variant.get("ASIN")]
        else:
            values = [options.get(key, "") for key in option_keys]
        values = (values + blanks)[:3]

        price = to_price(variant.get("prime价格($)")) or to_price(variant.get("价格($)")) or 9.99

        p_row["商品属性*"] = "P"
        p_row["款式1"], p_row["款式2"], p_row["款式3"] = values
        p_row["商品售价*"] = price
        p_row["商品原价"] = price
        p_row["成本价"] = ""
        p_row["商品SKU"] = variant.get("ASIN") or ""
        p_row["商品条形码"] = ""
        # A P row supports one variant image; the variant's main image is used.
        p_row["商品图片*"] = variant.get("商品主图") or ""
        p_row["商品主图"] = ""
        result.append(p_row)

    return result
  457 +
  458 +
def main():
    """
    Command-line entry point: convert competitor xlsx files to one Shoplazza import xlsx.

    Reads every ``.xlsx`` file in ``--input-dir`` (sorted by path), keeps the
    first occurrence of each ASIN, groups variants by 父ASIN, and writes one
    "S" row per single-variant group or "M" + "P" rows otherwise, using
    ``--template`` as the workbook skeleton. Files that fail to load are
    reported with a warning and skipped.

    Raises:
        RuntimeError: If the input directory or the template file does not exist.
    """
    parser = argparse.ArgumentParser(description="Convert competitor xlsx files to Shoplazza import xlsx")
    parser.add_argument("--input-dir", default="data/mai_jia_jing_ling/products_data", help="Directory containing competitor xlsx files")
    parser.add_argument("--template", default="docs/商品导入模板.xlsx", help="Shoplazza import template xlsx")
    parser.add_argument("--output", default="competitor_shoplazza_import.xlsx", help="Output xlsx file path")
    parser.add_argument("--max-files", type=int, default=None, help="Limit number of xlsx files to read (for testing)")
    parser.add_argument("--max-rows-per-file", type=int, default=None, help="Limit rows per xlsx file (for testing)")
    parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)")
    args = parser.parse_args()

    input_dir = args.input_dir
    if not os.path.isdir(input_dir):
        raise RuntimeError("input-dir not found: {}".format(input_dir))
    if not os.path.exists(args.template):
        raise RuntimeError("template not found: {}".format(args.template))

    files = sorted(
        os.path.join(input_dir, f)
        for f in os.listdir(input_dir)
        if f.lower().endswith(".xlsx")
    )
    if args.max_files is not None:
        files = files[: int(args.max_files)]

    print("Reading competitor files: {} (from {})".format(len(files), input_dir), flush=True)

    groups = defaultdict(list)  # spu_id -> [variant rows]
    seen_asin = set()

    for fp in files:
        print(" - loading: {}".format(fp), flush=True)
        try:
            rows = read_competitor_rows_from_file(fp, max_rows=args.max_rows_per_file)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole run.
            print("WARN: failed to read {}: {}".format(fp, e), flush=True)
            continue
        print(" loaded rows: {}".format(len(rows)), flush=True)

        for r in rows:
            asin = r.get("ASIN")
            if asin in seen_asin:
                # The same ASIN can appear in several exports; keep the first.
                continue
            seen_asin.add(asin)
            spu_id = r.get("父ASIN") or asin
            groups[spu_id].append(r)

    print("Collected variants: {}, SPU groups: {}".format(len(seen_asin), len(groups)), flush=True)

    max_products = None if args.max_products is None else int(args.max_products)
    excel_rows = []
    spu_count = 0  # number of SPU groups actually written to excel_rows

    for variants in groups.values():
        # Check the limit before emitting so spu_count is never one too high.
        if max_products is not None and spu_count >= max_products:
            break
        if len(variants) == 1:
            excel_rows.append(build_s_row(variants[0]))
        else:
            excel_rows.extend(build_m_p_rows(variants))
        spu_count += 1

    print("Generated Excel rows: {} (SPU groups output: {})".format(len(excel_rows), spu_count), flush=True)
    create_excel_from_template(args.template, args.output, excel_rows)
  521 +
  522 +if __name__ == "__main__":
  523 + main()
  524 +
  525 +
... ...
scripts/csv_to_excel.py
... ... @@ -22,6 +22,10 @@ from openpyxl import load_workbook
22 22 from openpyxl.styles import Font, Alignment
23 23 from openpyxl.utils import get_column_letter
24 24  
  25 +# Shared helpers (keeps template writing consistent across scripts)
  26 +from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared
  27 +from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared
  28 +
25 29 # Add parent directory to path
26 30 sys.path.insert(0, str(Path(__file__).parent.parent))
27 31  
... ... @@ -82,21 +86,8 @@ def generate_handle(title: str) -&gt; str:
82 86 Returns:
83 87 URL-friendly handle (ASCII only)
84 88 """
85   - # Convert to lowercase
86   - handle = title.lower()
87   -
88   - # Remove non-ASCII characters, keep only letters, numbers, spaces, and hyphens
89   - handle = re.sub(r'[^a-z0-9\s-]', '', handle)
90   -
91   - # Replace spaces and multiple hyphens with single hyphen
92   - handle = re.sub(r'[-\s]+', '-', handle)
93   - handle = handle.strip('-')
94   -
95   - # Limit length
96   - if len(handle) > 255:
97   - handle = handle[:255]
98   -
99   - return handle or 'product'
  89 + # Keep backward-compatible function name while delegating to shared helper.
  90 + return _generate_handle_shared(title)
100 91  
101 92  
102 93 def read_csv_file(csv_file: str) -> list:
... ... @@ -255,52 +246,8 @@ def create_excel_from_template(template_file: str, output_file: str, csv_data_li
255 246 output_file: Path to output Excel file
256 247 csv_data_list: List of parsed CSV data dictionaries
257 248 """
258   - # Load template
259   - wb = load_workbook(template_file)
260   - ws = wb.active # Use the active sheet (Sheet4)
261   -
262   - # Find header row (row 2, index 1)
263   - header_row_idx = 2 # Row 2 in Excel (1-based, but header is at index 1 in pandas)
264   -
265   - # Get column mapping from header row
266   - column_mapping = {}
267   - for col_idx in range(1, ws.max_column + 1):
268   - cell_value = ws.cell(row=header_row_idx, column=col_idx).value
269   - if cell_value:
270   - column_mapping[cell_value] = col_idx
271   -
272   - # Start writing data from row 4 (after header and instructions)
273   - data_start_row = 4 # Row 4 in Excel (1-based)
274   -
275   - # Clear existing data rows (from row 4 onwards, but keep header and instructions)
276   - # Find the last row with data in the template
277   - last_template_row = ws.max_row
278   - if last_template_row >= data_start_row:
279   - # Clear data rows (keep header and instruction rows)
280   - for row in range(data_start_row, last_template_row + 1):
281   - for col in range(1, ws.max_column + 1):
282   - ws.cell(row=row, column=col).value = None
283   -
284   - # Convert CSV data to Excel rows
285   - for row_idx, csv_data in enumerate(csv_data_list):
286   - excel_row = csv_to_excel_row(csv_data)
287   - excel_row_num = data_start_row + row_idx
288   -
289   - # Write each field to corresponding column
290   - for field_name, col_idx in column_mapping.items():
291   - if field_name in excel_row:
292   - cell = ws.cell(row=excel_row_num, column=col_idx)
293   - value = excel_row[field_name]
294   - cell.value = value
295   -
296   - # Set alignment for text fields
297   - if isinstance(value, str):
298   - cell.alignment = Alignment(vertical='top', wrap_text=True)
299   - elif isinstance(value, (int, float)):
300   - cell.alignment = Alignment(vertical='top')
301   -
302   - # Save workbook
303   - wb.save(output_file)
  249 + excel_rows = [csv_to_excel_row(d) for d in csv_data_list]
  250 + _create_excel_from_template_shared(template_file, output_file, excel_rows)
304 251 print(f"Excel file created: {output_file}")
305 252 print(f" - Total rows: {len(csv_data_list)}")
306 253  
... ...
scripts/csv_to_excel_multi_variant.py
... ... @@ -22,6 +22,10 @@ import itertools
22 22 from openpyxl import load_workbook
23 23 from openpyxl.styles import Alignment
24 24  
  25 +# Shared helpers (keeps template writing consistent across scripts)
  26 +from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared
  27 +from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared
  28 +
25 29 # Add parent directory to path
26 30 sys.path.insert(0, str(Path(__file__).parent.parent))
27 31  
... ... @@ -90,21 +94,8 @@ def generate_handle(title: str) -&gt; str:
90 94 Returns:
91 95 URL-friendly handle (ASCII only)
92 96 """
93   - # Convert to lowercase
94   - handle = title.lower()
95   -
96   - # Remove non-ASCII characters, keep only letters, numbers, spaces, and hyphens
97   - handle = re.sub(r'[^a-z0-9\s-]', '', handle)
98   -
99   - # Replace spaces and multiple hyphens with single hyphen
100   - handle = re.sub(r'[-\s]+', '-', handle)
101   - handle = handle.strip('-')
102   -
103   - # Limit length
104   - if len(handle) > 255:
105   - handle = handle[:255]
106   -
107   - return handle or 'product'
  97 + # Keep backward-compatible function name while delegating to shared helper.
  98 + return _generate_handle_shared(title)
108 99  
109 100  
110 101 def extract_material_from_title(title: str) -> str:
... ... @@ -478,49 +469,7 @@ def create_excel_from_template(template_file: str, output_file: str, excel_rows:
478 469 output_file: Path to output Excel file
479 470 excel_rows: List of dictionaries mapping Excel column names to values
480 471 """
481   - # Load template
482   - wb = load_workbook(template_file)
483   - ws = wb.active # Use the active sheet (Sheet4)
484   -
485   - # Find header row (row 2)
486   - header_row_idx = 2
487   -
488   - # Get column mapping from header row
489   - column_mapping = {}
490   - for col_idx in range(1, ws.max_column + 1):
491   - cell_value = ws.cell(row=header_row_idx, column=col_idx).value
492   - if cell_value:
493   - column_mapping[cell_value] = col_idx
494   -
495   - # Start writing data from row 4
496   - data_start_row = 4
497   -
498   - # Clear existing data rows
499   - last_template_row = ws.max_row
500   - if last_template_row >= data_start_row:
501   - for row in range(data_start_row, last_template_row + 1):
502   - for col in range(1, ws.max_column + 1):
503   - ws.cell(row=row, column=col).value = None
504   -
505   - # Write data rows
506   - for row_idx, excel_row in enumerate(excel_rows):
507   - excel_row_num = data_start_row + row_idx
508   -
509   - # Write each field to corresponding column
510   - for field_name, col_idx in column_mapping.items():
511   - if field_name in excel_row:
512   - cell = ws.cell(row=excel_row_num, column=col_idx)
513   - value = excel_row[field_name]
514   - cell.value = value
515   -
516   - # Set alignment
517   - if isinstance(value, str):
518   - cell.alignment = Alignment(vertical='top', wrap_text=True)
519   - elif isinstance(value, (int, float)):
520   - cell.alignment = Alignment(vertical='top')
521   -
522   - # Save workbook
523   - wb.save(output_file)
  472 + _create_excel_from_template_shared(template_file, output_file, excel_rows)
524 473 print(f"Excel file created: {output_file}")
525 474 print(f" - Total rows: {len(excel_rows)}")
526 475  
... ...
scripts/shoplazza_excel_template.py 0 → 100644
... ... @@ -0,0 +1,67 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +Shared utilities for generating Shoplazza (店匠) product import Excel files
  4 +based on the provided template `docs/商品导入模板.xlsx`.
  5 +
  6 +We keep this in `scripts/` to maximize reuse by existing ad-hoc pipeline scripts.
  7 +"""
  8 +
  9 +from openpyxl import load_workbook
  10 +from openpyxl.styles import Alignment
  11 +
  12 +
def load_template_column_mapping(ws, header_row_idx=2):
    """
    Build ``header_name -> column_index`` (1-based) from the template header row.

    Header names are converted to ``str`` and stripped of surrounding
    whitespace. Columns whose header cell is empty are left out.
    """
    header_cells = (
        (ws.cell(row=header_row_idx, column=col_idx).value, col_idx)
        for col_idx in range(1, ws.max_column + 1)
    )
    return {
        str(header).strip(): col_idx
        for header, col_idx in header_cells
        if header
    }
  24 +
  25 +
def create_excel_from_template(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4):
    """
    Create an Excel file from the Shoplazza template and fill it with data rows.

    The template workbook is loaded, every cell value from `data_start_row`
    down to the sheet's last row is cleared, and each entry of `excel_rows`
    is written to consecutive rows starting at `data_start_row`. Only keys
    that match a header found in `header_row_idx` are written; other keys
    are ignored. The result is saved to `output_file` and a short summary is
    printed.

    Args:
        template_file: Path to Excel template file
        output_file: Path to output Excel file
        excel_rows: List[Dict[str, Any]] mapping template header -> value
        header_row_idx: Header row index in template (default 2)
        data_start_row: Data start row index in template (default 4)
    """
    wb = load_workbook(template_file)
    ws = wb.active

    column_mapping = load_template_column_mapping(ws, header_row_idx=header_row_idx)

    # Read the sheet extent once. Clearing only touches cells inside this
    # extent, so the bounds cannot change while the loops below run, and
    # re-reading `ws.max_column` for every row is wasted work.
    last_template_row = ws.max_row
    last_template_col = ws.max_column

    # Clear existing data rows (the range is empty when the template has no
    # rows at or below data_start_row).
    for row in range(data_start_row, last_template_row + 1):
        for col in range(1, last_template_col + 1):
            ws.cell(row=row, column=col).value = None

    # The two alignments never vary, so build them once and reuse them for
    # every cell instead of allocating a new object per write.
    text_alignment = Alignment(vertical='top', wrap_text=True)
    default_alignment = Alignment(vertical='top')

    # Write data rows
    for row_idx, excel_row in enumerate(excel_rows):
        excel_row_num = data_start_row + row_idx
        for field_name, col_idx in column_mapping.items():
            if field_name not in excel_row:
                continue
            cell = ws.cell(row=excel_row_num, column=col_idx)
            value = excel_row[field_name]
            cell.value = value
            # Strings wrap inside the cell; everything else is only top-aligned.
            cell.alignment = text_alignment if isinstance(value, str) else default_alignment

    wb.save(output_file)
    print("Excel file created: {}".format(output_file))
    print("  - Total rows: {}".format(len(excel_rows)))
  66 +
  67 +
... ...
scripts/shoplazza_import_template.py 0 → 100644
... ... @@ -0,0 +1,112 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +Shared helpers for generating Shoplazza product import Excel files from the
  4 +official template `docs/商品导入模板.xlsx`.
  5 +
  6 +We keep this module small and dependency-light (openpyxl only) so other scripts
  7 +can reuse the same template-writing behavior (header row mapping, data start
  8 +row, alignment).
  9 +"""
  10 +
  11 +import re
  12 +from datetime import datetime
  13 +from typing import Dict, Iterable, List, Optional
  14 +
  15 +from openpyxl import load_workbook
  16 +from openpyxl.styles import Alignment
  17 +
  18 +
def generate_handle(title: str) -> str:
    """
    Generate a URL-friendly, ASCII-only handle from a product title.

    Suitable for the Shoplazza `SEO URL Handle` field; the caller may prepend
    `products/`.

    The title is lower-cased, every character other than `a-z`, `0-9`,
    whitespace and `-` is dropped, and runs of whitespace/hyphens collapse to
    a single `-`. The result is capped at 255 characters and never starts or
    ends with `-`.

    Args:
        title: Source title; any falsy value yields the fallback handle.

    Returns:
        The handle, or "product" when nothing usable remains (empty title or
        a title made up only of characters that get stripped).
    """
    if not title:
        return "product"

    handle = str(title).lower()
    handle = re.sub(r"[^a-z0-9\s-]", "", handle)
    handle = re.sub(r"[-\s]+", "-", handle).strip("-")

    # Cutting at 255 characters can land right after a separator; strip again
    # so a truncated handle does not end with a dangling "-".
    handle = handle[:255].rstrip("-")

    return handle or "product"
  35 +
  36 +
def parse_date_to_datetime_str(value) -> str:
    """
    Convert a date-like value into the Shoplazza template datetime string.

    Accepted inputs are `datetime` instances and anything whose string form
    (after trimming whitespace) matches one of: `YYYY-MM-DD HH:MM:SS`,
    `YYYY-MM-DD`, `YYYY/MM/DD`, `MM/DD/YYYY`. Date-only inputs are rendered
    with a `00:00:00` time part.

    Args:
        value: The value to convert; may be None.

    Returns:
        The value formatted as `YYYY-MM-DD HH:MM:SS`, or an empty string when
        the value is None, blank, or matches none of the supported formats.
    """
    if value is None:
        return ""

    if isinstance(value, datetime):
        return value.strftime("%Y-%m-%d %H:%M:%S")

    text = str(value).strip()
    if not text:
        return ""

    # Most competitor sheets use YYYY-MM-DD
    for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"):
        try:
            parsed = datetime.strptime(text, fmt)
        except ValueError:
            # Not this format; try the next candidate.
            continue
        # Date-only formats already parse to midnight, so no time fix-up is needed.
        return parsed.strftime("%Y-%m-%d %H:%M:%S")

    return ""
  63 +
  64 +
def create_excel_from_template(
    template_file: str,
    output_file: str,
    excel_rows: List[Dict[str, object]],
    *,
    header_row_idx: int = 2,
    data_start_row: int = 4,
    sheet_name: Optional[str] = None,
) -> None:
    """
    Create an Excel file from the Shoplazza import template and fill rows.

    - Header row is expected at row 2 (1-based) in the official template.
    - Data starts at row 4 (1-based), after the instruction row(s).

    Every cell value from `data_start_row` down to the sheet's last row is
    cleared first; then each dict in `excel_rows` is written to consecutive
    rows. Only keys matching a header found in `header_row_idx` are written.

    Args:
        template_file: Path of the template workbook to load.
        output_file: Path the filled workbook is saved to.
        excel_rows: One dict per output row, keyed by template header text.
        header_row_idx: 1-based row holding the header names.
        data_start_row: 1-based row where the first data row is written.
        sheet_name: Sheet to fill; the workbook's active sheet when omitted.
    """
    wb = load_workbook(template_file)
    ws = wb[sheet_name] if sheet_name else wb.active

    # Read the sheet extent once. Neither the header scan nor the clearing
    # pass touches cells outside this extent, so the bounds stay valid and
    # need not be re-read on every loop iteration.
    last_template_row = ws.max_row
    last_template_col = ws.max_column

    # Stripped header text -> 1-based column index (falsy headers are skipped).
    column_mapping: Dict[str, int] = {}
    for col_idx in range(1, last_template_col + 1):
        cell_value = ws.cell(row=header_row_idx, column=col_idx).value
        if cell_value:
            column_mapping[str(cell_value).strip()] = col_idx

    # Clear existing data rows (the range is empty when the template has no
    # rows at or below data_start_row).
    for row in range(data_start_row, last_template_row + 1):
        for col in range(1, last_template_col + 1):
            ws.cell(row=row, column=col).value = None

    # The alignments never vary, so build them once and reuse them.
    text_alignment = Alignment(vertical="top", wrap_text=True)
    number_alignment = Alignment(vertical="top")

    # Write data rows
    for row_idx, excel_row in enumerate(excel_rows):
        excel_row_num = data_start_row + row_idx
        for field_name, col_idx in column_mapping.items():
            if field_name not in excel_row:
                continue
            value = excel_row[field_name]
            cell = ws.cell(row=excel_row_num, column=col_idx)
            cell.value = value
            # Only strings and numbers get an explicit alignment; other
            # values (e.g. None) keep whatever style the template cell has.
            if isinstance(value, str):
                cell.alignment = text_alignment
            elif isinstance(value, (int, float)):
                cell.alignment = number_alignment

    wb.save(output_file)
  111 +
  112 +
... ...