Commit f3c11fef37a249640c5e2a576582747c3f1fc90e
1 parent
19d2d90f
亚马逊格式数据 导入店匠
Showing
53 changed files
with
723 additions
and
122 deletions
Show diff stats
README.md
| @@ -14,11 +14,12 @@ source .env | @@ -14,11 +14,12 @@ source .env | ||
| 14 | 14 | ||
| 15 | ## 测试pipeline | 15 | ## 测试pipeline |
| 16 | 16 | ||
| 17 | -fake数据 生成商品导入数据 提交到店匠的店铺: | 17 | +1. fake数据 生成商品导入数据 提交到店匠的店铺: |
| 18 | cd /home/tw/SearchEngine && source /home/tw/miniconda3/etc/profile.d/conda.sh && conda activate searchengine && python scripts/csv_to_excel_multi_variant.py --output with_colors.xlsx | 18 | cd /home/tw/SearchEngine && source /home/tw/miniconda3/etc/profile.d/conda.sh && conda activate searchengine && python scripts/csv_to_excel_multi_variant.py --output with_colors.xlsx |
| 19 | 19 | ||
| 20 | -自动同步到mysql | ||
| 21 | -mysql到ES: | 20 | +2. 后端:自动同步到mysql |
| 21 | + | ||
| 22 | +3. mysql到ES: | ||
| 22 | 23 | ||
| 23 | python scripts/recreate_and_import.py \ | 24 | python scripts/recreate_and_import.py \ |
| 24 | --tenant-id 162 \ | 25 | --tenant-id 162 \ |
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363464.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363499.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363533.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363560.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363598.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363625.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363662.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363709.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363746.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363778.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363802.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363824.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363854.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363884.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363899.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363924.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363943.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363956.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363985.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364002.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364039.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364076.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364113.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364149.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364182.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364202.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364223.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364237.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364253.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364272.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364293.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364309.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364335.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364361.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364384.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364409.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364471.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364496.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364519.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364538.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364557.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364581.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364605.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364628.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364644.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364667.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364690.xlsx
0 → 100644
No preview for this file type
| @@ -0,0 +1,525 @@ | @@ -0,0 +1,525 @@ | ||
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +""" | ||
| 3 | +Convert competitor Excel exports (with Parent/Child ASIN structure) into | ||
| 4 | +Shoplazza (店匠) product import Excel format based on `docs/商品导入模板.xlsx`. | ||
| 5 | + | ||
| 6 | +Data source: | ||
| 7 | +- Directory with multiple `Competitor-*.xlsx` files. | ||
| 8 | +- Each file contains a main sheet + "Notes" sheet. | ||
| 9 | +- Column meanings (sample): | ||
| 10 | + - ASIN: variant id (sku_id) | ||
| 11 | + - 父ASIN: product id (spu_id) | ||
| 12 | + | ||
| 13 | +Output: | ||
| 14 | +- For each 父ASIN group: | ||
| 15 | + - If only 1 ASIN: generate one "S" row | ||
| 16 | + - Else: generate one "M" row + multiple "P" rows | ||
| 17 | + | ||
| 18 | +Important: | ||
| 19 | +- Variant dimensions are parsed primarily from the `SKU` column: | ||
| 20 | + "Size: One Size | Color: Black" | ||
| 21 | + and mapped into 款式1/2/3. | ||
| 22 | +""" | ||
| 23 | + | ||
| 24 | +import os | ||
| 25 | +import re | ||
| 26 | +import sys | ||
| 27 | +import argparse | ||
| 28 | +from datetime import datetime | ||
| 29 | +from collections import defaultdict, Counter | ||
| 30 | +from pathlib import Path | ||
| 31 | + | ||
| 32 | +from openpyxl import load_workbook | ||
| 33 | + | ||
| 34 | +# Allow running as `python scripts/xxx.py` without installing as a package | ||
| 35 | +sys.path.insert(0, str(Path(__file__).resolve().parent)) | ||
| 36 | +from shoplazza_excel_template import create_excel_from_template | ||
| 37 | + | ||
| 38 | + | ||
# Option keys tried first (in this order) when choosing the up-to-3 variant
# dimensions (款式1/2/3) for a product group; see choose_option_keys().
PREFERRED_OPTION_KEYS = [
    "Size", "Color", "Style", "Pattern", "Material", "Flavor", "Scent",
    "Pack", "Pack of", "Number of Items", "Count", "Capacity", "Length",
    "Width", "Height", "Model", "Configuration",
]
| 44 | + | ||
| 45 | + | ||
def clean_str(v):
    """Normalize an arbitrary cell value to a stripped string ("" for None)."""
    return "" if v is None else str(v).strip()
| 50 | + | ||
| 51 | + | ||
def html_escape(s):
    """Escape &, <, > for safe embedding in generated description HTML.

    Fix: the replacement targets were the identity ("&" -> "&"), so the
    function never escaped anything; use the proper HTML entities. '&'
    must be replaced first so the other entities are not double-escaped.
    """
    # Inline normalization (same semantics as clean_str) keeps this helper
    # self-contained: None -> "", everything else -> stripped str.
    s = "" if s is None else str(s).strip()
    return (s.replace("&", "&amp;")
             .replace("<", "&lt;")
             .replace(">", "&gt;"))
| 57 | + | ||
| 58 | + | ||
def generate_handle(title):
    """
    Generate a URL-friendly handle from a title (lowercase ASCII only).
    Keep consistent with existing scripts.

    Fix: the regexes used doubled backslashes inside raw strings
    (r"[^a-z0-9\\s-]"), which matches a literal backslash plus the letter
    's' instead of whitespace — spaces were never collapsed into hyphens.
    Use single-backslash escapes (r"\s").
    """
    handle = ("" if title is None else str(title).strip()).lower()
    # Drop everything except ASCII letters, digits, whitespace and hyphens.
    handle = re.sub(r"[^a-z0-9\s-]", "", handle)
    # Collapse whitespace/hyphen runs into a single hyphen, trim edges.
    handle = re.sub(r"[-\s]+", "-", handle).strip("-")
    if len(handle) > 255:
        handle = handle[:255]  # template handle length limit
    return handle or "product"
| 70 | + | ||
| 71 | + | ||
def parse_date_to_template(dt_value):
    """
    Normalize a date-ish value into the template format YYYY-MM-DD HH:MM:SS.

    Accepts datetime instances or strings such as "2018-05-09"; returns ""
    for empty or unparseable input.
    """
    if dt_value is None or dt_value == "":
        return ""
    if isinstance(dt_value, datetime):
        return dt_value.strftime("%Y-%m-%d %H:%M:%S")
    text = clean_str(dt_value)
    # Try the source formats seen in the exports, most common first.
    for fmt in ("%Y-%m-%d", "%Y/%m/%d", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S"):
        try:
            return datetime.strptime(text, fmt).strftime("%Y-%m-%d %H:%M:%S")
        except Exception:
            continue
    return ""
| 90 | + | ||
| 91 | + | ||
def parse_weight(weight_conv, weight_raw):
    """
    Parse a weight string into (value, unit) with unit in {kg, lb, g, oz}.

    Prefers the converted column '商品重量(单位换算)' (e.g. '68.04 g') and
    falls back to the raw column '商品重量' (e.g. '0.15 pounds').
    Returns ("", "") when nothing parseable is found.

    Fix: the regex used doubled backslashes inside a raw string
    (r"...\\.\\s*..."), requiring literal backslash characters, so it
    never matched real inputs like '68.04 g'; use r"\." / r"\s".
    """
    def _norm(v):
        # Self-contained copy of clean_str semantics for this helper.
        return "" if v is None else str(v).strip()

    s = _norm(weight_conv) or _norm(weight_raw)
    if not s:
        return ("", "")
    m = re.search(r"([0-9]+(?:\.[0-9]+)?)\s*([a-zA-Z]+)", s)
    if not m:
        return ("", "")
    val = float(m.group(1))
    unit = m.group(2).lower()
    # Map the various spellings onto the four units the template accepts.
    if unit in ("g", "gram", "grams"):
        return (val, "g")
    if unit in ("kg", "kilogram", "kilograms"):
        return (val, "kg")
    if unit in ("lb", "lbs", "pound", "pounds"):
        return (val, "lb")
    if unit in ("oz", "ounce", "ounces"):
        return (val, "oz")
    return ("", "")
| 115 | + | ||
| 116 | + | ||
def parse_dimensions_inches(dim_raw):
    """
    Convert a dimensions string like '7.9 x 7.9 x 2 inches' into the
    template's '尺寸信息' format: 'L,W,H' (first three numbers, inches).

    Returns "" for empty input or when fewer than three numbers are found.

    Fix: the number regex used a doubled backslash (r"(?:\\.[0-9]+)?")
    inside a raw string, so a decimal such as '7.9' was split into '7'
    and '9'; use r"\." to match the decimal point.
    """
    s = "" if dim_raw is None else str(dim_raw).strip()
    if not s:
        return ""
    # Extract the first three numbers, in order of appearance.
    nums = re.findall(r"[0-9]+(?:\.[0-9]+)?", s)
    if len(nums) < 3:
        return ""
    return "{},{},{}".format(nums[0], nums[1], nums[2])
| 130 | + | ||
| 131 | + | ||
def parse_sku_options(sku_text):
    """
    Split the 'SKU' column into an option {key: value} mapping.

    Example:
        'Size: One Size | Color: Black' -> {'Size': 'One Size', 'Color': 'Black'}

    Segments without a ':' and segments whose key or value is empty are
    ignored.
    """
    text = clean_str(sku_text)
    options = {}
    if not text:
        return options
    for segment in text.split("|"):
        segment = segment.strip()
        if not segment or ":" not in segment:
            continue
        key, _, value = segment.partition(":")
        key = clean_str(key)
        value = clean_str(value)
        if key and value:
            options[key] = value
    return options
| 152 | + | ||
| 153 | + | ||
def choose_option_keys(variant_dicts, max_keys=3):
    """
    Pick up to `max_keys` option keys for a product group.

    Keys on PREFERRED_OPTION_KEYS come first (in that list's order);
    remaining keys are ordered by how many variants carry a non-empty
    value, then alphabetically for determinism.
    """
    usage = Counter(
        key for options in variant_dicts for key, val in options.items() if val
    )
    if not usage:
        return []

    rank_of = {key: pos for pos, key in enumerate(PREFERRED_OPTION_KEYS)}

    def sort_key(key):
        # Unlisted keys rank after every preferred one (10**6 sentinel).
        return (rank_of.get(key, 10 ** 6), -usage[key], key.lower())

    return sorted(usage, key=sort_key)[:max_keys]
| 174 | + | ||
| 175 | + | ||
def build_description_html(title, details, product_url):
    """
    Assemble the product description HTML: a title paragraph, a <ul> of up
    to 30 detail items (the '|'-separated '详细参数' column), and a source
    link back to the original product page.
    """
    chunks = []
    if title:
        chunks.append("<p>{}</p>".format(html_escape(title)))
    items = [part.strip() for part in clean_str(details).split("|") if part.strip()]
    if items:
        bullets = "".join("<li>{}</li>".format(html_escape(item)) for item in items[:30])
        chunks.append("<ul>{}</ul>".format(bullets))
    if product_url:
        chunks.append('<p>Source: <a href="{0}">{0}</a></p>'.format(html_escape(product_url)))
    return "".join(chunks)
| 187 | + | ||
| 188 | + | ||
def competitor_sheet(ws):
    """
    Map header text -> 1-based column index for a competitor sheet.

    The header is assumed to live in row 1; blank header cells are
    skipped (a duplicated header keeps the right-most column).
    """
    index = {}
    for col in range(1, ws.max_column + 1):
        header = clean_str(ws.cell(1, col).value)
        if header:
            index[header] = col
    return index
| 200 | + | ||
| 201 | + | ||
def read_competitor_rows_from_file(xlsx_path, max_rows=None):
    """
    Read all variant rows from one competitor export workbook.

    Picks the first sheet whose name is not "Notes", verifies every
    required column exists, and returns a list of per-ASIN dicts keyed by
    the source column headers.

    Args:
        xlsx_path: path to a Competitor-*.xlsx export.
        max_rows: optional cap on the number of data rows read (testing).

    Returns:
        List of row dicts; rows without an ASIN are skipped.

    Raises:
        RuntimeError: when a required column is missing from the sheet.
    """
    wb = load_workbook(xlsx_path, read_only=True, data_only=True)
    # pick first non-Notes sheet
    sheet_name = None
    for name in wb.sheetnames:
        if str(name).lower() == "notes":
            continue
        sheet_name = name
        break
    if sheet_name is None:
        return []
    ws = wb[sheet_name]
    idx = competitor_sheet(ws)

    required = ["ASIN", "父ASIN", "商品标题", "商品主图", "SKU", "详细参数", "价格($)", "prime价格($)",
                "上架时间", "类目路径", "大类目", "小类目", "品牌", "品牌链接", "商品详情页链接",
                "商品重量(单位换算)", "商品重量", "商品尺寸"]
    for k in required:
        if k not in idx:
            raise RuntimeError("Missing column '{}' in {} sheet {}".format(k, xlsx_path, sheet_name))

    rows = []
    end_row = ws.max_row
    if max_rows is not None:
        # +1 accounts for the header occupying row 1.
        end_row = min(end_row, 1 + int(max_rows))

    for r in range(2, end_row + 1):
        asin = clean_str(ws.cell(r, idx["ASIN"]).value)
        if not asin:
            # No ASIN means no variant on this row; skip it.
            continue
        # A missing 父ASIN means the ASIN is its own SPU.
        parent = clean_str(ws.cell(r, idx["父ASIN"]).value) or asin
        row = {
            "ASIN": asin,
            "父ASIN": parent,
            "SKU": clean_str(ws.cell(r, idx["SKU"]).value),
            "详细参数": clean_str(ws.cell(r, idx["详细参数"]).value),
            "商品标题": clean_str(ws.cell(r, idx["商品标题"]).value),
            "商品主图": clean_str(ws.cell(r, idx["商品主图"]).value),
            # Price cells stay raw (may be numeric); to_price() parses later.
            "价格($)": ws.cell(r, idx["价格($)"]).value,
            "prime价格($)": ws.cell(r, idx["prime价格($)"]).value,
            "上架时间": clean_str(ws.cell(r, idx["上架时间"]).value),
            "类目路径": clean_str(ws.cell(r, idx["类目路径"]).value),
            "大类目": clean_str(ws.cell(r, idx["大类目"]).value),
            "小类目": clean_str(ws.cell(r, idx["小类目"]).value),
            "品牌": clean_str(ws.cell(r, idx["品牌"]).value),
            "品牌链接": clean_str(ws.cell(r, idx["品牌链接"]).value),
            "商品详情页链接": clean_str(ws.cell(r, idx["商品详情页链接"]).value),
            "商品重量(单位换算)": clean_str(ws.cell(r, idx["商品重量(单位换算)"]).value),
            "商品重量": clean_str(ws.cell(r, idx["商品重量"]).value),
            "商品尺寸": clean_str(ws.cell(r, idx["商品尺寸"]).value),
        }
        rows.append(row)
    return rows
| 255 | + | ||
| 256 | + | ||
def to_price(v):
    """
    Coerce a price cell to float.

    Numeric cells convert directly; for strings, the first number is
    extracted (e.g. "$12.50" -> 12.5). Returns None for empty or
    unparseable values.

    Fix: the fallback regex used a doubled backslash (r"(?:\\.[0-9]+)?")
    inside a raw string, so text prices lost their fractional part
    ("$12.50" -> 12.0); use r"\." for the decimal point.
    """
    if v is None or v == "":
        return None
    try:
        return float(v)
    except Exception:
        s = str(v).strip()
        m = re.search(r"([0-9]+(?:\.[0-9]+)?)", s)
        if not m:
            return None
        return float(m.group(1))
| 268 | + | ||
| 269 | + | ||
def build_common_fields(base_row, spu_id):
    """
    Build the template fields shared by S/M/P rows from one source row.

    Args:
        base_row: dict produced by `read_competitor_rows_from_file`.
        spu_id: the 父ASIN used as the product's SPU identifier.

    Returns:
        dict keyed by the Chinese column headers of the Shoplazza template.
    """
    title = base_row.get("商品标题") or "Product"
    brand = base_row.get("品牌") or ""
    big_cat = base_row.get("大类目") or ""
    small_cat = base_row.get("小类目") or ""
    cat_path = base_row.get("类目路径") or ""

    # Handles live under the products/ path prefix in the template.
    handle = generate_handle(title)
    if handle and not handle.startswith("products/"):
        handle = "products/{}".format(handle)

    seo_title = title
    seo_desc_parts = []
    if brand:
        seo_desc_parts.append(brand)
    seo_desc_parts.append(title)
    if big_cat:
        seo_desc_parts.append(big_cat)
    # 5000-char caps below mirror the template's field length limits.
    seo_description = " ".join([x for x in seo_desc_parts if x])[:5000]

    seo_keywords = ",".join([x for x in [title, brand, big_cat, small_cat] if x])
    tags = ",".join([x for x in [brand, big_cat, small_cat] if x])

    created_at = parse_date_to_template(base_row.get("上架时间"))

    description = build_description_html(
        title=title,
        details=base_row.get("详细参数"),
        product_url=base_row.get("商品详情页链接"),
    )

    # default inventory settings (data source has no stock)
    inventory_qty = 100

    weight_val, weight_unit = parse_weight(base_row.get("商品重量(单位换算)"), base_row.get("商品重量"))
    size_info = parse_dimensions_inches(base_row.get("商品尺寸"))

    # Album (collection) falls back to the first segment of the category path.
    album = big_cat or ""
    if not album and cat_path:
        album = cat_path.split(":")[0]

    common = {
        "商品ID": "",
        "创建时间": created_at,
        "商品标题*": title[:255],
        "商品副标题": "{} {}".format(brand, big_cat).strip()[:600],
        "商品描述": description,
        "SEO标题": seo_title[:5000],
        "SEO描述": seo_description,
        "SEO URL Handle": handle,
        "SEO URL 重定向": "N",
        "SEO关键词": seo_keywords[:5000],
        "商品上架": "Y",
        "需要物流": "Y",
        "商品收税": "N",
        "商品spu": spu_id[:100],
        "启用虚拟销量": "N",
        "虚拟销量值": "",
        "跟踪库存": "Y",
        "库存规则*": "1",
        "专辑名称": album,
        "标签": tags,
        "供应商名称": "Amazon",
        "供应商URL": base_row.get("商品详情页链接") or base_row.get("品牌链接") or "",
        "商品重量": weight_val if weight_val != "" else "",
        "重量单位": weight_unit,
        "商品库存": inventory_qty,
        "尺寸信息": size_info,
        "原产地国别": "",
        "HS(协调制度)代码": "",
        # Keep traceability back to the Amazon source in the remark field.
        "商品备注": "ASIN:{}; ParentASIN:{}; CategoryPath:{}".format(
            base_row.get("ASIN", ""), spu_id, (cat_path[:200] if cat_path else "")
        )[:500],
        "款式备注": "",
    }
    return common
| 346 | + | ||
| 347 | + | ||
def build_s_row(base_row):
    """
    Build one 'S' (single, no variants) row for the import template.

    Price preference: prime price, then regular price, then a 9.99
    fallback; the main image doubles as the product image.
    """
    spu_id = base_row.get("父ASIN") or base_row.get("ASIN")
    row = build_common_fields(base_row, spu_id=spu_id)
    price = to_price(base_row.get("prime价格($)")) or to_price(base_row.get("价格($)")) or 9.99
    image = base_row.get("商品主图") or ""

    row.update({
        "商品属性*": "S",
        "款式1": "",
        "款式2": "",
        "款式3": "",
        "商品售价*": price,
        "商品原价": price,
        "成本价": "",
        "商品SKU": base_row.get("ASIN") or "",
        "商品条形码": "",
        "商品图片*": image,
        "商品主图": image,
    })
    return row
| 370 | + | ||
| 371 | + | ||
def build_m_p_rows(variant_rows):
    """
    Build one 'M' (main) row plus one 'P' (variant) row per ASIN for a
    multi-variant product group.

    Args:
        variant_rows: list of row dicts sharing the same 父ASIN; the first
            row is used as the representative for SPU-level fields.

    Returns:
        [m_row, p_row, p_row, ...] dicts in template-column form.
    """
    base = variant_rows[0]
    spu_id = base.get("父ASIN") or base.get("ASIN")
    common = build_common_fields(base, spu_id=spu_id)

    option_dicts = [parse_sku_options(v.get("SKU")) for v in variant_rows]
    option_keys = choose_option_keys(option_dicts, max_keys=3)
    # With no parseable SKU options, fall back to a synthetic "Variant"
    # dimension whose values are the ASINs themselves (see P rows below).
    if not option_keys:
        option_keys = ["Variant"]

    # M row
    m = {}
    m.update(common)
    m.update({
        "商品属性*": "M",
        "款式1": option_keys[0] if len(option_keys) > 0 else "",
        "款式2": option_keys[1] if len(option_keys) > 1 else "",
        "款式3": option_keys[2] if len(option_keys) > 2 else "",
        "商品售价*": "",
        "商品原价": "",
        "成本价": "",
        "商品SKU": "",
        "商品条形码": "",
        "商品图片*": base.get("商品主图") or "",
        "商品主图": base.get("商品主图") or "",
    })

    # For M row, these SKU-level fields should be empty per template guidance
    m["商品重量"] = ""
    m["重量单位"] = ""
    m["商品库存"] = ""
    m["尺寸信息"] = ""

    rows = [m]

    # P rows
    for v in variant_rows:
        v_common = build_common_fields(v, spu_id=spu_id)
        # wipe SPU-only fields for P row
        v_common.update({
            "商品副标题": "",
            "商品描述": "",
            "SEO标题": "",
            "SEO描述": "",
            "SEO URL Handle": "",
            "SEO URL 重定向": "",
            "SEO关键词": "",
            "专辑名称": "",
            "标签": "",
            "供应商名称": "",
            "供应商URL": "",
            "商品备注": "",
        })

        opt = parse_sku_options(v.get("SKU"))
        if option_keys == ["Variant"]:
            # Synthetic dimension: each variant's value is its ASIN.
            opt_vals = [v.get("ASIN")]
        else:
            opt_vals = [opt.get(k, "") for k in option_keys]

        price = to_price(v.get("prime价格($)")) or to_price(v.get("价格($)")) or 9.99
        image = v.get("商品主图") or ""

        p = {}
        p.update(v_common)
        p.update({
            "商品属性*": "P",
            "款式1": opt_vals[0] if len(opt_vals) > 0 else "",
            "款式2": opt_vals[1] if len(opt_vals) > 1 else "",
            "款式3": opt_vals[2] if len(opt_vals) > 2 else "",
            "商品售价*": price,
            "商品原价": price,
            "成本价": "",
            "商品SKU": v.get("ASIN") or "",
            "商品条形码": "",
            # P row supports one variant image; we use variant's main image
            "商品图片*": image,
            "商品主图": "",
        })
        rows.append(p)

    return rows
| 457 | + | ||
| 458 | + | ||
def main():
    """
    CLI entry point: scan competitor xlsx exports, group variants by
    父ASIN, and write a Shoplazza import workbook from the official
    template.

    Fix: `spu_count` was incremented before the --max-products check, so
    the final summary over-reported the number of SPU groups by one when
    the limit truncated the run; the check now happens before counting.
    """
    parser = argparse.ArgumentParser(description="Convert competitor xlsx files to Shoplazza import xlsx")
    parser.add_argument("--input-dir", default="data/mai_jia_jing_ling/products_data", help="Directory containing competitor xlsx files")
    parser.add_argument("--template", default="docs/商品导入模板.xlsx", help="Shoplazza import template xlsx")
    parser.add_argument("--output", default="competitor_shoplazza_import.xlsx", help="Output xlsx file path")
    parser.add_argument("--max-files", type=int, default=None, help="Limit number of xlsx files to read (for testing)")
    parser.add_argument("--max-rows-per-file", type=int, default=None, help="Limit rows per xlsx file (for testing)")
    parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)")
    args = parser.parse_args()

    input_dir = args.input_dir
    if not os.path.isdir(input_dir):
        raise RuntimeError("input-dir not found: {}".format(input_dir))
    if not os.path.exists(args.template):
        raise RuntimeError("template not found: {}".format(args.template))

    files = [os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.lower().endswith(".xlsx")]
    files.sort()
    if args.max_files is not None:
        files = files[: int(args.max_files)]

    print("Reading competitor files: {} (from {})".format(len(files), input_dir), flush=True)

    groups = defaultdict(list)  # spu_id -> [variant rows]
    seen_asin = set()  # de-duplicate ASINs repeated across exports

    for fp in files:
        print(" - loading: {}".format(fp), flush=True)
        try:
            rows = read_competitor_rows_from_file(fp, max_rows=args.max_rows_per_file)
        except Exception as e:
            # One corrupt export should not abort the whole run.
            print("WARN: failed to read {}: {}".format(fp, e))
            continue
        print(" loaded rows: {}".format(len(rows)), flush=True)

        for r in rows:
            asin = r.get("ASIN")
            if asin in seen_asin:
                continue
            seen_asin.add(asin)
            spu_id = r.get("父ASIN") or asin
            groups[spu_id].append(r)

    print("Collected variants: {}, SPU groups: {}".format(len(seen_asin), len(groups)), flush=True)

    excel_rows = []
    spu_count = 0

    for variants in groups.values():
        if not variants:
            continue
        # Check the cap BEFORE counting so spu_count equals groups emitted.
        if args.max_products is not None and spu_count >= int(args.max_products):
            break
        spu_count += 1
        if len(variants) == 1:
            excel_rows.append(build_s_row(variants[0]))
        else:
            excel_rows.extend(build_m_p_rows(variants))

    print("Generated Excel rows: {} (SPU groups output: {})".format(len(excel_rows), spu_count), flush=True)
    create_excel_from_template(args.template, args.output, excel_rows)
| 520 | + | ||
| 521 | + | ||
# Allow running the converter directly as a script.
if __name__ == "__main__":
    main()
| 524 | + | ||
| 525 | + |
scripts/csv_to_excel.py
| @@ -22,6 +22,10 @@ from openpyxl import load_workbook | @@ -22,6 +22,10 @@ from openpyxl import load_workbook | ||
| 22 | from openpyxl.styles import Font, Alignment | 22 | from openpyxl.styles import Font, Alignment |
| 23 | from openpyxl.utils import get_column_letter | 23 | from openpyxl.utils import get_column_letter |
| 24 | 24 | ||
| 25 | +# Shared helpers (keeps template writing consistent across scripts) | ||
| 26 | +from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared | ||
| 27 | +from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared | ||
| 28 | + | ||
| 25 | # Add parent directory to path | 29 | # Add parent directory to path |
| 26 | sys.path.insert(0, str(Path(__file__).parent.parent)) | 30 | sys.path.insert(0, str(Path(__file__).parent.parent)) |
| 27 | 31 | ||
| @@ -82,21 +86,8 @@ def generate_handle(title: str) -> str: | @@ -82,21 +86,8 @@ def generate_handle(title: str) -> str: | ||
| 82 | Returns: | 86 | Returns: |
| 83 | URL-friendly handle (ASCII only) | 87 | URL-friendly handle (ASCII only) |
| 84 | """ | 88 | """ |
| 85 | - # Convert to lowercase | ||
| 86 | - handle = title.lower() | ||
| 87 | - | ||
| 88 | - # Remove non-ASCII characters, keep only letters, numbers, spaces, and hyphens | ||
| 89 | - handle = re.sub(r'[^a-z0-9\s-]', '', handle) | ||
| 90 | - | ||
| 91 | - # Replace spaces and multiple hyphens with single hyphen | ||
| 92 | - handle = re.sub(r'[-\s]+', '-', handle) | ||
| 93 | - handle = handle.strip('-') | ||
| 94 | - | ||
| 95 | - # Limit length | ||
| 96 | - if len(handle) > 255: | ||
| 97 | - handle = handle[:255] | ||
| 98 | - | ||
| 99 | - return handle or 'product' | 89 | + # Keep backward-compatible function name while delegating to shared helper. |
| 90 | + return _generate_handle_shared(title) | ||
| 100 | 91 | ||
| 101 | 92 | ||
| 102 | def read_csv_file(csv_file: str) -> list: | 93 | def read_csv_file(csv_file: str) -> list: |
| @@ -255,52 +246,8 @@ def create_excel_from_template(template_file: str, output_file: str, csv_data_li | @@ -255,52 +246,8 @@ def create_excel_from_template(template_file: str, output_file: str, csv_data_li | ||
| 255 | output_file: Path to output Excel file | 246 | output_file: Path to output Excel file |
| 256 | csv_data_list: List of parsed CSV data dictionaries | 247 | csv_data_list: List of parsed CSV data dictionaries |
| 257 | """ | 248 | """ |
| 258 | - # Load template | ||
| 259 | - wb = load_workbook(template_file) | ||
| 260 | - ws = wb.active # Use the active sheet (Sheet4) | ||
| 261 | - | ||
| 262 | - # Find header row (row 2, index 1) | ||
| 263 | - header_row_idx = 2 # Row 2 in Excel (1-based, but header is at index 1 in pandas) | ||
| 264 | - | ||
| 265 | - # Get column mapping from header row | ||
| 266 | - column_mapping = {} | ||
| 267 | - for col_idx in range(1, ws.max_column + 1): | ||
| 268 | - cell_value = ws.cell(row=header_row_idx, column=col_idx).value | ||
| 269 | - if cell_value: | ||
| 270 | - column_mapping[cell_value] = col_idx | ||
| 271 | - | ||
| 272 | - # Start writing data from row 4 (after header and instructions) | ||
| 273 | - data_start_row = 4 # Row 4 in Excel (1-based) | ||
| 274 | - | ||
| 275 | - # Clear existing data rows (from row 4 onwards, but keep header and instructions) | ||
| 276 | - # Find the last row with data in the template | ||
| 277 | - last_template_row = ws.max_row | ||
| 278 | - if last_template_row >= data_start_row: | ||
| 279 | - # Clear data rows (keep header and instruction rows) | ||
| 280 | - for row in range(data_start_row, last_template_row + 1): | ||
| 281 | - for col in range(1, ws.max_column + 1): | ||
| 282 | - ws.cell(row=row, column=col).value = None | ||
| 283 | - | ||
| 284 | - # Convert CSV data to Excel rows | ||
| 285 | - for row_idx, csv_data in enumerate(csv_data_list): | ||
| 286 | - excel_row = csv_to_excel_row(csv_data) | ||
| 287 | - excel_row_num = data_start_row + row_idx | ||
| 288 | - | ||
| 289 | - # Write each field to corresponding column | ||
| 290 | - for field_name, col_idx in column_mapping.items(): | ||
| 291 | - if field_name in excel_row: | ||
| 292 | - cell = ws.cell(row=excel_row_num, column=col_idx) | ||
| 293 | - value = excel_row[field_name] | ||
| 294 | - cell.value = value | ||
| 295 | - | ||
| 296 | - # Set alignment for text fields | ||
| 297 | - if isinstance(value, str): | ||
| 298 | - cell.alignment = Alignment(vertical='top', wrap_text=True) | ||
| 299 | - elif isinstance(value, (int, float)): | ||
| 300 | - cell.alignment = Alignment(vertical='top') | ||
| 301 | - | ||
| 302 | - # Save workbook | ||
| 303 | - wb.save(output_file) | 249 | + excel_rows = [csv_to_excel_row(d) for d in csv_data_list] |
| 250 | + _create_excel_from_template_shared(template_file, output_file, excel_rows) | ||
| 304 | print(f"Excel file created: {output_file}") | 251 | print(f"Excel file created: {output_file}") |
| 305 | print(f" - Total rows: {len(csv_data_list)}") | 252 | print(f" - Total rows: {len(csv_data_list)}") |
| 306 | 253 |
scripts/csv_to_excel_multi_variant.py
| @@ -22,6 +22,10 @@ import itertools | @@ -22,6 +22,10 @@ import itertools | ||
| 22 | from openpyxl import load_workbook | 22 | from openpyxl import load_workbook |
| 23 | from openpyxl.styles import Alignment | 23 | from openpyxl.styles import Alignment |
| 24 | 24 | ||
| 25 | +# Shared helpers (keeps template writing consistent across scripts) | ||
| 26 | +from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared | ||
| 27 | +from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared | ||
| 28 | + | ||
| 25 | # Add parent directory to path | 29 | # Add parent directory to path |
| 26 | sys.path.insert(0, str(Path(__file__).parent.parent)) | 30 | sys.path.insert(0, str(Path(__file__).parent.parent)) |
| 27 | 31 | ||
| @@ -90,21 +94,8 @@ def generate_handle(title: str) -> str: | @@ -90,21 +94,8 @@ def generate_handle(title: str) -> str: | ||
| 90 | Returns: | 94 | Returns: |
| 91 | URL-friendly handle (ASCII only) | 95 | URL-friendly handle (ASCII only) |
| 92 | """ | 96 | """ |
| 93 | - # Convert to lowercase | ||
| 94 | - handle = title.lower() | ||
| 95 | - | ||
| 96 | - # Remove non-ASCII characters, keep only letters, numbers, spaces, and hyphens | ||
| 97 | - handle = re.sub(r'[^a-z0-9\s-]', '', handle) | ||
| 98 | - | ||
| 99 | - # Replace spaces and multiple hyphens with single hyphen | ||
| 100 | - handle = re.sub(r'[-\s]+', '-', handle) | ||
| 101 | - handle = handle.strip('-') | ||
| 102 | - | ||
| 103 | - # Limit length | ||
| 104 | - if len(handle) > 255: | ||
| 105 | - handle = handle[:255] | ||
| 106 | - | ||
| 107 | - return handle or 'product' | 97 | + # Keep backward-compatible function name while delegating to shared helper. |
| 98 | + return _generate_handle_shared(title) | ||
| 108 | 99 | ||
| 109 | 100 | ||
| 110 | def extract_material_from_title(title: str) -> str: | 101 | def extract_material_from_title(title: str) -> str: |
| @@ -478,49 +469,7 @@ def create_excel_from_template(template_file: str, output_file: str, excel_rows: | @@ -478,49 +469,7 @@ def create_excel_from_template(template_file: str, output_file: str, excel_rows: | ||
| 478 | output_file: Path to output Excel file | 469 | output_file: Path to output Excel file |
| 479 | excel_rows: List of dictionaries mapping Excel column names to values | 470 | excel_rows: List of dictionaries mapping Excel column names to values |
| 480 | """ | 471 | """ |
| 481 | - # Load template | ||
| 482 | - wb = load_workbook(template_file) | ||
| 483 | - ws = wb.active # Use the active sheet (Sheet4) | ||
| 484 | - | ||
| 485 | - # Find header row (row 2) | ||
| 486 | - header_row_idx = 2 | ||
| 487 | - | ||
| 488 | - # Get column mapping from header row | ||
| 489 | - column_mapping = {} | ||
| 490 | - for col_idx in range(1, ws.max_column + 1): | ||
| 491 | - cell_value = ws.cell(row=header_row_idx, column=col_idx).value | ||
| 492 | - if cell_value: | ||
| 493 | - column_mapping[cell_value] = col_idx | ||
| 494 | - | ||
| 495 | - # Start writing data from row 4 | ||
| 496 | - data_start_row = 4 | ||
| 497 | - | ||
| 498 | - # Clear existing data rows | ||
| 499 | - last_template_row = ws.max_row | ||
| 500 | - if last_template_row >= data_start_row: | ||
| 501 | - for row in range(data_start_row, last_template_row + 1): | ||
| 502 | - for col in range(1, ws.max_column + 1): | ||
| 503 | - ws.cell(row=row, column=col).value = None | ||
| 504 | - | ||
| 505 | - # Write data rows | ||
| 506 | - for row_idx, excel_row in enumerate(excel_rows): | ||
| 507 | - excel_row_num = data_start_row + row_idx | ||
| 508 | - | ||
| 509 | - # Write each field to corresponding column | ||
| 510 | - for field_name, col_idx in column_mapping.items(): | ||
| 511 | - if field_name in excel_row: | ||
| 512 | - cell = ws.cell(row=excel_row_num, column=col_idx) | ||
| 513 | - value = excel_row[field_name] | ||
| 514 | - cell.value = value | ||
| 515 | - | ||
| 516 | - # Set alignment | ||
| 517 | - if isinstance(value, str): | ||
| 518 | - cell.alignment = Alignment(vertical='top', wrap_text=True) | ||
| 519 | - elif isinstance(value, (int, float)): | ||
| 520 | - cell.alignment = Alignment(vertical='top') | ||
| 521 | - | ||
| 522 | - # Save workbook | ||
| 523 | - wb.save(output_file) | 472 | + _create_excel_from_template_shared(template_file, output_file, excel_rows) |
| 524 | print(f"Excel file created: {output_file}") | 473 | print(f"Excel file created: {output_file}") |
| 525 | print(f" - Total rows: {len(excel_rows)}") | 474 | print(f" - Total rows: {len(excel_rows)}") |
| 526 | 475 |
| @@ -0,0 +1,67 @@ | @@ -0,0 +1,67 @@ | ||
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +""" | ||
| 3 | +Shared utilities for generating Shoplazza (店匠) product import Excel files | ||
| 4 | +based on the provided template `docs/商品导入模板.xlsx`. | ||
| 5 | + | ||
| 6 | +We keep this in `scripts/` to maximize reuse by existing ad-hoc pipeline scripts. | ||
| 7 | +""" | ||
| 8 | + | ||
| 9 | +from openpyxl import load_workbook | ||
| 10 | +from openpyxl.styles import Alignment | ||
| 11 | + | ||
| 12 | + | ||
def load_template_column_mapping(ws, header_row_idx=2):
    """
    Build a mapping of header name -> column index (1-based) from the
    template sheet's header row.

    Empty header cells are skipped; header text is stripped of surrounding
    whitespace so lookups by canonical field name succeed.
    """
    header_cells = (
        (ws.cell(row=header_row_idx, column=idx).value, idx)
        for idx in range(1, ws.max_column + 1)
    )
    return {str(name).strip(): idx for name, idx in header_cells if name}
| 24 | + | ||
| 25 | + | ||
def create_excel_from_template(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4):
    """
    Create an Excel file from the Shoplazza template and fill with data rows.

    Args:
        template_file: Path to Excel template file
        output_file: Path to output Excel file
        excel_rows: List[Dict[str, Any]] mapping template header -> value
        header_row_idx: Header row index in template (default 2)
        data_start_row: Data start row index in template (default 4)
    """
    wb = load_workbook(template_file)
    ws = wb.active

    column_mapping = load_template_column_mapping(ws, header_row_idx=header_row_idx)

    # Clear existing data rows so template sample content never leaks into
    # the generated import file.
    last_template_row = ws.max_row
    if last_template_row >= data_start_row:
        for row in range(data_start_row, last_template_row + 1):
            for col in range(1, ws.max_column + 1):
                ws.cell(row=row, column=col).value = None

    # Write data rows; only fields present in both the template header and
    # the row dict are written, so partial rows are safe.
    for row_idx, excel_row in enumerate(excel_rows):
        excel_row_num = data_start_row + row_idx
        for field_name, col_idx in column_mapping.items():
            if field_name not in excel_row:
                continue
            cell = ws.cell(row=excel_row_num, column=col_idx)
            value = excel_row[field_name]
            cell.value = value
            # Only str/int/float receive explicit alignment, matching the
            # sibling shoplazza_import_template helper and the pre-refactor
            # per-script writers; other types (None, datetime, ...) keep the
            # template's default cell styling.
            if isinstance(value, str):
                cell.alignment = Alignment(vertical='top', wrap_text=True)
            elif isinstance(value, (int, float)):
                cell.alignment = Alignment(vertical='top')

    wb.save(output_file)
    print("Excel file created: {}".format(output_file))
    print(" - Total rows: {}".format(len(excel_rows)))
| 66 | + | ||
| 67 | + |
| @@ -0,0 +1,112 @@ | @@ -0,0 +1,112 @@ | ||
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +""" | ||
| 3 | +Shared helpers for generating Shoplazza product import Excel files from the | ||
| 4 | +official template `docs/商品导入模板.xlsx`. | ||
| 5 | + | ||
| 6 | +We keep this module small and dependency-light (openpyxl only) so other scripts | ||
| 7 | +can reuse the same template-writing behavior (header row mapping, data start | ||
| 8 | +row, alignment). | ||
| 9 | +""" | ||
| 10 | + | ||
| 11 | +import re | ||
| 12 | +from datetime import datetime | ||
| 13 | +from typing import Dict, Iterable, List, Optional | ||
| 14 | + | ||
| 15 | +from openpyxl import load_workbook | ||
| 16 | +from openpyxl.styles import Alignment | ||
| 17 | + | ||
| 18 | + | ||
def generate_handle(title: str) -> str:
    """
    Generate a URL-friendly, ASCII-only handle from a product title, suitable
    for the Shoplazza `SEO URL Handle` field. Caller may prepend `products/`.
    """
    if not title:
        return "product"

    # Lowercase, drop everything outside [a-z0-9 space -], then collapse
    # runs of separators into single hyphens with no leading/trailing hyphen.
    cleaned = re.sub(r"[^a-z0-9\s-]", "", str(title).lower())
    slug = "-".join(piece for piece in re.split(r"[-\s]+", cleaned) if piece)

    # Cap at 255 characters; fall back to a stable default when nothing
    # survives sanitization (e.g. fully non-ASCII titles).
    return slug[:255] or "product"
| 35 | + | ||
| 36 | + | ||
def parse_date_to_datetime_str(value) -> str:
    """
    Parse common date representations into the Shoplazza template datetime
    string ``YYYY-MM-DD HH:MM:SS``.

    Args:
        value: A ``datetime``, a date string in a supported format, or ``None``.

    Returns:
        The formatted datetime string, or ``""`` when the value is missing or
        cannot be parsed.
    """
    if value is None:
        return ""

    if isinstance(value, datetime):
        return value.strftime("%Y-%m-%d %H:%M:%S")

    s = str(value).strip()
    if not s:
        return ""

    # Most competitor sheets use YYYY-MM-DD; try the full datetime format
    # first so an explicit time component is preserved when present.
    # strptime with a date-only format already defaults the time to midnight,
    # so no extra replace() is needed. Catch only ValueError: anything else
    # raised here would be a real bug we want surfaced, not swallowed.
    for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"):
        try:
            return datetime.strptime(s, fmt).strftime("%Y-%m-%d %H:%M:%S")
        except ValueError:
            continue

    return ""
| 63 | + | ||
| 64 | + | ||
def create_excel_from_template(
    template_file: str,
    output_file: str,
    excel_rows: List[Dict[str, object]],
    *,
    header_row_idx: int = 2,
    data_start_row: int = 4,
    sheet_name: Optional[str] = None,
) -> None:
    """
    Fill the Shoplazza import template with product rows and save the result.

    - The official template keeps its headers on row 2 (1-based).
    - Product data begins on row 4, below the instruction row(s).
    """
    workbook = load_workbook(template_file)
    sheet = workbook[sheet_name] if sheet_name else workbook.active

    # Header name -> 1-based column index, read from the template itself so
    # column order changes in the template never break the writer.
    headers: Dict[str, int] = {}
    for column in range(1, sheet.max_column + 1):
        raw = sheet.cell(row=header_row_idx, column=column).value
        if raw:
            headers[str(raw).strip()] = column

    # Wipe any sample/data rows left in the template before writing ours.
    # (The range is empty when the template has no rows past the header.)
    for row in range(data_start_row, sheet.max_row + 1):
        for column in range(1, sheet.max_column + 1):
            sheet.cell(row=row, column=column).value = None

    # Fill one sheet row per input record; fields absent from a record are
    # simply left blank in that row.
    for offset, record in enumerate(excel_rows):
        target_row = data_start_row + offset
        for header, column in headers.items():
            if header not in record:
                continue
            value = record[header]
            cell = sheet.cell(row=target_row, column=column)
            cell.value = value
            if isinstance(value, str):
                cell.alignment = Alignment(vertical="top", wrap_text=True)
            elif isinstance(value, (int, float)):
                cell.alignment = Alignment(vertical="top")

    workbook.save(output_file)
| 111 | + | ||
| 112 | + |