Commit f3c11fef37a249640c5e2a576582747c3f1fc90e
1 parent
19d2d90f
亚马逊格式数据 导入店匠
Showing
53 changed files
with
723 additions
and
122 deletions
Show diff stats
README.md
| ... | ... | @@ -14,11 +14,12 @@ source .env |
| 14 | 14 | |
| 15 | 15 | ## 测试pipeline |
| 16 | 16 | |
| 17 | -fake数据 生成商品导入数据 提交到店匠的店铺: | |
| 17 | +1. fake数据 生成商品导入数据 提交到店匠的店铺: | |
| 18 | 18 | cd /home/tw/SearchEngine && source /home/tw/miniconda3/etc/profile.d/conda.sh && conda activate searchengine && python scripts/csv_to_excel_multi_variant.py --output with_colors.xlsx |
| 19 | 19 | |
| 20 | -自动同步到mysql | |
| 21 | -mysql到ES: | |
| 20 | +2. 后端:自动同步到mysql | |
| 21 | + | |
| 22 | +3. mysql到ES: | |
| 22 | 23 | |
| 23 | 24 | python scripts/recreate_and_import.py \ |
| 24 | 25 | --tenant-id 162 \ | ... | ... |
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363464.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363499.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363533.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363560.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363598.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363625.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363662.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363709.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363746.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363778.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363802.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363824.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363854.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363884.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363899.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363924.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363943.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363956.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363985.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364002.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364039.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364076.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364113.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364149.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364182.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364202.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364223.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364237.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364253.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364272.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364293.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364309.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364335.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364361.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364384.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364409.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364471.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364496.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364519.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364538.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364557.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364581.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364605.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364628.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364644.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364667.xlsx
0 → 100644
No preview for this file type
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364690.xlsx
0 → 100644
No preview for this file type
| ... | ... | @@ -0,0 +1,525 @@ |
| 1 | +#!/usr/bin/env python3 | |
| 2 | +""" | |
| 3 | +Convert competitor Excel exports (with Parent/Child ASIN structure) into | |
| 4 | +Shoplazza (店匠) product import Excel format based on `docs/商品导入模板.xlsx`. | |
| 5 | + | |
| 6 | +Data source: | |
| 7 | +- Directory with multiple `Competitor-*.xlsx` files. | |
| 8 | +- Each file contains a main sheet + "Notes" sheet. | |
| 9 | +- Column meanings (sample): | |
| 10 | + - ASIN: variant id (sku_id) | |
| 11 | + - 父ASIN: product id (spu_id) | |
| 12 | + | |
| 13 | +Output: | |
| 14 | +- For each 父ASIN group: | |
| 15 | + - If only 1 ASIN: generate one "S" row | |
| 16 | + - Else: generate one "M" row + multiple "P" rows | |
| 17 | + | |
| 18 | +Important: | |
| 19 | +- Variant dimensions are parsed primarily from the `SKU` column: | |
| 20 | + "Size: One Size | Color: Black" | |
| 21 | + and mapped into 款式1/2/3. | |
| 22 | +""" | |
| 23 | + | |
| 24 | +import os | |
| 25 | +import re | |
| 26 | +import sys | |
| 27 | +import argparse | |
| 28 | +from datetime import datetime | |
| 29 | +from collections import defaultdict, Counter | |
| 30 | +from pathlib import Path | |
| 31 | + | |
| 32 | +from openpyxl import load_workbook | |
| 33 | + | |
| 34 | +# Allow running as `python scripts/xxx.py` without installing as a package | |
| 35 | +sys.path.insert(0, str(Path(__file__).resolve().parent)) | |
| 36 | +from shoplazza_excel_template import create_excel_from_template | |
| 37 | + | |
| 38 | + | |
# Option-name preference order used by choose_option_keys(): when picking the
# (up to 3) variant dimensions mapped to 款式1/2/3, keys listed earlier here
# win over keys ranked only by frequency.
PREFERRED_OPTION_KEYS = [
    "Size", "Color", "Style", "Pattern", "Material", "Flavor", "Scent",
    "Pack", "Pack of", "Number of Items", "Count", "Capacity", "Length",
    "Width", "Height", "Model", "Configuration",
]
| 44 | + | |
| 45 | + | |
def clean_str(v):
    """Normalize any cell value to a whitespace-stripped string ("" for None)."""
    return "" if v is None else str(v).strip()
| 50 | + | |
| 51 | + | |
def html_escape(s):
    """
    Escape text for safe embedding in HTML element and attribute content.

    Fixes the previous no-op replacements (each character was "replaced"
    with itself instead of its entity). Ampersand is replaced first so
    entities produced by the later replacements are not double-escaped,
    and double quotes are escaped because callers also embed the result
    inside href="..." attributes.
    """
    s = "" if s is None else str(s).strip()
    return (s.replace("&", "&amp;")
             .replace("<", "&lt;")
             .replace(">", "&gt;")
             .replace('"', "&quot;"))
| 57 | + | |
| 58 | + | |
def generate_handle(title):
    """
    Generate a URL-friendly handle from a title (ASCII only).
    Keep consistent with existing scripts.

    Fix: the character-class patterns were written with doubled backslashes
    inside raw strings (r"...\\s..."), which matches a literal backslash
    plus 's' instead of whitespace, so spaces were never collapsed into
    hyphens. Uses \s / [-\s] as the sibling scripts do.
    """
    handle = ("" if title is None else str(title).strip()).lower()
    # Keep only lowercase letters, digits, whitespace and hyphens.
    handle = re.sub(r"[^a-z0-9\s-]", "", handle)
    # Collapse runs of whitespace/hyphens into a single hyphen.
    handle = re.sub(r"[-\s]+", "-", handle).strip("-")
    if len(handle) > 255:
        handle = handle[:255]
    return handle or "product"
| 70 | + | |
| 71 | + | |
def parse_date_to_template(dt_value):
    """
    Normalize a date value to the template format 'YYYY-MM-DD HH:MM:SS'.

    Accepts a datetime object or a string such as "2018-05-09"; returns ""
    when the value is empty or cannot be parsed with any known format.
    """
    if dt_value is None or dt_value == "":
        return ""
    out_fmt = "%Y-%m-%d %H:%M:%S"
    if isinstance(dt_value, datetime):
        return dt_value.strftime(out_fmt)
    text = str(dt_value).strip()
    # Try the date-only forms first, then the full timestamp forms.
    for in_fmt in ("%Y-%m-%d", "%Y/%m/%d", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S"):
        try:
            return datetime.strptime(text, in_fmt).strftime(out_fmt)
        except ValueError:
            continue
    return ""
| 90 | + | |
| 91 | + | |
def parse_weight(weight_conv, weight_raw):
    """
    Return (weight_value, unit) where unit in {kg, lb, g, oz}.
    Prefer '商品重量(单位换算)' like '68.04 g'.
    Fallback to '商品重量' like '0.15 pounds'.
    Returns ("", "") when nothing parseable is found.

    Fix: the number/unit regex used doubled backslashes inside a raw string
    (r"...\\.[0-9]...\\s*..."), which required literal backslash characters
    in the input, so every real weight string failed to match and the
    function always returned ("", "").
    """
    s = ("" if weight_conv is None else str(weight_conv).strip()) or \
        ("" if weight_raw is None else str(weight_raw).strip())
    if not s:
        return ("", "")
    m = re.search(r"([0-9]+(?:\.[0-9]+)?)\s*([a-zA-Z]+)", s)
    if not m:
        return ("", "")
    val = float(m.group(1))
    unit = m.group(2).lower()
    # Canonicalize the many spellings to the 4 units the template accepts.
    unit_map = {
        "g": "g", "gram": "g", "grams": "g",
        "kg": "kg", "kilogram": "kg", "kilograms": "kg",
        "lb": "lb", "lbs": "lb", "pound": "lb", "pounds": "lb",
        "oz": "oz", "ounce": "oz", "ounces": "oz",
    }
    if unit in unit_map:
        return (val, unit_map[unit])
    return ("", "")
| 115 | + | |
| 116 | + | |
def parse_dimensions_inches(dim_raw):
    """
    Template '尺寸信息': 'L,W,H' in inches.
    Input example: '7.9 x 7.9 x 2 inches'

    Fix: the number pattern used a doubled backslash inside a raw string
    (r"...\\.[0-9]+..."), so the optional decimal part required a literal
    backslash and never matched — '7.9' was split into '7' and '9',
    producing wrong L,W,H triples. Returns "" when fewer than three
    numbers are found.
    """
    s = "" if dim_raw is None else str(dim_raw).strip()
    if not s:
        return ""
    # Take the first three numbers in reading order as L, W, H.
    nums = re.findall(r"[0-9]+(?:\.[0-9]+)?", s)
    if len(nums) < 3:
        return ""
    return "{},{},{}".format(nums[0], nums[1], nums[2])
| 130 | + | |
| 131 | + | |
def parse_sku_options(sku_text):
    """
    Parse the 'SKU' column into an ordered {key: value} dict.

    Example:
        'Size: One Size | Color: Black' -> {'Size': 'One Size', 'Color': 'Black'}

    Segments without ':' and entries whose key or value is empty are
    skipped; empty input yields {}.
    """
    text = "" if sku_text is None else str(sku_text).strip()
    options = {}
    for segment in text.split("|"):
        segment = segment.strip()
        if not segment or ":" not in segment:
            continue
        key, _, value = segment.partition(":")
        key = key.strip()
        value = value.strip()
        if key and value:
            options[key] = value
    return options
| 152 | + | |
| 153 | + | |
def choose_option_keys(variant_dicts, max_keys=3):
    """
    Pick up to `max_keys` option names for one product group.

    Keys on PREFERRED_OPTION_KEYS come first (in that list's order); the
    rest are ranked by how many variants carry a non-empty value for them,
    ties broken alphabetically (case-insensitive). Returns [] when no
    variant has any non-empty option value.
    """
    usage = Counter(
        key
        for options in variant_dicts
        for key, value in options.items()
        if value
    )
    if not usage:
        return []

    rank_of = {name: pos for pos, name in enumerate(PREFERRED_OPTION_KEYS)}
    ordered = sorted(
        usage,
        key=lambda name: (rank_of.get(name, 10 ** 6), -usage[name], name.lower()),
    )
    return ordered[:max_keys]
| 174 | + | |
| 175 | + | |
def build_description_html(title, details, product_url):
    """
    Assemble product-description HTML: a title paragraph, a bullet list
    built from the '|'-separated detail string (capped at 30 items), and a
    source link back to the original product page. Empty inputs are
    skipped; returns "" when everything is empty.
    """
    chunks = []
    if title:
        chunks.append("<p>{}</p>".format(html_escape(title)))
    items = [piece.strip() for piece in clean_str(details).split("|") if piece.strip()]
    if items:
        bullets = "".join("<li>{}</li>".format(html_escape(item)) for item in items[:30])
        chunks.append("<ul>{}</ul>".format(bullets))
    if product_url:
        chunks.append('<p>Source: <a href="{0}">{0}</a></p>'.format(html_escape(product_url)))
    return "".join(chunks)
| 187 | + | |
| 188 | + | |
def competitor_sheet(ws):
    """
    Build a header-text -> 1-based column index mapping for a competitor
    sheet, assuming headers occupy row 1. Blank header cells are skipped;
    if a header text repeats, the right-most column wins.
    """
    mapping = {}
    for col in range(1, ws.max_column + 1):
        header = clean_str(ws.cell(1, col).value)
        if header:
            mapping[header] = col
    return mapping
| 200 | + | |
| 201 | + | |
def read_competitor_rows_from_file(xlsx_path, max_rows=None):
    """
    Read one competitor xlsx export and return its data rows as dicts keyed
    by the original (Chinese) column headers.

    - Uses the first sheet whose name is not "Notes".
    - Skips rows with an empty ASIN; a missing 父ASIN falls back to the
      row's own ASIN so single products still form their own group.
    - Price cells are passed through un-cleaned (to_price handles them).

    Args:
        xlsx_path: path of the competitor .xlsx file.
        max_rows: optional cap on the number of data rows read (testing aid).

    Returns:
        List of row dicts (possibly empty).

    Raises:
        RuntimeError: if any required column header is missing.

    Fix: read-only workbooks keep the underlying file handle open until
    explicitly closed; the workbook is now closed in a finally block so a
    batch run over many files does not leak handles.
    """
    wb = load_workbook(xlsx_path, read_only=True, data_only=True)
    try:
        # pick first non-Notes sheet
        sheet_name = None
        for name in wb.sheetnames:
            if str(name).lower() == "notes":
                continue
            sheet_name = name
            break
        if sheet_name is None:
            return []
        ws = wb[sheet_name]
        idx = competitor_sheet(ws)

        required = ["ASIN", "父ASIN", "商品标题", "商品主图", "SKU", "详细参数", "价格($)", "prime价格($)",
                    "上架时间", "类目路径", "大类目", "小类目", "品牌", "品牌链接", "商品详情页链接",
                    "商品重量(单位换算)", "商品重量", "商品尺寸"]
        for k in required:
            if k not in idx:
                raise RuntimeError("Missing column '{}' in {} sheet {}".format(k, xlsx_path, sheet_name))

        rows = []
        end_row = ws.max_row
        if max_rows is not None:
            # +1 because row 1 is the header row.
            end_row = min(end_row, 1 + int(max_rows))

        for r in range(2, end_row + 1):
            asin = clean_str(ws.cell(r, idx["ASIN"]).value)
            if not asin:
                continue
            parent = clean_str(ws.cell(r, idx["父ASIN"]).value) or asin
            row = {
                "ASIN": asin,
                "父ASIN": parent,
                "SKU": clean_str(ws.cell(r, idx["SKU"]).value),
                "详细参数": clean_str(ws.cell(r, idx["详细参数"]).value),
                "商品标题": clean_str(ws.cell(r, idx["商品标题"]).value),
                "商品主图": clean_str(ws.cell(r, idx["商品主图"]).value),
                "价格($)": ws.cell(r, idx["价格($)"]).value,
                "prime价格($)": ws.cell(r, idx["prime价格($)"]).value,
                "上架时间": clean_str(ws.cell(r, idx["上架时间"]).value),
                "类目路径": clean_str(ws.cell(r, idx["类目路径"]).value),
                "大类目": clean_str(ws.cell(r, idx["大类目"]).value),
                "小类目": clean_str(ws.cell(r, idx["小类目"]).value),
                "品牌": clean_str(ws.cell(r, idx["品牌"]).value),
                "品牌链接": clean_str(ws.cell(r, idx["品牌链接"]).value),
                "商品详情页链接": clean_str(ws.cell(r, idx["商品详情页链接"]).value),
                "商品重量(单位换算)": clean_str(ws.cell(r, idx["商品重量(单位换算)"]).value),
                "商品重量": clean_str(ws.cell(r, idx["商品重量"]).value),
                "商品尺寸": clean_str(ws.cell(r, idx["商品尺寸"]).value),
            }
            rows.append(row)
        return rows
    finally:
        wb.close()
| 255 | + | |
| 256 | + | |
def to_price(v):
    """
    Coerce a price cell to float. Accepts numbers directly and strings with
    surrounding noise (e.g. '$19.99'); returns None for empty or
    unparseable values.

    Fix: the fallback regex used a doubled backslash inside a raw string
    (r"...\\.[0-9]+..."), so the decimal part required a literal backslash
    and '$19.99' was truncated to 19.0.
    """
    if v is None or v == "":
        return None
    try:
        return float(v)
    except (TypeError, ValueError):
        pass
    m = re.search(r"[0-9]+(?:\.[0-9]+)?", str(v).strip())
    if not m:
        return None
    return float(m.group(0))
| 268 | + | |
| 269 | + | |
def build_common_fields(base_row, spu_id):
    """
    Build the template fields shared by S / M / P rows for one product.

    Args:
        base_row: one competitor row dict (keys are the original Chinese
            column headers) supplying title, brand, category, weight, etc.
        spu_id: the group id (父ASIN, falling back to ASIN upstream).

    Returns:
        Dict mapping template column names (商品标题*, SEO..., 商品备注, ...)
        to values; price/SKU/image columns are added by the callers.
    """
    title = base_row.get("商品标题") or "Product"
    brand = base_row.get("品牌") or ""
    big_cat = base_row.get("大类目") or ""
    small_cat = base_row.get("小类目") or ""
    cat_path = base_row.get("类目路径") or ""

    # Handle is prefixed with "products/" — presumably the store's URL
    # scheme expected by the import template; TODO confirm against template docs.
    handle = generate_handle(title)
    if handle and not handle.startswith("products/"):
        handle = "products/{}".format(handle)

    seo_title = title
    seo_desc_parts = []
    if brand:
        seo_desc_parts.append(brand)
    seo_desc_parts.append(title)
    if big_cat:
        seo_desc_parts.append(big_cat)
    # [:5000] matches the template's SEO field length limits used below.
    seo_description = " ".join([x for x in seo_desc_parts if x])[:5000]

    seo_keywords = ",".join([x for x in [title, brand, big_cat, small_cat] if x])
    tags = ",".join([x for x in [brand, big_cat, small_cat] if x])

    created_at = parse_date_to_template(base_row.get("上架时间"))

    description = build_description_html(
        title=title,
        details=base_row.get("详细参数"),
        product_url=base_row.get("商品详情页链接"),
    )

    # default inventory settings (data source has no stock)
    inventory_qty = 100

    weight_val, weight_unit = parse_weight(base_row.get("商品重量(单位换算)"), base_row.get("商品重量"))
    size_info = parse_dimensions_inches(base_row.get("商品尺寸"))

    # Album (collection) name: big category, else first segment of the
    # colon-separated category path.
    album = big_cat or ""
    if not album and cat_path:
        album = cat_path.split(":")[0]

    common = {
        "商品ID": "",
        "创建时间": created_at,
        "商品标题*": title[:255],
        "商品副标题": "{} {}".format(brand, big_cat).strip()[:600],
        "商品描述": description,
        "SEO标题": seo_title[:5000],
        "SEO描述": seo_description,
        "SEO URL Handle": handle,
        "SEO URL 重定向": "N",
        "SEO关键词": seo_keywords[:5000],
        "商品上架": "Y",
        "需要物流": "Y",
        "商品收税": "N",
        "商品spu": spu_id[:100],
        "启用虚拟销量": "N",
        "虚拟销量值": "",
        "跟踪库存": "Y",
        "库存规则*": "1",
        "专辑名称": album,
        "标签": tags,
        "供应商名称": "Amazon",
        "供应商URL": base_row.get("商品详情页链接") or base_row.get("品牌链接") or "",
        "商品重量": weight_val if weight_val != "" else "",
        "重量单位": weight_unit,
        "商品库存": inventory_qty,
        "尺寸信息": size_info,
        # 商品备注 keeps the source ASIN/parent/category for traceability,
        # capped at the template's 500-char note limit.
        "商品备注": "ASIN:{}; ParentASIN:{}; CategoryPath:{}".format(
            base_row.get("ASIN", ""), spu_id, (cat_path[:200] if cat_path else "")
        )[:500],
        "款式备注": "",
    }
    return common
| 346 | + | |
| 347 | + | |
def build_s_row(base_row):
    """
    Build one standalone ("S") template row for a single-variant product.

    Price prefers the prime price, then the list price, then a 9.99
    placeholder; the product's main image doubles as the gallery image and
    its ASIN is used as the SKU code.
    """
    spu_id = base_row.get("父ASIN") or base_row.get("ASIN")
    row = dict(build_common_fields(base_row, spu_id=spu_id))
    price = to_price(base_row.get("prime价格($)")) or to_price(base_row.get("价格($)")) or 9.99
    image = base_row.get("商品主图") or ""

    row["商品属性*"] = "S"
    row["款式1"] = ""
    row["款式2"] = ""
    row["款式3"] = ""
    row["商品售价*"] = price
    row["商品原价"] = price
    row["成本价"] = ""
    row["商品SKU"] = base_row.get("ASIN") or ""
    row["商品条形码"] = ""
    row["商品图片*"] = image
    row["商品主图"] = image
    return row
| 370 | + | |
| 371 | + | |
def build_m_p_rows(variant_rows):
    """
    Build template rows for a multi-variant product: one "M" (main) row
    followed by one "P" (variant) row per ASIN.

    Args:
        variant_rows: List[dict] with same 父ASIN; the first row supplies
            the SPU-level fields (title, description, SEO, ...).

    Returns:
        List of Excel-row dicts: [M, P, P, ...].
    """
    base = variant_rows[0]
    spu_id = base.get("父ASIN") or base.get("ASIN")
    common = build_common_fields(base, spu_id=spu_id)

    # Derive up to 3 option dimensions from the variants' SKU text; when no
    # variant carries options at all, fall back to a synthetic "Variant"
    # axis whose values are the ASINs (keeps every P row distinguishable).
    option_dicts = [parse_sku_options(v.get("SKU")) for v in variant_rows]
    option_keys = choose_option_keys(option_dicts, max_keys=3)
    if not option_keys:
        option_keys = ["Variant"]

    # M row
    m = {}
    m.update(common)
    m.update({
        "商品属性*": "M",
        "款式1": option_keys[0] if len(option_keys) > 0 else "",
        "款式2": option_keys[1] if len(option_keys) > 1 else "",
        "款式3": option_keys[2] if len(option_keys) > 2 else "",
        "商品售价*": "",
        "商品原价": "",
        "成本价": "",
        "商品SKU": "",
        "商品条形码": "",
        "商品图片*": base.get("商品主图") or "",
        "商品主图": base.get("商品主图") or "",
    })

    # For M row, these SKU-level fields should be empty per template guidance
    m["商品重量"] = ""
    m["重量单位"] = ""
    m["商品库存"] = ""
    m["尺寸信息"] = ""

    rows = [m]

    # P rows
    for v in variant_rows:
        v_common = build_common_fields(v, spu_id=spu_id)
        # wipe SPU-only fields for P row
        v_common.update({
            "商品副标题": "",
            "商品描述": "",
            "SEO标题": "",
            "SEO描述": "",
            "SEO URL Handle": "",
            "SEO URL 重定向": "",
            "SEO关键词": "",
            "专辑名称": "",
            "标签": "",
            "供应商名称": "",
            "供应商URL": "",
            "商品备注": "",
        })

        opt = parse_sku_options(v.get("SKU"))
        if option_keys == ["Variant"]:
            opt_vals = [v.get("ASIN")]
        else:
            # Missing keys produce "" so 款式 columns stay aligned with M.
            opt_vals = [opt.get(k, "") for k in option_keys]

        price = to_price(v.get("prime价格($)")) or to_price(v.get("价格($)")) or 9.99
        image = v.get("商品主图") or ""

        p = {}
        p.update(v_common)
        p.update({
            "商品属性*": "P",
            "款式1": opt_vals[0] if len(opt_vals) > 0 else "",
            "款式2": opt_vals[1] if len(opt_vals) > 1 else "",
            "款式3": opt_vals[2] if len(opt_vals) > 2 else "",
            "商品售价*": price,
            "商品原价": price,
            "成本价": "",
            "商品SKU": v.get("ASIN") or "",
            "商品条形码": "",
            # P row supports one variant image; we use variant's main image
            "商品图片*": image,
            "商品主图": "",
        })
        rows.append(p)

    return rows
| 457 | + | |
| 458 | + | |
def main():
    """
    CLI entry point: read every competitor xlsx file in --input-dir,
    de-duplicate variants by ASIN, group them by 父ASIN (SPU), convert each
    group into S or M/P template rows, and write the result into a copy of
    the Shoplazza import template.

    Fix: the --max-products cap was checked after incrementing spu_count,
    so a truncated run counted (and reported) one group more than it
    actually wrote; the cap is now checked first and the summary prints the
    real number of groups output.
    """
    parser = argparse.ArgumentParser(description="Convert competitor xlsx files to Shoplazza import xlsx")
    parser.add_argument("--input-dir", default="data/mai_jia_jing_ling/products_data", help="Directory containing competitor xlsx files")
    parser.add_argument("--template", default="docs/商品导入模板.xlsx", help="Shoplazza import template xlsx")
    parser.add_argument("--output", default="competitor_shoplazza_import.xlsx", help="Output xlsx file path")
    parser.add_argument("--max-files", type=int, default=None, help="Limit number of xlsx files to read (for testing)")
    parser.add_argument("--max-rows-per-file", type=int, default=None, help="Limit rows per xlsx file (for testing)")
    parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)")
    args = parser.parse_args()

    input_dir = args.input_dir
    if not os.path.isdir(input_dir):
        raise RuntimeError("input-dir not found: {}".format(input_dir))
    if not os.path.exists(args.template):
        raise RuntimeError("template not found: {}".format(args.template))

    files = [os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.lower().endswith(".xlsx")]
    files.sort()
    if args.max_files is not None:
        files = files[: int(args.max_files)]

    print("Reading competitor files: {} (from {})".format(len(files), input_dir), flush=True)

    groups = defaultdict(list)  # spu_id -> [variant rows]
    seen_asin = set()

    for fp in files:
        print(" - loading: {}".format(fp), flush=True)
        try:
            rows = read_competitor_rows_from_file(fp, max_rows=args.max_rows_per_file)
        except Exception as e:
            # Best-effort batch run: one corrupt export must not abort the
            # whole conversion.
            print("WARN: failed to read {}: {}".format(fp, e))
            continue
        print(" loaded rows: {}".format(len(rows)), flush=True)

        # De-duplicate by ASIN across files (exports may overlap).
        for r in rows:
            asin = r.get("ASIN")
            if asin in seen_asin:
                continue
            seen_asin.add(asin)
            spu_id = r.get("父ASIN") or asin
            groups[spu_id].append(r)

    print("Collected variants: {}, SPU groups: {}".format(len(seen_asin), len(groups)), flush=True)

    excel_rows = []
    spu_count = 0

    for spu_id, variants in groups.items():
        if not variants:
            continue
        # Check the cap before counting so spu_count equals the number of
        # SPU groups actually written.
        if args.max_products is not None and spu_count >= int(args.max_products):
            break
        spu_count += 1
        if len(variants) == 1:
            excel_rows.append(build_s_row(variants[0]))
        else:
            excel_rows.extend(build_m_p_rows(variants))

    print("Generated Excel rows: {} (SPU groups output: {})".format(len(excel_rows), spu_count), flush=True)
    create_excel_from_template(args.template, args.output, excel_rows)


if __name__ == "__main__":
    main()
| 524 | + | |
| 525 | + | ... | ... |
scripts/csv_to_excel.py
| ... | ... | @@ -22,6 +22,10 @@ from openpyxl import load_workbook |
| 22 | 22 | from openpyxl.styles import Font, Alignment |
| 23 | 23 | from openpyxl.utils import get_column_letter |
| 24 | 24 | |
| 25 | +# Shared helpers (keeps template writing consistent across scripts) | |
| 26 | +from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared | |
| 27 | +from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared | |
| 28 | + | |
| 25 | 29 | # Add parent directory to path |
| 26 | 30 | sys.path.insert(0, str(Path(__file__).parent.parent)) |
| 27 | 31 | |
| ... | ... | @@ -82,21 +86,8 @@ def generate_handle(title: str) -> str: |
| 82 | 86 | Returns: |
| 83 | 87 | URL-friendly handle (ASCII only) |
| 84 | 88 | """ |
| 85 | - # Convert to lowercase | |
| 86 | - handle = title.lower() | |
| 87 | - | |
| 88 | - # Remove non-ASCII characters, keep only letters, numbers, spaces, and hyphens | |
| 89 | - handle = re.sub(r'[^a-z0-9\s-]', '', handle) | |
| 90 | - | |
| 91 | - # Replace spaces and multiple hyphens with single hyphen | |
| 92 | - handle = re.sub(r'[-\s]+', '-', handle) | |
| 93 | - handle = handle.strip('-') | |
| 94 | - | |
| 95 | - # Limit length | |
| 96 | - if len(handle) > 255: | |
| 97 | - handle = handle[:255] | |
| 98 | - | |
| 99 | - return handle or 'product' | |
| 89 | + # Keep backward-compatible function name while delegating to shared helper. | |
| 90 | + return _generate_handle_shared(title) | |
| 100 | 91 | |
| 101 | 92 | |
| 102 | 93 | def read_csv_file(csv_file: str) -> list: |
| ... | ... | @@ -255,52 +246,8 @@ def create_excel_from_template(template_file: str, output_file: str, csv_data_li |
| 255 | 246 | output_file: Path to output Excel file |
| 256 | 247 | csv_data_list: List of parsed CSV data dictionaries |
| 257 | 248 | """ |
| 258 | - # Load template | |
| 259 | - wb = load_workbook(template_file) | |
| 260 | - ws = wb.active # Use the active sheet (Sheet4) | |
| 261 | - | |
| 262 | - # Find header row (row 2, index 1) | |
| 263 | - header_row_idx = 2 # Row 2 in Excel (1-based, but header is at index 1 in pandas) | |
| 264 | - | |
| 265 | - # Get column mapping from header row | |
| 266 | - column_mapping = {} | |
| 267 | - for col_idx in range(1, ws.max_column + 1): | |
| 268 | - cell_value = ws.cell(row=header_row_idx, column=col_idx).value | |
| 269 | - if cell_value: | |
| 270 | - column_mapping[cell_value] = col_idx | |
| 271 | - | |
| 272 | - # Start writing data from row 4 (after header and instructions) | |
| 273 | - data_start_row = 4 # Row 4 in Excel (1-based) | |
| 274 | - | |
| 275 | - # Clear existing data rows (from row 4 onwards, but keep header and instructions) | |
| 276 | - # Find the last row with data in the template | |
| 277 | - last_template_row = ws.max_row | |
| 278 | - if last_template_row >= data_start_row: | |
| 279 | - # Clear data rows (keep header and instruction rows) | |
| 280 | - for row in range(data_start_row, last_template_row + 1): | |
| 281 | - for col in range(1, ws.max_column + 1): | |
| 282 | - ws.cell(row=row, column=col).value = None | |
| 283 | - | |
| 284 | - # Convert CSV data to Excel rows | |
| 285 | - for row_idx, csv_data in enumerate(csv_data_list): | |
| 286 | - excel_row = csv_to_excel_row(csv_data) | |
| 287 | - excel_row_num = data_start_row + row_idx | |
| 288 | - | |
| 289 | - # Write each field to corresponding column | |
| 290 | - for field_name, col_idx in column_mapping.items(): | |
| 291 | - if field_name in excel_row: | |
| 292 | - cell = ws.cell(row=excel_row_num, column=col_idx) | |
| 293 | - value = excel_row[field_name] | |
| 294 | - cell.value = value | |
| 295 | - | |
| 296 | - # Set alignment for text fields | |
| 297 | - if isinstance(value, str): | |
| 298 | - cell.alignment = Alignment(vertical='top', wrap_text=True) | |
| 299 | - elif isinstance(value, (int, float)): | |
| 300 | - cell.alignment = Alignment(vertical='top') | |
| 301 | - | |
| 302 | - # Save workbook | |
| 303 | - wb.save(output_file) | |
| 249 | + excel_rows = [csv_to_excel_row(d) for d in csv_data_list] | |
| 250 | + _create_excel_from_template_shared(template_file, output_file, excel_rows) | |
| 304 | 251 | print(f"Excel file created: {output_file}") |
| 305 | 252 | print(f" - Total rows: {len(csv_data_list)}") |
| 306 | 253 | ... | ... |
scripts/csv_to_excel_multi_variant.py
| ... | ... | @@ -22,6 +22,10 @@ import itertools |
| 22 | 22 | from openpyxl import load_workbook |
| 23 | 23 | from openpyxl.styles import Alignment |
| 24 | 24 | |
| 25 | +# Shared helpers (keeps template writing consistent across scripts) | |
| 26 | +from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared | |
| 27 | +from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared | |
| 28 | + | |
| 25 | 29 | # Add parent directory to path |
| 26 | 30 | sys.path.insert(0, str(Path(__file__).parent.parent)) |
| 27 | 31 | |
| ... | ... | @@ -90,21 +94,8 @@ def generate_handle(title: str) -> str: |
| 90 | 94 | Returns: |
| 91 | 95 | URL-friendly handle (ASCII only) |
| 92 | 96 | """ |
| 93 | - # Convert to lowercase | |
| 94 | - handle = title.lower() | |
| 95 | - | |
| 96 | - # Remove non-ASCII characters, keep only letters, numbers, spaces, and hyphens | |
| 97 | - handle = re.sub(r'[^a-z0-9\s-]', '', handle) | |
| 98 | - | |
| 99 | - # Replace spaces and multiple hyphens with single hyphen | |
| 100 | - handle = re.sub(r'[-\s]+', '-', handle) | |
| 101 | - handle = handle.strip('-') | |
| 102 | - | |
| 103 | - # Limit length | |
| 104 | - if len(handle) > 255: | |
| 105 | - handle = handle[:255] | |
| 106 | - | |
| 107 | - return handle or 'product' | |
| 97 | + # Keep backward-compatible function name while delegating to shared helper. | |
| 98 | + return _generate_handle_shared(title) | |
| 108 | 99 | |
| 109 | 100 | |
| 110 | 101 | def extract_material_from_title(title: str) -> str: |
| ... | ... | @@ -478,49 +469,7 @@ def create_excel_from_template(template_file: str, output_file: str, excel_rows: |
| 478 | 469 | output_file: Path to output Excel file |
| 479 | 470 | excel_rows: List of dictionaries mapping Excel column names to values |
| 480 | 471 | """ |
| 481 | - # Load template | |
| 482 | - wb = load_workbook(template_file) | |
| 483 | - ws = wb.active # Use the active sheet (Sheet4) | |
| 484 | - | |
| 485 | - # Find header row (row 2) | |
| 486 | - header_row_idx = 2 | |
| 487 | - | |
| 488 | - # Get column mapping from header row | |
| 489 | - column_mapping = {} | |
| 490 | - for col_idx in range(1, ws.max_column + 1): | |
| 491 | - cell_value = ws.cell(row=header_row_idx, column=col_idx).value | |
| 492 | - if cell_value: | |
| 493 | - column_mapping[cell_value] = col_idx | |
| 494 | - | |
| 495 | - # Start writing data from row 4 | |
| 496 | - data_start_row = 4 | |
| 497 | - | |
| 498 | - # Clear existing data rows | |
| 499 | - last_template_row = ws.max_row | |
| 500 | - if last_template_row >= data_start_row: | |
| 501 | - for row in range(data_start_row, last_template_row + 1): | |
| 502 | - for col in range(1, ws.max_column + 1): | |
| 503 | - ws.cell(row=row, column=col).value = None | |
| 504 | - | |
| 505 | - # Write data rows | |
| 506 | - for row_idx, excel_row in enumerate(excel_rows): | |
| 507 | - excel_row_num = data_start_row + row_idx | |
| 508 | - | |
| 509 | - # Write each field to corresponding column | |
| 510 | - for field_name, col_idx in column_mapping.items(): | |
| 511 | - if field_name in excel_row: | |
| 512 | - cell = ws.cell(row=excel_row_num, column=col_idx) | |
| 513 | - value = excel_row[field_name] | |
| 514 | - cell.value = value | |
| 515 | - | |
| 516 | - # Set alignment | |
| 517 | - if isinstance(value, str): | |
| 518 | - cell.alignment = Alignment(vertical='top', wrap_text=True) | |
| 519 | - elif isinstance(value, (int, float)): | |
| 520 | - cell.alignment = Alignment(vertical='top') | |
| 521 | - | |
| 522 | - # Save workbook | |
| 523 | - wb.save(output_file) | |
| 472 | + _create_excel_from_template_shared(template_file, output_file, excel_rows) | |
| 524 | 473 | print(f"Excel file created: {output_file}") |
| 525 | 474 | print(f" - Total rows: {len(excel_rows)}") |
| 526 | 475 | ... | ... |
| ... | ... | @@ -0,0 +1,67 @@ |
| 1 | +#!/usr/bin/env python3 | |
| 2 | +""" | |
| 3 | +Shared utilities for generating Shoplazza (店匠) product import Excel files | |
| 4 | +based on the provided template `docs/商品导入模板.xlsx`. | |
| 5 | + | |
| 6 | +We keep this in `scripts/` to maximize reuse by existing ad-hoc pipeline scripts. | |
| 7 | +""" | |
| 8 | + | |
| 9 | +from openpyxl import load_workbook | |
| 10 | +from openpyxl.styles import Alignment | |
| 11 | + | |
| 12 | + | |
def load_template_column_mapping(ws, header_row_idx=2):
    """
    Scan the template header row and map each non-empty header name
    (whitespace-stripped) to its 1-based column index.
    """
    header_cells = (
        (ws.cell(row=header_row_idx, column=col).value, col)
        for col in range(1, ws.max_column + 1)
    )
    return {str(name).strip(): col for name, col in header_cells if name}
| 24 | + | |
| 25 | + | |
def create_excel_from_template(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4):
    """
    Create an Excel file from the Shoplazza template and fill it with data rows.

    Args:
        template_file: Path to the Excel template file.
        output_file: Path the filled workbook is saved to.
        excel_rows: List[Dict[str, Any]] mapping template header -> value.
        header_row_idx: Header row index in the template (default 2).
        data_start_row: First data row index in the template (default 4).
    """
    wb = load_workbook(template_file)
    ws = wb.active

    column_mapping = load_template_column_mapping(ws, header_row_idx=header_row_idx)

    # Clear any leftover sample rows so stale template data never leaks into output.
    last_template_row = ws.max_row
    if last_template_row >= data_start_row:
        for row in range(data_start_row, last_template_row + 1):
            for col in range(1, ws.max_column + 1):
                ws.cell(row=row, column=col).value = None

    # Write data rows; keys absent from a row dict leave the cell untouched.
    for row_idx, excel_row in enumerate(excel_rows):
        excel_row_num = data_start_row + row_idx
        for field_name, col_idx in column_mapping.items():
            if field_name not in excel_row:
                continue
            cell = ws.cell(row=excel_row_num, column=col_idx)
            value = excel_row[field_name]
            cell.value = value
            # Text wraps; everything else is just top-aligned.
            if isinstance(value, str):
                cell.alignment = Alignment(vertical='top', wrap_text=True)
            else:
                cell.alignment = Alignment(vertical='top')

    wb.save(output_file)
    # f-strings for consistency with the companion pipeline scripts.
    print(f"Excel file created: {output_file}")
    print(f" - Total rows: {len(excel_rows)}")
| 66 | + | |
| 67 | + | ... | ... |
| ... | ... | @@ -0,0 +1,112 @@ |
| 1 | +#!/usr/bin/env python3 | |
| 2 | +""" | |
| 3 | +Shared helpers for generating Shoplazza product import Excel files from the | |
| 4 | +official template `docs/商品导入模板.xlsx`. | |
| 5 | + | |
| 6 | +We keep this module small and dependency-light (openpyxl only) so other scripts | |
| 7 | +can reuse the same template-writing behavior (header row mapping, data start | |
| 8 | +row, alignment). | |
| 9 | +""" | |
| 10 | + | |
| 11 | +import re | |
| 12 | +from datetime import datetime | |
| 13 | +from typing import Dict, Iterable, List, Optional | |
| 14 | + | |
| 15 | +from openpyxl import load_workbook | |
| 16 | +from openpyxl.styles import Alignment | |
| 17 | + | |
| 18 | + | |
def generate_handle(title: str) -> str:
    """
    Build a URL-friendly, ASCII-only handle from a product title, suitable for
    the Shoplazza `SEO URL Handle` field. Caller may prepend `products/`.
    """
    if not title:
        return "product"

    slug = str(title).lower()
    slug = re.sub(r"[^a-z0-9\s-]", "", slug)
    slug = re.sub(r"[-\s]+", "-", slug).strip("-")
    slug = slug[:255]
    return slug if slug else "product"
| 35 | + | |
| 36 | + | |
def parse_date_to_datetime_str(value) -> str:
    """
    Normalize common date representations to the Shoplazza template datetime
    string `YYYY-MM-DD HH:MM:SS`.

    Args:
        value: A datetime, a date-like string (or stringifiable object such as
            `datetime.date`), or None.

    Returns:
        Formatted datetime string, or "" when the value is empty or cannot be
        parsed with any of the supported layouts.
    """
    if value is None:
        return ""

    if isinstance(value, datetime):
        return value.strftime("%Y-%m-%d %H:%M:%S")

    s = str(value).strip()
    if not s:
        return ""

    # Most competitor sheets use YYYY-MM-DD; try the common layouts in order.
    # Date-only formats already parse to midnight, so no post-fix is needed,
    # and strptime raises only ValueError on mismatch — catch exactly that.
    for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"):
        try:
            return datetime.strptime(s, fmt).strftime("%Y-%m-%d %H:%M:%S")
        except ValueError:
            continue

    return ""
| 63 | + | |
| 64 | + | |
def create_excel_from_template(
    template_file: str,
    output_file: str,
    excel_rows: List[Dict[str, object]],
    *,
    header_row_idx: int = 2,
    data_start_row: int = 4,
    sheet_name: Optional[str] = None,
) -> None:
    """
    Fill the Shoplazza import template with product rows and save a copy.

    The official template keeps its headers on row 2 and its first data row
    on row 4 (both 1-based); the instruction row(s) in between are preserved.
    """
    workbook = load_workbook(template_file)
    sheet = workbook[sheet_name] if sheet_name else workbook.active

    # Header name -> 1-based column index, read from the template itself.
    header_to_col: Dict[str, int] = {}
    for col in range(1, sheet.max_column + 1):
        header = sheet.cell(row=header_row_idx, column=col).value
        if header:
            header_to_col[str(header).strip()] = col

    # Blank out whatever sample data the template ships with.
    for row in range(data_start_row, sheet.max_row + 1):
        for col in range(1, sheet.max_column + 1):
            sheet.cell(row=row, column=col).value = None

    # Write one template row per input dict; keys that aren't template
    # headers are silently ignored.
    for offset, row_data in enumerate(excel_rows):
        target_row = data_start_row + offset
        for header, col in header_to_col.items():
            if header not in row_data:
                continue
            value = row_data[header]
            target = sheet.cell(row=target_row, column=col)
            target.value = value
            if isinstance(value, str):
                target.alignment = Alignment(vertical="top", wrap_text=True)
            elif isinstance(value, (int, float)):
                target.alignment = Alignment(vertical="top")

    workbook.save(output_file)
| 111 | + | |
| 112 | + | ... | ... |