亚马逊格式数据导入店匠

tangwang
1 parent 19d2d90f
Showing 53 changed files with 723 additions and 122 deletions Show diff stats
README.md
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363464.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363499.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363533.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363560.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363598.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363625.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363662.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363709.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363746.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363778.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363802.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363824.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363854.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363884.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363899.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363924.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363943.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363956.xlsx
data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363985.xlsx
@@ -14,11 +14,12 @@ source .env
  
 ## 测试pipeline
  
-fake数据 生成商品导入数据 提交到店匠的店铺：
+1. fake数据 生成商品导入数据 提交到店匠的店铺：
 cd /home/tw/SearchEngine && source /home/tw/miniconda3/etc/profile.d/conda.sh && conda activate searchengine && python scripts/csv_to_excel_multi_variant.py --output  with_colors.xlsx 
  
-自动同步到mysql
-mysql到ES：
+2. 后端：自动同步到mysql
+
+3. mysql到ES：
  
 python scripts/recreate_and_import.py \
     --tenant-id 162 \
@@ -0,0 +1,525 @@
+#!/usr/bin/env python3
+"""
+Convert competitor Excel exports (with Parent/Child ASIN structure) into
+Shoplazza (店匠) product import Excel format based on `docs/商品导入模板.xlsx`.
+
+Data source:
+- Directory with multiple `Competitor-*.xlsx` files.
+- Each file contains a main sheet + "Notes" sheet.
+- Column meanings (sample):
+  - ASIN: variant id (sku_id)
+  - 父ASIN: product id (spu_id)
+
+Output:
+- For each 父ASIN group:
+  - If only 1 ASIN: generate one "S" row
+  - Else: generate one "M" row + multiple "P" rows
+
+Important:
+- Variant dimensions are parsed primarily from the `SKU` column:
+    "Size: One Size | Color: Black"
+  and mapped into 款式1/2/3.
+"""
+
+import os
+import re
+import sys
+import argparse
+from datetime import datetime
+from collections import defaultdict, Counter
+from pathlib import Path
+
+from openpyxl import load_workbook
+
+# Allow running as `python scripts/xxx.py` without installing as a package
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+from shoplazza_excel_template import create_excel_from_template
+
+
+# Option names that get priority for the 款式1/2/3 slots. The position in this
+# list is the rank used by choose_option_keys: earlier entries sort first, and
+# names not listed here sort after all listed ones.
+PREFERRED_OPTION_KEYS = [
+    "Size", "Color", "Style", "Pattern", "Material", "Flavor", "Scent",
+    "Pack", "Pack of", "Number of Items", "Count", "Capacity", "Length",
+    "Width", "Height", "Model", "Configuration",
+]
+
+
+def clean_str(v):
+    """Return ``str(v)`` without surrounding whitespace; ``None`` becomes ``""``."""
+    return "" if v is None else str(v).strip()
+
+
+def html_escape(s):
+    """
+    Escape text for use in HTML element content or a double-quoted attribute.
+
+    ``None`` is treated as an empty string and surrounding whitespace is
+    stripped (via ``clean_str``). ``&`` is replaced first so the entities
+    produced for the other characters are not escaped a second time.
+    """
+    s = clean_str(s)
+    return (s.replace("&", "&amp;")
+             .replace("<", "&lt;")
+             .replace(">", "&gt;")
+             # build_description_html puts the escaped URL inside href="...";
+             # an unescaped double quote would terminate that attribute.
+             .replace('"', "&quot;"))
+
+
+def generate_handle(title):
+    """
+    Generate URL-friendly handle from title (ASCII only).
+    Keep consistent with existing scripts.
+
+    Characters other than a-z, 0-9, whitespace and hyphens are dropped; runs of
+    whitespace/hyphens collapse to a single hyphen. The result is capped at 255
+    characters and falls back to "product" when nothing usable remains.
+    """
+    handle = ("" if title is None else str(title)).lower()
+    # Single backslashes on purpose: inside a raw string, r"\s" is the regex
+    # whitespace class, whereas r"\\s" would mean "a backslash or the letter s".
+    handle = re.sub(r"[^a-z0-9\s-]", "", handle)
+    handle = re.sub(r"[-\s]+", "-", handle).strip("-")
+    if len(handle) > 255:
+        handle = handle[:255]
+    return handle or "product"
+
+
+def parse_date_to_template(dt_value):
+    """
+    Convert a date-like value to the template format ``YYYY-MM-DD HH:MM:SS``.
+
+    ``datetime`` instances are formatted directly; anything else is converted
+    to text and tried against a few common date layouts. ``None``, an empty
+    string, or text matching none of the layouts yields ``""``.
+    """
+    if dt_value is None or dt_value == "":
+        return ""
+    template_fmt = "%Y-%m-%d %H:%M:%S"
+    if isinstance(dt_value, datetime):
+        return dt_value.strftime(template_fmt)
+    text = clean_str(dt_value)
+    # common formats
+    candidates = ("%Y-%m-%d", "%Y/%m/%d", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S")
+    for candidate in candidates:
+        try:
+            parsed = datetime.strptime(text, candidate)
+        except Exception:
+            continue
+        return parsed.strftime(template_fmt)
+    return ""
+
+
+def parse_weight(weight_conv, weight_raw):
+    """
+    Return (weight_value, unit) where unit in {kg, lb, g, oz}.
+    Prefer '商品重量（单位换算）' like '68.04 g'.
+    Fallback to '商品重量' like '0.15 pounds'.
+
+    Each candidate is tried in turn, so an empty or unparseable converted
+    weight still falls back to the raw one. Returns ("", "") when neither
+    value contains a number followed by a recognised unit.
+    """
+    unit_aliases = {
+        "g": "g", "gram": "g", "grams": "g",
+        "kg": "kg", "kilogram": "kg", "kilograms": "kg",
+        "lb": "lb", "lbs": "lb", "pound": "lb", "pounds": "lb",
+        "oz": "oz", "ounce": "oz", "ounces": "oz",
+    }
+    for candidate in (weight_conv, weight_raw):
+        if candidate is None:
+            continue
+        # Single backslashes: in a raw string r"\." is a literal dot and r"\s"
+        # is whitespace; doubling them would require a literal backslash.
+        m = re.search(r"([0-9]+(?:\.[0-9]+)?)\s*([a-zA-Z]+)", str(candidate))
+        if not m:
+            continue
+        unit = unit_aliases.get(m.group(2).lower())
+        if unit is not None:
+            return (float(m.group(1)), unit)
+    return ("", "")
+
+
+def parse_dimensions_inches(dim_raw):
+    """
+    Template '尺寸信息': 'L,W,H' in inches.
+    Input example: '7.9 x 7.9 x 2 inches'
+
+    The first three numbers found are emitted in order, exactly as written; no
+    unit conversion is performed. Returns "" when fewer than three numbers are
+    present.
+    """
+    if dim_raw is None:
+        return ""
+    # r"\." is a literal decimal point; r"\\." would match a backslash plus
+    # any character and split "7.9" into "7" and "9".
+    nums = re.findall(r"[0-9]+(?:\.[0-9]+)?", str(dim_raw))
+    if len(nums) < 3:
+        return ""
+    return ",".join(nums[:3])
+
+
+def parse_sku_options(sku_text):
+    """
+    Split the 'SKU' column into an option-name -> option-value dict.
+
+    Segments are separated by "|" and each segment is "name: value" (split on
+    the first colon). Segments without a colon, or with an empty name or
+    value, are ignored. Example:
+      'Size: One Size | Color: Black' -> {'Size':'One Size','Color':'Black'}
+    """
+    options = {}
+    for segment in clean_str(sku_text).split("|"):
+        name, colon, value = segment.partition(":")
+        if not colon:
+            continue
+        name = name.strip()
+        value = value.strip()
+        if name and value:
+            options[name] = value
+    return options
+
+
+def choose_option_keys(variant_dicts, max_keys=3):
+    """
+    Pick at most ``max_keys`` option names for one product group.
+
+    Names listed in PREFERRED_OPTION_KEYS come first (in list order); the rest
+    are ordered by how many variants carry a non-empty value for them, with a
+    case-insensitive alphabetical tie-break. Returns [] when no variant has
+    any option.
+    """
+    usage = Counter(
+        name
+        for options in variant_dicts
+        for name, value in options.items()
+        if value
+    )
+    if not usage:
+        return []
+
+    rank_of = {name: pos for pos, name in enumerate(PREFERRED_OPTION_KEYS)}
+    unranked = 10 ** 6
+    ordered = sorted(
+        usage,
+        key=lambda name: (rank_of.get(name, unranked), -usage[name], name.lower()),
+    )
+    return ordered[:max_keys]
+
+
+def build_description_html(title, details, product_url):
+    """
+    Render the product description as an HTML fragment.
+
+    Emits, in order and only when present: the title as a paragraph, the
+    "|"-separated ``details`` as a bullet list (first 30 items), and a
+    "Source" link to ``product_url``. All text goes through html_escape.
+    """
+    html = ""
+    if title:
+        html += "<p>{}</p>".format(html_escape(title))
+
+    bullets = []
+    for raw in clean_str(details).split("|"):
+        text = raw.strip()
+        if text:
+            bullets.append(text)
+    if bullets:
+        items = "".join("<li>{}</li>".format(html_escape(b)) for b in bullets[:30])
+        html += "<ul>{}</ul>".format(items)
+
+    if product_url:
+        html += '<p>Source: <a href="{0}">{0}</a></p>'.format(html_escape(product_url))
+    return html
+
+
+def competitor_sheet(ws):
+    """
+    Map each non-empty header in row 1 of ``ws`` to its 1-based column index.
+
+    Header text is normalised with clean_str; if the same header appears more
+    than once, the right-most column wins.
+    """
+    idx = {}
+    for col in range(1, ws.max_column + 1):
+        name = clean_str(ws.cell(1, col).value)
+        if name:
+            idx[name] = col
+    return idx
+
+
+def read_competitor_rows_from_file(xlsx_path, max_rows=None):
+    """
+    Load variant rows from the first non-"Notes" sheet of a competitor workbook.
+
+    Args:
+        xlsx_path: path of the competitor .xlsx file.
+        max_rows: optional cap on the number of data rows scanned (header
+            excluded); rows without an ASIN still count towards the cap.
+
+    Returns:
+        List of dicts keyed by the source column names. The two price columns
+        keep their raw cell values; every other field is normalised with
+        clean_str, and 父ASIN falls back to the row's own ASIN when empty.
+        Returns [] when the workbook has no sheet other than "Notes".
+
+    Raises:
+        RuntimeError: if a required column is missing from the header row.
+    """
+    required = ["ASIN", "父ASIN", "商品标题", "商品主图", "SKU", "详细参数", "价格($)", "prime价格($)",
+                "上架时间", "类目路径", "大类目", "小类目", "品牌", "品牌链接", "商品详情页链接",
+                "商品重量（单位换算）", "商品重量", "商品尺寸"]
+    raw_columns = ("价格($)", "prime价格($)")
+    text_columns = [k for k in required if k not in ("ASIN", "父ASIN") + raw_columns]
+
+    wb = load_workbook(xlsx_path, read_only=True, data_only=True)
+    try:
+        # pick first non-Notes sheet
+        sheet_name = next((name for name in wb.sheetnames if str(name).lower() != "notes"), None)
+        if sheet_name is None:
+            return []
+
+        # Stream the sheet once. In read-only mode random access through
+        # ws.cell() re-scans the sheet for every cell, and ws.max_row may be
+        # None when the file carries no dimension information.
+        row_iter = wb[sheet_name].iter_rows(values_only=True)
+        header = next(row_iter, None) or ()
+
+        idx = {}  # header text -> 0-based position within a row tuple
+        for pos, raw in enumerate(header):
+            name = clean_str(raw)
+            if name:
+                idx[name] = pos
+        for k in required:
+            if k not in idx:
+                raise RuntimeError("Missing column '{}' in {} sheet {}".format(k, xlsx_path, sheet_name))
+
+        def cell(values, column):
+            # Rows may be shorter than the header when trailing cells are empty.
+            pos = idx[column]
+            return values[pos] if pos < len(values) else None
+
+        limit = None if max_rows is None else int(max_rows)
+        rows = []
+        for scanned, values in enumerate(row_iter):
+            if limit is not None and scanned >= limit:
+                break
+            asin = clean_str(cell(values, "ASIN"))
+            if not asin:
+                continue
+            row = {
+                "ASIN": asin,
+                "父ASIN": clean_str(cell(values, "父ASIN")) or asin,
+            }
+            for column in text_columns:
+                row[column] = clean_str(cell(values, column))
+            for column in raw_columns:
+                row[column] = cell(values, column)
+            rows.append(row)
+        return rows
+    finally:
+        # Read-only workbooks keep the underlying file open until closed.
+        wb.close()
+
+
+def to_price(v):
+    """
+    Coerce a price cell to ``float``.
+
+    Numbers and numeric strings are converted directly. For other text (for
+    example "$12.99") the first decimal number found is used. Returns ``None``
+    for ``None``, an empty string, or text containing no number.
+    """
+    if v is None or v == "":
+        return None
+    try:
+        return float(v)
+    except (TypeError, ValueError):
+        pass
+    # r"\." is a literal decimal point; with r"\\." the fractional part would
+    # never match and "12.99" would be read as 12.
+    m = re.search(r"[0-9]+(?:\.[0-9]+)?", str(v))
+    return float(m.group(0)) if m else None
+
+
+def build_common_fields(base_row, spu_id):
+    """
+    Build the template columns that every row type (S, M, P) starts from.
+
+    Args:
+        base_row: competitor row dict (source column name -> value).
+        spu_id: product-level id, written to 商品spu and 商品备注.
+
+    Returns:
+        Dict keyed by template header names. Row-type specific columns
+        (商品属性*, 款式1-3, prices, SKU, images) are added by the callers.
+    """
+    def text(column):
+        return base_row.get(column) or ""
+
+    title = base_row.get("商品标题") or "Product"
+    brand = text("品牌")
+    big_cat = text("大类目")
+    small_cat = text("小类目")
+    cat_path = text("类目路径")
+    detail_url = base_row.get("商品详情页链接")
+
+    handle = generate_handle(title)
+    if handle and not handle.startswith("products/"):
+        handle = "products/" + handle
+
+    seo_description = " ".join(x for x in (brand, title, big_cat) if x)[:5000]
+    seo_keywords = ",".join(x for x in (title, brand, big_cat, small_cat) if x)
+    tags = ",".join(x for x in (brand, big_cat, small_cat) if x)
+
+    description = build_description_html(
+        title=title,
+        details=base_row.get("详细参数"),
+        product_url=detail_url,
+    )
+
+    weight_val, weight_unit = parse_weight(base_row.get("商品重量（单位换算）"), base_row.get("商品重量"))
+
+    # Album defaults to the top-level category, else the first segment of the
+    # category path.
+    album = big_cat
+    if not album and cat_path:
+        album = cat_path.split(":")[0]
+
+    remark = "ASIN:{}; ParentASIN:{}; CategoryPath:{}".format(
+        base_row.get("ASIN", ""), spu_id, cat_path[:200]
+    )
+
+    return {
+        "商品ID": "",
+        "创建时间": parse_date_to_template(base_row.get("上架时间")),
+        "商品标题*": title[:255],
+        "商品副标题": "{} {}".format(brand, big_cat).strip()[:600],
+        "商品描述": description,
+        "SEO标题": title[:5000],
+        "SEO描述": seo_description,
+        "SEO URL Handle": handle,
+        "SEO URL 重定向": "N",
+        "SEO关键词": seo_keywords[:5000],
+        "商品上架": "Y",
+        "需要物流": "Y",
+        "商品收税": "N",
+        "商品spu": spu_id[:100],
+        "启用虚拟销量": "N",
+        "虚拟销量值": "",
+        "跟踪库存": "Y",
+        "库存规则*": "1",
+        "专辑名称": album,
+        "标签": tags,
+        "供应商名称": "Amazon",
+        "供应商URL": detail_url or base_row.get("品牌链接") or "",
+        "商品重量": weight_val,
+        "重量单位": weight_unit,
+        # default inventory settings (data source has no stock)
+        "商品库存": 100,
+        "尺寸信息": parse_dimensions_inches(base_row.get("商品尺寸")),
+        "原产地国别": "",
+        "HS（协调制度）代码": "",
+        "商品备注": remark[:500],
+        "款式备注": "",
+    }
+
+
+def build_s_row(base_row):
+    """
+    Build the single "S" row for a product that has exactly one variant.
+
+    Price prefers the prime price, then the regular price, then 9.99; the
+    same value is used for both sale and original price.
+    """
+    spu_id = base_row.get("父ASIN") or base_row.get("ASIN")
+    price = to_price(base_row.get("prime价格($)")) or to_price(base_row.get("价格($)")) or 9.99
+    image = base_row.get("商品主图") or ""
+
+    row = dict(build_common_fields(base_row, spu_id=spu_id))
+    row["商品属性*"] = "S"
+    for style_column in ("款式1", "款式2", "款式3"):
+        row[style_column] = ""
+    row["商品售价*"] = price
+    row["商品原价"] = price
+    row["成本价"] = ""
+    row["商品SKU"] = base_row.get("ASIN") or ""
+    row["商品条形码"] = ""
+    row["商品图片*"] = image
+    row["商品主图"] = image
+    return row
+
+
+def build_m_p_rows(variant_rows):
+    """
+    Build one "M" row followed by one "P" row per variant.
+
+    variant_rows: List[dict] with same 父ASIN. The first variant supplies the
+    product-level (M row) data. Option names come from the variants' SKU text;
+    when none can be parsed, a single option named "Variant" is used and each
+    P row carries its ASIN as the option value.
+    """
+    first = variant_rows[0]
+    spu_id = first.get("父ASIN") or first.get("ASIN")
+
+    option_keys = choose_option_keys(
+        [parse_sku_options(v.get("SKU")) for v in variant_rows], max_keys=3
+    )
+    if not option_keys:
+        option_keys = ["Variant"]
+
+    # M row
+    key1, key2, key3 = (option_keys + ["", "", ""])[:3]
+    main_image = first.get("商品主图") or ""
+    m_row = dict(build_common_fields(first, spu_id=spu_id))
+    m_row.update({
+        "商品属性*": "M",
+        "款式1": key1,
+        "款式2": key2,
+        "款式3": key3,
+        "商品售价*": "",
+        "商品原价": "",
+        "成本价": "",
+        "商品SKU": "",
+        "商品条形码": "",
+        "商品图片*": main_image,
+        "商品主图": main_image,
+    })
+    # For M row, these SKU-level fields should be empty per template guidance
+    for sku_level in ("商品重量", "重量单位", "商品库存", "尺寸信息"):
+        m_row[sku_level] = ""
+
+    rows = [m_row]
+
+    # SPU-only fields are blanked on P rows
+    spu_only = (
+        "商品副标题", "商品描述", "SEO标题", "SEO描述", "SEO URL Handle",
+        "SEO URL 重定向", "SEO关键词", "专辑名称", "标签", "供应商名称",
+        "供应商URL", "商品备注",
+    )
+
+    # P rows
+    for v in variant_rows:
+        p_row = dict(build_common_fields(v, spu_id=spu_id))
+        p_row.update(dict.fromkeys(spu_only, ""))
+
+        if option_keys == ["Variant"]:
+            values = [v.get("ASIN")]
+        else:
+            parsed = parse_sku_options(v.get("SKU"))
+            values = [parsed.get(k, "") for k in option_keys]
+        val1, val2, val3 = (values + ["", "", ""])[:3]
+
+        price = to_price(v.get("prime价格($)")) or to_price(v.get("价格($)")) or 9.99
+
+        p_row.update({
+            "商品属性*": "P",
+            "款式1": val1,
+            "款式2": val2,
+            "款式3": val3,
+            "商品售价*": price,
+            "商品原价": price,
+            "成本价": "",
+            "商品SKU": v.get("ASIN") or "",
+            "商品条形码": "",
+            # P row supports one variant image; we use variant's main image
+            "商品图片*": v.get("商品主图") or "",
+            "商品主图": "",
+        })
+        rows.append(p_row)
+
+    return rows
+
+
+def main():
+    """
+    Command-line entry point.
+
+    Reads every .xlsx file in --input-dir, de-duplicates variants by ASIN
+    (first occurrence wins), groups them by 父ASIN and writes the resulting
+    S / M+P rows into a copy of the Shoplazza template.
+
+    Raises:
+        RuntimeError: if the input directory or the template does not exist.
+    """
+    parser = argparse.ArgumentParser(description="Convert competitor xlsx files to Shoplazza import xlsx")
+    parser.add_argument("--input-dir", default="data/mai_jia_jing_ling/products_data", help="Directory containing competitor xlsx files")
+    parser.add_argument("--template", default="docs/商品导入模板.xlsx", help="Shoplazza import template xlsx")
+    parser.add_argument("--output", default="competitor_shoplazza_import.xlsx", help="Output xlsx file path")
+    parser.add_argument("--max-files", type=int, default=None, help="Limit number of xlsx files to read (for testing)")
+    parser.add_argument("--max-rows-per-file", type=int, default=None, help="Limit rows per xlsx file (for testing)")
+    parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)")
+    args = parser.parse_args()
+
+    input_dir = args.input_dir
+    if not os.path.isdir(input_dir):
+        raise RuntimeError("input-dir not found: {}".format(input_dir))
+    if not os.path.exists(args.template):
+        raise RuntimeError("template not found: {}".format(args.template))
+
+    # "~$name.xlsx" files are lock files Excel creates next to an open
+    # workbook; they are not readable workbooks, so skip them up front.
+    files = sorted(
+        os.path.join(input_dir, f)
+        for f in os.listdir(input_dir)
+        if f.lower().endswith(".xlsx") and not f.startswith("~$")
+    )
+    if args.max_files is not None:
+        files = files[: int(args.max_files)]
+
+    print("Reading competitor files: {} (from {})".format(len(files), input_dir), flush=True)
+
+    groups = defaultdict(list)  # spu_id -> [variant rows]
+    seen_asin = set()
+
+    for fp in files:
+        print("  - loading: {}".format(fp), flush=True)
+        try:
+            rows = read_competitor_rows_from_file(fp, max_rows=args.max_rows_per_file)
+        except Exception as e:
+            # Best effort: one unreadable file must not abort the whole run.
+            print("WARN: failed to read {}: {}".format(fp, e), flush=True)
+            continue
+        print("    loaded rows: {}".format(len(rows)), flush=True)
+
+        for r in rows:
+            asin = r.get("ASIN")
+            if asin in seen_asin:
+                continue
+            seen_asin.add(asin)
+            spu_id = r.get("父ASIN") or asin
+            groups[spu_id].append(r)
+
+    print("Collected variants: {}, SPU groups: {}".format(len(seen_asin), len(groups)), flush=True)
+
+    max_products = None if args.max_products is None else int(args.max_products)
+    excel_rows = []
+    spu_count = 0  # number of SPU groups actually written
+
+    for spu_id, variants in groups.items():
+        if not variants:
+            continue
+        # Check the limit before counting so spu_count reflects what was
+        # emitted rather than overshooting by one when the limit is hit.
+        if max_products is not None and spu_count >= max_products:
+            break
+        spu_count += 1
+        if len(variants) == 1:
+            excel_rows.append(build_s_row(variants[0]))
+        else:
+            excel_rows.extend(build_m_p_rows(variants))
+
+    print("Generated Excel rows: {} (SPU groups output: {})".format(len(excel_rows), spu_count), flush=True)
+    create_excel_from_template(args.template, args.output, excel_rows)
+
+
+if __name__ == "__main__":
+    main()
+
+
@@ -22,6 +22,10 @@ from openpyxl import load_workbook
 from openpyxl.styles import Font, Alignment
 from openpyxl.utils import get_column_letter
  
+# Shared helpers (keeps template writing consistent across scripts)
+from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared
+from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared
+
 # Add parent directory to path
 sys.path.insert(0, str(Path(__file__).parent.parent))
  
@@ -82,21 +86,8 @@ def generate_handle(title: str) -> str:
     Returns:
         URL-friendly handle (ASCII only)
     """
-    # Convert to lowercase
-    handle = title.lower()
-    
-    # Remove non-ASCII characters, keep only letters, numbers, spaces, and hyphens
-    handle = re.sub(r'[^a-z0-9\s-]', '', handle)
-    
-    # Replace spaces and multiple hyphens with single hyphen
-    handle = re.sub(r'[-\s]+', '-', handle)
-    handle = handle.strip('-')
-    
-    # Limit length
-    if len(handle) > 255:
-        handle = handle[:255]
-    
-    return handle or 'product'
+    # Keep backward-compatible function name while delegating to shared helper.
+    return _generate_handle_shared(title)
  
  
 def read_csv_file(csv_file: str) -> list:
@@ -255,52 +246,8 @@ def create_excel_from_template(template_file: str, output_file: str, csv_data_li
         output_file: Path to output Excel file
         csv_data_list: List of parsed CSV data dictionaries
     """
-    # Load template
-    wb = load_workbook(template_file)
-    ws = wb.active  # Use the active sheet (Sheet4)
-    
-    # Find header row (row 2, index 1)
-    header_row_idx = 2  # Row 2 in Excel (1-based, but header is at index 1 in pandas)
-    
-    # Get column mapping from header row
-    column_mapping = {}
-    for col_idx in range(1, ws.max_column + 1):
-        cell_value = ws.cell(row=header_row_idx, column=col_idx).value
-        if cell_value:
-            column_mapping[cell_value] = col_idx
-    
-    # Start writing data from row 4 (after header and instructions)
-    data_start_row = 4  # Row 4 in Excel (1-based)
-    
-    # Clear existing data rows (from row 4 onwards, but keep header and instructions)
-    # Find the last row with data in the template
-    last_template_row = ws.max_row
-    if last_template_row >= data_start_row:
-        # Clear data rows (keep header and instruction rows)
-        for row in range(data_start_row, last_template_row + 1):
-            for col in range(1, ws.max_column + 1):
-                ws.cell(row=row, column=col).value = None
-    
-    # Convert CSV data to Excel rows
-    for row_idx, csv_data in enumerate(csv_data_list):
-        excel_row = csv_to_excel_row(csv_data)
-        excel_row_num = data_start_row + row_idx
-        
-        # Write each field to corresponding column
-        for field_name, col_idx in column_mapping.items():
-            if field_name in excel_row:
-                cell = ws.cell(row=excel_row_num, column=col_idx)
-                value = excel_row[field_name]
-                cell.value = value
-                
-                # Set alignment for text fields
-                if isinstance(value, str):
-                    cell.alignment = Alignment(vertical='top', wrap_text=True)
-                elif isinstance(value, (int, float)):
-                    cell.alignment = Alignment(vertical='top')
-    
-    # Save workbook
-    wb.save(output_file)
+    excel_rows = [csv_to_excel_row(d) for d in csv_data_list]
+    _create_excel_from_template_shared(template_file, output_file, excel_rows)
     print(f"Excel file created: {output_file}")
     print(f"  - Total rows: {len(csv_data_list)}")
  
@@ -22,6 +22,10 @@ import itertools
 from openpyxl import load_workbook
 from openpyxl.styles import Alignment
  
+# Shared helpers (keeps template writing consistent across scripts)
+from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared
+from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared
+
 # Add parent directory to path
 sys.path.insert(0, str(Path(__file__).parent.parent))
  
@@ -90,21 +94,8 @@ def generate_handle(title: str) -> str:
     Returns:
         URL-friendly handle (ASCII only)
     """
-    # Convert to lowercase
-    handle = title.lower()
-    
-    # Remove non-ASCII characters, keep only letters, numbers, spaces, and hyphens
-    handle = re.sub(r'[^a-z0-9\s-]', '', handle)
-    
-    # Replace spaces and multiple hyphens with single hyphen
-    handle = re.sub(r'[-\s]+', '-', handle)
-    handle = handle.strip('-')
-    
-    # Limit length
-    if len(handle) > 255:
-        handle = handle[:255]
-    
-    return handle or 'product'
+    # Keep backward-compatible function name while delegating to shared helper.
+    return _generate_handle_shared(title)
  
  
 def extract_material_from_title(title: str) -> str:
@@ -478,49 +469,7 @@ def create_excel_from_template(template_file: str, output_file: str, excel_rows:
         output_file: Path to output Excel file
         excel_rows: List of dictionaries mapping Excel column names to values
     """
-    # Load template
-    wb = load_workbook(template_file)
-    ws = wb.active  # Use the active sheet (Sheet4)
-    
-    # Find header row (row 2)
-    header_row_idx = 2
-    
-    # Get column mapping from header row
-    column_mapping = {}
-    for col_idx in range(1, ws.max_column + 1):
-        cell_value = ws.cell(row=header_row_idx, column=col_idx).value
-        if cell_value:
-            column_mapping[cell_value] = col_idx
-    
-    # Start writing data from row 4
-    data_start_row = 4
-    
-    # Clear existing data rows
-    last_template_row = ws.max_row
-    if last_template_row >= data_start_row:
-        for row in range(data_start_row, last_template_row + 1):
-            for col in range(1, ws.max_column + 1):
-                ws.cell(row=row, column=col).value = None
-    
-    # Write data rows
-    for row_idx, excel_row in enumerate(excel_rows):
-        excel_row_num = data_start_row + row_idx
-        
-        # Write each field to corresponding column
-        for field_name, col_idx in column_mapping.items():
-            if field_name in excel_row:
-                cell = ws.cell(row=excel_row_num, column=col_idx)
-                value = excel_row[field_name]
-                cell.value = value
-                
-                # Set alignment
-                if isinstance(value, str):
-                    cell.alignment = Alignment(vertical='top', wrap_text=True)
-                elif isinstance(value, (int, float)):
-                    cell.alignment = Alignment(vertical='top')
-    
-    # Save workbook
-    wb.save(output_file)
+    _create_excel_from_template_shared(template_file, output_file, excel_rows)
     print(f"Excel file created: {output_file}")
     print(f"  - Total rows: {len(excel_rows)}")
  
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+"""
+Shared utilities for generating Shoplazza (店匠) product import Excel files
+based on the provided template `docs/商品导入模板.xlsx`.
+
+We keep this in `scripts/` to maximize reuse by existing ad-hoc pipeline scripts.
+"""
+
+from openpyxl import load_workbook
+from openpyxl.styles import Alignment
+
+
+def load_template_column_mapping(ws, header_row_idx=2):
+    """
+    Map each non-empty header cell of the template to its 1-based column.
+
+    Header text is converted to ``str`` and stripped. Columns are scanned left
+    to right, so a repeated header resolves to its right-most column.
+    """
+    header_cells = (
+        (col, ws.cell(row=header_row_idx, column=col).value)
+        for col in range(1, ws.max_column + 1)
+    )
+    return {str(value).strip(): col for col, value in header_cells if value}
+
+
+def create_excel_from_template(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4):
+    """
+    Fill a copy of the Shoplazza template with ``excel_rows`` and save it.
+
+    Args:
+        template_file: Path to Excel template file
+        output_file: Path to output Excel file
+        excel_rows: List[Dict[str, Any]] mapping template header -> value
+        header_row_idx: Header row index in template (default 2)
+        data_start_row: Data start row index in template (default 4)
+
+    Only the active sheet is touched. Keys in a row dict that do not match a
+    template header are ignored.
+    """
+    wb = load_workbook(template_file)
+    ws = wb.active
+
+    column_mapping = load_template_column_mapping(ws, header_row_idx=header_row_idx)
+
+    # Blank out whatever sample data the template carries from the first data
+    # row downwards (an empty range when the template has no such rows).
+    for stale_row in range(data_start_row, ws.max_row + 1):
+        for col in range(1, ws.max_column + 1):
+            ws.cell(row=stale_row, column=col).value = None
+
+    # Write data rows
+    for offset, record in enumerate(excel_rows):
+        target_row = data_start_row + offset
+        for field_name, col_idx in column_mapping.items():
+            if field_name in record:
+                value = record[field_name]
+                cell = ws.cell(row=target_row, column=col_idx)
+                cell.value = value
+                # Text wraps; every other value type is only top-aligned.
+                cell.alignment = (
+                    Alignment(vertical='top', wrap_text=True)
+                    if isinstance(value, str)
+                    else Alignment(vertical='top')
+                )
+
+    wb.save(output_file)
+    print("Excel file created: {}".format(output_file))
+    print("  - Total rows: {}".format(len(excel_rows)))
+
+
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+"""
+Shared helpers for generating Shoplazza product import Excel files from the
+official template `docs/商品导入模板.xlsx`.
+
+We keep this module small and dependency-light (openpyxl only) so other scripts
+can reuse the same template-writing behavior (header row mapping, data start
+row, alignment).
+"""
+
+import re
+from datetime import datetime
+from typing import Dict, Iterable, List, Optional
+
+from openpyxl import load_workbook
+from openpyxl.styles import Alignment
+
+
+def generate_handle(title: str) -> str:
+    """
+    Build an ASCII-only, URL-friendly slug for the Shoplazza `SEO URL Handle`
+    field. Caller may prepend `products/`.
+
+    Falsy titles, and titles with no a-z / 0-9 characters left after cleaning,
+    yield "product". The slug is capped at 255 characters.
+    """
+    if not title:
+        return "product"
+
+    lowered = str(title).lower()
+    kept = re.sub(r"[^a-z0-9\s-]", "", lowered)
+    slug = re.sub(r"[-\s]+", "-", kept).strip("-")[:255]
+    return slug or "product"
+
+
+def parse_date_to_datetime_str(value) -> str:
+    """
+    Parse common date strings into Shoplazza template datetime string:
+    `YYYY-MM-DD HH:MM:SS`. If parsing fails, returns empty string.
+
+    ``datetime`` instances are formatted directly. Date-only inputs get a
+    time of 00:00:00, which is what ``strptime`` produces for formats without
+    time fields.
+    """
+    if value is None:
+        return ""
+
+    out_fmt = "%Y-%m-%d %H:%M:%S"
+    if isinstance(value, datetime):
+        return value.strftime(out_fmt)
+
+    s = str(value).strip()
+    if not s:
+        return ""
+
+    # Most competitor sheets use YYYY-MM-DD
+    for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"):
+        try:
+            parsed = datetime.strptime(s, fmt)
+        except ValueError:
+            # Text does not match this layout; try the next one.
+            continue
+        return parsed.strftime(out_fmt)
+
+    return ""
+
+
+def create_excel_from_template(
+    template_file: str,
+    output_file: str,
+    excel_rows: List[Dict[str, object]],
+    *,
+    header_row_idx: int = 2,
+    data_start_row: int = 4,
+    sheet_name: Optional[str] = None,
+) -> None:
+    """
+    Fill a copy of the Shoplazza import template with ``excel_rows`` and save
+    it to ``output_file``.
+
+    - Header row is expected at row 2 (1-based) in the official template.
+    - Data starts at row 4 (1-based), after the instruction row(s).
+    - ``sheet_name`` selects the sheet to fill; the active sheet is used when
+      it is not given.
+    - Row-dict keys that match no template header are ignored.
+    """
+    wb = load_workbook(template_file)
+    ws = wb.active if not sheet_name else wb[sheet_name]
+
+    # Template header text (stripped) -> 1-based column index.
+    column_mapping: Dict[str, int] = {}
+    for col_idx in range(1, ws.max_column + 1):
+        header = ws.cell(row=header_row_idx, column=col_idx).value
+        if header:
+            column_mapping[str(header).strip()] = col_idx
+
+    # Clear existing data rows (an empty range when there are none).
+    for stale_row in range(data_start_row, ws.max_row + 1):
+        for col in range(1, ws.max_column + 1):
+            ws.cell(row=stale_row, column=col).value = None
+
+    # Write data rows
+    for offset, record in enumerate(excel_rows):
+        target_row = data_start_row + offset
+        for field_name, col_idx in column_mapping.items():
+            if field_name in record:
+                value = record[field_name]
+                cell = ws.cell(row=target_row, column=col_idx)
+                cell.value = value
+                # Text wraps; numbers are only top-aligned; other value types
+                # keep the cell's existing alignment.
+                if isinstance(value, str):
+                    cell.alignment = Alignment(vertical="top", wrap_text=True)
+                elif isinstance(value, (int, float)):
+                    cell.alignment = Alignment(vertical="top")
+
+    wb.save(output_file)
+
+