shoplazza_excel_template.py 4.72 KB
#!/usr/bin/env python3
"""
Shared utilities for generating Shoplazza (店匠) product import Excel files
based on the provided template `docs/商品导入模板.xlsx`.

We keep this in `scripts/` to maximize reuse by existing ad-hoc pipeline scripts.
"""

from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.styles import Alignment


def load_template_column_mapping(ws, header_row_idx=2):
    """
    Read the header row in the template sheet and build a mapping:
    header_name -> column_index (1-based).
    """
    column_mapping = {}
    for col_idx in range(1, ws.max_column + 1):
        cell_value = ws.cell(row=header_row_idx, column=col_idx).value
        if cell_value:
            column_mapping[str(cell_value).strip()] = col_idx
    return column_mapping


def create_excel_from_template(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4):
    """
    Create an Excel file from the Shoplazza template and fill with data rows.

    Args:
        template_file: Path to Excel template file
        output_file: Path to output Excel file
        excel_rows: List[Dict[str, Any]] mapping template header -> value
        header_row_idx: Header row index in template (default 2)
        data_start_row: Data start row index in template (default 4)
    """
    wb = load_workbook(template_file)
    ws = wb.active

    column_mapping = load_template_column_mapping(ws, header_row_idx=header_row_idx)

    # Clear existing data rows
    last_template_row = ws.max_row
    if last_template_row >= data_start_row:
        for row in range(data_start_row, last_template_row + 1):
            for col in range(1, ws.max_column + 1):
                ws.cell(row=row, column=col).value = None

    # Write data rows (OPT: only write fields that actually exist in excel_row)
    # This avoids looping over all 42 template columns for every output row.
    for row_idx, excel_row in enumerate(excel_rows):
        excel_row_num = data_start_row + row_idx
        for field_name, value in excel_row.items():
            col_idx = column_mapping.get(field_name)
            if not col_idx:
                continue
            cell = ws.cell(row=excel_row_num, column=col_idx)
            cell.value = value
            if isinstance(value, str):
                cell.alignment = Alignment(vertical='top', wrap_text=True)
            else:
                cell.alignment = Alignment(vertical='top')

    wb.save(output_file)
    print("Excel file created: {}".format(output_file))
    print("  - Total rows: {}".format(len(excel_rows)))


def create_excel_from_template_fast(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4):
    """
    Faster writer for large datasets.

    Instead of opening the template workbook in write mode and assigning cells one by one,
    we:
    - read the template's first (data_start_row-1) rows as values
    - build a header->index mapping from header_row_idx
    - create a new write_only workbook and append rows

    This is much faster for tens/hundreds of thousands of cells.
    """
    tpl_wb = load_workbook(template_file, read_only=True, data_only=True)
    tpl_ws = tpl_wb.active

    max_col = tpl_ws.max_column

    # Copy template "instruction" rows (typically rows 1-3) into output
    prefix_rows = list(tpl_ws.iter_rows(min_row=1, max_row=data_start_row - 1, values_only=True))

    header_values = None
    if 1 <= header_row_idx <= len(prefix_rows):
        header_values = prefix_rows[header_row_idx - 1]
    else:
        # Fallback: read header row directly
        header_values = next(tpl_ws.iter_rows(min_row=header_row_idx, max_row=header_row_idx, values_only=True))

    header_values = list(header_values)[:max_col]
    col_map = {}
    for i, v in enumerate(header_values):
        if v is None:
            continue
        col_map[str(v).strip()] = i  # 0-based

    wb = Workbook(write_only=True)
    ws = wb.create_sheet(title=tpl_ws.title)
    # remove default sheet if present (openpyxl may create one)
    if "Sheet" in wb.sheetnames and wb["Sheet"] is not ws:
        try:
            wb.remove(wb["Sheet"])
        except Exception:
            pass

    # Write prefix rows, normalized to max_col
    for r in prefix_rows:
        r = list(r)[:max_col]
        if len(r) < max_col:
            r = r + [None] * (max_col - len(r))
        ws.append(r)

    # Write data rows
    for excel_row in excel_rows:
        row_vals = [None] * max_col
        for field_name, value in excel_row.items():
            if field_name not in col_map:
                continue
            row_vals[col_map[field_name]] = value
        ws.append(row_vals)

    wb.save(output_file)
    print("Excel file created (fast): {}".format(output_file))
    print("  - Total rows: {}".format(len(excel_rows)))