#!/usr/bin/env python3 """ Shared utilities for generating Shoplazza (店匠) product import Excel files based on the provided template `docs/商品导入模板.xlsx`. We keep this in `scripts/` to maximize reuse by existing ad-hoc pipeline scripts. """ from openpyxl import Workbook from openpyxl import load_workbook from openpyxl.styles import Alignment def load_template_column_mapping(ws, header_row_idx=2): """ Read the header row in the template sheet and build a mapping: header_name -> column_index (1-based). """ column_mapping = {} for col_idx in range(1, ws.max_column + 1): cell_value = ws.cell(row=header_row_idx, column=col_idx).value if cell_value: column_mapping[str(cell_value).strip()] = col_idx return column_mapping def create_excel_from_template(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4): """ Create an Excel file from the Shoplazza template and fill with data rows. Args: template_file: Path to Excel template file output_file: Path to output Excel file excel_rows: List[Dict[str, Any]] mapping template header -> value header_row_idx: Header row index in template (default 2) data_start_row: Data start row index in template (default 4) """ wb = load_workbook(template_file) ws = wb.active column_mapping = load_template_column_mapping(ws, header_row_idx=header_row_idx) # Clear existing data rows last_template_row = ws.max_row if last_template_row >= data_start_row: for row in range(data_start_row, last_template_row + 1): for col in range(1, ws.max_column + 1): ws.cell(row=row, column=col).value = None # Write data rows (OPT: only write fields that actually exist in excel_row) # This avoids looping over all 42 template columns for every output row. for row_idx, excel_row in enumerate(excel_rows): excel_row_num = data_start_row + row_idx for field_name, value in excel_row.items(): col_idx = column_mapping.get(field_name) if not col_idx: continue cell = ws.cell(row=excel_row_num, column=col_idx) cell.value = value if isinstance(value, str): cell.alignment = Alignment(vertical='top', wrap_text=True) else: cell.alignment = Alignment(vertical='top') wb.save(output_file) print("Excel file created: {}".format(output_file)) print(" - Total rows: {}".format(len(excel_rows))) def create_excel_from_template_fast(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4): """ Faster writer for large datasets. Instead of opening the template workbook in write mode and assigning cells one by one, we: - read the template's first (data_start_row-1) rows as values - build a header->index mapping from header_row_idx - create a new write_only workbook and append rows This is much faster for tens/hundreds of thousands of cells. """ tpl_wb = load_workbook(template_file, read_only=True, data_only=True) tpl_ws = tpl_wb.active max_col = tpl_ws.max_column # Copy template "instruction" rows (typically rows 1-3) into output prefix_rows = list(tpl_ws.iter_rows(min_row=1, max_row=data_start_row - 1, values_only=True)) header_values = None if 1 <= header_row_idx <= len(prefix_rows): header_values = prefix_rows[header_row_idx - 1] else: # Fallback: read header row directly header_values = next(tpl_ws.iter_rows(min_row=header_row_idx, max_row=header_row_idx, values_only=True)) header_values = list(header_values)[:max_col] col_map = {} for i, v in enumerate(header_values): if v is None: continue col_map[str(v).strip()] = i # 0-based wb = Workbook(write_only=True) ws = wb.create_sheet(title=tpl_ws.title) # remove default sheet if present (openpyxl may create one) if "Sheet" in wb.sheetnames and wb["Sheet"] is not ws: try: wb.remove(wb["Sheet"]) except Exception: pass # Write prefix rows, normalized to max_col for r in prefix_rows: r = list(r)[:max_col] if len(r) < max_col: r = r + [None] * (max_col - len(r)) ws.append(r) # Write data rows for excel_row in excel_rows: row_vals = [None] * max_col for field_name, value in excel_row.items(): if field_name not in col_map: continue row_vals[col_map[field_name]] = value ws.append(row_vals) wb.save(output_file) print("Excel file created (fast): {}".format(output_file)) print(" - Total rows: {}".format(len(excel_rows)))