shoplazza_excel_template.py
4.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env python3
"""
Shared utilities for generating Shoplazza (店匠) product import Excel files
based on the provided template `docs/商品导入模板.xlsx`.
We keep this in `scripts/` to maximize reuse by existing ad-hoc pipeline scripts.
"""
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.styles import Alignment
def load_template_column_mapping(ws, header_row_idx=2):
"""
Read the header row in the template sheet and build a mapping:
header_name -> column_index (1-based).
"""
column_mapping = {}
for col_idx in range(1, ws.max_column + 1):
cell_value = ws.cell(row=header_row_idx, column=col_idx).value
if cell_value:
column_mapping[str(cell_value).strip()] = col_idx
return column_mapping
def create_excel_from_template(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4):
"""
Create an Excel file from the Shoplazza template and fill with data rows.
Args:
template_file: Path to Excel template file
output_file: Path to output Excel file
excel_rows: List[Dict[str, Any]] mapping template header -> value
header_row_idx: Header row index in template (default 2)
data_start_row: Data start row index in template (default 4)
"""
wb = load_workbook(template_file)
ws = wb.active
column_mapping = load_template_column_mapping(ws, header_row_idx=header_row_idx)
# Clear existing data rows
last_template_row = ws.max_row
if last_template_row >= data_start_row:
for row in range(data_start_row, last_template_row + 1):
for col in range(1, ws.max_column + 1):
ws.cell(row=row, column=col).value = None
# Write data rows (OPT: only write fields that actually exist in excel_row)
# This avoids looping over all 42 template columns for every output row.
for row_idx, excel_row in enumerate(excel_rows):
excel_row_num = data_start_row + row_idx
for field_name, value in excel_row.items():
col_idx = column_mapping.get(field_name)
if not col_idx:
continue
cell = ws.cell(row=excel_row_num, column=col_idx)
cell.value = value
if isinstance(value, str):
cell.alignment = Alignment(vertical='top', wrap_text=True)
else:
cell.alignment = Alignment(vertical='top')
wb.save(output_file)
print("Excel file created: {}".format(output_file))
print(" - Total rows: {}".format(len(excel_rows)))
def create_excel_from_template_fast(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4):
"""
Faster writer for large datasets.
Instead of opening the template workbook in write mode and assigning cells one by one,
we:
- read the template's first (data_start_row-1) rows as values
- build a header->index mapping from header_row_idx
- create a new write_only workbook and append rows
This is much faster for tens/hundreds of thousands of cells.
"""
tpl_wb = load_workbook(template_file, read_only=True, data_only=True)
tpl_ws = tpl_wb.active
max_col = tpl_ws.max_column
# Copy template "instruction" rows (typically rows 1-3) into output
prefix_rows = list(tpl_ws.iter_rows(min_row=1, max_row=data_start_row - 1, values_only=True))
header_values = None
if 1 <= header_row_idx <= len(prefix_rows):
header_values = prefix_rows[header_row_idx - 1]
else:
# Fallback: read header row directly
header_values = next(tpl_ws.iter_rows(min_row=header_row_idx, max_row=header_row_idx, values_only=True))
header_values = list(header_values)[:max_col]
col_map = {}
for i, v in enumerate(header_values):
if v is None:
continue
col_map[str(v).strip()] = i # 0-based
wb = Workbook(write_only=True)
ws = wb.create_sheet(title=tpl_ws.title)
# remove default sheet if present (openpyxl may create one)
if "Sheet" in wb.sheetnames and wb["Sheet"] is not ws:
try:
wb.remove(wb["Sheet"])
except Exception:
pass
# Write prefix rows, normalized to max_col
for r in prefix_rows:
r = list(r)[:max_col]
if len(r) < max_col:
r = r + [None] * (max_col - len(r))
ws.append(r)
# Write data rows
for excel_row in excel_rows:
row_vals = [None] * max_col
for field_name, value in excel_row.items():
if field_name not in col_map:
continue
row_vals[col_map[field_name]] = value
ws.append(row_vals)
wb.save(output_file)
print("Excel file created (fast): {}".format(output_file))
print(" - Total rows: {}".format(len(excel_rows)))