Blame view

scripts/shoplazza_excel_template.py 4.72 KB
f3c11fef   tangwang   亚马逊格式数据 导入店匠
1
2
3
4
5
6
7
8
  #!/usr/bin/env python3
  """
  Shared utilities for generating Shoplazza (店匠) product import Excel files
  based on the provided template `docs/商品导入模板.xlsx`.
  
  We keep this in `scripts/` to maximize reuse by existing ad-hoc pipeline scripts.
  """
  
80519ec6   tangwang   emazon -> shoplazza
9
  from openpyxl import Workbook
f3c11fef   tangwang   亚马逊格式数据 导入店匠
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
  from openpyxl import load_workbook
  from openpyxl.styles import Alignment
  
  
  def load_template_column_mapping(ws, header_row_idx=2):
      """
      Read the header row in the template sheet and build a mapping:
      header_name -> column_index (1-based).
      """
      column_mapping = {}
      for col_idx in range(1, ws.max_column + 1):
          cell_value = ws.cell(row=header_row_idx, column=col_idx).value
          if cell_value:
              column_mapping[str(cell_value).strip()] = col_idx
      return column_mapping
  
  
  def create_excel_from_template(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4):
      """
      Create an Excel file from the Shoplazza template and fill with data rows.
  
      Args:
          template_file: Path to Excel template file
          output_file: Path to output Excel file
          excel_rows: List[Dict[str, Any]] mapping template header -> value
          header_row_idx: Header row index in template (default 2)
          data_start_row: Data start row index in template (default 4)
      """
      wb = load_workbook(template_file)
      ws = wb.active
  
      column_mapping = load_template_column_mapping(ws, header_row_idx=header_row_idx)
  
      # Clear existing data rows
      last_template_row = ws.max_row
      if last_template_row >= data_start_row:
          for row in range(data_start_row, last_template_row + 1):
              for col in range(1, ws.max_column + 1):
                  ws.cell(row=row, column=col).value = None
  
80519ec6   tangwang   emazon -> shoplazza
50
51
      # Write data rows (OPT: only write fields that actually exist in excel_row)
      # This avoids looping over all 42 template columns for every output row.
f3c11fef   tangwang   亚马逊格式数据 导入店匠
52
53
      for row_idx, excel_row in enumerate(excel_rows):
          excel_row_num = data_start_row + row_idx
80519ec6   tangwang   emazon -> shoplazza
54
55
56
          for field_name, value in excel_row.items():
              col_idx = column_mapping.get(field_name)
              if not col_idx:
f3c11fef   tangwang   亚马逊格式数据 导入店匠
57
58
                  continue
              cell = ws.cell(row=excel_row_num, column=col_idx)
f3c11fef   tangwang   亚马逊格式数据 导入店匠
59
60
61
62
63
64
65
66
67
68
69
              cell.value = value
              if isinstance(value, str):
                  cell.alignment = Alignment(vertical='top', wrap_text=True)
              else:
                  cell.alignment = Alignment(vertical='top')
  
      wb.save(output_file)
      print("Excel file created: {}".format(output_file))
      print("  - Total rows: {}".format(len(excel_rows)))
  
  
80519ec6   tangwang   emazon -> shoplazza
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
  def create_excel_from_template_fast(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4):
      """
      Faster writer for large datasets.
  
      Instead of opening the template workbook in write mode and assigning cells one by one,
      we:
      - read the template's first (data_start_row-1) rows as values
      - build a header->index mapping from header_row_idx
      - create a new write_only workbook and append rows
  
      This is much faster for tens/hundreds of thousands of cells.
      """
      tpl_wb = load_workbook(template_file, read_only=True, data_only=True)
      tpl_ws = tpl_wb.active
  
      max_col = tpl_ws.max_column
  
      # Copy template "instruction" rows (typically rows 1-3) into output
      prefix_rows = list(tpl_ws.iter_rows(min_row=1, max_row=data_start_row - 1, values_only=True))
  
      header_values = None
      if 1 <= header_row_idx <= len(prefix_rows):
          header_values = prefix_rows[header_row_idx - 1]
      else:
          # Fallback: read header row directly
          header_values = next(tpl_ws.iter_rows(min_row=header_row_idx, max_row=header_row_idx, values_only=True))
  
      header_values = list(header_values)[:max_col]
      col_map = {}
      for i, v in enumerate(header_values):
          if v is None:
              continue
          col_map[str(v).strip()] = i  # 0-based
  
      wb = Workbook(write_only=True)
      ws = wb.create_sheet(title=tpl_ws.title)
      # remove default sheet if present (openpyxl may create one)
      if "Sheet" in wb.sheetnames and wb["Sheet"] is not ws:
          try:
              wb.remove(wb["Sheet"])
          except Exception:
              pass
  
      # Write prefix rows, normalized to max_col
      for r in prefix_rows:
          r = list(r)[:max_col]
          if len(r) < max_col:
              r = r + [None] * (max_col - len(r))
          ws.append(r)
  
      # Write data rows
      for excel_row in excel_rows:
          row_vals = [None] * max_col
          for field_name, value in excel_row.items():
              if field_name not in col_map:
                  continue
              row_vals[col_map[field_name]] = value
          ws.append(row_vals)
  
      wb.save(output_file)
      print("Excel file created (fast): {}".format(output_file))
      print("  - Total rows: {}".format(len(excel_rows)))