From 32e9b30c71aba5d8ce1c76793107546c6d6a9712 Mon Sep 17 00:00:00 2001 From: tangwang Date: Thu, 9 Apr 2026 23:48:39 +0800 Subject: [PATCH] scripts/ 根目录主要保留启动/编排入口,其他脚本归到了几个固定子目录: --- docs/QUICKSTART.md | 2 +- docs/工作总结-微服务性能优化与架构.md | 4 ++-- scripts/README.md | 8 +++++++- scripts/amazon_xlsx_to_shoplazza_xlsx.py | 615 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- scripts/check_data_source.py | 301 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- scripts/check_es_data.py | 268 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- scripts/check_index_mapping.py | 168 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ scripts/compare_index_mappings.py | 189 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- scripts/competitor_xlsx_to_shoplazza_xlsx.py | 27 --------------------------- scripts/csv_to_excel.py | 302 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- scripts/csv_to_excel_multi_variant.py | 565 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- scripts/daily_log_router.sh | 56 -------------------------------------------------------- scripts/data_import/README.md | 13 +++++++++++++ scripts/data_import/amazon_xlsx_to_shoplazza_xlsx.py | 615 
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/data_import/competitor_xlsx_to_shoplazza_xlsx.py | 26 ++++++++++++++++++++++++++ scripts/data_import/csv_to_excel.py | 301 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/data_import/csv_to_excel_multi_variant.py | 564 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/data_import/shoplazza_excel_template.py | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/data_import/shoplazza_import_template.py | 112 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/data_import/tenant3_csv_to_shoplazza_xlsx.sh | 20 ++++++++++++++++++++ scripts/download_translation_models.py | 125 ----------------------------------------------------------------------------------------------------------------------------- scripts/frontend/frontend_server.py | 276 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/frontend_server.py | 276 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ scripts/inspect/README.md | 10 ++++++++++ scripts/inspect/check_data_source.py | 300 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/inspect/check_es_data.py | 267 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/inspect/check_index_mapping.py | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/inspect/compare_index_mappings.py | 188 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/maintenance/embed_tenant_image_urls.py | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/monitor_eviction.py | 89 ----------------------------------------------------------------------------------------- scripts/ops/README.md | 8 ++++++++ scripts/ops/daily_log_router.sh | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/ops/wechat_alert.py | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/redis/monitor_eviction.py | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/service_ctl.sh | 2 +- scripts/setup_translator_venv.sh | 2 +- scripts/shoplazza_excel_template.py | 133 ------------------------------------------------------------------------------------------------------------------------------------- scripts/shoplazza_import_template.py | 112 ---------------------------------------------------------------------------------------------------------------- scripts/start_cnclip_service.sh | 2 +- scripts/start_frontend.sh | 2 +- scripts/temp_embed_tenant_image_urls.py | 246 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ scripts/tenant3__csv_to_shoplazza_xlsx.sh | 20 -------------------- scripts/translation/download_translation_models.py | 125 
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/wechat_alert.py | 104 -------------------------------------------------------------------------------------------------------- translation/README.md | 4 ++-- 45 files changed, 3636 insertions(+), 3606 deletions(-) delete mode 100644 scripts/amazon_xlsx_to_shoplazza_xlsx.py delete mode 100755 scripts/check_data_source.py delete mode 100755 scripts/check_es_data.py delete mode 100644 scripts/check_index_mapping.py delete mode 100644 scripts/compare_index_mappings.py delete mode 100644 scripts/competitor_xlsx_to_shoplazza_xlsx.py delete mode 100755 scripts/csv_to_excel.py delete mode 100755 scripts/csv_to_excel_multi_variant.py delete mode 100755 scripts/daily_log_router.sh create mode 100644 scripts/data_import/README.md create mode 100644 scripts/data_import/amazon_xlsx_to_shoplazza_xlsx.py create mode 100644 scripts/data_import/competitor_xlsx_to_shoplazza_xlsx.py create mode 100755 scripts/data_import/csv_to_excel.py create mode 100755 scripts/data_import/csv_to_excel_multi_variant.py create mode 100644 scripts/data_import/shoplazza_excel_template.py create mode 100644 scripts/data_import/shoplazza_import_template.py create mode 100755 scripts/data_import/tenant3_csv_to_shoplazza_xlsx.sh delete mode 100755 scripts/download_translation_models.py create mode 100755 scripts/frontend/frontend_server.py delete mode 100755 scripts/frontend_server.py create mode 100644 scripts/inspect/README.md create mode 100755 scripts/inspect/check_data_source.py create mode 100755 scripts/inspect/check_es_data.py create mode 100644 scripts/inspect/check_index_mapping.py create mode 100644 scripts/inspect/compare_index_mappings.py create mode 100644 scripts/maintenance/embed_tenant_image_urls.py delete mode 100755 scripts/monitor_eviction.py create mode 100644 scripts/ops/README.md create mode 100755 scripts/ops/daily_log_router.sh create mode 100644 
scripts/ops/wechat_alert.py create mode 100755 scripts/redis/monitor_eviction.py delete mode 100644 scripts/shoplazza_excel_template.py delete mode 100644 scripts/shoplazza_import_template.py delete mode 100644 scripts/temp_embed_tenant_image_urls.py delete mode 100755 scripts/tenant3__csv_to_shoplazza_xlsx.sh create mode 100755 scripts/translation/download_translation_models.py delete mode 100644 scripts/wechat_alert.py diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md index db2d057..9e52dbd 100644 --- a/docs/QUICKSTART.md +++ b/docs/QUICKSTART.md @@ -166,7 +166,7 @@ curl -X POST http://localhost:6008/embed/image \ ```bash ./scripts/setup_translator_venv.sh -./.venv-translator/bin/python scripts/download_translation_models.py --all-local # 如需本地模型 +./.venv-translator/bin/python scripts/translation/download_translation_models.py --all-local # 如需本地模型 ./scripts/start_translator.sh curl -X POST http://localhost:6006/translate \ diff --git a/docs/工作总结-微服务性能优化与架构.md b/docs/工作总结-微服务性能优化与架构.md index 5407569..6a2b5df 100644 --- a/docs/工作总结-微服务性能优化与架构.md +++ b/docs/工作总结-微服务性能优化与架构.md @@ -133,8 +133,8 @@ instruction: "Given a shopping query, rank product titles by relevance" - 启动时:backend/indexer/frontend/embedding/translator/reranker 会写 pid 到 `logs/.pid`,并执行 `wait_for_health`(GET `http://127.0.0.1:/health`);reranker 健康重试 90 次,其余 30 次;TEI 校验 Docker 容器存在且 `/health` 成功;cnclip 无 HTTP 健康则仅校验进程/端口。 - **监控常驻**: - `./scripts/service_ctl.sh monitor-start ` 启动后台监控进程,将 targets 写入 `logs/service-monitor.targets`,pid 写入 `logs/service-monitor.pid`,日志追加到 `logs/service-monitor.log`。 - - 轮询间隔 `MONITOR_INTERVAL_SEC` 默认 **10** 秒;连续 **3** 次(`MONITOR_FAIL_THRESHOLD`)健康失败则触发重启;重启冷却 `MONITOR_RESTART_COOLDOWN_SEC` 默认 **30** 秒;每小时最多重启 `MONITOR_MAX_RESTARTS_PER_HOUR` 默认 **6** 次;超限时调用 `scripts/wechat_alert.py` 告警(若存在)。 -- **日志**:各服务按日滚动到 `logs/-.log`,通过 `scripts/daily_log_router.sh` 与 `LOG_RETENTION_DAYS`(默认 30)控制保留。 + - 轮询间隔 `MONITOR_INTERVAL_SEC` 默认 **10** 秒;连续 **3** 
次(`MONITOR_FAIL_THRESHOLD`)健康失败则触发重启;重启冷却 `MONITOR_RESTART_COOLDOWN_SEC` 默认 **30** 秒;每小时最多重启 `MONITOR_MAX_RESTARTS_PER_HOUR` 默认 **6** 次;超限时调用 `scripts/ops/wechat_alert.py` 告警(若存在)。 +- **日志**:各服务按日滚动到 `logs/-.log`,通过 `scripts/ops/daily_log_router.sh` 与 `LOG_RETENTION_DAYS`(默认 30)控制保留。 详见:`scripts/service_ctl.sh` 内注释及 `docs/Usage-Guide.md`。 diff --git a/scripts/README.md b/scripts/README.md index 8c6a3b6..501e544 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,6 +1,6 @@ # Scripts -`scripts/` 现在只保留当前架构下仍然有效的运行、运维、环境和数据处理脚本。 +`scripts/` 现在只保留当前架构下仍然有效的运行、运维、环境和数据处理脚本,并按职责拆到稳定子目录,避免继续在根目录平铺。 ## 当前分类 @@ -20,6 +20,8 @@ - `stop.sh` - `stop_tei_service.sh` - `stop_cnclip_service.sh` + - `frontend/` + - `ops/` - 环境初始化 - `create_venv.sh` @@ -33,11 +35,15 @@ - `create_tenant_index.sh` - `build_suggestions.sh` - `mock_data.sh` + - `data_import/` + - `inspect/` + - `maintenance/` - 评估与专项工具 - `evaluation/` - `redis/` - `debug/` + - `translation/` ## 已迁移 diff --git a/scripts/amazon_xlsx_to_shoplazza_xlsx.py b/scripts/amazon_xlsx_to_shoplazza_xlsx.py deleted file mode 100644 index 5a5c70a..0000000 --- a/scripts/amazon_xlsx_to_shoplazza_xlsx.py +++ /dev/null @@ -1,615 +0,0 @@ -#!/usr/bin/env python3 -""" -Convert Amazon-format Excel exports (with Parent/Child ASIN structure) into -Shoplazza (店匠) product import Excel format based on `docs/商品导入模板.xlsx`. - -Data source: -- Directory with multiple `*.xlsx` files under `products_data/`. -- Each file contains a main sheet + "Notes" sheet. -- Column meanings (sample): - - ASIN: variant id (sku_id) - - 父ASIN: parent product id (spu_id) - -Output: -- For each 父ASIN group: - - If only 1 ASIN: generate one "S" row - - Else: generate one "M" row + multiple "P" rows - -Multi-variant (M/P) key point: -- Variant dimensions are parsed primarily from the `SKU` column, e.g. - "Size: One Size | Color: Black", and mapped into 款式1/2/3. 
-""" - -# NOTE: This file is intentionally the same implementation as -# `competitor_xlsx_to_shoplazza_xlsx.py`, but renamed to reflect the correct -# data source (Amazon-format exports). Keep the logic in sync. - -import os -import re -import sys -import argparse -import random -from datetime import datetime -from collections import defaultdict, Counter -from pathlib import Path - -from openpyxl import load_workbook - -# Allow running as `python scripts/xxx.py` without installing as a package -sys.path.insert(0, str(Path(__file__).resolve().parent)) -from shoplazza_excel_template import create_excel_from_template_fast - - -PREFERRED_OPTION_KEYS = [ - "Size", "Color", "Style", "Pattern", "Material", "Flavor", "Scent", - "Pack", "Pack of", "Number of Items", "Count", "Capacity", "Length", - "Width", "Height", "Model", "Configuration", -] - - -def clean_str(v): - if v is None: - return "" - return str(v).strip() - - -def html_escape(s): - s = clean_str(s) - return (s.replace("&", "&") - .replace("<", "<") - .replace(">", ">")) - - -def generate_handle(title): - """ - Generate URL-friendly handle from title (ASCII only). - Keep consistent with existing scripts. - """ - handle = clean_str(title).lower() - handle = re.sub(r"[^a-z0-9\\s-]", "", handle) - handle = re.sub(r"[-\\s]+", "-", handle).strip("-") - if len(handle) > 255: - handle = handle[:255] - return handle or "product" - - -def parse_date_to_template(dt_value): - """ - Template expects: YYYY-MM-DD HH:MM:SS - Input could be "2018-05-09" or datetime/date. 
- """ - if dt_value is None or dt_value == "": - return "" - if isinstance(dt_value, datetime): - return dt_value.strftime("%Y-%m-%d %H:%M:%S") - s = clean_str(dt_value) - for fmt in ("%Y-%m-%d", "%Y/%m/%d", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S"): - try: - d = datetime.strptime(s, fmt) - return d.strftime("%Y-%m-%d %H:%M:%S") - except Exception: - pass - return "" - - -def parse_weight(weight_conv, weight_raw): - """ - Return (weight_value, unit) where unit in {kg, lb, g, oz}. - Prefer '商品重量(单位换算)' like '68.04 g'. - Fallback to '商品重量' like '0.15 pounds'. - """ - s = clean_str(weight_conv) or clean_str(weight_raw) - if not s: - return ("", "") - m = re.search(r"([0-9]+(?:\\.[0-9]+)?)\\s*([a-zA-Z]+)", s) - if not m: - return ("", "") - val = float(m.group(1)) - unit = m.group(2).lower() - if unit in ("g", "gram", "grams"): - return (val, "g") - if unit in ("kg", "kilogram", "kilograms"): - return (val, "kg") - if unit in ("lb", "lbs", "pound", "pounds"): - return (val, "lb") - if unit in ("oz", "ounce", "ounces"): - return (val, "oz") - return ("", "") - - -def parse_dimensions_inches(dim_raw): - """ - Template '尺寸信息': 'L,W,H' in inches. - Input example: '7.9 x 7.9 x 2 inches' - """ - s = clean_str(dim_raw) - if not s: - return "" - nums = re.findall(r"([0-9]+(?:\\.[0-9]+)?)", s) - if len(nums) < 3: - return "" - return "{},{},{}".format(nums[0], nums[1], nums[2]) - - -def parse_sku_options(sku_text): - """ - Parse 'SKU' column into {key: value}. 
- Example: - 'Size: One Size | Color: Black' -> {'Size':'One Size','Color':'Black'} - """ - s = clean_str(sku_text) - if not s: - return {} - parts = [p.strip() for p in s.split("|") if p.strip()] - out = {} - for p in parts: - if ":" not in p: - continue - k, v = p.split(":", 1) - k = clean_str(k) - v = clean_str(v) - if k and v: - out[k] = v - return out - - -def choose_option_keys(variant_dicts, max_keys=3): - freq = Counter() - for d in variant_dicts: - for k, v in d.items(): - if v: - freq[k] += 1 - if not freq: - return [] - preferred_rank = {k: i for i, k in enumerate(PREFERRED_OPTION_KEYS)} - - def key_sort(k): - return (preferred_rank.get(k, 10 ** 6), -freq[k], k.lower()) - - keys = sorted(freq.keys(), key=key_sort) - return keys[:max_keys] - - -def build_description_html(title, details, product_url): - parts = [] - if title: - parts.append("

{}

".format(html_escape(title))) - detail_items = [x.strip() for x in clean_str(details).split("|") if x.strip()] - if detail_items: - li = "".join(["
  • {}
  • ".format(html_escape(x)) for x in detail_items[:30]]) - parts.append("
      {}
    ".format(li)) - if product_url: - parts.append('

    Source: {0}

    '.format(html_escape(product_url))) - return "".join(parts) - - -def read_amazon_rows_from_file(xlsx_path, max_rows=None): - wb = load_workbook(xlsx_path, read_only=True, data_only=True) - sheet_name = None - for name in wb.sheetnames: - if str(name).lower() == "notes": - continue - sheet_name = name - break - if sheet_name is None: - return [] - ws = wb[sheet_name] - - # Build header index from first row - header = next(ws.iter_rows(min_row=1, max_row=1, values_only=True)) - idx = {clean_str(v): i for i, v in enumerate(header) if v is not None and clean_str(v)} - - required = ["ASIN", "父ASIN", "商品标题", "商品主图", "SKU", "详细参数", "价格($)", "prime价格($)", - "上架时间", "类目路径", "大类目", "小类目", "品牌", "品牌链接", "商品详情页链接", - "商品重量(单位换算)", "商品重量", "商品尺寸"] - for k in required: - if k not in idx: - raise RuntimeError("Missing column '{}' in {} sheet {}".format(k, xlsx_path, sheet_name)) - - # OPT: use iter_rows(values_only=True) instead of ws.cell() per field. - # openpyxl cell access is relatively expensive; values_only is much faster. 
- pos = {k: idx[k] for k in required} # 0-based positions in row tuple - - rows = [] - end_row = ws.max_row - if max_rows is not None: - end_row = min(end_row, 1 + int(max_rows)) - - for tup in ws.iter_rows(min_row=2, max_row=end_row, values_only=True): - asin = clean_str(tup[pos["ASIN"]]) - if not asin: - continue - parent = clean_str(tup[pos["父ASIN"]]) or asin - rows.append({ - "ASIN": asin, - "父ASIN": parent, - "SKU": clean_str(tup[pos["SKU"]]), - "详细参数": clean_str(tup[pos["详细参数"]]), - "商品标题": clean_str(tup[pos["商品标题"]]), - "商品主图": clean_str(tup[pos["商品主图"]]), - "价格($)": tup[pos["价格($)"]], - "prime价格($)": tup[pos["prime价格($)"]], - "上架时间": clean_str(tup[pos["上架时间"]]), - "类目路径": clean_str(tup[pos["类目路径"]]), - "大类目": clean_str(tup[pos["大类目"]]), - "小类目": clean_str(tup[pos["小类目"]]), - "品牌": clean_str(tup[pos["品牌"]]), - "品牌链接": clean_str(tup[pos["品牌链接"]]), - "商品详情页链接": clean_str(tup[pos["商品详情页链接"]]), - "商品重量(单位换算)": clean_str(tup[pos["商品重量(单位换算)"]]), - "商品重量": clean_str(tup[pos["商品重量"]]), - "商品尺寸": clean_str(tup[pos["商品尺寸"]]), - }) - return rows - - -def to_price(v): - if v is None or v == "": - return None - try: - return float(v) - except Exception: - s = clean_str(v) - m = re.search(r"([0-9]+(?:\\.[0-9]+)?)", s) - return float(m.group(1)) if m else None - - -def build_common_fields(base_row, spu_id): - title = base_row.get("商品标题") or "Product" - brand = base_row.get("品牌") or "" - big_cat = base_row.get("大类目") or "" - small_cat = base_row.get("小类目") or "" - cat_path = base_row.get("类目路径") or "" - - handle = generate_handle(title) - if handle and not handle.startswith("products/"): - handle = "products/{}".format(handle) - - seo_title = title - seo_desc_parts = [x for x in [brand, title, big_cat] if x] - seo_description = " ".join(seo_desc_parts)[:5000] - seo_keywords = ",".join([x for x in [title, brand, big_cat, small_cat] if x])[:5000] - tags = ",".join([x for x in [brand, big_cat, small_cat] if x]) - - created_at = parse_date_to_template(base_row.get("上架时间")) - 
description = build_description_html(title, base_row.get("详细参数"), base_row.get("商品详情页链接")) - - inventory_qty = 100 - weight_val, weight_unit = parse_weight(base_row.get("商品重量(单位换算)"), base_row.get("商品重量")) - size_info = parse_dimensions_inches(base_row.get("商品尺寸")) - - album = big_cat or (cat_path.split(":")[0] if cat_path else "") - - return { - "商品ID": "", - "创建时间": created_at, - "商品标题*": title[:255], - "商品副标题": "{} {}".format(brand, big_cat).strip()[:600], - "商品描述": description, - "SEO标题": seo_title[:5000], - "SEO描述": seo_description, - "SEO URL Handle": handle, - "SEO URL 重定向": "N", - "SEO关键词": seo_keywords, - "商品上架": "Y", - "需要物流": "Y", - "商品收税": "N", - "商品spu": spu_id[:100], - "启用虚拟销量": "N", - "虚拟销量值": "", - "跟踪库存": "Y", - "库存规则*": "1", - "专辑名称": album, - "标签": tags, - "供应商名称": "Amazon", - "供应商URL": base_row.get("商品详情页链接") or base_row.get("品牌链接") or "", - "商品重量": weight_val if weight_val != "" else "", - "重量单位": weight_unit, - "商品库存": inventory_qty, - "尺寸信息": size_info, - "原产地国别": "", - "HS(协调制度)代码": "", - "商品备注": "ASIN:{}; ParentASIN:{}; CategoryPath:{}".format( - base_row.get("ASIN", ""), spu_id, (cat_path[:200] if cat_path else "") - )[:500], - "款式备注": "", - } - - -def build_s_row(base_row): - spu_id = base_row.get("父ASIN") or base_row.get("ASIN") - common = build_common_fields(base_row, spu_id=spu_id) - price = to_price(base_row.get("prime价格($)")) or to_price(base_row.get("价格($)")) or 9.99 - image = base_row.get("商品主图") or "" - row = {} - row.update(common) - row.update({ - "商品属性*": "S", - "款式1": "", - "款式2": "", - "款式3": "", - "商品售价*": price, - "商品原价": price, - "成本价": "", - "商品SKU": base_row.get("ASIN") or "", - "商品条形码": "", - "商品图片*": image, - "商品主图": image, - }) - return row - - -def build_m_p_rows(variant_rows): - base = variant_rows[0] - spu_id = base.get("父ASIN") or base.get("ASIN") - common = build_common_fields(base, spu_id=spu_id) - - option_dicts = [parse_sku_options(v.get("SKU")) for v in variant_rows] - option_keys = 
choose_option_keys(option_dicts, max_keys=3) or ["Variant"] - - m = {} - m.update(common) - m.update({ - "商品属性*": "M", - "款式1": option_keys[0] if len(option_keys) > 0 else "", - "款式2": option_keys[1] if len(option_keys) > 1 else "", - "款式3": option_keys[2] if len(option_keys) > 2 else "", - "商品售价*": "", - "商品原价": "", - "成本价": "", - "商品SKU": "", - "商品条形码": "", - "商品图片*": base.get("商品主图") or "", - "商品主图": base.get("商品主图") or "", - }) - m["商品重量"] = "" - m["重量单位"] = "" - m["商品库存"] = "" - m["尺寸信息"] = "" - - rows = [m] - - for v in variant_rows: - v_common = build_common_fields(v, spu_id=spu_id) - v_common.update({ - "商品副标题": "", - "商品描述": "", - "SEO标题": "", - "SEO描述": "", - "SEO URL Handle": "", - "SEO URL 重定向": "", - "SEO关键词": "", - "专辑名称": "", - "标签": "", - "供应商名称": "", - "供应商URL": "", - "商品备注": "", - }) - - opt = parse_sku_options(v.get("SKU")) - opt_vals = [v.get("ASIN")] if option_keys == ["Variant"] else [opt.get(k, "") for k in option_keys] - - price = to_price(v.get("prime价格($)")) or to_price(v.get("价格($)")) or 9.99 - image = v.get("商品主图") or "" - - p = {} - p.update(v_common) - p.update({ - "商品属性*": "P", - "款式1": opt_vals[0] if len(opt_vals) > 0 else "", - "款式2": opt_vals[1] if len(opt_vals) > 1 else "", - "款式3": opt_vals[2] if len(opt_vals) > 2 else "", - "商品售价*": price, - "商品原价": price, - "成本价": "", - "商品SKU": v.get("ASIN") or "", - "商品条形码": "", - "商品图片*": image, - "商品主图": "", - }) - rows.append(p) - - return rows - - -def main(): - parser = argparse.ArgumentParser(description="Convert Amazon-format xlsx files to Shoplazza import xlsx") - parser.add_argument("--input-dir", default="data/mai_jia_jing_ling/products_data", help="Directory containing Amazon-format xlsx files") - parser.add_argument("--template", default="docs/商品导入模板.xlsx", help="Shoplazza import template xlsx") - parser.add_argument("--output", default="amazon_shoplazza_import.xlsx", help="Output xlsx file path (or prefix when split)") - parser.add_argument("--max-files", type=int, default=None, 
help="Limit number of xlsx files to read (for testing)") - parser.add_argument("--max-rows-per-output", type=int, default=40000, help="Max total Excel rows per output file (including模板头部行,默认40000)") - parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)") - # 默认行为:丢弃不符合要求的数据 - parser.add_argument("--keep-spu-if-parent-missing", action="store_false", dest="skip_spu_if_parent_missing", default=False, help="Keep SPU even if parent ASIN not found in variants (default: skip entire SPU)") - parser.add_argument("--fix-sku-if-title-mismatch", action="store_false", dest="skip_sku_if_title_mismatch", default=False, help="Fix SKU title to match parent instead of skipping (default: skip SKU with mismatched title)") - args = parser.parse_args() - - if not os.path.isdir(args.input_dir): - raise RuntimeError("input-dir not found: {}".format(args.input_dir)) - if not os.path.exists(args.template): - raise RuntimeError("template not found: {}".format(args.template)) - - files = [os.path.join(args.input_dir, f) for f in os.listdir(args.input_dir) if f.lower().endswith(".xlsx")] - files.sort() - if args.max_files is not None: - files = files[: int(args.max_files)] - - print("Reading Amazon-format files: {} (from {})".format(len(files), args.input_dir), flush=True) - - groups = defaultdict(list) - seen_asin = set() - - for fp in files: - print(" - loading: {}".format(fp), flush=True) - try: - rows = read_amazon_rows_from_file(fp) - except Exception as e: - print("WARN: failed to read {}: {}".format(fp, e)) - continue - print(" loaded rows: {}".format(len(rows)), flush=True) - - for r in rows: - asin = r.get("ASIN") - if asin in seen_asin: - continue - seen_asin.add(asin) - spu_id = r.get("父ASIN") or asin - groups[spu_id].append(r) - - print("Collected variants: {}, SPU groups: {}".format(len(seen_asin), len(groups)), flush=True) - - # 先按 SPU 构造每个组的行,方便做“按最大行数拆分但不拆组” - group_rows_list = [] # List[List[dict]] - spu_count 
= 0 - next_product_id = 1 # 用于填充商品ID,全局自增 - # 将SPU顺序打乱,避免过于依赖输入文件的顺序 - spu_items = list(groups.items()) - random.shuffle(spu_items) - - for spu_id, variants in spu_items: - if not variants: - continue - - # 确保父ASIN对应的变体在列表最前面 - parent_variant = None - other_variants = [] - for v in variants: - if v.get("ASIN") == spu_id: - parent_variant = v - else: - other_variants.append(v) - - # 重新排序:父ASIN在前,其他在后 - if parent_variant: - variants = [parent_variant] + other_variants - else: - # 如果找不到父ASIN对应的变体 - print( - f"WARN: Parent ASIN not found in variants: SPU={spu_id}, " - f"variant_count={len(variants)}, first_ASIN={variants[0].get('ASIN') if variants else 'N/A'}", - flush=True, - ) - # 根据开关决定是否丢弃整个SPU - if args.skip_spu_if_parent_missing: - print( - f"SKIP entire SPU due to missing parent ASIN: SPU={spu_id}", - flush=True, - ) - continue - - # 处理变体标题:如果与主商品不一致,根据开关决定修正或丢弃 - main_title = variants[0].get("商品标题") or "" - filtered_variants = [] - for v in variants: - title = v.get("商品标题") or "" - if main_title and title and title != main_title: - if args.skip_sku_if_title_mismatch: - # 丢弃标题不一致的SKU - print( - f"SKIP SKU due to title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, " - f"main_title='{main_title}', variant_title='{title}'", - flush=True, - ) - continue - else: - # 修正标题 - print( - f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, " - f"main_title='{main_title}', variant_title='{title}' -> using main_title", - flush=True, - ) - v["商品标题"] = main_title # 统一为主商品标题 - filtered_variants.append(v) - - # 如果所有变体都被过滤掉,跳过整个SPU - if not filtered_variants: - print( - f"SKIP entire SPU: all variants filtered out, SPU={spu_id}", - flush=True, - ) - continue - - variants = filtered_variants - - spu_count += 1 - if args.max_products is not None and spu_count > int(args.max_products): - break - - if len(variants) == 1: - rows = [build_s_row(variants[0])] - else: - rows = build_m_p_rows(variants) - - # 填充商品ID(从1开始全局递增) - for r in rows: - r["商品ID"] = next_product_id - 
next_product_id += 1 - - group_rows_list.append(rows) - - # 按最大行数拆成多个文件(注意:同一 SPU 不拆分) - data_start_row = 4 # 与模板/写入工具保持一致 - header_rows = data_start_row - 1 # 包含标题行+说明行 - max_total_rows = args.max_rows_per_output or 0 - if max_total_rows and max_total_rows > header_rows: - max_data_rows = max_total_rows - header_rows - else: - max_data_rows = None # 不限制 - - chunks = [] - current_chunk = [] - current_count = 0 - - if max_data_rows is None: - # 不做分片,直接一个 chunk - for gr in group_rows_list: - current_chunk.extend(gr) - if current_chunk: - chunks.append(current_chunk) - else: - for gr in group_rows_list: - gsize = len(gr) - # 如果单个 SPU 本身就超过阈值,只能独占一个文件 - if gsize > max_data_rows: - if current_chunk: - chunks.append(current_chunk) - current_chunk = [] - current_count = 0 - chunks.append(gr) - continue - # 如果放不下当前 chunk,则先封一个,再开新 chunk - if current_count + gsize > max_data_rows: - if current_chunk: - chunks.append(current_chunk) - current_chunk = list(gr) - current_count = gsize - else: - current_chunk.extend(gr) - current_count += gsize - if current_chunk: - chunks.append(current_chunk) - - total_rows = sum(len(c) for c in chunks) - print( - "Generated Excel data rows: {} (SPU groups output: {}, files: {})".format( - total_rows, len(group_rows_list), len(chunks) - ), - flush=True, - ) - - # 输出多个文件:如果只一个 chunk,直接用指定 output;多个则加 _partN 后缀 - base = Path(args.output) - stem = base.stem - suffix = base.suffix or ".xlsx" - - for idx, chunk in enumerate(chunks, start=1): - out_path = str(base) if len(chunks) == 1 else str(base.with_name(f"{stem}_part{idx}{suffix}")) - print(f"Writing file {idx}/{len(chunks)}: {out_path} (rows: {len(chunk)})", flush=True) - create_excel_from_template_fast(args.template, out_path, chunk, data_start_row=data_start_row) - - -if __name__ == "__main__": - main() - - diff --git a/scripts/check_data_source.py b/scripts/check_data_source.py deleted file mode 100755 index 58fe105..0000000 --- a/scripts/check_data_source.py +++ /dev/null @@ -1,301 +0,0 @@ 
-#!/usr/bin/env python3 -""" -诊断脚本:检查MySQL数据源中分类和规格信息是否正确 - -检查: -1. category_path 字段是否有值 -2. category_path 格式是否正确(应该能被解析为 category1_name) -3. shoplazza_product_option 表的 name 字段是否有值(应该是 "color", "size", "material") -4. shoplazza_product_sku 表的 option1/2/3 字段是否有值 -""" - -import sys -import argparse -from pathlib import Path -from sqlalchemy import create_engine, text - -# Add parent directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from utils.db_connector import create_db_connection - - -def check_category_path(db_engine, tenant_id: str): - """检查 category_path 和 category 字段""" - print("\n" + "="*60) - print("1. 检查 category_path 和 category 字段") - print("="*60) - - query = text(""" - SELECT - COUNT(*) as total, - COUNT(category_path) as has_category_path, - COUNT(*) - COUNT(category_path) as null_category_path, - COUNT(category) as has_category, - COUNT(*) - COUNT(category) as null_category - FROM shoplazza_product_spu - WHERE tenant_id = :tenant_id AND deleted = 0 - """) - - with db_engine.connect() as conn: - result = conn.execute(query, {"tenant_id": tenant_id}).fetchone() - total = result[0] - has_category_path = result[1] - null_category_path = result[2] - has_category = result[3] - null_category = result[4] - - print(f"总SPU数: {total}") - print(f"有 category_path 的SPU: {has_category_path}") - print(f"category_path 为空的SPU: {null_category_path}") - print(f"有 category 的SPU: {has_category}") - print(f"category 为空的SPU: {null_category}") - - # 查看category字段的示例 - if has_category > 0: - sample_query = text(""" - SELECT id, title, category_path, category, category_id, category_level - FROM shoplazza_product_spu - WHERE tenant_id = :tenant_id - AND deleted = 0 - AND category IS NOT NULL - LIMIT 5 - """) - samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall() - print(f"\n示例数据(前5条有 category 的记录):") - for row in samples: - print(f" SPU ID: {row[0]}, Title: {row[1][:50] if row[1] else ''}") - print(f" category_path: {row[2]}") - 
print(f" category: '{row[3]}'") - print(f" category_id: {row[4]}, category_level: {row[5]}") - - # 解析 category 字段(用于生成 category1_name) - if row[3]: - category = str(row[3]) - if '/' in category: - path_parts = category.split('/') - print(f" 解析后(按'/'分割): {path_parts}") - if len(path_parts) > 0: - print(f" → category1_name: '{path_parts[0].strip()}'") - else: - print(f" → category1_name: '{category.strip()}'(直接作为category1_name)") - else: - print("\n⚠️ 警告: 没有SPU有 category 值!") - - # 查看category_path的示例(如果有) - if has_category_path > 0: - sample_query = text(""" - SELECT id, title, category_path, category - FROM shoplazza_product_spu - WHERE tenant_id = :tenant_id - AND deleted = 0 - AND category_path IS NOT NULL - LIMIT 3 - """) - samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall() - print(f"\n示例数据(有 category_path 的记录):") - for row in samples: - print(f" SPU ID: {row[0]}, Title: {row[1][:50] if row[1] else ''}") - print(f" category_path: '{row[2]}'") - print(f" category: '{row[3]}'") - - # 检查是否是ID列表格式 - if row[2] and ',' in str(row[2]) and not '/' in str(row[2]): - print(f" ⚠️ 注意: category_path是ID列表格式(逗号分隔),不是路径格式") - - -def check_options(db_engine, tenant_id: str): - """检查 option 表的 name 字段""" - print("\n" + "="*60) - print("2. 
检查 shoplazza_product_option 表的 name 字段") - print("="*60) - - query = text(""" - SELECT - COUNT(*) as total_options, - COUNT(DISTINCT name) as distinct_names, - COUNT(DISTINCT spu_id) as spus_with_options - FROM shoplazza_product_option - WHERE tenant_id = :tenant_id AND deleted = 0 - """) - - with db_engine.connect() as conn: - result = conn.execute(query, {"tenant_id": tenant_id}).fetchone() - total_options = result[0] - distinct_names = result[1] - spus_with_options = result[2] - - print(f"总 option 记录数: {total_options}") - print(f"不同的 name 数量: {distinct_names}") - print(f"有 option 定义的 SPU 数量: {spus_with_options}") - - if total_options > 0: - # 查看不同的 name 值 - name_query = text(""" - SELECT DISTINCT name, position, COUNT(*) as count - FROM shoplazza_product_option - WHERE tenant_id = :tenant_id AND deleted = 0 - GROUP BY name, position - ORDER BY position, name - """) - names = conn.execute(name_query, {"tenant_id": tenant_id}).fetchall() - print(f"\n不同的 name 值:") - for row in names: - print(f" position={row[1]}, name='{row[0]}', count={row[2]}") - - # 查看一些示例 - sample_query = text(""" - SELECT spu_id, position, name, `values` - FROM shoplazza_product_option - WHERE tenant_id = :tenant_id AND deleted = 0 - ORDER BY spu_id, position - LIMIT 10 - """) - samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall() - print(f"\n示例数据(前10条 option 记录):") - for row in samples: - print(f" SPU ID: {row[0]}, position: {row[1]}, name: '{row[2]}', values: {row[3]}") - else: - print("\n⚠️ 警告: 没有 option 记录!") - - -def check_sku_options(db_engine, tenant_id: str): - """检查 SKU 表的 option1/2/3 字段""" - print("\n" + "="*60) - print("3. 
检查 shoplazza_product_sku 表的 option1/2/3 字段") - print("="*60) - - query = text(""" - SELECT - COUNT(*) as total_skus, - COUNT(option1) as has_option1, - COUNT(option2) as has_option2, - COUNT(option3) as has_option3, - COUNT(DISTINCT spu_id) as distinct_spus - FROM shoplazza_product_sku - WHERE tenant_id = :tenant_id AND deleted = 0 - """) - - with db_engine.connect() as conn: - result = conn.execute(query, {"tenant_id": tenant_id}).fetchone() - total_skus = result[0] - has_option1 = result[1] - has_option2 = result[2] - has_option3 = result[3] - distinct_spus = result[4] - - print(f"总 SKU 数: {total_skus}") - print(f"有 option1 的 SKU: {has_option1}") - print(f"有 option2 的 SKU: {has_option2}") - print(f"有 option3 的 SKU: {has_option3}") - print(f"不同的 SPU 数量: {distinct_spus}") - - if total_skus > 0: - # 查看一些示例 - sample_query = text(""" - SELECT spu_id, id, option1, option2, option3 - FROM shoplazza_product_sku - WHERE tenant_id = :tenant_id AND deleted = 0 - ORDER BY spu_id, id - LIMIT 10 - """) - samples = conn.execute(sample_query, {"tenant_id": tenant_id}).fetchall() - print(f"\n示例数据(前10条 SKU 记录):") - for row in samples: - print(f" SPU ID: {row[0]}, SKU ID: {row[1]}") - print(f" option1: '{row[2]}', option2: '{row[3]}', option3: '{row[4]}'") - else: - print("\n⚠️ 警告: 没有 SKU 记录!") - - -def check_spu_summary(db_engine, tenant_id: str): - """检查 SPU 汇总信息""" - print("\n" + "="*60) - print("4. 
SPU 汇总信息") - print("="*60) - - query = text(""" - SELECT - COUNT(DISTINCT spu.id) as total_spus, - COUNT(DISTINCT sku.id) as total_skus, - COUNT(DISTINCT opt.id) as total_options, - COUNT(DISTINCT CASE WHEN spu.category_path IS NOT NULL THEN spu.id END) as spus_with_category_path, - COUNT(DISTINCT opt.spu_id) as spus_with_options - FROM shoplazza_product_spu spu - LEFT JOIN shoplazza_product_sku sku ON spu.id = sku.spu_id AND sku.tenant_id = :tenant_id AND sku.deleted = 0 - LEFT JOIN shoplazza_product_option opt ON spu.id = opt.spu_id AND opt.tenant_id = :tenant_id AND opt.deleted = 0 - WHERE spu.tenant_id = :tenant_id AND spu.deleted = 0 - """) - - with db_engine.connect() as conn: - result = conn.execute(query, {"tenant_id": tenant_id}).fetchone() - total_spus = result[0] - total_skus = result[1] - total_options = result[2] - spus_with_category_path = result[3] - spus_with_options = result[4] - - print(f"总 SPU 数: {total_spus}") - print(f"总 SKU 数: {total_skus}") - print(f"总 option 记录数: {total_options}") - print(f"有 category_path 的 SPU: {spus_with_category_path}") - print(f"有 option 定义的 SPU: {spus_with_options}") - - -def main(): - parser = argparse.ArgumentParser(description='检查MySQL数据源中的分类和规格信息') - parser.add_argument('--tenant-id', required=True, help='Tenant ID') - parser.add_argument('--db-host', help='MySQL host (或使用环境变量 DB_HOST)') - parser.add_argument('--db-port', type=int, help='MySQL port (或使用环境变量 DB_PORT, 默认: 3306)') - parser.add_argument('--db-database', help='MySQL database (或使用环境变量 DB_DATABASE)') - parser.add_argument('--db-username', help='MySQL username (或使用环境变量 DB_USERNAME)') - parser.add_argument('--db-password', help='MySQL password (或使用环境变量 DB_PASSWORD)') - - args = parser.parse_args() - - # 连接数据库 - import os - db_host = args.db_host or os.environ.get('DB_HOST') - db_port = args.db_port or int(os.environ.get('DB_PORT', 3306)) - db_database = args.db_database or os.environ.get('DB_DATABASE') - db_username = args.db_username or 
os.environ.get('DB_USERNAME') - db_password = args.db_password or os.environ.get('DB_PASSWORD') - - if not all([db_host, db_database, db_username, db_password]): - print("错误: MySQL连接参数不完整") - print("请提供 --db-host, --db-database, --db-username, --db-password") - print("或设置环境变量: DB_HOST, DB_DATABASE, DB_USERNAME, DB_PASSWORD") - return 1 - - print(f"连接MySQL: {db_host}:{db_port}/{db_database}") - print(f"Tenant ID: {args.tenant_id}") - - try: - db_engine = create_db_connection( - host=db_host, - port=db_port, - database=db_database, - username=db_username, - password=db_password - ) - print("✓ MySQL连接成功\n") - except Exception as e: - print(f"✗ 连接MySQL失败: {e}") - return 1 - - # 执行检查 - check_spu_summary(db_engine, args.tenant_id) - check_category_path(db_engine, args.tenant_id) - check_options(db_engine, args.tenant_id) - check_sku_options(db_engine, args.tenant_id) - - print("\n" + "="*60) - print("检查完成") - print("="*60) - - return 0 - - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/scripts/check_es_data.py b/scripts/check_es_data.py deleted file mode 100755 index 33da512..0000000 --- a/scripts/check_es_data.py +++ /dev/null @@ -1,268 +0,0 @@ -#!/usr/bin/env python3 -""" -Check actual data in ES index to see if facet fields have values -""" - -import sys -import os -import argparse -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from utils.es_client import ESClient - - -def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): - """Check facet-related fields in ES""" - print("\n" + "="*60) - print("Checking facet field data in ES index") - print("="*60) - - query = { - "query": { - "term": { - "tenant_id": tenant_id - } - }, - "size": size, - "_source": [ - "spu_id", - "title", - "category1_name", - "category2_name", - "category3_name", - "category_name", - "category_path", - "specifications", - "option1_name", - "option2_name", - "option3_name" - ] - } - - try: - response = 
es_client.client.search(index="search_products", body=query) - hits = response.get('hits', {}).get('hits', []) - total = response.get('hits', {}).get('total', {}).get('value', 0) - - print(f"\nTotal documents: {total}") - print(f"Checking first {len(hits)} documents:\n") - - for i, hit in enumerate(hits, 1): - source = hit.get('_source', {}) - title_obj = source.get("title") or {} - category_path_obj = source.get("category_path") or {} - print(f"Document {i}:") - print(f" spu_id: {source.get('spu_id')}") - print(f" title.zh: {str(title_obj.get('zh', ''))[:50] if isinstance(title_obj, dict) else ''}") - print(f" category1_name: {source.get('category1_name')}") - print(f" category2_name: {source.get('category2_name')}") - print(f" category3_name: {source.get('category3_name')}") - print(f" category_name: {source.get('category_name')}") - print(f" category_path.zh: {category_path_obj.get('zh') if isinstance(category_path_obj, dict) else None}") - print(f" option1_name: {source.get('option1_name')}") - print(f" option2_name: {source.get('option2_name')}") - print(f" option3_name: {source.get('option3_name')}") - - specs = source.get('specifications', []) - if specs: - print(f" specifications count: {len(specs)}") - # 显示前3个specifications - for spec in specs[:3]: - print(f" - name: {spec.get('name')}, value: {spec.get('value')}") - else: - print(f" specifications: empty") - print() - - except Exception as e: - print(f"Error: {e}") - import traceback - traceback.print_exc() - - -def check_facet_aggregations(es_client, tenant_id: str): - """Check facet aggregation queries""" - print("\n" + "="*60) - print("Checking facet aggregation query results") - print("="*60) - - query = { - "query": { - "term": { - "tenant_id": tenant_id - } - }, - "size": 0, - "aggs": { - "category1_facet": { - "terms": { - "field": "category1_name", - "size": 10 - } - }, - "color_facet": { - "nested": { - "path": "specifications" - }, - "aggs": { - "filter_by_name": { - "filter": { - "term": { - 
"specifications.name": "color" - } - }, - "aggs": { - "value_counts": { - "terms": { - "field": "specifications.value", - "size": 10 - } - } - } - } - } - }, - "size_facet": { - "nested": { - "path": "specifications" - }, - "aggs": { - "filter_by_name": { - "filter": { - "term": { - "specifications.name": "size" - } - }, - "aggs": { - "value_counts": { - "terms": { - "field": "specifications.value", - "size": 10 - } - } - } - } - } - }, - "material_facet": { - "nested": { - "path": "specifications" - }, - "aggs": { - "filter_by_name": { - "filter": { - "term": { - "specifications.name": "material" - } - }, - "aggs": { - "value_counts": { - "terms": { - "field": "specifications.value", - "size": 10 - } - } - } - } - } - } - } - } - - try: - response = es_client.client.search(index="search_products", body=query) - aggs = response.get('aggregations', {}) - - print("\n1. category1_name facet:") - category1 = aggs.get('category1_facet', {}) - buckets = category1.get('buckets', []) - if buckets: - for bucket in buckets: - print(f" {bucket['key']}: {bucket['doc_count']}") - else: - print(" empty (no data)") - - print("\n2. specifications.color facet:") - color_agg = aggs.get('color_facet', {}) - color_filter = color_agg.get('filter_by_name', {}) - color_values = color_filter.get('value_counts', {}) - color_buckets = color_values.get('buckets', []) - if color_buckets: - for bucket in color_buckets: - print(f" {bucket['key']}: {bucket['doc_count']}") - else: - print(" empty (no data)") - - print("\n3. specifications.size facet:") - size_agg = aggs.get('size_facet', {}) - size_filter = size_agg.get('filter_by_name', {}) - size_values = size_filter.get('value_counts', {}) - size_buckets = size_values.get('buckets', []) - if size_buckets: - for bucket in size_buckets: - print(f" {bucket['key']}: {bucket['doc_count']}") - else: - print(" empty (no data)") - - print("\n4. 
specifications.material facet:") - material_agg = aggs.get('material_facet', {}) - material_filter = material_agg.get('filter_by_name', {}) - material_values = material_filter.get('value_counts', {}) - material_buckets = material_values.get('buckets', []) - if material_buckets: - for bucket in material_buckets: - print(f" {bucket['key']}: {bucket['doc_count']}") - else: - print(" empty (no data)") - - except Exception as e: - print(f"Error: {e}") - import traceback - traceback.print_exc() - - -def main(): - parser = argparse.ArgumentParser(description='Check facet field data in ES index') - parser.add_argument('--tenant-id', required=True, help='Tenant ID') - parser.add_argument('--es-host', help='Elasticsearch host (or use env var ES_HOST, default: http://localhost:9200)') - parser.add_argument('--size', type=int, default=5, help='Number of documents to check (default: 5)') - - args = parser.parse_args() - - # 连接ES - es_host = args.es_host or os.environ.get('ES_HOST', 'http://localhost:9200') - es_username = os.environ.get('ES_USERNAME') - es_password = os.environ.get('ES_PASSWORD') - - print(f"Connecting to Elasticsearch: {es_host}") - print(f"Tenant ID: {args.tenant_id}\n") - - try: - if es_username and es_password: - es_client = ESClient(hosts=[es_host], username=es_username, password=es_password) - else: - es_client = ESClient(hosts=[es_host]) - - if not es_client.ping(): - print(f"✗ Cannot connect to Elasticsearch: {es_host}") - return 1 - print("✓ Elasticsearch connected successfully\n") - except Exception as e: - print(f"✗ Failed to connect to Elasticsearch: {e}") - return 1 - - # 检查ES数据 - check_es_facet_fields(es_client, args.tenant_id, args.size) - check_facet_aggregations(es_client, args.tenant_id) - - print("\n" + "="*60) - print("Check completed") - print("="*60) - - return 0 - - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/scripts/check_index_mapping.py b/scripts/check_index_mapping.py deleted file mode 100644 index e7569fd..0000000 
--- a/scripts/check_index_mapping.py +++ /dev/null @@ -1,168 +0,0 @@ -#!/usr/bin/env python3 -""" -检查ES索引的实际映射配置,特别是中文字段的analyzer设置 -""" - -import os -import sys -import json -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from utils.es_client import get_es_client_from_env -from indexer.mapping_generator import get_tenant_index_name - - -def check_field_mapping(mapping_dict, field_path): - """递归查找字段映射""" - parts = field_path.split('.') - current = mapping_dict - - for part in parts: - if not isinstance(current, dict): - return None - - # ES mapping nesting: object fields store subfields under "properties" - if "properties" in current and isinstance(current["properties"], dict): - current = current["properties"] - - # multi-fields store subfields under "fields" (e.g. vendor.zh.keyword) - if part != parts[0] and "fields" in current and isinstance(current["fields"], dict) and part in current["fields"]: - current = current["fields"] - - current = current.get(part) - if current is None: - return None - return current - - -def main(): - import argparse - - parser = argparse.ArgumentParser(description="检查 Elasticsearch 索引实际映射配置") - parser.add_argument("--tenant-id", type=str, required=True, help="租户ID") - args = parser.parse_args() - - print("=" * 80) - print("检查 Elasticsearch 索引实际映射配置") - print("=" * 80) - - # 连接ES - try: - es_client = get_es_client_from_env() - if not es_client.ping(): - print("✗ 无法连接到 Elasticsearch") - return 1 - print("✓ Elasticsearch 连接成功\n") - except Exception as e: - print(f"✗ 连接 Elasticsearch 失败: {e}") - return 1 - - index_name = get_tenant_index_name(args.tenant_id) - - # 检查索引是否存在 - if not es_client.index_exists(index_name): - print(f"✗ 索引 '{index_name}' 不存在") - return 1 - - # 获取实际映射 - print(f"获取索引 '{index_name}' 的映射配置...\n") - mapping = es_client.get_mapping(index_name) - - if not mapping: - print("✗ 无法获取索引映射") - return 1 - - # 提取实际映射结构 - # ES返回格式: {index_name: {mappings: {properties: {...}}}} - 
index_mapping = mapping.get(index_name, {}).get('mappings', {}).get('properties', {}) - - if not index_mapping: - print("✗ 无法解析映射结构") - return 1 - - # 检查关键字段 - fields_to_check = [ - "title.zh", - "brief.zh", - "description.zh", - "vendor.zh", - "vendor.zh.keyword", - "category_path.zh", - "category_name_text.zh" - ] - - print("=" * 80) - print("中文字段实际映射配置") - print("=" * 80) - - for field_name in fields_to_check: - field_mapping = check_field_mapping(index_mapping, field_name) - - if field_mapping is None: - print(f"\n❌ {field_name}: 字段不存在") - continue - - print(f"\n📋 {field_name}:") - print(f" 类型: {field_mapping.get('type', 'N/A')}") - - analyzer = field_mapping.get('analyzer') - search_analyzer = field_mapping.get('search_analyzer') - - if analyzer: - print(f" 索引分析器 (analyzer): {analyzer}") - else: - print(f" 索引分析器 (analyzer): 未设置(使用默认)") - - if search_analyzer: - print(f" 查询分析器 (search_analyzer): {search_analyzer}") - else: - print(f" 查询分析器 (search_analyzer): 未设置(使用analyzer或默认)") - - # 检查是否有子字段 - if 'fields' in field_mapping: - print(f" 子字段:") - for sub_field, sub_mapping in field_mapping['fields'].items(): - print(f" - {sub_field}: {sub_mapping.get('type', 'N/A')}") - if 'normalizer' in sub_mapping: - print(f" normalizer: {sub_mapping['normalizer']}") - - # 获取settings中的analyzer定义 - print("\n" + "=" * 80) - print("索引 Settings 中的 Analyzer 定义") - print("=" * 80) - - try: - settings = es_client.client.indices.get_settings(index=index_name) - index_settings = settings.get(index_name, {}).get('settings', {}).get('index', {}) - analysis = index_settings.get('analysis', {}) - analyzers = analysis.get('analyzer', {}) - - if analyzers: - print("\n定义的 Analyzer:") - for analyzer_name, analyzer_config in analyzers.items(): - print(f"\n {analyzer_name}:") - if isinstance(analyzer_config, dict): - print(f" 类型: {analyzer_config.get('type', 'N/A')}") - if 'tokenizer' in analyzer_config: - print(f" tokenizer: {analyzer_config['tokenizer']}") - if 'filter' in analyzer_config: - 
print(f" filter: {analyzer_config['filter']}") - else: - print(f" 配置: {analyzer_config}") - else: - print("\n⚠ 未找到自定义 analyzer 定义") - - except Exception as e: - print(f"\n⚠ 无法获取 settings: {e}") - - print("\n" + "=" * 80) - print("检查完成") - print("=" * 80) - - return 0 - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/scripts/compare_index_mappings.py b/scripts/compare_index_mappings.py deleted file mode 100644 index 7554e56..0000000 --- a/scripts/compare_index_mappings.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 -""" -对比不同租户索引的 mapping 结构 -""" - -import os -import sys -import json -from pathlib import Path -from typing import Dict, Any - -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from utils.es_client import get_es_client_from_env - - -def get_field_type(mapping_dict: Dict, field_path: str) -> Dict[str, Any]: - """递归获取字段的 mapping 信息""" - parts = field_path.split('.') - current = mapping_dict - - for part in parts: - if isinstance(current, dict): - current = current.get(part) - if current is None: - return None - else: - return None - return current - - -def compare_mappings(mapping1: Dict[str, Any], mapping2: Dict[str, Any], index1_name: str, index2_name: str): - """对比两个索引的 mapping""" - props1 = mapping1.get('mappings', {}).get('properties', {}) - props2 = mapping2.get('mappings', {}).get('properties', {}) - - all_fields = set(props1.keys()) | set(props2.keys()) - - print(f"\n{'='*80}") - print(f"对比索引映射结构") - print(f"{'='*80}") - print(f"索引1: {index1_name}") - print(f"索引2: {index2_name}") - print(f"{'='*80}\n") - - differences = [] - same_fields = [] - - for field in sorted(all_fields): - field1 = props1.get(field) - field2 = props2.get(field) - - if field1 is None: - differences.append((field, f"只在 {index2_name} 中存在", field2)) - continue - if field2 is None: - differences.append((field, f"只在 {index1_name} 中存在", field1)) - continue - - type1 = field1.get('type') - type2 = field2.get('type') - - if type1 != type2: - 
differences.append((field, f"类型不同: {index1_name}={type1}, {index2_name}={type2}", (field1, field2))) - else: - same_fields.append((field, type1)) - - # 打印相同的字段 - print(f"✓ 相同字段 ({len(same_fields)} 个):") - for field, field_type in same_fields[:20]: # 只显示前20个 - print(f" - {field}: {field_type}") - if len(same_fields) > 20: - print(f" ... 还有 {len(same_fields) - 20} 个相同字段") - - # 打印不同的字段 - if differences: - print(f"\n✗ 不同字段 ({len(differences)} 个):") - for field, reason, details in differences: - print(f"\n {field}:") - print(f" {reason}") - if isinstance(details, tuple): - print(f" {index1_name}: {json.dumps(details[0], indent=4, ensure_ascii=False)}") - print(f" {index2_name}: {json.dumps(details[1], indent=4, ensure_ascii=False)}") - else: - print(f" 详情: {json.dumps(details, indent=4, ensure_ascii=False)}") - else: - print(f"\n✓ 所有字段类型一致!") - - # 特别检查 tags 字段 - print(f"\n{'='*80}") - print(f"特别检查: tags 字段") - print(f"{'='*80}") - - tags1 = get_field_type(props1, 'tags') - tags2 = get_field_type(props2, 'tags') - - if tags1: - print(f"\n{index1_name}.tags:") - print(f" 类型: {tags1.get('type')}") - print(f" 完整定义: {json.dumps(tags1, indent=2, ensure_ascii=False)}") - else: - print(f"\n{index1_name}.tags: 不存在") - - if tags2: - print(f"\n{index2_name}.tags:") - print(f" 类型: {tags2.get('type')}") - print(f" 完整定义: {json.dumps(tags2, indent=2, ensure_ascii=False)}") - else: - print(f"\n{index2_name}.tags: 不存在") - - -def main(): - import argparse - - parser = argparse.ArgumentParser(description='对比 Elasticsearch 索引的 mapping 结构') - parser.add_argument('index1', help='第一个索引名称 (例如: search_products_tenant_171)') - parser.add_argument('index2', nargs='?', help='第二个索引名称 (例如: search_products_tenant_162)') - parser.add_argument('--list', action='store_true', help='列出所有以 index1 为前缀的索引') - - args = parser.parse_args() - - # 连接 ES - try: - es_client = get_es_client_from_env() - if not es_client.ping(): - print("✗ 无法连接到 Elasticsearch") - return 1 - print("✓ Elasticsearch 连接成功\n") - except 
Exception as e: - print(f"✗ 连接 Elasticsearch 失败: {e}") - return 1 - - # 如果指定了 --list,列出所有匹配的索引 - if args.list or not args.index2: - try: - # 使用 cat API 列出所有索引 - indices = es_client.client.cat.indices(format='json') - matching_indices = [idx['index'] for idx in indices if idx['index'].startswith(args.index1)] - - if matching_indices: - print(f"找到 {len(matching_indices)} 个匹配的索引:") - for idx in sorted(matching_indices): - print(f" - {idx}") - return 0 - else: - print(f"未找到以 '{args.index1}' 开头的索引") - return 1 - except Exception as e: - print(f"✗ 列出索引失败: {e}") - return 1 - - # 获取两个索引的 mapping - index1 = args.index1 - index2 = args.index2 - - print(f"正在获取索引映射...") - print(f" 索引1: {index1}") - print(f" 索引2: {index2}\n") - - # 检查索引是否存在 - if not es_client.index_exists(index1): - print(f"✗ 索引 '{index1}' 不存在") - return 1 - - if not es_client.index_exists(index2): - print(f"✗ 索引 '{index2}' 不存在") - return 1 - - # 获取 mapping - mapping1 = es_client.get_mapping(index1) - mapping2 = es_client.get_mapping(index2) - - if not mapping1 or index1 not in mapping1: - print(f"✗ 无法获取索引 '{index1}' 的映射") - return 1 - - if not mapping2 or index2 not in mapping2: - print(f"✗ 无法获取索引 '{index2}' 的映射") - return 1 - - # 对比 mapping - compare_mappings(mapping1[index1], mapping2[index2], index1, index2) - - return 0 - - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/scripts/competitor_xlsx_to_shoplazza_xlsx.py b/scripts/competitor_xlsx_to_shoplazza_xlsx.py deleted file mode 100644 index 5812357..0000000 --- a/scripts/competitor_xlsx_to_shoplazza_xlsx.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python3 -""" -DEPRECATED SCRIPT NAME (kept for backward compatibility). - -The input `data/mai_jia_jing_ling/products_data/*.xlsx` files are Amazon-format exports -(Parent/Child ASIN), not “competitor data”. 
- -Please use: - - `scripts/amazon_xlsx_to_shoplazza_xlsx.py` - -This wrapper simply forwards all CLI args to the correctly named script, so you -automatically get the latest performance improvements (fast read/write). -""" - -import sys -from pathlib import Path - -# Allow running as `python scripts/xxx.py` without installing as a package -sys.path.insert(0, str(Path(__file__).resolve().parent)) - -from amazon_xlsx_to_shoplazza_xlsx import main as amazon_main - - -if __name__ == "__main__": - amazon_main() - - diff --git a/scripts/csv_to_excel.py b/scripts/csv_to_excel.py deleted file mode 100755 index 5149eba..0000000 --- a/scripts/csv_to_excel.py +++ /dev/null @@ -1,302 +0,0 @@ -#!/usr/bin/env python3 -""" -Convert CSV data to Excel import template. - -Reads CSV file (goods_with_pic.5years_congku.csv.shuf.1w) and generates Excel file -based on the template format (商品导入模板.xlsx). - -Each CSV row corresponds to 1 SPU and 1 SKU, which will be exported as a single -S (Single variant) row in the Excel template. -""" - -import sys -import os -import csv -import random -import argparse -import re -from pathlib import Path -from datetime import datetime, timedelta -import pandas as pd -from openpyxl import load_workbook -from openpyxl.styles import Font, Alignment -from openpyxl.utils import get_column_letter - -# Shared helpers (keeps template writing consistent across scripts) -from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared -from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared - -# Add parent directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - - -def clean_value(value): - """ - Clean and normalize value. 
- - Args: - value: Value to clean - - Returns: - Cleaned string value - """ - if value is None: - return '' - value = str(value).strip() - # Remove surrounding quotes - if value.startswith('"') and value.endswith('"'): - value = value[1:-1] - return value - - -def parse_csv_row(row: dict) -> dict: - """ - Parse CSV row and extract fields. - - Args: - row: CSV row dictionary - - Returns: - Parsed data dictionary - """ - return { - 'skuId': clean_value(row.get('skuId', '')), - 'name': clean_value(row.get('name', '')), - 'name_pinyin': clean_value(row.get('name_pinyin', '')), - 'create_time': clean_value(row.get('create_time', '')), - 'ruSkuName': clean_value(row.get('ruSkuName', '')), - 'enSpuName': clean_value(row.get('enSpuName', '')), - 'categoryName': clean_value(row.get('categoryName', '')), - 'supplierName': clean_value(row.get('supplierName', '')), - 'brandName': clean_value(row.get('brandName', '')), - 'file_id': clean_value(row.get('file_id', '')), - 'days_since_last_update': clean_value(row.get('days_since_last_update', '')), - 'id': clean_value(row.get('id', '')), - 'imageUrl': clean_value(row.get('imageUrl', '')) - } - - -def generate_handle(title: str) -> str: - """ - Generate URL-friendly handle from title. - - Args: - title: Product title - - Returns: - URL-friendly handle (ASCII only) - """ - # Keep backward-compatible function name while delegating to shared helper. - return _generate_handle_shared(title) - - -def read_csv_file(csv_file: str) -> list: - """ - Read CSV file and return list of parsed rows. - - Args: - csv_file: Path to CSV file - - Returns: - List of parsed CSV data dictionaries - """ - csv_data_list = [] - - with open(csv_file, 'r', encoding='utf-8') as f: - reader = csv.DictReader(f) - for row in reader: - parsed = parse_csv_row(row) - csv_data_list.append(parsed) - - return csv_data_list - - -def csv_to_excel_row(csv_data: dict) -> dict: - """ - Convert CSV data row to Excel template row. 
- - Each CSV row represents a single product with one variant (S type in Excel). - - Args: - csv_data: Parsed CSV row data - - Returns: - Dictionary mapping Excel column names to values - """ - # Parse create_time - try: - created_at = datetime.strptime(csv_data['create_time'], '%Y-%m-%d %H:%M:%S') - create_time_str = created_at.strftime('%Y-%m-%d %H:%M:%S') - except: - created_at = datetime.now() - timedelta(days=random.randint(1, 365)) - create_time_str = created_at.strftime('%Y-%m-%d %H:%M:%S') - - # Generate title - use name or enSpuName - title = csv_data['name'] or csv_data['enSpuName'] or 'Product' - - # Generate handle - prefer enSpuName, then name_pinyin, then title - handle_source = csv_data['enSpuName'] or csv_data['name_pinyin'] or title - handle = generate_handle(handle_source) - if handle and not handle.startswith('products/'): - handle = f'products/{handle}' - - # Generate SEO fields - seo_title = f"{title} - {csv_data['categoryName']}" if csv_data['categoryName'] else title - seo_description = f"购买{csv_data['brandName']}{title}" if csv_data['brandName'] else title - seo_keywords_parts = [title] - if csv_data['categoryName']: - seo_keywords_parts.append(csv_data['categoryName']) - if csv_data['brandName']: - seo_keywords_parts.append(csv_data['brandName']) - seo_keywords = ','.join(seo_keywords_parts) - - # Generate tags from category and brand - tags_parts = [] - if csv_data['categoryName']: - tags_parts.append(csv_data['categoryName']) - if csv_data['brandName']: - tags_parts.append(csv_data['brandName']) - tags = ','.join(tags_parts) if tags_parts else '' - - # Generate prices (similar to import_tenant2_csv.py) - price = round(random.uniform(50, 500), 2) - compare_at_price = round(price * random.uniform(1.2, 1.5), 2) - cost_price = round(price * 0.6, 2) - - # Generate random stock - inventory_quantity = random.randint(0, 100) - - # Generate random weight - weight = round(random.uniform(0.1, 5.0), 2) - weight_unit = 'kg' - - # Use ruSkuName as SKU 
title, fallback to name - sku_title = csv_data['ruSkuName'] or csv_data['name'] or 'SKU' - - # Use skuId as SKU code - sku_code = csv_data['skuId'] or '' - - # Generate barcode - try: - sku_id = int(csv_data['skuId']) - barcode = f"BAR{sku_id:08d}" - except: - barcode = '' - - # Build description - description = f"

    {csv_data['name']}

    " if csv_data['name'] else '' - - # Build brief (subtitle) - brief = csv_data['name'] or '' - - # Excel row data (mapping to Excel template columns) - excel_row = { - '商品ID': '', # Empty for new products - '创建时间': create_time_str, - '商品标题*': title, - '商品属性*': 'S', # Single variant product - '商品副标题': brief, - '商品描述': description, - 'SEO标题': seo_title, - 'SEO描述': seo_description, - 'SEO URL Handle': handle, - 'SEO URL 重定向': 'N', # Default to N - 'SEO关键词': seo_keywords, - '商品上架': 'Y', # Published by default - '需要物流': 'Y', # Requires shipping - '商品收税': 'N', # Not taxable by default - '商品spu': '', # Empty - '启用虚拟销量': 'N', # No fake sales - '虚拟销量值': '', # Empty - '跟踪库存': 'Y', # Track inventory - '库存规则*': '1', # Allow purchase when stock is 0 - '专辑名称': csv_data['categoryName'] or '', # Category as album - '标签': tags, - '供应商名称': csv_data['supplierName'] or '', - '供应商URL': '', # Empty - '款式1': '', # Not used for S type - '款式2': '', # Not used for S type - '款式3': '', # Not used for S type - '商品售价*': price, - '商品原价': compare_at_price, - '成本价': cost_price, - '商品SKU': sku_code, - '商品重量': weight, - '重量单位': weight_unit, - '商品条形码': barcode, - '商品库存': inventory_quantity, - '尺寸信息': '', # Empty - '原产地国别': '', # Empty - 'HS(协调制度)代码': '', # Empty - '商品图片*': csv_data['imageUrl'] or '', # Image URL - '商品备注': '', # Empty - '款式备注': '', # Empty - '商品主图': csv_data['imageUrl'] or '', # Main image URL - } - - return excel_row - - -def create_excel_from_template(template_file: str, output_file: str, csv_data_list: list): - """ - Create Excel file from template and fill with CSV data. 
- - Args: - template_file: Path to Excel template file - output_file: Path to output Excel file - csv_data_list: List of parsed CSV data dictionaries - """ - excel_rows = [csv_to_excel_row(d) for d in csv_data_list] - _create_excel_from_template_shared(template_file, output_file, excel_rows) - print(f"Excel file created: {output_file}") - print(f" - Total rows: {len(csv_data_list)}") - - -def main(): - parser = argparse.ArgumentParser(description='Convert CSV data to Excel import template') - parser.add_argument('--csv-file', - default='data/customer1/goods_with_pic.5years_congku.csv.shuf.1w', - help='CSV file path (default: data/customer1/goods_with_pic.5years_congku.csv.shuf.1w)') - parser.add_argument('--template', - default='docs/商品导入模板.xlsx', - help='Excel template file path (default: docs/商品导入模板.xlsx)') - parser.add_argument('--output', - default='商品导入数据.xlsx', - help='Output Excel file path (default: 商品导入数据.xlsx)') - parser.add_argument('--limit', - type=int, - default=None, - help='Limit number of rows to process (default: all)') - - args = parser.parse_args() - - # Check if files exist - if not os.path.exists(args.csv_file): - print(f"Error: CSV file not found: {args.csv_file}") - sys.exit(1) - - if not os.path.exists(args.template): - print(f"Error: Template file not found: {args.template}") - sys.exit(1) - - # Read CSV file - print(f"Reading CSV file: {args.csv_file}") - csv_data_list = read_csv_file(args.csv_file) - print(f"Read {len(csv_data_list)} rows from CSV") - - # Limit rows if specified - if args.limit: - csv_data_list = csv_data_list[:args.limit] - print(f"Limited to {len(csv_data_list)} rows") - - # Create Excel file - print(f"Creating Excel file from template: {args.template}") - print(f"Output file: {args.output}") - create_excel_from_template(args.template, args.output, csv_data_list) - - print(f"\nDone! 
Generated {len(csv_data_list)} product rows in Excel file.") - - -if __name__ == '__main__': - main() - diff --git a/scripts/csv_to_excel_multi_variant.py b/scripts/csv_to_excel_multi_variant.py deleted file mode 100755 index 4e91f59..0000000 --- a/scripts/csv_to_excel_multi_variant.py +++ /dev/null @@ -1,565 +0,0 @@ -#!/usr/bin/env python3 -""" -Convert CSV data to Excel import template with multi-variant support. - -Reads CSV file (goods_with_pic.5years_congku.csv.shuf.1w) and generates Excel file -based on the template format (商品导入模板.xlsx). - -Features: -- 30% products as Single variant (S type) -- 70% products as Multi variant (M+P type) with color, size, material options -""" - -import sys -import os -import csv -import random -import argparse -import re -from pathlib import Path -from datetime import datetime, timedelta -import itertools -from openpyxl import load_workbook -from openpyxl.styles import Alignment - -# Shared helpers (keeps template writing consistent across scripts) -from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared -from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared - -# Add parent directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -# Color definitions -COLORS = [ - "Red", "Blue", "Green", "Yellow", "Black", "White", "Orange", "Purple", - "Pink", "Brown", "Gray", "Navy", "Beige", "Cream", "Maroon", "Olive", - "Teal", "Cyan", "Magenta", "Lime", "Indigo", "Gold", "Silver", "Bronze", - "Coral", "Turquoise", "Violet", "Khaki", "Charcoal", "Ivory" -] - - -def clean_value(value): - """ - Clean and normalize value. 
- - Args: - value: Value to clean - - Returns: - Cleaned string value - """ - if value is None: - return '' - value = str(value).strip() - # Remove surrounding quotes - if value.startswith('"') and value.endswith('"'): - value = value[1:-1] - return value - - -def parse_csv_row(row: dict) -> dict: - """ - Parse CSV row and extract fields. - - Args: - row: CSV row dictionary - - Returns: - Parsed data dictionary - """ - return { - 'skuId': clean_value(row.get('skuId', '')), - 'name': clean_value(row.get('name', '')), - 'name_pinyin': clean_value(row.get('name_pinyin', '')), - 'create_time': clean_value(row.get('create_time', '')), - 'ruSkuName': clean_value(row.get('ruSkuName', '')), - 'enSpuName': clean_value(row.get('enSpuName', '')), - 'categoryName': clean_value(row.get('categoryName', '')), - 'supplierName': clean_value(row.get('supplierName', '')), - 'brandName': clean_value(row.get('brandName', '')), - 'file_id': clean_value(row.get('file_id', '')), - 'days_since_last_update': clean_value(row.get('days_since_last_update', '')), - 'id': clean_value(row.get('id', '')), - 'imageUrl': clean_value(row.get('imageUrl', '')) - } - - -def generate_handle(title: str) -> str: - """ - Generate URL-friendly handle from title. - - Args: - title: Product title - - Returns: - URL-friendly handle (ASCII only) - """ - # Keep backward-compatible function name while delegating to shared helper. - return _generate_handle_shared(title) - - -def extract_material_from_title(title: str) -> str: - """ - Extract material from title by taking the last word after splitting by space. 
- - 按照商品标题空格分割后的最后一个字符串作为material。 - 例如:"消防套 塑料【英文包装】" -> 最后一个字符串是 "塑料【英文包装】" - - Args: - title: Product title - - Returns: - Material string (single value) - """ - if not title: - return 'default' - - # Split by spaces (只按空格分割,保持原样) - parts = title.strip().split() - if parts: - # Get last part (最后一个字符串) - material = parts[-1] - # Remove brackets but keep content - material = re.sub(r'[【】\[\]()()]', '', material) - material = material.strip() - if material: - return material - - return 'default' - - -def generate_single_variant_row(csv_data: dict, base_sku_id: int = 1) -> dict: - """ - Generate Excel row for Single variant (S type) product. - - Args: - csv_data: Parsed CSV row data - base_sku_id: Base SKU ID for generating SKU code - - Returns: - Dictionary mapping Excel column names to values - """ - # Parse create_time - try: - created_at = datetime.strptime(csv_data['create_time'], '%Y-%m-%d %H:%M:%S') - create_time_str = created_at.strftime('%Y-%m-%d %H:%M:%S') - except: - created_at = datetime.now() - timedelta(days=random.randint(1, 365)) - create_time_str = created_at.strftime('%Y-%m-%d %H:%M:%S') - - # Generate title - use name or enSpuName - title = csv_data['name'] or csv_data['enSpuName'] or 'Product' - - # Generate handle - prefer enSpuName, then name_pinyin, then title - handle_source = csv_data['enSpuName'] or csv_data['name_pinyin'] or title - handle = generate_handle(handle_source) - if handle and not handle.startswith('products/'): - handle = f'products/{handle}' - - # Generate SEO fields - seo_title = f"{title} - {csv_data['categoryName']}" if csv_data['categoryName'] else title - seo_description = f"购买{csv_data['brandName']}{title}" if csv_data['brandName'] else title - seo_keywords_parts = [title] - if csv_data['categoryName']: - seo_keywords_parts.append(csv_data['categoryName']) - if csv_data['brandName']: - seo_keywords_parts.append(csv_data['brandName']) - seo_keywords = ','.join(seo_keywords_parts) - - # Generate tags from category and 
brand - tags_parts = [] - if csv_data['categoryName']: - tags_parts.append(csv_data['categoryName']) - if csv_data['brandName']: - tags_parts.append(csv_data['brandName']) - tags = ','.join(tags_parts) if tags_parts else '' - - # Generate prices - price = round(random.uniform(50, 500), 2) - compare_at_price = round(price * random.uniform(1.2, 1.5), 2) - cost_price = round(price * 0.6, 2) - - # Generate random stock - inventory_quantity = random.randint(0, 100) - - # Generate random weight - weight = round(random.uniform(0.1, 5.0), 2) - weight_unit = 'kg' - - # Use skuId as SKU code - sku_code = csv_data['skuId'] or f'SKU-{base_sku_id}' - - # Generate barcode - try: - sku_id = int(csv_data['skuId']) if csv_data['skuId'] else base_sku_id - barcode = f"BAR{sku_id:08d}" - except: - barcode = f"BAR{base_sku_id:08d}" - - # Build description - description = f"

    {csv_data['name']}

    " if csv_data['name'] else '' - - # Build brief (subtitle) - brief = csv_data['name'] or '' - - # Excel row data - excel_row = { - '商品ID': '', # Empty for new products - '创建时间': create_time_str, - '商品标题*': title, - '商品属性*': 'S', # Single variant product - '商品副标题': brief, - '商品描述': description, - 'SEO标题': seo_title, - 'SEO描述': seo_description, - 'SEO URL Handle': handle, - 'SEO URL 重定向': 'N', - 'SEO关键词': seo_keywords, - '商品上架': 'Y', - '需要物流': 'Y', - '商品收税': 'N', - '商品spu': '', - '启用虚拟销量': 'N', - '虚拟销量值': '', - '跟踪库存': 'Y', - '库存规则*': '1', - '专辑名称': csv_data['categoryName'] or '', - '标签': tags, - '供应商名称': csv_data['supplierName'] or '', - '供应商URL': '', - '款式1': '', # Empty for S type - '款式2': '', # Empty for S type - '款式3': '', # Empty for S type - '商品售价*': price, - '商品原价': compare_at_price, - '成本价': cost_price, - '商品SKU': sku_code, - '商品重量': weight, - '重量单位': weight_unit, - '商品条形码': barcode, - '商品库存': inventory_quantity, - '尺寸信息': '', - '原产地国别': '', - 'HS(协调制度)代码': '', - '商品图片*': csv_data['imageUrl'] or '', - '商品备注': '', - '款式备注': '', - '商品主图': csv_data['imageUrl'] or '', - } - - return excel_row - - -def generate_multi_variant_rows(csv_data: dict, base_sku_id: int = 1) -> list: - """ - Generate Excel rows for Multi variant (M+P type) product. 
- - Returns a list of rows: - - First row: M (主商品) with option names - - Following rows: P (子款式) with option values - - Args: - csv_data: Parsed CSV row data - base_sku_id: Base SKU ID for generating SKU codes - - Returns: - List of dictionaries mapping Excel column names to values - """ - rows = [] - - # Parse create_time - try: - created_at = datetime.strptime(csv_data['create_time'], '%Y-%m-%d %H:%M:%S') - create_time_str = created_at.strftime('%Y-%m-%d %H:%M:%S') - except: - created_at = datetime.now() - timedelta(days=random.randint(1, 365)) - create_time_str = created_at.strftime('%Y-%m-%d %H:%M:%S') - - # Generate title - title = csv_data['name'] or csv_data['enSpuName'] or 'Product' - - # Generate handle - handle_source = csv_data['enSpuName'] or csv_data['name_pinyin'] or title - handle = generate_handle(handle_source) - if handle and not handle.startswith('products/'): - handle = f'products/{handle}' - - # Generate SEO fields - seo_title = f"{title} - {csv_data['categoryName']}" if csv_data['categoryName'] else title - seo_description = f"购买{csv_data['brandName']}{title}" if csv_data['brandName'] else title - seo_keywords_parts = [title] - if csv_data['categoryName']: - seo_keywords_parts.append(csv_data['categoryName']) - if csv_data['brandName']: - seo_keywords_parts.append(csv_data['brandName']) - seo_keywords = ','.join(seo_keywords_parts) - - # Generate tags - tags_parts = [] - if csv_data['categoryName']: - tags_parts.append(csv_data['categoryName']) - if csv_data['brandName']: - tags_parts.append(csv_data['brandName']) - tags = ','.join(tags_parts) if tags_parts else '' - - # Extract material from title (last word after splitting by space) - material = extract_material_from_title(title) - - # Generate color options: randomly select 2-10 colors from COLORS list - num_colors = random.randint(2, 10) - selected_colors = random.sample(COLORS, min(num_colors, len(COLORS))) - - # Generate size options: 1-30, randomly select 4-8 - num_sizes = 
random.randint(4, 8) - all_sizes = [str(i) for i in range(1, 31)] - selected_sizes = random.sample(all_sizes, num_sizes) - - # Material has only one value - materials = [material] - - # Generate all combinations (Cartesian product) - variants = list(itertools.product(selected_colors, selected_sizes, materials)) - - # Generate M row (主商品) - description = f"

    {csv_data['name']}

    " if csv_data['name'] else '' - brief = csv_data['name'] or '' - - m_row = { - '商品ID': '', - '创建时间': create_time_str, - '商品标题*': title, - '商品属性*': 'M', # Main product - '商品副标题': brief, - '商品描述': description, - 'SEO标题': seo_title, - 'SEO描述': seo_description, - 'SEO URL Handle': handle, - 'SEO URL 重定向': 'N', - 'SEO关键词': seo_keywords, - '商品上架': 'Y', - '需要物流': 'Y', - '商品收税': 'N', - '商品spu': '', - '启用虚拟销量': 'N', - '虚拟销量值': '', - '跟踪库存': 'Y', - '库存规则*': '1', - '专辑名称': csv_data['categoryName'] or '', - '标签': tags, - '供应商名称': csv_data['supplierName'] or '', - '供应商URL': '', - '款式1': 'color', # Option name - '款式2': 'size', # Option name - '款式3': 'material', # Option name - '商品售价*': '', # Empty for M row - '商品原价': '', - '成本价': '', - '商品SKU': '', # Empty for M row - '商品重量': '', - '重量单位': '', - '商品条形码': '', - '商品库存': '', # Empty for M row - '尺寸信息': '', - '原产地国别': '', - 'HS(协调制度)代码': '', - '商品图片*': csv_data['imageUrl'] or '', # Main product image - '商品备注': '', - '款式备注': '', - '商品主图': csv_data['imageUrl'] or '', - } - rows.append(m_row) - - # Generate P rows (子款式) for each variant combination - base_price = round(random.uniform(50, 500), 2) - - for variant_idx, (color, size, mat) in enumerate(variants): - # Generate price variation (within ±20% of base) - price = round(base_price * random.uniform(0.8, 1.2), 2) - compare_at_price = round(price * random.uniform(1.2, 1.5), 2) - cost_price = round(price * 0.6, 2) - - # Generate random stock - inventory_quantity = random.randint(0, 100) - - # Generate random weight - weight = round(random.uniform(0.1, 5.0), 2) - weight_unit = 'kg' - - # Generate SKU code - sku_code = f"{csv_data['skuId']}-{color}-{size}-{mat}" if csv_data['skuId'] else f'SKU-{base_sku_id}-{variant_idx+1}' - - # Generate barcode - barcode = f"BAR{base_sku_id:08d}{variant_idx+1:03d}" - - p_row = { - '商品ID': '', - '创建时间': create_time_str, - '商品标题*': title, # Same as M row - '商品属性*': 'P', # Variant - '商品副标题': '', # Empty for P row - '商品描述': '', # Empty for P row - 
'SEO标题': '', # Empty for P row - 'SEO描述': '', # Empty for P row - 'SEO URL Handle': '', # Empty for P row - 'SEO URL 重定向': '', - 'SEO关键词': '', - '商品上架': 'Y', - '需要物流': 'Y', - '商品收税': 'N', - '商品spu': '', - '启用虚拟销量': 'N', - '虚拟销量值': '', - '跟踪库存': 'Y', - '库存规则*': '1', - '专辑名称': '', # Empty for P row - '标签': '', # Empty for P row - '供应商名称': '', # Empty for P row - '供应商URL': '', - '款式1': color, # Option value - '款式2': size, # Option value - '款式3': mat, # Option value - '商品售价*': price, - '商品原价': compare_at_price, - '成本价': cost_price, - '商品SKU': sku_code, - '商品重量': weight, - '重量单位': weight_unit, - '商品条形码': barcode, - '商品库存': inventory_quantity, - '尺寸信息': '', - '原产地国别': '', - 'HS(协调制度)代码': '', - '商品图片*': '', # Empty for P row (uses main product image) - '商品备注': '', - '款式备注': '', - '商品主图': '', - } - rows.append(p_row) - - return rows - - -def read_csv_file(csv_file: str) -> list: - """ - Read CSV file and return list of parsed rows. - - Args: - csv_file: Path to CSV file - - Returns: - List of parsed CSV data dictionaries - """ - csv_data_list = [] - - with open(csv_file, 'r', encoding='utf-8') as f: - reader = csv.DictReader(f) - for row in reader: - parsed = parse_csv_row(row) - csv_data_list.append(parsed) - - return csv_data_list - - -def create_excel_from_template(template_file: str, output_file: str, excel_rows: list): - """ - Create Excel file from template and fill with data rows. 
- - Args: - template_file: Path to Excel template file - output_file: Path to output Excel file - excel_rows: List of dictionaries mapping Excel column names to values - """ - _create_excel_from_template_shared(template_file, output_file, excel_rows) - print(f"Excel file created: {output_file}") - print(f" - Total rows: {len(excel_rows)}") - - -def main(): - parser = argparse.ArgumentParser(description='Convert CSV data to Excel import template with multi-variant support') - parser.add_argument('--csv-file', - default='data/customer1/goods_with_pic.5years_congku.csv.shuf.1w', - help='CSV file path') - parser.add_argument('--template', - default='docs/商品导入模板.xlsx', - help='Excel template file path') - parser.add_argument('--output', - default='商品导入数据.xlsx', - help='Output Excel file path') - parser.add_argument('--limit', - type=int, - default=None, - help='Limit number of products to process') - parser.add_argument('--single-ratio', - type=float, - default=0.3, - help='Ratio of single variant products (default: 0.3 = 30%%)') - parser.add_argument('--seed', - type=int, - default=None, - help='Random seed for reproducible results') - - args = parser.parse_args() - - # Set random seed if provided - if args.seed is not None: - random.seed(args.seed) - - # Check if files exist - if not os.path.exists(args.csv_file): - print(f"Error: CSV file not found: {args.csv_file}") - sys.exit(1) - - if not os.path.exists(args.template): - print(f"Error: Template file not found: {args.template}") - sys.exit(1) - - # Read CSV file - print(f"Reading CSV file: {args.csv_file}") - csv_data_list = read_csv_file(args.csv_file) - print(f"Read {len(csv_data_list)} rows from CSV") - - # Limit products if specified - if args.limit: - csv_data_list = csv_data_list[:args.limit] - print(f"Limited to {len(csv_data_list)} products") - - # Generate Excel rows - print(f"\nGenerating Excel rows...") - print(f" - Single variant ratio: {args.single_ratio*100:.0f}%") - print(f" - Multi variant ratio: 
{(1-args.single_ratio)*100:.0f}%") - - excel_rows = [] - single_count = 0 - multi_count = 0 - - for idx, csv_data in enumerate(csv_data_list): - # Decide if this product should be single or multi variant - is_single = random.random() < args.single_ratio - - if is_single: - # Generate single variant (S type) - row = generate_single_variant_row(csv_data, base_sku_id=idx+1) - excel_rows.append(row) - single_count += 1 - else: - # Generate multi variant (M+P type) - rows = generate_multi_variant_rows(csv_data, base_sku_id=idx+1) - excel_rows.extend(rows) - multi_count += 1 - - print(f"\nGenerated:") - print(f" - Single variant products: {single_count}") - print(f" - Multi variant products: {multi_count}") - print(f" - Total Excel rows: {len(excel_rows)}") - - # Create Excel file - print(f"\nCreating Excel file from template: {args.template}") - print(f"Output file: {args.output}") - create_excel_from_template(args.template, args.output, excel_rows) - - print(f"\nDone! Generated {len(excel_rows)} rows in Excel file.") - - -if __name__ == '__main__': - main() - diff --git a/scripts/daily_log_router.sh b/scripts/daily_log_router.sh deleted file mode 100755 index 5c9ddda..0000000 --- a/scripts/daily_log_router.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -# -# Route incoming log stream into per-day files. 
-# -# Usage: -# command 2>&1 | ./scripts/daily_log_router.sh [retention_days] -# - -set -euo pipefail - -if [ "$#" -lt 2 ]; then - echo "Usage: $0 [retention_days]" >&2 - exit 1 -fi - -SERVICE_NAME="$1" -LOG_DIR="$2" -RETENTION_DAYS="${3:-30}" - -mkdir -p "${LOG_DIR}" - -awk -v dir="${LOG_DIR}" -v service="${SERVICE_NAME}" -v retention_days="${RETENTION_DAYS}" ' -function rotate_file(day) { - return sprintf("%s/%s-%s.log", dir, service, day) -} - -function update_symlink(day) { - cmd = sprintf("ln -sfn \"%s-%s.log\" \"%s/%s.log\"", service, day, dir, service) - system(cmd) -} - -function cleanup_old_logs() { - cmd = sprintf("find \"%s\" -maxdepth 1 -type f -name \"%s-*.log\" -mtime +%d -delete >/dev/null 2>&1", dir, service, retention_days) - system(cmd) -} - -{ - day = strftime("%Y-%m-%d") - target = rotate_file(day) - - if (target != current_target) { - update_symlink(day) - cleanup_old_logs() - current_target = target - } - - print >> current_target - fflush(current_target) -} - -END { - if (current_target != "") { - close(current_target) - } -} -' diff --git a/scripts/data_import/README.md b/scripts/data_import/README.md new file mode 100644 index 0000000..98b831e --- /dev/null +++ b/scripts/data_import/README.md @@ -0,0 +1,13 @@ +# Data Import Scripts + +这一组脚本用于把外部商品数据或 CSV/XLSX 样本转换为 Shoplazza 导入格式。 + +- `amazon_xlsx_to_shoplazza_xlsx.py` +- `competitor_xlsx_to_shoplazza_xlsx.py` +- `csv_to_excel.py` +- `csv_to_excel_multi_variant.py` +- `shoplazza_excel_template.py` +- `shoplazza_import_template.py` +- `tenant3_csv_to_shoplazza_xlsx.sh` + +这里是离线数据转换工具,不属于线上服务运维入口。 diff --git a/scripts/data_import/amazon_xlsx_to_shoplazza_xlsx.py b/scripts/data_import/amazon_xlsx_to_shoplazza_xlsx.py new file mode 100644 index 0000000..f9a80c5 --- /dev/null +++ b/scripts/data_import/amazon_xlsx_to_shoplazza_xlsx.py @@ -0,0 +1,615 @@ +#!/usr/bin/env python3 +""" +Convert Amazon-format Excel exports (with Parent/Child ASIN structure) into +Shoplazza (店匠) product import Excel 
format based on `docs/商品导入模板.xlsx`. + +Data source: +- Directory with multiple `*.xlsx` files under `products_data/`. +- Each file contains a main sheet + "Notes" sheet. +- Column meanings (sample): + - ASIN: variant id (sku_id) + - 父ASIN: parent product id (spu_id) + +Output: +- For each 父ASIN group: + - If only 1 ASIN: generate one "S" row + - Else: generate one "M" row + multiple "P" rows + +Multi-variant (M/P) key point: +- Variant dimensions are parsed primarily from the `SKU` column, e.g. + "Size: One Size | Color: Black", and mapped into 款式1/2/3. +""" + +# NOTE: This file is intentionally the same implementation as +# `competitor_xlsx_to_shoplazza_xlsx.py`, but renamed to reflect the correct +# data source (Amazon-format exports). Keep the logic in sync. + +import os +import re +import sys +import argparse +import random +from datetime import datetime +from collections import defaultdict, Counter +from pathlib import Path + +from openpyxl import load_workbook + +REPO_ROOT = Path(__file__).resolve().parents[2] +sys.path.insert(0, str(REPO_ROOT)) + +from scripts.data_import.shoplazza_excel_template import create_excel_from_template_fast + + +PREFERRED_OPTION_KEYS = [ + "Size", "Color", "Style", "Pattern", "Material", "Flavor", "Scent", + "Pack", "Pack of", "Number of Items", "Count", "Capacity", "Length", + "Width", "Height", "Model", "Configuration", +] + + +def clean_str(v): + if v is None: + return "" + return str(v).strip() + + +def html_escape(s): + s = clean_str(s) + return (s.replace("&", "&") + .replace("<", "<") + .replace(">", ">")) + + +def generate_handle(title): + """ + Generate URL-friendly handle from title (ASCII only). + Keep consistent with existing scripts. 
+ """ + handle = clean_str(title).lower() + handle = re.sub(r"[^a-z0-9\\s-]", "", handle) + handle = re.sub(r"[-\\s]+", "-", handle).strip("-") + if len(handle) > 255: + handle = handle[:255] + return handle or "product" + + +def parse_date_to_template(dt_value): + """ + Template expects: YYYY-MM-DD HH:MM:SS + Input could be "2018-05-09" or datetime/date. + """ + if dt_value is None or dt_value == "": + return "" + if isinstance(dt_value, datetime): + return dt_value.strftime("%Y-%m-%d %H:%M:%S") + s = clean_str(dt_value) + for fmt in ("%Y-%m-%d", "%Y/%m/%d", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S"): + try: + d = datetime.strptime(s, fmt) + return d.strftime("%Y-%m-%d %H:%M:%S") + except Exception: + pass + return "" + + +def parse_weight(weight_conv, weight_raw): + """ + Return (weight_value, unit) where unit in {kg, lb, g, oz}. + Prefer '商品重量(单位换算)' like '68.04 g'. + Fallback to '商品重量' like '0.15 pounds'. + """ + s = clean_str(weight_conv) or clean_str(weight_raw) + if not s: + return ("", "") + m = re.search(r"([0-9]+(?:\\.[0-9]+)?)\\s*([a-zA-Z]+)", s) + if not m: + return ("", "") + val = float(m.group(1)) + unit = m.group(2).lower() + if unit in ("g", "gram", "grams"): + return (val, "g") + if unit in ("kg", "kilogram", "kilograms"): + return (val, "kg") + if unit in ("lb", "lbs", "pound", "pounds"): + return (val, "lb") + if unit in ("oz", "ounce", "ounces"): + return (val, "oz") + return ("", "") + + +def parse_dimensions_inches(dim_raw): + """ + Template '尺寸信息': 'L,W,H' in inches. + Input example: '7.9 x 7.9 x 2 inches' + """ + s = clean_str(dim_raw) + if not s: + return "" + nums = re.findall(r"([0-9]+(?:\\.[0-9]+)?)", s) + if len(nums) < 3: + return "" + return "{},{},{}".format(nums[0], nums[1], nums[2]) + + +def parse_sku_options(sku_text): + """ + Parse 'SKU' column into {key: value}. 
+ Example: + 'Size: One Size | Color: Black' -> {'Size':'One Size','Color':'Black'} + """ + s = clean_str(sku_text) + if not s: + return {} + parts = [p.strip() for p in s.split("|") if p.strip()] + out = {} + for p in parts: + if ":" not in p: + continue + k, v = p.split(":", 1) + k = clean_str(k) + v = clean_str(v) + if k and v: + out[k] = v + return out + + +def choose_option_keys(variant_dicts, max_keys=3): + freq = Counter() + for d in variant_dicts: + for k, v in d.items(): + if v: + freq[k] += 1 + if not freq: + return [] + preferred_rank = {k: i for i, k in enumerate(PREFERRED_OPTION_KEYS)} + + def key_sort(k): + return (preferred_rank.get(k, 10 ** 6), -freq[k], k.lower()) + + keys = sorted(freq.keys(), key=key_sort) + return keys[:max_keys] + + +def build_description_html(title, details, product_url): + parts = [] + if title: + parts.append("

    {}

    ".format(html_escape(title))) + detail_items = [x.strip() for x in clean_str(details).split("|") if x.strip()] + if detail_items: + li = "".join(["
  • {}
  • ".format(html_escape(x)) for x in detail_items[:30]]) + parts.append("
      {}
    ".format(li)) + if product_url: + parts.append('

    Source: {0}

    '.format(html_escape(product_url))) + return "".join(parts) + + +def read_amazon_rows_from_file(xlsx_path, max_rows=None): + wb = load_workbook(xlsx_path, read_only=True, data_only=True) + sheet_name = None + for name in wb.sheetnames: + if str(name).lower() == "notes": + continue + sheet_name = name + break + if sheet_name is None: + return [] + ws = wb[sheet_name] + + # Build header index from first row + header = next(ws.iter_rows(min_row=1, max_row=1, values_only=True)) + idx = {clean_str(v): i for i, v in enumerate(header) if v is not None and clean_str(v)} + + required = ["ASIN", "父ASIN", "商品标题", "商品主图", "SKU", "详细参数", "价格($)", "prime价格($)", + "上架时间", "类目路径", "大类目", "小类目", "品牌", "品牌链接", "商品详情页链接", + "商品重量(单位换算)", "商品重量", "商品尺寸"] + for k in required: + if k not in idx: + raise RuntimeError("Missing column '{}' in {} sheet {}".format(k, xlsx_path, sheet_name)) + + # OPT: use iter_rows(values_only=True) instead of ws.cell() per field. + # openpyxl cell access is relatively expensive; values_only is much faster. 
+ pos = {k: idx[k] for k in required} # 0-based positions in row tuple + + rows = [] + end_row = ws.max_row + if max_rows is not None: + end_row = min(end_row, 1 + int(max_rows)) + + for tup in ws.iter_rows(min_row=2, max_row=end_row, values_only=True): + asin = clean_str(tup[pos["ASIN"]]) + if not asin: + continue + parent = clean_str(tup[pos["父ASIN"]]) or asin + rows.append({ + "ASIN": asin, + "父ASIN": parent, + "SKU": clean_str(tup[pos["SKU"]]), + "详细参数": clean_str(tup[pos["详细参数"]]), + "商品标题": clean_str(tup[pos["商品标题"]]), + "商品主图": clean_str(tup[pos["商品主图"]]), + "价格($)": tup[pos["价格($)"]], + "prime价格($)": tup[pos["prime价格($)"]], + "上架时间": clean_str(tup[pos["上架时间"]]), + "类目路径": clean_str(tup[pos["类目路径"]]), + "大类目": clean_str(tup[pos["大类目"]]), + "小类目": clean_str(tup[pos["小类目"]]), + "品牌": clean_str(tup[pos["品牌"]]), + "品牌链接": clean_str(tup[pos["品牌链接"]]), + "商品详情页链接": clean_str(tup[pos["商品详情页链接"]]), + "商品重量(单位换算)": clean_str(tup[pos["商品重量(单位换算)"]]), + "商品重量": clean_str(tup[pos["商品重量"]]), + "商品尺寸": clean_str(tup[pos["商品尺寸"]]), + }) + return rows + + +def to_price(v): + if v is None or v == "": + return None + try: + return float(v) + except Exception: + s = clean_str(v) + m = re.search(r"([0-9]+(?:\\.[0-9]+)?)", s) + return float(m.group(1)) if m else None + + +def build_common_fields(base_row, spu_id): + title = base_row.get("商品标题") or "Product" + brand = base_row.get("品牌") or "" + big_cat = base_row.get("大类目") or "" + small_cat = base_row.get("小类目") or "" + cat_path = base_row.get("类目路径") or "" + + handle = generate_handle(title) + if handle and not handle.startswith("products/"): + handle = "products/{}".format(handle) + + seo_title = title + seo_desc_parts = [x for x in [brand, title, big_cat] if x] + seo_description = " ".join(seo_desc_parts)[:5000] + seo_keywords = ",".join([x for x in [title, brand, big_cat, small_cat] if x])[:5000] + tags = ",".join([x for x in [brand, big_cat, small_cat] if x]) + + created_at = parse_date_to_template(base_row.get("上架时间")) + 
description = build_description_html(title, base_row.get("详细参数"), base_row.get("商品详情页链接")) + + inventory_qty = 100 + weight_val, weight_unit = parse_weight(base_row.get("商品重量(单位换算)"), base_row.get("商品重量")) + size_info = parse_dimensions_inches(base_row.get("商品尺寸")) + + album = big_cat or (cat_path.split(":")[0] if cat_path else "") + + return { + "商品ID": "", + "创建时间": created_at, + "商品标题*": title[:255], + "商品副标题": "{} {}".format(brand, big_cat).strip()[:600], + "商品描述": description, + "SEO标题": seo_title[:5000], + "SEO描述": seo_description, + "SEO URL Handle": handle, + "SEO URL 重定向": "N", + "SEO关键词": seo_keywords, + "商品上架": "Y", + "需要物流": "Y", + "商品收税": "N", + "商品spu": spu_id[:100], + "启用虚拟销量": "N", + "虚拟销量值": "", + "跟踪库存": "Y", + "库存规则*": "1", + "专辑名称": album, + "标签": tags, + "供应商名称": "Amazon", + "供应商URL": base_row.get("商品详情页链接") or base_row.get("品牌链接") or "", + "商品重量": weight_val if weight_val != "" else "", + "重量单位": weight_unit, + "商品库存": inventory_qty, + "尺寸信息": size_info, + "原产地国别": "", + "HS(协调制度)代码": "", + "商品备注": "ASIN:{}; ParentASIN:{}; CategoryPath:{}".format( + base_row.get("ASIN", ""), spu_id, (cat_path[:200] if cat_path else "") + )[:500], + "款式备注": "", + } + + +def build_s_row(base_row): + spu_id = base_row.get("父ASIN") or base_row.get("ASIN") + common = build_common_fields(base_row, spu_id=spu_id) + price = to_price(base_row.get("prime价格($)")) or to_price(base_row.get("价格($)")) or 9.99 + image = base_row.get("商品主图") or "" + row = {} + row.update(common) + row.update({ + "商品属性*": "S", + "款式1": "", + "款式2": "", + "款式3": "", + "商品售价*": price, + "商品原价": price, + "成本价": "", + "商品SKU": base_row.get("ASIN") or "", + "商品条形码": "", + "商品图片*": image, + "商品主图": image, + }) + return row + + +def build_m_p_rows(variant_rows): + base = variant_rows[0] + spu_id = base.get("父ASIN") or base.get("ASIN") + common = build_common_fields(base, spu_id=spu_id) + + option_dicts = [parse_sku_options(v.get("SKU")) for v in variant_rows] + option_keys = 
def main():
    """CLI entry point: convert Amazon-format xlsx exports into Shoplazza import files.

    Pipeline:
      1. Read every ``*.xlsx`` in ``--input-dir`` and group variant rows by SPU
         (parent ASIN, falling back to the row's own ASIN); duplicate ASINs are
         dropped on first-seen basis.
      2. Build template rows per SPU (a single ``S`` row, or ``M`` + ``P`` rows)
         and assign a globally increasing 商品ID per SPU.
      3. Split rows into one or more output files without ever splitting a SPU
         group across files, then write them via the fast template writer.

    BUGFIX: the two ``store_false`` switches previously declared
    ``default=False``, which contradicted the documented default behaviour
    ("discard non-conforming data" / "default: skip") and made the flags
    no-ops.  Their defaults are now ``True`` so skipping is the default and
    passing a flag actually flips the behaviour.
    """
    parser = argparse.ArgumentParser(description="Convert Amazon-format xlsx files to Shoplazza import xlsx")
    parser.add_argument("--input-dir", default="data/mai_jia_jing_ling/products_data", help="Directory containing Amazon-format xlsx files")
    parser.add_argument("--template", default="docs/商品导入模板.xlsx", help="Shoplazza import template xlsx")
    parser.add_argument("--output", default="amazon_shoplazza_import.xlsx", help="Output xlsx file path (or prefix when split)")
    parser.add_argument("--max-files", type=int, default=None,
                        help="Limit number of xlsx files to read (for testing)")
    parser.add_argument("--max-rows-per-output", type=int, default=40000, help="Max total Excel rows per output file (including模板头部行,默认40000)")
    parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)")
    # 默认行为:丢弃不符合要求的数据 (skip_* default to True; the flags opt out).
    parser.add_argument("--keep-spu-if-parent-missing", action="store_false", dest="skip_spu_if_parent_missing", default=True, help="Keep SPU even if parent ASIN not found in variants (default: skip entire SPU)")
    parser.add_argument("--fix-sku-if-title-mismatch", action="store_false", dest="skip_sku_if_title_mismatch", default=True, help="Fix SKU title to match parent instead of skipping (default: skip SKU with mismatched title)")
    args = parser.parse_args()

    if not os.path.isdir(args.input_dir):
        raise RuntimeError("input-dir not found: {}".format(args.input_dir))
    if not os.path.exists(args.template):
        raise RuntimeError("template not found: {}".format(args.template))

    files = [os.path.join(args.input_dir, f) for f in os.listdir(args.input_dir) if f.lower().endswith(".xlsx")]
    files.sort()
    if args.max_files is not None:
        files = files[: int(args.max_files)]

    print("Reading Amazon-format files: {} (from {})".format(len(files), args.input_dir), flush=True)

    groups = defaultdict(list)
    seen_asin = set()

    for fp in files:
        print(" - loading: {}".format(fp), flush=True)
        try:
            rows = read_amazon_rows_from_file(fp)
        except Exception as e:
            # Best-effort: a broken workbook should not abort the whole run.
            print("WARN: failed to read {}: {}".format(fp, e))
            continue
        print(" loaded rows: {}".format(len(rows)), flush=True)

        for r in rows:
            asin = r.get("ASIN")
            if asin in seen_asin:
                continue
            seen_asin.add(asin)
            spu_id = r.get("父ASIN") or asin
            groups[spu_id].append(r)

    print("Collected variants: {}, SPU groups: {}".format(len(seen_asin), len(groups)), flush=True)

    # 先按 SPU 构造每个组的行,方便做“按最大行数拆分但不拆组”
    group_rows_list = []  # List[List[dict]]
    spu_count = 0
    next_product_id = 1  # 用于填充商品ID,全局自增
    # 将SPU顺序打乱,避免过于依赖输入文件的顺序
    spu_items = list(groups.items())
    random.shuffle(spu_items)

    for spu_id, variants in spu_items:
        if not variants:
            continue

        # 确保父ASIN对应的变体在列表最前面
        parent_variant = None
        other_variants = []
        for v in variants:
            if v.get("ASIN") == spu_id:
                parent_variant = v
            else:
                other_variants.append(v)

        # 重新排序:父ASIN在前,其他在后
        if parent_variant:
            variants = [parent_variant] + other_variants
        else:
            # 如果找不到父ASIN对应的变体
            print(
                f"WARN: Parent ASIN not found in variants: SPU={spu_id}, "
                f"variant_count={len(variants)}, first_ASIN={variants[0].get('ASIN') if variants else 'N/A'}",
                flush=True,
            )
            # 根据开关决定是否丢弃整个SPU
            if args.skip_spu_if_parent_missing:
                print(
                    f"SKIP entire SPU due to missing parent ASIN: SPU={spu_id}",
                    flush=True,
                )
                continue

        # 处理变体标题:如果与主商品不一致,根据开关决定修正或丢弃
        main_title = variants[0].get("商品标题") or ""
        filtered_variants = []
        for v in variants:
            title = v.get("商品标题") or ""
            if main_title and title and title != main_title:
                if args.skip_sku_if_title_mismatch:
                    # 丢弃标题不一致的SKU
                    print(
                        f"SKIP SKU due to title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "
                        f"main_title='{main_title}', variant_title='{title}'",
                        flush=True,
                    )
                    continue
                else:
                    # 修正标题
                    print(
                        f"FIX variant title mismatch: SPU={spu_id}, ASIN={v.get('ASIN')}, "
                        f"main_title='{main_title}', variant_title='{title}' -> using main_title",
                        flush=True,
                    )
                    v["商品标题"] = main_title  # 统一为主商品标题
            filtered_variants.append(v)

        # 如果所有变体都被过滤掉,跳过整个SPU
        if not filtered_variants:
            print(
                f"SKIP entire SPU: all variants filtered out, SPU={spu_id}",
                flush=True,
            )
            continue

        variants = filtered_variants

        spu_count += 1
        if args.max_products is not None and spu_count > int(args.max_products):
            break

        if len(variants) == 1:
            rows = [build_s_row(variants[0])]
        else:
            rows = build_m_p_rows(variants)

        # 填充商品ID(从1开始全局递增)
        # NOTE(review): the mangled original is ambiguous about whether the
        # increment sits inside the row loop; one id per SPU (shared by its
        # M/P rows) matches the visible layout — confirm against the importer.
        for r in rows:
            r["商品ID"] = next_product_id
        next_product_id += 1

        group_rows_list.append(rows)

    # 按最大行数拆成多个文件(注意:同一 SPU 不拆分)
    data_start_row = 4  # 与模板/写入工具保持一致
    header_rows = data_start_row - 1  # 包含标题行+说明行
    max_total_rows = args.max_rows_per_output or 0
    if max_total_rows and max_total_rows > header_rows:
        max_data_rows = max_total_rows - header_rows
    else:
        max_data_rows = None  # 不限制

    chunks = []
    current_chunk = []
    current_count = 0

    if max_data_rows is None:
        # 不做分片,直接一个 chunk
        for gr in group_rows_list:
            current_chunk.extend(gr)
        if current_chunk:
            chunks.append(current_chunk)
    else:
        for gr in group_rows_list:
            gsize = len(gr)
            # 如果单个 SPU 本身就超过阈值,只能独占一个文件
            if gsize > max_data_rows:
                if current_chunk:
                    chunks.append(current_chunk)
                    current_chunk = []
                    current_count = 0
                chunks.append(gr)
                continue
            # 如果放不下当前 chunk,则先封一个,再开新 chunk
            if current_count + gsize > max_data_rows:
                if current_chunk:
                    chunks.append(current_chunk)
                current_chunk = list(gr)
                current_count = gsize
            else:
                current_chunk.extend(gr)
                current_count += gsize
        if current_chunk:
            chunks.append(current_chunk)

    total_rows = sum(len(c) for c in chunks)
    print(
        "Generated Excel data rows: {} (SPU groups output: {}, files: {})".format(
            total_rows, len(group_rows_list), len(chunks)
        ),
        flush=True,
    )

    # 输出多个文件:如果只一个 chunk,直接用指定 output;多个则加 _partN 后缀
    base = Path(args.output)
    stem = base.stem
    suffix = base.suffix or ".xlsx"

    for idx, chunk in enumerate(chunks, start=1):
        out_path = str(base) if len(chunks) == 1 else str(base.with_name(f"{stem}_part{idx}{suffix}"))
        print(f"Writing file {idx}/{len(chunks)}: {out_path} (rows: {len(chunk)})", flush=True)
        create_excel_from_template_fast(args.template, out_path, chunk, data_start_row=data_start_row)
#!/usr/bin/env python3
"""Backward-compatibility shim for a misleadingly named script.

The xlsx files under ``data/mai_jia_jing_ling/products_data`` are
Amazon-format exports (Parent/Child ASIN), not competitor data, so the real
implementation lives in ``scripts/data_import/amazon_xlsx_to_shoplazza_xlsx.py``.
This module keeps the old entry-point name working by delegating directly to
that script's ``main``; ``sys.argv`` is shared, so every CLI flag is forwarded
unchanged and callers automatically pick up the latest fast read/write path.
"""

import sys
from pathlib import Path

# Make the repository root importable so the absolute package import below
# works no matter which directory the script is launched from.
REPO_ROOT = Path(__file__).resolve().parents[2]
sys.path.insert(0, str(REPO_ROOT))

from scripts.data_import.amazon_xlsx_to_shoplazza_xlsx import main as amazon_main


if __name__ == "__main__":
    amazon_main()
def clean_value(value):
    """Normalize a raw CSV cell value.

    Args:
        value: Raw cell content of any type (possibly ``None``).

    Returns:
        The value as a stripped string, with one pair of surrounding
        double quotes removed when present; ``''`` for ``None``.
    """
    if value is None:
        return ''
    text = str(value).strip()
    # Drop a single layer of surrounding double quotes.
    if text.startswith('"') and text.endswith('"'):
        return text[1:-1]
    return text


def parse_csv_row(row: dict) -> dict:
    """Extract and clean the known CSV columns from one record.

    Args:
        row: One record as produced by ``csv.DictReader``.

    Returns:
        Dict with a fixed key set, every value passed through clean_value
        (missing columns become '').
    """
    fields = (
        'skuId', 'name', 'name_pinyin', 'create_time', 'ruSkuName',
        'enSpuName', 'categoryName', 'supplierName', 'brandName',
        'file_id', 'days_since_last_update', 'id', 'imageUrl',
    )
    return {field: clean_value(row.get(field, '')) for field in fields}


def generate_handle(title: str) -> str:
    """Generate a URL-friendly (ASCII-only) handle from a product title.

    Kept under its historical name for backward compatibility; the actual
    work is done by the shared template helper.
    """
    return _generate_handle_shared(title)


def read_csv_file(csv_file: str) -> list:
    """Load a CSV file and parse every record.

    Args:
        csv_file: Path to a UTF-8 encoded CSV file with a header row.

    Returns:
        List of dicts, one per record, each produced by parse_csv_row.
    """
    with open(csv_file, 'r', encoding='utf-8') as handle:
        return [parse_csv_row(record) for record in csv.DictReader(handle)]
seo_keywords_parts.append(csv_data['brandName']) + seo_keywords = ','.join(seo_keywords_parts) + + # Generate tags from category and brand + tags_parts = [] + if csv_data['categoryName']: + tags_parts.append(csv_data['categoryName']) + if csv_data['brandName']: + tags_parts.append(csv_data['brandName']) + tags = ','.join(tags_parts) if tags_parts else '' + + # Generate prices (similar to import_tenant2_csv.py) + price = round(random.uniform(50, 500), 2) + compare_at_price = round(price * random.uniform(1.2, 1.5), 2) + cost_price = round(price * 0.6, 2) + + # Generate random stock + inventory_quantity = random.randint(0, 100) + + # Generate random weight + weight = round(random.uniform(0.1, 5.0), 2) + weight_unit = 'kg' + + # Use ruSkuName as SKU title, fallback to name + sku_title = csv_data['ruSkuName'] or csv_data['name'] or 'SKU' + + # Use skuId as SKU code + sku_code = csv_data['skuId'] or '' + + # Generate barcode + try: + sku_id = int(csv_data['skuId']) + barcode = f"BAR{sku_id:08d}" + except: + barcode = '' + + # Build description + description = f"

    {csv_data['name']}

    " if csv_data['name'] else '' + + # Build brief (subtitle) + brief = csv_data['name'] or '' + + # Excel row data (mapping to Excel template columns) + excel_row = { + '商品ID': '', # Empty for new products + '创建时间': create_time_str, + '商品标题*': title, + '商品属性*': 'S', # Single variant product + '商品副标题': brief, + '商品描述': description, + 'SEO标题': seo_title, + 'SEO描述': seo_description, + 'SEO URL Handle': handle, + 'SEO URL 重定向': 'N', # Default to N + 'SEO关键词': seo_keywords, + '商品上架': 'Y', # Published by default + '需要物流': 'Y', # Requires shipping + '商品收税': 'N', # Not taxable by default + '商品spu': '', # Empty + '启用虚拟销量': 'N', # No fake sales + '虚拟销量值': '', # Empty + '跟踪库存': 'Y', # Track inventory + '库存规则*': '1', # Allow purchase when stock is 0 + '专辑名称': csv_data['categoryName'] or '', # Category as album + '标签': tags, + '供应商名称': csv_data['supplierName'] or '', + '供应商URL': '', # Empty + '款式1': '', # Not used for S type + '款式2': '', # Not used for S type + '款式3': '', # Not used for S type + '商品售价*': price, + '商品原价': compare_at_price, + '成本价': cost_price, + '商品SKU': sku_code, + '商品重量': weight, + '重量单位': weight_unit, + '商品条形码': barcode, + '商品库存': inventory_quantity, + '尺寸信息': '', # Empty + '原产地国别': '', # Empty + 'HS(协调制度)代码': '', # Empty + '商品图片*': csv_data['imageUrl'] or '', # Image URL + '商品备注': '', # Empty + '款式备注': '', # Empty + '商品主图': csv_data['imageUrl'] or '', # Main image URL + } + + return excel_row + + +def create_excel_from_template(template_file: str, output_file: str, csv_data_list: list): + """ + Create Excel file from template and fill with CSV data. 
def main():
    """Command-line entry point: convert a CSV export into a Shoplazza import
    Excel file (one S-type row per CSV record)."""
    parser = argparse.ArgumentParser(description='Convert CSV data to Excel import template')
    parser.add_argument('--csv-file',
                        default='data/customer1/goods_with_pic.5years_congku.csv.shuf.1w',
                        help='CSV file path (default: data/customer1/goods_with_pic.5years_congku.csv.shuf.1w)')
    parser.add_argument('--template',
                        default='docs/商品导入模板.xlsx',
                        help='Excel template file path (default: docs/商品导入模板.xlsx)')
    parser.add_argument('--output',
                        default='商品导入数据.xlsx',
                        help='Output Excel file path (default: 商品导入数据.xlsx)')
    parser.add_argument('--limit',
                        type=int,
                        default=None,
                        help='Limit number of rows to process (default: all)')
    args = parser.parse_args()

    # Fail fast when either input file is missing.
    for path, label in ((args.csv_file, 'CSV file'), (args.template, 'Template file')):
        if not os.path.exists(path):
            print(f"Error: {label} not found: {path}")
            sys.exit(1)

    print(f"Reading CSV file: {args.csv_file}")
    records = read_csv_file(args.csv_file)
    print(f"Read {len(records)} rows from CSV")

    # Optionally truncate the dataset for quick test runs.
    if args.limit:
        records = records[:args.limit]
        print(f"Limited to {len(records)} rows")

    print(f"Creating Excel file from template: {args.template}")
    print(f"Output file: {args.output}")
    create_excel_from_template(args.template, args.output, records)

    print(f"\nDone! Generated {len(records)} product rows in Excel file.")
def parse_csv_row(row: dict) -> dict:
    """Extract and clean the known CSV columns from one record.

    Args:
        row: One record as produced by ``csv.DictReader``.

    Returns:
        Dict with a fixed key set, every value passed through clean_value
        (missing columns become '').
    """
    fields = (
        'skuId', 'name', 'name_pinyin', 'create_time', 'ruSkuName',
        'enSpuName', 'categoryName', 'supplierName', 'brandName',
        'file_id', 'days_since_last_update', 'id', 'imageUrl',
    )
    return {field: clean_value(row.get(field, '')) for field in fields}


def generate_handle(title: str) -> str:
    """Generate a URL-friendly (ASCII-only) handle from a product title.

    Kept under its historical name for backward compatibility; the actual
    work is done by the shared template helper.
    """
    return _generate_handle_shared(title)


def extract_material_from_title(title: str) -> str:
    """Derive a material option value from a product title.

    按照商品标题空格分割后的最后一个字符串作为material,
    e.g. "消防套 塑料【英文包装】" -> "塑料英文包装" (brackets stripped,
    inner text kept).

    Args:
        title: Product title (may be empty).

    Returns:
        The cleaned last whitespace-separated token, or 'default' when no
        usable token remains.
    """
    tokens = title.strip().split() if title else []
    if not tokens:
        return 'default'
    # Remove CJK and ASCII brackets but keep their content.
    candidate = re.sub(r'[【】\[\]()()]', '', tokens[-1]).strip()
    return candidate if candidate else 'default'
brand + tags_parts = [] + if csv_data['categoryName']: + tags_parts.append(csv_data['categoryName']) + if csv_data['brandName']: + tags_parts.append(csv_data['brandName']) + tags = ','.join(tags_parts) if tags_parts else '' + + # Generate prices + price = round(random.uniform(50, 500), 2) + compare_at_price = round(price * random.uniform(1.2, 1.5), 2) + cost_price = round(price * 0.6, 2) + + # Generate random stock + inventory_quantity = random.randint(0, 100) + + # Generate random weight + weight = round(random.uniform(0.1, 5.0), 2) + weight_unit = 'kg' + + # Use skuId as SKU code + sku_code = csv_data['skuId'] or f'SKU-{base_sku_id}' + + # Generate barcode + try: + sku_id = int(csv_data['skuId']) if csv_data['skuId'] else base_sku_id + barcode = f"BAR{sku_id:08d}" + except: + barcode = f"BAR{base_sku_id:08d}" + + # Build description + description = f"

    {csv_data['name']}

    " if csv_data['name'] else '' + + # Build brief (subtitle) + brief = csv_data['name'] or '' + + # Excel row data + excel_row = { + '商品ID': '', # Empty for new products + '创建时间': create_time_str, + '商品标题*': title, + '商品属性*': 'S', # Single variant product + '商品副标题': brief, + '商品描述': description, + 'SEO标题': seo_title, + 'SEO描述': seo_description, + 'SEO URL Handle': handle, + 'SEO URL 重定向': 'N', + 'SEO关键词': seo_keywords, + '商品上架': 'Y', + '需要物流': 'Y', + '商品收税': 'N', + '商品spu': '', + '启用虚拟销量': 'N', + '虚拟销量值': '', + '跟踪库存': 'Y', + '库存规则*': '1', + '专辑名称': csv_data['categoryName'] or '', + '标签': tags, + '供应商名称': csv_data['supplierName'] or '', + '供应商URL': '', + '款式1': '', # Empty for S type + '款式2': '', # Empty for S type + '款式3': '', # Empty for S type + '商品售价*': price, + '商品原价': compare_at_price, + '成本价': cost_price, + '商品SKU': sku_code, + '商品重量': weight, + '重量单位': weight_unit, + '商品条形码': barcode, + '商品库存': inventory_quantity, + '尺寸信息': '', + '原产地国别': '', + 'HS(协调制度)代码': '', + '商品图片*': csv_data['imageUrl'] or '', + '商品备注': '', + '款式备注': '', + '商品主图': csv_data['imageUrl'] or '', + } + + return excel_row + + +def generate_multi_variant_rows(csv_data: dict, base_sku_id: int = 1) -> list: + """ + Generate Excel rows for Multi variant (M+P type) product. 
+ + Returns a list of rows: + - First row: M (主商品) with option names + - Following rows: P (子款式) with option values + + Args: + csv_data: Parsed CSV row data + base_sku_id: Base SKU ID for generating SKU codes + + Returns: + List of dictionaries mapping Excel column names to values + """ + rows = [] + + # Parse create_time + try: + created_at = datetime.strptime(csv_data['create_time'], '%Y-%m-%d %H:%M:%S') + create_time_str = created_at.strftime('%Y-%m-%d %H:%M:%S') + except: + created_at = datetime.now() - timedelta(days=random.randint(1, 365)) + create_time_str = created_at.strftime('%Y-%m-%d %H:%M:%S') + + # Generate title + title = csv_data['name'] or csv_data['enSpuName'] or 'Product' + + # Generate handle + handle_source = csv_data['enSpuName'] or csv_data['name_pinyin'] or title + handle = generate_handle(handle_source) + if handle and not handle.startswith('products/'): + handle = f'products/{handle}' + + # Generate SEO fields + seo_title = f"{title} - {csv_data['categoryName']}" if csv_data['categoryName'] else title + seo_description = f"购买{csv_data['brandName']}{title}" if csv_data['brandName'] else title + seo_keywords_parts = [title] + if csv_data['categoryName']: + seo_keywords_parts.append(csv_data['categoryName']) + if csv_data['brandName']: + seo_keywords_parts.append(csv_data['brandName']) + seo_keywords = ','.join(seo_keywords_parts) + + # Generate tags + tags_parts = [] + if csv_data['categoryName']: + tags_parts.append(csv_data['categoryName']) + if csv_data['brandName']: + tags_parts.append(csv_data['brandName']) + tags = ','.join(tags_parts) if tags_parts else '' + + # Extract material from title (last word after splitting by space) + material = extract_material_from_title(title) + + # Generate color options: randomly select 2-10 colors from COLORS list + num_colors = random.randint(2, 10) + selected_colors = random.sample(COLORS, min(num_colors, len(COLORS))) + + # Generate size options: 1-30, randomly select 4-8 + num_sizes = 
random.randint(4, 8) + all_sizes = [str(i) for i in range(1, 31)] + selected_sizes = random.sample(all_sizes, num_sizes) + + # Material has only one value + materials = [material] + + # Generate all combinations (Cartesian product) + variants = list(itertools.product(selected_colors, selected_sizes, materials)) + + # Generate M row (主商品) + description = f"

    {csv_data['name']}

    " if csv_data['name'] else '' + brief = csv_data['name'] or '' + + m_row = { + '商品ID': '', + '创建时间': create_time_str, + '商品标题*': title, + '商品属性*': 'M', # Main product + '商品副标题': brief, + '商品描述': description, + 'SEO标题': seo_title, + 'SEO描述': seo_description, + 'SEO URL Handle': handle, + 'SEO URL 重定向': 'N', + 'SEO关键词': seo_keywords, + '商品上架': 'Y', + '需要物流': 'Y', + '商品收税': 'N', + '商品spu': '', + '启用虚拟销量': 'N', + '虚拟销量值': '', + '跟踪库存': 'Y', + '库存规则*': '1', + '专辑名称': csv_data['categoryName'] or '', + '标签': tags, + '供应商名称': csv_data['supplierName'] or '', + '供应商URL': '', + '款式1': 'color', # Option name + '款式2': 'size', # Option name + '款式3': 'material', # Option name + '商品售价*': '', # Empty for M row + '商品原价': '', + '成本价': '', + '商品SKU': '', # Empty for M row + '商品重量': '', + '重量单位': '', + '商品条形码': '', + '商品库存': '', # Empty for M row + '尺寸信息': '', + '原产地国别': '', + 'HS(协调制度)代码': '', + '商品图片*': csv_data['imageUrl'] or '', # Main product image + '商品备注': '', + '款式备注': '', + '商品主图': csv_data['imageUrl'] or '', + } + rows.append(m_row) + + # Generate P rows (子款式) for each variant combination + base_price = round(random.uniform(50, 500), 2) + + for variant_idx, (color, size, mat) in enumerate(variants): + # Generate price variation (within ±20% of base) + price = round(base_price * random.uniform(0.8, 1.2), 2) + compare_at_price = round(price * random.uniform(1.2, 1.5), 2) + cost_price = round(price * 0.6, 2) + + # Generate random stock + inventory_quantity = random.randint(0, 100) + + # Generate random weight + weight = round(random.uniform(0.1, 5.0), 2) + weight_unit = 'kg' + + # Generate SKU code + sku_code = f"{csv_data['skuId']}-{color}-{size}-{mat}" if csv_data['skuId'] else f'SKU-{base_sku_id}-{variant_idx+1}' + + # Generate barcode + barcode = f"BAR{base_sku_id:08d}{variant_idx+1:03d}" + + p_row = { + '商品ID': '', + '创建时间': create_time_str, + '商品标题*': title, # Same as M row + '商品属性*': 'P', # Variant + '商品副标题': '', # Empty for P row + '商品描述': '', # Empty for P row + 
'SEO标题': '', # Empty for P row + 'SEO描述': '', # Empty for P row + 'SEO URL Handle': '', # Empty for P row + 'SEO URL 重定向': '', + 'SEO关键词': '', + '商品上架': 'Y', + '需要物流': 'Y', + '商品收税': 'N', + '商品spu': '', + '启用虚拟销量': 'N', + '虚拟销量值': '', + '跟踪库存': 'Y', + '库存规则*': '1', + '专辑名称': '', # Empty for P row + '标签': '', # Empty for P row + '供应商名称': '', # Empty for P row + '供应商URL': '', + '款式1': color, # Option value + '款式2': size, # Option value + '款式3': mat, # Option value + '商品售价*': price, + '商品原价': compare_at_price, + '成本价': cost_price, + '商品SKU': sku_code, + '商品重量': weight, + '重量单位': weight_unit, + '商品条形码': barcode, + '商品库存': inventory_quantity, + '尺寸信息': '', + '原产地国别': '', + 'HS(协调制度)代码': '', + '商品图片*': '', # Empty for P row (uses main product image) + '商品备注': '', + '款式备注': '', + '商品主图': '', + } + rows.append(p_row) + + return rows + + +def read_csv_file(csv_file: str) -> list: + """ + Read CSV file and return list of parsed rows. + + Args: + csv_file: Path to CSV file + + Returns: + List of parsed CSV data dictionaries + """ + csv_data_list = [] + + with open(csv_file, 'r', encoding='utf-8') as f: + reader = csv.DictReader(f) + for row in reader: + parsed = parse_csv_row(row) + csv_data_list.append(parsed) + + return csv_data_list + + +def create_excel_from_template(template_file: str, output_file: str, excel_rows: list): + """ + Create Excel file from template and fill with data rows. 
+ + Args: + template_file: Path to Excel template file + output_file: Path to output Excel file + excel_rows: List of dictionaries mapping Excel column names to values + """ + _create_excel_from_template_shared(template_file, output_file, excel_rows) + print(f"Excel file created: {output_file}") + print(f" - Total rows: {len(excel_rows)}") + + +def main(): + parser = argparse.ArgumentParser(description='Convert CSV data to Excel import template with multi-variant support') + parser.add_argument('--csv-file', + default='data/customer1/goods_with_pic.5years_congku.csv.shuf.1w', + help='CSV file path') + parser.add_argument('--template', + default='docs/商品导入模板.xlsx', + help='Excel template file path') + parser.add_argument('--output', + default='商品导入数据.xlsx', + help='Output Excel file path') + parser.add_argument('--limit', + type=int, + default=None, + help='Limit number of products to process') + parser.add_argument('--single-ratio', + type=float, + default=0.3, + help='Ratio of single variant products (default: 0.3 = 30%%)') + parser.add_argument('--seed', + type=int, + default=None, + help='Random seed for reproducible results') + + args = parser.parse_args() + + # Set random seed if provided + if args.seed is not None: + random.seed(args.seed) + + # Check if files exist + if not os.path.exists(args.csv_file): + print(f"Error: CSV file not found: {args.csv_file}") + sys.exit(1) + + if not os.path.exists(args.template): + print(f"Error: Template file not found: {args.template}") + sys.exit(1) + + # Read CSV file + print(f"Reading CSV file: {args.csv_file}") + csv_data_list = read_csv_file(args.csv_file) + print(f"Read {len(csv_data_list)} rows from CSV") + + # Limit products if specified + if args.limit: + csv_data_list = csv_data_list[:args.limit] + print(f"Limited to {len(csv_data_list)} products") + + # Generate Excel rows + print(f"\nGenerating Excel rows...") + print(f" - Single variant ratio: {args.single_ratio*100:.0f}%") + print(f" - Multi variant ratio: 
def load_template_column_mapping(ws, header_row_idx=2):
    """
    Read the header row in the template sheet and build a mapping:
    header_name -> column_index (1-based).

    Args:
        ws: Worksheet to inspect.
        header_row_idx: 1-based index of the row holding header names (default 2).

    Returns:
        Dict[str, int] mapping stripped header text to its 1-based column index;
        empty header cells are skipped.
    """
    column_mapping = {}
    for col_idx in range(1, ws.max_column + 1):
        cell_value = ws.cell(row=header_row_idx, column=col_idx).value
        if cell_value:
            column_mapping[str(cell_value).strip()] = col_idx
    return column_mapping


def create_excel_from_template(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4):
    """
    Create an Excel file from the Shoplazza template and fill with data rows.

    Opens the template in normal (fully loaded) mode, clears any pre-existing
    data rows, then writes each row dict into the columns named by the header.

    Args:
        template_file: Path to Excel template file
        output_file: Path to output Excel file
        excel_rows: List[Dict[str, Any]] mapping template header -> value
        header_row_idx: Header row index in template (default 2)
        data_start_row: Data start row index in template (default 4)
    """
    wb = load_workbook(template_file)
    ws = wb.active

    column_mapping = load_template_column_mapping(ws, header_row_idx=header_row_idx)

    # Clear existing data rows so stale template content never leaks into output.
    last_template_row = ws.max_row
    if last_template_row >= data_start_row:
        for row in range(data_start_row, last_template_row + 1):
            for col in range(1, ws.max_column + 1):
                ws.cell(row=row, column=col).value = None

    # Write data rows (OPT: only write fields that actually exist in excel_row)
    # This avoids looping over all 42 template columns for every output row.
    for row_idx, excel_row in enumerate(excel_rows):
        excel_row_num = data_start_row + row_idx
        for field_name, value in excel_row.items():
            col_idx = column_mapping.get(field_name)
            if not col_idx:
                continue
            cell = ws.cell(row=excel_row_num, column=col_idx)
            cell.value = value
            if isinstance(value, str):
                cell.alignment = Alignment(vertical='top', wrap_text=True)
            else:
                cell.alignment = Alignment(vertical='top')

    wb.save(output_file)
    print("Excel file created: {}".format(output_file))
    print(" - Total rows: {}".format(len(excel_rows)))


def create_excel_from_template_fast(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4):
    """
    Faster writer for large datasets.

    Instead of opening the template workbook in write mode and assigning cells
    one by one, we:
    - read the template's first (data_start_row-1) rows as values
    - build a header->index mapping from header_row_idx
    - create a new write_only workbook and append rows

    This is much faster for tens/hundreds of thousands of cells.

    Args:
        template_file: Path to Excel template file
        output_file: Path to output Excel file
        excel_rows: List[Dict[str, Any]] mapping template header -> value
        header_row_idx: Header row index in template (default 2)
        data_start_row: Data start row index in template (default 4)
    """
    tpl_wb = load_workbook(template_file, read_only=True, data_only=True)
    tpl_ws = tpl_wb.active

    max_col = tpl_ws.max_column
    tpl_title = tpl_ws.title

    # Copy template "instruction" rows (typically rows 1-3) into output
    prefix_rows = list(tpl_ws.iter_rows(min_row=1, max_row=data_start_row - 1, values_only=True))

    header_values = None
    if 1 <= header_row_idx <= len(prefix_rows):
        header_values = prefix_rows[header_row_idx - 1]
    else:
        # Fallback: read header row directly
        header_values = next(tpl_ws.iter_rows(min_row=header_row_idx, max_row=header_row_idx, values_only=True))

    # BUGFIX: a read_only workbook keeps the source file handle open until
    # close() is called; everything we need has been materialized above.
    tpl_wb.close()

    header_values = list(header_values)[:max_col]
    col_map = {}
    for i, v in enumerate(header_values):
        if v is None:
            continue
        col_map[str(v).strip()] = i  # 0-based

    wb = Workbook(write_only=True)
    ws = wb.create_sheet(title=tpl_title)
    # remove default sheet if present (openpyxl may create one)
    if "Sheet" in wb.sheetnames and wb["Sheet"] is not ws:
        try:
            wb.remove(wb["Sheet"])
        except Exception:
            pass

    # Write prefix rows, normalized to max_col
    for r in prefix_rows:
        r = list(r)[:max_col]
        if len(r) < max_col:
            r = r + [None] * (max_col - len(r))
        ws.append(r)

    # Write data rows: dense row buffers appended in one shot per row.
    for excel_row in excel_rows:
        row_vals = [None] * max_col
        for field_name, value in excel_row.items():
            if field_name not in col_map:
                continue
            row_vals[col_map[field_name]] = value
        ws.append(row_vals)

    wb.save(output_file)
    print("Excel file created (fast): {}".format(output_file))
    print(" - Total rows: {}".format(len(excel_rows)))
new file mode 100644 index 0000000..a05f291 --- /dev/null +++ b/scripts/data_import/shoplazza_import_template.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +""" +Shared helpers for generating Shoplazza product import Excel files from the +official template `docs/商品导入模板.xlsx`. + +We keep this module small and dependency-light (openpyxl only) so other scripts +can reuse the same template-writing behavior (header row mapping, data start +row, alignment). +""" + +import re +from datetime import datetime +from typing import Dict, Iterable, List, Optional + +from openpyxl import load_workbook +from openpyxl.styles import Alignment + + +def generate_handle(title: str) -> str: + """ + Generate URL-friendly handle from title (ASCII only), suitable for Shoplazza + `SEO URL Handle` field. Caller may prepend `products/`. + """ + if not title: + return "product" + + handle = str(title).lower() + handle = re.sub(r"[^a-z0-9\s-]", "", handle) + handle = re.sub(r"[-\s]+", "-", handle).strip("-") + + if len(handle) > 255: + handle = handle[:255] + + return handle or "product" + + +def parse_date_to_datetime_str(value) -> str: + """ + Parse common date strings into Shoplazza template datetime string: + `YYYY-MM-DD HH:MM:SS`. If parsing fails, returns empty string. 
+ """ + if value is None: + return "" + + if isinstance(value, datetime): + return value.strftime("%Y-%m-%d %H:%M:%S") + + s = str(value).strip() + if not s: + return "" + + # Most competitor sheets use YYYY-MM-DD + for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"): + try: + dt = datetime.strptime(s, fmt) + if fmt in ("%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"): + dt = dt.replace(hour=0, minute=0, second=0) + return dt.strftime("%Y-%m-%d %H:%M:%S") + except Exception: + pass + + return "" + + +def create_excel_from_template( + template_file: str, + output_file: str, + excel_rows: List[Dict[str, object]], + *, + header_row_idx: int = 2, + data_start_row: int = 4, + sheet_name: Optional[str] = None, +) -> None: + """ + Create an Excel file from Shoplazza import template and fill rows. + + - Header row is expected at row 2 (1-based) in the official template. + - Data starts at row 4 (1-based), after the instruction row(s). + """ + wb = load_workbook(template_file) + ws = wb[sheet_name] if sheet_name else wb.active + + column_mapping: Dict[str, int] = {} + for col_idx in range(1, ws.max_column + 1): + cell_value = ws.cell(row=header_row_idx, column=col_idx).value + if cell_value: + column_mapping[str(cell_value).strip()] = col_idx + + # Clear existing data rows + last_template_row = ws.max_row + if last_template_row >= data_start_row: + for row in range(data_start_row, last_template_row + 1): + for col in range(1, ws.max_column + 1): + ws.cell(row=row, column=col).value = None + + # Write data rows + for row_idx, excel_row in enumerate(excel_rows): + excel_row_num = data_start_row + row_idx + for field_name, col_idx in column_mapping.items(): + if field_name not in excel_row: + continue + value = excel_row[field_name] + cell = ws.cell(row=excel_row_num, column=col_idx) + cell.value = value + if isinstance(value, str): + cell.alignment = Alignment(vertical="top", wrap_text=True) + elif isinstance(value, (int, float)): + cell.alignment = 
Alignment(vertical="top") + + wb.save(output_file) + + diff --git a/scripts/data_import/tenant3_csv_to_shoplazza_xlsx.sh b/scripts/data_import/tenant3_csv_to_shoplazza_xlsx.sh new file mode 100755 index 0000000..d051aba --- /dev/null +++ b/scripts/data_import/tenant3_csv_to_shoplazza_xlsx.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -e + +cd "$(dirname "$0")/.." +source ./activate.sh + +# # 基本使用(生成所有数据) +# python scripts/data_import/csv_to_excel.py + +# # 指定输出文件 +# python scripts/data_import/csv_to_excel.py --output tenant3_imports.xlsx + +# # 限制处理行数(用于测试) +# python scripts/data_import/csv_to_excel.py --limit 100 + +# 指定CSV文件和模板文件 +python scripts/data_import/csv_to_excel.py \ + --csv-file data/customer1/goods_with_pic.5years_congku.csv.shuf.1w \ + --template docs/商品导入模板.xlsx \ + --output tenant3_imports.xlsx diff --git a/scripts/download_translation_models.py b/scripts/download_translation_models.py deleted file mode 100755 index a6fcba4..0000000 --- a/scripts/download_translation_models.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python3 -"""Download local translation models declared in services.translation.capabilities.""" - -from __future__ import annotations - -import argparse -import os -from pathlib import Path -import shutil -import subprocess -import sys -from typing import Iterable - -from huggingface_hub import snapshot_download - -PROJECT_ROOT = Path(__file__).resolve().parent.parent -if str(PROJECT_ROOT) not in sys.path: - sys.path.insert(0, str(PROJECT_ROOT)) -os.environ.setdefault("HF_HUB_DISABLE_XET", "1") - -from config.services_config import get_translation_config - - -LOCAL_BACKENDS = {"local_nllb", "local_marian"} - - -def iter_local_capabilities(selected: set[str] | None = None) -> Iterable[tuple[str, dict]]: - cfg = get_translation_config() - capabilities = cfg.get("capabilities", {}) if isinstance(cfg, dict) else {} - for name, capability in capabilities.items(): - backend = str(capability.get("backend") or "").strip().lower() - if backend not 
in LOCAL_BACKENDS: - continue - if selected and name not in selected: - continue - yield name, capability - - -def _compute_ct2_output_dir(capability: dict) -> Path: - custom = str(capability.get("ct2_model_dir") or "").strip() - if custom: - return Path(custom).expanduser() - model_dir = Path(str(capability.get("model_dir") or "")).expanduser() - compute_type = str(capability.get("ct2_compute_type") or capability.get("torch_dtype") or "default").strip().lower() - normalized = compute_type.replace("_", "-") - return model_dir / f"ctranslate2-{normalized}" - - -def _resolve_converter_binary() -> str: - candidate = shutil.which("ct2-transformers-converter") - if candidate: - return candidate - venv_candidate = Path(sys.executable).absolute().parent / "ct2-transformers-converter" - if venv_candidate.exists(): - return str(venv_candidate) - raise RuntimeError( - "ct2-transformers-converter was not found. " - "Install ctranslate2 in the active Python environment first." - ) - - -def convert_to_ctranslate2(name: str, capability: dict) -> None: - model_id = str(capability.get("model_id") or "").strip() - model_dir = Path(str(capability.get("model_dir") or "")).expanduser() - model_source = str(model_dir if model_dir.exists() else model_id) - output_dir = _compute_ct2_output_dir(capability) - if (output_dir / "model.bin").exists(): - print(f"[skip-convert] {name} -> {output_dir}") - return - quantization = str( - capability.get("ct2_conversion_quantization") - or capability.get("ct2_compute_type") - or capability.get("torch_dtype") - or "default" - ).strip() - output_dir.parent.mkdir(parents=True, exist_ok=True) - print(f"[convert] {name} -> {output_dir} ({quantization})") - subprocess.run( - [ - _resolve_converter_binary(), - "--model", - model_source, - "--output_dir", - str(output_dir), - "--quantization", - quantization, - ], - check=True, - ) - print(f"[converted] {name}") - - -def main() -> None: - parser = argparse.ArgumentParser(description="Download local 
translation models") - parser.add_argument("--all-local", action="store_true", help="Download all configured local translation models") - parser.add_argument("--models", nargs="*", default=[], help="Specific capability names to download") - parser.add_argument( - "--convert-ctranslate2", - action="store_true", - help="Also convert the downloaded Hugging Face models into CTranslate2 format", - ) - args = parser.parse_args() - - selected = {item.strip().lower() for item in args.models if item.strip()} or None - if not args.all_local and not selected: - parser.error("pass --all-local or --models ...") - - for name, capability in iter_local_capabilities(selected): - model_id = str(capability.get("model_id") or "").strip() - model_dir = Path(str(capability.get("model_dir") or "")).expanduser() - if not model_id or not model_dir: - raise ValueError(f"Capability '{name}' must define model_id and model_dir") - model_dir.parent.mkdir(parents=True, exist_ok=True) - print(f"[download] {name} -> {model_dir} ({model_id})") - snapshot_download( - repo_id=model_id, - local_dir=str(model_dir), - ) - print(f"[done] {name}") - if args.convert_ctranslate2: - convert_to_ctranslate2(name, capability) - - -if __name__ == "__main__": - main() diff --git a/scripts/frontend/frontend_server.py b/scripts/frontend/frontend_server.py new file mode 100755 index 0000000..15231ca --- /dev/null +++ b/scripts/frontend/frontend_server.py @@ -0,0 +1,276 @@ +#!/usr/bin/env python3 +""" +Simple HTTP server for saas-search frontend. 
+""" + +import http.server +import socketserver +import os +import sys +import logging +import time +import urllib.request +import urllib.error +from collections import defaultdict, deque +from pathlib import Path +from dotenv import load_dotenv + +# Load .env file +project_root = Path(__file__).resolve().parents[2] +load_dotenv(project_root / '.env') + +# Get API_BASE_URL from environment(默认不注入,避免被旧 .env 覆盖同源策略) +# 仅当显式设置 FRONTEND_INJECT_API_BASE_URL=1 时才注入 window.API_BASE_URL。 +API_BASE_URL = os.getenv('API_BASE_URL') or None +INJECT_API_BASE_URL = os.getenv('FRONTEND_INJECT_API_BASE_URL', '0') == '1' +# Backend proxy target for same-origin API forwarding +BACKEND_PROXY_URL = os.getenv('BACKEND_PROXY_URL', 'http://127.0.0.1:6002').rstrip('/') + +# Change to frontend directory +frontend_dir = os.path.join(project_root, 'frontend') +os.chdir(frontend_dir) + +# FRONTEND_PORT is the canonical config; keep PORT as a secondary fallback. +PORT = int(os.getenv('FRONTEND_PORT', os.getenv('PORT', 6003))) + +# Configure logging to suppress scanner noise +logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s') + +class RateLimitingMixin: + """Mixin for rate limiting requests by IP address.""" + request_counts = defaultdict(deque) + rate_limit = 100 # requests per minute + window = 60 # seconds + + @classmethod + def is_rate_limited(cls, ip): + now = time.time() + + # Clean old requests + while cls.request_counts[ip] and cls.request_counts[ip][0] < now - cls.window: + cls.request_counts[ip].popleft() + + # Check rate limit + if len(cls.request_counts[ip]) > cls.rate_limit: + return True + + cls.request_counts[ip].append(now) + return False + +class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler, RateLimitingMixin): + """Custom request handler with CORS support and robust error handling.""" + + def _is_proxy_path(self, path: str) -> bool: + """Return True for API paths that should be forwarded to backend service.""" + return 
path.startswith('/search/') or path.startswith('/admin/') or path.startswith('/indexer/') + + def _proxy_to_backend(self): + """Proxy current request to backend service on the GPU server.""" + target_url = f"{BACKEND_PROXY_URL}{self.path}" + method = self.command.upper() + + try: + content_length = int(self.headers.get('Content-Length', '0')) + except ValueError: + content_length = 0 + body = self.rfile.read(content_length) if content_length > 0 else None + + forward_headers = {} + for key, value in self.headers.items(): + lk = key.lower() + if lk in ('host', 'content-length', 'connection'): + continue + forward_headers[key] = value + + req = urllib.request.Request( + target_url, + data=body, + headers=forward_headers, + method=method, + ) + + try: + with urllib.request.urlopen(req, timeout=30) as resp: + resp_body = resp.read() + self.send_response(resp.getcode()) + for header, value in resp.getheaders(): + lh = header.lower() + if lh in ('transfer-encoding', 'connection', 'content-length'): + continue + self.send_header(header, value) + self.end_headers() + self.wfile.write(resp_body) + except urllib.error.HTTPError as e: + err_body = e.read() if hasattr(e, 'read') else b'' + self.send_response(e.code) + if e.headers: + for header, value in e.headers.items(): + lh = header.lower() + if lh in ('transfer-encoding', 'connection', 'content-length'): + continue + self.send_header(header, value) + self.end_headers() + if err_body: + self.wfile.write(err_body) + except Exception as e: + logging.error(f"Backend proxy error for {method} {self.path}: {e}") + self.send_response(502) + self.send_header('Content-Type', 'application/json; charset=utf-8') + self.end_headers() + self.wfile.write(b'{"error":"Bad Gateway: backend proxy failed"}') + + def do_GET(self): + """Handle GET requests with API config injection.""" + path = self.path.split('?')[0] + + # Proxy API paths to backend first + if self._is_proxy_path(path): + self._proxy_to_backend() + return + + # Route / to 
index.html + if path == '/' or path == '': + self.path = '/index.html' + (self.path.split('?', 1)[1] if '?' in self.path else '') + + # Inject API config for HTML files + if self.path.endswith('.html'): + self._serve_html_with_config() + else: + super().do_GET() + + def _serve_html_with_config(self): + """Serve HTML with optional API_BASE_URL injected.""" + try: + file_path = self.path.lstrip('/') + if not os.path.exists(file_path): + self.send_error(404) + return + + with open(file_path, 'r', encoding='utf-8') as f: + html = f.read() + + # 默认不注入 API_BASE_URL,避免历史 .env(如 http://xx:6002)覆盖同源调用。 + # 仅当 FRONTEND_INJECT_API_BASE_URL=1 且 API_BASE_URL 有值时才注入。 + if INJECT_API_BASE_URL and API_BASE_URL: + config_script = f'\n ' + html = html.replace('\n ' - html = html.replace('