From f3c11fef37a249640c5e2a576582747c3f1fc90e Mon Sep 17 00:00:00 2001 From: tangwang Date: Wed, 17 Dec 2025 14:28:06 +0800 Subject: [PATCH] 亚马逊格式数据 导入店匠 --- README.md | 7 ++++--- data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363464.xlsx | Bin 0 -> 1458652 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363499.xlsx | Bin 0 -> 1513406 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363533.xlsx | Bin 0 -> 1435801 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363560.xlsx | Bin 0 -> 1515582 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363598.xlsx | Bin 0 -> 1539404 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363625.xlsx | Bin 0 -> 1513980 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363662.xlsx | Bin 0 -> 1503237 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363709.xlsx | Bin 0 -> 1543771 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363746.xlsx | Bin 0 -> 1518055 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363778.xlsx | Bin 0 -> 1540051 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363802.xlsx | Bin 0 -> 1509598 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363824.xlsx | Bin 0 -> 1589666 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363854.xlsx | Bin 0 -> 1580673 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363884.xlsx | Bin 0 -> 1548852 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363899.xlsx | Bin 0 -> 1545834 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363924.xlsx | Bin 0 -> 1579446 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363943.xlsx | Bin 0 -> 1525622 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363956.xlsx | Bin 0 -> 1565554 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363985.xlsx | Bin 0 -> 1560629 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364002.xlsx | Bin 0 -> 1541948 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364039.xlsx | Bin 0 -> 1543872 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364076.xlsx | Bin 0 -> 1543412 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364113.xlsx | Bin 0 -> 1535278 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364149.xlsx | Bin 0 -> 1524011 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364182.xlsx | Bin 0 -> 1512652 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364202.xlsx | Bin 0 -> 1549026 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364223.xlsx | Bin 0 -> 1557521 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364237.xlsx | Bin 0 -> 1502973 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364253.xlsx | Bin 0 -> 1528237 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364272.xlsx | Bin 0 -> 1561632 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364293.xlsx | Bin 0 -> 1574560 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364309.xlsx | Bin 0 -> 1518344 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364335.xlsx | Bin 0 -> 1494999 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364361.xlsx | Bin 0 -> 1555047 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364384.xlsx | Bin 0 -> 1532500 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364409.xlsx | Bin 0 -> 1564367 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364471.xlsx | Bin 0 -> 1500885 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364496.xlsx | Bin 0 -> 1546109 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364519.xlsx | Bin 0 -> 1504037 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364538.xlsx | Bin 0 -> 1593751 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364557.xlsx | Bin 0 -> 1533240 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364581.xlsx | Bin 0 -> 1534808 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364605.xlsx | Bin 0 -> 1550951 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364628.xlsx | Bin 0 -> 1572503 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364644.xlsx | Bin 0 -> 1539065 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364667.xlsx | Bin 0 -> 1518430 bytes data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364690.xlsx | Bin 0 -> 1503968 bytes scripts/competitor_xlsx_to_shoplazza_xlsx.py | 525 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/csv_to_excel.py | 69 ++++++++------------------------------------------------------------- scripts/csv_to_excel_multi_variant.py | 65 +++++++---------------------------------------------------------- scripts/shoplazza_excel_template.py | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/shoplazza_import_template.py | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 53 files changed, 723 insertions(+), 122 deletions(-) create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363464.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363499.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363533.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363560.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363598.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363625.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363662.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363709.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363746.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363778.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363802.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363824.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363854.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363884.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363899.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363924.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363943.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363956.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363985.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364002.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364039.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364076.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364113.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364149.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364182.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364202.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364223.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364237.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364253.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364272.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364293.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364309.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364335.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364361.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364384.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364409.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364471.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364496.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364519.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364538.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364557.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364581.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364605.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364628.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364644.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364667.xlsx create mode 100644 data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364690.xlsx create mode 100644 scripts/competitor_xlsx_to_shoplazza_xlsx.py create mode 100644 scripts/shoplazza_excel_template.py create mode 100644 scripts/shoplazza_import_template.py diff --git a/README.md b/README.md index 208b378..d443f5c 100644 --- a/README.md +++ b/README.md @@ -14,11 +14,12 @@ source .env ## 测试pipeline -fake数据 生成商品导入数据 提交到店匠的店铺: +1. fake数据 生成商品导入数据 提交到店匠的店铺: cd /home/tw/SearchEngine && source /home/tw/miniconda3/etc/profile.d/conda.sh && conda activate searchengine && python scripts/csv_to_excel_multi_variant.py --output with_colors.xlsx -自动同步到mysql -mysql到ES: +2. 后端:自动同步到mysql + +3. mysql到ES: python scripts/recreate_and_import.py \ --tenant-id 162 \ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363464.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363464.xlsx new file mode 100644 index 0000000..2dad148 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363464.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363499.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363499.xlsx new file mode 100644 index 0000000..a395d31 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363499.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363533.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363533.xlsx new file mode 100644 index 0000000..6658171 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363533.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363560.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363560.xlsx new file mode 100644 index 0000000..514eb87 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363560.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363598.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363598.xlsx new file mode 100644 index 0000000..1c8f0a2 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363598.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363625.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363625.xlsx new file mode 100644 index 0000000..4362f95 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363625.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363662.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363662.xlsx new file mode 100644 index 0000000..52e2aca Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363662.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363709.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363709.xlsx new file mode 100644 index 0000000..74d7dbb Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363709.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363746.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363746.xlsx new file mode 100644 index 0000000..3eedac4 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363746.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363778.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363778.xlsx new file mode 100644 index 0000000..5031f60 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363778.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363802.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363802.xlsx new file mode 100644 index 0000000..73e801b Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363802.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363824.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363824.xlsx new file mode 100644 index 0000000..77654f2 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363824.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363854.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363854.xlsx new file mode 100644 index 0000000..e88f05f Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363854.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363884.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363884.xlsx new file mode 100644 index 0000000..86e34f0 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363884.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363899.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363899.xlsx new file mode 100644 index 0000000..26ee862 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363899.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363924.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363924.xlsx new file mode 100644 index 0000000..2f29c25 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363924.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363943.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363943.xlsx new file mode 100644 index 0000000..193caa5 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363943.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363956.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363956.xlsx new file mode 100644 index 0000000..e88f3df Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363956.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363985.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363985.xlsx new file mode 100644 index 0000000..8a376f4 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-363985.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364002.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364002.xlsx new file mode 100644 index 0000000..93398ad Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364002.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364039.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364039.xlsx new file mode 100644 index 0000000..9774ab0 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364039.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364076.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364076.xlsx new file mode 100644 index 0000000..ad5740c Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364076.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364113.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364113.xlsx new file mode 100644 index 0000000..9d111ec Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364113.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364149.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364149.xlsx new file mode 100644 index 0000000..dcc438f Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364149.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364182.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364182.xlsx new file mode 100644 index 0000000..c5bad6b Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364182.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364202.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364202.xlsx new file mode 100644 index 0000000..12a72a8 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364202.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364223.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364223.xlsx new file mode 100644 index 0000000..6eab358 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364223.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364237.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364237.xlsx new file mode 100644 index 0000000..14ac404 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364237.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364253.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364253.xlsx new file mode 100644 index 0000000..0842c66 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364253.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364272.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364272.xlsx new file mode 100644 index 0000000..f447265 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364272.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364293.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364293.xlsx new file mode 100644 index 0000000..95df8fe Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364293.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364309.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364309.xlsx new file mode 100644 index 0000000..37d7b44 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364309.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364335.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364335.xlsx new file mode 100644 index 0000000..a7cc79d Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364335.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364361.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364361.xlsx new file mode 100644 index 0000000..eedd332 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364361.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364384.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364384.xlsx new file mode 100644 index 0000000..fad00ca Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364384.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364409.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364409.xlsx new file mode 100644 index 0000000..dcfdd0c Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364409.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364471.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364471.xlsx new file mode 100644 index 0000000..41a38bc Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364471.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364496.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364496.xlsx new file mode 100644 index 0000000..cd4b906 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364496.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364519.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364519.xlsx new file mode 100644 index 0000000..093feab Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364519.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364538.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364538.xlsx new file mode 100644 index 0000000..94ba002 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364538.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364557.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364557.xlsx new file mode 100644 index 0000000..5bd8bc8 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364557.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364581.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364581.xlsx new file mode 100644 index 0000000..8c863cc Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364581.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364605.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364605.xlsx new file mode 100644 index 0000000..45ef349 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364605.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364628.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364628.xlsx new file mode 100644 index 0000000..41d1d1c Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364628.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364644.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364644.xlsx new file mode 100644 index 0000000..d00b9e3 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364644.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364667.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364667.xlsx new file mode 100644 index 0000000..d9ef138 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364667.xlsx differ diff --git a/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364690.xlsx b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364690.xlsx new file mode 100644 index 0000000..9415ca0 Binary files /dev/null and b/data/mai_jia_jing_ling/products_data/Competitor-US-Last-30-days-364690.xlsx differ diff --git a/scripts/competitor_xlsx_to_shoplazza_xlsx.py b/scripts/competitor_xlsx_to_shoplazza_xlsx.py new file mode 100644 index 0000000..5a9c214 --- /dev/null +++ b/scripts/competitor_xlsx_to_shoplazza_xlsx.py @@ -0,0 +1,525 @@ +#!/usr/bin/env python3 +""" +Convert competitor Excel exports (with Parent/Child ASIN structure) into +Shoplazza (店匠) product import Excel format based on `docs/商品导入模板.xlsx`. + +Data source: +- Directory with multiple `Competitor-*.xlsx` files. +- Each file contains a main sheet + "Notes" sheet. +- Column meanings (sample): + - ASIN: variant id (sku_id) + - 父ASIN: product id (spu_id) + +Output: +- For each 父ASIN group: + - If only 1 ASIN: generate one "S" row + - Else: generate one "M" row + multiple "P" rows + +Important: +- Variant dimensions are parsed primarily from the `SKU` column: + "Size: One Size | Color: Black" + and mapped into 款式1/2/3. +""" + +import os +import re +import sys +import argparse +from datetime import datetime +from collections import defaultdict, Counter +from pathlib import Path + +from openpyxl import load_workbook + +# Allow running as `python scripts/xxx.py` without installing as a package +sys.path.insert(0, str(Path(__file__).resolve().parent)) +from shoplazza_excel_template import create_excel_from_template + + +PREFERRED_OPTION_KEYS = [ + "Size", "Color", "Style", "Pattern", "Material", "Flavor", "Scent", + "Pack", "Pack of", "Number of Items", "Count", "Capacity", "Length", + "Width", "Height", "Model", "Configuration", +] + + +def clean_str(v): + if v is None: + return "" + return str(v).strip() + + +def html_escape(s): + s = clean_str(s) + return (s.replace("&", "&") + .replace("<", "<") + .replace(">", ">")) + + +def generate_handle(title): + """ + Generate URL-friendly handle from title (ASCII only). + Keep consistent with existing scripts. + """ + handle = clean_str(title).lower() + handle = re.sub(r"[^a-z0-9\\s-]", "", handle) + handle = re.sub(r"[-\\s]+", "-", handle).strip("-") + if len(handle) > 255: + handle = handle[:255] + return handle or "product" + + +def parse_date_to_template(dt_value): + """ + Template expects: YYYY-MM-DD HH:MM:SS + Input could be "2018-05-09" or datetime/date. + """ + if dt_value is None or dt_value == "": + return "" + if isinstance(dt_value, datetime): + return dt_value.strftime("%Y-%m-%d %H:%M:%S") + s = clean_str(dt_value) + # common formats + for fmt in ("%Y-%m-%d", "%Y/%m/%d", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S"): + try: + d = datetime.strptime(s, fmt) + return d.strftime("%Y-%m-%d %H:%M:%S") + except Exception: + pass + return "" + + +def parse_weight(weight_conv, weight_raw): + """ + Return (weight_value, unit) where unit in {kg, lb, g, oz}. + Prefer '商品重量(单位换算)' like '68.04 g'. + Fallback to '商品重量' like '0.15 pounds'. + """ + s = clean_str(weight_conv) or clean_str(weight_raw) + if not s: + return ("", "") + m = re.search(r"([0-9]+(?:\\.[0-9]+)?)\\s*([a-zA-Z]+)", s) + if not m: + return ("", "") + val = float(m.group(1)) + unit = m.group(2).lower() + if unit in ("g", "gram", "grams"): + return (val, "g") + if unit in ("kg", "kilogram", "kilograms"): + return (val, "kg") + if unit in ("lb", "lbs", "pound", "pounds"): + return (val, "lb") + if unit in ("oz", "ounce", "ounces"): + return (val, "oz") + return ("", "") + + +def parse_dimensions_inches(dim_raw): + """ + Template '尺寸信息': 'L,W,H' in inches. + Input example: '7.9 x 7.9 x 2 inches' + """ + s = clean_str(dim_raw) + if not s: + return "" + # extract first 3 numbers in order + nums = re.findall(r"([0-9]+(?:\\.[0-9]+)?)", s) + if len(nums) < 3: + return "" + return "{},{},{}".format(nums[0], nums[1], nums[2]) + + +def parse_sku_options(sku_text): + """ + Parse 'SKU' column into {key: value}. + Example: + 'Size: One Size | Color: Black' -> {'Size':'One Size','Color':'Black'} + """ + s = clean_str(sku_text) + if not s: + return {} + parts = [p.strip() for p in s.split("|") if p.strip()] + out = {} + for p in parts: + if ":" not in p: + continue + k, v = p.split(":", 1) + k = clean_str(k) + v = clean_str(v) + if k and v: + out[k] = v + return out + + +def choose_option_keys(variant_dicts, max_keys=3): + """ + Choose up to 3 option keys for a product group. + Order by preference list first, then by frequency. + """ + freq = Counter() + for d in variant_dicts: + for k, v in d.items(): + if v: + freq[k] += 1 + if not freq: + return [] + + preferred_rank = {k: i for i, k in enumerate(PREFERRED_OPTION_KEYS)} + + def key_sort(k): + return (preferred_rank.get(k, 10 ** 6), -freq[k], k.lower()) + + keys = sorted(freq.keys(), key=key_sort) + return keys[:max_keys] + + +def build_description_html(title, details, product_url): + parts = [] + if title: + parts.append("

{}

".format(html_escape(title))) + detail_items = [x.strip() for x in clean_str(details).split("|") if x.strip()] + if detail_items: + li = "".join(["
  • {}
  • ".format(html_escape(x)) for x in detail_items[:30]]) + parts.append("".format(li)) + if product_url: + parts.append('

    Source: {0}

    '.format(html_escape(product_url))) + return "".join(parts) + + +def competitor_sheet(ws): + """ + Build (header->col_index) for competitor sheet. + Assumes header is row 1. + """ + headers = [] + for c in range(1, ws.max_column + 1): + v = ws.cell(1, c).value + headers.append(clean_str(v)) + idx = {h: i + 1 for i, h in enumerate(headers) if h} + return idx + + +def read_competitor_rows_from_file(xlsx_path, max_rows=None): + wb = load_workbook(xlsx_path, read_only=True, data_only=True) + # pick first non-Notes sheet + sheet_name = None + for name in wb.sheetnames: + if str(name).lower() == "notes": + continue + sheet_name = name + break + if sheet_name is None: + return [] + ws = wb[sheet_name] + idx = competitor_sheet(ws) + + required = ["ASIN", "父ASIN", "商品标题", "商品主图", "SKU", "详细参数", "价格($)", "prime价格($)", + "上架时间", "类目路径", "大类目", "小类目", "品牌", "品牌链接", "商品详情页链接", + "商品重量(单位换算)", "商品重量", "商品尺寸"] + for k in required: + if k not in idx: + raise RuntimeError("Missing column '{}' in {} sheet {}".format(k, xlsx_path, sheet_name)) + + rows = [] + end_row = ws.max_row + if max_rows is not None: + end_row = min(end_row, 1 + int(max_rows)) + + for r in range(2, end_row + 1): + asin = clean_str(ws.cell(r, idx["ASIN"]).value) + if not asin: + continue + parent = clean_str(ws.cell(r, idx["父ASIN"]).value) or asin + row = { + "ASIN": asin, + "父ASIN": parent, + "SKU": clean_str(ws.cell(r, idx["SKU"]).value), + "详细参数": clean_str(ws.cell(r, idx["详细参数"]).value), + "商品标题": clean_str(ws.cell(r, idx["商品标题"]).value), + "商品主图": clean_str(ws.cell(r, idx["商品主图"]).value), + "价格($)": ws.cell(r, idx["价格($)"]).value, + "prime价格($)": ws.cell(r, idx["prime价格($)"]).value, + "上架时间": clean_str(ws.cell(r, idx["上架时间"]).value), + "类目路径": clean_str(ws.cell(r, idx["类目路径"]).value), + "大类目": clean_str(ws.cell(r, idx["大类目"]).value), + "小类目": clean_str(ws.cell(r, idx["小类目"]).value), + "品牌": clean_str(ws.cell(r, idx["品牌"]).value), + "品牌链接": clean_str(ws.cell(r, idx["品牌链接"]).value), + "商品详情页链接": clean_str(ws.cell(r, idx["商品详情页链接"]).value), + "商品重量(单位换算)": clean_str(ws.cell(r, idx["商品重量(单位换算)"]).value), + "商品重量": clean_str(ws.cell(r, idx["商品重量"]).value), + "商品尺寸": clean_str(ws.cell(r, idx["商品尺寸"]).value), + } + rows.append(row) + return rows + + +def to_price(v): + if v is None or v == "": + return None + try: + return float(v) + except Exception: + s = clean_str(v) + m = re.search(r"([0-9]+(?:\\.[0-9]+)?)", s) + if not m: + return None + return float(m.group(1)) + + +def build_common_fields(base_row, spu_id): + title = base_row.get("商品标题") or "Product" + brand = base_row.get("品牌") or "" + big_cat = base_row.get("大类目") or "" + small_cat = base_row.get("小类目") or "" + cat_path = base_row.get("类目路径") or "" + + handle = generate_handle(title) + if handle and not handle.startswith("products/"): + handle = "products/{}".format(handle) + + seo_title = title + seo_desc_parts = [] + if brand: + seo_desc_parts.append(brand) + seo_desc_parts.append(title) + if big_cat: + seo_desc_parts.append(big_cat) + seo_description = " ".join([x for x in seo_desc_parts if x])[:5000] + + seo_keywords = ",".join([x for x in [title, brand, big_cat, small_cat] if x]) + tags = ",".join([x for x in [brand, big_cat, small_cat] if x]) + + created_at = parse_date_to_template(base_row.get("上架时间")) + + description = build_description_html( + title=title, + details=base_row.get("详细参数"), + product_url=base_row.get("商品详情页链接"), + ) + + # default inventory settings (data source has no stock) + inventory_qty = 100 + + weight_val, weight_unit = parse_weight(base_row.get("商品重量(单位换算)"), base_row.get("商品重量")) + size_info = parse_dimensions_inches(base_row.get("商品尺寸")) + + album = big_cat or "" + if not album and cat_path: + album = cat_path.split(":")[0] + + common = { + "商品ID": "", + "创建时间": created_at, + "商品标题*": title[:255], + "商品副标题": "{} {}".format(brand, big_cat).strip()[:600], + "商品描述": description, + "SEO标题": seo_title[:5000], + "SEO描述": seo_description, + "SEO URL Handle": handle, + "SEO URL 重定向": "N", + "SEO关键词": seo_keywords[:5000], + "商品上架": "Y", + "需要物流": "Y", + "商品收税": "N", + "商品spu": spu_id[:100], + "启用虚拟销量": "N", + "虚拟销量值": "", + "跟踪库存": "Y", + "库存规则*": "1", + "专辑名称": album, + "标签": tags, + "供应商名称": "Amazon", + "供应商URL": base_row.get("商品详情页链接") or base_row.get("品牌链接") or "", + "商品重量": weight_val if weight_val != "" else "", + "重量单位": weight_unit, + "商品库存": inventory_qty, + "尺寸信息": size_info, + "原产地国别": "", + "HS(协调制度)代码": "", + "商品备注": "ASIN:{}; ParentASIN:{}; CategoryPath:{}".format( + base_row.get("ASIN", ""), spu_id, (cat_path[:200] if cat_path else "") + )[:500], + "款式备注": "", + } + return common + + +def build_s_row(base_row): + spu_id = base_row.get("父ASIN") or base_row.get("ASIN") + common = build_common_fields(base_row, spu_id=spu_id) + price = to_price(base_row.get("prime价格($)")) or to_price(base_row.get("价格($)")) or 9.99 + image = base_row.get("商品主图") or "" + + row = {} + row.update(common) + row.update({ + "商品属性*": "S", + "款式1": "", + "款式2": "", + "款式3": "", + "商品售价*": price, + "商品原价": price, + "成本价": "", + "商品SKU": base_row.get("ASIN") or "", + "商品条形码": "", + "商品图片*": image, + "商品主图": image, + }) + return row + + +def build_m_p_rows(variant_rows): + """ + variant_rows: List[dict] with same 父ASIN. + """ + base = variant_rows[0] + spu_id = base.get("父ASIN") or base.get("ASIN") + common = build_common_fields(base, spu_id=spu_id) + + option_dicts = [parse_sku_options(v.get("SKU")) for v in variant_rows] + option_keys = choose_option_keys(option_dicts, max_keys=3) + if not option_keys: + option_keys = ["Variant"] + + # M row + m = {} + m.update(common) + m.update({ + "商品属性*": "M", + "款式1": option_keys[0] if len(option_keys) > 0 else "", + "款式2": option_keys[1] if len(option_keys) > 1 else "", + "款式3": option_keys[2] if len(option_keys) > 2 else "", + "商品售价*": "", + "商品原价": "", + "成本价": "", + "商品SKU": "", + "商品条形码": "", + "商品图片*": base.get("商品主图") or "", + "商品主图": base.get("商品主图") or "", + }) + + # For M row, these SKU-level fields should be empty per template guidance + m["商品重量"] = "" + m["重量单位"] = "" + m["商品库存"] = "" + m["尺寸信息"] = "" + + rows = [m] + + # P rows + for v in variant_rows: + v_common = build_common_fields(v, spu_id=spu_id) + # wipe SPU-only fields for P row + v_common.update({ + "商品副标题": "", + "商品描述": "", + "SEO标题": "", + "SEO描述": "", + "SEO URL Handle": "", + "SEO URL 重定向": "", + "SEO关键词": "", + "专辑名称": "", + "标签": "", + "供应商名称": "", + "供应商URL": "", + "商品备注": "", + }) + + opt = parse_sku_options(v.get("SKU")) + if option_keys == ["Variant"]: + opt_vals = [v.get("ASIN")] + else: + opt_vals = [opt.get(k, "") for k in option_keys] + + price = to_price(v.get("prime价格($)")) or to_price(v.get("价格($)")) or 9.99 + image = v.get("商品主图") or "" + + p = {} + p.update(v_common) + p.update({ + "商品属性*": "P", + "款式1": opt_vals[0] if len(opt_vals) > 0 else "", + "款式2": opt_vals[1] if len(opt_vals) > 1 else "", + "款式3": opt_vals[2] if len(opt_vals) > 2 else "", + "商品售价*": price, + "商品原价": price, + "成本价": "", + "商品SKU": v.get("ASIN") or "", + "商品条形码": "", + # P row supports one variant image; we use variant's main image + "商品图片*": image, + "商品主图": "", + }) + rows.append(p) + + return rows + + +def main(): + parser = argparse.ArgumentParser(description="Convert competitor xlsx files to Shoplazza import xlsx") + parser.add_argument("--input-dir", default="data/mai_jia_jing_ling/products_data", help="Directory containing competitor xlsx files") + parser.add_argument("--template", default="docs/商品导入模板.xlsx", help="Shoplazza import template xlsx") + parser.add_argument("--output", default="competitor_shoplazza_import.xlsx", help="Output xlsx file path") + parser.add_argument("--max-files", type=int, default=None, help="Limit number of xlsx files to read (for testing)") + parser.add_argument("--max-rows-per-file", type=int, default=None, help="Limit rows per xlsx file (for testing)") + parser.add_argument("--max-products", type=int, default=None, help="Limit number of SPU groups to output (for testing)") + args = parser.parse_args() + + input_dir = args.input_dir + if not os.path.isdir(input_dir): + raise RuntimeError("input-dir not found: {}".format(input_dir)) + if not os.path.exists(args.template): + raise RuntimeError("template not found: {}".format(args.template)) + + files = [os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.lower().endswith(".xlsx")] + files.sort() + if args.max_files is not None: + files = files[: int(args.max_files)] + + print("Reading competitor files: {} (from {})".format(len(files), input_dir), flush=True) + + groups = defaultdict(list) # spu_id -> [variant rows] + seen_asin = set() + + for fp in files: + print(" - loading: {}".format(fp), flush=True) + try: + rows = read_competitor_rows_from_file(fp, max_rows=args.max_rows_per_file) + except Exception as e: + print("WARN: failed to read {}: {}".format(fp, e)) + continue + print(" loaded rows: {}".format(len(rows)), flush=True) + + for r in rows: + asin = r.get("ASIN") + if asin in seen_asin: + continue + seen_asin.add(asin) + spu_id = r.get("父ASIN") or asin + groups[spu_id].append(r) + + print("Collected variants: {}, SPU groups: {}".format(len(seen_asin), len(groups)), flush=True) + + excel_rows = [] + spu_count = 0 + + for spu_id, variants in groups.items(): + if not variants: + continue + spu_count += 1 + if args.max_products is not None and spu_count > int(args.max_products): + break + if len(variants) == 1: + excel_rows.append(build_s_row(variants[0])) + else: + excel_rows.extend(build_m_p_rows(variants)) + + print("Generated Excel rows: {} (SPU groups output: {})".format(len(excel_rows), min(spu_count, len(groups))), flush=True) + create_excel_from_template(args.template, args.output, excel_rows) + + +if __name__ == "__main__": + main() + + diff --git a/scripts/csv_to_excel.py b/scripts/csv_to_excel.py index 7dae590..5149eba 100755 --- a/scripts/csv_to_excel.py +++ b/scripts/csv_to_excel.py @@ -22,6 +22,10 @@ from openpyxl import load_workbook from openpyxl.styles import Font, Alignment from openpyxl.utils import get_column_letter +# Shared helpers (keeps template writing consistent across scripts) +from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared +from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared + # Add parent directory to path sys.path.insert(0, str(Path(__file__).parent.parent)) @@ -82,21 +86,8 @@ def generate_handle(title: str) -> str: Returns: URL-friendly handle (ASCII only) """ - # Convert to lowercase - handle = title.lower() - - # Remove non-ASCII characters, keep only letters, numbers, spaces, and hyphens - handle = re.sub(r'[^a-z0-9\s-]', '', handle) - - # Replace spaces and multiple hyphens with single hyphen - handle = re.sub(r'[-\s]+', '-', handle) - handle = handle.strip('-') - - # Limit length - if len(handle) > 255: - handle = handle[:255] - - return handle or 'product' + # Keep backward-compatible function name while delegating to shared helper. + return _generate_handle_shared(title) def read_csv_file(csv_file: str) -> list: @@ -255,52 +246,8 @@ def create_excel_from_template(template_file: str, output_file: str, csv_data_li output_file: Path to output Excel file csv_data_list: List of parsed CSV data dictionaries """ - # Load template - wb = load_workbook(template_file) - ws = wb.active # Use the active sheet (Sheet4) - - # Find header row (row 2, index 1) - header_row_idx = 2 # Row 2 in Excel (1-based, but header is at index 1 in pandas) - - # Get column mapping from header row - column_mapping = {} - for col_idx in range(1, ws.max_column + 1): - cell_value = ws.cell(row=header_row_idx, column=col_idx).value - if cell_value: - column_mapping[cell_value] = col_idx - - # Start writing data from row 4 (after header and instructions) - data_start_row = 4 # Row 4 in Excel (1-based) - - # Clear existing data rows (from row 4 onwards, but keep header and instructions) - # Find the last row with data in the template - last_template_row = ws.max_row - if last_template_row >= data_start_row: - # Clear data rows (keep header and instruction rows) - for row in range(data_start_row, last_template_row + 1): - for col in range(1, ws.max_column + 1): - ws.cell(row=row, column=col).value = None - - # Convert CSV data to Excel rows - for row_idx, csv_data in enumerate(csv_data_list): - excel_row = csv_to_excel_row(csv_data) - excel_row_num = data_start_row + row_idx - - # Write each field to corresponding column - for field_name, col_idx in column_mapping.items(): - if field_name in excel_row: - cell = ws.cell(row=excel_row_num, column=col_idx) - value = excel_row[field_name] - cell.value = value - - # Set alignment for text fields - if isinstance(value, str): - cell.alignment = Alignment(vertical='top', wrap_text=True) - elif isinstance(value, (int, float)): - cell.alignment = Alignment(vertical='top') - - # Save workbook - wb.save(output_file) + excel_rows = [csv_to_excel_row(d) for d in csv_data_list] + _create_excel_from_template_shared(template_file, output_file, excel_rows) print(f"Excel file created: {output_file}") print(f" - Total rows: {len(csv_data_list)}") diff --git a/scripts/csv_to_excel_multi_variant.py b/scripts/csv_to_excel_multi_variant.py index 4df2e1d..4e91f59 100755 --- a/scripts/csv_to_excel_multi_variant.py +++ b/scripts/csv_to_excel_multi_variant.py @@ -22,6 +22,10 @@ import itertools from openpyxl import load_workbook from openpyxl.styles import Alignment +# Shared helpers (keeps template writing consistent across scripts) +from scripts.shoplazza_import_template import create_excel_from_template as _create_excel_from_template_shared +from scripts.shoplazza_import_template import generate_handle as _generate_handle_shared + # Add parent directory to path sys.path.insert(0, str(Path(__file__).parent.parent)) @@ -90,21 +94,8 @@ def generate_handle(title: str) -> str: Returns: URL-friendly handle (ASCII only) """ - # Convert to lowercase - handle = title.lower() - - # Remove non-ASCII characters, keep only letters, numbers, spaces, and hyphens - handle = re.sub(r'[^a-z0-9\s-]', '', handle) - - # Replace spaces and multiple hyphens with single hyphen - handle = re.sub(r'[-\s]+', '-', handle) - handle = handle.strip('-') - - # Limit length - if len(handle) > 255: - handle = handle[:255] - - return handle or 'product' + # Keep backward-compatible function name while delegating to shared helper. + return _generate_handle_shared(title) def extract_material_from_title(title: str) -> str: @@ -478,49 +469,7 @@ def create_excel_from_template(template_file: str, output_file: str, excel_rows: output_file: Path to output Excel file excel_rows: List of dictionaries mapping Excel column names to values """ - # Load template - wb = load_workbook(template_file) - ws = wb.active # Use the active sheet (Sheet4) - - # Find header row (row 2) - header_row_idx = 2 - - # Get column mapping from header row - column_mapping = {} - for col_idx in range(1, ws.max_column + 1): - cell_value = ws.cell(row=header_row_idx, column=col_idx).value - if cell_value: - column_mapping[cell_value] = col_idx - - # Start writing data from row 4 - data_start_row = 4 - - # Clear existing data rows - last_template_row = ws.max_row - if last_template_row >= data_start_row: - for row in range(data_start_row, last_template_row + 1): - for col in range(1, ws.max_column + 1): - ws.cell(row=row, column=col).value = None - - # Write data rows - for row_idx, excel_row in enumerate(excel_rows): - excel_row_num = data_start_row + row_idx - - # Write each field to corresponding column - for field_name, col_idx in column_mapping.items(): - if field_name in excel_row: - cell = ws.cell(row=excel_row_num, column=col_idx) - value = excel_row[field_name] - cell.value = value - - # Set alignment - if isinstance(value, str): - cell.alignment = Alignment(vertical='top', wrap_text=True) - elif isinstance(value, (int, float)): - cell.alignment = Alignment(vertical='top') - - # Save workbook - wb.save(output_file) + _create_excel_from_template_shared(template_file, output_file, excel_rows) print(f"Excel file created: {output_file}") print(f" - Total rows: {len(excel_rows)}") diff --git a/scripts/shoplazza_excel_template.py b/scripts/shoplazza_excel_template.py new file mode 100644 index 0000000..2005e04 --- /dev/null +++ b/scripts/shoplazza_excel_template.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +""" +Shared utilities for generating Shoplazza (店匠) product import Excel files +based on the provided template `docs/商品导入模板.xlsx`. + +We keep this in `scripts/` to maximize reuse by existing ad-hoc pipeline scripts. +""" + +from openpyxl import load_workbook +from openpyxl.styles import Alignment + + +def load_template_column_mapping(ws, header_row_idx=2): + """ + Read the header row in the template sheet and build a mapping: + header_name -> column_index (1-based). + """ + column_mapping = {} + for col_idx in range(1, ws.max_column + 1): + cell_value = ws.cell(row=header_row_idx, column=col_idx).value + if cell_value: + column_mapping[str(cell_value).strip()] = col_idx + return column_mapping + + +def create_excel_from_template(template_file, output_file, excel_rows, header_row_idx=2, data_start_row=4): + """ + Create an Excel file from the Shoplazza template and fill with data rows. + + Args: + template_file: Path to Excel template file + output_file: Path to output Excel file + excel_rows: List[Dict[str, Any]] mapping template header -> value + header_row_idx: Header row index in template (default 2) + data_start_row: Data start row index in template (default 4) + """ + wb = load_workbook(template_file) + ws = wb.active + + column_mapping = load_template_column_mapping(ws, header_row_idx=header_row_idx) + + # Clear existing data rows + last_template_row = ws.max_row + if last_template_row >= data_start_row: + for row in range(data_start_row, last_template_row + 1): + for col in range(1, ws.max_column + 1): + ws.cell(row=row, column=col).value = None + + # Write data rows + for row_idx, excel_row in enumerate(excel_rows): + excel_row_num = data_start_row + row_idx + for field_name, col_idx in column_mapping.items(): + if field_name not in excel_row: + continue + cell = ws.cell(row=excel_row_num, column=col_idx) + value = excel_row[field_name] + cell.value = value + if isinstance(value, str): + cell.alignment = Alignment(vertical='top', wrap_text=True) + else: + cell.alignment = Alignment(vertical='top') + + wb.save(output_file) + print("Excel file created: {}".format(output_file)) + print(" - Total rows: {}".format(len(excel_rows))) + + diff --git a/scripts/shoplazza_import_template.py b/scripts/shoplazza_import_template.py new file mode 100644 index 0000000..a05f291 --- /dev/null +++ b/scripts/shoplazza_import_template.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +""" +Shared helpers for generating Shoplazza product import Excel files from the +official template `docs/商品导入模板.xlsx`. + +We keep this module small and dependency-light (openpyxl only) so other scripts +can reuse the same template-writing behavior (header row mapping, data start +row, alignment). +""" + +import re +from datetime import datetime +from typing import Dict, Iterable, List, Optional + +from openpyxl import load_workbook +from openpyxl.styles import Alignment + + +def generate_handle(title: str) -> str: + """ + Generate URL-friendly handle from title (ASCII only), suitable for Shoplazza + `SEO URL Handle` field. Caller may prepend `products/`. + """ + if not title: + return "product" + + handle = str(title).lower() + handle = re.sub(r"[^a-z0-9\s-]", "", handle) + handle = re.sub(r"[-\s]+", "-", handle).strip("-") + + if len(handle) > 255: + handle = handle[:255] + + return handle or "product" + + +def parse_date_to_datetime_str(value) -> str: + """ + Parse common date strings into Shoplazza template datetime string: + `YYYY-MM-DD HH:MM:SS`. If parsing fails, returns empty string. + """ + if value is None: + return "" + + if isinstance(value, datetime): + return value.strftime("%Y-%m-%d %H:%M:%S") + + s = str(value).strip() + if not s: + return "" + + # Most competitor sheets use YYYY-MM-DD + for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"): + try: + dt = datetime.strptime(s, fmt) + if fmt in ("%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"): + dt = dt.replace(hour=0, minute=0, second=0) + return dt.strftime("%Y-%m-%d %H:%M:%S") + except Exception: + pass + + return "" + + +def create_excel_from_template( + template_file: str, + output_file: str, + excel_rows: List[Dict[str, object]], + *, + header_row_idx: int = 2, + data_start_row: int = 4, + sheet_name: Optional[str] = None, +) -> None: + """ + Create an Excel file from Shoplazza import template and fill rows. + + - Header row is expected at row 2 (1-based) in the official template. + - Data starts at row 4 (1-based), after the instruction row(s). + """ + wb = load_workbook(template_file) + ws = wb[sheet_name] if sheet_name else wb.active + + column_mapping: Dict[str, int] = {} + for col_idx in range(1, ws.max_column + 1): + cell_value = ws.cell(row=header_row_idx, column=col_idx).value + if cell_value: + column_mapping[str(cell_value).strip()] = col_idx + + # Clear existing data rows + last_template_row = ws.max_row + if last_template_row >= data_start_row: + for row in range(data_start_row, last_template_row + 1): + for col in range(1, ws.max_column + 1): + ws.cell(row=row, column=col).value = None + + # Write data rows + for row_idx, excel_row in enumerate(excel_rows): + excel_row_num = data_start_row + row_idx + for field_name, col_idx in column_mapping.items(): + if field_name not in excel_row: + continue + value = excel_row[field_name] + cell = ws.cell(row=excel_row_num, column=col_idx) + cell.value = value + if isinstance(value, str): + cell.alignment = Alignment(vertical="top", wrap_text=True) + elif isinstance(value, (int, float)): + cell.alignment = Alignment(vertical="top") + + wb.save(output_file) + + -- libgit2 0.21.2