mock_data.sh
5.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/bin/bash
# ============================================================================
# Mock Data Script for SearchEngine
# ============================================================================
#
# 功能说明:
# 本脚本用于构造测试数据,包含两部分:
# 1. tenant_id=1: 自动生成的mock数据(使用 generate_test_data.py)
# 2. tenant_id=2: 从CSV文件导入的数据(使用 import_tenant2_csv.py)
#
# 数据说明:
# - 所有数据源配置(数据库地址、CSV路径、字段映射等)都写死在脚本中
# - 这是外部系统构造测试数据,不需要配置化
# - 脚本会自动计算起始ID,避免主键冲突
#
# 使用方式:
# ./scripts/mock_data.sh
#
# ============================================================================
# ----------------------------------------------------------------------------
# Environment bootstrap: switch to the repository root, activate the conda
# environment, define colour codes, print the banner and load ./.env.
# ----------------------------------------------------------------------------

# Work from the parent of this script's directory. Abort if that fails:
# otherwise the relative paths used below would silently resolve against
# whatever directory the caller happened to be in.
cd "$(dirname "$0")/.." || exit 1

# NOTE(review): neither of the next two commands is checked. If conda is not
# installed at this path the script carries on with whatever environment is
# already active.
# shellcheck source=/dev/null
source /home/tw/miniconda3/etc/profile.d/conda.sh
conda activate searchengine

# ANSI colour escapes, expanded later by `echo -e`.
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'

echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}Mock Data Script${NC}"
echo -e "${GREEN}========================================${NC}"

# Load config from .env file if it exists.
# `set -a` auto-exports every variable assigned while the file is sourced.
# The path is written as ./.env because, for a name without a slash, `source`
# searches PATH before the current directory and could pick up a different
# file than the one tested here.
if [[ -f .env ]]; then
  set -a
  # shellcheck source=/dev/null
  source ./.env
  set +a
fi
# ============================================================================
# Hard-coded settings (intentionally not configurable: this script only
# builds test data)
# ============================================================================
# Tenant 1: mock data settings
TENANT1_NUM_SPUS=1000 # number of SPUs to generate
# Tenant 2: CSV data settings
TENANT2_CSV_FILE="data/customer1/goods_with_pic.5years_congku.csv.shuf.1w" # CSV file path (relative)
# Database settings (hard-coded on purpose). These assignments overwrite any
# variables of the same name that were already set before this point.
# NOTE(review): a real host and password are committed to the repository
# here. Confirm this account is a throwaway test account, or rotate the
# password and supply it from outside the source tree.
DB_HOST="120.79.247.228"
DB_PORT="3316"
DB_DATABASE="saas"
DB_USERNAME="saas"
DB_PASSWORD="P89cZHS5d7dFyc9R"

# Show the effective settings (the password is deliberately not printed).
echo -e "\n${YELLOW}Configuration:${NC}"
echo " Tenant 1 (Mock): $TENANT1_NUM_SPUS SPUs"
echo " Tenant 2 (CSV): $TENANT2_CSV_FILE"
echo " MySQL: $DB_HOST:$DB_PORT/$DB_DATABASE"

# Validate CSV file exists before doing any work; diagnostics go to stderr so
# they are not mixed into redirected progress output.
if [[ ! -f "$TENANT2_CSV_FILE" ]]; then
  echo -e "${RED}ERROR: CSV file not found: $TENANT2_CSV_FILE${NC}" >&2
  echo "请确保CSV文件存在于: $TENANT2_CSV_FILE" >&2
  exit 1
fi
# ============================================================================
# Part 1: generate and import the mock data for tenant_id=1
# ============================================================================
echo -e "\n${YELLOW}========================================${NC}"
echo -e "${YELLOW}Part 1/2: 生成并导入 tenant_id=1 的Mock数据${NC}"
echo -e "${YELLOW}========================================${NC}"

# SQL file passed to the generator as --output and to the importer as
# --sql-file (path relative to the working directory).
TENANT1_SQL_FILE="test_data_tenant1.sql"

# Step 1.1: run the generator. The command is tested directly with `if !`
# instead of inspecting $? afterwards, so no statement can slip in between
# and clobber the status.
# NOTE(review): the DB password is passed on the command line and is therefore
# visible in the process list while the Python script runs.
echo -e "\n${YELLOW}Step 1.1: 生成Mock测试数据${NC}"
if ! python scripts/generate_test_data.py \
  --num-spus "$TENANT1_NUM_SPUS" \
  --tenant-id "1" \
  --output "$TENANT1_SQL_FILE" \
  --db-host "$DB_HOST" \
  --db-port "$DB_PORT" \
  --db-database "$DB_DATABASE" \
  --db-username "$DB_USERNAME" \
  --db-password "$DB_PASSWORD"; then
  echo -e "${RED}✗ 生成tenant_id=1数据失败${NC}" >&2
  exit 1
fi
echo -e "${GREEN}✓ 数据已生成: $TENANT1_SQL_FILE${NC}"

# Step 1.2: run the importer on the generated SQL file; stop on failure.
echo -e "\n${YELLOW}Step 1.2: 导入tenant_id=1数据到MySQL${NC}"
if ! python scripts/import_test_data.py \
  --db-host "$DB_HOST" \
  --db-port "$DB_PORT" \
  --db-database "$DB_DATABASE" \
  --db-username "$DB_USERNAME" \
  --db-password "$DB_PASSWORD" \
  --sql-file "$TENANT1_SQL_FILE" \
  --tenant-id "1"; then
  echo -e "${RED}✗ 导入tenant_id=1数据失败${NC}" >&2
  exit 1
fi
echo -e "${GREEN}✓ tenant_id=1数据已导入MySQL${NC}"
# ============================================================================
# Part 2: generate and import the CSV-based data for tenant_id=2
# ============================================================================
echo -e "\n${YELLOW}========================================${NC}"
echo -e "${YELLOW}Part 2/2: 生成并导入 tenant_id=2 的CSV数据${NC}"
echo -e "${YELLOW}========================================${NC}"

# SQL file passed to the CSV converter as --output and to the importer as
# --sql-file (path relative to the working directory).
TENANT2_SQL_FILE="tenant2_data.sql"

# Step 2.1: run the CSV converter. The command is tested directly with `if !`
# instead of inspecting $? afterwards, so no statement can slip in between
# and clobber the status.
# NOTE(review): the DB password is passed on the command line and is therefore
# visible in the process list while the Python script runs.
echo -e "\n${YELLOW}Step 2.1: 从CSV生成数据${NC}"
if ! python scripts/import_tenant2_csv.py \
  --csv-file "$TENANT2_CSV_FILE" \
  --tenant-id "2" \
  --output "$TENANT2_SQL_FILE" \
  --db-host "$DB_HOST" \
  --db-port "$DB_PORT" \
  --db-database "$DB_DATABASE" \
  --db-username "$DB_USERNAME" \
  --db-password "$DB_PASSWORD"; then
  echo -e "${RED}✗ 生成tenant_id=2数据失败${NC}" >&2
  exit 1
fi
echo -e "${GREEN}✓ 数据已生成: $TENANT2_SQL_FILE${NC}"

# Step 2.2: run the importer on the generated SQL file; stop on failure.
echo -e "\n${YELLOW}Step 2.2: 导入tenant_id=2数据到MySQL${NC}"
if ! python scripts/import_test_data.py \
  --db-host "$DB_HOST" \
  --db-port "$DB_PORT" \
  --db-database "$DB_DATABASE" \
  --db-username "$DB_USERNAME" \
  --db-password "$DB_PASSWORD" \
  --sql-file "$TENANT2_SQL_FILE" \
  --tenant-id "2"; then
  echo -e "${RED}✗ 导入tenant_id=2数据失败${NC}" >&2
  exit 1
fi
echo -e "${GREEN}✓ tenant_id=2数据已导入MySQL${NC}"
# ============================================================================
# Done: print the completion banner and the suggested follow-up commands
# ============================================================================
# %b expands backslash escapes in its argument the same way `echo -e` does,
# so the colour codes render identically.
done_rule="${GREEN}========================================${NC}"
printf '\n%b\n' "$done_rule"
printf '%b\n' "${GREEN}数据导入完成!${NC}"
printf '%b\n' "$done_rule"
printf '\n'
printf '%b\n' "下一步:"
# One hint line per tenant handled above.
for tenant in 1 2; do
  printf '%b\n' " ${YELLOW}./scripts/ingest.sh ${tenant} true${NC} - 从MySQL灌入tenant_id=${tenant}数据到ES"
done
printf '\n'