mock_data.sh 5.56 KB
#!/bin/bash

# ============================================================================
# Mock Data Script for SearchEngine
# ============================================================================
# 
# 功能说明:
# 本脚本用于构造测试数据,包含两部分:
# 1. tenant_id=1: 自动生成的mock数据(使用 generate_test_data.py)
# 2. tenant_id=2: 从CSV文件导入的数据(使用 import_tenant2_csv.py)
#
# 数据说明:
# - 所有数据源配置(数据库地址、CSV路径、字段映射等)都写死在脚本中
# - 这是外部系统构造测试数据,不需要配置化
# - 脚本会自动计算起始ID,避免主键冲突
#
# 使用方式:
#   ./scripts/mock_data.sh
#
# ============================================================================

# Work from the parent of the directory containing this script, so that the
# relative paths used below (.env, data/..., scripts/...) resolve correctly.
# Abort if the directory change fails instead of silently continuing in
# whatever directory the caller happened to be in.
cd "$(dirname "$0")/.." || {
    echo "ERROR: cannot cd to $(dirname "$0")/.." >&2
    exit 1
}

# Load conda's shell integration and switch to the "searchengine" environment.
# NOTE(review): the conda install path is machine-specific; a failure here is
# not fatal and the script continues with whatever `python` is on PATH.
source /home/tw/miniconda3/etc/profile.d/conda.sh
conda activate searchengine

# ANSI colour escape sequences; they are interpreted by `echo -e`.
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'  # reset / "no colour"

echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}Mock Data Script${NC}"
echo -e "${GREEN}========================================${NC}"

# Load config from .env file if it exists.
# `set -a` marks every variable assigned while sourcing for export, so child
# processes inherit them; `set +a` switches that off again.
if [ -f .env ]; then
    set -a
    source .env
    set +a
fi

# ============================================================================
# Hard-coded settings (not meant to be configurable: this script only builds
# test data)
# ============================================================================

# Tenant 1: number of SPUs to generate as mock data
TENANT1_NUM_SPUS=1000

# Tenant 2: path of the CSV file to import
TENANT2_CSV_FILE="data/customer1/goods_with_pic.5years_congku.csv.shuf.1w"

# MySQL connection settings (hard-coded, not meant to be configurable).
# These assignments come after the optional .env load, so they take
# precedence over same-named variables defined there.
# NOTE(review): a plaintext credential is committed below — confirm that this
# is acceptable for the target database.
DB_HOST="120.79.247.228"
DB_PORT="3316"
DB_DATABASE="saas"
DB_USERNAME="saas"
DB_PASSWORD="P89cZHS5d7dFyc9R"

# Show the settings that will be used (the password is not printed).
echo -e "\n${YELLOW}Configuration:${NC}"
printf '%s\n' \
    "  Tenant 1 (Mock): $TENANT1_NUM_SPUS SPUs" \
    "  Tenant 2 (CSV): $TENANT2_CSV_FILE" \
    "  MySQL: $DB_HOST:$DB_PORT/$DB_DATABASE"

# Stop early when the tenant 2 CSV input file is missing.
[ -f "$TENANT2_CSV_FILE" ] || {
    echo -e "${RED}ERROR: CSV file not found: $TENANT2_CSV_FILE${NC}"
    echo "请确保CSV文件存在于: $TENANT2_CSV_FILE"
    exit 1
}

# ============================================================================
# Part 1: generate and import the mock data for tenant_id=1
# ============================================================================

echo -e "\n${YELLOW}========================================${NC}"
echo -e "${YELLOW}Part 1/2: 生成并导入 tenant_id=1 的Mock数据${NC}"
echo -e "${YELLOW}========================================${NC}"

# SQL file written by step 1.1 and consumed by step 1.2.
TENANT1_SQL_FILE="test_data_tenant1.sql"

# Step 1.1: generate the mock data as a SQL file.
# The command is tested directly so its exit status cannot be clobbered by an
# intervening command, and every expansion is quoted.
# NOTE(review): the DB connection flags are presumably used by the generator
# to compute starting IDs (the file header mentions avoiding primary-key
# conflicts) — verify against generate_test_data.py.
# NOTE(review): the password is passed on the command line, where it is
# visible in the process list while the command runs.
echo -e "\n${YELLOW}Step 1.1: 生成Mock测试数据${NC}"
if ! python scripts/generate_test_data.py \
    --num-spus "$TENANT1_NUM_SPUS" \
    --tenant-id "1" \
    --output "$TENANT1_SQL_FILE" \
    --db-host "$DB_HOST" \
    --db-port "$DB_PORT" \
    --db-database "$DB_DATABASE" \
    --db-username "$DB_USERNAME" \
    --db-password "$DB_PASSWORD"; then
    echo -e "${RED}✗ 生成tenant_id=1数据失败${NC}"
    exit 1
fi

echo -e "${GREEN}✓ 数据已生成: $TENANT1_SQL_FILE${NC}"

# Step 1.2: load the generated SQL file into MySQL for tenant 1.
echo -e "\n${YELLOW}Step 1.2: 导入tenant_id=1数据到MySQL${NC}"
if ! python scripts/import_test_data.py \
    --db-host "$DB_HOST" \
    --db-port "$DB_PORT" \
    --db-database "$DB_DATABASE" \
    --db-username "$DB_USERNAME" \
    --db-password "$DB_PASSWORD" \
    --sql-file "$TENANT1_SQL_FILE" \
    --tenant-id "1"; then
    echo -e "${RED}✗ 导入tenant_id=1数据失败${NC}"
    exit 1
fi

echo -e "${GREEN}✓ tenant_id=1数据已导入MySQL${NC}"

# ============================================================================
# Part 2: build and import the CSV-based data for tenant_id=2
# ============================================================================

# Horizontal rule reused above and below the section title.
part2_rule="${YELLOW}========================================${NC}"
echo -e "\n${part2_rule}"
echo -e "${YELLOW}Part 2/2: 生成并导入 tenant_id=2 的CSV数据${NC}"
echo -e "${part2_rule}"

# SQL file written by step 2.1 and consumed by step 2.2.
TENANT2_SQL_FILE="customer1_data.sql"

# MySQL connection flags shared by both steps of this part.
mysql_flags=(
    --db-host "$DB_HOST"
    --db-port "$DB_PORT"
    --db-database "$DB_DATABASE"
    --db-username "$DB_USERNAME"
    --db-password "$DB_PASSWORD"
)

# Step 2.1: convert the CSV file into a SQL file for tenant 2.
echo -e "\n${YELLOW}Step 2.1: 从CSV生成数据${NC}"
if ! python scripts/import_tenant2_csv.py \
    --csv-file "$TENANT2_CSV_FILE" \
    --tenant-id "2" \
    --output "$TENANT2_SQL_FILE" \
    "${mysql_flags[@]}"; then
    echo -e "${RED}✗ 生成tenant_id=2数据失败${NC}"
    exit 1
fi

echo -e "${GREEN}✓ 数据已生成: $TENANT2_SQL_FILE${NC}"

# Step 2.2: load the generated SQL file into MySQL for tenant 2.
echo -e "\n${YELLOW}Step 2.2: 导入tenant_id=2数据到MySQL${NC}"
if ! python scripts/import_test_data.py \
    "${mysql_flags[@]}" \
    --sql-file "$TENANT2_SQL_FILE" \
    --tenant-id "2"; then
    echo -e "${RED}✗ 导入tenant_id=2数据失败${NC}"
    exit 1
fi

echo -e "${GREEN}✓ tenant_id=2数据已导入MySQL${NC}"

# ============================================================================
# Done
# ============================================================================

# Print the success banner followed by the suggested next commands.
# `printf '%b\n'` expands backslash escapes in each argument the same way
# `echo -e` does and emits one line per argument.
done_rule="${GREEN}========================================${NC}"
printf '%b\n' \
    "\n${done_rule}" \
    "${GREEN}数据导入完成!${NC}" \
    "${done_rule}" \
    "" \
    "下一步:" \
    "  ${YELLOW}./scripts/ingest.sh 1 true${NC}  - 从MySQL灌入tenant_id=1数据到ES" \
    "  ${YELLOW}./scripts/ingest.sh 2 true${NC}  - 从MySQL灌入tenant_id=2数据到ES" \
    ""