#!/bin/bash
#
# mock_data.sh - mock data script for SearchEngine.
# Generates test data and imports it into MySQL.
# Supports both mock data generation and CSV import.

# Every relative path used later is resolved from the parent of this script's
# directory. Stop right away if that directory cannot be entered, so nothing
# runs from an unexpected location.
cd "$(dirname "$0")/.." || {
    echo "ERROR: cannot cd to $(dirname "$0")/.." >&2
    exit 1
}

# Activate the "searchengine" conda environment before any python command runs.
source /home/tw/miniconda3/etc/profile.d/conda.sh
conda activate searchengine

# ANSI color codes, kept as literal backslash sequences; they are expanded
# only when printed (echo -e / printf %b).
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'

# Startup banner: printf reuses the format once per argument, one line each.
printf '%b\n' \
    "${GREEN}========================================${NC}" \
    "${GREEN}Mock Data Script${NC}" \
    "${GREEN}========================================${NC}"

# Read settings from .env when the file is present. allexport is switched on
# only while the file is read, so each variable it assigns is also exported.
if test -f .env; then
    set -o allexport
    . .env
    set +o allexport
fi

# Hard-coded run parameters (edit here to change what the script does).
MODE="csv"  # "mock" or "csv"
TENANT_ID="2"
NUM_SPUS=100
CSV_FILE="data/customer1/goods_with_pic.5years_congku.csv.shuf.1w"  # CSV file path (relative to the project root)
START_SPU_ID=1

# Database configuration.
# A value already set in the environment (for example exported from .env)
# takes precedence; otherwise the default shown here is used.
DB_HOST=${DB_HOST:-"120.79.247.228"}
DB_PORT=${DB_PORT:-"3316"}
DB_DATABASE=${DB_DATABASE:-"saas"}
DB_USERNAME=${DB_USERNAME:-"saas"}
# The password deliberately has no default: credentials must not live in
# source control. It has to come from .env or the environment; the import
# step aborts with an error when it is empty.
DB_PASSWORD=${DB_PASSWORD:-}

# Print a summary of the run settings and pick the SQL file name for the
# selected mode ("mock" gets its own file; anything else is treated as csv).
printf '%b\n' "\n${YELLOW}Configuration:${NC}"
printf '%s\n' "  Mode: $MODE" "  Tenant ID: $TENANT_ID"
case "$MODE" in
    mock)
        printf '%s\n' "  Number of SPUs: $NUM_SPUS"
        SQL_FILE="test_data.sql"
        ;;
    *)
        printf '%s\n' "  CSV File: $CSV_FILE" "  Start SPU ID: $START_SPU_ID"
        SQL_FILE="customer1_data.sql"
        ;;
esac
printf '%s\n' "  MySQL: $DB_HOST:$DB_PORT/$DB_DATABASE" "  SQL File: $SQL_FILE"

# In csv mode the input file must exist; otherwise report it and stop.
if [ "$MODE" = "csv" ] && [ ! -f "$CSV_FILE" ]; then
    printf '%b\n' "${RED}ERROR: CSV file not found: $CSV_FILE${NC}"
    printf '%s\n' "请确保CSV文件存在于: $CSV_FILE"
    exit 1
fi

# Step 1: Generate test data
# The generator command for the selected mode is assembled as an array so
# that every argument stays a single word, even if a value is empty or
# contains whitespace. It is then run once and its exit status is tested
# directly instead of through a later "$?" lookup.
if [ "$MODE" = "mock" ]; then
    echo -e "\n${YELLOW}Step 1/2: 生成Mock测试数据${NC}"
    generate_cmd=(
        python scripts/generate_test_data.py
        --num-spus "$NUM_SPUS"
        --tenant-id "$TENANT_ID"
        --start-spu-id 1
        --start-sku-id 1
        --output "$SQL_FILE"
    )
else
    echo -e "\n${YELLOW}Step 1/2: 从CSV生成数据${NC}"
    generate_cmd=(
        python scripts/import_customer1_csv.py
        --csv-file "$CSV_FILE"
        --tenant-id "$TENANT_ID"
        --start-spu-id "$START_SPU_ID"
        --output "$SQL_FILE"
    )
fi

if ! "${generate_cmd[@]}"; then
    echo -e "${RED}✗ 生成数据失败${NC}"
    exit 1
fi

echo -e "${GREEN}✓ 数据已生成: $SQL_FILE${NC}"

# Step 2: Import data to MySQL
printf '%b\n' "\n${YELLOW}Step 2/2: 导入数据到MySQL${NC}"

# A password is required; stop here when none is set.
[ -n "$DB_PASSWORD" ] || {
    printf '%b\n' "${RED}ERROR: DB_PASSWORD未设置,请检查.env文件或环境变量${NC}"
    exit 1
}

# Run the importer and bail out on a non-zero exit status.
# NOTE(review): the password is passed as a command-line argument, which is
# typically visible to other local users via the process list - confirm
# whether the import script can read it from the environment instead.
if ! python scripts/import_test_data.py \
    --db-host "$DB_HOST" \
    --db-port "$DB_PORT" \
    --db-database "$DB_DATABASE" \
    --db-username "$DB_USERNAME" \
    --db-password "$DB_PASSWORD" \
    --sql-file "$SQL_FILE" \
    --tenant-id "$TENANT_ID"; then
    printf '%b\n' "${RED}✗ 导入数据失败${NC}"
    exit 1
fi

printf '%b\n' "${GREEN}✓ 数据已导入MySQL${NC}"

# Closing banner plus a hint at the follow-up command; printf emits one
# line per argument (empty arguments produce the blank lines).
printf '%b\n' \
    "\n${GREEN}========================================${NC}" \
    "${GREEN}数据导入完成!${NC}" \
    "${GREEN}========================================${NC}" \
    "" \
    "下一步:" \
    "  ${YELLOW}./scripts/ingest.sh $TENANT_ID${NC}  - 从MySQL灌入数据到ES" \
    ""