#!/bin/bash
#
# Data Ingestion Script for Customer1
#
# Runs data/customer1/ingest_customer1.py against the bundled Customer1 CSV
# sample, from the directory one level above this script's own directory.
#
# Usage:
#   <this script> [LIMIT] [SKIP_EMBEDDINGS]
#
# Arguments:
#   LIMIT            value forwarded as --limit (default: 1000)
#   SKIP_EMBEDDINGS  only the exact string "true" adds --skip-embeddings
#                    (default: false)
#
# Exit status:
#   1 if the CSV file is missing; otherwise the script stops with the status
#   of the first failing command (set -e), or 0 on success.
#
# NOTE(review): --recreate-index is always passed; presumably this rebuilds
# the target index on every run — confirm in ingest_customer1.py.

# -u is intentionally not enabled: the sourced conda activation scripts may
# reference unset variables.
set -e

# Resolve all relative paths below against the parent of the script directory.
cd "$(dirname "$0")/.."

# Make the `conda` shell function available in this non-interactive shell,
# then switch to the environment the ingestion script runs in.
source /home/tw/miniconda3/etc/profile.d/conda.sh
conda activate searchengine

# ANSI colour escapes, interpreted by `echo -e`.
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}Customer1 Data Ingestion${NC}"
echo -e "${GREEN}========================================${NC}"

# Default values
LIMIT=${1:-1000}
SKIP_EMBEDDINGS=${2:-false}

echo -e "\n${YELLOW}Configuration:${NC}"
echo "  Limit: $LIMIT documents"
echo "  Skip embeddings: $SKIP_EMBEDDINGS"

CSV_FILE="data/customer1/goods_with_pic.5years_congku.csv.shuf.1w"

if [[ ! -f "$CSV_FILE" ]]; then
  echo "Error: CSV file not found: $CSV_FILE" >&2
  exit 1
fi

# Build the command as an argv array rather than a string passed to `eval`:
# each element reaches python as exactly one argument, so a LIMIT containing
# spaces or shell metacharacters can neither be re-split nor executed.
cmd=(
  python data/customer1/ingest_customer1.py
  --csv "$CSV_FILE"
  --limit "$LIMIT"
  --recreate-index
  --batch-size 100
)

if [[ "$SKIP_EMBEDDINGS" == "true" ]]; then
  cmd+=(--skip-embeddings)
fi

echo -e "\n${YELLOW}Starting ingestion...${NC}"
"${cmd[@]}"

echo -e "\n${GREEN}========================================${NC}"
echo -e "${GREEN}Ingestion Complete!${NC}"
echo -e "${GREEN}========================================${NC}"