diff --git a/config/field_types.py b/config/field_types.py
index 8275f19..418ed2c 100644
--- a/config/field_types.py
+++ b/config/field_types.py
@@ -183,7 +183,13 @@ def get_es_mapping_for_field(field_config: FieldConfig) -> Dict[str, Any]:
 
     # Add analyzer for text fields
     if prop_type == "text" and "analyzer" in prop_config:
-        prop_mapping["analyzer"] = prop_config["analyzer"]
+        analyzer_str = prop_config["analyzer"]
+        # Convert chinese_ecommerce to index_ansj/query_ansj
+        if analyzer_str == "chinese_ecommerce":
+            prop_mapping["analyzer"] = "index_ansj"
+            prop_mapping["search_analyzer"] = "query_ansj"
+        else:
+            prop_mapping["analyzer"] = analyzer_str
 
     # Add other properties
     if "index" in prop_config:
diff --git a/scripts/ingest.sh b/scripts/ingest.sh
index a81b827..a420fe3 100755
--- a/scripts/ingest.sh
+++ b/scripts/ingest.sh
@@ -24,7 +24,9 @@ if [ -f .env ]; then
 fi
 
 # Parameters
-TENANT_ID=${1:-"1"}
+TENANT_ID=${1:-""}
+RECREATE_INDEX=${2:-"false"}
+
 DB_HOST=${DB_HOST:-"120.79.247.228"}
 DB_PORT=${DB_PORT:-"3316"}
 DB_DATABASE=${DB_DATABASE:-"saas"}
@@ -32,19 +34,20 @@ DB_USERNAME=${DB_USERNAME:-"saas"}
 DB_PASSWORD=${DB_PASSWORD:-"P89cZHS5d7dFyc9R"}
 ES_HOST=${ES_HOST:-"http://localhost:9200"}
 BATCH_SIZE=${BATCH_SIZE:-500}
-RECREATE=${RECREATE:-false}
 
 echo -e "\n${YELLOW}Configuration:${NC}"
 echo " Tenant ID: $TENANT_ID"
+echo " Recreate Index: $RECREATE_INDEX"
 echo " MySQL: $DB_HOST:$DB_PORT/$DB_DATABASE"
 echo " Elasticsearch: $ES_HOST"
 echo " Batch Size: $BATCH_SIZE"
-echo " Recreate Index: $RECREATE"
 
 # Validate parameters
 if [ -z "$TENANT_ID" ]; then
     echo -e "${RED}ERROR: Tenant ID is required${NC}"
-    echo "Usage: $0 [batch_size] [recreate]"
+    echo "Usage: $0 [recreate_index]"
+    echo " tenant_id: Required, tenant ID"
+    echo " recreate_index: Optional, recreate index if exists (true/false, default: false)"
     exit 1
 fi
 
@@ -64,8 +67,9 @@ CMD="python scripts/ingest_shoplazza.py \
     --es-host $ES_HOST \
     --batch-size $BATCH_SIZE"
 
-if [ "$RECREATE" = "true" ] || [ "$RECREATE" = "1" ]; then
+if [ "$RECREATE_INDEX" = "true" ] || [ "$RECREATE_INDEX" = "1" ]; then
     CMD="$CMD --recreate"
+    echo -e "\n${YELLOW}Warning: Index will be deleted and recreated!${NC}"
 fi
 
 echo -e "\n${YELLOW}Starting data ingestion...${NC}"
diff --git a/scripts/ingest_shoplazza.py b/scripts/ingest_shoplazza.py
index 12878af..697b7c1 100644
--- a/scripts/ingest_shoplazza.py
+++ b/scripts/ingest_shoplazza.py
@@ -105,11 +105,16 @@ def main():
     if args.recreate:
         if es_client.index_exists(index_name):
             print(f"Deleting existing index: {index_name}")
-            es_client.delete_index(index_name)
+            if not es_client.delete_index(index_name):
+                print(f"ERROR: Failed to delete index '{index_name}'")
+                return 1
 
     if not es_client.index_exists(index_name):
         print(f"Creating index: {index_name}")
-        es_client.create_index(index_name, mapping)
+        if not es_client.create_index(index_name, mapping):
+            print(f"ERROR: Failed to create index '{index_name}'")
+            print("Please check the mapping configuration and try again.")
+            return 1
     else:
         print(f"Using existing index: {index_name}")
 
diff --git a/utils/es_client.py b/utils/es_client.py
index 29cf8ed..2bd5114 100644
--- a/utils/es_client.py
+++ b/utils/es_client.py
@@ -68,14 +68,16 @@ class ESClient:
             body: Index configuration (settings + mappings)
 
         Returns:
-            True if successful
+            True if successful, False otherwise
         """
         try:
             self.client.indices.create(index=index_name, body=body)
             print(f"Index '{index_name}' created successfully")
             return True
         except Exception as e:
-            print(f"Failed to create index '{index_name}': {e}")
+            print(f"ERROR: Failed to create index '{index_name}': {e}")
+            import traceback
+            traceback.print_exc()
             return False
 
     def delete_index(self, index_name: str) -> bool:
-- 
libgit2 0.21.2