Commit 41e1f8df7e4e5db152ce237f7b795241bd3ba071 (1 parent: 362d43b6)
店匠体系数据的搜索: mock data -> MySQL, MySQL -> ES
Showing 4 changed files with 27 additions and 10 deletions
Show diff stats
config/field_types.py
| @@ -183,7 +183,13 @@ def get_es_mapping_for_field(field_config: FieldConfig) -> Dict[str, Any]: | @@ -183,7 +183,13 @@ def get_es_mapping_for_field(field_config: FieldConfig) -> Dict[str, Any]: | ||
| 183 | 183 | ||
| 184 | # Add analyzer for text fields | 184 | # Add analyzer for text fields |
| 185 | if prop_type == "text" and "analyzer" in prop_config: | 185 | if prop_type == "text" and "analyzer" in prop_config: |
| 186 | - prop_mapping["analyzer"] = prop_config["analyzer"] | 186 | + analyzer_str = prop_config["analyzer"] |
| 187 | + # Convert chinese_ecommerce to index_ansj/query_ansj | ||
| 188 | + if analyzer_str == "chinese_ecommerce": | ||
| 189 | + prop_mapping["analyzer"] = "index_ansj" | ||
| 190 | + prop_mapping["search_analyzer"] = "query_ansj" | ||
| 191 | + else: | ||
| 192 | + prop_mapping["analyzer"] = analyzer_str | ||
| 187 | 193 | ||
| 188 | # Add other properties | 194 | # Add other properties |
| 189 | if "index" in prop_config: | 195 | if "index" in prop_config: |
scripts/ingest.sh
| @@ -24,7 +24,9 @@ if [ -f .env ]; then | @@ -24,7 +24,9 @@ if [ -f .env ]; then | ||
| 24 | fi | 24 | fi |
| 25 | 25 | ||
| 26 | # Parameters | 26 | # Parameters |
| 27 | -TENANT_ID=${1:-"1"} | 27 | +TENANT_ID=${1:-""} |
| 28 | +RECREATE_INDEX=${2:-"false"} | ||
| 29 | + | ||
| 28 | DB_HOST=${DB_HOST:-"120.79.247.228"} | 30 | DB_HOST=${DB_HOST:-"120.79.247.228"} |
| 29 | DB_PORT=${DB_PORT:-"3316"} | 31 | DB_PORT=${DB_PORT:-"3316"} |
| 30 | DB_DATABASE=${DB_DATABASE:-"saas"} | 32 | DB_DATABASE=${DB_DATABASE:-"saas"} |
| @@ -32,19 +34,20 @@ DB_USERNAME=${DB_USERNAME:-"saas"} | @@ -32,19 +34,20 @@ DB_USERNAME=${DB_USERNAME:-"saas"} | ||
| 32 | DB_PASSWORD=${DB_PASSWORD:-"P89cZHS5d7dFyc9R"} | 34 | DB_PASSWORD=${DB_PASSWORD:-"P89cZHS5d7dFyc9R"} |
| 33 | ES_HOST=${ES_HOST:-"http://localhost:9200"} | 35 | ES_HOST=${ES_HOST:-"http://localhost:9200"} |
| 34 | BATCH_SIZE=${BATCH_SIZE:-500} | 36 | BATCH_SIZE=${BATCH_SIZE:-500} |
| 35 | -RECREATE=${RECREATE:-false} | ||
| 36 | 37 | ||
| 37 | echo -e "\n${YELLOW}Configuration:${NC}" | 38 | echo -e "\n${YELLOW}Configuration:${NC}" |
| 38 | echo " Tenant ID: $TENANT_ID" | 39 | echo " Tenant ID: $TENANT_ID" |
| 40 | +echo " Recreate Index: $RECREATE_INDEX" | ||
| 39 | echo " MySQL: $DB_HOST:$DB_PORT/$DB_DATABASE" | 41 | echo " MySQL: $DB_HOST:$DB_PORT/$DB_DATABASE" |
| 40 | echo " Elasticsearch: $ES_HOST" | 42 | echo " Elasticsearch: $ES_HOST" |
| 41 | echo " Batch Size: $BATCH_SIZE" | 43 | echo " Batch Size: $BATCH_SIZE" |
| 42 | -echo " Recreate Index: $RECREATE" | ||
| 43 | 44 | ||
| 44 | # Validate parameters | 45 | # Validate parameters |
| 45 | if [ -z "$TENANT_ID" ]; then | 46 | if [ -z "$TENANT_ID" ]; then |
| 46 | echo -e "${RED}ERROR: Tenant ID is required${NC}" | 47 | echo -e "${RED}ERROR: Tenant ID is required${NC}" |
| 47 | - echo "Usage: $0 <tenant_id> [batch_size] [recreate]" | 48 | + echo "Usage: $0 <tenant_id> [recreate_index]" |
| 49 | + echo " tenant_id: Required, tenant ID" | ||
| 50 | + echo " recreate_index: Optional, recreate index if exists (true/false, default: false)" | ||
| 48 | exit 1 | 51 | exit 1 |
| 49 | fi | 52 | fi |
| 50 | 53 | ||
| @@ -64,8 +67,9 @@ CMD="python scripts/ingest_shoplazza.py \ | @@ -64,8 +67,9 @@ CMD="python scripts/ingest_shoplazza.py \ | ||
| 64 | --es-host $ES_HOST \ | 67 | --es-host $ES_HOST \ |
| 65 | --batch-size $BATCH_SIZE" | 68 | --batch-size $BATCH_SIZE" |
| 66 | 69 | ||
| 67 | -if [ "$RECREATE" = "true" ] || [ "$RECREATE" = "1" ]; then | 70 | +if [ "$RECREATE_INDEX" = "true" ] || [ "$RECREATE_INDEX" = "1" ]; then |
| 68 | CMD="$CMD --recreate" | 71 | CMD="$CMD --recreate" |
| 72 | + echo -e "\n${YELLOW}Warning: Index will be deleted and recreated!${NC}" | ||
| 69 | fi | 73 | fi |
| 70 | 74 | ||
| 71 | echo -e "\n${YELLOW}Starting data ingestion...${NC}" | 75 | echo -e "\n${YELLOW}Starting data ingestion...${NC}" |
scripts/ingest_shoplazza.py
| @@ -105,11 +105,16 @@ def main(): | @@ -105,11 +105,16 @@ def main(): | ||
| 105 | if args.recreate: | 105 | if args.recreate: |
| 106 | if es_client.index_exists(index_name): | 106 | if es_client.index_exists(index_name): |
| 107 | print(f"Deleting existing index: {index_name}") | 107 | print(f"Deleting existing index: {index_name}") |
| 108 | - es_client.delete_index(index_name) | 108 | + if not es_client.delete_index(index_name): |
| 109 | + print(f"ERROR: Failed to delete index '{index_name}'") | ||
| 110 | + return 1 | ||
| 109 | 111 | ||
| 110 | if not es_client.index_exists(index_name): | 112 | if not es_client.index_exists(index_name): |
| 111 | print(f"Creating index: {index_name}") | 113 | print(f"Creating index: {index_name}") |
| 112 | - es_client.create_index(index_name, mapping) | 114 | + if not es_client.create_index(index_name, mapping): |
| 115 | + print(f"ERROR: Failed to create index '{index_name}'") | ||
| 116 | + print("Please check the mapping configuration and try again.") | ||
| 117 | + return 1 | ||
| 113 | else: | 118 | else: |
| 114 | print(f"Using existing index: {index_name}") | 119 | print(f"Using existing index: {index_name}") |
| 115 | 120 |
utils/es_client.py
| @@ -68,14 +68,16 @@ class ESClient: | @@ -68,14 +68,16 @@ class ESClient: | ||
| 68 | body: Index configuration (settings + mappings) | 68 | body: Index configuration (settings + mappings) |
| 69 | 69 | ||
| 70 | Returns: | 70 | Returns: |
| 71 | - True if successful | 71 | + True if successful, False otherwise |
| 72 | """ | 72 | """ |
| 73 | try: | 73 | try: |
| 74 | self.client.indices.create(index=index_name, body=body) | 74 | self.client.indices.create(index=index_name, body=body) |
| 75 | print(f"Index '{index_name}' created successfully") | 75 | print(f"Index '{index_name}' created successfully") |
| 76 | return True | 76 | return True |
| 77 | except Exception as e: | 77 | except Exception as e: |
| 78 | - print(f"Failed to create index '{index_name}': {e}") | 78 | + print(f"ERROR: Failed to create index '{index_name}': {e}") |
| 79 | + import traceback | ||
| 80 | + traceback.print_exc() | ||
| 79 | return False | 81 | return False |
| 80 | 82 | ||
| 81 | def delete_index(self, index_name: str) -> bool: | 83 | def delete_index(self, index_name: str) -> bool: |