test_multilang_config.py 4.99 KB
#!/usr/bin/env python3
"""
Test script to validate multi-language configuration.

This script validates that:
1. language_field_mapping is correctly loaded from config
2. All referenced fields exist and have correct analyzers
3. Multi-language query builder works correctly
"""

import sys
import json
from config import ConfigLoader

def test_config_loading():
    """Load the 'customer1' configuration and print its language mappings.

    Steps performed, all reported on stdout:
      1. Load the customer config through ``ConfigLoader``.
      2. Run ``validate_config`` on it and list any reported errors.
      3. Walk ``config.indexes`` and print each index's
         ``language_field_mapping`` (or a note that it has none).

    Returns:
        bool: True when loading succeeds and validation reports no errors.
        False when validation reports errors or when any exception is
        raised along the way (the traceback is printed in that case).
    """
    rule = "=" * 60
    print(rule)
    print("Testing Configuration Loading")
    print(rule)

    try:
        config_loader = ConfigLoader()
        customer_config = config_loader.load_customer_config('customer1')

        print("\n✓ Configuration loaded successfully")
        print(f"  Customer: {customer_config.customer_name}")
        print(f"  ES Index: {customer_config.es_index_name}")

        # Stop early if the loader's own validation reports problems.
        problems = config_loader.validate_config(customer_config)
        if problems:
            print("\n✗ Configuration validation failed:")
            for problem in problems:
                print(f"  - {problem}")
            return False
        print("\n✓ Configuration validation passed")

        # Report per-index language mappings; indexes without one get a
        # single-line note instead of the detailed listing.
        print("\nIndexes with multi-language support:")
        for idx in customer_config.indexes:
            mapping = idx.language_field_mapping
            if not mapping:
                print(f"\n  {idx.name} ({idx.label}): No language mapping")
                continue

            print(f"\n  {idx.name} ({idx.label}):")
            print(f"    Fields: {idx.fields}")
            print("    Language mapping:")
            for language, field_names in mapping.items():
                print(f"      {language}: {field_names}")

        return True

    except Exception as exc:
        # Script-level boundary: report the failure and let the caller
        # decide the exit status instead of crashing.
        import traceback

        print(f"\n✗ Error loading configuration: {exc}")
        traceback.print_exc()
        return False


def test_multilang_query_builder():
    """Build a MultiLanguageQueryBuilder for 'customer1' and exercise it.

    The project modules are imported inside the ``try`` block, so an import
    failure is caught by the same handler as any other error and reported
    as a failed check.

    Steps performed, all reported on stdout:
      1. Load the customer config and obtain the text/image embedding field
         names from ``MappingGenerator``.
      2. Construct the query builder and print its domain summary.
      3. Parse a few sample queries (without generating vectors) and build
         an ES query for each with KNN disabled, printing the top-level
         keys of the result. The query contents are only printed; nothing
         about them is asserted.

    Returns:
        bool: True when every step completes without raising, False when
        any exception is raised (the traceback is printed in that case).
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print("Testing Multi-Language Query Builder")
    print(rule)

    try:
        from config import ConfigLoader
        from query import QueryParser
        from search.multilang_query_builder import MultiLanguageQueryBuilder
        from indexer import MappingGenerator

        customer_config = ConfigLoader().load_customer_config('customer1')

        # The builder needs the embedding field names derived from the config.
        mapping_generator = MappingGenerator(customer_config)
        text_field = mapping_generator.get_text_embedding_field()
        image_field = mapping_generator.get_image_embedding_field()

        builder = MultiLanguageQueryBuilder(
            config=customer_config,
            index_name=customer_config.es_index_name,
            text_embedding_field=text_field,
            image_embedding_field=image_field,
        )

        print("\n✓ MultiLanguageQueryBuilder initialized")

        # Per-domain overview as reported by the builder itself.
        print("\nDomain Summary:")
        for domain_name, details in builder.get_domain_summary().items():
            print(f"  {domain_name}:")
            print(f"    Label: {details['label']}")
            multilang = details['has_multilang_mapping']
            print(f"    Has multilang mapping: {multilang}")
            if multilang:
                print(f"    Supported languages: {details['supported_languages']}")

        # Sample inputs: a bare query plus two with an explicit prefix.
        parser = QueryParser(customer_config)
        sample_queries = (
            "芭比娃娃",
            "title:芭比娃娃",
            "default:玩具",
        )

        print("\nTesting query parsing:")
        for raw_query in sample_queries:
            print(f"\n  Query: '{raw_query}'")
            parsed_query = parser.parse(raw_query, generate_vector=False)
            print(f"    Domain: {parsed_query.domain}")
            print(f"    Detected language: {parsed_query.detected_language}")
            translation_langs = list(parsed_query.translations.keys())
            print(f"    Translations: {translation_langs}")

            # Text-only query: no vector, no filters, KNN switched off.
            built_query = builder.build_multilang_query(
                parsed_query=parsed_query,
                query_vector=None,
                filters=None,
                size=10,
                enable_knn=False,
            )
            top_level_keys = list(built_query.keys())
            print(f"    ES Query keys: {top_level_keys}")

        return True

    except Exception as exc:
        # Script-level boundary: report the failure and let the caller
        # decide the exit status instead of crashing.
        import traceback

        print(f"\n✗ Error testing query builder: {exc}")
        traceback.print_exc()
        return False


if __name__ == "__main__":
    print("Multi-Language Configuration Test")
    print("=" * 60)

    # Build the list eagerly so that both checks always run, even when the
    # first one fails; the overall result is decided afterwards.
    outcomes = [
        test_config_loading(),           # Test 1: configuration loading
        test_multilang_query_builder(),  # Test 2: multi-language query builder
    ]

    print("\n" + "=" * 60)
    if all(outcomes):
        print("✓ All tests passed!")
        sys.exit(0)

    # At least one check returned a falsy result: exit with a failure status.
    print("✗ Some tests failed")
    sys.exit(1)