Blame view

test_multilang_config.py 4.99 KB
b926f678   tangwang   多语言查询
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
  #!/usr/bin/env python3
  """
  Test script to validate multi-language configuration.
  
  This script validates that:
  1. language_field_mapping is correctly loaded from config
  2. All referenced fields exist and have correct analyzers
  3. Multi-language query builder works correctly
  """
  
  import sys
  import json
  from config import ConfigLoader
  
  def test_config_loading():
      """Load the 'customer1' configuration and report its language mappings.

      Prints the customer name and ES index name, runs the loader's
      validation, and lists every index together with its
      language_field_mapping (or a note that it has none).

      Returns:
          True when the configuration loads and validation reports no errors;
          False when validation reports errors or any exception is raised.
      """
      separator = "=" * 60
      print(separator)
      print("Testing Configuration Loading")
      print(separator)

      try:
          config_loader = ConfigLoader()
          customer_config = config_loader.load_customer_config('customer1')

          print("\n✓ Configuration loaded successfully")
          print(f"  Customer: {customer_config.customer_name}")
          print(f"  ES Index: {customer_config.es_index_name}")

          # Bail out as soon as the loader reports validation problems.
          problems = config_loader.validate_config(customer_config)
          if problems:
              print("\n✗ Configuration validation failed:")
              for problem in problems:
                  print(f"  - {problem}")
              return False
          print("\n✓ Configuration validation passed")

          # Report, per index, which fields serve which language.
          print("\nIndexes with multi-language support:")
          for index in customer_config.indexes:
              lang_mapping = index.language_field_mapping
              if not lang_mapping:
                  print(f"\n  {index.name} ({index.label}): No language mapping")
                  continue

              print(f"\n  {index.name} ({index.label}):")
              print(f"    Fields: {index.fields}")
              print("    Language mapping:")
              for language, mapped_fields in lang_mapping.items():
                  print(f"      {language}: {mapped_fields}")

          return True

      except Exception as exc:
          # Top-level boundary of this check: report the failure with a full
          # traceback and signal it through the return value.
          print(f"\n✗ Error loading configuration: {exc}")
          import traceback
          traceback.print_exc()
          return False
  
  
  def test_multilang_query_builder():
      """Exercise MultiLanguageQueryBuilder against the 'customer1' config.

      Builds the query builder from the loaded configuration, prints its
      domain summary, then parses a few sample queries (without vector
      generation) and builds a query for each one with KNN disabled,
      printing the parsed attributes and the top-level keys of the result.

      Returns:
          True when every step completes; False when any exception is raised
          (including a failure to import the project modules).
      """
      separator = "=" * 60
      print("\n" + separator)
      print("Testing Multi-Language Query Builder")
      print(separator)

      try:
          # Imported inside the try block so an import failure is reported as
          # a failed check rather than aborting the script.
          from config import ConfigLoader
          from query import QueryParser
          from search.multilang_query_builder import MultiLanguageQueryBuilder
          from indexer import MappingGenerator

          config_loader = ConfigLoader()
          customer_config = config_loader.load_customer_config('customer1')

          # The embedding field names come from the mapping generator.
          mapping_generator = MappingGenerator(customer_config)
          text_field = mapping_generator.get_text_embedding_field()
          image_field = mapping_generator.get_image_embedding_field()

          query_builder = MultiLanguageQueryBuilder(
              config=customer_config,
              index_name=customer_config.es_index_name,
              text_embedding_field=text_field,
              image_embedding_field=image_field,
          )

          print("\n✓ MultiLanguageQueryBuilder initialized")

          # Show what the builder knows about each domain.
          domain_summary = query_builder.get_domain_summary()
          print("\nDomain Summary:")
          for domain, details in domain_summary.items():
              print(f"  {domain}:")
              print(f"    Label: {details['label']}")
              print(f"    Has multilang mapping: {details['has_multilang_mapping']}")
              if details['has_multilang_mapping']:
                  print(f"    Supported languages: {details['supported_languages']}")

          # Sample queries: a bare query plus two with an explicit domain prefix.
          parser = QueryParser(customer_config)
          sample_queries = (
              "芭比娃娃",
              "title:芭比娃娃",
              "default:玩具",
          )
          # Options shared by every build call below.
          build_options = {
              "query_vector": None,
              "filters": None,
              "size": 10,
              "enable_knn": False,
          }

          print("\nTesting query parsing:")
          for raw_query in sample_queries:
              print(f"\n  Query: '{raw_query}'")
              parsed_result = parser.parse(raw_query, generate_vector=False)
              print(f"    Domain: {parsed_result.domain}")
              print(f"    Detected language: {parsed_result.detected_language}")
              print(f"    Translations: {list(parsed_result.translations.keys())}")

              built_query = query_builder.build_multilang_query(
                  parsed_query=parsed_result,
                  **build_options,
              )
              print(f"    ES Query keys: {list(built_query.keys())}")

          return True

      except Exception as exc:
          # Top-level boundary of this check: report the failure with a full
          # traceback and signal it through the return value.
          print(f"\n✗ Error testing query builder: {exc}")
          import traceback
          traceback.print_exc()
          return False
  
  
  if __name__ == "__main__":
      print("Multi-Language Configuration Test")
      print("=" * 60)

      # Both checks always run, in order, so a failure in the first one does
      # not hide the outcome of the second.
      results = [
          test_config_loading(),           # Test 1: Configuration loading
          test_multilang_query_builder(),  # Test 2: Multi-language query builder
      ]

      print("\n" + "=" * 60)
      # Exit status 0 only when every check returned a truthy result.
      if all(results):
          print("✓ All tests passed!")
          sys.exit(0)
      print("✗ Some tests failed")
      sys.exit(1)