#!/usr/bin/env python3
"""
Debug script to investigate why query "车" returns no results for tenant_id=2
"""

import json
import os
import sys

# Import ESClient directly to avoid sqlalchemy dependency
from elasticsearch import Elasticsearch
from typing import Dict, Any, List, Optional

# Tenant whose documents are being investigated (indexed as a string term).
TENANT_ID = "2"

# Boosted fields used by the full multi_match queries below.
_SEARCH_FIELDS = [
    "title^3.0",
    "brief^1.5",
    "description",
    "seo_title^2.0",
    "seo_description^1.5",
    "seo_keywords^2.0",
    "vendor^1.5",
    "product_type^1.5",
    "tags",
    "category^1.5",
]


class ESClient:
    """Simple ES client for debugging.

    Thin wrapper around ``elasticsearch.Elasticsearch`` whose methods never
    raise: failures are converted into a falsy/empty result so the debug
    script can keep going, and are reported rather than hidden.
    """

    def __init__(self, hosts: Optional[List[str]] = None,
                 username: Optional[str] = None,
                 password: Optional[str] = None):
        """Create the underlying client.

        Args:
            hosts: ES endpoints; defaults to ``$ES_HOST`` or
                ``http://localhost:9200``.
            username: Basic-auth user; only used when ``password`` is also set.
            password: Basic-auth password; only used when ``username`` is set.
        """
        if hosts is None:
            hosts = [os.getenv('ES_HOST', 'http://localhost:9200')]

        client_config = {'hosts': hosts, 'timeout': 30}
        if username and password:
            client_config['http_auth'] = (username, password)

        self.client = Elasticsearch(**client_config)

    def ping(self) -> bool:
        """Return True when the cluster answers a ping, False on any error."""
        try:
            return self.client.ping()
        except Exception:
            # Narrowed from a bare ``except`` so Ctrl-C / SystemExit still work.
            return False

    def index_exists(self, index_name: str) -> bool:
        """Return whether ``index_name`` exists (errors propagate)."""
        return self.client.indices.exists(index=index_name)

    def count(self, index_name: str, body: Optional[Dict[str, Any]] = None) -> int:
        """Return the number of documents matching ``body`` in ``index_name``.

        Returns 0 when the request fails; the failure is written to stderr so
        that an error is distinguishable from a genuinely empty result.
        """
        try:
            result = self.client.count(index=index_name, body=body)
            return result['count']
        except Exception as exc:
            print(f"WARNING: count on '{index_name}' failed: {exc}", file=sys.stderr)
            return 0

    def search(self, index_name: str, body: Dict[str, Any],
               size: int = 10, from_: int = 0) -> Dict[str, Any]:
        """Run a search and return the raw response.

        On failure an empty, response-shaped dict is returned with the
        exception text under the extra ``'error'`` key; callers should check
        that key before trusting a zero hit count.
        """
        try:
            return self.client.search(index=index_name, body=body, size=size, from_=from_)
        except Exception as e:
            return {'hits': {'total': {'value': 0}, 'hits': []}, 'error': str(e)}


from config.config_loader import ConfigLoader


def _tenant_filter() -> Dict[str, Any]:
    """Build a fresh term filter restricting results to ``TENANT_ID``."""
    return {"term": {"tenant_id": TENANT_ID}}


def _total_hits(result: Dict[str, Any]) -> int:
    """Extract the total hit count from a search response.

    Handles both response shapes the original script handled: ``total`` as a
    dict with a ``value`` key, or ``total`` as a plain number.
    """
    total = result.get('hits', {}).get('total', {})
    if isinstance(total, dict):
        return total.get('value', 0)
    return total


def _report_search_error(result: Dict[str, Any], label: str) -> None:
    """Print the error captured by ``ESClient.search``, if there was one.

    Without this, a failed request is indistinguishable from "0 results".
    """
    error = result.get('error')
    if error:
        # NOTE(review): one plausible trigger is `size` being supplied both in
        # the body and as a keyword argument, which some elasticsearch-py
        # versions may reject — confirm against the installed client.
        print(f"ERROR: search failed ({label}): {error}")


def _print_hits(hits: List[Dict[str, Any]], show_tenant: bool = False) -> None:
    """Print title and score (optionally tenant_id) for each hit."""
    for i, hit in enumerate(hits, 1):
        source = hit.get('_source', {})
        # `_score` can be present but None; mirror the `max_score or 0.0` guard.
        score = hit.get('_score') or 0.0
        print(f" {i}. {source.get('title', 'N/A')} (score: {score:.3f})")
        if show_tenant:
            print(f" tenant_id: {source.get('tenant_id', 'N/A')}")


def main():
    """Run the diagnostic sequence against Elasticsearch.

    Returns:
        0 when all checks ran, 1 when ES is unreachable or the index is missing.
    """
    # Load config
    config_loader = ConfigLoader("config/config.yaml")
    config = config_loader.load_config()

    # Get ES connection info
    es_host = os.getenv('ES_HOST', 'http://localhost:9200')
    es_username = os.getenv('ES_USERNAME')
    es_password = os.getenv('ES_PASSWORD')

    # Initialize ES client (ESClient ignores credentials unless both are set)
    es_client = ESClient(hosts=[es_host], username=es_username, password=es_password)

    if not es_client.ping():
        print(f"ERROR: Cannot connect to Elasticsearch at {es_host}")
        return 1

    index_name = config.es_index_name
    print(f"Using index: {index_name}")
    print(f"ES host: {es_host}\n")

    # 1. Check if index exists
    if not es_client.index_exists(index_name):
        print(f"ERROR: Index '{index_name}' does not exist")
        return 1

    # 2. Count documents with tenant_id=2
    count_query = {"query": _tenant_filter()}
    tenant_count = es_client.count(index_name, count_query)
    print(f"Documents with tenant_id='{TENANT_ID}': {tenant_count}")

    # 3. Check if there are documents containing "遥控车" or "车"
    search_terms = ["遥控车", "车"]
    for term in search_terms:
        term_query = {
            "query": {
                "bool": {
                    "must": [
                        {
                            "multi_match": {
                                "query": term,
                                "fields": list(_SEARCH_FIELDS),
                            }
                        }
                    ],
                    "filter": [_tenant_filter()],
                }
            },
            "size": 5,
        }

        result = es_client.search(index_name, term_query, size=5)
        _report_search_error(result, f"term '{term}'")
        total_value = _total_hits(result)

        print(f"\nSearch for '{term}' with tenant_id={TENANT_ID}: {total_value} results")

        if total_value > 0:
            print("Sample results:")
            _print_hits(result.get('hits', {}).get('hits', [])[:3], show_tenant=True)

    # 4. Test the exact query structure from the user
    print("\n" + "=" * 60)
    print("Testing exact query structure from user:")
    print("=" * 60)

    exact_query = {
        "query": {
            "bool": {
                "must": [
                    {
                        "bool": {
                            "should": [
                                {
                                    "multi_match": {
                                        "query": "车",
                                        "fields": list(_SEARCH_FIELDS),
                                        "minimum_should_match": "67%",
                                        "tie_breaker": 0.9,
                                        "boost": 1,
                                        "_name": "default_query",
                                    }
                                }
                            ],
                            "minimum_should_match": 1,
                        }
                    }
                ],
                "filter": [_tenant_filter()],
            }
        },
        "size": 10,
    }

    print("\nQuery structure:")
    print(json.dumps(exact_query, indent=2, ensure_ascii=False))

    result = es_client.search(index_name, exact_query, size=10)
    _report_search_error(result, "exact query")
    total_value = _total_hits(result)
    max_score = result.get('hits', {}).get('max_score') or 0.0

    print(f"\nResults: {total_value} hits, max_score: {max_score}")

    if total_value == 0:
        print("\n⚠️ No results found! Investigating...")

        # 5. Check if "车" matches without minimum_should_match
        print("\nTesting without minimum_should_match:")
        simple_query = {
            "query": {
                "bool": {
                    "must": [
                        {
                            "multi_match": {
                                "query": "车",
                                "fields": ["title^3.0", "brief^1.5", "description"],
                            }
                        }
                    ],
                    "filter": [_tenant_filter()],
                }
            },
            "size": 5,
        }

        simple_result = es_client.search(index_name, simple_query, size=5)
        _report_search_error(simple_result, "without minimum_should_match")
        simple_total_value = _total_hits(simple_result)
        print(f"Results without minimum_should_match: {simple_total_value}")

        # 6. Check field values for tenant_id=2 documents
        print(f"\nChecking sample documents with tenant_id={TENANT_ID}:")
        sample_query = {
            "query": _tenant_filter(),
            "size": 3,
            "_source": ["title", "brief", "description", "category", "tags", "tenant_id"],
        }

        sample_result = es_client.search(index_name, sample_query, size=3)
        _report_search_error(sample_result, "sample documents")
        for i, hit in enumerate(sample_result.get('hits', {}).get('hits', []), 1):
            source = hit.get('_source', {})
            brief = source.get('brief')
            # Truncate long briefs; show N/A when the field is missing or empty.
            brief_preview = brief[:50] if brief else 'N/A'
            print(f"\n Document {i}:")
            print(f" title: {source.get('title', 'N/A')}")
            print(f" brief: {brief_preview}")
            print(f" category: {source.get('category', 'N/A')}")
            print(f" tags: {source.get('tags', 'N/A')}")
    else:
        print("\n✅ Found results!")
        print("\nTop results:")
        _print_hits(result.get('hits', {}).get('hits', [])[:5])

    return 0


if __name__ == "__main__":
    sys.exit(main())