diff --git a/search/multilang_query_builder.py b/search/multilang_query_builder.py
index 7c2387f..773202a 100644
--- a/search/multilang_query_builder.py
+++ b/search/multilang_query_builder.py
@@ -84,6 +84,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
         self,
         parsed_query: ParsedQuery,
         query_vector: Optional[np.ndarray] = None,
+        query_node: Optional[Any] = None,
         filters: Optional[Dict[str, Any]] = None,
         size: int = 10,
         from_: int = 0,
@@ -136,7 +137,18 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
             print(f"[MultiLangQueryBuilder] Available translations: {list(parsed_query.translations.keys())}")
 
         # Build query clause with multi-language support
-        query_clause = self._build_multilang_text_query(parsed_query, domain_config)
+        if query_node and isinstance(query_node, tuple) and len(query_node) > 0:
+            # Handle boolean query from tuple (AST, score)
+            ast_node = query_node[0]
+            query_clause = self._build_boolean_query_from_tuple(ast_node)
+            print(f"[MultiLangQueryBuilder] Using boolean query: {query_clause}")
+        elif query_node and hasattr(query_node, 'operator') and query_node.operator != 'TERM':
+            # Handle boolean query using base class method
+            query_clause = self._build_boolean_query(query_node)
+            print(f"[MultiLangQueryBuilder] Using boolean query: {query_clause}")
+        else:
+            # Handle text query with multi-language support
+            query_clause = self._build_multilang_text_query(parsed_query, domain_config)
 
         es_query = {
             "size": size,
@@ -290,6 +302,102 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
                 result.append(field_name)
         return result
 
+    def _build_boolean_query_from_tuple(self, node) -> Dict[str, Any]:
+        """
+        Build an ES query clause from a boolean expression node.
+
+        Accepted shapes, as handled below: an object exposing ``operator``
+        and ``terms``; a tuple whose first element is such an object; a
+        tuple ``(operator_name, terms)``; or a tuple whose first element is
+        a callable / printable constructor such as ``OR(...)``, followed by
+        a tuple of terms.
+
+        Args:
+            node: Boolean expression node or tuple (operator, terms...)
+
+        Returns:
+            ES query clause; ``{"match_all": {}}`` for empty, malformed or
+            unknown nodes.
+        """
+        if not node:
+            return {"match_all": {}}
+
+        # Handle different node types from boolean parser
+        if hasattr(node, 'operator'):
+            # QueryNode object
+            operator = node.operator
+            terms = node.terms
+        elif isinstance(node, tuple):
+            head = node[0]
+            if hasattr(head, 'operator'):
+                # Nested tuple with QueryNode
+                operator = head.operator
+                terms = head.terms
+            elif isinstance(head, str):
+                # Simple tuple like ('TERM', 'field:value')
+                operator = head
+                terms = node[1] if len(node) > 1 else ''
+            else:
+                # Complex tuple like (OR( TERM(...), TERM(...) ), score)
+                if hasattr(head, '__name__'):
+                    # Constructor call like OR(...)
+                    operator = head.__name__
+                else:
+                    # String representation of a constructor call; the
+                    # pattern is anchored at the start, so it only matches
+                    # text shaped like "OP(...".
+                    import re
+                    match = re.match(r'(\w+)\(', str(head))
+                    operator = match.group(1) if match else str(head)
+
+                # Extract terms from nested structure
+                if len(node) > 1 and isinstance(node[1], tuple):
+                    terms = node[1]
+                else:
+                    return {"match_all": {}}
+        else:
+            # Neither a node object nor a tuple (e.g. a bare string):
+            # there is no operator to dispatch on.
+            return {"match_all": {}}
+
+        print(f"[MultiLangQueryBuilder] Building boolean query for operator: {operator}")
+        print(f"[MultiLangQueryBuilder] Terms: {terms}")
+
+        if operator == 'TERM':
+            # Leaf node - handle field:query format
+            if isinstance(terms, str) and ':' in terms:
+                field, value = terms.split(':', 1)
+                return {"term": {field: value}}
+            return {"match_all": {}}
+
+        if operator in ('OR', 'AND', 'RANK'):
+            clauses = [self._build_boolean_query_from_tuple(term) for term in terms]
+            if operator == 'AND':
+                # All terms must match
+                return {"bool": {"must": clauses}}
+            if operator == 'OR':
+                # Any term must match
+                return {"bool": {"should": clauses, "minimum_should_match": 1}}
+            # RANK: like OR but for ranking (all terms contribute to score)
+            return {"bool": {"should": clauses}}
+
+        if operator == 'ANDNOT':
+            # First term must match, second must not
+            if len(terms) >= 2:
+                return {
+                    "bool": {
+                        "must": [self._build_boolean_query_from_tuple(terms[0])],
+                        "must_not": [self._build_boolean_query_from_tuple(terms[1])]
+                    }
+                }
+            if terms:
+                return self._build_boolean_query_from_tuple(terms[0])
+            # No operands at all: nothing to build from
+            return {"match_all": {}}
+
+        # Unknown operator
+        return {"match_all": {}}
+
     def get_domain_summary(self) -> Dict[str, Any]:
         """Get summary of all configured domains."""
         summary = {}
diff --git a/search/searcher.py b/search/searcher.py
index 0a798ed..6408628 100644
--- a/search/searcher.py
+++ b/search/searcher.py
@@ -149,6 +149,7 @@ class Searcher:
         es_query = self.query_builder.build_multilang_query(
             parsed_query=parsed_query,
             query_vector=parsed_query.query_vector if enable_embedding else None,
+            query_node=query_node,
             filters=filters,
             size=size,
             from_=from_,
@@ -170,6 +171,9 @@ class Searcher:
         if agg_fields:
             es_query = self.query_builder.add_aggregations(es_query, agg_fields)
 
+        # Extract size and from from body for ES client parameters
+        body_for_es = {k: v for k, v in es_query.items() if k not in ['size', 'from']}
+
         print(f"[Searcher] ES Query:")
         import json
         print(json.dumps(es_query, indent=2))
@@ -178,7 +182,7 @@ class Searcher:
         print(f"[Searcher] Executing ES query...")
         es_response = self.es_client.search(
             index_name=self.config.es_index_name,
-            body=es_query,
+            body=body_for_es,
             size=size,
             from_=from_
         )
-- 
libgit2 0.21.2