Commit f739c5e33fb7eefb26d9c5ce9c3b5a5594761f54

Authored by tangwang
1 parent bb3c5ef8

fix sch

Showing 2 changed files with 134 additions and 2 deletions   Show diff stats
search/multilang_query_builder.py
@@ -84,6 +84,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): @@ -84,6 +84,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
84 self, 84 self,
85 parsed_query: ParsedQuery, 85 parsed_query: ParsedQuery,
86 query_vector: Optional[np.ndarray] = None, 86 query_vector: Optional[np.ndarray] = None,
  87 + query_node: Optional[Any] = None,
87 filters: Optional[Dict[str, Any]] = None, 88 filters: Optional[Dict[str, Any]] = None,
88 size: int = 10, 89 size: int = 10,
89 from_: int = 0, 90 from_: int = 0,
@@ -136,7 +137,18 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): @@ -136,7 +137,18 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
136 print(f"[MultiLangQueryBuilder] Available translations: {list(parsed_query.translations.keys())}") 137 print(f"[MultiLangQueryBuilder] Available translations: {list(parsed_query.translations.keys())}")
137 138
138 # Build query clause with multi-language support 139 # Build query clause with multi-language support
139 - query_clause = self._build_multilang_text_query(parsed_query, domain_config) 140 + if query_node and isinstance(query_node, tuple) and len(query_node) > 0:
  141 + # Handle boolean query from tuple (AST, score)
  142 + ast_node = query_node[0]
  143 + query_clause = self._build_boolean_query_from_tuple(ast_node)
  144 + print(f"[MultiLangQueryBuilder] Using boolean query: {query_clause}")
  145 + elif query_node and hasattr(query_node, 'operator') and query_node.operator != 'TERM':
  146 + # Handle boolean query using base class method
  147 + query_clause = self._build_boolean_query(query_node)
  148 + print(f"[MultiLangQueryBuilder] Using boolean query: {query_clause}")
  149 + else:
  150 + # Handle text query with multi-language support
  151 + query_clause = self._build_multilang_text_query(parsed_query, domain_config)
140 152
141 es_query = { 153 es_query = {
142 "size": size, 154 "size": size,
@@ -290,6 +302,122 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): @@ -290,6 +302,122 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
290 result.append(field_name) 302 result.append(field_name)
291 return result 303 return result
292 304
  305 + def _build_boolean_query_from_tuple(self, node) -> Dict[str, Any]:
  306 + """
  307 + Build query from boolean expression tuple.
  308 +
  309 + Args:
  310 + node: Boolean expression tuple (operator, terms...)
  311 +
  312 + Returns:
  313 + ES query clause
  314 + """
  315 + if not node:
  316 + return {"match_all": {}}
  317 +
  318 + # Handle different node types from boolean parser
  319 + if hasattr(node, 'operator'):
  320 + # QueryNode object
  321 + operator = node.operator
  322 + terms = node.terms
  323 + elif isinstance(node, tuple) and len(node) > 0:
  324 + # Tuple format from boolean parser
  325 + if hasattr(node[0], 'operator'):
  326 + # Nested tuple with QueryNode
  327 + operator = node[0].operator
  328 + terms = node[0].terms
  329 + elif isinstance(node[0], str):
  330 + # Simple tuple like ('TERM', 'field:value')
  331 + operator = node[0]
  332 + terms = node[1] if len(node) > 1 else ''
  333 + else:
  334 + # Complex tuple like (OR( TERM(...), TERM(...) ), score)
  335 + if hasattr(node[0], '__class__') and hasattr(node[0], '__name__'):
  336 + # Constructor call like OR(...)
  337 + operator = node[0].__name__
  338 + elif str(node[0]).startswith('('):
  339 + # String representation of constructor call
  340 + import re
  341 + match = re.match(r'(\w+)\(', str(node[0]))
  342 + if match:
  343 + operator = match.group(1)
  344 + else:
  345 + return {"match_all": {}}
  346 + else:
  347 + operator = str(node[0])
  348 +
  349 + # Extract terms from nested structure
  350 + terms = []
  351 + if len(node) > 1 and isinstance(node[1], tuple):
  352 + terms = node[1]
  353 + else:
  354 + return {"match_all": {}}
  355 +
  356 + print(f"[MultiLangQueryBuilder] Building boolean query for operator: {operator}")
  357 + print(f"[MultiLangQueryBuilder] Terms: {terms}")
  358 +
  359 + if operator == 'TERM':
  360 + # Leaf node - handle field:query format
  361 + if isinstance(terms, str) and ':' in terms:
  362 + field, value = terms.split(':', 1)
  363 + return {
  364 + "term": {
  365 + field: value
  366 + }
  367 + }
  368 + else:
  369 + return {"match_all": {}}
  370 +
  371 + elif operator == 'OR':
  372 + # Any term must match
  373 + should_clauses = []
  374 + for term in terms:
  375 + should_clauses.append(self._build_boolean_query_from_tuple(term))
  376 + return {
  377 + "bool": {
  378 + "should": should_clauses,
  379 + "minimum_should_match": 1
  380 + }
  381 + }
  382 +
  383 + elif operator == 'AND':
  384 + # All terms must match
  385 + must_clauses = []
  386 + for term in terms:
  387 + must_clauses.append(self._build_boolean_query_from_tuple(term))
  388 + return {
  389 + "bool": {
  390 + "must": must_clauses
  391 + }
  392 + }
  393 +
  394 + elif operator == 'ANDNOT':
  395 + # First term must match, second must not
  396 + if len(terms) >= 2:
  397 + return {
  398 + "bool": {
  399 + "must": [self._build_boolean_query_from_tuple(terms[0])],
  400 + "must_not": [self._build_boolean_query_from_tuple(terms[1])]
  401 + }
  402 + }
  403 + else:
  404 + return self._build_boolean_query_from_tuple(terms[0])
  405 +
  406 + elif operator == 'RANK':
  407 + # Like OR but for ranking (all terms contribute to score)
  408 + should_clauses = []
  409 + for term in terms:
  410 + should_clauses.append(self._build_boolean_query_from_tuple(term))
  411 + return {
  412 + "bool": {
  413 + "should": should_clauses
  414 + }
  415 + }
  416 +
  417 + else:
  418 + # Unknown operator
  419 + return {"match_all": {}}
  420 +
293 def get_domain_summary(self) -> Dict[str, Any]: 421 def get_domain_summary(self) -> Dict[str, Any]:
294 """Get summary of all configured domains.""" 422 """Get summary of all configured domains."""
295 summary = {} 423 summary = {}
search/searcher.py
@@ -149,6 +149,7 @@ class Searcher: @@ -149,6 +149,7 @@ class Searcher:
149 es_query = self.query_builder.build_multilang_query( 149 es_query = self.query_builder.build_multilang_query(
150 parsed_query=parsed_query, 150 parsed_query=parsed_query,
151 query_vector=parsed_query.query_vector if enable_embedding else None, 151 query_vector=parsed_query.query_vector if enable_embedding else None,
  152 + query_node=query_node,
152 filters=filters, 153 filters=filters,
153 size=size, 154 size=size,
154 from_=from_, 155 from_=from_,
@@ -170,6 +171,9 @@ class Searcher: @@ -170,6 +171,9 @@ class Searcher:
170 if agg_fields: 171 if agg_fields:
171 es_query = self.query_builder.add_aggregations(es_query, agg_fields) 172 es_query = self.query_builder.add_aggregations(es_query, agg_fields)
172 173
  174 + # Extract size and from from body for ES client parameters
  175 + body_for_es = {k: v for k, v in es_query.items() if k not in ['size', 'from']}
  176 +
173 print(f"[Searcher] ES Query:") 177 print(f"[Searcher] ES Query:")
174 import json 178 import json
175 print(json.dumps(es_query, indent=2)) 179 print(json.dumps(es_query, indent=2))
@@ -178,7 +182,7 @@ class Searcher: @@ -178,7 +182,7 @@ class Searcher:
178 print(f"[Searcher] Executing ES query...") 182 print(f"[Searcher] Executing ES query...")
179 es_response = self.es_client.search( 183 es_response = self.es_client.search(
180 index_name=self.config.es_index_name, 184 index_name=self.config.es_index_name,
181 - body=es_query, 185 + body=body_for_es,
182 size=size, 186 size=size,
183 from_=from_ 187 from_=from_
184 ) 188 )