Commit f739c5e33fb7eefb26d9c5ce9c3b5a5594761f54

Authored by tangwang
1 parent bb3c5ef8

fix sch

Showing 2 changed files with 134 additions and 2 deletions   Show diff stats
search/multilang_query_builder.py
... ... @@ -84,6 +84,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
84 84 self,
85 85 parsed_query: ParsedQuery,
86 86 query_vector: Optional[np.ndarray] = None,
  87 + query_node: Optional[Any] = None,
87 88 filters: Optional[Dict[str, Any]] = None,
88 89 size: int = 10,
89 90 from_: int = 0,
... ... @@ -136,7 +137,18 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
136 137 print(f"[MultiLangQueryBuilder] Available translations: {list(parsed_query.translations.keys())}")
137 138  
138 139 # Build query clause with multi-language support
139   - query_clause = self._build_multilang_text_query(parsed_query, domain_config)
  140 + if query_node and isinstance(query_node, tuple) and len(query_node) > 0:
  141 + # Handle boolean query from tuple (AST, score)
  142 + ast_node = query_node[0]
  143 + query_clause = self._build_boolean_query_from_tuple(ast_node)
  144 + print(f"[MultiLangQueryBuilder] Using boolean query: {query_clause}")
  145 + elif query_node and hasattr(query_node, 'operator') and query_node.operator != 'TERM':
  146 + # Handle boolean query using base class method
  147 + query_clause = self._build_boolean_query(query_node)
  148 + print(f"[MultiLangQueryBuilder] Using boolean query: {query_clause}")
  149 + else:
  150 + # Handle text query with multi-language support
  151 + query_clause = self._build_multilang_text_query(parsed_query, domain_config)
140 152  
141 153 es_query = {
142 154 "size": size,
... ... @@ -290,6 +302,122 @@ class MultiLanguageQueryBuilder(ESQueryBuilder):
290 302 result.append(field_name)
291 303 return result
292 304  
  305 + def _build_boolean_query_from_tuple(self, node) -> Dict[str, Any]:
  306 + """
  307 + Build query from boolean expression tuple.
  308 +
  309 + Args:
  310 + node: Boolean expression tuple (operator, terms...)
  311 +
  312 + Returns:
  313 + ES query clause
  314 + """
  315 + if not node:
  316 + return {"match_all": {}}
  317 +
  318 + # Handle different node types from boolean parser
  319 + if hasattr(node, 'operator'):
  320 + # QueryNode object
  321 + operator = node.operator
  322 + terms = node.terms
  323 + elif isinstance(node, tuple) and len(node) > 0:
  324 + # Tuple format from boolean parser
  325 + if hasattr(node[0], 'operator'):
  326 + # Nested tuple with QueryNode
  327 + operator = node[0].operator
  328 + terms = node[0].terms
  329 + elif isinstance(node[0], str):
  330 + # Simple tuple like ('TERM', 'field:value')
  331 + operator = node[0]
  332 + terms = node[1] if len(node) > 1 else ''
  333 + else:
  334 + # Complex tuple like (OR( TERM(...), TERM(...) ), score)
  335 + if hasattr(node[0], '__class__') and hasattr(node[0], '__name__'):
  336 + # Constructor call like OR(...)
  337 + operator = node[0].__name__
  338 + elif str(node[0]).startswith('('):
  339 + # String representation of constructor call
  340 + import re
  341 + match = re.match(r'(\w+)\(', str(node[0]))
  342 + if match:
  343 + operator = match.group(1)
  344 + else:
  345 + return {"match_all": {}}
  346 + else:
  347 + operator = str(node[0])
  348 +
  349 + # Extract terms from nested structure
  350 + terms = []
  351 + if len(node) > 1 and isinstance(node[1], tuple):
  352 + terms = node[1]
  353 + else:
  354 + return {"match_all": {}}
  355 +
  356 + print(f"[MultiLangQueryBuilder] Building boolean query for operator: {operator}")
  357 + print(f"[MultiLangQueryBuilder] Terms: {terms}")
  358 +
  359 + if operator == 'TERM':
  360 + # Leaf node - handle field:query format
  361 + if isinstance(terms, str) and ':' in terms:
  362 + field, value = terms.split(':', 1)
  363 + return {
  364 + "term": {
  365 + field: value
  366 + }
  367 + }
  368 + else:
  369 + return {"match_all": {}}
  370 +
  371 + elif operator == 'OR':
  372 + # Any term must match
  373 + should_clauses = []
  374 + for term in terms:
  375 + should_clauses.append(self._build_boolean_query_from_tuple(term))
  376 + return {
  377 + "bool": {
  378 + "should": should_clauses,
  379 + "minimum_should_match": 1
  380 + }
  381 + }
  382 +
  383 + elif operator == 'AND':
  384 + # All terms must match
  385 + must_clauses = []
  386 + for term in terms:
  387 + must_clauses.append(self._build_boolean_query_from_tuple(term))
  388 + return {
  389 + "bool": {
  390 + "must": must_clauses
  391 + }
  392 + }
  393 +
  394 + elif operator == 'ANDNOT':
  395 + # First term must match, second must not
  396 + if len(terms) >= 2:
  397 + return {
  398 + "bool": {
  399 + "must": [self._build_boolean_query_from_tuple(terms[0])],
  400 + "must_not": [self._build_boolean_query_from_tuple(terms[1])]
  401 + }
  402 + }
  403 + else:
  404 + return self._build_boolean_query_from_tuple(terms[0])
  405 +
  406 + elif operator == 'RANK':
  407 + # Like OR but for ranking (all terms contribute to score)
  408 + should_clauses = []
  409 + for term in terms:
  410 + should_clauses.append(self._build_boolean_query_from_tuple(term))
  411 + return {
  412 + "bool": {
  413 + "should": should_clauses
  414 + }
  415 + }
  416 +
  417 + else:
  418 + # Unknown operator
  419 + return {"match_all": {}}
  420 +
293 421 def get_domain_summary(self) -> Dict[str, Any]:
294 422 """Get summary of all configured domains."""
295 423 summary = {}
... ...
search/searcher.py
... ... @@ -149,6 +149,7 @@ class Searcher:
149 149 es_query = self.query_builder.build_multilang_query(
150 150 parsed_query=parsed_query,
151 151 query_vector=parsed_query.query_vector if enable_embedding else None,
  152 + query_node=query_node,
152 153 filters=filters,
153 154 size=size,
154 155 from_=from_,
... ... @@ -170,6 +171,9 @@ class Searcher:
170 171 if agg_fields:
171 172 es_query = self.query_builder.add_aggregations(es_query, agg_fields)
172 173  
  174 + # Drop 'size' and 'from' from the request body; they are passed to the ES client as separate arguments
  175 + body_for_es = {k: v for k, v in es_query.items() if k not in ['size', 'from']}
  176 +
173 177 print(f"[Searcher] ES Query:")
174 178 import json
175 179 print(json.dumps(es_query, indent=2))
... ... @@ -178,7 +182,7 @@ class Searcher:
178 182 print(f"[Searcher] Executing ES query...")
179 183 es_response = self.es_client.search(
180 184 index_name=self.config.es_index_name,
181   - body=es_query,
  185 + body=body_for_es,
182 186 size=size,
183 187 from_=from_
184 188 )
... ...