Commit f739c5e33fb7eefb26d9c5ce9c3b5a5594761f54
1 parent
bb3c5ef8
fix sch
Showing
2 changed files
with
134 additions
and
2 deletions
Show diff stats
search/multilang_query_builder.py
| ... | ... | @@ -84,6 +84,7 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): |
| 84 | 84 | self, |
| 85 | 85 | parsed_query: ParsedQuery, |
| 86 | 86 | query_vector: Optional[np.ndarray] = None, |
| 87 | + query_node: Optional[Any] = None, | |
| 87 | 88 | filters: Optional[Dict[str, Any]] = None, |
| 88 | 89 | size: int = 10, |
| 89 | 90 | from_: int = 0, |
| ... | ... | @@ -136,7 +137,18 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): |
| 136 | 137 | print(f"[MultiLangQueryBuilder] Available translations: {list(parsed_query.translations.keys())}") |
| 137 | 138 | |
| 138 | 139 | # Build query clause with multi-language support |
| 139 | - query_clause = self._build_multilang_text_query(parsed_query, domain_config) | |
| 140 | + if query_node and isinstance(query_node, tuple) and len(query_node) > 0: | |
| 141 | + # Handle boolean query from tuple (AST, score) | |
| 142 | + ast_node = query_node[0] | |
| 143 | + query_clause = self._build_boolean_query_from_tuple(ast_node) | |
| 144 | + print(f"[MultiLangQueryBuilder] Using boolean query: {query_clause}") | |
| 145 | + elif query_node and hasattr(query_node, 'operator') and query_node.operator != 'TERM': | |
| 146 | + # Handle boolean query using base class method | |
| 147 | + query_clause = self._build_boolean_query(query_node) | |
| 148 | + print(f"[MultiLangQueryBuilder] Using boolean query: {query_clause}") | |
| 149 | + else: | |
| 150 | + # Handle text query with multi-language support | |
| 151 | + query_clause = self._build_multilang_text_query(parsed_query, domain_config) | |
| 140 | 152 | |
| 141 | 153 | es_query = { |
| 142 | 154 | "size": size, |
| ... | ... | @@ -290,6 +302,122 @@ class MultiLanguageQueryBuilder(ESQueryBuilder): |
| 290 | 302 | result.append(field_name) |
| 291 | 303 | return result |
| 292 | 304 | |
def _build_boolean_query_from_tuple(self, node) -> Dict[str, Any]:
    """
    Build an Elasticsearch query clause from a parsed boolean expression.

    Accepted node shapes:
      * an object exposing ``operator`` and ``terms`` attributes;
      * a tuple whose first element is such an object (that object's
        operator and terms are used);
      * a tuple ``(operator_name, terms)`` whose first element is a string;
      * any other tuple: the operator name is derived from the first
        element and the terms are taken from the second element, but only
        when that element is itself a tuple.

    Supported operators are ``TERM``, ``OR``, ``AND``, ``ANDNOT`` and
    ``RANK``. A ``TERM`` is only translated when its value is a
    ``"field:value"`` string.

    Args:
        node: Boolean expression node in one of the shapes listed above.

    Returns:
        ES query clause. ``{"match_all": {}}`` is returned for empty or
        unsupported nodes, unknown operators, ``TERM`` values without a
        ``field:value`` form, and ``ANDNOT`` without any operand.
    """
    if not node:
        return {"match_all": {}}

    # Normalise the supported node shapes into (operator, terms).
    if hasattr(node, 'operator'):
        operator, terms = node.operator, node.terms
    elif isinstance(node, tuple):
        head = node[0]  # safe: empty tuples were rejected above
        if hasattr(head, 'operator'):
            operator, terms = head.operator, head.terms
        elif isinstance(head, str):
            # Simple tuple like ('TERM', 'field:value')
            operator = head
            terms = node[1] if len(node) > 1 else ''
        else:
            if hasattr(head, '__name__'):
                # Class/function-like head: use its name as the operator.
                operator = head.__name__
            elif str(head).startswith('('):
                # NOTE(review): the previous code ran
                # re.match(r'(\w+)\(', str(head)) here, which can never
                # match a string that starts with '(' and therefore always
                # fell back to match_all. Presumably the intent was to
                # parse a repr such as "OR(...)" -- confirm before
                # changing this behaviour.
                return {"match_all": {}}
            else:
                operator = str(head)

            has_term_tuple = len(node) > 1 and isinstance(node[1], tuple)
            terms = node[1] if has_term_tuple else []
    else:
        # Neither a node object nor a tuple: nothing we know how to build.
        return {"match_all": {}}

    print(f"[MultiLangQueryBuilder] Building boolean query for operator: {operator}")
    print(f"[MultiLangQueryBuilder] Terms: {terms}")

    if operator == 'TERM':
        # Leaf node - only the "field:value" form is translated.
        if isinstance(terms, str) and ':' in terms:
            field, value = terms.split(':', 1)
            return {"term": {field: value}}
        return {"match_all": {}}

    if operator in ('OR', 'AND', 'RANK'):
        clauses = [self._build_boolean_query_from_tuple(term) for term in terms]
        if operator == 'AND':
            # All terms must match
            return {"bool": {"must": clauses}}
        if operator == 'OR':
            # Any term must match
            return {"bool": {"should": clauses, "minimum_should_match": 1}}
        # RANK: like OR, but without a minimum match requirement
        return {"bool": {"should": clauses}}

    if operator == 'ANDNOT':
        # First term must match, second must not.
        if not terms:
            # Previously this path indexed terms[0] and raised IndexError.
            return {"match_all": {}}
        positive = self._build_boolean_query_from_tuple(terms[0])
        if len(terms) < 2:
            return positive
        negative = self._build_boolean_query_from_tuple(terms[1])
        return {"bool": {"must": [positive], "must_not": [negative]}}

    # Unknown operator
    return {"match_all": {}}
| 293 | 421 | def get_domain_summary(self) -> Dict[str, Any]: |
| 294 | 422 | """Get summary of all configured domains.""" |
| 295 | 423 | summary = {} | ... | ... |
search/searcher.py
| ... | ... | @@ -149,6 +149,7 @@ class Searcher: |
| 149 | 149 | es_query = self.query_builder.build_multilang_query( |
| 150 | 150 | parsed_query=parsed_query, |
| 151 | 151 | query_vector=parsed_query.query_vector if enable_embedding else None, |
| 152 | + query_node=query_node, | |
| 152 | 153 | filters=filters, |
| 153 | 154 | size=size, |
| 154 | 155 | from_=from_, |
| ... | ... | @@ -170,6 +171,9 @@ class Searcher: |
| 170 | 171 | if agg_fields: |
| 171 | 172 | es_query = self.query_builder.add_aggregations(es_query, agg_fields) |
| 172 | 173 | |
| 174 | + # Extract size and from from body for ES client parameters | |
| 175 | + body_for_es = {k: v for k, v in es_query.items() if k not in ['size', 'from']} | |
| 176 | + | |
| 173 | 177 | print(f"[Searcher] ES Query:") |
| 174 | 178 | import json |
| 175 | 179 | print(json.dumps(es_query, indent=2)) |
| ... | ... | @@ -178,7 +182,7 @@ class Searcher: |
| 178 | 182 | print(f"[Searcher] Executing ES query...") |
| 179 | 183 | es_response = self.es_client.search( |
| 180 | 184 | index_name=self.config.es_index_name, |
| 181 | - body=es_query, | |
| 185 | + body=body_for_es, | |
| 182 | 186 | size=size, |
| 183 | 187 | from_=from_ |
| 184 | 188 | ) | ... | ... |