Commit db9c469ca8aa5b4741751cfb70bf891a61136cc5
1 parent de98daa3
log optimize
Showing 1 changed file with 66 additions and 33 deletions
Show diff stats
query/query_parser.py
| ... | ... | @@ -12,6 +12,7 @@ from dataclasses import dataclass, field |
| 12 | 12 | from typing import Any, Callable, Dict, List, Optional, Tuple |
| 13 | 13 | import numpy as np |
| 14 | 14 | import logging |
| 15 | +import time | |
| 15 | 16 | from concurrent.futures import ThreadPoolExecutor, wait |
| 16 | 17 | |
| 17 | 18 | from embeddings.image_encoder import CLIPImageEncoder |
| ... | ... | @@ -34,6 +35,44 @@ logger = logging.getLogger(__name__) |
| 34 | 35 | import hanlp # type: ignore |
| 35 | 36 | |
| 36 | 37 | |
| 38 | +def _async_enrichment_result_summary( | |
| 39 | + task_type: str, lang: Optional[str], result: Any | |
| 40 | +) -> str: | |
| 41 | + """One-line description of a completed translation/embedding task for logging.""" | |
| 42 | + if task_type == "translation": | |
| 43 | + if result: | |
| 44 | + return f"lang={lang} translated={result!r}" | |
| 45 | + return f"lang={lang} empty_translation" | |
| 46 | + if task_type in ("embedding", "image_embedding"): | |
| 47 | + if result is not None: | |
| 48 | + return f"vector_shape={tuple(result.shape)}" | |
| 49 | + return "no_vector" if task_type == "embedding" else "no_image_vector" | |
| 50 | + return f"unexpected_task_type={task_type!r}" | |
| 51 | + | |
| 52 | + | |
| 53 | +def _async_enrichment_failure_warning(task_type: str, lang: Optional[str], err: BaseException) -> str: | |
| 54 | + """Warning text aligned with historical messages for context.add_warning.""" | |
| 55 | + msg = str(err) | |
| 56 | + if task_type == "translation": | |
| 57 | + return f"Translation failed | Language: {lang} | Error: {msg}" | |
| 58 | + if task_type == "image_embedding": | |
| 59 | + return f"CLIP text query vector generation failed | Error: {msg}" | |
| 60 | + return f"Query vector generation failed | Error: {msg}" | |
| 61 | + | |
| 62 | + | |
| 63 | +def _log_async_enrichment_finished( | |
| 64 | + log_info: Callable[[str], None], | |
| 65 | + *, | |
| 66 | + task_type: str, | |
| 67 | + summary: str, | |
| 68 | + elapsed_ms: float, | |
| 69 | +) -> None: | |
| 70 | + log_info( | |
| 71 | + f"Async enrichment task finished | task_type={task_type} | " | |
| 72 | + f"summary={summary} | elapsed_ms={elapsed_ms:.1f}" | |
| 73 | + ) | |
| 74 | + | |
| 75 | + | |
| 37 | 76 | def rerank_query_text( |
| 38 | 77 | original_query: str, |
| 39 | 78 | *, |
| ... | ... | @@ -328,6 +367,7 @@ class QueryParser: |
| 328 | 367 | # caller decides translation targets and later search-field planning. |
| 329 | 368 | translations: Dict[str, str] = {} |
| 330 | 369 | future_to_task: Dict[Any, Tuple[str, Optional[str]]] = {} |
| 370 | + future_submit_at: Dict[Any, float] = {} | |
| 331 | 371 | async_executor: Optional[ThreadPoolExecutor] = None |
| 332 | 372 | detected_norm = str(detected_lang or "").strip().lower() |
| 333 | 373 | normalized_targets = self._normalize_language_codes(target_languages) |
| ... | ... | @@ -378,6 +418,7 @@ class QueryParser: |
| 378 | 418 | model_name, |
| 379 | 419 | ) |
| 380 | 420 | future_to_task[future] = ("translation", lang) |
| 421 | + future_submit_at[future] = time.perf_counter() | |
| 381 | 422 | |
| 382 | 423 | if should_generate_embedding: |
| 383 | 424 | if self.text_encoder is None: |
| ... | ... | @@ -400,6 +441,7 @@ class QueryParser: |
| 400 | 441 | |
| 401 | 442 | future = async_executor.submit(_encode_query_vector) |
| 402 | 443 | future_to_task[future] = ("embedding", None) |
| 444 | + future_submit_at[future] = time.perf_counter() | |
| 403 | 445 | |
| 404 | 446 | if should_generate_image_embedding: |
| 405 | 447 | if self.image_encoder is None: |
| ... | ... | @@ -422,6 +464,7 @@ class QueryParser: |
| 422 | 464 | |
| 423 | 465 | future = async_executor.submit(_encode_image_query_vector) |
| 424 | 466 | future_to_task[future] = ("image_embedding", None) |
| 467 | + future_submit_at[future] = time.perf_counter() | |
| 425 | 468 | except Exception as e: |
| 426 | 469 | error_msg = f"Async query enrichment submission failed | Error: {str(e)}" |
| 427 | 470 | log_info(error_msg) |
| ... | ... | @@ -431,6 +474,7 @@ class QueryParser: |
| 431 | 474 | async_executor.shutdown(wait=False) |
| 432 | 475 | async_executor = None |
| 433 | 476 | future_to_task.clear() |
| 477 | + future_submit_at.clear() | |
| 434 | 478 | |
| 435 | 479 | # Wait for translation + embedding concurrently; shared budget depends on whether |
| 436 | 480 | # the detected language belongs to caller-provided target_languages. |
| ... | ... | @@ -459,56 +503,45 @@ class QueryParser: |
| 459 | 503 | done, not_done = wait(list(future_to_task.keys()), timeout=budget_sec) |
| 460 | 504 | for future in done: |
| 461 | 505 | task_type, lang = future_to_task[future] |
| 506 | + t0 = future_submit_at.pop(future, None) | |
| 507 | + elapsed_ms = (time.perf_counter() - t0) * 1000.0 if t0 is not None else 0.0 | |
| 462 | 508 | try: |
| 463 | 509 | result = future.result() |
| 464 | 510 | if task_type == "translation": |
| 465 | 511 | if result: |
| 466 | 512 | translations[lang] = result |
| 467 | - log_info( | |
| 468 | - f"Translation completed | Query text: '{query_text}' | " | |
| 469 | - f"Target language: {lang} | Translation result: '{result}'" | |
| 470 | - ) | |
| 471 | 513 | if context: |
| 472 | 514 | context.store_intermediate_result(f"translation_{lang}", result) |
| 473 | 515 | elif task_type == "embedding": |
| 474 | 516 | query_vector = result |
| 475 | - if query_vector is not None: | |
| 476 | - log_debug(f"Query vector generation completed | Shape: {query_vector.shape}") | |
| 477 | - if context: | |
| 478 | - context.store_intermediate_result("query_vector_shape", query_vector.shape) | |
| 479 | - else: | |
| 480 | - log_info( | |
| 481 | - "Query vector generation completed but result is None, will process without vector" | |
| 482 | - ) | |
| 517 | + if query_vector is not None and context: | |
| 518 | + context.store_intermediate_result("query_vector_shape", query_vector.shape) | |
| 483 | 519 | elif task_type == "image_embedding": |
| 484 | 520 | image_query_vector = result |
| 485 | - if image_query_vector is not None: | |
| 486 | - log_debug( | |
| 487 | - f"CLIP text query vector generation completed | Shape: {image_query_vector.shape}" | |
| 488 | - ) | |
| 489 | - if context: | |
| 490 | - context.store_intermediate_result( | |
| 491 | - "image_query_vector_shape", | |
| 492 | - image_query_vector.shape, | |
| 493 | - ) | |
| 494 | - else: | |
| 495 | - log_info( | |
| 496 | - "CLIP text query vector generation completed but result is None, " | |
| 497 | - "will process without image vector" | |
| 521 | + if image_query_vector is not None and context: | |
| 522 | + context.store_intermediate_result( | |
| 523 | + "image_query_vector_shape", | |
| 524 | + image_query_vector.shape, | |
| 498 | 525 | ) |
| 526 | + _log_async_enrichment_finished( | |
| 527 | + log_info, | |
| 528 | + task_type=task_type, | |
| 529 | + summary=_async_enrichment_result_summary(task_type, lang, result), | |
| 530 | + elapsed_ms=elapsed_ms, | |
| 531 | + ) | |
| 499 | 532 | except Exception as e: |
| 500 | - if task_type == "translation": | |
| 501 | - error_msg = f"Translation failed | Language: {lang} | Error: {str(e)}" | |
| 502 | - elif task_type == "image_embedding": | |
| 503 | - error_msg = f"CLIP text query vector generation failed | Error: {str(e)}" | |
| 504 | - else: | |
| 505 | - error_msg = f"Query vector generation failed | Error: {str(e)}" | |
| 506 | - log_info(error_msg) | |
| 533 | + _log_async_enrichment_finished( | |
| 534 | + log_info, | |
| 535 | + task_type=task_type, | |
| 536 | + summary=f"error={e!s}", | |
| 537 | + elapsed_ms=elapsed_ms, | |
| 538 | + ) | |
| 507 | 539 | if context: |
| 508 | - context.add_warning(error_msg) | |
| 540 | + context.add_warning(_async_enrichment_failure_warning(task_type, lang, e)) | |
| 509 | 541 | |
| 510 | 542 | if not_done: |
| 511 | 543 | for future in not_done: |
| 544 | + future_submit_at.pop(future, None) | |
| 512 | 545 | task_type, lang = future_to_task[future] |
| 513 | 546 | if task_type == "translation": |
| 514 | 547 | timeout_msg = ( | ... | ... |