Commit db9c469ca8aa5b4741751cfb70bf891a61136cc5

Authored by tangwang
1 parent de98daa3

log optimize

Showing 1 changed file with 66 additions and 33 deletions   Show diff stats
query/query_parser.py
... ... @@ -12,6 +12,7 @@ from dataclasses import dataclass, field
12 12 from typing import Any, Callable, Dict, List, Optional, Tuple
13 13 import numpy as np
14 14 import logging
  15 +import time
15 16 from concurrent.futures import ThreadPoolExecutor, wait
16 17  
17 18 from embeddings.image_encoder import CLIPImageEncoder
... ... @@ -34,6 +35,44 @@ logger = logging.getLogger(__name__)
34 35 import hanlp # type: ignore
35 36  
36 37  
def _async_enrichment_result_summary(
    task_type: str, lang: Optional[str], result: Any
) -> str:
    """Return a one-line summary of a completed async enrichment task for logging.

    Args:
        task_type: ``"translation"``, ``"embedding"`` or ``"image_embedding"``.
        lang: Target language of a translation task; only used in translation
            summaries.
        result: Value returned by the task's future.

    Returns:
        A compact ``key=value`` description. Unknown task types are reported as
        ``unexpected_task_type=...`` instead of raising.
    """
    if task_type == "translation":
        if result:
            return f"lang={lang} translated={result!r}"
        return f"lang={lang} empty_translation"

    if task_type in ("embedding", "image_embedding"):
        if result is None:
            return "no_vector" if task_type == "embedding" else "no_image_vector"
        shape = getattr(result, "shape", None)
        if shape is None:
            # This helper is evaluated inside the caller's try block, so an
            # AttributeError here would be reported as a failed task even
            # though a vector was produced. Describe the value instead.
            return f"vector_type={type(result).__name__}"
        return f"vector_shape={tuple(shape)}"

    return f"unexpected_task_type={task_type!r}"
  51 +
  52 +
def _async_enrichment_failure_warning(task_type: str, lang: Optional[str], err: BaseException) -> str:
    """Build the warning text recorded when an async enrichment task fails.

    The wording is kept aligned with the historical messages passed to
    ``context.add_warning``: translation failures name the target language,
    image-embedding failures mention CLIP, and any other task type is reported
    as a query vector generation failure.

    Args:
        task_type: Kind of task that failed.
        lang: Target language; only included for translation failures.
        err: Exception raised by the task; its ``str()`` form is appended.

    Returns:
        The formatted warning message.
    """
    detail = str(err)
    if task_type == "translation":
        headline = f"Translation failed | Language: {lang}"
    elif task_type == "image_embedding":
        headline = "CLIP text query vector generation failed"
    else:
        headline = "Query vector generation failed"
    return f"{headline} | Error: {detail}"
  61 +
  62 +
def _log_async_enrichment_finished(
    log_info: Callable[[str], None],
    *,
    task_type: str,
    summary: str,
    elapsed_ms: float,
) -> None:
    """Emit the single "task finished" log line for an async enrichment task.

    Args:
        log_info: Callable that receives the formatted message.
        task_type: Kind of task that finished.
        summary: Pre-formatted result (or error) description.
        elapsed_ms: Elapsed time in milliseconds; rendered with one decimal.
    """
    fields = (
        "Async enrichment task finished",
        f"task_type={task_type}",
        f"summary={summary}",
        f"elapsed_ms={elapsed_ms:.1f}",
    )
    log_info(" | ".join(fields))
  74 +
  75 +
37 76 def rerank_query_text(
38 77 original_query: str,
39 78 *,
... ... @@ -328,6 +367,7 @@ class QueryParser:
328 367 # caller decides translation targets and later search-field planning.
329 368 translations: Dict[str, str] = {}
330 369 future_to_task: Dict[Any, Tuple[str, Optional[str]]] = {}
  370 + future_submit_at: Dict[Any, float] = {}
331 371 async_executor: Optional[ThreadPoolExecutor] = None
332 372 detected_norm = str(detected_lang or "").strip().lower()
333 373 normalized_targets = self._normalize_language_codes(target_languages)
... ... @@ -378,6 +418,7 @@ class QueryParser:
378 418 model_name,
379 419 )
380 420 future_to_task[future] = ("translation", lang)
  421 + future_submit_at[future] = time.perf_counter()
381 422  
382 423 if should_generate_embedding:
383 424 if self.text_encoder is None:
... ... @@ -400,6 +441,7 @@ class QueryParser:
400 441  
401 442 future = async_executor.submit(_encode_query_vector)
402 443 future_to_task[future] = ("embedding", None)
  444 + future_submit_at[future] = time.perf_counter()
403 445  
404 446 if should_generate_image_embedding:
405 447 if self.image_encoder is None:
... ... @@ -422,6 +464,7 @@ class QueryParser:
422 464  
423 465 future = async_executor.submit(_encode_image_query_vector)
424 466 future_to_task[future] = ("image_embedding", None)
  467 + future_submit_at[future] = time.perf_counter()
425 468 except Exception as e:
426 469 error_msg = f"Async query enrichment submission failed | Error: {str(e)}"
427 470 log_info(error_msg)
... ... @@ -431,6 +474,7 @@ class QueryParser:
431 474 async_executor.shutdown(wait=False)
432 475 async_executor = None
433 476 future_to_task.clear()
  477 + future_submit_at.clear()
434 478  
435 479 # Wait for translation + embedding concurrently; shared budget depends on whether
436 480 # the detected language belongs to caller-provided target_languages.
... ... @@ -459,56 +503,45 @@ class QueryParser:
459 503 done, not_done = wait(list(future_to_task.keys()), timeout=budget_sec)
460 504 for future in done:
461 505 task_type, lang = future_to_task[future]
  506 + t0 = future_submit_at.pop(future, None)
  507 + elapsed_ms = (time.perf_counter() - t0) * 1000.0 if t0 is not None else 0.0
462 508 try:
463 509 result = future.result()
464 510 if task_type == "translation":
465 511 if result:
466 512 translations[lang] = result
467   - log_info(
468   - f"Translation completed | Query text: '{query_text}' | "
469   - f"Target language: {lang} | Translation result: '{result}'"
470   - )
471 513 if context:
472 514 context.store_intermediate_result(f"translation_{lang}", result)
473 515 elif task_type == "embedding":
474 516 query_vector = result
475   - if query_vector is not None:
476   - log_debug(f"Query vector generation completed | Shape: {query_vector.shape}")
477   - if context:
478   - context.store_intermediate_result("query_vector_shape", query_vector.shape)
479   - else:
480   - log_info(
481   - "Query vector generation completed but result is None, will process without vector"
482   - )
  517 + if query_vector is not None and context:
  518 + context.store_intermediate_result("query_vector_shape", query_vector.shape)
483 519 elif task_type == "image_embedding":
484 520 image_query_vector = result
485   - if image_query_vector is not None:
486   - log_debug(
487   - f"CLIP text query vector generation completed | Shape: {image_query_vector.shape}"
488   - )
489   - if context:
490   - context.store_intermediate_result(
491   - "image_query_vector_shape",
492   - image_query_vector.shape,
493   - )
494   - else:
495   - log_info(
496   - "CLIP text query vector generation completed but result is None, "
497   - "will process without image vector"
  521 + if image_query_vector is not None and context:
  522 + context.store_intermediate_result(
  523 + "image_query_vector_shape",
  524 + image_query_vector.shape,
498 525 )
  526 + _log_async_enrichment_finished(
  527 + log_info,
  528 + task_type=task_type,
  529 + summary=_async_enrichment_result_summary(task_type, lang, result),
  530 + elapsed_ms=elapsed_ms,
  531 + )
499 532 except Exception as e:
500   - if task_type == "translation":
501   - error_msg = f"Translation failed | Language: {lang} | Error: {str(e)}"
502   - elif task_type == "image_embedding":
503   - error_msg = f"CLIP text query vector generation failed | Error: {str(e)}"
504   - else:
505   - error_msg = f"Query vector generation failed | Error: {str(e)}"
506   - log_info(error_msg)
  533 + _log_async_enrichment_finished(
  534 + log_info,
  535 + task_type=task_type,
  536 + summary=f"error={e!s}",
  537 + elapsed_ms=elapsed_ms,
  538 + )
507 539 if context:
508   - context.add_warning(error_msg)
  540 + context.add_warning(_async_enrichment_failure_warning(task_type, lang, e))
509 541  
510 542 if not_done:
511 543 for future in not_done:
  544 + future_submit_at.pop(future, None)
512 545 task_type, lang = future_to_task[future]
513 546 if task_type == "translation":
514 547 timeout_msg = (
... ...