Commit 28e57bb16712929459a9d1eb43c2800a94ef2d8b
1 parent
af7ee060
日志体系优化
Showing
13 changed files
with
337 additions
and
78 deletions
Show diff stats
README.md
| @@ -29,7 +29,7 @@ source activate.sh | @@ -29,7 +29,7 @@ source activate.sh | ||
| 29 | ./run.sh | 29 | ./run.sh |
| 30 | 30 | ||
| 31 | # 可选:附加能力服务(按需开启) | 31 | # 可选:附加能力服务(按需开启) |
| 32 | -./scripts/service_ctl.sh start embedding translator reranker tei cnclip | 32 | +./scripts/service_ctl.sh start tei cnclip embedding translator reranker |
| 33 | 33 | ||
| 34 | # 查看状态 | 34 | # 查看状态 |
| 35 | ./scripts/service_ctl.sh status | 35 | ./scripts/service_ctl.sh status |
api/app.py
| @@ -11,6 +11,7 @@ import logging | @@ -11,6 +11,7 @@ import logging | ||
| 11 | import time | 11 | import time |
| 12 | import argparse | 12 | import argparse |
| 13 | import uvicorn | 13 | import uvicorn |
| 14 | +from logging.handlers import TimedRotatingFileHandler | ||
| 14 | from collections import defaultdict, deque | 15 | from collections import defaultdict, deque |
| 15 | from typing import Optional | 16 | from typing import Optional |
| 16 | from fastapi import FastAPI, Request, HTTPException | 17 | from fastapi import FastAPI, Request, HTTPException |
| @@ -23,18 +24,60 @@ from slowapi import Limiter, _rate_limit_exceeded_handler | @@ -23,18 +24,60 @@ from slowapi import Limiter, _rate_limit_exceeded_handler | ||
| 23 | from slowapi.util import get_remote_address | 24 | from slowapi.util import get_remote_address |
| 24 | from slowapi.errors import RateLimitExceeded | 25 | from slowapi.errors import RateLimitExceeded |
| 25 | 26 | ||
| 26 | -# Configure logging with better formatting | 27 | +# Configure backend logging |
| 27 | import pathlib | 28 | import pathlib |
| 28 | -log_dir = pathlib.Path('logs') | ||
| 29 | -log_dir.mkdir(exist_ok=True) | ||
| 30 | -logging.basicConfig( | ||
| 31 | - level=logging.INFO, | ||
| 32 | - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', | ||
| 33 | - handlers=[ | ||
| 34 | - logging.StreamHandler(), | ||
| 35 | - logging.FileHandler(log_dir / 'api.log', mode='a', encoding='utf-8') | ||
| 36 | - ] | ||
| 37 | -) | 29 | + |
| 30 | + | ||
| 31 | +def configure_backend_logging() -> None: | ||
| 32 | + log_dir = pathlib.Path("logs") | ||
| 33 | + log_dir.mkdir(exist_ok=True) | ||
| 34 | + log_level = os.getenv("LOG_LEVEL", "INFO").upper() | ||
| 35 | + numeric_level = getattr(logging, log_level, logging.INFO) | ||
| 36 | + | ||
| 37 | + default_formatter = logging.Formatter( | ||
| 38 | + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" | ||
| 39 | + ) | ||
| 40 | + | ||
| 41 | + root_logger = logging.getLogger() | ||
| 42 | + root_logger.setLevel(numeric_level) | ||
| 43 | + root_logger.handlers.clear() | ||
| 44 | + | ||
| 45 | + console_handler = logging.StreamHandler() | ||
| 46 | + console_handler.setLevel(numeric_level) | ||
| 47 | + console_handler.setFormatter(default_formatter) | ||
| 48 | + root_logger.addHandler(console_handler) | ||
| 49 | + | ||
| 50 | + backend_handler = TimedRotatingFileHandler( | ||
| 51 | + filename=log_dir / "api.log", | ||
| 52 | + when="midnight", | ||
| 53 | + interval=1, | ||
| 54 | + backupCount=30, | ||
| 55 | + encoding="utf-8", | ||
| 56 | + ) | ||
| 57 | + backend_handler.setLevel(numeric_level) | ||
| 58 | + backend_handler.setFormatter(default_formatter) | ||
| 59 | + root_logger.addHandler(backend_handler) | ||
| 60 | + | ||
| 61 | + verbose_logger = logging.getLogger("backend.verbose") | ||
| 62 | + verbose_logger.setLevel(numeric_level) | ||
| 63 | + verbose_logger.handlers.clear() | ||
| 64 | + verbose_logger.propagate = False | ||
| 65 | + | ||
| 66 | + verbose_handler = TimedRotatingFileHandler( | ||
| 67 | + filename=log_dir / "backend_verbose.log", | ||
| 68 | + when="midnight", | ||
| 69 | + interval=1, | ||
| 70 | + backupCount=30, | ||
| 71 | + encoding="utf-8", | ||
| 72 | + ) | ||
| 73 | + verbose_handler.setLevel(numeric_level) | ||
| 74 | + verbose_handler.setFormatter( | ||
| 75 | + logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") | ||
| 76 | + ) | ||
| 77 | + verbose_logger.addHandler(verbose_handler) | ||
| 78 | + | ||
| 79 | + | ||
| 80 | +configure_backend_logging() | ||
| 38 | logger = logging.getLogger(__name__) | 81 | logger = logging.getLogger(__name__) |
| 39 | 82 | ||
| 40 | # Initialize rate limiter | 83 | # Initialize rate limiter |
api/routes/search.py
| @@ -5,6 +5,9 @@ Search API routes. | @@ -5,6 +5,9 @@ Search API routes. | ||
| 5 | from fastapi import APIRouter, HTTPException, Query, Request | 5 | from fastapi import APIRouter, HTTPException, Query, Request |
| 6 | from typing import Optional | 6 | from typing import Optional |
| 7 | import uuid | 7 | import uuid |
| 8 | +import hashlib | ||
| 9 | +import json | ||
| 10 | +import logging | ||
| 8 | 11 | ||
| 9 | from ..models import ( | 12 | from ..models import ( |
| 10 | SearchRequest, | 13 | SearchRequest, |
| @@ -17,6 +20,15 @@ from ..models import ( | @@ -17,6 +20,15 @@ from ..models import ( | ||
| 17 | from context.request_context import create_request_context, set_current_request_context, clear_current_request_context | 20 | from context.request_context import create_request_context, set_current_request_context, clear_current_request_context |
| 18 | 21 | ||
| 19 | router = APIRouter(prefix="/search", tags=["search"]) | 22 | router = APIRouter(prefix="/search", tags=["search"]) |
| 23 | +backend_verbose_logger = logging.getLogger("backend.verbose") | ||
| 24 | + | ||
| 25 | + | ||
| 26 | +def _log_backend_verbose(payload: dict) -> None: | ||
| 27 | + if not backend_verbose_logger.handlers: | ||
| 28 | + return | ||
| 29 | + backend_verbose_logger.info( | ||
| 30 | + json.dumps(payload, ensure_ascii=False, separators=(",", ":")) | ||
| 31 | + ) | ||
| 20 | 32 | ||
| 21 | 33 | ||
| 22 | def extract_request_info(request: Request) -> tuple[str, str]: | 34 | def extract_request_info(request: Request) -> tuple[str, str]: |
| @@ -136,15 +148,32 @@ async def search(request: SearchRequest, http_request: Request): | @@ -136,15 +148,32 @@ async def search(request: SearchRequest, http_request: Request): | ||
| 136 | debug_info=result.debug_info | 148 | debug_info=result.debug_info |
| 137 | ) | 149 | ) |
| 138 | 150 | ||
| 139 | - # Log complete response JSON | 151 | + response_payload = response.model_dump(mode="json") |
| 152 | + response_json = json.dumps(response_payload, ensure_ascii=False, separators=(",", ":")) | ||
| 153 | + response_digest = hashlib.sha256(response_json.encode("utf-8")).hexdigest()[:16] | ||
| 154 | + max_score = float(response.max_score or 0.0) | ||
| 155 | + | ||
| 140 | context.logger.info( | 156 | context.logger.info( |
| 141 | - "Search response | " | ||
| 142 | - f"Total results: {response.total} | " | ||
| 143 | - f"Max score: {response.max_score:.4f} | " | ||
| 144 | - f"Time: {response.took_ms}ms | " | ||
| 145 | - f"Response: {response.model_dump_json()}", | 157 | + "Search response | Total results: %s | Max score: %.4f | Time: %sms | payload_size: %s chars | digest: %s", |
| 158 | + response.total, | ||
| 159 | + max_score, | ||
| 160 | + response.took_ms, | ||
| 161 | + len(response_json), | ||
| 162 | + response_digest, | ||
| 146 | extra={'reqid': context.reqid, 'uid': context.uid} | 163 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 147 | ) | 164 | ) |
| 165 | + _log_backend_verbose({ | ||
| 166 | + "event": "search_response", | ||
| 167 | + "reqid": context.reqid, | ||
| 168 | + "uid": context.uid, | ||
| 169 | + "tenant_id": tenant_id, | ||
| 170 | + "total_results": response.total, | ||
| 171 | + "max_score": max_score, | ||
| 172 | + "took_ms": response.took_ms, | ||
| 173 | + "payload_size_chars": len(response_json), | ||
| 174 | + "sha256_16": response_digest, | ||
| 175 | + "response": response_payload, | ||
| 176 | + }) | ||
| 148 | 177 | ||
| 149 | return response | 178 | return response |
| 150 | 179 | ||
| @@ -233,15 +262,32 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | @@ -233,15 +262,32 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): | ||
| 233 | performance_info=performance_summary | 262 | performance_info=performance_summary |
| 234 | ) | 263 | ) |
| 235 | 264 | ||
| 236 | - # Log complete response JSON | 265 | + response_payload = response.model_dump(mode="json") |
| 266 | + response_json = json.dumps(response_payload, ensure_ascii=False, separators=(",", ":")) | ||
| 267 | + response_digest = hashlib.sha256(response_json.encode("utf-8")).hexdigest()[:16] | ||
| 268 | + max_score = float(response.max_score or 0.0) | ||
| 269 | + | ||
| 237 | context.logger.info( | 270 | context.logger.info( |
| 238 | - "Image search response | " | ||
| 239 | - f"Total results: {response.total} | " | ||
| 240 | - f"Max score: {response.max_score:.4f} | " | ||
| 241 | - f"Time: {response.took_ms}ms | " | ||
| 242 | - f"Response: {response.model_dump_json()}", | 271 | + "Image search response | Total results: %s | Max score: %.4f | Time: %sms | payload_size: %s chars | digest: %s", |
| 272 | + response.total, | ||
| 273 | + max_score, | ||
| 274 | + response.took_ms, | ||
| 275 | + len(response_json), | ||
| 276 | + response_digest, | ||
| 243 | extra={'reqid': context.reqid, 'uid': context.uid} | 277 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 244 | ) | 278 | ) |
| 279 | + _log_backend_verbose({ | ||
| 280 | + "event": "image_search_response", | ||
| 281 | + "reqid": context.reqid, | ||
| 282 | + "uid": context.uid, | ||
| 283 | + "tenant_id": tenant_id, | ||
| 284 | + "total_results": response.total, | ||
| 285 | + "max_score": max_score, | ||
| 286 | + "took_ms": response.took_ms, | ||
| 287 | + "payload_size_chars": len(response_json), | ||
| 288 | + "sha256_16": response_digest, | ||
| 289 | + "response": response_payload, | ||
| 290 | + }) | ||
| 245 | 291 | ||
| 246 | return response | 292 | return response |
| 247 | 293 |
docs/QUICKSTART.md
| @@ -510,16 +510,11 @@ curl http://localhost:6007/health | @@ -510,16 +510,11 @@ curl http://localhost:6007/health | ||
| 510 | 510 | ||
| 511 | ### 5.2 常用日志 | 511 | ### 5.2 常用日志 |
| 512 | 512 | ||
| 513 | -- `logs/backend.log` | ||
| 514 | -- `logs/indexer.log` | ||
| 515 | -- `logs/frontend.log` | ||
| 516 | -- `logs/embedding.log` | ||
| 517 | -- `logs/translator.log` | ||
| 518 | -- `logs/reranker.log` | ||
| 519 | -- `logs/tei.log` | ||
| 520 | -- `logs/cnclip.log` | ||
| 521 | -- `logs/search_engine.log` | ||
| 522 | -- `logs/errors.log` | 513 | +- `logs/<service>-YYYY-MM-DD.log`(`service_ctl.sh` 按天写入的真实文件) |
| 514 | +- `logs/<service>.log`(指向当天文件的软链,推荐 `tail -F`) | ||
| 515 | +- `logs/api.log`(backend 进程内日志,按天轮转) | ||
| 516 | +- `logs/backend_verbose.log`(backend 大对象详细日志,按天轮转) | ||
| 517 | +- `logs/indexer.log`(索引结构化 JSON 日志,按天轮转) | ||
| 523 | 518 | ||
| 524 | ### 5.3 常用排障命令 | 519 | ### 5.3 常用排障命令 |
| 525 | 520 |
docs/TEI_SERVICE说明文档.md
| @@ -152,7 +152,7 @@ curl -sS -X POST "http://127.0.0.1:6005/embed/text" \ | @@ -152,7 +152,7 @@ curl -sS -X POST "http://127.0.0.1:6005/embed/text" \ | ||
| 152 | 启动全套(含 TEI): | 152 | 启动全套(含 TEI): |
| 153 | 153 | ||
| 154 | ```bash | 154 | ```bash |
| 155 | -TEI_DEVICE=cuda ./scripts/service_ctl.sh start embedding translator reranker tei cnclip | 155 | +TEI_DEVICE=cuda ./scripts/service_ctl.sh start tei cnclip embedding translator reranker |
| 156 | ``` | 156 | ``` |
| 157 | 157 | ||
| 158 | 仅启动 TEI: | 158 | 仅启动 TEI: |
docs/Usage-Guide.md
| @@ -368,31 +368,23 @@ RERANKER_PORT=6007 | @@ -368,31 +368,23 @@ RERANKER_PORT=6007 | ||
| 368 | 368 | ||
| 369 | 日志文件存储在 `logs/` 目录下: | 369 | 日志文件存储在 `logs/` 目录下: |
| 370 | 370 | ||
| 371 | -- `logs/backend.log` - 后端服务日志 | ||
| 372 | -- `logs/indexer.log` - 索引服务日志 | ||
| 373 | -- `logs/frontend.log` - 前端服务日志 | ||
| 374 | -- `logs/embedding.log` - 向量服务日志(可选) | ||
| 375 | -- `logs/translator.log` - 翻译服务日志(可选) | ||
| 376 | -- `logs/reranker.log` - 重排服务日志(可选) | ||
| 377 | -- `logs/tei.log` - TEI 启停日志(可选) | ||
| 378 | -- `logs/cnclip.log` - CN-CLIP 启停日志(可选) | ||
| 379 | -- `logs/search_engine.log` - 应用主日志(按天轮转) | ||
| 380 | -- `logs/errors.log` - 错误日志(按天轮转) | 371 | +- `logs/<service>-YYYY-MM-DD.log` - `service_ctl.sh` 统一管理的按天日志文件(真实文件) |
| 372 | +- `logs/<service>.log` - 指向当天日志文件的软链(兼容原有命令) | ||
| 373 | +- `logs/api.log` - backend 进程内日志(按天轮转) | ||
| 374 | +- `logs/backend_verbose.log` - backend 详细大对象日志(按天轮转,含 `Search response` / `ES query built` 完整内容) | ||
| 375 | +- `logs/indexer.log` - 索引结构化日志(JSON,按天轮转) | ||
| 381 | 376 | ||
| 382 | ### 查看实时日志 | 377 | ### 查看实时日志 |
| 383 | 378 | ||
| 384 | ```bash | 379 | ```bash |
| 385 | -# 查看后端日志 | ||
| 386 | -tail -f logs/backend.log | 380 | +# 查看后端日志(建议 -F,跨天自动跟随新文件) |
| 381 | +tail -F logs/backend.log | ||
| 387 | 382 | ||
| 388 | # 查看前端日志 | 383 | # 查看前端日志 |
| 389 | -tail -f logs/frontend.log | 384 | +tail -F logs/frontend.log |
| 390 | 385 | ||
| 391 | -# 查看应用主日志 | ||
| 392 | -tail -f logs/search_engine.log | ||
| 393 | - | ||
| 394 | -# 查看错误日志 | ||
| 395 | -tail -f logs/errors.log | 386 | +# 查看 backend 详细大对象日志 |
| 387 | +tail -F logs/backend_verbose.log | ||
| 396 | ``` | 388 | ``` |
| 397 | 389 | ||
| 398 | ### 日志级别 | 390 | ### 日志级别 |
| @@ -406,7 +398,7 @@ LOG_LEVEL=DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL | @@ -406,7 +398,7 @@ LOG_LEVEL=DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL | ||
| 406 | 398 | ||
| 407 | ### 日志轮转 | 399 | ### 日志轮转 |
| 408 | 400 | ||
| 409 | -日志文件按天自动轮转,保留30天的历史日志。 | 401 | +日志文件按天自动轮转,默认保留 30 天历史日志(可通过 `LOG_RETENTION_DAYS` 调整 `service_ctl.sh` 管理日志的保留天数)。 |
| 410 | 402 | ||
| 411 | --- | 403 | --- |
| 412 | 404 |
embeddings/server.py
| @@ -56,6 +56,29 @@ _TEXT_MICROBATCH_WINDOW_SEC = max( | @@ -56,6 +56,29 @@ _TEXT_MICROBATCH_WINDOW_SEC = max( | ||
| 56 | _TEXT_REQUEST_TIMEOUT_SEC = max( | 56 | _TEXT_REQUEST_TIMEOUT_SEC = max( |
| 57 | 1.0, float(os.getenv("TEXT_REQUEST_TIMEOUT_SEC", "30")) | 57 | 1.0, float(os.getenv("TEXT_REQUEST_TIMEOUT_SEC", "30")) |
| 58 | ) | 58 | ) |
| 59 | +_LOG_PREVIEW_COUNT = max(1, int(os.getenv("EMBEDDING_LOG_PREVIEW_COUNT", "3"))) | ||
| 60 | +_LOG_TEXT_PREVIEW_CHARS = max(32, int(os.getenv("EMBEDDING_LOG_TEXT_PREVIEW_CHARS", "120"))) | ||
| 61 | +_LOG_IMAGE_PREVIEW_CHARS = max(32, int(os.getenv("EMBEDDING_LOG_IMAGE_PREVIEW_CHARS", "180"))) | ||
| 62 | + | ||
| 63 | + | ||
| 64 | +def _compact_preview(text: str, max_chars: int) -> str: | ||
| 65 | + compact = " ".join((text or "").split()) | ||
| 66 | + if len(compact) <= max_chars: | ||
| 67 | + return compact | ||
| 68 | + return compact[:max_chars] + "..." | ||
| 69 | + | ||
| 70 | + | ||
| 71 | +def _preview_inputs(items: List[str], max_items: int, max_chars: int) -> List[Dict[str, Any]]: | ||
| 72 | + previews: List[Dict[str, Any]] = [] | ||
| 73 | + for idx, item in enumerate(items[:max_items]): | ||
| 74 | + previews.append( | ||
| 75 | + { | ||
| 76 | + "idx": idx, | ||
| 77 | + "len": len(item), | ||
| 78 | + "preview": _compact_preview(item, max_chars), | ||
| 79 | + } | ||
| 80 | + ) | ||
| 81 | + return previews | ||
| 59 | 82 | ||
| 60 | 83 | ||
| 61 | def _encode_local_st(texts: List[str], normalize_embeddings: bool) -> Any: | 84 | def _encode_local_st(texts: List[str], normalize_embeddings: bool) -> Any: |
| @@ -292,6 +315,14 @@ def embed_text(texts: List[str], normalize: Optional[bool] = None) -> List[Optio | @@ -292,6 +315,14 @@ def embed_text(texts: List[str], normalize: Optional[bool] = None) -> List[Optio | ||
| 292 | raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: empty string") | 315 | raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: empty string") |
| 293 | normalized.append(s) | 316 | normalized.append(s) |
| 294 | 317 | ||
| 318 | + logger.info( | ||
| 319 | + "embed_text request | backend=%s inputs=%d normalize=%s preview=%s", | ||
| 320 | + _text_backend_name, | ||
| 321 | + len(normalized), | ||
| 322 | + effective_normalize, | ||
| 323 | + _preview_inputs(normalized, _LOG_PREVIEW_COUNT, _LOG_TEXT_PREVIEW_CHARS), | ||
| 324 | + ) | ||
| 325 | + | ||
| 295 | t0 = time.perf_counter() | 326 | t0 = time.perf_counter() |
| 296 | try: | 327 | try: |
| 297 | # local_st backend uses in-process torch model, keep serialized encode for safety; | 328 | # local_st backend uses in-process torch model, keep serialized encode for safety; |
| @@ -301,10 +332,11 @@ def embed_text(texts: List[str], normalize: Optional[bool] = None) -> List[Optio | @@ -301,10 +332,11 @@ def embed_text(texts: List[str], normalize: Optional[bool] = None) -> List[Optio | ||
| 301 | out = [_encode_single_text_with_microbatch(normalized[0], normalize=effective_normalize)] | 332 | out = [_encode_single_text_with_microbatch(normalized[0], normalize=effective_normalize)] |
| 302 | elapsed_ms = (time.perf_counter() - t0) * 1000.0 | 333 | elapsed_ms = (time.perf_counter() - t0) * 1000.0 |
| 303 | logger.info( | 334 | logger.info( |
| 304 | - "embed_text done | backend=%s mode=microbatch-single inputs=%d normalize=%s elapsed_ms=%.2f", | 335 | + "embed_text done | backend=%s mode=microbatch-single inputs=%d normalize=%s dim=%d elapsed_ms=%.2f", |
| 305 | _text_backend_name, | 336 | _text_backend_name, |
| 306 | len(normalized), | 337 | len(normalized), |
| 307 | effective_normalize, | 338 | effective_normalize, |
| 339 | + len(out[0]) if out and out[0] is not None else 0, | ||
| 308 | elapsed_ms, | 340 | elapsed_ms, |
| 309 | ) | 341 | ) |
| 310 | return out | 342 | return out |
| @@ -335,10 +367,11 @@ def embed_text(texts: List[str], normalize: Optional[bool] = None) -> List[Optio | @@ -335,10 +367,11 @@ def embed_text(texts: List[str], normalize: Optional[bool] = None) -> List[Optio | ||
| 335 | out.append(vec) | 367 | out.append(vec) |
| 336 | elapsed_ms = (time.perf_counter() - t0) * 1000.0 | 368 | elapsed_ms = (time.perf_counter() - t0) * 1000.0 |
| 337 | logger.info( | 369 | logger.info( |
| 338 | - "embed_text done | backend=%s inputs=%d normalize=%s elapsed_ms=%.2f", | 370 | + "embed_text done | backend=%s inputs=%d normalize=%s dim=%d elapsed_ms=%.2f", |
| 339 | _text_backend_name, | 371 | _text_backend_name, |
| 340 | len(normalized), | 372 | len(normalized), |
| 341 | effective_normalize, | 373 | effective_normalize, |
| 374 | + len(out[0]) if out and out[0] is not None else 0, | ||
| 342 | elapsed_ms, | 375 | elapsed_ms, |
| 343 | ) | 376 | ) |
| 344 | return out | 377 | return out |
| @@ -358,6 +391,14 @@ def embed_image(images: List[str], normalize: Optional[bool] = None) -> List[Opt | @@ -358,6 +391,14 @@ def embed_image(images: List[str], normalize: Optional[bool] = None) -> List[Opt | ||
| 358 | raise HTTPException(status_code=400, detail=f"Invalid image at index {i}: empty URL/path") | 391 | raise HTTPException(status_code=400, detail=f"Invalid image at index {i}: empty URL/path") |
| 359 | urls.append(s) | 392 | urls.append(s) |
| 360 | 393 | ||
| 394 | + logger.info( | ||
| 395 | + "embed_image request | inputs=%d normalize=%s preview=%s", | ||
| 396 | + len(urls), | ||
| 397 | + effective_normalize, | ||
| 398 | + _preview_inputs(urls, _LOG_PREVIEW_COUNT, _LOG_IMAGE_PREVIEW_CHARS), | ||
| 399 | + ) | ||
| 400 | + | ||
| 401 | + t0 = time.perf_counter() | ||
| 361 | with _image_encode_lock: | 402 | with _image_encode_lock: |
| 362 | vectors = _image_model.encode_image_urls( | 403 | vectors = _image_model.encode_image_urls( |
| 363 | urls, | 404 | urls, |
| @@ -375,4 +416,12 @@ def embed_image(images: List[str], normalize: Optional[bool] = None) -> List[Opt | @@ -375,4 +416,12 @@ def embed_image(images: List[str], normalize: Optional[bool] = None) -> List[Opt | ||
| 375 | if out_vec is None: | 416 | if out_vec is None: |
| 376 | raise RuntimeError(f"Image model returned empty embedding for index {i}") | 417 | raise RuntimeError(f"Image model returned empty embedding for index {i}") |
| 377 | out.append(out_vec) | 418 | out.append(out_vec) |
| 419 | + elapsed_ms = (time.perf_counter() - t0) * 1000.0 | ||
| 420 | + logger.info( | ||
| 421 | + "embed_image done | inputs=%d normalize=%s dim=%d elapsed_ms=%.2f", | ||
| 422 | + len(urls), | ||
| 423 | + effective_normalize, | ||
| 424 | + len(out[0]) if out and out[0] is not None else 0, | ||
| 425 | + elapsed_ms, | ||
| 426 | + ) | ||
| 378 | return out | 427 | return out |
reranker/server.py
| @@ -9,6 +9,7 @@ Backend selected via config: services.rerank.backend (bge | qwen3_vllm), env RER | @@ -9,6 +9,7 @@ Backend selected via config: services.rerank.backend (bge | qwen3_vllm), env RER | ||
| 9 | """ | 9 | """ |
| 10 | 10 | ||
| 11 | import logging | 11 | import logging |
| 12 | +import os | ||
| 12 | import time | 13 | import time |
| 13 | from typing import Any, Dict, List, Optional | 14 | from typing import Any, Dict, List, Optional |
| 14 | 15 | ||
| @@ -29,6 +30,28 @@ app = FastAPI(title="saas-search Reranker Service", version="1.0.0") | @@ -29,6 +30,28 @@ app = FastAPI(title="saas-search Reranker Service", version="1.0.0") | ||
| 29 | 30 | ||
| 30 | _reranker: Optional[RerankBackendProtocol] = None | 31 | _reranker: Optional[RerankBackendProtocol] = None |
| 31 | _backend_name: str = "" | 32 | _backend_name: str = "" |
| 33 | +_LOG_DOC_PREVIEW_COUNT = max(1, int(os.getenv("RERANK_LOG_DOC_PREVIEW_COUNT", "3"))) | ||
| 34 | +_LOG_TEXT_PREVIEW_CHARS = max(32, int(os.getenv("RERANK_LOG_TEXT_PREVIEW_CHARS", "180"))) | ||
| 35 | + | ||
| 36 | + | ||
| 37 | +def _compact_preview(text: str, max_chars: int) -> str: | ||
| 38 | + compact = " ".join((text or "").split()) | ||
| 39 | + if len(compact) <= max_chars: | ||
| 40 | + return compact | ||
| 41 | + return compact[:max_chars] + "..." | ||
| 42 | + | ||
| 43 | + | ||
| 44 | +def _preview_docs(docs: List[str], max_items: int, max_chars: int) -> List[Dict[str, Any]]: | ||
| 45 | + previews: List[Dict[str, Any]] = [] | ||
| 46 | + for idx, doc in enumerate(docs[:max_items]): | ||
| 47 | + previews.append( | ||
| 48 | + { | ||
| 49 | + "idx": idx, | ||
| 50 | + "len": len(doc), | ||
| 51 | + "preview": _compact_preview(doc, max_chars), | ||
| 52 | + } | ||
| 53 | + ) | ||
| 54 | + return previews | ||
| 32 | 55 | ||
| 33 | 56 | ||
| 34 | class RerankRequest(BaseModel): | 57 | class RerankRequest(BaseModel): |
| @@ -100,19 +123,25 @@ def rerank(request: RerankRequest) -> RerankResponse: | @@ -100,19 +123,25 @@ def rerank(request: RerankRequest) -> RerankResponse: | ||
| 100 | 123 | ||
| 101 | start_ts = time.time() | 124 | start_ts = time.time() |
| 102 | logger.info( | 125 | logger.info( |
| 103 | - "Rerank request | docs=%d normalize=%s", | 126 | + "Rerank request | docs=%d normalize=%s query_len=%d query=%r doc_preview=%s", |
| 104 | len(request.docs), | 127 | len(request.docs), |
| 105 | normalize, | 128 | normalize, |
| 129 | + len(query), | ||
| 130 | + _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS), | ||
| 131 | + _preview_docs(request.docs, _LOG_DOC_PREVIEW_COUNT, _LOG_TEXT_PREVIEW_CHARS), | ||
| 106 | ) | 132 | ) |
| 107 | scores, meta = _reranker.score_with_meta(query, request.docs, normalize=normalize) | 133 | scores, meta = _reranker.score_with_meta(query, request.docs, normalize=normalize) |
| 108 | meta = dict(meta) | 134 | meta = dict(meta) |
| 109 | meta.update({"service_elapsed_ms": round((time.time() - start_ts) * 1000.0, 3)}) | 135 | meta.update({"service_elapsed_ms": round((time.time() - start_ts) * 1000.0, 3)}) |
| 136 | + score_preview = [round(float(s), 6) for s in scores[:_LOG_DOC_PREVIEW_COUNT]] | ||
| 110 | logger.info( | 137 | logger.info( |
| 111 | - "Rerank done | docs=%d unique=%s dedup=%s elapsed_ms=%s", | 138 | + "Rerank done | docs=%d unique=%s dedup=%s elapsed_ms=%s query=%r score_preview=%s", |
| 112 | meta.get("input_docs"), | 139 | meta.get("input_docs"), |
| 113 | meta.get("unique_docs"), | 140 | meta.get("unique_docs"), |
| 114 | meta.get("dedup_ratio"), | 141 | meta.get("dedup_ratio"), |
| 115 | meta.get("service_elapsed_ms"), | 142 | meta.get("service_elapsed_ms"), |
| 143 | + _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS), | ||
| 144 | + score_preview, | ||
| 116 | ) | 145 | ) |
| 117 | 146 | ||
| 118 | return RerankResponse(scores=scores, meta=meta) | 147 | return RerankResponse(scores=scores, meta=meta) |
| @@ -0,0 +1,56 @@ | @@ -0,0 +1,56 @@ | ||
| 1 | +#!/bin/bash | ||
| 2 | +# | ||
| 3 | +# Route incoming log stream into per-day files. | ||
| 4 | +# | ||
| 5 | +# Usage: | ||
| 6 | +# command 2>&1 | ./scripts/daily_log_router.sh <service> <log_dir> [retention_days] | ||
| 7 | +# | ||
| 8 | + | ||
| 9 | +set -euo pipefail | ||
| 10 | + | ||
| 11 | +if [ "$#" -lt 2 ]; then | ||
| 12 | + echo "Usage: $0 <service> <log_dir> [retention_days]" >&2 | ||
| 13 | + exit 1 | ||
| 14 | +fi | ||
| 15 | + | ||
| 16 | +SERVICE_NAME="$1" | ||
| 17 | +LOG_DIR="$2" | ||
| 18 | +RETENTION_DAYS="${3:-30}" | ||
| 19 | + | ||
| 20 | +mkdir -p "${LOG_DIR}" | ||
| 21 | + | ||
| 22 | +awk -v dir="${LOG_DIR}" -v service="${SERVICE_NAME}" -v retention_days="${RETENTION_DAYS}" ' | ||
| 23 | +function rotate_file(day) { | ||
| 24 | + return sprintf("%s/%s-%s.log", dir, service, day) | ||
| 25 | +} | ||
| 26 | + | ||
| 27 | +function update_symlink(day) { | ||
| 28 | + cmd = sprintf("ln -sfn \"%s-%s.log\" \"%s/%s.log\"", service, day, dir, service) | ||
| 29 | + system(cmd) | ||
| 30 | +} | ||
| 31 | + | ||
| 32 | +function cleanup_old_logs() { | ||
| 33 | + cmd = sprintf("find \"%s\" -maxdepth 1 -type f -name \"%s-*.log\" -mtime +%d -delete >/dev/null 2>&1", dir, service, retention_days) | ||
| 34 | + system(cmd) | ||
| 35 | +} | ||
| 36 | + | ||
| 37 | +{ | ||
| 38 | + day = strftime("%Y-%m-%d") | ||
| 39 | + target = rotate_file(day) | ||
| 40 | + | ||
| 41 | + if (target != current_target) { | ||
| 42 | + update_symlink(day) | ||
| 43 | + cleanup_old_logs() | ||
| 44 | + current_target = target | ||
| 45 | + } | ||
| 46 | + | ||
| 47 | + print >> current_target | ||
| 48 | + fflush(current_target) | ||
| 49 | +} | ||
| 50 | + | ||
| 51 | +END { | ||
| 52 | + if (current_target != "") { | ||
| 53 | + close(current_target) | ||
| 54 | + } | ||
| 55 | +} | ||
| 56 | +' |
scripts/service_ctl.sh
| @@ -8,6 +8,7 @@ set -euo pipefail | @@ -8,6 +8,7 @@ set -euo pipefail | ||
| 8 | 8 | ||
| 9 | PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" | 9 | PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" |
| 10 | LOG_DIR="${PROJECT_ROOT}/logs" | 10 | LOG_DIR="${PROJECT_ROOT}/logs" |
| 11 | +LOG_RETENTION_DAYS="${LOG_RETENTION_DAYS:-30}" | ||
| 11 | 12 | ||
| 12 | mkdir -p "${LOG_DIR}" | 13 | mkdir -p "${LOG_DIR}" |
| 13 | 14 | ||
| @@ -46,6 +47,16 @@ log_file() { | @@ -46,6 +47,16 @@ log_file() { | ||
| 46 | echo "${LOG_DIR}/${service}.log" | 47 | echo "${LOG_DIR}/${service}.log" |
| 47 | } | 48 | } |
| 48 | 49 | ||
| 50 | +prepare_daily_log_target() { | ||
| 51 | + local service="$1" | ||
| 52 | + local day | ||
| 53 | + local today_file | ||
| 54 | + day="$(date +%F)" | ||
| 55 | + today_file="${LOG_DIR}/${service}-${day}.log" | ||
| 56 | + touch "${today_file}" | ||
| 57 | + ln -sfn "$(basename "${today_file}")" "$(log_file "${service}")" | ||
| 58 | +} | ||
| 59 | + | ||
| 49 | service_start_cmd() { | 60 | service_start_cmd() { |
| 50 | local service="$1" | 61 | local service="$1" |
| 51 | case "${service}" in | 62 | case "${service}" in |
| @@ -176,6 +187,7 @@ start_one() { | @@ -176,6 +187,7 @@ start_one() { | ||
| 176 | local pf lf | 187 | local pf lf |
| 177 | pf="$(pid_file "${service}")" | 188 | pf="$(pid_file "${service}")" |
| 178 | lf="$(log_file "${service}")" | 189 | lf="$(log_file "${service}")" |
| 190 | + prepare_daily_log_target "${service}" | ||
| 179 | 191 | ||
| 180 | if [ "${service}" != "tei" ]; then | 192 | if [ "${service}" != "tei" ]; then |
| 181 | if is_running_by_pid "${service}" || is_running_by_port "${service}"; then | 193 | if is_running_by_pid "${service}" || is_running_by_port "${service}"; then |
| @@ -203,12 +215,12 @@ start_one() { | @@ -203,12 +215,12 @@ start_one() { | ||
| 203 | cnclip|tei) | 215 | cnclip|tei) |
| 204 | echo "[start] ${service} (managed by native script)" | 216 | echo "[start] ${service} (managed by native script)" |
| 205 | if [ "${service}" = "cnclip" ]; then | 217 | if [ "${service}" = "cnclip" ]; then |
| 206 | - if ! CNCLIP_DEVICE="${CNCLIP_DEVICE:-cuda}" bash -lc "${cmd}" >> "${lf}" 2>&1; then | 218 | + if ! CNCLIP_DEVICE="${CNCLIP_DEVICE:-cuda}" "${cmd}" > >("${PROJECT_ROOT}/scripts/daily_log_router.sh" "${service}" "${LOG_DIR}" "${LOG_RETENTION_DAYS}") 2>&1; then |
| 207 | echo "[error] ${service} start script failed, inspect ${lf}" >&2 | 219 | echo "[error] ${service} start script failed, inspect ${lf}" >&2 |
| 208 | return 1 | 220 | return 1 |
| 209 | fi | 221 | fi |
| 210 | else | 222 | else |
| 211 | - if ! bash -lc "${cmd}" >> "${lf}" 2>&1; then | 223 | + if ! "${cmd}" > >("${PROJECT_ROOT}/scripts/daily_log_router.sh" "${service}" "${LOG_DIR}" "${LOG_RETENTION_DAYS}") 2>&1; then |
| 212 | echo "[error] ${service} start script failed, inspect ${lf}" >&2 | 224 | echo "[error] ${service} start script failed, inspect ${lf}" >&2 |
| 213 | return 1 | 225 | return 1 |
| 214 | fi | 226 | fi |
| @@ -229,7 +241,7 @@ start_one() { | @@ -229,7 +241,7 @@ start_one() { | ||
| 229 | ;; | 241 | ;; |
| 230 | backend|indexer|frontend|embedding|translator) | 242 | backend|indexer|frontend|embedding|translator) |
| 231 | echo "[start] ${service}" | 243 | echo "[start] ${service}" |
| 232 | - nohup bash -lc "${cmd}" > "${lf}" 2>&1 & | 244 | + nohup "${cmd}" > >("${PROJECT_ROOT}/scripts/daily_log_router.sh" "${service}" "${LOG_DIR}" "${LOG_RETENTION_DAYS}") 2>&1 & |
| 233 | local pid=$! | 245 | local pid=$! |
| 234 | echo "${pid}" > "${pf}" | 246 | echo "${pid}" > "${pf}" |
| 235 | local retries=30 | 247 | local retries=30 |
| @@ -244,7 +256,7 @@ start_one() { | @@ -244,7 +256,7 @@ start_one() { | ||
| 244 | echo "[start] ${service}" | 256 | echo "[start] ${service}" |
| 245 | # Start reranker directly so pid file points to the script process that | 257 | # Start reranker directly so pid file points to the script process that |
| 246 | # will exec uvicorn, avoiding extra shell wrapper lifecycle edge-cases. | 258 | # will exec uvicorn, avoiding extra shell wrapper lifecycle edge-cases. |
| 247 | - nohup "${cmd}" > "${lf}" 2>&1 & | 259 | + nohup "${cmd}" > >("${PROJECT_ROOT}/scripts/daily_log_router.sh" "${service}" "${LOG_DIR}" "${LOG_RETENTION_DAYS}") 2>&1 & |
| 248 | local pid=$! | 260 | local pid=$! |
| 249 | echo "${pid}" > "${pf}" | 261 | echo "${pid}" > "${pf}" |
| 250 | if wait_for_health "${service}" 90; then | 262 | if wait_for_health "${service}" 90; then |
| @@ -408,9 +420,12 @@ Default target set (when no service provided): | @@ -408,9 +420,12 @@ Default target set (when no service provided): | ||
| 408 | status -> all known services | 420 | status -> all known services |
| 409 | 421 | ||
| 410 | Optional service startup: | 422 | Optional service startup: |
| 411 | - ./scripts/service_ctl.sh start embedding translator reranker tei cnclip | 423 | + ./scripts/service_ctl.sh start tei cnclip embedding translator reranker |
| 412 | TEI_DEVICE=cuda|cpu ./scripts/service_ctl.sh start tei | 424 | TEI_DEVICE=cuda|cpu ./scripts/service_ctl.sh start tei |
| 413 | CNCLIP_DEVICE=cuda|cpu ./scripts/service_ctl.sh start cnclip | 425 | CNCLIP_DEVICE=cuda|cpu ./scripts/service_ctl.sh start cnclip |
| 426 | + | ||
| 427 | +Log retention: | ||
| 428 | + LOG_RETENTION_DAYS=30 ./scripts/service_ctl.sh start | ||
| 414 | EOF | 429 | EOF |
| 415 | } | 430 | } |
| 416 | 431 |
scripts/start.sh
| @@ -12,11 +12,11 @@ echo "saas-search 服务启动" | @@ -12,11 +12,11 @@ echo "saas-search 服务启动" | ||
| 12 | echo "========================================" | 12 | echo "========================================" |
| 13 | echo "默认启动核心服务: backend/indexer/frontend" | 13 | echo "默认启动核心服务: backend/indexer/frontend" |
| 14 | echo "可选服务请显式指定:" | 14 | echo "可选服务请显式指定:" |
| 15 | -echo " ./scripts/service_ctl.sh start embedding translator reranker tei cnclip" | 15 | +echo " ./scripts/service_ctl.sh start tei cnclip embedding translator reranker" |
| 16 | echo | 16 | echo |
| 17 | 17 | ||
| 18 | ./scripts/service_ctl.sh start | 18 | ./scripts/service_ctl.sh start |
| 19 | 19 | ||
| 20 | echo | 20 | echo |
| 21 | echo "当前服务状态:" | 21 | echo "当前服务状态:" |
| 22 | -./scripts/service_ctl.sh status backend indexer frontend embedding translator reranker tei cnclip | 22 | +./scripts/service_ctl.sh status backend indexer frontend tei cnclip embedding translator reranker |
scripts/start_cnclip_service.sh
| @@ -43,7 +43,9 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" | @@ -43,7 +43,9 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" | ||
| 43 | CLIP_SERVER_DIR="${PROJECT_ROOT}/third-party/clip-as-service/server" | 43 | CLIP_SERVER_DIR="${PROJECT_ROOT}/third-party/clip-as-service/server" |
| 44 | LOG_DIR="${PROJECT_ROOT}/logs" | 44 | LOG_DIR="${PROJECT_ROOT}/logs" |
| 45 | PID_FILE="${LOG_DIR}/cnclip.pid" | 45 | PID_FILE="${LOG_DIR}/cnclip.pid" |
| 46 | -LOG_FILE="${LOG_DIR}/cnclip.log" | 46 | +LOG_LINK="${LOG_DIR}/cnclip.log" |
| 47 | +LOG_FILE="${LOG_DIR}/cnclip-$(date +%F).log" | ||
| 48 | +LOG_ROUTER_SCRIPT="${PROJECT_ROOT}/scripts/daily_log_router.sh" | ||
| 47 | 49 | ||
| 48 | # 帮助信息 | 50 | # 帮助信息 |
| 49 | show_help() { | 51 | show_help() { |
| @@ -128,6 +130,11 @@ fi | @@ -128,6 +130,11 @@ fi | ||
| 128 | 130 | ||
| 129 | # 创建日志目录 | 131 | # 创建日志目录 |
| 130 | mkdir -p "${LOG_DIR}" | 132 | mkdir -p "${LOG_DIR}" |
| 133 | +if [ ! -x "${LOG_ROUTER_SCRIPT}" ]; then | ||
| 134 | + echo -e "${RED}错误: 日志路由脚本不存在或不可执行: ${LOG_ROUTER_SCRIPT}${NC}" | ||
| 135 | + exit 1 | ||
| 136 | +fi | ||
| 137 | +ln -sfn "$(basename "${LOG_FILE}")" "${LOG_LINK}" | ||
| 131 | 138 | ||
| 132 | # 检查是否已经有服务在运行 | 139 | # 检查是否已经有服务在运行 |
| 133 | if [ -f "${PID_FILE}" ]; then | 140 | if [ -f "${PID_FILE}" ]; then |
| @@ -208,7 +215,7 @@ echo " 服务端口: ${PORT}" | @@ -208,7 +215,7 @@ echo " 服务端口: ${PORT}" | ||
| 208 | echo " 协议: gRPC (默认,官方推荐)" | 215 | echo " 协议: gRPC (默认,官方推荐)" |
| 209 | echo " 其他参数: 使用官方默认值" | 216 | echo " 其他参数: 使用官方默认值" |
| 210 | echo " 副本数: ${REPLICAS}" | 217 | echo " 副本数: ${REPLICAS}" |
| 211 | -echo " 日志文件: ${LOG_FILE}" | 218 | +echo " 日志文件: ${LOG_LINK}" |
| 212 | echo "" | 219 | echo "" |
| 213 | 220 | ||
| 214 | # 副本数显存警告 | 221 | # 副本数显存警告 |
| @@ -263,9 +270,9 @@ EOF | @@ -263,9 +270,9 @@ EOF | ||
| 263 | 270 | ||
| 264 | echo -e "${GREEN}✓ 已生成配置文件: ${TEMP_FLOW_FILE}${NC}" | 271 | echo -e "${GREEN}✓ 已生成配置文件: ${TEMP_FLOW_FILE}${NC}" |
| 265 | 272 | ||
| 266 | -# 使用 nohup 在后台启动服务 | 273 | +# 使用 nohup 在后台启动服务(日志按天分流) |
| 267 | cd "${CLIP_SERVER_DIR}" | 274 | cd "${CLIP_SERVER_DIR}" |
| 268 | -nohup python -m clip_server "${TEMP_FLOW_FILE}" > "${LOG_FILE}" 2>&1 & | 275 | +nohup bash -lc "exec python -m clip_server \"${TEMP_FLOW_FILE}\" > >(\"${LOG_ROUTER_SCRIPT}\" \"cnclip\" \"${LOG_DIR}\" \"${LOG_RETENTION_DAYS:-30}\") 2>&1" >/dev/null 2>&1 & |
| 269 | 276 | ||
| 270 | # 保存 PID | 277 | # 保存 PID |
| 271 | SERVICE_PID=$! | 278 | SERVICE_PID=$! |
| @@ -293,7 +300,7 @@ if ps -p ${SERVICE_PID} > /dev/null 2>&1; then | @@ -293,7 +300,7 @@ if ps -p ${SERVICE_PID} > /dev/null 2>&1; then | ||
| 293 | echo -e " r = c.encode(['测试文本'])" | 300 | echo -e " r = c.encode(['测试文本'])" |
| 294 | echo "" | 301 | echo "" |
| 295 | echo -e "查看日志:" | 302 | echo -e "查看日志:" |
| 296 | - echo -e " tail -f ${LOG_FILE}" | 303 | + echo -e " tail -F ${LOG_LINK}" |
| 297 | echo "" | 304 | echo "" |
| 298 | echo -e "停止服务:" | 305 | echo -e "停止服务:" |
| 299 | echo -e " ./scripts/stop_cnclip_service.sh" | 306 | echo -e " ./scripts/stop_cnclip_service.sh" |
| @@ -303,7 +310,7 @@ if ps -p ${SERVICE_PID} > /dev/null 2>&1; then | @@ -303,7 +310,7 @@ if ps -p ${SERVICE_PID} > /dev/null 2>&1; then | ||
| 303 | echo -e "${YELLOW}等待模型加载完成(约30-60秒)...${NC}" | 310 | echo -e "${YELLOW}等待模型加载完成(约30-60秒)...${NC}" |
| 304 | sleep 30 | 311 | sleep 30 |
| 305 | echo -e "${GREEN}✓ 服务已启动,请查看日志确认模型是否加载完成${NC}" | 312 | echo -e "${GREEN}✓ 服务已启动,请查看日志确认模型是否加载完成${NC}" |
| 306 | - echo -e "${YELLOW}查看日志: tail -f ${LOG_FILE}${NC}" | 313 | + echo -e "${YELLOW}查看日志: tail -F ${LOG_LINK}${NC}" |
| 307 | 314 | ||
| 308 | else | 315 | else |
| 309 | echo -e "${RED}========================================${NC}" | 316 | echo -e "${RED}========================================${NC}" |
| @@ -311,7 +318,7 @@ else | @@ -311,7 +318,7 @@ else | ||
| 311 | echo -e "${RED}========================================${NC}" | 318 | echo -e "${RED}========================================${NC}" |
| 312 | echo "" | 319 | echo "" |
| 313 | echo -e "请查看日志获取详细错误信息:" | 320 | echo -e "请查看日志获取详细错误信息:" |
| 314 | - echo -e " tail -f ${LOG_FILE}" | 321 | + echo -e " tail -F ${LOG_LINK}" |
| 315 | echo "" | 322 | echo "" |
| 316 | rm -f "${PID_FILE}" | 323 | rm -f "${PID_FILE}" |
| 317 | exit 1 | 324 | exit 1 |
search/searcher.py
| @@ -8,6 +8,7 @@ from typing import Dict, Any, List, Optional, Union | @@ -8,6 +8,7 @@ from typing import Dict, Any, List, Optional, Union | ||
| 8 | import os | 8 | import os |
| 9 | import time, json | 9 | import time, json |
| 10 | import logging | 10 | import logging |
| 11 | +import hashlib | ||
| 11 | 12 | ||
| 12 | from utils.es_client import ESClient | 13 | from utils.es_client import ESClient |
| 13 | from query import QueryParser, ParsedQuery | 14 | from query import QueryParser, ParsedQuery |
| @@ -21,6 +22,15 @@ from api.result_formatter import ResultFormatter | @@ -21,6 +22,15 @@ from api.result_formatter import ResultFormatter | ||
| 21 | from indexer.mapping_generator import get_tenant_index_name | 22 | from indexer.mapping_generator import get_tenant_index_name |
| 22 | 23 | ||
| 23 | logger = logging.getLogger(__name__) | 24 | logger = logging.getLogger(__name__) |
| 25 | +backend_verbose_logger = logging.getLogger("backend.verbose") | ||
| 26 | + | ||
| 27 | + | ||
| 28 | +def _log_backend_verbose(payload: Dict[str, Any]) -> None: | ||
| 29 | + if not backend_verbose_logger.handlers: | ||
| 30 | + return | ||
| 31 | + backend_verbose_logger.info( | ||
| 32 | + json.dumps(payload, ensure_ascii=False, separators=(",", ":")) | ||
| 33 | + ) | ||
| 24 | 34 | ||
| 25 | 35 | ||
| 26 | class SearchResult: | 36 | class SearchResult: |
| @@ -320,16 +330,33 @@ class Searcher: | @@ -320,16 +330,33 @@ class Searcher: | ||
| 320 | context.store_intermediate_result('es_query', es_query) | 330 | context.store_intermediate_result('es_query', es_query) |
| 321 | context.store_intermediate_result('es_body_for_search', body_for_es) | 331 | context.store_intermediate_result('es_body_for_search', body_for_es) |
| 322 | 332 | ||
| 323 | - # Serialize ES query as a compact JSON string (no spaces or newlines) | ||
| 324 | - es_query_compact = json.dumps(es_query, ensure_ascii=False, separators=(',', ':')) | 333 | + # Serialize ES query to compute a compact size + stable digest for correlation |
| 334 | + es_query_compact = json.dumps(es_query, ensure_ascii=False, separators=(",", ":")) | ||
| 335 | + es_query_digest = hashlib.sha256(es_query_compact.encode("utf-8")).hexdigest()[:16] | ||
| 336 | + knn_enabled = bool(enable_embedding and parsed_query.query_vector is not None) | ||
| 337 | + vector_dims = int(len(parsed_query.query_vector)) if parsed_query.query_vector is not None else 0 | ||
| 325 | 338 | ||
| 326 | context.logger.info( | 339 | context.logger.info( |
| 327 | - f"ES query built | size: {len(es_query_compact)} chars | " | ||
| 328 | - f"KNN: {'yes' if enable_embedding and parsed_query.query_vector is not None else 'no'} | " | ||
| 329 | - f"facets: {'yes' if facets else 'no'} | " | ||
| 330 | - f"query: {es_query_compact}", | 340 | + "ES query built | size: %s chars | digest: %s | KNN: %s | vector_dims: %s | facets: %s", |
| 341 | + len(es_query_compact), | ||
| 342 | + es_query_digest, | ||
| 343 | + "yes" if knn_enabled else "no", | ||
| 344 | + vector_dims, | ||
| 345 | + "yes" if facets else "no", | ||
| 331 | extra={'reqid': context.reqid, 'uid': context.uid} | 346 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 332 | ) | 347 | ) |
| 348 | + _log_backend_verbose({ | ||
| 349 | + "event": "es_query_built", | ||
| 350 | + "reqid": context.reqid, | ||
| 351 | + "uid": context.uid, | ||
| 352 | + "tenant_id": tenant_id, | ||
| 353 | + "size_chars": len(es_query_compact), | ||
| 354 | + "sha256_16": es_query_digest, | ||
| 355 | + "knn_enabled": knn_enabled, | ||
| 356 | + "vector_dims": vector_dims, | ||
| 357 | + "has_facets": bool(facets), | ||
| 358 | + "query": es_query, | ||
| 359 | + }) | ||
| 333 | except Exception as e: | 360 | except Exception as e: |
| 334 | context.set_error(e) | 361 | context.set_error(e) |
| 335 | context.logger.error( | 362 | context.logger.error( |