Commit 28e57bb16712929459a9d1eb43c2800a94ef2d8b
1 parent
af7ee060
日志体系优化
Showing
13 changed files
with
337 additions
and
78 deletions
Show diff stats
README.md
| ... | ... | @@ -29,7 +29,7 @@ source activate.sh |
| 29 | 29 | ./run.sh |
| 30 | 30 | |
| 31 | 31 | # 可选:附加能力服务(按需开启) |
| 32 | -./scripts/service_ctl.sh start embedding translator reranker tei cnclip | |
| 32 | +./scripts/service_ctl.sh start tei cnclip embedding translator reranker | |
| 33 | 33 | |
| 34 | 34 | # 查看状态 |
| 35 | 35 | ./scripts/service_ctl.sh status | ... | ... |
api/app.py
| ... | ... | @@ -11,6 +11,7 @@ import logging |
| 11 | 11 | import time |
| 12 | 12 | import argparse |
| 13 | 13 | import uvicorn |
| 14 | +from logging.handlers import TimedRotatingFileHandler | |
| 14 | 15 | from collections import defaultdict, deque |
| 15 | 16 | from typing import Optional |
| 16 | 17 | from fastapi import FastAPI, Request, HTTPException |
| ... | ... | @@ -23,18 +24,60 @@ from slowapi import Limiter, _rate_limit_exceeded_handler |
| 23 | 24 | from slowapi.util import get_remote_address |
| 24 | 25 | from slowapi.errors import RateLimitExceeded |
| 25 | 26 | |
| 26 | -# Configure logging with better formatting | |
| 27 | +# Configure backend logging | |
| 27 | 28 | import pathlib |
| 28 | -log_dir = pathlib.Path('logs') | |
| 29 | -log_dir.mkdir(exist_ok=True) | |
| 30 | -logging.basicConfig( | |
| 31 | - level=logging.INFO, | |
| 32 | - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', | |
| 33 | - handlers=[ | |
| 34 | - logging.StreamHandler(), | |
| 35 | - logging.FileHandler(log_dir / 'api.log', mode='a', encoding='utf-8') | |
| 36 | - ] | |
| 37 | -) | |
| 29 | + | |
| 30 | + | |
| 31 | +def configure_backend_logging() -> None: | |
| 32 | + log_dir = pathlib.Path("logs") | |
| 33 | + log_dir.mkdir(exist_ok=True) | |
| 34 | + log_level = os.getenv("LOG_LEVEL", "INFO").upper() | |
| 35 | + numeric_level = getattr(logging, log_level, logging.INFO) | |
| 36 | + | |
| 37 | + default_formatter = logging.Formatter( | |
| 38 | + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" | |
| 39 | + ) | |
| 40 | + | |
| 41 | + root_logger = logging.getLogger() | |
| 42 | + root_logger.setLevel(numeric_level) | |
| 43 | + root_logger.handlers.clear() | |
| 44 | + | |
| 45 | + console_handler = logging.StreamHandler() | |
| 46 | + console_handler.setLevel(numeric_level) | |
| 47 | + console_handler.setFormatter(default_formatter) | |
| 48 | + root_logger.addHandler(console_handler) | |
| 49 | + | |
| 50 | + backend_handler = TimedRotatingFileHandler( | |
| 51 | + filename=log_dir / "api.log", | |
| 52 | + when="midnight", | |
| 53 | + interval=1, | |
| 54 | + backupCount=30, | |
| 55 | + encoding="utf-8", | |
| 56 | + ) | |
| 57 | + backend_handler.setLevel(numeric_level) | |
| 58 | + backend_handler.setFormatter(default_formatter) | |
| 59 | + root_logger.addHandler(backend_handler) | |
| 60 | + | |
| 61 | + verbose_logger = logging.getLogger("backend.verbose") | |
| 62 | + verbose_logger.setLevel(numeric_level) | |
| 63 | + verbose_logger.handlers.clear() | |
| 64 | + verbose_logger.propagate = False | |
| 65 | + | |
| 66 | + verbose_handler = TimedRotatingFileHandler( | |
| 67 | + filename=log_dir / "backend_verbose.log", | |
| 68 | + when="midnight", | |
| 69 | + interval=1, | |
| 70 | + backupCount=30, | |
| 71 | + encoding="utf-8", | |
| 72 | + ) | |
| 73 | + verbose_handler.setLevel(numeric_level) | |
| 74 | + verbose_handler.setFormatter( | |
| 75 | + logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") | |
| 76 | + ) | |
| 77 | + verbose_logger.addHandler(verbose_handler) | |
| 78 | + | |
| 79 | + | |
| 80 | +configure_backend_logging() | |
| 38 | 81 | logger = logging.getLogger(__name__) |
| 39 | 82 | |
| 40 | 83 | # Initialize rate limiter | ... | ... |
api/routes/search.py
| ... | ... | @@ -5,6 +5,9 @@ Search API routes. |
| 5 | 5 | from fastapi import APIRouter, HTTPException, Query, Request |
| 6 | 6 | from typing import Optional |
| 7 | 7 | import uuid |
| 8 | +import hashlib | |
| 9 | +import json | |
| 10 | +import logging | |
| 8 | 11 | |
| 9 | 12 | from ..models import ( |
| 10 | 13 | SearchRequest, |
| ... | ... | @@ -17,6 +20,15 @@ from ..models import ( |
| 17 | 20 | from context.request_context import create_request_context, set_current_request_context, clear_current_request_context |
| 18 | 21 | |
| 19 | 22 | router = APIRouter(prefix="/search", tags=["search"]) |
| 23 | +backend_verbose_logger = logging.getLogger("backend.verbose") | |
| 24 | + | |
| 25 | + | |
| 26 | +def _log_backend_verbose(payload: dict) -> None: | |
| 27 | + if not backend_verbose_logger.handlers: | |
| 28 | + return | |
| 29 | + backend_verbose_logger.info( | |
| 30 | + json.dumps(payload, ensure_ascii=False, separators=(",", ":")) | |
| 31 | + ) | |
| 20 | 32 | |
| 21 | 33 | |
| 22 | 34 | def extract_request_info(request: Request) -> tuple[str, str]: |
| ... | ... | @@ -136,15 +148,32 @@ async def search(request: SearchRequest, http_request: Request): |
| 136 | 148 | debug_info=result.debug_info |
| 137 | 149 | ) |
| 138 | 150 | |
| 139 | - # Log complete response JSON | |
| 151 | + response_payload = response.model_dump(mode="json") | |
| 152 | + response_json = json.dumps(response_payload, ensure_ascii=False, separators=(",", ":")) | |
| 153 | + response_digest = hashlib.sha256(response_json.encode("utf-8")).hexdigest()[:16] | |
| 154 | + max_score = float(response.max_score or 0.0) | |
| 155 | + | |
| 140 | 156 | context.logger.info( |
| 141 | - "Search response | " | |
| 142 | - f"Total results: {response.total} | " | |
| 143 | - f"Max score: {response.max_score:.4f} | " | |
| 144 | - f"Time: {response.took_ms}ms | " | |
| 145 | - f"Response: {response.model_dump_json()}", | |
| 157 | + "Search response | Total results: %s | Max score: %.4f | Time: %sms | payload_size: %s chars | digest: %s", | |
| 158 | + response.total, | |
| 159 | + max_score, | |
| 160 | + response.took_ms, | |
| 161 | + len(response_json), | |
| 162 | + response_digest, | |
| 146 | 163 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 147 | 164 | ) |
| 165 | + _log_backend_verbose({ | |
| 166 | + "event": "search_response", | |
| 167 | + "reqid": context.reqid, | |
| 168 | + "uid": context.uid, | |
| 169 | + "tenant_id": tenant_id, | |
| 170 | + "total_results": response.total, | |
| 171 | + "max_score": max_score, | |
| 172 | + "took_ms": response.took_ms, | |
| 173 | + "payload_size_chars": len(response_json), | |
| 174 | + "sha256_16": response_digest, | |
| 175 | + "response": response_payload, | |
| 176 | + }) | |
| 148 | 177 | |
| 149 | 178 | return response |
| 150 | 179 | |
| ... | ... | @@ -233,15 +262,32 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): |
| 233 | 262 | performance_info=performance_summary |
| 234 | 263 | ) |
| 235 | 264 | |
| 236 | - # Log complete response JSON | |
| 265 | + response_payload = response.model_dump(mode="json") | |
| 266 | + response_json = json.dumps(response_payload, ensure_ascii=False, separators=(",", ":")) | |
| 267 | + response_digest = hashlib.sha256(response_json.encode("utf-8")).hexdigest()[:16] | |
| 268 | + max_score = float(response.max_score or 0.0) | |
| 269 | + | |
| 237 | 270 | context.logger.info( |
| 238 | - "Image search response | " | |
| 239 | - f"Total results: {response.total} | " | |
| 240 | - f"Max score: {response.max_score:.4f} | " | |
| 241 | - f"Time: {response.took_ms}ms | " | |
| 242 | - f"Response: {response.model_dump_json()}", | |
| 271 | + "Image search response | Total results: %s | Max score: %.4f | Time: %sms | payload_size: %s chars | digest: %s", | |
| 272 | + response.total, | |
| 273 | + max_score, | |
| 274 | + response.took_ms, | |
| 275 | + len(response_json), | |
| 276 | + response_digest, | |
| 243 | 277 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 244 | 278 | ) |
| 279 | + _log_backend_verbose({ | |
| 280 | + "event": "image_search_response", | |
| 281 | + "reqid": context.reqid, | |
| 282 | + "uid": context.uid, | |
| 283 | + "tenant_id": tenant_id, | |
| 284 | + "total_results": response.total, | |
| 285 | + "max_score": max_score, | |
| 286 | + "took_ms": response.took_ms, | |
| 287 | + "payload_size_chars": len(response_json), | |
| 288 | + "sha256_16": response_digest, | |
| 289 | + "response": response_payload, | |
| 290 | + }) | |
| 245 | 291 | |
| 246 | 292 | return response |
| 247 | 293 | ... | ... |
docs/QUICKSTART.md
| ... | ... | @@ -510,16 +510,11 @@ curl http://localhost:6007/health |
| 510 | 510 | |
| 511 | 511 | ### 5.2 常用日志 |
| 512 | 512 | |
| 513 | -- `logs/backend.log` | |
| 514 | -- `logs/indexer.log` | |
| 515 | -- `logs/frontend.log` | |
| 516 | -- `logs/embedding.log` | |
| 517 | -- `logs/translator.log` | |
| 518 | -- `logs/reranker.log` | |
| 519 | -- `logs/tei.log` | |
| 520 | -- `logs/cnclip.log` | |
| 521 | -- `logs/search_engine.log` | |
| 522 | -- `logs/errors.log` | |
| 513 | +- `logs/<service>-YYYY-MM-DD.log`(`service_ctl.sh` 按天写入的真实文件) | |
| 514 | +- `logs/<service>.log`(指向当天文件的软链,推荐 `tail -F`) | |
| 515 | +- `logs/api.log`(backend 进程内日志,按天轮转) | |
| 516 | +- `logs/backend_verbose.log`(backend 大对象详细日志,按天轮转) | |
| 517 | +- `logs/indexer.log`(索引结构化 JSON 日志,按天轮转) | |
| 523 | 518 | |
| 524 | 519 | ### 5.3 常用排障命令 |
| 525 | 520 | ... | ... |
docs/TEI_SERVICE说明文档.md
| ... | ... | @@ -152,7 +152,7 @@ curl -sS -X POST "http://127.0.0.1:6005/embed/text" \ |
| 152 | 152 | 启动全套(含 TEI): |
| 153 | 153 | |
| 154 | 154 | ```bash |
| 155 | -TEI_DEVICE=cuda ./scripts/service_ctl.sh start embedding translator reranker tei cnclip | |
| 155 | +TEI_DEVICE=cuda ./scripts/service_ctl.sh start tei cnclip embedding translator reranker | |
| 156 | 156 | ``` |
| 157 | 157 | |
| 158 | 158 | 仅启动 TEI: | ... | ... |
docs/Usage-Guide.md
| ... | ... | @@ -368,31 +368,23 @@ RERANKER_PORT=6007 |
| 368 | 368 | |
| 369 | 369 | 日志文件存储在 `logs/` 目录下: |
| 370 | 370 | |
| 371 | -- `logs/backend.log` - 后端服务日志 | |
| 372 | -- `logs/indexer.log` - 索引服务日志 | |
| 373 | -- `logs/frontend.log` - 前端服务日志 | |
| 374 | -- `logs/embedding.log` - 向量服务日志(可选) | |
| 375 | -- `logs/translator.log` - 翻译服务日志(可选) | |
| 376 | -- `logs/reranker.log` - 重排服务日志(可选) | |
| 377 | -- `logs/tei.log` - TEI 启停日志(可选) | |
| 378 | -- `logs/cnclip.log` - CN-CLIP 启停日志(可选) | |
| 379 | -- `logs/search_engine.log` - 应用主日志(按天轮转) | |
| 380 | -- `logs/errors.log` - 错误日志(按天轮转) | |
| 371 | +- `logs/<service>-YYYY-MM-DD.log` - `service_ctl.sh` 统一管理的按天日志文件(真实文件) | |
| 372 | +- `logs/<service>.log` - 指向当天日志文件的软链(兼容原有命令) | |
| 373 | +- `logs/api.log` - backend 进程内日志(按天轮转) | |
| 374 | +- `logs/backend_verbose.log` - backend 详细大对象日志(按天轮转,含 `Search response` / `ES query built` 完整内容) | |
| 375 | +- `logs/indexer.log` - 索引结构化日志(JSON,按天轮转) | |
| 381 | 376 | |
| 382 | 377 | ### 查看实时日志 |
| 383 | 378 | |
| 384 | 379 | ```bash |
| 385 | -# 查看后端日志 | |
| 386 | -tail -f logs/backend.log | |
| 380 | +# 查看后端日志(建议 -F,跨天自动跟随新文件) | |
| 381 | +tail -F logs/backend.log | |
| 387 | 382 | |
| 388 | 383 | # 查看前端日志 |
| 389 | -tail -f logs/frontend.log | |
| 384 | +tail -F logs/frontend.log | |
| 390 | 385 | |
| 391 | -# 查看应用主日志 | |
| 392 | -tail -f logs/search_engine.log | |
| 393 | - | |
| 394 | -# 查看错误日志 | |
| 395 | -tail -f logs/errors.log | |
| 386 | +# 查看 backend 详细大对象日志 | |
| 387 | +tail -F logs/backend_verbose.log | |
| 396 | 388 | ``` |
| 397 | 389 | |
| 398 | 390 | ### 日志级别 |
| ... | ... | @@ -406,7 +398,7 @@ LOG_LEVEL=DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL |
| 406 | 398 | |
| 407 | 399 | ### 日志轮转 |
| 408 | 400 | |
| 409 | -日志文件按天自动轮转,保留30天的历史日志。 | |
| 401 | +日志文件按天自动轮转,默认保留 30 天历史日志(可通过 `LOG_RETENTION_DAYS` 调整 `service_ctl.sh` 管理日志的保留天数)。 | |
| 410 | 402 | |
| 411 | 403 | --- |
| 412 | 404 | ... | ... |
embeddings/server.py
| ... | ... | @@ -56,6 +56,29 @@ _TEXT_MICROBATCH_WINDOW_SEC = max( |
| 56 | 56 | _TEXT_REQUEST_TIMEOUT_SEC = max( |
| 57 | 57 | 1.0, float(os.getenv("TEXT_REQUEST_TIMEOUT_SEC", "30")) |
| 58 | 58 | ) |
| 59 | +_LOG_PREVIEW_COUNT = max(1, int(os.getenv("EMBEDDING_LOG_PREVIEW_COUNT", "3"))) | |
| 60 | +_LOG_TEXT_PREVIEW_CHARS = max(32, int(os.getenv("EMBEDDING_LOG_TEXT_PREVIEW_CHARS", "120"))) | |
| 61 | +_LOG_IMAGE_PREVIEW_CHARS = max(32, int(os.getenv("EMBEDDING_LOG_IMAGE_PREVIEW_CHARS", "180"))) | |
| 62 | + | |
| 63 | + | |
| 64 | +def _compact_preview(text: str, max_chars: int) -> str: | |
| 65 | + compact = " ".join((text or "").split()) | |
| 66 | + if len(compact) <= max_chars: | |
| 67 | + return compact | |
| 68 | + return compact[:max_chars] + "..." | |
| 69 | + | |
| 70 | + | |
| 71 | +def _preview_inputs(items: List[str], max_items: int, max_chars: int) -> List[Dict[str, Any]]: | |
| 72 | + previews: List[Dict[str, Any]] = [] | |
| 73 | + for idx, item in enumerate(items[:max_items]): | |
| 74 | + previews.append( | |
| 75 | + { | |
| 76 | + "idx": idx, | |
| 77 | + "len": len(item), | |
| 78 | + "preview": _compact_preview(item, max_chars), | |
| 79 | + } | |
| 80 | + ) | |
| 81 | + return previews | |
| 59 | 82 | |
| 60 | 83 | |
| 61 | 84 | def _encode_local_st(texts: List[str], normalize_embeddings: bool) -> Any: |
| ... | ... | @@ -292,6 +315,14 @@ def embed_text(texts: List[str], normalize: Optional[bool] = None) -> List[Optio |
| 292 | 315 | raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: empty string") |
| 293 | 316 | normalized.append(s) |
| 294 | 317 | |
| 318 | + logger.info( | |
| 319 | + "embed_text request | backend=%s inputs=%d normalize=%s preview=%s", | |
| 320 | + _text_backend_name, | |
| 321 | + len(normalized), | |
| 322 | + effective_normalize, | |
| 323 | + _preview_inputs(normalized, _LOG_PREVIEW_COUNT, _LOG_TEXT_PREVIEW_CHARS), | |
| 324 | + ) | |
| 325 | + | |
| 295 | 326 | t0 = time.perf_counter() |
| 296 | 327 | try: |
| 297 | 328 | # local_st backend uses in-process torch model, keep serialized encode for safety; |
| ... | ... | @@ -301,10 +332,11 @@ def embed_text(texts: List[str], normalize: Optional[bool] = None) -> List[Optio |
| 301 | 332 | out = [_encode_single_text_with_microbatch(normalized[0], normalize=effective_normalize)] |
| 302 | 333 | elapsed_ms = (time.perf_counter() - t0) * 1000.0 |
| 303 | 334 | logger.info( |
| 304 | - "embed_text done | backend=%s mode=microbatch-single inputs=%d normalize=%s elapsed_ms=%.2f", | |
| 335 | + "embed_text done | backend=%s mode=microbatch-single inputs=%d normalize=%s dim=%d elapsed_ms=%.2f", | |
| 305 | 336 | _text_backend_name, |
| 306 | 337 | len(normalized), |
| 307 | 338 | effective_normalize, |
| 339 | + len(out[0]) if out and out[0] is not None else 0, | |
| 308 | 340 | elapsed_ms, |
| 309 | 341 | ) |
| 310 | 342 | return out |
| ... | ... | @@ -335,10 +367,11 @@ def embed_text(texts: List[str], normalize: Optional[bool] = None) -> List[Optio |
| 335 | 367 | out.append(vec) |
| 336 | 368 | elapsed_ms = (time.perf_counter() - t0) * 1000.0 |
| 337 | 369 | logger.info( |
| 338 | - "embed_text done | backend=%s inputs=%d normalize=%s elapsed_ms=%.2f", | |
| 370 | + "embed_text done | backend=%s inputs=%d normalize=%s dim=%d elapsed_ms=%.2f", | |
| 339 | 371 | _text_backend_name, |
| 340 | 372 | len(normalized), |
| 341 | 373 | effective_normalize, |
| 374 | + len(out[0]) if out and out[0] is not None else 0, | |
| 342 | 375 | elapsed_ms, |
| 343 | 376 | ) |
| 344 | 377 | return out |
| ... | ... | @@ -358,6 +391,14 @@ def embed_image(images: List[str], normalize: Optional[bool] = None) -> List[Opt |
| 358 | 391 | raise HTTPException(status_code=400, detail=f"Invalid image at index {i}: empty URL/path") |
| 359 | 392 | urls.append(s) |
| 360 | 393 | |
| 394 | + logger.info( | |
| 395 | + "embed_image request | inputs=%d normalize=%s preview=%s", | |
| 396 | + len(urls), | |
| 397 | + effective_normalize, | |
| 398 | + _preview_inputs(urls, _LOG_PREVIEW_COUNT, _LOG_IMAGE_PREVIEW_CHARS), | |
| 399 | + ) | |
| 400 | + | |
| 401 | + t0 = time.perf_counter() | |
| 361 | 402 | with _image_encode_lock: |
| 362 | 403 | vectors = _image_model.encode_image_urls( |
| 363 | 404 | urls, |
| ... | ... | @@ -375,4 +416,12 @@ def embed_image(images: List[str], normalize: Optional[bool] = None) -> List[Opt |
| 375 | 416 | if out_vec is None: |
| 376 | 417 | raise RuntimeError(f"Image model returned empty embedding for index {i}") |
| 377 | 418 | out.append(out_vec) |
| 419 | + elapsed_ms = (time.perf_counter() - t0) * 1000.0 | |
| 420 | + logger.info( | |
| 421 | + "embed_image done | inputs=%d normalize=%s dim=%d elapsed_ms=%.2f", | |
| 422 | + len(urls), | |
| 423 | + effective_normalize, | |
| 424 | + len(out[0]) if out and out[0] is not None else 0, | |
| 425 | + elapsed_ms, | |
| 426 | + ) | |
| 378 | 427 | return out | ... | ... |
reranker/server.py
| ... | ... | @@ -9,6 +9,7 @@ Backend selected via config: services.rerank.backend (bge | qwen3_vllm), env RER |
| 9 | 9 | """ |
| 10 | 10 | |
| 11 | 11 | import logging |
| 12 | +import os | |
| 12 | 13 | import time |
| 13 | 14 | from typing import Any, Dict, List, Optional |
| 14 | 15 | |
| ... | ... | @@ -29,6 +30,28 @@ app = FastAPI(title="saas-search Reranker Service", version="1.0.0") |
| 29 | 30 | |
| 30 | 31 | _reranker: Optional[RerankBackendProtocol] = None |
| 31 | 32 | _backend_name: str = "" |
| 33 | +_LOG_DOC_PREVIEW_COUNT = max(1, int(os.getenv("RERANK_LOG_DOC_PREVIEW_COUNT", "3"))) | |
| 34 | +_LOG_TEXT_PREVIEW_CHARS = max(32, int(os.getenv("RERANK_LOG_TEXT_PREVIEW_CHARS", "180"))) | |
| 35 | + | |
| 36 | + | |
| 37 | +def _compact_preview(text: str, max_chars: int) -> str: | |
| 38 | + compact = " ".join((text or "").split()) | |
| 39 | + if len(compact) <= max_chars: | |
| 40 | + return compact | |
| 41 | + return compact[:max_chars] + "..." | |
| 42 | + | |
| 43 | + | |
| 44 | +def _preview_docs(docs: List[str], max_items: int, max_chars: int) -> List[Dict[str, Any]]: | |
| 45 | + previews: List[Dict[str, Any]] = [] | |
| 46 | + for idx, doc in enumerate(docs[:max_items]): | |
| 47 | + previews.append( | |
| 48 | + { | |
| 49 | + "idx": idx, | |
| 50 | + "len": len(doc), | |
| 51 | + "preview": _compact_preview(doc, max_chars), | |
| 52 | + } | |
| 53 | + ) | |
| 54 | + return previews | |
| 32 | 55 | |
| 33 | 56 | |
| 34 | 57 | class RerankRequest(BaseModel): |
| ... | ... | @@ -100,19 +123,25 @@ def rerank(request: RerankRequest) -> RerankResponse: |
| 100 | 123 | |
| 101 | 124 | start_ts = time.time() |
| 102 | 125 | logger.info( |
| 103 | - "Rerank request | docs=%d normalize=%s", | |
| 126 | + "Rerank request | docs=%d normalize=%s query_len=%d query=%r doc_preview=%s", | |
| 104 | 127 | len(request.docs), |
| 105 | 128 | normalize, |
| 129 | + len(query), | |
| 130 | + _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS), | |
| 131 | + _preview_docs(request.docs, _LOG_DOC_PREVIEW_COUNT, _LOG_TEXT_PREVIEW_CHARS), | |
| 106 | 132 | ) |
| 107 | 133 | scores, meta = _reranker.score_with_meta(query, request.docs, normalize=normalize) |
| 108 | 134 | meta = dict(meta) |
| 109 | 135 | meta.update({"service_elapsed_ms": round((time.time() - start_ts) * 1000.0, 3)}) |
| 136 | + score_preview = [round(float(s), 6) for s in scores[:_LOG_DOC_PREVIEW_COUNT]] | |
| 110 | 137 | logger.info( |
| 111 | - "Rerank done | docs=%d unique=%s dedup=%s elapsed_ms=%s", | |
| 138 | + "Rerank done | docs=%d unique=%s dedup=%s elapsed_ms=%s query=%r score_preview=%s", | |
| 112 | 139 | meta.get("input_docs"), |
| 113 | 140 | meta.get("unique_docs"), |
| 114 | 141 | meta.get("dedup_ratio"), |
| 115 | 142 | meta.get("service_elapsed_ms"), |
| 143 | + _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS), | |
| 144 | + score_preview, | |
| 116 | 145 | ) |
| 117 | 146 | |
| 118 | 147 | return RerankResponse(scores=scores, meta=meta) | ... | ... |
scripts/daily_log_router.sh
| ... | ... | @@ -0,0 +1,56 @@ |
| 1 | +#!/bin/bash | |
| 2 | +# | |
| 3 | +# Route incoming log stream into per-day files. | |
| 4 | +# | |
| 5 | +# Usage: | |
| 6 | +# command 2>&1 | ./scripts/daily_log_router.sh <service> <log_dir> [retention_days] | |
| 7 | +# | |
| 8 | + | |
| 9 | +set -euo pipefail | |
| 10 | + | |
| 11 | +if [ "$#" -lt 2 ]; then | |
| 12 | + echo "Usage: $0 <service> <log_dir> [retention_days]" >&2 | |
| 13 | + exit 1 | |
| 14 | +fi | |
| 15 | + | |
| 16 | +SERVICE_NAME="$1" | |
| 17 | +LOG_DIR="$2" | |
| 18 | +RETENTION_DAYS="${3:-30}" | |
| 19 | + | |
| 20 | +mkdir -p "${LOG_DIR}" | |
| 21 | + | |
| 22 | +awk -v dir="${LOG_DIR}" -v service="${SERVICE_NAME}" -v retention_days="${RETENTION_DAYS}" ' | |
| 23 | +function rotate_file(day) { | |
| 24 | + return sprintf("%s/%s-%s.log", dir, service, day) | |
| 25 | +} | |
| 26 | + | |
| 27 | +function update_symlink(day) { | |
| 28 | + cmd = sprintf("ln -sfn \"%s-%s.log\" \"%s/%s.log\"", service, day, dir, service) | |
| 29 | + system(cmd) | |
| 30 | +} | |
| 31 | + | |
| 32 | +function cleanup_old_logs() { | |
| 33 | + cmd = sprintf("find \"%s\" -maxdepth 1 -type f -name \"%s-*.log\" -mtime +%d -delete >/dev/null 2>&1", dir, service, retention_days) | |
| 34 | + system(cmd) | |
| 35 | +} | |
| 36 | + | |
| 37 | +{ | |
| 38 | + day = strftime("%Y-%m-%d") | |
| 39 | + target = rotate_file(day) | |
| 40 | + | |
| 41 | + if (target != current_target) { | |
| 42 | + update_symlink(day) | |
| 43 | + cleanup_old_logs() | |
| 44 | + current_target = target | |
| 45 | + } | |
| 46 | + | |
| 47 | + print >> current_target | |
| 48 | + fflush(current_target) | |
| 49 | +} | |
| 50 | + | |
| 51 | +END { | |
| 52 | + if (current_target != "") { | |
| 53 | + close(current_target) | |
| 54 | + } | |
| 55 | +} | |
| 56 | +' | ... | ... |
scripts/service_ctl.sh
| ... | ... | @@ -8,6 +8,7 @@ set -euo pipefail |
| 8 | 8 | |
| 9 | 9 | PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" |
| 10 | 10 | LOG_DIR="${PROJECT_ROOT}/logs" |
| 11 | +LOG_RETENTION_DAYS="${LOG_RETENTION_DAYS:-30}" | |
| 11 | 12 | |
| 12 | 13 | mkdir -p "${LOG_DIR}" |
| 13 | 14 | |
| ... | ... | @@ -46,6 +47,16 @@ log_file() { |
| 46 | 47 | echo "${LOG_DIR}/${service}.log" |
| 47 | 48 | } |
| 48 | 49 | |
| 50 | +prepare_daily_log_target() { | |
| 51 | + local service="$1" | |
| 52 | + local day | |
| 53 | + local today_file | |
| 54 | + day="$(date +%F)" | |
| 55 | + today_file="${LOG_DIR}/${service}-${day}.log" | |
| 56 | + touch "${today_file}" | |
| 57 | + ln -sfn "$(basename "${today_file}")" "$(log_file "${service}")" | |
| 58 | +} | |
| 59 | + | |
| 49 | 60 | service_start_cmd() { |
| 50 | 61 | local service="$1" |
| 51 | 62 | case "${service}" in |
| ... | ... | @@ -176,6 +187,7 @@ start_one() { |
| 176 | 187 | local pf lf |
| 177 | 188 | pf="$(pid_file "${service}")" |
| 178 | 189 | lf="$(log_file "${service}")" |
| 190 | + prepare_daily_log_target "${service}" | |
| 179 | 191 | |
| 180 | 192 | if [ "${service}" != "tei" ]; then |
| 181 | 193 | if is_running_by_pid "${service}" || is_running_by_port "${service}"; then |
| ... | ... | @@ -203,12 +215,12 @@ start_one() { |
| 203 | 215 | cnclip|tei) |
| 204 | 216 | echo "[start] ${service} (managed by native script)" |
| 205 | 217 | if [ "${service}" = "cnclip" ]; then |
| 206 | - if ! CNCLIP_DEVICE="${CNCLIP_DEVICE:-cuda}" bash -lc "${cmd}" >> "${lf}" 2>&1; then | |
| 218 | + if ! CNCLIP_DEVICE="${CNCLIP_DEVICE:-cuda}" "${cmd}" > >("${PROJECT_ROOT}/scripts/daily_log_router.sh" "${service}" "${LOG_DIR}" "${LOG_RETENTION_DAYS}") 2>&1; then | |
| 207 | 219 | echo "[error] ${service} start script failed, inspect ${lf}" >&2 |
| 208 | 220 | return 1 |
| 209 | 221 | fi |
| 210 | 222 | else |
| 211 | - if ! bash -lc "${cmd}" >> "${lf}" 2>&1; then | |
| 223 | + if ! "${cmd}" > >("${PROJECT_ROOT}/scripts/daily_log_router.sh" "${service}" "${LOG_DIR}" "${LOG_RETENTION_DAYS}") 2>&1; then | |
| 212 | 224 | echo "[error] ${service} start script failed, inspect ${lf}" >&2 |
| 213 | 225 | return 1 |
| 214 | 226 | fi |
| ... | ... | @@ -229,7 +241,7 @@ start_one() { |
| 229 | 241 | ;; |
| 230 | 242 | backend|indexer|frontend|embedding|translator) |
| 231 | 243 | echo "[start] ${service}" |
| 232 | - nohup bash -lc "${cmd}" > "${lf}" 2>&1 & | |
| 244 | + nohup "${cmd}" > >("${PROJECT_ROOT}/scripts/daily_log_router.sh" "${service}" "${LOG_DIR}" "${LOG_RETENTION_DAYS}") 2>&1 & | |
| 233 | 245 | local pid=$! |
| 234 | 246 | echo "${pid}" > "${pf}" |
| 235 | 247 | local retries=30 |
| ... | ... | @@ -244,7 +256,7 @@ start_one() { |
| 244 | 256 | echo "[start] ${service}" |
| 245 | 257 | # Start reranker directly so pid file points to the script process that |
| 246 | 258 | # will exec uvicorn, avoiding extra shell wrapper lifecycle edge-cases. |
| 247 | - nohup "${cmd}" > "${lf}" 2>&1 & | |
| 259 | + nohup "${cmd}" > >("${PROJECT_ROOT}/scripts/daily_log_router.sh" "${service}" "${LOG_DIR}" "${LOG_RETENTION_DAYS}") 2>&1 & | |
| 248 | 260 | local pid=$! |
| 249 | 261 | echo "${pid}" > "${pf}" |
| 250 | 262 | if wait_for_health "${service}" 90; then |
| ... | ... | @@ -408,9 +420,12 @@ Default target set (when no service provided): |
| 408 | 420 | status -> all known services |
| 409 | 421 | |
| 410 | 422 | Optional service startup: |
| 411 | - ./scripts/service_ctl.sh start embedding translator reranker tei cnclip | |
| 423 | + ./scripts/service_ctl.sh start tei cnclip embedding translator reranker | |
| 412 | 424 | TEI_DEVICE=cuda|cpu ./scripts/service_ctl.sh start tei |
| 413 | 425 | CNCLIP_DEVICE=cuda|cpu ./scripts/service_ctl.sh start cnclip |
| 426 | + | |
| 427 | +Log retention: | |
| 428 | + LOG_RETENTION_DAYS=30 ./scripts/service_ctl.sh start | |
| 414 | 429 | EOF |
| 415 | 430 | } |
| 416 | 431 | ... | ... |
scripts/start.sh
| ... | ... | @@ -12,11 +12,11 @@ echo "saas-search 服务启动" |
| 12 | 12 | echo "========================================" |
| 13 | 13 | echo "默认启动核心服务: backend/indexer/frontend" |
| 14 | 14 | echo "可选服务请显式指定:" |
| 15 | -echo " ./scripts/service_ctl.sh start embedding translator reranker tei cnclip" | |
| 15 | +echo " ./scripts/service_ctl.sh start tei cnclip embedding translator reranker" | |
| 16 | 16 | echo |
| 17 | 17 | |
| 18 | 18 | ./scripts/service_ctl.sh start |
| 19 | 19 | |
| 20 | 20 | echo |
| 21 | 21 | echo "当前服务状态:" |
| 22 | -./scripts/service_ctl.sh status backend indexer frontend embedding translator reranker tei cnclip | |
| 22 | +./scripts/service_ctl.sh status backend indexer frontend tei cnclip embedding translator reranker | ... | ... |
scripts/start_cnclip_service.sh
| ... | ... | @@ -43,7 +43,9 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" |
| 43 | 43 | CLIP_SERVER_DIR="${PROJECT_ROOT}/third-party/clip-as-service/server" |
| 44 | 44 | LOG_DIR="${PROJECT_ROOT}/logs" |
| 45 | 45 | PID_FILE="${LOG_DIR}/cnclip.pid" |
| 46 | -LOG_FILE="${LOG_DIR}/cnclip.log" | |
| 46 | +LOG_LINK="${LOG_DIR}/cnclip.log" | |
| 47 | +LOG_FILE="${LOG_DIR}/cnclip-$(date +%F).log" | |
| 48 | +LOG_ROUTER_SCRIPT="${PROJECT_ROOT}/scripts/daily_log_router.sh" | |
| 47 | 49 | |
| 48 | 50 | # 帮助信息 |
| 49 | 51 | show_help() { |
| ... | ... | @@ -128,6 +130,11 @@ fi |
| 128 | 130 | |
| 129 | 131 | # 创建日志目录 |
| 130 | 132 | mkdir -p "${LOG_DIR}" |
| 133 | +if [ ! -x "${LOG_ROUTER_SCRIPT}" ]; then | |
| 134 | + echo -e "${RED}错误: 日志路由脚本不存在或不可执行: ${LOG_ROUTER_SCRIPT}${NC}" | |
| 135 | + exit 1 | |
| 136 | +fi | |
| 137 | +ln -sfn "$(basename "${LOG_FILE}")" "${LOG_LINK}" | |
| 131 | 138 | |
| 132 | 139 | # 检查是否已经有服务在运行 |
| 133 | 140 | if [ -f "${PID_FILE}" ]; then |
| ... | ... | @@ -208,7 +215,7 @@ echo " 服务端口: ${PORT}" |
| 208 | 215 | echo " 协议: gRPC (默认,官方推荐)" |
| 209 | 216 | echo " 其他参数: 使用官方默认值" |
| 210 | 217 | echo " 副本数: ${REPLICAS}" |
| 211 | -echo " 日志文件: ${LOG_FILE}" | |
| 218 | +echo " 日志文件: ${LOG_LINK}" | |
| 212 | 219 | echo "" |
| 213 | 220 | |
| 214 | 221 | # 副本数显存警告 |
| ... | ... | @@ -263,9 +270,9 @@ EOF |
| 263 | 270 | |
| 264 | 271 | echo -e "${GREEN}✓ 已生成配置文件: ${TEMP_FLOW_FILE}${NC}" |
| 265 | 272 | |
| 266 | -# 使用 nohup 在后台启动服务 | |
| 273 | +# 使用 nohup 在后台启动服务(日志按天分流) | |
| 267 | 274 | cd "${CLIP_SERVER_DIR}" |
| 268 | -nohup python -m clip_server "${TEMP_FLOW_FILE}" > "${LOG_FILE}" 2>&1 & | |
| 275 | +nohup bash -lc "exec python -m clip_server \"${TEMP_FLOW_FILE}\" > >(\"${LOG_ROUTER_SCRIPT}\" \"cnclip\" \"${LOG_DIR}\" \"${LOG_RETENTION_DAYS:-30}\") 2>&1" >/dev/null 2>&1 & | |
| 269 | 276 | |
| 270 | 277 | # 保存 PID |
| 271 | 278 | SERVICE_PID=$! |
| ... | ... | @@ -293,7 +300,7 @@ if ps -p ${SERVICE_PID} > /dev/null 2>&1; then |
| 293 | 300 | echo -e " r = c.encode(['测试文本'])" |
| 294 | 301 | echo "" |
| 295 | 302 | echo -e "查看日志:" |
| 296 | - echo -e " tail -f ${LOG_FILE}" | |
| 303 | + echo -e " tail -F ${LOG_LINK}" | |
| 297 | 304 | echo "" |
| 298 | 305 | echo -e "停止服务:" |
| 299 | 306 | echo -e " ./scripts/stop_cnclip_service.sh" |
| ... | ... | @@ -303,7 +310,7 @@ if ps -p ${SERVICE_PID} > /dev/null 2>&1; then |
| 303 | 310 | echo -e "${YELLOW}等待模型加载完成(约30-60秒)...${NC}" |
| 304 | 311 | sleep 30 |
| 305 | 312 | echo -e "${GREEN}✓ 服务已启动,请查看日志确认模型是否加载完成${NC}" |
| 306 | - echo -e "${YELLOW}查看日志: tail -f ${LOG_FILE}${NC}" | |
| 313 | + echo -e "${YELLOW}查看日志: tail -F ${LOG_LINK}${NC}" | |
| 307 | 314 | |
| 308 | 315 | else |
| 309 | 316 | echo -e "${RED}========================================${NC}" |
| ... | ... | @@ -311,7 +318,7 @@ else |
| 311 | 318 | echo -e "${RED}========================================${NC}" |
| 312 | 319 | echo "" |
| 313 | 320 | echo -e "请查看日志获取详细错误信息:" |
| 314 | - echo -e " tail -f ${LOG_FILE}" | |
| 321 | + echo -e " tail -F ${LOG_LINK}" | |
| 315 | 322 | echo "" |
| 316 | 323 | rm -f "${PID_FILE}" |
| 317 | 324 | exit 1 | ... | ... |
search/searcher.py
| ... | ... | @@ -8,6 +8,7 @@ from typing import Dict, Any, List, Optional, Union |
| 8 | 8 | import os |
| 9 | 9 | import time, json |
| 10 | 10 | import logging |
| 11 | +import hashlib | |
| 11 | 12 | |
| 12 | 13 | from utils.es_client import ESClient |
| 13 | 14 | from query import QueryParser, ParsedQuery |
| ... | ... | @@ -21,6 +22,15 @@ from api.result_formatter import ResultFormatter |
| 21 | 22 | from indexer.mapping_generator import get_tenant_index_name |
| 22 | 23 | |
| 23 | 24 | logger = logging.getLogger(__name__) |
| 25 | +backend_verbose_logger = logging.getLogger("backend.verbose") | |
| 26 | + | |
| 27 | + | |
| 28 | +def _log_backend_verbose(payload: Dict[str, Any]) -> None: | |
| 29 | + if not backend_verbose_logger.handlers: | |
| 30 | + return | |
| 31 | + backend_verbose_logger.info( | |
| 32 | + json.dumps(payload, ensure_ascii=False, separators=(",", ":")) | |
| 33 | + ) | |
| 24 | 34 | |
| 25 | 35 | |
| 26 | 36 | class SearchResult: |
| ... | ... | @@ -320,16 +330,33 @@ class Searcher: |
| 320 | 330 | context.store_intermediate_result('es_query', es_query) |
| 321 | 331 | context.store_intermediate_result('es_body_for_search', body_for_es) |
| 322 | 332 | |
| 323 | - # Serialize ES query as a compact JSON string (no spaces or newlines) | |
| 324 | - es_query_compact = json.dumps(es_query, ensure_ascii=False, separators=(',', ':')) | |
| 333 | + # Serialize ES query to compute a compact size + stable digest for correlation | |
| 334 | + es_query_compact = json.dumps(es_query, ensure_ascii=False, separators=(",", ":")) | |
| 335 | + es_query_digest = hashlib.sha256(es_query_compact.encode("utf-8")).hexdigest()[:16] | |
| 336 | + knn_enabled = bool(enable_embedding and parsed_query.query_vector is not None) | |
| 337 | + vector_dims = int(len(parsed_query.query_vector)) if parsed_query.query_vector is not None else 0 | |
| 325 | 338 | |
| 326 | 339 | context.logger.info( |
| 327 | - f"ES query built | size: {len(es_query_compact)} chars | " | |
| 328 | - f"KNN: {'yes' if enable_embedding and parsed_query.query_vector is not None else 'no'} | " | |
| 329 | - f"facets: {'yes' if facets else 'no'} | " | |
| 330 | - f"query: {es_query_compact}", | |
| 340 | + "ES query built | size: %s chars | digest: %s | KNN: %s | vector_dims: %s | facets: %s", | |
| 341 | + len(es_query_compact), | |
| 342 | + es_query_digest, | |
| 343 | + "yes" if knn_enabled else "no", | |
| 344 | + vector_dims, | |
| 345 | + "yes" if facets else "no", | |
| 331 | 346 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 332 | 347 | ) |
| 348 | + _log_backend_verbose({ | |
| 349 | + "event": "es_query_built", | |
| 350 | + "reqid": context.reqid, | |
| 351 | + "uid": context.uid, | |
| 352 | + "tenant_id": tenant_id, | |
| 353 | + "size_chars": len(es_query_compact), | |
| 354 | + "sha256_16": es_query_digest, | |
| 355 | + "knn_enabled": knn_enabled, | |
| 356 | + "vector_dims": vector_dims, | |
| 357 | + "has_facets": bool(facets), | |
| 358 | + "query": es_query, | |
| 359 | + }) | |
| 333 | 360 | except Exception as e: |
| 334 | 361 | context.set_error(e) |
| 335 | 362 | context.logger.error( | ... | ... |