From 28e57bb16712929459a9d1eb43c2800a94ef2d8b Mon Sep 17 00:00:00 2001 From: tangwang Date: Wed, 11 Mar 2026 23:04:17 +0800 Subject: [PATCH] 日志体系优化 --- README.md | 2 +- api/app.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----------- api/routes/search.py | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------ docs/QUICKSTART.md | 15 +++++---------- docs/TEI_SERVICE说明文档.md | 2 +- docs/Usage-Guide.md | 30 +++++++++++------------------- embeddings/server.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++-- reranker/server.py | 33 +++++++++++++++++++++++++++++++-- scripts/daily_log_router.sh | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scripts/service_ctl.sh | 25 ++++++++++++++++++++----- scripts/start.sh | 4 ++-- scripts/start_cnclip_service.sh | 21 ++++++++++++++------- search/searcher.py | 39 +++++++++++++++++++++++++++++++++------ 13 files changed, 337 insertions(+), 78 deletions(-) create mode 100755 scripts/daily_log_router.sh diff --git a/README.md b/README.md index d63ce8c..6284c40 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ source activate.sh ./run.sh # 可选:附加能力服务(按需开启) -./scripts/service_ctl.sh start embedding translator reranker tei cnclip +./scripts/service_ctl.sh start tei cnclip embedding translator reranker # 查看状态 ./scripts/service_ctl.sh status diff --git a/api/app.py b/api/app.py index c11fc30..da821a9 100644 --- a/api/app.py +++ b/api/app.py @@ -11,6 +11,7 @@ import logging import time import argparse import uvicorn +from logging.handlers import TimedRotatingFileHandler from collections import defaultdict, deque from typing import Optional from fastapi import FastAPI, Request, HTTPException @@ -23,18 +24,60 @@ from slowapi import Limiter, _rate_limit_exceeded_handler from slowapi.util import get_remote_address from slowapi.errors import RateLimitExceeded -# Configure logging with better formatting +# Configure backend logging import pathlib -log_dir = pathlib.Path('logs') -log_dir.mkdir(exist_ok=True) -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - handlers=[ - logging.StreamHandler(), - logging.FileHandler(log_dir / 'api.log', mode='a', encoding='utf-8') - ] -) + + +def configure_backend_logging() -> None: + log_dir = pathlib.Path("logs") + log_dir.mkdir(exist_ok=True) + log_level = os.getenv("LOG_LEVEL", "INFO").upper() + numeric_level = getattr(logging, log_level, logging.INFO) + + default_formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + + root_logger = logging.getLogger() + root_logger.setLevel(numeric_level) + root_logger.handlers.clear() + + console_handler = logging.StreamHandler() + console_handler.setLevel(numeric_level) + console_handler.setFormatter(default_formatter) + root_logger.addHandler(console_handler) + + backend_handler = TimedRotatingFileHandler( + filename=log_dir / "api.log", + when="midnight", + interval=1, + backupCount=30, + encoding="utf-8", + ) + backend_handler.setLevel(numeric_level) + backend_handler.setFormatter(default_formatter) + root_logger.addHandler(backend_handler) + + verbose_logger = logging.getLogger("backend.verbose") + verbose_logger.setLevel(numeric_level) + verbose_logger.handlers.clear() + verbose_logger.propagate = False + + verbose_handler = TimedRotatingFileHandler( + filename=log_dir / "backend_verbose.log", + when="midnight", + interval=1, + backupCount=30, + encoding="utf-8", + ) + verbose_handler.setLevel(numeric_level) + verbose_handler.setFormatter( + logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + ) + verbose_logger.addHandler(verbose_handler) + + +configure_backend_logging() logger = logging.getLogger(__name__) # Initialize rate limiter diff --git a/api/routes/search.py b/api/routes/search.py index 5a9401f..72e91b8 100644 --- a/api/routes/search.py +++ b/api/routes/search.py @@ -5,6 +5,9 @@ Search API routes. from fastapi import APIRouter, HTTPException, Query, Request from typing import Optional import uuid +import hashlib +import json +import logging from ..models import ( SearchRequest, @@ -17,6 +20,15 @@ from ..models import ( from context.request_context import create_request_context, set_current_request_context, clear_current_request_context router = APIRouter(prefix="/search", tags=["search"]) +backend_verbose_logger = logging.getLogger("backend.verbose") + + +def _log_backend_verbose(payload: dict) -> None: + if not backend_verbose_logger.handlers: + return + backend_verbose_logger.info( + json.dumps(payload, ensure_ascii=False, separators=(",", ":")) + ) def extract_request_info(request: Request) -> tuple[str, str]: @@ -136,15 +148,32 @@ async def search(request: SearchRequest, http_request: Request): debug_info=result.debug_info ) - # Log complete response JSON + response_payload = response.model_dump(mode="json") + response_json = json.dumps(response_payload, ensure_ascii=False, separators=(",", ":")) + response_digest = hashlib.sha256(response_json.encode("utf-8")).hexdigest()[:16] + max_score = float(response.max_score or 0.0) + context.logger.info( - "Search response | " - f"Total results: {response.total} | " - f"Max score: {response.max_score:.4f} | " - f"Time: {response.took_ms}ms | " - f"Response: {response.model_dump_json()}", + "Search response | Total results: %s | Max score: %.4f | Time: %sms | payload_size: %s chars | digest: %s", + response.total, + max_score, + response.took_ms, + len(response_json), + response_digest, extra={'reqid': context.reqid, 'uid': context.uid} ) + _log_backend_verbose({ + "event": "search_response", + "reqid": context.reqid, + "uid": context.uid, + "tenant_id": tenant_id, + "total_results": response.total, + "max_score": max_score, + "took_ms": response.took_ms, + "payload_size_chars": len(response_json), + "sha256_16": response_digest, + "response": response_payload, + }) return response @@ -233,15 +262,32 @@ async def search_by_image(request: ImageSearchRequest, http_request: Request): performance_info=performance_summary ) - # Log complete response JSON + response_payload = response.model_dump(mode="json") + response_json = json.dumps(response_payload, ensure_ascii=False, separators=(",", ":")) + response_digest = hashlib.sha256(response_json.encode("utf-8")).hexdigest()[:16] + max_score = float(response.max_score or 0.0) + context.logger.info( - "Image search response | " - f"Total results: {response.total} | " - f"Max score: {response.max_score:.4f} | " - f"Time: {response.took_ms}ms | " - f"Response: {response.model_dump_json()}", + "Image search response | Total results: %s | Max score: %.4f | Time: %sms | payload_size: %s chars | digest: %s", + response.total, + max_score, + response.took_ms, + len(response_json), + response_digest, extra={'reqid': context.reqid, 'uid': context.uid} ) + _log_backend_verbose({ + "event": "image_search_response", + "reqid": context.reqid, + "uid": context.uid, + "tenant_id": tenant_id, + "total_results": response.total, + "max_score": max_score, + "took_ms": response.took_ms, + "payload_size_chars": len(response_json), + "sha256_16": response_digest, + "response": response_payload, + }) return response diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md index 7d2d603..9f8e74c 100644 --- a/docs/QUICKSTART.md +++ b/docs/QUICKSTART.md @@ -510,16 +510,11 @@ curl http://localhost:6007/health ### 5.2 常看日志 -- `logs/backend.log` -- `logs/indexer.log` -- `logs/frontend.log` -- `logs/embedding.log` -- `logs/translator.log` -- `logs/reranker.log` -- `logs/tei.log` -- `logs/cnclip.log` -- `logs/search_engine.log` -- `logs/errors.log` +- `logs/-YYYY-MM-DD.log`(`service_ctl.sh` 按天写入的真实文件) +- `logs/.log`(指向当天文件的软链,推荐 `tail -F`) +- `logs/api.log`(backend 进程内日志,按天轮转) +- `logs/backend_verbose.log`(backend 大对象详细日志,按天轮转) +- `logs/indexer.log`(索引结构化 JSON 日志,按天轮转) ### 5.3 常用排障命令 diff --git a/docs/TEI_SERVICE说明文档.md b/docs/TEI_SERVICE说明文档.md index b20d3ef..f825b5f 100644 --- a/docs/TEI_SERVICE说明文档.md +++ b/docs/TEI_SERVICE说明文档.md @@ -152,7 +152,7 @@ curl -sS -X POST "http://127.0.0.1:6005/embed/text" \ 启动全套(含 TEI): ```bash -TEI_DEVICE=cuda ./scripts/service_ctl.sh start embedding translator reranker tei cnclip +TEI_DEVICE=cuda ./scripts/service_ctl.sh start tei cnclip embedding translator reranker ``` 仅启动 TEI: diff --git a/docs/Usage-Guide.md b/docs/Usage-Guide.md index 9cd4786..3837ee9 100644 --- a/docs/Usage-Guide.md +++ b/docs/Usage-Guide.md @@ -368,31 +368,23 @@ RERANKER_PORT=6007 日志文件存储在 `logs/` 目录下: -- `logs/backend.log` - 后端服务日志 -- `logs/indexer.log` - 索引服务日志 -- `logs/frontend.log` - 前端服务日志 -- `logs/embedding.log` - 向量服务日志(可选) -- `logs/translator.log` - 翻译服务日志(可选) -- `logs/reranker.log` - 重排服务日志(可选) -- `logs/tei.log` - TEI 启停日志(可选) -- `logs/cnclip.log` - CN-CLIP 启停日志(可选) -- `logs/search_engine.log` - 应用主日志(按天轮转) -- `logs/errors.log` - 错误日志(按天轮转) +- `logs/-YYYY-MM-DD.log` - `service_ctl.sh` 统一管理的按天日志文件(真实文件) +- `logs/.log` - 指向当天日志文件的软链(兼容原有命令) +- `logs/api.log` - backend 进程内日志(按天轮转) +- `logs/backend_verbose.log` - backend 详细大对象日志(按天轮转,含 `Search response` / `ES query built` 完整内容) +- `logs/indexer.log` - 索引结构化日志(JSON,按天轮转) ### 查看实时日志 ```bash -# 查看后端日志 -tail -f logs/backend.log +# 查看后端日志(建议 -F,跨天自动跟随新文件) +tail -F logs/backend.log # 查看前端日志 -tail -f logs/frontend.log +tail -F logs/frontend.log -# 查看应用主日志 -tail -f logs/search_engine.log - -# 查看错误日志 -tail -f logs/errors.log +# 查看 backend 详细大对象日志 +tail -F logs/backend_verbose.log ``` ### 日志级别 @@ -406,7 +398,7 @@ LOG_LEVEL=DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL ### 日志轮转 -日志文件按天自动轮转,保留30天的历史日志。 +日志文件按天自动轮转,默认保留 30 天历史日志(可通过 `LOG_RETENTION_DAYS` 调整 `service_ctl.sh` 管理日志的保留天数)。 --- diff --git a/embeddings/server.py b/embeddings/server.py index ebcaec8..4aafcd8 100644 --- a/embeddings/server.py +++ b/embeddings/server.py @@ -56,6 +56,29 @@ _TEXT_MICROBATCH_WINDOW_SEC = max( _TEXT_REQUEST_TIMEOUT_SEC = max( 1.0, float(os.getenv("TEXT_REQUEST_TIMEOUT_SEC", "30")) ) +_LOG_PREVIEW_COUNT = max(1, int(os.getenv("EMBEDDING_LOG_PREVIEW_COUNT", "3"))) +_LOG_TEXT_PREVIEW_CHARS = max(32, int(os.getenv("EMBEDDING_LOG_TEXT_PREVIEW_CHARS", "120"))) +_LOG_IMAGE_PREVIEW_CHARS = max(32, int(os.getenv("EMBEDDING_LOG_IMAGE_PREVIEW_CHARS", "180"))) + + +def _compact_preview(text: str, max_chars: int) -> str: + compact = " ".join((text or "").split()) + if len(compact) <= max_chars: + return compact + return compact[:max_chars] + "..." + + +def _preview_inputs(items: List[str], max_items: int, max_chars: int) -> List[Dict[str, Any]]: + previews: List[Dict[str, Any]] = [] + for idx, item in enumerate(items[:max_items]): + previews.append( + { + "idx": idx, + "len": len(item), + "preview": _compact_preview(item, max_chars), + } + ) + return previews def _encode_local_st(texts: List[str], normalize_embeddings: bool) -> Any: @@ -292,6 +315,14 @@ def embed_text(texts: List[str], normalize: Optional[bool] = None) -> List[Optio raise HTTPException(status_code=400, detail=f"Invalid text at index {i}: empty string") normalized.append(s) + logger.info( + "embed_text request | backend=%s inputs=%d normalize=%s preview=%s", + _text_backend_name, + len(normalized), + effective_normalize, + _preview_inputs(normalized, _LOG_PREVIEW_COUNT, _LOG_TEXT_PREVIEW_CHARS), + ) + t0 = time.perf_counter() try: # local_st backend uses in-process torch model, keep serialized encode for safety; @@ -301,10 +332,11 @@ def embed_text(texts: List[str], normalize: Optional[bool] = None) -> List[Optio out = [_encode_single_text_with_microbatch(normalized[0], normalize=effective_normalize)] elapsed_ms = (time.perf_counter() - t0) * 1000.0 logger.info( - "embed_text done | backend=%s mode=microbatch-single inputs=%d normalize=%s elapsed_ms=%.2f", + "embed_text done | backend=%s mode=microbatch-single inputs=%d normalize=%s dim=%d elapsed_ms=%.2f", _text_backend_name, len(normalized), effective_normalize, + len(out[0]) if out and out[0] is not None else 0, elapsed_ms, ) return out @@ -335,10 +367,11 @@ def embed_text(texts: List[str], normalize: Optional[bool] = None) -> List[Optio out.append(vec) elapsed_ms = (time.perf_counter() - t0) * 1000.0 logger.info( - "embed_text done | backend=%s inputs=%d normalize=%s elapsed_ms=%.2f", + "embed_text done | backend=%s inputs=%d normalize=%s dim=%d elapsed_ms=%.2f", _text_backend_name, len(normalized), effective_normalize, + len(out[0]) if out and out[0] is not None else 0, elapsed_ms, ) return out @@ -358,6 +391,14 @@ def embed_image(images: List[str], normalize: Optional[bool] = None) -> List[Opt raise HTTPException(status_code=400, detail=f"Invalid image at index {i}: empty URL/path") urls.append(s) + logger.info( + "embed_image request | inputs=%d normalize=%s preview=%s", + len(urls), + effective_normalize, + _preview_inputs(urls, _LOG_PREVIEW_COUNT, _LOG_IMAGE_PREVIEW_CHARS), + ) + + t0 = time.perf_counter() with _image_encode_lock: vectors = _image_model.encode_image_urls( urls, @@ -375,4 +416,12 @@ def embed_image(images: List[str], normalize: Optional[bool] = None) -> List[Opt if out_vec is None: raise RuntimeError(f"Image model returned empty embedding for index {i}") out.append(out_vec) + elapsed_ms = (time.perf_counter() - t0) * 1000.0 + logger.info( + "embed_image done | inputs=%d normalize=%s dim=%d elapsed_ms=%.2f", + len(urls), + effective_normalize, + len(out[0]) if out and out[0] is not None else 0, + elapsed_ms, + ) return out diff --git a/reranker/server.py b/reranker/server.py index 39f2baa..2c46cc3 100644 --- a/reranker/server.py +++ b/reranker/server.py @@ -9,6 +9,7 @@ Backend selected via config: services.rerank.backend (bge | qwen3_vllm), env RER """ import logging +import os import time from typing import Any, Dict, List, Optional @@ -29,6 +30,28 @@ app = FastAPI(title="saas-search Reranker Service", version="1.0.0") _reranker: Optional[RerankBackendProtocol] = None _backend_name: str = "" +_LOG_DOC_PREVIEW_COUNT = max(1, int(os.getenv("RERANK_LOG_DOC_PREVIEW_COUNT", "3"))) +_LOG_TEXT_PREVIEW_CHARS = max(32, int(os.getenv("RERANK_LOG_TEXT_PREVIEW_CHARS", "180"))) + + +def _compact_preview(text: str, max_chars: int) -> str: + compact = " ".join((text or "").split()) + if len(compact) <= max_chars: + return compact + return compact[:max_chars] + "..." + + +def _preview_docs(docs: List[str], max_items: int, max_chars: int) -> List[Dict[str, Any]]: + previews: List[Dict[str, Any]] = [] + for idx, doc in enumerate(docs[:max_items]): + previews.append( + { + "idx": idx, + "len": len(doc), + "preview": _compact_preview(doc, max_chars), + } + ) + return previews class RerankRequest(BaseModel): @@ -100,19 +123,25 @@ def rerank(request: RerankRequest) -> RerankResponse: start_ts = time.time() logger.info( - "Rerank request | docs=%d normalize=%s", + "Rerank request | docs=%d normalize=%s query_len=%d query=%r doc_preview=%s", len(request.docs), normalize, + len(query), + _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS), + _preview_docs(request.docs, _LOG_DOC_PREVIEW_COUNT, _LOG_TEXT_PREVIEW_CHARS), ) scores, meta = _reranker.score_with_meta(query, request.docs, normalize=normalize) meta = dict(meta) meta.update({"service_elapsed_ms": round((time.time() - start_ts) * 1000.0, 3)}) + score_preview = [round(float(s), 6) for s in scores[:_LOG_DOC_PREVIEW_COUNT]] logger.info( - "Rerank done | docs=%d unique=%s dedup=%s elapsed_ms=%s", + "Rerank done | docs=%d unique=%s dedup=%s elapsed_ms=%s query=%r score_preview=%s", meta.get("input_docs"), meta.get("unique_docs"), meta.get("dedup_ratio"), meta.get("service_elapsed_ms"), + _compact_preview(query, _LOG_TEXT_PREVIEW_CHARS), + score_preview, ) return RerankResponse(scores=scores, meta=meta) diff --git a/scripts/daily_log_router.sh b/scripts/daily_log_router.sh new file mode 100755 index 0000000..5c9ddda --- /dev/null +++ b/scripts/daily_log_router.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# +# Route incoming log stream into per-day files. +# +# Usage: +# command 2>&1 | ./scripts/daily_log_router.sh [retention_days] +# + +set -euo pipefail + +if [ "$#" -lt 2 ]; then + echo "Usage: $0 [retention_days]" >&2 + exit 1 +fi + +SERVICE_NAME="$1" +LOG_DIR="$2" +RETENTION_DAYS="${3:-30}" + +mkdir -p "${LOG_DIR}" + +awk -v dir="${LOG_DIR}" -v service="${SERVICE_NAME}" -v retention_days="${RETENTION_DAYS}" ' +function rotate_file(day) { + return sprintf("%s/%s-%s.log", dir, service, day) +} + +function update_symlink(day) { + cmd = sprintf("ln -sfn \"%s-%s.log\" \"%s/%s.log\"", service, day, dir, service) + system(cmd) +} + +function cleanup_old_logs() { + cmd = sprintf("find \"%s\" -maxdepth 1 -type f -name \"%s-*.log\" -mtime +%d -delete >/dev/null 2>&1", dir, service, retention_days) + system(cmd) +} + +{ + day = strftime("%Y-%m-%d") + target = rotate_file(day) + + if (target != current_target) { + update_symlink(day) + cleanup_old_logs() + current_target = target + } + + print >> current_target + fflush(current_target) +} + +END { + if (current_target != "") { + close(current_target) + } +} +' diff --git a/scripts/service_ctl.sh b/scripts/service_ctl.sh index a018cb9..3dc6c20 100755 --- a/scripts/service_ctl.sh +++ b/scripts/service_ctl.sh @@ -8,6 +8,7 @@ set -euo pipefail PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" LOG_DIR="${PROJECT_ROOT}/logs" +LOG_RETENTION_DAYS="${LOG_RETENTION_DAYS:-30}" mkdir -p "${LOG_DIR}" @@ -46,6 +47,16 @@ log_file() { echo "${LOG_DIR}/${service}.log" } +prepare_daily_log_target() { + local service="$1" + local day + local today_file + day="$(date +%F)" + today_file="${LOG_DIR}/${service}-${day}.log" + touch "${today_file}" + ln -sfn "$(basename "${today_file}")" "$(log_file "${service}")" +} + service_start_cmd() { local service="$1" case "${service}" in @@ -176,6 +187,7 @@ start_one() { local pf lf pf="$(pid_file "${service}")" lf="$(log_file "${service}")" + prepare_daily_log_target "${service}" if [ "${service}" != "tei" ]; then if is_running_by_pid "${service}" || is_running_by_port "${service}"; then @@ -203,12 +215,12 @@ start_one() { cnclip|tei) echo "[start] ${service} (managed by native script)" if [ "${service}" = "cnclip" ]; then - if ! CNCLIP_DEVICE="${CNCLIP_DEVICE:-cuda}" bash -lc "${cmd}" >> "${lf}" 2>&1; then + if ! CNCLIP_DEVICE="${CNCLIP_DEVICE:-cuda}" "${cmd}" > >("${PROJECT_ROOT}/scripts/daily_log_router.sh" "${service}" "${LOG_DIR}" "${LOG_RETENTION_DAYS}") 2>&1; then echo "[error] ${service} start script failed, inspect ${lf}" >&2 return 1 fi else - if ! bash -lc "${cmd}" >> "${lf}" 2>&1; then + if ! "${cmd}" > >("${PROJECT_ROOT}/scripts/daily_log_router.sh" "${service}" "${LOG_DIR}" "${LOG_RETENTION_DAYS}") 2>&1; then echo "[error] ${service} start script failed, inspect ${lf}" >&2 return 1 fi @@ -229,7 +241,7 @@ start_one() { ;; backend|indexer|frontend|embedding|translator) echo "[start] ${service}" - nohup bash -lc "${cmd}" > "${lf}" 2>&1 & + nohup "${cmd}" > >("${PROJECT_ROOT}/scripts/daily_log_router.sh" "${service}" "${LOG_DIR}" "${LOG_RETENTION_DAYS}") 2>&1 & local pid=$! echo "${pid}" > "${pf}" local retries=30 @@ -244,7 +256,7 @@ start_one() { echo "[start] ${service}" # Start reranker directly so pid file points to the script process that # will exec uvicorn, avoiding extra shell wrapper lifecycle edge-cases. - nohup "${cmd}" > "${lf}" 2>&1 & + nohup "${cmd}" > >("${PROJECT_ROOT}/scripts/daily_log_router.sh" "${service}" "${LOG_DIR}" "${LOG_RETENTION_DAYS}") 2>&1 & local pid=$! echo "${pid}" > "${pf}" if wait_for_health "${service}" 90; then @@ -408,9 +420,12 @@ Default target set (when no service provided): status -> all known services Optional service startup: - ./scripts/service_ctl.sh start embedding translator reranker tei cnclip + ./scripts/service_ctl.sh start tei cnclip embedding translator reranker TEI_DEVICE=cuda|cpu ./scripts/service_ctl.sh start tei CNCLIP_DEVICE=cuda|cpu ./scripts/service_ctl.sh start cnclip + +Log retention: + LOG_RETENTION_DAYS=30 ./scripts/service_ctl.sh start EOF } diff --git a/scripts/start.sh b/scripts/start.sh index 5c30615..2a1b81d 100755 --- a/scripts/start.sh +++ b/scripts/start.sh @@ -12,11 +12,11 @@ echo "saas-search 服务启动" echo "========================================" echo "默认启动核心服务: backend/indexer/frontend" echo "可选服务请显式指定:" -echo " ./scripts/service_ctl.sh start embedding translator reranker tei cnclip" +echo " ./scripts/service_ctl.sh start tei cnclip embedding translator reranker" echo ./scripts/service_ctl.sh start echo echo "当前服务状态:" -./scripts/service_ctl.sh status backend indexer frontend embedding translator reranker tei cnclip +./scripts/service_ctl.sh status backend indexer frontend tei cnclip embedding translator reranker diff --git a/scripts/start_cnclip_service.sh b/scripts/start_cnclip_service.sh index 3069b28..c3823cb 100755 --- a/scripts/start_cnclip_service.sh +++ b/scripts/start_cnclip_service.sh @@ -43,7 +43,9 @@ PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" CLIP_SERVER_DIR="${PROJECT_ROOT}/third-party/clip-as-service/server" LOG_DIR="${PROJECT_ROOT}/logs" PID_FILE="${LOG_DIR}/cnclip.pid" -LOG_FILE="${LOG_DIR}/cnclip.log" +LOG_LINK="${LOG_DIR}/cnclip.log" +LOG_FILE="${LOG_DIR}/cnclip-$(date +%F).log" +LOG_ROUTER_SCRIPT="${PROJECT_ROOT}/scripts/daily_log_router.sh" # 帮助信息 show_help() { @@ -128,6 +130,11 @@ fi # 创建日志目录 mkdir -p "${LOG_DIR}" +if [ ! -x "${LOG_ROUTER_SCRIPT}" ]; then + echo -e "${RED}错误: 日志路由脚本不存在或不可执行: ${LOG_ROUTER_SCRIPT}${NC}" + exit 1 +fi +ln -sfn "$(basename "${LOG_FILE}")" "${LOG_LINK}" # 检查是否已经有服务在运行 if [ -f "${PID_FILE}" ]; then @@ -208,7 +215,7 @@ echo " 服务端口: ${PORT}" echo " 协议: gRPC (默认,官方推荐)" echo " 其他参数: 使用官方默认值" echo " 副本数: ${REPLICAS}" -echo " 日志文件: ${LOG_FILE}" +echo " 日志文件: ${LOG_LINK}" echo "" # 副本数显存警告 @@ -263,9 +270,9 @@ EOF echo -e "${GREEN}✓ 已生成配置文件: ${TEMP_FLOW_FILE}${NC}" -# 使用 nohup 在后台启动服务 +# 使用 nohup 在后台启动服务(日志按天分流) cd "${CLIP_SERVER_DIR}" -nohup python -m clip_server "${TEMP_FLOW_FILE}" > "${LOG_FILE}" 2>&1 & +nohup bash -lc "exec python -m clip_server \"${TEMP_FLOW_FILE}\" > >(\"${LOG_ROUTER_SCRIPT}\" \"cnclip\" \"${LOG_DIR}\" \"${LOG_RETENTION_DAYS:-30}\") 2>&1" >/dev/null 2>&1 & # 保存 PID SERVICE_PID=$! @@ -293,7 +300,7 @@ if ps -p ${SERVICE_PID} > /dev/null 2>&1; then echo -e " r = c.encode(['测试文本'])" echo "" echo -e "查看日志:" - echo -e " tail -f ${LOG_FILE}" + echo -e " tail -F ${LOG_LINK}" echo "" echo -e "停止服务:" echo -e " ./scripts/stop_cnclip_service.sh" @@ -303,7 +310,7 @@ if ps -p ${SERVICE_PID} > /dev/null 2>&1; then echo -e "${YELLOW}等待模型加载完成(约30-60秒)...${NC}" sleep 30 echo -e "${GREEN}✓ 服务已启动,请查看日志确认模型是否加载完成${NC}" - echo -e "${YELLOW}查看日志: tail -f ${LOG_FILE}${NC}" + echo -e "${YELLOW}查看日志: tail -F ${LOG_LINK}${NC}" else echo -e "${RED}========================================${NC}" @@ -311,7 +318,7 @@ else echo -e "${RED}========================================${NC}" echo "" echo -e "请查看日志获取详细错误信息:" - echo -e " tail -f ${LOG_FILE}" + echo -e " tail -F ${LOG_LINK}" echo "" rm -f "${PID_FILE}" exit 1 diff --git a/search/searcher.py b/search/searcher.py index 1d7a1ac..378e0a7 100644 --- a/search/searcher.py +++ b/search/searcher.py @@ -8,6 +8,7 @@ from typing import Dict, Any, List, Optional, Union import os import time, json import logging +import hashlib from utils.es_client import ESClient from query import QueryParser, ParsedQuery @@ -21,6 +22,15 @@ from api.result_formatter import ResultFormatter from indexer.mapping_generator import get_tenant_index_name logger = logging.getLogger(__name__) +backend_verbose_logger = logging.getLogger("backend.verbose") + + +def _log_backend_verbose(payload: Dict[str, Any]) -> None: + if not backend_verbose_logger.handlers: + return + backend_verbose_logger.info( + json.dumps(payload, ensure_ascii=False, separators=(",", ":")) + ) class SearchResult: @@ -320,16 +330,33 @@ class Searcher: context.store_intermediate_result('es_query', es_query) context.store_intermediate_result('es_body_for_search', body_for_es) - # Serialize ES query as a compact JSON string (no spaces or newlines) - es_query_compact = json.dumps(es_query, ensure_ascii=False, separators=(',', ':')) + # Serialize ES query to compute a compact size + stable digest for correlation + es_query_compact = json.dumps(es_query, ensure_ascii=False, separators=(",", ":")) + es_query_digest = hashlib.sha256(es_query_compact.encode("utf-8")).hexdigest()[:16] + knn_enabled = bool(enable_embedding and parsed_query.query_vector is not None) + vector_dims = int(len(parsed_query.query_vector)) if parsed_query.query_vector is not None else 0 context.logger.info( - f"ES query built | size: {len(es_query_compact)} chars | " - f"KNN: {'yes' if enable_embedding and parsed_query.query_vector is not None else 'no'} | " - f"facets: {'yes' if facets else 'no'} | " - f"query: {es_query_compact}", + "ES query built | size: %s chars | digest: %s | KNN: %s | vector_dims: %s | facets: %s", + len(es_query_compact), + es_query_digest, + "yes" if knn_enabled else "no", + vector_dims, + "yes" if facets else "no", extra={'reqid': context.reqid, 'uid': context.uid} ) + _log_backend_verbose({ + "event": "es_query_built", + "reqid": context.reqid, + "uid": context.uid, + "tenant_id": tenant_id, + "size_chars": len(es_query_compact), + "sha256_16": es_query_digest, + "knn_enabled": knn_enabled, + "vector_dims": vector_dims, + "has_facets": bool(facets), + "query": es_query, + }) except Exception as e: context.set_error(e) context.logger.error( -- libgit2 0.21.2