Blame view

scripts/start_tei_service.sh 5.99 KB
07cf5a93   tangwang   START_EMBEDDING=...
1
2
3
4
5
6
7
8
9
  #!/bin/bash
  #
  # Launch the Hugging Face TEI (text-embeddings-inference) service for
  # Qwen3-Embedding-0.6B inside a Docker container.
  #
  # Strict mode: stop on a failing command, on use of an unset variable, and
  # when any stage of a pipeline fails.
  set -o errexit -o nounset -o pipefail

  # The project root is the parent of the directory holding this script; all
  # later relative paths are resolved from there.
  PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
  cd "${PROJECT_ROOT}"
  
7fbca0d7   tangwang   启动脚本优化
10
11
12
13
  # Source the env-loading helper, then hand it the project's .env file.
  # shellcheck source=scripts/lib/load_env.sh
  . "${PROJECT_ROOT}/scripts/lib/load_env.sh"
  load_env_file "${PROJECT_ROOT}/.env"
07cf5a93   tangwang   START_EMBEDDING=...
14
15
16
17
18
19
20
21
22
23
  
  # Docker is a hard prerequisite: stop immediately when the CLI is not on PATH.
  command -v docker >/dev/null 2>&1 || {
    echo "ERROR: docker is required to run TEI service." >&2
    exit 1
  }
  
  # Container/image settings. A non-empty value already present in the
  # environment wins; otherwise the default shown here is assigned.
  : "${TEI_CONTAINER_NAME:=saas-search-tei}"
  : "${TEI_PORT:=8080}"
  : "${TEI_MODEL_ID:=Qwen/Qwen3-Embedding-0.6B}"
  : "${TEI_VERSION:=1.9}"
efd435cf   tangwang   tei性能调优:
24
25
  # Batching limits later passed to the TEI launcher as --max-batch-tokens and
  # --max-client-batch-size; overridable through the environment.
  : "${TEI_MAX_BATCH_TOKENS:=4096}"
  : "${TEI_MAX_CLIENT_BATCH_SIZE:=24}"
07cf5a93   tangwang   START_EMBEDDING=...
26
27
28
29
  # Model dtype, the host directory mounted into the container as /data, and
  # the number of one-second health polls to attempt before giving up.
  : "${TEI_DTYPE:=float16}"
  : "${HF_CACHE_DIR:=$HOME/.cache/huggingface}"
  : "${TEI_HEALTH_TIMEOUT_SEC:=300}"
  
af7ee060   tangwang   service_ctl 简化为“显...
30
31
32
33
  # Keep the device exactly as requested (default: cuda) for error messages,
  # and derive a lower-cased copy for comparisons.
  TEI_DEVICE_RAW="${TEI_DEVICE:-cuda}"
  TEI_DEVICE="$(printf '%s\n' "${TEI_DEVICE_RAW}" | tr '[:upper:]' '[:lower:]')"
  if [[ "${TEI_DEVICE}" != "cuda" && "${TEI_DEVICE}" != "cpu" ]]; then
    echo "ERROR: invalid TEI_DEVICE=${TEI_DEVICE_RAW}. Use cuda/cpu." >&2
07cf5a93   tangwang   START_EMBEDDING=...
34
35
36
    exit 1
  fi
  
efd435cf   tangwang   tei性能调优:
37
38
39
40
41
42
43
44
45
46
47
48
  #######################################
  # Choose the TEI GPU image that matches the first GPU reported by nvidia-smi.
  # Pre-Ampere GPUs (compute capability major < 8, e.g. Tesla T4 at 7.5) get the
  # "turing-" image; everything else gets the "cuda-" image. When the compute
  # capability cannot be read, or is not a plain "major[.minor]" number, the
  # "cuda-" image is chosen.
  # Globals:   TEI_VERSION (read)
  # Arguments: none
  # Outputs:   image reference on stdout
  #######################################
  detect_gpu_tei_image() {
    local compute_cap major
    # A failing or missing nvidia-smi is tolerated here; it simply yields an
    # empty value and therefore the "cuda-" image.
    compute_cap="$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -n1 || true)"
    # Drop any padding or carriage return around the value.
    compute_cap="${compute_cap//[[:space:]]/}"
    major="${compute_cap%%.*}"
    # Compare only when the major part is purely numeric. Feeding text such as
    # "[N/A]" or an nvidia-smi error message to an arithmetic test makes bash
    # print an arithmetic error. 10# forces base 10 so a leading zero is not
    # treated as octal.
    if [[ "${major}" =~ ^[0-9]+$ ]] && (( 10#${major} < 8 )); then
      echo "ghcr.io/huggingface/text-embeddings-inference:turing-${TEI_VERSION}"
    else
      echo "ghcr.io/huggingface/text-embeddings-inference:cuda-${TEI_VERSION}"
    fi
  }
  
af7ee060   tangwang   service_ctl 简化为“显...
49
  if [[ "${TEI_DEVICE}" == "cuda" ]]; then
07cf5a93   tangwang   START_EMBEDDING=...
50
    if ! command -v nvidia-smi >/dev/null 2>&1 || ! nvidia-smi >/dev/null 2>&1; then
af7ee060   tangwang   service_ctl 简化为“显...
51
      echo "ERROR: TEI_DEVICE=cuda but NVIDIA GPU is not available. No CPU fallback." >&2
07cf5a93   tangwang   START_EMBEDDING=...
52
53
54
      exit 1
    fi
    if ! docker info --format '{{json .Runtimes}}' 2>/dev/null | grep -q 'nvidia'; then
af7ee060   tangwang   service_ctl 简化为“显...
55
      echo "ERROR: TEI_DEVICE=cuda but Docker nvidia runtime is not configured." >&2
07cf5a93   tangwang   START_EMBEDDING=...
56
57
58
      echo "Install and configure nvidia-container-toolkit, then restart Docker." >&2
      exit 1
    fi
efd435cf   tangwang   tei性能调优:
59
    TEI_IMAGE="${TEI_IMAGE:-$(detect_gpu_tei_image)}"
07cf5a93   tangwang   START_EMBEDDING=...
60
    GPU_ARGS=(--gpus all)
af7ee060   tangwang   service_ctl 简化为“显...
61
    TEI_MODE="cuda"
07cf5a93   tangwang   START_EMBEDDING=...
62
63
64
65
66
67
68
69
70
71
72
73
74
75
  else
    TEI_IMAGE="${TEI_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:${TEI_VERSION}}"
    GPU_ARGS=()
    TEI_MODE="cpu"
  fi
  
  mkdir -p "${HF_CACHE_DIR}"
  
  existing_id="$(docker ps -aq -f name=^/${TEI_CONTAINER_NAME}$)"
  if [[ -n "${existing_id}" ]]; then
    running_id="$(docker ps -q -f name=^/${TEI_CONTAINER_NAME}$)"
    if [[ -n "${running_id}" ]]; then
      current_image="$(docker inspect "${TEI_CONTAINER_NAME}" --format '{{.Config.Image}}' 2>/dev/null || true)"
      device_req="$(docker inspect "${TEI_CONTAINER_NAME}" --format '{{json .HostConfig.DeviceRequests}}' 2>/dev/null || true)"
efd435cf   tangwang   tei性能调优:
76
77
78
79
      current_is_gpu_image=0
      if [[ "${current_image}" == *":cuda-"* || "${current_image}" == *":turing-"* ]]; then
        current_is_gpu_image=1
      fi
af7ee060   tangwang   service_ctl 简化为“显...
80
      if [[ "${TEI_DEVICE}" == "cuda" ]]; then
efd435cf   tangwang   tei性能调优:
81
        if [[ "${current_is_gpu_image}" -eq 1 ]] && [[ "${device_req}" != "null" ]] && [[ "${current_image}" == "${TEI_IMAGE}" ]]; then
af7ee060   tangwang   service_ctl 简化为“显...
82
          echo "TEI already running (CUDA): ${TEI_CONTAINER_NAME}"
efd435cf   tangwang   tei性能调优:
83
          exit 0
07cf5a93   tangwang   START_EMBEDDING=...
84
        fi
efd435cf   tangwang   tei性能调优:
85
86
87
88
        echo "TEI running with different mode/image; recreating container ${TEI_CONTAINER_NAME}"
        echo "  current_image=${current_image:-unknown}"
        echo "  target_image=${TEI_IMAGE}"
        docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
07cf5a93   tangwang   START_EMBEDDING=...
89
      else
efd435cf   tangwang   tei性能调优:
90
91
92
        if [[ "${current_is_gpu_image}" -eq 0 ]] && [[ "${device_req}" == "null" ]] && [[ "${current_image}" == "${TEI_IMAGE}" ]]; then
          echo "TEI already running (CPU): ${TEI_CONTAINER_NAME}"
          exit 0
07cf5a93   tangwang   START_EMBEDDING=...
93
        fi
efd435cf   tangwang   tei性能调优:
94
95
96
97
        echo "TEI running with different mode/image; recreating container ${TEI_CONTAINER_NAME}"
        echo "  current_image=${current_image:-unknown}"
        echo "  target_image=${TEI_IMAGE}"
        docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
07cf5a93   tangwang   START_EMBEDDING=...
98
      fi
07cf5a93   tangwang   START_EMBEDDING=...
99
    fi
efd435cf   tangwang   tei性能调优:
100
101
102
    if docker ps -aq -f name=^/${TEI_CONTAINER_NAME}$ | grep -q .; then
      docker rm "${TEI_CONTAINER_NAME}" >/dev/null
    fi
07cf5a93   tangwang   START_EMBEDDING=...
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
  fi
  
  # Report the effective launch parameters before creating the container.
  echo "Starting TEI container: ${TEI_CONTAINER_NAME}"
  echo "Image: ${TEI_IMAGE}"
  echo "Model: ${TEI_MODEL_ID}"
  echo "Port: ${TEI_PORT}"
  echo "Mode: ${TEI_MODE}"

  # Create the container detached: host port TEI_PORT is published to container
  # port 80, HF_CACHE_DIR is mounted at /data, and HF_TOKEN is forwarded (empty
  # when unset). Everything after the image name is passed to the TEI launcher.
  #
  # GPU_ARGS is an empty array in CPU mode. The ${arr[@]+"${arr[@]}"} form
  # expands to nothing in that case instead of raising "unbound variable" under
  # `set -u` on bash releases older than 4.4.
  docker run -d \
    --name "${TEI_CONTAINER_NAME}" \
    -p "${TEI_PORT}:80" \
    ${GPU_ARGS[@]+"${GPU_ARGS[@]}"} \
    -v "${HF_CACHE_DIR}:/data" \
    -e HF_TOKEN="${HF_TOKEN:-}" \
    "${TEI_IMAGE}" \
    --model-id "${TEI_MODEL_ID}" \
    --dtype "${TEI_DTYPE}" \
    --max-batch-tokens "${TEI_MAX_BATCH_TOKENS}" \
    --max-client-batch-size "${TEI_MAX_CLIENT_BATCH_SIZE}" >/dev/null
  
  echo "Waiting for TEI health..."
  for i in $(seq 1 "${TEI_HEALTH_TIMEOUT_SEC}"); do
    if curl -sf "http://127.0.0.1:${TEI_PORT}/health" >/dev/null 2>&1; then
efd435cf   tangwang   tei性能调优:
126
127
      echo "TEI health is ready: http://127.0.0.1:${TEI_PORT}"
      break
07cf5a93   tangwang   START_EMBEDDING=...
128
129
    fi
    sleep 1
efd435cf   tangwang   tei性能调优:
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
    if [[ "${i}" == "${TEI_HEALTH_TIMEOUT_SEC}" ]]; then
      echo "ERROR: TEI failed to become healthy in time." >&2
      docker logs --tail 100 "${TEI_CONTAINER_NAME}" >&2 || true
      exit 1
    fi
  done
  
  echo "Running TEI output probe..."
  for probe_idx in 1 2; do
    # POST two sample inputs (one ASCII, one CJK) to /embed. A curl failure is
    # folded into an empty response so that it is reported by the check below
    # rather than aborting the script through `set -e`.
    probe_resp="$(curl -sf -X POST "http://127.0.0.1:${TEI_PORT}/embed" \
      -H "Content-Type: application/json" \
      -d '{"inputs":["health check","芭比娃娃 儿童玩具"]}' || true)"
    if [[ -z "${probe_resp}" ]]; then
      echo "ERROR: TEI probe ${probe_idx} failed: empty response" >&2
      docker logs --tail 120 "${TEI_CONTAINER_NAME}" >&2 || true
      docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
      exit 1
    fi
    # Detect non-finite-like payloads (observed as null/NaN on incompatible CUDA image + GPU).
    # grep -E is used instead of ripgrep: rg is not guaranteed to be installed,
    # and a missing binary inside an `if` condition just evaluates to false,
    # which would let a broken container pass the probe. The here-string avoids
    # an `echo | grep -q` pipeline, whose early-exiting reader can turn a match
    # into a pipeline failure under `pipefail`.
    if grep -qiE '(null|nan|inf)' <<<"${probe_resp}"; then
      echo "ERROR: TEI probe ${probe_idx} detected invalid embedding values (null/NaN/Inf)." >&2
      # Show only the first 220 characters of the response.
      echo "Response preview: ${probe_resp:0:220}" >&2
      docker logs --tail 120 "${TEI_CONTAINER_NAME}" >&2 || true
      docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
      exit 1
    fi
07cf5a93   tangwang   START_EMBEDDING=...
156
157
  done
  
efd435cf   tangwang   tei性能调优:
158
159
  echo "TEI is ready and output probe passed: http://127.0.0.1:${TEI_PORT}"
  exit 0