#!/bin/bash
#
# Start Hugging Face TEI service for Qwen3-Embedding-0.6B using Docker.
#
set -euo pipefail

# Resolve the repository root relative to this script's location and make
# it the working directory for everything that follows.
SCRIPT_DIR="$(dirname "$0")"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
cd "${PROJECT_ROOT}"
  
  # Load overrides from .env (if present): skip blank/comment lines, strip a
  # trailing CR so CRLF files parse, trim whitespace around KEY and leading
  # whitespace from VALUE, strip one pair of matching quotes around VALUE,
  # then export KEY=VALUE into this script's environment.
  ENV_FILE="${PROJECT_ROOT}/.env"
  if [ -f "${ENV_FILE}" ]; then
    # '|| [ -n "${line}" ]' also processes a final line without a newline.
    while IFS= read -r line || [ -n "${line}" ]; do
      # Drop a trailing carriage return (Windows line endings).
      line="${line%$'\r'}"
      # Skip whitespace-only lines, comment lines, and lines without '='.
      [[ -z "${line//[[:space:]]/}" ]] && continue
      [[ "${line}" =~ ^[[:space:]]*# ]] && continue
      [[ "${line}" != *=* ]] && continue

      # Split on the FIRST '='; later '=' characters stay in the value.
      key="${line%%=*}"
      value="${line#*=}"
      # Trim surrounding whitespace from the key, leading whitespace from
      # the value (trailing value whitespace is preserved).
      key="${key#"${key%%[![:space:]]*}"}"
      key="${key%"${key##*[![:space:]]}"}"
      value="${value#"${value%%[![:space:]]*}"}"

      # Strip exactly one pair of matching single or double quotes.
      if [[ ${#value} -ge 2 ]]; then
        first="${value:0:1}"
        last="${value: -1}"
        if [[ ("${first}" == '"' && "${last}" == '"') || ("${first}" == "'" && "${last}" == "'") ]]; then
          value="${value:1:${#value}-2}"
        fi
      fi

      # NOTE(review): a malformed key (e.g. containing spaces) makes this
      # export fail and, under set -e, aborts the script.
      export "${key}=${value}"
    done < "${ENV_FILE}"
  fi
  
  if ! command -v docker >/dev/null 2>&1; then
    echo "ERROR: docker is required to run TEI service." >&2
    exit 1
  fi
  
  TEI_CONTAINER_NAME="${TEI_CONTAINER_NAME:-saas-search-tei}"
  TEI_PORT="${TEI_PORT:-8080}"
  TEI_MODEL_ID="${TEI_MODEL_ID:-Qwen/Qwen3-Embedding-0.6B}"
  TEI_VERSION="${TEI_VERSION:-1.9}"
efd435cf   tangwang   tei性能调优:
46
47
  TEI_MAX_BATCH_TOKENS="${TEI_MAX_BATCH_TOKENS:-4096}"
  TEI_MAX_CLIENT_BATCH_SIZE="${TEI_MAX_CLIENT_BATCH_SIZE:-24}"
07cf5a93   tangwang   START_EMBEDDING=...
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
  TEI_DTYPE="${TEI_DTYPE:-float16}"
  HF_CACHE_DIR="${HF_CACHE_DIR:-$HOME/.cache/huggingface}"
  TEI_HEALTH_TIMEOUT_SEC="${TEI_HEALTH_TIMEOUT_SEC:-300}"
  
  USE_GPU_RAW="${TEI_USE_GPU:-1}"
  USE_GPU="$(echo "${USE_GPU_RAW}" | tr '[:upper:]' '[:lower:]')"
  if [[ "${USE_GPU}" == "1" || "${USE_GPU}" == "true" || "${USE_GPU}" == "yes" ]]; then
    USE_GPU="1"
  elif [[ "${USE_GPU}" == "0" || "${USE_GPU}" == "false" || "${USE_GPU}" == "no" ]]; then
    USE_GPU="0"
  else
    echo "ERROR: invalid TEI_USE_GPU=${USE_GPU_RAW}. Use 1/0 (or true/false)." >&2
    exit 1
  fi
  
#######################################
# Pick the TEI GPU image for the local GPU.
# Pre-Ampere GPUs (compute capability < 8, e.g. Tesla T4 at 7.5) need the
# "turing" image; Ampere and newer use the generic "cuda" image.
# Globals:   TEI_VERSION (read)
# Outputs:   full image reference on stdout
#######################################
detect_gpu_tei_image() {
  local compute_cap major
  # First GPU only; empty when nvidia-smi is missing or fails.
  compute_cap="$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -n1 || true)"
  # Strip any whitespace padding from the CSV output before parsing.
  compute_cap="${compute_cap//[[:space:]]/}"
  major="${compute_cap%%.*}"
  # Only compare when the major version is numeric: a non-numeric operand in
  # a numeric [[ ]] test raises an arithmetic error, which would be fatal
  # under this script's 'set -e'. Unknown output falls back to "cuda".
  if [[ "${major}" =~ ^[0-9]+$ ]] && (( major < 8 )); then
    echo "ghcr.io/huggingface/text-embeddings-inference:turing-${TEI_VERSION}"
  else
    echo "ghcr.io/huggingface/text-embeddings-inference:cuda-${TEI_VERSION}"
  fi
}
  
# Choose the image and docker flags for the requested mode. GPU mode has no
# CPU fallback: a missing GPU or missing nvidia runtime is a hard error.
if [[ "${USE_GPU}" != "1" ]]; then
  TEI_IMAGE="${TEI_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:${TEI_VERSION}}"
  GPU_ARGS=()
  TEI_MODE="cpu"
else
  # The GPU itself must be reachable...
  if ! command -v nvidia-smi >/dev/null 2>&1 || ! nvidia-smi >/dev/null 2>&1; then
    echo "ERROR: TEI_USE_GPU=1 but NVIDIA GPU is not available. No CPU fallback." >&2
    exit 1
  fi
  # ...and Docker must expose it through the nvidia runtime.
  if ! docker info --format '{{json .Runtimes}}' 2>/dev/null | grep -q 'nvidia'; then
    echo "ERROR: TEI_USE_GPU=1 but Docker nvidia runtime is not configured." >&2
    echo "Install and configure nvidia-container-toolkit, then restart Docker." >&2
    exit 1
  fi
  TEI_IMAGE="${TEI_IMAGE:-$(detect_gpu_tei_image)}"
  GPU_ARGS=(--gpus all)
  TEI_MODE="gpu"
fi

# Model weights are cached on the host and bind-mounted into the container.
mkdir -p "${HF_CACHE_DIR}"
  
  existing_id="$(docker ps -aq -f name=^/${TEI_CONTAINER_NAME}$)"
  if [[ -n "${existing_id}" ]]; then
    running_id="$(docker ps -q -f name=^/${TEI_CONTAINER_NAME}$)"
    if [[ -n "${running_id}" ]]; then
      current_image="$(docker inspect "${TEI_CONTAINER_NAME}" --format '{{.Config.Image}}' 2>/dev/null || true)"
      device_req="$(docker inspect "${TEI_CONTAINER_NAME}" --format '{{json .HostConfig.DeviceRequests}}' 2>/dev/null || true)"
efd435cf   tangwang   tei性能调优:
102
103
104
105
      current_is_gpu_image=0
      if [[ "${current_image}" == *":cuda-"* || "${current_image}" == *":turing-"* ]]; then
        current_is_gpu_image=1
      fi
07cf5a93   tangwang   START_EMBEDDING=...
106
      if [[ "${USE_GPU}" == "1" ]]; then
efd435cf   tangwang   tei性能调优:
107
108
109
        if [[ "${current_is_gpu_image}" -eq 1 ]] && [[ "${device_req}" != "null" ]] && [[ "${current_image}" == "${TEI_IMAGE}" ]]; then
          echo "TEI already running (GPU): ${TEI_CONTAINER_NAME}"
          exit 0
07cf5a93   tangwang   START_EMBEDDING=...
110
        fi
efd435cf   tangwang   tei性能调优:
111
112
113
114
        echo "TEI running with different mode/image; recreating container ${TEI_CONTAINER_NAME}"
        echo "  current_image=${current_image:-unknown}"
        echo "  target_image=${TEI_IMAGE}"
        docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
07cf5a93   tangwang   START_EMBEDDING=...
115
      else
efd435cf   tangwang   tei性能调优:
116
117
118
        if [[ "${current_is_gpu_image}" -eq 0 ]] && [[ "${device_req}" == "null" ]] && [[ "${current_image}" == "${TEI_IMAGE}" ]]; then
          echo "TEI already running (CPU): ${TEI_CONTAINER_NAME}"
          exit 0
07cf5a93   tangwang   START_EMBEDDING=...
119
        fi
efd435cf   tangwang   tei性能调优:
120
121
122
123
        echo "TEI running with different mode/image; recreating container ${TEI_CONTAINER_NAME}"
        echo "  current_image=${current_image:-unknown}"
        echo "  target_image=${TEI_IMAGE}"
        docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
07cf5a93   tangwang   START_EMBEDDING=...
124
      fi
07cf5a93   tangwang   START_EMBEDDING=...
125
    fi
efd435cf   tangwang   tei性能调优:
126
127
128
    if docker ps -aq -f name=^/${TEI_CONTAINER_NAME}$ | grep -q .; then
      docker rm "${TEI_CONTAINER_NAME}" >/dev/null
    fi
07cf5a93   tangwang   START_EMBEDDING=...
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
  fi
  
  echo "Starting TEI container: ${TEI_CONTAINER_NAME}"
  echo "Image: ${TEI_IMAGE}"
  echo "Model: ${TEI_MODEL_ID}"
  echo "Port: ${TEI_PORT}"
  echo "Mode: ${TEI_MODE}"
  
  docker run -d \
    --name "${TEI_CONTAINER_NAME}" \
    -p "${TEI_PORT}:80" \
    "${GPU_ARGS[@]}" \
    -v "${HF_CACHE_DIR}:/data" \
    -e HF_TOKEN="${HF_TOKEN:-}" \
    "${TEI_IMAGE}" \
    --model-id "${TEI_MODEL_ID}" \
    --dtype "${TEI_DTYPE}" \
    --max-batch-tokens "${TEI_MAX_BATCH_TOKENS}" \
    --max-client-batch-size "${TEI_MAX_CLIENT_BATCH_SIZE}" >/dev/null
  
  echo "Waiting for TEI health..."
  for i in $(seq 1 "${TEI_HEALTH_TIMEOUT_SEC}"); do
    if curl -sf "http://127.0.0.1:${TEI_PORT}/health" >/dev/null 2>&1; then
efd435cf   tangwang   tei性能调优:
152
153
      echo "TEI health is ready: http://127.0.0.1:${TEI_PORT}"
      break
07cf5a93   tangwang   START_EMBEDDING=...
154
155
    fi
    sleep 1
efd435cf   tangwang   tei性能调优:
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
    if [[ "${i}" == "${TEI_HEALTH_TIMEOUT_SEC}" ]]; then
      echo "ERROR: TEI failed to become healthy in time." >&2
      docker logs --tail 100 "${TEI_CONTAINER_NAME}" >&2 || true
      exit 1
    fi
  done
  
  echo "Running TEI output probe..."
  for probe_idx in 1 2; do
    probe_resp="$(curl -sf -X POST "http://127.0.0.1:${TEI_PORT}/embed" \
      -H "Content-Type: application/json" \
      -d '{"inputs":["health check","芭比娃娃 儿童玩具"]}' || true)"
    if [[ -z "${probe_resp}" ]]; then
      echo "ERROR: TEI probe ${probe_idx} failed: empty response" >&2
      docker logs --tail 120 "${TEI_CONTAINER_NAME}" >&2 || true
      docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
      exit 1
    fi
    # Detect non-finite-like payloads (observed as null/NaN on incompatible CUDA image + GPU).
    if echo "${probe_resp}" | rg -qi '(null|nan|inf)'; then
      echo "ERROR: TEI probe ${probe_idx} detected invalid embedding values (null/NaN/Inf)." >&2
      echo "Response preview: $(echo "${probe_resp}" | head -c 220)" >&2
      docker logs --tail 120 "${TEI_CONTAINER_NAME}" >&2 || true
      docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
      exit 1
    fi
07cf5a93   tangwang   START_EMBEDDING=...
182
183
  done
  
efd435cf   tangwang   tei性能调优:
184
185
  echo "TEI is ready and output probe passed: http://127.0.0.1:${TEI_PORT}"
  exit 0