Blame view

scripts/start_tei_service.sh 4.91 KB
07cf5a93   tangwang   START_EMBEDDING=...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
  #!/bin/bash
  #
  # Launches the Hugging Face Text Embeddings Inference (TEI) server for
  # Qwen3-Embedding-0.6B as a Docker container.
  #
  # Strict mode: stop on a failing command, on use of an unset variable, and
  # on a failure anywhere inside a pipeline.
  set -o errexit -o nounset -o pipefail

  # The project root is the parent of the directory containing this script;
  # everything below runs from there.
  script_dir="$(dirname "$0")"
  PROJECT_ROOT="$(cd "${script_dir}/.." && pwd)"
  cd "${PROJECT_ROOT}"
  
  # Load .env if present
  ENV_FILE="${PROJECT_ROOT}/.env"

  #######################################
  # Export every KEY=VALUE assignment found in a dotenv-style file.
  #
  # Parsing rules:
  #   - blank lines, lines starting with '#', and lines without '=' are skipped
  #   - a trailing carriage return (CRLF files) is dropped
  #   - whitespace around the key and around the value is trimmed
  #   - an optional leading "export " before the key is accepted
  #   - one pair of matching single or double quotes around the value is removed
  #   - a key that is not a valid shell identifier is reported on stderr and
  #     skipped, instead of letting `export` fail and abort the script
  # A value from the file replaces any value already set in the environment.
  #
  # Arguments: $1 - path of the file to load; a missing file is not an error
  # Outputs:   warnings about ignored lines on stderr
  # Returns:   0
  #######################################
  load_env_file() {
    local env_path="$1"
    local line key value first last

    [[ -f "${env_path}" ]] || return 0

    # `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "${line}" ]]; do
      line="${line%$'\r'}"
      [[ -z "${line//[[:space:]]/}" ]] && continue
      [[ "${line}" =~ ^[[:space:]]*# ]] && continue
      [[ "${line}" != *=* ]] && continue

      # Split on the first '=' so the value may itself contain '='.
      key="${line%%=*}"
      value="${line#*=}"

      # Trim leading and trailing whitespace from both parts.
      key="${key#"${key%%[![:space:]]*}"}"
      key="${key%"${key##*[![:space:]]}"}"
      value="${value#"${value%%[![:space:]]*}"}"
      value="${value%"${value##*[![:space:]]}"}"

      # Tolerate the common "export KEY=VALUE" spelling.
      if [[ "${key}" =~ ^export[[:space:]]+(.*)$ ]]; then
        key="${BASH_REMATCH[1]}"
      fi

      if [[ ! "${key}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
        echo "WARNING: ignoring invalid variable name in ${env_path}: ${key}" >&2
        continue
      fi

      # Remove one pair of matching surrounding quotes, if present.
      if [[ ${#value} -ge 2 ]]; then
        first="${value:0:1}"
        last="${value: -1}"
        if [[ ("${first}" == '"' && "${last}" == '"') || ("${first}" == "'" && "${last}" == "'") ]]; then
          value="${value:1:${#value}-2}"
        fi
      fi

      export "${key}=${value}"
    done < "${env_path}"

    return 0
  }

  load_env_file "${ENV_FILE}"
  
  # Docker is a hard prerequisite: the TEI server is run as a container.
  command -v docker >/dev/null 2>&1 || {
    echo "ERROR: docker is required to run TEI service." >&2
    exit 1
  }
  
  # Settings, each overridable through the environment (or .env); a variable
  # that is unset or empty receives the default shown here.
  : "${TEI_CONTAINER_NAME:=saas-search-tei}"          # Docker container name
  : "${TEI_PORT:=8080}"                               # host port published to container port 80
  : "${TEI_MODEL_ID:=Qwen/Qwen3-Embedding-0.6B}"      # passed as --model-id
  : "${TEI_VERSION:=1.9}"                             # image tag suffix
  : "${TEI_MAX_BATCH_TOKENS:=2048}"                   # passed as --max-batch-tokens
  : "${TEI_MAX_CLIENT_BATCH_SIZE:=8}"                 # passed as --max-client-batch-size
  : "${TEI_DTYPE:=float16}"                           # passed as --dtype
  : "${HF_CACHE_DIR:=$HOME/.cache/huggingface}"       # host directory mounted at /data
  : "${TEI_HEALTH_TIMEOUT_SEC:=300}"                  # number of one-second health probes
  
  # Reduce TEI_USE_GPU (default 1) to a strict "1" or "0". The comparison is
  # case-insensitive; anything outside the accepted spellings is fatal.
  USE_GPU_RAW="${TEI_USE_GPU:-1}"
  case "$(printf '%s\n' "${USE_GPU_RAW}" | tr '[:upper:]' '[:lower:]')" in
    1 | true | yes)
      USE_GPU="1"
      ;;
    0 | false | no)
      USE_GPU="0"
      ;;
    *)
      echo "ERROR: invalid TEI_USE_GPU=${USE_GPU_RAW}. Use 1/0 (or true/false)." >&2
      exit 1
      ;;
  esac
  
  # Pick the container image, the extra `docker run` flags and the mode label
  # for the requested mode. GPU mode is verified up front and never falls back
  # to CPU.
  if [[ "${USE_GPU}" == "1" ]]; then
    # The host needs a working NVIDIA driver: nvidia-smi must exist and succeed.
    if ! command -v nvidia-smi >/dev/null 2>&1 || ! nvidia-smi >/dev/null 2>&1; then
      echo "ERROR: TEI_USE_GPU=1 but NVIDIA GPU is not available. No CPU fallback." >&2
      exit 1
    fi
    # Capture the runtime list first and match it in the shell. Piping into
    # `grep -q` under `pipefail` can fail spuriously when grep exits on the
    # first match and docker is killed by SIGPIPE. A failing `docker info`
    # is reported separately so it is not mistaken for a missing runtime.
    if ! docker_runtimes="$(docker info --format '{{json .Runtimes}}' 2>/dev/null)"; then
      echo "ERROR: unable to query the Docker daemon (docker info failed)." >&2
      echo "Check that Docker is running and that this user may access it." >&2
      exit 1
    fi
    if [[ "${docker_runtimes}" != *nvidia* ]]; then
      echo "ERROR: TEI_USE_GPU=1 but Docker nvidia runtime is not configured." >&2
      echo "Install and configure nvidia-container-toolkit, then restart Docker." >&2
      exit 1
    fi
    TEI_IMAGE="${TEI_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:cuda-${TEI_VERSION}}"
    GPU_ARGS=(--gpus all)
    TEI_MODE="gpu"
  else
    TEI_IMAGE="${TEI_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:${TEI_VERSION}}"
    GPU_ARGS=()
    TEI_MODE="cpu"
  fi
  
  # Make sure the host directory that gets mounted into the container exists.
  mkdir -p "${HF_CACHE_DIR}"

  # Deal with a container that already carries our name:
  #   - running, mode matches the request -> nothing to do, exit 0
  #   - running, mode differs             -> refuse and exit 1
  #   - not running                       -> remove it so the name can be reused
  # The filter is an anchored regex; it is quoted so the shell never
  # word-splits or glob-expands it.
  name_filter="name=^/${TEI_CONTAINER_NAME}\$"
  existing_id="$(docker ps -aq -f "${name_filter}")"
  if [[ -n "${existing_id}" ]]; then
    running_id="$(docker ps -q -f "${name_filter}")"
    if [[ -n "${running_id}" ]]; then
      # Best effort: a failing inspect leaves the value empty, not fatal.
      current_image="$(docker inspect "${TEI_CONTAINER_NAME}" --format '{{.Config.Image}}' 2>/dev/null || true)"
      device_req="$(docker inspect "${TEI_CONTAINER_NAME}" --format '{{json .HostConfig.DeviceRequests}}' 2>/dev/null || true)"

      mode_mismatch=0
      if [[ "${USE_GPU}" == "1" ]]; then
        required_mode="GPU"
        # GPU mode expects a ":cuda-" image tag and a non-null device request.
        if [[ "${current_image}" != *":cuda-"* || "${device_req}" == "null" ]]; then
          mode_mismatch=1
        fi
      else
        required_mode="CPU"
        # CPU mode expects neither a ":cuda-" image tag nor a device request.
        if [[ "${current_image}" == *":cuda-"* || "${device_req}" != "null" ]]; then
          mode_mismatch=1
        fi
      fi

      if [[ "${mode_mismatch}" == "1" ]]; then
        echo "ERROR: existing TEI container mode mismatch (need ${required_mode}): ${TEI_CONTAINER_NAME}" >&2
        echo "  image=${current_image:-unknown}" >&2
        echo "  device_requests=${device_req:-unknown}" >&2
        echo "Stop it first: ./scripts/stop_tei_service.sh" >&2
        exit 1
      fi
      echo "TEI already running (${required_mode}): ${TEI_CONTAINER_NAME}"
      exit 0
    fi
    docker rm "${TEI_CONTAINER_NAME}" >/dev/null
  fi
  
  # Report the effective settings, then start the container detached.
  echo "Starting TEI container: ${TEI_CONTAINER_NAME}"
  echo "Image: ${TEI_IMAGE}"
  echo "Model: ${TEI_MODEL_ID}"
  echo "Port: ${TEI_PORT}"
  echo "Mode: ${TEI_MODE}"

  # Notes on the invocation below:
  #   - HF_TOKEN is handed over through docker's own environment (`-e HF_TOKEN`
  #     with no value forwards the caller's variable), so the token does not
  #     appear on the command line where process listings would show it.
  #   - ${GPU_ARGS[@]+...} expands to nothing for an empty array. A plain
  #     "${GPU_ARGS[@]}" aborts under `set -u` on bash older than 4.4, which
  #     is exactly the CPU-mode case.
  #   - Arguments after the image name go to the TEI server itself.
  HF_TOKEN="${HF_TOKEN:-}" docker run -d \
    --name "${TEI_CONTAINER_NAME}" \
    -p "${TEI_PORT}:80" \
    ${GPU_ARGS[@]+"${GPU_ARGS[@]}"} \
    -v "${HF_CACHE_DIR}:/data" \
    -e HF_TOKEN \
    "${TEI_IMAGE}" \
    --model-id "${TEI_MODEL_ID}" \
    --dtype "${TEI_DTYPE}" \
    --max-batch-tokens "${TEI_MAX_BATCH_TOKENS}" \
    --max-client-batch-size "${TEI_MAX_CLIENT_BATCH_SIZE}" >/dev/null
  
  # Poll the /health endpoint once per second until it answers, the container
  # stops, or TEI_HEALTH_TIMEOUT_SEC probes have been made.
  echo "Waiting for TEI health..."

  # Without curl every probe would fail and look like an unhealthy service.
  if ! command -v curl >/dev/null 2>&1; then
    echo "ERROR: curl is required to probe TEI health." >&2
    exit 1
  fi
  if [[ ! "${TEI_HEALTH_TIMEOUT_SEC}" =~ ^[0-9]+$ ]]; then
    echo "ERROR: invalid TEI_HEALTH_TIMEOUT_SEC=${TEI_HEALTH_TIMEOUT_SEC}. Use a whole number of seconds." >&2
    exit 1
  fi
  # Force base 10 so a value with leading zeros is not read as octal.
  health_timeout=$((10#${TEI_HEALTH_TIMEOUT_SEC}))

  for ((elapsed = 0; elapsed < health_timeout; elapsed++)); do
    if curl -sf "http://127.0.0.1:${TEI_PORT}/health" >/dev/null 2>&1; then
      echo "TEI is ready: http://127.0.0.1:${TEI_PORT}"
      exit 0
    fi
    # Fail fast: a container that has already stopped will never become
    # healthy, so there is no point in waiting out the full timeout.
    container_running="$(docker inspect --format '{{.State.Running}}' "${TEI_CONTAINER_NAME}" 2>/dev/null || true)"
    if [[ "${container_running}" != "true" ]]; then
      echo "ERROR: TEI container exited before becoming healthy." >&2
      docker logs --tail 100 "${TEI_CONTAINER_NAME}" >&2 || true
      exit 1
    fi
    sleep 1
  done

  echo "ERROR: TEI failed to become healthy in time." >&2
  # Show the tail of the container log to help diagnose the failure.
  docker logs --tail 100 "${TEI_CONTAINER_NAME}" >&2 || true
  exit 1