start_tei_service.sh
4.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/bin/bash
#
# Start Hugging Face TEI service for Qwen3-Embedding-0.6B using Docker.
#
# Safe to run from any directory: the script resolves the repository root
# (the parent of its own directory) and works from there.
#
set -euo pipefail

script_dir="$(dirname "$0")"
PROJECT_ROOT="$(cd "${script_dir}/.." && pwd)"
cd "${PROJECT_ROOT}"
# Load .env if present
ENV_FILE="${PROJECT_ROOT}/.env"

#######################################
# Parse a dotenv-style file and export its KEY=VALUE pairs.
# Skips blank lines, comments, lines without '=', and keys that are not
# valid shell identifiers (a bad key would otherwise make `export` fail
# and abort the whole script under `set -e`).
# Arguments: $1 - path to the env file
# Outputs:   exports each parsed variable into the environment
#######################################
load_env_file() {
  local env_path=$1
  local line key value first last
  while IFS= read -r line || [ -n "${line}" ]; do
    line="${line%$'\r'}"                           # tolerate CRLF files
    [[ -z "${line//[[:space:]]/}" ]] && continue   # blank line
    [[ "${line}" =~ ^[[:space:]]*# ]] && continue  # comment line
    [[ "${line}" != *=* ]] && continue             # no assignment
    key="${line%%=*}"
    value="${line#*=}"
    # Trim surrounding whitespace from key AND value. Trailing whitespace on
    # the value must be trimmed before the quote check below, otherwise
    # KEY="val"<space> would keep its quotes literally.
    key="${key#"${key%%[![:space:]]*}"}"
    key="${key%"${key##*[![:space:]]}"}"
    value="${value#"${value%%[![:space:]]*}"}"
    value="${value%"${value##*[![:space:]]}"}"
    # Silently skip keys export cannot accept (e.g. "foo bar", "1BAD").
    [[ "${key}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue
    # Strip one matching pair of surrounding quotes, if present.
    if [[ ${#value} -ge 2 ]]; then
      first="${value:0:1}"
      last="${value: -1}"
      if [[ ("${first}" == '"' && "${last}" == '"') || ("${first}" == "'" && "${last}" == "'") ]]; then
        value="${value:1:${#value}-2}"
      fi
    fi
    export "${key}=${value}"
  done < "${env_path}"
}

if [ -f "${ENV_FILE}" ]; then
  load_env_file "${ENV_FILE}"
fi
# Bail out early when the docker CLI is missing; everything below needs it.
command -v docker >/dev/null 2>&1 || {
  echo "ERROR: docker is required to run TEI service." >&2
  exit 1
}
# Tunables — each may be overridden via the environment or the .env file.
TEI_CONTAINER_NAME="${TEI_CONTAINER_NAME:-saas-search-tei}"   # docker container name
TEI_PORT="${TEI_PORT:-8080}"                                  # host port mapped to the container's port 80
TEI_MODEL_ID="${TEI_MODEL_ID:-Qwen/Qwen3-Embedding-0.6B}"     # Hugging Face model id to serve
TEI_VERSION="${TEI_VERSION:-1.9}"                             # TEI image tag (cuda- prefix added in GPU mode)
TEI_MAX_BATCH_TOKENS="${TEI_MAX_BATCH_TOKENS:-2048}"          # server-side batching budget
TEI_MAX_CLIENT_BATCH_SIZE="${TEI_MAX_CLIENT_BATCH_SIZE:-8}"   # max texts per client request
TEI_DTYPE="${TEI_DTYPE:-float16}"                             # inference dtype passed to TEI
HF_CACHE_DIR="${HF_CACHE_DIR:-$HOME/.cache/huggingface}"      # host HF cache, bind-mounted to /data
TEI_HEALTH_TIMEOUT_SEC="${TEI_HEALTH_TIMEOUT_SEC:-300}"       # seconds to wait for /health before giving up
# Normalize TEI_USE_GPU (default: GPU on) to a strict "1"/"0" flag,
# accepting 1/0, true/false and yes/no in any letter case.
USE_GPU_RAW="${TEI_USE_GPU:-1}"
USE_GPU="$(printf '%s' "${USE_GPU_RAW}" | tr '[:upper:]' '[:lower:]')"
case "${USE_GPU}" in
  1 | true | yes)
    USE_GPU="1"
    ;;
  0 | false | no)
    USE_GPU="0"
    ;;
  *)
    echo "ERROR: invalid TEI_USE_GPU=${USE_GPU_RAW}. Use 1/0 (or true/false)." >&2
    exit 1
    ;;
esac
# Pick image, docker GPU flags and mode label. GPU mode is strict: we refuse
# to fall back to CPU silently when the GPU stack is unusable.
if [[ "${USE_GPU}" == "1" ]]; then
  # Both the nvidia-smi binary and a responding driver are required.
  if ! { command -v nvidia-smi && nvidia-smi; } >/dev/null 2>&1; then
    echo "ERROR: TEI_USE_GPU=1 but NVIDIA GPU is not available. No CPU fallback." >&2
    exit 1
  fi
  # Docker itself must know about the nvidia runtime as well.
  docker info --format '{{json .Runtimes}}' 2>/dev/null | grep -q 'nvidia' || {
    echo "ERROR: TEI_USE_GPU=1 but Docker nvidia runtime is not configured." >&2
    echo "Install and configure nvidia-container-toolkit, then restart Docker." >&2
    exit 1
  }
  TEI_IMAGE="${TEI_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:cuda-${TEI_VERSION}}"
  GPU_ARGS=(--gpus all)
  TEI_MODE="gpu"
else
  TEI_IMAGE="${TEI_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:${TEI_VERSION}}"
  GPU_ARGS=()
  TEI_MODE="cpu"
fi
# Make sure the host-side HF cache exists before bind-mounting it.
mkdir -p "${HF_CACHE_DIR}"

# Reuse an already-running container when its mode (CPU/GPU) matches the
# requested one; refuse (and ask the user to stop it) on a mismatch.
# Filter args are quoted — the name is user-overridable and an unquoted
# value with spaces or glob characters would be mangled by the shell.
# The ^/name$ anchors make docker match the exact container name only.
existing_id="$(docker ps -aq -f "name=^/${TEI_CONTAINER_NAME}$")"
if [[ -n "${existing_id}" ]]; then
  running_id="$(docker ps -q -f "name=^/${TEI_CONTAINER_NAME}$")"
  if [[ -n "${running_id}" ]]; then
    current_image="$(docker inspect "${TEI_CONTAINER_NAME}" --format '{{.Config.Image}}' 2>/dev/null || true)"
    device_req="$(docker inspect "${TEI_CONTAINER_NAME}" --format '{{json .HostConfig.DeviceRequests}}' 2>/dev/null || true)"
    if [[ "${USE_GPU}" == "1" ]]; then
      # GPU requested: container must run a cuda image AND have device requests.
      if [[ "${current_image}" != *":cuda-"* ]] || [[ "${device_req}" == "null" ]]; then
        echo "ERROR: existing TEI container mode mismatch (need GPU): ${TEI_CONTAINER_NAME}" >&2
        echo " image=${current_image:-unknown}" >&2
        echo " device_requests=${device_req:-unknown}" >&2
        echo "Stop it first: ./scripts/stop_tei_service.sh" >&2
        exit 1
      fi
      echo "TEI already running (GPU): ${TEI_CONTAINER_NAME}"
    else
      # CPU requested: a cuda image or any device request means GPU mode.
      if [[ "${current_image}" == *":cuda-"* ]] || [[ "${device_req}" != "null" ]]; then
        echo "ERROR: existing TEI container mode mismatch (need CPU): ${TEI_CONTAINER_NAME}" >&2
        echo " image=${current_image:-unknown}" >&2
        echo " device_requests=${device_req:-unknown}" >&2
        echo "Stop it first: ./scripts/stop_tei_service.sh" >&2
        exit 1
      fi
      echo "TEI already running (CPU): ${TEI_CONTAINER_NAME}"
    fi
    exit 0
  fi
  # Container exists but is stopped: remove it so we can recreate it below
  # with the current settings.
  docker rm "${TEI_CONTAINER_NAME}" >/dev/null
fi
echo "Starting TEI container: ${TEI_CONTAINER_NAME}"
echo "Image: ${TEI_IMAGE}"
echo "Model: ${TEI_MODEL_ID}"
echo "Port: ${TEI_PORT}"
echo "Mode: ${TEI_MODE}"
# Pass HF_TOKEN to docker BY NAME (-e HF_TOKEN) so the secret is read from
# the environment and never appears in the docker argv (visible via `ps`).
export HF_TOKEN="${HF_TOKEN:-}"
# ${GPU_ARGS[@]+...} expands to nothing when the array is empty; a plain
# "${GPU_ARGS[@]}" is an "unbound variable" error under `set -u` on
# bash < 4.4 (e.g. the bash 3.2 that macOS ships).
docker run -d \
  --name "${TEI_CONTAINER_NAME}" \
  -p "${TEI_PORT}:80" \
  ${GPU_ARGS[@]+"${GPU_ARGS[@]}"} \
  -v "${HF_CACHE_DIR}:/data" \
  -e HF_TOKEN \
  "${TEI_IMAGE}" \
  --model-id "${TEI_MODEL_ID}" \
  --dtype "${TEI_DTYPE}" \
  --max-batch-tokens "${TEI_MAX_BATCH_TOKENS}" \
  --max-client-batch-size "${TEI_MAX_CLIENT_BATCH_SIZE}" >/dev/null
echo "Waiting for TEI health..."
# Poll /health once per second until it answers or the timeout expires.
# C-style for avoids spawning `seq` and works even where seq is absent.
for (( waited = 0; waited < TEI_HEALTH_TIMEOUT_SEC; waited++ )); do
  # --max-time caps a single probe so a hung connection cannot stall the
  # loop past its intended one-second cadence.
  if curl -sf --max-time 2 "http://127.0.0.1:${TEI_PORT}/health" >/dev/null 2>&1; then
    echo "TEI is ready: http://127.0.0.1:${TEI_PORT}"
    exit 0
  fi
  # Fail fast if the container already died rather than burning the whole
  # timeout waiting on a dead service.
  if [[ -z "$(docker ps -q -f "name=^/${TEI_CONTAINER_NAME}$")" ]]; then
    echo "ERROR: TEI container exited during startup." >&2
    break
  fi
  sleep 1
done
echo "ERROR: TEI failed to become healthy in time." >&2
docker logs --tail 100 "${TEI_CONTAINER_NAME}" >&2 || true
exit 1