07cf5a93
tangwang
START_EMBEDDING=...
|
1
2
3
4
5
6
7
8
9
|
#!/bin/bash
#
# Start Hugging Face TEI service for Qwen3-Embedding-0.6B using Docker.
#
# Fail fast: abort on command errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# The project root is the parent of the directory that holds this script;
# everything below runs from there.
script_dir="$(dirname "$0")"
PROJECT_ROOT="$(cd "${script_dir}/.." && pwd)"
cd "${PROJECT_ROOT}"
|
7fbca0d7
tangwang
启动脚本优化
|
10
11
12
13
|
# Load .env.
# Source the project's env helper and apply ${PROJECT_ROOT}/.env before the
# TEI_* defaults further down are resolved, so values from that file can
# take part in the "${VAR:-default}" fallbacks.
# NOTE(review): load_env_file is presumably defined by load_env.sh; its exact
# semantics (override vs. keep already-exported values) are not visible here.
# shellcheck source=scripts/lib/load_env.sh
source "${PROJECT_ROOT}/scripts/lib/load_env.sh"
load_env_file "${PROJECT_ROOT}/.env"
|
07cf5a93
tangwang
START_EMBEDDING=...
|
14
15
16
17
18
19
20
21
22
23
|
# Prerequisite guard: the docker CLI must be on PATH, otherwise stop here.
command -v docker >/dev/null 2>&1 || {
  echo "ERROR: docker is required to run TEI service." >&2
  exit 1
}
# Core settings; each keeps a non-empty value that is already set and
# otherwise falls back to the default shown.
: "${TEI_CONTAINER_NAME:=saas-search-tei}"
: "${TEI_PORT:=8080}"
: "${TEI_MODEL_ID:=Qwen/Qwen3-Embedding-0.6B}"
: "${TEI_VERSION:=1.9}"
|
efd435cf
tangwang
tei性能调优:
|
24
25
|
# Batching limits forwarded to the TEI command line
# (--max-batch-tokens / --max-client-batch-size); overridable via environment.
: "${TEI_MAX_BATCH_TOKENS:=4096}"
: "${TEI_MAX_CLIENT_BATCH_SIZE:=24}"
|
07cf5a93
tangwang
START_EMBEDDING=...
|
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
# Remaining tunables: model dtype, host-side Hugging Face cache directory,
# and how long to wait for the health endpoint.
: "${TEI_DTYPE:=float16}"
: "${HF_CACHE_DIR:=$HOME/.cache/huggingface}"
: "${TEI_HEALTH_TIMEOUT_SEC:=300}"

# Normalize TEI_USE_GPU (default 1) to exactly "1" or "0". The raw value is
# kept in USE_GPU_RAW so the error message can echo what the user supplied.
USE_GPU_RAW="${TEI_USE_GPU:-1}"
USE_GPU="$(tr '[:upper:]' '[:lower:]' <<<"${USE_GPU_RAW}")"
case "${USE_GPU}" in
  1 | true | yes)
    USE_GPU="1"
    ;;
  0 | false | no)
    USE_GPU="0"
    ;;
  *)
    echo "ERROR: invalid TEI_USE_GPU=${USE_GPU_RAW}. Use 1/0 (or true/false)." >&2
    exit 1
    ;;
esac
|
efd435cf
tangwang
tei性能调优:
|
41
42
43
44
45
46
47
48
49
50
51
52
|
#######################################
# Choose the TEI GPU image that matches the first GPU reported by nvidia-smi.
# Prefers the "turing" image for pre-Ampere GPUs (e.g. Tesla T4, compute
# capability 7.5) and the "cuda" image otherwise.
# Globals:   TEI_VERSION (read)
# Arguments: none
# Outputs:   the image reference on stdout
# Returns:   0 always; if the compute capability cannot be determined the
#            "cuda" image is selected.
#######################################
detect_gpu_tei_image() {
  local compute_cap major image_flavor="cuda"
  # Best effort: a failing nvidia-smi must not abort the script under
  # `set -e` / pipefail, hence the `|| true`.
  compute_cap="$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -n1 || true)"
  compute_cap="${compute_cap//[[:space:]]/}"
  major="${compute_cap%%.*}"
  # Only compare when the major version is purely numeric. Feeding arbitrary
  # text (e.g. the "not a valid field" message printed by older drivers) to an
  # arithmetic test yields syntax errors, or a fatal "unbound variable" under
  # `set -u`.
  if [[ "${major}" =~ ^[0-9]+$ ]] && ((10#${major} < 8)); then
    image_flavor="turing"
  fi
  echo "ghcr.io/huggingface/text-embeddings-inference:${image_flavor}-${TEI_VERSION}"
}
|
07cf5a93
tangwang
START_EMBEDDING=...
|
53
54
55
56
57
58
59
60
61
62
|
if [[ "${USE_GPU}" == "1" ]]; then
# GPU mode is strict: nvidia-smi must both exist and run successfully.
if ! { command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi >/dev/null 2>&1; }; then
  echo "ERROR: TEI_USE_GPU=1 but NVIDIA GPU is not available. No CPU fallback." >&2
  exit 1
fi
# Docker must list an nvidia runtime, otherwise GPUs cannot be passed through.
if ! docker info --format '{{json .Runtimes}}' 2>/dev/null | grep -q 'nvidia'; then
  {
    echo "ERROR: TEI_USE_GPU=1 but Docker nvidia runtime is not configured."
    echo "Install and configure nvidia-container-toolkit, then restart Docker."
  } >&2
  exit 1
fi
|
efd435cf
tangwang
tei性能调优:
|
63
|
# An explicit, non-empty TEI_IMAGE wins; otherwise pick the GPU image via
# detect_gpu_tei_image (only invoked when the override is absent).
TEI_IMAGE="${TEI_IMAGE:-$(detect_gpu_tei_image)}"
|
07cf5a93
tangwang
START_EMBEDDING=...
|
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
# GPU mode: extra `docker run` arguments that request all GPUs, plus the
# mode label printed in the startup banner.
GPU_ARGS=(--gpus all)
TEI_MODE="gpu"
else
# CPU mode: default to the dedicated CPU build, which TEI publishes under the
# "cpu-<version>" tag. The bare "<version>" tag is a CUDA build and is not
# suitable when no GPU is passed to the container. An explicit TEI_IMAGE
# still takes precedence.
TEI_IMAGE="${TEI_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:cpu-${TEI_VERSION}}"
# No GPU flags are added to `docker run` in this mode.
GPU_ARGS=()
TEI_MODE="cpu"
fi
# Make sure the host-side Hugging Face cache directory exists before it is
# bind-mounted into the container.
mkdir -p "${HF_CACHE_DIR}"
# Look up any container (running or stopped) whose name matches exactly.
# The filter is quoted so the name is never word-split or glob-expanded.
existing_id="$(docker ps -aq -f "name=^/${TEI_CONTAINER_NAME}$")"
if [[ -n "${existing_id}" ]]; then
# Same exact-name lookup, restricted to running containers; the filter is
# quoted so the name is never word-split or glob-expanded.
running_id="$(docker ps -q -f "name=^/${TEI_CONTAINER_NAME}$")"
if [[ -n "${running_id}" ]]; then
# Snapshot the running container's configured image and its device requests
# (as JSON). Both lookups are best-effort: on failure the variable is left
# empty instead of aborting the script under `set -e`.
current_image="$(docker inspect "${TEI_CONTAINER_NAME}" --format '{{.Config.Image}}' 2>/dev/null || true)"
device_req="$(docker inspect "${TEI_CONTAINER_NAME}" --format '{{json .HostConfig.DeviceRequests}}' 2>/dev/null || true)"
|
efd435cf
tangwang
tei性能调优:
|
80
81
82
83
|
# Classify the running container's image by tag prefix: "cuda-" and "turing-"
# tags count as GPU images, anything else does not.
case "${current_image}" in
  *":cuda-"* | *":turing-"*)
    current_is_gpu_image=1
    ;;
  *)
    current_is_gpu_image=0
    ;;
esac
|
07cf5a93
tangwang
START_EMBEDDING=...
|
84
|
if [[ "${USE_GPU}" == "1" ]]; then
|
efd435cf
tangwang
tei性能调优:
|
85
86
87
|
# Reuse the running container only when all three hold: it runs a GPU-tagged
# image, its device-request JSON is not "null", and its image equals the
# target image exactly. In that case there is nothing to do.
if [[ "${current_is_gpu_image}" -eq 1 ]] && [[ "${device_req}" != "null" ]] && [[ "${current_image}" == "${TEI_IMAGE}" ]]; then
echo "TEI already running (GPU): ${TEI_CONTAINER_NAME}"
exit 0
|
07cf5a93
tangwang
START_EMBEDDING=...
|
88
|
fi
|
efd435cf
tangwang
tei性能调优:
|
89
90
91
92
|
# The running container does not match the requested GPU setup: report the
# image change and force-remove it (best effort) so it can be recreated.
printf '%s\n' \
  "TEI running with different mode/image; recreating container ${TEI_CONTAINER_NAME}" \
  " current_image=${current_image:-unknown}" \
  " target_image=${TEI_IMAGE}"
docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
|
07cf5a93
tangwang
START_EMBEDDING=...
|
93
|
else
|
efd435cf
tangwang
tei性能调优:
|
94
95
96
|
# Reuse the running container only when all three hold: it does not run a
# GPU-tagged image, its device-request JSON is exactly "null", and its image
# equals the target image. In that case there is nothing to do.
if [[ "${current_is_gpu_image}" -eq 0 ]] && [[ "${device_req}" == "null" ]] && [[ "${current_image}" == "${TEI_IMAGE}" ]]; then
echo "TEI already running (CPU): ${TEI_CONTAINER_NAME}"
exit 0
|
07cf5a93
tangwang
START_EMBEDDING=...
|
97
|
fi
|
efd435cf
tangwang
tei性能调优:
|
98
99
100
101
|
# The running container does not match the requested CPU setup: report the
# image change and force-remove it (best effort) so it can be recreated.
printf '%s\n' \
  "TEI running with different mode/image; recreating container ${TEI_CONTAINER_NAME}" \
  " current_image=${current_image:-unknown}" \
  " target_image=${TEI_IMAGE}"
docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
|
07cf5a93
tangwang
START_EMBEDDING=...
|
102
|
fi
|
07cf5a93
tangwang
START_EMBEDDING=...
|
103
|
fi
|
efd435cf
tangwang
tei性能调优:
|
104
105
106
|
# A container with this name may still exist in a stopped state; remove it so
# the `docker run --name` below does not hit a name conflict. The filter is
# quoted so the name is never word-split or glob-expanded.
if docker ps -aq -f "name=^/${TEI_CONTAINER_NAME}$" | grep -q .; then
  docker rm "${TEI_CONTAINER_NAME}" >/dev/null
fi
|
07cf5a93
tangwang
START_EMBEDDING=...
|
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
fi
# Startup banner: summarize the effective configuration before launching.
printf '%s\n' \
  "Starting TEI container: ${TEI_CONTAINER_NAME}" \
  "Image: ${TEI_IMAGE}" \
  "Model: ${TEI_MODEL_ID}" \
  "Port: ${TEI_PORT}" \
  "Mode: ${TEI_MODE}"
# Launch TEI detached: host port TEI_PORT maps to container port 80, and the
# Hugging Face cache directory is mounted at /data. Everything after the image
# name is passed to the TEI server itself.
#
# GPU_ARGS is empty in CPU mode. A bare "${GPU_ARGS[@]}" on an empty array is
# an "unbound variable" error under `set -u` on bash older than 4.4, so the
# `${arr[@]+...}` form is used; it expands to nothing for an empty array and
# to the quoted elements otherwise.
docker run -d \
  --name "${TEI_CONTAINER_NAME}" \
  -p "${TEI_PORT}:80" \
  ${GPU_ARGS[@]+"${GPU_ARGS[@]}"} \
  -v "${HF_CACHE_DIR}:/data" \
  -e HF_TOKEN="${HF_TOKEN:-}" \
  "${TEI_IMAGE}" \
  --model-id "${TEI_MODEL_ID}" \
  --dtype "${TEI_DTYPE}" \
  --max-batch-tokens "${TEI_MAX_BATCH_TOKENS}" \
  --max-client-batch-size "${TEI_MAX_CLIENT_BATCH_SIZE}" >/dev/null
# Poll the /health endpoint on the published port, one attempt per loop
# iteration, for at most TEI_HEALTH_TIMEOUT_SEC iterations.
echo "Waiting for TEI health..."
for i in $(seq 1 "${TEI_HEALTH_TIMEOUT_SEC}"); do
# curl -f makes HTTP error statuses count as failure, so only a successful
# response ends the wait.
if curl -sf "http://127.0.0.1:${TEI_PORT}/health" >/dev/null 2>&1; then
|
efd435cf
tangwang
tei性能调优:
|
130
131
|
echo "TEI health is ready: http://127.0.0.1:${TEI_PORT}"
break
|
07cf5a93
tangwang
START_EMBEDDING=...
|
132
133
|
fi
sleep 1
|
efd435cf
tangwang
tei性能调优:
|
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
|
# Last iteration reached without a healthy response: show the tail of the
# container log for diagnosis and abort. The container itself is left in
# place (no `docker rm` here).
if [[ "${i}" == "${TEI_HEALTH_TIMEOUT_SEC}" ]]; then
echo "ERROR: TEI failed to become healthy in time." >&2
docker logs --tail 100 "${TEI_CONTAINER_NAME}" >&2 || true
exit 1
fi
done
echo "Running TEI output probe..."
for probe_idx in 1 2; do
# Send a small two-input batch (ASCII + CJK) to /embed. A failed request
# leaves probe_resp empty because of the `|| true`.
probe_resp="$(curl -sf -X POST "http://127.0.0.1:${TEI_PORT}/embed" \
  -H "Content-Type: application/json" \
  -d '{"inputs":["health check","芭比娃娃 儿童玩具"]}' || true)"
if [[ -z "${probe_resp}" ]]; then
  echo "ERROR: TEI probe ${probe_idx} failed: empty response" >&2
  docker logs --tail 120 "${TEI_CONTAINER_NAME}" >&2 || true
  docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
  exit 1
fi
# Detect non-finite-like payloads (observed as null/NaN on incompatible CUDA image + GPU).
# Uses grep rather than ripgrep: `rg` is never checked for, and when it is
# missing the pipeline fails inside `if`, silently skipping this validation.
# The here-string also avoids a writer-side SIGPIPE under pipefail when the
# matcher exits on the first hit.
if grep -Eqi '(null|nan|inf)' <<<"${probe_resp}"; then
  echo "ERROR: TEI probe ${probe_idx} detected invalid embedding values (null/NaN/Inf)." >&2
  echo "Response preview: ${probe_resp:0:220}" >&2
  docker logs --tail 120 "${TEI_CONTAINER_NAME}" >&2 || true
  docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
  exit 1
fi
|
07cf5a93
tangwang
START_EMBEDDING=...
|
160
161
|
done
|
efd435cf
tangwang
tei性能调优:
|
162
163
|
echo "TEI is ready and output probe passed: http://127.0.0.1:${TEI_PORT}"
exit 0
|