07cf5a93
tangwang
START_EMBEDDING=...
|
1
2
3
4
5
6
7
8
9
|
#!/bin/bash
#
# Start Hugging Face TEI service for Qwen3-Embedding-0.6B using Docker.
#
set -euo pipefail

# Resolve the repository root relative to this script so it can be run from
# any working directory.
PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "${PROJECT_ROOT}"

# Load .env.
# shellcheck source=scripts/lib/load_env.sh
source "${PROJECT_ROOT}/scripts/lib/load_env.sh"
load_env_file "${PROJECT_ROOT}/.env"
|
07cf5a93
tangwang
START_EMBEDDING=...
|
14
15
16
17
18
19
20
21
22
23
|
# Docker is the only supported runtime for TEI; fail fast if it is missing.
if ! command -v docker >/dev/null 2>&1; then
  echo "ERROR: docker is required to run TEI service." >&2
  exit 1
fi

# Container / model configuration (all overridable via environment or .env).
TEI_CONTAINER_NAME="${TEI_CONTAINER_NAME:-saas-search-tei}"
TEI_PORT="${TEI_PORT:-8080}"
TEI_MODEL_ID="${TEI_MODEL_ID:-Qwen/Qwen3-Embedding-0.6B}"
TEI_VERSION="${TEI_VERSION:-1.9}"

# Performance tuning knobs passed through to the TEI server.
TEI_MAX_BATCH_TOKENS="${TEI_MAX_BATCH_TOKENS:-4096}"
TEI_MAX_CLIENT_BATCH_SIZE="${TEI_MAX_CLIENT_BATCH_SIZE:-24}"

TEI_DTYPE="${TEI_DTYPE:-float16}"
HF_CACHE_DIR="${HF_CACHE_DIR:-$HOME/.cache/huggingface}"
TEI_HEALTH_TIMEOUT_SEC="${TEI_HEALTH_TIMEOUT_SEC:-300}"

# Mirror registry for the TEI images (daocloud mirror of ghcr.io).
TEI_IMAGE_REPO="${TEI_IMAGE_REPO:-ghcr.m.daocloud.io/huggingface/text-embeddings-inference}"
|
af7ee060
tangwang
service_ctl 简化为“显...
|
31
32
33
34
|
# Normalize the requested device to lowercase and reject anything other than
# cuda/cpu up front, before any container work happens.
TEI_DEVICE_RAW="${TEI_DEVICE:-cuda}"
TEI_DEVICE="$(echo "${TEI_DEVICE_RAW}" | tr '[:upper:]' '[:lower:]')"
if [[ "${TEI_DEVICE}" != "cuda" && "${TEI_DEVICE}" != "cpu" ]]; then
  echo "ERROR: invalid TEI_DEVICE=${TEI_DEVICE_RAW}. Use cuda/cpu." >&2
  exit 1
fi
|
efd435cf
tangwang
tei性能调优:
|
38
39
40
41
42
43
|
#######################################
# Pick the TEI image tag that matches the local GPU generation.
# Prefer the turing image for pre-Ampere GPUs (e.g. Tesla T4, compute
# capability 7.5); Ampere and newer get the regular cuda image.
# Globals:   TEI_IMAGE_REPO, TEI_VERSION (read)
# Outputs:   full image reference on stdout
#######################################
detect_gpu_tei_image() {
  local compute_cap major
  compute_cap="$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -n1 || true)"
  major="${compute_cap%%.*}"
  # Require a numeric major version before comparing: non-numeric output
  # (no GPU, unexpected nvidia-smi format) would make `-lt` error out and,
  # under `set -e`, kill the whole script. Fall back to the cuda image.
  if [[ "${major}" =~ ^[0-9]+$ ]] && (( major < 8 )); then
    echo "${TEI_IMAGE_REPO}:turing-${TEI_VERSION}"
  else
    echo "${TEI_IMAGE_REPO}:cuda-${TEI_VERSION}"
  fi
}
|
af7ee060
tangwang
service_ctl 简化为“显...
|
50
|
# Select the image and docker GPU arguments for the requested device.
# cuda mode requires a working NVIDIA GPU; there is deliberately no fallback.
if [[ "${TEI_DEVICE}" == "cuda" ]]; then
  if ! command -v nvidia-smi >/dev/null 2>&1 || ! nvidia-smi >/dev/null 2>&1; then
    echo "ERROR: TEI_DEVICE=cuda but NVIDIA GPU is not available. No CPU fallback." >&2
    exit 1
  fi
  # Note: modern Docker setups can support `--gpus all` without exposing an explicit "nvidia" runtime
  # in `docker info`. We rely on `docker run` failure to surface misconfiguration instead of
  # blocking here with a false negative.
  TEI_IMAGE="${TEI_IMAGE:-$(detect_gpu_tei_image)}"
  GPU_ARGS=(--gpus all)
  TEI_MODE="cuda"
else
  TEI_IMAGE="${TEI_IMAGE:-${TEI_IMAGE_REPO}:${TEI_VERSION}}"
  GPU_ARGS=()
  TEI_MODE="cpu"
fi
mkdir -p "${HF_CACHE_DIR}"

# Reuse a running container only when both its image and its GPU attachment
# match the requested mode; otherwise recreate it from scratch.
existing_id="$(docker ps -aq -f "name=^/${TEI_CONTAINER_NAME}$")"
if [[ -n "${existing_id}" ]]; then
  running_id="$(docker ps -q -f "name=^/${TEI_CONTAINER_NAME}$")"
  if [[ -n "${running_id}" ]]; then
    current_image="$(docker inspect "${TEI_CONTAINER_NAME}" --format '{{.Config.Image}}' 2>/dev/null || true)"
    # DeviceRequests is "null" when the container was started without --gpus.
    device_req="$(docker inspect "${TEI_CONTAINER_NAME}" --format '{{json .HostConfig.DeviceRequests}}' 2>/dev/null || true)"
    # GPU-capable images carry a cuda-/turing- tag; used to detect mode drift.
    current_is_gpu_image=0
    if [[ "${current_image}" == *":cuda-"* || "${current_image}" == *":turing-"* ]]; then
      current_is_gpu_image=1
    fi
    if [[ "${TEI_DEVICE}" == "cuda" ]]; then
      if [[ "${current_is_gpu_image}" -eq 1 ]] && [[ "${device_req}" != "null" ]] && [[ "${current_image}" == "${TEI_IMAGE}" ]]; then
        echo "TEI already running (CUDA): ${TEI_CONTAINER_NAME}"
        exit 0
      fi
    else
      if [[ "${current_is_gpu_image}" -eq 0 ]] && [[ "${device_req}" == "null" ]] && [[ "${current_image}" == "${TEI_IMAGE}" ]]; then
        echo "TEI already running (CPU): ${TEI_CONTAINER_NAME}"
        exit 0
      fi
    fi
    # Reaching here means mode or image mismatch for either branch (matching
    # branches exit 0 above), so the recreate sequence is shared.
    echo "TEI running with different mode/image; recreating container ${TEI_CONTAINER_NAME}"
    echo " current_image=${current_image:-unknown}"
    echo " target_image=${TEI_IMAGE}"
    docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
  fi
  # A stopped container with the same name would make `docker run` fail.
  if docker ps -aq -f "name=^/${TEI_CONTAINER_NAME}$" | grep -q .; then
    docker rm "${TEI_CONTAINER_NAME}" >/dev/null
  fi
fi
echo "Starting TEI container: ${TEI_CONTAINER_NAME}"
echo "Image: ${TEI_IMAGE}"
echo "Model: ${TEI_MODEL_ID}"
echo "Port: ${TEI_PORT}"
echo "Mode: ${TEI_MODE}"
docker run -d \
  --name "${TEI_CONTAINER_NAME}" \
  -p "${TEI_PORT}:80" \
  "${GPU_ARGS[@]}" \
  -v "${HF_CACHE_DIR}:/data" \
  -e HF_TOKEN="${HF_TOKEN:-}" \
  "${TEI_IMAGE}" \
  --model-id "${TEI_MODEL_ID}" \
  --dtype "${TEI_DTYPE}" \
  --max-batch-tokens "${TEI_MAX_BATCH_TOKENS}" \
  --max-client-batch-size "${TEI_MAX_CLIENT_BATCH_SIZE}" >/dev/null

# Poll /health once per second until ready or the timeout budget is spent.
echo "Waiting for TEI health..."
for i in $(seq 1 "${TEI_HEALTH_TIMEOUT_SEC}"); do
  if curl -sf "http://127.0.0.1:${TEI_PORT}/health" >/dev/null 2>&1; then
    echo "TEI health is ready: http://127.0.0.1:${TEI_PORT}"
    break
  fi
  sleep 1
  if [[ "${i}" == "${TEI_HEALTH_TIMEOUT_SEC}" ]]; then
    echo "ERROR: TEI failed to become healthy in time." >&2
    docker logs --tail 100 "${TEI_CONTAINER_NAME}" >&2 || true
    exit 1
  fi
done

# Health alone is not enough: an incompatible CUDA image can serve /health yet
# return garbage embeddings. Probe /embed twice and validate the payload.
echo "Running TEI output probe..."
for probe_idx in 1 2; do
  probe_resp="$(curl -sf -X POST "http://127.0.0.1:${TEI_PORT}/embed" \
    -H "Content-Type: application/json" \
    -d '{"inputs":["health check","芭比娃娃 儿童玩具"]}' || true)"
  if [[ -z "${probe_resp}" ]]; then
    echo "ERROR: TEI probe ${probe_idx} failed: empty response" >&2
    docker logs --tail 120 "${TEI_CONTAINER_NAME}" >&2 || true
    docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
    exit 1
  fi
  # Detect non-finite-like payloads (observed as null/NaN on incompatible CUDA image + GPU).
  if echo "${probe_resp}" | grep -Eqi '(null|nan|inf)'; then
    echo "ERROR: TEI probe ${probe_idx} detected invalid embedding values (null/NaN/Inf)." >&2
    echo "Response preview: $(echo "${probe_resp}" | head -c 220)" >&2
    docker logs --tail 120 "${TEI_CONTAINER_NAME}" >&2 || true
    docker rm -f "${TEI_CONTAINER_NAME}" >/dev/null 2>&1 || true
    exit 1
  fi
done

echo "TEI is ready and output probe passed: http://127.0.0.1:${TEI_PORT}"
exit 0
|