#!/bin/bash
#
# Start Hugging Face TEI service for Qwen3-Embedding-0.6B using Docker.
#
# Configuration is taken from the environment and from ${PROJECT_ROOT}/.env
# (PROJECT_ROOT is the parent of the directory containing this script).
# Every valid KEY=VALUE line in .env is exported and replaces an inherited
# value of the same name.
#
#   TEI_CONTAINER_NAME         container name (default: saas-search-tei)
#   TEI_PORT                   host port published to container port 80
#                              (default: 8080)
#   TEI_MODEL_ID               --model-id (default: Qwen/Qwen3-Embedding-0.6B)
#   TEI_VERSION                image tag version (default: 1.9)
#   TEI_MAX_BATCH_TOKENS       --max-batch-tokens (default: 2048)
#   TEI_MAX_CLIENT_BATCH_SIZE  --max-client-batch-size (default: 8)
#   TEI_DTYPE                  --dtype (default: float16)
#   HF_CACHE_DIR               host dir mounted at /data
#                              (default: $HOME/.cache/huggingface)
#   TEI_HEALTH_TIMEOUT_SEC     seconds to wait for /health (default: 300)
#   TEI_USE_GPU                1/0, true/false, yes/no (default: 1)
#   TEI_IMAGE                  full image override
#   HF_TOKEN                   forwarded to the container when non-empty
#
# Exit status: 0 when TEI is healthy, or when a running container with the
# requested mode already exists; non-zero on any error.
#
set -euo pipefail

# Print each argument on its own line to stderr, then exit 1.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

#######################################
# Export KEY=VALUE pairs from a dotenv-style file.
# Blank lines, lines starting with '#', and lines without '=' are ignored.
# An optional leading "export " on the key is accepted. Keys that are not
# valid shell identifiers are skipped with a warning instead of letting
# `export` fail and abort the script under `set -e`.
# Arguments: $1 - path to the env file (a missing file is not an error)
#######################################
load_env_file() {
  local env_file=$1
  local line key value first last
  # Kept in a variable so the regex parses the same on every bash version.
  local -r key_re='^[A-Za-z_][A-Za-z0-9_]*$'

  if [[ ! -f "${env_file}" ]]; then
    return 0
  fi

  # `|| [[ -n ... ]]` also processes a last line with no trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    line="${line%$'\r'}" # tolerate CRLF line endings
    if [[ -z "${line//[[:space:]]/}" ]]; then continue; fi
    if [[ "${line}" =~ ^[[:space:]]*# ]]; then continue; fi
    if [[ "${line}" != *=* ]]; then continue; fi

    key="${line%%=*}"
    value="${line#*=}"

    # Trim surrounding whitespace from the key.
    key="${key#"${key%%[![:space:]]*}"}"
    key="${key%"${key##*[![:space:]]}"}"
    if [[ "${key}" == export[[:space:]]* ]]; then
      key="${key#export}"
      key="${key#"${key%%[![:space:]]*}"}"
    fi
    if [[ ! "${key}" =~ ${key_re} ]]; then
      printf 'WARNING: skipping invalid key in %s: %s\n' \
        "${env_file}" "${key}" >&2
      continue
    fi

    # Trim both ends of the value; trailing blanks would otherwise defeat
    # the quote stripping below and leak into values such as TEI_PORT.
    value="${value#"${value%%[![:space:]]*}"}"
    value="${value%"${value##*[![:space:]]}"}"

    # Strip one pair of matching surrounding quotes.
    if [[ ${#value} -ge 2 ]]; then
      first="${value:0:1}"
      last="${value: -1}"
      if [[ ("${first}" == '"' && "${last}" == '"') ||
        ("${first}" == "'" && "${last}" == "'") ]]; then
        value="${value:1:${#value}-2}"
      fi
    fi

    export "${key}=${value}"
  done <"${env_file}"
}

#######################################
# Deal with a pre-existing container named TEI_CONTAINER_NAME.
#   - none:                return and let the caller start one
#   - stopped:             remove it and return
#   - running, same mode:  report and exit 0
#   - running, other mode: exit 1 with instructions
# Globals: TEI_CONTAINER_NAME, USE_GPU (read)
#######################################
handle_existing_container() {
  local existing_id running_id current_image device_req mode
  local is_cuda_image=0 has_gpu_request=1 mismatch=0
  # Anchored so that only the exact container name matches.
  local -r name_filter="name=^/${TEI_CONTAINER_NAME}\$"

  existing_id="$(docker ps -aq -f "${name_filter}")"
  if [[ -z "${existing_id}" ]]; then
    return 0
  fi

  running_id="$(docker ps -q -f "${name_filter}")"
  if [[ -z "${running_id}" ]]; then
    docker rm "${TEI_CONTAINER_NAME}" >/dev/null
    return 0
  fi

  # Best effort: an inspect failure leaves the value empty ("unknown").
  current_image="$(docker inspect "${TEI_CONTAINER_NAME}" \
    --format '{{.Config.Image}}' 2>/dev/null || true)"
  device_req="$(docker inspect "${TEI_CONTAINER_NAME}" \
    --format '{{json .HostConfig.DeviceRequests}}' 2>/dev/null || true)"

  if [[ "${current_image}" == *":cuda-"* ]]; then
    is_cuda_image=1
  fi
  # Both "null" and an empty JSON list mean no device was requested.
  if [[ "${device_req}" == "null" || "${device_req}" == "[]" ]]; then
    has_gpu_request=0
  fi

  if [[ "${USE_GPU}" == "1" ]]; then
    mode="GPU"
    if ((!is_cuda_image || !has_gpu_request)); then mismatch=1; fi
  else
    mode="CPU"
    if ((is_cuda_image || has_gpu_request)); then mismatch=1; fi
  fi

  if ((mismatch)); then
    die \
      "ERROR: existing TEI container mode mismatch (need ${mode}): ${TEI_CONTAINER_NAME}" \
      " image=${current_image:-unknown}" \
      " device_requests=${device_req:-unknown}" \
      "Stop it first: ./scripts/stop_tei_service.sh"
  fi

  echo "TEI already running (${mode}): ${TEI_CONTAINER_NAME}"
  exit 0
}

#######################################
# Poll the TEI /health endpoint once per second.
# Exits 0 as soon as the endpoint answers. Exits 1 (after printing the last
# 100 log lines) when the container stops running or the timeout elapses.
# Globals:   TEI_PORT, TEI_CONTAINER_NAME (read)
# Arguments: $1 - timeout in seconds (non-negative integer)
#######################################
wait_for_health() {
  local -r timeout=$1
  local -r base_url="http://127.0.0.1:${TEI_PORT}"
  local elapsed running

  echo "Waiting for TEI health..."
  for ((elapsed = 0; elapsed < timeout; elapsed++)); do
    if curl -sf "${base_url}/health" >/dev/null 2>&1; then
      echo "TEI is ready: ${base_url}"
      exit 0
    fi

    # Fail fast: the container is started without a restart policy, so once
    # it is no longer running it cannot become healthy. An inspect failure
    # yields an empty value and polling simply continues.
    running="$(docker inspect --format '{{.State.Running}}' \
      "${TEI_CONTAINER_NAME}" 2>/dev/null || true)"
    if [[ "${running}" == "false" ]]; then
      echo "ERROR: TEI container exited before becoming healthy." >&2
      docker logs --tail 100 "${TEI_CONTAINER_NAME}" >&2 || true
      exit 1
    fi

    sleep 1
  done

  echo "ERROR: TEI failed to become healthy in time." >&2
  docker logs --tail 100 "${TEI_CONTAINER_NAME}" >&2 || true
  exit 1
}

main() {
  local -r uint_re='^[0-9]+$'
  local health_timeout
  local -a run_args

  PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
  cd "${PROJECT_ROOT}"

  # Load .env if present
  ENV_FILE="${PROJECT_ROOT}/.env"
  load_env_file "${ENV_FILE}"

  if ! command -v docker >/dev/null 2>&1; then
    die "ERROR: docker is required to run TEI service."
  fi

  TEI_CONTAINER_NAME="${TEI_CONTAINER_NAME:-saas-search-tei}"
  TEI_PORT="${TEI_PORT:-8080}"
  TEI_MODEL_ID="${TEI_MODEL_ID:-Qwen/Qwen3-Embedding-0.6B}"
  TEI_VERSION="${TEI_VERSION:-1.9}"
  TEI_MAX_BATCH_TOKENS="${TEI_MAX_BATCH_TOKENS:-2048}"
  TEI_MAX_CLIENT_BATCH_SIZE="${TEI_MAX_CLIENT_BATCH_SIZE:-8}"
  TEI_DTYPE="${TEI_DTYPE:-float16}"
  HF_CACHE_DIR="${HF_CACHE_DIR:-$HOME/.cache/huggingface}"
  TEI_HEALTH_TIMEOUT_SEC="${TEI_HEALTH_TIMEOUT_SEC:-300}"

  # Validate before any container is started so a typo fails immediately.
  if [[ ! "${TEI_HEALTH_TIMEOUT_SEC}" =~ ${uint_re} ]]; then
    die "ERROR: invalid TEI_HEALTH_TIMEOUT_SEC=${TEI_HEALTH_TIMEOUT_SEC}. Use a non-negative integer (seconds)."
  fi
  # Force base 10 so a value such as "090" is not parsed as octal.
  health_timeout=$((10#${TEI_HEALTH_TIMEOUT_SEC}))

  USE_GPU_RAW="${TEI_USE_GPU:-1}"
  case "$(printf '%s' "${USE_GPU_RAW}" | tr '[:upper:]' '[:lower:]')" in
    1 | true | yes) USE_GPU="1" ;;
    0 | false | no) USE_GPU="0" ;;
    *)
      die "ERROR: invalid TEI_USE_GPU=${USE_GPU_RAW}. Use 1/0 (or true/false)."
      ;;
  esac

  if [[ "${USE_GPU}" == "1" ]]; then
    if ! command -v nvidia-smi >/dev/null 2>&1 || ! nvidia-smi >/dev/null 2>&1; then
      die "ERROR: TEI_USE_GPU=1 but NVIDIA GPU is not available. No CPU fallback."
    fi
    if ! docker info --format '{{json .Runtimes}}' 2>/dev/null | grep -q 'nvidia'; then
      die \
        "ERROR: TEI_USE_GPU=1 but Docker nvidia runtime is not configured." \
        "Install and configure nvidia-container-toolkit, then restart Docker."
    fi
    TEI_IMAGE="${TEI_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:cuda-${TEI_VERSION}}"
    TEI_MODE="gpu"
  else
    TEI_IMAGE="${TEI_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:${TEI_VERSION}}"
    TEI_MODE="cpu"
  fi

  mkdir -p "${HF_CACHE_DIR}"

  # Exits here when a container with this name is already running.
  handle_existing_container

  # Checked only now so the "already running" path above does not need curl.
  if ! command -v curl >/dev/null 2>&1; then
    die "ERROR: curl is required to check TEI health."
  fi

  # Build one argument array that is never empty. Expanding an empty array
  # under `set -u` is an "unbound variable" error on bash older than 4.4.
  run_args=(-d --name "${TEI_CONTAINER_NAME}" -p "${TEI_PORT}:80")
  if [[ "${USE_GPU}" == "1" ]]; then
    run_args+=(--gpus all)
  fi
  run_args+=(-v "${HF_CACHE_DIR}:/data")
  if [[ -n "${HF_TOKEN:-}" ]]; then
    # Forward by name: docker reads the value from this process's
    # environment, so the secret never appears on the command line.
    export HF_TOKEN
    run_args+=(-e HF_TOKEN)
  fi

  echo "Starting TEI container: ${TEI_CONTAINER_NAME}"
  echo "Image: ${TEI_IMAGE}"
  echo "Model: ${TEI_MODEL_ID}"
  echo "Port: ${TEI_PORT}"
  echo "Mode: ${TEI_MODE}"

  docker run "${run_args[@]}" \
    "${TEI_IMAGE}" \
    --model-id "${TEI_MODEL_ID}" \
    --dtype "${TEI_DTYPE}" \
    --max-batch-tokens "${TEI_MAX_BATCH_TOKENS}" \
    --max-client-batch-size "${TEI_MAX_CLIENT_BATCH_SIZE}" >/dev/null

  wait_for_health "${health_timeout}"
}

main "$@"