start_gpu_manual.sh 2.49 KB
#!/bin/bash

# Xinference GPU 模式启动脚本(手动设备映射)
# 不依赖 nvidia-container-toolkit

# Abort the script as soon as a command fails outside a conditional context.
set -o errexit

# Startup banner: a title framed by two rules, followed by a blank line.
banner_rule="========================================="
printf '%s\n' \
    "$banner_rule" \
    "  Xinference GPU 模式启动(手动配置)" \
    "$banner_rule" \
    ""

# Verify the NVIDIA device nodes that the container start-up depends on.
# /dev/nvidia0, /dev/nvidiactl and /dev/nvidia-uvm are all handed to
# `docker run --device` later in this script. Checking only the first one
# would let a missing node abort `docker run` after the previous container
# has already been stopped and removed, so every required node is checked
# here and the missing ones are reported by name.
missing_devices=""
for gpu_node in /dev/nvidia0 /dev/nvidiactl /dev/nvidia-uvm; do
    if [ ! -e "$gpu_node" ]; then
        missing_devices="$missing_devices $gpu_node"
    fi
done

if [ -n "$missing_devices" ]; then
    echo "❌ 错误: 未找到 NVIDIA 设备文件"
    echo "   缺少:$missing_devices"
    echo "   请确保已安装 NVIDIA 驱动"
    exit 1
fi

# Show (at most five of) the device nodes that were found, for the operator.
echo "✅ 检测到 NVIDIA 设备文件:"
ls -la /dev/nvidia* | head -5
echo ""

# Best-effort removal of a container left over from a previous run.
# Both steps are allowed to fail (for example when no such container exists),
# so their error output is discarded and their exit status is ignored.
echo "🛑 停止旧容器..."
for docker_action in stop rm; do
    sudo docker "$docker_action" xinference 2>/dev/null || true
done
echo ""

# Make sure the host directory that is bind-mounted to /data exists.
mkdir -p models

# Pull the image only when it is not already present locally.
echo "🐳 检查镜像..."
xinference_image="xprobe/xinference:latest"
if sudo docker image inspect "$xinference_image" >/dev/null 2>&1; then
    : # image already available locally, nothing to pull
else
    echo "拉取 Xinference 镜像..."
    sudo docker pull "$xinference_image"
fi
echo ""

# Start the container with the GPU device nodes and the host driver libraries
# mapped by hand (no nvidia-container-toolkit involved).
echo "🚀 启动 Xinference 容器(GPU 模式)..."

# Print the first existing path of host library $1 among the usual library
# directories; return non-zero when none of them contains it.
# /usr/lib is probed first, so a host that worked with the previously
# hard-coded /usr/lib paths resolves to exactly the same files.
find_host_lib() {
    local lib_name=$1
    local lib_dir
    for lib_dir in /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib64; do
        if [ -e "$lib_dir/$lib_name" ]; then
            printf '%s\n' "$lib_dir/$lib_name"
            return 0
        fi
    done
    return 1
}

# Resolve the driver libraries before touching Docker. With `-v`, a bind
# source that does not exist is created on the host as a directory, which
# would leave a bogus directory behind and shadow the library inside the
# container; failing here with a clear message avoids both.
host_libs=()
for lib_name in libcuda.so.1 libnvidia-ml.so.1; do
    if ! lib_path=$(find_host_lib "$lib_name"); then
        echo "❌ 错误: 未找到主机驱动库 $lib_name"
        echo "   请确保已安装 NVIDIA 驱动"
        exit 1
    fi
    host_libs+=("$lib_path")
done

# Device nodes to pass through. /dev/nvidia-uvm-tools is not present on every
# host, and `docker run` fails on a missing --device path, so it is only
# mapped when it actually exists.
gpu_devices=(/dev/nvidia0 /dev/nvidiactl /dev/nvidia-uvm)
if [ -e /dev/nvidia-uvm-tools ]; then
    gpu_devices+=(/dev/nvidia-uvm-tools)
fi

# Report what is really going to be mapped.
device_summary=$(printf '%s, ' "${gpu_devices[@]}")
lib_summary=$(printf '%s, ' "${host_libs[@]}")
echo "映射设备: ${device_summary%, }"
echo "映射库: ${lib_summary%, }"
echo ""

# Fixed options first, then one -v per resolved library and one --device per
# device node. Libraries keep their file name and land in the container's
# /usr/lib/x86_64-linux-gnu, which is also listed in LD_LIBRARY_PATH.
# NOTE(review): --privileged presumably already exposes the host devices,
# which would make the explicit --device flags redundant; both are kept as in
# the original — confirm before dropping either.
run_args=(
    -d
    --name xinference
    --restart unless-stopped
    -p 9997:9997
    -p 9998:9998
    -v "$(pwd)/models:/data"
    -e XINFERENCE_HOME=/data
    -e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:/usr/local/cuda/lib64
    --security-opt "label=disable"
    --privileged
)
for lib_path in "${host_libs[@]}"; do
    run_args+=(-v "$lib_path:/usr/lib/x86_64-linux-gnu/${lib_path##*/}")
done
for gpu_node in "${gpu_devices[@]}"; do
    run_args+=(--device "$gpu_node")
done

sudo docker run "${run_args[@]}" \
    xprobe/xinference:latest \
    xinference-local -H 0.0.0.0

echo ""
echo "⏳ 等待服务启动..."
# Give the container a moment to either settle or crash before checking it.
sleep 3

# Query the state of the container named exactly "xinference".
# Grepping `docker ps` output for the substring "xinference" would also match
# other rows that merely contain it (e.g. another container created from the
# xprobe/xinference image), and would presumably match a container that is
# stuck restarting under the `--restart unless-stopped` policy. Only the
# "running" status counts as success. A failing inspect (no such container)
# yields an empty state instead of aborting the script under `set -e`.
container_state=$(sudo docker inspect --format '{{.State.Status}}' xinference 2>/dev/null || true)

if [ "$container_state" = "running" ]; then
    echo "✅ 容器启动成功!"
    echo ""
    echo "========================================="
    echo "  🎉 启动成功!"
    echo "========================================="
    echo ""
    echo "📍 服务地址:"
    echo "   - API: http://localhost:9997"
    echo "   - Dashboard: http://localhost:9998"
    echo ""
    echo "📝 查看日志:"
    echo "   sudo docker logs -f xinference"
    echo ""
    echo "📝 测试 GPU:"
    echo "   sudo docker exec xinference nvidia-smi"
    echo ""
else
    echo "❌ 容器启动失败,查看日志:"
    # Showing the logs is best effort; the script must still exit with 1 even
    # when the logs cannot be retrieved.
    sudo docker logs xinference || true
    exit 1
fi