start_gpu_manual.sh
2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/bin/bash
# Launch script for Xinference in GPU mode (manual device mapping).
# Does not depend on nvidia-container-toolkit.
set -e

# Print the start-up banner: rule, title, rule, then one blank line.
banner_rule="========================================="
printf '%s\n' \
  "$banner_rule" \
  " Xinference GPU 模式启动(手动配置)" \
  "$banner_rule" \
  ""
# Check for the GPU device node: list the NVIDIA device files when
# /dev/nvidia0 exists, otherwise report the problem and abort.
if [[ -e /dev/nvidia0 ]]; then
  echo "✅ 检测到 NVIDIA 设备文件:"
  # Show at most the first five matching entries.
  ls -la /dev/nvidia* | head -5
  echo ""
else
  echo "❌ 错误: 未找到 NVIDIA 设备文件"
  echo " 请确保已安装 NVIDIA 驱动"
  exit 1
fi
# Remove any previous container: stop it first, then delete it.
# Failures (e.g. no such container) are deliberately ignored and silenced.
echo "🛑 停止旧容器..."
for docker_action in stop rm; do
  sudo docker "$docker_action" xinference 2>/dev/null || true
done
echo ""

# Make sure the model directory exists in the current working directory.
mkdir -p ./models
# Pull the image only when it is not already available locally.
echo "🐳 检查镜像..."
xinference_image="xprobe/xinference:latest"
sudo docker image inspect "$xinference_image" >/dev/null 2>&1 || {
  echo "拉取 Xinference 镜像..."
  sudo docker pull "$xinference_image"
}
echo ""
# Start the container, mapping the GPU device nodes and the NVIDIA driver
# libraries into it by hand.
#
# The driver libraries are located on the host before they are bind-mounted:
# when the host side of a `-v` bind mount does not exist, Docker creates it as
# a directory, which would leave bogus root-owned directories on the host and
# mount a directory over the container's libcuda.so.1.

# Host directories searched for the driver libraries. /usr/lib comes first so
# hosts that worked with the previously hard-coded path behave the same.
host_lib_dirs=(/usr/lib /usr/lib64 /usr/lib/x86_64-linux-gnu)

#######################################
# Locate a driver library on the host.
# Globals:   host_lib_dirs (read)
# Arguments: $1 - library file name, e.g. libcuda.so.1
# Outputs:   full path of the first match on stdout
# Returns:   0 when found, 1 otherwise
#######################################
find_host_lib() {
  local lib_name=$1
  local dir
  for dir in "${host_lib_dirs[@]}"; do
    if [[ -e "${dir}/${lib_name}" ]]; then
      printf '%s\n' "${dir}/${lib_name}"
      return 0
    fi
  done
  return 1
}

# Abort with a clear message instead of letting Docker create directories.
for lib_name in libcuda.so.1 libnvidia-ml.so.1; do
  if ! find_host_lib "$lib_name" >/dev/null; then
    echo "❌ 错误: 未找到 NVIDIA 驱动库 ${lib_name}" >&2
    echo " 已检查目录: ${host_lib_dirs[*]}" >&2
    exit 1
  fi
done
host_libcuda=$(find_host_lib libcuda.so.1)
host_libnvml=$(find_host_lib libnvidia-ml.so.1)

echo "🚀 启动 Xinference 容器(GPU 模式)..."
echo "映射设备: /dev/nvidia0, /dev/nvidiactl, /dev/nvidia-uvm"
echo "映射库: ${host_libcuda%/*}/libcuda.so*"
echo ""

# NOTE(review): --privileged is kept from the original command; it is broader
# than the explicit --device entries and may be removable - confirm before
# changing.
run_args=(
  -d
  --name xinference
  --restart unless-stopped
  -p 9997:9997
  -p 9998:9998
  -v "$(pwd)/models:/data"
  -v "${host_libcuda}:/usr/lib/x86_64-linux-gnu/libcuda.so.1"
  -v "${host_libnvml}:/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1"
  -e XINFERENCE_HOME=/data
  -e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:/usr/local/cuda/lib64
  --device /dev/nvidia0
  --device /dev/nvidiactl
  --device /dev/nvidia-uvm
  --device /dev/nvidia-uvm-tools
  --security-opt "label=disable"
  --privileged
)
sudo docker run "${run_args[@]}" \
  xprobe/xinference:latest \
  xinference-local -H 0.0.0.0
echo ""
echo "⏳ 等待服务启动..."
# Give the container a moment to start (or to crash) before checking it.
sleep 3

# Check the state of the container named "xinference" itself. Grepping the
# `docker ps` table would also match other containers whose row mentions
# "xinference" (e.g. via the image name), and a container stuck in a restart
# loop would still be listed there. Requiring the status to be exactly
# "running" rejects both cases. An inspect failure (container missing) is
# treated as "not running" instead of aborting under `set -e`.
container_status=$(
  sudo docker container inspect --format '{{.State.Status}}' xinference 2>/dev/null
) || container_status=""

if [[ "$container_status" == "running" ]]; then
  echo "✅ 容器启动成功!"
  echo ""
  echo "========================================="
  echo " 🎉 启动成功!"
  echo "========================================="
  echo ""
  echo "📍 服务地址:"
  echo " - API: http://localhost:9997"
  echo " - Dashboard: http://localhost:9998"
  echo ""
  echo "📝 查看日志:"
  echo " sudo docker logs -f xinference"
  echo ""
  echo "📝 测试 GPU:"
  echo " sudo docker exec xinference nvidia-smi"
  echo ""
else
  # Show the container output to help diagnose the failed start.
  echo "❌ 容器启动失败,查看日志:"
  sudo docker logs xinference
  exit 1
fi