Blame view

third-party/xinference/start_gpu_manual.sh 2.49 KB
b401ef94   tangwang   third-party/xinfe...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
  #!/bin/bash

  # Xinference launcher for GPU mode with manual device mapping.
  # Does not depend on nvidia-container-toolkit.

  set -e

  # Startup banner.
  printf '%s\n' \
      "=========================================" \
      "  Xinference GPU 模式启动(手动配置)" \
      "=========================================" \
      ""

  # Guard clause: without the first NVIDIA device node there is nothing to map
  # into the container, so stop right away.
  [ -e /dev/nvidia0 ] || {
      printf '%s\n' \
          "❌ 错误: 未找到 NVIDIA 设备文件" \
          "   请确保已安装 NVIDIA 驱动"
      exit 1
  }

  # Show (at most five of) the NVIDIA device nodes that were found.
  printf '%s\n' "✅ 检测到 NVIDIA 设备文件:"
  ls -la /dev/nvidia* | head -5
  printf '\n'
  
  # Stop and remove any previous container. Errors are deliberately ignored:
  # on a first run the container simply does not exist yet.
  echo "🛑 停止旧容器..."
  for docker_action in stop rm; do
      sudo docker "$docker_action" xinference 2>/dev/null || true
  done
  echo ""

  # Make sure the model directory exists (relative to the current directory).
  mkdir -p models

  # Pull the image only when it is not already available locally.
  echo "🐳 检查镜像..."
  sudo docker image inspect xprobe/xinference:latest >/dev/null 2>&1 || {
      echo "拉取 Xinference 镜像..."
      sudo docker pull xprobe/xinference:latest
  }
  echo ""
  
  # Start the container (GPU devices and driver libraries are mapped manually).

  # Print the host path of the NVIDIA driver library named by $1 and return 0,
  # or return 1 when it is not present in any of the probed directories.
  # /usr/lib is probed first so hosts that worked with the previously
  # hard-coded location keep using exactly the same file.
  find_host_lib() {
      local lib_name=$1
      local lib_dir
      for lib_dir in /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib64; do
          if [ -e "${lib_dir}/${lib_name}" ]; then
              printf '%s\n' "${lib_dir}/${lib_name}"
              return 0
          fi
      done
      return 1
  }

  # Resolve both libraries BEFORE calling docker: with -v, a host path that does
  # not exist is created by Docker as an empty directory on the host and then
  # mounted over the library path inside the container, which breaks CUDA in a
  # way that is hard to diagnose and leaves a stray directory behind.
  if ! host_libcuda=$(find_host_lib libcuda.so.1); then
      echo "❌ 错误: 未找到 libcuda.so.1 (已检查 /usr/lib, /usr/lib/x86_64-linux-gnu, /usr/lib64)" >&2
      echo "   请确保已安装 NVIDIA 驱动" >&2
      exit 1
  fi
  if ! host_libnvml=$(find_host_lib libnvidia-ml.so.1); then
      echo "❌ 错误: 未找到 libnvidia-ml.so.1 (已检查 /usr/lib, /usr/lib/x86_64-linux-gnu, /usr/lib64)" >&2
      echo "   请确保已安装 NVIDIA 驱动" >&2
      exit 1
  fi

  echo "🚀 启动 Xinference 容器(GPU 模式)..."
  echo "映射设备: /dev/nvidia0, /dev/nvidiactl, /dev/nvidia-uvm"
  # Report the library paths that are really mounted, not a fixed pattern.
  echo "映射库: ${host_libcuda}, ${host_libnvml}"
  echo ""

  # Ports 9997/9998 are published, ./models is mounted as XINFERENCE_HOME, and
  # the host driver libraries are mounted where LD_LIBRARY_PATH points inside
  # the container.
  sudo docker run -d \
      --name xinference \
      --restart unless-stopped \
      -p 9997:9997 \
      -p 9998:9998 \
      -v "$(pwd)/models:/data" \
      -v "${host_libcuda}:/usr/lib/x86_64-linux-gnu/libcuda.so.1" \
      -v "${host_libnvml}:/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1" \
      -e XINFERENCE_HOME=/data \
      -e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:/usr/local/cuda/lib64 \
      --device /dev/nvidia0 \
      --device /dev/nvidiactl \
      --device /dev/nvidia-uvm \
      --device /dev/nvidia-uvm-tools \
      --security-opt "label=disable" \
      --privileged \
      xprobe/xinference:latest \
      xinference-local -H 0.0.0.0
  
  echo ""
  echo "⏳ 等待服务启动..."
  sleep 3

  # Check the container state.
  # Ask Docker for the state of the container named exactly "xinference".
  # Grepping `docker ps` output would also match any other running container
  # whose name or image merely contains "xinference" (for example a second
  # container started from xprobe/xinference:latest) and report a false
  # success. `|| true` keeps a failed lookup from aborting the script under
  # `set -e`; an empty state falls through to the failure branch below.
  container_state=$(sudo docker container inspect \
      --format '{{.State.Status}}' xinference 2>/dev/null || true)

  if [ "$container_state" = "running" ]; then
      echo "✅ 容器启动成功!"
      echo ""
      echo "========================================="
      echo "  🎉 启动成功!"
      echo "========================================="
      echo ""
      echo "📍 服务地址:"
      echo "   - API: http://localhost:9997"
      echo "   - Dashboard: http://localhost:9998"
      echo ""
      echo "📝 查看日志:"
      echo "   sudo docker logs -f xinference"
      echo ""
      echo "📝 测试 GPU:"
      echo "   sudo docker exec xinference nvidia-smi"
      echo ""
  else
      # Dump the container log so the cause of the failure is visible.
      echo "❌ 容器启动失败,查看日志:"
      sudo docker logs xinference
      exit 1
  fi