---
# Docker Compose definition for a single GPU-backed Xinference service.
version: '3.8'

services:
  xinference:
    image: xprobe/xinference:latest
    container_name: xinference

    # Host:container port mappings. Quoted so they are always read as strings.
    ports:
      # 9997 is the port the healthcheck below queries.
      - "9997:9997"
      # NOTE(review): purpose of 9998 is not evident from this file — confirm
      # it is still needed.
      - "9998:9998"

    # XINFERENCE_HOME points at /data, the mount target of the volume below.
    environment:
      - XINFERENCE_HOME=/data

    # Bind-mount the host's ./models directory at /data in the container.
    volumes:
      - ./models:/data

    # Reserve every NVIDIA GPU on the host for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

    restart: unless-stopped

    # -H 0.0.0.0: presumably binds the server to all interfaces so the
    # published ports are reachable from outside the container — verify
    # against the xinference-local CLI help.
    command: xinference-local -H 0.0.0.0

    # Probe the /v1/models endpoint on port 9997 from inside the container;
    # `curl -f` exits non-zero on HTTP error responses.
    # NOTE(review): assumes curl is available in the image — confirm.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9997/v1/models"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s