---
# Docker Compose definition for a single GPU-backed Xinference service.
# The version is quoted so it is read as a string, not a float.
version: '3.8'

services:
  xinference:
    # --- Image and lifecycle -------------------------------------------
    # NOTE(review): `latest` is a floating tag; pin a release tag if
    # reproducible deployments matter.
    image: xprobe/xinference:latest
    container_name: xinference
    # Restart automatically unless the container was stopped explicitly.
    restart: unless-stopped
    # NOTE(review): `-H 0.0.0.0` presumably sets the listen host so the
    # published ports are reachable from outside the container - confirm
    # against the xinference CLI help.
    command: xinference-local -H 0.0.0.0

    # --- Storage -------------------------------------------------------
    # XINFERENCE_HOME is set to the same path that ./models is
    # bind-mounted on, so that directory lives on the host.
    environment:
      XINFERENCE_HOME: /data
    volumes:
      - ./models:/data

    # --- Networking ----------------------------------------------------
    # host:container mappings, quoted to keep them plain strings.
    # 9997 is the port the healthcheck below probes.
    # NOTE(review): the purpose of 9998 is not evident from this file.
    ports:
      - "9997:9997"
      - "9998:9998"

    # --- GPU access ----------------------------------------------------
    # Reserve every NVIDIA GPU device available on the host.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu

    # --- Health --------------------------------------------------------
    # Exec-form probe: `curl -f` exits non-zero on HTTP error statuses.
    # NOTE(review): assumes curl is present inside the image - confirm.
    healthcheck:
      test:
        - 'CMD'
        - 'curl'
        - '-f'
        - 'http://localhost:9997/v1/models'
      start_period: 40s
      interval: 30s
      timeout: 10s
      retries: 3