# gpustack-server: Compose definition for the GPUStack server node


services:
  # GPUStack server container.
  gpustack:
    container_name: gpustack
    image: "gpustack/gpustack:v2.0.2"
    restart: unless-stopped
    environment:
      TZ: "Asia/Shanghai"
      # Hugging Face mirror endpoint.
      HF_ENDPOINT: "https://hf-mirror.com"
    ports:
      # host port : container port
      - "4080:80"
      - "11150:10150"
    volumes:
      # Data directory mount.
      - "./data:/var/lib/gpustack"
      # Front-end pages (disabled):
      # - ./data/ui:/usr/local/lib/python3.11/dist-packages/gpustack/ui

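---
# gpustack-worker: Compose definition for a GPUStack worker node
# (it has its own top-level `services:` key, so keep it in a separate Compose file)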

services:
  # GPUStack worker container; it registers with the server given by
  # --server-url in `command` below.
  gpustack-worker:
    image: gpustack/gpustack:v2.0.2
    container_name: gpustack-worker
    restart: unless-stopped
    privileged: true
    runtime: nvidia
    # Host networking: the container shares the host's network stack, so the
    # worker's port 10150 is reachable on the host without any mapping.
    # The worker must stay on port 10150 because the server connects to it to
    # verify the worker's status.
    # No `ports:` section is declared: Compose does not allow port mappings
    # together with `network_mode: host` (this also applies to the previously
    # commented-out "4861:80" mapping).
    network_mode: host
    environment:
      TZ: Asia/Shanghai
      # Hugging Face mirror endpoint.
      HF_ENDPOINT: https://hf-mirror.com
      GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME: gpustack-worker
      # Start the server first, obtain the token, then start the worker.
      # The token is shown when the cluster is created; `gpustack_token` is a
      # placeholder to be replaced with that value.
      GPUSTACK_TOKEN: gpustack_token
    # In this example the server and the worker run on the same host, hence
    # the same IP address in --worker-ip and --server-url.
    command:
      - "--worker-ip"
      - "192.168.0.12"
      - "--server-url"
      - "http://192.168.0.12:4080"
    volumes:
      # Data directory mount.
      - ./data:/var/lib/gpustack
      # Required: the Docker socket must be mounted because runtime images
      # are downloaded.
      - /var/run/docker.sock:/var/run/docker.sock