---
# StatefulSet running two GPU-enabled "rocky-dev-gpu" pods. Each pod exposes
# SSH on port 22 and gets its own 10Gi persistent volume mounted at /workspace.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: rocky-dev-gpu
  labels:
    app: rocky-dev-gpu
spec:
  # Must match the name of the headless Service defined in the second document.
  serviceName: rocky-dev-gpu-svc
  replicas: 2
  selector:
    matchLabels:
      app: rocky-dev-gpu
  template:
    metadata:
      labels:
        app: rocky-dev-gpu
    spec:
      containers:
        - name: rocky-dev-gpu
          image: rocky_dev_gpu:latest
          # Use the local image: with a ":latest" tag the default policy would
          # be Always, so IfNotPresent is set explicitly.
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 22
              name: ssh
          securityContext:
            # NOTE(review): privileged mode gives the container broad access
            # to the host; confirm it is actually required for this workload.
            privileged: true
          resources:
            # Request 1 GPU per pod (requests and limits are kept equal).
            limits:
              nvidia.com/gpu: 1
            requests:
              nvidia.com/gpu: 1
          env:
            # NOTE(review): "all" may expose every GPU on the node instead of
            # only the one allocated to this pod; confirm this is intended.
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
          volumeMounts:
            - name: workspace
              mountPath: /workspace
          # Both probes only check that something accepts TCP connections on
          # the SSH port.
          livenessProbe:
            tcpSocket:
              port: 22
            initialDelaySeconds: 30
            periodSeconds: 30
          readinessProbe:
            tcpSocket:
              port: 22
            initialDelaySeconds: 5
            periodSeconds: 10
  # One PersistentVolumeClaim named "workspace" is created per replica.
  volumeClaimTemplates:
    - metadata:
        name: workspace
      spec:
        accessModes:
          - "ReadWriteOnce"
        resources:
          requests:
            storage: 10Gi
---
# Headless Service (clusterIP: None) selecting the StatefulSet's pods and
# exposing their SSH port.
apiVersion: v1
kind: Service
metadata:
  name: rocky-dev-gpu-svc
spec:
  clusterIP: None
  selector:
    app: rocky-dev-gpu
  ports:
    - port: 22
      targetPort: 22