summary | refs | log | tree | commit | diff
path: root/containers/rocky-ssh-gpu-deployment.yaml
diff options
context:
space:
mode:
Diffstat (limited to 'containers/rocky-ssh-gpu-deployment.yaml')
-rw-r--r-- containers/rocky-ssh-gpu-deployment.yaml 69
1 files changed, 69 insertions, 0 deletions
diff --git a/containers/rocky-ssh-gpu-deployment.yaml b/containers/rocky-ssh-gpu-deployment.yaml
new file mode 100644
index 0000000..062ccae
--- /dev/null
+++ b/containers/rocky-ssh-gpu-deployment.yaml
@@ -0,0 +1,69 @@
+apiVersion: apps/v1
+kind: StatefulSet  # 2 SSH-reachable GPU pods, each with its own workspace PVC
+metadata:
+  name: rocky-dev-gpu
+  labels:
+    app: rocky-dev-gpu
+spec:
+  serviceName: rocky-dev-gpu-svc  # must match the headless Service's name
+  replicas: 2
+  selector:
+    matchLabels:
+      app: rocky-dev-gpu  # must equal the pod template's labels
+  template:
+    metadata:
+      labels:
+        app: rocky-dev-gpu
+    spec:
+      containers:
+        - name: rocky-dev-gpu
+          image: rocky_dev_gpu:latest
+          imagePullPolicy: IfNotPresent  # Use local image
+          ports:
+            - containerPort: 22
+              name: ssh
+          securityContext:
+            privileged: true  # NOTE(review): full host access; confirm required
+          resources:
+            limits:
+              nvidia.com/gpu: 1  # Request 1 GPU per pod
+            requests:
+              nvidia.com/gpu: 1  # extended resources: request must equal limit
+          env:
+            - name: NVIDIA_VISIBLE_DEVICES
+              value: "all"  # NOTE(review): may expose more than the 1 GPU requested
+            - name: NVIDIA_DRIVER_CAPABILITIES
+              value: "compute,utility"
+          volumeMounts:
+            - name: workspace  # backed by the volumeClaimTemplate of the same name
+              mountPath: /workspace
+          livenessProbe:  # container is restarted if port 22 stops accepting TCP
+            tcpSocket:
+              port: 22
+            initialDelaySeconds: 30
+            periodSeconds: 30
+          readinessProbe:  # pod is marked ready once port 22 accepts TCP
+            tcpSocket:
+              port: 22
+            initialDelaySeconds: 5
+            periodSeconds: 10
+  volumeClaimTemplates:  # one 10Gi ReadWriteOnce claim is created per replica
+    - metadata:
+        name: workspace
+      spec:
+        accessModes: ["ReadWriteOnce"]
+        resources:
+          requests:
+            storage: 10Gi
+---
+apiVersion: v1
+kind: Service  # headless Service named by the StatefulSet's serviceName
+metadata:
+  name: rocky-dev-gpu-svc
+spec:
+  clusterIP: None  # headless: no virtual IP; DNS resolves to the pod IPs
+  selector:
+    app: rocky-dev-gpu  # selects the pods labelled by the StatefulSet template
+  ports:
+    - port: 22
+      targetPort: 22  # the container's SSH port