From d6eb567da3e6d2e64ebf22adf1fc6d21c47090f8 Mon Sep 17 00:00:00 2001
From: hc
Date: Sat, 31 May 2025 23:44:40 +0800
Subject: hehe

---
Review notes (free-form area between the "---" separator and the first
"diff --git" line; not part of the commit message or of the file payload):

 * This patch adds one new file containing two YAML documents: an apps/v1
   StatefulSet "rocky-dev-gpu" (2 replicas, one container exposing port 22,
   one 10Gi "workspace" volume claim per replica) and a headless Service
   "rocky-dev-gpu-svc" (clusterIP: None) that the StatefulSet names as its
   serviceName.
 * Line breaks and YAML indentation were reconstructed from a
   whitespace-collapsed copy of this patch. Keys, values and comments are
   unchanged, but exact column positions are a best-effort rebuild, so the
   blob id on the "index" line may not match byte-for-byte - TODO confirm
   against the repository.
 * NOTE(review): the container sets securityContext.privileged: true and
   NVIDIA_VISIBLE_DEVICES="all" while also limiting nvidia.com/gpu to 1.
   This combination presumably exposes every GPU on the node to each pod
   regardless of the limit - confirm whether that is intended before reuse.
 * NOTE(review): the image uses the mutable ":latest" tag together with
   imagePullPolicy: IfNotPresent, so nodes keep whatever copy they already
   hold; the file is named "-deployment" although the workload kind is
   StatefulSet.

 rocky-ssh-gpu-deployment.yaml | 69 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 rocky-ssh-gpu-deployment.yaml

(limited to 'rocky-ssh-gpu-deployment.yaml')

diff --git a/rocky-ssh-gpu-deployment.yaml b/rocky-ssh-gpu-deployment.yaml
new file mode 100644
index 0000000..062ccae
--- /dev/null
+++ b/rocky-ssh-gpu-deployment.yaml
@@ -0,0 +1,69 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: rocky-dev-gpu
+  labels:
+    app: rocky-dev-gpu
+spec:
+  serviceName: rocky-dev-gpu-svc
+  replicas: 2
+  selector:
+    matchLabels:
+      app: rocky-dev-gpu
+  template:
+    metadata:
+      labels:
+        app: rocky-dev-gpu
+    spec:
+      containers:
+        - name: rocky-dev-gpu
+          image: rocky_dev_gpu:latest
+          imagePullPolicy: IfNotPresent  # Use local image
+          ports:
+            - containerPort: 22
+              name: ssh
+          securityContext:
+            privileged: true
+          resources:
+            limits:
+              nvidia.com/gpu: 1  # Request 1 GPU per pod
+            requests:
+              nvidia.com/gpu: 1
+          env:
+            - name: NVIDIA_VISIBLE_DEVICES
+              value: "all"
+            - name: NVIDIA_DRIVER_CAPABILITIES
+              value: "compute,utility"
+          volumeMounts:
+            - name: workspace
+              mountPath: /workspace
+          livenessProbe:
+            tcpSocket:
+              port: 22
+            initialDelaySeconds: 30
+            periodSeconds: 30
+          readinessProbe:
+            tcpSocket:
+              port: 22
+            initialDelaySeconds: 5
+            periodSeconds: 10
+  volumeClaimTemplates:
+    - metadata:
+        name: workspace
+      spec:
+        accessModes: [ "ReadWriteOnce" ]
+        resources:
+          requests:
+            storage: 10Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: rocky-dev-gpu-svc
+spec:
+  clusterIP: None
+  selector:
+    app: rocky-dev-gpu
+  ports:
+    - port: 22
+      targetPort: 22
\ No newline at end of file
-- 
cgit v1.2.3-70-g09d2