From ccdde5f4424836fc8e9cc98c204510fed9612e70 Mon Sep 17 00:00:00 2001
From: hc
Date: Wed, 25 Jun 2025 19:40:43 +0800
Subject: merged setup and containers

---
 containers/rocky-ssh-gpu-deployment.yaml | 86 ++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 containers/rocky-ssh-gpu-deployment.yaml

(limited to 'containers/rocky-ssh-gpu-deployment.yaml')

diff --git a/containers/rocky-ssh-gpu-deployment.yaml b/containers/rocky-ssh-gpu-deployment.yaml
new file mode 100644
index 0000000..062ccae
--- /dev/null
+++ b/containers/rocky-ssh-gpu-deployment.yaml
@@ -0,0 +1,86 @@
+---
+# StatefulSet of Rocky Linux dev pods: each replica serves SSH on port 22,
+# requests one NVIDIA GPU, and mounts its own 10Gi volume at /workspace.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: rocky-dev-gpu
+  labels:
+    app: rocky-dev-gpu
+spec:
+  # Name of the headless Service declared in the second document of this file.
+  serviceName: rocky-dev-gpu-svc
+  replicas: 2
+  selector:
+    matchLabels:
+      app: rocky-dev-gpu
+  template:
+    metadata:
+      labels:
+        app: rocky-dev-gpu
+    spec:
+      containers:
+        - name: rocky-dev-gpu
+          image: rocky_dev_gpu:latest
+          imagePullPolicy: IfNotPresent  # Use local image
+          ports:
+            - containerPort: 22
+              name: ssh
+          securityContext:
+            # NOTE(review): privileged containers typically see every host
+            # device, which may defeat the one-GPU-per-pod limit below.
+            # Confirm this is required.
+            privileged: true
+          resources:
+            limits:
+              nvidia.com/gpu: 1  # Request 1 GPU per pod
+            requests:
+              nvidia.com/gpu: 1
+          env:
+            # NOTE(review): "all" presumably exposes every GPU on the node to
+            # the NVIDIA runtime regardless of the limit above; verify intent.
+            - name: NVIDIA_VISIBLE_DEVICES
+              value: "all"
+            - name: NVIDIA_DRIVER_CAPABILITIES
+              value: "compute,utility"
+          volumeMounts:
+            - name: workspace
+              mountPath: /workspace
+          # Restart the container when port 22 stops accepting connections.
+          livenessProbe:
+            tcpSocket:
+              port: 22
+            initialDelaySeconds: 30
+            periodSeconds: 30
+          # Mark the pod ready once port 22 accepts connections.
+          readinessProbe:
+            tcpSocket:
+              port: 22
+            initialDelaySeconds: 5
+            periodSeconds: 10
+  volumeClaimTemplates:
+    # One PersistentVolumeClaim named "workspace" is created per replica.
+    - metadata:
+        name: workspace
+      spec:
+        accessModes: ["ReadWriteOnce"]
+        resources:
+          requests:
+            storage: 10Gi
+---
+# Headless Service (clusterIP: None) selecting the rocky-dev-gpu pods; it is
+# the governing Service named by the StatefulSet's serviceName.
+apiVersion: v1
+kind: Service
+metadata:
+  name: rocky-dev-gpu-svc
+spec:
+  clusterIP: None
+  selector:
+    app: rocky-dev-gpu
+  ports:
+    # Named so the port is addressable by name; targetPort refers to the
+    # container port named "ssh" in the StatefulSet.
+    - name: ssh
+      port: 22
+      targetPort: ssh
-- 
cgit v1.2.3-70-g09d2