summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docker_build/Dockerfile4
-rw-r--r--docker_build/Dockerfile.gpu40
-rw-r--r--docker_build/ssh-keys/macm4-resident.pub (renamed from ssh-keys/macm4-resident.pub)0
-rw-r--r--docs69
-rwxr-xr-xpodman_launch_devenv.py6
-rw-r--r--rocky-ssh-deployment.yaml2
-rw-r--r--rocky-ssh-gpu-deployment.yaml69
-rwxr-xr-xtests/test_base_container.sh131
-rwxr-xr-xtests/test_gpu_container.sh146
9 files changed, 459 insertions, 8 deletions
diff --git a/docker_build/Dockerfile b/docker_build/Dockerfile
index 5df57d2..16f74d6 100644
--- a/docker_build/Dockerfile
+++ b/docker_build/Dockerfile
@@ -19,8 +19,8 @@ RUN mkdir -p /var/run/sshd && \
RUN mkdir -p /root/.ssh && \
chmod 700 /root/.ssh && \
usermod -s /bin/bash root
-# Copy SSH public keys from ssh-keys directory into the image
-COPY ssh-keys/*.pub /tmp/ssh-keys/
+# Copy SSH public keys from docker_build/ssh-keys directory into the image
+COPY docker_build/ssh-keys/*.pub /tmp/ssh-keys/
RUN cat /tmp/ssh-keys/*.pub > /root/.ssh/authorized_keys && \
chmod 600 /root/.ssh/authorized_keys && \
rm -rf /tmp/ssh-keys
diff --git a/docker_build/Dockerfile.gpu b/docker_build/Dockerfile.gpu
new file mode 100644
index 0000000..7ed08a5
--- /dev/null
+++ b/docker_build/Dockerfile.gpu
@@ -0,0 +1,40 @@
+# GPU image: layered on top of the base dev environment image. This is a
+# derived image (single stage), not a multi-stage build, so the base image
+# must already exist locally or in a registry before this file is built.
+# Override the base with e.g.:
+#   podman build --build-arg BASE_IMAGE=localhost:5000/rocky_dev:latest ...
+ARG BASE_IMAGE=rocky_dev:latest
+FROM ${BASE_IMAGE}
+
+# Update and install GPU-specific packages (cache cleaned in the same layer)
+RUN dnf update -y && \
+    dnf install -y kernel-headers kernel-devel pciutils && \
+    dnf clean all
+
+# Add the NVIDIA container toolkit repository and install the toolkit
+RUN dnf config-manager --add-repo https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo && \
+    dnf install -y nvidia-container-toolkit && \
+    dnf clean all
+
+# Default NVIDIA settings for containers started from this image
+ENV NVIDIA_VISIBLE_DEVICES=all \
+    NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+# Add GPU test script. One printf writes the whole file (one argument per
+# script line) instead of appending line by line.
+RUN printf '%s\n' \
+        '#!/bin/bash' \
+        'echo "=== System Information ==="' \
+        'cat /etc/rocky-release' \
+        'echo' \
+        'echo "=== PCI Devices (GPUs) ==="' \
+        'lspci | grep -i nvidia' \
+        'echo' \
+        'echo "=== NVIDIA SMI ==="' \
+        'if command -v nvidia-smi &> /dev/null; then' \
+        '    nvidia-smi' \
+        'else' \
+        '    echo "nvidia-smi not found. GPU might not be accessible inside container."' \
+        'fi' \
+        > /usr/local/bin/gpu-test.sh && \
+    chmod +x /usr/local/bin/gpu-test.sh
+
+# Create workspace directory for GPU workloads
+RUN mkdir -p /workspace
+
+# Working directory, exposed SSH port and sshd command for this image
+WORKDIR /root
+EXPOSE 22
+CMD ["/usr/sbin/sshd", "-D", "-e"]
diff --git a/ssh-keys/macm4-resident.pub b/docker_build/ssh-keys/macm4-resident.pub
index fbccb4f..fbccb4f 100644
--- a/ssh-keys/macm4-resident.pub
+++ b/docker_build/ssh-keys/macm4-resident.pub
diff --git a/docs b/docs
index 698feb5..3a0b3cc 100644
--- a/docs
+++ b/docs
@@ -1,4 +1,42 @@
# Rocky SSH Container
+## Setup
+### SSH Keys
+Place your SSH public keys in the `docker_build/ssh-keys/` directory:
+```bash
+cp ~/.ssh/id_ed25519.pub docker_build/ssh-keys/
+```
+The container will automatically add all `.pub` files from this directory to `/root/.ssh/authorized_keys`.
+
+## Building Containers
+### Base Development Container
+```bash
+# From the dev_env directory
+podman build -t rocky_dev:latest -f docker_build/Dockerfile .
+```
+### GPU-Enabled Container
+The GPU image is layered on top of the base image (`FROM rocky_dev:latest`), so the base image must be built first:
+```bash
+# First build the base container (from dev_env directory)
+podman build -t rocky_dev:latest -f docker_build/Dockerfile .
+# Then build the GPU version
+podman build -t rocky_dev_gpu:latest -f docker_build/Dockerfile.gpu .
+```
+
+## GPU Support
+The GPU-enabled container includes:
+- NVIDIA Container Toolkit for GPU access
+- GPU test script at `/usr/local/bin/gpu-test.sh`
+- Environment variables configured for NVIDIA GPU visibility
+- Workspace directory at `/workspace` for GPU workloads
+
+### Running with GPU Support
+```bash
+# Run GPU-enabled container with an interactive shell (the default command starts sshd instead)
+podman run -it --device nvidia.com/gpu=all rocky_dev_gpu:latest /bin/bash
+# Test GPU inside container
+gpu-test.sh
+nvidia-smi
+```
## Podman
```bash
@@ -18,6 +56,15 @@ kubectl delete pod rocky-dev-0
kubectl scale statefulset rocky-dev --replicas=10
kubectl delete -f rocky-ssh-deployment.yaml
```
+### Kubernetes GPU Deployment
+```bash
+kubectl apply -f rocky-ssh-gpu-deployment.yaml
+kubectl get pods -l app=rocky-dev-gpu -o wide
+kubectl describe pod rocky-dev-gpu-0 | grep nvidia
+kubectl exec -it rocky-dev-gpu-0 -- nvidia-smi
+kubectl scale statefulset rocky-dev-gpu --replicas=4
+kubectl delete -f rocky-ssh-gpu-deployment.yaml
+```
## Local Registry
```bash
@@ -30,11 +77,29 @@ podman push localhost:5000/rocky_dev:latest --tls-verify=false
```bash
# Direct shell
kubectl exec -it rocky-dev-0 -- /bin/bash
-
# SSH with agent forwarding (2 terminals)
kubectl port-forward rocky-dev-0 2222:22
ssh-agent bash -c 'ssh-add ~/macm4-resident && ssh -A -p 2222 root@localhost'
-
# External
kubectl port-forward --address 0.0.0.0 rocky-dev-0 9999:22
```
+
+## Features
+### Development Tools
+- C/C++ development: gcc, gcc-c++, make, cmake
+- Python 3 with pip and development headers
+- Rust toolchain with cargo tools (cargo-edit, bacon, evcxr_jupyter)
+- Node.js v22 via nvm
+- Claude Code CLI tool
+
+### System Utilities
+- SSH server with key-based authentication
+- tmux, vim, nano editors
+- htop, bmon for system monitoring
+- git, wget, tree, bat
+- Network tools: nc, net-tools, wireguard-tools
+
+### GPU Computing (GPU version only)
+- NVIDIA GPU support via container toolkit
+- GPU test utilities
+- Dedicated /workspace directory for ML/GPU workloads
diff --git a/podman_launch_devenv.py b/podman_launch_devenv.py
index 2473404..3d0b5b0 100755
--- a/podman_launch_devenv.py
+++ b/podman_launch_devenv.py
@@ -15,9 +15,9 @@ import subprocess, argparse, os, glob
def run(cmd): return subprocess.run(cmd, shell=True, capture_output=True, text=True)
def build():
- if not glob.glob("ssh-keys/*.pub"): os.makedirs("ssh-keys", exist_ok=True); open("ssh-keys/dummy.pub", "w").write("# dummy")
+ if not glob.glob("docker_build/ssh-keys/*.pub"): os.makedirs("docker_build/ssh-keys", exist_ok=True); open("docker_build/ssh-keys/dummy.pub", "w").write("# dummy")
result = run("podman build -f docker_build/Dockerfile -t rocky_dev:latest .")
- if os.path.exists("ssh-keys/dummy.pub"): os.remove("ssh-keys/dummy.pub")
+ if os.path.exists("docker_build/ssh-keys/dummy.pub"): os.remove("docker_build/ssh-keys/dummy.pub")
return result.returncode == 0
def launch():
@@ -48,6 +48,6 @@ elif args.command == "run":
else:
print("❌ Image rocky_dev:latest not found")
else:
- print("Usage: python3 launcher.py {run|list|cleanup} [-p PORT]")
+ print("Usage: python3 podman_launch_devenv.py {run|list|cleanup} [-p PORT]")
print("🐚 Shell: podman exec -it rocky_dev-<port> /bin/bash")
print("💡 Tip: For direct shell without port forwarding, use: podman run -it rocky_dev:latest /bin/bash")
diff --git a/rocky-ssh-deployment.yaml b/rocky-ssh-deployment.yaml
index 0d30e59..bb6c37f 100644
--- a/rocky-ssh-deployment.yaml
+++ b/rocky-ssh-deployment.yaml
@@ -42,7 +42,7 @@ metadata:
spec:
clusterIP: None
selector:
- app: rocky-dev-deploy
+ app: rocky-dev
ports:
- port: 22
targetPort: 22
diff --git a/rocky-ssh-gpu-deployment.yaml b/rocky-ssh-gpu-deployment.yaml
new file mode 100644
index 0000000..062ccae
--- /dev/null
+++ b/rocky-ssh-gpu-deployment.yaml
@@ -0,0 +1,69 @@
+# StatefulSet for the GPU-enabled dev container (image rocky_dev_gpu:latest).
+# Each pod exposes SSH on port 22, asks for one nvidia.com/gpu and mounts a
+# persistent volume at /workspace.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: rocky-dev-gpu
+ labels:
+ app: rocky-dev-gpu
+spec:
+ serviceName: rocky-dev-gpu-svc
+ replicas: 2
+ selector:
+ matchLabels:
+ app: rocky-dev-gpu
+ template:
+ metadata:
+ labels:
+ app: rocky-dev-gpu
+ spec:
+ containers:
+ - name: rocky-dev-gpu
+ image: rocky_dev_gpu:latest
+ imagePullPolicy: IfNotPresent # Use local image
+ ports:
+ - containerPort: 22
+ name: ssh
+# NOTE(review): privileged mode grants far more than GPU access; confirm it is
+# actually required alongside the nvidia.com/gpu resource request below.
+ securityContext:
+ privileged: true
+ resources:
+ limits:
+ nvidia.com/gpu: 1 # Request 1 GPU per pod
+ requests:
+ nvidia.com/gpu: 1
+# These repeat the ENV defaults set in docker_build/Dockerfile.gpu.
+# NOTE(review): NVIDIA_VISIBLE_DEVICES=all set here may expose every GPU on the
+# node rather than the single one requested above - verify on a multi-GPU node.
+ env:
+ - name: NVIDIA_VISIBLE_DEVICES
+ value: "all"
+ - name: NVIDIA_DRIVER_CAPABILITIES
+ value: "compute,utility"
+ volumeMounts:
+ - name: workspace
+ mountPath: /workspace
+# Liveness and readiness are both plain TCP connects to the SSH port.
+ livenessProbe:
+ tcpSocket:
+ port: 22
+ initialDelaySeconds: 30
+ periodSeconds: 30
+ readinessProbe:
+ tcpSocket:
+ port: 22
+ initialDelaySeconds: 5
+ periodSeconds: 10
+# Claim template for the "workspace" volume mounted above (10Gi, ReadWriteOnce).
+ volumeClaimTemplates:
+ - metadata:
+ name: workspace
+ spec:
+ accessModes: [ "ReadWriteOnce" ]
+ resources:
+ requests:
+ storage: 10Gi
+---
+# Headless Service (clusterIP: None) selecting the app=rocky-dev-gpu pods on
+# port 22; its name matches the serviceName used by the rocky-dev-gpu StatefulSet.
+apiVersion: v1
+kind: Service
+metadata:
+ name: rocky-dev-gpu-svc
+spec:
+ clusterIP: None
+ selector:
+ app: rocky-dev-gpu
+ ports:
+ - port: 22
+ targetPort: 22 \ No newline at end of file
diff --git a/tests/test_base_container.sh b/tests/test_base_container.sh
new file mode 100755
index 0000000..b5115ec
--- /dev/null
+++ b/tests/test_base_container.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+# Container Test Script for rocky_dev:latest
+# This script tests all the functionality of the base container
+
+set -e
+
+CONTAINER_NAME="rocky_dev_test_$$"
+IMAGE_NAME="rocky_dev:latest"
+TEST_PORT=$(shuf -i 30000-40000 -n 1)
+
+# Cleanup function
+cleanup() {
+ echo ""
+ echo "Cleaning up..."
+ podman stop $CONTAINER_NAME >/dev/null 2>&1 || true
+ podman rm $CONTAINER_NAME >/dev/null 2>&1 || true
+ echo "Container $CONTAINER_NAME removed"
+}
+
+# Set trap to cleanup on exit
+trap cleanup EXIT
+
+echo "=== Rocky Dev Container Test Suite ==="
+echo "Container: $CONTAINER_NAME"
+echo "Port: $TEST_PORT"
+echo ""
+
+# Function to run commands in container
+run_in_container() {
+ podman exec $CONTAINER_NAME bash -c "$1"
+}
+
+# Function to check if command exists
+check_command() {
+ local cmd=$1
+ echo -n "Checking $cmd... "
+ if run_in_container "command -v $cmd" >/dev/null 2>&1; then
+ echo "✓"
+ return 0
+ else
+ echo "✗"
+ return 1
+ fi
+}
+
+# Start container
+echo "1. Starting container..."
+podman run -d -p ${TEST_PORT}:22 --name $CONTAINER_NAME $IMAGE_NAME
+sleep 5
+
+echo ""
+echo "2. Testing system packages..."
+# Test core development tools
+check_command gcc
+check_command g++
+check_command make
+check_command cmake
+check_command git
+check_command python3
+check_command pip3
+
+echo ""
+echo "3. Testing system utilities..."
+# Test system utilities
+check_command tmux
+check_command vim
+check_command nano
+check_command tree
+check_command htop
+check_command bmon
+check_command wget
+check_command nc
+check_command bat
+
+echo ""
+echo "4. Testing SSH configuration..."
+# Check SSH daemon
+run_in_container "ps aux | grep sshd | grep -v grep" && echo "✓ SSH daemon running" || echo "✗ SSH daemon not running"
+
+# Check SSH config
+run_in_container "grep -q 'PubkeyAuthentication yes' /etc/ssh/sshd_config" && echo "✓ PubkeyAuthentication enabled" || echo "✗ PubkeyAuthentication not enabled"
+run_in_container "grep -q 'PermitRootLogin yes' /etc/ssh/sshd_config" && echo "✓ PermitRootLogin enabled" || echo "✗ PermitRootLogin not enabled"
+
+# Check SSH directory
+run_in_container "test -d /root/.ssh && test -f /root/.ssh/authorized_keys" && echo "✓ SSH directory configured" || echo "✗ SSH directory not configured"
+
+echo ""
+echo "5. Testing Rust installation..."
+# Test Rust
+run_in_container "source /root/.cargo/env && cargo --version" && echo "✓ Cargo installed" || echo "✗ Cargo not installed"
+run_in_container "source /root/.cargo/env && rustc --version" && echo "✓ Rust compiler installed" || echo "✗ Rust compiler not installed"
+
+# Test Rust tools
+echo "Checking Rust tools..."
+for tool in cargo-clone cargo-add cargo-info bacon dust; do
+ run_in_container "source /root/.cargo/env && command -v $tool" >/dev/null 2>&1 && echo " ✓ $tool" || echo " ✗ $tool"
+done
+# Check evcxr_jupyter separately (it's a Jupyter kernel, not a CLI tool)
+run_in_container "source /root/.cargo/env && ls ~/.cargo/bin/evcxr_jupyter" >/dev/null 2>&1 && echo " ✓ evcxr_jupyter (Rust Jupyter kernel)" || echo " ✗ evcxr_jupyter"
+
+echo ""
+echo "6. Testing Node.js installation..."
+# Test Node.js
+run_in_container "source /root/.nvm/nvm.sh && node --version" && echo "✓ Node.js installed" || echo "✗ Node.js not installed"
+run_in_container "source /root/.nvm/nvm.sh && npm --version" && echo "✓ npm installed" || echo "✗ npm not installed"
+
+# Test claude-code
+run_in_container "source /root/.nvm/nvm.sh && claude --version" >/dev/null 2>&1 && echo "✓ claude-code installed" || echo "✗ claude-code not installed"
+
+echo ""
+echo "7. Testing environment configuration..."
+# Test bash configuration
+run_in_container "grep -q 'LS_COLORS' /etc/bashrc" && echo "✓ LS_COLORS configured" || echo "✗ LS_COLORS not configured"
+run_in_container "grep -q 'PS1=' /etc/bashrc" && echo "✓ Custom prompt configured" || echo "✗ Custom prompt not configured"
+
+echo ""
+echo "8. Testing SSH connectivity..."
+# Test SSH connection (this will fail without proper keys)
+echo -n "Testing SSH port accessibility... "
+nc -zv localhost $TEST_PORT 2>&1 | grep -q succeeded && echo "✓" || echo "✗"
+
+echo ""
+echo "9. Testing file system..."
+# Check working directory
+run_in_container "pwd" | grep -q "/root" && echo "✓ Working directory is /root" || echo "✗ Working directory incorrect"
+
+echo ""
+echo "=== Test Summary ==="
+echo "All tests completed successfully!"
+echo "Container will be automatically cleaned up." \ No newline at end of file
diff --git a/tests/test_gpu_container.sh b/tests/test_gpu_container.sh
new file mode 100755
index 0000000..593f927
--- /dev/null
+++ b/tests/test_gpu_container.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+
+# Container Test Script for rocky_dev_gpu:latest
+# This script tests all the functionality of the GPU-enabled container
+
+set -e
+
+CONTAINER_NAME="rocky_dev_gpu_test_$$"
+IMAGE_NAME="rocky_dev_gpu:latest"
+TEST_PORT=$(shuf -i 40000-50000 -n 1)
+
+# Cleanup function
+cleanup() {
+ echo ""
+ echo "Cleaning up..."
+ podman stop $CONTAINER_NAME >/dev/null 2>&1 || true
+ podman rm $CONTAINER_NAME >/dev/null 2>&1 || true
+ echo "Container $CONTAINER_NAME removed"
+}
+
+# Set trap to cleanup on exit
+trap cleanup EXIT
+
+echo "=== Rocky Dev GPU Container Test Suite ==="
+echo "Container: $CONTAINER_NAME"
+echo "Port: $TEST_PORT"
+echo ""
+
+# Function to run commands in container
+run_in_container() {
+ podman exec $CONTAINER_NAME bash -c "$1"
+}
+
+# Function to check if command exists
+check_command() {
+ local cmd=$1
+ echo -n "Checking $cmd... "
+ if run_in_container "command -v $cmd" >/dev/null 2>&1; then
+ echo "✓"
+ return 0
+ else
+ echo "✗"
+ return 1
+ fi
+}
+
+# Start container with GPU support
+echo "1. Starting GPU container..."
+podman run -d -p ${TEST_PORT}:22 --device nvidia.com/gpu=all --name $CONTAINER_NAME $IMAGE_NAME
+sleep 5
+
+echo ""
+echo "2. Testing base container functionality..."
+echo "(Inherited from rocky_dev:latest)"
+
+# Quick check of base tools
+echo -n "Development tools: "
+for cmd in gcc g++ make cmake git python3; do
+ run_in_container "command -v $cmd" >/dev/null 2>&1 || { echo "✗ Missing $cmd"; exit 1; }
+done
+echo "✓"
+
+echo -n "Rust toolchain: "
+run_in_container "source /root/.cargo/env && cargo --version" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo -n "Node.js: "
+run_in_container "source /root/.nvm/nvm.sh && node --version" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo ""
+echo "3. Testing GPU-specific packages..."
+# Check for GPU utilities
+check_command lspci
+check_command nvidia-smi || echo " (nvidia-smi requires actual GPU hardware)"
+
+# Check for kernel packages
+echo -n "Checking kernel headers... "
+run_in_container "rpm -q kernel-headers" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo -n "Checking kernel-devel... "
+run_in_container "rpm -q kernel-devel" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo -n "Checking pciutils... "
+run_in_container "rpm -q pciutils" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo ""
+echo "4. Testing NVIDIA container toolkit..."
+echo -n "Checking nvidia-container-toolkit... "
+run_in_container "rpm -q nvidia-container-toolkit" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo ""
+echo "5. Testing GPU environment variables..."
+# Check environment variables
+echo -n "NVIDIA_VISIBLE_DEVICES... "
+run_in_container "echo \$NVIDIA_VISIBLE_DEVICES" | grep -q "all" && echo "✓ Set to 'all'" || echo "✗ Not set correctly"
+
+echo -n "NVIDIA_DRIVER_CAPABILITIES... "
+run_in_container "echo \$NVIDIA_DRIVER_CAPABILITIES" | grep -q "compute,utility" && echo "✓ Set to 'compute,utility'" || echo "✗ Not set correctly"
+
+echo ""
+echo "6. Testing GPU test script..."
+# Check if gpu-test.sh exists and is executable
+echo -n "Checking /usr/local/bin/gpu-test.sh... "
+run_in_container "test -x /usr/local/bin/gpu-test.sh" && echo "✓ Exists and executable" || echo "✗ Not found or not executable"
+
+# Run the GPU test script
+echo ""
+echo "Running GPU test script:"
+echo "------------------------"
+run_in_container "/usr/local/bin/gpu-test.sh" || echo "Note: Some GPU tests may fail without actual GPU hardware"
+echo "------------------------"
+
+echo ""
+echo "7. Testing workspace directory..."
+# Check workspace directory
+echo -n "Checking /workspace directory... "
+run_in_container "test -d /workspace" && echo "✓ Exists" || echo "✗ Not found"
+
+echo ""
+echo "8. Testing PCI device detection..."
+# Try to detect any NVIDIA devices
+echo "PCI devices (filtered for NVIDIA/GPU):"
+run_in_container "lspci 2>/dev/null | grep -iE '(nvidia|vga|3d|display)' || echo ' No GPU devices detected (this is normal without GPU hardware)'"
+
+echo ""
+echo "9. Testing container GPU device access..."
+# Check if container has GPU device access
+echo -n "Checking /dev/nvidia* devices... "
+if run_in_container "ls /dev/nvidia* 2>/dev/null" >/dev/null 2>&1; then
+ echo "✓ GPU devices found"
+ run_in_container "ls -la /dev/nvidia*"
+else
+ echo "✗ No GPU devices (normal without GPU hardware)"
+fi
+
+echo ""
+echo "=== Test Summary ==="
+echo "GPU Support Status:"
+if run_in_container "command -v nvidia-smi && nvidia-smi" >/dev/null 2>&1; then
+ echo " ✓ Full GPU support detected"
+else
+ echo " ⚠ GPU tools installed but no GPU hardware detected"
+ echo " This is normal when running without NVIDIA GPU"
+fi
+echo ""
+echo "All tests completed successfully!"
+echo "Container will be automatically cleaned up." \ No newline at end of file