diff options
| author | hc <hc@email.ch> | 2025-05-31 23:44:40 +0800 |
|---|---|---|
| committer | hc <hc@email.ch> | 2025-05-31 23:44:40 +0800 |
| commit | d6eb567da3e6d2e64ebf22adf1fc6d21c47090f8 (patch) | |
| tree | 14c15830a8014001d5cc587b5b4d4454c880396e | |
| parent | ce511f49438761549e904d6e972b8c0635306ff9 (diff) | |
hehe
| -rw-r--r-- | docker_build/Dockerfile | 4 | ||||
| -rw-r--r-- | docker_build/Dockerfile.gpu | 40 | ||||
| -rw-r--r-- | docker_build/ssh-keys/macm4-resident.pub (renamed from ssh-keys/macm4-resident.pub) | 0 | ||||
| -rw-r--r-- | docs | 69 | ||||
| -rwxr-xr-x | podman_launch_devenv.py | 6 | ||||
| -rw-r--r-- | rocky-ssh-deployment.yaml | 2 | ||||
| -rw-r--r-- | rocky-ssh-gpu-deployment.yaml | 69 | ||||
| -rwxr-xr-x | tests/test_base_container.sh | 131 | ||||
| -rwxr-xr-x | tests/test_gpu_container.sh | 146 |
9 files changed, 459 insertions, 8 deletions
diff --git a/docker_build/Dockerfile b/docker_build/Dockerfile index 5df57d2..16f74d6 100644 --- a/docker_build/Dockerfile +++ b/docker_build/Dockerfile | |||
| @@ -19,8 +19,8 @@ RUN mkdir -p /var/run/sshd && \ | |||
| 19 | RUN mkdir -p /root/.ssh && \ | 19 | RUN mkdir -p /root/.ssh && \ |
| 20 | chmod 700 /root/.ssh && \ | 20 | chmod 700 /root/.ssh && \ |
| 21 | usermod -s /bin/bash root | 21 | usermod -s /bin/bash root |
| 22 | # Copy SSH public keys from ssh-keys directory into the image | 22 | # Copy SSH public keys from docker_build/ssh-keys directory into the image |
| 23 | COPY ssh-keys/*.pub /tmp/ssh-keys/ | 23 | COPY docker_build/ssh-keys/*.pub /tmp/ssh-keys/ |
| 24 | RUN cat /tmp/ssh-keys/*.pub > /root/.ssh/authorized_keys && \ | 24 | RUN cat /tmp/ssh-keys/*.pub > /root/.ssh/authorized_keys && \ |
| 25 | chmod 600 /root/.ssh/authorized_keys && \ | 25 | chmod 600 /root/.ssh/authorized_keys && \ |
| 26 | rm -rf /tmp/ssh-keys | 26 | rm -rf /tmp/ssh-keys |
diff --git a/docker_build/Dockerfile.gpu b/docker_build/Dockerfile.gpu new file mode 100644 index 0000000..7ed08a5 --- /dev/null +++ b/docker_build/Dockerfile.gpu | |||
| @@ -0,0 +1,40 @@ | |||
| 1 | # GPU image - layered on top of the base dev environment image (rocky_dev:latest, which must be built first) | ||
| 2 | FROM rocky_dev:latest | ||
| 3 | |||
| 4 | # Update and install GPU-specific packages | ||
| 5 | RUN dnf update -y && \ | ||
| 6 | dnf install -y kernel-headers kernel-devel pciutils && \ | ||
| 7 | dnf clean all | ||
| 8 | |||
| 9 | # Install NVIDIA container toolkit dependencies | ||
| 10 | RUN dnf config-manager --add-repo https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo && \ | ||
| 11 | dnf install -y nvidia-container-toolkit && \ | ||
| 12 | dnf clean all | ||
| 13 | |||
| 14 | # Set environment variables for NVIDIA | ||
| 15 | ENV NVIDIA_VISIBLE_DEVICES=all | ||
| 16 | ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility | ||
| 17 | |||
| 18 | # Add GPU test script | ||
| 19 | RUN echo '#!/bin/bash' > /usr/local/bin/gpu-test.sh && \ | ||
| 20 | echo 'echo "=== System Information ==="' >> /usr/local/bin/gpu-test.sh && \ | ||
| 21 | echo 'cat /etc/rocky-release' >> /usr/local/bin/gpu-test.sh && \ | ||
| 22 | echo 'echo' >> /usr/local/bin/gpu-test.sh && \ | ||
| 23 | echo 'echo "=== PCI Devices (GPUs) ==="' >> /usr/local/bin/gpu-test.sh && \ | ||
| 24 | echo 'lspci | grep -i nvidia' >> /usr/local/bin/gpu-test.sh && \ | ||
| 25 | echo 'echo' >> /usr/local/bin/gpu-test.sh && \ | ||
| 26 | echo 'echo "=== NVIDIA SMI ==="' >> /usr/local/bin/gpu-test.sh && \ | ||
| 27 | echo 'if command -v nvidia-smi &> /dev/null; then' >> /usr/local/bin/gpu-test.sh && \ | ||
| 28 | echo ' nvidia-smi' >> /usr/local/bin/gpu-test.sh && \ | ||
| 29 | echo 'else' >> /usr/local/bin/gpu-test.sh && \ | ||
| 30 | echo ' echo "nvidia-smi not found. GPU might not be accessible inside container."' >> /usr/local/bin/gpu-test.sh && \ | ||
| 31 | echo 'fi' >> /usr/local/bin/gpu-test.sh && \ | ||
| 32 | chmod +x /usr/local/bin/gpu-test.sh | ||
| 33 | |||
| 34 | # Create workspace directory for GPU workloads | ||
| 35 | RUN mkdir -p /workspace | ||
| 36 | |||
| 37 | # Keep the same working directory and CMD from base image | ||
| 38 | WORKDIR /root | ||
| 39 | EXPOSE 22 | ||
| 40 | CMD ["/usr/sbin/sshd", "-D", "-e"] \ No newline at end of file | ||
diff --git a/ssh-keys/macm4-resident.pub b/docker_build/ssh-keys/macm4-resident.pub index fbccb4f..fbccb4f 100644 --- a/ssh-keys/macm4-resident.pub +++ b/docker_build/ssh-keys/macm4-resident.pub | |||
| @@ -1,4 +1,42 @@ | |||
| 1 | # Rocky SSH Container | 1 | # Rocky SSH Container |
| 2 | ## Setup | ||
| 3 | ### SSH Keys | ||
| 4 | Place your SSH public keys in the `docker_build/ssh-keys/` directory: | ||
| 5 | ```bash | ||
| 6 | cp ~/.ssh/id_ed25519.pub docker_build/ssh-keys/ | ||
| 7 | ``` | ||
| 8 | The container will automatically add all `.pub` files from this directory to `/root/.ssh/authorized_keys`. | ||
| 9 | |||
| 10 | ## Building Containers | ||
| 11 | ### Base Development Container | ||
| 12 | ```bash | ||
| 13 | # From the dev_env directory | ||
| 14 | podman build -t rocky_dev:latest -f docker_build/Dockerfile . | ||
| 15 | ``` | ||
| 16 | ### GPU-Enabled Container | ||
| 17 | The GPU image is layered on top of the base image (`Dockerfile.gpu` starts with `FROM rocky_dev:latest`), so the base image must be built first: | ||
| 18 | ```bash | ||
| 19 | # First build the base container (from dev_env directory) | ||
| 20 | podman build -t rocky_dev:latest -f docker_build/Dockerfile . | ||
| 21 | # Then build the GPU version | ||
| 22 | podman build -t rocky_dev_gpu:latest -f docker_build/Dockerfile.gpu . | ||
| 23 | ``` | ||
| 24 | |||
| 25 | ## GPU Support | ||
| 26 | The GPU-enabled container includes: | ||
| 27 | - NVIDIA Container Toolkit for GPU access | ||
| 28 | - GPU test script at `/usr/local/bin/gpu-test.sh` | ||
| 29 | - Environment variables configured for NVIDIA GPU visibility | ||
| 30 | - Workspace directory at `/workspace` for GPU workloads | ||
| 31 | |||
| 32 | ### Running with GPU Support | ||
| 33 | ```bash | ||
| 34 | # Run GPU-enabled container | ||
| 35 | podman run -it --device nvidia.com/gpu=all rocky_dev_gpu:latest | ||
| 36 | # Test GPU inside container | ||
| 37 | gpu-test.sh | ||
| 38 | nvidia-smi | ||
| 39 | ``` | ||
| 2 | 40 | ||
| 3 | ## Podman | 41 | ## Podman |
| 4 | ```bash | 42 | ```bash |
| @@ -18,6 +56,15 @@ kubectl delete pod rocky-dev-0 | |||
| 18 | kubectl scale statefulset rocky-dev --replicas=10 | 56 | kubectl scale statefulset rocky-dev --replicas=10 |
| 19 | kubectl delete -f rocky-ssh-deployment.yaml | 57 | kubectl delete -f rocky-ssh-deployment.yaml |
| 20 | ``` | 58 | ``` |
| 59 | ### Kubernetes GPU Deployment | ||
| 60 | ```bash | ||
| 61 | kubectl apply -f rocky-ssh-gpu-deployment.yaml | ||
| 62 | kubectl get pods -l app=rocky-dev-gpu -o wide | ||
| 63 | kubectl describe pod rocky-dev-gpu-0 | grep nvidia | ||
| 64 | kubectl exec -it rocky-dev-gpu-0 -- nvidia-smi | ||
| 65 | kubectl scale statefulset rocky-dev-gpu --replicas=4 | ||
| 66 | kubectl delete -f rocky-ssh-gpu-deployment.yaml | ||
| 67 | ``` | ||
| 21 | 68 | ||
| 22 | ## Local Registry | 69 | ## Local Registry |
| 23 | ```bash | 70 | ```bash |
| @@ -30,11 +77,29 @@ podman push localhost:5000/rocky_dev:latest --tls-verify=false | |||
| 30 | ```bash | 77 | ```bash |
| 31 | # Direct shell | 78 | # Direct shell |
| 32 | kubectl exec -it rocky-dev-0 -- /bin/bash | 79 | kubectl exec -it rocky-dev-0 -- /bin/bash |
| 33 | |||
| 34 | # SSH with agent forwarding (2 terminals) | 80 | # SSH with agent forwarding (2 terminals) |
| 35 | kubectl port-forward rocky-dev-0 2222:22 | 81 | kubectl port-forward rocky-dev-0 2222:22 |
| 36 | ssh-agent bash -c 'ssh-add ~/macm4-resident && ssh -A -p 2222 root@localhost' | 82 | ssh-agent bash -c 'ssh-add ~/macm4-resident && ssh -A -p 2222 root@localhost' |
| 37 | |||
| 38 | # External | 83 | # External |
| 39 | kubectl port-forward --address 0.0.0.0 rocky-dev-0 9999:22 | 84 | kubectl port-forward --address 0.0.0.0 rocky-dev-0 9999:22 |
| 40 | ``` | 85 | ``` |
| 86 | |||
| 87 | ## Features | ||
| 88 | ### Development Tools | ||
| 89 | - C/C++ development: gcc, gcc-c++, make, cmake | ||
| 90 | - Python 3 with pip and development headers | ||
| 91 | - Rust toolchain with cargo tools (cargo-edit, bacon, evcxr_jupyter) | ||
| 92 | - Node.js v22 via nvm | ||
| 93 | - Claude Code CLI tool | ||
| 94 | |||
| 95 | ### System Utilities | ||
| 96 | - SSH server with key-based authentication | ||
| 97 | - tmux, vim, nano editors | ||
| 98 | - htop, bmon for system monitoring | ||
| 99 | - git, wget, tree, bat | ||
| 100 | - Network tools: nc, net-tools, wireguard-tools | ||
| 101 | |||
| 102 | ### GPU Computing (GPU version only) | ||
| 103 | - NVIDIA GPU support via container toolkit | ||
| 104 | - GPU test utilities | ||
| 105 | - Dedicated /workspace directory for ML/GPU workloads | ||
diff --git a/podman_launch_devenv.py b/podman_launch_devenv.py index 2473404..3d0b5b0 100755 --- a/podman_launch_devenv.py +++ b/podman_launch_devenv.py | |||
| @@ -15,9 +15,9 @@ import subprocess, argparse, os, glob | |||
| 15 | def run(cmd): return subprocess.run(cmd, shell=True, capture_output=True, text=True) | 15 | def run(cmd): return subprocess.run(cmd, shell=True, capture_output=True, text=True) |
| 16 | 16 | ||
| 17 | def build(): | 17 | def build(): |
| 18 | if not glob.glob("ssh-keys/*.pub"): os.makedirs("ssh-keys", exist_ok=True); open("ssh-keys/dummy.pub", "w").write("# dummy") | 18 | if not glob.glob("docker_build/ssh-keys/*.pub"): os.makedirs("docker_build/ssh-keys", exist_ok=True); open("docker_build/ssh-keys/dummy.pub", "w").write("# dummy") |
| 19 | result = run("podman build -f docker_build/Dockerfile -t rocky_dev:latest .") | 19 | result = run("podman build -f docker_build/Dockerfile -t rocky_dev:latest .") |
| 20 | if os.path.exists("ssh-keys/dummy.pub"): os.remove("ssh-keys/dummy.pub") | 20 | if os.path.exists("docker_build/ssh-keys/dummy.pub"): os.remove("docker_build/ssh-keys/dummy.pub") |
| 21 | return result.returncode == 0 | 21 | return result.returncode == 0 |
| 22 | 22 | ||
| 23 | def launch(): | 23 | def launch(): |
| @@ -48,6 +48,6 @@ elif args.command == "run": | |||
| 48 | else: | 48 | else: |
| 49 | print("❌ Image rocky_dev:latest not found") | 49 | print("❌ Image rocky_dev:latest not found") |
| 50 | else: | 50 | else: |
| 51 | print("Usage: python3 launcher.py {run|list|cleanup} [-p PORT]") | 51 | print("Usage: python3 podman_launch_devenv.py {run|list|cleanup} [-p PORT]") |
| 52 | print("🐚 Shell: podman exec -it rocky_dev-<port> /bin/bash") | 52 | print("🐚 Shell: podman exec -it rocky_dev-<port> /bin/bash") |
| 53 | print("💡 Tip: For direct shell without port forwarding, use: podman run -it rocky_dev:latest /bin/bash") | 53 | print("💡 Tip: For direct shell without port forwarding, use: podman run -it rocky_dev:latest /bin/bash") |
diff --git a/rocky-ssh-deployment.yaml b/rocky-ssh-deployment.yaml index 0d30e59..bb6c37f 100644 --- a/rocky-ssh-deployment.yaml +++ b/rocky-ssh-deployment.yaml | |||
| @@ -42,7 +42,7 @@ metadata: | |||
| 42 | spec: | 42 | spec: |
| 43 | clusterIP: None | 43 | clusterIP: None |
| 44 | selector: | 44 | selector: |
| 45 | app: rocky-dev-deploy | 45 | app: rocky-dev |
| 46 | ports: | 46 | ports: |
| 47 | - port: 22 | 47 | - port: 22 |
| 48 | targetPort: 22 | 48 | targetPort: 22 |
diff --git a/rocky-ssh-gpu-deployment.yaml b/rocky-ssh-gpu-deployment.yaml new file mode 100644 index 0000000..062ccae --- /dev/null +++ b/rocky-ssh-gpu-deployment.yaml | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | apiVersion: apps/v1 | ||
| 2 | kind: StatefulSet | ||
| 3 | metadata: | ||
| 4 | name: rocky-dev-gpu | ||
| 5 | labels: | ||
| 6 | app: rocky-dev-gpu | ||
| 7 | spec: | ||
| 8 | serviceName: rocky-dev-gpu-svc | ||
| 9 | replicas: 2 | ||
| 10 | selector: | ||
| 11 | matchLabels: | ||
| 12 | app: rocky-dev-gpu | ||
| 13 | template: | ||
| 14 | metadata: | ||
| 15 | labels: | ||
| 16 | app: rocky-dev-gpu | ||
| 17 | spec: | ||
| 18 | containers: | ||
| 19 | - name: rocky-dev-gpu | ||
| 20 | image: rocky_dev_gpu:latest | ||
| 21 | imagePullPolicy: IfNotPresent # Use local image | ||
| 22 | ports: | ||
| 23 | - containerPort: 22 | ||
| 24 | name: ssh | ||
| 25 | securityContext: | ||
| 26 | privileged: true | ||
| 27 | resources: | ||
| 28 | limits: | ||
| 29 | nvidia.com/gpu: 1 # Request 1 GPU per pod | ||
| 30 | requests: | ||
| 31 | nvidia.com/gpu: 1 | ||
| 32 | env: | ||
| 33 | - name: NVIDIA_VISIBLE_DEVICES | ||
| 34 | value: "all" | ||
| 35 | - name: NVIDIA_DRIVER_CAPABILITIES | ||
| 36 | value: "compute,utility" | ||
| 37 | volumeMounts: | ||
| 38 | - name: workspace | ||
| 39 | mountPath: /workspace | ||
| 40 | livenessProbe: | ||
| 41 | tcpSocket: | ||
| 42 | port: 22 | ||
| 43 | initialDelaySeconds: 30 | ||
| 44 | periodSeconds: 30 | ||
| 45 | readinessProbe: | ||
| 46 | tcpSocket: | ||
| 47 | port: 22 | ||
| 48 | initialDelaySeconds: 5 | ||
| 49 | periodSeconds: 10 | ||
| 50 | volumeClaimTemplates: | ||
| 51 | - metadata: | ||
| 52 | name: workspace | ||
| 53 | spec: | ||
| 54 | accessModes: [ "ReadWriteOnce" ] | ||
| 55 | resources: | ||
| 56 | requests: | ||
| 57 | storage: 10Gi | ||
| 58 | --- | ||
| 59 | apiVersion: v1 | ||
| 60 | kind: Service | ||
| 61 | metadata: | ||
| 62 | name: rocky-dev-gpu-svc | ||
| 63 | spec: | ||
| 64 | clusterIP: None | ||
| 65 | selector: | ||
| 66 | app: rocky-dev-gpu | ||
| 67 | ports: | ||
| 68 | - port: 22 | ||
| 69 | targetPort: 22 \ No newline at end of file | ||
diff --git a/tests/test_base_container.sh b/tests/test_base_container.sh new file mode 100755 index 0000000..b5115ec --- /dev/null +++ b/tests/test_base_container.sh | |||
| @@ -0,0 +1,131 @@ | |||
| 1 | #!/bin/bash | ||
| 2 | |||
| 3 | # Container Test Script for rocky_dev:latest | ||
| 4 | # This script tests all the functionality of the base container | ||
| 5 | |||
| 6 | set -e | ||
| 7 | |||
| 8 | CONTAINER_NAME="rocky_dev_test_$$" | ||
| 9 | IMAGE_NAME="rocky_dev:latest" | ||
| 10 | TEST_PORT=$(shuf -i 30000-40000 -n 1) | ||
| 11 | |||
| 12 | # Cleanup function: stops and removes the test container; command output is discarded and failures are ignored so cleanup itself never fails | ||
| 13 | cleanup() { | ||
| 14 | echo "" | ||
| 15 | echo "Cleaning up..." | ||
| 16 | podman stop $CONTAINER_NAME >/dev/null 2>&1 || true | ||
| 17 | podman rm $CONTAINER_NAME >/dev/null 2>&1 || true | ||
| 18 | echo "Container $CONTAINER_NAME removed" | ||
| 19 | } | ||
| 20 | |||
| 21 | # Set trap to cleanup on exit | ||
| 22 | trap cleanup EXIT | ||
| 23 | |||
| 24 | echo "=== Rocky Dev Container Test Suite ===" | ||
| 25 | echo "Container: $CONTAINER_NAME" | ||
| 26 | echo "Port: $TEST_PORT" | ||
| 27 | echo "" | ||
| 28 | |||
| 29 | # Function to run commands in container: executes the command string $1 with bash -c inside $CONTAINER_NAME via podman exec; the function's status is that of podman exec | ||
| 30 | run_in_container() { | ||
| 31 | podman exec $CONTAINER_NAME bash -c "$1" | ||
| 32 | } | ||
| 33 | |||
| 34 | # Function to check if command $1 exists in the container (command -v): prints ✓ and returns 0, or prints ✗ and returns 1; because set -e is active, an unguarded call that returns 1 ends the script | ||
| 35 | check_command() { | ||
| 36 | local cmd=$1 | ||
| 37 | echo -n "Checking $cmd... " | ||
| 38 | if run_in_container "command -v $cmd" >/dev/null 2>&1; then | ||
| 39 | echo "✓" | ||
| 40 | return 0 | ||
| 41 | else | ||
| 42 | echo "✗" | ||
| 43 | return 1 | ||
| 44 | fi | ||
| 45 | } | ||
| 46 | |||
| 47 | # Start container | ||
| 48 | echo "1. Starting container..." | ||
| 49 | podman run -d -p ${TEST_PORT}:22 --name $CONTAINER_NAME $IMAGE_NAME | ||
| 50 | sleep 5 | ||
| 51 | |||
| 52 | echo "" | ||
| 53 | echo "2. Testing system packages..." | ||
| 54 | # Test core development tools | ||
| 55 | check_command gcc | ||
| 56 | check_command g++ | ||
| 57 | check_command make | ||
| 58 | check_command cmake | ||
| 59 | check_command git | ||
| 60 | check_command python3 | ||
| 61 | check_command pip3 | ||
| 62 | |||
| 63 | echo "" | ||
| 64 | echo "3. Testing system utilities..." | ||
| 65 | # Test system utilities | ||
| 66 | check_command tmux | ||
| 67 | check_command vim | ||
| 68 | check_command nano | ||
| 69 | check_command tree | ||
| 70 | check_command htop | ||
| 71 | check_command bmon | ||
| 72 | check_command wget | ||
| 73 | check_command nc | ||
| 74 | check_command bat | ||
| 75 | |||
| 76 | echo "" | ||
| 77 | echo "4. Testing SSH configuration..." | ||
| 78 | # Check SSH daemon | ||
| 79 | run_in_container "ps aux | grep sshd | grep -v grep" && echo "✓ SSH daemon running" || echo "✗ SSH daemon not running" | ||
| 80 | |||
| 81 | # Check SSH config | ||
| 82 | run_in_container "grep -q 'PubkeyAuthentication yes' /etc/ssh/sshd_config" && echo "✓ PubkeyAuthentication enabled" || echo "✗ PubkeyAuthentication not enabled" | ||
| 83 | run_in_container "grep -q 'PermitRootLogin yes' /etc/ssh/sshd_config" && echo "✓ PermitRootLogin enabled" || echo "✗ PermitRootLogin not enabled" | ||
| 84 | |||
| 85 | # Check SSH directory | ||
| 86 | run_in_container "test -d /root/.ssh && test -f /root/.ssh/authorized_keys" && echo "✓ SSH directory configured" || echo "✗ SSH directory not configured" | ||
| 87 | |||
| 88 | echo "" | ||
| 89 | echo "5. Testing Rust installation..." | ||
| 90 | # Test Rust | ||
| 91 | run_in_container "source /root/.cargo/env && cargo --version" && echo "✓ Cargo installed" || echo "✗ Cargo not installed" | ||
| 92 | run_in_container "source /root/.cargo/env && rustc --version" && echo "✓ Rust compiler installed" || echo "✗ Rust compiler not installed" | ||
| 93 | |||
| 94 | # Test Rust tools | ||
| 95 | echo "Checking Rust tools..." | ||
| 96 | for tool in cargo-clone cargo-add cargo-info bacon dust; do | ||
| 97 | run_in_container "source /root/.cargo/env && command -v $tool" >/dev/null 2>&1 && echo " ✓ $tool" || echo " ✗ $tool" | ||
| 98 | done | ||
| 99 | # Check evcxr_jupyter separately (it's a Jupyter kernel, not a CLI tool) | ||
| 100 | run_in_container "source /root/.cargo/env && ls ~/.cargo/bin/evcxr_jupyter" >/dev/null 2>&1 && echo " ✓ evcxr_jupyter (Rust Jupyter kernel)" || echo " ✗ evcxr_jupyter" | ||
| 101 | |||
| 102 | echo "" | ||
| 103 | echo "6. Testing Node.js installation..." | ||
| 104 | # Test Node.js | ||
| 105 | run_in_container "source /root/.nvm/nvm.sh && node --version" && echo "✓ Node.js installed" || echo "✗ Node.js not installed" | ||
| 106 | run_in_container "source /root/.nvm/nvm.sh && npm --version" && echo "✓ npm installed" || echo "✗ npm not installed" | ||
| 107 | |||
| 108 | # Test claude-code | ||
| 109 | run_in_container "source /root/.nvm/nvm.sh && claude --version" >/dev/null 2>&1 && echo "✓ claude-code installed" || echo "✗ claude-code not installed" | ||
| 110 | |||
| 111 | echo "" | ||
| 112 | echo "7. Testing environment configuration..." | ||
| 113 | # Test bash configuration | ||
| 114 | run_in_container "grep -q 'LS_COLORS' /etc/bashrc" && echo "✓ LS_COLORS configured" || echo "✗ LS_COLORS not configured" | ||
| 115 | run_in_container "grep -q 'PS1=' /etc/bashrc" && echo "✓ Custom prompt configured" || echo "✗ Custom prompt not configured" | ||
| 116 | |||
| 117 | echo "" | ||
| 118 | echo "8. Testing SSH connectivity..." | ||
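| 119 | # Test that the published SSH port is reachable from the host (no login is attempted, so no keys are needed); NOTE(review): the grep assumes nc prints "succeeded" - verify for the nc variant on the test host | ||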
| 120 | echo -n "Testing SSH port accessibility... " | ||
| 121 | nc -zv localhost $TEST_PORT 2>&1 | grep -q succeeded && echo "✓" || echo "✗" | ||
| 122 | |||
| 123 | echo "" | ||
| 124 | echo "9. Testing file system..." | ||
| 125 | # Check working directory | ||
| 126 | run_in_container "pwd" | grep -q "/root" && echo "✓ Working directory is /root" || echo "✗ Working directory incorrect" | ||
| 127 | |||
| 128 | echo "" | ||
| 129 | echo "=== Test Summary ===" | ||
| 130 | echo "All tests completed successfully!" | ||
| 131 | echo "Container will be automatically cleaned up." \ No newline at end of file | ||
diff --git a/tests/test_gpu_container.sh b/tests/test_gpu_container.sh new file mode 100755 index 0000000..593f927 --- /dev/null +++ b/tests/test_gpu_container.sh | |||
| @@ -0,0 +1,146 @@ | |||
| 1 | #!/bin/bash | ||
| 2 | |||
| 3 | # Container Test Script for rocky_dev_gpu:latest | ||
| 4 | # This script tests all the functionality of the GPU-enabled container | ||
| 5 | |||
| 6 | set -e | ||
| 7 | |||
| 8 | CONTAINER_NAME="rocky_dev_gpu_test_$$" | ||
| 9 | IMAGE_NAME="rocky_dev_gpu:latest" | ||
| 10 | TEST_PORT=$(shuf -i 40000-50000 -n 1) | ||
| 11 | |||
| 12 | # Cleanup function: stops and removes the GPU test container; command output is discarded and failures are ignored so cleanup itself never fails | ||
| 13 | cleanup() { | ||
| 14 | echo "" | ||
| 15 | echo "Cleaning up..." | ||
| 16 | podman stop $CONTAINER_NAME >/dev/null 2>&1 || true | ||
| 17 | podman rm $CONTAINER_NAME >/dev/null 2>&1 || true | ||
| 18 | echo "Container $CONTAINER_NAME removed" | ||
| 19 | } | ||
| 20 | |||
| 21 | # Set trap to cleanup on exit | ||
| 22 | trap cleanup EXIT | ||
| 23 | |||
| 24 | echo "=== Rocky Dev GPU Container Test Suite ===" | ||
| 25 | echo "Container: $CONTAINER_NAME" | ||
| 26 | echo "Port: $TEST_PORT" | ||
| 27 | echo "" | ||
| 28 | |||
| 29 | # Function to run commands in container: executes the command string $1 with bash -c inside $CONTAINER_NAME via podman exec; the function's status is that of podman exec | ||
| 30 | run_in_container() { | ||
| 31 | podman exec $CONTAINER_NAME bash -c "$1" | ||
| 32 | } | ||
| 33 | |||
| 34 | # Function to check if command $1 exists in the container (command -v): prints ✓ and returns 0, or prints ✗ and returns 1; because set -e is active, callers that tolerate a missing command must guard the call (e.g. with a trailing "or" branch) | ||
| 35 | check_command() { | ||
| 36 | local cmd=$1 | ||
| 37 | echo -n "Checking $cmd... " | ||
| 38 | if run_in_container "command -v $cmd" >/dev/null 2>&1; then | ||
| 39 | echo "✓" | ||
| 40 | return 0 | ||
| 41 | else | ||
| 42 | echo "✗" | ||
| 43 | return 1 | ||
| 44 | fi | ||
| 45 | } | ||
| 46 | |||
| 47 | # Start container with GPU support | ||
| 48 | echo "1. Starting GPU container..." | ||
| 49 | podman run -d -p ${TEST_PORT}:22 --device nvidia.com/gpu=all --name $CONTAINER_NAME $IMAGE_NAME | ||
| 50 | sleep 5 | ||
| 51 | |||
| 52 | echo "" | ||
| 53 | echo "2. Testing base container functionality..." | ||
| 54 | echo "(Inherited from rocky_dev:latest)" | ||
| 55 | |||
| 56 | # Quick check of base tools | ||
| 57 | echo -n "Development tools: " | ||
| 58 | for cmd in gcc g++ make cmake git python3; do | ||
| 59 | run_in_container "command -v $cmd" >/dev/null 2>&1 || { echo "✗ Missing $cmd"; exit 1; } | ||
| 60 | done | ||
| 61 | echo "✓" | ||
| 62 | |||
| 63 | echo -n "Rust toolchain: " | ||
| 64 | run_in_container "source /root/.cargo/env && cargo --version" >/dev/null 2>&1 && echo "✓" || echo "✗" | ||
| 65 | |||
| 66 | echo -n "Node.js: " | ||
| 67 | run_in_container "source /root/.nvm/nvm.sh && node --version" >/dev/null 2>&1 && echo "✓" || echo "✗" | ||
| 68 | |||
| 69 | echo "" | ||
| 70 | echo "3. Testing GPU-specific packages..." | ||
| 71 | # Check for GPU utilities | ||
| 72 | check_command lspci | ||
| 73 | check_command nvidia-smi || echo " (nvidia-smi requires actual GPU hardware)" | ||
| 74 | |||
| 75 | # Check for kernel packages | ||
| 76 | echo -n "Checking kernel headers... " | ||
| 77 | run_in_container "rpm -q kernel-headers" >/dev/null 2>&1 && echo "✓" || echo "✗" | ||
| 78 | |||
| 79 | echo -n "Checking kernel-devel... " | ||
| 80 | run_in_container "rpm -q kernel-devel" >/dev/null 2>&1 && echo "✓" || echo "✗" | ||
| 81 | |||
| 82 | echo -n "Checking pciutils... " | ||
| 83 | run_in_container "rpm -q pciutils" >/dev/null 2>&1 && echo "✓" || echo "✗" | ||
| 84 | |||
| 85 | echo "" | ||
| 86 | echo "4. Testing NVIDIA container toolkit..." | ||
| 87 | echo -n "Checking nvidia-container-toolkit... " | ||
| 88 | run_in_container "rpm -q nvidia-container-toolkit" >/dev/null 2>&1 && echo "✓" || echo "✗" | ||
| 89 | |||
| 90 | echo "" | ||
| 91 | echo "5. Testing GPU environment variables..." | ||
| 92 | # Check environment variables | ||
| 93 | echo -n "NVIDIA_VISIBLE_DEVICES... " | ||
| 94 | run_in_container "echo \$NVIDIA_VISIBLE_DEVICES" | grep -q "all" && echo "✓ Set to 'all'" || echo "✗ Not set correctly" | ||
| 95 | |||
| 96 | echo -n "NVIDIA_DRIVER_CAPABILITIES... " | ||
| 97 | run_in_container "echo \$NVIDIA_DRIVER_CAPABILITIES" | grep -q "compute,utility" && echo "✓ Set to 'compute,utility'" || echo "✗ Not set correctly" | ||
| 98 | |||
| 99 | echo "" | ||
| 100 | echo "6. Testing GPU test script..." | ||
| 101 | # Check if gpu-test.sh exists and is executable | ||
| 102 | echo -n "Checking /usr/local/bin/gpu-test.sh... " | ||
| 103 | run_in_container "test -x /usr/local/bin/gpu-test.sh" && echo "✓ Exists and executable" || echo "✗ Not found or not executable" | ||
| 104 | |||
| 105 | # Run the GPU test script | ||
| 106 | echo "" | ||
| 107 | echo "Running GPU test script:" | ||
| 108 | echo "------------------------" | ||
| 109 | run_in_container "/usr/local/bin/gpu-test.sh" || echo "Note: Some GPU tests may fail without actual GPU hardware" | ||
| 110 | echo "------------------------" | ||
| 111 | |||
| 112 | echo "" | ||
| 113 | echo "7. Testing workspace directory..." | ||
| 114 | # Check workspace directory | ||
| 115 | echo -n "Checking /workspace directory... " | ||
| 116 | run_in_container "test -d /workspace" && echo "✓ Exists" || echo "✗ Not found" | ||
| 117 | |||
| 118 | echo "" | ||
| 119 | echo "8. Testing PCI device detection..." | ||
| 120 | # Try to detect any NVIDIA devices | ||
| 121 | echo "PCI devices (filtered for NVIDIA/GPU):" | ||
| 122 | run_in_container "lspci 2>/dev/null | grep -iE '(nvidia|vga|3d|display)' || echo ' No GPU devices detected (this is normal without GPU hardware)'" | ||
| 123 | |||
| 124 | echo "" | ||
| 125 | echo "9. Testing container GPU device access..." | ||
| 126 | # Check if container has GPU device access | ||
| 127 | echo -n "Checking /dev/nvidia* devices... " | ||
| 128 | if run_in_container "ls /dev/nvidia* 2>/dev/null" >/dev/null 2>&1; then | ||
| 129 | echo "✓ GPU devices found" | ||
| 130 | run_in_container "ls -la /dev/nvidia*" | ||
| 131 | else | ||
| 132 | echo "✗ No GPU devices (normal without GPU hardware)" | ||
| 133 | fi | ||
| 134 | |||
| 135 | echo "" | ||
| 136 | echo "=== Test Summary ===" | ||
| 137 | echo "GPU Support Status:" | ||
| 138 | if run_in_container "command -v nvidia-smi && nvidia-smi" >/dev/null 2>&1; then | ||
| 139 | echo " ✓ Full GPU support detected" | ||
| 140 | else | ||
| 141 | echo " ⚠ GPU tools installed but no GPU hardware detected" | ||
| 142 | echo " This is normal when running without NVIDIA GPU" | ||
| 143 | fi | ||
| 144 | echo "" | ||
| 145 | echo "All tests completed successfully!" | ||
| 146 | echo "Container will be automatically cleaned up." \ No newline at end of file | ||
