From ccdde5f4424836fc8e9cc98c204510fed9612e70 Mon Sep 17 00:00:00 2001
From: hc
Date: Wed, 25 Jun 2025 19:40:43 +0800
Subject: merged setup and containers

---
 containers/docker_build/Dockerfile                 |  61 +++++++++
 containers/docker_build/Dockerfile.gpu             |  40 ++++++
 .../docker_build/ssh-keys/macm4-resident.pub       |   1 +
 containers/docker_build/vimrc                      |  77 +++++++++++
 containers/docs                                    | 105 +++++++++++++++
 containers/podman_launch_devenv.py                 |  53 ++++++++
 containers/rocky-ssh-deployment.yaml               |  48 +++++++
 containers/rocky-ssh-gpu-deployment.yaml           |  69 ++++++++++
 containers/tests/test_base_container.sh            | 131 ++++++++++++++++++
 containers/tests/test_gpu_container.sh             | 146 +++++++++++++++++++++
 10 files changed, 731 insertions(+)
 create mode 100644 containers/docker_build/Dockerfile
 create mode 100644 containers/docker_build/Dockerfile.gpu
 create mode 100644 containers/docker_build/ssh-keys/macm4-resident.pub
 create mode 100644 containers/docker_build/vimrc
 create mode 100644 containers/docs
 create mode 100755 containers/podman_launch_devenv.py
 create mode 100644 containers/rocky-ssh-deployment.yaml
 create mode 100644 containers/rocky-ssh-gpu-deployment.yaml
 create mode 100755 containers/tests/test_base_container.sh
 create mode 100755 containers/tests/test_gpu_container.sh

(limited to 'containers')

diff --git a/containers/docker_build/Dockerfile b/containers/docker_build/Dockerfile
new file mode 100644
index 0000000..16f74d6
--- /dev/null
+++ b/containers/docker_build/Dockerfile
@@ -0,0 +1,61 @@
+FROM rockylinux:9
+
+# Install required packages, resolving curl conflict
+RUN dnf install -y epel-release
+RUN dnf install -y --allowerasing openssh-server sudo procps-ng \
+    gcc gcc-c++ make cmake pkg-config openssl-devel libicu-devel perl python3-devel \
+    nc openssl bat autossh tmux htop tar bmon gzip tree wget \
+    nano vim unzip net-tools git python3 python3-pip wireguard-tools usbutils yum xclip \
+    && dnf clean all
+
+# Configure SSH
+RUN mkdir -p /var/run/sshd && \
+    ssh-keygen -A && \
+    sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config && \
+    sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \
+    sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config && \
+    echo "AllowAgentForwarding yes" >> /etc/ssh/sshd_config
+# Setup SSH directory for root and ensure root has valid shell
+RUN mkdir -p /root/.ssh && \
+    chmod 700 /root/.ssh && \
+    usermod -s /bin/bash root
+# Copy SSH public keys from docker_build/ssh-keys directory into the image
+COPY docker_build/ssh-keys/*.pub /tmp/ssh-keys/
+RUN cat /tmp/ssh-keys/*.pub > /root/.ssh/authorized_keys && \
+    chmod 600 /root/.ssh/authorized_keys && \
+    rm -rf /tmp/ssh-keys
+
+# Configure vim
+COPY docker_build/vimrc /etc/vimrc
+
+# Configure bash prompt and colors
+RUN echo 'LS_COLORS=$LS_COLORS:"di=38;5;135:ex=00;32:" ; export LS_COLORS' >> /etc/bashrc && \
+    echo 'PS1="[\[\033[01;32m\]\u\[\033[00m\]@\h \[\033[38;5;135m\]\W\[\033[00m\]]\$ "' >> /etc/bashrc && \
+    echo 'export PATH=$PATH:/root/.cargo/bin' >> /root/.bashrc
+
+# Install Rust and tools for root
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \
+    echo '[ -f "$HOME/.cargo/env" ] && source "$HOME/.cargo/env"' >> ~/.bashrc && \
+    source "$HOME/.cargo/env" && \
+    cargo install cargo-clone-crate cargo-edit cargo-info evcxr_jupyter bacon du-dust
+
+# Install Node.js via nvm and claude-code
+RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash && \
+    export NVM_DIR="$HOME/.nvm" && \
+    [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" && \
+    nvm install 22 && \
+    npm install -g @anthropic-ai/claude-code
+
+# Add nvm to bashrc for future sessions
+RUN echo 'export NVM_DIR="$HOME/.nvm"' >> ~/.bashrc && \
+    echo '[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"' >> ~/.bashrc && \
+    echo '[ -s "$NVM_DIR/bash_completion" ] && \. "$NVM_DIR/bash_completion"' >> ~/.bashrc
+
+# Set working directory
+WORKDIR /root
+
+# Expose SSH port
+EXPOSE 22
+
+# Start SSH daemon
+CMD ["/usr/sbin/sshd", "-D", "-e"]
diff --git a/containers/docker_build/Dockerfile.gpu b/containers/docker_build/Dockerfile.gpu
new file mode 100644
index 0000000..7ed08a5
--- /dev/null
+++ b/containers/docker_build/Dockerfile.gpu
@@ -0,0 +1,40 @@
+# GPU image - layered on top of the locally built base dev environment image (not a multi-stage build)
+FROM rocky_dev:latest
+
+# Update and install GPU-specific packages (dnf-plugins-core provides `dnf config-manager`, used below)
+RUN dnf update -y && \
+    dnf install -y dnf-plugins-core kernel-headers kernel-devel pciutils && \
+    dnf clean all
+
+# Install NVIDIA container toolkit dependencies
+RUN dnf config-manager --add-repo https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo && \
+    dnf install -y nvidia-container-toolkit && \
+    dnf clean all
+
+# Set environment variables for NVIDIA
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+# Add GPU test script
+RUN echo '#!/bin/bash' > /usr/local/bin/gpu-test.sh && \
+    echo 'echo "=== System Information ==="' >> /usr/local/bin/gpu-test.sh && \
+    echo 'cat /etc/rocky-release' >> /usr/local/bin/gpu-test.sh && \
+    echo 'echo' >> /usr/local/bin/gpu-test.sh && \
+    echo 'echo "=== PCI Devices (GPUs) ==="' >> /usr/local/bin/gpu-test.sh && \
+    echo 'lspci | grep -i nvidia' >> /usr/local/bin/gpu-test.sh && \
+    echo 'echo' >> /usr/local/bin/gpu-test.sh && \
+    echo 'echo "=== NVIDIA SMI ==="' >> /usr/local/bin/gpu-test.sh && \
+    echo 'if command -v nvidia-smi &> /dev/null; then' >> /usr/local/bin/gpu-test.sh && \
+    echo ' nvidia-smi' >> /usr/local/bin/gpu-test.sh && \
+    echo 'else' >> /usr/local/bin/gpu-test.sh && \
+    echo ' echo "nvidia-smi not found. GPU might not be accessible inside container."' >> /usr/local/bin/gpu-test.sh && \
+    echo 'fi' >> /usr/local/bin/gpu-test.sh && \
+    chmod +x /usr/local/bin/gpu-test.sh
+
+# Create workspace directory for GPU workloads
+RUN mkdir -p /workspace
+
+# Keep the same working directory and CMD from base image
+WORKDIR /root
+EXPOSE 22
+CMD ["/usr/sbin/sshd", "-D", "-e"]
\ No newline at end of file
diff --git a/containers/docker_build/ssh-keys/macm4-resident.pub b/containers/docker_build/ssh-keys/macm4-resident.pub
new file mode 100644
index 0000000..fbccb4f
--- /dev/null
+++ b/containers/docker_build/ssh-keys/macm4-resident.pub
@@ -0,0 +1 @@
+sk-ssh-ed25519@openssh.com AAAAGnNrLXNzaC1lZDI1NTE5QG9wZW5zc2guY29tAAAAIFdHP8n64jOV6Ok7U9TDnGW+LUkXP6V7cvXH6xqN0zcNAAAAEnNzaDptYWNtNC1yZXNpZGVudA== ssh:macm4-resident
diff --git a/containers/docker_build/vimrc b/containers/docker_build/vimrc
new file mode 100644
index 0000000..36583bc
--- /dev/null
+++ b/containers/docker_build/vimrc
@@ -0,0 +1,77 @@
+" Basic vim configuration for development environment
+
+" Enable syntax highlighting
+syntax on
+
+" Enable line numbers
+set number
+
+" Enable relative line numbers for easier navigation
+set relativenumber
+
+" Set tab width to 4 spaces
+set tabstop=4
+set shiftwidth=4
+set expandtab
+
+" Enable auto-indentation
+set autoindent
+set smartindent
+
+" Enable incremental search
+set incsearch
+
+" Highlight search results
+set hlsearch
+
+" Case-insensitive search unless uppercase is used
+set ignorecase
+set smartcase
+
+" Show matching brackets
+set showmatch
+
+" Enable mouse support
+set mouse=a
+
+" Set backspace behavior
+set backspace=indent,eol,start
+
+" Show current line and column
+set ruler
+
+" Enable file type detection
+filetype on
+filetype plugin on
+filetype indent on
+
+" Set color scheme (if available)
+colorscheme default
+
+" Enable visual bell instead of beep
+set visualbell
+
+" Set encoding
+set encoding=utf-8
+
+" Show command in status line
+set showcmd
+
+" Enable wildmenu for command completion
+set wildmenu
+
+" Set status line
+set laststatus=2
+set statusline=%F%m%r%h%w\ [%l,%c]\ [%L\ lines]
+
+" Rust specific settings
+autocmd FileType rust setlocal tabstop=4 shiftwidth=4 expandtab
+
+" Python specific settings
+autocmd FileType python setlocal tabstop=4 shiftwidth=4 expandtab
+
+" JavaScript/TypeScript settings
+autocmd FileType javascript,typescript setlocal tabstop=2 shiftwidth=2 expandtab
+
+" YAML settings
+autocmd FileType yaml setlocal tabstop=2 shiftwidth=2 expandtab
\ No newline at end of file
diff --git a/containers/docs b/containers/docs
new file mode 100644
index 0000000..3a0b3cc
--- /dev/null
+++ b/containers/docs
@@ -0,0 +1,105 @@
+# Rocky SSH Container
+## Setup
+### SSH Keys
+Place your SSH public keys in the `docker_build/ssh-keys/` directory:
+```bash
+cp ~/.ssh/id_ed25519.pub docker_build/ssh-keys/
+```
+The container will automatically add all `.pub` files from this directory to `/root/.ssh/authorized_keys`.
+
+## Building Containers
+### Base Development Container
+```bash
+# From the dev_env directory
+podman build -t rocky_dev:latest -f docker_build/Dockerfile .
+```
+### GPU-Enabled Container
+The GPU container uses the base container image as its `FROM` base, so the base image must be built first:
+```bash
+# First build the base container (from dev_env directory)
+podman build -t rocky_dev:latest -f docker_build/Dockerfile .
+# Then build the GPU version
+podman build -t rocky_dev_gpu:latest -f docker_build/Dockerfile.gpu .
+```
+
+## GPU Support
+The GPU-enabled container includes:
+- NVIDIA Container Toolkit for GPU access
+- GPU test script at `/usr/local/bin/gpu-test.sh`
+- Environment variables configured for NVIDIA GPU visibility
+- Workspace directory at `/workspace` for GPU workloads
+
+### Running with GPU Support
+```bash
+# Run GPU-enabled container
+podman run -it --device nvidia.com/gpu=all rocky_dev_gpu:latest
+# Test GPU inside container
+gpu-test.sh
+nvidia-smi
+```
+
+## Podman
+```bash
+python3 podman_launch_devenv.py
+python3 podman_launch_devenv.py run
+python3 podman_launch_devenv.py run -p 2222
+python3 podman_launch_devenv.py list
+python3 podman_launch_devenv.py cleanup
+```
+
+## Kubernetes
+```bash
+kubectl apply -f rocky-ssh-deployment.yaml
+kubectl get pods -l app=rocky-dev -o wide
+kubectl get svc rocky-dev-svc
+kubectl delete pod rocky-dev-0
+kubectl scale statefulset rocky-dev --replicas=10
+kubectl delete -f rocky-ssh-deployment.yaml
+```
+### Kubernetes GPU Deployment
+```bash
+kubectl apply -f rocky-ssh-gpu-deployment.yaml
+kubectl get pods -l app=rocky-dev-gpu -o wide
+kubectl describe pod rocky-dev-gpu-0 | grep nvidia
+kubectl exec -it rocky-dev-gpu-0 -- nvidia-smi
+kubectl scale statefulset rocky-dev-gpu --replicas=4
+kubectl delete -f rocky-ssh-gpu-deployment.yaml
+```
+
+## Local Registry
+```bash
+podman run -d -p 5000:5000 --name registry registry:2
+podman tag localhost/rocky_dev:latest localhost:5000/rocky_dev:latest
+podman push localhost:5000/rocky_dev:latest --tls-verify=false
+```
+
+## Access
+```bash
+# Direct shell
+kubectl exec -it rocky-dev-0 -- /bin/bash
+# SSH with agent forwarding (2 terminals)
+kubectl port-forward rocky-dev-0 2222:22
+ssh-agent bash -c 'ssh-add ~/macm4-resident && ssh -A -p 2222 root@localhost'
+# External
+kubectl port-forward --address 0.0.0.0 rocky-dev-0 9999:22
+```
+
+## Features
+### Development Tools
+- C/C++ development: gcc, gcc-c++, make, cmake
+- Python 3 with pip and development headers
+- Rust toolchain with cargo tools (cargo-edit, bacon, evcxr_jupyter)
+- Node.js v22 via nvm
+- Claude Code CLI tool
+
+### System Utilities
+- SSH server with key-based authentication
+- tmux, vim, nano editors
+- htop, bmon for system monitoring
+- git, wget, tree, bat
+- Network tools: nc, net-tools, wireguard-tools
+
+### GPU Computing (GPU version only)
+- NVIDIA GPU support via container toolkit
+- GPU test utilities
+- Dedicated /workspace directory for ML/GPU workloads
diff --git a/containers/podman_launch_devenv.py b/containers/podman_launch_devenv.py
new file mode 100755
index 0000000..3d0b5b0
--- /dev/null
+++ b/containers/podman_launch_devenv.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+"""
+Rocky SSH Container Launcher
+
+Manual build command:
+    podman build -f docker_build/Dockerfile -t rocky_dev:latest .
+
+Usage:
+    python3 podman_launch_devenv.py run [-p PORT]  # Launch a container from rocky_dev:latest (random port if -p is omitted)
+    python3 podman_launch_devenv.py list           # List running rocky_dev containers
+    python3 podman_launch_devenv.py cleanup        # Stop and remove all rocky_dev containers
+"""
+import subprocess, argparse, os, glob
+
+def run(cmd): return subprocess.run(cmd, shell=True, capture_output=True, text=True)
+
+def build():
+    if not glob.glob("docker_build/ssh-keys/*.pub"): os.makedirs("docker_build/ssh-keys", exist_ok=True); open("docker_build/ssh-keys/dummy.pub", "w").write("# dummy")
+    result = run("podman build -f docker_build/Dockerfile -t rocky_dev:latest .")
+    if os.path.exists("docker_build/ssh-keys/dummy.pub"): os.remove("docker_build/ssh-keys/dummy.pub")
+    return result.returncode == 0
+
+def launch():
+    port = str(args.port) if args.port else run("shuf -i 10000-65000 -n 1").stdout.strip()
+    result = run(f"podman run -d -p {port}:22 --privileged --name rocky_dev-{port} rocky_dev:latest")
+    if result.returncode == 0:
+        ip = run("hostname -I | awk '{print $1}'").stdout.strip() or "localhost"
+        print(f"🐳 SSH: ssh root@{ip} -p {port}")
+        print(f"🐚 Shell: podman exec -it rocky_dev-{port} /bin/bash")
+        print(f"💡 Tip: For direct shell without port forwarding, use: podman run -it rocky_dev:latest /bin/bash")
+    return result.returncode == 0
+
+parser = argparse.ArgumentParser(epilog="""
+Manual build commands:
+  Build: podman build -f docker_build/Dockerfile -t rocky_dev:latest .
+  Rebuild: podman rmi rocky_dev:latest && podman build -f docker_build/Dockerfile -t rocky_dev:latest .
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument("command", nargs="?", choices=["run", "list", "cleanup"], help="Command to execute")
+parser.add_argument("-p", "--port", type=int)
+args = parser.parse_args()
+
+if args.command == "list": print(run("podman ps --filter name=rocky_dev").stdout or "No containers")
+elif args.command == "cleanup": [run(f"podman stop {c} && podman rm {c}") for c in run("podman ps -a --filter name=rocky_dev --format '{{.Names}}'").stdout.split()]
+elif args.command == "run":
+    if run("podman images -q rocky_dev").stdout:
+        print("found rocky_dev container! starting with a random public port to ssh... ")
+        launch()
+    else:
+        print("❌ Image rocky_dev:latest not found")
+else:
+    print("Usage: python3 podman_launch_devenv.py {run|list|cleanup} [-p PORT]")
+    print("🐚 Shell: podman exec -it rocky_dev-<PORT> /bin/bash")
+    print("💡 Tip: For direct shell without port forwarding, use: podman run -it rocky_dev:latest /bin/bash")
diff --git a/containers/rocky-ssh-deployment.yaml b/containers/rocky-ssh-deployment.yaml
new file mode 100644
index 0000000..bb6c37f
--- /dev/null
+++ b/containers/rocky-ssh-deployment.yaml
@@ -0,0 +1,48 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: rocky-dev
+  labels:
+    app: rocky-dev
+spec:
+  serviceName: rocky-dev-svc
+  replicas: 2
+  selector:
+    matchLabels:
+      app: rocky-dev
+  template:
+    metadata:
+      labels:
+        app: rocky-dev
+    spec:
+      containers:
+      - name: rocky-dev
+        image: rocky_dev:latest
+        imagePullPolicy: IfNotPresent # Use local image
+        ports:
+        - containerPort: 22
+          name: ssh
+        securityContext:
+          privileged: true
+        livenessProbe:
+          tcpSocket:
+            port: 22
+          initialDelaySeconds: 30
+          periodSeconds: 30
+        readinessProbe:
+          tcpSocket:
+            port: 22
+          initialDelaySeconds: 5
+          periodSeconds: 10
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: rocky-dev-svc
+spec:
+  clusterIP: None
+  selector:
+    app: rocky-dev
+  ports:
+  - port: 22
+    targetPort: 22
diff --git a/containers/rocky-ssh-gpu-deployment.yaml b/containers/rocky-ssh-gpu-deployment.yaml
new file mode 100644
index 0000000..062ccae
--- /dev/null
+++ b/containers/rocky-ssh-gpu-deployment.yaml
@@ -0,0 +1,69 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: rocky-dev-gpu
+  labels:
+    app: rocky-dev-gpu
+spec:
+  serviceName: rocky-dev-gpu-svc
+  replicas: 2
+  selector:
+    matchLabels:
+      app: rocky-dev-gpu
+  template:
+    metadata:
+      labels:
+        app: rocky-dev-gpu
+    spec:
+      containers:
+      - name: rocky-dev-gpu
+        image: rocky_dev_gpu:latest
+        imagePullPolicy: IfNotPresent # Use local image
+        ports:
+        - containerPort: 22
+          name: ssh
+        securityContext:
+          privileged: true
+        resources:
+          limits:
+            nvidia.com/gpu: 1 # Request 1 GPU per pod
+          requests:
+            nvidia.com/gpu: 1
+        env:
+        - name: NVIDIA_VISIBLE_DEVICES
+          value: "all"
+        - name: NVIDIA_DRIVER_CAPABILITIES
+          value: "compute,utility"
+        volumeMounts:
+        - name: workspace
+          mountPath: /workspace
+        livenessProbe:
+          tcpSocket:
+            port: 22
+          initialDelaySeconds: 30
+          periodSeconds: 30
+        readinessProbe:
+          tcpSocket:
+            port: 22
+          initialDelaySeconds: 5
+          periodSeconds: 10
+  volumeClaimTemplates:
+  - metadata:
+      name: workspace
+    spec:
+      accessModes: [ "ReadWriteOnce" ]
+      resources:
+        requests:
+          storage: 10Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: rocky-dev-gpu-svc
+spec:
+  clusterIP: None
+  selector:
+    app: rocky-dev-gpu
+  ports:
+  - port: 22
+    targetPort: 22
\ No newline at end of file
diff --git a/containers/tests/test_base_container.sh b/containers/tests/test_base_container.sh
new file mode 100755
index 0000000..b5115ec
--- /dev/null
+++ b/containers/tests/test_base_container.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+# Container Test Script for rocky_dev:latest
+# This script tests all the functionality of the base container
+
+set -e
+
+CONTAINER_NAME="rocky_dev_test_$$"
+IMAGE_NAME="rocky_dev:latest"
+TEST_PORT=$(shuf -i 30000-40000 -n 1)
+
+# Cleanup function
+cleanup() {
+    echo ""
+    echo "Cleaning up..."
+    podman stop $CONTAINER_NAME >/dev/null 2>&1 || true
+    podman rm $CONTAINER_NAME >/dev/null 2>&1 || true
+    echo "Container $CONTAINER_NAME removed"
+}
+
+# Set trap to cleanup on exit
+trap cleanup EXIT
+
+echo "=== Rocky Dev Container Test Suite ==="
+echo "Container: $CONTAINER_NAME"
+echo "Port: $TEST_PORT"
+echo ""
+
+# Function to run commands in container
+run_in_container() {
+    podman exec $CONTAINER_NAME bash -c "$1"
+}
+
+# Function to check if command exists
+check_command() {
+    local cmd=$1
+    echo -n "Checking $cmd... "
+    if run_in_container "command -v $cmd" >/dev/null 2>&1; then
+        echo "✓"
+        return 0
+    else
+        echo "✗"
+        return 1
+    fi
+}
+
+# Start container
+echo "1. Starting container..."
+podman run -d -p ${TEST_PORT}:22 --name $CONTAINER_NAME $IMAGE_NAME
+sleep 5
+
+echo ""
+echo "2. Testing system packages..."
+# Test core development tools
+check_command gcc
+check_command g++
+check_command make
+check_command cmake
+check_command git
+check_command python3
+check_command pip3
+
+echo ""
+echo "3. Testing system utilities..."
+# Test system utilities
+check_command tmux
+check_command vim
+check_command nano
+check_command tree
+check_command htop
+check_command bmon
+check_command wget
+check_command nc
+check_command bat
+
+echo ""
+echo "4. Testing SSH configuration..."
+# Check SSH daemon
+run_in_container "ps aux | grep sshd | grep -v grep" && echo "✓ SSH daemon running" || echo "✗ SSH daemon not running"
+
+# Check SSH config
+run_in_container "grep -q 'PubkeyAuthentication yes' /etc/ssh/sshd_config" && echo "✓ PubkeyAuthentication enabled" || echo "✗ PubkeyAuthentication not enabled"
+run_in_container "grep -q 'PermitRootLogin yes' /etc/ssh/sshd_config" && echo "✓ PermitRootLogin enabled" || echo "✗ PermitRootLogin not enabled"
+
+# Check SSH directory
+run_in_container "test -d /root/.ssh && test -f /root/.ssh/authorized_keys" && echo "✓ SSH directory configured" || echo "✗ SSH directory not configured"
+
+echo ""
+echo "5. Testing Rust installation..."
+# Test Rust
+run_in_container "source /root/.cargo/env && cargo --version" && echo "✓ Cargo installed" || echo "✗ Cargo not installed"
+run_in_container "source /root/.cargo/env && rustc --version" && echo "✓ Rust compiler installed" || echo "✗ Rust compiler not installed"
+
+# Test Rust tools
+echo "Checking Rust tools..."
+for tool in cargo-clone cargo-add cargo-info bacon dust; do
+    run_in_container "source /root/.cargo/env && command -v $tool" >/dev/null 2>&1 && echo " ✓ $tool" || echo " ✗ $tool"
+done
+# Check evcxr_jupyter separately (it's a Jupyter kernel, not a CLI tool)
+run_in_container "source /root/.cargo/env && ls ~/.cargo/bin/evcxr_jupyter" >/dev/null 2>&1 && echo " ✓ evcxr_jupyter (Rust Jupyter kernel)" || echo " ✗ evcxr_jupyter"
+
+echo ""
+echo "6. Testing Node.js installation..."
+# Test Node.js
+run_in_container "source /root/.nvm/nvm.sh && node --version" && echo "✓ Node.js installed" || echo "✗ Node.js not installed"
+run_in_container "source /root/.nvm/nvm.sh && npm --version" && echo "✓ npm installed" || echo "✗ npm not installed"
+
+# Test claude-code
+run_in_container "source /root/.nvm/nvm.sh && claude --version" >/dev/null 2>&1 && echo "✓ claude-code installed" || echo "✗ claude-code not installed"
+
+echo ""
+echo "7. Testing environment configuration..."
+# Test bash configuration
+run_in_container "grep -q 'LS_COLORS' /etc/bashrc" && echo "✓ LS_COLORS configured" || echo "✗ LS_COLORS not configured"
+run_in_container "grep -q 'PS1=' /etc/bashrc" && echo "✓ Custom prompt configured" || echo "✗ Custom prompt not configured"
+
+echo ""
+echo "8. Testing SSH connectivity..."
+# Probe the published SSH port only (no login is attempted); use nc's exit status because its output text differs between nc implementations
+echo -n "Testing SSH port accessibility... "
+nc -z localhost "$TEST_PORT" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo ""
+echo "9. Testing file system..."
+# Check working directory
+run_in_container "pwd" | grep -q "/root" && echo "✓ Working directory is /root" || echo "✗ Working directory incorrect"
+
+echo ""
+echo "=== Test Summary ==="
+echo "Test run finished - review any ✗ marks above."
+echo "Container will be automatically cleaned up."
\ No newline at end of file
diff --git a/containers/tests/test_gpu_container.sh b/containers/tests/test_gpu_container.sh
new file mode 100755
index 0000000..593f927
--- /dev/null
+++ b/containers/tests/test_gpu_container.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+
+# Container Test Script for rocky_dev_gpu:latest
+# This script tests all the functionality of the GPU-enabled container
+
+set -e
+
+CONTAINER_NAME="rocky_dev_gpu_test_$$"
+IMAGE_NAME="rocky_dev_gpu:latest"
+TEST_PORT=$(shuf -i 40000-50000 -n 1)
+
+# Cleanup function
+cleanup() {
+    echo ""
+    echo "Cleaning up..."
+    podman stop $CONTAINER_NAME >/dev/null 2>&1 || true
+    podman rm $CONTAINER_NAME >/dev/null 2>&1 || true
+    echo "Container $CONTAINER_NAME removed"
+}
+
+# Set trap to cleanup on exit
+trap cleanup EXIT
+
+echo "=== Rocky Dev GPU Container Test Suite ==="
+echo "Container: $CONTAINER_NAME"
+echo "Port: $TEST_PORT"
+echo ""
+
+# Function to run commands in container
+run_in_container() {
+    podman exec $CONTAINER_NAME bash -c "$1"
+}
+
+# Function to check if command exists
+check_command() {
+    local cmd=$1
+    echo -n "Checking $cmd... "
+    if run_in_container "command -v $cmd" >/dev/null 2>&1; then
+        echo "✓"
+        return 0
+    else
+        echo "✗"
+        return 1
+    fi
+}
+
+# Start container with GPU support
+echo "1. Starting GPU container..."
+podman run -d -p ${TEST_PORT}:22 --device nvidia.com/gpu=all --name $CONTAINER_NAME $IMAGE_NAME
+sleep 5
+
+echo ""
+echo "2. Testing base container functionality..."
+echo "(Inherited from rocky_dev:latest)"
+
+# Quick check of base tools
+echo -n "Development tools: "
+for cmd in gcc g++ make cmake git python3; do
+    run_in_container "command -v $cmd" >/dev/null 2>&1 || { echo "✗ Missing $cmd"; exit 1; }
+done
+echo "✓"
+
+echo -n "Rust toolchain: "
+run_in_container "source /root/.cargo/env && cargo --version" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo -n "Node.js: "
+run_in_container "source /root/.nvm/nvm.sh && node --version" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo ""
+echo "3. Testing GPU-specific packages..."
+# Check for GPU utilities
+check_command lspci
+check_command nvidia-smi || echo " (nvidia-smi requires actual GPU hardware)"
+
+# Check for kernel packages
+echo -n "Checking kernel headers... "
+run_in_container "rpm -q kernel-headers" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo -n "Checking kernel-devel... "
+run_in_container "rpm -q kernel-devel" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo -n "Checking pciutils... "
+run_in_container "rpm -q pciutils" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo ""
+echo "4. Testing NVIDIA container toolkit..."
+echo -n "Checking nvidia-container-toolkit... "
+run_in_container "rpm -q nvidia-container-toolkit" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo ""
+echo "5. Testing GPU environment variables..."
+# Check environment variables
+echo -n "NVIDIA_VISIBLE_DEVICES... "
+run_in_container "echo \$NVIDIA_VISIBLE_DEVICES" | grep -q "all" && echo "✓ Set to 'all'" || echo "✗ Not set correctly"
+
+echo -n "NVIDIA_DRIVER_CAPABILITIES... "
+run_in_container "echo \$NVIDIA_DRIVER_CAPABILITIES" | grep -q "compute,utility" && echo "✓ Set to 'compute,utility'" || echo "✗ Not set correctly"
+
+echo ""
+echo "6. Testing GPU test script..."
+# Check if gpu-test.sh exists and is executable
+echo -n "Checking /usr/local/bin/gpu-test.sh... "
+run_in_container "test -x /usr/local/bin/gpu-test.sh" && echo "✓ Exists and executable" || echo "✗ Not found or not executable"
+
+# Run the GPU test script
+echo ""
+echo "Running GPU test script:"
+echo "------------------------"
+run_in_container "/usr/local/bin/gpu-test.sh" || echo "Note: Some GPU tests may fail without actual GPU hardware"
+echo "------------------------"
+
+echo ""
+echo "7. Testing workspace directory..."
+# Check workspace directory
+echo -n "Checking /workspace directory... "
+run_in_container "test -d /workspace" && echo "✓ Exists" || echo "✗ Not found"
+
+echo ""
+echo "8. Testing PCI device detection..."
+# Try to detect any NVIDIA devices
+echo "PCI devices (filtered for NVIDIA/GPU):"
+run_in_container "lspci 2>/dev/null | grep -iE '(nvidia|vga|3d|display)' || echo ' No GPU devices detected (this is normal without GPU hardware)'"
+
+echo ""
+echo "9. Testing container GPU device access..."
+# Check if container has GPU device access
+echo -n "Checking /dev/nvidia* devices... "
+if run_in_container "ls /dev/nvidia* 2>/dev/null" >/dev/null 2>&1; then
+    echo "✓ GPU devices found"
+    run_in_container "ls -la /dev/nvidia*"
+else
+    echo "✗ No GPU devices (normal without GPU hardware)"
+fi
+
+echo ""
+echo "=== Test Summary ==="
+echo "GPU Support Status:"
+if run_in_container "command -v nvidia-smi && nvidia-smi" >/dev/null 2>&1; then
+    echo " ✓ Full GPU support detected"
+else
+    echo " ⚠ GPU tools installed but no GPU hardware detected"
+    echo " This is normal when running without NVIDIA GPU"
+fi
+echo ""
+echo "Test run finished - review any ✗ marks above."
+echo "Container will be automatically cleaned up."
\ No newline at end of file
-- 
cgit v1.2.3-70-g09d2