summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Super User <root@p.noml.ch> 2026-03-23 19:08:56 +0800
committer Super User <root@p.noml.ch> 2026-03-23 19:08:56 +0800
commit dea4663f14bcb1703aa616ad05172667e452e701 (patch)
tree 160f42bbcaca01ec4ac5ff53d4b035efde8c6a77
parent 3b992aee2017112985d4c87fc613a61626b4bacf (diff)
remove containers directory
-rw-r--r-- containers/docker_build/Dockerfile 61
-rw-r--r-- containers/docker_build/Dockerfile.gpu 40
-rw-r--r-- containers/docker_build/ssh-keys/macm4-resident.pub 1
-rw-r--r-- containers/docker_build/vimrc 77
-rw-r--r-- containers/docs 105
-rw-r--r-- containers/rocky-ssh-deployment.yaml 48
-rw-r--r-- containers/rocky-ssh-gpu-deployment.yaml 69
-rwxr-xr-x containers/tests/test_base_container.sh 131
-rwxr-xr-x containers/tests/test_gpu_container.sh 146
9 files changed, 0 insertions, 678 deletions
diff --git a/containers/docker_build/Dockerfile b/containers/docker_build/Dockerfile
deleted file mode 100644
index 16f74d6..0000000
--- a/containers/docker_build/Dockerfile
+++ /dev/null
@@ -1,61 +0,0 @@
1FROM rockylinux:9
2
3# Install EPEL first, then the required packages, in one layer so `dnf clean all` trims the cache of that same layer; --allowerasing resolves the curl conflict
4RUN dnf install -y epel-release && \
5 dnf install -y --allowerasing openssh-server sudo procps-ng \
6 gcc gcc-c++ make cmake pkg-config openssl-devel libicu-devel perl python3-devel \
7 nc openssl bat autossh tmux htop tar bmon gzip tree wget \
8 nano vim unzip net-tools git python3 python3-pip wireguard-tools usbutils yum xclip \
9 && dnf clean all
10
11# Configure SSH
12RUN mkdir -p /var/run/sshd && \
13 ssh-keygen -A && \
14 sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config && \
15 sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \
16 sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config && \
17 echo "AllowAgentForwarding yes" >> /etc/ssh/sshd_config
18# Setup SSH directory for root and ensure root has valid shell
19RUN mkdir -p /root/.ssh && \
20 chmod 700 /root/.ssh && \
21 usermod -s /bin/bash root
22# Copy SSH public keys from docker_build/ssh-keys directory into the image
23COPY docker_build/ssh-keys/*.pub /tmp/ssh-keys/
24RUN cat /tmp/ssh-keys/*.pub > /root/.ssh/authorized_keys && \
25 chmod 600 /root/.ssh/authorized_keys && \
26 rm -rf /tmp/ssh-keys
27
28# Configure vim
29COPY docker_build/vimrc /etc/vimrc
30
31# Configure bash prompt and colors
32RUN echo 'LS_COLORS=$LS_COLORS:"di=38;5;135:ex=00;32:" ; export LS_COLORS' >> /etc/bashrc && \
33 echo 'PS1="[\[\033[01;32m\]\u\[\033[00m\]@\h \[\033[38;5;135m\]\W\[\033[00m\]]\$ "' >> /etc/bashrc && \
34 echo 'export PATH=$PATH:/root/.cargo/bin' >> /root/.bashrc
35
36# Install Rust and tools for root
37RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \
38 echo '[ -f "$HOME/.cargo/env" ] && source "$HOME/.cargo/env"' >> ~/.bashrc && \
39 source "$HOME/.cargo/env" && \
40 cargo install cargo-clone-crate cargo-edit cargo-info evcxr_jupyter bacon du-dust
41
42# Install Node.js via nvm and claude-code
43RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash && \
44 export NVM_DIR="$HOME/.nvm" && \
45 [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" && \
46 nvm install 22 && \
47 npm install -g @anthropic-ai/claude-code
48
49# Add nvm to bashrc for future sessions
50RUN echo 'export NVM_DIR="$HOME/.nvm"' >> ~/.bashrc && \
51 echo '[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"' >> ~/.bashrc && \
52 echo '[ -s "$NVM_DIR/bash_completion" ] && \. "$NVM_DIR/bash_completion"' >> ~/.bashrc
53
54# Set working directory
55WORKDIR /root
56
57# Expose SSH port
58EXPOSE 22
59
60# Start SSH daemon
61CMD ["/usr/sbin/sshd", "-D", "-e"]
diff --git a/containers/docker_build/Dockerfile.gpu b/containers/docker_build/Dockerfile.gpu
deleted file mode 100644
index 7ed08a5..0000000
--- a/containers/docker_build/Dockerfile.gpu
+++ /dev/null
@@ -1,40 +0,0 @@
1# GPU image - a single-stage build layered on top of the locally built base dev image (rocky_dev:latest); build the base image first
2FROM rocky_dev:latest
3
4# Update and install GPU-specific packages
5RUN dnf update -y && \
6 dnf install -y kernel-headers kernel-devel pciutils && \
7 dnf clean all
8
9# Install the NVIDIA container toolkit; `dnf config-manager` is provided by dnf-plugins-core, so make sure that plugin is present before adding the repo
10RUN dnf install -y dnf-plugins-core && dnf config-manager --add-repo https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo && \
11 dnf install -y nvidia-container-toolkit && \
12 dnf clean all
13
14# Set environment variables for NVIDIA
15ENV NVIDIA_VISIBLE_DEVICES=all
16ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
17
18# Add GPU test script
19RUN echo '#!/bin/bash' > /usr/local/bin/gpu-test.sh && \
20 echo 'echo "=== System Information ==="' >> /usr/local/bin/gpu-test.sh && \
21 echo 'cat /etc/rocky-release' >> /usr/local/bin/gpu-test.sh && \
22 echo 'echo' >> /usr/local/bin/gpu-test.sh && \
23 echo 'echo "=== PCI Devices (GPUs) ==="' >> /usr/local/bin/gpu-test.sh && \
24 echo 'lspci | grep -i nvidia' >> /usr/local/bin/gpu-test.sh && \
25 echo 'echo' >> /usr/local/bin/gpu-test.sh && \
26 echo 'echo "=== NVIDIA SMI ==="' >> /usr/local/bin/gpu-test.sh && \
27 echo 'if command -v nvidia-smi &> /dev/null; then' >> /usr/local/bin/gpu-test.sh && \
28 echo ' nvidia-smi' >> /usr/local/bin/gpu-test.sh && \
29 echo 'else' >> /usr/local/bin/gpu-test.sh && \
30 echo ' echo "nvidia-smi not found. GPU might not be accessible inside container."' >> /usr/local/bin/gpu-test.sh && \
31 echo 'fi' >> /usr/local/bin/gpu-test.sh && \
32 chmod +x /usr/local/bin/gpu-test.sh
33
34# Create workspace directory for GPU workloads
35RUN mkdir -p /workspace
36
37# Keep the same working directory and CMD from base image
38WORKDIR /root
39EXPOSE 22
40CMD ["/usr/sbin/sshd", "-D", "-e"] \ No newline at end of file
diff --git a/containers/docker_build/ssh-keys/macm4-resident.pub b/containers/docker_build/ssh-keys/macm4-resident.pub
deleted file mode 100644
index fbccb4f..0000000
--- a/containers/docker_build/ssh-keys/macm4-resident.pub
+++ /dev/null
@@ -1 +0,0 @@
1sk-ssh-ed25519@openssh.com AAAAGnNrLXNzaC1lZDI1NTE5QG9wZW5zc2guY29tAAAAIFdHP8n64jOV6Ok7U9TDnGW+LUkXP6V7cvXH6xqN0zcNAAAAEnNzaDptYWNtNC1yZXNpZGVudA== ssh:macm4-resident
diff --git a/containers/docker_build/vimrc b/containers/docker_build/vimrc
deleted file mode 100644
index 36583bc..0000000
--- a/containers/docker_build/vimrc
+++ /dev/null
@@ -1,77 +0,0 @@
1" Basic vim configuration for development environment
2
3" Enable syntax highlighting
4syntax on
5
6" Enable line numbers
7set number
8
9" Enable relative line numbers for easier navigation
10set relativenumber
11
12" Set tab width to 4 spaces
13set tabstop=4
14set shiftwidth=4
15set expandtab
16
17" Enable auto-indentation
18set autoindent
19set smartindent
20
21" Enable incremental search
22set incsearch
23
24" Highlight search results
25set hlsearch
26
27" Case-insensitive search unless uppercase is used
28set ignorecase
29set smartcase
30
31" Show matching brackets
32set showmatch
33
34" Enable mouse support
35set mouse=a
36
37" Set backspace behavior
38set backspace=indent,eol,start
39
40" Show current line and column
41set ruler
42
43" Enable file type detection
44filetype on
45filetype plugin on
46filetype indent on
47
48" Set color scheme (if available)
49colorscheme default
50
51" Enable visual bell instead of beep
52set visualbell
53
54" Set encoding
55set encoding=utf-8
56
57" Show command in status line
58set showcmd
59
60" Enable wildmenu for command completion
61set wildmenu
62
63" Set status line
64set laststatus=2
65set statusline=%F%m%r%h%w\ [%l,%c]\ [%L\ lines]
66
67" Rust specific settings
68autocmd FileType rust setlocal tabstop=4 shiftwidth=4 expandtab
69
70" Python specific settings
71autocmd FileType python setlocal tabstop=4 shiftwidth=4 expandtab
72
73" JavaScript/TypeScript settings
74autocmd FileType javascript,typescript setlocal tabstop=2 shiftwidth=2 expandtab
75
76" YAML settings
77autocmd FileType yaml setlocal tabstop=2 shiftwidth=2 expandtab \ No newline at end of file
diff --git a/containers/docs b/containers/docs
deleted file mode 100644
index 3a0b3cc..0000000
--- a/containers/docs
+++ /dev/null
@@ -1,105 +0,0 @@
1# Rocky SSH Container
2## Setup
3### SSH Keys
4Place your SSH public keys in the `docker_build/ssh-keys/` directory:
5```bash
6cp ~/.ssh/id_ed25519.pub docker_build/ssh-keys/
7```
8The container will automatically add all `.pub` files from this directory to `/root/.ssh/authorized_keys`.
9
10## Building Containers
11### Base Development Container
12```bash
13# From the dev_env directory
14podman build -t rocky_dev:latest -f docker_build/Dockerfile .
15```
16### GPU-Enabled Container
17The GPU container is layered on top of the base container (its Dockerfile starts with `FROM rocky_dev:latest`), so the base image must be built first:
18```bash
19# First build the base container (from dev_env directory)
20podman build -t rocky_dev:latest -f docker_build/Dockerfile .
21# Then build the GPU version
22podman build -t rocky_dev_gpu:latest -f docker_build/Dockerfile.gpu .
23```
24
25## GPU Support
26The GPU-enabled container includes:
27- NVIDIA Container Toolkit for GPU access
28- GPU test script at `/usr/local/bin/gpu-test.sh`
29- Environment variables configured for NVIDIA GPU visibility
30- Workspace directory at `/workspace` for GPU workloads
31
32### Running with GPU Support
33```bash
34# Run GPU-enabled container
35podman run -it --device nvidia.com/gpu=all rocky_dev_gpu:latest
36# Test GPU inside container
37gpu-test.sh
38nvidia-smi
39```
40
41## Podman
42```bash
43python3 podman_launch_devenv.py
44python3 podman_launch_devenv.py run
45python3 podman_launch_devenv.py run -p 2222
46python3 podman_launch_devenv.py list
47python3 podman_launch_devenv.py cleanup
48```
49
50## Kubernetes
51```bash
52kubectl apply -f rocky-ssh-deployment.yaml
53kubectl get pods -l app=rocky-dev -o wide
54kubectl get svc rocky-dev-svc
55kubectl delete pod rocky-dev-0
56kubectl scale statefulset rocky-dev --replicas=10
57kubectl delete -f rocky-ssh-deployment.yaml
58```
59### Kubernetes GPU Deployment
60```bash
61kubectl apply -f rocky-ssh-gpu-deployment.yaml
62kubectl get pods -l app=rocky-dev-gpu -o wide
63kubectl describe pod rocky-dev-gpu-0 | grep nvidia
64kubectl exec -it rocky-dev-gpu-0 -- nvidia-smi
65kubectl scale statefulset rocky-dev-gpu --replicas=4
66kubectl delete -f rocky-ssh-gpu-deployment.yaml
67```
68
69## Local Registry
70```bash
71podman run -d -p 5000:5000 --name registry registry:2
72podman tag localhost/rocky_dev:latest localhost:5000/rocky_dev:latest
73podman push localhost:5000/rocky_dev:latest --tls-verify=false
74```
75
76## Access
77```bash
78# Direct shell
79kubectl exec -it rocky-dev-0 -- /bin/bash
80# SSH with agent forwarding (2 terminals)
81kubectl port-forward rocky-dev-0 2222:22
82ssh-agent bash -c 'ssh-add ~/macm4-resident && ssh -A -p 2222 root@localhost'
83# External
84kubectl port-forward --address 0.0.0.0 rocky-dev-0 9999:22
85```
86
87## Features
88### Development Tools
89- C/C++ development: gcc, gcc-c++, make, cmake
90- Python 3 with pip and development headers
91- Rust toolchain with cargo tools (cargo-edit, bacon, evcxr_jupyter)
92- Node.js v22 via nvm
93- Claude Code CLI tool
94
95### System Utilities
96- SSH server with key-based authentication
97- tmux, vim, nano editors
98- htop, bmon for system monitoring
99- git, wget, tree, bat
100- Network tools: nc, net-tools, wireguard-tools
101
102### GPU Computing (GPU version only)
103- NVIDIA GPU support via container toolkit
104- GPU test utilities
105- Dedicated /workspace directory for ML/GPU workloads
diff --git a/containers/rocky-ssh-deployment.yaml b/containers/rocky-ssh-deployment.yaml
deleted file mode 100644
index bb6c37f..0000000
--- a/containers/rocky-ssh-deployment.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
1apiVersion: apps/v1
2kind: StatefulSet
3metadata:
4 name: rocky-dev
5 labels:
6 app: rocky-dev
7spec:
8 serviceName: rocky-dev-svc
9 replicas: 2
10 selector:
11 matchLabels:
12 app: rocky-dev
13 template:
14 metadata:
15 labels:
16 app: rocky-dev
17 spec:
18 containers:
19 - name: rocky-dev
20 image: rocky_dev:latest
21 imagePullPolicy: IfNotPresent # Use local image
22 ports:
23 - containerPort: 22
24 name: ssh
25 securityContext:
26 privileged: true
27 livenessProbe:
28 tcpSocket:
29 port: 22
30 initialDelaySeconds: 30
31 periodSeconds: 30
32 readinessProbe:
33 tcpSocket:
34 port: 22
35 initialDelaySeconds: 5
36 periodSeconds: 10
37---
38apiVersion: v1
39kind: Service
40metadata:
41 name: rocky-dev-svc
42spec:
43 clusterIP: None
44 selector:
45 app: rocky-dev
46 ports:
47 - port: 22
48 targetPort: 22
diff --git a/containers/rocky-ssh-gpu-deployment.yaml b/containers/rocky-ssh-gpu-deployment.yaml
deleted file mode 100644
index 062ccae..0000000
--- a/containers/rocky-ssh-gpu-deployment.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
1apiVersion: apps/v1
2kind: StatefulSet
3metadata:
4 name: rocky-dev-gpu
5 labels:
6 app: rocky-dev-gpu
7spec:
8 serviceName: rocky-dev-gpu-svc
9 replicas: 2
10 selector:
11 matchLabels:
12 app: rocky-dev-gpu
13 template:
14 metadata:
15 labels:
16 app: rocky-dev-gpu
17 spec:
18 containers:
19 - name: rocky-dev-gpu
20 image: rocky_dev_gpu:latest
21 imagePullPolicy: IfNotPresent # Use local image
22 ports:
23 - containerPort: 22
24 name: ssh
25 securityContext:
26 privileged: true
27 resources:
28 limits:
29 nvidia.com/gpu: 1 # Request 1 GPU per pod
30 requests:
31 nvidia.com/gpu: 1
32 env:
33 - name: NVIDIA_VISIBLE_DEVICES
34 value: "all"
35 - name: NVIDIA_DRIVER_CAPABILITIES
36 value: "compute,utility"
37 volumeMounts:
38 - name: workspace
39 mountPath: /workspace
40 livenessProbe:
41 tcpSocket:
42 port: 22
43 initialDelaySeconds: 30
44 periodSeconds: 30
45 readinessProbe:
46 tcpSocket:
47 port: 22
48 initialDelaySeconds: 5
49 periodSeconds: 10
50 volumeClaimTemplates:
51 - metadata:
52 name: workspace
53 spec:
54 accessModes: [ "ReadWriteOnce" ]
55 resources:
56 requests:
57 storage: 10Gi
58---
59apiVersion: v1
60kind: Service
61metadata:
62 name: rocky-dev-gpu-svc
63spec:
64 clusterIP: None
65 selector:
66 app: rocky-dev-gpu
67 ports:
68 - port: 22
69 targetPort: 22 \ No newline at end of file
diff --git a/containers/tests/test_base_container.sh b/containers/tests/test_base_container.sh
deleted file mode 100755
index b5115ec..0000000
--- a/containers/tests/test_base_container.sh
+++ /dev/null
@@ -1,131 +0,0 @@
1#!/bin/bash
2
3# Container Test Script for rocky_dev:latest
4# This script tests all the functionality of the base container
5
6set -e
7
8CONTAINER_NAME="rocky_dev_test_$$"
9IMAGE_NAME="rocky_dev:latest"
10TEST_PORT=$(shuf -i 30000-40000 -n 1)
11
12# Cleanup function
13cleanup() {
14 echo ""
15 echo "Cleaning up..."
16 podman stop $CONTAINER_NAME >/dev/null 2>&1 || true
17 podman rm $CONTAINER_NAME >/dev/null 2>&1 || true
18 echo "Container $CONTAINER_NAME removed"
19}
20
21# Set trap to cleanup on exit
22trap cleanup EXIT
23
24echo "=== Rocky Dev Container Test Suite ==="
25echo "Container: $CONTAINER_NAME"
26echo "Port: $TEST_PORT"
27echo ""
28
29# Run the command string given as $1 with `bash -c` inside the test container; returns podman exec's exit status
30run_in_container() {
31 podman exec "$CONTAINER_NAME" bash -c "$1"
32}
33
34# Report whether command $1 resolves inside the test container: prints "Checking <cmd>... " followed by ✓ or ✗ and returns 0 or 1
35check_command() {
36 local tool=$1
37 local mark="✓" rc=0
38 echo -n "Checking $tool... "
39 if ! run_in_container "command -v $tool" >/dev/null 2>&1; then
40 mark="✗"
41 rc=1
42 fi
43 echo "$mark"
44 return $rc
45}
46
47# Start container
48echo "1. Starting container..."
49podman run -d -p ${TEST_PORT}:22 --name $CONTAINER_NAME $IMAGE_NAME
50sleep 5
51
52echo ""
53echo "2. Testing system packages..."
54# Test core development tools
55check_command gcc
56check_command g++
57check_command make
58check_command cmake
59check_command git
60check_command python3
61check_command pip3
62
63echo ""
64echo "3. Testing system utilities..."
65# Test system utilities
66check_command tmux
67check_command vim
68check_command nano
69check_command tree
70check_command htop
71check_command bmon
72check_command wget
73check_command nc
74check_command bat
75
76echo ""
77echo "4. Testing SSH configuration..."
78# Check SSH daemon
79run_in_container "ps aux | grep sshd | grep -v grep" && echo "✓ SSH daemon running" || echo "✗ SSH daemon not running"
80
81# Check SSH config
82run_in_container "grep -q 'PubkeyAuthentication yes' /etc/ssh/sshd_config" && echo "✓ PubkeyAuthentication enabled" || echo "✗ PubkeyAuthentication not enabled"
83run_in_container "grep -q 'PermitRootLogin yes' /etc/ssh/sshd_config" && echo "✓ PermitRootLogin enabled" || echo "✗ PermitRootLogin not enabled"
84
85# Check SSH directory
86run_in_container "test -d /root/.ssh && test -f /root/.ssh/authorized_keys" && echo "✓ SSH directory configured" || echo "✗ SSH directory not configured"
87
88echo ""
89echo "5. Testing Rust installation..."
90# Test Rust
91run_in_container "source /root/.cargo/env && cargo --version" && echo "✓ Cargo installed" || echo "✗ Cargo not installed"
92run_in_container "source /root/.cargo/env && rustc --version" && echo "✓ Rust compiler installed" || echo "✗ Rust compiler not installed"
93
94# Test Rust tools
95echo "Checking Rust tools..."
96for tool in cargo-clone cargo-add cargo-info bacon dust; do
97 run_in_container "source /root/.cargo/env && command -v $tool" >/dev/null 2>&1 && echo " ✓ $tool" || echo " ✗ $tool"
98done
99# Check evcxr_jupyter separately (it's a Jupyter kernel, not a CLI tool)
100run_in_container "source /root/.cargo/env && ls ~/.cargo/bin/evcxr_jupyter" >/dev/null 2>&1 && echo " ✓ evcxr_jupyter (Rust Jupyter kernel)" || echo " ✗ evcxr_jupyter"
101
102echo ""
103echo "6. Testing Node.js installation..."
104# Test Node.js
105run_in_container "source /root/.nvm/nvm.sh && node --version" && echo "✓ Node.js installed" || echo "✗ Node.js not installed"
106run_in_container "source /root/.nvm/nvm.sh && npm --version" && echo "✓ npm installed" || echo "✗ npm not installed"
107
108# Test claude-code
109run_in_container "source /root/.nvm/nvm.sh && claude --version" >/dev/null 2>&1 && echo "✓ claude-code installed" || echo "✗ claude-code not installed"
110
111echo ""
112echo "7. Testing environment configuration..."
113# Test bash configuration
114run_in_container "grep -q 'LS_COLORS' /etc/bashrc" && echo "✓ LS_COLORS configured" || echo "✗ LS_COLORS not configured"
115run_in_container "grep -q 'PS1=' /etc/bashrc" && echo "✓ Custom prompt configured" || echo "✗ Custom prompt not configured"
116
117echo ""
118echo "8. Testing SSH connectivity..."
119# Probe the published port only (no login is attempted); rely on nc's exit status because the wording of its -v output differs between netcat implementations
120echo -n "Testing SSH port accessibility... "
121nc -z localhost "$TEST_PORT" >/dev/null 2>&1 && echo "✓" || echo "✗"
122
123echo ""
124echo "9. Testing file system..."
125# Check working directory
126run_in_container "pwd" | grep -q "/root" && echo "✓ Working directory is /root" || echo "✗ Working directory incorrect"
127
128echo ""
129echo "=== Test Summary ==="
130echo "All tests completed. Any ✗ above marks a failed check."  # most checks only print ✗ and continue, so reaching this line does not prove success
131echo "Container will be automatically cleaned up." \ No newline at end of file
diff --git a/containers/tests/test_gpu_container.sh b/containers/tests/test_gpu_container.sh
deleted file mode 100755
index 593f927..0000000
--- a/containers/tests/test_gpu_container.sh
+++ /dev/null
@@ -1,146 +0,0 @@
1#!/bin/bash
2
3# Container Test Script for rocky_dev_gpu:latest
4# This script tests all the functionality of the GPU-enabled container
5
6set -e
7
8CONTAINER_NAME="rocky_dev_gpu_test_$$"
9IMAGE_NAME="rocky_dev_gpu:latest"
10TEST_PORT=$(shuf -i 40000-50000 -n 1)
11
12# Cleanup function
13cleanup() {
14 echo ""
15 echo "Cleaning up..."
16 podman stop $CONTAINER_NAME >/dev/null 2>&1 || true
17 podman rm $CONTAINER_NAME >/dev/null 2>&1 || true
18 echo "Container $CONTAINER_NAME removed"
19}
20
21# Set trap to cleanup on exit
22trap cleanup EXIT
23
24echo "=== Rocky Dev GPU Container Test Suite ==="
25echo "Container: $CONTAINER_NAME"
26echo "Port: $TEST_PORT"
27echo ""
28
29# Run the command string given as $1 with `bash -c` inside the test container; returns podman exec's exit status
30run_in_container() {
31 podman exec "$CONTAINER_NAME" bash -c "$1"
32}
33
34# Report whether command $1 resolves inside the test container: prints "Checking <cmd>... " followed by ✓ or ✗ and returns 0 or 1
35check_command() {
36 local tool=$1
37 local mark="✓" rc=0
38 echo -n "Checking $tool... "
39 if ! run_in_container "command -v $tool" >/dev/null 2>&1; then
40 mark="✗"
41 rc=1
42 fi
43 echo "$mark"
44 return $rc
45}
46
47# Start container with GPU support
48echo "1. Starting GPU container..."
49podman run -d -p ${TEST_PORT}:22 --device nvidia.com/gpu=all --name $CONTAINER_NAME $IMAGE_NAME
50sleep 5
51
52echo ""
53echo "2. Testing base container functionality..."
54echo "(Inherited from rocky_dev:latest)"
55
56# Quick check of base tools
57echo -n "Development tools: "
58for cmd in gcc g++ make cmake git python3; do
59 run_in_container "command -v $cmd" >/dev/null 2>&1 || { echo "✗ Missing $cmd"; exit 1; }
60done
61echo "✓"
62
63echo -n "Rust toolchain: "
64run_in_container "source /root/.cargo/env && cargo --version" >/dev/null 2>&1 && echo "✓" || echo "✗"
65
66echo -n "Node.js: "
67run_in_container "source /root/.nvm/nvm.sh && node --version" >/dev/null 2>&1 && echo "✓" || echo "✗"
68
69echo ""
70echo "3. Testing GPU-specific packages..."
71# Check for GPU utilities
72check_command lspci
73check_command nvidia-smi || echo " (nvidia-smi requires actual GPU hardware)"
74
75# Check for kernel packages
76echo -n "Checking kernel headers... "
77run_in_container "rpm -q kernel-headers" >/dev/null 2>&1 && echo "✓" || echo "✗"
78
79echo -n "Checking kernel-devel... "
80run_in_container "rpm -q kernel-devel" >/dev/null 2>&1 && echo "✓" || echo "✗"
81
82echo -n "Checking pciutils... "
83run_in_container "rpm -q pciutils" >/dev/null 2>&1 && echo "✓" || echo "✗"
84
85echo ""
86echo "4. Testing NVIDIA container toolkit..."
87echo -n "Checking nvidia-container-toolkit... "
88run_in_container "rpm -q nvidia-container-toolkit" >/dev/null 2>&1 && echo "✓" || echo "✗"
89
90echo ""
91echo "5. Testing GPU environment variables..."
92# Check environment variables
93echo -n "NVIDIA_VISIBLE_DEVICES... "
94run_in_container "echo \$NVIDIA_VISIBLE_DEVICES" | grep -q "all" && echo "✓ Set to 'all'" || echo "✗ Not set correctly"
95
96echo -n "NVIDIA_DRIVER_CAPABILITIES... "
97run_in_container "echo \$NVIDIA_DRIVER_CAPABILITIES" | grep -q "compute,utility" && echo "✓ Set to 'compute,utility'" || echo "✗ Not set correctly"
98
99echo ""
100echo "6. Testing GPU test script..."
101# Check if gpu-test.sh exists and is executable
102echo -n "Checking /usr/local/bin/gpu-test.sh... "
103run_in_container "test -x /usr/local/bin/gpu-test.sh" && echo "✓ Exists and executable" || echo "✗ Not found or not executable"
104
105# Run the GPU test script
106echo ""
107echo "Running GPU test script:"
108echo "------------------------"
109run_in_container "/usr/local/bin/gpu-test.sh" || echo "Note: Some GPU tests may fail without actual GPU hardware"
110echo "------------------------"
111
112echo ""
113echo "7. Testing workspace directory..."
114# Check workspace directory
115echo -n "Checking /workspace directory... "
116run_in_container "test -d /workspace" && echo "✓ Exists" || echo "✗ Not found"
117
118echo ""
119echo "8. Testing PCI device detection..."
120# Try to detect any NVIDIA devices
121echo "PCI devices (filtered for NVIDIA/GPU):"
122run_in_container "lspci 2>/dev/null | grep -iE '(nvidia|vga|3d|display)' || echo ' No GPU devices detected (this is normal without GPU hardware)'"
123
124echo ""
125echo "9. Testing container GPU device access..."
126# Check if container has GPU device access
127echo -n "Checking /dev/nvidia* devices... "
128if run_in_container "ls /dev/nvidia* 2>/dev/null" >/dev/null 2>&1; then
129 echo "✓ GPU devices found"
130 run_in_container "ls -la /dev/nvidia*"
131else
132 echo "✗ No GPU devices (normal without GPU hardware)"
133fi
134
135echo ""
136echo "=== Test Summary ==="
137echo "GPU Support Status:"
138if run_in_container "command -v nvidia-smi && nvidia-smi" >/dev/null 2>&1; then
139 echo " ✓ Full GPU support detected"
140else
141 echo " ⚠ GPU tools installed but no GPU hardware detected"
142 echo " This is normal when running without NVIDIA GPU"
143fi
144echo ""
145echo "All tests completed. Review any ✗ above (some are expected without GPU hardware)."  # most checks only print ✗ and continue, so reaching this line does not prove success
146echo "Container will be automatically cleaned up." \ No newline at end of file