#!/bin/bash
#
# Container Test Script for rocky_dev_gpu:latest
#
# Starts a throwaway container from IMAGE_NAME with all NVIDIA GPUs requested
# (--device nvidia.com/gpu=all), runs a series of checks inside it through
# `podman exec`, and removes the container again when the script exits.
#
# Exit status:
#   0  every required check passed
#   1  the container could not be started/used, a base development tool is
#      missing, or at least one required check failed
#
# Checks that this script itself labels as hardware dependent (nvidia-smi,
# running gpu-test.sh, the PCI listing, /dev/nvidia*) are informational only
# and never count as failures.

set -euo pipefail

readonly CONTAINER_NAME="rocky_dev_gpu_test_$$"
readonly IMAGE_NAME="rocky_dev_gpu:latest"
# Random host port in 40000-50000 that container port 22 is published on.
TEST_PORT=$(shuf -i 40000-50000 -n 1)
readonly TEST_PORT

# Number of required checks that failed; decides the final exit status.
FAILED_CHECKS=0

#######################################
# Stops and removes the test container. Runs from the EXIT trap.
# Errors are deliberately ignored: the container may never have been created.
# Globals: CONTAINER_NAME (read)
#######################################
cleanup() {
  echo ""
  echo "Cleaning up..."
  podman stop "$CONTAINER_NAME" >/dev/null 2>&1 || true
  podman rm "$CONTAINER_NAME" >/dev/null 2>&1 || true
  echo "Container $CONTAINER_NAME removed"
}

#######################################
# Runs a shell snippet inside the test container via `bash -c`.
# Globals:   CONTAINER_NAME (read)
# Arguments: $1 - snippet to execute
# Returns:   exit status of `podman exec`
#######################################
run_in_container() {
  podman exec "$CONTAINER_NAME" bash -c "$1"
}

#######################################
# Reports whether a command is on PATH inside the container.
# Arguments: $1 - command name
# Outputs:   "Checking <cmd>... ✓" or "Checking <cmd>... ✗"
# Returns:   0 if found, 1 otherwise (the caller decides whether that matters)
#######################################
check_command() {
  local cmd=$1
  printf '%s' "Checking $cmd... "
  if run_in_container "command -v $cmd" >/dev/null 2>&1; then
    echo "✓"
    return 0
  else
    echo "✗"
    return 1
  fi
}

#######################################
# Runs a required check inside the container, prints its result and records
# a failure in FAILED_CHECKS when the snippet exits non-zero.
# Globals:   FAILED_CHECKS (written)
# Arguments: $1 - label printed before the result (no trailing newline)
#            $2 - snippet passed to run_in_container
#            $3 - text printed on success (default "✓")
#            $4 - text printed on failure (default "✗")
# Returns:   always 0, so a failed check never trips `set -e`
#######################################
check_required() {
  local label=$1
  local snippet=$2
  local pass_msg=${3:-✓}
  local fail_msg=${4:-✗}

  printf '%s' "$label"
  if run_in_container "$snippet" >/dev/null 2>&1; then
    echo "$pass_msg"
  else
    echo "$fail_msg"
    FAILED_CHECKS=$((FAILED_CHECKS + 1))
  fi
}

#######################################
# Runs the whole test suite in order.
# Globals: CONTAINER_NAME, IMAGE_NAME, TEST_PORT (read); FAILED_CHECKS (written)
#######################################
main() {
  local cmd

  echo "=== Rocky Dev GPU Container Test Suite ==="
  echo "Container: $CONTAINER_NAME"
  echo "Port: $TEST_PORT"
  echo ""

  # Start container with GPU support
  echo "1. Starting GPU container..."
  podman run -d -p "${TEST_PORT}:22" --device nvidia.com/gpu=all \
    --name "$CONTAINER_NAME" "$IMAGE_NAME"
  sleep 5

  # Fail with a clear message if the container is not usable, instead of
  # letting the first tool check misreport it as a missing package.
  if ! run_in_container "true" >/dev/null 2>&1; then
    echo "✗ Container $CONTAINER_NAME is not accepting commands; aborting" >&2
    exit 1
  fi

  echo ""
  echo "2. Testing base container functionality..."
  echo "(Inherited from rocky_dev:latest)"

  # Quick check of base tools; a missing one aborts the run immediately.
  printf '%s' "Development tools: "
  for cmd in gcc g++ make cmake git python3; do
    if ! run_in_container "command -v $cmd" >/dev/null 2>&1; then
      echo "✗ Missing $cmd"
      exit 1
    fi
  done
  echo "✓"

  check_required "Rust toolchain: " \
    "source /root/.cargo/env && cargo --version"
  check_required "Node.js: " \
    "source /root/.nvm/nvm.sh && node --version"

  echo ""
  echo "3. Testing GPU-specific packages..."

  # Check for GPU utilities. lspci is required; nvidia-smi is informational.
  check_command lspci || FAILED_CHECKS=$((FAILED_CHECKS + 1))
  check_command nvidia-smi || echo " (nvidia-smi requires actual GPU hardware)"

  # Check for kernel packages
  check_required "Checking kernel headers... " "rpm -q kernel-headers"
  check_required "Checking kernel-devel... " "rpm -q kernel-devel"
  check_required "Checking pciutils... " "rpm -q pciutils"

  echo ""
  echo "4. Testing NVIDIA container toolkit..."
  check_required "Checking nvidia-container-toolkit... " \
    "rpm -q nvidia-container-toolkit"

  echo ""
  echo "5. Testing GPU environment variables..."

  # Substring match, evaluated by the container's bash so no host-side
  # pipeline is involved. Single quotes keep the host from expanding the
  # variable.
  check_required "NVIDIA_VISIBLE_DEVICES... " \
    '[[ "${NVIDIA_VISIBLE_DEVICES:-}" == *all* ]]' \
    "✓ Set to 'all'" "✗ Not set correctly"
  check_required "NVIDIA_DRIVER_CAPABILITIES... " \
    '[[ "${NVIDIA_DRIVER_CAPABILITIES:-}" == *compute,utility* ]]' \
    "✓ Set to 'compute,utility'" "✗ Not set correctly"

  echo ""
  echo "6. Testing GPU test script..."

  # Check if gpu-test.sh exists and is executable
  check_required "Checking /usr/local/bin/gpu-test.sh... " \
    "test -x /usr/local/bin/gpu-test.sh" \
    "✓ Exists and executable" "✗ Not found or not executable"

  # Run the GPU test script; its result is informational only.
  echo ""
  echo "Running GPU test script:"
  echo "------------------------"
  run_in_container "/usr/local/bin/gpu-test.sh" \
    || echo "Note: Some GPU tests may fail without actual GPU hardware"
  echo "------------------------"

  echo ""
  echo "7. Testing workspace directory..."
  check_required "Checking /workspace directory... " \
    "test -d /workspace" "✓ Exists" "✗ Not found"

  echo ""
  echo "8. Testing PCI device detection..."

  # Try to detect any NVIDIA devices
  echo "PCI devices (filtered for NVIDIA/GPU):"
  run_in_container "lspci 2>/dev/null | grep -iE '(nvidia|vga|3d|display)' || echo ' No GPU devices detected (this is normal without GPU hardware)'"

  echo ""
  echo "9. Testing container GPU device access..."

  # Check if container has GPU device access (informational only)
  printf '%s' "Checking /dev/nvidia* devices... "
  if run_in_container "ls /dev/nvidia* 2>/dev/null" >/dev/null 2>&1; then
    echo "✓ GPU devices found"
    run_in_container "ls -la /dev/nvidia*"
  else
    echo "✗ No GPU devices (normal without GPU hardware)"
  fi

  echo ""
  echo "=== Test Summary ==="
  echo "GPU Support Status:"
  if run_in_container "command -v nvidia-smi && nvidia-smi" >/dev/null 2>&1; then
    echo " ✓ Full GPU support detected"
  else
    echo " ⚠ GPU tools installed but no GPU hardware detected"
    echo " This is normal when running without NVIDIA GPU"
  fi

  echo ""
  if ((FAILED_CHECKS > 0)); then
    echo "✗ $FAILED_CHECKS required check(s) failed"
    echo "Container will be automatically cleaned up."
    exit 1
  fi

  echo "All tests completed successfully!"
  echo "Container will be automatically cleaned up."
}

# Set trap to cleanup on exit
trap cleanup EXIT

main "$@"