diff options
Diffstat (limited to 'containers/tests/test_gpu_container.sh')
| -rwxr-xr-x | containers/tests/test_gpu_container.sh | 146 |
1 files changed, 0 insertions, 146 deletions
diff --git a/containers/tests/test_gpu_container.sh b/containers/tests/test_gpu_container.sh deleted file mode 100755 index 593f927..0000000 --- a/containers/tests/test_gpu_container.sh +++ /dev/null | |||
| @@ -1,146 +0,0 @@ | |||
#!/bin/bash
#
# Container test script for rocky_dev_gpu:latest.
# Exercises the functionality of the GPU-enabled container image.

# Abort on the first command failure that is not explicitly handled.
set -e

# Name of the test container; the shell's PID ($$) is appended to the name.
readonly CONTAINER_NAME="rocky_dev_gpu_test_$$"
# Image the test container is created from.
readonly IMAGE_NAME="rocky_dev_gpu:latest"

# Random host port in the range 40000-50000 (inclusive); it is published to
# container port 22 when the container is started. bash's built-in $RANDOM
# (0..32767) is used so that no external tool such as `shuf` is required.
TEST_PORT=$((40000 + RANDOM % 10001))
readonly TEST_PORT
| 11 | |||
#######################################
# Stop and remove the test container. Registered as the script's EXIT trap.
# Globals:   CONTAINER_NAME (read)
# Arguments: none
# Outputs:   progress messages on stdout; a warning on stderr when the
#            container could not be removed
# Returns:   0 always
#######################################
cleanup() {
  echo ""
  echo "Cleaning up..."
  # Stopping is best-effort: it fails when the container is not running.
  podman stop "$CONTAINER_NAME" >/dev/null 2>&1 || true
  # Only claim removal when podman actually removed the container.
  if podman rm "$CONTAINER_NAME" >/dev/null 2>&1; then
    echo "Container $CONTAINER_NAME removed"
  else
    echo "Container $CONTAINER_NAME could not be removed (it may never have been created)" >&2
  fi
  return 0
}
| 20 | |||
# Run cleanup whenever the script exits.
trap cleanup EXIT

# Banner identifying this run's container name and port.
printf '%s\n' "=== Rocky Dev GPU Container Test Suite ==="
printf 'Container: %s\n' "$CONTAINER_NAME"
printf 'Port: %s\n' "$TEST_PORT"
printf '\n'
| 28 | |||
#######################################
# Run a command string inside the test container through `bash -c`.
# Globals:   CONTAINER_NAME (read)
# Arguments: $1 - command line to execute inside the container
# Outputs:   whatever the executed command writes to stdout/stderr
# Returns:   the exit status of `podman exec`
#######################################
run_in_container() {
  # Quoted so the container name is always passed as a single argument.
  podman exec "$CONTAINER_NAME" bash -c "$1"
}
| 33 | |||
#######################################
# Report whether a command is available inside the test container.
# Arguments: $1 - name of the command to look up
# Outputs:   "Checking <name>... " followed by ✓ or ✗ on stdout
# Returns:   0 if `command -v` finds the command in the container, 1 otherwise
#######################################
check_command() {
  local cmd=$1
  local quoted_cmd
  # Shell-quote the name so it reaches `command -v` as exactly one word even
  # if it contains spaces or shell metacharacters.
  printf -v quoted_cmd '%q' "$cmd"
  printf 'Checking %s... ' "$cmd"
  if run_in_container "command -v $quoted_cmd" >/dev/null 2>&1; then
    echo "✓"
    return 0
  fi
  echo "✗"
  return 1
}
| 46 | |||
# Start the container detached, with all NVIDIA GPUs requested via
# `--device nvidia.com/gpu=all` and host port TEST_PORT published to
# container port 22. Expansions are quoted so each is a single argument.
echo "1. Starting GPU container..."
podman run -d -p "${TEST_PORT}:22" --device nvidia.com/gpu=all \
  --name "$CONTAINER_NAME" "$IMAGE_NAME"
# Fixed pause before the first `podman exec`; presumably gives the container
# time to finish starting (TODO confirm whether a readiness check is needed).
sleep 5
| 51 | |||
printf '\n'
echo "2. Testing base container functionality..."
echo "(Inherited from rocky_dev:latest)"

# Base development tools are mandatory: the first missing one ends the run.
printf '%s' "Development tools: "
for tool in gcc g++ make cmake git python3; do
  if ! run_in_container "command -v $tool" >/dev/null 2>&1; then
    echo "✗ Missing $tool"
    exit 1
  fi
done
echo "✓"

# The Rust and Node.js checks are informational only; a failure just prints ✗.
printf '%s' "Rust toolchain: "
if run_in_container "source /root/.cargo/env && cargo --version" >/dev/null 2>&1; then
  echo "✓"
else
  echo "✗"
fi

printf '%s' "Node.js: "
if run_in_container "source /root/.nvm/nvm.sh && node --version" >/dev/null 2>&1; then
  echo "✓"
else
  echo "✗"
fi
| 68 | |||
printf '\n'
echo "3. Testing GPU-specific packages..."
# GPU utilities. A missing lspci aborts the run (set -e); a missing
# nvidia-smi only prints a hint.
check_command lspci
if ! check_command nvidia-smi; then
  echo " (nvidia-smi requires actual GPU hardware)"
fi

# RPM packages, given as "<label shown to the user>:<rpm package name>".
for entry in \
  "kernel headers:kernel-headers" \
  "kernel-devel:kernel-devel" \
  "pciutils:pciutils"; do
  label=${entry%%:*}
  pkg=${entry##*:}
  printf 'Checking %s... ' "$label"
  if run_in_container "rpm -q $pkg" >/dev/null 2>&1; then
    echo "✓"
  else
    echo "✗"
  fi
done
| 84 | |||
printf '\n'
echo "4. Testing NVIDIA container toolkit..."
# Informational: reports whether the RPM is installed inside the container.
printf '%s' "Checking nvidia-container-toolkit... "
if run_in_container "rpm -q nvidia-container-toolkit" >/dev/null 2>&1; then
  echo "✓"
else
  echo "✗"
fi
| 89 | |||
printf '\n'
echo "5. Testing GPU environment variables..."
# The variables are expanded inside the container (note the escaped `$`);
# grep then looks for the expected value in the printed output.
printf '%s' "NVIDIA_VISIBLE_DEVICES... "
if run_in_container "echo \$NVIDIA_VISIBLE_DEVICES" | grep -q "all"; then
  echo "✓ Set to 'all'"
else
  echo "✗ Not set correctly"
fi

printf '%s' "NVIDIA_DRIVER_CAPABILITIES... "
if run_in_container "echo \$NVIDIA_DRIVER_CAPABILITIES" | grep -q "compute,utility"; then
  echo "✓ Set to 'compute,utility'"
else
  echo "✗ Not set correctly"
fi
| 98 | |||
printf '\n'
echo "6. Testing GPU test script..."
# Verify that the helper script is present and executable in the container.
printf '%s' "Checking /usr/local/bin/gpu-test.sh... "
if run_in_container "test -x /usr/local/bin/gpu-test.sh"; then
  echo "✓ Exists and executable"
else
  echo "✗ Not found or not executable"
fi

# Run the helper script; a failure is reported as a note, not as an error.
printf '\n'
echo "Running GPU test script:"
echo "------------------------"
if ! run_in_container "/usr/local/bin/gpu-test.sh"; then
  echo "Note: Some GPU tests may fail without actual GPU hardware"
fi
echo "------------------------"
| 111 | |||
printf '\n'
echo "7. Testing workspace directory..."
# Informational: reports whether /workspace is a directory in the container.
printf '%s' "Checking /workspace directory... "
if run_in_container "test -d /workspace"; then
  echo "✓ Exists"
else
  echo "✗ Not found"
fi
| 117 | |||
printf '\n'
echo "8. Testing PCI device detection..."
# List PCI devices that look like GPUs. The fallback message is printed
# inside the container when lspci/grep produce no match.
echo "PCI devices (filtered for NVIDIA/GPU):"
pci_probe="lspci 2>/dev/null | grep -iE '(nvidia|vga|3d|display)' || echo ' No GPU devices detected (this is normal without GPU hardware)'"
run_in_container "$pci_probe"
| 123 | |||
printf '\n'
echo "9. Testing container GPU device access..."
# Probe for /dev/nvidia* device nodes inside the container; when present,
# print a detailed listing.
printf '%s' "Checking /dev/nvidia* devices... "
if ! run_in_container "ls /dev/nvidia* 2>/dev/null" >/dev/null 2>&1; then
  echo "✗ No GPU devices (normal without GPU hardware)"
else
  echo "✓ GPU devices found"
  run_in_container "ls -la /dev/nvidia*"
fi
| 134 | |||
echo ""
echo "=== Test Summary ==="
echo "GPU Support Status:"
# Distinguish a missing nvidia-smi binary from one that is installed but
# fails to run, so the summary does not claim the tools are installed when
# they are not.
if ! run_in_container "command -v nvidia-smi" >/dev/null 2>&1; then
  echo " ✗ nvidia-smi is not installed in the container"
elif run_in_container "nvidia-smi" >/dev/null 2>&1; then
  echo " ✓ Full GPU support detected"
else
  echo " ⚠ GPU tools installed but no GPU hardware detected"
  echo " This is normal when running without NVIDIA GPU"
fi
echo ""
# NOTE: reaching this point only means no hard failure aborted the run
# (set -e / explicit exit); informational checks may still have printed ✗.
echo "All tests completed successfully!"
echo "Container will be automatically cleaned up."
