summaryrefslogtreecommitdiff
path: root/tests/test_gpu_container.sh
diff options
context:
space:
mode:
authorhc <hc@email.ch>2025-05-31 23:44:40 +0800
committerhc <hc@email.ch>2025-05-31 23:44:40 +0800
commitd6eb567da3e6d2e64ebf22adf1fc6d21c47090f8 (patch)
tree14c15830a8014001d5cc587b5b4d4454c880396e /tests/test_gpu_container.sh
parentce511f49438761549e904d6e972b8c0635306ff9 (diff)
hehe
Diffstat (limited to 'tests/test_gpu_container.sh')
-rwxr-xr-xtests/test_gpu_container.sh146
1 files changed, 146 insertions, 0 deletions
diff --git a/tests/test_gpu_container.sh b/tests/test_gpu_container.sh
new file mode 100755
index 0000000..593f927
--- /dev/null
+++ b/tests/test_gpu_container.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+
+# Container Test Script for rocky_dev_gpu:latest
+# This script tests all the functionality of the GPU-enabled container
+
+set -o errexit  # same as `set -e`: abort on the first unguarded command failure
+
+IMAGE_NAME="rocky_dev_gpu:latest"
+CONTAINER_NAME="rocky_dev_gpu_test_$$"  # shell PID suffix gives each run its own name
+TEST_PORT="$(shuf -i 40000-50000 -n 1)"  # one random host port from 40000-50000
+
+# Tear down the test container; errors are ignored because it may never have started.
+cleanup() {
+    printf '\nCleaning up...\n'
+    # Quoted so the name always reaches podman as exactly one argument.
+    podman stop "$CONTAINER_NAME" >/dev/null 2>&1 || true
+    podman rm "$CONTAINER_NAME" >/dev/null 2>&1 || true
+    echo "Container $CONTAINER_NAME removed"
+}
+
+# Run cleanup on every exit path, including aborts triggered by `set -e`.
+trap cleanup EXIT
+
+printf '%s\n' "=== Rocky Dev GPU Container Test Suite ===" \
+    "Container: $CONTAINER_NAME" \
+    "Port: $TEST_PORT" \
+    ""
+
+# Run the shell snippet in $1 inside the test container via `bash -c`; returns its exit status.
+run_in_container() {
+    podman exec "$CONTAINER_NAME" bash -c "$1"
+}
+
+# Report whether `command -v $1` succeeds inside the container.
+# Prints "Checking <name>... " then ✓ or ✗; returns 0 when found, 1 otherwise.
+check_command() {
+    local tool=$1
+    printf 'Checking %s... ' "$tool"
+    if ! run_in_container "command -v $tool" >/dev/null 2>&1; then
+        echo "✗"
+        return 1
+    fi
+    echo "✓"
+    return 0
+}
+
+# Start the container detached, GPUs attached, host port $TEST_PORT mapped to container port 22.
+echo "1. Starting GPU container..."
+podman run -d -p "${TEST_PORT}:22" --device nvidia.com/gpu=all --name "$CONTAINER_NAME" "$IMAGE_NAME"
+sleep 5  # fixed grace period for startup; readiness itself is not verified
+
+printf '\n2. Testing base container functionality...\n'
+echo "(Inherited from rocky_dev:latest)"
+
+# Every base tool is mandatory: the first missing one aborts the whole run.
+printf 'Development tools: '
+for tool in gcc g++ make cmake git python3; do
+    if ! run_in_container "command -v $tool" >/dev/null 2>&1; then echo "✗ Missing $tool"; exit 1; fi
+done
+echo "✓"
+
+# Rust and Node.js are report-only; a missing toolchain does not abort the run.
+printf 'Rust toolchain: '
+if run_in_container "source /root/.cargo/env && cargo --version" >/dev/null 2>&1; then echo "✓"; else echo "✗"; fi
+
+printf 'Node.js: '
+if run_in_container "source /root/.nvm/nvm.sh && node --version" >/dev/null 2>&1; then echo "✓"; else echo "✗"; fi
+
+echo ""
+echo "3. Testing GPU-specific packages..."
+# Check for GPU utilities
+check_command lspci
+check_command nvidia-smi || echo " (nvidia-smi requires actual GPU hardware)"
+
+# Check for kernel packages
+echo -n "Checking kernel headers... "
+run_in_container "rpm -q kernel-headers" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo -n "Checking kernel-devel... "
+run_in_container "rpm -q kernel-devel" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo -n "Checking pciutils... "
+run_in_container "rpm -q pciutils" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo ""
+echo "4. Testing NVIDIA container toolkit..."
+echo -n "Checking nvidia-container-toolkit... "
+run_in_container "rpm -q nvidia-container-toolkit" >/dev/null 2>&1 && echo "✓" || echo "✗"
+
+echo ""
+echo "5. Testing GPU environment variables..."
+# Check environment variables
+echo -n "NVIDIA_VISIBLE_DEVICES... "
+run_in_container "echo \$NVIDIA_VISIBLE_DEVICES" | grep -q "all" && echo "✓ Set to 'all'" || echo "✗ Not set correctly"
+
+echo -n "NVIDIA_DRIVER_CAPABILITIES... "
+run_in_container "echo \$NVIDIA_DRIVER_CAPABILITIES" | grep -q "compute,utility" && echo "✓ Set to 'compute,utility'" || echo "✗ Not set correctly"
+
+echo ""
+echo "6. Testing GPU test script..."
+# Check if gpu-test.sh exists and is executable
+echo -n "Checking /usr/local/bin/gpu-test.sh... "
+run_in_container "test -x /usr/local/bin/gpu-test.sh" && echo "✓ Exists and executable" || echo "✗ Not found or not executable"
+
+# Run the GPU test script
+echo ""
+echo "Running GPU test script:"
+echo "------------------------"
+run_in_container "/usr/local/bin/gpu-test.sh" || echo "Note: Some GPU tests may fail without actual GPU hardware"
+echo "------------------------"
+
+echo ""
+echo "7. Testing workspace directory..."
+# Check workspace directory
+echo -n "Checking /workspace directory... "
+run_in_container "test -d /workspace" && echo "✓ Exists" || echo "✗ Not found"
+
+printf '\n8. Testing PCI device detection...\n'
+echo "PCI devices (filtered for NVIDIA/GPU):"
+# The whole pipeline runs inside the container; the fallback echo keeps its exit status zero.
+pci_probe="lspci 2>/dev/null | grep -iE '(nvidia|vga|3d|display)'"
+run_in_container "$pci_probe || echo ' No GPU devices detected (this is normal without GPU hardware)'"
+
+printf '\n9. Testing container GPU device access...\n'
+# The glob sits inside a quoted string, so it is expanded by bash in the container.
+printf 'Checking /dev/nvidia* devices... '
+if ! run_in_container "ls /dev/nvidia* 2>/dev/null" >/dev/null 2>&1; then
+    echo "✗ No GPU devices (normal without GPU hardware)"
+else
+    echo "✓ GPU devices found"
+    # Second listing shows the device nodes in detail now that the glob is known to match.
+    run_in_container "ls -la /dev/nvidia*"
+fi
+
+printf '\n=== Test Summary ===\nGPU Support Status:\n'
+# Three outcomes: nvidia-smi runs, nvidia-smi exists but fails, or nvidia-smi is absent.
+if run_in_container "nvidia-smi" >/dev/null 2>&1; then
+    echo " ✓ Full GPU support detected"
+elif run_in_container "command -v nvidia-smi" >/dev/null 2>&1; then
+    echo " ⚠ GPU tools installed but no GPU hardware detected"
+    echo " This is normal when running without NVIDIA GPU"
+else
+    echo " ✗ nvidia-smi is not available in the container"
+fi
+printf '\nAll tests completed; review any ✗ markers above.\n'  # report-only checks never abort
+echo "Container will be automatically cleaned up." \ No newline at end of file