#!/bin/bash
#
# Container Test Script for rocky_dev_gpu:latest
# This script tests all the functionality of the GPU-enabled container.
#
# Provenance: tests/test_gpu_container.sh (new file, mode 100755) as added by
# commit d6eb567da3e6d2e64ebf22adf1fc6d21c47090f8 (author "hc", 2025-05-31).
#
# Host requirements: podman and shuf.

# -e: abort on unhandled command failure; -u: abort on unset variables.
set -eu

# Refuse to start without podman so the failure is reported once, clearly,
# instead of as a cascade of "command not found" errors.
if ! command -v podman >/dev/null 2>&1; then
  echo "Error: podman is required to run this test suite" >&2
  exit 1
fi

# The shell PID suffix keeps container names from concurrent runs distinct.
readonly CONTAINER_NAME="rocky_dev_gpu_test_$$"
readonly IMAGE_NAME="rocky_dev_gpu:latest"
# Random host port in 40000-50000 that gets mapped to container port 22.
TEST_PORT=$(shuf -i 40000-50000 -n 1)
readonly TEST_PORT

#######################################
# Stop and remove the test container.
# Best effort: errors from podman are ignored on purpose because the
# container may never have been created or may already be gone.
# Globals:   CONTAINER_NAME (read)
# Outputs:   progress messages on stdout
#######################################
cleanup() {
  echo ""
  echo "Cleaning up..."
  podman stop "$CONTAINER_NAME" >/dev/null 2>&1 || true
  podman rm "$CONTAINER_NAME" >/dev/null 2>&1 || true
  echo "Container $CONTAINER_NAME removed"
}

# Run cleanup on every exit path (success, failure, or `exit` under set -e).
trap cleanup EXIT

echo "=== Rocky Dev GPU Container Test Suite ==="
echo "Container: $CONTAINER_NAME"
echo "Port: $TEST_PORT"
echo ""

#######################################
# Run a shell snippet inside the test container via `bash -c`.
# Globals:   CONTAINER_NAME (read)
# Arguments: $1 - command string, interpreted by bash inside the container
# Returns:   exit status of `podman exec`
#######################################
run_in_container() {
  podman exec "$CONTAINER_NAME" bash -c "$1"
}

#######################################
# Report whether a command is resolvable inside the container.
# Arguments: $1 - command name to look up with `command -v`
# Outputs:   "Checking <cmd>... ✓" or "Checking <cmd>... ✗" on stdout
# Returns:   0 if the command exists, 1 otherwise
#######################################
check_command() {
  local cmd=$1
  local quoted_cmd
  # Shell-quote the name: it is spliced into a string that bash re-parses
  # inside the container.
  printf -v quoted_cmd '%q' "$cmd"
  echo -n "Checking $cmd... "
  if run_in_container "command -v $quoted_cmd" >/dev/null 2>&1; then
    echo "✓"
    return 0
  else
    echo "✗"
    return 1
  fi
}

# Start container with GPU support
echo "1. Starting GPU container..."
podman run -d -p "${TEST_PORT}:22" --device nvidia.com/gpu=all \
  --name "$CONTAINER_NAME" "$IMAGE_NAME"
# Give the container a moment to finish starting up.
sleep 5

# `podman run -d` succeeds even if the container's main process dies right
# afterwards; detect that here rather than through failing `podman exec` calls.
if [[ "$(podman inspect -f '{{.State.Running}}' "$CONTAINER_NAME" 2>/dev/null)" != "true" ]]; then
  echo "Error: container $CONTAINER_NAME is not running after startup" >&2
  podman logs "$CONTAINER_NAME" >&2 || true
  exit 1
fi

echo ""
echo "2. Testing base container functionality..."
echo "(Inherited from rocky_dev:latest)"

# Number of non-fatal checks that reported ✗; decides the final exit status.
failures=0

#######################################
# Print a label, run a check silently, print its verdict and count failures.
# Uses a real if/else so the failure text can never be printed after a
# successful check (the pitfall of `cmd && ok || fail`).
# Globals:   failures (incremented when the check fails)
# Arguments: $1 - label printed without trailing newline
#            $2 - text printed when the check succeeds
#            $3 - text printed when the check fails
#            $4... - command (and arguments) to run as the check
# Returns:   always 0, so a failed check never trips `set -e`
#######################################
report_check() {
  local label=$1 ok_text=$2 fail_text=$3
  shift 3
  echo -n "$label"
  if "$@" >/dev/null 2>&1; then
    echo "$ok_text"
  else
    echo "$fail_text"
    failures=$((failures + 1))
  fi
}

#######################################
# Test whether an environment variable inside the container contains a string.
# Arguments: $1 - variable name, $2 - pattern passed to grep on the host
# Returns:   grep's exit status (0 when the pattern matches)
#######################################
container_env_contains() {
  # \$ keeps the dollar sign literal so the variable is expanded inside the
  # container, not on the host.
  run_in_container "echo \$$1" | grep -q -- "$2"
}

# Quick check of base tools; a missing base tool is fatal.
echo -n "Development tools: "
for cmd in gcc g++ make cmake git python3; do
  if ! run_in_container "command -v $cmd" >/dev/null 2>&1; then
    echo "✗ Missing $cmd"
    exit 1
  fi
done
echo "✓"

report_check "Rust toolchain: " "✓" "✗" \
  run_in_container "source /root/.cargo/env && cargo --version"

report_check "Node.js: " "✓" "✗" \
  run_in_container "source /root/.nvm/nvm.sh && node --version"

echo ""
echo "3. Testing GPU-specific packages..."
# Check for GPU utilities.
# check_command returns non-zero when the tool is missing; handle that here so
# `set -e` does not silently abort the whole run.
check_command lspci || failures=$((failures + 1))
# A missing nvidia-smi is tolerated and therefore not counted as a failure.
check_command nvidia-smi || echo " (nvidia-smi requires actual GPU hardware)"

# Check for kernel packages
report_check "Checking kernel headers... " "✓" "✗" \
  run_in_container "rpm -q kernel-headers"

report_check "Checking kernel-devel... " "✓" "✗" \
  run_in_container "rpm -q kernel-devel"

report_check "Checking pciutils... " "✓" "✗" \
  run_in_container "rpm -q pciutils"

echo ""
echo "4. Testing NVIDIA container toolkit..."
report_check "Checking nvidia-container-toolkit... " "✓" "✗" \
  run_in_container "rpm -q nvidia-container-toolkit"

echo ""
echo "5. Testing GPU environment variables..."
# Check environment variables
report_check "NVIDIA_VISIBLE_DEVICES... " "✓ Set to 'all'" "✗ Not set correctly" \
  container_env_contains NVIDIA_VISIBLE_DEVICES "all"

report_check "NVIDIA_DRIVER_CAPABILITIES... " \
  "✓ Set to 'compute,utility'" "✗ Not set correctly" \
  container_env_contains NVIDIA_DRIVER_CAPABILITIES "compute,utility"

echo ""
echo "6. Testing GPU test script..."
# Check if gpu-test.sh exists and is executable
report_check "Checking /usr/local/bin/gpu-test.sh... " \
  "✓ Exists and executable" "✗ Not found or not executable" \
  run_in_container "test -x /usr/local/bin/gpu-test.sh"

# Run the GPU test script; its failure is only noted, not counted.
echo ""
echo "Running GPU test script:"
echo "------------------------"
run_in_container "/usr/local/bin/gpu-test.sh" \
  || echo "Note: Some GPU tests may fail without actual GPU hardware"
echo "------------------------"

echo ""
echo "7. Testing workspace directory..."
# Check workspace directory
report_check "Checking /workspace directory... " "✓ Exists" "✗ Not found" \
  run_in_container "test -d /workspace"

echo ""
echo "8. Testing PCI device detection..."
# Try to detect any NVIDIA devices; the fallback echo runs inside the
# container when the grep finds nothing.
echo "PCI devices (filtered for NVIDIA/GPU):"
run_in_container "lspci 2>/dev/null | grep -iE '(nvidia|vga|3d|display)' || echo ' No GPU devices detected (this is normal without GPU hardware)'"

echo ""
echo "9. Testing container GPU device access..."
# Check if container has GPU device access (informational, not counted).
echo -n "Checking /dev/nvidia* devices... "
if run_in_container "ls /dev/nvidia* 2>/dev/null" >/dev/null 2>&1; then
  echo "✓ GPU devices found"
  run_in_container "ls -la /dev/nvidia*"
else
  echo "✗ No GPU devices (normal without GPU hardware)"
fi

echo ""
echo "=== Test Summary ==="
echo "GPU Support Status:"
if run_in_container "command -v nvidia-smi && nvidia-smi" >/dev/null 2>&1; then
  echo " ✓ Full GPU support detected"
elif run_in_container "command -v nvidia-smi" >/dev/null 2>&1; then
  # nvidia-smi is present but could not talk to a GPU.
  echo " ⚠ GPU tools installed but no GPU hardware detected"
  echo " This is normal when running without NVIDIA GPU"
else
  echo " ⚠ nvidia-smi is not available in the container; GPU support could not be verified"
fi
echo ""
if (( failures > 0 )); then
  # Do not claim success when any check above printed ✗.
  echo "$failures check(s) failed - see the ✗ entries above." >&2
  echo "Container will be automatically cleaned up."
  exit 1
fi
echo "All tests completed successfully!"
echo "Container will be automatically cleaned up."