From d01ecae74ce482b1d3f20342dafa1cf078f9fed5 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Mon, 25 Apr 2022 12:35:29 +0200 Subject: [PATCH] x11-drivers/nvidia-drivers: make service re-entrant Currently the setup-nvidia script fails when re-executed. It should work in cases when the driver is already built and just needs to be loaded, or when it needs to be rebuilt for a new kernel (but driver version may not have changed). To make this work, several changes were necessary: * `./nvidia*.run -x -s` fails when already unpacked. Tolerate that failure so that we can rebuild. * there are several module dependencies for nvidia modules that are implicit, related to i2c/ipmi. Probe those explicitly. * `[ -f /dev/nvidia* ]` fails because those are character devices, so we need a `[ -c ... ]` check. * `nvidia-modprobe` previously always failed, because it doesn't actually know the location of the modules and can only call modprobe (modprobe looks into /lib/modules/). We now explicitly probe the important modules, at that point nvidia-modprobe just creates additional device nodes. * `is_nvidia_installation_required` checks whether building and loading is needed. Factor out the loading check so that we can reload the module after an update. 
--- .../nvidia-drivers/files/bin/setup-nvidia | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia index 862c0c263f..3d1d6f1f20 100644 --- a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia +++ b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia @@ -63,7 +63,7 @@ function extract_nvidia_installer() { pushd "${FLATCAR_ROOT_WORKDIR}/${NVIDIA_WORKDIR}" chmod +x "${NVIDIA_DRIVER_BASENAME}.run" - "./${NVIDIA_DRIVER_BASENAME}.run" -x -s + "./${NVIDIA_DRIVER_BASENAME}.run" -x -s || true popd } @@ -89,14 +89,17 @@ function copy_nvidia_build_artifacts() { } function install_and_load() { + modprobe -a i2c_core ipmi_msghandler ipmi_devintf insmod "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/lib64/modules/$(uname -r)/video/nvidia.ko" + insmod "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/lib64/modules/$(uname -r)/video/nvidia-modeset.ko" + insmod "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/lib64/modules/$(uname -r)/video/nvidia-uvm.ko" - if [ ! -f /dev/nvidiactl ] + if [ ! -c /dev/nvidiactl ] then mknod -m 666 /dev/nvidiactl c 195 255 fi - if [ ! -f /dev/nvidia0 ] + if [ ! -c /dev/nvidia0 ] then mknod -m 666 /dev/nvidia0 c 195 0 fi @@ -108,11 +111,18 @@ function install_and_load() { function verify_installation() { nvidia-smi - nvidia-modprobe -u -m -c 0 + nvidia-modprobe -u -m -c 0 || echo "nvidia-modprobe failed!" +} + +function is_nvidia_probe_required() { + if [[ -z "$(lspci | grep -i "${NVIDIA_PRODUCT_TYPE}")" ]]; then + return 1 + fi + return 0 } function is_nvidia_installation_required() { - if [[ -z "$(lspci | grep -i "${NVIDIA_PRODUCT_TYPE}")" ]]; then + if ! 
is_nvidia_probe_required; then return 1 fi @@ -142,3 +152,8 @@ then setup "$@" exit 0 fi +if is_nvidia_probe_required +then + install_and_load + verify_installation +fi