From 85f7b86c00d8b2d64239ea7d2786536d6f294189 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Mon, 25 Apr 2022 09:50:56 +0200 Subject: [PATCH 1/8] x11-drivers/nvidia-drivers: build driver against /lib/modules There are two ways to build the nvidia-driver - either against a full kernel source tree in /usr/src/linux, or against a slim kernel-devel equivalent in /lib/modules/*/build. The /lib/modules/*/build is provided by sys-kernel/coreos-module, see `install_build_source`. The interesting thing is that in absence of --kernel-source-path, nvidia-installer will autodetect which to use and already builds against /lib/modules/*/build on Flatcar right now. By passing --kernel-name, we make that choice explicit and this allows us to skip the emerge steps of the build. Since this runs in the developer container, there is also no point in trying to execute systemctl or depmod, so pass the flags to disable usage of those. Signed-off-by: Jeremi Piotrowski --- .../x11-drivers/nvidia-drivers/files/bin/install-nvidia | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/install-nvidia b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/install-nvidia index 19d482b913..ffc2903cea 100644 --- a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/install-nvidia +++ b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/install-nvidia @@ -2,17 +2,16 @@ . 
/usr/share/coreos/release NVIDIA_DRIVER_BASENAME="$1" - -emerge-gitclone -emerge -gKv coreos-sources > /dev/null -cp "/usr/lib64/modules/$(ls /usr/lib64/modules)/build/.config" /usr/src/linux/ -make -C /usr/src/linux modules_prepare +KERNEL_NAME="$(ls /lib/modules)" cd "/nvidia/${NVIDIA_DRIVER_BASENAME}" ./nvidia-installer -s -n \ --no-check-for-alternate-installs \ --no-kernel-module-source \ + --kernel-name="${KERNEL_NAME}" \ --no-opengl-files \ --no-distro-scripts \ + --no-systemd \ + --skip-depmod \ --kernel-install-path="${PWD}" \ --log-file-name="${PWD}/nvidia-installer.log" || true From d6ea20ddd69a4531043956e2730991c9fdfd4582 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Mon, 25 Apr 2022 10:22:45 +0200 Subject: [PATCH 2/8] x11-drivers/nvidia-drivers: allow user to override nvidia-metadata ...by providing /etc/flatcar/nvidia-metadata. Newer driver packages do not support some older Nvidia cards. An example is the Tesla K80 cards in Standard_NC6 VMs on Azure, which are only supported up to the 470.x driver version. To allow users to continue using those, give them a way to override the driver version through /etc/flatcar/nvidia-metadata. For example, this entry could be used to pin a specific driver version: NVIDIA_DRIVER_VERSION=470.103.01 --- .../x11-drivers/nvidia-drivers/files/bin/setup-nvidia | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia index 49f74e6811..c371f0df77 100644 --- a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia +++ b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia @@ -4,6 +4,7 @@ set -euo pipefail . /usr/share/flatcar/release . /usr/share/flatcar/update.conf . /usr/share/flatcar/nvidia-metadata +[ -f /etc/flatcar/nvidia-metadata ] && . 
/etc/flatcar/nvidia-metadata NVIDIA_DOWNLOAD_BASEURL="https://us.download.nvidia.com/${NVIDIA_PRODUCT_TYPE}/" NVIDIA_DRIVER_BASENAME="NVIDIA-Linux-x86_64-${NVIDIA_DRIVER_VERSION}" From a4ac14c66c721d57fe017274ad4cdad46704e4f0 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Mon, 25 Apr 2022 10:26:36 +0200 Subject: [PATCH 3/8] x11-drivers/nvidia-drivers: use lbzip2 to speed up developer container decompression --- .../x11-drivers/nvidia-drivers/files/bin/setup-nvidia | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia index c371f0df77..18d100fc5e 100644 --- a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia +++ b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia @@ -40,7 +40,7 @@ function download_flatcar_developer_container() { fi curl -L --fail "${FLATCAR_DEVELOPER_CONTAINER_URL}" -o "${FLATCAR_ROOT_WORKDIR}/${FLATCAR_DEVELOPER_CONTAINER}.bz2" - bzip2 -d "${FLATCAR_ROOT_WORKDIR}/${FLATCAR_DEVELOPER_CONTAINER}.bz2" + lbzip2 -d "${FLATCAR_ROOT_WORKDIR}/${FLATCAR_DEVELOPER_CONTAINER}.bz2" fi From 67bf5476c334785b722ebb6164ad8761799381f8 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Mon, 25 Apr 2022 10:34:43 +0200 Subject: [PATCH 4/8] x11-drivers/nvidia-drivers: use versioned developer container filenames Currently the script will reuse a developer container that was downloaded once, without ensuring that the same version is used as the running image. This works on the first boot, but wouldn't be correct after an OS update. To resolve this, add a version number to the downloaded filename, and check for the versioned dev container file. When the file is missing we also cleanup all other dev container files via glob remove. 
--- .../nvidia-drivers/files/bin/setup-nvidia | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia index 18d100fc5e..862c0c263f 100644 --- a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia +++ b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia @@ -10,7 +10,7 @@ NVIDIA_DOWNLOAD_BASEURL="https://us.download.nvidia.com/${NVIDIA_PRODUCT_TYPE}/" NVIDIA_DRIVER_BASENAME="NVIDIA-Linux-x86_64-${NVIDIA_DRIVER_VERSION}" NVIDIA_WORKDIR='nvidia-workdir' -FLATCAR_DEVELOPER_CONTAINER="flatcar_developer_container.bin" +FLATCAR_DEVELOPER_CONTAINER="flatcar_developer_container-${FLATCAR_RELEASE_VERSION}.bin" FLATCAR_DEVELOPER_CONTAINER_URL="" FLATCAR_ROOT_WORKDIR="/opt/nvidia/workdir" FLATCAR_KERNEL_VERSION=$(uname -r) @@ -22,15 +22,16 @@ function download_flatcar_developer_container() { if [[ ! 
-f "${FLATCAR_ROOT_WORKDIR}/${FLATCAR_DEVELOPER_CONTAINER}" ]] then - + rm -f "${FLATCAR_ROOT_WORKDIR}/"flatcar_developer_container*.bin + FLATCAR_REMOTE_FILENAME=${FLATCAR_DEVELOPER_CONTAINER%%-*}.bin.bz2 if [[ "$GROUP" == "developer" ]] then - FLATCAR_DEVELOPER_CONTAINER_URL="https://bucket.release.flatcar-linux.net/flatcar-jenkins/developer/developer/boards/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_DEVELOPER_CONTAINER}.bz2" - else if [[ "$(curl -I -s -o /dev/null -w "%{http_code}" "https://${GROUP}.release.flatcar-linux.net/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_DEVELOPER_CONTAINER}.bz2")" == 200 ]] + FLATCAR_DEVELOPER_CONTAINER_URL="https://bucket.release.flatcar-linux.net/flatcar-jenkins/developer/developer/boards/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_REMOTE_FILENAME}" + else if [[ "$(curl -I -s -o /dev/null -w "%{http_code}" "https://${GROUP}.release.flatcar-linux.net/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_REMOTE_FILENAME}")" == 200 ]] then - FLATCAR_DEVELOPER_CONTAINER_URL="https://${GROUP}.release.flatcar-linux.net/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_DEVELOPER_CONTAINER}.bz2" + FLATCAR_DEVELOPER_CONTAINER_URL="https://${GROUP}.release.flatcar-linux.net/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_REMOTE_FILENAME}" else - FLATCAR_DEVELOPER_CONTAINER_URL="https://bucket.release.flatcar-linux.net/flatcar-jenkins/${GROUP}/boards/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_DEVELOPER_CONTAINER}.bz2" + FLATCAR_DEVELOPER_CONTAINER_URL="https://bucket.release.flatcar-linux.net/flatcar-jenkins/${GROUP}/boards/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_REMOTE_FILENAME}" fi fi From d01ecae74ce482b1d3f20342dafa1cf078f9fed5 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Mon, 25 Apr 2022 12:35:29 +0200 Subject: [PATCH 5/8] x11-drivers/nvidia-drivers: make service re-entrant Currently the 
setup-nvidia script fails when re-executed. It should work in cases when the driver is already built and just needs to be loaded, or when it needs to be rebuilt for a new kernel (but driver version may not have changed). To make this work, several changes were necessary: * `./nvidia*.run -x -s` fails when already unpacked. Allow it so that we can rebuild. * there are several module dependencies for nvidia modules that are implicit, related to i2c/ipmi. Probe those explicitly. * `[ -f /dev/nvidia* ]` fails because those are character devices, so a `[ -c ...]` check is needed. * `nvidia-modprobe` previously always failed, because it doesn't actually know the location of the modules and can only call modprobe (modprobe looks into /lib/modules/). We now explicitly probe the important modules; at that point nvidia-modprobe just creates additional device nodes. * `is_nvidia_installation_required` checks whether building and loading are needed. Factor out the loading check so that we can reload the module after an update. 
--- .../nvidia-drivers/files/bin/setup-nvidia | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia index 862c0c263f..3d1d6f1f20 100644 --- a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia +++ b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia @@ -63,7 +63,7 @@ function extract_nvidia_installer() { pushd "${FLATCAR_ROOT_WORKDIR}/${NVIDIA_WORKDIR}" chmod +x "${NVIDIA_DRIVER_BASENAME}.run" - "./${NVIDIA_DRIVER_BASENAME}.run" -x -s + "./${NVIDIA_DRIVER_BASENAME}.run" -x -s || true popd } @@ -89,14 +89,17 @@ function copy_nvidia_build_artifacts() { } function install_and_load() { + modprobe -a i2c_core ipmi_msghandler ipmi_devintf insmod "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/lib64/modules/$(uname -r)/video/nvidia.ko" + insmod "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/lib64/modules/$(uname -r)/video/nvidia-modeset.ko" + insmod "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/lib64/modules/$(uname -r)/video/nvidia-uvm.ko" - if [ ! -f /dev/nvidiactl ] + if [ ! -c /dev/nvidiactl ] then mknod -m 666 /dev/nvidiactl c 195 255 fi - if [ ! -f /dev/nvidia0 ] + if [ ! -c /dev/nvidia0 ] then mknod -m 666 /dev/nvidia0 c 195 0 fi @@ -108,11 +111,18 @@ function install_and_load() { function verify_installation() { nvidia-smi - nvidia-modprobe -u -m -c 0 + nvidia-modprobe -u -m -c 0 || echo "nvidia-modprobe failed!" +} + +function is_nvidia_probe_required() { + if [[ -z "$(lspci | grep -i "${NVIDIA_PRODUCT_TYPE}")" ]]; then + return 1 + fi + return 0 } function is_nvidia_installation_required() { - if [[ -z "$(lspci | grep -i "${NVIDIA_PRODUCT_TYPE}")" ]]; then + if ! 
is_nvidia_probe_required; then return 1 fi @@ -142,3 +152,8 @@ then setup "$@" exit 0 fi +if is_nvidia_probe_required +then + install_and_load + verify_installation +fi From 08b86ad7dd652fcb1ceebeb94c39cf0533cc18c6 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Mon, 25 Apr 2022 13:54:51 +0200 Subject: [PATCH 6/8] x11-drivers/nvidia-drivers: keep developer container image read-only Since we no longer need to run emerge in the developer container, we can as well just treat the developer container more like a container image and use an ephemeral overlay. --- .../x11-drivers/nvidia-drivers/files/bin/setup-nvidia | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia index 3d1d6f1f20..8e48f52cde 100644 --- a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia +++ b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia @@ -70,7 +70,7 @@ function extract_nvidia_installer() { function run_nspawn_container() { echo Spawn system-nspawn container to install the NVIDIA drivers - sudo systemd-nspawn --image="${FLATCAR_ROOT_WORKDIR}/${FLATCAR_DEVELOPER_CONTAINER}" --bind="${FLATCAR_ROOT_WORKDIR}/${NVIDIA_WORKDIR}":/nvidia --bind=/usr/share/oem/bin:/app/bin/ /app/bin/install-nvidia "$NVIDIA_DRIVER_BASENAME" + sudo systemd-nspawn --read-only --volatile=overlay --image="${FLATCAR_ROOT_WORKDIR}/${FLATCAR_DEVELOPER_CONTAINER}" --bind="${FLATCAR_ROOT_WORKDIR}/${NVIDIA_WORKDIR}":/nvidia --bind=/usr/share/oem/bin:/app/bin/ /app/bin/install-nvidia "$NVIDIA_DRIVER_BASENAME" } function copy_nvidia_build_artifacts() { From a6c4454b360823f788513ceb27cf0a2ab92d674a Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Mon, 25 Apr 2022 14:00:09 +0200 Subject: [PATCH 7/8] x11-drivers/nvidia-drivers: 
rework install paths of files to match OS This involves putting libraries under /usr/lib64 and kernel modules under /usr/lib/modules. This is an experiment in making the nvidia installation work as a sysext as well, but there are still some issues around that. The major issue was that `systemd-sysext refresh` would remove the OEM symlink and I don't feel comfortable with `systemctl restart systemd-sysext` from within another unit. If anyone wants to try it, it's now a matter of: ln -s /opt/nvidia/current /run/extensions/nvidia-driver Bonus points for moving nvidia binaries from /opt/bin to /opt/nvidia/current/usr/bin. --- .../x11-drivers/nvidia-drivers/files/bin/install-nvidia | 9 +++++- .../nvidia-drivers/files/bin/setup-nvidia | 32 +++++++++++++------ 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/install-nvidia b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/install-nvidia index ffc2903cea..803934b190 100644 --- a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/install-nvidia +++ b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/install-nvidia @@ -13,5 +13,12 @@ cd "/nvidia/${NVIDIA_DRIVER_BASENAME}" --no-distro-scripts \ --no-systemd \ --skip-depmod \ - --kernel-install-path="${PWD}" \ + --kernel-install-path="${PWD}/install-mod" \ --log-file-name="${PWD}/nvidia-installer.log" || true + +mkdir -p /lib/modules/${KERNEL_NAME}/video +mkdir -p "${PWD}"/install-mod +cp "${PWD}"/kernel/*.ko "${PWD}"/install-mod +cp "${PWD}"/install-mod/*.ko /lib/modules/${KERNEL_NAME}/video/ +depmod -a +cp /lib/modules/${KERNEL_NAME}/modules.* "${PWD}/install-mod/" diff --git a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia index 8e48f52cde..0838357c25 
100644 --- a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia +++ b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia @@ -74,14 +74,21 @@ function run_nspawn_container() { } function copy_nvidia_build_artifacts() { - mkdir -p "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/lib64" - cp "$FLATCAR_ROOT_WORKDIR/$NVIDIA_WORKDIR/${NVIDIA_DRIVER_BASENAME}"/*.so.* "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/lib64/" + mkdir -p "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib64" + cp "$FLATCAR_ROOT_WORKDIR/$NVIDIA_WORKDIR/${NVIDIA_DRIVER_BASENAME}"/*.so.* "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib64/" mkdir -p /opt/bin cp "$FLATCAR_ROOT_WORKDIR/$NVIDIA_WORKDIR/${NVIDIA_DRIVER_BASENAME}"/{nvidia-debugdump,nvidia-cuda-mps-control,nvidia-xconfig,nvidia-modprobe,nvidia-smi,nvidia-cuda-mps-server,nvidia-persistenced,nvidia-settings} /opt/bin/ - mkdir -p "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/lib64/modules/$(uname -r)/video/" - cp "$FLATCAR_ROOT_WORKDIR/$NVIDIA_WORKDIR/${NVIDIA_DRIVER_BASENAME}"/kernel/*.ko "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/lib64/modules/$(uname -r)/video/" + mkdir -p "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib/modules/$(uname -r)/video/" + cp "$FLATCAR_ROOT_WORKDIR/$NVIDIA_WORKDIR/${NVIDIA_DRIVER_BASENAME}"/install-mod/*.ko "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib/modules/$(uname -r)/video/" + cp "$FLATCAR_ROOT_WORKDIR/$NVIDIA_WORKDIR/${NVIDIA_DRIVER_BASENAME}"/install-mod/modules.* "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib/modules/$(uname -r)/" + + mkdir -p "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib/extension-release.d/" + cat <"/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib/extension-release.d/extension-release.nvidia-driver" +ID=flatcar +SYSEXT_LEVEL=1.0 +EOF pushd /opt/nvidia ln -sfn "${NVIDIA_FLATCAR_VERSION_PAIR}" current @@ -89,10 +96,18 @@ function copy_nvidia_build_artifacts() { } function 
install_and_load() { + # This creates symlinks to sonames + mkdir -p /etc/ld.so.conf.d/ + echo "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/usr/lib64" > /etc/ld.so.conf.d/nvidia.conf + ldconfig + modprobe -a i2c_core ipmi_msghandler ipmi_devintf - insmod "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/lib64/modules/$(uname -r)/video/nvidia.ko" - insmod "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/lib64/modules/$(uname -r)/video/nvidia-modeset.ko" - insmod "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/lib64/modules/$(uname -r)/video/nvidia-uvm.ko" + + pushd "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/usr/lib/modules/$(uname -r)/video/" + insmod nvidia.ko + insmod nvidia-modeset.ko + insmod nvidia-uvm.ko + popd if [ ! -c /dev/nvidiactl ] then @@ -104,9 +119,6 @@ function install_and_load() { mknod -m 666 /dev/nvidia0 c 195 0 fi - mkdir -p /etc/ld.so.conf.d/ - echo "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/lib64" > /etc/ld.so.conf.d/nvidia.conf - ldconfig } function verify_installation() { From 76c313079142dc8ea6b14d7cb7bfa9e62a4fbb3f Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Mon, 25 Apr 2022 14:11:30 +0200 Subject: [PATCH 8/8] x11-drivers/nvidia-drivers: add more device node creation This is the fallback path that nvidia publishes for verifying device node creation was successful. It now handles multiple gpus and creating the nvidia-uvm node, with a dynamic major. The weird thing is that nvidia-smi and nvidia-modprobe also create some device nodes and files under /dev, but this does not appear to be well documented. So keep the static creation. 
--- .../nvidia-drivers/files/bin/setup-nvidia | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia index 0838357c25..7e3a48546c 100644 --- a/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia +++ b/sdk_container/src/third_party/coreos-overlay/x11-drivers/nvidia-drivers/files/bin/setup-nvidia @@ -109,6 +109,7 @@ function install_and_load() { insmod nvidia-uvm.ko popd + # based on https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#runfile-verifications if [ ! -c /dev/nvidiactl ] then mknod -m 666 /dev/nvidiactl c 195 255 @@ -116,9 +117,20 @@ function install_and_load() { if [ ! -c /dev/nvidia0 ] then - mknod -m 666 /dev/nvidia0 c 195 0 + NVDEVS=`lspci | grep -i NVIDIA` + N3D=`echo "$NVDEVS" | grep -c "3D controller" || true` + NVGA=`echo "$NVDEVS" | grep -c "VGA compatible controller" || true` + N=$(( $N3D + $NVGA - 1)) + for i in `seq 0 $N`; do + mknod -m 666 /dev/nvidia$i c 195 $i + done fi + if [ ! -c /dev/nvidia-uvm ] + then + D=`grep nvidia-uvm /proc/devices | awk '{print $1}'` + mknod -m 666 /dev/nvidia-uvm c $D 0 + fi } function verify_installation() {