Merge pull request #1840 from flatcar-linux/jepio/nvidia-service-improvement

nvidia driver build improvements
This commit is contained in:
Jeremi Piotrowski 2022-07-13 10:40:50 +02:00 committed by GitHub
commit 759df3dee2
2 changed files with 75 additions and 28 deletions

View File

@ -2,17 +2,23 @@
# Build the NVIDIA kernel modules from the driver directory under /nvidia and
# stage the resulting .ko files plus the depmod metadata in ./install-mod.
# NOTE(review): this listing looks like a rendered diff in which removed and
# added lines appear together without +/- markers; confirm against the real
# script before treating every line as live code.
# Source the release file shipped with the image.
. /usr/share/coreos/release
# $1: name of the extracted driver directory below /nvidia.
NVIDIA_DRIVER_BASENAME="$1"
# Presumably fetches the ebuild repositories emerge needs -- TODO confirm.
emerge-gitclone
# Install coreos-sources through emerge; its standard output is discarded.
emerge -gKv coreos-sources > /dev/null
# Copy the kernel .config from the installed modules' build directory.
# The `ls` substitution assumes /usr/lib64/modules holds exactly one entry.
cp "/usr/lib64/modules/$(ls /usr/lib64/modules)/build/.config" /usr/src/linux/
# Prepare the kernel tree so out-of-tree modules can be built against it.
make -C /usr/src/linux modules_prepare
# Kernel name handed to the installer; again assumes a single directory.
KERNEL_NAME="$(ls /lib/modules)"
cd "/nvidia/${NVIDIA_DRIVER_BASENAME}"
# Run the installer; `|| true` means a non-zero installer status is ignored.
# NOTE(review): --kernel-install-path is given twice below (plain ${PWD} and
# ${PWD}/install-mod) -- probably the old and new diff line; confirm which one
# is intended.
./nvidia-installer -s -n \
--no-check-for-alternate-installs \
--no-kernel-module-source \
--kernel-name="${KERNEL_NAME}" \
--no-opengl-files \
--no-distro-scripts \
--kernel-install-path="${PWD}" \
--no-systemd \
--skip-depmod \
--kernel-install-path="${PWD}/install-mod" \
--log-file-name="${PWD}/nvidia-installer.log" || true
# Collect the built modules in install-mod and copy them into the module tree
# under /lib/modules so depmod can index them.
mkdir -p /lib/modules/${KERNEL_NAME}/video
mkdir -p "${PWD}"/install-mod
cp "${PWD}"/kernel/*.ko "${PWD}"/install-mod
cp "${PWD}"/install-mod/*.ko /lib/modules/${KERNEL_NAME}/video/
# Regenerate the module dependency files, then keep a copy of modules.* next
# to the staged modules.
depmod -a
cp /lib/modules/${KERNEL_NAME}/modules.* "${PWD}/install-mod/"

View File

@ -4,12 +4,13 @@ set -euo pipefail
# Source the release, update configuration and NVIDIA metadata files.
. /usr/share/flatcar/release
. /usr/share/flatcar/update.conf
. /usr/share/flatcar/nvidia-metadata
# Optional override: sourced only when the file exists under /etc.
[ -f /etc/flatcar/nvidia-metadata ] && . /etc/flatcar/nvidia-metadata
# NVIDIA_PRODUCT_TYPE and NVIDIA_DRIVER_VERSION are not assigned in this view;
# presumably they come from the files sourced above -- TODO confirm.
NVIDIA_DOWNLOAD_BASEURL="https://us.download.nvidia.com/${NVIDIA_PRODUCT_TYPE}/"
NVIDIA_DRIVER_BASENAME="NVIDIA-Linux-x86_64-${NVIDIA_DRIVER_VERSION}"
# Directory name that other code appends to FLATCAR_ROOT_WORKDIR.
NVIDIA_WORKDIR='nvidia-workdir'
# NOTE(review): the variable is assigned twice in a row, so only the second,
# version-suffixed value takes effect. This looks like the old and new line of
# a diff shown together -- confirm.
FLATCAR_DEVELOPER_CONTAINER="flatcar_developer_container.bin"
FLATCAR_DEVELOPER_CONTAINER="flatcar_developer_container-${FLATCAR_RELEASE_VERSION}.bin"
# Starts empty; download_flatcar_developer_container assigns the real URL.
FLATCAR_DEVELOPER_CONTAINER_URL=""
FLATCAR_ROOT_WORKDIR="/opt/nvidia/workdir"
# Release string of the currently running kernel.
FLATCAR_KERNEL_VERSION=$(uname -r)
@ -21,15 +22,16 @@ function download_flatcar_developer_container() {
if [[ ! -f "${FLATCAR_ROOT_WORKDIR}/${FLATCAR_DEVELOPER_CONTAINER}" ]]
then
rm -f "${FLATCAR_ROOT_WORKDIR}/"flatcar_developer_container*.bin
FLATCAR_REMOTE_FILENAME=${FLATCAR_DEVELOPER_CONTAINER%%-*}.bin.bz2
if [[ "$GROUP" == "developer" ]]
then
FLATCAR_DEVELOPER_CONTAINER_URL="https://bucket.release.flatcar-linux.net/flatcar-jenkins/developer/developer/boards/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_DEVELOPER_CONTAINER}.bz2"
else if [[ "$(curl -I -s -o /dev/null -w "%{http_code}" "https://${GROUP}.release.flatcar-linux.net/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_DEVELOPER_CONTAINER}.bz2")" == 200 ]]
FLATCAR_DEVELOPER_CONTAINER_URL="https://bucket.release.flatcar-linux.net/flatcar-jenkins/developer/developer/boards/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_REMOTE_FILENAME}"
else if [[ "$(curl -I -s -o /dev/null -w "%{http_code}" "https://${GROUP}.release.flatcar-linux.net/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_REMOTE_FILENAME}")" == 200 ]]
then
FLATCAR_DEVELOPER_CONTAINER_URL="https://${GROUP}.release.flatcar-linux.net/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_DEVELOPER_CONTAINER}.bz2"
FLATCAR_DEVELOPER_CONTAINER_URL="https://${GROUP}.release.flatcar-linux.net/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_REMOTE_FILENAME}"
else
FLATCAR_DEVELOPER_CONTAINER_URL="https://bucket.release.flatcar-linux.net/flatcar-jenkins/${GROUP}/boards/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_DEVELOPER_CONTAINER}.bz2"
FLATCAR_DEVELOPER_CONTAINER_URL="https://bucket.release.flatcar-linux.net/flatcar-jenkins/${GROUP}/boards/${FLATCAR_RELEASE_BOARD}/${FLATCAR_RELEASE_VERSION}/${FLATCAR_REMOTE_FILENAME}"
fi
fi
@ -39,7 +41,7 @@ function download_flatcar_developer_container() {
fi
curl -L --fail "${FLATCAR_DEVELOPER_CONTAINER_URL}" -o "${FLATCAR_ROOT_WORKDIR}/${FLATCAR_DEVELOPER_CONTAINER}.bz2"
bzip2 -d "${FLATCAR_ROOT_WORKDIR}/${FLATCAR_DEVELOPER_CONTAINER}.bz2"
lbzip2 -d "${FLATCAR_ROOT_WORKDIR}/${FLATCAR_DEVELOPER_CONTAINER}.bz2"
fi
@ -61,25 +63,32 @@ function extract_nvidia_installer() {
pushd "${FLATCAR_ROOT_WORKDIR}/${NVIDIA_WORKDIR}"
chmod +x "${NVIDIA_DRIVER_BASENAME}.run"
"./${NVIDIA_DRIVER_BASENAME}.run" -x -s
"./${NVIDIA_DRIVER_BASENAME}.run" -x -s || true
popd
}
# Run /app/bin/install-nvidia inside a systemd-nspawn container started from
# the developer container image.
# Globals (read): FLATCAR_ROOT_WORKDIR, FLATCAR_DEVELOPER_CONTAINER,
#                 NVIDIA_WORKDIR, NVIDIA_DRIVER_BASENAME
# Bind mounts: the NVIDIA work directory is visible as /nvidia and
# /usr/share/oem/bin as /app/bin/ inside the container.
# NOTE(review): two invocations follow that differ only in
# `--read-only --volatile=overlay`. This looks like the old and new line of a
# diff shown together -- confirm that only one is meant to run.
function run_nspawn_container() {
echo Spawn system-nspawn container to install the NVIDIA drivers
sudo systemd-nspawn --image="${FLATCAR_ROOT_WORKDIR}/${FLATCAR_DEVELOPER_CONTAINER}" --bind="${FLATCAR_ROOT_WORKDIR}/${NVIDIA_WORKDIR}":/nvidia --bind=/usr/share/oem/bin:/app/bin/ /app/bin/install-nvidia "$NVIDIA_DRIVER_BASENAME"
sudo systemd-nspawn --read-only --volatile=overlay --image="${FLATCAR_ROOT_WORKDIR}/${FLATCAR_DEVELOPER_CONTAINER}" --bind="${FLATCAR_ROOT_WORKDIR}/${NVIDIA_WORKDIR}":/nvidia --bind=/usr/share/oem/bin:/app/bin/ /app/bin/install-nvidia "$NVIDIA_DRIVER_BASENAME"
}
function copy_nvidia_build_artifacts() {
mkdir -p "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/lib64"
cp "$FLATCAR_ROOT_WORKDIR/$NVIDIA_WORKDIR/${NVIDIA_DRIVER_BASENAME}"/*.so.* "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/lib64/"
mkdir -p "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib64"
cp "$FLATCAR_ROOT_WORKDIR/$NVIDIA_WORKDIR/${NVIDIA_DRIVER_BASENAME}"/*.so.* "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib64/"
mkdir -p /opt/bin
cp "$FLATCAR_ROOT_WORKDIR/$NVIDIA_WORKDIR/${NVIDIA_DRIVER_BASENAME}"/{nvidia-debugdump,nvidia-cuda-mps-control,nvidia-xconfig,nvidia-modprobe,nvidia-smi,nvidia-cuda-mps-server,nvidia-persistenced,nvidia-settings} /opt/bin/
mkdir -p "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/lib64/modules/$(uname -r)/video/"
cp "$FLATCAR_ROOT_WORKDIR/$NVIDIA_WORKDIR/${NVIDIA_DRIVER_BASENAME}"/kernel/*.ko "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/lib64/modules/$(uname -r)/video/"
mkdir -p "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib/modules/$(uname -r)/video/"
cp "$FLATCAR_ROOT_WORKDIR/$NVIDIA_WORKDIR/${NVIDIA_DRIVER_BASENAME}"/install-mod/*.ko "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib/modules/$(uname -r)/video/"
cp "$FLATCAR_ROOT_WORKDIR/$NVIDIA_WORKDIR/${NVIDIA_DRIVER_BASENAME}"/install-mod/modules.* "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib/modules/$(uname -r)/"
mkdir -p "/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib/extension-release.d/"
cat <<EOF >"/opt/nvidia/${NVIDIA_FLATCAR_VERSION_PAIR}/usr/lib/extension-release.d/extension-release.nvidia-driver"
ID=flatcar
SYSEXT_LEVEL=1.0
EOF
pushd /opt/nvidia
ln -sfn "${NVIDIA_FLATCAR_VERSION_PAIR}" current
@ -87,30 +96,57 @@ function copy_nvidia_build_artifacts() {
}
# Load the NVIDIA kernel modules, register the driver library directory with
# ldconfig, and create the /dev/nvidia* character device nodes when missing.
# Globals (read): NVIDIA_CURRENT_INSTALLATION
# NOTE(review): this body looks like a diff with old and new lines shown
# together. The `[ ! -f ... ]` tests have no `then`/`fi` of their own, and the
# insmod, ld.so.conf and nvidia0 steps appear in two variants. Confirm against
# the real script which lines are live.
function install_and_load() {
insmod "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/lib64/modules/$(uname -r)/video/nvidia.ko"
# This creates symlinks to sonames
mkdir -p /etc/ld.so.conf.d/
echo "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/usr/lib64" > /etc/ld.so.conf.d/nvidia.conf
ldconfig
if [ ! -f /dev/nvidiactl ]
# Load these modules first; presumably the NVIDIA modules depend on them -- TODO confirm.
modprobe -a i2c_core ipmi_msghandler ipmi_devintf
# Insert the three driver modules from the directory for the running kernel.
pushd "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/usr/lib/modules/$(uname -r)/video/"
insmod nvidia.ko
insmod nvidia-modeset.ko
insmod nvidia-uvm.ko
popd
# based on https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#runfile-verifications
# Create the control node only if no character device exists at that path.
if [ ! -c /dev/nvidiactl ]
then
mknod -m 666 /dev/nvidiactl c 195 255
fi
if [ ! -f /dev/nvidia0 ]
if [ ! -c /dev/nvidia0 ]
then
mknod -m 666 /dev/nvidia0 c 195 0
# Count the lspci lines for NVIDIA "3D controller" and "VGA compatible
# controller" devices; `|| true` absorbs grep's non-zero status on zero matches.
NVDEVS=`lspci | grep -i NVIDIA`
N3D=`echo "$NVDEVS" | grep -c "3D controller" || true`
NVGA=`echo "$NVDEVS" | grep -c "VGA compatible controller" || true`
# Highest device index: total count minus one.
N=$(( $N3D + $NVGA - 1))
# One node per counted device, with the minor number equal to the index.
for i in `seq 0 $N`; do
mknod -m 666 /dev/nvidia$i c 195 $i
done
fi
mkdir -p /etc/ld.so.conf.d/
echo "/opt/nvidia/${NVIDIA_CURRENT_INSTALLATION}/lib64" > /etc/ld.so.conf.d/nvidia.conf
ldconfig
if [ ! -c /dev/nvidia-uvm ]
then
# The major number for nvidia-uvm is read from /proc/devices, not hard-coded.
D=`grep nvidia-uvm /proc/devices | awk '{print $1}'`
mknod -m 666 /dev/nvidia-uvm c $D 0
fi
}
# Check the installation by running nvidia-smi and then nvidia-modprobe.
# NOTE(review): nvidia-modprobe is invoked twice with the same arguments, once
# bare and once with a fallback message. This looks like the old and new line
# of a diff shown together -- confirm that only one is meant to run.
function verify_installation() {
nvidia-smi
nvidia-modprobe -u -m -c 0
# In this variant a failure is only reported via echo; the `||` list then succeeds.
nvidia-modprobe -u -m -c 0 || echo "nvidia-modprobe failed!"
}
# Report whether lspci lists a device matching NVIDIA_PRODUCT_TYPE.
# Globals:  NVIDIA_PRODUCT_TYPE (read) - pattern matched case-insensitively
#           against each line of lspci output
# Returns:  0 when at least one line matches, 1 otherwise
function is_nvidia_probe_required() {
  local matched_devices
  # Capture the matches rather than using `grep -q`, so lspci is always read
  # to the end; a failed pipeline simply leaves the capture empty.
  matched_devices="$(lspci | grep -i "${NVIDIA_PRODUCT_TYPE}")" || true
  if [[ -n "${matched_devices}" ]]; then
    return 0
  fi
  return 1
}
function is_nvidia_installation_required() {
if [[ -z "$(lspci | grep -i "${NVIDIA_PRODUCT_TYPE}")" ]]; then
if ! is_nvidia_probe_required; then
return 1
fi
@ -140,3 +176,8 @@ then
setup "$@"
exit 0
fi
# When matching hardware is detected, load the driver and then run the
# verification step.
if is_nvidia_probe_required; then
  install_and_load
  verify_installation
fi