mirror of
https://github.com/siderolabs/extensions.git
synced 2025-09-23 14:41:02 +02:00
feat: support lts and production nvidia modules
Support LTS and production versions of NVIDIA kernel modules as per https://docs.nvidia.com/datacenter/tesla/drivers/index.html#lifecycle Part of: https://github.com/siderolabs/talos/issues/9086 Signed-off-by: Noel Georgi <git@frezbo.dev>
This commit is contained in:
parent
6e6f0293e1
commit
37f2297e6b
6
.github/workflows/ci.yaml
vendored
6
.github/workflows/ci.yaml
vendored
@ -1,6 +1,6 @@
|
|||||||
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
|
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
|
||||||
#
|
#
|
||||||
# Generated on 2024-08-01T13:26:11Z by kres faf91e3.
|
# Generated on 2024-08-06T13:24:11Z by kres 133368e.
|
||||||
|
|
||||||
name: default
|
name: default
|
||||||
concurrency:
|
concurrency:
|
||||||
@ -33,7 +33,7 @@ jobs:
|
|||||||
labels: ${{ steps.retrieve-pr-labels.outputs.result }}
|
labels: ${{ steps.retrieve-pr-labels.outputs.result }}
|
||||||
services:
|
services:
|
||||||
buildkitd:
|
buildkitd:
|
||||||
image: moby/buildkit:v0.15.0
|
image: moby/buildkit:v0.15.1
|
||||||
options: --privileged
|
options: --privileged
|
||||||
ports:
|
ports:
|
||||||
- 1234:1234
|
- 1234:1234
|
||||||
@ -143,7 +143,7 @@ jobs:
|
|||||||
- default
|
- default
|
||||||
services:
|
services:
|
||||||
buildkitd:
|
buildkitd:
|
||||||
image: moby/buildkit:v0.15.0
|
image: moby/buildkit:v0.15.1
|
||||||
options: --privileged
|
options: --privileged
|
||||||
ports:
|
ports:
|
||||||
- 1234:1234
|
- 1234:1234
|
||||||
|
4
.github/workflows/weekly.yaml
vendored
4
.github/workflows/weekly.yaml
vendored
@ -1,6 +1,6 @@
|
|||||||
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
|
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
|
||||||
#
|
#
|
||||||
# Generated on 2024-08-01T13:26:11Z by kres faf91e3.
|
# Generated on 2024-08-06T13:24:11Z by kres 133368e.
|
||||||
|
|
||||||
name: weekly
|
name: weekly
|
||||||
concurrency:
|
concurrency:
|
||||||
@ -16,7 +16,7 @@ jobs:
|
|||||||
- pkgs
|
- pkgs
|
||||||
services:
|
services:
|
||||||
buildkitd:
|
buildkitd:
|
||||||
image: moby/buildkit:v0.15.0
|
image: moby/buildkit:v0.15.1
|
||||||
options: --privileged
|
options: --privileged
|
||||||
ports:
|
ports:
|
||||||
- 1234:1234
|
- 1234:1234
|
||||||
|
14
.kres.yaml
14
.kres.yaml
@ -24,9 +24,12 @@ spec:
|
|||||||
- mdadm
|
- mdadm
|
||||||
- mei
|
- mei
|
||||||
- nut-client
|
- nut-client
|
||||||
- nvidia-container-toolkit
|
- nvidia-container-toolkit-lts
|
||||||
- nvidia-fabricmanager
|
- nvidia-container-toolkit-production
|
||||||
- nvidia-open-gpu-kernel-modules
|
- nvidia-fabricmanager-lts
|
||||||
|
- nvidia-fabricmanager-production
|
||||||
|
- nvidia-open-gpu-kernel-modules-lts
|
||||||
|
- nvidia-open-gpu-kernel-modules-production
|
||||||
- qemu-guest-agent
|
- qemu-guest-agent
|
||||||
- qlogic-firmware
|
- qlogic-firmware
|
||||||
- realtek-firmware
|
- realtek-firmware
|
||||||
@ -43,7 +46,8 @@ spec:
|
|||||||
- zfs
|
- zfs
|
||||||
additionalTargets:
|
additionalTargets:
|
||||||
nonfree:
|
nonfree:
|
||||||
- nonfree-kmod-nvidia
|
- nonfree-kmod-nvidia-lts
|
||||||
|
- nonfree-kmod-nvidia-production
|
||||||
reproducibleTargetName: reproducibility
|
reproducibleTargetName: reproducibility
|
||||||
extraBuildArgs:
|
extraBuildArgs:
|
||||||
- TAG
|
- TAG
|
||||||
@ -54,7 +58,7 @@ spec:
|
|||||||
- name: EXTENSIONS_IMAGE_REF
|
- name: EXTENSIONS_IMAGE_REF
|
||||||
defaultValue: $(REGISTRY_AND_USERNAME)/extensions:$(TAG)
|
defaultValue: $(REGISTRY_AND_USERNAME)/extensions:$(TAG)
|
||||||
- name: PKGS
|
- name: PKGS
|
||||||
defaultValue: v1.8.0-alpha.0-41-ga97d58f
|
defaultValue: v1.8.0-alpha.0-45-gaf6b4e6
|
||||||
- name: PKGS_PREFIX
|
- name: PKGS_PREFIX
|
||||||
defaultValue: ghcr.io/siderolabs
|
defaultValue: ghcr.io/siderolabs
|
||||||
useBldrPkgTagResolver: true
|
useBldrPkgTagResolver: true
|
||||||
|
19
Makefile
19
Makefile
@ -1,6 +1,6 @@
|
|||||||
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
|
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
|
||||||
#
|
#
|
||||||
# Generated on 2024-08-01T17:25:51Z by kres faf91e3.
|
# Generated on 2024-08-06T11:36:28Z by kres 2fded2b.
|
||||||
|
|
||||||
# common variables
|
# common variables
|
||||||
|
|
||||||
@ -25,7 +25,7 @@ SOURCE_DATE_EPOCH := $(shell git log $(INITIAL_COMMIT_SHA) --pretty=%ct)
|
|||||||
|
|
||||||
# sync bldr image with pkgfile
|
# sync bldr image with pkgfile
|
||||||
|
|
||||||
BLDR_RELEASE := v0.3.1
|
BLDR_RELEASE := v0.3.2
|
||||||
BLDR_IMAGE := ghcr.io/siderolabs/bldr:$(BLDR_RELEASE)
|
BLDR_IMAGE := ghcr.io/siderolabs/bldr:$(BLDR_RELEASE)
|
||||||
BLDR := docker run --rm --user $(shell id -u):$(shell id -g) --volume $(PWD):/src --entrypoint=/bldr $(BLDR_IMAGE) --root=/src
|
BLDR := docker run --rm --user $(shell id -u):$(shell id -g) --volume $(PWD):/src --entrypoint=/bldr $(BLDR_IMAGE) --root=/src
|
||||||
|
|
||||||
@ -48,7 +48,7 @@ COMMON_ARGS += --build-arg=PKGS_PREFIX="$(PKGS_PREFIX)"
|
|||||||
# extra variables
|
# extra variables
|
||||||
|
|
||||||
EXTENSIONS_IMAGE_REF ?= $(REGISTRY_AND_USERNAME)/extensions:$(TAG)
|
EXTENSIONS_IMAGE_REF ?= $(REGISTRY_AND_USERNAME)/extensions:$(TAG)
|
||||||
PKGS ?= v1.8.0-alpha.0-41-ga97d58f
|
PKGS ?= v1.8.0-alpha.0-45-gaf6b4e6
|
||||||
PKGS_PREFIX ?= ghcr.io/siderolabs
|
PKGS_PREFIX ?= ghcr.io/siderolabs
|
||||||
|
|
||||||
# targets defines all the available targets
|
# targets defines all the available targets
|
||||||
@ -64,7 +64,6 @@ TARGETS += drbd
|
|||||||
TARGETS += ecr-credential-provider
|
TARGETS += ecr-credential-provider
|
||||||
TARGETS += fuse3
|
TARGETS += fuse3
|
||||||
TARGETS += gasket-driver
|
TARGETS += gasket-driver
|
||||||
TARGETS += crun
|
|
||||||
TARGETS += gvisor
|
TARGETS += gvisor
|
||||||
TARGETS += gvisor-debug
|
TARGETS += gvisor-debug
|
||||||
TARGETS += hello-world-service
|
TARGETS += hello-world-service
|
||||||
@ -76,9 +75,12 @@ TARGETS += kata-containers
|
|||||||
TARGETS += mdadm
|
TARGETS += mdadm
|
||||||
TARGETS += mei
|
TARGETS += mei
|
||||||
TARGETS += nut-client
|
TARGETS += nut-client
|
||||||
TARGETS += nvidia-container-toolkit
|
TARGETS += nvidia-container-toolkit-lts
|
||||||
TARGETS += nvidia-fabricmanager
|
TARGETS += nvidia-container-toolkit-production
|
||||||
TARGETS += nvidia-open-gpu-kernel-modules
|
TARGETS += nvidia-fabricmanager-lts
|
||||||
|
TARGETS += nvidia-fabricmanager-production
|
||||||
|
TARGETS += nvidia-open-gpu-kernel-modules-lts
|
||||||
|
TARGETS += nvidia-open-gpu-kernel-modules-production
|
||||||
TARGETS += qemu-guest-agent
|
TARGETS += qemu-guest-agent
|
||||||
TARGETS += qlogic-firmware
|
TARGETS += qlogic-firmware
|
||||||
TARGETS += realtek-firmware
|
TARGETS += realtek-firmware
|
||||||
@ -93,7 +95,8 @@ TARGETS += vmtoolsd-guest-agent
|
|||||||
TARGETS += wasmedge
|
TARGETS += wasmedge
|
||||||
TARGETS += xen-guest-agent
|
TARGETS += xen-guest-agent
|
||||||
TARGETS += zfs
|
TARGETS += zfs
|
||||||
NONFREE_TARGETS = nonfree-kmod-nvidia
|
NONFREE_TARGETS = nonfree-kmod-nvidia-lts
|
||||||
|
NONFREE_TARGETS += nonfree-kmod-nvidia-production
|
||||||
|
|
||||||
# help menu
|
# help menu
|
||||||
|
|
||||||
|
@ -31,6 +31,27 @@ Gvisor now ships an additional runtime using `kvm` as the sandboxing mechanism.
|
|||||||
title = "Intel Management Engine"
|
title = "Intel Management Engine"
|
||||||
description = """
|
description = """
|
||||||
Intel Management Engine (IME) modules are now shipped as a Talos System Extension.
|
Intel Management Engine (IME) modules are now shipped as a Talos System Extension.
|
||||||
|
"""
|
||||||
|
|
||||||
|
[notes.nvidia]
|
||||||
|
title = "NVIDIA Driver and Container Toolkit"
|
||||||
|
description = """
|
||||||
|
The NVIDIA drivers and the container toolkit now ship in both an LTS and a Production version, as per https://docs.nvidia.com/datacenter/tesla/drivers/index.html#lifecycle.
|
||||||
|
|
||||||
|
The new extensions are named below:
|
||||||
|
|
||||||
|
* nvidia-container-toolkit-production
|
||||||
|
* nvidia-container-toolkit-lts
|
||||||
|
* nvidia-open-gpu-kernel-modules-production
|
||||||
|
* nvidia-open-gpu-kernel-modules-lts
|
||||||
|
* nonfree-kmod-nvidia-lts
|
||||||
|
* nonfree-kmod-nvidia-production
|
||||||
|
|
||||||
|
The extensions ship the latest version of the LTS/Production drivers available at the time of the Talos release.
|
||||||
|
|
||||||
|
When an existing schematic ID is used, Image Factory will upgrade the NVIDIA driver and container toolkit to the LTS version.
|
||||||
|
|
||||||
|
If the production version is required, the schematic ID should be updated to the production version.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
[notes.updates]
|
[notes.updates]
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
version: v1alpha1
|
version: v1alpha1
|
||||||
metadata:
|
metadata:
|
||||||
name: nonfree-kmod-nvidia
|
name: nonfree-kmod-nvidia-lts
|
||||||
version: "$VERSION"
|
version: "$VERSION"
|
||||||
author: Sidero Labs
|
author: Sidero Labs
|
||||||
description: |
|
description: |
|
@ -1,11 +1,11 @@
|
|||||||
name: nonfree-kmod-nvidia
|
name: nonfree-kmod-nvidia-lts
|
||||||
variant: scratch
|
variant: scratch
|
||||||
shell: /toolchain/bin/bash
|
shell: /toolchain/bin/bash
|
||||||
dependencies:
|
dependencies:
|
||||||
- stage: base
|
- stage: base
|
||||||
# The pkgs version for a particular release of Talos as defined in
|
# The pkgs version for a particular release of Talos as defined in
|
||||||
# https://github.com/siderolabs/talos/blob/<talos version>/pkg/machinery/gendata/data/pkgs
|
# https://github.com/siderolabs/talos/blob/<talos version>/pkg/machinery/gendata/data/pkgs
|
||||||
- image: "{{ .BUILD_ARG_PKGS_PREFIX }}/nonfree-kmod-nvidia-pkg:{{ .BUILD_ARG_PKGS }}"
|
- image: "{{ .BUILD_ARG_PKGS_PREFIX }}/nonfree-kmod-nvidia-lts-pkg:{{ .BUILD_ARG_PKGS }}"
|
||||||
steps:
|
steps:
|
||||||
- prepare:
|
- prepare:
|
||||||
- |
|
- |
|
@ -1,2 +1,2 @@
|
|||||||
# the first part is the driver version and the second the talos version for which the module is built against
|
# the first part is the driver version and the second the talos version for which the module is built against
|
||||||
VERSION: "{{ .NVIDIA_DRIVER_VERSION }}-{{ .BUILD_ARG_TAG }}"
|
VERSION: "{{ .NVIDIA_DRIVER_LTS_VERSION }}-{{ .BUILD_ARG_TAG }}"
|
10
nvidia-gpu/nonfree/kmod-nvidia/production/manifest.yaml
Normal file
10
nvidia-gpu/nonfree/kmod-nvidia/production/manifest.yaml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
version: v1alpha1
|
||||||
|
metadata:
|
||||||
|
name: nonfree-kmod-nvidia-production
|
||||||
|
version: "$VERSION"
|
||||||
|
author: Sidero Labs
|
||||||
|
description: |
|
||||||
|
This system extension provides nvidia proprietary kernel modules built against a specific Talos version.
|
||||||
|
compatibility:
|
||||||
|
talos:
|
||||||
|
version: ">= v1.5.0"
|
31
nvidia-gpu/nonfree/kmod-nvidia/production/pkg.yaml
Normal file
31
nvidia-gpu/nonfree/kmod-nvidia/production/pkg.yaml
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
name: nonfree-kmod-nvidia-production
|
||||||
|
variant: scratch
|
||||||
|
shell: /toolchain/bin/bash
|
||||||
|
dependencies:
|
||||||
|
- stage: base
|
||||||
|
# The pkgs version for a particular release of Talos as defined in
|
||||||
|
# https://github.com/siderolabs/talos/blob/<talos version>/pkg/machinery/gendata/data/pkgs
|
||||||
|
- image: "{{ .BUILD_ARG_PKGS_PREFIX }}/nonfree-kmod-nvidia-production-pkg:{{ .BUILD_ARG_PKGS }}"
|
||||||
|
steps:
|
||||||
|
- prepare:
|
||||||
|
- |
|
||||||
|
sed -i 's#$VERSION#{{ .VERSION }}#' /pkg/manifest.yaml
|
||||||
|
- install:
|
||||||
|
- |
|
||||||
|
mkdir -p /rootfs/lib/modules \
|
||||||
|
/rootfs/usr/local/lib/modprobe.d
|
||||||
|
|
||||||
|
cp /pkg/files/nvidia.conf /rootfs/usr/local/lib/modprobe.d/nvidia.conf
|
||||||
|
|
||||||
|
cp -R /lib/modules/* /rootfs/lib/modules
|
||||||
|
test:
|
||||||
|
- |
|
||||||
|
mkdir -p /extensions-validator-rootfs
|
||||||
|
cp -r /rootfs/ /extensions-validator-rootfs/rootfs
|
||||||
|
cp /pkg/manifest.yaml /extensions-validator-rootfs/manifest.yaml
|
||||||
|
/extensions-validator validate --rootfs=/extensions-validator-rootfs --pkg-name="${PKG_NAME}"
|
||||||
|
finalize:
|
||||||
|
- from: /rootfs
|
||||||
|
to: /rootfs
|
||||||
|
- from: /pkg/manifest.yaml
|
||||||
|
to: /
|
2
nvidia-gpu/nonfree/kmod-nvidia/production/vars.yaml
Normal file
2
nvidia-gpu/nonfree/kmod-nvidia/production/vars.yaml
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
# the first part is the driver version and the second is the talos version against which the module is built
|
||||||
|
VERSION: "{{ .NVIDIA_DRIVER_PRODUCTION_VERSION }}-{{ .BUILD_ARG_TAG }}"
|
@ -1,6 +1,6 @@
|
|||||||
version: v1alpha1
|
version: v1alpha1
|
||||||
metadata:
|
metadata:
|
||||||
name: nvidia-container-toolkit
|
name: nvidia-container-toolkit-lts
|
||||||
version: "$VERSION"
|
version: "$VERSION"
|
||||||
author: Sidero Labs
|
author: Sidero Labs
|
||||||
description: |
|
description: |
|
@ -1,9 +1,9 @@
|
|||||||
name: nvidia-container-toolkit
|
name: nvidia-container-toolkit-lts
|
||||||
variant: scratch
|
variant: scratch
|
||||||
shell: /toolchain/bin/bash
|
shell: /toolchain/bin/bash
|
||||||
dependencies:
|
dependencies:
|
||||||
- stage: base
|
- stage: base
|
||||||
- stage: nvidia-container-cli
|
- stage: nvidia-container-cli-lts
|
||||||
- stage: elfutils
|
- stage: elfutils
|
||||||
- stage: zlib
|
- stage: zlib
|
||||||
- stage: libcap
|
- stage: libcap
|
@ -1,2 +1,2 @@
|
|||||||
# the first part is the driver version and the second the container-toolkit version
|
# the first part is the driver version and the second the container-toolkit version
|
||||||
VERSION: "{{ .NVIDIA_DRIVER_VERSION }}-{{ .CONTAINER_TOOLKIT_VERSION }}"
|
VERSION: "{{ .NVIDIA_DRIVER_LTS_VERSION }}-{{ .CONTAINER_TOOLKIT_VERSION }}"
|
@ -1,4 +1,4 @@
|
|||||||
name: nvidia-container-cli
|
name: nvidia-container-cli-lts
|
||||||
variant: scratch
|
variant: scratch
|
||||||
shell: /bin/bash
|
shell: /bin/bash
|
||||||
install:
|
install:
|
||||||
@ -15,7 +15,7 @@ dependencies:
|
|||||||
# nvidia-pkgs depends on glibc,
|
# nvidia-pkgs depends on glibc,
|
||||||
# so any stage depending on nvidia-container-cli will have the updated ld.so.cache,
|
# so any stage depending on nvidia-container-cli will have the updated ld.so.cache,
|
||||||
# from both nvidia-pkgs and nvidia-container-cli
|
# from both nvidia-pkgs and nvidia-container-cli
|
||||||
- stage: nvidia-pkgs
|
- stage: nvidia-pkgs-lts
|
||||||
- stage: libseccomp
|
- stage: libseccomp
|
||||||
from: /rootfs
|
from: /rootfs
|
||||||
- stage: libcap
|
- stage: libcap
|
||||||
@ -28,10 +28,10 @@ dependencies:
|
|||||||
from: /rootfs
|
from: /rootfs
|
||||||
steps:
|
steps:
|
||||||
- sources:
|
- sources:
|
||||||
- url: https://gitlab.com/nvidia/container-toolkit/libnvidia-container/-/archive/{{ .LIBNVIDIA_CONTAINER_VERSION }}/libnvidia-container-{{ .LIBNVIDIA_CONTAINER_VERSION }}.tar.gz
|
- url: https://github.com/NVIDIA/libnvidia-container/archive/refs/tags/{{ .LIBNVIDIA_CONTAINER_VERSION }}.tar.gz
|
||||||
destination: libnvidia-container.tar.gz
|
destination: libnvidia-container.tar.gz
|
||||||
sha256: d23984591004c59c33f6f13c8237f1fb84113b8eddb0f9943302df4c3b0cc549
|
sha256: cbc1dda7ee90b8b729c5f178292cd07b421863015d84b84c37e69c8d580ab3ff
|
||||||
sha512: a5a75b0cd29cf7c0484dbd650456c93bb495a0fe5449d6b8c7680af7509be3b9e1f12ab437b56309bfb4b66cfe2868b4adbe882e29b169c7733c0247ecf2489b
|
sha512: b304c284c5ab0c3544362307dc16ffcca8d34497e4356a520dc6da81a86a62b2a262b528cba559bb0d7a3addf018c3b50b6cb78669c82c1b4acae159e5922548
|
||||||
env:
|
env:
|
||||||
SOURCE_DATE_EPOCH: {{ .BUILD_ARG_SOURCE_DATE_EPOCH }}
|
SOURCE_DATE_EPOCH: {{ .BUILD_ARG_SOURCE_DATE_EPOCH }}
|
||||||
REVISION: {{ .LIBNVIDIA_CONTAINER_REF }}
|
REVISION: {{ .LIBNVIDIA_CONTAINER_REF }}
|
@ -0,0 +1,67 @@
|
|||||||
|
name: nvidia-container-cli-production
|
||||||
|
variant: scratch
|
||||||
|
shell: /bin/bash
|
||||||
|
install:
|
||||||
|
- build-base
|
||||||
|
- bash
|
||||||
|
- go
|
||||||
|
- coreutils
|
||||||
|
- sed
|
||||||
|
- curl
|
||||||
|
- rpcsvc-proto
|
||||||
|
- patch
|
||||||
|
dependencies:
|
||||||
|
- image: cgr.dev/chainguard/wolfi-base@{{ .WOLFI_BASE_REF }}
|
||||||
|
# nvidia-pkgs depends on glibc,
|
||||||
|
# so any stage depending on nvidia-container-cli will have the updated ld.so.cache,
|
||||||
|
# from both nvidia-pkgs and nvidia-container-cli
|
||||||
|
- stage: nvidia-pkgs-production
|
||||||
|
- stage: libseccomp
|
||||||
|
from: /rootfs
|
||||||
|
- stage: libcap
|
||||||
|
from: /rootfs
|
||||||
|
- stage: elfutils
|
||||||
|
from: /rootfs
|
||||||
|
- stage: zlib
|
||||||
|
from: /rootfs
|
||||||
|
- stage: libtirpc
|
||||||
|
from: /rootfs
|
||||||
|
steps:
|
||||||
|
- sources:
|
||||||
|
- url: https://github.com/NVIDIA/libnvidia-container/archive/refs/tags/{{ .LIBNVIDIA_CONTAINER_VERSION }}.tar.gz
|
||||||
|
destination: libnvidia-container.tar.gz
|
||||||
|
sha256: cbc1dda7ee90b8b729c5f178292cd07b421863015d84b84c37e69c8d580ab3ff
|
||||||
|
sha512: b304c284c5ab0c3544362307dc16ffcca8d34497e4356a520dc6da81a86a62b2a262b528cba559bb0d7a3addf018c3b50b6cb78669c82c1b4acae159e5922548
|
||||||
|
env:
|
||||||
|
SOURCE_DATE_EPOCH: {{ .BUILD_ARG_SOURCE_DATE_EPOCH }}
|
||||||
|
REVISION: {{ .LIBNVIDIA_CONTAINER_REF }}
|
||||||
|
LIB_VERSION: {{ .LIBNVIDIA_CONTAINER_VERSION | replace "v" "" }}
|
||||||
|
WITH_NVCGO: yes
|
||||||
|
WITH_LIBELF: yes
|
||||||
|
WITH_TIRPC: no # setting no means we'll use the system libtirpc
|
||||||
|
WITH_SECCOMP: yes
|
||||||
|
PKG_CONFIG_PATH: /usr/local/glibc/lib/pkgconfig # to find runtime libraries compiled in extensions (libseccomp)
|
||||||
|
PATH: "/usr/bin:{{ .PATH }}" # bldr doesn't have /usr/bin in PATH
|
||||||
|
prepare:
|
||||||
|
- |
|
||||||
|
mkdir libnvidia-container
|
||||||
|
tar -xzf libnvidia-container.tar.gz --strip-components=1 -C libnvidia-container
|
||||||
|
build:
|
||||||
|
- |
|
||||||
|
cd libnvidia-container
|
||||||
|
|
||||||
|
# LDLIBS=-L/usr/local/glibc/lib is set so that the libnvidia-container-cli libs, which are hardcoded as -llibname and do not use pkg-config, can be found by the linker
|
||||||
|
CPPFLAGS="-I/usr/local/glibc/include/tirpc" LDLIBS="-L/usr/local/glibc/lib -ltirpc -lelf -lseccomp" LDFLAGS='-Wl,--rpath=\$$ORIGIN/../glibc/\$$LIB' make
|
||||||
|
install:
|
||||||
|
- |
|
||||||
|
mkdir -p /rootfs
|
||||||
|
|
||||||
|
cd libnvidia-container
|
||||||
|
|
||||||
|
make install DESTDIR=/rootfs
|
||||||
|
|
||||||
|
# run ldconfig to update the cache
|
||||||
|
/rootfs/usr/local/glibc/sbin/ldconfig -r /rootfs
|
||||||
|
finalize:
|
||||||
|
- from: /rootfs
|
||||||
|
to: /rootfs
|
@ -10,10 +10,10 @@ dependencies:
|
|||||||
- image: cgr.dev/chainguard/wolfi-base@{{ .WOLFI_BASE_REF }}
|
- image: cgr.dev/chainguard/wolfi-base@{{ .WOLFI_BASE_REF }}
|
||||||
steps:
|
steps:
|
||||||
- sources:
|
- sources:
|
||||||
- url: https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/archive/{{ .CONTAINER_TOOLKIT_VERSION }}/container-toolkit-{{ .CONTAINER_TOOLKIT_VERSION }}.tar.gz
|
- url: https://github.com/NVIDIA/nvidia-container-toolkit/archive/refs/tags/{{ .CONTAINER_TOOLKIT_VERSION }}.tar.gz
|
||||||
destination: container-toolkit.tar.gz
|
destination: container-toolkit.tar.gz
|
||||||
sha256: b006700e31ed1475ed25695770cab10d74fdac55cdb94e66d70468740482fb53
|
sha256: 38a193444e0342c0a2c0d3664403e2c341eb77f1461b3f9172fd93c04de82165
|
||||||
sha512: 11ceffddb164194d0f10c60aeec2c1e20c699a6f3cb1887bca8f49496c9fda869c6c65f1f5f8e816467abee43da002fe2922b8e68ba8f6e61d30f635509da5e0
|
sha512: 691d4fc47ea60b730ec491b333aa8118bcfd62cdab20a42b84155c6a13484d920e758435b5029bbae4fbefce82352aa5764f1554992682f689c95615809fb83c
|
||||||
env:
|
env:
|
||||||
GIT_COMMIT: {{ substr 0 7 .CONTAINER_TOOLKIT_REF }} # build is using short sha
|
GIT_COMMIT: {{ substr 0 7 .CONTAINER_TOOLKIT_REF }} # build is using short sha
|
||||||
prepare:
|
prepare:
|
||||||
|
98
nvidia-gpu/nvidia-container-toolkit/nvidia-pkgs/lts/pkg.yaml
Normal file
98
nvidia-gpu/nvidia-container-toolkit/nvidia-pkgs/lts/pkg.yaml
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
name: nvidia-pkgs-lts
|
||||||
|
variant: scratch
|
||||||
|
shell: /bin/bash
|
||||||
|
install:
|
||||||
|
- bash
|
||||||
|
dependencies:
|
||||||
|
- image: cgr.dev/chainguard/wolfi-base@{{ .WOLFI_BASE_REF }}
|
||||||
|
# depends on glibc to update ld.so.cache
|
||||||
|
# so any stage depending on nvidia-pkgs will have the updated cache
|
||||||
|
- stage: glibc
|
||||||
|
steps:
|
||||||
|
- sources:
|
||||||
|
# {{ if eq .ARCH "aarch64" }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
||||||
|
- url: https://developer.download.nvidia.com/compute/nvidia-driver/redist/nvidia_driver/linux-sbsa/nvidia_driver-linux-sbsa-{{ .NVIDIA_DRIVER_LTS_VERSION }}-archive.tar.xz
|
||||||
|
destination: nvidia.tar.xz
|
||||||
|
sha256: 970be3ae71332ca008f3e6589ae44a70aeffb9e29382980114e47b8fce7790d1
|
||||||
|
sha512: bd730a51a77d897509381ecb22eb21a9f4e0c2419288f1c1c26f8ef00e887b1cc09718d1d4c9d613912560e48185ff03ea221865be5c0e590a20868c45a8ea00
|
||||||
|
# {{ else }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
||||||
|
- url: https://developer.download.nvidia.com/compute/nvidia-driver/redist/nvidia_driver/linux-x86_64/nvidia_driver-linux-x86_64-{{ .NVIDIA_DRIVER_LTS_VERSION }}-archive.tar.xz
|
||||||
|
destination: nvidia.tar.xz
|
||||||
|
sha256: e66527c5c016d0bee9050a7a8573e38be86aad58adee2f40e808c88a4d0c6e90
|
||||||
|
sha512: 71624903e9d57a3f8a5dc7ffb2435991fe787b0609096e0e146d03ffef54bdb145940e8717510aa87cd6407c860e22938c840c126db7d4469c265f202db35e18
|
||||||
|
# {{ end }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
||||||
|
prepare:
|
||||||
|
- |
|
||||||
|
# the nvidia installer validates these packages are installed
|
||||||
|
ln -s /bin/true /bin/modprobe
|
||||||
|
ln -s /bin/true /bin/rmmod
|
||||||
|
ln -s /bin/true /bin/lsmod
|
||||||
|
ln -s /bin/true /bin/depmod
|
||||||
|
|
||||||
|
tar xf nvidia.tar.xz --strip-components=1
|
||||||
|
install:
|
||||||
|
- |
|
||||||
|
mkdir -p assets/{html,libglvnd_install_checker}
|
||||||
|
|
||||||
|
cp -r bin/* assets/
|
||||||
|
cp CHANGELOG assets/NVIDIA_Changelog
|
||||||
|
cp -r docs/* assets/html/
|
||||||
|
cp -r etc/* assets/
|
||||||
|
cp -r firmware assets/
|
||||||
|
cp -r lib/* assets/
|
||||||
|
cp LICENSE assets/
|
||||||
|
cp -r man/man1/* assets/
|
||||||
|
cp MANIFEST assets/.manifest
|
||||||
|
cp README assets/README.txt
|
||||||
|
cp -r sbin/* assets/
|
||||||
|
cp -r share/* assets/
|
||||||
|
cp -r supported-gpus assets/
|
||||||
|
cp -r systemd assets/
|
||||||
|
cp -r tests/glvnd/* assets/libglvnd_install_checker
|
||||||
|
# {{ if eq .ARCH "x86_64" }}cp -r wine/* assets/{{ end }}
|
||||||
|
|
||||||
|
cd assets
|
||||||
|
|
||||||
|
./nvidia-installer --silent \
|
||||||
|
--opengl-prefix=/rootfs/usr/local \
|
||||||
|
--utility-prefix=/rootfs/usr/local \
|
||||||
|
--utility-libdir=glibc/lib \
|
||||||
|
--documentation-prefix=/rootfs/usr/local \
|
||||||
|
--no-rpms \
|
||||||
|
--no-kernel-modules \
|
||||||
|
--log-file-name=/tmp/nvidia-installer.log \
|
||||||
|
--no-distro-scripts \
|
||||||
|
--no-wine-files \
|
||||||
|
--no-kernel-module-source \
|
||||||
|
--no-check-for-alternate-installs \
|
||||||
|
--override-file-type-destination=NVIDIA_MODPROBE:/rootfs/usr/local/bin \
|
||||||
|
--override-file-type-destination=FIRMWARE:/rootfs/lib/firmware/nvidia/{{ .NVIDIA_DRIVER_LTS_VERSION }} \
|
||||||
|
--no-systemd \
|
||||||
|
# {{ if eq .ARCH "x86_64" }}--no-install-compat32-libs{{ end }}
|
||||||
|
|
||||||
|
# copy vulkan/OpenGL json files
|
||||||
|
mkdir -p /rootfs/{etc/vulkan,usr/share/{glvnd,egl}}
|
||||||
|
|
||||||
|
cp -r /usr/share/glvnd/* /rootfs/usr/share/glvnd
|
||||||
|
cp -r /usr/share/egl/* /rootfs/usr/share/egl
|
||||||
|
cp -r /etc/vulkan/* /rootfs/etc/vulkan
|
||||||
|
|
||||||
|
# mv over files from /usr/local/lib -> /usr/local/glibc/lib
|
||||||
|
mv /rootfs/usr/local/lib/* /rootfs/usr/local/glibc/lib/
|
||||||
|
|
||||||
|
# copy xorg files
|
||||||
|
mkdir -p /rootfs/usr/local/glibc/lib/nvidia/xorg
|
||||||
|
find /usr/lib/xorg/modules -type f -exec cp {} /rootfs/usr/local/glibc/lib/nvidia/xorg \;
|
||||||
|
|
||||||
|
# run ldconfig to update the cache
|
||||||
|
/rootfs/usr/local/glibc/sbin/ldconfig -r /rootfs
|
||||||
|
|
||||||
|
mkdir -p /rootfs/usr/local/lib/containers/nvidia-persistenced \
|
||||||
|
/rootfs/usr/local/etc/containers \
|
||||||
|
/rootfs/usr/etc/udev/rules.d
|
||||||
|
|
||||||
|
# copy udev rule
|
||||||
|
cp /pkg/files/15-nvidia-device.rules /rootfs/usr/etc/udev/rules.d
|
||||||
|
finalize:
|
||||||
|
- from: /rootfs
|
||||||
|
to: /rootfs
|
@ -0,0 +1,5 @@
|
|||||||
|
# This will create the nvidia device nodes
|
||||||
|
ACTION=="add", DEVPATH=="/bus/pci/drivers/nvidia", RUN+="/usr/local/bin/nvidia-modprobe -c 0"
|
||||||
|
|
||||||
|
# Create the device node for the nvidia-uvm module
|
||||||
|
ACTION=="add", DEVPATH=="/module/nvidia_uvm", SUBSYSTEM=="module", RUN+="/usr/local/bin/nvidia-modprobe -c 0 -u"
|
@ -1,4 +1,4 @@
|
|||||||
name: nvidia-pkgs
|
name: nvidia-pkgs-production
|
||||||
variant: scratch
|
variant: scratch
|
||||||
shell: /bin/bash
|
shell: /bin/bash
|
||||||
install:
|
install:
|
||||||
@ -11,15 +11,15 @@ dependencies:
|
|||||||
steps:
|
steps:
|
||||||
- sources:
|
- sources:
|
||||||
# {{ if eq .ARCH "aarch64" }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
# {{ if eq .ARCH "aarch64" }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
||||||
- url: https://download.nvidia.com/XFree86/Linux-aarch64/{{ .NVIDIA_DRIVER_VERSION }}/NVIDIA-Linux-aarch64-{{ .NVIDIA_DRIVER_VERSION }}.run
|
- url: https://developer.download.nvidia.com/compute/nvidia-driver/redist/nvidia_driver/linux-sbsa/nvidia_driver-linux-sbsa-{{ .NVIDIA_DRIVER_PRODUCTION_VERSION }}-archive.tar.xz
|
||||||
destination: nvidia.run
|
destination: nvidia.tar.xz
|
||||||
sha256: 8ba8d961457a241bcdf91b76d6fe2f36cb473c8bbdb02fb6650a622ce2e85b33
|
sha256: dd2892ac0c97abe69dd9ccb5e09d2fd5b5ce010c64ce5eb0950a0f6fceb9b4dc
|
||||||
sha512: 706de7e53b81f909d8bc6a12a39c594754a164c49f5d23c7939dc3abcfc04f5d5b12b7d65762ae574582149a098f06ee5fe95be4f8ad1056a3307a6ce93f3c00
|
sha512: 9c1466d9ea09a01dda4de0a2b3270cc6a5093636554eadfb58c3e2957e053592f7d628c3d5b31dbb36702e187561cb7f955e9bf2ddb1adb28e7ca4568d39a0f0
|
||||||
# {{ else }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
# {{ else }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
||||||
- url: https://download.nvidia.com/XFree86/Linux-x86_64/{{ .NVIDIA_DRIVER_VERSION }}/NVIDIA-Linux-x86_64-{{ .NVIDIA_DRIVER_VERSION }}-no-compat32.run
|
- url: https://developer.download.nvidia.com/compute/nvidia-driver/redist/nvidia_driver/linux-x86_64/nvidia_driver-linux-x86_64-{{ .NVIDIA_DRIVER_PRODUCTION_VERSION }}-archive.tar.xz
|
||||||
destination: nvidia.run
|
destination: nvidia.tar.xz
|
||||||
sha256: ffed07a30323fd6cf9caad3fb45e6259223135f6004d832511921a788f719ba6
|
sha256: 7959e9e0e15863c9242f8a0bda0b3b67b39701956890ff159961f59e89f92158
|
||||||
sha512: f75c288b27a17ea8c63dac68cda01b94184b41332778df6a702d30d814c407c1e45f30bd7c81511508ace6560a16e79c24e8698f457aaee3ee1d03c57725ab27
|
sha512: 89a4249bce2c15af56911afa6998c355d6522e2e7493e80ed9241a9d5009ccf2522bf7bceffc03673600bbfd0d89f3a46a3c21fb0f4977e6dc674648b4c6caea
|
||||||
# {{ end }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
# {{ end }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
||||||
prepare:
|
prepare:
|
||||||
- |
|
- |
|
||||||
@ -29,10 +29,28 @@ steps:
|
|||||||
ln -s /bin/true /bin/lsmod
|
ln -s /bin/true /bin/lsmod
|
||||||
ln -s /bin/true /bin/depmod
|
ln -s /bin/true /bin/depmod
|
||||||
|
|
||||||
bash nvidia.run --extract-only
|
tar xf nvidia.tar.xz --strip-components=1
|
||||||
install:
|
install:
|
||||||
- |
|
- |
|
||||||
cd NVIDIA-Linux-*
|
mkdir -p assets/html
|
||||||
|
|
||||||
|
cp -r bin/* assets/
|
||||||
|
cp CHANGELOG assets/NVIDIA_Changelog
|
||||||
|
cp -r docs/* assets/html/
|
||||||
|
cp -r etc/* assets/
|
||||||
|
cp -r firmware assets/
|
||||||
|
cp -r lib/* assets/
|
||||||
|
cp LICENSE assets/
|
||||||
|
cp -r man/man1/* assets/
|
||||||
|
cp MANIFEST assets/.manifest
|
||||||
|
cp README assets/README.txt
|
||||||
|
cp -r sbin/* assets/
|
||||||
|
cp -r share/* assets/
|
||||||
|
cp -r supported-gpus assets/
|
||||||
|
cp -r systemd assets/
|
||||||
|
# {{ if eq .ARCH "x86_64" }}cp -r wine/* assets/{{ end }}
|
||||||
|
|
||||||
|
cd assets
|
||||||
|
|
||||||
./nvidia-installer --silent \
|
./nvidia-installer --silent \
|
||||||
--opengl-prefix=/rootfs/usr/local \
|
--opengl-prefix=/rootfs/usr/local \
|
||||||
@ -47,11 +65,11 @@ steps:
|
|||||||
--no-kernel-module-source \
|
--no-kernel-module-source \
|
||||||
--no-check-for-alternate-installs \
|
--no-check-for-alternate-installs \
|
||||||
--override-file-type-destination=NVIDIA_MODPROBE:/rootfs/usr/local/bin \
|
--override-file-type-destination=NVIDIA_MODPROBE:/rootfs/usr/local/bin \
|
||||||
--override-file-type-destination=FIRMWARE:/rootfs/lib/firmware/nvidia/{{ .NVIDIA_DRIVER_VERSION }} \
|
--override-file-type-destination=FIRMWARE:/rootfs/lib/firmware/nvidia/{{ .NVIDIA_DRIVER_PRODUCTION_VERSION }} \
|
||||||
--no-systemd
|
--no-systemd
|
||||||
|
|
||||||
# copy vulkan/OpenGL json files
|
# copy vulkan/OpenGL json files
|
||||||
mkdir -p /rootfs/{etc/vulkan,usr/{lib/xorg,share/{glvnd,egl}}}
|
mkdir -p /rootfs/{etc/vulkan,usr/share/{glvnd,egl}}
|
||||||
|
|
||||||
cp -r /usr/share/glvnd/* /rootfs/usr/share/glvnd
|
cp -r /usr/share/glvnd/* /rootfs/usr/share/glvnd
|
||||||
cp -r /usr/share/egl/* /rootfs/usr/share/egl
|
cp -r /usr/share/egl/* /rootfs/usr/share/egl
|
10
nvidia-gpu/nvidia-container-toolkit/production/manifest.yaml
Normal file
10
nvidia-gpu/nvidia-container-toolkit/production/manifest.yaml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
version: v1alpha1
|
||||||
|
metadata:
|
||||||
|
name: nvidia-container-toolkit-production
|
||||||
|
version: "$VERSION"
|
||||||
|
author: Sidero Labs
|
||||||
|
description: |
|
||||||
|
This system extension provides nvidia runtime and its dependencies using NVIDIA's runtime handler.
|
||||||
|
compatibility:
|
||||||
|
talos:
|
||||||
|
version: ">= v1.2.0"
|
@ -0,0 +1,55 @@
|
|||||||
|
# https://download.nvidia.com/XFree86/Linux-x86_64/515.65.01/README/nvidia-persistenced.html
|
||||||
|
name: nvidia-persistenced
|
||||||
|
container:
|
||||||
|
entrypoint: /usr/local/bin/nvidia-persistenced-wrapper
|
||||||
|
mounts:
|
||||||
|
# device files
|
||||||
|
- source: /dev
|
||||||
|
destination: /dev
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- rshared
|
||||||
|
- rbind
|
||||||
|
- rw
|
||||||
|
# shared libraries
|
||||||
|
- source: /lib64
|
||||||
|
destination: /lib64
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- bind
|
||||||
|
- ro
|
||||||
|
# shared libraries
|
||||||
|
- source: /usr/local/glibc
|
||||||
|
destination: /usr/local/glibc
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- bind
|
||||||
|
- ro
|
||||||
|
# nvidia libraries
|
||||||
|
- source: /usr/local/lib
|
||||||
|
destination: /usr/local/lib
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- bind
|
||||||
|
- ro
|
||||||
|
# service state file
|
||||||
|
- source: /var/run
|
||||||
|
destination: /var/run
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- rshared
|
||||||
|
- rbind
|
||||||
|
- rw
|
||||||
|
# binaries
|
||||||
|
- source: /usr/local/bin
|
||||||
|
destination: /usr/local/bin
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- bind
|
||||||
|
- ro
|
||||||
|
depends:
|
||||||
|
- service: cri
|
||||||
|
# we need to depend on udevd so that the nvidia device files are created
|
||||||
|
- service: udevd
|
||||||
|
- path: /sys/bus/pci/drivers/nvidia
|
||||||
|
restart: always
|
34
nvidia-gpu/nvidia-container-toolkit/production/pkg.yaml
Normal file
34
nvidia-gpu/nvidia-container-toolkit/production/pkg.yaml
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
name: nvidia-container-toolkit-production
|
||||||
|
variant: scratch
|
||||||
|
shell: /toolchain/bin/bash
|
||||||
|
dependencies:
|
||||||
|
- stage: base
|
||||||
|
- stage: nvidia-container-cli-production
|
||||||
|
- stage: elfutils
|
||||||
|
- stage: zlib
|
||||||
|
- stage: libcap
|
||||||
|
- stage: libseccomp
|
||||||
|
- stage: libtirpc
|
||||||
|
- stage: nvidia-container-runtime
|
||||||
|
- stage: nvidia-container-runtime-wrapper
|
||||||
|
- stage: nvidia-persistenced-wrapper
|
||||||
|
steps:
|
||||||
|
- prepare:
|
||||||
|
- |
|
||||||
|
sed -i 's#$VERSION#{{ .VERSION }}#' /pkg/manifest.yaml
|
||||||
|
install:
|
||||||
|
- |
|
||||||
|
mkdir -p /rootfs
|
||||||
|
test:
|
||||||
|
- |
|
||||||
|
mkdir -p /extensions-validator-rootfs
|
||||||
|
cp -r /rootfs/ /extensions-validator-rootfs/rootfs
|
||||||
|
cp /pkg/manifest.yaml /extensions-validator-rootfs/manifest.yaml
|
||||||
|
/extensions-validator validate --rootfs=/extensions-validator-rootfs --pkg-name="${PKG_NAME}"
|
||||||
|
finalize:
|
||||||
|
- from: /rootfs
|
||||||
|
to: /rootfs
|
||||||
|
- from: /pkg/nvidia-persistenced.yaml
|
||||||
|
to: /rootfs/usr/local/etc/containers/nvidia-persistenced.yaml
|
||||||
|
- from: /pkg/manifest.yaml
|
||||||
|
to: /
|
2
nvidia-gpu/nvidia-container-toolkit/production/vars.yaml
Normal file
2
nvidia-gpu/nvidia-container-toolkit/production/vars.yaml
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
# the first part is the driver version and the second is the container-toolkit version
|
||||||
|
VERSION: "{{ .NVIDIA_DRIVER_PRODUCTION_VERSION }}-{{ .CONTAINER_TOOLKIT_VERSION }}"
|
@ -1,6 +1,6 @@
|
|||||||
version: v1alpha1
|
version: v1alpha1
|
||||||
metadata:
|
metadata:
|
||||||
name: nvidia-fabricmanager
|
name: nvidia-fabricmanager-lts
|
||||||
version: "$VERSION"
|
version: "$VERSION"
|
||||||
author: Sidero Labs
|
author: Sidero Labs
|
||||||
description: |
|
description: |
|
@ -1,4 +1,4 @@
|
|||||||
name: nvidia-fabricmanager
|
name: nvidia-fabricmanager-lts
|
||||||
variant: scratch
|
variant: scratch
|
||||||
shell: /toolchain/bin/bash
|
shell: /toolchain/bin/bash
|
||||||
dependencies:
|
dependencies:
|
||||||
@ -6,15 +6,15 @@ dependencies:
|
|||||||
steps:
|
steps:
|
||||||
- sources:
|
- sources:
|
||||||
# {{ if eq .ARCH "aarch64" }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
# {{ if eq .ARCH "aarch64" }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
||||||
- url: https://developer.download.nvidia.com/compute/nvidia-driver/redist/fabricmanager/linux-sbsa/fabricmanager-linux-sbsa-{{ .NVIDIA_DRIVER_VERSION }}-archive.tar.xz
|
- url: https://developer.download.nvidia.com/compute/nvidia-driver/redist/fabricmanager/linux-sbsa/fabricmanager-linux-sbsa-{{ .NVIDIA_DRIVER_LTS_VERSION }}-archive.tar.xz
|
||||||
destination: fabricmanager.tar.xz
|
destination: fabricmanager.tar.xz
|
||||||
sha256: d0fbe665669a3b68d138bec0edcc4920866935171bf12c24470328d10ca2403b
|
sha256: 235ed7e0a55215ec4d0467fe73f71445622debca87bdb990bf582e022d38d699
|
||||||
sha512: c705cc208225b8b83c91cca4a9e363c4862c0fb726e95fd68dd5e6a269620da0f1272138102c12b061c4b3ff20ceee4e35abb3bf8af4adbe1de9411ddec82f6a
|
sha512: c1d4b8983e274be5c881664e44ba558e0d7c92560a9058adaa158f5a88df2e40b6b4b95c555accd672e9aa366b2e9c0b945d5d9f320150791aea844b07db5bf0
|
||||||
# {{ else }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
# {{ else }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
||||||
- url: https://developer.download.nvidia.com/compute/nvidia-driver/redist/fabricmanager/linux-x86_64/fabricmanager-linux-x86_64-{{ .NVIDIA_DRIVER_VERSION }}-archive.tar.xz
|
- url: https://developer.download.nvidia.com/compute/nvidia-driver/redist/fabricmanager/linux-x86_64/fabricmanager-linux-x86_64-{{ .NVIDIA_DRIVER_LTS_VERSION }}-archive.tar.xz
|
||||||
destination: fabricmanager.tar.xz
|
destination: fabricmanager.tar.xz
|
||||||
sha256: e4a4584be24b5408439019fc67b7b4b89bd42d0cba752a709d8b1b071c3b3318
|
sha256: a4d1ead61c684d2b83edbedbb09869b9cfa7a83838ea8210985a519fa36c9834
|
||||||
sha512: a9bb0bb7d52b576378f1d767c5b801c6421390aa5d2acc40e2bc2b264c1d1f41b2cd5b166a00e4893654975a20e68a8597c15e467ebaade137fa18a6015609ab
|
sha512: 1cd3a4662da46210695a759be6f962754d5168a72e1376b1a9d8464f80829519a0fc4498474009a426264ef17a8569a587e37560760a599068794705cf3fdcc7
|
||||||
# {{ end }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
# {{ end }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
||||||
prepare:
|
prepare:
|
||||||
- |
|
- |
|
1
nvidia-gpu/nvidia-fabricmanager/lts/vars.yaml
Normal file
1
nvidia-gpu/nvidia-fabricmanager/lts/vars.yaml
Normal file
@ -0,0 +1 @@
|
|||||||
|
VERSION: "{{ .NVIDIA_DRIVER_LTS_VERSION }}"
|
10
nvidia-gpu/nvidia-fabricmanager/production/lts/manifest.yaml
Normal file
10
nvidia-gpu/nvidia-fabricmanager/production/lts/manifest.yaml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
version: v1alpha1
|
||||||
|
metadata:
|
||||||
|
name: nvidia-fabricmanager-production
|
||||||
|
version: "$VERSION"
|
||||||
|
author: Sidero Labs
|
||||||
|
description: |
|
||||||
|
This system extension provides the Nvidia fabricmanager for GPUs that need NVLink support.
|
||||||
|
compatibility:
|
||||||
|
talos:
|
||||||
|
version: ">= v1.2.0"
|
@ -0,0 +1,74 @@
|
|||||||
|
# https://docs.nvidia.com/datacenter/tesla/pdf/fabric-manager-user-guide.pdf
|
||||||
|
name: nvidia-fabricmanager
|
||||||
|
container:
|
||||||
|
entrypoint: /usr/local/bin/nv-fabricmanager
|
||||||
|
args:
|
||||||
|
- --config
|
||||||
|
- /usr/local/share/nvidia/nvswitch/fabricmanager.cfg
|
||||||
|
mounts:
|
||||||
|
# device files
|
||||||
|
- source: /dev
|
||||||
|
destination: /dev
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- rshared
|
||||||
|
- rbind
|
||||||
|
- rw
|
||||||
|
# shared libraries
|
||||||
|
- source: /lib64
|
||||||
|
destination: /lib64
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- bind
|
||||||
|
- ro
|
||||||
|
# shared libraries
|
||||||
|
- source: /usr/local/glibc
|
||||||
|
destination: /usr/local/glibc
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- bind
|
||||||
|
- ro
|
||||||
|
# nvidia libraries
|
||||||
|
- source: /usr/local/lib
|
||||||
|
destination: /usr/local/lib
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- bind
|
||||||
|
- ro
|
||||||
|
# service state file
|
||||||
|
- source: /var/run/nvidia-fabricmanager
|
||||||
|
destination: /var/run/nvidia-fabricmanager
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- rshared
|
||||||
|
- rbind
|
||||||
|
- rw
|
||||||
|
# log files
|
||||||
|
- source: /var/log
|
||||||
|
destination: /var/log
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- rshared
|
||||||
|
- rbind
|
||||||
|
- rw
|
||||||
|
# fabric topology files
|
||||||
|
- source: /usr/local/share/nvidia/nvswitch
|
||||||
|
destination: /usr/local/share/nvidia/nvswitch
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- rshared
|
||||||
|
- rbind
|
||||||
|
- ro
|
||||||
|
# binaries
|
||||||
|
- source: /usr/local/bin
|
||||||
|
destination: /usr/local/bin
|
||||||
|
type: bind
|
||||||
|
options:
|
||||||
|
- bind
|
||||||
|
- ro
|
||||||
|
depends:
|
||||||
|
- service: cri
|
||||||
|
# we need to depend on udevd so that the nvidia device files are created
|
||||||
|
- service: udevd
|
||||||
|
- path: /sys/bus/pci/drivers/nvidia
|
||||||
|
restart: always
|
59
nvidia-gpu/nvidia-fabricmanager/production/lts/pkg.yaml
Normal file
59
nvidia-gpu/nvidia-fabricmanager/production/lts/pkg.yaml
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
name: nvidia-fabricmanager-production
|
||||||
|
variant: scratch
|
||||||
|
shell: /toolchain/bin/bash
|
||||||
|
dependencies:
|
||||||
|
- stage: base
|
||||||
|
steps:
|
||||||
|
- sources:
|
||||||
|
# {{ if eq .ARCH "aarch64" }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
||||||
|
- url: https://developer.download.nvidia.com/compute/nvidia-driver/redist/fabricmanager/linux-sbsa/fabricmanager-linux-sbsa-{{ .NVIDIA_DRIVER_PRODUCTION_VERSION }}-archive.tar.xz
|
||||||
|
destination: fabricmanager.tar.xz
|
||||||
|
sha256: c9760f6f1c582fd43ea15b93ca66dd368459432a39338f648c585d32514ab6d6
|
||||||
|
sha512: 9b0b6b7c8ce19f5a7408d338ebf123b2a5a8184d20590ee82744f44b182fbc555c43b2278cade063836493f2162cdfa2d984466a05956c95cbe4f0c172589422
|
||||||
|
# {{ else }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
||||||
|
- url: https://developer.download.nvidia.com/compute/nvidia-driver/redist/fabricmanager/linux-x86_64/fabricmanager-linux-x86_64-{{ .NVIDIA_DRIVER_PRODUCTION_VERSION }}-archive.tar.xz
|
||||||
|
destination: fabricmanager.tar.xz
|
||||||
|
sha256: 034c04ca2a6ce6a5d49bf293b969618609c90470e620fee97ec76cac1f4471f7
|
||||||
|
sha512: 6af90b415d82e448d81416daa36cb4588be6f796f53a3e04a1466a32c46212598ba3c60a96b4c066cde7af881a41f88f4f2015c499dedeed3c0d59611e0d6b21
|
||||||
|
# {{ end }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
|
||||||
|
prepare:
|
||||||
|
- |
|
||||||
|
tar -xf fabricmanager.tar.xz --strip-components=1
|
||||||
|
|
||||||
|
sed -i 's#$VERSION#{{ .VERSION }}#' /pkg/manifest.yaml
|
||||||
|
install:
|
||||||
|
- |
|
||||||
|
mkdir -p /rootfs/usr/local/bin \
|
||||||
|
/rootfs/usr/local/lib \
|
||||||
|
/rootfs/usr/local/share/nvidia/nvswitch \
|
||||||
|
/rootfs/usr/local/lib/containers/nvidia-fabricmanager \
|
||||||
|
/rootfs/usr/local/etc/containers
|
||||||
|
|
||||||
|
cp lib/libnvfm.so.1 /rootfs/usr/local/lib/libnvfm.so.1
|
||||||
|
ln -s libnvfm.so.1 /rootfs/usr/local/lib/libnvfm.so
|
||||||
|
|
||||||
|
cp bin/nv-fabricmanager /rootfs/usr/local/bin/
|
||||||
|
cp bin/nvswitch-audit /rootfs/usr/local/bin/
|
||||||
|
|
||||||
|
cp share/nvidia/nvswitch/dgx2_hgx2_topology /rootfs/usr/local/share/nvidia/nvswitch/
|
||||||
|
cp share/nvidia/nvswitch/dgxa100_hgxa100_topology /rootfs/usr/local/share/nvidia/nvswitch/
|
||||||
|
|
||||||
|
cp etc/fabricmanager.cfg /rootfs/usr/local/share/nvidia/nvswitch/
|
||||||
|
|
||||||
|
sed -i 's/DAEMONIZE=.*/DAEMONIZE=0/g' /rootfs/usr/local/share/nvidia/nvswitch/fabricmanager.cfg
|
||||||
|
sed -i 's/STATE_FILE_NAME=.*/STATE_FILE_NAME=\/var\/run\/nvidia-fabricmanager\/fabricmanager.state/g' /rootfs/usr/local/share/nvidia/nvswitch/fabricmanager.cfg
|
||||||
|
sed -i 's/TOPOLOGY_FILE_PATH=.*/TOPOLOGY_FILE_PATH=\/usr\/local\/share\/nvidia\/nvswitch/g' /rootfs/usr/local/share/nvidia/nvswitch/fabricmanager.cfg
|
||||||
|
sed -i 's/DATABASE_PATH=.*/DATABASE_PATH=\/usr\/local\/share\/nvidia\/nvswitch/g' /rootfs/usr/local/share/nvidia/nvswitch/fabricmanager.cfg
|
||||||
|
test:
|
||||||
|
- |
|
||||||
|
mkdir -p /extensions-validator-rootfs
|
||||||
|
cp -r /rootfs/ /extensions-validator-rootfs/rootfs
|
||||||
|
cp /pkg/manifest.yaml /extensions-validator-rootfs/manifest.yaml
|
||||||
|
/extensions-validator validate --rootfs=/extensions-validator-rootfs --pkg-name="${PKG_NAME}"
|
||||||
|
finalize:
|
||||||
|
- from: /rootfs
|
||||||
|
to: /rootfs
|
||||||
|
- from: /pkg/nvidia-fabricmanager.yaml
|
||||||
|
to: /rootfs/usr/local/etc/containers/nvidia-fabricmanager.yaml
|
||||||
|
- from: /pkg/manifest.yaml
|
||||||
|
to: /
|
1
nvidia-gpu/nvidia-fabricmanager/production/lts/vars.yaml
Normal file
1
nvidia-gpu/nvidia-fabricmanager/production/lts/vars.yaml
Normal file
@ -0,0 +1 @@
|
|||||||
|
VERSION: "{{ .NVIDIA_DRIVER_PRODUCTION_VERSION }}"
|
@ -1 +0,0 @@
|
|||||||
VERSION: "{{ .NVIDIA_DRIVER_VERSION }}"
|
|
4
nvidia-gpu/nvidia-modules/lts/files/nvidia.conf
Normal file
4
nvidia-gpu/nvidia-modules/lts/files/nvidia.conf
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
blacklist nvidia
|
||||||
|
blacklist nvidia_uvm
|
||||||
|
blacklist nvidia_drm
|
||||||
|
blacklist nvidia_modeset
|
@ -1,6 +1,6 @@
|
|||||||
version: v1alpha1
|
version: v1alpha1
|
||||||
metadata:
|
metadata:
|
||||||
name: nvidia-open-gpu-kernel-modules
|
name: nvidia-open-gpu-kernel-modules-lts
|
||||||
version: "$VERSION"
|
version: "$VERSION"
|
||||||
author: Sidero Labs
|
author: Sidero Labs
|
||||||
description: |
|
description: |
|
@ -1,11 +1,11 @@
|
|||||||
name: nvidia-open-gpu-kernel-modules
|
name: nvidia-open-gpu-kernel-modules-lts
|
||||||
variant: scratch
|
variant: scratch
|
||||||
shell: /toolchain/bin/bash
|
shell: /toolchain/bin/bash
|
||||||
dependencies:
|
dependencies:
|
||||||
- stage: base
|
- stage: base
|
||||||
# The pkgs version for a particular release of Talos as defined in
|
# The pkgs version for a particular release of Talos as defined in
|
||||||
# https://github.com/siderolabs/talos/blob/<talos version>/pkg/machinery/gendata/data/pkgs
|
# https://github.com/siderolabs/talos/blob/<talos version>/pkg/machinery/gendata/data/pkgs
|
||||||
- image: "{{ .BUILD_ARG_PKGS_PREFIX }}/nvidia-open-gpu-kernel-modules-pkg:{{ .BUILD_ARG_PKGS }}"
|
- image: "{{ .BUILD_ARG_PKGS_PREFIX }}/nvidia-open-gpu-kernel-modules-lts-pkg:{{ .BUILD_ARG_PKGS }}"
|
||||||
steps:
|
steps:
|
||||||
- prepare:
|
- prepare:
|
||||||
- |
|
- |
|
@ -1,2 +1,2 @@
|
|||||||
# the first part is the driver version and the second the talos version for which the module is built against
|
# the first part is the driver version and the second the talos version for which the module is built against
|
||||||
VERSION: "{{ .NVIDIA_DRIVER_VERSION }}-{{ .BUILD_ARG_TAG }}"
|
VERSION: "{{ .NVIDIA_DRIVER_LTS_VERSION }}-{{ .BUILD_ARG_TAG }}"
|
4
nvidia-gpu/nvidia-modules/production/files/nvidia.conf
Normal file
4
nvidia-gpu/nvidia-modules/production/files/nvidia.conf
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
blacklist nvidia
|
||||||
|
blacklist nvidia_uvm
|
||||||
|
blacklist nvidia_drm
|
||||||
|
blacklist nvidia_modeset
|
10
nvidia-gpu/nvidia-modules/production/manifest.yaml
Normal file
10
nvidia-gpu/nvidia-modules/production/manifest.yaml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
version: v1alpha1
|
||||||
|
metadata:
|
||||||
|
name: nvidia-open-gpu-kernel-modules-production
|
||||||
|
version: "$VERSION"
|
||||||
|
author: Sidero Labs
|
||||||
|
description: |
|
||||||
|
This system extension provides nvidia open source driver kernel modules built against a specific Talos version.
|
||||||
|
compatibility:
|
||||||
|
talos:
|
||||||
|
version: ">= v1.2.0"
|
31
nvidia-gpu/nvidia-modules/production/pkg.yaml
Normal file
31
nvidia-gpu/nvidia-modules/production/pkg.yaml
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
name: nvidia-open-gpu-kernel-modules-production
|
||||||
|
variant: scratch
|
||||||
|
shell: /toolchain/bin/bash
|
||||||
|
dependencies:
|
||||||
|
- stage: base
|
||||||
|
# The pkgs version for a particular release of Talos as defined in
|
||||||
|
# https://github.com/siderolabs/talos/blob/<talos version>/pkg/machinery/gendata/data/pkgs
|
||||||
|
- image: "{{ .BUILD_ARG_PKGS_PREFIX }}/nvidia-open-gpu-kernel-modules-production-pkg:{{ .BUILD_ARG_PKGS }}"
|
||||||
|
steps:
|
||||||
|
- prepare:
|
||||||
|
- |
|
||||||
|
sed -i 's#$VERSION#{{ .VERSION }}#' /pkg/manifest.yaml
|
||||||
|
- install:
|
||||||
|
- |
|
||||||
|
mkdir -p /rootfs/lib/modules \
|
||||||
|
/rootfs/usr/local/lib/modprobe.d
|
||||||
|
|
||||||
|
cp /pkg/files/nvidia.conf /rootfs/usr/local/lib/modprobe.d/nvidia.conf
|
||||||
|
|
||||||
|
cp -R /lib/modules/* /rootfs/lib/modules
|
||||||
|
test:
|
||||||
|
- |
|
||||||
|
mkdir -p /extensions-validator-rootfs
|
||||||
|
cp -r /rootfs/ /extensions-validator-rootfs/rootfs
|
||||||
|
cp /pkg/manifest.yaml /extensions-validator-rootfs/manifest.yaml
|
||||||
|
/extensions-validator validate --rootfs=/extensions-validator-rootfs --pkg-name="${PKG_NAME}"
|
||||||
|
finalize:
|
||||||
|
- from: /rootfs
|
||||||
|
to: /rootfs
|
||||||
|
- from: /pkg/manifest.yaml
|
||||||
|
to: /
|
2
nvidia-gpu/nvidia-modules/production/vars.yaml
Normal file
2
nvidia-gpu/nvidia-modules/production/vars.yaml
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
# the first part is the driver version and the second is the talos version against which the module is built
|
||||||
|
VERSION: "{{ .NVIDIA_DRIVER_PRODUCTION_VERSION }}-{{ .BUILD_ARG_TAG }}"
|
@ -1,12 +1,14 @@
|
|||||||
# only update if there's a matching fabric manager version
|
# only update if there's a matching fabric manager version
|
||||||
# renovate: datasource=github-releases depName=nvidia/open-gpu-kernel-modules
|
# renovate: datasource=github-releases extractVersion=^\d+\.(?<version>\d+\.\d+)$ depName=nvidia/open-gpu-kernel-modules
|
||||||
NVIDIA_DRIVER_VERSION: 535.129.03
|
NVIDIA_DRIVER_LTS_VERSION: 535.183.06
|
||||||
# renovate: datasource=git-tags depName=https://gitlab.com/nvidia/container-toolkit/container-toolkit.git
|
# renovate: datasource=github-releases extractVersion=^\d+\.(?<version>\d+\.\d+)$ depName=nvidia/open-gpu-kernel-modules
|
||||||
CONTAINER_TOOLKIT_VERSION: v1.15.0
|
NVIDIA_DRIVER_PRODUCTION_VERSION: 550.90.07
|
||||||
CONTAINER_TOOLKIT_REF: ddeeca392c7bd8b33d0a66400b77af7a97e16cef
|
# renovate: datasource=github-releases depName=nvidia/nvidia-container-toolkit
|
||||||
# renovate: datasource=git-tags depName=https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
|
CONTAINER_TOOLKIT_VERSION: v1.16.1
|
||||||
LIBNVIDIA_CONTAINER_VERSION: v1.15.0
|
CONTAINER_TOOLKIT_REF: a470818ba7d9166be282cd0039dd2fc9b0a34d73
|
||||||
LIBNVIDIA_CONTAINER_REF: 6c8f1df7fd32cea3280cf2a2c6e931c9b3132465
|
# renovate: datasource=git-tags depName=nvidia/libnvidia-container
|
||||||
|
LIBNVIDIA_CONTAINER_VERSION: v1.16.1
|
||||||
|
LIBNVIDIA_CONTAINER_REF: 4c2494f16573b585788a42e9c7bee76ecd48c73d
|
||||||
# renovate: datasource=docker versioning=docker depName=cgr.dev/chainguard/wolfi-base
|
# renovate: datasource=docker versioning=docker depName=cgr.dev/chainguard/wolfi-base
|
||||||
WOLFI_BASE_REF: sha256:c9339087a6de501ba6989756aeb1e1c89af82ac0e53c8b1ccd1feb44ec2246d9
|
WOLFI_BASE_REF: sha256:c9339087a6de501ba6989756aeb1e1c89af82ac0e53c8b1ccd1feb44ec2246d9
|
||||||
# renovate: datasource=git-tags extractVersion=^glibc-(?<version>.*)$ depName=https://sourceware.org/git/glibc.git
|
# renovate: datasource=git-tags extractVersion=^glibc-(?<version>.*)$ depName=https://sourceware.org/git/glibc.git
|
||||||
|
@ -27,8 +27,6 @@ steps:
|
|||||||
ln -s /toolchain/bin/pkg-config /usr/bin/pkg-config
|
ln -s /toolchain/bin/pkg-config /usr/bin/pkg-config
|
||||||
ln -s /toolchain/bin/file /usr/bin/file
|
ln -s /toolchain/bin/file /usr/bin/file
|
||||||
|
|
||||||
cp /toolchain/share/automake-1.16/config.guess config.guess
|
|
||||||
cp /toolchain/lib/libstdc++* /lib
|
|
||||||
autoreconf -if
|
autoreconf -if
|
||||||
|
|
||||||
export PKG_CONFIG_PATH=/usr/lib/pkgconfig
|
export PKG_CONFIG_PATH=/usr/lib/pkgconfig
|
||||||
|
@ -15,7 +15,6 @@ steps:
|
|||||||
tar -xzf open-isns.tar.gz --strip-components=1
|
tar -xzf open-isns.tar.gz --strip-components=1
|
||||||
|
|
||||||
- |
|
- |
|
||||||
cp /toolchain/share/automake-1.16/config.guess aclocal/
|
|
||||||
autoreconf -fi
|
autoreconf -fi
|
||||||
|
|
||||||
- |
|
- |
|
||||||
|
Loading…
x
Reference in New Issue
Block a user