mirror of
https://github.com/siderolabs/talos.git
synced 2026-05-05 12:26:21 +02:00
feat(ci): add nvidia arm64 matrix
Add an NVIDIA arm64 test matrix. Also ensure we have a known baseline for the NVIDIA CDI files, so that if upstream adds more files and we don't install them to the right location, the test fails. Signed-off-by: Noel Georgi <git@frezbo.dev> (cherry picked from commit 6a3ab87c54f83f70869a2e298e6ed7722cf4afad)
This commit is contained in:
parent
cd73b4a822
commit
67a34a6eb3
602
.github/workflows/ci.yaml
vendored
602
.github/workflows/ci.yaml
vendored
@ -1,6 +1,6 @@
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
|
||||
#
|
||||
# Generated on 2026-04-15T14:42:53Z by kres b6d29bf.
|
||||
# Generated on 2026-04-15T14:54:12Z by kres b6d29bf.
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.head_ref || github.run_id }}
|
||||
@ -972,6 +972,156 @@ jobs:
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
integration-aws-nvidia-nonfree-lts-arm64:
|
||||
permissions:
|
||||
actions: read
|
||||
contents: write
|
||||
issues: read
|
||||
packages: write
|
||||
pull-requests: read
|
||||
runs-on:
|
||||
group: generic
|
||||
if: contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-nonfree-lts-arm64') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-nonfree') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia')
|
||||
needs:
|
||||
- default
|
||||
steps:
|
||||
- name: gather-system-info
|
||||
id: system-info
|
||||
uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0
|
||||
continue-on-error: true
|
||||
- name: print-system-info
|
||||
run: |
|
||||
MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024))
|
||||
|
||||
OUTPUTS=(
|
||||
"CPU Core: ${{ steps.system-info.outputs.cpu-core }}"
|
||||
"CPU Model: ${{ steps.system-info.outputs.cpu-model }}"
|
||||
"Hostname: ${{ steps.system-info.outputs.hostname }}"
|
||||
"NodeName: ${NODE_NAME}"
|
||||
"Kernel release: ${{ steps.system-info.outputs.kernel-release }}"
|
||||
"Kernel version: ${{ steps.system-info.outputs.kernel-version }}"
|
||||
"Name: ${{ steps.system-info.outputs.name }}"
|
||||
"Platform: ${{ steps.system-info.outputs.platform }}"
|
||||
"Release: ${{ steps.system-info.outputs.release }}"
|
||||
"Total memory: ${MEMORY_GB} GB"
|
||||
)
|
||||
|
||||
for OUTPUT in "${OUTPUTS[@]}";do
|
||||
echo "${OUTPUT}"
|
||||
done
|
||||
continue-on-error: true
|
||||
- name: checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
- name: Unshallow
|
||||
run: |
|
||||
git fetch --prune --unshallow
|
||||
- name: Set up Docker Buildx
|
||||
id: setup-buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0
|
||||
with:
|
||||
driver: remote
|
||||
endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234
|
||||
timeout-minutes: 10
|
||||
- name: Mask secrets
|
||||
run: |
|
||||
echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')"
|
||||
- name: Set secrets for job
|
||||
run: |
|
||||
sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV"
|
||||
- name: Download artifacts
|
||||
if: github.event_name != 'schedule'
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1
|
||||
with:
|
||||
name: talos-artifacts
|
||||
path: _out
|
||||
- name: Fix artifact permissions
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
xargs -a _out/executable-artifacts -I {} chmod +x {}
|
||||
- name: ci-temp-release-tag
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
make ci-temp-release-tag
|
||||
- name: generate
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
make generate
|
||||
- name: uki-certs
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
PLATFORM: linux/amd64
|
||||
run: |
|
||||
make uki-certs
|
||||
- name: build
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
run: |
|
||||
make installer-base imager _out/integration-test-linux-amd64
|
||||
- name: image-aws
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
run: |
|
||||
make image-aws
|
||||
- name: checkout extensions
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/extensions
|
||||
ref: main
|
||||
repository: siderolabs/extensions
|
||||
- name: set variables
|
||||
run: |
|
||||
cat _out/talos-metadata >> "$GITHUB_ENV"
|
||||
- name: build extensions
|
||||
env:
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
REGISTRY: registry.dev.siderolabs.io
|
||||
run: |
|
||||
make nvidia-container-toolkit-lts nonfree-kmod-nvidia-lts extensions-metadata -C _out/extensions
|
||||
- name: e2e-aws-prepare
|
||||
env:
|
||||
E2E_AWS_TARGET: nvidia-nonfree-lts
|
||||
EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
TARGET_ARCH: arm64
|
||||
run: |
|
||||
make e2e-aws-prepare
|
||||
- name: checkout contrib
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/contrib
|
||||
ref: main
|
||||
repository: siderolabs/contrib
|
||||
- name: setup tf
|
||||
uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0
|
||||
with:
|
||||
terraform_wrapper: "false"
|
||||
- name: tf apply
|
||||
env:
|
||||
TF_E2E_ACTION: apply
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
- name: e2e-aws-nvidia-nonfree-lts
|
||||
env:
|
||||
EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false
|
||||
INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA
|
||||
run: |
|
||||
make e2e-aws
|
||||
- name: tf destroy
|
||||
if: always()
|
||||
env:
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: "false"
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
integration-aws-nvidia-nonfree-production:
|
||||
permissions:
|
||||
actions: read
|
||||
@ -1125,6 +1275,156 @@ jobs:
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
integration-aws-nvidia-nonfree-production-arm64:
|
||||
permissions:
|
||||
actions: read
|
||||
contents: write
|
||||
issues: read
|
||||
packages: write
|
||||
pull-requests: read
|
||||
runs-on:
|
||||
group: generic
|
||||
if: contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-nonfree-production-arm64') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-nonfree') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia')
|
||||
needs:
|
||||
- default
|
||||
steps:
|
||||
- name: gather-system-info
|
||||
id: system-info
|
||||
uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0
|
||||
continue-on-error: true
|
||||
- name: print-system-info
|
||||
run: |
|
||||
MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024))
|
||||
|
||||
OUTPUTS=(
|
||||
"CPU Core: ${{ steps.system-info.outputs.cpu-core }}"
|
||||
"CPU Model: ${{ steps.system-info.outputs.cpu-model }}"
|
||||
"Hostname: ${{ steps.system-info.outputs.hostname }}"
|
||||
"NodeName: ${NODE_NAME}"
|
||||
"Kernel release: ${{ steps.system-info.outputs.kernel-release }}"
|
||||
"Kernel version: ${{ steps.system-info.outputs.kernel-version }}"
|
||||
"Name: ${{ steps.system-info.outputs.name }}"
|
||||
"Platform: ${{ steps.system-info.outputs.platform }}"
|
||||
"Release: ${{ steps.system-info.outputs.release }}"
|
||||
"Total memory: ${MEMORY_GB} GB"
|
||||
)
|
||||
|
||||
for OUTPUT in "${OUTPUTS[@]}";do
|
||||
echo "${OUTPUT}"
|
||||
done
|
||||
continue-on-error: true
|
||||
- name: checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
- name: Unshallow
|
||||
run: |
|
||||
git fetch --prune --unshallow
|
||||
- name: Set up Docker Buildx
|
||||
id: setup-buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0
|
||||
with:
|
||||
driver: remote
|
||||
endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234
|
||||
timeout-minutes: 10
|
||||
- name: Mask secrets
|
||||
run: |
|
||||
echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')"
|
||||
- name: Set secrets for job
|
||||
run: |
|
||||
sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV"
|
||||
- name: Download artifacts
|
||||
if: github.event_name != 'schedule'
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1
|
||||
with:
|
||||
name: talos-artifacts
|
||||
path: _out
|
||||
- name: Fix artifact permissions
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
xargs -a _out/executable-artifacts -I {} chmod +x {}
|
||||
- name: ci-temp-release-tag
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
make ci-temp-release-tag
|
||||
- name: generate
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
make generate
|
||||
- name: uki-certs
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
PLATFORM: linux/amd64
|
||||
run: |
|
||||
make uki-certs
|
||||
- name: build
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
run: |
|
||||
make installer-base imager _out/integration-test-linux-amd64
|
||||
- name: image-aws
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
run: |
|
||||
make image-aws
|
||||
- name: checkout extensions
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/extensions
|
||||
ref: main
|
||||
repository: siderolabs/extensions
|
||||
- name: set variables
|
||||
run: |
|
||||
cat _out/talos-metadata >> "$GITHUB_ENV"
|
||||
- name: build extensions
|
||||
env:
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
REGISTRY: registry.dev.siderolabs.io
|
||||
run: |
|
||||
make nvidia-container-toolkit-production nonfree-kmod-nvidia-production extensions-metadata -C _out/extensions
|
||||
- name: e2e-aws-prepare
|
||||
env:
|
||||
E2E_AWS_TARGET: nvidia-nonfree-production
|
||||
EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
TARGET_ARCH: arm64
|
||||
run: |
|
||||
make e2e-aws-prepare
|
||||
- name: checkout contrib
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/contrib
|
||||
ref: main
|
||||
repository: siderolabs/contrib
|
||||
- name: setup tf
|
||||
uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0
|
||||
with:
|
||||
terraform_wrapper: "false"
|
||||
- name: tf apply
|
||||
env:
|
||||
TF_E2E_ACTION: apply
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
- name: e2e-aws-nvidia-oss-production
|
||||
env:
|
||||
EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false
|
||||
INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA
|
||||
run: |
|
||||
make e2e-aws
|
||||
- name: tf destroy
|
||||
if: always()
|
||||
env:
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: "false"
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
integration-aws-nvidia-oss-lts:
|
||||
permissions:
|
||||
actions: read
|
||||
@ -1278,6 +1578,156 @@ jobs:
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
integration-aws-nvidia-oss-lts-arm64:
|
||||
permissions:
|
||||
actions: read
|
||||
contents: write
|
||||
issues: read
|
||||
packages: write
|
||||
pull-requests: read
|
||||
runs-on:
|
||||
group: generic
|
||||
if: contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-oss-lts-arm64') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-oss') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia')
|
||||
needs:
|
||||
- default
|
||||
steps:
|
||||
- name: gather-system-info
|
||||
id: system-info
|
||||
uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0
|
||||
continue-on-error: true
|
||||
- name: print-system-info
|
||||
run: |
|
||||
MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024))
|
||||
|
||||
OUTPUTS=(
|
||||
"CPU Core: ${{ steps.system-info.outputs.cpu-core }}"
|
||||
"CPU Model: ${{ steps.system-info.outputs.cpu-model }}"
|
||||
"Hostname: ${{ steps.system-info.outputs.hostname }}"
|
||||
"NodeName: ${NODE_NAME}"
|
||||
"Kernel release: ${{ steps.system-info.outputs.kernel-release }}"
|
||||
"Kernel version: ${{ steps.system-info.outputs.kernel-version }}"
|
||||
"Name: ${{ steps.system-info.outputs.name }}"
|
||||
"Platform: ${{ steps.system-info.outputs.platform }}"
|
||||
"Release: ${{ steps.system-info.outputs.release }}"
|
||||
"Total memory: ${MEMORY_GB} GB"
|
||||
)
|
||||
|
||||
for OUTPUT in "${OUTPUTS[@]}";do
|
||||
echo "${OUTPUT}"
|
||||
done
|
||||
continue-on-error: true
|
||||
- name: checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
- name: Unshallow
|
||||
run: |
|
||||
git fetch --prune --unshallow
|
||||
- name: Set up Docker Buildx
|
||||
id: setup-buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0
|
||||
with:
|
||||
driver: remote
|
||||
endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234
|
||||
timeout-minutes: 10
|
||||
- name: Mask secrets
|
||||
run: |
|
||||
echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')"
|
||||
- name: Set secrets for job
|
||||
run: |
|
||||
sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV"
|
||||
- name: Download artifacts
|
||||
if: github.event_name != 'schedule'
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1
|
||||
with:
|
||||
name: talos-artifacts
|
||||
path: _out
|
||||
- name: Fix artifact permissions
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
xargs -a _out/executable-artifacts -I {} chmod +x {}
|
||||
- name: ci-temp-release-tag
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
make ci-temp-release-tag
|
||||
- name: generate
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
make generate
|
||||
- name: uki-certs
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
PLATFORM: linux/amd64
|
||||
run: |
|
||||
make uki-certs
|
||||
- name: build
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
run: |
|
||||
make installer-base imager _out/integration-test-linux-amd64
|
||||
- name: image-aws
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
run: |
|
||||
make image-aws
|
||||
- name: checkout extensions
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/extensions
|
||||
ref: main
|
||||
repository: siderolabs/extensions
|
||||
- name: set variables
|
||||
run: |
|
||||
cat _out/talos-metadata >> "$GITHUB_ENV"
|
||||
- name: build extensions
|
||||
env:
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
REGISTRY: registry.dev.siderolabs.io
|
||||
run: |
|
||||
make nvidia-container-toolkit-lts nvidia-open-gpu-kernel-modules-lts extensions-metadata -C _out/extensions
|
||||
- name: e2e-aws-prepare
|
||||
env:
|
||||
E2E_AWS_TARGET: nvidia-oss-lts
|
||||
EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
TARGET_ARCH: arm64
|
||||
run: |
|
||||
make e2e-aws-prepare
|
||||
- name: checkout contrib
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/contrib
|
||||
ref: main
|
||||
repository: siderolabs/contrib
|
||||
- name: setup tf
|
||||
uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0
|
||||
with:
|
||||
terraform_wrapper: "false"
|
||||
- name: tf apply
|
||||
env:
|
||||
TF_E2E_ACTION: apply
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
- name: e2e-aws-nvidia-oss-lts
|
||||
env:
|
||||
EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false
|
||||
INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA
|
||||
run: |
|
||||
make e2e-aws
|
||||
- name: tf destroy
|
||||
if: always()
|
||||
env:
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: "false"
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
integration-aws-nvidia-oss-production:
|
||||
permissions:
|
||||
actions: read
|
||||
@ -1431,6 +1881,156 @@ jobs:
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
integration-aws-nvidia-oss-production-arm64:
|
||||
permissions:
|
||||
actions: read
|
||||
contents: write
|
||||
issues: read
|
||||
packages: write
|
||||
pull-requests: read
|
||||
runs-on:
|
||||
group: generic
|
||||
if: contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-oss-production-arm64') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-oss') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia')
|
||||
needs:
|
||||
- default
|
||||
steps:
|
||||
- name: gather-system-info
|
||||
id: system-info
|
||||
uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0
|
||||
continue-on-error: true
|
||||
- name: print-system-info
|
||||
run: |
|
||||
MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024))
|
||||
|
||||
OUTPUTS=(
|
||||
"CPU Core: ${{ steps.system-info.outputs.cpu-core }}"
|
||||
"CPU Model: ${{ steps.system-info.outputs.cpu-model }}"
|
||||
"Hostname: ${{ steps.system-info.outputs.hostname }}"
|
||||
"NodeName: ${NODE_NAME}"
|
||||
"Kernel release: ${{ steps.system-info.outputs.kernel-release }}"
|
||||
"Kernel version: ${{ steps.system-info.outputs.kernel-version }}"
|
||||
"Name: ${{ steps.system-info.outputs.name }}"
|
||||
"Platform: ${{ steps.system-info.outputs.platform }}"
|
||||
"Release: ${{ steps.system-info.outputs.release }}"
|
||||
"Total memory: ${MEMORY_GB} GB"
|
||||
)
|
||||
|
||||
for OUTPUT in "${OUTPUTS[@]}";do
|
||||
echo "${OUTPUT}"
|
||||
done
|
||||
continue-on-error: true
|
||||
- name: checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
- name: Unshallow
|
||||
run: |
|
||||
git fetch --prune --unshallow
|
||||
- name: Set up Docker Buildx
|
||||
id: setup-buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0
|
||||
with:
|
||||
driver: remote
|
||||
endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234
|
||||
timeout-minutes: 10
|
||||
- name: Mask secrets
|
||||
run: |
|
||||
echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')"
|
||||
- name: Set secrets for job
|
||||
run: |
|
||||
sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV"
|
||||
- name: Download artifacts
|
||||
if: github.event_name != 'schedule'
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1
|
||||
with:
|
||||
name: talos-artifacts
|
||||
path: _out
|
||||
- name: Fix artifact permissions
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
xargs -a _out/executable-artifacts -I {} chmod +x {}
|
||||
- name: ci-temp-release-tag
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
make ci-temp-release-tag
|
||||
- name: generate
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
make generate
|
||||
- name: uki-certs
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
PLATFORM: linux/amd64
|
||||
run: |
|
||||
make uki-certs
|
||||
- name: build
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
run: |
|
||||
make installer-base imager _out/integration-test-linux-amd64
|
||||
- name: image-aws
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
run: |
|
||||
make image-aws
|
||||
- name: checkout extensions
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/extensions
|
||||
ref: main
|
||||
repository: siderolabs/extensions
|
||||
- name: set variables
|
||||
run: |
|
||||
cat _out/talos-metadata >> "$GITHUB_ENV"
|
||||
- name: build extensions
|
||||
env:
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
REGISTRY: registry.dev.siderolabs.io
|
||||
run: |
|
||||
make nvidia-container-toolkit-production nvidia-open-gpu-kernel-modules-production extensions-metadata -C _out/extensions
|
||||
- name: e2e-aws-prepare
|
||||
env:
|
||||
E2E_AWS_TARGET: nvidia-oss-production
|
||||
EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
TARGET_ARCH: arm64
|
||||
run: |
|
||||
make e2e-aws-prepare
|
||||
- name: checkout contrib
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/contrib
|
||||
ref: main
|
||||
repository: siderolabs/contrib
|
||||
- name: setup tf
|
||||
uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0
|
||||
with:
|
||||
terraform_wrapper: "false"
|
||||
- name: tf apply
|
||||
env:
|
||||
TF_E2E_ACTION: apply
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
- name: e2e-aws-nvidia-oss-production
|
||||
env:
|
||||
EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false
|
||||
INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA
|
||||
run: |
|
||||
make e2e-aws
|
||||
- name: tf destroy
|
||||
if: always()
|
||||
env:
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: "false"
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
integration-cilium:
|
||||
permissions:
|
||||
actions: read
|
||||
|
||||
153
.github/workflows/integration-aws-nvidia-nonfree-lts-arm64-cron.yaml
vendored
Normal file
153
.github/workflows/integration-aws-nvidia-nonfree-lts-arm64-cron.yaml
vendored
Normal file
@ -0,0 +1,153 @@
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
|
||||
#
|
||||
# Generated on 2026-04-11T06:20:55Z by kres b6d29bf.
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
"on":
|
||||
schedule:
|
||||
- cron: 30 7 * * *
|
||||
name: integration-aws-nvidia-nonfree-lts-arm64-cron
|
||||
jobs:
|
||||
default:
|
||||
runs-on:
|
||||
group: generic
|
||||
steps:
|
||||
- name: gather-system-info
|
||||
id: system-info
|
||||
uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0
|
||||
continue-on-error: true
|
||||
- name: print-system-info
|
||||
run: |
|
||||
MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024))
|
||||
|
||||
OUTPUTS=(
|
||||
"CPU Core: ${{ steps.system-info.outputs.cpu-core }}"
|
||||
"CPU Model: ${{ steps.system-info.outputs.cpu-model }}"
|
||||
"Hostname: ${{ steps.system-info.outputs.hostname }}"
|
||||
"NodeName: ${NODE_NAME}"
|
||||
"Kernel release: ${{ steps.system-info.outputs.kernel-release }}"
|
||||
"Kernel version: ${{ steps.system-info.outputs.kernel-version }}"
|
||||
"Name: ${{ steps.system-info.outputs.name }}"
|
||||
"Platform: ${{ steps.system-info.outputs.platform }}"
|
||||
"Release: ${{ steps.system-info.outputs.release }}"
|
||||
"Total memory: ${MEMORY_GB} GB"
|
||||
)
|
||||
|
||||
for OUTPUT in "${OUTPUTS[@]}";do
|
||||
echo "${OUTPUT}"
|
||||
done
|
||||
continue-on-error: true
|
||||
- name: checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
- name: Unshallow
|
||||
run: |
|
||||
git fetch --prune --unshallow
|
||||
- name: Set up Docker Buildx
|
||||
id: setup-buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0
|
||||
with:
|
||||
driver: remote
|
||||
endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234
|
||||
timeout-minutes: 10
|
||||
- name: Mask secrets
|
||||
run: |
|
||||
echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')"
|
||||
- name: Set secrets for job
|
||||
run: |
|
||||
sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV"
|
||||
- name: Download artifacts
|
||||
if: github.event_name != 'schedule'
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1
|
||||
with:
|
||||
name: talos-artifacts
|
||||
path: _out
|
||||
- name: Fix artifact permissions
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
xargs -a _out/executable-artifacts -I {} chmod +x {}
|
||||
- name: ci-temp-release-tag
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
make ci-temp-release-tag
|
||||
- name: generate
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
make generate
|
||||
- name: uki-certs
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
PLATFORM: linux/amd64
|
||||
run: |
|
||||
make uki-certs
|
||||
- name: build
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
run: |
|
||||
make installer-base imager _out/integration-test-linux-amd64
|
||||
- name: image-aws
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
run: |
|
||||
make image-aws
|
||||
- name: checkout extensions
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/extensions
|
||||
ref: main
|
||||
repository: siderolabs/extensions
|
||||
- name: set variables
|
||||
run: |
|
||||
cat _out/talos-metadata >> "$GITHUB_ENV"
|
||||
- name: build extensions
|
||||
env:
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
REGISTRY: registry.dev.siderolabs.io
|
||||
run: |
|
||||
make nvidia-container-toolkit-lts nonfree-kmod-nvidia-lts extensions-metadata -C _out/extensions
|
||||
- name: e2e-aws-prepare
|
||||
env:
|
||||
E2E_AWS_TARGET: nvidia-nonfree-lts
|
||||
EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
TARGET_ARCH: arm64
|
||||
run: |
|
||||
make e2e-aws-prepare
|
||||
- name: checkout contrib
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/contrib
|
||||
ref: main
|
||||
repository: siderolabs/contrib
|
||||
- name: setup tf
|
||||
uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0
|
||||
with:
|
||||
terraform_wrapper: "false"
|
||||
- name: tf apply
|
||||
env:
|
||||
TF_E2E_ACTION: apply
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
- name: e2e-aws-nvidia-nonfree-lts
|
||||
env:
|
||||
EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false
|
||||
INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA
|
||||
run: |
|
||||
make e2e-aws
|
||||
- name: tf destroy
|
||||
if: always()
|
||||
env:
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: "false"
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
153
.github/workflows/integration-aws-nvidia-nonfree-production-arm64-cron.yaml
vendored
Normal file
153
.github/workflows/integration-aws-nvidia-nonfree-production-arm64-cron.yaml
vendored
Normal file
@ -0,0 +1,153 @@
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
|
||||
#
|
||||
# Generated on 2026-04-11T06:20:55Z by kres b6d29bf.
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
"on":
|
||||
schedule:
|
||||
- cron: 30 7 * * *
|
||||
name: integration-aws-nvidia-nonfree-production-arm64-cron
|
||||
jobs:
|
||||
default:
|
||||
runs-on:
|
||||
group: generic
|
||||
steps:
|
||||
- name: gather-system-info
|
||||
id: system-info
|
||||
uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0
|
||||
continue-on-error: true
|
||||
- name: print-system-info
|
||||
run: |
|
||||
MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024))
|
||||
|
||||
OUTPUTS=(
|
||||
"CPU Core: ${{ steps.system-info.outputs.cpu-core }}"
|
||||
"CPU Model: ${{ steps.system-info.outputs.cpu-model }}"
|
||||
"Hostname: ${{ steps.system-info.outputs.hostname }}"
|
||||
"NodeName: ${NODE_NAME}"
|
||||
"Kernel release: ${{ steps.system-info.outputs.kernel-release }}"
|
||||
"Kernel version: ${{ steps.system-info.outputs.kernel-version }}"
|
||||
"Name: ${{ steps.system-info.outputs.name }}"
|
||||
"Platform: ${{ steps.system-info.outputs.platform }}"
|
||||
"Release: ${{ steps.system-info.outputs.release }}"
|
||||
"Total memory: ${MEMORY_GB} GB"
|
||||
)
|
||||
|
||||
for OUTPUT in "${OUTPUTS[@]}";do
|
||||
echo "${OUTPUT}"
|
||||
done
|
||||
continue-on-error: true
|
||||
- name: checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
- name: Unshallow
|
||||
run: |
|
||||
git fetch --prune --unshallow
|
||||
- name: Set up Docker Buildx
|
||||
id: setup-buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0
|
||||
with:
|
||||
driver: remote
|
||||
endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234
|
||||
timeout-minutes: 10
|
||||
- name: Mask secrets
|
||||
run: |
|
||||
echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')"
|
||||
- name: Set secrets for job
|
||||
run: |
|
||||
sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV"
|
||||
- name: Download artifacts
|
||||
if: github.event_name != 'schedule'
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1
|
||||
with:
|
||||
name: talos-artifacts
|
||||
path: _out
|
||||
- name: Fix artifact permissions
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
xargs -a _out/executable-artifacts -I {} chmod +x {}
|
||||
- name: ci-temp-release-tag
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
make ci-temp-release-tag
|
||||
- name: generate
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
make generate
|
||||
- name: uki-certs
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
PLATFORM: linux/amd64
|
||||
run: |
|
||||
make uki-certs
|
||||
- name: build
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
run: |
|
||||
make installer-base imager _out/integration-test-linux-amd64
|
||||
- name: image-aws
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
run: |
|
||||
make image-aws
|
||||
- name: checkout extensions
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/extensions
|
||||
ref: main
|
||||
repository: siderolabs/extensions
|
||||
- name: set variables
|
||||
run: |
|
||||
cat _out/talos-metadata >> "$GITHUB_ENV"
|
||||
- name: build extensions
|
||||
env:
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
REGISTRY: registry.dev.siderolabs.io
|
||||
run: |
|
||||
make nvidia-container-toolkit-production nonfree-kmod-nvidia-production extensions-metadata -C _out/extensions
|
||||
- name: e2e-aws-prepare
|
||||
env:
|
||||
E2E_AWS_TARGET: nvidia-nonfree-production
|
||||
EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
TARGET_ARCH: arm64
|
||||
run: |
|
||||
make e2e-aws-prepare
|
||||
- name: checkout contrib
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/contrib
|
||||
ref: main
|
||||
repository: siderolabs/contrib
|
||||
- name: setup tf
|
||||
uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0
|
||||
with:
|
||||
terraform_wrapper: "false"
|
||||
- name: tf apply
|
||||
env:
|
||||
TF_E2E_ACTION: apply
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
- name: e2e-aws-nvidia-oss-production
|
||||
env:
|
||||
EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false
|
||||
INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA
|
||||
run: |
|
||||
make e2e-aws
|
||||
- name: tf destroy
|
||||
if: always()
|
||||
env:
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: "false"
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
153
.github/workflows/integration-aws-nvidia-oss-lts-arm64-cron.yaml
vendored
Normal file
153
.github/workflows/integration-aws-nvidia-oss-lts-arm64-cron.yaml
vendored
Normal file
@ -0,0 +1,153 @@
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
|
||||
#
|
||||
# Generated on 2026-04-08T12:37:05Z by kres b6d29bf.
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
"on":
|
||||
schedule:
|
||||
- cron: 30 7 * * *
|
||||
name: integration-aws-nvidia-oss-lts-arm64-cron
|
||||
jobs:
|
||||
default:
|
||||
runs-on:
|
||||
group: generic
|
||||
steps:
|
||||
- name: gather-system-info
|
||||
id: system-info
|
||||
uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0
|
||||
continue-on-error: true
|
||||
- name: print-system-info
|
||||
run: |
|
||||
MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024))
|
||||
|
||||
OUTPUTS=(
|
||||
"CPU Core: ${{ steps.system-info.outputs.cpu-core }}"
|
||||
"CPU Model: ${{ steps.system-info.outputs.cpu-model }}"
|
||||
"Hostname: ${{ steps.system-info.outputs.hostname }}"
|
||||
"NodeName: ${NODE_NAME}"
|
||||
"Kernel release: ${{ steps.system-info.outputs.kernel-release }}"
|
||||
"Kernel version: ${{ steps.system-info.outputs.kernel-version }}"
|
||||
"Name: ${{ steps.system-info.outputs.name }}"
|
||||
"Platform: ${{ steps.system-info.outputs.platform }}"
|
||||
"Release: ${{ steps.system-info.outputs.release }}"
|
||||
"Total memory: ${MEMORY_GB} GB"
|
||||
)
|
||||
|
||||
for OUTPUT in "${OUTPUTS[@]}";do
|
||||
echo "${OUTPUT}"
|
||||
done
|
||||
continue-on-error: true
|
||||
- name: checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
- name: Unshallow
|
||||
run: |
|
||||
git fetch --prune --unshallow
|
||||
- name: Set up Docker Buildx
|
||||
id: setup-buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0
|
||||
with:
|
||||
driver: remote
|
||||
endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234
|
||||
timeout-minutes: 10
|
||||
- name: Mask secrets
|
||||
run: |
|
||||
echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')"
|
||||
- name: Set secrets for job
|
||||
run: |
|
||||
sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV"
|
||||
- name: Download artifacts
|
||||
if: github.event_name != 'schedule'
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1
|
||||
with:
|
||||
name: talos-artifacts
|
||||
path: _out
|
||||
- name: Fix artifact permissions
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
xargs -a _out/executable-artifacts -I {} chmod +x {}
|
||||
- name: ci-temp-release-tag
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
make ci-temp-release-tag
|
||||
- name: generate
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
make generate
|
||||
- name: uki-certs
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
PLATFORM: linux/amd64
|
||||
run: |
|
||||
make uki-certs
|
||||
- name: build
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
run: |
|
||||
make installer-base imager _out/integration-test-linux-amd64
|
||||
- name: image-aws
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
run: |
|
||||
make image-aws
|
||||
- name: checkout extensions
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/extensions
|
||||
ref: main
|
||||
repository: siderolabs/extensions
|
||||
- name: set variables
|
||||
run: |
|
||||
cat _out/talos-metadata >> "$GITHUB_ENV"
|
||||
- name: build extensions
|
||||
env:
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
REGISTRY: registry.dev.siderolabs.io
|
||||
run: |
|
||||
make nvidia-container-toolkit-lts nvidia-open-gpu-kernel-modules-lts extensions-metadata -C _out/extensions
|
||||
- name: e2e-aws-prepare
|
||||
env:
|
||||
E2E_AWS_TARGET: nvidia-oss-lts
|
||||
EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
TARGET_ARCH: arm64
|
||||
run: |
|
||||
make e2e-aws-prepare
|
||||
- name: checkout contrib
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/contrib
|
||||
ref: main
|
||||
repository: siderolabs/contrib
|
||||
- name: setup tf
|
||||
uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0
|
||||
with:
|
||||
terraform_wrapper: "false"
|
||||
- name: tf apply
|
||||
env:
|
||||
TF_E2E_ACTION: apply
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
- name: e2e-aws-nvidia-oss-lts
|
||||
env:
|
||||
EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false
|
||||
INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA
|
||||
run: |
|
||||
make e2e-aws
|
||||
- name: tf destroy
|
||||
if: always()
|
||||
env:
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: "false"
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
153
.github/workflows/integration-aws-nvidia-oss-production-arm64-cron.yaml
vendored
Normal file
153
.github/workflows/integration-aws-nvidia-oss-production-arm64-cron.yaml
vendored
Normal file
@ -0,0 +1,153 @@
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
|
||||
#
|
||||
# Generated on 2026-04-11T06:20:55Z by kres b6d29bf.
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
"on":
|
||||
schedule:
|
||||
- cron: 30 7 * * *
|
||||
name: integration-aws-nvidia-oss-production-arm64-cron
|
||||
jobs:
|
||||
default:
|
||||
runs-on:
|
||||
group: generic
|
||||
steps:
|
||||
- name: gather-system-info
|
||||
id: system-info
|
||||
uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0
|
||||
continue-on-error: true
|
||||
- name: print-system-info
|
||||
run: |
|
||||
MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024))
|
||||
|
||||
OUTPUTS=(
|
||||
"CPU Core: ${{ steps.system-info.outputs.cpu-core }}"
|
||||
"CPU Model: ${{ steps.system-info.outputs.cpu-model }}"
|
||||
"Hostname: ${{ steps.system-info.outputs.hostname }}"
|
||||
"NodeName: ${NODE_NAME}"
|
||||
"Kernel release: ${{ steps.system-info.outputs.kernel-release }}"
|
||||
"Kernel version: ${{ steps.system-info.outputs.kernel-version }}"
|
||||
"Name: ${{ steps.system-info.outputs.name }}"
|
||||
"Platform: ${{ steps.system-info.outputs.platform }}"
|
||||
"Release: ${{ steps.system-info.outputs.release }}"
|
||||
"Total memory: ${MEMORY_GB} GB"
|
||||
)
|
||||
|
||||
for OUTPUT in "${OUTPUTS[@]}";do
|
||||
echo "${OUTPUT}"
|
||||
done
|
||||
continue-on-error: true
|
||||
- name: checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
- name: Unshallow
|
||||
run: |
|
||||
git fetch --prune --unshallow
|
||||
- name: Set up Docker Buildx
|
||||
id: setup-buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0
|
||||
with:
|
||||
driver: remote
|
||||
endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234
|
||||
timeout-minutes: 10
|
||||
- name: Mask secrets
|
||||
run: |
|
||||
echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')"
|
||||
- name: Set secrets for job
|
||||
run: |
|
||||
sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV"
|
||||
- name: Download artifacts
|
||||
if: github.event_name != 'schedule'
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1
|
||||
with:
|
||||
name: talos-artifacts
|
||||
path: _out
|
||||
- name: Fix artifact permissions
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
xargs -a _out/executable-artifacts -I {} chmod +x {}
|
||||
- name: ci-temp-release-tag
|
||||
if: github.event_name != 'schedule'
|
||||
run: |
|
||||
make ci-temp-release-tag
|
||||
- name: generate
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
make generate
|
||||
- name: uki-certs
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
PLATFORM: linux/amd64
|
||||
run: |
|
||||
make uki-certs
|
||||
- name: build
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
run: |
|
||||
make installer-base imager _out/integration-test-linux-amd64
|
||||
- name: image-aws
|
||||
env:
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PLATFORM: linux/arm64
|
||||
run: |
|
||||
make image-aws
|
||||
- name: checkout extensions
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/extensions
|
||||
ref: main
|
||||
repository: siderolabs/extensions
|
||||
- name: set variables
|
||||
run: |
|
||||
cat _out/talos-metadata >> "$GITHUB_ENV"
|
||||
- name: build extensions
|
||||
env:
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: "true"
|
||||
REGISTRY: registry.dev.siderolabs.io
|
||||
run: |
|
||||
make nvidia-container-toolkit-production nvidia-open-gpu-kernel-modules-production extensions-metadata -C _out/extensions
|
||||
- name: e2e-aws-prepare
|
||||
env:
|
||||
E2E_AWS_TARGET: nvidia-oss-production
|
||||
EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
TARGET_ARCH: arm64
|
||||
run: |
|
||||
make e2e-aws-prepare
|
||||
- name: checkout contrib
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
|
||||
with:
|
||||
path: _out/contrib
|
||||
ref: main
|
||||
repository: siderolabs/contrib
|
||||
- name: setup tf
|
||||
uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0
|
||||
with:
|
||||
terraform_wrapper: "false"
|
||||
- name: tf apply
|
||||
env:
|
||||
TF_E2E_ACTION: apply
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
- name: e2e-aws-nvidia-oss-production
|
||||
env:
|
||||
EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false
|
||||
INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA
|
||||
run: |
|
||||
make e2e-aws
|
||||
- name: tf destroy
|
||||
if: always()
|
||||
env:
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: "false"
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
run: |
|
||||
make e2e-cloud-tf
|
||||
@ -1,6 +1,6 @@
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
|
||||
#
|
||||
# Generated on 2026-04-15T14:42:53Z by kres b6d29bf.
|
||||
# Generated on 2026-04-15T14:54:12Z by kres b6d29bf.
|
||||
|
||||
"on":
|
||||
workflow_run:
|
||||
@ -40,9 +40,13 @@
|
||||
- integration-image-factory-cron
|
||||
- integration-aws-cron
|
||||
- integration-aws-nvidia-oss-lts-cron
|
||||
- integration-aws-nvidia-oss-lts-arm64-cron
|
||||
- integration-aws-nvidia-oss-production-cron
|
||||
- integration-aws-nvidia-oss-production-arm64-cron
|
||||
- integration-aws-nvidia-nonfree-lts-cron
|
||||
- integration-aws-nvidia-nonfree-lts-arm64-cron
|
||||
- integration-aws-nvidia-nonfree-production-cron
|
||||
- integration-aws-nvidia-nonfree-production-arm64-cron
|
||||
- integration-gcp-cron
|
||||
types:
|
||||
- completed
|
||||
|
||||
6
.github/workflows/slack-notify.yaml
vendored
6
.github/workflows/slack-notify.yaml
vendored
@ -1,6 +1,6 @@
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
|
||||
#
|
||||
# Generated on 2026-04-15T14:42:53Z by kres b6d29bf.
|
||||
# Generated on 2026-04-15T14:54:12Z by kres b6d29bf.
|
||||
|
||||
"on":
|
||||
workflow_run:
|
||||
@ -40,9 +40,13 @@
|
||||
- integration-image-factory-cron
|
||||
- integration-aws-cron
|
||||
- integration-aws-nvidia-oss-lts-cron
|
||||
- integration-aws-nvidia-oss-lts-arm64-cron
|
||||
- integration-aws-nvidia-oss-production-cron
|
||||
- integration-aws-nvidia-oss-production-arm64-cron
|
||||
- integration-aws-nvidia-nonfree-lts-cron
|
||||
- integration-aws-nvidia-nonfree-lts-arm64-cron
|
||||
- integration-aws-nvidia-nonfree-production-cron
|
||||
- integration-aws-nvidia-nonfree-production-arm64-cron
|
||||
- integration-gcp-cron
|
||||
types:
|
||||
- completed
|
||||
|
||||
377
.kres.yaml
377
.kres.yaml
@ -89,6 +89,7 @@ spec:
|
||||
- integration-image-factory
|
||||
- integration-aws
|
||||
- integration-aws-nvidia-oss-lts
|
||||
- integration-aws-nvidia-oss-lts-arm64
|
||||
- integration-aws-nvidia-oss-production
|
||||
- integration-aws-nvidia-nonfree-lts
|
||||
- integration-aws-nvidia-nonfree-production
|
||||
@ -2865,6 +2866,100 @@ spec:
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: false
|
||||
- name: integration-aws-nvidia-oss-lts-arm64
|
||||
buildxOptions:
|
||||
enabled: true
|
||||
sops: true
|
||||
depends:
|
||||
- default
|
||||
runnerGroup: generic # we can use generic here since the tests run against a remote talos cluster
|
||||
crons:
|
||||
- '30 7 * * *'
|
||||
triggerLabels:
|
||||
- integration/aws-nvidia-oss-lts-arm64
|
||||
- integration/aws-nvidia-oss
|
||||
- integration/aws-nvidia
|
||||
steps:
|
||||
- name: download-artifacts
|
||||
conditions:
|
||||
- not-on-schedule
|
||||
artifactStep:
|
||||
type: download
|
||||
artifactName: talos-artifacts
|
||||
artifactPath: _out
|
||||
- name: ci-temp-release-tag
|
||||
conditions:
|
||||
- not-on-schedule
|
||||
- name: generate
|
||||
conditions:
|
||||
- only-on-schedule
|
||||
- name: uki-certs
|
||||
conditions:
|
||||
- only-on-schedule
|
||||
environment:
|
||||
PLATFORM: linux/amd64
|
||||
- name: build
|
||||
conditions:
|
||||
- only-on-schedule
|
||||
command: installer-base imager _out/integration-test-linux-amd64
|
||||
environment:
|
||||
PLATFORM: linux/arm64
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PUSH: true
|
||||
- name: image-aws
|
||||
environment:
|
||||
PLATFORM: linux/arm64
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
- name: checkout extensions
|
||||
checkoutStep:
|
||||
repository: siderolabs/extensions
|
||||
ref: main
|
||||
path: _out/extensions
|
||||
- name: set variables
|
||||
nonMakeStep: true
|
||||
command: cat _out/talos-metadata >> "$GITHUB_ENV"
|
||||
- name: build extensions
|
||||
command: nvidia-container-toolkit-lts nvidia-open-gpu-kernel-modules-lts extensions-metadata
|
||||
arguments:
|
||||
- -C
|
||||
- _out/extensions
|
||||
environment:
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: true
|
||||
REGISTRY: registry.dev.siderolabs.io
|
||||
- name: e2e-aws-prepare
|
||||
environment:
|
||||
EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata
|
||||
E2E_AWS_TARGET: nvidia-oss-lts
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
TARGET_ARCH: arm64
|
||||
- name: checkout contrib
|
||||
checkoutStep:
|
||||
repository: siderolabs/contrib
|
||||
ref: main
|
||||
path: _out/contrib
|
||||
- name: setup tf
|
||||
terraformStep: true
|
||||
- name: tf apply
|
||||
command: e2e-cloud-tf
|
||||
environment:
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_E2E_ACTION: apply
|
||||
- name: e2e-aws-nvidia-oss-lts
|
||||
command: e2e-aws
|
||||
environment:
|
||||
EXTRA_TEST_ARGS: "-talos.extensions.nvidia -talos.verifyukibooted=false"
|
||||
INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA
|
||||
- name: tf destroy
|
||||
command: e2e-cloud-tf
|
||||
conditions:
|
||||
- always
|
||||
environment:
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: false
|
||||
- name: integration-aws-nvidia-oss-production
|
||||
buildxOptions:
|
||||
enabled: true
|
||||
@ -2961,6 +3056,100 @@ spec:
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: false
|
||||
- name: integration-aws-nvidia-oss-production-arm64
|
||||
buildxOptions:
|
||||
enabled: true
|
||||
sops: true
|
||||
depends:
|
||||
- default
|
||||
runnerGroup: generic # we can use generic here since the tests run against a remote talos cluster
|
||||
crons:
|
||||
- '30 7 * * *'
|
||||
triggerLabels:
|
||||
- integration/aws-nvidia-oss-production-arm64
|
||||
- integration/aws-nvidia-oss
|
||||
- integration/aws-nvidia
|
||||
steps:
|
||||
- name: download-artifacts
|
||||
conditions:
|
||||
- not-on-schedule
|
||||
artifactStep:
|
||||
type: download
|
||||
artifactName: talos-artifacts
|
||||
artifactPath: _out
|
||||
- name: ci-temp-release-tag
|
||||
conditions:
|
||||
- not-on-schedule
|
||||
- name: generate
|
||||
conditions:
|
||||
- only-on-schedule
|
||||
- name: uki-certs
|
||||
conditions:
|
||||
- only-on-schedule
|
||||
environment:
|
||||
PLATFORM: linux/amd64
|
||||
- name: build
|
||||
conditions:
|
||||
- only-on-schedule
|
||||
command: installer-base imager _out/integration-test-linux-amd64
|
||||
environment:
|
||||
PLATFORM: linux/arm64
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PUSH: true
|
||||
- name: image-aws
|
||||
environment:
|
||||
PLATFORM: linux/arm64
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
- name: checkout extensions
|
||||
checkoutStep:
|
||||
repository: siderolabs/extensions
|
||||
ref: main
|
||||
path: _out/extensions
|
||||
- name: set variables
|
||||
nonMakeStep: true
|
||||
command: cat _out/talos-metadata >> "$GITHUB_ENV"
|
||||
- name: build extensions
|
||||
command: nvidia-container-toolkit-production nvidia-open-gpu-kernel-modules-production extensions-metadata
|
||||
arguments:
|
||||
- -C
|
||||
- _out/extensions
|
||||
environment:
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: true
|
||||
REGISTRY: registry.dev.siderolabs.io
|
||||
- name: e2e-aws-prepare
|
||||
environment:
|
||||
EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata
|
||||
E2E_AWS_TARGET: nvidia-oss-production
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
TARGET_ARCH: arm64
|
||||
- name: checkout contrib
|
||||
checkoutStep:
|
||||
repository: siderolabs/contrib
|
||||
ref: main
|
||||
path: _out/contrib
|
||||
- name: setup tf
|
||||
terraformStep: true
|
||||
- name: tf apply
|
||||
command: e2e-cloud-tf
|
||||
environment:
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_E2E_ACTION: apply
|
||||
- name: e2e-aws-nvidia-oss-production
|
||||
command: e2e-aws
|
||||
environment:
|
||||
EXTRA_TEST_ARGS: "-talos.extensions.nvidia -talos.verifyukibooted=false"
|
||||
INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA
|
||||
- name: tf destroy
|
||||
command: e2e-cloud-tf
|
||||
conditions:
|
||||
- always
|
||||
environment:
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: false
|
||||
- name: integration-aws-nvidia-nonfree-lts
|
||||
buildxOptions:
|
||||
enabled: true
|
||||
@ -3057,6 +3246,100 @@ spec:
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: false
|
||||
- name: integration-aws-nvidia-nonfree-lts-arm64
|
||||
buildxOptions:
|
||||
enabled: true
|
||||
sops: true
|
||||
depends:
|
||||
- default
|
||||
runnerGroup: generic # we can use generic here since the tests run against a remote talos cluster
|
||||
crons:
|
||||
- '30 7 * * *'
|
||||
triggerLabels:
|
||||
- integration/aws-nvidia-nonfree-lts-arm64
|
||||
- integration/aws-nvidia-nonfree
|
||||
- integration/aws-nvidia
|
||||
steps:
|
||||
- name: download-artifacts
|
||||
conditions:
|
||||
- not-on-schedule
|
||||
artifactStep:
|
||||
type: download
|
||||
artifactName: talos-artifacts
|
||||
artifactPath: _out
|
||||
- name: ci-temp-release-tag
|
||||
conditions:
|
||||
- not-on-schedule
|
||||
- name: generate
|
||||
conditions:
|
||||
- only-on-schedule
|
||||
- name: uki-certs
|
||||
conditions:
|
||||
- only-on-schedule
|
||||
environment:
|
||||
PLATFORM: linux/amd64
|
||||
- name: build
|
||||
conditions:
|
||||
- only-on-schedule
|
||||
command: installer-base imager _out/integration-test-linux-amd64
|
||||
environment:
|
||||
PLATFORM: linux/arm64
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PUSH: true
|
||||
- name: image-aws
|
||||
environment:
|
||||
PLATFORM: linux/arm64
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
- name: checkout extensions
|
||||
checkoutStep:
|
||||
repository: siderolabs/extensions
|
||||
ref: main
|
||||
path: _out/extensions
|
||||
- name: set variables
|
||||
nonMakeStep: true
|
||||
command: cat _out/talos-metadata >> "$GITHUB_ENV"
|
||||
- name: build extensions
|
||||
command: nvidia-container-toolkit-lts nonfree-kmod-nvidia-lts extensions-metadata
|
||||
arguments:
|
||||
- -C
|
||||
- _out/extensions
|
||||
environment:
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: true
|
||||
REGISTRY: registry.dev.siderolabs.io
|
||||
- name: e2e-aws-prepare
|
||||
environment:
|
||||
EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata
|
||||
E2E_AWS_TARGET: nvidia-nonfree-lts
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
TARGET_ARCH: arm64
|
||||
- name: checkout contrib
|
||||
checkoutStep:
|
||||
repository: siderolabs/contrib
|
||||
ref: main
|
||||
path: _out/contrib
|
||||
- name: setup tf
|
||||
terraformStep: true
|
||||
- name: tf apply
|
||||
command: e2e-cloud-tf
|
||||
environment:
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_E2E_ACTION: apply
|
||||
- name: e2e-aws-nvidia-nonfree-lts
|
||||
command: e2e-aws
|
||||
environment:
|
||||
EXTRA_TEST_ARGS: "-talos.extensions.nvidia -talos.verifyukibooted=false"
|
||||
INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA
|
||||
- name: tf destroy
|
||||
command: e2e-cloud-tf
|
||||
conditions:
|
||||
- always
|
||||
environment:
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: false
|
||||
- name: integration-aws-nvidia-nonfree-production
|
||||
buildxOptions:
|
||||
enabled: true
|
||||
@ -3153,6 +3436,100 @@ spec:
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: false
|
||||
- name: integration-aws-nvidia-nonfree-production-arm64
|
||||
buildxOptions:
|
||||
enabled: true
|
||||
sops: true
|
||||
depends:
|
||||
- default
|
||||
runnerGroup: generic # we can use generic here since the tests run against a remote talos cluster
|
||||
crons:
|
||||
- '30 7 * * *'
|
||||
triggerLabels:
|
||||
- integration/aws-nvidia-nonfree-production-arm64
|
||||
- integration/aws-nvidia-nonfree
|
||||
- integration/aws-nvidia
|
||||
steps:
|
||||
- name: download-artifacts
|
||||
conditions:
|
||||
- not-on-schedule
|
||||
artifactStep:
|
||||
type: download
|
||||
artifactName: talos-artifacts
|
||||
artifactPath: _out
|
||||
- name: ci-temp-release-tag
|
||||
conditions:
|
||||
- not-on-schedule
|
||||
- name: generate
|
||||
conditions:
|
||||
- only-on-schedule
|
||||
- name: uki-certs
|
||||
conditions:
|
||||
- only-on-schedule
|
||||
environment:
|
||||
PLATFORM: linux/amd64
|
||||
- name: build
|
||||
conditions:
|
||||
- only-on-schedule
|
||||
command: installer-base imager _out/integration-test-linux-amd64
|
||||
environment:
|
||||
PLATFORM: linux/arm64
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
PUSH: true
|
||||
- name: image-aws
|
||||
environment:
|
||||
PLATFORM: linux/arm64
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
- name: checkout extensions
|
||||
checkoutStep:
|
||||
repository: siderolabs/extensions
|
||||
ref: main
|
||||
path: _out/extensions
|
||||
- name: set variables
|
||||
nonMakeStep: true
|
||||
command: cat _out/talos-metadata >> "$GITHUB_ENV"
|
||||
- name: build extensions
|
||||
command: nvidia-container-toolkit-production nonfree-kmod-nvidia-production extensions-metadata
|
||||
arguments:
|
||||
- -C
|
||||
- _out/extensions
|
||||
environment:
|
||||
PLATFORM: linux/arm64
|
||||
PUSH: true
|
||||
REGISTRY: registry.dev.siderolabs.io
|
||||
- name: e2e-aws-prepare
|
||||
environment:
|
||||
EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata
|
||||
E2E_AWS_TARGET: nvidia-nonfree-production
|
||||
IMAGE_REGISTRY: registry.dev.siderolabs.io
|
||||
TARGET_ARCH: arm64
|
||||
- name: checkout contrib
|
||||
checkoutStep:
|
||||
repository: siderolabs/contrib
|
||||
ref: main
|
||||
path: _out/contrib
|
||||
- name: setup tf
|
||||
terraformStep: true
|
||||
- name: tf apply
|
||||
command: e2e-cloud-tf
|
||||
environment:
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_E2E_ACTION: apply
|
||||
# Step label only; the make target is taken from `command` below.
# Named after this job's flavour (nonfree production) rather than the OSS one.
- name: e2e-aws-nvidia-nonfree-production
  command: e2e-aws
  environment:
    EXTRA_TEST_ARGS: "-talos.extensions.nvidia -talos.verifyukibooted=false"
    INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA
|
||||
- name: tf destroy
|
||||
command: e2e-cloud-tf
|
||||
conditions:
|
||||
- always
|
||||
environment:
|
||||
TF_SCRIPT_DIR: _out/contrib
|
||||
TF_E2E_TEST_TYPE: aws
|
||||
TF_E2E_ACTION: destroy
|
||||
TF_E2E_REFRESH_ON_DESTROY: false
|
||||
- name: integration-gcp
|
||||
buildxOptions:
|
||||
enabled: true
|
||||
|
||||
@ -6,10 +6,12 @@ source ./hack/test/e2e.sh
|
||||
|
||||
REGION="us-east-1"
|
||||
|
||||
ARCH="${TARGET_ARCH:-amd64}"
|
||||
|
||||
function cloud_image_upload() {
|
||||
RANDOM_SUFFIX=$(openssl rand -hex 4)
|
||||
|
||||
CLOUD_IMAGES_EXTRA_ARGS=("--name-prefix=${1}-${RANDOM_SUFFIX}" "--target-clouds=aws" "--architectures=amd64" "--aws-regions=${REGION}")
|
||||
CLOUD_IMAGES_EXTRA_ARGS=("--name-prefix=${1}-${RANDOM_SUFFIX}" "--target-clouds=aws" "--architectures=${ARCH}" "--aws-regions=${REGION}")
|
||||
|
||||
case "${1}" in
|
||||
talos-e2e-nvidia-oss-*)
|
||||
@ -21,7 +23,7 @@ function cloud_image_upload() {
|
||||
}
|
||||
|
||||
function get_ami_id() {
|
||||
jq -r ".[] | select(.cloud == \"aws\") | select(.region == \"${REGION}\") | select (.arch == \"amd64\") | .id" "${ARTIFACTS}/cloud-images.json"
|
||||
jq -r ".[] | select(.cloud == \"aws\") | select(.region == \"${REGION}\") | select (.arch == \"${ARCH}\") | .id" "${ARTIFACTS}/cloud-images.json"
|
||||
}
|
||||
|
||||
function cloud_image_upload_with_extensions() {
|
||||
@ -48,7 +50,7 @@ function cloud_image_upload_with_extensions() {
|
||||
;;
|
||||
esac
|
||||
|
||||
make image-aws IMAGER_ARGS="${EXTENSIONS}" PLATFORM=linux/amd64
|
||||
make image-aws IMAGER_ARGS="${EXTENSIONS}" PLATFORM="linux/${ARCH}"
|
||||
cloud_image_upload "talos-e2e-${1}"
|
||||
}
|
||||
|
||||
@ -73,7 +75,9 @@ esac
|
||||
|
||||
mkdir -p "${ARTIFACTS}/e2e-aws-generated"
|
||||
|
||||
NAME_PREFIX="${SHA}-${E2E_AWS_TARGET}"
|
||||
NAME_PREFIX="${SHA}-${E2E_AWS_TARGET}-${ARCH}"
|
||||
|
||||
AWS_JQ_TEMPLATE="aws-${ARCH}.jq"
|
||||
|
||||
jq --null-input \
|
||||
--arg WORKER_GROUP "${WORKER_GROUP}" \
|
||||
@ -90,6 +94,6 @@ jq --null-input \
|
||||
talos_version_contract: $TALOS_VERSION_CONTRACT,
|
||||
kubernetes_version: $KUBERNETES_VERSION
|
||||
}' \
|
||||
| jq -f hack/test/tfvars/aws.jq > "${ARTIFACTS}/e2e-aws-generated/vars.json"
|
||||
| jq -f "hack/test/tfvars/${AWS_JQ_TEMPLATE}" > "${ARTIFACTS}/e2e-aws-generated/vars.json"
|
||||
|
||||
cp hack/test/tfvars/*.yaml "${ARTIFACTS}/e2e-aws-generated"
|
||||
|
||||
35
hack/test/tfvars/aws-arm64.jq
Normal file
35
hack/test/tfvars/aws-arm64.jq
Normal file
@ -0,0 +1,35 @@
|
||||
{
|
||||
"cluster_name": .cluster_name,
|
||||
"ccm": true,
|
||||
"talos_version_contract": .talos_version_contract,
|
||||
"kubernetes_version": .kubernetes_version,
|
||||
"control_plane": {
|
||||
"ami_id": .ami_id,
|
||||
"instance_type": "t4g.large"
|
||||
},
|
||||
"worker_groups": (if .worker_group == "nvidia" then [
|
||||
{
|
||||
"name": "nvidia-t4",
|
||||
"ami_id": .nvidia_ami_id,
|
||||
"instance_type": "g5g.xlarge",
|
||||
"config_patch_files": [
|
||||
"nvidia.yaml"
|
||||
],
|
||||
"tags": {
|
||||
"Type": "nvidia-t4"
|
||||
}
|
||||
}
|
||||
] else [
|
||||
{
|
||||
"name": "default",
|
||||
"num_instances": 3,
|
||||
"ami_id": .ami_id,
|
||||
"instance_type": "t4g.large"
|
||||
}
|
||||
] end),
|
||||
"extra_tags": {
|
||||
"ClusterName": .cluster_name,
|
||||
"Project": "talos-e2e-ci",
|
||||
"Environment": "ci"
|
||||
}
|
||||
}
|
||||
@ -7,12 +7,15 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/cosi-project/runtime/pkg/safe"
|
||||
"github.com/siderolabs/go-retry/retry"
|
||||
batchv1 "k8s.io/api/batch/v1"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
@ -20,6 +23,10 @@ import (
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
"github.com/siderolabs/talos/internal/integration/base"
|
||||
"github.com/siderolabs/talos/pkg/machinery/api/common"
|
||||
"github.com/siderolabs/talos/pkg/machinery/client"
|
||||
"github.com/siderolabs/talos/pkg/machinery/constants"
|
||||
"github.com/siderolabs/talos/pkg/machinery/resources/runtime"
|
||||
)
|
||||
|
||||
//go:embed testdata/nvidia-gpu-operator.yaml
|
||||
@ -69,13 +76,11 @@ func (suite *ExtensionsSuiteNVIDIA) TestExtensionsNVIDIA() {
|
||||
// if we're testing NVIDIA stuff we need to get the nodes having NVIDIA GPUs
|
||||
// we query k8s to get the nodes having the label node.kubernetes.io/instance-type.
|
||||
// this label is set by the cloud provider and its value is the instance type.
|
||||
// the nvidia e2e-aws tests creates gpu nodes one with g4dn.xlarge and another
|
||||
// with p4d.24xlarge
|
||||
for _, nvidiaNode := range suite.getNVIDIANodes("node.kubernetes.io/instance-type in (g4dn.xlarge, p4d.24xlarge)") {
|
||||
for _, nvidiaNode := range suite.getNVIDIANodes("node.kubernetes.io/instance-type in (g4dn.xlarge, p4d.24xlarge, g5g.xlarge)") {
|
||||
suite.AssertExpectedModules(suite.ctx, nvidiaNode, expectedModulesModDep)
|
||||
}
|
||||
|
||||
nodes := suite.getNVIDIANodes("node.kubernetes.io/instance-type=g4dn.xlarge")
|
||||
nodes := suite.getNVIDIANodes("node.kubernetes.io/instance-type in (g4dn.xlarge, p4d.24xlarge, g5g.xlarge)")
|
||||
for _, node := range nodes {
|
||||
suite.AssertServicesRunning(suite.ctx, node, map[string]string{
|
||||
"ext-nvidia-persistenced": "Running",
|
||||
@ -83,6 +88,95 @@ func (suite *ExtensionsSuiteNVIDIA) TestExtensionsNVIDIA() {
|
||||
})
|
||||
}
|
||||
|
||||
missingCDIFilesData := map[string]map[string]int{
|
||||
"amd64": {
|
||||
"nvidia-open-gpu-kernel-modules-production": 13,
|
||||
"nvidia-open-gpu-kernel-modules-lts": 9,
|
||||
"nonfree-kmod-nvidia-production": 13,
|
||||
"nonfree-kmod-nvidia-lts": 9,
|
||||
},
|
||||
"arm64": {
|
||||
"nvidia-open-gpu-kernel-modules-production": 11,
|
||||
"nvidia-open-gpu-kernel-modules-lts": 9,
|
||||
"nonfree-kmod-nvidia-production": 11,
|
||||
"nonfree-kmod-nvidia-lts": 9,
|
||||
},
|
||||
}
|
||||
|
||||
for _, node := range nodes {
|
||||
nodeCtx := client.WithNode(suite.ctx, node)
|
||||
|
||||
versionInfo, err := suite.Client.Version(nodeCtx)
|
||||
suite.Require().NoError(err)
|
||||
|
||||
suite.Require().NotNil(versionInfo.GetMessages(), "version info messages should not be nil")
|
||||
|
||||
extInfo := missingCDIFilesData[versionInfo.GetMessages()[0].Version.Arch]
|
||||
|
||||
list, err := safe.StateListAll[*runtime.ExtensionStatus](nodeCtx, suite.Client.COSI)
|
||||
suite.Require().NoError(err)
|
||||
|
||||
extensionsList := safe.ToSlice(list, func(info *runtime.ExtensionStatus) string {
|
||||
return info.TypedSpec().Metadata.Name
|
||||
})
|
||||
|
||||
var expectedCount int
|
||||
|
||||
for _, name := range extensionsList {
|
||||
if count, exists := extInfo[name]; exists {
|
||||
expectedCount = count
|
||||
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
suite.Require().NotZero(expectedCount, "did not find any matching nvidia extension in the list of extensions: %v", extensionsList)
|
||||
|
||||
logsStream, err := suite.Client.Logs(
|
||||
nodeCtx,
|
||||
constants.SystemContainerdNamespace,
|
||||
common.ContainerDriver_CONTAINERD,
|
||||
"ext-nvidia-cdi-gen",
|
||||
false,
|
||||
-1,
|
||||
)
|
||||
suite.Require().NoError(err)
|
||||
|
||||
logReader, err := client.ReadStream(logsStream)
|
||||
suite.Require().NoError(err)
|
||||
|
||||
defer logReader.Close() //nolint:errcheck
|
||||
|
||||
var buffer bytes.Buffer
|
||||
|
||||
_, err = io.Copy(&buffer, logReader)
|
||||
suite.Require().NoError(err)
|
||||
|
||||
logData := buffer.String()
|
||||
|
||||
// as a baseline, we know each arch/extension combination has a known number of files that are missing from the extension
|
||||
// and were manually verified; if any new files are reported as not found, we want to fix the extension.
|
||||
// Adding an example of the current log message for reference:
|
||||
// ❯ talosctl -n 172.16.15.116 logs ext-nvidia-cdi-gen | grep "Could not"
|
||||
// msg="Could not locate libnvidia-vulkan-producer.so.580.126.20: libnvidia-vulkan-producer.so.580.126.20: not found\nlibnvidia-vulkan-producer.so.580.126.20: not found"
|
||||
// msg="Could not locate X11/xorg.conf.d/10-nvidia.conf: X11/xorg.conf.d/10-nvidia.conf: not found"
|
||||
// msg="Could not locate X11/xorg.conf.d/nvidia-drm-outputclass.conf: X11/xorg.conf.d/nvidia-drm-outputclass.conf: not found"
|
||||
// msg="Could not locate vulkan/implicit_layer.d/nvidia_layers.json: vulkan/implicit_layer.d/nvidia_layers.json: not found\nvulkan/implicit_layer.d/nvidia_layers.json: not found"
|
||||
// msg="Could not locate vulkan/icd.d/nvidia_icd.x86_64.json: vulkan/icd.d/nvidia_icd.x86_64.json: not found\nvulkan/icd.d/nvidia_icd.x86_64.json: not found"
|
||||
// msg="Could not locate /nvidia-fabricmanager/socket: /nvidia-fabricmanager/socket: not found"
|
||||
// msg="Could not locate /tmp/nvidia-mps: /tmp/nvidia-mps: not found"
|
||||
// msg="Could not locate nvidia-imex: nvidia-imex: not found"
|
||||
// msg="Could not locate nvidia-imex-ctl: nvidia-imex-ctl: not found"
|
||||
suite.Assert().Equal(
|
||||
expectedCount,
|
||||
strings.Count(logData, "Could not locate"),
|
||||
"expected exactly %d 'Could not locate' in the logs, got %d. Logs:\n%s",
|
||||
expectedCount,
|
||||
strings.Count(logData, "Could not"),
|
||||
logData,
|
||||
)
|
||||
}
|
||||
|
||||
// nodes = suite.getNVIDIANodes("node.kubernetes.io/instance-type=p4d.24xlarge")
|
||||
// for _, node := range nodes {
|
||||
// suite.testServicesRunning(node, map[string]string{
|
||||
@ -301,7 +395,7 @@ func nvidiaCUDATestJob() *batchv1.Job {
|
||||
{
|
||||
Key: "node.kubernetes.io/instance-type",
|
||||
Operator: corev1.NodeSelectorOpIn,
|
||||
Values: []string{"g4dn.xlarge", "p4d.24xlarge"},
|
||||
Values: []string{"g4dn.xlarge", "p4d.24xlarge", "g5g.xlarge"},
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -354,7 +448,7 @@ func nvidiaCDITestJob() *batchv1.Job {
|
||||
{
|
||||
Key: "node.kubernetes.io/instance-type",
|
||||
Operator: corev1.NodeSelectorOpIn,
|
||||
Values: []string{"g4dn.xlarge", "p4d.24xlarge"},
|
||||
Values: []string{"g4dn.xlarge", "p4d.24xlarge", "g5g.xlarge"},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@ -3,4 +3,4 @@ driver:
|
||||
toolkit:
|
||||
enabled: false
|
||||
hostPaths:
|
||||
driverInstallDir: /usr/local/glibc/usr
|
||||
driverInstallDir: /usr/local/lib
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user