diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2f55faa2e..1e3583891 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2026-04-15T14:42:53Z by kres b6d29bf. +# Generated on 2026-04-15T14:54:12Z by kres b6d29bf. concurrency: group: ${{ github.head_ref || github.run_id }} @@ -972,6 +972,156 @@ jobs: TF_SCRIPT_DIR: _out/contrib run: | make e2e-cloud-tf + integration-aws-nvidia-nonfree-lts-arm64: + permissions: + actions: read + contents: write + issues: read + packages: write + pull-requests: read + runs-on: + group: generic + if: contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-nonfree-lts-arm64') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-nonfree') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia') + needs: + - default + steps: + - name: gather-system-info + id: system-info + uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0 + continue-on-error: true + - name: print-system-info + run: | + MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024)) + + OUTPUTS=( + "CPU Core: ${{ steps.system-info.outputs.cpu-core }}" + "CPU Model: ${{ steps.system-info.outputs.cpu-model }}" + "Hostname: ${{ steps.system-info.outputs.hostname }}" + "NodeName: ${NODE_NAME}" + "Kernel release: ${{ steps.system-info.outputs.kernel-release }}" + "Kernel version: ${{ steps.system-info.outputs.kernel-version }}" + "Name: ${{ steps.system-info.outputs.name }}" + "Platform: ${{ steps.system-info.outputs.platform }}" + "Release: ${{ steps.system-info.outputs.release }}" + "Total memory: ${MEMORY_GB} GB" + ) + + for OUTPUT in "${OUTPUTS[@]}";do + echo "${OUTPUT}" + done + continue-on-error: true + - name: checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + - name: Unshallow 
+ run: | + git fetch --prune --unshallow + - name: Set up Docker Buildx + id: setup-buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0 + with: + driver: remote + endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234 + timeout-minutes: 10 + - name: Mask secrets + run: | + echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')" + - name: Set secrets for job + run: | + sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV" + - name: Download artifacts + if: github.event_name != 'schedule' + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1 + with: + name: talos-artifacts + path: _out + - name: Fix artifact permissions + if: github.event_name != 'schedule' + run: | + xargs -a _out/executable-artifacts -I {} chmod +x {} + - name: ci-temp-release-tag + if: github.event_name != 'schedule' + run: | + make ci-temp-release-tag + - name: generate + if: github.event_name == 'schedule' + run: | + make generate + - name: uki-certs + if: github.event_name == 'schedule' + env: + PLATFORM: linux/amd64 + run: | + make uki-certs + - name: build + if: github.event_name == 'schedule' + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + PUSH: "true" + run: | + make installer-base imager _out/integration-test-linux-amd64 + - name: image-aws + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + run: | + make image-aws + - name: checkout extensions + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/extensions + ref: main + repository: siderolabs/extensions + - name: set variables + run: | + cat _out/talos-metadata >> "$GITHUB_ENV" + - name: build extensions + env: + PLATFORM: linux/arm64 + PUSH: "true" + REGISTRY: registry.dev.siderolabs.io + run: | + make nvidia-container-toolkit-lts nonfree-kmod-nvidia-lts 
extensions-metadata -C _out/extensions + - name: e2e-aws-prepare + env: + E2E_AWS_TARGET: nvidia-nonfree-lts + EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata + IMAGE_REGISTRY: registry.dev.siderolabs.io + TARGET_ARCH: arm64 + run: | + make e2e-aws-prepare + - name: checkout contrib + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/contrib + ref: main + repository: siderolabs/contrib + - name: setup tf + uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0 + with: + terraform_wrapper: "false" + - name: tf apply + env: + TF_E2E_ACTION: apply + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf + - name: e2e-aws-nvidia-nonfree-lts + env: + EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false + INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA + run: | + make e2e-aws + - name: tf destroy + if: always() + env: + TF_E2E_ACTION: destroy + TF_E2E_REFRESH_ON_DESTROY: "false" + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf integration-aws-nvidia-nonfree-production: permissions: actions: read @@ -1125,6 +1275,156 @@ jobs: TF_SCRIPT_DIR: _out/contrib run: | make e2e-cloud-tf + integration-aws-nvidia-nonfree-production-arm64: + permissions: + actions: read + contents: write + issues: read + packages: write + pull-requests: read + runs-on: + group: generic + if: contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-nonfree-production-arm64') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-nonfree') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia') + needs: + - default + steps: + - name: gather-system-info + id: system-info + uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0 + continue-on-error: true + - name: print-system-info + run: | + 
MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024)) + + OUTPUTS=( + "CPU Core: ${{ steps.system-info.outputs.cpu-core }}" + "CPU Model: ${{ steps.system-info.outputs.cpu-model }}" + "Hostname: ${{ steps.system-info.outputs.hostname }}" + "NodeName: ${NODE_NAME}" + "Kernel release: ${{ steps.system-info.outputs.kernel-release }}" + "Kernel version: ${{ steps.system-info.outputs.kernel-version }}" + "Name: ${{ steps.system-info.outputs.name }}" + "Platform: ${{ steps.system-info.outputs.platform }}" + "Release: ${{ steps.system-info.outputs.release }}" + "Total memory: ${MEMORY_GB} GB" + ) + + for OUTPUT in "${OUTPUTS[@]}";do + echo "${OUTPUT}" + done + continue-on-error: true + - name: checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + - name: Unshallow + run: | + git fetch --prune --unshallow + - name: Set up Docker Buildx + id: setup-buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0 + with: + driver: remote + endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234 + timeout-minutes: 10 + - name: Mask secrets + run: | + echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')" + - name: Set secrets for job + run: | + sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV" + - name: Download artifacts + if: github.event_name != 'schedule' + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1 + with: + name: talos-artifacts + path: _out + - name: Fix artifact permissions + if: github.event_name != 'schedule' + run: | + xargs -a _out/executable-artifacts -I {} chmod +x {} + - name: ci-temp-release-tag + if: github.event_name != 'schedule' + run: | + make ci-temp-release-tag + - name: generate + if: github.event_name == 'schedule' + run: | + make generate + - name: uki-certs + if: github.event_name == 'schedule' + env: + PLATFORM: linux/amd64 + 
run: | + make uki-certs + - name: build + if: github.event_name == 'schedule' + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + PUSH: "true" + run: | + make installer-base imager _out/integration-test-linux-amd64 + - name: image-aws + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + run: | + make image-aws + - name: checkout extensions + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/extensions + ref: main + repository: siderolabs/extensions + - name: set variables + run: | + cat _out/talos-metadata >> "$GITHUB_ENV" + - name: build extensions + env: + PLATFORM: linux/arm64 + PUSH: "true" + REGISTRY: registry.dev.siderolabs.io + run: | + make nvidia-container-toolkit-production nonfree-kmod-nvidia-production extensions-metadata -C _out/extensions + - name: e2e-aws-prepare + env: + E2E_AWS_TARGET: nvidia-nonfree-production + EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata + IMAGE_REGISTRY: registry.dev.siderolabs.io + TARGET_ARCH: arm64 + run: | + make e2e-aws-prepare + - name: checkout contrib + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/contrib + ref: main + repository: siderolabs/contrib + - name: setup tf + uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0 + with: + terraform_wrapper: "false" + - name: tf apply + env: + TF_E2E_ACTION: apply + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf + - name: e2e-aws-nvidia-nonfree-production + env: + EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false + INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA + run: | + make e2e-aws + - name: tf destroy + if: always() + env: + TF_E2E_ACTION: destroy + TF_E2E_REFRESH_ON_DESTROY: "false" + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf 
integration-aws-nvidia-oss-lts: permissions: actions: read @@ -1278,6 +1578,156 @@ jobs: TF_SCRIPT_DIR: _out/contrib run: | make e2e-cloud-tf + integration-aws-nvidia-oss-lts-arm64: + permissions: + actions: read + contents: write + issues: read + packages: write + pull-requests: read + runs-on: + group: generic + if: contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-oss-lts-arm64') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-oss') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia') + needs: + - default + steps: + - name: gather-system-info + id: system-info + uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0 + continue-on-error: true + - name: print-system-info + run: | + MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024)) + + OUTPUTS=( + "CPU Core: ${{ steps.system-info.outputs.cpu-core }}" + "CPU Model: ${{ steps.system-info.outputs.cpu-model }}" + "Hostname: ${{ steps.system-info.outputs.hostname }}" + "NodeName: ${NODE_NAME}" + "Kernel release: ${{ steps.system-info.outputs.kernel-release }}" + "Kernel version: ${{ steps.system-info.outputs.kernel-version }}" + "Name: ${{ steps.system-info.outputs.name }}" + "Platform: ${{ steps.system-info.outputs.platform }}" + "Release: ${{ steps.system-info.outputs.release }}" + "Total memory: ${MEMORY_GB} GB" + ) + + for OUTPUT in "${OUTPUTS[@]}";do + echo "${OUTPUT}" + done + continue-on-error: true + - name: checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + - name: Unshallow + run: | + git fetch --prune --unshallow + - name: Set up Docker Buildx + id: setup-buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0 + with: + driver: remote + endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234 + timeout-minutes: 10 + - name: Mask secrets + run: | + echo "$(sops -d .secrets.yaml | yq -e 
'.secrets | to_entries[] | "::add-mask::" + .value')" + - name: Set secrets for job + run: | + sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV" + - name: Download artifacts + if: github.event_name != 'schedule' + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1 + with: + name: talos-artifacts + path: _out + - name: Fix artifact permissions + if: github.event_name != 'schedule' + run: | + xargs -a _out/executable-artifacts -I {} chmod +x {} + - name: ci-temp-release-tag + if: github.event_name != 'schedule' + run: | + make ci-temp-release-tag + - name: generate + if: github.event_name == 'schedule' + run: | + make generate + - name: uki-certs + if: github.event_name == 'schedule' + env: + PLATFORM: linux/amd64 + run: | + make uki-certs + - name: build + if: github.event_name == 'schedule' + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + PUSH: "true" + run: | + make installer-base imager _out/integration-test-linux-amd64 + - name: image-aws + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + run: | + make image-aws + - name: checkout extensions + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/extensions + ref: main + repository: siderolabs/extensions + - name: set variables + run: | + cat _out/talos-metadata >> "$GITHUB_ENV" + - name: build extensions + env: + PLATFORM: linux/arm64 + PUSH: "true" + REGISTRY: registry.dev.siderolabs.io + run: | + make nvidia-container-toolkit-lts nvidia-open-gpu-kernel-modules-lts extensions-metadata -C _out/extensions + - name: e2e-aws-prepare + env: + E2E_AWS_TARGET: nvidia-oss-lts + EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata + IMAGE_REGISTRY: registry.dev.siderolabs.io + TARGET_ARCH: arm64 + run: | + make e2e-aws-prepare + - name: checkout contrib + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd 
# version: v6.0.2 + with: + path: _out/contrib + ref: main + repository: siderolabs/contrib + - name: setup tf + uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0 + with: + terraform_wrapper: "false" + - name: tf apply + env: + TF_E2E_ACTION: apply + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf + - name: e2e-aws-nvidia-oss-lts + env: + EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false + INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA + run: | + make e2e-aws + - name: tf destroy + if: always() + env: + TF_E2E_ACTION: destroy + TF_E2E_REFRESH_ON_DESTROY: "false" + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf integration-aws-nvidia-oss-production: permissions: actions: read @@ -1431,6 +1881,156 @@ jobs: TF_SCRIPT_DIR: _out/contrib run: | make e2e-cloud-tf + integration-aws-nvidia-oss-production-arm64: + permissions: + actions: read + contents: write + issues: read + packages: write + pull-requests: read + runs-on: + group: generic + if: contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-oss-production-arm64') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia-oss') || contains(fromJSON(needs.default.outputs.labels), 'integration/aws-nvidia') + needs: + - default + steps: + - name: gather-system-info + id: system-info + uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0 + continue-on-error: true + - name: print-system-info + run: | + MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024)) + + OUTPUTS=( + "CPU Core: ${{ steps.system-info.outputs.cpu-core }}" + "CPU Model: ${{ steps.system-info.outputs.cpu-model }}" + "Hostname: ${{ steps.system-info.outputs.hostname }}" + "NodeName: ${NODE_NAME}" + "Kernel release: ${{ steps.system-info.outputs.kernel-release }}" + "Kernel version: ${{ 
steps.system-info.outputs.kernel-version }}" + "Name: ${{ steps.system-info.outputs.name }}" + "Platform: ${{ steps.system-info.outputs.platform }}" + "Release: ${{ steps.system-info.outputs.release }}" + "Total memory: ${MEMORY_GB} GB" + ) + + for OUTPUT in "${OUTPUTS[@]}";do + echo "${OUTPUT}" + done + continue-on-error: true + - name: checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + - name: Unshallow + run: | + git fetch --prune --unshallow + - name: Set up Docker Buildx + id: setup-buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0 + with: + driver: remote + endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234 + timeout-minutes: 10 + - name: Mask secrets + run: | + echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')" + - name: Set secrets for job + run: | + sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV" + - name: Download artifacts + if: github.event_name != 'schedule' + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1 + with: + name: talos-artifacts + path: _out + - name: Fix artifact permissions + if: github.event_name != 'schedule' + run: | + xargs -a _out/executable-artifacts -I {} chmod +x {} + - name: ci-temp-release-tag + if: github.event_name != 'schedule' + run: | + make ci-temp-release-tag + - name: generate + if: github.event_name == 'schedule' + run: | + make generate + - name: uki-certs + if: github.event_name == 'schedule' + env: + PLATFORM: linux/amd64 + run: | + make uki-certs + - name: build + if: github.event_name == 'schedule' + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + PUSH: "true" + run: | + make installer-base imager _out/integration-test-linux-amd64 + - name: image-aws + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + run: | + make image-aws + - name: 
checkout extensions + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/extensions + ref: main + repository: siderolabs/extensions + - name: set variables + run: | + cat _out/talos-metadata >> "$GITHUB_ENV" + - name: build extensions + env: + PLATFORM: linux/arm64 + PUSH: "true" + REGISTRY: registry.dev.siderolabs.io + run: | + make nvidia-container-toolkit-production nvidia-open-gpu-kernel-modules-production extensions-metadata -C _out/extensions + - name: e2e-aws-prepare + env: + E2E_AWS_TARGET: nvidia-oss-production + EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata + IMAGE_REGISTRY: registry.dev.siderolabs.io + TARGET_ARCH: arm64 + run: | + make e2e-aws-prepare + - name: checkout contrib + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/contrib + ref: main + repository: siderolabs/contrib + - name: setup tf + uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0 + with: + terraform_wrapper: "false" + - name: tf apply + env: + TF_E2E_ACTION: apply + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf + - name: e2e-aws-nvidia-oss-production + env: + EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false + INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA + run: | + make e2e-aws + - name: tf destroy + if: always() + env: + TF_E2E_ACTION: destroy + TF_E2E_REFRESH_ON_DESTROY: "false" + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf integration-cilium: permissions: actions: read diff --git a/.github/workflows/integration-aws-nvidia-nonfree-lts-arm64-cron.yaml b/.github/workflows/integration-aws-nvidia-nonfree-lts-arm64-cron.yaml new file mode 100644 index 000000000..2da665dc5 --- /dev/null +++ b/.github/workflows/integration-aws-nvidia-nonfree-lts-arm64-cron.yaml @@ -0,0 +1,153 @@ +# THIS FILE WAS AUTOMATICALLY 
GENERATED, PLEASE DO NOT EDIT. +# +# Generated on 2026-04-11T06:20:55Z by kres b6d29bf. + +concurrency: + group: ${{ github.head_ref || github.run_id }} + cancel-in-progress: true +"on": + schedule: + - cron: 30 7 * * * +name: integration-aws-nvidia-nonfree-lts-arm64-cron +jobs: + default: + runs-on: + group: generic + steps: + - name: gather-system-info + id: system-info + uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0 + continue-on-error: true + - name: print-system-info + run: | + MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024)) + + OUTPUTS=( + "CPU Core: ${{ steps.system-info.outputs.cpu-core }}" + "CPU Model: ${{ steps.system-info.outputs.cpu-model }}" + "Hostname: ${{ steps.system-info.outputs.hostname }}" + "NodeName: ${NODE_NAME}" + "Kernel release: ${{ steps.system-info.outputs.kernel-release }}" + "Kernel version: ${{ steps.system-info.outputs.kernel-version }}" + "Name: ${{ steps.system-info.outputs.name }}" + "Platform: ${{ steps.system-info.outputs.platform }}" + "Release: ${{ steps.system-info.outputs.release }}" + "Total memory: ${MEMORY_GB} GB" + ) + + for OUTPUT in "${OUTPUTS[@]}";do + echo "${OUTPUT}" + done + continue-on-error: true + - name: checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + - name: Unshallow + run: | + git fetch --prune --unshallow + - name: Set up Docker Buildx + id: setup-buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0 + with: + driver: remote + endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234 + timeout-minutes: 10 + - name: Mask secrets + run: | + echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')" + - name: Set secrets for job + run: | + sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV" + - name: Download artifacts + if: github.event_name != 'schedule' + uses: 
actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1 + with: + name: talos-artifacts + path: _out + - name: Fix artifact permissions + if: github.event_name != 'schedule' + run: | + xargs -a _out/executable-artifacts -I {} chmod +x {} + - name: ci-temp-release-tag + if: github.event_name != 'schedule' + run: | + make ci-temp-release-tag + - name: generate + if: github.event_name == 'schedule' + run: | + make generate + - name: uki-certs + if: github.event_name == 'schedule' + env: + PLATFORM: linux/amd64 + run: | + make uki-certs + - name: build + if: github.event_name == 'schedule' + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + PUSH: "true" + run: | + make installer-base imager _out/integration-test-linux-amd64 + - name: image-aws + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + run: | + make image-aws + - name: checkout extensions + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/extensions + ref: main + repository: siderolabs/extensions + - name: set variables + run: | + cat _out/talos-metadata >> "$GITHUB_ENV" + - name: build extensions + env: + PLATFORM: linux/arm64 + PUSH: "true" + REGISTRY: registry.dev.siderolabs.io + run: | + make nvidia-container-toolkit-lts nonfree-kmod-nvidia-lts extensions-metadata -C _out/extensions + - name: e2e-aws-prepare + env: + E2E_AWS_TARGET: nvidia-nonfree-lts + EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata + IMAGE_REGISTRY: registry.dev.siderolabs.io + TARGET_ARCH: arm64 + run: | + make e2e-aws-prepare + - name: checkout contrib + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/contrib + ref: main + repository: siderolabs/contrib + - name: setup tf + uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0 + with: + terraform_wrapper: "false" + - name: tf apply + env: + 
TF_E2E_ACTION: apply + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf + - name: e2e-aws-nvidia-nonfree-lts + env: + EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false + INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA + run: | + make e2e-aws + - name: tf destroy + if: always() + env: + TF_E2E_ACTION: destroy + TF_E2E_REFRESH_ON_DESTROY: "false" + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf diff --git a/.github/workflows/integration-aws-nvidia-nonfree-production-arm64-cron.yaml b/.github/workflows/integration-aws-nvidia-nonfree-production-arm64-cron.yaml new file mode 100644 index 000000000..7f296146d --- /dev/null +++ b/.github/workflows/integration-aws-nvidia-nonfree-production-arm64-cron.yaml @@ -0,0 +1,153 @@ +# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. +# +# Generated on 2026-04-11T06:20:55Z by kres b6d29bf. + +concurrency: + group: ${{ github.head_ref || github.run_id }} + cancel-in-progress: true +"on": + schedule: + - cron: 30 7 * * * +name: integration-aws-nvidia-nonfree-production-arm64-cron +jobs: + default: + runs-on: + group: generic + steps: + - name: gather-system-info + id: system-info + uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0 + continue-on-error: true + - name: print-system-info + run: | + MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024)) + + OUTPUTS=( + "CPU Core: ${{ steps.system-info.outputs.cpu-core }}" + "CPU Model: ${{ steps.system-info.outputs.cpu-model }}" + "Hostname: ${{ steps.system-info.outputs.hostname }}" + "NodeName: ${NODE_NAME}" + "Kernel release: ${{ steps.system-info.outputs.kernel-release }}" + "Kernel version: ${{ steps.system-info.outputs.kernel-version }}" + "Name: ${{ steps.system-info.outputs.name }}" + "Platform: ${{ steps.system-info.outputs.platform }}" + "Release: ${{ steps.system-info.outputs.release }}" + "Total memory: 
${MEMORY_GB} GB" + ) + + for OUTPUT in "${OUTPUTS[@]}";do + echo "${OUTPUT}" + done + continue-on-error: true + - name: checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + - name: Unshallow + run: | + git fetch --prune --unshallow + - name: Set up Docker Buildx + id: setup-buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0 + with: + driver: remote + endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234 + timeout-minutes: 10 + - name: Mask secrets + run: | + echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')" + - name: Set secrets for job + run: | + sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV" + - name: Download artifacts + if: github.event_name != 'schedule' + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1 + with: + name: talos-artifacts + path: _out + - name: Fix artifact permissions + if: github.event_name != 'schedule' + run: | + xargs -a _out/executable-artifacts -I {} chmod +x {} + - name: ci-temp-release-tag + if: github.event_name != 'schedule' + run: | + make ci-temp-release-tag + - name: generate + if: github.event_name == 'schedule' + run: | + make generate + - name: uki-certs + if: github.event_name == 'schedule' + env: + PLATFORM: linux/amd64 + run: | + make uki-certs + - name: build + if: github.event_name == 'schedule' + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + PUSH: "true" + run: | + make installer-base imager _out/integration-test-linux-amd64 + - name: image-aws + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + run: | + make image-aws + - name: checkout extensions + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/extensions + ref: main + repository: siderolabs/extensions + - name: set variables + run: | + 
cat _out/talos-metadata >> "$GITHUB_ENV" + - name: build extensions + env: + PLATFORM: linux/arm64 + PUSH: "true" + REGISTRY: registry.dev.siderolabs.io + run: | + make nvidia-container-toolkit-production nonfree-kmod-nvidia-production extensions-metadata -C _out/extensions + - name: e2e-aws-prepare + env: + E2E_AWS_TARGET: nvidia-nonfree-production + EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata + IMAGE_REGISTRY: registry.dev.siderolabs.io + TARGET_ARCH: arm64 + run: | + make e2e-aws-prepare + - name: checkout contrib + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/contrib + ref: main + repository: siderolabs/contrib + - name: setup tf + uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0 + with: + terraform_wrapper: "false" + - name: tf apply + env: + TF_E2E_ACTION: apply + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf + - name: e2e-aws-nvidia-nonfree-production + env: + EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false + INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA + run: | + make e2e-aws + - name: tf destroy + if: always() + env: + TF_E2E_ACTION: destroy + TF_E2E_REFRESH_ON_DESTROY: "false" + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf diff --git a/.github/workflows/integration-aws-nvidia-oss-lts-arm64-cron.yaml b/.github/workflows/integration-aws-nvidia-oss-lts-arm64-cron.yaml new file mode 100644 index 000000000..fc90ac71b --- /dev/null +++ b/.github/workflows/integration-aws-nvidia-oss-lts-arm64-cron.yaml @@ -0,0 +1,153 @@ +# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. +# +# Generated on 2026-04-08T12:37:05Z by kres b6d29bf. 
+ +concurrency: + group: ${{ github.head_ref || github.run_id }} + cancel-in-progress: true +"on": + schedule: + - cron: 30 7 * * * +name: integration-aws-nvidia-oss-lts-arm64-cron +jobs: + default: + runs-on: + group: generic + steps: + - name: gather-system-info + id: system-info + uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0 + continue-on-error: true + - name: print-system-info + run: | + MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024)) + + OUTPUTS=( + "CPU Core: ${{ steps.system-info.outputs.cpu-core }}" + "CPU Model: ${{ steps.system-info.outputs.cpu-model }}" + "Hostname: ${{ steps.system-info.outputs.hostname }}" + "NodeName: ${NODE_NAME}" + "Kernel release: ${{ steps.system-info.outputs.kernel-release }}" + "Kernel version: ${{ steps.system-info.outputs.kernel-version }}" + "Name: ${{ steps.system-info.outputs.name }}" + "Platform: ${{ steps.system-info.outputs.platform }}" + "Release: ${{ steps.system-info.outputs.release }}" + "Total memory: ${MEMORY_GB} GB" + ) + + for OUTPUT in "${OUTPUTS[@]}";do + echo "${OUTPUT}" + done + continue-on-error: true + - name: checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + - name: Unshallow + run: | + git fetch --prune --unshallow + - name: Set up Docker Buildx + id: setup-buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0 + with: + driver: remote + endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234 + timeout-minutes: 10 + - name: Mask secrets + run: | + echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')" + - name: Set secrets for job + run: | + sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV" + - name: Download artifacts + if: github.event_name != 'schedule' + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1 + with: + 
name: talos-artifacts + path: _out + - name: Fix artifact permissions + if: github.event_name != 'schedule' + run: | + xargs -a _out/executable-artifacts -I {} chmod +x {} + - name: ci-temp-release-tag + if: github.event_name != 'schedule' + run: | + make ci-temp-release-tag + - name: generate + if: github.event_name == 'schedule' + run: | + make generate + - name: uki-certs + if: github.event_name == 'schedule' + env: + PLATFORM: linux/amd64 + run: | + make uki-certs + - name: build + if: github.event_name == 'schedule' + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + PUSH: "true" + run: | + make installer-base imager _out/integration-test-linux-amd64 + - name: image-aws + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + run: | + make image-aws + - name: checkout extensions + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/extensions + ref: main + repository: siderolabs/extensions + - name: set variables + run: | + cat _out/talos-metadata >> "$GITHUB_ENV" + - name: build extensions + env: + PLATFORM: linux/arm64 + PUSH: "true" + REGISTRY: registry.dev.siderolabs.io + run: | + make nvidia-container-toolkit-lts nvidia-open-gpu-kernel-modules-lts extensions-metadata -C _out/extensions + - name: e2e-aws-prepare + env: + E2E_AWS_TARGET: nvidia-oss-lts + EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata + IMAGE_REGISTRY: registry.dev.siderolabs.io + TARGET_ARCH: arm64 + run: | + make e2e-aws-prepare + - name: checkout contrib + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/contrib + ref: main + repository: siderolabs/contrib + - name: setup tf + uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0 + with: + terraform_wrapper: "false" + - name: tf apply + env: + TF_E2E_ACTION: apply + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make 
e2e-cloud-tf + - name: e2e-aws-nvidia-oss-lts + env: + EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false + INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA + run: | + make e2e-aws + - name: tf destroy + if: always() + env: + TF_E2E_ACTION: destroy + TF_E2E_REFRESH_ON_DESTROY: "false" + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf diff --git a/.github/workflows/integration-aws-nvidia-oss-production-arm64-cron.yaml b/.github/workflows/integration-aws-nvidia-oss-production-arm64-cron.yaml new file mode 100644 index 000000000..2d40840ae --- /dev/null +++ b/.github/workflows/integration-aws-nvidia-oss-production-arm64-cron.yaml @@ -0,0 +1,153 @@ +# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. +# +# Generated on 2026-04-11T06:20:55Z by kres b6d29bf. + +concurrency: + group: ${{ github.head_ref || github.run_id }} + cancel-in-progress: true +"on": + schedule: + - cron: 30 7 * * * +name: integration-aws-nvidia-oss-production-arm64-cron +jobs: + default: + runs-on: + group: generic + steps: + - name: gather-system-info + id: system-info + uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0 + continue-on-error: true + - name: print-system-info + run: | + MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024)) + + OUTPUTS=( + "CPU Core: ${{ steps.system-info.outputs.cpu-core }}" + "CPU Model: ${{ steps.system-info.outputs.cpu-model }}" + "Hostname: ${{ steps.system-info.outputs.hostname }}" + "NodeName: ${NODE_NAME}" + "Kernel release: ${{ steps.system-info.outputs.kernel-release }}" + "Kernel version: ${{ steps.system-info.outputs.kernel-version }}" + "Name: ${{ steps.system-info.outputs.name }}" + "Platform: ${{ steps.system-info.outputs.platform }}" + "Release: ${{ steps.system-info.outputs.release }}" + "Total memory: ${MEMORY_GB} GB" + ) + + for OUTPUT in "${OUTPUTS[@]}";do + echo "${OUTPUT}" + done + continue-on-error: true + 
- name: checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + - name: Unshallow + run: | + git fetch --prune --unshallow + - name: Set up Docker Buildx + id: setup-buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0 + with: + driver: remote + endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234 + timeout-minutes: 10 + - name: Mask secrets + run: | + echo "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')" + - name: Set secrets for job + run: | + sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV" + - name: Download artifacts + if: github.event_name != 'schedule' + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1 + with: + name: talos-artifacts + path: _out + - name: Fix artifact permissions + if: github.event_name != 'schedule' + run: | + xargs -a _out/executable-artifacts -I {} chmod +x {} + - name: ci-temp-release-tag + if: github.event_name != 'schedule' + run: | + make ci-temp-release-tag + - name: generate + if: github.event_name == 'schedule' + run: | + make generate + - name: uki-certs + if: github.event_name == 'schedule' + env: + PLATFORM: linux/amd64 + run: | + make uki-certs + - name: build + if: github.event_name == 'schedule' + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + PUSH: "true" + run: | + make installer-base imager _out/integration-test-linux-amd64 + - name: image-aws + env: + IMAGE_REGISTRY: registry.dev.siderolabs.io + PLATFORM: linux/arm64 + run: | + make image-aws + - name: checkout extensions + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/extensions + ref: main + repository: siderolabs/extensions + - name: set variables + run: | + cat _out/talos-metadata >> "$GITHUB_ENV" + - name: build extensions + env: + PLATFORM: linux/arm64 + PUSH: "true" 
+ REGISTRY: registry.dev.siderolabs.io + run: | + make nvidia-container-toolkit-production nvidia-open-gpu-kernel-modules-production extensions-metadata -C _out/extensions + - name: e2e-aws-prepare + env: + E2E_AWS_TARGET: nvidia-oss-production + EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata + IMAGE_REGISTRY: registry.dev.siderolabs.io + TARGET_ARCH: arm64 + run: | + make e2e-aws-prepare + - name: checkout contrib + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2 + with: + path: _out/contrib + ref: main + repository: siderolabs/contrib + - name: setup tf + uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # version: v4.0.0 + with: + terraform_wrapper: "false" + - name: tf apply + env: + TF_E2E_ACTION: apply + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf + - name: e2e-aws-nvidia-oss-production + env: + EXTRA_TEST_ARGS: -talos.extensions.nvidia -talos.verifyukibooted=false + INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA + run: | + make e2e-aws + - name: tf destroy + if: always() + env: + TF_E2E_ACTION: destroy + TF_E2E_REFRESH_ON_DESTROY: "false" + TF_E2E_TEST_TYPE: aws + TF_SCRIPT_DIR: _out/contrib + run: | + make e2e-cloud-tf diff --git a/.github/workflows/slack-notify-ci-failure.yaml b/.github/workflows/slack-notify-ci-failure.yaml index ed8238876..1a0d9ee90 100644 --- a/.github/workflows/slack-notify-ci-failure.yaml +++ b/.github/workflows/slack-notify-ci-failure.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2026-04-15T14:42:53Z by kres b6d29bf. +# Generated on 2026-04-15T14:54:12Z by kres b6d29bf. 
"on": workflow_run: @@ -40,9 +40,13 @@ - integration-image-factory-cron - integration-aws-cron - integration-aws-nvidia-oss-lts-cron + - integration-aws-nvidia-oss-lts-arm64-cron - integration-aws-nvidia-oss-production-cron + - integration-aws-nvidia-oss-production-arm64-cron - integration-aws-nvidia-nonfree-lts-cron + - integration-aws-nvidia-nonfree-lts-arm64-cron - integration-aws-nvidia-nonfree-production-cron + - integration-aws-nvidia-nonfree-production-arm64-cron - integration-gcp-cron types: - completed diff --git a/.github/workflows/slack-notify.yaml b/.github/workflows/slack-notify.yaml index 7cd051d6b..02cd64579 100644 --- a/.github/workflows/slack-notify.yaml +++ b/.github/workflows/slack-notify.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2026-04-15T14:42:53Z by kres b6d29bf. +# Generated on 2026-04-15T14:54:12Z by kres b6d29bf. "on": workflow_run: @@ -40,9 +40,13 @@ - integration-image-factory-cron - integration-aws-cron - integration-aws-nvidia-oss-lts-cron + - integration-aws-nvidia-oss-lts-arm64-cron - integration-aws-nvidia-oss-production-cron + - integration-aws-nvidia-oss-production-arm64-cron - integration-aws-nvidia-nonfree-lts-cron + - integration-aws-nvidia-nonfree-lts-arm64-cron - integration-aws-nvidia-nonfree-production-cron + - integration-aws-nvidia-nonfree-production-arm64-cron - integration-gcp-cron types: - completed diff --git a/.kres.yaml b/.kres.yaml index 814af147c..6d11cf572 100644 --- a/.kres.yaml +++ b/.kres.yaml @@ -89,6 +89,7 @@ spec: - integration-image-factory - integration-aws - integration-aws-nvidia-oss-lts + - integration-aws-nvidia-oss-lts-arm64 - integration-aws-nvidia-oss-production - integration-aws-nvidia-nonfree-lts - integration-aws-nvidia-nonfree-production @@ -2865,6 +2866,100 @@ spec: TF_E2E_TEST_TYPE: aws TF_E2E_ACTION: destroy TF_E2E_REFRESH_ON_DESTROY: false + - name: integration-aws-nvidia-oss-lts-arm64 + buildxOptions: + enabled: true + sops: true + 
depends: + - default + runnerGroup: generic # we can use generic here since the tests run against a remote talos cluster + crons: + - '30 7 * * *' + triggerLabels: + - integration/aws-nvidia-oss-lts-arm64 + - integration/aws-nvidia-oss + - integration/aws-nvidia + steps: + - name: download-artifacts + conditions: + - not-on-schedule + artifactStep: + type: download + artifactName: talos-artifacts + artifactPath: _out + - name: ci-temp-release-tag + conditions: + - not-on-schedule + - name: generate + conditions: + - only-on-schedule + - name: uki-certs + conditions: + - only-on-schedule + environment: + PLATFORM: linux/amd64 + - name: build + conditions: + - only-on-schedule + command: installer-base imager _out/integration-test-linux-amd64 + environment: + PLATFORM: linux/arm64 + IMAGE_REGISTRY: registry.dev.siderolabs.io + PUSH: true + - name: image-aws + environment: + PLATFORM: linux/arm64 + IMAGE_REGISTRY: registry.dev.siderolabs.io + - name: checkout extensions + checkoutStep: + repository: siderolabs/extensions + ref: main + path: _out/extensions + - name: set variables + nonMakeStep: true + command: cat _out/talos-metadata >> "$GITHUB_ENV" + - name: build extensions + command: nvidia-container-toolkit-lts nvidia-open-gpu-kernel-modules-lts extensions-metadata + arguments: + - -C + - _out/extensions + environment: + PLATFORM: linux/arm64 + PUSH: true + REGISTRY: registry.dev.siderolabs.io + - name: e2e-aws-prepare + environment: + EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata + E2E_AWS_TARGET: nvidia-oss-lts + IMAGE_REGISTRY: registry.dev.siderolabs.io + TARGET_ARCH: arm64 + - name: checkout contrib + checkoutStep: + repository: siderolabs/contrib + ref: main + path: _out/contrib + - name: setup tf + terraformStep: true + - name: tf apply + command: e2e-cloud-tf + environment: + TF_SCRIPT_DIR: _out/contrib + TF_E2E_TEST_TYPE: aws + TF_E2E_ACTION: apply + - name: e2e-aws-nvidia-oss-lts + command: e2e-aws + environment: + EXTRA_TEST_ARGS: 
"-talos.extensions.nvidia -talos.verifyukibooted=false" + INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA + - name: tf destroy + command: e2e-cloud-tf + conditions: + - always + environment: + TF_SCRIPT_DIR: _out/contrib + TF_E2E_TEST_TYPE: aws + TF_E2E_ACTION: destroy + TF_E2E_REFRESH_ON_DESTROY: false - name: integration-aws-nvidia-oss-production buildxOptions: enabled: true @@ -2961,6 +3056,100 @@ spec: TF_E2E_TEST_TYPE: aws TF_E2E_ACTION: destroy TF_E2E_REFRESH_ON_DESTROY: false + - name: integration-aws-nvidia-oss-production-arm64 + buildxOptions: + enabled: true + sops: true + depends: + - default + runnerGroup: generic # we can use generic here since the tests run against a remote talos cluster + crons: + - '30 7 * * *' + triggerLabels: + - integration/aws-nvidia-oss-production-arm64 + - integration/aws-nvidia-oss + - integration/aws-nvidia + steps: + - name: download-artifacts + conditions: + - not-on-schedule + artifactStep: + type: download + artifactName: talos-artifacts + artifactPath: _out + - name: ci-temp-release-tag + conditions: + - not-on-schedule + - name: generate + conditions: + - only-on-schedule + - name: uki-certs + conditions: + - only-on-schedule + environment: + PLATFORM: linux/amd64 + - name: build + conditions: + - only-on-schedule + command: installer-base imager _out/integration-test-linux-amd64 + environment: + PLATFORM: linux/arm64 + IMAGE_REGISTRY: registry.dev.siderolabs.io + PUSH: true + - name: image-aws + environment: + PLATFORM: linux/arm64 + IMAGE_REGISTRY: registry.dev.siderolabs.io + - name: checkout extensions + checkoutStep: + repository: siderolabs/extensions + ref: main + path: _out/extensions + - name: set variables + nonMakeStep: true + command: cat _out/talos-metadata >> "$GITHUB_ENV" + - name: build extensions + command: nvidia-container-toolkit-production nvidia-open-gpu-kernel-modules-production extensions-metadata + arguments: + - -C + - _out/extensions + environment: + PLATFORM: linux/arm64 + 
PUSH: true + REGISTRY: registry.dev.siderolabs.io + - name: e2e-aws-prepare + environment: + EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata + E2E_AWS_TARGET: nvidia-oss-production + IMAGE_REGISTRY: registry.dev.siderolabs.io + TARGET_ARCH: arm64 + - name: checkout contrib + checkoutStep: + repository: siderolabs/contrib + ref: main + path: _out/contrib + - name: setup tf + terraformStep: true + - name: tf apply + command: e2e-cloud-tf + environment: + TF_SCRIPT_DIR: _out/contrib + TF_E2E_TEST_TYPE: aws + TF_E2E_ACTION: apply + - name: e2e-aws-nvidia-oss-production + command: e2e-aws + environment: + EXTRA_TEST_ARGS: "-talos.extensions.nvidia -talos.verifyukibooted=false" + INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA + - name: tf destroy + command: e2e-cloud-tf + conditions: + - always + environment: + TF_SCRIPT_DIR: _out/contrib + TF_E2E_TEST_TYPE: aws + TF_E2E_ACTION: destroy + TF_E2E_REFRESH_ON_DESTROY: false - name: integration-aws-nvidia-nonfree-lts buildxOptions: enabled: true @@ -3057,6 +3246,100 @@ spec: TF_E2E_TEST_TYPE: aws TF_E2E_ACTION: destroy TF_E2E_REFRESH_ON_DESTROY: false + - name: integration-aws-nvidia-nonfree-lts-arm64 + buildxOptions: + enabled: true + sops: true + depends: + - default + runnerGroup: generic # we can use generic here since the tests run against a remote talos cluster + crons: + - '30 7 * * *' + triggerLabels: + - integration/aws-nvidia-nonfree-lts-arm64 + - integration/aws-nvidia-nonfree + - integration/aws-nvidia + steps: + - name: download-artifacts + conditions: + - not-on-schedule + artifactStep: + type: download + artifactName: talos-artifacts + artifactPath: _out + - name: ci-temp-release-tag + conditions: + - not-on-schedule + - name: generate + conditions: + - only-on-schedule + - name: uki-certs + conditions: + - only-on-schedule + environment: + PLATFORM: linux/amd64 + - name: build + conditions: + - only-on-schedule + command: installer-base imager 
_out/integration-test-linux-amd64 + environment: + PLATFORM: linux/arm64 + IMAGE_REGISTRY: registry.dev.siderolabs.io + PUSH: true + - name: image-aws + environment: + PLATFORM: linux/arm64 + IMAGE_REGISTRY: registry.dev.siderolabs.io + - name: checkout extensions + checkoutStep: + repository: siderolabs/extensions + ref: main + path: _out/extensions + - name: set variables + nonMakeStep: true + command: cat _out/talos-metadata >> "$GITHUB_ENV" + - name: build extensions + command: nvidia-container-toolkit-lts nonfree-kmod-nvidia-lts extensions-metadata + arguments: + - -C + - _out/extensions + environment: + PLATFORM: linux/arm64 + PUSH: true + REGISTRY: registry.dev.siderolabs.io + - name: e2e-aws-prepare + environment: + EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata + E2E_AWS_TARGET: nvidia-nonfree-lts + IMAGE_REGISTRY: registry.dev.siderolabs.io + TARGET_ARCH: arm64 + - name: checkout contrib + checkoutStep: + repository: siderolabs/contrib + ref: main + path: _out/contrib + - name: setup tf + terraformStep: true + - name: tf apply + command: e2e-cloud-tf + environment: + TF_SCRIPT_DIR: _out/contrib + TF_E2E_TEST_TYPE: aws + TF_E2E_ACTION: apply + - name: e2e-aws-nvidia-nonfree-lts + command: e2e-aws + environment: + EXTRA_TEST_ARGS: "-talos.extensions.nvidia -talos.verifyukibooted=false" + INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA + - name: tf destroy + command: e2e-cloud-tf + conditions: + - always + environment: + TF_SCRIPT_DIR: _out/contrib + TF_E2E_TEST_TYPE: aws + TF_E2E_ACTION: destroy + TF_E2E_REFRESH_ON_DESTROY: false - name: integration-aws-nvidia-nonfree-production buildxOptions: enabled: true @@ -3153,6 +3436,100 @@ spec: TF_E2E_TEST_TYPE: aws TF_E2E_ACTION: destroy TF_E2E_REFRESH_ON_DESTROY: false + - name: integration-aws-nvidia-nonfree-production-arm64 + buildxOptions: + enabled: true + sops: true + depends: + - default + runnerGroup: generic # we can use generic here since the tests run against a remote 
talos cluster + crons: + - '30 7 * * *' + triggerLabels: + - integration/aws-nvidia-nonfree-production-arm64 + - integration/aws-nvidia-nonfree + - integration/aws-nvidia + steps: + - name: download-artifacts + conditions: + - not-on-schedule + artifactStep: + type: download + artifactName: talos-artifacts + artifactPath: _out + - name: ci-temp-release-tag + conditions: + - not-on-schedule + - name: generate + conditions: + - only-on-schedule + - name: uki-certs + conditions: + - only-on-schedule + environment: + PLATFORM: linux/amd64 + - name: build + conditions: + - only-on-schedule + command: installer-base imager _out/integration-test-linux-amd64 + environment: + PLATFORM: linux/arm64 + IMAGE_REGISTRY: registry.dev.siderolabs.io + PUSH: true + - name: image-aws + environment: + PLATFORM: linux/arm64 + IMAGE_REGISTRY: registry.dev.siderolabs.io + - name: checkout extensions + checkoutStep: + repository: siderolabs/extensions + ref: main + path: _out/extensions + - name: set variables + nonMakeStep: true + command: cat _out/talos-metadata >> "$GITHUB_ENV" + - name: build extensions + command: nvidia-container-toolkit-production nonfree-kmod-nvidia-production extensions-metadata + arguments: + - -C + - _out/extensions + environment: + PLATFORM: linux/arm64 + PUSH: true + REGISTRY: registry.dev.siderolabs.io + - name: e2e-aws-prepare + environment: + EXTENSIONS_METADATA_FILE: _out/extensions/_out/extensions-metadata + E2E_AWS_TARGET: nvidia-nonfree-production + IMAGE_REGISTRY: registry.dev.siderolabs.io + TARGET_ARCH: arm64 + - name: checkout contrib + checkoutStep: + repository: siderolabs/contrib + ref: main + path: _out/contrib + - name: setup tf + terraformStep: true + - name: tf apply + command: e2e-cloud-tf + environment: + TF_SCRIPT_DIR: _out/contrib + TF_E2E_TEST_TYPE: aws + TF_E2E_ACTION: apply + - name: e2e-aws-nvidia-nonfree-production + command: e2e-aws + environment: + EXTRA_TEST_ARGS: "-talos.extensions.nvidia -talos.verifyukibooted=false" + 
INTEGRATION_TEST_RUN: TestIntegration/api.ExtensionsSuiteNVIDIA + - name: tf destroy + command: e2e-cloud-tf + conditions: + - always + environment: + TF_SCRIPT_DIR: _out/contrib + TF_E2E_TEST_TYPE: aws + TF_E2E_ACTION: destroy + TF_E2E_REFRESH_ON_DESTROY: false - name: integration-gcp buildxOptions: enabled: true diff --git a/hack/test/e2e-aws-prepare.sh b/hack/test/e2e-aws-prepare.sh index 90e25143c..9969e5b40 100755 --- a/hack/test/e2e-aws-prepare.sh +++ b/hack/test/e2e-aws-prepare.sh @@ -6,10 +6,12 @@ source ./hack/test/e2e.sh REGION="us-east-1" +ARCH="${TARGET_ARCH:-amd64}" + function cloud_image_upload() { RANDOM_SUFFIX=$(openssl rand -hex 4) - CLOUD_IMAGES_EXTRA_ARGS=("--name-prefix=${1}-${RANDOM_SUFFIX}" "--target-clouds=aws" "--architectures=amd64" "--aws-regions=${REGION}") + CLOUD_IMAGES_EXTRA_ARGS=("--name-prefix=${1}-${RANDOM_SUFFIX}" "--target-clouds=aws" "--architectures=${ARCH}" "--aws-regions=${REGION}") case "${1}" in talos-e2e-nvidia-oss-*) @@ -21,7 +23,7 @@ function cloud_image_upload() { } function get_ami_id() { - jq -r ".[] | select(.cloud == \"aws\") | select(.region == \"${REGION}\") | select (.arch == \"amd64\") | .id" "${ARTIFACTS}/cloud-images.json" + jq -r ".[] | select(.cloud == \"aws\") | select(.region == \"${REGION}\") | select (.arch == \"${ARCH}\") | .id" "${ARTIFACTS}/cloud-images.json" } function cloud_image_upload_with_extensions() { @@ -48,7 +50,7 @@ function cloud_image_upload_with_extensions() { ;; esac - make image-aws IMAGER_ARGS="${EXTENSIONS}" PLATFORM=linux/amd64 + make image-aws IMAGER_ARGS="${EXTENSIONS}" PLATFORM="linux/${ARCH}" cloud_image_upload "talos-e2e-${1}" } @@ -73,7 +75,9 @@ esac mkdir -p "${ARTIFACTS}/e2e-aws-generated" -NAME_PREFIX="${SHA}-${E2E_AWS_TARGET}" +NAME_PREFIX="${SHA}-${E2E_AWS_TARGET}-${ARCH}" + +AWS_JQ_TEMPLATE="aws-${ARCH}.jq" jq --null-input \ --arg WORKER_GROUP "${WORKER_GROUP}" \ @@ -90,6 +94,6 @@ jq --null-input \ talos_version_contract: $TALOS_VERSION_CONTRACT, kubernetes_version: 
$KUBERNETES_VERSION }' \ - | jq -f hack/test/tfvars/aws.jq > "${ARTIFACTS}/e2e-aws-generated/vars.json" + | jq -f "hack/test/tfvars/${AWS_JQ_TEMPLATE}" > "${ARTIFACTS}/e2e-aws-generated/vars.json" cp hack/test/tfvars/*.yaml "${ARTIFACTS}/e2e-aws-generated" diff --git a/hack/test/tfvars/aws.jq b/hack/test/tfvars/aws-amd64.jq similarity index 100% rename from hack/test/tfvars/aws.jq rename to hack/test/tfvars/aws-amd64.jq diff --git a/hack/test/tfvars/aws-arm64.jq b/hack/test/tfvars/aws-arm64.jq new file mode 100644 index 000000000..7ef1d12d0 --- /dev/null +++ b/hack/test/tfvars/aws-arm64.jq @@ -0,0 +1,35 @@ +{ + "cluster_name": .cluster_name, + "ccm": true, + "talos_version_contract": .talos_version_contract, + "kubernetes_version": .kubernetes_version, + "control_plane": { + "ami_id": .ami_id, + "instance_type": "t4g.large" + }, + "worker_groups": (if .worker_group == "nvidia" then [ + { + "name": "nvidia-t4", + "ami_id": .nvidia_ami_id, + "instance_type": "g5g.xlarge", + "config_patch_files": [ + "nvidia.yaml" + ], + "tags": { + "Type": "nvidia-t4" + } + } + ] else [ + { + "name": "default", + "num_instances": 3, + "ami_id": .ami_id, + "instance_type": "t4g.large" + } + ] end), + "extra_tags": { + "ClusterName": .cluster_name, + "Project": "talos-e2e-ci", + "Environment": "ci" + } +} diff --git a/internal/integration/api/extensions_nvidia.go b/internal/integration/api/extensions_nvidia.go index db014a3ad..2d791e714 100644 --- a/internal/integration/api/extensions_nvidia.go +++ b/internal/integration/api/extensions_nvidia.go @@ -7,12 +7,15 @@ package api import ( + "bytes" "context" _ "embed" "fmt" "io" + "strings" "time" + "github.com/cosi-project/runtime/pkg/safe" "github.com/siderolabs/go-retry/retry" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -20,6 +23,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/siderolabs/talos/internal/integration/base" + "github.com/siderolabs/talos/pkg/machinery/api/common" + 
"github.com/siderolabs/talos/pkg/machinery/client" + "github.com/siderolabs/talos/pkg/machinery/constants" + "github.com/siderolabs/talos/pkg/machinery/resources/runtime" ) //go:embed testdata/nvidia-gpu-operator.yaml @@ -69,13 +76,11 @@ func (suite *ExtensionsSuiteNVIDIA) TestExtensionsNVIDIA() { // if we're testing NVIDIA stuff we need to get the nodes having NVIDIA GPUs // we query k8s to get the nodes having the label node.kubernetes.io/instance-type. // this label is set by the cloud provider and it's value is the instance type. - // the nvidia e2e-aws tests creates gpu nodes one with g4dn.xlarge and another - // with p4d.24xlarge - for _, nvidiaNode := range suite.getNVIDIANodes("node.kubernetes.io/instance-type in (g4dn.xlarge, p4d.24xlarge)") { + for _, nvidiaNode := range suite.getNVIDIANodes("node.kubernetes.io/instance-type in (g4dn.xlarge, p4d.24xlarge, g5g.xlarge)") { suite.AssertExpectedModules(suite.ctx, nvidiaNode, expectedModulesModDep) } - nodes := suite.getNVIDIANodes("node.kubernetes.io/instance-type=g4dn.xlarge") + nodes := suite.getNVIDIANodes("node.kubernetes.io/instance-type in (g4dn.xlarge, p4d.24xlarge, g5g.xlarge)") for _, node := range nodes { suite.AssertServicesRunning(suite.ctx, node, map[string]string{ "ext-nvidia-persistenced": "Running", @@ -83,6 +88,95 @@ func (suite *ExtensionsSuiteNVIDIA) TestExtensionsNVIDIA() { }) } + missingCDIFilesData := map[string]map[string]int{ + "amd64": { + "nvidia-open-gpu-kernel-modules-production": 13, + "nvidia-open-gpu-kernel-modules-lts": 9, + "nonfree-kmod-nvidia-production": 13, + "nonfree-kmod-nvidia-lts": 9, + }, + "arm64": { + "nvidia-open-gpu-kernel-modules-production": 11, + "nvidia-open-gpu-kernel-modules-lts": 9, + "nonfree-kmod-nvidia-production": 11, + "nonfree-kmod-nvidia-lts": 9, + }, + } + + for _, node := range nodes { + nodeCtx := client.WithNode(suite.ctx, node) + + versionInfo, err := suite.Client.Version(nodeCtx) + suite.Require().NoError(err) + + 
suite.Require().NotNil(versionInfo.GetMessages(), "version info messages should not be nil") + + extInfo := missingCDIFilesData[versionInfo.GetMessages()[0].Version.Arch] + + list, err := safe.StateListAll[*runtime.ExtensionStatus](nodeCtx, suite.Client.COSI) + suite.Require().NoError(err) + + extensionsList := safe.ToSlice(list, func(info *runtime.ExtensionStatus) string { + return info.TypedSpec().Metadata.Name + }) + + var expectedCount int + + for _, name := range extensionsList { + if count, exists := extInfo[name]; exists { + expectedCount = count + + break + } + } + + suite.Require().NotZero(expectedCount, "did not find any matching nvidia extension in the list of extensions: %v", extensionsList) + + logsStream, err := suite.Client.Logs( + nodeCtx, + constants.SystemContainerdNamespace, + common.ContainerDriver_CONTAINERD, + "ext-nvidia-cdi-gen", + false, + -1, + ) + suite.Require().NoError(err) + + logReader, err := client.ReadStream(logsStream) + suite.Require().NoError(err) + + defer logReader.Close() //nolint:errcheck + + var buffer bytes.Buffer + + _, err = io.Copy(&buffer, logReader) + suite.Require().NoError(err) + + logData := buffer.String() + + // we know as baseline we have different number of missing files that are not present in the extension + // and manually verified, if some new files are not found we want to fix the extension + // Adding an example of the current log message for reference: + // ❯ talosctl -n 172.16.15.116 logs ext-nvidia-cdi-gen | grep "Could not" + // msg="Could not locate libnvidia-vulkan-producer.so.580.126.20: libnvidia-vulkan-producer.so.580.126.20: not found\nlibnvidia-vulkan-producer.so.580.126.20: not found" + // msg="Could not locate X11/xorg.conf.d/10-nvidia.conf: X11/xorg.conf.d/10-nvidia.conf: not found" + // msg="Could not locate X11/xorg.conf.d/nvidia-drm-outputclass.conf: X11/xorg.conf.d/nvidia-drm-outputclass.conf: not found" + // msg="Could not locate vulkan/implicit_layer.d/nvidia_layers.json: 
vulkan/implicit_layer.d/nvidia_layers.json: not found\nvulkan/implicit_layer.d/nvidia_layers.json: not found" + msg="Could not locate vulkan/icd.d/nvidia_icd.x86_64.json: vulkan/icd.d/nvidia_icd.x86_64.json: not found\nvulkan/icd.d/nvidia_icd.x86_64.json: not found" + msg="Could not locate /nvidia-fabricmanager/socket: /nvidia-fabricmanager/socket: not found" + msg="Could not locate /tmp/nvidia-mps: /tmp/nvidia-mps: not found" + msg="Could not locate nvidia-imex: nvidia-imex: not found" + msg="Could not locate nvidia-imex-ctl: nvidia-imex-ctl: not found" + suite.Assert().Equal( + expectedCount, + strings.Count(logData, "Could not locate"), + "expected exactly %d 'Could not locate' in the logs, got %d. Logs:\n%s", + expectedCount, + strings.Count(logData, "Could not locate"), + logData, + ) + } + // nodes = suite.getNVIDIANodes("node.kubernetes.io/instance-type=p4d.24xlarge") // for _, node := range nodes { // suite.testServicesRunning(node, map[string]string{ @@ -301,7 +395,7 @@ func nvidiaCUDATestJob() *batchv1.Job { { Key: "node.kubernetes.io/instance-type", Operator: corev1.NodeSelectorOpIn, - Values: []string{"g4dn.xlarge", "p4d.24xlarge"}, + Values: []string{"g4dn.xlarge", "p4d.24xlarge", "g5g.xlarge"}, }, }, }, @@ -354,7 +448,7 @@ func nvidiaCDITestJob() *batchv1.Job { { Key: "node.kubernetes.io/instance-type", Operator: corev1.NodeSelectorOpIn, - Values: []string{"g4dn.xlarge", "p4d.24xlarge"}, + Values: []string{"g4dn.xlarge", "p4d.24xlarge", "g5g.xlarge"}, }, }, }, diff --git a/internal/integration/api/testdata/nvidia-gpu-operator.yaml b/internal/integration/api/testdata/nvidia-gpu-operator.yaml index f733ee3be..8e37fde94 100644 --- a/internal/integration/api/testdata/nvidia-gpu-operator.yaml +++ b/internal/integration/api/testdata/nvidia-gpu-operator.yaml @@ -3,4 +3,4 @@ driver: toolkit: enabled: false hostPaths: - driverInstallDir: /usr/local/glibc/usr + driverInstallDir: /usr/local/lib