fix: volume mount permissions

Make Talos volume mount options stricter.

Fixes: #11946

Signed-off-by: Noel Georgi <git@frezbo.dev>
This commit is contained in:
Noel Georgi 2026-04-25 23:36:32 +05:30
parent ff0f66bdfa
commit 689974bd55
No known key found for this signature in database
GPG Key ID: 21A9F444075C9E36
30 changed files with 913 additions and 214 deletions

View File

@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2026-05-05T17:21:59Z by kres 1762ab2.
# Generated on 2026-05-05T14:21:38Z by kres 1762ab2.
concurrency:
group: ${{ github.head_ref || github.run_id }}
@ -3125,7 +3125,7 @@ jobs:
pull-requests: read
runs-on:
group: large
if: (!startsWith(github.head_ref, 'renovate/') && !startsWith(github.head_ref, 'dependabot/')) && !cancelled() && github.event_name == 'pull_request'
if: (!startsWith(github.head_ref, 'renovate/') && !startsWith(github.head_ref, 'dependabot/')) && !cancelled()
strategy:
matrix:
include:
@ -3404,7 +3404,7 @@ jobs:
pull-requests: read
runs-on:
group: large
if: (!startsWith(github.head_ref, 'renovate/') && !startsWith(github.head_ref, 'dependabot/')) && !cancelled() && github.event_name == 'pull_request'
if: (!startsWith(github.head_ref, 'renovate/') && !startsWith(github.head_ref, 'dependabot/')) && !cancelled()
strategy:
matrix:
include:
@ -3534,7 +3534,7 @@ jobs:
pull-requests: read
runs-on:
group: large
if: (!startsWith(github.head_ref, 'renovate/') && !startsWith(github.head_ref, 'dependabot/')) && !cancelled() && github.event_name == 'pull_request'
if: (!startsWith(github.head_ref, 'renovate/') && !startsWith(github.head_ref, 'dependabot/')) && !cancelled()
strategy:
matrix:
include:
@ -4006,7 +4006,7 @@ jobs:
/tmp/logs-*.tar.gz
/tmp/support-*.zip
retention-days: "5"
integration-qemu-csi-longhorn:
integration-qemu-csi-longhorn-v1:
permissions:
actions: read
contents: write
@ -4015,7 +4015,7 @@ jobs:
pull-requests: read
runs-on:
group: large
if: contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/qemu-csi') || contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/extensions') || contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/qemu-csi-longhorn') || contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/release-gate')
if: contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/qemu-csi-longhorn-v1') || contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/qemu-csi') || contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/extensions') || contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/qemu-csi-longhorn') || contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/release-gate')
needs:
- default
steps:
@ -4094,8 +4094,127 @@ jobs:
make kubelet-fat-patch
- name: e2e-qemu-csi-longhorn
env:
EXTRA_TEST_ARGS: -talos.csi=longhorn
GITHUB_STEP_NAME: ${{ github.job}}-e2e-qemu-csi-longhorn
EXTRA_TEST_ARGS: -talos.csi=longhorn-v1 -talos.skip-ephemeral-policy
GITHUB_STEP_NAME: ${{ github.job}}-e2e-qemu-csi-longhorn-v1
IMAGE_REGISTRY: registry.dev.siderolabs.io
QEMU_CPUS_WORKERS: "3"
QEMU_EXTRA_DISKS: "1"
QEMU_EXTRA_DISKS_DRIVERS: nvme
QEMU_EXTRA_DISKS_SIZE: "12288"
QEMU_MEMORY_WORKERS: "10240"
QEMU_SYSTEM_DISK_SIZE: "20480"
QEMU_WORKERS: "3"
SHORT_INTEGRATION_TEST: "yes"
WITH_CONFIG_PATCH_CONTROLPLANE: '@hack/test/patches/longhorn-cp.yaml'
WITH_CONFIG_PATCH_WORKER: '@_out/installer-extensions-patch.yaml:@_out/kubelet-fat-patch.yaml:@hack/test/patches/longhorn.yaml:@hack/test/patches/ephemeral-insecure.yaml'
run: |
sudo -E make e2e-qemu
- name: save artifacts
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # version: v7.0.1
with:
name: fio-integration-qemu-csi-longhorn-v1
path: |
/tmp/fio-*.json
retention-days: "180"
- name: save artifacts
if: always()
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # version: v7.0.1
with:
name: talos-logs-integration-qemu-csi-longhorn-v1
path: |-
/tmp/logs-*.tar.gz
/tmp/support-*.zip
retention-days: "5"
integration-qemu-csi-longhorn-v2:
permissions:
actions: read
contents: write
issues: read
packages: write
pull-requests: read
runs-on:
group: large
if: contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/qemu-csi-longhorn-v2') || contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/qemu-csi') || contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/extensions') || contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/qemu-csi-longhorn') || contains(fromJSON(needs.default.outputs.labels || '[]'), 'integration/release-gate')
needs:
- default
steps:
- name: gather-system-info
id: system-info
uses: kenchan0130/actions-system-info@59699597e84e80085a750998045983daa49274c4 # version: v1.4.0
continue-on-error: true
- name: print-system-info
run: |
MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024))
OUTPUTS=(
"CPU Core: ${{ steps.system-info.outputs.cpu-core }}"
"CPU Model: ${{ steps.system-info.outputs.cpu-model }}"
"Hostname: ${{ steps.system-info.outputs.hostname }}"
"NodeName: ${NODE_NAME}"
"Kernel release: ${{ steps.system-info.outputs.kernel-release }}"
"Kernel version: ${{ steps.system-info.outputs.kernel-version }}"
"Name: ${{ steps.system-info.outputs.name }}"
"Platform: ${{ steps.system-info.outputs.platform }}"
"Release: ${{ steps.system-info.outputs.release }}"
"Total memory: ${MEMORY_GB} GB"
)
for OUTPUT in "${OUTPUTS[@]}";do
echo "${OUTPUT}"
done
continue-on-error: true
- name: checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
- name: Unshallow
run: |
git fetch --prune --unshallow
- name: Set up Docker Buildx
id: setup-buildx
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # version: v4.0.0
with:
driver: remote
endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234
timeout-minutes: 10
- name: Download artifacts
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # version: v8.0.1
with:
name: talos-artifacts
path: _out
- name: Fix artifact permissions
run: |
xargs -a _out/executable-artifacts -I {} chmod +x {}
- name: ci-temp-release-tag
run: |
make ci-temp-release-tag
- name: checkout extensions
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # version: v6.0.2
with:
path: _out/extensions
ref: main
repository: siderolabs/extensions
- name: set variables
run: |
cat _out/talos-metadata >> "$GITHUB_ENV"
- name: build extensions
env:
PLATFORM: linux/amd64
PUSH: "true"
REGISTRY: registry.dev.siderolabs.io
run: |
make iscsi-tools util-linux-tools extensions-metadata -C _out/extensions
- name: installer extensions
env:
EXTENSIONS_FILTER_COMMAND: grep -E '/iscsi-tools|util-linux-tools'
IMAGE_REGISTRY: registry.dev.siderolabs.io
run: |
make installer-with-extensions
- name: kubelet-fat-patch
run: |
make kubelet-fat-patch
- name: e2e-qemu-csi-longhorn
env:
EXTRA_TEST_ARGS: '-talos.csi=longhorn '
GITHUB_STEP_NAME: ${{ github.job}}-e2e-qemu-csi-longhorn-v2
IMAGE_REGISTRY: registry.dev.siderolabs.io
QEMU_CPUS_WORKERS: "3"
QEMU_EXTRA_DISKS: "1"
@ -4112,7 +4231,7 @@ jobs:
- name: save artifacts
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # version: v7.0.1
with:
name: fio-integration-qemu-csi-longhorn
name: fio-integration-qemu-csi-longhorn-v2
path: |
/tmp/fio-*.json
retention-days: "180"
@ -4120,7 +4239,7 @@ jobs:
if: always()
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # version: v7.0.1
with:
name: talos-logs-integration-qemu-csi-longhorn
name: talos-logs-integration-qemu-csi-longhorn-v2
path: |-
/tmp/logs-*.tar.gz
/tmp/support-*.zip

View File

@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2026-04-27T17:59:00Z by kres e4dc583.
# Generated on 2026-05-05T14:21:38Z by kres 1762ab2.
concurrency:
group: ${{ github.head_ref || github.run_id }}
@ -14,11 +14,23 @@ concurrency:
name: integration-qemu-csi-longhorn-triggered
jobs:
default:
name: ${{ matrix.longhornEngine }}
permissions:
actions: read
runs-on:
group: large
if: github.event.workflow_run.conclusion == 'success'
strategy:
matrix:
include:
- csi: longhorn
longhornEngine: v2
- csi: longhorn-v1
extraTestArgs: -talos.skip-ephemeral-policy
extraWorkerPatch: :@hack/test/patches/ephemeral-insecure.yaml
longhornEngine: v1
fail-fast: false
max-parallel: 2
steps:
- name: gather-system-info
id: system-info
@ -97,8 +109,8 @@ jobs:
make kubelet-fat-patch
- name: e2e-qemu-csi-longhorn
env:
EXTRA_TEST_ARGS: -talos.csi=longhorn
GITHUB_STEP_NAME: ${{ github.job}}-e2e-qemu-csi-longhorn
EXTRA_TEST_ARGS: -talos.csi=${{ matrix.csi }} ${{ matrix.extraTestArgs }}
GITHUB_STEP_NAME: ${{ github.job}}-e2e-qemu-csi-longhorn-${{ matrix.longhornEngine }}
IMAGE_REGISTRY: registry.dev.siderolabs.io
QEMU_CPUS_WORKERS: "3"
QEMU_EXTRA_DISKS: "1"
@ -109,13 +121,13 @@ jobs:
QEMU_WORKERS: "3"
SHORT_INTEGRATION_TEST: "yes"
WITH_CONFIG_PATCH_CONTROLPLANE: '@hack/test/patches/longhorn-cp.yaml'
WITH_CONFIG_PATCH_WORKER: '@_out/installer-extensions-patch.yaml:@_out/kubelet-fat-patch.yaml:@hack/test/patches/longhorn.yaml'
WITH_CONFIG_PATCH_WORKER: '@_out/installer-extensions-patch.yaml:@_out/kubelet-fat-patch.yaml:@hack/test/patches/longhorn.yaml${{ matrix.extraWorkerPatch }}'
run: |
sudo -E make e2e-qemu
- name: save artifacts
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # version: v7.0.1
with:
name: fio-integration-qemu-csi-longhorn
name: fio-integration-qemu-csi-longhorn-${{ matrix.longhornEngine }}
path: |
/tmp/fio-*.json
retention-days: "180"
@ -123,7 +135,7 @@ jobs:
if: always()
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # version: v7.0.1
with:
name: talos-logs-integration-qemu-csi-longhorn
name: talos-logs-integration-qemu-csi-longhorn-${{ matrix.longhornEngine }}
path: |-
/tmp/logs-*.tar.gz
/tmp/support-*.zip

View File

@ -89,6 +89,8 @@ spec:
integration/qemu-csi: "Run QEMU CSI integration tests"
integration/qemu-csi-openebs: "Run QEMU CSI OpenEBS integration tests"
integration/qemu-csi-longhorn: "Run QEMU CSI Longhorn integration tests"
integration/qemu-csi-longhorn-v1: "Run QEMU CSI Longhorn v1 integration tests"
integration/qemu-csi-longhorn-v2: "Run QEMU CSI Longhorn v2 integration tests"
integration/qemu-csi-rook-ceph: "Run QEMU CSI Rook Ceph integration tests"
integration/qemu-default: "Run QEMU integration tests on default variant"
integration/qemu-encrypted-vip: "Run QEMU integration tests with disk encryption and virtual IP"
@ -934,7 +936,6 @@ spec:
- integration-build-enforcing
conditions:
- not-cancelled
- on-pull-request
runnerGroup: large
onWorkflowRun:
workflows: [integration-build-enforcing-triggered]
@ -1108,7 +1109,6 @@ spec:
- integration-build-enforcing
conditions:
- not-cancelled
- on-pull-request
runnerGroup: large
onWorkflowRun:
workflows: [integration-build-enforcing-triggered]
@ -1187,7 +1187,6 @@ spec:
- integration-build-enforcing
conditions:
- not-cancelled
- on-pull-request
runnerGroup: large
onWorkflowRun:
workflows: [integration-build-enforcing-triggered]
@ -1478,9 +1477,26 @@ spec:
types: [completed]
triggerLabels:
- integration/qemu-csi
- integration/extensions # since iscsi is tested with longhorn
- integration/extensions # since iscsi is tested with longhorn v1
- integration/qemu-csi-longhorn
- integration/release-gate
matrix:
maxParallel: 2
labelKeys: [longhornEngine]
# v2 (SPDK) is the default-secure path; v1 needs noexec disabled on
# /var so the instance-manager can exec engine binaries it drops
# under /var/lib/longhorn/engine-binaries/. The v1 path also tells
# MountsSuite to skip the /var policy check via
# -talos.skip-ephemeral-policy.
include:
- longhornEngine: v2
csi: longhorn
extraWorkerPatch: ""
extraTestArgs: ""
- longhornEngine: v1
csi: longhorn-v1
extraWorkerPatch: ":@hack/test/patches/ephemeral-insecure.yaml"
extraTestArgs: "-talos.skip-ephemeral-policy"
steps:
- name: download-artifacts
artifactStep:
@ -1515,7 +1531,7 @@ spec:
command: e2e-qemu
withSudo: true
environment:
GITHUB_STEP_NAME: ${{ github.job}}-e2e-qemu-csi-longhorn
GITHUB_STEP_NAME: ${{ github.job}}-e2e-qemu-csi-longhorn-${{ matrix.longhornEngine }}
SHORT_INTEGRATION_TEST: yes
QEMU_WORKERS: 3
QEMU_MEMORY_WORKERS: 10240
@ -1525,13 +1541,13 @@ spec:
QEMU_EXTRA_DISKS_SIZE: 12288
QEMU_EXTRA_DISKS_DRIVERS: nvme
WITH_CONFIG_PATCH_CONTROLPLANE: "@hack/test/patches/longhorn-cp.yaml"
WITH_CONFIG_PATCH_WORKER: "@_out/installer-extensions-patch.yaml:@_out/kubelet-fat-patch.yaml:@hack/test/patches/longhorn.yaml"
EXTRA_TEST_ARGS: -talos.csi=longhorn
WITH_CONFIG_PATCH_WORKER: "@_out/installer-extensions-patch.yaml:@_out/kubelet-fat-patch.yaml:@hack/test/patches/longhorn.yaml${{ matrix.extraWorkerPatch }}"
EXTRA_TEST_ARGS: -talos.csi=${{ matrix.csi }} ${{ matrix.extraTestArgs }}
IMAGE_REGISTRY: registry.dev.siderolabs.io
- name: save-fio-benchmark
artifactStep:
type: upload
artifactName: fio-integration-qemu-csi-longhorn
artifactName: fio-integration-qemu-csi-longhorn-${{ matrix.longhornEngine }}
disableExecutableListGeneration: true
artifactPath: /tmp/fio-*.json
retentionDays: "180"
@ -1540,7 +1556,7 @@ spec:
- always
artifactStep:
type: upload
artifactName: talos-logs-integration-qemu-csi-longhorn
artifactName: talos-logs-integration-qemu-csi-longhorn-${{ matrix.longhornEngine }}
disableExecutableListGeneration: true
artifactPath: /tmp/logs-*.tar.gz
additionalArtifacts:

View File

@ -173,6 +173,9 @@ message MountSpec {
string bind_target = 9;
// Parameters are additional filesystem mount options used when mounting the volume.
repeated ParameterSpec parameters = 10;
// Secure applies MOUNT_ATTR_NOSUID\|NODEV\|NOEXEC to the mount. Set for
// config-only mounts; leave false for mounts hosting executables.
bool secure = 11;
}
// MountStatusSpec is the spec for MountStatus.

View File

@ -77,6 +77,29 @@ List of changes:
DHCPv4 search domains are now applied to the resolver configuration.
"""
[notes.EPHEMERAL]
title = "noexec on EPHEMERAL (/var)"
description = """\
The EPHEMERAL volume (`/var`) is now mounted with `noexec` in addition to the existing `nosuid` and `nodev`,
blocking binary execution from `/var`.
Workloads that exec binaries placed under `/var` will break.
For example, Longhorn v1's `instance-manager` executes engine binaries that the `engine-image` DaemonSet places under `/var/lib/longhorn/engine-binaries/`,
which now fails with `permission denied`. Affected users can opt out via a `VolumeConfig` document:
```yaml
apiVersion: v1alpha1
kind: VolumeConfig
name: EPHEMERAL
mount:
secure: false
```
> NOTE: Setting `secure: false` will also disable `nosuid` and `nodev`, which may have security implications. Use with caution.
Upgrade note: apply this `VolumeConfig` patch *before* upgrading, otherwise affected workloads will fail after the next reboot. Longhorn v2 (SPDK data engine) runs the data plane inside the instance manager process and is not affected.
"""
[make_deps]
[make_deps.tools]

View File

@ -0,0 +1,6 @@
---
apiVersion: v1alpha1
kind: VolumeConfig
name: EPHEMERAL
mount:
secure: false

View File

@ -61,6 +61,7 @@ func GetStateVolumeTransformer(encryptionMeta *runtime.MetaKey, inContainer, isA
FileMode: 0o700,
UID: 0,
GID: 0,
Secure: true,
}).WriterFunc()
} else {
// STATE configuration should be always created, but it depends on the configuration presence
@ -133,6 +134,7 @@ func GetEphemeralVolumeTransformer(inContainer bool) volumeConfigTransformer {
UID: 0,
GID: 0,
ProjectQuotaSupport: cfg.Machine().Features().DiskQuotaSupportEnabled(),
Secure: extraVolumeConfig.Mount().Secure(),
}).
WithLocator(labelVolumeMatch(constants.EphemeralPartitionLabel)).
WithFunc(func(vcs *block.VolumeConfigSpec) error {
@ -185,6 +187,7 @@ func GetOverlayVolumesTransformer(inContainer bool) func(configconfig.Config) ([
FileMode: 0o755,
UID: 0,
GID: 0,
Secure: overlay.Secure,
}).WriterFunc(),
})
}
@ -207,6 +210,7 @@ func manageStateNoConfig(encryptionMeta *runtime.MetaKey, isAgent bool) func(vc
FileMode: 0o700,
UID: 0,
GID: 0,
Secure: true,
}).WithLocator(match).
WithFunc(func(spec *block.VolumeConfigSpec) error {
if encryptionMeta != nil {
@ -248,6 +252,7 @@ func manageStateConfigPresent(cfg configconfig.Config) func(vc *block.VolumeConf
FileMode: 0o700,
UID: 0,
GID: 0,
Secure: true,
}).
WithProvisioning(block.ProvisioningSpec{
Wave: block.WaveSystemDisk,

View File

@ -448,6 +448,46 @@ func TestEphemeralVolumeTransformerWithExtraConfig(t *testing.T) {
})
}
func TestEphemeralVolumeSecure(t *testing.T) {
t.Parallel()
t.Run("default is secure", func(t *testing.T) {
t.Parallel()
transformer := volumeconfig.GetEphemeralVolumeTransformer(false)
resources, err := transformer(container.NewV1Alpha1(&baseCfg))
require.NoError(t, err)
require.Len(t, resources, 1)
testTransformFunc(t, resources[0].TransformFunc, func(t *testing.T, vc *block.VolumeConfig, err error) {
require.NoError(t, err)
assert.True(t, vc.TypedSpec().Mount.Secure, "EPHEMERAL should be secure by default")
})
})
t.Run("secure=false via VolumeConfig overrides default", func(t *testing.T) {
t.Parallel()
secureOff := false
ephemeralCfg := blockcfg.NewVolumeConfigV1Alpha1()
ephemeralCfg.MetaName = constants.EphemeralPartitionLabel
ephemeralCfg.MountSpec.MountSecure = &secureOff
cfg, err := container.New(baseCfg.DeepCopy(), ephemeralCfg)
require.NoError(t, err)
transformer := volumeconfig.GetEphemeralVolumeTransformer(false)
resources, err := transformer(cfg)
require.NoError(t, err)
require.Len(t, resources, 1)
testTransformFunc(t, resources[0].TransformFunc, func(t *testing.T, vc *block.VolumeConfig, err error) {
require.NoError(t, err)
assert.False(t, vc.TypedSpec().Mount.Secure, "EPHEMERAL Secure should be overridable via VolumeConfig")
})
})
}
func testTransformFunc(t *testing.T,
transformer func(vc *block.VolumeConfig) error,
checkFunc func(t *testing.T, vc *block.VolumeConfig, err error),

View File

@ -756,11 +756,19 @@ func (ctrl *MountController) handleOverlayMountOperation(
return fmt.Errorf("overlay mount is not supported for %q", volumeStatus.TypedSpec().ParentID)
}
overlayOpts := []mount.ManagerOption{
mount.WithSelinuxLabel(volumeStatus.TypedSpec().MountSpec.SelinuxLabel),
}
if volumeStatus.TypedSpec().MountSpec.Secure {
overlayOpts = append(overlayOpts, mount.WithSecure())
}
manager := mount.NewVarOverlay(
[]string{mountTarget},
mountTarget,
logger.Sugar().Infof,
mount.WithSelinuxLabel(volumeStatus.TypedSpec().MountSpec.SelinuxLabel),
overlayOpts...,
)
mountpoint, err := manager.Mount()

View File

@ -385,6 +385,9 @@ func (ctrl *ImageCacheConfigController) analyzeImageCacheVolumes(ctx context.Con
mountRequest.TypedSpec().Requester = ctrl.Name()
mountRequest.TypedSpec().VolumeID = volumeID
mountRequest.TypedSpec().ReadOnly = !(volumeStatus.Metadata().ID() == VolumeImageCacheDISK && isoPresent)
// Image cache stores OCI image data only; Secure applies
// nosuid+nodev+noexec.
mountRequest.TypedSpec().Secure = true
return nil
},

View File

@ -1460,10 +1460,8 @@ func MountEphemeralPartition(runtime.Sequence, any) (runtime.TaskExecutionFunc,
mountRequest.TypedSpec().VolumeID = constants.EphemeralPartitionLabel
mountRequest.TypedSpec().Requester = "sequencer"
if cfg := r.Config(); cfg != nil {
vol, _ := cfg.Volumes().ByName(constants.EphemeralPartitionLabel)
mountRequest.TypedSpec().Secure = vol.Mount().Secure()
}
vol, _ := r.Config().Volumes().ByName(constants.EphemeralPartitionLabel)
mountRequest.TypedSpec().Secure = vol.Mount().Secure()
if err := r.State().V1Alpha2().Resources().Create(ctx, mountRequest); err != nil {
return fmt.Errorf("failed to create EPHEMERAL mount request: %w", err)

View File

@ -10,9 +10,9 @@ import (
"path/filepath"
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
"github.com/siderolabs/talos/internal/pkg/containermode"
mount "github.com/siderolabs/talos/internal/pkg/mount/v3"
"github.com/siderolabs/talos/pkg/machinery/constants"
)
@ -30,7 +30,7 @@ func prepareRootfs(id string) error {
return fmt.Errorf("failed to create empty executable %q: %w", executablePath, err)
}
if err := unix.Mount("/sbin/init", executablePath, "", unix.MS_BIND, ""); err != nil {
if err := mount.BindReadonly("/sbin/init", executablePath); err != nil {
return fmt.Errorf("failed to create bind mount for %q: %w", executablePath, err)
}

View File

@ -0,0 +1,259 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
//go:build integration_api
package api
import (
"bufio"
"context"
"fmt"
"io"
"strings"
"time"
"github.com/siderolabs/talos/internal/integration/base"
"github.com/siderolabs/talos/pkg/machinery/client"
"github.com/siderolabs/talos/pkg/machinery/constants"
)
// MountsSuite verifies mount flag policy on a running node.
//
// Policy (see siderolabs/talos#11946):
// - every rw mount must carry MOUNT_ATTR_NOSUID, MOUNT_ATTR_NOEXEC,
// MOUNT_ATTR_NODEV unless explicitly exempt
// - device nodes are not allowed outside /dev and /dev/pts: NODEV is
// non-negotiable for every other mountpoint
type MountsSuite struct {
base.APISuite
ctx context.Context //nolint:containedctx
ctxCancel context.CancelFunc
}
// SuiteName implements suite.NamedSuite.
func (suite *MountsSuite) SuiteName() string {
return "api.MountsSuite"
}
// SetupTest sets up the test context.
func (suite *MountsSuite) SetupTest() {
suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 30*time.Second)
if suite.Cluster == nil || suite.Cluster.Provisioner() != base.ProvisionerQEMU {
suite.T().Skip("skipping mounts test since provisioner is not qemu")
}
}
// TearDownTest cancels the test context.
func (suite *MountsSuite) TearDownTest() {
if suite.ctxCancel != nil {
suite.ctxCancel()
}
}
// mountInfo is one parsed entry from /proc/self/mountinfo.
type mountInfo struct {
mountPoint string
fsType string
source string
options map[string]struct{} // per-mount options (field 6)
}
func (m mountInfo) has(opt string) bool {
_, ok := m.options[opt]
return ok
}
// parseMountInfo parses /proc/self/mountinfo per Linux kernel docs:
// fields[4] = mount point, fields[5] = per-mount options, after " - ":
// fstype, source, super-options.
func parseMountInfo(r io.Reader) ([]mountInfo, error) {
var out []mountInfo
scanner := bufio.NewScanner(r)
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
for scanner.Scan() {
line := scanner.Text()
pre, post, ok := strings.Cut(line, " - ")
if !ok {
continue
}
preFields := strings.Fields(pre)
postFields := strings.Fields(post)
if len(preFields) < 6 || len(postFields) < 2 {
continue
}
opts := make(map[string]struct{})
for o := range strings.SplitSeq(preFields[5], ",") {
opts[o] = struct{}{}
}
out = append(out, mountInfo{
mountPoint: preFields[4],
options: opts,
fsType: postFields[0],
source: postFields[1],
})
}
return out, scanner.Err()
}
// nodevExempt returns true for mountpoints where device nodes are legitimate.
// Only devtmpfs at /dev and devpts at /dev/pts qualify.
func nodevExempt(m mountInfo) bool {
switch {
case m.fsType == "devtmpfs" && m.mountPoint == "/dev":
return true
case m.fsType == "devpts" && m.mountPoint == "/dev/pts":
return true
}
return false
}
// workloadManagedPrefixes lists mount path prefixes that are created by
// kubelet, containerd, or CNI plugins — not by Talos. Their flags are out
// of scope for the Talos mount policy.
var workloadManagedPrefixes = []string{
"/run/containerd/io.containerd.",
"/run/netns/",
"/var/lib/kubelet/pods/",
}
func workloadManaged(m mountInfo) bool {
for _, p := range workloadManagedPrefixes {
if strings.HasPrefix(m.mountPoint, p) {
return true
}
}
return false
}
// noexecExemptPrefixes lists mount path prefixes where executing binaries
// is part of the design. Read-only mounts are exempt elsewhere via the
// `ro` option. /var (EPHEMERAL) is intentionally NOT exempt: containerd
// container exec goes through overlay rootfs at /run/containerd/.../rootfs
// which is a separate mount with its own flags.
var noexecExemptPrefixes = []string{
"/opt", // CNI plugins, containerd plugins
"/usr/libexec/kubernetes", // kubelet plugins
"/usr/lib/udev", // udev helpers
constants.ExtensionServiceRootfsPath, // /usr/local/lib/containers — extension service rootfs overlays (iscsid, etc.)
}
func noexecExempt(m mountInfo) bool {
if m.has("ro") {
return true
}
// devtmpfs and hugetlbfs cannot host regular executable files in any
// way that a userspace exec() would care about; systemd matches this
// stance (see mount_table in systemd/src/shared/mount-setup.c — no
// MS_NOEXEC on /dev).
switch m.fsType {
case "devtmpfs", "hugetlbfs":
return true
}
for _, p := range noexecExemptPrefixes {
if m.mountPoint == p || strings.HasPrefix(m.mountPoint, p+"/") {
return true
}
}
return false
}
// TestNodevPolicy asserts every mount outside /dev and /dev/pts carries nodev.
func (suite *MountsSuite) TestNodevPolicy() {
suite.runPolicy("nodev", nodevExempt, "device nodes only in /dev and /dev/pts")
}
// TestNosuidPolicy asserts every mount carries nosuid. Talos has no
// legitimate SUID surface — even read-only signed rootfs/extension
// squashfs mounts ship no setuid binaries, so no exemptions.
func (suite *MountsSuite) TestNosuidPolicy() {
suite.runPolicy("nosuid", func(m mountInfo) bool {
return false
}, "no SUID binaries anywhere in Talos")
}
// TestNoexecPolicy asserts every rw mount carries noexec, except
// documented exemptions (EPHEMERAL, /opt/cni, kubelet plugins, udev
// helpers). Read-only mounts are exempt (signed rootfs / extension
// squashfs).
func (suite *MountsSuite) TestNoexecPolicy() {
suite.runPolicy("noexec", noexecExempt,
"binaries should only execute from RO or explicitly exempt mounts")
}
func (suite *MountsSuite) runPolicy(opt string, exempt func(mountInfo) bool, rationale string) {
for _, node := range suite.DiscoverNodeInternalIPs(suite.ctx) {
suite.Run(node, func() {
suite.checkOptOnNode(node, opt, exempt, rationale)
})
}
}
func (suite *MountsSuite) checkOptOnNode(node, opt string, exempt func(mountInfo) bool, rationale string) {
mounts := suite.readMountInfo(node)
var violations []string
for _, m := range mounts {
if workloadManaged(m) || exempt(m) {
continue
}
// /var honors the EPHEMERAL VolumeConfig's mount.secure setting; when
// the cluster was deployed with secure=false skip the assertion to match
// the configured policy rather than the secure-by-default one.
if suite.SkipEphemeralPolicy && m.mountPoint == constants.EphemeralMountPoint {
continue
}
if !m.has(opt) {
violations = append(
violations,
fmt.Sprintf("%s (fstype=%s, source=%s)", m.mountPoint, m.fsType, m.source),
)
}
}
suite.Assert().Empty(
violations,
"mounts missing %s (policy: %s):\n %s",
opt, rationale, strings.Join(violations, "\n "),
)
}
// readMountInfo fetches and parses /proc/self/mountinfo from a node.
func (suite *MountsSuite) readMountInfo(node string) []mountInfo {
nodeCtx := client.WithNode(suite.ctx, node)
r, err := suite.Client.Read(nodeCtx, "/proc/self/mountinfo")
suite.Require().NoError(err)
defer r.Close() //nolint:errcheck
mounts, err := parseMountInfo(r)
suite.Require().NoError(err)
suite.Require().NotEmpty(mounts)
return mounts
}
func init() {
allSuites = append(allSuites, new(MountsSuite))
}

View File

@ -66,6 +66,9 @@ type TalosSuite struct {
Virtiofsd bool
// Race informs test suites about race detector being enabled (e.g. for skipping incompatible tests)
Race bool
// SkipEphemeralPolicy disables MountsSuite's nosuid/nodev/noexec assertions
// for the EPHEMERAL (/var) mount point.
SkipEphemeralPolicy bool
discoveredNodes cluster.Info
}

View File

@ -34,15 +34,16 @@ var allSuites []suite.TestingSuite
// Flag values.
var (
failFast bool
trustedBoot bool
selinuxEnforcing bool
extensionsQEMU bool
extensionsNvidia bool
verifyUKIBooted bool
airgapped bool
virtiofsd bool
race bool
failFast bool
trustedBoot bool
selinuxEnforcing bool
extensionsQEMU bool
extensionsNvidia bool
verifyUKIBooted bool
airgapped bool
virtiofsd bool
race bool
skipEphemeralPolicy bool
talosConfig string
endpoint string
@ -101,27 +102,28 @@ func TestIntegration(t *testing.T) {
for _, s := range allSuites {
if configuredSuite, ok := s.(base.ConfiguredSuite); ok {
configuredSuite.SetConfig(base.TalosSuite{
Endpoint: endpoint,
K8sEndpoint: k8sEndpoint,
Cluster: cluster,
TalosConfig: talosConfig,
Version: expectedVersion,
GoVersion: expectedGoVersion,
TalosctlPath: talosctlPath,
KubectlPath: kubectlPath,
HelmPath: helmPath,
KubeStrPath: kubeStrPath,
ExtensionsQEMU: extensionsQEMU,
ExtensionsNvidia: extensionsNvidia,
TrustedBoot: trustedBoot,
SelinuxEnforcing: selinuxEnforcing,
VerifyUKIBooted: verifyUKIBooted,
TalosImage: talosImage,
CSITestName: csiTestName,
CSITestTimeout: csiTestTimeout,
Airgapped: airgapped,
Virtiofsd: virtiofsd,
Race: race,
Endpoint: endpoint,
K8sEndpoint: k8sEndpoint,
Cluster: cluster,
TalosConfig: talosConfig,
Version: expectedVersion,
GoVersion: expectedGoVersion,
TalosctlPath: talosctlPath,
KubectlPath: kubectlPath,
HelmPath: helmPath,
KubeStrPath: kubeStrPath,
ExtensionsQEMU: extensionsQEMU,
ExtensionsNvidia: extensionsNvidia,
TrustedBoot: trustedBoot,
SelinuxEnforcing: selinuxEnforcing,
VerifyUKIBooted: verifyUKIBooted,
TalosImage: talosImage,
CSITestName: csiTestName,
CSITestTimeout: csiTestTimeout,
Airgapped: airgapped,
Virtiofsd: virtiofsd,
Race: race,
SkipEphemeralPolicy: skipEphemeralPolicy,
})
}
@ -185,6 +187,8 @@ func init() {
flag.StringVar(&csiTestTimeout, "talos.csi.timeout", "15m", "CSI test timeout")
flag.BoolVar(&airgapped, "talos.airgapped", false, "Marker to skip tests that should not be run on airgapped talos cluster")
flag.BoolVar(&virtiofsd, "talos.virtiofsd", false, "Marker to skip tests that should not be run without virtiofsd")
flag.BoolVar(&skipEphemeralPolicy, "talos.skip-ephemeral-policy", false,
"Skip MountsSuite assertions for /var (EPHEMERAL); set when the cluster was deployed with VolumeConfig EPHEMERAL mount.secure=false")
flag.StringVar(&provision_test.DefaultSettings.CIDR, "talos.provision.cidr", provision_test.DefaultSettings.CIDR, "CIDR to use to provision clusters (provision tests only)")
flag.Var(&provision_test.DefaultSettings.RegistryMirrors, "talos.provision.registry-mirror", "registry mirrors to use (provision tests only)")

View File

@ -7,11 +7,8 @@
package k8s
import (
"bytes"
"context"
_ "embed"
"strings"
"text/template"
"time"
"github.com/siderolabs/talos/internal/integration/base"
@ -19,15 +16,6 @@ import (
)
var (
//go:embed testdata/longhorn-iscsi-volume.yaml
longHornISCSIVolumeManifest []byte
//go:embed testdata/longhorn-volumeattachment.yaml
longHornISCSIVolumeAttachmentManifestTemplate []byte
//go:embed testdata/pod-iscsi-volume.yaml
podWithISCSIVolumeTemplate []byte
//go:embed testdata/longhorn-v2-engine-values.yaml
longhornEngineV2Values []byte
@ -38,7 +26,12 @@ var (
longhornNodeDiskPatch []byte
)
// LongHornSuite tests deploying Longhorn.
// LongHornSuite tests deploying Longhorn with the v2 (SPDK) data engine.
//
// The v1 engine relies on exec'ing engine binaries the engine-image DaemonSet
// drops under /var/lib/longhorn/engine-binaries/, which is incompatible with
// noexec on /var (see LongHornV1Suite for the v1 path that opts out via the
// ephemeral-insecure VolumeConfig patch).
type LongHornSuite struct {
base.K8sSuite
}
@ -48,7 +41,7 @@ func (suite *LongHornSuite) SuiteName() string {
return "k8s.LongHornSuite"
}
// TestDeploy tests deploying Longhorn and running a simple test.
// TestDeploy tests deploying Longhorn (v2 data engine) and running fio against it.
func (suite *LongHornSuite) TestDeploy() {
if suite.Cluster == nil {
suite.T().Skip("without full cluster state reaching out to the node IP is not reliable")
@ -100,124 +93,35 @@ func (suite *LongHornSuite) TestDeploy() {
suite.Require().NoError(suite.WaitForResource(ctx, "longhorn-system", "longhorn.io", "Node", "v1beta2", k8sNode.Name, "{.status.diskStatus.*.conditions[?(@.type==\"Schedulable\")].status}", "True"))
suite.PatchK8sObject(ctx, "longhorn-system", "longhorn.io", "Node", "v1beta2", k8sNode.Name, longhornNodeDiskPatch)
}
suite.Run("fio", func() {
suite.Require().NoError(suite.RunFIOTest(ctx, "longhorn", "10G"))
})
// Wait for the SPDK-managed nvme block disk to finish initializing
// before running fio: replica scheduling on this disk is what fio-v2
// exercises, and SPDK can take several seconds per node.
suite.Require().NoError(suite.WaitForResource(
ctx,
"longhorn-system",
"longhorn.io",
"Node",
"v1beta2",
k8sNode.Name,
"{.status.diskStatus.nvme.conditions[?(@.type==\"Ready\")].status}",
"True",
))
suite.Require().NoError(suite.WaitForResource(
ctx,
"longhorn-system",
"longhorn.io",
"Node",
"v1beta2",
k8sNode.Name,
"{.status.diskStatus.nvme.conditions[?(@.type==\"Schedulable\")].status}",
"True",
))
}
suite.Run("fio-v2", func() {
suite.Require().NoError(suite.RunFIOTest(ctx, "longhorn-v2", "10G"))
})
suite.Run("iscsi", func() {
suite.testDeployISCSI(ctx)
})
}
// testDeployISCSI attaches the "iscsi" Longhorn volume to a random worker
// node via a Longhorn VolumeAttachment, waits for the volume and engine to
// come up, then mounts the exported iSCSI target from a pod using the
// in-tree Kubernetes iscsi volume plugin.
//
//nolint:gocyclo
func (suite *LongHornSuite) testDeployISCSI(ctx context.Context) {
longHornISCSIVolumeManifestUnstructured := suite.ParseManifests(longHornISCSIVolumeManifest)
// Always clean up the volume manifests; use a fresh context with its own
// timeout since ctx may already be expired when the test ends.
defer func() {
cleanUpCtx, cleanupCancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cleanupCancel()
suite.DeleteManifests(cleanUpCtx, longHornISCSIVolumeManifestUnstructured)
}()
suite.ApplyManifests(ctx, longHornISCSIVolumeManifestUnstructured)
tmpl, err := template.New("longhorn-iscsi-volumeattachment").Parse(string(longHornISCSIVolumeAttachmentManifestTemplate))
suite.Require().NoError(err)
var longHornISCSIVolumeAttachmentManifest bytes.Buffer
// Pin the attachment to one worker node; the consumer pod below is
// rendered with the same NodeName so it lands on the attached node.
node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)
nodeInfo, err := suite.GetK8sNodeByInternalIP(ctx, node)
if err != nil {
suite.T().Fatalf("failed to get K8s node by internal IP: %v", err)
}
if err := tmpl.Execute(&longHornISCSIVolumeAttachmentManifest, struct {
NodeID string
}{
NodeID: nodeInfo.Name,
}); err != nil {
suite.T().Fatalf("failed to render Longhorn ISCSI volume manifest: %v", err)
}
longHornISCSIVolumeAttachmentManifestUnstructured := suite.ParseManifests(longHornISCSIVolumeAttachmentManifest.Bytes())
suite.ApplyManifests(ctx, longHornISCSIVolumeAttachmentManifestUnstructured)
// Wait for the volume to be healthy and attached, and the engine to be
// running, before reading the iSCSI endpoint off the Engine status.
if err := suite.WaitForResource(ctx, "longhorn-system", "longhorn.io", "Volume", "v1beta2", "iscsi", "{.status.robustness}", "healthy"); err != nil {
suite.T().Fatalf("failed to wait for LongHorn Engine to be Ready: %v", err)
}
if err := suite.WaitForResource(ctx, "longhorn-system", "longhorn.io", "Volume", "v1beta2", "iscsi", "{.status.state}", "attached"); err != nil {
suite.T().Fatalf("failed to wait for LongHorn Engine to be Ready: %v", err)
}
if err := suite.WaitForResource(ctx, "longhorn-system", "longhorn.io", "Engine", "v1beta2", "iscsi-e-0", "{.status.currentState}", "running"); err != nil {
suite.T().Fatalf("failed to wait for LongHorn Engine to be Ready: %v", err)
}
unstructured, err := suite.GetUnstructuredResource(ctx, "longhorn-system", "longhorn.io", "Engine", "v1beta2", "iscsi-e-0")
if err != nil {
suite.T().Fatalf("failed to get LongHorn Engine resource: %v", err)
}
// NOTE(review): if status is absent or not a map, endpointData stays ""
// and the failure only surfaces at the strings.Cut parse below.
var endpointData string
if status, ok := unstructured.Object["status"].(map[string]any); ok {
endpointData, ok = status["endpoint"].(string)
if !ok {
suite.T().Fatalf("failed to get LongHorn Engine endpoint")
}
}
tmpl, err = template.New("pod-iscsi-volume").Parse(string(podWithISCSIVolumeTemplate))
suite.Require().NoError(err)
// endpoint is of the form `iscsi://10.244.0.5:3260/iqn.2019-10.io.longhorn:iscsi/1`
// trim the iscsi:// prefix
endpointData = strings.TrimPrefix(endpointData, "iscsi://")
// trim the /1 suffix
endpointData = strings.TrimSuffix(endpointData, "/1")
targetPortal, IQN, ok := strings.Cut(endpointData, "/")
if !ok {
suite.T().Fatalf("failed to parse endpoint data from %s", endpointData)
}
var podWithISCSIVolume bytes.Buffer
if err := tmpl.Execute(&podWithISCSIVolume, struct {
NodeName string
TargetPortal string
IQN string
}{
NodeName: nodeInfo.Name,
TargetPortal: targetPortal,
IQN: IQN,
}); err != nil {
suite.T().Fatalf("failed to render pod with ISCSI volume manifest: %v", err)
}
podWithISCSIVolumeUnstructured := suite.ParseManifests(podWithISCSIVolume.Bytes())
// Delete the consumer pod with its own short-lived cleanup context.
defer func() {
cleanUpCtx, cleanupCancel := context.WithTimeout(context.Background(), time.Minute)
defer cleanupCancel()
suite.DeleteManifests(cleanUpCtx, podWithISCSIVolumeUnstructured)
}()
suite.ApplyManifests(ctx, podWithISCSIVolumeUnstructured)
suite.Require().NoError(suite.WaitForPodToBeRunning(ctx, 3*time.Minute, "default", "iscsipd"))
}
func init() {

View File

@ -0,0 +1,209 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
//go:build integration_k8s
package k8s
import (
"bytes"
"context"
_ "embed"
"strings"
"text/template"
"time"
"github.com/siderolabs/talos/internal/integration/base"
"github.com/siderolabs/talos/pkg/machinery/config/machine"
)
var (
//go:embed testdata/longhorn-iscsi-volume.yaml
longHornISCSIVolumeManifest []byte
//go:embed testdata/longhorn-volumeattachment.yaml
longHornISCSIVolumeAttachmentManifestTemplate []byte
//go:embed testdata/pod-iscsi-volume.yaml
podWithISCSIVolumeTemplate []byte
)
// LongHornV1Suite tests deploying Longhorn with the v1 data engine.
//
// The v1 engine's instance-manager exec's engine binaries it drops under
// /var/lib/longhorn/engine-binaries/, which only works when /var is mounted
// without noexec. The matching CI matrix entry applies
// hack/test/patches/ephemeral-insecure.yaml to disable Secure on the EPHEMERAL
// VolumeConfig.
//
// The suite is gated on the harness setting CSITestName to "longhorn-v1";
// otherwise TestDeploy skips itself.
type LongHornV1Suite struct {
base.K8sSuite
}
// SuiteName returns the name of the suite.
func (suite *LongHornV1Suite) SuiteName() string {
	const suiteName = "k8s.LongHornV1Suite"

	return suiteName
}
// TestDeploy tests deploying Longhorn (v1 data engine) and running fio + an
// in-tree Kubernetes iscsi volume against a v1 iscsi-frontend Longhorn volume.
func (suite *LongHornV1Suite) TestDeploy() {
	if suite.Cluster == nil {
		suite.T().Skip("without full cluster state reaching out to the node IP is not reliable")
	}

	if suite.CSITestName != "longhorn-v1" {
		suite.T().Skip("skipping longhorn-v1 test as it is not enabled")
	}

	timeout, err := time.ParseDuration(suite.CSITestTimeout)
	if err != nil {
		suite.T().Fatalf("failed to parse timeout: %v", err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	suite.T().Cleanup(cancel)

	if err := suite.HelmInstall(
		ctx,
		"longhorn-system",
		"https://charts.longhorn.io",
		LongHornHelmChartVersion,
		"longhorn",
		"longhorn",
		nil,
	); err != nil {
		suite.T().Fatalf("failed to install Longhorn chart: %v", err)
	}

	nodes := suite.DiscoverNodeInternalIPsByType(ctx, machine.TypeWorker)
	// Require().Len is the idiomatic testify form of Equal(3, len(nodes))
	// and reports the actual slice contents on failure.
	suite.Require().Len(nodes, 3, "expected 3 worker nodes")

	// Wait for every worker's Longhorn Node to report its disks Ready and
	// Schedulable before exercising replica scheduling below.
	for _, node := range nodes {
		k8sNode, err := suite.GetK8sNodeByInternalIP(ctx, node)
		suite.Require().NoError(err)

		suite.Require().NoError(suite.WaitForResourceToBeAvailable(ctx, 2*time.Minute, "longhorn-system", "longhorn.io", "Node", "v1beta2", k8sNode.Name))
		suite.Require().NoError(suite.WaitForResource(ctx, "longhorn-system", "longhorn.io", "Node", "v1beta2", k8sNode.Name, "{.status.diskStatus.*.conditions[?(@.type==\"Ready\")].status}", "True"))
		suite.Require().NoError(suite.WaitForResource(ctx, "longhorn-system", "longhorn.io", "Node", "v1beta2", k8sNode.Name, "{.status.diskStatus.*.conditions[?(@.type==\"Schedulable\")].status}", "True"))
	}

	suite.Run("fio", func() {
		suite.Require().NoError(suite.RunFIOTest(ctx, "longhorn", "10G"))
	})

	suite.Run("iscsi", func() {
		suite.testDeployISCSI(ctx)
	})
}
// testDeployISCSI attaches the "iscsi" Longhorn volume to a random worker
// node via a Longhorn VolumeAttachment, waits for the volume and engine to
// come up, then mounts the exported iSCSI target from a pod using the
// in-tree Kubernetes iscsi volume plugin.
//
//nolint:gocyclo
func (suite *LongHornV1Suite) testDeployISCSI(ctx context.Context) {
	longHornISCSIVolumeManifestUnstructured := suite.ParseManifests(longHornISCSIVolumeManifest)

	// Always clean up the volume manifests; use a fresh context with its own
	// timeout since ctx may already be expired when the test ends.
	defer func() {
		cleanUpCtx, cleanupCancel := context.WithTimeout(context.Background(), 2*time.Minute)
		defer cleanupCancel()

		suite.DeleteManifests(cleanUpCtx, longHornISCSIVolumeManifestUnstructured)
	}()

	suite.ApplyManifests(ctx, longHornISCSIVolumeManifestUnstructured)

	tmpl, err := template.New("longhorn-iscsi-volumeattachment").Parse(string(longHornISCSIVolumeAttachmentManifestTemplate))
	suite.Require().NoError(err)

	var longHornISCSIVolumeAttachmentManifest bytes.Buffer

	// Pin the attachment to one worker node; the consumer pod below is
	// rendered with the same NodeName so it lands on the attached node.
	node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)

	nodeInfo, err := suite.GetK8sNodeByInternalIP(ctx, node)
	if err != nil {
		suite.T().Fatalf("failed to get K8s node by internal IP: %v", err)
	}

	if err := tmpl.Execute(&longHornISCSIVolumeAttachmentManifest, struct {
		NodeID string
	}{
		NodeID: nodeInfo.Name,
	}); err != nil {
		suite.T().Fatalf("failed to render Longhorn ISCSI volume manifest: %v", err)
	}

	longHornISCSIVolumeAttachmentManifestUnstructured := suite.ParseManifests(longHornISCSIVolumeAttachmentManifest.Bytes())

	suite.ApplyManifests(ctx, longHornISCSIVolumeAttachmentManifestUnstructured)

	// Wait for the volume to be healthy and attached, and the engine to be
	// running, before reading the iSCSI endpoint off the Engine status.
	if err := suite.WaitForResource(ctx, "longhorn-system", "longhorn.io", "Volume", "v1beta2", "iscsi", "{.status.robustness}", "healthy"); err != nil {
		suite.T().Fatalf("failed to wait for LongHorn Volume to be healthy: %v", err)
	}

	if err := suite.WaitForResource(ctx, "longhorn-system", "longhorn.io", "Volume", "v1beta2", "iscsi", "{.status.state}", "attached"); err != nil {
		suite.T().Fatalf("failed to wait for LongHorn Volume to be attached: %v", err)
	}

	if err := suite.WaitForResource(ctx, "longhorn-system", "longhorn.io", "Engine", "v1beta2", "iscsi-e-0", "{.status.currentState}", "running"); err != nil {
		suite.T().Fatalf("failed to wait for LongHorn Engine to be running: %v", err)
	}

	unstructured, err := suite.GetUnstructuredResource(ctx, "longhorn-system", "longhorn.io", "Engine", "v1beta2", "iscsi-e-0")
	if err != nil {
		suite.T().Fatalf("failed to get LongHorn Engine resource: %v", err)
	}

	// Fail fast when the Engine has no status map; the previous shape let an
	// empty endpoint slip through to the strings.Cut parse below, producing a
	// confusing "failed to parse endpoint data from " error.
	status, ok := unstructured.Object["status"].(map[string]any)
	if !ok {
		suite.T().Fatalf("failed to get LongHorn Engine status")
	}

	endpointData, ok := status["endpoint"].(string)
	if !ok {
		suite.T().Fatalf("failed to get LongHorn Engine endpoint")
	}

	tmpl, err = template.New("pod-iscsi-volume").Parse(string(podWithISCSIVolumeTemplate))
	suite.Require().NoError(err)

	// endpoint is of the form `iscsi://10.244.0.5:3260/iqn.2019-10.io.longhorn:iscsi/1`
	// trim the iscsi:// prefix
	endpointData = strings.TrimPrefix(endpointData, "iscsi://")
	// trim the /1 suffix
	endpointData = strings.TrimSuffix(endpointData, "/1")

	targetPortal, IQN, ok := strings.Cut(endpointData, "/")
	if !ok {
		suite.T().Fatalf("failed to parse endpoint data from %s", endpointData)
	}

	var podWithISCSIVolume bytes.Buffer

	if err := tmpl.Execute(&podWithISCSIVolume, struct {
		NodeName     string
		TargetPortal string
		IQN          string
	}{
		NodeName:     nodeInfo.Name,
		TargetPortal: targetPortal,
		IQN:          IQN,
	}); err != nil {
		suite.T().Fatalf("failed to render pod with ISCSI volume manifest: %v", err)
	}

	podWithISCSIVolumeUnstructured := suite.ParseManifests(podWithISCSIVolume.Bytes())

	// Delete the consumer pod with its own short-lived cleanup context.
	defer func() {
		cleanUpCtx, cleanupCancel := context.WithTimeout(context.Background(), time.Minute)
		defer cleanupCancel()

		suite.DeleteManifests(cleanUpCtx, podWithISCSIVolumeUnstructured)
	}()

	suite.ApplyManifests(ctx, podWithISCSIVolumeUnstructured)

	suite.Require().NoError(suite.WaitForPodToBeRunning(ctx, 3*time.Minute, "default", "iscsipd"))
}
// init registers LongHornV1Suite with the shared integration test suite list.
func init() {
allSuites = append(allSuites, new(LongHornV1Suite))
}

View File

@ -29,7 +29,8 @@ func discard(string, ...any) {}
func NewCgroup2() *Manager {
return NewManager(
WithTarget(constants.CgroupMountPath),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NODEV|unix.MOUNT_ATTR_NOEXEC|unix.MOUNT_ATTR_RELATIME),
WithSecure(),
WithMountAttributes(unix.MOUNT_ATTR_RELATIME),
WithFsopen(
"cgroup2",
fsopen.WithBoolParameter("nsdelegate"),
@ -59,6 +60,7 @@ func NewReadOnlyOverlay(sources []string, target string, printer func(string, ..
WithPrinter(printer),
WithTarget(target),
WithReadOnly(),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NODEV),
WithFsopen("overlay", fsOptions...),
)
@ -90,6 +92,7 @@ func NewOverlayWithBasePath(sources []string, target, basePath string, printer f
options,
WithTarget(target),
WithExtraDirs(diff, workdir),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NODEV),
WithFsopen("overlay", fsOptions...),
WithPrinter(printer),
)
@ -118,6 +121,7 @@ func Squashfs(target, squashfsFile string, printer func(string, ...any)) (*Manag
WithTarget(target),
WithPrinter(printer),
WithReadOnly(),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NODEV),
WithShared(),
WithExtraUnmountCallbacks(func(m *Manager) {
dev.Detach() //nolint:errcheck
@ -190,7 +194,7 @@ func Pseudo(printer func(string, ...any)) Managers {
WithPrinter(printer),
WithTarget("/proc"),
WithKeepOpenAfterMount(),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NOEXEC|unix.MOUNT_ATTR_NODEV),
WithSecure(),
WithFsopen("proc"),
),
newManager(
@ -198,6 +202,7 @@ func Pseudo(printer func(string, ...any)) Managers {
WithPrinter(printer),
WithTarget("/sys"),
WithKeepOpenAfterMount(),
WithSecure(),
WithFsopen("sysfs"),
),
)
@ -210,7 +215,8 @@ func PseudoLate(printer func(string, ...any)) Managers {
always,
WithPrinter(printer),
WithTarget("/run"),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NOEXEC|unix.MOUNT_ATTR_RELATIME),
WithSecure(),
WithMountAttributes(unix.MOUNT_ATTR_RELATIME),
WithSelinuxLabel(constants.RunSelinuxLabel),
WithRecursiveUnmount(),
WithFsopen(
@ -222,6 +228,8 @@ func PseudoLate(printer func(string, ...any)) Managers {
always,
WithPrinter(printer),
WithTarget("/system"),
WithSecure(),
WithMountAttributes(unix.MOUNT_ATTR_RELATIME),
WithSelinuxLabel(constants.SystemSelinuxLabel),
WithRecursiveUnmount(),
WithFsopen(
@ -233,7 +241,7 @@ func PseudoLate(printer func(string, ...any)) Managers {
always,
WithPrinter(printer),
WithTarget("/tmp"),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NOEXEC|unix.MOUNT_ATTR_NODEV),
WithSecure(),
WithFsopen(
"tmpfs",
fsopen.WithStringParameter("mode", "0755"),
@ -250,7 +258,8 @@ func PseudoSub(printer func(string, ...any)) Managers {
always,
WithPrinter(printer),
WithTarget("/dev/shm"),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NOEXEC|unix.MOUNT_ATTR_NODEV|unix.MOUNT_ATTR_RELATIME),
WithSecure(),
WithMountAttributes(unix.MOUNT_ATTR_RELATIME),
WithFsopen("tmpfs"),
),
newManager(
@ -276,48 +285,56 @@ func PseudoSub(printer func(string, ...any)) Managers {
always,
WithPrinter(printer),
WithTarget("/sys/fs/bpf"),
WithSecure(),
WithMountAttributes(unix.MOUNT_ATTR_RELATIME),
WithFsopen("bpf"),
),
newManager(
always,
WithPrinter(printer),
WithTarget("/sys/kernel/security"),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NOEXEC|unix.MOUNT_ATTR_NODEV|unix.MOUNT_ATTR_RELATIME),
WithSecure(),
WithMountAttributes(unix.MOUNT_ATTR_RELATIME),
WithFsopen("securityfs"),
),
newManager(
always,
WithPrinter(printer),
WithTarget("/sys/kernel/tracing"),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NOEXEC|unix.MOUNT_ATTR_NODEV),
WithSecure(),
WithFsopen("tracefs"),
),
newManager(
always,
WithPrinter(printer),
WithTarget("/sys/kernel/config"),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NOEXEC|unix.MOUNT_ATTR_NODEV|unix.MOUNT_ATTR_RELATIME),
WithSecure(),
WithMountAttributes(unix.MOUNT_ATTR_RELATIME),
WithFsopen("configfs"),
),
newManager(
always,
WithPrinter(printer),
WithTarget("/sys/kernel/debug"),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NOEXEC|unix.MOUNT_ATTR_NODEV|unix.MOUNT_ATTR_RELATIME),
WithSecure(),
WithMountAttributes(unix.MOUNT_ATTR_RELATIME),
WithFsopen("debugfs"),
),
newManager(
selinux.IsEnabled,
WithPrinter(printer),
WithTarget("/sys/fs/selinux"),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NOEXEC|unix.MOUNT_ATTR_RELATIME),
WithSecure(),
WithMountAttributes(unix.MOUNT_ATTR_RELATIME),
WithFsopen("selinuxfs"),
),
newManager(
hasEFIVars,
WithPrinter(printer),
WithTarget(constants.EFIVarsMountPoint),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NOEXEC|unix.MOUNT_ATTR_NODEV|unix.MOUNT_ATTR_RELATIME|unix.MOUNT_ATTR_RDONLY),
WithSecure(),
WithReadOnly(),
WithMountAttributes(unix.MOUNT_ATTR_RELATIME),
WithFsopen("efivarfs"),
),
)

View File

@ -217,9 +217,9 @@ func WithDisableAccessTime() ManagerOption {
return WithMountAttributes(unix.MOUNT_ATTR_NOATIME)
}
// WithSecure sets MOUNT_ATTR_NOSUID and MOUNT_ATTR_NODEV.
// WithSecure sets MOUNT_ATTR_NOSUID, MOUNT_ATTR_NODEV, and MOUNT_ATTR_NOEXEC.
func WithSecure() ManagerOption {
return WithMountAttributes(unix.MOUNT_ATTR_NOSUID | unix.MOUNT_ATTR_NODEV)
return WithMountAttributes(unix.MOUNT_ATTR_NOSUID | unix.MOUNT_ATTR_NODEV | unix.MOUNT_ATTR_NOEXEC)
}
// WithReadOnly sets the mount as read only.

View File

@ -11,6 +11,11 @@ import (
"golang.org/x/sys/unix"
)
// bindHardenAttr is the baseline attribute set every read-only bind mount
// inherits: read-only, no setuid escalation, no device nodes (per
// siderolabs/talos#11946 — device nodes belong only in /dev and /dev/pts).
const bindHardenAttr = unix.MOUNT_ATTR_RDONLY | unix.MOUNT_ATTR_NOSUID | unix.MOUNT_ATTR_NODEV
// BindReadonly creates a common way to create a readonly bind mounted destination.
func BindReadonly(src, dst string) error {
sourceFD, err := unix.OpenTree(unix.AT_FDCWD, src, unix.OPEN_TREE_CLONE|unix.OPEN_TREE_CLOEXEC)
@ -21,7 +26,7 @@ func BindReadonly(src, dst string) error {
defer unix.Close(sourceFD) //nolint:errcheck
if err := unix.MountSetattr(sourceFD, "", unix.AT_EMPTY_PATH, &unix.MountAttr{
Attr_set: unix.MOUNT_ATTR_RDONLY,
Attr_set: bindHardenAttr,
}); err != nil {
return fmt.Errorf("failed to set mount attribute: %w", err)
}
@ -43,7 +48,7 @@ func BindReadonlyFd(dfd int, dst string) error {
defer unix.Close(sourceFD) //nolint:errcheck
if err := unix.MountSetattr(sourceFD, "", unix.AT_EMPTY_PATH, &unix.MountAttr{
Attr_set: unix.MOUNT_ATTR_RDONLY,
Attr_set: bindHardenAttr,
}); err != nil {
return fmt.Errorf("failed to set mount attribute: %w", err)
}

View File

@ -333,7 +333,7 @@ func (p *Point) SetDisableAccessTime(disable bool) error {
}, 0)
}
// SetSecure sets or clears the nosuid and nodev mount attributes.
// SetSecure sets or clears the nosuid, nodev, and noexec mount attributes.
func (p *Point) SetSecure(secure bool) error {
if p.detached {
return nil
@ -341,12 +341,12 @@ func (p *Point) SetSecure(secure bool) error {
if secure {
return p.setattr(&unix.MountAttr{
Attr_set: unix.MOUNT_ATTR_NOSUID | unix.MOUNT_ATTR_NODEV,
Attr_set: unix.MOUNT_ATTR_NOSUID | unix.MOUNT_ATTR_NODEV | unix.MOUNT_ATTR_NOEXEC,
}, 0)
}
return p.setattr(&unix.MountAttr{
Attr_clr: unix.MOUNT_ATTR_NOSUID | unix.MOUNT_ATTR_NODEV,
Attr_clr: unix.MOUNT_ATTR_NOSUID | unix.MOUNT_ATTR_NODEV | unix.MOUNT_ATTR_NOEXEC,
}, 0)
}

View File

@ -101,6 +101,10 @@
; Allow init to manage processes
(allow init_t service_p (fs_classes (rw)))
(allow init_t service_p (process_classes (full)))
; Service binaries exec from NOSUID mounts (rootfs squashfs, /sbin/init
; bound into /system/libexec/<svc>/<svc>); init_t needs nosuid_transition
; for every service_p member.
(allow init_t service_p (process2 (nosuid_transition)))
; kernel cmdline
(allow system_p proc_cmdline_t (fs_classes (ro)))

View File

@ -19,6 +19,8 @@
(call system_socket_f (dbus_client_socket_t))
(allow init_t service_p (process (transition)))
; (process2 nosuid_transition for init_t -> service_p lives in
; common/processes.cil — single source for the allow rule.)
; Manage processes
(allow init_t any_p (fs_classes (rw)))
(allow init_t any_p (process_classes (full)))
@ -98,6 +100,9 @@
(allow initramfs_t init_exec_t (file (execute)))
(typetransition initramfs_t init_exec_t process init_t)
(allow initramfs_t init_t (process_classes (full)))
; init_exec_t (/usr/bin/init) lives on the rootfs squashfs which is
; mounted NOSUID — allow the initramfs_t → init_t transition across it.
(allow initramfs_t init_t (process2 (nosuid_transition)))
(allow init_t initramfs_t (fd (use)))
; Direct child processes

View File

@ -60,6 +60,9 @@
(allow kernel_t modprobe_exec_t (file (execute)))
(allow kernel_t udev_t (process (all))) ; including transition
; modprobe_exec_t lives on rootfs squashfs (NOSUID); kernel_t is not
; in service_p so this rule is required separately.
(allow kernel_t udev_t (process2 (nosuid_transition)))
(allow init_t modprobe_exec_t (file (execute)))
(allow init_t udev_t (process (all))) ; including transition

View File

@ -1092,7 +1092,10 @@ type MountSpec struct {
// BindTarget is an optional path on the host to bind-mount the volume onto.
BindTarget string `protobuf:"bytes,9,opt,name=bind_target,json=bindTarget,proto3" json:"bind_target,omitempty"`
// Parameters are additional filesystem mount options used when mounting the volume.
Parameters []*ParameterSpec `protobuf:"bytes,10,rep,name=parameters,proto3" json:"parameters,omitempty"`
Parameters []*ParameterSpec `protobuf:"bytes,10,rep,name=parameters,proto3" json:"parameters,omitempty"`
// Secure applies MOUNT_ATTR_NOSUID\|NODEV\|NOEXEC to the mount. Set for
// config-only mounts; leave false for mounts hosting executables.
Secure bool `protobuf:"varint,11,opt,name=secure,proto3" json:"secure,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
@ -1197,6 +1200,13 @@ func (x *MountSpec) GetParameters() []*ParameterSpec {
return nil
}
// GetSecure returns the Secure flag, tolerating a nil receiver.
//
// NOTE: standard protobuf-generated accessor shape — do not hand-edit;
// regenerate from the .proto instead.
func (x *MountSpec) GetSecure() bool {
if x != nil {
return x.Secure
}
return false
}
// MountStatusSpec is the spec for MountStatus.
type MountStatusSpec struct {
state protoimpl.MessageState `protogen:"open.v1"`
@ -2630,7 +2640,7 @@ const file_resource_definitions_block_block_proto_rawDesc = "" +
"\tread_only\x18\x05 \x01(\bR\breadOnly\x12\x1a\n" +
"\bdetached\x18\x06 \x01(\bR\bdetached\x12.\n" +
"\x13disable_access_time\x18\a \x01(\bR\x11disableAccessTime\x12\x16\n" +
"\x06secure\x18\b \x01(\bR\x06secure\"\x82\x03\n" +
"\x06secure\x18\b \x01(\bR\x06secure\"\x9a\x03\n" +
"\tMountSpec\x12\x1f\n" +
"\vtarget_path\x18\x01 \x01(\tR\n" +
"targetPath\x12#\n" +
@ -2646,7 +2656,8 @@ const file_resource_definitions_block_block_proto_rawDesc = "" +
"\n" +
"parameters\x18\n" +
" \x03(\v2/.talos.resource.definitions.block.ParameterSpecR\n" +
"parameters\"\xbd\x03\n" +
"parameters\x12\x16\n" +
"\x06secure\x18\v \x01(\bR\x06secure\"\xbd\x03\n" +
"\x0fMountStatusSpec\x12F\n" +
"\x04spec\x18\x01 \x01(\v22.talos.resource.definitions.block.MountRequestSpecR\x04spec\x12\x16\n" +
"\x06target\x18\x02 \x01(\tR\x06target\x12\x16\n" +

View File

@ -1042,6 +1042,16 @@ func (m *MountSpec) MarshalToSizedBufferVT(dAtA []byte) (int, error) {
i -= len(m.unknownFields)
copy(dAtA[i:], m.unknownFields)
}
if m.Secure {
i--
if m.Secure {
dAtA[i] = 1
} else {
dAtA[i] = 0
}
i--
dAtA[i] = 0x58
}
if len(m.Parameters) > 0 {
for iNdEx := len(m.Parameters) - 1; iNdEx >= 0; iNdEx-- {
size, err := m.Parameters[iNdEx].MarshalToSizedBufferVT(dAtA[:i])
@ -2798,6 +2808,9 @@ func (m *MountSpec) SizeVT() (n int) {
n += 1 + l + protohelpers.SizeOfVarint(uint64(l))
}
}
if m.Secure {
n += 2
}
n += len(m.unknownFields)
return n
}
@ -6317,6 +6330,26 @@ func (m *MountSpec) UnmarshalVT(dAtA []byte) error {
return err
}
iNdEx = postIndex
case 11:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Secure", wireType)
}
var v int
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return protohelpers.ErrIntOverflow
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
v |= int(b&0x7F) << shift
if b < 0x80 {
break
}
}
m.Secure = bool(v != 0)
default:
iNdEx = preIndex
skippy, err := protohelpers.Skip(dAtA[iNdEx:])

View File

@ -1355,14 +1355,19 @@ const (
// SELinuxLabeledPath pairs an overlay mount target with the SELinux label
// applied to it, plus the mount-hardening behavior for that overlay.
type SELinuxLabeledPath struct {
// Path is the overlay mount target.
Path string
// Label is the SELinux label applied to the mount.
Label string
// Secure applies the nosuid+nodev+noexec triplet to the overlay. Set
// for config-only overlays (e.g. /etc/cni, /etc/kubernetes); leave
// false for overlays that host plugin/helper binaries (e.g. /opt,
// /usr/libexec/kubernetes).
Secure bool
}
// Overlays is the set of paths to create overlay mounts for.
var Overlays = []SELinuxLabeledPath{
{"/etc/cni", CNISELinuxLabel},
{KubernetesConfigBaseDir, KubernetesConfigSELinuxLabel},
{"/usr/libexec/kubernetes", KubeletPluginsSELinuxLabel},
{"/opt", OptSELinuxLabel},
{Path: "/etc/cni", Label: CNISELinuxLabel, Secure: true},
{Path: KubernetesConfigBaseDir, Label: KubernetesConfigSELinuxLabel, Secure: true},
{Path: "/usr/libexec/kubernetes", Label: KubeletPluginsSELinuxLabel},
{Path: "/opt", Label: OptSELinuxLabel},
}
// DefaultDroppedCapabilities is the default set of capabilities to drop.

View File

@ -250,6 +250,9 @@ type MountSpec struct {
BindTarget *string `yaml:"bindTarget,omitempty" protobuf:"9"`
// Parameters are additional filesystem mount options used when mounting the volume.
Parameters []ParameterSpec `yaml:"parameters,omitempty" protobuf:"10"`
// Secure applies MOUNT_ATTR_NOSUID\|NODEV\|NOEXEC to the mount. Set for
// config-only mounts; leave false for mounts hosting executables.
Secure bool `yaml:"secure,omitempty" protobuf:"11"`
}
// SymlinkProvisioningSpec is the spec for volume symlink.

View File

@ -6331,6 +6331,7 @@ MountSpec is the spec for volume mount.
| recursive_relabel | [bool](#bool) | | RecursiveRelabel is the recursive relabel/chown flag for the mount target. |
| bind_target | [string](#string) | | BindTarget is an optional path on the host to bind-mount the volume onto. |
| parameters | [ParameterSpec](#talos.resource.definitions.block.ParameterSpec) | repeated | Parameters are additional filesystem mount options used when mounting the volume. |
| secure | [bool](#bool) | | Secure applies MOUNT_ATTR_NOSUID\|NODEV\|NOEXEC to the mount. Set for config-only mounts; leave false for mounts hosting executables. |