From aca7f3740cdc407d4afa37a2aab1db0f3e73740c Mon Sep 17 00:00:00 2001 From: Vault Automation Date: Mon, 20 Apr 2026 14:48:55 -0400 Subject: [PATCH] [VAULT-44098] containers: never attempt to run `setcap` when running as non-root user (#13988) (#14106) In prior versions of the Vault container we'd set `IPC_LOCK` on the `vault` binary at runtime via the entrypoint script. As we now run the Vault container as an unprivileged user we have to set this capability at build time as `setcap` cannot be run by unprivileged users. This change updates the Alpine OCI and UBI container entrypoints to not attempt to run `setcap` when running as a non-root user. Importantly, these changes introduce a *new requirement* whereby users of the container must add the `IPC_LOCK` capability to the container or pod or the Vault service will fail to start. As running with locked memory is always our guidance for Vault the containers now require this. Users that do not wish to grant the `IPC_LOCK` capability will want to wrap the container and unset the capability on the binary during build time: `setcap cap_ipc_lock=-ep /bin/vault`. 
Signed-off-by: Ryan Cragun Co-authored-by: Ryan Cragun --- .dockerignore | 2 + .release/docker/docker-entrypoint.sh | 67 +++++++++++++---------- .release/docker/ubi-docker-entrypoint.sh | 70 ++++++++++++++---------- Dockerfile | 10 +++- changelog/_13988.txt | 3 + enos/k8s/enos-providers-k8s.hcl | 2 +- enos/modules/k8s_deploy_vault/main.tf | 52 ++++++++++-------- 7 files changed, 120 insertions(+), 86 deletions(-) create mode 100644 .dockerignore create mode 100644 changelog/_13988.txt diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000..6b74c047ca --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +.git +enos diff --git a/.release/docker/docker-entrypoint.sh b/.release/docker/docker-entrypoint.sh index b43a9fcedd..0f6da80a56 100755 --- a/.release/docker/docker-entrypoint.sh +++ b/.release/docker/docker-entrypoint.sh @@ -17,21 +17,21 @@ ulimit -c 0 # VAULT_REDIRECT_INTERFACE and VAULT_CLUSTER_INTERFACE environment variables. If # VAULT_*_ADDR is also set, the resulting URI will combine the protocol and port # number with the IP of the named interface. 
-get_addr () { +get_addr() { local if_name=$1 local uri_template=$2 - ip addr show dev $if_name | awk -v uri=$uri_template '/\s*inet\s/ { \ + ip addr show dev "$if_name" | awk -v uri="$uri_template" '/\s*inet\s/ { \ ip=gensub(/(.+)\/.+/, "\\1", "g", $2); \ print gensub(/^(.+:\/\/).+(:.+)$/, "\\1" ip "\\2", "g", uri); \ exit}' } if [ -n "$VAULT_REDIRECT_INTERFACE" ]; then - export VAULT_REDIRECT_ADDR=$(get_addr $VAULT_REDIRECT_INTERFACE ${VAULT_REDIRECT_ADDR:-"http://0.0.0.0:8200"}) + export VAULT_REDIRECT_ADDR=$(get_addr "$VAULT_REDIRECT_INTERFACE" "${VAULT_REDIRECT_ADDR:-"http://0.0.0.0:8200"}") echo "Using $VAULT_REDIRECT_INTERFACE for VAULT_REDIRECT_ADDR: $VAULT_REDIRECT_ADDR" fi if [ -n "$VAULT_CLUSTER_INTERFACE" ]; then - export VAULT_CLUSTER_ADDR=$(get_addr $VAULT_CLUSTER_INTERFACE ${VAULT_CLUSTER_ADDR:-"https://0.0.0.0:8201"}) + export VAULT_CLUSTER_ADDR=$(get_addr "$VAULT_CLUSTER_INTERFACE" "${VAULT_CLUSTER_ADDR:-"https://0.0.0.0:8201"}") echo "Using $VAULT_CLUSTER_INTERFACE for VAULT_CLUSTER_ADDR: $VAULT_CLUSTER_ADDR" fi @@ -69,38 +69,47 @@ elif vault --help "$1" 2>&1 | grep -q "vault $1"; then set -- vault "$@" fi -# If we are running Vault, make sure it executes as the proper user. 
+# If we are running Vault and the container user is root then execute as the vault user if [ "$1" = 'vault' ]; then - if [ -z "$SKIP_CHOWN" ]; then - # If the config dir is bind mounted then chown it - if [ "$(stat -c %u /vault/config)" != "$(id -u vault)" ]; then - chown -R vault:vault /vault/config || echo "Could not chown /vault/config (may not have appropriate permissions)" + if [ "$(id -u)" != '0' ]; then + [ -n "$SKIP_CHOWN" ] && echo "Container is running as non-root user, ignoring SKIP_CHOWN" >&2 + [ -n "$SKIP_SETCAP" ] && echo "Container is running as non-root user, ignoring SKIP_SETCAP" >&2 + else + if [ -z "$SKIP_CHOWN" ]; then + # If the config dir is bind mounted then chown it + if [ "$(stat -c %u /vault/config)" != "$(id -u vault)" ]; then + chown -R vault:vault /vault/config || echo "Could not chown /vault/config (may not have appropriate permissions)" + fi + + # If the logs dir is bind mounted then chown it + if [ "$(stat -c %u /vault/logs)" != "$(id -u vault)" ]; then + chown -R vault:vault /vault/logs + fi + + # If the file dir is bind mounted then chown it + if [ "$(stat -c %u /vault/file)" != "$(id -u vault)" ]; then + chown -R vault:vault /vault/file + fi fi - # If the logs dir is bind mounted then chown it - if [ "$(stat -c %u /vault/logs)" != "$(id -u vault)" ]; then - chown -R vault:vault /vault/logs + if [ -z "$SKIP_SETCAP" ]; then + # Allow mlock to avoid swapping Vault memory to disk + setcap cap_ipc_lock=+ep $(readlink -f $(which vault)) + + # In the case vault has been started in a container without IPC_LOCK privileges + if ! vault -version 1> /dev/null 2> /dev/null; then + >&2 echo "Couldn't start vault with IPC_LOCK. 
Disabling IPC_LOCK, please use --cap-add IPC_LOCK" + setcap cap_ipc_lock=-ep $(readlink -f $(which vault)) + fi fi - # If the file dir is bind mounted then chown it - if [ "$(stat -c %u /vault/file)" != "$(id -u vault)" ]; then - chown -R vault:vault /vault/file - fi + set -- su-exec vault "$@" fi +fi - if [ -z "$SKIP_SETCAP" ]; then - # Allow mlock to avoid swapping Vault memory to disk - setcap cap_ipc_lock=+ep $(readlink -f $(which vault)) - - # In the case vault has been started in a container without IPC_LOCK privileges - if ! vault -version 1>/dev/null 2>/dev/null; then - >&2 echo "Couldn't start vault with IPC_LOCK. Disabling IPC_LOCK, please use --cap-add IPC_LOCK" - setcap cap_ipc_lock=-ep $(readlink -f $(which vault)) - fi - fi - - if [ "$(id -u)" = '0' ]; then - set -- su-exec vault "$@" +if ! output=$(vault -version 2>&1); then + if echo "$output" | grep -q "not permitted"; then + echo "Vault requires the IPC_LOCK capability. Please use --cap-add IPC_LOCK or add it to the securityContext capabilities" >&2 fi fi diff --git a/.release/docker/ubi-docker-entrypoint.sh b/.release/docker/ubi-docker-entrypoint.sh index 8794416225..bfe5adb2b9 100755 --- a/.release/docker/ubi-docker-entrypoint.sh +++ b/.release/docker/ubi-docker-entrypoint.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Copyright IBM Corp. 2016, 2025 # SPDX-License-Identifier: BUSL-1.1 @@ -12,21 +12,21 @@ ulimit -c 0 # VAULT_REDIRECT_INTERFACE and VAULT_CLUSTER_INTERFACE environment variables. If # VAULT_*_ADDR is also set, the resulting URI will combine the protocol and port # number with the IP of the named interface. 
-get_addr () { - local if_name=$1 - local uri_template=$2 - ip addr show dev $if_name | awk -v uri=$uri_template '/\s*inet\s/ { \ +get_addr() { + local if_name="$1" + local uri_template="$2" + ip addr show dev "$if_name" | awk -v uri="$uri_template" '/\s*inet\s/ { \ ip=gensub(/(.+)\/.+/, "\\1", "g", $2); \ print gensub(/^(.+:\/\/).+(:.+)$/, "\\1" ip "\\2", "g", uri); \ exit}' } if [ -n "$VAULT_REDIRECT_INTERFACE" ]; then - export VAULT_REDIRECT_ADDR=$(get_addr $VAULT_REDIRECT_INTERFACE ${VAULT_REDIRECT_ADDR:-"http://0.0.0.0:8200"}) + export VAULT_REDIRECT_ADDR=$(get_addr "$VAULT_REDIRECT_INTERFACE" "${VAULT_REDIRECT_ADDR:-"http://0.0.0.0:8200"}") echo "Using $VAULT_REDIRECT_INTERFACE for VAULT_REDIRECT_ADDR: $VAULT_REDIRECT_ADDR" fi if [ -n "$VAULT_CLUSTER_INTERFACE" ]; then - export VAULT_CLUSTER_ADDR=$(get_addr $VAULT_CLUSTER_INTERFACE ${VAULT_CLUSTER_ADDR:-"https://0.0.0.0:8201"}) + export VAULT_CLUSTER_ADDR=$(get_addr "$VAULT_CLUSTER_INTERFACE" "${VAULT_CLUSTER_ADDR:-"https://0.0.0.0:8201"}") echo "Using $VAULT_CLUSTER_INTERFACE for VAULT_CLUSTER_ADDR: $VAULT_CLUSTER_ADDR" fi @@ -69,34 +69,45 @@ elif vault --help "$1" 2>&1 | grep -q "vault $1"; then set -- vault "$@" fi -# If we are running Vault, make sure it executes as the proper user. 
+# If we are running Vault and the container user is root then execute as the vault user if [ "$1" = 'vault' ]; then - if [ -z "$SKIP_CHOWN" ]; then - # If the config dir is bind mounted then chown it - if [ "$(stat -c %u /vault/config)" != "$(id -u vault)" ]; then - chown -R vault:vault /vault/config || echo "Could not chown /vault/config (may not have appropriate permissions)" + if [ "$(id -u)" != '0' ]; then + [ -n "$SKIP_CHOWN" ] && echo "Container is running as non-root user, ignoring SKIP_CHOWN" >&2 + [ -n "$SKIP_SETCAP" ] && echo "Container is running as non-root user, ignoring SKIP_SETCAP" >&2 + else + if [ -z "$SKIP_CHOWN" ]; then + # If the config dir is bind mounted then chown it + if [ "$(stat -c %u /vault/config)" != "$(id -u vault)" ]; then + chown -R vault:vault /vault/config || echo "Could not chown /vault/config (may not have appropriate permissions)" + fi + + # If the logs dir is bind mounted then chown it + if [ "$(stat -c %u /vault/logs)" != "$(id -u vault)" ]; then + chown -R vault:vault /vault/logs + fi + + # If the file dir is bind mounted then chown it + if [ "$(stat -c %u /vault/file)" != "$(id -u vault)" ]; then + chown -R vault:vault /vault/file + fi fi - # If the logs dir is bind mounted then chown it - if [ "$(stat -c %u /vault/logs)" != "$(id -u vault)" ]; then - chown -R vault:vault /vault/logs - fi + if [ -z "$SKIP_SETCAP" ]; then + # Allow mlock to avoid swapping Vault memory to disk + setcap cap_ipc_lock=+ep $(readlink -f $(which vault)) - # If the file dir is bind mounted then chown it - if [ "$(stat -c %u /vault/file)" != "$(id -u vault)" ]; then - chown -R vault:vault /vault/file + # In the case vault has been started in a container without IPC_LOCK privileges + if ! vault -version 1> /dev/null 2> /dev/null; then + echo "Couldn't start vault with IPC_LOCK. 
Disabling IPC_LOCK, please use --cap-add IPC_LOCK" >&2 + setcap cap_ipc_lock=-ep $(readlink -f $(which vault)) + fi fi fi +fi - if [ -z "$SKIP_SETCAP" ]; then - # Allow mlock to avoid swapping Vault memory to disk - setcap cap_ipc_lock=+ep $(readlink -f /bin/vault) - - # In the case vault has been started in a container without IPC_LOCK privileges - if ! vault -version 1>/dev/null 2>/dev/null; then - >&2 echo "Couldn't start vault with IPC_LOCK. Disabling IPC_LOCK, please use --cap-add IPC_LOCK" - setcap cap_ipc_lock=-ep $(readlink -f /bin/vault) - fi +if ! output=$(vault -version 2>&1); then + if echo "$output" | grep -q "not permitted"; then + echo "Vault requires the IPC_LOCK capability. Please use --cap-add IPC_LOCK or add it to the securityContext capabilities" >&2 fi fi @@ -106,8 +117,7 @@ fi # we're now rerunning the entrypoint script as the Vault # user but no longer need to run setup code for setcap # or chowning directories (previously done on the first run). -if [[ "$(id -u)" == '0' ]] -then +if [ "$(id -u)" = '0' ]; then export SKIP_CHOWN="true" export SKIP_SETCAP="true" exec su vault -p "$0" -- "$@" diff --git a/Dockerfile b/Dockerfile index 5086d65ae7..523f4dfc8c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,7 +41,10 @@ RUN addgroup ${NAME} && adduser -S -G ${NAME} ${NAME} # when when our Alpine release is >= 3.23.4 RUN apk update && apk add --upgrade --no-cache libcap su-exec dumb-init tzdata zlib -COPY dist/$TARGETOS/$TARGETARCH/$BIN_NAME /bin/ +COPY dist/$TARGETOS/$TARGETARCH/$BIN_NAME /bin/${BIN_NAME} + +# Set IPC_LOCK at build time because the container runs as an unprivileged user +RUN setcap cap_ipc_lock=+ep /bin/${BIN_NAME} # /vault/logs is made available to use as a location to store audit logs, if # desired; /vault/file is made available to use as a location with the file @@ -131,7 +134,10 @@ RUN groupadd --gid 1000 vault && \ # Copy in the new Vault from CRT pipeline, rather than fetching it from our # public releases. 
-COPY dist/$TARGETOS/$TARGETARCH/$BIN_NAME /bin/ +COPY dist/$TARGETOS/$TARGETARCH/$BIN_NAME /bin/${BIN_NAME} + +# Set IPC_LOCK at build time because the container runs as an unprivileged user +RUN setcap cap_ipc_lock=+ep /bin/${BIN_NAME} # /vault/logs is made available to use as a location to store audit logs, if # desired; /vault/file is made available to use as a location with the file diff --git a/changelog/_13988.txt b/changelog/_13988.txt new file mode 100644 index 0000000000..717c5cf42a --- /dev/null +++ b/changelog/_13988.txt @@ -0,0 +1,3 @@ +```release-note:breaking-change +containers: set cap_ipc_lock capability on vault at build time. Container runtimes will need to add `IPC_LOCK` capabilities when running the vault container. +``` diff --git a/enos/k8s/enos-providers-k8s.hcl b/enos/k8s/enos-providers-k8s.hcl index 48ea5d826d..613055d7af 100644 --- a/enos/k8s/enos-providers-k8s.hcl +++ b/enos/k8s/enos-providers-k8s.hcl @@ -4,7 +4,7 @@ provider "enos" "default" {} provider "helm" "default" { - kubernetes { + kubernetes = { config_path = abspath(joinpath(path.root, "kubeconfig")) } } diff --git a/enos/modules/k8s_deploy_vault/main.tf b/enos/modules/k8s_deploy_vault/main.tf index 73aee7fa79..09f46a65c0 100644 --- a/enos/modules/k8s_deploy_vault/main.tf +++ b/enos/modules/k8s_deploy_vault/main.tf @@ -11,30 +11,40 @@ terraform { helm = { source = "hashicorp/helm" - version = "2.6.0" + version = "3.1.1" } } } locals { - helm_chart_settings = { - "server.ha.enabled" = "true" - "server.ha.replicas" = var.vault_instance_count - "server.ha.raft.enabled" = "true" - "server.affinity" = "" - "server.image.repository" = var.image_repository - "server.image.tag" = var.image_tag - "server.image.pullPolicy" = "Never" # Forces local image use - "server.resources.requests.cpu" = "50m" - "server.limits.memory" = "200m" - "server.limits.cpu" = "200m" - "server.ha.raft.config" = file("${abspath(path.module)}/raft-config.hcl") - "server.dataStorage.size" = "100m" - 
"server.logLevel" = var.vault_log_level + chart_settings = { + "server.affinity" = "" + "server.dataStorage.size" = "100m" + "server.ha.enabled" = "true" + "server.ha.raft.config" = file("${abspath(path.module)}/raft-config.hcl") + "server.ha.raft.enabled" = "true" + "server.ha.replicas" = var.vault_instance_count + "server.image.pullPolicy" = "Never" # Forces local image use + "server.image.repository" = var.image_repository + "server.image.tag" = var.image_tag + "server.limits.cpu" = "200m" + "server.limits.memory" = "200m" + "server.logLevel" = var.vault_log_level + "server.resources.requests.cpu" = "50m" + "server.statefulSet.securityContext.container.allowPrivilegeEscalation" = "false" + "server.statefulSet.securityContext.pod.runAsNonRoot" = "true" + "server.statefulSet.securityContext.pod.runAsGroup" = "1000" + "server.statefulSet.securityContext.pod.runAsUser" = "100" + "server.statefulSet.securityContext.pod.fsGroup" = "1000" } - all_helm_chart_settings = var.ent_license == null ? local.helm_chart_settings : merge(local.helm_chart_settings, { + all_chart_settings = var.ent_license == null ? local.chart_settings : merge(local.chart_settings, { "server.extraEnvironmentVars.VAULT_LICENSE" = var.ent_license }) + chart_list_settings = { + "server.statefulSet.securityContext.container.capabilities.add" = [ + "IPC_LOCK", + ], + } vault_address = "http://127.0.0.1:8200" @@ -50,14 +60,8 @@ resource "helm_release" "vault" { repository = "https://helm.releases.hashicorp.com" chart = "vault" - dynamic "set" { - for_each = local.all_helm_chart_settings - - content { - name = set.key - value = set.value - } - } + set = [for k, v in local.all_chart_settings : { name : k, value : v }] + set_list = [for k, v in local.chart_list_settings : { name : k, value : v }] } data "enos_kubernetes_pods" "vault_pods" {