[VAULT-44098] containers: never attempt to run setcap when running as non-root user (#13988) (#14106)

In prior versions of the Vault container we'd set `IPC_LOCK` on the `vault`
binary at runtime via the entrypoint script. As we now run the Vault
container as an unprivileged user we have to set this capability at build time
as `setcap` cannot be run by unprivileged users.

This change updates the Alpine OCI and UBI container entrypoints
to not attempt to run `setcap` when running as a non-root user.

Importantly, these changes introduce a *new requirement* whereby users of the
container must add `IPC_LOCK` capability to the container or pod or the
Vault service will fail to start. As running with locked memory is always our
guidance for Vault, the containers now require this. Users that do not wish to grant
the `IPC_LOCK` capability will want to wrap the container and unset the capability on
the binary at build time: `setcap cap_ipc_lock=-ep /bin/vault`.

Signed-off-by: Ryan Cragun <me@ryan.ec>
Co-authored-by: Ryan Cragun <me@ryan.ec>
This commit is contained in:
Vault Automation 2026-04-20 14:48:55 -04:00 committed by GitHub
parent 51fa4ebbfe
commit aca7f3740c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 120 additions and 86 deletions

2
.dockerignore Normal file
View File

@ -0,0 +1,2 @@
.git
enos

View File

@ -17,21 +17,21 @@ ulimit -c 0
# VAULT_REDIRECT_INTERFACE and VAULT_CLUSTER_INTERFACE environment variables. If
# VAULT_*_ADDR is also set, the resulting URI will combine the protocol and port
# number with the IP of the named interface.
get_addr () {
get_addr() {
local if_name=$1
local uri_template=$2
ip addr show dev $if_name | awk -v uri=$uri_template '/\s*inet\s/ { \
ip addr show dev "$if_name" | awk -v uri="$uri_template" '/\s*inet\s/ { \
ip=gensub(/(.+)\/.+/, "\\1", "g", $2); \
print gensub(/^(.+:\/\/).+(:.+)$/, "\\1" ip "\\2", "g", uri); \
exit}'
}
if [ -n "$VAULT_REDIRECT_INTERFACE" ]; then
export VAULT_REDIRECT_ADDR=$(get_addr $VAULT_REDIRECT_INTERFACE ${VAULT_REDIRECT_ADDR:-"http://0.0.0.0:8200"})
export VAULT_REDIRECT_ADDR=$(get_addr "$VAULT_REDIRECT_INTERFACE" "${VAULT_REDIRECT_ADDR:-"http://0.0.0.0:8200"}")
echo "Using $VAULT_REDIRECT_INTERFACE for VAULT_REDIRECT_ADDR: $VAULT_REDIRECT_ADDR"
fi
if [ -n "$VAULT_CLUSTER_INTERFACE" ]; then
export VAULT_CLUSTER_ADDR=$(get_addr $VAULT_CLUSTER_INTERFACE ${VAULT_CLUSTER_ADDR:-"https://0.0.0.0:8201"})
export VAULT_CLUSTER_ADDR=$(get_addr "$VAULT_CLUSTER_INTERFACE" "${VAULT_CLUSTER_ADDR:-"https://0.0.0.0:8201"}")
echo "Using $VAULT_CLUSTER_INTERFACE for VAULT_CLUSTER_ADDR: $VAULT_CLUSTER_ADDR"
fi
@ -69,38 +69,47 @@ elif vault --help "$1" 2>&1 | grep -q "vault $1"; then
set -- vault "$@"
fi
# If we are running Vault, make sure it executes as the proper user.
# If we are running Vault and the container user is root then execute as the vault user
if [ "$1" = 'vault' ]; then
if [ -z "$SKIP_CHOWN" ]; then
# If the config dir is bind mounted then chown it
if [ "$(stat -c %u /vault/config)" != "$(id -u vault)" ]; then
chown -R vault:vault /vault/config || echo "Could not chown /vault/config (may not have appropriate permissions)"
if [ "$(id -u)" != '0' ]; then
[ -n "$SKIP_CHOWN" ] && echo "Container is running as non-root user, ignoring SKIP_CHOWN" >&2
[ -n "$SKIP_SETCAP" ] && echo "Container is running as non-root user, ignoring SKIP_SETCAP" >&2
else
if [ -z "$SKIP_CHOWN" ]; then
# If the config dir is bind mounted then chown it
if [ "$(stat -c %u /vault/config)" != "$(id -u vault)" ]; then
chown -R vault:vault /vault/config || echo "Could not chown /vault/config (may not have appropriate permissions)"
fi
# If the logs dir is bind mounted then chown it
if [ "$(stat -c %u /vault/logs)" != "$(id -u vault)" ]; then
chown -R vault:vault /vault/logs
fi
# If the file dir is bind mounted then chown it
if [ "$(stat -c %u /vault/file)" != "$(id -u vault)" ]; then
chown -R vault:vault /vault/file
fi
fi
# If the logs dir is bind mounted then chown it
if [ "$(stat -c %u /vault/logs)" != "$(id -u vault)" ]; then
chown -R vault:vault /vault/logs
if [ -z "$SKIP_SETCAP" ]; then
# Allow mlock to avoid swapping Vault memory to disk
setcap cap_ipc_lock=+ep $(readlink -f $(which vault))
# In the case vault has been started in a container without IPC_LOCK privileges
if ! vault -version 1> /dev/null 2> /dev/null; then
>&2 echo "Couldn't start vault with IPC_LOCK. Disabling IPC_LOCK, please use --cap-add IPC_LOCK"
setcap cap_ipc_lock=-ep $(readlink -f $(which vault))
fi
fi
# If the file dir is bind mounted then chown it
if [ "$(stat -c %u /vault/file)" != "$(id -u vault)" ]; then
chown -R vault:vault /vault/file
fi
set -- su-exec vault "$@"
fi
fi
if [ -z "$SKIP_SETCAP" ]; then
# Allow mlock to avoid swapping Vault memory to disk
setcap cap_ipc_lock=+ep $(readlink -f $(which vault))
# In the case vault has been started in a container without IPC_LOCK privileges
if ! vault -version 1>/dev/null 2>/dev/null; then
>&2 echo "Couldn't start vault with IPC_LOCK. Disabling IPC_LOCK, please use --cap-add IPC_LOCK"
setcap cap_ipc_lock=-ep $(readlink -f $(which vault))
fi
fi
if [ "$(id -u)" = '0' ]; then
set -- su-exec vault "$@"
if ! output=$(vault -version 2>&1); then
if echo "$output" | grep -q "not permitted"; then
echo "Vault requires the IPC_LOCK capability. Please use --cap-add IPC_LOCK or add it to the securityContext capabilities" >&2
fi
fi

View File

@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
# Copyright IBM Corp. 2016, 2025
# SPDX-License-Identifier: BUSL-1.1
@ -12,21 +12,21 @@ ulimit -c 0
# VAULT_REDIRECT_INTERFACE and VAULT_CLUSTER_INTERFACE environment variables. If
# VAULT_*_ADDR is also set, the resulting URI will combine the protocol and port
# number with the IP of the named interface.
get_addr () {
local if_name=$1
local uri_template=$2
ip addr show dev $if_name | awk -v uri=$uri_template '/\s*inet\s/ { \
get_addr() {
local if_name="$1"
local uri_template="$2"
ip addr show dev "$if_name" | awk -v uri="$uri_template" '/\s*inet\s/ { \
ip=gensub(/(.+)\/.+/, "\\1", "g", $2); \
print gensub(/^(.+:\/\/).+(:.+)$/, "\\1" ip "\\2", "g", uri); \
exit}'
}
if [ -n "$VAULT_REDIRECT_INTERFACE" ]; then
export VAULT_REDIRECT_ADDR=$(get_addr $VAULT_REDIRECT_INTERFACE ${VAULT_REDIRECT_ADDR:-"http://0.0.0.0:8200"})
export VAULT_REDIRECT_ADDR=$(get_addr "$VAULT_REDIRECT_INTERFACE" "${VAULT_REDIRECT_ADDR:-"http://0.0.0.0:8200"}")
echo "Using $VAULT_REDIRECT_INTERFACE for VAULT_REDIRECT_ADDR: $VAULT_REDIRECT_ADDR"
fi
if [ -n "$VAULT_CLUSTER_INTERFACE" ]; then
export VAULT_CLUSTER_ADDR=$(get_addr $VAULT_CLUSTER_INTERFACE ${VAULT_CLUSTER_ADDR:-"https://0.0.0.0:8201"})
export VAULT_CLUSTER_ADDR=$(get_addr "$VAULT_CLUSTER_INTERFACE" "${VAULT_CLUSTER_ADDR:-"https://0.0.0.0:8201"}")
echo "Using $VAULT_CLUSTER_INTERFACE for VAULT_CLUSTER_ADDR: $VAULT_CLUSTER_ADDR"
fi
@ -69,34 +69,45 @@ elif vault --help "$1" 2>&1 | grep -q "vault $1"; then
set -- vault "$@"
fi
# If we are running Vault, make sure it executes as the proper user.
# If we are running Vault and the container user is root then execute as the vault user
if [ "$1" = 'vault' ]; then
if [ -z "$SKIP_CHOWN" ]; then
# If the config dir is bind mounted then chown it
if [ "$(stat -c %u /vault/config)" != "$(id -u vault)" ]; then
chown -R vault:vault /vault/config || echo "Could not chown /vault/config (may not have appropriate permissions)"
if [ "$(id -u)" != '0' ]; then
[ -n "$SKIP_CHOWN" ] && echo "Container is running as non-root user, ignoring SKIP_CHOWN" >&2
[ -n "$SKIP_SETCAP" ] && echo "Container is running as non-root user, ignoring SKIP_SETCAP" >&2
else
if [ -z "$SKIP_CHOWN" ]; then
# If the config dir is bind mounted then chown it
if [ "$(stat -c %u /vault/config)" != "$(id -u vault)" ]; then
chown -R vault:vault /vault/config || echo "Could not chown /vault/config (may not have appropriate permissions)"
fi
# If the logs dir is bind mounted then chown it
if [ "$(stat -c %u /vault/logs)" != "$(id -u vault)" ]; then
chown -R vault:vault /vault/logs
fi
# If the file dir is bind mounted then chown it
if [ "$(stat -c %u /vault/file)" != "$(id -u vault)" ]; then
chown -R vault:vault /vault/file
fi
fi
# If the logs dir is bind mounted then chown it
if [ "$(stat -c %u /vault/logs)" != "$(id -u vault)" ]; then
chown -R vault:vault /vault/logs
fi
if [ -z "$SKIP_SETCAP" ]; then
# Allow mlock to avoid swapping Vault memory to disk
setcap cap_ipc_lock=+ep $(readlink -f $(which vault))
# If the file dir is bind mounted then chown it
if [ "$(stat -c %u /vault/file)" != "$(id -u vault)" ]; then
chown -R vault:vault /vault/file
# In the case vault has been started in a container without IPC_LOCK privileges
if ! vault -version 1> /dev/null 2> /dev/null; then
echo "Couldn't start vault with IPC_LOCK. Disabling IPC_LOCK, please use --cap-add IPC_LOCK" >&2
setcap cap_ipc_lock=-ep $(readlink -f $(which vault))
fi
fi
fi
fi
if [ -z "$SKIP_SETCAP" ]; then
# Allow mlock to avoid swapping Vault memory to disk
setcap cap_ipc_lock=+ep $(readlink -f /bin/vault)
# In the case vault has been started in a container without IPC_LOCK privileges
if ! vault -version 1>/dev/null 2>/dev/null; then
>&2 echo "Couldn't start vault with IPC_LOCK. Disabling IPC_LOCK, please use --cap-add IPC_LOCK"
setcap cap_ipc_lock=-ep $(readlink -f /bin/vault)
fi
if ! output=$(vault -version 2>&1); then
if echo "$output" | grep -q "not permitted"; then
echo "Vault requires the IPC_LOCK capability. Please use --cap-add IPC_LOCK or add it to the securityContext capabilities" >&2
fi
fi
@ -106,8 +117,7 @@ fi
# we're now rerunning the entrypoint script as the Vault
# user but no longer need to run setup code for setcap
# or chowning directories (previously done on the first run).
if [[ "$(id -u)" == '0' ]]
then
if [ "$(id -u)" = '0' ]; then
export SKIP_CHOWN="true"
export SKIP_SETCAP="true"
exec su vault -p "$0" -- "$@"

View File

@ -41,7 +41,10 @@ RUN addgroup ${NAME} && adduser -S -G ${NAME} ${NAME}
# when our Alpine release is >= 3.23.4
RUN apk update && apk add --upgrade --no-cache libcap su-exec dumb-init tzdata zlib
COPY dist/$TARGETOS/$TARGETARCH/$BIN_NAME /bin/
COPY dist/$TARGETOS/$TARGETARCH/$BIN_NAME /bin/${BIN_NAME}
# Set IPC_LOCK at build time because the container runs as an unprivileged user
RUN setcap cap_ipc_lock=+ep /bin/${BIN_NAME}
# /vault/logs is made available to use as a location to store audit logs, if
# desired; /vault/file is made available to use as a location with the file
@ -131,7 +134,10 @@ RUN groupadd --gid 1000 vault && \
# Copy in the new Vault from CRT pipeline, rather than fetching it from our
# public releases.
COPY dist/$TARGETOS/$TARGETARCH/$BIN_NAME /bin/
COPY dist/$TARGETOS/$TARGETARCH/$BIN_NAME /bin/${BIN_NAME}
# Set IPC_LOCK at build time because the container runs as an unprivileged user
RUN setcap cap_ipc_lock=+ep /bin/${BIN_NAME}
# /vault/logs is made available to use as a location to store audit logs, if
# desired; /vault/file is made available to use as a location with the file

3
changelog/_13988.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:breaking-change
containers: set cap_ipc_lock capability on vault at build time. Container runtimes will need to add `IPC_LOCK` capabilities when running the vault container.
```

View File

@ -4,7 +4,7 @@
provider "enos" "default" {}
provider "helm" "default" {
kubernetes {
kubernetes = {
config_path = abspath(joinpath(path.root, "kubeconfig"))
}
}

View File

@ -11,30 +11,40 @@ terraform {
helm = {
source = "hashicorp/helm"
version = "2.6.0"
version = "3.1.1"
}
}
}
locals {
helm_chart_settings = {
"server.ha.enabled" = "true"
"server.ha.replicas" = var.vault_instance_count
"server.ha.raft.enabled" = "true"
"server.affinity" = ""
"server.image.repository" = var.image_repository
"server.image.tag" = var.image_tag
"server.image.pullPolicy" = "Never" # Forces local image use
"server.resources.requests.cpu" = "50m"
"server.limits.memory" = "200m"
"server.limits.cpu" = "200m"
"server.ha.raft.config" = file("${abspath(path.module)}/raft-config.hcl")
"server.dataStorage.size" = "100m"
"server.logLevel" = var.vault_log_level
chart_settings = {
"server.affinity" = ""
"server.dataStorage.size" = "100m"
"server.ha.enabled" = "true"
"server.ha.raft.config" = file("${abspath(path.module)}/raft-config.hcl")
"server.ha.raft.enabled" = "true"
"server.ha.replicas" = var.vault_instance_count
"server.image.pullPolicy" = "Never" # Forces local image use
"server.image.repository" = var.image_repository
"server.image.tag" = var.image_tag
"server.limits.cpu" = "200m"
"server.limits.memory" = "200m"
"server.logLevel" = var.vault_log_level
"server.resources.requests.cpu" = "50m"
"server.statefulSet.securityContext.container.allowPrivilegeEscalation" = "false"
"server.statefulSet.securityContext.pod.runAsNonRoot" = "true"
"server.statefulSet.securityContext.pod.runAsGroup" = "1000"
"server.statefulSet.securityContext.pod.runAsUser" = "100"
"server.statefulSet.securityContext.pod.fsGroup" = "1000"
}
all_helm_chart_settings = var.ent_license == null ? local.helm_chart_settings : merge(local.helm_chart_settings, {
all_chart_settings = var.ent_license == null ? local.chart_settings : merge(local.chart_settings, {
"server.extraEnvironmentVars.VAULT_LICENSE" = var.ent_license
})
chart_list_settings = {
"server.statefulSet.securityContext.container.capabilities.add" = [
"IPC_LOCK",
],
}
vault_address = "http://127.0.0.1:8200"
@ -50,14 +60,8 @@ resource "helm_release" "vault" {
repository = "https://helm.releases.hashicorp.com"
chart = "vault"
dynamic "set" {
for_each = local.all_helm_chart_settings
content {
name = set.key
value = set.value
}
}
set = [for k, v in local.all_chart_settings : { name : k, value : v }]
set_list = [for k, v in local.chart_list_settings : { name : k, value : v }]
}
data "enos_kubernetes_pods" "vault_pods" {