app-emulation/docker-runc: Fix CVE-2019-5736 for Docker 17.03

This commit is contained in:
David Michael 2019-02-08 21:54:46 +00:00
parent e3d4c6f38d
commit fad562fd17
7 changed files with 346 additions and 8 deletions

View File

@ -44,6 +44,7 @@ src_unpack() {
PATCHES=(
"${FILESDIR}/${PN}-1.0.0_rc2-mount-propagation.patch"
"${FILESDIR}/0001-nsenter-clone-proc-self-exe-to-avoid-exposing-host-b.patch"
)
src_compile() {

View File

@ -0,0 +1,337 @@
From 2d069bb79260e594870ce3e7466477e54a0c5307 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Wed, 9 Jan 2019 13:40:01 +1100
Subject: [PATCH] nsenter: clone /proc/self/exe to avoid exposing host binary
to container
There are quite a few circumstances where /proc/self/exe pointing to a
pretty important container binary is a _bad_ thing, so to avoid this we
have to make a copy (preferably doing self-clean-up and not being
writeable).
We require memfd_create(2) -- though there is an O_TMPFILE fallback --
but we can always extend this to use a scratch MNT_DETACH overlayfs or
tmpfs. The main downside to this approach is no page-cache sharing for
the runc binary (which overlayfs would give us) but this is far less
complicated.
This is only done during nsenter so that it happens transparently to the
Go code, and any libcontainer users benefit from it. This also makes
ExtraFiles and --preserve-fds handling trivial (because we don't need to
worry about it).
Fixes: CVE-2019-5736
Co-developed-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
libcontainer/nsenter/cloned_binary.c | 268 +++++++++++++++++++++++++++
libcontainer/nsenter/nsexec.c | 11 ++
2 files changed, 279 insertions(+)
create mode 100644 libcontainer/nsenter/cloned_binary.c
diff --git a/libcontainer/nsenter/cloned_binary.c b/libcontainer/nsenter/cloned_binary.c
new file mode 100644
index 000000000000..c8a42c23f73f
--- /dev/null
+++ b/libcontainer/nsenter/cloned_binary.c
@@ -0,0 +1,268 @@
+/*
+ * Copyright (C) 2019 Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2019 SUSE LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+#include <sys/mman.h>
+#include <sys/sendfile.h>
+#include <sys/syscall.h>
+
+/* Use our own wrapper for memfd_create. */
+#if !defined(SYS_memfd_create) && defined(__NR_memfd_create)
+# define SYS_memfd_create __NR_memfd_create
+#endif
+#ifdef SYS_memfd_create
+# define HAVE_MEMFD_CREATE
+/* memfd_create(2) flags -- copied from <linux/memfd.h>. */
+# ifndef MFD_CLOEXEC
+# define MFD_CLOEXEC 0x0001U
+# define MFD_ALLOW_SEALING 0x0002U
+# endif
+/* Issue the raw memfd_create system call through syscall(2). */
+int memfd_create(const char *name, unsigned int flags)
+{
+	long fd = syscall(SYS_memfd_create, name, flags);
+
+	return fd;
+}
+#endif
+
+/* This comes directly from <linux/fcntl.h>. */
+#ifndef F_LINUX_SPECIFIC_BASE
+# define F_LINUX_SPECIFIC_BASE 1024
+#endif
+#ifndef F_ADD_SEALS
+# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+#endif
+#ifndef F_SEAL_SEAL
+# define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
+# define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
+# define F_SEAL_GROW 0x0004 /* prevent file from growing */
+# define F_SEAL_WRITE 0x0008 /* prevent writes */
+#endif
+
+#define RUNC_SENDFILE_MAX 0x7FFFF000 /* sendfile(2) is limited to 2GB. */
+#ifdef HAVE_MEMFD_CREATE
+# define RUNC_MEMFD_COMMENT "runc_cloned:/proc/self/exe"
+# define RUNC_MEMFD_SEALS \
+ (F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE)
+#endif
+
+/*
+ * realloc(3) wrapper that never returns NULL: the request is retried until
+ * the allocator satisfies it, so callers do not need to check the result.
+ *
+ * ptr may be NULL (behaves like malloc). The returned pointer replaces ptr
+ * and must eventually be released with free(3).
+ */
+static void *must_realloc(void *ptr, size_t size)
+{
+	void *new_ptr;
+
+	/*
+	 * realloc(ptr, 0) may free ptr and return NULL. Retrying would then
+	 * hand an already-freed pointer back to realloc, so always ask for at
+	 * least one byte.
+	 */
+	if (size == 0)
+		size = 1;
+
+	do {
+		new_ptr = realloc(ptr, size);
+	} while (!new_ptr);
+	return new_ptr;
+}
+
+/*
+ * Report whether the running program is already a private copy of itself.
+ *
+ * With memfd support, /proc/self/exe counts as a clone only when F_GET_SEALS
+ * reports exactly the RUNC_MEMFD_SEALS seal set. Without memfd support, the
+ * executable counts as a clone when its link count is zero.
+ *
+ * Returns 1 when running from a clone, 0 when not, and -ENOTRECOVERABLE when
+ * /proc/self/exe cannot be opened.
+ */
+static int is_self_cloned(void)
+{
+	int cloned = 0;
+	int exefd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
+
+	if (exefd < 0)
+		return -ENOTRECOVERABLE;
+
+#ifdef HAVE_MEMFD_CREATE
+	/* A failed fcntl (-1) or any other seal set means "not cloned". */
+	if (fcntl(exefd, F_GET_SEALS) == RUNC_MEMFD_SEALS)
+		cloned = 1;
+#else
+	struct stat st = {0};
+
+	/* A failed fstat leaves the answer at "not cloned". */
+	if (fstat(exefd, &st) >= 0 && st.st_nlink == 0)
+		cloned = 1;
+#endif
+	close(exefd);
+	return cloned;
+}
+
+/*
+ * Read the whole of path into a freshly allocated buffer using read(2) in
+ * fixed-size chunks until EOF.
+ *
+ * On success the buffer is returned and *length holds the number of bytes
+ * read. One extra NUL byte, not counted in *length, is stored after the data
+ * so that string functions cannot run off the end of the buffer. The caller
+ * owns the buffer and releases it with free(3).
+ *
+ * Returns NULL if length is NULL, if the file cannot be opened or read, or if
+ * the file is empty.
+ */
+static char *read_file(const char *path, size_t *length)
+{
+	int fd;
+	char buf[4096], *copy = NULL;
+
+	if (!length)
+		return NULL;
+
+	fd = open(path, O_RDONLY | O_CLOEXEC);
+	if (fd < 0)
+		return NULL;
+
+	*length = 0;
+	for (;;) {
+		/* read(2) returns ssize_t; keep the full width. */
+		ssize_t n;
+
+		n = read(fd, buf, sizeof(buf));
+		if (n < 0) {
+			/* An interrupted read is not a failure; try again. */
+			if (errno == EINTR)
+				continue;
+			goto error;
+		}
+		if (!n)
+			break;
+
+		/* Reserve one spare byte for the trailing NUL. */
+		copy = must_realloc(copy, *length + n + 1);
+		memcpy(copy + *length, buf, n);
+		*length += n;
+		copy[*length] = '\0';
+	}
+	close(fd);
+	return copy;
+
+error:
+	close(fd);
+	free(copy);
+	return NULL;
+}
+
+/*
+ * A poor-man's version of "xargs -0". Splits a block of NUL-delimited data of
+ * the given length into entries and stores a NULL-terminated array of
+ * pointers to them in *output. The pointers refer into data itself, so data
+ * must outlive the array; the array is released with free(3).
+ *
+ * *output must be NULL on entry. Returns the number of entries found (0 for
+ * an empty block), or -1 if an argument is invalid or the last entry is not
+ * NUL-terminated within data_length bytes. On -1 *output is left NULL.
+ */
+static int parse_xargs(char *data, int data_length, char ***output)
+{
+	int num = 0;
+	char *cur = data;
+	char *end;
+
+	if (!data || data_length < 0 || !output || *output != NULL)
+		return -1;
+	end = data + data_length;
+
+	/*
+	 * Allocate the slot for the terminating NULL up front so that an empty
+	 * block produces a valid, empty array.
+	 */
+	*output = must_realloc(NULL, sizeof(**output));
+
+	while (cur < end) {
+		/* Bounded search: never look past the end of the block. */
+		char *nul = memchr(cur, '\0', end - cur);
+
+		if (!nul) {
+			free(*output);
+			*output = NULL;
+			return -1;
+		}
+
+		num++;
+		*output = must_realloc(*output, (num + 1) * sizeof(**output));
+		(*output)[num - 1] = cur;
+		cur = nul + 1;
+	}
+	(*output)[num] = NULL;
+	return num;
+}
+
+/*
+ * "Parse" out argv and envp from /proc/self/cmdline and /proc/self/environ.
+ * This is necessary because we are running in a context where we don't have a
+ * main() that we can just get the arguments from.
+ *
+ * *argv and *envp must be NULL on entry. On success (return 0) each holds a
+ * NULL-terminated array whose entries point into buffers that are
+ * intentionally kept alive. On failure -EINVAL is returned, everything
+ * allocated here is released and *argv / *envp are reset to NULL.
+ */
+static int fetchve(char ***argv, char ***envp)
+{
+	/* Named env_data so the local does not shadow libc's environ. */
+	char *cmdline = NULL, *env_data = NULL;
+	size_t cmdline_size, env_size;
+
+	/* Anything stored in the out-params from here on belongs to us. */
+	if (!argv || !envp || *argv || *envp)
+		return -EINVAL;
+
+	cmdline = read_file("/proc/self/cmdline", &cmdline_size);
+	if (!cmdline)
+		goto error;
+	env_data = read_file("/proc/self/environ", &env_size);
+	if (!env_data)
+		goto error;
+
+	if (parse_xargs(cmdline, cmdline_size, argv) <= 0)
+		goto error;
+	if (parse_xargs(env_data, env_size, envp) <= 0)
+		goto error;
+
+	return 0;
+
+error:
+	/*
+	 * Drop the pointer arrays before the buffers they point into, and
+	 * clear the out-params so the caller never sees dangling pointers.
+	 */
+	free(*envp);
+	*envp = NULL;
+	free(*argv);
+	*argv = NULL;
+	free(env_data);
+	free(cmdline);
+	return -EINVAL;
+}
+
+/*
+ * Copy /proc/self/exe into an anonymous file and return a descriptor that is
+ * suitable for fexecve(3).
+ *
+ * With memfd support the copy lives in a memfd that is sealed with
+ * RUNC_MEMFD_SEALS once the copy is complete. Otherwise an O_TMPFILE in /tmp
+ * is used and re-opened read-only.
+ *
+ * Returns the descriptor on success, -ENOTRECOVERABLE if the anonymous file
+ * cannot be created, and -EIO for any later failure, including an incomplete
+ * copy.
+ */
+static int clone_binary(void)
+{
+	int binfd, memfd;
+	struct stat statbuf = {0};
+	off_t sent = 0;
+
+#ifdef HAVE_MEMFD_CREATE
+	memfd = memfd_create(RUNC_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING);
+#else
+	memfd = open("/tmp", O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC, 0711);
+#endif
+	if (memfd < 0)
+		return -ENOTRECOVERABLE;
+
+	binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
+	if (binfd < 0)
+		goto error;
+
+	/* The size is needed to tell a complete copy from a short one. */
+	if (fstat(binfd, &statbuf) < 0)
+		goto error_binfd;
+
+	/*
+	 * sendfile(2) may transfer fewer bytes than requested, so keep going
+	 * until the whole binary has been copied.
+	 */
+	while (sent < statbuf.st_size) {
+		off_t left = statbuf.st_size - sent;
+		size_t chunk = left > RUNC_SENDFILE_MAX ? RUNC_SENDFILE_MAX : (size_t)left;
+		ssize_t n = sendfile(memfd, binfd, NULL, chunk);
+
+		if (n < 0 && errno == EINTR)
+			continue;
+		/* 0 means EOF before st_size bytes: treat it as a failed copy. */
+		if (n <= 0)
+			goto error_binfd;
+		sent += n;
+	}
+	close(binfd);
+
+#ifdef HAVE_MEMFD_CREATE
+	/* Seal the copy so its contents can no longer be changed. */
+	if (fcntl(memfd, F_ADD_SEALS, RUNC_MEMFD_SEALS) < 0)
+		goto error;
+#else
+	/* Need to re-open "memfd" as read-only to avoid execve(2) giving -ETXTBSY. */
+	int newfd;
+	char *fdpath = NULL;
+
+	if (asprintf(&fdpath, "/proc/self/fd/%d", memfd) < 0)
+		goto error;
+	newfd = open(fdpath, O_RDONLY | O_CLOEXEC);
+	free(fdpath);
+	if (newfd < 0)
+		goto error;
+
+	close(memfd);
+	memfd = newfd;
+#endif
+	return memfd;
+
+error_binfd:
+	close(binfd);
+error:
+	close(memfd);
+	return -EIO;
+}
+
+/*
+ * Make sure the current process is executing from a private copy of its own
+ * binary, re-executing itself from such a copy if it is not.
+ *
+ * Returns a positive value when the process is already running from a clone.
+ * When a re-exec is needed and succeeds, this function does not return.
+ * Negative returns: -ENOTRECOVERABLE if /proc/self/exe cannot be inspected,
+ * -EINVAL if argv/envp cannot be recovered, -EIO if the copy cannot be made,
+ * and -ENOEXEC if fexecve(3) fails.
+ */
+int ensure_cloned_binary(void)
+{
+	int execfd;
+	char **argv = NULL, **envp = NULL;
+
+	/* Check that we're not self-cloned, and if we are then bail. */
+	int cloned = is_self_cloned();
+	if (cloned > 0 || cloned == -ENOTRECOVERABLE)
+		return cloned;
+
+	if (fetchve(&argv, &envp) < 0)
+		return -EINVAL;
+
+	execfd = clone_binary();
+	if (execfd < 0)
+		return -EIO;
+
+	fexecve(execfd, argv, envp);
+
+	/* Only reached when fexecve failed: do not leak the cloned binary. */
+	close(execfd);
+	return -ENOEXEC;
+}
diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c
index 28269dfc027f..7750af35ea92 100644
--- a/libcontainer/nsenter/nsexec.c
+++ b/libcontainer/nsenter/nsexec.c
@@ -534,6 +534,9 @@ void join_namespaces(char *nslist)
free(namespaces);
}
+/* Defined in cloned_binary.c. */
+extern int ensure_cloned_binary(void);
+
void nsexec(void)
{
int pipenum;
@@ -549,6 +552,14 @@ void nsexec(void)
if (pipenum == -1)
return;
+ /*
+ * We need to re-exec if we are not in a cloned binary. This is necessary
+ * to ensure that containers won't be able to access the host binary
+ * through /proc/self/exe. See CVE-2019-5736.
+ */
+ if (ensure_cloned_binary() < 0)
+ bail("could not ensure we are a cloned binary");
+
/* Parse all of the netlink configuration. */
nl_parse(pipenum, &config);
--
2.20.1

View File

@ -63,7 +63,7 @@ RDEPEND="
>=app-arch/xz-utils-4.9
=app-emulation/containerd-0.2.6[seccomp?]
=app-emulation/docker-runc-1.0.0_rc2_p136[apparmor?,seccomp?]
=app-emulation/docker-runc-1.0.0_rc2_p136-r1[apparmor?,seccomp?]
=app-emulation/docker-proxy-0.8.0_p20161019
container-init? ( >=sys-process/tini-0.13.0 )
"

View File

@ -14,13 +14,13 @@ RDEPEND="
=app-emulation/docker-17.03.2-r1
=app-emulation/containerd-0.2.6
=app-emulation/docker-proxy-0.8.0_p20161019
=app-emulation/docker-runc-1.0.0_rc2_p136
=app-emulation/docker-runc-1.0.0_rc2_p136-r1
=sys-process/tini-0.13.2
"
src_install() {
insinto /.torcx
newins "${FILESDIR}/${PN}-${PV}-manifest.json" manifest.json
newins "${FILESDIR}/${P}-manifest.json" manifest.json
# Enable the Docker socket by default.
local unitdir=/usr/lib/systemd/system

View File

@ -6,9 +6,9 @@ HOMEPAGE=https://dockerproject.org
IUSE=apparmor aufs +btrfs +container-init +device-mapper hardened +overlay pkcs11 seccomp +journald +selinux kernel_linux cros_workon_tree_ profiling +go_version_go1_7
KEYWORDS=amd64 arm64
LICENSE=Apache-2.0
RDEPEND=>=dev-db/sqlite-3.7.9:3 device-mapper? ( >=sys-fs/lvm2-2.02.89[thin] ) journald? ( >=sys-apps/systemd-225 ) seccomp? ( >=sys-libs/libseccomp-2.2.1[static-libs] ) apparmor? ( sys-libs/libapparmor ) !app-emulation/docker-bin >=net-firewall/iptables-1.4 sys-process/procps >=dev-vcs/git-1.7 >=app-arch/xz-utils-4.9 =app-emulation/containerd-0.2.6[seccomp?] =app-emulation/docker-runc-1.0.0_rc2_p136[apparmor?,seccomp?] =app-emulation/docker-proxy-0.8.0_p20161019 container-init? ( >=sys-process/tini-0.13.0 )
RDEPEND=>=dev-db/sqlite-3.7.9:3 device-mapper? ( >=sys-fs/lvm2-2.02.89[thin] ) journald? ( >=sys-apps/systemd-225 ) seccomp? ( >=sys-libs/libseccomp-2.2.1[static-libs] ) apparmor? ( sys-libs/libapparmor ) !app-emulation/docker-bin >=net-firewall/iptables-1.4 sys-process/procps >=dev-vcs/git-1.7 >=app-arch/xz-utils-4.9 =app-emulation/containerd-0.2.6[seccomp?] =app-emulation/docker-runc-1.0.0_rc2_p136-r1[apparmor?,seccomp?] =app-emulation/docker-proxy-0.8.0_p20161019 container-init? ( >=sys-process/tini-0.13.0 )
REQUIRED_USE=go_version_go1_7
RESTRICT=installsources strip
SLOT=0
_eclasses_=bash-completion-r1 47a7402d95930413ce25ba8d857339bb coreos-go-depend cec6567f1c69a9b3e529a49eedaeed55 coreos-go-utils 67004337b6f831adc5f1ff107ee2f157 cros-workon 4ad6e6491a1010ad7c875302b3be18ba desktop b1d22ac8bdd4679ab79c71aca235009d eapi7-ver 756b3f27d8e46131d5cf3c51bd876446 epatch a1bf4756dba418a7238f3be0cb010c54 estack 43ddf5aaffa7a8d0482df54d25a66a1f eutils 6e6c2737b59a4b982de6fb3ecefd87f8 flag-o-matic 55aaa148741116aa54ad0d80e361818e git-r3 0d4635eeb5a96cd5315597a47eba25c9 linux-info 953c3b1c472dcadbf62098a9301327f2 ltprune 08f9e1d9ee0af8f5d9a7854efbcd8c0e multilib b2f01ad412baf81650c23fcf0975fa33 preserve-libs ef207dc62baddfddfd39a164d9797648 systemd 71fd8d2065d102753fb9e4d20eaf3e9f toolchain-funcs f164325a2cdb5b3ea39311d483988861 udev 7752f306eec7b286d00bdb47b763e7ac user 8bc2845510e2109af75e3eeac607ec81 vcs-clean 2a0f74a496fa2b1552c4f3398258b7bf
_md5_=641baf86697973082859ee2815d7e975
_md5_=82756a565e4e6a5a36e2d0c68c0ff3fc

View File

@ -12,4 +12,4 @@ RESTRICT=test
SLOT=0
SRC_URI=https://github.com/docker/runc/archive/54296cf40ad8143b62dbcaa1d90e520a2136ddfe.tar.gz -> docker-runc-1.0.0_rc2_p136.tar.gz
_eclasses_=coreos-go e0b9bd13413783cf7a2859afc20534a2 coreos-go-depend cec6567f1c69a9b3e529a49eedaeed55 coreos-go-utils 67004337b6f831adc5f1ff107ee2f157 desktop b1d22ac8bdd4679ab79c71aca235009d epatch a1bf4756dba418a7238f3be0cb010c54 estack 43ddf5aaffa7a8d0482df54d25a66a1f eutils 6e6c2737b59a4b982de6fb3ecefd87f8 flag-o-matic 55aaa148741116aa54ad0d80e361818e ltprune 08f9e1d9ee0af8f5d9a7854efbcd8c0e multilib b2f01ad412baf81650c23fcf0975fa33 multiprocessing cac3169468f893670dac3e7cb940e045 preserve-libs ef207dc62baddfddfd39a164d9797648 toolchain-funcs f164325a2cdb5b3ea39311d483988861 vcs-clean 2a0f74a496fa2b1552c4f3398258b7bf vcs-snapshot b77011b62e2053c646ad720defe6d921
_md5_=612d76f65f2d17f115b4e4f2a968bea3
_md5_=f9267783f89fc7c0ecfcf1454d98b6ad

View File

@ -3,6 +3,6 @@ DESCRIPTION=Packages to be installed in a torcx image for Docker
EAPI=2
KEYWORDS=amd64 arm64
LICENSE=GPL-2
RDEPEND==app-emulation/docker-17.03.2-r1 =app-emulation/containerd-0.2.6 =app-emulation/docker-proxy-0.8.0_p20161019 =app-emulation/docker-runc-1.0.0_rc2_p136 =sys-process/tini-0.13.2
RDEPEND==app-emulation/docker-17.03.2-r1 =app-emulation/containerd-0.2.6 =app-emulation/docker-proxy-0.8.0_p20161019 =app-emulation/docker-runc-1.0.0_rc2_p136-r1 =sys-process/tini-0.13.2
SLOT=0
_md5_=65f735ecb66def19d9ae803bd9fd1122
_md5_=2c70db8f7c0e8d599286f9f7e5a655db