overlay sys-apps/systemd: Apply Flatcar modifications

Signed-off-by: Adrian Vladu <avladu@cloudbasesolutions.com>
This commit is contained in:
Krzesimir Nowak 2024-03-26 15:51:18 +01:00 committed by Adrian Vladu
parent 5081a66df5
commit 278c4963cf
16 changed files with 1921 additions and 335 deletions

View File

@ -0,0 +1,32 @@
From 98cbd0a4576464478f0f9fcd2066efc08bef9491 Mon Sep 17 00:00:00 2001
From: David Michael <dm0@redhat.com>
Date: Tue, 16 Apr 2019 02:44:51 +0000
Subject: [PATCH 1/8] wait-online: set --any by default
The systemd-networkd-wait-online command would normally continue
waiting after a network interface is usable if other interfaces are
still configuring. There is a new flag --any to change this.
Preserve previous Container Linux behavior for compatibility by
setting the --any flag by default. See patches from v241 (or
earlier) for the original implementation.
---
src/network/wait-online/wait-online.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/network/wait-online/wait-online.c b/src/network/wait-online/wait-online.c
index 5328bba2d8..95294df607 100644
--- a/src/network/wait-online/wait-online.c
+++ b/src/network/wait-online/wait-online.c
@@ -21,7 +21,7 @@ static Hashmap *arg_interfaces = NULL;
static char **arg_ignore = NULL;
static LinkOperationalStateRange arg_required_operstate = { _LINK_OPERSTATE_INVALID, _LINK_OPERSTATE_INVALID };
static AddressFamily arg_required_family = ADDRESS_FAMILY_NO;
-static bool arg_any = false;
+static bool arg_any = true;
STATIC_DESTRUCTOR_REGISTER(arg_interfaces, hashmap_free_free_freep);
STATIC_DESTRUCTOR_REGISTER(arg_ignore, strv_freep);
--
2.34.1

View File

@ -0,0 +1,24 @@
From e3fd50ec704b5d48e9d756c1cc5c40e72b7d1fa4 Mon Sep 17 00:00:00 2001
From: Nick Owens <nick.owens@coreos.com>
Date: Tue, 2 Jun 2015 18:22:32 -0700
Subject: [PATCH 2/8] networkd: default to "kernel" IPForwarding setting
---
src/network/networkd-network.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/network/networkd-network.c b/src/network/networkd-network.c
index dcd3e5ae12..2ae481d1ec 100644
--- a/src/network/networkd-network.c
+++ b/src/network/networkd-network.c
@@ -461,6 +461,7 @@ int network_load_one(Manager *manager, OrderedHashmap **networks, const char *fi
.link_local = _ADDRESS_FAMILY_INVALID,
.ipv6ll_address_gen_mode = _IPV6_LINK_LOCAL_ADDRESS_GEN_MODE_INVALID,
+ .ip_forward = _ADDRESS_FAMILY_INVALID,
.ipv4_accept_local = -1,
.ipv4_route_localnet = -1,
.ipv6_privacy_extensions = _IPV6_PRIVACY_EXTENSIONS_INVALID,
--
2.34.1

View File

@ -0,0 +1,58 @@
From 0be1b5367c24427e3285d33fb87aa4acdf3c4dce Mon Sep 17 00:00:00 2001
From: Alex Crawford <alex.crawford@coreos.com>
Date: Wed, 2 Mar 2016 10:46:33 -0800
Subject: [PATCH 3/8] needs-update: don't require strictly newer usr
Updates should be triggered whenever usr changes, not only when it is newer.
---
man/systemd-update-done.service.xml | 2 +-
src/shared/condition.c | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/man/systemd-update-done.service.xml b/man/systemd-update-done.service.xml
index 3393010ff6..5478baca25 100644
--- a/man/systemd-update-done.service.xml
+++ b/man/systemd-update-done.service.xml
@@ -50,7 +50,7 @@
<varname>ConditionNeedsUpdate=</varname> (see
<citerefentry><refentrytitle>systemd.unit</refentrytitle><manvolnum>5</manvolnum></citerefentry>)
condition to make sure to run when <filename>/etc/</filename> or
- <filename>/var/</filename> are older than <filename>/usr/</filename>
+ <filename>/var/</filename> aren't the same age as <filename>/usr/</filename>
according to the modification times of the files described above.
This requires that updates to <filename>/usr/</filename> are always
followed by an update of the modification time of
diff --git a/src/shared/condition.c b/src/shared/condition.c
index d3446e8a9d..3f7cc9ea58 100644
--- a/src/shared/condition.c
+++ b/src/shared/condition.c
@@ -793,7 +793,7 @@ static int condition_test_needs_update(Condition *c, char **env) {
* First, compare seconds as they are always accurate...
*/
if (usr.st_mtim.tv_sec != other.st_mtim.tv_sec)
- return usr.st_mtim.tv_sec > other.st_mtim.tv_sec;
+ return true;
/*
* ...then compare nanoseconds.
@@ -804,7 +804,7 @@ static int condition_test_needs_update(Condition *c, char **env) {
* (otherwise the filesystem supports nsec timestamps, see stat(2)).
*/
if (usr.st_mtim.tv_nsec == 0 || other.st_mtim.tv_nsec > 0)
- return usr.st_mtim.tv_nsec > other.st_mtim.tv_nsec;
+ return usr.st_mtim.tv_nsec != other.st_mtim.tv_nsec;
_cleanup_free_ char *timestamp_str = NULL;
r = parse_env_file(NULL, p, "TIMESTAMP_NSEC", &timestamp_str);
@@ -824,7 +824,7 @@ static int condition_test_needs_update(Condition *c, char **env) {
return true;
}
- return timespec_load_nsec(&usr.st_mtim) > timestamp;
+ return timespec_load_nsec(&usr.st_mtim) != timestamp;
}
static bool in_first_boot(void) {
--
2.34.1

View File

@ -0,0 +1,64 @@
From d21ebfcf17ffc1dba635389193f10d2b93eba730 Mon Sep 17 00:00:00 2001
From: Adrian Vladu <avladu@cloudbasesolutions.com>
Date: Fri, 16 Feb 2024 11:22:08 +0000
Subject: [PATCH 4/8] core: use max for DefaultTasksMax
Since systemd v228, systemd has a DefaultTasksMax which defaulted
to 512, later 15% of the system's maximum number of PIDs. This
limit is low and a change in behavior that people running services
in containers will hit frequently, so revert to previous behavior.
Though later the TasksMax was changed in the a dynamic property to
accommodate stale values.
This change is built on previous patch by David Michael(dm0-).
Signed-off-by: Adrian Vladu <avladu@cloudbasesolutions.com>
---
man/systemd-system.conf.xml | 2 +-
src/core/manager.c | 2 +-
src/core/system.conf.in | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml
index 3c06b65f93..71f38692b6 100644
--- a/man/systemd-system.conf.xml
+++ b/man/systemd-system.conf.xml
@@ -501,7 +501,7 @@
<listitem><para>Configure the default value for the per-unit <varname>TasksMax=</varname> setting. See
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details. This setting applies to all unit types that support resource control settings, with the exception
- of slice units. Defaults to 15% of the minimum of <varname>kernel.pid_max=</varname>, <varname>kernel.threads-max=</varname>
+ of slice units. Defaults to 100% of the minimum of <varname>kernel.pid_max=</varname>, <varname>kernel.threads-max=</varname>
and root cgroup <varname>pids.max</varname>.
Kernel has a default value for <varname>kernel.pid_max=</varname> and an algorithm of counting in case of more than 32 cores.
For example, with the default <varname>kernel.pid_max=</varname>, <varname>DefaultTasksMax=</varname> defaults to 4915,
diff --git a/src/core/manager.c b/src/core/manager.c
index 88eebfc626..8992c8c3e3 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -114,7 +114,7 @@
/* How many units and jobs to process of the bus queue before returning to the event loop. */
#define MANAGER_BUS_MESSAGE_BUDGET 100U
-#define DEFAULT_TASKS_MAX ((CGroupTasksMax) { 15U, 100U }) /* 15% */
+#define DEFAULT_TASKS_MAX ((CGroupTasksMax) { 100U, 100U }) /* 15% */
static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
diff --git a/src/core/system.conf.in b/src/core/system.conf.in
index 05eb681270..94d0365244 100644
--- a/src/core/system.conf.in
+++ b/src/core/system.conf.in
@@ -58,7 +58,7 @@
#DefaultIPAccounting=no
#DefaultMemoryAccounting={{ 'yes' if MEMORY_ACCOUNTING_DEFAULT else 'no' }}
#DefaultTasksAccounting=yes
-#DefaultTasksMax=15%
+#DefaultTasksMax=100%
#DefaultLimitCPU=
#DefaultLimitFSIZE=
#DefaultLimitDATA=
--
2.34.1

View File

@ -0,0 +1,29 @@
From 374cca5b2f9aea1c506352cf58b09db5c216a0d3 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@coreos.com>
Date: Tue, 20 Dec 2016 16:43:22 +0000
Subject: [PATCH 5/8] systemd: Disable SELinux permissions checks
We don't care about the interaction between systemd and SELinux policy, so
let's just disable these checks rather than having to incorporate policy
support. This has no impact on our SELinux use-case, which is purely intended
to limit containers and not anything running directly on the host.
---
src/core/selinux-access.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/core/selinux-access.c b/src/core/selinux-access.c
index 62181a6309..448f9211d6 100644
--- a/src/core/selinux-access.c
+++ b/src/core/selinux-access.c
@@ -2,7 +2,7 @@
#include "selinux-access.h"
-#if HAVE_SELINUX
+#if 0
#include <errno.h>
#include <selinux/avc.h>
--
2.34.1

View File

@ -0,0 +1,95 @@
From bffb2a48796a2736d7fb7328d2a88b1cbb812b12 Mon Sep 17 00:00:00 2001
From: Sayan Chowdhury <schowdhury@microsoft.com>
Date: Fri, 16 Dec 2022 16:28:26 +0530
Subject: [PATCH 6/8] Revert "getty: Pass tty to use by agetty via stdin"
This reverts commit b4bf9007cbee7dc0b1356897344ae2a7890df84c.
This is to work around a SELinux denial that happens when setting up standard
input for serial consoles (which is used for SSH connections).
Signed-off-by: Sayan Chowdhury <schowdhury@microsoft.com>
---
units/console-getty.service.in | 4 +---
units/container-getty@.service.in | 4 +---
units/getty@.service.in | 4 +---
units/serial-getty@.service.in | 4 +---
4 files changed, 4 insertions(+), 12 deletions(-)
diff --git a/units/console-getty.service.in b/units/console-getty.service.in
index d64112be5e..b908708d8c 100644
--- a/units/console-getty.service.in
+++ b/units/console-getty.service.in
@@ -22,12 +22,10 @@ ConditionPathExists=/dev/console
[Service]
# The '-o' option value tells agetty to replace 'login' arguments with an option to preserve environment (-p),
# followed by '--' for safety, and then the entered username.
-ExecStart=-/sbin/agetty -o '-p -- \\u' --noclear --keep-baud - 115200,38400,9600 $TERM
+ExecStart=-/sbin/agetty -o '-p -- \\u' --noclear --keep-baud console 115200,38400,9600 $TERM
Type=idle
Restart=always
UtmpIdentifier=cons
-StandardInput=tty
-StandardOutput=tty
TTYPath=/dev/console
TTYReset=yes
TTYVHangup=yes
diff --git a/units/container-getty@.service.in b/units/container-getty@.service.in
index 8847d735fb..8be25663f5 100644
--- a/units/container-getty@.service.in
+++ b/units/container-getty@.service.in
@@ -27,13 +27,11 @@ Before=rescue.service
[Service]
# The '-o' option value tells agetty to replace 'login' arguments with an option to preserve environment (-p),
# followed by '--' for safety, and then the entered username.
-ExecStart=-/sbin/agetty -o '-p -- \\u' --noclear - $TERM
+ExecStart=-/sbin/agetty -o '-p -- \\u' --noclear --keep-baud pts/%I 115200,38400,9600 $TERM
Type=idle
Restart=always
RestartSec=0
UtmpIdentifier=pts/%I
-StandardInput=tty
-StandardOutput=tty
TTYPath=/dev/pts/%I
TTYReset=yes
TTYVHangup=yes
diff --git a/units/getty@.service.in b/units/getty@.service.in
index 80b8f3e922..b57666c123 100644
--- a/units/getty@.service.in
+++ b/units/getty@.service.in
@@ -38,13 +38,11 @@ ConditionPathExists=/dev/tty0
# The '-o' option value tells agetty to replace 'login' arguments with an
# option to preserve environment (-p), followed by '--' for safety, and then
# the entered username.
-ExecStart=-/sbin/agetty -o '-p -- \\u' --noclear - $TERM
+ExecStart=-/sbin/agetty -o '-p -- \\u' --noclear %I $TERM
Type=idle
Restart=always
RestartSec=0
UtmpIdentifier=%I
-StandardInput=tty
-StandardOutput=tty
TTYPath=/dev/%I
TTYReset=yes
TTYVHangup=yes
diff --git a/units/serial-getty@.service.in b/units/serial-getty@.service.in
index 6bf101eac9..479b8759a9 100644
--- a/units/serial-getty@.service.in
+++ b/units/serial-getty@.service.in
@@ -33,12 +33,10 @@ Before=rescue.service
# The '-o' option value tells agetty to replace 'login' arguments with an
# option to preserve environment (-p), followed by '--' for safety, and then
# the entered username.
-ExecStart=-/sbin/agetty -o '-p -- \\u' --keep-baud 115200,57600,38400,9600 - $TERM
+ExecStart=-/sbin/agetty -o '-p -- \\u' --keep-baud 115200,57600,38400,9600 %I $TERM
Type=idle
Restart=always
UtmpIdentifier=%I
-StandardInput=tty
-StandardOutput=tty
TTYPath=/dev/%I
TTYReset=yes
TTYVHangup=yes
--
2.34.1

View File

@ -0,0 +1,42 @@
From 6a4c6f97742afc9ca5de40335b2d041095990aa2 Mon Sep 17 00:00:00 2001
From: Adrian Vladu <avladu@cloudbasesolutions.com>
Date: Fri, 16 Feb 2024 11:29:04 +0000
Subject: [PATCH 7/8] units: Keep using old journal file format
Systemd 252 made an incompatible change in journal file format. Temporarily
force journald to use the old journal format to give logging containers more
time to adapt to the new format.
Signed-off-by: Adrian Vladu <avladu@cloudbasesolutions.com>
---
units/systemd-journald.service.in | 1 +
units/systemd-journald@.service.in | 1 +
2 files changed, 2 insertions(+)
diff --git a/units/systemd-journald.service.in b/units/systemd-journald.service.in
index 37eeabc510..e5030a81bd 100644
--- a/units/systemd-journald.service.in
+++ b/units/systemd-journald.service.in
@@ -27,6 +27,7 @@ IgnoreOnIsolate=yes
[Service]
DeviceAllow=char-* rw
+Environment=SYSTEMD_JOURNAL_COMPACT=0
ExecStart={{LIBEXECDIR}}/systemd-journald
FileDescriptorStoreMax=4224
IPAddressDeny=any
diff --git a/units/systemd-journald@.service.in b/units/systemd-journald@.service.in
index c3bcb08533..8780783cf6 100644
--- a/units/systemd-journald@.service.in
+++ b/units/systemd-journald@.service.in
@@ -21,6 +21,7 @@ Conflicts=soft-reboot.target
[Service]
CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE CAP_SYS_PTRACE CAP_CHOWN CAP_DAC_READ_SEARCH CAP_FOWNER CAP_SETUID CAP_SETGID CAP_MAC_OVERRIDE
DevicePolicy=closed
+Environment=SYSTEMD_JOURNAL_COMPACT=0
ExecStart={{LIBEXECDIR}}/systemd-journald %i
FileDescriptorStoreMax=4224
Group=systemd-journal
--
2.34.1

View File

@ -1,242 +0,0 @@
https://bugs.gentoo.org/920331
https://github.com/systemd/systemd/issues/30535
From 4a9e03aa6bb2cbd23dac00f2b2a7642cc79eaade Mon Sep 17 00:00:00 2001
From: Daan De Meyer <daan.j.demeyer@gmail.com>
Date: Wed, 27 Sep 2023 11:55:59 +0200
Subject: [PATCH 1/2] core: Make private /dev read-only after populating it
---
src/core/namespace.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/core/namespace.c b/src/core/namespace.c
index e2304f5d066da..d1153f7690140 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -995,6 +995,11 @@ static int mount_private_dev(MountEntry *m) {
if (r < 0)
log_debug_errno(r, "Failed to set up basic device tree at '%s', ignoring: %m", temporary_mount);
+ /* Make the bind mount read-only. */
+ r = mount_nofollow_verbose(LOG_DEBUG, NULL, dev, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
+ if (r < 0)
+ return r;
+
/* Create the /dev directory if missing. It is more likely to be missing when the service is started
* with RootDirectory. This is consistent with mount units creating the mount points when missing. */
(void) mkdir_p_label(mount_entry_path(m), 0755);
From cd7f3702eb47c82a50bf74c2b7c15c2e4e1f5c79 Mon Sep 17 00:00:00 2001
From: Daan De Meyer <daan.j.demeyer@gmail.com>
Date: Wed, 27 Sep 2023 10:52:50 +0200
Subject: [PATCH 2/2] core: Use a subdirectory of /run/ for PrivateDevices=
When we're starting early boot services such as systemd-userdbd.service,
/tmp might not yet be mounted, so let's use a directory in /run instead
which is guaranteed to be available.
---
src/core/execute.c | 1 +
src/core/namespace.c | 61 +++++++++++++++++++++++++++++----------
src/core/namespace.h | 2 ++
src/test/test-namespace.c | 1 +
src/test/test-ns.c | 1 +
5 files changed, 50 insertions(+), 16 deletions(-)
diff --git a/src/core/execute.c b/src/core/execute.c
index a52df64d01081..89c3868d55f6c 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -3307,6 +3307,7 @@ static int apply_mount_namespace(
extension_dir,
root_dir || root_image ? params->notify_socket : NULL,
host_os_release_stage,
+ params->runtime_scope,
error_path);
/* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
diff --git a/src/core/namespace.c b/src/core/namespace.c
index d1153f7690140..a0471ac8884bf 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -909,7 +909,19 @@ static int clone_device_node(
return 0;
}
-static int mount_private_dev(MountEntry *m) {
+static char *settle_runtime_dir(RuntimeScope scope) {
+ char *runtime_dir;
+
+ if (scope != RUNTIME_SCOPE_USER)
+ return strdup("/run/");
+
+ if (asprintf(&runtime_dir, "/run/user/" UID_FMT, geteuid()) < 0)
+ return NULL;
+
+ return runtime_dir;
+}
+
+static int mount_private_dev(MountEntry *m, RuntimeScope scope) {
static const char devnodes[] =
"/dev/null\0"
"/dev/zero\0"
@@ -918,13 +930,21 @@ static int mount_private_dev(MountEntry *m) {
"/dev/urandom\0"
"/dev/tty\0";
- char temporary_mount[] = "/tmp/namespace-dev-XXXXXX";
+ _cleanup_free_ char *runtime_dir = NULL, *temporary_mount = NULL;
const char *dev = NULL, *devpts = NULL, *devshm = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
bool can_mknod = true;
int r;
assert(m);
+ runtime_dir = settle_runtime_dir(scope);
+ if (!runtime_dir)
+ return log_oom_debug();
+
+ temporary_mount = path_join(runtime_dir, "systemd/namespace-dev-XXXXXX");
+ if (!temporary_mount)
+ return log_oom_debug();
+
if (!mkdtemp(temporary_mount))
return log_debug_errno(errno, "Failed to create temporary directory '%s': %m", temporary_mount);
@@ -1364,7 +1384,8 @@ static int apply_one_mount(
MountEntry *m,
const ImagePolicy *mount_image_policy,
const ImagePolicy *extension_image_policy,
- const NamespaceInfo *ns_info) {
+ const NamespaceInfo *ns_info,
+ RuntimeScope scope) {
_cleanup_free_ char *inaccessible = NULL;
bool rbind = true, make = false;
@@ -1379,8 +1400,7 @@ static int apply_one_mount(
switch (m->mode) {
case INACCESSIBLE: {
- _cleanup_free_ char *tmp = NULL;
- const char *runtime_dir;
+ _cleanup_free_ char *runtime_dir = NULL;
struct stat target;
/* First, get rid of everything that is below if there
@@ -1396,14 +1416,14 @@ static int apply_one_mount(
mount_entry_path(m));
}
- if (geteuid() == 0)
- runtime_dir = "/run";
- else {
- if (asprintf(&tmp, "/run/user/" UID_FMT, geteuid()) < 0)
- return -ENOMEM;
-
- runtime_dir = tmp;
- }
+ /* We don't pass the literal runtime scope through here but one based purely on our UID. This
+ * means that the root user's --user services will use the host's inaccessible inodes rather
+ * then root's private ones. This is preferable since it means device nodes that are
+ * overmounted to make them inaccessible will be overmounted with a device node, rather than
+ * an AF_UNIX socket inode. */
+ runtime_dir = settle_runtime_dir(geteuid() == 0 ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER);
+ if (!runtime_dir)
+ return log_oom_debug();
r = mode_to_inaccessible_node(runtime_dir, target.st_mode, &inaccessible);
if (r < 0)
@@ -1523,7 +1543,7 @@ static int apply_one_mount(
break;
case PRIVATE_DEV:
- return mount_private_dev(m);
+ return mount_private_dev(m, scope);
case BIND_DEV:
return mount_bind_dev(m);
@@ -1824,6 +1844,7 @@ static int apply_mounts(
const NamespaceInfo *ns_info,
MountEntry *mounts,
size_t *n_mounts,
+ RuntimeScope scope,
char **symlinks,
char **error_path) {
@@ -1875,7 +1896,7 @@ static int apply_mounts(
break;
}
- r = apply_one_mount(root, m, mount_image_policy, extension_image_policy, ns_info);
+ r = apply_one_mount(root, m, mount_image_policy, extension_image_policy, ns_info, scope);
if (r < 0) {
if (error_path && mount_entry_path(m))
*error_path = strdup(mount_entry_path(m));
@@ -2030,6 +2051,7 @@ int setup_namespace(
const char *extension_dir,
const char *notify_socket,
const char *host_os_release_stage,
+ RuntimeScope scope,
char **error_path) {
_cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
@@ -2490,7 +2512,14 @@ int setup_namespace(
(void) base_filesystem_create(root, UID_INVALID, GID_INVALID);
/* Now make the magic happen */
- r = apply_mounts(root, mount_image_policy, extension_image_policy, ns_info, mounts, &n_mounts, symlinks, error_path);
+ r = apply_mounts(root,
+ mount_image_policy,
+ extension_image_policy,
+ ns_info,
+ mounts, &n_mounts,
+ scope,
+ symlinks,
+ error_path);
if (r < 0)
goto finish;
diff --git a/src/core/namespace.h b/src/core/namespace.h
index b6132154c5132..581403d89826d 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -16,6 +16,7 @@ typedef struct MountImage MountImage;
#include "fs-util.h"
#include "macro.h"
#include "namespace-util.h"
+#include "runtime-scope.h"
#include "string-util.h"
typedef enum ProtectHome {
@@ -134,6 +135,7 @@ int setup_namespace(
const char *extension_dir,
const char *notify_socket,
const char *host_os_release_stage,
+ RuntimeScope scope,
char **error_path);
#define RUN_SYSTEMD_EMPTY "/run/systemd/empty"
diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c
index 25aafc35ca837..42ac65d08c87a 100644
--- a/src/test/test-namespace.c
+++ b/src/test/test-namespace.c
@@ -206,6 +206,7 @@ TEST(protect_kernel_logs) {
NULL,
NULL,
NULL,
+ RUNTIME_SCOPE_SYSTEM,
NULL);
assert_se(r == 0);
diff --git a/src/test/test-ns.c b/src/test/test-ns.c
index 77afd2f6b9eb8..eb3afed9e1c66 100644
--- a/src/test/test-ns.c
+++ b/src/test/test-ns.c
@@ -108,6 +108,7 @@ int main(int argc, char *argv[]) {
NULL,
NULL,
NULL,
+ RUNTIME_SCOPE_SYSTEM,
NULL);
if (r < 0) {
log_error_errno(r, "Failed to set up namespace: %m");

View File

@ -0,0 +1,2 @@
# Do not enable any services if /etc is detected as empty.
disable *

View File

@ -1,27 +0,0 @@
# Sample nss configuration for systemd
# systemd-specific modules
# See the manual pages fore further information.
# nss-myhostname - host resolution for the local hostname
# nss-mymachines - host, user, group resolution for containers
# nss-resolve - host resolution using resolved
# nss-systemd - dynamic user/group resolution (DynamicUser in unit files)
passwd: files mymachines systemd
shadow: files
group: files mymachines systemd
gshadow: files
hosts: files mymachines resolve [!UNAVAIL=return] dns myhostname
networks: files
services: db files
protocols: db files
rpc: db files
ethers: db files
netmasks: files
netgroup: files
bootparams: files
automount: files
aliases: files

View File

@ -1,34 +0,0 @@
https://bugs.gentoo.org/896364
Workaround for bug in sys-kernel/dracut.
From 6b25470ee28843a49c50442e9d8a98edc842ceca Mon Sep 17 00:00:00 2001
From: Yu Watanabe <watanabe.yu+github@gmail.com>
Date: Mon, 20 Feb 2023 12:00:30 +0900
Subject: [PATCH] core/manager: run generators directly when we are in initrd
Some initrd system write files at ourside of /run, /etc, or other
allowed places. This is a kind of workaround, but in most cases, such
sandboxing is not necessary as the filesystem is on ramfs when we are in
initrd.
Fixes #26488.
---
src/core/manager.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/core/manager.c b/src/core/manager.c
index 7b394794b0d4..306477c6e6c2 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -3822,8 +3822,8 @@ static int manager_run_generators(Manager *m) {
/* If we are the system manager, we fork and invoke the generators in a sanitized mount namespace. If
* we are the user manager, let's just execute the generators directly. We might not have the
* necessary privileges, and the system manager has already mounted /tmp/ and everything else for us.
- */
- if (MANAGER_IS_USER(m)) {
+ * If we are in initrd, let's also execute the generators directly, as we are in ramfs. */
+ if (MANAGER_IS_USER(m) || in_initrd()) {
r = manager_execute_generators(m, paths, /* remount_ro= */ false);
goto finish;
}

View File

@ -0,0 +1,10 @@
# The list of directories is taken from Gentoo ebuild, where they use
# keepdir. The list isn't sorted, but tries to preserve the order of
# keepdir lines from Gentoo ebuild for easier comparisons. We skip the
# directories in /usr, though.
d /var/lib/systemd - - - - -
d /var/log/journal - - - - -
d /etc/sysctl.d - - - - -
# This seems to be our own addition.
d /var/log/journal/remote - systemd-journal-remote systemd-journal-remote - -

View File

@ -0,0 +1,2 @@
d /run/systemd/network - - - - -
L /run/systemd/network/resolv.conf - - - - ../resolve/resolv.conf

View File

@ -1,5 +0,0 @@
account include system-auth
session required pam_loginuid.so
session include system-auth
session optional pam_systemd.so

View File

@ -25,12 +25,13 @@ else
SRC_URI="https://github.com/systemd/${MY_PN}/archive/v${MY_PV}/${MY_P}.tar.gz"
if [[ ${PV} != *rc* ]] ; then
KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~loong ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86"
# Flatcar: mark as stable
KEYWORDS="~alpha amd64 ~arm arm64 ~hppa ~ia64 ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86"
fi
fi
inherit bash-completion-r1 linux-info meson-multilib optfeature pam python-single-r1
inherit secureboot systemd toolchain-funcs udev
inherit secureboot systemd tmpfiles toolchain-funcs udev
DESCRIPTION="System and service manager for Linux"
HOMEPAGE="http://systemd.io/"
@ -101,6 +102,14 @@ DEPEND="${COMMON_DEPEND}
PEFILE_DEPEND='dev-python/pefile[${PYTHON_USEDEP}]'
# baselayout-2.2 has /run
#
# Flatcar: Drop sec-policy/selinux-ntp from deps (under selinux use
# flag). The image stage fails with "Failed to resolve
# typeattributeset statement at
# /var/lib/selinux/mcs/tmp/modules/400/ntp/cil:120"
#
# Flatcar: Added a dep on sys-apps/kbd. It provides a loadkeys binary
# needed by dracut's systemd-vconsole-setup module.
RDEPEND="${COMMON_DEPEND}
>=acct-group/adm-0-r1
>=acct-group/wheel-0-r1
@ -129,13 +138,13 @@ RDEPEND="${COMMON_DEPEND}
>=acct-user/systemd-resolve-0-r1
>=acct-user/systemd-timesync-0-r1
>=sys-apps/baselayout-2.2
sys-apps/kbd
ukify? (
${PYTHON_DEPS}
$(python_gen_cond_dep "${PEFILE_DEPEND}")
)
selinux? (
sec-policy/selinux-base-policy[systemd]
sec-policy/selinux-ntp
)
sysv-utils? (
!sys-apps/openrc[sysv-utils(-)]
@ -186,11 +195,12 @@ QA_FLAGS_IGNORED="usr/lib/systemd/boot/efi/.*"
QA_EXECSTACK="usr/lib/systemd/boot/efi/*"
pkg_pretend() {
if use split-usr; then
eerror "Please complete the migration to merged-usr."
eerror "https://wiki.gentoo.org/wiki/Merge-usr"
die "systemd no longer supports split-usr"
fi
# Flatcar: We keep using split-usr for SDK.
# if use split-usr; then
# eerror "Please complete the migration to merged-usr."
# eerror "https://wiki.gentoo.org/wiki/Merge-usr"
# die "systemd no longer supports split-usr"
# fi
if [[ ${MERGE_TYPE} != buildonly ]]; then
local CONFIG_CHECK="~BLK_DEV_BSG ~CGROUPS
~CGROUP_BPF ~DEVTMPFS ~EPOLL ~FANOTIFY ~FHANDLE
@ -246,6 +256,17 @@ src_unpack() {
src_prepare() {
local PATCHES=(
"${FILESDIR}/systemd-test-process-util.patch"
"${FILESDIR}"/255-install-format-overflow.patch
# Flatcar: Adding our own patches here.
"${FILESDIR}/0001-wait-online-set-any-by-default.patch"
"${FILESDIR}/0002-networkd-default-to-kernel-IPForwarding-setting.patch"
"${FILESDIR}/0003-needs-update-don-t-require-strictly-newer-usr.patch"
"${FILESDIR}/0004-core-use-max-for-DefaultTasksMax.patch"
"${FILESDIR}/0005-systemd-Disable-SELinux-permissions-checks.patch"
"${FILESDIR}/0006-Revert-getty-Pass-tty-to-use-by-agetty-via-stdin.patch"
"${FILESDIR}/0007-units-Keep-using-old-journal-file-format.patch"
# Flatcar: This can be dropped when updating to 256.
"${FILESDIR}/0008-sysext-Mutable-overlays.patch"
)
if ! use vanilla; then
@ -255,6 +276,23 @@ src_prepare() {
)
fi
# Fails with split-usr.
sed -i -e '2i exit 77' test/test-rpm-macros.sh || die
# Flatcar: The Kubelet takes /etc/resolv.conf for, e.g.,
# CoreDNS which has dnsPolicy "default", but unless the
# kubelet --resolv-conf flag is set to point to
# /run/systemd/resolve/resolv.conf this won't work with
# /etc/resolv.conf pointing to
# /run/systemd/resolve/stub-resolv.conf which configures
# 127.0.0.53. See
# https://kubernetes.io/docs/tasks/administer-cluster/dns-debugging-resolution/#known-issues
# This means that users who need split DNS to work should
# point /etc/resolv.conf back to
# /run/systemd/resolve/stub-resolv.conf (and if using K8s
# configure the kubelet resolvConf variable/--resolv-conf flag
# to /run/systemd/resolve/resolv.conf).
sed -i -e 's,/run/systemd/resolve/stub-resolv.conf,/run/systemd/resolve/resolv.conf,' tmpfiles.d/systemd-resolve.conf || die
default
}
@ -267,16 +305,34 @@ src_configure() {
multilib-minimal_src_configure
}
# Flatcar: Our function, we use it in some places below.
get_rootprefix() {
usex split-usr "${EPREFIX:-/}" "${EPREFIX}/usr"
}
multilib_src_configure() {
local myconf=(
--localstatedir="${EPREFIX}/var"
# default is developer, bug 918671
-Dmode=release
-Dsupport-url="https://gentoo.org/support/"
# Flatcar: Point to our user mailing list.
-Dsupport-url="https://groups.google.com/forum/#!forum/flatcar-linux-user"
-Dpamlibdir="$(getpam_mod_dir)"
# avoid bash-completion dep
-Dbashcompletiondir="$(get_bashcompdir)"
-Dsplit-bin=false
# Flatcar: We keep using split-usr in SDK.
$(meson_use split-usr)
# Flatcar: Always set split-bin to true, we always
# have separate bin and sbin directories
-Dsplit-bin=true
# Flatcar: Use get_rootprefix. No functional change
# from upstream, just refactoring the common code used
# in some places.
#
# TODO: Drop -Drootprefix and -Drootlibdir we get rid
# of split-usr in SDK
-Drootprefix="$(get_rootprefix)"
-Drootlibdir="${EPREFIX}/usr/$(get_libdir)"
# Disable compatibility with sysvinit
-Dsysvinit-path=
-Dsysvrcnd-path=
@ -326,9 +382,11 @@ multilib_src_configure() {
$(meson_native_use_bool test dbus)
$(meson_native_use_bool ukify)
$(meson_native_use_bool xkb xkbcommon)
-Dntp-servers="0.gentoo.pool.ntp.org 1.gentoo.pool.ntp.org 2.gentoo.pool.ntp.org 3.gentoo.pool.ntp.org"
# Flatcar: Use our ntp servers.
-Dntp-servers="0.flatcar.pool.ntp.org 1.flatcar.pool.ntp.org 2.flatcar.pool.ntp.org 3.flatcar.pool.ntp.org"
# Breaks screen, tmux, etc.
-Ddefault-kill-user-processes=false
# Flatcar: TODO: Investigate if we want this.
-Dcreate-log-dirs=false
# multilib options
@ -352,6 +410,42 @@ multilib_src_configure() {
$(meson_native_true tmpfiles)
$(meson_native_true vconsole)
$(meson_native_enabled vmspawn)
# Flatcar: Specify this, or meson breaks due to no
# /etc/login.defs.
-Dsystem-gid-max=999
-Dsystem-uid-max=999
# Flatcar: DBus paths.
-Ddbussessionservicedir="${EPREFIX}/usr/share/dbus-1/services"
-Ddbussystemservicedir="${EPREFIX}/usr/share/dbus-1/system-services"
# Flatcar: PAM config directory.
-Dpamconfdir=/usr/share/pam.d
# Flatcar: The CoreOS epoch, Mon Jul 1 00:00:00 UTC
# 2013. Used by timesyncd as a sanity check for the
# minimum acceptable time. Explicitly set to avoid
# using the current build time.
-Dtime-epoch=1372636800
# Flatcar: No default name servers.
-Ddns-servers=
# Flatcar: Disable the "First Boot Wizard", it isn't
# very applicable to us.
-Dfirstboot=false
# Flatcar: Set latest network interface naming scheme
# for https://github.com/flatcar/Flatcar/issues/36
-Ddefault-net-naming-scheme=latest
# Flatcar: Combined log format: name plus description
-Dstatus-unit-format-default=combined
# Flatcar: Unported options, still needed?
-Dquotaon-path=/usr/sbin/quotaon
-Dquotacheck-path=/usr/sbin/quotacheck
-Ddefault-mdns=no
)
meson_src_configure "${myconf[@]}"
@ -374,7 +468,9 @@ multilib_src_install_all() {
mv "${ED}"/usr/share/doc/{systemd,${PF}} || die
einstalldocs
dodoc "${FILESDIR}"/nsswitch.conf
# Flatcar: Do not install sample nsswitch.conf, we don't
# provide it.
# dodoc "${FILESDIR}"/nsswitch.conf
insinto /usr/lib/tmpfiles.d
doins "${FILESDIR}"/legacy.conf
@ -392,6 +488,8 @@ multilib_src_install_all() {
# https://bugs.gentoo.org/761763
rm -r "${ED}"/usr/lib/sysusers.d || die
# Flatcar: Upstream uses keepdir commands to keep some empty
# directories. We use tmpfiles.
# Preserve empty dirs in /etc & /var, bug #437008
keepdir /etc/{binfmt.d,modules-load.d,tmpfiles.d}
keepdir /etc/kernel/install.d
@ -400,25 +498,134 @@ multilib_src_install_all() {
keepdir /etc/udev/hwdb.d
keepdir /usr/lib/systemd/{system-sleep,system-shutdown}
keepdir /usr/lib/{binfmt.d,modules-load.d}
keepdir /usr/lib/systemd/user-generators
keepdir /var/lib/systemd
keepdir /var/log/journal
# keepdir /usr/lib/systemd/{system-sleep,system-shutdown}
# keepdir /usr/lib/{binfmt.d,modules-load.d}
# keepdir /usr/lib/systemd/user-generators
# keepdir /var/lib/systemd
# keepdir /var/log/journal
if use pam; then
newpamd "${FILESDIR}"/systemd-user.pam systemd-user
fi
# if use pam; then
# newpamd "${FILESDIR}"/systemd-user.pam systemd-user
# fi
if use kernel-install; then
# Dummy config, remove to make room for sys-kernel/installkernel
rm "${ED}/usr/lib/kernel/install.conf" || die
fi
# Flatcar: Ensure journal directory has correct ownership/mode
# in inital image. This is fixed by systemd-tmpfiles *but*
# journald starts before that and will create the journal if
# the filesystem is already read-write. Conveniently the
# systemd Makefile sets this up completely wrong.
#
# Flatcar: TODO: Is this still a problem?
dodir /var/log/journal
fowners root:systemd-journal /var/log/journal
fperms 2755 /var/log/journal
# Flatcar: Don't prune systemd dirs.
dotmpfiles "${FILESDIR}"/systemd-flatcar.conf
# Flatcar: Add tmpfiles rule for resolv.conf. This path has
# changed after v213 so it must be handled here instead of
# baselayout now.
dotmpfiles "${FILESDIR}"/systemd-resolv.conf
# Flatcar: Don't default to graphical.target.
local unitdir=$(builddir_systemd_get_systemunitdir)
dosym multi-user.target "${unitdir}"/default.target
# Flatcar: Don't set any extra environment variables by default.
rm "${ED}/usr/lib/environment.d/99-environment.conf" || die
# Flatcar: These lines more or less follow the systemd's
# preset file (90-systemd.preset). We do it that way, to avoid
# putting symlinks in /etc. Please keep the lines in the same
# order as the "enable" lines appear in the preset file. For a
# single enable line in preset, there may be more lines if the
# unit file had Also: clause which has units we enable here
# too.
# Flatcar: enable remote-fs.target
builddir_systemd_enable_service multi-user.target remote-fs.target
# Flatcar: enable remote-cryptsetup.target
if use cryptsetup; then
builddir_systemd_enable_service multi-user.target remote-cryptsetup.target
fi
# Flatcar: enable machines.target
builddir_systemd_enable_service multi-user.target machines.target
# Flatcar: enable getty@.service
dodir "${unitdir}/getty.target.wants"
dosym ../getty@.service "${unitdir}/getty.target.wants/getty@tty1.service"
# Flatcar: enable systemd-timesyncd.service
builddir_systemd_enable_service sysinit.target systemd-timesyncd.service
# Flatcar: enable systemd-networkd.service (Also: systemd-networkd.socket, systemd-networkd-wait-online.service)
builddir_systemd_enable_service multi-user.target systemd-networkd.service
builddir_systemd_enable_service sockets.target systemd-networkd.socket
builddir_systemd_enable_service network-online.target systemd-networkd-wait-online.service
# Flatcar: enable systemd-network-generator.service
builddir_systemd_enable_service sysinit.target systemd-network-generator.service
# Flatcar: enable systemd-resolved.service
builddir_systemd_enable_service multi-user.target systemd-resolved.service
# Flatcar: enable systemd-homed.service (Also: systemd-userdbd.service [not enabled - has no WantedBy entry])
if use homed; then
builddir_systemd_enable_service multi-user.target systemd-homed.target
fi
# Flatcar: enable systemd-userdbd.socket
builddir_systemd_enable_service sockets.target systemd-userdbd.socket
# Flatcar: enable systemd-pstore.service
builddir_systemd_enable_service sysinit.target systemd-pstore.service
# Flatcar: enable systemd-boot-update.service
if use boot; then
builddir_systemd_enable_service sysinit.target systemd-boot-update.service
fi
# Flatcar: enable reboot.target (not enabled - has no WantedBy
# entry)
# Flatcar: enable systemd-sysext.service by default
builddir_systemd_enable_service sysinit.target systemd-sysext.service
# Flatcar: Use an empty preset file, because systemctl
# preset-all puts symlinks in /etc, not in /usr. We don't use
# /etc, because it is not autoupdated. We do the "preset" above.
rm "${ED}/usr/lib/systemd/system-preset/90-systemd.preset" || die
insinto /usr/lib/systemd/system-preset
doins "${FILESDIR}"/99-default.preset
# Flatcar: Do not ship distro-specific files (nsswitch.conf
# pam.d). This conflicts with our own configuration provided
# by baselayout.
rm -rf "${ED}"/usr/share/factory
sed -i "${ED}"/usr/lib/tmpfiles.d/etc.conf \
-e '/^C!* \/etc\/nsswitch\.conf/d' \
-e '/^C!* \/etc\/pam\.d/d' \
-e '/^C!* \/etc\/issue/d'
use ukify && python_fix_shebang "${ED}"
use boot && secureboot_auto_sign
}
# Flatcar: Our own version of systemd_get_systemunitdir, that returns
# a path inside /usr, not /etc.
builddir_systemd_get_systemunitdir() {
echo "$(get_rootprefix)/lib/systemd/system"
}
# Flatcar: Our own version of systemd_enable_service, that does
# operations inside /usr, not /etc.
builddir_systemd_enable_service() {
local target=${1}
local service=${2}
local ud=$(builddir_systemd_get_systemunitdir)
local destname=${service##*/}
dodir "${ud}"/"${target}".wants && \
dosym ../"${service}" "${ud}"/"${target}".wants/"${destname}"
if use boot; then
python_fix_shebang "${ED}"
secureboot_auto_sign
fi
}
migrate_locale() {
local envd_locale_def="${EROOT}/etc/env.d/02locale"
local envd_locale=( "${EROOT}"/etc/env.d/??locale )
@ -469,6 +676,23 @@ pkg_preinst() {
dosym ../../../etc/sysctl.conf /usr/lib/sysctl.d/99-sysctl.conf
fi
# Flatcar: This used to be in upstream ebuild, but now it's
# gone. We should drop it once we get rid of split-usr in SDK.
if ! use split-usr; then
local dir
# Flatcar: We still use separate bin and sbin, so drop usr/sbin from the list.
for dir in bin sbin lib; do
if [[ ! -L ${EROOT}/${dir} ]]; then
eerror "'${EROOT}/${dir}' is not a symbolic link."
FAIL=1
fi
done
if [[ ${FAIL} ]]; then
eerror "Migration to system layout with merged directories must be performed before"
eerror "installing ${CATEGORY}/${PN} with USE=\"-split-usr\" to avoid run-time breakage."
die "System layout with split directories still used"
fi
fi
if ! use boot && has_version "sys-apps/systemd[gnuefi(-)]"; then
ewarn "The 'gnuefi' USE flag has been renamed to 'boot'."
ewarn "Make sure to enable the 'boot' USE flag if you use systemd-boot."
@ -488,13 +712,15 @@ pkg_postinst() {
# between OpenRC & systemd
migrate_locale
if [[ -z ${REPLACING_VERSIONS} ]]; then
if type systemctl &>/dev/null; then
systemctl --root="${ROOT:-/}" enable getty@.service remote-fs.target || FAIL=1
fi
elog "To enable a useful set of services, run the following:"
elog " systemctl preset-all --preset-mode=enable-only"
fi
# Flatcar: We enable getty and remote-fs targets in /usr
# ourselves above.
# if [[ -z ${REPLACING_VERSIONS} ]]; then
# if type systemctl &>/dev/null; then
# systemctl --root="${ROOT:-/}" enable getty@.service remote-fs.target || FAIL=1
# fi
# elog "To enable a useful set of services, run the following:"
# elog " systemctl preset-all --preset-mode=enable-only"
# fi
if [[ -L ${EROOT}/var/lib/systemd/timesync ]]; then
rm "${EROOT}/var/lib/systemd/timesync"