From 80ca8ff7135b0950b83d2ceaa32ee1eacce049e0 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 6 Jun 2024 16:45:49 +0400 Subject: [PATCH] fix: update the cgroups for Talos core services There was a bit of a mess here which worked fine until we bumped runc/containerd, and the problem shows up in Talos-in-Kubernetes tests. Use consistently `runner.WithCgroupPath`, as it handles cgroup nesting for cases when Talos runs in a container. Assign each service its own unique cgroup. Signed-off-by: Andrey Smirnov --- .../pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go | 2 ++ internal/app/machined/pkg/system/services/apid.go | 1 + internal/app/machined/pkg/system/services/etcd.go | 1 + internal/app/machined/pkg/system/services/extension.go | 2 +- internal/app/machined/pkg/system/services/kubelet.go | 2 +- internal/app/machined/pkg/system/services/trustd.go | 1 + pkg/machinery/constants/constants.go | 9 +++++++++ 7 files changed, 16 insertions(+), 2 deletions(-) diff --git a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go index dfca4b921..138e79449 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go @@ -170,6 +170,8 @@ func CreateSystemCgroups(runtime.Sequence, any) (runtime.TaskExecutionFunc, stri return fmt.Errorf("error initializing cgroups root path: %w", err) } + logger.Printf("using cgroups root: %s", cgroup.Root()) + groups := []struct { name string resources *cgroup2.Resources diff --git a/internal/app/machined/pkg/system/services/apid.go b/internal/app/machined/pkg/system/services/apid.go index 937065f28..1f8e55547 100644 --- a/internal/app/machined/pkg/system/services/apid.go +++ b/internal/app/machined/pkg/system/services/apid.go @@ -192,6 +192,7 @@ func (o *APID) Runner(r runtime.Runtime) (runner.Runner, error) { runner.WithLoggingManager(r.Logging()), runner.WithContainerdAddress(constants.SystemContainerdAddress), runner.WithEnv(env), + runner.WithCgroupPath(constants.CgroupApid), runner.WithOCISpecOpts( oci.WithDroppedCapabilities(cap.Known()), oci.WithHostNamespace(specs.NetworkNamespace), diff --git a/internal/app/machined/pkg/system/services/etcd.go b/internal/app/machined/pkg/system/services/etcd.go index 90b54ea0f..0bc7baf65 100644 --- a/internal/app/machined/pkg/system/services/etcd.go +++ b/internal/app/machined/pkg/system/services/etcd.go @@ -218,6 +218,7 @@ func (e *Etcd) Runner(r runtime.Runtime) (runner.Runner, error) { runner.WithNamespace(constants.SystemContainerdNamespace), runner.WithContainerImage(e.imgRef), runner.WithEnv(env), + runner.WithCgroupPath(constants.CgroupEtcd), runner.WithOCISpecOpts( oci.WithDroppedCapabilities(cap.Known()), oci.WithHostNamespace(specs.NetworkNamespace), diff --git a/internal/app/machined/pkg/system/services/extension.go b/internal/app/machined/pkg/system/services/extension.go index 0d7fbfcf1..99b3fd356 100644 --- a/internal/app/machined/pkg/system/services/extension.go +++ b/internal/app/machined/pkg/system/services/extension.go @@ -113,7 +113,6 @@ func (svc *Extension) getOCIOptions(envVars []string, mounts []specs.Mount) []oc ociOpts := []oci.SpecOpts{ oci.WithRootFSPath(filepath.Join(constants.ExtensionServiceRootfsPath, svc.Spec.Name)), containerd.WithRootfsPropagation(svc.Spec.Container.Security.RootfsPropagation), - oci.WithCgroup(filepath.Join(constants.CgroupExtensions, svc.Spec.Name)), oci.WithMounts(mounts), oci.WithHostNamespace(specs.NetworkNamespace), oci.WithSelinuxLabel(""), @@ -216,6 +215,7 @@ func (svc *Extension) Runner(r runtime.Runtime) (runner.Runner, error) { runner.WithContainerdAddress(constants.SystemContainerdAddress), runner.WithEnv(environment.Get(r.Config())), runner.WithOCISpecOpts(ociSpecOpts...), + runner.WithCgroupPath(filepath.Join(constants.CgroupExtensions, svc.Spec.Name)), runner.WithOOMScoreAdj(-600), ), restart.WithType(restartType), diff --git a/internal/app/machined/pkg/system/services/kubelet.go b/internal/app/machined/pkg/system/services/kubelet.go index ad1c14ee5..0a6a25df3 100644 --- a/internal/app/machined/pkg/system/services/kubelet.go +++ b/internal/app/machined/pkg/system/services/kubelet.go @@ -157,9 +157,9 @@ func (k *Kubelet) Runner(r runtime.Runtime) (runner.Runner, error) { runner.WithNamespace(constants.SystemContainerdNamespace), runner.WithContainerImage(k.imgRef), runner.WithEnv(environment.Get(r.Config())), + runner.WithCgroupPath(constants.CgroupKubelet), runner.WithOCISpecOpts( containerd.WithRootfsPropagation("shared"), - oci.WithCgroup(constants.CgroupKubelet), oci.WithMounts(mounts), oci.WithHostNamespace(specs.NetworkNamespace), oci.WithHostNamespace(specs.PIDNamespace), diff --git a/internal/app/machined/pkg/system/services/trustd.go b/internal/app/machined/pkg/system/services/trustd.go index d42bdb9b1..4c344777e 100644 --- a/internal/app/machined/pkg/system/services/trustd.go +++ b/internal/app/machined/pkg/system/services/trustd.go @@ -155,6 +155,7 @@ func (t *Trustd) Runner(r runtime.Runtime) (runner.Runner, error) { runner.WithLoggingManager(r.Logging()), runner.WithContainerdAddress(constants.SystemContainerdAddress), runner.WithEnv(env), + runner.WithCgroupPath(constants.CgroupTrustd), runner.WithOCISpecOpts( containerd.WithMemoryLimit(int64(1000000*512)), oci.WithDroppedCapabilities(cap.Known()), diff --git a/pkg/machinery/constants/constants.go b/pkg/machinery/constants/constants.go index cb0345961..d13e2d8d0 100644 --- a/pkg/machinery/constants/constants.go +++ b/pkg/machinery/constants/constants.go @@ -666,6 +666,12 @@ const ( // CgroupSystemRuntime is the cgroup name for containerd runtime processes. CgroupSystemRuntime = CgroupSystem + "/runtime" + // CgroupApid is the cgroup name for apid runtime processes. + CgroupApid = CgroupSystem + "/apid" + + // CgroupTrustd is the cgroup name for trustd runtime processes. + CgroupTrustd = CgroupSystem + "/trustd" + // CgroupUdevd is the cgroup name for udevd runtime processes. CgroupUdevd = CgroupSystem + "/udevd" @@ -681,6 +687,9 @@ const ( // CgroupPodRuntimeReservedMemory is the hard memory protection for the cri runtime processes. CgroupPodRuntimeReservedMemory = 128 * 1024 * 1024 + // CgroupEtcd is the cgroup name for etcd process. + CgroupEtcd = "/podruntime/etcd" + // CgroupKubelet is the cgroup name for kubelet process. CgroupKubelet = "/podruntime/kubelet"