From e0915daa4a84ea0494625de81402457a19a731a4 Mon Sep 17 00:00:00 2001 From: Joe Hohertz Date: Tue, 5 Nov 2019 15:39:25 -0500 Subject: [PATCH] add patches for CFS throttling fixes --- .../sys-kernel/coreos-kernel-4.19.81 | 13 - .../sys-kernel/coreos-kernel-4.19.81-r1 | 13 + ...ules-4.19.81 => coreos-modules-4.19.81-r1} | 4 +- ...rces-4.19.81 => coreos-sources-4.19.81-r1} | 2 +- ...ebuild => coreos-kernel-4.19.81-r1.ebuild} | 2 +- ...build => coreos-modules-4.19.81-r1.ebuild} | 2 +- ...build => coreos-sources-4.19.81-r1.ebuild} | 2 + ...lative-path-for-KBUILD_SRC-from-CURD.patch | 4 +- ...kefile-Don-t-fail-on-fallthrough-wit.patch | 4 +- ...d-nf_conntrack_ipv4-compat-module-fo.patch | 4 +- ...ow-cpu-usage-with-high-throttling-by.patch | 287 ++++++++++++++++++ ...ix-Wunused-but-set-variable-warnings.patch | 85 ++++++ 12 files changed, 398 insertions(+), 24 deletions(-) delete mode 100644 sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-kernel-4.19.81 create mode 100644 sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-kernel-4.19.81-r1 rename sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/{coreos-modules-4.19.81 => coreos-modules-4.19.81-r1} (91%) rename sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/{coreos-sources-4.19.81 => coreos-sources-4.19.81-r1} (98%) rename sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-kernel/{coreos-kernel-4.19.81.ebuild => coreos-kernel-4.19.81-r1.ebuild} (99%) rename sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-modules/{coreos-modules-4.19.81.ebuild => coreos-modules-4.19.81-r1.ebuild} (98%) rename sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/{coreos-sources-4.19.81.ebuild => coreos-sources-4.19.81-r1.ebuild} (88%) create mode 100644 
sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0004-sched-fair-Fix-low-cpu-usage-with-high-throttling-by.patch create mode 100644 sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0005-sched-fair-Fix-Wunused-but-set-variable-warnings.patch diff --git a/sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-kernel-4.19.81 b/sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-kernel-4.19.81 deleted file mode 100644 index e744f794ba..0000000000 --- a/sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-kernel-4.19.81 +++ /dev/null @@ -1,13 +0,0 @@ -DEFINED_PHASES=compile configure install prepare pretend setup unpack -DEPEND==sys-kernel/coreos-modules-4.19.81 app-arch/gzip app-shells/bash coreos-base/coreos-init:= sys-apps/coreutils sys-apps/findutils sys-apps/grep sys-apps/ignition:= sys-apps/less sys-apps/nvme-cli sys-apps/sed sys-apps/shadow sys-apps/systemd[cryptsetup] sys-apps/seismograph sys-apps/util-linux sys-fs/btrfs-progs sys-fs/e2fsprogs sys-fs/mdadm sys-fs/xfsprogs >=sys-kernel/coreos-firmware-20180103-r1:= >=sys-kernel/bootengine-0.0.4:= sys-kernel/dracut virtual/udev amd64? 
( sys-firmware/intel-microcode:= ) =sys-kernel/coreos-sources-4.19.81 -DESCRIPTION=CoreOS Linux kernel -EAPI=5 -HOMEPAGE=http://www.kernel.org -IUSE=kernel_linux -KEYWORDS=amd64 -LICENSE=GPL-2 freedist -RDEPEND==sys-kernel/coreos-modules-4.19.81 -RESTRICT=binchecks strip -SLOT=0/4.19.81 -_eclasses_=coreos-kernel 588460f26859c559935beb69c53cb5c0 eapi7-ver 756b3f27d8e46131d5cf3c51bd876446 linux-info 953c3b1c472dcadbf62098a9301327f2 multilib 1d91b03d42ab6308b5f4f6b598ed110e toolchain-funcs 8c7f9d80beedd16f2e5a7f612c609529 -_md5_=1a19bf8157015b4dcd933d7281d1920d diff --git a/sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-kernel-4.19.81-r1 b/sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-kernel-4.19.81-r1 new file mode 100644 index 0000000000..36bc8f7677 --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-kernel-4.19.81-r1 @@ -0,0 +1,13 @@ +DEFINED_PHASES=compile configure install prepare pretend setup unpack +DEPEND==sys-kernel/coreos-modules-4.19.81-r1 app-arch/gzip app-shells/bash coreos-base/coreos-init:= sys-apps/coreutils sys-apps/findutils sys-apps/grep sys-apps/ignition:= sys-apps/less sys-apps/nvme-cli sys-apps/sed sys-apps/shadow sys-apps/systemd[cryptsetup] sys-apps/seismograph sys-apps/util-linux sys-fs/btrfs-progs sys-fs/e2fsprogs sys-fs/mdadm sys-fs/xfsprogs >=sys-kernel/coreos-firmware-20180103-r1:= >=sys-kernel/bootengine-0.0.4:= sys-kernel/dracut virtual/udev amd64? 
( sys-firmware/intel-microcode:= ) =sys-kernel/coreos-sources-4.19.81-r1 +DESCRIPTION=CoreOS Linux kernel +EAPI=5 +HOMEPAGE=http://www.kernel.org +IUSE=kernel_linux +KEYWORDS=amd64 +LICENSE=GPL-2 freedist +RDEPEND==sys-kernel/coreos-modules-4.19.81-r1 +RESTRICT=binchecks strip +SLOT=0/4.19.81-r1 +_eclasses_=coreos-kernel 588460f26859c559935beb69c53cb5c0 eapi7-ver 756b3f27d8e46131d5cf3c51bd876446 linux-info 953c3b1c472dcadbf62098a9301327f2 multilib 1d91b03d42ab6308b5f4f6b598ed110e toolchain-funcs 8c7f9d80beedd16f2e5a7f612c609529 +_md5_=1a19bf8157015b4dcd933d7281d1920d diff --git a/sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-modules-4.19.81 b/sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-modules-4.19.81-r1 similarity index 91% rename from sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-modules-4.19.81 rename to sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-modules-4.19.81-r1 index 88820ea8c0..a565a3a9e7 100644 --- a/sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-modules-4.19.81 +++ b/sdk_container/src/third_party/coreos-overlay/metadata/md5-cache/sys-kernel/coreos-modules-4.19.81-r1 @@ -1,5 +1,5 @@ DEFINED_PHASES=compile configure install postinst prepare pretend setup unpack -DEPEND==sys-kernel/coreos-sources-4.19.81 +DEPEND==sys-kernel/coreos-sources-4.19.81-r1 DESCRIPTION=CoreOS Linux kernel modules EAPI=5 HOMEPAGE=http://www.kernel.org @@ -8,6 +8,6 @@ KEYWORDS=amd64 LICENSE=GPL-2 freedist RDEPEND=!=sys-libs/ncurses-5.2 virtual/libelf ) RESTRICT=binchecks strip -SLOT=4.19.81 +SLOT=4.19.81-r1 SRC_URI=mirror://kernel/linux/kernel/v4.x/patch-4.19.81.xz mirror://kernel/linux/kernel/v4.x/linux-4.19.tar.xz _eclasses_=desktop b1d22ac8bdd4679ab79c71aca235009d eapi7-ver 756b3f27d8e46131d5cf3c51bd876446 epatch a1bf4756dba418a7238f3be0cb010c54 estack 43ddf5aaffa7a8d0482df54d25a66a1f eutils 
6e6c2737b59a4b982de6fb3ecefd87f8 kernel-2 b2fdd65b5edb72d87dedaa82df19553c ltprune 2729691420b6deeda2a90b1f1183fb55 multilib 1d91b03d42ab6308b5f4f6b598ed110e preserve-libs ef207dc62baddfddfd39a164d9797648 python-any-r1 4900ae970f827a22d33d41bd8b8f9ace python-utils-r1 08e17157a6807add7db1f8d01e7e391f toolchain-funcs 8c7f9d80beedd16f2e5a7f612c609529 vcs-clean 2a0f74a496fa2b1552c4f3398258b7bf _md5_=d79cabc85557a16224cb73e5803fb741 diff --git a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-kernel/coreos-kernel-4.19.81.ebuild b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-kernel/coreos-kernel-4.19.81-r1.ebuild similarity index 99% rename from sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-kernel/coreos-kernel-4.19.81.ebuild rename to sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-kernel/coreos-kernel-4.19.81-r1.ebuild index 6981197793..db5b45613e 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-kernel/coreos-kernel-4.19.81.ebuild +++ b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-kernel/coreos-kernel-4.19.81-r1.ebuild @@ -2,7 +2,7 @@ # Distributed under the terms of the GNU General Public License v2 EAPI=5 -COREOS_SOURCE_REVISION="" +COREOS_SOURCE_REVISION="-r1" inherit coreos-kernel DESCRIPTION="CoreOS Linux kernel" diff --git a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-modules/coreos-modules-4.19.81.ebuild b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-modules/coreos-modules-4.19.81-r1.ebuild similarity index 98% rename from sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-modules/coreos-modules-4.19.81.ebuild rename to sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-modules/coreos-modules-4.19.81-r1.ebuild index bba80e7e2d..2ec41a9dc5 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-modules/coreos-modules-4.19.81.ebuild +++ 
b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-modules/coreos-modules-4.19.81-r1.ebuild @@ -2,7 +2,7 @@ # Distributed under the terms of the GNU General Public License v2 EAPI=5 -COREOS_SOURCE_REVISION="" +COREOS_SOURCE_REVISION="-r1" inherit coreos-kernel savedconfig DESCRIPTION="CoreOS Linux kernel modules" diff --git a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/coreos-sources-4.19.81.ebuild b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/coreos-sources-4.19.81-r1.ebuild similarity index 88% rename from sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/coreos-sources-4.19.81.ebuild rename to sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/coreos-sources-4.19.81-r1.ebuild index 2eb2f2f26c..5fb516a5d2 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/coreos-sources-4.19.81.ebuild +++ b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/coreos-sources-4.19.81-r1.ebuild @@ -34,4 +34,6 @@ UNIPATCH_LIST=" ${PATCH_DIR}/z0001-kbuild-derive-relative-path-for-KBUILD_SRC-from-CURD.patch \ ${PATCH_DIR}/z0002-tools-objtool-Makefile-Don-t-fail-on-fallthrough-wit.patch \ ${PATCH_DIR}/z0003-net-netfilter-add-nf_conntrack_ipv4-compat-module-fo.patch \ + ${PATCH_DIR}/z0004-sched-fair-Fix-low-cpu-usage-with-high-throttling-by.patch \ + ${PATCH_DIR}/z0005-sched-fair-Fix-Wunused-but-set-variable-warnings.patch \ " diff --git a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0001-kbuild-derive-relative-path-for-KBUILD_SRC-from-CURD.patch b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0001-kbuild-derive-relative-path-for-KBUILD_SRC-from-CURD.patch index 508f028d81..1ae0fb4456 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0001-kbuild-derive-relative-path-for-KBUILD_SRC-from-CURD.patch +++ 
b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0001-kbuild-derive-relative-path-for-KBUILD_SRC-from-CURD.patch @@ -1,7 +1,7 @@ From d1500b219a53aa192005c8337bb772ad162772a9 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Wed, 25 Nov 2015 02:59:45 -0800 -Subject: [PATCH 1/3] kbuild: derive relative path for KBUILD_SRC from CURDIR +Subject: [PATCH 1/5] kbuild: derive relative path for KBUILD_SRC from CURDIR This enables relocating source and build trees to different roots, provided they stay reachable relative to one another. Useful for @@ -26,5 +26,5 @@ index 3c146e8d93dc..ec5034c27382 100644 # Leave processing to above invocation of make -- -2.21.0 +2.17.1 diff --git a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0002-tools-objtool-Makefile-Don-t-fail-on-fallthrough-wit.patch b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0002-tools-objtool-Makefile-Don-t-fail-on-fallthrough-wit.patch index 2c151f751e..9f75dbddc7 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0002-tools-objtool-Makefile-Don-t-fail-on-fallthrough-wit.patch +++ b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0002-tools-objtool-Makefile-Don-t-fail-on-fallthrough-wit.patch @@ -1,7 +1,7 @@ From 22fcdbfe69c4df8a60d34615a931d4f40f13bef4 Mon Sep 17 00:00:00 2001 From: David Michael Date: Thu, 8 Feb 2018 21:23:12 -0500 -Subject: [PATCH 2/3] tools/objtool/Makefile: Don't fail on fallthrough with +Subject: [PATCH 2/5] tools/objtool/Makefile: Don't fail on fallthrough with new GCCs --- @@ -23,5 +23,5 @@ index ed61fb3a46c0..146ec9ec6737 100644 CFLAGS += -I$(srctree)/tools/include/ -- -2.21.0 +2.17.1 diff --git a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0003-net-netfilter-add-nf_conntrack_ipv4-compat-module-fo.patch 
b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0003-net-netfilter-add-nf_conntrack_ipv4-compat-module-fo.patch index e5a833a27f..48aba760cf 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0003-net-netfilter-add-nf_conntrack_ipv4-compat-module-fo.patch +++ b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0003-net-netfilter-add-nf_conntrack_ipv4-compat-module-fo.patch @@ -1,7 +1,7 @@ From 904854feef2a05d28b51a4a0187058a5c90b0901 Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Fri, 26 Oct 2018 17:00:56 -0700 -Subject: [PATCH 3/3] net/netfilter: add nf_conntrack_ipv4 compat module for +Subject: [PATCH 3/5] net/netfilter: add nf_conntrack_ipv4 compat module for kube-proxy kube-proxy won't enable ipvs unless it can modprobe nf_conntrack_ipv4 and @@ -83,5 +83,5 @@ index 000000000000..8308772022c6 +MODULE_DESCRIPTION("kube-proxy compatibility wrapper for nf_conntrack.ko"); +MODULE_LICENSE("GPL"); -- -2.21.0 +2.17.1 diff --git a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0004-sched-fair-Fix-low-cpu-usage-with-high-throttling-by.patch b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0004-sched-fair-Fix-low-cpu-usage-with-high-throttling-by.patch new file mode 100644 index 0000000000..61528692f8 --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0004-sched-fair-Fix-low-cpu-usage-with-high-throttling-by.patch @@ -0,0 +1,287 @@ +From 382ea3deece2ce4ef7571b5b6635e0fa608fba42 Mon Sep 17 00:00:00 2001 +From: Joe Hohertz +Date: Tue, 5 Nov 2019 15:00:41 -0500 +Subject: [PATCH 4/5] sched/fair: Fix low cpu usage with high throttling by + removing expiration of cpu-local slices + +It has been observed, that highly-threaded, non-cpu-bound applications +running under cpu.cfs_quota_us constraints can hit a high percentage of +periods 
throttled while simultaneously not consuming the allocated +amount of quota. This use case is typical of user-interactive non-cpu +bound applications, such as those running in kubernetes or mesos when +run on multiple cpu cores. + +This has been root caused to cpu-local run queue being allocated per cpu +bandwidth slices, and then not fully using that slice within the period. +At which point the slice and quota expires. This expiration of unused +slice results in applications not being able to utilize the quota for +which they are allocated. + +The non-expiration of per-cpu slices was recently fixed by +'commit 512ac999d275 ("sched/fair: Fix bandwidth timer clock drift +condition")'. Prior to that it appears that this had been broken since +at least 'commit 51f2176d74ac ("sched/fair: Fix unlocked reads of some +cfs_b->quota/period")' which was introduced in v3.16-rc1 in 2014. That +added the following conditional which resulted in slices never being +expired. + +if (cfs_rq->runtime_expires != cfs_b->runtime_expires) { + /* extend local deadline, drift is bounded above by 2 ticks */ + cfs_rq->runtime_expires += TICK_NSEC; + +Because this was broken for nearly 5 years, and has recently been fixed +and is now being noticed by many users running kubernetes +(https://github.com/kubernetes/kubernetes/issues/67577) it is my opinion +that the mechanisms around expiring runtime should be removed +altogether. + +This allows quota already allocated to per-cpu run-queues to live longer +than the period boundary. This allows threads on runqueues that do not +use much CPU to continue to use their remaining slice over a longer +period of time than cpu.cfs_period_us. However, this helps prevent the +above condition of hitting throttling while also not fully utilizing +your cpu quota. + +This theoretically allows a machine to use slightly more than its +allotted quota in some periods. This overflow would be bounded by the +remaining quota left on each per-cpu runqueue. 
This is typically no +more than min_cfs_rq_runtime=1ms per cpu. For CPU bound tasks this will +change nothing, as they should theoretically fully utilize all of their +quota in each period. For user-interactive tasks as described above this +provides a much better user/application experience as their cpu +utilization will more closely match the amount they requested when they +hit throttling. This means that cpu limits no longer strictly apply per +period for non-cpu bound applications, but that they are still accurate +over longer timeframes. + +This greatly improves performance of high-thread-count, non-cpu bound +applications with low cfs_quota_us allocation on high-core-count +machines. In the case of an artificial testcase (10ms/100ms of quota on +80 CPU machine), this commit resulted in almost 30x performance +improvement, while still maintaining correct cpu quota restrictions. +That testcase is available at https://github.com/indeedeng/fibtest. + +Fixes: 512ac999d275 ("sched/fair: Fix bandwidth timer clock drift condition") +Signed-off-by: Dave Chiluk +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Phil Auld +Reviewed-by: Ben Segall +Cc: Ingo Molnar +Cc: John Hammond +Cc: Jonathan Corbet +Cc: Kyle Anderson +Cc: Gabriel Munos +Cc: Peter Oskolkov +Cc: Cong Wang +Cc: Brendan Gregg +Link: https://lkml.kernel.org/r/1563900266-19734-2-git-send-email-chiluk+linux@indeed.com +--- + kernel/sched/fair.c | 72 +++++--------------------------------------- + kernel/sched/sched.h | 4 --- + 2 files changed, 7 insertions(+), 69 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 32d2dac680a7..cf0f4760d399 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -4320,8 +4320,6 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) + + now = sched_clock_cpu(smp_processor_id()); + cfs_b->runtime = cfs_b->quota; +- cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); +- cfs_b->expires_seq++; + } + + static inline struct 
cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) +@@ -4343,8 +4341,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) + { + struct task_group *tg = cfs_rq->tg; + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); +- u64 amount = 0, min_amount, expires; +- int expires_seq; ++ u64 amount = 0, min_amount; + + /* note: this is a positive sum as runtime_remaining <= 0 */ + min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining; +@@ -4361,61 +4358,17 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) + cfs_b->idle = 0; + } + } +- expires_seq = cfs_b->expires_seq; +- expires = cfs_b->runtime_expires; + raw_spin_unlock(&cfs_b->lock); + + cfs_rq->runtime_remaining += amount; +- /* +- * we may have advanced our local expiration to account for allowed +- * spread between our sched_clock and the one on which runtime was +- * issued. +- */ +- if (cfs_rq->expires_seq != expires_seq) { +- cfs_rq->expires_seq = expires_seq; +- cfs_rq->runtime_expires = expires; +- } + + return cfs_rq->runtime_remaining > 0; + } + +-/* +- * Note: This depends on the synchronization provided by sched_clock and the +- * fact that rq->clock snapshots this value. +- */ +-static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq) +-{ +- struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); +- +- /* if the deadline is ahead of our clock, nothing to do */ +- if (likely((s64)(rq_clock(rq_of(cfs_rq)) - cfs_rq->runtime_expires) < 0)) +- return; +- +- if (cfs_rq->runtime_remaining < 0) +- return; +- +- /* +- * If the local deadline has passed we have to consider the +- * possibility that our sched_clock is 'fast' and the global deadline +- * has not truly expired. +- * +- * Fortunately we can check determine whether this the case by checking +- * whether the global deadline(cfs_b->expires_seq) has advanced. 
+- */ +- if (cfs_rq->expires_seq == cfs_b->expires_seq) { +- /* extend local deadline, drift is bounded above by 2 ticks */ +- cfs_rq->runtime_expires += TICK_NSEC; +- } else { +- /* global deadline is ahead, expiration has passed */ +- cfs_rq->runtime_remaining = 0; +- } +-} +- + static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) + { + /* dock delta_exec before expiring quota (as it could span periods) */ + cfs_rq->runtime_remaining -= delta_exec; +- expire_cfs_rq_runtime(cfs_rq); + + if (likely(cfs_rq->runtime_remaining > 0)) + return; +@@ -4600,8 +4553,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) + resched_curr(rq); + } + +-static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, +- u64 remaining, u64 expires) ++static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining) + { + struct cfs_rq *cfs_rq; + u64 runtime; +@@ -4626,7 +4578,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, + remaining -= runtime; + + cfs_rq->runtime_remaining += runtime; +- cfs_rq->runtime_expires = expires; + + /* we check whether we're throttled above */ + if (cfs_rq->runtime_remaining > 0) +@@ -4651,7 +4602,7 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, + */ + static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) + { +- u64 runtime, runtime_expires; ++ u64 runtime; + int throttled; + + /* no need to continue the timer with no bandwidth constraint */ +@@ -4679,8 +4630,6 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) + /* account preceding periods in which throttling occurred */ + cfs_b->nr_throttled += overrun; + +- runtime_expires = cfs_b->runtime_expires; +- + /* + * This check is repeated as we are holding onto the new bandwidth while + * we unthrottle. 
This can potentially race with an unthrottled group +@@ -4693,8 +4642,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) + cfs_b->distribute_running = 1; + raw_spin_unlock(&cfs_b->lock); + /* we can't nest cfs_b->lock while distributing bandwidth */ +- runtime = distribute_cfs_runtime(cfs_b, runtime, +- runtime_expires); ++ runtime = distribute_cfs_runtime(cfs_b, runtime); + raw_spin_lock(&cfs_b->lock); + + cfs_b->distribute_running = 0; +@@ -4771,8 +4719,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq) + return; + + raw_spin_lock(&cfs_b->lock); +- if (cfs_b->quota != RUNTIME_INF && +- cfs_rq->runtime_expires == cfs_b->runtime_expires) { ++ if (cfs_b->quota != RUNTIME_INF) { + cfs_b->runtime += slack_runtime; + + /* we are under rq->lock, defer unthrottling using a timer */ +@@ -4804,7 +4751,6 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) + static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) + { + u64 runtime = 0, slice = sched_cfs_bandwidth_slice(); +- u64 expires; + + /* confirm we're still not at a refresh boundary */ + raw_spin_lock(&cfs_b->lock); +@@ -4821,7 +4767,6 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) + if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) + runtime = cfs_b->runtime; + +- expires = cfs_b->runtime_expires; + if (runtime) + cfs_b->distribute_running = 1; + +@@ -4830,11 +4775,10 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) + if (!runtime) + return; + +- runtime = distribute_cfs_runtime(cfs_b, runtime, expires); ++ runtime = distribute_cfs_runtime(cfs_b, runtime); + + raw_spin_lock(&cfs_b->lock); +- if (expires == cfs_b->runtime_expires) +- cfs_b->runtime -= min(runtime, cfs_b->runtime); ++ cfs_b->runtime -= min(runtime, cfs_b->runtime); + cfs_b->distribute_running = 0; + raw_spin_unlock(&cfs_b->lock); + } +@@ -4989,8 +4933,6 @@ void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) + + 
cfs_b->period_active = 1; + overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period); +- cfs_b->runtime_expires += (overrun + 1) * ktime_to_ns(cfs_b->period); +- cfs_b->expires_seq++; + hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED); + } + +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 9a7c3d08b39f..62058fd6dcf6 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -334,8 +334,6 @@ struct cfs_bandwidth { + u64 quota; + u64 runtime; + s64 hierarchical_quota; +- u64 runtime_expires; +- int expires_seq; + + short idle; + short period_active; +@@ -555,8 +553,6 @@ struct cfs_rq { + + #ifdef CONFIG_CFS_BANDWIDTH + int runtime_enabled; +- int expires_seq; +- u64 runtime_expires; + s64 runtime_remaining; + + u64 throttled_clock; +-- +2.17.1 + diff --git a/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0005-sched-fair-Fix-Wunused-but-set-variable-warnings.patch b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0005-sched-fair-Fix-Wunused-but-set-variable-warnings.patch new file mode 100644 index 0000000000..13f8d01056 --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-kernel/coreos-sources/files/4.19/z0005-sched-fair-Fix-Wunused-but-set-variable-warnings.patch @@ -0,0 +1,85 @@ +From 00273c35bff525e75c99a8576541ba29761ee8b8 Mon Sep 17 00:00:00 2001 +From: Joe Hohertz +Date: Tue, 5 Nov 2019 15:04:09 -0500 +Subject: [PATCH 5/5] sched/fair: Fix -Wunused-but-set-variable warnings + +Commit: + + de53fd7aedb1 ("sched/fair: Fix low cpu usage with high throttling by removing expiration of cpu-local slices") + +introduced a few compilation warnings: + + kernel/sched/fair.c: In function '__refill_cfs_bandwidth_runtime': + kernel/sched/fair.c:4365:6: warning: variable 'now' set but not used [-Wunused-but-set-variable] + kernel/sched/fair.c: In function 'start_cfs_bandwidth': + kernel/sched/fair.c:4992:6: warning: variable 'overrun' set 
but not used [-Wunused-but-set-variable] + +Also, __refill_cfs_bandwidth_runtime() does no longer update the +expiration time, so fix the comments accordingly. + +Signed-off-by: Qian Cai +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Ben Segall +Reviewed-by: Dave Chiluk +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: pauld@redhat.com +Fixes: de53fd7aedb1 ("sched/fair: Fix low cpu usage with high throttling by removing expiration of cpu-local slices") +Link: https://lkml.kernel.org/r/1566326455-8038-1-git-send-email-cai@lca.pw +Signed-off-by: Ingo Molnar +--- + kernel/sched/fair.c | 19 ++++++------------- + 1 file changed, 6 insertions(+), 13 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index cf0f4760d399..e5e8f6721872 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -4305,21 +4305,16 @@ static inline u64 sched_cfs_bandwidth_slice(void) + } + + /* +- * Replenish runtime according to assigned quota and update expiration time. +- * We use sched_clock_cpu directly instead of rq->clock to avoid adding +- * additional synchronization around rq->lock. ++ * Replenish runtime according to assigned quota. We use sched_clock_cpu ++ * directly instead of rq->clock to avoid adding additional synchronization ++ * around rq->lock. 
+ * + * requires cfs_b->lock + */ + void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) + { +- u64 now; +- +- if (cfs_b->quota == RUNTIME_INF) +- return; +- +- now = sched_clock_cpu(smp_processor_id()); +- cfs_b->runtime = cfs_b->quota; ++ if (cfs_b->quota != RUNTIME_INF) ++ cfs_b->runtime = cfs_b->quota; + } + + static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) +@@ -4924,15 +4919,13 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) + + void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) + { +- u64 overrun; +- + lockdep_assert_held(&cfs_b->lock); + + if (cfs_b->period_active) + return; + + cfs_b->period_active = 1; +- overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period); ++ hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period); + hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED); + } + +-- +2.17.1 +