Merge pull request #2996 from coreosbot/master-4.14.11

Upgrade Linux in master to 4.14.11
commit 35378cdb8c
Benjamin Gilbert, 2018-01-04 21:05:30 -08:00, committed by GitHub
18 changed files with 720 additions and 14 deletions


@@ -2,7 +2,7 @@
 # Distributed under the terms of the GNU General Public License v2
 EAPI=5
-COREOS_SOURCE_REVISION="-r1"
+COREOS_SOURCE_REVISION=""
 inherit coreos-kernel
 DESCRIPTION="CoreOS Linux kernel"
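
For context, COREOS_SOURCE_REVISION carries the ebuild's "-rN" revision suffix; it is cleared here because the jump to 4.14.11 starts back at revision zero. A minimal sketch of how the suffix is assumed to be appended to the upstream version by coreos-kernel.eclass (variable names come from the ebuild; the composition itself is an assumption, not the eclass verbatim):

# Hypothetical sketch: PV is derived from the ebuild file name,
# e.g. coreos-kernel-4.14.11.ebuild -> PV=4.14.11.
PV="4.14.11"
COREOS_SOURCE_REVISION=""
COREOS_SOURCE_VERSION="${PV}${COREOS_SOURCE_REVISION}"
echo "linux-${COREOS_SOURCE_VERSION}"   # -> linux-4.14.11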


@@ -2,7 +2,7 @@
 # Distributed under the terms of the GNU General Public License v2
 EAPI=5
-COREOS_SOURCE_REVISION="-r1"
+COREOS_SOURCE_REVISION=""
 inherit coreos-kernel savedconfig
 DESCRIPTION="CoreOS Linux kernel modules"


@@ -1,2 +1,2 @@
 DIST linux-4.14.tar.xz 100770500 SHA256 f81d59477e90a130857ce18dc02f4fbe5725854911db1e7ba770c7cd350f96a7 SHA512 77e43a02d766c3d73b7e25c4aafb2e931d6b16e870510c22cef0cdb05c3acb7952b8908ebad12b10ef982c6efbe286364b1544586e715cf38390e483927904d8 WHIRLPOOL fee10d54ecb210156aa55364ecc15867127819e9f7ff9ec5f6ef159b1013e2ae3d3a28d35c62d663886cbe826b996a1387671766093be002536309045a8e4d10
-DIST patch-4.14.8.xz 218568 SHA256 42eaed731b716244514b765c199e8f675d79287d7630e5c2911053ad52a1fa0a SHA512 62aa92e671cfc9265cf1690e0d64058dfa779400074cd909161d4e49f5313f58c1303ad301cecdbb64ee85d653f0bcb42fa609f25827289aad3bc94561d94390 WHIRLPOOL 4e269afc5591896574972ff89973f26db88985b3e458c6c8536e9947e19c65afdf498bbad5a7f30951a5db3892a79e927bc42ff93f6503d5c88915a15a99c51a
+DIST patch-4.14.11.xz 379976 SHA256 f588b62d7ee1d2ebdc24afa0e256ff2f8812d5cab3bf572bf02e7c4525922bf9 SHA512 dbf5488f0ba4e18b253da02c5cc862096a3253689986fbf5cd89b835c94c2057f4196d8d278973254fdf6dd07629784bf1dc3bdc7d1ac3bb0682c6f9ad9d21ad WHIRLPOOL 47d1a8c13d7f1f61c0f29131e89c2bf0676018984c220ee20e5e67bd3766a5b70f00378dc70633bf018f1044eec134b0db0ef67d2d02c778fd84156725395862
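
Each Manifest entry records the distfile's size and digests, which can be checked by hand against a fetched file. A quick sketch using the SHA256 from the new entry above (the distfiles path is illustrative):

# Verify the 4.14.11 patch tarball against the Manifest's SHA256.
cd /var/cache/portage/distfiles   # hypothetical DISTDIR
echo "f588b62d7ee1d2ebdc24afa0e256ff2f8812d5cab3bf572bf02e7c4525922bf9  patch-4.14.11.xz" | sha256sum -c -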


@@ -46,4 +46,13 @@ UNIPATCH_LIST="
 	${PATCH_DIR}/z0003-dccp-CVE-2017-8824-use-after-free-in-DCCP-code.patch \
 	${PATCH_DIR}/z0004-block-factor-out-__blkdev_issue_zero_pages.patch \
 	${PATCH_DIR}/z0005-block-cope-with-WRITE-ZEROES-failing-in-blkdev_issue.patch \
+	${PATCH_DIR}/z0006-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch \
+	${PATCH_DIR}/z0007-x86-pti-Make-sure-the-user-kernel-PTEs-match.patch \
+	${PATCH_DIR}/z0008-x86-pti-Switch-to-kernel-CR3-at-early-in-entry_SYSCA.patch \
+	${PATCH_DIR}/z0009-x86-process-Define-cpu_tss_rw-in-same-section-as-dec.patch \
+	${PATCH_DIR}/z0010-x86-mm-Set-MODULES_END-to-0xffffffffff000000.patch \
+	${PATCH_DIR}/z0011-x86-mm-Map-cpu_entry_area-at-the-same-place-on-4-5-l.patch \
+	${PATCH_DIR}/z0012-x86-kaslr-Fix-the-vaddr_end-mess.patch \
+	${PATCH_DIR}/z0013-x86-events-intel-ds-Use-the-proper-cache-flush-metho.patch \
+	${PATCH_DIR}/z0014-x86-tlb-Drop-the-_GPL-from-the-cpu_tlbstate-export.patch \
 "


@@ -1,7 +1,7 @@
-From 390d740f7a27402317e7d911ac6fb2699e2918fa Mon Sep 17 00:00:00 2001
+From 7c25b75b41bad6bb84644c9630d8c7d35a638d46 Mon Sep 17 00:00:00 2001
 From: Vito Caputo <vito.caputo@coreos.com>
 Date: Wed, 25 Nov 2015 02:59:45 -0800
-Subject: [PATCH 1/5] kbuild: derive relative path for KBUILD_SRC from CURDIR
+Subject: [PATCH 01/14] kbuild: derive relative path for KBUILD_SRC from CURDIR
 
 This enables relocating source and build trees to different roots,
 provided they stay reachable relative to one another. Useful for
@@ -12,7 +12,7 @@ by some undesirable path component.
  1 file changed, 2 insertions(+), 1 deletion(-)
 
 diff --git a/Makefile b/Makefile
-index 97b5ae76ac8c..154a8d19dc4e 100644
+index 655887067dc7..d4040d10df07 100644
 --- a/Makefile
 +++ b/Makefile
 @@ -143,7 +143,8 @@ $(filter-out _all sub-make $(CURDIR)/Makefile, $(MAKECMDGOALS)) _all: sub-make
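
The payoff of this patch is that a source tree and its object directory can be moved together after configuration, as long as they keep the same relative layout. A hypothetical out-of-tree build of that shape (paths are illustrative):

# Source and objdir side by side; the pair can later be relocated
# together because KBUILD_SRC is recorded relative to CURDIR.
mkdir -p build
make -C linux-4.14 O=../build defconfig
make -C linux-4.14 O=../build -j"$(nproc)"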


@@ -1,7 +1,7 @@
-From 2ab0bfdefcac217d3c0b3e8e32c8e36c5002ff91 Mon Sep 17 00:00:00 2001
+From 20720b9b549fc81b38a947870f719c521f57a224 Mon Sep 17 00:00:00 2001
 From: Geoff Levand <geoff@infradead.org>
 Date: Fri, 11 Nov 2016 17:28:52 -0800
-Subject: [PATCH 2/5] Add arm64 coreos verity hash
+Subject: [PATCH 02/14] Add arm64 coreos verity hash
 
 Signed-off-by: Geoff Levand <geoff@infradead.org>
 ---


@@ -1,7 +1,7 @@
-From 4f543fbc2ca094a39ad82fe53f7e3400635f353d Mon Sep 17 00:00:00 2001
+From d60268fbdf436724ad6407412f76a0625d4dad64 Mon Sep 17 00:00:00 2001
 From: Mohamed Ghannam <simo.ghannam@gmail.com>
 Date: Tue, 5 Dec 2017 12:23:04 -0800
-Subject: [PATCH 3/5] dccp: CVE-2017-8824: use-after-free in DCCP code
+Subject: [PATCH 03/14] dccp: CVE-2017-8824: use-after-free in DCCP code
 
 Whenever the sock object is in DCCP_CLOSED state, dccp_disconnect()
 must free dccps_hc_tx_ccid and dccps_hc_rx_ccid and set to NULL.


@@ -1,7 +1,7 @@
-From 204e1ea55e52693ab1951b42eea8992ac1075d89 Mon Sep 17 00:00:00 2001
+From a893a5e86b327ba2451f33c2dc7b49b7025f930d Mon Sep 17 00:00:00 2001
 From: Ilya Dryomov <idryomov@gmail.com>
 Date: Mon, 16 Oct 2017 15:59:09 +0200
-Subject: [PATCH 4/5] block: factor out __blkdev_issue_zero_pages()
+Subject: [PATCH 04/14] block: factor out __blkdev_issue_zero_pages()
 
 blkdev_issue_zeroout() will use this in !BLKDEV_ZERO_NOFALLBACK case.


@@ -1,7 +1,7 @@
-From c8d8b528893b8e3c07891f8427333a7bcd8ab482 Mon Sep 17 00:00:00 2001
+From 0d68ba211ef1da9c35ae57b116aa11a620efc950 Mon Sep 17 00:00:00 2001
 From: Ilya Dryomov <idryomov@gmail.com>
 Date: Mon, 16 Oct 2017 15:59:10 +0200
-Subject: [PATCH 5/5] block: cope with WRITE ZEROES failing in
+Subject: [PATCH 05/14] block: cope with WRITE ZEROES failing in
  blkdev_issue_zeroout()
 
 sd_config_write_same() ignores ->max_ws_blocks == 0 and resets it to


@@ -0,0 +1,44 @@
From 65e80d5382694684b7a6fef5bace975721384457 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Tue, 26 Dec 2017 23:43:54 -0600
Subject: [PATCH 06/14] x86/cpu, x86/pti: Do not enable PTI on AMD processors
AMD processors are not subject to the types of attacks that the kernel
page table isolation feature protects against. The AMD microarchitecture
does not allow memory references, including speculative references, that
access higher privileged data when running in a lesser privileged mode
when that access would result in a page fault.
Disable page table isolation by default on AMD processors by not setting
the X86_BUG_CPU_INSECURE feature, which controls whether X86_FEATURE_PTI
is set.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20171227054354.20369.94587.stgit@tlendack-t1.amdoffice.net
---
arch/x86/kernel/cpu/common.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f2a94dfb434e..b1be494ab4e8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -899,8 +899,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
- /* Assume for now that ALL x86 CPUs are insecure */
- setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
+ if (c->x86_vendor != X86_VENDOR_AMD)
+ setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
fpu__init_system(c);
--
2.14.1
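
On a booted kernel with this series applied, whether PTI ended up enabled can be checked from the boot log; the message text is an assumption based on arch/x86/mm/pti.c in this series:

# Expect "Kernel/User page tables isolation: enabled" on non-AMD CPUs,
# and no such line (or a "disabled" variant) on AMD after this patch.
dmesg | grep -i 'page tables isolation'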


@@ -0,0 +1,56 @@
From 612c0c992840573935920427a13cb7cb44dcdc8e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 3 Jan 2018 15:57:59 +0100
Subject: [PATCH 07/14] x86/pti: Make sure the user/kernel PTEs match
Meelis reported that his K8 Athlon64 emits MCE warnings when PTI is
enabled:
[Hardware Error]: Error Addr: 0x0000ffff81e000e0
[Hardware Error]: MC1 Error: L1 TLB multimatch.
[Hardware Error]: cache level: L1, tx: INSN
The address is in the entry area, which is mapped into kernel _AND_ user
space. That's special because we switch CR3 while we are executing
there.
User mapping:
0xffffffff81e00000-0xffffffff82000000 2M ro PSE GLB x pmd
Kernel mapping:
0xffffffff81000000-0xffffffff82000000 16M ro PSE x pmd
So the K8 is complaining that the TLB entries differ. They differ in the
GLB bit.
Drop the GLB bit when installing the user shared mapping.
Fixes: 6dc72c3cbca0 ("x86/mm/pti: Share entry text PMD")
Reported-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Meelis Roos <mroos@linux.ee>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801031407180.1957@nanos
---
arch/x86/mm/pti.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index bce8aea65606..2da28ba97508 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -367,7 +367,8 @@ static void __init pti_setup_espfix64(void)
static void __init pti_clone_entry_text(void)
{
pti_clone_pmds((unsigned long) __entry_text_start,
- (unsigned long) __irqentry_text_end, _PAGE_RW);
+ (unsigned long) __irqentry_text_end,
+ _PAGE_RW | _PAGE_GLOBAL);
}
/*
--
2.14.1


@@ -0,0 +1,69 @@
From 6224d8f70510155e7a8d008564616d09c03e3236 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 3 Jan 2018 19:52:04 +0100
Subject: [PATCH 08/14] x86/pti: Switch to kernel CR3 at early in
entry_SYSCALL_compat()
The preparation for PTI which added CR3 switching to the entry code
misplaced the CR3 switch in entry_SYSCALL_compat().
With PTI enabled the entry code tries to access a per cpu variable after
switching to kernel GS. This fails because that variable is not mapped to
user space. This results in a double fault and in the worst case a kernel
crash.
Move the switch ahead of the access and clobber RSP which has been saved
already.
Fixes: 8a09317b895f ("x86/mm/pti: Prepare the x86/entry assembly code for entry/exit CR3 switching")
Reported-by: Lars Wendler <wendler.lars@web.de>
Reported-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Andy Lutomirski <luto@kernel.org>,
Cc: Dave Hansen <dave.hansen@linux.intel.com>,
Cc: Peter Zijlstra <peterz@infradead.org>,
Cc: Greg KH <gregkh@linuxfoundation.org>, ,
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>,
Cc: Juergen Gross <jgross@suse.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801031949200.1957@nanos
---
arch/x86/entry/entry_64_compat.S | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 40f17009ec20..98d5358e4041 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -190,8 +190,13 @@ ENTRY(entry_SYSCALL_compat)
/* Interrupts are off on entry. */
swapgs
- /* Stash user ESP and switch to the kernel stack. */
+ /* Stash user ESP */
movl %esp, %r8d
+
+ /* Use %rsp as scratch reg. User ESP is stashed in r8 */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+ /* Switch to the kernel stack */
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/* Construct struct pt_regs on stack */
@@ -219,12 +224,6 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq $0 /* pt_regs->r14 = 0 */
pushq $0 /* pt_regs->r15 = 0 */
- /*
- * We just saved %rdi so it is safe to clobber. It is not
- * preserved during the C calls inside TRACE_IRQS_OFF anyway.
- */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
-
/*
* User mode is traced as though IRQs are on, and SYSENTER
* turned them off.
--
2.14.1


@@ -0,0 +1,54 @@
From 2183883de54dd666d1e4a7d85125f9c40afc2ef1 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Wed, 3 Jan 2018 12:39:52 -0800
Subject: [PATCH 09/14] x86/process: Define cpu_tss_rw in same section as
declaration
cpu_tss_rw is declared with DECLARE_PER_CPU_PAGE_ALIGNED
but then defined with DEFINE_PER_CPU_SHARED_ALIGNED
leading to section mismatch warnings.
Use DEFINE_PER_CPU_PAGE_ALIGNED consistently. This is necessary because
it's mapped to the cpu entry area and must be page aligned.
[ tglx: Massaged changelog a bit ]
Fixes: 1a935bc3d4ea ("x86/entry: Move SYSENTER_stack to the beginning of struct tss_struct")
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: thomas.lendacky@amd.com
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: tklauser@distanz.ch
Cc: minipli@googlemail.com
Cc: me@kylehuey.com
Cc: namit@vmware.com
Cc: luto@kernel.org
Cc: jpoimboe@redhat.com
Cc: tj@kernel.org
Cc: cl@linux.com
Cc: bp@suse.de
Cc: thgarnie@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180103203954.183360-1-ndesaulniers@google.com
---
arch/x86/kernel/process.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 517415978409..3cb2486c47e4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -47,7 +47,7 @@
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
+__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
.x86_tss = {
/*
* .sp0 is only used when entering ring 0 from a lower
--
2.14.1


@@ -0,0 +1,98 @@
From a058d76b8da67c03e3ce13328b2ec8811e494184 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Thu, 28 Dec 2017 19:06:20 +0300
Subject: [PATCH 10/14] x86/mm: Set MODULES_END to 0xffffffffff000000
Since f06bdd4001c2 ("x86/mm: Adapt MODULES_END based on fixmap section size")
kasan_mem_to_shadow(MODULES_END) could be not aligned to a page boundary.
So passing page unaligned address to kasan_populate_zero_shadow() have two
possible effects:
1) It may leave one page hole in supposed to be populated area. After commit
21506525fb8d ("x86/kasan/64: Teach KASAN about the cpu_entry_area") that
hole happens to be in the shadow covering fixmap area and leads to crash:
BUG: unable to handle kernel paging request at fffffbffffe8ee04
RIP: 0010:check_memory_region+0x5c/0x190
Call Trace:
<NMI>
memcpy+0x1f/0x50
ghes_copy_tofrom_phys+0xab/0x180
ghes_read_estatus+0xfb/0x280
ghes_notify_nmi+0x2b2/0x410
nmi_handle+0x115/0x2c0
default_do_nmi+0x57/0x110
do_nmi+0xf8/0x150
end_repeat_nmi+0x1a/0x1e
Note, the crash likely disappeared after commit 92a0f81d8957, which
changed kasan_populate_zero_shadow() call the way it was before
commit 21506525fb8d.
2) Attempt to load module near MODULES_END will fail, because
__vmalloc_node_range() called from kasan_module_alloc() will hit the
WARN_ON(!pte_none(*pte)) in the vmap_pte_range() and bail out with error.
To fix this we need to make kasan_mem_to_shadow(MODULES_END) page aligned
which means that MODULES_END should be 8*PAGE_SIZE aligned.
The whole point of commit f06bdd4001c2 was to move MODULES_END down if
NR_CPUS is big, so the cpu_entry_area takes a lot of space.
But since 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
the cpu_entry_area is no longer in fixmap, so we could just set
MODULES_END to a fixed 8*PAGE_SIZE aligned address.
Fixes: f06bdd4001c2 ("x86/mm: Adapt MODULES_END based on fixmap section size")
Reported-by: Jakub Kicinski <kubakici@wp.pl>
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Thomas Garnier <thgarnie@google.com>
Link: https://lkml.kernel.org/r/20171228160620.23818-1-aryabinin@virtuozzo.com
---
Documentation/x86/x86_64/mm.txt | 5 +----
arch/x86/include/asm/pgtable_64_types.h | 2 +-
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index ad41b3813f0a..ddd5ffd31bd0 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -43,7 +43,7 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
... unused hole ...
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
-ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space
+ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
@@ -67,9 +67,6 @@ memory window (this size is arbitrary, it can be raised later if needed).
The mappings are not part of any other kernel PGD and are only available
during EFI runtime calls.
-The module mapping space size changes based on the CONFIG requirements for the
-following fixmap section.
-
Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
physical memory, vmalloc/ioremap space and virtual memory map are randomized.
Their order is preserved but their base will be offset early at boot time.
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index b97a539bcdee..6233e5595389 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -104,7 +104,7 @@ typedef struct { pteval_t pte; } pte_t;
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
/* The module sections ends with the start of the fixmap */
-#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
+#define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
--
2.14.1
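
The alignment requirement is easy to sanity-check: KASAN maps 8 bytes of address space per shadow byte, so kasan_mem_to_shadow(MODULES_END) is page aligned exactly when MODULES_END is 8*PAGE_SIZE aligned. A one-line arithmetic check of the new constant:

# 0 means the new MODULES_END is divisible by 8*PAGE_SIZE (8*4096).
echo $(( 0xffffffffff000000 % (8 * 4096) ))   # prints 0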


@@ -0,0 +1,93 @@
From c13125488c7ff76d8d7e8c5b47d1f6f41b901c6e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 4 Jan 2018 13:01:40 +0100
Subject: [PATCH 11/14] x86/mm: Map cpu_entry_area at the same place on 4/5
level
There is no reason for 4 and 5 level pagetables to have a different
layout. It just makes determining vaddr_end for KASLR harder than
necessary.
Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Benjamin Gilbert <benjamin.gilbert@coreos.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: stable <stable@vger.kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Garnier <thgarnie@google.com>,
Cc: Alexander Kuleshov <kuleshovmail@gmail.com>
Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801041320360.1771@nanos
---
Documentation/x86/x86_64/mm.txt | 7 ++++---
arch/x86/include/asm/pgtable_64_types.h | 4 ++--
arch/x86/mm/dump_pagetables.c | 2 +-
3 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index ddd5ffd31bd0..f7dabe1f01e9 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -12,8 +12,8 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
... unused hole ...
ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
... unused hole ...
-fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
-fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
+fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
@@ -37,7 +37,8 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...
ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
... unused hole ...
-fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
+... unused hole ...
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6233e5595389..61b4b60bdc13 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -88,7 +88,7 @@ typedef struct { pteval_t pte; } pte_t;
# define VMALLOC_SIZE_TB _AC(32, UL)
# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
-# define LDT_PGD_ENTRY _AC(-4, UL)
+# define LDT_PGD_ENTRY _AC(-3, UL)
# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#endif
@@ -110,7 +110,7 @@ typedef struct { pteval_t pte; } pte_t;
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
-#define CPU_ENTRY_AREA_PGD _AC(-3, UL)
+#define CPU_ENTRY_AREA_PGD _AC(-4, UL)
#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index f56902c1f04b..2a4849e92831 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -61,10 +61,10 @@ enum address_markers_idx {
KASAN_SHADOW_START_NR,
KASAN_SHADOW_END_NR,
#endif
+ CPU_ENTRY_AREA_NR,
#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
LDT_NR,
#endif
- CPU_ENTRY_AREA_NR,
#ifdef CONFIG_X86_ESPFIX64
ESPFIX_START_NR,
#endif
--
2.14.1
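
With CONFIG_X86_PTDUMP enabled, the reordered regions should be visible in the kernel page table dump; the debugfs path and marker spelling below are assumptions, not taken from this patch:

# Look for the cpu_entry_area region in the page table dump
# (requires CONFIG_X86_PTDUMP and a mounted debugfs).
sudo grep -i -A2 'cpu entry' /sys/kernel/debug/kernel_page_tables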


@@ -0,0 +1,138 @@
From 3842b860bc0edf60b14b64c56cf35b3b85101626 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 4 Jan 2018 12:32:03 +0100
Subject: [PATCH 12/14] x86/kaslr: Fix the vaddr_end mess
vaddr_end for KASLR is only documented in the KASLR code itself and is
adjusted depending on config options. So it's not surprising that a change
of the memory layout causes KASLR to have the wrong vaddr_end. This can map
arbitrary stuff into other areas causing hard to understand problems.
Remove the whole ifdef magic and define the start of the cpu_entry_area to
be the end of the KASLR vaddr range.
Add documentation to that effect.
Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
Reported-by: Benjamin Gilbert <benjamin.gilbert@coreos.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Benjamin Gilbert <benjamin.gilbert@coreos.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: stable <stable@vger.kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Garnier <thgarnie@google.com>,
Cc: Alexander Kuleshov <kuleshovmail@gmail.com>
Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801041320360.1771@nanos
---
Documentation/x86/x86_64/mm.txt | 6 ++++++
arch/x86/include/asm/pgtable_64_types.h | 8 +++++++-
arch/x86/mm/kaslr.c | 32 +++++++++-----------------------
3 files changed, 22 insertions(+), 24 deletions(-)
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index f7dabe1f01e9..ea91cb61a602 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
... unused hole ...
ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
... unused hole ...
+ vaddr_end for KASLR
fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
@@ -37,6 +38,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...
ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
... unused hole ...
+ vaddr_end for KASLR
fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
... unused hole ...
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
@@ -71,3 +73,7 @@ during EFI runtime calls.
Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
physical memory, vmalloc/ioremap space and virtual memory map are randomized.
Their order is preserved but their base will be offset early at boot time.
+
+Be very careful vs. KASLR when changing anything here. The KASLR address
+range must not overlap with anything except the KASAN shadow area, which is
+correct as KASAN disables KASLR.
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 61b4b60bdc13..6b8f73dcbc2c 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -75,7 +75,13 @@ typedef struct { pteval_t pte; } pte_t;
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
-/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+/*
+ * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
+ *
+ * Be very careful vs. KASLR when changing anything here. The KASLR address
+ * range must not overlap with anything except the KASAN shadow area, which
+ * is correct as KASAN disables KASLR.
+ */
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
#ifdef CONFIG_X86_5LEVEL
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 879ef930e2c2..aedebd2ebf1e 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -34,25 +34,14 @@
#define TB_SHIFT 40
/*
- * Virtual address start and end range for randomization. The end changes base
- * on configuration to have the highest amount of space for randomization.
- * It increases the possible random position for each randomized region.
+ * Virtual address start and end range for randomization.
*
- * You need to add an if/def entry if you introduce a new memory region
- * compatible with KASLR. Your entry must be in logical order with memory
- * layout. For example, ESPFIX is before EFI because its virtual address is
- * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
- * ensure that this order is correct and won't be changed.
+ * The end address could depend on more configuration options to make the
+ * highest amount of space for randomization available, but that's too hard
+ * to keep straight and caused issues already.
*/
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
-
-#if defined(CONFIG_X86_ESPFIX64)
-static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
-#elif defined(CONFIG_EFI)
-static const unsigned long vaddr_end = EFI_VA_END;
-#else
-static const unsigned long vaddr_end = __START_KERNEL_map;
-#endif
+static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
/* Default values */
unsigned long page_offset_base = __PAGE_OFFSET_BASE;
@@ -101,15 +90,12 @@ void __init kernel_randomize_memory(void)
unsigned long remain_entropy;
/*
- * All these BUILD_BUG_ON checks ensures the memory layout is
- * consistent with the vaddr_start/vaddr_end variables.
+ * These BUILD_BUG_ON checks ensure the memory layout is consistent
+ * with the vaddr_start/vaddr_end variables. These checks are very
+ * limited....
*/
BUILD_BUG_ON(vaddr_start >= vaddr_end);
- BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
- vaddr_end >= EFI_VA_END);
- BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
- IS_ENABLED(CONFIG_EFI)) &&
- vaddr_end >= __START_KERNEL_map);
+ BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
if (!kaslr_memory_enabled())
--
2.14.1


@@ -0,0 +1,99 @@
From ac76d022de724f8a414d8862b2a10b6be2dffd10 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 4 Jan 2018 18:07:12 +0100
Subject: [PATCH 13/14] x86/events/intel/ds: Use the proper cache flush method
for mapping ds buffers
Thomas reported the following warning:
BUG: using smp_processor_id() in preemptible [00000000] code: ovsdb-server/4498
caller is native_flush_tlb_single+0x57/0xc0
native_flush_tlb_single+0x57/0xc0
__set_pte_vaddr+0x2d/0x40
set_pte_vaddr+0x2f/0x40
cea_set_pte+0x30/0x40
ds_update_cea.constprop.4+0x4d/0x70
reserve_ds_buffers+0x159/0x410
x86_reserve_hardware+0x150/0x160
x86_pmu_event_init+0x3e/0x1f0
perf_try_init_event+0x69/0x80
perf_event_alloc+0x652/0x740
SyS_perf_event_open+0x3f6/0xd60
do_syscall_64+0x5c/0x190
set_pte_vaddr is used to map the ds buffers into the cpu entry area, but
there are two problems with that:
1) The resulting flush is not supposed to be called in preemptible context
2) The cpu entry area is supposed to be per CPU, but the debug store
buffers are mapped for all CPUs so these mappings need to be flushed
globally.
Add the necessary preemption protection across the mapping code and flush
TLBs globally.
Fixes: c1961a4631da ("x86/events/intel/ds: Map debug buffers in cpu_entry_area")
Reported-by: Thomas Zeitlhofer <thomas.zeitlhofer+lkml@ze-it.at>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Thomas Zeitlhofer <thomas.zeitlhofer+lkml@ze-it.at>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180104170712.GB3040@hirez.programming.kicks-ass.net
---
arch/x86/events/intel/ds.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8f0aace08b87..8156e47da7ba 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -5,6 +5,7 @@
#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
+#include <asm/tlbflush.h>
#include <asm/insn.h>
#include "../perf_event.h"
@@ -283,20 +284,35 @@ static DEFINE_PER_CPU(void *, insn_buffer);
static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
+ unsigned long start = (unsigned long)cea;
phys_addr_t pa;
size_t msz = 0;
pa = virt_to_phys(addr);
+
+ preempt_disable();
for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, pa, prot);
+
+ /*
+ * This is a cross-CPU update of the cpu_entry_area, we must shoot down
+ * all TLB entries for it.
+ */
+ flush_tlb_kernel_range(start, start + size);
+ preempt_enable();
}
static void ds_clear_cea(void *cea, size_t size)
{
+ unsigned long start = (unsigned long)cea;
size_t msz = 0;
+ preempt_disable();
for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, 0, PAGE_NONE);
+
+ flush_tlb_kernel_range(start, start + size);
+ preempt_enable();
}
static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
--
2.14.1
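
The mapping path patched here is exercised by PEBS sampling, so a quick way to poke it is a precise-event perf session (the perf invocation is illustrative):

# The :pp modifier requests precise sampling via PEBS, which allocates
# and maps DS buffers through ds_update_cea().
perf record -e cycles:pp -a -- sleep 1
perf report --stdio | head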


@@ -0,0 +1,46 @@
From 0c74b4e882fce4f634177a45e704ddae82b90bbd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 4 Jan 2018 22:19:04 +0100
Subject: [PATCH 14/14] x86/tlb: Drop the _GPL from the cpu_tlbstate export
The recent changes for PTI touch cpu_tlbstate from various tlb_flush
inlines. cpu_tlbstate is exported as GPL symbol, so this causes a
regression when building the most beloved out of tree drivers for certain
graphics card.
Aside of that the export was wrong since it was introduced as it should
have been EXPORT_PER_CPU_SYMBOL_GPL().
Use the correct PER_CPU export and drop the _GPL to restore the previous
state which allows users to utilize the cards they payed for. I'm always
happy to make this kind of change to support our #friends (or however this
hot hashtag is named today) from the closet sauce graphics world..
Fixes: 1e02ce4cccdc ("x86: Store a per-cpu shadow copy of CR4")
Fixes: 6fd166aae78c ("x86/mm: Use/Fix PCID to optimize user/kernel switches")
Reported-by: Kees Cook <keescook@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: stable@vger.kernel.org
---
arch/x86/mm/init.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 80259ad8c386..6b462a472a7b 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -870,7 +870,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
.next_asid = 1,
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
-EXPORT_SYMBOL_GPL(cpu_tlbstate);
+EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
{
--
2.14.1
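
After a build of this tree, the relaxed export can be confirmed from Module.symvers; the expectation that EXPORT_PER_CPU_SYMBOL shows up there as a plain EXPORT_SYMBOL entry is an assumption about kbuild's output format:

# Before this patch the entry was GPL-only; afterwards it should list
# a non-GPL export type for cpu_tlbstate.
grep -w cpu_tlbstate Module.symvers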