main/xen: add mitigations for XSA-452 & XSA-453

Upstream patches did not apply cleanly to the RELEASE-4.18.0 source; the
combined patch was created with:

  git format-patch 52be29d^..4da8ca9 --output xen-stable-4.18-20240312.patch

Author: omni
Date:   2024-03-18 21:25:57 +00:00
Parent: 2af544aa3f
Commit: a63062c701

6 changed files with 8496 additions and 462 deletions

main/xen/APKBUILD

@@ -2,7 +2,7 @@
 # Maintainer: Natanael Copa <ncopa@alpinelinux.org>
 pkgname=xen
 pkgver=4.18.0
-pkgrel=4
+pkgrel=5
 pkgdesc="Xen hypervisor"
 url="https://www.xenproject.org/"
 arch="x86_64 armv7 aarch64"
@@ -364,6 +364,9 @@ options="!strip"
 # - CVE-2023-46839 XSA-449
 # 4.18.0-r4:
 # - CVE-2023-46841 XSA-451
+# 4.18.0-r5:
+# - CVE-2023-28746 XSA-452
+# - CVE-2024-2193 XSA-453
 case "$CARCH" in
 x86*)
@@ -409,10 +412,7 @@ source="https://downloads.xenproject.org/release/xen/$pkgver/xen-$pkgver.tar.gz
 https://xenbits.xen.org/xen-extfiles/zlib-$_ZLIB_VERSION.tar.gz
 https://xenbits.xen.org/xen-extfiles/ipxe-git-$_IPXE_GIT_TAG.tar.gz
-xsa447.patch
-xsa449.patch
-xsa450.patch
-xsa451-4.18.patch
+xen-stable-4.18-20240312.patch
 mini-os-__divmoddi4.patch
 qemu-xen_paths.patch
@@ -701,10 +701,7 @@ qemu_openrc() {
 sha512sums="
 4cc9fd155144045a173c5f8ecc45f149817f1034eec618cb6f8b0494ef2fb5b95c4c60cf0bf4bec4bef8a622c35b6a3cb7dedc38e6d95e726f1611c73ddb3273  xen-4.18.0.tar.gz
-459e490b33b95202167862a84eadb656a418b252ffa786db05640f025886bf1e2a5c59387d4b99ced552ae316eb64b6f9888a850bf6860a115e7f3eabed52d20  xsa447.patch
-ea185b6f7ca375b49351a4006f22e449312e0a8180c93db2bb1aca43658de5abc8d1a21c1b6eedf320dd51a5e1475ace1652eddaacee28d36cc83d5beb05a918  xsa449.patch
-901359c8fd08adc49961e1296e45fa98da6e090a82f8888fef6cccebf5b443e80cd905dff51e336e43c22bfac118481d65f8e4a9aa56ddd5c8e1775c6083e08d  xsa450.patch
-394fe51160f5ce79086d0f250c99daa3ecde1012ebdb5c6301f0033e79809e8b2061de7988f1a713c9674ac9b73d88df8be89e8cc668efb64c5b53039c574eef  xsa451-4.18.patch
+8df958195290a39b54493766e7555d71c68083d75edd13a2f77ad237d6b6fb52bce816b9e975c0c14024a01042e599415360dcf475f7d2e0c6bee8f9fd2ed6ef  xen-stable-4.18-20240312.patch
 2e0b0fd23e6f10742a5517981e5171c6e88b0a93c83da701b296f5c0861d72c19782daab589a7eac3f9032152a0fc7eff7f5362db8fccc4859564a9aa82329cf  gmp-4.3.2.tar.bz2
 c2bc9ffc8583aeae71cee9ddcc4418969768d4e3764d47307da54f93981c0109fb07d84b061b3a3628bd00ba4d14a54742bc04848110eb3ae8ca25dbfbaabadb  grub-0.97.tar.gz
 1465b58279af1647f909450e394fe002ca165f0ff4a0254bfa9fe0e64316f50facdde2729d79a4e632565b4500cf4d6c74192ac0dd3bc9fe09129bbd67ba089d  lwip-1.3.0.tar.gz

main/xen/xen-stable-4.18-20240312.patch (new file; diff suppressed because it is too large)

main/xen/xsa447.patch (deleted)

@@ -1,117 +0,0 @@
From 084c7312fa6c1d4a7fa343efa1d7d73693dafff4 Mon Sep 17 00:00:00 2001
From: Michal Orzel <michal.orzel@amd.com>
Date: Thu, 23 Nov 2023 15:53:02 +0100
Subject: [PATCH] xen/arm: page: Avoid pointer overflow on cache clean &
 invalidate

On Arm32, after cleaning and invalidating the last dcache line of the top
domheap page, i.e. VA = 0xfffff000 (as a result of flushing the page to
RAM), we end up adding the value of a dcache line size to the pointer
once again, which results in a pointer arithmetic overflow (with a 64B line
size, the operation 0xffffffc0 + 0x40 overflows to 0x0). Such behavior is
undefined, and given the wide range of compiler versions we support, it is
difficult to determine what could happen in such a scenario.

Modify clean_and_invalidate_dcache_va_range() as well as
clean_dcache_va_range() and invalidate_dcache_va_range() (due to their
similar handling) to prevent pointer arithmetic overflow. Modify the loops
to use an additional variable to store the index of the next cacheline.

Add an assert to prevent passing a region that wraps around, which is
illegal and would end up in a page fault anyway (the region 0-2MB is
unmapped). Lastly, return early if the size passed is 0.

Note that on Arm64 we don't have this problem, given that the max VA
space we support is 48 bits.

This is XSA-447 / CVE-2023-46837.

Signed-off-by: Michal Orzel <michal.orzel@amd.com>
Reviewed-by: Julien Grall <jgrall@amazon.com>
---
 xen/arch/arm/include/asm/page.h | 35 ++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h
index ebaf5964f114..69f817d1e68a 100644
--- a/xen/arch/arm/include/asm/page.h
+++ b/xen/arch/arm/include/asm/page.h
@@ -162,6 +162,13 @@ static inline size_t read_dcache_line_bytes(void)
static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
{
size_t cacheline_mask = dcache_line_bytes - 1;
+ unsigned long idx = 0;
+
+ if ( !size )
+ return 0;
+
+ /* Passing a region that wraps around is illegal */
+ ASSERT(((uintptr_t)p + size - 1) >= (uintptr_t)p);
dsb(sy); /* So the CPU issues all writes to the range */
@@ -174,11 +181,11 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
}
for ( ; size >= dcache_line_bytes;
- p += dcache_line_bytes, size -= dcache_line_bytes )
- asm volatile (__invalidate_dcache_one(0) : : "r" (p));
+ idx += dcache_line_bytes, size -= dcache_line_bytes )
+ asm volatile (__invalidate_dcache_one(0) : : "r" (p + idx));
if ( size > 0 )
- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
+ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p + idx));
dsb(sy); /* So we know the flushes happen before continuing */
@@ -188,14 +195,21 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
static inline int clean_dcache_va_range(const void *p, unsigned long size)
{
size_t cacheline_mask = dcache_line_bytes - 1;
+ unsigned long idx = 0;
+
+ if ( !size )
+ return 0;
+
+ /* Passing a region that wraps around is illegal */
+ ASSERT(((uintptr_t)p + size - 1) >= (uintptr_t)p);
dsb(sy); /* So the CPU issues all writes to the range */
size += (uintptr_t)p & cacheline_mask;
size = (size + cacheline_mask) & ~cacheline_mask;
p = (void *)((uintptr_t)p & ~cacheline_mask);
for ( ; size >= dcache_line_bytes;
- p += dcache_line_bytes, size -= dcache_line_bytes )
- asm volatile (__clean_dcache_one(0) : : "r" (p));
+ idx += dcache_line_bytes, size -= dcache_line_bytes )
+ asm volatile (__clean_dcache_one(0) : : "r" (p + idx));
dsb(sy); /* So we know the flushes happen before continuing */
/* ARM callers assume that dcache_* functions cannot fail. */
return 0;
@@ -205,14 +219,21 @@ static inline int clean_and_invalidate_dcache_va_range
(const void *p, unsigned long size)
{
size_t cacheline_mask = dcache_line_bytes - 1;
+ unsigned long idx = 0;
+
+ if ( !size )
+ return 0;
+
+ /* Passing a region that wraps around is illegal */
+ ASSERT(((uintptr_t)p + size - 1) >= (uintptr_t)p);
dsb(sy); /* So the CPU issues all writes to the range */
size += (uintptr_t)p & cacheline_mask;
size = (size + cacheline_mask) & ~cacheline_mask;
p = (void *)((uintptr_t)p & ~cacheline_mask);
for ( ; size >= dcache_line_bytes;
- p += dcache_line_bytes, size -= dcache_line_bytes )
- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
+ idx += dcache_line_bytes, size -= dcache_line_bytes )
+ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p + idx));
dsb(sy); /* So we know the flushes happen before continuing */
/* ARM callers assume that dcache_* functions cannot fail. */
return 0;
--
2.40.1
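
The heart of this change is easier to see outside the asm macros: the loops
stop advancing the pointer itself and advance an index instead, so no pointer
value is ever formed past the last line flushed. A minimal, compilable sketch
of the two loop shapes (illustration only, not Xen code; flush_line() and
DCACHE_LINE are hypothetical stand-ins for the cache-maintenance asm and the
probed line size, and the real patch's alignment and trailing-partial-line
handling is omitted):

#include <stdint.h>

#define DCACHE_LINE 64UL

static void flush_line(const char *p) { (void)p; /* asm stand-in */ }

/* Old shape: advancing the pointer itself can wrap past the top of a
 * 32-bit address space (0xffffffc0 + 0x40 == 0x0), which is undefined. */
static void flush_range_old(const char *p, unsigned long size)
{
    for ( ; size >= DCACHE_LINE; p += DCACHE_LINE, size -= DCACHE_LINE )
        flush_line(p);
}

/* New shape: the base pointer stays fixed and only an index advances,
 * so no pointer beyond the last flushed line is ever computed. */
static void flush_range_new(const char *p, unsigned long size)
{
    unsigned long idx = 0;

    if ( !size )
        return;

    for ( ; size >= DCACHE_LINE; idx += DCACHE_LINE, size -= DCACHE_LINE )
        flush_line(p + idx);
}

int main(void)
{
    static char buf[256];

    flush_range_old(buf, sizeof(buf));
    flush_range_new(buf, sizeof(buf));
    return 0;
}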

main/xen/xsa449.patch (deleted)

@@ -1,89 +0,0 @@
From d8b92b21b224126860978e4c604302f3c1e3bf75 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Wed, 13 Dec 2023 15:51:59 +0100
Subject: [PATCH] pci: fail device assignment if phantom functions cannot be
 assigned
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current behavior is that no error is reported if (some) phantom functions
fail to be assigned during device add or assignment, so the operation succeeds
even if some phantom functions are not correctly set up.

This can lead to devices possibly being successfully assigned to a domU while
some of the device's phantom functions are still assigned to dom0. Even when
the device is assigned to domIO before being assigned to a domU, phantom
functions might fail to be assigned to domIO, and also fail to be assigned to
the domU, leaving them assigned to dom0.

Since the device can generate requests using the IDs of those phantom
functions, given the scenario above a device in such a state would be in the
control of a domU, but still capable of generating transactions that use a
context ID targeting dom0-owned memory.

Modify device assignment in order to attempt to deassign the device if the
phantom functions failed to be assigned.

Note that device addition is not modified in the same way, as in that case the
device is assigned to a trusted domain, and hence a partial assignment can
lead to device malfunction but not a security issue.

This is XSA-449 / CVE-2023-46839.

Fixes: 4e9950dc1bd2 ('IOMMU: add phantom function support')
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
 xen/drivers/passthrough/pci.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 1439d1ef2b26..47c0eee7bdcc 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -1488,11 +1488,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
pdev->fault.count = 0;
- if ( (rc = iommu_call(hd->platform_ops, assign_device, d, devfn,
- pci_to_dev(pdev), flag)) )
- goto done;
+ rc = iommu_call(hd->platform_ops, assign_device, d, devfn, pci_to_dev(pdev),
+ flag);
- for ( ; pdev->phantom_stride; rc = 0 )
+ while ( pdev->phantom_stride && !rc )
{
devfn += pdev->phantom_stride;
if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
@@ -1503,8 +1502,24 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
done:
if ( rc )
- printk(XENLOG_G_WARNING "%pd: assign (%pp) failed (%d)\n",
- d, &PCI_SBDF(seg, bus, devfn), rc);
+ {
+ printk(XENLOG_G_WARNING "%pd: assign %s(%pp) failed (%d)\n",
+ d, devfn != pdev->devfn ? "phantom function " : "",
+ &PCI_SBDF(seg, bus, devfn), rc);
+
+ if ( devfn != pdev->devfn && deassign_device(d, seg, bus, pdev->devfn) )
+ {
+ /*
+ * Device with phantom functions that failed to both assign and
+ * rollback. Mark the device as broken and crash the target domain,
+ * as the state of the functions at this point is unknown and Xen
+ * has no way to assert consistent context assignment among them.
+ */
+ pdev->broken = true;
+ if ( !is_hardware_domain(d) && d != dom_io )
+ domain_crash(d);
+ }
+ }
/* The device is assigned to dom_io so mark it as quarantined */
else if ( d == dom_io )
pdev->quarantine = true;
--
2.43.0
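
The control-flow change above is an instance of a general pattern: attempt
every sub-part of an operation, and on partial failure roll the whole thing
back rather than leave a mixed state behind. A minimal sketch of that shape,
detached from the PCI/IOMMU specifics (assign_fn(), deassign_all(), and the
injected failure are all hypothetical):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical per-(phantom-)function assignment; fails for fn 2. */
static int assign_fn(unsigned int fn) { return fn == 2 ? -1 : 0; }
static int deassign_all(void) { return 0; }

static bool broken;

/*
 * Assign the main function (fn 0) and its phantom functions.  On a
 * phantom failure, try to deassign everything: leaving only some
 * functions assigned would let the device issue requests in a context
 * the new owner should not control.
 */
static int assign_device(unsigned int nr_fns)
{
    unsigned int fn = 0;
    int rc = assign_fn(fn);

    while ( ++fn < nr_fns && !rc )
        rc = assign_fn(fn);

    /* fn > 1 means at least the main function was assigned before the
     * failure, so there is partial state to undo. */
    if ( rc && fn > 1 && deassign_all() )
    {
        /* Assign and rollback both failed: state is unknown. */
        broken = true;
    }

    return rc;
}

int main(void)
{
    int rc = assign_device(4);

    printf("rc=%d broken=%d\n", rc, broken);
    return 0;
}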

main/xen/xsa450.patch (deleted)

@@ -1,59 +0,0 @@
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: VT-d: Fix "else" vs "#endif" misplacement

In domain_pgd_maddr() the "#endif" is misplaced with respect to "else". This
generates incorrect logic when CONFIG_HVM is compiled out, as the "else" body
is then executed unconditionally.

Rework the logic to use IS_ENABLED() instead of explicit #ifdef-ary, as it's
clearer to follow. This in turn involves adjusting p2m_get_pagetable() to
compile when CONFIG_HVM is disabled.

This is XSA-450 / CVE-2023-46840.

Reported-by: Teddy Astie <teddy.astie@vates.tech>
Fixes: 033ff90aa9c1 ("x86/P2M: p2m_{alloc,free}_ptp() and p2m_alloc_table() are HVM-only")
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

diff --git a/xen/arch/x86/include/asm/p2m.h b/xen/arch/x86/include/asm/p2m.h
index 32f3f394b05a..6ada585eaac2 100644
--- a/xen/arch/x86/include/asm/p2m.h
+++ b/xen/arch/x86/include/asm/p2m.h
@@ -435,7 +435,14 @@ static inline bool p2m_is_altp2m(const struct p2m_domain *p2m)
return p2m->p2m_class == p2m_alternate;
}
-#define p2m_get_pagetable(p2m) ((p2m)->phys_table)
+#ifdef CONFIG_HVM
+static inline pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m)
+{
+ return p2m->phys_table;
+}
+#else
+pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m);
+#endif
/*
* Ensure any deferred p2m TLB flush has been completed on all VCPUs.
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index 99b642f12ef9..4244855032ee 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -438,15 +438,13 @@ static paddr_t domain_pgd_maddr(struct domain *d, paddr_t pgd_maddr,
if ( pgd_maddr )
/* nothing */;
-#ifdef CONFIG_HVM
- else if ( iommu_use_hap_pt(d) )
+ else if ( IS_ENABLED(CONFIG_HVM) && iommu_use_hap_pt(d) )
{
pagetable_t pgt = p2m_get_pagetable(p2m_get_hostp2m(d));
pgd_maddr = pagetable_get_paddr(pgt);
}
else
-#endif
{
if ( !hd->arch.vtd.pgd_maddr )
{

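The failure mode is easy to reproduce in isolation: with the "#endif" placed
between "else" and its block, compiling the option out removes the "else"
keyword too, leaving a bare block that always executes. A self-contained
reproduction (names hypothetical; build with and without -DHAVE_FEATURE to
compare):

#include <stdio.h>

static int fallback_taken;

static void lookup(int cached)
{
    if ( cached )
        /* nothing */;
#ifdef HAVE_FEATURE
    else if ( 0 /* feature-specific path */ )
    {
        /* ... */
    }
    else
#endif
    {
        /* Intended as the "else" body; without HAVE_FEATURE it is a
         * bare block and runs even when cached != 0. */
        fallback_taken = 1;
    }
}

int main(void)
{
    lookup(1);
    printf("fallback_taken=%d (expected 0; a build without "
           "HAVE_FEATURE prints 1)\n", fallback_taken);
    return 0;
}

The IS_ENABLED() form used in the fix keeps the "else" keyword present in
every configuration, so the unwanted branch is removed by constant folding
rather than by the preprocessor.
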
main/xen/xsa451-4.18.patch (deleted)

@@ -1,188 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86: account for shadow stack in exception-from-stub recovery

Dealing with exceptions raised from within emulation stubs involves
discarding the return address (it is replaced by exception-related
information). Such discarding of course also requires removing the
corresponding entry from the shadow stack.

Also amend the comment in fixup_exception_return(), to further clarify
why the use of ptr[1] can't be an out-of-bounds access.

While touching do_invalid_op(), also add a missing fall-through
annotation.

This is XSA-451 / CVE-2023-46841.

Fixes: 209fb9919b50 ("x86/extable: Adjust extable handling to be shadow stack compatible")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/extable.c
+++ b/xen/arch/x86/extable.c
@@ -86,26 +86,29 @@ search_one_extable(const struct exceptio
}
unsigned long
-search_exception_table(const struct cpu_user_regs *regs)
+search_exception_table(const struct cpu_user_regs *regs, unsigned long *stub_ra)
{
const struct virtual_region *region = find_text_region(regs->rip);
unsigned long stub = this_cpu(stubs.addr);
if ( region && region->ex )
+ {
+ *stub_ra = 0;
return search_one_extable(region->ex, region->ex_end, regs->rip);
+ }
if ( regs->rip >= stub + STUB_BUF_SIZE / 2 &&
regs->rip < stub + STUB_BUF_SIZE &&
regs->rsp > (unsigned long)regs &&
regs->rsp < (unsigned long)get_cpu_info() )
{
- unsigned long retptr = *(unsigned long *)regs->rsp;
+ unsigned long retaddr = *(unsigned long *)regs->rsp, fixup;
- region = find_text_region(retptr);
- retptr = region && region->ex
- ? search_one_extable(region->ex, region->ex_end, retptr)
- : 0;
- if ( retptr )
+ region = find_text_region(retaddr);
+ fixup = region && region->ex
+ ? search_one_extable(region->ex, region->ex_end, retaddr)
+ : 0;
+ if ( fixup )
{
/*
* Put trap number and error code on the stack (in place of the
@@ -117,7 +120,8 @@ search_exception_table(const struct cpu_
};
*(unsigned long *)regs->rsp = token.raw;
- return retptr;
+ *stub_ra = retaddr;
+ return fixup;
}
}
--- a/xen/arch/x86/include/asm/uaccess.h
+++ b/xen/arch/x86/include/asm/uaccess.h
@@ -421,7 +421,8 @@ union stub_exception_token {
unsigned long raw;
};
-extern unsigned long search_exception_table(const struct cpu_user_regs *regs);
+extern unsigned long search_exception_table(const struct cpu_user_regs *regs,
+ unsigned long *stub_ra);
extern void sort_exception_tables(void);
extern void sort_exception_table(struct exception_table_entry *start,
const struct exception_table_entry *stop);
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -845,7 +845,7 @@ void do_unhandled_trap(struct cpu_user_r
}
static void fixup_exception_return(struct cpu_user_regs *regs,
- unsigned long fixup)
+ unsigned long fixup, unsigned long stub_ra)
{
if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
{
@@ -862,7 +862,8 @@ static void fixup_exception_return(struc
/*
* Search for %rip. The shstk currently looks like this:
*
- * ... [Likely pointed to by SSP]
+ * tok [Supervisor token, == &tok | BUSY, only with FRED inactive]
+ * ... [Pointed to by SSP for most exceptions, empty in IST cases]
* %cs [== regs->cs]
* %rip [== regs->rip]
* SSP [Likely points to 3 slots higher, above %cs]
@@ -880,7 +881,56 @@ static void fixup_exception_return(struc
*/
if ( ptr[0] == regs->rip && ptr[1] == regs->cs )
{
+ unsigned long primary_shstk =
+ (ssp & ~(STACK_SIZE - 1)) +
+ (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8;
+
wrss(fixup, ptr);
+
+ if ( !stub_ra )
+ goto shstk_done;
+
+ /*
+ * Stub recovery ought to happen only when the outer context
+ * was on the main shadow stack. We need to also "pop" the
+ * stub's return address from the interrupted context's shadow
+ * stack. That is,
+ * - if we're still on the main stack, we need to move the
+ * entire stack (up to and including the exception frame)
+ * up by one slot, incrementing the original SSP in the
+ * exception frame,
+ * - if we're on an IST stack, we need to increment the
+ * original SSP.
+ */
+ BUG_ON((ptr[-1] ^ primary_shstk) >> PAGE_SHIFT);
+
+ if ( (ssp ^ primary_shstk) >> PAGE_SHIFT )
+ {
+ /*
+ * We're on an IST stack. First make sure the two return
+ * addresses actually match. Then increment the interrupted
+ * context's SSP.
+ */
+ BUG_ON(stub_ra != *(unsigned long*)ptr[-1]);
+ wrss(ptr[-1] + 8, &ptr[-1]);
+ goto shstk_done;
+ }
+
+ /* Make sure the two return addresses actually match. */
+ BUG_ON(stub_ra != ptr[2]);
+
+ /* Move exception frame, updating SSP there. */
+ wrss(ptr[1], &ptr[2]); /* %cs */
+ wrss(ptr[0], &ptr[1]); /* %rip */
+ wrss(ptr[-1] + 8, &ptr[0]); /* SSP */
+
+ /* Move all newer entries. */
+ while ( --ptr != _p(ssp) )
+ wrss(ptr[-1], &ptr[0]);
+
+ /* Finally account for our own stack having shifted up. */
+ asm volatile ( "incsspd %0" :: "r" (2) );
+
goto shstk_done;
}
}
@@ -901,7 +951,8 @@ static void fixup_exception_return(struc
static bool extable_fixup(struct cpu_user_regs *regs, bool print)
{
- unsigned long fixup = search_exception_table(regs);
+ unsigned long stub_ra = 0;
+ unsigned long fixup = search_exception_table(regs, &stub_ra);
if ( unlikely(fixup == 0) )
return false;
@@ -915,7 +966,7 @@ static bool extable_fixup(struct cpu_use
vector_name(regs->entry_vector), regs->error_code,
_p(regs->rip), _p(regs->rip), _p(fixup));
- fixup_exception_return(regs, fixup);
+ fixup_exception_return(regs, fixup, stub_ra);
this_cpu(last_extable_addr) = regs->rip;
return true;
@@ -1183,7 +1234,8 @@ void do_invalid_op(struct cpu_user_regs
{
case BUGFRAME_run_fn:
case BUGFRAME_warn:
- fixup_exception_return(regs, (unsigned long)eip);
+ fixup_exception_return(regs, (unsigned long)eip, 0);
+ fallthrough;
case BUGFRAME_bug:
case BUGFRAME_assert:
return;
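
The most delicate part of the fix is "popping" the stub's return address out
of the middle of the shadow stack: each newer entry is copied one slot toward
the older end with wrss, and the shadow stack pointer is then stepped past the
freed slot with incsspd. Those CET instructions are privileged, but the
shuffle itself can be modelled in plain C on an array (illustration only; the
frame layout is simplified and the real fix additionally rewrites the
saved-SSP value, which is omitted here):

#include <stdio.h>

/*
 * Model: the shadow stack grows downward, so a lower index means a
 * newer entry and 'top' plays the role of SSP.
 */
static unsigned long shstk[8];

/* Drop the entry at 'victim' by copying each newer entry one slot
 * toward the older end, then advancing the top past the freed slot
 * (mirroring the wrss loop plus the incsspd in the patch). */
static unsigned int drop_slot(unsigned int top, unsigned int victim)
{
    unsigned int i;

    for ( i = victim; i > top; i-- )
        shstk[i] = shstk[i - 1];    /* wrss() stand-in */

    return top + 1;                 /* incsspd stand-in */
}

int main(void)
{
    unsigned int top = 3;

    shstk[7] = 0x111;               /* older frame */
    shstk[6] = 0xdead;              /* stub return address to discard */
    shstk[5] = 0x333;               /* e.g. the %cs slot */
    shstk[4] = 0x222;               /* e.g. the %rip slot */
    shstk[3] = 0x444;               /* e.g. the saved-SSP slot */

    top = drop_slot(top, 6);
    printf("top=%u, stack: %#lx %#lx %#lx %#lx\n", top,
           shstk[top], shstk[top + 1], shstk[top + 2], shstk[top + 3]);
    /* Prints: top=4, stack: 0x444 0x222 0x333 0x111 -- the 0xdead entry
     * is gone and the remaining entries keep their relative order. */
    return 0;
}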