From 17cf5cdf4873dba8ea09dd4be26ce20594b110ca Mon Sep 17 00:00:00 2001
From: Ariadne Conill <ariadne@dereferenced.org>
Date: Tue, 23 Nov 2021 15:16:50 -0600
Subject: [PATCH] main/xen: add mitigations for XSA-388 and XSA-389

---
 main/xen/APKBUILD            |  15 ++-
 main/xen/xsa388-4.15-1.patch | 174 +++++++++++++++++++++++++++++++++
 main/xen/xsa388-4.15-2.patch |  36 +++++++
 main/xen/xsa389-4.15.patch   | 182 +++++++++++++++++++++++++++++++++++
 4 files changed, 406 insertions(+), 1 deletion(-)
 create mode 100644 main/xen/xsa388-4.15-1.patch
 create mode 100644 main/xen/xsa388-4.15-2.patch
 create mode 100644 main/xen/xsa389-4.15.patch

diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD
index f244b5cdf69..d2fa99d07c4 100644
--- a/main/xen/APKBUILD
+++ b/main/xen/APKBUILD
@@ -2,7 +2,7 @@
 # Maintainer: Natanael Copa <ncopa@alpinelinux.org>
 pkgname=xen
 pkgver=4.15.1
-pkgrel=1
+pkgrel=2
 pkgdesc="Xen hypervisor"
 url="https://www.xenproject.org/"
 arch="x86_64 armhf aarch64"	# enable armv7 when builds with gcc8
@@ -263,6 +263,12 @@ options="!strip"
 #   4.15.1-r1:
 #     - CVE-2021-28702 XSA-386
 #     - CVE-2021-28710 XSA-390
+#   4.15.1-r2:
+#     - CVE-2021-28704 XSA-388
+#     - CVE-2021-28707 XSA-388
+#     - CVE-2021-28708 XSA-388
+#     - CVE-2021-28705 XSA-389
+#     - CVE-2021-28709 XSA-389
 
 case "$CARCH" in
 x86*)
@@ -324,6 +330,10 @@ source="https://downloads.xenproject.org/release/xen/$pkgver/xen-$pkgver.tar.gz
 	xsa386.patch
 	xsa390.patch
 
+	xsa388-4.15-1.patch
+	xsa388-4.15-2.patch
+	xsa389-4.15.patch
+
 	xenstored.initd
 	xenstored.confd
 	xenconsoled.initd
@@ -565,6 +575,9 @@ b9c754220187955d01ffbb6e030dace9d9aaae755db1765d07e407858c71a2cb0de04e0ab2099cd1
 6c28470dab368ce94d94db9e66954e4d915394ea730f6d4abb198ae122dbd7412453d6d8054f0a348d43d7f807fb13294363162f8b19f47311e802ffa9a40a90  stubdom-hack.patch
 77811232c5cf199d24fb8e4a5367a56d56e61ad218397913fa22bd89d0dffabe92acfded246aa731d450f80dcffee84268b27e73e60f19eec15d0ada988a0574  xsa386.patch
 cce33b310272224b5974725804544f5fb4557efd8e29c8d2a4cb7ed62ae0346f90dcf22d38c39c4a55c6058b2af2f385901f202437daef64c006b8b0ba9e9f4c  xsa390.patch
+af8ea4ad35a29270761c381f70acb5d6406dc964fb72193be38b3c28fc06fa0b8c18a91e73a97ebdb3a5ae420d72a87671370bd40ebda22815f85a5fb4217450  xsa388-4.15-1.patch
+837a80111ac436e637dece8396e0937ef6159c085465b63900a49d269e818264e38e8a3982a8aef03e236d77c23d80b4c7aaed2e021f0cbd1f89c77d86684dcd  xsa388-4.15-2.patch
+84d5623aa06991767786be77d7d01b3224bcafa0a6acf648c1267199465945773247ef75ee77fd4d25063315627f820f2f4c6d63cb9cbdce1f9c96f28d784eb7  xsa389-4.15.patch
 a8dda349cab62febf2ef506eb26d2ba494a649b1c37206519ae23f02a36f600b19996bb8a148e5f21a240ec53ecfcf971a07686b9ddcdad417563fdf39b2215f  xenstored.initd
 093f7fbd43faf0a16a226486a0776bade5dc1681d281c5946a3191c32d74f9699c6bf5d0ab8de9d1195a2461165d1660788e92a3156c9b3c7054d7b2d52d7ff0  xenstored.confd
 1dd04f4bf1890771aa7eef0b6e46f7139487da0907d28dcdbef9fbe335dcf731ca391cfcb175dd82924f637a308de00a69ae981f67348c34f04489ec5e5dc3b7  xenconsoled.initd
diff --git a/main/xen/xsa388-4.15-1.patch b/main/xen/xsa388-4.15-1.patch
new file mode 100644
index 00000000000..b4d900336b4
--- /dev/null
+++ b/main/xen/xsa388-4.15-1.patch
@@ -0,0 +1,174 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/PoD: deal with misaligned GFNs
+
+Users of XENMEM_decrease_reservation and XENMEM_populate_physmap aren't
+required to pass in order-aligned GFN values. (While I consider this
+bogus, I don't think we can fix this there, as that might break existing
+code, e.g Linux'es swiotlb, which - while affecting PV only - until
+recently had been enforcing only page alignment on the original
+allocation.) Only non-PoD code paths (guest_physmap_{add,remove}_page(),
+p2m_set_entry()) look to be dealing with this properly (in part by being
+implemented inefficiently, handling every 4k page separately).
+
+Introduce wrappers taking care of splitting the incoming request into
+aligned chunks, without putting much effort in trying to determine the
+largest possible chunk at every iteration.
+
+Also "handle" p2m_set_entry() failure for non-order-0 requests by
+crashing the domain in one more place. Alongside putting a log message
+there, also add one to the other similar path.
+
+Note regarding locking: This is left in the actual worker functions on
+the assumption that callers aren't guaranteed atomicity wrt acting on
+multiple pages at a time. For mis-aligned GFNs gfn_lock() wouldn't have
+locked the correct GFN range anyway, if it didn't simply resolve to
+p2m_lock(), and for well-behaved callers there continues to be only a
+single iteration, i.e. behavior is unchanged for them. (FTAOD pulling
+out just pod_lock() into p2m_pod_decrease_reservation() would result in
+a lock order violation.)
+
+This is CVE-2021-28704 and CVE-2021-28707 / part of XSA-388.
+
+Fixes: 3c352011c0d3 ("x86/PoD: shorten certain operations on higher order ranges")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+
+--- a/xen/arch/x86/mm/p2m-pod.c
++++ b/xen/arch/x86/mm/p2m-pod.c
+@@ -496,7 +496,7 @@ p2m_pod_zero_check_superpage(struct p2m_
+ 
+ 
+ /*
+- * This function is needed for two reasons:
++ * This pair of functions is needed for two reasons:
+  * + To properly handle clearing of PoD entries
+  * + To "steal back" memory being freed for the PoD cache, rather than
+  *   releasing it.
+@@ -504,8 +504,8 @@ p2m_pod_zero_check_superpage(struct p2m_
+  * Once both of these functions have been completed, we can return and
+  * allow decrease_reservation() to handle everything else.
+  */
+-unsigned long
+-p2m_pod_decrease_reservation(struct domain *d, gfn_t gfn, unsigned int order)
++static unsigned long
++decrease_reservation(struct domain *d, gfn_t gfn, unsigned int order)
+ {
+     unsigned long ret = 0, i, n;
+     struct p2m_domain *p2m = p2m_get_hostp2m(d);
+@@ -552,8 +552,10 @@ p2m_pod_decrease_reservation(struct doma
+          * All PoD: Mark the whole region invalid and tell caller
+          * we're done.
+          */
+-        if ( p2m_set_entry(p2m, gfn, INVALID_MFN, order, p2m_invalid,
+-                           p2m->default_access) )
++        int rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order, p2m_invalid,
++                               p2m->default_access);
++
++        if ( rc )
+         {
+             /*
+              * If this fails, we can't tell how much of the range was changed.
+@@ -561,7 +563,12 @@ p2m_pod_decrease_reservation(struct doma
+              * impossible.
+              */
+             if ( order != 0 )
++            {
++                printk(XENLOG_G_ERR
++                       "%pd: marking GFN %#lx (order %u) as non-PoD failed: %d\n",
++                       d, gfn_x(gfn), order, rc);
+                 domain_crash(d);
++            }
+             goto out_unlock;
+         }
+         ret = 1UL << order;
+@@ -670,6 +677,22 @@ out_unlock:
+     return ret;
+ }
+ 
++unsigned long
++p2m_pod_decrease_reservation(struct domain *d, gfn_t gfn, unsigned int order)
++{
++    unsigned long left = 1UL << order, ret = 0;
++    unsigned int chunk_order = find_first_set_bit(gfn_x(gfn) | left);
++
++    do {
++        ret += decrease_reservation(d, gfn, chunk_order);
++
++        left -= 1UL << chunk_order;
++        gfn = gfn_add(gfn, 1UL << chunk_order);
++    } while ( left );
++
++    return ret;
++}
++
+ void p2m_pod_dump_data(struct domain *d)
+ {
+     struct p2m_domain *p2m = p2m_get_hostp2m(d);
+@@ -1273,19 +1296,15 @@ remap_and_retry:
+     return true;
+ }
+ 
+-
+-int
+-guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn_l,
+-                                      unsigned int order)
++static int
++mark_populate_on_demand(struct domain *d, unsigned long gfn_l,
++                        unsigned int order)
+ {
+     struct p2m_domain *p2m = p2m_get_hostp2m(d);
+     gfn_t gfn = _gfn(gfn_l);
+     unsigned long i, n, pod_count = 0;
+     int rc = 0;
+ 
+-    if ( !paging_mode_translate(d) )
+-        return -EINVAL;
+-
+     gfn_lock(p2m, gfn, order);
+ 
+     P2M_DEBUG("mark pod gfn=%#lx\n", gfn_l);
+@@ -1325,12 +1344,44 @@ guest_physmap_mark_populate_on_demand(st
+ 
+         ioreq_request_mapcache_invalidate(d);
+     }
++    else if ( order )
++    {
++        /*
++         * If this failed, we can't tell how much of the range was changed.
++         * Best to crash the domain.
++         */
++        printk(XENLOG_G_ERR
++               "%pd: marking GFN %#lx (order %u) as PoD failed: %d\n",
++               d, gfn_l, order, rc);
++        domain_crash(d);
++    }
+ 
+ out:
+     gfn_unlock(p2m, gfn, order);
+ 
+     return rc;
+ }
++
++int
++guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
++                                      unsigned int order)
++{
++    unsigned long left = 1UL << order;
++    unsigned int chunk_order = find_first_set_bit(gfn | left);
++    int rc;
++
++    if ( !paging_mode_translate(d) )
++        return -EINVAL;
++
++    do {
++        rc = mark_populate_on_demand(d, gfn, chunk_order);
++
++        left -= 1UL << chunk_order;
++        gfn += 1UL << chunk_order;
++    } while ( !rc && left );
++
++    return rc;
++}
+ 
+ void p2m_pod_init(struct p2m_domain *p2m)
+ {
diff --git a/main/xen/xsa388-4.15-2.patch b/main/xen/xsa388-4.15-2.patch
new file mode 100644
index 00000000000..ccccb20263b
--- /dev/null
+++ b/main/xen/xsa388-4.15-2.patch
@@ -0,0 +1,36 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/PoD: handle intermediate page orders in p2m_pod_cache_add()
+
+p2m_pod_decrease_reservation() may pass pages to the function which
+aren't 4k, 2M, or 1G. Handle all intermediate orders as well, to avoid
+hitting the BUG() at the switch() statement's "default" case.
+
+This is CVE-2021-28708 / part of XSA-388.
+
+Fixes: 3c352011c0d3 ("x86/PoD: shorten certain operations on higher order ranges")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+
+--- a/xen/arch/x86/mm/p2m-pod.c
++++ b/xen/arch/x86/mm/p2m-pod.c
+@@ -112,15 +112,13 @@ p2m_pod_cache_add(struct p2m_domain *p2m
+     /* Then add to the appropriate populate-on-demand list. */
+     switch ( order )
+     {
+-    case PAGE_ORDER_1G:
+-        for ( i = 0; i < (1UL << PAGE_ORDER_1G); i += 1UL << PAGE_ORDER_2M )
++    case PAGE_ORDER_2M ... PAGE_ORDER_1G:
++        for ( i = 0; i < (1UL << order); i += 1UL << PAGE_ORDER_2M )
+             page_list_add_tail(page + i, &p2m->pod.super);
+         break;
+-    case PAGE_ORDER_2M:
+-        page_list_add_tail(page, &p2m->pod.super);
+-        break;
+-    case PAGE_ORDER_4K:
+-        page_list_add_tail(page, &p2m->pod.single);
++    case PAGE_ORDER_4K ... PAGE_ORDER_2M - 1:
++        for ( i = 0; i < (1UL << order); i += 1UL << PAGE_ORDER_4K )
++            page_list_add_tail(page + i, &p2m->pod.single);
+         break;
+     default:
+         BUG();
diff --git a/main/xen/xsa389-4.15.patch b/main/xen/xsa389-4.15.patch
new file mode 100644
index 00000000000..402a38e2d4e
--- /dev/null
+++ b/main/xen/xsa389-4.15.patch
@@ -0,0 +1,182 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/P2M: deal with partial success of p2m_set_entry()
+
+M2P and PoD stats need to remain in sync with P2M; if an update succeeds
+only partially, respective adjustments need to be made. If updates get
+made before the call, they may also need undoing upon complete failure
+(i.e. including the single-page case).
+
+Log-dirty state would better also be kept in sync.
+
+Note that the change to set_typed_p2m_entry() may not be strictly
+necessary (due to the order restriction enforced near the top of the
+function), but is being kept here to be on the safe side.
+
+This is CVE-2021-28705 and CVE-2021-28709 / XSA-389.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+
+--- a/xen/arch/x86/mm/p2m.c
++++ b/xen/arch/x86/mm/p2m.c
+@@ -784,6 +784,7 @@ p2m_remove_page(struct p2m_domain *p2m,
+     unsigned long i;
+     p2m_type_t t;
+     p2m_access_t a;
++    int rc;
+ 
+     /* IOMMU for PV guests is handled in get_page_type() and put_page(). */
+     if ( !paging_mode_translate(p2m->domain) )
+@@ -819,8 +820,27 @@ p2m_remove_page(struct p2m_domain *p2m,
+ 
+     ioreq_request_mapcache_invalidate(p2m->domain);
+ 
+-    return p2m_set_entry(p2m, gfn, INVALID_MFN, page_order, p2m_invalid,
+-                         p2m->default_access);
++    rc = p2m_set_entry(p2m, gfn, INVALID_MFN, page_order, p2m_invalid,
++                       p2m->default_access);
++    if ( likely(!rc) || !mfn_valid(mfn) )
++        return rc;
++
++    /*
++     * The operation may have partially succeeded. For the failed part we need
++     * to undo the M2P update and, out of precaution, mark the pages dirty
++     * again.
++     */
++    for ( i = 0; i < (1UL << page_order); ++i )
++    {
++        p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0, NULL, NULL);
++        if ( !p2m_is_hole(t) && !p2m_is_special(t) && !p2m_is_shared(t) )
++        {
++            set_gpfn_from_mfn(mfn_x(mfn) + i, gfn_x(gfn) + i);
++            paging_mark_pfn_dirty(p2m->domain, _pfn(gfn_x(gfn) + i));
++        }
++    }
++
++    return rc;
+ }
+ 
+ int
+@@ -1009,13 +1029,8 @@ guest_physmap_add_entry(struct domain *d
+ 
+     /* Now, actually do the two-way mapping */
+     rc = p2m_set_entry(p2m, gfn, mfn, page_order, t, p2m->default_access);
+-    if ( rc == 0 )
++    if ( likely(!rc) )
+     {
+-        pod_lock(p2m);
+-        p2m->pod.entry_count -= pod_count;
+-        BUG_ON(p2m->pod.entry_count < 0);
+-        pod_unlock(p2m);
+-
+         if ( !p2m_is_grant(t) )
+         {
+             for ( i = 0; i < (1UL << page_order); i++ )
+@@ -1023,6 +1038,42 @@ guest_physmap_add_entry(struct domain *d
+                                   gfn_x(gfn_add(gfn, i)));
+         }
+     }
++    else
++    {
++        /*
++         * The operation may have partially succeeded. For the successful part
++         * we need to update M2P and dirty state, while for the failed part we
++         * may need to adjust PoD stats as well as undo the earlier M2P update.
++         */
++        for ( i = 0; i < (1UL << page_order); ++i )
++        {
++            omfn = p2m->get_entry(p2m, gfn_add(gfn, i), &ot, &a, 0, NULL, NULL);
++            if ( p2m_is_pod(ot) )
++            {
++                BUG_ON(!pod_count);
++                --pod_count;
++            }
++            else if ( mfn_eq(omfn, mfn_add(mfn, i)) && ot == t &&
++                      a == p2m->default_access && !p2m_is_grant(t) )
++            {
++                set_gpfn_from_mfn(mfn_x(omfn), gfn_x(gfn) + i);
++                paging_mark_pfn_dirty(d, _pfn(gfn_x(gfn) + i));
++            }
++            else if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
++            {
++                ASSERT(mfn_valid(omfn));
++                set_gpfn_from_mfn(mfn_x(omfn), gfn_x(gfn) + i);
++            }
++        }
++    }
++
++    if ( pod_count )
++    {
++        pod_lock(p2m);
++        p2m->pod.entry_count -= pod_count;
++        BUG_ON(p2m->pod.entry_count < 0);
++        pod_unlock(p2m);
++    }
+ 
+ out:
+     p2m_unlock(p2m);
+@@ -1314,6 +1365,51 @@ static int set_typed_p2m_entry(struct do
+             return 0;
+         }
+     }
++
++    P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn_l, mfn_x(mfn));
++    rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
++    if ( unlikely(rc) )
++    {
++        gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (0x%"PRI_mfn")\n",
++                 gfn_l, order, rc, mfn_x(mfn));
++
++        /*
++         * The operation may have partially succeeded. For the successful part
++         * we need to update PoD stats, M2P, and dirty state.
++         */
++        if ( order != PAGE_ORDER_4K )
++        {
++            unsigned long i;
++
++            for ( i = 0; i < (1UL << order); ++i )
++            {
++                p2m_type_t t;
++                mfn_t cmfn = p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0,
++                                            NULL, NULL);
++
++                if ( !mfn_eq(cmfn, mfn_add(mfn, i)) || t != gfn_p2mt ||
++                     a != access )
++                    continue;
++
++                if ( p2m_is_ram(ot) )
++                {
++                    ASSERT(mfn_valid(mfn_add(omfn, i)));
++                    set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
++
++                    ioreq_request_mapcache_invalidate(d);
++                }
++#ifdef CONFIG_HVM
++                else if ( p2m_is_pod(ot) )
++                {
++                    pod_lock(p2m);
++                    BUG_ON(!p2m->pod.entry_count);
++                    --p2m->pod.entry_count;
++                    pod_unlock(p2m);
++                }
++#endif
++            }
++        }
++    }
+     else if ( p2m_is_ram(ot) )
+     {
+         unsigned long i;
+@@ -1326,12 +1422,6 @@ static int set_typed_p2m_entry(struct do
+ 
+         ioreq_request_mapcache_invalidate(d);
+     }
+-
+-    P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn_l, mfn_x(mfn));
+-    rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
+-    if ( rc )
+-        gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (0x%"PRI_mfn")\n",
+-                 gfn_l, order, rc, mfn_x(mfn));
+ #ifdef CONFIG_HVM
+     else if ( p2m_is_pod(ot) )
+     {
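
Editor's note (not part of the commit): the sketches below are standalone toy programs illustrating the techniques the three patches rely on; any name or value not taken from the patches themselves is invented for illustration.

The wrapper added by xsa388-4.15-1.patch splits a possibly misaligned (gfn, order) request into naturally aligned chunks. The chunk size is computed once, from the lowest set bit of gfn | (1 << order), rather than per iteration - hence the commit message's "without putting much effort in trying to determine the largest possible chunk". A minimal C sketch of just that arithmetic, with Xen's find_first_set_bit() stood in by GCC's __builtin_ctzl() and made-up GFN values:

#include <stdio.h>

/* Stand-in for Xen's find_first_set_bit(): index of the lowest set bit. */
static unsigned int find_first_set_bit(unsigned long x)
{
    return (unsigned int)__builtin_ctzl(x);
}

/* Same loop shape as the p2m_pod_decrease_reservation() wrapper above. */
static void split_request(unsigned long gfn, unsigned int order)
{
    unsigned long left = 1UL << order;
    unsigned int chunk_order = find_first_set_bit(gfn | left);

    do {
        printf("  chunk: gfn %#lx, order %u (%lu page(s))\n",
               gfn, chunk_order, 1UL << chunk_order);
        left -= 1UL << chunk_order;
        gfn  += 1UL << chunk_order;
    } while ( left );
}

int main(void)
{
    printf("aligned request, gfn 0x10, order 4:\n");
    split_request(0x10, 4);  /* one chunk - behavior unchanged */

    printf("misaligned request, gfn 0x18, order 4:\n");
    split_request(0x18, 4);  /* two order-3 chunks */

    printf("misaligned request, gfn 0x11, order 2:\n");
    split_request(0x11, 2);  /* degenerates to four order-0 chunks */

    return 0;
}

For a well-behaved (aligned) caller the loop runs exactly once, matching the "behavior is unchanged for them" remark in the commit message.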
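
xsa388-4.15-2.patch then makes p2m_pod_cache_add() accept the intermediate orders such chunks can now have, using GNU C case ranges instead of exact matches on 4k/2M/1G, so they no longer reach BUG(). A toy classify() (hypothetical name) that mirrors the reworked switch and counts how many entries a given order contributes to each PoD list:

#include <stdio.h>

#define PAGE_ORDER_4K  0
#define PAGE_ORDER_2M  9
#define PAGE_ORDER_1G 18

/* Mirrors the reworked switch: orders below 2M are queued page by page
 * on the "single" list, orders of 2M and up in 2M steps on "super". */
static void classify(unsigned int order)
{
    unsigned long i, singles = 0, supers = 0;

    switch ( order )
    {
    case PAGE_ORDER_2M ... PAGE_ORDER_1G:          /* GNU C case range */
        for ( i = 0; i < (1UL << order); i += 1UL << PAGE_ORDER_2M )
            ++supers;
        break;
    case PAGE_ORDER_4K ... PAGE_ORDER_2M - 1:
        for ( i = 0; i < (1UL << order); i += 1UL << PAGE_ORDER_4K )
            ++singles;
        break;
    default:
        printf("order %2u -> BUG()\n", order);
        return;
    }
    printf("order %2u -> %lu super, %lu single\n", order, supers, singles);
}

int main(void)
{
    classify(PAGE_ORDER_4K);  /* 1 single, as before */
    classify(7);              /* intermediate: 128 singles, previously BUG() */
    classify(PAGE_ORDER_2M);  /* 1 super, as before */
    classify(12);             /* intermediate: 8 supers, previously BUG() */
    return 0;
}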
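
Finally, xsa389-4.15.patch is about p2m_set_entry() succeeding only partially on a multi-page update: the M2P (reverse) map, PoD entry counts, and log-dirty state must then be re-derived from what the P2M actually contains. A deliberately simplified model of that resync idea - the arrays, set_entry(), NOT_MAPPED, and the failure mode are all invented here; the real code walks p2m->get_entry() and additionally adjusts pod.entry_count and the dirty bitmap:

#include <stdio.h>

#define ORDER      3
#define PAGES      (1UL << ORDER)
#define NOT_MAPPED (~0UL)

static unsigned long p2m[PAGES]; /* toy P2M: index = gfn, value = mfn */
static unsigned long m2p[64];    /* toy M2P: index = mfn, value = gfn */

/* Toy stand-in for p2m_set_entry(): "fails" after mapping only the
 * first ok pages, the way a large-order update might fail part-way. */
static int set_entry(unsigned long mfn0, unsigned long ok)
{
    for ( unsigned long gfn = 0; gfn < PAGES; ++gfn )
        p2m[gfn] = gfn < ok ? mfn0 + gfn : NOT_MAPPED;
    return ok == PAGES ? 0 : -1;
}

int main(void)
{
    for ( unsigned long mfn = 0; mfn < 64; ++mfn )
        m2p[mfn] = NOT_MAPPED;

    if ( set_entry(32, 5) ) /* partial success: 5 of 8 pages mapped */
    {
        /* XSA-389-style fixup: re-read every entry in the range and
         * bring the reverse map back in sync for the part that took. */
        for ( unsigned long gfn = 0; gfn < PAGES; ++gfn )
            if ( p2m[gfn] != NOT_MAPPED )
                m2p[p2m[gfn]] = gfn;
    }

    for ( unsigned long gfn = 0; gfn < PAGES; ++gfn )
        printf("gfn %lu: mfn %ld, m2p in sync: %s\n", gfn,
               p2m[gfn] == NOT_MAPPED ? -1L : (long)p2m[gfn],
               p2m[gfn] != NOT_MAPPED && m2p[p2m[gfn]] == gfn ? "yes" : "n/a");
    return 0;
}

Skipping that fixup would leave stale reverse-map entries for the pages whose P2M update did go through, which is the kind of inconsistency the advisory addresses.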