~alpine/aports

[alpine-aports] [PATCH] main/xen: security fixes

Daniel Sabogal <dsabogalcc@gmail.com>
Details
Message ID
<20170823031153.19784-1-dsabogalcc@gmail.com>
Sender timestamp
1503457913
DKIM signature
missing
Download raw message
Patch: +719 -1
fixes #7732

CVE-2017-12135 XSA-226
CVE-2017-12137 XSA-227
CVE-2017-12136 XSA-228
CVE-2017-12855 XSA-230
---
 main/xen/APKBUILD       |  18 +++-
 main/xen/xsa226-1.patch | 134 +++++++++++++++++++++++
 main/xen/xsa226-2.patch | 280 ++++++++++++++++++++++++++++++++++++++++++++++++
 main/xen/xsa227.patch   |  52 +++++++++
 main/xen/xsa228.patch   | 198 ++++++++++++++++++++++++++++++++++
 main/xen/xsa230.patch   |  38 +++++++
 6 files changed, 719 insertions(+), 1 deletion(-)
 create mode 100644 main/xen/xsa226-1.patch
 create mode 100644 main/xen/xsa226-2.patch
 create mode 100644 main/xen/xsa227.patch
 create mode 100644 main/xen/xsa228.patch
 create mode 100644 main/xen/xsa230.patch

diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD
index 374292e7f6..1274b35f4a 100644
--- a/main/xen/APKBUILD
+++ b/main/xen/APKBUILD
@@ -3,7 +3,7 @@
# Maintainer: William Pitcock <nenolod@dereferenced.org>
pkgname=xen
pkgver=4.9.0
pkgrel=0
pkgrel=1
pkgdesc="Xen hypervisor"
url="http://www.xen.org/"
arch="x86_64 armhf"
@@ -73,6 +73,11 @@ options="!strip"
#     - CVE-2017-10921 XSA-224
#     - CVE-2017-10922 XSA-224
#     - CVE-2017-10923 XSA-225
#   4.9.0-r1:
#     - CVE-2017-12135 XSA-226
#     - CVE-2017-12137 XSA-227
#     - CVE-2017-12136 XSA-228
#     - CVE-2017-12855 XSA-230

case "$CARCH" in
x86*)
@@ -117,6 +122,12 @@ source="https://downloads.xenproject.org/release/$pkgname/$pkgver/$pkgname-$pkgv
	http://xenbits.xen.org/xen-extfiles/zlib-$_ZLIB_VERSION.tar.gz
	http://xenbits.xen.org/xen-extfiles/ipxe-git-$_IPXE_GIT_TAG.tar.gz

	xsa226-1.patch
	xsa226-2.patch
	xsa227.patch
	xsa228.patch
	xsa230.patch

	qemu-coroutine-gthread.patch
	qemu-xen_paths.patch

@@ -366,6 +377,11 @@ c2bc9ffc8583aeae71cee9ddcc4418969768d4e3764d47307da54f93981c0109fb07d84b061b3a36
4928b5b82f57645be9408362706ff2c4d9baa635b21b0d41b1c82930e8c60a759b1ea4fa74d7e6c7cae1b7692d006aa5cb72df0c3b88bf049779aa2b566f9d35  tpm_emulator-0.7.4.tar.gz
021b958fcd0d346c4ba761bcf0cc40f3522de6186cf5a0a6ea34a70504ce9622b1c2626fce40675bc8282cf5f5ade18473656abc38050f72f5d6480507a2106e  zlib-1.2.3.tar.gz
82ba65e1c676d32b29c71e6395c9506cab952c8f8b03f692e2b50133be8f0c0146d0f22c223262d81a4df579986fde5abc6507869f4965be4846297ef7b4b890  ipxe-git-827dd1bfee67daa683935ce65316f7e0f057fe1c.tar.gz
e934ba5be6a526d164cb4c8bb71a679f2fedeaddb82d8f5ebbbbe3cbfaa6dd639c4e94662c6b7a9d066195f2a59e8d14dc3ee55dc94c09b4475d455d881b2741  xsa226-1.patch
4d1e729c592efefd705233b49484991801606b2122a64ff14abbf994bb3e77ec75c4989d43753ce2043cc4fe13d34fb1cef7ee1adb291ff16625bb3b125e5508  xsa226-2.patch
7d66494e833d46f8a213af0f2b107a12617d5e8b45c3b07daee229c75bd6aad98284bc0e19f15706d044b58273cc7f0c193ef8553faa22fadeae349689e763c8  xsa227.patch
d406f14531af707325790909d08ce299ac2f2cb4b87f9a8ddb0fba10bd83bed84cc1633e07632cc2f841c50bc1a9af6240c89539a2e6ba6028cb127e218f86fc  xsa228.patch
df174a1675f74b73e78bc3cb1c9f16536199dfd1922c0cc545a807e92bc24941a816891838258e118f477109548487251a7eaccb2d1dd9b6994c8c76fc5b058f  xsa230.patch
c3c46f232f0bd9f767b232af7e8ce910a6166b126bd5427bb8dc325aeb2c634b956de3fc225cab5af72649070c8205cc8e1cab7689fc266c204f525086f1a562  qemu-coroutine-gthread.patch
1936ab39a1867957fa640eb81c4070214ca4856a2743ba7e49c0cd017917071a9680d015f002c57fa7b9600dbadd29dcea5887f50e6c133305df2669a7a933f3  qemu-xen_paths.patch
f095ea373f36381491ad36f0662fb4f53665031973721256b23166e596318581da7cbb0146d0beb2446729adfdb321e01468e377793f6563a67d68b8b0f7ffe3  hotplug-vif-vtrill.patch
diff --git a/main/xen/xsa226-1.patch b/main/xen/xsa226-1.patch
new file mode 100644
index 0000000000..7711d3f888
--- /dev/null
+++ b/main/xen/xsa226-1.patch
@@ -0,0 +1,134 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: gnttab: don't use possibly unbounded tail calls

There is no guarantee that the compiler would actually translate them
to branches instead of calls, so only ones with a known recursion limit
are okay:
- __release_grant_for_copy() can call itself only once, as
  __acquire_grant_for_copy() won't permit use of multi-level transitive
  grants,
- __acquire_grant_for_copy() is fine to call itself with the last
  argument false, as that prevents further recursion,
- __acquire_grant_for_copy() must not call itself to recover from an
  observed change to the active entry's pin count

This is part of CVE-2017-12135 / XSA-226.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2103,8 +2103,10 @@ __release_grant_for_copy(
 
     if ( td != rd )
     {
-        /* Recursive calls, but they're tail calls, so it's
-           okay. */
+        /*
+         * Recursive calls, but they're bounded (acquire permits only a single
+         * level of transitivity), so it's okay.
+         */
         if ( released_write )
             __release_grant_for_copy(td, trans_gref, 0);
         else if ( released_read )
@@ -2255,10 +2257,11 @@ __acquire_grant_for_copy(
                 return rc;
             }
 
-            /* We dropped the lock, so we have to check that nobody
-               else tried to pin (or, for that matter, unpin) the
-               reference in *this* domain.  If they did, just give up
-               and try again. */
+            /*
+             * We dropped the lock, so we have to check that nobody else tried
+             * to pin (or, for that matter, unpin) the reference in *this*
+             * domain.  If they did, just give up and tell the caller to retry.
+             */
             if ( act->pin != old_pin )
             {
                 __fixup_status_for_copy_pin(act, status);
@@ -2266,9 +2269,8 @@ __acquire_grant_for_copy(
                 active_entry_release(act);
                 grant_read_unlock(rgt);
                 put_page(*page);
-                return __acquire_grant_for_copy(rd, gref, ldom, readonly,
-                                                frame, page, page_off, length,
-                                                allow_transitive);
+                *page = NULL;
+                return ERESTART;
             }
 
             /* The actual remote remote grant may or may not be a
@@ -2574,7 +2576,7 @@ static int gnttab_copy_one(const struct
     {
         gnttab_copy_release_buf(src);
         rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref);
-        if ( rc < 0 )
+        if ( rc )
             goto out;
     }
 
@@ -2584,7 +2586,7 @@ static int gnttab_copy_one(const struct
     {
         gnttab_copy_release_buf(dest);
         rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref);
-        if ( rc < 0 )
+        if ( rc )
             goto out;
     }
 
@@ -2593,6 +2595,14 @@ static int gnttab_copy_one(const struct
     return rc;
 }
 
+/*
+ * gnttab_copy(), other than the various other helpers of
+ * do_grant_table_op(), returns (besides possible error indicators)
+ * "count - i" rather than "i" to ensure that even if no progress
+ * was made at all (perhaps due to gnttab_copy_one() returning a
+ * positive value) a non-zero value is being handed back (zero needs
+ * to be avoided, as that means "success, all done").
+ */
 static long gnttab_copy(
     XEN_GUEST_HANDLE_PARAM(gnttab_copy_t) uop, unsigned int count)
 {
@@ -2606,7 +2616,7 @@ static long gnttab_copy(
     {
         if ( i && hypercall_preempt_check() )
         {
-            rc = i;
+            rc = count - i;
             break;
         }
 
@@ -2616,13 +2626,20 @@ static long gnttab_copy(
             break;
         }
 
-        op.status = gnttab_copy_one(&op, &dest, &src);
-        if ( op.status != GNTST_okay )
+        rc = gnttab_copy_one(&op, &dest, &src);
+        if ( rc > 0 )
+        {
+            rc = count - i;
+            break;
+        }
+        if ( rc != GNTST_okay )
         {
             gnttab_copy_release_buf(&src);
             gnttab_copy_release_buf(&dest);
         }
 
+        op.status = rc;
+        rc = 0;
         if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
         {
             rc = -EFAULT;
@@ -3160,6 +3177,7 @@ do_grant_table_op(
         rc = gnttab_copy(copy, count);
         if ( rc > 0 )
         {
+            rc = count - rc;
             guest_handle_add_offset(copy, rc);
             uop = guest_handle_cast(copy, void);
         }
diff --git a/main/xen/xsa226-2.patch b/main/xen/xsa226-2.patch
new file mode 100644
index 0000000000..2cf93bdd29
--- /dev/null
+++ b/main/xen/xsa226-2.patch
@@ -0,0 +1,280 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: gnttab: fix transitive grant handling

Processing of transitive grants must not use the fast path, or else
reference counting breaks due to the skipped recursive call to
__acquire_grant_for_copy() (its __release_grant_for_copy()
counterpart occurs independent of original pin count). Furthermore
after re-acquiring temporarily dropped locks we need to verify no grant
properties changed if the original pin count was non-zero; checking
just the pin counts is sufficient only for well-behaved guests. As a
result, __release_grant_for_copy() needs to mirror that new behavior.

Furthermore a __release_grant_for_copy() invocation was missing on the
retry path of __acquire_grant_for_copy(), and gnttab_set_version() also
needs to bail out upon encountering a transitive grant.

This is part of CVE-2017-12135 / XSA-226.

Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2050,13 +2050,8 @@ __release_grant_for_copy(
     unsigned long r_frame;
     uint16_t *status;
     grant_ref_t trans_gref;
-    int released_read;
-    int released_write;
     struct domain *td;
 
-    released_read = 0;
-    released_write = 0;
-
     grant_read_lock(rgt);
 
     act = active_entry_acquire(rgt, gref);
@@ -2086,17 +2081,11 @@ __release_grant_for_copy(
 
         act->pin -= GNTPIN_hstw_inc;
         if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) )
-        {
-            released_write = 1;
             gnttab_clear_flag(_GTF_writing, status);
-        }
     }
 
     if ( !act->pin )
-    {
         gnttab_clear_flag(_GTF_reading, status);
-        released_read = 1;
-    }
 
     active_entry_release(act);
     grant_read_unlock(rgt);
@@ -2104,13 +2093,10 @@ __release_grant_for_copy(
     if ( td != rd )
     {
         /*
-         * Recursive calls, but they're bounded (acquire permits only a single
+         * Recursive call, but it is bounded (acquire permits only a single
          * level of transitivity), so it's okay.
          */
-        if ( released_write )
-            __release_grant_for_copy(td, trans_gref, 0);
-        else if ( released_read )
-            __release_grant_for_copy(td, trans_gref, 1);
+        __release_grant_for_copy(td, trans_gref, readonly);
 
         rcu_unlock_domain(td);
     }
@@ -2184,8 +2170,108 @@ __acquire_grant_for_copy(
                  act->domid, ldom, act->pin);
 
     old_pin = act->pin;
-    if ( !act->pin ||
-         (!readonly && !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
+    if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive )
+    {
+        if ( (!old_pin || (!readonly &&
+                           !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)))) &&
+             (rc = _set_status_v2(ldom, readonly, 0, shah, act,
+                                  status)) != GNTST_okay )
+            goto unlock_out;
+
+        if ( !allow_transitive )
+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
+                     "transitive grant when transitivity not allowed\n");
+
+        trans_domid = sha2->transitive.trans_domid;
+        trans_gref = sha2->transitive.gref;
+        barrier(); /* Stop the compiler from re-loading
+                      trans_domid from shared memory */
+        if ( trans_domid == rd->domain_id )
+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
+                     "transitive grants cannot be self-referential\n");
+
+        /*
+         * We allow the trans_domid == ldom case, which corresponds to a
+         * grant being issued by one domain, sent to another one, and then
+         * transitively granted back to the original domain.  Allowing it
+         * is easy, and means that you don't need to go out of your way to
+         * avoid it in the guest.
+         */
+
+        /* We need to leave the rrd locked during the grant copy. */
+        td = rcu_lock_domain_by_id(trans_domid);
+        if ( td == NULL )
+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
+                     "transitive grant referenced bad domain %d\n",
+                     trans_domid);
+
+        /*
+         * __acquire_grant_for_copy() could take the lock on the
+         * remote table (if rd == td), so we have to drop the lock
+         * here and reacquire.
+         */
+        active_entry_release(act);
+        grant_read_unlock(rgt);
+
+        rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
+                                      readonly, &grant_frame, page,
+                                      &trans_page_off, &trans_length, 0);
+
+        grant_read_lock(rgt);
+        act = active_entry_acquire(rgt, gref);
+
+        if ( rc != GNTST_okay )
+        {
+            __fixup_status_for_copy_pin(act, status);
+            rcu_unlock_domain(td);
+            active_entry_release(act);
+            grant_read_unlock(rgt);
+            return rc;
+        }
+
+        /*
+         * We dropped the lock, so we have to check that the grant didn't
+         * change, and that nobody else tried to pin/unpin it. If anything
+         * changed, just give up and tell the caller to retry.
+         */
+        if ( rgt->gt_version != 2 ||
+             act->pin != old_pin ||
+             (old_pin && (act->domid != ldom || act->frame != grant_frame ||
+                          act->start != trans_page_off ||
+                          act->length != trans_length ||
+                          act->trans_domain != td ||
+                          act->trans_gref != trans_gref ||
+                          !act->is_sub_page)) )
+        {
+            __release_grant_for_copy(td, trans_gref, readonly);
+            __fixup_status_for_copy_pin(act, status);
+            rcu_unlock_domain(td);
+            active_entry_release(act);
+            grant_read_unlock(rgt);
+            put_page(*page);
+            *page = NULL;
+            return ERESTART;
+        }
+
+        if ( !old_pin )
+        {
+            act->domid = ldom;
+            act->start = trans_page_off;
+            act->length = trans_length;
+            act->trans_domain = td;
+            act->trans_gref = trans_gref;
+            act->frame = grant_frame;
+            act->gfn = -1ul;
+            /*
+             * The actual remote remote grant may or may not be a sub-page,
+             * but we always treat it as one because that blocks mappings of
+             * transitive grants.
+             */
+            act->is_sub_page = 1;
+        }
+    }
+    else if ( !old_pin ||
+              (!readonly && !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
     {
         if ( (rc = _set_status(rgt->gt_version, ldom,
                                readonly, 0, shah, act,
@@ -2206,79 +2292,6 @@ __acquire_grant_for_copy(
             trans_page_off = 0;
             trans_length = PAGE_SIZE;
         }
-        else if ( (shah->flags & GTF_type_mask) == GTF_transitive )
-        {
-            if ( !allow_transitive )
-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
-                         "transitive grant when transitivity not allowed\n");
-
-            trans_domid = sha2->transitive.trans_domid;
-            trans_gref = sha2->transitive.gref;
-            barrier(); /* Stop the compiler from re-loading
-                          trans_domid from shared memory */
-            if ( trans_domid == rd->domain_id )
-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
-                         "transitive grants cannot be self-referential\n");
-
-            /* We allow the trans_domid == ldom case, which
-               corresponds to a grant being issued by one domain, sent
-               to another one, and then transitively granted back to
-               the original domain.  Allowing it is easy, and means
-               that you don't need to go out of your way to avoid it
-               in the guest. */
-
-            /* We need to leave the rrd locked during the grant copy */
-            td = rcu_lock_domain_by_id(trans_domid);
-            if ( td == NULL )
-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
-                         "transitive grant referenced bad domain %d\n",
-                         trans_domid);
-
-            /*
-             * __acquire_grant_for_copy() could take the lock on the
-             * remote table (if rd == td), so we have to drop the lock
-             * here and reacquire
-             */
-            active_entry_release(act);
-            grant_read_unlock(rgt);
-
-            rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
-                                          readonly, &grant_frame, page,
-                                          &trans_page_off, &trans_length, 0);
-
-            grant_read_lock(rgt);
-            act = active_entry_acquire(rgt, gref);
-
-            if ( rc != GNTST_okay ) {
-                __fixup_status_for_copy_pin(act, status);
-                rcu_unlock_domain(td);
-                active_entry_release(act);
-                grant_read_unlock(rgt);
-                return rc;
-            }
-
-            /*
-             * We dropped the lock, so we have to check that nobody else tried
-             * to pin (or, for that matter, unpin) the reference in *this*
-             * domain.  If they did, just give up and tell the caller to retry.
-             */
-            if ( act->pin != old_pin )
-            {
-                __fixup_status_for_copy_pin(act, status);
-                rcu_unlock_domain(td);
-                active_entry_release(act);
-                grant_read_unlock(rgt);
-                put_page(*page);
-                *page = NULL;
-                return ERESTART;
-            }
-
-            /* The actual remote remote grant may or may not be a
-               sub-page, but we always treat it as one because that
-               blocks mappings of transitive grants. */
-            is_sub_page = 1;
-            act->gfn = -1ul;
-        }
         else if ( !(sha2->hdr.flags & GTF_sub_page) )
         {
             rc = __get_paged_frame(sha2->full_page.frame, &grant_frame, page, readonly, rd);
@@ -2710,10 +2723,13 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
     case 2:
         for ( i = 0; i < GNTTAB_NR_RESERVED_ENTRIES; i++ )
         {
-            if ( ((shared_entry_v2(gt, i).hdr.flags & GTF_type_mask) ==
-                  GTF_permit_access) &&
-                 (shared_entry_v2(gt, i).full_page.frame >> 32) )
+            switch ( shared_entry_v2(gt, i).hdr.flags & GTF_type_mask )
             {
+            case GTF_permit_access:
+                 if ( !(shared_entry_v2(gt, i).full_page.frame >> 32) )
+                     break;
+                 /* fall through */
+            case GTF_transitive:
                 gdprintk(XENLOG_WARNING,
                          "tried to change grant table version to 1 with non-representable entries\n");
                 res = -ERANGE;
diff --git a/main/xen/xsa227.patch b/main/xen/xsa227.patch
new file mode 100644
index 0000000000..86aa41e2d4
--- /dev/null
+++ b/main/xen/xsa227.patch
@@ -0,0 +1,52 @@
From fa7268b94f8a0a7792ee12d5b8e23a60e52a3a84 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 20 Jun 2017 19:18:54 +0100
Subject: [PATCH] x86/grant: Disallow misaligned PTEs

Pagetable entries must be aligned to function correctly.  Disallow attempts
from the guest to have a grant PTE created at a misaligned address, which
would result in corruption of the L1 table with largely-guest-controlled
values.

This is XSA-227

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
 xen/arch/x86/mm.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 97b3b4b..00f517a 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3763,6 +3763,9 @@ static int create_grant_pte_mapping(
     l1_pgentry_t ol1e;
     struct domain *d = v->domain;
 
+    if ( !IS_ALIGNED(pte_addr, sizeof(nl1e)) )
+        return GNTST_general_error;
+
     adjust_guest_l1e(nl1e, d);
 
     gmfn = pte_addr >> PAGE_SHIFT;
@@ -3819,6 +3822,16 @@ static int destroy_grant_pte_mapping(
     struct page_info *page;
     l1_pgentry_t ol1e;
 
+    /*
+     * addr comes from Xen's active_entry tracking so isn't guest controlled,
+     * but it had still better be PTE-aligned.
+     */
+    if ( !IS_ALIGNED(addr, sizeof(ol1e)) )
+    {
+        ASSERT_UNREACHABLE();
+        return GNTST_general_error;
+    }
+
     gmfn = addr >> PAGE_SHIFT;
     page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
 
-- 
2.1.4

diff --git a/main/xen/xsa228.patch b/main/xen/xsa228.patch
new file mode 100644
index 0000000000..65add3a588
--- /dev/null
+++ b/main/xen/xsa228.patch
@@ -0,0 +1,198 @@
From 9a52c78eb4ff7836bf7ac9ecd918b289cead1f3f Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Mon, 31 Jul 2017 15:17:56 +0100
Subject: [PATCH] gnttab: split maptrack lock to make it fulfill its purpose
 again

The way the lock is currently being used in get_maptrack_handle(), it
protects only the maptrack limit: The function acts on current's list
only, so races on list accesses are impossible even without the lock.

Otoh list access races are possible between __get_maptrack_handle() and
put_maptrack_handle(), due to the invocation of the former for other
than current from steal_maptrack_handle(). Introduce a per-vCPU lock
for list accesses to become race free again. This lock will be
uncontended except when it becomes necessary to take the steal path,
i.e. in the common case there should be no meaningful performance
impact.

When in get_maptrack_handle adds a stolen entry to a fresh, empty,
freelist, we think that there is probably no concurrency.  However,
this is not a fast path and adding the locking there makes the code
clearly correct.

Also, while we are here: the stolen maptrack_entry's tail pointer was
not properly set.  Set it.

This is XSA-228.

Reported-by: Ian Jackson <ian.jackson@eu.citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
---
 docs/misc/grant-tables.txt    |  7 ++++++-
 xen/common/grant_table.c      | 30 ++++++++++++++++++++++++------
 xen/include/xen/grant_table.h |  2 +-
 xen/include/xen/sched.h       |  1 +
 4 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt
index 417ce2d..64da5cf 100644
--- a/docs/misc/grant-tables.txt
+++ b/docs/misc/grant-tables.txt
@@ -87,7 +87,8 @@ is complete.
                                inconsistent grant table state such as current
                                version, partially initialized active table pages,
                                etc.
-  grant_table->maptrack_lock : spinlock used to protect the maptrack free list
+  grant_table->maptrack_lock : spinlock used to protect the maptrack limit
+  v->maptrack_freelist_lock  : spinlock used to protect the maptrack free list
   active_grant_entry->lock   : spinlock used to serialize modifications to
                                active entries
 
@@ -102,6 +103,10 @@ is complete.
  The maptrack free list is protected by its own spinlock. The maptrack
  lock may be locked while holding the grant table lock.
 
+ The maptrack_freelist_lock is an innermost lock.  It may be locked
+ while holding other locks, but no other locks may be acquired within
+ it.
+
  Active entries are obtained by calling active_entry_acquire(gt, ref).
  This function returns a pointer to the active entry after locking its
  spinlock. The caller must hold the grant table read lock before
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index ae34547..ee33bd8 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -304,11 +304,16 @@ __get_maptrack_handle(
 {
     unsigned int head, next, prev_head;
 
+    spin_lock(&v->maptrack_freelist_lock);
+
     do {
         /* No maptrack pages allocated for this VCPU yet? */
         head = read_atomic(&v->maptrack_head);
         if ( unlikely(head == MAPTRACK_TAIL) )
+        {
+            spin_unlock(&v->maptrack_freelist_lock);
             return -1;
+        }
 
         /*
          * Always keep one entry in the free list to make it easier to
@@ -316,12 +321,17 @@ __get_maptrack_handle(
          */
         next = read_atomic(&maptrack_entry(t, head).ref);
         if ( unlikely(next == MAPTRACK_TAIL) )
+        {
+            spin_unlock(&v->maptrack_freelist_lock);
             return -1;
+        }
 
         prev_head = head;
         head = cmpxchg(&v->maptrack_head, prev_head, next);
     } while ( head != prev_head );
 
+    spin_unlock(&v->maptrack_freelist_lock);
+
     return head;
 }
 
@@ -380,6 +390,8 @@ put_maptrack_handle(
     /* 2. Add entry to the tail of the list on the original VCPU. */
     v = currd->vcpu[maptrack_entry(t, handle).vcpu];
 
+    spin_lock(&v->maptrack_freelist_lock);
+
     cur_tail = read_atomic(&v->maptrack_tail);
     do {
         prev_tail = cur_tail;
@@ -388,6 +400,8 @@ put_maptrack_handle(
 
     /* 3. Update the old tail entry to point to the new entry. */
     write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
+
+    spin_unlock(&v->maptrack_freelist_lock);
 }
 
 static inline int
@@ -411,10 +425,6 @@ get_maptrack_handle(
      */
     if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
     {
-        /*
-         * Can drop the lock since no other VCPU can be adding a new
-         * frame once they've run out.
-         */
         spin_unlock(&lgt->maptrack_lock);
 
         /*
@@ -426,8 +436,12 @@ get_maptrack_handle(
             handle = steal_maptrack_handle(lgt, curr);
             if ( handle == -1 )
                 return -1;
+            spin_lock(&curr->maptrack_freelist_lock);
+            maptrack_entry(lgt, handle).ref = MAPTRACK_TAIL;
             curr->maptrack_tail = handle;
-            write_atomic(&curr->maptrack_head, handle);
+            if ( curr->maptrack_head == MAPTRACK_TAIL )
+                write_atomic(&curr->maptrack_head, handle);
+            spin_unlock(&curr->maptrack_freelist_lock);
         }
         return steal_maptrack_handle(lgt, curr);
     }
@@ -460,12 +474,15 @@ get_maptrack_handle(
     smp_wmb();
     lgt->maptrack_limit += MAPTRACK_PER_PAGE;
 
+    spin_unlock(&lgt->maptrack_lock);
+    spin_lock(&curr->maptrack_freelist_lock);
+
     do {
         new_mt[i - 1].ref = read_atomic(&curr->maptrack_head);
         head = cmpxchg(&curr->maptrack_head, new_mt[i - 1].ref, handle + 1);
     } while ( head != new_mt[i - 1].ref );
 
-    spin_unlock(&lgt->maptrack_lock);
+    spin_unlock(&curr->maptrack_freelist_lock);
 
     return handle;
 }
@@ -3475,6 +3492,7 @@ grant_table_destroy(
 
 void grant_table_init_vcpu(struct vcpu *v)
 {
+    spin_lock_init(&v->maptrack_freelist_lock);
     v->maptrack_head = MAPTRACK_TAIL;
     v->maptrack_tail = MAPTRACK_TAIL;
 }
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index 4e77899..100f2b3 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -78,7 +78,7 @@ struct grant_table {
     /* Mapping tracking table per vcpu. */
     struct grant_mapping **maptrack;
     unsigned int          maptrack_limit;
-    /* Lock protecting the maptrack page list, head, and limit */
+    /* Lock protecting the maptrack limit */
     spinlock_t            maptrack_lock;
     /* The defined versions are 1 and 2.  Set to 0 if we don't know
        what version to use yet. */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 6673b27..8690f29 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -230,6 +230,7 @@ struct vcpu
     int              controller_pause_count;
 
     /* Grant table map tracking. */
+    spinlock_t       maptrack_freelist_lock;
     unsigned int     maptrack_head;
     unsigned int     maptrack_tail;
 
-- 
2.1.4

diff --git a/main/xen/xsa230.patch b/main/xen/xsa230.patch
new file mode 100644
index 0000000000..c3b50c8aaa
--- /dev/null
+++ b/main/xen/xsa230.patch
@@ -0,0 +1,38 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: gnttab: correct pin status fixup for copy

Regardless of copy operations only setting GNTPIN_hst*, GNTPIN_dev*
also need to be taken into account when deciding whether to clear
_GTF_{read,writ}ing. At least for consistency with code elsewhere the
read part better doesn't use any mask at all.

This is XSA-230.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index ae34547..9c9d33c 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2107,10 +2107,10 @@ __release_grant_for_copy(
 static void __fixup_status_for_copy_pin(const struct active_grant_entry *act,
                                    uint16_t *status)
 {
-    if ( !(act->pin & GNTPIN_hstw_mask) )
+    if ( !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) )
         gnttab_clear_flag(_GTF_writing, status);
 
-    if ( !(act->pin & GNTPIN_hstr_mask) )
+    if ( !act->pin )
         gnttab_clear_flag(_GTF_reading, status);
 }
 
@@ -2318,7 +2318,7 @@ __acquire_grant_for_copy(
  
  unlock_out_clear:
     if ( !(readonly) &&
-         !(act->pin & GNTPIN_hstw_mask) )
+         !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) )
         gnttab_clear_flag(_GTF_writing, status);
 
     if ( !act->pin )
-- 
2.14.1



---
Unsubscribe:  alpine-aports+unsubscribe@lists.alpinelinux.org
Help:         alpine-aports+help@lists.alpinelinux.org
---
Reply to thread Export thread (mbox)