~alpine/aports

[alpine-aports] [PATCH] main/xen: upgrade to 4.9.1

From: Daniel Sabogal <dsabogalcc@gmail.com>
Message-ID: <20171127000542.32617-1-dsabogalcc@gmail.com>
Date: Mon, 27 Nov 2017 00:05:42 +0000
Patch: +3 -2756
Security fixes for all applicable XSAs up to and including XSA-245 are
now included upstream, so the corresponding local patches are dropped.
The upstream rombios makefile now enforces non-PIC/PIE mode, so
rombios-no-pie.patch is no longer needed.
---
 main/xen/APKBUILD             |  63 +-----
 main/xen/rombios-no-pie.patch |  22 --
 main/xen/xsa226-1.patch       | 149 -------------
 main/xen/xsa226-2.patch       | 280 ------------------------
 main/xen/xsa227.patch         |  52 -----
 main/xen/xsa228.patch         | 198 -----------------
 main/xen/xsa230.patch         |  38 ----
 main/xen/xsa231-4.9.patch     | 108 ---------
 main/xen/xsa232.patch         |  23 --
 main/xen/xsa233.patch         |  52 -----
 main/xen/xsa234-4.9.patch     | 192 ----------------
 main/xen/xsa235-4.9.patch     |  49 -----
 main/xen/xsa236-4.9.patch     |  66 ------
 main/xen/xsa237-1.patch       |  27 ---
 main/xen/xsa237-2.patch       |  66 ------
 main/xen/xsa237-3.patch       |  55 -----
 main/xen/xsa237-4.patch       | 124 -----------
 main/xen/xsa237-5.patch       |  37 ----
 main/xen/xsa238.patch         |  45 ----
 main/xen/xsa239.patch         |  46 ----
 main/xen/xsa240-1.patch       | 494 ------------------------------------------
 main/xen/xsa240-2.patch       |  83 -------
 main/xen/xsa241-4.9.patch     | 120 ----------
 main/xen/xsa242-4.9.patch     |  43 ----
 main/xen/xsa243-1.patch       |  93 --------
 main/xen/xsa243-2.patch       |  54 -----
 main/xen/xsa244.patch         |  59 -----
 main/xen/xsa245-1.patch       |  48 ----
 main/xen/xsa245-2.patch       |  73 -------
 29 files changed, 3 insertions(+), 2756 deletions(-)
 delete mode 100644 main/xen/rombios-no-pie.patch
 delete mode 100644 main/xen/xsa226-1.patch
 delete mode 100644 main/xen/xsa226-2.patch
 delete mode 100644 main/xen/xsa227.patch
 delete mode 100644 main/xen/xsa228.patch
 delete mode 100644 main/xen/xsa230.patch
 delete mode 100644 main/xen/xsa231-4.9.patch
 delete mode 100644 main/xen/xsa232.patch
 delete mode 100644 main/xen/xsa233.patch
 delete mode 100644 main/xen/xsa234-4.9.patch
 delete mode 100644 main/xen/xsa235-4.9.patch
 delete mode 100644 main/xen/xsa236-4.9.patch
 delete mode 100644 main/xen/xsa237-1.patch
 delete mode 100644 main/xen/xsa237-2.patch
 delete mode 100644 main/xen/xsa237-3.patch
 delete mode 100644 main/xen/xsa237-4.patch
 delete mode 100644 main/xen/xsa237-5.patch
 delete mode 100644 main/xen/xsa238.patch
 delete mode 100644 main/xen/xsa239.patch
 delete mode 100644 main/xen/xsa240-1.patch
 delete mode 100644 main/xen/xsa240-2.patch
 delete mode 100644 main/xen/xsa241-4.9.patch
 delete mode 100644 main/xen/xsa242-4.9.patch
 delete mode 100644 main/xen/xsa243-1.patch
 delete mode 100644 main/xen/xsa243-2.patch
 delete mode 100644 main/xen/xsa244.patch
 delete mode 100644 main/xen/xsa245-1.patch
 delete mode 100644 main/xen/xsa245-2.patch

diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD
index 19d4fbd78e..e128a57be4 100644
--- a/main/xen/APKBUILD
+++ b/main/xen/APKBUILD
@@ -2,8 +2,8 @@
 # Contributor: Roger Pau Monne <roger.pau@entel.upc.edu>
 # Maintainer: William Pitcock <nenolod@dereferenced.org>
 pkgname=xen
-pkgver=4.9.0
-pkgrel=8
+pkgver=4.9.1
+pkgrel=0
 pkgdesc="Xen hypervisor"
 url="http://www.xen.org/"
 arch="x86_64 armhf aarch64"
@@ -145,39 +145,10 @@ source="https://downloads.xenproject.org/release/$pkgname/$pkgver/$pkgname-$pkgv
 	http://xenbits.xen.org/xen-extfiles/zlib-$_ZLIB_VERSION.tar.gz
 	http://xenbits.xen.org/xen-extfiles/ipxe-git-$_IPXE_GIT_TAG.tar.gz
 
-	xsa226-1.patch
-	xsa226-2.patch
-	xsa227.patch
-	xsa228.patch
-	xsa230.patch
-	xsa231-4.9.patch
-	xsa232.patch
-	xsa233.patch
-	xsa234-4.9.patch
-	xsa235-4.9.patch
-	xsa236-4.9.patch
-	xsa237-1.patch
-	xsa237-2.patch
-	xsa237-3.patch
-	xsa237-4.patch
-	xsa237-5.patch
-	xsa238.patch
-	xsa239.patch
-	xsa240-1.patch
-	xsa240-2.patch
-	xsa241-4.9.patch
-	xsa242-4.9.patch
-	xsa243-1.patch
-	xsa243-2.patch
-	xsa244.patch
-	xsa245-1.patch
-	xsa245-2.patch
-
 	qemu-coroutine-gthread.patch
 	qemu-xen_paths.patch
 
 	hotplug-vif-vtrill.patch
-	rombios-no-pie.patch
 
 	musl-support.patch
 	musl-hvmloader-fix-stdint.patch
@@ -422,7 +393,7 @@ EOF

 }
 
-sha512sums="97f8075c49ef9ec0adbe95106c0cff4f9379578fd568777697565476c3fd948335d72ddcacf8be65fd9db219c0a35dcdc007f355f7e5874dd950fd4c0a0f966f  xen-4.9.0.tar.gz
+sha512sums="9d22f0aa5dcd01a1c105d17c14bce570cc597e884ddb9b4a46b80a72f647625b76ae5213cede423d0458c14e1906983595a9269bb6e6ff2e9e7e4dea840f4274  xen-4.9.1.tar.gz
 2e0b0fd23e6f10742a5517981e5171c6e88b0a93c83da701b296f5c0861d72c19782daab589a7eac3f9032152a0fc7eff7f5362db8fccc4859564a9aa82329cf  gmp-4.3.2.tar.bz2
 c2bc9ffc8583aeae71cee9ddcc4418969768d4e3764d47307da54f93981c0109fb07d84b061b3a3628bd00ba4d14a54742bc04848110eb3ae8ca25dbfbaabadb  grub-0.97.tar.gz
 1465b58279af1647f909450e394fe002ca165f0ff4a0254bfa9fe0e64316f50facdde2729d79a4e632565b4500cf4d6c74192ac0dd3bc9fe09129bbd67ba089d  lwip-1.3.0.tar.gz
@@ -432,37 +403,9 @@ c2bc9ffc8583aeae71cee9ddcc4418969768d4e3764d47307da54f93981c0109fb07d84b061b3a36
 4928b5b82f57645be9408362706ff2c4d9baa635b21b0d41b1c82930e8c60a759b1ea4fa74d7e6c7cae1b7692d006aa5cb72df0c3b88bf049779aa2b566f9d35  tpm_emulator-0.7.4.tar.gz
 021b958fcd0d346c4ba761bcf0cc40f3522de6186cf5a0a6ea34a70504ce9622b1c2626fce40675bc8282cf5f5ade18473656abc38050f72f5d6480507a2106e  zlib-1.2.3.tar.gz
 82ba65e1c676d32b29c71e6395c9506cab952c8f8b03f692e2b50133be8f0c0146d0f22c223262d81a4df579986fde5abc6507869f4965be4846297ef7b4b890  ipxe-git-827dd1bfee67daa683935ce65316f7e0f057fe1c.tar.gz
-45fed43bbdcf63fc3ded0a2629e27a5d58306a244dba2e005cf8814aa50cde962c41e5e72075a1d678eb9c18af17e1cbf078884214fd29df0ad551977c9880c2  xsa226-1.patch
-4d1e729c592efefd705233b49484991801606b2122a64ff14abbf994bb3e77ec75c4989d43753ce2043cc4fe13d34fb1cef7ee1adb291ff16625bb3b125e5508  xsa226-2.patch
-7d66494e833d46f8a213af0f2b107a12617d5e8b45c3b07daee229c75bd6aad98284bc0e19f15706d044b58273cc7f0c193ef8553faa22fadeae349689e763c8  xsa227.patch
-d406f14531af707325790909d08ce299ac2f2cb4b87f9a8ddb0fba10bd83bed84cc1633e07632cc2f841c50bc1a9af6240c89539a2e6ba6028cb127e218f86fc  xsa228.patch
-df174a1675f74b73e78bc3cb1c9f16536199dfd1922c0cc545a807e92bc24941a816891838258e118f477109548487251a7eaccb2d1dd9b6994c8c76fc5b058f  xsa230.patch
-7ef6637112c3d24a3541d40ca79b8d5ed8a152401c8b7bfa4cde3e2e264544ba22d27c199d58f24616af2228f5e4075c2af2744d21a70b8cb4d4b1f1a7feedde  xsa231-4.9.patch
-fb742225a4f3dbf2a574c4a6e3ef61a5da0c91aaeed77a2247023bdefcd4e0b6c08f1c9ffb42eaac3d38739c401443c3cf7aebb507b1d779c415b6cbffabbc10  xsa232.patch
-a322ac6c5ac2f858a59096108032fd42974eaaeeebd8f4966119149665f32bed281e333e743136e79add2e6f3844d88b6a3e4d5a685c2808702fd3a9e6396cd4  xsa233.patch
-cafeef137cd82cefc3e974b42b974c6562e822c9b359efb654ac374e663d9fc123be210eec17b278f40eabb77c93d3bf0ff03e445607159ad0712808a609a906  xsa234-4.9.patch
-8bab6e59577b51f0c6b8a547c9a37a257bd0460e7219512e899d25f80a74084745d2a4c54e55ad12526663d40f218cb8f833b71350220d36e3750d002ff43d29  xsa235-4.9.patch
-a951c3d29a6b05b42021bd49419becff51123a245256659240a3af5701bbf51e7d3c1a79835a7cc9a5fdf7c1c6aa330a35a586cb56d69d847c256642f0fc8e55  xsa236-4.9.patch
-a447b4f0a5379da46b5f0eb5b77eab07c3cfe8d303af6e116e03c7d88a9fc9ea154043165631d29248c07516ab8fdfd5de4da1ccf0ab7358d90fb7f9c87bf221  xsa237-1.patch
-10f2d84f783fb8bae5a39c463a32f4ac5d4d2614a7eecf109dcccd5418b8ec5e523691e79b3578d9c7b113f368a94d360acb9534808c440852a91c36369f88fd  xsa237-2.patch
-50607fca2e02eed322927e0288c77e7a6c541794fa2c70c78ada0c2fa762b5ad0f3b5108ecb9f01d8826f89dab492d56c502236c70234e6ba741e94a39356ea3  xsa237-3.patch
-c8aab545cd4118e74cbb2010f0a844e88608d44bd3117e765221445a8007a0dfa9ee0f4d5c10c3cd7f307a8b7718d896b928c4e06449dc63f7cfec7a10d356bb  xsa237-4.patch
-d3dfb9208deb6fc2be949e34fc1be84fbed1269bd99b8b31c176da34cc916c50955c7768b4aac75dbe4377634ae5d8390a03e0ff5ffa2ad00b9fab788449deaa  xsa237-5.patch
-b154c0925bbceab40e8f3b689e2d1fb321b42c685fdcb6bd29b0411ccd856731480a2fbb8025c633f9edf34cec938e5d8888cc71e8158212c078bb595d07a29d  xsa238.patch
-8b09cd12c7adfef69a02a2965cda22ef6499fd42c8a84a20a6af231f422a6e8a0e597501c327532e1580c1067ee4bf35579e3cf98dee9302ed34ba87f74bf6d2  xsa239.patch
-e209e629757b3471eae415913c34c662882172daad634083ee29823c2cb3f00e98886352085c1afc5d0f622781e65fae7b41ebfcbe6fd5e37c428337f5d2506c  xsa240-1.patch
-344519cd83ad13245de0e183b7afe564597b30d20756e44f98c0a00df55020d7ef85b92e71701c9791842a48cec93e0fcb9bfba2443313b3aafd8d21ea36abf4  xsa240-2.patch
-560d8062b5683b6533a67eebafdd81e6a9d2c9194cc9f9b1404544503238d4f1d98bccb1afac477f6a55ffbc67cf9629a43fd67a745ca9093e2adc0478dd0ddb  xsa241-4.9.patch
-86aa763949ca36a36120a40eafbdf3a8e8bc04acd32ee6bc1e3ae90b189b86b9b166b81a9e0a4f86a7eb1fcc8723ae8ba6bd0f84fa9732e7e4e1ccea45d0b7c1  xsa242-4.9.patch
-9f269e262aa67ff9a304ed6fc64ee9c5c9f6fd606d520fc2614cd173cddc9735ad42f91a97b91f1b9c5368d54d514820937edd0ce302dc3839b426398dc6b700  xsa243-1.patch
-8aaf0599259b1ff34171684467089da4a26af8fe67eedf22066955b34b2460c45abdf0f19a5a5e3dd3231b944674c62b9d3112ad7d765afc4bdbcdcfbad226e1  xsa243-2.patch
-0fd35e74be6f049f1f376aa8295b14f57b92f5e45e7487e5b485c2b8f6faa2950d0fe7d8a863692b3dab8a3a7ef1d9dd94be2c6b55d01802b0d86c84d2fa9e29  xsa244.patch
-b19197934e8685fc2af73f404b5c8cbed66d9241e5ff902d1a77fdc227e001a13b775a53d6e303d5f27419f5590561c84ec69409152d9773a5e6050c16e92f1b  xsa245-1.patch
-75369673232b2107b59dc0242d6fc224c016b9dcbf3299eab90a1d7c365d617fbc91f7b25075b394fee92782db37ce83c416387fa5ad4c4fcd51d0775a8a754f  xsa245-2.patch
 c3c46f232f0bd9f767b232af7e8ce910a6166b126bd5427bb8dc325aeb2c634b956de3fc225cab5af72649070c8205cc8e1cab7689fc266c204f525086f1a562  qemu-coroutine-gthread.patch
 1936ab39a1867957fa640eb81c4070214ca4856a2743ba7e49c0cd017917071a9680d015f002c57fa7b9600dbadd29dcea5887f50e6c133305df2669a7a933f3  qemu-xen_paths.patch
 f095ea373f36381491ad36f0662fb4f53665031973721256b23166e596318581da7cbb0146d0beb2446729adfdb321e01468e377793f6563a67d68b8b0f7ffe3  hotplug-vif-vtrill.patch
-5514d7697c87f7d54d64723d44446b9bd84f6c984e763bd21d4eeaf502bf0c5b765f7b2180f8ca496b3baf97e7efd600b1cc1fdd1284b6ecbffe9846190ca069  rombios-no-pie.patch
 e635cf27ca022ca5bc829e089b5e9a3ce9e566d4701d06bc38a22e356de45a71bc33e170d6db333d4efe8389144419cc27834a2eee0bcae9118d4ca9aff64306  musl-support.patch
 77b08e9655e091b0352e4630d520b54c6ca6d659d1d38fbb4b3bfc9ff3e66db433a2e194ead32bb10ff962c382d800a670e82b7a62835b238e294b22808290ea  musl-hvmloader-fix-stdint.patch
 8c3b57eab8641bcee3dbdc1937ea7874f77b9722a5a0aa3ddb8dff8cc0ced7e19703ef5d998621b3809bea7c16f3346cfa47610ec9ab014ad0de12651c94e5ff  stdint_local.h
diff --git a/main/xen/rombios-no-pie.patch b/main/xen/rombios-no-pie.patch
deleted file mode 100644
index 6387f4ef47..0000000000
--- a/main/xen/rombios-no-pie.patch
+++ /dev/null
@@ -1,22 +0,0 @@
--- ./tools/firmware/rombios/32bit/tcgbios/Makefile.orig
+++ ./tools/firmware/rombios/32bit/tcgbios/Makefile
@@ -3,7 +3,7 @@
 
 TARGET  = tcgbiosext.o
 
-CFLAGS += $(CFLAGS_xeninclude) -I.. -I../.. -I../../../../libacpi
+CFLAGS += $(CFLAGS_xeninclude) -I.. -I../.. -I../../../../libacpi -fno-pie
 
 .PHONY: all
 all: $(TARGET)
--- ./tools/firmware/rombios/32bit/Makefile.orig
+++ ./tools/firmware/rombios/32bit/Makefile
@@ -3,7 +3,7 @@
 
 TARGET = 32bitbios_flat.h
 
-CFLAGS += $(CFLAGS_xeninclude) -I.. -I../../../libacpi
+CFLAGS += $(CFLAGS_xeninclude) -I.. -I../../../libacpi -fno-pie
 
 SUBDIRS = tcgbios
 
diff --git a/main/xen/xsa226-1.patch b/main/xen/xsa226-1.patch
deleted file mode 100644
index d60bbe2db1..0000000000
--- a/main/xen/xsa226-1.patch
+++ /dev/null
@@ -1,149 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: gnttab: don't use possibly unbounded tail calls

There is no guarantee that the compiler would actually translate them
to branches instead of calls, so only ones with a known recursion limit
are okay:
- __release_grant_for_copy() can call itself only once, as
  __acquire_grant_for_copy() won't permit use of multi-level transitive
  grants,
- __acquire_grant_for_copy() is fine to call itself with the last
  argument false, as that prevents further recursion,
- __acquire_grant_for_copy() must not call itself to recover from an
  observed change to the active entry's pin count

This is part of CVE-2017-12135 / XSA-226.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
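
As a minimal illustration (plain C, invented names, not hypervisor
code): whether the tail call below becomes a branch or a real call is
up to the compiler, so a caller-controlled chain length can translate
into unbounded stack growth.

    /* Tail-recursive walk of a caller-controlled chain.  gcc -O2 will
     * normally turn this into a loop, but nothing in the C language
     * requires that; at -O0 every step consumes a stack frame. */
    static int chain_walk(const int *next, int idx)
    {
        if (next[idx] < 0)
            return idx;
        return chain_walk(next, next[idx]);   /* tail position */
    }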

--- a/xen/common/compat/grant_table.c
+++ b/xen/common/compat/grant_table.c
@@ -258,9 +258,9 @@ int compat_grant_table_op(unsigned int cmd,
                 rc = gnttab_copy(guest_handle_cast(nat.uop, gnttab_copy_t), n);
             if ( rc > 0 )
             {
-                ASSERT(rc < n);
-                i -= n - rc;
-                n = rc;
+                ASSERT(rc <= n);
+                i -= rc;
+                n -= rc;
             }
             if ( rc >= 0 )
             {
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2103,8 +2103,10 @@ __release_grant_for_copy(
 
     if ( td != rd )
     {
-        /* Recursive calls, but they're tail calls, so it's
-           okay. */
+        /*
+         * Recursive calls, but they're bounded (acquire permits only a single
+         * level of transitivity), so it's okay.
+         */
         if ( released_write )
             __release_grant_for_copy(td, trans_gref, 0);
         else if ( released_read )
@@ -2255,10 +2257,11 @@ __acquire_grant_for_copy(
                 return rc;
             }
 
-            /* We dropped the lock, so we have to check that nobody
-               else tried to pin (or, for that matter, unpin) the
-               reference in *this* domain.  If they did, just give up
-               and try again. */
+            /*
+             * We dropped the lock, so we have to check that nobody else tried
+             * to pin (or, for that matter, unpin) the reference in *this*
+             * domain.  If they did, just give up and tell the caller to retry.
+             */
             if ( act->pin != old_pin )
             {
                 __fixup_status_for_copy_pin(act, status);
@@ -2266,9 +2269,8 @@ __acquire_grant_for_copy(
                 active_entry_release(act);
                 grant_read_unlock(rgt);
                 put_page(*page);
-                return __acquire_grant_for_copy(rd, gref, ldom, readonly,
-                                                frame, page, page_off, length,
-                                                allow_transitive);
+                *page = NULL;
+                return ERESTART;
             }
 
             /* The actual remote remote grant may or may not be a
@@ -2574,7 +2576,7 @@ static int gnttab_copy_one(const struct
     {
         gnttab_copy_release_buf(src);
         rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref);
-        if ( rc < 0 )
+        if ( rc )
             goto out;
     }
 
@@ -2584,7 +2586,7 @@ static int gnttab_copy_one(const struct
     {
         gnttab_copy_release_buf(dest);
         rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref);
-        if ( rc < 0 )
+        if ( rc )
             goto out;
     }
 
@@ -2593,6 +2595,14 @@ static int gnttab_copy_one(const struct
     return rc;
 }
 
+/*
+ * gnttab_copy(), other than the various other helpers of
+ * do_grant_table_op(), returns (besides possible error indicators)
+ * "count - i" rather than "i" to ensure that even if no progress
+ * was made at all (perhaps due to gnttab_copy_one() returning a
+ * positive value) a non-zero value is being handed back (zero needs
+ * to be avoided, as that means "success, all done").
+ */
 static long gnttab_copy(
     XEN_GUEST_HANDLE_PARAM(gnttab_copy_t) uop, unsigned int count)
 {
@@ -2606,7 +2616,7 @@ static long gnttab_copy(
     {
         if ( i && hypercall_preempt_check() )
         {
-            rc = i;
+            rc = count - i;
             break;
         }
 
@@ -2616,13 +2626,20 @@ static long gnttab_copy(
             break;
         }
 
-        op.status = gnttab_copy_one(&op, &dest, &src);
-        if ( op.status != GNTST_okay )
+        rc = gnttab_copy_one(&op, &dest, &src);
+        if ( rc > 0 )
+        {
+            rc = count - i;
+            break;
+        }
+        if ( rc != GNTST_okay )
         {
             gnttab_copy_release_buf(&src);
             gnttab_copy_release_buf(&dest);
         }
 
+        op.status = rc;
+        rc = 0;
         if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
         {
             rc = -EFAULT;
@@ -3160,6 +3177,7 @@ do_grant_table_op(
         rc = gnttab_copy(copy, count);
         if ( rc > 0 )
         {
+            rc = count - rc;
             guest_handle_add_offset(copy, rc);
             uop = guest_handle_cast(copy, void);
         }
diff --git a/main/xen/xsa226-2.patch b/main/xen/xsa226-2.patch
deleted file mode 100644
index 2cf93bdd29..0000000000
--- a/main/xen/xsa226-2.patch
+++ /dev/null
@@ -1,280 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: gnttab: fix transitive grant handling

Processing of transitive grants must not use the fast path, or else
reference counting breaks due to the skipped recursive call to
__acquire_grant_for_copy() (its __release_grant_for_copy()
counterpart occurs independent of original pin count). Furthermore
after re-acquiring temporarily dropped locks we need to verify no grant
properties changed if the original pin count was non-zero; checking
just the pin counts is sufficient only for well-behaved guests. As a
result, __release_grant_for_copy() needs to mirror that new behavior.

Furthermore a __release_grant_for_copy() invocation was missing on the
retry path of __acquire_grant_for_copy(), and gnttab_set_version() also
needs to bail out upon encountering a transitive grant.

This is part of CVE-2017-12135 / XSA-226.

Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
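
Illustratively (plain C, invented types): the pattern this fix extends
is snapshot, drop lock, reacquire, compare everything. Re-checking only
the pin count is enough for well-behaved guests, but a hostile guest
can restore the count while changing the other grant properties.

    #include <errno.h>

    struct entry { int pin; int domid; unsigned long frame; };

    /* Returns 0 if nothing relevant changed while the lock was dropped;
     * otherwise the caller must undo its work and retry. */
    static int revalidate(const struct entry *e, const struct entry *snap)
    {
        if (e->pin != snap->pin ||
            (snap->pin && (e->domid != snap->domid ||
                           e->frame != snap->frame)))
            return -EAGAIN;
        return 0;
    }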

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2050,13 +2050,8 @@ __release_grant_for_copy(
     unsigned long r_frame;
     uint16_t *status;
     grant_ref_t trans_gref;
-    int released_read;
-    int released_write;
     struct domain *td;
 
-    released_read = 0;
-    released_write = 0;
-
     grant_read_lock(rgt);
 
     act = active_entry_acquire(rgt, gref);
@@ -2086,17 +2081,11 @@ __release_grant_for_copy(
 
         act->pin -= GNTPIN_hstw_inc;
         if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) )
-        {
-            released_write = 1;
             gnttab_clear_flag(_GTF_writing, status);
-        }
     }
 
     if ( !act->pin )
-    {
         gnttab_clear_flag(_GTF_reading, status);
-        released_read = 1;
-    }
 
     active_entry_release(act);
     grant_read_unlock(rgt);
@@ -2104,13 +2093,10 @@ __release_grant_for_copy(
     if ( td != rd )
     {
         /*
-         * Recursive calls, but they're bounded (acquire permits only a single
+         * Recursive call, but it is bounded (acquire permits only a single
          * level of transitivity), so it's okay.
          */
-        if ( released_write )
-            __release_grant_for_copy(td, trans_gref, 0);
-        else if ( released_read )
-            __release_grant_for_copy(td, trans_gref, 1);
+        __release_grant_for_copy(td, trans_gref, readonly);
 
         rcu_unlock_domain(td);
     }
@@ -2184,8 +2170,108 @@ __acquire_grant_for_copy(
                  act->domid, ldom, act->pin);
 
     old_pin = act->pin;
-    if ( !act->pin ||
-         (!readonly && !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
+    if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive )
+    {
+        if ( (!old_pin || (!readonly &&
+                           !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)))) &&
+             (rc = _set_status_v2(ldom, readonly, 0, shah, act,
+                                  status)) != GNTST_okay )
+            goto unlock_out;
+
+        if ( !allow_transitive )
+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
+                     "transitive grant when transitivity not allowed\n");
+
+        trans_domid = sha2->transitive.trans_domid;
+        trans_gref = sha2->transitive.gref;
+        barrier(); /* Stop the compiler from re-loading
+                      trans_domid from shared memory */
+        if ( trans_domid == rd->domain_id )
+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
+                     "transitive grants cannot be self-referential\n");
+
+        /*
+         * We allow the trans_domid == ldom case, which corresponds to a
+         * grant being issued by one domain, sent to another one, and then
+         * transitively granted back to the original domain.  Allowing it
+         * is easy, and means that you don't need to go out of your way to
+         * avoid it in the guest.
+         */
+
+        /* We need to leave the rrd locked during the grant copy. */
+        td = rcu_lock_domain_by_id(trans_domid);
+        if ( td == NULL )
+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
+                     "transitive grant referenced bad domain %d\n",
+                     trans_domid);
+
+        /*
+         * __acquire_grant_for_copy() could take the lock on the
+         * remote table (if rd == td), so we have to drop the lock
+         * here and reacquire.
+         */
+        active_entry_release(act);
+        grant_read_unlock(rgt);
+
+        rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
+                                      readonly, &grant_frame, page,
+                                      &trans_page_off, &trans_length, 0);
+
+        grant_read_lock(rgt);
+        act = active_entry_acquire(rgt, gref);
+
+        if ( rc != GNTST_okay )
+        {
+            __fixup_status_for_copy_pin(act, status);
+            rcu_unlock_domain(td);
+            active_entry_release(act);
+            grant_read_unlock(rgt);
+            return rc;
+        }
+
+        /*
+         * We dropped the lock, so we have to check that the grant didn't
+         * change, and that nobody else tried to pin/unpin it. If anything
+         * changed, just give up and tell the caller to retry.
+         */
+        if ( rgt->gt_version != 2 ||
+             act->pin != old_pin ||
+             (old_pin && (act->domid != ldom || act->frame != grant_frame ||
+                          act->start != trans_page_off ||
+                          act->length != trans_length ||
+                          act->trans_domain != td ||
+                          act->trans_gref != trans_gref ||
+                          !act->is_sub_page)) )
+        {
+            __release_grant_for_copy(td, trans_gref, readonly);
+            __fixup_status_for_copy_pin(act, status);
+            rcu_unlock_domain(td);
+            active_entry_release(act);
+            grant_read_unlock(rgt);
+            put_page(*page);
+            *page = NULL;
+            return ERESTART;
+        }
+
+        if ( !old_pin )
+        {
+            act->domid = ldom;
+            act->start = trans_page_off;
+            act->length = trans_length;
+            act->trans_domain = td;
+            act->trans_gref = trans_gref;
+            act->frame = grant_frame;
+            act->gfn = -1ul;
+            /*
+             * The actual remote remote grant may or may not be a sub-page,
+             * but we always treat it as one because that blocks mappings of
+             * transitive grants.
+             */
+            act->is_sub_page = 1;
+        }
+    }
+    else if ( !old_pin ||
+              (!readonly && !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
     {
         if ( (rc = _set_status(rgt->gt_version, ldom,
                                readonly, 0, shah, act,
@@ -2206,79 +2292,6 @@ __acquire_grant_for_copy(
             trans_page_off = 0;
             trans_length = PAGE_SIZE;
         }
-        else if ( (shah->flags & GTF_type_mask) == GTF_transitive )
-        {
-            if ( !allow_transitive )
-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
-                         "transitive grant when transitivity not allowed\n");
-
-            trans_domid = sha2->transitive.trans_domid;
-            trans_gref = sha2->transitive.gref;
-            barrier(); /* Stop the compiler from re-loading
-                          trans_domid from shared memory */
-            if ( trans_domid == rd->domain_id )
-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
-                         "transitive grants cannot be self-referential\n");
-
-            /* We allow the trans_domid == ldom case, which
-               corresponds to a grant being issued by one domain, sent
-               to another one, and then transitively granted back to
-               the original domain.  Allowing it is easy, and means
-               that you don't need to go out of your way to avoid it
-               in the guest. */
-
-            /* We need to leave the rrd locked during the grant copy */
-            td = rcu_lock_domain_by_id(trans_domid);
-            if ( td == NULL )
-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
-                         "transitive grant referenced bad domain %d\n",
-                         trans_domid);
-
-            /*
-             * __acquire_grant_for_copy() could take the lock on the
-             * remote table (if rd == td), so we have to drop the lock
-             * here and reacquire
-             */
-            active_entry_release(act);
-            grant_read_unlock(rgt);
-
-            rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
-                                          readonly, &grant_frame, page,
-                                          &trans_page_off, &trans_length, 0);
-
-            grant_read_lock(rgt);
-            act = active_entry_acquire(rgt, gref);
-
-            if ( rc != GNTST_okay ) {
-                __fixup_status_for_copy_pin(act, status);
-                rcu_unlock_domain(td);
-                active_entry_release(act);
-                grant_read_unlock(rgt);
-                return rc;
-            }
-
-            /*
-             * We dropped the lock, so we have to check that nobody else tried
-             * to pin (or, for that matter, unpin) the reference in *this*
-             * domain.  If they did, just give up and tell the caller to retry.
-             */
-            if ( act->pin != old_pin )
-            {
-                __fixup_status_for_copy_pin(act, status);
-                rcu_unlock_domain(td);
-                active_entry_release(act);
-                grant_read_unlock(rgt);
-                put_page(*page);
-                *page = NULL;
-                return ERESTART;
-            }
-
-            /* The actual remote remote grant may or may not be a
-               sub-page, but we always treat it as one because that
-               blocks mappings of transitive grants. */
-            is_sub_page = 1;
-            act->gfn = -1ul;
-        }
         else if ( !(sha2->hdr.flags & GTF_sub_page) )
         {
             rc = __get_paged_frame(sha2->full_page.frame, &grant_frame, page, readonly, rd);
@@ -2710,10 +2723,13 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
     case 2:
         for ( i = 0; i < GNTTAB_NR_RESERVED_ENTRIES; i++ )
         {
-            if ( ((shared_entry_v2(gt, i).hdr.flags & GTF_type_mask) ==
-                  GTF_permit_access) &&
-                 (shared_entry_v2(gt, i).full_page.frame >> 32) )
+            switch ( shared_entry_v2(gt, i).hdr.flags & GTF_type_mask )
             {
+            case GTF_permit_access:
+                 if ( !(shared_entry_v2(gt, i).full_page.frame >> 32) )
+                     break;
+                 /* fall through */
+            case GTF_transitive:
                 gdprintk(XENLOG_WARNING,
                          "tried to change grant table version to 1 with non-representable entries\n");
                 res = -ERANGE;
diff --git a/main/xen/xsa227.patch b/main/xen/xsa227.patch
deleted file mode 100644
index 86aa41e2d4..0000000000
--- a/main/xen/xsa227.patch
+++ /dev/null
@@ -1,52 +0,0 @@
From fa7268b94f8a0a7792ee12d5b8e23a60e52a3a84 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 20 Jun 2017 19:18:54 +0100
Subject: [PATCH] x86/grant: Disallow misaligned PTEs

Pagetable entries must be aligned to function correctly.  Disallow attempts
from the guest to have a grant PTE created at a misaligned address, which
would result in corruption of the L1 table with largely-guest-controlled
values.

This is XSA-227

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
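
Reduced to a standalone predicate (illustrative, assuming 8-byte PTEs
as on x86-64), the added check is a simple alignment test; a PTE
written at an unaligned address would straddle two L1 slots and
overwrite part of a neighbouring entry with guest-chosen bits:

    #include <stdbool.h>
    #include <stdint.h>

    static bool pte_addr_ok(uint64_t pte_addr)
    {
        /* Power-of-two alignment test, as Xen's IS_ALIGNED() does. */
        return (pte_addr & (sizeof(uint64_t) - 1)) == 0;
    }
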
---
 xen/arch/x86/mm.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 97b3b4b..00f517a 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3763,6 +3763,9 @@ static int create_grant_pte_mapping(
     l1_pgentry_t ol1e;
     struct domain *d = v->domain;
 
+    if ( !IS_ALIGNED(pte_addr, sizeof(nl1e)) )
+        return GNTST_general_error;
+
     adjust_guest_l1e(nl1e, d);
 
     gmfn = pte_addr >> PAGE_SHIFT;
@@ -3819,6 +3822,16 @@ static int destroy_grant_pte_mapping(
     struct page_info *page;
     l1_pgentry_t ol1e;
 
+    /*
+     * addr comes from Xen's active_entry tracking so isn't guest controlled,
+     * but it had still better be PTE-aligned.
+     */
+    if ( !IS_ALIGNED(addr, sizeof(ol1e)) )
+    {
+        ASSERT_UNREACHABLE();
+        return GNTST_general_error;
+    }
+
     gmfn = addr >> PAGE_SHIFT;
     page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
 
-- 
2.1.4

diff --git a/main/xen/xsa228.patch b/main/xen/xsa228.patch
deleted file mode 100644
index 65add3a588..0000000000
--- a/main/xen/xsa228.patch
+++ /dev/null
@@ -1,198 +0,0 @@
From 9a52c78eb4ff7836bf7ac9ecd918b289cead1f3f Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Mon, 31 Jul 2017 15:17:56 +0100
Subject: [PATCH] gnttab: split maptrack lock to make it fulfill its purpose
 again

The way the lock is currently being used in get_maptrack_handle(), it
protects only the maptrack limit: The function acts on current's list
only, so races on list accesses are impossible even without the lock.

Otoh list access races are possible between __get_maptrack_handle() and
put_maptrack_handle(), due to the invocation of the former for other
than current from steal_maptrack_handle(). Introduce a per-vCPU lock
for list accesses to become race free again. This lock will be
uncontended except when it becomes necessary to take the steal path,
i.e. in the common case there should be no meaningful performance
impact.

When get_maptrack_handle() adds a stolen entry to a fresh, empty
freelist, there is probably no concurrency.  However, this is not a
fast path, and adding the locking there makes the code clearly
correct.

Also, while we are here: the stolen maptrack_entry's tail pointer was
not properly set.  Set it.

This is XSA-228.

Reported-by: Ian Jackson <ian.jackson@eu.citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
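
A user-space sketch of the structure being introduced (pthread mutexes
standing in for Xen spinlocks, names invented): the freelist head is
only consistent under its own innermost lock, because the steal path
may touch another vCPU's list concurrently.

    #include <pthread.h>

    #define TAIL (~0u)

    struct freelist {
        pthread_mutex_t lock;  /* innermost: nothing acquired inside it */
        unsigned int head;     /* first free handle, or TAIL */
        unsigned int *next;    /* next[h] chains the free handles */
    };

    static int fl_pop(struct freelist *f)
    {
        pthread_mutex_lock(&f->lock);
        unsigned int h = f->head;
        if (h != TAIL)
            f->head = f->next[h];
        pthread_mutex_unlock(&f->lock);
        return h == TAIL ? -1 : (int)h;
    }
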
---
 docs/misc/grant-tables.txt    |  7 ++++++-
 xen/common/grant_table.c      | 30 ++++++++++++++++++++++++------
 xen/include/xen/grant_table.h |  2 +-
 xen/include/xen/sched.h       |  1 +
 4 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt
index 417ce2d..64da5cf 100644
--- a/docs/misc/grant-tables.txt
+++ b/docs/misc/grant-tables.txt
@@ -87,7 +87,8 @@ is complete.
                                inconsistent grant table state such as current
                                version, partially initialized active table pages,
                                etc.
-  grant_table->maptrack_lock : spinlock used to protect the maptrack free list
+  grant_table->maptrack_lock : spinlock used to protect the maptrack limit
+  v->maptrack_freelist_lock  : spinlock used to protect the maptrack free list
   active_grant_entry->lock   : spinlock used to serialize modifications to
                                active entries
 
@@ -102,6 +103,10 @@ is complete.
  The maptrack free list is protected by its own spinlock. The maptrack
  lock may be locked while holding the grant table lock.
 
+ The maptrack_freelist_lock is an innermost lock.  It may be locked
+ while holding other locks, but no other locks may be acquired within
+ it.
+
  Active entries are obtained by calling active_entry_acquire(gt, ref).
  This function returns a pointer to the active entry after locking its
  spinlock. The caller must hold the grant table read lock before
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index ae34547..ee33bd8 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -304,11 +304,16 @@ __get_maptrack_handle(
 {
     unsigned int head, next, prev_head;
 
+    spin_lock(&v->maptrack_freelist_lock);
+
     do {
         /* No maptrack pages allocated for this VCPU yet? */
         head = read_atomic(&v->maptrack_head);
         if ( unlikely(head == MAPTRACK_TAIL) )
+        {
+            spin_unlock(&v->maptrack_freelist_lock);
             return -1;
+        }
 
         /*
          * Always keep one entry in the free list to make it easier to
@@ -316,12 +321,17 @@ __get_maptrack_handle(
          */
         next = read_atomic(&maptrack_entry(t, head).ref);
         if ( unlikely(next == MAPTRACK_TAIL) )
+        {
+            spin_unlock(&v->maptrack_freelist_lock);
             return -1;
+        }
 
         prev_head = head;
         head = cmpxchg(&v->maptrack_head, prev_head, next);
     } while ( head != prev_head );
 
+    spin_unlock(&v->maptrack_freelist_lock);
+
     return head;
 }
 
@@ -380,6 +390,8 @@ put_maptrack_handle(
     /* 2. Add entry to the tail of the list on the original VCPU. */
     v = currd->vcpu[maptrack_entry(t, handle).vcpu];
 
+    spin_lock(&v->maptrack_freelist_lock);
+
     cur_tail = read_atomic(&v->maptrack_tail);
     do {
         prev_tail = cur_tail;
@@ -388,6 +400,8 @@ put_maptrack_handle(
 
     /* 3. Update the old tail entry to point to the new entry. */
     write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
+
+    spin_unlock(&v->maptrack_freelist_lock);
 }
 
 static inline int
@@ -411,10 +425,6 @@ get_maptrack_handle(
      */
     if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
     {
-        /*
-         * Can drop the lock since no other VCPU can be adding a new
-         * frame once they've run out.
-         */
         spin_unlock(&lgt->maptrack_lock);
 
         /*
@@ -426,8 +436,12 @@ get_maptrack_handle(
             handle = steal_maptrack_handle(lgt, curr);
             if ( handle == -1 )
                 return -1;
+            spin_lock(&curr->maptrack_freelist_lock);
+            maptrack_entry(lgt, handle).ref = MAPTRACK_TAIL;
             curr->maptrack_tail = handle;
-            write_atomic(&curr->maptrack_head, handle);
+            if ( curr->maptrack_head == MAPTRACK_TAIL )
+                write_atomic(&curr->maptrack_head, handle);
+            spin_unlock(&curr->maptrack_freelist_lock);
         }
         return steal_maptrack_handle(lgt, curr);
     }
@@ -460,12 +474,15 @@ get_maptrack_handle(
     smp_wmb();
     lgt->maptrack_limit += MAPTRACK_PER_PAGE;
 
+    spin_unlock(&lgt->maptrack_lock);
+    spin_lock(&curr->maptrack_freelist_lock);
+
     do {
         new_mt[i - 1].ref = read_atomic(&curr->maptrack_head);
         head = cmpxchg(&curr->maptrack_head, new_mt[i - 1].ref, handle + 1);
     } while ( head != new_mt[i - 1].ref );
 
-    spin_unlock(&lgt->maptrack_lock);
+    spin_unlock(&curr->maptrack_freelist_lock);
 
     return handle;
 }
@@ -3475,6 +3492,7 @@ grant_table_destroy(
 
 void grant_table_init_vcpu(struct vcpu *v)
 {
+    spin_lock_init(&v->maptrack_freelist_lock);
     v->maptrack_head = MAPTRACK_TAIL;
     v->maptrack_tail = MAPTRACK_TAIL;
 }
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index 4e77899..100f2b3 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -78,7 +78,7 @@ struct grant_table {
     /* Mapping tracking table per vcpu. */
     struct grant_mapping **maptrack;
     unsigned int          maptrack_limit;
-    /* Lock protecting the maptrack page list, head, and limit */
+    /* Lock protecting the maptrack limit */
     spinlock_t            maptrack_lock;
     /* The defined versions are 1 and 2.  Set to 0 if we don't know
        what version to use yet. */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 6673b27..8690f29 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -230,6 +230,7 @@ struct vcpu
     int              controller_pause_count;
 
     /* Grant table map tracking. */
+    spinlock_t       maptrack_freelist_lock;
     unsigned int     maptrack_head;
     unsigned int     maptrack_tail;
 
-- 
2.1.4

diff --git a/main/xen/xsa230.patch b/main/xen/xsa230.patch
deleted file mode 100644
index c3b50c8aaa..0000000000
--- a/main/xen/xsa230.patch
+++ /dev/null
@@ -1,38 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: gnttab: correct pin status fixup for copy

Regardless of copy operations only setting GNTPIN_hst*, GNTPIN_dev*
also need to be taken into account when deciding whether to clear
_GTF_{read,writ}ing. At least for consistency with code elsewhere the
read part better doesn't use any mask at all.

This is XSA-230.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
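
Illustratively (invented bit layout, not Xen's actual GNTPIN encoding):
the pin word packs separate host/device and read/write counters, so
"no writers" must test both writable counters, and "no users" means
the whole word is zero:

    #define PIN_HSTW(p) ((p) & 0x0000ff00u)   /* host mappings, writable   */
    #define PIN_DEVW(p) ((p) & 0xff000000u)   /* device mappings, writable */

    static int may_clear_writing(unsigned int pin)
    {
        return (PIN_HSTW(pin) | PIN_DEVW(pin)) == 0;  /* both kinds */
    }

    static int may_clear_reading(unsigned int pin)
    {
        return pin == 0;   /* no mappings of any kind remain */
    }
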
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index ae34547..9c9d33c 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2107,10 +2107,10 @@ __release_grant_for_copy(
 static void __fixup_status_for_copy_pin(const struct active_grant_entry *act,
                                    uint16_t *status)
 {
-    if ( !(act->pin & GNTPIN_hstw_mask) )
+    if ( !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) )
         gnttab_clear_flag(_GTF_writing, status);
 
-    if ( !(act->pin & GNTPIN_hstr_mask) )
+    if ( !act->pin )
         gnttab_clear_flag(_GTF_reading, status);
 }
 
@@ -2318,7 +2318,7 @@ __acquire_grant_for_copy(
  
  unlock_out_clear:
     if ( !(readonly) &&
-         !(act->pin & GNTPIN_hstw_mask) )
+         !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) )
         gnttab_clear_flag(_GTF_writing, status);
 
     if ( !act->pin )
diff --git a/main/xen/xsa231-4.9.patch b/main/xen/xsa231-4.9.patch
deleted file mode 100644
index 251165e6bd..0000000000
--- a/main/xen/xsa231-4.9.patch
+++ /dev/null
@@ -1,108 +0,0 @@
From: George Dunlap <george.dunlap@citrix.com>
Subject: xen/mm: make sure node is less than MAX_NUMNODES

The output of MEMF_get_node(memflags) can be as large as nodeid_t can
hold (currently 255).  It is then used as an index into arrays of size
MAX_NUMNODES (64 on x86, 1 on ARM).  The value can be passed in by an
untrusted guest (via memory_exchange and increase_reservation) and is
not currently bounds-checked.

Check the value in page_alloc.c before using it, and also check the
value in the hypercall call sites and return -EINVAL if appropriate.
Don't permit domains other than the hardware or control domain to
allocate node-constrained memory.

This is XSA-231.

Reported-by: Matthew Daley <mattd@bugfuzz.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
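
The shape of the added validation, as a standalone sketch (constants
abbreviated; in Xen nodeid_t is 8 bits wide and NUMA_NO_NODE is its
all-ones value):

    #include <stdbool.h>

    #define MAX_NUMNODES 64      /* 64 on x86, 1 on Arm */
    #define NUMA_NO_NODE 0xffu

    static bool node_ok(unsigned int node, bool privileged)
    {
        if (node == NUMA_NO_NODE)
            return true;         /* no placement constraint requested */
        /* Only the hardware/control domain may pin allocations to a
         * node, and even then the id must index the arrays safely. */
        return privileged && node < MAX_NUMNODES;
    }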

--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -411,6 +411,31 @@ static void decrease_reservation(struct
     a->nr_done = i;
 }
 
+static bool propagate_node(unsigned int xmf, unsigned int *memflags)
+{
+    const struct domain *currd = current->domain;
+
+    BUILD_BUG_ON(XENMEMF_get_node(0) != NUMA_NO_NODE);
+    BUILD_BUG_ON(MEMF_get_node(0) != NUMA_NO_NODE);
+
+    if ( XENMEMF_get_node(xmf) == NUMA_NO_NODE )
+        return true;
+
+    if ( is_hardware_domain(currd) || is_control_domain(currd) )
+    {
+        if ( XENMEMF_get_node(xmf) >= MAX_NUMNODES )
+            return false;
+
+        *memflags |= MEMF_node(XENMEMF_get_node(xmf));
+        if ( xmf & XENMEMF_exact_node_request )
+            *memflags |= MEMF_exact_node;
+    }
+    else if ( xmf & XENMEMF_exact_node_request )
+        return false;
+
+    return true;
+}
+
 static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
 {
     struct xen_memory_exchange exch;
@@ -483,6 +508,12 @@ static long memory_exchange(XEN_GUEST_HA
         }
     }
 
+    if ( unlikely(!propagate_node(exch.out.mem_flags, &memflags)) )
+    {
+        rc = -EINVAL;
+        goto fail_early;
+    }
+
     d = rcu_lock_domain_by_any_id(exch.in.domid);
     if ( d == NULL )
     {
@@ -501,7 +532,6 @@ static long memory_exchange(XEN_GUEST_HA
         d,
         XENMEMF_get_address_bits(exch.out.mem_flags) ? :
         (BITS_PER_LONG+PAGE_SHIFT)));
-    memflags |= MEMF_node(XENMEMF_get_node(exch.out.mem_flags));
 
     for ( i = (exch.nr_exchanged >> in_chunk_order);
           i < (exch.in.nr_extents >> in_chunk_order);
@@ -864,12 +894,8 @@ static int construct_memop_from_reservat
         }
         read_unlock(&d->vnuma_rwlock);
     }
-    else
-    {
-        a->memflags |= MEMF_node(XENMEMF_get_node(r->mem_flags));
-        if ( r->mem_flags & XENMEMF_exact_node_request )
-            a->memflags |= MEMF_exact_node;
-    }
+    else if ( unlikely(!propagate_node(r->mem_flags, &a->memflags)) )
+        return -EINVAL;
 
     return 0;
 }
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -706,9 +706,13 @@ static struct page_info *alloc_heap_page
         if ( node >= MAX_NUMNODES )
             node = cpu_to_node(smp_processor_id());
     }
+    else if ( unlikely(node >= MAX_NUMNODES) )
+    {
+        ASSERT_UNREACHABLE();
+        return NULL;
+    }
     first_node = node;
 
-    ASSERT(node < MAX_NUMNODES);
     ASSERT(zone_lo <= zone_hi);
     ASSERT(zone_hi < NR_ZONES);
 
diff --git a/main/xen/xsa232.patch b/main/xen/xsa232.patch
deleted file mode 100644
index 9e5f35c7d6..0000000000
--- a/main/xen/xsa232.patch
+++ /dev/null
@@ -1,23 +0,0 @@
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: grant_table: fix GNTTABOP_cache_flush handling

Don't fall over a NULL grant_table pointer when the owner of the page
is a system domain (DOMID_{XEN,IO} etc.), which has no grant table.

This is XSA-232.

Reported-by: Matthew Daley <mattd@bugfuzz.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -3053,7 +3053,7 @@ static int cache_flush(gnttab_cache_flus
 
     page = mfn_to_page(mfn);
     owner = page_get_owner_and_reference(page);
-    if ( !owner )
+    if ( !owner || !owner->grant_table )
     {
         rcu_unlock_domain(d);
         return -EPERM;
diff --git a/main/xen/xsa233.patch b/main/xen/xsa233.patch
deleted file mode 100644
index 6013c52b41..0000000000
--- a/main/xen/xsa233.patch
+++ /dev/null
@@ -1,52 +0,0 @@
From: Juergen Gross <jgross@suse.com>
Subject: tools/xenstore: dont unlink connection object twice

A connection object of a domain with associated stubdom has two
parents: the domain and the stubdom. When cleaning up the list of
active domains in domain_cleanup() make sure not to unlink the
connection twice from the same domain. This could happen when the
domain and its stubdom are being destroyed at the same time leading
to the domain loop being entered twice.

Additionally don't use talloc_free() in this case as it will remove
a random parent link, leading eventually to a memory leak. Use
talloc_unlink() instead specifying the context from which the
connection object should be removed.

This is XSA-233.

Reported-by: Eric Chanudet <chanudete@ainfosec.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Ian Jackson <ian.jackson@eu.citrix.com>
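
For readers unfamiliar with talloc's multi-parent objects, a small
sketch using the Samba talloc API (error handling omitted): once an
object has gained a second parent via talloc_reference(), only
talloc_unlink() says which link to drop.

    #include <talloc.h>

    static void demo(void)
    {
        void *parent_a = talloc_new(NULL);
        void *parent_b = talloc_new(NULL);
        void *obj = talloc_new(parent_a);

        talloc_reference(parent_b, obj);  /* obj now has two parents */

        /* Detach obj from parent_a specifically; talloc_free(obj)
         * would pick one of the links for us, as described above. */
        talloc_unlink(parent_a, obj);

        talloc_free(parent_a);
        talloc_free(parent_b);            /* obj goes away with it */
    }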

--- a/tools/xenstore/xenstored_domain.c
+++ b/tools/xenstore/xenstored_domain.c
@@ -221,10 +221,11 @@ static int destroy_domain(void *_domain)
 static void domain_cleanup(void)
 {
 	xc_dominfo_t dominfo;
-	struct domain *domain, *tmp;
+	struct domain *domain;
 	int notify = 0;
 
-	list_for_each_entry_safe(domain, tmp, &domains, list) {
+ again:
+	list_for_each_entry(domain, &domains, list) {
 		if (xc_domain_getinfo(*xc_handle, domain->domid, 1,
 				      &dominfo) == 1 &&
 		    dominfo.domid == domain->domid) {
@@ -236,8 +237,12 @@ static void domain_cleanup(void)
 			if (!dominfo.dying)
 				continue;
 		}
-		talloc_free(domain->conn);
-		notify = 0; /* destroy_domain() fires the watch */
+		if (domain->conn) {
+			talloc_unlink(talloc_autofree_context(), domain->conn);
+			domain->conn = NULL;
+			notify = 0; /* destroy_domain() fires the watch */
+			goto again;
+		}
 	}
 
 	if (notify)
diff --git a/main/xen/xsa234-4.9.patch b/main/xen/xsa234-4.9.patch
deleted file mode 100644
index 8dbf401720..0000000000
--- a/main/xen/xsa234-4.9.patch
+++ /dev/null
@@ -1,192 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: gnttab: also validate PTE permissions upon destroy/replace

In order for PTE handling to match up with the reference counting done
by common code, presence and writability of grant mapping PTEs must
also be taken into account; validating just the frame number is not
enough. This is in particular relevant if a guest fiddles with grant
PTEs via non-grant hypercalls.

Note that the flags being passed to replace_grant_host_mapping()
already happen to be those of the existing mapping, so no new function
parameter is needed.

This is XSA-234.

Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
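
The core of the added comparison, as a self-contained sketch (flag
values invented): a PTE only matches the grant if the frame agrees and
the Present and Writable bits agree; comparing the frame alone is
exactly the gap this XSA closes.

    #include <stdbool.h>
    #include <stdint.h>

    #define F_PRESENT 0x001u
    #define F_RW      0x002u

    static bool pte_matches_grant(uint64_t pte_flags, uint64_t pte_pfn,
                                  uint64_t grant_flags, uint64_t frame)
    {
        return pte_pfn == frame &&
               ((pte_flags ^ grant_flags) & (F_PRESENT | F_RW)) == 0;
    }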

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4058,7 +4058,8 @@ static int create_grant_pte_mapping(
 }
 
 static int destroy_grant_pte_mapping(
-    uint64_t addr, unsigned long frame, struct domain *d)
+    uint64_t addr, unsigned long frame, unsigned int grant_pte_flags,
+    struct domain *d)
 {
     int rc = GNTST_okay;
     void *va;
@@ -4104,17 +4105,29 @@ static int destroy_grant_pte_mapping(
 
     ol1e = *(l1_pgentry_t *)va;
     
-    /* Check that the virtual address supplied is actually mapped to frame. */
-    if ( unlikely(l1e_get_pfn(ol1e) != frame) )
+    /*
+     * Check that the PTE supplied actually maps frame (with appropriate
+     * permissions).
+     */
+    if ( unlikely(l1e_get_pfn(ol1e) != frame) ||
+         unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) &
+                  (_PAGE_PRESENT | _PAGE_RW)) )
     {
         page_unlock(page);
-        gdprintk(XENLOG_WARNING,
-                 "PTE entry %"PRIpte" for address %"PRIx64" doesn't match frame %lx\n",
-                 l1e_get_intpte(ol1e), addr, frame);
+        gdprintk(XENLOG_ERR,
+                 "PTE %"PRIpte" at %"PRIx64" doesn't match grant (%"PRIpte")\n",
+                 l1e_get_intpte(ol1e), addr,
+                 l1e_get_intpte(l1e_from_pfn(frame, grant_pte_flags)));
         rc = GNTST_general_error;
         goto failed;
     }
 
+    if ( unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) &
+                  ~(_PAGE_AVAIL | PAGE_CACHE_ATTRS)) )
+        gdprintk(XENLOG_WARNING,
+                 "PTE flags %x at %"PRIx64" don't match grant (%x)\n",
+                 l1e_get_flags(ol1e), addr, grant_pte_flags);
+
     /* Delete pagetable entry. */
     if ( unlikely(!UPDATE_ENTRY
                   (l1, 
@@ -4123,7 +4136,8 @@ static int destroy_grant_pte_mapping(
                    0)) )
     {
         page_unlock(page);
-        gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %p\n", va);
+        gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %"PRIx64"\n",
+                 addr);
         rc = GNTST_general_error;
         goto failed;
     }
@@ -4191,7 +4205,8 @@ static int create_grant_va_mapping(
 }
 
 static int replace_grant_va_mapping(
-    unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu *v)
+    unsigned long addr, unsigned long frame, unsigned int grant_pte_flags,
+    l1_pgentry_t nl1e, struct vcpu *v)
 {
     l1_pgentry_t *pl1e, ol1e;
     unsigned long gl1mfn;
@@ -4227,20 +4242,33 @@ static int replace_grant_va_mapping(
 
     ol1e = *pl1e;
 
-    /* Check that the virtual address supplied is actually mapped to frame. */
-    if ( unlikely(l1e_get_pfn(ol1e) != frame) )
-    {
-        gdprintk(XENLOG_WARNING,
-                 "PTE entry %lx for address %lx doesn't match frame %lx\n",
-                 l1e_get_pfn(ol1e), addr, frame);
+    /*
+     * Check that the virtual address supplied is actually mapped to frame
+     * (with appropriate permissions).
+     */
+    if ( unlikely(l1e_get_pfn(ol1e) != frame) ||
+         unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) &
+                  (_PAGE_PRESENT | _PAGE_RW)) )
+    {
+        gdprintk(XENLOG_ERR,
+                 "PTE %"PRIpte" for %lx doesn't match grant (%"PRIpte")\n",
+                 l1e_get_intpte(ol1e), addr,
+                 l1e_get_intpte(l1e_from_pfn(frame, grant_pte_flags)));
         rc = GNTST_general_error;
         goto unlock_and_out;
     }
 
+    if ( unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) &
+                  ~(_PAGE_AVAIL | PAGE_CACHE_ATTRS)) )
+        gdprintk(XENLOG_WARNING,
+                 "PTE flags %x for %"PRIx64" don't match grant (%x)\n",
+                 l1e_get_flags(ol1e), addr, grant_pte_flags);
+
     /* Delete pagetable entry. */
     if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
     {
-        gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %p\n", pl1e);
+        gdprintk(XENLOG_WARNING, "Cannot delete PTE entry for %"PRIx64"\n",
+                 addr);
         rc = GNTST_general_error;
         goto unlock_and_out;
     }
@@ -4254,9 +4282,11 @@ static int replace_grant_va_mapping(
 }
 
 static int destroy_grant_va_mapping(
-    unsigned long addr, unsigned long frame, struct vcpu *v)
+    unsigned long addr, unsigned long frame, unsigned int grant_pte_flags,
+    struct vcpu *v)
 {
-    return replace_grant_va_mapping(addr, frame, l1e_empty(), v);
+    return replace_grant_va_mapping(addr, frame, grant_pte_flags,
+                                    l1e_empty(), v);
 }
 
 static int create_grant_p2m_mapping(uint64_t addr, unsigned long frame,
@@ -4351,20 +4381,39 @@ int replace_grant_host_mapping(
     unsigned long gl1mfn;
     struct page_info *l1pg;
     int rc;
+    unsigned int grant_pte_flags;
     
     if ( paging_mode_external(current->domain) )
         return replace_grant_p2m_mapping(addr, frame, new_addr, flags);
 
+    grant_pte_flags =
+        _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_GNTTAB | _PAGE_NX;
+
+    if ( flags & GNTMAP_application_map )
+        grant_pte_flags |= _PAGE_USER;
+    if ( !(flags & GNTMAP_readonly) )
+        grant_pte_flags |= _PAGE_RW;
+    /*
+     * On top of the explicit settings done by create_grant_host_mapping()
+     * also open-code relevant parts of adjust_guest_l1e(). Don't mirror
+     * available and cachability flags, though.
+     */
+    if ( !is_pv_32bit_domain(curr->domain) )
+        grant_pte_flags |= (grant_pte_flags & _PAGE_USER)
+                           ? _PAGE_GLOBAL
+                           : _PAGE_GUEST_KERNEL | _PAGE_USER;
+
     if ( flags & GNTMAP_contains_pte )
     {
         if ( !new_addr )
-            return destroy_grant_pte_mapping(addr, frame, curr->domain);
+            return destroy_grant_pte_mapping(addr, frame, grant_pte_flags,
+                                             curr->domain);
         
         return GNTST_general_error;
     }
 
     if ( !new_addr )
-        return destroy_grant_va_mapping(addr, frame, curr);
+        return destroy_grant_va_mapping(addr, frame, grant_pte_flags, curr);
 
     pl1e = guest_map_l1e(new_addr, &gl1mfn);
     if ( !pl1e )
@@ -4412,7 +4461,7 @@ int replace_grant_host_mapping(
     put_page(l1pg);
     guest_unmap_l1e(pl1e);
 
-    rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
+    rc = replace_grant_va_mapping(addr, frame, grant_pte_flags, ol1e, curr);
     if ( rc && !paging_mode_refcounts(curr->domain) )
         put_page_from_l1e(ol1e, curr->domain);
 
diff --git a/main/xen/xsa235-4.9.patch b/main/xen/xsa235-4.9.patch
deleted file mode 100644
index 25dd650755..0000000000
--- a/main/xen/xsa235-4.9.patch
+++ /dev/null
@@ -1,49 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: arm/mm: release grant lock on xenmem_add_to_physmap_one() error paths

Commit 55021ff9ab ("xen/arm: add_to_physmap_one: Avoid to map mfn 0 if
an error occurs") introduced error paths not releasing the grant table
lock. Replace them by a suitable check after the lock was dropped.

This is XSA-235.

Reported-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Julien Grall <julien.grall@arm.com>
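
The restructuring follows a common single-exit pattern, sketched here
with invented names: record a sentinel instead of returning early, so
the one unlock is always reached, and only then convert the sentinel
into an error.

    #include <errno.h>
    #include <pthread.h>

    #define INVALID_MFN (~0ul)

    struct table {
        pthread_mutex_t lock;
        unsigned int nr;
        unsigned long *frame;
    };

    static int add_one(struct table *t, unsigned int idx)
    {
        unsigned long mfn;

        pthread_mutex_lock(&t->lock);
        mfn = idx < t->nr ? t->frame[idx] : INVALID_MFN;
        /* ... commit the mapping here only when mfn is valid ... */
        pthread_mutex_unlock(&t->lock);   /* reached on every path */

        return mfn == INVALID_MFN ? -EINVAL : 0;
    }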

--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -1164,7 +1164,7 @@ int xenmem_add_to_physmap_one(
             if ( idx < nr_status_frames(d->grant_table) )
                 mfn = virt_to_mfn(d->grant_table->status[idx]);
             else
-                return -EINVAL;
+                mfn = mfn_x(INVALID_MFN);
         }
         else
         {
@@ -1175,14 +1175,21 @@ int xenmem_add_to_physmap_one(
             if ( idx < nr_grant_frames(d->grant_table) )
                 mfn = virt_to_mfn(d->grant_table->shared_raw[idx]);
             else
-                return -EINVAL;
+                mfn = mfn_x(INVALID_MFN);
         }
 
-        d->arch.grant_table_gfn[idx] = gfn;
+        if ( mfn != mfn_x(INVALID_MFN) )
+        {
+            d->arch.grant_table_gfn[idx] = gfn;
 
-        t = p2m_ram_rw;
+            t = p2m_ram_rw;
+        }
 
         grant_write_unlock(d->grant_table);
+
+        if ( mfn == mfn_x(INVALID_MFN) )
+            return -EINVAL;
+
         break;
     case XENMAPSPACE_shared_info:
         if ( idx != 0 )
diff --git a/main/xen/xsa236-4.9.patch b/main/xen/xsa236-4.9.patch
deleted file mode 100644
index 203025dbae..0000000000
--- a/main/xen/xsa236-4.9.patch
+++ /dev/null
@@ -1,66 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: gnttab: fix pin count / page reference race

Dropping page references before decrementing pin counts is a bad idea
if assumptions are being made that a non-zero pin count implies a valid
page. Fix the order of operations in gnttab_copy_release_buf(), but at
the same time also remove the assertion that was found to trigger:
map_grant_ref() also has the potential of causing a race here, and
changing the order of operations there would likely be quite a bit more
involved.

This is XSA-236.

Reported-by: Pawel Wieczorkiewicz <wipawel@amazon.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
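
A sketch of the ordering rule at stake (invented helpers, declarations
only): if the invariant is that a non-zero pin implies the page holds
a reference, teardown must drop the pin first and the page reference
last.

    struct page;
    struct buf { int have_grant, have_type; struct page *page; };

    void drop_grant(struct buf *b);     /* illustrative externals */
    void put_page_type(struct page *p);
    void put_page(struct page *p);

    static void release_buf(struct buf *b)
    {
        if (b->have_grant) { drop_grant(b); b->have_grant = 0; }
        if (b->have_type)  { put_page_type(b->page); b->have_type = 0; }
        if (b->page)       { put_page(b->page); b->page = NULL; }
    }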

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2330,9 +2330,20 @@ __acquire_grant_for_copy(
         td = page_get_owner_and_reference(*page);
         /*
          * act->pin being non-zero should guarantee the page to have a
-         * non-zero refcount and hence a valid owner.
+         * non-zero refcount and hence a valid owner (matching the one on
+         * record), with one exception: If the owning domain is dying we
+         * had better not make implications from pin count (map_grant_ref()
+         * updates pin counts before obtaining page references, for
+         * example).
          */
-        ASSERT(td);
+        if ( td != rd || rd->is_dying )
+        {
+            if ( td )
+                put_page(*page);
+            *page = NULL;
+            rc = GNTST_bad_domain;
+            goto unlock_out_clear;
+        }
     }
 
     act->pin += readonly ? GNTPIN_hstr_inc : GNTPIN_hstw_inc;
@@ -2451,6 +2462,11 @@ static void gnttab_copy_release_buf(stru
         unmap_domain_page(buf->virt);
         buf->virt = NULL;
     }
+    if ( buf->have_grant )
+    {
+        __release_grant_for_copy(buf->domain, buf->ptr.u.ref, buf->read_only);
+        buf->have_grant = 0;
+    }
     if ( buf->have_type )
     {
         put_page_type(buf->page);
@@ -2461,11 +2477,6 @@ static void gnttab_copy_release_buf(stru
         put_page(buf->page);
         buf->page = NULL;
     }
-    if ( buf->have_grant )
-    {
-        __release_grant_for_copy(buf->domain, buf->ptr.u.ref, buf->read_only);
-        buf->have_grant = 0;
-    }
 }
 
 static int gnttab_copy_claim_buf(const struct gnttab_copy *op,
diff --git a/main/xen/xsa237-1.patch b/main/xen/xsa237-1.patch
deleted file mode 100644
index 7c9dff9672..0000000000
--- a/main/xen/xsa237-1.patch
+++ /dev/null
@@ -1,27 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86: don't allow MSI pIRQ mapping on unowned device

MSI setup should be permitted only for existing devices owned by the
respective guest (the operation may still be carried out by the domain
controlling that guest).

This is part of XSA-237.

Reported-by: HW42 <hw42@ipsumj.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1963,7 +1963,10 @@ int map_domain_pirq(
         if ( !cpu_has_apic )
             goto done;
 
-        pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
+        pdev = pci_get_pdev_by_domain(d, msi->seg, msi->bus, msi->devfn);
+        if ( !pdev )
+            goto done;
+
         ret = pci_enable_msi(msi, &msi_desc);
         if ( ret )
         {
diff --git a/main/xen/xsa237-2.patch b/main/xen/xsa237-2.patch
deleted file mode 100644
index 0add704587..0000000000
--- a/main/xen/xsa237-2.patch
@@ -1,66 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86: enforce proper privilege when (un)mapping pIRQ-s

(Un)mapping of IRQs, just like other RESOURCE__ADD* / RESOURCE__REMOVE*
actions (in FLASK terms) should be XSM_DM_PRIV rather than XSM_TARGET.
This in turn requires bypassing the XSM check in physdev_unmap_pirq()
for the HVM emuirq case just like is being done in physdev_map_pirq().
The primary goal security wise, however, is to no longer allow HVM
guests, by specifying their own domain ID instead of DOMID_SELF, to
enter code paths intended for PV guests and the control domains of HVM
guests only.

This is part of XSA-237.

Reported-by: HW42 <hw42@ipsumj.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
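
For context, the dummy policy's action handler looks roughly like this
(simplified from xen/include/xsm/dummy.h, not verbatim). The switch
matters because XSM_TARGET lets a domain act on itself, which is exactly
the self-referencing path this part of XSA-237 closes; XSM_DM_PRIV only
admits the controlling (device model) domain and privileged domains:

    static always_inline int xsm_default_action(
        xsm_default_t action, struct domain *src, struct domain *target)
    {
        switch ( action )
        {
        case XSM_HOOK:                       /* always allowed            */
            return 0;
        case XSM_TARGET:
            if ( src == target )             /* a domain acting on itself */
                return 0;
            /* fall through */
        case XSM_DM_PRIV:
            if ( target && src->target == target )
                return 0;                    /* controlling domain (DM)   */
            /* fall through */
        case XSM_PRIV:
            if ( src->is_privileged )
                return 0;                    /* control domain            */
            return -EPERM;
        default:
            return -EPERM;
        }
    }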

--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -111,7 +111,7 @@ int physdev_map_pirq(domid_t domid, int
     if ( d == NULL )
         return -ESRCH;
 
-    ret = xsm_map_domain_pirq(XSM_TARGET, d);
+    ret = xsm_map_domain_pirq(XSM_DM_PRIV, d);
     if ( ret )
         goto free_domain;
 
@@ -256,13 +256,14 @@ int physdev_map_pirq(domid_t domid, int
 int physdev_unmap_pirq(domid_t domid, int pirq)
 {
     struct domain *d;
-    int ret;
+    int ret = 0;
 
     d = rcu_lock_domain_by_any_id(domid);
     if ( d == NULL )
         return -ESRCH;
 
-    ret = xsm_unmap_domain_pirq(XSM_TARGET, d);
+    if ( domid != DOMID_SELF || !is_hvm_domain(d) || !has_pirq(d) )
+        ret = xsm_unmap_domain_pirq(XSM_DM_PRIV, d);
     if ( ret )
         goto free_domain;
 
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -453,7 +453,7 @@ static XSM_INLINE char *xsm_show_irq_sid
 
 static XSM_INLINE int xsm_map_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
 {
-    XSM_ASSERT_ACTION(XSM_TARGET);
+    XSM_ASSERT_ACTION(XSM_DM_PRIV);
     return xsm_default_action(action, current->domain, d);
 }
 
@@ -465,7 +465,7 @@ static XSM_INLINE int xsm_map_domain_irq
 
 static XSM_INLINE int xsm_unmap_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
 {
-    XSM_ASSERT_ACTION(XSM_TARGET);
+    XSM_ASSERT_ACTION(XSM_DM_PRIV);
     return xsm_default_action(action, current->domain, d);
 }
 
diff --git a/main/xen/xsa237-3.patch b/main/xen/xsa237-3.patch
deleted file mode 100644
index 5c69c48265..0000000000
--- a/main/xen/xsa237-3.patch
@@ -1,55 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/MSI: disallow redundant enabling

At the moment, Xen attempts to allow redundant enabling of MSI by
having pci_enable_msi() return 0, and point to the existing MSI
descriptor, when the MSI already exists.

Unfortunately, if subsequent errors are encountered, the cleanup
paths assume pci_enable_msi() had done full initialization, and
hence undo everything that was assumed to be done by that
function without also undoing other setup that would normally
occur only after that function was called (in map_domain_pirq()
itself).

Rather than try to make the redundant enabling case work properly, just
forbid it entirely by having pci_enable_msi() return -EEXIST when MSI
is already set up.

This is part of XSA-237.

Reported-by: HW42 <hw42@ipsumj.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -1050,11 +1050,10 @@ static int __pci_enable_msi(struct msi_i
     old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI);
     if ( old_desc )
     {
-        printk(XENLOG_WARNING "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
+        printk(XENLOG_ERR "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
                msi->irq, msi->seg, msi->bus,
                PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
-        *desc = old_desc;
-        return 0;
+        return -EEXIST;
     }
 
     old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
@@ -1118,11 +1117,10 @@ static int __pci_enable_msix(struct msi_
     old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX);
     if ( old_desc )
     {
-        printk(XENLOG_WARNING "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
+        printk(XENLOG_ERR "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
                msi->irq, msi->seg, msi->bus,
                PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
-        *desc = old_desc;
-        return 0;
+        return -EEXIST;
     }
 
     old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
diff --git a/main/xen/xsa237-4.patch b/main/xen/xsa237-4.patch
deleted file mode 100644
index a16ec1bba1..0000000000
--- a/main/xen/xsa237-4.patch
@@ -1,124 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/IRQ: conditionally preserve irq <-> pirq mapping on map error paths

Mappings that had been set up before should not be torn down when
handling unrelated errors.

This is part of XSA-237.

Reported-by: HW42 <hw42@ipsumj.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
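
Key to reading the hunks below: prepare_domain_irq_pirq() now returns a
tri-state, and the new `prepared' bitmap records which entries this call
itself created, so the error paths only tear down their own work. The
convention, sketched:

    ret = prepare_domain_irq_pirq(d, irq, pirq, &info);
    if ( ret < 0 )               /* hard error: nothing was set up      */
        goto revoke;
    if ( !ret )                  /* 0: freshly prepared, we own cleanup */
        __set_bit(0, prepared);
    /* ret > 0: the mapping pre-existed; on a later unrelated failure it
     * must be preserved rather than torn down (the bug being fixed). */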

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1251,7 +1251,8 @@ static int prepare_domain_irq_pirq(struc
         return -ENOMEM;
     }
     *pinfo = info;
-    return 0;
+
+    return !!err;
 }
 
 static void set_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq)
@@ -1294,7 +1295,10 @@ int init_domain_irq_mapping(struct domai
             continue;
         err = prepare_domain_irq_pirq(d, i, i, &info);
         if ( err )
+        {
+            ASSERT(err < 0);
             break;
+        }
         set_domain_irq_pirq(d, i, info);
     }
 
@@ -1902,6 +1906,7 @@ int map_domain_pirq(
     struct pirq *info;
     struct irq_desc *desc;
     unsigned long flags;
+    DECLARE_BITMAP(prepared, MAX_MSI_IRQS) = {};
 
     ASSERT(spin_is_locked(&d->event_lock));
 
@@ -1945,8 +1950,10 @@ int map_domain_pirq(
     }
 
     ret = prepare_domain_irq_pirq(d, irq, pirq, &info);
-    if ( ret )
+    if ( ret < 0 )
         goto revoke;
+    if ( !ret )
+        __set_bit(0, prepared);
 
     desc = irq_to_desc(irq);
 
@@ -2018,8 +2025,10 @@ int map_domain_pirq(
             irq = create_irq(NUMA_NO_NODE);
             ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
                            : irq;
-            if ( ret )
+            if ( ret < 0 )
                 break;
+            if ( !ret )
+                __set_bit(nr, prepared);
             msi_desc[nr].irq = irq;
 
             if ( irq_permit_access(d, irq) != 0 )
@@ -2052,15 +2061,15 @@ int map_domain_pirq(
                 desc->msi_desc = NULL;
                 spin_unlock_irqrestore(&desc->lock, flags);
             }
-            while ( nr-- )
+            while ( nr )
             {
                 if ( irq >= 0 && irq_deny_access(d, irq) )
                     printk(XENLOG_G_ERR
                            "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
                            d->domain_id, irq, pirq);
-                if ( info )
+                if ( info && test_bit(nr, prepared) )
                     cleanup_domain_irq_pirq(d, irq, info);
-                info = pirq_info(d, pirq + nr);
+                info = pirq_info(d, pirq + --nr);
                 irq = info->arch.irq;
             }
             msi_desc->irq = -1;
@@ -2076,12 +2085,14 @@ int map_domain_pirq(
         spin_lock_irqsave(&desc->lock, flags);
         set_domain_irq_pirq(d, irq, info);
         spin_unlock_irqrestore(&desc->lock, flags);
+        ret = 0;
     }
 
 done:
     if ( ret )
     {
-        cleanup_domain_irq_pirq(d, irq, info);
+        if ( test_bit(0, prepared) )
+            cleanup_domain_irq_pirq(d, irq, info);
  revoke:
         if ( irq_deny_access(d, irq) )
             printk(XENLOG_G_ERR
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -186,7 +186,7 @@ int physdev_map_pirq(domid_t domid, int
         }
         else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
         {
-            if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
+            if ( msi->entry_nr <= 0 || msi->entry_nr > MAX_MSI_IRQS )
                 ret = -EDOM;
             else if ( msi->entry_nr != 1 && !iommu_intremap )
                 ret = -EOPNOTSUPP;
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -56,6 +56,8 @@
 /* MAX fixed pages reserved for mapping MSIX tables. */
 #define FIX_MSIX_MAX_PAGES              512
 
+#define MAX_MSI_IRQS 32 /* limited by MSI capability struct properties */
+
 struct msi_info {
     u16 seg;
     u8 bus;
diff --git a/main/xen/xsa237-5.patch b/main/xen/xsa237-5.patch
deleted file mode 100644
index 155ba15d08..0000000000
--- a/main/xen/xsa237-5.patch
@@ -1,37 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/FLASK: fix unmap-domain-IRQ XSM hook

The caller and the FLASK implementation of xsm_unmap_domain_irq()
disagreed about what the "data" argument points to in the MSI case:
Change both sides to pass/take a PCI device.

This is part of XSA-237.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -2143,7 +2143,8 @@ int unmap_domain_pirq(struct domain *d,
         nr = msi_desc->msi.nvec;
     }
 
-    ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
+    ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq,
+                               msi_desc ? msi_desc->dev : NULL);
     if ( ret )
         goto done;
 
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -918,8 +918,8 @@ static int flask_unmap_domain_msi (struc
                                    u32 *sid, struct avc_audit_data *ad)
 {
 #ifdef CONFIG_HAS_PCI
-    struct msi_info *msi = data;
-    u32 machine_bdf = (msi->seg << 16) | (msi->bus << 8) | msi->devfn;
+    const struct pci_dev *pdev = data;
+    u32 machine_bdf = (pdev->seg << 16) | (pdev->bus << 8) | pdev->devfn;
 
     AVC_AUDIT_DATA_INIT(ad, DEV);
     ad->device = machine_bdf;
diff --git a/main/xen/xsa238.patch b/main/xen/xsa238.patch
deleted file mode 100644
index 0d7d48fef8..0000000000
--- a/main/xen/xsa238.patch
@@ -1,45 +0,0 @@
From cdc2887076b19b39fab9faec495082586f3113df Mon Sep 17 00:00:00 2001
From: XenProject Security Team <security@xenproject.org>
Date: Tue, 5 Sep 2017 13:41:37 +0200
Subject: x86/ioreq server: correctly handle bogus
 XEN_DMOP_{,un}map_io_range_to_ioreq_server arguments

A misbehaving device model can pass incorrect XEN_DMOP_map/
unmap_io_range_to_ioreq_server arguments, namely end < start when
specifying an address range. When this happens we hit ASSERT(s <= e) in
rangeset_contains_range()/rangeset_overlaps_range() with debug builds.
Production builds will not trap right away but may misbehave later
while handling such bogus ranges.

This is XSA-238.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
 xen/arch/x86/hvm/ioreq.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
index b2a8b0e986..8c8bf1f0ec 100644
--- a/xen/arch/x86/hvm/ioreq.c
+++ b/xen/arch/x86/hvm/ioreq.c
@@ -820,6 +820,9 @@ int hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id,
     struct hvm_ioreq_server *s;
     int rc;
 
+    if ( start > end )
+        return -EINVAL;
+
     spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
 
     rc = -ENOENT;
@@ -872,6 +875,9 @@ int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id,
     struct hvm_ioreq_server *s;
     int rc;
 
+    if ( start > end )
+        return -EINVAL;
+
     spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
 
     rc = -ENOENT;
diff --git a/main/xen/xsa239.patch b/main/xen/xsa239.patch
deleted file mode 100644
index 5daecb5e47..0000000000
--- a/main/xen/xsa239.patch
@@ -1,46 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/HVM: prefill partially used variable on emulation paths

Certain handlers ignore the access size (vioapic_write() being the
example this was found with), perhaps leading to subsequent reads
seeing data that wasn't actually written by the guest. For
consistency and extra safety also do this on the read path of
hvm_process_io_intercept(), even if this doesn't directly affect what
guests get to see, as we've supposedly already dealt with read handlers
leaving data completely uninitialized.

This is XSA-239.

Reported-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -129,7 +129,7 @@ static int hvmemul_do_io(
         .count = *reps,
         .dir = dir,
         .df = df,
-        .data = data,
+        .data = data_is_addr ? data : 0,
         .data_is_ptr = data_is_addr, /* ioreq_t field name is misleading */
         .state = STATE_IOREQ_READY,
     };
--- a/xen/arch/x86/hvm/intercept.c
+++ b/xen/arch/x86/hvm/intercept.c
@@ -127,6 +127,7 @@ int hvm_process_io_intercept(const struc
             addr = (p->type == IOREQ_TYPE_COPY) ?
                    p->addr + step * i :
                    p->addr;
+            data = 0;
             rc = ops->read(handler, addr, p->size, &data);
             if ( rc != X86EMUL_OKAY )
                 break;
@@ -161,6 +162,7 @@ int hvm_process_io_intercept(const struc
         {
             if ( p->data_is_ptr )
             {
+                data = 0;
                 switch ( hvm_copy_from_guest_phys(&data, p->data + step * i,
                                                   p->size) )
                 {
diff --git a/main/xen/xsa240-1.patch b/main/xen/xsa240-1.patch
deleted file mode 100644
index 515ad22b66..0000000000
--- a/main/xen/xsa240-1.patch
@@ -1,494 +0,0 @@
From 867988237d3e472fe2c99e81ae733e103422566c Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Thu, 28 Sep 2017 15:17:25 +0100
Subject: [PATCH 1/2] x86: limit linear page table use to a single level

That's the only way that they're meant to be used. Without such a
restriction arbitrarily long chains of same-level page tables can be
built, tearing down of which may then cause arbitrarily deep recursion,
causing a stack overflow. To facilitate this restriction, a counter is
being introduced to track both the number of same-level entries in a
page table as well as the number of uses of a page table in another
same-level one (counting into positive and negative direction
respectively, utilizing the fact that both counts can't be non-zero at
the same time).

Note that the added accounting introduces a restriction on the number
of times a page can be used in other same-level page tables - more than
32k of such uses are no longer possible.

Note also that some put_page_and_type[_preemptible]() calls are
replaced with open-coded equivalents.  This seemed preferable to
adding "parent_table" to the matrix of functions.

Note further that cross-domain same-level page table references are no
longer permitted (they probably never should have been).

This is XSA-240.

Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
 xen/arch/x86/domain.c        |   1 +
 xen/arch/x86/mm.c            | 171 ++++++++++++++++++++++++++++++++++++++-----
 xen/include/asm-x86/domain.h |   2 +
 xen/include/asm-x86/mm.h     |  25 +++++--
 4 files changed, 175 insertions(+), 24 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index d7e699228c..d7ed72c246 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1226,6 +1226,7 @@ int arch_set_info_guest(
                     rc = -ERESTART;
                     /* Fallthrough */
                 case -ERESTART:
+                    v->arch.old_guest_ptpg = NULL;
                     v->arch.old_guest_table =
                         pagetable_get_page(v->arch.guest_table);
                     v->arch.guest_table = pagetable_null();
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 86f5eda52d..1e469bd354 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -747,6 +747,61 @@ static void put_data_page(
         put_page(page);
 }
 
+static bool inc_linear_entries(struct page_info *pg)
+{
+    typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc;
+
+    do {
+        /*
+         * The check below checks for the "linear use" count being non-zero
+         * as well as overflow.  Signed integer overflow is undefined behavior
+         * according to the C spec.  However, as long as linear_pt_count is
+         * smaller in size than 'int', the arithmetic operation of the
+         * increment below won't overflow; rather the result will be truncated
+         * when stored.  Ensure that this is always true.
+         */
+        BUILD_BUG_ON(sizeof(nc) >= sizeof(int));
+        oc = nc++;
+        if ( nc <= 0 )
+            return false;
+        nc = cmpxchg(&pg->linear_pt_count, oc, nc);
+    } while ( oc != nc );
+
+    return true;
+}
+
+static void dec_linear_entries(struct page_info *pg)
+{
+    typeof(pg->linear_pt_count) oc;
+
+    oc = arch_fetch_and_add(&pg->linear_pt_count, -1);
+    ASSERT(oc > 0);
+}
+
+static bool inc_linear_uses(struct page_info *pg)
+{
+    typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc;
+
+    do {
+        /* See the respective comment in inc_linear_entries(). */
+        BUILD_BUG_ON(sizeof(nc) >= sizeof(int));
+        oc = nc--;
+        if ( nc >= 0 )
+            return false;
+        nc = cmpxchg(&pg->linear_pt_count, oc, nc);
+    } while ( oc != nc );
+
+    return true;
+}
+
+static void dec_linear_uses(struct page_info *pg)
+{
+    typeof(pg->linear_pt_count) oc;
+
+    oc = arch_fetch_and_add(&pg->linear_pt_count, 1);
+    ASSERT(oc < 0);
+}
+
 /*
  * We allow root tables to map each other (a.k.a. linear page tables). It
  * needs some special care with reference counts and access permissions:
@@ -777,15 +832,35 @@ get_##level##_linear_pagetable(                                             \
                                                                             \
     if ( (pfn = level##e_get_pfn(pde)) != pde_pfn )                         \
     {                                                                       \
+        struct page_info *ptpg = mfn_to_page(pde_pfn);                      \
+                                                                            \
+        /* Make sure the page table belongs to the correct domain. */       \
+        if ( unlikely(page_get_owner(ptpg) != d) )                          \
+            return 0;                                                       \
+                                                                            \
         /* Make sure the mapped frame belongs to the correct domain. */     \
         if ( unlikely(!get_page_from_pagenr(pfn, d)) )                      \
             return 0;                                                       \
                                                                             \
         /*                                                                  \
-         * Ensure that the mapped frame is an already-validated page table. \
+         * Ensure that the mapped frame is an already-validated page table  \
+         * and is not itself having linear entries, as well as that the     \
+         * containing page table is not itself in use as a linear page table\
+         * elsewhere.                                                       \
          * If so, atomically increment the count (checking for overflow).   \
          */                                                                 \
         page = mfn_to_page(pfn);                                            \
+        if ( !inc_linear_entries(ptpg) )                                    \
+        {                                                                   \
+            put_page(page);                                                 \
+            return 0;                                                       \
+        }                                                                   \
+        if ( !inc_linear_uses(page) )                                       \
+        {                                                                   \
+            dec_linear_entries(ptpg);                                       \
+            put_page(page);                                                 \
+            return 0;                                                       \
+        }                                                                   \
         y = page->u.inuse.type_info;                                        \
         do {                                                                \
             x = y;                                                          \
@@ -793,6 +868,8 @@ get_##level##_linear_pagetable(                                             \
                  unlikely((x & (PGT_type_mask|PGT_validated)) !=            \
                           (PGT_##level##_page_table|PGT_validated)) )       \
             {                                                               \
+                dec_linear_uses(page);                                      \
+                dec_linear_entries(ptpg);                                   \
                 put_page(page);                                             \
                 return 0;                                                   \
             }                                                               \
@@ -1226,6 +1303,9 @@ get_page_from_l4e(
             l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED);   \
     } while ( 0 )
 
+static int _put_page_type(struct page_info *page, bool preemptible,
+                          struct page_info *ptpg);
+
 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
 {
     unsigned long     pfn = l1e_get_pfn(l1e);
@@ -1296,17 +1376,22 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
     if ( l2e_get_flags(l2e) & _PAGE_PSE )
         put_superpage(l2e_get_pfn(l2e));
     else
-        put_page_and_type(l2e_get_page(l2e));
+    {
+        struct page_info *pg = l2e_get_page(l2e);
+        int rc = _put_page_type(pg, false, mfn_to_page(pfn));
+
+        ASSERT(!rc);
+        put_page(pg);
+    }
 
     return 0;
 }
 
-static int __put_page_type(struct page_info *, int preemptible);
-
 static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
                              int partial, bool_t defer)
 {
     struct page_info *pg;
+    int rc;
 
     if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
         return 1;
@@ -1329,21 +1414,28 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
     if ( unlikely(partial > 0) )
     {
         ASSERT(!defer);
-        return __put_page_type(pg, 1);
+        return _put_page_type(pg, true, mfn_to_page(pfn));
     }
 
     if ( defer )
     {
+        current->arch.old_guest_ptpg = mfn_to_page(pfn);
         current->arch.old_guest_table = pg;
         return 0;
     }
 
-    return put_page_and_type_preemptible(pg);
+    rc = _put_page_type(pg, true, mfn_to_page(pfn));
+    if ( likely(!rc) )
+        put_page(pg);
+
+    return rc;
 }
 
 static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
                              int partial, bool_t defer)
 {
+    int rc = 1;
+
     if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && 
          (l4e_get_pfn(l4e) != pfn) )
     {
@@ -1352,18 +1444,22 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
         if ( unlikely(partial > 0) )
         {
             ASSERT(!defer);
-            return __put_page_type(pg, 1);
+            return _put_page_type(pg, true, mfn_to_page(pfn));
         }
 
         if ( defer )
         {
+            current->arch.old_guest_ptpg = mfn_to_page(pfn);
             current->arch.old_guest_table = pg;
             return 0;
         }
 
-        return put_page_and_type_preemptible(pg);
+        rc = _put_page_type(pg, true, mfn_to_page(pfn));
+        if ( likely(!rc) )
+            put_page(pg);
     }
-    return 1;
+
+    return rc;
 }
 
 static int alloc_l1_table(struct page_info *page)
@@ -1561,6 +1657,7 @@ static int alloc_l3_table(struct page_info *page)
         {
             page->nr_validated_ptes = i;
             page->partial_pte = 0;
+            current->arch.old_guest_ptpg = NULL;
             current->arch.old_guest_table = page;
         }
         while ( i-- > 0 )
@@ -1654,6 +1751,7 @@ static int alloc_l4_table(struct page_info *page)
                 {
                     if ( current->arch.old_guest_table )
                         page->nr_validated_ptes++;
+                    current->arch.old_guest_ptpg = NULL;
                     current->arch.old_guest_table = page;
                 }
             }
@@ -2403,14 +2501,20 @@ int free_page_type(struct page_info *pag
 }
 
 
-static int __put_final_page_type(
-    struct page_info *page, unsigned long type, int preemptible)
+static int _put_final_page_type(struct page_info *page, unsigned long type,
+                                bool preemptible, struct page_info *ptpg)
 {
     int rc = free_page_type(page, type, preemptible);
 
     /* No need for atomic update of type_info here: noone else updates it. */
     if ( rc == 0 )
     {
+        if ( ptpg && PGT_type_equal(type, ptpg->u.inuse.type_info) )
+        {
+            dec_linear_uses(page);
+            dec_linear_entries(ptpg);
+        }
+        ASSERT(!page->linear_pt_count || page_get_owner(page)->is_dying);
         /*
          * Record TLB information for flush later. We do not stamp page tables
          * when running in shadow mode:
@@ -2446,8 +2550,8 @@ static int __put_final_page_type(
 }
 
 
-static int __put_page_type(struct page_info *page,
-                           int preemptible)
+static int _put_page_type(struct page_info *page, bool preemptible,
+                          struct page_info *ptpg)
 {
     unsigned long nx, x, y = page->u.inuse.type_info;
     int rc = 0;
@@ -2474,12 +2578,28 @@ static int __put_page_type(struct page_info *page,
                                            x, nx)) != x) )
                     continue;
                 /* We cleared the 'valid bit' so we do the clean up. */
-                rc = __put_final_page_type(page, x, preemptible);
+                rc = _put_final_page_type(page, x, preemptible, ptpg);
+                ptpg = NULL;
                 if ( x & PGT_partial )
                     put_page(page);
                 break;
             }
 
+            if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) )
+            {
+                /*
+                 * page_set_tlbflush_timestamp() accesses the same union
+                 * linear_pt_count lives in. Unvalidated page table pages,
+                 * however, should occur during domain destruction only
+                 * anyway.  Updating of linear_pt_count luckily is not
+                 * necessary anymore for a dying domain.
+                 */
+                ASSERT(page_get_owner(page)->is_dying);
+                ASSERT(page->linear_pt_count < 0);
+                ASSERT(ptpg->linear_pt_count > 0);
+                ptpg = NULL;
+            }
+
             /*
              * Record TLB information for flush later. We do not stamp page
              * tables when running in shadow mode:
@@ -2499,6 +2619,13 @@ static int __put_page_type(struct page_info *page,
             return -EINTR;
     }
 
+    if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) )
+    {
+        ASSERT(!rc);
+        dec_linear_uses(page);
+        dec_linear_entries(ptpg);
+    }
+
     return rc;
 }
 
@@ -2638,6 +2765,7 @@ static int __get_page_type(struct page_info *page, unsigned long type,
             page->nr_validated_ptes = 0;
             page->partial_pte = 0;
         }
+        page->linear_pt_count = 0;
         rc = alloc_page_type(page, type, preemptible);
     }
 
@@ -2652,7 +2780,7 @@ static int __get_page_type(struct page_info *page, unsigned long type,
 
 void put_page_type(struct page_info *page)
 {
-    int rc = __put_page_type(page, 0);
+    int rc = _put_page_type(page, false, NULL);
     ASSERT(rc == 0);
     (void)rc;
 }
@@ -2668,7 +2796,7 @@ int get_page_type(struct page_info *page, unsigned long type)
 
 int put_page_type_preemptible(struct page_info *page)
 {
-    return __put_page_type(page, 1);
+    return _put_page_type(page, true, NULL);
 }
 
 int get_page_type_preemptible(struct page_info *page, unsigned long type)
@@ -2878,11 +3006,14 @@ int put_old_guest_table(struct vcpu *v)
     if ( !v->arch.old_guest_table )
         return 0;
 
-    switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table) )
+    switch ( rc = _put_page_type(v->arch.old_guest_table, true,
+                                 v->arch.old_guest_ptpg) )
     {
     case -EINTR:
     case -ERESTART:
         return -ERESTART;
+    case 0:
+        put_page(v->arch.old_guest_table);
     }
 
     v->arch.old_guest_table = NULL;
@@ -3042,6 +3173,7 @@ int new_guest_cr3(unsigned long mfn)
                 rc = -ERESTART;
                 /* fallthrough */
             case -ERESTART:
+                curr->arch.old_guest_ptpg = NULL;
                 curr->arch.old_guest_table = page;
                 break;
             default:
@@ -3310,7 +3442,10 @@ long do_mmuext_op(
                     if ( type == PGT_l1_page_table )
                         put_page_and_type(page);
                     else
+                    {
+                        curr->arch.old_guest_ptpg = NULL;
                         curr->arch.old_guest_table = page;
+                    }
                 }
             }
 
@@ -3346,6 +3481,7 @@ long do_mmuext_op(
             {
             case -EINTR:
             case -ERESTART:
+                curr->arch.old_guest_ptpg = NULL;
                 curr->arch.old_guest_table = page;
                 rc = 0;
                 break;
@@ -3425,6 +3561,7 @@ long do_mmuext_op(
                         rc = -ERESTART;
                         /* fallthrough */
                     case -ERESTART:
+                        curr->arch.old_guest_ptpg = NULL;
                         curr->arch.old_guest_table = page;
                         break;
                     default:
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 924caac834..5a512918cc 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -527,6 +527,8 @@ struct arch_vcpu
     pagetable_t guest_table_user;       /* (MFN) x86/64 user-space pagetable */
     pagetable_t guest_table;            /* (MFN) guest notion of cr3 */
     struct page_info *old_guest_table;  /* partially destructed pagetable */
+    struct page_info *old_guest_ptpg;   /* containing page table of the */
+                                        /* former, if any */
     /* guest_table holds a ref to the page, and also a type-count unless
      * shadow refcounts are in use */
     pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 119d7dec6b..445da50d47 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -124,11 +124,11 @@ struct page_info
         u32 tlbflush_timestamp;
 
         /*
-         * When PGT_partial is true then this field is valid and indicates
-         * that PTEs in the range [0, @nr_validated_ptes) have been validated.
-         * An extra page reference must be acquired (or not dropped) whenever
-         * PGT_partial gets set, and it must be dropped when the flag gets
-         * cleared. This is so that a get() leaving a page in partially
+         * When PGT_partial is true then the first two fields are valid and
+         * indicate that PTEs in the range [0, @nr_validated_ptes) have been
+         * validated. An extra page reference must be acquired (or not dropped)
+         * whenever PGT_partial gets set, and it must be dropped when the flag
+         * gets cleared. This is so that a get() leaving a page in partially
          * validated state (where the caller would drop the reference acquired
          * due to the getting of the type [apparently] failing [-ERESTART])
          * would not accidentally result in a page left with zero general
@@ -152,10 +152,18 @@ struct page_info
          * put_page_from_lNe() (due to the apparent failure), and hence it
          * must be dropped when the put operation is resumed (and completes),
          * but it must not be acquired if picking up the page for validation.
+         *
+         * The 3rd field, @linear_pt_count, indicates
+         * - by a positive value, how many same-level page table entries a page
+         *   table has,
+         * - by a negative value, in how many same-level page tables a page is
+         *   in use.
          */
         struct {
-            u16 nr_validated_ptes;
-            s8 partial_pte;
+            u16 nr_validated_ptes:PAGETABLE_ORDER + 1;
+            u16 :16 - PAGETABLE_ORDER - 1 - 2;
+            s16 partial_pte:2;
+            s16 linear_pt_count;
         };
 
         /*
@@ -206,6 +214,9 @@ struct page_info
 #define PGT_count_width   PG_shift(9)
 #define PGT_count_mask    ((1UL<<PGT_count_width)-1)
 
+/* Are the 'type mask' bits identical? */
+#define PGT_type_equal(x, y) (!(((x) ^ (y)) & PGT_type_mask))
+
  /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated    PG_shift(1)
 #define PGC_allocated     PG_mask(1, 1)
--
2.14.1

diff --git a/main/xen/xsa240-2.patch b/main/xen/xsa240-2.patch
deleted file mode 100644
index 5e057c5652..0000000000
--- a/main/xen/xsa240-2.patch
@@ -1,83 +0,0 @@
From e614979ce054044d9e19023f1ef10dae6e38baf4 Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Fri, 22 Sep 2017 11:46:55 +0100
Subject: [PATCH 2/2] x86/mm: Disable PV linear pagetables by default

Allowing pagetables to point to other pagetables of the same level
(often called 'linear pagetables') has been included in Xen since its
inception.  But it is not used by the most common PV guests (Linux,
NetBSD, minios), and has been the source of a number of subtle
reference-counting bugs.

Add a command-line option to control whether PV linear pagetables are
allowed (disabled by default).

Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
Changes since v2:
- s/_/-/; in command-line option
- Added __read_mostly
---
 docs/misc/xen-command-line.markdown | 15 +++++++++++++++
 xen/arch/x86/mm.c                   | 10 ++++++++++
 2 files changed, 25 insertions(+)
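
For anyone running a custom PV OS that relies on the technique, the knob
can be flipped back on from the hypervisor (not dom0) command line;
boolean_param() accepts the usual forms, e.g. an illustrative boot entry:

    xen ... pv-linear-pt=true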

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 44d99852aa..45ef873abb 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1374,6 +1374,21 @@ The following resources are available:
    CDP, one COS will correspond to two CBMs other than one with CAT, due to the
     sum of CBMs is fixed, that means actual `cos_max` in use will automatically
     reduce to half when CDP is enabled.
+
+### pv-linear-pt
+> `= <boolean>`
+
+> Default: `false`
+
+Allow PV guests to have pagetable entries pointing to other pagetables
+of the same level (i.e., allowing L2 PTEs to point to other L2 pages).
+This technique is often called "linear pagetables", and is sometimes
+used to allow operating systems a simple way to consistently map the
+current process's pagetables into its own virtual address space.
+
+None of the most common PV operating systems (Linux, NetBSD, MiniOS)
+use this technique, but there may be custom operating systems which
+do.
 
 ### reboot
 > `= t[riple] | k[bd] | a[cpi] | p[ci] | P[ower] | e[fi] | n[o] [, [w]arm | [c]old]`
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 1e469bd354..32952a46b9 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -814,6 +814,9 @@ static void dec_linear_uses(struct page_info *pg)
  *     frame if it is mapped by a different root table. This is sufficient and
  *     also necessary to allow validation of a root table mapping itself.
  */
+static bool __read_mostly pv_linear_pt_enable = false;
+boolean_param("pv-linear-pt", pv_linear_pt_enable);
+
 #define define_get_linear_pagetable(level)                                  \
 static int                                                                  \
 get_##level##_linear_pagetable(                                             \
@@ -823,6 +826,13 @@ get_##level##_linear_pagetable(                                             \
     struct page_info *page;                                                 \
     unsigned long pfn;                                                      \
                                                                             \
+    if ( !pv_linear_pt_enable )                                             \
+    {                                                                       \
+        gdprintk(XENLOG_WARNING,                                            \
+                 "Attempt to create linear p.t. (feature disabled)\n");     \
+        return 0;                                                           \
+    }                                                                       \
+                                                                            \
     if ( (level##e_get_flags(pde) & _PAGE_RW) )                             \
     {                                                                       \
         gdprintk(XENLOG_WARNING,                                            \
--
2.14.1

diff --git a/main/xen/xsa241-4.9.patch b/main/xen/xsa241-4.9.patch
deleted file mode 100644
index 514e4c7a4b..0000000000
--- a/main/xen/xsa241-4.9.patch
@@ -1,120 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86: don't store possibly stale TLB flush time stamp

While the timing window is extremely narrow, it is theoretically
possible for an update to the TLB flush clock and a subsequent flush
IPI to happen between the read and write parts of the update of the
per-page stamp. Exclude this possibility by disabling interrupts
across the update, preventing the IPI from being serviced in the middle.

This is XSA-241.

Reported-by: Jann Horn <jannh@google.com>
Suggested-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
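
Spelled out, the window being closed (an illustrative timeline, not
patch content):

    /* All on the CPU updating the stamp, without the fix:
     *   t0: read tlbflush_current_time()          -> S
     *   t1: another CPU bumps tlbflush_clock and sends a flush IPI
     *   t2: the IPI is serviced here, between the read and the write
     *   t3: page->tlbflush_timestamp = S          <- now stale
     * Consumers comparing against S would treat the t2 flush as having
     * covered the page even though the recorded stamp predates it.
     * With interrupts disabled across t0..t3 the IPI cannot be serviced
     * in the middle, making the read/write pair atomic with respect to
     * the flush. */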

--- a/xen/arch/arm/smp.c
+++ b/xen/arch/arm/smp.c
@@ -1,3 +1,4 @@
+#include <xen/mm.h>
 #include <asm/system.h>
 #include <asm/smp.h>
 #include <asm/cpregs.h>
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2524,7 +2524,7 @@ static int _put_final_page_type(struct p
          */
         if ( !(shadow_mode_enabled(page_get_owner(page)) &&
                (page->count_info & PGC_page_table)) )
-            page->tlbflush_timestamp = tlbflush_current_time();
+            page_set_tlbflush_timestamp(page);
         wmb();
         page->u.inuse.type_info--;
     }
@@ -2534,7 +2534,7 @@ static int _put_final_page_type(struct p
                 (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
         if ( !(shadow_mode_enabled(page_get_owner(page)) &&
                (page->count_info & PGC_page_table)) )
-            page->tlbflush_timestamp = tlbflush_current_time();
+            page_set_tlbflush_timestamp(page);
         wmb();
         page->u.inuse.type_info |= PGT_validated;
     }
@@ -2588,7 +2588,7 @@ static int _put_page_type(struct page_in
             if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) )
             {
                 /*
-                 * page_set_tlbflush_timestamp() accesses the same union
+                 * set_tlbflush_timestamp() accesses the same union
                  * linear_pt_count lives in. Unvalidated page table pages,
                  * however, should occur during domain destruction only
                  * anyway.  Updating of linear_pt_count luckily is not
@@ -2609,7 +2609,7 @@ static int _put_page_type(struct page_in
              */
             if ( !(shadow_mode_enabled(page_get_owner(page)) &&
                    (page->count_info & PGC_page_table)) )
-                page->tlbflush_timestamp = tlbflush_current_time();
+                page_set_tlbflush_timestamp(page);
         }
 
         if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1464,7 +1464,7 @@ void shadow_free(struct domain *d, mfn_t
          * TLBs when we reuse the page.  Because the destructors leave the
          * contents of the pages in place, we can delay TLB flushes until
          * just before the allocator hands the page out again. */
-        sp->tlbflush_timestamp = tlbflush_current_time();
+        page_set_tlbflush_timestamp(sp);
         perfc_decr(shadow_alloc_count);
         page_list_add_tail(sp, &d->arch.paging.shadow.freelist);
         sp = next;
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -960,7 +960,7 @@ static void free_heap_pages(
         /* If a page has no owner it will need no safety TLB flush. */
         pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
         if ( pg[i].u.free.need_tlbflush )
-            pg[i].tlbflush_timestamp = tlbflush_current_time();
+            page_set_tlbflush_timestamp(&pg[i]);
 
         /* This page is not a guest frame any more. */
         page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */
--- a/xen/include/asm-arm/flushtlb.h
+++ b/xen/include/asm-arm/flushtlb.h
@@ -12,6 +12,11 @@ static inline void tlbflush_filter(cpuma
 
 #define tlbflush_current_time()                 (0)
 
+static inline void page_set_tlbflush_timestamp(struct page_info *page)
+{
+    page->tlbflush_timestamp = tlbflush_current_time();
+}
+
 #if defined(CONFIG_ARM_32)
 # include <asm/arm32/flushtlb.h>
 #elif defined(CONFIG_ARM_64)
--- a/xen/include/asm-x86/flushtlb.h
+++ b/xen/include/asm-x86/flushtlb.h
@@ -23,6 +23,20 @@ DECLARE_PER_CPU(u32, tlbflush_time);
 
 #define tlbflush_current_time() tlbflush_clock
 
+static inline void page_set_tlbflush_timestamp(struct page_info *page)
+{
+    /*
+     * Prevent storing a stale time stamp, which could happen if an update
+     * to tlbflush_clock plus a subsequent flush IPI happen between the
+     * reading of tlbflush_clock and the writing of the struct page_info
+     * field.
+     */
+    ASSERT(local_irq_is_enabled());
+    local_irq_disable();
+    page->tlbflush_timestamp = tlbflush_current_time();
+    local_irq_enable();
+}
+
 /*
  * @cpu_stamp is the timestamp at last TLB flush for the CPU we are testing.
  * @lastuse_stamp is a timestamp taken when the PFN we are testing was last 
diff --git a/main/xen/xsa242-4.9.patch b/main/xen/xsa242-4.9.patch
deleted file mode 100644
index 8adfa61fd7..0000000000
--- a/main/xen/xsa242-4.9.patch
@@ -1,43 +0,0 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86: don't allow page_unlock() to drop the last type reference

Only _put_page_type() does the necessary cleanup, and hence not all
domain pages can be released during guest cleanup (leaving around
zombie domains) if we get this wrong.

This is XSA-242.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
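
The two hunks below enforce complementary halves of one invariant,
sketched here for reading convenience:

    /* Only _put_page_type() performs the final cleanup (free_page_type()
     * and friends), so the last type reference must be dropped there:
     *
     *  - page_unlock() computes nx = x - (1 | PGT_locked) and now asserts
     *    that nx retains a non-zero PGT_count_mask, i.e. it never drops
     *    the last reference;
     *  - _put_page_type() refuses to drop the second-to-last reference
     *    while PGT_locked is set, spinning via cpu_relax() until the
     *    holder calls page_unlock(). */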

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1923,7 +1923,11 @@ void page_unlock(struct page_info *page)
 
     do {
         x = y;
+        ASSERT((x & PGT_count_mask) && (x & PGT_locked));
+
         nx = x - (1 | PGT_locked);
+        /* We must not drop the last reference here. */
+        ASSERT(nx & PGT_count_mask);
     } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x );
 }
 
@@ -2611,6 +2615,17 @@ static int _put_page_type(struct page_in
                    (page->count_info & PGC_page_table)) )
                 page_set_tlbflush_timestamp(page);
         }
+        else if ( unlikely((nx & (PGT_locked | PGT_count_mask)) ==
+                           (PGT_locked | 1)) )
+        {
+            /*
+             * We must not drop the second to last reference when the page is
+             * locked, as page_unlock() doesn't do any cleanup of the type.
+             */
+            cpu_relax();
+            y = page->u.inuse.type_info;
+            continue;
+        }
 
         if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
             break;
diff --git a/main/xen/xsa243-1.patch b/main/xen/xsa243-1.patch
deleted file mode 100644
index aaff277514..0000000000
--- a/main/xen/xsa243-1.patch
@@ -1,93 +0,0 @@
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/shadow: Don't create self-linear shadow mappings for 4-level translated guests

When initially creating a monitor table for 4-level translated guests, don't
install a shadow-linear mapping.  This mapping is actually self-linear, and
trips up the writeable heuristic logic into following Xen's mappings, not the
guests' shadows it was expecting to follow.

A consequence of this is that sh_guess_wrmap() needs to cope with there being
no shadow-linear mapping present, which in practice occurs once each time a
vcpu switches to 4-level paging from a different paging mode.

An appropriate shadow-linear slot will be inserted into the monitor table
either while constructing lower level monitor tables, or by sh_update_cr3().

While fixing this, clarify the safety of the other mappings.  Despite
appearing unsafe, it is correct to create a guest-linear mapping for
translated domains; this is self-linear and doesn't point into the translated
domain.  Drop a dead clause for translate != external guests.

This is XSA-243.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Tim Deegan <tim@xen.org>

diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 8d4f244..a18d286 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -1485,26 +1485,38 @@ void sh_install_xen_entries_in_l4(struct domain *d, mfn_t gl4mfn, mfn_t sl4mfn)
         sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] = shadow_l4e_empty();
     }
 
-    /* Shadow linear mapping for 4-level shadows.  N.B. for 3-level
-     * shadows on 64-bit xen, this linear mapping is later replaced by the
-     * monitor pagetable structure, which is built in make_monitor_table
-     * and maintained by sh_update_linear_entries. */
-    sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
-        shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW);
-
-    /* Self linear mapping.  */
-    if ( shadow_mode_translate(d) && !shadow_mode_external(d) )
+    /*
+     * Linear mapping slots:
+     *
+     * Calling this function with gl4mfn == sl4mfn is used to construct a
+     * monitor table for translated domains.  In this case, gl4mfn forms the
+     * self-linear mapping (i.e. not pointing into the translated domain), and
+     * the shadow-linear slot is skipped.  The shadow-linear slot is either
+     * filled when constructing lower level monitor tables, or via
+     * sh_update_cr3() for 4-level guests.
+     *
+     * Calling this function with gl4mfn != sl4mfn is used for non-translated
+     * guests, where the shadow-linear slot is actually self-linear, and the
+     * guest-linear slot points into the guests view of its pagetables.
+     */
+    if ( shadow_mode_translate(d) )
     {
-        // linear tables may not be used with translated PV guests
-        sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
+        ASSERT(mfn_eq(gl4mfn, sl4mfn));
+
+        sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
             shadow_l4e_empty();
     }
     else
     {
-        sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
-            shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR_RW);
+        ASSERT(!mfn_eq(gl4mfn, sl4mfn));
+
+        sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
+            shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW);
     }
 
+    sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
+        shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR_RW);
+
     unmap_domain_page(sl4e);
 }
 #endif
@@ -4405,6 +4417,11 @@ static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
 
     /* Carefully look in the shadow linear map for the l1e we expect */
 #if SHADOW_PAGING_LEVELS >= 4
+    /* Is a shadow linear map installed in the first place? */
+    sl4p  = v->arch.paging.shadow.guest_vtable;
+    sl4p += shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START);
+    if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
+        return 0;
     sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
     if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
         return 0;
diff --git a/main/xen/xsa243-2.patch b/main/xen/xsa243-2.patch
deleted file mode 100644
index 1aca5d3dbd..0000000000
--- a/main/xen/xsa243-2.patch
@@ -1,54 +0,0 @@
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/shadow: correct SH_LINEAR mapping detection in sh_guess_wrmap()

The fix for XSA-243 / CVE-2017-15592 (c/s bf2b4eadcf379) introduced a change
in behaviour for sh_guess_wrmap(), where it had to cope with no shadow linear
mapping being present.

As the name suggests, guest_vtable is a mapping of the guest's pagetable, not
Xen's pagetable, meaning that it isn't the pagetable we need to check for the
shadow linear slot in.

The practical upshot is that a shadow HVM vcpu which switches into 4-level
paging mode, with an L4 pagetable that contains a mapping which aliases Xen's
SH_LINEAR_PT_VIRT_START, will fool the safety check for whether a SHADOW_LINEAR
mapping is present.  As the check passes (when it should have failed), Xen
subsequently falls over the missing mapping with a pagefault such as:

    (XEN) Pagetable walk from ffff8140a0503880:
    (XEN)  L4[0x102] = 000000046c218063 ffffffffffffffff
    (XEN)  L3[0x102] = 000000046c218063 ffffffffffffffff
    (XEN)  L2[0x102] = 000000046c218063 ffffffffffffffff
    (XEN)  L1[0x103] = 0000000000000000 ffffffffffffffff

This is part of XSA-243.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>

--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -4350,11 +4350,18 @@ static int sh_guess_wrmap(struct vcpu *v
 
     /* Carefully look in the shadow linear map for the l1e we expect */
 #if SHADOW_PAGING_LEVELS >= 4
-    /* Is a shadow linear map installed in the first place? */
-    sl4p  = v->arch.paging.shadow.guest_vtable;
-    sl4p += shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START);
-    if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
-        return 0;
+    /*
+     * Non-external guests (i.e. PV) have a SHADOW_LINEAR mapping from the
+     * moment their shadows are created.  External guests (i.e. HVM) may not,
+     * but always have a regular linear mapping, which we can use to observe
+     * whether a SHADOW_LINEAR mapping is present.
+     */
+    if ( paging_mode_external(d) )
+    {
+        sl4p =  __linear_l4_table + l4_linear_offset(SH_LINEAR_PT_VIRT_START);
+        if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
+            return 0;
+    }
     sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
     if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
         return 0;
diff --git a/main/xen/xsa244.patch b/main/xen/xsa244.patch
deleted file mode 100644
index c35a80be32..0000000000
--- a/main/xen/xsa244.patch
@@ -1,59 +0,0 @@
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: [PATCH] x86/cpu: Fix IST handling during PCPU bringup

Clear IST references in newly allocated IDTs.  Nothing good will come of
having them set before the TSS is suitably constructed (although the chances
of the CPU surviving such an IST interrupt/exception are extremely slim).

Uniformly set the IST references after the TSS is in place.  This fixes an
issue on AMD hardware, where onlining a PCPU while PCPU0 is in HVM context
will cause IST_NONE to be copied into the new IDT, making that PCPU vulnerable
to privilege escalation from PV guests until it subsequently schedules an HVM
guest.

This is XSA-244

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
 xen/arch/x86/cpu/common.c | 5 +++++
 xen/arch/x86/smpboot.c    | 3 +++
 2 files changed, 8 insertions(+)
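
The resulting bringup sequence, condensed from the two hunks below:

    /* cpu_smpboot_alloc(): new PCPU, TSS not yet constructed */
    memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES * sizeof(idt_entry_t));
    set_ist(&idt_tables[cpu][TRAP_double_fault],  IST_NONE);
    set_ist(&idt_tables[cpu][TRAP_nmi],           IST_NONE);
    set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);

    /* load_system_tables(): TSS stacks set up and TR loaded; only now
     * may the IST references be (re)established */
    set_ist(&idt_tables[cpu][TRAP_double_fault],  IST_DF);
    set_ist(&idt_tables[cpu][TRAP_nmi],           IST_NMI);
    set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE);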

diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 78f5667..6cf3628 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -640,6 +640,7 @@ void __init early_cpu_init(void)
  * - Sets up TSS with stack pointers, including ISTs
  * - Inserts TSS selector into regular and compat GDTs
  * - Loads GDT, IDT, TR then null LDT
+ * - Sets up IST references in the IDT
  */
 void load_system_tables(void)
 {
@@ -702,6 +703,10 @@ void load_system_tables(void)
 	asm volatile ("ltr  %w0" : : "rm" (TSS_ENTRY << 3) );
 	asm volatile ("lldt %w0" : : "rm" (0) );
 
+	set_ist(&idt_tables[cpu][TRAP_double_fault],  IST_DF);
+	set_ist(&idt_tables[cpu][TRAP_nmi],	      IST_NMI);
+	set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE);
+
 	/*
 	 * Bottom-of-stack must be 16-byte aligned!
 	 *
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 3ca716c..1609b62 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -724,6 +724,9 @@ static int cpu_smpboot_alloc(unsigned int cpu)
     if ( idt_tables[cpu] == NULL )
         goto oom;
     memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES * sizeof(idt_entry_t));
+    set_ist(&idt_tables[cpu][TRAP_double_fault],  IST_NONE);
+    set_ist(&idt_tables[cpu][TRAP_nmi],           IST_NONE);
+    set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
 
     for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
           i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
diff --git a/main/xen/xsa245-1.patch b/main/xen/xsa245-1.patch
deleted file mode 100644
index 2047686903..0000000000
--- a/main/xen/xsa245-1.patch
@@ -1,48 +0,0 @@
From a48d47febc1340f27d6c716545692641a09b414c Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@arm.com>
Date: Thu, 21 Sep 2017 14:13:08 +0100
Subject: [PATCH 1/2] xen/page_alloc: Cover memory unreserved after boot in
 first_valid_mfn

On Arm, some regions (e.g. Initramfs, Dom0 Kernel...) are marked as
reserved until the hardware domain is built and they are copied into its
memory. Therefore, they will not be added to the boot allocator via
init_boot_pages.

Instead, init_xenheap_pages will be called once the regions are not used
anymore.

Update first_valid_mfn in both init_heap_pages and init_boot_pages
(where it is already updated) to cover all the cases.

Signed-off-by: Julien Grall <julien.grall@arm.com>
[Adjust comment, added locking around first_valid_mfn update]
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 xen/common/page_alloc.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 0b9f6cc6df..fbe5a8af39 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1700,6 +1700,16 @@ static void init_heap_pages(
 {
     unsigned long i;
 
+    /*
+     * Some pages may not go through the boot allocator (e.g. reserved
+     * memory at boot but released just after --- kernel, initramfs,
+     * etc.).
+     * Update first_valid_mfn to ensure those regions are covered.
+     */
+    spin_lock(&heap_lock);
+    first_valid_mfn = min_t(unsigned long, page_to_mfn(pg), first_valid_mfn);
+    spin_unlock(&heap_lock);
+
     for ( i = 0; i < nr_pages; i++ )
     {
         unsigned int nid = phys_to_nid(page_to_maddr(pg+i));
--
2.11.0

diff --git a/main/xen/xsa245-2.patch b/main/xen/xsa245-2.patch
deleted file mode 100644
index cd4d2709be..0000000000
--- a/main/xen/xsa245-2.patch
@@ -1,73 +0,0 @@
From cbfcf039d0e0b6f4c4cb3de612f7bf788a0c47cd Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@arm.com>
Date: Mon, 18 Sep 2017 14:24:08 +0100
Subject: [PATCH 2/2] xen/arm: Correctly report the memory region in the dummy
 NUMA helpers

NUMA is currently not supported on Arm. Because common code is
NUMA-aware, dummy helpers are instead provided to expose a single node.

Those helpers are, for instance, used to know the region to scrub.

However, the memory region is not reported correctly. Indeed, the
frametable may not be at the beginning of the memory and there might be
multiple memory banks. This will lead to some parts of the memory not
being scrubbed.

The memory information can be found using:
    * first_valid_mfn as the start of the memory
    * max_page - first_valid_mfn as the spanned pages

Note that first_valid_mfn is now exported. The prototype has been
added in asm-arm/numa.h and not in a common header because I would
expect the variable to become static once NUMA is fully supported on
Arm.

Signed-off-by: Julien Grall <julien.grall@arm.com>
---
 xen/common/page_alloc.c    |  6 +++++-
 xen/include/asm-arm/numa.h | 10 ++++++++--
 2 files changed, 13 insertions(+), 3 deletions(-)
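
Net effect on the dummy node's reported range (values per the hunk
below):

    /* The single dummy node now reports
     *   node_start_pfn(0)     == first_valid_mfn
     *   node_spanned_pages(0) == max_page - first_valid_mfn
     * i.e. the span [first_valid_mfn, max_page), which covers every
     * memory bank regardless of where the frametable sits. */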

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index fbe5a8af39..472c6fe329 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -192,7 +192,11 @@ PAGE_LIST_HEAD(page_broken_list);
  * BOOT-TIME ALLOCATOR
  */
 
-static unsigned long __initdata first_valid_mfn = ~0UL;
+/*
+ * first_valid_mfn is exported because it is used in ARM-specific NUMA
+ * helpers. See comment in asm-arm/numa.h.
+ */
+unsigned long first_valid_mfn = ~0UL;
 
 static struct bootmem_region {
     unsigned long s, e; /* MFNs @s through @e-1 inclusive are free */
diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
index a2c1a3476d..3e7384da9e 100644
--- a/xen/include/asm-arm/numa.h
+++ b/xen/include/asm-arm/numa.h
@@ -12,9 +12,15 @@ static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
     return 0;
 }
 
+/*
+ * TODO: make first_valid_mfn static when NUMA is supported on Arm, this
+ * is required because the dummy helpers are using it.
+ */
+extern unsigned long first_valid_mfn;
+
 /* XXX: implement NUMA support */
-#define node_spanned_pages(nid) (total_pages)
-#define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx))
+#define node_spanned_pages(nid) (max_page - first_valid_mfn)
+#define node_start_pfn(nid) (first_valid_mfn)
 #define __node_distance(a, b) (20)
 
 static inline unsigned int arch_get_dma_bitsize(void)
--
2.11.0

-- 
2.15.0


