Date:      Tue, 20 Nov 2018 14:17:07 +0000 (UTC)
From:      Roger Pau Monné <royger@FreeBSD.org>
To:        ports-committers@freebsd.org, svn-ports-all@freebsd.org, svn-ports-head@freebsd.org
Subject:   svn commit: r485430 - in head/emulators/xen-kernel411: . files
Message-ID:  <201811201417.wAKEH7Dm012650@repo.freebsd.org>

Author: royger (src committer)
Date: Tue Nov 20 14:17:07 2018
New Revision: 485430
URL: https://svnweb.freebsd.org/changeset/ports/485430

Log:
  xen: add XSA patches
  
  Fixes for XSA-{275,276,277,279,280,282}
  
  Sponsored by: Citrix Systems R&D

Added:
  head/emulators/xen-kernel411/files/0001-x86-hvm-ioreq-fix-page-referencing.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/0002-x86-hvm-ioreq-use-ref-counted-target-assigned-shared.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/xsa275-4.11-1.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/xsa275-4.11-2.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/xsa277.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/xsa279.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/xsa280-1.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/xsa280-4.11-2.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/xsa282-2.patch   (contents, props changed)
  head/emulators/xen-kernel411/files/xsa282-4.11-1.patch   (contents, props changed)
Modified:
  head/emulators/xen-kernel411/Makefile

Modified: head/emulators/xen-kernel411/Makefile
==============================================================================
--- head/emulators/xen-kernel411/Makefile	Tue Nov 20 14:05:01 2018	(r485429)
+++ head/emulators/xen-kernel411/Makefile	Tue Nov 20 14:17:07 2018	(r485430)
@@ -2,7 +2,7 @@
 
 PORTNAME=	xen
 PORTVERSION=	4.11.0
-PORTREVISION=	2
+PORTREVISION=	3
 CATEGORIES=	emulators
 MASTER_SITES=	http://downloads.xenproject.org/release/xen/${PORTVERSION}/
 PKGNAMESUFFIX=	-kernel411
@@ -92,6 +92,17 @@ EXTRA_PATCHES+= ${FILESDIR}/0001-xen-Port-the-array_in
 		${FILESDIR}/0042-x86-write-to-correct-variable-in-parse_pv_l1tf.patch:-p1
 # XSA-278: x86: Nested VT-x usable even when disabled
 EXTRA_PATCHES+=	${FILESDIR}/xsa278-4.11.patch:-p1
+# XSA-{275,276,277,279,280,282}
+EXTRA_PATCHES+=	${FILESDIR}/xsa275-4.11-1.patch:-p1 \
+		${FILESDIR}/xsa275-4.11-2.patch:-p1 \
+		${FILESDIR}/0001-x86-hvm-ioreq-fix-page-referencing.patch:-p1 \
+		${FILESDIR}/0002-x86-hvm-ioreq-use-ref-counted-target-assigned-shared.patch:-p1 \
+		${FILESDIR}/xsa277.patch:-p1 \
+		${FILESDIR}/xsa279.patch:-p1 \
+		${FILESDIR}/xsa280-1.patch:-p1 \
+		${FILESDIR}/xsa280-4.11-2.patch:-p1 \
+		${FILESDIR}/xsa282-4.11-1.patch:-p1 \
+		${FILESDIR}/xsa282-2.patch:-p1
 
 .include <bsd.port.options.mk>
 

Added: head/emulators/xen-kernel411/files/0001-x86-hvm-ioreq-fix-page-referencing.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0001-x86-hvm-ioreq-fix-page-referencing.patch	Tue Nov 20 14:17:07 2018	(r485430)
@@ -0,0 +1,120 @@
+From bcc115ba39d2985dcf356ba8a9ac291e314f1f0f Mon Sep 17 00:00:00 2001
+From: Jan Beulich <JBeulich@suse.com>
+Date: Thu, 11 Oct 2018 04:00:26 -0600
+Subject: [PATCH 1/2] x86/hvm/ioreq: fix page referencing
+
+The code does not take a page reference in hvm_alloc_ioreq_mfn(), only a
+type reference. This can lead to a situation where a malicious domain with
+XSM_DM_PRIV can engineer a sequence as follows:
+
+- create IOREQ server: no pages as yet.
+- acquire resource: page allocated, total 0.
+- decrease reservation: -1 ref, total -1.
+
+This will cause Xen to hit a BUG_ON() in free_domheap_pages().
+
+This patch fixes the issue by changing the call to get_page_type() in
+hvm_alloc_ioreq_mfn() to a call to get_page_and_type(). This change
+in turn requires an extra put_page() in hvm_free_ioreq_mfn() in the case
+that _PGC_allocated is still set (i.e. a decrease reservation has not
+occurred) to avoid the page being leaked.
+
+This is part of XSA-276.
+
+Reported-by: Julien Grall <julien.grall@arm.com>
+Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+---
+ xen/arch/x86/hvm/ioreq.c | 46 +++++++++++++++++++++++++++-------------
+ 1 file changed, 31 insertions(+), 15 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
+index f39f391929..bdc2687014 100644
+--- a/xen/arch/x86/hvm/ioreq.c
++++ b/xen/arch/x86/hvm/ioreq.c
+@@ -327,6 +327,7 @@ static int hvm_map_ioreq_gfn(struct hvm_ioreq_server *s, bool buf)
+ static int hvm_alloc_ioreq_mfn(struct hvm_ioreq_server *s, bool buf)
+ {
+     struct hvm_ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
++    struct page_info *page;
+ 
+     if ( iorp->page )
+     {
+@@ -349,27 +350,33 @@ static int hvm_alloc_ioreq_mfn(struct hvm_ioreq_server *s, bool buf)
+      * could fail if the emulating domain has already reached its
+      * maximum allocation.
+      */
+-    iorp->page = alloc_domheap_page(s->emulator, MEMF_no_refcount);
++    page = alloc_domheap_page(s->emulator, MEMF_no_refcount);
+ 
+-    if ( !iorp->page )
++    if ( !page )
+         return -ENOMEM;
+ 
+-    if ( !get_page_type(iorp->page, PGT_writable_page) )
+-        goto fail1;
++    if ( !get_page_and_type(page, s->emulator, PGT_writable_page) )
++    {
++        /*
++         * The domain can't possibly know about this page yet, so failure
++         * here is a clear indication of something fishy going on.
++         */
++        domain_crash(s->emulator);
++        return -ENODATA;
++    }
+ 
+-    iorp->va = __map_domain_page_global(iorp->page);
++    iorp->va = __map_domain_page_global(page);
+     if ( !iorp->va )
+-        goto fail2;
++        goto fail;
+ 
++    iorp->page = page;
+     clear_page(iorp->va);
+     return 0;
+ 
+- fail2:
+-    put_page_type(iorp->page);
+-
+- fail1:
+-    put_page(iorp->page);
+-    iorp->page = NULL;
++ fail:
++    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
++        put_page(page);
++    put_page_and_type(page);
+ 
+     return -ENOMEM;
+ }
+@@ -377,15 +384,24 @@ static int hvm_alloc_ioreq_mfn(struct hvm_ioreq_server *s, bool buf)
+ static void hvm_free_ioreq_mfn(struct hvm_ioreq_server *s, bool buf)
+ {
+     struct hvm_ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
++    struct page_info *page = iorp->page;
+ 
+-    if ( !iorp->page )
++    if ( !page )
+         return;
+ 
++    iorp->page = NULL;
++
+     unmap_domain_page_global(iorp->va);
+     iorp->va = NULL;
+ 
+-    put_page_and_type(iorp->page);
+-    iorp->page = NULL;
++    /*
++     * Check whether we need to clear the allocation reference before
++     * dropping the explicit references taken by get_page_and_type().
++     */
++    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
++        put_page(page);
++
++    put_page_and_type(page);
+ }
+ 
+ bool is_ioreq_server_page(struct domain *d, const struct page_info *page)
+-- 
+2.19.1
+
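
The failure sequence described above boils down to reference-count arithmetic: with only a type reference taken, the page's general reference count stays at zero, so the guest-triggered decrease-reservation drives it negative and free_domheap_pages() hits its BUG_ON(). A standalone toy model of that arithmetic (illustrative field names, not Xen's struct page_info):

    #include <assert.h>
    #include <stdio.h>

    /* Toy stand-in for the two counts kept per page. */
    struct toy_page {
        int general_refs;   /* models count_info references            */
        int type_refs;      /* models the PGT_writable_page type count */
    };

    int main(void)
    {
        struct toy_page pg = { 0, 0 };

        /* Old hvm_alloc_ioreq_mfn(): only get_page_type(), no general ref. */
        pg.type_refs++;

        /* Malicious decrease-reservation drops a general reference the
         * server never took: "-1 ref, total -1" in the sequence above.   */
        pg.general_refs--;

        printf("general_refs = %d\n", pg.general_refs);
        assert(pg.general_refs >= 0);   /* models the BUG_ON() that fires */
        return 0;
    }

With the fix, get_page_and_type() raises both counts up front, and hvm_free_ioreq_mfn() drops the extra general reference itself whenever _PGC_allocated is still set.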

Added: head/emulators/xen-kernel411/files/0002-x86-hvm-ioreq-use-ref-counted-target-assigned-shared.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0002-x86-hvm-ioreq-use-ref-counted-target-assigned-shared.patch	Tue Nov 20 14:17:07 2018	(r485430)
@@ -0,0 +1,83 @@
+From 0bb2969630fbc92a0510bf120578b58efb74cdab Mon Sep 17 00:00:00 2001
+From: Paul Durrant <Paul.Durrant@citrix.com>
+Date: Thu, 1 Nov 2018 17:30:20 +0000
+Subject: [PATCH 2/2] x86/hvm/ioreq: use ref-counted target-assigned shared
+ pages
+
+Passing MEMF_no_refcount to alloc_domheap_pages() will allocate, as
+expected, a page that is assigned to the specified domain but is not
+accounted for in tot_pages. Unfortunately there is no logic for tracking
+such allocations and avoiding any adjustment to tot_pages when the page
+is freed.
+
+The only caller of alloc_domheap_pages() that passes MEMF_no_refcount is
+hvm_alloc_ioreq_mfn() so this patch removes use of the flag from that
+call-site to avoid the possibility of a domain using an ioreq server as
+a means to adjust its tot_pages and hence allocate more memory than it
+should be able to.
+
+However, the reason for using the flag in the first place was to avoid
+the allocation failing if the emulator domain is already at its maximum
+memory limit. Hence this patch switches to allocating memory from the
+target domain instead of the emulator domain. There is already an extra
+memory allowance of 2MB (LIBXL_HVM_EXTRA_MEMORY) applied to HVM guests,
+which is sufficient to cover the pages required by the supported
+configuration of a single IOREQ server for QEMU. (Stub-domains do not,
+so far, use resource mapping). It is also the case that QEMU will have
+mapped the IOREQ server pages before the guest boots, hence it is not
+possible for the guest to inflate its balloon to consume these pages.
+
+Reported-by: Julien Grall <julien.grall@arm.com>
+Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
+---
+ xen/arch/x86/hvm/ioreq.c | 12 ++----------
+ xen/arch/x86/mm.c        |  6 ------
+ 2 files changed, 2 insertions(+), 16 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
+index bdc2687014..fd10ee6146 100644
+--- a/xen/arch/x86/hvm/ioreq.c
++++ b/xen/arch/x86/hvm/ioreq.c
+@@ -342,20 +342,12 @@ static int hvm_alloc_ioreq_mfn(struct hvm_ioreq_server *s, bool buf)
+         return 0;
+     }
+ 
+-    /*
+-     * Allocated IOREQ server pages are assigned to the emulating
+-     * domain, not the target domain. This is safe because the emulating
+-     * domain cannot be destroyed until the ioreq server is destroyed.
+-     * Also we must use MEMF_no_refcount otherwise page allocation
+-     * could fail if the emulating domain has already reached its
+-     * maximum allocation.
+-     */
+-    page = alloc_domheap_page(s->emulator, MEMF_no_refcount);
++    page = alloc_domheap_page(s->target, 0);
+ 
+     if ( !page )
+         return -ENOMEM;
+ 
+-    if ( !get_page_and_type(page, s->emulator, PGT_writable_page) )
++    if ( !get_page_and_type(page, s->target, PGT_writable_page) )
+     {
+         /*
+          * The domain can't possibly know about this page yet, so failure
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 7d4871b791..24b215d785 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -4396,12 +4396,6 @@ int arch_acquire_resource(struct domain *d, unsigned int type,
+ 
+             mfn_list[i] = mfn_x(mfn);
+         }
+-
+-        /*
+-         * The frames will have been assigned to the domain that created
+-         * the ioreq server.
+-         */
+-        *flags |= XENMEM_rsrc_acq_caller_owned;
+         break;
+     }
+ 
+-- 
+2.19.1
+
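
The functional core of this second part of the fix is a single allocation call; shown in isolation for clarity (a before/after sketch, with the error handling as in the hunk above):

    /* Before: pages owned by the emulator domain and exempt from its
     * memory accounting via MEMF_no_refcount.                          */
    page = alloc_domheap_page(s->emulator, MEMF_no_refcount);

    /* After: pages owned by and accounted against the target domain,
     * whose LIBXL_HVM_EXTRA_MEMORY allowance already covers them.      */
    page = alloc_domheap_page(s->target, 0);
    if ( !page )
        return -ENOMEM;
    if ( !get_page_and_type(page, s->target, PGT_writable_page) )
    {
        /* failure handled exactly as in part 1 of this series */
    }

Because the frames now belong to the target domain, arch_acquire_resource() no longer sets XENMEM_rsrc_acq_caller_owned, as the second hunk shows.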

Added: head/emulators/xen-kernel411/files/xsa275-4.11-1.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa275-4.11-1.patch	Tue Nov 20 14:17:07 2018	(r485430)
@@ -0,0 +1,104 @@
+From: Roger Pau Monné <roger.pau@citrix.com>
+Subject: amd/iommu: fix flush checks
+
+Flush checking for AMD IOMMU didn't check whether the previous entry
+was present, or whether the flags (writable/readable) changed in order
+to decide whether a flush should be executed.
+
+Fix this by taking the writable/readable/next-level fields into account,
+together with the present bit.
+
+Along these lines the flushing in amd_iommu_map_page() must not be
+omitted for PV domains. The comment there was simply wrong: Mappings may
+very well change, both their addresses and their permissions. Ultimately
+this should honor iommu_dont_flush_iotlb, but to achieve this
+amd_iommu_ops first needs to gain an .iotlb_flush hook.
+
+Also make clear_iommu_pte_present() static, to demonstrate there's no
+caller omitting the (subsequent) flush.
+
+This is part of XSA-275.
+
+Reported-by: Paul Durrant <paul.durrant@citrix.com>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/drivers/passthrough/amd/iommu_map.c
++++ b/xen/drivers/passthrough/amd/iommu_map.c
+@@ -35,7 +35,7 @@ static unsigned int pfn_to_pde_idx(unsig
+     return idx;
+ }
+ 
+-void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn)
++static void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn)
+ {
+     u64 *table, *pte;
+ 
+@@ -49,23 +49,42 @@ static bool_t set_iommu_pde_present(u32
+                                     unsigned int next_level,
+                                     bool_t iw, bool_t ir)
+ {
+-    u64 addr_lo, addr_hi, maddr_old, maddr_next;
++    uint64_t addr_lo, addr_hi, maddr_next;
+     u32 entry;
+-    bool_t need_flush = 0;
++    bool need_flush = false, old_present;
+ 
+     maddr_next = (u64)next_mfn << PAGE_SHIFT;
+ 
+-    addr_hi = get_field_from_reg_u32(pde[1],
+-                                     IOMMU_PTE_ADDR_HIGH_MASK,
+-                                     IOMMU_PTE_ADDR_HIGH_SHIFT);
+-    addr_lo = get_field_from_reg_u32(pde[0],
+-                                     IOMMU_PTE_ADDR_LOW_MASK,
+-                                     IOMMU_PTE_ADDR_LOW_SHIFT);
+-
+-    maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
+-
+-    if ( maddr_old != maddr_next )
+-        need_flush = 1;
++    old_present = get_field_from_reg_u32(pde[0], IOMMU_PTE_PRESENT_MASK,
++                                         IOMMU_PTE_PRESENT_SHIFT);
++    if ( old_present )
++    {
++        bool old_r, old_w;
++        unsigned int old_level;
++        uint64_t maddr_old;
++
++        addr_hi = get_field_from_reg_u32(pde[1],
++                                         IOMMU_PTE_ADDR_HIGH_MASK,
++                                         IOMMU_PTE_ADDR_HIGH_SHIFT);
++        addr_lo = get_field_from_reg_u32(pde[0],
++                                         IOMMU_PTE_ADDR_LOW_MASK,
++                                         IOMMU_PTE_ADDR_LOW_SHIFT);
++        old_level = get_field_from_reg_u32(pde[0],
++                                           IOMMU_PDE_NEXT_LEVEL_MASK,
++                                           IOMMU_PDE_NEXT_LEVEL_SHIFT);
++        old_w = get_field_from_reg_u32(pde[1],
++                                       IOMMU_PTE_IO_WRITE_PERMISSION_MASK,
++                                       IOMMU_PTE_IO_WRITE_PERMISSION_SHIFT);
++        old_r = get_field_from_reg_u32(pde[1],
++                                       IOMMU_PTE_IO_READ_PERMISSION_MASK,
++                                       IOMMU_PTE_IO_READ_PERMISSION_SHIFT);
++
++        maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
++
++        if ( maddr_old != maddr_next || iw != old_w || ir != old_r ||
++             old_level != next_level )
++            need_flush = true;
++    }
+ 
+     addr_lo = maddr_next & DMA_32BIT_MASK;
+     addr_hi = maddr_next >> 32;
+@@ -687,10 +706,7 @@ int amd_iommu_map_page(struct domain *d,
+     if ( !need_flush )
+         goto out;
+ 
+-    /* 4K mapping for PV guests never changes, 
+-     * no need to flush if we trust non-present bits */
+-    if ( is_hvm_domain(d) )
+-        amd_iommu_flush_pages(d, gfn, 0);
++    amd_iommu_flush_pages(d, gfn, 0);
+ 
+     for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2;
+           merge_level <= hd->arch.paging_mode; merge_level++ )
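
For clarity, the flush condition the patch introduces, pulled out into a free-standing helper (a sketch against the AMD IOMMU field accessors used above; the real code keeps this logic inline in set_iommu_pde_present()):

    static bool pde_update_needs_flush(const uint32_t pde[2], uint64_t maddr_next,
                                       unsigned int next_level, bool iw, bool ir)
    {
        uint64_t addr_lo, addr_hi, maddr_old;
        unsigned int old_level;
        bool old_w, old_r;

        /* A non-present entry cannot be held in the IOTLB: nothing to flush. */
        if ( !get_field_from_reg_u32(pde[0], IOMMU_PTE_PRESENT_MASK,
                                     IOMMU_PTE_PRESENT_SHIFT) )
            return false;

        addr_hi = get_field_from_reg_u32(pde[1], IOMMU_PTE_ADDR_HIGH_MASK,
                                         IOMMU_PTE_ADDR_HIGH_SHIFT);
        addr_lo = get_field_from_reg_u32(pde[0], IOMMU_PTE_ADDR_LOW_MASK,
                                         IOMMU_PTE_ADDR_LOW_SHIFT);
        old_level = get_field_from_reg_u32(pde[0], IOMMU_PDE_NEXT_LEVEL_MASK,
                                           IOMMU_PDE_NEXT_LEVEL_SHIFT);
        old_w = get_field_from_reg_u32(pde[1], IOMMU_PTE_IO_WRITE_PERMISSION_MASK,
                                       IOMMU_PTE_IO_WRITE_PERMISSION_SHIFT);
        old_r = get_field_from_reg_u32(pde[1], IOMMU_PTE_IO_READ_PERMISSION_MASK,
                                       IOMMU_PTE_IO_READ_PERMISSION_SHIFT);
        maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);

        /* Flush if the address, the permissions or the next-level field changed. */
        return maddr_old != maddr_next || iw != old_w || ir != old_r ||
               old_level != next_level;
    }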

Added: head/emulators/xen-kernel411/files/xsa275-4.11-2.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa275-4.11-2.patch	Tue Nov 20 14:17:07 2018	(r485430)
@@ -0,0 +1,68 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: AMD/IOMMU: suppress PTE merging after initial table creation
+
+The logic is not fit for this purpose, so simply disable its use until
+it can be fixed / replaced. Note that this re-enables merging for the
+table creation case, which was disabled as a (perhaps unintended) side
+effect of the earlier "amd/iommu: fix flush checks". It relies on no
+page getting mapped more than once (with different properties) in this
+process, as that would still be beyond what the merging logic can cope
+with. But arch_iommu_populate_page_table() guarantees this afaict.
+
+This is part of XSA-275.
+
+Reported-by: Paul Durrant <paul.durrant@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/drivers/passthrough/amd/iommu_map.c
++++ b/xen/drivers/passthrough/amd/iommu_map.c
+@@ -702,11 +702,24 @@ int amd_iommu_map_page(struct domain *d,
+                                        !!(flags & IOMMUF_writable),
+                                        !!(flags & IOMMUF_readable));
+ 
+-    /* Do not increase pde count if io mapping has not been changed */
+-    if ( !need_flush )
+-        goto out;
++    if ( need_flush )
++    {
++        amd_iommu_flush_pages(d, gfn, 0);
++        /* No further merging, as the logic doesn't cope. */
++        hd->arch.no_merge = true;
++    }
+ 
+-    amd_iommu_flush_pages(d, gfn, 0);
++    /*
++     * Suppress merging of non-R/W mappings or after initial table creation,
++     * as the merge logic does not cope with this.
++     */
++    if ( hd->arch.no_merge || flags != (IOMMUF_writable | IOMMUF_readable) )
++        goto out;
++    if ( d->creation_finished )
++    {
++        hd->arch.no_merge = true;
++        goto out;
++    }
+ 
+     for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2;
+           merge_level <= hd->arch.paging_mode; merge_level++ )
+@@ -780,6 +793,10 @@ int amd_iommu_unmap_page(struct domain *
+ 
+     /* mark PTE as 'page not present' */
+     clear_iommu_pte_present(pt_mfn[1], gfn);
++
++    /* No further merging in amd_iommu_map_page(), as the logic doesn't cope. */
++    hd->arch.no_merge = true;
++
+     spin_unlock(&hd->arch.mapping_lock);
+ 
+     amd_iommu_flush_pages(d, gfn, 0);
+--- a/xen/include/asm-x86/iommu.h
++++ b/xen/include/asm-x86/iommu.h
+@@ -40,6 +40,7 @@ struct arch_iommu
+ 
+     /* amd iommu support */
+     int paging_mode;
++    bool no_merge;
+     struct page_info *root_table;
+     struct guest_iommu *g_iommu;
+ };
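
The new arch_iommu field acts as a one-way latch: once an established mapping changes or any page is unmapped, merging stays off for the remainder of the domain's lifetime. A condensed sketch of the gating around the merge loop (same identifiers as the hunk above, not a complete amd_iommu_map_page()):

    if ( need_flush )
    {
        amd_iommu_flush_pages(d, gfn, 0);
        hd->arch.no_merge = true;               /* an existing mapping changed */
    }

    /* Merge only full R/W mappings, only while the latch is clear, and
     * only during initial table construction.                           */
    if ( hd->arch.no_merge || flags != (IOMMUF_writable | IOMMUF_readable) )
        goto out;
    if ( d->creation_finished )
    {
        hd->arch.no_merge = true;
        goto out;
    }
    /* ... the existing merge_level loop runs only when we get here ... */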

Added: head/emulators/xen-kernel411/files/xsa277.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa277.patch	Tue Nov 20 14:17:07 2018	(r485430)
@@ -0,0 +1,47 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/mm: Put the gfn on all paths after get_gfn_query()
+
+c/s 7867181b2 "x86/PoD: correctly handle non-order-0 decrease-reservation
+requests" introduced an early exit in guest_remove_page() for unexpected p2m
+types.  However, get_gfn_query() internally takes the p2m lock, and must be
+matched with a put_gfn() call later.
+
+Fix the erroneous comment beside the declaration of get_gfn_query().
+
+This is XSA-277.
+
+Reported-by: Paul Durrant <paul.durrant@citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+diff --git a/xen/common/memory.c b/xen/common/memory.c
+index 987395f..26b7123 100644
+--- a/xen/common/memory.c
++++ b/xen/common/memory.c
+@@ -305,7 +305,11 @@ int guest_remove_page(struct domain *d, unsigned long gmfn)
+ #ifdef CONFIG_X86
+     mfn = get_gfn_query(d, gmfn, &p2mt);
+     if ( unlikely(p2mt == p2m_invalid) || unlikely(p2mt == p2m_mmio_dm) )
++    {
++        put_gfn(d, gmfn);
++
+         return -ENOENT;
++    }
+ 
+     if ( unlikely(p2m_is_paging(p2mt)) )
+     {
+diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
+index ac33f50..6d849a5 100644
+--- a/xen/include/asm-x86/p2m.h
++++ b/xen/include/asm-x86/p2m.h
+@@ -448,10 +448,7 @@ static inline mfn_t __nonnull(3) get_gfn_type(
+     return get_gfn_type_access(p2m_get_hostp2m(d), gfn, t, &a, q, NULL);
+ }
+ 
+-/* Syntactic sugar: most callers will use one of these. 
+- * N.B. get_gfn_query() is the _only_ one guaranteed not to take the
+- * p2m lock; none of the others can be called with the p2m or paging
+- * lock held. */
++/* Syntactic sugar: most callers will use one of these. */
+ #define get_gfn(d, g, t)         get_gfn_type((d), (g), (t), P2M_ALLOC)
+ #define get_gfn_query(d, g, t)   get_gfn_type((d), (g), (t), 0)
+ #define get_gfn_unshare(d, g, t) get_gfn_type((d), (g), (t), \
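
The rule the fix restores is structural: get_gfn_query() takes the p2m lock (the removed comment claiming otherwise was wrong), so every exit path after it must call put_gfn(). A minimal sketch of the corrected pattern in a hypothetical caller (the helpers are the real ones used above):

    static int lookup_example(struct domain *d, unsigned long gmfn)
    {
        p2m_type_t p2mt;
        mfn_t mfn = get_gfn_query(d, gmfn, &p2mt);

        if ( unlikely(p2mt == p2m_invalid) || unlikely(p2mt == p2m_mmio_dm) )
        {
            put_gfn(d, gmfn);   /* the release XSA-277 adds on the error path */
            return -ENOENT;
        }

        /* ... use mfn ... */
        (void)mfn;
        put_gfn(d, gmfn);       /* normal exit path */
        return 0;
    }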

Added: head/emulators/xen-kernel411/files/xsa279.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa279.patch	Tue Nov 20 14:17:07 2018	(r485430)
@@ -0,0 +1,37 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/mm: Don't perform flush after failing to update a guests L1e
+
+If the L1e update hasn't occurred, the flush cannot do anything useful.  This
+skips the potentially expensive vcpumask_to_pcpumask() conversion, and
+broadcast TLB shootdown.
+
+More importantly however, we might be in the error path due to a bad va
+parameter from the guest, and this should not propagate into the TLB flushing
+logic.  The INVPCID instruction for example raises #GP for a non-canonical
+address.
+
+This is XSA-279.
+
+Reported-by: Matthew Daley <mattd@bugfuzz.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 703f330..75663c6 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -4155,6 +4155,14 @@ static int __do_update_va_mapping(
+     if ( pl1e )
+         unmap_domain_page(pl1e);
+ 
++    /*
++     * Any error at this point means that we haven't changed the L1e.  Skip the
++     * flush, as it won't do anything useful.  Furthermore, va is guest
++     * controlled and not necessarily audited by this point.
++     */
++    if ( rc )
++        return rc;
++
+     switch ( flags & UVMF_FLUSHTYPE_MASK )
+     {
+     case UVMF_TLB_FLUSH:
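
Condensed shape of the fix (the full UVMF_* switch follows in the real __do_update_va_mapping()): once rc is non-zero the L1e is untouched and va may be a completely unaudited guest value, so nothing past this point should consume it.

    if ( pl1e )
        unmap_domain_page(pl1e);

    if ( rc )
        return rc;   /* L1e unchanged: skip the flush and never touch va */

    switch ( flags & UVMF_FLUSHTYPE_MASK )
    {
        /* ... TLB / INVLPG handling as before ... */
    }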

Added: head/emulators/xen-kernel411/files/xsa280-1.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa280-1.patch	Tue Nov 20 14:17:07 2018	(r485430)
@@ -0,0 +1,116 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/shadow: move OOS flag bit positions
+
+In preparation of reducing struct page_info's shadow_flags field to 16
+bits, lower the bit positions used for SHF_out_of_sync and
+SHF_oos_may_write.
+
+Instead of also adjusting the open coded use in _get_page_type(),
+introduce shadow_prepare_page_type_change() to contain knowledge of the
+bit positions to shadow code.
+
+This is part of XSA-280.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Tim Deegan <tim@xen.org>
+---
+v2: Rename function and pass full type.
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -2712,17 +2712,8 @@ static int _get_page_type(struct page_in
+         {
+             struct domain *d = page_get_owner(page);
+ 
+-            /*
+-             * Normally we should never let a page go from type count 0
+-             * to type count 1 when it is shadowed. One exception:
+-             * out-of-sync shadowed pages are allowed to become
+-             * writeable.
+-             */
+-            if ( d && shadow_mode_enabled(d)
+-                 && (page->count_info & PGC_page_table)
+-                 && !((page->shadow_flags & (1u<<29))
+-                      && type == PGT_writable_page) )
+-               shadow_remove_all_shadows(d, page_to_mfn(page));
++            if ( d && shadow_mode_enabled(d) )
++               shadow_prepare_page_type_change(d, page, type);
+ 
+             ASSERT(!(x & PGT_pae_xen_l2));
+             if ( (x & PGT_type_mask) != type )
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -749,6 +749,9 @@ int sh_unsync(struct vcpu *v, mfn_t gmfn
+          || !v->domain->arch.paging.shadow.oos_active )
+         return 0;
+ 
++    BUILD_BUG_ON(!(typeof(pg->shadow_flags))SHF_out_of_sync);
++    BUILD_BUG_ON(!(typeof(pg->shadow_flags))SHF_oos_may_write);
++
+     pg->shadow_flags |= SHF_out_of_sync|SHF_oos_may_write;
+     oos_hash_add(v, gmfn);
+     perfc_incr(shadow_unsync);
+@@ -2413,6 +2416,26 @@ void sh_remove_shadows(struct domain *d,
+     paging_unlock(d);
+ }
+ 
++void shadow_prepare_page_type_change(struct domain *d, struct page_info *page,
++                                     unsigned long new_type)
++{
++    if ( !(page->count_info & PGC_page_table) )
++        return;
++
++#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
++    /*
++     * Normally we should never let a page go from type count 0 to type
++     * count 1 when it is shadowed. One exception: out-of-sync shadowed
++     * pages are allowed to become writeable.
++     */
++    if ( (page->shadow_flags & SHF_oos_may_write) &&
++         new_type == PGT_writable_page )
++        return;
++#endif
++
++    shadow_remove_all_shadows(d, page_to_mfn(page));
++}
++
+ static void
+ sh_remove_all_shadows_and_parents(struct domain *d, mfn_t gmfn)
+ /* Even harsher: this is a HVM page that we thing is no longer a pagetable.
+--- a/xen/arch/x86/mm/shadow/private.h
++++ b/xen/arch/x86/mm/shadow/private.h
+@@ -285,8 +285,8 @@ static inline void sh_terminate_list(str
+  * codepath is called during that time and is sensitive to oos issues, it may
+  * need to use the second flag.
+  */
+-#define SHF_out_of_sync (1u<<30)
+-#define SHF_oos_may_write (1u<<29)
++#define SHF_out_of_sync (1u << (SH_type_max_shadow + 1))
++#define SHF_oos_may_write (1u << (SH_type_max_shadow + 2))
+ 
+ #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
+ 
+--- a/xen/include/asm-x86/shadow.h
++++ b/xen/include/asm-x86/shadow.h
+@@ -81,6 +81,10 @@ void shadow_final_teardown(struct domain
+ 
+ void sh_remove_shadows(struct domain *d, mfn_t gmfn, int fast, int all);
+ 
++/* Adjust shadows ready for a guest page to change its type. */
++void shadow_prepare_page_type_change(struct domain *d, struct page_info *page,
++                                     unsigned long new_type);
++
+ /* Discard _all_ mappings from the domain's shadows. */
+ void shadow_blow_tables_per_domain(struct domain *d);
+ 
+@@ -105,6 +109,10 @@ int shadow_set_allocation(struct domain
+ static inline void sh_remove_shadows(struct domain *d, mfn_t gmfn,
+                                      int fast, int all) {}
+ 
++static inline void shadow_prepare_page_type_change(struct domain *d,
++                                                   struct page_info *page,
++                                                   unsigned long new_type) {}
++
+ static inline void shadow_blow_tables_per_domain(struct domain *d) {}
+ 
+ static inline int shadow_domctl(struct domain *d,
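
The point of moving the flags is purely one of width: bits 29 and 30 cannot survive in the 16-bit field that shadow_flags becomes in the second XSA-280 patch, which is what the BUILD_BUG_ON()s above guard against. A standalone illustration of the truncation (not Xen code):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint16_t flags16 = 0;

        flags16 |= 1u << 29;      /* old SHF_oos_may_write position: truncated */
        assert(flags16 == 0);     /* the flag is silently lost                 */

        flags16 |= 1u << 14;      /* any position below bit 16 survives        */
        assert(flags16 == 0x4000);
        return 0;
    }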

Added: head/emulators/xen-kernel411/files/xsa280-4.11-2.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa280-4.11-2.patch	Tue Nov 20 14:17:07 2018	(r485430)
@@ -0,0 +1,141 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/shadow: shrink struct page_info's shadow_flags to 16 bits
+
+This is to avoid it overlapping the linear_pt_count field needed for PV
+domains. Introduce a separate, HVM-only pagetable_dying field to replace
+the sole one left in the upper 16 bits.
+
+Note that the accesses to ->shadow_flags in shadow_{pro,de}mote() get
+switched to non-atomic, non-bitops operations, as {test,set,clear}_bit()
+are not allowed on uint16_t fields and hence their use would have
+required ugly casts. This is fine because all updates of the field ought
+to occur with the paging lock held, and other updates of it use |= and
+&= as well (i.e. using atomic operations here didn't really guard
+against potentially racing updates elsewhere).
+
+This is part of XSA-280.
+
+Reported-by: Prgmr.com Security <security@prgmr.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Tim Deegan <tim@xen.org>
+
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -1028,10 +1028,14 @@ void shadow_promote(struct domain *d, mf
+ 
+     /* Is the page already shadowed? */
+     if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
++    {
+         page->shadow_flags = 0;
++        if ( is_hvm_domain(d) )
++            page->pagetable_dying = false;
++    }
+ 
+-    ASSERT(!test_bit(type, &page->shadow_flags));
+-    set_bit(type, &page->shadow_flags);
++    ASSERT(!(page->shadow_flags & (1u << type)));
++    page->shadow_flags |= 1u << type;
+     TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PROMOTE);
+ }
+ 
+@@ -1040,9 +1044,9 @@ void shadow_demote(struct domain *d, mfn
+     struct page_info *page = mfn_to_page(gmfn);
+ 
+     ASSERT(test_bit(_PGC_page_table, &page->count_info));
+-    ASSERT(test_bit(type, &page->shadow_flags));
++    ASSERT(page->shadow_flags & (1u << type));
+ 
+-    clear_bit(type, &page->shadow_flags);
++    page->shadow_flags &= ~(1u << type);
+ 
+     if ( (page->shadow_flags & SHF_page_type_mask) == 0 )
+     {
+@@ -2921,7 +2925,7 @@ void sh_remove_shadows(struct domain *d,
+     if ( !fast && all && (pg->count_info & PGC_page_table) )
+     {
+         SHADOW_ERROR("can't find all shadows of mfn %"PRI_mfn" "
+-                     "(shadow_flags=%08x)\n",
++                     "(shadow_flags=%04x)\n",
+                       mfn_x(gmfn), pg->shadow_flags);
+         domain_crash(d);
+     }
+--- a/xen/arch/x86/mm/shadow/multi.c
++++ b/xen/arch/x86/mm/shadow/multi.c
+@@ -3299,8 +3299,8 @@ static int sh_page_fault(struct vcpu *v,
+ 
+     /* Unshadow if we are writing to a toplevel pagetable that is
+      * flagged as a dying process, and that is not currently used. */
+-    if ( sh_mfn_is_a_page_table(gmfn)
+-         && (mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying) )
++    if ( sh_mfn_is_a_page_table(gmfn) && is_hvm_domain(d) &&
++         mfn_to_page(gmfn)->pagetable_dying )
+     {
+         int used = 0;
+         struct vcpu *tmp;
+@@ -4254,9 +4254,9 @@ int sh_rm_write_access_from_sl1p(struct
+     ASSERT(mfn_valid(smfn));
+ 
+     /* Remember if we've been told that this process is being torn down */
+-    if ( curr->domain == d )
++    if ( curr->domain == d && is_hvm_domain(d) )
+         curr->arch.paging.shadow.pagetable_dying
+-            = !!(mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying);
++            = mfn_to_page(gmfn)->pagetable_dying;
+ 
+     sp = mfn_to_page(smfn);
+ 
+@@ -4572,10 +4572,10 @@ static void sh_pagetable_dying(struct vc
+                    : shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l2_pae_shadow);
+         }
+ 
+-        if ( mfn_valid(smfn) )
++        if ( mfn_valid(smfn) && is_hvm_domain(d) )
+         {
+             gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
+-            mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying;
++            mfn_to_page(gmfn)->pagetable_dying = true;
+             shadow_unhook_mappings(d, smfn, 1/* user pages only */);
+             flush = 1;
+         }
+@@ -4612,9 +4612,9 @@ static void sh_pagetable_dying(struct vc
+     smfn = shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l4_64_shadow);
+ #endif
+ 
+-    if ( mfn_valid(smfn) )
++    if ( mfn_valid(smfn) && is_hvm_domain(d) )
+     {
+-        mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying;
++        mfn_to_page(gmfn)->pagetable_dying = true;
+         shadow_unhook_mappings(d, smfn, 1/* user pages only */);
+         /* Now flush the TLB: we removed toplevel mappings. */
+         flush_tlb_mask(d->dirty_cpumask);
+--- a/xen/arch/x86/mm/shadow/private.h
++++ b/xen/arch/x86/mm/shadow/private.h
+@@ -292,8 +292,6 @@ static inline void sh_terminate_list(str
+ 
+ #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
+ 
+-#define SHF_pagetable_dying (1u<<31)
+-
+ static inline int sh_page_has_multiple_shadows(struct page_info *pg)
+ {
+     u32 shadows;
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -259,8 +259,15 @@ struct page_info
+          * Guest pages with a shadow.  This does not conflict with
+          * tlbflush_timestamp since page table pages are explicitly not
+          * tracked for TLB-flush avoidance when a guest runs in shadow mode.
++         *
++         * pagetable_dying is used for HVM domains only. The layout here has
++         * to avoid re-use of the space used by linear_pt_count, which (only)
++         * PV guests use.
+          */
+-        u32 shadow_flags;
++        struct {
++            uint16_t shadow_flags;
++            bool pagetable_dying;
++        };
+ 
+         /* When in use as a shadow, next shadow in this hash chain. */
+         __pdx_t next_shadow;
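
Condensed view of the two halves of the change (same code as in the hunks above, shown together): the union member shrinks to a 16-bit flags field plus an HVM-only boolean, and flag updates become plain read-modify-write because {test,set,clear}_bit() cannot operate on a uint16_t. Both forms are safe only because every update happens under the paging lock.

    /* struct page_info union member; no longer overlaps PV's linear_pt_count. */
    struct {
        uint16_t shadow_flags;
        bool pagetable_dying;        /* HVM only; replaces SHF_pagetable_dying */
    };

    /* shadow_promote() / shadow_demote() now use plain |= and &=: */
    page->shadow_flags |= 1u << type;
    page->shadow_flags &= ~(1u << type);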

Added: head/emulators/xen-kernel411/files/xsa282-2.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa282-2.patch	Tue Nov 20 14:17:07 2018	(r485430)
@@ -0,0 +1,42 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86: work around HLE host lockup erratum
+
+XACQUIRE prefixed accesses to the 4Mb range of memory starting at 1Gb
+are liable to lock up the processor. Disallow use of this memory range.
+
+Unfortunately the available Core Gen7 and Gen8 spec updates are pretty
+old, so I can only guess that they're similarly affected when Core Gen6
+is and the Xeon counterparts are, too.
+
+This is part of XSA-282.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+---
+v2: Don't apply the workaround when running ourselves virtualized.
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -5853,6 +5853,22 @@ const struct platform_bad_page *__init g
+         { .mfn = 0x20138000 >> PAGE_SHIFT },
+         { .mfn = 0x40004000 >> PAGE_SHIFT },
+     };
++    static const struct platform_bad_page __initconst hle_bad_page = {
++        .mfn = 0x40000000 >> PAGE_SHIFT, .order = 10
++    };
++
++    switch ( cpuid_eax(1) & 0x000f3ff0 )
++    {
++    case 0x000406e0: /* erratum SKL167 */
++    case 0x00050650: /* erratum SKZ63 */
++    case 0x000506e0: /* errata SKL167 / SKW159 */
++    case 0x000806e0: /* erratum KBL??? */
++    case 0x000906e0: /* errata KBL??? / KBW114 / CFW103 */
++        *array_size = (cpuid_eax(0) >= 7 &&
++                       !(cpuid_ecx(1) & cpufeat_mask(X86_FEATURE_HYPERVISOR)) &&
++                       (cpuid_count_ebx(7, 0) & cpufeat_mask(X86_FEATURE_HLE)));
++        return &hle_bad_page;
++    }
+ 
+     *array_size = ARRAY_SIZE(snb_bad_pages);
+     igd_id = pci_conf_read32(0, 0, 2, 0, 0);
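
For reference, the 0x000f3ff0 mask keeps the model, family, type and extended-model fields of CPUID leaf 1 EAX and drops the stepping bits, so one case label covers every stepping of a part. A standalone decode of one of the listed signatures (not Xen code; the family-6 model composition shown applies to these parts):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t sig = 0x000506e0 & 0x000f3ff0;          /* SKL167 / SKW159 line */
        unsigned int family = (sig >> 8) & 0xf;
        unsigned int model  = ((sig >> 12) & 0xf0) | ((sig >> 4) & 0xf);

        printf("family %#x model %#x\n", family, model); /* 0x6, 0x5e: Skylake */
        return 0;
    }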

Added: head/emulators/xen-kernel411/files/xsa282-4.11-1.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa282-4.11-1.patch	Tue Nov 20 14:17:07 2018	(r485430)
@@ -0,0 +1,147 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86: extend get_platform_badpages() interface
+
+Use a structure so along with an address (now frame number) an order can
+also be specified.
+
+This is part of XSA-282.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -40,7 +40,7 @@ bool __read_mostly xen_guest;
+ static __read_mostly uint32_t xen_cpuid_base;
+ extern char hypercall_page[];
+ static struct rangeset *mem;
+-static unsigned long __initdata reserved_pages[2];
++static struct platform_bad_page __initdata reserved_pages[2];
+ 
+ DEFINE_PER_CPU(unsigned int, vcpu_id);
+ 
+@@ -326,7 +326,7 @@ void __init hypervisor_fixup_e820(struct
+         panic("Unable to get " #p);             \
+     mark_pfn_as_ram(e820, pfn);                 \
+     ASSERT(i < ARRAY_SIZE(reserved_pages));     \
+-    reserved_pages[i++] = pfn << PAGE_SHIFT;    \
++    reserved_pages[i++].mfn = pfn;              \
+ })
+     MARK_PARAM_RAM(HVM_PARAM_STORE_PFN);
+     if ( !pv_console )
+@@ -334,7 +334,7 @@ void __init hypervisor_fixup_e820(struct
+ #undef MARK_PARAM_RAM
+ }
+ 
+-const unsigned long *__init hypervisor_reserved_pages(unsigned int *size)
++const struct platform_bad_page *__init hypervisor_reserved_pages(unsigned int *size)
+ {
+     ASSERT(xen_guest);
+ 
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -5768,23 +5768,23 @@ void arch_dump_shared_mem_info(void)
+             mem_sharing_get_nr_saved_mfns());
+ }
+ 
+-const unsigned long *__init get_platform_badpages(unsigned int *array_size)
++const struct platform_bad_page *__init get_platform_badpages(unsigned int *array_size)
+ {
+     u32 igd_id;
+-    static unsigned long __initdata bad_pages[] = {
+-        0x20050000,
+-        0x20110000,
+-        0x20130000,
+-        0x20138000,
+-        0x40004000,
++    static const struct platform_bad_page __initconst snb_bad_pages[] = {
++        { .mfn = 0x20050000 >> PAGE_SHIFT },
++        { .mfn = 0x20110000 >> PAGE_SHIFT },
++        { .mfn = 0x20130000 >> PAGE_SHIFT },
++        { .mfn = 0x20138000 >> PAGE_SHIFT },
++        { .mfn = 0x40004000 >> PAGE_SHIFT },
+     };
+ 
+-    *array_size = ARRAY_SIZE(bad_pages);
++    *array_size = ARRAY_SIZE(snb_bad_pages);
+     igd_id = pci_conf_read32(0, 0, 2, 0, 0);
+-    if ( !IS_SNB_GFX(igd_id) )
+-        return NULL;
++    if ( IS_SNB_GFX(igd_id) )
++        return snb_bad_pages;
+ 
+-    return bad_pages;
++    return NULL;
+ }
+ 
+ void paging_invlpg(struct vcpu *v, unsigned long va)
+--- a/xen/common/page_alloc.c
++++ b/xen/common/page_alloc.c
+@@ -270,7 +270,7 @@ void __init init_boot_pages(paddr_t ps,
+     unsigned long bad_spfn, bad_epfn;
+     const char *p;
+ #ifdef CONFIG_X86
+-    const unsigned long *badpage = NULL;
++    const struct platform_bad_page *badpage;
+     unsigned int i, array_size;
+ 
+     BUILD_BUG_ON(8 * sizeof(frame_table->u.free.first_dirty) <
+@@ -299,8 +299,8 @@ void __init init_boot_pages(paddr_t ps,
+     {
+         for ( i = 0; i < array_size; i++ )
+         {
+-            bootmem_region_zap(*badpage >> PAGE_SHIFT,
+-                               (*badpage >> PAGE_SHIFT) + 1);
++            bootmem_region_zap(badpage->mfn,
++                               badpage->mfn + (1U << badpage->order));
+             badpage++;
+         }
+     }
+@@ -312,8 +312,8 @@ void __init init_boot_pages(paddr_t ps,
+         {
+             for ( i = 0; i < array_size; i++ )
+             {
+-                bootmem_region_zap(*badpage >> PAGE_SHIFT,
+-                                   (*badpage >> PAGE_SHIFT) + 1);
++                bootmem_region_zap(badpage->mfn,
++                                   badpage->mfn + (1U << badpage->order));
+                 badpage++;
+             }
+         }
+--- a/xen/include/asm-x86/guest/xen.h
++++ b/xen/include/asm-x86/guest/xen.h
+@@ -37,7 +37,7 @@ void hypervisor_ap_setup(void);
+ int hypervisor_alloc_unused_page(mfn_t *mfn);
+ int hypervisor_free_unused_page(mfn_t mfn);
+ void hypervisor_fixup_e820(struct e820map *e820);
+-const unsigned long *hypervisor_reserved_pages(unsigned int *size);
++const struct platform_bad_page *hypervisor_reserved_pages(unsigned int *size);
+ uint32_t hypervisor_cpuid_base(void);
+ void hypervisor_resume(void);
+ 
+@@ -65,7 +65,7 @@ static inline void hypervisor_fixup_e820
+     ASSERT_UNREACHABLE();
+ }
+ 
+-static inline const unsigned long *hypervisor_reserved_pages(unsigned int *size)
++static inline const struct platform_bad_page *hypervisor_reserved_pages(unsigned int *size)
+ {
+     ASSERT_UNREACHABLE();
+     return NULL;
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -348,7 +348,13 @@ void zap_ro_mpt(mfn_t mfn);
+ 
+ bool is_iomem_page(mfn_t mfn);
+ 
+-const unsigned long *get_platform_badpages(unsigned int *array_size);
++struct platform_bad_page {
++    unsigned long mfn;
++    unsigned int order;
++};
++
++const struct platform_bad_page *get_platform_badpages(unsigned int *array_size);
++
+ /* Per page locks:
+  * page_lock() is used for two purposes: pte serialization, and memory sharing.
+  *
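
With the order field in place, one array entry can describe a power-of-two range of frames rather than a single frame, and bootmem_region_zap() drops mfn through mfn + (1 << order). A standalone arithmetic check (not Xen code) using the 4Mb-at-1Gb entry added by the XSA-282 part 2 patch above:

    #include <stdio.h>

    #define PAGE_SHIFT 12

    struct platform_bad_page { unsigned long mfn; unsigned int order; };

    int main(void)
    {
        struct platform_bad_page p = { .mfn = 0x40000000UL >> PAGE_SHIFT,
                                       .order = 10 };
        unsigned long start = p.mfn << PAGE_SHIFT;
        unsigned long end   = (p.mfn + (1UL << p.order)) << PAGE_SHIFT;

        /* bootmem_region_zap(p.mfn, p.mfn + (1U << p.order)) covers: */
        printf("zap %#lx-%#lx (%lu KiB)\n", start, end, (end - start) >> 10);
        return 0;
    }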


