Date: Tue, 20 Nov 2018 14:17:07 +0000 (UTC)
From: Roger Pau Monné <royger@FreeBSD.org>
To: ports-committers@freebsd.org, svn-ports-all@freebsd.org, svn-ports-head@freebsd.org
Subject: svn commit: r485430 - in head/emulators/xen-kernel411: . files
Message-ID: <201811201417.wAKEH7Dm012650@repo.freebsd.org>
Author: royger (src committer) Date: Tue Nov 20 14:17:07 2018 New Revision: 485430 URL: https://svnweb.freebsd.org/changeset/ports/485430 Log: xen: add XSA patches Fixes for XSA-{275,276,277,279,280,282} Sponsored by: Citrix Systems R&D Added: head/emulators/xen-kernel411/files/0001-x86-hvm-ioreq-fix-page-referencing.patch (contents, props changed) head/emulators/xen-kernel411/files/0002-x86-hvm-ioreq-use-ref-counted-target-assigned-shared.patch (contents, props changed) head/emulators/xen-kernel411/files/xsa275-4.11-1.patch (contents, props changed) head/emulators/xen-kernel411/files/xsa275-4.11-2.patch (contents, props changed) head/emulators/xen-kernel411/files/xsa277.patch (contents, props changed) head/emulators/xen-kernel411/files/xsa279.patch (contents, props changed) head/emulators/xen-kernel411/files/xsa280-1.patch (contents, props changed) head/emulators/xen-kernel411/files/xsa280-4.11-2.patch (contents, props changed) head/emulators/xen-kernel411/files/xsa282-2.patch (contents, props changed) head/emulators/xen-kernel411/files/xsa282-4.11-1.patch (contents, props changed) Modified: head/emulators/xen-kernel411/Makefile Modified: head/emulators/xen-kernel411/Makefile ============================================================================== --- head/emulators/xen-kernel411/Makefile Tue Nov 20 14:05:01 2018 (r485429) +++ head/emulators/xen-kernel411/Makefile Tue Nov 20 14:17:07 2018 (r485430) @@ -2,7 +2,7 @@ PORTNAME= xen PORTVERSION= 4.11.0 -PORTREVISION= 2 +PORTREVISION= 3 CATEGORIES= emulators MASTER_SITES= http://downloads.xenproject.org/release/xen/${PORTVERSION}/ PKGNAMESUFFIX= -kernel411 @@ -92,6 +92,17 @@ EXTRA_PATCHES+= ${FILESDIR}/0001-xen-Port-the-array_in ${FILESDIR}/0042-x86-write-to-correct-variable-in-parse_pv_l1tf.patch:-p1 # XSA-278: x86: Nested VT-x usable even when disabled EXTRA_PATCHES+= ${FILESDIR}/xsa278-4.11.patch:-p1 +# XSA-{275,276,277,279,280,282} +EXTRA_PATCHES+= ${FILESDIR}/xsa275-4.11-1.patch:-p1 \ + ${FILESDIR}/xsa275-4.11-2.patch:-p1 \ + ${FILESDIR}/0001-x86-hvm-ioreq-fix-page-referencing.patch:-p1 \ + ${FILESDIR}/0002-x86-hvm-ioreq-use-ref-counted-target-assigned-shared.patch:-p1 \ + ${FILESDIR}/xsa277.patch:-p1 \ + ${FILESDIR}/xsa279.patch:-p1 \ + ${FILESDIR}/xsa280-1.patch:-p1 \ + ${FILESDIR}/xsa280-4.11-2.patch:-p1 \ + ${FILESDIR}/xsa282-4.11-1.patch:-p1 \ + ${FILESDIR}/xsa282-2.patch:-p1 .include <bsd.port.options.mk> Added: head/emulators/xen-kernel411/files/0001-x86-hvm-ioreq-fix-page-referencing.patch ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/emulators/xen-kernel411/files/0001-x86-hvm-ioreq-fix-page-referencing.patch Tue Nov 20 14:17:07 2018 (r485430) @@ -0,0 +1,120 @@ +From bcc115ba39d2985dcf356ba8a9ac291e314f1f0f Mon Sep 17 00:00:00 2001 +From: Jan Beulich <JBeulich@suse.com> +Date: Thu, 11 Oct 2018 04:00:26 -0600 +Subject: [PATCH 1/2] x86/hvm/ioreq: fix page referencing + +The code does not take a page reference in hvm_alloc_ioreq_mfn(), only a +type reference. This can lead to a situation where a malicious domain with +XSM_DM_PRIV can engineer a sequence as follows: + +- create IOREQ server: no pages as yet. +- acquire resource: page allocated, total 0. +- decrease reservation: -1 ref, total -1. + +This will cause Xen to hit a BUG_ON() in free_domheap_pages(). + +This patch fixes the issue by changing the call to get_page_type() in +hvm_alloc_ioreq_mfn() to a call to get_page_and_type(). 
This change +in turn requires an extra put_page() in hvm_free_ioreq_mfn() in the case +that _PGC_allocated is still set (i.e. a decrease reservation has not +occurred) to avoid the page being leaked. + +This is part of XSA-276. + +Reported-by: Julien Grall <julien.grall@arm.com> +Reported-by: Julien Grall <julien.grall@arm.com> +Signed-off-by: Paul Durrant <paul.durrant@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +--- + xen/arch/x86/hvm/ioreq.c | 46 +++++++++++++++++++++++++++------------- + 1 file changed, 31 insertions(+), 15 deletions(-) + +diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c +index f39f391929..bdc2687014 100644 +--- a/xen/arch/x86/hvm/ioreq.c ++++ b/xen/arch/x86/hvm/ioreq.c +@@ -327,6 +327,7 @@ static int hvm_map_ioreq_gfn(struct hvm_ioreq_server *s, bool buf) + static int hvm_alloc_ioreq_mfn(struct hvm_ioreq_server *s, bool buf) + { + struct hvm_ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq; ++ struct page_info *page; + + if ( iorp->page ) + { +@@ -349,27 +350,33 @@ static int hvm_alloc_ioreq_mfn(struct hvm_ioreq_server *s, bool buf) + * could fail if the emulating domain has already reached its + * maximum allocation. + */ +- iorp->page = alloc_domheap_page(s->emulator, MEMF_no_refcount); ++ page = alloc_domheap_page(s->emulator, MEMF_no_refcount); + +- if ( !iorp->page ) ++ if ( !page ) + return -ENOMEM; + +- if ( !get_page_type(iorp->page, PGT_writable_page) ) +- goto fail1; ++ if ( !get_page_and_type(page, s->emulator, PGT_writable_page) ) ++ { ++ /* ++ * The domain can't possibly know about this page yet, so failure ++ * here is a clear indication of something fishy going on. ++ */ ++ domain_crash(s->emulator); ++ return -ENODATA; ++ } + +- iorp->va = __map_domain_page_global(iorp->page); ++ iorp->va = __map_domain_page_global(page); + if ( !iorp->va ) +- goto fail2; ++ goto fail; + ++ iorp->page = page; + clear_page(iorp->va); + return 0; + +- fail2: +- put_page_type(iorp->page); +- +- fail1: +- put_page(iorp->page); +- iorp->page = NULL; ++ fail: ++ if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) ++ put_page(page); ++ put_page_and_type(page); + + return -ENOMEM; + } +@@ -377,15 +384,24 @@ static int hvm_alloc_ioreq_mfn(struct hvm_ioreq_server *s, bool buf) + static void hvm_free_ioreq_mfn(struct hvm_ioreq_server *s, bool buf) + { + struct hvm_ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq; ++ struct page_info *page = iorp->page; + +- if ( !iorp->page ) ++ if ( !page ) + return; + ++ iorp->page = NULL; ++ + unmap_domain_page_global(iorp->va); + iorp->va = NULL; + +- put_page_and_type(iorp->page); +- iorp->page = NULL; ++ /* ++ * Check whether we need to clear the allocation reference before ++ * dropping the explicit references taken by get_page_and_type(). 
++ */ ++ if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) ++ put_page(page); ++ ++ put_page_and_type(page); + } + + bool is_ioreq_server_page(struct domain *d, const struct page_info *page) +-- +2.19.1 + Added: head/emulators/xen-kernel411/files/0002-x86-hvm-ioreq-use-ref-counted-target-assigned-shared.patch ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/emulators/xen-kernel411/files/0002-x86-hvm-ioreq-use-ref-counted-target-assigned-shared.patch Tue Nov 20 14:17:07 2018 (r485430) @@ -0,0 +1,83 @@ +From 0bb2969630fbc92a0510bf120578b58efb74cdab Mon Sep 17 00:00:00 2001 +From: Paul Durrant <Paul.Durrant@citrix.com> +Date: Thu, 1 Nov 2018 17:30:20 +0000 +Subject: [PATCH 2/2] x86/hvm/ioreq: use ref-counted target-assigned shared + pages + +Passing MEMF_no_refcount to alloc_domheap_pages() will allocate, as +expected, a page that is assigned to the specified domain but is not +accounted for in tot_pages. Unfortunately there is no logic for tracking +such allocations and avoiding any adjustment to tot_pages when the page +is freed. + +The only caller of alloc_domheap_pages() that passes MEMF_no_refcount is +hvm_alloc_ioreq_mfn() so this patch removes use of the flag from that +call-site to avoid the possibility of a domain using an ioreq server as +a means to adjust its tot_pages and hence allocate more memory than it +should be able to. + +However, the reason for using the flag in the first place was to avoid +the allocation failing if the emulator domain is already at its maximum +memory limit. Hence this patch switches to allocating memory from the +target domain instead of the emulator domain. There is already an extra +memory allowance of 2MB (LIBXL_HVM_EXTRA_MEMORY) applied to HVM guests, +which is sufficient to cover the pages required by the supported +configuration of a single IOREQ server for QEMU. (Stub-domains do not, +so far, use resource mapping). It also also the case the QEMU will have +mapped the IOREQ server pages before the guest boots, hence it is not +possible for the guest to inflate its balloon to consume these pages. + +Reported-by: Julien Grall <julien.grall@arm.com> +Signed-off-by: Paul Durrant <paul.durrant@citrix.com> +--- + xen/arch/x86/hvm/ioreq.c | 12 ++---------- + xen/arch/x86/mm.c | 6 ------ + 2 files changed, 2 insertions(+), 16 deletions(-) + +diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c +index bdc2687014..fd10ee6146 100644 +--- a/xen/arch/x86/hvm/ioreq.c ++++ b/xen/arch/x86/hvm/ioreq.c +@@ -342,20 +342,12 @@ static int hvm_alloc_ioreq_mfn(struct hvm_ioreq_server *s, bool buf) + return 0; + } + +- /* +- * Allocated IOREQ server pages are assigned to the emulating +- * domain, not the target domain. This is safe because the emulating +- * domain cannot be destroyed until the ioreq server is destroyed. +- * Also we must use MEMF_no_refcount otherwise page allocation +- * could fail if the emulating domain has already reached its +- * maximum allocation. 
+- */ +- page = alloc_domheap_page(s->emulator, MEMF_no_refcount); ++ page = alloc_domheap_page(s->target, 0); + + if ( !page ) + return -ENOMEM; + +- if ( !get_page_and_type(page, s->emulator, PGT_writable_page) ) ++ if ( !get_page_and_type(page, s->target, PGT_writable_page) ) + { + /* + * The domain can't possibly know about this page yet, so failure +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index 7d4871b791..24b215d785 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -4396,12 +4396,6 @@ int arch_acquire_resource(struct domain *d, unsigned int type, + + mfn_list[i] = mfn_x(mfn); + } +- +- /* +- * The frames will have been assigned to the domain that created +- * the ioreq server. +- */ +- *flags |= XENMEM_rsrc_acq_caller_owned; + break; + } + +-- +2.19.1 + Added: head/emulators/xen-kernel411/files/xsa275-4.11-1.patch ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/emulators/xen-kernel411/files/xsa275-4.11-1.patch Tue Nov 20 14:17:07 2018 (r485430) @@ -0,0 +1,104 @@ +From: Roger Pau Monné <roger.pau@citrix.com> +Subject: amd/iommu: fix flush checks + +Flush checking for AMD IOMMU didn't check whether the previous entry +was present, or whether the flags (writable/readable) changed in order +to decide whether a flush should be executed. + +Fix this by taking the writable/readable/next-level fields into account, +together with the present bit. + +Along these lines the flushing in amd_iommu_map_page() must not be +omitted for PV domains. The comment there was simply wrong: Mappings may +very well change, both their addresses and their permissions. Ultimately +this should honor iommu_dont_flush_iotlb, but to achieve this +amd_iommu_ops first needs to gain an .iotlb_flush hook. + +Also make clear_iommu_pte_present() static, to demonstrate there's no +caller omitting the (subsequent) flush. + +This is part of XSA-275. 
+ +Reported-by: Paul Durrant <paul.durrant@citrix.com> +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> + +--- a/xen/drivers/passthrough/amd/iommu_map.c ++++ b/xen/drivers/passthrough/amd/iommu_map.c +@@ -35,7 +35,7 @@ static unsigned int pfn_to_pde_idx(unsig + return idx; + } + +-void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn) ++static void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn) + { + u64 *table, *pte; + +@@ -49,23 +49,42 @@ static bool_t set_iommu_pde_present(u32 + unsigned int next_level, + bool_t iw, bool_t ir) + { +- u64 addr_lo, addr_hi, maddr_old, maddr_next; ++ uint64_t addr_lo, addr_hi, maddr_next; + u32 entry; +- bool_t need_flush = 0; ++ bool need_flush = false, old_present; + + maddr_next = (u64)next_mfn << PAGE_SHIFT; + +- addr_hi = get_field_from_reg_u32(pde[1], +- IOMMU_PTE_ADDR_HIGH_MASK, +- IOMMU_PTE_ADDR_HIGH_SHIFT); +- addr_lo = get_field_from_reg_u32(pde[0], +- IOMMU_PTE_ADDR_LOW_MASK, +- IOMMU_PTE_ADDR_LOW_SHIFT); +- +- maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT); +- +- if ( maddr_old != maddr_next ) +- need_flush = 1; ++ old_present = get_field_from_reg_u32(pde[0], IOMMU_PTE_PRESENT_MASK, ++ IOMMU_PTE_PRESENT_SHIFT); ++ if ( old_present ) ++ { ++ bool old_r, old_w; ++ unsigned int old_level; ++ uint64_t maddr_old; ++ ++ addr_hi = get_field_from_reg_u32(pde[1], ++ IOMMU_PTE_ADDR_HIGH_MASK, ++ IOMMU_PTE_ADDR_HIGH_SHIFT); ++ addr_lo = get_field_from_reg_u32(pde[0], ++ IOMMU_PTE_ADDR_LOW_MASK, ++ IOMMU_PTE_ADDR_LOW_SHIFT); ++ old_level = get_field_from_reg_u32(pde[0], ++ IOMMU_PDE_NEXT_LEVEL_MASK, ++ IOMMU_PDE_NEXT_LEVEL_SHIFT); ++ old_w = get_field_from_reg_u32(pde[1], ++ IOMMU_PTE_IO_WRITE_PERMISSION_MASK, ++ IOMMU_PTE_IO_WRITE_PERMISSION_SHIFT); ++ old_r = get_field_from_reg_u32(pde[1], ++ IOMMU_PTE_IO_READ_PERMISSION_MASK, ++ IOMMU_PTE_IO_READ_PERMISSION_SHIFT); ++ ++ maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT); ++ ++ if ( maddr_old != maddr_next || iw != old_w || ir != old_r || ++ old_level != next_level ) ++ need_flush = true; ++ } + + addr_lo = maddr_next & DMA_32BIT_MASK; + addr_hi = maddr_next >> 32; +@@ -687,10 +706,7 @@ int amd_iommu_map_page(struct domain *d, + if ( !need_flush ) + goto out; + +- /* 4K mapping for PV guests never changes, +- * no need to flush if we trust non-present bits */ +- if ( is_hvm_domain(d) ) +- amd_iommu_flush_pages(d, gfn, 0); ++ amd_iommu_flush_pages(d, gfn, 0); + + for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2; + merge_level <= hd->arch.paging_mode; merge_level++ ) Added: head/emulators/xen-kernel411/files/xsa275-4.11-2.patch ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/emulators/xen-kernel411/files/xsa275-4.11-2.patch Tue Nov 20 14:17:07 2018 (r485430) @@ -0,0 +1,68 @@ +From: Jan Beulich <jbeulich@suse.com> +Subject: AMD/IOMMU: suppress PTE merging after initial table creation + +The logic is not fit for this purpose, so simply disable its use until +it can be fixed / replaced. Note that this re-enables merging for the +table creation case, which was disabled as a (perhaps unintended) side +effect of the earlier "amd/iommu: fix flush checks". It relies on no +page getting mapped more than once (with different properties) in this +process, as that would still be beyond what the merging logic can cope +with. But arch_iommu_populate_page_table() guarantees this afaict. + +This is part of XSA-275. 
+ +Reported-by: Paul Durrant <paul.durrant@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> + +--- a/xen/drivers/passthrough/amd/iommu_map.c ++++ b/xen/drivers/passthrough/amd/iommu_map.c +@@ -702,11 +702,24 @@ int amd_iommu_map_page(struct domain *d, + !!(flags & IOMMUF_writable), + !!(flags & IOMMUF_readable)); + +- /* Do not increase pde count if io mapping has not been changed */ +- if ( !need_flush ) +- goto out; ++ if ( need_flush ) ++ { ++ amd_iommu_flush_pages(d, gfn, 0); ++ /* No further merging, as the logic doesn't cope. */ ++ hd->arch.no_merge = true; ++ } + +- amd_iommu_flush_pages(d, gfn, 0); ++ /* ++ * Suppress merging of non-R/W mappings or after initial table creation, ++ * as the merge logic does not cope with this. ++ */ ++ if ( hd->arch.no_merge || flags != (IOMMUF_writable | IOMMUF_readable) ) ++ goto out; ++ if ( d->creation_finished ) ++ { ++ hd->arch.no_merge = true; ++ goto out; ++ } + + for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2; + merge_level <= hd->arch.paging_mode; merge_level++ ) +@@ -780,6 +793,10 @@ int amd_iommu_unmap_page(struct domain * + + /* mark PTE as 'page not present' */ + clear_iommu_pte_present(pt_mfn[1], gfn); ++ ++ /* No further merging in amd_iommu_map_page(), as the logic doesn't cope. */ ++ hd->arch.no_merge = true; ++ + spin_unlock(&hd->arch.mapping_lock); + + amd_iommu_flush_pages(d, gfn, 0); +--- a/xen/include/asm-x86/iommu.h ++++ b/xen/include/asm-x86/iommu.h +@@ -40,6 +40,7 @@ struct arch_iommu + + /* amd iommu support */ + int paging_mode; ++ bool no_merge; + struct page_info *root_table; + struct guest_iommu *g_iommu; + }; Added: head/emulators/xen-kernel411/files/xsa277.patch ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/emulators/xen-kernel411/files/xsa277.patch Tue Nov 20 14:17:07 2018 (r485430) @@ -0,0 +1,47 @@ +From: Andrew Cooper <andrew.cooper3@citrix.com> +Subject: x86/mm: Put the gfn on all paths after get_gfn_query() + +c/s 7867181b2 "x86/PoD: correctly handle non-order-0 decrease-reservation +requests" introduced an early exit in guest_remove_page() for unexpected p2m +types. However, get_gfn_query() internally takes the p2m lock, and must be +matched with a put_gfn() call later. + +Fix the erroneous comment beside the declaration of get_gfn_query(). + +This is XSA-277. + +Reported-by: Paul Durrant <paul.durrant@citrix.com> +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> + +diff --git a/xen/common/memory.c b/xen/common/memory.c +index 987395f..26b7123 100644 +--- a/xen/common/memory.c ++++ b/xen/common/memory.c +@@ -305,7 +305,11 @@ int guest_remove_page(struct domain *d, unsigned long gmfn) + #ifdef CONFIG_X86 + mfn = get_gfn_query(d, gmfn, &p2mt); + if ( unlikely(p2mt == p2m_invalid) || unlikely(p2mt == p2m_mmio_dm) ) ++ { ++ put_gfn(d, gmfn); ++ + return -ENOENT; ++ } + + if ( unlikely(p2m_is_paging(p2mt)) ) + { +diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h +index ac33f50..6d849a5 100644 +--- a/xen/include/asm-x86/p2m.h ++++ b/xen/include/asm-x86/p2m.h +@@ -448,10 +448,7 @@ static inline mfn_t __nonnull(3) get_gfn_type( + return get_gfn_type_access(p2m_get_hostp2m(d), gfn, t, &a, q, NULL); + } + +-/* Syntactic sugar: most callers will use one of these. +- * N.B. get_gfn_query() is the _only_ one guaranteed not to take the +- * p2m lock; none of the others can be called with the p2m or paging +- * lock held. */ ++/* Syntactic sugar: most callers will use one of these. 
*/ + #define get_gfn(d, g, t) get_gfn_type((d), (g), (t), P2M_ALLOC) + #define get_gfn_query(d, g, t) get_gfn_type((d), (g), (t), 0) + #define get_gfn_unshare(d, g, t) get_gfn_type((d), (g), (t), \ Added: head/emulators/xen-kernel411/files/xsa279.patch ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/emulators/xen-kernel411/files/xsa279.patch Tue Nov 20 14:17:07 2018 (r485430) @@ -0,0 +1,37 @@ +From: Andrew Cooper <andrew.cooper3@citrix.com> +Subject: x86/mm: Don't perform flush after failing to update a guests L1e + +If the L1e update hasn't occured, the flush cannot do anything useful. This +skips the potentially expensive vcpumask_to_pcpumask() conversion, and +broadcast TLB shootdown. + +More importantly however, we might be in the error path due to a bad va +parameter from the guest, and this should not propagate into the TLB flushing +logic. The INVPCID instruction for example raises #GP for a non-canonical +address. + +This is XSA-279. + +Reported-by: Matthew Daley <mattd@bugfuzz.com> +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> + +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index 703f330..75663c6 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -4155,6 +4155,14 @@ static int __do_update_va_mapping( + if ( pl1e ) + unmap_domain_page(pl1e); + ++ /* ++ * Any error at this point means that we haven't change the L1e. Skip the ++ * flush, as it won't do anything useful. Furthermore, va is guest ++ * controlled and not necesserily audited by this point. ++ */ ++ if ( rc ) ++ return rc; ++ + switch ( flags & UVMF_FLUSHTYPE_MASK ) + { + case UVMF_TLB_FLUSH: Added: head/emulators/xen-kernel411/files/xsa280-1.patch ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/emulators/xen-kernel411/files/xsa280-1.patch Tue Nov 20 14:17:07 2018 (r485430) @@ -0,0 +1,116 @@ +From: Jan Beulich <jbeulich@suse.com> +Subject: x86/shadow: move OOS flag bit positions + +In preparation of reducing struct page_info's shadow_flags field to 16 +bits, lower the bit positions used for SHF_out_of_sync and +SHF_oos_may_write. + +Instead of also adjusting the open coded use in _get_page_type(), +introduce shadow_prepare_page_type_change() to contain knowledge of the +bit positions to shadow code. + +This is part of XSA-280. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Tim Deegan <tim@xen.org> +--- +v2: Rename function and pass full type. + +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -2712,17 +2712,8 @@ static int _get_page_type(struct page_in + { + struct domain *d = page_get_owner(page); + +- /* +- * Normally we should never let a page go from type count 0 +- * to type count 1 when it is shadowed. One exception: +- * out-of-sync shadowed pages are allowed to become +- * writeable. 
+- */ +- if ( d && shadow_mode_enabled(d) +- && (page->count_info & PGC_page_table) +- && !((page->shadow_flags & (1u<<29)) +- && type == PGT_writable_page) ) +- shadow_remove_all_shadows(d, page_to_mfn(page)); ++ if ( d && shadow_mode_enabled(d) ) ++ shadow_prepare_page_type_change(d, page, type); + + ASSERT(!(x & PGT_pae_xen_l2)); + if ( (x & PGT_type_mask) != type ) +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -749,6 +749,9 @@ int sh_unsync(struct vcpu *v, mfn_t gmfn + || !v->domain->arch.paging.shadow.oos_active ) + return 0; + ++ BUILD_BUG_ON(!(typeof(pg->shadow_flags))SHF_out_of_sync); ++ BUILD_BUG_ON(!(typeof(pg->shadow_flags))SHF_oos_may_write); ++ + pg->shadow_flags |= SHF_out_of_sync|SHF_oos_may_write; + oos_hash_add(v, gmfn); + perfc_incr(shadow_unsync); +@@ -2413,6 +2416,26 @@ void sh_remove_shadows(struct domain *d, + paging_unlock(d); + } + ++void shadow_prepare_page_type_change(struct domain *d, struct page_info *page, ++ unsigned long new_type) ++{ ++ if ( !(page->count_info & PGC_page_table) ) ++ return; ++ ++#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) ++ /* ++ * Normally we should never let a page go from type count 0 to type ++ * count 1 when it is shadowed. One exception: out-of-sync shadowed ++ * pages are allowed to become writeable. ++ */ ++ if ( (page->shadow_flags & SHF_oos_may_write) && ++ new_type == PGT_writable_page ) ++ return; ++#endif ++ ++ shadow_remove_all_shadows(d, page_to_mfn(page)); ++} ++ + static void + sh_remove_all_shadows_and_parents(struct domain *d, mfn_t gmfn) + /* Even harsher: this is a HVM page that we thing is no longer a pagetable. +--- a/xen/arch/x86/mm/shadow/private.h ++++ b/xen/arch/x86/mm/shadow/private.h +@@ -285,8 +285,8 @@ static inline void sh_terminate_list(str + * codepath is called during that time and is sensitive to oos issues, it may + * need to use the second flag. + */ +-#define SHF_out_of_sync (1u<<30) +-#define SHF_oos_may_write (1u<<29) ++#define SHF_out_of_sync (1u << (SH_type_max_shadow + 1)) ++#define SHF_oos_may_write (1u << (SH_type_max_shadow + 2)) + + #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */ + +--- a/xen/include/asm-x86/shadow.h ++++ b/xen/include/asm-x86/shadow.h +@@ -81,6 +81,10 @@ void shadow_final_teardown(struct domain + + void sh_remove_shadows(struct domain *d, mfn_t gmfn, int fast, int all); + ++/* Adjust shadows ready for a guest page to change its type. */ ++void shadow_prepare_page_type_change(struct domain *d, struct page_info *page, ++ unsigned long new_type); ++ + /* Discard _all_ mappings from the domain's shadows. 
*/ + void shadow_blow_tables_per_domain(struct domain *d); + +@@ -105,6 +109,10 @@ int shadow_set_allocation(struct domain + static inline void sh_remove_shadows(struct domain *d, mfn_t gmfn, + int fast, int all) {} + ++static inline void shadow_prepare_page_type_change(struct domain *d, ++ struct page_info *page, ++ unsigned long new_type) {} ++ + static inline void shadow_blow_tables_per_domain(struct domain *d) {} + + static inline int shadow_domctl(struct domain *d, Added: head/emulators/xen-kernel411/files/xsa280-4.11-2.patch ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/emulators/xen-kernel411/files/xsa280-4.11-2.patch Tue Nov 20 14:17:07 2018 (r485430) @@ -0,0 +1,141 @@ +From: Jan Beulich <jbeulich@suse.com> +Subject: x86/shadow: shrink struct page_info's shadow_flags to 16 bits + +This is to avoid it overlapping the linear_pt_count field needed for PV +domains. Introduce a separate, HVM-only pagetable_dying field to replace +the sole one left in the upper 16 bits. + +Note that the accesses to ->shadow_flags in shadow_{pro,de}mote() get +switched to non-atomic, non-bitops operations, as {test,set,clear}_bit() +are not allowed on uint16_t fields and hence their use would have +required ugly casts. This is fine because all updates of the field ought +to occur with the paging lock held, and other updates of it use |= and +&= as well (i.e. using atomic operations here didn't really guard +against potentially racing updates elsewhere). + +This is part of XSA-280. + +Reported-by: Prgmr.com Security <security@prgmr.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Tim Deegan <tim@xen.org> + +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -1028,10 +1028,14 @@ void shadow_promote(struct domain *d, mf + + /* Is the page already shadowed? */ + if ( !test_and_set_bit(_PGC_page_table, &page->count_info) ) ++ { + page->shadow_flags = 0; ++ if ( is_hvm_domain(d) ) ++ page->pagetable_dying = false; ++ } + +- ASSERT(!test_bit(type, &page->shadow_flags)); +- set_bit(type, &page->shadow_flags); ++ ASSERT(!(page->shadow_flags & (1u << type))); ++ page->shadow_flags |= 1u << type; + TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PROMOTE); + } + +@@ -1040,9 +1044,9 @@ void shadow_demote(struct domain *d, mfn + struct page_info *page = mfn_to_page(gmfn); + + ASSERT(test_bit(_PGC_page_table, &page->count_info)); +- ASSERT(test_bit(type, &page->shadow_flags)); ++ ASSERT(page->shadow_flags & (1u << type)); + +- clear_bit(type, &page->shadow_flags); ++ page->shadow_flags &= ~(1u << type); + + if ( (page->shadow_flags & SHF_page_type_mask) == 0 ) + { +@@ -2921,7 +2925,7 @@ void sh_remove_shadows(struct domain *d, + if ( !fast && all && (pg->count_info & PGC_page_table) ) + { + SHADOW_ERROR("can't find all shadows of mfn %"PRI_mfn" " +- "(shadow_flags=%08x)\n", ++ "(shadow_flags=%04x)\n", + mfn_x(gmfn), pg->shadow_flags); + domain_crash(d); + } +--- a/xen/arch/x86/mm/shadow/multi.c ++++ b/xen/arch/x86/mm/shadow/multi.c +@@ -3299,8 +3299,8 @@ static int sh_page_fault(struct vcpu *v, + + /* Unshadow if we are writing to a toplevel pagetable that is + * flagged as a dying process, and that is not currently used. 
*/ +- if ( sh_mfn_is_a_page_table(gmfn) +- && (mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying) ) ++ if ( sh_mfn_is_a_page_table(gmfn) && is_hvm_domain(d) && ++ mfn_to_page(gmfn)->pagetable_dying ) + { + int used = 0; + struct vcpu *tmp; +@@ -4254,9 +4254,9 @@ int sh_rm_write_access_from_sl1p(struct + ASSERT(mfn_valid(smfn)); + + /* Remember if we've been told that this process is being torn down */ +- if ( curr->domain == d ) ++ if ( curr->domain == d && is_hvm_domain(d) ) + curr->arch.paging.shadow.pagetable_dying +- = !!(mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying); ++ = mfn_to_page(gmfn)->pagetable_dying; + + sp = mfn_to_page(smfn); + +@@ -4572,10 +4572,10 @@ static void sh_pagetable_dying(struct vc + : shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l2_pae_shadow); + } + +- if ( mfn_valid(smfn) ) ++ if ( mfn_valid(smfn) && is_hvm_domain(d) ) + { + gmfn = _mfn(mfn_to_page(smfn)->v.sh.back); +- mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying; ++ mfn_to_page(gmfn)->pagetable_dying = true; + shadow_unhook_mappings(d, smfn, 1/* user pages only */); + flush = 1; + } +@@ -4612,9 +4612,9 @@ static void sh_pagetable_dying(struct vc + smfn = shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l4_64_shadow); + #endif + +- if ( mfn_valid(smfn) ) ++ if ( mfn_valid(smfn) && is_hvm_domain(d) ) + { +- mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying; ++ mfn_to_page(gmfn)->pagetable_dying = true; + shadow_unhook_mappings(d, smfn, 1/* user pages only */); + /* Now flush the TLB: we removed toplevel mappings. */ + flush_tlb_mask(d->dirty_cpumask); +--- a/xen/arch/x86/mm/shadow/private.h ++++ b/xen/arch/x86/mm/shadow/private.h +@@ -292,8 +292,6 @@ static inline void sh_terminate_list(str + + #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */ + +-#define SHF_pagetable_dying (1u<<31) +- + static inline int sh_page_has_multiple_shadows(struct page_info *pg) + { + u32 shadows; +--- a/xen/include/asm-x86/mm.h ++++ b/xen/include/asm-x86/mm.h +@@ -259,8 +259,15 @@ struct page_info + * Guest pages with a shadow. This does not conflict with + * tlbflush_timestamp since page table pages are explicitly not + * tracked for TLB-flush avoidance when a guest runs in shadow mode. ++ * ++ * pagetable_dying is used for HVM domains only. The layout here has ++ * to avoid re-use of the space used by linear_pt_count, which (only) ++ * PV guests use. + */ +- u32 shadow_flags; ++ struct { ++ uint16_t shadow_flags; ++ bool pagetable_dying; ++ }; + + /* When in use as a shadow, next shadow in this hash chain. */ + __pdx_t next_shadow; Added: head/emulators/xen-kernel411/files/xsa282-2.patch ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/emulators/xen-kernel411/files/xsa282-2.patch Tue Nov 20 14:17:07 2018 (r485430) @@ -0,0 +1,42 @@ +From: Jan Beulich <jbeulich@suse.com> +Subject: x86: work around HLE host lockup erratum + +XACQUIRE prefixed accesses to the 4Mb range of memory starting at 1Gb +are liable to lock up the processor. Disallow use of this memory range. + +Unfortunately the available Core Gen7 and Gen8 spec updates are pretty +old, so I can only guess that they're similarly affected when Core Gen6 +is and the Xeon counterparts are, too. + +This is part of XSA-282. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +--- +v2: Don't apply the workaround when running ourselves virtualized. 
+ +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -5853,6 +5853,22 @@ const struct platform_bad_page *__init g + { .mfn = 0x20138000 >> PAGE_SHIFT }, + { .mfn = 0x40004000 >> PAGE_SHIFT }, + }; ++ static const struct platform_bad_page __initconst hle_bad_page = { ++ .mfn = 0x40000000 >> PAGE_SHIFT, .order = 10 ++ }; ++ ++ switch ( cpuid_eax(1) & 0x000f3ff0 ) ++ { ++ case 0x000406e0: /* erratum SKL167 */ ++ case 0x00050650: /* erratum SKZ63 */ ++ case 0x000506e0: /* errata SKL167 / SKW159 */ ++ case 0x000806e0: /* erratum KBL??? */ ++ case 0x000906e0: /* errata KBL??? / KBW114 / CFW103 */ ++ *array_size = (cpuid_eax(0) >= 7 && ++ !(cpuid_ecx(1) & cpufeat_mask(X86_FEATURE_HYPERVISOR)) && ++ (cpuid_count_ebx(7, 0) & cpufeat_mask(X86_FEATURE_HLE))); ++ return &hle_bad_page; ++ } + + *array_size = ARRAY_SIZE(snb_bad_pages); + igd_id = pci_conf_read32(0, 0, 2, 0, 0); Added: head/emulators/xen-kernel411/files/xsa282-4.11-1.patch ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/emulators/xen-kernel411/files/xsa282-4.11-1.patch Tue Nov 20 14:17:07 2018 (r485430) @@ -0,0 +1,147 @@ +From: Jan Beulich <jbeulich@suse.com> +Subject: x86: extend get_platform_badpages() interface + +Use a structure so along with an address (now frame number) an order can +also be specified. + +This is part of XSA-282. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> + +--- a/xen/arch/x86/guest/xen.c ++++ b/xen/arch/x86/guest/xen.c +@@ -40,7 +40,7 @@ bool __read_mostly xen_guest; + static __read_mostly uint32_t xen_cpuid_base; + extern char hypercall_page[]; + static struct rangeset *mem; +-static unsigned long __initdata reserved_pages[2]; ++static struct platform_bad_page __initdata reserved_pages[2]; + + DEFINE_PER_CPU(unsigned int, vcpu_id); + +@@ -326,7 +326,7 @@ void __init hypervisor_fixup_e820(struct + panic("Unable to get " #p); \ + mark_pfn_as_ram(e820, pfn); \ + ASSERT(i < ARRAY_SIZE(reserved_pages)); \ +- reserved_pages[i++] = pfn << PAGE_SHIFT; \ ++ reserved_pages[i++].mfn = pfn; \ + }) + MARK_PARAM_RAM(HVM_PARAM_STORE_PFN); + if ( !pv_console ) +@@ -334,7 +334,7 @@ void __init hypervisor_fixup_e820(struct + #undef MARK_PARAM_RAM + } + +-const unsigned long *__init hypervisor_reserved_pages(unsigned int *size) ++const struct platform_bad_page *__init hypervisor_reserved_pages(unsigned int *size) + { + ASSERT(xen_guest); + +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -5768,23 +5768,23 @@ void arch_dump_shared_mem_info(void) + mem_sharing_get_nr_saved_mfns()); + } + +-const unsigned long *__init get_platform_badpages(unsigned int *array_size) ++const struct platform_bad_page *__init get_platform_badpages(unsigned int *array_size) + { + u32 igd_id; +- static unsigned long __initdata bad_pages[] = { +- 0x20050000, +- 0x20110000, +- 0x20130000, +- 0x20138000, +- 0x40004000, ++ static const struct platform_bad_page __initconst snb_bad_pages[] = { ++ { .mfn = 0x20050000 >> PAGE_SHIFT }, ++ { .mfn = 0x20110000 >> PAGE_SHIFT }, ++ { .mfn = 0x20130000 >> PAGE_SHIFT }, ++ { .mfn = 0x20138000 >> PAGE_SHIFT }, ++ { .mfn = 0x40004000 >> PAGE_SHIFT }, + }; + +- *array_size = ARRAY_SIZE(bad_pages); ++ *array_size = ARRAY_SIZE(snb_bad_pages); + igd_id = pci_conf_read32(0, 0, 2, 0, 0); +- if ( !IS_SNB_GFX(igd_id) ) +- return NULL; ++ if ( IS_SNB_GFX(igd_id) ) ++ return snb_bad_pages; + +- return bad_pages; ++ return NULL; + } + + void paging_invlpg(struct 
vcpu *v, unsigned long va) +--- a/xen/common/page_alloc.c ++++ b/xen/common/page_alloc.c +@@ -270,7 +270,7 @@ void __init init_boot_pages(paddr_t ps, + unsigned long bad_spfn, bad_epfn; + const char *p; + #ifdef CONFIG_X86 +- const unsigned long *badpage = NULL; ++ const struct platform_bad_page *badpage; + unsigned int i, array_size; + + BUILD_BUG_ON(8 * sizeof(frame_table->u.free.first_dirty) < +@@ -299,8 +299,8 @@ void __init init_boot_pages(paddr_t ps, + { + for ( i = 0; i < array_size; i++ ) + { +- bootmem_region_zap(*badpage >> PAGE_SHIFT, +- (*badpage >> PAGE_SHIFT) + 1); ++ bootmem_region_zap(badpage->mfn, ++ badpage->mfn + (1U << badpage->order)); + badpage++; + } + } +@@ -312,8 +312,8 @@ void __init init_boot_pages(paddr_t ps, + { + for ( i = 0; i < array_size; i++ ) + { +- bootmem_region_zap(*badpage >> PAGE_SHIFT, +- (*badpage >> PAGE_SHIFT) + 1); ++ bootmem_region_zap(badpage->mfn, ++ badpage->mfn + (1U << badpage->order)); + badpage++; + } + } +--- a/xen/include/asm-x86/guest/xen.h ++++ b/xen/include/asm-x86/guest/xen.h +@@ -37,7 +37,7 @@ void hypervisor_ap_setup(void); + int hypervisor_alloc_unused_page(mfn_t *mfn); + int hypervisor_free_unused_page(mfn_t mfn); + void hypervisor_fixup_e820(struct e820map *e820); +-const unsigned long *hypervisor_reserved_pages(unsigned int *size); ++const struct platform_bad_page *hypervisor_reserved_pages(unsigned int *size); + uint32_t hypervisor_cpuid_base(void); + void hypervisor_resume(void); + +@@ -65,7 +65,7 @@ static inline void hypervisor_fixup_e820 + ASSERT_UNREACHABLE(); + } + +-static inline const unsigned long *hypervisor_reserved_pages(unsigned int *size) ++static inline const struct platform_bad_page *hypervisor_reserved_pages(unsigned int *size) + { + ASSERT_UNREACHABLE(); + return NULL; +--- a/xen/include/asm-x86/mm.h ++++ b/xen/include/asm-x86/mm.h +@@ -348,7 +348,13 @@ void zap_ro_mpt(mfn_t mfn); + + bool is_iomem_page(mfn_t mfn); + +-const unsigned long *get_platform_badpages(unsigned int *array_size); ++struct platform_bad_page { ++ unsigned long mfn; ++ unsigned int order; ++}; ++ ++const struct platform_bad_page *get_platform_badpages(unsigned int *array_size); ++ + /* Per page locks: + * page_lock() is used for two purposes: pte serialization, and memory sharing. + *
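The core of the XSA-275 fix above is the reworked flush check in set_iommu_pde_present(): a flush is only skipped when the old entry was not present, or when nothing about it (address, read/write permissions, next-level field) actually changes. Below is a minimal standalone sketch of that decision in plain C; the struct and field names are hypothetical stand-ins for the bits the real code extracts from the AMD IOMMU PDE words with get_field_from_reg_u32(), not the actual Xen data structures.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative sketch of the flush decision from xsa275-4.11-1.patch.
 * The fields below are hypothetical; the real code reads them out of
 * the raw PDE registers.
 */
struct pde_state {
    bool present;
    bool readable;
    bool writable;
    unsigned int next_level;
    uint64_t maddr;              /* machine address the entry points at */
};

/* A flush is needed only when an already-present entry actually changes. */
static bool need_iommu_flush(const struct pde_state *prev,
                             const struct pde_state *next)
{
    if ( !prev->present )
        return false;            /* nothing could be cached for this entry */

    return prev->maddr != next->maddr ||
           prev->writable != next->writable ||
           prev->readable != next->readable ||
           prev->next_level != next->next_level;
}

int main(void)
{
    struct pde_state before = { .present = true, .readable = true,
                                .writable = false, .next_level = 0,
                                .maddr = 0x1000 };
    struct pde_state after = before;

    after.writable = true;       /* permissions changed: flush required */
    printf("flush needed: %d\n", need_iommu_flush(&before, &after));
    return 0;
}

Both the presence check and the permission/level comparison matter here: comparing only the mapped address, as the pre-patch code did, would leave stale IOTLB entries whenever a mapping changed its permissions or page-table level without moving.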
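The model check in xsa282-2.patch relies on CPUID leaf 1: EAX packs the stepping (bits 3:0), model (7:4), family (11:8) and extended model (19:16), and masking it with 0x000f3ff0 keeps the family/model fields while discarding the stepping, so every stepping of an affected model matches. The following is a rough illustration of just that matching step, using a hypothetical helper name; the real check in get_platform_badpages() additionally requires the HLE feature to be enumerated and is skipped when Xen itself runs virtualized.

#include <stdint.h>
#include <stdio.h>

/*
 * Hypothetical helper showing only the family/model matching used by
 * xsa282-2.patch; erratum labels mirror the patch's own comments.
 */
static int hle_erratum_model(uint32_t cpuid_1_eax)
{
    switch ( cpuid_1_eax & 0x000f3ff0 )
    {
    case 0x000406e0:    /* erratum SKL167 */
    case 0x00050650:    /* erratum SKZ63 */
    case 0x000506e0:    /* errata SKL167 / SKW159 */
    case 0x000806e0:    /* erratum KBL??? */
    case 0x000906e0:    /* errata KBL??? / KBW114 / CFW103 */
        return 1;
    }
    return 0;
}

int main(void)
{
    /* Family 6, extended model 5, model 0xe, stepping 3 -> 0x000506e3. */
    printf("affected: %d\n", hle_erratum_model(0x000506e3));
    return 0;
}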