Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 29 Jul 2018 20:47:00 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r336876 - in head/sys: amd64/amd64 amd64/ia32 amd64/include conf dev/hyperv/vmbus/amd64
Message-ID:  <201807292047.w6TKl0hV004691@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Sun Jul 29 20:47:00 2018
New Revision: 336876
URL: https://svnweb.freebsd.org/changeset/base/336876

Log:
  Use SMAP on amd64.
  
  Ifunc selectors dispatch the copyin(9) family to the suitable variant,
  to set rflags.AC around userspace access.  The rflags.AC bit is cleared
  in all kernel entry points unconditionally, even on machines not
  supporting SMAP.
  
  Reviewed by:	jhb
  Sponsored by:	The FreeBSD Foundation
  Differential revision:	https://reviews.freebsd.org/D13838

Added:
  head/sys/amd64/amd64/copyout.c   (contents, props changed)
Modified:
  head/sys/amd64/amd64/exception.S
  head/sys/amd64/amd64/initcpu.c
  head/sys/amd64/amd64/machdep.c
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/amd64/support.S
  head/sys/amd64/amd64/trap.c
  head/sys/amd64/ia32/ia32_exception.S
  head/sys/amd64/include/asmacros.h
  head/sys/conf/files.amd64
  head/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S

Added: head/sys/amd64/amd64/copyout.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/amd64/amd64/copyout.c	Sun Jul 29 20:47:00 2018	(r336876)
@@ -0,0 +1,178 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2018 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/ifunc.h>
+
+int fubyte_nosmap(volatile const void *base);
+int fubyte_smap(volatile const void *base);
+DEFINE_IFUNC(, int, fubyte, (volatile const void *), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    fubyte_smap : fubyte_nosmap);
+}
+
+int fuword16_nosmap(volatile const void *base);
+int fuword16_smap(volatile const void *base);
+DEFINE_IFUNC(, int, fuword16, (volatile const void *), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    fuword16_smap : fuword16_nosmap);
+}
+
+int fueword_nosmap(volatile const void *base, long *val);
+int fueword_smap(volatile const void *base, long *val);
+DEFINE_IFUNC(, int, fueword, (volatile const void *, long *), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    fueword_smap : fueword_nosmap);
+}
+DEFINE_IFUNC(, int, fueword64, (volatile const void *, int64_t *), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    fueword_smap : fueword_nosmap);
+}
+
+int	fueword32_nosmap(volatile const void *base, int32_t *val);
+int	fueword32_smap(volatile const void *base, int32_t *val);
+DEFINE_IFUNC(, int, fueword32, (volatile const void *, int32_t *), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    fueword32_smap : fueword32_nosmap);
+}
+
+int	subyte_nosmap(volatile void *base, int byte);
+int	subyte_smap(volatile void *base, int byte);
+DEFINE_IFUNC(, int, subyte, (volatile void *, int), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    subyte_smap : subyte_nosmap);
+}
+
+int	suword16_nosmap(volatile void *base, int word);
+int	suword16_smap(volatile void *base, int word);
+DEFINE_IFUNC(, int, suword16, (volatile void *, int), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    suword16_smap : suword16_nosmap);
+}
+
+int	suword32_nosmap(volatile void *base, int32_t word);
+int	suword32_smap(volatile void *base, int32_t word);
+DEFINE_IFUNC(, int, suword32, (volatile void *, int32_t), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    suword32_smap : suword32_nosmap);
+}
+
+int	suword_nosmap(volatile void *base, long word);
+int	suword_smap(volatile void *base, long word);
+DEFINE_IFUNC(, int, suword, (volatile void *, long), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    suword_smap : suword_nosmap);
+}
+DEFINE_IFUNC(, int, suword64, (volatile void *, int64_t), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    suword_smap : suword_nosmap);
+}
+
+int	casueword32_nosmap(volatile uint32_t *base, uint32_t oldval,
+	    uint32_t *oldvalp, uint32_t newval);
+int	casueword32_smap(volatile uint32_t *base, uint32_t oldval,
+	    uint32_t *oldvalp, uint32_t newval);
+DEFINE_IFUNC(, int, casueword32, (volatile uint32_t *, uint32_t, uint32_t *,
+    uint32_t), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    casueword32_smap : casueword32_nosmap);
+}
+
+int	casueword_nosmap(volatile u_long *p, u_long oldval, u_long *oldvalp,
+	    u_long newval);
+int	casueword_smap(volatile u_long *p, u_long oldval, u_long *oldvalp,
+	    u_long newval);
+DEFINE_IFUNC(, int, casueword, (volatile u_long *, u_long, u_long *, u_long),
+    static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    casueword_smap : casueword_nosmap);
+}
+
+int	copyinstr_nosmap(const void *udaddr, void *kaddr, size_t len,
+	    size_t *lencopied);
+int	copyinstr_smap(const void *udaddr, void *kaddr, size_t len,
+	    size_t *lencopied);
+DEFINE_IFUNC(, int, copyinstr, (const void *, void *, size_t, size_t *),
+    static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    copyinstr_smap : copyinstr_nosmap);
+}
+
+int	copyin_nosmap(const void *udaddr, void *kaddr, size_t len);
+int	copyin_smap(const void *udaddr, void *kaddr, size_t len);
+DEFINE_IFUNC(, int, copyin, (const void *, void *, size_t), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    copyin_smap : copyin_nosmap);
+}
+
+int	copyout_nosmap(const void *kaddr, void *udaddr, size_t len);
+int	copyout_smap(const void *kaddr, void *udaddr, size_t len);
+DEFINE_IFUNC(, int, copyout, (const void *, void *, size_t), static)
+{
+
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
+	    copyout_smap : copyout_nosmap);
+}

Modified: head/sys/amd64/amd64/exception.S
==============================================================================
--- head/sys/amd64/amd64/exception.S	Sun Jul 29 20:34:44 2018	(r336875)
+++ head/sys/amd64/amd64/exception.S	Sun Jul 29 20:47:00 2018	(r336876)
@@ -43,8 +43,8 @@
 
 #include "assym.inc"
 
-#include <machine/asmacros.h>
 #include <machine/psl.h>
+#include <machine/asmacros.h>
 #include <machine/trap.h>
 #include <machine/specialreg.h>
 
@@ -196,7 +196,9 @@ alltraps_pushregs_no_rax:
 	movq	%r14,TF_R14(%rsp)
 	movq	%r15,TF_R15(%rsp)
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	FAKE_MCOUNT(TF_RIP(%rsp))
 #ifdef KDTRACE_HOOKS
 	/*
@@ -277,7 +279,9 @@ IDTVEC(dblfault)
 	movq	%r15,TF_R15(%rsp)
 	SAVE_SEGS
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
 	jz	1f			/* already running with kernel GS.base */
 	swapgs
@@ -571,7 +575,9 @@ IDTVEC(dbg)
 	movq	%r15,TF_R15(%rsp)
 	SAVE_SEGS
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	testb	$SEL_RPL_MASK,TF_CS(%rsp)
 	jnz	dbg_fromuserspace
 	/*
@@ -704,7 +710,9 @@ IDTVEC(nmi)
 	movq	%r15,TF_R15(%rsp)
 	SAVE_SEGS
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	xorl	%ebx,%ebx
 	testb	$SEL_RPL_MASK,TF_CS(%rsp)
 	jnz	nmi_fromuserspace
@@ -793,7 +801,9 @@ nmi_calltrap:
 	subq	%rcx,%rdx
 	movq	%rdx,%rdi	/* destination stack pointer */
 	shrq	$3,%rcx		/* trap frame size in long words */
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	rep
 	movsq			/* copy trapframe */
 	movq	%rdx,%rsp	/* we are on the regular kstack */
@@ -902,7 +912,9 @@ IDTVEC(mchk)
 	movq	%r15,TF_R15(%rsp)
 	SAVE_SEGS
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	xorl	%ebx,%ebx
 	testb	$SEL_RPL_MASK,TF_CS(%rsp)
 	jnz	mchk_fromuserspace

Modified: head/sys/amd64/amd64/initcpu.c
==============================================================================
--- head/sys/amd64/amd64/initcpu.c	Sun Jul 29 20:34:44 2018	(r336875)
+++ head/sys/amd64/amd64/initcpu.c	Sun Jul 29 20:47:00 2018	(r336876)
@@ -239,8 +239,12 @@ initializecpu(void)
 	 * to the kernel tables.  The boot loader enables the U bit in
 	 * its tables.
 	 */
-	if (!IS_BSP() && (cpu_stdext_feature & CPUID_STDEXT_SMEP))
-		cr4 |= CR4_SMEP;
+	if (!IS_BSP()) {
+		if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
+			cr4 |= CR4_SMEP;
+		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
+			cr4 |= CR4_SMAP;
+	}
 	load_cr4(cr4);
 	if (IS_BSP() && (amd_feature & AMDID_NX) != 0) {
 		msr = rdmsr(MSR_EFER) | EFER_NXE;

Modified: head/sys/amd64/amd64/machdep.c
==============================================================================
--- head/sys/amd64/amd64/machdep.c	Sun Jul 29 20:34:44 2018	(r336875)
+++ head/sys/amd64/amd64/machdep.c	Sun Jul 29 20:47:00 2018	(r336876)
@@ -1548,7 +1548,7 @@ amd64_conf_fast_syscall(void)
 	msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
 	    ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
 	wrmsr(MSR_STAR, msr);
-	wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D);
+	wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
 }
 
 u_int64_t

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Sun Jul 29 20:34:44 2018	(r336875)
+++ head/sys/amd64/amd64/pmap.c	Sun Jul 29 20:47:00 2018	(r336876)
@@ -1092,6 +1092,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 {
 	vm_offset_t va;
 	pt_entry_t *pte;
+	uint64_t cr4;
 	int i;
 
 	KERNend = *firstaddr;
@@ -1118,11 +1119,21 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
 
 
-	/* XXX do %cr0 as well */
-	load_cr4(rcr4() | CR4_PGE);
+	/*
+	 * Enable PG_G global pages, then switch to the kernel page
+	 * table from the bootstrap page table.  After the switch, it
+	 * is possible to enable SMEP and SMAP since PG_U bits are
+	 * correct now.
+	 */
+	cr4 = rcr4();
+	cr4 |= CR4_PGE;
+	load_cr4(cr4);
 	load_cr3(KPML4phys);
 	if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
-		load_cr4(rcr4() | CR4_SMEP);
+		cr4 |= CR4_SMEP;
+	if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
+		cr4 |= CR4_SMAP;
+	load_cr4(cr4);
 
 	/*
 	 * Initialize the kernel pmap (which is statically allocated).

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Sun Jul 29 20:34:44 2018	(r336875)
+++ head/sys/amd64/amd64/support.S	Sun Jul 29 20:47:00 2018	(r336876)
@@ -226,7 +226,7 @@ END(fillw)
  * copyout(from_kernel, to_user, len)
  *         %rdi,        %rsi,    %rdx
  */
-ENTRY(copyout)
+ENTRY(copyout_nosmap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rax
 	movq	$copyout_fault,PCB_ONFAULT(%rax)
@@ -268,6 +268,55 @@ ENTRY(copyout)
 	rep
 	movsb
 
+	jmp	done_copyout
+END(copyout_nosmap)
+
+ENTRY(copyout_smap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rax
+	/* Trap entry clears PSL.AC */
+	movq	$copyout_fault,PCB_ONFAULT(%rax)
+	testq	%rdx,%rdx			/* anything to do? */
+	jz	done_copyout
+
+	/*
+	 * Check explicitly for non-user addresses.  If 486 write protection
+	 * is being used, this check is essential because we are in kernel
+	 * mode so the h/w does not provide any protection against writing
+	 * kernel addresses.
+	 */
+
+	/*
+	 * First, prevent address wrapping.
+	 */
+	movq	%rsi,%rax
+	addq	%rdx,%rax
+	jc	copyout_fault
+/*
+ * XXX STOP USING VM_MAXUSER_ADDRESS.
+ * It is an end address, not a max, so every time it is used correctly it
+ * looks like there is an off by one error, and of course it caused an off
+ * by one error in several places.
+ */
+	movq	$VM_MAXUSER_ADDRESS,%rcx
+	cmpq	%rcx,%rax
+	ja	copyout_fault
+
+	xchgq	%rdi,%rsi
+	/* bcopy(%rsi, %rdi, %rdx) */
+	movq	%rdx,%rcx
+
+	shrq	$3,%rcx
+	cld
+	stac
+	rep
+	movsq
+	movb	%dl,%cl
+	andb	$7,%cl
+	rep
+	movsb
+	clac
+
 done_copyout:
 	xorl	%eax,%eax
 	movq	PCPU(CURPCB),%rdx
@@ -288,7 +337,7 @@ END(copyout)
  * copyin(from_user, to_kernel, len)
  *        %rdi,      %rsi,      %rdx
  */
-ENTRY(copyin)
+ENTRY(copyin_nosmap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rax
 	movq	$copyin_fault,PCB_ONFAULT(%rax)
@@ -309,13 +358,47 @@ ENTRY(copyin)
 	movq	%rdx,%rcx
 	movb	%cl,%al
 	shrq	$3,%rcx				/* copy longword-wise */
+	cld
 	rep
 	movsq
 	movb	%al,%cl
 	andb	$7,%cl				/* copy remaining bytes */
+	rep
+	movsb
+
+	jmp	done_copyin
+END(copyin_nosmap)
+
+ENTRY(copyin_smap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rax
+	movq	$copyin_fault,PCB_ONFAULT(%rax)
+	testq	%rdx,%rdx			/* anything to do? */
+	jz	done_copyin
+
+	/*
+	 * make sure address is valid
+	 */
+	movq	%rdi,%rax
+	addq	%rdx,%rax
+	jc	copyin_fault
+	movq	$VM_MAXUSER_ADDRESS,%rcx
+	cmpq	%rcx,%rax
+	ja	copyin_fault
+
+	xchgq	%rdi,%rsi
+	movq	%rdx,%rcx
+	movb	%cl,%al
+	shrq	$3,%rcx				/* copy longword-wise */
+	stac
+	rep
+	movsq
+	movb	%al,%cl
+	andb	$7,%cl				/* copy remaining bytes */
 	je	done_copyin
 	rep
 	movsb
+	clac
 
 done_copyin:
 	xorl	%eax,%eax
@@ -323,6 +406,7 @@ done_copyin:
 	movq	%rax,PCB_ONFAULT(%rdx)
 	POP_FRAME_POINTER
 	ret
+END(copyin_smap)
 
 	ALIGN_TEXT
 copyin_fault:
@@ -331,14 +415,13 @@ copyin_fault:
 	movq	$EFAULT,%rax
 	POP_FRAME_POINTER
 	ret
-END(copyin)
 
 /*
  * casueword32.  Compare and set user integer.  Returns -1 on fault,
  *        0 if access was successful.  Old value is written to *oldp.
  *        dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
  */
-ENTRY(casueword32)
+ENTRY(casueword32_nosmap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%r8
 	movq	$fusufault,PCB_ONFAULT(%r8)
@@ -370,14 +453,50 @@ ENTRY(casueword32)
 	movl	%esi,(%rdx)			/* oldp = %rdx */
 	POP_FRAME_POINTER
 	ret
-END(casueword32)
+END(casueword32_nosmap)
 
+ENTRY(casueword32_smap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%r8
+	movq	$fusufault,PCB_ONFAULT(%r8)
+
+	movq	$VM_MAXUSER_ADDRESS-4,%rax
+	cmpq	%rax,%rdi			/* verify address is valid */
+	ja	fusufault
+
+	movl	%esi,%eax			/* old */
+	stac
+#ifdef SMP
+	lock
+#endif
+	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
+	clac
+
+	/*
+	 * The old value is in %eax.  If the store succeeded it will be the
+	 * value we expected (old) from before the store, otherwise it will
+	 * be the current value.  Save %eax into %esi to prepare the return
+	 * value.
+	 */
+	movl	%eax,%esi
+	xorl	%eax,%eax
+	movq	%rax,PCB_ONFAULT(%r8)
+
+	/*
+	 * Access the oldp after the pcb_onfault is cleared, to correctly
+	 * catch corrupted pointer.
+	 */
+	movl	%esi,(%rdx)			/* oldp = %rdx */
+	POP_FRAME_POINTER
+	ret
+END(casueword32_smap)
+
 /*
  * casueword.  Compare and set user long.  Returns -1 on fault,
  *        0 if access was successful.  Old value is written to *oldp.
  *        dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
  */
-ENTRY(casueword)
+ENTRY(casueword_nosmap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%r8
 	movq	$fusufault,PCB_ONFAULT(%r8)
@@ -403,16 +522,45 @@ ENTRY(casueword)
 	movq	%rsi,(%rdx)
 	POP_FRAME_POINTER
 	ret
-END(casueword)
+END(casueword_nosmap)
 
+ENTRY(casueword_smap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%r8
+	movq	$fusufault,PCB_ONFAULT(%r8)
+
+	movq	$VM_MAXUSER_ADDRESS-4,%rax
+	cmpq	%rax,%rdi			/* verify address is valid */
+	ja	fusufault
+
+	movq	%rsi,%rax			/* old */
+	stac
+#ifdef SMP
+	lock
+#endif
+	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
+	clac
+
+	/*
+	 * The old value is in %rax.  If the store succeeded it will be the
+	 * value we expected (old) from before the store, otherwise it will
+	 * be the current value.
+	 */
+	movq	%rax,%rsi
+	xorl	%eax,%eax
+	movq	%rax,PCB_ONFAULT(%r8)
+	movq	%rsi,(%rdx)
+	POP_FRAME_POINTER
+	ret
+END(casueword_smap)
+
 /*
  * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
  * byte from user memory.
  * addr = %rdi, valp = %rsi
  */
 
-ALTENTRY(fueword64)
-ENTRY(fueword)
+ENTRY(fueword_nosmap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
@@ -427,14 +575,32 @@ ENTRY(fueword)
 	movq	%r11,(%rsi)
 	POP_FRAME_POINTER
 	ret
-END(fueword64)
-END(fueword)
+END(fueword64_nosmap)
 
-ENTRY(fueword32)
+ENTRY(fueword_smap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
 
+	movq	$VM_MAXUSER_ADDRESS-8,%rax
+	cmpq	%rax,%rdi			/* verify address is valid */
+	ja	fusufault
+
+	xorl	%eax,%eax
+	stac
+	movq	(%rdi),%r11
+	clac
+	movq	%rax,PCB_ONFAULT(%rcx)
+	movq	%r11,(%rsi)
+	POP_FRAME_POINTER
+	ret
+END(fueword64_smap)
+
+ENTRY(fueword32_nosmap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
 	movq	$VM_MAXUSER_ADDRESS-4,%rax
 	cmpq	%rax,%rdi			/* verify address is valid */
 	ja	fusufault
@@ -445,13 +611,32 @@ ENTRY(fueword32)
 	movl	%r11d,(%rsi)
 	POP_FRAME_POINTER
 	ret
-END(fueword32)
+END(fueword32_nosmap)
 
-ENTRY(fuword16)
+ENTRY(fueword32_smap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
 
+	movq	$VM_MAXUSER_ADDRESS-4,%rax
+	cmpq	%rax,%rdi			/* verify address is valid */
+	ja	fusufault
+
+	xorl	%eax,%eax
+	stac
+	movl	(%rdi),%r11d
+	clac
+	movq	%rax,PCB_ONFAULT(%rcx)
+	movl	%r11d,(%rsi)
+	POP_FRAME_POINTER
+	ret
+END(fueword32_smap)
+
+ENTRY(fuword16_nosmap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
 	movq	$VM_MAXUSER_ADDRESS-2,%rax
 	cmpq	%rax,%rdi
 	ja	fusufault
@@ -460,13 +645,30 @@ ENTRY(fuword16)
 	movq	$0,PCB_ONFAULT(%rcx)
 	POP_FRAME_POINTER
 	ret
-END(fuword16)
+END(fuword16_nosmap)
 
-ENTRY(fubyte)
+ENTRY(fuword16_smap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
 
+	movq	$VM_MAXUSER_ADDRESS-2,%rax
+	cmpq	%rax,%rdi
+	ja	fusufault
+
+	stac
+	movzwl	(%rdi),%eax
+	clac
+	movq	$0,PCB_ONFAULT(%rcx)
+	POP_FRAME_POINTER
+	ret
+END(fuword16_smap)
+
+ENTRY(fubyte_nosmap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
 	movq	$VM_MAXUSER_ADDRESS-1,%rax
 	cmpq	%rax,%rdi
 	ja	fusufault
@@ -475,9 +677,27 @@ ENTRY(fubyte)
 	movq	$0,PCB_ONFAULT(%rcx)
 	POP_FRAME_POINTER
 	ret
-END(fubyte)
+END(fubyte_nosmap)
 
+ENTRY(fubyte_smap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
+	movq	$VM_MAXUSER_ADDRESS-1,%rax
+	cmpq	%rax,%rdi
+	ja	fusufault
+
+	stac
+	movzbl	(%rdi),%eax
+	clac
+	movq	$0,PCB_ONFAULT(%rcx)
+	POP_FRAME_POINTER
+	ret
+END(fubyte_smap)
+
 	ALIGN_TEXT
+	/* Fault entry clears PSL.AC */
 fusufault:
 	movq	PCPU(CURPCB),%rcx
 	xorl	%eax,%eax
@@ -491,8 +711,7 @@ fusufault:
  * user memory.
  * addr = %rdi, value = %rsi
  */
-ALTENTRY(suword64)
-ENTRY(suword)
+ENTRY(suword_nosmap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
@@ -507,14 +726,32 @@ ENTRY(suword)
 	movq	%rax,PCB_ONFAULT(%rcx)
 	POP_FRAME_POINTER
 	ret
-END(suword64)
-END(suword)
+END(suword_nosmap)
 
-ENTRY(suword32)
+ENTRY(suword_smap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
 
+	movq	$VM_MAXUSER_ADDRESS-8,%rax
+	cmpq	%rax,%rdi			/* verify address validity */
+	ja	fusufault
+
+	stac
+	movq	%rsi,(%rdi)
+	clac
+	xorl	%eax,%eax
+	movq	PCPU(CURPCB),%rcx
+	movq	%rax,PCB_ONFAULT(%rcx)
+	POP_FRAME_POINTER
+	ret
+END(suword_smap)
+
+ENTRY(suword32_nosmap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
 	movq	$VM_MAXUSER_ADDRESS-4,%rax
 	cmpq	%rax,%rdi			/* verify address validity */
 	ja	fusufault
@@ -525,13 +762,32 @@ ENTRY(suword32)
 	movq	%rax,PCB_ONFAULT(%rcx)
 	POP_FRAME_POINTER
 	ret
-END(suword32)
+END(suword32_nosmap)
 
-ENTRY(suword16)
+ENTRY(suword32_smap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
 
+	movq	$VM_MAXUSER_ADDRESS-4,%rax
+	cmpq	%rax,%rdi			/* verify address validity */
+	ja	fusufault
+
+	stac
+	movl	%esi,(%rdi)
+	clac
+	xorl	%eax,%eax
+	movq	PCPU(CURPCB),%rcx
+	movq	%rax,PCB_ONFAULT(%rcx)
+	POP_FRAME_POINTER
+	ret
+END(suword32_smap)
+
+ENTRY(suword16_nosmap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
 	movq	$VM_MAXUSER_ADDRESS-2,%rax
 	cmpq	%rax,%rdi			/* verify address validity */
 	ja	fusufault
@@ -542,13 +798,32 @@ ENTRY(suword16)
 	movq	%rax,PCB_ONFAULT(%rcx)
 	POP_FRAME_POINTER
 	ret
-END(suword16)
+END(suword16_nosmap)
 
-ENTRY(subyte)
+ENTRY(suword16_smap)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%rcx
 	movq	$fusufault,PCB_ONFAULT(%rcx)
 
+	movq	$VM_MAXUSER_ADDRESS-2,%rax
+	cmpq	%rax,%rdi			/* verify address validity */
+	ja	fusufault
+
+	stac
+	movw	%si,(%rdi)
+	clac
+	xorl	%eax,%eax
+	movq	PCPU(CURPCB),%rcx		/* restore trashed register */
+	movq	%rax,PCB_ONFAULT(%rcx)
+	POP_FRAME_POINTER
+	ret
+END(suword16_smap)
+
+ENTRY(subyte_nosmap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
 	movq	$VM_MAXUSER_ADDRESS-1,%rax
 	cmpq	%rax,%rdi			/* verify address validity */
 	ja	fusufault
@@ -560,8 +835,28 @@ ENTRY(subyte)
 	movq	%rax,PCB_ONFAULT(%rcx)
 	POP_FRAME_POINTER
 	ret
-END(subyte)
+END(subyte_nosmap)
 
+ENTRY(subyte_smap)
+	PUSH_FRAME_POINTER
+	movq	PCPU(CURPCB),%rcx
+	movq	$fusufault,PCB_ONFAULT(%rcx)
+
+	movq	$VM_MAXUSER_ADDRESS-1,%rax
+	cmpq	%rax,%rdi			/* verify address validity */
+	ja	fusufault
+
+	movl	%esi,%eax
+	stac
+	movb	%al,(%rdi)
+	clac
+	xorl	%eax,%eax
+	movq	PCPU(CURPCB),%rcx		/* restore trashed register */
+	movq	%rax,PCB_ONFAULT(%rcx)
+	POP_FRAME_POINTER
+	ret
+END(subyte_smap)
+
 /*
  * copyinstr(from, to, maxlen, int *lencopied)
  *           %rdi, %rsi, %rdx, %rcx
@@ -571,7 +866,7 @@ END(subyte)
  *	EFAULT on protection violations. If lencopied is non-zero,
  *	return the actual length in *lencopied.
  */
-ENTRY(copyinstr)
+ENTRY(copyinstr_nosmap)
 	PUSH_FRAME_POINTER
 	movq	%rdx,%r8			/* %r8 = maxlen */
 	movq	%rcx,%r9			/* %r9 = *len */
@@ -592,29 +887,67 @@ ENTRY(copyinstr)
 	movq	%rax,%r8
 1:
 	incq	%rdx
+	cld
 
 2:
 	decq	%rdx
-	jz	3f
+	jz	copyinstr_toolong
 
 	lodsb
 	stosb
 	orb	%al,%al
 	jnz	2b
 
+	jmp	copyinstr_succ
+END(copyinstr_nosmap)
+
+ENTRY(copyinstr_smap)
+	PUSH_FRAME_POINTER
+	movq	%rdx,%r8			/* %r8 = maxlen */
+	movq	%rcx,%r9			/* %r9 = *len */
+	xchgq	%rdi,%rsi			/* %rdi = from, %rsi = to */
+	movq	PCPU(CURPCB),%rcx
+	movq	$cpystrflt,PCB_ONFAULT(%rcx)
+
+	movq	$VM_MAXUSER_ADDRESS,%rax
+
+	/* make sure 'from' is within bounds */
+	subq	%rsi,%rax
+	jbe	cpystrflt
+
+	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
+	cmpq	%rdx,%rax
+	jae	1f
+	movq	%rax,%rdx
+	movq	%rax,%r8
+1:
+	incq	%rdx
+
+2:
+	decq	%rdx
+	jz	copyinstr_succ
+
+	stac
+	lodsb
+	stosb
+	clac
+	orb	%al,%al
+	jnz	2b
+
+copyinstr_succ:
 	/* Success -- 0 byte reached */
 	decq	%rdx
 	xorl	%eax,%eax
 	jmp	cpystrflt_x
-3:
+copyinstr_toolong:
 	/* rdx is zero - return ENAMETOOLONG or EFAULT */
 	movq	$VM_MAXUSER_ADDRESS,%rax
 	cmpq	%rax,%rsi
 	jae	cpystrflt
-4:
 	movq	$ENAMETOOLONG,%rax
 	jmp	cpystrflt_x
 
+	/* Fault entry clears PSL.AC */
 cpystrflt:
 	movq	$EFAULT,%rax
 
@@ -630,7 +963,7 @@ cpystrflt_x:
 1:
 	POP_FRAME_POINTER
 	ret
-END(copyinstr)
+END(copyinstr_smap)
 
 /*
  * copystr(from, to, maxlen, int *lencopied)

Modified: head/sys/amd64/amd64/trap.c
==============================================================================
--- head/sys/amd64/amd64/trap.c	Sun Jul 29 20:34:44 2018	(r336875)
+++ head/sys/amd64/amd64/trap.c	Sun Jul 29 20:47:00 2018	(r336876)
@@ -673,6 +673,24 @@ trap_check(struct trapframe *frame)
 	trap(frame);
 }
 
+static bool
+trap_is_smap(struct trapframe *frame)
+{
+
+	/*
+	 * A page fault on a userspace address is classified as
+	 * SMAP-induced if:
+	 * - SMAP is supported;
+	 * - kernel mode accessed present data page;
+	 * - rflags.AC was cleared.
+	 * Kernel must never access user space with rflags.AC cleared
+	 * if SMAP is enabled.
+	 */
+	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 &&
+	    (frame->tf_err & (PGEX_P | PGEX_U | PGEX_I | PGEX_RSV)) ==
+	    PGEX_P && (frame->tf_rflags & PSL_AC) == 0);
+}
+
 static int
 trap_pfault(struct trapframe *frame, int usermode)
 {
@@ -750,9 +768,13 @@ trap_pfault(struct trapframe *frame, int usermode)
 		 * handling routine.  Since accessing the address
 		 * without the handler is a bug, do not try to handle
 		 * it normally, and panic immediately.
+		 *
+		 * If SMAP is enabled, filter SMAP faults also,
+		 * because illegal access might occur to the mapped
+		 * user address, causing infinite loop.
 		 */
 		if (!usermode && (td->td_intr_nesting_level != 0 ||
-		    curpcb->pcb_onfault == NULL)) {
+		    trap_is_smap(frame) || curpcb->pcb_onfault == NULL)) {
 			trap_fatal(frame, eva);
 			return (-1);
 		}

Modified: head/sys/amd64/ia32/ia32_exception.S
==============================================================================
--- head/sys/amd64/ia32/ia32_exception.S	Sun Jul 29 20:34:44 2018	(r336875)
+++ head/sys/amd64/ia32/ia32_exception.S	Sun Jul 29 20:47:00 2018	(r336876)
@@ -70,7 +70,9 @@ int0x80_syscall_common:
 	movq	%r14,TF_R14(%rsp)
 	movq	%r15,TF_R15(%rsp)
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
-	cld
+	pushfq
+	andq	$~(PSL_D | PSL_AC),(%rsp)
+	popfq
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	movq	%rsp, %rdi
 	call	ia32_syscall

Modified: head/sys/amd64/include/asmacros.h
==============================================================================
--- head/sys/amd64/include/asmacros.h	Sun Jul 29 20:34:44 2018	(r336875)
+++ head/sys/amd64/include/asmacros.h	Sun Jul 29 20:47:00 2018	(r336876)
@@ -255,7 +255,9 @@ X\vec_name:
 	movq	%r15,TF_R15(%rsp)
 	SAVE_SEGS

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201807292047.w6TKl0hV004691>