Date:      Sun, 29 Jul 2018 23:57:47 +0200
From:      Oliver Pinter <oliver.pinter@hardenedbsd.org>
To:        Konstantin Belousov <kib@freebsd.org>
Cc:        src-committers@freebsd.org, svn-src-all@freebsd.org,  svn-src-head@freebsd.org
Subject:   Re: svn commit: r336876 - in head/sys: amd64/amd64 amd64/ia32 amd64/include conf dev/hyperv/vmbus/amd64
Message-ID:  <CAPQ4ffsHqf8OiqUF-WBJ_0KbZGgAOs-6npyLjqWYFN+oCq=8Pw@mail.gmail.com>
In-Reply-To: <201807292047.w6TKl0hV004691@repo.freebsd.org>
References:  <201807292047.w6TKl0hV004691@repo.freebsd.org>

On 7/29/18, Konstantin Belousov <kib@freebsd.org> wrote:
> Author: kib
> Date: Sun Jul 29 20:47:00 2018
> New Revision: 336876
> URL: https://svnweb.freebsd.org/changeset/base/336876
>
> Log:
>   Use SMAP on amd64.
>
>   Ifuncs selectors dispatch copyin(9) family to the suitable variant, to
>   set rflags.AC around userspace access.  Rflags.AC bit is cleared in
>   all kernel entry points unconditionally even on machines not
>   supporting SMAP.
>
>   Reviewed by:	jhb
>   Sponsored by:	The FreeBSD Foundation
>   Differential revision:	https://reviews.freebsd.org/D13838
>
> Added:
>   head/sys/amd64/amd64/copyout.c   (contents, props changed)
> Modified:
>   head/sys/amd64/amd64/exception.S
>   head/sys/amd64/amd64/initcpu.c
>   head/sys/amd64/amd64/machdep.c
>   head/sys/amd64/amd64/pmap.c
>   head/sys/amd64/amd64/support.S
>   head/sys/amd64/amd64/trap.c
>   head/sys/amd64/ia32/ia32_exception.S
>   head/sys/amd64/include/asmacros.h
>   head/sys/conf/files.amd64
>   head/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S
>
> Added: head/sys/amd64/amd64/copyout.c
> ==============================================================================
> --- /dev/null	00:00:00 1970	(empty, because file is newly added)
> +++ head/sys/amd64/amd64/copyout.c	Sun Jul 29 20:47:00 2018	(r336876)
> @@ -0,0 +1,178 @@
> +/*-
> + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
> + *
> + * Copyright (c) 2018 The FreeBSD Foundation
> + * All rights reserved.
> + *
> + * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
> + * under sponsorship from the FreeBSD Foundation.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#include <sys/cdefs.h>
> +__FBSDID("$FreeBSD$");
> +
> +#include <sys/param.h>
> +#include <sys/systm.h>
> +
> +#include <machine/md_var.h>
> +#include <machine/specialreg.h>
> +#include <x86/ifunc.h>
> +
> +int fubyte_nosmap(volatile const void *base);
> +int fubyte_smap(volatile const void *base);
> +DEFINE_IFUNC(, int, fubyte, (volatile const void *), static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    fubyte_smap : fubyte_nosmap);
> +}
> +
> +int fuword16_nosmap(volatile const void *base);
> +int fuword16_smap(volatile const void *base);
> +DEFINE_IFUNC(, int, fuword16, (volatile const void *), static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    fuword16_smap : fuword16_nosmap);
> +}
> +
> +int fueword_nosmap(volatile const void *base, long *val);
> +int fueword_smap(volatile const void *base, long *val);
> +DEFINE_IFUNC(, int, fueword, (volatile const void *, long *), static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    fueword_smap : fueword_nosmap);
> +}
> +DEFINE_IFUNC(, int, fueword64, (volatile const void *, int64_t *), static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    fueword_smap : fueword_nosmap);
> +}
> +
> +int	fueword32_nosmap(volatile const void *base, int32_t *val);
> +int	fueword32_smap(volatile const void *base, int32_t *val);
> +DEFINE_IFUNC(, int, fueword32, (volatile const void *, int32_t *), static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    fueword32_smap : fueword32_nosmap);
> +}
> +
> +int	subyte_nosmap(volatile void *base, int byte);
> +int	subyte_smap(volatile void *base, int byte);
> +DEFINE_IFUNC(, int, subyte, (volatile void *, int), static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    subyte_smap : subyte_nosmap);
> +}
> +
> +int	suword16_nosmap(volatile void *base, int word);
> +int	suword16_smap(volatile void *base, int word);
> +DEFINE_IFUNC(, int, suword16, (volatile void *, int), static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    suword16_smap : suword16_nosmap);
> +}
> +
> +int	suword32_nosmap(volatile void *base, int32_t word);
> +int	suword32_smap(volatile void *base, int32_t word);
> +DEFINE_IFUNC(, int, suword32, (volatile void *, int32_t), static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    suword32_smap : suword32_nosmap);
> +}
> +
> +int	suword_nosmap(volatile void *base, long word);
> +int	suword_smap(volatile void *base, long word);
> +DEFINE_IFUNC(, int, suword, (volatile void *, long), static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    suword_smap : suword_nosmap);
> +}
> +DEFINE_IFUNC(, int, suword64, (volatile void *, int64_t), static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    suword_smap : suword_nosmap);
> +}
> +
> +int	casueword32_nosmap(volatile uint32_t *base, uint32_t oldval,
> +	    uint32_t *oldvalp, uint32_t newval);
> +int	casueword32_smap(volatile uint32_t *base, uint32_t oldval,
> +	    uint32_t *oldvalp, uint32_t newval);
> +DEFINE_IFUNC(, int, casueword32, (volatile uint32_t *, uint32_t, uint32_t *,
> +    uint32_t), static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    casueword32_smap : casueword32_nosmap);
> +}
> +
> +int	casueword_nosmap(volatile u_long *p, u_long oldval, u_long *oldvalp,
> +	    u_long newval);
> +int	casueword_smap(volatile u_long *p, u_long oldval, u_long *oldvalp,
> +	    u_long newval);
> +DEFINE_IFUNC(, int, casueword, (volatile u_long *, u_long, u_long *, u_long),
> +    static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    casueword_smap : casueword_nosmap);
> +}
> +
> +int	copyinstr_nosmap(const void *udaddr, void *kaddr, size_t len,
> +	    size_t *lencopied);
> +int	copyinstr_smap(const void *udaddr, void *kaddr, size_t len,
> +	    size_t *lencopied);
> +DEFINE_IFUNC(, int, copyinstr, (const void *, void *, size_t, size_t *),
> +    static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    copyinstr_smap : copyinstr_nosmap);
> +}
> +
> +int	copyin_nosmap(const void *udaddr, void *kaddr, size_t len);
> +int	copyin_smap(const void *udaddr, void *kaddr, size_t len);
> +DEFINE_IFUNC(, int, copyin, (const void *, void *, size_t), static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    copyin_smap : copyin_nosmap);
> +}
> +
> +int	copyout_nosmap(const void *kaddr, void *udaddr, size_t len);
> +int	copyout_smap(const void *kaddr, void *udaddr, size_t len);
> +DEFINE_IFUNC(, int, copyout, (const void *, void *, size_t), static)
> +{
> +
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
> +	    copyout_smap : copyout_nosmap);
> +}
>
> Modified: head/sys/amd64/amd64/exception.S
> ==============================================================================
> --- head/sys/amd64/amd64/exception.S	Sun Jul 29 20:34:44 2018	(r336875)
> +++ head/sys/amd64/amd64/exception.S	Sun Jul 29 20:47:00 2018	(r336876)
> @@ -43,8 +43,8 @@
>
>  #include "assym.inc"
>
> -#include <machine/asmacros.h>
>  #include <machine/psl.h>
> +#include <machine/asmacros.h>
>  #include <machine/trap.h>
>  #include <machine/specialreg.h>
>
> @@ -196,7 +196,9 @@ alltraps_pushregs_no_rax:
>  	movq	%r14,TF_R14(%rsp)
>  	movq	%r15,TF_R15(%rsp)
>  	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
> -	cld
> +	pushfq
> +	andq	$~(PSL_D | PSL_AC),(%rsp)
> +	popfq
>  	FAKE_MCOUNT(TF_RIP(%rsp))
>  #ifdef KDTRACE_HOOKS
>  	/*
> @@ -277,7 +279,9 @@ IDTVEC(dblfault)
>  	movq	%r15,TF_R15(%rsp)
>  	SAVE_SEGS
>  	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
> -	cld
> +	pushfq
> +	andq	$~(PSL_D | PSL_AC),(%rsp)
> +	popfq
>  	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
>  	jz	1f			/* already running with kernel GS.base */
>  	swapgs
> @@ -571,7 +575,9 @@ IDTVEC(dbg)
>  	movq	%r15,TF_R15(%rsp)
>  	SAVE_SEGS
>  	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
> -	cld
> +	pushfq
> +	andq	$~(PSL_D | PSL_AC),(%rsp)
> +	popfq
>  	testb	$SEL_RPL_MASK,TF_CS(%rsp)
>  	jnz	dbg_fromuserspace
>  	/*
> @@ -704,7 +710,9 @@ IDTVEC(nmi)
>  	movq	%r15,TF_R15(%rsp)
>  	SAVE_SEGS
>  	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
> -	cld
> +	pushfq
> +	andq	$~(PSL_D | PSL_AC),(%rsp)
> +	popfq
>  	xorl	%ebx,%ebx
>  	testb	$SEL_RPL_MASK,TF_CS(%rsp)
>  	jnz	nmi_fromuserspace
> @@ -793,7 +801,9 @@ nmi_calltrap:
>  	subq	%rcx,%rdx
>  	movq	%rdx,%rdi	/* destination stack pointer */
>  	shrq	$3,%rcx		/* trap frame size in long words */
> -	cld
> +	pushfq
> +	andq	$~(PSL_D | PSL_AC),(%rsp)
> +	popfq
>  	rep
>  	movsq			/* copy trapframe */
>  	movq	%rdx,%rsp	/* we are on the regular kstack */
> @@ -902,7 +912,9 @@ IDTVEC(mchk)
>  	movq	%r15,TF_R15(%rsp)
>  	SAVE_SEGS
>  	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
> -	cld
> +	pushfq
> +	andq	$~(PSL_D | PSL_AC),(%rsp)
> +	popfq
>  	xorl	%ebx,%ebx
>  	testb	$SEL_RPL_MASK,TF_CS(%rsp)
>  	jnz	mchk_fromuserspace
>
> Modified: head/sys/amd64/amd64/initcpu.c
> ==============================================================================
> --- head/sys/amd64/amd64/initcpu.c	Sun Jul 29 20:34:44 2018	(r336875)
> +++ head/sys/amd64/amd64/initcpu.c	Sun Jul 29 20:47:00 2018	(r336876)
> @@ -239,8 +239,12 @@ initializecpu(void)
>  	 * to the kernel tables.  The boot loader enables the U bit in
>  	 * its tables.
>  	 */
> -	if (!IS_BSP() && (cpu_stdext_feature & CPUID_STDEXT_SMEP))
> -		cr4 |= CR4_SMEP;
> +	if (!IS_BSP()) {
> +		if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
> +			cr4 |= CR4_SMEP;
> +		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
> +			cr4 |= CR4_SMAP;
> +	}
>  	load_cr4(cr4);
>  	if (IS_BSP() && (amd_feature & AMDID_NX) != 0) {
>  		msr = rdmsr(MSR_EFER) | EFER_NXE;
>
> Modified: head/sys/amd64/amd64/machdep.c
> ==============================================================================
> --- head/sys/amd64/amd64/machdep.c	Sun Jul 29 20:34:44 2018	(r336875)
> +++ head/sys/amd64/amd64/machdep.c	Sun Jul 29 20:47:00 2018	(r336876)
> @@ -1548,7 +1548,7 @@ amd64_conf_fast_syscall(void)
>  	msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
>  	    ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
>  	wrmsr(MSR_STAR, msr);
> -	wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D);
> +	wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
>  }
>
>  u_int64_t
>
> Modified: head/sys/amd64/amd64/pmap.c
> ==============================================================================
> --- head/sys/amd64/amd64/pmap.c	Sun Jul 29 20:34:44 2018	(r336875)
> +++ head/sys/amd64/amd64/pmap.c	Sun Jul 29 20:47:00 2018	(r336876)
> @@ -1092,6 +1092,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
>  {
>  	vm_offset_t va;
>  	pt_entry_t *pte;
> +	uint64_t cr4;
>  	int i;
>
>  	KERNend = *firstaddr;
> @@ -1118,11 +1119,21 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
>  	virtual_end = VM_MAX_KERNEL_ADDRESS;
>
>
> -	/* XXX do %cr0 as well */
> -	load_cr4(rcr4() | CR4_PGE);
> +	/*
> +	 * Enable PG_G global pages, then switch to the kernel page
> +	 * table from the bootstrap page table.  After the switch, it
> +	 * is possible to enable SMEP and SMAP since PG_U bits are
> +	 * correct now.
> +	 */
> +	cr4 = rcr4();
> +	cr4 |= CR4_PGE;
> +	load_cr4(cr4);
>  	load_cr3(KPML4phys);
>  	if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
> -		load_cr4(rcr4() | CR4_SMEP);
> +		cr4 |= CR4_SMEP;
> +	if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
> +		cr4 |= CR4_SMAP;
> +	load_cr4(cr4);
>
>  	/*
>  	 * Initialize the kernel pmap (which is statically allocated).
>
> Modified: head/sys/amd64/amd64/support.S
> ==============================================================================
> --- head/sys/amd64/amd64/support.S	Sun Jul 29 20:34:44 2018	(r336875)
> +++ head/sys/amd64/amd64/support.S	Sun Jul 29 20:47:00 2018	(r336876)
> @@ -226,7 +226,7 @@ END(fillw)
>   * copyout(from_kernel, to_user, len)
>   *         %rdi,        %rsi,    %rdx
>   */
> -ENTRY(copyout)
> +ENTRY(copyout_nosmap)
>  	PUSH_FRAME_POINTER
>  	movq	PCPU(CURPCB),%rax
>  	movq	$copyout_fault,PCB_ONFAULT(%rax)
> @@ -268,6 +268,55 @@ ENTRY(copyout)
>  	rep
>  	movsb
>
> +	jmp	done_copyout
> +END(copyout_nosmap)
> +
> +ENTRY(copyout_smap)
> +	PUSH_FRAME_POINTER
> +	movq	PCPU(CURPCB),%rax
> +	/* Trap entry clears PSL.AC */
> +	movq	$copyout_fault,PCB_ONFAULT(%rax)
> +	testq	%rdx,%rdx			/* anything to do? */
> +	jz	done_copyout
> +
> +	/*
> +	 * Check explicitly for non-user addresses.  If 486 write protection
> +	 * is being used, this check is essential because we are in kernel
> +	 * mode so the h/w does not provide any protection against writing
> +	 * kernel addresses.
> +	 */
> +
> +	/*
> +	 * First, prevent address wrapping.
> +	 */
> +	movq	%rsi,%rax
> +	addq	%rdx,%rax
> +	jc	copyout_fault
> +/*
> + * XXX STOP USING VM_MAXUSER_ADDRESS.
> + * It is an end address, not a max, so every time it is used correctly it
> + * looks like there is an off by one error, and of course it caused an off
> + * by one error in several places.
> + */
> +	movq	$VM_MAXUSER_ADDRESS,%rcx
> +	cmpq	%rcx,%rax
> +	ja	copyout_fault
> +
> +	xchgq	%rdi,%rsi
> +	/* bcopy(%rsi, %rdi, %rdx) */
> +	movq	%rdx,%rcx
> +
> +	shrq	$3,%rcx
> +	cld
> +	stac
> +	rep
> +	movsq
> +	movb	%dl,%cl
> +	andb	$7,%cl
> +	rep
> +	movsb
> +	clac
> +
>  done_copyout:
>  	xorl	%eax,%eax
>  	movq	PCPU(CURPCB),%rdx
> @@ -288,7 +337,7 @@ END(copyout)
>   * copyin(from_user, to_kernel, len)
>   *        %rdi,      %rsi,      %rdx
>   */
> -ENTRY(copyin)
> +ENTRY(copyin_nosmap)
>  	PUSH_FRAME_POINTER
>  	movq	PCPU(CURPCB),%rax
>  	movq	$copyin_fault,PCB_ONFAULT(%rax)
> @@ -309,13 +358,47 @@ ENTRY(copyin)
>  	movq	%rdx,%rcx
>  	movb	%cl,%al
>  	shrq	$3,%rcx				/* copy longword-wise */
> +	cld
>  	rep
>  	movsq
>  	movb	%al,%cl
>  	andb	$7,%cl				/* copy remaining bytes */
> +	rep
> +	movsb
> +
> +	jmp	done_copyin
> +END(copyin_nosmap)
> +
> +ENTRY(copyin_smap)
> +	PUSH_FRAME_POINTER
> +	movq	PCPU(CURPCB),%rax
> +	movq	$copyin_fault,PCB_ONFAULT(%rax)
> +	testq	%rdx,%rdx			/* anything to do? */
> +	jz	done_copyin
> +
> +	/*
> +	 * make sure address is valid
> +	 */
> +	movq	%rdi,%rax
> +	addq	%rdx,%rax
> +	jc	copyin_fault
> +	movq	$VM_MAXUSER_ADDRESS,%rcx
> +	cmpq	%rcx,%rax
> +	ja	copyin_fault
> +
> +	xchgq	%rdi,%rsi
> +	movq	%rdx,%rcx
> +	movb	%cl,%al
> +	shrq	$3,%rcx				/* copy longword-wise */

A cld is missing here: the copyin_nosmap variant above executes one before its rep movsq, but the SMAP path does not.

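For comparison, copyin_nosmap above gained an explicit cld before its rep movsq, so presumably the SMAP variant was meant to do the same, along the lines of (sketch, not the committed code):

	shrq	$3,%rcx				/* copy longword-wise */
	cld					/* clear DF so the string ops go forward */
	stac					/* set AC: allow user access under SMAP */
	rep
	movsq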
> +	stac
> +	rep
> +	movsq
> +	movb	%al,%cl
> +	andb	$7,%cl				/* copy remaining bytes */
>  	je	done_copyin
>  	rep
>  	movsb
> +	clac
>
>  done_copyin:
>  	xorl	%eax,%eax
> @@ -323,6 +406,7 @@ done_copyin:
>  	movq	%rax,PCB_ONFAULT(%rdx)
>  	POP_FRAME_POINTER
>  	ret
> +END(copyin_smap)
>
>  	ALIGN_TEXT
>  copyin_fault:
> @@ -331,14 +415,13 @@ copyin_fault:
>  	movq	$EFAULT,%rax
>  	POP_FRAME_POINTER
>  	ret
> -END(copyin)
>
>  /*
>   * casueword32.  Compare and set user integer.  Returns -1 on fault,
>   *        0 if access was successful.  Old value is written to *oldp.
>   *        dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
>   */
> -ENTRY(casueword32)
> +ENTRY(casueword32_nosmap)
>  	PUSH_FRAME_POINTER
>  	movq	PCPU(CURPCB),%r8
>  	movq	$fusufault,PCB_ONFAULT(%r8)
> @@ -370,14 +453,50 @@ ENTRY(casueword32)
>  	movl	%esi,(%rdx)			/* oldp = %rdx */
>  	POP_FRAME_POINTER
>  	ret
> -END(casueword32)
> +END(casueword32_nosmap)
>
> +ENTRY(casueword32_smap)
> +	PUSH_FRAME_POINTER
> +	movq	PCPU(CURPCB),%r8
> +	movq	$fusufault,PCB_ONFAULT(%r8)
> +
> +	movq	$VM_MAXUSER_ADDRESS-4,%rax
> +	cmpq	%rax,%rdi			/* verify address is valid */
> +	ja	fusufault
> +
> +	movl	%esi,%eax			/* old */
> +	stac
> +#ifdef SMP
> +	lock
> +#endif
> +	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
> +	clac
> +
> +	/*
> +	 * The old value is in %eax.  If the store succeeded it will be the
> +	 * value we expected (old) from before the store, otherwise it will
> +	 * be the current value.  Save %eax into %esi to prepare the return
> +	 * value.
> +	 */
> +	movl	%eax,%esi
> +	xorl	%eax,%eax
> +	movq	%rax,PCB_ONFAULT(%r8)
> +
> +	/*
> +	 * Access the oldp after the pcb_onfault is cleared, to correctly
> +	 * catch corrupted pointer.
> +	 */
> +	movl	%esi,(%rdx)			/* oldp = %rdx */
> +	POP_FRAME_POINTER
> +	ret
> +END(casueword32_smap)
> +
>  /*
>   * casueword.  Compare and set user long.  Returns -1 on fault,
>   *        0 if access was successful.  Old value is written to *oldp.
>   *        dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
>   */
> -ENTRY(casueword)
> +ENTRY(casueword_nosmap)
>  	PUSH_FRAME_POINTER
>  	movq	PCPU(CURPCB),%r8
>  	movq	$fusufault,PCB_ONFAULT(%r8)
> @@ -403,16 +522,45 @@ ENTRY(casueword)
>  	movq	%rsi,(%rdx)
>  	POP_FRAME_POINTER
>  	ret
> -END(casueword)
> +END(casueword_nosmap)
>
> +ENTRY(casueword_smap)
> +	PUSH_FRAME_POINTER
> +	movq	PCPU(CURPCB),%r8
> +	movq	$fusufault,PCB_ONFAULT(%r8)
> +
> +	movq	$VM_MAXUSER_ADDRESS-4,%rax
> +	cmpq	%rax,%rdi			/* verify address is valid */
> +	ja	fusufault
> +
> +	movq	%rsi,%rax			/* old */
> +	stac
> +#ifdef SMP
> +	lock
> +#endif
> +	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
> +	clac
> +
> +	/*
> +	 * The old value is in %rax.  If the store succeeded it will be the
> +	 * value we expected (old) from before the store, otherwise it will
> +	 * be the current value.
> +	 */
> +	movq	%rax,%rsi
> +	xorl	%eax,%eax
> +	movq	%rax,PCB_ONFAULT(%r8)
> +	movq	%rsi,(%rdx)
> +	POP_FRAME_POINTER
> +	ret
> +END(casueword_smap)
> +
>  /*
>   * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
>   * byte from user memory.
>   * addr = %rdi, valp = %rsi
>   */
>
> -ALTENTRY(fueword64)
> -ENTRY(fueword)
> +ENTRY(fueword_nosmap)
>  	PUSH_FRAME_POINTER
>  	movq	PCPU(CURPCB),%rcx
>  	movq	$fusufault,PCB_ONFAULT(%rcx)
> @@ -427,14 +575,32 @@ ENTRY(fueword)
>  	movq	%r11,(%rsi)
>  	POP_FRAME_POINTER
>  	ret
> -END(fueword64)
> -END(fueword)
> +END(fueword64_nosmap)
>
> -ENTRY(fueword32)
> +ENTRY(fueword_smap)
>  	PUSH_FRAME_POINTER
>  	movq	PCPU(CURPCB),%rcx
>  	movq	$fusufault,PCB_ONFAULT(%rcx)
>
> +	movq	$VM_MAXUSER_ADDRESS-8,%rax
> +	cmpq	%rax,%rdi			/* verify address is valid */
> +	ja	fusufault
> +
> +	xorl	%eax,%eax
> +	stac
> +	movq	(%rdi),%r11
> +	clac
> +	movq	%rax,PCB_ONFAULT(%rcx)
> +	movq	%r11,(%rsi)
> +	POP_FRAME_POINTER
> +	ret
> +END(fueword64_smap)
> +
> +ENTRY(fueword32_nosmap)
> +	PUSH_FRAME_POINTER
> +	movq	PCPU(CURPCB),%rcx
> +	movq	$fusufault,PCB_ONFAULT(%rcx)
> +
>  	movq	$VM_MAXUSER_ADDRESS-4,%rax
>  	cmpq	%rax,%rdi			/* verify address is valid */
>  	ja	fusufault
> @@ -445,13 +611,32 @@ ENTRY(fueword32)
>  	movl	%r11d,(%rsi)
>  	POP_FRAME_POINTER
>  	ret
> -END(fueword32)
> +END(fueword32_nosmap)
>
> -ENTRY(fuword16)
> +ENTRY(fueword32_smap)
>  	PUSH_FRAME_POINTER
>  	movq	PCPU(CURPCB),%rcx
>  	movq	$fusufault,PCB_ONFAULT(%rcx)
>
> +	movq	$VM_MAXUSER_ADDRESS-4,%rax
> +	cmpq	%rax,%rdi			/* verify address is valid */
> +	ja	fusufault
> +
> +	xorl	%eax,%eax
> +	stac
> +	movl	(%rdi),%r11d
> +	clac
> +	movq	%rax,PCB_ONFAULT(%rcx)
> +	movl	%r11d,(%rsi)
> +	POP_FRAME_POINTER
> +	ret
> +END(fueword32_smap)
> +
> +ENTRY(fuword16_nosmap)
> +	PUSH_FRAME_POINTER
> +	movq	PCPU(CURPCB),%rcx
> +	movq	$fusufault,PCB_ONFAULT(%rcx)
> +
>  	movq	$VM_MAXUSER_ADDRESS-2,%rax
>  	cmpq	%rax,%rdi
>  	ja	fusufault
> @@ -460,13 +645,30 @@ ENTRY(fuword16)
>  	movq	$0,PCB_ONFAULT(%rcx)
>  	POP_FRAME_POINTER
>  	ret
> -END(fuword16)
> +END(fuword16_nosmap)
>
> -ENTRY(fubyte)
> +ENTRY(fuword16_smap)
>  	PUSH_FRAME_POINTER
>  	movq	PCPU(CURPCB),%rcx
>  	movq	$fusufault,PCB_ONFAULT(%rcx)
>
> +	movq	$VM_MAXUSER_ADDRESS-2,%rax
> +	cmpq	%rax,%rdi
> +	ja	fusufault
> +
> +	stac
> +	movzwl	(%rdi),%eax
> +	clac
> +	movq	$0,PCB_ONFAULT(%rcx)
> +	POP_FRAME_POINTER
> +	ret
> +END(fuword16_smap)
> +
> +ENTRY(fubyte_nosmap)
> +	PUSH_FRAME_POINTER
> +	movq	PCPU(CURPCB),%rcx
> +	movq	$fusufault,PCB_ONFAULT(%rcx)
> +
>  	movq	$VM_MAXUSER_ADDRESS-1,%rax
>  	cmpq	%rax,%rdi
>  	ja	fusufault
> @@ -475,9 +677,27 @@ ENTRY(fubyte)
>  	movq	$0,PCB_ONFAULT(%rcx)
>  	POP_FRAME_POINTER
>  	ret
> -END(fubyte)
> +END(fubyte_nosmap)
>
> +ENTRY(fubyte_smap)
> +	PUSH_FRAME_POINTER
> +	movq	PCPU(CURPCB),%rcx
> +	movq	$fusufault,PCB_ONFAULT(%rcx)
> +
> +	movq	$VM_MAXUSER_ADDRESS-1,%rax
> +	cmpq	%rax,%rdi
> +	ja	fusufault
> +
> +	stac
> +	movzbl	(%rdi),%eax
> +	clac
> +	movq	$0,PCB_ONFAULT(%rcx)
> +	POP_FRAME_POINTER
> +	ret
> +END(fubyte_smap)
> +
>  	ALIGN_TEXT
> +	/* Fault entry clears PSL.AC */
>  fusufault:
>  	movq	PCPU(CURPCB),%rcx
>  	xorl	%eax,%eax
> @@ -491,8 +711,7 @@ fusufault:
>   * user memory.
>   * addr = %rdi, value = %rsi
>   */
> -ALTENTRY(suword64)
> -ENTRY(suword)
> +ENTRY(suword_nosmap)
>  	PUSH_FRAME_POINTER
>  	movq	PCPU(CURPCB),%rcx
>  	movq	$fusufault,PCB_ONFAULT(%rcx)
> @@ -507,14 +726,32 @@ ENTRY(suword)
>  	movq	%rax,PCB_ONFAULT(%rcx)
>  	POP_FRAME_POINTER
>  	ret
> -END(suword64)
> -END(suword)
> +END(suword_nosmap)
>
> -ENTRY(suword32)
> +ENTRY(suword_smap)
>  	PUSH_FRAME_POINTER
>  	movq	PCPU(CURPCB),%rcx
>  	movq	$fusufault,PCB_ONFAULT(%rcx)
>
> +	movq	$VM_MAXUSER_ADDRESS-8,%rax
> +	cmpq	%rax,%rdi			/* verify address validity */
> +	ja	fusufault
> +
> +	stac
> +	movq	%rsi,(%rdi)
> +	clac
> +	xorl	%eax,%eax
> +	movq	PCPU(CURPCB),%rcx
> +	movq	%rax,PCB_ONFAULT(%rcx)
> +	POP_FRAME_POINTER
> +	ret
> +END(suword_smap)
> +
> +ENTRY(suword32_nosmap)
> +	PUSH_FRAME_POINTER
> +	movq	PCPU(CURPCB),%rcx
> +	movq	$fusufault,PCB_ONFAULT(%rcx)
> +
>  	movq	$VM_MAXUSER_ADDRESS-4,%rax
>  	cmpq	%rax,%rdi			/* verify address validity */
>  	ja	fusufault
> @@ -525,13 +762,32 @@ ENTRY(suword32)
>  	movq	%rax,PCB_ONFAULT(%rcx)
>  	POP_FRAME_POINTER
>  	ret
> -END(suword32)
> +END(suword32_nosmap)
>
> -ENTRY(suword16)
> +ENTRY(suword32_smap)
>  	PUSH_FRAME_POINTER
>  	movq	PCPU(CURPCB),%rcx
>  	movq	$fusufault,PCB_ONFAULT(%rcx)
>
> +	movq	$VM_MAXUSER_ADDRESS-4,%rax
> +	cmpq	%rax,%rdi			/* verify address validity */
> +	ja	fusufault
> +
> +	stac
> +	movl	%esi,(%rdi)
> +	clac
> +	xorl	%eax,%eax
> +	movq	PCPU(CURPCB),%rcx
> +	movq	%rax,PCB_ONFAULT(%rcx)
> +	POP_FRAME_POINTER
> +	ret
> +END(suword32_smap)
> +
> +ENTRY(suword16_nosmap)
> +	PUSH_FRAME_POINTER
> +	movq	PCPU(CURPCB),%rcx
> +	movq	$fusufault,PCB_ONFAULT(%rcx)
> +
>  	movq	$VM_MAXUSER_ADDRESS-2,%rax
>  	cmpq	%rax,%rdi			/* verify address validity */
>  	ja	fusufault
> @@ -542,13 +798,32 @@ ENTRY(suword16)
>  	movq	%rax,PCB_ONFAULT(%rcx)
>  	POP_FRAME_POINTER
>  	ret
> -END(suword16)
> +END(suword16_nosmap)
>
> -ENTRY(subyte)
> +ENTRY(suword16_smap)
>  	PUSH_FRAME_POINTER
>  	movq	PCPU(CURPCB),%rcx
>  	movq	$fusufault,PCB_ONFAULT(%rcx)
>
> +	movq	$VM_MAXUSER_ADDRESS-2,%rax
> +	cmpq	%rax,%rdi			/* verify address validity */
> +	ja	fusufault
> +
> +	stac
> +	movw	%si,(%rdi)
> +	clac
> +	xorl	%eax,%eax
> +	movq	PCPU(CURPCB),%rcx		/* restore trashed register */
> +	movq	%rax,PCB_ONFAULT(%rcx)
> +	POP_FRAME_POINTER
> +	ret
> +END(suword16_smap)
> +
> +ENTRY(subyte_nosmap)
> +	PUSH_FRAME_POINTER
> +	movq	PCPU(CURPCB),%rcx
> +	movq	$fusufault,PCB_ONFAULT(%rcx)
> +
>  	movq	$VM_MAXUSER_ADDRESS-1,%rax
>  	cmpq	%rax,%rdi			/* verify address validity */
>  	ja	fusufault
> @@ -560,8 +835,28 @@ ENTRY(subyte)
>  	movq	%rax,PCB_ONFAULT(%rcx)
>  	POP_FRAME_POINTER
>  	ret
> -END(subyte)
> +END(subyte_nosmap)
>
> +ENTRY(subyte_smap)
> +	PUSH_FRAME_POINTER
> +	movq	PCPU(CURPCB),%rcx
> +	movq	$fusufault,PCB_ONFAULT(%rcx)
> +
> +	movq	$VM_MAXUSER_ADDRESS-1,%rax
> +	cmpq	%rax,%rdi			/* verify address validity */
> +	ja	fusufault
> +
> +	movl	%esi,%eax
> +	stac
> +	movb	%al,(%rdi)
> +	clac
> +	xorl	%eax,%eax
> +	movq	PCPU(CURPCB),%rcx		/* restore trashed register */
> +	movq	%rax,PCB_ONFAULT(%rcx)
> +	POP_FRAME_POINTER
> +	ret
> +END(subyte_smap)
> +
>  /*
>   * copyinstr(from, to, maxlen, int *lencopied)
>   *           %rdi, %rsi, %rdx, %rcx
> @@ -571,7 +866,7 @@ END(subyte)
>   *	EFAULT on protection violations. If lencopied is non-zero,
>   *	return the actual length in *lencopied.
>   */
> -ENTRY(copyinstr)
> +ENTRY(copyinstr_nosmap)
>  	PUSH_FRAME_POINTER
>  	movq	%rdx,%r8			/* %r8 = maxlen */
>  	movq	%rcx,%r9			/* %r9 = *len */
> @@ -592,29 +887,67 @@ ENTRY(copyinstr)
>  	movq	%rax,%r8
>  1:
>  	incq	%rdx
> +	cld
>
>  2:
>  	decq	%rdx
> -	jz	3f
> +	jz	copyinstr_toolong
>
>  	lodsb
>  	stosb
>  	orb	%al,%al
>  	jnz	2b
>
> +	jmp	copyinstr_succ
> +END(copyinstr_nosmap)
> +
> +ENTRY(copyinstr_smap)
> +	PUSH_FRAME_POINTER
> +	movq	%rdx,%r8			/* %r8 = maxlen */
> +	movq	%rcx,%r9			/* %r9 = *len */
> +	xchgq	%rdi,%rsi			/* %rdi = from, %rsi = to */
> +	movq	PCPU(CURPCB),%rcx
> +	movq	$cpystrflt,PCB_ONFAULT(%rcx)
> +
> +	movq	$VM_MAXUSER_ADDRESS,%rax
> +
> +	/* make sure 'from' is within bounds */
> +	subq	%rsi,%rax
> +	jbe	cpystrflt
> +
> +	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
> +	cmpq	%rdx,%rax
> +	jae	1f
> +	movq	%rax,%rdx
> +	movq	%rax,%r8
> +1:
> +	incq	%rdx

A cld is missing here as well, before the lodsb/stosb loop; copyinstr_nosmap gains one at the same point above.

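Presumably the same fix applies here, i.e. something like (sketch only):

1:
	incq	%rdx
	cld					/* clear DF before the lodsb/stosb loop */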
> +
> +2:
> +	decq	%rdx
> +	jz	copyinstr_succ
> +
> +	stac
> +	lodsb
> +	stosb
> +	clac
> +	orb	%al,%al
> +	jnz	2b
> +
> +copyinstr_succ:
>  	/* Success -- 0 byte reached */
>  	decq	%rdx
>  	xorl	%eax,%eax
>  	jmp	cpystrflt_x
> -3:
> +copyinstr_toolong:
>  	/* rdx is zero - return ENAMETOOLONG or EFAULT */
>  	movq	$VM_MAXUSER_ADDRESS,%rax
>  	cmpq	%rax,%rsi
>  	jae	cpystrflt
> -4:
>  	movq	$ENAMETOOLONG,%rax
>  	jmp	cpystrflt_x
>
> +	/* Fault entry clears PSL.AC */
>  cpystrflt:
>  	movq	$EFAULT,%rax
>
> @@ -630,7 +963,7 @@ cpystrflt_x:

cpystrflt_x:
        /* set *lencopied and return %eax */
        movq    PCPU(CURPCB),%rcx
        movq    $0,PCB_ONFAULT(%rcx)

        testq   %r9,%r9
        jz      1f
        subq    %rdx,%r8
        movq    %r8,(%r9)   << Here you access user space with RFLAGS.AC
                               already cleared by the fault entry.
1:
        POP_FRAME_POINTER
        ret

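If that lencopied store can in fact target user memory on the SMAP path, it would presumably need its own stac/clac pair around it, roughly (a sketch under that assumption, not a tested patch):

        testq   %r9,%r9
        jz      1f
        subq    %rdx,%r8
        stac                            /* re-allow the user access under SMAP */
        movq    %r8,(%r9)
        clac
1: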

>  1:
>  	POP_FRAME_POINTER
>  	ret
> -END(copyinstr)
> +END(copyinstr_smap)
>
>  /*
>   * copystr(from, to, maxlen, int *lencopied)
>
> Modified: head/sys/amd64/amd64/trap.c
> ==============================================================================
> --- head/sys/amd64/amd64/trap.c	Sun Jul 29 20:34:44 2018	(r336875)
> +++ head/sys/amd64/amd64/trap.c	Sun Jul 29 20:47:00 2018	(r336876)
> @@ -673,6 +673,24 @@ trap_check(struct trapframe *frame)
>  	trap(frame);
>  }
>
> +static bool
> +trap_is_smap(struct trapframe *frame)
> +{
> +
> +	/*
> +	 * A page fault on a userspace address is classified as
> +	 * SMAP-induced if:
> +	 * - SMAP is supported;
> +	 * - kernel mode accessed present data page;
> +	 * - rflags.AC was cleared.
> +	 * Kernel must never access user space with rflags.AC cleared
> +	 * if SMAP is enabled.
> +	 */
> +	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 &&
> +	    (frame->tf_err & (PGEX_P | PGEX_U | PGEX_I | PGEX_RSV)) ==
> +	    PGEX_P && (frame->tf_rflags & PSL_AC) == 0);
> +}
> +
>  static int
>  trap_pfault(struct trapframe *frame, int usermode)
>  {
> @@ -750,9 +768,13 @@ trap_pfault(struct trapframe *frame, int usermode)
>  		 * handling routine.  Since accessing the address
>  		 * without the handler is a bug, do not try to handle
>  		 * it normally, and panic immediately.
> +		 *
> +		 * If SMAP is enabled, filter SMAP faults also,
> +		 * because illegal access might occur to the mapped
> +		 * user address, causing infinite loop.
>  		 */
>  		if (!usermode && (td->td_intr_nesting_level != 0 ||
> -		    curpcb->pcb_onfault == NULL)) {
> +		    trap_is_smap(frame) || curpcb->pcb_onfault == NULL)) {
>  			trap_fatal(frame, eva);
>  			return (-1);
>  		}
>
> Modified: head/sys/amd64/ia32/ia32_exception.S
> ==============================================================================
> --- head/sys/amd64/ia32/ia32_exception.S	Sun Jul 29 20:34:44 2018	(r336875)
> +++ head/sys/amd64/ia32/ia32_exception.S	Sun Jul 29 20:47:00 2018	(r336876)
> @@ -70,7 +70,9 @@ int0x80_syscall_common:
>  	movq	%r14,TF_R14(%rsp)
>  	movq	%r15,TF_R15(%rsp)
>  	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
> -	cld
> +	pushfq
> +	andq	$~(PSL_D | PSL_AC),(%rsp)
> +	popfq
>  	FAKE_MCOUNT(TF_RIP(%rsp))
>  	movq	%rsp, %rdi
>  	call	ia32_syscall
>
> Modified: head/sys/amd64/include/asmacros.h
> ==============================================================================
> --- head/sys/amd64/include/asmacros.h	Sun Jul 29 20:34:44 2018	(r336875)
> +++ head/sys/amd64/include/asmacros.h	Sun Jul 29 20:47:00 2018	(r336876)
> @@ -255,7 +255,9 @@ X\vec_name:
>  	movq	%r15,TF_R15(%rsp)
>  	SAVE_SEGS
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
> _______________________________________________
> svn-src-head@freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"
>


