Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 8 May 2018 17:12:10 +0000 (UTC)
From:      Gordon Tetlow <gordon@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-releng@freebsd.org
Subject:   svn commit: r333371 - in releng: 10.4 10.4/sys/amd64/amd64 10.4/sys/conf 10.4/sys/i386/i386 11.1 11.1/sys/amd64/amd64 11.1/sys/conf 11.1/sys/i386/i386
Message-ID:  <201805081712.w48HCAjJ035913@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gordon
Date: Tue May  8 17:12:10 2018
New Revision: 333371
URL: https://svnweb.freebsd.org/changeset/base/333371

Log:
  Fix mishandling of x86 debug exceptions. [SA-18:06.debugreg]
  
  Bump newvers.sh and UPDATING for today's patches.
  
  Approved by:	so
  Security:	CVE-2018-8897
  Security:	FreeBSD-SA-18:06.debugreg
  Sponsored by:	The FreeBSD Foundation

Modified:
  releng/10.4/UPDATING
  releng/10.4/sys/amd64/amd64/exception.S
  releng/10.4/sys/amd64/amd64/machdep.c
  releng/10.4/sys/amd64/amd64/mp_machdep.c
  releng/10.4/sys/amd64/amd64/trap.c
  releng/10.4/sys/conf/newvers.sh
  releng/10.4/sys/i386/i386/trap.c
  releng/11.1/UPDATING
  releng/11.1/sys/amd64/amd64/exception.S
  releng/11.1/sys/amd64/amd64/machdep.c
  releng/11.1/sys/amd64/amd64/mp_machdep.c
  releng/11.1/sys/amd64/amd64/pmap.c
  releng/11.1/sys/amd64/amd64/trap.c
  releng/11.1/sys/conf/newvers.sh
  releng/11.1/sys/i386/i386/trap.c

Modified: releng/10.4/UPDATING
==============================================================================
--- releng/10.4/UPDATING	Tue May  8 17:05:39 2018	(r333370)
+++ releng/10.4/UPDATING	Tue May  8 17:12:10 2018	(r333371)
@@ -16,6 +16,16 @@ from older versions of FreeBSD, try WITHOUT_CLANG to b
 stable/10, and then rebuild without this option. The bootstrap process from
 older version of current is a bit fragile.
 
+20180508	p9	FreeBSD-SA-18:06.debugreg
+			FreeBSD-EN-18:05.mem
+			FreeBSD-EN-18:06.tzdata
+
+	Fix mishandling of x86 debug exceptions. [SA-18:06.debugreg]
+
+	Fix multiple small kernel memory disclosures. [EN-18:05.mem]
+
+	Update timezone database information. [EN-18:06.tzdata]
+
 20180404	p8	FreeBSD-SA-18:04.vt
 			FreeBSD-SA-18:05.ipsec
 			FreeBSD-EN-18:03.tzdata

Modified: releng/10.4/sys/amd64/amd64/exception.S
==============================================================================
--- releng/10.4/sys/amd64/amd64/exception.S	Tue May  8 17:05:39 2018	(r333370)
+++ releng/10.4/sys/amd64/amd64/exception.S	Tue May  8 17:12:10 2018	(r333371)
@@ -108,8 +108,6 @@ MCOUNT_LABEL(btrap)
 	movq $0,TF_ADDR(%rsp) ; \
 	movq $0,TF_ERR(%rsp) ; \
 	jmp alltraps_noen
-IDTVEC(dbg)
-	TRAP_NOEN(T_TRCTRAP)
 IDTVEC(bpt)
 	TRAP_NOEN(T_BPTFLT)
 #ifdef KDTRACE_HOOKS
@@ -434,6 +432,101 @@ IDTVEC(fast_syscall)
  */
 IDTVEC(fast_syscall32)
 	sysret
+
+/*
+ * DB# handler is very similar to NM#, because 'mov/pop %ss' delay
+ * generation of exception until the next instruction is executed,
+ * which might be a kernel entry.  So we must execute the handler
+ * on IST stack and be ready for non-kernel GSBASE.
+ */
+IDTVEC(dbg)
+	subq	$TF_RIP,%rsp
+	movl	$(T_TRCTRAP),TF_TRAPNO(%rsp)
+	movq	$0,TF_ADDR(%rsp)
+	movq	$0,TF_ERR(%rsp)
+	movq	%rdi,TF_RDI(%rsp)
+	movq	%rsi,TF_RSI(%rsp)
+	movq	%rdx,TF_RDX(%rsp)
+	movq	%rcx,TF_RCX(%rsp)
+	movq	%r8,TF_R8(%rsp)
+	movq	%r9,TF_R9(%rsp)
+	movq	%rax,TF_RAX(%rsp)
+	movq	%rbx,TF_RBX(%rsp)
+	movq	%rbp,TF_RBP(%rsp)
+	movq	%r10,TF_R10(%rsp)
+	movq	%r11,TF_R11(%rsp)
+	movq	%r12,TF_R12(%rsp)
+	movq	%r13,TF_R13(%rsp)
+	movq	%r14,TF_R14(%rsp)
+	movq	%r15,TF_R15(%rsp)
+	movw	%fs,TF_FS(%rsp)
+	movw	%gs,TF_GS(%rsp)
+	movw	%es,TF_ES(%rsp)
+	movw	%ds,TF_DS(%rsp)
+	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
+	cld
+	testb	$SEL_RPL_MASK,TF_CS(%rsp)
+	jnz	dbg_fromuserspace
+	/*
+	 * We've interrupted the kernel.  Preserve GS.base in %r12.
+	 */
+	movl	$MSR_GSBASE,%ecx
+	rdmsr
+	movq	%rax,%r12
+	shlq	$32,%rdx
+	orq	%rdx,%r12
+	/* Retrieve and load the canonical value for GS.base. */
+	movq	TF_SIZE(%rsp),%rdx
+	movl	%edx,%eax
+	shrq	$32,%rdx
+	wrmsr
+	FAKE_MCOUNT(TF_RIP(%rsp))
+	movq	%rsp,%rdi
+	call	trap
+	MEXITCOUNT
+	/*
+	 * Put back the preserved MSR_GSBASE value.
+	 */
+	movl	$MSR_GSBASE,%ecx
+	movq	%r12,%rdx
+	movl	%edx,%eax
+	shrq	$32,%rdx
+	wrmsr
+	movq	TF_RDI(%rsp),%rdi
+	movq	TF_RSI(%rsp),%rsi
+	movq	TF_RDX(%rsp),%rdx
+	movq	TF_RCX(%rsp),%rcx
+	movq	TF_R8(%rsp),%r8
+	movq	TF_R9(%rsp),%r9
+	movq	TF_RAX(%rsp),%rax
+	movq	TF_RBX(%rsp),%rbx
+	movq	TF_RBP(%rsp),%rbp
+	movq	TF_R10(%rsp),%r10
+	movq	TF_R11(%rsp),%r11
+	movq	TF_R12(%rsp),%r12
+	movq	TF_R13(%rsp),%r13
+	movq	TF_R14(%rsp),%r14
+	movq	TF_R15(%rsp),%r15
+	addq	$TF_RIP,%rsp
+	jmp	doreti_iret
+dbg_fromuserspace:
+	/*
+	 * Switch to kernel GSBASE and kernel page table, and copy frame
+	 * from the IST stack to the normal kernel stack, since trap()
+	 * re-enables interrupts, and since we might trap on DB# while
+	 * in trap().
+	 */
+	swapgs
+	movq	PCPU(RSP0),%rax
+	movl	$TF_SIZE,%ecx
+	subq	%rcx,%rax
+	movq	%rax,%rdi
+	movq	%rsp,%rsi
+	rep;movsb
+	movq	%rax,%rsp
+	movq	PCPU(CURPCB),%rdi
+	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)
+	jmp	calltrap
 
 /*
  * NMI handling is special.

Modified: releng/10.4/sys/amd64/amd64/machdep.c
==============================================================================
--- releng/10.4/sys/amd64/amd64/machdep.c	Tue May  8 17:05:39 2018	(r333370)
+++ releng/10.4/sys/amd64/amd64/machdep.c	Tue May  8 17:12:10 2018	(r333371)
@@ -1023,6 +1023,7 @@ struct gate_descriptor *idt = &idt0[0];	/* interrupt d
 static char dblfault_stack[PAGE_SIZE] __aligned(16);
 
 static char nmi0_stack[PAGE_SIZE] __aligned(16);
+static char dbg0_stack[PAGE_SIZE] __aligned(16);
 CTASSERT(sizeof(struct nmi_pcpu) == 16);
 
 struct amd64tss common_tss[MAXCPU];
@@ -1908,7 +1909,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	for (x = 0; x < NIDT; x++)
 		setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
 	setidt(IDT_DE, &IDTVEC(div),  SDT_SYSIGT, SEL_KPL, 0);
-	setidt(IDT_DB, &IDTVEC(dbg),  SDT_SYSIGT, SEL_KPL, 0);
+	setidt(IDT_DB, &IDTVEC(dbg),  SDT_SYSIGT, SEL_KPL, 4);
 	setidt(IDT_NMI, &IDTVEC(nmi),  SDT_SYSIGT, SEL_KPL, 2);
  	setidt(IDT_BP, &IDTVEC(bpt),  SDT_SYSIGT, SEL_UPL, 0);
 	setidt(IDT_OF, &IDTVEC(ofl),  SDT_SYSIGT, SEL_KPL, 0);
@@ -1965,6 +1966,13 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1;
 	np->np_pcpu = (register_t) pc;
 	common_tss[0].tss_ist2 = (long) np;
+
+	/*
+	 * DB# stack, runs on ist4.
+	 */
+	np = ((struct nmi_pcpu *) &dbg0_stack[sizeof(dbg0_stack)]) - 1;
+	np->np_pcpu = (register_t) pc;
+	common_tss[0].tss_ist4 = (long) np;
 
 	/* Set the IO permission bitmap (empty due to tss seg limit) */
 	common_tss[0].tss_iobase = sizeof(struct amd64tss) +

Modified: releng/10.4/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- releng/10.4/sys/amd64/amd64/mp_machdep.c	Tue May  8 17:05:39 2018	(r333370)
+++ releng/10.4/sys/amd64/amd64/mp_machdep.c	Tue May  8 17:12:10 2018	(r333371)
@@ -98,6 +98,7 @@ void *bootstacks[MAXCPU];
 /* Temporary variables for init_secondary()  */
 char *doublefault_stack;
 char *nmi_stack;
+char *dbg_stack;
 void *dpcpu;
 
 struct pcb stoppcbs[MAXCPU];
@@ -644,6 +645,10 @@ init_secondary(void)
 	np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1;
 	common_tss[cpu].tss_ist2 = (long) np;
 
+	/* The DB# stack runs on IST4. */
+	np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
+	common_tss[cpu].tss_ist4 = (long) np;
+
 	/* Prepare private GDT */
 	gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
 	for (x = 0; x < NGDT; x++) {
@@ -679,6 +684,10 @@ init_secondary(void)
 	/* Save the per-cpu pointer for use by the NMI handler. */
 	np->np_pcpu = (register_t) pc;
 
+	/* Save the per-cpu pointer for use by the DB# handler. */
+	np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
+	np->np_pcpu = (register_t) pc;
+
 	wrmsr(MSR_FSBASE, 0);		/* User value */
 	wrmsr(MSR_GSBASE, (u_int64_t)pc);
 	wrmsr(MSR_KGSBASE, (u_int64_t)pc);	/* XXX User value while we're in the kernel */
@@ -966,6 +975,8 @@ start_all_aps(void)
 		doublefault_stack = (char *)kmem_malloc(kernel_arena,
 		    PAGE_SIZE, M_WAITOK | M_ZERO);
 		nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
+		    M_WAITOK | M_ZERO);
+		dbg_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
 		    M_WAITOK | M_ZERO);
 		dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
 		    M_WAITOK | M_ZERO);

Modified: releng/10.4/sys/amd64/amd64/trap.c
==============================================================================
--- releng/10.4/sys/amd64/amd64/trap.c	Tue May  8 17:05:39 2018	(r333370)
+++ releng/10.4/sys/amd64/amd64/trap.c	Tue May  8 17:12:10 2018	(r333371)
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
  */
 
 #include "opt_clock.h"
+#include "opt_compat.h"
 #include "opt_cpu.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_isa.h"
@@ -98,6 +99,9 @@ PMC_SOFT_DEFINE( , , page_fault, write);
 #include <sys/dtrace_bsd.h>
 #endif
 
+extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(fast_syscall),
+    IDTVEC(fast_syscall32), IDTVEC(int0x80_syscall);
+
 extern void trap(struct trapframe *frame);
 extern void syscall(struct trapframe *frame);
 void dblfault_handler(struct trapframe *frame);
@@ -549,6 +553,39 @@ trap(struct trapframe *frame)
 				load_dr6(rdr6() & 0xfffffff0);
 				goto out;
 			}
+
+			/*
+			 * Malicious user code can configure a debug
+			 * register watchpoint to trap on data access
+			 * to the top of stack and then execute 'pop
+			 * %ss; int 3'.  Due to exception deferral for
+			 * 'pop %ss', the CPU will not interrupt 'int
+			 * 3' to raise the DB# exception for the debug
+			 * register but will postpone the DB# until
+			 * execution of the first instruction of the
+			 * BP# handler (in kernel mode).  Normally the
+			 * previous check would ignore DB# exceptions
+			 * for watchpoints on user addresses raised in
+			 * kernel mode.  However, some CPU errata
+			 * include cases where DB# exceptions do not
+			 * properly set bits in %dr6, e.g. Haswell
+			 * HSD23 and Skylake-X SKZ24.
+			 *
+			 * A deferred DB# can also be raised on the
+			 * first instructions of system call entry
+			 * points or single-step traps via similar use
+			 * of 'pop %ss' or 'mov xxx, %ss'.
+			 */
+			if (frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall) ||
+#ifdef COMPAT_FREEBSD32
+			    frame->tf_rip ==
+			    (uintptr_t)IDTVEC(int0x80_syscall) ||
+#endif
+			    frame->tf_rip == (uintptr_t)IDTVEC(bpt) ||
+			    frame->tf_rip == (uintptr_t)IDTVEC(dbg) ||
+			    /* Needed for AMD. */
+			    frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall32))
+				return;
 			/*
 			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
 			 */

Modified: releng/10.4/sys/conf/newvers.sh
==============================================================================
--- releng/10.4/sys/conf/newvers.sh	Tue May  8 17:05:39 2018	(r333370)
+++ releng/10.4/sys/conf/newvers.sh	Tue May  8 17:12:10 2018	(r333371)
@@ -32,7 +32,7 @@
 
 TYPE="FreeBSD"
 REVISION="10.4"
-BRANCH="RELEASE-p8"
+BRANCH="RELEASE-p9"
 if [ "X${BRANCH_OVERRIDE}" != "X" ]; then
 	BRANCH=${BRANCH_OVERRIDE}
 fi

Modified: releng/10.4/sys/i386/i386/trap.c
==============================================================================
--- releng/10.4/sys/i386/i386/trap.c	Tue May  8 17:05:39 2018	(r333370)
+++ releng/10.4/sys/i386/i386/trap.c	Tue May  8 17:12:10 2018	(r333371)
@@ -116,6 +116,8 @@ void dblfault_handler(void);
 
 extern inthand_t IDTVEC(lcall_syscall);
 
+extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall);
+
 #define MAX_TRAP_MSG		32
 static char *trap_msg[] = {
 	"",					/*  0 unused */
@@ -683,6 +685,34 @@ trap(struct trapframe *frame)
 				load_dr6(rdr6() & 0xfffffff0);
 				goto out;
 			}
+
+			/*
+			 * Malicious user code can configure a debug
+			 * register watchpoint to trap on data access
+			 * to the top of stack and then execute 'pop
+			 * %ss; int 3'.  Due to exception deferral for
+			 * 'pop %ss', the CPU will not interrupt 'int
+			 * 3' to raise the DB# exception for the debug
+			 * register but will postpone the DB# until
+			 * execution of the first instruction of the
+			 * BP# handler (in kernel mode).  Normally the
+			 * previous check would ignore DB# exceptions
+			 * for watchpoints on user addresses raised in
+			 * kernel mode.  However, some CPU errata
+			 * include cases where DB# exceptions do not
+			 * properly set bits in %dr6, e.g. Haswell
+			 * HSD23 and Skylake-X SKZ24.
+			 *
+			 * A deferred DB# can also be raised on the
+			 * first instructions of system call entry
+			 * points or single-step traps via similar use
+			 * of 'pop %ss' or 'mov xxx, %ss'.
+			 */
+			if (frame->tf_eip ==
+			    (uintptr_t)IDTVEC(int0x80_syscall) ||
+			    frame->tf_eip == (uintptr_t)IDTVEC(bpt) ||
+			    frame->tf_eip == (uintptr_t)IDTVEC(dbg))
+				return;
 			/*
 			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
 			 */

Modified: releng/11.1/UPDATING
==============================================================================
--- releng/11.1/UPDATING	Tue May  8 17:05:39 2018	(r333370)
+++ releng/11.1/UPDATING	Tue May  8 17:12:10 2018	(r333371)
@@ -16,6 +16,16 @@ from older versions of FreeBSD, try WITHOUT_CLANG and 
 the tip of head, and then rebuild without this option. The bootstrap process
 from older version of current across the gcc/clang cutover is a bit fragile.
 
+20180508	p10	FreeBSD-SA-18:06.debugreg
+			FreeBSD-EN-18:05.mem
+			FreeBSD-EN-18:06.tzdata
+
+	Fix mishandling of x86 debug exceptions. [SA-18:06.debugreg]
+
+	Fix multiple small kernel memory disclosures. [EN-18:05.mem]
+
+	Update timezone database information. [EN-18:06.tzdata]
+
 20180404	p9	FreeBSD-SA-18:04.vt
 			FreeBSD-SA-18:05.ipsec
 			FreeBSD-EN-18:03.tzdata

Modified: releng/11.1/sys/amd64/amd64/exception.S
==============================================================================
--- releng/11.1/sys/amd64/amd64/exception.S	Tue May  8 17:05:39 2018	(r333370)
+++ releng/11.1/sys/amd64/amd64/exception.S	Tue May  8 17:12:10 2018	(r333371)
@@ -116,7 +116,6 @@ X\l:	subq $TF_RIP,%rsp
 	jmp alltraps_noen
 	.endm
 
-	TRAP_NOEN	dbg, T_TRCTRAP
 	TRAP_NOEN	bpt, T_BPTFLT
 #ifdef KDTRACE_HOOKS
 	TRAP_NOEN	dtrace_ret, T_DTRACE_RET
@@ -507,6 +506,121 @@ fast_syscall_common:
  */
 IDTVEC(fast_syscall32)
 	sysret
+
+/*
+ * DB# handler is very similar to NM#, because 'mov/pop %ss' delay
+ * generation of exception until the next instruction is executed,
+ * which might be a kernel entry.  So we must execute the handler
+ * on IST stack and be ready for non-kernel GSBASE.
+ */
+IDTVEC(dbg)
+	subq	$TF_RIP,%rsp
+	movl	$(T_TRCTRAP),TF_TRAPNO(%rsp)
+	movq	$0,TF_ADDR(%rsp)
+	movq	$0,TF_ERR(%rsp)
+	movq	%rdi,TF_RDI(%rsp)
+	movq	%rsi,TF_RSI(%rsp)
+	movq	%rdx,TF_RDX(%rsp)
+	movq	%rcx,TF_RCX(%rsp)
+	movq	%r8,TF_R8(%rsp)
+	movq	%r9,TF_R9(%rsp)
+	movq	%rax,TF_RAX(%rsp)
+	movq	%rbx,TF_RBX(%rsp)
+	movq	%rbp,TF_RBP(%rsp)
+	movq	%r10,TF_R10(%rsp)
+	movq	%r11,TF_R11(%rsp)
+	movq	%r12,TF_R12(%rsp)
+	movq	%r13,TF_R13(%rsp)
+	movq	%r14,TF_R14(%rsp)
+	movq	%r15,TF_R15(%rsp)
+	SAVE_SEGS
+	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
+	cld
+	testb	$SEL_RPL_MASK,TF_CS(%rsp)
+	jnz	dbg_fromuserspace
+	/*
+	 * We've interrupted the kernel.  Preserve GS.base in %r12,
+	 * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
+	 */
+	movl	$MSR_GSBASE,%ecx
+	rdmsr
+	movq	%rax,%r12
+	shlq	$32,%rdx
+	orq	%rdx,%r12
+	/* Retrieve and load the canonical value for GS.base. */
+	movq	TF_SIZE(%rsp),%rdx
+	movl	%edx,%eax
+	shrq	$32,%rdx
+	wrmsr
+	movq	%cr3,%r13
+	movq	PCPU(KCR3),%rax
+	cmpq	$~0,%rax
+	je	1f
+	movq	%rax,%cr3
+1:	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+	je	2f
+	movl	$MSR_IA32_SPEC_CTRL,%ecx
+	rdmsr
+	movl	%eax,%r14d
+	call	handle_ibrs_entry
+2:	FAKE_MCOUNT(TF_RIP(%rsp))
+	movq	%rsp,%rdi
+	call	trap
+	MEXITCOUNT
+	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+	je	3f
+	movl	%r14d,%eax
+	xorl	%edx,%edx
+	movl	$MSR_IA32_SPEC_CTRL,%ecx
+	wrmsr
+	/*
+	 * Put back the preserved MSR_GSBASE value.
+	 */
+3:	movl	$MSR_GSBASE,%ecx
+	movq	%r12,%rdx
+	movl	%edx,%eax
+	shrq	$32,%rdx
+	wrmsr
+	movq	%r13,%cr3
+	RESTORE_REGS
+	addq	$TF_RIP,%rsp
+	jmp	doreti_iret
+dbg_fromuserspace:
+	/*
+	 * Switch to kernel GSBASE and kernel page table, and copy frame
+	 * from the IST stack to the normal kernel stack, since trap()
+	 * re-enables interrupts, and since we might trap on DB# while
+	 * in trap().
+	 */
+	swapgs
+	movq	PCPU(KCR3),%rax
+	cmpq	$~0,%rax
+	je	1f
+	movq	%rax,%cr3
+1:	movq	PCPU(RSP0),%rax
+	movl	$TF_SIZE,%ecx
+	subq	%rcx,%rax
+	movq	%rax,%rdi
+	movq	%rsp,%rsi
+	rep;movsb
+	movq	%rax,%rsp
+	call	handle_ibrs_entry
+	movq	PCPU(CURPCB),%rdi
+	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)
+	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
+	jz	3f
+	cmpw	$KUF32SEL,TF_FS(%rsp)
+	jne	2f
+	rdfsbase %rax
+	movq	%rax,PCB_FSBASE(%rdi)
+2:	cmpw	$KUG32SEL,TF_GS(%rsp)
+	jne	3f
+	movl	$MSR_KGSBASE,%ecx
+	rdmsr
+	shlq	$32,%rdx
+	orq	%rdx,%rax
+	movq	%rax,PCB_GSBASE(%rdi)
+3:	jmp	calltrap
 
 /*
  * NMI handling is special.

Modified: releng/11.1/sys/amd64/amd64/machdep.c
==============================================================================
--- releng/11.1/sys/amd64/amd64/machdep.c	Tue May  8 17:05:39 2018	(r333370)
+++ releng/11.1/sys/amd64/amd64/machdep.c	Tue May  8 17:12:10 2018	(r333371)
@@ -675,6 +675,7 @@ struct gate_descriptor *idt = &idt0[0];	/* interrupt d
 static char dblfault_stack[PAGE_SIZE] __aligned(16);
 static char mce0_stack[PAGE_SIZE] __aligned(16);
 static char nmi0_stack[PAGE_SIZE] __aligned(16);
+static char dbg0_stack[PAGE_SIZE] __aligned(16);
 CTASSERT(sizeof(struct nmi_pcpu) == 16);
 
 struct amd64tss common_tss[MAXCPU];
@@ -827,7 +828,7 @@ extern inthand_t
 	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 	IDTVEC(xmm), IDTVEC(dblfault),
-	IDTVEC(div_pti), IDTVEC(dbg_pti), IDTVEC(bpt_pti),
+	IDTVEC(div_pti), IDTVEC(bpt_pti),
 	IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti),
 	IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti),
 	IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti),
@@ -1637,8 +1638,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 		    SEL_KPL, 0);
 	setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT,
 	    SEL_KPL, 0);
-	setidt(IDT_DB, pti ? &IDTVEC(dbg_pti) : &IDTVEC(dbg), SDT_SYSIGT,
-	    SEL_KPL, 0);
+	setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4);
 	setidt(IDT_NMI, &IDTVEC(nmi),  SDT_SYSIGT, SEL_KPL, 2);
 	setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT,
 	    SEL_UPL, 0);
@@ -1720,6 +1720,13 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	np = ((struct nmi_pcpu *) &mce0_stack[sizeof(mce0_stack)]) - 1;
 	np->np_pcpu = (register_t) pc;
 	common_tss[0].tss_ist3 = (long) np;
+
+	/*
+	 * DB# stack, runs on ist4.
+	 */
+	np = ((struct nmi_pcpu *) &dbg0_stack[sizeof(dbg0_stack)]) - 1;
+	np->np_pcpu = (register_t) pc;
+	common_tss[0].tss_ist4 = (long) np;
 	
 	/* Set the IO permission bitmap (empty due to tss seg limit) */
 	common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE;

Modified: releng/11.1/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- releng/11.1/sys/amd64/amd64/mp_machdep.c	Tue May  8 17:05:39 2018	(r333370)
+++ releng/11.1/sys/amd64/amd64/mp_machdep.c	Tue May  8 17:12:10 2018	(r333371)
@@ -87,6 +87,7 @@ extern	struct pcpu __pcpu[];
 char *doublefault_stack;
 char *mce_stack;
 char *nmi_stack;
+char *dbg_stack;
 
 /*
  * Local data and functions.
@@ -225,6 +226,10 @@ init_secondary(void)
 	np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1;
 	common_tss[cpu].tss_ist3 = (long) np;
 
+	/* The DB# stack runs on IST4. */
+	np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
+	common_tss[cpu].tss_ist4 = (long) np;
+
 	/* Prepare private GDT */
 	gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
 	for (x = 0; x < NGDT; x++) {
@@ -270,6 +275,10 @@ init_secondary(void)
 	np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1;
 	np->np_pcpu = (register_t) pc;
 
+	/* Save the per-cpu pointer for use by the DB# handler. */
+	np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
+	np->np_pcpu = (register_t) pc;
+
 	wrmsr(MSR_FSBASE, 0);		/* User value */
 	wrmsr(MSR_GSBASE, (u_int64_t)pc);
 	wrmsr(MSR_KGSBASE, (u_int64_t)pc);	/* XXX User value while we're in the kernel */
@@ -367,6 +376,8 @@ native_start_all_aps(void)
 		mce_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
 		    M_WAITOK | M_ZERO);
 		nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
+		    M_WAITOK | M_ZERO);
+		dbg_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
 		    M_WAITOK | M_ZERO);
 		dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
 		    M_WAITOK | M_ZERO);

Modified: releng/11.1/sys/amd64/amd64/pmap.c
==============================================================================
--- releng/11.1/sys/amd64/amd64/pmap.c	Tue May  8 17:05:39 2018	(r333370)
+++ releng/11.1/sys/amd64/amd64/pmap.c	Tue May  8 17:12:10 2018	(r333371)
@@ -7565,6 +7565,9 @@ pmap_pti_init(void)
 		/* MC# stack IST 3 */
 		va = common_tss[i].tss_ist3 + sizeof(struct nmi_pcpu);
 		pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false);
+		/* DB# stack IST 4 */
+		va = common_tss[i].tss_ist4 + sizeof(struct nmi_pcpu);
+		pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false);
 	}
 	pmap_pti_add_kva_locked((vm_offset_t)kernphys + KERNBASE,
 	    (vm_offset_t)etext, true);

Modified: releng/11.1/sys/amd64/amd64/trap.c
==============================================================================
--- releng/11.1/sys/amd64/amd64/trap.c	Tue May  8 17:05:39 2018	(r333370)
+++ releng/11.1/sys/amd64/amd64/trap.c	Tue May  8 17:12:10 2018	(r333371)
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
  */
 
 #include "opt_clock.h"
+#include "opt_compat.h"
 #include "opt_cpu.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_isa.h"
@@ -99,6 +100,11 @@ PMC_SOFT_DEFINE( , , page_fault, write);
 #include <sys/dtrace_bsd.h>
 #endif
 
+extern inthand_t IDTVEC(bpt), IDTVEC(bpt_pti), IDTVEC(dbg),
+    IDTVEC(fast_syscall), IDTVEC(fast_syscall_pti), IDTVEC(fast_syscall32),
+    IDTVEC(int0x80_syscall_pti), IDTVEC(int0x80_syscall);
+
+
 extern void __noinline trap(struct trapframe *frame);
 extern void trap_check(struct trapframe *frame);
 extern void syscall(struct trapframe *frame);
@@ -536,6 +542,52 @@ trap(struct trapframe *frame)
 				load_dr6(rdr6() & ~0xf);
 				goto out;
 			}
+
+			/*
+			 * Malicious user code can configure a debug
+			 * register watchpoint to trap on data access
+			 * to the top of stack and then execute 'pop
+			 * %ss; int 3'.  Due to exception deferral for
+			 * 'pop %ss', the CPU will not interrupt 'int
+			 * 3' to raise the DB# exception for the debug
+			 * register but will postpone the DB# until
+			 * execution of the first instruction of the
+			 * BP# handler (in kernel mode).  Normally the
+			 * previous check would ignore DB# exceptions
+			 * for watchpoints on user addresses raised in
+			 * kernel mode.  However, some CPU errata
+			 * include cases where DB# exceptions do not
+			 * properly set bits in %dr6, e.g. Haswell
+			 * HSD23 and Skylake-X SKZ24.
+			 *
+			 * A deferred DB# can also be raised on the
+			 * first instructions of system call entry
+			 * points or single-step traps via similar use
+			 * of 'pop %ss' or 'mov xxx, %ss'.
+			 */
+			if (pti) {
+				if (frame->tf_rip ==
+				    (uintptr_t)IDTVEC(fast_syscall_pti) ||
+#ifdef COMPAT_FREEBSD32
+				    frame->tf_rip ==
+				    (uintptr_t)IDTVEC(int0x80_syscall_pti) ||
+#endif
+				    frame->tf_rip == (uintptr_t)IDTVEC(bpt_pti))
+					return;
+			} else {
+				if (frame->tf_rip ==
+				    (uintptr_t)IDTVEC(fast_syscall) ||
+#ifdef COMPAT_FREEBSD32
+				    frame->tf_rip ==
+				    (uintptr_t)IDTVEC(int0x80_syscall) ||
+#endif
+				    frame->tf_rip == (uintptr_t)IDTVEC(bpt))
+					return;
+			}
+			if (frame->tf_rip == (uintptr_t)IDTVEC(dbg) ||
+			    /* Needed for AMD. */
+			    frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall32))
+				return;
 			/*
 			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
 			 */

Modified: releng/11.1/sys/conf/newvers.sh
==============================================================================
--- releng/11.1/sys/conf/newvers.sh	Tue May  8 17:05:39 2018	(r333370)
+++ releng/11.1/sys/conf/newvers.sh	Tue May  8 17:12:10 2018	(r333371)
@@ -44,7 +44,7 @@
 
 TYPE="FreeBSD"
 REVISION="11.1"
-BRANCH="RELEASE-p9"
+BRANCH="RELEASE-p10"
 if [ -n "${BRANCH_OVERRIDE}" ]; then
 	BRANCH=${BRANCH_OVERRIDE}
 fi

Modified: releng/11.1/sys/i386/i386/trap.c
==============================================================================
--- releng/11.1/sys/i386/i386/trap.c	Tue May  8 17:05:39 2018	(r333370)
+++ releng/11.1/sys/i386/i386/trap.c	Tue May  8 17:12:10 2018	(r333371)
@@ -116,6 +116,8 @@ void dblfault_handler(void);
 
 extern inthand_t IDTVEC(lcall_syscall);
 
+extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall);
+
 #define MAX_TRAP_MSG		32
 static char *trap_msg[] = {
 	"",					/*  0 unused */
@@ -668,6 +670,34 @@ kernel_trctrap:
 				load_dr6(rdr6() & ~0xf);
 				goto out;
 			}
+
+			/*
+			 * Malicious user code can configure a debug
+			 * register watchpoint to trap on data access
+			 * to the top of stack and then execute 'pop
+			 * %ss; int 3'.  Due to exception deferral for
+			 * 'pop %ss', the CPU will not interrupt 'int
+			 * 3' to raise the DB# exception for the debug
+			 * register but will postpone the DB# until
+			 * execution of the first instruction of the
+			 * BP# handler (in kernel mode).  Normally the
+			 * previous check would ignore DB# exceptions
+			 * for watchpoints on user addresses raised in
+			 * kernel mode.  However, some CPU errata
+			 * include cases where DB# exceptions do not
+			 * properly set bits in %dr6, e.g. Haswell
+			 * HSD23 and Skylake-X SKZ24.
+			 *
+			 * A deferred DB# can also be raised on the
+			 * first instructions of system call entry
+			 * points or single-step traps via similar use
+			 * of 'pop %ss' or 'mov xxx, %ss'.
+			 */
+			if (frame->tf_eip ==
+			    (uintptr_t)IDTVEC(int0x80_syscall) ||
+			    frame->tf_eip == (uintptr_t)IDTVEC(bpt) ||
+			    frame->tf_eip == (uintptr_t)IDTVEC(dbg))
+				return;
 			/*
 			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
 			 */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201805081712.w48HCAjJ035913>