Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 13 Jun 2012 15:25:53 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r237009 - in stable/8: lib/libc/amd64/gen lib/libc/arm/gen lib/libc/gen lib/libc/i386/gen lib/libc/ia64/gen lib/libc/mips/gen lib/libc/powerpc/gen lib/libc/sparc64/gen sys/amd64/acpica ...
Message-ID:  <201206131525.q5DFPr3o032307@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Wed Jun 13 15:25:52 2012
New Revision: 237009
URL: http://svn.freebsd.org/changeset/base/237009

Log:
  MFC 230260-230262,230269,230270,230426,230429,230538,230765,230766,230864,
  232520 (partial),235563:
  Add support for the extended FPU states on amd64, both for native
  64bit and 32bit ABIs.  As a side-effect, it enables AVX on capable
  CPUs.
  
  In particular:
  
  - Query the CPU support for XSAVE, list of the supported extensions
    and the required size of FPU save area. The hw.use_xsave tunable is
    provided for disabling XSAVE, and hw.xsave_mask may be used to
    select the enabled extensions.
  
  - Remove the FPU save area from PCB and dynamically allocate the
    (run-time sized) user save area on the top of the kernel stack,
    right above the PCB. Reorganize the thread0 PCB initialization to
    postpone it after BSP is queried for save area size.
  
  - The dumppcb, stoppcbs and susppcbs no longer carry the FPU state
    either. FPU state is only useful for suspend, where it is saved in
    the dynamically allocated suspfpusave area.
  
  - Use XSAVE and XRSTOR to save/restore FPU state, if supported and
    enabled.
  
  - Define new mcontext_t flag _MC_HASFPXSTATE, indicating that
    mcontext_t has a valid pointer to out-of-struct extended FPU
    state. Signal handlers are supplied with stack-allocated fpu
    state. The sigreturn(2) and setcontext(2) syscalls honour the flag,
    allowing the signal handlers to inspect and manipulate extended
    state in the interrupted context.
  
  - The getcontext(2) call never returns extended state, since there is
    no room in the fixed-size mcontext_t for a variable-sized save
    area. And, since mcontext_t is embedded into ucontext_t, this is
    impossible to fix in a reasonable way.  Provide a sysarch(2)
    facility to query extended FPU state.
  
  - Add API for obtaining extended machine context states that cannot
    fit into the existing mcontext_t.
  
    On i386 and amd64 return the extended FPU states using
    getcontextx(3). For other architectures, getcontextx(3) returns the
    same information as getcontext(2).
  
  - Add ptrace(2) support for getting and setting extended state; while
    there, implement the missing PT_I386_{GET,SET}XMMREGS for 32bit binaries.
  
  - Change fpu_kern KPI to not expose struct fpu_kern_ctx layout to
    consumers, making it opaque. Internally, struct fpu_kern_ctx now
    contains a space for the extended state. Convert in-kernel consumers
    of fpu_kern KPI both on i386 and amd64.
  
  Reviewed by:	kib

Added:
  stable/8/lib/libc/amd64/gen/getcontextx.c
     - copied, changed from r230429, head/lib/libc/amd64/gen/getcontextx.c
  stable/8/lib/libc/arm/gen/getcontextx.c
     - copied, changed from r230429, head/lib/libc/arm/gen/getcontextx.c
  stable/8/lib/libc/i386/gen/getcontextx.c
     - copied, changed from r230429, head/lib/libc/i386/gen/getcontextx.c
  stable/8/lib/libc/ia64/gen/getcontextx.c
     - copied, changed from r230429, head/lib/libc/ia64/gen/getcontextx.c
  stable/8/lib/libc/mips/gen/getcontextx.c
     - copied, changed from r230429, head/lib/libc/mips/gen/getcontextx.c
  stable/8/lib/libc/powerpc/gen/getcontextx.c
     - copied, changed from r230429, head/lib/libc/powerpc/gen/getcontextx.c
  stable/8/lib/libc/sparc64/gen/getcontextx.c
     - copied, changed from r230429, head/lib/libc/sparc64/gen/getcontextx.c
  stable/8/sys/amd64/amd64/ptrace_machdep.c
     - copied, changed from r230426, head/sys/amd64/amd64/ptrace_machdep.c
Modified:
  stable/8/lib/libc/amd64/gen/Makefile.inc
  stable/8/lib/libc/arm/gen/Makefile.inc
  stable/8/lib/libc/gen/Symbol.map
  stable/8/lib/libc/gen/getcontext.3
  stable/8/lib/libc/gen/ucontext.3
  stable/8/lib/libc/i386/gen/Makefile.inc
  stable/8/lib/libc/ia64/gen/Makefile.inc
  stable/8/lib/libc/mips/gen/Makefile.inc
  stable/8/lib/libc/powerpc/gen/Makefile.inc
  stable/8/lib/libc/sparc64/gen/Makefile.inc
  stable/8/sys/amd64/acpica/acpi_switch.S
  stable/8/sys/amd64/acpica/acpi_wakecode.S
  stable/8/sys/amd64/acpica/acpi_wakeup.c
  stable/8/sys/amd64/amd64/cpu_switch.S
  stable/8/sys/amd64/amd64/fpu.c
  stable/8/sys/amd64/amd64/genassym.c
  stable/8/sys/amd64/amd64/initcpu.c
  stable/8/sys/amd64/amd64/machdep.c
  stable/8/sys/amd64/amd64/mp_machdep.c
  stable/8/sys/amd64/amd64/sys_machdep.c
  stable/8/sys/amd64/amd64/trap.c
  stable/8/sys/amd64/amd64/vm_machdep.c
  stable/8/sys/amd64/ia32/ia32_reg.c
  stable/8/sys/amd64/ia32/ia32_signal.c
  stable/8/sys/amd64/include/fpu.h
  stable/8/sys/amd64/include/frame.h
  stable/8/sys/amd64/include/md_var.h
  stable/8/sys/amd64/include/pcb.h
  stable/8/sys/amd64/include/pcpu.h
  stable/8/sys/amd64/include/ptrace.h
  stable/8/sys/amd64/include/signal.h
  stable/8/sys/amd64/include/specialreg.h
  stable/8/sys/amd64/include/sysarch.h
  stable/8/sys/amd64/include/ucontext.h
  stable/8/sys/compat/ia32/ia32_signal.h
  stable/8/sys/conf/files.amd64
  stable/8/sys/crypto/aesni/aesni.c
  stable/8/sys/crypto/aesni/aesni.h
  stable/8/sys/crypto/aesni/aesni_wrap.c
  stable/8/sys/crypto/via/padlock.c
  stable/8/sys/crypto/via/padlock.h
  stable/8/sys/crypto/via/padlock_cipher.c
  stable/8/sys/crypto/via/padlock_hash.c
  stable/8/sys/dev/random/nehemiah.c
  stable/8/sys/i386/i386/machdep.c
  stable/8/sys/i386/include/npx.h
  stable/8/sys/i386/include/ptrace.h
  stable/8/sys/i386/include/signal.h
  stable/8/sys/i386/include/specialreg.h
  stable/8/sys/i386/include/sysarch.h
  stable/8/sys/i386/include/ucontext.h
  stable/8/sys/i386/isa/npx.c
  stable/8/sys/pc98/pc98/machdep.c
  stable/8/sys/sys/ucontext.h
Directory Properties:
  stable/8/lib/libc/   (props changed)
  stable/8/lib/libc/stdtime/   (props changed)
  stable/8/lib/libc/sys/   (props changed)
  stable/8/lib/libc/uuid/   (props changed)
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/boot/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/e1000/   (props changed)

Modified: stable/8/lib/libc/amd64/gen/Makefile.inc
==============================================================================
--- stable/8/lib/libc/amd64/gen/Makefile.inc	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/lib/libc/amd64/gen/Makefile.inc	Wed Jun 13 15:25:52 2012	(r237009)
@@ -2,7 +2,7 @@
 # $FreeBSD$
 
 SRCS+=	_setjmp.S _set_tp.c rfork_thread.S setjmp.S sigsetjmp.S \
-	fabs.S modf.S \
+	fabs.S getcontextx.c modf.S \
 	infinity.c ldexp.c makecontext.c signalcontext.c \
 	flt_rounds.c fpgetmask.c fpsetmask.c fpgetprec.c fpsetprec.c \
 	fpgetround.c fpsetround.c fpgetsticky.c

Copied and modified: stable/8/lib/libc/amd64/gen/getcontextx.c (from r230429, head/lib/libc/amd64/gen/getcontextx.c)
==============================================================================
--- head/lib/libc/amd64/gen/getcontextx.c	Sat Jan 21 18:00:28 2012	(r230429, copy source)
+++ stable/8/lib/libc/amd64/gen/getcontextx.c	Wed Jun 13 15:25:52 2012	(r237009)
@@ -39,7 +39,7 @@ __FBSDID("$FreeBSD$");
 
 static int xstate_sz = -1;
 
-size_t
+int
 __getcontextx_size(void)
 {
 	u_int p[4];

Modified: stable/8/lib/libc/arm/gen/Makefile.inc
==============================================================================
--- stable/8/lib/libc/arm/gen/Makefile.inc	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/lib/libc/arm/gen/Makefile.inc	Wed Jun 13 15:25:52 2012	(r237009)
@@ -2,5 +2,5 @@
 # $FreeBSD$
 
 SRCS+=	_ctx_start.S _setjmp.S _set_tp.c alloca.S fabs.c \
-	infinity.c ldexp.c makecontext.c modf.c \
+	getcontextx.c infinity.c ldexp.c makecontext.c modf.c \
 	setjmp.S signalcontext.c sigsetjmp.S divsi3.S

Copied and modified: stable/8/lib/libc/arm/gen/getcontextx.c (from r230429, head/lib/libc/arm/gen/getcontextx.c)
==============================================================================
--- head/lib/libc/arm/gen/getcontextx.c	Sat Jan 21 18:00:28 2012	(r230429, copy source)
+++ stable/8/lib/libc/arm/gen/getcontextx.c	Wed Jun 13 15:25:52 2012	(r237009)
@@ -32,7 +32,7 @@ __FBSDID("$FreeBSD$");
 #include <errno.h>
 #include <stdlib.h>
 
-size_t
+int
 __getcontextx_size(void)
 {
 

Modified: stable/8/lib/libc/gen/Symbol.map
==============================================================================
--- stable/8/lib/libc/gen/Symbol.map	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/lib/libc/gen/Symbol.map	Wed Jun 13 15:25:52 2012	(r237009)
@@ -371,6 +371,7 @@ FBSD_1.2 {
 
 FBSD_1.3 {
 	__FreeBSD_libc_enter_restricted_mode;
+	getcontextx;
 };
 
 FBSDprivate_1.0 {
@@ -487,4 +488,6 @@ FBSDprivate_1.0 {
 	_wait;
 	__waitpid;
 	_waitpid;
+	__fillcontextx;
+	__getcontextx_size;
 };

Modified: stable/8/lib/libc/gen/getcontext.3
==============================================================================
--- stable/8/lib/libc/gen/getcontext.3	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/lib/libc/gen/getcontext.3	Wed Jun 13 15:25:52 2012	(r237009)
@@ -35,11 +35,11 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd September 10, 2002
+.Dd December 26, 2011
 .Dt GETCONTEXT 3
 .Os
 .Sh NAME
-.Nm getcontext , setcontext
+.Nm getcontext , getcontextx , setcontext
 .Nd get and set user thread context
 .Sh LIBRARY
 .Lb libc
@@ -59,6 +59,20 @@ This saved context may then later be res
 .Fn setcontext .
 .Pp
 The
+.Fn getcontextx
+function saves the current execution context in the newly allocated structure
+.Vt ucontext_t ,
+which is returned on success.
+If architecture defines additional CPU states that can be stored in extended
+blocks referenced from the
+.Vt ucontext_t ,
+the memory for them may be allocated and their context also stored.
+Memory returned by
+.Fn getcontextx
+function shall be freed using
+.Fn free 3 .
+.Pp
+The
 .Fn setcontext
 function
 makes a previously saved thread context the current thread context, i.e.,
@@ -109,11 +123,24 @@ If successful,
 returns zero and
 .Fn setcontext
 does not return; otherwise \-1 is returned.
+The
+.Fn getcontextx
+returns pointer to the allocated and initialized context on success, and
+.Va NULL
+on failure.
 .Sh ERRORS
 No errors are defined for
 .Fn getcontext
 or
 .Fn setcontext .
+The
+.Fn getcontextx
+may return the following errors in
+.Va errno :
+.Bl -tag -width Er
+.It Bq Er ENOMEM
+No memory was available to allocate for the context or some extended state.
+.El
 .Sh SEE ALSO
 .Xr sigaction 2 ,
 .Xr sigaltstack 2 ,

Modified: stable/8/lib/libc/gen/ucontext.3
==============================================================================
--- stable/8/lib/libc/gen/ucontext.3	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/lib/libc/gen/ucontext.3	Wed Jun 13 15:25:52 2012	(r237009)
@@ -92,6 +92,9 @@ structures:
 .Ft int
 .Fn getcontext "ucontext_t *" ;
 .It
+.Ft "ucontext_t *"
+.Fn getcontextx "void" ;
+.It
 .Ft int
 .Fn setcontext "const ucontext_t *" ;
 .It
@@ -104,4 +107,5 @@ structures:
 .Sh SEE ALSO
 .Xr sigaltstack 2 ,
 .Xr getcontext 3 ,
+.Xr getcontextx 3 ,
 .Xr makecontext 3

Modified: stable/8/lib/libc/i386/gen/Makefile.inc
==============================================================================
--- stable/8/lib/libc/i386/gen/Makefile.inc	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/lib/libc/i386/gen/Makefile.inc	Wed Jun 13 15:25:52 2012	(r237009)
@@ -2,5 +2,5 @@
 # $FreeBSD$
 
 SRCS+=	_ctx_start.S _setjmp.S _set_tp.c fabs.S \
-	flt_rounds.c infinity.c ldexp.c makecontext.c modf.S \
+	flt_rounds.c getcontextx.c infinity.c ldexp.c makecontext.c modf.S \
 	rfork_thread.S setjmp.S signalcontext.c sigsetjmp.S

Copied and modified: stable/8/lib/libc/i386/gen/getcontextx.c (from r230429, head/lib/libc/i386/gen/getcontextx.c)
==============================================================================
--- head/lib/libc/i386/gen/getcontextx.c	Sat Jan 21 18:00:28 2012	(r230429, copy source)
+++ stable/8/lib/libc/i386/gen/getcontextx.c	Wed Jun 13 15:25:52 2012	(r237009)
@@ -38,7 +38,7 @@ __FBSDID("$FreeBSD$");
 
 static int xstate_sz = -1;
 
-size_t
+int
 __getcontextx_size(void)
 {
 	u_int p[4];

Modified: stable/8/lib/libc/ia64/gen/Makefile.inc
==============================================================================
--- stable/8/lib/libc/ia64/gen/Makefile.inc	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/lib/libc/ia64/gen/Makefile.inc	Wed Jun 13 15:25:52 2012	(r237009)
@@ -3,8 +3,8 @@
 SRCS+=	__divdf3.S __divdi3.S __divsf3.S __divsi3.S __moddi3.S __modsi3.S \
 	__udivdi3.S __udivsi3.S __umoddi3.S __umodsi3.S _mcount.S _set_tp.c \
 	_setjmp.S fabs.S flt_rounds.c fpgetmask.c fpgetround.c fpsetmask.c \
-	fpsetround.c infinity.c ldexp.c makecontext.c modf.c setjmp.S \
-	signalcontext.c sigsetjmp.S
+	fpsetround.c getcontextx.c infinity.c ldexp.c makecontext.c modf.c \
+	setjmp.S signalcontext.c sigsetjmp.S
 
 # The following may go away if function _Unwind_FindTableEntry()
 # will be part of GCC.

Copied and modified: stable/8/lib/libc/ia64/gen/getcontextx.c (from r230429, head/lib/libc/ia64/gen/getcontextx.c)
==============================================================================
--- head/lib/libc/ia64/gen/getcontextx.c	Sat Jan 21 18:00:28 2012	(r230429, copy source)
+++ stable/8/lib/libc/ia64/gen/getcontextx.c	Wed Jun 13 15:25:52 2012	(r237009)
@@ -32,7 +32,7 @@ __FBSDID("$FreeBSD$");
 #include <errno.h>
 #include <stdlib.h>
 
-size_t
+int
 __getcontextx_size(void)
 {
 

Modified: stable/8/lib/libc/mips/gen/Makefile.inc
==============================================================================
--- stable/8/lib/libc/mips/gen/Makefile.inc	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/lib/libc/mips/gen/Makefile.inc	Wed Jun 13 15:25:52 2012	(r237009)
@@ -6,4 +6,5 @@ SRCS+=	infinity.c fabs.c ldexp.c modf.c
 # SRCS+=	flt_rounds.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c \
 #	fpsetround.c fpsetsticky.c
 
-SRCS+=	_ctx_start.S _set_tp.c _setjmp.S makecontext.c setjmp.S signalcontext.c sigsetjmp.S
+SRCS+=	_ctx_start.S _set_tp.c _setjmp.S getcontextx.c makecontext.c \
+	setjmp.S signalcontext.c sigsetjmp.S

Copied and modified: stable/8/lib/libc/mips/gen/getcontextx.c (from r230429, head/lib/libc/mips/gen/getcontextx.c)
==============================================================================
--- head/lib/libc/mips/gen/getcontextx.c	Sat Jan 21 18:00:28 2012	(r230429, copy source)
+++ stable/8/lib/libc/mips/gen/getcontextx.c	Wed Jun 13 15:25:52 2012	(r237009)
@@ -32,7 +32,7 @@ __FBSDID("$FreeBSD$");
 #include <errno.h>
 #include <stdlib.h>
 
-size_t
+int
 __getcontextx_size(void)
 {
 

Modified: stable/8/lib/libc/powerpc/gen/Makefile.inc
==============================================================================
--- stable/8/lib/libc/powerpc/gen/Makefile.inc	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/lib/libc/powerpc/gen/Makefile.inc	Wed Jun 13 15:25:52 2012	(r237009)
@@ -1,7 +1,7 @@
 # $FreeBSD$
 
 SRCS += _ctx_start.S fabs.S flt_rounds.c fpgetmask.c fpgetround.c \
-	fpgetsticky.c fpsetmask.c fpsetround.c \
+	fpgetsticky.c fpsetmask.c fpsetround.c getcontextx.c \
 	infinity.c ldexp.c makecontext.c modf.c _setjmp.S \
 	setjmp.S sigsetjmp.S signalcontext.c syncicache.c \
 	_set_tp.c

Copied and modified: stable/8/lib/libc/powerpc/gen/getcontextx.c (from r230429, head/lib/libc/powerpc/gen/getcontextx.c)
==============================================================================
--- head/lib/libc/powerpc/gen/getcontextx.c	Sat Jan 21 18:00:28 2012	(r230429, copy source)
+++ stable/8/lib/libc/powerpc/gen/getcontextx.c	Wed Jun 13 15:25:52 2012	(r237009)
@@ -32,7 +32,7 @@ __FBSDID("$FreeBSD$");
 #include <errno.h>
 #include <stdlib.h>
 
-size_t
+int
 __getcontextx_size(void)
 {
 

Modified: stable/8/lib/libc/sparc64/gen/Makefile.inc
==============================================================================
--- stable/8/lib/libc/sparc64/gen/Makefile.inc	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/lib/libc/sparc64/gen/Makefile.inc	Wed Jun 13 15:25:52 2012	(r237009)
@@ -2,5 +2,5 @@
 
 SRCS+=	_ctx_start.S _setjmp.S fabs.S fixunsdfsi.S flt_rounds.c fpgetmask.c \
 	fpgetround.c fpgetsticky.c fpsetmask.c fpsetround.c \
-	infinity.c ldexp.c makecontext.c modf.S \
+	getcontextx.c infinity.c ldexp.c makecontext.c modf.S \
 	signalcontext.c setjmp.S sigsetjmp.S _set_tp.c

Copied and modified: stable/8/lib/libc/sparc64/gen/getcontextx.c (from r230429, head/lib/libc/sparc64/gen/getcontextx.c)
==============================================================================
--- head/lib/libc/sparc64/gen/getcontextx.c	Sat Jan 21 18:00:28 2012	(r230429, copy source)
+++ stable/8/lib/libc/sparc64/gen/getcontextx.c	Wed Jun 13 15:25:52 2012	(r237009)
@@ -32,7 +32,7 @@ __FBSDID("$FreeBSD$");
 #include <errno.h>
 #include <stdlib.h>
 
-size_t
+int
 __getcontextx_size(void)
 {
 

Modified: stable/8/sys/amd64/acpica/acpi_switch.S
==============================================================================
--- stable/8/sys/amd64/acpica/acpi_switch.S	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/sys/amd64/acpica/acpi_switch.S	Wed Jun 13 15:25:52 2012	(r237009)
@@ -146,11 +146,22 @@ ENTRY(acpi_restorecpu)
 
 	/* Restore FPU state. */
 	fninit
-	fxrstor	PCB_USERFPU(%rdi)
+	movq	WAKEUP_CTX(fpusave),%rdi
+	cmpl	$0,use_xsave
+	jne	1f
+	fxrstor	(%rdi)
+	jmp	2f
+1:	movl	xsave_mask,%eax
+	movl	xsave_mask+4,%edx
+/*	xrstor	(%rdi) */
+	.byte	0x0f,0xae,0x2f
+2:
 
 	/* Reload CR0. */
 	movq	%rcx, %cr0
 
+	movq	WAKEUP_CTX(pcb),%rdi
+
 	/* Restore return address. */
 	movq	PCB_RIP(%rdi), %rax
 	movq	%rax, (%rsp)

Modified: stable/8/sys/amd64/acpica/acpi_wakecode.S
==============================================================================
--- stable/8/sys/amd64/acpica/acpi_wakecode.S	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/sys/amd64/acpica/acpi_wakecode.S	Wed Jun 13 15:25:52 2012	(r237009)
@@ -270,6 +270,8 @@ wakeup_pcb:
 wakeup_gdt:
 	.word	0
 	.quad	0
+wakeup_fpusave:
+	.quad	0
 
 	ALIGN_DATA
 wakeup_efer:

Modified: stable/8/sys/amd64/acpica/acpi_wakeup.c
==============================================================================
--- stable/8/sys/amd64/acpica/acpi_wakeup.c	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/sys/amd64/acpica/acpi_wakeup.c	Wed Jun 13 15:25:52 2012	(r237009)
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/pcb.h>
 #include <machine/pmap.h>
 #include <machine/specialreg.h>
+#include <machine/md_var.h>
 
 #ifdef SMP
 #include <machine/apicreg.h>
@@ -67,8 +68,10 @@ extern int		acpi_reset_video;
 
 #ifdef SMP
 extern struct pcb	**susppcbs;
+extern void		**suspfpusave;
 #else
 static struct pcb	**susppcbs;
+static void		**suspfpusave;
 #endif
 
 int			acpi_restorecpu(vm_offset_t, struct pcb *);
@@ -105,6 +108,7 @@ acpi_wakeup_ap(struct acpi_softc *sc, in
 	int		ms;
 
 	WAKECODE_FIXUP(wakeup_pcb, struct pcb *, susppcbs[cpu]);
+	WAKECODE_FIXUP(wakeup_fpusave, void *, suspfpusave[cpu]);
 	WAKECODE_FIXUP(wakeup_gdt, uint16_t, susppcbs[cpu]->pcb_gdt.rd_limit);
 	WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t,
 	    susppcbs[cpu]->pcb_gdt.rd_base);
@@ -243,6 +247,7 @@ acpi_sleep_machdep(struct acpi_softc *sc
 	load_cr3(KPML4phys);
 
 	if (savectx(susppcbs[0])) {
+		ctx_fpusave(suspfpusave[0]);
 #ifdef SMP
 		if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) {
 			device_printf(sc->acpi_dev,
@@ -256,6 +261,7 @@ acpi_sleep_machdep(struct acpi_softc *sc
 		WAKECODE_FIXUP(reset_video, uint8_t, (acpi_reset_video != 0));
 
 		WAKECODE_FIXUP(wakeup_pcb, struct pcb *, susppcbs[0]);
+		WAKECODE_FIXUP(wakeup_fpusave, void *, suspfpusave[0]);
 		WAKECODE_FIXUP(wakeup_gdt, uint16_t,
 		    susppcbs[0]->pcb_gdt.rd_limit);
 		WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t,
@@ -333,8 +339,11 @@ acpi_alloc_wakeup_handler(void)
 		return (NULL);
 	}
 	susppcbs = malloc(mp_ncpus * sizeof(*susppcbs), M_DEVBUF, M_WAITOK);
-	for (i = 0; i < mp_ncpus; i++)
+	suspfpusave = malloc(mp_ncpus * sizeof(void *), M_DEVBUF, M_WAITOK);
+	for (i = 0; i < mp_ncpus; i++) {
 		susppcbs[i] = malloc(sizeof(**susppcbs), M_DEVBUF, M_WAITOK);
+		suspfpusave[i] = alloc_fpusave(M_WAITOK);
+	}
 
 	return (wakeaddr);
 }

Modified: stable/8/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- stable/8/sys/amd64/amd64/cpu_switch.S	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/sys/amd64/amd64/cpu_switch.S	Wed Jun 13 15:25:52 2012	(r237009)
@@ -112,16 +112,25 @@ done_store_dr:
 
 	/* have we used fp, and need a save? */
 	cmpq	%rdi,PCPU(FPCURTHREAD)
-	jne	1f
+	jne	3f
 	movq	PCB_SAVEFPU(%r8),%r8
 	clts
+	cmpl	$0,use_xsave
+	jne	1f
 	fxsave	(%r8)
-	smsw	%ax
+	jmp	2f
+1:	movq	%rdx,%rcx
+	movl	xsave_mask,%eax
+	movl	xsave_mask+4,%edx
+/*	xsave	(%r8) */
+	.byte	0x41,0x0f,0xae,0x20
+	movq	%rcx,%rdx
+2:	smsw	%ax
 	orb	$CR0_TS,%al
 	lmsw	%ax
 	xorl	%eax,%eax
 	movq	%rax,PCPU(FPCURTHREAD)
-1:
+3:
 
 	/* Save is done.  Now fire up new thread. Leave old vmspace. */
 	movq	TD_PCB(%rsi),%r8
@@ -354,10 +363,19 @@ ENTRY(savectx)
 	sldt	PCB_LDT(%rdi)
 	str	PCB_TR(%rdi)
 
-	clts
-	fxsave	PCB_USERFPU(%rdi)
-	movq	%rsi,%cr0	/* The previous %cr0 is saved in %rsi. */
+2:	movq	%rsi,%cr0	/* The previous %cr0 is saved in %rsi. */
 
 	movl	$1,%eax
 	ret
 END(savectx)
+
+/*
+ * Wrapper around fpusave to take care of CR0_TS.
+ */
+ENTRY(ctx_fpusave)
+	movq	%cr0,%rsi
+	clts
+	call	fpusave
+	movq	%rsi,%cr0
+	ret
+END(ctx_fpusave)

Modified: stable/8/sys/amd64/amd64/fpu.c
==============================================================================
--- stable/8/sys/amd64/amd64/fpu.c	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/sys/amd64/amd64/fpu.c	Wed Jun 13 15:25:52 2012	(r237009)
@@ -78,6 +78,41 @@ __FBSDID("$FreeBSD$");
 				    : : "n" (CR0_TS) : "ax")
 #define	stop_emulating()	__asm __volatile("clts")
 
+static __inline void
+xrstor(char *addr, uint64_t mask)
+{
+	uint32_t low, hi;
+
+	low = mask;
+	hi = mask >> 32;
+	/* xrstor (%rdi) */
+	__asm __volatile(".byte	0x0f,0xae,0x2f" : :
+	    "a" (low), "d" (hi), "D" (addr));
+}
+
+static __inline void
+xsave(char *addr, uint64_t mask)
+{
+	uint32_t low, hi;
+
+	low = mask;
+	hi = mask >> 32;
+	/* xsave (%rdi) */
+	__asm __volatile(".byte	0x0f,0xae,0x27" : :
+	    "a" (low), "d" (hi), "D" (addr) : "memory");
+}
+
+static __inline void
+xsetbv(uint32_t reg, uint64_t val)
+{
+	uint32_t low, hi;
+
+	low = val;
+	hi = val >> 32;
+	__asm __volatile(".byte 0x0f,0x01,0xd1" : :
+	    "c" (reg), "a" (low), "d" (hi));
+}
+
 #else	/* !(__GNUCLIKE_ASM && !lint) */
 
 void	fldcw(u_short cw);
@@ -90,25 +125,106 @@ void	fxrstor(caddr_t addr);
 void	ldmxcsr(u_int csr);
 void	start_emulating(void);
 void	stop_emulating(void);
+void	xrstor(char *addr, uint64_t mask);
+void	xsave(char *addr, uint64_t mask);
+void	xsetbv(uint32_t reg, uint64_t val);
 
 #endif	/* __GNUCLIKE_ASM && !lint */
 
 #define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw)
 #define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw)
 
-typedef u_char bool_t;
+CTASSERT(sizeof(struct savefpu) == 512);
+CTASSERT(sizeof(struct xstate_hdr) == 64);
+CTASSERT(sizeof(struct savefpu_ymm) == 832);
+
+/*
+ * This requirement is to make it easier for asm code to calculate
+ * offset of the fpu save area from the pcb address. FPU save area
+ * must be 64-byte aligned.
+ */
+CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0);
 
 static	void	fpu_clean_state(void);
 
 SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
     NULL, 1, "Floating point instructions executed in hardware");
 
-static	struct savefpu		fpu_initialstate;
+int use_xsave;			/* non-static for cpu_switch.S */
+uint64_t xsave_mask;		/* the same */
+static	struct savefpu *fpu_initialstate;
+
+void
+fpusave(void *addr)
+{
+
+	if (use_xsave)
+		xsave((char *)addr, xsave_mask);
+	else
+		fxsave((char *)addr);
+}
+
+static void
+fpurestore(void *addr)
+{
+
+	if (use_xsave)
+		xrstor((char *)addr, xsave_mask);
+	else
+		fxrstor((char *)addr);
+}
 
 /*
- * Initialize the floating point unit.  On the boot CPU we generate a
- * clean state that is used to initialize the floating point unit when
- * it is first used by a process.
+ * Enable XSAVE if supported and allowed by user.
+ * Calculate the xsave_mask.
+ */
+static void
+fpuinit_bsp1(void)
+{
+	u_int cp[4];
+	uint64_t xsave_mask_user;
+
+	if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
+		use_xsave = 1;
+		TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
+	}
+	if (!use_xsave)
+		return;
+
+	cpuid_count(0xd, 0x0, cp);
+	xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
+	if ((cp[0] & xsave_mask) != xsave_mask)
+		panic("CPU0 does not support X87 or SSE: %x", cp[0]);
+	xsave_mask = ((uint64_t)cp[3] << 32) | cp[0];
+	xsave_mask_user = xsave_mask;
+	TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user);
+	xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
+	xsave_mask &= xsave_mask_user;
+}
+
+/*
+ * Calculate the fpu save area size.
+ */
+static void
+fpuinit_bsp2(void)
+{
+	u_int cp[4];
+
+	if (use_xsave) {
+		cpuid_count(0xd, 0x0, cp);
+		cpu_max_ext_state_size = cp[1];
+
+		/*
+		 * Reload the cpu_feature2, since we enabled OSXSAVE.
+		 */
+		do_cpuid(1, cp);
+		cpu_feature2 = cp[2];
+	} else
+		cpu_max_ext_state_size = sizeof(struct savefpu);
+}
+
+/*
+ * Initialize the floating point unit.
  */
 void
 fpuinit(void)
@@ -117,6 +233,20 @@ fpuinit(void)
 	u_int mxcsr;
 	u_short control;
 
+	if (IS_BSP())
+		fpuinit_bsp1();
+
+	if (use_xsave) {
+		load_cr4(rcr4() | CR4_XSAVE);
+		xsetbv(XCR0, xsave_mask);
+	}
+
+	/*
+	 * XCR0 shall be set up before CPU can report the save area size.
+	 */
+	if (IS_BSP())
+		fpuinit_bsp2();
+
 	/*
 	 * It is too early for critical_enter() to work on AP.
 	 */
@@ -127,20 +257,46 @@ fpuinit(void)
 	fldcw(control);
 	mxcsr = __INITIAL_MXCSR__;
 	ldmxcsr(mxcsr);
-	if (PCPU_GET(cpuid) == 0) {
-		fxsave(&fpu_initialstate);
-		if (fpu_initialstate.sv_env.en_mxcsr_mask)
-			cpu_mxcsr_mask = fpu_initialstate.sv_env.en_mxcsr_mask;
-		else
-			cpu_mxcsr_mask = 0xFFBF;
-		bzero(fpu_initialstate.sv_fp, sizeof(fpu_initialstate.sv_fp));
-		bzero(fpu_initialstate.sv_xmm, sizeof(fpu_initialstate.sv_xmm));
-	}
 	start_emulating();
 	intr_restore(saveintr);
 }
 
 /*
+ * On the boot CPU we generate a clean state that is used to
+ * initialize the floating point unit when it is first used by a
+ * process.
+ */
+static void
+fpuinitstate(void *arg __unused)
+{
+	register_t saveintr;
+
+	fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF,
+	    M_WAITOK | M_ZERO);
+	saveintr = intr_disable();
+	stop_emulating();
+
+	fpusave(fpu_initialstate);
+	if (fpu_initialstate->sv_env.en_mxcsr_mask)
+		cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask;
+	else
+		cpu_mxcsr_mask = 0xFFBF;
+
+	/*
+	 * The fninit instruction does not modify XMM registers.  The
+	 * fpusave call dumped the garbage contained in the registers
+	 * after reset to the initial state saved.  Clear XMM
+	 * registers file image to make the startup program state and
+	 * signal handler XMM register content predictable.
+	 */
+	bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc));
+
+	start_emulating();
+	intr_restore(saveintr);
+}
+SYSINIT(fpuinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, fpuinitstate, NULL);
+
+/*
  * Free coprocessor (if we have it).
  */
 void
@@ -150,7 +306,7 @@ fpuexit(struct thread *td)
 	critical_enter();
 	if (curthread == PCPU_GET(fpcurthread)) {
 		stop_emulating();
-		fxsave(PCPU_GET(curpcb)->pcb_save);
+		fpusave(PCPU_GET(curpcb)->pcb_save);
 		start_emulating();
 		PCPU_SET(fpcurthread, 0);
 	}
@@ -423,7 +579,7 @@ fpudna(void)
 		 * the PCB doesn't contain a clean FPU state.  Explicitly
 		 * load an initial state.
 		 */
-		fxrstor(&fpu_initialstate);
+		fpurestore(fpu_initialstate);
 		if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
 			fldcw(pcb->pcb_initial_fpucw);
 		if (PCB_USER_FPU(pcb))
@@ -432,7 +588,7 @@ fpudna(void)
 		else
 			set_pcb_flags(pcb, PCB_FPUINITDONE);
 	} else
-		fxrstor(pcb->pcb_save);
+		fpurestore(pcb->pcb_save);
 	critical_exit();
 }
 
@@ -461,15 +617,16 @@ fpugetregs(struct thread *td)
 
 	pcb = td->td_pcb;
 	if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
-		bcopy(&fpu_initialstate, &pcb->pcb_user_save,
-		    sizeof(fpu_initialstate));
-		pcb->pcb_user_save.sv_env.en_cw = pcb->pcb_initial_fpucw;
+		bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb),
+		    cpu_max_ext_state_size);
+		get_pcb_user_save_pcb(pcb)->sv_env.en_cw =
+		    pcb->pcb_initial_fpucw;
 		fpuuserinited(td);
 		return (_MC_FPOWNED_PCB);
 	}
 	critical_enter();
 	if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
-		fxsave(&pcb->pcb_user_save);
+		fpusave(get_pcb_user_save_pcb(pcb));
 		critical_exit();
 		return (_MC_FPOWNED_FPU);
 	} else {
@@ -491,25 +648,78 @@ fpuuserinited(struct thread *td)
 		set_pcb_flags(pcb, PCB_FPUINITDONE);
 }
 
+int
+fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size)
+{
+	struct xstate_hdr *hdr, *ehdr;
+	size_t len, max_len;
+	uint64_t bv;
+
+	/* XXXKIB should we clear all extended state in xstate_bv instead ? */
+	if (xfpustate == NULL)
+		return (0);
+	if (!use_xsave)
+		return (EOPNOTSUPP);
+
+	len = xfpustate_size;
+	if (len < sizeof(struct xstate_hdr))
+		return (EINVAL);
+	max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
+	if (len > max_len)
+		return (EINVAL);
+
+	ehdr = (struct xstate_hdr *)xfpustate;
+	bv = ehdr->xstate_bv;
+
+	/*
+	 * Avoid #gp.
+	 */
+	if (bv & ~xsave_mask)
+		return (EINVAL);
+	if ((bv & (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE)) !=
+	    (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE))
+		return (EINVAL);
+
+	hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1);
+
+	hdr->xstate_bv = bv;
+	bcopy(xfpustate + sizeof(struct xstate_hdr),
+	    (char *)(hdr + 1), len - sizeof(struct xstate_hdr));
+
+	return (0);
+}
+
 /*
  * Set the state of the FPU.
  */
-void
-fpusetregs(struct thread *td, struct savefpu *addr)
+int
+fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate,
+    size_t xfpustate_size)
 {
 	struct pcb *pcb;
+	int error;
 
 	pcb = td->td_pcb;
 	critical_enter();
 	if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
-		fxrstor(addr);
+		error = fpusetxstate(td, xfpustate, xfpustate_size);
+		if (error != 0) {
+			critical_exit();
+			return (error);
+		}
+		bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
+		fpurestore(get_pcb_user_save_td(td));
 		critical_exit();
 		set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE);
 	} else {
 		critical_exit();
-		bcopy(addr, &td->td_pcb->pcb_user_save, sizeof(*addr));
+		error = fpusetxstate(td, xfpustate, xfpustate_size);
+		if (error != 0)
+			return (error);
+		bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
 		fpuuserinited(td);
 	}
+	return (0);
 }
 
 /*
@@ -599,20 +809,62 @@ static devclass_t fpupnp_devclass;
 DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0);
 #endif	/* DEV_ISA */
 
+static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
+    "Kernel contexts for FPU state");
+
+#define	FPU_KERN_CTX_FPUINITDONE 0x01	/* PCB_FPUINITDONE was set at enter */
+
+struct fpu_kern_ctx {
+	struct savefpu *prev;	/* pcb_save in effect before fpu_kern_enter() */
+	uint32_t flags;		/* FPU_KERN_CTX_* */
+	char hwstate1[];	/* raw save area; see fpu_kern_ctx_savefpu() */
+};
+
+struct fpu_kern_ctx *
+fpu_kern_alloc_ctx(u_int flags)
+{
+	struct fpu_kern_ctx *res;
+	size_t sz;
+
+	sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN +
+	    cpu_max_ext_state_size;	/* header + align slack + save area */
+	res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
+	    M_NOWAIT : M_WAITOK) | M_ZERO);
+	return (res);	/* NOTE(review): may be NULL with FPU_KERN_NOWAIT */
+}
+
+void
+fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
+{
+
+	/* XXXKIB clear the memory (it may hold saved FPU state) first? */
+	free(ctx, M_FPUKERN_CTX);
+}
+
+static struct savefpu *
+fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
+{
+	vm_offset_t p;
+
+	p = (vm_offset_t)&ctx->hwstate1;
+	p = roundup2(p, XSAVE_AREA_ALIGN);	/* first aligned addr in hwstate1 */
+	return ((struct savefpu *)p);
+}
+
 int
 fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
 {
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
-	KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save,
-	    ("mangled pcb_save"));
+	KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save ==
+	    get_pcb_user_save_pcb(pcb), ("mangled pcb_save"));
 	ctx->flags = 0;
 	if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0)
 		ctx->flags |= FPU_KERN_CTX_FPUINITDONE;
 	fpuexit(td);
 	ctx->prev = pcb->pcb_save;
-	pcb->pcb_save = &ctx->hwstate;
+	pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
 	set_pcb_flags(pcb, PCB_KERNFPU);
 	clear_pcb_flags(pcb, PCB_FPUINITDONE);
 	return (0);
@@ -629,7 +881,7 @@ fpu_kern_leave(struct thread *td, struct
 		fpudrop();
 	critical_exit();
 	pcb->pcb_save = ctx->prev;
-	if (pcb->pcb_save == &pcb->pcb_user_save) {
+	if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) {
 		if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) {
 			set_pcb_flags(pcb, PCB_FPUINITDONE);
 			clear_pcb_flags(pcb, PCB_KERNFPU);
@@ -653,7 +905,8 @@ fpu_kern_thread(u_int flags)
 	pcb = PCPU_GET(curpcb);
 	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
 	    ("Only kthread may use fpu_kern_thread"));
-	KASSERT(pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save"));
+	KASSERT(pcb->pcb_save == get_pcb_user_save_pcb(pcb),
+	    ("mangled pcb_save"));
 	KASSERT(PCB_USER_FPU(pcb), ("recursive call"));
 
 	set_pcb_flags(pcb, PCB_KERNFPU);

Modified: stable/8/sys/amd64/amd64/genassym.c
==============================================================================
--- stable/8/sys/amd64/amd64/genassym.c	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/sys/amd64/amd64/genassym.c	Wed Jun 13 15:25:52 2012	(r237009)
@@ -156,7 +156,7 @@ ASSYM(PCB_GS32SD, offsetof(struct pcb, p
 ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp));
 ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
 ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct savefpu));
-ASSYM(PCB_USERFPU, offsetof(struct pcb, pcb_user_save));
+ASSYM(PCB_USERFPU, sizeof(struct pcb));
 ASSYM(PCB_SIZE, sizeof(struct pcb));
 ASSYM(PCB_FULL_IRET, PCB_FULL_IRET);
 ASSYM(PCB_DBREGS, PCB_DBREGS);

Modified: stable/8/sys/amd64/amd64/initcpu.c
==============================================================================
--- stable/8/sys/amd64/amd64/initcpu.c	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/sys/amd64/amd64/initcpu.c	Wed Jun 13 15:25:52 2012	(r237009)
@@ -72,6 +72,7 @@ u_int	cpu_vendor_id;		/* CPU vendor ID *
 u_int	cpu_fxsr;		/* SSE enabled */
 u_int	cpu_mxcsr_mask;		/* Valid bits in mxcsr */
 u_int	cpu_clflush_line_size = 32;
+u_int	cpu_max_ext_state_size;
 
 SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
 	&via_feature_rng, 0, "VIA C3/C7 RNG feature available in CPU");

Modified: stable/8/sys/amd64/amd64/machdep.c
==============================================================================
--- stable/8/sys/amd64/amd64/machdep.c	Wed Jun 13 15:04:50 2012	(r237008)
+++ stable/8/sys/amd64/amd64/machdep.c	Wed Jun 13 15:25:52 2012	(r237009)
@@ -149,8 +149,10 @@ extern void panicifcpuunsupported(void);
 #define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 
 static void cpu_startup(void *);
-static void get_fpcontext(struct thread *td, mcontext_t *mcp);
-static int  set_fpcontext(struct thread *td, const mcontext_t *mcp);
+static void get_fpcontext(struct thread *td, mcontext_t *mcp,
+    char *xfpusave, size_t xfpusave_len);
+static int  set_fpcontext(struct thread *td, const mcontext_t *mcp,
+    char *xfpustate, size_t xfpustate_len);
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
 
 #ifdef DDB
@@ -305,6 +307,8 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
 	struct sigacts *psp;
 	char *sp;
 	struct trapframe *regs;
+	char *xfpusave;
+	size_t xfpusave_len;
 	int sig;
 	int oonstack;
 
@@ -318,6 +322,14 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_rsp);
 
+	if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
+		xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
+		xfpusave = __builtin_alloca(xfpusave_len);
+	} else {
+		xfpusave_len = 0;
+		xfpusave = NULL;
+	}
+
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
@@ -327,7 +339,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs));
 	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
-	get_fpcontext(td, &sf.sf_uc.uc_mcontext);
+	get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
 	fpstate_drop(td);
 	sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase;
 	sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase;
@@ -338,13 +350,18 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
-		sp = td->td_sigstk.ss_sp +
-		    td->td_sigstk.ss_size - sizeof(struct sigframe);
+		sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
-		sp = (char *)regs->tf_rsp - sizeof(struct sigframe) - 128;
+		sp = (char *)regs->tf_rsp - 128;
+	if (xfpusave != NULL) {
+		sp -= xfpusave_len;
+		sp = (char *)((unsigned long)sp & ~0x3Ful);
+		sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
+	}
+	sp -= sizeof(struct sigframe);
 	/* Align to 16 bytes. */
 	sfp = (struct sigframe *)((unsigned long)sp & ~0xFul);
 
@@ -377,7 +394,10 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
 	/*
 	 * Copy the sigframe out to the user's stack.
 	 */
-	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
+	if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
+	    (xfpusave != NULL && copyout(xfpusave,
+	    (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
+	    != 0)) {
 #ifdef DEBUG
 		printf("process %ld has trashed its stack\n", (long)p->p_pid);
 #endif
@@ -422,6 +442,8 @@ sigreturn(td, uap)
 	struct proc *p;
 	struct trapframe *regs;
 	ucontext_t *ucp;
+	char *xfpustate;
+	size_t xfpustate_len;
 	long rflags;
 	int cs, error, ret;
 	ksiginfo_t ksi;
@@ -480,7 +502,28 @@ sigreturn(td, uap)
 		return (EINVAL);
 	}
 
-	ret = set_fpcontext(td, &ucp->uc_mcontext);
+	if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
+		xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
+		if (xfpustate_len > cpu_max_ext_state_size -
+		    sizeof(struct savefpu)) {
+			uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
+			    p->p_pid, td->td_name, xfpustate_len);
+			return (EINVAL);
+		}
+		xfpustate = __builtin_alloca(xfpustate_len);
+		error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
+		    xfpustate, xfpustate_len);
+		if (error != 0) {
+			uprintf(
+	"pid %d (%s): sigreturn copying xfpustate failed\n",
+			    p->p_pid, td->td_name);
+			return (error);
+		}
+	} else {
+		xfpustate = NULL;
+		xfpustate_len = 0;
+	}
+	ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len);
 	if (ret != 0) {
 		uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n",
 		    p->p_pid, td->td_name, ret);
@@ -1543,6 +1586,7 @@ hammer_time(u_int64_t modulep, u_int64_t
 	int gsel_tss, x;
 	struct pcpu *pc;
 	struct nmi_pcpu *np;
+	struct xstate_hdr *xhdr;
 	u_int64_t msr;
 	char *env;
 	size_t kstack0_sz;
@@ -1552,7 +1596,6 @@ hammer_time(u_int64_t modulep, u_int64_t
 	kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
 	bzero((void *)thread0.td_kstack, kstack0_sz);
 	physfree += kstack0_sz;
-	thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1;
 
 	/*

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201206131525.q5DFPr3o032307>