Date: Tue, 21 May 2013 19:25:20 +0000 (UTC) From: John Baldwin <jhb@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org Subject: svn commit: r250880 - in stable/8/sys/amd64: amd64 include Message-ID: <201305211925.r4LJPK1q023718@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: jhb Date: Tue May 21 19:25:19 2013 New Revision: 250880 URL: http://svnweb.freebsd.org/changeset/base/250880 Log: MFC 238450,250152-250153,250415: - Add support for the XSAVEOPT instruction use. - The check to ensure that xstate_bv always has XFEATURE_ENABLED_X87 and XFEATURE_ENABLED_SSE bits set is not needed. CPU correctly handles any bitmask which is subset of the enabled bits in %XCR0. - Partially saved extended state must be handled always, i.e. for both fpu-owned context, and for pcb-saved one. - Correct the type for the literal used on the left side of the shift up to 63 bit positions. Do not fill the save area and do not set the saved bit in the xstate bit vector for the state which is not marked as enabled in xsave_mask. Modified: stable/8/sys/amd64/amd64/cpu_switch.S stable/8/sys/amd64/amd64/fpu.c stable/8/sys/amd64/include/md_var.h stable/8/sys/amd64/include/specialreg.h Directory Properties: stable/8/sys/ (props changed) stable/8/sys/amd64/ (props changed) stable/8/sys/x86/ (props changed) Modified: stable/8/sys/amd64/amd64/cpu_switch.S ============================================================================== --- stable/8/sys/amd64/amd64/cpu_switch.S Tue May 21 19:23:49 2013 (r250879) +++ stable/8/sys/amd64/amd64/cpu_switch.S Tue May 21 19:25:19 2013 (r250880) @@ -122,6 +122,9 @@ done_store_dr: 1: movq %rdx,%rcx movl xsave_mask,%eax movl xsave_mask+4,%edx + .globl ctx_switch_xsave +ctx_switch_xsave: + /* This is patched to xsaveopt if supported, see fpuinit_bsp1() */ /* xsave (%r8) */ .byte 0x41,0x0f,0xae,0x20 movq %rcx,%rdx Modified: stable/8/sys/amd64/amd64/fpu.c ============================================================================== --- stable/8/sys/amd64/amd64/fpu.c Tue May 21 19:23:49 2013 (r250879) +++ stable/8/sys/amd64/amd64/fpu.c Tue May 21 19:25:19 2013 (r250880) @@ -154,6 +154,11 @@ int use_xsave; /* non-static for cpu_s uint64_t xsave_mask; /* the same */ static struct savefpu *fpu_initialstate; +struct xsave_area_elm_descr { + u_int offset; + u_int size; +} *xsave_area_desc; + void fpusave(void *addr) { @@ -200,6 +205,16 @@ fpuinit_bsp1(void) TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; + + cpuid_count(0xd, 0x1, cp); + if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) { + /* + * Patch the XSAVE instruction in the cpu_switch code + * to XSAVEOPT. We assume that XSAVE encoding used + * REX byte, and set the bit 4 of the r/m byte. + */ + ctx_switch_xsave[3] |= 0x10; + } } /* @@ -270,6 +285,7 @@ static void fpuinitstate(void *arg __unused) { register_t saveintr; + int cp[4], i, max_ext_n; fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, M_WAITOK | M_ZERO); @@ -291,6 +307,28 @@ fpuinitstate(void *arg __unused) */ bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc)); + /* + * Create a table describing the layout of the CPU Extended + * Save Area. + */ + if (use_xsave) { + max_ext_n = flsl(xsave_mask); + xsave_area_desc = malloc(max_ext_n * sizeof(struct + xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); + /* x87 state */ + xsave_area_desc[0].offset = 0; + xsave_area_desc[0].size = 160; + /* XMM */ + xsave_area_desc[1].offset = 160; + xsave_area_desc[1].size = 288 - 160; + + for (i = 2; i < max_ext_n; i++) { + cpuid_count(0xd, i, cp); + xsave_area_desc[i].offset = cp[1]; + xsave_area_desc[i].size = cp[0]; + } + } + start_emulating(); intr_restore(saveintr); } @@ -578,8 +616,14 @@ fpudna(void) * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. + * + * We prefer to restore the state from the actual save + * area in PCB instead of directly loading from + * fpu_initialstate, to ignite the XSAVEOPT + * tracking engine. */ - fpurestore(fpu_initialstate); + bcopy(fpu_initialstate, pcb->pcb_save, cpu_max_ext_state_size); + fpurestore(pcb->pcb_save); if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(pcb->pcb_initial_fpucw); if (PCB_USER_FPU(pcb)) @@ -614,6 +658,9 @@ int fpugetregs(struct thread *td) { struct pcb *pcb; + uint64_t *xstate_bv, bit; + char *sa; + int max_ext_n, i, owned; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { @@ -627,12 +674,31 @@ fpugetregs(struct thread *td) critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { fpusave(get_pcb_user_save_pcb(pcb)); - critical_exit(); - return (_MC_FPOWNED_FPU); + owned = _MC_FPOWNED_FPU; } else { - critical_exit(); - return (_MC_FPOWNED_PCB); + owned = _MC_FPOWNED_PCB; } + critical_exit(); + if (use_xsave) { + /* + * Handle partially saved state. + */ + sa = (char *)get_pcb_user_save_pcb(pcb); + xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) + + offsetof(struct xstate_hdr, xstate_bv)); + max_ext_n = flsl(xsave_mask); + for (i = 0; i < max_ext_n; i++) { + bit = 1ULL << i; + if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) + continue; + bcopy((char *)fpu_initialstate + + xsave_area_desc[i].offset, + sa + xsave_area_desc[i].offset, + xsave_area_desc[i].size); + *xstate_bv |= bit; + } + } + return (owned); } void @@ -676,9 +742,6 @@ fpusetxstate(struct thread *td, char *xf */ if (bv & ~xsave_mask) return (EINVAL); - if ((bv & (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE)) != - (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE)) - return (EINVAL); hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); Modified: stable/8/sys/amd64/include/md_var.h ============================================================================== --- stable/8/sys/amd64/include/md_var.h Tue May 21 19:23:49 2013 (r250879) +++ stable/8/sys/amd64/include/md_var.h Tue May 21 19:25:19 2013 (r250880) @@ -57,6 +57,7 @@ extern u_int cpu_procinfo; extern u_int cpu_procinfo2; extern char cpu_vendor[]; extern u_int cpu_vendor_id; +extern char ctx_switch_xsave[]; extern char kstack[]; extern char sigcode[]; extern int szsigcode; Modified: stable/8/sys/amd64/include/specialreg.h ============================================================================== --- stable/8/sys/amd64/include/specialreg.h Tue May 21 19:23:49 2013 (r250879) +++ stable/8/sys/amd64/include/specialreg.h Tue May 21 19:25:19 2013 (r250880) @@ -230,6 +230,11 @@ #define CPUID_TYPE_CORE 2 /* + * CPUID instruction 0xd Processor Extended State Enumeration Sub-leaf 1 + */ +#define CPUID_EXTSTATE_XSAVEOPT 0x00000001 + +/* * AMD extended function 8000_0007h edx info */ #define AMDPM_TS 0x00000001
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201305211925.r4LJPK1q023718>