Date: Wed, 10 Jan 2001 21:35:52 +0900 (JST) From: NAKAMURA Kazushi <kaz@kobe1995.net> To: FreeBSD-gnats-submit@freebsd.org Cc: kaz@ns.kobe1995.net Subject: kern/24219: Pentium3 SSE patch for 4.2R Message-ID: <200101101235.f0ACZq000695@beauty.kobe1995.net>
next in thread | raw e-mail | index | archive | help
>Number: 24219
>Category: kern
>Synopsis: <4.2R can't use Pentium3 SSE instructions>
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: freebsd-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: sw-bug
>Submitter-Id: current-users
>Arrival-Date: Wed Jan 10 04:40:01 PST 2001
>Closed-Date:
>Last-Modified:
>Originator: NAKAMURA Kazushi
>Release: FreeBSD 4.2-RELEASE i386
>Organization:
PCshop LABBIT
>Environment:
FreeBSD4.2R/Pentium3
>Description:
FreeBSD 4.2R's gcc 2.95.2 and gas 2.10.0 can compile and assemble MMX2/SSE
instructions, but programs that use SSE instructions cannot run on the 4.2R
kernel. I don't know about 5-current, but I want to use SSE instructions on 4-stable.
Please merge this patch.
This patch originated from titech.ac.jp (sorry, I forgot the URL).
Caution: This patch has NOT been tested on a Mendocino Celeron.
>How-To-Repeat:
Run any code that uses SSE instructions (XMM registers).
>Fix:
Apply this patch and append "option CPU_ENABLE_SSE" to the kernel
configuration file, then recompile the kernel.
diff -ruN -9 sys/conf/options.i386 sys+/conf/options.i386
--- sys/conf/options.i386 Sat Sep 30 11:49:30 2000
+++ sys+/conf/options.i386 Tue Jan 9 13:44:18 2001
@@ -52,18 +52,19 @@
CPU_LOOP_EN opt_cpu.h
CPU_PPRO2CELERON opt_cpu.h
CPU_RSTK_EN opt_cpu.h
CPU_SUSP_HLT opt_cpu.h
CPU_UPGRADE_HW_CACHE opt_cpu.h
CPU_WT_ALLOC opt_cpu.h
CYRIX_CACHE_WORKS opt_cpu.h
CYRIX_CACHE_REALLY_WORKS opt_cpu.h
NO_MEMORY_HOLE opt_cpu.h
+CPU_ENABLE_SSE opt_cpu.h
# The CPU type affects the endian conversion functions all over the kernel.
I386_CPU opt_global.h
I486_CPU opt_global.h
I586_CPU opt_global.h
I686_CPU opt_global.h
MAXCONS opt_syscons.h
SC_ALT_MOUSE_IMAGE opt_syscons.h
diff -ruN -9 sys/i386/conf/LINT sys+/i386/conf/LINT
--- sys/i386/conf/LINT Sat Nov 18 18:22:07 2000
+++ sys+/i386/conf/LINT Tue Jan 9 13:45:36 2001
@@ -158,18 +158,20 @@
#
# CPU_CYRIX_NO_LOCK enables weak locking for the entire address space
# of Cyrix 6x86 and 6x86MX CPUs by setting the NO_LOCK bit of CCR1.
# Otherwise, the NO_LOCK bit of CCR1 is cleared. (NOTE 3)
#
# CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables
# reorder). This option should not be used if you use memory mapped
# I/O device(s).
#
+# CPU_ENABLE_SSE enables SSE/MMX2 instruction support.
+#
# CPU_FASTER_5X86_FPU enables faster FPU exception handler.
#
# CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products
# for i386 machines.
#
# CPU_IORT defines I/O clock delay time (NOTE 1). Default values of
# I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7,respectively
# (no clock delay).
#
diff -ruN -9 sys/i386/i386/genassym.c sys+/i386/i386/genassym.c
--- sys/i386/i386/genassym.c Tue May 16 15:58:06 2000
+++ sys+/i386/i386/genassym.c Tue Jan 9 13:49:18 2001
@@ -126,20 +126,21 @@
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
#ifdef SMP
ASSYM(PCB_MPNEST, offsetof(struct pcb, pcb_mpnest));
#endif
ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
-ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_savefpu));
-ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct save87));
+ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
+ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
+ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
#ifdef SMP
ASSYM(PCB_SIZE, sizeof(struct pcb));
#endif
ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno));
ASSYM(TF_ERR, offsetof(struct trapframe, tf_err));
ASSYM(TF_CS, offsetof(struct trapframe, tf_cs));
diff -ruN -9 sys/i386/i386/initcpu.c sys+/i386/i386/initcpu.c
--- sys/i386/i386/initcpu.c Sun Oct 15 12:09:32 2000
+++ sys+/i386/i386/initcpu.c Tue Jan 9 14:04:38 2001
@@ -28,18 +28,19 @@
*
* $FreeBSD: src/sys/i386/i386/initcpu.c,v 1.19.2.2 2000/10/15 03:09:32 nyan Exp $
*/
#include "opt_cpu.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
+#include <sys/sysctl.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
void initializecpu(void);
#if defined(I586_CPU) && defined(CPU_WT_ALLOC)
void enable_K5_wt_alloc(void);
void enable_K6_wt_alloc(void);
@@ -55,19 +56,27 @@
static void init_i486_on_386(void);
#endif
static void init_6x86(void);
#endif /* I486_CPU */
#ifdef I686_CPU
static void init_6x86MX(void);
static void init_ppro(void);
static void init_mendocino(void);
+#if defined(CPU_ENABLE_SSE)
+void init_sse(void);
#endif
+#endif /* I686_CPU */
+
+int hw_instruction_sse = 0;
+SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
+ &hw_instruction_sse, 0,
+ "SSE/MMX2 instructions available in CPU");
#ifdef I486_CPU
/*
* IBM Blue Lightning
*/
static void
init_bluelightning(void)
{
u_long eflags;
@@ -494,19 +503,28 @@
bbl_cr_ctl3 |= 5 << 1;
#endif
wrmsr(0x11e, bbl_cr_ctl3);
}
load_cr0(rcr0() & ~(CR0_CD | CR0_NW));
write_eflags(eflags);
#endif /* CPU_PPRO2CELERON */
}
-
+#if defined(CPU_ENABLE_SSE)
+void
+init_sse(void)
+{
+ if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
+ load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
+ cpu_fxsr = hw_instruction_sse = 1;
+ }
+}
+#endif
#endif /* I686_CPU */
void
initializecpu(void)
{
switch (cpu) {
#ifdef I486_CPU
case CPU_BLUE:
@@ -538,18 +556,21 @@
if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
switch (cpu_id & 0xff0) {
case 0x610:
init_ppro();
break;
case 0x660:
init_mendocino();
break;
}
+#if defined(CPU_ENABLE_SSE)
+ init_sse();
+#endif
}
break;
#endif
default:
break;
}
#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE)
/*
diff -ruN -9 sys/i386/i386/locore.s sys+/i386/i386/locore.s
--- sys/i386/i386/locore.s Fri Jul 7 09:38:46 2000
+++ sys+/i386/i386/locore.s Tue Jan 9 14:07:36 2001
@@ -90,24 +90,25 @@
ALIGN_DATA /* just to be sure */
.globl HIDENAME(tmpstk)
.space 0x2000 /* space for tmpstk - temporary stack */
HIDENAME(tmpstk):
.globl _boothowto,_bootdev
.globl _cpu,_cpu_vendor,_cpu_id,_bootinfo
- .globl _cpu_high, _cpu_feature
+ .globl _cpu_high, _cpu_feature, _cpu_fxsr
_cpu: .long 0 /* are we 386, 386sx, or 486 */
_cpu_id: .long 0 /* stepping ID */
_cpu_high: .long 0 /* highest arg to CPUID */
_cpu_feature: .long 0 /* features */
+_cpu_fxsr: .long 0 /* use fxsave/fxrstor instruction */
_cpu_vendor: .space 20 /* CPU origin code */
_bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
_KERNend: .long 0 /* phys addr end of kernel (just after bss) */
physfree: .long 0 /* phys addr of next free page */
#ifdef SMP
.globl _cpu0prvpage
cpu0pp: .long 0 /* phys addr cpu0 private pg */
diff -ruN -9 sys/i386/i386/machdep.c sys+/i386/i386/machdep.c
--- sys/i386/i386/machdep.c Fri Oct 27 18:07:22 2000
+++ sys+/i386/i386/machdep.c Tue Jan 9 14:21:51 2001
@@ -119,18 +119,22 @@
extern void dblfault_handler __P((void));
extern void printcpuinfo(void); /* XXX header file */
extern void earlysetcpuclass(void); /* same header file */
extern void finishidentcpu(void);
extern void panicifcpuunsupported(void);
extern void initializecpu(void);
static void cpu_startup __P((void *));
+#ifdef CPU_ENABLE_SSE
+static void set_fpregs_xmm __P((struct save87 *, struct savexmm *));
+static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *));
+#endif /* CPU_ENABLE_SSE */
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
int _udatasel, _ucodesel;
u_int atdevbase;
#if defined(SWTCH_OPTIM_STATS)
extern int swtch_optim_stats;
@@ -2076,20 +2080,20 @@
tp = p->p_md.md_regs;
frame_copy = *tp;
*(int *)((char *)&frame_copy + (off - min)) = data;
if (!EFL_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
!CS_SECURE(frame_copy.tf_cs))
return (EINVAL);
*(int*)((char *)p->p_addr + off) = data;
return (0);
}
- min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
- if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
+ min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_save);
+ if (off >= min && off <= min + sizeof(union savefpu) - sizeof(int)) {
*(int*)((char *)p->p_addr + off) = data;
return (0);
}
return (EFAULT);
}
int
fill_regs(p, regs)
struct proc *p;
@@ -2145,33 +2149,101 @@
tp->tf_cs = regs->r_cs;
tp->tf_eflags = regs->r_eflags;
tp->tf_esp = regs->r_esp;
tp->tf_ss = regs->r_ss;
pcb = &p->p_addr->u_pcb;
pcb->pcb_gs = regs->r_gs;
return (0);
}
+#ifdef CPU_ENABLE_SSE
+static void
+fill_fpregs_xmm(sv_xmm, sv_87)
+ struct savexmm *sv_xmm;
+ struct save87 *sv_87;
+{
+ register struct env87 *penv_87 = &sv_87->sv_env;
+ register struct envxmm *penv_xmm = &sv_xmm->sv_env;
+ int i;
+
+ /* FPU control/status */
+ penv_87->en_cw = penv_xmm->en_cw;
+ penv_87->en_sw = penv_xmm->en_sw;
+ penv_87->en_tw = penv_xmm->en_tw;
+ penv_87->en_fip = penv_xmm->en_fip;
+ penv_87->en_fcs = penv_xmm->en_fcs;
+ penv_87->en_opcode = penv_xmm->en_opcode;
+ penv_87->en_foo = penv_xmm->en_foo;
+ penv_87->en_fos = penv_xmm->en_fos;
+
+ /* FPU registers */
+ for (i = 0; i < 8; ++i)
+ sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
+
+ sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
+}
+
+static void
+set_fpregs_xmm(sv_87, sv_xmm)
+ struct save87 *sv_87;
+ struct savexmm *sv_xmm;
+{
+ register struct env87 *penv_87 = &sv_87->sv_env;
+ register struct envxmm *penv_xmm = &sv_xmm->sv_env;
+ int i;
+
+/* FPU control/status */
+ penv_xmm->en_cw = penv_87->en_cw;
+ penv_xmm->en_sw = penv_87->en_sw;
+ penv_xmm->en_tw = penv_87->en_tw;
+ penv_xmm->en_fip = penv_87->en_fip;
+ penv_xmm->en_fcs = penv_87->en_fcs;
+ penv_xmm->en_opcode = penv_87->en_opcode;
+ penv_xmm->en_foo = penv_87->en_foo;
+ penv_xmm->en_fos = penv_87->en_fos;
+
+ /* FPU registers */
+ for (i = 0; i < 8; ++i)
+ sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
+
+ sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
+}
+#endif /* CPU_ENABLE_SSE */
+
int
fill_fpregs(p, fpregs)
struct proc *p;
struct fpreg *fpregs;
{
- bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs);
+#ifdef CPU_ENABLE_SSE
+ if (cpu_fxsr) {
+ fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm,
+ (struct save87 *)fpregs);
+ return (0);
+ }
+#endif /* CPU_ENABLE_SSE */
+ bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs);
return (0);
}
int
set_fpregs(p, fpregs)
struct proc *p;
struct fpreg *fpregs;
{
- bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs);
+#ifdef CPU_ENABLE_SSE
+ if (cpu_fxsr) {
+ set_fpregs_xmm((struct save87 *)fpregs,
+ &p->p_addr->u_pcb.pcb_save.sv_xmm);
+ return (0);
+ }
+#endif /* CPU_ENABLE_SSE */
+ bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs);
return (0);
}
int
fill_dbregs(p, dbregs)
struct proc *p;
struct dbreg *dbregs;
{
struct pcb *pcb;
diff -ruN -9 sys/i386/i386/mp_machdep.c sys+/i386/i386/mp_machdep.c
--- sys/i386/i386/mp_machdep.c Sat Sep 30 11:49:32 2000
+++ sys+/i386/i386/mp_machdep.c Tue Jan 9 14:26:16 2001
@@ -229,18 +229,22 @@
#define MP_ENABLE_POST 0x14
#define MPTABLE_PASS2_POST 0x15
#define START_ALL_APS_POST 0x16
#define INSTALL_AP_TRAMP_POST 0x17
#define START_AP_POST 0x18
#define MP_ANNOUNCE_POST 0x19
+#if defined(CPU_ENABLE_SSE)
+extern void init_sse(void);
+#endif
+
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
/** XXX FIXME: what system files declare these??? */
extern struct region_descriptor r_gdt, r_idt;
int bsp_apic_ready = 0; /* flags useability of BSP apic */
int mp_ncpus; /* # of CPUs, including BSP */
@@ -2392,18 +2396,23 @@
other_cpus = all_cpus & ~(1 << cpuid);
printf("SMP: AP CPU #%d Launched!\n", cpuid);
/* XXX FIXME: i386 specific, and redundant: Setup the FPU. */
load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS);
/* set up FPU state on the AP */
npxinit(__INITIAL_NPXCW__);
+
+ /* Setup the SSE */
+#if defined(CPU_ENABLE_SSE)
+ init_sse();
+#endif
/* A quick check from sanity claus */
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
if (cpuid != apic_id) {
printf("SMP: cpuid = %d\n", cpuid);
printf("SMP: apic_id = %d\n", apic_id);
printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
panic("cpuid mismatch! boom!!");
}
diff -ruN -9 sys/i386/i386/support.s sys+/i386/i386/support.s
--- sys/i386/i386/support.s Sat Sep 30 11:49:33 2000
+++ sys+/i386/i386/support.s Tue Jan 9 14:29:32 2001
@@ -948,19 +948,19 @@
src in %esi
dst in %edi
len in %ecx XXX changed to on stack for profiling
uses %eax and %edx for tmp. storage
*/
/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */
ENTRY(fastmove)
pushl %ebp
movl %esp,%ebp
- subl $PCB_SAVEFPU_SIZE+3*4,%esp
+ subl $PCB_SAVE87_SIZE+3*4,%esp
movl 8(%ebp),%ecx
cmpl $63,%ecx
jbe fastmove_tail
testl $7,%esi /* check if src addr is multiple of 8 */
jnz fastmove_tail
testl $7,%edi /* check if dst addr is multiple of 8 */
@@ -987,19 +987,19 @@
*/
/* tmp = curpcb->pcb_savefpu; */
movl %ecx,-12(%ebp)
movl %esi,-8(%ebp)
movl %edi,-4(%ebp)
movl %esp,%edi
movl _curpcb,%esi
addl $PCB_SAVEFPU,%esi
cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
movl -8(%ebp),%esi
movl -4(%ebp),%edi
/* stop_emulating(); */
clts
/* npxproc = curproc; */
movl _curproc,%eax
@@ -1064,19 +1064,19 @@
/* curpcb->pcb_savefpu = tmp; */
movl %ecx,-12(%ebp)
movl %esi,-8(%ebp)
movl %edi,-4(%ebp)
movl _curpcb,%edi
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
movl -8(%ebp),%esi
movl -4(%ebp),%edi
/* start_emulating(); */
smsw %ax
orb $CR0_TS,%al
@@ -1103,19 +1103,19 @@
popl %ebp
ret
ALIGN_TEXT
fastmove_fault:
movl _curpcb,%edi
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
smsw %ax
orb $CR0_TS,%al
lmsw %ax
movl $0,_npxproc
fastmove_tail_fault:
diff -ruN -9 sys/i386/i386/vm_machdep.c sys+/i386/i386/vm_machdep.c
--- sys/i386/i386/vm_machdep.c Sat Aug 26 13:19:26 2000
+++ sys+/i386/i386/vm_machdep.c Tue Jan 9 14:30:56 2001
@@ -135,19 +135,19 @@
}
}
#endif
return;
}
#if NNPX > 0
/* Ensure that p1's pcb is up to date. */
if (npxproc == p1)
- npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
+ npxsave(&p1->p_addr->u_pcb.pcb_save);
#endif
/* Copy p1's pcb. */
p2->p_addr->u_pcb = p1->p_addr->u_pcb;
pcb2 = &p2->p_addr->u_pcb;
/*
* Create a new fresh stack for the new process.
* Copy the trap frame for the return to user mode as if from a
diff -ruN -9 sys/i386/include/asnames.h sys+/i386/include/asnames.h
--- sys/i386/include/asnames.h Tue May 16 15:58:10 2000
+++ sys+/i386/include/asnames.h Tue Jan 9 14:32:12 2001
@@ -185,18 +185,19 @@
#define _copyin_vector copyin_vector
#define _copyout_vector copyout_vector
#define _cpl cpl
#define _cpl_lock cpl_lock
#define _cpu cpu
#define _cpu0prvpage cpu0prvpage
#define _cpu_apic_versions cpu_apic_versions
#define _cpu_class cpu_class
#define _cpu_feature cpu_feature
+#define _cpu_fxsr cpu_fxsr
#define _cpu_high cpu_high
#define _cpu_id cpu_id
#define _cpu_num_to_apic_id cpu_num_to_apic_id
#define _cpu_switch cpu_switch
#define _cpu_vendor cpu_vendor
#define _default_halt default_halt
#define _denormal_operand denormal_operand
#define _div_small div_small
#define _divide_by_zero divide_by_zero
diff -ruN -9 sys/i386/include/md_var.h sys+/i386/include/md_var.h
--- sys/i386/include/md_var.h Mon Feb 21 05:51:23 2000
+++ sys+/i386/include/md_var.h Tue Jan 9 14:33:24 2001
@@ -41,18 +41,19 @@
extern void (*bcopy_vector) __P((const void *from, void *to, size_t len));
extern int busdma_swi_pending;
extern int (*copyin_vector) __P((const void *udaddr, void *kaddr,
size_t len));
extern int (*copyout_vector) __P((const void *kaddr, void *udaddr,
size_t len));
extern u_int cpu_feature;
extern u_int cpu_high;
extern u_int cpu_id;
+extern u_int cpu_fxsr;
extern char cpu_vendor[];
extern u_int cyrix_did;
extern char kstack[];
#ifdef PC98
extern int need_pre_dma_flush;
extern int need_post_dma_flush;
#endif
extern void (*netisrs[32]) __P((void));
extern int nfs_diskless_valid;
diff -ruN -9 sys/i386/include/npx.h sys+/i386/include/npx.h
--- sys/i386/include/npx.h Sat Mar 11 02:56:33 2000
+++ sys+/i386/include/npx.h Tue Jan 9 14:40:14 2001
@@ -81,18 +81,54 @@
* struct and arrange to store into this struct (ending here)
* before it is inspected for ptracing or for core dumps. Some
* emulators overwrite the whole struct. We have no good way of
* knowing how much padding to leave. Leave just enough for the
* GPL emulator's i387_union (176 bytes total).
*/
u_char sv_pad[64]; /* padding; used by emulators */
};
+struct envxmm {
+ u_int16_t en_cw; /* control word (16bits) */
+ u_int16_t en_sw; /* status word (16bits) */
+ u_int16_t en_tw; /* tag word (16bits) */
+ u_int16_t en_opcode; /* opcode last executed (11 bits ) */
+ u_int32_t en_fip; /* floating point instruction pointer */
+ u_int16_t en_fcs; /* floating code segment selector */
+ u_int16_t en_pad0; /* padding */
+ u_int32_t en_foo; /* floating operand offset */
+ u_int16_t en_fos; /* floating operand segment selector */
+ u_int16_t en_pad1; /* padding */
+ u_int32_t en_mxcsr; /* SSE control/status register */
+ u_int32_t en_pad2; /* padding */
+};
+
+/* Contents of each SSE extended accumulator */
+struct xmmacc {
+ u_char xmm_bytes[16];
+};
+
+struct savexmm {
+ struct envxmm sv_env;
+ struct {
+ struct fpacc87 fp_acc;
+ u_char fp_pad[6]; /* padding */
+ } sv_fp[8];
+ struct xmmacc sv_xmm[8];
+ u_long sv_ex_sw; /* status word for last exception */
+ u_char sv_pad[220];
+};
+
+union savefpu {
+ struct save87 sv_87;
+ struct savexmm sv_xmm;
+};
+
/*
* The hardware default control word for i387's and later coprocessors is
* 0x37F, giving:
*
* round to nearest
* 64-bit precision
* all exceptions masked.
*
* We modify the affine mode bit and precision bits in this to give:
@@ -108,13 +144,13 @@
#ifdef _KERNEL
#ifndef npxproc
extern struct proc *npxproc;
#endif
int npxdna __P((void));
void npxexit __P((struct proc *p));
void npxinit __P((int control));
-void npxsave __P((struct save87 *addr));
+void npxsave __P((union savefpu *addr));
#endif
#endif /* !_MACHINE_NPX_H_ */
diff -ruN -9 sys/i386/include/pcb.h sys+/i386/include/pcb.h
--- sys/i386/include/pcb.h Wed Dec 29 13:33:03 1999
+++ sys+/i386/include/pcb.h Tue Jan 9 14:43:07 2001
@@ -61,19 +61,20 @@
int pcb_dr3;
int pcb_dr6;
int pcb_dr7;
#ifdef USER_LDT
struct pcb_ldt *pcb_ldt; /* per process (user) LDT */
#else
struct pcb_ldt *pcb_ldt_dontuse;
#endif
- struct save87 pcb_savefpu; /* floating point state for 287/387 */
+ union savefpu pcb_save;
+#define pcb_savefpu pcb_save.sv_87
u_char pcb_flags;
#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
#define PCB_DBREGS 0x02 /* process using debug registers */
caddr_t pcb_onfault; /* copyin/out fault recovery */
#ifdef SMP
u_long pcb_mpnest;
#else
u_long pcb_mpnest_dontuse;
#endif
diff -ruN -9 sys/i386/include/specialreg.h sys+/i386/include/specialreg.h
--- sys/i386/include/specialreg.h Sat Sep 11 00:51:44 1999
+++ sys+/i386/include/specialreg.h Tue Jan 9 14:44:25 2001
@@ -87,18 +87,20 @@
#define CPUID_MCE 0x0080
#define CPUID_CX8 0x0100
#define CPUID_APIC 0x0200
#define CPUID_B10 0x0400
#define CPUID_B11 0x0800
#define CPUID_MTRR 0x1000
#define CPUID_PGE 0x2000
#define CPUID_MCA 0x4000
#define CPUID_CMOV 0x8000
+#define CPUID_FXSR 0x01000000
+#define CPUID_XMM 0x02000000
/*
* Model-specific registers for the i386 family
*/
#define MSR_P5_MC_ADDR 0x000
#define MSR_P5_MC_TYPE 0x001
#define MSR_TSC 0x010
#define MSR_APICBASE 0x01b
#define MSR_EBL_CR_POWERON 0x02a
diff -ruN -9 sys/i386/isa/npx.c sys+/i386/isa/npx.c
--- sys/i386/isa/npx.c Sun Jan 30 01:17:36 2000
+++ sys+/i386/isa/npx.c Tue Jan 9 15:06:48 2001
@@ -29,18 +29,19 @@
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)npx.c 7.2 (Berkeley) 5/12/91
* $FreeBSD: src/sys/i386/isa/npx.c,v 1.80 2000/01/29 16:17:36 peter Exp $
*/
+#include "opt_cpu.h"
#include "opt_debug_npx.h"
#include "opt_math_emulate.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
@@ -90,45 +91,60 @@
#define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr)))
#define fnclex() __asm("fnclex")
#define fninit() __asm("fninit")
#define fnop() __asm("fnop")
#define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr)))
#define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr)))
#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop")
#define frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
+#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
+#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
: : "n" (CR0_TS) : "ax")
#define stop_emulating() __asm("clts")
#else /* not __GNUC__ */
void fldcw __P((caddr_t addr));
void fnclex __P((void));
void fninit __P((void));
void fnop __P((void));
void fnsave __P((caddr_t addr));
void fnstcw __P((caddr_t addr));
void fnstsw __P((caddr_t addr));
void fp_divide_by_0 __P((void));
void frstor __P((caddr_t addr));
+void fxsave __P((caddr_t addr));
+void fxrstor __P((caddr_t addr));
void start_emulating __P((void));
void stop_emulating __P((void));
#endif /* __GNUC__ */
+#ifdef CPU_ENABLE_SSE
+#define FPU_STATUS_EX(pcb) \
+ (cpu_fxsr ? \
+ &(pcb)->pcb_save.sv_xmm.sv_ex_sw : \
+ &(pcb)->pcb_save.sv_87.sv_ex_sw)
+#else /* CPU_ENABLE_SSE */
+#define FPU_STATUS_EX(pcb) (&(pcb)->pcb_save.sv_87.sv_ex_sw)
+#endif /* CPU_ENABLE_SSE */
+
typedef u_char bool_t;
static int npx_attach __P((device_t dev));
void npx_intr __P((void *));
static void npx_identify __P((driver_t *driver, device_t parent));
static int npx_probe __P((device_t dev));
static int npx_probe1 __P((device_t dev));
+static void fpusave __P((union savefpu *));
+static void fpurstor __P((union savefpu *));
#ifdef I586_CPU
static long timezero __P((const char *funcname,
void (*func)(void *buf, size_t len)));
#endif /* I586_CPU */
int hw_float; /* XXX currently just alias for npx_exists */
SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint,
CTLFLAG_RD, &hw_float, 0,
@@ -468,45 +484,45 @@
}
/*
* Initialize floating point unit.
*/
void
npxinit(control)
u_short control;
{
- struct save87 dummy;
+ union savefpu dummy;
if (!npx_exists)
return;
/*
* fninit has the same h/w bugs as fnsave. Use the detoxified
* fnsave to throw away any junk in the fpu. npxsave() initializes
* the fpu and sets npxproc = NULL as important side effects.
*/
npxsave(&dummy);
stop_emulating();
fldcw(&control);
if (curpcb != NULL)
- fnsave(&curpcb->pcb_savefpu);
+ fpusave(&curpcb->pcb_save);
start_emulating();
}
/*
* Free coprocessor (if we have it).
*/
void
npxexit(p)
struct proc *p;
{
if (p == npxproc)
- npxsave(&curpcb->pcb_savefpu);
+ npxsave(&curpcb->pcb_save);
#ifdef NPX_DEBUG
if (npx_exists) {
u_int masked_exceptions;
masked_exceptions = curpcb->pcb_savefpu.sv_env.en_cw
& curpcb->pcb_savefpu.sv_env.en_sw & 0x7f;
/*
* Log exceptions that would have trapped with the old
* control word (overflow, divide by 0, and invalid operand).
@@ -708,32 +724,35 @@
* solution for signals other than SIGFPE.
*/
void
npx_intr(dummy)
void *dummy;
{
int code;
u_short control;
struct intrframe *frame;
+ u_long *pstatus;
if (npxproc == NULL || !npx_exists) {
printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n",
npxproc, curproc, npx_exists);
panic("npxintr from nowhere");
}
if (npxproc != curproc) {
printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n",
npxproc, curproc, npx_exists);
panic("npxintr from non-current process");
}
+ pstatus = FPU_STATUS_EX(curpcb);
+
outb(0xf0, 0);
- fnstsw(&curpcb->pcb_savefpu.sv_ex_sw);
+ fnstsw(pstatus);
fnstcw(&control);
fnclex();
/*
* Pass exception to process.
*/
frame = (struct intrframe *)&dummy; /* XXX */
if ((ISPL(frame->if_cs) == SEL_UPL) || (frame->if_eflags & PSL_VM)) {
/*
@@ -747,20 +766,19 @@
* in doreti, and the frame for that could easily be set up
* just before it is used).
*/
curproc->p_md.md_regs = INTR_TO_TRAPFRAME(frame);
/*
* Encode the appropriate code for detailed information on
* this exception.
*/
code =
- fpetable[(curpcb->pcb_savefpu.sv_ex_sw & ~control & 0x3f) |
- (curpcb->pcb_savefpu.sv_ex_sw & 0x40)];
+ fpetable[(*pstatus & ~control & 0x3f) | (*pstatus & 0x40)];
trapsignal(curproc, SIGFPE, code);
} else {
/*
* Nested interrupt. These losers occur when:
* o an IRQ13 is bogusly generated at a bogus time, e.g.:
* o immediately after an fnsave or frstor of an
* error state.
* o a couple of 386 instructions after
* "fstpl _memvar" causes a stack overflow.
@@ -779,69 +797,73 @@
* Implement device not available (DNA) exception
*
* It would be better to switch FP context here (if curproc != npxproc)
* and not necessarily for every context switch, but it is too hard to
* access foreign pcb's.
*/
int
npxdna()
{
+ u_long *pstatus;
+
if (!npx_exists)
return (0);
if (npxproc != NULL) {
printf("npxdna: npxproc = %p, curproc = %p\n",
npxproc, curproc);
panic("npxdna");
}
stop_emulating();
/*
* Record new context early in case frstor causes an IRQ13.
*/
npxproc = curproc;
- curpcb->pcb_savefpu.sv_ex_sw = 0;
+
+ pstatus = FPU_STATUS_EX(curpcb);
+ *pstatus = 0;
+
/*
* The following frstor may cause an IRQ13 when the state being
* restored has a pending error. The error will appear to have been
* triggered by the current (npx) user instruction even when that
* instruction is a no-wait instruction that should not trigger an
* error (e.g., fnclex). On at least one 486 system all of the
* no-wait instructions are broken the same as frstor, so our
* treatment does not amplify the breakage. On at least one
* 386/Cyrix 387 system, fnclex works correctly while frstor and
* fnsave are broken, so our treatment breaks fnclex if it is the
* first FPU instruction after a context switch.
*/
- frstor(&curpcb->pcb_savefpu);
+ fpurstor(&curpcb->pcb_save);
return (1);
}
/*
* Wrapper for fnsave instruction to handle h/w bugs. If there is an error
* pending, then fnsave generates a bogus IRQ13 on some systems. Force
* any IRQ13 to be handled immediately, and then ignore it. This routine is
* often called at splhigh so it must not use many system services. In
* particular, it's much easier to install a special handler than to
* guarantee that it's safe to use npxintr() and its supporting code.
*/
void
npxsave(addr)
- struct save87 *addr;
+ union savefpu *addr;
{
-#ifdef SMP
-
+#if defined(SMP) || defined(CPU_ENABLE_SSE)
stop_emulating();
- fnsave(addr);
+ fpusave(addr);
/* fnop(); */
start_emulating();
npxproc = NULL;
-#else /* SMP */
+#else /* SMP or CPU_ENABLE_SSE */
u_char icu1_mask;
u_char icu2_mask;
u_char old_icu1_mask;
u_char old_icu2_mask;
struct gate_descriptor save_idt_npxintr;
disable_intr();
old_icu1_mask = inb(IO_ICU1 + 1);
@@ -862,18 +884,50 @@
outb(IO_ICU1 + 1,
(icu1_mask & ~npx0_imask) | (old_icu1_mask & npx0_imask));
outb(IO_ICU2 + 1,
(icu2_mask & ~(npx0_imask >> 8))
| (old_icu2_mask & (npx0_imask >> 8)));
idt[npx_intrno] = save_idt_npxintr;
enable_intr(); /* back to usual state */
#endif /* SMP */
+}
+
+static void
+fpusave(addr)
+ union savefpu *addr;
+{
+#ifdef CPU_ENABLE_SSE
+ static struct savexmm svxmm __attribute__((aligned(16)));
+
+ if (cpu_fxsr) {
+ fxsave(&svxmm);
+ bcopy(&svxmm, addr, sizeof(struct savexmm));
+ return;
+ }
+#endif /* CPU_ENABLE_SSE */
+ fnsave(addr);
+}
+
+static void
+fpurstor(addr)
+ union savefpu *addr;
+{
+#ifdef CPU_ENABLE_SSE
+ static struct savexmm svxmm __attribute__((aligned(16)));
+
+ if (cpu_fxsr) {
+ bcopy(addr, &svxmm, sizeof (struct savexmm));
+ fxrstor(&svxmm);
+ return;
+ }
+#endif /* CPU_ENABLE_SSE */
+ frstor(addr);
}
#ifdef I586_CPU
static long
timezero(funcname, func)
const char *funcname;
void (*func) __P((void *buf, size_t len));
{
>Release-Note:
>Audit-Trail:
>Unformatted:
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-bugs" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200101101235.f0ACZq000695>
