Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 13 Apr 2003 19:57:03 -0700 (PDT)
From:      Marcel Moolenaar <marcel@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 28905 for review
Message-ID:  <200304140257.h3E2v3v7057436@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=28905

Change 28905 by marcel@marcel_nfs on 2003/04/13 19:56:37

	Implement lazy context switching for the high FP registers.
	This does not use any synchronization and/or locking yet.
	Also, we currently disable the high FP registers whenever
	we enter and leave the kernel. We may want to leave the high
	FP registers enabled if we leave the kernel and the CPU
	holds the high FP registers of the process we're returning
	to. For now we just let it trap. I expect to have to
	revisit this...

Affected files ...

.. //depot/projects/ia64_epc/sys/ia64/ia64/exception.s#9 edit
.. //depot/projects/ia64_epc/sys/ia64/ia64/interrupt.c#2 edit
.. //depot/projects/ia64_epc/sys/ia64/ia64/machdep.c#11 edit
.. //depot/projects/ia64_epc/sys/ia64/ia64/mp_machdep.c#4 edit
.. //depot/projects/ia64_epc/sys/ia64/ia64/trap.c#8 edit
.. //depot/projects/ia64_epc/sys/ia64/ia64/vm_machdep.c#7 edit
.. //depot/projects/ia64_epc/sys/ia64/include/cpu.h#5 edit
.. //depot/projects/ia64_epc/sys/ia64/include/smp.h#3 edit

Differences ...

==== //depot/projects/ia64_epc/sys/ia64/ia64/exception.s#9 (text+ko) ====

@@ -327,7 +327,7 @@
 	;;
 }
 {	.mlx
-	ssm		psr.ic
+	ssm		psr.ic|psr.dfh
 	movl		gp=__gp
 	;;
 }
@@ -506,15 +506,20 @@
 (p14)	br.cond.sptk	1f
 	;;
 }
-
+{	.mii
 	// Switch register stack
 	alloc		r31=ar.pfs,0,0,0,0	// discard current frame
 	shl		r30=r25,16		// value for ar.rsc
+	dep		r24=-1,r24,19,1		// XXX disable high FP.
 	;;
+}
+{	.mmi
 	mov		ar.rsc=r30		// setup for loadrs
 	;;
 	loadrs					// load user regs
+	nop		0
 	;;
+}
 	mov		r31=ar.bspstore
 	;;
 	mov		ar.bspstore=r20

==== //depot/projects/ia64_epc/sys/ia64/ia64/interrupt.c#2 (text+ko) ====

@@ -152,6 +152,9 @@
 	} else if (vector == ipi_vector[IPI_AST]) {
 		asts[PCPU_GET(cpuid)]++;
 		CTR1(KTR_SMP, "IPI_AST, cpuid=%d", PCPU_GET(cpuid));
+	} else if (vector == ipi_vector[IPI_HIGH_FP]) {
+		if (PCPU_GET(fpcurthread) != NULL)
+			ia64_highfp_save(PCPU_GET(fpcurthread));
 	} else if (vector == ipi_vector[IPI_RENDEZVOUS]) {
 		rdvs[PCPU_GET(cpuid)]++;
 		CTR1(KTR_SMP, "IPI_RENDEZVOUS, cpuid=%d", PCPU_GET(cpuid));

==== //depot/projects/ia64_epc/sys/ia64/ia64/machdep.c#11 (text+ko) ====

@@ -74,6 +74,9 @@
 #include <machine/mca.h>
 #include <machine/pal.h>
 #include <machine/sal.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
 #include <machine/bootinfo.h>
 #include <machine/mutex.h>
 #include <machine/vmparam.h>
@@ -723,9 +726,6 @@
 	ia64_set_cflg((CR0_PE | CR0_PG)
 		      | ((long)(CR4_XMM | CR4_FXSR) << 32));
 
-	/* We pretend to own FP state so that ia64_fpstate_check() works */
-	PCPU_SET(fpcurthread, &thread0);
-
 	/*
 	 * Initialize the rest of proc 0's PCB.
 	 *
@@ -974,6 +974,9 @@
 	    printf("sigreturn: pid %d, scp %p\n", p->p_pid, uap->sigcntxp);
 #endif
 
+	/* Throw away the high FP registers. */
+	ia64_highfp_drop(td);
+
 	/*
 	 * Fetch the entire context structure at once for speed.
 	 * We don't use a normal argument to simplify RSE handling.
@@ -990,6 +993,7 @@
 		frame->tf_scratch = mc->mc_scratch;
 		frame->tf_scratch_fp = mc->mc_scratch_fp;
 	}
+
 	/*
 	 * XXX preserved registers.
 	 * XXX High FP.
@@ -1008,9 +1012,6 @@
 	signotify(td);
 	PROC_UNLOCK(p);
 
-	/* XXX ksc.sc_ownedfp ? */
-	ia64_fpstate_drop(td);
-
 #ifdef DEBUG
 	if (sigdebug & SDB_FOLLOW)
 		printf("sigreturn(%d): returns\n", p->p_pid);
@@ -1092,9 +1093,6 @@
 	suword((caddr_t)frame->tf_special.bspstore - 24, stack);
 	suword((caddr_t)frame->tf_special.bspstore - 16, ps_strings);
 	suword((caddr_t)frame->tf_special.bspstore -  8, 0);
-
-	td->td_md.md_flags &= ~MDP_FPUSED;
-	ia64_fpstate_drop(td);
 }
 
 int
@@ -1181,10 +1179,11 @@
 	struct trapframe *frame = td->td_frame;
 	struct pcb *pcb = td->td_pcb;
 
+	/* Save the high FP registers. */
+	ia64_highfp_save(td);
+
 	fpregs->fpr_scratch = frame->tf_scratch_fp;
-	/* XXX preserved */
-
-	ia64_fpstate_save(td, 0);
+	/* XXX preserved_fp */
 	fpregs->fpr_high = pcb->pcb_high_fp;
 	return (0);
 }
@@ -1195,14 +1194,87 @@
 	struct trapframe *frame = td->td_frame;
 	struct pcb *pcb = td->td_pcb;
 
+	/* Throw away the high FP registers (should be redundant). */
+	ia64_highfp_drop(td);
+
 	frame->tf_scratch_fp = fpregs->fpr_scratch;
-	/* XXX preserved */
-
-	ia64_fpstate_drop(td);
+	/* XXX preserved_fp */
 	pcb->pcb_high_fp = fpregs->fpr_high;
 	return (0);
 }
 
+/*
+ * High FP register functions.
+ * XXX no synchronization yet.
+ */
+
+int
+ia64_highfp_drop(struct thread *td)
+{
+	struct pcb *pcb;
+	struct pcpu *cpu;
+	struct thread *thr;
+
+	pcb = td->td_pcb;
+	cpu = pcb->pcb_fpcpu;		/* CPU holding td's high FP state. */
+	if (cpu == NULL)
+		return (0);		/* Not held anywhere: nothing to drop. */
+	pcb->pcb_fpcpu = NULL;		/* Unlink the thread from the CPU... */
+	thr = cpu->pc_fpcurthread;
+	cpu->pc_fpcurthread = NULL;	/* ...and the CPU from its thread. */
+
+	/* Post-mortem sanity checking: the CPU must have been holding td. */
+	KASSERT(thr == td, ("Inconsistent high FP state"));
+	return (1);			/* Ownership released without saving. */
+}
+
+int
+ia64_highfp_load(struct thread *td)
+{
+	struct pcb *pcb;
+
+	pcb = td->td_pcb;
+	KASSERT(pcb->pcb_fpcpu == NULL, ("FP race on thread"));
+	KASSERT(PCPU_GET(fpcurthread) == NULL, ("FP race on pcpu"));
+	restore_high_fp(&pcb->pcb_high_fp);	/* Presumably PCB -> regs. */
+	PCPU_SET(fpcurthread, td);		/* This CPU now holds td... */
+	pcb->pcb_fpcpu = pcpup;			/* ...and td knows the CPU. */
+	return (1);				/* Always reports success. */
+}
+
+int
+ia64_highfp_save(struct thread *td)
+{
+	struct pcb *pcb;
+	struct pcpu *cpu;
+	struct thread *thr;
+
+	/* Don't save if the high FP registers weren't modified. */
+	if ((td->td_frame->tf_special.psr & IA64_PSR_MFH) == 0)
+		return (ia64_highfp_drop(td));
+
+	pcb = td->td_pcb;
+	cpu = pcb->pcb_fpcpu;
+	if (cpu == NULL)
+		return (0);		/* Not held by any CPU. */
+#ifdef SMP
+	if (cpu != pcpup) {		/* Held by another CPU: have it save. */
+		ipi_send(cpu->pc_lid, IPI_HIGH_FP);
+		while (pcb->pcb_fpcpu == cpu)	/* Spin until it lets go. */
+			DELAY(100);
+		return (1);
+	}
+#endif
+	save_high_fp(&pcb->pcb_high_fp);
+	pcb->pcb_fpcpu = NULL;
+	thr = cpu->pc_fpcurthread;
+	cpu->pc_fpcurthread = NULL;
+
+	/* Post-mortem sanity checking. */
+	KASSERT(thr == td, ("Inconsistent high FP state"));
+	return (1);
+}
+
 #ifndef DDB
 void
 Debugger(const char *msg)
@@ -1231,72 +1303,6 @@
 SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
 	CTLFLAG_RW, &wall_cmos_clock, 0, "");
 
-void
-ia64_fpstate_check(struct thread *td)
-{
-	if ((td->td_frame->tf_special.psr & IA64_PSR_DFH) == 0)
-		if (td != PCPU_GET(fpcurthread))
-			panic("ia64_fpstate_check: bogus");
-}
-
-/*
- * Save the high floating point state in the pcb. Use this to get
- * read-only access to the floating point state. If write is true, the
- * current fp process is cleared so that fp state can safely be
- * modified. The process will automatically reload the changed state
- * by generating a disabled fp trap.
- */
-void
-ia64_fpstate_save(struct thread *td, int write)
-{
-	if (td == PCPU_GET(fpcurthread)) {
-		_get_high_fp(&td->td_pcb->pcb_high_fp);
-
-		if (write) {
-			td->td_frame->tf_special.psr |= IA64_PSR_DFH;
-			PCPU_SET(fpcurthread, NULL);
-		}
-	}
-}
-
-/*
- * Relinquish ownership of the FP state. This is called instead of
- * ia64_save_fpstate() if the entire FP state is being changed
- * (e.g. on sigreturn).
- */
-void
-ia64_fpstate_drop(struct thread *td)
-{
-	if (td == PCPU_GET(fpcurthread)) {
-		td->td_frame->tf_special.psr |= IA64_PSR_DFH;
-		PCPU_SET(fpcurthread, NULL);
-	}
-}
-
-/*
- * Switch the current owner of the fp state to p, reloading the state
- * from the pcb.
- */
-void
-ia64_fpstate_switch(struct thread *td)
-{
-	struct thread *fptd;
-
-	fptd = PCPU_GET(fpcurthread);
-	if (fptd != NULL) {
-		/* Dump the old fp state if its valid. */
-		_get_high_fp(&fptd->td_pcb->pcb_high_fp);
-		fptd->td_frame->tf_special.psr |= IA64_PSR_DFH;
-	}
-
-	/* Remember the new FP owner and reload its state. */
-	PCPU_SET(fpcurthread, td);
-	_set_high_fp(&td->td_pcb->pcb_high_fp);
-	td->td_frame->tf_special.psr &= ~IA64_PSR_DFH;
-
-	td->td_md.md_flags |= MDP_FPUSED;
-}
-
 /*
  * Utility functions for manipulating instruction bundles.
  */

==== //depot/projects/ia64_epc/sys/ia64/ia64/mp_machdep.c#4 (text+ko) ====

@@ -75,7 +75,6 @@
 volatile int ap_awake;
 volatile int ap_spin;
 
-static void ipi_send(u_int64_t, int);
 static void cpu_mp_unleash(void *);
 
 void
@@ -346,7 +345,7 @@
  * cr.lid (CR64) contents of the target processor. Only the id and eid
  * fields are used here.
  */
-static void
+void
 ipi_send(u_int64_t lid, int ipi)
 {
 	volatile u_int64_t *pipi;

==== //depot/projects/ia64_epc/sys/ia64/ia64/trap.c#8 (text+ko) ====

@@ -62,6 +62,9 @@
 #include <machine/pal.h>
 #include <machine/fpu.h>
 #include <machine/efi.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
 
 #ifdef KTRACE
 #include <sys/uio.h>
@@ -513,19 +516,81 @@
 		}
 	}
 
-	case IA64_VEC_DISABLED_FP:
+	case IA64_VEC_DISABLED_FP: {	/* High FP registers are disabled. */
+		struct pcpu *pcpu;
+		struct pcb *pcb;
+		struct thread *thr;
+
+		pcb = td->td_pcb;
+		pcpu = pcb->pcb_fpcpu;
+
+		/*
+		 * The pcpu variable holds the address of the per-CPU
+		 * structure of the CPU currently holding this thread's
+		 * high FP registers (or NULL if no CPU holds these
+		 * registers). We have to interrupt that CPU and wait
+		 * for it to have saved the registers.
+		 */
+		if (pcpu != NULL) {
+			thr = pcpu->pc_fpcurthread;
+			KASSERT(thr == td, ("High FP state out of sync"));
+
+			if (pcpu == pcpup) {
+				/*
+				 * Short-circuit handling the trap when this
+				 * CPU already holds the high FP registers for
+				 * this thread. We really shouldn't get the
+				 * trap in the first place, but since it's
+				 * only a performance issue and not a
+				 * correctness issue, we emit a message for
+				 * now, enable the high FP registers and
+				 * return.
+				 */
+				printf("XXX: bogusly disabled high FP regs\n");
+				framep->tf_special.psr &= ~IA64_PSR_DFH;
+				goto out;
+			}
+#ifdef SMP
+			/*
+			 * Interrupt the other CPU so that it saves the high
+			 * FP registers of this thread. Note that this can
+			 * only happen for the SMP case.
+			 */
+			ipi_send(pcpu->pc_lid, IPI_HIGH_FP);
+#endif
+#ifdef DIAGNOSTIC
+		} else {
+			KASSERT(PCPU_GET(fpcurthread) != td,
+			    ("High FP state out of sync"));
+#endif
+		}
+
+		thr = PCPU_GET(fpcurthread);
+
 		/*
-		 * on exit from the kernel, if thread == fpcurthread,
-		 * FP is enabled.
+		 * The thr variable holds the thread that owns the high FP
+		 * registers currently on this CPU. Free this CPU so that
+		 * we can load the current thread's high FP registers.
 		 */
-		if (PCPU_GET(fpcurthread) == td) {
-			printf("trap: fp disabled for fpcurthread == %p", td);
-			goto dopanic;
+		if (thr != NULL) {
+			KASSERT(thr != td, ("High FP state out of sync"));
+			pcb = thr->td_pcb;
+			KASSERT(pcb->pcb_fpcpu == pcpup,
+			    ("High FP state out of sync"));
+			ia64_highfp_save(thr);
 		}
-	
-		ia64_fpstate_switch(td);
+
+		/*
+		 * Wait for the other CPU to have saved out high FP
+		 * registers (if applicable).
+		 */
+		while (pcpu && pcpu->pc_fpcurthread == td);
+
+		ia64_highfp_load(td);
+		framep->tf_special.psr &= ~IA64_PSR_DFH;
 		goto out;
 		break;
+	}
 
 	case IA64_VEC_PAGE_NOT_PRESENT:
 	case IA64_VEC_INST_ACCESS_RIGHTS:

==== //depot/projects/ia64_epc/sys/ia64/ia64/vm_machdep.c#7 (text+ko) ====

@@ -148,43 +148,18 @@
 	    (td2->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
 	td2->td_md.md_flags = td1->td_md.md_flags & (MDP_FPUSED | MDP_UAC_MASK);
 
-	/*
-	 * Copy floating point state from the FP chip to the PCB
-	 * if this process has state stored there.
-	 */
-	ia64_fpstate_save(td1, 0);
+	/* Save the high FP registers so that we can copy them. */
+	ia64_highfp_save(td1);
 
 	/*
 	 * Copy pcb and stack from proc p1 to p2.  We do this as
 	 * cheaply as possible, copying only the active part of the
-	 * stack.  The stack and pcb need to agree. Make sure that the 
+	 * stack.  The stack and pcb need to agree. Make sure that the
 	 * new process has FEN disabled.
 	 */
 	bcopy(td1->td_pcb, td2->td_pcb, sizeof(struct pcb));
 
 	/*
-	 * Set the floating point state.
-	 */
-#if 0
-	if ((td2->td_pcb->pcb_fp_control & IEEE_INHERIT) == 0) {
-		td2->td_pcb->pcb_fp_control = 0;
-		td2->td_pcb->pcb_fp.fpr_cr = (FPCR_DYN_NORMAL
-						   | FPCR_INVD | FPCR_DZED
-						   | FPCR_OVFD | FPCR_INED
-						   | FPCR_UNFD);
-	}
-#endif
-
-	/*
-	 * Arrange for a non-local goto when the new process
-	 * is started, to resume here, returning nonzero from setjmp.
-	 */
-#ifdef DIAGNOSTIC
-	if (td1 == curthread)
-		ia64_fpstate_check(td1);
-#endif
-
-	/*
 	 * create the child's kernel stack, from scratch.
 	 *
 	 * Pick a stack pointer, leaving room for a trapframe;
@@ -316,11 +291,11 @@
  * When the proc is reaped, cpu_wait() will gc the VM state.
  */
 void
-cpu_exit(td)
-	register struct thread *td;
+cpu_exit(struct thread *td)
 {
 
-	ia64_fpstate_drop(td);
+	/* Throw away the high FP registers. */
+	ia64_highfp_drop(td);
 }
 
 void

==== //depot/projects/ia64_epc/sys/ia64/include/cpu.h#5 (text+ko) ====

@@ -112,12 +112,11 @@
 void	exception_restore(void);				/* MAGIC */
 void	frametoreg(struct trapframe *, struct reg *);
 long	fswintrberr(void);				/* MAGIC */
+int	ia64_highfp_drop(struct thread *);
+int	ia64_highfp_load(struct thread *);
+int	ia64_highfp_save(struct thread *);
+void	ia64_init(u_int64_t, u_int64_t);
 int	ia64_pa_access(u_long);
-void	ia64_init(u_int64_t, u_int64_t);
-void	ia64_fpstate_check(struct thread *p);
-void	ia64_fpstate_save(struct thread *p, int write);
-void	ia64_fpstate_drop(struct thread *p);
-void	ia64_fpstate_switch(struct thread *p);
 void	init_prom_interface(struct rpb*);
 void	interrupt(u_int64_t, struct trapframe *);
 void	machine_check

==== //depot/projects/ia64_epc/sys/ia64/include/smp.h#3 (text+ko) ====

@@ -33,6 +33,7 @@
 void	ipi_all_but_self(int ipi);
 void	ipi_selected(u_int64_t cpus, int ipi);
 void	ipi_self(int ipi);
+void	ipi_send(u_int64_t lid, int ipi);
 
 #endif /* !LOCORE */
 #endif /* _KERNEL */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200304140257.h3E2v3v7057436>