From owner-p4-projects@FreeBSD.ORG Sun Apr 13 19:57:06 2003 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id DEEAE37B404; Sun, 13 Apr 2003 19:57:05 -0700 (PDT) Delivered-To: perforce@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 8774337B401 for ; Sun, 13 Apr 2003 19:57:05 -0700 (PDT) Received: from repoman.freebsd.org (repoman.freebsd.org [216.136.204.115]) by mx1.FreeBSD.org (Postfix) with ESMTP id 495EE43FCB for ; Sun, 13 Apr 2003 19:57:04 -0700 (PDT) (envelope-from marcel@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.12.6/8.12.6) with ESMTP id h3E2v40U057439 for ; Sun, 13 Apr 2003 19:57:04 -0700 (PDT) (envelope-from marcel@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.12.6/8.12.6/Submit) id h3E2v3v7057436 for perforce@freebsd.org; Sun, 13 Apr 2003 19:57:03 -0700 (PDT) Date: Sun, 13 Apr 2003 19:57:03 -0700 (PDT) Message-Id: <200304140257.h3E2v3v7057436@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to marcel@freebsd.org using -f From: Marcel Moolenaar To: Perforce Change Reviews Subject: PERFORCE change 28905 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 14 Apr 2003 02:57:07 -0000 http://perforce.freebsd.org/chv.cgi?CH=28905 Change 28905 by marcel@marcel_nfs on 2003/04/13 19:56:37 Implement lazy context switching for the high FP registers. This does not use any synchronization and/or locking yet. Also, we currently disable the high FP registers whenever we enter and leave the kernel. We may want to leave the high FP registers enabled if we leave the kernel and the CPU holds the high FP registers of the process we're returning to. 
For now we just let it trap. I expect to have to revisit this... Affected files ... .. //depot/projects/ia64_epc/sys/ia64/ia64/exception.s#9 edit .. //depot/projects/ia64_epc/sys/ia64/ia64/interrupt.c#2 edit .. //depot/projects/ia64_epc/sys/ia64/ia64/machdep.c#11 edit .. //depot/projects/ia64_epc/sys/ia64/ia64/mp_machdep.c#4 edit .. //depot/projects/ia64_epc/sys/ia64/ia64/trap.c#8 edit .. //depot/projects/ia64_epc/sys/ia64/ia64/vm_machdep.c#7 edit .. //depot/projects/ia64_epc/sys/ia64/include/cpu.h#5 edit .. //depot/projects/ia64_epc/sys/ia64/include/smp.h#3 edit Differences ... ==== //depot/projects/ia64_epc/sys/ia64/ia64/exception.s#9 (text+ko) ==== @@ -327,7 +327,7 @@ ;; } { .mlx - ssm psr.ic + ssm psr.ic|psr.dfh movl gp=__gp ;; } @@ -506,15 +506,20 @@ (p14) br.cond.sptk 1f ;; } - +{ .mii // Switch register stack alloc r31=ar.pfs,0,0,0,0 // discard current frame shl r30=r25,16 // value for ar.rsc + dep r24=-1,r24,19,1 // XXX disable high FP. ;; +} +{ .mmi mov ar.rsc=r30 // setup for loadrs ;; loadrs // load user regs + nop 0 ;; +} mov r31=ar.bspstore ;; mov ar.bspstore=r20 ==== //depot/projects/ia64_epc/sys/ia64/ia64/interrupt.c#2 (text+ko) ==== @@ -152,6 +152,9 @@ } else if (vector == ipi_vector[IPI_AST]) { asts[PCPU_GET(cpuid)]++; CTR1(KTR_SMP, "IPI_AST, cpuid=%d", PCPU_GET(cpuid)); + } else if (vector == ipi_vector[IPI_HIGH_FP]) { + if (PCPU_GET(fpcurthread) != NULL) + ia64_highfp_save(PCPU_GET(fpcurthread)); } else if (vector == ipi_vector[IPI_RENDEZVOUS]) { rdvs[PCPU_GET(cpuid)]++; CTR1(KTR_SMP, "IPI_RENDEZVOUS, cpuid=%d", PCPU_GET(cpuid)); ==== //depot/projects/ia64_epc/sys/ia64/ia64/machdep.c#11 (text+ko) ==== @@ -74,6 +74,9 @@ #include #include #include +#ifdef SMP +#include +#endif #include #include #include @@ -723,9 +726,6 @@ ia64_set_cflg((CR0_PE | CR0_PG) | ((long)(CR4_XMM | CR4_FXSR) << 32)); - /* We pretend to own FP state so that ia64_fpstate_check() works */ - PCPU_SET(fpcurthread, &thread0); - /* * Initialize the rest of proc 0's PCB. 
* @@ -974,6 +974,9 @@ printf("sigreturn: pid %d, scp %p\n", p->p_pid, uap->sigcntxp); #endif + /* Throw away the high FP registers. */ + ia64_highfp_drop(td); + /* * Fetch the entire context structure at once for speed. * We don't use a normal argument to simplify RSE handling. @@ -990,6 +993,7 @@ frame->tf_scratch = mc->mc_scratch; frame->tf_scratch_fp = mc->mc_scratch_fp; } + /* * XXX preserved registers. * XXX High FP. @@ -1008,9 +1012,6 @@ signotify(td); PROC_UNLOCK(p); - /* XXX ksc.sc_ownedfp ? */ - ia64_fpstate_drop(td); - #ifdef DEBUG if (sigdebug & SDB_FOLLOW) printf("sigreturn(%d): returns\n", p->p_pid); @@ -1092,9 +1093,6 @@ suword((caddr_t)frame->tf_special.bspstore - 24, stack); suword((caddr_t)frame->tf_special.bspstore - 16, ps_strings); suword((caddr_t)frame->tf_special.bspstore - 8, 0); - - td->td_md.md_flags &= ~MDP_FPUSED; - ia64_fpstate_drop(td); } int @@ -1181,10 +1179,11 @@ struct trapframe *frame = td->td_frame; struct pcb *pcb = td->td_pcb; + /* Save the high FP registers. */ + ia64_highfp_save(td); + fpregs->fpr_scratch = frame->tf_scratch_fp; - /* XXX preserved */ - - ia64_fpstate_save(td, 0); + /* XXX preserved_fp */ fpregs->fpr_high = pcb->pcb_high_fp; return (0); } @@ -1195,14 +1194,87 @@ struct trapframe *frame = td->td_frame; struct pcb *pcb = td->td_pcb; + /* Throw away the high FP registers (should be redundant). */ + ia64_highfp_drop(td); + frame->tf_scratch_fp = fpregs->fpr_scratch; - /* XXX preserved */ - - ia64_fpstate_drop(td); + /* XXX preserved_fp */ pcb->pcb_high_fp = fpregs->fpr_high; return (0); } +/* + * High FP register functions. + * XXX no synchronization yet. + */ + +int +ia64_highfp_drop(struct thread *td) +{ + struct pcb *pcb; + struct pcpu *cpu; + struct thread *thr; + + pcb = td->td_pcb; + cpu = pcb->pcb_fpcpu; + if (cpu == NULL) + return (0); + pcb->pcb_fpcpu = NULL; + thr = cpu->pc_fpcurthread; + cpu->pc_fpcurthread = NULL; + + /* Post-mortem sanity checking. 
*/ + KASSERT(thr == td, ("Inconsistent high FP state")); + return (1); +} + +int +ia64_highfp_load(struct thread *td) +{ + struct pcb *pcb; + + pcb = td->td_pcb; + KASSERT(pcb->pcb_fpcpu == NULL, ("FP race on thread")); + KASSERT(PCPU_GET(fpcurthread) == NULL, ("FP race on pcpu")); + restore_high_fp(&pcb->pcb_high_fp); + PCPU_SET(fpcurthread, td); + pcb->pcb_fpcpu = pcpup; + return (1); +} + +int +ia64_highfp_save(struct thread *td) +{ + struct pcb *pcb; + struct pcpu *cpu; + struct thread *thr; + + /* Don't save if the high FP registers weren't modified. */ + if ((td->td_frame->tf_special.psr & IA64_PSR_MFH) == 0) + return (ia64_highfp_drop(td)); + + pcb = td->td_pcb; + cpu = pcb->pcb_fpcpu; + if (cpu == NULL) + return (0); +#ifdef SMP + if (cpu != pcpup) { + ipi_send(cpu->pc_lid, IPI_HIGH_FP); + while (pcb->pcb_fpcpu != cpu) + DELAY(100); + return (1); + } +#endif + save_high_fp(&pcb->pcb_high_fp); + pcb->pcb_fpcpu = NULL; + thr = cpu->pc_fpcurthread; + cpu->pc_fpcurthread = NULL; + + /* Post-mortem sanity checking. */ + KASSERT(thr == td, ("Inconsistent high FP state")); + return (1); +} + #ifndef DDB void Debugger(const char *msg) @@ -1231,72 +1303,6 @@ SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); -void -ia64_fpstate_check(struct thread *td) -{ - if ((td->td_frame->tf_special.psr & IA64_PSR_DFH) == 0) - if (td != PCPU_GET(fpcurthread)) - panic("ia64_fpstate_check: bogus"); -} - -/* - * Save the high floating point state in the pcb. Use this to get - * read-only access to the floating point state. If write is true, the - * current fp process is cleared so that fp state can safely be - * modified. The process will automatically reload the changed state - * by generating a disabled fp trap.
- */ -void -ia64_fpstate_save(struct thread *td, int write) -{ - if (td == PCPU_GET(fpcurthread)) { - _get_high_fp(&td->td_pcb->pcb_high_fp); - - if (write) { - td->td_frame->tf_special.psr |= IA64_PSR_DFH; - PCPU_SET(fpcurthread, NULL); - } - } -} - -/* - * Relinquish ownership of the FP state. This is called instead of - * ia64_save_fpstate() if the entire FP state is being changed - * (e.g. on sigreturn). - */ -void -ia64_fpstate_drop(struct thread *td) -{ - if (td == PCPU_GET(fpcurthread)) { - td->td_frame->tf_special.psr |= IA64_PSR_DFH; - PCPU_SET(fpcurthread, NULL); - } -} - -/* - * Switch the current owner of the fp state to p, reloading the state - * from the pcb. - */ -void -ia64_fpstate_switch(struct thread *td) -{ - struct thread *fptd; - - fptd = PCPU_GET(fpcurthread); - if (fptd != NULL) { - /* Dump the old fp state if its valid. */ - _get_high_fp(&fptd->td_pcb->pcb_high_fp); - fptd->td_frame->tf_special.psr |= IA64_PSR_DFH; - } - - /* Remember the new FP owner and reload its state. */ - PCPU_SET(fpcurthread, td); - _set_high_fp(&td->td_pcb->pcb_high_fp); - td->td_frame->tf_special.psr &= ~IA64_PSR_DFH; - - td->td_md.md_flags |= MDP_FPUSED; -} - /* * Utility functions for manipulating instruction bundles. */ ==== //depot/projects/ia64_epc/sys/ia64/ia64/mp_machdep.c#4 (text+ko) ==== @@ -75,7 +75,6 @@ volatile int ap_awake; volatile int ap_spin; -static void ipi_send(u_int64_t, int); static void cpu_mp_unleash(void *); void @@ -346,7 +345,7 @@ * cr.lid (CR64) contents of the target processor. Only the id and eid * fields are used here. */ -static void +void ipi_send(u_int64_t lid, int ipi) { volatile u_int64_t *pipi; ==== //depot/projects/ia64_epc/sys/ia64/ia64/trap.c#8 (text+ko) ==== @@ -62,6 +62,9 @@ #include #include #include +#ifdef SMP +#include +#endif #ifdef KTRACE #include @@ -513,19 +516,81 @@ } } - case IA64_VEC_DISABLED_FP: + case IA64_VEC_DISABLED_FP: { /* High FP registers are disabled. 
*/ + struct pcpu *pcpu; + struct pcb *pcb; + struct thread *thr; + + pcb = td->td_pcb; + pcpu = pcb->pcb_fpcpu; + + /* + * The pcpu variable holds the address of the per-CPU + * structure of the CPU currently holding this thread's + * high FP registers (or NULL if no CPU holds these + * registers). We have to interrupt that CPU and wait + * for it to have saved the registers. + */ + if (pcpu != NULL) { + thr = pcpu->pc_fpcurthread; + KASSERT(thr == td, ("High FP state out of sync")); + + if (pcpu == pcpup) { + /* + * Short-circuit handling the trap when this + * CPU already holds the high FP registers for + * this thread. We really shouldn't get the + * trap in the first place, but since it's + * only a performance issue and not a + * correctness issue, we emit a message for + * now, enable the high FP registers and + * return. + */ + printf("XXX: bogusly disabled high FP regs\n"); + framep->tf_special.psr &= ~IA64_PSR_DFH; + goto out; + } +#ifdef SMP + /* + * Interrupt the other CPU so that it saves the high + * FP registers of this thread. Note that this can + * only happen for the SMP case. + */ + ipi_send(pcpu->pc_lid, IPI_HIGH_FP); +#endif +#ifdef DIAGNOSTICS + } else { + KASSERT(PCPU_GET(fpcurthread) != td, + ("High FP state out of sync")); +#endif + } + + thr = PCPU_GET(fpcurthread); + /* - * on exit from the kernel, if thread == fpcurthread, - * FP is enabled. + * The thr variable holds the thread that owns the high FP + * registers currently on this CPU. Free this CPU so that + * we can load the current thread's high FP registers. */ - if (PCPU_GET(fpcurthread) == td) { - printf("trap: fp disabled for fpcurthread == %p", td); - goto dopanic; + if (thr != NULL) { + KASSERT(thr != td, ("High FP state out of sync")); + pcb = thr->td_pcb; + KASSERT(pcb->pcb_fpcpu == pcpup, + ("High FP state out of sync")); + ia64_highfp_save(thr); } - - ia64_fpstate_switch(td); + + /* + * Wait for the other CPU to have saved out high FP + * registers (if applicable).
+ */ + while (pcpu && pcpu->pc_fpcurthread == td); + + ia64_highfp_load(td); + framep->tf_special.psr &= ~IA64_PSR_DFH; goto out; break; + } case IA64_VEC_PAGE_NOT_PRESENT: case IA64_VEC_INST_ACCESS_RIGHTS: ==== //depot/projects/ia64_epc/sys/ia64/ia64/vm_machdep.c#7 (text+ko) ==== @@ -148,43 +148,18 @@ (td2->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; td2->td_md.md_flags = td1->td_md.md_flags & (MDP_FPUSED | MDP_UAC_MASK); - /* - * Copy floating point state from the FP chip to the PCB - * if this process has state stored there. - */ - ia64_fpstate_save(td1, 0); + /* Save the high FP registers so that we can copy them. */ + ia64_highfp_save(td1); /* * Copy pcb and stack from proc p1 to p2. We do this as * cheaply as possible, copying only the active part of the - * stack. The stack and pcb need to agree. Make sure that the + * stack. The stack and pcb need to agree. Make sure that the * new process has FEN disabled. */ bcopy(td1->td_pcb, td2->td_pcb, sizeof(struct pcb)); /* - * Set the floating point state. - */ -#if 0 - if ((td2->td_pcb->pcb_fp_control & IEEE_INHERIT) == 0) { - td2->td_pcb->pcb_fp_control = 0; - td2->td_pcb->pcb_fp.fpr_cr = (FPCR_DYN_NORMAL - | FPCR_INVD | FPCR_DZED - | FPCR_OVFD | FPCR_INED - | FPCR_UNFD); - } -#endif - - /* - * Arrange for a non-local goto when the new process - * is started, to resume here, returning nonzero from setjmp. - */ -#ifdef DIAGNOSTIC - if (td1 == curthread) - ia64_fpstate_check(td1); -#endif - - /* * create the child's kernel stack, from scratch. * * Pick a stack pointer, leaving room for a trapframe; @@ -316,11 +291,11 @@ * When the proc is reaped, cpu_wait() will gc the VM state. */ void -cpu_exit(td) - register struct thread *td; +cpu_exit(struct thread *td) { - ia64_fpstate_drop(td); + /* Throw away the high FP registers. 
*/ + ia64_highfp_drop(td); } void ==== //depot/projects/ia64_epc/sys/ia64/include/cpu.h#5 (text+ko) ==== @@ -112,12 +112,11 @@ void exception_restore(void); /* MAGIC */ void frametoreg(struct trapframe *, struct reg *); long fswintrberr(void); /* MAGIC */ +int ia64_highfp_drop(struct thread *); +int ia64_highfp_load(struct thread *); +int ia64_highfp_save(struct thread *); +void ia64_init(u_int64_t, u_int64_t); int ia64_pa_access(u_long); -void ia64_init(u_int64_t, u_int64_t); -void ia64_fpstate_check(struct thread *p); -void ia64_fpstate_save(struct thread *p, int write); -void ia64_fpstate_drop(struct thread *p); -void ia64_fpstate_switch(struct thread *p); void init_prom_interface(struct rpb*); void interrupt(u_int64_t, struct trapframe *); void machine_check ==== //depot/projects/ia64_epc/sys/ia64/include/smp.h#3 (text+ko) ==== @@ -33,6 +33,7 @@ void ipi_all_but_self(int ipi); void ipi_selected(u_int64_t cpus, int ipi); void ipi_self(int ipi); +void ipi_send(u_int64_t lid, int ipi); #endif /* !LOCORE */ #endif /* _KERNEL */