From owner-svn-src-all@FreeBSD.ORG Sun Jan 15 00:08:14 2012 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 96DAC106568E; Sun, 15 Jan 2012 00:08:14 +0000 (UTC) (envelope-from nwhitehorn@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 803FA8FC19; Sun, 15 Jan 2012 00:08:14 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q0F08EsY054643; Sun, 15 Jan 2012 00:08:14 GMT (envelope-from nwhitehorn@svn.freebsd.org) Received: (from nwhitehorn@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q0F08Ei7054636; Sun, 15 Jan 2012 00:08:14 GMT (envelope-from nwhitehorn@svn.freebsd.org) Message-Id: <201201150008.q0F08Ei7054636@svn.freebsd.org> From: Nathan Whitehorn Date: Sun, 15 Jan 2012 00:08:14 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r230123 - in head/sys/powerpc: aim include powerpc X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 15 Jan 2012 00:08:14 -0000 Author: nwhitehorn Date: Sun Jan 15 00:08:14 2012 New Revision: 230123 URL: http://svn.freebsd.org/changeset/base/230123 Log: Rework SLB trap handling so that double-faults into an SLB trap handler are possible, and double faults within an SLB trap handler are not. The result is that it is possible to take an SLB fault at any time, on any address, for any reason, at any point in the kernel. This lets us do two important things. 
First, it removes the (soft) 16 GB RAM ceiling on PPC64 as well as any architectural limitations on KVA space. Second, it lets the kernel tolerate poorly designed hypervisors that have a tendency to fail to restore the SLB properly after a hypervisor context switch. MFC after: 6 weeks Modified: head/sys/powerpc/aim/machdep.c head/sys/powerpc/aim/slb.c head/sys/powerpc/aim/trap.c head/sys/powerpc/aim/trap_subr64.S head/sys/powerpc/include/pcpu.h head/sys/powerpc/powerpc/genassym.c Modified: head/sys/powerpc/aim/machdep.c ============================================================================== --- head/sys/powerpc/aim/machdep.c Sat Jan 14 23:19:10 2012 (r230122) +++ head/sys/powerpc/aim/machdep.c Sun Jan 15 00:08:14 2012 (r230123) @@ -238,6 +238,7 @@ extern void *trapcode64; extern void *rstcode, *rstsize; #endif extern void *trapcode, *trapsize; +extern void *slbtrap, *slbtrapsize; extern void *alitrap, *alisize; extern void *dsitrap, *dsisize; extern void *decrint, *decrsize; @@ -490,8 +491,8 @@ powerpc_init(vm_offset_t startkernel, vm bcopy(&dsitrap, (void *)(EXC_DSI + trap_offset), (size_t)&dsisize); bcopy(generictrap, (void *)EXC_ISI, (size_t)&trapsize); #ifdef __powerpc64__ - bcopy(generictrap, (void *)EXC_DSE, (size_t)&trapsize); - bcopy(generictrap, (void *)EXC_ISE, (size_t)&trapsize); + bcopy(&slbtrap, (void *)EXC_DSE, (size_t)&slbtrapsize); + bcopy(&slbtrap, (void *)EXC_ISE, (size_t)&slbtrapsize); #endif bcopy(generictrap, (void *)EXC_EXI, (size_t)&trapsize); bcopy(generictrap, (void *)EXC_FPU, (size_t)&trapsize); Modified: head/sys/powerpc/aim/slb.c ============================================================================== --- head/sys/powerpc/aim/slb.c Sat Jan 14 23:19:10 2012 (r230122) +++ head/sys/powerpc/aim/slb.c Sun Jan 15 00:08:14 2012 (r230123) @@ -409,15 +409,11 @@ slb_alloc_tree(void) /* Lock entries mapping kernel text and stacks */ -#define SLB_SPILLABLE(slbe) \ - (((slbe & SLBE_ESID_MASK) < VM_MIN_KERNEL_ADDRESS && \ - (slbe & 
SLBE_ESID_MASK) > 16*SEGMENT_LENGTH) || \ - (slbe & SLBE_ESID_MASK) > VM_MAX_KERNEL_ADDRESS) void slb_insert_kernel(uint64_t slbe, uint64_t slbv) { struct slb *slbcache; - int i, j; + int i; /* We don't want to be preempted while modifying the kernel map */ critical_enter(); @@ -437,15 +433,9 @@ slb_insert_kernel(uint64_t slbe, uint64_ slbcache[USER_SLB_SLOT].slbe = 1; } - for (i = mftb() % n_slbs, j = 0; j < n_slbs; j++, i = (i+1) % n_slbs) { - if (i == USER_SLB_SLOT) - continue; - - if (SLB_SPILLABLE(slbcache[i].slbe)) - break; - } - - KASSERT(j < n_slbs, ("All kernel SLB slots locked!")); + i = mftb() % n_slbs; + if (i == USER_SLB_SLOT) + i = (i+1) % n_slbs; fillkernslb: KASSERT(i != USER_SLB_SLOT, Modified: head/sys/powerpc/aim/trap.c ============================================================================== --- head/sys/powerpc/aim/trap.c Sat Jan 14 23:19:10 2012 (r230122) +++ head/sys/powerpc/aim/trap.c Sun Jan 15 00:08:14 2012 (r230123) @@ -88,7 +88,9 @@ static int handle_onfault(struct trapfra static void syscall(struct trapframe *frame); #ifdef __powerpc64__ -static int handle_slb_spill(pmap_t pm, vm_offset_t addr); + void handle_kernel_slb_spill(int, register_t, register_t); +static int handle_user_slb_spill(pmap_t pm, vm_offset_t addr); +extern int n_slbs; #endif int setfault(faultbuf); /* defined in locore.S */ @@ -191,7 +193,7 @@ trap(struct trapframe *frame) #ifdef __powerpc64__ case EXC_ISE: case EXC_DSE: - if (handle_slb_spill(&p->p_vmspace->vm_pmap, + if (handle_user_slb_spill(&p->p_vmspace->vm_pmap, (type == EXC_ISE) ? 
frame->srr0 : frame->cpu.aim.dar) != 0) sig = SIGSEGV; @@ -259,27 +261,20 @@ trap(struct trapframe *frame) KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { - case EXC_DSI: - if (trap_pfault(frame, 0) == 0) - return; - break; #ifdef __powerpc64__ case EXC_DSE: if ((frame->cpu.aim.dar & SEGMENT_MASK) == USER_ADDR) { __asm __volatile ("slbmte %0, %1" :: - "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), - "r"(USER_SLB_SLBE)); + "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), + "r"(USER_SLB_SLBE)); return; } - - /* FALLTHROUGH */ - case EXC_ISE: - if (handle_slb_spill(kernel_pmap, - (type == EXC_ISE) ? frame->srr0 : - frame->cpu.aim.dar) != 0) - panic("Fault handling kernel SLB miss"); - return; + break; #endif + case EXC_DSI: + if (trap_pfault(frame, 0) == 0) + return; + break; case EXC_MCHK: if (handle_onfault(frame)) return; @@ -326,8 +321,7 @@ printtrap(u_int vector, struct trapframe printf("%s %s trap:\n", isfatal ? "fatal" : "handled", user ? "user" : "kernel"); printf("\n"); - printf(" exception = 0x%x (%s)\n", vector >> 8, - trapname(vector)); + printf(" exception = 0x%x (%s)\n", vector, trapname(vector)); switch (vector) { case EXC_DSE: case EXC_DSI: @@ -486,8 +480,54 @@ syscall(struct trapframe *frame) } #ifdef __powerpc64__ +/* Handle kernel SLB faults -- runs in real mode, all seat belts off */ +void +handle_kernel_slb_spill(int type, register_t dar, register_t srr0) +{ + struct slb *slbcache; + uint64_t slbe, slbv; + uint64_t esid, addr; + int i; + + addr = (type == EXC_ISE) ? 
srr0 : dar; + slbcache = PCPU_GET(slb); + esid = (uintptr_t)addr >> ADDR_SR_SHFT; + slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; + + /* See if the hardware flushed this somehow (can happen in LPARs) */ + for (i = 0; i < n_slbs; i++) + if (slbcache[i].slbe == (slbe | (uint64_t)i)) + return; + + /* Not in the map, needs to actually be added */ + slbv = kernel_va_to_slbv(addr); + if (slbcache[USER_SLB_SLOT].slbe == 0) { + for (i = 0; i < n_slbs; i++) { + if (i == USER_SLB_SLOT) + continue; + if (!(slbcache[i].slbe & SLBE_VALID)) + goto fillkernslb; + } + + if (i == n_slbs) + slbcache[USER_SLB_SLOT].slbe = 1; + } + + /* Sacrifice a random SLB entry that is not the user entry */ + i = mftb() % n_slbs; + if (i == USER_SLB_SLOT) + i = (i+1) % n_slbs; + +fillkernslb: + /* Write new entry */ + slbcache[i].slbv = slbv; + slbcache[i].slbe = slbe | (uint64_t)i; + + /* Trap handler will restore from cache on exit */ +} + static int -handle_slb_spill(pmap_t pm, vm_offset_t addr) +handle_user_slb_spill(pmap_t pm, vm_offset_t addr) { struct slb *user_entry; uint64_t esid; @@ -495,12 +535,6 @@ handle_slb_spill(pmap_t pm, vm_offset_t esid = (uintptr_t)addr >> ADDR_SR_SHFT; - if (pm == kernel_pmap) { - slb_insert_kernel((esid << SLBE_ESID_SHIFT) | SLBE_VALID, - kernel_va_to_slbv(addr)); - return (0); - } - PMAP_LOCK(pm); user_entry = user_va_to_slb_entry(pm, addr); Modified: head/sys/powerpc/aim/trap_subr64.S ============================================================================== --- head/sys/powerpc/aim/trap_subr64.S Sat Jan 14 23:19:10 2012 (r230122) +++ head/sys/powerpc/aim/trap_subr64.S Sun Jan 15 00:08:14 2012 (r230123) @@ -112,6 +112,9 @@ restore_kernsrs: * r31 scratch * r1 kernel stack * SRR0/1 as at start of trap + * + * NOTE: SPRG1 is never used while the MMU is on, making it safe to reuse + * in any real-mode fault handler, including those handling double faults. 
*/ #define FRAME_SETUP(savearea) \ /* Have to enable translation to allow access of kernel stack: */ \ @@ -120,11 +123,11 @@ restore_kernsrs: std %r30,(savearea+CPUSAVE_SRR0)(%r31); /* save SRR0 */ \ mfsrr1 %r30; \ std %r30,(savearea+CPUSAVE_SRR1)(%r31); /* save SRR1 */ \ + mfsprg1 %r31; /* get saved SP (clears SPRG1) */ \ mfmsr %r30; \ ori %r30,%r30,(PSL_DR|PSL_IR|PSL_RI)@l; /* relocation on */ \ mtmsr %r30; /* stack can now be accessed */ \ isync; \ - mfsprg1 %r31; /* get saved SP */ \ stdu %r31,-(FRAMELEN+288)(%r1); /* save it in the callframe */ \ std %r0, FRAME_0+48(%r1); /* save r0 in the trapframe */ \ std %r31,FRAME_1+48(%r1); /* save SP " " */ \ @@ -201,7 +204,7 @@ restore_kernsrs: mtctr %r4; \ mtxer %r5; \ mtlr %r6; \ - mtsprg1 %r7; /* save cr */ \ + mtsprg2 %r7; /* save cr */ \ ld %r31,FRAME_31+48(%r1); /* restore r0-31 */ \ ld %r30,FRAME_30+48(%r1); \ ld %r29,FRAME_29+48(%r1); \ @@ -235,16 +238,15 @@ restore_kernsrs: ld %r0, FRAME_0+48(%r1); \ ld %r1, FRAME_1+48(%r1); \ /* Can't touch %r1 from here on */ \ - mtsprg2 %r2; /* save r2 & r3 */ \ - mtsprg3 %r3; \ + mtsprg3 %r3; /* save r3 */ \ /* Disable translation, machine check and recoverability: */ \ - mfmsr %r2; \ - andi. %r2,%r2,~(PSL_DR|PSL_IR|PSL_ME|PSL_RI)@l; \ - mtmsr %r2; \ + mfmsr %r3; \ + andi. 
%r3,%r3,~(PSL_DR|PSL_IR|PSL_ME|PSL_RI)@l; \ + mtmsr %r3; \ isync; \ /* Decide whether we return to user mode: */ \ - GET_CPUINFO(%r2); \ - ld %r3,(savearea+CPUSAVE_SRR1)(%r2); \ + GET_CPUINFO(%r3); \ + ld %r3,(savearea+CPUSAVE_SRR1)(%r3); \ mtcr %r3; \ bf 17,1f; /* branch if PSL_PR is false */ \ /* Restore user SRs */ \ @@ -262,15 +264,15 @@ restore_kernsrs: ld %r29,(savearea+CPUSAVE_R29)(%r3); \ ld %r28,(savearea+CPUSAVE_R28)(%r3); \ ld %r27,(savearea+CPUSAVE_R27)(%r3); \ -1: mfsprg1 %r2; /* restore cr */ \ - mtcr %r2; \ - GET_CPUINFO(%r2); \ - ld %r3,(savearea+CPUSAVE_SRR0)(%r2); /* restore srr0 */ \ +1: mfsprg2 %r3; /* restore cr */ \ + mtcr %r3; \ + GET_CPUINFO(%r3); \ + ld %r3,(savearea+CPUSAVE_SRR0)(%r3); /* restore srr0 */ \ mtsrr0 %r3; \ - ld %r3,(savearea+CPUSAVE_SRR1)(%r2); /* restore srr1 */ \ + GET_CPUINFO(%r3); \ + ld %r3,(savearea+CPUSAVE_SRR1)(%r3); /* restore srr1 */ \ mtsrr1 %r3; \ - mfsprg2 %r2; /* restore r2 & r3 */ \ - mfsprg3 %r3 + mfsprg3 %r3 /* restore r3 */ #ifdef SMP /* @@ -330,6 +332,151 @@ CNAME(trapcode): CNAME(trapsize) = .-CNAME(trapcode) /* + * For SLB misses: do special things for the kernel + * + * Note: SPRG1 is always safe to overwrite any time the MMU is on, which is + * the only time this can be called. 
+ */ + .globl CNAME(slbtrap),CNAME(slbtrapsize) +CNAME(slbtrap): + mtsprg1 %r1 /* save SP */ + GET_CPUINFO(%r1) + std %r2,(PC_SLBSAVE+16)(%r1) + mfcr %r2 /* save CR */ + std %r2,(PC_SLBSAVE+104)(%r1) + mfsrr1 %r2 /* test kernel mode */ + mtcr %r2 + bf 17,1f /* branch if PSL_PR is false */ + /* User mode */ + ld %r2,(PC_SLBSAVE+104)(%r1) /* Restore CR */ + mtcr %r2 + ld %r2,(PC_SLBSAVE+16)(%r1) /* Restore R2 */ + mflr %r1 /* Save the old LR in r1 */ + mtsprg2 %r1 /* And then in SPRG2 */ + li %r1, 0x80 /* How to get the vector from LR */ + bla generictrap /* LR & SPRG3 is exception # */ +1: mflr %r2 /* Save the old LR in r2 */ + bla kern_slbtrap +CNAME(slbtrapsize) = .-CNAME(slbtrap) + +kern_slbtrap: + std %r2,(PC_SLBSAVE+136)(%r1) /* old LR */ + std %r3,(PC_SLBSAVE+24)(%r1) /* save R3 */ + + /* Check if this needs to be handled as a regular trap (userseg miss) */ + mflr %r2 + andi. %r2,%r2,0xff80 + cmpwi %r2,0x380 + bne 1f + mfdar %r2 + b 2f +1: mfsrr0 %r2 +2: /* r2 now contains the fault address */ + lis %r3,SEGMENT_MASK@highesta + ori %r3,%r3,SEGMENT_MASK@highera + sldi %r3,%r3,32 + oris %r3,%r3,SEGMENT_MASK@ha + ori %r3,%r3,SEGMENT_MASK@l + and %r2,%r2,%r3 /* R2 = segment base address */ + lis %r3,USER_ADDR@highesta + ori %r3,%r3,USER_ADDR@highera + sldi %r3,%r3,32 + oris %r3,%r3,USER_ADDR@ha + ori %r3,%r3,USER_ADDR@l + cmpd %r2,%r3 /* Compare fault base to USER_ADDR */ + bne 3f + + /* User seg miss, handle as a regular trap */ + ld %r2,(PC_SLBSAVE+104)(%r1) /* Restore CR */ + mtcr %r2 + ld %r2,(PC_SLBSAVE+16)(%r1) /* Restore R2,R3 */ + ld %r3,(PC_SLBSAVE+24)(%r1) + ld %r1,(PC_SLBSAVE+136)(%r1) /* Save the old LR in r1 */ + mtsprg2 %r1 /* And then in SPRG2 */ + li %r1, 0x80 /* How to get the vector from LR */ + b generictrap /* Retain old LR using b */ + +3: /* Real kernel SLB miss */ + std %r0,(PC_SLBSAVE+0)(%r1) /* free all volatile regs */ + mfsprg1 %r2 /* Old R1 */ + std %r2,(PC_SLBSAVE+8)(%r1) + /* R2,R3 already saved */ + std %r4,(PC_SLBSAVE+32)(%r1) + std 
%r5,(PC_SLBSAVE+40)(%r1) + std %r6,(PC_SLBSAVE+48)(%r1) + std %r7,(PC_SLBSAVE+56)(%r1) + std %r8,(PC_SLBSAVE+64)(%r1) + std %r9,(PC_SLBSAVE+72)(%r1) + std %r10,(PC_SLBSAVE+80)(%r1) + std %r11,(PC_SLBSAVE+88)(%r1) + std %r12,(PC_SLBSAVE+96)(%r1) + /* CR already saved */ + mfxer %r2 /* save XER */ + std %r2,(PC_SLBSAVE+112)(%r1) + mflr %r2 /* save LR (SP already saved) */ + std %r2,(PC_SLBSAVE+120)(%r1) + mfctr %r2 /* save CTR */ + std %r2,(PC_SLBSAVE+128)(%r1) + + /* Call handler */ + addi %r1,%r1,PC_SLBSTACK-48+1024 + li %r2,~15 + and %r1,%r1,%r2 + lis %r3,tocbase@ha + ld %r2,tocbase@l(%r3) + mflr %r3 + andi. %r3,%r3,0xff80 + mfdar %r4 + mfsrr0 %r5 + bl handle_kernel_slb_spill + nop + + /* Save r28-31, restore r4-r12 */ + GET_CPUINFO(%r1) + ld %r4,(PC_SLBSAVE+32)(%r1) + ld %r5,(PC_SLBSAVE+40)(%r1) + ld %r6,(PC_SLBSAVE+48)(%r1) + ld %r7,(PC_SLBSAVE+56)(%r1) + ld %r8,(PC_SLBSAVE+64)(%r1) + ld %r9,(PC_SLBSAVE+72)(%r1) + ld %r10,(PC_SLBSAVE+80)(%r1) + ld %r11,(PC_SLBSAVE+88)(%r1) + ld %r12,(PC_SLBSAVE+96)(%r1) + std %r28,(PC_SLBSAVE+64)(%r1) + std %r29,(PC_SLBSAVE+72)(%r1) + std %r30,(PC_SLBSAVE+80)(%r1) + std %r31,(PC_SLBSAVE+88)(%r1) + + /* Restore kernel mapping */ + bl restore_kernsrs + + /* Restore remaining registers */ + ld %r28,(PC_SLBSAVE+64)(%r1) + ld %r29,(PC_SLBSAVE+72)(%r1) + ld %r30,(PC_SLBSAVE+80)(%r1) + ld %r31,(PC_SLBSAVE+88)(%r1) + + ld %r2,(PC_SLBSAVE+104)(%r1) + mtcr %r2 + ld %r2,(PC_SLBSAVE+112)(%r1) + mtxer %r2 + ld %r2,(PC_SLBSAVE+120)(%r1) + mtlr %r2 + ld %r2,(PC_SLBSAVE+128)(%r1) + mtctr %r2 + ld %r2,(PC_SLBSAVE+136)(%r1) + mtlr %r2 + + /* Restore r0-r3 */ + ld %r0,(PC_SLBSAVE+0)(%r1) + ld %r2,(PC_SLBSAVE+16)(%r1) + ld %r3,(PC_SLBSAVE+24)(%r1) + mfsprg1 %r1 + + /* Back to whatever we were doing */ + rfid + +/* * For ALI: has to save DSISR and DAR */ .globl CNAME(alitrap),CNAME(alisize) Modified: head/sys/powerpc/include/pcpu.h ============================================================================== --- head/sys/powerpc/include/pcpu.h Sat 
Jan 14 23:19:10 2012 (r230122) +++ head/sys/powerpc/include/pcpu.h Sun Jan 15 00:08:14 2012 (r230123) @@ -55,7 +55,9 @@ struct pmap; #define PCPU_MD_AIM64_FIELDS \ struct slb pc_slb[64]; \ - struct slb **pc_userslb; + struct slb **pc_userslb; \ + register_t pc_slbsave[18]; \ + uint8_t pc_slbstack[1024]; #ifdef __powerpc64__ #define PCPU_MD_AIM_FIELDS PCPU_MD_AIM64_FIELDS Modified: head/sys/powerpc/powerpc/genassym.c ============================================================================== --- head/sys/powerpc/powerpc/genassym.c Sat Jan 14 23:19:10 2012 (r230122) +++ head/sys/powerpc/powerpc/genassym.c Sun Jan 15 00:08:14 2012 (r230123) @@ -107,8 +107,11 @@ ASSYM(USER_ADDR, USER_ADDR); #ifdef __powerpc64__ ASSYM(PC_KERNSLB, offsetof(struct pcpu, pc_slb)); ASSYM(PC_USERSLB, offsetof(struct pcpu, pc_userslb)); +ASSYM(PC_SLBSAVE, offsetof(struct pcpu, pc_slbsave)); +ASSYM(PC_SLBSTACK, offsetof(struct pcpu, pc_slbstack)); ASSYM(USER_SLB_SLOT, USER_SLB_SLOT); ASSYM(USER_SLB_SLBE, USER_SLB_SLBE); +ASSYM(SEGMENT_MASK, SEGMENT_MASK); #else ASSYM(PM_SR, offsetof(struct pmap, pm_sr)); ASSYM(USER_SR, USER_SR);