From owner-p4-projects@FreeBSD.ORG Sun Apr 20 17:49:37 2003 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id DD81337B404; Sun, 20 Apr 2003 17:49:36 -0700 (PDT) Delivered-To: perforce@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 989A737B401 for ; Sun, 20 Apr 2003 17:49:34 -0700 (PDT) Received: from repoman.freebsd.org (repoman.freebsd.org [216.136.204.115]) by mx1.FreeBSD.org (Postfix) with ESMTP id 0C53243F75 for ; Sun, 20 Apr 2003 17:49:34 -0700 (PDT) (envelope-from marcel@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.12.6/8.12.6) with ESMTP id h3L0nX0U003973 for ; Sun, 20 Apr 2003 17:49:33 -0700 (PDT) (envelope-from marcel@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.12.6/8.12.6/Submit) id h3L0nXEY003970 for perforce@freebsd.org; Sun, 20 Apr 2003 17:49:33 -0700 (PDT) Date: Sun, 20 Apr 2003 17:49:33 -0700 (PDT) Message-Id: <200304210049.h3L0nXEY003970@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to marcel@freebsd.org using -f From: Marcel Moolenaar To: Perforce Change Reviews Subject: PERFORCE change 29320 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 21 Apr 2003 00:49:38 -0000 http://perforce.freebsd.org/chv.cgi?CH=29320 Change 29320 by marcel@marcel_nfs on 2003/04/20 17:49:18 o execve fix: When we jump to the entry point of the newly loaded program, we still have dirty registers on the kernel stack from when the execve was performed. In exec_setregs we cleared the trapframe, causing us to not completely "unwind" to the bottom of the kernel stack. 
This caused problems because we assume that when we enter the kernel from user space through a syscall, we start at the bottom of the kernel stack (see cpu_fork()). The fix is to discard a multiple of 512 bytes of the register stack by relocating the current stack base and copying "live" registers. The remaining dirty bytes (<512) are simply discarded by masking off the lower 9 bits of the kernel register stack pointer before we save it in ar.k6. This two-part operation guarantees that we keep in sync with the NaT collections (every 512 bytes). o Only define ar.k7 (the kernel memory stack of the thread) when we enter user space. We used to define ar.k7 when we would return to kernel space as well. This is completely harmless, but removes a possible INVARIANTS test. By not defining ar.k7 when we return to kernel space, we have ar.k6 (the saved register stack base) and ar.k7 (the saved memory stack top) at a fixed distance from each other at all times: namely KSTACK_PAGES * PAGE_SIZE - SIZEOF_PCB. Lost or corrupted state is then more easily identified. Affected files ... .. //depot/projects/ia64_epc/sys/ia64/ia64/exception.s#14 edit .. //depot/projects/ia64_epc/sys/ia64/ia64/machdep.c#14 edit .. //depot/projects/ia64_epc/sys/ia64/ia64/syscall.s#9 edit Differences ... ==== //depot/projects/ia64_epc/sys/ia64/ia64/exception.s#14 (text+ko) ==== @@ -354,32 +354,25 @@ * been flushed. 
*/ { .mmi + rsm psr.ic|psr.i + ;; + srlz.d add sp=16,sp ;; - ld8 r9=[sp] // length - add r3=SIZEOF_TRAPFRAME-32,sp - ;; } { .mmi - rsm psr.ic|psr.i + add r3=SIZEOF_TRAPFRAME-32,sp + add r2=SIZEOF_TRAPFRAME-16,sp + add r8=SIZEOF_SPECIAL+16,sp ;; - srlz.d - add r2=16,r3 - ;; } -{ .mmi + ldf.fill f15=[r2],-32 // f15 ldf.fill f14=[r3],-32 // f14 - add r8=SIZEOF_SPECIAL+16,sp ;; -} -{ .mmi ldf.fill f13=[r2],-32 // f13 ldf.fill f12=[r3],-32 // f12 - add r9=r9,sp ;; -} - ldf.fill f11=[r2],-32 // f11 ldf.fill f10=[r3],-32 // f10 ;; @@ -392,96 +385,93 @@ { .mmi ld8 r8=[r8] // unat (after) - mov ar.k7=r9 + ;; + mov ar.unat=r8 nop 0 ;; } -{ .mmi + ld8 r10=[r2],-16 // ssd ld8 r11=[r3],-16 // csd - nop 0 ;; -} -{ .mmi - mov ar.unat=r8 mov ar.ssd=r10 - nop 0 -} -{ .mmi + mov ar.csd=r11 + ld8 r14=[r2],-16 // ccv ld8 r15=[r3],-16 // b7 - nop 0 ;; -} + { .mmi - mov ar.csd=r11 mov ar.ccv=r14 + ld8 r8=[r2],-16 // b6 mov b7=r15 ;; } { .mmi - ld8 r8=[r2],-16 // b6 ld8.fill r31=[r3],-16 // r31 - nop 0 - ;; -} -{ .mmi ld8.fill r30=[r2],-16 // r30 - ld8.fill r29=[r3],-16 // r29 mov b6=r8 ;; } + ld8.fill r29=[r3],-16 // r29 ld8.fill r28=[r2],-16 // r28 + ;; ld8.fill r27=[r3],-16 // r27 + ld8.fill r26=[r2],-16 // r26 ;; - ld8.fill r26=[r2],-16 // r26 ld8.fill r25=[r3],-16 // r25 + ld8.fill r24=[r2],-16 // r24 ;; - ld8.fill r24=[r2],-16 // r24 ld8.fill r23=[r3],-16 // r23 + ld8.fill r22=[r2],-16 // r22 ;; - ld8.fill r22=[r2],-16 // r22 ld8.fill r21=[r3],-16 // r21 + ld8.fill r20=[r2],-16 // r20 ;; - ld8.fill r20=[r2],-16 // r20 ld8.fill r19=[r3],-16 // r19 - ;; ld8.fill r18=[r2],-16 // r18 - ld8.fill r17=[r3],-16 // r17 ;; { .mmb + ld8.fill r17=[r3],-16 // r17 ld8.fill r16=[r2],-16 // r16 - ld8.fill r15=[r3],-16 // r15 bsw.0 ;; } { .mmi + ld8.fill r15=[r3],-16 // r15 ld8.fill r14=[r2],-16 // r14 + add r31=16,sp + ;; +} +{ .mmi + ld8 r16=[sp] // tf_length ld8.fill r11=[r3],-16 // r11 - add r31=16,sp + add r30=24,sp ;; } { .mmi ld8.fill r10=[r2],-16 // r10 ld8.fill r9=[r3],-16 // r9 - add 
r30=24,sp + add r16=r16,sp // ar.k7 ;; } - +{ .mmi ld8.fill r8=[r2],-16 // r8 ld8.fill r3=[r3] // r3 ;; +} + ld8.fill r2=[r2] // r2 ld8.fill sp=[r31],16 // sp ;; - ld8 r16=[r30],16 // unat - ld8 r17=[r31],16 // rp + ld8 r17=[r30],16 // unat + ld8 r29=[r31],16 // rp ;; ld8 r18=[r30],16 // pr ld8 r19=[r31],16 // pfs - mov rp=r17 + mov rp=r29 ;; ld8 r20=[r30],24 // bspstore ld8 r21=[r31],24 // rnat @@ -521,18 +511,25 @@ nop 0 ;; } +{ .mmi mov r31=ar.bspstore + ;; mov ar.bspstore=r20 + dep r31=0,r31,0,9 ;; +} + mov ar.k6=r31 + mov ar.k7=r16 + ;; mov ar.rnat=r21 mov r13=r29 ;; 1: - mov ar.unat=r16 + mov ar.unat=r17 + mov ar.fpsr=r23 mov ar.pfs=r19 - mov ar.fpsr=r23 mov cr.ipsr=r24 mov cr.ifs=r26 mov cr.iip=r27 ==== //depot/projects/ia64_epc/sys/ia64/ia64/machdep.c#14 (text+ko) ==== @@ -851,18 +851,19 @@ mc->mc_flags |= IA64_MC_FLAGS_SCRATCH_VALID; mc->mc_scratch = frame->tf_scratch; mc->mc_scratch_fp = frame->tf_scratch_fp; + /* + * XXX High FP. If the process has never used the high FP, + * mark the high FP as valid (zero defaults). If the process + * did use the high FP, then store them in the PCB if not + * already there (ie get them from the CPU that has them) + * and write them in the context. + */ } + /* * XXX preserved registers. We don't have the preserved registers * in the trapframe. We don't worry about it now. */ - /* - * XXX High FP. If the process has never used the high FP, mark - * the high FP as valid (zero defaults). If the process did use - * the high FP, then store them in the PCB if not already there - * (ie get them from the CPU that has them) and write them in - * the context. 
- */ /* * Allocate and validate space for the signal handler @@ -1071,14 +1072,45 @@ exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings) { struct trapframe *tf; + char *kstack; + uint64_t bspst, ndirty; tf = td->td_frame; + kstack = (char*)td->td_kstack; + + /* + * RSE magic: We have ndirty registers of the process on the kernel + * stack which don't belong to the new image. Discard them. Note + * that for the "legacy" syscall support we need to keep 3 registers + * worth of dirty bytes. These 3 registers are the initial arguments + * to the newly executing program. + * However, we cannot discard all the ndirty registers by simply + * moving the kernel related registers to the bottom of the kernel + * stack and lowering the current bspstore, because we get into + * trouble with the NaT collections. We need to keep that in sync + * with the registers. Hence, we can only copy a multiple of 512 + * bytes. Consequently, we may end up with some registers of the + * previous image on the kernel stack. This we ignore by making + * sure we mask-off the lower 9 bits of the bspstore value just + * prior to saving it in ar.k6. + */ + if ((tf->tf_flags & FRAME_SYSCALL) == 0) + tf->tf_special.ndirty -= 24; + ndirty = tf->tf_special.ndirty & ~0x1ff; + if (ndirty > 0) { + __asm __volatile("mov ar.rsc=0;;"); + __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst)); + bcopy(kstack + ndirty, kstack, ndirty); + bspst -= ndirty; + __asm __volatile("mov ar.bspstore=%0;;" :: "r"(bspst)); + __asm __volatile("mov ar.rsc=3;;"); + tf->tf_special.ndirty -= ndirty; + } + ndirty = tf->tf_special.ndirty; + + bzero(&tf->tf_special, sizeof(tf->tf_special)); + if ((tf->tf_flags & FRAME_SYSCALL) == 0) { /* break syscalls. 
*/ - uint64_t *args; - uint64_t ndirty; - ndirty = tf->tf_special.ndirty; - bzero(&tf->tf_special, sizeof(tf->tf_special)); - tf->tf_special.ndirty = ndirty; bzero(&tf->tf_scratch, sizeof(tf->tf_scratch)); bzero(&tf->tf_scratch_fp, sizeof(tf->tf_scratch_fp)); tf->tf_special.iip = entry; @@ -1086,20 +1118,12 @@ tf->tf_special.bspstore = td->td_md.md_bspstore; /* * Copy the arguments onto the kernel register stack so that - * they get loaded by the loadrs. This involves some NaT - * collection magic. + * they get loaded by the loadrs instruction. */ - args = (uint64_t*)(td->td_kstack + ndirty); - args -= (((uintptr_t)args & 0x1ff) < 24) ? 4 : 3; - *args++ = stack; - if (((uintptr_t)args & 0x1ff) == 0x1f8) - args++; - *args++ = ps_strings; - if (((uintptr_t)args & 0x1ff) == 0x1f8) - args++; - *args = 0; + *(uint64_t*)(kstack + ndirty - 24) = stack; + *(uint64_t*)(kstack + ndirty - 16) = ps_strings; + *(uint64_t*)(kstack + ndirty - 8) = 0; } else { /* epc syscalls (default). */ - bzero(&tf->tf_special, sizeof(tf->tf_special)); tf->tf_special.rp = entry; tf->tf_special.pfs = (3UL<<62) | (3UL<<7) | 3UL; tf->tf_special.bspstore = td->td_md.md_bspstore + 24; @@ -1113,6 +1137,7 @@ suword((caddr_t)tf->tf_special.bspstore - 16, ps_strings); suword((caddr_t)tf->tf_special.bspstore - 8, 0); } + tf->tf_special.sp = (stack & ~15) - 16; tf->tf_special.rsc = 0xf; tf->tf_special.fpsr = IA64_FPSR_DEFAULT; ==== //depot/projects/ia64_epc/sys/ia64/ia64/syscall.s#9 (text+ko) ==== @@ -329,20 +329,21 @@ } { .mmi loadrs - mov ar.k7=r31 + mov r14=ar.k5 dep r26=-1,r26,19,1 // Set psr.dfh ;; } { .mmi - mov r31=ar.bspstore + mov r30=ar.bspstore + ;; mov ar.bspstore=r21 - mov r13=r23 + dep r30=0,r30,0,9 ;; } { .mmi - mov r14=ar.k5 - mov ar.k6=r31 - nop 0 + mov ar.k6=r30 + mov ar.k7=r31 + mov r13=r23 ;; } { .mmi