From owner-freebsd-current Sat May 25 03:39:43 1996 Return-Path: owner-current Received: (from root@localhost) by freefall.freebsd.org (8.7.5/8.7.3) id DAA00402 for current-outgoing; Sat, 25 May 1996 03:39:43 -0700 (PDT) Received: from silvia.HIP.Berkeley.EDU (silvia.HIP.Berkeley.EDU [136.152.64.181]) by freefall.freebsd.org (8.7.5/8.7.3) with ESMTP id DAA00377 for ; Sat, 25 May 1996 03:39:35 -0700 (PDT) Received: (from asami@localhost) by silvia.HIP.Berkeley.EDU (8.7.5/8.6.9) id DAA21280; Sat, 25 May 1996 03:39:11 -0700 (PDT) Date: Sat, 25 May 1996 03:39:11 -0700 (PDT) Message-Id: <199605251039.DAA21280@silvia.HIP.Berkeley.EDU> To: bde@zeta.org.au CC: current@freebsd.org, ccd@stampede.cs.berkeley.edu Subject: More on kernel bcopy From: asami@cs.berkeley.edu (Satoshi Asami) Sender: owner-current@freebsd.org X-Loop: FreeBSD.org Precedence: bulk Bruce, I tried calling fastmove from bcopy. Unfortunately, it crashed right after it loaded. I remember you mentioning that the fnsave/frstor part is only used when we call it from bcopy and an interrupt handler calls bcopy. Maybe there is a bug in there still. Here's the diff. === Index: support.s =================================================================== RCS file: /usr/cvs/src/sys/i386/i386/support.s,v retrieving revision 1.35 diff -u -r1.35 support.s --- support.s 1996/05/03 21:01:00 1.35 +++ support.s 1996/05/25 09:30:20 @@ -291,6 +291,18 @@ subl %esi,%eax cmpl %ecx,%eax /* overlapping? */ jb 1f +#ifdef I586_FAST_BCOPY + cmpl $128,%ecx + jbe slow_bcopy + + jmp slow_bcopy /* XXX take this out and see it crash */ + + call fastmove + jmp done_bcopy + + ALIGN_TEXT +slow_bcopy: +#endif /* I586_FAST_BCOPY */ shrl $2,%ecx /* copy by 32-bit words */ cld /* nope, copy forwards */ rep @@ -299,6 +311,9 @@ andl $3,%ecx /* any bytes left? */ rep movsb +#ifdef I586_FAST_BCOPY +done_bcopy: +#endif /* I586_FAST_BCOPY */ popl %edi popl %esi ret @@ -453,6 +468,16 @@ /* bcopy(%esi, %edi, %ebx) */ 3: movl %ebx,%ecx +#ifdef I586_FAST_BCOPY + cmpl $128,%ecx + jbe slow_copyout + + call fastmove + jmp done_copyout + + ALIGN_TEXT +slow_copyout: +#endif /* I586_FAST_BCOPY */ shrl $2,%ecx cld rep @@ -500,6 +525,16 @@ cmpl $VM_MAXUSER_ADDRESS,%edx ja copyin_fault +#ifdef I586_FAST_BCOPY + cmpl $128,%ecx + jbe slow_copyin + + call fastmove + jmp done_copyin + + ALIGN_TEXT +slow_copyin: +#endif /* I586_FAST_BCOPY */ movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld @@ -510,6 +545,10 @@ rep movsb +#ifdef I586_FAST_BCOPY + ALIGN_TEXT +done_copyin: +#endif /* I586_FAST_BCOPY */ popl %edi popl %esi xorl %eax,%eax @@ -525,6 +564,206 @@ movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret + +#ifdef I586_FAST_BCOPY +/* fastmove(src, dst, len) + src in %esi + dst in %edi + len in %ecx + uses %eax and %edx for tmp. storage + */ +/* +LC0: + .ascii "npxproc == curproc\0" +LC1: + .ascii "support.s" + */ + ALIGN_TEXT +fastmove: + cmpl $63,%ecx + jbe fastmove_tail + + testl $7,%esi /* check if src addr is multiple of 8 */ + jnz fastmove_tail + + testl $7,%edi /* check if dst addr is multiple of 8 */ + jnz fastmove_tail + + pushl %ebp + movl %esp,%ebp + subl $176,%esp + +/* if (intr_nesting_level > 0) */ + cmpb $0,_intr_nesting_level + je L6 +/* save reentrantly */ + movl %cr0,%edx + clts + fnsave -176(%ebp) + jmp L7 + +/* else { */ + ALIGN_TEXT +L6: +/* if (npxproc != NULL) { */ + cmpl $0,_npxproc + je L8 +/* assert(npxproc == curproc); */ +/* movl _npxproc,%eax + cmpl %eax,_curproc + je L6b + pushl LC0 + pushl $599 + pushl LC1 + call ___assert + addl $12,%esp +L6b: */ +/* fnsave(&curpcb->pcb_savefpu); */ + movl _curpcb,%eax + fnsave 112(%eax) +/* npxproc = NULL; */ + movl $0,_npxproc +/* } */ +L8: +/* now we own the FPU. */ + +/* + * The process' FP state is saved in the pcb, but if we get + * switched, the cpu_switch() will store our FP state in the + * pcb. It should be possible to avoid all the copying for + * this, e.g., by setting a flag to tell cpu_switch() to + * save the state somewhere else. + */ +/* tmp = curpcb->pcb_savefpu; */ + pushl %edi + pushl %esi + pushl %ecx + leal -176(%ebp),%edi + movl _curpcb,%esi + addl $112,%esi + cld + movl $44,%ecx + rep + movsl + popl %ecx + popl %esi + popl %edi +/* stop_emulating(); */ + clts +/* npxproc = curproc; */ + movl _curproc,%eax + movl %eax,_npxproc +/* } */ +L7: +4: + pushl %ecx + cmpl $1792,%ecx + jbe 2f + movl $1792,%ecx +2: + subl %ecx,0(%esp) + cmpl $256,%ecx + jb 5f + pushl %esi + pushl %ecx + .align 4,0x90 +3: + movl 0(%esi),%eax + movl 32(%esi),%eax + movl 64(%esi),%eax + movl 96(%esi),%eax + movl 128(%esi),%eax + movl 160(%esi),%eax + movl 192(%esi),%eax + movl 224(%esi),%eax + addl $256,%esi + subl $256,%ecx + cmpl $256,%ecx + jae 3b + popl %ecx + popl %esi +5: + ALIGN_TEXT +fastmove_loop: + fildq 0(%esi) + fildq 8(%esi) + fildq 16(%esi) + fildq 24(%esi) + fildq 32(%esi) + fildq 40(%esi) + fildq 48(%esi) + fildq 56(%esi) + fistpq 56(%edi) + fistpq 48(%edi) + fistpq 40(%edi) + fistpq 32(%edi) + fistpq 24(%edi) + fistpq 16(%edi) + fistpq 8(%edi) + fistpq 0(%edi) + addl $-64,%ecx + addl $64,%esi + addl $64,%edi + cmpl $63,%ecx + ja fastmove_loop + popl %eax + addl %eax,%ecx + cmpl $64,%ecx + jae 4b + +/* if (intr_nesting_level > 0) */ + + cmpb $0,_intr_nesting_level + je L9 + +/* Restore reentrantly. */ + frstor -176(%ebp) + movl %edx,%cr0 + jmp L10 + +/* else { */ + ALIGN_TEXT +L9: +/* curpcb->pcb_savefpu = tmp; */ + pushl %edi + pushl %esi + pushl %ecx + movl _curpcb,%edi + addl $112,%edi + leal -176(%ebp),%esi + cld + movl $44,%ecx + rep + movsl + popl %ecx + popl %esi + popl %edi + +/* start_emulating(); */ + smsw %ax + orb $8,%al + lmsw %ax +/* npxproc = NULL; */ + movl $0,_npxproc +/* } */ +L10: + movl %ebp,%esp + popl %ebp + + ALIGN_TEXT +fastmove_tail: + movb %cl,%al + shrl $2,%ecx /* copy longword-wise */ + cld + rep + movsl + movb %al,%cl + andb $3,%cl /* copy remaining bytes */ + rep + movsb + + ret +#endif /* I586_FAST_BCOPY */ /* * fu{byte,sword,word} : fetch a byte (sword, word) from user memory Index: trap.c =================================================================== RCS file: /usr/cvs/src/sys/i386/i386/trap.c,v retrieving revision 1.76 diff -u -r1.76 trap.c --- trap.c 1996/05/18 03:36:19 1.76 +++ trap.c 1996/05/18 11:23:39 @@ -319,6 +319,14 @@ (void) trap_pfault(&frame, FALSE); return; + case T_DNA: +#if NNPX > 0 + /* if a transparent fault (due to context switch "late") */ + if (npxdna()) + return; +#endif /* NNPX > 0 */ + break; + case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ /* === As it is, it will check the copy size but won't call fastmove. When you take out this line: jmp slow_bcopy /* XXX take this out and see it crash */ it should crash with fireworks on the screen right after the kernel load. Satoshi