Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 02 Apr 2001 09:12:53 -0700
From:      Peter Wemm <peter@netplex.com.au>
To:        "Michael C . Wu" <keichii@peorth.iteration.net>
Cc:        hackers@FreeBSD.ORG, fs@FreeBSD.ORG
Subject:   Re: panic in FFS and other related I/O problems 
Message-ID:  <200104021612.f32GCrh12225@mobile.wemm.org>
In-Reply-To: <20010402102744.A69438@peorth.iteration.net> 

next in thread | previous in thread | raw e-mail | index | archive | help
"Michael C . Wu" wrote:
> Hi Everyone,
> 
> This is the famous bbs with high loads.  We upgraded to 4.3-RC
> recently.
> 
> The user level we have at this point is not that much compared
> to before.  2000 users is what we can usually take on without
> problems.  Now we panic() 20 times a day.
> 
> FreeBSD zoo.ee.ntu.edu.tw 4.3-RC FreeBSD 4.3-RC #0: Tue Apr
>                  3 07:14:31 CST 2001
> 
> I am thinking that this is either a bug in FFS I/O operations
> or our own bbs having race conditions.  However, the bbs
> has run fine without problem before.
> 
> -------------------------------------------------------------------
> 2001/04/02 22:58 options VM_KMEM_SIZE_MAX="(300*1024*1024)"(200->300)
> -------------------------------------------------------------------
> 2001/04/02 22:43 run-time panic
> uptime:10:43下午  up  1:40, 6 users, load averages: 3.69, 2.99, 2.61
> bbs:Number of Users:2160
> panic: pipeinit: cannot allocate pipe -- out of kvm -- code = 3

You have used up your 1G of KVM.  I suggest you reconfigure the KVM on your
boxes so that you double it.  This will change you from 1G kernel, 3G user
to 2G each.

Suggested patch:
Index: conf/ldscript.i386
===================================================================
RCS file: /home/ncvs/src/sys/conf/ldscript.i386,v
retrieving revision 1.4
diff -u -r1.4 ldscript.i386
--- conf/ldscript.i386	2000/01/11 15:35:16	1.4
+++ conf/ldscript.i386	2001/04/02 16:07:18
@@ -6,7 +6,7 @@
 SECTIONS
 {
   /* Read-only sections, merged into text segment: */
-  . = 0xc0100000 + SIZEOF_HEADERS;
+  . = 0x80100000 + SIZEOF_HEADERS;
   .interp     : { *(.interp) 	}
   .hash          : { *(.hash)		}
   .dynsym        : { *(.dynsym)		}
Index: i386/include/pmap.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/include/pmap.h,v
retrieving revision 1.70
diff -u -r1.70 pmap.h
--- i386/include/pmap.h	2000/11/30 01:53:02	1.70
+++ i386/include/pmap.h	2001/04/02 16:07:18
@@ -92,9 +92,9 @@
 #endif
 #ifndef NKPDE
 #ifdef SMP
-#define NKPDE			254	/* addressable number of page tables/pde's */
+#define NKPDE			510	/* addressable number of page tables/pde's */
 #else
-#define NKPDE			255	/* addressable number of page tables/pde's */
+#define NKPDE			511	/* addressable number of page tables/pde's */
 #endif	/* SMP */
 #endif
 
You have basically raised so many limits that you have run out of space
for kernel allocations.
root@overcee[9:09am]/home/src/sys/compile/OVERCEE-103# gdb -k kernel.debug /dev/mem
...
(kgdb) print /x kernel_map.size
$8 = 0x1a658000

With a default start addr of 0xc0100000, there is nearly 0x40000000 (1G)
available. My machine here is using just less than half of it.  Yours is
probably almost full.


> mp_lock = 01000001; cpuid = 1; lapic.id = 00000000
> Debugger("panic")
> Stopped at      Debugger+0x35:  movb    $0,in_Debugger.597
> db> tr
> Debugger(c0249bf2) at Debugger+0x35
> panic(c024b3a0,3,facac440,fe726e7c,c015eaa2) at panic+0xa4
> pipespace(facac440) at pipespace+0x58
> pipe_write(cacf2540,fe726ed4,ca506300,0,fe55e040) at pipe_write+0x192
> dofilewrite(fe55e040,cacf2540,8,80f7000,19fe) at dofilewrite+0xbe
> write(fe55e040,fe726f80,2823f0c8,28251008,80f7000) at write+0x3b
> syscall2(2f,2f,2f,80f7000,28251008) at syscall2+0x221
> Xint0x80_syscall() at Xint0x80_syscall+0x2b
> -------------------------------------------------------------------
> 2001/04/02 20:52 PMAP_SHPGPERPROC removed from kernel
>                  /bin/mv /etc/vntab.orig /etc/vntab, Add more swap
> -------------------------------------------------------------------
> 2001/04/02 20:30 run-time panic
> Fatal trap 12: page fault while in kernel mode
> mp_lock = 00000002; cpuid = 0; lapic.id = 01000000
> fault virtual address   = 0x0
> fault code              = supervisor write, page not present
> instruction pointer     = 0x8:0xc022fb2b
> stack pointer           = 0x10:0xfd9d1bcc
> frame pointer           = 0x10:0xfd9d1bfc
> code segment            = base 0x0, limit 0xfffff, type 0x1b
>                         = DPL 0, pres 1, def32 1, gran 1
> processor eflags        = interrupt enabled, resume, IOPL = 0
> current process         = 4118 (bbsd)
> interrupt mask          = bio  <- SMP: XXX
> kernel: type 12 trap, code=0
> Stopped at      generic_bzero+0xf:      repe stosl      %es:(%edi)
>                                                                              
       
> db> tr
> generic_bzero(1,c755fe00,c7243400,fd9d1c44,100) at generic_bzero+0xf
> ffs_vget(c755fe00,5e3f84,fd9d1cb8,0,fbe61f40) at ffs_vget+0xa4
> ufs_lookup(fd9d1d10,fd9d1d24,c01774ca,fd9d1d10,fbe61f40) at ufs_lookup+0x9c7
> ufs_vnoperate(fd9d1d10,fbe61f40,ff27941d,fd9d1ef0,fbe62000) at ufs_vnoperate+0x15
> vfs_cache_lookup(fd9d1d68,fd9d1d78,c017a338,fd9d1d68,ef8c1d00) at vfs_cache_lookup+0x28a
> ufs_vnoperate(fd9d1d68,ef8c1d00,fd9d1ef0,fd9d1ec8,fd9b7ac0) at ufs_vnoperate+0x15
> lookup(fd9d1ec8,0,fd9d1ec8,fd9d1f80,fd9b7ac0) at lookup+0x290
> namei(fd9d1ec8,0,ca3ed580,fd9d1f80,fd9d1df8) at namei+0x147
> vn_open(fd9d1ec8,1,1a4,3,fd9b7ac0) at vn_open+0x1cd
> open(fd9b7ac0,fd9d1f80,281960c8,281a7f00,4) at open+0xb8
> syscall2(2f,2f,bfbf002f,4,281a7f00) at syscall2+0x221
> Xint0x80_syscall() at Xint0x80_syscall+0x2b
> db> call boot(0)
> boot() called on cpu#0
> Waiting (max 60 seconds) for system process `bufdaemon' to stop...
>                                                                              
       
> Fatal trap 12: page fault while in kernel mode
> mp_lock = 00000002; cpuid = 0; lapic.id = 01000000
> fault virtual address   = 0x0
> fault code              = supervisor write, page not present
> instruction pointer     = 0x8:0xc02082b3
> stack pointer           = 0x10:0xfe80adec
> frame pointer           = 0x10:0xfe80adf8
> code segment            = base 0x0, limit 0xfffff, type 0x1b
>                         = DPL 0, pres 1, def32 1, gran 1
> processor eflags        = interrupt enabled, resume, IOPL = 0
> current process         = 7749 (bbsd)
> interrupt mask          = net tty bio cam  <- SMP: XXX
> kernel: type 12 trap, code=0
> Stopped at      generic_bzero+0xf:      repe stosl      %es:(%edi)
> ---------------------------------------------------------------
> 2000/04/02 08:29 boot panic => lowered PMAP_SHPGPERPROC to 300
> panic: swap_pager_swap_init: swap_zone == NULL
> mp_lock = 00000001; cpuid = 0; lapic.id = 01000000
> Debugger("panic")
> Stopped at      Debugger+0x35:  movb    $0,in_Debugger.597
> -------------------------------------------------------------------
> 2000/04/02 08:24 change kernel again
> options         MAXFILES=30000
> options         NMBCLUSTERS=16000(making this smaller)
> options         PMAP_SHPGPERPROC=720(back to our setup that was stable before)
> -------------------------------------------------------------------
> 2001/04/02 08:08AM run-time panic
> _SimFarm_[root]:/bighead#vim dump1.pl   
> panic: ffs_valloc: dup alloc
> mp_lock = 01000001; cpuid = 1; lapic.id = 00000000
> Debugger("panic")
> Stopped at      Debugger+0x35:  movb    $0,in_Debugger.597
> db> tr
> Debugger(c0249b12) at Debugger+0x35
> panic(c0257501,c02574e0,43ff,45,c847e8d4) at panic+0xa4
> ffs_valloc(f382da40,8180,c8577700,f3b13ca0,f3b13dfc) at ffs_valloc+0xf9
> ufs_makeinode(8180,f382da40,f3b13edc,f3b13ef0) at ufs_makeinode+0x5e
> ufs_create(f3b13dfc,f3b13e70,c0182ba4,f3b13dfc,0) at ufs_create+0x2b
> ufs_vnoperate(f3b13dfc,0,cdcf85c0,f3b13f80,f3a02a2c) at ufs_vnoperate+0x15
> vn_open(f3b13ec8,a03,180,3,f3a7e8a0) at vn_open+0x10c
> open(f3a7e8a0,f3b13f80,8123090,ffffffff,812c4de) at open+0xb8
> syscall2(2f,2f,2f,812c4de,ffffffff) at syscall2+0x221
> Xint0x80_syscall() at Xint0x80_syscall+0x2b
> db>
> -------------------------------------------------------------------
> 2001/04/01 21:09 run-time panic(dont remember the setup at this point, 
> irrecoverable)·
> Fatal trap 12: page fault while in kernel mode
> mp_lock = 01000002; cpuid = 1; lapic.id = 00000000
> fault virtual address   = 0x0
> fault code              = supervisor write, page not present
> instruction pointer     = 0x8:0xc022fb2b
> stack pointer           = 0x10:0xff7d8bcc
> frame pointer           = 0x10:0xff7d8bfc
> code segment            = base 0x0, limit 0xfffff, type 0x1b
>                         = DPL 0, pres 1, def32 1, gran 1
> processor eflags        = interrupt enabled, resume, IOPL = 0
> current process         = 1601 (bbsd)
> interrupt mask          = bio  <- SMP: XXX
> kernel: type 12 trap, code=0
> Stopped at      generic_bzero+0xf:      repe stosl      %es:(%edi)
> db> tr
> generic_bzero(1,c7b3ee00,c7822400,ff7d8c44,100) at generic_bzero+0xf
> ffs_vget(c7b3ee00,13a676,ff7d8cb8,0,f2283000) at ffs_vget+0xa4
> ufs_lookup(ff7d8d10,ff7d8d24,c01774ca,ff7d8d10,f2283000) at ufs_lookup+0x9c7
> ufs_vnoperate(ff7d8d10,f2283000,ff6c7c16,ff7d8ef0,fec9e480) at ufs_vnoperate+0x15
> -------------------------------------------------------------------
> 
> 
> -- 
> +-----------------------------------------------------------+
> | keichii@iteration.net         | keichii@freebsd.org       |
> | http://iteration.net/~keichii | Yes, BSD is a conspiracy. |
> +-----------------------------------------------------------+
> 
> To Unsubscribe: send mail to majordomo@FreeBSD.org
> with "unsubscribe freebsd-fs" in the body of the message
> 

Cheers,
-Peter
--
Peter Wemm - peter@FreeBSD.org; peter@yahoo-inc.com; peter@netplex.com.au
"All of this is for nothing if we don't go to the stars" - JMS/B5


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200104021612.f32GCrh12225>