Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 20 Oct 2000 14:50:43 +0200
From:      Vadim Belman <voland@lflat.org>
To:        freebsd-hackers@freebsd.org
Subject:   NFS/VM deadlock report and help request
Message-ID:  <20001020145043.B73760@lflat.vas.mobilix.dk>

next in thread | raw e-mail | index | archive | help

--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

	I'm trying to locate a bug which causes a deadlock in the VM subsystem
and would like to find some help here.

	First of all, I'll describe the situation which revealed the
problem.

	We run a bunch of web servers providing free webpage service to our
customers. Back in July it was decided to upgrade the boxes due to security
issues with older kernels. At the same time some other changes were made to
the internal network and some other stuff, so now it's rather unclear whether
the upgrade is the cause, but since then we have experienced systematic httpd
hangups in uninterruptible waits ('D' status in ps output). Each hangup was
related to a webcam page with an image being updated each minute or so via
ftp (the way our customers update their pages).

	While trying to find a solution we tested thttpd instead of apache
with one single box serving both HTTP and FTP. It resulted in even more
regular hangups, occurring approximately every hour, perhaps due to the
single-process nature of thttpd. Just after another hangup the box was
taken out of service and preserved in that state so that I was able to dig
into the kernel and see what's going on.

	Here are the technical details I've got so far.

	The kernel config is supplied as an attachment. The kernel-mode stack
trace for the thttpd process looks like this:

======================================================================
IdlePTD 2928640
initial pcb at 1f1cf000
panic messages:
---
---
#0  mi_switch () at ../../kern/kern_synch.c:858
858		if (switchtime.tv_sec == 0)
#0  mi_switch () at ../../kern/kern_synch.c:858
#1  0xc0151881 in tsleep (ident=0xc05c38d0, priority=4, 
    wmesg=0xc0233171 "vmopar", timo=0) at ../../kern/kern_synch.c:467
#2  0xc01e40ff in vm_object_page_remove (object=0xdeb81c60, start=2, end=8, 
    clean_only=0) at ../../vm/vm_page.h:546
#3  0xc01e8189 in vnode_pager_setsize (vp=0xdeb8ec80, nsize=8192)
    at ../../vm/vnode_pager.c:289
#4  0xc01a1cb7 in nfs_loadattrcache (vpp=0xdeb3ebec, mdp=0xdeb3ebf8, 
    dposp=0xdeb3ebfc, vaper=0x0) at ../../nfs/nfs_subs.c:1335
#5  0xc01a87e7 in nfs_readrpc (vp=0xdeb8ec80, uiop=0xdeb3ec60, cred=0xc4d49100)
    at ../../nfs/nfs_vnops.c:1102
#6  0xc019b219 in nfs_getpages (ap=0xdeb3ec98) at ../../nfs/nfs_bio.c:153
#7  0xc01e8736 in vnode_pager_getpages (object=0xdeb81c60, m=0xdeb3ed2c, 
    count=2, reqpage=0) at vnode_if.h:1089
#8  0xc01dd606 in vm_fault (map=0xdc3e7e80, vaddr=712876032, 
    fault_type=1 '\001', fault_flags=0) at ../../vm/vm_pager.h:130
#9  0xc0209266 in trap_pfault (frame=0xdeb3eddc, usermode=0, eva=712876032)
    at ../../i386/i386/trap.c:800
#10 0xc0208ecf in trap (frame={tf_fs = 134545424, tf_es = 16, 
      tf_ds = -599916528, tf_edi = -1054114628, tf_esi = 712876031, 
      tf_ebp = -558633400, tf_isp = -558633464, tf_ebx = 2048, 
      tf_edx = 712876867, tf_ecx = 209, tf_eax = -558641152, tf_trapno = 12, 
      tf_err = 0, tf_eip = -1071610959, tf_cs = 8, tf_eflags = 66054, 
      tf_esp = -558633252, tf_ss = -558633260}) at ../../i386/i386/trap.c:426
#11 0xc02083b1 in generic_copyin ()
#12 0xc0169c64 in sosend (so=0xdaea9780, addr=0x0, uio=0xdeb3eedc, top=0x0, 
    control=0x0, flags=0, p=0xdc3e45e0) at ../../kern/uipc_socket.c:567
#13 0xc015ef50 in soo_write (fp=0xc4eb0180, uio=0xdeb3eedc, cred=0xc4d49100, 
    flags=0, p=0xdc3e45e0) at ../../kern/sys_socket.c:78
#14 0xc015bc52 in dofilewrite (p=0xdc3e45e0, fp=0xc4eb0180, fd=76, 
    buf=0x2a7d9b43, nbyte=6797, offset=-1, flags=0) at ../../sys/file.h:159
#15 0xc015bb57 in write (p=0xdc3e45e0, uap=0xdeb3ef80)
    at ../../kern/sys_generic.c:298
#16 0xc02098a5 in syscall2 (frame={tf_fs = -1078001617, tf_es = -558694353, 
      tf_ds = 47, tf_edi = 136698368, tf_esi = 68, tf_ebp = -1077938184, 
      tf_isp = -558633004, tf_ebx = 0, tf_edx = 0, tf_ecx = 134826912, 
      tf_eax = 4, tf_trapno = 0, tf_err = 2, tf_eip = 672115896, tf_cs = 31, 
      tf_eflags = 514, tf_esp = -1077938244, tf_ss = 47})
    at ../../i386/i386/trap.c:1126
#17 0xc01fe4d6 in Xint0x80_syscall ()
#18 0x804a443 in ?? ()
#19 0x80499b5 in ?? ()
======================================================================

	Further investigation has shown that the hangup happened while
trying to release a page which is most likely locked by the NFS subsystem.
The page belongs to the image file I mentioned before.

	Versions of the sources shown in the stack trace:

======================================================================
$FreeBSD: src/sys/kern/kern_synch.c,v 1.87.2.1 2000/05/16 06:58:12 dillon Exp $
$FreeBSD: src/sys/kern/kern_synch.c,v 1.87.2.1 2000/05/16 06:58:12 dillon Exp $
$FreeBSD: src/sys/kern/kern_synch.c,v 1.87.2.1 2000/05/16 06:58:12 dillon Exp $
$FreeBSD: src/sys/vm/vm_page.h,v 1.75 1999/12/29 04:55:10 peter Exp $
$FreeBSD: src/sys/vm/vnode_pager.c,v 1.116 1999/10/29 18:09:36 phk Exp $
$FreeBSD: src/sys/nfs/nfs_subs.c,v 1.90 2000/02/13 03:32:06 peter Exp $
$FreeBSD: src/sys/nfs/nfs_vnops.c,v 1.150 2000/01/05 00:32:18 dillon Exp $
$FreeBSD: src/sys/nfs/nfs_bio.c,v 1.83 2000/01/05 05:11:36 dillon Exp $
$FreeBSD: src/sys/vm/vm_pager.h,v 1.24.2.1 2000/03/27 21:34:45 dillon Exp $
$FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.1 2000/05/16 06:58:07 dillon Exp $
$FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.1 2000/05/16 06:58:07 dillon Exp $
$FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.2 2000/05/05 03:49:57 jlemon Exp $
$FreeBSD: src/sys/kern/sys_socket.c,v 1.28 1999/11/08 03:30:59 peter Exp $
$FreeBSD: src/sys/sys/file.h,v 1.22.2.4 2000/05/16 16:27:32 dillon Exp $
$FreeBSD: src/sys/kern/sys_generic.c,v 1.55.2.1 2000/05/13 19:28:13 dillon Exp $
$FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.1 2000/05/16 06:58:07 dillon Exp $
======================================================================

	I'm looking for some help with further analysis directions because
from this point I'm not good enough with kernel internals.

-- 
    /Voland			Vadim Belman
				E-mail: voland@lflat.org

--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="KERNEL-4.X"

machine		"i386"
cpu		"I686_CPU"
ident		"KERNEL-4-WWW"
maxusers	512

options		INET			
options		FFS			
options		FFS_ROOT		
options		NFS			
options		NFS_NOSERVER		
options		"CD9660"		
options		PROCFS			
options		"COMPAT_43"		
options		SCSI_DELAY=15000	
options		UCONSOLE		
options		USERCONFIG		
options		KTRACE		
options		SYSVSHM
options         SYSVSEM
options		SYSVMSG
options		P1003_1B		
options		_KPOSIX_PRIORITY_SCHEDULING
options		ICMP_BANDLIM		

options		NMBCLUSTERS=20000

options		QUOTA

options		MAXDSIZ="(512*1024*1024)"
options		DFLDSIZ="(256*1024*1024)"

options         INCLUDE_CONFIG_FILE     



device		isa
device		pci

device		fdc0	at isa? port IO_FD1 irq 6 drq 2
device		fd0	at fdc0 drive 0

device		ncr
device		ahc
device		sym
device		bt0	at isa?

device		scbus

device		da

device		pass

device		cd	

device		atkbdc0	at isa? port IO_KBD 
device		atkbd0	at atkbdc? irq 1

device		vga0	at isa?

pseudo-device	splash

device		sc0	at isa?
options		SC_HISTORY_SIZE=512	

device		npx0	at isa? port IO_NPX irq 13

device		sio0	at isa? port IO_COM1 flags 0x10 irq 4
device		sio1	at isa? port IO_COM2 irq 3

device		ppc0	at isa? irq 7
device		ppbus

device		plip
device		ppi

device 		de
device 		fxp

pseudo-device	loop
pseudo-device	ether
pseudo-device	tun
pseudo-device	pty	64


pseudo-device	bpf		


options NFS_DEBUG
makeoptions DEBUG=-g

--SUOF0GtieIMvvwua--


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20001020145043.B73760>