From owner-freebsd-current@FreeBSD.ORG Sun Feb 1 15:53:24 2004 Return-Path: Delivered-To: freebsd-current@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 7532816A4CE for ; Sun, 1 Feb 2004 15:53:24 -0800 (PST) Received: from alpha.siliconlandmark.com (alpha.siliconlandmark.com [209.69.98.4]) by mx1.FreeBSD.org (Postfix) with ESMTP id E4F5A43D1F for ; Sun, 1 Feb 2004 15:51:42 -0800 (PST) (envelope-from andy@siliconlandmark.com) Received: from alpha.siliconlandmark.com (localhost [127.0.0.1]) i11NpYZV042578 for ; Sun, 1 Feb 2004 18:51:34 -0500 (EST) (envelope-from andy@siliconlandmark.com) Received: from localhost (andy@localhost)i11NpXsf042575 for ; Sun, 1 Feb 2004 18:51:34 -0500 (EST) (envelope-from andy@siliconlandmark.com) X-Authentication-Warning: alpha.siliconlandmark.com: andy owned process doing -bs Date: Sun, 1 Feb 2004 18:51:33 -0500 (EST) From: Andre Guibert de Bruet To: current@freebsd.org Message-ID: <20040201182824.S10185@alpha.siliconlandmark.com> MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII X-MailScanner-Information: Please contact the ISP for more information X-MailScanner: Found to be clean Subject: dump/mksnap_ffs - ufs2 deadlockss X-BeenThere: freebsd-current@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: Discussions about the use of FreeBSD-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 01 Feb 2004 23:53:24 -0000 Hi, I guess I'm running into a string of problems with ufs2 across my systems. This deadlock is 100% reproduceable across all of my systems with Adaptec and LSI RAID controllers. I tried to dump a live ufs2 system using: /sbin/dump -0uaL -f "/usr/backups/`hostname -s`-raid5.dump" /mnt/amrd0a and I get a mksnap_ffs process in the following state after about a minute of moderate disk activity: 18312 root -4 0 1196K 744K ufs 1 0:04 0.00% 0.00% mksnap_ffs db> tr 18312 sched_switch(696782a0,2,6071f99a,1d2,2846bc66) at 0x60571e85 mi_switch(1,50,6071f99a,ca,1) at 0x605657eb msleep(712fa5c0,60799868,50,6072a3b0,0) at 0x60564f67 acquire(b4b4876c,1000040,600,e7,69678310) at 0x605503ae lockmgr(712fa5c0,1010002,712fa514,696782a0,b4b48788) at 0x60550898 vop_stdlock(b4b487b0,b4b48794,606838b8,b4b487b0,b4b487cc) at 0x605af8cc vop_defaultop(b4b487b0,b4b487cc,605c5162,b4b487b0,b4b487cc) at 0x605af728 ufs_vnoperate(b4b487b0,b4b487cc,605536ba,695e084c,1) at 0x606838b8 vn_lock(712fa514,10002,696782a0,1a3,64f59200) at 0x605c5162 ffs_snapshot(695e0800,5fbfee0d,70,b4b48ad0,60585f53) at 0x60662a44 ffs_mount(695e0800,6a1e7880,5fbfe688,b4b48bec,696782a0) at 0x60672328 vfs_domount(696782a0,6c40fe50,6a1e7880,1211100,5fbfe688) at 0x605b3ab0 mount(696782a0,b4b48d14,6073960f,3e8,4) at 0x605b32a0 syscall(2f,2f,2f,5fbfe8d8,5fbfee01) at 0x606d7c02 Xint0x80_syscall() at 0x606c423d --- syscall (21, FreeBSD ELF32, mount), eip = 0x280bd72f, esp = 0x5fbfe60c, ebp= 0x5fbfecf0 --- db> ps pid proc uarea uid ppid pgrp flag stat wmesg wchan cmd 18323 6967b1b8 b4b6d000 80 17792 17792 0000100 [SLP]lockf 0x736a8640] httpd 18312 69677898 b4b68000 0 18311 18308 0004002 [SLP]ufs 0x712fa5c0] mksnap_ffs 18311 6967b528 b4b6f000 0 18308 18308 0004002 [SLP]wait 0x6967b528] sh 18308 6968e370 b4b86000 0 17948 18308 0004002 [SLP]wait 0x6968e370] dump 18270 6968e6e0 b4bac000 501 18260 18270 0004002 [CV]select 0x607c47e4] ssh2 18260 6967b6e0 b4b70000 501 17909 18260 0004002 [SLP]wait 0x6967b6e0] bash 17986 6a63b6e0 b4c38000 501 17908 17986 0004002 [SLP]ttyin 0x6a512810] bash 17948 69677c08 b4b6a000 0 17907 17948 0004002 [SLP]pause 0x69155000] csh 17927 6fbc21b8 b4d14000 0 17906 17927 0004002 [SLP]ttyin 0x68ede610] csh 17919 690f1a50 b2950000 0 17905 17919 0004002 [SLP]ttyin 0x68feba10] csh 17912 6968e000 b4b84000 0 1 17912 0004002 [SLP]ttyin 0x695d8810] getty 17911 69fdb000 b4bcf000 0 1 17911 0004002 [SLP]ttyin 0x695e6c10] getty 17910 6916b000 b4cb9000 0 1 17910 0004002 [SLP]ttyin 0x695e7210] getty 17909 6a63da50 b4c8b000 0 1 17909 0004102 [SLP]wait 0x6a63da50] login 17908 6916b6e0 b4cbd000 0 1 17908 0004102 [SLP]wait 0x6916b6e0] login 17907 6968ea50 b4bae000 0 1 17907 0004102 [SLP]wait 0x6968ea50] login 17906 6968adc0 b4b83000 0 1 17906 0004102 [SLP]wait 0x6968adc0] login 17905 6968a1b8 b4b7c000 0 1 17905 0004102 [SLP]wait 0x6968a1b8] login 17873 6916da50 b4d10000 0 1 17873 0000000 [CV]select 0x607c47e4] moused 17846 6a63ddc0 b4c8d000 0 1 17846 0000000 [CV]select 0x607c47e4] sshd2 17832 68eaf1b8 b08c0000 25 1 17832 0000100 [SLP]pause 0x6965a000] sendmail 17828 6fbc2000 b4d13000 0 1 17828 0000100 [SLP]pause 0x6914c000] sendmail 17824 6fbc2a50 b4d19000 0 1 17824 0000100 [CV]select 0x607c47e4] sendmail 17822 69fdb1b8 b4c18000 80 17792 17792 0000100 [SLP]lockf 0x6a5cf1c0] httpd 17821 6a63d6e0 b4c89000 80 17792 17792 0000100 [SLP]lockf 0x6a5e4ac0] httpd 17820 6a63b000 b4c34000 80 17792 17792 0000100 [CV]select 0x607c47e4] httpd 17819 69fdb898 b4c1c000 80 17792 17792 0000100 [SLP]suspfs 0x695e0874] httpd 17818 6916b528 b4cbc000 80 17792 17792 0000100 [SLP]lockf 0x6a5dc880] httpd 17792 6916b1b8 b4cba000 0 1 17792 0000000 [CV]select 0x607c47e4] httpd 17773 6968a6e0 b4b7f000 0 1 17773 0000000 [SLP]nanslp 0x6079b37c] cron 17758 6967ba50 b4b72000 25 1 17758 0000100 [SLP]pause 0x6965c000] sendmail 17752 6916b898 b4cbe000 0 1 17752 0000100 [CV]select 0x607c47e4] sendmail 17687 6a63ba50 b4c3a000 0 1 17687 0000000 [CV]select 0x607c47e4] usbd 17666 6916d898 b4cc7000 0 17662 17662 0000000 [SLP]ufs 0x712fa5c0] nfsd 17665 6a63d528 b4c88000 0 17662 17662 0000000 [SLP]- 0x695e7400] nfsd 17664 6a63bdc0 b4c3c000 0 17662 17662 0000000 [SLP]- 0x6e14f000] nfsd 17663 69fdb370 b4c19000 0 17662 17662 0000000 [SLP]suspfs 0x695e0874] nfsd 17662 6916d1b8 b4cc3000 0 1 17662 0000000 [CV]select 0x607c47e4] nfsd 17652 6a63d1b8 b4c3e000 0 1 17652 0000000 [CV]select 0x607c47e4] mountd 17561 6916bdc0 b4cc1000 53 1 17561 0000100 [CV]select 0x607c47e4] named 17548 6968a898 b4b80000 0 1 17548 0000000 [CV]select 0x607c47e4] syslogd 58 68eaf528 b08c2000 0 0 0 0000204 [SLP]- 0xb08a4d0c] schedcpu 57 68eaf6e0 b08c3000 0 0 0 0000204 [SLP]- 0x607cb90c] nfsiod 3 56 68eaf898 b08c4000 0 0 0 0000204 [SLP]- 0x607cb908] nfsiod 2 55 68eafa50 b08c5000 0 0 0 0000204 [SLP]- 0x607cb904] nfsiod 1 54 68eafc08 b08c6000 0 0 0 0000204 [SLP]- 0x607cb900] nfsiod 0 53 68eafdc0 b08c7000 0 0 0 0000204 [SLP]syncer 0x6079ad40] syncer 52 690f1000 b2902000 0 0 0 0000204 [SLP]vlruwt 0x690f1000] vnlru 51 690f11b8 b2903000 0 0 0 0000204 [SLP]psleep 0x607c4c8c] bufdaemon 50 690f1370 b2904000 0 0 0 000020c [SLP]pgzero 0x607d2088] pagezero 49 690f1528 b2905000 0 0 0 0000204 [SLP]psleep 0x607d20e0] vmdaemon 9 690f16e0 b2906000 0 0 0 0000204 [SLP]psleep 0x607d20cc] pagedaemon 48 690f1898 b294f000 0 0 0 0000204 [IWAIT] swi0: tty:sio 47 68e5ba50 b088f000 0 0 0 0000204 [SLP]usbtsk 0x60793384] usbtask 46 68e5bc08 b0890000 0 0 0 0000204 [SLP]usbevt 0x68fce210] usb0 8 68e5bdc0 b0891000 0 0 0 0000204 [SLP]actask 0x608cf64c] acpi_task2 7 68ead000 b0892000 0 0 0 0000204 [SLP]actask 0x608cf64c] acpi_task1 6 68ead1b8 b0893000 0 0 0 0000204 [SLP]actask 0x608cf64c] acpi_task0 45 68ead370 b0894000 0 0 0 0000204 [IWAIT] swi6:+ 44 68ead528 b0895000 0 0 0 0000204 [IWAIT] swi7: task queue 43 68ead6e0 b0896000 0 0 0 0000204 [IWAIT] swi7: acpitaskq 42 68ead898 b0897000 0 0 0 0000204 [IWAIT] swi3: cambio 41 68eada50 b0898000 0 0 0 0000204 new [IWAIT] swi2: camnet 40 68eadc08 b08bd000 0 0 0 0000204 new [IWAIT] swi5:+ 5 68eaddc0 b08be000 0 0 0 0000204 [SLP]tqthr 0x6079e848] taskqueue 39 68eaf000 b08bf000 0 0 0 0000204 [SLP]- 0x60790120] random 4 68e52528 b085f000 0 0 0 0000204 [SLP]- 0x607959e0] g_down 3 68e526e0 b0860000 0 0 0 0000204 [SLP]- 0x607959dc] g_up 2 68e52898 b0861000 0 0 0 0000204 [SLP]- 0x607959d4] g_event 38 68e52a50 b0862000 0 0 0 0000204 [IWAIT] swi1: net 37 68e52c08 b0863000 0 0 0 0000204 new [IWAIT] swi4: vm 36 68e52dc0 b0864000 0 0 0 000020c [IWAIT] swi8: tty:sio clock 35 68e5b000 b0865000 0 0 0 0000204 new [IWAIT] irq0: clk 34 68e5b1b8 b088a000 0 0 0 0000204 new [IWAIT] irq23: 33 68e5b370 b088b000 0 0 0 0000204 new [IWAIT] irq22: 32 68e5b528 b088c000 0 0 0 0000204 [IWAIT] irq21: amr0 31 68e5b6e0 b088d000 0 0 0 0000204 new [IWAIT] irq20: 30 68e5b898 b088e000 0 0 0 0000204 [IWAIT] irq19: fwohci1+ 29 64f661b8 aee2a000 0 0 0 0000204 [IWAIT] irq18: rl0 28 64f66370 aee2b000 0 0 0 0000204 [IWAIT] irq17: atapci1 pcm0 27 64f66528 aee2c000 0 0 0 0000204 [IWAIT] irq16: fwohci0 26 64f666e0 aee2d000 0 0 0 0000204 [IWAIT] irq15: ata1 25 64f66898 aee52000 0 0 0 0000204 [IWAIT] irq14: ata0 24 64f66a50 aee53000 0 0 0 0000204 new [IWAIT] irq13: 23 64f66c08 aee54000 0 0 0 0000204 new [IWAIT] irq12: 22 64f66dc0 aee55000 0 0 0 0000204 new [IWAIT] irq11: 21 68e52000 b085c000 0 0 0 0000204 new [IWAIT] irq10: 20 68e521b8 b085d000 0 0 0 0000204 new [IWAIT] irq9: acpi0 19 68e52370 b085e000 0 0 0 0000204 new [IWAIT] irq8: rtc 18 64f5d000 aedd8000 0 0 0 0000204 new [IWAIT] irq7: ppc0 17 64f5d1b8 aee21000 0 0 0 0000204 new [IWAIT] irq6: 16 64f5d370 aee22000 0 0 0 0000204 new [IWAIT] irq5: 15 64f5d528 aee23000 0 0 0 0000204 new [IWAIT] irq4: sio0 14 64f5d6e0 aee24000 0 0 0 0000204 new [IWAIT] irq3: sio1 13 64f5d898 aee25000 0 0 0 0000204 [CPU 1] irq1: atkbd0 12 64f5da50 aee26000 0 0 0 000020c [CPU 0] idle: cpu0 11 64f5dc08 aee27000 0 0 0 000020c [Can run] idle: cpu1 1 64f5ddc0 aee28000 0 0 1 0004200 [SLP]wait 0x64f5ddc0] init 10 64f66000 aee29000 0 0 0 0000204 [CV]ktrace 0x60798f64] ktrace 0 60795ae0 60c1f000 0 0 0 0000200 [SLP]sched 0x60795ae0] swapper db> tr 17666 sched_switch(6916f2a0,2,6071f99a,1d2,709f2be6) at 0x60571e85 mi_switch(1,50,6071f99a,ca,2) at 0x605657eb msleep(712fa5c0,60799868,50,6072a3b0,0) at 0x60564f67 acquire(b4cf4944,1000040,600,e7,6079fd00) at 0x605503ae lockmgr(712fa5c0,1010002,712fa514,6916f2a0,b4cf4960) at 0x60550898 vop_stdlock(b4cf4988,b4cf496c,606838b8,b4cf4988,b4cf49a4) at 0x605af8cc vop_defaultop(b4cf4988,b4cf49a4,605c5162,b4cf4988,b4cf499c) at 0x605af728 ufs_vnoperate(b4cf4988,b4cf499c,60584efe,607c46d8,712fa514) at 0x606838b8 vn_lock(712fa514,10002,6916f2a0,607a0880,1163454) at 0x605c5162 vget(712fa514,10002,6916f2a0,81,6916f2a0) at 0x605b86db ufs_ihashget(69600000,1163454,2,b4cf4a8c,60726626) at 0x6067b242 ffs_vget(695e0800,1163454,2,b4cf4a8c,0) at 0x6067450e ufs_fhtovp(695e0800,b4cf4bdc,b4cf4b2c,695e0800,b4cf4af4) at 0x6067f327 ffs_fhtovp(695e0800,b4cf4bdc,b4cf4b2c,b4cf4ad4,6916f2a0) at 0x606749d7 nfsrv_fhtovp(b4cf4bd4,1,b4cf4b2c,7358e880,6967ea00) at 0x6064f96b nfsrv3_access(7358e800,6967ea00,6916f2a0,b4cf4c90,0) at 0x606411f9 nfssvc_nfsd(6916f2a0,0,6072ec7f,95,605535ea) at 0x6065094a nfssvc(6916f2a0,b4cf4d14,6073960f,3e8,2) at 0x6065032d syscall(2f,2f,2f,5fbfeec4,0) at 0x606d7c02 Xint0x80_syscall() at 0x606c423d --- syscall (155, FreeBSD ELF32, nfssvc), eip = 0x280bfd6f, esp = 0x5fbfeb2c, ebp = 0x5fbfeb48 --- db> show lockedvnods Locked vnodes 0x712fa514: tag ufs, type VREG, usecount 1, writecount 0, refcount 1, flags (VV_OBJBUF), lock type ufs: EXCL (count 1) by thread 0x69fdd690 (pid 17663) with 2 pending ino 18232404, on dev amrd0a (4, 30) 0x6ebfd820: tag ufs, type VREG, usecount 1, writecount 0, refcount 5222, lock type ufs: EXCL (count 1) by thread 0x696782a0 (pid 18312) ino 6, on dev amrd0a (4, 30) My guess is that inode 6 is the filesystem snapshot that dump produced. Inode 18232404 is a file in the ports collections that was being checked out on another machine. The system is current as of today. uname -a: FreeBSD bling.home 5.2-CURRENT FreeBSD 5.2-CURRENT #1: Sun Feb 1 13:30:42 EST 2004 root@bling.home:/usr/src/sys/i386/compile/BLING i386 This looks strangely like the getblk() hangs that I have seen on this system but it's the first time that I've come across the ufs() state on the hung process(es). Any ideas? :-) > Andre Guibert de Bruet | Enterprise Software Consultant > > Silicon Landmark, LLC. | http://siliconlandmark.com/ >