From owner-freebsd-hackers Sat Aug 7 13:58:21 1999 Delivered-To: freebsd-hackers@freebsd.org Received: from fep4.post.tele.dk (fep4.post.tele.dk [195.41.46.139]) by hub.freebsd.org (Postfix) with ESMTP id BAA6F14CFC for ; Sat, 7 Aug 1999 13:58:09 -0700 (PDT) (envelope-from peter@holm.cc) Received: from holm.cc ([195.249.209.182]) by fep4.post.tele.dk (InterMail v4.0 201-221) with ESMTP id <19990807205636.WJLC1485.fep4@holm.cc> for ; Sat, 7 Aug 1999 22:56:36 +0200 Message-ID: <37AC9E3B.49CD3DA6@holm.cc> Date: Sat, 07 Aug 1999 22:59:40 +0200 From: Peter Holm X-Mailer: Mozilla 4.51 [en] (Win95; I) X-Accept-Language: en MIME-Version: 1.0 To: Freebsd-hackers@freebsd.org Subject: NFS V3 and mkdir bug Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Sender: owner-freebsd-hackers@FreeBSD.ORG Precedence: bulk X-Loop: FreeBSD.ORG I have a test program that will fail under nfs v3, but not under v2. The same test program works fine under ufs. The error involves mkdir() and heavy nfs load. I have two different situations that fail: 1) mkdir() followed by stat(), where the stat() fails consistently with ENOENT 2) A sequence of mkdir() followed by a sequence of rmdir(). Some of the rmdir() calls will fail, but if repeated they eventually succeed. 
Here's some of my findings: $ ps auxww | grep dir2 root 316 0.0 1.0 1524 1228 p0 D 8:59pm 0:01.27 ./dir2 210 root 319 0.0 1.0 1524 1228 p0 D 8:59pm 0:01.22 ./dir2 210 root 322 0.0 1.0 1524 1228 p0 D 8:59pm 0:01.18 ./dir2 210 root 325 0.0 1.0 1524 1228 p0 D 8:59pm 0:01.26 ./dir2 210 root 331 0.0 1.0 1524 1228 p0 D 8:59pm 0:01.28 ./dir2 210 root 334 0.0 1.0 1524 1228 p0 D 8:59pm 0:01.19 ./dir2 210 root 337 0.0 1.0 1524 1228 p0 D 8:59pm 0:01.30 ./dir2 210 root 340 0.0 1.0 1524 1292 p0 D 8:59pm -2341055:-5.36 ./dir2 210 root 343 0.0 1.0 1524 1292 p0 D 8:59pm 0:01.25 ./dir2 210 root 346 0.0 1.0 1524 1228 p0 D 8:59pm 0:01.18 ./dir2 210 root 349 0.0 1.0 1524 1228 p0 D 8:59pm 0:01.26 ./dir2 210 root 352 0.0 1.0 1524 1228 p0 D 8:59pm 11:36.72 ./dir2 210 root 355 0.0 1.0 1524 1292 p0 D 8:59pm 0:01.22 ./dir2 210 root 358 0.0 1.0 1524 1228 p0 D 8:59pm 0:01.32 ./dir2 210 console: calcru: negative time of -693963490 usec for pid 340 (dir2) mkdir. p00340.d0210/d209/d208/d207/d206/d205/d204/d203/d202/d201/d200/d199/d198/d197/d196/d195/d194/d193/d192/d191/d190/d189/d188/d187/d186/d185/d184/d183/d182/d181/d180/d179/d178/d177/d176/d175/d174/d173/d172/d171/d170/d169/d168/d167/d166/d165/d164/d163/d162/d161/d160/d159/d158/d157/d156/d155/d154/d153/d152/d151/d150/d149/d148/d147/d146/d145/d144/d143/d142/d141/d140/d139/d138/d137/d136/d135/d134/d133/d132/d131/d130/d129/d128/d127/d126/d125/d124/d123/d122/d121/d120/d119/d118/d117/d116/d115/d114/d113/d112/d111/d110/d109/d108/d107/d106/d105/d104/d103/d102/d101/d100/d99/d98/d97/d96/d95/d94/d93/d92/d91/d90/d89/d88/d87/d86/d85/d84/d83/d82/d81/d80/d79/d78/d77/d76/d75/d74/d73/d72/d71. 
No such file or directory (dir2.c:21) -- mkdir: error = 2 Debugger("mkdir") Stopped at Debugger+0x37: movl $0,in_Debugger (kgdb) up 9 #9 0xc0192a29 in mkdir (p=0xc7c034a0, uap=0xc86ecf80) at ../../kern/vfs_syscalls.c:2636 2636 Debugger("mkdir"); (kgdb) l 2631 2632 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); 2633 nd.ni_cnd.cn_flags |= WILLBEDIR; 2634 if ((error = namei(&nd)) != 0) { 2635 printf("mkdir: error = %d\n", error); /* PHO */ 2636 Debugger("mkdir"); 2637 return (error); 2638 } 2639 vp = nd.ni_vp; 2640 if (vp != NULL) { (kgdb) p error $1 = 0 (kgdb) p nd $2 = { ni_dirp = 0xbfba05a4 "p00292.d0210/d209/d208/d207/d206/d205/d204/d203/d202/d201/d200/d199/d198/d197/d196/d195/d194/d193/d192/d191/d190/d189/d188/d187/d186/d185/d184/d183/d182/d181/d180/d179/d178/d177/d176/d175/d174/d173/d1"..., ni_segflg = UIO_USERSPACE, ni_startdir = 0x0, ni_rootdir = 0xc7c01e00, ni_vp = 0x0, ni_dvp = 0xc8627180, ni_pathlen = 6, ni_next = 0xc7c165d8 "/d117", ni_loopcnt = 0, ni_cnd = {cn_nameiop = 1, cn_flags = 540680, cn_proc = 0xc7c034a0, cn_cred = 0xc0a72400, cn_pnbuf = 0xc7c16400 "", cn_nameptr = 0xc7c165d4 "d118/d117", cn_namelen = 4, cn_hash = 254, cn_consume = 0}} (kgdb) Here's the test program: /* $Id$ */ #include #include #include #include #include #include #include void error(char *op, char* arg, char* file, int line) { fprintf(stderr,"%s. %s. 
%s (%s:%d)\n", op, arg, sys_errlist[errno], file, line); } void mkDir(char *path, int level) { char newPath[4096]; while (mkdir(path, 0770) == -1) { error("mkdir", path, __FILE__, __LINE__); sleep(1); } if (level > 1) { sprintf(newPath,"%s/d%d", path, level-1); mkDir(newPath, level-1); } } void rmDir(char *path, int level) { char newPath[4096]; if (level > 1) { sprintf(newPath,"%s/d%d", path, level-1); rmDir(newPath, level-1); } if (rmdir(path) == -1) { error("rmdir", path, __FILE__, __LINE__); exit(2); } } int main(int argc, char **argv) { int levels, pid; char path[128]; if (argc != 2) { fprintf(stderr, "Usage: %s \n", argv[0]); exit(1); } levels = 10; sscanf(argv[1], "%d", &levels); if (levels > 210) levels = 210; pid = getpid(); umask(0); sprintf(path,"p%05d.d%04d", pid, levels); mkDir(path, levels); rmDir(path, levels); return 0; } And finally I also had a single crash: current# gdb -k -s kernel.debug -e /var/crash/kernel.6 -c /var/crash/vmcore.6 IdlePTD 3932160 initial pcb at 33cfc0 panicstr: ffs_valloc: dup alloc panic messages: --- panic: ffs_valloc: dup alloc --- #0 boot (howto=256) at ../../kern/kern_shutdown.c:291 291 dumppcb.pcb_cr3 = rcr3(); (kgdb) bt #0 boot (howto=256) at ../../kern/kern_shutdown.c:291 #1 0xc016710d in panic (fmt=0xc02f69c1 "ffs_valloc: dup alloc") at ../../kern/kern_shutdown.c:505 #2 0xc0224103 in ffs_valloc (pvp=0xc8744a80, mode=16888, cred=0xc0b94384, vpp=0xc85d8a04) at ../../ufs/ffs/ffs_alloc.c:605 #3 0xc0236353 in ufs_mkdir (ap=0xc85d8bc4) at ../../ufs/ufs/ufs_vnops.c:1307 #4 0xc02374a1 in ufs_vnoperate (ap=0xc85d8bc4) at ../../ufs/ufs/ufs_vnops.c:2316 #5 0xc01cc26d in nfsrv_mkdir (nfsd=0xc0b94300, slp=0xc09e4600, procp=0xc7c05de0, mrq=0xc85d8dc4) at vnode_if.h:611 #6 0xc01da76e in nfssvc_nfsd (nsd=0xc85d8e80, argp=0x8071bc0 "", p=0xc7c05de0) at ../../nfs/nfs_syscalls.c:650 #7 0xc01da08f in nfssvc (p=0xc7c05de0, uap=0xc85d8f80) at ../../nfs/nfs_syscalls.c:346 #8 0xc026d496 in syscall (frame={tf_fs = 47, tf_es = 47, tf_ds = 47, 
tf_edi = 4, tf_esi = 1, tf_ebp = -1077944892, tf_isp = -933392428, tf_ebx = 0, tf_edx = -1077944336, tf_ecx = 0, tf_eax = 155, tf_trapno = 12, tf_err = 2, tf_eip = 134517008, tf_cs = 31, tf_eflags = 646, tf_esp = -1077945284, tf_ss = 47}) at ../../i386/i386/trap.c:1056 #9 0xc025e526 in Xint0x80_syscall () #10 0x80480e9 in ?? () (kgdb) quit current# exit Any suggestions as to where to investigate? Regards -- Peter Holm | mailto:peter@holm.cc | http://login.dknet.dk/~pho/ To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-hackers" in the body of the message