From owner-freebsd-net@FreeBSD.ORG Tue Feb 1 18:30:29 2011 Return-Path: Delivered-To: freebsd-net@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 0CF4D106566C; Tue, 1 Feb 2011 18:30:29 +0000 (UTC) (envelope-from egrosbein@rdtc.ru) Received: from eg.sd.rdtc.ru (eg.sd.rdtc.ru [62.231.161.221]) by mx1.freebsd.org (Postfix) with ESMTP id 3886B8FC0C; Tue, 1 Feb 2011 18:30:27 +0000 (UTC) Received: from eg.sd.rdtc.ru (localhost [127.0.0.1]) by eg.sd.rdtc.ru (8.14.4/8.14.4) with ESMTP id p11IUP9t063264; Wed, 2 Feb 2011 00:30:25 +0600 (NOVT) (envelope-from egrosbein@rdtc.ru) Message-ID: <4D48513C.40503@rdtc.ru> Date: Wed, 02 Feb 2011 00:30:20 +0600 From: Eugene Grosbein User-Agent: Mozilla/5.0 (X11; U; FreeBSD i386; ru-RU; rv:1.9.2.13) Gecko/20110112 Thunderbird/3.1.7 MIME-Version: 1.0 To: Julian Elischer References: <4D3011DB.9050900@frasunek.com> <4D30458D.30007@sentex.net> <4D309983.70709@rdtc.ru> <201101141437.55421.jhb@freebsd.org> <4D46575A.802@rdtc.ru> <4D4670C2.4050500@freebsd.org> In-Reply-To: <4D4670C2.4050500@freebsd.org> Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Cc: freebsd-net@freebsd.org, John Baldwin Subject: Re: panic: bufwrite: buffer is not busy??? X-BeenThere: freebsd-net@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Networking and TCP/IP with FreeBSD List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 01 Feb 2011 18:30:29 -0000 On 31.01.2011 14:20, Julian Elischer wrote: > replace with: > > 3504 if ((hook == NULL) || > 3505 NG_HOOK_NOT_VALID(hook) || > ((peer = NG_HOOK_PEER(hook)) == NULL) || > 3506 NG_HOOK_NOT_VALID(peer) || > ((peernode = NG_PEER_NODE(hook)) == NULL) || > 3507 NG_NODE_NOT_VALID(peernode)) { > if (peer) > kassert((peernode != NULL), ("peer node NULL wile peer hook exists")); > 3508 NG_FREE_ITEM(item); This day I have updated panicing router to RELENG_8 and combined changes supposed by Julian and Gleb. After 8 hours it has just paniced again and could not finish to write crashdump again: Fatal trap 12: page fault while in kernel mode cpuid = 3; apic id = 06 fault virtual address = 0x63 fault code = supervisor read data, page not present instruction pointer = 0x20:0xffffffff803d4ccd stack pointer = 0x28:0xffffff80ebffc600 frame pointer = 0x28:0xffffff80ebffc680 code segment = base 0x0, limit 0xfffff, type 0x1b = DPL 0, pres 1, long 1, def32 0, gran 1 processor eflags = interrupt enabled, resume, IOPL = 0 current process = 2390 (mpd5) trap number = 12 panic: page fault cpuid = 3 Uptime: 8h3m51s Dumping 4087 MB (3 chunks) chunk 0: 1MB (150 pages) ... ok chunk 1: 3575MB (915088 pages) 3559 3543panic: bufwrite: buffer is not busy??? cpuid = 3 Uptime: 8h3m52s Automatic reboot in 15 seconds - press a key on the console to abort # gdb kernel GNU gdb 6.1.1 [FreeBSD] Copyright 2004 Free Software Foundation, Inc. GDB is free software, covered by the GNU General Public License, and you are welcome to change it and/or distribute copies of it under certain conditions. Type "show copying" to see the conditions. There is absolutely no warranty for GDB. Type "show warranty" for details. This GDB was configured as "amd64-marcel-freebsd"... (gdb) l *0xffffffff803d4ccd 0xffffffff803d4ccd is in ng_pppoe_disconnect (netgraph.h:191). 186 int line); 187 188 static __inline void 189 _chkhook(hook_p hook, char *file, int line) 190 { 191 if (hook->hk_magic != HK_MAGIC) { 192 printf("Accessing freed hook "); 193 dumphook(hook, file, line); 194 } 195 hook->lastline = line; (gdb) x/i 0xffffffff803d4ccd 0xffffffff803d4ccd : cmpl $0x78573011,0x64(%rbx) Here is a patch I've applied to the tree: --- sys/netgraph/ng_base.c.orig 2011-02-01 12:34:09.000000000 +0600 +++ sys/netgraph/ng_base.c 2011-02-01 12:00:17.000000000 +0600 @@ -1643,10 +1643,8 @@ node_p *destp, hook_p *lasthook) { char fullpath[NG_PATHSIZ]; - char *nodename, *path, pbuf[2]; + char *nodename, *path; node_p node, oldnode; - char *cp; - hook_p hook = NULL; /* Initialize */ if (destp == NULL) { @@ -1664,11 +1662,6 @@ TRAP_ERROR(); return EINVAL; } - if (path == NULL) { - pbuf[0] = '.'; /* Needs to be writable */ - pbuf[1] = '\0'; - path = pbuf; - } /* * For an absolute address, jump to the starting node. @@ -1690,41 +1683,41 @@ NG_NODE_REF(node); } + if (path == NULL) { + if (lasthook != NULL) + *lasthook = NULL; + *destp = node; + return (0); + } + /* * Now follow the sequence of hooks - * XXX - * We actually cannot guarantee that the sequence - * is not being demolished as we crawl along it - * without extra-ordinary locking etc. - * So this is a bit dodgy to say the least. - * We can probably hold up some things by holding - * the nodelist mutex for the time of this - * crawl if we wanted.. At least that way we wouldn't have to - * worry about the nodes disappearing, but the hooks would still - * be a problem. + * + * XXXGL: The path may demolish as we go the sequence, but if + * we hold the topology mutex at critical places, then, I hope, + * we would always have valid pointers in hand, although the + * path behind us may no longer exist. */ - for (cp = path; node != NULL && *cp != '\0'; ) { + for (;;) { + hook_p hook; char *segment; /* * Break out the next path segment. Replace the dot we just - * found with a NUL; "cp" points to the next segment (or the + * found with a NUL; "path" points to the next segment (or the * NUL at the end). */ - for (segment = cp; *cp != '\0'; cp++) { - if (*cp == '.') { - *cp++ = '\0'; + for (segment = path; *path != '\0'; path++) { + if (*path == '.') { + *path++ = '\0'; break; } } - /* Empty segment */ - if (*segment == '\0') - continue; - /* We have a segment, so look for a hook by that name */ hook = ng_findhook(node, segment); + mtx_lock(&ng_topo_mtx); /* Can't get there from here... */ if (hook == NULL || NG_HOOK_PEER(hook) == NULL @@ -1732,15 +1725,7 @@ || NG_HOOK_NOT_VALID(NG_HOOK_PEER(hook))) { TRAP_ERROR(); NG_NODE_UNREF(node); -#if 0 - printf("hooknotvalid %s %s %d %d %d %d ", - path, - segment, - hook == NULL, - NG_HOOK_PEER(hook) == NULL, - NG_HOOK_NOT_VALID(hook), - NG_HOOK_NOT_VALID(NG_HOOK_PEER(hook))); -#endif + mtx_unlock(&ng_topo_mtx); return (ENOENT); } @@ -1757,21 +1742,25 @@ NG_NODE_UNREF(oldnode); /* XXX another race */ if (NG_NODE_NOT_VALID(node)) { NG_NODE_UNREF(node); /* XXX more races */ - node = NULL; + mtx_unlock(&ng_topo_mtx); + TRAP_ERROR(); + return (ENXIO); } - } - /* If node somehow missing, fail here (probably this is not needed) */ - if (node == NULL) { - TRAP_ERROR(); - return (ENXIO); + if (*path == '\0') { + if (lasthook != NULL) { + if (hook != NULL) { + *lasthook = NG_HOOK_PEER(hook); + NG_HOOK_REF(*lasthook); + } else + *lasthook = NULL; + } + mtx_unlock(&ng_topo_mtx); + *destp = node; + return (0); + } + mtx_unlock(&ng_topo_mtx); } - - /* Done */ - *destp = node; - if (lasthook != NULL) - *lasthook = (hook ? NG_HOOK_PEER(hook) : NULL); - return (0); } /***************************************************************\ @@ -3501,12 +3490,18 @@ * that the peer is still connected (even if invalid,) we know * that the peer node is present, though maybe invalid. */ + mtx_lock(&ng_topo_mtx); if ((hook == NULL) || NG_HOOK_NOT_VALID(hook) || - NG_HOOK_NOT_VALID(peer = NG_HOOK_PEER(hook)) || - NG_NODE_NOT_VALID(peernode = NG_PEER_NODE(hook))) { + ((peer = NG_HOOK_PEER(hook)) == NULL) || + NG_HOOK_NOT_VALID(peer) || + ((peernode = NG_PEER_NODE(hook)) == NULL) || + NG_NODE_NOT_VALID(peernode)) { + if (peer) + KASSERT((peernode != NULL), ("peer node NULL while peer hook exists")); NG_FREE_ITEM(item); TRAP_ERROR(); + mtx_unlock(&ng_topo_mtx); return (ENETDOWN); } @@ -3518,6 +3513,9 @@ NGI_SET_HOOK(item, peer); NGI_SET_NODE(item, peernode); SET_RETADDR(item, here, retaddr); + + mtx_unlock(&ng_topo_mtx); + return (0); } @@ -3539,10 +3537,9 @@ return (error); } NGI_SET_NODE(item, dest); - if ( hook) { - NG_HOOK_REF(hook); /* don't let it go while on the queue */ + if (hook) NGI_SET_HOOK(item, hook); - } + SET_RETADDR(item, here, retaddr); return (0); }