Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 31 Jan 2011 15:54:40 +0300
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        Przemyslaw Frasunek <przemyslaw@frasunek.com>
Cc:        freebsd-net@FreeBSD.org
Subject:   Re: Netgraph/mpd5 stability issues
Message-ID:  <20110131125440.GK62007@FreeBSD.org>
In-Reply-To: <4D3011DB.9050900@frasunek.com>
References:  <4D3011DB.9050900@frasunek.com>

next in thread | previous in thread | raw e-mail | index | archive | help
On Fri, Jan 14, 2011 at 10:05:31AM +0100, Przemyslaw Frasunek wrote:
P> Hello,
P> 
P> I'm using mpd 5.5 on three PPPoE routers, each servicing about 300 concurrent
P> PPPoE sessions. The routers are based on Intel SR1630GP hardware platforms and
P> run FreeBSD 7.3-RELEASE.
P> 
P> I'm experiencing stability issues related to Netgraph. None of the above routers
P> can survive more than 20-30 days of uptime under typical load. There are different
P> flavors of kernel panics, but all are somehow related to netgraph. Typical
P> backtraces follow:
P> 
P> (kgdb) bt
P> #1  0xc0836ac7 in boot (howto=260) at ../../../kern/kern_shutdown.c:418
P> #2  0xc0836d99 in panic (fmt=Variable "fmt" is not available.
P> ) at ../../../kern/kern_shutdown.c:574
P> #3  0xc0b5ef1c in trap_fatal (frame=0xe7ce6820, eva=152)
P>     at ../../../i386/i386/trap.c:950
P> #4  0xc0b5f1a0 in trap_pfault (frame=0xe7ce6820, usermode=0, eva=152)
P>     at ../../../i386/i386/trap.c:863
P> #5  0xc0b5fb95 in trap (frame=0xe7ce6820) at ../../../i386/i386/trap.c:541
P> #6  0xc0b42e7b in calltrap () at ../../../i386/i386/exception.s:166
P> #7  0xc5f486b9 in ng_name2noderef (here=0xc62a0b80, name=0xe7ce6894 "ng366")
P>     at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:896
P> #8  0xc5f488cc in ng_path2noderef (here=0xc62a0b80,
P>     address=0xcc4c2110 "ng366:", destp=0xe7ce6ac8, lasthook=0xe7ce6ac4)
P>     at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:1673
P> #9  0xc5f48cc0 in ng_address_path (here=0xc62a0b80, item=0xc5e42ae0,
P>     address=0xcc4c2110 "ng366:", retaddr=0)
P>     at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:3488
P> #10 0xc5f431d3 in ngc_send (so=0xc5b53340, flags=0, m=0xd4c6cb00,
P>     addr=0xccac9780, control=0x0, td=0xc65a2b40)
P>     at /usr/src/sys/modules/netgraph/socket/../../../netgraph/ng_socket.c:288
P> #11 0xc0894bfa in sosend_generic (so=0xc5b53340, addr=0xccac9780,
P>     uio=0xe7ce6be8, top=0xd4c6cb00, control=0x0, flags=0, td=0xc65a2b40)
P>     at ../../../kern/uipc_socket.c:1243
P> #12 0xc0890a3f in sosend (so=0xc5b53340, addr=0xccac9780, uio=0xe7ce6be8,
P>     top=0x0, control=0x0, flags=0, td=0xc65a2b40)
P>     at ../../../kern/uipc_socket.c:1285
P> #13 0xc0897fa6 in kern_sendit (td=0xc65a2b40, s=5, mp=0xe7ce6c64, flags=0,
P>     control=0x0, segflg=UIO_USERSPACE) at ../../../kern/uipc_syscalls.c:805
P> #14 0xc089b181 in sendit (td=0xc65a2b40, s=5, mp=0xe7ce6c64, flags=0)
P>     at ../../../kern/uipc_syscalls.c:742
P> #15 0xc089b298 in sendto (td=0xc65a2b40, uap=0xe7ce6cfc)
P>     at ../../../kern/uipc_syscalls.c:857
P> #16 0xc0b5f4f5 in syscall (frame=0xe7ce6d38) at ../../../i386/i386/trap.c:1101
P> #17 0xc0b42ee0 in Xint0x80_syscall () at ../../../i386/i386/exception.s:262
P> #18 0x00000033 in ?? ()
P> (kgdb) frame 7
P> #7  0xc5f486b9 in ng_name2noderef (here=0xc62a0b80, name=0xe7ce6894 "ng366")
P>     at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:896
P> 896             LIST_FOREACH(node, &ng_name_hash[hash], nd_nodes) {
P> (kgdb) list
P> 891             }
P> 892
P> 893             /* Find node by name */
P> 894             NG_NAMEHASH(name, hash);
P> 895             mtx_lock(&ng_namehash_mtx);
P> 896             LIST_FOREACH(node, &ng_name_hash[hash], nd_nodes) {
P> 897                     if (NG_NODE_IS_VALID(node) &&
P> 898                         (strcmp(NG_NODE_NAME(node), name) == 0)) {
P> 899                             break;
P> 900                     }
P> (kgdb) print node
P> $1 = 0x74
P> (kgdb) print ng_name_hash
P> $3 = {{lh_first = 0xcbab6200}, {lh_first = 0x0}, {lh_first = 0xc6538300}, {
P>     lh_first = 0xc67e6400}, {lh_first = 0xc6538700}, {lh_first = 0xca2abc00}, {
P>     lh_first = 0xc66d5000}, {lh_first = 0xca8f9200}, {lh_first = 0xca815580}, {
P>     lh_first = 0xc62a2180}, {lh_first = 0xca2ab180}, {lh_first = 0xc6af7d00}, {
P>     lh_first = 0xcbe09a00}, {lh_first = 0xca81b800}, {lh_first = 0xc5b4e980}, {
P>     lh_first = 0xcbc1f080}, {lh_first = 0xca2a5480}, {lh_first = 0xc672b580}, {
P>     lh_first = 0xcbdb1e80}, {lh_first = 0xcc772c00}, {lh_first = 0xc6a99980}, {
P>     lh_first = 0xc629d600}, {lh_first = 0xc6733000}, {lh_first = 0xca967800}, {
P>     lh_first = 0xc5b3b780}, {lh_first = 0xc629c280}, {lh_first = 0xc6396980}, {
P>     lh_first = 0xc6a5f300}, {lh_first = 0xc5bf2280}, {lh_first = 0xcc5ebe80}, {
P>     lh_first = 0xc5e0a400}, {lh_first = 0xc6608100}, {lh_first = 0xc6520e00}, {
P>     lh_first = 0xc6642680}, {lh_first = 0xca8f7b80}, {lh_first = 0xcbd9ce80}, {
P>     lh_first = 0xca81b380}, {lh_first = 0x0} <repeats 13 times>, {
P>     lh_first = 0xc67b8080}, {lh_first = 0xc6455c80}, {lh_first = 0xc652a380}, {
P>     lh_first = 0xc6a74780}, {lh_first = 0xc62d8400}, {lh_first = 0xcc154400}, {
P>     lh_first = 0xca852b80}, {lh_first = 0xcc351580}, {lh_first = 0xc6396a80}, {
P>     lh_first = 0xc66f9580}, {lh_first = 0xc58c8e00}, {lh_first = 0xcc01a000}, {
P>     lh_first = 0xc6614e80}, {lh_first = 0xc6750800}, {lh_first = 0xcc154e80}, {
P>     lh_first = 0xcc32f080}, {lh_first = 0xcbb10e80}, {lh_first = 0xcc1e3700}, {
P>     lh_first = 0xcc020280}, {lh_first = 0xcc75ad00}, {lh_first = 0xca901b00}, {
P>     lh_first = 0xcc3c8380}, {lh_first = 0xcbd90580}, {lh_first = 0xcbb0c480}, {
P>     lh_first = 0xcbed1300}, {lh_first = 0xc6644480}, {lh_first = 0xcc02ca80}, {
P>     lh_first = 0xcc0d1980}, {lh_first = 0xcc35e200}, {lh_first = 0xcc0dc200}, {
P>     lh_first = 0xca9dc200}, {lh_first = 0xcbecf880}, {lh_first = 0xcc065080}, {
P>     lh_first = 0xcc47b280}, {lh_first = 0xcc722a80}, {lh_first = 0xcc28cd80}, {
P>     lh_first = 0xcbd73400}, {lh_first = 0xcbf76b00}, {lh_first = 0xcbbfc280}, {
P>     lh_first = 0xc629c800}, {lh_first = 0xc6700200}, {lh_first = 0x0}, {
P>     lh_first = 0x0}, {lh_first = 0xc5e0b700}, {lh_first = 0xc672a200}, {
P>     lh_first = 0xc62a2080}, {lh_first = 0x0}, {lh_first = 0xc673fc80}, {
P>     lh_first = 0xc5bf2600}, {lh_first = 0xca969800}, {lh_first = 0xc6aa6700}, {
P>     lh_first = 0xc6750b80}, {lh_first = 0xcc0bc200}, {lh_first = 0xcbeead80}, {
P>     lh_first = 0xcc484e00}, {lh_first = 0xcbae6900}, {lh_first = 0xcbbef800}, {
P>     lh_first = 0xcc797500}, {lh_first = 0xc65f3d80}, {lh_first = 0xcbe95900}, {
P>     lh_first = 0xcba8fb80}, {lh_first = 0xcbdb1580}, {lh_first = 0xcc75b080}, {
P>     lh_first = 0xcbd7fb80}, {lh_first = 0xcc75db80}, {lh_first = 0xc5e59500}, {
P>     lh_first = 0xcbd6fb00}, {lh_first = 0xc6a7ed00}, {lh_first = 0xcbe0bc80}, {
P>     lh_first = 0xcc3c1180}, {lh_first = 0xc7486d00}, {lh_first = 0xcba93880}, {
P>     lh_first = 0xcc0c6000}, {lh_first = 0x0}, {lh_first = 0x0}, {
P>     lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x0}}

In this dump, can we find out where 0x74 came from? Can you look at
ng_name_hash[hash]? The hash is 116, so the node of interest is probably at
0xcbd6fb00, if I counted the rows correctly.


-- 
Totus tuus, Glebius.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20110131125440.GK62007>