Date: Sat, 13 Sep 2008 12:29:49 -0700 From: Julian Elischer <julian@elischer.org> To: Robert Watson <rwatson@FreeBSD.org> Cc: freebsd-current@freebsd.org, julian@FreeBSD.org, Giorgos Keramidas <keramida@freebsd.org>, jhb@FreeBSD.org Subject: Re: panic in rt_check_fib() Message-ID: <48CC14AD.4090708@elischer.org> In-Reply-To: <alpine.BSF.1.10.0809131105280.55411@fledge.watson.org> References: <87prnjh80z.fsf@kobe.laptop> <alpine.BSF.1.10.0809131105280.55411@fledge.watson.org>
next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format. --------------020906090909020706020402 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Robert Watson wrote: > > On Fri, 5 Sep 2008, Giorgos Keramidas wrote: > >> A kernel that I built last night to test Ed's "packet mode" for ptys >> included all the changes up to 182743 panics with: > > I had an identical panic on 7-STABLE last night: I have a patch for this that I have had out for review for a while... it's a replacement rt_check_fib function. > > db> bt > Tracing pid 782 tid 100091 td 0xc4496440 > kdb_enter_why(c0b25ea1,c0b25ea1,c0b24c19,e6772978,0,...) at > kdb_enter_why+0x3a > panic(c0b24c19,c0b32d59,c0b32d7a,633,c436c9b0,...) at panic+0x12c > _mtx_lock_sleep(c436ddf4,c4496440,0,c0b32d7a,633,...) at > _mtx_lock_sleep+0x4a > _mtx_lock_flags(c436ddf4,0,c0b32d7a,633,c436ca14,...) at > _mtx_lock_flags+0xd1 > rt_check_fib(e6772a0c,e6772a28,c424ea90,0,e6772a1c,...) at > rt_check_fib+0x2b4 > in_rt_check(e6772a0c,e6772a28,c424ea90,0,0,...) at in_rt_check+0x26 > arpresolve(c4040000,c436c9b0,c4240800,c424ea90,e6772a42,...) at > arpresolve+0xb9 > ether_output(c4040000,c4240800,c424ea90,c436c9b0,c450b9d8,...) at > ether_output+0x7e > ip_output(c4240800,0,e6772ab0,0,0,...) at ip_output+0xa34 > udp_send(c44f74b0,0,c4240800,c4514ac0,0,...) at udp_send+0x58b > sosend_dgram(c44f74b0,c4514ac0,e6772bd4,c4240800,0,...) at > sosend_dgram+0x352 > sosend(c44f74b0,c4514ac0,e6772bd4,0,0,...) at sosend+0x54 > kern_sendit(c4496440,20,e6772c58,0,0,...) at kern_sendit+0x106 > sendit(0,1,e6772c54,28,c426a090,...) at sendit+0x162 > sendmsg(c4496440,e6772cfc,c,c4496630,c0bd53c0,...) at sendmsg+0x78 > syscall(e6772d38) at syscall+0x2b3 > Xint0x80_syscall() at Xint0x80_syscall+0x20 > > Unfortunately, I was unable to successfully get a crashdump -- not > entirely sure why as it seemed to go to disk ok. 
> > Robert N M Watson > Computer Laboratory > University of Cambridge > > >> >> ======================================================================== >> >> root@kobe:/var/crash# kgdb /boot/kernel/kernel vmcore.5 >> GNU gdb 6.1.1 [FreeBSD] >> Copyright 2004 Free Software Foundation, Inc. >> GDB is free software, covered by the GNU General Public License, and >> you are >> welcome to change it and/or distribute copies of it under certain >> conditions. >> Type "show copying" to see the conditions. >> There is absolutely no warranty for GDB. Type "show warranty" for >> details. >> This GDB was configured as "i386-marcel-freebsd"... >> >> Unread portion of the kernel message buffer: >> panic: _mtx_lock_sleep: recursed on non-recursive mutex rtentry @ >> /home/build/src/sys/net/route.c:1742 >> >> cpuid = 0 >> Uptime: 5m26s >> Physical memory: 2026 MB >> Dumping 80 MB: 65 49 33 17 1 >> >> Reading symbols from /boot/kernel/snd_hda.ko...Reading symbols from >> /boot/kernel/snd_hda.ko.symbols...done. >> done. >> Loaded symbols for /boot/kernel/snd_hda.ko >> Reading symbols from /boot/kernel/sound.ko...Reading symbols from >> /boot/kernel/sound.ko.symbols...done. >> done. >> Loaded symbols for /boot/kernel/sound.ko >> Reading symbols from /boot/kernel/if_iwn.ko...Reading symbols from >> /boot/kernel/if_iwn.ko.symbols...done. >> done. >> Loaded symbols for /boot/kernel/if_iwn.ko >> Reading symbols from /boot/kernel/acpi.ko...Reading symbols from >> /boot/kernel/acpi.ko.symbols...done. >> done. >> Loaded symbols for /boot/kernel/acpi.ko >> Reading symbols from /boot/kernel/snake_saver.ko...Reading symbols >> from /boot/kernel/snake_saver.ko.symbols...done. >> done. >> Loaded symbols for /boot/kernel/snake_saver.ko >> #0 doadump () at pcpu.h:221 >> 221 pcpu.h: No such file or directory. 
>> in pcpu.h >> (kgdb) list >> 216 in pcpu.h >> (kgdb) bt >> #0 doadump () at pcpu.h:221 >> #1 0xc05e13ac in boot (howto=260) at >> /home/build/src/sys/kern/kern_shutdown.c:418 >> #2 0xc05e1678 in panic (fmt=Variable "fmt" is not available. >> ) at /home/build/src/sys/kern/kern_shutdown.c:572 >> #3 0xc05d3fda in _mtx_lock_sleep (m=0xc573eba4, tid=3314466816, >> opts=0, file=0xc08f457a "/home/build/src/sys/net/route.c", line=1742) >> at /home/build/src/sys/kern/kern_mutex.c:310 >> #4 0xc05d422f in _mtx_lock_flags (m=0xc573eba4, opts=0, >> file=0xc08f457a "/home/build/src/sys/net/route.c", line=1742) at >> /home/build/src/sys/kern/kern_mutex.c:182 >> #5 0xc0694ad8 in rt_check_fib (lrt=0xe7c299ec, lrt0=0xe7c29a08, >> dst=0xc5550710, fibnum=0) at /home/build/src/sys/net/route.c:1742 >> #6 0xc06caf36 in in_rt_check (lrt=0xe7c299ec, lrt0=0xe7c29a08, >> dst=0xc5550710, fibnum=0) at /home/build/src/sys/netinet/in_rmx.c:472 >> #7 0xc06c0ecd in arpresolve (ifp=0xc51fd800, rt0=0xc573eca8, >> m=0xc59c2200, dst=0xc5550710, desten=0xe7c29a22 "") at >> /home/build/src/sys/netinet/if_ether.c:388 >> #8 0xc0689a9e in ether_output (ifp=0xc51fd800, m=0xc59c2200, >> dst=0xc5550710, rt0=0xc573eca8) at >> /home/build/src/sys/net/if_ethersubr.c:183 >> #9 0xc06d1bf1 in ip_output (m=0xc59c2200, opt=0x0, ro=0xe7c29aa8, >> flags=Variable "flags" is not available. >> ) at /home/build/src/sys/netinet/ip_output.c:563 >> #10 0xc073ecfb in udp_send (so=0xc573b498, flags=0, m=0xc59c2200, >> addr=0xc597e2f0, control=0x0, td=0xc58ec000) at >> /home/build/src/sys/netinet/udp_usrreq.c:1060 >> #11 0xc064530f in sosend_dgram (so=0xc573b498, addr=0xc597e2f0, >> uio=0xe7c29bd4, top=0xc59c2200, control=0x0, flags=Variable "flags" is >> not available. 
>> ) at /home/build/src/sys/kern/uipc_socket.c:1059 >> #12 0xc0643054 in sosend (so=0xc573b498, addr=0xc597e2f0, >> uio=0xe7c29bd4, top=0x0, control=0x0, flags=0, td=0xc58ec000) at >> /home/build/src/sys/kern/uipc_socket.c:1292 >> #13 0xc064bf15 in kern_sendit (td=0xc58ec000, s=516, mp=0xe7c29c54, >> flags=0, control=0x0, segflg=UIO_USERSPACE) at >> /home/build/src/sys/kern/uipc_syscalls.c:782 >> #14 0xc064c121 in sendit (td=0xc58ec000, s=516, mp=0xe7c29c54, >> flags=0) at /home/build/src/sys/kern/uipc_syscalls.c:719 >> #15 0xc064c1d1 in sendmsg (td=0xc58ec000, uap=0xe7c29cf8) at >> /home/build/src/sys/kern/uipc_syscalls.c:915 >> #16 0xc0884d13 in syscall (frame=0xe7c29d38) at >> /home/build/src/sys/i386/i386/trap.c:1090 >> #17 0xc0869020 in Xint0x80_syscall () at >> /home/build/src/sys/i386/i386/exception.s:261 >> #18 0x00000033 in ?? () >> Previous frame inner to this frame (corrupt stack?) >> (kgdb) >> >> ======================================================================== >> >> From the limited testing I could do today it seems that the following >> changes might be useful to track down why this is happening: >> >> /head@182698 -> ok so far >> /head@182743 -> panic >> >> I don't see any rt_check_fib() changes in this commit range, so it may >> be false that /head@182698 is ok. It just doesn't panic immediately >> when I try to bring up my re0 interface and set the default route. >> >> - Giorgos >> >> --------------020906090909020706020402 Content-Type: text/plain; name="rt_check.c" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="rt_check.c" /* * rt_check() is invoked on each layer 2 output path, prior to * encapsulating outbound packets. 
 *
 * The function is mostly used to find a routing entry for the gateway,
 * which in some protocol families could also point to the link-level
 * address for the gateway itself.  (The side effect of revalidating the
 * route to the destination is rather pointless at this stage; we did it
 * already a moment before in the pr_output() routine to locate the ifp
 * and gateway to use.)
 *
 * When we remove the layer-3 to layer-2 mapping tables from the
 * routing table, this function can be removed.
 *
 * === On input ===
 *    *dst is the address of the NEXT HOP (which coincides with the
 *	final destination if directly reachable);
 *    *lrt0 points to the cached route to the final destination;
 *    *lrt is not meaningful;
 *    fibnum is the index to the correct network fib for this packet
 *	(*lrt0 has no ref held on it, so REMREF is not needed)
 *
 * === Operation ===
 * If the route is marked down, try to find a new route.  If the route
 * to the gateway is gone, try to set up a new route.  Otherwise,
 * if the route is marked for packets to be rejected, enforce that.
 * Note that rtalloc returns an rtentry with an extra REF that we need to lose.
 *
 * === On return ===
 *    *dst is unchanged;
 *    *lrt0 points to the (possibly new) route to the final destination;
 *    *lrt points to the route to the next hop   [LOCKED]
 *
 * Their values are meaningful ONLY if no error is returned.
 *
 * To follow this you have to remember that:
 * RT_REMREF reduces the reference count by 1 but doesn't check it for 0 (!)
 * RTFREE_LOCKED includes an RT_REMREF (or an rtfree if refs == 1)
 *    and an RT_UNLOCK
 * RTFREE does an RT_LOCK and an RTFREE_LOCKED
 * The gwroute pointer counts as a reference on the rtentry to which it
 * points, so when we add it we use the ref that rtalloc gives us, and when
 * we lose it we need to remove the reference.
*/

/*
 * rt_check() - look up the next-hop route using routing table (fib) 0.
 *
 * Thin wrapper: forwards lrt, lrt0 and dst to rt_check_fib() with a
 * fibnum of 0 and returns its result unchanged.
 */
int
rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
{

	return (rt_check_fib(lrt, lrt0, dst, 0));
}

/*
 * rt_check_fib() - validate the cached route *lrt0 and find the route to
 * use for the next hop, looking routes up in fib 'fibnum'.
 *
 * Parameters:
 *	lrt	out: on success, the route to the next hop; it is returned
 *		with its lock held.
 *	lrt0	in/out: cached route to the final destination; must not
 *		point to NULL on entry.  May be replaced by a freshly
 *		looked-up route if the cached one is no longer RTF_UP.
 *	dst	next-hop address used when *lrt0 has to be looked up again.
 *	fibnum	fib index passed to rtalloc1_fib().
 *
 * Returns:
 *	0		success; *lrt and *lrt0 are valid.
 *	EHOSTUNREACH	no route to dst could be found, or the gateway
 *			route is an active RTF_REJECT route.
 *	ENETUNREACH	the gateway lookup failed or resolved to rt0 itself.
 *	EHOSTDOWN	the (non-gateway) route itself is an active
 *			RTF_REJECT route.
 * On any error return *lrt and *lrt0 are left untouched.
 *
 * NOTE(review): this code relies on rtalloc1_fib() returning the entry
 * locked and with an extra reference, on RT_TEMP_UNLOCK() taking a
 * temporary reference before dropping the lock, and on RT_RELOCK()
 * dropping that reference again (possibly setting its argument to NULL).
 * Those helpers are defined outside this file fragment -- confirm against
 * route.h.
 */
int
rt_check_fib(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst,
    u_int fibnum)
{
	struct rtentry *rt;
	struct rtentry *rt0;

	KASSERT(*lrt0 != NULL, ("rt_check"));
	rt0 = *lrt0;
	rt = NULL;

	/* NB: the locking here is tortuous... */
	RT_LOCK(rt0);
retry:
	if (rt0 && (rt0->rt_flags & RTF_UP) == 0) {
		/* Current rt0 is useless, try get a replacement. */
		RT_UNLOCK(rt0);
		rt0 = NULL;
	}
	if (rt0 == NULL) {
		rt0 = rtalloc1_fib(dst, 1, 0UL, fibnum);
		if (rt0 == NULL) {
			return (EHOSTUNREACH);
		}
		RT_REMREF(rt0); /* don't need the reference. */
	}

	if (rt0->rt_flags & RTF_GATEWAY) {
		if ((rt = rt0->rt_gwroute) != NULL) {
			RT_LOCK(rt);		/* NB: gwroute */
			if ((rt->rt_flags & RTF_UP) == 0) {
				/* gw route is dud. ignore/lose it */
				RTFREE_LOCKED(rt); /* unref (&unlock) gwroute */
				rt = rt0->rt_gwroute = NULL;
			}
		}

		if (rt == NULL) {  /* NOT AN ELSE CLAUSE */
			RT_TEMP_UNLOCK(rt0); /* MUST return to undo this */
			rt = rtalloc1_fib(rt0->rt_gateway, 1, 0UL, fibnum);
			if ((rt == rt0) || (rt == NULL)) {
				/* the best we can do is not good enough */
				if (rt) {
					RT_REMREF(rt); /* assumes ref > 0 */
					RT_UNLOCK(rt);
				}
				/*
				 * Was spelled RT_FREE(); use the RTFREE macro
				 * named in the rest of this code.
				 */
				RTFREE(rt0); /* lock, unref, (unlock) */
				return (ENETUNREACH);
			}
			/*
			 * Relock it and lose the added reference.
			 * All sorts of things could have happened while we
			 * had no lock on it, so check for them.
			 */
			RT_RELOCK(rt0);
			if (rt0 == NULL || ((rt0->rt_flags & RTF_UP) == 0)) {
				/*
				 * Ru-roh.. what we had is no longer any good.
				 * Drop the gateway route we just looked up
				 * before starting over: 'rt' is overwritten
				 * after the retry label, so keeping it here
				 * would leak its lock and reference (and the
				 * next lookup could try to lock it again).
				 */
				RTFREE_LOCKED(rt);
				goto retry;
			}
			/*
			 * While we were away, someone replaced the gateway.
			 * Since a reference count is involved we can't just
			 * overwrite it.
			 */
			if (rt0->rt_gwroute) {
				if (rt0->rt_gwroute != rt) {
					/* Was spelled RT_FREE_LOCKED(). */
					RTFREE_LOCKED(rt);
					goto retry;
				}
			} else {
				/* gwroute takes over the lookup's reference */
				rt0->rt_gwroute = rt;
			}
		}
		RT_LOCK_ASSERT(rt);
		RT_UNLOCK(rt0);
	} else {
		/* think of rt as having the lock from now on.. */
		rt = rt0;
	}
	/* XXX why are we inspecting rmx_expire? */
	if ((rt->rt_flags & RTF_REJECT) &&
	    (rt->rt_rmx.rmx_expire == 0 ||
	    time_uptime < rt->rt_rmx.rmx_expire)) {
		RT_UNLOCK(rt);
		return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
	}

	*lrt = rt;
	*lrt0 = rt0;
	return (0);
}
--------------020906090909020706020402--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?48CC14AD.4090708>