Date: Thu, 10 Dec 2015 16:13:07 +0100 From: Hans Petter Selasky <hps@selasky.org> To: "Alexander V. Chernikov" <melifaro@freebsd.org>, Adrian Chadd <adrian@freebsd.org> Cc: FreeBSD Net <freebsd-net@freebsd.org>, freebsd-current <freebsd-current@freebsd.org>, Randall Stewart <rrs@netflix.com> Subject: Re: panic in arptimer in r289937 Message-ID: <56699683.5030203@selasky.org> In-Reply-To: <5661EF10.8060300@selasky.org> References: <CAJ-VmonvVyTNuYv_as41yPCFdfR5T3FE45DP9MKAc-eyzXzPUg@mail.gmail.com> <2739461446298483@web2h.yandex.ru> <CAJ-Vmo=JjHonDqOYK%2BJDaf9581dRU5_KoaSTnY27JnzQm0v56w@mail.gmail.com> <1733241446303675@web19h.yandex.ru> <5661EAAF.1010906@selasky.org> <5661EF10.8060300@selasky.org>
next in thread | previous in thread | raw e-mail | index | archive | help
Hi, Here is the backtrace for a reproducable panic seen with arptimer(): > #0 doadump (textdump=0) at pcpu.h:221 > #1 0xffffffff80385afb in db_dump (dummy=<value optimized out>, dummy2=false, dummy3=0, dummy4=0x0) at /usr/img/freebsd/sys/ddb/db_command.c:533 > #2 0xffffffff803858ee in db_command (cmd_table=0x0) at /usr/img/freebsd/sys/ddb/db_command.c:440 > #3 0xffffffff80385684 in db_command_loop () at /usr/img/freebsd/sys/ddb/db_command.c:493 > #4 0xffffffff8038818b in db_trap (type=<value optimized out>, code=0) at /usr/img/freebsd/sys/ddb/db_main.c:251 > #5 0xffffffff80ae0973 in kdb_trap (type=12, code=0, tf=<value optimized out>) at /usr/img/freebsd/sys/kern/subr_kdb.c:654 > #6 0xffffffff80f276f1 in trap_fatal (frame=0xfffffe00f59f4950, eva=<value optimized out>) at /usr/img/freebsd/sys/amd64/amd64/trap.c:829 > #7 0xffffffff80f27924 in trap_pfault (frame=0xfffffe00f59f4950, usermode=<value optimized out>) at /usr/img/freebsd/sys/amd64/amd64/trap.c:684 > #8 0xffffffff80f270de in trap (frame=0xfffffe00f59f4950) at /usr/img/freebsd/sys/amd64/amd64/trap.c:435 > #9 0xffffffff80f0a347 in calltrap () at /usr/img/freebsd/sys/amd64/amd64/exception.S:234 > #10 0xffffffff80be9e3d in arptimer (arg=0xfffff8011d0fda00) at atomic.h:184 > #11 0xffffffff80ab54f1 in softclock_call_cc (c=0xfffff8011d0fdaa8, cc=0xffffffff81ccd600, direct=<value optimized out>) > at /usr/img/freebsd/sys/kern/kern_timeout.c:832 > #12 0xffffffff80ab5814 in softclock (arg=0xffffffff81ccd600) at /usr/img/freebsd/sys/kern/kern_timeout.c:921 > #13 0xffffffff80a5d7f6 in intr_event_execute_handlers (p=<value optimized out>, ie=0xfffff80003998b00) at /usr/img/freebsd/sys/kern/kern_intr.c:1262 > #14 0xffffffff80a5de06 in ithread_loop (arg=0xfffff8000396cde0) at /usr/img/freebsd/sys/kern/kern_intr.c:1275 > #15 0xffffffff80a5a87c in fork_exit (callout=0xffffffff80a5dd60 <ithread_loop>, arg=0xfffff8000396cde0, frame=0xfffffe00f59f4c00) > at /usr/img/freebsd/sys/kern/kern_fork.c:1011 > #16 0xffffffff80f0a87e in fork_trampoline () at /usr/img/freebsd/sys/amd64/amd64/exception.S:609 > #17 0x0000000000000000 in ?? () (kgdb) print ((struct llentry *)arg)[0] $5 = { lle_next = { le_next = 0x0, le_prev = 0xfffff80069b5fa98 }, r_l3addr = { addr4 = { s_addr = 1563742475 }, addr6 = { __u6_addr = { __u6_addr8 = 0xfffff8011d0fda10 "\v�4]", __u6_addr16 = 0xfffff8011d0fda10, __u6_addr32 = 0xfffff8011d0fda10 } } }, ll_addr = { mac_aligned = 121984137371108, mac16 = 0xfffff8011d0fda20, mac8 = 0xfffff8011d0fda20 "�\035-��n" }, r_flags = 1, r_skip_req = 1, spare1 = 0, lle_tbl = 0xfffff80005653300, lle_head = 0xfffff80069b5fa98, lle_free = 0xffffffff80bf2270 <in_lltable_destroy_lle>, la_hold = 0x0, la_numheld = 0, la_expire = 12422, la_flags = 1, la_asked = 0, la_preempt = 5, ln_state = 2, ln_router = 0, ln_ntick = 0, lle_refcnt = 1, lle_chain = { le_next = 0x0, le_prev = 0x0 }, lle_timer = { c_links = { le = { le_next = 0x0, le_prev = 0xffffffff81ccd718 }, sle = { sle_next = 0x0 }, tqe = { tqe_next = 0x0, tqe_prev = 0xffffffff81ccd718 } }, c_time = 53354272998546, c_precision = 268435437, c_arg = 0xfffff8011d0fda00, c_func = 0xffffffff80be9950 <arptimer>, c_lock = 0x0, c_flags = 16, c_cpu = 0 }, lle_lock = { lock_object = { lo_name = 0xffffffff8144abf0 "lle", lo_flags = 90374144, lo_data = 0, lo_witness = 0x0 }, rw_lock = 1 }, req_mtx = { lock_object = { lo_name = 0xffffffff8144abf4 "lle req", lo_flags = 16973824, lo_data = 0, lo_witness = 0x0 }, mtx_lock = 4 } } (kgdb) print /x ((struct llentry *)arg)->lle_tbl[0] $6 = { llt_link = { sle_next = 0xfffff8004be51900 }, llt_af = 0x1d243aa0, llt_hsize = 0xfffff801, lle_head = 0xfffff800053e5330, llt_ifp = 0xd04, llt_lookup = 0x0, llt_alloc_entry = 0xfffffe0001a472f0, llt_delete_entry = 0xfffff800504a5100, llt_prefix_free = 0xfffff800503d8c88, llt_dump_entry = 0x0, llt_hash = 0x0, llt_match_prefix = 0x0, llt_free_entry = 0x0, llt_foreach_entry = 0x0, llt_link_entry = 0x0, llt_unlink_entry = 0x38e425e, llt_fill_sa_entry = 0x0, llt_free_tbl = 0xfffff8011d243ab0 } It appears arptimer() was called after lltable_unlink_entry() was called, because la_flags does not have the LLE_LINKED bit set, which can happen!! If arptimer() is firing exactly when we call lltable_unlink_entry(), then arptimer() will refer to freed memory. Does the following patch make sense? > Index: netinet/if_ether.c > =================================================================== > --- netinet/if_ether.c (revision 291256) > +++ netinet/if_ether.c (working copy) > @@ -185,7 +185,13 @@ > LLE_WUNLOCK(lle); > return; > } > - ifp = lle->lle_tbl->llt_ifp; > + if (lle->la_flags & LLE_LINKED) { > + ifp = lle->lle_tbl->llt_ifp; > + } else { > + /* XXX RACE entry has been freed */ > + llentry_free(lle); > + return; > + } > CURVNET_SET(ifp->if_vnet); > > if ((lle->la_flags & LLE_DELETED) == 0) { If you need more information from the dump, let me know. --HPS
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?56699683.5030203>