Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 15 Dec 2016 21:17:41 +0200
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        Kurt Lidl <lidl@pix.net>
Cc:        freebsd-hackers@freebsd.org
Subject:   Re: How to use sem_timedwait?
Message-ID:  <20161215191741.GX94325@kib.kiev.ua>
In-Reply-To: <7a01e4c8-7d58-ad73-4107-4e14b9c89a9d@pix.net>
References:  <20161214074228.zh6q5zya2gszw4g6@hal9000.meka.no-ip.org> <1481748960.1889.398.camel@freebsd.org> <20161215002906.mllorgvvuovbdtze@hal9000.meka.no-ip.org> <1481775511.1889.450.camel@freebsd.org> <1481776532.1889.461.camel@freebsd.org> <44ad49f5-75ec-7c1d-28dc-25df08c67148@pix.net> <20161215171236.GU94325@kib.kiev.ua> <7a01e4c8-7d58-ad73-4107-4e14b9c89a9d@pix.net>

next in thread | previous in thread | raw e-mail | index | archive | help
On Thu, Dec 15, 2016 at 01:53:49PM -0500, Kurt Lidl wrote:
> On 12/15/16 12:12 PM, Konstantin Belousov wrote:
> > On Thu, Dec 15, 2016 at 11:47:33AM -0500, Kurt Lidl wrote:
> >> When I run this code on a sparc64, it panics the machine!
> >>
> >> lidl@ton-148: ./x
> >> panic: trap: fast data access mmu miss (kernel)
> >> KDB: stack backtrace:
> >> vpanic() at vpanic+0xfc
> >> panic() at panic+0x20
> >> trap() at trap+0x554
> >> -- fast data access mmu miss tar=0xc09eda8f %o7=0xc039b344 --
> >> userland() at filt_timerattach+0x78
> >> user trace: trap %o7=0xc039b344
> >> pc 0xc039b378, sp 0xedebe761
> >> done
> >> KDB: enter: panic
> >> [ thread pid 1263 tid 100583 ]
> >> Stopped at      kdb_enter+0x80: ta              %xcc, 1
> >> db>
> >>
> >> Oops.
> > Could you look up the source line for filt_timerattach+0x78 in your kernel ?
> 
> I was able to do "call doadump" from ddb, and savecore worked.
> 
> root@ton-136: kgdb /usr/obj/usr/src/sys/V120/kernel.debug 
> /var/crash/vmcore.1
> 
> GNU gdb 6.1.1 [FreeBSD]
> Copyright 2004 Free Software Foundation, Inc.
> GDB is free software, covered by the GNU General Public License, and you are
> welcome to change it and/or distribute copies of it under certain 
> conditions.
> Type "show copying" to see the conditions.
> There is absolutely no warranty for GDB.  Type "show warranty" for details.
> This GDB was configured as "sparc64-marcel-freebsd"...
> 
> Unread portion of the kernel message buffer:
> panic: trap: fast data access mmu miss (kernel)
> KDB: stack backtrace:
> vpanic() at vpanic+0xfc
> panic() at panic+0x20
> trap() at trap+0x554
> -- fast data access mmu miss tar=0xc09eda8f %o7=0xc039b344 --
> userland() at filt_timerattach+0x78
> user trace: trap %o7=0xc039b344
> pc 0xc039b378, sp 0xedebe761
> done
> KDB: enter: panic
> 
> No symbol "stopped_cpus" in current context.
> Reading symbols from /boot/kernel/zfs.ko...Reading symbols from 
> /usr/lib/debug//boot/kernel/zfs.ko.debug...done.
> done.
> Loaded symbols for /boot/kernel/zfs.ko
> Reading symbols from /boot/kernel/opensolaris.ko...Reading symbols from 
> /usr/lib/debug//boot/kernel/opensolaris.ko.debug...done.
> done.
> Loaded symbols for /boot/kernel/opensolaris.ko
> Reading symbols from /boot/kernel/geom_mirror.ko...Reading symbols from 
> /usr/lib/debug//boot/kernel/geom_mirror.ko.debug...done.
> done.
> Loaded symbols for /boot/kernel/geom_mirror.ko
> Reading symbols from /boot/kernel/tmpfs.ko...Reading symbols from 
> /usr/lib/debug//boot/kernel/tmpfs.ko.debug...done.
> done.
> Loaded symbols for /boot/kernel/tmpfs.ko
> Reading symbols from /boot/kernel/fdescfs.ko...Reading symbols from 
> /usr/lib/debug//boot/kernel/fdescfs.ko.debug...done.
> done.
> Loaded symbols for /boot/kernel/fdescfs.ko
> #0  0x00000000c03f0040 in doadump (textdump=0) at 
> /usr/src/sys/kern/kern_shutdown.c:297
> 297		savectx(&dumppcb);
> (kgdb) bt
> #0  0x00000000c03f0040 in doadump (textdump=0) at 
> /usr/src/sys/kern/kern_shutdown.c:297
> #1  0x00000000c0118f80 in db_fncall (dummy1=1, dummy2=false, dummy3=12289,
>      dummy4=0xedebe410 "??????0\001") at /usr/src/sys/ddb/db_command.c:581
> #2  0x00000000c0119484 in db_command (last_cmdp=0xc09ce890, 
> cmd_table=0x0, dopager=1)
>      at /usr/src/sys/ddb/db_command.c:453
> #3  0x00000000c0119794 in db_command_loop () at 
> /usr/src/sys/ddb/db_command.c:506
> #4  0x00000000c011dfe0 in db_trap (type=<value optimized out>, code=0)
>      at /usr/src/sys/ddb/db_main.c:248
> #5  0x00000000c043a7c4 in kdb_trap (type=107, code=0, tf=0xedebe920)
>      at /usr/src/sys/kern/subr_kdb.c:654
> #6  0x00000000c07a3a28 in trap (tf=0xedebe920) at 
> /usr/src/sys/sparc64/sparc64/trap.c:344
> #7  0x00000000c0099060 in tl1_trap ()
> #8  0x00000000c043a4e0 in kdb_enter (why=0x12 <Address 0x12 out of bounds>,
>      msg=0xc091dfa8 "panic") at /usr/src/sys/kern/subr_kdb.c:442
> #9  0x00000000c043a4c8 in kdb_enter (why=0xc091dfa8 "panic", 
> msg=0xc091dfa8 "panic")
>      at /usr/src/sys/kern/subr_kdb.c:441
> #10 0x00000000c03f0888 in vpanic (fmt=0xc094cde0 "trap: %s (kernel)", 
> ap=0xedebecf8)
>      at /usr/src/sys/kern/kern_shutdown.c:752
> #11 0x00000000c03f0908 in panic (fmt=0xc094cde0 "trap: %s (kernel)")
>      at /usr/src/sys/kern/kern_shutdown.c:690
> #12 0x00000000c07a3c9c in trap (tf=0xedebee60) at 
> /usr/src/sys/sparc64/sparc64/trap.c:410
> #13 0x00000000c0099060 in tl1_trap ()
> #14 0x00000000c039b378 in filt_timerattach (kn=0x418937) at 
> /usr/src/sys/kern/kern_event.c:675
> ---Type <return> to continue, or q <return> to quit---
> #15 0x00000000c039b34c in filt_timerattach (kn=0xfffff80016629e80)
>      at /usr/src/sys/kern/kern_event.c:671
> #16 0x00000000c039df24 in kqueue_register (kq=0xfffff800010da300, 
> kev=0xedebf230,
>      td=0xfffff800163fa000, waitok=0) at /usr/src/sys/kern/kern_event.c:1316
> #17 0x00000000c039e4ec in kqueue_kevent (kq=0xfffff800010da300, 
> td=0xfffff800163fa000,
>      nchanges=1, nevents=1, k_ops=0xedebf5e8, timeout=0x0) at 
> /usr/src/sys/kern/kern_event.c:1001
> #18 0x00000000c039f15c in kern_kevent_fp (td=0xfffff800163fa000, 
> fp=0xfffff80001e807d0,
>      nchanges=1, nevents=1, k_ops=0xedebf5e8, timeout=0x0) at 
> /usr/src/sys/kern/kern_event.c:1032
> #19 0x00000000c039f21c in kern_kevent (td=0xfffff800163fa000, fd=3, 
> nchanges=1, nevents=1,
>      k_ops=0xedebf5e8, timeout=0x0) at /usr/src/sys/kern/kern_event.c:975
> #20 0x00000000c039f3e0 in sys_kevent (td=0xfffff800163fa000, uap=0xedebf768)
>      at /usr/src/sys/kern/kern_event.c:907
> #21 0x00000000c07a2a8c in syscall (tf=0xedebf880) at subr_syscall.c:135
> #22 0x00000000c0098e40 in tl0_intr ()
> #23 0x0000000000000000 in ?? ()
> (kgdb) up 14
> #14 0x00000000c039b378 in filt_timerattach (kn=0x418937) at 
> /usr/src/sys/kern/kern_event.c:675
> 675		ncallouts = atomic_load_explicit(&kq_ncallouts, memory_order_relaxed);
> (kgdb) list *0x00000000c039b378
> 0xc039b378 is in filt_timerattach (/usr/src/sys/kern/kern_event.c:675).
> 670	
> 671		to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags);
> 672		if (to < 0)
> 673			return (EINVAL);
> 674	
> 675		ncallouts = atomic_load_explicit(&kq_ncallouts, memory_order_relaxed);
> 676		do {
> 677			if (ncallouts >= kq_calloutmax)
> 678				return (ENOMEM);
> 679		} while (!atomic_compare_exchange_weak_explicit(&kq_ncallouts,
Note that kgdb has some trouble interpreting the stack, since it reported
two frames for filt_timerattach(), frames 14 and 15.  I would trust the
ddb trace more.

As such, could you compare the filt_timerattach+0x78 address with the pc for
frames 14 and 15 to see which one is more trustworthy?

That said, I would not be too surprised if the issue is indeed in the
emulation of C11 atomics on old gcc.  The reasons to use stdatomic there
were silly, so if this breaks sparc64 I would just switch to atomic.h.

Could you try the patch below and see if the problem goes away?

diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index 36fe20fbc91..b734cf271bc 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -48,7 +48,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/fcntl.h>
 #include <sys/kthread.h>
 #include <sys/selinfo.h>
-#include <sys/stdatomic.h>
 #include <sys/queue.h>
 #include <sys/event.h>
 #include <sys/eventvar.h>
@@ -69,6 +68,7 @@ __FBSDID("$FreeBSD$");
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
+#include <machine/atomic.h>
 
 #include <vm/uma.h>
 
@@ -188,7 +188,7 @@ static struct filterops user_filtops = {
 };
 
 static uma_zone_t	knote_zone;
-static atomic_uint	kq_ncallouts = ATOMIC_VAR_INIT(0);
+static unsigned int	kq_ncallouts = 0;
 static unsigned int 	kq_calloutmax = 4 * 1024;
 SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
     &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");
@@ -672,13 +672,11 @@ filt_timerattach(struct knote *kn)
 	if (to < 0)
 		return (EINVAL);
 
-	ncallouts = atomic_load_explicit(&kq_ncallouts, memory_order_relaxed);
 	do {
+		ncallouts = kq_ncallouts;
 		if (ncallouts >= kq_calloutmax)
 			return (ENOMEM);
-	} while (!atomic_compare_exchange_weak_explicit(&kq_ncallouts,
-	    &ncallouts, ncallouts + 1, memory_order_relaxed,
-	    memory_order_relaxed));
+	} while (!atomic_cmpset_int(&kq_ncallouts, ncallouts, ncallouts + 1));
 
 	kn->kn_flags |= EV_CLEAR;		/* automatically set */
 	kn->kn_status &= ~KN_DETACHED;		/* knlist_add clears it */
@@ -703,7 +701,7 @@ filt_timerdetach(struct knote *kn)
 	callout_drain(calloutp);
 	free(calloutp, M_KQUEUE);
 	free(kn->kn_ptr.p_nexttime, M_KQUEUE);
-	old = atomic_fetch_sub_explicit(&kq_ncallouts, 1, memory_order_relaxed);
+	old = atomic_fetchadd_int(&kq_ncallouts, -1);
 	KASSERT(old > 0, ("Number of callouts cannot become negative"));
 	kn->kn_status |= KN_DETACHED;	/* knlist_remove sets it */
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20161215191741.GX94325>