Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 20 Nov 2010 21:09:35 +0200
From:      Mikolaj Golub <to.my.trociny@gmail.com>
To:        "Bjoern A. Zeeb" <bzeeb-lists@lists.zabbadoz.net>
Cc:        freebsd-hackers@freebsd.org
Subject:   Re: flowtable_cleaner/flowtable_flush livelock
Message-ID:  <868w0nolqo.fsf@kopusha.home.net>
In-Reply-To: <20101120165604.T24596@maildrop.int.zabbadoz.net> (Bjoern A. Zeeb's message of "Sat, 20 Nov 2010 17:03:13 +0000 (UTC)")
References:  <86pqu0nexd.fsf@kopusha.home.net> <20101120165604.T24596@maildrop.int.zabbadoz.net>

next in thread | previous in thread | raw e-mail | index | archive | help
--=-=-=


On Sat, 20 Nov 2010 17:03:13 +0000 (UTC) Bjoern A. Zeeb wrote:

 BAZ> I think net@ would have been a better initial place but since this
 BAZ> seems to be a problem when interacting with VIMAGE
 BAZ> freebsd-virtualization might be better.

 BAZ> What you could try is:
 BAZ> http://people.freebsd.org/~bz/20100216-10-ft-cv.diff

Ah, I recall that I had already seen this patch but did not understand what
problem it fixed, and thus did not associate it with my case
(actually, I thought you had committed all these patches to the tree a long time
ago and that I was already running a kernel with them :-).

BTW, the patch needs updating: in the current code, flow_full() wakes up
flowcleaner too, and flowcleaner sleeps for flowclean_freq instead of 10*hz
(see the attached patch).

With the patch I can no longer reproduce the livelock. Only the crash I
mentioned in my first message is still observed:

(kgdb) bt
#0  doadump () at pcpu.h:231
#1  0xc04f2789 in db_fncall (dummy1=1, dummy2=0, dummy3=-1056677760, dummy4=0xc8731860 "")
    at /usr/src/sys/ddb/db_command.c:548
#2  0xc04f2b81 in db_command (last_cmdp=0xc0e79f7c, cmd_table=0x0, dopager=1)
    at /usr/src/sys/ddb/db_command.c:445
#3  0xc04f2cda in db_command_loop () at /usr/src/sys/ddb/db_command.c:498
#4  0xc04f4bfd in db_trap (type=12, code=0) at /usr/src/sys/ddb/db_main.c:229
#5  0xc09119be in kdb_trap (type=12, code=0, tf=0xc8731a94) at /usr/src/sys/kern/subr_kdb.c:546
#6  0xc0c3da8f in trap_fatal (frame=0xc8731a94, eva=3735929074)
    at /usr/src/sys/i386/i386/trap.c:970
#7  0xc0c3e0be in trap (frame=0xc8731a94) at /usr/src/sys/i386/i386/trap.c:361
#8  0xc0c272dc in calltrap () at /usr/src/sys/i386/i386/exception.s:168
#9  0xc0988415 in strncmp (s1=0xc1fee4e0 "epair20b", 
    s2=0xdeadc0f2 <Address 0xdeadc0f2 out of bounds>, n=16) at /usr/src/sys/libkern/strncmp.c:44
#10 0xc09929d7 in ifunit_ref (name=0xc1fee4e0 "epair20b") at /usr/src/sys/net/if.c:1986
#11 0xc0996982 in ifioctl (so=0xc25649c0, cmd=3223349536, data=0xc1fee4e0 "epair20b", 
    td=0xc286c000) at /usr/src/sys/net/if.c:2475
#12 0xc09307f7 in soo_ioctl (fp=0xc1ff5af0, cmd=3223349536, data=0xc1fee4e0, 
    active_cred=0xc1d83e80, td=0xc286c000) at /usr/src/sys/kern/sys_socket.c:212
#13 0xc092a61d in kern_ioctl (td=0xc286c000, fd=3, com=3223349536, data=0xc1fee4e0 "epair20b")
    at file.h:254
#14 0xc092a7a4 in ioctl (td=0xc286c000, uap=0xc8731cec) at /usr/src/sys/kern/sys_generic.c:679
#15 0xc091f303 in syscallenter (td=0xc286c000, sa=0xc8731ce4)
    at /usr/src/sys/kern/subr_trap.c:318
#16 0xc0c3dd2f in syscall (frame=0xc8731d28) at /usr/src/sys/i386/i386/trap.c:1094
#17 0xc0c27371 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:266
#18 0x00000033 in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb) fr 10
#10 0xc09929d7 in ifunit_ref (name=0xc1fee4e0 "epair20b") at /usr/src/sys/net/if.c:1986
1986                    if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
(kgdb) p ifp
$1 = (struct ifnet *) 0xdeadc0de

I might want to report it to freebsd-virtualization unless I find that this is
a known issue.

-- 
Mikolaj Golub


--=-=-=
Content-Type: text/x-patch
Content-Disposition: attachment; filename=20101120-10-ft-cv.diff

Index: sys/net/flowtable.c
===================================================================
--- sys/net/flowtable.c	(revision 215574)
+++ sys/net/flowtable.c	(working copy)
@@ -195,7 +195,8 @@ STATIC_VNET_DEFINE(uma_zone_t, flow_ipv6_zone);
 #define	V_flow_ipv6_zone	VNET(flow_ipv6_zone)
 
 
-static struct cv 	flowclean_cv;
+static struct cv 	flowclean_f_cv;
+static struct cv 	flowclean_c_cv;
 static struct mtx	flowclean_lock;
 static uint32_t		flowclean_cycles;
 static uint32_t		flowclean_freq;
@@ -951,7 +952,7 @@ flow_full(struct flowtable *ft)
 		if ((ft->ft_flags & FL_HASH_ALL) == 0)
 			ft->ft_udp_idle = ft->ft_fin_wait_idle =
 			    ft->ft_syn_idle = ft->ft_tcp_idle = 5;
-		cv_broadcast(&flowclean_cv);
+		cv_broadcast(&flowclean_c_cv);
 	} else if (!full && ft->ft_full) {
 		flowclean_freq = 20*hz;
 		if ((ft->ft_flags & FL_HASH_ALL) == 0)
@@ -1560,14 +1561,14 @@ flowtable_cleaner(void)
 		}
 		VNET_LIST_RUNLOCK();
 
-		flowclean_cycles++;
 		/*
 		 * The 10 second interval between cleaning checks
 		 * is arbitrary
 		 */
 		mtx_lock(&flowclean_lock);
-		cv_broadcast(&flowclean_cv);
-		cv_timedwait(&flowclean_cv, &flowclean_lock, flowclean_freq);
+		flowclean_cycles++;
+		cv_broadcast(&flowclean_f_cv);
+		cv_timedwait(&flowclean_c_cv, &flowclean_lock, 10*hz);
 		mtx_unlock(&flowclean_lock);
 	}
 }
@@ -1580,8 +1581,8 @@ flowtable_flush(void *unused __unused)
 	mtx_lock(&flowclean_lock);
 	start = flowclean_cycles;
 	while (start == flowclean_cycles) {
-		cv_broadcast(&flowclean_cv);
-		cv_wait(&flowclean_cv, &flowclean_lock);
+		cv_broadcast(&flowclean_c_cv);
+		cv_wait(&flowclean_f_cv, &flowclean_lock);
 	}
 	mtx_unlock(&flowclean_lock);
 }
@@ -1613,7 +1614,8 @@ static void
 flowtable_init(const void *unused __unused)
 {
 
-	cv_init(&flowclean_cv, "flowcleanwait");
+	cv_init(&flowclean_c_cv, "c_flowcleanwait");
+	cv_init(&flowclean_f_cv, "f_flowcleanwait");
 	mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
 	EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
 	    EVENTHANDLER_PRI_ANY);

--=-=-=--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?868w0nolqo.fsf>