Date: Thu, 13 Feb 2014 09:48:12 +0400
From: Gleb Smirnoff <glebius@FreeBSD.org>
To: Adrian Chadd <adrian@freebsd.org>
Cc: FreeBSD Net <freebsd-net@freebsd.org>
Subject: Re: flowtable, collisions, locking and CPU affinity
Message-ID: <20140213054812.GK26785@FreeBSD.org>
In-Reply-To: <CAJ-VmonNCzFED=20_C2fV1g1jvFNRE=N-H+09Wb2OdxdzHp9JQ@mail.gmail.com>
References: <CAJ-VmonNCzFED=20_C2fV1g1jvFNRE=N-H+09Wb2OdxdzHp9JQ@mail.gmail.com>
[-- Attachment #1 --]
On Fri, Feb 07, 2014 at 04:12:56PM -0800, Adrian Chadd wrote:
A> I've been knee deep in the flowtable code looking at some of the less
A> .. predictable ways it behaves.
A>
A> One of them is the collisions that do pop up from time to time.
A>
A> I dug into it in quite some depth and found out what's going on. This
A> assumes it's a per-CPU flowtable.
A>
A> * A flowtable lookup is performed, on say CPU #0
A> * the flowtable lookup fails, so it goes to do a flowtable insert
A> * .. but since in between the two, the flowtable "lock" is released so
A> it can do a route/adjacency lookup, and that grabs a lock
A> * .. then the flowtable insert is done on a totally different CPU
A> * .. which happens to _have_ the flowtable entry already, so it fails
A> as a collision which already has a matching entry.
A>
A> Now, the reason for this is primarily because there's no CPU pinning
A> in the lookup path and if there's contention during the route lookup
A> phase, the scheduler may decide to schedule the kernel thread on a
A> totally different CPU to the one that was running the code when the
A> lock was entered.
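
Concretely, the window is roughly the following (a sketch only;
lookup_on_cpu() and route_and_lle_lookup() are illustrative stand-ins
for the real flowtable and route internals, while critical_enter() and
critical_exit() are the real KPIs):

	critical_enter();			/* no preemption: stay on, say, CPU 0 */
	fle = lookup_on_cpu(ft, hash, key);	/* search CPU 0's flow list */
	critical_exit();			/* from here on we can migrate */

	if (fle == NULL) {
		/* Route/adjacency lookup grabs a lock and may block;   */
		/* under contention the scheduler may move us to CPU 1. */
		rt = route_and_lle_lookup(ro);

		/* flowtable_insert() does its own critical_enter() and */
		/* now lands on CPU 1's list, which may already hold    */
		/* this flow => EEXIST, counted as a collision.         */
		error = flowtable_insert(ft, hash, key, fibnum, ro, flags);
	}
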
A>
A> Now, Gleb's recent changes seem to have made the instances of this
A> drop, but he didn't set out to fix it. So there's something about his
A> changes that has changed the locking/contention profile that I was
A> using to easily reproduce it.
A>
A> In any case - the reason it's happening above is because there's no
A> actual lock held over the whole lookup/insert path. It's a per-CPU
A> critical enter/exit path, so the only way to guarantee consistency is
A> to use sched_pin() for the entirety of the function.
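
Pinning for the whole path would look roughly like this (again a sketch
with the same illustrative stand-ins; sched_pin() and sched_unpin() are
the real KPIs and keep the thread on its current CPU even across a
blocking route lookup):

	sched_pin();				/* no migration until sched_unpin() */
	critical_enter();
	fle = lookup_on_cpu(ft, hash, key);	/* CPU N's flow list */
	critical_exit();

	if (fle == NULL) {
		rt = route_and_lle_lookup(ro);	/* may block, but we wake up */
						/* on the same CPU N         */
		error = flowtable_insert(ft, hash, key, fibnum, ro, flags);
						/* inserts into CPU N's list */
	}
	sched_unpin();
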
A>
A> I'll go and test that out in a moment and see if it quietens the
A> collisions that I see in lab testing.
A>
A> Has anyone already debugged/diagnosed this? Can anyone think of an
A> alternate (better) way to fix this?
Can't we just reuse the colliding entry?
Can you evaluate the attached patch (against head) under your testing
conditions?
--
Totus tuus, Glebius.
[-- Attachment #2 --]
Index: sys/net/flowtable.c
===================================================================
--- sys/net/flowtable.c (revision 261825)
+++ sys/net/flowtable.c (working copy)
@@ -716,21 +716,39 @@ flow_full(struct flowtable *ft)
}
static int
+flow_matches(struct flentry *fle, uint32_t hash, uint32_t *key, uint8_t
+ proto, uint32_t fibnum)
+{
+
+ if (fle->f_fhash == hash &&
+ bcmp(&fle->f_flow, key, KEYLEN(fle->f_flags)) == 0 &&
+ proto == fle->f_proto && fibnum == fle->f_fibnum &&
+ (fle->f_rt->rt_flags & RTF_UP) &&
+ fle->f_rt->rt_ifp != NULL &&
+ (fle->f_lle->la_flags & LLE_VALID))
+ return (1);
+
+ return (0);
+}
+
+static struct flentry *
flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
uint32_t fibnum, struct route *ro, uint16_t flags)
{
struct flist *flist;
struct flentry *fle, *iter;
+ bitstr_t *mask;
int depth;
- bitstr_t *mask;
+ uint8_t proto;
fle = uma_zalloc(flow_zone, M_NOWAIT | M_ZERO);
if (fle == NULL)
- return (ENOMEM);
+ return (NULL);
+ proto = flags_to_proto(flags);
bcopy(key, &fle->f_flow, KEYLEN(flags));
fle->f_flags |= (flags & FL_IPV6);
- fle->f_proto = flags_to_proto(flags);
+ fle->f_proto = proto;
fle->f_rt = ro->ro_rt;
fle->f_lle = ro->ro_lle;
fle->f_fhash = hash;
@@ -748,21 +766,24 @@ flowtable_insert(struct flowtable *ft, uint32_t ha
}
depth = 0;
- FLOWSTAT_INC(ft, ft_collisions);
/*
* find end of list and make sure that we were not
* preempted by another thread handling this flow
*/
SLIST_FOREACH(iter, flist, f_next) {
- if (iter->f_fhash == hash && !flow_stale(ft, iter)) {
+ if (flow_matches(iter, hash, key, proto, fibnum)) {
/*
- * there was either a hash collision
- * or we lost a race to insert
+ * We probably migrated to another CPU after
+ * the lookup in flowtable_lookup_common() failed.
+ * It appears that this CPU already has the
+ * flow entry.
*/
+ iter->f_uptime = time_uptime;
+ iter->f_flags |= flags;
critical_exit();
+ FLOWSTAT_INC(ft, ft_collisions);
uma_zfree(flow_zone, fle);
-
- return (EEXIST);
+ return (iter);
}
depth++;
}
@@ -773,8 +794,9 @@ flowtable_insert(struct flowtable *ft, uint32_t ha
SLIST_INSERT_HEAD(flist, fle, f_next);
skip:
critical_exit();
+ FLOWSTAT_INC(ft, ft_inserts);
- return (0);
+ return (fle);
}
struct flentry *
@@ -885,12 +907,7 @@ flowtable_lookup_common(struct flowtable *ft, stru
critical_enter();
flist = flowtable_list(ft, hash);
SLIST_FOREACH(fle, flist, f_next)
- if (fle->f_fhash == hash && bcmp(&fle->f_flow, key,
- KEYLEN(fle->f_flags)) == 0 &&
- proto == fle->f_proto && fibnum == fle->f_fibnum &&
- (fle->f_rt->rt_flags & RTF_UP) &&
- fle->f_rt->rt_ifp != NULL &&
- (fle->f_lle->la_flags & LLE_VALID)) {
+ if (flow_matches(fle, hash, key, proto, fibnum)) {
fle->f_uptime = time_uptime;
fle->f_flags |= flags;
critical_exit();
@@ -968,7 +985,8 @@ flowtable_lookup_common(struct flowtable *ft, stru
}
ro->ro_lle = lle;
- if (flowtable_insert(ft, hash, key, fibnum, ro, flags) != 0) {
+ fle = flowtable_insert(ft, hash, key, fibnum, ro, flags);
+ if (fle == NULL) {
RTFREE(rt);
LLE_FREE(lle);
return (NULL);
@@ -975,10 +993,11 @@ flowtable_lookup_common(struct flowtable *ft, stru
}
success:
- if (fle != NULL && (m->m_flags & M_FLOWID) == 0) {
+ if ((m->m_flags & M_FLOWID) == 0) {
m->m_flags |= M_FLOWID;
m->m_pkthdr.flowid = fle->f_fhash;
}
+
return (fle);
}
Index: sys/net/flowtable.h
===================================================================
--- sys/net/flowtable.h (revision 261825)
+++ sys/net/flowtable.h (working copy)
@@ -39,6 +39,7 @@ struct flowtable_stat {
uint64_t ft_frees;
uint64_t ft_hits;
uint64_t ft_lookups;
+ uint64_t ft_inserts;
};
#ifdef _KERNEL
Index: usr.bin/netstat/flowtable.c
===================================================================
--- usr.bin/netstat/flowtable.c (revision 261825)
+++ usr.bin/netstat/flowtable.c (working copy)
@@ -52,6 +52,7 @@ print_stats(struct flowtable_stat *stat)
p(ft_lookups, "\t%ju lookup%s\n");
p(ft_hits, "\t%ju hit%s\n");
p2(ft_misses, "\t%ju miss%s\n");
+ p(ft_inserts, "\t%ju insert%s\n");
p(ft_collisions, "\t%ju collision%s\n");
p(ft_free_checks, "\t%ju free check%s\n");
p(ft_frees, "\t%ju free%s\n");
