Date: Sat, 28 Mar 2015 22:16:29 +0300 From: Gleb Smirnoff <glebius@FreeBSD.org> To: Fabien Thomas <fabient@FreeBSD.org> Cc: svn-src-head@freebsd.org, svn-src-all@freebsd.org, src-committers@freebsd.org Subject: Re: svn commit: r280759 - head/sys/netinet Message-ID: <20150328191629.GY64665@FreeBSD.org> In-Reply-To: <20150328083443.GV64665@FreeBSD.org> References: <201503271326.t2RDQxd3056112@svn.freebsd.org> <20150328083443.GV64665@FreeBSD.org>
next in thread | previous in thread | raw e-mail | index | archive | help
--TBNym+cBXeFsS4Vs Content-Type: text/plain; charset=us-ascii Content-Disposition: inline On Sat, Mar 28, 2015 at 11:34:43AM +0300, Gleb Smirnoff wrote: T> On Fri, Mar 27, 2015 at 01:26:59PM +0000, Fabien Thomas wrote: T> F> Author: fabient T> F> Date: Fri Mar 27 13:26:59 2015 T> F> New Revision: 280759 T> F> URL: https://svnweb.freebsd.org/changeset/base/280759 T> F> T> F> Log: T> F> On multi CPU systems, we may emit successive packets with the same id. T> F> Fix the race by using an atomic operation. T> F> T> F> Differential Revision: https://reviews.freebsd.org/D2141 T> F> Obtained from: emeric.poupon@stormshield.eu T> F> MFC after: 1 week T> F> Sponsored by: Stormshield T> T> The D2141 says that benchmarking were done in presence of IPSEC, which T> of course is the bottleneck and performance of this instruction can't T> be benchmarked in its presence. Anyway, I believe that results of T> right benchmark would still show little difference between atomic and T> non-atomic increment of a shared value. T> T> I think we can use per-cpu ID counters, each CPU incrementing its T> own. If we start with random values, then probability of two packets with T> the same ID emitting at the allowed timeframe will be acceptably small. Here I've made a patch. It will apply only to fresh head, though. To apply it to stable/10 you probably need to replay these two commits before: r280787, r280788. I'd appreciate if you test it in your environments, where you observed IP id collisions. -- Totus tuus, Glebius. --TBNym+cBXeFsS4Vs Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="ip_newid-pcpu.diff" Index: ip_id.c =================================================================== --- ip_id.c (revision 280788) +++ ip_id.c (working copy) @@ -74,9 +74,10 @@ * enabled. */ -#include <sys/types.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/counter.h> #include <sys/malloc.h> -#include <sys/param.h> #include <sys/time.h> #include <sys/kernel.h> #include <sys/libkern.h> @@ -83,7 +84,7 @@ #include <sys/lock.h> #include <sys/mutex.h> #include <sys/random.h> -#include <sys/systm.h> +#include <sys/smp.h> #include <sys/sysctl.h> #include <sys/bitstring.h> @@ -92,8 +93,10 @@ #include <netinet/in.h> #include <netinet/ip_var.h> +/* + * Random ID state engine. + */ static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state"); - static VNET_DEFINE(uint16_t *, id_array); static VNET_DEFINE(bitstr_t *, id_bits); static VNET_DEFINE(int, array_ptr); @@ -109,6 +112,12 @@ #define V_random_id_total VNET(random_id_total) #define V_ip_id_mtx VNET(ip_id_mtx) +/* + * Non-random ID state engine is simply a per-cpu counter. + */ +static VNET_DEFINE(counter_u64_t, ip_id); +#define V_ip_id VNET(ip_id) + static void ip_initid(int); static int sysctl_ip_id_change(SYSCTL_HANDLER_ARGS); static void ipid_sysinit(void); @@ -191,6 +200,14 @@ return (new_id); } +uint16_t +ip_newid(void) +{ + + counter_u64_add(V_ip_id, 1); + return (htons((*(uint64_t *)zpcpu_get(V_ip_id)) & 0xffff)); +} + static void ipid_sysinit(void) { @@ -197,6 +214,9 @@ mtx_init(&V_ip_id_mtx, "ip_id_mtx", NULL, MTX_DEF); ip_initid(8192); + V_ip_id = counter_u64_alloc(M_WAITOK); + for (int i = 0; i < mp_ncpus; i++) + arc4rand(zpcpu_get_cpu(V_ip_id, i), sizeof(uint64_t), 0); } VNET_SYSINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysinit, NULL); @@ -207,5 +227,6 @@ mtx_destroy(&V_ip_id_mtx); free(V_id_array, M_IPID); free(V_id_bits, M_IPID); + counter_u64_free(V_ip_id); } VNET_SYSUNINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysuninit, NULL); Index: ip_input.c =================================================================== --- ip_input.c (revision 280779) +++ ip_input.c (working copy) @@ -331,8 +331,6 @@ struct protosw *pr; int i; - V_ip_id = time_second & 0xffff; - TAILQ_INIT(&V_in_ifaddrhead); V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); Index: ip_output.c =================================================================== --- ip_output.c (revision 280779) +++ ip_output.c (working copy) @@ -91,8 +91,6 @@ #include <security/mac/mac_framework.h> -VNET_DEFINE(uint32_t, ip_id); - #ifdef MBUF_STRESS_TEST static int mbuf_frag_size = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, Index: ip_var.h =================================================================== --- ip_var.h (revision 280779) +++ ip_var.h (working copy) @@ -174,7 +174,6 @@ struct route; struct sockopt; -VNET_DECLARE(uint32_t, ip_id); /* ip packet ctr, for ids */ VNET_DECLARE(int, ip_defttl); /* default IP ttl */ VNET_DECLARE(int, ipforwarding); /* ip forwarding */ #ifdef IPSTEALTH @@ -229,6 +228,7 @@ struct mbuf *); void ip_slowtimo(void); uint16_t ip_randomid(void); +uint16_t ip_newid(void); int rip_ctloutput(struct socket *, struct sockopt *); void rip_ctlinput(int, struct sockaddr *, void *); void rip_init(void); @@ -305,19 +305,7 @@ VNET_DECLARE(int, ip_do_randomid); #define V_ip_do_randomid VNET(ip_do_randomid) -static __inline uint16_t -ip_newid(void) -{ - uint16_t res; - if (V_ip_do_randomid != 0) - return (ip_randomid()); - else { - res = atomic_fetchadd_32(&V_ip_id, 1) & 0xFFFF; - return (htons(res)); - } -} - #endif /* _KERNEL */ #endif /* !_NETINET_IP_VAR_H_ */ --TBNym+cBXeFsS4Vs--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20150328191629.GY64665>