Date: Thu, 19 Jul 2012 11:43:32 +0000 (UTC) From: "Alexander V. Chernikov" <melifaro@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org Subject: svn commit: r238619 - in stable/8/sys: conf modules/netgraph/netflow netgraph/netflow Message-ID: <201207191143.q6JBhWWe077343@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: melifaro Date: Thu Jul 19 11:43:31 2012 New Revision: 238619 URL: http://svn.freebsd.org/changeset/base/238619 Log: MFC r219182, r219229, r220769, r223706, r223787, r223822, r232921, r237162, r237163, r237164, r237226, r237227 Merge netflow v9 support (kernel side) r219182 - Add support for NetFlow version 9 into ng_netflow(4) node. r219229 - Unbreak the build for no options INET6. r220769 - ng_netflow_cache_init() can be void. r223706 - Fix double free. r223787 - Eliminate flow6_hash_entry in favor of flow_hash_entry r223822 - Add missing unlocks. r232921 - Use rt_numfibs variable instead of compile-time RT_NUMFIBS. r237162 - Fix improper L4 header handling for IPv6 packets passed via DLT_RAW. r237163 - Set netflow v9 observation domain value to fib number instead of node id r237164 - Use time_uptime instead of getnanotime for accouting integer number of seconds. r237226 - Simplify IP pointer recovery in case of mbuf reallocation. r237227 - Make radix lookup on src and dst flow addresses optional No objection from: glebius Approved by: ae(mentor) Added: stable/8/sys/netgraph/netflow/netflow_v9.c - copied, changed from r219182, head/sys/netgraph/netflow/netflow_v9.c stable/8/sys/netgraph/netflow/netflow_v9.h - copied unchanged from r219182, head/sys/netgraph/netflow/netflow_v9.h Modified: stable/8/sys/conf/files stable/8/sys/modules/netgraph/netflow/Makefile stable/8/sys/netgraph/netflow/netflow.c stable/8/sys/netgraph/netflow/netflow.h stable/8/sys/netgraph/netflow/ng_netflow.c stable/8/sys/netgraph/netflow/ng_netflow.h Directory Properties: stable/8/sys/ (props changed) Modified: stable/8/sys/conf/files ============================================================================== --- stable/8/sys/conf/files Thu Jul 19 11:18:41 2012 (r238618) +++ stable/8/sys/conf/files Thu Jul 19 11:43:31 2012 (r238619) @@ -2533,6 +2533,7 @@ netgraph/bluetooth/socket/ng_btsocket_l2 netgraph/bluetooth/socket/ng_btsocket_rfcomm.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_sco.c optional netgraph_bluetooth_socket netgraph/netflow/netflow.c optional netgraph_netflow +netgraph/netflow/netflow_v9.c optional netgraph_netflow netgraph/netflow/ng_netflow.c optional netgraph_netflow netgraph/ng_UI.c optional netgraph_UI netgraph/ng_async.c optional netgraph_async Modified: stable/8/sys/modules/netgraph/netflow/Makefile ============================================================================== --- stable/8/sys/modules/netgraph/netflow/Makefile Thu Jul 19 11:18:41 2012 (r238618) +++ stable/8/sys/modules/netgraph/netflow/Makefile Thu Jul 19 11:43:31 2012 (r238619) @@ -3,9 +3,20 @@ # Author: Gleb Smirnoff <glebius@freebsd.org> # +.include <bsd.own.mk> + .PATH: ${.CURDIR}/../../../netgraph/netflow KMOD= ng_netflow -SRCS= ng_netflow.c netflow.c +SRCS= ng_netflow.c netflow.c netflow_v9.c opt_inet6.h opt_route.h + +.if !defined(KERNBUILDDIR) + +.if ${MK_INET6_SUPPORT} != "no" +opt_inet6.h: + echo "#define INET6 1" > ${.TARGET} +.endif + +.endif .include <bsd.kmod.mk> Modified: stable/8/sys/netgraph/netflow/netflow.c ============================================================================== --- stable/8/sys/netgraph/netflow/netflow.c Thu Jul 19 11:18:41 2012 (r238618) +++ stable/8/sys/netgraph/netflow/netflow.c Thu Jul 19 11:43:31 2012 (r238619) @@ -1,4 +1,5 @@ /*- + * Copyright (c) 2010-2011 Alexander V. Chernikov <melifaro@ipfw.ru> * Copyright (c) 2004-2005 Gleb Smirnoff <glebius@FreeBSD.org> * Copyright (c) 2001-2003 Roman V. Palagin <romanp@unshadow.net> * All rights reserved. @@ -30,6 +31,8 @@ static const char rcs_id[] = "@(#) $FreeBSD$"; +#include "opt_inet6.h" +#include "opt_route.h" #include <sys/param.h> #include <sys/kernel.h> #include <sys/limits.h> @@ -37,14 +40,18 @@ static const char rcs_id[] = #include <sys/syslog.h> #include <sys/systm.h> #include <sys/socket.h> +#include <sys/endian.h> #include <machine/atomic.h> +#include <machine/stdarg.h> #include <net/if.h> #include <net/route.h> +#include <net/ethernet.h> #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/ip.h> +#include <netinet/ip6.h> #include <netinet/tcp.h> #include <netinet/udp.h> @@ -52,6 +59,7 @@ static const char rcs_id[] = #include <netgraph/netgraph.h> #include <netgraph/netflow/netflow.h> +#include <netgraph/netflow/netflow_v9.h> #include <netgraph/netflow/ng_netflow.h> #define NBUCKETS (65536) /* must be power of 2 */ @@ -83,25 +91,30 @@ static const char rcs_id[] = */ #define SMALL(fle) (fle->f.packets <= 4) -/* - * Cisco uses milliseconds for uptime. Bad idea, since it overflows - * every 48+ days. But we will do same to keep compatibility. This macro - * does overflowable multiplication to 1000. - */ -#define MILLIUPTIME(t) (((t) << 9) + /* 512 */ \ - ((t) << 8) + /* 256 */ \ - ((t) << 7) + /* 128 */ \ - ((t) << 6) + /* 64 */ \ - ((t) << 5) + /* 32 */ \ - ((t) << 3)) /* 8 */ MALLOC_DECLARE(M_NETFLOW_HASH); MALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash"); static int export_add(item_p, struct flow_entry *); -static int export_send(priv_p, item_p, int flags); +static int export_send(priv_p, fib_export_p, item_p, int); + +static int hash_insert(priv_p, struct flow_hash_entry *, struct flow_rec *, int, uint8_t, uint8_t); +#ifdef INET6 +static int hash6_insert(priv_p, struct flow_hash_entry *, struct flow6_rec *, int, uint8_t, uint8_t); +#endif + +static __inline void expire_flow(priv_p, fib_export_p, struct flow_entry *, int); -/* Generate hash for a given flow record. */ +/* + * Generate hash for a given flow record. + * + * FIB is not used here, because: + * most VRFS will carry public IPv4 addresses which are unique even + * without FIB private addresses can overlap, but this is worked out + * via flow_rec bcmp() containing fib id. In IPv6 world addresses are + * all globally unique (it's not fully true, there is FC00::/7 for example, + * but chances of address overlap are MUCH smaller) + */ static __inline uint32_t ip_hash(struct flow_rec *r) { @@ -115,6 +128,24 @@ ip_hash(struct flow_rec *r) } } +#ifdef INET6 +/* Generate hash for a given flow6 record. Use lower 4 octets from v6 addresses */ +static __inline uint32_t +ip6_hash(struct flow6_rec *r) +{ + switch (r->r_ip_p) { + case IPPROTO_TCP: + case IPPROTO_UDP: + return FULL_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3], + r->dst.r_dst6.__u6_addr.__u6_addr32[3], r->r_sport, + r->r_dport); + default: + return ADDR_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3], + r->dst.r_dst6.__u6_addr.__u6_addr32[3]); + } +} +#endif + /* This is callback from uma(9), called on alloc. */ static int uma_ctor_flow(void *mem, int size, void *arg, int how) @@ -138,21 +169,46 @@ uma_dtor_flow(void *mem, int size, void atomic_subtract_32(&priv->info.nfinfo_used, 1); } +#ifdef INET6 +/* This is callback from uma(9), called on alloc. */ +static int +uma_ctor_flow6(void *mem, int size, void *arg, int how) +{ + priv_p priv = (priv_p )arg; + + if (atomic_load_acq_32(&priv->info.nfinfo_used6) >= CACHESIZE) + return (ENOMEM); + + atomic_add_32(&priv->info.nfinfo_used6, 1); + + return (0); +} + +/* This is callback from uma(9), called on free. */ +static void +uma_dtor_flow6(void *mem, int size, void *arg) +{ + priv_p priv = (priv_p )arg; + + atomic_subtract_32(&priv->info.nfinfo_used6, 1); +} +#endif + /* * Detach export datagram from priv, if there is any. * If there is no, allocate a new one. */ static item_p -get_export_dgram(priv_p priv) +get_export_dgram(priv_p priv, fib_export_p fe) { item_p item = NULL; - mtx_lock(&priv->export_mtx); - if (priv->export_item != NULL) { - item = priv->export_item; - priv->export_item = NULL; + mtx_lock(&fe->export_mtx); + if (fe->exp.item != NULL) { + item = fe->exp.item; + fe->exp.item = NULL; } - mtx_unlock(&priv->export_mtx); + mtx_unlock(&fe->export_mtx); if (item == NULL) { struct netflow_v5_export_dgram *dgram; @@ -178,20 +234,20 @@ get_export_dgram(priv_p priv) * Re-attach incomplete datagram back to priv. * If there is already another one, then send incomplete. */ static void -return_export_dgram(priv_p priv, item_p item, int flags) +return_export_dgram(priv_p priv, fib_export_p fe, item_p item, int flags) { /* * It may happen on SMP, that some thread has already * put its item there, in this case we bail out and * send what we have to collector. */ - mtx_lock(&priv->export_mtx); - if (priv->export_item == NULL) { - priv->export_item = item; - mtx_unlock(&priv->export_mtx); + mtx_lock(&fe->export_mtx); + if (fe->exp.item == NULL) { + fe->exp.item = item; + mtx_unlock(&fe->export_mtx); } else { - mtx_unlock(&priv->export_mtx); - export_send(priv, item, flags); + mtx_unlock(&fe->export_mtx); + export_send(priv, fe, item, flags); } } @@ -200,20 +256,55 @@ return_export_dgram(priv_p priv, item_p * full, then call export_send(). */ static __inline void -expire_flow(priv_p priv, item_p *item, struct flow_entry *fle, int flags) +expire_flow(priv_p priv, fib_export_p fe, struct flow_entry *fle, int flags) { - if (*item == NULL) - *item = get_export_dgram(priv); - if (*item == NULL) { - atomic_add_32(&priv->info.nfinfo_export_failed, 1); - uma_zfree_arg(priv->zone, fle, priv); - return; + struct netflow_export_item exp; + uint16_t version = fle->f.version; + + if ((priv->export != NULL) && (version == IPVERSION)) { + exp.item = get_export_dgram(priv, fe); + if (exp.item == NULL) { + atomic_add_32(&priv->info.nfinfo_export_failed, 1); + if (priv->export9 != NULL) + atomic_add_32(&priv->info.nfinfo_export9_failed, 1); + /* fle definitely contains IPv4 flow */ + uma_zfree_arg(priv->zone, fle, priv); + return; + } + + if (export_add(exp.item, fle) > 0) + export_send(priv, fe, exp.item, flags); + else + return_export_dgram(priv, fe, exp.item, NG_QUEUE); } - if (export_add(*item, fle) > 0) { - export_send(priv, *item, flags); - *item = NULL; + + if (priv->export9 != NULL) { + exp.item9 = get_export9_dgram(priv, fe, &exp.item9_opt); + if (exp.item9 == NULL) { + atomic_add_32(&priv->info.nfinfo_export9_failed, 1); + if (version == IPVERSION) + uma_zfree_arg(priv->zone, fle, priv); +#ifdef INET6 + else if (version == IP6VERSION) + uma_zfree_arg(priv->zone6, fle, priv); +#endif + else + panic("ng_netflow: Unknown IP proto: %d", version); + return; + } + + if (export9_add(exp.item9, exp.item9_opt, fle) > 0) + export9_send(priv, fe, exp.item9, exp.item9_opt, flags); + else + return_export9_dgram(priv, fe, exp.item9, exp.item9_opt, NG_QUEUE); } - uma_zfree_arg(priv->zone, fle, priv); + + if (version == IPVERSION) + uma_zfree_arg(priv->zone, fle, priv); +#ifdef INET6 + else if (version == IP6VERSION) + uma_zfree_arg(priv->zone6, fle, priv); +#endif } /* Get a snapshot of node statistics */ @@ -234,9 +325,9 @@ ng_netflow_copyinfo(priv_p priv, struct * as this was done in previous version. Need to test & profile * to be sure. */ -static __inline int -hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, - int plen, uint8_t tcp_flags) +static int +hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, + int plen, uint8_t flags, uint8_t tcp_flags) { struct flow_entry *fle; struct sockaddr_in sin; @@ -255,6 +346,7 @@ hash_insert(priv_p priv, struct flow_has * we can safely edit it. */ + fle->f.version = IPVERSION; bcopy(r, &fle->f.r, sizeof(struct flow_rec)); fle->f.bytes = plen; fle->f.packets = 1; @@ -266,46 +358,48 @@ hash_insert(priv_p priv, struct flow_has * First we do route table lookup on destination address. So we can * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases. */ - bzero(&sin, sizeof(sin)); - sin.sin_len = sizeof(struct sockaddr_in); - sin.sin_family = AF_INET; - sin.sin_addr = fle->f.r.r_dst; - /* XXX MRT 0 as a default.. need the m here to get fib */ - rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, 0); - if (rt != NULL) { - fle->f.fle_o_ifx = rt->rt_ifp->if_index; - - if (rt->rt_flags & RTF_GATEWAY && - rt->rt_gateway->sa_family == AF_INET) - fle->f.next_hop = - ((struct sockaddr_in *)(rt->rt_gateway))->sin_addr; - - if (rt_mask(rt)) - fle->f.dst_mask = bitcount32(((struct sockaddr_in *) - rt_mask(rt))->sin_addr.s_addr); - else if (rt->rt_flags & RTF_HOST) - /* Give up. We can't determine mask :( */ - fle->f.dst_mask = 32; + if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) { + bzero(&sin, sizeof(sin)); + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_family = AF_INET; + sin.sin_addr = fle->f.r.r_dst; + rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib); + if (rt != NULL) { + fle->f.fle_o_ifx = rt->rt_ifp->if_index; + + if (rt->rt_flags & RTF_GATEWAY && + rt->rt_gateway->sa_family == AF_INET) + fle->f.next_hop = + ((struct sockaddr_in *)(rt->rt_gateway))->sin_addr; + + if (rt_mask(rt)) + fle->f.dst_mask = bitcount32(((struct sockaddr_in *) + rt_mask(rt))->sin_addr.s_addr); + else if (rt->rt_flags & RTF_HOST) + /* Give up. We can't determine mask :( */ + fle->f.dst_mask = 32; - RTFREE_LOCKED(rt); + RTFREE_LOCKED(rt); + } } /* Do route lookup on source address, to fill in src_mask. */ - bzero(&sin, sizeof(sin)); - sin.sin_len = sizeof(struct sockaddr_in); - sin.sin_family = AF_INET; - sin.sin_addr = fle->f.r.r_src; - /* XXX MRT 0 as a default revisit. need the mbuf for fib*/ - rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, 0); - if (rt != NULL) { - if (rt_mask(rt)) - fle->f.src_mask = bitcount32(((struct sockaddr_in *) - rt_mask(rt))->sin_addr.s_addr); - else if (rt->rt_flags & RTF_HOST) - /* Give up. We can't determine mask :( */ - fle->f.src_mask = 32; + if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) { + bzero(&sin, sizeof(sin)); + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_family = AF_INET; + sin.sin_addr = fle->f.r.r_src; + rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib); + if (rt != NULL) { + if (rt_mask(rt)) + fle->f.src_mask = bitcount32(((struct sockaddr_in *) + rt_mask(rt))->sin_addr.s_addr); + else if (rt->rt_flags & RTF_HOST) + /* Give up. We can't determine mask :( */ + fle->f.src_mask = 32; - RTFREE_LOCKED(rt); + RTFREE_LOCKED(rt); + } } /* Push new flow at the and of hash. */ @@ -314,40 +408,186 @@ hash_insert(priv_p priv, struct flow_has return (0); } +#ifdef INET6 +/* XXX: make normal function, instead of.. */ +#define ipv6_masklen(x) bitcount32((x).__u6_addr.__u6_addr32[0]) + \ + bitcount32((x).__u6_addr.__u6_addr32[1]) + \ + bitcount32((x).__u6_addr.__u6_addr32[2]) + \ + bitcount32((x).__u6_addr.__u6_addr32[3]) +#define RT_MASK6(x) (ipv6_masklen(((struct sockaddr_in6 *)rt_mask(x))->sin6_addr)) +static int +hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r, + int plen, uint8_t flags, uint8_t tcp_flags) +{ + struct flow6_entry *fle6; + struct sockaddr_in6 *src, *dst; + struct rtentry *rt; + struct route_in6 rin6; + + mtx_assert(&hsh6->mtx, MA_OWNED); + + fle6 = uma_zalloc_arg(priv->zone6, priv, M_NOWAIT); + if (fle6 == NULL) { + atomic_add_32(&priv->info.nfinfo_alloc_failed, 1); + return (ENOMEM); + } + + /* + * Now fle is totally ours. It is detached from all lists, + * we can safely edit it. + */ + + fle6->f.version = IP6VERSION; + bcopy(r, &fle6->f.r, sizeof(struct flow6_rec)); + fle6->f.bytes = plen; + fle6->f.packets = 1; + fle6->f.tcp_flags = tcp_flags; + + fle6->f.first = fle6->f.last = time_uptime; + + /* + * First we do route table lookup on destination address. So we can + * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases. + */ + if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) + { + bzero(&rin6, sizeof(struct route_in6)); + dst = (struct sockaddr_in6 *)&rin6.ro_dst; + dst->sin6_len = sizeof(struct sockaddr_in6); + dst->sin6_family = AF_INET6; + dst->sin6_addr = r->dst.r_dst6; + + rin6.ro_rt = rtalloc1_fib((struct sockaddr *)dst, 0, 0, r->fib); + + if (rin6.ro_rt != NULL) { + rt = rin6.ro_rt; + fle6->f.fle_o_ifx = rt->rt_ifp->if_index; + + if (rt->rt_flags & RTF_GATEWAY && + rt->rt_gateway->sa_family == AF_INET6) + fle6->f.n.next_hop6 = + ((struct sockaddr_in6 *)(rt->rt_gateway))->sin6_addr; + + if (rt_mask(rt)) + fle6->f.dst_mask = RT_MASK6(rt); + else + fle6->f.dst_mask = 128; + + RTFREE_LOCKED(rt); + } + } + + if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) + { + /* Do route lookup on source address, to fill in src_mask. */ + bzero(&rin6, sizeof(struct route_in6)); + src = (struct sockaddr_in6 *)&rin6.ro_dst; + src->sin6_len = sizeof(struct sockaddr_in6); + src->sin6_family = AF_INET6; + src->sin6_addr = r->src.r_src6; + + rin6.ro_rt = rtalloc1_fib((struct sockaddr *)src, 0, 0, r->fib); + + if (rin6.ro_rt != NULL) { + rt = rin6.ro_rt; + + if (rt_mask(rt)) + fle6->f.src_mask = RT_MASK6(rt); + else + fle6->f.src_mask = 128; + + RTFREE_LOCKED(rt); + } + } + + /* Push new flow at the and of hash. */ + TAILQ_INSERT_TAIL(&hsh6->head, (struct flow_entry *)fle6, fle_hash); + + return (0); +} +#undef ipv6_masklen +#undef RT_MASK6 +#endif + /* * Non-static functions called from ng_netflow.c */ /* Allocate memory and set up flow cache */ -int +void ng_netflow_cache_init(priv_p priv) { - struct flow_hash_entry *hsh; + struct flow_hash_entry *hsh; int i; /* Initialize cache UMA zone. */ - priv->zone = uma_zcreate("NetFlow cache", sizeof(struct flow_entry), + priv->zone = uma_zcreate("NetFlow IPv4 cache", sizeof(struct flow_entry), uma_ctor_flow, uma_dtor_flow, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(priv->zone, CACHESIZE); +#ifdef INET6 + priv->zone6 = uma_zcreate("NetFlow IPv6 cache", sizeof(struct flow6_entry), + uma_ctor_flow6, uma_dtor_flow6, NULL, NULL, UMA_ALIGN_CACHE, 0); + uma_zone_set_max(priv->zone6, CACHESIZE); +#endif /* Allocate hash. */ priv->hash = malloc(NBUCKETS * sizeof(struct flow_hash_entry), M_NETFLOW_HASH, M_WAITOK | M_ZERO); - if (priv->hash == NULL) { - uma_zdestroy(priv->zone); - return (ENOMEM); + /* Initialize hash. */ + for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) { + mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF); + TAILQ_INIT(&hsh->head); } +#ifdef INET6 + /* Allocate hash. */ + priv->hash6 = malloc(NBUCKETS * sizeof(struct flow_hash_entry), + M_NETFLOW_HASH, M_WAITOK | M_ZERO); + /* Initialize hash. */ - for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) { + for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) { mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF); TAILQ_INIT(&hsh->head); } +#endif - mtx_init(&priv->export_mtx, "export dgram lock", NULL, MTX_DEF); + ng_netflow_v9_cache_init(priv); + CTR0(KTR_NET, "ng_netflow startup()"); +} +/* Initialize new FIB table for v5 and v9 */ +int +ng_netflow_fib_init(priv_p priv, int fib) +{ + fib_export_p fe = priv_to_fib(priv, fib); + + CTR1(KTR_NET, "ng_netflow(): fib init: %d", fib); + + if (fe != NULL) + return (0); + + if ((fe = malloc(sizeof(struct fib_export), M_NETGRAPH, M_NOWAIT | M_ZERO)) == NULL) + return (1); + + mtx_init(&fe->export_mtx, "export dgram lock", NULL, MTX_DEF); + mtx_init(&fe->export9_mtx, "export9 dgram lock", NULL, MTX_DEF); + fe->fib = fib; + fe->domain_id = fib; + + if (atomic_cmpset_ptr((volatile uintptr_t *)&priv->fib_data[fib], (uintptr_t)NULL, (uintptr_t)fe) == 0) { + /* FIB already set up by other ISR */ + CTR3(KTR_NET, "ng_netflow(): fib init: %d setup %p but got %p", fib, fe, priv_to_fib(priv, fib)); + mtx_destroy(&fe->export_mtx); + mtx_destroy(&fe->export9_mtx); + free(fe, M_NETGRAPH); + } else { + /* Increase counter for statistics */ + CTR3(KTR_NET, "ng_netflow(): fib %d setup to %p (%p)", fib, fe, priv_to_fib(priv, fib)); + atomic_fetchadd_32(&priv->info.nfinfo_alloc_fibs, 1); + } + return (0); } @@ -357,9 +597,12 @@ ng_netflow_cache_flush(priv_p priv) { struct flow_entry *fle, *fle1; struct flow_hash_entry *hsh; - item_p item = NULL; + struct netflow_export_item exp; + fib_export_p fe; int i; + bzero(&exp, sizeof(exp)); + /* * We are going to free probably billable data. * Expire everything before freeing it. @@ -368,36 +611,67 @@ ng_netflow_cache_flush(priv_p priv) for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); - expire_flow(priv, &item, fle, NG_QUEUE); + fe = priv_to_fib(priv, fle->f.r.fib); + expire_flow(priv, fe, fle, NG_QUEUE); } - - if (item != NULL) - export_send(priv, item, NG_QUEUE); +#ifdef INET6 + for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) + TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { + TAILQ_REMOVE(&hsh->head, fle, fle_hash); + fe = priv_to_fib(priv, fle->f.r.fib); + expire_flow(priv, fe, fle, NG_QUEUE); + } +#endif uma_zdestroy(priv->zone); - /* Destroy hash mutexes. */ for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) mtx_destroy(&hsh->mtx); /* Free hash memory. */ - if (priv->hash) + if (priv->hash != NULL) free(priv->hash, M_NETFLOW_HASH); +#ifdef INET6 + uma_zdestroy(priv->zone6); + /* Destroy hash mutexes. */ + for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) + mtx_destroy(&hsh->mtx); + + /* Free hash memory. */ + if (priv->hash6 != NULL) + free(priv->hash6, M_NETFLOW_HASH); +#endif + + for (i = 0; i < priv->maxfibs; i++) { + if ((fe = priv_to_fib(priv, i)) == NULL) + continue; + + if (fe->exp.item != NULL) + export_send(priv, fe, fe->exp.item, NG_QUEUE); - mtx_destroy(&priv->export_mtx); + if (fe->exp.item9 != NULL) + export9_send(priv, fe, fe->exp.item9, fe->exp.item9_opt, NG_QUEUE); + + mtx_destroy(&fe->export_mtx); + mtx_destroy(&fe->export9_mtx); + free(fe, M_NETGRAPH); + } + + ng_netflow_v9_cache_flush(priv); } /* Insert packet from into flow cache. */ int -ng_netflow_flow_add(priv_p priv, struct ip *ip, unsigned int src_if_index) +ng_netflow_flow_add(priv_p priv, fib_export_p fe, struct ip *ip, caddr_t upper_ptr, uint8_t upper_proto, + uint8_t flags, unsigned int src_if_index) { register struct flow_entry *fle, *fle1; - struct flow_hash_entry *hsh; + struct flow_hash_entry *hsh; struct flow_rec r; - item_p item = NULL; int hlen, plen; int error = 0; uint8_t tcp_flags = 0; + uint16_t eproto; /* Try to fill flow_rec r */ bzero(&r, sizeof(r)); @@ -411,8 +685,13 @@ ng_netflow_flow_add(priv_p priv, struct if (hlen < sizeof(struct ip)) return (EINVAL); + eproto = ETHERTYPE_IP; + /* Assume L4 template by default */ + r.flow_type = NETFLOW_V9_FLOW_V4_L4; + r.r_src = ip->ip_src; r.r_dst = ip->ip_dst; + r.fib = fe->fib; /* save packet length */ plen = ntohs(ip->ip_len); @@ -448,8 +727,8 @@ ng_netflow_flow_add(priv_p priv, struct break; } - /* Update node statistics. XXX: race... */ - priv->info.nfinfo_packets ++; + atomic_fetchadd_32(&priv->info.nfinfo_packets, 1); + /* XXX: atomic */ priv->info.nfinfo_bytes += plen; /* Find hash slot. */ @@ -468,7 +747,7 @@ ng_netflow_flow_add(priv_p priv, struct break; if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); - expire_flow(priv, &item, fle, NG_QUEUE); + expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); atomic_add_32(&priv->info.nfinfo_act_exp, 1); } } @@ -487,9 +766,9 @@ ng_netflow_flow_add(priv_p priv, struct * - it is going to overflow counter */ if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) || - (fle->f.bytes >= (UINT_MAX - IF_MAXMTU)) ) { + (fle->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); - expire_flow(priv, &item, fle, NG_QUEUE); + expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); atomic_add_32(&priv->info.nfinfo_act_exp, 1); } else { /* @@ -503,15 +782,139 @@ ng_netflow_flow_add(priv_p priv, struct } } } else /* A new flow entry. */ - error = hash_insert(priv, hsh, &r, plen, tcp_flags); + error = hash_insert(priv, hsh, &r, plen, flags, tcp_flags); mtx_unlock(&hsh->mtx); - if (item != NULL) - return_export_dgram(priv, item, NG_QUEUE); + return (error); +} + +#ifdef INET6 +/* Insert IPv6 packet from into flow cache. */ +int +ng_netflow_flow6_add(priv_p priv, fib_export_p fe, struct ip6_hdr *ip6, caddr_t upper_ptr, uint8_t upper_proto, + uint8_t flags, unsigned int src_if_index) +{ + register struct flow_entry *fle = NULL, *fle1; + register struct flow6_entry *fle6; + struct flow_hash_entry *hsh; + struct flow6_rec r; + int plen; + int error = 0; + uint8_t tcp_flags = 0; + + /* check version */ + if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) + return (EINVAL); + + bzero(&r, sizeof(r)); + + r.src.r_src6 = ip6->ip6_src; + r.dst.r_dst6 = ip6->ip6_dst; + r.fib = fe->fib; + + /* Assume L4 template by default */ + r.flow_type = NETFLOW_V9_FLOW_V6_L4; + + /* save packet length */ + plen = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr); + + /* XXX: set DSCP/CoS value */ +#if 0 + r.r_tos = ip->ip_tos; +#endif + if ((flags & NG_NETFLOW_IS_FRAG) == 0) { + switch(upper_proto) { + case IPPROTO_TCP: + { + register struct tcphdr *tcp; + + tcp = (struct tcphdr *)upper_ptr; + r.r_ports = *(uint32_t *)upper_ptr; + tcp_flags = tcp->th_flags; + break; + } + case IPPROTO_UDP: + case IPPROTO_SCTP: + { + r.r_ports = *(uint32_t *)upper_ptr; + break; + } + + } + } + + r.r_ip_p = upper_proto; + r.r_i_ifx = src_if_index; + + atomic_fetchadd_32(&priv->info.nfinfo_packets6, 1); + /* XXX: atomic */ + priv->info.nfinfo_bytes6 += plen; + + /* Find hash slot. */ + hsh = &priv->hash6[ip6_hash(&r)]; + + mtx_lock(&hsh->mtx); + + /* + * Go through hash and find our entry. If we encounter an + * entry, that should be expired, purge it. We do a reverse + * search since most active entries are first, and most + * searches are done on most active entries. + */ + TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) { + if (fle->f.version != IP6VERSION) + continue; + fle6 = (struct flow6_entry *)fle; + if (bcmp(&r, &fle6->f.r, sizeof(struct flow6_rec)) == 0) + break; + if ((INACTIVE(fle6) && SMALL(fle6)) || AGED(fle6)) { + TAILQ_REMOVE(&hsh->head, fle, fle_hash); + expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, + NG_QUEUE); + atomic_add_32(&priv->info.nfinfo_act_exp, 1); + } + } + + if (fle != NULL) { /* An existent entry. */ + fle6 = (struct flow6_entry *)fle; + + fle6->f.bytes += plen; + fle6->f.packets ++; + fle6->f.tcp_flags |= tcp_flags; + fle6->f.last = time_uptime; + + /* + * We have the following reasons to expire flow in active way: + * - it hit active timeout + * - a TCP connection closed + * - it is going to overflow counter + */ + if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle6) || + (fle6->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) { + TAILQ_REMOVE(&hsh->head, fle, fle_hash); + expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, + NG_QUEUE); + atomic_add_32(&priv->info.nfinfo_act_exp, 1); + } else { + /* + * It is the newest, move it to the tail, + * if it isn't there already. Next search will + * locate it quicker. + */ + if (fle != TAILQ_LAST(&hsh->head, fhead)) { + TAILQ_REMOVE(&hsh->head, fle, fle_hash); + TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); + } + } + } else /* A new flow entry. */ + error = hash6_insert(priv, hsh, &r, plen, flags, tcp_flags); + + mtx_unlock(&hsh->mtx); return (error); } +#endif /* * Return records from cache to userland. @@ -519,67 +922,113 @@ ng_netflow_flow_add(priv_p priv, struct * TODO: matching particular IP should be done in kernel, here. */ int -ng_netflow_flow_show(priv_p priv, uint32_t last, struct ng_mesg *resp) +ng_netflow_flow_show(priv_p priv, struct ngnf_show_header *req, +struct ngnf_show_header *resp) { - struct flow_hash_entry *hsh; - struct flow_entry *fle; - struct ngnf_flows *data; - int i; + struct flow_hash_entry *hsh; + struct flow_entry *fle; + struct flow_entry_data *data = (struct flow_entry_data *)(resp + 1); +#ifdef INET6 + struct flow6_entry_data *data6 = (struct flow6_entry_data *)(resp + 1); +#endif + int i, max; - data = (struct ngnf_flows *)resp->data; - data->last = 0; - data->nentries = 0; - - /* Check if this is a first run */ - if (last == 0) { - hsh = priv->hash; - i = 0; - } else { - if (last > NBUCKETS-1) - return (EINVAL); - hsh = priv->hash + last; - i = last; - } + i = req->hash_id; + if (i > NBUCKETS-1) + return (EINVAL); + +#ifdef INET6 + if (req->version == 6) { + resp->version = 6; + hsh = priv->hash6 + i; + max = NREC6_AT_ONCE; + } else +#endif + if (req->version == 4) { + resp->version = 4; + hsh = priv->hash + i; + max = NREC_AT_ONCE; + } else + return (EINVAL); /* * We will transfer not more than NREC_AT_ONCE. More data * will come in next message. - * We send current hash index to userland, and userland should - * return it back to us. Then, we will restart with new entry. + * We send current hash index and current record number in list + * to userland, and userland should return it back to us. + * Then, we will restart with new entry. * - * The resulting cache snapshot is inaccurate for the - * following reasons: - * - we skip locked hash entries - * - we bail out, if someone wants our entry - * - we skip rest of entry, when hit NREC_AT_ONCE + * The resulting cache snapshot can be inaccurate if flow expiration + * is taking place on hash item between userland data requests for + * this hash item id. */ + resp->nentries = 0; for (; i < NBUCKETS; hsh++, i++) { - if (mtx_trylock(&hsh->mtx) == 0) - continue; + int list_id; + if (mtx_trylock(&hsh->mtx) == 0) { + /* + * Requested hash index is not available, + * relay decision to skip or re-request data + * to userland. + */ + resp->hash_id = i; + resp->list_id = 0; + return (0); + } + + list_id = 0; TAILQ_FOREACH(fle, &hsh->head, fle_hash) { - if (hsh->mtx.mtx_lock & MTX_CONTESTED) - break; + if (hsh->mtx.mtx_lock & MTX_CONTESTED) { + resp->hash_id = i; + resp->list_id = list_id; + mtx_unlock(&hsh->mtx); + return (0); + } + + list_id++; + /* Search for particular record in list. */ + if (req->list_id > 0) { + if (list_id < req->list_id) + continue; - bcopy(&fle->f, &(data->entries[data->nentries]), - sizeof(fle->f)); - data->nentries++; - if (data->nentries == NREC_AT_ONCE) { + /* Requested list position found. */ + req->list_id = 0; + } +#ifdef INET6 + if (req->version == 6) { + struct flow6_entry *fle6; + + fle6 = (struct flow6_entry *)fle; + bcopy(&fle6->f, data6 + resp->nentries, + sizeof(fle6->f)); + } else +#endif + bcopy(&fle->f, data + resp->nentries, + sizeof(fle->f)); + resp->nentries++; + if (resp->nentries == max) { *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201207191143.q6JBhWWe077343>