Date: Mon, 4 May 1998 01:43:10 +0200 From: Pierre Beyssac <pb@fasterix.freenix.org> To: freebsd-net@FreeBSD.ORG Subject: patches for fast forwarding (with kernel option & sysctl) Message-ID: <19980504014310.A22037@fasterix.frmug.fr.net>
next in thread | raw e-mail | index | archive | help
--VS++wcV0S1rZb1Fb
Content-Type: text/plain; charset=us-ascii
Here are new patches to use NetBSD's IPFLOW stuff on -current. To
use, store ip_flow.c in /sys/netinet, then add IPFLOW to your kernel
config. After booting, activate with:
sysctl -w net.inet.ip.fastforwarding=1
Two attachments follow: the patches and the additional file.
--
Pierre Beyssac pb@fasterix.frmug.org pb@fasterix.freenix.org
{Free,Net,Open}BSD, Linux : il y a moins bien, mais c'est plus cher
Free domains: http://www.eu.org/ or mail dns-manager@EU.org
--VS++wcV0S1rZb1Fb
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=patch2
Index: conf/files
===================================================================
RCS file: /home/ncvs/src/sys/conf/files,v
retrieving revision 1.136
diff -u -r1.136 files
--- files 1998/04/22 18:12:29 1.136
+++ files 1998/05/03 23:30:21
@@ -268,6 +268,7 @@
netinet/ip_auth.c optional ipfilter inet
netinet/ip_divert.c optional ipdivert
netinet/ip_fil.c optional ipfilter inet
+netinet/ip_flow.c optional ipflow inet
netinet/ip_frag.c optional ipfilter inet
netinet/ip_fw.c optional ipfirewall
netinet/ip_icmp.c optional inet
Index: conf/options
===================================================================
RCS file: /home/ncvs/src/sys/conf/options,v
retrieving revision 1.72
diff -u -r1.72 options
--- options 1998/04/20 04:30:41 1.72
+++ options 1998/05/03 23:30:24
@@ -172,6 +172,7 @@
IPFIREWALL_VERBOSE opt_ipfw.h
IPFIREWALL_VERBOSE_LIMIT opt_ipfw.h
IPFIREWALL_DEFAULT_TO_ACCEPT opt_ipfw.h
+IPFLOW opt_ipflow.h
IPX opt_ipx.h
IPXIP opt_ipx.h
IPTUNNEL opt_ipx.h
Index: i386/conf/LINT
===================================================================
RCS file: /home/ncvs/src/sys/i386/conf/LINT,v
retrieving revision 1.429
diff -u -r1.429 LINT
--- LINT 1998/04/29 17:09:41 1.429
+++ LINT 1998/05/03 23:30:53
@@ -419,6 +419,9 @@
# IPFILTER_LOG enables ipfilter's logging.
# IPFILTER_LKM enables LKM support for an ipfilter module (untested).
#
+# IPFLOW enables Matt Thomas' fast IP forwarding code
+# (activate using "sysctl -w net.inet.ip.fastforwarding=1")
+#
# TCPDEBUG is undocumented.
#
options "TCP_COMPAT_42" #emulate 4.2BSD TCP bugs
@@ -432,6 +435,7 @@
options IPFILTER #kernel ipfilter support
options IPFILTER_LOG #ipfilter logging
#options IPFILTER_LKM #kernel support for ip_fil.o LKM
+options IPFLOW
options TCPDEBUG
Index: net/if_ethersubr.c
===================================================================
RCS file: /home/ncvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.47
diff -u -r1.47 if_ethersubr.c
--- if_ethersubr.c 1998/03/30 09:51:39 1.47
+++ if_ethersubr.c 1998/05/03 23:31:06
@@ -36,6 +36,7 @@
#include "opt_atalk.h"
#include "opt_inet.h"
+#include "opt_ipflow.h"
#include "opt_ipx.h"
#include <sys/param.h>
@@ -501,6 +502,10 @@
switch (ether_type) {
#ifdef INET
case ETHERTYPE_IP:
+#ifdef IPFLOW
+ if (ipflow_fastforward(m))
+ return;
+#endif
schednetisr(NETISR_IP);
inq = &ipintrq;
break;
Index: net/if_fddisubr.c
===================================================================
RCS file: /home/ncvs/src/sys/net/if_fddisubr.c,v
retrieving revision 1.27
diff -u -r1.27 if_fddisubr.c
--- if_fddisubr.c 1998/03/30 09:51:44 1.27
+++ if_fddisubr.c 1998/05/03 23:31:18
@@ -38,6 +38,7 @@
#include "opt_atalk.h"
#include "opt_inet.h"
+#include "opt_ipflow.h"
#include "opt_ipx.h"
#include <sys/param.h>
@@ -533,6 +534,10 @@
switch (type) {
#ifdef INET
case ETHERTYPE_IP:
+#ifdef IPFLOW
+ if (ipflow_fastforward(m))
+ return;
+#endif
schednetisr(NETISR_IP);
inq = &ipintrq;
break;
Index: net/if_ppp.c
===================================================================
RCS file: /home/ncvs/src/sys/net/if_ppp.c,v
retrieving revision 1.56
diff -u -r1.56 if_ppp.c
--- if_ppp.c 1998/04/06 11:43:10 1.56
+++ if_ppp.c 1998/05/03 23:31:50
@@ -77,6 +77,7 @@
#if NPPP > 0
#include "opt_inet.h"
+#include "opt_ipflow.h"
#include "opt_ipx.h"
#include "opt_ppp.h"
@@ -1488,6 +1489,12 @@
m->m_pkthdr.len -= PPP_HDRLEN;
m->m_data += PPP_HDRLEN;
m->m_len -= PPP_HDRLEN;
+#ifdef IPFLOW
+ if (ipflow_fastforward(m)) {
+ sc->sc_last_recv = time_second;
+ return;
+ }
+#endif
schednetisr(NETISR_IP);
inq = &ipintrq;
sc->sc_last_recv = time_second; /* update time of last pkt rcvd */
Index: netinet/in.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/in.h,v
retrieving revision 1.31
diff -u -r1.31 in.h
--- in.h 1998/04/19 17:22:27 1.31
+++ in.h 1998/05/03 23:32:00
@@ -398,7 +398,8 @@
#define IPCTL_INTRQDROPS 11 /* number of netisr q drops */
#define IPCTL_STATS 12 /* ipstat structure */
#define IPCTL_ACCEPTSOURCEROUTE 13 /* may accept source routed packets */
-#define IPCTL_MAXID 14
+#define IPCTL_FASTFORWARDING 14 /* use fast IP forwarding code */
+#define IPCTL_MAXID 15
#define IPCTL_NAMES { \
{ 0, 0 }, \
@@ -415,6 +416,7 @@
{ "intr-queue-drops", CTLTYPE_INT }, \
{ "stats", CTLTYPE_STRUCT }, \
{ "accept_sourceroute", CTLTYPE_INT }, \
+ { "fastforwarding", CTLTYPE_INT }, \
}
Index: netinet/in_var.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/in_var.h,v
retrieving revision 1.27
diff -u -r1.27 in_var.h
--- in_var.h 1997/09/07 05:26:43 1.27
+++ in_var.h 1998/05/03 23:32:02
@@ -211,6 +211,7 @@
IN_NEXT_MULTI((step), (inm)); \
} while(0)
+struct route;
struct in_multi *in_addmulti __P((struct in_addr *, struct ifnet *));
void in_delmulti __P((struct in_multi *));
int in_control __P((struct socket *, int, caddr_t, struct ifnet *,
@@ -219,6 +220,9 @@
void ip_input __P((struct mbuf *));
int in_ifadown __P((struct ifaddr *ifa));
void in_ifscrub __P((struct ifnet *, struct in_ifaddr *));
+int ipflow_fastforward __P((struct mbuf *));
+void ipflow_create __P((const struct route *, struct mbuf *));
+void ipflow_slowtimo __P((void));
#endif /* KERNEL */
Index: netinet/ip_fw.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/ip_fw.c,v
retrieving revision 1.82
diff -u -r1.82 ip_fw.c
--- ip_fw.c 1998/04/21 18:54:53 1.82
+++ ip_fw.c 1998/05/03 23:32:13
@@ -36,6 +36,7 @@
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
Index: netinet/ip_input.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.82
diff -u -r1.82 ip_input.c
--- ip_input.c 1998/04/13 17:27:08 1.82
+++ ip_input.c 1998/05/03 23:32:27
@@ -41,6 +41,7 @@
#include "opt_ipfw.h"
#include "opt_ipdivert.h"
#include "opt_ipfilter.h"
+#include "opt_ipflow.h"
#include <stddef.h>
@@ -80,7 +81,7 @@
static int ip_rsvp_on;
struct socket *ip_rsvpd;
-static int ipforwarding = 0;
+int ipforwarding = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
&ipforwarding, 0, "");
@@ -878,6 +879,9 @@
}
}
}
+#ifdef IPFLOW
+ ipflow_slowtimo();
+#endif
splx(s);
}
@@ -1381,8 +1385,12 @@
if (type)
ipstat.ips_redirectsent++;
else {
- if (mcopy)
+ if (mcopy) {
+#ifdef IPFLOW
+ ipflow_create(&ipforward_rt, mcopy);
+#endif
m_freem(mcopy);
+ }
return;
}
}
Index: netinet/ip_var.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.34
diff -u -r1.34 ip_var.h
--- ip_var.h 1997/09/07 05:26:46 1.34
+++ ip_var.h 1998/05/03 23:32:29
@@ -132,6 +132,7 @@
u_long ips_fragdropped; /* frags dropped (dups, out of space) */
u_long ips_fragtimeout; /* fragments timed out */
u_long ips_forward; /* packets forwarded */
+ u_long ips_fastforward; /* packets fast forwarded */
u_long ips_cantforward; /* packets rcvd for unreachable dest */
u_long ips_redirectsent; /* packets forwarded on same net */
u_long ips_noproto; /* unknown or unsupported protocol */
@@ -150,6 +151,21 @@
u_long ips_notmember; /* multicasts for unregistered grps */
};
+#define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */
+struct ipflow {
+ LIST_ENTRY(ipflow) ipf_next; /* next ipflow in bucket */
+ struct in_addr ipf_dst; /* destination address */
+ struct in_addr ipf_src; /* source address */
+ u_int8_t ipf_tos; /* type-of-service */
+ struct route ipf_ro; /* associated route entry */
+ u_long ipf_uses; /* number of uses in this period */
+ u_long ipf_last_uses; /* number of uses in last period */
+ u_long ipf_dropped; /* ENOBUFS returned by if_output */
+ u_long ipf_errors; /* other errors returned by if_output */
+ int ipf_timer; /* remaining lifetime of this entry */
+ time_t ipf_start; /* creation time */
+};
+
#ifdef KERNEL
/* flags passed to ip_output as last parameter */
#define IP_FORWARDING 0x1 /* most of ip header exists */
@@ -163,6 +179,7 @@
extern struct ipstat ipstat;
extern u_short ip_id; /* ip packet ctr, for ids */
extern int ip_defttl; /* default IP ttl */
+extern int ipforwarding; /* ip forwarding */
extern u_char ip_protox[];
extern struct socket *ip_rsvpd; /* reservation protocol daemon */
extern struct socket *ip_mrouter; /* multicast routing daemon */
--VS++wcV0S1rZb1Fb
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="ip_flow.c"
/* $NetBSD: ip_flow.c,v 1.1 1998/04/29 21:37:55 matt Exp $ */
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by the 3am Software Foundry ("3am"). It was developed by Matt Thomas.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <vm/vm.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
/*
 * Value loaded into ipf_timer when a flow is created or used;
 * ipflow_slowtimo() decrements it once per call and frees the flow when
 * it reaches zero.
 * NOTE(review): presumably about 5 seconds if ipflow_slowtimo() runs
 * PR_SLOWHZ times per second -- confirm against the caller.
 */
#define IPFLOW_TIMER (5 * PR_SLOWHZ)
/* Number of buckets in the flow hash table. */
#define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS)
/* The flow table: one list of flows per bucket, indexed by ipflow_hash(). */
static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE];
/* Number of flow entries currently allocated. */
static int ipflow_inuse;
/*
 * Allocation cap: once ipflow_inuse reaches this value, ipflow_create()
 * recycles an existing entry through ipflow_reap() instead of allocating.
 */
#define IPFLOW_MAX 256
/*
 * Run-time switch checked by ipflow_fastforward() and ipflow_create();
 * off by default.  Exported read/write as the "fastforwarding" sysctl
 * under _net_inet_ip.
 */
static int ipflow_active = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW,
&ipflow_active, 0, "");
/* malloc type used for struct ipflow allocations in this file. */
MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow");
/*
 * Hash a (destination, source, type-of-service) triple into a bucket
 * index for the flow table.
 *
 * dst, src	addresses of the flow, used exactly as stored in the IP header.
 * tos		type-of-service value of the flow.
 *
 * Returns a value in the range [0, IPFLOW_HASHSIZE).
 */
static unsigned
ipflow_hash(
	struct in_addr dst,
	struct in_addr src,
	unsigned tos)
{
	unsigned hash = tos;
	int idx;

	for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS) {
		/*
		 * The shift count is reduced modulo 32: on the first pass
		 * (idx == 0) the unmasked count would be 32, and shifting a
		 * 32-bit value by its full width is undefined.  With the
		 * mask the first pass folds in the unshifted destination,
		 * so every destination bit still contributes to the hash.
		 */
		hash += (dst.s_addr >> ((32 - idx) & 31)) + (src.s_addr >> idx);
	}
	return hash & (IPFLOW_HASHSIZE-1);
}
/*
 * Look up the cached flow that matches an IP header.
 *
 * ip	header whose destination, source and type-of-service select the flow.
 *
 * Returns the matching entry from the header's hash bucket, or NULL when
 * the bucket holds no entry with the same three fields.
 */
static struct ipflow *
ipflow_lookup(
	const struct ip *ip)
{
	struct ipflow *flow;
	unsigned bucket;

	bucket = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
	for (flow = LIST_FIRST(&ipflows[bucket]); flow != NULL;
	    flow = LIST_NEXT(flow, ipf_next)) {
		if (ip->ip_dst.s_addr != flow->ipf_dst.s_addr)
			continue;
		if (ip->ip_src.s_addr != flow->ipf_src.s_addr)
			continue;
		if (ip->ip_tos != flow->ipf_tos)
			continue;
		/* All three key fields agree: this is the flow. */
		return flow;
	}
	return NULL;
}
/*
 * Try to forward an incoming IP packet directly through a cached flow,
 * bypassing the normal IP input queue.
 *
 * m	packet whose data begins at the IP header.
 *
 * Returns 1 when the packet was passed to the outgoing interface's
 * if_output routine (whether or not that call reported an error), and 0
 * when the packet was left unmodified and must take the regular path.
 */
int
ipflow_fastforward(
	struct mbuf *m)
{
	struct ip *ip;
	struct ipflow *ipf;
	struct rtentry *rt;
	u_int32_t sum;
	int error;

	/*
	 * Are we forwarding packets?  Big enough for an IP packet?
	 */
	if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip))
		return 0;

	/*
	 * IP header with no option and valid version and length
	 */
	ip = mtod(m, struct ip *);
	if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2)
	    || ntohs(ip->ip_len) > m->m_pkthdr.len)
		return 0;

	/*
	 * Find a flow.
	 */
	if ((ipf = ipflow_lookup(ip)) == NULL)
		return 0;

	/*
	 * Route and interface still up?
	 */
	rt = ipf->ipf_ro.ro_rt;
	if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0)
		return 0;

	/*
	 * Packet size OK?  TTL?
	 */
	if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC)
		return 0;

	/*
	 * Everything checks out and so we can forward this packet.
	 * Modify the TTL and incrementally change the checksum.
	 * On little endian machine, the TTL is in LSB position
	 * (so we can simply add) while on big-endian it's in the
	 * MSB position (so we have to do two calculation; the first
	 * is the add and second is to wrap the results into 17 bits,
	 * 16 bits and a carry).
	 */
	ip->ip_ttl -= IPTTLDEC;
#if BYTE_ORDER == LITTLE_ENDIAN
	sum = ip->ip_sum + IPTTLDEC;
#endif
#if BYTE_ORDER == BIG_ENDIAN
	sum = ip->ip_sum + (IPTTLDEC << 8);
	sum = (sum & 0xFFFF) + (sum >> 16);
#endif
	/*
	 * End-around carry: any overflow out of the low 16 bits must be
	 * added back in.  The comparison has to include 0x10000 itself,
	 * which is exactly what 0xFFFF + 1 produces; testing with '>'
	 * would drop that carry and store a wrong checksum.
	 */
	if (sum >= 0x10000)		/* add in carry if needed */
		sum++;
	ip->ip_sum = sum;		/* bit 16 is dropped */

	/*
	 * Send the packet on its way.  ENOBUFS is counted as a drop; any
	 * other error is counted separately.
	 */
	ipf->ipf_uses++;
	ipf->ipf_timer = IPFLOW_TIMER;	/* flow was just used: restart its lifetime */
	if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, &ipf->ipf_ro.ro_dst, rt)) != 0) {
		if (error == ENOBUFS)
			ipf->ipf_dropped++;
		else
			ipf->ipf_errors++;
	}
	return 1;
}
static void
ipflow_addstats(
struct ipflow *ipf)
{
ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped;
ipstat.ips_forward += ipf->ipf_uses;
ipstat.ips_fastforward += ipf->ipf_uses;
}
/*
 * Unlink a flow from the table, account its statistics, drop its route
 * reference and release its memory.
 *
 * ipf	flow to destroy; it must currently be linked into the flow table.
 */
static void
ipflow_free(
	struct ipflow *ipf)
{
	struct rtentry *rt;
	int ipl;

	/*
	 * Only the unlinking itself is done at raised priority; once the
	 * entry is off its list the rest runs at the caller's level.
	 */
	ipl = splimp();
	LIST_REMOVE(ipf, ipf_next);
	splx(ipl);

	ipflow_inuse--;
	ipflow_addstats(ipf);
	rt = ipf->ipf_ro.ro_rt;
	RTFREE(rt);
	FREE(ipf, M_IPFLOW);
}
/*
 * Pick a flow to recycle, unlink it from the table, account its
 * statistics and drop its route reference.
 *
 * The table must hold at least one flow; ipflow_create() only calls this
 * when ipflow_inuse has reached IPFLOW_MAX.
 *
 * Returns the unlinked entry.  Its memory is not freed and
 * ipflow_inuse is not changed, so the caller is expected to reuse it.
 */
static struct ipflow *
ipflow_reap(
	void)
{
	struct ipflow *ipf, *maybe_ipf = NULL;
	int idx;
	int s;

	for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
		ipf = LIST_FIRST(&ipflows[idx]);
		while (ipf != NULL) {
			/*
			 * If this no longer points to a valid route
			 * reclaim it.
			 */
			if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0)
				goto done;
			/*
			 * choose the one that's been least recently used
			 * or has had the least uses in the last 1.5
			 * intervals.
			 *
			 * The first entry seen becomes the initial
			 * candidate; the NULL test must be on maybe_ipf
			 * (ipf is never NULL here), otherwise the
			 * comparisons below dereference a NULL candidate.
			 */
			if (maybe_ipf == NULL
			    || ipf->ipf_timer < maybe_ipf->ipf_timer
			    || (ipf->ipf_timer == maybe_ipf->ipf_timer
				&& ipf->ipf_last_uses + ipf->ipf_uses <
				    maybe_ipf->ipf_last_uses +
				    maybe_ipf->ipf_uses))
				maybe_ipf = ipf;
			ipf = LIST_NEXT(ipf, ipf_next);
		}
	}
	ipf = maybe_ipf;
    done:
	/*
	 * Remove the entry from the flow table.
	 */
	s = splimp();
	LIST_REMOVE(ipf, ipf_next);
	splx(s);
	ipflow_addstats(ipf);
	RTFREE(ipf->ipf_ro.ro_rt);
	return ipf;
}
/*
 * Periodic aging of the flow table.  Each call decrements every flow's
 * timer; a flow whose timer reaches zero is freed, and every other flow
 * has its current use count flushed into the route and IP statistics
 * and then remembered as the previous period's count.
 */
void
ipflow_slowtimo(
	void)
{
	struct ipflow *flow, *next;
	int bucket;

	for (bucket = 0; bucket < IPFLOW_HASHSIZE; bucket++) {
		for (flow = LIST_FIRST(&ipflows[bucket]); flow != NULL;
		    flow = next) {
			/* Fetch the successor first: flow may be freed below. */
			next = LIST_NEXT(flow, ipf_next);

			if (--flow->ipf_timer == 0) {
				ipflow_free(flow);
				continue;
			}

			flow->ipf_last_uses = flow->ipf_uses;
			flow->ipf_ro.ro_rt->rt_use += flow->ipf_uses;
			ipstat.ips_forward += flow->ipf_uses;
			ipstat.ips_fastforward += flow->ipf_uses;
			flow->ipf_uses = 0;
		}
	}
}
void
ipflow_create(
const struct route *ro,
struct mbuf *m)
{
const struct ip *const ip = mtod(m, struct ip *);
struct ipflow *ipf;
unsigned hash;
int s;
/*
* Don't create cache entries for ICMP messages.
*/
if (!ipflow_active || ip->ip_p == IPPROTO_ICMP)
return;
/*
* See if an existing flow struct exists. If so remove it from it's
* list and free the old route. If not, try to malloc a new one
* (if we aren't at our limit).
*/
ipf = ipflow_lookup(ip);
if (ipf == NULL) {
if (ipflow_inuse == IPFLOW_MAX) {
ipf = ipflow_reap();
} else {
ipf = (struct ipflow *) malloc(sizeof(*ipf), M_IPFLOW,
M_NOWAIT);
if (ipf == NULL)
return;
ipflow_inuse++;
}
bzero((caddr_t) ipf, sizeof(*ipf));
} else {
s = splimp();
LIST_REMOVE(ipf, ipf_next);
splx(s);
ipflow_addstats(ipf);
RTFREE(ipf->ipf_ro.ro_rt);
ipf->ipf_uses = ipf->ipf_last_uses = 0;
ipf->ipf_errors = ipf->ipf_dropped = 0;
}
/*
* Fill in the updated information.
*/
ipf->ipf_ro = *ro;
ro->ro_rt->rt_refcnt++;
ipf->ipf_dst = ip->ip_dst;
ipf->ipf_src = ip->ip_src;
ipf->ipf_tos = ip->ip_tos;
ipf->ipf_timer = IPFLOW_TIMER;
ipf->ipf_start = time_second;
/*
* Insert into the approriate bucket of the flow table.
*/
hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
s = splimp();
LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next);
splx(s);
}
--VS++wcV0S1rZb1Fb--
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-net" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?19980504014310.A22037>
