Date: Tue, 22 Oct 2013 13:45:03 +0000 (UTC) From: Andre Oppermann <andre@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r256890 - user/andre/mbuf_staging/netinet Message-ID: <201310221345.r9MDj3Ca005688@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: andre Date: Tue Oct 22 13:45:03 2013 New Revision: 256890 URL: http://svnweb.freebsd.org/changeset/base/256890 Log: Add gross proof of concept hack to bypass the entire lower stack for outbound tcp sends for certain very high performance uses. Bypassing means packets going directly to the wire and skip over all firewalls or other processing done in ip_output() and ether_output(). This very first version only does one interface and MAC address lookup at the beginning of a connection. It doesn't detect any changes afterwards and will grind into a void if it has to. On certain errors it also leaks a bit of memory. The point of this patch is to allow for performance analysis of a) the normal vs. bypass path; b) to profile and optimize the normal path to bring it as close as possible to the bypass path. This patch is not intended to be merged to HEAD in its current form (or at all). Modified: user/andre/mbuf_staging/netinet/tcp_output.c Modified: user/andre/mbuf_staging/netinet/tcp_output.c ============================================================================== --- user/andre/mbuf_staging/netinet/tcp_output.c Tue Oct 22 13:31:36 2013 (r256889) +++ user/andre/mbuf_staging/netinet/tcp_output.c Tue Oct 22 13:45:03 2013 (r256890) @@ -129,6 +129,14 @@ static void inline hhook_run_tcp_est_out long len, int tso); static void inline cc_after_idle(struct tcpcb *tp); +#ifdef TCP_IFTRANSMIT +static int tcp_l2hdr(struct tcpcb *tp, struct in_conninfo *inc); +static int tcp_ifsend(struct tcpcb *tp, struct mbuf *m); + +#define t_ifp t_pspare2[0] +#define t_l2h t_pspare2[1] +#endif + /* * Wrapper for the TCP established output helper hook. */ @@ -1228,6 +1236,13 @@ send: TCP_PROBE5(send, NULL, tp, ip, tp, th); +#ifdef TCP_IFTRANSMIT + if (tp->t_l2h == NULL) + (void)tcp_l2hdr(tp, &tp->t_inpcb->inp_inc); + if (tp->t_ifp != NULL) { + tcp_ifsend(tp, m); + } else +#endif error = ip_output(m, tp->t_inpcb->inp_options, &ro, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0, tp->t_inpcb); @@ -1424,6 +1439,71 @@ tcp_setpersist(struct tcpcb *tp) tp->t_rxtshift++; } +#ifdef TCP_IFTRANSMIT +#include <net/if_arp.h> +#include <net/if_llc.h> +#include <net/if_dl.h> +#include <net/if_types.h> +#include <net/ethernet.h> +#include <net/if_llatbl.h> +#include <netinet/in_var.h> +#include <netinet/if_ether.h> + +static int +tcp_l2hdr(struct tcpcb *tp, struct in_conninfo *inc) +{ + struct route sro; + struct sockaddr_in *dst; + struct ifnet *ifp; + struct llentry *lle = NULL; + struct ether_header *eh; + + /* Look up destination interface. */ + bzero(&sro, sizeof(sro)); + if (inc->inc_faddr.s_addr == INADDR_ANY) + return (ENOBUFS); + dst = (struct sockaddr_in *)&sro.ro_dst; + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = inc->inc_faddr; + in_rtalloc_ign(&sro, 0, inc->inc_fibnum); + if (sro.ro_rt == NULL) + return (ENOBUFS); + ifp = sro.ro_rt->rt_ifp; + RO_RTFREE(&sro); + + if ((tp->t_l2h = malloc(ETHER_HDR_LEN, M_TEMP, M_NOWAIT)) == NULL) + return (ENOBUFS); + + eh = tp->t_l2h; + if (arpresolve(ifp, NULL, NULL, &sro.ro_dst, (u_char *)(&eh->ether_dhost), + &lle) != 0) + return (ENOBUFS); /* XXX leak */ + (void)memcpy(eh->ether_shost, IF_LLADDR(ifp), sizeof(eh->ether_shost)); + eh->ether_type = htons(ETHERTYPE_IP); + tp->t_ifp = ifp; + + return (0); +} + +static int +tcp_ifsend(struct tcpcb *tp, struct mbuf *m) +{ + struct ether_header *eh; + struct ifnet *ifp; + + M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); + if (m == NULL) + return (ENOBUFS); + + ifp = tp->t_ifp; + eh = mtod(m, struct ether_header *); + (void)memcpy(eh, tp->t_l2h, ETHER_HDR_LEN); + + return ((ifp->if_transmit)(ifp, m)); +} +#endif /* TCP_IFTRANSMIT */ + /* * Insert TCP options according to the supplied parameters to the place * optp in a consistent way. Can handle unaligned destinations.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201310221345.r9MDj3Ca005688>