Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 9 Sep 2014 01:45:39 +0000 (UTC)
From:      Adrian Chadd <adrian@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r271293 - head/sys/netinet
Message-ID:  <201409090145.s891jd1d037943@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: adrian
Date: Tue Sep  9 01:45:39 2014
New Revision: 271293
URL: http://svnweb.freebsd.org/changeset/base/271293

Log:
  Add support for receiving flowtype, flowid and RSS bucket information
  as control messages via recvmsg(), and for setting them via sendmsg().
  
  This is primarily used for debugging/verification of the various
  processing paths in the IP, PCB and driver layers.
  
  Unfortunately the current implementation of the control message path
  results in a ~10% or so drop in UDP frame throughput when it's used.
  
  Differential Revision:	https://reviews.freebsd.org/D527
  Reviewed by:	grehan

Modified:
  head/sys/netinet/in.h
  head/sys/netinet/in_pcb.h
  head/sys/netinet/ip_input.c
  head/sys/netinet/ip_output.c
  head/sys/netinet/udp_usrreq.c

Modified: head/sys/netinet/in.h
==============================================================================
--- head/sys/netinet/in.h	Tue Sep  9 00:21:21 2014	(r271292)
+++ head/sys/netinet/in.h	Tue Sep  9 01:45:39 2014	(r271293)
@@ -492,6 +492,8 @@ __END_DECLS
 #define	IP_FLOWID		90   /* get flow id for the given socket/inp */
 #define	IP_FLOWTYPE		91   /* get flow type (M_HASHTYPE) */
 #define	IP_RSSBUCKETID		92   /* get RSS flowid -> bucket mapping */
+#define	IP_RECVFLOWID		93   /* bool; receive IP flowid/flowtype w/ datagram */
+#define	IP_RECVRSSBUCKETID	94   /* bool; receive IP RSS bucket id w/ datagram */
 
 /*
  * Defaults and limits for options

Modified: head/sys/netinet/in_pcb.h
==============================================================================
--- head/sys/netinet/in_pcb.h	Tue Sep  9 00:21:21 2014	(r271292)
+++ head/sys/netinet/in_pcb.h	Tue Sep  9 01:45:39 2014	(r271293)
@@ -549,6 +549,8 @@ short	inp_so_options(const struct inpcb 
 #define	INP_REUSEADDR		0x00000020 /* SO_REUSEADDR option is set */
 #define	INP_BINDMULTI		0x00000040 /* IP_BINDMULTI option is set */
 #define	INP_RSS_BUCKET_SET	0x00000080 /* IP_RSS_LISTEN_BUCKET is set */
+#define	INP_RECVFLOWID		0x00000100 /* populate recv datagram with flow info */
+#define	INP_RECVRSSBUCKETID	0x00000200 /* populate recv datagram with bucket id */
 
 /*
  * Flags passed to in_pcblookup*() functions.

Modified: head/sys/netinet/ip_input.c
==============================================================================
--- head/sys/netinet/ip_input.c	Tue Sep  9 00:21:21 2014	(r271292)
+++ head/sys/netinet/ip_input.c	Tue Sep  9 01:45:39 2014	(r271293)
@@ -1662,6 +1662,43 @@ makedummy:	
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
+
+	if (inp->inp_flags2 & INP_RECVFLOWID) {
+		uint32_t flowid, flow_type;
+
+		flowid = m->m_pkthdr.flowid;
+		flow_type = M_HASHTYPE_GET(m);
+
+		/*
+		 * XXX should handle the failure of one or the
+		 * other - don't populate both?
+		 */
+		*mp = sbcreatecontrol((caddr_t) &flowid,
+		    sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
+		if (*mp)
+			mp = &(*mp)->m_next;
+		*mp = sbcreatecontrol((caddr_t) &flow_type,
+		    sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
+		if (*mp)
+			mp = &(*mp)->m_next;
+	}
+
+#ifdef	RSS
+	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
+		uint32_t flowid, flow_type;
+		uint32_t rss_bucketid;
+
+		flowid = m->m_pkthdr.flowid;
+		flow_type = M_HASHTYPE_GET(m);
+
+		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
+			*mp = sbcreatecontrol((caddr_t) &rss_bucketid,
+			   sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
+			if (*mp)
+				mp = &(*mp)->m_next;
+		}
+	}
+#endif
 }
 
 /*

Modified: head/sys/netinet/ip_output.c
==============================================================================
--- head/sys/netinet/ip_output.c	Tue Sep  9 00:21:21 2014	(r271292)
+++ head/sys/netinet/ip_output.c	Tue Sep  9 01:45:39 2014	(r271293)
@@ -1016,6 +1016,10 @@ ip_ctloutput(struct socket *so, struct s
 		case IP_ONESBCAST:
 		case IP_DONTFRAG:
 		case IP_RECVTOS:
+		case IP_RECVFLOWID:
+#ifdef	RSS
+		case IP_RECVRSSBUCKETID:
+#endif
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
@@ -1094,6 +1098,9 @@ ip_ctloutput(struct socket *so, struct s
 			case IP_BINDMULTI:
 				OPTSET2(INP_BINDMULTI, optval);
 				break;
+			case IP_RECVFLOWID:
+				OPTSET2(INP_RECVFLOWID, optval);
+				break;
 #ifdef	RSS
 			case IP_RSS_LISTEN_BUCKET:
 				if ((optval >= 0) &&
@@ -1104,6 +1111,9 @@ ip_ctloutput(struct socket *so, struct s
 					error = EINVAL;
 				}
 				break;
+			case IP_RECVRSSBUCKETID:
+				OPTSET2(INP_RECVRSSBUCKETID, optval);
+				break;
 #endif
 			}
 			break;
@@ -1219,8 +1229,10 @@ ip_ctloutput(struct socket *so, struct s
 		case IP_BINDMULTI:
 		case IP_FLOWID:
 		case IP_FLOWTYPE:
+		case IP_RECVFLOWID:
 #ifdef	RSS
 		case IP_RSSBUCKETID:
+		case IP_RECVRSSBUCKETID:
 #endif
 			switch (sopt->sopt_name) {
 
@@ -1290,6 +1302,9 @@ ip_ctloutput(struct socket *so, struct s
 			case IP_FLOWTYPE:
 				optval = inp->inp_flowtype;
 				break;
+			case IP_RECVFLOWID:
+				optval = OPTBIT2(INP_RECVFLOWID);
+				break;
 #ifdef	RSS
 			case IP_RSSBUCKETID:
 				retval = rss_hash2bucket(inp->inp_flowid,
@@ -1300,6 +1315,9 @@ ip_ctloutput(struct socket *so, struct s
 				else
 					error = EINVAL;
 				break;
+			case IP_RECVRSSBUCKETID:
+				optval = OPTBIT2(INP_RECVRSSBUCKETID);
+				break;
 #endif
 			case IP_BINDMULTI:
 				optval = OPTBIT2(INP_BINDMULTI);

Modified: head/sys/netinet/udp_usrreq.c
==============================================================================
--- head/sys/netinet/udp_usrreq.c	Tue Sep  9 00:21:21 2014	(r271292)
+++ head/sys/netinet/udp_usrreq.c	Tue Sep  9 01:45:39 2014	(r271293)
@@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
+#include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/domain.h>
@@ -1084,6 +1085,9 @@ udp_output(struct inpcb *inp, struct mbu
 	u_char tos;
 	uint8_t pr;
 	uint16_t cscov = 0;
+	uint32_t flowid = 0;
+	int flowid_type = 0;
+	int use_flowid = 0;
 
 	/*
 	 * udp_output() may need to temporarily bind or connect the current
@@ -1147,6 +1151,32 @@ udp_output(struct inpcb *inp, struct mbu
 				tos = *(u_char *)CMSG_DATA(cm);
 				break;
 
+			case IP_FLOWID:
+				if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
+					error = EINVAL;
+					break;
+				}
+				flowid = *(uint32_t *) CMSG_DATA(cm);
+				break;
+
+			case IP_FLOWTYPE:
+				if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
+					error = EINVAL;
+					break;
+				}
+				flowid_type = *(uint32_t *) CMSG_DATA(cm);
+				use_flowid = 1;
+				break;
+
+#ifdef	RSS
+			case IP_RSSBUCKETID:
+				if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
+					error = EINVAL;
+					break;
+				}
+				/* This is just a placeholder for now */
+				break;
+#endif	/* RSS */
 			default:
 				error = ENOPROTOOPT;
 				break;
@@ -1395,6 +1425,22 @@ udp_output(struct inpcb *inp, struct mbu
 	((struct ip *)ui)->ip_tos = tos;		/* XXX */
 	UDPSTAT_INC(udps_opackets);
 
+	/*
+	 * Setup flowid / RSS information for outbound socket.
+	 *
+	 * Once the UDP code decides to set a flowid some other way,
+	 * this allows the flowid to be overridden by userland.
+	 */
+	if (use_flowid) {
+		m->m_flags |= M_FLOWID;
+		m->m_pkthdr.flowid = flowid;
+		M_HASHTYPE_SET(m, flowid_type);
+	}
+
+#ifdef	RSS
+	ipflags |= IP_NODEFAULTFLOWID;
+#endif	/* RSS */
+
 	if (unlock_udbinfo == UH_WLOCKED)
 		INP_HASH_WUNLOCK(pcbinfo);
 	else if (unlock_udbinfo == UH_RLOCKED)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201409090145.s891jd1d037943>