Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 20 Aug 2014 09:34:41 +0200
From:      Hans Petter Selasky <hps@selasky.org>
To:        freebsd-net@freebsd.org,  FreeBSD Current <freebsd-current@freebsd.org>
Subject:   [RFC] Add support for hardware transmit rate limiting queues [WAS: Add support for changing the flow ID of TCP connections]
Message-ID:  <53F44F91.2060006@selasky.org>
In-Reply-To: <20140709163146.GA21731@ox>
References:  <53BC2E73.6090700@selasky.org> <53BC43AE.3040409@FreeBSD.org> <53BD5385.4090208@selasky.org> <20140709163146.GA21731@ox>

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------080406030702000505070708
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit

Hi,

A month has passed since the last e-mail on this topic, and in the 
meantime some new patches have been created and tested:

Basically the approach has been changed a little bit:

- The creation of hardware transmit rings has been made independent of 
the TCP stack. This allows firewall applications to forward traffic into 
hardware transmit rings as well, and not only native TCP applications. 
This should be one more reason to get the feature into the kernel.

- A hardware transmit ring basically can have two modes: FIXED-RATE or 
AUTOMATIC-RATE. In the fixed rate mode all traffic is sent at a fixed 
bytes per second rate. In the automatic mode you can configure a time 
after which the TX queue must be empty. The hardware driver uses this to 
configure the actual rate. In automatic mode you can also set an upper 
and lower transmit rate limit.

- The MBUF has got a new field in the packet header: "txringid"

- IOCTLs for TCP v4 and v6 sockets have been updated to allow setting of 
the "txringid" field in the mbuf.

The current patch [see attachment] should be much simpler and less 
intrusive than the previous one.

Any comments?

--HPS

--------------080406030702000505070708
Content-Type: text/x-diff;
 name="net_ratectl.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename="net_ratectl.diff"

=== sys/net/if.h
==================================================================
--- sys/net/if.h	(revision 270138)
+++ sys/net/if.h	(local)
@@ -239,6 +239,7 @@
 #define	IFCAP_RXCSUM_IPV6	0x200000  /* can offload checksum on IPv6 RX */
 #define	IFCAP_TXCSUM_IPV6	0x400000  /* can offload checksum on IPv6 TX */
 #define	IFCAP_HWSTATS		0x800000 /* manages counters internally */
+#define	IFCAP_HWTXRINGS		0x1000000 /* hardware supports TX rings */
 
 #define IFCAP_HWCSUM_IPV6	(IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6)
 
=== sys/netinet/in.c
==================================================================
--- sys/netinet/in.c	(revision 270138)
+++ sys/netinet/in.c	(local)
@@ -42,6 +42,7 @@
 #include <sys/malloc.h>
 #include <sys/priv.h>
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
@@ -201,9 +202,23 @@
 	struct in_ifaddr *ia;
 	int error;
 
-	if (ifp == NULL)
-		return (EADDRNOTAVAIL);
+	if (ifp == NULL) {
+		struct inpcb *inp;
 
+		switch (cmd) {
+		case SIOCSTXRINGID:
+			inp = sotoinpcb(so);
+			if (inp == NULL)
+				return (EINVAL);
+			INP_WLOCK(inp);
+			inp->inp_txringid = *(unsigned *)data;
+			INP_WUNLOCK(inp);
+			return (0);
+		default:
+			return (EADDRNOTAVAIL);
+		}
+	}
+
 	/*
 	 * Filter out 4 ioctls we implement directly.  Forward the rest
 	 * to specific functions and ifp->if_ioctl().
=== sys/netinet/in_pcb.h
==================================================================
--- sys/netinet/in_pcb.h	(revision 270138)
+++ sys/netinet/in_pcb.h	(local)
@@ -46,6 +46,7 @@
 #ifdef _KERNEL
 #include <sys/lock.h>
 #include <sys/rwlock.h>
+#include <sys/mbuf.h>
 #include <net/vnet.h>
 #include <vm/uma.h>
 #endif
@@ -177,7 +178,8 @@
 	u_char	inp_ip_ttl;		/* (i) time to live proto */
 	u_char	inp_ip_p;		/* (c) protocol proto */
 	u_char	inp_ip_minttl;		/* (i) minimum TTL or drop */
-	uint32_t inp_flowid;		/* (x) flow id / queue id */
+	m_flowid_t inp_flowid;		/* (x) flow ID */
+	m_txringid_t inp_txringid;		/* (x) transmit ring ID */
 	u_int	inp_refcount;		/* (i) refcount */
 	void	*inp_pspare[5];		/* (x) route caching / general use */
 	uint32_t inp_flowtype;		/* (x) M_HASHTYPE value */
=== sys/netinet/in_var.h
==================================================================
--- sys/netinet/in_var.h	(revision 270138)
+++ sys/netinet/in_var.h	(local)
@@ -33,6 +33,7 @@
 #ifndef _NETINET_IN_VAR_H_
 #define _NETINET_IN_VAR_H_
 
+#include <sys/mbuf.h>
 #include <sys/queue.h>
 #include <sys/fnv_hash.h>
 #include <sys/tree.h>
@@ -81,6 +82,18 @@
 	struct	sockaddr_in ifra_mask;
 	int	ifra_vhid;
 };
+
+struct in_ratectlreq {
+	char		ifreq_name[IFNAMSIZ];
+	m_txringid_t	tx_ring_id;
+	uint32_t	min_bytes_per_interval;
+	uint32_t	max_bytes_per_interval;
+	uint32_t	micros_per_interval;
+	uint32_t	mode;
+#define	IN_RATECTLREQ_MODE_FIXED 0	/* min rate = max rate */
+#define	IN_RATECTLREQ_MODE_AUTOMATIC 1	/* bounded by min/max */
+};
+
 /*
  * Given a pointer to an in_ifaddr (ifaddr),
  * return a pointer to the addr as a sockaddr_in.
=== sys/netinet/ip_output.c
==================================================================
--- sys/netinet/ip_output.c	(revision 270138)
+++ sys/netinet/ip_output.c	(local)
@@ -145,6 +145,7 @@
 	if (inp != NULL) {
 		INP_LOCK_ASSERT(inp);
 		M_SETFIB(m, inp->inp_inc.inc_fibnum);
+		m->m_pkthdr.txringid = inp->inp_txringid;
 		if (inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID)) {
 			m->m_pkthdr.flowid = inp->inp_flowid;
 			M_HASHTYPE_SET(m, inp->inp_flowtype);
=== sys/netinet6/in6.c
==================================================================
--- sys/netinet6/in6.c	(revision 270138)
+++ sys/netinet6/in6.c	(local)
@@ -235,6 +235,23 @@
 	int error;
 	u_long ocmd = cmd;
 
+	if (ifp == NULL) {
+		struct inpcb *inp;
+
+		switch (cmd) {
+		case SIOCSTXRINGID:
+			inp = sotoinpcb(so);
+			if (inp == NULL)
+				return (EINVAL);
+			INP_WLOCK(inp);
+			inp->inp_txringid = *(unsigned *)data;
+			INP_WUNLOCK(inp);
+			return (0);
+		default:
+			break;
+		}
+	}
+
 	/*
 	 * Compat to make pre-10.x ifconfig(8) operable.
 	 */
=== sys/sys/mbuf.h
==================================================================
--- sys/sys/mbuf.h	(revision 270138)
+++ sys/sys/mbuf.h	(local)
@@ -114,6 +114,10 @@
 	void			(*m_tag_free)(struct m_tag *);
 };
 
+typedef uint32_t m_flowid_t;
+typedef uint32_t m_txringid_t;
+#define	M_TXRINGID_UNDEFINED 0
+
 /*
  * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
  * Size ILP32: 48
@@ -125,7 +129,8 @@
 	int32_t		 len;		/* total packet length */
 
 	/* Layer crossing persistent information. */
-	uint32_t	 flowid;	/* packet's 4-tuple system */
+	m_flowid_t	 flowid;	/* packet's 4-tuple system */
+	m_txringid_t	 txringid;	/* transmit ring ID */
 	uint64_t	 csum_flags;	/* checksum and offload features */
 	uint16_t	 fibnum;	/* this packet should use this fib */
 	uint8_t		 cosqos;	/* class/quality of service */
=== sys/sys/sockio.h
==================================================================
--- sys/sys/sockio.h	(revision 270138)
+++ sys/sys/sockio.h	(local)
@@ -43,6 +43,7 @@
 #define	SIOCATMARK	 _IOR('s',  7, int)		/* at oob mark? */
 #define	SIOCSPGRP	 _IOW('s',  8, int)		/* set process group */
 #define	SIOCGPGRP	 _IOR('s',  9, int)		/* get process group */
+#define	SIOCSTXRINGID	 _IOW('s', 10, unsigned)	/* set transmit ring ID */
 
 /*	SIOCADDRT	 _IOW('r', 10, struct ortentry)	4.3BSD */
 /*	SIOCDELRT	 _IOW('r', 11, struct ortentry)	4.3BSD */
@@ -128,4 +129,9 @@
 #define	SIOCDIFGROUP	 _IOW('i', 137, struct ifgroupreq) /* delete ifgroup */
 #define	SIOCGIFGMEMB	_IOWR('i', 138, struct ifgroupreq) /* get members */
 
+#define	SIOCARATECTL    _IOWR('i', 139, struct in_ratectlreq) /* add new new rate control HW ring */
+#define	SIOCSRATECTL    _IOWR('i', 140, struct in_ratectlreq) /* set parameters for existing HW ring */
+#define	SIOCGRATECTL    _IOWR('i', 141, struct in_ratectlreq) /* get parameters for existing HW ring */
+#define	SIOCDRATECTL     _IOW('i', 142, struct in_ratectlreq) /* delete existing HW ring */
+
 #endif /* !_SYS_SOCKIO_H_ */

--------------080406030702000505070708--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?53F44F91.2060006>