Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 30 Sep 2004 23:12:48 -0400
From:      Brian Fundakowski Feldman <green@FreeBSD.org>
To:        ipfw@FreeBSD.org
Subject:   Re: ALTQ with IPFW
Message-ID:  <20041001031248.GC3411@green.homeunix.org>
In-Reply-To: <20040929195920.GC1807@green.homeunix.org>
References:  <20040929195920.GC1807@green.homeunix.org>

next in thread | previous in thread | raw e-mail | index | archive | help

--C7zPtVaVf+AK4Oqc
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Okay, here's a newer revision that turns ALTQ into an action modifier
similar to O_LOG.  To use the previous behavior, you would specify
"ipfw add count altq <qname> ..."

-- 
Brian Fundakowski Feldman                           \'[ FreeBSD ]''''''''''\
  <> green@FreeBSD.org                               \  The Power to Serve! \
 Opinions expressed are my own.                       \,,,,,,,,,,,,,,,,,,,,,,\

--C7zPtVaVf+AK4Oqc
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="ipfw-altq-tcpexts-etc.patch"

Index: sys/netinet/ip_divert.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.99
diff -u -r1.99 ip_divert.c
--- sys/netinet/ip_divert.c	5 Sep 2004 02:34:12 -0000	1.99
+++ sys/netinet/ip_divert.c	29 Sep 2004 14:44:19 -0000
@@ -66,6 +66,7 @@
 #include <netinet/ip.h>
 #include <netinet/ip_divert.h>
 #include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
 
 /*
  * Divert sockets
@@ -268,6 +269,8 @@
 div_output(struct socket *so, struct mbuf *m,
 	struct sockaddr_in *sin, struct mbuf *control)
 {
+	struct m_tag *mtag;
+	struct divert_tag *dt;
 	int error = 0;
 
 	KASSERT(m->m_pkthdr.rcvif == NULL, ("rcvif not null"));
@@ -275,23 +278,22 @@
 	if (control)
 		m_freem(control);		/* XXX */
 
+	mtag = m_tag_get(PACKET_TAG_DIVERT,
+			sizeof(struct divert_tag), M_NOWAIT);
+	if (mtag == NULL) {
+		error = ENOBUFS;
+		goto cantsend;
+	}
+	dt = (struct divert_tag *)(mtag+1);
+	dt->info = 0;
+	dt->cookie = 0;
+	m_tag_prepend(m, mtag);
+
 	/* Loopback avoidance and state recovery */
 	if (sin) {
-		struct m_tag *mtag;
-		struct divert_tag *dt;
 		int i;
 
-		mtag = m_tag_get(PACKET_TAG_DIVERT,
-				sizeof(struct divert_tag), M_NOWAIT);
-		if (mtag == NULL) {
-			error = ENOBUFS;
-			goto cantsend;
-		}
-		dt = (struct divert_tag *)(mtag+1);
-		dt->info = 0;
 		dt->cookie = sin->sin_port;
-		m_tag_prepend(m, mtag);
-
 		/*
 		 * Find receive interface with the given name, stuffed
 		 * (if it exists) in the sin_zero[] field.
@@ -309,6 +311,7 @@
 		struct ip *const ip = mtod(m, struct ip *);
 		struct inpcb *inp;
 
+		dt->info |= IP_FW_DIVERT_OUTPUT_FLAG;
 		INP_INFO_WLOCK(&divcbinfo);
 		inp = sotoinpcb(so);
 		INP_LOCK(inp);
@@ -341,6 +344,7 @@
 		INP_UNLOCK(inp);
 		INP_INFO_WUNLOCK(&divcbinfo);
 	} else {
+		dt->info |= IP_FW_DIVERT_LOOPBACK_FLAG;
 		if (m->m_pkthdr.rcvif == NULL) {
 			/*
 			 * No luck with the name, check by IP address.
Index: sys/netinet/ip_fw.h
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/ip_fw.h,v
retrieving revision 1.91
diff -u -r1.91 ip_fw.h
--- sys/netinet/ip_fw.h	29 Sep 2004 04:54:33 -0000	1.91
+++ sys/netinet/ip_fw.h	30 Sep 2004 05:41:01 -0000
@@ -134,6 +134,9 @@
 	O_IP_DST_LOOKUP,	/* arg1=table number, u32=value	*/
 	O_ANTISPOOF,		/* none				*/
 	O_JAIL,			/* u32 = id			*/
+	O_ALTQ,			/* u32 = altq classif. qid	*/
+	O_DIVERTED,		/* arg1=bitmap (1:loop, 2:out)	*/
+	O_TCPDATALEN,		/* arg1 = len			*/
 
 	O_LAST_OPCODE		/* not an opcode!		*/
 };
@@ -251,6 +254,14 @@
 } ipfw_insn_pipe;
 
 /*
+ * This is used for storing an altq queue id number.
+ */
+typedef struct _ipfw_insn_altq {
+	ipfw_insn	o;
+	u_int32_t	qid;
+} ipfw_insn_altq;
+
+/*
  * This is used for limit rules.
  */
 typedef struct	_ipfw_insn_limit {
@@ -293,6 +304,7 @@
  *	first instruction (at r->cmd) MUST BE an O_PROBE_STATE
  *  + if a rule has a "log" option, then the first action
  *	(at ACTION_PTR(r)) MUST be O_LOG
+ *  + if a rule has an "altq" option, it comes after "log"
  *
  * NOTE: we use a simple linked list of rules because we never need
  * 	to delete a rule without scanning the list. We do not use
@@ -405,9 +417,11 @@
  */
 #ifdef _KERNEL
 
-#define	IP_FW_PORT_DYNT_FLAG	0x10000
-#define	IP_FW_PORT_TEE_FLAG	0x20000
-#define	IP_FW_PORT_DENY_FLAG	0x40000
+#define	IP_FW_PORT_DYNT_FLAG		0x00010000
+#define	IP_FW_PORT_TEE_FLAG		0x00020000
+#define	IP_FW_PORT_DENY_FLAG		0x00040000
+#define	IP_FW_DIVERT_LOOPBACK_FLAG	0x00080000
+#define	IP_FW_DIVERT_OUTPUT_FLAG	0x00100000
 
 /*
  * Arguments for calling ipfw_chk() and dummynet_io(). We put them
Index: sys/netinet/ip_fw2.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/ip_fw2.c,v
retrieving revision 1.77
diff -u -r1.77 ip_fw2.c
--- sys/netinet/ip_fw2.c	29 Sep 2004 04:54:33 -0000	1.77
+++ sys/netinet/ip_fw2.c	30 Sep 2004 08:15:18 -0000
@@ -77,6 +77,7 @@
 #include <netinet/tcpip.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
+#include <altq/if_altq.h>
 
 #ifdef IPSEC
 #include <netinet6/ipsec.h>
@@ -553,6 +554,13 @@
 		if (l->log_left == 0)
 			limit_reached = l->max_log;
 		cmd += F_LEN(cmd);	/* point to first action */
+		if (cmd->opcode == O_ALTQ) {
+			ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
+
+			snprintf(SNPARGS(action2, 0), "Altq %d",
+				altq->qid);
+			cmd += F_LEN(cmd);
+		}
 		if (cmd->opcode == O_PROB)
 			cmd += F_LEN(cmd);
 
@@ -1324,6 +1332,8 @@
 	cmd = ACTION_PTR(me);
 	if (cmd->opcode == O_LOG)
 		cmd += F_LEN(cmd);
+	if (cmd->opcode == O_ALTQ)
+		cmd += F_LEN(cmd);
 	if ( cmd->opcode == O_SKIPTO )
 		for (rule = me->next; rule ; rule = rule->next)
 			if (rule->rulenum >= cmd->arg1)
@@ -1708,6 +1718,14 @@
 	int ugid_lookup = 0;
 
 	/*
+	 * divinput_flags	If non-zero, set to the IP_FW_DIVERT_*_FLAG
+	 *	associated with a packet input on a divert socket.  This
+	 *	will allow distinguishing traffic and its direction when
+	 *	it originates from a divert socket.
+	 */
+	u_int divinput_flags = 0;
+
+	/*
 	 * oif | args->oif	If NULL, ipfw_chk has been called on the
 	 *	inbound path (ether_input, bdg_forward, ip_input).
 	 *	If non-NULL, ipfw_chk has been called on the outbound path
@@ -1883,8 +1901,11 @@
 		}
 	}
 	/* reset divert rule to avoid confusion later */
-	if (mtag)
+	if (mtag) {
+		divinput_flags = divert_info(mtag) &
+		    (IP_FW_DIVERT_OUTPUT_FLAG | IP_FW_DIVERT_LOOPBACK_FLAG);
 		m_tag_delete(m, mtag);
+	}
 
 	/*
 	 * Now scan the rules, and parse microinstructions for each rule.
@@ -2017,6 +2038,13 @@
 				match = (args->eh != NULL);
 				break;
 
+			case O_DIVERTED:
+				match = (cmd->arg1 & 1 && divinput_flags &
+				    IP_FW_DIVERT_LOOPBACK_FLAG) ||
+					(cmd->arg1 & 2 && divinput_flags &
+				    IP_FW_DIVERT_OUTPUT_FLAG);
+				break;
+
 			case O_PROTO:
 				/*
 				 * We do not allow an arg of 0 so the
@@ -2175,6 +2203,28 @@
 				    flags_match(cmd, ip->ip_tos));
 				break;
 
+			case O_TCPDATALEN:
+				if (proto == IPPROTO_TCP && offset == 0) {
+				    struct tcphdr *tcp;
+				    uint16_t x;
+				    uint16_t *p;
+				    int i;
+
+				    tcp = L3HDR(struct tcphdr,ip);
+				    x = ip_len -
+					((ip->ip_hl + tcp->th_off) << 2);
+				    if (cmdlen == 1) {
+					match = (cmd->arg1 == x);
+					break;
+				    }
+				    /* otherwise we have ranges */
+				    p = ((ipfw_insn_u16 *)cmd)->ports;
+				    i = cmdlen - 1;
+				    for (; !match && i>0; i--, p += 2)
+					match = (x >= p[0] && x <= p[1]);
+				}
+				break;
+
 			case O_TCPFLAGS:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    flags_match(cmd,
@@ -2212,6 +2262,32 @@
 				     (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
 				break;
 
+			case O_ALTQ: {
+				struct altq_tag *at;
+				ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
+
+				match = 1;
+				mtag = m_tag_get(PACKET_TAG_PF_QID,
+						sizeof(struct altq_tag),
+						M_NOWAIT);
+				if (mtag == NULL) {
+					/*
+					 * Let the packet fall back to the
+					 * default ALTQ.
+					 */
+					break;
+				}
+				at = (struct altq_tag *)(mtag+1);
+				at->qid = altq->qid;
+				if (hlen != 0)
+					at->af = AF_INET;
+				else
+					at->af = AF_LINK;
+				at->hdr = ip;
+				m_tag_prepend(m, mtag);
+				break;
+			}
+
 			case O_LOG:
 				if (fw_verbose)
 					ipfw_log(f, hlen, args->eh, m, oif);
@@ -2275,6 +2351,9 @@
 			 *   or to the SKIPTO target ('goto again' after
 			 *   having set f, cmd and l), respectively.
 			 *
+			 * O_LOG and O_ALTQ action parameters:
+			 *   perform some action and set match = 1;
+			 *
 			 * O_LIMIT and O_KEEP_STATE: these opcodes are
 			 *   not real 'actions', and are stored right
 			 *   before the 'action' part of the rule.
@@ -2846,6 +2925,11 @@
 		printf("ipfw: size mismatch (have %d want %d)\n", size, l);
 		return (EINVAL);
 	}
+	if (rule->act_ofs >= rule->cmd_len) {
+		printf("ipfw: bogus action offset (%u > %u)\n",
+		    rule->act_ofs, rule->cmd_len - 1);
+		return (EINVAL);
+	}
 	/*
 	 * Now go for the individual checks. Very simple ones, basically only
 	 * instruction sizes.
@@ -2868,6 +2952,7 @@
 		case O_LAYER2:
 		case O_IN:
 		case O_FRAG:
+		case O_DIVERTED:
 		case O_IPOPT:
 		case O_IPTOS:
 		case O_IPPRECEDENCE:
@@ -2951,6 +3036,7 @@
 		case O_IPID:
 		case O_IPTTL:
 		case O_IPLEN:
+		case O_TCPDATALEN:
 			if (cmdlen < 1 || cmdlen > 31)
 				goto bad_size;
 			break;
@@ -2969,6 +3055,11 @@
 				goto bad_size;
 			break;
 
+		case O_ALTQ:
+			if (cmdlen != F_INSN_SIZE(ipfw_insn_altq))
+				goto bad_size;
+			break;
+
 		case O_PIPE:
 		case O_QUEUE:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe))
Index: sys/netinet/tcp.h
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp.h,v
retrieving revision 1.26
diff -u -r1.26 tcp.h
--- sys/netinet/tcp.h	16 Aug 2004 18:32:07 -0000	1.26
+++ sys/netinet/tcp.h	29 Sep 2004 05:09:59 -0000
@@ -161,12 +161,14 @@
 /*
  * User-settable options (used with setsockopt).
  */
-#define	TCP_NODELAY	0x01	/* don't delay send to coalesce packets */
+#define	TCP_NODELAY	 0x01	/* don't delay send to coalesce packets */
 #if __BSD_VISIBLE
-#define	TCP_MAXSEG	0x02	/* set maximum segment size */
-#define TCP_NOPUSH	0x04	/* don't push last block of write */
-#define TCP_NOOPT	0x08	/* don't use TCP options */
-#define TCP_MD5SIG	0x10	/* use MD5 digests (RFC2385) */
+#define	TCP_MAXSEG	 0x02	/* set maximum segment size */
+#define TCP_NOPUSH	 0x04	/* don't push last block of write */
+#define TCP_NOOPT	 0x08	/* don't use TCP options */
+#define TCP_MD5SIG	 0x10	/* use MD5 digests (RFC2385) */
+#define TCP_DELACKTIME	 0x20	/* delayed ack time (timeval, 0 disables) */
+#define TCP_REXMITJITTER 0x40	/* retransmit slop time (timeval, 0 disables) */
 #endif
 
 #endif /* !_NETINET_TCP_H_ */
Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.252
diff -u -r1.252 tcp_input.c
--- sys/netinet/tcp_input.c	17 Aug 2004 22:05:54 -0000	1.252
+++ sys/netinet/tcp_input.c	29 Sep 2004 05:33:17 -0000
@@ -195,7 +195,7 @@
 #define DELAY_ACK(tp)							\
 	((!callout_active(tp->tt_delack) &&				\
 	    (tp->t_flags & TF_RXWIN0SENT) == 0) &&			\
-	    (tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
+	    (tp->t_delacktime > 0 || (tp->t_flags & TF_NEEDSYN)))
 
 /* Initialize TCP reassembly queue */
 uma_zone_t	tcp_reass_zone;
@@ -1416,8 +1416,8 @@
 			 * ACKNOW will be turned on later.
 			 */
 			if (DELAY_ACK(tp) && tlen != 0)
-				callout_reset(tp->tt_delack, tcp_delacktime,
-				    tcp_timer_delack, tp);
+				callout_reset(tp->tt_delack,
+				    TCP_DELACKTICKS(tp), tcp_timer_delack, tp);
 			else
 				tp->t_flags |= TF_ACKNOW;
 			/*
@@ -2509,7 +2509,7 @@
 	INP_LOCK_ASSERT(inp);
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
-		callout_reset(tp->tt_delack, tcp_delacktime,
+		callout_reset(tp->tt_delack, TCP_DELACKTICKS(tp),
 		    tcp_timer_delack, tp);
 	}
 	INP_UNLOCK(inp);
@@ -2841,7 +2841,7 @@
 	 * statistical, we have to test that we don't drop below
 	 * the minimum feasible timer (which is 2 ticks).
 	 */
-	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+	TCPT_RANGESET(tp, tp->t_rxtcur, TCP_REXMTVAL(tp),
 		      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
 
 	/*
@@ -3082,7 +3082,7 @@
 			tp->t_rttvar =
 			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
 		}
-		TCPT_RANGESET(tp->t_rxtcur,
+		TCPT_RANGESET(tp, tp->t_rxtcur,
 			      ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
 			      tp->t_rttmin, TCPTV_REXMTMAX);
 	}
Index: sys/netinet/tcp_output.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.101
diff -u -r1.101 tcp_output.c
--- sys/netinet/tcp_output.c	5 Sep 2004 02:34:12 -0000	1.101
+++ sys/netinet/tcp_output.c	29 Sep 2004 04:41:12 -0000
@@ -1169,7 +1169,7 @@
 	/*
 	 * Start/restart persistance timer.
 	 */
-	TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
+	TCPT_RANGESET(tp, tt, t * tcp_backoff[tp->t_rxtshift],
 		      TCPTV_PERSMIN, TCPTV_PERSMAX);
 	callout_reset(tp->tt_persist, tt, tcp_timer_persist, tp);
 	if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
Index: sys/netinet/tcp_subr.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.203
diff -u -r1.203 tcp_subr.c
--- sys/netinet/tcp_subr.c	5 Sep 2004 02:34:12 -0000	1.203
+++ sys/netinet/tcp_subr.c	29 Sep 2004 05:37:00 -0000
@@ -620,6 +620,8 @@
 		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
 	if (tcp_do_rfc1644)
 		tp->t_flags |= TF_REQ_CC;
+	if (tcp_delack_enabled)
+		tp->t_delacktime = max(tcp_delacktime, 1);
 	tp->sack_enable = tcp_do_sack;
 	tp->t_inpcb = inp;	/* XXX */
 	/*
@@ -631,6 +633,7 @@
 	tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
 	tp->t_rttmin = tcp_rexmit_min;
 	tp->t_rxtcur = TCPTV_RTOBASE;
+	tp->t_rxtjitter = max(tcp_rexmit_slop, 0);
 	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
Index: sys/netinet/tcp_timer.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.66
diff -u -r1.66 tcp_timer.c
--- sys/netinet/tcp_timer.c	16 Aug 2004 18:32:07 -0000	1.66
+++ sys/netinet/tcp_timer.c	29 Sep 2004 04:41:16 -0000
@@ -538,7 +538,7 @@
 		rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
 	else
 		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
-	TCPT_RANGESET(tp->t_rxtcur, rexmt,
+	TCPT_RANGESET(tp, tp->t_rxtcur, rexmt,
 		      tp->t_rttmin, TCPTV_REXMTMAX);
 	/*
 	 * Disable rfc1323 and rfc1644 if we havn't got any response to
Index: sys/netinet/tcp_timer.h
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_timer.h,v
retrieving revision 1.26
diff -u -r1.26 tcp_timer.h
--- sys/netinet/tcp_timer.h	16 Aug 2004 18:32:07 -0000	1.26
+++ sys/netinet/tcp_timer.h	29 Sep 2004 05:31:10 -0000
@@ -126,8 +126,8 @@
 /*
  * Force a time value to be in a certain range.
  */
-#define	TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
-	(tv) = (value) + tcp_rexmit_slop; \
+#define	TCPT_RANGESET(tp, tv, value, tvmin, tvmax) do { \
+	(tv) = (value) + TCP_REXMITJITTERTICKS(tp); \
 	if ((u_long)(tv) < (u_long)(tvmin)) \
 		(tv) = (tvmin); \
 	else if ((u_long)(tv) > (u_long)(tvmax)) \
Index: sys/netinet/tcp_usrreq.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.107
diff -u -r1.107 tcp_usrreq.c
--- sys/netinet/tcp_usrreq.c	16 Aug 2004 18:32:07 -0000	1.107
+++ sys/netinet/tcp_usrreq.c	29 Sep 2004 05:37:25 -0000
@@ -997,9 +997,12 @@
 	struct socket *so;
 	struct sockopt *sopt;
 {
+	struct	timeval opttv;
 	int	error, opt, optval;
 	struct	inpcb *inp;
 	struct	tcpcb *tp;
+	void	*optout;
+	socklen_t optlen;
 
 	error = 0;
 	INP_INFO_RLOCK(&tcbinfo);
@@ -1090,6 +1093,32 @@
 				error = EINVAL;
 			break;
 
+		case TCP_DELACKTIME:
+			error = sooptcopyin(sopt, &opttv, sizeof opttv,
+					    sizeof opttv);
+			if (error)
+				break;
+
+			if (opttv.tv_sec == 0 && opttv.tv_usec == 0)
+				tp->t_delacktime = 0;
+			else
+				tp->t_delacktime = tvtohz(&opttv);
+			error = 0;
+			break;
+
+		case TCP_REXMITJITTER:
+			error = sooptcopyin(sopt, &opttv, sizeof opttv,
+					    sizeof opttv);
+			if (error)
+				break;
+
+			if (opttv.tv_sec == 0 && opttv.tv_usec == 0)
+				tp->t_rxtjitter = 0;
+			else
+				tp->t_rxtjitter = tvtohz(&opttv);
+			error = 0;
+			break;
+
 		default:
 			error = ENOPROTOOPT;
 			break;
@@ -1097,6 +1126,8 @@
 		break;
 
 	case SOPT_GET:
+		optout = &optval;
+		optlen = sizeof(optval);
 		switch (sopt->sopt_name) {
 #ifdef TCP_SIGNATURE
 		case TCP_MD5SIG:
@@ -1115,12 +1146,36 @@
 		case TCP_NOPUSH:
 			optval = tp->t_flags & TF_NOPUSH;
 			break;
+		case TCP_DELACKTIME:
+			optout = &opttv;
+			optlen = sizeof(opttv);
+			if (tp->t_delacktime == 0) {
+				opttv.tv_sec = 0;
+				opttv.tv_usec = 0;
+			} else {
+				opttv.tv_sec = tp->t_delacktime / hz;
+				opttv.tv_usec = (tp->t_delacktime % hz) *
+				    (1000000 / hz);
+			}
+			break;
+		case TCP_REXMITJITTER:
+			optout = &opttv;
+			optlen = sizeof(opttv);
+			if (tp->t_rxtjitter == 0) {
+				opttv.tv_sec = 0;
+				opttv.tv_usec = 0;
+			} else {
+				opttv.tv_sec = tp->t_rxtjitter / hz;
+				opttv.tv_usec = (tp->t_rxtjitter % hz) *
+				    (1000000 / hz);
+			}
+			break;
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		if (error == 0)
-			error = sooptcopyout(sopt, &optval, sizeof optval);
+			error = sooptcopyout(sopt, optout, optlen);
 		break;
 	}
 	INP_UNLOCK(inp);
Index: sys/netinet/tcp_var.h
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.109
diff -u -r1.109 tcp_var.h
--- sys/netinet/tcp_var.h	16 Aug 2004 18:32:07 -0000	1.109
+++ sys/netinet/tcp_var.h	29 Sep 2004 05:36:47 -0000
@@ -200,6 +200,8 @@
 	tcp_seq	rcv_lastsack;		/* last seq number(+1) sack'd by rcv'r*/
 	int	rcv_numsacks;		/* # distinct sack blks present */
 	struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
+	int	t_rxtjitter;		/* retransmission slop ticks to use. */
+	int	t_delacktime;		/* delayed ack ticks to use. */
 };
 
 #define IN_FASTRECOVERY(tp)	(tp->t_flags & TF_FASTRECOVERY)
@@ -368,6 +370,20 @@
 	  + (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
 
 /*
+ * Per-socket retransmit slop setting (0 for off, else value in ticks).
+ */
+#define	TCP_REXMITJITTERTICKS(tp) \
+	(tp)->t_rxtjitter
+
+/*
+ * Per-socket delayed ack timer setting (0 for off, else value in ticks).
+ * If off, and using T/TCP, the value will fall back to the system value as
+ * delayed ack will be a necessity.
+ */
+#define	TCP_DELACKTICKS(tp) \
+	((tp)->t_delacktime == 0 ? tcp_delacktime : (tp)->t_delacktime)
+
+/*
  * TCP statistics.
  * Many of these should be kept per connection,
  * but that's inconvenient at the moment.
Index: sbin/ipfw/Makefile
===================================================================
RCS file: /usr/ncvs/src/sbin/ipfw/Makefile,v
retrieving revision 1.12
diff -u -r1.12 Makefile
--- sbin/ipfw/Makefile	11 Jul 2002 17:33:37 -0000	1.12
+++ sbin/ipfw/Makefile	29 Sep 2004 03:09:15 -0000
@@ -4,5 +4,6 @@
 SRCS=	ipfw2.c
 WARNS?=	0
 MAN=	ipfw.8
+CFLAGS+= -I${.CURDIR}/../../sys/contrib/pf
 
 .include <bsd.prog.mk>
Index: sbin/ipfw/ipfw2.c
===================================================================
RCS file: /usr/ncvs/src/sbin/ipfw/ipfw2.c,v
retrieving revision 1.59
diff -u -r1.59 ipfw2.c
--- sbin/ipfw/ipfw2.c	21 Sep 2004 22:12:43 -0000	1.59
+++ sbin/ipfw/ipfw2.c	1 Oct 2004 01:27:08 -0000
@@ -27,6 +27,7 @@
 #include <sys/sysctl.h>
 #include <sys/time.h>
 #include <sys/wait.h>
+#include <sys/queue.h>
 
 #include <ctype.h>
 #include <err.h>
@@ -43,8 +44,11 @@
 #include <timeconv.h>	/* XXX do we need this ? */
 #include <unistd.h>
 #include <sysexits.h>
+#include <unistd.h>
+#include <fcntl.h>
 
 #include <net/if.h>
+#include <net/pfvar.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
@@ -202,6 +206,9 @@
 	TOK_UNREACH,
 	TOK_CHECKSTATE,
 
+	TOK_ALTQ,
+	TOK_LOG,
+
 	TOK_UID,
 	TOK_GID,
 	TOK_JAIL,
@@ -210,6 +217,9 @@
 	TOK_KEEPSTATE,
 	TOK_LAYER2,
 	TOK_OUT,
+	TOK_DIVERTED,
+	TOK_DIVERTEDLOOPBACK,
+	TOK_DIVERTEDOUTPUT,
 	TOK_XMIT,
 	TOK_RECV,
 	TOK_VIA,
@@ -223,6 +233,7 @@
 	TOK_IPVER,
 	TOK_ESTAB,
 	TOK_SETUP,
+	TOK_TCPDATALEN,
 	TOK_TCPFLAGS,
 	TOK_TCPOPTS,
 	TOK_TCPSEQ,
@@ -302,6 +313,12 @@
 	{ NULL, 0 }	/* terminator */
 };
 
+struct _s_x rule_action_params[] = {
+	{ "altq",		TOK_ALTQ },
+	{ "log",		TOK_LOG },
+	{ NULL, 0 }	/* terminator */
+};
+
 struct _s_x rule_options[] = {
 	{ "uid",		TOK_UID },
 	{ "gid",		TOK_GID },
@@ -312,6 +329,9 @@
 	{ "bridged",		TOK_LAYER2 },
 	{ "layer2",		TOK_LAYER2 },
 	{ "out",		TOK_OUT },
+	{ "diverted",		TOK_DIVERTED },
+	{ "diverted-loopback",	TOK_DIVERTEDLOOPBACK },
+	{ "diverted-output",	TOK_DIVERTEDOUTPUT },
 	{ "xmit",		TOK_XMIT },
 	{ "recv",		TOK_RECV },
 	{ "via",		TOK_VIA },
@@ -329,6 +349,7 @@
 	{ "estab",		TOK_ESTAB },
 	{ "established",	TOK_ESTAB },
 	{ "setup",		TOK_SETUP },
+	{ "tcpdatalen",		TOK_TCPDATALEN },
 	{ "tcpflags",		TOK_TCPFLAGS },
 	{ "tcpflgs",		TOK_TCPFLAGS },
 	{ "tcpoptions",		TOK_TCPOPTS },
@@ -462,6 +483,7 @@
 	{"iplen",	O_IPLEN},
 	{"ipttl",	O_IPTTL},
 	{"mac-type",	O_MAC_TYPE},
+	{"tcpdatalen",	O_TCPDATALEN},
 	{NULL,		0}
 };
 
@@ -563,6 +585,107 @@
 }
 
 /*
+ * Map between current altq queue id numbers and names.
+ */
+static int altq_fetched = 0;
+static TAILQ_HEAD(, pf_altq) altq_entries = 
+	TAILQ_HEAD_INITIALIZER(altq_entries);
+
+static void
+altq_set_enabled(int enabled)
+{
+	int pffd;
+
+	pffd = open("/dev/pf", O_RDWR);
+	if (pffd == -1)
+		err(EX_UNAVAILABLE,
+		    "altq support opening pf(4) control device");
+	if (enabled) {
+		if (ioctl(pffd, DIOCSTARTALTQ) != 0 && errno != EEXIST)
+			err(EX_UNAVAILABLE, "enabling altq");
+	} else {
+		if (ioctl(pffd, DIOCSTOPALTQ) != 0 && errno != ENOENT)
+			err(EX_UNAVAILABLE, "disabling altq");
+	}
+	close(pffd);
+}
+
+static void
+altq_fetch()
+{
+	struct pfioc_altq pfioc;
+	struct pf_altq *altq;
+	int pffd, mnr;
+
+	if (altq_fetched)
+		return;
+	altq_fetched = 1;
+	pffd = open("/dev/pf", O_RDONLY);
+	if (pffd == -1) {
+		warn("altq support opening pf(4) control device");
+		return;
+	}
+	bzero(&pfioc, sizeof(pfioc));
+	if (ioctl(pffd, DIOCGETALTQS, &pfioc) != 0) {
+		warn("altq support getting queue list");
+		close(pffd);
+		return;
+	}
+	mnr = pfioc.nr;
+	for (pfioc.nr = 0; pfioc.nr < mnr; pfioc.nr++) {
+		if (ioctl(pffd, DIOCGETALTQ, &pfioc) != 0) {
+			if (errno == EBUSY)
+				break;
+			warn("altq support getting queue list");
+			close(pffd);
+			return;
+		}
+		if (pfioc.altq.qid == 0)
+			continue;
+		altq = malloc(sizeof(*altq));
+		if (altq == NULL)
+			err(EX_OSERR, "malloc");
+		*altq = pfioc.altq;
+		TAILQ_INSERT_TAIL(&altq_entries, altq, entries);
+	}
+	close(pffd);
+}
+
+static u_int32_t
+altq_name_to_qid(const char *name)
+{
+	struct pf_altq *altq;
+
+	altq_fetch();
+	TAILQ_FOREACH(altq, &altq_entries, entries)
+		if (strcmp(name, altq->qname) == 0)
+			break;
+	if (altq == NULL)
+		errx(EX_DATAERR, "altq has no queue named `%s'", name);
+	return altq->qid;
+}
+
+static const char *
+altq_qid_to_name(u_int32_t qid)
+{
+	struct pf_altq *altq;
+
+	altq_fetch();
+	TAILQ_FOREACH(altq, &altq_entries, entries)
+		if (qid == altq->qid)
+			break;
+	if (altq == NULL)
+		return NULL;
+	return altq->qname;
+}
+
+static void
+fill_altq_qid(u_int32_t *qid, const char *av)
+{
+	*qid = altq_name_to_qid(av);
+}
+
+/*
  * Fill the body of the command with the list of port ranges.
  */
 static int
@@ -908,6 +1031,7 @@
 	int proto = 0;		/* default */
 	int flags = 0;	/* prerequisites */
 	ipfw_insn_log *logptr = NULL; /* set if we find an O_LOG */
+	ipfw_insn_altq *altqptr = NULL; /* set if we find an O_ALTQ */
 	int or_block = 0;	/* we are in an or block */
 	uint32_t set_disable;
 
@@ -1033,6 +1157,10 @@
 			logptr = (ipfw_insn_log *)cmd;
 			break;
 
+		case O_ALTQ: /* O_ALTQ is printed after O_LOG */
+			altqptr = (ipfw_insn_altq *)cmd;
+			break;
+
 		default:
 			printf("** unrecognized action %d len %d",
 				cmd->opcode, cmd->len);
@@ -1044,6 +1172,15 @@
 		else
 			printf(" log");
 	}
+	if (altqptr) {
+		const char *qname;
+
+		qname = altq_qid_to_name(altqptr->qid);
+		if (qname == NULL)
+			printf(" altq ?<%u>", altqptr->qid);
+		else
+			printf(" altq %s", qname);
+	}
 
 	/*
 	 * then print the body.
@@ -1174,6 +1311,23 @@
 				printf(cmd->len & F_NOT ? " out" : " in");
 				break;
 
+			case O_DIVERTED:
+				switch (cmd->arg1) {
+				case 3:
+					printf(" diverted");
+					break;
+				case 1:
+					printf(" diverted-loopback");
+					break;
+				case 2:
+					printf(" diverted-output");
+					break;
+				default:
+					printf(" diverted-?<%u>", cmd->arg1);
+					break;
+				}
+				break;
+
 			case O_LAYER2:
 				printf(" layer2");
 				break;
@@ -1244,6 +1398,14 @@
 				printf(" established");
 				break;
 
+			case O_TCPDATALEN:
+				if (F_LEN(cmd) == 1)
+				    printf(" tcpdatalen %u", cmd->arg1 );
+				else
+				    print_newports((ipfw_insn_u16 *)cmd, 0,
+					O_TCPDATALEN);
+				break;
+
 			case O_TCPFLAGS:
 				print_flags("tcpflags", cmd, f_tcpflags);
 				break;
@@ -1709,6 +1871,8 @@
 	} else if (strncmp(*av, "dyn_keepalive", strlen(*av)) == 0) {
 		sysctlbyname("net.inet.ip.fw.dyn_keepalive", NULL, 0,
 		    &which, sizeof(which));
+	} else if (strncmp(*av, "altq", strlen(*av)) == 0) {
+		altq_set_enabled(which);
 	} else {
 		warnx("unrecognize enable/disable keyword: %s\n", *av);
 	}
@@ -1903,21 +2067,23 @@
 "set [disable N... enable N...] | move [rule] X to Y | swap X Y | show\n"
 "table N {add ip[/bits] [value] | delete ip[/bits] | flush | list}\n"
 "\n"
-"RULE-BODY:	check-state [LOG] | ACTION [LOG] ADDR [OPTION_LIST]\n"
+"RULE-BODY:	check-state [PARAMS] | ACTION [PARAMS] ADDR [OPTION_LIST]\n"
 "ACTION:	check-state | allow | count | deny | reject | skipto N |\n"
 "		{divert|tee} PORT | forward ADDR | pipe N | queue N\n"
+"PARAMS: 	[log [logamount LOGLIMIT]] [altq QUEUE_NAME]\n"
 "ADDR:		[ MAC dst src ether_type ] \n"
 "		[ from IPADDR [ PORT ] to IPADDR [ PORTLIST ] ]\n"
 "IPADDR:	[not] { any | me | ip/bits{x,y,z} | table(t[,v]) | IPLIST }\n"
 "IPLIST:	{ ip | ip/bits | ip:mask }[,IPLIST]\n"
 "OPTION_LIST:	OPTION [OPTION_LIST]\n"
-"OPTION:	bridged | {dst-ip|src-ip} ADDR | {dst-port|src-port} LIST |\n"
+"OPTION:	bridged | diverted | diverted-loopback | diverted-output |\n"
+"	{dst-ip|src-ip} ADDR | {dst-port|src-port} LIST |\n"
 "	estab | frag | {gid|uid} N | icmptypes LIST | in | out | ipid LIST |\n"
 "	iplen LIST | ipoptions SPEC | ipprecedence | ipsec | iptos SPEC |\n"
 "	ipttl LIST | ipversion VER | keep-state | layer2 | limit ... |\n"
 "	mac ... | mac-type LIST | proto LIST | {recv|xmit|via} {IF|IPADDR} |\n"
 "	setup | {tcpack|tcpseq|tcpwin} NN | tcpflags SPEC | tcpoptions SPEC |\n"
-"	verrevpath | versrcreach | antispoof\n"
+"	tcpdatalen LIST | verrevpath | versrcreach | antispoof\n"
 );
 exit(0);
 }
@@ -2756,11 +2922,11 @@
  * Rules are added into the 'rulebuf' and then copied in the correct order
  * into the actual rule.
  *
- * The syntax for a rule starts with the action, followed by an
- * optional log action, and the various match patterns.
+ * The syntax for a rule starts with the action, followed by
+ * optional action parameters, and the various match patterns.
  * In the assembled microcode, the first opcode must be an O_PROBE_STATE
  * (generated if the rule includes a keep-state option), then the
- * various match patterns, the "log" action, and the actual action.
+ * various match patterns, log/altq actions, and the actual action.
  *
  */
 static void
@@ -2783,6 +2949,7 @@
 	 * various flags used to record that we entered some fields.
 	 */
 	ipfw_insn *have_state = NULL;	/* check-state or keep-state */
+	ipfw_insn *have_log = NULL, *have_altq = NULL;
 	size_t len;
 
 	int i;
@@ -2945,32 +3112,63 @@
 	action = next_cmd(action);
 
 	/*
+	 * [altq queuename] -- altq tag, optional
 	 * [log [logamount N]]	-- log, optional
 	 *
-	 * If exists, it goes first in the cmdbuf, but then it is
+	 * If they exist, they go first in the cmdbuf, but then they are
 	 * skipped in the copy section to the end of the buffer.
 	 */
-	if (ac && !strncmp(*av, "log", strlen(*av))) {
-		ipfw_insn_log *c = (ipfw_insn_log *)cmd;
-		int l;
+	while (ac != 0 && (i = match_token(rule_action_params, *av)) != -1) {
+		ac--; av++;
+		switch (i) {
+		case TOK_LOG:
+		    {
+			ipfw_insn_log *c = (ipfw_insn_log *)cmd;
+			int l;
 
-		cmd->len = F_INSN_SIZE(ipfw_insn_log);
-		cmd->opcode = O_LOG;
-		av++; ac--;
-		if (ac && !strncmp(*av, "logamount", strlen(*av))) {
-			ac--; av++;
-			NEED1("logamount requires argument");
-			l = atoi(*av);
-			if (l < 0)
-				errx(EX_DATAERR, "logamount must be positive");
-			c->max_log = l;
+			if (have_log)
+				errx(EX_DATAERR,
+				    "log cannot be specified more than once");
+			have_log = (ipfw_insn *)c;
+			cmd->len = F_INSN_SIZE(ipfw_insn_log);
+			cmd->opcode = O_LOG;
+			if (ac && !strncmp(*av, "logamount", strlen(*av))) {
+				ac--; av++;
+				NEED1("logamount requires argument");
+				l = atoi(*av);
+				if (l < 0)
+					errx(EX_DATAERR,
+					    "logamount must be positive");
+				c->max_log = l;
+				ac--; av++;
+			} else {
+				len = sizeof(c->max_log);
+				if (sysctlbyname("net.inet.ip.fw.verbose_limit",
+				    &c->max_log, &len, NULL, 0) == -1)
+					errx(1, "sysctlbyname(\"%s\")",
+					    "net.inet.ip.fw.verbose_limit");
+			}
+		    }
+			break;
+
+		case TOK_ALTQ:
+		    {
+			ipfw_insn_altq *a = (ipfw_insn_altq *)cmd;
+
+			NEED1("missing altq queue name");
+			if (have_altq)
+				errx(EX_DATAERR,
+				    "altq cannot be specified more than once");
+			have_altq = (ipfw_insn *)a;
+			cmd->len = F_INSN_SIZE(ipfw_insn_altq);
+			cmd->opcode = O_ALTQ;
+			fill_altq_qid(&a->qid, *av);
 			ac--; av++;
-		} else {
-			len = sizeof(c->max_log);
-			if (sysctlbyname("net.inet.ip.fw.verbose_limit",
-			    &c->max_log, &len, NULL, 0) == -1)
-				errx(1, "sysctlbyname(\"%s\")",
-				    "net.inet.ip.fw.verbose_limit");
+		    }
+			break;
+
+		default:
+			abort();
 		}
 		cmd = next_cmd(cmd);
 	}
@@ -3197,6 +3395,18 @@
 			fill_cmd(cmd, O_IN, 0, 0);
 			break;
 
+		case TOK_DIVERTED:
+			fill_cmd(cmd, O_DIVERTED, 0, 3);
+			break;
+
+		case TOK_DIVERTEDLOOPBACK:
+			fill_cmd(cmd, O_DIVERTED, 0, 1);
+			break;
+
+		case TOK_DIVERTEDOUTPUT:
+			fill_cmd(cmd, O_DIVERTED, 0, 2);
+			break;
+
 		case TOK_FRAG:
 			fill_cmd(cmd, O_FRAG, 0, 0);
 			break;
@@ -3344,6 +3554,17 @@
 				(TH_SYN) | ( (TH_ACK) & 0xff) <<8 );
 			break;
 
+		case TOK_TCPDATALEN:
+			NEED1("tcpdatalen requires length");
+			if (strpbrk(*av, "-,")) {
+			    if (!add_ports(cmd, *av, 0, O_TCPDATALEN))
+				errx(EX_DATAERR, "invalid tcpdata len %s", *av);
+			} else
+			    fill_cmd(cmd, O_TCPDATALEN, 0,
+				    strtoul(*av, NULL, 0));
+			ac--; av++;
+			break;
+
 		case TOK_TCPOPTS:
 			NEED1("missing argument for tcpoptions");
 			fill_flags(cmd, O_TCPOPTS, f_tcpopts, *av);
@@ -3533,7 +3754,7 @@
 		dst = next_cmd(dst);
 	}
 	/*
-	 * copy all commands but O_LOG, O_KEEP_STATE, O_LIMIT
+	 * copy all commands but O_LOG, O_KEEP_STATE, O_LIMIT, O_ALTQ
 	 */
 	for (src = (ipfw_insn *)cmdbuf; src != cmd; src += i) {
 		i = F_LEN(src);
@@ -3542,6 +3763,7 @@
 		case O_LOG:
 		case O_KEEP_STATE:
 		case O_LIMIT:
+		case O_ALTQ:
 			break;
 		default:
 			bcopy(src, dst, i * sizeof(uint32_t));
@@ -3563,12 +3785,16 @@
 	rule->act_ofs = dst - rule->cmd;
 
 	/*
-	 * put back O_LOG if necessary
+	 * put back O_LOG, O_ALTQ if necessary
 	 */
-	src = (ipfw_insn *)cmdbuf;
-	if (src->opcode == O_LOG) {
-		i = F_LEN(src);
-		bcopy(src, dst, i * sizeof(uint32_t));
+	if (have_log) {
+		i = F_LEN(have_log);
+		bcopy(have_log, dst, i * sizeof(uint32_t));
+		dst += i;
+	}
+	if (have_altq) {
+		i = F_LEN(have_altq);
+		bcopy(have_altq, dst, i * sizeof(uint32_t));
 		dst += i;
 	}
 	/*

--C7zPtVaVf+AK4Oqc--


Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20041001031248.GC3411>