From owner-freebsd-ipfw@FreeBSD.ORG Fri Oct 1 03:13:10 2004 Return-Path: Delivered-To: freebsd-ipfw@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 25E1616A4CF for ; Fri, 1 Oct 2004 03:13:10 +0000 (GMT) Received: from green.homeunix.org (pcp04368961pcs.nrockv01.md.comcast.net [69.140.212.7]) by mx1.FreeBSD.org (Postfix) with ESMTP id A36CE43D48 for ; Fri, 1 Oct 2004 03:12:53 +0000 (GMT) (envelope-from green@green.homeunix.org) Received: from green.homeunix.org (green@localhost [127.0.0.1]) by green.homeunix.org (8.13.1/8.13.1) with ESMTP id i913CoaQ007615 for ; Thu, 30 Sep 2004 23:12:50 -0400 (EDT) (envelope-from green@green.homeunix.org) Received: (from green@localhost) by green.homeunix.org (8.13.1/8.13.1/Submit) id i913Cmrj007614 for ipfw@FreeBSD.org; Thu, 30 Sep 2004 23:12:48 -0400 (EDT) (envelope-from green) Date: Thu, 30 Sep 2004 23:12:48 -0400 From: Brian Fundakowski Feldman To: ipfw@FreeBSD.org Message-ID: <20041001031248.GC3411@green.homeunix.org> References: <20040929195920.GC1807@green.homeunix.org> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="C7zPtVaVf+AK4Oqc" Content-Disposition: inline In-Reply-To: <20040929195920.GC1807@green.homeunix.org> User-Agent: Mutt/1.5.6i Subject: Re: ALTQ with IPFW X-BeenThere: freebsd-ipfw@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: IPFW Technical Discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 01 Oct 2004 03:13:10 -0000 --C7zPtVaVf+AK4Oqc Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Okay, here's a newer revision that turns ALTQ into an action modifier similar to O_LOG. To use the previous behavior, you would specify "ipfw add count altq ..." -- Brian Fundakowski Feldman \'[ FreeBSD ]''''''''''\ <> green@FreeBSD.org \ The Power to Serve! \ Opinions expressed are my own. 
\,,,,,,,,,,,,,,,,,,,,,,\ --C7zPtVaVf+AK4Oqc Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="ipfw-altq-tcpexts-etc.patch" Index: sys/netinet/ip_divert.c =================================================================== RCS file: /usr/ncvs/src/sys/netinet/ip_divert.c,v retrieving revision 1.99 diff -u -r1.99 ip_divert.c --- sys/netinet/ip_divert.c 5 Sep 2004 02:34:12 -0000 1.99 +++ sys/netinet/ip_divert.c 29 Sep 2004 14:44:19 -0000 @@ -66,6 +66,7 @@ #include #include #include +#include /* * Divert sockets @@ -268,6 +269,8 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, struct mbuf *control) { + struct m_tag *mtag; + struct divert_tag *dt; int error = 0; KASSERT(m->m_pkthdr.rcvif == NULL, ("rcvif not null")); @@ -275,23 +278,22 @@ if (control) m_freem(control); /* XXX */ + mtag = m_tag_get(PACKET_TAG_DIVERT, + sizeof(struct divert_tag), M_NOWAIT); + if (mtag == NULL) { + error = ENOBUFS; + goto cantsend; + } + dt = (struct divert_tag *)(mtag+1); + dt->info = 0; + dt->cookie = 0; + m_tag_prepend(m, mtag); + /* Loopback avoidance and state recovery */ if (sin) { - struct m_tag *mtag; - struct divert_tag *dt; int i; - mtag = m_tag_get(PACKET_TAG_DIVERT, - sizeof(struct divert_tag), M_NOWAIT); - if (mtag == NULL) { - error = ENOBUFS; - goto cantsend; - } - dt = (struct divert_tag *)(mtag+1); - dt->info = 0; dt->cookie = sin->sin_port; - m_tag_prepend(m, mtag); - /* * Find receive interface with the given name, stuffed * (if it exists) in the sin_zero[] field. @@ -309,6 +311,7 @@ struct ip *const ip = mtod(m, struct ip *); struct inpcb *inp; + dt->info |= IP_FW_DIVERT_OUTPUT_FLAG; INP_INFO_WLOCK(&divcbinfo); inp = sotoinpcb(so); INP_LOCK(inp); @@ -341,6 +344,7 @@ INP_UNLOCK(inp); INP_INFO_WUNLOCK(&divcbinfo); } else { + dt->info |= IP_FW_DIVERT_LOOPBACK_FLAG; if (m->m_pkthdr.rcvif == NULL) { /* * No luck with the name, check by IP address. 
Index: sys/netinet/ip_fw.h =================================================================== RCS file: /usr/ncvs/src/sys/netinet/ip_fw.h,v retrieving revision 1.91 diff -u -r1.91 ip_fw.h --- sys/netinet/ip_fw.h 29 Sep 2004 04:54:33 -0000 1.91 +++ sys/netinet/ip_fw.h 30 Sep 2004 05:41:01 -0000 @@ -134,6 +134,9 @@ O_IP_DST_LOOKUP, /* arg1=table number, u32=value */ O_ANTISPOOF, /* none */ O_JAIL, /* u32 = id */ + O_ALTQ, /* u32 = altq classif. qid */ + O_DIVERTED, /* arg1=bitmap (1:loop, 2:out) */ + O_TCPDATALEN, /* arg1 = len */ O_LAST_OPCODE /* not an opcode! */ }; @@ -251,6 +254,14 @@ } ipfw_insn_pipe; /* + * This is used for storing an altq queue id number. + */ +typedef struct _ipfw_insn_altq { + ipfw_insn o; + u_int32_t qid; +} ipfw_insn_altq; + +/* * This is used for limit rules. */ typedef struct _ipfw_insn_limit { @@ -293,6 +304,7 @@ * first instruction (at r->cmd) MUST BE an O_PROBE_STATE * + if a rule has a "log" option, then the first action * (at ACTION_PTR(r)) MUST be O_LOG + * + if a rule has an "altq" option, it comes after "log" * * NOTE: we use a simple linked list of rules because we never need * to delete a rule without scanning the list. We do not use @@ -405,9 +417,11 @@ */ #ifdef _KERNEL -#define IP_FW_PORT_DYNT_FLAG 0x10000 -#define IP_FW_PORT_TEE_FLAG 0x20000 -#define IP_FW_PORT_DENY_FLAG 0x40000 +#define IP_FW_PORT_DYNT_FLAG 0x00010000 +#define IP_FW_PORT_TEE_FLAG 0x00020000 +#define IP_FW_PORT_DENY_FLAG 0x00040000 +#define IP_FW_DIVERT_LOOPBACK_FLAG 0x00080000 +#define IP_FW_DIVERT_OUTPUT_FLAG 0x00100000 /* * Arguments for calling ipfw_chk() and dummynet_io(). 
We put them Index: sys/netinet/ip_fw2.c =================================================================== RCS file: /usr/ncvs/src/sys/netinet/ip_fw2.c,v retrieving revision 1.77 diff -u -r1.77 ip_fw2.c --- sys/netinet/ip_fw2.c 29 Sep 2004 04:54:33 -0000 1.77 +++ sys/netinet/ip_fw2.c 30 Sep 2004 08:15:18 -0000 @@ -77,6 +77,7 @@ #include #include #include +#include #ifdef IPSEC #include @@ -553,6 +554,13 @@ if (l->log_left == 0) limit_reached = l->max_log; cmd += F_LEN(cmd); /* point to first action */ + if (cmd->opcode == O_ALTQ) { + ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; + + snprintf(SNPARGS(action2, 0), "Altq %d", + altq->qid); + cmd += F_LEN(cmd); + } if (cmd->opcode == O_PROB) cmd += F_LEN(cmd); @@ -1324,6 +1332,8 @@ cmd = ACTION_PTR(me); if (cmd->opcode == O_LOG) cmd += F_LEN(cmd); + if (cmd->opcode == O_ALTQ) + cmd += F_LEN(cmd); if ( cmd->opcode == O_SKIPTO ) for (rule = me->next; rule ; rule = rule->next) if (rule->rulenum >= cmd->arg1) @@ -1708,6 +1718,14 @@ int ugid_lookup = 0; /* + * divinput_flags If non-zero, set to the IP_FW_DIVERT_*_FLAG + * associated with a packet input on a divert socket. This + * will allow to distinguish traffic and its direction when + * it originates from a divert socket. + */ + u_int divinput_flags = 0; + + /* * oif | args->oif If NULL, ipfw_chk has been called on the * inbound path (ether_input, bdg_forward, ip_input). * If non-NULL, ipfw_chk has been called on the outbound path @@ -1883,8 +1901,11 @@ } } /* reset divert rule to avoid confusion later */ - if (mtag) + if (mtag) { + divinput_flags = divert_info(mtag) & + (IP_FW_DIVERT_OUTPUT_FLAG | IP_FW_DIVERT_LOOPBACK_FLAG); m_tag_delete(m, mtag); + } /* * Now scan the rules, and parse microinstructions for each rule. 
@@ -2017,6 +2038,13 @@ match = (args->eh != NULL); break; + case O_DIVERTED: + match = (cmd->arg1 & 1 && divinput_flags & + IP_FW_DIVERT_LOOPBACK_FLAG) || + (cmd->arg1 & 2 && divinput_flags & + IP_FW_DIVERT_OUTPUT_FLAG); + break; + case O_PROTO: /* * We do not allow an arg of 0 so the @@ -2175,6 +2203,28 @@ flags_match(cmd, ip->ip_tos)); break; + case O_TCPDATALEN: + if (proto == IPPROTO_TCP && offset == 0) { + struct tcphdr *tcp; + uint16_t x; + uint16_t *p; + int i; + + tcp = L3HDR(struct tcphdr,ip); + x = ip_len - + ((ip->ip_hl + tcp->th_off) << 2); + if (cmdlen == 1) { + match = (cmd->arg1 == x); + break; + } + /* otherwise we have ranges */ + p = ((ipfw_insn_u16 *)cmd)->ports; + i = cmdlen - 1; + for (; !match && i>0; i--, p += 2) + match = (x >= p[0] && x <= p[1]); + } + break; + case O_TCPFLAGS: match = (proto == IPPROTO_TCP && offset == 0 && flags_match(cmd, @@ -2212,6 +2262,32 @@ (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); break; + case O_ALTQ: { + struct altq_tag *at; + ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; + + match = 1; + mtag = m_tag_get(PACKET_TAG_PF_QID, + sizeof(struct altq_tag), + M_NOWAIT); + if (mtag == NULL) { + /* + * Let the packet fall back to the + * default ALTQ. + */ + break; + } + at = (struct altq_tag *)(mtag+1); + at->qid = altq->qid; + if (hlen != 0) + at->af = AF_INET; + else + at->af = AF_LINK; + at->hdr = ip; + m_tag_prepend(m, mtag); + break; + } + case O_LOG: if (fw_verbose) ipfw_log(f, hlen, args->eh, m, oif); @@ -2275,6 +2351,9 @@ * or to the SKIPTO target ('goto again' after * having set f, cmd and l), respectively. * + * O_LOG and O_ALTQ action parameters: + * perform some action and set match = 1; + * * O_LIMIT and O_KEEP_STATE: these opcodes are * not real 'actions', and are stored right * before the 'action' part of the rule. 
@@ -2846,6 +2925,11 @@ printf("ipfw: size mismatch (have %d want %d)\n", size, l); return (EINVAL); } + if (rule->act_ofs >= rule->cmd_len) { + printf("ipfw: bogus action offset (%u > %u)\n", + rule->act_ofs, rule->cmd_len - 1); + return (EINVAL); + } /* * Now go for the individual checks. Very simple ones, basically only * instruction sizes. @@ -2868,6 +2952,7 @@ case O_LAYER2: case O_IN: case O_FRAG: + case O_DIVERTED: case O_IPOPT: case O_IPTOS: case O_IPPRECEDENCE: @@ -2951,6 +3036,7 @@ case O_IPID: case O_IPTTL: case O_IPLEN: + case O_TCPDATALEN: if (cmdlen < 1 || cmdlen > 31) goto bad_size; break; @@ -2969,6 +3055,11 @@ goto bad_size; break; + case O_ALTQ: + if (cmdlen != F_INSN_SIZE(ipfw_insn_altq)) + goto bad_size; + break; + case O_PIPE: case O_QUEUE: if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe)) Index: sys/netinet/tcp.h =================================================================== RCS file: /usr/ncvs/src/sys/netinet/tcp.h,v retrieving revision 1.26 diff -u -r1.26 tcp.h --- sys/netinet/tcp.h 16 Aug 2004 18:32:07 -0000 1.26 +++ sys/netinet/tcp.h 29 Sep 2004 05:09:59 -0000 @@ -161,12 +161,14 @@ /* * User-settable options (used with setsockopt). 
*/ -#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ +#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ #if __BSD_VISIBLE -#define TCP_MAXSEG 0x02 /* set maximum segment size */ -#define TCP_NOPUSH 0x04 /* don't push last block of write */ -#define TCP_NOOPT 0x08 /* don't use TCP options */ -#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */ +#define TCP_MAXSEG 0x02 /* set maximum segment size */ +#define TCP_NOPUSH 0x04 /* don't push last block of write */ +#define TCP_NOOPT 0x08 /* don't use TCP options */ +#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */ +#define TCP_DELACKTIME 0x20 /* delayed ack time (timeval, 0 disables) */ +#define TCP_REXMITJITTER 0x40 /* retransmit slop time (timeval, 0 disables) */ #endif #endif /* !_NETINET_TCP_H_ */ Index: sys/netinet/tcp_input.c =================================================================== RCS file: /usr/ncvs/src/sys/netinet/tcp_input.c,v retrieving revision 1.252 diff -u -r1.252 tcp_input.c --- sys/netinet/tcp_input.c 17 Aug 2004 22:05:54 -0000 1.252 +++ sys/netinet/tcp_input.c 29 Sep 2004 05:33:17 -0000 @@ -195,7 +195,7 @@ #define DELAY_ACK(tp) \ ((!callout_active(tp->tt_delack) && \ (tp->t_flags & TF_RXWIN0SENT) == 0) && \ - (tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) + (tp->t_delacktime > 0 || (tp->t_flags & TF_NEEDSYN))) /* Initialize TCP reassembly queue */ uma_zone_t tcp_reass_zone; @@ -1416,8 +1416,8 @@ * ACKNOW will be turned on later. 
*/ if (DELAY_ACK(tp) && tlen != 0) - callout_reset(tp->tt_delack, tcp_delacktime, - tcp_timer_delack, tp); + callout_reset(tp->tt_delack, + TCP_DELACKTICKS(tp), tcp_timer_delack, tp); else tp->t_flags |= TF_ACKNOW; /* @@ -2509,7 +2509,7 @@ INP_LOCK_ASSERT(inp); if (tp->t_flags & TF_DELACK) { tp->t_flags &= ~TF_DELACK; - callout_reset(tp->tt_delack, tcp_delacktime, + callout_reset(tp->tt_delack, TCP_DELACKTICKS(tp), tcp_timer_delack, tp); } INP_UNLOCK(inp); @@ -2841,7 +2841,7 @@ * statistical, we have to test that we don't drop below * the minimum feasible timer (which is 2 ticks). */ - TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), + TCPT_RANGESET(tp, tp->t_rxtcur, TCP_REXMTVAL(tp), max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX); /* @@ -3082,7 +3082,7 @@ tp->t_rttvar = tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; } - TCPT_RANGESET(tp->t_rxtcur, + TCPT_RANGESET(tp, tp->t_rxtcur, ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, tp->t_rttmin, TCPTV_REXMTMAX); } Index: sys/netinet/tcp_output.c =================================================================== RCS file: /usr/ncvs/src/sys/netinet/tcp_output.c,v retrieving revision 1.101 diff -u -r1.101 tcp_output.c --- sys/netinet/tcp_output.c 5 Sep 2004 02:34:12 -0000 1.101 +++ sys/netinet/tcp_output.c 29 Sep 2004 04:41:12 -0000 @@ -1169,7 +1169,7 @@ /* * Start/restart persistance timer. 
*/ - TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift], + TCPT_RANGESET(tp, tt, t * tcp_backoff[tp->t_rxtshift], TCPTV_PERSMIN, TCPTV_PERSMAX); callout_reset(tp->tt_persist, tt, tcp_timer_persist, tp); if (tp->t_rxtshift < TCP_MAXRXTSHIFT) Index: sys/netinet/tcp_subr.c =================================================================== RCS file: /usr/ncvs/src/sys/netinet/tcp_subr.c,v retrieving revision 1.203 diff -u -r1.203 tcp_subr.c --- sys/netinet/tcp_subr.c 5 Sep 2004 02:34:12 -0000 1.203 +++ sys/netinet/tcp_subr.c 29 Sep 2004 05:37:00 -0000 @@ -620,6 +620,8 @@ tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); if (tcp_do_rfc1644) tp->t_flags |= TF_REQ_CC; + if (tcp_delack_enabled) + tp->t_delacktime = max(tcp_delacktime, 1); tp->sack_enable = tcp_do_sack; tp->t_inpcb = inp; /* XXX */ /* @@ -631,6 +633,7 @@ tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; tp->t_rttmin = tcp_rexmit_min; tp->t_rxtcur = TCPTV_RTOBASE; + tp->t_rxtjitter = max(tcp_rexmit_slop, 0); tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; Index: sys/netinet/tcp_timer.c =================================================================== RCS file: /usr/ncvs/src/sys/netinet/tcp_timer.c,v retrieving revision 1.66 diff -u -r1.66 tcp_timer.c --- sys/netinet/tcp_timer.c 16 Aug 2004 18:32:07 -0000 1.66 +++ sys/netinet/tcp_timer.c 29 Sep 2004 04:41:16 -0000 @@ -538,7 +538,7 @@ rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; else rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; - TCPT_RANGESET(tp->t_rxtcur, rexmt, + TCPT_RANGESET(tp, tp->t_rxtcur, rexmt, tp->t_rttmin, TCPTV_REXMTMAX); /* * Disable rfc1323 and rfc1644 if we havn't got any response to Index: sys/netinet/tcp_timer.h =================================================================== RCS file: /usr/ncvs/src/sys/netinet/tcp_timer.h,v retrieving revision 1.26 diff -u -r1.26 tcp_timer.h --- 
sys/netinet/tcp_timer.h 16 Aug 2004 18:32:07 -0000 1.26 +++ sys/netinet/tcp_timer.h 29 Sep 2004 05:31:10 -0000 @@ -126,8 +126,8 @@ /* * Force a time value to be in a certain range. */ -#define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \ - (tv) = (value) + tcp_rexmit_slop; \ +#define TCPT_RANGESET(tp, tv, value, tvmin, tvmax) do { \ + (tv) = (value) + TCP_REXMITJITTERTICKS(tp); \ if ((u_long)(tv) < (u_long)(tvmin)) \ (tv) = (tvmin); \ else if ((u_long)(tv) > (u_long)(tvmax)) \ Index: sys/netinet/tcp_usrreq.c =================================================================== RCS file: /usr/ncvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.107 diff -u -r1.107 tcp_usrreq.c --- sys/netinet/tcp_usrreq.c 16 Aug 2004 18:32:07 -0000 1.107 +++ sys/netinet/tcp_usrreq.c 29 Sep 2004 05:37:25 -0000 @@ -997,9 +997,12 @@ struct socket *so; struct sockopt *sopt; { + struct timeval opttv; int error, opt, optval; struct inpcb *inp; struct tcpcb *tp; + void *optout; + socklen_t optlen; error = 0; INP_INFO_RLOCK(&tcbinfo); @@ -1090,6 +1093,32 @@ error = EINVAL; break; + case TCP_DELACKTIME: + error = sooptcopyin(sopt, &opttv, sizeof opttv, + sizeof opttv); + if (error) + break; + + if (opttv.tv_sec == 0 && opttv.tv_usec == 0) + tp->t_delacktime = 0; + else + tp->t_delacktime = tvtohz(&opttv); + error = 0; + break; + + case TCP_REXMITJITTER: + error = sooptcopyin(sopt, &opttv, sizeof opttv, + sizeof opttv); + if (error) + break; + + if (opttv.tv_sec == 0 && opttv.tv_usec == 0) + tp->t_rxtjitter = 0; + else + tp->t_rxtjitter = tvtohz(&opttv); + error = 0; + break; + default: error = ENOPROTOOPT; break; @@ -1097,6 +1126,8 @@ break; case SOPT_GET: + optout = &optval; + optlen = sizeof(optval); switch (sopt->sopt_name) { #ifdef TCP_SIGNATURE case TCP_MD5SIG: @@ -1115,12 +1146,36 @@ case TCP_NOPUSH: optval = tp->t_flags & TF_NOPUSH; break; + case TCP_DELACKTIME: + optout = &opttv; + optlen = sizeof(opttv); + if (tp->t_delacktime == 0) { + opttv.tv_sec = 0; + opttv.tv_usec = 0; + } 
else { + opttv.tv_sec = tp->t_delacktime / hz; + opttv.tv_usec = (tp->t_delacktime % hz) * + (1000000 / hz); + } + break; + case TCP_REXMITJITTER: + optout = &opttv; + optlen = sizeof(opttv); + if (tp->t_rxtjitter == 0) { + opttv.tv_sec = 0; + opttv.tv_usec = 0; + } else { + opttv.tv_sec = tp->t_rxtjitter / hz; + opttv.tv_usec = (tp->t_rxtjitter % hz) * + (1000000 / hz); + } + break; default: error = ENOPROTOOPT; break; } if (error == 0) - error = sooptcopyout(sopt, &optval, sizeof optval); + error = sooptcopyout(sopt, optout, optlen); break; } INP_UNLOCK(inp); Index: sys/netinet/tcp_var.h =================================================================== RCS file: /usr/ncvs/src/sys/netinet/tcp_var.h,v retrieving revision 1.109 diff -u -r1.109 tcp_var.h --- sys/netinet/tcp_var.h 16 Aug 2004 18:32:07 -0000 1.109 +++ sys/netinet/tcp_var.h 29 Sep 2004 05:36:47 -0000 @@ -200,6 +200,8 @@ tcp_seq rcv_lastsack; /* last seq number(+1) sack'd by rcv'r*/ int rcv_numsacks; /* # distinct sack blks present */ struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */ + int t_rxtjitter; /* retransmission slop ticks to use. */ + int t_delacktime; /* delayed ack ticks to use. */ }; #define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY) @@ -368,6 +370,20 @@ + (tp)->t_rttvar) >> TCP_DELTA_SHIFT) /* + * Per-socket retransmit slop setting (0 for off, else value in ticks). + */ +#define TCP_REXMITJITTERTICKS(tp) \ + (tp)->t_rxtjitter + +/* + * Per-socket delayed ack timer setting (0 for off, else value in ticks). + * If off, and using T/TCP, the value will fall back to the system value as + * delayed ack will be a necessity. + */ +#define TCP_DELACKTICKS(tp) \ + ((tp)->t_delacktime == 0 ? tcp_delacktime : (tp)->t_delacktime) + +/* * TCP statistics. * Many of these should be kept per connection, * but that's inconvenient at the moment. 
Index: sbin/ipfw/Makefile =================================================================== RCS file: /usr/ncvs/src/sbin/ipfw/Makefile,v retrieving revision 1.12 diff -u -r1.12 Makefile --- sbin/ipfw/Makefile 11 Jul 2002 17:33:37 -0000 1.12 +++ sbin/ipfw/Makefile 29 Sep 2004 03:09:15 -0000 @@ -4,5 +4,6 @@ SRCS= ipfw2.c WARNS?= 0 MAN= ipfw.8 +CFLAGS+= -I${.CURDIR}/../../sys/contrib/pf .include Index: sbin/ipfw/ipfw2.c =================================================================== RCS file: /usr/ncvs/src/sbin/ipfw/ipfw2.c,v retrieving revision 1.59 diff -u -r1.59 ipfw2.c --- sbin/ipfw/ipfw2.c 21 Sep 2004 22:12:43 -0000 1.59 +++ sbin/ipfw/ipfw2.c 1 Oct 2004 01:27:08 -0000 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -43,8 +44,11 @@ #include /* XXX do we need this ? */ #include #include +#include +#include #include +#include #include #include #include @@ -202,6 +206,9 @@ TOK_UNREACH, TOK_CHECKSTATE, + TOK_ALTQ, + TOK_LOG, + TOK_UID, TOK_GID, TOK_JAIL, @@ -210,6 +217,9 @@ TOK_KEEPSTATE, TOK_LAYER2, TOK_OUT, + TOK_DIVERTED, + TOK_DIVERTEDLOOPBACK, + TOK_DIVERTEDOUTPUT, TOK_XMIT, TOK_RECV, TOK_VIA, @@ -223,6 +233,7 @@ TOK_IPVER, TOK_ESTAB, TOK_SETUP, + TOK_TCPDATALEN, TOK_TCPFLAGS, TOK_TCPOPTS, TOK_TCPSEQ, @@ -302,6 +313,12 @@ { NULL, 0 } /* terminator */ }; +struct _s_x rule_action_params[] = { + { "altq", TOK_ALTQ }, + { "log", TOK_LOG }, + { NULL, 0 } /* terminator */ +}; + struct _s_x rule_options[] = { { "uid", TOK_UID }, { "gid", TOK_GID }, @@ -312,6 +329,9 @@ { "bridged", TOK_LAYER2 }, { "layer2", TOK_LAYER2 }, { "out", TOK_OUT }, + { "diverted", TOK_DIVERTED }, + { "diverted-loopback", TOK_DIVERTEDLOOPBACK }, + { "diverted-output", TOK_DIVERTEDOUTPUT }, { "xmit", TOK_XMIT }, { "recv", TOK_RECV }, { "via", TOK_VIA }, @@ -329,6 +349,7 @@ { "estab", TOK_ESTAB }, { "established", TOK_ESTAB }, { "setup", TOK_SETUP }, + { "tcpdatalen", TOK_TCPDATALEN }, { "tcpflags", TOK_TCPFLAGS }, { "tcpflgs", TOK_TCPFLAGS }, { "tcpoptions", 
TOK_TCPOPTS }, @@ -462,6 +483,7 @@ {"iplen", O_IPLEN}, {"ipttl", O_IPTTL}, {"mac-type", O_MAC_TYPE}, + {"tcpdatalen", O_TCPDATALEN}, {NULL, 0} }; @@ -563,6 +585,107 @@ } /* + * Map between current altq queue id numbers and names. + */ +static int altq_fetched = 0; +static TAILQ_HEAD(, pf_altq) altq_entries = + TAILQ_HEAD_INITIALIZER(altq_entries); + +static void +altq_set_enabled(int enabled) +{ + int pffd; + + pffd = open("/dev/pf", O_RDWR); + if (pffd == -1) + err(EX_UNAVAILABLE, + "altq support opening pf(4) control device"); + if (enabled) { + if (ioctl(pffd, DIOCSTARTALTQ) != 0 && errno != EEXIST) + err(EX_UNAVAILABLE, "enabling altq"); + } else { + if (ioctl(pffd, DIOCSTOPALTQ) != 0 && errno != ENOENT) + err(EX_UNAVAILABLE, "disabling altq"); + } + close(pffd); +} + +static void +altq_fetch() +{ + struct pfioc_altq pfioc; + struct pf_altq *altq; + int pffd, mnr; + + if (altq_fetched) + return; + altq_fetched = 1; + pffd = open("/dev/pf", O_RDONLY); + if (pffd == -1) { + warn("altq support opening pf(4) control device"); + return; + } + bzero(&pfioc, sizeof(pfioc)); + if (ioctl(pffd, DIOCGETALTQS, &pfioc) != 0) { + warn("altq support getting queue list"); + close(pffd); + return; + } + mnr = pfioc.nr; + for (pfioc.nr = 0; pfioc.nr < mnr; pfioc.nr++) { + if (ioctl(pffd, DIOCGETALTQ, &pfioc) != 0) { + if (errno == EBUSY) + break; + warn("altq support getting queue list"); + close(pffd); + return; + } + if (pfioc.altq.qid == 0) + continue; + altq = malloc(sizeof(*altq)); + if (altq == NULL) + err(EX_OSERR, "malloc"); + *altq = pfioc.altq; + TAILQ_INSERT_TAIL(&altq_entries, altq, entries); + } + close(pffd); +} + +static u_int32_t +altq_name_to_qid(const char *name) +{ + struct pf_altq *altq; + + altq_fetch(); + TAILQ_FOREACH(altq, &altq_entries, entries) + if (strcmp(name, altq->qname) == 0) + break; + if (altq == NULL) + errx(EX_DATAERR, "altq has no queue named `%s'", name); + return altq->qid; +} + +static const char * +altq_qid_to_name(u_int32_t qid) +{ + 
struct pf_altq *altq; + + altq_fetch(); + TAILQ_FOREACH(altq, &altq_entries, entries) + if (qid == altq->qid) + break; + if (altq == NULL) + return NULL; + return altq->qname; +} + +static void +fill_altq_qid(u_int32_t *qid, const char *av) +{ + *qid = altq_name_to_qid(av); +} + +/* * Fill the body of the command with the list of port ranges. */ static int @@ -908,6 +1031,7 @@ int proto = 0; /* default */ int flags = 0; /* prerequisites */ ipfw_insn_log *logptr = NULL; /* set if we find an O_LOG */ + ipfw_insn_altq *altqptr = NULL; /* set if we find an O_ALTQ */ int or_block = 0; /* we are in an or block */ uint32_t set_disable; @@ -1033,6 +1157,10 @@ logptr = (ipfw_insn_log *)cmd; break; + case O_ALTQ: /* O_ALTQ is printed after O_LOG */ + altqptr = (ipfw_insn_altq *)cmd; + break; + default: printf("** unrecognized action %d len %d", cmd->opcode, cmd->len); @@ -1044,6 +1172,15 @@ else printf(" log"); } + if (altqptr) { + const char *qname; + + qname = altq_qid_to_name(altqptr->qid); + if (qname == NULL) + printf(" altq ?<%u>", altqptr->qid); + else + printf(" altq %s", qname); + } /* * then print the body. @@ -1174,6 +1311,23 @@ printf(cmd->len & F_NOT ? 
" out" : " in"); break; + case O_DIVERTED: + switch (cmd->arg1) { + case 3: + printf(" diverted"); + break; + case 1: + printf(" diverted-loopback"); + break; + case 2: + printf(" diverted-output"); + break; + default: + printf(" diverted-?<%u>", cmd->arg1); + break; + } + break; + case O_LAYER2: printf(" layer2"); break; @@ -1244,6 +1398,14 @@ printf(" established"); break; + case O_TCPDATALEN: + if (F_LEN(cmd) == 1) + printf(" tcpdatalen %u", cmd->arg1 ); + else + print_newports((ipfw_insn_u16 *)cmd, 0, + O_TCPDATALEN); + break; + case O_TCPFLAGS: print_flags("tcpflags", cmd, f_tcpflags); break; @@ -1709,6 +1871,8 @@ } else if (strncmp(*av, "dyn_keepalive", strlen(*av)) == 0) { sysctlbyname("net.inet.ip.fw.dyn_keepalive", NULL, 0, &which, sizeof(which)); + } else if (strncmp(*av, "altq", strlen(*av)) == 0) { + altq_set_enabled(which); } else { warnx("unrecognize enable/disable keyword: %s\n", *av); } @@ -1903,21 +2067,23 @@ "set [disable N... enable N...] | move [rule] X to Y | swap X Y | show\n" "table N {add ip[/bits] [value] | delete ip[/bits] | flush | list}\n" "\n" -"RULE-BODY: check-state [LOG] | ACTION [LOG] ADDR [OPTION_LIST]\n" +"RULE-BODY: check-state [PARAMS] | ACTION [PARAMS] ADDR [OPTION_LIST]\n" "ACTION: check-state | allow | count | deny | reject | skipto N |\n" " {divert|tee} PORT | forward ADDR | pipe N | queue N\n" +"PARAMS: [log [logamount LOGLIMIT]] [altq QUEUE_NAME]\n" "ADDR: [ MAC dst src ether_type ] \n" " [ from IPADDR [ PORT ] to IPADDR [ PORTLIST ] ]\n" "IPADDR: [not] { any | me | ip/bits{x,y,z} | table(t[,v]) | IPLIST }\n" "IPLIST: { ip | ip/bits | ip:mask }[,IPLIST]\n" "OPTION_LIST: OPTION [OPTION_LIST]\n" -"OPTION: bridged | {dst-ip|src-ip} ADDR | {dst-port|src-port} LIST |\n" +"OPTION: bridged | diverted | diverted-loopback | diverted-output |\n" +" {dst-ip|src-ip} ADDR | {dst-port|src-port} LIST |\n" " estab | frag | {gid|uid} N | icmptypes LIST | in | out | ipid LIST |\n" " iplen LIST | ipoptions SPEC | ipprecedence | ipsec | iptos 
SPEC |\n" " ipttl LIST | ipversion VER | keep-state | layer2 | limit ... |\n" " mac ... | mac-type LIST | proto LIST | {recv|xmit|via} {IF|IPADDR} |\n" " setup | {tcpack|tcpseq|tcpwin} NN | tcpflags SPEC | tcpoptions SPEC |\n" -" verrevpath | versrcreach | antispoof\n" +" tcpdatalen LIST | verrevpath | versrcreach | antispoof\n" ); exit(0); } @@ -2756,11 +2922,11 @@ * Rules are added into the 'rulebuf' and then copied in the correct order * into the actual rule. * - * The syntax for a rule starts with the action, followed by an - * optional log action, and the various match patterns. + * The syntax for a rule starts with the action, followed by + * optional action parameters, and the various match patterns. * In the assembled microcode, the first opcode must be an O_PROBE_STATE * (generated if the rule includes a keep-state option), then the - * various match patterns, the "log" action, and the actual action. + * various match patterns, log/altq actions, and the actual action. * */ static void @@ -2783,6 +2949,7 @@ * various flags used to record that we entered some fields. */ ipfw_insn *have_state = NULL; /* check-state or keep-state */ + ipfw_insn *have_log = NULL, *have_altq = NULL; size_t len; int i; @@ -2945,32 +3112,63 @@ action = next_cmd(action); /* + * [altq queuename] -- altq tag, optional * [log [logamount N]] -- log, optional * - * If exists, it goes first in the cmdbuf, but then it is + * If they exist, they go first in the cmdbuf, but then they are * skipped in the copy section to the end of the buffer. 
*/ - if (ac && !strncmp(*av, "log", strlen(*av))) { - ipfw_insn_log *c = (ipfw_insn_log *)cmd; - int l; + while (ac != 0 && (i = match_token(rule_action_params, *av)) != -1) { + ac--; av++; + switch (i) { + case TOK_LOG: + { + ipfw_insn_log *c = (ipfw_insn_log *)cmd; + int l; - cmd->len = F_INSN_SIZE(ipfw_insn_log); - cmd->opcode = O_LOG; - av++; ac--; - if (ac && !strncmp(*av, "logamount", strlen(*av))) { - ac--; av++; - NEED1("logamount requires argument"); - l = atoi(*av); - if (l < 0) - errx(EX_DATAERR, "logamount must be positive"); - c->max_log = l; + if (have_log) + errx(EX_DATAERR, + "log cannot be specified more than once"); + have_log = (ipfw_insn *)c; + cmd->len = F_INSN_SIZE(ipfw_insn_log); + cmd->opcode = O_LOG; + if (ac && !strncmp(*av, "logamount", strlen(*av))) { + ac--; av++; + NEED1("logamount requires argument"); + l = atoi(*av); + if (l < 0) + errx(EX_DATAERR, + "logamount must be positive"); + c->max_log = l; + ac--; av++; + } else { + len = sizeof(c->max_log); + if (sysctlbyname("net.inet.ip.fw.verbose_limit", + &c->max_log, &len, NULL, 0) == -1) + errx(1, "sysctlbyname(\"%s\")", + "net.inet.ip.fw.verbose_limit"); + } + } + break; + + case TOK_ALTQ: + { + ipfw_insn_altq *a = (ipfw_insn_altq *)cmd; + + NEED1("missing altq queue name"); + if (have_altq) + errx(EX_DATAERR, + "altq cannot be specified more than once"); + have_altq = (ipfw_insn *)a; + cmd->len = F_INSN_SIZE(ipfw_insn_altq); + cmd->opcode = O_ALTQ; + fill_altq_qid(&a->qid, *av); ac--; av++; - } else { - len = sizeof(c->max_log); - if (sysctlbyname("net.inet.ip.fw.verbose_limit", - &c->max_log, &len, NULL, 0) == -1) - errx(1, "sysctlbyname(\"%s\")", - "net.inet.ip.fw.verbose_limit"); + } + break; + + default: + abort(); } cmd = next_cmd(cmd); } @@ -3197,6 +3395,18 @@ fill_cmd(cmd, O_IN, 0, 0); break; + case TOK_DIVERTED: + fill_cmd(cmd, O_DIVERTED, 0, 3); + break; + + case TOK_DIVERTEDLOOPBACK: + fill_cmd(cmd, O_DIVERTED, 0, 1); + break; + + case TOK_DIVERTEDOUTPUT: + fill_cmd(cmd, 
O_DIVERTED, 0, 2); + break; + case TOK_FRAG: fill_cmd(cmd, O_FRAG, 0, 0); break; @@ -3344,6 +3554,17 @@ (TH_SYN) | ( (TH_ACK) & 0xff) <<8 ); break; + case TOK_TCPDATALEN: + NEED1("tcpdatalen requires length"); + if (strpbrk(*av, "-,")) { + if (!add_ports(cmd, *av, 0, O_TCPDATALEN)) + errx(EX_DATAERR, "invalid tcpdata len %s", *av); + } else + fill_cmd(cmd, O_TCPDATALEN, 0, + strtoul(*av, NULL, 0)); + ac--; av++; + break; + case TOK_TCPOPTS: NEED1("missing argument for tcpoptions"); fill_flags(cmd, O_TCPOPTS, f_tcpopts, *av); @@ -3533,7 +3754,7 @@ dst = next_cmd(dst); } /* - * copy all commands but O_LOG, O_KEEP_STATE, O_LIMIT + * copy all commands but O_LOG, O_KEEP_STATE, O_LIMIT, O_ALTQ */ for (src = (ipfw_insn *)cmdbuf; src != cmd; src += i) { i = F_LEN(src); @@ -3542,6 +3763,7 @@ case O_LOG: case O_KEEP_STATE: case O_LIMIT: + case O_ALTQ: break; default: bcopy(src, dst, i * sizeof(uint32_t)); @@ -3563,12 +3785,16 @@ rule->act_ofs = dst - rule->cmd; /* - * put back O_LOG if necessary + * put back O_LOG, O_ALTQ if necessary */ - src = (ipfw_insn *)cmdbuf; - if (src->opcode == O_LOG) { - i = F_LEN(src); - bcopy(src, dst, i * sizeof(uint32_t)); + if (have_log) { + i = F_LEN(have_log); + bcopy(have_log, dst, i * sizeof(uint32_t)); + dst += i; + } + if (have_altq) { + i = F_LEN(have_altq); + bcopy(have_altq, dst, i * sizeof(uint32_t)); dst += i; } /* --C7zPtVaVf+AK4Oqc--