Date: Fri, 25 Apr 2008 23:08:09 GMT From: John Birrell <jb@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 140645 for review Message-ID: <200804252308.m3PN89R4007146@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=140645 Change 140645 by jb@freebsd3 on 2008/04/25 23:07:20 IF6 Affected files ... .. //depot/projects/dtrace6/src/sbin/ipfw/ipfw.8#4 integrate .. //depot/projects/dtrace6/src/sbin/ipfw/ipfw2.c#4 integrate .. //depot/projects/dtrace6/src/sys/net/if_bridge.c#4 integrate .. //depot/projects/dtrace6/src/sys/net/if_ethersubr.c#2 integrate .. //depot/projects/dtrace6/src/sys/netinet/ip_dummynet.c#2 integrate .. //depot/projects/dtrace6/src/sys/netinet/ip_dummynet.h#2 integrate .. //depot/projects/dtrace6/src/sys/netinet/ip_fw_pfil.c#2 integrate Differences ... ==== //depot/projects/dtrace6/src/sbin/ipfw/ipfw.8#4 (text+ko) ==== @@ -1,5 +1,5 @@ .\" -.\" $FreeBSD: src/sbin/ipfw/ipfw.8,v 1.175.2.14 2008/04/04 18:10:50 julian Exp $ +.\" $FreeBSD: src/sbin/ipfw/ipfw.8,v 1.175.2.15 2008/04/25 10:29:26 oleg Exp $ .\" .Dd May 4, 2007 .Dt IPFW 8 @@ -1750,6 +1750,16 @@ TCP connection, or from/to a given host, or entire subnet, or a protocol type, etc. .Pp +There are two modes of dummynet operation: normal and fast. +Normal mode tries to emulate a real link: the dummynet scheduler ensures a packet will +not leave the pipe faster than it would on a real link with the given bandwidth. +Fast mode allows certain packets to bypass the dummynet scheduler (if the packet flow +does not exceed the pipe's bandwidth). Thus fast mode requires fewer CPU cycles +per packet (on average) but packet latency can be significantly lower compared +to a real link with the same bandwidth. The default is normal mode; fast mode can be +enabled by setting the net.inet.ip.dummynet.io_fast sysctl(8) variable to a non-zero +value. +.Pp Packets belonging to the same flow are then passed to either of two different objects, which implement the traffic regulation: .Bl -hang -offset XXXX @@ -2062,6 +2072,14 @@ This value is used when no .Cm buckets option is specified when configuring a pipe/queue. 
+.It Em net.inet.ip.dummynet.io_fast : No 0 +If set to a non-zero value, enables "fast" mode of dummynet operation (see above). +.It Em net.inet.ip.dummynet.io_pkt +Number of packets passed to dummynet. +.It Em net.inet.ip.dummynet.io_pkt_drop +Number of packets dropped by dummynet. +.It Em net.inet.ip.dummynet.io_pkt_fast +Number of packets that bypassed the dummynet scheduler. .It Em net.inet.ip.dummynet.max_chain_len : No 16 Target value for the maximum number of pipes/queues in a hash bucket. The product ==== //depot/projects/dtrace6/src/sbin/ipfw/ipfw2.c#4 (text+ko) ==== @@ -17,7 +17,7 @@ * * NEW command line interface for IP firewall facility * - * $FreeBSD: src/sbin/ipfw/ipfw2.c,v 1.76.2.21 2008/04/04 18:10:50 julian Exp $ + * $FreeBSD: src/sbin/ipfw/ipfw2.c,v 1.76.2.22 2008/04/25 10:35:53 oleg Exp $ */ #include <sys/param.h> @@ -3541,7 +3541,7 @@ if (p.bandwidth==0) /* this is a WF2Q+ queue */ s = 0; else - s = ck.hz * avg_pkt_size * 8 / p.bandwidth; + s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth; /* * max idle time (in ticks) before avg queue size becomes 0. @@ -3554,8 +3554,8 @@ if (!p.fs.lookup_step) p.fs.lookup_step = 1; weight = 1 - w_q; - for (t = p.fs.lookup_step; t > 0; --t) - weight *= weight; + for (t = p.fs.lookup_step; t > 1; --t) + weight *= 1 - w_q; p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED)); } i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p); ==== //depot/projects/dtrace6/src/sys/net/if_bridge.c#4 (text+ko) ==== @@ -80,7 +80,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/net/if_bridge.c,v 1.11.2.54 2007/12/21 05:30:15 thompsa Exp $"); +__FBSDID("$FreeBSD: src/sys/net/if_bridge.c,v 1.11.2.55 2008/04/25 10:29:26 oleg Exp $"); #include "opt_inet.h" #include "opt_inet6.h" @@ -2949,7 +2949,7 @@ * packet will return to us via bridge_dummynet(). 
*/ args.oif = ifp; - ip_dn_io_ptr(*mp, DN_TO_IFB_FWD, &args); + ip_dn_io_ptr(mp, DN_TO_IFB_FWD, &args); return (error); } ==== //depot/projects/dtrace6/src/sys/net/if_ethersubr.c#2 (text+ko) ==== @@ -27,7 +27,7 @@ * SUCH DAMAGE. * * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93 - * $FreeBSD: src/sys/net/if_ethersubr.c,v 1.193.2.15 2007/09/17 17:50:49 julian Exp $ + * $FreeBSD: src/sys/net/if_ethersubr.c,v 1.193.2.16 2008/04/25 10:29:26 oleg Exp $ */ #include "opt_atalk.h" @@ -497,7 +497,7 @@ */ *m0 = NULL ; } - ip_dn_io_ptr(m, dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args); + ip_dn_io_ptr(&m, dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args); return 0; } /* ==== //depot/projects/dtrace6/src/sys/netinet/ip_dummynet.c#2 (text+ko) ==== @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.93.2.6 2007/03/21 17:25:15 oleg Exp $ + * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.93.2.7 2008/04/25 10:29:26 oleg Exp $ */ #define DUMMYNET_DEBUG @@ -55,6 +55,7 @@ * include files marked with XXX are probably not needed */ +#include <sys/limits.h> #include <sys/param.h> #include <sys/systm.h> #include <sys/malloc.h> @@ -110,6 +111,11 @@ /* Adjusted vs non-adjusted curr_time difference (ticks). 
*/ static long tick_diff; +static int io_fast; +static unsigned long io_pkt; +static unsigned long io_pkt_fast; +static unsigned long io_pkt_drop; + /* * Three heaps contain queues and pipes that the scheduler handles: * @@ -181,6 +187,17 @@ SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost, CTLFLAG_RD, &tick_lost, 0, "Number of ticks coalesced by dummynet taskqueue."); +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast, + CTLFLAG_RW, &io_fast, 0, "Enable fast dummynet io."); +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt, + CTLFLAG_RD, &io_pkt, 0, + "Number of packets passed to dummynet."); +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast, + CTLFLAG_RD, &io_pkt_fast, 0, + "Number of packets bypassed dummynet scheduler."); +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop, + CTLFLAG_RD, &io_pkt_drop, 0, + "Number of packets dropped by dummynet."); #endif #ifdef DUMMYNET_DEBUG @@ -209,15 +226,15 @@ NET_ASSERT_GIANT(); \ } while (0) -static int config_pipe(struct dn_pipe *p); -static int ip_dn_ctl(struct sockopt *sopt); +static int config_pipe(struct dn_pipe *p); +static int ip_dn_ctl(struct sockopt *sopt); -static void dummynet(void *); -static void dummynet_flush(void); -static void dummynet_send(struct mbuf *); -void dummynet_drain(void); +static void dummynet(void *); +static void dummynet_flush(void); +static void dummynet_send(struct mbuf *); +void dummynet_drain(void); static ip_dn_io_t dummynet_io; -static void dn_rule_delete(void *); +static void dn_rule_delete(void *); /* * Heap management functions. @@ -486,7 +503,7 @@ if ((m = pipe->head) != NULL) { pkt = dn_tag_get(m); /* - * XXX: Should check errors on heap_insert, by draining the + * XXX Should check errors on heap_insert, by draining the * whole pipe p and hoping in the future we are more successful. 
*/ heap_insert(&extract_heap, pkt->output_time, pipe); @@ -499,8 +516,8 @@ * either a pipe (WF2Q) or a flow_queue (per-flow queueing) */ #define SET_TICKS(_m, q, p) \ - ((_m)->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \ - p->bandwidth ; + ((_m)->m_pkthdr.len * 8 * hz - (q)->numbytes + p->bandwidth - 1) / \ + p->bandwidth; /* * extract pkt from queue, compute output time (could be now) @@ -536,59 +553,61 @@ static void ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail) { - struct mbuf *pkt; - struct dn_pipe *p = q->fs->pipe ; - int p_was_empty ; + struct mbuf *pkt; + struct dn_pipe *p = q->fs->pipe; + int p_was_empty; + + DUMMYNET_LOCK_ASSERT(); + + if (p == NULL) { + printf("dummynet: ready_event- pipe is gone\n"); + return; + } + p_was_empty = (p->head == NULL); + + /* + * Schedule fixed-rate queues linked to this pipe: + * account for the bw accumulated since last scheduling, then + * drain as many pkts as allowed by q->numbytes and move to + * the delay line (in p) computing output time. + * bandwidth==0 (no limit) means we can drain the whole queue, + * setting len_scaled = 0 does the job. + */ + q->numbytes += (curr_time - q->sched_time) * p->bandwidth; + while ((pkt = q->head) != NULL) { + int len = pkt->m_pkthdr.len; + int len_scaled = p->bandwidth ? len * 8 * hz : 0; - DUMMYNET_LOCK_ASSERT(); + if (len_scaled > q->numbytes) + break; + q->numbytes -= len_scaled; + move_pkt(pkt, q, p, len); + } + /* + * If we have more packets queued, schedule next ready event + * (can only occur when bandwidth != 0, otherwise we would have + * flushed the whole queue in the previous loop). + * To this purpose we record the current time and compute how many + * ticks to go for the finish time of the packet. 
+ */ + if ((pkt = q->head) != NULL) { /* this implies bandwidth != 0 */ + dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */ - if (p == NULL) { - printf("dummynet: ready_event- pipe is gone\n"); - return ; - } - p_was_empty = (p->head == NULL) ; + q->sched_time = curr_time; + heap_insert(&ready_heap, curr_time + t, (void *)q); + /* + * XXX Should check errors on heap_insert, and drain the whole + * queue on error hoping next time we are luckier. + */ + } else /* RED needs to know when the queue becomes empty. */ + q->q_time = curr_time; - /* - * schedule fixed-rate queues linked to this pipe: - * Account for the bw accumulated since last scheduling, then - * drain as many pkts as allowed by q->numbytes and move to - * the delay line (in p) computing output time. - * bandwidth==0 (no limit) means we can drain the whole queue, - * setting len_scaled = 0 does the job. - */ - q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth; - while ( (pkt = q->head) != NULL ) { - int len = pkt->m_pkthdr.len; - int len_scaled = p->bandwidth ? len*8*hz : 0 ; - if (len_scaled > q->numbytes ) - break ; - q->numbytes -= len_scaled ; - move_pkt(pkt, q, p, len); - } - /* - * If we have more packets queued, schedule next ready event - * (can only occur when bandwidth != 0, otherwise we would have - * flushed the whole queue in the previous loop). - * To this purpose we record the current time and compute how many - * ticks to go for the finish time of the packet. - */ - if ( (pkt = q->head) != NULL ) { /* this implies bandwidth != 0 */ - dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */ - q->sched_time = curr_time ; - heap_insert(&ready_heap, curr_time + t, (void *)q ); - /* XXX should check errors on heap_insert, and drain the whole - * queue on error hoping next time we are luckier. + /* + * If the delay line was empty call transmit_event() now. + * Otherwise, the scheduler will take care of it. 
*/ - } else { /* RED needs to know when the queue becomes empty */ - q->q_time = curr_time; - q->numbytes = 0; - } - /* - * If the delay line was empty call transmit_event() now. - * Otherwise, the scheduler will take care of it. - */ - if (p_was_empty) - transmit_event(p, head, tail); + if (p_was_empty) + transmit_event(p, head, tail); } /* @@ -596,123 +615,147 @@ * the queues at their start time, and enqueue into the delay line. * Packets are drained until p->numbytes < 0. As long as * len_scaled >= p->numbytes, the packet goes into the delay line - * with a deadline p->delay. For the last packet, if p->numbytes<0, + * with a deadline p->delay. For the last packet, if p->numbytes < 0, * there is an additional delay. */ static void ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail) { - int p_was_empty = (p->head == NULL) ; - struct dn_heap *sch = &(p->scheduler_heap); - struct dn_heap *neh = &(p->not_eligible_heap) ; + int p_was_empty = (p->head == NULL); + struct dn_heap *sch = &(p->scheduler_heap); + struct dn_heap *neh = &(p->not_eligible_heap); + int64_t p_numbytes = p->numbytes; - DUMMYNET_LOCK_ASSERT(); + DUMMYNET_LOCK_ASSERT(); - if (p->if_name[0] == 0) /* tx clock is simulated */ - p->numbytes += ( curr_time - p->sched_time ) * p->bandwidth; - else { /* tx clock is for real, the ifq must be empty or this is a NOP */ - if (p->ifp && p->ifp->if_snd.ifq_head != NULL) - return ; - else { - DPRINTF(("dummynet: pipe %d ready from %s --\n", - p->pipe_nr, p->if_name)); + if (p->if_name[0] == 0) /* tx clock is simulated */ + /* + * Since result may not fit into p->numbytes (32bit) we + * are using 64bit var here. + */ + p_numbytes += (curr_time - p->sched_time) * p->bandwidth; + else { /* + * tx clock is for real, + * the ifq must be empty or this is a NOP. 
+ */ + if (p->ifp && p->ifp->if_snd.ifq_head != NULL) + return; + else { + DPRINTF(("dummynet: pipe %d ready from %s --\n", + p->pipe_nr, p->if_name)); + } } - } + + /* + * While we have backlogged traffic AND credit, we need to do + * something on the queue. + */ + while (p_numbytes >= 0 && (sch->elements > 0 || neh->elements > 0)) { + if (sch->elements > 0) { + /* Have some eligible pkts to send out. */ + struct dn_flow_queue *q = sch->p[0].object; + struct mbuf *pkt = q->head; + struct dn_flow_set *fs = q->fs; + uint64_t len = pkt->m_pkthdr.len; + int len_scaled = p->bandwidth ? len * 8 * hz : 0; - /* - * While we have backlogged traffic AND credit, we need to do - * something on the queue. - */ - while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) { - if (sch->elements > 0) { /* have some eligible pkts to send out */ - struct dn_flow_queue *q = sch->p[0].object ; - struct mbuf *pkt = q->head; - struct dn_flow_set *fs = q->fs; - u_int64_t len = pkt->m_pkthdr.len; - int len_scaled = p->bandwidth ? len*8*hz : 0 ; + heap_extract(sch, NULL); /* Remove queue from heap. */ + p_numbytes -= len_scaled; + move_pkt(pkt, q, p, len); - heap_extract(sch, NULL); /* remove queue from heap */ - p->numbytes -= len_scaled ; - move_pkt(pkt, q, p, len); + p->V += (len << MY_M) / p->sum; /* Update V. */ + q->S = q->F; /* Update start time. */ + if (q->len == 0) { + /* Flow not backlogged any more. */ + fs->backlogged--; + heap_insert(&(p->idle_heap), q->F, q); + } else { + /* Still backlogged. */ - p->V += (len<<MY_M) / p->sum ; /* update V */ - q->S = q->F ; /* update start time */ - if (q->len == 0) { /* Flow not backlogged any more */ - fs->backlogged-- ; - heap_insert(&(p->idle_heap), q->F, q); - } else { /* still backlogged */ + /* + * Update F and position in backlogged queue, + * then put flow in not_eligible_heap + * (we will fix this later). 
+ */ + len = (q->head)->m_pkthdr.len; + q->F += (len << MY_M) / (uint64_t)fs->weight; + if (DN_KEY_LEQ(q->S, p->V)) + heap_insert(neh, q->S, q); + else + heap_insert(sch, q->F, q); + } + } /* - * update F and position in backlogged queue, then - * put flow in not_eligible_heap (we will fix this later). + * Now compute V = max(V, min(S_i)). Remember that all elements + * in sch have by definition S_i <= V so if sch is not empty, + * V is surely the max and we must not update it. Conversely, + * if sch is empty we only need to look at neh. */ - len = (q->head)->m_pkthdr.len; - q->F += (len<<MY_M)/(u_int64_t) fs->weight ; - if (DN_KEY_LEQ(q->S, p->V)) - heap_insert(neh, q->S, q); - else - heap_insert(sch, q->F, q); - } + if (sch->elements == 0 && neh->elements > 0) + p->V = MAX64(p->V, neh->p[0].key); + /* Move from neh to sch any packets that have become eligible */ + while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V)) { + struct dn_flow_queue *q = neh->p[0].object; + heap_extract(neh, NULL); + heap_insert(sch, q->F, q); + } + + if (p->if_name[0] != '\0') { /* Tx clock is from a real thing */ + p_numbytes = -1; /* Mark not ready for I/O. */ + break; + } } - /* - * now compute V = max(V, min(S_i)). Remember that all elements in sch - * have by definition S_i <= V so if sch is not empty, V is surely - * the max and we must not update it. Conversely, if sch is empty - * we only need to look at neh. - */ - if (sch->elements == 0 && neh->elements > 0) - p->V = MAX64 ( p->V, neh->p[0].key ); - /* move from neh to sch any packets that have become eligible */ - while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V) ) { - struct dn_flow_queue *q = neh->p[0].object ; - heap_extract(neh, NULL); - heap_insert(sch, q->F, q); - } + if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0 && + p->idle_heap.elements > 0) { + /* + * No traffic and no events scheduled. + * We can get rid of idle-heap. 
+ */ + int i; + + for (i = 0; i < p->idle_heap.elements; i++) { + struct dn_flow_queue *q = p->idle_heap.p[i].object; - if (p->if_name[0] != '\0') {/* tx clock is from a real thing */ - p->numbytes = -1 ; /* mark not ready for I/O */ - break ; + q->F = 0; + q->S = q->F + 1; + } + p->sum = 0; + p->V = 0; + p->idle_heap.elements = 0; } - } - if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0 - && p->idle_heap.elements > 0) { /* - * no traffic and no events scheduled. We can get rid of idle-heap. + * If we are getting clocks from dummynet (not a real interface) and + * If we are under credit, schedule the next ready event. + * Also fix the delivery time of the last packet. */ - int i ; + if (p->if_name[0]==0 && p_numbytes < 0) { /* This implies bw > 0. */ + dn_key t = 0; /* Number of ticks i have to wait. */ - for (i = 0 ; i < p->idle_heap.elements ; i++) { - struct dn_flow_queue *q = p->idle_heap.p[i].object ; + if (p->bandwidth > 0) + t = (p->bandwidth - 1 - p_numbytes) / p->bandwidth; + dn_tag_get(p->tail)->output_time += t; + p->sched_time = curr_time; + heap_insert(&wfq_ready_heap, curr_time + t, (void *)p); + /* + * XXX Should check errors on heap_insert, and drain the whole + * queue on error hoping next time we are luckier. + */ + } - q->F = 0 ; - q->S = q->F + 1 ; - } - p->sum = 0 ; - p->V = 0 ; - p->idle_heap.elements = 0 ; - } - /* - * If we are getting clocks from dummynet (not a real interface) and - * If we are under credit, schedule the next ready event. - * Also fix the delivery time of the last packet. - */ - if (p->if_name[0]==0 && p->numbytes < 0) { /* this implies bandwidth >0 */ - dn_key t=0 ; /* number of ticks i have to wait */ + /* Fit (adjust if necessary) 64bit result into 32bit variable. 
*/ + if (p_numbytes > INT_MAX) + p->numbytes = INT_MAX; + else if (p_numbytes < INT_MIN) + p->numbytes = INT_MIN; + else + p->numbytes = p_numbytes; - if (p->bandwidth > 0) - t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ; - dn_tag_get(p->tail)->output_time += t ; - p->sched_time = curr_time ; - heap_insert(&wfq_ready_heap, curr_time + t, (void *)p); - /* XXX should check errors on heap_insert, and drain the whole - * queue on error hoping next time we are luckier. + /* + * If the delay line was empty call transmit_event() now. + * Otherwise, the scheduler will take care of it. */ - } - /* - * If the delay line was empty call transmit_event() now. - * Otherwise, the scheduler will take care of it. - */ - if (p_was_empty) - transmit_event(p, head, tail); + if (p_was_empty) + transmit_event(p, head, tail); } /* @@ -956,29 +999,28 @@ static struct dn_flow_queue * create_queue(struct dn_flow_set *fs, int i) { - struct dn_flow_queue *q ; + struct dn_flow_queue *q; - if (fs->rq_elements > fs->rq_size * dn_max_ratio && + if (fs->rq_elements > fs->rq_size * dn_max_ratio && expire_queues(fs) == 0) { - /* - * No way to get room, use or create overflow queue. - */ - i = fs->rq_size ; - if ( fs->rq[i] != NULL ) - return fs->rq[i] ; - } - q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO); - if (q == NULL) { - printf("dummynet: sorry, cannot allocate queue for new flow\n"); - return NULL ; - } - q->fs = fs ; - q->hash_slot = i ; - q->next = fs->rq[i] ; - q->S = q->F + 1; /* hack - mark timestamp as invalid */ - fs->rq[i] = q ; - fs->rq_elements++ ; - return q ; + /* No way to get room, use or create overflow queue. */ + i = fs->rq_size; + if (fs->rq[i] != NULL) + return fs->rq[i]; + } + q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO); + if (q == NULL) { + printf("dummynet: sorry, cannot allocate queue for new flow\n"); + return (NULL); + } + q->fs = fs; + q->hash_slot = i; + q->next = fs->rq[i]; + q->S = q->F + 1; /* hack - mark timestamp as invalid. 
*/ + q->numbytes = io_fast ? fs->pipe->bandwidth : 0; + fs->rq[i] = q; + fs->rq_elements++; + return (q); } /* @@ -1233,185 +1275,201 @@ * NULL in ip_input, destination interface in ip_output, * real_dst in bdg_forward * rule matching rule, in case of multiple passes - * */ static int -dummynet_io(struct mbuf *m, int dir, struct ip_fw_args *fwa) +dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) { - struct mbuf *head = NULL, *tail = NULL; - struct dn_pkt_tag *pkt; - struct m_tag *mtag; - struct dn_flow_set *fs = NULL; - struct dn_pipe *pipe ; - u_int64_t len = m->m_pkthdr.len ; - struct dn_flow_queue *q = NULL ; - int is_pipe; - ipfw_insn *cmd = ACTION_PTR(fwa->rule); + struct mbuf *m = *m0, *head = NULL, *tail = NULL; + struct dn_pkt_tag *pkt; + struct m_tag *mtag; + struct dn_flow_set *fs = NULL; + struct dn_pipe *pipe; + uint64_t len = m->m_pkthdr.len; + struct dn_flow_queue *q = NULL; + int is_pipe; + ipfw_insn *cmd = ACTION_PTR(fwa->rule); - KASSERT(m->m_nextpkt == NULL, - ("dummynet_io: mbuf queue passed to dummynet")); + KASSERT(m->m_nextpkt == NULL, + ("dummynet_io: mbuf queue passed to dummynet")); - if (cmd->opcode == O_LOG) - cmd += F_LEN(cmd); - if (cmd->opcode == O_ALTQ) - cmd += F_LEN(cmd); - if (cmd->opcode == O_TAG) - cmd += F_LEN(cmd); - is_pipe = (cmd->opcode == O_PIPE); + if (cmd->opcode == O_LOG) + cmd += F_LEN(cmd); + if (cmd->opcode == O_ALTQ) + cmd += F_LEN(cmd); + if (cmd->opcode == O_TAG) + cmd += F_LEN(cmd); + is_pipe = (cmd->opcode == O_PIPE); - DUMMYNET_LOCK(); - /* - * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule. - * - * XXXGL: probably the pipe->fs and fs->pipe logic here - * below can be simplified. - */ - if (is_pipe) { - pipe = locate_pipe(fwa->cookie); - if (pipe != NULL) - fs = &(pipe->fs); - } else - fs = locate_flowset(fwa->cookie); + DUMMYNET_LOCK(); + io_pkt++; + /* + * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule. 
+ * + * XXXGL: probably the pipe->fs and fs->pipe logic here + * below can be simplified. + */ + if (is_pipe) { + pipe = locate_pipe(fwa->cookie); + if (pipe != NULL) + fs = &(pipe->fs); + } else + fs = locate_flowset(fwa->cookie); - if (fs == NULL) - goto dropit; /* This queue/pipe does not exist! */ - pipe = fs->pipe; - if (pipe == NULL) { /* Must be a queue, try find a matching pipe. */ - pipe = locate_pipe(fs->parent_nr); - if (pipe != NULL) - fs->pipe = pipe; - else { - printf("dummynet: no pipe %d for queue %d, drop pkt\n", - fs->parent_nr, fs->fs_nr); - goto dropit ; + if (fs == NULL) + goto dropit; /* This queue/pipe does not exist! */ + pipe = fs->pipe; + if (pipe == NULL) { /* Must be a queue, try find a matching pipe. */ + pipe = locate_pipe(fs->parent_nr); + if (pipe != NULL) + fs->pipe = pipe; + else { + printf("dummynet: no pipe %d for queue %d, drop pkt\n", + fs->parent_nr, fs->fs_nr); + goto dropit; + } } - } - q = find_queue(fs, &(fwa->f_id)); - if ( q == NULL ) - goto dropit ; /* cannot allocate queue */ - /* - * update statistics, then check reasons to drop pkt - */ - q->tot_bytes += len ; - q->tot_pkts++ ; - if ( fs->plr && random() < fs->plr ) - goto dropit ; /* random pkt drop */ - if ( fs->flags_fs & DN_QSIZE_IS_BYTES) { - if (q->len_bytes > fs->qsize) - goto dropit ; /* queue size overflow */ - } else { - if (q->len >= fs->qsize) - goto dropit ; /* queue count overflow */ - } - if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) ) - goto dropit ; + q = find_queue(fs, &(fwa->f_id)); + if (q == NULL) + goto dropit; /* Cannot allocate queue. */ - /* XXX expensive to zero, see if we can remove it*/ - mtag = m_tag_get(PACKET_TAG_DUMMYNET, - sizeof(struct dn_pkt_tag), M_NOWAIT|M_ZERO); - if ( mtag == NULL ) - goto dropit ; /* cannot allocate packet header */ - m_tag_prepend(m, mtag); /* attach to mbuf chain */ + /* Update statistics, then check reasons to drop pkt. 
*/ + q->tot_bytes += len; + q->tot_pkts++; + if (fs->plr && random() < fs->plr) + goto dropit; /* Random pkt drop. */ + if (fs->flags_fs & DN_QSIZE_IS_BYTES) { + if (q->len_bytes > fs->qsize) + goto dropit; /* Queue size overflow. */ + } else { + if (q->len >= fs->qsize) + goto dropit; /* Queue count overflow. */ + } + if (fs->flags_fs & DN_IS_RED && red_drops(fs, q, len)) + goto dropit; - pkt = (struct dn_pkt_tag *)(mtag+1); - /* ok, i can handle the pkt now... */ - /* build and enqueue packet + parameters */ - pkt->rule = fwa->rule ; - pkt->dn_dir = dir ; + /* XXX expensive to zero, see if we can remove it. */ + mtag = m_tag_get(PACKET_TAG_DUMMYNET, + sizeof(struct dn_pkt_tag), M_NOWAIT | M_ZERO); + if (mtag == NULL) + goto dropit; /* Cannot allocate packet header. */ + m_tag_prepend(m, mtag); /* Attach to mbuf chain. */ - pkt->ifp = fwa->oif; + pkt = (struct dn_pkt_tag *)(mtag + 1); + /* + * Ok, i can handle the pkt now... + * Build and enqueue packet + parameters. + */ + pkt->rule = fwa->rule; + pkt->dn_dir = dir; - if (q->head == NULL) - q->head = m; - else - q->tail->m_nextpkt = m; - q->tail = m; - q->len++; - q->len_bytes += len ; + pkt->ifp = fwa->oif; - if ( q->head != m ) /* flow was not idle, we are done */ - goto done; - /* - * If we reach this point the flow was previously idle, so we need - * to schedule it. This involves different actions for fixed-rate or - * WF2Q queues. - */ - if (is_pipe) { - /* - * Fixed-rate queue: just insert into the ready_heap. - */ - dn_key t = 0 ; - if (pipe->bandwidth) - t = SET_TICKS(m, q, pipe); - q->sched_time = curr_time ; - if (t == 0) /* must process it now */ - ready_event(q, &head, &tail); + if (q->head == NULL) + q->head = m; else - heap_insert(&ready_heap, curr_time + t , q ); - } else { + q->tail->m_nextpkt = m; + q->tail = m; + q->len++; + q->len_bytes += len; + + if (q->head != m) /* Flow was not idle, we are done. */ + goto done; + + if (q->q_time < curr_time) + q->numbytes = io_fast ? 
fs->pipe->bandwidth : 0; + q->q_time = curr_time; + /* - * WF2Q. First, compute start time S: if the flow was idle (S=F+1) - * set S to the virtual time V for the controlling pipe, and update - * the sum of weights for the pipe; otherwise, remove flow from - * idle_heap and set S to max(F,V). - * Second, compute finish time F = S + len/weight. - * Third, if pipe was idle, update V=max(S, V). - * Fourth, count one more backlogged flow. + * If we reach this point the flow was previously idle, so we need + * to schedule it. This involves different actions for fixed-rate or + * WF2Q queues. */ - if (DN_KEY_GT(q->S, q->F)) { /* means timestamps are invalid */ - q->S = pipe->V ; - pipe->sum += fs->weight ; /* add weight of new queue */ + if (is_pipe) { + /* Fixed-rate queue: just insert into the ready_heap. */ + dn_key t = 0; + + if (pipe->bandwidth && m->m_pkthdr.len * 8 * hz > q->numbytes) + t = SET_TICKS(m, q, pipe); + q->sched_time = curr_time; + if (t == 0) /* Must process it now. */ + ready_event(q, &head, &tail); + else + heap_insert(&ready_heap, curr_time + t , q); } else { - heap_extract(&(pipe->idle_heap), q); - q->S = MAX64(q->F, pipe->V ) ; - } - q->F = q->S + ( len<<MY_M )/(u_int64_t) fs->weight; + /* + * WF2Q. First, compute start time S: if the flow was + * idle (S = F + 1) set S to the virtual time V for the + * controlling pipe, and update the sum of weights for the pipe; + * otherwise, remove flow from idle_heap and set S to max(F,V). + * Second, compute finish time F = S + len / weight. + * Third, if pipe was idle, update V = max(S, V). + * Fourth, count one more backlogged flow. + */ + if (DN_KEY_GT(q->S, q->F)) { /* Means timestamps are invalid. */ + q->S = pipe->V; + pipe->sum += fs->weight; /* Add weight of new queue. 
*/ + } else { + heap_extract(&(pipe->idle_heap), q); + q->S = MAX64(q->F, pipe->V); + } + q->F = q->S + (len << MY_M) / (uint64_t)fs->weight; - if (pipe->not_eligible_heap.elements == 0 && - pipe->scheduler_heap.elements == 0) - pipe->V = MAX64 ( q->S, pipe->V ); - fs->backlogged++ ; - /* - * Look at eligibility. A flow is not eligibile if S>V (when - * this happens, it means that there is some other flow already - * scheduled for the same pipe, so the scheduler_heap cannot be - * empty). If the flow is not eligible we just store it in the - * not_eligible_heap. Otherwise, we store in the scheduler_heap - * and possibly invoke ready_event_wfq() right now if there is - * leftover credit. - * Note that for all flows in scheduler_heap (SCH), S_i <= V, - * and for all flows in not_eligible_heap (NEH), S_i > V . - * So when we need to compute max( V, min(S_i) ) forall i in SCH+NEH, - * we only need to look into NEH. - */ - if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */ - if (pipe->scheduler_heap.elements == 0) - printf("dummynet: ++ ouch! not eligible but empty scheduler!\n"); - heap_insert(&(pipe->not_eligible_heap), q->S, q); - } else { - heap_insert(&(pipe->scheduler_heap), q->F, q); - if (pipe->numbytes >= 0) { /* pipe is idle */ - if (pipe->scheduler_heap.elements != 1) - printf("dummynet: OUCH! pipe should have been idle!\n"); - DPRINTF(("dummynet: waking up pipe %d at %d\n", - pipe->pipe_nr, (int)(q->F >> MY_M))); - pipe->sched_time = curr_time ; - ready_event_wfq(pipe, &head, &tail); - } + if (pipe->not_eligible_heap.elements == 0 && + pipe->scheduler_heap.elements == 0) + pipe->V = MAX64(q->S, pipe->V); + fs->backlogged++; + /* + * Look at eligibility. A flow is not eligibile if S>V (when + * this happens, it means that there is some other flow already + * scheduled for the same pipe, so the scheduler_heap cannot be + * empty). If the flow is not eligible we just store it in the + * not_eligible_heap. 
Otherwise, we store in the scheduler_heap + * and possibly invoke ready_event_wfq() right now if there is + * leftover credit. + * Note that for all flows in scheduler_heap (SCH), S_i <= V, + * and for all flows in not_eligible_heap (NEH), S_i > V. + * So when we need to compute max(V, min(S_i)) forall i in + * SCH+NEH, we only need to look into NEH. + */ + if (DN_KEY_GT(q->S, pipe->V)) { /* Not eligible. */ + if (pipe->scheduler_heap.elements == 0) + printf("dummynet: ++ ouch! not eligible but empty scheduler!\n"); + heap_insert(&(pipe->not_eligible_heap), q->S, q); + } else { + heap_insert(&(pipe->scheduler_heap), q->F, q); + if (pipe->numbytes >= 0) { /* Pipe is idle. */ + if (pipe->scheduler_heap.elements != 1) + printf("dummynet: OUCH! pipe should have been idle!\n"); + DPRINTF(("dummynet: waking up pipe %d at %d\n", + pipe->pipe_nr, (int)(q->F >> MY_M))); + pipe->sched_time = curr_time; + ready_event_wfq(pipe, &head, &tail); + } + } } - } done: - DUMMYNET_UNLOCK(); - if (head != NULL) - dummynet_send(head); - return 0; + if (head == m && dir != DN_TO_IFB_FWD && dir != DN_TO_ETH_DEMUX && + dir != DN_TO_ETH_OUT) { /* Fast io. */ + io_pkt_fast++; + if (m->m_nextpkt != NULL) + printf("dummynet: fast io: pkt chain detected!\n"); + head = m->m_nextpkt = NULL; + } else + *m0 = NULL; /* Normal io. */ + + DUMMYNET_UNLOCK(); + if (head != NULL) + dummynet_send(head); + return (0); dropit: - if (q) - q->drops++ ; - DUMMYNET_UNLOCK(); - m_freem(m); - return ( (fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); + io_pkt_drop++; + if (q) + q->drops++; + DUMMYNET_UNLOCK(); + m_freem(m); + *m0 = NULL; + return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); } /* @@ -1729,7 +1787,7 @@ /* Flush accumulated credit for all queues. */ for (i = 0; i <= pipe->fs.rq_size; i++) for (q = pipe->fs.rq[i]; q; q = q->next) - q->numbytes = 0; + q->numbytes = io_fast ? p->bandwidth : 0; pipe->bandwidth = p->bandwidth; pipe->numbytes = 0; /* just in case... 
*/ ==== //depot/projects/dtrace6/src/sys/netinet/ip_dummynet.h#2 (text+ko) ==== @@ -24,7 +24,7 @@ >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200804252308.m3PN89R4007146>