Date: Thu, 3 Apr 2008 14:30:03 GMT
From: Oleg Bulyzhin <oleg@FreeBSD.org>
To: freebsd-ipfw@FreeBSD.org
Subject: Re: kern/121955: dummynet panics after 6.2
Message-ID: <200804031430.m33EU3DN005330@freefall.freebsd.org>
The following reply was made to PR kern/121955; it has been noted by GNATS. From: Oleg Bulyzhin <oleg@FreeBSD.org> To: bug-followup@FreeBSD.org Cc: Subject: Re: kern/121955: dummynet panics after 6.2 Date: Wed, 2 Apr 2008 20:47:47 +0400 --rwEMma7ioTxnRzrJ Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Please test attached patch and let me know if it changes anything for you. -- Oleg. ================================================================ === Oleg Bulyzhin -- OBUL-RIPN -- OBUL-RIPE -- oleg@rinet.ru === ================================================================ --rwEMma7ioTxnRzrJ Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="dummynet_iofast.diff" Index: sys/netinet/ip_dummynet.h =================================================================== RCS file: /home/ncvs/src/sys/netinet/ip_dummynet.h,v retrieving revision 1.40 diff -u -r1.40 ip_dummynet.h --- sys/netinet/ip_dummynet.h 17 Jun 2007 00:33:34 -0000 1.40 +++ sys/netinet/ip_dummynet.h 27 Mar 2008 17:19:00 -0000 @@ -343,7 +343,7 @@ #ifdef _KERNEL typedef int ip_dn_ctl_t(struct sockopt *); /* raw_ip.c */ typedef void ip_dn_ruledel_t(void *); /* ip_fw.c */ -typedef int ip_dn_io_t(struct mbuf *m, int dir, struct ip_fw_args *fwa); +typedef int ip_dn_io_t(struct mbuf **m, int dir, struct ip_fw_args *fwa); extern ip_dn_ctl_t *ip_dn_ctl_ptr; extern ip_dn_ruledel_t *ip_dn_ruledel_ptr; extern ip_dn_io_t *ip_dn_io_ptr; Index: sys/netinet/ip_dummynet.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/ip_dummynet.c,v retrieving revision 1.110 diff -u -r1.110 ip_dummynet.c --- sys/netinet/ip_dummynet.c 7 Oct 2007 20:44:22 -0000 1.110 +++ sys/netinet/ip_dummynet.c 27 Mar 2008 17:19:03 -0000 @@ -56,6 +56,7 @@ * include files marked with XXX are probably not needed */ +#include <sys/limits.h> #include <sys/param.h> #include <sys/systm.h> #include <sys/malloc.h> @@ -110,6 +111,11 @@ /* Adjusted vs non-adjusted curr_time difference (ticks). 
*/ static long tick_diff; +static int io_fast; +static unsigned long io_pkt; +static unsigned long io_pkt_fast; +static unsigned long io_pkt_drop; + /* * Three heaps contain queues and pipes that the scheduler handles: * @@ -181,6 +187,17 @@ SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost, CTLFLAG_RD, &tick_lost, 0, "Number of ticks coalesced by dummynet taskqueue."); +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast, + CTLFLAG_RW, &io_fast, 0, "Enable fast dummynet io."); +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt, + CTLFLAG_RD, &io_pkt, 0, + "Number of packets passed to dummynet."); +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast, + CTLFLAG_RD, &io_pkt_fast, 0, + "Number of packets bypassed dummynet scheduler."); +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop, + CTLFLAG_RD, &io_pkt_drop, 0, + "Number of packets dropped by dummynet."); #endif #ifdef DUMMYNET_DEBUG @@ -206,15 +223,15 @@ #define DUMMYNET_UNLOCK() mtx_unlock(&dummynet_mtx) #define DUMMYNET_LOCK_ASSERT() mtx_assert(&dummynet_mtx, MA_OWNED) -static int config_pipe(struct dn_pipe *p); -static int ip_dn_ctl(struct sockopt *sopt); +static int config_pipe(struct dn_pipe *p); +static int ip_dn_ctl(struct sockopt *sopt); -static void dummynet(void *); -static void dummynet_flush(void); -static void dummynet_send(struct mbuf *); -void dummynet_drain(void); +static void dummynet(void *); +static void dummynet_flush(void); +static void dummynet_send(struct mbuf *); +void dummynet_drain(void); static ip_dn_io_t dummynet_io; -static void dn_rule_delete(void *); +static void dn_rule_delete(void *); /* * Heap management functions. @@ -483,7 +500,7 @@ if ((m = pipe->head) != NULL) { pkt = dn_tag_get(m); /* - * XXX: Should check errors on heap_insert, by draining the + * XXX Should check errors on heap_insert, by draining the * whole pipe p and hoping in the future we are more successful. */ heap_insert(&extract_heap, pkt->output_time, pipe); @@ -496,8 +513,8 @@ * either a pipe (WF2Q) or a flow_queue (per-flow queueing) */ #define SET_TICKS(_m, q, p) \ - ((_m)->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \ - p->bandwidth ; + ((_m)->m_pkthdr.len * 8 * hz - (q)->numbytes + p->bandwidth - 1) / \ + p->bandwidth; /* * extract pkt from queue, compute output time (could be now) @@ -533,59 +550,61 @@ static void ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail) { - struct mbuf *pkt; - struct dn_pipe *p = q->fs->pipe ; - int p_was_empty ; + struct mbuf *pkt; + struct dn_pipe *p = q->fs->pipe; + int p_was_empty; - DUMMYNET_LOCK_ASSERT(); + DUMMYNET_LOCK_ASSERT(); - if (p == NULL) { - printf("dummynet: ready_event- pipe is gone\n"); - return ; - } - p_was_empty = (p->head == NULL) ; + if (p == NULL) { + printf("dummynet: ready_event- pipe is gone\n"); + return; + } + p_was_empty = (p->head == NULL); - /* - * schedule fixed-rate queues linked to this pipe: - * Account for the bw accumulated since last scheduling, then - * drain as many pkts as allowed by q->numbytes and move to - * the delay line (in p) computing output time. - * bandwidth==0 (no limit) means we can drain the whole queue, - * setting len_scaled = 0 does the job. - */ - q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth; - while ( (pkt = q->head) != NULL ) { - int len = pkt->m_pkthdr.len; - int len_scaled = p->bandwidth ? 
len*8*hz : 0 ; - if (len_scaled > q->numbytes ) - break ; - q->numbytes -= len_scaled ; - move_pkt(pkt, q, p, len); - } - /* - * If we have more packets queued, schedule next ready event - * (can only occur when bandwidth != 0, otherwise we would have - * flushed the whole queue in the previous loop). - * To this purpose we record the current time and compute how many - * ticks to go for the finish time of the packet. - */ - if ( (pkt = q->head) != NULL ) { /* this implies bandwidth != 0 */ - dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */ - q->sched_time = curr_time ; - heap_insert(&ready_heap, curr_time + t, (void *)q ); - /* XXX should check errors on heap_insert, and drain the whole - * queue on error hoping next time we are luckier. + /* + * Schedule fixed-rate queues linked to this pipe: + * account for the bw accumulated since last scheduling, then + * drain as many pkts as allowed by q->numbytes and move to + * the delay line (in p) computing output time. + * bandwidth==0 (no limit) means we can drain the whole queue, + * setting len_scaled = 0 does the job. */ - } else { /* RED needs to know when the queue becomes empty */ - q->q_time = curr_time; - q->numbytes = 0; - } - /* - * If the delay line was empty call transmit_event() now. - * Otherwise, the scheduler will take care of it. - */ - if (p_was_empty) - transmit_event(p, head, tail); + q->numbytes += (curr_time - q->sched_time) * p->bandwidth; + while ((pkt = q->head) != NULL) { + int len = pkt->m_pkthdr.len; + int len_scaled = p->bandwidth ? len * 8 * hz : 0; + + if (len_scaled > q->numbytes) + break; + q->numbytes -= len_scaled; + move_pkt(pkt, q, p, len); + } + /* + * If we have more packets queued, schedule next ready event + * (can only occur when bandwidth != 0, otherwise we would have + * flushed the whole queue in the previous loop). + * To this purpose we record the current time and compute how many + * ticks to go for the finish time of the packet. + */ + if ((pkt = q->head) != NULL) { /* this implies bandwidth != 0 */ + dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */ + + q->sched_time = curr_time; + heap_insert(&ready_heap, curr_time + t, (void *)q); + /* + * XXX Should check errors on heap_insert, and drain the whole + * queue on error hoping next time we are luckier. + */ + } else /* RED needs to know when the queue becomes empty. */ + q->q_time = curr_time; + + /* + * If the delay line was empty call transmit_event() now. + * Otherwise, the scheduler will take care of it. + */ + if (p_was_empty) + transmit_event(p, head, tail); } /* @@ -593,123 +612,147 @@ * the queues at their start time, and enqueue into the delay line. * Packets are drained until p->numbytes < 0. As long as * len_scaled >= p->numbytes, the packet goes into the delay line - * with a deadline p->delay. For the last packet, if p->numbytes<0, + * with a deadline p->delay. For the last packet, if p->numbytes < 0, * there is an additional delay. 
*/ static void ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail) { - int p_was_empty = (p->head == NULL) ; - struct dn_heap *sch = &(p->scheduler_heap); - struct dn_heap *neh = &(p->not_eligible_heap) ; + int p_was_empty = (p->head == NULL); + struct dn_heap *sch = &(p->scheduler_heap); + struct dn_heap *neh = &(p->not_eligible_heap); + int64_t p_numbytes = p->numbytes; - DUMMYNET_LOCK_ASSERT(); - - if (p->if_name[0] == 0) /* tx clock is simulated */ - p->numbytes += ( curr_time - p->sched_time ) * p->bandwidth; - else { /* tx clock is for real, the ifq must be empty or this is a NOP */ - if (p->ifp && p->ifp->if_snd.ifq_head != NULL) - return ; - else { - DPRINTF(("dummynet: pipe %d ready from %s --\n", - p->pipe_nr, p->if_name)); - } - } + DUMMYNET_LOCK_ASSERT(); - /* - * While we have backlogged traffic AND credit, we need to do - * something on the queue. - */ - while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) { - if (sch->elements > 0) { /* have some eligible pkts to send out */ - struct dn_flow_queue *q = sch->p[0].object ; - struct mbuf *pkt = q->head; - struct dn_flow_set *fs = q->fs; - u_int64_t len = pkt->m_pkthdr.len; - int len_scaled = p->bandwidth ? len*8*hz : 0 ; - - heap_extract(sch, NULL); /* remove queue from heap */ - p->numbytes -= len_scaled ; - move_pkt(pkt, q, p, len); - - p->V += (len<<MY_M) / p->sum ; /* update V */ - q->S = q->F ; /* update start time */ - if (q->len == 0) { /* Flow not backlogged any more */ - fs->backlogged-- ; - heap_insert(&(p->idle_heap), q->F, q); - } else { /* still backlogged */ + if (p->if_name[0] == 0) /* tx clock is simulated */ /* - * update F and position in backlogged queue, then - * put flow in not_eligible_heap (we will fix this later). + * Since result may not fit into p->numbytes (32bit) we + * are using 64bit var here. */ - len = (q->head)->m_pkthdr.len; - q->F += (len<<MY_M)/(u_int64_t) fs->weight ; - if (DN_KEY_LEQ(q->S, p->V)) - heap_insert(neh, q->S, q); - else - heap_insert(sch, q->F, q); - } + p_numbytes += (curr_time - p->sched_time) * p->bandwidth; + else { /* + * tx clock is for real, + * the ifq must be empty or this is a NOP. + */ + if (p->ifp && p->ifp->if_snd.ifq_head != NULL) + return; + else { + DPRINTF(("dummynet: pipe %d ready from %s --\n", + p->pipe_nr, p->if_name)); + } } + /* - * now compute V = max(V, min(S_i)). Remember that all elements in sch - * have by definition S_i <= V so if sch is not empty, V is surely - * the max and we must not update it. Conversely, if sch is empty - * we only need to look at neh. + * While we have backlogged traffic AND credit, we need to do + * something on the queue. */ - if (sch->elements == 0 && neh->elements > 0) - p->V = MAX64 ( p->V, neh->p[0].key ); - /* move from neh to sch any packets that have become eligible */ - while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V) ) { - struct dn_flow_queue *q = neh->p[0].object ; - heap_extract(neh, NULL); - heap_insert(sch, q->F, q); + while (p_numbytes >= 0 && (sch->elements > 0 || neh->elements > 0)) { + if (sch->elements > 0) { + /* Have some eligible pkts to send out. */ + struct dn_flow_queue *q = sch->p[0].object; + struct mbuf *pkt = q->head; + struct dn_flow_set *fs = q->fs; + uint64_t len = pkt->m_pkthdr.len; + int len_scaled = p->bandwidth ? len * 8 * hz : 0; + + heap_extract(sch, NULL); /* Remove queue from heap. */ + p_numbytes -= len_scaled; + move_pkt(pkt, q, p, len); + + p->V += (len << MY_M) / p->sum; /* Update V. */ + q->S = q->F; /* Update start time. 
*/ + if (q->len == 0) { + /* Flow not backlogged any more. */ + fs->backlogged--; + heap_insert(&(p->idle_heap), q->F, q); + } else { + /* Still backlogged. */ + + /* + * Update F and position in backlogged queue, + * then put flow in not_eligible_heap + * (we will fix this later). + */ + len = (q->head)->m_pkthdr.len; + q->F += (len << MY_M) / (uint64_t)fs->weight; + if (DN_KEY_LEQ(q->S, p->V)) + heap_insert(neh, q->S, q); + else + heap_insert(sch, q->F, q); + } + } + /* + * Now compute V = max(V, min(S_i)). Remember that all elements + * in sch have by definition S_i <= V so if sch is not empty, + * V is surely the max and we must not update it. Conversely, + * if sch is empty we only need to look at neh. + */ + if (sch->elements == 0 && neh->elements > 0) + p->V = MAX64(p->V, neh->p[0].key); + /* Move from neh to sch any packets that have become eligible */ + while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V)) { + struct dn_flow_queue *q = neh->p[0].object; + heap_extract(neh, NULL); + heap_insert(sch, q->F, q); + } + + if (p->if_name[0] != '\0') { /* Tx clock is from a real thing */ + p_numbytes = -1; /* Mark not ready for I/O. */ + break; + } } + if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0 && + p->idle_heap.elements > 0) { + /* + * No traffic and no events scheduled. + * We can get rid of idle-heap. + */ + int i; - if (p->if_name[0] != '\0') {/* tx clock is from a real thing */ - p->numbytes = -1 ; /* mark not ready for I/O */ - break ; + for (i = 0; i < p->idle_heap.elements; i++) { + struct dn_flow_queue *q = p->idle_heap.p[i].object; + + q->F = 0; + q->S = q->F + 1; + } + p->sum = 0; + p->V = 0; + p->idle_heap.elements = 0; } - } - if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0 - && p->idle_heap.elements > 0) { /* - * no traffic and no events scheduled. We can get rid of idle-heap. + * If we are getting clocks from dummynet (not a real interface) and + * If we are under credit, schedule the next ready event. + * Also fix the delivery time of the last packet. */ - int i ; + if (p->if_name[0]==0 && p_numbytes < 0) { /* This implies bw > 0. */ + dn_key t = 0; /* Number of ticks i have to wait. */ - for (i = 0 ; i < p->idle_heap.elements ; i++) { - struct dn_flow_queue *q = p->idle_heap.p[i].object ; - - q->F = 0 ; - q->S = q->F + 1 ; + if (p->bandwidth > 0) + t = (p->bandwidth - 1 - p_numbytes) / p->bandwidth; + dn_tag_get(p->tail)->output_time += t; + p->sched_time = curr_time; + heap_insert(&wfq_ready_heap, curr_time + t, (void *)p); + /* + * XXX Should check errors on heap_insert, and drain the whole + * queue on error hoping next time we are luckier. + */ } - p->sum = 0 ; - p->V = 0 ; - p->idle_heap.elements = 0 ; - } - /* - * If we are getting clocks from dummynet (not a real interface) and - * If we are under credit, schedule the next ready event. - * Also fix the delivery time of the last packet. - */ - if (p->if_name[0]==0 && p->numbytes < 0) { /* this implies bandwidth >0 */ - dn_key t=0 ; /* number of ticks i have to wait */ - if (p->bandwidth > 0) - t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ; - dn_tag_get(p->tail)->output_time += t ; - p->sched_time = curr_time ; - heap_insert(&wfq_ready_heap, curr_time + t, (void *)p); - /* XXX should check errors on heap_insert, and drain the whole - * queue on error hoping next time we are luckier. + /* Fit (adjust if necessary) 64bit result into 32bit variable. 
*/ + if (p_numbytes > INT_MAX) + p->numbytes = INT_MAX; + else if (p_numbytes < INT_MIN) + p->numbytes = INT_MIN; + else + p->numbytes = p_numbytes; + + /* + * If the delay line was empty call transmit_event() now. + * Otherwise, the scheduler will take care of it. */ - } - /* - * If the delay line was empty call transmit_event() now. - * Otherwise, the scheduler will take care of it. - */ - if (p_was_empty) - transmit_event(p, head, tail); + if (p_was_empty) + transmit_event(p, head, tail); } /* @@ -924,29 +967,28 @@ static struct dn_flow_queue * create_queue(struct dn_flow_set *fs, int i) { - struct dn_flow_queue *q ; + struct dn_flow_queue *q; - if (fs->rq_elements > fs->rq_size * dn_max_ratio && + if (fs->rq_elements > fs->rq_size * dn_max_ratio && expire_queues(fs) == 0) { - /* - * No way to get room, use or create overflow queue. - */ - i = fs->rq_size ; - if ( fs->rq[i] != NULL ) - return fs->rq[i] ; - } - q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO); - if (q == NULL) { - printf("dummynet: sorry, cannot allocate queue for new flow\n"); - return NULL ; - } - q->fs = fs ; - q->hash_slot = i ; - q->next = fs->rq[i] ; - q->S = q->F + 1; /* hack - mark timestamp as invalid */ - fs->rq[i] = q ; - fs->rq_elements++ ; - return q ; + /* No way to get room, use or create overflow queue. */ + i = fs->rq_size; + if (fs->rq[i] != NULL) + return fs->rq[i]; + } + q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO); + if (q == NULL) { + printf("dummynet: sorry, cannot allocate queue for new flow\n"); + return (NULL); + } + q->fs = fs; + q->hash_slot = i; + q->next = fs->rq[i]; + q->S = q->F + 1; /* hack - mark timestamp as invalid. */ + q->numbytes = io_fast ? fs->pipe->bandwidth : 0; + fs->rq[i] = q; + fs->rq_elements++; + return (q); } /* @@ -1200,185 +1242,201 @@ * ifp the 'ifp' parameter from the caller. * NULL in ip_input, destination interface in ip_output, * rule matching rule, in case of multiple passes - * */ static int -dummynet_io(struct mbuf *m, int dir, struct ip_fw_args *fwa) +dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) { - struct mbuf *head = NULL, *tail = NULL; - struct dn_pkt_tag *pkt; - struct m_tag *mtag; - struct dn_flow_set *fs = NULL; - struct dn_pipe *pipe ; - u_int64_t len = m->m_pkthdr.len ; - struct dn_flow_queue *q = NULL ; - int is_pipe; - ipfw_insn *cmd = ACTION_PTR(fwa->rule); - - KASSERT(m->m_nextpkt == NULL, - ("dummynet_io: mbuf queue passed to dummynet")); - - if (cmd->opcode == O_LOG) - cmd += F_LEN(cmd); - if (cmd->opcode == O_ALTQ) - cmd += F_LEN(cmd); - if (cmd->opcode == O_TAG) - cmd += F_LEN(cmd); - is_pipe = (cmd->opcode == O_PIPE); + struct mbuf *m = *m0, *head = NULL, *tail = NULL; + struct dn_pkt_tag *pkt; + struct m_tag *mtag; + struct dn_flow_set *fs = NULL; + struct dn_pipe *pipe; + uint64_t len = m->m_pkthdr.len; + struct dn_flow_queue *q = NULL; + int is_pipe; + ipfw_insn *cmd = ACTION_PTR(fwa->rule); + + KASSERT(m->m_nextpkt == NULL, + ("dummynet_io: mbuf queue passed to dummynet")); + + if (cmd->opcode == O_LOG) + cmd += F_LEN(cmd); + if (cmd->opcode == O_ALTQ) + cmd += F_LEN(cmd); + if (cmd->opcode == O_TAG) + cmd += F_LEN(cmd); + is_pipe = (cmd->opcode == O_PIPE); - DUMMYNET_LOCK(); - /* - * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule. - * - * XXXGL: probably the pipe->fs and fs->pipe logic here - * below can be simplified. 
- */ - if (is_pipe) { - pipe = locate_pipe(fwa->cookie); - if (pipe != NULL) - fs = &(pipe->fs); - } else - fs = locate_flowset(fwa->cookie); + DUMMYNET_LOCK(); + io_pkt++; + /* + * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule. + * + * XXXGL: probably the pipe->fs and fs->pipe logic here + * below can be simplified. + */ + if (is_pipe) { + pipe = locate_pipe(fwa->cookie); + if (pipe != NULL) + fs = &(pipe->fs); + } else + fs = locate_flowset(fwa->cookie); - if (fs == NULL) - goto dropit; /* This queue/pipe does not exist! */ - pipe = fs->pipe; - if (pipe == NULL) { /* Must be a queue, try find a matching pipe. */ - pipe = locate_pipe(fs->parent_nr); - if (pipe != NULL) - fs->pipe = pipe; - else { - printf("dummynet: no pipe %d for queue %d, drop pkt\n", - fs->parent_nr, fs->fs_nr); - goto dropit ; + if (fs == NULL) + goto dropit; /* This queue/pipe does not exist! */ + pipe = fs->pipe; + if (pipe == NULL) { /* Must be a queue, try find a matching pipe. */ + pipe = locate_pipe(fs->parent_nr); + if (pipe != NULL) + fs->pipe = pipe; + else { + printf("dummynet: no pipe %d for queue %d, drop pkt\n", + fs->parent_nr, fs->fs_nr); + goto dropit; + } } - } - q = find_queue(fs, &(fwa->f_id)); - if ( q == NULL ) - goto dropit ; /* cannot allocate queue */ - /* - * update statistics, then check reasons to drop pkt - */ - q->tot_bytes += len ; - q->tot_pkts++ ; - if ( fs->plr && random() < fs->plr ) - goto dropit ; /* random pkt drop */ - if ( fs->flags_fs & DN_QSIZE_IS_BYTES) { - if (q->len_bytes > fs->qsize) - goto dropit ; /* queue size overflow */ - } else { - if (q->len >= fs->qsize) - goto dropit ; /* queue count overflow */ - } - if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) ) - goto dropit ; - - /* XXX expensive to zero, see if we can remove it*/ - mtag = m_tag_get(PACKET_TAG_DUMMYNET, - sizeof(struct dn_pkt_tag), M_NOWAIT|M_ZERO); - if ( mtag == NULL ) - goto dropit ; /* cannot allocate packet header */ - m_tag_prepend(m, mtag); /* attach to mbuf chain */ - - pkt = (struct dn_pkt_tag *)(mtag+1); - /* ok, i can handle the pkt now... */ - /* build and enqueue packet + parameters */ - pkt->rule = fwa->rule ; - pkt->dn_dir = dir ; - - pkt->ifp = fwa->oif; + q = find_queue(fs, &(fwa->f_id)); + if (q == NULL) + goto dropit; /* Cannot allocate queue. */ + + /* Update statistics, then check reasons to drop pkt. */ + q->tot_bytes += len; + q->tot_pkts++; + if (fs->plr && random() < fs->plr) + goto dropit; /* Random pkt drop. */ + if (fs->flags_fs & DN_QSIZE_IS_BYTES) { + if (q->len_bytes > fs->qsize) + goto dropit; /* Queue size overflow. */ + } else { + if (q->len >= fs->qsize) + goto dropit; /* Queue count overflow. */ + } + if (fs->flags_fs & DN_IS_RED && red_drops(fs, q, len)) + goto dropit; - if (q->head == NULL) - q->head = m; - else - q->tail->m_nextpkt = m; - q->tail = m; - q->len++; - q->len_bytes += len ; + /* XXX expensive to zero, see if we can remove it. */ + mtag = m_tag_get(PACKET_TAG_DUMMYNET, + sizeof(struct dn_pkt_tag), M_NOWAIT | M_ZERO); + if (mtag == NULL) + goto dropit; /* Cannot allocate packet header. */ + m_tag_prepend(m, mtag); /* Attach to mbuf chain. */ - if ( q->head != m ) /* flow was not idle, we are done */ - goto done; - /* - * If we reach this point the flow was previously idle, so we need - * to schedule it. This involves different actions for fixed-rate or - * WF2Q queues. - */ - if (is_pipe) { + pkt = (struct dn_pkt_tag *)(mtag + 1); /* - * Fixed-rate queue: just insert into the ready_heap. + * Ok, i can handle the pkt now... 
+ * Build and enqueue packet + parameters. */ - dn_key t = 0 ; - if (pipe->bandwidth) - t = SET_TICKS(m, q, pipe); - q->sched_time = curr_time ; - if (t == 0) /* must process it now */ - ready_event(q, &head, &tail); + pkt->rule = fwa->rule; + pkt->dn_dir = dir; + + pkt->ifp = fwa->oif; + + if (q->head == NULL) + q->head = m; else - heap_insert(&ready_heap, curr_time + t , q ); - } else { - /* - * WF2Q. First, compute start time S: if the flow was idle (S=F+1) - * set S to the virtual time V for the controlling pipe, and update - * the sum of weights for the pipe; otherwise, remove flow from - * idle_heap and set S to max(F,V). - * Second, compute finish time F = S + len/weight. - * Third, if pipe was idle, update V=max(S, V). - * Fourth, count one more backlogged flow. - */ - if (DN_KEY_GT(q->S, q->F)) { /* means timestamps are invalid */ - q->S = pipe->V ; - pipe->sum += fs->weight ; /* add weight of new queue */ - } else { - heap_extract(&(pipe->idle_heap), q); - q->S = MAX64(q->F, pipe->V ) ; - } - q->F = q->S + ( len<<MY_M )/(u_int64_t) fs->weight; + q->tail->m_nextpkt = m; + q->tail = m; + q->len++; + q->len_bytes += len; + + if (q->head != m) /* Flow was not idle, we are done. */ + goto done; + + if (q->q_time < curr_time) + q->numbytes = io_fast ? fs->pipe->bandwidth : 0; + q->q_time = curr_time; - if (pipe->not_eligible_heap.elements == 0 && - pipe->scheduler_heap.elements == 0) - pipe->V = MAX64 ( q->S, pipe->V ); - fs->backlogged++ ; /* - * Look at eligibility. A flow is not eligibile if S>V (when - * this happens, it means that there is some other flow already - * scheduled for the same pipe, so the scheduler_heap cannot be - * empty). If the flow is not eligible we just store it in the - * not_eligible_heap. Otherwise, we store in the scheduler_heap - * and possibly invoke ready_event_wfq() right now if there is - * leftover credit. - * Note that for all flows in scheduler_heap (SCH), S_i <= V, - * and for all flows in not_eligible_heap (NEH), S_i > V . - * So when we need to compute max( V, min(S_i) ) forall i in SCH+NEH, - * we only need to look into NEH. + * If we reach this point the flow was previously idle, so we need + * to schedule it. This involves different actions for fixed-rate or + * WF2Q queues. */ - if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */ - if (pipe->scheduler_heap.elements == 0) - printf("dummynet: ++ ouch! not eligible but empty scheduler!\n"); - heap_insert(&(pipe->not_eligible_heap), q->S, q); + if (is_pipe) { + /* Fixed-rate queue: just insert into the ready_heap. */ + dn_key t = 0; + + if (pipe->bandwidth && m->m_pkthdr.len * 8 * hz > q->numbytes) + t = SET_TICKS(m, q, pipe); + q->sched_time = curr_time; + if (t == 0) /* Must process it now. */ + ready_event(q, &head, &tail); + else + heap_insert(&ready_heap, curr_time + t , q); } else { - heap_insert(&(pipe->scheduler_heap), q->F, q); - if (pipe->numbytes >= 0) { /* pipe is idle */ - if (pipe->scheduler_heap.elements != 1) - printf("dummynet: OUCH! pipe should have been idle!\n"); - DPRINTF(("dummynet: waking up pipe %d at %d\n", - pipe->pipe_nr, (int)(q->F >> MY_M))); - pipe->sched_time = curr_time ; - ready_event_wfq(pipe, &head, &tail); - } + /* + * WF2Q. First, compute start time S: if the flow was + * idle (S = F + 1) set S to the virtual time V for the + * controlling pipe, and update the sum of weights for the pipe; + * otherwise, remove flow from idle_heap and set S to max(F,V). + * Second, compute finish time F = S + len / weight. + * Third, if pipe was idle, update V = max(S, V). 
+ * Fourth, count one more backlogged flow. + */ + if (DN_KEY_GT(q->S, q->F)) { /* Means timestamps are invalid. */ + q->S = pipe->V; + pipe->sum += fs->weight; /* Add weight of new queue. */ + } else { + heap_extract(&(pipe->idle_heap), q); + q->S = MAX64(q->F, pipe->V); + } + q->F = q->S + (len << MY_M) / (uint64_t)fs->weight; + + if (pipe->not_eligible_heap.elements == 0 && + pipe->scheduler_heap.elements == 0) + pipe->V = MAX64(q->S, pipe->V); + fs->backlogged++; + /* + * Look at eligibility. A flow is not eligibile if S>V (when + * this happens, it means that there is some other flow already + * scheduled for the same pipe, so the scheduler_heap cannot be + * empty). If the flow is not eligible we just store it in the + * not_eligible_heap. Otherwise, we store in the scheduler_heap + * and possibly invoke ready_event_wfq() right now if there is + * leftover credit. + * Note that for all flows in scheduler_heap (SCH), S_i <= V, + * and for all flows in not_eligible_heap (NEH), S_i > V. + * So when we need to compute max(V, min(S_i)) forall i in + * SCH+NEH, we only need to look into NEH. + */ + if (DN_KEY_GT(q->S, pipe->V)) { /* Not eligible. */ + if (pipe->scheduler_heap.elements == 0) + printf("dummynet: ++ ouch! not eligible but empty scheduler!\n"); + heap_insert(&(pipe->not_eligible_heap), q->S, q); + } else { + heap_insert(&(pipe->scheduler_heap), q->F, q); + if (pipe->numbytes >= 0) { /* Pipe is idle. */ + if (pipe->scheduler_heap.elements != 1) + printf("dummynet: OUCH! pipe should have been idle!\n"); + DPRINTF(("dummynet: waking up pipe %d at %d\n", + pipe->pipe_nr, (int)(q->F >> MY_M))); + pipe->sched_time = curr_time; + ready_event_wfq(pipe, &head, &tail); + } + } } - } done: - DUMMYNET_UNLOCK(); - if (head != NULL) - dummynet_send(head); - return 0; + if (head == m && dir != DN_TO_IFB_FWD && dir != DN_TO_ETH_DEMUX && + dir != DN_TO_ETH_OUT) { /* Fast io. */ + io_pkt_fast++; + if (m->m_nextpkt != NULL) + printf("dummynet: fast io: pkt chain detected!\n"); + head = m->m_nextpkt = NULL; + } else + *m0 = NULL; /* Normal io. */ + + DUMMYNET_UNLOCK(); + if (head != NULL) + dummynet_send(head); + return (0); dropit: - if (q) - q->drops++ ; - DUMMYNET_UNLOCK(); - m_freem(m); - return ( (fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); + io_pkt_drop++; + if (q) + q->drops++; + DUMMYNET_UNLOCK(); + m_freem(m); + *m0 = NULL; + return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); } /* @@ -1696,7 +1754,7 @@ /* Flush accumulated credit for all queues. */ for (i = 0; i <= pipe->fs.rq_size; i++) for (q = pipe->fs.rq[i]; q; q = q->next) - q->numbytes = 0; + q->numbytes = io_fast ? p->bandwidth : 0; pipe->bandwidth = p->bandwidth; pipe->numbytes = 0; /* just in case... 
*/ Index: sys/netinet/ip_fw_pfil.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/ip_fw_pfil.c,v retrieving revision 1.25 diff -u -r1.25 ip_fw_pfil.c --- sys/netinet/ip_fw_pfil.c 7 Oct 2007 20:44:23 -0000 1.25 +++ sys/netinet/ip_fw_pfil.c 27 Mar 2008 17:19:10 -0000 @@ -104,16 +104,6 @@ bzero(&args, sizeof(args)); - dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); - if (dn_tag != NULL){ - struct dn_pkt_tag *dt; - - dt = (struct dn_pkt_tag *)(dn_tag+1); - args.rule = dt->rule; - - m_tag_delete(*m0, dn_tag); - } - ng_tag = (struct ng_ipfw_tag *)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0, NULL); if (ng_tag != NULL) { @@ -124,6 +114,16 @@ } again: + dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); + if (dn_tag != NULL){ + struct dn_pkt_tag *dt; + + dt = (struct dn_pkt_tag *)(dn_tag+1); + args.rule = dt->rule; + + m_tag_delete(*m0, dn_tag); + } + args.m = *m0; args.inp = inp; ipfw = ipfw_chk(&args); @@ -160,10 +160,11 @@ if (!DUMMYNET_LOADED) goto drop; if (mtod(*m0, struct ip *)->ip_v == 4) - ip_dn_io_ptr(*m0, DN_TO_IP_IN, &args); + ip_dn_io_ptr(m0, DN_TO_IP_IN, &args); else if (mtod(*m0, struct ip *)->ip_v == 6) - ip_dn_io_ptr(*m0, DN_TO_IP6_IN, &args); - *m0 = NULL; + ip_dn_io_ptr(m0, DN_TO_IP6_IN, &args); + if (*m0 != NULL) + goto again; return 0; /* packet consumed */ case IP_FW_TEE: @@ -225,16 +226,6 @@ bzero(&args, sizeof(args)); - dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); - if (dn_tag != NULL) { - struct dn_pkt_tag *dt; - - dt = (struct dn_pkt_tag *)(dn_tag+1); - args.rule = dt->rule; - - m_tag_delete(*m0, dn_tag); - } - ng_tag = (struct ng_ipfw_tag *)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0, NULL); if (ng_tag != NULL) { @@ -245,6 +236,16 @@ } again: + dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); + if (dn_tag != NULL) { + struct dn_pkt_tag *dt; + + dt = (struct dn_pkt_tag *)(dn_tag+1); + args.rule = dt->rule; + + m_tag_delete(*m0, dn_tag); + } + args.m = *m0; args.oif = ifp; args.inp = inp; @@ -286,10 +287,11 @@ if (!DUMMYNET_LOADED) break; if (mtod(*m0, struct ip *)->ip_v == 4) - ip_dn_io_ptr(*m0, DN_TO_IP_OUT, &args); + ip_dn_io_ptr(m0, DN_TO_IP_OUT, &args); else if (mtod(*m0, struct ip *)->ip_v == 6) - ip_dn_io_ptr(*m0, DN_TO_IP6_OUT, &args); - *m0 = NULL; + ip_dn_io_ptr(m0, DN_TO_IP6_OUT, &args); + if (*m0 != NULL) + goto again; return 0; /* packet consumed */ break; Index: sys/net/if_bridge.c =================================================================== RCS file: /home/ncvs/src/sys/net/if_bridge.c,v retrieving revision 1.103.2.3 diff -u -r1.103.2.3 if_bridge.c --- sys/net/if_bridge.c 21 Dec 2007 05:29:15 -0000 1.103.2.3 +++ sys/net/if_bridge.c 27 Mar 2008 17:19:15 -0000 @@ -2982,7 +2982,7 @@ * packet will return to us via bridge_dummynet(). */ args.oif = ifp; - ip_dn_io_ptr(*mp, DN_TO_IFB_FWD, &args); + ip_dn_io_ptr(mp, DN_TO_IFB_FWD, &args); return (error); } Index: sys/net/if_ethersubr.c =================================================================== RCS file: /home/ncvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.236.2.1 diff -u -r1.236.2.1 if_ethersubr.c --- sys/net/if_ethersubr.c 28 Oct 2007 16:24:16 -0000 1.236.2.1 +++ sys/net/if_ethersubr.c 27 Mar 2008 17:19:18 -0000 @@ -491,7 +491,7 @@ */ *m0 = NULL ; } - ip_dn_io_ptr(m, dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args); + ip_dn_io_ptr(&m, dst ? 
DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args); return 0; } /* Index: sbin/ipfw/ipfw.8 =================================================================== RCS file: /home/ncvs/src/sbin/ipfw/ipfw.8,v retrieving revision 1.203.2.1 diff -u -r1.203.2.1 ipfw.8 --- sbin/ipfw/ipfw.8 29 Nov 2007 18:42:15 -0000 1.203.2.1 +++ sbin/ipfw/ipfw.8 27 Mar 2008 17:25:32 -0000 @@ -1756,6 +1756,16 @@ TCP connection, or from/to a given host, or entire subnet, or a protocol type, etc. .Pp +There are two modes of dummynet operation: normal and fast. +Normal mode tries to emulate real link: dummynet scheduler ensures packet will +not leave pipe faster than it would be on real link with given bandwidth. +Fast mode allows certain packets to bypass dummynet scheduler (if packet flow +does not exceed pipe's bandwidth). Thus fast mode requires less cpu cycles +per packet (in average) but packet latency can be significantly lower comparing +to real link with same bandwidth. Default is normal mode, fast mode can be +enabled by setting net.inet.ip.dummynet.io_fast sysctl(8) variable to non-zero +value. +.Pp Packets belonging to the same flow are then passed to either of two different objects, which implement the traffic regulation: .Bl -hang -offset XXXX @@ -2120,6 +2130,14 @@ This value is used when no .Cm buckets option is specified when configuring a pipe/queue. +.It Em net.inet.ip.dummynet.io_fast : No 0 +If set to non-zero value enables "fast" mode of dummynet operation (see above). +.It Em net.inet.ip.dummynet.io_pkt +Number of packets passed to by dummynet. +.It Em net.inet.ip.dummynet.io_pkt_drop +Number of packets dropped by dummynet. +.It Em net.inet.ip.dummynet.io_pkt_fast +Number of packets bypassed dummynet scheduler. .It Em net.inet.ip.dummynet.max_chain_len : No 16 Target value for the maximum number of pipes/queues in a hash bucket. The product --rwEMma7ioTxnRzrJ--
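For reference, a minimal userland sketch that exercises the new sysctls the patch adds (net.inet.ip.dummynet.io_fast, io_pkt, io_pkt_fast, io_pkt_drop), assuming the patched kernel is running; the program name and file are illustrative only, and the code simply enables fast mode and prints the counters via sysctlbyname(3):

/*
 * dnstat.c -- sketch only: enable dummynet fast mode and dump the
 * io_pkt counters introduced by the patch above.
 * Build on FreeBSD with: cc -o dnstat dnstat.c
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>

static unsigned long
get_ulong(const char *name)
{
	unsigned long val;
	size_t len = sizeof(val);

	if (sysctlbyname(name, &val, &len, NULL, 0) == -1) {
		perror(name);
		exit(1);
	}
	return (val);
}

int
main(void)
{
	int on = 1;

	/* Same effect as "sysctl net.inet.ip.dummynet.io_fast=1". */
	if (sysctlbyname("net.inet.ip.dummynet.io_fast", NULL, NULL,
	    &on, sizeof(on)) == -1)
		perror("net.inet.ip.dummynet.io_fast");

	printf("packets seen by dummynet: %lu\n",
	    get_ulong("net.inet.ip.dummynet.io_pkt"));
	printf("packets on fast path:     %lu\n",
	    get_ulong("net.inet.ip.dummynet.io_pkt_fast"));
	printf("packets dropped:          %lu\n",
	    get_ulong("net.inet.ip.dummynet.io_pkt_drop"));
	return (0);
}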