From owner-svn-src-user@FreeBSD.ORG Tue Jan 5 11:39:49 2010 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 2342C1065693; Tue, 5 Jan 2010 11:39:49 +0000 (UTC) (envelope-from luigi@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 10AA48FC21; Tue, 5 Jan 2010 11:39:49 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o05BdnRb084848; Tue, 5 Jan 2010 11:39:49 GMT (envelope-from luigi@svn.freebsd.org) Received: (from luigi@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o05Bdmdk084842; Tue, 5 Jan 2010 11:39:48 GMT (envelope-from luigi@svn.freebsd.org) Message-Id: <201001051139.o05Bdmdk084842@svn.freebsd.org> From: Luigi Rizzo Date: Tue, 5 Jan 2010 11:39:48 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r201571 - in user/luigi/ipfw3-head/sys: conf netinet netinet/ipfw X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 05 Jan 2010 11:39:49 -0000 Author: luigi Date: Tue Jan 5 11:39:48 2010 New Revision: 201571 URL: http://svn.freebsd.org/changeset/base/201571 Log: move the binary heap code outside ip_dummynet.c; remove kernel-private definitions and data structures from ip_dummynet.h Added: user/luigi/ipfw3-head/sys/netinet/ipfw/dn_heap.c (contents, props changed) user/luigi/ipfw3-head/sys/netinet/ipfw/dn_heap.h (contents, props changed) Modified: user/luigi/ipfw3-head/sys/conf/files user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Modified: user/luigi/ipfw3-head/sys/conf/files ============================================================================== --- user/luigi/ipfw3-head/sys/conf/files Tue Jan 5 11:38:37 2010 (r201570) +++ user/luigi/ipfw3-head/sys/conf/files Tue Jan 5 11:39:48 2010 (r201571) @@ -2449,6 +2449,7 @@ netinet/in_proto.c optional inet \ compile-with "${NORMAL_C} -I$S/contrib/pf" netinet/in_rmx.c optional inet netinet/ip_divert.c optional inet ipdivert ipfirewall +netinet/ipfw/dn_heap.c optional inet dummynet netinet/ipfw/ip_dummynet.c optional inet dummynet netinet/ip_ecn.c optional inet | inet6 netinet/ip_encap.c optional inet | inet6 Modified: user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h ============================================================================== --- user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h Tue Jan 5 11:38:37 2010 (r201570) +++ user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h Tue Jan 5 11:39:48 2010 (r201571) @@ -38,93 +38,12 @@ */ /* - * We start with a heap, which is used in the scheduler to decide when - * to transmit packets etc. - * - * The key for the heap is used for two different values: - * - * 1. timer ticks- max 10K/second, so 32 bits are enough; - * - * 2. virtual times. These increase in steps of len/x, where len is the - * packet length, and x is either the weight of the flow, or the - * sum of all weights. - * If we limit to max 1000 flows and a max weight of 100, then - * x needs 17 bits. The packet size is 16 bits, so we can easily - * overflow if we do not allow errors. - * So we use a key "dn_key" which is 64 bits. Some macros are used to - * compare key values and handle wraparounds. - * MAX64 returns the largest of two key values. - * MY_M is used as a shift count when doing fixed point arithmetic - * (a better name would be useful...). - */ -typedef u_int64_t dn_key ; /* sorting key */ -#define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0) -#define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0) -#define DN_KEY_GT(a,b) ((int64_t)((a)-(b)) > 0) -#define DN_KEY_GEQ(a,b) ((int64_t)((a)-(b)) >= 0) -#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x) -#define MY_M 16 /* number of left shift to obtain a larger precision */ - -/* - * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the - * virtual time wraps every 15 days. - */ - - -/* * The maximum hash table size for queues. This value must be a power * of 2. */ #define DN_MAX_HASH_SIZE 65536 -/* - * A heap entry is made of a key and a pointer to the actual - * object stored in the heap. - * The heap is an array of dn_heap_entry entries, dynamically allocated. - * Current size is "size", with "elements" actually in use. - * The heap normally supports only ordered insert and extract from the top. - * If we want to extract an object from the middle of the heap, we - * have to know where the object itself is located in the heap (or we - * need to scan the whole array). To this purpose, an object has a - * field (int) which contains the index of the object itself into the - * heap. When the object is moved, the field must also be updated. - * The offset of the index in the object is stored in the 'offset' - * field in the heap descriptor. The assumption is that this offset - * is non-zero if we want to support extract from the middle. - */ -struct dn_heap_entry { - dn_key key ; /* sorting key. Topmost element is smallest one */ - void *object ; /* object pointer */ -} ; - -struct dn_heap { - int size ; - int elements ; - int offset ; /* XXX if > 0 this is the offset of direct ptr to obj */ - struct dn_heap_entry *p ; /* really an array of "size" entries */ -} ; - -#ifdef _KERNEL -/* - * Packets processed by dummynet have an mbuf tag associated with - * them that carries their dummynet state. This is used within - * the dummynet code as well as outside when checking for special - * processing requirements. - * Note that the first part is the reinject info and is common to - * other forms of packet reinjection. - */ -struct dn_pkt_tag { - struct ipfw_rule_ref rule; /* matching rule */ - - /* second part, dummynet specific */ - int dn_dir; /* action when packet comes out. */ - /* see ip_fw_private.h */ - - dn_key output_time; /* when the pkt is due for delivery */ - struct ifnet *ifp; /* interface, for ip_output */ - struct _ip6dn_args ip6opt; /* XXX ipv6 options */ -}; -#endif /* _KERNEL */ +typedef uint64_t dn_key; /* * Overall structure of dummynet (with WF2Q+): @@ -303,6 +222,7 @@ struct dn_flow_set { }; SLIST_HEAD(dn_flow_set_head, dn_flow_set); +struct dn_heap; /* * Pipe descriptor. Contains global parameters, delay-line queue, * and the flow_set used for fixed-rate queues. @@ -327,9 +247,9 @@ struct dn_pipe { /* a pipe */ struct mbuf *head, *tail ; /* packets in delay line */ /* WF2Q+ */ - struct dn_heap scheduler_heap ; /* top extract - key Finish time*/ - struct dn_heap not_eligible_heap; /* top extract- key Start time */ - struct dn_heap idle_heap ; /* random extract - key Start=Finish time */ + struct dn_heap *scheduler_heap ; /* top extract - key Finish time*/ + struct dn_heap *not_eligible_heap; /* top extract- key Start time */ + struct dn_heap *idle_heap ; /* random extract - key Start=Finish time */ dn_key V ; /* virtual time */ int sum; /* sum of weights of all active sessions */ Added: user/luigi/ipfw3-head/sys/netinet/ipfw/dn_heap.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/luigi/ipfw3-head/sys/netinet/ipfw/dn_heap.c Tue Jan 5 11:39:48 2010 (r201571) @@ -0,0 +1,251 @@ +/*- + * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +/* + * A binary heap data structure used in dummynet + */ + +#include +#include +#include +#include +#include + +MALLOC_DEFINE(M_DN_HEAP, "dummynet", "dummynet heap"); + +/* + * Heap management functions. + * + * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2. + * Some macros help finding parent/children so we can optimize them. + * + * heap_init() is called to expand the heap when needed. + * Increment size in blocks of 16 entries. + * XXX failure to allocate a new element is a pretty bad failure + * as we basically stall a whole queue forever!! + * Returns 1 on error, 0 on success + */ +#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 ) +#define HEAP_LEFT(x) ( 2*(x) + 1 ) +#define HEAP_IS_LEFT(x) ( (x) & 1 ) +#define HEAP_RIGHT(x) ( 2*(x) + 2 ) +#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } +#define HEAP_INCREMENT 15 + +int +heap_init(struct dn_heap *h, int new_size) +{ + struct dn_heap_entry *p; + + if (h->size >= new_size ) { + printf("%s: Bogus call, have %d want %d\n", + __func__, h->size, new_size); + return 0; + } + new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT; + p = malloc(new_size * sizeof(*p), M_DN_HEAP, M_NOWAIT); + if (p == NULL) { + printf("%s, resize %d failed\n", __func__, new_size ); + return 1; /* error */ + } + if (h->size > 0) { + bcopy(h->p, p, h->size * sizeof(*p) ); + free(h->p, M_DN_HEAP); + } + h->p = p; + h->size = new_size; + return 0; +} + +/* + * Insert element in heap. Normally, p != NULL, we insert p in + * a new position and bubble up. If p == NULL, then the element is + * already in place, and key is the position where to start the + * bubble-up. + * Returns 1 on failure (cannot allocate new heap entry) + * + * If offset > 0 the position (index, int) of the element in the heap is + * also stored in the element itself at the given offset in bytes. + */ +#define SET_OFFSET(heap, node) \ + if (heap->offset > 0) \ + *((int *)((char *)(heap->p[node].object) + heap->offset)) = node; +/* + * RESET_OFFSET is used for sanity checks. It sets offset + * to an invalid value. + */ +#define RESET_OFFSET(heap, node) \ + if (heap->offset > 0) \ + *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1; +int +heap_insert(struct dn_heap *h, uint64_t key1, void *p) +{ + int son = h->elements; + + if (p == NULL) /* data already there, set starting point */ + son = key1; + else {/* insert new element at the end, possibly resize */ + son = h->elements; + if (son == h->size) /* need resize... */ + if (heap_init(h, h->elements+1) ) + return 1; /* failure... */ + h->p[son].object = p; + h->p[son].key = key1; + h->elements++; + } + while (son > 0) { /* bubble up */ + int father = HEAP_FATHER(son); + struct dn_heap_entry tmp; + + if (DN_KEY_LT( h->p[father].key, h->p[son].key ) ) + break; /* found right position */ + /* son smaller than father, swap and repeat */ + HEAP_SWAP(h->p[son], h->p[father], tmp); + SET_OFFSET(h, son); + son = father; + } + SET_OFFSET(h, son); + return 0; +} + +/* + * remove top element from heap, or obj if obj != NULL + */ +void +heap_extract(struct dn_heap *h, void *obj) +{ + int child, father, max = h->elements - 1; + + if (max < 0) { + printf("%s: empty heap 0x%p\n", __FUNCTION__, h); + return; + } + father = 0; /* default: move up smallest child */ + if (obj != NULL) { /* extract specific element, index is at offset */ + if (h->offset <= 0) + panic("%s: extract from middle not set on %p\n", + __FUNCTION__, h); + father = *((int *)((char *)obj + h->offset)); + if (father < 0 || father >= h->elements) { + panic("%s: heap_extract, father %d out of bound 0..%d\n", + __FUNCTION__, father, h->elements); + } + } + RESET_OFFSET(h, father); + child = HEAP_LEFT(father); /* left child */ + while (child <= max) { /* valid entry */ + if (child != max && + DN_KEY_LT(h->p[child+1].key, h->p[child].key) ) + child++; /* take right child, otherwise left */ + h->p[father] = h->p[child]; + SET_OFFSET(h, father); + father = child; + child = HEAP_LEFT(child); /* prepare for next loop */ + } + h->elements--; + if (father != max) { + /* + * Fill hole with last entry and bubble up, + * reusing the insert code + */ + h->p[father] = h->p[max]; + heap_insert(h, father, NULL); + } +} + +#if 0 +/* + * change object position and update references + * XXX this one is never used! + */ +static void +heap_move(struct dn_heap *h, uint64_t new_key, void *object) +{ + int temp; + int i; + int max = h->elements-1; + struct dn_heap_entry buf; + + if (h->offset <= 0) + panic("cannot move items on this heap"); + + i = *((int *)((char *)object + h->offset)); + if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */ + h->p[i].key = new_key; + for (; i>0 && + DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key); + i = temp ) { /* bubble up */ + HEAP_SWAP(h->p[i], h->p[temp], buf); + SET_OFFSET(h, i); + } + } else { /* must move down */ + h->p[i].key = new_key; + while ( (temp = HEAP_LEFT(i)) <= max ) { + /* found left child */ + if ((temp != max) && + DN_KEY_GT(h->p[temp].key, h->p[temp+1].key)) + temp++; /* select child with min key */ + if (DN_KEY_GT(new_key, h->p[temp].key)) { + /* go down */ + HEAP_SWAP(h->p[i], h->p[temp], buf); + SET_OFFSET(h, i); + } else + break; + i = temp; + } + } + SET_OFFSET(h, i); +} +#endif /* heap_move, unused */ + +/* + * heapify() will reorganize data inside an array to maintain the + * heap property. It is needed when we delete a bunch of entries. + */ +void +heapify(struct dn_heap *h) +{ + int i; + + printf("%s on %p for %d elements\n", + __FUNCTION__, h, h->elements); + for (i = 0; i < h->elements; i++ ) + heap_insert(h, i , NULL); +} + +/* + * cleanup the heap and free data structure + */ +void +heap_free(struct dn_heap *h) +{ + if (h->size >0 ) + free(h->p, M_DN_HEAP); + bzero(h, sizeof(*h) ); +} Added: user/luigi/ipfw3-head/sys/netinet/ipfw/dn_heap.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/luigi/ipfw3-head/sys/netinet/ipfw/dn_heap.h Tue Jan 5 11:39:48 2010 (r201571) @@ -0,0 +1,70 @@ +/*- + * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IP_DN_HEAP_H +#define _IP_DN_HEAP_H + +#define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0) +#define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0) +#define DN_KEY_GT(a,b) ((int64_t)((a)-(b)) > 0) +#define DN_KEY_GEQ(a,b) ((int64_t)((a)-(b)) >= 0) + +/* + * A heap entry is made of a key and a pointer to the actual + * object stored in the heap. + * The heap is an array of dn_heap_entry entries, dynamically allocated. + * Current size is "size", with "elements" actually in use. + * The heap normally supports only ordered insert and extract from the top. + * If we want to extract an object from the middle of the heap, we + * have to know where the object itself is located in the heap (or we + * need to scan the whole array). To this purpose, an object has a + * field (int) which contains the index of the object itself into the + * heap. When the object is moved, the field must also be updated. + * The offset of the index in the object is stored in the 'offset' + * field in the heap descriptor. The assumption is that this offset + * is non-zero if we want to support extract from the middle. + */ +struct dn_heap_entry { + uint64_t key; /* sorting key. Topmost element is smallest one */ + void *object; /* object pointer */ +} ; + +struct dn_heap { + int size; /* the size of the array */ + int elements; /* elements in use */ + int offset; /* XXX if > 0 this is the offset of direct ptr to obj */ + struct dn_heap_entry *p ; /* really an array of "size" entries */ +} ; + +int heap_init(struct dn_heap *h, int size); +int heap_insert (struct dn_heap *h, uint64_t key1, void *p); +void heap_extract(struct dn_heap *h, void *obj); +void heapify(struct dn_heap *h); +void heap_free(struct dn_heap *h); + +#endif /* _IP_DN_HEAP_H */ Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c ============================================================================== --- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Tue Jan 5 11:38:37 2010 (r201570) +++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Tue Jan 5 11:39:48 2010 (r201571) @@ -77,6 +77,7 @@ __FBSDID("$FreeBSD$"); #include /* ip_len, ip_off */ #include #include +#include #include #include /* ip_output(), IP_FORWARDING */ @@ -128,15 +129,33 @@ static unsigned long io_pkt_drop; * * extract_heap contains pipes associated with delay lines. * + * The key for the heap is used for two different values: + * + * 1. timer ticks- max 10K/second, so 32 bits are enough; + * + * 2. virtual times. These increase in steps of len/x, where len is the + * packet length, and x is either the weight of the flow, or the + * sum of all weights. + * If we limit to max 1000 flows and a max weight of 100, then + * x needs 17 bits. The packet size is 16 bits, so we can easily + * overflow if we do not allow errors. + * So we use a key "dn_key" which is 64 bits. Some macros are used to + * compare key values and handle wraparounds. + * MAX64 returns the largest of two key values. + * MY_M is used as a shift count when doing fixed point arithmetic + * (a better name would be useful...). + */ +#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x) +#define MY_M 16 /* number of left shift to obtain a larger precision */ + +/* + * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the + * virtual time wraps every 15 days. */ MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap"); static struct dn_heap ready_heap, extract_heap, wfq_ready_heap ; - -static int heap_init(struct dn_heap *h, int size); -static int heap_insert (struct dn_heap *h, dn_key key1, void *p); -static void heap_extract(struct dn_heap *h, void *obj); static void transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail); static void ready_event(struct dn_flow_queue *q, struct mbuf **head, @@ -256,212 +275,6 @@ static int dummynet_io(struct mbuf **, i (q)->fs->pipe->bandwidth >= (q)->fs->pipe->burst)) /* - * Heap management functions. - * - * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2. - * Some macros help finding parent/children so we can optimize them. - * - * heap_init() is called to expand the heap when needed. - * Increment size in blocks of 16 entries. - * XXX failure to allocate a new element is a pretty bad failure - * as we basically stall a whole queue forever!! - * Returns 1 on error, 0 on success - */ -#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 ) -#define HEAP_LEFT(x) ( 2*(x) + 1 ) -#define HEAP_IS_LEFT(x) ( (x) & 1 ) -#define HEAP_RIGHT(x) ( 2*(x) + 2 ) -#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } -#define HEAP_INCREMENT 15 - -static int -heap_init(struct dn_heap *h, int new_size) -{ - struct dn_heap_entry *p; - - if (h->size >= new_size ) { - printf("dummynet: %s, Bogus call, have %d want %d\n", __func__, - h->size, new_size); - return 0 ; - } - new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ; - p = malloc(new_size * sizeof(*p), M_DUMMYNET, M_NOWAIT); - if (p == NULL) { - printf("dummynet: %s, resize %d failed\n", __func__, new_size ); - return 1 ; /* error */ - } - if (h->size > 0) { - bcopy(h->p, p, h->size * sizeof(*p) ); - free(h->p, M_DUMMYNET); - } - h->p = p ; - h->size = new_size ; - return 0 ; -} - -/* - * Insert element in heap. Normally, p != NULL, we insert p in - * a new position and bubble up. If p == NULL, then the element is - * already in place, and key is the position where to start the - * bubble-up. - * Returns 1 on failure (cannot allocate new heap entry) - * - * If offset > 0 the position (index, int) of the element in the heap is - * also stored in the element itself at the given offset in bytes. - */ -#define SET_OFFSET(heap, node) \ - if (heap->offset > 0) \ - *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ; -/* - * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value. - */ -#define RESET_OFFSET(heap, node) \ - if (heap->offset > 0) \ - *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ; -static int -heap_insert(struct dn_heap *h, dn_key key1, void *p) -{ - int son = h->elements ; - - if (p == NULL) /* data already there, set starting point */ - son = key1 ; - else { /* insert new element at the end, possibly resize */ - son = h->elements ; - if (son == h->size) /* need resize... */ - if (heap_init(h, h->elements+1) ) - return 1 ; /* failure... */ - h->p[son].object = p ; - h->p[son].key = key1 ; - h->elements++ ; - } - while (son > 0) { /* bubble up */ - int father = HEAP_FATHER(son) ; - struct dn_heap_entry tmp ; - - if (DN_KEY_LT( h->p[father].key, h->p[son].key ) ) - break ; /* found right position */ - /* son smaller than father, swap and repeat */ - HEAP_SWAP(h->p[son], h->p[father], tmp) ; - SET_OFFSET(h, son); - son = father ; - } - SET_OFFSET(h, son); - return 0 ; -} - -/* - * remove top element from heap, or obj if obj != NULL - */ -static void -heap_extract(struct dn_heap *h, void *obj) -{ - int child, father, max = h->elements - 1 ; - - if (max < 0) { - printf("dummynet: warning, extract from empty heap 0x%p\n", h); - return ; - } - father = 0 ; /* default: move up smallest child */ - if (obj != NULL) { /* extract specific element, index is at offset */ - if (h->offset <= 0) - panic("dummynet: heap_extract from middle not supported on this heap!!!\n"); - father = *((int *)((char *)obj + h->offset)) ; - if (father < 0 || father >= h->elements) { - printf("dummynet: heap_extract, father %d out of bound 0..%d\n", - father, h->elements); - panic("dummynet: heap_extract"); - } - } - RESET_OFFSET(h, father); - child = HEAP_LEFT(father) ; /* left child */ - while (child <= max) { /* valid entry */ - if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) ) - child = child+1 ; /* take right child, otherwise left */ - h->p[father] = h->p[child] ; - SET_OFFSET(h, father); - father = child ; - child = HEAP_LEFT(child) ; /* left child for next loop */ - } - h->elements-- ; - if (father != max) { - /* - * Fill hole with last entry and bubble up, reusing the insert code - */ - h->p[father] = h->p[max] ; - heap_insert(h, father, NULL); /* this one cannot fail */ - } -} - -#if 0 -/* - * change object position and update references - * XXX this one is never used! - */ -static void -heap_move(struct dn_heap *h, dn_key new_key, void *object) -{ - int temp; - int i ; - int max = h->elements-1 ; - struct dn_heap_entry buf ; - - if (h->offset <= 0) - panic("cannot move items on this heap"); - - i = *((int *)((char *)object + h->offset)); - if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */ - h->p[i].key = new_key ; - for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ; - i = temp ) { /* bubble up */ - HEAP_SWAP(h->p[i], h->p[temp], buf) ; - SET_OFFSET(h, i); - } - } else { /* must move down */ - h->p[i].key = new_key ; - while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */ - if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key)) - temp++ ; /* select child with min key */ - if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */ - HEAP_SWAP(h->p[i], h->p[temp], buf) ; - SET_OFFSET(h, i); - } else - break ; - i = temp ; - } - } - SET_OFFSET(h, i); -} -#endif /* heap_move, unused */ - -/* - * heapify() will reorganize data inside an array to maintain the - * heap property. It is needed when we delete a bunch of entries. - */ -static void -heapify(struct dn_heap *h) -{ - int i ; - - for (i = 0 ; i < h->elements ; i++ ) - heap_insert(h, i , NULL) ; -} - -/* - * cleanup the heap and free data structure - */ -static void -heap_free(struct dn_heap *h) -{ - if (h->size >0 ) - free(h->p, M_DUMMYNET); - bzero(h, sizeof(*h) ); -} - -/* - * --- end of heap management functions --- - */ - -/* * Dispose a list of packet. Use an inline functions so if we * need to free extra state associated to a packet, this is a * central point to do it. @@ -478,6 +291,25 @@ static __inline void dn_free_pkts(struct } /* + * Packets processed by dummynet have an mbuf tag associated with + * them that carries their dummynet state. This is used within + * the dummynet code as well as outside when checking for special + * processing requirements. + * Note that the first part is the reinject info and is common to + * other forms of packet reinjection. + */ +struct dn_pkt_tag { + struct ipfw_rule_ref rule; /* matching rule */ + + /* second part, dummynet specific */ + int dn_dir; /* action when packet comes out. */ + /* see ip_fw_private.h */ + dn_key output_time; /* when the pkt is due for delivery */ + struct ifnet *ifp; /* interface, for ip_output */ + struct _ip6dn_args ip6opt; /* XXX ipv6 options */ +}; + +/* * Return the mbuf tag holding the dummynet state. As an optimization * this is assumed to be the first tag on the list. If this turns out * wrong we'll need to search the list. @@ -703,8 +535,8 @@ static void ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail) { int p_was_empty = (p->head == NULL); - struct dn_heap *sch = &(p->scheduler_heap); - struct dn_heap *neh = &(p->not_eligible_heap); + struct dn_heap *sch = p->scheduler_heap; + struct dn_heap *neh = p->not_eligible_heap; int64_t p_numbytes = p->numbytes; /* @@ -753,7 +585,7 @@ ready_event_wfq(struct dn_pipe *p, struc if (q->len == 0) { /* Flow not backlogged any more. */ fs->backlogged--; - heap_insert(&(p->idle_heap), q->F, q); + heap_insert(p->idle_heap, q->F, q); } else { /* Still backlogged. */ @@ -796,19 +628,19 @@ ready_event_wfq(struct dn_pipe *p, struc * No traffic and no events scheduled. * We can get rid of idle-heap. */ - if (p->idle_heap.elements > 0) { + if (p->idle_heap->elements > 0) { int i; - for (i = 0; i < p->idle_heap.elements; i++) { + for (i = 0; i < p->idle_heap->elements; i++) { struct dn_flow_queue *q; - q = p->idle_heap.p[i].object; + q = p->idle_heap->p[i].object; q->F = 0; q->S = q->F + 1; } p->sum = 0; p->V = 0; - p->idle_heap.elements = 0; + p->idle_heap->elements = 0; } } /* @@ -934,12 +766,12 @@ dummynet_task(void *context, int pending /* Sweep pipes trying to expire idle flow_queues. */ for (i = 0; i < HASHSIZE; i++) { SLIST_FOREACH(pipe, &pipehash[i], next) { - if (pipe->idle_heap.elements > 0 && - DN_KEY_LT(pipe->idle_heap.p[0].key, pipe->V)) { + if (pipe->idle_heap->elements > 0 && + DN_KEY_LT(pipe->idle_heap->p[0].key, pipe->V)) { struct dn_flow_queue *q = - pipe->idle_heap.p[0].object; + pipe->idle_heap->p[0].object; - heap_extract(&(pipe->idle_heap), NULL); + heap_extract(pipe->idle_heap, NULL); /* Mark timestamp as invalid. */ q->S = q->F + 1; pipe->sum -= q->fs->weight; @@ -1454,8 +1286,8 @@ dummynet_io(struct mbuf **m0, int dir, s } } else { /* WF2Q. */ if (pipe->idle_time < curr_time && - pipe->scheduler_heap.elements == 0 && - pipe->not_eligible_heap.elements == 0) { + pipe->scheduler_heap->elements == 0 && + pipe->not_eligible_heap->elements == 0) { /* Calculate available burst size. */ pipe->numbytes += (curr_time - pipe->idle_time - 1) * pipe->bandwidth; @@ -1501,13 +1333,13 @@ dummynet_io(struct mbuf **m0, int dir, s q->S = pipe->V; pipe->sum += fs->weight; /* Add weight of new queue. */ } else { - heap_extract(&(pipe->idle_heap), q); + heap_extract(pipe->idle_heap, q); q->S = MAX64(q->F, pipe->V); } q->F = q->S + div64(len << MY_M, fs->weight); - if (pipe->not_eligible_heap.elements == 0 && - pipe->scheduler_heap.elements == 0) + if (pipe->not_eligible_heap->elements == 0 && + pipe->scheduler_heap->elements == 0) pipe->V = MAX64(q->S, pipe->V); fs->backlogged++; /* @@ -1524,13 +1356,13 @@ dummynet_io(struct mbuf **m0, int dir, s * SCH+NEH, we only need to look into NEH. */ if (DN_KEY_GT(q->S, pipe->V)) { /* Not eligible. */ - if (pipe->scheduler_heap.elements == 0) + if (pipe->scheduler_heap->elements == 0) printf("dummynet: ++ ouch! not eligible but empty scheduler!\n"); - heap_insert(&(pipe->not_eligible_heap), q->S, q); + heap_insert(pipe->not_eligible_heap, q->S, q); } else { - heap_insert(&(pipe->scheduler_heap), q->F, q); + heap_insert(pipe->scheduler_heap, q->F, q); if (pipe->numbytes >= 0) { /* Pipe is idle. */ - if (pipe->scheduler_heap.elements != 1) + if (pipe->scheduler_heap->elements != 1) printf("dummynet: OUCH! pipe should have been idle!\n"); DPRINTF(("dummynet: waking up pipe %d at %d\n", pipe->pipe_nr, (int)(q->F >> MY_M))); @@ -1613,9 +1445,9 @@ purge_pipe(struct dn_pipe *pipe) dn_free_pkts(pipe->head); - heap_free( &(pipe->scheduler_heap) ); - heap_free( &(pipe->not_eligible_heap) ); - heap_free( &(pipe->idle_heap) ); + heap_free( pipe->scheduler_heap ); + heap_free( pipe->not_eligible_heap ); + heap_free( pipe->idle_heap ); } /* @@ -1790,21 +1622,28 @@ config_pipe(struct dn_pipe *p) pipe = locate_pipe(p->pipe_nr); /* locate pipe */ if (pipe == NULL) { /* new pipe */ - pipe = malloc(sizeof(struct dn_pipe), M_DUMMYNET, + pipe = malloc(sizeof(struct dn_pipe) + + 3 * sizeof(struct dn_heap), M_DUMMYNET, M_NOWAIT | M_ZERO); if (pipe == NULL) { DUMMYNET_UNLOCK(); printf("dummynet: no memory for new pipe\n"); return (ENOMEM); } + + /* the heaps are right after the pipe */ + pipe->scheduler_heap = (struct dn_heap *)(pipe + 1); + pipe->not_eligible_heap = pipe->scheduler_heap + 1; + pipe->idle_heap = pipe->scheduler_heap + 2; + pipe->pipe_nr = p->pipe_nr; pipe->fs.pipe = pipe; /* * idle_heap is the only one from which * we extract from the middle. */ - pipe->idle_heap.size = pipe->idle_heap.elements = 0; - pipe->idle_heap.offset = + pipe->idle_heap->size = pipe->idle_heap->elements = 0; + pipe->idle_heap->offset = offsetof(struct dn_flow_queue, heap_pos); } else { /* Flush accumulated credit for all queues. */ @@ -2048,10 +1887,10 @@ delete_pipe(struct dn_pipe *p) if (fs->pipe != NULL) { /* Update total weight on parent pipe and cleanup parent heaps. */ fs->pipe->sum -= fs->weight * fs->backlogged ; - fs_remove_from_heap(&(fs->pipe->not_eligible_heap), fs); - fs_remove_from_heap(&(fs->pipe->scheduler_heap), fs); + fs_remove_from_heap(fs->pipe->not_eligible_heap, fs); + fs_remove_from_heap(fs->pipe->scheduler_heap, fs); #if 1 /* XXX should i remove from idle_heap as well ? */ - fs_remove_from_heap(&(fs->pipe->idle_heap), fs); + fs_remove_from_heap(fs->pipe->idle_heap, fs); #endif } purge_flow_set(fs, 1);