Date: Fri, 8 Jan 2010 09:31:20 +0000 (UTC) From: Luigi Rizzo <luigi@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r201776 - user/luigi/ipfw3-head/sys/netinet/ipfw Message-ID: <201001080931.o089VKge030342@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: luigi Date: Fri Jan 8 09:31:19 2010 New Revision: 201776 URL: http://svn.freebsd.org/changeset/base/201776 Log: snapshot Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c ============================================================================== --- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c Fri Jan 8 09:31:18 2010 (r201775) +++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c Fri Jan 8 09:31:19 2010 (r201776) @@ -1,4 +1,5 @@ /*- + * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa * Copyright (c) 2010 Luigi Rizzo, Universita` di Pisa * All rights reserved * @@ -107,14 +108,6 @@ static unsigned long io_pkt_drop; static struct dn_heap *system_heap; /* - * Three heaps contain queues and pipes that the scheduler handles: - * - * ready_heap contains all dn_flow_queue related to fixed-rate pipes. - * - * wfq_ready_heap contains the pipes associated with WF2Q flows - * - * extract_heap contains pipes associated with delay lines. - * * The key for the heap is used for two different values: * * 1. timer ticks- max 10K/second, so 32 bits are enough; @@ -130,18 +123,14 @@ static struct dn_heap *system_heap; * MAX64 returns the largest of two key values. * MY_M is used as a shift count when doing fixed point arithmetic * (a better name would be useful...). + * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the + * virtual time wraps every 15 days. */ #define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x) #define MY_M 16 /* shift for fixed point arithmetic */ -/* - * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the - * virtual time wraps every 15 days. - */ - MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap"); -struct dn_heap ready_heap, extract_heap, wfq_ready_heap ; struct new_pipe_head pipehash[DN_HASHSIZE]; /* all pipes */ struct new_fs_head flowsethash[DN_HASHSIZE]; /* all flowsets */ @@ -158,10 +147,6 @@ SYSCTL_INT(_net_inet_ip_dummynet, OID_AU SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, curr_time, CTLFLAG_RD, &curr_time, 0, "Current tick"); #endif -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap, - CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap, - CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap"); SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, searches, CTLFLAG_RD, &searches, 0, "Number of queue searches"); SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, search_steps, @@ -234,14 +219,12 @@ static void dummynet_send(struct mbuf *) /* * Packets processed by dummynet have an mbuf tag associated with - * them that carries their dummynet state. This is used within - * the dummynet code as well as outside when checking for special - * processing requirements. - * Note that the first part is the reinject info and is common to - * other forms of packet reinjection. + * them that carries their dummynet state. + * Outside dummynet, only the 'rule' field is relevant, and it must + * be at the beginning of the structure. */ struct dn_pkt_tag { - struct ipfw_rule_ref rule; /* matching rule */ + struct ipfw_rule_ref rule; /* matching rule */ /* second part, dummynet specific */ int dn_dir; /* action when packet comes out.*/ @@ -255,6 +238,7 @@ struct dn_pkt_tag { * Return the mbuf tag holding the dummynet state. As an optimization * this is assumed to be the first tag on the list. If this turns out * wrong we'll need to search the list. + * XXX OK */ static struct dn_pkt_tag * dn_tag_get(struct mbuf *m) @@ -271,43 +255,44 @@ dn_tag_get(struct mbuf *m) * It is called when we have some packet from delay line to send. * If there are leftover packets, this delay line is reinserted into extract * heap + * XXX OK */ -static -struct mbuf * +static struct mbuf * transmit_event(struct delay_line *dline, dn_key l_curr_time) { - struct mbuf *m; - struct dn_pkt_tag *pkt; + struct mbuf *m; + struct dn_pkt_tag *pkt; - struct mbuf *head = NULL, *tail = NULL; - /* XXX scheduler lock */ - while ((m = dline->head) != NULL) { - pkt = dn_tag_get(m); - if (!DN_KEY_LEQ(pkt->output_time, l_curr_time)) - break; - dline->head = m->m_nextpkt; - if (tail != NULL) - tail->m_nextpkt = m; - else - head = m; - tail = m; - } + struct mbuf *head = NULL, *tail = NULL; + /* XXX scheduler lock */ + while ((m = dline->head) != NULL) { + pkt = dn_tag_get(m); + if (!DN_KEY_LEQ(pkt->output_time, l_curr_time)) + break; + dline->head = m->m_nextpkt; + if (tail != NULL) + tail->m_nextpkt = m; + else + head = m; + tail = m; + } - if (tail != NULL) - tail->m_nextpkt = NULL; + if (tail != NULL) + tail->m_nextpkt = NULL; - /* If there are leftover packets, put into the heap for next event. */ - if ((m = dline->head) != NULL) { - pkt = dn_tag_get(m); - //DN_HEAP_LOCK(); - heap_insert(system_heap, pkt->output_time, dline); - //DN_HEAP_UNLOCK(); - } - /* XXX scheduler unlock */ - return head; + /* If there are leftover packets, put into the heap for next event. */ + if ((m = dline->head) != NULL) { + pkt = dn_tag_get(m); + //DN_HEAP_LOCK(); + heap_insert(system_heap, pkt->output_time, dline); + //DN_HEAP_UNLOCK(); + } + /* XXX scheduler unlock */ + return head; } #define div64(a, b) ((int64_t)(a) / (int64_t)(b)) + /* * Compute how many ticks we have to wait before being able to send * a packet. This is computed as the "wire time" for the packet @@ -350,27 +335,230 @@ compute_extra_bits(struct mbuf *pkt, str return extra_bits; } -/* Insert packet pkt into delay line of si. */ +/* Insert packet pkt into delay line */ static void -move_pkt(struct mbuf *pkt, struct new_pipe *p, struct new_sch_inst *si, +move_pkt(struct mbuf *pkt, struct new_pipe *p, struct delay_line *d, dn_key l_curr_time) { - struct dn_pkt_tag *dt = dn_tag_get(pkt); - struct delay_line *d = si->dline; - - dt->output_time = l_curr_time + p->delay ; + struct dn_pkt_tag *dt = dn_tag_get(pkt); + + dt->output_time = l_curr_time + p->delay ; + + if (d->head == NULL) + d->head = pkt; + else + d->tail->m_nextpkt = pkt; + d->tail = pkt; + d->tail->m_nextpkt = NULL; +} + +/* Do masking depending of flow id */ +static struct ipfw_flow_id * +do_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id) +{ + int is_v6 = IS_IP6_FLOW_ID(id); + + id->dst_port &= mask->dst_port; + id->src_port &= mask->src_port; + id->proto &= mask->proto; + id->flags = 0; /* we don't care about this one */ + if (is_v6) { + APPLY_MASK(&id->dst_ip6, &mask->dst_ip6); + APPLY_MASK(&id->src_ip6, &mask->src_ip6); + id->flow_id6 &= mask->flow_id6; + } else { + id->dst_ip &= mask->dst_ip; + id->src_ip &= mask->src_ip; + } + + return id; +} + +/* + * Calculate the hash of a flow id. + * XXX we may want a better hash function + */ +static int +do_hash(struct ipfw_flow_id *id) +{ + int i; + int is_v6 = IS_IP6_FLOW_ID(id); + + if (is_v6) { + i = ((id->dst_ip6.__u6_addr.__u6_addr32[0]) & 0xffff)^ + ((id->dst_ip6.__u6_addr.__u6_addr32[1]) & 0xffff)^ + ((id->dst_ip6.__u6_addr.__u6_addr32[2]) & 0xffff)^ + ((id->dst_ip6.__u6_addr.__u6_addr32[3]) & 0xffff)^ + + ((id->dst_ip6.__u6_addr.__u6_addr32[0] >> 15) & 0xffff)^ + ((id->dst_ip6.__u6_addr.__u6_addr32[1] >> 15) & 0xffff)^ + ((id->dst_ip6.__u6_addr.__u6_addr32[2] >> 15) & 0xffff)^ + ((id->dst_ip6.__u6_addr.__u6_addr32[3] >> 15) & 0xffff)^ + + ((id->src_ip6.__u6_addr.__u6_addr32[0] << 1) & 0xfffff)^ + ((id->src_ip6.__u6_addr.__u6_addr32[1] << 1) & 0xfffff)^ + ((id->src_ip6.__u6_addr.__u6_addr32[2] << 1) & 0xfffff)^ + ((id->src_ip6.__u6_addr.__u6_addr32[3] << 1) & 0xfffff)^ + + ((id->src_ip6.__u6_addr.__u6_addr32[0] << 16) & 0xffff)^ + ((id->src_ip6.__u6_addr.__u6_addr32[1] << 16) & 0xffff)^ + ((id->src_ip6.__u6_addr.__u6_addr32[2] << 16) & 0xffff)^ + ((id->src_ip6.__u6_addr.__u6_addr32[3] << 16) & 0xffff)^ + + (id->dst_port << 1) ^ (id->src_port) ^ + (id->proto ) ^ + (id->flow_id6); + } else { + i = ( (id->dst_ip) & 0xffff ) ^ + ( (id->dst_ip >> 15) & 0xffff ) ^ + ( (id->src_ip << 1) & 0xffff ) ^ + ( (id->src_ip >> 16 ) & 0xffff ) ^ + (id->dst_port << 1) ^ (id->src_port) ^ + (id->proto ); + } + return i; +} + +/* + * returns 0 masks match, + * returns 1 otherwise + */ +static int +mask_are_equals (struct ipfw_flow_id *id1, struct ipfw_flow_id *id2) +{ + int is_v6 = IS_IP6_FLOW_ID(id1); + if (is_v6 != IS_IP6_FLOW_ID(id2)) + return 1; /* a ipv4 and a ipv6 flow */ + + if (!is_v6 && id1->dst_ip == id2->dst_ip && + id1->src_ip == id2->src_ip && + id1->dst_port == id2->dst_port && + id1->src_port == id2->src_port && + id1->proto == id2->proto && + id1->flags == id2->flags) + return 0; + + if (is_v6 && + IN6_ARE_ADDR_EQUAL(&id1->dst_ip6,&id2->dst_ip6) && + IN6_ARE_ADDR_EQUAL(&id1->src_ip6,&id2->src_ip6) && + id1->dst_port == id2->dst_port && + id1->src_port == id2->src_port && + id1->proto == id2->proto && + id1->flags == id2->flags && + id1->flow_id6 == id2->flow_id6) + return 0; + + /* Masks differ */ + return 1; +} + +/* + * Create a new scheduler instance for the scheduler 'sch_t'. + * Allocate memory for common and scheduler private data. + * XXX put the delay line within the instance ? + * XXX why do we need separate delay lines ? + */ +static struct new_sch_inst * +create_scheduler_instance(struct new_sch *sch_t, dn_key l_curr_time) +{ + struct new_sch_inst *si; + int ret; + const char *msg = "malloc failure"; + int l = sizeof(*si) + sch_t->fp->scheduler_i_size; + + si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); + + if (si == NULL) + goto error; + + si->dline = malloc(sizeof(struct delay_line), M_DUMMYNET, M_NOWAIT); + if (si->dline == NULL) + goto error; + si->dline->si = si; + set_oid(&si->dline->id, DN_DELAY_LINE, 0, sizeof(struct delay_line)); + si->dline->head = si->dline->tail = NULL; + + set_oid(&si->oid, DN_SCH_I, 0, l); + + si->sched_nr = sch_t->sched_nr; + si->ptr_sched = sch_t; + + /* XXX do we make assumption on this starting with dn_id ? */ + ret = sch_t->fp->new_sched((void *)(si + 1)); + if (ret) { + msg = "new_sched error"; + goto error; + } - if (d->head == NULL) - d->head = pkt; - else - d->tail->m_nextpkt = pkt; - d->tail = pkt; - d->tail->m_nextpkt = NULL; + /* Initialize scheduler instance queues list */ + SLIST_INIT(&si->ql_list); + + si->oid.subtype = ((struct dn_id*)(si + 1))->subtype; + ((struct dn_id*)(si + 1))->type = DN_SCH_I; + si->idle_time = l_curr_time; + return si; + +error: + printf("%s: %s\n", __FUNCTION__, msg); + if (si) + free(si, M_DUMMYNET); + return NULL; } -struct new_sch_inst * +static struct new_sch_inst * find_scheduler(struct new_sch *sch_t, struct new_fs *fs, - struct ipfw_flow_id *id, dn_key l_curr_time); + struct ipfw_flow_id *id, dn_key l_curr_time) +{ + struct new_sch_inst *prev, *s; /* returning scheduler instance */ + struct ipfw_flow_id *id_t; + int i = 0; + + id_t = malloc(sizeof(struct ipfw_flow_id), M_DUMMYNET, M_NOWAIT); + if (id_t == NULL) { + printf("dummynet: no memory for flowid\n"); + return NULL; + } + /* XXX check return value */ + *id_t = *id; /* The original id isn't modified */ + do_mask(&sch_t->sched_mask, id_t); + if ( !(sch_t->flags & DN_SCH_HAVE_MASK) ) { + s = sch_t->sch_i[0]; + } else { + /* first, do the masking, then hash */ + i = do_hash(id_t); + i = i % sch_t->sch_i_size; + /* finally, scan the current hash bucket for a match */ + searches++ ; + for (prev=NULL, s = sch_t->sch_i[i] ; s ; ) { + search_steps++; + if (!mask_are_equals(id_t, &s->id)) + break; /* found */ + prev = s ; + s = s->next ; + } + if (s && prev != NULL) { /* found and not in front */ + prev->next = s->next ; + s->next = sch_t->sch_i[i] ; + sch_t->sch_i[i] = s ; + } + } + + if (s == NULL) { /* no match, need to allocate a new entry */ + s = create_scheduler_instance(sch_t, l_curr_time); + if (s == NULL) + return NULL; + /* Link scheduler in front of array */ + s->next = sch_t->sch_i[i]; + sch_t->sch_i[i] = s; + sch_t->sch_i_elements++; + if (s != NULL) { + s->id = *id_t; + s->hash_slot = i; + } + } + return s; +} + /* * The timer handler for dummynet. Time is computed in ticks, but @@ -871,30 +1059,6 @@ ipdn_locate_pipe(int pipe_nr) return (NULL); } -struct ipfw_flow_id * -do_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id); -/* Do masking depending of flow id */ -struct ipfw_flow_id * -do_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id) -{ - int is_v6 = IS_IP6_FLOW_ID(id); - - id->dst_port &= mask->dst_port; - id->src_port &= mask->src_port; - id->proto &= mask->proto; - id->flags = 0; /* we don't care about this one */ - if (is_v6) { - APPLY_MASK(&id->dst_ip6, &mask->dst_ip6); - APPLY_MASK(&id->src_ip6, &mask->src_ip6); - id->flow_id6 &= mask->flow_id6; - } - else { - id->dst_ip &= mask->dst_ip; - id->src_ip &= mask->src_ip; - } - - return id; -} /* * dummynet hook for packets. Below 'pipe' is a pipe or a queue @@ -1035,7 +1199,7 @@ dummynet_io(struct mbuf **m0, int dir, s compute_extra_bits(tosend, pipe) * hz : 0; sch_inst->numbytes -= len_scaled; /* Move packet in the delay line XXX three parameters? */ - move_pkt(tosend, pipe, sch_inst, l_curr_time); + move_pkt(tosend, pipe, sch_inst->dline, l_curr_time); if (sch_inst->numbytes < 0) { /* * Credit became negative, so insert the instance in the Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c ============================================================================== --- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Fri Jan 8 09:31:18 2010 (r201775) +++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Fri Jan 8 09:31:19 2010 (r201776) @@ -183,10 +183,6 @@ dummynet_flush(void) int i; DUMMYNET_LOCK(); - /* Free heaps so we don't have unwanted events. */ - heap_free(&ready_heap); - heap_free(&wfq_ready_heap); - heap_free(&extract_heap); /* * Now purge all queued pkts and delete all pipes. @@ -824,9 +820,6 @@ ip_dn_init(void) SLIST_INIT(&pipehash[i]); SLIST_INIT(&flowsethash[i]); } - bzero(&ready_heap, sizeof(ready_heap)); - bzero(&wfq_ready_heap, sizeof(wfq_ready_heap)); - bzero(&extract_heap, sizeof(extract_heap)); ip_dn_ctl_ptr = ip_dn_ctl; ip_dn_io_ptr = dummynet_io;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201001080931.o089VKge030342>