Date: Mon, 12 Apr 2010 11:18:39 GMT From: Marko Zec <zec@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 176829 for review Message-ID: <201004121118.o3CBIdZs022950@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://p4web.freebsd.org/@@176829?ac=10 Change 176829 by zec@zec_tpx32 on 2010/04/12 11:18:05 Remove a crude hack which once upon a time allowed one to directly connect a bpf tap to an ifnet in a remote vnet, but which stopped working a year ago due to some strange locking issues related to jail-vnet integration. This was never intended to be committed to head, and a patch has been committed to IMUNES which allows us to run wireshark in a remote vnet without this crap, so there's no point in keeping this dead code here any longer. Moreover, back out now obsolete V_irtualization patches for dummynet that anchie@ originally started developing back in September, but which were never fully completed, and which now collide with the new version of dummynet in the main tree. Affected files ... .. //depot/projects/vimage/src/sys/net/bpf.c#45 edit .. //depot/projects/vimage/src/sys/netinet/ipfw/ip_dummynet.c#6 edit Differences ... ==== //depot/projects/vimage/src/sys/net/bpf.c#45 (text+ko) ==== @@ -35,7 +35,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/net/bpf.c,v 1.219 2010/02/20 00:19:21 jkim Exp $"); +__FBSDID("$FreeBSD: head/sys/net/bpf.c 205858 2010-03-29 20:24:03Z jkim $"); #include "opt_bpf.h" #include "opt_netgraph.h" @@ -44,7 +44,6 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/conf.h> -#include <sys/ctype.h> #include <sys/fcntl.h> #include <sys/jail.h> #include <sys/malloc.h> @@ -615,6 +614,7 @@ mac_bpfdesc_destroy(d); #endif /* MAC */ knlist_destroy(&d->bd_sel.si_note); + callout_drain(&d->bd_callout); bpf_freed(d); free(d, M_BPF); } @@ -652,7 +652,7 @@ mac_bpfdesc_create(td->td_ucred, d); #endif mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF); - callout_init(&d->bd_callout, CALLOUT_MPSAFE); + callout_init_mtx(&d->bd_callout, &d->bd_mtx, 0); knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx); return (0); @@ -808,13 +808,15 @@ { struct bpf_d *d = (struct bpf_d *)arg; - BPFD_LOCK(d); + BPFD_LOCK_ASSERT(d); + + if 
(callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout)) + return; if (d->bd_state == BPF_WAITING) { d->bd_state = BPF_TIMED_OUT; if (d->bd_slen != 0) bpf_wakeup(d); } - BPFD_UNLOCK(d); } static int @@ -1448,34 +1450,9 @@ struct bpf_if *bp; struct ifnet *theywant; -#define XVNET_BPF_SNOOPING -#if defined(VIMAGE) && defined(XVNET_BPF_SNOOPING) - struct vnet *target_vnet = curvnet; - char *c; - - /* Attempt to attach to an ifnet in a foreign vnet, specified as @ */ - c = rindex(ifr->ifr_name, '@'); - if ( c != NULL ) { - struct prison *target_pr; - - *c++ = 0; - if (!isascii(*c) && !isdigit(*c)) - return ENXIO; - target_pr = prison_find_name(curthread->td_ucred->cr_prison, c); - if (target_pr == NULL) - return ENXIO; - target_vnet = target_pr->pr_vnet; - } - CURVNET_SET_QUIET(target_vnet); -#endif - theywant = ifunit(ifr->ifr_name); - if (theywant == NULL || theywant->if_bpf == NULL) { -#if defined(VIMAGE) && defined(XVNET_BPF_SNOOPING) - CURVNET_RESTORE(); -#endif + if (theywant == NULL || theywant->if_bpf == NULL) return (ENXIO); - } bp = theywant->if_bpf; @@ -1515,9 +1492,6 @@ BPFD_LOCK(d); reset_d(d); BPFD_UNLOCK(d); -#if defined(VIMAGE) && defined(XVNET_BPF_SNOOPING) - CURVNET_RESTORE(); -#endif return (0); } @@ -1606,8 +1580,7 @@ kn->kn_data = d->bd_slen; if (d->bd_hbuf) kn->kn_data += d->bd_hlen; - } - else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { + } else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { callout_reset(&d->bd_callout, d->bd_rtout, bpf_timed_out, d); d->bd_state = BPF_WAITING; @@ -1894,13 +1867,14 @@ * free. 
*/ bpf_free(d); - if (d->bd_rfilter) { + if (d->bd_rfilter != NULL) { free((caddr_t)d->bd_rfilter, M_BPF); #ifdef BPF_JITTER - bpf_destroy_jit_filter(d->bd_bfilter); + if (d->bd_bfilter != NULL) + bpf_destroy_jit_filter(d->bd_bfilter); #endif } - if (d->bd_wfilter) + if (d->bd_wfilter != NULL) free((caddr_t)d->bd_wfilter, M_BPF); mtx_destroy(&d->bd_mtx); } ==== //depot/projects/vimage/src/sys/netinet/ipfw/ip_dummynet.c#6 (text+ko) ==== @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa + * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa * Portions Copyright (c) 2000 Akamba Corp. * All rights reserved * @@ -26,36 +26,14 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/netinet/ipfw/ip_dummynet.c,v 1.5 2009/06/24 22:57:07 oleg Exp $"); +__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_dummynet.c 206428 2010-04-09 18:02:19Z luigi $"); -#define DUMMYNET_DEBUG +/* + * Configuration and internal object management for dummynet. + */ #include "opt_inet6.h" -/* - * This module implements IP dummynet, a bandwidth limiter/delay emulator - * used in conjunction with the ipfw package. - * Description of the data structures used is in ip_dummynet.h - * Here you mainly find the following blocks of code: - * + variable declarations; - * + heap management functions; - * + scheduler and dummynet functions; - * + configuration and initialization. - * - * NOTA BENE: critical sections are protected by the "dummynet lock". - * - * Most important Changes: - * - * 011004: KLDable - * 010124: Fixed WF2Q behaviour - * 010122: Fixed spl protection. - * 000601: WF2Q support - * 000106: large rewrite, use heaps to handle very many pipes. 
- * 980513: initial release - * - * include files marked with XXX are probably not needed - */ - #include <sys/param.h> #include <sys/systm.h> #include <sys/malloc.h> @@ -69,2258 +47,2147 @@ #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/time.h> -#include <sys/sysctl.h> #include <sys/taskqueue.h> #include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ -#include <net/netisr.h> -#include <net/vnet.h> #include <netinet/in.h> -#include <netinet/ip.h> /* ip_len, ip_off */ +#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */ #include <netinet/ip_fw.h> +#include <netinet/ipfw/ip_fw_private.h> +#include <netinet/ipfw/dn_heap.h> #include <netinet/ip_dummynet.h> -#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */ +#include <netinet/ipfw/ip_dn_private.h> +#include <netinet/ipfw/dn_sched.h> -#include <netinet/if_ether.h> /* various ether_* routines */ +/* which objects to copy */ +#define DN_C_LINK 0x01 +#define DN_C_SCH 0x02 +#define DN_C_FLOW 0x04 +#define DN_C_FS 0x08 +#define DN_C_QUEUE 0x10 -#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */ -#include <netinet6/ip6_var.h> +/* we use this argument in case of a schk_new */ +struct schk_new_arg { + struct dn_alg *fp; + struct dn_sch *sch; +}; -/* - * We keep a private variable for the simulation time, but we could - * probably use an existing one ("softticks" in sys/kern/kern_timeout.c) - */ -static VNET_DEFINE(dn_key, curr_time) = 0 ; /* current simulation time */ -#define V_curr_time VNET(curr_time) +/*---- callout hooks. 
----*/ +static struct callout dn_timeout; +static struct task dn_task; +static struct taskqueue *dn_tq = NULL; -static VNET_DEFINE(int, dn_hash_size) = 64 ; /* default hash size */ -#define V_dn_hash_size VNET(dn_hash_size) +static void +dummynet(void * __unused unused) +{ -/* statistics on number of queue searches and search steps */ -static VNET_DEFINE(long, searches); -static VNET_DEFINE(long, search_steps); -static VNET_DEFINE(int, pipe_expire) = 1 ; /* expire queue if empty */ -static VNET_DEFINE(int, dn_max_ratio) = 16 ; /* max queues/buckets ratio */ -#define V_searches VNET(searches) -#define V_search_steps VNET(search_steps) -#define V_pipe_expire VNET(pipe_expire) -#define V_dn_max_ratio VNET(dn_max_ratio) + taskqueue_enqueue(dn_tq, &dn_task); +} -static VNET_DEFINE(long, pipe_slot_limit) = 100; /* Foot shooting limit for pipe queues. */ -static VNET_DEFINE(long, pipe_byte_limit) = 1024 * 1024; -#define V_pipe_slot_limit VNET(pipe_slot_limit) -#define V_pipe_byte_limit VNET(pipe_byte_limit) +void +dn_reschedule(void) +{ + callout_reset(&dn_timeout, 1, dummynet, NULL); +} +/*----- end of callout hooks -----*/ -static VNET_DEFINE(int, red_lookup_depth) = 256; /* RED - default lookup table depth */ -static VNET_DEFINE(int, red_avg_pkt_size) = 512; /* RED - default medium packet size */ -static VNET_DEFINE(int, red_max_pkt_size) = 1500; /* RED - default max packet size */ -#define V_red_lookup_depth VNET(red_lookup_depth) -#define V_red_avg_pkt_size VNET(red_avg_pkt_size) -#define V_red_max_pkt_size VNET(red_max_pkt_size) +/* Return a scheduler descriptor given the type or name. */ +static struct dn_alg * +find_sched_type(int type, char *name) +{ + struct dn_alg *d; -static VNET_DEFINE(struct timeval, prev_t); -static VNET_DEFINE(struct timeval, t); -static VNET_DEFINE(long, tick_last); /* Last tick duration (usec). */ -static VNET_DEFINE(long, tick_delta); /* Last vs standard tick diff (usec). 
*/ -static VNET_DEFINE(long, tick_delta_sum); /* Accumulated tick difference (usec).*/ -static VNET_DEFINE(long, tick_adjustment); /* Tick adjustments done. */ -static VNET_DEFINE(long, tick_lost); /* Lost(coalesced) ticks number. */ -/* Adjusted vs non-adjusted curr_time difference (ticks). */ -static VNET_DEFINE(long, tick_diff); -#define V_prev_t VNET(prev_t) -#define V_t VNET(t) -#define V_tick_last VNET(tick_last) -#define V_tick_delta VNET(tick_delta) -#define V_tick_delta_sum VNET(tick_delta_sum) -#define V_tick_adjustment VNET(tick_adjustment) -#define V_tick_lost VNET(tick_lost) -#define V_tick_diff VNET(tick_diff) + SLIST_FOREACH(d, &dn_cfg.schedlist, next) { + if (d->type == type || (name && !strcmp(d->name, name))) + return d; + } + return NULL; /* not found */ +} -static VNET_DEFINE(int, io_fast); -static VNET_DEFINE(unsigned long, io_pkt); -static VNET_DEFINE(unsigned long, io_pkt_fast); -static VNET_DEFINE(long, io_pkt_drop); -#define V_io_fast VNET(io_fast) -#define V_io_pkt VNET(io_pkt) -#define V_io_pkt_fast VNET(io_pkt_fast) -#define V_io_pkt_drop VNET(io_pkt_drop) +int +ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg) +{ + int oldv = *v; + const char *op = NULL; + if (oldv < lo) { + *v = dflt; + op = "Bump"; + } else if (oldv > hi) { + *v = hi; + op = "Clamp"; + } else + return *v; + if (op && msg) + printf("%s %s to %d (was %d)\n", op, msg, *v, oldv); + return *v; +} +/*---- flow_id mask, hash and compare functions ---*/ /* - * Three heaps contain queues and pipes that the scheduler handles: - * - * ready_heap contains all dn_flow_queue related to fixed-rate pipes. - * - * wfq_ready_heap contains the pipes associated with WF2Q flows - * - * extract_heap contains pipes associated with delay lines. - * + * The flow_id includes the 5-tuple, the queue/pipe number + * which we store in the extra area in host order, + * and for ipv6 also the flow_id6. 
+ * XXX see if we want the tos byte (can store in 'flags') */ +static struct ipfw_flow_id * +flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id) +{ + int is_v6 = IS_IP6_FLOW_ID(id); -MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap"); + id->dst_port &= mask->dst_port; + id->src_port &= mask->src_port; + id->proto &= mask->proto; + id->extra &= mask->extra; + if (is_v6) { + APPLY_MASK(&id->dst_ip6, &mask->dst_ip6); + APPLY_MASK(&id->src_ip6, &mask->src_ip6); + id->flow_id6 &= mask->flow_id6; + } else { + id->dst_ip &= mask->dst_ip; + id->src_ip &= mask->src_ip; + } + return id; +} -static VNET_DEFINE(struct dn_heap, ready_heap); -static VNET_DEFINE(struct dn_heap, extract_heap); -static VNET_DEFINE(struct dn_heap, wfq_ready_heap); +/* computes an OR of two masks, result in dst and also returned */ +static struct ipfw_flow_id * +flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst) +{ + int is_v6 = IS_IP6_FLOW_ID(dst); -static int heap_init(struct dn_heap *h, int size); -static int heap_insert (struct dn_heap *h, dn_key key1, void *p); -static void heap_extract(struct dn_heap *h, void *obj); -static void transmit_event(struct dn_pipe *pipe, struct mbuf **head, - struct mbuf **tail); -static void ready_event(struct dn_flow_queue *q, struct mbuf **head, - struct mbuf **tail); -static void ready_event_wfq(struct dn_pipe *p, struct mbuf **head, - struct mbuf **tail); + dst->dst_port |= src->dst_port; + dst->src_port |= src->src_port; + dst->proto |= src->proto; + dst->extra |= src->extra; + if (is_v6) { +#define OR_MASK(_d, _s) \ + (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \ + (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \ + (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \ + (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3]; + OR_MASK(&dst->dst_ip6, &src->dst_ip6); + OR_MASK(&dst->src_ip6, &src->src_ip6); +#undef OR_MASK + dst->flow_id6 |= src->flow_id6; + } else { + 
dst->dst_ip |= src->dst_ip; + dst->src_ip |= src->src_ip; + } + return dst; +} -#define HASHSIZE 16 -#define HASH(num) ((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f) -static VNET_DEFINE(struct dn_pipe_head, pipehash[HASHSIZE]); /* all pipes */ -static VNET_DEFINE(struct dn_flow_set_head, flowsethash[HASHSIZE]); /* all flowsets */ +static int +nonzero_mask(struct ipfw_flow_id *m) +{ + if (m->dst_port || m->src_port || m->proto || m->extra) + return 1; + if (IS_IP6_FLOW_ID(m)) { + return + m->dst_ip6.__u6_addr.__u6_addr32[0] || + m->dst_ip6.__u6_addr.__u6_addr32[1] || + m->dst_ip6.__u6_addr.__u6_addr32[2] || + m->dst_ip6.__u6_addr.__u6_addr32[3] || + m->src_ip6.__u6_addr.__u6_addr32[0] || + m->src_ip6.__u6_addr.__u6_addr32[1] || + m->src_ip6.__u6_addr.__u6_addr32[2] || + m->src_ip6.__u6_addr.__u6_addr32[3] || + m->flow_id6; + } else { + return m->dst_ip || m->src_ip; + } +} -static VNET_DEFINE(struct callout, dn_timeout); +/* XXX we may want a better hash function */ +static uint32_t +flow_id_hash(struct ipfw_flow_id *id) +{ + uint32_t i; -extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *); + if (IS_IP6_FLOW_ID(id)) { + uint32_t *d = (uint32_t *)&id->dst_ip6; + uint32_t *s = (uint32_t *)&id->src_ip6; + i = (d[0] ) ^ (d[1]) ^ + (d[2] ) ^ (d[3]) ^ + (d[0] >> 15) ^ (d[1] >> 15) ^ + (d[2] >> 15) ^ (d[3] >> 15) ^ + (s[0] << 1) ^ (s[1] << 1) ^ + (s[2] << 1) ^ (s[3] << 1) ^ + (s[0] << 16) ^ (s[1] << 16) ^ + (s[2] << 16) ^ (s[3] << 16) ^ + (id->dst_port << 1) ^ (id->src_port) ^ + (id->extra) ^ + (id->proto ) ^ (id->flow_id6); + } else { + i = (id->dst_ip) ^ (id->dst_ip >> 15) ^ + (id->src_ip << 1) ^ (id->src_ip >> 16) ^ + (id->extra) ^ + (id->dst_port << 1) ^ (id->src_port) ^ (id->proto); + } + return i; +} -#ifdef SYSCTL_NODE -SYSCTL_DECL(_net_inet); -SYSCTL_DECL(_net_inet_ip); +/* Like bcmp, returns 0 if ids match, 1 otherwise. 
*/ +static int +flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2) +{ + int is_v6 = IS_IP6_FLOW_ID(id1); -SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet"); -SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, - CTLFLAG_RW, &VNET_NAME(dn_hash_size), 0, "Default hash table size"); -#if 0 /* curr_time is 64 bit */ -SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, curr_time, - CTLFLAG_RD, &VNET_NAME(curr_time), 0, "Current tick"); -#endif -SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap, - CTLFLAG_RD, &VNET_NAME(ready_heap).size, 0, "Size of ready heap"); -SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap, - CTLFLAG_RD, &VNET_NAME(extract_heap).size, 0, "Size of extract heap"); -SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, searches, - CTLFLAG_RD, &VNET_NAME(searches), 0, "Number of queue searches"); -SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, search_steps, - CTLFLAG_RD, &VNET_NAME(search_steps), 0, "Number of queue search steps"); -SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, expire, - CTLFLAG_RW, &VNET_NAME(pipe_expire), 0, "Expire queue if empty"); -SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len, - CTLFLAG_RW, &VNET_NAME(dn_max_ratio), 0, - "Max ratio between dynamic queues and buckets"); -SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth, - CTLFLAG_RD, &VNET_NAME(red_lookup_depth), 0, "Depth of RED lookup table"); -SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size, - CTLFLAG_RD, &VNET_NAME(red_avg_pkt_size), 0, "RED Medium packet size"); -SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size, - CTLFLAG_RD, &VNET_NAME(red_max_pkt_size), 0, "RED Max packet size"); -SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta, - CTLFLAG_RD, &VNET_NAME(tick_delta), 0, "Last vs standard tick difference (usec)."); -SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum, - CTLFLAG_RD, &VNET_NAME(tick_delta_sum), 0, 
"Accumulated tick difference (usec)."); -SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment, - CTLFLAG_RD, &VNET_NAME(tick_adjustment), 0, "Tick adjustments done."); -SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff, - CTLFLAG_RD, &VNET_NAME(tick_diff), 0, - "Adjusted vs non-adjusted curr_time difference (ticks)."); -SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost, - CTLFLAG_RD, &VNET_NAME(tick_lost), 0, - "Number of ticks coalesced by dummynet taskqueue."); -SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast, - CTLFLAG_RW, &VNET_NAME(io_fast), 0, "Enable fast dummynet io."); -SYSCTL_VNET_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt, - CTLFLAG_RD, &VNET_NAME(io_pkt), 0, - "Number of packets passed to dummynet."); -SYSCTL_VNET_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast, - CTLFLAG_RD, &VNET_NAME(io_pkt_fast), 0, - "Number of packets bypassed dummynet scheduler."); -SYSCTL_VNET_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop, - CTLFLAG_RD, &VNET_NAME(io_pkt_drop), 0, - "Number of packets dropped by dummynet."); -SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit, - CTLFLAG_RW, &VNET_NAME(pipe_slot_limit), 0, "Upper limit in slots for pipe queue."); -SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit, - CTLFLAG_RW, &VNET_NAME(pipe_byte_limit), 0, "Upper limit in bytes for pipe queue."); -#endif + if (!is_v6) { + if (IS_IP6_FLOW_ID(id2)) + return 1; /* different address families */ -#ifdef DUMMYNET_DEBUG -VNET_DEFINE(int, dummynet_debug) = 0; -#ifdef SYSCTL_NODE -SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &VNET_NAME(dummynet_debug), - 0, "control debugging printfs"); -#endif -#define DPRINTF(X) if (V_dummynet_debug) printf X -#else -#define DPRINTF(X) -#endif + return (id1->dst_ip == id2->dst_ip && + id1->src_ip == id2->src_ip && + id1->dst_port == id2->dst_port && + id1->src_port == id2->src_port && + id1->proto == id2->proto && + id1->extra == id2->extra) ? 
0 : 1; + } + /* the ipv6 case */ + return ( + !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) && + !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) && + id1->dst_port == id2->dst_port && + id1->src_port == id2->src_port && + id1->proto == id2->proto && + id1->extra == id2->extra && + id1->flow_id6 == id2->flow_id6) ? 0 : 1; +} +/*--------- end of flow-id mask, hash and compare ---------*/ -static VNET_DEFINE(struct task, dn_task); -static VNET_DEFINE(struct taskqueue *, dn_tq) = NULL; -static void dummynet_task(void *, int); +/*--- support functions for the qht hashtable ---- + * Entries are hashed by flow-id + */ +static uint32_t +q_hash(uintptr_t key, int flags, void *arg) +{ + /* compute the hash slot from the flow id */ + struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? + &((struct dn_queue *)key)->ni.fid : + (struct ipfw_flow_id *)key; -static struct mtx dummynet_mtx; -#define DUMMYNET_LOCK_INIT() \ - mtx_init(&dummynet_mtx, "dummynet", NULL, MTX_DEF) -#define DUMMYNET_LOCK_DESTROY() mtx_destroy(&dummynet_mtx) -#define DUMMYNET_LOCK() mtx_lock(&dummynet_mtx) -#define DUMMYNET_UNLOCK() mtx_unlock(&dummynet_mtx) -#define DUMMYNET_LOCK_ASSERT() mtx_assert(&dummynet_mtx, MA_OWNED) + return flow_id_hash(id); +} -static int config_pipe(struct dn_pipe *p); -static int ip_dn_ctl(struct sockopt *sopt); +static int +q_match(void *obj, uintptr_t key, int flags, void *arg) +{ + struct dn_queue *o = (struct dn_queue *)obj; + struct ipfw_flow_id *id2; -static void dummynet(void *); -static void dummynet_flush(void); -static void dummynet_send(struct mbuf *); -void dummynet_drain(void); -static int dummynet_io(struct mbuf **, int , struct ip_fw_args *); + if (flags & DNHT_KEY_IS_OBJ) { + /* compare pointers */ + id2 = &((struct dn_queue *)key)->ni.fid; + } else { + id2 = (struct ipfw_flow_id *)key; + } + return (0 == flow_id_cmp(&o->ni.fid, id2)); +} /* - * Heap management functions. - * - * In the heap, first node is element 0. 
Children of i are 2i+1 and 2i+2. - * Some macros help finding parent/children so we can optimize them. - * - * heap_init() is called to expand the heap when needed. - * Increment size in blocks of 16 entries. - * XXX failure to allocate a new element is a pretty bad failure - * as we basically stall a whole queue forever!! - * Returns 1 on error, 0 on success + * create a new queue instance for the given 'key'. */ -#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 ) -#define HEAP_LEFT(x) ( 2*(x) + 1 ) -#define HEAP_IS_LEFT(x) ( (x) & 1 ) -#define HEAP_RIGHT(x) ( 2*(x) + 2 ) -#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } -#define HEAP_INCREMENT 15 +static void * +q_new(uintptr_t key, int flags, void *arg) +{ + struct dn_queue *q, *template = arg; + struct dn_fsk *fs = template->fs; + int size = sizeof(*q) + fs->sched->fp->q_datalen; + + q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO); + if (q == NULL) { + D("no memory for new queue"); + return NULL; + } -static int -heap_init(struct dn_heap *h, int new_size) -{ - struct dn_heap_entry *p; + set_oid(&q->ni.oid, DN_QUEUE, size); + if (fs->fs.flags & DN_QHT_HASH) + q->ni.fid = *(struct ipfw_flow_id *)key; + q->fs = fs; + q->_si = template->_si; + q->_si->q_count++; - if (h->size >= new_size ) { - printf("dummynet: %s, Bogus call, have %d want %d\n", __func__, - h->size, new_size); - return 0 ; - } - new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ; - p = malloc(new_size * sizeof(*p), M_DUMMYNET, M_NOWAIT); - if (p == NULL) { - printf("dummynet: %s, resize %d failed\n", __func__, new_size ); - return 1 ; /* error */ - } - if (h->size > 0) { - bcopy(h->p, p, h->size * sizeof(*p) ); - free(h->p, M_DUMMYNET); - } - h->p = p ; - h->size = new_size ; - return 0 ; + if (fs->sched->fp->new_queue) + fs->sched->fp->new_queue(q); + dn_cfg.queue_count++; + return q; } /* - * Insert element in heap. Normally, p != NULL, we insert p in - * a new position and bubble up. 
If p == NULL, then the element is - * already in place, and key is the position where to start the - * bubble-up. - * Returns 1 on failure (cannot allocate new heap entry) - * - * If offset > 0 the position (index, int) of the element in the heap is - * also stored in the element itself at the given offset in bytes. + * Notify schedulers that a queue is going away. + * If (flags & DN_DESTROY), also free the packets. + * The version for callbacks is called q_delete_cb(). */ -#define SET_OFFSET(heap, node) \ - if (heap->offset > 0) \ - *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ; -/* - * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value. - */ -#define RESET_OFFSET(heap, node) \ - if (heap->offset > 0) \ - *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ; -static int -heap_insert(struct dn_heap *h, dn_key key1, void *p) +static void +dn_delete_queue(struct dn_queue *q, int flags) { - int son = h->elements ; + struct dn_fsk *fs = q->fs; - if (p == NULL) /* data already there, set starting point */ - son = key1 ; - else { /* insert new element at the end, possibly resize */ - son = h->elements ; - if (son == h->size) /* need resize... */ - if (heap_init(h, h->elements+1) ) - return 1 ; /* failure... 
*/ - h->p[son].object = p ; - h->p[son].key = key1 ; - h->elements++ ; - } - while (son > 0) { /* bubble up */ - int father = HEAP_FATHER(son) ; - struct dn_heap_entry tmp ; + // D("fs %p si %p\n", fs, q->_si); + /* notify the parent scheduler that the queue is going away */ + if (fs && fs->sched->fp->free_queue) + fs->sched->fp->free_queue(q); + q->_si->q_count--; + q->_si = NULL; + if (flags & DN_DESTROY) { + if (q->mq.head) + dn_free_pkts(q->mq.head); + bzero(q, sizeof(*q)); // safety + free(q, M_DUMMYNET); + dn_cfg.queue_count--; + } +} - if (DN_KEY_LT( h->p[father].key, h->p[son].key ) ) - break ; /* found right position */ - /* son smaller than father, swap and repeat */ - HEAP_SWAP(h->p[son], h->p[father], tmp) ; - SET_OFFSET(h, son); - son = father ; - } - SET_OFFSET(h, son); - return 0 ; +static int +q_delete_cb(void *q, void *arg) +{ + int flags = (int)(uintptr_t)arg; + dn_delete_queue(q, flags); + return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0; } /* - * remove top element from heap, or obj if obj != NULL + * calls dn_delete_queue/q_delete_cb on all queues, + * which notifies the parent scheduler and possibly drains packets. 
+ * flags & DN_DESTROY: drains queues and destroy qht; */ static void -heap_extract(struct dn_heap *h, void *obj) +qht_delete(struct dn_fsk *fs, int flags) { - int child, father, max = h->elements - 1 ; - - if (max < 0) { - printf("dummynet: warning, extract from empty heap 0x%p\n", h); - return ; - } - father = 0 ; /* default: move up smallest child */ - if (obj != NULL) { /* extract specific element, index is at offset */ - if (h->offset <= 0) - panic("dummynet: heap_extract from middle not supported on this heap!!!\n"); - father = *((int *)((char *)obj + h->offset)) ; - if (father < 0 || father >= h->elements) { - printf("dummynet: heap_extract, father %d out of bound 0..%d\n", - father, h->elements); - panic("dummynet: heap_extract"); + ND("fs %d start flags %d qht %p", + fs->fs.fs_nr, flags, fs->qht); + if (!fs->qht) + return; + if (fs->fs.flags & DN_QHT_HASH) { + dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags); + if (flags & DN_DESTROY) { + dn_ht_free(fs->qht, 0); + fs->qht = NULL; + } + } else { + dn_delete_queue((struct dn_queue *)(fs->qht), flags); + if (flags & DN_DESTROY) + fs->qht = NULL; } - } - RESET_OFFSET(h, father); - child = HEAP_LEFT(father) ; /* left child */ - while (child <= max) { /* valid entry */ - if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) ) - child = child+1 ; /* take right child, otherwise left */ - h->p[father] = h->p[child] ; - SET_OFFSET(h, father); - father = child ; - child = HEAP_LEFT(child) ; /* left child for next loop */ - } - h->elements-- ; - if (father != max) { - /* - * Fill hole with last entry and bubble up, reusing the insert code - */ - h->p[father] = h->p[max] ; - heap_insert(h, father, NULL); /* this one cannot fail */ - } } -#if 0 /* - * change object position and update references - * XXX this one is never used! + * Find and possibly create the queue for a MULTIQUEUE scheduler. + * We never call it for !MULTIQUEUE (the queue is in the sch_inst). 
*/ -static void -heap_move(struct dn_heap *h, dn_key new_key, void *object) +struct dn_queue * +ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si, + struct ipfw_flow_id *id) { - int temp; - int i ; - int max = h->elements-1 ; - struct dn_heap_entry buf ; + struct dn_queue template; - if (h->offset <= 0) - panic("cannot move items on this heap"); + template._si = si; + template.fs = fs; - i = *((int *)((char *)object + h->offset)); - if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */ - h->p[i].key = new_key ; - for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ; - i = temp ) { /* bubble up */ - HEAP_SWAP(h->p[i], h->p[temp], buf) ; - SET_OFFSET(h, i); - } - } else { /* must move down */ - h->p[i].key = new_key ; - while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */ - if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key)) - temp++ ; /* select child with min key */ - if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */ - HEAP_SWAP(h->p[i], h->p[temp], buf) ; - SET_OFFSET(h, i); - } else - break ; - i = temp ; + if (fs->fs.flags & DN_QHT_HASH) { + struct ipfw_flow_id masked_id; + if (fs->qht == NULL) { + fs->qht = dn_ht_init(NULL, fs->fs.buckets, + offsetof(struct dn_queue, q_next), + q_hash, q_match, q_new); + if (fs->qht == NULL) + return NULL; + } + masked_id = *id; + flow_id_mask(&fs->fsk_mask, &masked_id); + return dn_ht_find(fs->qht, (uintptr_t)&masked_id, + DNHT_INSERT, &template); + } else { + if (fs->qht == NULL) + fs->qht = q_new(0, 0, &template); + return (struct dn_queue *)fs->qht; } - } - SET_OFFSET(h, i); } -#endif /* heap_move, unused */ +/*--- end of queue hash table ---*/ -/* - * heapify() will reorganize data inside an array to maintain the - * heap property. It is needed when we delete a bunch of entries. 
+/*--- support functions for the sch_inst hashtable ---- + * + * These are hashed by flow-id */ -static void -heapify(struct dn_heap *h) +static uint32_t +si_hash(uintptr_t key, int flags, void *arg) { - int i ; + /* compute the hash slot from the flow id */ + struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? + &((struct dn_sch_inst *)key)->ni.fid : + (struct ipfw_flow_id *)key; - for (i = 0 ; i < h->elements ; i++ ) - heap_insert(h, i , NULL) ; + return flow_id_hash(id); } -/* - * cleanup the heap and free data structure - */ -static void -heap_free(struct dn_heap *h) +static int +si_match(void *obj, uintptr_t key, int flags, void *arg) { - if (h->size >0 ) - free(h->p, M_DUMMYNET); - bzero(h, sizeof(*h) ); -} + struct dn_sch_inst *o = obj; + struct ipfw_flow_id *id2; -/* - * --- end of heap management functions --- - */ - -/* - * Return the mbuf tag holding the dummynet state. As an optimization - * this is assumed to be the first tag on the list. If this turns out - * wrong we'll need to search the list. - */ -static struct dn_pkt_tag * -dn_tag_get(struct mbuf *m) -{ - struct m_tag *mtag = m_tag_first(m); - KASSERT(mtag != NULL && - mtag->m_tag_cookie == MTAG_ABI_COMPAT && - mtag->m_tag_id == PACKET_TAG_DUMMYNET, - ("packet on dummynet queue w/o dummynet tag!")); - return (struct dn_pkt_tag *)(mtag+1); + id2 = (flags & DNHT_KEY_IS_OBJ) ? + &((struct dn_sch_inst *)key)->ni.fid : + (struct ipfw_flow_id *)key; + return flow_id_cmp(&o->ni.fid, id2) == 0; } /* - * Scheduler functions: - * - * transmit_event() is called when the delay-line needs to enter - * the scheduler, either because of existing pkts getting ready, - * or new packets entering the queue. The event handled is the delivery - * time of the packet. - * - * ready_event() does something similar with fixed-rate queues, and the - * event handled is the finish time of the head pkt. 
- * - * wfq_ready_event() does something similar with WF2Q queues, and the - * event handled is the start time of the head pkt. - * - * In all cases, we make sure that the data structures are consistent - * before passing pkts out, because this might trigger recursive - * invocations of the procedures. + * create a new instance for the given 'key' + * Allocate memory for instance, delay line and scheduler private data. */ -static void -transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail) +static void * +si_new(uintptr_t key, int flags, void *arg) { - struct mbuf *m; - struct dn_pkt_tag *pkt; + struct dn_schk *s = arg; + struct dn_sch_inst *si; + int l = sizeof(*si) + s->fp->si_datalen; - DUMMYNET_LOCK_ASSERT(); + si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); + if (si == NULL) + goto error; + /* Set length only for the part passed up to userland. */ + set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow)); + set_oid(&(si->dline.oid), DN_DELAY_LINE, + sizeof(struct delay_line)); + /* mark si and dline as outside the event queue */ + si->ni.oid.id = si->dline.oid.id = -1; - while ((m = pipe->head) != NULL) { - pkt = dn_tag_get(m); - if (!DN_KEY_LEQ(pkt->output_time, V_curr_time)) - break; + si->sched = s; + si->dline.si = si; - pipe->head = m->m_nextpkt; - if (*tail != NULL) - (*tail)->m_nextpkt = m; - else >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201004121118.o3CBIdZs022950>