From owner-dev-commits-src-all@freebsd.org Thu Jun  3 15:19:42 2021
Date: Thu, 3 Jun 2021 15:19:41 GMT
Message-Id: <202106031519.153FJfrR063877@gitrepo.freebsd.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org,
    dev-commits-src-main@FreeBSD.org
From: Kristof Provost
Subject: git: fe3bcfbda30e - main - VNETify dummynet
X-Git-Committer: kp
X-Git-Repository: src
X-Git-Refname: refs/heads/main
X-Git-Reftype: branch
X-Git-Commit: fe3bcfbda30e763a3ec56083b3a19cebbeaf8952

The branch main has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=fe3bcfbda30e763a3ec56083b3a19cebbeaf8952

commit fe3bcfbda30e763a3ec56083b3a19cebbeaf8952
Author:     Tom Jones
AuthorDate: 2021-05-15 12:36:45 +0000
Commit:     Kristof Provost
CommitDate: 2021-06-03 07:01:56 +0000

    VNETify dummynet

    This moves dn_cfg and other parameters into per VNET variables.
    The taskqueue and control state remains global.

    Reviewed by:	kp
    Differential Revision:	https://reviews.freebsd.org/D29274
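The change follows the standard VNET pattern used throughout the network
stack: the former global becomes a VNET_DEFINE'd variable in exactly one
.c file, headers expose it with a matching VNET_DECLARE, and every access
goes through a V_-prefixed accessor macro that resolves against whichever
vnet is current. A minimal sketch of the pattern, using names that appear
in the diff below:

    /* ip_dn_io.c -- the per-vnet storage, defined exactly once */
    VNET_DEFINE(struct dn_parms, dn_cfg);
    #define V_dn_cfg	VNET(dn_cfg)

    /* ip_dn_private.h -- declaration for all other consumers */
    VNET_DECLARE(struct dn_parms, dn_cfg);
    #define V_dn_cfg	VNET(dn_cfg)

    /* accesses then always name the current vnet's copy */
    V_dn_cfg.curr_time++;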
---
 sys/netpfil/ipfw/dn_aqm.h            |   6 +-
 sys/netpfil/ipfw/dn_aqm_codel.c      |   2 +-
 sys/netpfil/ipfw/dn_aqm_pie.c        |   2 +-
 sys/netpfil/ipfw/dn_sched.h          |   2 +-
 sys/netpfil/ipfw/dn_sched_fq_codel.c |   2 +-
 sys/netpfil/ipfw/dn_sched_fq_codel.h |   7 +-
 sys/netpfil/ipfw/dn_sched_fq_pie.c   |   9 +-
 sys/netpfil/ipfw/ip_dn_glue.c        |   8 +-
 sys/netpfil/ipfw/ip_dn_io.c          | 180 ++++++++++++-----------
 sys/netpfil/ipfw/ip_dn_private.h     |  42 +++---
 sys/netpfil/ipfw/ip_dummynet.c       | 271 +++++++++++++++++----------------
 11 files changed, 281 insertions(+), 250 deletions(-)

diff --git a/sys/netpfil/ipfw/dn_aqm.h b/sys/netpfil/ipfw/dn_aqm.h
index 8bbe9fe69e86..b0eaf2ecfc8a 100644
--- a/sys/netpfil/ipfw/dn_aqm.h
+++ b/sys/netpfil/ipfw/dn_aqm.h
@@ -37,9 +37,9 @@
 #define _IP_DN_AQM_H
 
 /* NOW is the current time in millisecond*/
-#define NOW ((dn_cfg.curr_time * tick) / 1000)
+#define NOW ((V_dn_cfg.curr_time * tick) / 1000)
 
-#define AQM_UNOW (dn_cfg.curr_time * tick)
+#define AQM_UNOW (V_dn_cfg.curr_time * tick)
 #define AQM_TIME_1US ((aqm_time_t)(1))
 #define AQM_TIME_1MS ((aqm_time_t)(1000))
 #define AQM_TIME_1S ((aqm_time_t)(AQM_TIME_1MS * 1000))
@@ -134,7 +134,7 @@ update_stats(struct dn_queue *q, int len, int drop)
 	if (drop) {
 		qni->drops++;
 		sni->drops++;
-		dn_cfg.io_pkt_drop++;
+		V_dn_cfg.io_pkt_drop++;
 	} else {
 		/*update queue stats */
 		qni->length += inc;
diff --git a/sys/netpfil/ipfw/dn_aqm_codel.c b/sys/netpfil/ipfw/dn_aqm_codel.c
index a1f90461ecab..79c6afd8b635 100644
--- a/sys/netpfil/ipfw/dn_aqm_codel.c
+++ b/sys/netpfil/ipfw/dn_aqm_codel.c
@@ -202,7 +202,7 @@ codel_extract_head(struct dn_queue *q, aqm_time_t *pkt_ts)
 	update_stats(q, -m->m_pkthdr.len, 0);
 
 	if (q->ni.length == 0) /* queue is now idle */
-		q->q_time = dn_cfg.curr_time;
+		q->q_time = V_dn_cfg.curr_time;
 
 	/* extract packet TS*/
 	mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
diff --git a/sys/netpfil/ipfw/dn_aqm_pie.c b/sys/netpfil/ipfw/dn_aqm_pie.c
index 2d5d500e275c..4a55aed662f7 100644
--- a/sys/netpfil/ipfw/dn_aqm_pie.c
+++ b/sys/netpfil/ipfw/dn_aqm_pie.c
@@ -338,7 +338,7 @@ pie_extract_head(struct dn_queue *q, aqm_time_t *pkt_ts, int getts)
 	update_stats(q, -m->m_pkthdr.len, 0);
 
 	if (q->ni.length == 0) /* queue is now idle */
-		q->q_time = dn_cfg.curr_time;
+		q->q_time = V_dn_cfg.curr_time;
 
 	if (getts) {
 		/* extract packet TS*/
diff --git a/sys/netpfil/ipfw/dn_sched.h b/sys/netpfil/ipfw/dn_sched.h
index 9bbd9019d623..1aa885ce3ccf 100644
--- a/sys/netpfil/ipfw/dn_sched.h
+++ b/sys/netpfil/ipfw/dn_sched.h
@@ -187,7 +187,7 @@ dn_dequeue(struct dn_queue *q)
 		q->_si->ni.len_bytes -= m->m_pkthdr.len;
 	}
 	if (q->ni.length == 0) /* queue is now idle */
-		q->q_time = dn_cfg.curr_time;
+		q->q_time = V_dn_cfg.curr_time;
 	return m;
 }
diff --git a/sys/netpfil/ipfw/dn_sched_fq_codel.c b/sys/netpfil/ipfw/dn_sched_fq_codel.c
index bc61be867d36..97341f5a9a60 100644
--- a/sys/netpfil/ipfw/dn_sched_fq_codel.c
+++ b/sys/netpfil/ipfw/dn_sched_fq_codel.c
@@ -165,7 +165,7 @@ codel_drop_head(struct fq_codel_flow *q, struct fq_codel_si *si)
 	fq_update_stats(q, si, -m->m_pkthdr.len, 1);
 
 	if (si->main_q.ni.length == 0) /* queue is now idle */
-		si->main_q.q_time = dn_cfg.curr_time;
+		si->main_q.q_time = V_dn_cfg.curr_time;
 
 	FREE_PKT(m);
 }
diff --git a/sys/netpfil/ipfw/dn_sched_fq_codel.h b/sys/netpfil/ipfw/dn_sched_fq_codel.h
index a8369ac83129..dcdbc6f32e7a 100644
--- a/sys/netpfil/ipfw/dn_sched_fq_codel.h
+++ b/sys/netpfil/ipfw/dn_sched_fq_codel.h
@@ -36,6 +36,9 @@
 #ifndef _IP_DN_SCHED_FQ_CODEL_H
 #define _IP_DN_SCHED_FQ_CODEL_H
 
+VNET_DECLARE(unsigned long, io_pkt_drop);
+#define V_io_pkt_drop	VNET(io_pkt_drop)
+
 /* list of queues */
 STAILQ_HEAD(fq_codel_list, fq_codel_flow) ;
@@ -104,7 +107,7 @@ fq_update_stats(struct fq_codel_flow *q, struct fq_codel_si *si, int len,
 		si->main_q.ni.drops ++;
 		q->stats.drops ++;
 		si->_si.ni.drops ++;
-		dn_cfg.io_pkt_drop ++;
+		V_dn_cfg.io_pkt_drop ++;
 	}
 
 	if (!drop || (drop && len < 0)) {
@@ -147,7 +150,7 @@ fq_codel_extract_head(struct fq_codel_flow *q, aqm_time_t *pkt_ts, struct fq_cod
 	fq_update_stats(q, si, -m->m_pkthdr.len, 0);
 
 	if (si->main_q.ni.length == 0) /* queue is now idle */
-		si->main_q.q_time = dn_cfg.curr_time;
+		si->main_q.q_time = V_dn_cfg.curr_time;
 
 	/* extract packet timestamp*/
 	struct m_tag *mtag;
diff --git a/sys/netpfil/ipfw/dn_sched_fq_pie.c b/sys/netpfil/ipfw/dn_sched_fq_pie.c
index 809ca2b5f4e8..76215aed610a 100644
--- a/sys/netpfil/ipfw/dn_sched_fq_pie.c
+++ b/sys/netpfil/ipfw/dn_sched_fq_pie.c
@@ -82,6 +82,9 @@
 
 #define DN_SCHED_FQ_PIE 7
 
+VNET_DECLARE(unsigned long, io_pkt_drop);
+#define V_io_pkt_drop	VNET(io_pkt_drop)
+
 /* list of queues */
 STAILQ_HEAD(fq_pie_list, fq_pie_flow) ;
@@ -299,7 +302,7 @@ fq_update_stats(struct fq_pie_flow *q, struct fq_pie_si *si, int len,
 		si->main_q.ni.drops ++;
 		q->stats.drops ++;
 		si->_si.ni.drops ++;
-		dn_cfg.io_pkt_drop ++;
+		V_dn_cfg.io_pkt_drop ++;
 	}
 
 	if (!drop || (drop && len < 0)) {
@@ -347,7 +350,7 @@ fq_pie_extract_head(struct fq_pie_flow *q, aqm_time_t *pkt_ts,
 	fq_update_stats(q, si, -m->m_pkthdr.len, 0);
 
 	if (si->main_q.ni.length == 0) /* queue is now idle */
-		si->main_q.q_time = dn_cfg.curr_time;
+		si->main_q.q_time = V_dn_cfg.curr_time;
 
 	if (getts) {
 		/* extract packet timestamp*/
@@ -768,7 +771,7 @@ pie_drop_head(struct fq_pie_flow *q, struct fq_pie_si *si)
 	fq_update_stats(q, si, -m->m_pkthdr.len, 1);
 
 	if (si->main_q.ni.length == 0) /* queue is now idle */
-		si->main_q.q_time = dn_cfg.curr_time;
+		si->main_q.q_time = V_dn_cfg.curr_time;
 
 	/* reset accu_prob after packet drop */
 	q->pst.accu_prob = 0;
diff --git a/sys/netpfil/ipfw/ip_dn_glue.c b/sys/netpfil/ipfw/ip_dn_glue.c
index 5a39a1a47282..83f26cb23680 100644
--- a/sys/netpfil/ipfw/ip_dn_glue.c
+++ b/sys/netpfil/ipfw/ip_dn_glue.c
@@ -567,10 +567,10 @@ dn_compat_calc_size(void)
 	 * - all flowset queues: queue_count
 	 * - all pipe queue: si_count
 	 */
-	need += dn_cfg.schk_count * sizeof(struct dn_pipe8) / 2;
-	need += dn_cfg.fsk_count * sizeof(struct dn_flow_set);
-	need += dn_cfg.si_count * sizeof(struct dn_flow_queue8);
-	need += dn_cfg.queue_count * sizeof(struct dn_flow_queue8);
+	need += V_dn_cfg.schk_count * sizeof(struct dn_pipe8) / 2;
+	need += V_dn_cfg.fsk_count * sizeof(struct dn_flow_set);
+	need += V_dn_cfg.si_count * sizeof(struct dn_flow_queue8);
+	need += V_dn_cfg.queue_count * sizeof(struct dn_flow_queue8);
 
 	return need;
 }
diff --git a/sys/netpfil/ipfw/ip_dn_io.c b/sys/netpfil/ipfw/ip_dn_io.c
index 4a65bd0ef798..39bea3eb99dd 100644
--- a/sys/netpfil/ipfw/ip_dn_io.c
+++ b/sys/netpfil/ipfw/ip_dn_io.c
@@ -74,11 +74,10 @@ __FBSDID("$FreeBSD$");
 /*
  * We keep a private variable for the simulation time, but we could
  * probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
- * instead of dn_cfg.curr_time
+ * instead of V_dn_cfg.curr_time
  */
-
-struct dn_parms dn_cfg;
-//VNET_DEFINE(struct dn_parms, _base_dn_cfg);
+VNET_DEFINE(struct dn_parms, dn_cfg);
+#define V_dn_cfg	VNET(dn_cfg)
 
 /*
  * We use a heap to store entities for which we have pending timer events.
@@ -102,13 +101,13 @@ sysctl_hash_size(SYSCTL_HANDLER_ARGS)
 {
 	int error, value;
 
-	value = dn_cfg.hash_size;
+	value = V_dn_cfg.hash_size;
 	error = sysctl_handle_int(oidp, &value, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (value < 16 || value > 65536)
 		return (EINVAL);
-	dn_cfg.hash_size = value;
+	V_dn_cfg.hash_size = value;
 	return (0);
 }
 
@@ -119,9 +118,9 @@ sysctl_limits(SYSCTL_HANDLER_ARGS)
 	long value;
 
 	if (arg2 != 0)
-		value = dn_cfg.slot_limit;
+		value = V_dn_cfg.slot_limit;
 	else
-		value = dn_cfg.byte_limit;
+		value = V_dn_cfg.byte_limit;
 	error = sysctl_handle_long(oidp, &value, 0, req);
 
 	if (error != 0 || req->newptr == NULL)
@@ -129,11 +128,11 @@ sysctl_limits(SYSCTL_HANDLER_ARGS)
 	if (arg2 != 0) {
 		if (value < 1)
 			return (EINVAL);
-		dn_cfg.slot_limit = value;
+		V_dn_cfg.slot_limit = value;
 	} else {
 		if (value < 1500)
 			return (EINVAL);
-		dn_cfg.byte_limit = value;
+		V_dn_cfg.byte_limit = value;
 	}
 	return (0);
 }
@@ -151,9 +150,9 @@ static SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet,
     "Dummynet");
 #endif
 
-/* wrapper to pass dn_cfg fields to SYSCTL_* */
-//#define DC(x)	(&(VNET_NAME(_base_dn_cfg).x))
-#define DC(x)	(&(dn_cfg.x))
+/* wrapper to pass V_dn_cfg fields to SYSCTL_* */
+#define DC(x)	(&(VNET_NAME(dn_cfg).x))
+
 /* parameters */
 
 SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hash_size,
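With DC() expanding to &(VNET_NAME(dn_cfg).x), each of the sysctl knobs
above now reads and writes the field in the caller's vnet. Oids backed by
VNET storage are normally declared with CTLFLAG_VNET so the sysctl layer
performs that resolution; an illustrative declaration in that style (the
exact flags on each dummynet oid are not visible in this excerpt, so treat
this as a sketch):

    SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug,
        CTLFLAG_VNET | CTLFLAG_RW, DC(debug), 0,
        "Dummynet debug level");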
@@ -349,7 +348,7 @@ red_drops (struct dn_queue *q, int len)
 	 * XXX check wraps...
 	 */
 	if (q->avg) {
-		u_int t = div64((dn_cfg.curr_time - q->q_time), fs->lookup_step);
+		u_int t = div64((V_dn_cfg.curr_time - q->q_time), fs->lookup_step);
 
 		q->avg = (t < fs->lookup_depth) ?
 		    SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
@@ -524,7 +523,7 @@ dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
 	return (0);
 
 drop:
-	dn_cfg.io_pkt_drop++;
+	V_dn_cfg.io_pkt_drop++;
 	q->ni.drops++;
 	ni->drops++;
 	FREE_PKT(m);
@@ -553,7 +552,7 @@ transmit_event(struct mq *q, struct delay_line *dline, uint64_t now)
 	}
 	if (m != NULL) {
 		dline->oid.subtype = 1; /* in heap */
-		heap_insert(&dn_cfg.evheap, pkt->output_time, dline);
+		heap_insert(&V_dn_cfg.evheap, pkt->output_time, dline);
 	}
 }
@@ -616,7 +615,7 @@ serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now)
 		    (m->m_pkthdr.len * 8 + extra_bits(m, s));
 		si->credit -= len_scaled;
 		/* Move packet in the delay line */
-		dn_tag_get(m)->output_time = dn_cfg.curr_time + s->link.delay ;
+		dn_tag_get(m)->output_time = V_dn_cfg.curr_time + s->link.delay ;
 		mq_append(&si->dline.mq, m);
 	}
@@ -634,7 +633,7 @@ serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now)
 		if (m)
 			dn_tag_get(m)->output_time += t;
 		si->kflags |= DN_ACTIVE;
-		heap_insert(&dn_cfg.evheap, now + t, si);
+		heap_insert(&V_dn_cfg.evheap, now + t, si);
 	}
 	if (delay_line_idle && done)
 		transmit_event(q, &si->dline, now);
@@ -651,74 +650,85 @@ dummynet_task(void *context, int pending)
 {
 	struct timeval t;
 	struct mq q = { NULL, NULL }; /* queue to accumulate results */
+	struct epoch_tracker et;
 
-	CURVNET_SET((struct vnet *)context);
+	VNET_ITERATOR_DECL(vnet_iter);
+	VNET_LIST_RLOCK();
+	NET_EPOCH_ENTER(et);
 
-	DN_BH_WLOCK();
+	VNET_FOREACH(vnet_iter) {
+		memset(&q, 0, sizeof(struct mq));
+		CURVNET_SET(vnet_iter);
 
-	/* Update number of lost(coalesced) ticks. */
-	dn_cfg.tick_lost += pending - 1;
+		DN_BH_WLOCK();
 
-	getmicrouptime(&t);
-	/* Last tick duration (usec). */
-	dn_cfg.tick_last = (t.tv_sec - dn_cfg.prev_t.tv_sec) * 1000000 +
-	(t.tv_usec - dn_cfg.prev_t.tv_usec);
-	/* Last tick vs standard tick difference (usec). */
-	dn_cfg.tick_delta = (dn_cfg.tick_last * hz - 1000000) / hz;
-	/* Accumulated tick difference (usec). */
-	dn_cfg.tick_delta_sum += dn_cfg.tick_delta;
+		/* Update number of lost(coalesced) ticks. */
+		V_dn_cfg.tick_lost += pending - 1;
 
-	dn_cfg.prev_t = t;
+		getmicrouptime(&t);
+		/* Last tick duration (usec). */
+		V_dn_cfg.tick_last = (t.tv_sec - V_dn_cfg.prev_t.tv_sec) * 1000000 +
+		(t.tv_usec - V_dn_cfg.prev_t.tv_usec);
+		/* Last tick vs standard tick difference (usec). */
+		V_dn_cfg.tick_delta = (V_dn_cfg.tick_last * hz - 1000000) / hz;
+		/* Accumulated tick difference (usec). */
+		V_dn_cfg.tick_delta_sum += V_dn_cfg.tick_delta;
 
-	/*
-	 * Adjust curr_time if the accumulated tick difference is
-	 * greater than the 'standard' tick. Since curr_time should
-	 * be monotonically increasing, we do positive adjustments
-	 * as required, and throttle curr_time in case of negative
-	 * adjustment.
-	 */
-	dn_cfg.curr_time++;
-	if (dn_cfg.tick_delta_sum - tick >= 0) {
-		int diff = dn_cfg.tick_delta_sum / tick;
-
-		dn_cfg.curr_time += diff;
-		dn_cfg.tick_diff += diff;
-		dn_cfg.tick_delta_sum %= tick;
-		dn_cfg.tick_adjustment++;
-	} else if (dn_cfg.tick_delta_sum + tick <= 0) {
-		dn_cfg.curr_time--;
-		dn_cfg.tick_diff--;
-		dn_cfg.tick_delta_sum += tick;
-		dn_cfg.tick_adjustment++;
-	}
+		V_dn_cfg.prev_t = t;
 
-	/* serve pending events, accumulate in q */
-	for (;;) {
-		struct dn_id *p;    /* generic parameter to handler */
+		/*
+		 * Adjust curr_time if the accumulated tick difference is
+		 * greater than the 'standard' tick. Since curr_time should
+		 * be monotonically increasing, we do positive adjustments
+		 * as required, and throttle curr_time in case of negative
+		 * adjustment.
+		 */
+		V_dn_cfg.curr_time++;
+		if (V_dn_cfg.tick_delta_sum - tick >= 0) {
+			int diff = V_dn_cfg.tick_delta_sum / tick;
+
+			V_dn_cfg.curr_time += diff;
+			V_dn_cfg.tick_diff += diff;
+			V_dn_cfg.tick_delta_sum %= tick;
+			V_dn_cfg.tick_adjustment++;
+		} else if (V_dn_cfg.tick_delta_sum + tick <= 0) {
+			V_dn_cfg.curr_time--;
+			V_dn_cfg.tick_diff--;
+			V_dn_cfg.tick_delta_sum += tick;
+			V_dn_cfg.tick_adjustment++;
+		}
 
-		if (dn_cfg.evheap.elements == 0 ||
-		    DN_KEY_LT(dn_cfg.curr_time, HEAP_TOP(&dn_cfg.evheap)->key))
-			break;
-		p = HEAP_TOP(&dn_cfg.evheap)->object;
-		heap_extract(&dn_cfg.evheap, NULL);
+		/* serve pending events, accumulate in q */
+		for (;;) {
+			struct dn_id *p;    /* generic parameter to handler */
 
-		if (p->type == DN_SCH_I) {
-			serve_sched(&q, (struct dn_sch_inst *)p, dn_cfg.curr_time);
-		} else { /* extracted a delay line */
-			transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time);
+			if (V_dn_cfg.evheap.elements == 0 ||
+			    DN_KEY_LT(V_dn_cfg.curr_time, HEAP_TOP(&V_dn_cfg.evheap)->key))
+				break;
+			p = HEAP_TOP(&V_dn_cfg.evheap)->object;
+			heap_extract(&V_dn_cfg.evheap, NULL);
+			if (p->type == DN_SCH_I) {
+				serve_sched(&q, (struct dn_sch_inst *)p, V_dn_cfg.curr_time);
+			} else { /* extracted a delay line */
+				transmit_event(&q, (struct delay_line *)p, V_dn_cfg.curr_time);
+			}
 		}
+		if (V_dn_cfg.expire && ++V_dn_cfg.expire_cycle >= V_dn_cfg.expire) {
+			V_dn_cfg.expire_cycle = 0;
+			dn_drain_scheduler();
+			dn_drain_queue();
+		}
+		DN_BH_WUNLOCK();
+		if (q.head != NULL)
+			dummynet_send(q.head);
+
+		CURVNET_RESTORE();
 	}
-	if (dn_cfg.expire && ++dn_cfg.expire_cycle >= dn_cfg.expire) {
-		dn_cfg.expire_cycle = 0;
-		dn_drain_scheduler();
-		dn_drain_queue();
-	}
+	NET_EPOCH_EXIT(et);
+	VNET_LIST_RUNLOCK();
+
+	/* Schedule our next run. */
 	dn_reschedule();
-	DN_BH_WUNLOCK();
-	if (q.head != NULL)
-		dummynet_send(q.head);
-	CURVNET_RESTORE();
 }
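Since the taskqueue remains global, a single tick now services every vnet:
the rewritten dummynet_task() iterates the vnet list and runs the old
per-tick body once per vnet. Stripped of the dummynet specifics, the
iteration skeleton used above is (a generic sketch, not further code from
this commit):

    static void
    example_task(void *context, int pending)
    {
    	struct epoch_tracker et;
    	VNET_ITERATOR_DECL(vnet_iter);

    	VNET_LIST_RLOCK();		/* keep the vnet list stable */
    	NET_EPOCH_ENTER(et);
    	VNET_FOREACH(vnet_iter) {
    		CURVNET_SET(vnet_iter);	/* V_dn_cfg now resolves to this vnet */
    		/* per-vnet tick work goes here */
    		CURVNET_RESTORE();
    	}
    	NET_EPOCH_EXIT(et);
    	VNET_LIST_RUNLOCK();
    }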
@@ -834,7 +844,7 @@ tag_mbuf(struct mbuf *m, int dir, struct ip_fw_args *fwa)
 	dt->dn_dir = dir;
 	dt->ifp = fwa->flags & IPFW_ARGS_OUT ? fwa->ifp : NULL;
 	/* dt->output tame is updated as we move through */
-	dt->output_time = dn_cfg.curr_time;
+	dt->output_time = V_dn_cfg.curr_time;
 	dt->iphdr_off = (dir & PROTO_LAYER2) ? ETHER_HDR_LEN : 0;
 	return 0;
 }
@@ -866,12 +876,12 @@ dummynet_io(struct mbuf **m0, struct ip_fw_args *fwa)
 	else if (fwa->flags & IPFW_ARGS_IP6)
 		dir |= PROTO_IPV6;
 	DN_BH_WLOCK();
-	dn_cfg.io_pkt++;
+	V_dn_cfg.io_pkt++;
 	/* we could actually tag outside the lock, but who cares... */
 	if (tag_mbuf(m, dir, fwa))
 		goto dropit;
 	/* XXX locate_flowset could be optimised with a direct ref. */
-	fs = dn_ht_find(dn_cfg.fshash, fs_id, 0, NULL);
+	fs = dn_ht_find(V_dn_cfg.fshash, fs_id, 0, NULL);
 	if (fs == NULL)
 		goto dropit;	/* This queue/pipe does not exist! */
 	if (fs->sched == NULL)	/* should not happen */
@@ -894,7 +904,7 @@ dummynet_io(struct mbuf **m0, struct ip_fw_args *fwa)
 		m = *m0 = NULL;
 
 		/* dn_enqueue already increases io_pkt_drop */
-		dn_cfg.io_pkt_drop--;
+		V_dn_cfg.io_pkt_drop--;
 		goto dropit;
 	}
@@ -905,34 +915,34 @@ dummynet_io(struct mbuf **m0, struct ip_fw_args *fwa)
 	}
 
 	/* compute the initial allowance */
-	if (si->idle_time < dn_cfg.curr_time) {
+	if (si->idle_time < V_dn_cfg.curr_time) {
 	    /* Do this only on the first packet on an idle pipe */
 	    struct dn_link *p = &fs->sched->link;
 
-	    si->sched_time = dn_cfg.curr_time;
-	    si->credit = dn_cfg.io_fast ? p->bandwidth : 0;
+	    si->sched_time = V_dn_cfg.curr_time;
+	    si->credit = V_dn_cfg.io_fast ? p->bandwidth : 0;
 	    if (p->burst) {
-		uint64_t burst = (dn_cfg.curr_time - si->idle_time) * p->bandwidth;
+		uint64_t burst = (V_dn_cfg.curr_time - si->idle_time) * p->bandwidth;
 		if (burst > p->burst)
 			burst = p->burst;
 		si->credit += burst;
 	    }
 	}
 	/* pass through scheduler and delay line */
-	m = serve_sched(NULL, si, dn_cfg.curr_time);
+	m = serve_sched(NULL, si, V_dn_cfg.curr_time);
 
 	/* optimization -- pass it back to ipfw for immediate send */
 	/* XXX Don't call dummynet_send() if scheduler return the packet
	 * just enqueued. This avoid a lock order reversal.
	 *
	 */
-	if (/*dn_cfg.io_fast &&*/ m == *m0 && (dir & PROTO_LAYER2) == 0 ) {
+	if (/*V_dn_cfg.io_fast &&*/ m == *m0 && (dir & PROTO_LAYER2) == 0 ) {
 		/* fast io, rename the tag
		 * to carry reinject info. */
 		struct m_tag *tag = m_tag_first(m);
 
 		tag->m_tag_cookie = MTAG_IPFW_RULE;
 		tag->m_tag_id = 0;
-		dn_cfg.io_pkt_fast++;
+		V_dn_cfg.io_pkt_fast++;
 		if (m->m_nextpkt != NULL) {
 			printf("dummynet: fast io: pkt chain detected!\n");
 			m->m_nextpkt = NULL;
@@ -948,7 +958,7 @@ done:
 	return 0;
 
 dropit:
-	dn_cfg.io_pkt_drop++;
+	V_dn_cfg.io_pkt_drop++;
 	DN_BH_WUNLOCK();
 	if (m)
 		FREE_PKT(m);
diff --git a/sys/netpfil/ipfw/ip_dn_private.h b/sys/netpfil/ipfw/ip_dn_private.h
index e6e699bf35b2..8dedd071bd81 100644
--- a/sys/netpfil/ipfw/ip_dn_private.h
+++ b/sys/netpfil/ipfw/ip_dn_private.h
@@ -46,7 +46,7 @@
 #define D(fmt, ...) printf("%-10s " fmt "\n",	\
 	__FUNCTION__, ## __VA_ARGS__)
 #define DX(lev, fmt, ...) do {		\
-	if (dn_cfg.debug > lev) D(fmt, ## __VA_ARGS__); } while (0)
+	if (V_dn_cfg.debug > lev) D(fmt, ## __VA_ARGS__); } while (0)
 #endif
 
 MALLOC_DECLARE(M_DUMMYNET);
@@ -56,26 +56,26 @@ MALLOC_DECLARE(M_DUMMYNET);
 #endif
 
 #define DN_LOCK_INIT() do {				\
-	mtx_init(&dn_cfg.uh_mtx, "dn_uh", NULL, MTX_DEF);	\
-	mtx_init(&dn_cfg.bh_mtx, "dn_bh", NULL, MTX_DEF);	\
+	mtx_init(&V_dn_cfg.uh_mtx, "dn_uh", NULL, MTX_DEF);	\
+	mtx_init(&V_dn_cfg.bh_mtx, "dn_bh", NULL, MTX_DEF);	\
 	} while (0)
 #define DN_LOCK_DESTROY() do {				\
-	mtx_destroy(&dn_cfg.uh_mtx);			\
-	mtx_destroy(&dn_cfg.bh_mtx);			\
+	mtx_destroy(&V_dn_cfg.uh_mtx);			\
+	mtx_destroy(&V_dn_cfg.bh_mtx);			\
 	} while (0)
 #if 0 /* not used yet */
-#define DN_UH_RLOCK()		mtx_lock(&dn_cfg.uh_mtx)
-#define DN_UH_RUNLOCK()		mtx_unlock(&dn_cfg.uh_mtx)
-#define DN_UH_WLOCK()		mtx_lock(&dn_cfg.uh_mtx)
-#define DN_UH_WUNLOCK()		mtx_unlock(&dn_cfg.uh_mtx)
-#define DN_UH_LOCK_ASSERT()	mtx_assert(&dn_cfg.uh_mtx, MA_OWNED)
+#define DN_UH_RLOCK()		mtx_lock(&V_dn_cfg.uh_mtx)
+#define DN_UH_RUNLOCK()		mtx_unlock(&V_dn_cfg.uh_mtx)
+#define DN_UH_WLOCK()		mtx_lock(&V_dn_cfg.uh_mtx)
+#define DN_UH_WUNLOCK()		mtx_unlock(&V_dn_cfg.uh_mtx)
+#define DN_UH_LOCK_ASSERT()	mtx_assert(&V_dn_cfg.uh_mtx, MA_OWNED)
 #endif
 
-#define DN_BH_RLOCK()		mtx_lock(&dn_cfg.uh_mtx)
-#define DN_BH_RUNLOCK()		mtx_unlock(&dn_cfg.uh_mtx)
-#define DN_BH_WLOCK()		mtx_lock(&dn_cfg.uh_mtx)
-#define DN_BH_WUNLOCK()		mtx_unlock(&dn_cfg.uh_mtx)
-#define DN_BH_LOCK_ASSERT()	mtx_assert(&dn_cfg.uh_mtx, MA_OWNED)
+#define DN_BH_RLOCK()		mtx_lock(&V_dn_cfg.uh_mtx)
+#define DN_BH_RUNLOCK()		mtx_unlock(&V_dn_cfg.uh_mtx)
+#define DN_BH_WLOCK()		mtx_lock(&V_dn_cfg.uh_mtx)
+#define DN_BH_WUNLOCK()		mtx_unlock(&V_dn_cfg.uh_mtx)
+#define DN_BH_LOCK_ASSERT()	mtx_assert(&V_dn_cfg.uh_mtx, MA_OWNED)
 
 SLIST_HEAD(dn_schk_head, dn_schk);
 SLIST_HEAD(dn_sch_inst_head, dn_sch_inst);
@@ -101,7 +101,7 @@ set_oid(struct dn_id *o, int type, int len)
 }
 
 /*
- * configuration and global data for a dummynet instance
+ * configuration and data for a dummynet instance
  *
  * When a configuration is modified from userland, 'id' is incremented
  * so we can use the value to check for stale pointers.
@@ -154,10 +154,6 @@ struct dn_parms {
 	struct dn_ht	*schedhash;
 	/* list of flowsets without a scheduler -- use sch_chain */
 	struct dn_fsk_head	fsu;	/* list of unlinked flowsets */
-	struct dn_alg_head	schedlist;	/* list of algorithms */
-#ifdef NEW_AQM
-	struct dn_aqm_head	aqmlist;	/* list of AQMs */
-#endif
 
 	/* Store the fs/sch to scan when draining. The value is the
	 * bucket number of the hash table. Expire can be disabled
@@ -406,9 +402,9 @@ enum {
 	PROTO_IFB = 0x0c,	/* layer2 + ifbridge */
 };
 
-extern struct dn_parms dn_cfg;
-//VNET_DECLARE(struct dn_parms, _base_dn_cfg);
-//#define dn_cfg VNET(_base_dn_cfg)
+//extern struct dn_parms V_dn_cfg;
+VNET_DECLARE(struct dn_parms, dn_cfg);
+#define V_dn_cfg	VNET(dn_cfg)
 
 int dummynet_io(struct mbuf **, struct ip_fw_args *);
 void dummynet_task(void *context, int pending);
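Because the mutexes now live inside V_dn_cfg, DN_LOCK_INIT()/DN_LOCK_DESTROY()
become per-vnet operations and must run from a vnet constructor/destructor
rather than once at module load; VNET_SYSINIT/VNET_SYSUNINIT is the usual
vehicle for that. An illustrative sketch with a hypothetical function name
(the commit's actual init path lives in the part of ip_dummynet.c trimmed
from this mail):

    static void
    vnet_dn_example_init(const void *unused)
    {
    	DN_LOCK_INIT();	/* mtx_init() on this vnet's V_dn_cfg mutexes */
    	/* ... per-vnet hash tables, event heap, default parameters ... */
    }
    VNET_SYSINIT(vnet_dn_example_init, SI_SUB_PROTO_FIREWALL,
        SI_ORDER_ANY, vnet_dn_example_init, NULL);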
diff --git a/sys/netpfil/ipfw/ip_dummynet.c b/sys/netpfil/ipfw/ip_dummynet.c
index 17f3e364756e..3abc78fc1410 100644
--- a/sys/netpfil/ipfw/ip_dummynet.c
+++ b/sys/netpfil/ipfw/ip_dummynet.c
@@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$");
 #include		/* ip_output(), IP_FORWARDING */
 #include
 #include
+#include
 
 #include
 #include
@@ -87,10 +88,17 @@ struct schk_new_arg {
 
 /*---- callout hooks. ----*/
 
 static struct callout dn_timeout;
+static int dn_tasks_started = 0;
 static int dn_gone;
 static struct task	dn_task;
 static struct taskqueue	*dn_tq = NULL;
 
+/* global scheduler list */
+struct dn_alg_head	schedlist;
+#ifdef NEW_AQM
+struct dn_aqm_head	aqmlist;	/* list of AQMs */
+#endif
+
 static void
 dummynet(void *arg)
 {
@@ -117,7 +125,7 @@ find_aqm_type(int type, char *name)
 {
 	struct dn_aqm *d;
 
-	SLIST_FOREACH(d, &dn_cfg.aqmlist, next) {
+	SLIST_FOREACH(d, &aqmlist, next) {
 		if (d->type == type || (name && !strcasecmp(d->name, name)))
 			return d;
 	}
@@ -131,7 +139,7 @@ find_sched_type(int type, char *name)
 {
 	struct dn_alg *d;
 
-	SLIST_FOREACH(d, &dn_cfg.schedlist, next) {
+	SLIST_FOREACH(d, &schedlist, next) {
 		if (d->type == type || (name && !strcasecmp(d->name, name)))
 			return d;
 	}
@@ -354,7 +362,7 @@ q_new(uintptr_t key, int flags, void *arg)
 		if(fs->aqmfp->init(q))
 			D("unable to init AQM for fs %d", fs->fs.fs_nr);
 #endif
-	dn_cfg.queue_count++;
+	V_dn_cfg.queue_count++;
 
 	return q;
 }
@@ -387,7 +395,7 @@ dn_delete_queue(struct dn_queue *q, int flags)
 		dn_free_pkts(q->mq.head);
 		bzero(q, sizeof(*q));	// safety
 		free(q, M_DUMMYNET);
-		dn_cfg.queue_count--;
+		V_dn_cfg.queue_count--;
 	}
 }
@@ -527,7 +535,7 @@ si_new(uintptr_t key, int flags, void *arg)
 	}
 #endif
 
-	dn_cfg.si_count++;
+	V_dn_cfg.si_count++;
 	return si;
 
 error:
@@ -552,10 +560,10 @@ si_destroy(void *_si, void *arg)
 	struct delay_line *dl = &si->dline;
 
 	if (dl->oid.subtype) /* remove delay line from event heap */
-		heap_extract(&dn_cfg.evheap, dl);
+		heap_extract(&V_dn_cfg.evheap, dl);
 	dn_free_pkts(dl->mq.head);	/* drain delay line */
 	if (si->kflags & DN_ACTIVE) /* remove si from event heap */
-		heap_extract(&dn_cfg.evheap, si);
+		heap_extract(&V_dn_cfg.evheap, si);
 
 #ifdef NEW_AQM
 	/* clean up AQM status for !DN_MULTIQUEUE sched
@@ -574,7 +582,7 @@ si_destroy(void *_si, void *arg)
 	s->fp->free_sched(si);
 	bzero(si, sizeof(*si)); /* safety */
 	free(si, M_DUMMYNET);
-	dn_cfg.si_count--;
+	V_dn_cfg.si_count--;
 	return DNHT_SCAN_DEL;
 }
@@ -605,7 +613,7 @@ si_reset_credit(void *_si, void *arg)
 	struct dn_sch_inst *si = _si;
 	struct dn_link *p = &si->sched->link;
 
-	si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0);
+	si->credit = p->burst + (V_dn_cfg.io_fast ? p->bandwidth : 0);
 	return 0;
 }
@@ -651,9 +659,9 @@ fsk_new(uintptr_t key, int flags, void *arg)
 	fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO);
 	if (fs) {
 		set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs));
-		dn_cfg.fsk_count++;
+		V_dn_cfg.fsk_count++;
 		fs->drain_bucket = 0;
-		SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
+		SLIST_INSERT_HEAD(&V_dn_cfg.fsu, fs, sch_chain);
 	}
 	return fs;
 }
@@ -737,7 +745,7 @@ fsk_detach(struct dn_fsk *fs, int flags)
 	    (flags & DN_DETACH) ? "DET":"");
 	if (flags & DN_DETACH) { /* detach from the list */
 		struct dn_fsk_head *h;
-		h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu;
+		h = fs->sched ? &fs->sched->fsk_list : &V_dn_cfg.fsu;
 		SLIST_REMOVE(h, fs, dn_fsk, sch_chain);
 	}
 	/* Free the RED parameters, they will be recomputed on
@@ -757,9 +765,9 @@ fsk_detach(struct dn_fsk *fs, int flags)
 	if (flags & DN_DELETE_FS) {
 		bzero(fs, sizeof(*fs));	/* safety */
 		free(fs, M_DUMMYNET);
-		dn_cfg.fsk_count--;
+		V_dn_cfg.fsk_count--;
 	} else {
-		SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
+		SLIST_INSERT_HEAD(&V_dn_cfg.fsu, fs, sch_chain);
 	}
 }
@@ -797,7 +805,7 @@ delete_fs(int i, int locked)
 
 	if (!locked)
 		DN_BH_WLOCK();
-	fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL);
+	fs = dn_ht_find(V_dn_cfg.fshash, i, DNHT_REMOVE, NULL);
 	ND("fs %d found %p", i, fs);
 	if (fs) {
 		fsk_detach(fs, DN_DETACH | DN_DELETE_FS);
@@ -866,7 +874,7 @@ schk_new(uintptr_t key, int flags, void *arg)
 		}
 	}
 	s->fp = NULL;	/* mark as a new scheduler */
-	dn_cfg.schk_count++;
+	V_dn_cfg.schk_count++;
 	return s;
 }
@@ -905,7 +913,7 @@ schk_delete_cb(void *obj, void *arg)
 	s->fp->destroy(s);
 	bzero(s, sizeof(*s));	// safety
 	free(obj, M_DUMMYNET);
-	dn_cfg.schk_count--;
+	V_dn_cfg.schk_count--;
 	return DNHT_SCAN_DEL;
 }
@@ -919,7 +927,7 @@ delete_schk(int i)
 {
 	struct dn_schk *s;
 
-	s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
+	s = dn_ht_find(V_dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
 	ND("%d %p", i, s);
 	if (!s)
 		return EINVAL;
@@ -1176,7 +1184,7 @@ copy_data_helper(void *_o, void *_arg)
 static inline struct dn_schk *
 locate_scheduler(int i)
 {
-	return dn_ht_find(dn_cfg.schedhash, i, 0, NULL);
+	return dn_ht_find(V_dn_cfg.schedhash, i, 0, NULL);
 }
 
 /*
@@ -1194,10 +1202,10 @@ config_red(struct dn_fsk *fs)
 	/* Doing stuff that was in userland */
 	i = fs->sched->link.bandwidth;
 	s = (i <= 0) ? 0 :
-		hz * dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i;
+		hz * V_dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i;
 
 	idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */
-	fs->lookup_step = div64(idle , dn_cfg.red_lookup_depth);
+	fs->lookup_step = div64(idle , V_dn_cfg.red_lookup_depth);
 	/* fs->lookup_step not scaled, */
 	if (!fs->lookup_step)
 		fs->lookup_step = 1;
@@ -1227,14 +1235,14 @@ config_red(struct dn_fsk *fs)
 		free(fs->w_q_lookup, M_DUMMYNET);
 		fs->w_q_lookup = NULL;
 	}
-	if (dn_cfg.red_lookup_depth == 0) {
+	if (V_dn_cfg.red_lookup_depth == 0) {
 		printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth"
 		    "must be > 0\n");
 		fs->fs.flags &= ~DN_IS_RED;
 		fs->fs.flags &= ~DN_IS_GENTLE_RED;
 		return (EINVAL);
 	}
-	fs->lookup_depth = dn_cfg.red_lookup_depth;
+	fs->lookup_depth = V_dn_cfg.red_lookup_depth;
 	fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int),
 	    M_DUMMYNET, M_NOWAIT);
 	if (fs->w_q_lookup == NULL) {
@@ -1251,12 +1259,12 @@ config_red(struct dn_fsk *fs)
 		fs->w_q_lookup[i] =
 		    SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight);
 
-	if (dn_cfg.red_avg_pkt_size < 1)
-		dn_cfg.red_avg_pkt_size = 512;
-	fs->avg_pkt_size = dn_cfg.red_avg_pkt_size;
-	if (dn_cfg.red_max_pkt_size < 1)
-		dn_cfg.red_max_pkt_size = 1500;
-	fs->max_pkt_size = dn_cfg.red_max_pkt_size;
+	if (V_dn_cfg.red_avg_pkt_size < 1)
+		V_dn_cfg.red_avg_pkt_size = 512;
+	fs->avg_pkt_size = V_dn_cfg.red_avg_pkt_size;
+	if (V_dn_cfg.red_max_pkt_size < 1)
+		V_dn_cfg.red_max_pkt_size = 1500;
+	fs->max_pkt_size = V_dn_cfg.red_max_pkt_size;
 	ND("exit");
 	return 0;
 }
@@ -1278,7 +1286,7 @@ fsk_attach(struct dn_fsk *fs, struct dn_schk *s)
 {
 	ND("remove fs %d from fsunlinked, link to sched %d",
 	    fs->fs.fs_nr, s->sch.sched_nr);
-	SLIST_REMOVE(&dn_cfg.fsu, fs, dn_fsk, sch_chain);
+	SLIST_REMOVE(&V_dn_cfg.fsu, fs, dn_fsk, sch_chain);
 	fs->sched = s;
 	SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain);
 	if (s->fp->new_fsk)
@@ -1317,7 +1325,7 @@ update_fs(struct dn_schk *s)
 {
 	struct dn_fsk *fs, *tmp;
 
-	SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) {
+	SLIST_FOREACH_SAFE(fs, &V_dn_cfg.fsu, sch_chain, tmp) {
 		if (s->sch.sched_nr != fs->fs.sched_nr) {
 			D("fs %d for sch %d not %d still unlinked",
 			    fs->fs.fs_nr, fs->fs.sched_nr,
@@ -1362,7 +1370,7 @@ get_aqm_parms(struct sockopt *sopt)
 		break;
 	}
 
-	fs = dn_ht_find(dn_cfg.fshash, ep->nr, 0, NULL);
+	fs = dn_ht_find(V_dn_cfg.fshash, ep->nr, 0, NULL);
 	if (!fs) {
 		D("fs %d not found", ep->nr);
 		err = EINVAL;
@@ -1579,7 +1587,7 @@ config_link(struct dn_link *p, struct dn_id *arg)
 		s->link.burst = p->burst;
 		schk_reset_credit(s);
 	}
-	dn_cfg.id++;
+	V_dn_cfg.id++;
 	DN_BH_WUNLOCK();
 	return 0;
 }
@@ -1616,15 +1624,15 @@ config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked)
 	/* XXX other sanity checks */
 	if (nfs->flags & DN_QSIZE_BYTES) {
 		ipdn_bound_var(&nfs->qsize, 16384,
-		    1500, dn_cfg.byte_limit, NULL); // "queue byte size");
+		    1500, V_dn_cfg.byte_limit, NULL); // "queue byte size");
 	} else {
 		ipdn_bound_var(&nfs->qsize, 50,
-		    1, dn_cfg.slot_limit, NULL); // "queue slot size");
+		    1, V_dn_cfg.slot_limit, NULL); // "queue slot size");
 	}
 	if (nfs->flags & DN_HAVE_MASK) {
 		/* make sure we have some buckets */
-		ipdn_bound_var((int *)&nfs->buckets, dn_cfg.hash_size,
-		    1, dn_cfg.max_hash_size, "flowset buckets");
+		ipdn_bound_var((int *)&nfs->buckets, V_dn_cfg.hash_size,
+		    1, V_dn_cfg.max_hash_size, "flowset buckets");
 	} else {
 		nfs->buckets = 1;	/* we only need 1 */
 	}
@@ -1634,8 +1642,8 @@ config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked)
 		struct dn_schk *s;
 		int flags = nfs->sched_nr ? DNHT_INSERT : 0;
 		int j;
-		int oldc = dn_cfg.fsk_count;
-		fs = dn_ht_find(dn_cfg.fshash, i, flags, NULL);
+		int oldc = V_dn_cfg.fsk_count;
+		fs = dn_ht_find(V_dn_cfg.fshash, i, flags, NULL);
 		if (fs == NULL) {
 			D("missing sched for flowset %d", i);
 			break;
@@ -1662,8 +1670,8 @@ config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked)
 #endif
 			break; /* no change, nothing to do */
 		}
-		if (oldc != dn_cfg.fsk_count)	/* new item */
-			dn_cfg.id++;
+		if (oldc != V_dn_cfg.fsk_count)	/* new item */
+			V_dn_cfg.id++;
 		s = locate_scheduler(nfs->sched_nr);
 		/* detach from old scheduler if needed, preserving
		 * queues if we need to reattach. Then update the
@@ -1729,8 +1737,8 @@ config_sched(struct dn_sch *_nsch, struct dn_id *arg)
 		return EINVAL;
 	/* make sure we have some buckets */
 	if (a.sch->flags & DN_HAVE_MASK)
-		ipdn_bound_var((int *)&a.sch->buckets, dn_cfg.hash_size,
-		    1, dn_cfg.max_hash_size, "sched buckets");
+		ipdn_bound_var((int *)&a.sch->buckets, V_dn_cfg.hash_size,
+		    1, V_dn_cfg.max_hash_size, "sched buckets");
 	/* XXX other sanity checks */
 	bzero(&p, sizeof(p));
@@ -1748,14 +1756,14 @@ again: /* run twice, for wfq and fifo */
 	 * lookup the type. If not supplied, use the previous one
	 * or default to WF2Q+. Otherwise, return an error.
	 */
-	dn_cfg.id++;
+	V_dn_cfg.id++;
 	a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name);
 	if (a.fp != NULL) {
 		/* found. Lookup or create entry */
-		s = dn_ht_find(dn_cfg.schedhash, i, DNHT_INSERT, &a);
+		s = dn_ht_find(V_dn_cfg.schedhash, i, DNHT_INSERT, &a);
 	} else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) {
 		/* No type. search existing s* or retry with WF2Q+ */
-		s = dn_ht_find(dn_cfg.schedhash, i, 0, &a);
+		s = dn_ht_find(V_dn_cfg.schedhash, i, 0, &a);
 		if (s != NULL) {
 			a.fp = s->fp;
 			/* Scheduler exists, skip to FIFO scheduler
@@ -1827,7 +1835,7 @@ again: /* run twice, for wfq and fifo */
 			memcpy(pf, s->profile, sizeof(*pf));
 		}
 		/* remove from the hash */
-		dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
+		dn_ht_find(V_dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
 		/* Detach flowsets, preserve queues. */
 		// schk_delete_cb(s, NULL);
 		// XXX temporarily, kill queues
@@ -1845,7 +1853,7 @@ again: /* run twice, for wfq and fifo */
 	 * trying to reuse existing ones if available
	 */
	if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) {
-		s->fs = dn_ht_find(dn_cfg.fshash, i, 0, NULL);
+		s->fs = dn_ht_find(V_dn_cfg.fshash, i, 0, NULL);

*** 381 LINES SKIPPED ***