Date: Tue, 12 Jul 2016 17:32:40 +0000 (UTC) From: Don Lewis <truckman@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r302667 - head/sys/netpfil/ipfw Message-ID: <201607121732.u6CHWeDv030651@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: truckman Date: Tue Jul 12 17:32:40 2016 New Revision: 302667 URL: https://svnweb.freebsd.org/changeset/base/302667 Log: Fix problems in the FQ-PIE AQM cleanup code that could leak memory or cause a crash. Because dummynet calls pie_cleanup() while holding a mutex, pie_cleanup() is not able to use callout_drain() to make sure that all callouts are finished before it returns, and callout_stop() is not sufficient to make that guarantee. After pie_cleanup() returns, dummynet will free a structure that any remaining callouts will want to access. Fix these problems by allocating a separate structure to contain the data used by the callouts. In pie_cleanup(), call callout_reset_sbt() to replace the normal callout with a cleanup callout that does the cleanup work for each sub-queue. The instance of the cleanup callout that destroys the last flow will also free the extra allocated block of memory. Protect the reference count manipulation in the cleanup callout with DN_BH_WLOCK() to be consistent with all of the other usage of the reference count where this lock is held by the dummynet code. Submitted by: Rasool Al-Saadi <ralsaadi@swin.edu.au> MFC after: 3 days Differential Revision: https://reviews.freebsd.org/D7174 Modified: head/sys/netpfil/ipfw/dn_sched_fq_pie.c Modified: head/sys/netpfil/ipfw/dn_sched_fq_pie.c ============================================================================== --- head/sys/netpfil/ipfw/dn_sched_fq_pie.c Tue Jul 12 17:30:37 2016 (r302666) +++ head/sys/netpfil/ipfw/dn_sched_fq_pie.c Tue Jul 12 17:32:40 2016 (r302667) @@ -111,7 +111,7 @@ struct fq_pie_flow { int deficit; int active; /* 1: flow is active (in a list) */ struct pie_status pst; /* pie status variables */ - struct fq_pie_si *psi; /* parent scheduler instance */ + struct fq_pie_si_extra *psi_extra; STAILQ_ENTRY(fq_pie_flow) flowchain; }; @@ -120,23 +120,30 @@ struct fq_pie_schk { struct dn_sch_fq_pie_parms cfg; }; + +/* fq_pie scheduler instance extra state vars. + * The purpose of separation this structure is to preserve number of active + * sub-queues and the flows array pointer even after the scheduler instance + * is destroyed. + * Preserving these varaiables allows freeing the allocated memory by + * fqpie_callout_cleanup() independently from fq_pie_free_sched(). + */ +struct fq_pie_si_extra { + uint32_t nr_active_q; /* number of active queues */ + struct fq_pie_flow *flows; /* array of flows (queues) */ + }; + /* fq_pie scheduler instance */ struct fq_pie_si { - struct dn_sch_inst _si; /* standard scheduler instance */ + struct dn_sch_inst _si; /* standard scheduler instance. SHOULD BE FIRST */ struct dn_queue main_q; /* main queue is after si directly */ - uint32_t nr_active_q; - struct fq_pie_flow *flows; /* array of flows (queues) */ uint32_t perturbation; /* random value */ struct fq_pie_list newflows; /* list of new queues */ struct fq_pie_list oldflows; /* list of old queues */ + struct fq_pie_si_extra *si_extra; /* extra state vars*/ }; -struct mem_to_free { - void *mem_flows; - void *mem_callout; -}; -static struct mtx freemem_mtx; static struct dn_alg fq_pie_desc; /* Default FQ-PIE parameters including PIE */ @@ -371,22 +378,6 @@ fq_calculate_drop_prob(void *x) int64_t p, prob, oldprob; aqm_time_t now; - /* dealing with race condition */ - if (callout_pending(&pst->aqm_pie_callout)) { - /* callout was reset */ - mtx_unlock(&pst->lock_mtx); - return; - } - - if (!callout_active(&pst->aqm_pie_callout)) { - /* callout was stopped */ - mtx_unlock(&pst->lock_mtx); - mtx_destroy(&pst->lock_mtx); - q->psi->nr_active_q--; - return; - } - callout_deactivate(&pst->aqm_pie_callout); - now = AQM_UNOW; pprms = pst->parms; prob = pst->drop_prob; @@ -524,20 +515,17 @@ fq_deactivate_pie(struct pie_status *pst * Initialize PIE for sub-queue 'q' */ static int -pie_init(struct fq_pie_flow *q) +pie_init(struct fq_pie_flow *q, struct fq_pie_schk *fqpie_schk) { struct pie_status *pst=&q->pst; struct dn_aqm_pie_parms *pprms = pst->parms; - struct fq_pie_schk *fqpie_schk; - - fqpie_schk = (struct fq_pie_schk *)(q->psi->_si.sched+1); - int err = 0; + int err = 0; if (!pprms){ D("AQM_PIE is not configured"); err = EINVAL; } else { - q->psi->nr_active_q++; + q->psi_extra->nr_active_q++; /* For speed optimization, we caculate 1/3 queue size once here */ // XXX limit divided by number of queues divided by 3 ??? @@ -553,8 +541,36 @@ pie_init(struct fq_pie_flow *q) } /* + * callout function to destroy PIE lock, and free fq_pie flows and fq_pie si + * extra memory when number of active sub-queues reaches zero. + * 'x' is a fq_pie_flow to be destroyed + */ +static void +fqpie_callout_cleanup(void *x) +{ + struct fq_pie_flow *q = x; + struct pie_status *pst = &q->pst; + struct fq_pie_si_extra *psi_extra; + + mtx_unlock(&pst->lock_mtx); + mtx_destroy(&pst->lock_mtx); + psi_extra = q->psi_extra; + + DN_BH_WLOCK(); + psi_extra->nr_active_q--; + + /* when all sub-queues are destroyed, free flows fq_pie extra vars memory */ + if (!psi_extra->nr_active_q) { + free(psi_extra->flows, M_DUMMYNET); + free(psi_extra, M_DUMMYNET); + fq_pie_desc.ref_count--; + } + DN_BH_WUNLOCK(); +} + +/* * Clean up PIE status for sub-queue 'q' - * Stop callout timer and destroy mtx + * Stop callout timer and destroy mtx using fqpie_callout_cleanup() callout. */ static int pie_cleanup(struct fq_pie_flow *q) @@ -562,14 +578,9 @@ pie_cleanup(struct fq_pie_flow *q) struct pie_status *pst = &q->pst; mtx_lock(&pst->lock_mtx); - if (callout_stop(&pst->aqm_pie_callout) || !(pst->sflags & PIE_ACTIVE)) { - mtx_unlock(&pst->lock_mtx); - mtx_destroy(&pst->lock_mtx); - q->psi->nr_active_q--; - } else { - mtx_unlock(&pst->lock_mtx); - return EBUSY; - } + callout_reset_sbt(&pst->aqm_pie_callout, + SBT_1US, 0, fqpie_callout_cleanup, q, 0); + mtx_unlock(&pst->lock_mtx); return 0; } @@ -831,10 +842,12 @@ fq_pie_enqueue(struct dn_sch_inst *_si, struct fq_pie_schk *schk; struct dn_sch_fq_pie_parms *param; struct dn_queue *mainq; + struct fq_pie_flow *flows; int idx, drop, i, maxidx; mainq = (struct dn_queue *)(_si + 1); si = (struct fq_pie_si *)_si; + flows = si->si_extra->flows; schk = (struct fq_pie_schk *)(si->_si.sched+1); param = &schk->cfg; @@ -844,7 +857,7 @@ fq_pie_enqueue(struct dn_sch_inst *_si, /* enqueue packet into appropriate queue using PIE AQM. * Note: 'pie_enqueue' function returns 1 only when it unable to * add timestamp to packet (no limit check)*/ - drop = pie_enqueue(&si->flows[idx], m, si); + drop = pie_enqueue(&flows[idx], m, si); /* pie unable to timestamp a packet */ if (drop) @@ -853,11 +866,11 @@ fq_pie_enqueue(struct dn_sch_inst *_si, /* If the flow (sub-queue) is not active ,then add it to tail of * new flows list, initialize and activate it. */ - if (!si->flows[idx].active) { - STAILQ_INSERT_TAIL(&si->newflows, &si->flows[idx], flowchain); - si->flows[idx].deficit = param->quantum; - fq_activate_pie(&si->flows[idx]); - si->flows[idx].active = 1; + if (!flows[idx].active) { + STAILQ_INSERT_TAIL(&si->newflows, &flows[idx], flowchain); + flows[idx].deficit = param->quantum; + fq_activate_pie(&flows[idx]); + flows[idx].active = 1; } /* check the limit for all queues and remove a packet from the @@ -866,15 +879,15 @@ fq_pie_enqueue(struct dn_sch_inst *_si, if (mainq->ni.length > schk->cfg.limit) { /* find first active flow */ for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++) - if (si->flows[maxidx].active) + if (flows[maxidx].active) break; if (maxidx < schk->cfg.flows_cnt) { /* find the largest sub- queue */ for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++) - if (si->flows[i].active && si->flows[i].stats.length > - si->flows[maxidx].stats.length) + if (flows[i].active && flows[i].stats.length > + flows[maxidx].stats.length) maxidx = i; - pie_drop_head(&si->flows[maxidx], si); + pie_drop_head(&flows[maxidx], si); drop = 1; } } @@ -974,12 +987,13 @@ fq_pie_new_sched(struct dn_sch_inst *_si struct fq_pie_si *si; struct dn_queue *q; struct fq_pie_schk *schk; + struct fq_pie_flow *flows; int i; si = (struct fq_pie_si *)_si; schk = (struct fq_pie_schk *)(_si->sched+1); - if(si->flows) { + if(si->si_extra) { D("si already configured!"); return 0; } @@ -990,17 +1004,27 @@ fq_pie_new_sched(struct dn_sch_inst *_si q->_si = _si; q->fs = _si->sched->fs; + /* allocate memory for scheduler instance extra vars */ + si->si_extra = malloc(sizeof(struct fq_pie_si_extra), + M_DUMMYNET, M_NOWAIT | M_ZERO); + if (si->si_extra == NULL) { + D("cannot allocate memory for fq_pie si extra vars"); + return ENOMEM ; + } /* allocate memory for flows array */ - si->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_pie_flow), + si->si_extra->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_pie_flow), M_DUMMYNET, M_NOWAIT | M_ZERO); - if (si->flows == NULL) { - D("cannot allocate memory for fq_pie configuration parameters"); + flows = si->si_extra->flows; + if (flows == NULL) { + free(si->si_extra, M_DUMMYNET); + si->si_extra = NULL; + D("cannot allocate memory for fq_pie flows"); return ENOMEM ; } /* init perturbation for this si */ si->perturbation = random(); - si->nr_active_q = 0; + si->si_extra->nr_active_q = 0; /* init the old and new flows lists */ STAILQ_INIT(&si->newflows); @@ -1008,45 +1032,16 @@ fq_pie_new_sched(struct dn_sch_inst *_si /* init the flows (sub-queues) */ for (i = 0; i < schk->cfg.flows_cnt; i++) { - si->flows[i].pst.parms = &schk->cfg.pcfg; - si->flows[i].psi = si; - pie_init(&si->flows[i]); - } - - /* init mtx lock and callout function for free memory */ - if (!fq_pie_desc.ref_count) { - mtx_init(&freemem_mtx, "mtx_pie", NULL, MTX_DEF); + flows[i].pst.parms = &schk->cfg.pcfg; + flows[i].psi_extra = si->si_extra; + pie_init(&flows[i], schk); } - mtx_lock(&freemem_mtx); fq_pie_desc.ref_count++; - mtx_unlock(&freemem_mtx); return 0; } -/* - * Free FQ-PIE flows memory callout function. - * This function is scheduled when a flow or more still active and - * the scheduer is about to be destroyed, to prevent memory leak. - */ -static void -free_flows(void *_mem) -{ - struct mem_to_free *mem = _mem; - - free(mem->mem_flows, M_DUMMYNET); - free(mem->mem_callout, M_DUMMYNET); - free(_mem, M_DUMMYNET); - - fq_pie_desc.ref_count--; - if (!fq_pie_desc.ref_count) { - mtx_unlock(&freemem_mtx); - mtx_destroy(&freemem_mtx); - } else - mtx_unlock(&freemem_mtx); - //D("mem freed ok!"); -} /* * Free fq_pie scheduler instance. @@ -1056,61 +1051,17 @@ fq_pie_free_sched(struct dn_sch_inst *_s { struct fq_pie_si *si; struct fq_pie_schk *schk; + struct fq_pie_flow *flows; int i; si = (struct fq_pie_si *)_si; schk = (struct fq_pie_schk *)(_si->sched+1); - + flows = si->si_extra->flows; for (i = 0; i < schk->cfg.flows_cnt; i++) { - pie_cleanup(&si->flows[i]); - } - - /* if there are still some queues have a callout going to start, - * we cannot free flows memory. If we do so, a panic can happen - * as prob calculate callout function uses flows memory. - */ - if (!si->nr_active_q) { - /* free the flows array */ - free(si->flows , M_DUMMYNET); - si->flows = NULL; - mtx_lock(&freemem_mtx); - fq_pie_desc.ref_count--; - if (!fq_pie_desc.ref_count) { - mtx_unlock(&freemem_mtx); - mtx_destroy(&freemem_mtx); - } else - mtx_unlock(&freemem_mtx); - //D("ok!"); - return 0; - } else { - /* memory leak happens here. So, we register a callout function to free - * flows memory later. - */ - D("unable to stop all fq_pie sub-queues!"); - mtx_lock(&freemem_mtx); - - struct callout *mem_callout; - struct mem_to_free *mem; - - mem = malloc(sizeof(*mem), M_DUMMYNET, - M_NOWAIT | M_ZERO); - mem_callout = malloc(sizeof(*mem_callout), M_DUMMYNET, - M_NOWAIT | M_ZERO); - - callout_init_mtx(mem_callout, &freemem_mtx, - CALLOUT_RETURNUNLOCKED); - - mem->mem_flows = si->flows; - mem->mem_callout = mem_callout; - callout_reset_sbt(mem_callout, - (uint64_t)(si->flows[0].pst.parms->tupdate + 1000) * SBT_1US, - 0, free_flows, mem, 0); - - si->flows = NULL; - mtx_unlock(&freemem_mtx); - - return EBUSY; + pie_cleanup(&flows[i]); } + si->si_extra = NULL; + return 0; } /*
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201607121732.u6CHWeDv030651>