Date: Mon, 30 Jul 2012 15:02:05 +0100
From: Attilio Rao <attilio@freebsd.org>
To: Davide Italiano <davide@freebsd.org>
Cc: svn-src-projects@freebsd.org, src-committers@freebsd.org
Subject: Re: svn commit: r238907 - projects/calloutng/sys/kern
Message-ID: <CAJ-FndBJNNBNDUEDsDBUvwoVExZpnXmoJmpY58gE3QQbw3hRGA@mail.gmail.com>
In-Reply-To: <201207301350.q6UDobCI099069@svn.freebsd.org>
References: <201207301350.q6UDobCI099069@svn.freebsd.org>
On 7/30/12, Davide Italiano <davide@freebsd.org> wrote:
> Author: davide
> Date: Mon Jul 30 13:50:37 2012
> New Revision: 238907
> URL: http://svn.freebsd.org/changeset/base/238907
>
> Log:
>   - Fix a LOR deadlock dropping the callout lock while executing the handler
>     directly from hardware interrupt context.
>   - Add migration support for callouts which runs from hw interrupt context.
>   - Refactor a couple of comments to reflect the new world order.
>
>   TODO: implement proper stats for direct callouts.
>
>   Just a note: I'd like to thank flo@ for his invaluable help in testing and
>   issues, mav@ that helped me in tackling the problem and Yahoo! which
>   provided access to the machines on which tests were run.
>
>   Reported by:    avg, flo [1]
>   Discused with:  mav
>
> Modified:
>   projects/calloutng/sys/kern/kern_timeout.c
>
> Modified: projects/calloutng/sys/kern/kern_timeout.c
> ==============================================================================
> --- projects/calloutng/sys/kern/kern_timeout.c  Mon Jul 30 12:25:20 2012  (r238906)
> +++ projects/calloutng/sys/kern/kern_timeout.c  Mon Jul 30 13:50:37 2012  (r238907)
> @@ -91,45 +91,62 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_mpca
>  int callwheelsize, callwheelmask;
>
>  /*
> - * The callout cpu migration entity represents informations necessary for
> - * describing the migrating callout to the new callout cpu.
> + * The callout cpu exec entities represent informations necessary for
> + * describing the state of callouts currently running on the CPU and the ones
> + * necessary for migrating callouts to the new callout cpu. In particular,
> + * the first entry of the array cc_exec_entity holds informations for callout
> + * running in SWI thread context, while the second one holds informations
> + * for callout running directly from hardware interrupt context.
>   * The cached informations are very important for deferring migration when
>   * the migrating callout is already running.
>   */
> -struct cc_mig_ent {
> +struct cc_exec {
> +        struct callout          *cc_next;
> +        struct callout          *cc_curr;
>  #ifdef SMP
>          void                    (*ce_migration_func)(void *);
>          void                    *ce_migration_arg;
>          int                     ce_migration_cpu;
>          struct bintime          ce_migration_time;
>  #endif
> +        int                     cc_cancel;
> +        int                     cc_waiting;
>  };
>
>  /*
> - * There is one struct callout_cpu per cpu, holding all relevant
> + * There is one struct callou_cpu per cpu, holding all relevant
>   * state for the callout processing thread on the individual CPU.
>   */
>  struct callout_cpu {
> -        struct cc_mig_ent       cc_migrating_entity;
> +        struct cc_exec          cc_exec_entity[2];
>          struct mtx              cc_lock;
>          struct callout          *cc_callout;
>          struct callout_tailq    *cc_callwheel;
>          struct callout_tailq    cc_expireq;
>          struct callout_list     cc_callfree;
> -        struct callout          *cc_next;
> -        struct callout          *cc_curr;
>          struct bintime          cc_firstevent;
>          struct bintime          cc_lastscan;
>          void                    *cc_cookie;
> -        int                     cc_cancel;
> -        int                     cc_waiting;
>  };
>
> +#define cc_exec_curr            cc_exec_entity[0].cc_curr
> +#define cc_exec_next            cc_exec_entity[0].cc_next
> +#define cc_exec_cancel          cc_exec_entity[0].cc_cancel
> +#define cc_exec_waiting         cc_exec_entity[0].cc_waiting
> +#define cc_exec_curr_dir        cc_exec_entity[1].cc_curr
> +#define cc_exec_next_dir        cc_exec_entity[1].cc_next
> +#define cc_exec_cancel_dir      cc_exec_entity[1].cc_cancel
> +#define cc_exec_waiting_dir     cc_exec_entity[1].cc_waiting
> +
>  #ifdef SMP
> -#define cc_migration_func       cc_migrating_entity.ce_migration_func
> -#define cc_migration_arg        cc_migrating_entity.ce_migration_arg
> -#define cc_migration_cpu        cc_migrating_entity.ce_migration_cpu
> -#define cc_migration_time       cc_migrating_entity.ce_migration_time
> +#define cc_migration_func       cc_exec_entity[0].ce_migration_func
> +#define cc_migration_arg        cc_exec_entity[0].ce_migration_arg
> +#define cc_migration_cpu        cc_exec_entity[0].ce_migration_cpu
> +#define cc_migration_time       cc_exec_entity[0].ce_migration_time
> +#define cc_migration_func_dir   cc_exec_entity[1].ce_migration_func
> +#define cc_migration_arg_dir    cc_exec_entity[1].ce_migration_arg
> +#define cc_migration_cpu_dir    cc_exec_entity[1].ce_migration_cpu
> +#define cc_migration_time_dir   cc_exec_entity[1].ce_migration_time
>
>  struct callout_cpu cc_cpu[MAXCPU];
>  #define CPUBLOCK        MAXCPU
> @@ -156,6 +173,9 @@ struct callout_cpu cc_cpu;
>
>  static int timeout_cpu;
>  void (*callout_new_inserted)(int cpu, struct bintime bt) = NULL;
> +static struct callout *
> +softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
> +    int *lockcalls, int *gcalls, int direct);
>
>  static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
>
> @@ -180,15 +200,18 @@ static MALLOC_DEFINE(M_CALLOUT, "callout
>   * Resets the migration entity tied to a specific callout cpu.
>   */
>  static void
> -cc_cme_cleanup(struct callout_cpu *cc)
> +cc_cme_cleanup(struct callout_cpu *cc, int direct)
>  {
> -
> +
> +        cc->cc_exec_entity[direct].cc_curr = NULL;
> +        cc->cc_exec_entity[direct].cc_next = NULL;
> +        cc->cc_exec_entity[direct].cc_cancel = 0;
> +        cc->cc_exec_entity[direct].cc_waiting = 0;
>  #ifdef SMP
> -        cc->cc_migration_cpu = CPUBLOCK;
> -        cc->cc_migration_time.sec = 0;
> -        cc->cc_migration_time.frac = 0;
> -        cc->cc_migration_func = NULL;
> -        cc->cc_migration_arg = NULL;
> +        cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK;
> +        bintime_clear(&cc->cc_exec_entity[direct].ce_migration_time);
> +        cc->cc_exec_entity[direct].ce_migration_func = NULL;
> +        cc->cc_exec_entity[direct].ce_migration_arg = NULL;
>  #endif
>  }
>
> @@ -196,11 +219,12 @@ cc_cme_cleanup(struct callout_cpu *cc)
>   * Checks if migration is requested by a specific callout cpu.
>   */
>  static int
> -cc_cme_migrating(struct callout_cpu *cc)
> +cc_cme_migrating(struct callout_cpu *cc, int direct)
>  {
>
>  #ifdef SMP
> -        return (cc->cc_migration_cpu != CPUBLOCK);
> +
> +        return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK);
>  #else
>          return (0);
>  #endif
> @@ -246,7 +270,8 @@ callout_cpu_init(struct callout_cpu *cc)
>                  TAILQ_INIT(&cc->cc_callwheel[i]);
>          }
>          TAILQ_INIT(&cc->cc_expireq);
> -        cc_cme_cleanup(cc);
> +        for (i = 0; i < 2; i++)
> +                cc_cme_cleanup(cc, i);
>          if (cc->cc_callout == NULL)
>                  return;
>          for (i = 0; i < ncallout; i++) {
> @@ -355,7 +380,8 @@ callout_process(struct bintime *now)
>          struct callout *tmp;
>          struct callout_cpu *cc;
>          struct callout_tailq *sc;
> -        int cpu, first, future, last, need_softclock;
> +        int cpu, first, future, gcalls, mpcalls, last, lockcalls,
> +            need_softclock;
>
>          /*
>           * Process callouts at a very low cpu priority, so we don't keep the
> @@ -376,7 +402,8 @@ callout_process(struct bintime *now)
>          first &= callwheelmask;
>          for (;;) {
>                  sc = &cc->cc_callwheel[first];
> -                TAILQ_FOREACH(tmp, sc, c_links.tqe) {
> +                tmp = TAILQ_FIRST(sc);
> +                while (tmp != NULL) {
>                          next = tmp->c_time;
>                          bintime_sub(&next, &tmp->c_precision);
>                          if (bintime_cmp(&next, now, <=)) {
> @@ -385,17 +412,20 @@ callout_process(struct bintime *now)
>                                   * directly from hardware interrupt context.
>                                   */
>                                  if (tmp->c_flags & CALLOUT_DIRECT) {
> -                                        tmp->c_func(tmp->c_arg);
>                                          TAILQ_REMOVE(sc, tmp, c_links.tqe);
> -                                        tmp->c_flags &= ~CALLOUT_PENDING;
> +                                        tmp = softclock_call_cc(tmp, cc,
> +                                            &mpcalls, &lockcalls, &gcalls, 1);
>                                  } else {
>                                          TAILQ_INSERT_TAIL(&cc->cc_expireq,
>                                              tmp, c_staiter);
>                                          TAILQ_REMOVE(sc, tmp, c_links.tqe);
>                                          tmp->c_flags |= CALLOUT_PROCESSED;
>                                          need_softclock = 1;
> +                                        tmp = TAILQ_NEXT(tmp, c_links.tqe);
>                                  }
>                          }
> +                        else
> +                                tmp = TAILQ_NEXT(tmp, c_links.tqe);
>                  }
>                  if (first == last)
>                          break;
> @@ -561,11 +591,12 @@ callout_cc_add(struct callout *c, struct
>  }
>
>  static void
> -callout_cc_del(struct callout *c, struct callout_cpu *cc)
> +callout_cc_del(struct callout *c, struct callout_cpu *cc, int direct)
>  {
> -
> -        if (cc->cc_next == c)
> -                cc->cc_next = TAILQ_NEXT(c, c_staiter);
> +        if (direct && cc->cc_exec_next_dir == c)
> +                cc->cc_exec_next_dir = TAILQ_NEXT(c, c_links.tqe);
> +        else if (!direct && cc->cc_exec_next == c)
> +                cc->cc_exec_next = TAILQ_NEXT(c, c_staiter);
>          if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
>                  c->c_func = NULL;
>                  SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
> @@ -574,13 +605,13 @@ callout_cc_del(struct callout *c, struct
>
>  static struct callout *
>  softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
> -    int *lockcalls, int *gcalls)
> +    int *lockcalls, int *gcalls, int direct)
>  {
>          void (*c_func)(void *);
>          void *c_arg;
>          struct lock_class *class;
>          struct lock_object *c_lock;
> -        int c_flags, sharedlock;
> +        int c_flags, flags, sharedlock;
>  #ifdef SMP
>          struct callout_cpu *new_cc;
>          void (*new_func)(void *);
> @@ -595,7 +626,14 @@ softclock_call_cc(struct callout *c, str
>          static timeout_t *lastfunc;
>  #endif
>
> -        cc->cc_next = TAILQ_NEXT(c, c_staiter);
> +        if (direct)
> +                cc->cc_exec_next_dir = TAILQ_NEXT(c, c_links.tqe);
> +        else {
> +                if ((c->c_flags & CALLOUT_PROCESSED) == 0)
> +                        cc->cc_exec_next = TAILQ_NEXT(c, c_links.tqe);
> +                else
> +                        cc->cc_exec_next = TAILQ_NEXT(c, c_staiter);
> +        }
>          class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
>          sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
>          c_lock = c->c_lock;
> @@ -606,8 +644,8 @@ softclock_call_cc(struct callout *c, str
>                  c->c_flags = CALLOUT_LOCAL_ALLOC;
>          else
>                  c->c_flags &= ~CALLOUT_PENDING;
> -        cc->cc_curr = c;
> -        cc->cc_cancel = 0;
> +        cc->cc_exec_entity[direct].cc_curr = c;
> +        cc->cc_exec_entity[direct].cc_cancel = 0;
>          CC_UNLOCK(cc);
>          if (c_lock != NULL) {
>                  class->lc_lock(c_lock, sharedlock);
> @@ -615,13 +653,12 @@ softclock_call_cc(struct callout *c, str
>                   * The callout may have been cancelled
>                   * while we switched locks.
>                   */
> -                if (cc->cc_cancel) {
> +                if (cc->cc_exec_entity[direct].cc_cancel) {
>                          class->lc_unlock(c_lock);
>                          goto skip;
>                  }
>                  /* The callout cannot be stopped now. */
> -                cc->cc_cancel = 1;
> -
> +                cc->cc_exec_entity[direct].cc_cancel = 1;
>                  if (c_lock == &Giant.lock_object) {
>                          (*gcalls)++;
>                          CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
> @@ -639,11 +676,13 @@ softclock_call_cc(struct callout *c, str
>  #ifdef DIAGNOSTIC
>          binuptime(&bt1);
>  #endif
> -        THREAD_NO_SLEEPING();
> +        if (!direct)
> +                THREAD_NO_SLEEPING();
>          SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0);
>          c_func(c_arg);
>          SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0);
> -        THREAD_SLEEPING_OK();
> +        if (!direct)
> +                THREAD_SLEEPING_OK();

I didn't look into the full patch, but I have a question about this: was it
necessary because this now runs in interrupt context, and so it could hit the
nested no-sleeping check? Or is there something else? I think it would be a
good thing to keep THREAD_NO_SLEEPING() in the direct dispatch case as well.

Attilio

--
Peace can only be achieved by understanding - A. Einstein
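
To make the data-structure change above easier to follow, here is a minimal,
self-contained userland sketch of the two-slot execution-state layout the
patch introduces: slot 0 tracks a callout running from the SWI thread, slot 1
one running directly from hardware interrupt context. The struct, field, and
macro names follow the diff; the stub types and the main() driver are
illustrative assumptions, not kernel code.

    #include <stddef.h>
    #include <stdio.h>

    struct callout;                         /* opaque for this sketch */

    struct cc_exec {
            struct callout  *cc_curr;       /* callout currently executing */
            struct callout  *cc_next;       /* next callout to examine */
            int             cc_cancel;      /* cancellation requested */
            int             cc_waiting;     /* someone waits for completion */
    };

    struct callout_cpu {
            struct cc_exec  cc_exec_entity[2];  /* [0] = SWI, [1] = direct */
    };

    /* Accessor macros in the style of the patch (names from the diff). */
    #define cc_exec_curr            cc_exec_entity[0].cc_curr
    #define cc_exec_curr_dir        cc_exec_entity[1].cc_curr

    /* Reset one slot, as cc_cme_cleanup(cc, direct) does in the patch. */
    static void
    exec_slot_cleanup(struct callout_cpu *cc, int direct)
    {
            cc->cc_exec_entity[direct].cc_curr = NULL;
            cc->cc_exec_entity[direct].cc_next = NULL;
            cc->cc_exec_entity[direct].cc_cancel = 0;
            cc->cc_exec_entity[direct].cc_waiting = 0;
    }

    int
    main(void)
    {
            struct callout_cpu cc;
            int i;

            /* Initialize both slots, like the loop added to callout_cpu_init(). */
            for (i = 0; i < 2; i++)
                    exec_slot_cleanup(&cc, i);
            printf("SWI slot curr=%p, direct slot curr=%p\n",
                (void *)cc.cc_exec_curr, (void *)cc.cc_exec_curr_dir);
            return (0);
    }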
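
On the THREAD_NO_SLEEPING() question itself, the pattern at issue can be
illustrated with a small userland analogue (an assumption-laden stand-in, not
the kernel implementation: the no_sleeping flag, the stub macros, and
might_sleep() are invented for illustration). It shows what the "if (!direct)"
guard in the diff does: when a handler is dispatched directly from interrupt
context, the no-sleep bracket is skipped, so a handler that reaches a sleep
point would not be caught by this particular assertion.

    #include <assert.h>
    #include <stdio.h>

    static int no_sleeping;                 /* stand-in for a per-thread flag */

    #define THREAD_NO_SLEEPING()    (no_sleeping++)
    #define THREAD_SLEEPING_OK()    (no_sleeping--)

    /* Stand-in for a sleep point: asserts we are not in a no-sleep region. */
    static void
    might_sleep(void)
    {
            assert(no_sleeping == 0 && "sleeping inside a no-sleep region");
    }

    /* A callout handler that (incorrectly) reaches a sleep point. */
    static void
    handler(void *arg)
    {
            (void)arg;
            might_sleep();
    }

    /* Bracket the handler with the no-sleep flag unless dispatched directly. */
    static void
    dispatch(void (*func)(void *), void *arg, int direct)
    {
            if (!direct)
                    THREAD_NO_SLEEPING();
            func(arg);
            if (!direct)
                    THREAD_SLEEPING_OK();
    }

    int
    main(void)
    {
            dispatch(handler, NULL, 1);     /* direct: the sleep check is skipped */
            printf("direct dispatch did not trip the no-sleep assertion\n");
            /* dispatch(handler, NULL, 0) would abort on the assert above. */
            return (0);
    }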