Date: Mon, 30 Jul 2012 16:17:21 +0200 From: Davide Italiano <davide@freebsd.org> To: attilio@freebsd.org Cc: svn-src-projects@freebsd.org, src-committers@freebsd.org Subject: Re: svn commit: r238907 - projects/calloutng/sys/kern Message-ID: <CACYV=-HmOwZ=E8Pw3-mUw0994SbvZaA3eMfcwM0fDTu_zykBJg@mail.gmail.com> In-Reply-To: <CAJ-FndBJNNBNDUEDsDBUvwoVExZpnXmoJmpY58gE3QQbw3hRGA@mail.gmail.com> References: <201207301350.q6UDobCI099069@svn.freebsd.org> <CAJ-FndBJNNBNDUEDsDBUvwoVExZpnXmoJmpY58gE3QQbw3hRGA@mail.gmail.com>
next in thread | previous in thread | raw e-mail | index | archive | help
On Mon, Jul 30, 2012 at 4:02 PM, Attilio Rao <attilio@freebsd.org> wrote: > On 7/30/12, Davide Italiano <davide@freebsd.org> wrote: >> Author: davide >> Date: Mon Jul 30 13:50:37 2012 >> New Revision: 238907 >> URL: http://svn.freebsd.org/changeset/base/238907 >> >> Log: >> - Fix a LOR deadlock dropping the callout lock while executing the >> handler >> directly from hardware interrupt context. >> >> - Add migration support for callouts which runs from hw interrupt >> context. >> >> - Refactor a couple of comments to reflect the new world order. >> >> TODO: implement proper stats for direct callouts. >> >> Just a note: I'd like to thank flo@ for his invaluable help in testing >> and >> issues, mav@ that helped me in tackling the problem and Yahoo! which >> provided access to the machines on which tests were run. >> >> Reported by: avg, flo [1] >> Discused with: mav >> >> Modified: >> projects/calloutng/sys/kern/kern_timeout.c >> >> Modified: projects/calloutng/sys/kern/kern_timeout.c >> ============================================================================== >> --- projects/calloutng/sys/kern/kern_timeout.c Mon Jul 30 12:25:20 >> 2012 (r238906) >> +++ projects/calloutng/sys/kern/kern_timeout.c Mon Jul 30 13:50:37 >> 2012 (r238907) >> @@ -91,45 +91,62 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_mpca >> int callwheelsize, callwheelmask; >> >> /* >> - * The callout cpu migration entity represents informations necessary for >> - * describing the migrating callout to the new callout cpu. >> + * The callout cpu exec entities represent informations necessary for >> + * describing the state of callouts currently running on the CPU and the >> ones >> + * necessary for migrating callouts to the new callout cpu. In particular, >> + * the first entry of the array cc_exec_entity holds informations for >> callout >> + * running in SWI thread context, while the second one holds informations >> + * for callout running directly from hardware interrupt context. 
>> * The cached informations are very important for deferring migration when >> * the migrating callout is already running. >> */ >> -struct cc_mig_ent { >> +struct cc_exec { >> + struct callout *cc_next; >> + struct callout *cc_curr; >> #ifdef SMP >> void (*ce_migration_func)(void *); >> void *ce_migration_arg; >> int ce_migration_cpu; >> struct bintime ce_migration_time; >> #endif >> + int cc_cancel; >> + int cc_waiting; >> }; >> >> /* >> - * There is one struct callout_cpu per cpu, holding all relevant >> + * There is one struct callou_cpu per cpu, holding all relevant >> * state for the callout processing thread on the individual CPU. >> */ >> struct callout_cpu { >> - struct cc_mig_ent cc_migrating_entity; >> + struct cc_exec cc_exec_entity[2]; >> struct mtx cc_lock; >> struct callout *cc_callout; >> struct callout_tailq *cc_callwheel; >> struct callout_tailq cc_expireq; >> struct callout_list cc_callfree; >> - struct callout *cc_next; >> - struct callout *cc_curr; >> struct bintime cc_firstevent; >> struct bintime cc_lastscan; >> void *cc_cookie; >> - int cc_cancel; >> - int cc_waiting; >> }; >> >> +#define cc_exec_curr cc_exec_entity[0].cc_curr >> +#define cc_exec_next cc_exec_entity[0].cc_next >> +#define cc_exec_cancel cc_exec_entity[0].cc_cancel >> +#define cc_exec_waiting cc_exec_entity[0].cc_waiting >> +#define cc_exec_curr_dir cc_exec_entity[1].cc_curr >> +#define cc_exec_next_dir cc_exec_entity[1].cc_next >> +#define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel >> +#define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting >> + >> #ifdef SMP >> -#define cc_migration_func cc_migrating_entity.ce_migration_func >> -#define cc_migration_arg cc_migrating_entity.ce_migration_arg >> -#define cc_migration_cpu cc_migrating_entity.ce_migration_cpu >> -#define cc_migration_time cc_migrating_entity.ce_migration_time >> +#define cc_migration_func cc_exec_entity[0].ce_migration_func >> +#define cc_migration_arg cc_exec_entity[0].ce_migration_arg >> +#define 
cc_migration_cpu cc_exec_entity[0].ce_migration_cpu >> +#define cc_migration_time cc_exec_entity[0].ce_migration_time >> +#define cc_migration_func_dir cc_exec_entity[1].ce_migration_func >> +#define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg >> +#define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu >> +#define cc_migration_time_dir cc_exec_entity[1].ce_migration_time >> >> struct callout_cpu cc_cpu[MAXCPU]; >> #define CPUBLOCK MAXCPU >> @@ -156,6 +173,9 @@ struct callout_cpu cc_cpu; >> >> static int timeout_cpu; >> void (*callout_new_inserted)(int cpu, struct bintime bt) = NULL; >> +static struct callout * >> +softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls, >> + int *lockcalls, int *gcalls, int direct); >> >> static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); >> >> @@ -180,15 +200,18 @@ static MALLOC_DEFINE(M_CALLOUT, "callout >> * Resets the migration entity tied to a specific callout cpu. >> */ >> static void >> -cc_cme_cleanup(struct callout_cpu *cc) >> +cc_cme_cleanup(struct callout_cpu *cc, int direct) >> { >> - >> + >> + cc->cc_exec_entity[direct].cc_curr = NULL; >> + cc->cc_exec_entity[direct].cc_next = NULL; >> + cc->cc_exec_entity[direct].cc_cancel = 0; >> + cc->cc_exec_entity[direct].cc_waiting = 0; >> #ifdef SMP >> - cc->cc_migration_cpu = CPUBLOCK; >> - cc->cc_migration_time.sec = 0; >> - cc->cc_migration_time.frac = 0; >> - cc->cc_migration_func = NULL; >> - cc->cc_migration_arg = NULL; >> + cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK; >> + bintime_clear(&cc->cc_exec_entity[direct].ce_migration_time); >> + cc->cc_exec_entity[direct].ce_migration_func = NULL; >> + cc->cc_exec_entity[direct].ce_migration_arg = NULL; >> #endif >> } >> >> @@ -196,11 +219,12 @@ cc_cme_cleanup(struct callout_cpu *cc) >> * Checks if migration is requested by a specific callout cpu. 
>> */ >> static int >> -cc_cme_migrating(struct callout_cpu *cc) >> +cc_cme_migrating(struct callout_cpu *cc, int direct) >> { >> >> #ifdef SMP >> - return (cc->cc_migration_cpu != CPUBLOCK); >> + >> + return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK); >> #else >> return (0); >> #endif >> @@ -246,7 +270,8 @@ callout_cpu_init(struct callout_cpu *cc) >> TAILQ_INIT(&cc->cc_callwheel[i]); >> } >> TAILQ_INIT(&cc->cc_expireq); >> - cc_cme_cleanup(cc); >> + for (i = 0; i < 2; i++) >> + cc_cme_cleanup(cc, i); >> if (cc->cc_callout == NULL) >> return; >> for (i = 0; i < ncallout; i++) { >> @@ -355,7 +380,8 @@ callout_process(struct bintime *now) >> struct callout *tmp; >> struct callout_cpu *cc; >> struct callout_tailq *sc; >> - int cpu, first, future, last, need_softclock; >> + int cpu, first, future, gcalls, mpcalls, last, lockcalls, >> + need_softclock; >> >> /* >> * Process callouts at a very low cpu priority, so we don't keep the >> @@ -376,7 +402,8 @@ callout_process(struct bintime *now) >> first &= callwheelmask; >> for (;;) { >> sc = &cc->cc_callwheel[first]; >> - TAILQ_FOREACH(tmp, sc, c_links.tqe) { >> + tmp = TAILQ_FIRST(sc); >> + while (tmp != NULL) { >> next = tmp->c_time; >> bintime_sub(&next, &tmp->c_precision); >> if (bintime_cmp(&next, now, <=)) { >> @@ -385,17 +412,20 @@ callout_process(struct bintime *now) >> * directly from hardware interrupt context. 
>> */ >> if (tmp->c_flags & CALLOUT_DIRECT) { >> - tmp->c_func(tmp->c_arg); >> TAILQ_REMOVE(sc, tmp, c_links.tqe); >> - tmp->c_flags &= ~CALLOUT_PENDING; >> + tmp = softclock_call_cc(tmp, cc, >> + &mpcalls, &lockcalls, &gcalls, 1); >> } else { >> TAILQ_INSERT_TAIL(&cc->cc_expireq, >> tmp, c_staiter); >> TAILQ_REMOVE(sc, tmp, c_links.tqe); >> tmp->c_flags |= CALLOUT_PROCESSED; >> need_softclock = 1; >> + tmp = TAILQ_NEXT(tmp, c_links.tqe); >> } >> } >> + else >> + tmp = TAILQ_NEXT(tmp, c_links.tqe); >> } >> if (first == last) >> break; >> @@ -561,11 +591,12 @@ callout_cc_add(struct callout *c, struct >> } >> >> static void >> -callout_cc_del(struct callout *c, struct callout_cpu *cc) >> +callout_cc_del(struct callout *c, struct callout_cpu *cc, int direct) >> { >> - >> - if (cc->cc_next == c) >> - cc->cc_next = TAILQ_NEXT(c, c_staiter); >> + if (direct && cc->cc_exec_next_dir == c) >> + cc->cc_exec_next_dir = TAILQ_NEXT(c, c_links.tqe); >> + else if (!direct && cc->cc_exec_next == c) >> + cc->cc_exec_next = TAILQ_NEXT(c, c_staiter); >> if (c->c_flags & CALLOUT_LOCAL_ALLOC) { >> c->c_func = NULL; >> SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); >> @@ -574,13 +605,13 @@ callout_cc_del(struct callout *c, struct >> >> static struct callout * >> softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls, >> - int *lockcalls, int *gcalls) >> + int *lockcalls, int *gcalls, int direct) >> { >> void (*c_func)(void *); >> void *c_arg; >> struct lock_class *class; >> struct lock_object *c_lock; >> - int c_flags, sharedlock; >> + int c_flags, flags, sharedlock; >> #ifdef SMP >> struct callout_cpu *new_cc; >> void (*new_func)(void *); >> @@ -595,7 +626,14 @@ softclock_call_cc(struct callout *c, str >> static timeout_t *lastfunc; >> #endif >> >> - cc->cc_next = TAILQ_NEXT(c, c_staiter); >> + if (direct) >> + cc->cc_exec_next_dir = TAILQ_NEXT(c, c_links.tqe); >> + else { >> + if ((c->c_flags & CALLOUT_PROCESSED) == 0) >> + cc->cc_exec_next = TAILQ_NEXT(c, 
c_links.tqe); >> + else >> + cc->cc_exec_next = TAILQ_NEXT(c, c_staiter); >> + } >> class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; >> sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1; >> c_lock = c->c_lock; >> @@ -606,8 +644,8 @@ softclock_call_cc(struct callout *c, str >> c->c_flags = CALLOUT_LOCAL_ALLOC; >> else >> c->c_flags &= ~CALLOUT_PENDING; >> - cc->cc_curr = c; >> - cc->cc_cancel = 0; >> + cc->cc_exec_entity[direct].cc_curr = c; >> + cc->cc_exec_entity[direct].cc_cancel = 0; >> CC_UNLOCK(cc); >> if (c_lock != NULL) { >> class->lc_lock(c_lock, sharedlock); >> @@ -615,13 +653,12 @@ softclock_call_cc(struct callout *c, str >> * The callout may have been cancelled >> * while we switched locks. >> */ >> - if (cc->cc_cancel) { >> + if (cc->cc_exec_entity[direct].cc_cancel) { >> class->lc_unlock(c_lock); >> goto skip; >> } >> /* The callout cannot be stopped now. */ >> - cc->cc_cancel = 1; >> - >> + cc->cc_exec_entity[direct].cc_cancel = 1; >> if (c_lock == &Giant.lock_object) { >> (*gcalls)++; >> CTR3(KTR_CALLOUT, "callout %p func %p arg %p", >> @@ -639,11 +676,13 @@ softclock_call_cc(struct callout *c, str >> #ifdef DIAGNOSTIC >> binuptime(&bt1); >> #endif >> - THREAD_NO_SLEEPING(); >> + if (!direct) >> + THREAD_NO_SLEEPING(); >> SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0); >> c_func(c_arg); >> SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0); >> - THREAD_SLEEPING_OK(); >> + if (!direct) >> + THREAD_SLEEPING_OK(); > > I didn't look into the full patch, however I have a question about > this: was it necessary because now this is running in interrupt > context so it can catch the nested sleeping check case? Or there is > something else? > I think it would be a good thing to keep up THREAD_NO_SLEEPING() also > in the direct dispatch case. > > Attilio > > > -- > Peace can only be achieved by understanding - A. Einstein Thanks for the comment, Attilio. Yes, it's exactly what you thought. 
If the direct flag is equal to one, you're sure you're processing a callout which runs directly from hardware interrupt context. In this case, the running thread cannot sleep and it's likely you have the TDP_NOSLEEPING flag set, failing the KASSERT() in THREAD_NO_SLEEPING() and leading to a panic if the kernel is compiled with INVARIANTS. In case you're running from SWI context (direct equals zero), the code remains the same as before. I think what I'm doing works due to the assumption that a thread running from hardware interrupt context never sleeps. Do you suggest some other way to handle this? Davide
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CACYV=-HmOwZ=E8Pw3-mUw0994SbvZaA3eMfcwM0fDTu_zykBJg>