From owner-svn-src-all@FreeBSD.ORG Mon Feb 9 19:19:45 2015 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id D60A6B80; Mon, 9 Feb 2015 19:19:45 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id C0BD4FC0; Mon, 9 Feb 2015 19:19:45 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.9/8.14.9) with ESMTP id t19JJjIc061544; Mon, 9 Feb 2015 19:19:45 GMT (envelope-from rrs@FreeBSD.org) Received: (from rrs@localhost) by svn.freebsd.org (8.14.9/8.14.9/Submit) id t19JJjbg061542; Mon, 9 Feb 2015 19:19:45 GMT (envelope-from rrs@FreeBSD.org) Message-Id: <201502091919.t19JJjbg061542@svn.freebsd.org> X-Authentication-Warning: svn.freebsd.org: rrs set sender to rrs@FreeBSD.org using -f From: Randall Stewart Date: Mon, 9 Feb 2015 19:19:45 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r278469 - in head/sys: kern sys X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.18-1 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 09 Feb 2015 19:19:45 -0000 Author: rrs Date: Mon Feb 9 19:19:44 2015 New Revision: 278469 URL: https://svnweb.freebsd.org/changeset/base/278469 Log: This fixes two conditions that can incur when migration is being done in the callout code and harmonizes the macro use.: 1) The callout_active() will lie. Basically if a migration is occuring and the callout is about to expire and the migration has been deferred, the callout_active will no longer return true until after the migration. This confuses and breaks callers that are doing callout_init(&c, 1); such as TCP. 2) The migration code had a bug in it where when migrating, if a two calls to callout_reset came in and they both collided with the callout on the wheel about to run, then the second call to callout_reset would corrupt the list the callout wheel uses putting the callout thread into a endless loop. 3) Per imp, I have fixed all the macro occurance in the code that were for the most part being ignored. Phabricator D1711 and looked at by lstewart and jhb and sbruno. Reviewed by: kostikbel, imp, adrian, hselasky MFC after: 3 days Sponsored by: Netflix Inc. Modified: head/sys/kern/kern_timeout.c head/sys/sys/callout.h Modified: head/sys/kern/kern_timeout.c ============================================================================== --- head/sys/kern/kern_timeout.c Mon Feb 9 19:19:13 2015 (r278468) +++ head/sys/kern/kern_timeout.c Mon Feb 9 19:19:44 2015 (r278469) @@ -166,26 +166,16 @@ struct callout_cpu { char cc_ktr_event_name[20]; }; -#define cc_exec_curr cc_exec_entity[0].cc_curr -#define cc_exec_next cc_exec_entity[0].cc_next -#define cc_exec_cancel cc_exec_entity[0].cc_cancel -#define cc_exec_waiting cc_exec_entity[0].cc_waiting -#define cc_exec_curr_dir cc_exec_entity[1].cc_curr -#define cc_exec_next_dir cc_exec_entity[1].cc_next -#define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel -#define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting - +#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr +#define cc_exec_next(cc, dir) cc->cc_exec_entity[dir].cc_next +#define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel +#define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting #ifdef SMP -#define cc_migration_func cc_exec_entity[0].ce_migration_func -#define cc_migration_arg cc_exec_entity[0].ce_migration_arg -#define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu -#define cc_migration_time cc_exec_entity[0].ce_migration_time -#define cc_migration_prec cc_exec_entity[0].ce_migration_prec -#define cc_migration_func_dir cc_exec_entity[1].ce_migration_func -#define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg -#define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu -#define cc_migration_time_dir cc_exec_entity[1].ce_migration_time -#define cc_migration_prec_dir cc_exec_entity[1].ce_migration_prec +#define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func +#define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg +#define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu +#define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time +#define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK MAXCPU @@ -235,16 +225,16 @@ static void cc_cce_cleanup(struct callout_cpu *cc, int direct) { - cc->cc_exec_entity[direct].cc_curr = NULL; - cc->cc_exec_entity[direct].cc_next = NULL; - cc->cc_exec_entity[direct].cc_cancel = false; - cc->cc_exec_entity[direct].cc_waiting = false; + cc_exec_curr(cc, direct) = NULL; + cc_exec_next(cc, direct) = NULL; + cc_exec_cancel(cc, direct) = false; + cc_exec_waiting(cc, direct) = false; #ifdef SMP - cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK; - cc->cc_exec_entity[direct].ce_migration_time = 0; - cc->cc_exec_entity[direct].ce_migration_prec = 0; - cc->cc_exec_entity[direct].ce_migration_func = NULL; - cc->cc_exec_entity[direct].ce_migration_arg = NULL; + cc_migration_cpu(cc, direct) = CPUBLOCK; + cc_migration_time(cc, direct) = 0; + cc_migration_prec(cc, direct) = 0; + cc_migration_func(cc, direct) = NULL; + cc_migration_arg(cc, direct) = NULL; #endif } @@ -256,7 +246,7 @@ cc_cce_migrating(struct callout_cpu *cc, { #ifdef SMP - return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK); + return (cc_migration_cpu(cc, direct) != CPUBLOCK); #else return (0); #endif @@ -492,7 +482,7 @@ callout_process(sbintime_t now) #ifdef CALLOUT_PROFILING ++depth_dir; #endif - cc->cc_exec_next_dir = + cc_exec_next(cc, 1) = LIST_NEXT(tmp, c_links.le); cc->cc_bucket = firstb & callwheelmask; LIST_REMOVE(tmp, c_links.le); @@ -501,7 +491,7 @@ callout_process(sbintime_t now) &mpcalls_dir, &lockcalls_dir, NULL, #endif 1); - tmp = cc->cc_exec_next_dir; + tmp = cc_exec_next(cc, 1); } else { tmpn = LIST_NEXT(tmp, c_links.le); LIST_REMOVE(tmp, c_links.le); @@ -585,7 +575,7 @@ callout_lock(struct callout *c) static void callout_cc_add(struct callout *c, struct callout_cpu *cc, sbintime_t sbt, sbintime_t precision, void (*func)(void *), - void *arg, int cpu, int flags) + void *arg, int cpu, int flags, int direct) { int bucket; @@ -606,7 +596,7 @@ callout_cc_add(struct callout *c, struct (u_int)(c->c_precision & 0xffffffff)); LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); if (cc->cc_bucket == bucket) - cc->cc_exec_next_dir = c; + cc_exec_next(cc, direct) = c; #ifndef NO_EVENTTIMERS /* * Inform the eventtimers(4) subsystem there's a new callout @@ -679,8 +669,9 @@ softclock_call_cc(struct callout *c, str c->c_flags = CALLOUT_LOCAL_ALLOC; else c->c_flags &= ~CALLOUT_PENDING; - cc->cc_exec_entity[direct].cc_curr = c; - cc->cc_exec_entity[direct].cc_cancel = false; + + cc_exec_curr(cc, direct) = c; + cc_exec_cancel(cc, direct) = false; CC_UNLOCK(cc); if (c_lock != NULL) { class->lc_lock(c_lock, lock_status); @@ -688,12 +679,12 @@ softclock_call_cc(struct callout *c, str * The callout may have been cancelled * while we switched locks. */ - if (cc->cc_exec_entity[direct].cc_cancel) { + if (cc_exec_cancel(cc, direct)) { class->lc_unlock(c_lock); goto skip; } /* The callout cannot be stopped now. */ - cc->cc_exec_entity[direct].cc_cancel = true; + cc_exec_cancel(cc, direct) = true; if (c_lock == &Giant.lock_object) { #ifdef CALLOUT_PROFILING (*gcalls)++; @@ -744,9 +735,9 @@ softclock_call_cc(struct callout *c, str class->lc_unlock(c_lock); skip: CC_LOCK(cc); - KASSERT(cc->cc_exec_entity[direct].cc_curr == c, ("mishandled cc_curr")); - cc->cc_exec_entity[direct].cc_curr = NULL; - if (cc->cc_exec_entity[direct].cc_waiting) { + KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr")); + cc_exec_curr(cc, direct) = NULL; + if (cc_exec_waiting(cc, direct)) { /* * There is someone waiting for the * callout to complete. @@ -762,9 +753,9 @@ skip: */ c->c_flags &= ~CALLOUT_DFRMIGRATION; } - cc->cc_exec_entity[direct].cc_waiting = false; + cc_exec_waiting(cc, direct) = false; CC_UNLOCK(cc); - wakeup(&cc->cc_exec_entity[direct].cc_waiting); + wakeup(&cc_exec_waiting(cc, direct)); CC_LOCK(cc); } else if (cc_cce_migrating(cc, direct)) { KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0, @@ -774,11 +765,11 @@ skip: * If the callout was scheduled for * migration just perform it now. */ - new_cpu = cc->cc_exec_entity[direct].ce_migration_cpu; - new_time = cc->cc_exec_entity[direct].ce_migration_time; - new_prec = cc->cc_exec_entity[direct].ce_migration_prec; - new_func = cc->cc_exec_entity[direct].ce_migration_func; - new_arg = cc->cc_exec_entity[direct].ce_migration_arg; + new_cpu = cc_migration_cpu(cc, direct); + new_time = cc_migration_time(cc, direct); + new_prec = cc_migration_prec(cc, direct); + new_func = cc_migration_func(cc, direct); + new_arg = cc_migration_arg(cc, direct); cc_cce_cleanup(cc, direct); /* @@ -787,7 +778,7 @@ skip: * * As first thing, handle deferred callout stops. */ - if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) { + if (!callout_migrating(c)) { CTR3(KTR_CALLOUT, "deferred cancelled %p func %p arg %p", c, new_func, new_arg); @@ -799,7 +790,7 @@ skip: new_cc = callout_cpu_switch(c, cc, new_cpu); flags = (direct) ? C_DIRECT_EXEC : 0; callout_cc_add(c, new_cc, new_time, new_prec, new_func, - new_arg, new_cpu, flags); + new_arg, new_cpu, flags, direct); CC_UNLOCK(new_cc); CC_LOCK(cc); #else @@ -1007,15 +998,15 @@ callout_reset_sbt_on(struct callout *c, KASSERT(!direct || c->c_lock == NULL, ("%s: direct callout %p has lock", __func__, c)); cc = callout_lock(c); - if (cc->cc_exec_entity[direct].cc_curr == c) { + if (cc_exec_curr(cc, direct) == c) { /* * We're being asked to reschedule a callout which is * currently in progress. If there is a lock then we * can cancel the callout if it has not really started. */ - if (c->c_lock != NULL && !cc->cc_exec_entity[direct].cc_cancel) - cancelled = cc->cc_exec_entity[direct].cc_cancel = true; - if (cc->cc_exec_entity[direct].cc_waiting) { + if (c->c_lock != NULL && cc_exec_cancel(cc, direct)) + cancelled = cc_exec_cancel(cc, direct) = true; + if (cc_exec_waiting(cc, direct)) { /* * Someone has called callout_drain to kill this * callout. Don't reschedule. @@ -1026,11 +1017,30 @@ callout_reset_sbt_on(struct callout *c, CC_UNLOCK(cc); return (cancelled); } +#ifdef SMP + if (callout_migrating(c)) { + /* + * This only occurs when a second callout_reset_sbt_on + * is made after a previous one moved it into + * deferred migration (below). Note we do *not* change + * the prev_cpu even though the previous target may + * be different. + */ + cc_migration_cpu(cc, direct) = cpu; + cc_migration_time(cc, direct) = to_sbt; + cc_migration_prec(cc, direct) = precision; + cc_migration_func(cc, direct) = ftn; + cc_migration_arg(cc, direct) = arg; + cancelled = 1; + CC_UNLOCK(cc); + return (cancelled); + } +#endif } if (c->c_flags & CALLOUT_PENDING) { if ((c->c_flags & CALLOUT_PROCESSED) == 0) { - if (cc->cc_exec_next_dir == c) - cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); + if (cc_exec_next(cc, direct) == c) + cc_exec_next(cc, direct) = LIST_NEXT(c, c_links.le); LIST_REMOVE(c, c_links.le); } else TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); @@ -1045,15 +1055,29 @@ callout_reset_sbt_on(struct callout *c, * to a more appropriate moment. */ if (c->c_cpu != cpu) { - if (cc->cc_exec_entity[direct].cc_curr == c) { - cc->cc_exec_entity[direct].ce_migration_cpu = cpu; - cc->cc_exec_entity[direct].ce_migration_time - = to_sbt; - cc->cc_exec_entity[direct].ce_migration_prec - = precision; - cc->cc_exec_entity[direct].ce_migration_func = ftn; - cc->cc_exec_entity[direct].ce_migration_arg = arg; - c->c_flags |= CALLOUT_DFRMIGRATION; + if (cc_exec_curr(cc, direct) == c) { + /* + * Pending will have been removed since we are + * actually executing the callout on another + * CPU. That callout should be waiting on the + * lock the caller holds. If we set both + * active/and/pending after we return and the + * lock on the executing callout proceeds, it + * will then see pending is true and return. + * At the return from the actual callout execution + * the migration will occur in softclock_call_cc + * and this new callout will be placed on the + * new CPU via a call to callout_cpu_switch() which + * will get the lock on the right CPU followed + * by a call callout_cc_add() which will add it there. + * (see above in softclock_call_cc()). + */ + cc_migration_cpu(cc, direct) = cpu; + cc_migration_time(cc, direct) = to_sbt; + cc_migration_prec(cc, direct) = precision; + cc_migration_func(cc, direct) = ftn; + cc_migration_arg(cc, direct) = arg; + c->c_flags |= (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING); CTR6(KTR_CALLOUT, "migration of %p func %p arg %p in %d.%08x to %u deferred", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), @@ -1065,7 +1089,7 @@ callout_reset_sbt_on(struct callout *c, } #endif - callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); + callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags, direct); CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), (u_int)(to_sbt & 0xffffffff)); @@ -1095,6 +1119,7 @@ _callout_stop_safe(struct callout *c, in struct callout_cpu *cc, *old_cc; struct lock_class *class; int direct, sq_locked, use_lock; + int not_on_a_list; if (safe) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock, @@ -1120,6 +1145,26 @@ _callout_stop_safe(struct callout *c, in again: cc = callout_lock(c); + if ((c->c_flags & (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING)) == + (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING)) { + /* + * Special case where this slipped in while we + * were migrating *as* the callout is about to + * execute. The caller probably holds the lock + * the callout wants. + * + * Get rid of the migration first. Then set + * the flag that tells this code *not* to + * try to remove it from any lists (its not + * on one yet). When the callout wheel runs, + * it will ignore this callout. + */ + c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_ACTIVE); + not_on_a_list = 1; + } else { + not_on_a_list = 0; + } + /* * If the callout was migrating while the callout cpu lock was * dropped, just drop the sleepqueue lock and check the states @@ -1128,7 +1173,7 @@ again: if (sq_locked != 0 && cc != old_cc) { #ifdef SMP CC_UNLOCK(cc); - sleepq_release(&old_cc->cc_exec_entity[direct].cc_waiting); + sleepq_release(&cc_exec_waiting(old_cc, direct)); sq_locked = 0; old_cc = NULL; goto again; @@ -1149,13 +1194,12 @@ again: * If it wasn't on the queue and it isn't the current * callout, then we can't stop it, so just bail. */ - if (cc->cc_exec_entity[direct].cc_curr != c) { + if (cc_exec_curr(cc, direct) != c) { CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); CC_UNLOCK(cc); if (sq_locked) - sleepq_release( - &cc->cc_exec_entity[direct].cc_waiting); + sleepq_release(&cc_exec_waiting(cc, direct)); return (0); } @@ -1166,7 +1210,7 @@ again: * just wait for the current invocation to * finish. */ - while (cc->cc_exec_entity[direct].cc_curr == c) { + while (cc_exec_curr(cc, direct) == c) { /* * Use direct calls to sleepqueue interface * instead of cv/msleep in order to avoid @@ -1187,7 +1231,7 @@ again: if (!sq_locked) { CC_UNLOCK(cc); sleepq_lock( - &cc->cc_exec_entity[direct].cc_waiting); + &cc_exec_waiting(cc, direct)); sq_locked = 1; old_cc = cc; goto again; @@ -1199,15 +1243,15 @@ again: * will be packed up, just let softclock() * take care of it. */ - cc->cc_exec_entity[direct].cc_waiting = true; + cc_exec_waiting(cc, direct) = true; DROP_GIANT(); CC_UNLOCK(cc); sleepq_add( - &cc->cc_exec_entity[direct].cc_waiting, + &cc_exec_waiting(cc, direct), &cc->cc_lock.lock_object, "codrain", SLEEPQ_SLEEP, 0); sleepq_wait( - &cc->cc_exec_entity[direct].cc_waiting, + &cc_exec_waiting(cc, direct), 0); sq_locked = 0; old_cc = NULL; @@ -1217,7 +1261,8 @@ again: CC_LOCK(cc); } } else if (use_lock && - !cc->cc_exec_entity[direct].cc_cancel) { + !cc_exec_cancel(cc, direct)) { + /* * The current callout is waiting for its * lock which we hold. Cancel the callout @@ -1225,7 +1270,7 @@ again: * lock, the callout will be skipped in * softclock(). */ - cc->cc_exec_entity[direct].cc_cancel = true; + cc_exec_cancel(cc, direct) = true; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); KASSERT(!cc_cce_migrating(cc, direct), @@ -1233,12 +1278,34 @@ again: CC_UNLOCK(cc); KASSERT(!sq_locked, ("sleepqueue chain locked")); return (1); - } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) { + } else if (callout_migrating(c)) { + /* + * The callout is currently being serviced + * and the "next" callout is scheduled at + * its completion with a migration. We remove + * the migration flag so it *won't* get rescheduled, + * but we can't stop the one thats running so + * we return 0. + */ c->c_flags &= ~CALLOUT_DFRMIGRATION; +#ifdef SMP + /* + * We can't call cc_cce_cleanup here since + * if we do it will remove .ce_curr and + * its still running. This will prevent a + * reschedule of the callout when the + * execution completes. + */ + cc_migration_cpu(cc, direct) = CPUBLOCK; + cc_migration_time(cc, direct) = 0; + cc_migration_prec(cc, direct) = 0; + cc_migration_func(cc, direct) = NULL; + cc_migration_arg(cc, direct) = NULL; +#endif CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", c, c->c_func, c->c_arg); CC_UNLOCK(cc); - return (1); + return (0); } CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); @@ -1247,20 +1314,21 @@ again: return (0); } if (sq_locked) - sleepq_release(&cc->cc_exec_entity[direct].cc_waiting); + sleepq_release(&cc_exec_waiting(cc, direct)); c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - if ((c->c_flags & CALLOUT_PROCESSED) == 0) { - if (cc->cc_exec_next_dir == c) - cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); - LIST_REMOVE(c, c_links.le); - } else - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + if (not_on_a_list == 0) { + if ((c->c_flags & CALLOUT_PROCESSED) == 0) { + if (cc_exec_next(cc, direct) == c) + cc_exec_next(cc, direct) = LIST_NEXT(c, c_links.le); + LIST_REMOVE(c, c_links.le); + } else + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + } callout_cc_del(c, cc); - CC_UNLOCK(cc); return (1); } Modified: head/sys/sys/callout.h ============================================================================== --- head/sys/sys/callout.h Mon Feb 9 19:19:13 2015 (r278468) +++ head/sys/sys/callout.h Mon Feb 9 19:19:44 2015 (r278469) @@ -64,6 +64,7 @@ struct callout_handle { #ifdef _KERNEL #define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE) +#define callout_migrating(c) ((c)->c_flags & CALLOUT_DFRMIGRATION) #define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE) #define callout_drain(c) _callout_stop_safe(c, 1) void callout_init(struct callout *, int);