From owner-p4-projects@FreeBSD.ORG Tue Jun 23 23:13:33 2009
Date: Tue, 23 Jun 2009 23:13:32 GMT
Message-Id: <200906232313.n5NNDW9x083237@repoman.freebsd.org>
From: Prashant Vaibhav <pvaibhav@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 165014 for review

http://perforce.freebsd.org/chv.cgi?CH=165014

Change 165014 by pvaibhav@pvaibhav_matrix on 2009/06/23 23:12:45

	Syncing branch with local copy. Implemented in this commit are:

	1. A generic priority queue implementation based on binary heaps.
	   See the comments in sys/bheap.h for more information; a rough
	   sketch of the idea follows this description.
	2. The callout/timeout scheme, modified to use the new binary
	   heap-based priority queue instead of the old "callout wheel"
	   data structure.

	Not everything has been heavily tested yet; help with testing on
	SMP systems would be appreciated.
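The new sys/bheap.h is added by this change but its contents are not part
of the diff below, so its exact interface is not reproduced here. As a
rough, illustrative sketch of the idea only (this is not the bheap.h code;
every name in it, such as tick_heap, th_push and th_pop_min, is invented
for the example), an array-based binary min-heap keyed on an absolute
expiry tick can be as small as this:

/*
 * Illustrative sketch only: an array-based binary min-heap keyed on an
 * absolute expiry tick.  NOT the sys/bheap.h implementation; all names
 * here are made up for the example.
 */
#include <stdio.h>

struct tick_heap {
	int	*th_keys;	/* expiry ticks; th_keys[0] is the minimum */
	int	 th_size;	/* number of elements currently stored */
	int	 th_cap;	/* capacity of the backing array */
};

static void
th_swap(int *a, int *b)
{
	int t = *a; *a = *b; *b = t;
}

/* Insert a key and restore the heap property by sifting it up. */
static void
th_push(struct tick_heap *h, int key)
{
	int i;

	if (h->th_size == h->th_cap)
		return;			/* full; a real queue would grow */
	i = h->th_size++;
	h->th_keys[i] = key;
	while (i > 0 && h->th_keys[(i - 1) / 2] > h->th_keys[i]) {
		th_swap(&h->th_keys[(i - 1) / 2], &h->th_keys[i]);
		i = (i - 1) / 2;
	}
}

/* Remove and return the smallest key (caller checks th_size > 0 first). */
static int
th_pop_min(struct tick_heap *h)
{
	int i, child, min = h->th_keys[0];

	h->th_keys[0] = h->th_keys[--h->th_size];
	i = 0;
	for (;;) {
		child = 2 * i + 1;
		if (child >= h->th_size)
			break;
		if (child + 1 < h->th_size &&
		    h->th_keys[child + 1] < h->th_keys[child])
			child++;
		if (h->th_keys[i] <= h->th_keys[child])
			break;
		th_swap(&h->th_keys[i], &h->th_keys[child]);
		i = child;
	}
	return (min);
}

int
main(void)
{
	int mem[8];
	struct tick_heap h = { mem, 0, 8 };

	th_push(&h, 150);	/* pretend these are absolute expiry ticks */
	th_push(&h, 100);
	th_push(&h, 125);
	while (h.th_size > 0)
		printf("expire at tick %d\n", th_pop_min(&h));
	return (0);
}

MINHEAP_INSERT() and MINHEAP_FIRST()/MINHEAP_REMOVE() in the diff below play
analogous roles: the earliest expiry is always at the root and can be read in
O(1), while insertion and removal re-heapify in O(log n), instead of hashing
callouts into the buckets of a fixed-size callout wheel.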
Affected files ...

.. //depot/projects/soc2009/calloutapi/src/sys/kern/kern_timeout.c#2 edit
.. //depot/projects/soc2009/calloutapi/src/sys/sys/bheap.h#1 add
.. //depot/projects/soc2009/calloutapi/src/sys/sys/callout.h#2 edit

Differences ...

==== //depot/projects/soc2009/calloutapi/src/sys/kern/kern_timeout.c#2 (text+ko) ====

@@ -64,9 +64,6 @@
 SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0,
     "struct callout *");
 
-static int avg_depth;
-SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
-    "Average number of items examined per softclock call. Units = 1/1000");
 static int avg_gcalls;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
     "Average number of Giant callouts made per softclock call. Units = 1/1000");
@@ -80,17 +77,15 @@
  * TODO:
  *	allocate more timeout table slots when table overflows.
  */
-int callwheelsize, callwheelbits, callwheelmask;
-
 struct callout_cpu {
 	struct mtx		cc_lock;
-	struct callout		*cc_callout;
-	struct callout_tailq	*cc_callwheel;
-	struct callout_list	cc_callfree;
-	struct callout		*cc_next;
+	struct callout		*cc_callout;	/* pointer to local callouts */
+	struct callout**	cc_pq_mem;	/* memory for the priority Q */
+	struct callout_queue	cc_callqueue;	/* master priority queue */
+	struct callout_list	cc_callfree;	/* available local callouts */
 	struct callout		*cc_curr;
 	void			*cc_cookie;
-	int			cc_softticks;
+	int			cc_monoticks;	/* monotonic increasing tick */
 	int			cc_cancel;
 	int			cc_waiting;
 };
@@ -141,19 +136,15 @@
 	timeout_cpu = PCPU_GET(cpuid);
 	cc = CC_CPU(timeout_cpu);
-	/*
-	 * Calculate callout wheel size
-	 */
-	for (callwheelsize = 1, callwheelbits = 0;
-	     callwheelsize < ncallout;
-	     callwheelsize <<= 1, ++callwheelbits)
-		;
-	callwheelmask = callwheelsize - 1;
 
+	/* Reserve space for locally allocated callouts (timeout()) */
 	cc->cc_callout = (struct callout *)v;
 	v = (caddr_t)(cc->cc_callout + ncallout);
-	cc->cc_callwheel = (struct callout_tailq *)v;
-	v = (caddr_t)(cc->cc_callwheel + callwheelsize);
+
+	/* Reserve space for callout queue's internal array */
+	cc->cc_pq_mem = (struct callout**)v;
+	v = (caddr_t)(cc->cc_pq_mem + ncallout);
+
 	return(v);
 }
@@ -164,17 +155,26 @@
 	int i;
 
 	mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
+
+	/*
+	 * Initialize the priority queue
+	 */
+	MINHEAP_INIT(&cc->cc_callqueue, callout, ncallout, cc->cc_pq_mem);
+
+	/* Set monoticks to 0 */
+	cc->cc_monoticks = 0;
+
+	/*
+	 * Initialize list of available local callouts
+	 */
 	SLIST_INIT(&cc->cc_callfree);
-	for (i = 0; i < callwheelsize; i++) {
-		TAILQ_INIT(&cc->cc_callwheel[i]);
-	}
 	if (cc->cc_callout == NULL)
 		return;
 	for (i = 0; i < ncallout; i++) {
 		c = &cc->cc_callout[i];
 		callout_init(c, 0);
 		c->c_flags = CALLOUT_LOCAL_ALLOC;
-		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
+		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links);
 	}
 }
@@ -220,8 +220,8 @@
 	    INTR_MPSAFE, &cc->cc_cookie))
 		panic("died while creating standard software ithreads");
 	cc->cc_callout = NULL;	/* Only cpu0 handles timeout(). */
-	cc->cc_callwheel = malloc(
-	    sizeof(struct callout_tailq) * callwheelsize, M_CALLOUT,
+	cc->cc_pq_mem = (struct callout**) malloc(
+	    sizeof(struct callout*) * ncallout, M_CALLOUT,
 	    M_WAITOK);
 	callout_cpu_init(cc);
 }
@@ -235,29 +235,34 @@
 {
 	struct callout_cpu *cc;
 	int need_softclock;
-	int bucket;
 
 	/*
 	 * Process callouts at a very low cpu priority, so we don't keep the
 	 * relatively high clock interrupt priority any longer than necessary.
 	 */
-	need_softclock = 0;
 	cc = CC_SELF();
 	mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
-	for (; (cc->cc_softticks - ticks) < 0; cc->cc_softticks++) {
-		bucket = cc->cc_softticks & callwheelmask;
-		if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) {
-			need_softclock = 1;
-			break;
-		}
-	}
-	mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
+
+	cc->cc_monoticks++; /* XXX: Can we just use the global 'ticks'? */
+	need_softclock = 0;
+
+	/*
+	 * Check if the first pending callout's time has already arrived (or
+	 * has passed). If yes, schedule the softclock() which will then
+	 * expire this (and any remaining) callout as appropriate.
+	 */
+	if (MINHEAP_EMPTY(&cc->cc_callqueue))
+		need_softclock = 0;
+	else if (MINHEAP_FIRST(&cc->cc_callqueue)->c_time <= cc->cc_monoticks)
+		need_softclock = 1;
 
 	/*
-	 * swi_sched acquires the thread lock, so we don't want to call it
-	 * with cc_lock held; incorrect locking order.
+	 * swi_sched acquires the thread lock, so we don't want to call
+	 * it with cc_lock held; incorrect locking order.
 	 */
-	if (need_softclock)
+	if (need_softclock == 1) {
+		mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
 		swi_sched(cc->cc_cookie, 0);
+	}
 }
 
 static struct callout_cpu *
@@ -278,29 +283,29 @@
 }
 
 /*
- * The callout mechanism is based on the work of Adam M. Costello and
- * George Varghese, published in a technical report entitled "Redesigning
- * the BSD Callout and Timer Facilities" and modified slightly for inclusion
- * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
- * used in this implementation was published by G. Varghese and T. Lauck in
- * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
- * the Efficient Implementation of a Timer Facility" in the Proceedings of
- * the 11th ACM Annual Symposium on Operating Systems Principles,
- * Austin, Texas Nov 1987.
+ * The callout mechanism is a work-in-progress, based on the idea of using
+ * binary heap based priority queues containing callouts to expire in the
+ * future. The expiry time is (currently) specified as the number of ticks
+ * of the hardware clock (HZ/sec). This is an absolute value.
+ *
+ * The softclock() function checks that the first pending (ie. highest priority
+ * callout) in the queue has an expiry time that has either arrived or already
+ * passed. If yes, that callout is handled. Following this, the callout is
+ * removed from the queue and the next callout is checked in a similar fashion,
+ * until the expiry time of the next-in-line callout is in the future.
+ *
+ * For more information, ref: http://wiki.freebsd.org/SOC2009PrashantVaibhav
  */
 
 /*
  * Software (low priority) clock interrupt.
- * Run periodic events from timeout queue.
  */
 void
 softclock(void *arg)
 {
 	struct callout_cpu *cc;
 	struct callout *c;
-	struct callout_tailq *bucket;
 	int curticks;
-	int steps;	/* #steps since we last allowed interrupts */
 	int depth;
 	int mpcalls;
 	int lockcalls;
@@ -320,159 +325,144 @@
 	lockcalls = 0;
 	gcalls = 0;
 	depth = 0;
-	steps = 0;
 	cc = (struct callout_cpu *)arg;
 	CC_LOCK(cc);
-	while (cc->cc_softticks != ticks) {
-		/*
-		 * cc_softticks may be modified by hard clock, so cache
-		 * it while we work on a given bucket.
-		 */
-		curticks = cc->cc_softticks;
-		cc->cc_softticks++;
-		bucket = &cc->cc_callwheel[curticks & callwheelmask];
-		c = TAILQ_FIRST(bucket);
-		while (c) {
-			depth++;
-			if (c->c_time != curticks) {
-				c = TAILQ_NEXT(c, c_links.tqe);
-				++steps;
-				if (steps >= MAX_SOFTCLOCK_STEPS) {
-					cc->cc_next = c;
-					/* Give interrupts a chance. */
-					CC_UNLOCK(cc);
-					;	/* nothing */
-					CC_LOCK(cc);
-					c = cc->cc_next;
-					steps = 0;
-				}
-			} else {
-				void (*c_func)(void *);
-				void *c_arg;
-				struct lock_class *class;
-				struct lock_object *c_lock;
-				int c_flags, sharedlock;
+	/*
+	 * ticks may be modified by hard clock, so cache it
+	 */
+	curticks = cc->cc_monoticks;
+	CTR1(KTR_CALLOUT, "softclock run at %d", curticks);
+	while (!MINHEAP_EMPTY(&cc->cc_callqueue)) {
+		c = MINHEAP_FIRST(&cc->cc_callqueue);
+		if (c->c_time > curticks) {
+			/*
+			 * We've reached a callout whose expiry time is
+			 * in the future, so we stop.
+			 */
+			break;
+		} else {
+			/*
+			 * This callout needs to be expired now
+			 */
+			void (*c_func)(void *);
+			void *c_arg;
+			struct lock_class *class;
+			struct lock_object *c_lock;
+			int c_flags, sharedlock;
 
-				cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
-				TAILQ_REMOVE(bucket, c, c_links.tqe);
-				class = (c->c_lock != NULL) ?
-				    LOCK_CLASS(c->c_lock) : NULL;
-				sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ?
-				    0 : 1;
-				c_lock = c->c_lock;
-				c_func = c->c_func;
-				c_arg = c->c_arg;
-				c_flags = c->c_flags;
-				if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
-					c->c_flags = CALLOUT_LOCAL_ALLOC;
-				} else {
-					c->c_flags =
-					    (c->c_flags & ~CALLOUT_PENDING);
+			MINHEAP_REMOVE(&cc->cc_callqueue, c, c_qe, c_time);
+			class = (c->c_lock != NULL) ?
+			    LOCK_CLASS(c->c_lock) : NULL;
+			sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
+			c_lock = c->c_lock;
+			c_func = c->c_func;
+			c_arg = c->c_arg;
+			c_flags = c->c_flags;
+
+			if (c->c_flags & CALLOUT_LOCAL_ALLOC)
+				c->c_flags = CALLOUT_LOCAL_ALLOC;
+			else
+				c->c_flags = (c->c_flags & ~CALLOUT_PENDING);
+			cc->cc_curr = c;
+			cc->cc_cancel = 0;
+			CC_UNLOCK(cc);
+			if (c_lock != NULL) {
+				class->lc_lock(c_lock, sharedlock);
+				/*
+				 * The callout may have been cancelled
+				 * while we switched locks.
+				 */
+				if (cc->cc_cancel) {
+					class->lc_unlock(c_lock);
+					goto skip;
 				}
-				cc->cc_curr = c;
-				cc->cc_cancel = 0;
-				CC_UNLOCK(cc);
-				if (c_lock != NULL) {
-					class->lc_lock(c_lock, sharedlock);
-					/*
-					 * The callout may have been cancelled
-					 * while we switched locks.
-					 */
-					if (cc->cc_cancel) {
-						class->lc_unlock(c_lock);
-						goto skip;
-					}
-					/* The callout cannot be stopped now. */
-					cc->cc_cancel = 1;
+				/* The callout cannot be stopped now. */
+				cc->cc_cancel = 1;
 
-					if (c_lock == &Giant.lock_object) {
-						gcalls++;
-						CTR3(KTR_CALLOUT,
-						    "callout %p func %p arg %p",
-						    c, c_func, c_arg);
-					} else {
-						lockcalls++;
-						CTR3(KTR_CALLOUT, "callout lock"
-						    " %p func %p arg %p",
-						    c, c_func, c_arg);
-					}
+				if (c_lock == &Giant.lock_object) {
+					gcalls++;
+					CTR3(KTR_CALLOUT,
+					    "callout %p func %p arg %p",
+					    c, c_func, c_arg);
 				} else {
-					mpcalls++;
-					CTR3(KTR_CALLOUT,
-					    "callout mpsafe %p func %p arg %p",
-					    c, c_func, c_arg);
+					lockcalls++;
+					CTR3(KTR_CALLOUT, "callout lock"
+					    " %p func %p arg %p",
+					    c, c_func, c_arg);
 				}
+			} else {
+				mpcalls++;
+				CTR3(KTR_CALLOUT,
+				    "callout mpsafe %p func %p arg %p",
+				    c, c_func, c_arg);
+			}
 #ifdef DIAGNOSTIC
-				binuptime(&bt1);
+			binuptime(&bt1);
 #endif
-				THREAD_NO_SLEEPING();
-				SDT_PROBE(callout_execute, kernel, ,
-				    callout_start, c, 0, 0, 0, 0);
-				c_func(c_arg);
-				SDT_PROBE(callout_execute, kernel, ,
-				    callout_end, c, 0, 0, 0, 0);
-				THREAD_SLEEPING_OK();
+			THREAD_NO_SLEEPING();
+			SDT_PROBE(callout_execute, kernel, ,
+			    callout_start, c, 0, 0, 0, 0);
+			c_func(c_arg);
+			SDT_PROBE(callout_execute, kernel, ,
+			    callout_end, c, 0, 0, 0, 0);
+			THREAD_SLEEPING_OK();
 #ifdef DIAGNOSTIC
-				binuptime(&bt2);
-				bintime_sub(&bt2, &bt1);
-				if (bt2.frac > maxdt) {
-					if (lastfunc != c_func ||
-					    bt2.frac > maxdt * 2) {
-						bintime2timespec(&bt2, &ts2);
-						printf(
-			"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
-						    c_func, c_arg,
-						    (intmax_t)ts2.tv_sec,
-						    ts2.tv_nsec);
-					}
-					maxdt = bt2.frac;
-					lastfunc = c_func;
+			binuptime(&bt2);
+			bintime_sub(&bt2, &bt1);
+			if (bt2.frac > maxdt) {
+				if (lastfunc != c_func ||
+				    bt2.frac > maxdt * 2)
+				{
+					bintime2timespec(&bt2, &ts2);
+					printf(
+					    "Expensive timeout(9) function:"
+					    "%p(%p) %jd.%09ld s\n",
+					    c_func, c_arg,
+					    (intmax_t)ts2.tv_sec,
+					    ts2.tv_nsec);
 				}
+				maxdt = bt2.frac;
+				lastfunc = c_func;
+			}
 #endif
-				CTR1(KTR_CALLOUT, "callout %p finished", c);
-				if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
-					class->lc_unlock(c_lock);
-			skip:
-				CC_LOCK(cc);
+			CTR1(KTR_CALLOUT, "callout %p finished", c);
+			if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
+				class->lc_unlock(c_lock);
+skip:
+			CC_LOCK(cc);
+			/*
+			 * If the current callout is locally
+			 * allocated (from timeout(9))
+			 * then put it on the freelist.
+			 *
+			 * Note: we need to check the cached
+			 * copy of c_flags because if it was not
+			 * local, then it's not safe to deref the
+			 * callout pointer.
+			 */
+			if (c_flags & CALLOUT_LOCAL_ALLOC) {
+				KASSERT(c->c_flags == CALLOUT_LOCAL_ALLOC,
+				    ("corrupted callout"));
+				c->c_func = NULL;
+				SLIST_INSERT_HEAD(&cc->cc_callfree, c,
+				    c_links);
+			}
+			cc->cc_curr = NULL;
+			if (cc->cc_waiting) {
 				/*
-				 * If the current callout is locally
-				 * allocated (from timeout(9))
-				 * then put it on the freelist.
-				 *
-				 * Note: we need to check the cached
-				 * copy of c_flags because if it was not
-				 * local, then it's not safe to deref the
-				 * callout pointer.
+				 * There is someone waiting
+				 * for the callout to complete.
 				 */
-				if (c_flags & CALLOUT_LOCAL_ALLOC) {
-					KASSERT(c->c_flags ==
-					    CALLOUT_LOCAL_ALLOC,
-					    ("corrupted callout"));
-					c->c_func = NULL;
-					SLIST_INSERT_HEAD(&cc->cc_callfree, c,
-					    c_links.sle);
-				}
-				cc->cc_curr = NULL;
-				if (cc->cc_waiting) {
-					/*
-					 * There is someone waiting
-					 * for the callout to complete.
-					 */
-					cc->cc_waiting = 0;
-					CC_UNLOCK(cc);
-					wakeup(&cc->cc_waiting);
-					CC_LOCK(cc);
-				}
-				steps = 0;
-				c = cc->cc_next;
+				cc->cc_waiting = 0;
+				CC_UNLOCK(cc);
+				wakeup(&cc->cc_waiting);
+				CC_LOCK(cc);
 			}
 		}
-	}
-	avg_depth += (depth * 1000 - avg_depth) >> 8;
+	};
 	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
 	avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
 	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
-	cc->cc_next = NULL;
 	CC_UNLOCK(cc);
 }
@@ -509,7 +499,7 @@
 	if (new == NULL)
 		/* XXX Attempt to malloc first */
 		panic("timeout table full");
-	SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle);
+	SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links);
 	callout_reset(new, to_ticks, ftn, arg);
 	handle.callout = new;
 	CC_UNLOCK(cc);
@@ -597,12 +587,10 @@
 		}
 	}
 	if (c->c_flags & CALLOUT_PENDING) {
-		if (cc->cc_next == c) {
-			cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
-		}
-		TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
-		    c_links.tqe);
-
+		/* TODO: Just modify the key of existing callouts instead of
+		 * removing and re-inserting it
+		 */
+		MINHEAP_REMOVE(&cc->cc_callqueue, c, c_qe, c_time);
 		cancelled = 1;
 		c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
 	}
@@ -622,11 +610,12 @@
 	c->c_arg = arg;
 	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
 	c->c_func = ftn;
-	c->c_time = ticks + to_ticks;
-	TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
-	    c, c_links.tqe);
-	CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d",
-	    cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks);
+	c->c_time = cc->cc_monoticks + to_ticks;
+
+	MINHEAP_INSERT(&cc->cc_callqueue, c, c_qe, c_time);
+
+	CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p at %d",
+	    cancelled ? "re" : "", c, c->c_func, c->c_arg, c->c_time);
 	CC_UNLOCK(cc);
 
 	return (cancelled);
@@ -766,18 +755,14 @@
 	c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
 
-	if (cc->cc_next == c) {
-		cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
-	}
-	TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
-	    c_links.tqe);
+	MINHEAP_REMOVE(&cc->cc_callqueue, c, c_qe, c_time);
 
 	CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
 	    c, c->c_func, c->c_arg);
 
 	if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
 		c->c_func = NULL;
-		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
+		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links);
 	}
 	CC_UNLOCK(cc);
 	return (1);
@@ -838,48 +823,13 @@
 adjust_timeout_calltodo(time_change)
 	struct timeval *time_change;
 {
-	register struct callout *p;
-	unsigned long delta_ticks;
-
-	/*
-	 * How many ticks were we asleep?
-	 * (stolen from tvtohz()).
-	 */
-
-	/* Don't do anything */
-	if (time_change->tv_sec < 0)
-		return;
-	else if (time_change->tv_sec <= LONG_MAX / 1000000)
-		delta_ticks = (time_change->tv_sec * 1000000 +
-			       time_change->tv_usec + (tick - 1)) / tick + 1;
-	else if (time_change->tv_sec <= LONG_MAX / hz)
-		delta_ticks = time_change->tv_sec * hz +
-			      (time_change->tv_usec + (tick - 1)) / tick + 1;
-	else
-		delta_ticks = LONG_MAX;
-
-	if (delta_ticks > INT_MAX)
-		delta_ticks = INT_MAX;
-
-	/*
-	 * Now rip through the timer calltodo list looking for timers
-	 * to expire.
+	/*
+	 * pvaibhav: This function is no longer required because all timers
+	 * whose expiry times are in the past or present are automatically
+	 * going to be expired on wake up when callout_tick() runs and then
+	 * subsequently calls softclock().
 	 */
-	/* don't collide with softclock() */
-	CC_LOCK(cc);
-	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
-		p->c_time -= delta_ticks;
-
-		/* Break if the timer had more time on it than delta_ticks */
-		if (p->c_time > 0)
-			break;
-
-		/* take back the ticks the timer didn't use (p->c_time <= 0) */
-		delta_ticks = -p->c_time;
-	}
-	CC_UNLOCK(cc);
-	return;
 }
 #endif /* APM_FIXUP_CALLTODO */

==== //depot/projects/soc2009/calloutapi/src/sys/sys/callout.h#2 (text+ko) ====

@@ -39,23 +39,23 @@
 #define _SYS_CALLOUT_H_
 
 #include <sys/queue.h>
+#include <sys/bheap.h>
 
 struct lock_object;
 
 SLIST_HEAD(callout_list, callout);
-TAILQ_HEAD(callout_tailq, callout);
+MINHEAP_HEAD(callout_queue, callout);
+MINHEAP_ENTRY(callout_queue_entry);
 
 struct callout {
-	union {
-		SLIST_ENTRY(callout) sle;
-		TAILQ_ENTRY(callout) tqe;
-	} c_links;
 	int	c_time;				/* ticks to the event */
 	void	*c_arg;				/* function argument */
 	void	(*c_func)(void *);		/* function to call */
 	struct lock_object *c_lock;		/* lock to handle */
 	int	c_flags;			/* state of this entry */
 	volatile int c_cpu;			/* CPU we're scheduled on */
+	SLIST_ENTRY(callout) c_links;
+	struct callout_queue_entry c_qe;	/* data for priority queue */
 };
 
 #define	CALLOUT_LOCAL_ALLOC	0x0001 /* was allocated from callfree */