Date:      Mon, 3 Sep 2007 19:31:52 -0700
From:      Alfred Perlstein <alfred@freebsd.org>
To:        smp@freebsd.org
Cc:        attilio@freebsd.org
Subject:   take 2: request for review: backport of sx and rwlocks from 7.0 to 6-stable
Message-ID:  <20070904023152.GZ87451@elvis.mu.org>


--SnV5plBeK2Ge1I9g
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Updated patch attached:

- sizeof(struct sx) should be unchanged.
- Garbage removed from kern_sx.c.
- Alignment of "struct thread" set to a constant rather than a magic number.
- uma_zalloc of "struct thread" fixed to use that constant.
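
In case it helps review, here is a rough sketch of how the backported API
is meant to be consumed.  None of this is in the patch itself; the foo_*
names are made up, and it assumes the rw_*()/sx_*() wrapper macros that
sys/rwlock.h and sys/sx.h provide in the full diff:

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/sx.h>

/* Hypothetical consumer: a counter behind an rwlock, config behind an sx. */
static struct rwlock foo_rw;
static struct sx foo_sx;
static int foo_count;

static void
foo_init(void)
{

	rw_init(&foo_rw, "foo counter");
	/* With 'options ADAPTIVE_SX', this particular lock spins adaptively. */
	sx_init_flags(&foo_sx, "foo config", SX_ADAPTIVESPIN);
}

static int
foo_read(void)
{
	int v;

	rw_rlock(&foo_rw);		/* shared */
	v = foo_count;
	rw_runlock(&foo_rw);
	return (v);
}

static void
foo_bump(void)
{

	rw_wlock(&foo_rw);		/* exclusive */
	foo_count++;
	rw_wunlock(&foo_rw);
}

static void
foo_reconfig(void)
{

	sx_xlock(&foo_sx);		/* may sleep while held */
	/* ... sleepable work ... */
	sx_xunlock(&foo_sx);
}

The adaptive spinning is invisible to consumers; it only changes what the
contended slow paths do.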



-- 
- Alfred Perlstein

--SnV5plBeK2Ge1I9g
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment;
	filename="netsmp_rwlock_freebsd6_09032007.diff"

Index: conf/NOTES
===================================================================
RCS file: /cvs/ncvs/src/sys/conf/NOTES,v
retrieving revision 1.1325.2.36
diff -c -r1.1325.2.36 NOTES
*** conf/NOTES	8 Jul 2007 15:30:28 -0000	1.1325.2.36
--- conf/NOTES	31 Aug 2007 00:39:59 -0000
***************
*** 189,200 ****
--- 189,214 ----
  # to disable it.
  options 	NO_ADAPTIVE_MUTEXES
  
+ # ADAPTIVE_RWLOCKS changes the behavior of reader/writer locks to spin
+ # if the thread that currently owns the rwlock is executing on another
+ # CPU.  This behaviour is enabled by default, so this option can be used
+ # to disable it.
+ options 	NO_ADAPTIVE_RWLOCKS
+ 
+ 
  # ADAPTIVE_GIANT causes the Giant lock to also be made adaptive when
  # running without NO_ADAPTIVE_MUTEXES.  Normally, because Giant is assumed
  # to be held for extended periods, contention on Giant will cause a thread
  # to sleep rather than spinning.
  options 	ADAPTIVE_GIANT
  
+  
+ # ADAPTIVE_SX changes the behavior of sx locks to spin if the thread
+ # that currently owns the lock is executing on another CPU.  Note that
+ # in addition to enabling this option, individual sx locks must be
+ # initialized with the SX_ADAPTIVESPIN flag.
+ options 	ADAPTIVE_SX
+ 
  # MUTEX_NOINLINE forces mutex operations to call functions to perform each
  # operation rather than inlining the simple cases.  This can be used to
  # shrink the size of the kernel text segment.  Note that this behavior is
***************
*** 207,212 ****
--- 221,240 ----
  # priority waiter.
  options 	MUTEX_WAKE_ALL
  
+ # RWLOCK_NOINLINE forces rwlock operations to call functions to perform each
+ # operation rather than inlining the simple cases.  This can be used to
+ # shrink the size of the kernel text segment.  Note that this behavior is
+ # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
+ # and WITNESS options.
+ options 	RWLOCK_NOINLINE
+ 
+ # SX_NOINLINE forces sx lock operations to call functions to perform each
+ # operation rather than inlining the simple cases.  This can be used to
+ # shrink the size of the kernel text segment.  Note that this behavior is
+ # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
+ # and WITNESS options.
+ options 	SX_NOINLINE
+ 
  # SMP Debugging Options:
  #
  # PREEMPTION allows the threads that are in the kernel to be preempted
Index: conf/files
===================================================================
RCS file: /cvs/ncvs/src/sys/conf/files,v
retrieving revision 1.1031.2.67
diff -c -r1.1031.2.67 files
*** conf/files	23 Aug 2007 22:30:14 -0000	1.1031.2.67
--- conf/files	31 Aug 2007 00:39:59 -0000
***************
*** 1312,1317 ****
--- 1312,1318 ----
  kern/kern_proc.c		standard
  kern/kern_prot.c		standard
  kern/kern_resource.c		standard
+ kern/kern_rwlock.c		standard
  kern/kern_sema.c		standard
  kern/kern_shutdown.c		standard
  kern/kern_sig.c			standard
Index: conf/options
===================================================================
RCS file: /cvs/ncvs/src/sys/conf/options,v
retrieving revision 1.510.2.21
diff -c -r1.510.2.21 options
*** conf/options	8 Jul 2007 15:30:28 -0000	1.510.2.21
--- conf/options	31 Aug 2007 00:39:59 -0000
***************
*** 60,66 ****
--- 60,68 ----
  
  # Miscellaneous options.
  ADAPTIVE_GIANT	opt_adaptive_mutexes.h
+ ADAPTIVE_SX
  NO_ADAPTIVE_MUTEXES	opt_adaptive_mutexes.h
+ NO_ADAPTIVE_RWLOCKS
  ALQ
  AUDIT		opt_global.h
  CODA_COMPAT_5	opt_coda.h
***************
*** 517,522 ****
--- 519,526 ----
  MSIZE			opt_global.h
  REGRESSION		opt_global.h
  RESTARTABLE_PANICS	opt_global.h
+ RWLOCK_NOINLINE		opt_global.h
+ SX_NOINLINE		opt_global.h
  VFS_BIO_DEBUG		opt_global.h
  
  # These are VM related options
Index: dev/acpica/acpi_ec.c
===================================================================
RCS file: /cvs/ncvs/src/sys/dev/acpica/acpi_ec.c,v
retrieving revision 1.65.2.2
diff -c -r1.65.2.2 acpi_ec.c
*** dev/acpica/acpi_ec.c	11 May 2006 17:41:00 -0000	1.65.2.2
--- dev/acpica/acpi_ec.c	31 Aug 2007 01:20:08 -0000
***************
*** 144,149 ****
--- 144,150 ----
  #include <sys/bus.h>
  #include <sys/malloc.h>
  #include <sys/module.h>
+ #include <sys/lock.h>
  #include <sys/sx.h>
  
  #include <machine/bus.h>
Index: kern/kern_ktrace.c
===================================================================
RCS file: /cvs/ncvs/src/sys/kern/kern_ktrace.c,v
retrieving revision 1.101.2.5
diff -c -r1.101.2.5 kern_ktrace.c
*** kern/kern_ktrace.c	6 Sep 2006 21:43:59 -0000	1.101.2.5
--- kern/kern_ktrace.c	31 Aug 2007 00:39:59 -0000
***************
*** 53,58 ****
--- 53,59 ----
  #include <sys/vnode.h>
  #include <sys/ktrace.h>
  #include <sys/sx.h>
+ #include <sys/condvar.h>
  #include <sys/sysctl.h>
  #include <sys/syslog.h>
  #include <sys/sysproto.h>
Index: kern/kern_rwlock.c
===================================================================
RCS file: kern/kern_rwlock.c
diff -N kern/kern_rwlock.c
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- kern/kern_rwlock.c	31 Aug 2007 03:25:24 -0000
***************
*** 0 ****
--- 1,943 ----
+ /*-
+  * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  * 3. Neither the name of the author nor the names of any co-contributors
+  *    may be used to endorse or promote products derived from this software
+  *    without specific prior written permission.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  */
+ 
+ /*
+  * Machine independent bits of reader/writer lock implementation.
+  */
+ 
+ #include <sys/cdefs.h>
+ __FBSDID("$FreeBSD: src/sys/kern/kern_rwlock.c,v 1.27 2007/06/26 21:31:56 attilio Exp $");
+ 
+ #include "opt_ddb.h"
+ 
+ #include <sys/param.h>
+ #include <sys/ktr.h>
+ #include <sys/lock.h>
+ #include <sys/mutex.h>
+ #include <sys/proc.h>
+ #include <sys/rwlock.h>
+ #include <sys/systm.h>
+ #include <sys/turnstile.h>
+ #include <sys/lock_profile.h>
+ #include <machine/cpu.h>
+ 
+ CTASSERT((RW_RECURSE & LO_CLASSFLAGS) == RW_RECURSE);
+ 
+ #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
+ #define	ADAPTIVE_RWLOCKS
+ #endif
+ 
+ #ifdef DDB
+ #include <ddb/ddb.h>
+ 
+ static void	db_show_rwlock(struct lock_object *lock);
+ #endif
+ 
+ struct lock_class lock_class_rw = {
+ 	.lc_name = "rw",
+ 	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
+ #ifdef DDB
+ 	.lc_ddb_show = db_show_rwlock,
+ #endif
+ };
+ 
+ /*
+  * Return a pointer to the owning thread if the lock is write-locked or
+  * NULL if the lock is unlocked or read-locked.
+  */
+ #define	rw_wowner(rw)							\
+ 	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
+ 	    (struct thread *)RW_OWNER((rw)->rw_lock))
+ 
+ /*
+  * Returns true if the write owner is recursed.  Write ownership is not
+  * assured here and must be checked by the caller beforehand.
+  */
+ #define	rw_recursed(rw)		((rw)->rw_recurse != 0)
+ 
+ /*
+  * Return true if curthread holds the lock.
+  */
+ #define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)
+ 
+ /*
+  * Return a pointer to the owning thread for this lock who should receive
+  * any priority lent by threads that block on this lock.  Currently this
+  * is identical to rw_wowner().
+  */
+ #define	rw_owner(rw)		rw_wowner(rw)
+ 
+ #ifndef INVARIANTS
+ #define	_rw_assert(rw, what, file, line)
+ #endif
+ 
+ void
+ rw_init_flags(struct rwlock *rw, const char *name, int opts)
+ {
+ 	struct lock_object *lock;
+ 	int flags;
+ 
+ 	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
+ 	    RW_RECURSE)) == 0);
+ 
+ 	flags = LO_UPGRADABLE | LO_RECURSABLE;
+ 	if (opts & RW_DUPOK)
+ 		flags |= LO_DUPOK;
+ 	if (!(opts & RW_NOWITNESS))
+ 		flags |= LO_WITNESS;
+ 	if (opts & RW_QUIET)
+ 		flags |= LO_QUIET;
+ 	flags |= opts & RW_RECURSE;
+ 
+ 	rw->rw_lock = RW_UNLOCKED;
+ 	rw->rw_recurse = 0;
+ 	lock = &rw->lock_object;
+ 	lock->lo_class = &lock_class_rw;
+ 	lock->lo_flags = flags;
+ 	lock->lo_name = lock->lo_type = name;
+ 	LOCK_LOG_INIT(lock, opts);
+ 	WITNESS_INIT(lock);
+ }
+ 
+ void
+ rw_destroy(struct rwlock *rw)
+ {
+ 
+ 	LOCK_LOG_DESTROY(&rw->lock_object, 0);
+ 	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked"));
+ 	KASSERT(rw->rw_recurse == 0, ("rw lock still recursed"));
+ 	rw->rw_lock = RW_DESTROYED;
+ 	WITNESS_DESTROY(&rw->lock_object);
+ }
+ 
+ void
+ rw_sysinit(void *arg)
+ {
+ 	struct rw_args *args = arg;
+ 
+ 	rw_init(args->ra_rw, args->ra_desc);
+ }
+ 
+ int
+ rw_wowned(struct rwlock *rw)
+ {
+ 
+ 	return (rw_wowner(rw) == curthread);
+ }
+ 
+ void
+ _rw_wlock(struct rwlock *rw, const char *file, int line)
+ {
+ 
+ 	MPASS(curthread != NULL);
+ 	KASSERT(rw->rw_lock != RW_DESTROYED,
+ 	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
+ 	KASSERT(rw_wowner(rw) != curthread,
+ 	    ("%s (%s): wlock already held @ %s:%d", __func__,
+ 	    rw->lock_object.lo_name, file, line));
+ 	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
+ 	    line);
+ 	__rw_wlock(rw, curthread, file, line);
+ 	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
+ 	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
+ 	curthread->td_locks++;
+ }
+ 
+ void
+ _rw_wunlock(struct rwlock *rw, const char *file, int line)
+ {
+ 
+ 	MPASS(curthread != NULL);
+ 	KASSERT(rw->rw_lock != RW_DESTROYED,
+ 	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
+ 	_rw_assert(rw, RA_WLOCKED, file, line);
+ 	curthread->td_locks--;
+ 	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
+ 	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
+ 	    line);
+ 	if (!rw_recursed(rw))
+ 		lock_profile_release_lock(&rw->lock_object);
+ 	__rw_wunlock(rw, curthread, file, line);
+ }
+ 
+ void
+ _rw_rlock(struct rwlock *rw, const char *file, int line)
+ {
+ #ifdef ADAPTIVE_RWLOCKS
+ 	volatile struct thread *owner;
+ #endif
+ 	/* uint64_t waittime = 0; */	/* XXX: notsup */
+ 	/* int contested = 0; */	/* XXX: notsup */
+ 	uintptr_t x;
+ 
+ 	KASSERT(rw->rw_lock != RW_DESTROYED,
+ 	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
+ 	KASSERT(rw_wowner(rw) != curthread,
+ 	    ("%s (%s): wlock already held @ %s:%d", __func__,
+ 	    rw->lock_object.lo_name, file, line));
+ 	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line);
+ 
+ 	/*
+ 	 * Note that we don't make any attempt to try to block read
+ 	 * locks once a writer has blocked on the lock.  The reason is
+ 	 * that we currently allow for read locks to recurse and we
+ 	 * don't keep track of all the holders of read locks.  Thus, if
+ 	 * we were to block readers once a writer blocked and a reader
+ 	 * tried to recurse on their reader lock after a writer had
+ 	 * blocked we would end up in a deadlock since the reader would
+ 	 * be blocked on the writer, and the writer would be blocked
+ 	 * waiting for the reader to release its original read lock.
+ 	 */
+ 	for (;;) {
+ 		/*
+ 		 * Handle the easy case.  If no other thread has a write
+ 		 * lock, then try to bump up the count of read locks.  Note
+ 		 * that we have to preserve the current state of the
+ 		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
+ 		 * read lock, then rw_lock must have changed, so restart
+ 		 * the loop.  Note that this handles the case of a
+ 		 * completely unlocked rwlock since such a lock is encoded
+ 		 * as a read lock with no waiters.
+ 		 */
+ 		x = rw->rw_lock;
+ 		if (x & RW_LOCK_READ) {
+ 
+ 			/*
+ 			 * The RW_LOCK_READ_WAITERS flag should only be set
+ 			 * if another thread currently holds a write lock,
+ 			 * and in that case RW_LOCK_READ should be clear.
+ 			 */
+ 			MPASS((x & RW_LOCK_READ_WAITERS) == 0);
+ 			if (atomic_cmpset_acq_ptr(&rw->rw_lock, x,
+ 			    x + RW_ONE_READER)) {
+ 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 					CTR4(KTR_LOCK,
+ 					    "%s: %p succeed %p -> %p", __func__,
+ 					    rw, (void *)x,
+ 					    (void *)(x + RW_ONE_READER));
+ 				if (RW_READERS(x) == 0)
+ 					lock_profile_obtain_lock_success(
+ 					    &rw->lock_object, contested, waittime,
+ 					    file, line);
+ 				break;
+ 			}
+ 			cpu_spinwait();
+ 			continue;
+ 		}
+ 		lock_profile_obtain_lock_failed(&rw->lock_object, &contested,
+ 		    &waittime);
+ 
+ 		/*
+ 		 * Okay, now it's the hard case.  Some other thread already
+ 		 * has a write lock, so acquire the turnstile lock so we can
+ 		 * begin the process of blocking.
+ 		 */
+ 		turnstile_lock(&rw->lock_object);
+ 
+ 		/*
+ 		 * The lock might have been released while we spun, so
+ 		 * recheck its state and restart the loop if there is no
+ 		 * longer a write lock.
+ 		 */
+ 		x = rw->rw_lock;
+ 		if (x & RW_LOCK_READ) {
+ 			turnstile_release(&rw->lock_object);
+ 			cpu_spinwait();
+ 			continue;
+ 		}
+ 
+ 		/*
+ 		 * Ok, it's still a write lock.  If the RW_LOCK_READ_WAITERS
+ 		 * flag is already set, then we can go ahead and block.  If
+ 		 * it is not set then try to set it.  If we fail to set it
+ 		 * drop the turnstile lock and restart the loop.
+ 		 */
+ 		if (!(x & RW_LOCK_READ_WAITERS)) {
+ 			if (!atomic_cmpset_ptr(&rw->rw_lock, x,
+ 			    x | RW_LOCK_READ_WAITERS)) {
+ 				turnstile_release(&rw->lock_object);
+ 				cpu_spinwait();
+ 				continue;
+ 			}
+ 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
+ 				    __func__, rw);
+ 		}
+ 
+ #ifdef ADAPTIVE_RWLOCKS
+ 		owner = (struct thread *)RW_OWNER(x);
+ 		/*
+ 		 * If the owner is running on another CPU, spin until
+ 		 * the owner stops running or the state of the lock
+ 		 * changes.
+ 		 */
+ 		if (TD_IS_RUNNING(owner)) {
+ 			turnstile_release(&rw->lock_object);
+ 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
+ 				    __func__, rw, owner);
+ 			while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
+ 			    TD_IS_RUNNING(owner))
+ 				cpu_spinwait();
+ 			continue;
+ 		}
+ #endif
+ 
+ 		/*
+ 		 * We were unable to acquire the lock and the read waiters
+ 		 * flag is set, so we must block on the turnstile.
+ 		 */
+ 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
+ 			    rw);
+ 		turnstile_wait_queue(&rw->lock_object, rw_owner(rw),
+ 		    TS_SHARED_QUEUE);
+ 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
+ 			    __func__, rw);
+ 	}
+ 
+ 	/*
+ 	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
+ 	 * however.  turnstiles don't like owners changing between calls to
+ 	 * turnstile_wait() currently.
+ 	 */
+ 
+ 	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
+ 	WITNESS_LOCK(&rw->lock_object, 0, file, line);
+ 	curthread->td_locks++;
+ }
+ 
+ void
+ _rw_runlock(struct rwlock *rw, const char *file, int line)
+ {
+ 	struct turnstile *ts;
+ 	uintptr_t x;
+ 
+ 	KASSERT(rw->rw_lock != RW_DESTROYED,
+ 	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
+ 	_rw_assert(rw, RA_RLOCKED, file, line);
+ 	curthread->td_locks--;
+ 	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
+ 	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
+ 
+ 	/* TODO: drop "owner of record" here. */
+ 
+ 	for (;;) {
+ 		/*
+ 		 * See if there is more than one read lock held.  If so,
+ 		 * just drop one and return.
+ 		 */
+ 		x = rw->rw_lock;
+ 		if (RW_READERS(x) > 1) {
+ 			if (atomic_cmpset_ptr(&rw->rw_lock, x,
+ 			    x - RW_ONE_READER)) {
+ 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 					CTR4(KTR_LOCK,
+ 					    "%s: %p succeeded %p -> %p",
+ 					    __func__, rw, (void *)x,
+ 					    (void *)(x - RW_ONE_READER));
+ 				break;
+ 			}
+ 			continue;
+ 		}
+ 
+ 
+ 		/*
+ 		 * We should never have read waiters while at least one
+ 		 * thread holds a read lock.  (See note above)
+ 		 */
+ 		KASSERT(!(x & RW_LOCK_READ_WAITERS),
+ 		    ("%s: waiting readers", __func__));
+ 
+ 		/*
+ 		 * If there aren't any waiters for a write lock, then try
+ 		 * to drop it quickly.
+ 		 */
+ 		if (!(x & RW_LOCK_WRITE_WAITERS)) {
+ 
+ 			/*
+ 			 * There shouldn't be any flags set and we should
+ 			 * be the only read lock.  If we fail to release
+ 			 * the single read lock, then another thread might
+ 			 * have just acquired a read lock, so go back up
+ 			 * to the multiple read locks case.
+ 			 */
+ 			MPASS(x == RW_READERS_LOCK(1));
+ 			if (atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1),
+ 			    RW_UNLOCKED)) {
+ 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 					CTR2(KTR_LOCK, "%s: %p last succeeded",
+ 					    __func__, rw);
+ 				break;
+ 			}
+ 			continue;
+ 		}
+ 
+ 		/*
+ 		 * There should just be one reader with one or more
+ 		 * writers waiting.
+ 		 */
+ 		MPASS(x == (RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS));
+ 
+ 		/*
+ 		 * Ok, we know we have a waiting writer and we think we
+ 		 * are the last reader, so grab the turnstile lock.
+ 		 */
+ 		turnstile_lock(&rw->lock_object);
+ 
+ 		/*
+ 		 * Try to drop our lock leaving the lock in an unlocked
+ 		 * state.
+ 		 *
+ 		 * If you wanted to do explicit lock handoff you'd have to
+ 		 * do it here.  You'd also want to use turnstile_signal()
+ 		 * and you'd have to handle the race where a higher
+ 		 * priority thread blocks on the write lock before the
+ 		 * thread you wakeup actually runs and have the new thread
+ 		 * "steal" the lock.  For now it's a lot simpler to just
+ 		 * wakeup all of the waiters.
+ 		 *
+ 		 * As above, if we fail, then another thread might have
+ 		 * acquired a read lock, so drop the turnstile lock and
+ 		 * restart.
+ 		 */
+ 		if (!atomic_cmpset_ptr(&rw->rw_lock,
+ 		    RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS, RW_UNLOCKED)) {
+ 			turnstile_release(&rw->lock_object);
+ 			continue;
+ 		}
+ 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
+ 			    __func__, rw);
+ 
+ 		/*
+ 		 * Ok.  The lock is released and all that's left is to
+ 		 * wake up the waiters.  Note that the lock might not be
+ 		 * free anymore, but in that case the writers will just
+ 		 * block again if they run before the new lock holder(s)
+ 		 * release the lock.
+ 		 */
+ 		ts = turnstile_lookup(&rw->lock_object);
+ 		MPASS(ts != NULL);
+ 		turnstile_broadcast_queue(ts, TS_EXCLUSIVE_QUEUE);
+ 		turnstile_unpend_queue(ts, TS_SHARED_LOCK);
+ 		turnstile_release(&rw->lock_object);
+ 		break;
+ 	}
+ 	lock_profile_release_lock(&rw->lock_object);
+ }
+ 
+ /*
+  * This function is called when we are unable to obtain a write lock on the
+  * first try.  This means that at least one other thread holds either a
+  * read or write lock.
+  */
+ void
+ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
+ {
+ 	/* struct turnstile *ts; */
+ #ifdef ADAPTIVE_RWLOCKS
+ 	volatile struct thread *owner;
+ #endif
+ 	uintptr_t v;
+ 
+ 	if (rw_wlocked(rw)) {
+ 		KASSERT(rw->lock_object.lo_flags & RW_RECURSE,
+ 		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
+ 		    __func__, rw->lock_object.lo_name, file, line));
+ 		rw->rw_recurse++;
+ 		atomic_set_ptr(&rw->rw_lock, RW_LOCK_RECURSED);
+ 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
+ 		return;
+ 	}
+ 
+ 	if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
+ 		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
+ 
+ 	while (!_rw_write_lock(rw, tid)) {
+ 		turnstile_lock(&rw->lock_object);
+ 		v = rw->rw_lock;
+ 
+ 		/*
+ 		 * If the lock was released while spinning on the
+ 		 * turnstile chain lock, try again.
+ 		 */
+ 		if (v == RW_UNLOCKED) {
+ 			turnstile_release(&rw->lock_object);
+ 			cpu_spinwait();
+ 			continue;
+ 		}
+ 
+ 		/*
+ 		 * If the lock was released by a writer with both readers
+ 		 * and writers waiting and a reader hasn't woken up and
+ 		 * acquired the lock yet, rw_lock will be set to the
+ 		 * value RW_UNLOCKED | RW_LOCK_WRITE_WAITERS.  If we see
+ 		 * that value, try to acquire it once.  Note that we have
+ 		 * to preserve the RW_LOCK_WRITE_WAITERS flag as there are
+ 		 * other writers waiting still.  If we fail, restart the
+ 		 * loop.
+ 		 */
+ 		if (v == (RW_UNLOCKED | RW_LOCK_WRITE_WAITERS)) {
+ 			if (atomic_cmpset_acq_ptr(&rw->rw_lock,
+ 			    RW_UNLOCKED | RW_LOCK_WRITE_WAITERS,
+ 			    tid | RW_LOCK_WRITE_WAITERS)) {
+ 				turnstile_claim(&rw->lock_object);
+ 				CTR2(KTR_LOCK, "%s: %p claimed by new writer",
+ 				    __func__, rw);
+ 				break;
+ 			}
+ 			turnstile_release(&rw->lock_object);
+ 			cpu_spinwait();
+ 			continue;
+ 		}
+ 
+ 		/*
+ 		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
+ 		 * set it.  If we fail to set it, then loop back and try
+ 		 * again.
+ 		 */
+ 		if (!(v & RW_LOCK_WRITE_WAITERS)) {
+ 			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
+ 			    v | RW_LOCK_WRITE_WAITERS)) {
+ 				turnstile_release(&rw->lock_object);
+ 				cpu_spinwait();
+ 				continue;
+ 			}
+ 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
+ 				    __func__, rw);
+ 		}
+ 
+ #ifdef ADAPTIVE_RWLOCKS
+ 		/*
+ 		 * If the lock is write locked and the owner is
+ 		 * running on another CPU, spin until the owner stops
+ 		 * running or the state of the lock changes.
+ 		 */
+ 		owner = (struct thread *)RW_OWNER(v);
+ 		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
+ 			turnstile_release(&rw->lock_object);
+ 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
+ 				    __func__, rw, owner);
+ 		while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
+ 			    TD_IS_RUNNING(owner))
+ 				cpu_spinwait();
+ 			continue;
+ 		}
+ #endif
+ 
+ 		/*
+ 		 * We were unable to acquire the lock and the write waiters
+ 		 * flag is set, so we must block on the turnstile.
+ 		 */
+ 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
+ 			    rw);
+ 		turnstile_wait_queue(&rw->lock_object, rw_owner(rw),
+ 		    TS_EXCLUSIVE_QUEUE);
+ 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
+ 			    __func__, rw);
+ 	}
+ }
+ 
+ /*
+  * This function is called if the first try at releasing a write lock failed.
+  * This means that one of the 2 waiter bits must be set indicating that at
+  * least one thread is waiting on this lock.
+  */
+ void
+ _rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
+ {
+ 	struct turnstile *ts;
+ 	uintptr_t v;
+ 	int queue;
+ 
+ 	if (rw_wlocked(rw) && rw_recursed(rw)) {
+ 		if ((--rw->rw_recurse) == 0)
+ 			atomic_clear_ptr(&rw->rw_lock, RW_LOCK_RECURSED);
+ 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
+ 		return;
+ 	}
+ 
+ 	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
+ 	    ("%s: neither of the waiter flags are set", __func__));
+ 
+ 	if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);
+ 
+ 	turnstile_lock(&rw->lock_object);
+ 	ts = turnstile_lookup(&rw->lock_object);
+ 
+ #ifdef ADAPTIVE_RWLOCKS
+ 	/*
+ 	 * There might not be a turnstile for this lock if all of
+ 	 * the waiters are adaptively spinning.  In that case, just
+ 	 * reset the lock to the unlocked state and return.
+ 	 */
+ 	if (ts == NULL) {
+ 		atomic_store_rel_ptr(&rw->rw_lock, RW_UNLOCKED);
+ 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 			CTR2(KTR_LOCK, "%s: %p no sleepers", __func__, rw);
+ 		turnstile_release(&rw->lock_object);
+ 		return;
+ 	}
+ #else
+ 	MPASS(ts != NULL);
+ #endif
+ 
+ 	/*
+ 	 * Use the same algo as sx locks for now.  Prefer waking up shared
+ 	 * waiters, if we have any, over writers.  This is probably not ideal.
+ 	 *
+ 	 * 'v' is the value we are going to write back to rw_lock.  If we
+ 	 * have waiters on both queues, we need to preserve the state of
+ 	 * the waiter flag for the queue we don't wake up.  For now this is
+ 	 * hardcoded for the algorithm mentioned above.
+ 	 *
+ 	 * In the case of both readers and writers waiting we wakeup the
+ 	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
+ 	 * new writer comes in before a reader it will claim the lock up
+ 	 * above.  There is probably a potential priority inversion in
+ 	 * there that could be worked around either by waking both queues
+ 	 * of waiters or doing some complicated lock handoff gymnastics.
+ 	 *
+ 	 * Note that in the ADAPTIVE_RWLOCKS case, if both flags are
+ 	 * set, there might not be any actual writers on the turnstile
+ 	 * as they might all be spinning.  In that case, we don't want
+ 	 * to preserve the RW_LOCK_WRITE_WAITERS flag as the turnstile
+ 	 * is going to go away once we wakeup all the readers.
+ 	 */
+ 	v = RW_UNLOCKED;
+ 	if (rw->rw_lock & RW_LOCK_READ_WAITERS) {
+ 		queue = TS_SHARED_QUEUE;
+ #ifdef ADAPTIVE_RWLOCKS
+ 		if (rw->rw_lock & RW_LOCK_WRITE_WAITERS &&
+ 		    !turnstile_empty_queue(ts, TS_EXCLUSIVE_QUEUE))
+ 			v |= RW_LOCK_WRITE_WAITERS;
+ #else
+ 		v |= (rw->rw_lock & RW_LOCK_WRITE_WAITERS);
+ #endif
+ 	} else
+ 		queue = TS_EXCLUSIVE_QUEUE;
+ 
+ #ifdef ADAPTIVE_RWLOCKS
+ 	/*
+ 	 * We have to make sure that we actually have waiters to
+ 	 * wakeup.  If they are all spinning, then we just need to
+ 	 * disown the turnstile and return.
+ 	 */
+ 	if (turnstile_empty_queue(ts, queue)) {
+ 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 			CTR2(KTR_LOCK, "%s: %p no sleepers 2", __func__, rw);
+ 		atomic_store_rel_ptr(&rw->rw_lock, v);
+ 		turnstile_disown(ts);
+ 		turnstile_release(&rw->lock_object);
+ 		return;
+ 	}
+ #endif
+ 
+ 	/* Wake up all waiters for the specific queue. */
+ 	if (LOCK_LOG_TEST(&rw->lock_object, 0))
+ 		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
+ 		    queue == TS_SHARED_QUEUE ? "read" : "write");
+ 	turnstile_broadcast_queue(ts, queue);
+ 	atomic_store_rel_ptr(&rw->rw_lock, v);
+ 	turnstile_unpend_queue(ts, TS_EXCLUSIVE_LOCK);
+ 	turnstile_release(&rw->lock_object);
+ }
+ 
+ /*
+  * Attempt to do a non-blocking upgrade from a read lock to a write
+  * lock.  This will only succeed if this thread holds a single read
+  * lock.  Returns true if the upgrade succeeded and false otherwise.
+  */
+ int
+ _rw_try_upgrade(struct rwlock *rw, const char *file, int line)
+ {
+ 	uintptr_t v, tid;
+ 	int success;
+ 
+ 	KASSERT(rw->rw_lock != RW_DESTROYED,
+ 	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
+ 	_rw_assert(rw, RA_RLOCKED, file, line);
+ 
+ 	/*
+ 	 * Attempt to switch from one reader to a writer.  If there
+ 	 * are any write waiters, then we will have to lock the
+ 	 * turnstile first to prevent races with another writer
+ 	 * calling turnstile_wait() before we have claimed this
+ 	 * turnstile.  So, do the simple case of no waiters first.
+ 	 */
+ 	tid = (uintptr_t)curthread;
+ 	if (!(rw->rw_lock & RW_LOCK_WRITE_WAITERS)) {
+ 		success = atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1),
+ 		    tid);
+ 		goto out;
+ 	}
+ 
+ 	/*
+ 	 * Ok, we think we have write waiters, so lock the
+ 	 * turnstile.
+ 	 */
+ 	turnstile_lock(&rw->lock_object);
+ 
+ 	/*
+ 	 * Try to switch from one reader to a writer again.  This time
+ 	 * we honor the current state of the RW_LOCK_WRITE_WAITERS
+ 	 * flag.  If we obtain the lock with the flag set, then claim
+ 	 * ownership of the turnstile.  In the ADAPTIVE_RWLOCKS case
+ 	 * it is possible for there to not be an associated turnstile
+ 	 * even though there are waiters if all of the waiters are
+ 	 * spinning.
+ 	 */
+ 	v = rw->rw_lock & RW_LOCK_WRITE_WAITERS;
+ 	success = atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
+ 	    tid | v);
+ #ifdef ADAPTIVE_RWLOCKS
+ 	if (success && v && turnstile_lookup(&rw->lock_object) != NULL)
+ #else
+ 	if (success && v)
+ #endif
+ 		turnstile_claim(&rw->lock_object);
+ 	else
+ 		turnstile_release(&rw->lock_object);
+ out:
+ 	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
+ 	if (success)
+ 		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
+ 		    file, line);
+ 	return (success);
+ }
+ 
+ /*
+  * Downgrade a write lock into a single read lock.
+  */
+ void
+ _rw_downgrade(struct rwlock *rw, const char *file, int line)
+ {
+ 	struct turnstile *ts;
+ 	uintptr_t tid, v;
+ 
+ 	KASSERT(rw->rw_lock != RW_DESTROYED,
+ 	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
+ 	_rw_assert(rw, RA_WLOCKED | RA_NOTRECURSED, file, line);
+ #ifndef INVARIANTS
+ 	if (rw_recursed(rw))
+ 		panic("downgrade of a recursed lock");
+ #endif
+ 
+ 	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);
+ 
+ 	/*
+ 	 * Convert from a writer to a single reader.  First we handle
+ 	 * the easy case with no waiters.  If there are any waiters, we
+ 	 * lock the turnstile, "disown" the lock, and awaken any read
+ 	 * waiters.
+ 	 */
+ 	tid = (uintptr_t)curthread;
+ 	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
+ 		goto out;
+ 
+ 	/*
+ 	 * Ok, we think we have waiters, so lock the turnstile so we can
+ 	 * read the waiter flags without any races.
+ 	 */
+ 	turnstile_lock(&rw->lock_object);
+ 	v = rw->rw_lock;
+ 	MPASS(v & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS));
+ 
+ 	/*
+ 	 * Downgrade from a write lock while preserving
+ 	 * RW_LOCK_WRITE_WAITERS and give up ownership of the
+ 	 * turnstile.  If there are any read waiters, wake them up.
+ 	 *
+ 	 * For ADAPTIVE_RWLOCKS, we have to allow for the fact that
+ 	 * all of the read waiters might be spinning.  In that case,
+ 	 * act as if RW_LOCK_READ_WAITERS is not set.  Also, only
+ 	 * preserve the RW_LOCK_WRITE_WAITERS flag if at least one
+ 	 * writer is blocked on the turnstile.
+ 	 */
+ 	ts = turnstile_lookup(&rw->lock_object);
+ #ifdef ADAPTIVE_RWLOCKS
+ 	if (ts == NULL)
+ 		v &= ~(RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS);
+ 	else if (v & RW_LOCK_READ_WAITERS &&
+ 	    turnstile_empty_queue(ts, TS_SHARED_QUEUE))
+ 		v &= ~RW_LOCK_READ_WAITERS;
+ 	else if (v & RW_LOCK_WRITE_WAITERS &&
+ 	    turnstile_empty_queue(ts, TS_EXCLUSIVE_QUEUE))
+ 		v &= ~RW_LOCK_WRITE_WAITERS;
+ #else
+ 	MPASS(ts != NULL);
+ #endif
+ 	if (v & RW_LOCK_READ_WAITERS)
+ 		turnstile_broadcast_queue(ts, TS_SHARED_QUEUE);
+ 	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) |
+ 	    (v & RW_LOCK_WRITE_WAITERS));
+ 	if (v & RW_LOCK_READ_WAITERS)
+ 		turnstile_unpend_queue(ts, TS_EXCLUSIVE_LOCK);
+ 	else if (ts)
+ 		turnstile_disown(ts);
+ 	turnstile_release(&rw->lock_object);
+ out:
+ 	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
+ }
+ 
+ #ifdef INVARIANT_SUPPORT
+ #ifndef INVARIANTS
+ #undef _rw_assert
+ #endif
+ 
+ /*
+  * In the non-WITNESS case, rw_assert() can only detect that at least
+  * *some* thread owns an rlock, but it cannot guarantee that *this*
+  * thread owns an rlock.
+  */
+ void
+ _rw_assert(struct rwlock *rw, int what, const char *file, int line)
+ {
+ 
+ 	if (panicstr != NULL)
+ 		return;
+ 	switch (what) {
+ 	case RA_LOCKED:
+ 	case RA_LOCKED | RA_RECURSED:
+ 	case RA_LOCKED | RA_NOTRECURSED:
+ 	case RA_RLOCKED:
+ #ifdef WITNESS
+ 		witness_assert(&rw->lock_object, what, file, line);
+ #else
+ 		/*
+ 		 * If some other thread has a write lock or we have one
+ 		 * and are asserting a read lock, fail.  Also, if no one
+ 		 * has a lock at all, fail.
+ 		 */
+ 		if (rw->rw_lock == RW_UNLOCKED ||
+ 		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
+ 		    rw_wowner(rw) != curthread)))
+ 			panic("Lock %s not %slocked @ %s:%d\n",
+ 			    rw->lock_object.lo_name, (what == RA_RLOCKED) ?
+ 			    "read " : "", file, line);
+ 
+ 		if (!(rw->rw_lock & RW_LOCK_READ)) {
+ 			if (rw_recursed(rw)) {
+ 				if (what & RA_NOTRECURSED)
+ 					panic("Lock %s recursed @ %s:%d\n",
+ 					    rw->lock_object.lo_name, file,
+ 					    line);
+ 			} else if (what & RA_RECURSED)
+ 				panic("Lock %s not recursed @ %s:%d\n",
+ 				    rw->lock_object.lo_name, file, line);
+ 		}
+ #endif
+ 		break;
+ 	case RA_WLOCKED:
+ 	case RA_WLOCKED | RA_RECURSED:
+ 	case RA_WLOCKED | RA_NOTRECURSED:
+ 		if (rw_wowner(rw) != curthread)
+ 			panic("Lock %s not exclusively locked @ %s:%d\n",
+ 			    rw->lock_object.lo_name, file, line);
+ 		if (rw_recursed(rw)) {
+ 			if (what & RA_NOTRECURSED)
+ 				panic("Lock %s recursed @ %s:%d\n",
+ 				    rw->lock_object.lo_name, file, line);
+ 		} else if (what & RA_RECURSED)
+ 			panic("Lock %s not recursed @ %s:%d\n",
+ 			    rw->lock_object.lo_name, file, line);
+ 		break;
+ 	case RA_UNLOCKED:
+ #ifdef WITNESS
+ 		witness_assert(&rw->lock_object, what, file, line);
+ #else
+ 		/*
+ 		 * If we hold a write lock fail.  We can't reliably check
+ 		 * to see if we hold a read lock or not.
+ 		 */
+ 		if (rw_wowner(rw) == curthread)
+ 			panic("Lock %s exclusively locked @ %s:%d\n",
+ 			    rw->lock_object.lo_name, file, line);
+ #endif
+ 		break;
+ 	default:
+ 		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
+ 		    line);
+ 	}
+ }
+ #endif /* INVARIANT_SUPPORT */
+ 
+ #ifdef DDB
+ void
+ db_show_rwlock(struct lock_object *lock)
+ {
+ 	struct rwlock *rw;
+ 	struct thread *td;
+ 
+ 	rw = (struct rwlock *)lock;
+ 
+ 	db_printf(" state: ");
+ 	if (rw->rw_lock == RW_UNLOCKED)
+ 		db_printf("UNLOCKED\n");
+ 	else if (rw->rw_lock == RW_DESTROYED) {
+ 		db_printf("DESTROYED\n");
+ 		return;
+ 	} else if (rw->rw_lock & RW_LOCK_READ)
+ 		db_printf("RLOCK: %ju locks\n",
+ 		    (uintmax_t)(RW_READERS(rw->rw_lock)));
+ 	else {
+ 		td = rw_wowner(rw);
+ 		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
+ 		    td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
+ 		if (rw_recursed(rw))
+ 			db_printf(" recursed: %u\n", rw->rw_recurse);
+ 	}
+ 	db_printf(" waiters: ");
+ 	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
+ 	case RW_LOCK_READ_WAITERS:
+ 		db_printf("readers\n");
+ 		break;
+ 	case RW_LOCK_WRITE_WAITERS:
+ 		db_printf("writers\n");
+ 		break;
+ 	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
+ 		db_printf("readers and writers\n");
+ 		break;
+ 	default:
+ 		db_printf("none\n");
+ 		break;
+ 	}
+ }
+ 
+ #endif
Index: kern/kern_sx.c
===================================================================
RCS file: /cvs/ncvs/src/sys/kern/kern_sx.c,v
retrieving revision 1.25.2.4
diff -c -r1.25.2.4 kern_sx.c
*** kern/kern_sx.c	17 Aug 2006 19:53:06 -0000	1.25.2.4
--- kern/kern_sx.c	31 Aug 2007 01:48:11 -0000
***************
*** 1,12 ****
  /*-
!  * Copyright (C) 2001 Jason Evans <jasone@freebsd.org>.  All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   * 1. Redistributions of source code must retain the above copyright
   *    notice(s), this list of conditions and the following disclaimer as
!  *    the first lines of this file unmodified other than the possible 
   *    addition of one or more copyright notices.
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice(s), this list of conditions and the following disclaimer in the
--- 1,14 ----
  /*-
!  * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
!  * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
!  * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   * 1. Redistributions of source code must retain the above copyright
   *    notice(s), this list of conditions and the following disclaimer as
!  *    the first lines of this file unmodified other than the possible
   *    addition of one or more copyright notices.
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice(s), this list of conditions and the following disclaimer in the
***************
*** 26,64 ****
   */
  
  /*
!  * Shared/exclusive locks.  This implementation assures deterministic lock
!  * granting behavior, so that slocks and xlocks are interleaved.
   *
   * Priority propagation will not generally raise the priority of lock holders,
   * so should not be relied upon in combination with sx locks.
   */
  
! #include <sys/cdefs.h>
! __FBSDID("$FreeBSD: src/sys/kern/kern_sx.c,v 1.25.2.4 2006/08/17 19:53:06 jhb Exp $");
! 
  #include "opt_ddb.h"
  
  #include <sys/param.h>
- #include <sys/systm.h>
  #include <sys/ktr.h>
- #include <sys/linker_set.h>
- #include <sys/condvar.h>
  #include <sys/lock.h>
  #include <sys/mutex.h>
  #include <sys/proc.h>
  #include <sys/sx.h>
  
  #ifdef DDB
  #include <ddb/ddb.h>
  
  static void	db_show_sx(struct lock_object *lock);
  #endif
  
  struct lock_class lock_class_sx = {
! 	"sx",
! 	LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
  #ifdef DDB
! 	db_show_sx
  #endif
  };
  
--- 28,117 ----
   */
  
  /*
!  * Shared/exclusive locks.  This implementation attempts to ensure
!  * deterministic lock granting behavior, so that slocks and xlocks are
!  * interleaved.
   *
   * Priority propagation will not generally raise the priority of lock holders,
   * so should not be relied upon in combination with sx locks.
   */
  
! #include "opt_adaptive_sx.h"
  #include "opt_ddb.h"
  
+ #include <sys/cdefs.h>
+ __FBSDID("$FreeBSD: src/sys/kern/kern_sx.c,v 1.54 2007/07/06 13:20:44 attilio Exp $");
+ 
  #include <sys/param.h>
  #include <sys/ktr.h>
  #include <sys/lock.h>
  #include <sys/mutex.h>
  #include <sys/proc.h>
+ #include <sys/sleepqueue.h>
  #include <sys/sx.h>
+ #include <sys/systm.h>
+ 
+ #ifdef ADAPTIVE_SX
+ #include <machine/cpu.h>
+ #endif
  
  #ifdef DDB
  #include <ddb/ddb.h>
+ #endif
+ 
+ #if !defined(SMP) && defined(ADAPTIVE_SX)
+ #error "You must have SMP to enable the ADAPTIVE_SX option"
+ #endif
+ 
+ CTASSERT(((SX_ADAPTIVESPIN | SX_RECURSE) & LO_CLASSFLAGS) ==
+     (SX_ADAPTIVESPIN | SX_RECURSE));
+ 
+ /* Handy macros for sleep queues. */
+ #define	SQ_EXCLUSIVE_QUEUE	0
+ #define	SQ_SHARED_QUEUE		1
  
+ /*
+  * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file.  We
+  * drop Giant anytime we have to sleep or if we adaptively spin.
+  */
+ #define	GIANT_DECLARE							\
+ 	int _giantcnt = 0;						\
+ 	WITNESS_SAVE_DECL(Giant)					\
+ 
+ #define	GIANT_SAVE() do {						\
+ 	if (mtx_owned(&Giant)) {					\
+ 		WITNESS_SAVE(&Giant.mtx_object, Giant);		\
+ 		while (mtx_owned(&Giant)) {				\
+ 			_giantcnt++;					\
+ 			mtx_unlock(&Giant);				\
+ 		}							\
+ 	}								\
+ } while (0)
+ 
+ #define GIANT_RESTORE() do {						\
+ 	if (_giantcnt > 0) {						\
+ 		mtx_assert(&Giant, MA_NOTOWNED);			\
+ 		while (_giantcnt--)					\
+ 			mtx_lock(&Giant);				\
+ 		WITNESS_RESTORE(&Giant.mtx_object, Giant);		\
+ 	}								\
+ } while (0)
+ 
+ /*
+  * Returns true if an exclusive lock is recursed.  It assumes
+  * curthread currently has an exclusive lock.
+  */
+ #define	sx_recursed(sx)		((sx)->sx_recurse != 0)
+ 
+ #ifdef DDB
  static void	db_show_sx(struct lock_object *lock);
  #endif
  
  struct lock_class lock_class_sx = {
! 	.lc_name = "sx",
! 	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
  #ifdef DDB
! 	.lc_ddb_show = db_show_sx,
  #endif
  };
  
***************
*** 75,317 ****
  }
  
  void
! sx_init(struct sx *sx, const char *description)
  {
  
! 	sx->sx_lock = mtx_pool_find(mtxpool_lockbuilder, sx);
! 	sx->sx_cnt = 0;
! 	cv_init(&sx->sx_shrd_cv, description);
! 	sx->sx_shrd_wcnt = 0;
! 	cv_init(&sx->sx_excl_cv, description);
! 	sx->sx_excl_wcnt = 0;
! 	sx->sx_xholder = NULL;
! 	lock_init(&sx->sx_object, &lock_class_sx, description, NULL,
! 	    LO_WITNESS | LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE);
  }
  
  void
  sx_destroy(struct sx *sx)
  {
  
! 	KASSERT((sx->sx_cnt == 0 && sx->sx_shrd_wcnt == 0 && sx->sx_excl_wcnt ==
! 	    0), ("%s (%s): holders or waiters\n", __func__,
! 	    sx->sx_object.lo_name));
! 
! 	sx->sx_lock = NULL;
! 	cv_destroy(&sx->sx_shrd_cv);
! 	cv_destroy(&sx->sx_excl_cv);
! 
! 	lock_destroy(&sx->sx_object);
  }
  
! void
! _sx_slock(struct sx *sx, const char *file, int line)
  {
  
! 	mtx_lock(sx->sx_lock);
! 	KASSERT(sx->sx_xholder != curthread,
! 	    ("%s (%s): slock while xlock is held @ %s:%d\n", __func__,
! 	    sx->sx_object.lo_name, file, line));
! 	WITNESS_CHECKORDER(&sx->sx_object, LOP_NEWORDER, file, line);
! 
! 	/*
! 	 * Loop in case we lose the race for lock acquisition.
! 	 */
! 	while (sx->sx_cnt < 0) {
! 		sx->sx_shrd_wcnt++;
! 		cv_wait(&sx->sx_shrd_cv, sx->sx_lock);
! 		sx->sx_shrd_wcnt--;
  	}
  
! 	/* Acquire a shared lock. */
! 	sx->sx_cnt++;
! 
! 	LOCK_LOG_LOCK("SLOCK", &sx->sx_object, 0, 0, file, line);
! 	WITNESS_LOCK(&sx->sx_object, 0, file, line);
! 	curthread->td_locks++;
! 
! 	mtx_unlock(sx->sx_lock);
  }
  
  int
  _sx_try_slock(struct sx *sx, const char *file, int line)
  {
  
! 	mtx_lock(sx->sx_lock);
! 	if (sx->sx_cnt >= 0) {
! 		sx->sx_cnt++;
! 		LOCK_LOG_TRY("SLOCK", &sx->sx_object, 0, 1, file, line);
! 		WITNESS_LOCK(&sx->sx_object, LOP_TRYLOCK, file, line);
  		curthread->td_locks++;
- 		mtx_unlock(sx->sx_lock);
  		return (1);
- 	} else {
- 		LOCK_LOG_TRY("SLOCK", &sx->sx_object, 0, 0, file, line);
- 		mtx_unlock(sx->sx_lock);
- 		return (0);
  	}
  }
  
! void
! _sx_xlock(struct sx *sx, const char *file, int line)
  {
  
! 	mtx_lock(sx->sx_lock);
! 
! 	/*
! 	 * With sx locks, we're absolutely not permitted to recurse on
! 	 * xlocks, as it is fatal (deadlock). Normally, recursion is handled
! 	 * by WITNESS, but as it is not semantically correct to hold the
! 	 * xlock while in here, we consider it API abuse and put it under
! 	 * INVARIANTS.
! 	 */
! 	KASSERT(sx->sx_xholder != curthread,
! 	    ("%s (%s): xlock already held @ %s:%d", __func__,
! 	    sx->sx_object.lo_name, file, line));
! 	WITNESS_CHECKORDER(&sx->sx_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
  	    line);
! 
! 	/* Loop in case we lose the race for lock acquisition. */
! 	while (sx->sx_cnt != 0) {
! 		sx->sx_excl_wcnt++;
! 		cv_wait(&sx->sx_excl_cv, sx->sx_lock);
! 		sx->sx_excl_wcnt--;
  	}
  
! 	MPASS(sx->sx_cnt == 0);
! 
! 	/* Acquire an exclusive lock. */
! 	sx->sx_cnt--;
! 	sx->sx_xholder = curthread;
! 
! 	LOCK_LOG_LOCK("XLOCK", &sx->sx_object, 0, 0, file, line);
! 	WITNESS_LOCK(&sx->sx_object, LOP_EXCLUSIVE, file, line);
! 	curthread->td_locks++;
! 
! 	mtx_unlock(sx->sx_lock);
  }
  
  int
  _sx_try_xlock(struct sx *sx, const char *file, int line)
  {
  
! 	mtx_lock(sx->sx_lock);
! 	if (sx->sx_cnt == 0) {
! 		sx->sx_cnt--;
! 		sx->sx_xholder = curthread;
! 		LOCK_LOG_TRY("XLOCK", &sx->sx_object, 0, 1, file, line);
! 		WITNESS_LOCK(&sx->sx_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file,
! 		    line);
  		curthread->td_locks++;
- 		mtx_unlock(sx->sx_lock);
- 		return (1);
- 	} else {
- 		LOCK_LOG_TRY("XLOCK", &sx->sx_object, 0, 0, file, line);
- 		mtx_unlock(sx->sx_lock);
- 		return (0);
  	}
  }
  
  void
  _sx_sunlock(struct sx *sx, const char *file, int line)
  {
  
! 	_sx_assert(sx, SX_SLOCKED, file, line);
! 	mtx_lock(sx->sx_lock);
  
  	curthread->td_locks--;
! 	WITNESS_UNLOCK(&sx->sx_object, 0, file, line);
  
! 	/* Release. */
! 	sx->sx_cnt--;
  
  	/*
! 	 * If we just released the last shared lock, wake any waiters up, giving
! 	 * exclusive lockers precedence.  In order to make sure that exclusive
! 	 * lockers won't be blocked forever, don't wake shared lock waiters if
! 	 * there are exclusive lock waiters.
  	 */
! 	if (sx->sx_excl_wcnt > 0) {
! 		if (sx->sx_cnt == 0)
! 			cv_signal(&sx->sx_excl_cv);
! 	} else if (sx->sx_shrd_wcnt > 0)
! 		cv_broadcast(&sx->sx_shrd_cv);
! 
! 	LOCK_LOG_LOCK("SUNLOCK", &sx->sx_object, 0, 0, file, line);
! 
! 	mtx_unlock(sx->sx_lock);
  }
  
  void
! _sx_xunlock(struct sx *sx, const char *file, int line)
  {
  
! 	_sx_assert(sx, SX_XLOCKED, file, line);
! 	mtx_lock(sx->sx_lock);
! 	MPASS(sx->sx_cnt == -1);
  
! 	curthread->td_locks--;
! 	WITNESS_UNLOCK(&sx->sx_object, LOP_EXCLUSIVE, file, line);
  
! 	/* Release. */
! 	sx->sx_cnt++;
! 	sx->sx_xholder = NULL;
  
  	/*
! 	 * Wake up waiters if there are any.  Give precedence to slock waiters.
  	 */
! 	if (sx->sx_shrd_wcnt > 0)
! 		cv_broadcast(&sx->sx_shrd_cv);
! 	else if (sx->sx_excl_wcnt > 0)
! 		cv_signal(&sx->sx_excl_cv);
  
! 	LOCK_LOG_LOCK("XUNLOCK", &sx->sx_object, 0, 0, file, line);
  
! 	mtx_unlock(sx->sx_lock);
  }
  
  int
! _sx_try_upgrade(struct sx *sx, const char *file, int line)
  {
  
! 	_sx_assert(sx, SX_SLOCKED, file, line);
! 	mtx_lock(sx->sx_lock);
  
! 	if (sx->sx_cnt == 1) {
! 		sx->sx_cnt = -1;
! 		sx->sx_xholder = curthread;
  
! 		LOCK_LOG_TRY("XUPGRADE", &sx->sx_object, 0, 1, file, line);
! 		WITNESS_UPGRADE(&sx->sx_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
! 		    file, line);
  
! 		mtx_unlock(sx->sx_lock);
! 		return (1);
! 	} else {
! 		LOCK_LOG_TRY("XUPGRADE", &sx->sx_object, 0, 0, file, line);
! 		mtx_unlock(sx->sx_lock);
! 		return (0);
  	}
  }
  
  void
! _sx_downgrade(struct sx *sx, const char *file, int line)
  {
  
! 	_sx_assert(sx, SX_XLOCKED, file, line);
! 	mtx_lock(sx->sx_lock);
! 	MPASS(sx->sx_cnt == -1);
  
! 	WITNESS_DOWNGRADE(&sx->sx_object, 0, file, line);
  
! 	sx->sx_cnt = 1;
! 	sx->sx_xholder = NULL;
!         if (sx->sx_shrd_wcnt > 0)
!                 cv_broadcast(&sx->sx_shrd_cv);
  
! 	LOCK_LOG_LOCK("XDOWNGRADE", &sx->sx_object, 0, 0, file, line);
  
! 	mtx_unlock(sx->sx_lock);
  }
  
  #ifdef INVARIANT_SUPPORT
--- 128,851 ----
  }
  
  void
! sx_init_flags(struct sx *sx, const char *description, int opts)
  {
+ 	struct lock_object *lock;
+ 	int flags;
  
! 	MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK |
! 	    SX_NOPROFILE | SX_ADAPTIVESPIN)) == 0);
! 
! 	bzero(sx, sizeof(*sx));
! 
! 	flags = LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE;
! 	if (opts & SX_DUPOK)
! 		flags |= LO_DUPOK;
! 	if (!(opts & SX_NOWITNESS))
! 		flags |= LO_WITNESS;
! 	if (opts & SX_QUIET)
! 		flags |= LO_QUIET;
! 
! 	flags |= opts & (SX_ADAPTIVESPIN | SX_RECURSE);
! 	sx->sx_lock = SX_LOCK_UNLOCKED;
! 	sx->sx_recurse = 0;
! 	lock = &sx->lock_object;
! 	lock->lo_class = &lock_class_sx;
! 	lock->lo_flags = flags;
! 	lock->lo_name = lock->lo_type = description;
! 	LOCK_LOG_INIT(lock, opts);
! 	WITNESS_INIT(lock);
  }
  
  void
  sx_destroy(struct sx *sx)
  {
+ 	LOCK_LOG_DESTROY(&sx->lock_object, 0);
  
! 	KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held"));
! 	KASSERT(sx->sx_recurse == 0, ("sx lock still recursed"));
! 	sx->sx_lock = SX_LOCK_DESTROYED;
! 	WITNESS_DESTROY(&sx->lock_object);
  }
  
! int
! _sx_slock(struct sx *sx, int opts, const char *file, int line)
  {
+ 	int error = 0;
  
! 	MPASS(curthread != NULL);
! 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
! 	    ("sx_slock() of destroyed sx @ %s:%d", file, line));
! 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line);
! 	error = __sx_slock(sx, opts, file, line);
! 	if (!error) {
! 		LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
! 		WITNESS_LOCK(&sx->lock_object, 0, file, line);
! 		curthread->td_locks++;
  	}
  
! 	return (error);
  }
  
  int
  _sx_try_slock(struct sx *sx, const char *file, int line)
  {
+ 	uintptr_t x;
  
! 	x = sx->sx_lock;
! 	KASSERT(x != SX_LOCK_DESTROYED,
! 	    ("sx_try_slock() of destroyed sx @ %s:%d", file, line));
! 	if ((x & SX_LOCK_SHARED) && atomic_cmpset_acq_ptr(&sx->sx_lock, x,
! 	    x + SX_ONE_SHARER)) {
! 		LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line);
! 		WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line);
  		curthread->td_locks++;
  		return (1);
  	}
+ 
+ 	LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
+ 	return (0);
  }
  
! int
! _sx_xlock(struct sx *sx, int opts, const char *file, int line)
  {
+ 	int error = 0;
  
! 	MPASS(curthread != NULL);
! 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
! 	    ("sx_xlock() of destroyed sx @ %s:%d", file, line));
! 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
  	    line);
! 	error = __sx_xlock(sx, curthread, opts, file, line);
! 	if (!error) {
! 		LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse,
! 		    file, line);
! 		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
! 		curthread->td_locks++;
  	}
  
! 	return (error);
  }
  
  int
  _sx_try_xlock(struct sx *sx, const char *file, int line)
  {
+ 	int rval;
  
! 	MPASS(curthread != NULL);
! 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
! 	    ("sx_try_xlock() of destroyed sx @ %s:%d", file, line));
! 
! 	if (sx_xlocked(sx) && (sx->lock_object.lo_flags & SX_RECURSE) != 0) {
! 		sx->sx_recurse++;
! 		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
! 		rval = 1;
! 	} else
! 		rval = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED,
! 		    (uintptr_t)curthread);
! 	LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line);
! 	if (rval) {
! 		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
! 		    file, line);
  		curthread->td_locks++;
  	}
+ 
+ 	return (rval);
  }
  
  void
  _sx_sunlock(struct sx *sx, const char *file, int line)
  {
  
! 	MPASS(curthread != NULL);
! 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
! 	    ("sx_sunlock() of destroyed sx @ %s:%d", file, line));
! 	_sx_assert(sx, SA_SLOCKED, file, line);
! 	curthread->td_locks--;
! 	WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
! 	LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
! #ifdef LOCK_PROFILING_SHARED
! 	if (SX_SHARERS(sx->sx_lock) == 1)
! 		lock_profile_release_lock(&sx->lock_object);
! #endif
! 	__sx_sunlock(sx, file, line);
! }
! 
! void
! _sx_xunlock(struct sx *sx, const char *file, int line)
! {
  
+ 	MPASS(curthread != NULL);
+ 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
+ 	    ("sx_xunlock() of destroyed sx @ %s:%d", file, line));
+ 	_sx_assert(sx, SA_XLOCKED, file, line);
  	curthread->td_locks--;
! 	WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
! 	LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file,
! 	    line);
! 	if (!sx_recursed(sx))
! 		lock_profile_release_lock(&sx->lock_object);
! 	__sx_xunlock(sx, curthread, file, line);
! }
  
! /*
!  * Try to do a non-blocking upgrade from a shared lock to an exclusive lock.
!  * This will only succeed if this thread holds a single shared lock.
!  * Return 1 if the upgrade succeeded, 0 otherwise.
!  */
! int
! _sx_try_upgrade(struct sx *sx, const char *file, int line)
! {
! 	uintptr_t x;
! 	int success;
! 
! 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
! 	    ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line));
! 	_sx_assert(sx, SA_SLOCKED, file, line);
  
  	/*
! 	 * Try to switch from one shared lock to an exclusive lock.  We need
! 	 * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that
! 	 * we will wake up the exclusive waiters when we drop the lock.
  	 */
! 	x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS;
! 	success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x,
! 	    (uintptr_t)curthread | x);
! 	LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
! 	if (success)
! 		WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
! 		    file, line);
! 	return (success);
  }
  
+ /*
+  * Downgrade an unrecursed exclusive lock into a single shared lock.
+  */
  void
! _sx_downgrade(struct sx *sx, const char *file, int line)
  {
+ 	uintptr_t x;
  
! 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
! 	    ("sx_downgrade() of destroyed sx @ %s:%d", file, line));
! 	_sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line);
! #ifndef INVARIANTS
! 	if (sx_recursed(sx))
! 		panic("downgrade of a recursed lock");
! #endif
  
! 	WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line);
  
! 	/*
! 	 * Try to switch from an exclusive lock with no shared waiters
! 	 * to one sharer with no shared waiters.  If there are
! 	 * exclusive waiters, we don't need to lock the sleep queue so
! 	 * long as we preserve the flag.  We do one quick try and if
! 	 * that fails we grab the sleepq lock to keep the flags from
! 	 * changing and do it the slow way.
! 	 *
! 	 * We have to lock the sleep queue if there are shared waiters
! 	 * so we can wake them up.
! 	 */
! 	x = sx->sx_lock;
! 	if (!(x & SX_LOCK_SHARED_WAITERS) &&
! 	    atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) |
! 	    (x & SX_LOCK_EXCLUSIVE_WAITERS))) {
! 		LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
! 		return;
! 	}
  
  	/*
! 	 * Lock the sleep queue so we can read the waiters bits
! 	 * without any races and wakeup any shared waiters.
  	 */
! 	sleepq_lock(&sx->lock_object);
  
! 	/*
! 	 * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single
! 	 * shared lock.  If there are any shared waiters, wake them up.
! 	 */
! 	x = sx->sx_lock;
! 	atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) |
! 	    (x & SX_LOCK_EXCLUSIVE_WAITERS));
! 	if (x & SX_LOCK_SHARED_WAITERS)
! 		sleepq_broadcast_queue(&sx->lock_object, SLEEPQ_SX, -1,
! 		    SQ_SHARED_QUEUE);
! 	else
! 		sleepq_release(&sx->lock_object);
  
! 	LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
  }
  
+ /*
+  * This function represents the so-called 'hard case' for sx_xlock
+  * operation.  All 'easy case' failures are redirected to this.  Note
+  * that ideally this would be a static function, but it needs to be
+  * accessible from at least sx.h.
+  */
  int
! _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
!     int line)
  {
+ 	GIANT_DECLARE;
+ #ifdef ADAPTIVE_SX
+ 	volatile struct thread *owner;
+ #endif
+ 	/* uint64_t waittime = 0; */
+ 	uintptr_t x;
+ 	int /* contested = 0, */ error = 0;
+ 
+ 	/* If we already hold an exclusive lock, then recurse. */
+ 	if (sx_xlocked(sx)) {
+ 		KASSERT((sx->lock_object.lo_flags & SX_RECURSE) != 0,
+ 	    ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n",
+ 		    sx->lock_object.lo_name, file, line));
+ 		sx->sx_recurse++;
+ 		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
+ 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
+ 			CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx);
+ 		return (0);
+ 	}
  
! 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
! 		    sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
  
! 	while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) {
! #ifdef ADAPTIVE_SX
! 		/*
! 		 * If the lock is write locked and the owner is
! 		 * running on another CPU, spin until the owner stops
! 		 * running or the state of the lock changes.
! 		 */
! 		x = sx->sx_lock;
! 		if (!(x & SX_LOCK_SHARED) &&
! 		    (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
! 			x = SX_OWNER(x);
! 			owner = (struct thread *)x;
! 			if (TD_IS_RUNNING(owner)) {
! 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 					CTR3(KTR_LOCK,
! 					    "%s: spinning on %p held by %p",
! 					    __func__, sx, owner);
! 				GIANT_SAVE();
! 				lock_profile_obtain_lock_failed(
! 				    &sx->lock_object, &contested, &waittime);
! 				while (SX_OWNER(sx->sx_lock) == x &&
! 				    TD_IS_RUNNING(owner))
! 					cpu_spinwait();
! 				continue;
! 			}
! 		}
! #endif
  
! 		sleepq_lock(&sx->lock_object);
! 		x = sx->sx_lock;
  
! 		/*
! 		 * If the lock was released while spinning on the
! 		 * sleep queue chain lock, try again.
! 		 */
! 		if (x == SX_LOCK_UNLOCKED) {
! 			sleepq_release(&sx->lock_object);
! 			continue;
! 		}
! 
! #ifdef ADAPTIVE_SX
! 		/*
! 		 * The current lock owner might have started executing
! 		 * on another CPU (or the lock could have changed
! 		 * owners) while we were waiting on the sleep queue
! 		 * chain lock.  If so, drop the sleep queue lock and try
! 		 * again.
! 		 */
! 		if (!(x & SX_LOCK_SHARED) &&
! 		    (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
! 			owner = (struct thread *)SX_OWNER(x);
! 			if (TD_IS_RUNNING(owner)) {
! 				sleepq_release(&sx->lock_object);
! 				continue;
! 			}
! 		}
! #endif
! 
! 		/*
! 		 * If an exclusive lock was released with both shared
! 		 * and exclusive waiters and a shared waiter hasn't
! 		 * woken up and acquired the lock yet, sx_lock will be
! 		 * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS.
! 		 * If we see that value, try to acquire it once.  Note
! 		 * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS
! 		 * as there are other exclusive waiters still.  If we
! 		 * fail, restart the loop.
! 		 */
! 		if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) {
! 			if (atomic_cmpset_acq_ptr(&sx->sx_lock,
! 			    SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS,
! 			    tid | SX_LOCK_EXCLUSIVE_WAITERS)) {
! 				sleepq_release(&sx->lock_object);
! 				CTR2(KTR_LOCK, "%s: %p claimed by new writer",
! 				    __func__, sx);
! 				break;
! 			}
! 			sleepq_release(&sx->lock_object);
! 			continue;
! 		}
! 
! 		/*
! 		 * Try to set the SX_LOCK_EXCLUSIVE_WAITERS flag.  If we
! 		 * fail, then loop back and retry.
! 		 */
! 		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
! 			if (!atomic_cmpset_ptr(&sx->sx_lock, x,
! 			    x | SX_LOCK_EXCLUSIVE_WAITERS)) {
! 				sleepq_release(&sx->lock_object);
! 				continue;
! 			}
! 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 				CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
! 				    __func__, sx);
! 		}
! 
! 		/*
! 		 * Since we have been unable to acquire the exclusive
! 		 * lock and the exclusive waiters flag is set, we have
! 		 * to sleep.
! 		 */
! #if 0
! 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
! 			    __func__, sx);
! #endif
! 
! 		GIANT_SAVE();
! 		lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
! 		    &waittime);
! 		sleepq_add_queue(&sx->lock_object, NULL, sx->lock_object.lo_name,
! 		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
! 		    SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE);
! 		if (!(opts & SX_INTERRUPTIBLE))
! 			sleepq_wait(&sx->lock_object);
! 		else
! 			error = sleepq_wait_sig(&sx->lock_object);
! 
! 		if (error) {
! 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 				CTR2(KTR_LOCK,
! 			"%s: interruptible sleep by %p suspended by signal",
! 				    __func__, sx);
! 			break;
! 		}
! 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
! 			    __func__, sx);
  	}
+ 
+ 	GIANT_RESTORE();
+ 	if (!error)
+ 		lock_profile_obtain_lock_success(&sx->lock_object, contested,
+ 		    waittime, file, line);
+ 	return (error);
  }
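
To reach the adaptive-spin branch above, a lock has to opt in at
initialization time.  A minimal kernel-side sketch (assuming the
sys/sx.h half of the backport keeps 7.0's sx_init_flags() interface,
which this excerpt does not show):

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sx.h>

static struct sx foo_lock;

static void
foo_init(void)
{
	/* Spin on a running owner; allow recursive exclusive locking. */
	sx_init_flags(&foo_lock, "foo lock", SX_ADAPTIVESPIN | SX_RECURSE);
}

static void
foo_update(void)
{
	sx_xlock(&foo_lock);	/* contention lands in _sx_xlock_hard() */
	/* ... modify state protected by foo_lock ... */
	sx_xunlock(&foo_lock);
}
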
  
+ /*
+  * This function represents the so-called 'hard case' for sx_xunlock
+  * operation.  All 'easy case' failures are redirected to this.  Note
+  * that ideally this would be a static function, but it needs to be
+  * accessible from at least sx.h.
+  */
  void
! _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
! {
! 	uintptr_t x;
! 	int queue;
! 
! 	MPASS(!(sx->sx_lock & SX_LOCK_SHARED));
! 
! 	/* If the lock is recursed, then unrecurse one level. */
! 	if (sx_xlocked(sx) && sx_recursed(sx)) {
! 		if ((--sx->sx_recurse) == 0)
! 			atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
! 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
! 		return;
! 	}
! 	MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS |
! 	    SX_LOCK_EXCLUSIVE_WAITERS));
! 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 		CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);
! 
! 	sleepq_lock(&sx->lock_object);
! 	x = SX_LOCK_UNLOCKED;
! 
! 	/*
! 	 * The wake up algorithm here is quite simple and probably not
! 	 * ideal.  It gives precedence to shared waiters if they are
! 	 * present.  For this condition, we have to preserve the
! 	 * state of the exclusive waiters flag.
! 	 */
! 	if (sx->sx_lock & SX_LOCK_SHARED_WAITERS) {
! 		queue = SQ_SHARED_QUEUE;
! 		x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS);
! 	} else
! 		queue = SQ_EXCLUSIVE_QUEUE;
! 
! 	/* Wake up all the waiters for the specific queue. */
! 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 		CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue",
! 		    __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" :
! 		    "exclusive");
! 	atomic_store_rel_ptr(&sx->sx_lock, x);
! 	sleepq_broadcast_queue(&sx->lock_object, SLEEPQ_SX, -1, queue);
! }
! 
! /*
!  * This function represents the so-called 'hard case' for sx_slock
!  * operation.  All 'easy case' failures are redirected to this.  Note
!  * that ideally this would be a static function, but it needs to be
!  * accessible from at least sx.h.
!  */
! int
! _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
! {
! 	GIANT_DECLARE;
! #ifdef ADAPTIVE_SX
! 	volatile struct thread *owner;
! #endif
! #ifdef LOCK_PROFILING_SHARED
! 	uint64_t waittime = 0;
! 	int contested = 0;
! #endif
! 	uintptr_t x;
! 	int error = 0;
! 
! 	/*
! 	 * As with rwlocks, we don't make any attempt to try to block
! 	 * shared locks once there is an exclusive waiter.
! 	 */
! 	for (;;) {
! 		x = sx->sx_lock;
! 
! 		/*
! 		 * If no other thread has an exclusive lock then try to bump up
! 		 * the count of sharers.  Since we have to preserve the state
! 		 * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the
! 		 * shared lock loop back and retry.
! 		 */
! 		if (x & SX_LOCK_SHARED) {
! 			MPASS(!(x & SX_LOCK_SHARED_WAITERS));
! 			if (atomic_cmpset_acq_ptr(&sx->sx_lock, x,
! 			    x + SX_ONE_SHARER)) {
! #ifdef LOCK_PROFILING_SHARED
! 				if (SX_SHARERS(x) == 0)
! 					lock_profile_obtain_lock_success(
! 					    &sx->lock_object, contested,
! 					    waittime, file, line);
! #endif
! 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 					CTR4(KTR_LOCK,
! 					    "%s: %p succeed %p -> %p", __func__,
! 					    sx, (void *)x,
! 					    (void *)(x + SX_ONE_SHARER));
! 				break;
! 			}
! 			continue;
! 		}
! 
! #ifdef ADAPTIVE_SX
! 		/*
! 		 * If the owner is running on another CPU, spin until
! 		 * the owner stops running or the state of the lock
! 		 * changes.
! 		 */
! 		else if (sx->lock_object.lo_flags & SX_ADAPTIVESPIN) {
! 			x = SX_OWNER(x);
! 			owner = (struct thread *)x;
! 			if (TD_IS_RUNNING(owner)) {
! 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 					CTR3(KTR_LOCK,
! 					    "%s: spinning on %p held by %p",
! 					    __func__, sx, owner);
! 				GIANT_SAVE();
! #ifdef LOCK_PROFILING_SHARED
! 				lock_profile_obtain_lock_failed(
! 				    &sx->lock_object, &contested, &waittime);
! #endif
! 				while (SX_OWNER(sx->sx_lock) == x &&
! 				    TD_IS_RUNNING(owner))
! 					cpu_spinwait();
! 				continue;
! 			}
! 		}
! #endif
! 
! 		/*
! 		 * Some other thread already has an exclusive lock, so
! 		 * start the process of blocking.
! 		 */
! 		sleepq_lock(&sx->lock_object);
! 		x = sx->sx_lock;
! 
! 		/*
! 		 * The lock could have been released while we spun.
! 		 * In this case loop back and retry.
! 		 */
! 		if (x & SX_LOCK_SHARED) {
! 			sleepq_release(&sx->lock_object);
! 			continue;
! 		}
! 
! #ifdef ADAPTIVE_SX
! 		/*
! 		 * If the owner is running on another CPU, spin until
! 		 * the owner stops running or the state of the lock
! 		 * changes.
! 		 */
! 		if (!(x & SX_LOCK_SHARED) &&
! 		    (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
! 			owner = (struct thread *)SX_OWNER(x);
! 			if (TD_IS_RUNNING(owner)) {
! 				sleepq_release(&sx->lock_object);
! 				continue;
! 			}
! 		}
! #endif
! 
! 		/*
! 		 * Try to set the SX_LOCK_SHARED_WAITERS flag.  If we
! 		 * fail to set it drop the sleep queue lock and loop
! 		 * back.
! 		 */
! 		if (!(x & SX_LOCK_SHARED_WAITERS)) {
! 			if (!atomic_cmpset_ptr(&sx->sx_lock, x,
! 			    x | SX_LOCK_SHARED_WAITERS)) {
! 				sleepq_release(&sx->lock_object);
! 				continue;
! 			}
! 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 				CTR2(KTR_LOCK, "%s: %p set shared waiters flag",
! 				    __func__, sx);
! 		}
! 
! 		/*
! 		 * Since we have been unable to acquire the shared lock,
! 		 * we have to sleep.
! 		 */
! 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
! 			    __func__, sx);
! 
! 		GIANT_SAVE();
! #ifdef LOCK_PROFILING_SHARED
! 		lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
! 		    &waittime);
! #endif
! 		sleepq_add_queue(&sx->lock_object, NULL, sx->lock_object.lo_name,
! 		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
! 		    SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE);
! 		if (!(opts & SX_INTERRUPTIBLE))
! 			sleepq_wait(&sx->lock_object);
! 		else
! 			error = sleepq_wait_sig(&sx->lock_object);
! 
! 		if (error) {
! 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 				CTR2(KTR_LOCK,
! 			"%s: interruptible sleep by %p suspended by signal",
! 				    __func__, sx);
! 			break;
! 		}
! 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
! 			    __func__, sx);
! 	}
! 
! 	GIANT_RESTORE();
! 	return (error);
! }
! 
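
The shared-acquire path above just does CAS arithmetic on the sharer
count packed into the same word; a stand-alone sketch of that counting
(toy constants, not the real SX_* encoding):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define	LK_SHARED	0x01u	/* word is in shared mode */
#define	LK_ONE_SHARER	0x10u	/* sharer count unit */
#define	LK_SHARERS(x)	((x) / LK_ONE_SHARER)

/* Unlocked state: shared mode with zero sharers. */
static _Atomic uintptr_t lockword = LK_SHARED;

static int
try_slock(void)
{
	uintptr_t x = atomic_load(&lockword);

	/* Only bump the count while no writer owns the word. */
	if (!(x & LK_SHARED))
		return (0);
	return (atomic_compare_exchange_strong(&lockword, &x,
	    x + LK_ONE_SHARER));
}

int
main(void)
{
	try_slock();
	try_slock();
	printf("%lu sharers\n",
	    (unsigned long)LK_SHARERS(atomic_load(&lockword)));
	return (0);
}
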
! /*
!  * This function represents the so-called 'hard case' for sx_sunlock
!  * operation.  All 'easy case' failures are redirected to this.  Note
!  * that ideally this would be a static function, but it needs to be
!  * accessible from at least sx.h.
!  */
! void
! _sx_sunlock_hard(struct sx *sx, const char *file, int line)
  {
+ 	uintptr_t x;
+ 
+ 	for (;;) {
+ 		x = sx->sx_lock;
+ 
+ 		/*
+ 		 * We should never have waiting sharers while at least one
+ 		 * thread holds a shared lock.
+ 		 */
+ 		KASSERT(!(x & SX_LOCK_SHARED_WAITERS),
+ 		    ("%s: waiting sharers", __func__));
  
! 		/*
! 		 * See if there is more than one shared lock held.  If
! 		 * so, just drop one and return.
! 		 */
! 		if (SX_SHARERS(x) > 1) {
! 			if (atomic_cmpset_ptr(&sx->sx_lock, x,
! 			    x - SX_ONE_SHARER)) {
! 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 					CTR4(KTR_LOCK,
! 					    "%s: %p succeeded %p -> %p",
! 					    __func__, sx, (void *)x,
! 					    (void *)(x - SX_ONE_SHARER));
! 				break;
! 			}
! 			continue;
! 		}
  
! 		/*
! 		 * If there aren't any waiters for an exclusive lock,
! 		 * then try to drop it quickly.
! 		 */
! 		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
! 			MPASS(x == SX_SHARERS_LOCK(1));
! 			if (atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1),
! 			    SX_LOCK_UNLOCKED)) {
! 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 					CTR2(KTR_LOCK, "%s: %p last succeeded",
! 					    __func__, sx);
! 				break;
! 			}
! 			continue;
! 		}
  
! 		/*
! 		 * At this point, there should just be one sharer with
! 		 * exclusive waiters.
! 		 */
! 		MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS));
  
! 		sleepq_lock(&sx->lock_object);
  
! 		/*
! 		 * Wake up semantic here is quite simple:
! 		 * Just wake up all the exclusive waiters.
! 		 * Note that the state of the lock could have changed,
! 		 * so if it fails loop back and retry.
! 		 */
! 		if (!atomic_cmpset_ptr(&sx->sx_lock,
! 		    SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS,
! 		    SX_LOCK_UNLOCKED)) {
! 			sleepq_release(&sx->lock_object);
! 			continue;
! 		}
! 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
! 			CTR2(KTR_LOCK, "%s: %p waking up all threads on"
! 			    " exclusive queue", __func__, sx);
! 		sleepq_broadcast_queue(&sx->lock_object, SLEEPQ_SX, -1,
! 		    SQ_EXCLUSIVE_QUEUE);
! 		break;
! 	}
  }
  
  #ifdef INVARIANT_SUPPORT
***************
*** 327,370 ****
  void
  _sx_assert(struct sx *sx, int what, const char *file, int line)
  {
  
  	if (panicstr != NULL)
  		return;
  	switch (what) {
! 	case SX_LOCKED:
! 	case SX_SLOCKED:
  #ifdef WITNESS
! 		witness_assert(&sx->sx_object, what, file, line);
  #else
! 		mtx_lock(sx->sx_lock);
! 		if (sx->sx_cnt <= 0 &&
! 		    (what == SX_SLOCKED || sx->sx_xholder != curthread))
  			panic("Lock %s not %slocked @ %s:%d\n",
! 			    sx->sx_object.lo_name, (what == SX_SLOCKED) ?
! 			    "share " : "", file, line);
! 		mtx_unlock(sx->sx_lock);
  #endif
  		break;
! 	case SX_XLOCKED:
! 		mtx_lock(sx->sx_lock);
! 		if (sx->sx_xholder != curthread)
  			panic("Lock %s not exclusively locked @ %s:%d\n",
! 			    sx->sx_object.lo_name, file, line);
! 		mtx_unlock(sx->sx_lock);
  		break;
! 	case SX_UNLOCKED:
  #ifdef WITNESS
! 		witness_assert(&sx->sx_object, what, file, line);
  #else
  		/*
! 		 * We are able to check only exclusive lock here,
! 		 * we cannot assert that *this* thread owns slock.
  		 */
! 		mtx_lock(sx->sx_lock);
! 		if (sx->sx_xholder == curthread)
  			panic("Lock %s exclusively locked @ %s:%d\n",
! 			    sx->sx_object.lo_name, file, line);
! 		mtx_unlock(sx->sx_lock);
  #endif
  		break;
  	default:
--- 861,936 ----
  void
  _sx_assert(struct sx *sx, int what, const char *file, int line)
  {
+ #ifndef WITNESS
+ 	int slocked = 0;
+ #endif
  
  	if (panicstr != NULL)
  		return;
  	switch (what) {
! 	case SA_SLOCKED:
! 	case SA_SLOCKED | SA_NOTRECURSED:
! 	case SA_SLOCKED | SA_RECURSED:
! #ifndef WITNESS
! 		slocked = 1;
! 		/* FALLTHROUGH */
! #endif
! 	case SA_LOCKED:
! 	case SA_LOCKED | SA_NOTRECURSED:
! 	case SA_LOCKED | SA_RECURSED:
  #ifdef WITNESS
! 		witness_assert(&sx->lock_object, what, file, line);
  #else
! 		/*
! 		 * If some other thread has an exclusive lock or we
! 		 * have one and are asserting a shared lock, fail.
! 		 * Also, if no one has a lock at all, fail.
! 		 */
! 		if (sx->sx_lock == SX_LOCK_UNLOCKED ||
! 		    (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked ||
! 		    sx_xholder(sx) != curthread)))
  			panic("Lock %s not %slocked @ %s:%d\n",
! 			    sx->lock_object.lo_name, slocked ? "share " : "",
! 			    file, line);
! 
! 		if (!(sx->sx_lock & SX_LOCK_SHARED)) {
! 			if (sx_recursed(sx)) {
! 				if (what & SA_NOTRECURSED)
! 					panic("Lock %s recursed @ %s:%d\n",
! 					    sx->lock_object.lo_name, file,
! 					    line);
! 			} else if (what & SA_RECURSED)
! 				panic("Lock %s not recursed @ %s:%d\n",
! 				    sx->lock_object.lo_name, file, line);
! 		}
  #endif
  		break;
! 	case SA_XLOCKED:
! 	case SA_XLOCKED | SA_NOTRECURSED:
! 	case SA_XLOCKED | SA_RECURSED:
! 		if (sx_xholder(sx) != curthread)
  			panic("Lock %s not exclusively locked @ %s:%d\n",
! 			    sx->lock_object.lo_name, file, line);
! 		if (sx_recursed(sx)) {
! 			if (what & SA_NOTRECURSED)
! 				panic("Lock %s recursed @ %s:%d\n",
! 				    sx->lock_object.lo_name, file, line);
! 		} else if (what & SA_RECURSED)
! 			panic("Lock %s not recursed @ %s:%d\n",
! 			    sx->lock_object.lo_name, file, line);
  		break;
! 	case SA_UNLOCKED:
  #ifdef WITNESS
! 		witness_assert(&sx->lock_object, what, file, line);
  #else
  		/*
! 		 * If we hold an exclusive lock, fail.  We can't
! 		 * reliably check to see if we hold a shared lock or
! 		 * not.
  		 */
! 		if (sx_xholder(sx) == curthread)
  			panic("Lock %s exclusively locked @ %s:%d\n",
! 			    sx->lock_object.lo_name, file, line);
  #endif
  		break;
  	default:
***************
*** 375,381 ****
  #endif	/* INVARIANT_SUPPORT */
  
  #ifdef DDB
! void
  db_show_sx(struct lock_object *lock)
  {
  	struct thread *td;
--- 941,947 ----
  #endif	/* INVARIANT_SUPPORT */
  
  #ifdef DDB
! static void
  db_show_sx(struct lock_object *lock)
  {
  	struct thread *td;
***************
*** 384,399 ****
  	sx = (struct sx *)lock;
  
  	db_printf(" state: ");
! 	if (sx->sx_cnt < 0) {
! 		td = sx->sx_xholder;
  		db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
  		    td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
! 	} else if (sx->sx_cnt > 0)
! 		db_printf("SLOCK: %d locks\n", sx->sx_cnt);
! 	else
! 		db_printf("UNLOCKED\n");
! 	db_printf(" waiters: %d shared, %d exclusive\n", sx->sx_shrd_wcnt,
! 	    sx->sx_excl_wcnt);
  }
  
  /*
--- 950,985 ----
  	sx = (struct sx *)lock;
  
  	db_printf(" state: ");
! 	if (sx->sx_lock == SX_LOCK_UNLOCKED)
! 		db_printf("UNLOCKED\n");
! 	else if (sx->sx_lock == SX_LOCK_DESTROYED) {
! 		db_printf("DESTROYED\n");
! 		return;
! 	} else if (sx->sx_lock & SX_LOCK_SHARED)
! 		db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock));
! 	else {
! 		td = sx_xholder(sx);
  		db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
  		    td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
! 		if (sx_recursed(sx))
! 			db_printf(" recursed: %d\n", sx->sx_recurse);
! 	}
! 
! 	db_printf(" waiters: ");
! 	switch(sx->sx_lock &
! 	    (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) {
! 	case SX_LOCK_SHARED_WAITERS:
! 		db_printf("shared\n");
! 		break;
! 	case SX_LOCK_EXCLUSIVE_WAITERS:
! 		db_printf("exclusive\n");
! 		break;
! 	case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS:
! 		db_printf("exclusive and shared\n");
! 		break;
! 	default:
! 		db_printf("none\n");
! 	}
  }
  
  /*
***************
*** 405,451 ****
  sx_chain(struct thread *td, struct thread **ownerp)
  {
  	struct sx *sx;
- 	struct cv *cv;
  
  	/*
! 	 * First, see if it looks like td is blocked on a condition
! 	 * variable.
  	 */
! 	cv = td->td_wchan;
! 	if (cv->cv_description != td->td_wmesg)
  		return (0);
  
- 	/*
- 	 * Ok, see if it looks like td is blocked on the exclusive
- 	 * condition variable.
- 	 */
- 	sx = (struct sx *)((char *)cv - offsetof(struct sx, sx_excl_cv));
- 	if (LOCK_CLASS(&sx->sx_object) == &lock_class_sx &&
- 	    sx->sx_excl_wcnt > 0)
- 		goto ok;
- 
- 	/*
- 	 * Second, see if it looks like td is blocked on the shared
- 	 * condition variable.
- 	 */
- 	sx = (struct sx *)((char *)cv - offsetof(struct sx, sx_shrd_cv));
- 	if (LOCK_CLASS(&sx->sx_object) == &lock_class_sx &&
- 	    sx->sx_shrd_wcnt > 0)
- 		goto ok;
- 
- 	/* Doesn't seem to be an sx lock. */
- 	return (0);
- 
- ok:
  	/* We think we have an sx lock, so output some details. */
  	db_printf("blocked on sx \"%s\" ", td->td_wmesg);
! 	if (sx->sx_cnt >= 0) {
! 		db_printf("SLOCK (count %d)\n", sx->sx_cnt);
! 		*ownerp = NULL;
! 	} else {
  		db_printf("XLOCK\n");
- 		*ownerp = sx->sx_xholder;
- 	}
  	return (1);
  }
  #endif
--- 991,1016 ----
  sx_chain(struct thread *td, struct thread **ownerp)
  {
  	struct sx *sx;
  
  	/*
! 	 * Check to see if this thread is blocked on an sx lock.
! 	 * First, we check the lock class.  If that is ok, then we
! 	 * compare the lock name against the wait message.
  	 */
! #define LOCK_CLASS(lo)	(lo)->lo_class
! 	sx = td->td_wchan;
! 	if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx ||
! 	    sx->lock_object.lo_name != td->td_wmesg)
  		return (0);
  
  	/* We think we have an sx lock, so output some details. */
  	db_printf("blocked on sx \"%s\" ", td->td_wmesg);
! 	*ownerp = sx_xholder(sx);
! 	if (sx->sx_lock & SX_LOCK_SHARED)
! 		db_printf("SLOCK (count %ju)\n",
! 		    (uintmax_t)SX_SHARERS(sx->sx_lock));
! 	else
  		db_printf("XLOCK\n");
  	return (1);
  }
  #endif
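
The public sx(9) entry points keep their old names, so a consumer of
the backported lock looks the same as on 7.0.  A minimal sketch for
reference (illustrative only; the SA_* assertion flags are expected to
come from the sys/sx.h part of the patch, as _sx_assert() above
requires):

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sx.h>

static struct sx conf_lock;
static int conf_value;

static void
conf_setup(void)
{
	sx_init(&conf_lock, "conf lock");
}

static int
conf_read(void)
{
	int v;

	sx_slock(&conf_lock);
	sx_assert(&conf_lock, SA_SLOCKED);
	v = conf_value;
	sx_sunlock(&conf_lock);
	return (v);
}

static void
conf_update(int v)
{
	sx_xlock(&conf_lock);
	conf_value = v;
	/* Let readers back in while finishing read-only work. */
	sx_downgrade(&conf_lock);
	sx_sunlock(&conf_lock);
}
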
Index: kern/kern_thread.c
===================================================================
RCS file: /cvs/ncvs/src/sys/kern/kern_thread.c,v
retrieving revision 1.216.2.6
diff -c -r1.216.2.6 kern_thread.c
*** kern/kern_thread.c	2 Sep 2006 17:29:57 -0000	1.216.2.6
--- kern/kern_thread.c	2 Sep 2007 21:56:36 -0000
***************
*** 305,311 ****
  
  	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
  	    thread_ctor, thread_dtor, thread_init, thread_fini,
! 	    UMA_ALIGN_CACHE, 0);
  	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
  	    ksegrp_ctor, NULL, NULL, NULL,
  	    UMA_ALIGN_CACHE, 0);
--- 305,311 ----
  
  	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
  	    thread_ctor, thread_dtor, thread_init, thread_fini,
! 	    THREAD_ALIGN, 0);
  	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
  	    ksegrp_ctor, NULL, NULL, NULL,
  	    UMA_ALIGN_CACHE, 0);
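
The only change here is the zone alignment; THREAD_ALIGN itself is
expected to come from the sys/proc.h hunk further down.  For reference,
uma_zcreate() takes its alignment as a mask (alignment minus one), so a
comparable private zone would be set up like this (hypothetical zone
and item type, shown only for the call shape):

#include <sys/param.h>
#include <vm/uma.h>

struct foo {
	uint64_t	f_key;
	char		f_pad[24];
};
#define	FOO_ALIGN	(32 - 1)	/* 32-byte aligned items */

static uma_zone_t foo_zone;

static void
foo_zone_setup(void)
{
	foo_zone = uma_zcreate("FOO", sizeof(struct foo),
	    NULL, NULL, NULL, NULL, FOO_ALIGN, 0);
}
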
Index: kern/subr_sleepqueue.c
===================================================================
RCS file: /cvs/ncvs/src/sys/kern/subr_sleepqueue.c,v
retrieving revision 1.18.2.4
diff -c -r1.18.2.4 subr_sleepqueue.c
*** kern/subr_sleepqueue.c	17 Aug 2006 19:53:06 -0000	1.18.2.4
--- kern/subr_sleepqueue.c	4 Sep 2007 01:28:11 -0000
***************
*** 82,87 ****
--- 82,93 ----
  #include <ddb/ddb.h>
  #endif
  
+ #include <vm/uma.h>
+ 
+ #ifdef DDB
+ #include <ddb/ddb.h>
+ #endif
+ 
  /*
   * Constants for the hash table of sleep queue chains.  These constants are
   * the same ones that 4BSD (and possibly earlier versions of BSD) used.
***************
*** 94,100 ****
  #define	SC_SHIFT	8
  #define	SC_HASH(wc)	(((uintptr_t)(wc) >> SC_SHIFT) & SC_MASK)
  #define	SC_LOOKUP(wc)	&sleepq_chains[SC_HASH(wc)]
! 
  /*
   * There two different lists of sleep queues.  Both lists are connected
   * via the sq_hash entries.  The first list is the sleep queue chain list
--- 100,106 ----
  #define	SC_SHIFT	8
  #define	SC_HASH(wc)	(((uintptr_t)(wc) >> SC_SHIFT) & SC_MASK)
  #define	SC_LOOKUP(wc)	&sleepq_chains[SC_HASH(wc)]
! #define NR_SLEEPQS      2
  /*
   * There two different lists of sleep queues.  Both lists are connected
   * via the sq_hash entries.  The first list is the sleep queue chain list
***************
*** 114,126 ****
   *  c - sleep queue chain lock
   */
  struct sleepqueue {
! 	TAILQ_HEAD(, thread) sq_blocked;	/* (c) Blocked threads. */
  	LIST_ENTRY(sleepqueue) sq_hash;		/* (c) Chain and free list. */
  	LIST_HEAD(, sleepqueue) sq_free;	/* (c) Free queues. */
  	void	*sq_wchan;			/* (c) Wait channel. */
  #ifdef INVARIANTS
  	int	sq_type;			/* (c) Queue type. */
! 	struct mtx *sq_lock;			/* (c) Associated lock. */
  #endif
  };
  
--- 120,132 ----
   *  c - sleep queue chain lock
   */
  struct sleepqueue {
! 	TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS];	/* (c) Blocked threads. */
  	LIST_ENTRY(sleepqueue) sq_hash;		/* (c) Chain and free list. */
  	LIST_HEAD(, sleepqueue) sq_free;	/* (c) Free queues. */
  	void	*sq_wchan;			/* (c) Wait channel. */
  #ifdef INVARIANTS
  	int	sq_type;			/* (c) Queue type. */
! 	struct mtx *sq_lock;		/* (c) Associated lock. */
  #endif
  };
  
***************
*** 142,157 ****
      0, "maxmimum depth achieved of a single chain");
  #endif
  static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
! 
! static MALLOC_DEFINE(M_SLEEPQUEUE, "sleep queues", "sleep queues");
  
  /*
   * Prototypes for non-exported routines.
   */
  static int	sleepq_check_timeout(void);
  static void	sleepq_switch(void *wchan);
  static void	sleepq_timeout(void *arg);
- static void	sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri);
  
  /*
   * Early initialization of sleep queues that is called from the sleepinit()
--- 148,169 ----
      0, "maxmimum depth achieved of a single chain");
  #endif
  static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
! static uma_zone_t sleepq_zone;
  
  /*
   * Prototypes for non-exported routines.
   */
+ static int	sleepq_catch_signals(void *wchan);
+ static int	sleepq_check_signals(void);
  static int	sleepq_check_timeout(void);
+ #ifdef INVARIANTS
+ static void	sleepq_dtor(void *mem, int size, void *arg);
+ #endif
+ static int	sleepq_init(void *mem, int size, int flags);
+ static void	sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
+ 		    int pri);
  static void	sleepq_switch(void *wchan);
  static void	sleepq_timeout(void *arg);
  
  /*
   * Early initialization of sleep queues that is called from the sleepinit()
***************
*** 182,202 ****
  		    NULL);
  #endif
  	}
  	thread0.td_sleepqueue = sleepq_alloc();
  }
  
  /*
!  * Malloc and initialize a new sleep queue for a new thread.
   */
  struct sleepqueue *
  sleepq_alloc(void)
  {
- 	struct sleepqueue *sq;
  
! 	sq = malloc(sizeof(struct sleepqueue), M_SLEEPQUEUE, M_WAITOK | M_ZERO);
! 	TAILQ_INIT(&sq->sq_blocked);
! 	LIST_INIT(&sq->sq_free);
! 	return (sq);
  }
  
  /*
--- 194,217 ----
  		    NULL);
  #endif
  	}
+ 	sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
+ #ifdef INVARIANTS
+ 	    NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
+ #else
+ 	    NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
+ #endif
+ 	
  	thread0.td_sleepqueue = sleepq_alloc();
  }
  
  /*
!  * Get a sleep queue for a new thread.
   */
  struct sleepqueue *
  sleepq_alloc(void)
  {
  
! 	return (uma_zalloc(sleepq_zone, M_WAITOK));
  }
  
  /*
***************
*** 206,214 ****
  sleepq_free(struct sleepqueue *sq)
  {
  
! 	MPASS(sq != NULL);
! 	MPASS(TAILQ_EMPTY(&sq->sq_blocked));
! 	free(sq, M_SLEEPQUEUE);
  }
  
  /*
--- 221,227 ----
  sleepq_free(struct sleepqueue *sq)
  {
  
! 	uma_zfree(sleepq_zone, sq);
  }
  
  /*
***************
*** 262,268 ****
   * woken up.
   */
  void
! sleepq_add(void *wchan, struct mtx *lock, const char *wmesg, int flags)
  {
  	struct sleepqueue_chain *sc;
  	struct sleepqueue *sq;
--- 275,282 ----
   * woken up.
   */
  void
! sleepq_add_queue(void *wchan, struct mtx *lock, const char *wmesg, int flags,
!     int queue)
  {
  	struct sleepqueue_chain *sc;
  	struct sleepqueue *sq;
***************
*** 273,282 ****
  	mtx_assert(&sc->sc_lock, MA_OWNED);
  	MPASS(td->td_sleepqueue != NULL);
  	MPASS(wchan != NULL);
  
  	/* If this thread is not allowed to sleep, die a horrible death. */
  	KASSERT(!(td->td_pflags & TDP_NOSLEEPING),
! 	    ("trying to sleep while sleeping is prohibited"));
  
  	/* Look up the sleep queue associated with the wait channel 'wchan'. */
  	sq = sleepq_lookup(wchan);
--- 287,297 ----
  	mtx_assert(&sc->sc_lock, MA_OWNED);
  	MPASS(td->td_sleepqueue != NULL);
  	MPASS(wchan != NULL);
+ 	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  
  	/* If this thread is not allowed to sleep, die a horrible death. */
  	KASSERT(!(td->td_pflags & TDP_NOSLEEPING),
! 	    ("Trying sleep, but thread marked as sleeping prohibited"));
  
  	/* Look up the sleep queue associated with the wait channel 'wchan'. */
  	sq = sleepq_lookup(wchan);
***************
*** 287,292 ****
--- 302,320 ----
  	 * into the sleep queue already in use by this wait channel.
  	 */
  	if (sq == NULL) {
+ #ifdef INVARIANTS
+ 		int i;
+ 
+ 		sq = td->td_sleepqueue;
+ 		for (i = 0; i < NR_SLEEPQS; i++)
+ 			KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
+ 				("thread's sleep queue %d is not empty", i));
+ 		KASSERT(LIST_EMPTY(&sq->sq_free),
+ 		    ("thread's sleep queue has a non-empty free list"));
+ 		KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
+ 		sq->sq_lock = lock;
+ 		sq->sq_type = flags & SLEEPQ_TYPE;
+ #endif
  #ifdef SLEEPQUEUE_PROFILING
  		sc->sc_depth++;
  		if (sc->sc_depth > sc->sc_max_depth) {
***************
*** 297,321 ****
  #endif
  		sq = td->td_sleepqueue;
  		LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
- 		KASSERT(TAILQ_EMPTY(&sq->sq_blocked),
- 		    ("thread's sleep queue has a non-empty queue"));
- 		KASSERT(LIST_EMPTY(&sq->sq_free),
- 		    ("thread's sleep queue has a non-empty free list"));
- 		KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
  		sq->sq_wchan = wchan;
- #ifdef INVARIANTS
- 		sq->sq_lock = lock;
- 		sq->sq_type = flags & SLEEPQ_TYPE;
- #endif
  	} else {
  		MPASS(wchan == sq->sq_wchan);
  		MPASS(lock == sq->sq_lock);
  		MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
  		LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
  	}
! 	TAILQ_INSERT_TAIL(&sq->sq_blocked, td, td_slpq);
  	td->td_sleepqueue = NULL;
  	mtx_lock_spin(&sched_lock);
  	td->td_wchan = wchan;
  	td->td_wmesg = wmesg;
  	if (flags & SLEEPQ_INTERRUPTIBLE) {
--- 325,341 ----
  #endif
  		sq = td->td_sleepqueue;
  		LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
  		sq->sq_wchan = wchan;
  	} else {
  		MPASS(wchan == sq->sq_wchan);
  		MPASS(lock == sq->sq_lock);
  		MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
  		LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
  	}
! 	TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
  	td->td_sleepqueue = NULL;
  	mtx_lock_spin(&sched_lock);
+ 	td->td_sqqueue = queue;
  	td->td_wchan = wchan;
  	td->td_wmesg = wmesg;
  	if (flags & SLEEPQ_INTERRUPTIBLE) {
***************
*** 606,617 ****
  	MPASS(td != NULL);
  	MPASS(sq->sq_wchan != NULL);
  	MPASS(td->td_wchan == sq->sq_wchan);
  	sc = SC_LOOKUP(sq->sq_wchan);
  	mtx_assert(&sc->sc_lock, MA_OWNED);
  	mtx_assert(&sched_lock, MA_OWNED);
  
  	/* Remove the thread from the queue. */
! 	TAILQ_REMOVE(&sq->sq_blocked, td, td_slpq);
  
  	/*
  	 * Get a sleep queue for this thread.  If this is the last waiter,
--- 626,638 ----
  	MPASS(td != NULL);
  	MPASS(sq->sq_wchan != NULL);
  	MPASS(td->td_wchan == sq->sq_wchan);
+ 	MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
  	sc = SC_LOOKUP(sq->sq_wchan);
  	mtx_assert(&sc->sc_lock, MA_OWNED);
  	mtx_assert(&sched_lock, MA_OWNED);
  
  	/* Remove the thread from the queue. */
! 	TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
  
  	/*
  	 * Get a sleep queue for this thread.  If this is the last waiter,
***************
*** 652,668 ****
  	setrunnable(td);
  }
  
  /*
   * Find the highest priority thread sleeping on a wait channel and resume it.
   */
  void
! sleepq_signal(void *wchan, int flags, int pri)
  {
  	struct sleepqueue *sq;
  	struct thread *td, *besttd;
  
  	CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
  	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  	sq = sleepq_lookup(wchan);
  	if (sq == NULL) {
  		sleepq_release(wchan);
--- 673,723 ----
  	setrunnable(td);
  }
  
+ #ifdef INVARIANTS
+ /*
+  * UMA zone item deallocator.
+  */
+ static void
+ sleepq_dtor(void *mem, int size, void *arg)
+ {
+ 	struct sleepqueue *sq;
+ 	int i;
+ 
+ 	sq = mem;
+ 	for (i = 0; i < NR_SLEEPQS; i++)
+ 		MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
+ }
+ #endif
+ 
+ /*
+  * UMA zone item initializer.
+  */
+ static int
+ sleepq_init(void *mem, int size, int flags)
+ {
+ 	struct sleepqueue *sq;
+ 	int i;
+ 
+ 	bzero(mem, size);
+ 	sq = mem;
+ 	for (i = 0; i < NR_SLEEPQS; i++)
+ 		TAILQ_INIT(&sq->sq_blocked[i]);
+ 	LIST_INIT(&sq->sq_free);
+ 	return (0);
+ }
+ 
  /*
   * Find the highest priority thread sleeping on a wait channel and resume it.
   */
  void
! sleepq_signal_queue(void *wchan, int flags, int pri, int queue)
  {
  	struct sleepqueue *sq;
  	struct thread *td, *besttd;
  
  	CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
  	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
+ 	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  	sq = sleepq_lookup(wchan);
  	if (sq == NULL) {
  		sleepq_release(wchan);
***************
*** 678,684 ****
  	 * the tail of sleep queues.
  	 */
  	besttd = NULL;
! 	TAILQ_FOREACH(td, &sq->sq_blocked, td_slpq) {
  		if (besttd == NULL || td->td_priority < besttd->td_priority)
  			besttd = td;
  	}
--- 733,739 ----
  	 * the tail of sleep queues.
  	 */
  	besttd = NULL;
! 	TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
  		if (besttd == NULL || td->td_priority < besttd->td_priority)
  			besttd = td;
  	}
***************
*** 693,704 ****
   * Resume all threads sleeping on a specified wait channel.
   */
  void
! sleepq_broadcast(void *wchan, int flags, int pri)
  {
  	struct sleepqueue *sq;
  
  	CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
  	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  	sq = sleepq_lookup(wchan);
  	if (sq == NULL) {
  		sleepq_release(wchan);
--- 748,760 ----
   * Resume all threads sleeping on a specified wait channel.
   */
  void
! sleepq_broadcast_queue(void *wchan, int flags, int pri, int queue)
  {
  	struct sleepqueue *sq;
  
  	CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
  	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
+ 	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  	sq = sleepq_lookup(wchan);
  	if (sq == NULL) {
  		sleepq_release(wchan);
***************
*** 709,716 ****
  
  	/* Resume all blocked threads on the sleep queue. */
  	mtx_lock_spin(&sched_lock);
! 	while (!TAILQ_EMPTY(&sq->sq_blocked))
! 		sleepq_resume_thread(sq, TAILQ_FIRST(&sq->sq_blocked), pri);
  	mtx_unlock_spin(&sched_lock);
  	sleepq_release(wchan);
  }
--- 765,773 ----
  
  	/* Resume all blocked threads on the sleep queue. */
  	mtx_lock_spin(&sched_lock);
! 	while (!TAILQ_EMPTY(&sq->sq_blocked[queue]))
! 		sleepq_resume_thread(sq, TAILQ_FIRST(&sq->sq_blocked[queue]),
! 		    pri);
  	mtx_unlock_spin(&sched_lock);
  	sleepq_release(wchan);
  }
***************
*** 853,913 ****
  	mtx_lock_spin(&sched_lock);
  }
  
- #ifdef DDB
- DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
- {
- 	struct sleepqueue_chain *sc;
- 	struct sleepqueue *sq;
- #ifdef INVARIANTS
- 	struct lock_object *lock;
- #endif
- 	struct thread *td;
- 	void *wchan;
- 	int i;
- 
- 	if (!have_addr)
- 		return;
- 
- 	/*
- 	 * First, see if there is an active sleep queue for the wait channel
- 	 * indicated by the address.
- 	 */
- 	wchan = (void *)addr;
- 	sc = SC_LOOKUP(wchan);
- 	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
- 		if (sq->sq_wchan == wchan)
- 			goto found;
- 
- 	/*
- 	 * Second, see if there is an active sleep queue at the address
- 	 * indicated.
- 	 */
- 	for (i = 0; i < SC_TABLESIZE; i++)
- 		LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
- 			if (sq == (struct sleepqueue *)addr)
- 				goto found;
- 		}
- 
- 	db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
- 	return;
- found:
- 	db_printf("Wait channel: %p\n", sq->sq_wchan);
- #ifdef INVARIANTS
- 	db_printf("Queue type: %d\n", sq->sq_type);
- 	if (sq->sq_lock) {
- 		lock = &sq->sq_lock->mtx_object;
- 		db_printf("Associated Interlock: %p - (%s) %s\n", lock,
- 		    LOCK_CLASS(lock)->lc_name, lock->lo_name);
- 	}
- #endif
- 	db_printf("Blocked threads:\n");
- 	if (TAILQ_EMPTY(&sq->sq_blocked))
- 		db_printf("\tempty\n");
- 	else
- 		TAILQ_FOREACH(td, &sq->sq_blocked, td_slpq) {
- 			db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
- 			    td->td_tid, td->td_proc->p_pid,
- 			    td->td_proc->p_comm);
- 		}	
- }
- #endif
--- 910,912 ----
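
The queue argument threads through the whole sleepqueue API; the
pattern kern_sx.c relies on looks like this in isolation (sketch only;
SQ_SHARED_QUEUE, SQ_EXCLUSIVE_QUEUE, SLEEPQ_SX and the
sleepq_*_queue() prototypes come from the sys/sleepqueue.h part of the
patch, which is not in this excerpt):

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sleepqueue.h>

/* Park the calling thread on the shared queue of 'wchan'. */
static void
wait_shared(void *wchan, const char *wmesg)
{
	sleepq_lock(wchan);
	sleepq_add_queue(wchan, NULL, wmesg, SLEEPQ_SX, SQ_SHARED_QUEUE);
	sleepq_wait(wchan);
}

/*
 * Wake every thread parked on the exclusive queue of 'wchan'; the
 * broadcast drops the sleep queue chain lock on return.
 */
static void
wake_writers(void *wchan)
{
	sleepq_lock(wchan);
	sleepq_broadcast_queue(wchan, SLEEPQ_SX, -1, SQ_EXCLUSIVE_QUEUE);
}
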
Index: kern/subr_turnstile.c
===================================================================
RCS file: /cvs/ncvs/src/sys/kern/subr_turnstile.c,v
retrieving revision 1.152.2.5
diff -c -r1.152.2.5 subr_turnstile.c
*** kern/subr_turnstile.c	23 Jan 2007 22:16:33 -0000	1.152.2.5
--- kern/subr_turnstile.c	31 Aug 2007 02:15:23 -0000
***************
*** 114,120 ****
   *  q - td_contested lock
   */
  struct turnstile {
! 	TAILQ_HEAD(, thread) ts_blocked;	/* (c + q) Blocked threads. */
  	TAILQ_HEAD(, thread) ts_pending;	/* (c) Pending threads. */
  	LIST_ENTRY(turnstile) ts_hash;		/* (c) Chain and free list. */
  	LIST_ENTRY(turnstile) ts_link;		/* (q) Contested locks. */
--- 114,121 ----
   *  q - td_contested lock
   */
  struct turnstile {
! 	/* struct mtx ts_lock;	*/		/* Spin lock for self. */
! 	TAILQ_HEAD(, thread) ts_blocked[2];	/* (c + q) Blocked threads. */
  	TAILQ_HEAD(, thread) ts_pending;	/* (c) Pending threads. */
  	LIST_ENTRY(turnstile) ts_hash;		/* (c) Chain and free list. */
  	LIST_ENTRY(turnstile) ts_link;		/* (q) Contested locks. */
***************
*** 143,148 ****
--- 144,155 ----
  static struct mtx td_contested_lock;
  static struct turnstile_chain turnstile_chains[TC_TABLESIZE];
  
+ /* XXX: stats, remove me */
+ static u_int turnstile_nullowners;
+ SYSCTL_UINT(_debug, OID_AUTO, turnstile_nullowners, CTLFLAG_RD,
+     &turnstile_nullowners, 0, "called with null owner on a shared queue");
+ 
+ 
  static MALLOC_DEFINE(M_TURNSTILE, "turnstiles", "turnstiles");
  
  /*
***************
*** 267,272 ****
--- 274,280 ----
  {
  	struct turnstile_chain *tc;
  	struct thread *td1, *td2;
+ 	int queue;
  
  	mtx_assert(&sched_lock, MA_OWNED);
  	MPASS(TD_ON_LOCK(td));
***************
*** 300,315 ****
  		 * Remove thread from blocked chain and determine where
  		 * it should be moved to.
  		 */
  		mtx_lock_spin(&td_contested_lock);
! 		TAILQ_REMOVE(&ts->ts_blocked, td, td_lockq);
! 		TAILQ_FOREACH(td1, &ts->ts_blocked, td_lockq) {
  			MPASS(td1->td_proc->p_magic == P_MAGIC);
  			if (td1->td_priority > td->td_priority)
  				break;
  		}
  
  		if (td1 == NULL)
! 			TAILQ_INSERT_TAIL(&ts->ts_blocked, td, td_lockq);
  		else
  			TAILQ_INSERT_BEFORE(td1, td, td_lockq);
  		mtx_unlock_spin(&td_contested_lock);
--- 308,325 ----
  		 * Remove thread from blocked chain and determine where
  		 * it should be moved to.
  		 */
+ 		queue = td->td_tsqueue;
+ 		MPASS(queue == TS_EXCLUSIVE_QUEUE || queue == TS_SHARED_QUEUE);
  		mtx_lock_spin(&td_contested_lock);
! 		TAILQ_REMOVE(&ts->ts_blocked[queue], td, td_lockq);
! 		TAILQ_FOREACH(td1, &ts->ts_blocked[queue], td_lockq) {
  			MPASS(td1->td_proc->p_magic == P_MAGIC);
  			if (td1->td_priority > td->td_priority)
  				break;
  		}
  
  		if (td1 == NULL)
! 			TAILQ_INSERT_TAIL(&ts->ts_blocked[queue], td, td_lockq);
  		else
  			TAILQ_INSERT_BEFORE(td1, td, td_lockq);
  		mtx_unlock_spin(&td_contested_lock);
***************
*** 412,418 ****
  	 * Note that we currently don't try to revoke lent priorities
  	 * when our priority goes up.
  	 */
! 	if (td == TAILQ_FIRST(&ts->ts_blocked) && td->td_priority < oldpri) {
  		mtx_unlock_spin(&tc->tc_lock);
  		critical_enter();
  		propagate_priority(td);
--- 422,431 ----
  	 * Note that we currently don't try to revoke lent priorities
  	 * when our priority goes up.
  	 */
! 	MPASS(td->td_tsqueue == TS_EXCLUSIVE_QUEUE ||
! 	    td->td_tsqueue == TS_SHARED_QUEUE);
! 	if (td == TAILQ_FIRST(&ts->ts_blocked[td->td_tsqueue]) &&
! 	    td->td_priority < oldpri) {
  		mtx_unlock_spin(&tc->tc_lock);
  		critical_enter();
  		propagate_priority(td);
***************
*** 429,436 ****
  {
  
  	mtx_assert(&td_contested_lock, MA_OWNED);
- 	MPASS(owner->td_proc->p_magic == P_MAGIC);
  	MPASS(ts->ts_owner == NULL);
  	ts->ts_owner = owner;
  	LIST_INSERT_HEAD(&owner->td_contested, ts, ts_link);
  }
--- 442,452 ----
  {
  
  	mtx_assert(&td_contested_lock, MA_OWNED);
  	MPASS(ts->ts_owner == NULL);
+ 	if (owner == NULL)
+ 		return;
+ 
+ 	MPASS(owner->td_proc->p_magic == P_MAGIC);
  	ts->ts_owner = owner;
  	LIST_INSERT_HEAD(&owner->td_contested, ts, ts_link);
  }
***************
*** 444,450 ****
  	struct turnstile *ts;
  
  	ts = malloc(sizeof(struct turnstile), M_TURNSTILE, M_WAITOK | M_ZERO);
! 	TAILQ_INIT(&ts->ts_blocked);
  	TAILQ_INIT(&ts->ts_pending);
  	LIST_INIT(&ts->ts_free);
  	return (ts);
--- 460,467 ----
  	struct turnstile *ts;
  
  	ts = malloc(sizeof(struct turnstile), M_TURNSTILE, M_WAITOK | M_ZERO);
! 	TAILQ_INIT(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]);
! 	TAILQ_INIT(&ts->ts_blocked[TS_SHARED_QUEUE]);
  	TAILQ_INIT(&ts->ts_pending);
  	LIST_INIT(&ts->ts_free);
  	return (ts);
***************
*** 458,464 ****
  {
  
  	MPASS(ts != NULL);
! 	MPASS(TAILQ_EMPTY(&ts->ts_blocked));
  	MPASS(TAILQ_EMPTY(&ts->ts_pending));
  	free(ts, M_TURNSTILE);
  }
--- 475,482 ----
  {
  
  	MPASS(ts != NULL);
! 	MPASS(TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]));
! 	MPASS(TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]));
  	MPASS(TAILQ_EMPTY(&ts->ts_pending));
  	free(ts, M_TURNSTILE);
  }
***************
*** 507,512 ****
--- 525,546 ----
  }
  
  /*
+  * Return a pointer to the thread waiting on this turnstile with the
+  * most important priority or NULL if the turnstile has no waiters.
+  */
+ static struct thread *
+ turnstile_first_waiter(struct turnstile *ts)
+ {
+ 	struct thread *std, *xtd;
+ 
+ 	std = TAILQ_FIRST(&ts->ts_blocked[TS_SHARED_QUEUE]);
+ 	xtd = TAILQ_FIRST(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]);
+ 	if (xtd == NULL || (std != NULL && std->td_priority < xtd->td_priority))
+ 		return (std);
+ 	return (xtd);
+ }
+ 
+ /*
   * Take ownership of a turnstile and adjust the priority of the new
   * owner appropriately.
   */
***************
*** 527,533 ****
  	turnstile_setowner(ts, owner);
  	mtx_unlock_spin(&td_contested_lock);
  
! 	td = TAILQ_FIRST(&ts->ts_blocked);
  	MPASS(td != NULL);
  	MPASS(td->td_proc->p_magic == P_MAGIC);
  	mtx_unlock_spin(&tc->tc_lock);
--- 561,567 ----
  	turnstile_setowner(ts, owner);
  	mtx_unlock_spin(&td_contested_lock);
  
! 	td = turnstile_first_waiter(ts);
  	MPASS(td != NULL);
  	MPASS(td->td_proc->p_magic == P_MAGIC);
  	mtx_unlock_spin(&tc->tc_lock);
***************
*** 548,554 ****
   * turnstile chain locked and will return with it unlocked.
   */
  void
! turnstile_wait(struct lock_object *lock, struct thread *owner)
  {
  	struct turnstile_chain *tc;
  	struct turnstile *ts;
--- 582,588 ----
   * turnstile chain locked and will return with it unlocked.
   */
  void
! turnstile_wait_queue(struct lock_object *lock, struct thread *owner, int queue)
  {
  	struct turnstile_chain *tc;
  	struct turnstile *ts;
***************
*** 558,565 ****
  	tc = TC_LOOKUP(lock);
  	mtx_assert(&tc->tc_lock, MA_OWNED);
  	MPASS(td->td_turnstile != NULL);
! 	MPASS(owner != NULL);
! 	MPASS(owner->td_proc->p_magic == P_MAGIC);
  
  	/* Look up the turnstile associated with the lock 'lock'. */
  	ts = turnstile_lookup(lock);
--- 592,604 ----
  	tc = TC_LOOKUP(lock);
  	mtx_assert(&tc->tc_lock, MA_OWNED);
  	MPASS(td->td_turnstile != NULL);
! 	if (owner)
! 		MPASS(owner->td_proc->p_magic == P_MAGIC);
! 	/* XXX: stats, remove me */
! 	if (!owner && queue == TS_SHARED_QUEUE) {
! 		turnstile_nullowners++;
! 	}
! 	MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
  
  	/* Look up the turnstile associated with the lock 'lock'. */
  	ts = turnstile_lookup(lock);
***************
*** 582,606 ****
  		LIST_INSERT_HEAD(&tc->tc_turnstiles, ts, ts_hash);
  		KASSERT(TAILQ_EMPTY(&ts->ts_pending),
  		    ("thread's turnstile has pending threads"));
! 		KASSERT(TAILQ_EMPTY(&ts->ts_blocked),
! 		    ("thread's turnstile has a non-empty queue"));
  		KASSERT(LIST_EMPTY(&ts->ts_free),
  		    ("thread's turnstile has a non-empty free list"));
  		KASSERT(ts->ts_lockobj == NULL, ("stale ts_lockobj pointer"));
  		ts->ts_lockobj = lock;
  		mtx_lock_spin(&td_contested_lock);
! 		TAILQ_INSERT_TAIL(&ts->ts_blocked, td, td_lockq);
  		turnstile_setowner(ts, owner);
  		mtx_unlock_spin(&td_contested_lock);
  	} else {
! 		TAILQ_FOREACH(td1, &ts->ts_blocked, td_lockq)
  			if (td1->td_priority > td->td_priority)
  				break;
  		mtx_lock_spin(&td_contested_lock);
  		if (td1 != NULL)
  			TAILQ_INSERT_BEFORE(td1, td, td_lockq);
  		else
! 			TAILQ_INSERT_TAIL(&ts->ts_blocked, td, td_lockq);
  		mtx_unlock_spin(&td_contested_lock);
  		MPASS(td->td_turnstile != NULL);
  		LIST_INSERT_HEAD(&ts->ts_free, td->td_turnstile, ts_hash);
--- 621,647 ----
  		LIST_INSERT_HEAD(&tc->tc_turnstiles, ts, ts_hash);
  		KASSERT(TAILQ_EMPTY(&ts->ts_pending),
  		    ("thread's turnstile has pending threads"));
! 		KASSERT(TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]),
! 		    ("thread's turnstile has exclusive waiters"));
! 		KASSERT(TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]),
! 		    ("thread's turnstile has shared waiters"));
  		KASSERT(LIST_EMPTY(&ts->ts_free),
  		    ("thread's turnstile has a non-empty free list"));
  		KASSERT(ts->ts_lockobj == NULL, ("stale ts_lockobj pointer"));
  		ts->ts_lockobj = lock;
  		mtx_lock_spin(&td_contested_lock);
! 		TAILQ_INSERT_TAIL(&ts->ts_blocked[queue], td, td_lockq);
  		turnstile_setowner(ts, owner);
  		mtx_unlock_spin(&td_contested_lock);
  	} else {
! 		TAILQ_FOREACH(td1, &ts->ts_blocked[queue], td_lockq)
  			if (td1->td_priority > td->td_priority)
  				break;
  		mtx_lock_spin(&td_contested_lock);
  		if (td1 != NULL)
  			TAILQ_INSERT_BEFORE(td1, td, td_lockq);
  		else
! 			TAILQ_INSERT_TAIL(&ts->ts_blocked[queue], td, td_lockq);
  		mtx_unlock_spin(&td_contested_lock);
  		MPASS(td->td_turnstile != NULL);
  		LIST_INSERT_HEAD(&ts->ts_free, td->td_turnstile, ts_hash);
***************
*** 664,670 ****
   * pending list.  This must be called with the turnstile chain locked.
   */
  int
! turnstile_signal(struct turnstile *ts)
  {
  	struct turnstile_chain *tc;
  	struct thread *td;
--- 705,711 ----
   * pending list.  This must be called with the turnstile chain locked.
   */
  int
! turnstile_signal_queue(struct turnstile *ts, int queue)
  {
  	struct turnstile_chain *tc;
  	struct thread *td;
***************
*** 675,689 ****
  	MPASS(ts->ts_owner == curthread);
  	tc = TC_LOOKUP(ts->ts_lockobj);
  	mtx_assert(&tc->tc_lock, MA_OWNED);
  
  	/*
  	 * Pick the highest priority thread blocked on this lock and
  	 * move it to the pending list.
  	 */
! 	td = TAILQ_FIRST(&ts->ts_blocked);
  	MPASS(td->td_proc->p_magic == P_MAGIC);
  	mtx_lock_spin(&td_contested_lock);
! 	TAILQ_REMOVE(&ts->ts_blocked, td, td_lockq);
  	mtx_unlock_spin(&td_contested_lock);
  	TAILQ_INSERT_TAIL(&ts->ts_pending, td, td_lockq);
  
--- 716,733 ----
  	MPASS(ts->ts_owner == curthread);
  	tc = TC_LOOKUP(ts->ts_lockobj);
  	mtx_assert(&tc->tc_lock, MA_OWNED);
+ 	MPASS(ts->ts_owner == curthread ||
+ 	    (queue == TS_EXCLUSIVE_QUEUE && ts->ts_owner == NULL));
+ 	MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
  
  	/*
  	 * Pick the highest priority thread blocked on this lock and
  	 * move it to the pending list.
  	 */
! 	td = TAILQ_FIRST(&ts->ts_blocked[queue]);
  	MPASS(td->td_proc->p_magic == P_MAGIC);
  	mtx_lock_spin(&td_contested_lock);
! 	TAILQ_REMOVE(&ts->ts_blocked[queue], td, td_lockq);
  	mtx_unlock_spin(&td_contested_lock);
  	TAILQ_INSERT_TAIL(&ts->ts_pending, td, td_lockq);
  
***************
*** 692,698 ****
  	 * give it to the about-to-be-woken thread.  Otherwise take a
  	 * turnstile from the free list and give it to the thread.
  	 */
! 	empty = TAILQ_EMPTY(&ts->ts_blocked);
  	if (empty) {
  		MPASS(LIST_EMPTY(&ts->ts_free));
  #ifdef TURNSTILE_PROFILING
--- 736,743 ----
  	 * give it to the about-to-be-woken thread.  Otherwise take a
  	 * turnstile from the free list and give it to the thread.
  	 */
! 	empty = TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]) &&
! 	    TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]);
  	if (empty) {
  		MPASS(LIST_EMPTY(&ts->ts_free));
  #ifdef TURNSTILE_PROFILING
***************
*** 712,718 ****
   * the turnstile chain locked.
   */
  void
! turnstile_broadcast(struct turnstile *ts)
  {
  	struct turnstile_chain *tc;
  	struct turnstile *ts1;
--- 757,763 ----
   * the turnstile chain locked.
   */
  void
! turnstile_broadcast_queue(struct turnstile *ts, int queue)
  {
  	struct turnstile_chain *tc;
  	struct turnstile *ts1;
***************
*** 720,734 ****
  
  	MPASS(ts != NULL);
  	MPASS(curthread->td_proc->p_magic == P_MAGIC);
! 	MPASS(ts->ts_owner == curthread);
  	tc = TC_LOOKUP(ts->ts_lockobj);
  	mtx_assert(&tc->tc_lock, MA_OWNED);
  
  	/*
  	 * Transfer the blocked list to the pending list.
  	 */
  	mtx_lock_spin(&td_contested_lock);
! 	TAILQ_CONCAT(&ts->ts_pending, &ts->ts_blocked, td_lockq);
  	mtx_unlock_spin(&td_contested_lock);
  
  	/*
--- 765,781 ----
  
  	MPASS(ts != NULL);
  	MPASS(curthread->td_proc->p_magic == P_MAGIC);
! 	MPASS(ts->ts_owner == curthread ||
! 		    (queue == TS_EXCLUSIVE_QUEUE && ts->ts_owner == NULL));
  	tc = TC_LOOKUP(ts->ts_lockobj);
  	mtx_assert(&tc->tc_lock, MA_OWNED);
+ 	MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
  
  	/*
  	 * Transfer the blocked list to the pending list.
  	 */
  	mtx_lock_spin(&td_contested_lock);
! 	TAILQ_CONCAT(&ts->ts_pending, &ts->ts_blocked[queue], td_lockq);
  	mtx_unlock_spin(&td_contested_lock);
  
  	/*
***************
*** 756,770 ****
   * chain locked.
   */
  void
! turnstile_unpend(struct turnstile *ts)
  {
  	TAILQ_HEAD( ,thread) pending_threads;
  	struct turnstile_chain *tc;
  	struct thread *td;
  	u_char cp, pri;
  
  	MPASS(ts != NULL);
! 	MPASS(ts->ts_owner == curthread);
  	tc = TC_LOOKUP(ts->ts_lockobj);
  	mtx_assert(&tc->tc_lock, MA_OWNED);
  	MPASS(!TAILQ_EMPTY(&ts->ts_pending));
--- 803,819 ----
   * chain locked.
   */
  void
! turnstile_unpend_queue(struct turnstile *ts, int owner_type)
  {
  	TAILQ_HEAD( ,thread) pending_threads;
  	struct turnstile_chain *tc;
+ 	struct turnstile *nts;
  	struct thread *td;
  	u_char cp, pri;
  
  	MPASS(ts != NULL);
! 	MPASS(ts->ts_owner == curthread ||
! 	    (owner_type == TS_SHARED_LOCK && ts->ts_owner == NULL));
  	tc = TC_LOOKUP(ts->ts_lockobj);
  	mtx_assert(&tc->tc_lock, MA_OWNED);
  	MPASS(!TAILQ_EMPTY(&ts->ts_pending));
***************
*** 776,782 ****
  	TAILQ_INIT(&pending_threads);
  	TAILQ_CONCAT(&pending_threads, &ts->ts_pending, td_lockq);
  #ifdef INVARIANTS
! 	if (TAILQ_EMPTY(&ts->ts_blocked))
  		ts->ts_lockobj = NULL;
  #endif
  
--- 825,832 ----
  	TAILQ_INIT(&pending_threads);
  	TAILQ_CONCAT(&pending_threads, &ts->ts_pending, td_lockq);
  #ifdef INVARIANTS
! 	if (TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]) &&
! 	    TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]))
  		ts->ts_lockobj = NULL;
  #endif
  
***************
*** 802,809 ****
  	pri = PRI_MAX;
  	mtx_lock_spin(&sched_lock);
  	mtx_lock_spin(&td_contested_lock);
! 	LIST_FOREACH(ts, &td->td_contested, ts_link) {
! 		cp = TAILQ_FIRST(&ts->ts_blocked)->td_priority;
  		if (cp < pri)
  			pri = cp;
  	}
--- 852,859 ----
  	pri = PRI_MAX;
  	mtx_lock_spin(&sched_lock);
  	mtx_lock_spin(&td_contested_lock);
! 	LIST_FOREACH(nts, &td->td_contested, ts_link) {
! 		cp = turnstile_first_waiter(nts)->td_priority;
  		if (cp < pri)
  			pri = cp;
  	}
***************
*** 837,855 ****
  }
  
  /*
   * Return the first thread in a turnstile.
   */
  struct thread *
! turnstile_head(struct turnstile *ts)
  {
  #ifdef INVARIANTS
  	struct turnstile_chain *tc;
  
  	MPASS(ts != NULL);
  	tc = TC_LOOKUP(ts->ts_lockobj);
  	mtx_assert(&tc->tc_lock, MA_OWNED);
  #endif
! 	return (TAILQ_FIRST(&ts->ts_blocked));
  }
  
  #ifdef DDB
--- 887,955 ----
  }
  
  /*
+  * Give up ownership of a turnstile.  This must be called with the
+  * turnstile chain locked.
+  */
+ void
+ turnstile_disown(struct turnstile *ts)
+ {
+ 	struct turnstile_chain *tc;
+ 	struct thread *td;
+ 	u_char cp, pri;
+ 
+ 	MPASS(ts != NULL);
+ 	MPASS(ts->ts_owner == curthread);
+ 	tc = TC_LOOKUP(ts->ts_lockobj);
+ 	mtx_assert(&tc->tc_lock, MA_OWNED);
+ 	MPASS(TAILQ_EMPTY(&ts->ts_pending));
+ 	MPASS(!TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]) ||
+ 	    !TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]));
+ 
+ 	/*
+ 	 * Remove the turnstile from this thread's list of contested locks
+ 	 * since this thread doesn't own it anymore.  New threads will
+ 	 * not be blocking on the turnstile until it is claimed by a new
+ 	 * owner.
+ 	 */
+ 	mtx_lock_spin(&td_contested_lock);
+ 	ts->ts_owner = NULL;
+ 	LIST_REMOVE(ts, ts_link);
+ 	mtx_unlock_spin(&td_contested_lock);
+ 
+ 	/*
+ 	 * Adjust the priority of curthread based on other contested
+ 	 * locks it owns.  Don't lower the priority below the base
+ 	 * priority however.
+ 	 */
+ 	td = curthread;
+ 	pri = PRI_MAX;
+ 	mtx_lock_spin(&sched_lock);
+ 	mtx_lock_spin(&td_contested_lock);
+ 	LIST_FOREACH(ts, &td->td_contested, ts_link) {
+ 		cp = turnstile_first_waiter(ts)->td_priority;
+ 		if (cp < pri)
+ 			pri = cp;
+ 	}
+ 	mtx_unlock_spin(&td_contested_lock);
+ 	sched_unlend_prio(td, pri);
+ 	mtx_unlock_spin(&sched_lock);
+ }
+ 
+ /*
   * Return the first thread in a turnstile.
   */
  struct thread *
! turnstile_head_queue(struct turnstile *ts, int queue)
  {
  #ifdef INVARIANTS
  	struct turnstile_chain *tc;
  
  	MPASS(ts != NULL);
  	tc = TC_LOOKUP(ts->ts_lockobj);
+ 	MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
  	mtx_assert(&tc->tc_lock, MA_OWNED);
  #endif
! 	return (TAILQ_FIRST(&ts->ts_blocked[queue]));
  }
  
  #ifdef DDB
***************
*** 1146,1152 ****
   * Returns true if a turnstile is empty.
   */
  int
! turnstile_empty(struct turnstile *ts)
  {
  #ifdef INVARIANTS
  	struct turnstile_chain *tc;
--- 1246,1252 ----
   * Returns true if a turnstile is empty.
   */
  int
! turnstile_empty_queue(struct turnstile *ts, int queue)
  {
  #ifdef INVARIANTS
  	struct turnstile_chain *tc;
***************
*** 1154,1159 ****
  	MPASS(ts != NULL);
  	tc = TC_LOOKUP(ts->ts_lockobj);
  	mtx_assert(&tc->tc_lock, MA_OWNED);
  #endif
! 	return (TAILQ_EMPTY(&ts->ts_blocked));
  }
--- 1254,1260 ----
  	MPASS(ts != NULL);
  	tc = TC_LOOKUP(ts->ts_lockobj);
  	mtx_assert(&tc->tc_lock, MA_OWNED);
+ 	MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
  #endif
! 	return (TAILQ_EMPTY(&ts->ts_blocked[queue]));
  }
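
The two-queue turnstile changes mirror the sleepqueue ones and exist so
that the turnstile-based rwlocks in this backport can keep reader and
writer waiters apart.  A sketch of how a caller uses them
(illustrative; TS_SHARED_LOCK and the *_queue() prototypes are expected
to come from the sys/turnstile.h part of the patch):

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/turnstile.h>

/*
 * Writer blocking on a read-owned lock: there is no single owner
 * thread to lend priority to, so the owner argument is NULL, which is
 * why turnstile_setowner() above now tolerates a NULL owner.  The
 * caller holds the turnstile chain lock.
 */
static void
block_as_writer(struct lock_object *lo)
{
	turnstile_wait_queue(lo, NULL, TS_EXCLUSIVE_QUEUE);
}

/*
 * Last reader dropping a read-owned lock with queued writers: the
 * turnstile is unowned, and the owner_type tells
 * turnstile_unpend_queue() not to expect curthread as the owner.  The
 * caller holds the turnstile chain lock.
 */
static void
wake_queued_writers(struct turnstile *ts)
{
	turnstile_broadcast_queue(ts, TS_EXCLUSIVE_QUEUE);
	turnstile_unpend_queue(ts, TS_SHARED_LOCK);
}
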
Index: netinet6/in6_src.c
===================================================================
RCS file: /cvs/ncvs/src/sys/netinet6/in6_src.c,v
retrieving revision 1.30.2.4
diff -c -r1.30.2.4 in6_src.c
*** netinet6/in6_src.c	25 Dec 2005 14:03:37 -0000	1.30.2.4
--- netinet6/in6_src.c	31 Aug 2007 01:23:38 -0000
***************
*** 76,81 ****
--- 76,82 ----
  #include <sys/errno.h>
  #include <sys/time.h>
  #include <sys/kernel.h>
+ #include <sys/lock.h>
  #include <sys/sx.h>
  
  #include <net/if.h>
Index: sys/_rwlock.h
===================================================================
RCS file: sys/_rwlock.h
diff -N sys/_rwlock.h
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- sys/_rwlock.h	31 Aug 2007 00:39:59 -0000
***************
*** 0 ****
--- 1,44 ----
+ /*-
+  * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  * 3. Neither the name of the author nor the names of any co-contributors
+  *    may be used to endorse or promote products derived from this software
+  *    without specific prior written permission.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * $FreeBSD: src/sys/sys/_rwlock.h,v 1.4 2007/06/26 21:31:56 attilio Exp $
+  */
+ 
+ #ifndef _SYS__RWLOCK_H_
+ #define	_SYS__RWLOCK_H_
+ 
+ /*
+  * Reader/writer lock.
+  */
+ struct rwlock {
+ 	struct lock_object	lock_object;
+ 	volatile uintptr_t	rw_lock;
+ 	volatile unsigned	rw_recurse;
+ };
+ 
+ #endif /* !_SYS__RWLOCK_H_ */
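
For completeness, a consumer of the new reader/writer lock would look
like the following (sketch only; rw_init() and the rw_*lock() wrappers
are assumed to come from the sys/rwlock.h portion of the backport,
which is not in this excerpt):

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>

static struct rwlock stats_lock;
static uint64_t stats_packets;

static void
stats_setup(void)
{
	rw_init(&stats_lock, "stats lock");
}

static uint64_t
stats_read(void)
{
	uint64_t n;

	rw_rlock(&stats_lock);
	n = stats_packets;
	rw_runlock(&stats_lock);
	return (n);
}

static void
stats_bump(void)
{
	rw_wlock(&stats_lock);
	stats_packets++;
	rw_wunlock(&stats_lock);
}
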
Index: sys/_sx.h
===================================================================
RCS file: sys/_sx.h
diff -N sys/_sx.h
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- sys/_sx.h	4 Sep 2007 01:20:03 -0000
***************
*** 0 ****
--- 1,59 ----
+ /*-
+  * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice(s), this list of conditions and the following disclaimer as
+  *    the first lines of this file unmodified other than the possible 
+  *    addition of one or more copyright notices.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice(s), this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
+  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
+  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+  * DAMAGE.
+  *
+  * $FreeBSD: src/sys/sys/_sx.h,v 1.1 2007/03/31 23:23:42 jhb Exp $
+  */
+ 
+ #ifndef	_SYS__SX_H_
+ #define	_SYS__SX_H_
+ 
+ #include <sys/condvar.h>
+ 
+ /*
+  * Shared/exclusive lock main structure definition.
+  *
+  * Note: to preserve compatibility, the unused fields from the
+  * previous implementation are left in place below.
+  */
+ struct sx {
+ 	struct lock_object	lock_object;
+ 	/* was: struct mtx *sx_lock; */
+ 	volatile uintptr_t	sx_lock;
+ 	/* was: int sx_cnt; */
+ 	volatile unsigned	sx_recurse;
+ 	/*
+ 	 * The following fields are unused but kept to preserve
+ 	 * sizeof(struct sx) for 6.x compat.
+ 	 */
+ 	struct cv       sx_shrd_cv;	/* unused */
+ 	int             sx_shrd_wcnt;	/* unused */
+ 	struct cv       sx_excl_cv;	/* unused */
+ 	int             sx_excl_wcnt;	/* unused */
+ 	struct thread   *sx_xholder;	/* unused */
+ };
+ 
+ #endif	/* !_SYS__SX_H_ */
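
For a build-time check that the padded struct sx really keeps its old size,
something along these lines could be dropped into any kernel .c file (not
part of this patch; "struct sx_old_layout" is a throwaway copy of the
pre-patch definition, made up just for the check):

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/lock.h>
	#include <sys/sx.h>

	/* Field-for-field copy of the old 6.x struct sx layout. */
	struct sx_old_layout {
		struct lock_object	so_object;
		struct mtx		*so_lock;
		int			so_cnt;
		struct cv		so_shrd_cv;
		int			so_shrd_wcnt;
		struct cv		so_excl_cv;
		int			so_excl_wcnt;
		struct thread		*so_xholder;
	};

	/* Fails to compile if the new struct sx ever changes size. */
	CTASSERT(sizeof(struct sx) == sizeof(struct sx_old_layout));
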
Index: sys/lock_profile.h
===================================================================
RCS file: sys/lock_profile.h
diff -N sys/lock_profile.h
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- sys/lock_profile.h	31 Aug 2007 03:21:42 -0000
***************
*** 0 ****
--- 1,45 ----
+ /*-
+  * Copyright (c) 2007 Juniper Networks
+  * Author: Alfred Perlstein
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice(s), this list of conditions and the following disclaimer as
+  *    the first lines of this file unmodified other than the possible 
+  *    addition of one or more copyright notices.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice(s), this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
+  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
+  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+  * DAMAGE.
+  *
+  * $FreeBSD$
+  */
+ 
+ #ifndef	_LOCK_PROFILE_H_
+ #define	_LOCK_PROFILE_H_
+ 
+ /* temp stubs for lock profiling, not supported in 6.x (yet) */
+ 
+ #define lock_profile_obtain_lock_success(lock_object, a0, a1, file, line) \
+     do { ; } while (0)
+ #define lock_profile_release_lock(lock_object) \
+     do { ; } while (0)
+ #define lock_profile_obtain_lock_failed(lock_object, contested, waittime) \
+     do { ; } while (0)
+ 
+ #endif
+ 
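
These stubs discard their arguments at preprocessing time, which is what
lets the 7.0 lock code keep its profiling calls on 6.x at zero cost, even
where the variables they name are commented out.  Illustrative expansion
(not part of the patch):

	/* A 7.0-style call site such as */
	lock_profile_release_lock(&rw->lock_object);
	/* preprocesses, against the stub above, into a plain no-op: */
	do { ; } while (0);
	/* so the argument is never evaluated or even required to exist. */
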
Index: sys/proc.h
===================================================================
RCS file: /cvs/ncvs/src/sys/sys/proc.h,v
retrieving revision 1.432.2.10
diff -c -r1.432.2.10 proc.h
*** sys/proc.h	11 Jun 2007 11:27:04 -0000	1.432.2.10
--- sys/proc.h	2 Sep 2007 21:56:16 -0000
***************
*** 237,243 ****
--- 237,252 ----
   * When waiting to be run, threads are hung off the KSEGRP in priority order.
   * With N runnable and queued KSEs in the KSEGRP, the first N threads
   * are linked to them. Other threads are not yet assigned.
+  *
+  * We must force at least 16 byte alignment for "struct thread"
+  * because the rwlocks and sxlocks expect to use the bottom bits
+  * of the pointer for bookkeeping information.
+  *
+  * In particular, the statically allocated thread0 structure would
+  * not necessarily be properly aligned without this.
   */
+ #define THREAD_ALIGN	16
+ 
  struct thread {
  	struct proc	*td_proc;	/* (*) Associated process. */
  	struct ksegrp	*td_ksegrp;	/* (*) Associated KSEG. */
***************
*** 261,272 ****
--- 270,283 ----
  	int		td_inhibitors;	/* (j) Why can not run. */
  	int		td_pflags;	/* (k) Private thread (TDP_*) flags. */
  	int		td_dupfd;	/* (k) Ret value from fdopen. XXX */
+ 	int		td_sqqueue;	/* (t) Sleepqueue queue blocked on. */
  	void		*td_wchan;	/* (j) Sleep address. */
  	const char	*td_wmesg;	/* (j) Reason for sleep. */
  	u_char		td_lastcpu;	/* (j) Last cpu we were on. */
  	u_char		td_oncpu;	/* (j) Which cpu we are on. */
  	volatile u_char td_owepreempt;  /* (k*) Preempt on last critical_exit */
  	short		td_locks;	/* (k) Count of non-spin locks. */
+ 	u_char		td_tsqueue;	/* (t) Turnstile queue blocked on. */
  	struct turnstile *td_blocked;	/* (j) Lock process is blocked on. */
  	void		*td_ithd;	/* (n) Unused, kept to preserve ABI. */
  	const char	*td_lockname;	/* (j) Name of lock blocked on. */
***************
*** 324,330 ****
  	struct mdthread td_md;		/* (k) Any machine-dependent fields. */
  	struct td_sched	*td_sched;	/* (*) Scheduler-specific data. */
  	struct kaudit_record	*td_ar;	/* (k) Active audit record, if any. */
! };
  
  /*
   * Flags kept in td_flags:
--- 335,342 ----
  	struct mdthread td_md;		/* (k) Any machine-dependent fields. */
  	struct td_sched	*td_sched;	/* (*) Scheduler-specific data. */
  	struct kaudit_record	*td_ar;	/* (k) Active audit record, if any. */
! } __attribute__ ((aligned (THREAD_ALIGN)));
! 
  
  /*
   * Flags kept in td_flags:
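
To make the THREAD_ALIGN requirement concrete: the new lock words store the
owning thread pointer OR'd with up to four flag bits, so those low bits of
any struct thread address must be guaranteed zero.  A small userland-style
sketch of the trick (illustrative only; "fake_thread" and FLAG_MASK are
made-up names):

	#include <assert.h>
	#include <stdint.h>

	#define	FLAG_MASK	0x0f	/* bits 0-3, cf. RW_LOCK_FLAGMASK */

	struct fake_thread {
		int	dummy;
	} __attribute__ ((aligned (16)));	/* same idea as THREAD_ALIGN */

	int
	main(void)
	{
		static struct fake_thread td;
		uintptr_t lock;

		/* 16-byte alignment leaves the low 4 bits free for flags. */
		assert(((uintptr_t)&td & FLAG_MASK) == 0);
		/* Pack owner plus a flag bit, then recover the owner intact. */
		lock = (uintptr_t)&td | 0x04;
		assert((struct fake_thread *)(lock & ~(uintptr_t)FLAG_MASK) == &td);
		return (0);
	}
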
Index: sys/rwlock.h
===================================================================
RCS file: sys/rwlock.h
diff -N sys/rwlock.h
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- sys/rwlock.h	31 Aug 2007 03:26:18 -0000
***************
*** 0 ****
--- 1,223 ----
+ /*-
+  * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  * 3. Neither the name of the author nor the names of any co-contributors
+  *    may be used to endorse or promote products derived from this software
+  *    without specific prior written permission.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  *
+  * $FreeBSD: src/sys/sys/rwlock.h,v 1.13 2007/06/26 21:31:56 attilio Exp $
+  */
+ 
+ #ifndef _SYS_RWLOCK_H_
+ #define _SYS_RWLOCK_H_
+ 
+ #include <sys/_lock.h>
+ #include <sys/_rwlock.h>
+ #include <sys/lock_profile.h>
+ 
+ #ifdef _KERNEL
+ #include <sys/pcpu.h>
+ #include <machine/atomic.h>
+ #endif
+ 
+ /*
+  * The rw_lock field consists of several fields.  The low bit indicates
+  * if the lock is locked with a read (shared) or write (exclusive) lock.
+  * A value of 0 indicates a write lock, and a value of 1 indicates a read
+  * lock.  Bit 1 is a boolean indicating if there are any threads waiting
+  * for a read lock.  Bit 2 is a boolean indicating if there are any threads
+  * waiting for a write lock.  The rest of the variable's definition is
+  * dependent on the value of the first bit.  For a write lock, it is a
+  * pointer to the thread holding the lock, similar to the mtx_lock field of
+  * mutexes.  For read locks, it is a count of read locks that are held.
+  *
+  * When the lock is not locked by any thread, it is encoded as a read lock
+  * with zero waiters.
+  *
+  * A note about memory barriers.  Write locks need to use the same memory
+  * barriers as mutexes: _acq when acquiring a write lock and _rel when
+  * releasing a write lock.  Read locks also need to use an _acq barrier when
+  * acquiring a read lock.  However, since read locks do not update any
+  * locked data (modulo bugs of course), no memory barrier is needed when
+  * releasing a read lock.
+  */
+ 
+ #define	RW_LOCK_READ		0x01
+ #define	RW_LOCK_READ_WAITERS	0x02
+ #define	RW_LOCK_WRITE_WAITERS	0x04
+ #define	RW_LOCK_RECURSED	0x08
+ #define	RW_LOCK_FLAGMASK						\
+ 	(RW_LOCK_READ | RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS |	\
+ 	RW_LOCK_RECURSED)
+ 
+ #define	RW_OWNER(x)		((x) & ~RW_LOCK_FLAGMASK)
+ #define	RW_READERS_SHIFT	4
+ #define	RW_READERS(x)		(RW_OWNER((x)) >> RW_READERS_SHIFT)
+ #define	RW_READERS_LOCK(x)	((x) << RW_READERS_SHIFT | RW_LOCK_READ)
+ #define	RW_ONE_READER		(1 << RW_READERS_SHIFT)
+ 
+ #define	RW_UNLOCKED		RW_READERS_LOCK(0)
+ #define	RW_DESTROYED		(RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)
+ 
+ #ifdef _KERNEL
+ 
+ /* Very simple operations on rw_lock. */
+ 
+ /* Try to obtain a write lock once. */
+ #define	_rw_write_lock(rw, tid)						\
+ 	atomic_cmpset_acq_ptr(&(rw)->rw_lock, RW_UNLOCKED, (tid))
+ 
+ /* Release a write lock quickly if there are no waiters. */
+ #define	_rw_write_unlock(rw, tid)					\
+ 	atomic_cmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED)
+ 
+ /*
+  * Full lock operations that are suitable to be inlined in non-debug
+  * kernels.  If the lock cannot be acquired or released trivially then
+  * the work is deferred to another function.
+  */
+ 
+ /* Acquire a write lock. */
+ #define	__rw_wlock(rw, tid, file, line) do {	\
+ 	uintptr_t _tid = (uintptr_t)(tid);				\
+ 	/* The "contested"/"waitstart" profiling locals are omitted	\
+ 	 * on purpose: lock profiling is stubbed out on 6.x and its	\
+ 	 * macros discard their arguments. */				\
+ 	if (!_rw_write_lock((rw), _tid)) {				\
+ 		lock_profile_obtain_lock_failed(&(rw)->lock_object,	\
+ 		    &contested, &waitstart);				\
+ 		_rw_wlock_hard((rw), _tid, (file), (line));		\
+ 	}								\
+ 	lock_profile_obtain_lock_success(&(rw)->lock_object, contested,	\
+ 	    waitstart, (file), (line));					\
+ } while (0)
+ 
+ /* Release a write lock. */
+ #define	__rw_wunlock(rw, tid, file, line) do {				\
+ 	uintptr_t _tid = (uintptr_t)(tid);				\
+ 									\
+ 	if (!_rw_write_unlock((rw), _tid))				\
+ 		_rw_wunlock_hard((rw), _tid, (file), (line));		\
+ } while (0)
+ 
+ /*
+  * Function prototypes.  Routines that start with _ are not part of the
+  * external API and should not be called directly.  Wrapper macros should
+  * be used instead.
+  */
+ 
+ #define	rw_init(rw, name)	rw_init_flags((rw), (name), 0)
+ void	rw_init_flags(struct rwlock *rw, const char *name, int opts);
+ void	rw_destroy(struct rwlock *rw);
+ void	rw_sysinit(void *arg);
+ int	rw_wowned(struct rwlock *rw);
+ void	_rw_wlock(struct rwlock *rw, const char *file, int line);
+ void	_rw_wunlock(struct rwlock *rw, const char *file, int line);
+ void	_rw_rlock(struct rwlock *rw, const char *file, int line);
+ void	_rw_runlock(struct rwlock *rw, const char *file, int line);
+ void	_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file,
+ 	    int line);
+ void	_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file,
+ 	    int line);
+ int	_rw_try_upgrade(struct rwlock *rw, const char *file, int line);
+ void	_rw_downgrade(struct rwlock *rw, const char *file, int line);
+ #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
+ void	_rw_assert(struct rwlock *rw, int what, const char *file, int line);
+ #endif
+ 
+ /*
+  * Public interface for lock operations.
+  *
+  * XXX: Missing try locks.
+  */
+ 
+ #ifndef LOCK_DEBUG
+ #error LOCK_DEBUG not defined, include <sys/lock.h> before <sys/rwlock.h>
+ #endif
+ #if LOCK_DEBUG > 0 || defined(RWLOCK_NOINLINE)
+ #define	rw_wlock(rw)		_rw_wlock((rw), LOCK_FILE, LOCK_LINE)
+ #define	rw_wunlock(rw)		_rw_wunlock((rw), LOCK_FILE, LOCK_LINE)
+ #else
+ #define	rw_wlock(rw)							\
+ 	__rw_wlock((rw), curthread, LOCK_FILE, LOCK_LINE)
+ #define	rw_wunlock(rw)							\
+ 	__rw_wunlock((rw), curthread, LOCK_FILE, LOCK_LINE)
+ #endif
+ #define	rw_rlock(rw)		_rw_rlock((rw), LOCK_FILE, LOCK_LINE)
+ #define	rw_runlock(rw)		_rw_runlock((rw), LOCK_FILE, LOCK_LINE)
+ #define	rw_try_upgrade(rw)	_rw_try_upgrade((rw), LOCK_FILE, LOCK_LINE)
+ #define	rw_downgrade(rw)	_rw_downgrade((rw), LOCK_FILE, LOCK_LINE)
+ #define	rw_sleep(chan, rw, pri, wmesg, timo)				\
+ 	_sleep((chan), &(rw)->lock_object, (pri), (wmesg), (timo))
+ 
+ #define	rw_initialized(rw)	lock_initalized(&(rw)->lock_object)
+ 
+ struct rw_args {
+ 	struct rwlock	*ra_rw;
+ 	const char 	*ra_desc;
+ };
+ 
+ #define	RW_SYSINIT(name, rw, desc)					\
+ 	static struct rw_args name##_args = {				\
+ 		(rw),							\
+ 		(desc),							\
+ 	};								\
+ 	SYSINIT(name##_rw_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE,	\
+ 	    rw_sysinit, &name##_args);					\
+ 	SYSUNINIT(name##_rw_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE,	\
+ 	    rw_destroy, (rw))
+ 
+ /*
+  * Options passed to rw_init_flags().
+  */
+ #define	RW_DUPOK	0x01
+ #define	RW_NOPROFILE	0x02
+ #define	RW_NOWITNESS	0x04
+ #define	RW_QUIET	0x08
+ #define	RW_RECURSE	0x10
+ 
+ /*
+  * The INVARIANTS-enabled rw_assert() functionality.
+  *
+  * The constants need to be defined for INVARIANT_SUPPORT infrastructure
+  * support as _rw_assert() itself uses them and the latter implies that
+  * _rw_assert() must build.
+  */
+ #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
+ #define	RA_LOCKED		LA_LOCKED
+ #define	RA_RLOCKED		LA_SLOCKED
+ #define	RA_WLOCKED		LA_XLOCKED
+ #define	RA_UNLOCKED		LA_UNLOCKED
+ #define	RA_RECURSED		LA_RECURSED
+ #define	RA_NOTRECURSED		LA_NOTRECURSED
+ #endif
+ 
+ #ifdef INVARIANTS
+ #define	rw_assert(rw, what)	_rw_assert((rw), (what), LOCK_FILE, LOCK_LINE)
+ #else
+ #define	rw_assert(rw, what)
+ #endif
+ 
+ #endif /* _KERNEL */
+ #endif /* !_SYS_RWLOCK_H_ */
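
For reviewers who have not used the 7.x rwlock API, a minimal consumer of
the interface above looks roughly like this ("foo_softc" and its fields are
invented for the example):

	#include <sys/param.h>
	#include <sys/lock.h>
	#include <sys/rwlock.h>

	struct foo_softc {
		struct rwlock	sc_lock;
		int		sc_count;
	};

	static void
	foo_init(struct foo_softc *sc)
	{
		rw_init(&sc->sc_lock, "foo softc");
	}

	static int
	foo_get(struct foo_softc *sc)
	{
		int v;

		rw_rlock(&sc->sc_lock);		/* shared; many readers at once */
		v = sc->sc_count;
		rw_runlock(&sc->sc_lock);	/* no _rel barrier needed, see above */
		return (v);
	}

	static void
	foo_bump(struct foo_softc *sc)
	{
		rw_wlock(&sc->sc_lock);		/* exclusive; owner stored in rw_lock */
		sc->sc_count++;
		rw_wunlock(&sc->sc_lock);
	}

	/* The encoding described above also means a write owner can be
	 * recovered directly from the lock word: */
	static struct thread *
	foo_writer(struct foo_softc *sc)
	{
		uintptr_t v = sc->sc_lock.rw_lock;

		return ((v & RW_LOCK_READ) ? NULL : (struct thread *)RW_OWNER(v));
	}
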
Index: sys/sleepqueue.h
===================================================================
RCS file: /cvs/ncvs/src/sys/sys/sleepqueue.h,v
retrieving revision 1.6.2.1
diff -c -r1.6.2.1 sleepqueue.h
*** sys/sleepqueue.h	27 Feb 2006 00:19:39 -0000	1.6.2.1
--- sys/sleepqueue.h	31 Aug 2007 01:47:52 -0000
***************
*** 26,40 ****
   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   *
!  * $FreeBSD: src/sys/sys/sleepqueue.h,v 1.6.2.1 2006/02/27 00:19:39 davidxu Exp $
   */
  
  #ifndef _SYS_SLEEPQUEUE_H_
  #define _SYS_SLEEPQUEUE_H_
  
  /*
!  * Sleep queue interface.  Sleep/wakeup and condition variables use a sleep
!  * queue for the queue of threads blocked on a sleep channel.
   *
   * A thread calls sleepq_lock() to lock the sleep queue chain associated
   * with a given wait channel.  A thread can then call sleepq_add() to
--- 26,41 ----
   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   *
!  * $FreeBSD: src/sys/sys/sleepqueue.h,v 1.12 2007/03/31 23:23:42 jhb Exp $
   */
  
  #ifndef _SYS_SLEEPQUEUE_H_
  #define _SYS_SLEEPQUEUE_H_
  
  /*
!  * Sleep queue interface.  Sleep/wakeup, condition variables, and sx
!  * locks use a sleep queue for the queue of threads blocked on a sleep
!  * channel.
   *
   * A thread calls sleepq_lock() to lock the sleep queue chain associated
   * with a given wait channel.  A thread can then call sleepq_add() to
***************
*** 84,107 ****
  #define	SLEEPQ_TYPE		0x0ff		/* Mask of sleep queue types. */
  #define	SLEEPQ_MSLEEP		0x00		/* Used by msleep/wakeup. */
  #define	SLEEPQ_CONDVAR		0x01		/* Used for a cv. */
  #define	SLEEPQ_INTERRUPTIBLE	0x100		/* Sleep is interruptible. */
  
  void	init_sleepqueues(void);
  void	sleepq_abort(struct thread *td, int intrval);
! void	sleepq_add(void *, struct mtx *, const char *, int);
  struct sleepqueue *sleepq_alloc(void);
! void	sleepq_broadcast(void *, int, int);
  void	sleepq_free(struct sleepqueue *);
  void	sleepq_lock(void *);
  struct sleepqueue *sleepq_lookup(void *);
  void	sleepq_release(void *);
  void	sleepq_remove(struct thread *, void *);
! void	sleepq_signal(void *, int, int);
  void	sleepq_set_timeout(void *wchan, int timo);
  int	sleepq_timedwait(void *wchan);
  int	sleepq_timedwait_sig(void *wchan);
  void	sleepq_wait(void *);
  int	sleepq_wait_sig(void *wchan);
  
  #endif	/* _KERNEL */
  #endif	/* !_SYS_SLEEPQUEUE_H_ */
--- 85,117 ----
  #define	SLEEPQ_TYPE		0x0ff		/* Mask of sleep queue types. */
  #define	SLEEPQ_MSLEEP		0x00		/* Used by msleep/wakeup. */
  #define	SLEEPQ_CONDVAR		0x01		/* Used for a cv. */
+ #define	SLEEPQ_SX		0x03		/* Used by an sx lock. */
  #define	SLEEPQ_INTERRUPTIBLE	0x100		/* Sleep is interruptible. */
  
  void	init_sleepqueues(void);
  void	sleepq_abort(struct thread *td, int intrval);
! void	sleepq_add_queue(void *, struct mtx *, const char *, int, int);
  struct sleepqueue *sleepq_alloc(void);
! void	sleepq_broadcast_queue(void *, int, int, int);
  void	sleepq_free(struct sleepqueue *);
  void	sleepq_lock(void *);
  struct sleepqueue *sleepq_lookup(void *);
  void	sleepq_release(void *);
  void	sleepq_remove(struct thread *, void *);
! void	sleepq_signal_queue(void *, int, int, int);
  void	sleepq_set_timeout(void *wchan, int timo);
  int	sleepq_timedwait(void *wchan);
  int	sleepq_timedwait_sig(void *wchan);
  void	sleepq_wait(void *);
  int	sleepq_wait_sig(void *wchan);
  
+ /* Preserve source compat with 6.x */
+ #define sleepq_add(wchan, lock, wmesg, flags)		    \
+     sleepq_add_queue(wchan, lock, wmesg, flags, 0)
+ #define sleepq_broadcast(wchan, flags, pri)		     \
+     sleepq_broadcast_queue(wchan, flags, pri, 0)
+ #define sleepq_signal(wchan, flags, pri)		    \
+     sleepq_signal_queue(wchan, flags, pri, 0)
+ 
  #endif	/* _KERNEL */
  #endif	/* !_SYS_SLEEPQUEUE_H_ */
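
The three compat macros keep existing 6.x callers compiling unchanged: they
all end up on queue 0, so their behaviour is exactly as before, while the
new sx code can name a different queue explicitly.  Illustrative expansion
(not from the patch):

	/* An untouched 6.x call such as */
	sleepq_add(wchan, lock, wmesg, SLEEPQ_CONDVAR);
	/* now preprocesses to */
	sleepq_add_queue(wchan, lock, wmesg, SLEEPQ_CONDVAR, 0);
	/* while sx consumers pass SLEEPQ_SX and an explicit queue index. */
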
Index: sys/sx.h
===================================================================
RCS file: /cvs/ncvs/src/sys/sys/sx.h,v
retrieving revision 1.21.2.5
diff -c -r1.21.2.5 sx.h
*** sys/sx.h	27 Aug 2007 13:45:35 -0000	1.21.2.5
--- sys/sx.h	31 Aug 2007 01:02:47 -0000
***************
*** 1,5 ****
  /*-
!  * Copyright (C) 2001 Jason Evans <jasone@freebsd.org>.  All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
--- 1,7 ----
  /*-
!  * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
!  * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
!  * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
***************
*** 24,62 ****
   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
   * DAMAGE.
   *
!  * $FreeBSD: src/sys/sys/sx.h,v 1.21.2.5 2007/08/27 13:45:35 jhb Exp $
   */
  
  #ifndef	_SYS_SX_H_
  #define	_SYS_SX_H_
  
- #include <sys/queue.h>
  #include <sys/_lock.h>
! #include <sys/condvar.h>	/* XXX */
  
! struct sx {
! 	struct lock_object sx_object;	/* Common lock properties. */
! 	struct mtx	*sx_lock;	/* General protection lock. */
! 	int		sx_cnt;		/* -1: xlock, > 0: slock count. */
! 	struct cv	sx_shrd_cv;	/* slock waiters. */
! 	int		sx_shrd_wcnt;	/* Number of slock waiters. */
! 	struct cv	sx_excl_cv;	/* xlock waiters. */
! 	int		sx_excl_wcnt;	/* Number of xlock waiters. */
! 	struct thread	*sx_xholder;	/* Thread presently holding xlock. */
! };
  
  #ifdef _KERNEL
  void	sx_sysinit(void *arg);
! void	sx_init(struct sx *sx, const char *description);
  void	sx_destroy(struct sx *sx);
! void	_sx_slock(struct sx *sx, const char *file, int line);
! void	_sx_xlock(struct sx *sx, const char *file, int line);
  int	_sx_try_slock(struct sx *sx, const char *file, int line);
  int	_sx_try_xlock(struct sx *sx, const char *file, int line);
  void	_sx_sunlock(struct sx *sx, const char *file, int line);
  void	_sx_xunlock(struct sx *sx, const char *file, int line);
  int	_sx_try_upgrade(struct sx *sx, const char *file, int line);
  void	_sx_downgrade(struct sx *sx, const char *file, int line);
  #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
  void	_sx_assert(struct sx *sx, int what, const char *file, int line);
  #endif
--- 26,118 ----
   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
   * DAMAGE.
   *
!  * $FreeBSD: src/sys/sys/sx.h,v 1.37 2007/07/06 13:20:44 attilio Exp $
   */
  
  #ifndef	_SYS_SX_H_
  #define	_SYS_SX_H_
  
  #include <sys/_lock.h>
! #include <sys/_sx.h>
! #include <sys/lock_profile.h>
  
! #ifdef	_KERNEL
! #include <machine/atomic.h>
! #endif
! 
! /*
!  * In general, the sx locks and rwlocks use very similar algorithms.
!  * The main difference in the implementations is how threads are
!  * blocked when a lock is unavailable.  For this, sx locks use sleep
!  * queues which do not support priority propagation, and rwlocks use
!  * turnstiles which do.
!  *
!  * The sx_lock field consists of several fields.  The low bit
!  * indicates if the lock is locked with a shared or exclusive lock.  A
!  * value of 0 indicates an exclusive lock, and a value of 1 indicates
!  * a shared lock.  Bit 1 is a boolean indicating if there are any
!  * threads waiting for a shared lock.  Bit 2 is a boolean indicating
!  * if there are any threads waiting for an exclusive lock.  Bit 3 is a
!  * boolean indicating if an exclusive lock is recursively held.  The
!  * rest of the variable's definition is dependent on the value of the
!  * first bit.  For an exclusive lock, it is a pointer to the thread
!  * holding the lock, similar to the mtx_lock field of mutexes.  For
!  * shared locks, it is a count of read locks that are held.
!  *
!  * When the lock is not locked by any thread, it is encoded as a
!  * shared lock with zero waiters.
!  *
!  * A note about memory barriers.  Exclusive locks need to use the same
!  * memory barriers as mutexes: _acq when acquiring an exclusive lock
!  * and _rel when releasing an exclusive lock.  On the other side,
!  * shared lock needs to use an _acq barrier when acquiring the lock
!  * but, since they don't update any locked data, no memory barrier is
!  * needed when releasing a shared lock.
!  */
! 
! #define	SX_LOCK_SHARED			0x01
! #define	SX_LOCK_SHARED_WAITERS		0x02
! #define	SX_LOCK_EXCLUSIVE_WAITERS	0x04
! #define	SX_LOCK_RECURSED		0x08
! #define	SX_LOCK_FLAGMASK						\
! 	(SX_LOCK_SHARED | SX_LOCK_SHARED_WAITERS |			\
! 	SX_LOCK_EXCLUSIVE_WAITERS | SX_LOCK_RECURSED)
! 
! #define	SX_OWNER(x)			((x) & ~SX_LOCK_FLAGMASK)
! #define	SX_SHARERS_SHIFT		4
! #define	SX_SHARERS(x)			(SX_OWNER(x) >> SX_SHARERS_SHIFT)
! #define	SX_SHARERS_LOCK(x)						\
! 	((x) << SX_SHARERS_SHIFT | SX_LOCK_SHARED)
! #define	SX_ONE_SHARER			(1 << SX_SHARERS_SHIFT)
! 
! #define	SX_LOCK_UNLOCKED		SX_SHARERS_LOCK(0)
! #define	SX_LOCK_DESTROYED						\
! 	(SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)
  
  #ifdef _KERNEL
+ 
+ /*
+  * Function prototypes.  Routines that start with an underscore are not part
+  * of the public interface and are wrapped by a macro.
+  */
  void	sx_sysinit(void *arg);
! #define	sx_init(sx, desc)	sx_init_flags((sx), (desc), 0)
! void	sx_init_flags(struct sx *sx, const char *description, int opts);
  void	sx_destroy(struct sx *sx);
! int	_sx_slock(struct sx *sx, int opts, const char *file, int line);
! int	_sx_xlock(struct sx *sx, int opts, const char *file, int line);
  int	_sx_try_slock(struct sx *sx, const char *file, int line);
  int	_sx_try_xlock(struct sx *sx, const char *file, int line);
  void	_sx_sunlock(struct sx *sx, const char *file, int line);
  void	_sx_xunlock(struct sx *sx, const char *file, int line);
  int	_sx_try_upgrade(struct sx *sx, const char *file, int line);
  void	_sx_downgrade(struct sx *sx, const char *file, int line);
+ int	_sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts,
+ 	    const char *file, int line);
+ int	_sx_slock_hard(struct sx *sx, int opts, const char *file, int line);
+ void	_sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int
+ 	    line);
+ void	_sx_sunlock_hard(struct sx *sx, const char *file, int line);
  #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
  void	_sx_assert(struct sx *sx, int what, const char *file, int line);
  #endif
***************
*** 79,93 ****
  	SYSUNINIT(name##_sx_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE,	\
  	    sx_destroy, (sxa))
  
! #define	sx_xlocked(sx)		((sx)->sx_cnt < 0 && (sx)->sx_xholder == curthread)
! #define	sx_slock(sx)		_sx_slock((sx), LOCK_FILE, LOCK_LINE)
! #define	sx_xlock(sx)		_sx_xlock((sx), LOCK_FILE, LOCK_LINE)
  #define	sx_try_slock(sx)	_sx_try_slock((sx), LOCK_FILE, LOCK_LINE)
  #define	sx_try_xlock(sx)	_sx_try_xlock((sx), LOCK_FILE, LOCK_LINE)
- #define	sx_sunlock(sx)		_sx_sunlock((sx), LOCK_FILE, LOCK_LINE)
- #define	sx_xunlock(sx)		_sx_xunlock((sx), LOCK_FILE, LOCK_LINE)
  #define	sx_try_upgrade(sx)	_sx_try_upgrade((sx), LOCK_FILE, LOCK_LINE)
  #define	sx_downgrade(sx)	_sx_downgrade((sx), LOCK_FILE, LOCK_LINE)
  #define	sx_unlock(sx) do {						\
  	if (sx_xlocked(sx))						\
  		sx_xunlock(sx);						\
--- 135,253 ----
  	SYSUNINIT(name##_sx_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE,	\
  	    sx_destroy, (sxa))
  
! /*
!  * Full lock operations that are suitable to be inlined in non-debug kernels.
!  * If the lock can't be acquired or released trivially then the work is
!  * deferred to 'tougher' functions.
!  */
! 
! /* Acquire an exclusive lock. */
! static __inline int
! __sx_xlock(struct sx *sx, struct thread *td, int opts, const char *file,
!     int line)
! {
! 	uintptr_t tid = (uintptr_t)td;
! 	int error = 0;
! 
! 	if (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid))
! 		error = _sx_xlock_hard(sx, tid, opts, file, line);
! 	else
! 		lock_profile_obtain_lock_success(&sx->lock_object, 0, 0, file,
! 		    line);
! 
! 	return (error);
! }
! 
! /* Release an exclusive lock. */
! static __inline void
! __sx_xunlock(struct sx *sx, struct thread *td, const char *file, int line)
! {
! 	uintptr_t tid = (uintptr_t)td;
! 
! 	if (!atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
! 		_sx_xunlock_hard(sx, tid, file, line);
! }
! 
! /* Acquire a shared lock. */
! static __inline int
! __sx_slock(struct sx *sx, int opts, const char *file, int line)
! {
! 	uintptr_t x = sx->sx_lock;
! 	int error = 0;
! 
! 	if (!(x & SX_LOCK_SHARED) ||
! 	    !atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER))
! 		error = _sx_slock_hard(sx, opts, file, line);
! #ifdef LOCK_PROFILING_SHARED
! 	else if (SX_SHARERS(x) == 0)
! 		lock_profile_obtain_lock_success(&sx->lock_object, 0, 0, file,
! 		    line);
! #endif
! 
! 	return (error);
! }
! 
! /*
!  * Release a shared lock.  We can just drop a single shared lock so
!  * long as we aren't trying to drop the last shared lock when other
!  * threads are waiting for an exclusive lock.  This takes advantage of
!  * the fact that an unlocked lock is encoded as a shared lock with a
!  * count of 0.
!  */
! static __inline void
! __sx_sunlock(struct sx *sx, const char *file, int line)
! {
! 	uintptr_t x = sx->sx_lock;
! 
! 	if (x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS) ||
! 	    !atomic_cmpset_ptr(&sx->sx_lock, x, x - SX_ONE_SHARER))
! 		_sx_sunlock_hard(sx, file, line);
! }
! 
! /*
!  * Public interface for lock operations.
!  */
! #ifndef LOCK_DEBUG
! #error	"LOCK_DEBUG not defined, include <sys/lock.h> before <sys/sx.h>"
! #endif
! #if	(LOCK_DEBUG > 0) || defined(SX_NOINLINE)
! #define	sx_xlock(sx)		(void)_sx_xlock((sx), 0, LOCK_FILE, LOCK_LINE)
! #define	sx_xlock_sig(sx)						\
! 	_sx_xlock((sx), SX_INTERRUPTIBLE, LOCK_FILE, LOCK_LINE)
! #define	sx_xunlock(sx)		_sx_xunlock((sx), LOCK_FILE, LOCK_LINE)
! #define	sx_slock(sx)		(void)_sx_slock((sx), 0, LOCK_FILE, LOCK_LINE)
! #define	sx_slock_sig(sx)						\
! 	_sx_slock((sx), SX_INTERRUPTIBLE, LOCK_FILE, LOCK_LINE)
! #define	sx_sunlock(sx)		_sx_sunlock((sx), LOCK_FILE, LOCK_LINE)
! #else
! #define	sx_xlock(sx)							\
! 	(void)__sx_xlock((sx), curthread, 0, LOCK_FILE, LOCK_LINE)
! #define	sx_xlock_sig(sx)						\
! 	__sx_xlock((sx), curthread, SX_INTERRUPTIBLE, LOCK_FILE, LOCK_LINE)
! #define	sx_xunlock(sx)							\
! 	__sx_xunlock((sx), curthread, LOCK_FILE, LOCK_LINE)
! #define	sx_slock(sx)		(void)__sx_slock((sx), 0, LOCK_FILE, LOCK_LINE)
! #define	sx_slock_sig(sx)						\
! 	__sx_slock((sx), SX_INTERRUPTIBLE, LOCK_FILE, LOCK_LINE)
! #define	sx_sunlock(sx)		__sx_sunlock((sx), LOCK_FILE, LOCK_LINE)
! #endif	/* LOCK_DEBUG > 0 || SX_NOINLINE */
  #define	sx_try_slock(sx)	_sx_try_slock((sx), LOCK_FILE, LOCK_LINE)
  #define	sx_try_xlock(sx)	_sx_try_xlock((sx), LOCK_FILE, LOCK_LINE)
  #define	sx_try_upgrade(sx)	_sx_try_upgrade((sx), LOCK_FILE, LOCK_LINE)
  #define	sx_downgrade(sx)	_sx_downgrade((sx), LOCK_FILE, LOCK_LINE)
+ 
+ /*
+  * Return a pointer to the owning thread if the lock is exclusively
+  * locked.
+  */
+ #define	sx_xholder(sx)							\
+ 	((sx)->sx_lock & SX_LOCK_SHARED ? NULL :			\
+ 	(struct thread *)SX_OWNER((sx)->sx_lock))
+ 
+ #define	sx_xlocked(sx)							\
+ 	(((sx)->sx_lock & ~(SX_LOCK_FLAGMASK & ~SX_LOCK_SHARED)) ==	\
+ 	    (uintptr_t)curthread)
+ 
  #define	sx_unlock(sx) do {						\
  	if (sx_xlocked(sx))						\
  		sx_xunlock(sx);						\
***************
*** 95,111 ****
--- 255,293 ----
  		sx_sunlock(sx);						\
  } while (0)
  
+ #define	sx_sleep(chan, sx, pri, wmesg, timo)				\
+ 	_sleep((chan), &(sx)->lock_object, (pri), (wmesg), (timo))
+ 
+ /*
+  * Options passed to sx_init_flags().
+  */
+ #define	SX_DUPOK		0x01
+ #define	SX_NOPROFILE		0x02
+ #define	SX_NOWITNESS		0x04
+ #define	SX_QUIET		0x08
+ #define	SX_ADAPTIVESPIN		0x10
+ #define	SX_RECURSE		0x20
+ 
+ /*
+  * Options passed to sx_*lock_hard().
+  */
+ #define	SX_INTERRUPTIBLE	0x40
+ 
  #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
  #define	SA_LOCKED		LA_LOCKED
  #define	SA_SLOCKED		LA_SLOCKED
  #define	SA_XLOCKED		LA_XLOCKED
  #define	SA_UNLOCKED		LA_UNLOCKED
+ #define	SA_RECURSED		LA_RECURSED
+ #define	SA_NOTRECURSED		LA_NOTRECURSED
  
  /* Backwards compatability. */
  #define	SX_LOCKED		LA_LOCKED
  #define	SX_SLOCKED		LA_SLOCKED
  #define	SX_XLOCKED		LA_XLOCKED
  #define	SX_UNLOCKED		LA_UNLOCKED
+ #define	SX_RECURSED		LA_RECURSED
+ #define	SX_NOTRECURSED		LA_NOTRECURSED
  #endif
  
  #ifdef INVARIANTS
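
For reviewers new to the 7.x-style sx API, a minimal consumer of the
interface above looks roughly like this ("bar_softc" and its fields are
invented for the example):

	#include <sys/param.h>
	#include <sys/lock.h>
	#include <sys/sx.h>

	struct bar_softc {
		struct sx	sc_sx;
		int		sc_state;
	};

	static void
	bar_init(struct bar_softc *sc)
	{
		/* SX_ADAPTIVESPIN is the per-lock opt-in listed above. */
		sx_init_flags(&sc->sc_sx, "bar softc", SX_ADAPTIVESPIN);
	}

	static int
	bar_update(struct bar_softc *sc, int val)
	{
		int error;

		/* The _sig variant can fail if interrupted by a signal. */
		error = sx_xlock_sig(&sc->sc_sx);
		if (error != 0)
			return (error);
		sc->sc_state = val;
		sx_xunlock(&sc->sc_sx);
		return (0);
	}

	static int
	bar_read(struct bar_softc *sc)
	{
		int v;

		sx_slock(&sc->sc_sx);	/* shared; sleeps, no priority propagation */
		v = sc->sc_state;
		sx_sunlock(&sc->sc_sx);
		return (v);
	}
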
Index: sys/turnstile.h
===================================================================
RCS file: /cvs/ncvs/src/sys/sys/turnstile.h,v
retrieving revision 1.7
diff -c -r1.7 turnstile.h
*** sys/turnstile.h	7 Jan 2005 02:29:24 -0000	1.7
--- sys/turnstile.h	31 Aug 2007 00:39:59 -0000
***************
*** 73,92 ****
  
  #ifdef _KERNEL
  
  void	init_turnstiles(void);
  void	turnstile_adjust(struct thread *, u_char);
  struct turnstile *turnstile_alloc(void);
! void	turnstile_broadcast(struct turnstile *);
  void	turnstile_claim(struct lock_object *);
! int	turnstile_empty(struct turnstile *);
  void	turnstile_free(struct turnstile *);
! struct thread *turnstile_head(struct turnstile *);
  void	turnstile_lock(struct lock_object *);
  struct turnstile *turnstile_lookup(struct lock_object *);
  void	turnstile_release(struct lock_object *);
! int	turnstile_signal(struct turnstile *);
! void	turnstile_unpend(struct turnstile *);
! void	turnstile_wait(struct lock_object *, struct thread *);
  
  #endif	/* _KERNEL */
  #endif	/* _SYS_TURNSTILE_H_ */
--- 73,115 ----
  
  #ifdef _KERNEL
  
+ /* Which queue to block on, or to wake one or more threads up from. */
+ #define	TS_EXCLUSIVE_QUEUE	0
+ #define	TS_SHARED_QUEUE		1
+ 
+ /* The type of lock currently held. */
+ #define	TS_EXCLUSIVE_LOCK	TS_EXCLUSIVE_QUEUE
+ #define	TS_SHARED_LOCK		TS_SHARED_QUEUE
+ 
  void	init_turnstiles(void);
  void	turnstile_adjust(struct thread *, u_char);
  struct turnstile *turnstile_alloc(void);
! #define turnstile_wakeup(turnstile) turnstile_broadcast(turnstile)
! #define	turnstile_broadcast(turnstile) \
!     turnstile_broadcast_queue(turnstile, TS_EXCLUSIVE_QUEUE)
! void	turnstile_broadcast_queue(struct turnstile *, int);
  void	turnstile_claim(struct lock_object *);
! void	turnstile_disown(struct turnstile *);
! #define	turnstile_empty(turnstile)  \
!     turnstile_empty_queue(turnstile, TS_EXCLUSIVE_QUEUE)
! int	turnstile_empty_queue(struct turnstile *, int);
  void	turnstile_free(struct turnstile *);
! #define	turnstile_head(turnstile)   \
!     turnstile_head_queue(turnstile, TS_EXCLUSIVE_QUEUE)
! struct thread *turnstile_head_queue(struct turnstile *, int);
  void	turnstile_lock(struct lock_object *);
  struct turnstile *turnstile_lookup(struct lock_object *);
  void	turnstile_release(struct lock_object *);
! #define turnstile_signal(turnstile) \
!     turnstile_signal_queue(turnstile, TS_EXCLUSIVE_QUEUE)
! int	turnstile_signal_queue(struct turnstile *, int);
! struct turnstile *turnstile_trywait(struct lock_object *);
! #define	turnstile_unpend(turnstile) \
!     turnstile_unpend_queue(turnstile, TS_EXCLUSIVE_QUEUE)
! void	turnstile_unpend_queue(struct turnstile *, int);
! #define turnstile_wait(lock_object, thread) \
!     turnstile_wait_queue(lock_object, thread, TS_EXCLUSIVE_QUEUE)
! void	turnstile_wait_queue(struct lock_object *, struct thread *, int);
  
  #endif	/* _KERNEL */
  #endif	/* _SYS_TURNSTILE_H_ */
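
Same pattern as the sleep queue changes: the old single-queue turnstile
entry points become macros over new *_queue functions that take an explicit
queue, defaulting to TS_EXCLUSIVE_QUEUE so existing mutex code is untouched.
Illustrative expansion (not from the patch):

	/* An unmodified 6.x caller such as */
	turnstile_broadcast(ts);
	/* now preprocesses to */
	turnstile_broadcast_queue(ts, TS_EXCLUSIVE_QUEUE);
	/* and the rwlock code can presumably wake readers with */
	turnstile_broadcast_queue(ts, TS_SHARED_QUEUE);
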
Index: vm/vm_map.c
===================================================================
RCS file: /cvs/ncvs/src/sys/vm/vm_map.c,v
retrieving revision 1.366.2.4
diff -c -r1.366.2.4 vm_map.c
*** vm/vm_map.c	30 Aug 2007 02:32:04 -0000	1.366.2.4
--- vm/vm_map.c	31 Aug 2007 03:12:00 -0000
***************
*** 429,435 ****
  	if (map->system_map)
  		_mtx_lock_flags(&map->system_mtx, 0, file, line);
  	else
! 		_sx_xlock(&map->lock, file, line);
  	map->timestamp++;
  }
  
--- 429,435 ----
  	if (map->system_map)
  		_mtx_lock_flags(&map->system_mtx, 0, file, line);
  	else
! 		(void) _sx_xlock(&map->lock, 0, file, line);
  	map->timestamp++;
  }
  
***************
*** 450,456 ****
  	if (map->system_map)
  		_mtx_lock_flags(&map->system_mtx, 0, file, line);
  	else
! 		_sx_xlock(&map->lock, file, line);
  }
  
  void
--- 450,456 ----
  	if (map->system_map)
  		_mtx_lock_flags(&map->system_mtx, 0, file, line);
  	else
! 		(void) _sx_xlock(&map->lock, 0, file, line);
  }
  
  void

--SnV5plBeK2Ge1I9g--


