Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 18 May 2009 19:47:55 +0000 (UTC)
From:      Kip Macy <kmacy@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r192332 - in user/kmacy/releng_7_2_fcs/sys: dev/hwpmc kern sys
Message-ID:  <200905181947.n4IJltxt072926@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kmacy
Date: Mon May 18 19:47:54 2009
New Revision: 192332
URL: http://svn.freebsd.org/changeset/base/192332

Log:
  merge hwpmc fix and new witness
  180794
  - Provide kernelname as the name for process with P_KTHREAD set as
      otherwise their textvp is NULL.
  
  181695
   Introduce some WITNESS improvements:
   - Speedup the lock orderings lookup modifying the witness graph from a
     linked tree to a matrix. A table lookup caches the lock orderings in
     order to make a O(1) access for them. Any witness object has an unique
     index withing this lookup cache table.
   - Reduce the lock contention on w_mtx acquiring it only when the LOR
     actually happens and not in a sane case. In order to do this don't totally
     flush lock lists (per-CPU spinlocks list and per-thread sleeplocks list)
     but check for ll_count anytime we need to have to verify allocations sanity.
   - Introduce the function witness_thread_exit() in the witness namespace which
     should verify a thread doesn't hold any witness occurrence why exiting.
   - Rename the sysctl debug.witness.graphs into debug.witness.fullgraph and
     add debug.witness.badstacks which prints out stacks for LOR revealed.
     This is implemented using the stack(9) support, which makes WITNESS to be
     dependent by the STACK option or by the DDB (including STACK) option.
   - Fix style(9) for src/sys/kern/subr_witness.c
  
   The hash table approach has been developed by Ilya Maykov on the behalf of
   Isilon Systems which kindly released the patch.
   Jeff Roberson, ported the patch to -CURRENT and fixed w_mtx contention, on the
   behalf of Nokia.

Modified:
  user/kmacy/releng_7_2_fcs/sys/dev/hwpmc/hwpmc_mod.c
  user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c
  user/kmacy/releng_7_2_fcs/sys/kern/subr_witness.c
  user/kmacy/releng_7_2_fcs/sys/sys/lock.h

Modified: user/kmacy/releng_7_2_fcs/sys/dev/hwpmc/hwpmc_mod.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/dev/hwpmc/hwpmc_mod.c	Mon May 18 19:33:59 2009	(r192331)
+++ user/kmacy/releng_7_2_fcs/sys/dev/hwpmc/hwpmc_mod.c	Mon May 18 19:47:54 2009	(r192332)
@@ -952,7 +952,11 @@ pmc_attach_one_process(struct proc *p, s
 	/* issue an attach event to a configured log file */
 	if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) {
 		pmc_getfilename(p->p_textvp, &fullpath, &freepath);
-		pmclog_process_pmcattach(pm, p->p_pid, fullpath);
+		if (p->p_flag & P_KTHREAD) {
+			fullpath = kernelname;
+			freepath = NULL;
+		} else
+			pmclog_process_pmcattach(pm, p->p_pid, fullpath);
 		if (freepath)
 			FREE(freepath, M_TEMP);
 	}

Modified: user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c	Mon May 18 19:33:59 2009	(r192331)
+++ user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c	Mon May 18 19:47:54 2009	(r192332)
@@ -26,6 +26,8 @@
  * DAMAGE.
  */
 
+#include "opt_witness.h"
+
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
@@ -502,6 +504,9 @@ thread_exit(void)
 	ruxagg(&p->p_rux, td);
 	PROC_SUNLOCK(p);
 	td->td_state = TDS_INACTIVE;
+#ifdef WITNESS
+	witness_thread_exit(td);
+#endif
 	CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
 	sched_throw(td);
 	panic("I'm a teapot!");

Modified: user/kmacy/releng_7_2_fcs/sys/kern/subr_witness.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/subr_witness.c	Mon May 18 19:33:59 2009	(r192331)
+++ user/kmacy/releng_7_2_fcs/sys/kern/subr_witness.c	Mon May 18 19:47:54 2009	(r192332)
@@ -1,5 +1,8 @@
 /*-
- * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
+ * Copyright (c) 2008 Isilon Systems, Inc.
+ * Copyright (c) 2008 Ilya Maykov <ivmaykov@gmail.com>
+ * Copyright (c) 1998 Berkeley Software Design, Inc.
+ * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -86,6 +89,7 @@ __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_hwpmc_hooks.h"
+#include "opt_stack.h"
 #include "opt_witness.h"
 
 #include <sys/param.h>
@@ -98,13 +102,20 @@ __FBSDID("$FreeBSD$");
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
+#include <sys/stack.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
+#ifdef DDB
 #include <ddb/ddb.h>
+#endif
 
 #include <machine/stdarg.h>
 
+#if !defined(DDB) && !defined(STACK)
+#error "DDB or STACK options are required for WITNESS"
+#endif
+
 /* Note that these traces do not work with KTR_ALQ. */
 #if 0
 #define	KTR_WITNESS	KTR_SUBSYS
@@ -119,95 +130,250 @@ __FBSDID("$FreeBSD$");
 /* Define this to check for blessed mutexes */
 #undef BLESSING
 
-#define WITNESS_COUNT 1024
-#define WITNESS_CHILDCOUNT (WITNESS_COUNT * 4)
+#define	WITNESS_COUNT 		1024
+#define	WITNESS_CHILDCOUNT 	(WITNESS_COUNT * 4)
+#define	WITNESS_HASH_SIZE	251	/* Prime, gives load factor < 2 */
+#define	WITNESS_PENDLIST	512
+
+/* Allocate 256 KB of stack data space */
+#define	WITNESS_LO_DATA_COUNT	2048
+
+/* Prime, gives load factor of ~2 at full load */
+#define	WITNESS_LO_HASH_SIZE	1021
+
 /*
- * XXX: This is somewhat bogus, as we assume here that at most 1024 threads
- * will hold LOCK_NCHILDREN * 2 locks.  We handle failure ok, and we should
+ * XXX: This is somewhat bogus, as we assume here that at most 2048 threads
+ * will hold LOCK_NCHILDREN locks.  We handle failure ok, and we should
  * probably be safe for the most part, but it's still a SWAG.
  */
-#define LOCK_CHILDCOUNT (MAXCPU + 1024) * 2
+#define	LOCK_NCHILDREN	5
+#define	LOCK_CHILDCOUNT	2048
 
-#define	WITNESS_NCHILDREN 6
+#define	MAX_W_NAME	64
 
-struct witness_child_list_entry;
+#define	BADSTACK_SBUF_SIZE	(256 * WITNESS_COUNT)
+#define	CYCLEGRAPH_SBUF_SIZE	8192
+#define	FULLGRAPH_SBUF_SIZE	32768
 
-struct witness {
-	const	char *w_name;
-	struct	lock_class *w_class;
-	STAILQ_ENTRY(witness) w_list;		/* List of all witnesses. */
-	STAILQ_ENTRY(witness) w_typelist;	/* Witnesses of a type. */
-	struct	witness_child_list_entry *w_children;	/* Great evilness... */
-	const	char *w_file;
-	int	w_line;
-	u_int	w_level;
-	u_int	w_refcount;
-	u_char	w_Giant_squawked:1;
-	u_char	w_other_squawked:1;
-	u_char	w_same_squawked:1;
-	u_char	w_displayed:1;
+/*
+ * These flags go in the witness relationship matrix and describe the
+ * relationship between any two struct witness objects.
+ */
+#define	WITNESS_UNRELATED        0x00    /* No lock order relation. */
+#define	WITNESS_PARENT           0x01    /* Parent, aka direct ancestor. */
+#define	WITNESS_ANCESTOR         0x02    /* Direct or indirect ancestor. */
+#define	WITNESS_CHILD            0x04    /* Child, aka direct descendant. */
+#define	WITNESS_DESCENDANT       0x08    /* Direct or indirect descendant. */
+#define	WITNESS_ANCESTOR_MASK    (WITNESS_PARENT | WITNESS_ANCESTOR)
+#define	WITNESS_DESCENDANT_MASK  (WITNESS_CHILD | WITNESS_DESCENDANT)
+#define	WITNESS_RELATED_MASK						\
+	(WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
+#define	WITNESS_REVERSAL         0x10    /* A lock order reversal has been
+					  * observed. */
+#define	WITNESS_RESERVED1        0x20    /* Unused flag, reserved. */
+#define	WITNESS_RESERVED2        0x40    /* Unused flag, reserved. */
+#define	WITNESS_LOCK_ORDER_KNOWN 0x80    /* This lock order is known. */
+
+/* Descendant to ancestor flags */
+#define	WITNESS_DTOA(x)	(((x) & WITNESS_RELATED_MASK) >> 2)
+
+/* Ancestor to descendant flags */
+#define	WITNESS_ATOD(x)	(((x) & WITNESS_RELATED_MASK) << 2)
+
+#define	WITNESS_INDEX_ASSERT(i)						\
+	MPASS((i) > 0 && (i) <= w_max_used_index && (i) < WITNESS_COUNT)
+
+MALLOC_DEFINE(M_WITNESS, "Witness", "Witness");
+
+/*
+ * Lock instances.  A lock instance is the data associated with a lock while
+ * it is held by witness.  For example, a lock instance will hold the
+ * recursion count of a lock.  Lock instances are held in lists.  Spin locks
+ * are held in a per-cpu list while sleep locks are held in per-thread list.
+ */
+struct lock_instance {
+	struct lock_object	*li_lock;
+	const char		*li_file;
+	int			li_line;
+	u_int			li_flags;
 };
 
-struct witness_child_list_entry {
-	struct	witness_child_list_entry *wcl_next;
-	struct	witness *wcl_children[WITNESS_NCHILDREN];
-	u_int	wcl_count;
+/*
+ * A simple list type used to build the list of locks held by a thread
+ * or CPU.  We can't simply embed the list in struct lock_object since a
+ * lock may be held by more than one thread if it is a shared lock.  Locks
+ * are added to the head of the list, so we fill up each list entry from
+ * "the back" logically.  To ease some of the arithmetic, we actually fill
+ * in each list entry the normal way (children[0] then children[1], etc.) but
+ * when we traverse the list we read children[count-1] as the first entry
+ * down to children[0] as the final entry.
+ */
+struct lock_list_entry {
+	struct lock_list_entry	*ll_next;
+	struct lock_instance	ll_children[LOCK_NCHILDREN];
+	u_int			ll_count;
+};
+
+/*
+ * The main witness structure. One of these per named lock type in the system
+ * (for example, "vnode interlock").
+ */
+struct witness {
+	char  			w_name[MAX_W_NAME];
+	uint32_t 		w_index;  /* Index in the relationship matrix */
+	struct lock_class	*w_class;
+	STAILQ_ENTRY(witness) 	w_list;		/* List of all witnesses. */
+	STAILQ_ENTRY(witness) 	w_typelist;	/* Witnesses of a type. */
+	struct witness		*w_hash_next; /* Linked list in hash buckets. */
+	const char		*w_file; /* File where last acquired */
+	uint32_t 		w_line; /* Line where last acquired */
+	uint32_t 		w_refcount;
+	uint16_t 		w_num_ancestors; /* direct/indirect
+						  * ancestor count */
+	uint16_t 		w_num_descendants; /* direct/indirect
+						    * descendant count */
+	int16_t 		w_ddb_level;
+	int 			w_displayed:1;
+	int 			w_reversed:1;
 };
 
 STAILQ_HEAD(witness_list, witness);
 
+/*
+ * The witness hash table. Keys are witness names (const char *), elements are
+ * witness objects (struct witness *).
+ */
+struct witness_hash {
+	struct witness	*wh_array[WITNESS_HASH_SIZE];
+	uint32_t	wh_size;
+	uint32_t	wh_count;
+};
+
+/*
+ * Key type for the lock order data hash table.
+ */
+struct witness_lock_order_key {
+	uint16_t	from;
+	uint16_t	to;
+};
+
+struct witness_lock_order_data {
+	struct stack			wlod_stack;
+	struct witness_lock_order_key	wlod_key;
+	struct witness_lock_order_data	*wlod_next;
+};
+
+/*
+ * The witness lock order data hash table. Keys are witness index tuples
+ * (struct witness_lock_order_key), elements are lock order data objects
+ * (struct witness_lock_order_data). 
+ */
+struct witness_lock_order_hash {
+	struct witness_lock_order_data	*wloh_array[WITNESS_LO_HASH_SIZE];
+	u_int	wloh_size;
+	u_int	wloh_count;
+};
+
 #ifdef BLESSING
 struct witness_blessed {
-	const	char *b_lock1;
-	const	char *b_lock2;
+	const char	*b_lock1;
+	const char	*b_lock2;
 };
 #endif
 
 struct witness_order_list_entry {
-	const	char *w_name;
-	struct	lock_class *w_class;
+	const char		*w_name;
+	struct lock_class	*w_class;
 };
 
+/*
+ * Returns 0 if one of the locks is a spin lock and the other is not.
+ * Returns 1 otherwise.
+ */
+static __inline int
+witness_lock_type_equal(struct witness *w1, struct witness *w2)
+{
+
+	return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) ==
+		(w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)));
+}
+
+static __inline int
+witness_lock_order_key_empty(const struct witness_lock_order_key *key)
+{
+
+	return (key->from == 0 && key->to == 0);
+}
+
+static __inline int
+witness_lock_order_key_equal(const struct witness_lock_order_key *a,
+    const struct witness_lock_order_key *b)
+{
+
+	return (a->from == b->from && a->to == b->to);
+}
+
+static int	_isitmyx(struct witness *w1, struct witness *w2, int rmask,
+		    const char *fname);
+#ifdef KDB
+static void	_witness_debugger(int cond, const char *msg);
+#endif
+static void	adopt(struct witness *parent, struct witness *child);
 #ifdef BLESSING
 static int	blessed(struct witness *, struct witness *);
 #endif
 static int	depart(struct witness *w);
-static struct	witness *enroll(const char *description,
-				struct lock_class *lock_class);
-static int	insertchild(struct witness *parent, struct witness *child);
+static struct witness	*enroll(const char *description,
+			    struct lock_class *lock_class);
+static struct lock_instance	*find_instance(struct lock_list_entry *list,
+				    struct lock_object *lock);
 static int	isitmychild(struct witness *parent, struct witness *child);
 static int	isitmydescendant(struct witness *parent, struct witness *child);
-static int	itismychild(struct witness *parent, struct witness *child);
-static void	removechild(struct witness *parent, struct witness *child);
+static void	itismychild(struct witness *parent, struct witness *child);
+static int	sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS);
 static int	sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS);
-static const char *fixup_filename(const char *file);
-static struct	witness *witness_get(void);
+static int	sysctl_debug_witness_fullgraph(SYSCTL_HANDLER_ARGS);
+static void	witness_add_fullgraph(struct sbuf *sb, struct witness *parent);
+#ifdef DDB
+static void	witness_ddb_compute_levels(void);
+static void	witness_ddb_display(void(*)(const char *fmt, ...));
+static void	witness_ddb_display_descendants(void(*)(const char *fmt, ...),
+		    struct witness *, int indent);
+static void	witness_ddb_display_list(void(*prnt)(const char *fmt, ...),
+		    struct witness_list *list);
+static void	witness_ddb_level_descendants(struct witness *parent, int l);
+static void	witness_ddb_list(struct thread *td);
+#endif
 static void	witness_free(struct witness *m);
-static struct	witness_child_list_entry *witness_child_get(void);
-static void	witness_child_free(struct witness_child_list_entry *wcl);
-static struct	lock_list_entry *witness_lock_list_get(void);
+static struct witness	*witness_get(void);
+static uint32_t	witness_hash_djb2(const uint8_t *key, uint32_t size);
+static struct witness	*witness_hash_get(const char *key);
+static void	witness_hash_put(struct witness *w);
+static void	witness_init_hash_tables(void);
+static void	witness_increment_graph_generation(void);
 static void	witness_lock_list_free(struct lock_list_entry *lle);
-static struct	lock_instance *find_instance(struct lock_list_entry *lock_list,
-					     struct lock_object *lock);
+static struct lock_list_entry	*witness_lock_list_get(void);
+static int	witness_lock_order_add(struct witness *parent,
+		    struct witness *child);
+static int	witness_lock_order_check(struct witness *parent,
+		    struct witness *child);
+static struct witness_lock_order_data	*witness_lock_order_get(
+					    struct witness *parent,
+					    struct witness *child);
 static void	witness_list_lock(struct lock_instance *instance);
-#ifdef DDB
-static void	witness_leveldescendents(struct witness *parent, int level);
-static void	witness_levelall(void);
-static void	witness_displaydescendants(void(*)(const char *fmt, ...),
-					   struct witness *, int indent);
-static void	witness_display_list(void(*prnt)(const char *fmt, ...),
-				     struct witness_list *list);
-static void	witness_display(void(*)(const char *fmt, ...));
-static void	witness_list(struct thread *td);
+
+#ifdef KDB
+#define	witness_debugger(c)	_witness_debugger(c, __func__)
+#else
+#define	witness_debugger(c)
 #endif
 
 SYSCTL_NODE(_debug, OID_AUTO, witness, CTLFLAG_RW, 0, "Witness Locking");
 
 /*
- * If set to 0, witness is disabled.  If set to a non-zero value, witness
- * performs full lock order checking for all locks.  At runtime, this
- * value may be set to 0 to turn off witness.  witness is not allowed be
- * turned on once it is turned off, however.
+ * If set to 0, witness is disabled.  Otherwise witness performs full lock order
+ * checking for all locks.  At runtime, witness is allowed to be turned off.
+ * witness is not allowed be turned on once it is turned off, however.
  */
 static int witness_watch = 1;
 TUNABLE_INT("debug.witness.watch", &witness_watch);
@@ -216,7 +382,7 @@ SYSCTL_PROC(_debug_witness, OID_AUTO, wa
 
 #ifdef KDB
 /*
- * When KDB is enabled and witness_kdb is set to 1, it will cause the system
+ * When KDB is enabled and witness_kdb is 1, it will cause the system
  * to drop into kdebug() when:
  *	- a lock hierarchy violation occurs
  *	- locks are held when going to sleep.
@@ -230,7 +396,7 @@ TUNABLE_INT("debug.witness.kdb", &witnes
 SYSCTL_INT(_debug_witness, OID_AUTO, kdb, CTLFLAG_RW, &witness_kdb, 0, "");
 
 /*
- * When KDB is enabled and witness_trace is set to 1, it will cause the system
+ * When KDB is enabled and witness_trace is 1, it will cause the system
  * to print a stack trace:
  *	- a lock hierarchy violation occurs
  *	- locks are held when going to sleep.
@@ -246,30 +412,54 @@ int	witness_skipspin = 1;
 int	witness_skipspin = 0;
 #endif
 TUNABLE_INT("debug.witness.skipspin", &witness_skipspin);
-SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN,
-    &witness_skipspin, 0, "");
+SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, &witness_skipspin,
+    0, "");
+
+/*
+ * Call this to print out the relations between locks.
+ */
+SYSCTL_PROC(_debug_witness, OID_AUTO, fullgraph, CTLTYPE_STRING | CTLFLAG_RD,
+    NULL, 0, sysctl_debug_witness_fullgraph, "A", "Show locks relation graphs");
+
+/*
+ * Call this to print out the witness faulty stacks.
+ */
+SYSCTL_PROC(_debug_witness, OID_AUTO, badstacks, CTLTYPE_STRING | CTLFLAG_RD,
+    NULL, 0, sysctl_debug_witness_badstacks, "A", "Show bad witness stacks");
 
 static struct mtx w_mtx;
+
+/* w_list */
 static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free);
 static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all);
+
+/* w_typelist */
 static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin);
 static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep);
-static struct witness_child_list_entry *w_child_free = NULL;
+
+/* lock list */
 static struct lock_list_entry *w_lock_list_free = NULL;
 
-static int w_free_cnt, w_spin_cnt, w_sleep_cnt, w_child_free_cnt, w_child_cnt;
+static int w_free_cnt, w_spin_cnt, w_sleep_cnt;
 SYSCTL_INT(_debug_witness, OID_AUTO, free_cnt, CTLFLAG_RD, &w_free_cnt, 0, "");
 SYSCTL_INT(_debug_witness, OID_AUTO, spin_cnt, CTLFLAG_RD, &w_spin_cnt, 0, "");
 SYSCTL_INT(_debug_witness, OID_AUTO, sleep_cnt, CTLFLAG_RD, &w_sleep_cnt, 0,
     "");
-SYSCTL_INT(_debug_witness, OID_AUTO, child_free_cnt, CTLFLAG_RD,
-    &w_child_free_cnt, 0, "");
-SYSCTL_INT(_debug_witness, OID_AUTO, child_cnt, CTLFLAG_RD, &w_child_cnt, 0,
-    "");
 
-static struct witness w_data[WITNESS_COUNT];
-static struct witness_child_list_entry w_childdata[WITNESS_CHILDCOUNT];
+static struct witness *w_data;
+static uint8_t w_rmatrix[WITNESS_COUNT+1][WITNESS_COUNT+1];
 static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT];
+static struct witness_hash w_hash;	/* The witness hash table. */
+
+/* The lock order data hash */
+static struct witness_lock_order_data w_lodata[WITNESS_LO_DATA_COUNT];
+static struct witness_lock_order_data *w_lofree = NULL;
+static struct witness_lock_order_hash w_lohash;
+static int w_max_used_index = 0;
+static unsigned int w_generation = 0;
+static const char *w_notrunning = "Witness not running, witness_watch == 0\n";
+static const char *w_stillcold = "Witness is still cold\n";
+
 
 static struct witness_order_list_entry order_lists[] = {
 	/*
@@ -522,6 +712,10 @@ witness_initialize(void *dummy __unused)
 	struct witness *w, *w1;
 	int i;
 
+	MALLOC(w_data, struct witness *,
+	    sizeof (struct witness) * WITNESS_COUNT, M_WITNESS,
+	    M_NOWAIT | M_ZERO);
+
 	/*
 	 * We have to release Giant before initializing its witness
 	 * structure so that WITNESS doesn't get confused.
@@ -532,12 +726,25 @@ witness_initialize(void *dummy __unused)
 	CTR1(KTR_WITNESS, "%s: initializing witness", __func__);
 	mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET |
 	    MTX_NOWITNESS | MTX_NOPROFILE);
-	for (i = 0; i < WITNESS_COUNT; i++)
-		witness_free(&w_data[i]);
-	for (i = 0; i < WITNESS_CHILDCOUNT; i++)
-		witness_child_free(&w_childdata[i]);
+	for (i = WITNESS_COUNT - 1; i >= 0; i--) {
+		w = &w_data[i];
+		memset(w, 0, sizeof(*w));
+		w_data[i].w_index = i;	/* Witness index never changes. */
+		witness_free(w);
+	}
+	KASSERT(STAILQ_FIRST(&w_free)->w_index == 0,
+	    ("%s: Invalid list of free witness objects", __func__));
+
+	/* Witness with index 0 is not used to aid in debugging. */
+	STAILQ_REMOVE_HEAD(&w_free, w_list);
+	w_free_cnt--;
+
+	memset(w_rmatrix, 0,
+	    (sizeof(**w_rmatrix) * (WITNESS_COUNT+1) * (WITNESS_COUNT+1)));
+
 	for (i = 0; i < LOCK_CHILDCOUNT; i++)
 		witness_lock_list_free(&w_locklistdata[i]);
+	witness_init_hash_tables();
 
 	/* First add in all the specified order lists. */
 	for (order = order_lists; order->w_name != NULL; order++) {
@@ -550,8 +757,7 @@ witness_initialize(void *dummy __unused)
 			if (w1 == NULL)
 				continue;
 			w1->w_file = "order list";
-			if (!itismychild(w, w1))
-				panic("Not enough memory for static orders!");
+			itismychild(w, w1);
 			w = w1;
 		}
 	}
@@ -575,23 +781,6 @@ witness_initialize(void *dummy __unused)
 SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize,
     NULL);
 
-static int
-sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS)
-{
-	int error, value;
-
-	value = witness_watch;
-	error = sysctl_handle_int(oidp, &value, 0, req);
-	if (error != 0 || req->newptr == NULL)
-		return (error);
-	if (value == witness_watch)
-		return (0);
-	if (value != 0)
-		return (EINVAL);
-	witness_watch = 0;
-	return (0);
-}
-
 void
 witness_init(struct lock_object *lock)
 {
@@ -636,25 +825,23 @@ witness_destroy(struct lock_object *lock
 	struct witness *w;
 
 	class = LOCK_CLASS(lock);
+
 	if (witness_cold)
 		panic("lock (%s) %s destroyed while witness_cold",
 		    class->lc_name, lock->lo_name);
 
 	/* XXX: need to verify that no one holds the lock */
-	if ((lock->lo_flags & (LO_WITNESS | LO_ENROLLPEND)) == LO_WITNESS &&
-	    lock->lo_witness != NULL) {
-		w = lock->lo_witness;
-		mtx_lock_spin(&w_mtx);
-		MPASS(w->w_refcount > 0);
-		w->w_refcount--;
+	if ((lock->lo_flags & LO_WITNESS) == 0 || lock->lo_witness == NULL)
+		return;
+	w = lock->lo_witness;
 
-		/*
-		 * Lock is already released if we have an allocation failure
-		 * and depart() fails.
-		 */
-		if (w->w_refcount != 0 || depart(w))
-			mtx_unlock_spin(&w_mtx);
-	}
+	mtx_lock_spin(&w_mtx);
+	MPASS(w->w_refcount > 0);
+	w->w_refcount--;
+
+	if (w->w_refcount == 0)
+		depart(w);
+	mtx_unlock_spin(&w_mtx);
 
 	/*
 	 * If this lock is destroyed before witness is up and running,
@@ -668,127 +855,114 @@ witness_destroy(struct lock_object *lock
 
 #ifdef DDB
 static void
-witness_levelall (void)
+witness_ddb_compute_levels(void)
 {
-	struct witness_list *list;
-	struct witness *w, *w1;
+	struct witness *w;
 
 	/*
 	 * First clear all levels.
 	 */
-	STAILQ_FOREACH(w, &w_all, w_list) {
-		w->w_level = 0;
-	}
+	STAILQ_FOREACH(w, &w_all, w_list)
+		w->w_ddb_level = -1;
 
 	/*
-	 * Look for locks with no parent and level all their descendants.
+	 * Look for locks with no parents and level all their descendants.
 	 */
 	STAILQ_FOREACH(w, &w_all, w_list) {
-		/*
-		 * This is just an optimization, technically we could get
-		 * away just walking the all list each time.
-		 */
-		if (w->w_class->lc_flags & LC_SLEEPLOCK)
-			list = &w_sleep;
-		else
-			list = &w_spin;
-		STAILQ_FOREACH(w1, list, w_typelist) {
-			if (isitmychild(w1, w))
-				goto skip;
-		}
-		witness_leveldescendents(w, 0);
-	skip:
-		;	/* silence GCC 3.x */
+
+		/* If the witness has ancestors (is not a root), skip it. */
+		if (w->w_num_ancestors > 0)
+			continue;
+		witness_ddb_level_descendants(w, 0);
 	}
 }
 
 static void
-witness_leveldescendents(struct witness *parent, int level)
+witness_ddb_level_descendants(struct witness *w, int l)
 {
-	struct witness_child_list_entry *wcl;
 	int i;
 
-	if (parent->w_level < level)
-		parent->w_level = level;
-	level++;
-	for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next)
-		for (i = 0; i < wcl->wcl_count; i++)
-			witness_leveldescendents(wcl->wcl_children[i], level);
+	if (w->w_ddb_level >= l)
+		return;
+
+	w->w_ddb_level = l;
+	l++;
+
+	for (i = 1; i <= w_max_used_index; i++) {
+		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT)
+			witness_ddb_level_descendants(&w_data[i], l);
+	}
 }
 
 static void
-witness_displaydescendants(void(*prnt)(const char *fmt, ...),
-			   struct witness *parent, int indent)
+witness_ddb_display_descendants(void(*prnt)(const char *fmt, ...),
+    struct witness *w, int indent)
 {
-	struct witness_child_list_entry *wcl;
-	int i, level;
+	int i;
 
-	level = parent->w_level;
-	prnt("%-2d", level);
-	for (i = 0; i < indent; i++)
-		prnt(" ");
-	if (parent->w_refcount > 0)
-		prnt("%s", parent->w_name);
+ 	for (i = 0; i < indent; i++)
+ 		prnt(" ");
+	prnt("%s (type: %s, depth: %d, active refs: %d)",
+	     w->w_name, w->w_class->lc_name,
+	     w->w_ddb_level, w->w_refcount);
+ 	if (w->w_displayed) {
+ 		prnt(" -- (already displayed)\n");
+ 		return;
+ 	}
+ 	w->w_displayed = 1;
+	if (w->w_file != NULL && w->w_line != 0)
+		prnt(" -- last acquired @ %s:%d\n", w->w_file,
+		    w->w_line);
 	else
-		prnt("(dead)");
-	if (parent->w_displayed) {
-		prnt(" -- (already displayed)\n");
-		return;
-	}
-	parent->w_displayed = 1;
-	if (parent->w_refcount > 0) {
-		if (parent->w_file != NULL)
-			prnt(" -- last acquired @ %s:%d", parent->w_file,
-			    parent->w_line);
-	}
-	prnt("\n");
-	for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next)
-		for (i = 0; i < wcl->wcl_count; i++)
-			    witness_displaydescendants(prnt,
-				wcl->wcl_children[i], indent + 1);
+		prnt(" -- never acquired\n");
+	indent++;
+	WITNESS_INDEX_ASSERT(w->w_index);
+	for (i = 1; i <= w_max_used_index; i++) {
+		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT)
+			witness_ddb_display_descendants(prnt, &w_data[i],
+			    indent);
+	}
 }
 
 static void
-witness_display_list(void(*prnt)(const char *fmt, ...),
-		     struct witness_list *list)
+witness_ddb_display_list(void(*prnt)(const char *fmt, ...),
+    struct witness_list *list)
 {
 	struct witness *w;
 
 	STAILQ_FOREACH(w, list, w_typelist) {
-		if (w->w_file == NULL || w->w_level > 0)
+		if (w->w_file == NULL || w->w_ddb_level > 0)
 			continue;
-		/*
-		 * This lock has no anscestors, display its descendants. 
-		 */
-		witness_displaydescendants(prnt, w, 0);
+
+		/* This lock has no anscestors - display its descendants. */
+		witness_ddb_display_descendants(prnt, w, 0);
 	}
 }
 	
 static void
-witness_display(void(*prnt)(const char *fmt, ...))
+witness_ddb_display(void(*prnt)(const char *fmt, ...))
 {
 	struct witness *w;
 
-	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
-	witness_levelall();
+	KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
+	witness_ddb_compute_levels();
 
 	/* Clear all the displayed flags. */
-	STAILQ_FOREACH(w, &w_all, w_list) {
+	STAILQ_FOREACH(w, &w_all, w_list)
 		w->w_displayed = 0;
-	}
 
 	/*
 	 * First, handle sleep locks which have been acquired at least
 	 * once.
 	 */
 	prnt("Sleep locks:\n");
-	witness_display_list(prnt, &w_sleep);
+	witness_ddb_display_list(prnt, &w_sleep);
 	
 	/*
 	 * Now do spin locks which have been acquired at least once.
 	 */
 	prnt("\nSpin locks:\n");
-	witness_display_list(prnt, &w_spin);
+	witness_ddb_display_list(prnt, &w_spin);
 	
 	/*
 	 * Finally, any locks which have not been acquired yet.
@@ -797,7 +971,8 @@ witness_display(void(*prnt)(const char *
 	STAILQ_FOREACH(w, &w_all, w_list) {
 		if (w->w_file != NULL || w->w_refcount == 0)
 			continue;
-		prnt("%s\n", w->w_name);
+		prnt("%s (type: %s, depth: %d)\n", w->w_name,
+		    w->w_class->lc_name, w->w_ddb_level);
 	}
 }
 #endif /* DDB */
@@ -826,7 +1001,7 @@ witness_defineorder(struct lock_object *
 	    lock2->lo_witness == NULL)
 		return (EINVAL);
 
-	MPASS(!mtx_owned(&w_mtx));
+	mtx_assert(&w_mtx, MA_NOTOWNED);
 	mtx_lock_spin(&w_mtx);
 
 	/*
@@ -841,8 +1016,7 @@ witness_defineorder(struct lock_object *
 	/* Try to add the new order. */
 	CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__,
 	    lock2->lo_type, lock1->lo_type);
-	if (!itismychild(lock1->lo_witness, lock2->lo_witness))
-		return (ENOMEM);
+	itismychild(lock1->lo_witness, lock2->lo_witness);
 	mtx_unlock_spin(&w_mtx);
 	return (0);
 }
@@ -862,23 +1036,13 @@ witness_checkorder(struct lock_object *l
 	    panicstr != NULL)
 		return;
 
-	/*
-	 * Try locks do not block if they fail to acquire the lock, thus
-	 * there is no danger of deadlocks or of switching while holding a
-	 * spin lock if we acquire a lock via a try operation.  This
-	 * function shouldn't even be called for try locks, so panic if
-	 * that happens.
-	 */
-	if (flags & LOP_TRYLOCK)
-		panic("%s should not be called for try lock operations",
-		    __func__);
-
 	w = lock->lo_witness;
 	class = LOCK_CLASS(lock);
 	td = curthread;
 	file = fixup_filename(file);
 
 	if (class->lc_flags & LC_SLEEPLOCK) {
+
 		/*
 		 * Since spin locks include a critical section, this check
 		 * implicitly enforces a lock order of all sleep locks before
@@ -896,6 +1060,7 @@ witness_checkorder(struct lock_object *l
 			return;
 		lock_list = &td->td_sleeplocks;
 	} else {
+
 		/*
 		 * If this is the first lock, just return as no order
 		 * checking is needed.  We check this in both if clauses
@@ -910,6 +1075,10 @@ witness_checkorder(struct lock_object *l
 		lock_list = PCPU_PTR(spinlocks);
 	}
 
+	/* Empty list? */
+	if ((*lock_list)->ll_count == 0)
+		return;
+
 	/*
 	 * Check to see if we are recursing on a lock we already own.  If
 	 * so, make sure that we don't mismatch exclusive and shared lock
@@ -937,11 +1106,12 @@ witness_checkorder(struct lock_object *l
 	}
 
 	/*
-	 * Try locks do not block if they fail to acquire the lock, thus
-	 * there is no danger of deadlocks or of switching while holding a
-	 * spin lock if we acquire a lock via a try operation.
+	 * Try to perform most checks without a lock.  If this succeeds we
+	 * can skip acquiring the lock and return success.
 	 */
-	if (flags & LOP_TRYLOCK)
+	lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];
+	w1 = lock1->li_lock->lo_witness;
+	if (witness_lock_order_check(w1, w))
 		return;
 
 	/*
@@ -949,35 +1119,35 @@ witness_checkorder(struct lock_object *l
 	 * have to check for this on the last lock we just acquired.  Any
 	 * other cases will be caught as lock order violations.
 	 */
-	lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];
-	w1 = lock1->li_lock->lo_witness;
+	mtx_lock_spin(&w_mtx);
+	witness_lock_order_add(w1, w);
 	if (w1 == w) {
-		if (w->w_same_squawked || (lock->lo_flags & LO_DUPOK) ||
-		    (flags & LOP_DUPOK))
-			return;
-		w->w_same_squawked = 1;
+		i = w->w_index;
+		if (!(lock->lo_flags & LO_DUPOK) && !(flags & LOP_DUPOK) &&
+		    !(w_rmatrix[i][i] & WITNESS_REVERSAL)) {
+		    w_rmatrix[i][i] |= WITNESS_REVERSAL;
+			w->w_reversed = 1;
+			mtx_unlock_spin(&w_mtx);
 		printf("acquiring duplicate lock of same type: \"%s\"\n", 
-			lock->lo_type);
-		printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name,
-		    lock1->li_file, lock1->li_line);
-		printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line);
-#ifdef KDB
-		goto debugger;
-#else
+			    w->w_name);
+			printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name,
+			       lock1->li_file, lock1->li_line);
+			printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line);
+			witness_debugger(1);
+		    } else
+			    mtx_unlock_spin(&w_mtx);
 		return;
-#endif
 	}
-	MPASS(!mtx_owned(&w_mtx));
-	mtx_lock_spin(&w_mtx);
+	mtx_assert(&w_mtx, MA_OWNED);
+
 	/*
 	 * If we know that the the lock we are acquiring comes after
 	 * the lock we most recently acquired in the lock order tree,
 	 * then there is no need for any further checks.
 	 */
-	if (isitmychild(w1, w)) {
-		mtx_unlock_spin(&w_mtx);
-		return;
-	}
+	if (isitmychild(w1, w))
+		goto out;
+
 	for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) {
 		for (i = lle->ll_count - 1; i >= 0; i--, j++) {
 
@@ -994,6 +1164,7 @@ witness_checkorder(struct lock_object *l
 				    ("lock missing witness structure"));
 				continue;
 			}
+
 			/*
 			 * If we are locking Giant and this is a sleepable
 			 * lock, then skip it.
@@ -1001,6 +1172,7 @@ witness_checkorder(struct lock_object *l
 			if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 &&
 			    lock == &Giant.lock_object)
 				continue;
+
 			/*
 			 * If we are locking a sleepable lock and this lock
 			 * is Giant, then skip it.
@@ -1008,6 +1180,7 @@ witness_checkorder(struct lock_object *l
 			if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
 			    lock1->li_lock == &Giant.lock_object)
 				continue;
+
 			/*
 			 * If we are locking a sleepable lock and this lock
 			 * isn't sleepable, we want to treat it as a lock
@@ -1017,6 +1190,7 @@ witness_checkorder(struct lock_object *l
 			if (((lock->lo_flags & LO_SLEEPABLE) != 0 &&
 			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0))
 				goto reversal;
+
 			/*
 			 * If we are locking Giant and this is a non-sleepable
 			 * lock, then treat it as a reversal.
@@ -1024,37 +1198,40 @@ witness_checkorder(struct lock_object *l
 			if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 &&
 			    lock == &Giant.lock_object)
 				goto reversal;
+
 			/*
 			 * Check the lock order hierarchy for a reveresal.
 			 */
 			if (!isitmydescendant(w, w1))
 				continue;
 		reversal:
+
 			/*
 			 * We have a lock order violation, check to see if it
 			 * is allowed or has already been yelled about.
 			 */
-			mtx_unlock_spin(&w_mtx);
 #ifdef BLESSING
+
 			/*
 			 * If the lock order is blessed, just bail.  We don't
 			 * look for other lock order violations though, which
 			 * may be a bug.
 			 */
 			if (blessed(w, w1))
-				return;
+				goto out;
 #endif
-			if (lock1->li_lock == &Giant.lock_object) {
-				if (w1->w_Giant_squawked)
-					return;
-				else
-					w1->w_Giant_squawked = 1;
-			} else {
-				if (w1->w_other_squawked)
-					return;
-				else
-					w1->w_other_squawked = 1;
-			}
+
+			/* Bail if this violation is known */
+			if (w_rmatrix[w1->w_index][w->w_index] & WITNESS_REVERSAL)
+				goto out;
+
+			/* Record this as a violation */
+			w_rmatrix[w1->w_index][w->w_index] |= WITNESS_REVERSAL;
+			w_rmatrix[w->w_index][w1->w_index] |= WITNESS_REVERSAL;
+			w->w_reversed = w1->w_reversed = 1;
+			witness_increment_graph_generation();
+			mtx_unlock_spin(&w_mtx);
+			
 			/*
 			 * Ok, yell about it.
 			 */
@@ -1068,6 +1245,7 @@ witness_checkorder(struct lock_object *l
 		"lock order reversal: (Giant after non-sleepable)\n");
 			else
 				printf("lock order reversal:\n");
+
 			/*

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200905181947.n4IJltxt072926>