Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 18 Jun 2009 17:25:38 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org
Subject:   svn commit: r194450 - vendor/opensolaris/dist/cmd/lockstat
Message-ID:  <200906181725.n5IHPc0o017945@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Thu Jun 18 17:25:38 2009
New Revision: 194450
URL: http://svn.freebsd.org/changeset/base/194450

Log:
  Import the lockstat(1) sources from OpenSolaris as of 20080410.

Added:
  vendor/opensolaris/dist/cmd/lockstat/
  vendor/opensolaris/dist/cmd/lockstat/lockstat.c   (contents, props changed)
  vendor/opensolaris/dist/cmd/lockstat/sym.c   (contents, props changed)

Added: vendor/opensolaris/dist/cmd/lockstat/lockstat.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ vendor/opensolaris/dist/cmd/lockstat/lockstat.c	Thu Jun 18 17:25:38 2009	(r194450)
@@ -0,0 +1,1858 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <strings.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/modctl.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <dtrace.h>
+#include <sys/lockstat.h>
+#include <alloca.h>
+#include <signal.h>
+#include <assert.h>
+
+#define	LOCKSTAT_OPTSTR	"x:bths:n:d:i:l:f:e:ckwWgCHEATID:RpPo:V"	/* option letters; presumably handed to getopt -- TODO confirm, parsing not shown here */
+
+#define	LS_MAX_STACK_DEPTH	50	/* number of slots in lsrec_t.ls_stack */
+#define	LS_MAX_EVENTS		64	/* size of the per-event tables (g_event_info, g_enabled, ...) */
+
+typedef struct lsrec {	/* one lock-event record; may be stored truncated to g_recsize bytes */
+	struct lsrec	*ls_next;	/* next in hash chain */
+	uintptr_t	ls_lock;	/* lock address */
+	uintptr_t	ls_caller;	/* caller address */
+	uint32_t	ls_count;	/* cumulative event count */
+	uint32_t	ls_event;	/* type of event */
+	uintptr_t	ls_refcnt;	/* cumulative reference count */
+	uint64_t	ls_time;	/* cumulative event duration */
+	uint32_t	ls_hist[64];	/* log2(duration) histogram */
+	uintptr_t	ls_stack[LS_MAX_STACK_DEPTH];	/* stack PCs copied in by lsrec_fill() */
+} lsrec_t;
+
+typedef struct lsdata {	/* fill cursor handed to the process_*() callbacks */
+	struct lsrec	*lsd_next;	/* next available record slot */
+	int		lsd_count;	/* number of records filled so far */
+} lsdata_t;
+
+/*
+ * Definitions for the types of experiments which can be run.  They are
+ * listed in increasing order of memory cost and processing time cost.
+ * The numerical value of each type is the number of bytes needed per record.
+ */
+#define	LS_BASIC	offsetof(lsrec_t, ls_time)	/* record ends before ls_time */
+#define	LS_TIME		offsetof(lsrec_t, ls_hist[0])	/* record includes ls_time */
+#define	LS_HIST		offsetof(lsrec_t, ls_stack[0])	/* record includes ls_hist[] */
+#define	LS_STACK(depth)	offsetof(lsrec_t, ls_stack[depth])	/* record includes <depth> stack slots */
+
+static void report_stats(FILE *, lsrec_t **, size_t, uint64_t, uint64_t);	/* defined later in this file (beyond the visible part) */
+static void report_trace(FILE *, lsrec_t **);	/* defined later in this file (beyond the visible part) */
+
+extern int symtab_init(void);	/* symbol helpers below are defined outside this file -- presumably sym.c from the same commit */
+extern char *addr_to_sym(uintptr_t, uintptr_t *, size_t *);	/* coalesce_symbol() treats the outputs as symbol offset and symbol size */
+extern uintptr_t sym_to_addr(char *name);
+extern size_t sym_size(char *name);
+extern char *strtok_r(char *, const char *, char **);	/* NOTE(review): normally declared by <string.h>; this redeclaration looks redundant */
+
+#define	DEFAULT_NRECS	10000	/* default shown by usage() for -n */
+#define	DEFAULT_HZ	97	/* default shown by usage() for -i */
+#define	MAX_HZ		1000
+#define	MIN_AGGSIZE	(16 * 1024)
+#define	MAX_AGGSIZE	(32 * 1024 * 1024)
+
+static int g_stkdepth;	/* number of ls_stack[] slots compared by the sort comparators */
+static int g_topn = INT_MAX;
+static hrtime_t g_elapsed;
+static int g_rates = 0;
+static int g_pflag = 0;
+static int g_Pflag = 0;
+static int g_wflag = 0;
+static int g_Wflag = 0;
+static int g_cflag = 0;
+static int g_kflag = 0;
+static int g_gflag = 0;
+static int g_Vflag = 0;	/* non-zero: dprog_compile() dumps the D program to stderr */
+static int g_tracing = 0;	/* non-zero: generate trace() actions instead of aggregations */
+static size_t g_recsize;	/* bytes per stored record; compared against the LS_* sizes */
+static size_t g_nrecs;	/* record capacity; process_trace() stops filling at this count */
+static int g_nrecs_used;
+static uchar_t g_enabled[LS_MAX_EVENTS];
+static hrtime_t g_min_duration[LS_MAX_EVENTS];	/* per event; non-zero adds a ">=" duration predicate */
+static dtrace_hdl_t *g_dtp;	/* libdtrace handle used by every dtrace_*() call here */
+static char *g_predicate;	/* extra D predicate applied to non-interrupt events */
+static char *g_ipredicate;	/* extra D predicate applied to interrupt ('I') events */
+static char *g_prog;	/* generated D program text, grown by dprog_add() */
+static int g_proglen;	/* size of g_prog in bytes, including the terminating NUL */
+static int g_dropped;
+
+typedef struct ls_event_info {	/* static description of one event number */
+	char	ev_type;	/* event class letter: 'C', 'H', 'I' or 'E' */
+	char	ev_lhdr[20];	/* "Lock" or "CPU+PIL" in the table; presumably a report column header */
+	char	ev_desc[80];	/* description printed by show_events() */
+	char	ev_units[10];	/* units string, e.g. "nsec" */
+	char	ev_name[DTRACE_NAMELEN];	/* probe description emitted into the D program; empty = no probe */
+	char	*ev_predicate;	/* optional D predicate for this event, or NULL */
+	char	*ev_acquire;	/* optional acquire-side probe description, or NULL */
+} ls_event_info_t;
+
+static ls_event_info_t g_event_info[LS_MAX_EVENTS] = {	/* indexed by event number; "Unknown event" rows are placeholders */
+	{ 'C',	"Lock",	"Adaptive mutex spin",			"nsec",
+	    "lockstat:::adaptive-spin" },
+	{ 'C',	"Lock",	"Adaptive mutex block",			"nsec",
+	    "lockstat:::adaptive-block" },
+	{ 'C',	"Lock",	"Spin lock spin",			"nsec",
+	    "lockstat:::spin-spin" },
+	{ 'C',	"Lock",	"Thread lock spin",			"nsec",
+	    "lockstat:::thread-spin" },
+	{ 'C',	"Lock",	"R/W writer blocked by writer",		"nsec",
+	    "lockstat:::rw-block", "arg2 == 0 && arg3 == 1" },
+	{ 'C',	"Lock",	"R/W writer blocked by readers",	"nsec",
+	    "lockstat:::rw-block", "arg2 == 0 && arg3 == 0 && arg4" },
+	{ 'C',	"Lock",	"R/W reader blocked by writer",		"nsec",
+	    "lockstat:::rw-block", "arg2 != 0 && arg3 == 1" },
+	{ 'C',	"Lock",	"R/W reader blocked by write wanted",	"nsec",
+	    "lockstat:::rw-block", "arg2 != 0 && arg3 == 0 && arg4" },
+	{ 'C',	"Lock",	"Unknown event (type 8)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 9)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 10)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 11)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 12)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 13)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 14)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 15)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 16)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 17)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 18)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 19)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 20)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 21)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 22)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 23)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 24)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 25)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 26)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 27)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 28)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 29)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 30)",		"units"	},
+	{ 'C',	"Lock",	"Unknown event (type 31)",		"units"	},
+	{ 'H',	"Lock",	"Adaptive mutex hold",			"nsec",
+	    "lockstat:::adaptive-release", NULL,
+	    "lockstat:::adaptive-acquire" },
+	{ 'H',	"Lock",	"Spin lock hold",			"nsec",
+	    "lockstat:::spin-release", NULL,
+	    "lockstat:::spin-acquire" },
+	{ 'H',	"Lock",	"R/W writer hold",			"nsec",
+	    "lockstat:::rw-release", "arg1 == 0",
+	    "lockstat:::rw-acquire" },
+	{ 'H',	"Lock",	"R/W reader hold",			"nsec",
+	    "lockstat:::rw-release", "arg1 != 0",
+	    "lockstat:::rw-acquire" },
+	{ 'H',	"Lock",	"Unknown event (type 36)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 37)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 38)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 39)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 40)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 41)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 42)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 43)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 44)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 45)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 46)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 47)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 48)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 49)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 50)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 51)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 52)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 53)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 54)",		"units"	},
+	{ 'H',	"Lock",	"Unknown event (type 55)",		"units"	},
+	{ 'I',	"CPU+PIL", "Profiling interrupt",		"nsec",
+	    "profile:::profile-97", NULL },
+	{ 'I',	"Lock",	"Unknown event (type 57)",		"units"	},
+	{ 'I',	"Lock",	"Unknown event (type 58)",		"units"	},
+	{ 'I',	"Lock",	"Unknown event (type 59)",		"units"	},
+	{ 'E',	"Lock",	"Recursive lock entry detected",	"(N/A)",
+	    "lockstat:::rw-release", NULL, "lockstat:::rw-acquire" },
+	{ 'E',	"Lock",	"Lockstat enter failure",		"(N/A)"	},
+	{ 'E',	"Lock",	"Lockstat exit failure",		"nsec"	},
+	{ 'E',	"Lock",	"Lockstat record failure",		"(N/A)"	},
+};
+
+/*
+ * Report a fatal error on stderr and terminate with exit status 2.
+ *
+ * The output is "lockstat: " followed by the printf-style 'message'.  When
+ * 'do_perror' is non-zero, ": " and the strerror() text for the errno value
+ * seen on entry are appended.  This function does not return.
+ */
+static void
+fail(int do_perror, const char *message, ...)
+{
+	const int err = errno;	/* capture before stdio can disturb it */
+	va_list ap;
+
+	(void) fprintf(stderr, "lockstat: ");
+
+	va_start(ap, message);
+	(void) vfprintf(stderr, message, ap);
+	va_end(ap);
+
+	if (do_perror != 0)
+		(void) fprintf(stderr, ": %s", strerror(err));
+
+	(void) fprintf(stderr, "\n");
+	exit(2);
+}
+
+/*
+ * Report a fatal libdtrace error on stderr and terminate with exit status 2.
+ *
+ * The output is "lockstat: ", the printf-style 'message', then ": " and the
+ * dtrace_errmsg() text for the current error of the global handle g_dtp.
+ * This function does not return.
+ */
+static void
+dfail(const char *message, ...)
+{
+	va_list ap;
+	const char *why;
+
+	(void) fprintf(stderr, "lockstat: ");
+
+	va_start(ap, message);
+	(void) vfprintf(stderr, message, ap);
+	va_end(ap);
+
+	why = dtrace_errmsg(g_dtp, dtrace_errno(g_dtp));
+	(void) fprintf(stderr, ": %s\n", why);
+
+	exit(2);
+}
+
+/*
+ * Print to stderr a heading and the numbered list of events of class
+ * 'event_type' ('C', 'H', 'I' or 'E'); 'desc' is the class name used in
+ * the heading.
+ *
+ * The range printed runs from the first to the last entry of g_event_info
+ * that has the requested type and whose description does not start with
+ * "Unknown event".  If no such entry exists nothing is printed; this avoids
+ * using an uninitialized upper bound and indexing g_event_info[-1].
+ */
+static void
+show_events(char event_type, char *desc)
+{
+	int i, first = -1, last = -1;
+
+	for (i = 0; i < LS_MAX_EVENTS; i++) {
+		ls_event_info_t *evp = &g_event_info[i];
+		if (evp->ev_type != event_type ||
+		    strncmp(evp->ev_desc, "Unknown event", 13) == 0)
+			continue;
+		if (first == -1)
+			first = i;
+		last = i;
+	}
+
+	/* No known event of this class: there is no valid range to show. */
+	if (first == -1)
+		return;
+
+	(void) fprintf(stderr,
+	    "\n%s events (lockstat -%c or lockstat -e %d-%d):\n\n",
+	    desc, event_type, first, last);
+
+	for (i = first; i <= last; i++)
+		(void) fprintf(stderr,
+		    "%4d = %s\n", i, g_event_info[i].ev_desc);
+}
+
+/*
+ * Print the option summary followed by the per-class event lists to stderr,
+ * then terminate with exit status 1.
+ */
+static void
+usage(void)
+{
+	static const struct {
+		char	uc_type;	/* event class letter */
+		char	*uc_desc;	/* heading passed to show_events() */
+	} classes[] = {
+		{ 'C',	"Contention" },
+		{ 'H',	"Hold-time" },
+		{ 'I',	"Interrupt" },
+		{ 'E',	"Error" },
+	};
+	size_t k;
+
+	(void) fprintf(stderr,
+	    "Usage: lockstat [options] command [args]\n"
+	    "\nEvent selection options:\n\n"
+	    "  -C              watch contention events [on by default]\n"
+	    "  -E              watch error events [off by default]\n"
+	    "  -H              watch hold events [off by default]\n"
+	    "  -I              watch interrupt events [off by default]\n"
+	    "  -A              watch all lock events [equivalent to -CH]\n"
+	    "  -e event_list   only watch the specified events (shown below);\n"
+	    "                  <event_list> is a comma-separated list of\n"
+	    "                  events or ranges of events, e.g. 1,4-7,35\n"
+	    "  -i rate         interrupt rate for -I [default: %d Hz]\n"
+	    "\nData gathering options:\n\n"
+	    "  -b              basic statistics (lock, caller, event count)\n"
+	    "  -t              timing for all events [default]\n"
+	    "  -h              histograms for event times\n"
+	    "  -s depth        stack traces <depth> deep\n"
+	    "  -x opt[=val]    enable or modify DTrace options\n"
+	    "\nData filtering options:\n\n"
+	    "  -n nrecords     maximum number of data records [default: %d]\n"
+	    "  -l lock[,size]  only watch <lock>, which can be specified as a\n"
+	    "                  symbolic name or hex address; <size> defaults\n"
+	    "                  to the ELF symbol size if available, 1 if not\n"
+	    "  -f func[,size]  only watch events generated by <func>\n"
+	    "  -d duration     only watch events longer than <duration>\n"
+	    "  -T              trace (rather than sample) events\n"
+	    "\nData reporting options:\n\n"
+	    "  -c              coalesce lock data for arrays like pse_mutex[]\n"
+	    "  -k              coalesce PCs within functions\n"
+	    "  -g              show total events generated by function\n"
+	    "  -w              wherever: don't distinguish events by caller\n"
+	    "  -W              whichever: don't distinguish events by lock\n"
+	    "  -R              display rates rather than counts\n"
+	    "  -p              parsable output format (awk(1)-friendly)\n"
+	    "  -P              sort lock data by (count * avg_time) product\n"
+	    "  -D n            only display top <n> events of each type\n"
+	    "  -o filename     send output to <filename>\n",
+	    DEFAULT_HZ, DEFAULT_NRECS);
+
+	/* One event list per class, in the order C, H, I, E. */
+	for (k = 0; k < sizeof (classes) / sizeof (classes[0]); k++)
+		show_events(classes[k].uc_type, classes[k].uc_desc);
+
+	(void) fprintf(stderr, "\n");
+
+	exit(1);
+}
+
+/*
+ * Comparator: order records by event type, then by stack (deepest compared
+ * slot first, g_stkdepth slots in all), then by caller, then by lock
+ * address.  Returns -1, 0 or 1.
+ */
+static int
+lockcmp(lsrec_t *a, lsrec_t *b)
+{
+	int depth = g_stkdepth;
+
+	if (a->ls_event != b->ls_event)
+		return (a->ls_event < b->ls_event ? -1 : 1);
+
+	while (--depth >= 0) {
+		if (a->ls_stack[depth] == b->ls_stack[depth])
+			continue;
+		return (a->ls_stack[depth] < b->ls_stack[depth] ? -1 : 1);
+	}
+
+	if (a->ls_caller != b->ls_caller)
+		return (a->ls_caller < b->ls_caller ? -1 : 1);
+
+	if (a->ls_lock != b->ls_lock)
+		return (a->ls_lock < b->ls_lock ? -1 : 1);
+
+	return (0);
+}
+
+/*
+ * Comparator: order records by ascending event type, then by descending
+ * event count.  Returns a negative, zero or positive value.
+ *
+ * The counts are uint32_t, so they are compared explicitly: returning
+ * their difference as an int yields the wrong sign once the counts differ
+ * by 2^31 or more.
+ */
+static int
+countcmp(lsrec_t *a, lsrec_t *b)
+{
+	if (a->ls_event < b->ls_event)
+		return (-1);
+	if (a->ls_event > b->ls_event)
+		return (1);
+
+	/* larger counts sort first */
+	if (a->ls_count < b->ls_count)
+		return (1);
+	if (a->ls_count > b->ls_count)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Comparator: order records by ascending event type, then by descending
+ * cumulative time.  Returns -1, 0 or 1.
+ */
+static int
+timecmp(lsrec_t *a, lsrec_t *b)
+{
+	if (a->ls_event != b->ls_event)
+		return (a->ls_event < b->ls_event ? -1 : 1);
+
+	/* larger times sort first */
+	if (a->ls_time != b->ls_time)
+		return (a->ls_time < b->ls_time ? 1 : -1);
+
+	return (0);
+}
+
+/*
+ * Comparator: order records by event type, then by lock address, ignoring
+ * the caller and stack.  Returns -1, 0 or 1.
+ */
+static int
+lockcmp_anywhere(lsrec_t *a, lsrec_t *b)
+{
+	if (a->ls_event != b->ls_event)
+		return (a->ls_event < b->ls_event ? -1 : 1);
+
+	if (a->ls_lock != b->ls_lock)
+		return (a->ls_lock < b->ls_lock ? -1 : 1);
+
+	return (0);
+}
+
+/*
+ * Comparator: order records by ascending event type, then ascending lock
+ * address, then descending event count.  Returns a negative, zero or
+ * positive value.
+ *
+ * The counts are uint32_t, so they are compared explicitly: returning
+ * their difference as an int yields the wrong sign once the counts differ
+ * by 2^31 or more.
+ */
+static int
+lock_and_count_cmp_anywhere(lsrec_t *a, lsrec_t *b)
+{
+	if (a->ls_event < b->ls_event)
+		return (-1);
+	if (a->ls_event > b->ls_event)
+		return (1);
+
+	if (a->ls_lock < b->ls_lock)
+		return (-1);
+	if (a->ls_lock > b->ls_lock)
+		return (1);
+
+	/* larger counts sort first */
+	if (a->ls_count < b->ls_count)
+		return (1);
+	if (a->ls_count > b->ls_count)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Comparator: order records by event type, then by stack (deepest compared
+ * slot first, g_stkdepth slots in all), then by caller, ignoring the lock
+ * address.  Returns -1, 0 or 1.
+ */
+static int
+sitecmp_anylock(lsrec_t *a, lsrec_t *b)
+{
+	int depth = g_stkdepth;
+
+	if (a->ls_event != b->ls_event)
+		return (a->ls_event < b->ls_event ? -1 : 1);
+
+	while (--depth >= 0) {
+		if (a->ls_stack[depth] == b->ls_stack[depth])
+			continue;
+		return (a->ls_stack[depth] < b->ls_stack[depth] ? -1 : 1);
+	}
+
+	if (a->ls_caller != b->ls_caller)
+		return (a->ls_caller < b->ls_caller ? -1 : 1);
+
+	return (0);
+}
+
+/*
+ * Comparator: order records by ascending event type, then by stack
+ * (deepest compared slot first, g_stkdepth slots in all), then by caller,
+ * then by descending event count.  Returns a negative, zero or positive
+ * value.
+ *
+ * The counts are uint32_t, so they are compared explicitly: returning
+ * their difference as an int yields the wrong sign once the counts differ
+ * by 2^31 or more.
+ */
+static int
+site_and_count_cmp_anylock(lsrec_t *a, lsrec_t *b)
+{
+	int i;
+
+	if (a->ls_event < b->ls_event)
+		return (-1);
+	if (a->ls_event > b->ls_event)
+		return (1);
+
+	for (i = g_stkdepth - 1; i >= 0; i--) {
+		if (a->ls_stack[i] < b->ls_stack[i])
+			return (-1);
+		if (a->ls_stack[i] > b->ls_stack[i])
+			return (1);
+	}
+
+	if (a->ls_caller < b->ls_caller)
+		return (-1);
+	if (a->ls_caller > b->ls_caller)
+		return (1);
+
+	/* larger counts sort first */
+	if (a->ls_count < b->ls_count)
+		return (1);
+	if (a->ls_count > b->ls_count)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Sort the n record pointers in a[] into ascending order according to cmp,
+ * using b[] (at least n elements) as scratch space.
+ *
+ * Each half is sorted recursively.  The first half is then copied to
+ * b[0..m-1] in order and the second half to b[m..n-1] in reverse, so the
+ * merge can consume b[] from both ends towards the middle without any
+ * bounds checks.
+ *
+ * An empty input (n <= 0) is a no-op; without the guard the final store
+ * would read b[0] and write a[0] even though neither element exists.
+ */
+static void
+mergesort(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **a, lsrec_t **b, int n)
+{
+	int m = n / 2;
+	int i, j;
+
+	if (n <= 0)
+		return;
+
+	if (m > 1)
+		mergesort(cmp, a, b, m);
+	if (n - m > 1)
+		mergesort(cmp, a + m, b + m, n - m);
+	for (i = m; i > 0; i--)		/* first half, in order; ends with i == 0 */
+		b[i - 1] = a[i - 1];
+	for (j = m - 1; j < n - 1; j++)	/* second half, reversed; ends with j == n - 1 */
+		b[n + m - j - 2] = a[j + 1];
+	while (i < j)			/* take the smaller of the two ends */
+		*a++ = cmp(b[i], b[j]) < 0 ? b[i++] : b[j--];
+	*a = b[i];			/* the one element left when i == j */
+}
+
+/*
+ * Merge runs of adjacent records in lock[0..n-1] that compare equal under
+ * cmp.  The first record of each run accumulates the count and reference
+ * count of the others, plus the time when g_recsize >= LS_TIME and the
+ * histogram when g_recsize >= LS_HIST.  Every merged-away record is marked
+ * by setting its ls_event to LS_MAX_EVENTS.
+ *
+ * Only neighbouring records are merged, so equal records must already be
+ * adjacent in lock[].  An empty array (n <= 0) is a no-op.
+ */
+static void
+coalesce(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **lock, int n)
+{
+	int i;
+	size_t j;
+	lsrec_t *target, *current;
+
+	if (n <= 0)
+		return;		/* lock[0] does not exist */
+
+	target = lock[0];
+
+	for (i = 1; i < n; i++) {
+		current = lock[i];
+		if (cmp(current, target) != 0) {
+			target = current;	/* a new run starts here */
+			continue;
+		}
+		current->ls_event = LS_MAX_EVENTS;
+		target->ls_count += current->ls_count;
+		target->ls_refcnt += current->ls_refcnt;
+		if (g_recsize < LS_TIME)
+			continue;
+		target->ls_time += current->ls_time;
+		if (g_recsize < LS_HIST)
+			continue;
+		for (j = 0; j < sizeof (target->ls_hist) /
+		    sizeof (target->ls_hist[0]); j++)
+			target->ls_hist[j] += current->ls_hist[j];
+	}
+}
+
+/*
+ * If addr_to_sym() resolves *addrp and the reported offset lies inside the
+ * reported symbol size, rewind *addrp by that offset (to the start of the
+ * symbol).  Otherwise *addrp is left untouched.
+ */
+static void
+coalesce_symbol(uintptr_t *addrp)
+{
+	uintptr_t off;
+	size_t size;
+
+	if (addr_to_sym(*addrp, &off, &size) == NULL)
+		return;
+
+	if (off < size)
+		*addrp -= off;
+}
+
+/*
+ * AND a new term onto the heap-allocated D predicate string *pred.
+ *
+ * pred   in/out; *pred may be NULL (treated as empty).  On return *pred is
+ *        a newly allocated string and the old one has been freed.
+ * what   the term's text; if NULL the call is a no-op.
+ * cmp    optional comparison operator.  When non-NULL the term becomes
+ *        "<what> <cmp> 0x<value in hex>"; when NULL the term is just <what>.
+ * value  right-hand side used with cmp.
+ *
+ * A non-empty existing predicate is combined as "(<old>) && (<term>)".
+ *
+ * The buffer is sized from the actual pieces (including cmp), allocation
+ * failure is fatal, and the value is printed with an integer conversion
+ * because the text produced by %p is implementation-defined and may
+ * already carry its own "0x" prefix.
+ */
+static void
+predicate_add(char **pred, char *what, char *cmp, uintptr_t value)
+{
+	/* "0x" + two hex digits per byte + NUL */
+	char num[2 + 2 * sizeof (unsigned long long) + 1];
+	const char *old;
+	char *new;
+	size_t need;
+
+	if (what == NULL)
+		return;
+
+	old = (*pred != NULL) ? *pred : "";
+
+	/* sizeof counts the NUL along with the "() && ()" punctuation */
+	need = strlen(old) + strlen(what) + sizeof ("() && ()");
+	if (cmp != NULL) {
+		(void) snprintf(num, sizeof (num), "0x%llx",
+		    (unsigned long long)value);
+		need += strlen(cmp) + strlen(num) + 2;	/* two spaces */
+	}
+
+	if ((new = malloc(need)) == NULL)
+		fail(1, "failed to allocate predicate");
+
+	if (old[0] != '\0') {
+		if (cmp != NULL) {
+			(void) snprintf(new, need, "(%s) && (%s %s %s)",
+			    old, what, cmp, num);
+		} else {
+			(void) snprintf(new, need, "(%s) && (%s)", old, what);
+		}
+	} else {
+		if (cmp != NULL) {
+			(void) snprintf(new, need, "%s %s %s", what, cmp, num);
+		} else {
+			(void) snprintf(new, need, "%s", what);
+		}
+	}
+
+	free(*pred);
+	*pred = new;
+}
+
+/*
+ * Release the predicate string *pred and reset the caller's pointer to
+ * NULL.  A NULL *pred is accepted.
+ */
+static void
+predicate_destroy(char **pred)
+{
+	char *old = *pred;
+
+	*pred = NULL;
+	free(old);
+}
+
+/*
+ * Append an address-range test to the heap-allocated filter string *filt.
+ *
+ * filt   in/out; *filt may be NULL (treated as empty).  On return *filt is
+ *        a newly allocated string and the old one has been freed.
+ * what   the D expression to range-check.
+ * base   inclusive lower bound.
+ * size   length of the range; the exclusive upper bound is base + size.
+ *
+ * The appended term is "(<what> >= 0x<base> && <what> < 0x<base+size>)",
+ * joined to a non-empty existing filter with " || ".
+ *
+ * The result is formatted straight into an exactly sized allocation, so a
+ * long 'what' cannot overrun a fixed buffer; allocation failure is fatal.
+ * Addresses are printed with an integer conversion because the text
+ * produced by %p is implementation-defined and may already carry its own
+ * "0x" prefix.
+ */
+static void
+filter_add(char **filt, char *what, uintptr_t base, uintptr_t size)
+{
+	static const char fmt[] = "%s%s(%s >= 0x%llx && %s < 0x%llx)";
+	const char *old = (*filt != NULL) ? *filt : "";
+	const char *sep = (old[0] != '\0') ? " || " : "";
+	unsigned long long lo = (unsigned long long)base;
+	unsigned long long hi = (unsigned long long)(base + size);
+	char c, *new;
+	int len;
+
+	/* Measure first, using the same one-byte idiom as dprog_add(). */
+	len = snprintf(&c, 1, fmt, old, sep, what, lo, what, hi);
+	if (len < 0)
+		fail(0, "failed to format filter");
+
+	if ((new = malloc((size_t)len + 1)) == NULL)
+		fail(1, "failed to allocate filter");
+
+	(void) snprintf(new, (size_t)len + 1, fmt,
+	    old, sep, what, lo, what, hi);
+
+	free(*filt);
+	*filt = new;
+}
+
+/*
+ * Release the filter string *filt and reset the caller's pointer to NULL.
+ * A NULL *filt is accepted.
+ */
+static void
+filter_destroy(char **filt)
+{
+	char *old = *filt;
+
+	*filt = NULL;
+	free(old);
+}
+
+/*
+ * Append printf-style formatted text to the generated D program held in
+ * the global g_prog, growing the buffer with realloc().  g_proglen tracks
+ * the buffer size in bytes, including the terminating NUL.  Formatting or
+ * allocation failure is fatal.
+ *
+ * The argument list is walked twice (once to measure, once to format), so
+ * it is re-initialized with va_start() before the second pass: a va_list
+ * that has been consumed by vsnprintf() must not be reused.
+ */
+static void
+dprog_add(const char *fmt, ...)
+{
+	va_list args;
+	int size, offs;
+	char c;
+
+	/* Pass 1: measure the formatted length (plus one for the NUL). */
+	va_start(args, fmt);
+	size = vsnprintf(&c, 1, fmt, args) + 1;
+	va_end(args);
+
+	if (size <= 0)
+		fail(0, "failed to format program text");
+
+	if (g_proglen == 0) {
+		offs = 0;
+	} else {
+		offs = g_proglen - 1;	/* overwrite the previous NUL */
+	}
+
+	g_proglen = offs + size;
+
+	if ((g_prog = realloc(g_prog, g_proglen)) == NULL)
+		fail(1, "failed to reallocate program text");
+
+	/* Pass 2: format in place at the end of the existing text. */
+	va_start(args, fmt);
+	(void) vsnprintf(&g_prog[offs], size, fmt, args);
+	va_end(args);
+}
+
+/*
+ * This function may read like an open sewer, but keep in mind that programs
+ * that generate other programs are rarely pretty.  If one has the unenviable
+ * task of maintaining or -- worse -- extending this code, use the -V option
+ * to examine the D program as generated by this function.
+ *
+ * Append to the D program (via dprog_add()) the clause(s) that record event
+ * number 'event', as described by g_event_info[event].  Events with an empty
+ * probe name generate nothing.
+ *
+ * For events with an acquire probe, an extra clause on that probe stores a
+ * timestamp (hold events) or bumps a counter (recursive-entry error event)
+ * in the thread-local self->ev<N>[lock]; the main clause is then predicated
+ * on that variable and clears it afterwards.  The main clause either traces
+ * the event (g_tracing) or aggregates it into @avg and, when the record
+ * size includes the histogram, @hist.
+ */
+static void
+dprog_addevent(int event)
+{
+	ls_event_info_t *info = &g_event_info[event];
+	char *pred = NULL;
+	char stack[20];
+	const char *arg0, *caller;
+	char *arg1 = "arg1";
+	char buf[80];
+	hrtime_t dur;
+	int depth;
+
+	if (info->ev_name[0] == '\0')
+		return;
+
+	if (info->ev_type == 'I') {
+		/*
+		 * For interrupt events, arg0 (normally the lock pointer) is
+		 * the CPU address plus the current pil, and arg1 (normally
+		 * the number of nanoseconds) is the number of nanoseconds
+		 * late -- and it's stored in arg2.
+		 */
+		arg0 = "(uintptr_t)curthread->t_cpu + \n"
+		    "\t    curthread->t_cpu->cpu_profile_pil";
+		caller = "(uintptr_t)arg0";
+		arg1 = "arg2";
+	} else {
+		arg0 = "(uintptr_t)arg0";
+		caller = "caller";
+	}
+
+	if (g_recsize > LS_HIST) {
+		/* smallest depth whose record size covers g_recsize */
+		for (depth = 0; g_recsize > LS_STACK(depth); depth++)
+			continue;
+
+		if (g_tracing) {
+			(void) sprintf(stack, "\tstack(%d);\n", depth);
+		} else {
+			(void) sprintf(stack, ", stack(%d)", depth);
+		}
+	} else {
+		stack[0] = '\0';
+	}
+
+	if (info->ev_acquire != NULL) {
+		/*
+		 * If this is a hold event, we need to generate an additional
+		 * clause for the acquire; the clause for the release will be
+		 * generated with the aggregating statement, below.
+		 */
+		dprog_add("%s\n", info->ev_acquire);
+		predicate_add(&pred, info->ev_predicate, NULL, 0);
+		predicate_add(&pred, g_predicate, NULL, 0);
+		if (pred != NULL)
+			dprog_add("/%s/\n", pred);
+
+		dprog_add("{\n");
+		(void) sprintf(buf, "self->ev%d[(uintptr_t)arg0]", event);
+
+		if (info->ev_type == 'H') {
+			dprog_add("\t%s = timestamp;\n", buf);
+		} else {
+			/*
+			 * If this isn't a hold event, it's the recursive
+			 * error event.  For this, we simply bump the
+			 * thread-local, per-lock count.
+			 */
+			dprog_add("\t%s++;\n", buf);
+		}
+
+		dprog_add("}\n\n");
+		predicate_destroy(&pred);
+		pred = NULL;
+
+		if (info->ev_type == 'E') {
+			/*
+			 * If this is the recursive lock error event, we need
+			 * to generate an additional clause to decrement the
+			 * thread-local, per-lock count.  This assures that we
+			 * only execute the aggregating clause if we have
+			 * recursive entry.
+			 */
+			dprog_add("%s\n", info->ev_name);
+			dprog_add("/%s/\n{\n\t%s--;\n}\n\n", buf, buf);
+		}
+
+		/* the main clause only fires if the thread-local is set */
+		predicate_add(&pred, buf, NULL, 0);
+
+		if (info->ev_type == 'H') {
+			(void) sprintf(buf, "timestamp -\n\t    "
+			    "self->ev%d[(uintptr_t)arg0]", event);
+		}
+
+		arg1 = buf;
+	} else {
+		predicate_add(&pred, info->ev_predicate, NULL, 0);
+		if (info->ev_type != 'I')
+			predicate_add(&pred, g_predicate, NULL, 0);
+		else
+			predicate_add(&pred, g_ipredicate, NULL, 0);
+	}
+
+	if ((dur = g_min_duration[event]) != 0)
+		predicate_add(&pred, arg1, ">=", dur);
+
+	dprog_add("%s\n", info->ev_name);
+
+	if (pred != NULL)
+		dprog_add("/%s/\n", pred);
+	predicate_destroy(&pred);
+
+	dprog_add("{\n");
+
+	if (g_tracing) {
+		dprog_add("\ttrace(%dULL);\n", event);
+		dprog_add("\ttrace(%s);\n", arg0);
+		dprog_add("\ttrace(%s);\n", caller);
+		/* never pass generated text as the format string itself */
+		dprog_add("%s", stack);
+	} else {
+		/*
+		 * The ordering here is important:  when we process the
+		 * aggregate, we count on the fact that @avg appears before
+		 * @hist in program order to assure that @avg is assigned the
+		 * first aggregation variable ID and @hist assigned the
+		 * second; see the comment in process_aggregate() for details.
+		 */
+		dprog_add("\t@avg[%dULL, %s, %s%s] = avg(%s);\n",
+		    event, arg0, caller, stack, arg1);
+
+		if (g_recsize >= LS_HIST) {
+			dprog_add("\t@hist[%dULL, %s, %s%s] = quantize"
+			    "(%s);\n", event, arg0, caller, stack, arg1);
+		}
+	}
+
+	if (info->ev_acquire != NULL)
+		dprog_add("\tself->ev%d[arg0] = 0;\n", event);
+
+	dprog_add("}\n\n");
+}
+
+/*
+ * Compile the generated D program in g_prog, enable its probes and start
+ * tracing on the global handle g_dtp.  With g_Vflag set, the program text
+ * is first dumped to stderr between marker lines.  Any libdtrace failure
+ * is fatal (dfail()).
+ *
+ * Declared with (void), like the file's other parameterless functions, so
+ * that the definition is a real prototype.
+ */
+static void
+dprog_compile(void)
+{
+	dtrace_prog_t *prog;
+	dtrace_proginfo_t info;
+
+	if (g_Vflag) {
+		(void) fprintf(stderr, "lockstat: vvvv D program vvvv\n");
+		(void) fputs(g_prog, stderr);
+		(void) fprintf(stderr, "lockstat: ^^^^ D program ^^^^\n");
+	}
+
+	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
+	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
+		dfail("failed to compile program");
+
+	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
+		dfail("failed to enable probes");
+
+	if (dtrace_go(g_dtp) != 0)
+		dfail("couldn't start tracing");
+}
+
+/*
+ * Empty SIGUSR1 handler installed by status_init().  It takes the signal
+ * number so that its type matches the void (*)(int) expected by
+ * sigaction's sa_handler.
+ */
+/*ARGSUSED*/
+static void
+status_fire(int sig)
+{}
+
+/*
+ * Arrange for a periodic SIGUSR1, delivered by a CLOCK_REALTIME interval
+ * timer whose period is the smaller of the DTrace "statusrate" and
+ * "aggrate" option values.  The signal handler is the empty status_fire().
+ *
+ * Failure to read a DTrace option is reported through dfail(); failure of
+ * the POSIX timer calls is reported through fail() with errno, since the
+ * DTrace error state is unrelated to them.
+ */
+static void
+status_init(void)
+{
+	dtrace_optval_t val, status, agg;
+	struct sigaction act;
+	struct itimerspec ts;
+	struct sigevent ev;
+	timer_t tid;
+
+	if (dtrace_getopt(g_dtp, "statusrate", &status) == -1)
+		dfail("failed to get 'statusrate'");
+
+	if (dtrace_getopt(g_dtp, "aggrate", &agg) == -1)
+		dfail("failed to get 'aggrate'");
+
+	/*
+	 * We would want to awaken at a rate that is the GCD of the statusrate
+	 * and the aggrate -- but that seems a bit absurd.  Instead, we'll
+	 * simply awaken at a rate that is the more frequent of the two, which
+	 * assures that we're never later than the interval implied by the
+	 * more frequent rate.
+	 */
+	val = status < agg ? status : agg;
+
+	(void) sigemptyset(&act.sa_mask);
+	act.sa_flags = 0;
+	act.sa_handler = status_fire;
+	(void) sigaction(SIGUSR1, &act, NULL);
+
+	ev.sigev_notify = SIGEV_SIGNAL;
+	ev.sigev_signo = SIGUSR1;
+
+	if (timer_create(CLOCK_REALTIME, &ev, &tid) == -1)
+		fail(1, "cannot create CLOCK_REALTIME timer");
+
+	/* split val, divided by NANOSEC, into seconds and remainder */
+	ts.it_value.tv_sec = val / NANOSEC;
+	ts.it_value.tv_nsec = val % NANOSEC;
+	ts.it_interval = ts.it_value;
+
+	if (timer_settime(tid, TIMER_RELTIME, &ts, NULL) == -1)
+		fail(1, "cannot set time on CLOCK_REALTIME timer");
+}
+
+/*
+ * Periodic housekeeping: unless tracing, snapshot the aggregation data;
+ * then call dtrace_status().  Either failure is fatal (dfail()).
+ */
+static void
+status_check(void)
+{
+	if (!g_tracing) {
+		if (dtrace_aggregate_snap(g_dtp) != 0)
+			dfail("failed to snap aggregate");
+	}
+
+	if (dtrace_status(g_dtp) == -1)
+		dfail("dtrace_status()");
+}
+
+/*
+ * Fill *lsrec from one DTrace record.
+ *
+ * lsrec  destination; its first g_recsize bytes are zeroed, and ls_count is
+ *        set to 1.
+ * rec    first of the record descriptors to decode: event (uint64_t), lock
+ *        address, caller and -- when g_recsize > LS_HIST -- the stack.
+ * nrecs  number of descriptors available starting at rec.
+ * data   base of the record data; each field lives at data + dtrd_offset.
+ *
+ * A truncated record or a field of unexpected size is fatal (fail()).
+ *
+ * Stack frame 0 is skipped and the remaining frames are copied to ls_stack
+ * starting at slot 0.  The copy is limited to the stack slots that fit in
+ * g_recsize (and in ls_stack itself), so an unexpectedly deep stack cannot
+ * overrun this record into the next one.
+ */
+static void
+lsrec_fill(lsrec_t *lsrec, const dtrace_recdesc_t *rec, int nrecs, caddr_t data)
+{
+	bzero(lsrec, g_recsize);
+	lsrec->ls_count = 1;
+
+	if ((g_recsize > LS_HIST && nrecs < 4) || (nrecs < 3))
+		fail(0, "truncated DTrace record");
+
+	if (rec->dtrd_size != sizeof (uint64_t))
+		fail(0, "bad event size in first record");
+
+	/* LINTED - alignment */
+	lsrec->ls_event = (uint32_t)*((uint64_t *)(data + rec->dtrd_offset));
+	rec++;
+
+	if (rec->dtrd_size != sizeof (uintptr_t))
+		fail(0, "bad lock address size in second record");
+
+	/* LINTED - alignment */
+	lsrec->ls_lock = *((uintptr_t *)(data + rec->dtrd_offset));
+	rec++;
+
+	if (rec->dtrd_size != sizeof (uintptr_t))
+		fail(0, "bad caller size in third record");
+
+	/* LINTED - alignment */
+	lsrec->ls_caller = *((uintptr_t *)(data + rec->dtrd_offset));
+	rec++;
+
+	if (g_recsize > LS_HIST) {
+		size_t frames, slots, i;
+		pc_t *stack;
+
+		frames = rec->dtrd_size / sizeof (pc_t);
+
+		/* stack slots that actually exist in this record */
+		slots = (g_recsize - LS_HIST) / sizeof (lsrec->ls_stack[0]);
+		if (slots > LS_MAX_STACK_DEPTH)
+			slots = LS_MAX_STACK_DEPTH;
+
+		/* LINTED - alignment */
+		stack = (pc_t *)(data + rec->dtrd_offset);
+
+		for (i = 1; i < frames && i <= slots; i++)
+			lsrec->ls_stack[i - 1] = stack[i];
+	}
+}
+
+/*
+ * Aggregation-walk callback that only counts: increments the size_t that
+ * 'arg' points to and asks the walk to continue.  'agg' is unused.
+ */
+/*ARGSUSED*/
+static int
+count_aggregate(const dtrace_aggdata_t *agg, void *arg)
+{
+	size_t *counter = arg;
+
+	(*counter)++;
+
+	return (DTRACE_AGGWALK_NEXT);
+}
+
+/*
+ * Aggregation-walk callback that converts one aggregation entry into the
+ * record at lsdata->lsd_next ('arg' is the lsdata_t fill cursor).  Always
+ * returns DTRACE_AGGWALK_NEXT; malformed entries are fatal (fail()).
+ *
+ * An @avg entry fills the key fields, the count and the cumulative time.
+ * A @hist entry copies the quantize buckets, from the zero bucket upwards,
+ * into ls_hist.  When histograms are being collected (g_recsize >= LS_HIST)
+ * the cursor is advanced only by the @hist entry, so both entries land in
+ * the same record -- this relies on the @hist entry for a key being
+ * visited right after its @avg entry (presumably ensured by the walk order
+ * chosen by the caller, which is not visible here).
+ */
+static int
+process_aggregate(const dtrace_aggdata_t *agg, void *arg)
+{
+	const dtrace_aggdesc_t *aggdesc = agg->dtada_desc;
+	caddr_t data = agg->dtada_data;
+	lsdata_t *lsdata = arg;
+	lsrec_t *lsrec = lsdata->lsd_next;
+	const dtrace_recdesc_t *rec;
+	uint64_t *avg, *quantized;
+	int i, j;
+
+	assert(lsdata->lsd_count < g_nrecs);
+
+	/*
+	 * Aggregation variable IDs are guaranteed to be generated in program
+	 * order, and they are guaranteed to start from DTRACE_AGGVARIDNONE
+	 * plus one.  As "avg" appears before "hist" in program order, we know
+	 * that "avg" will be allocated the first aggregation variable ID, and
+	 * "hist" will be allocated the second aggregation variable ID -- and
+	 * we therefore use the aggregation variable ID to differentiate the
+	 * cases.
+	 */
+	if (aggdesc->dtagd_varid > DTRACE_AGGVARIDNONE + 1) {
+		/*
+		 * This is the histogram entry.  We'll copy the quantized
+		 * data into ls_hist, and jump over the rest.
+		 */
+		rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
+
+		if (aggdesc->dtagd_varid != DTRACE_AGGVARIDNONE + 2)
+			fail(0, "bad variable ID in aggregation record");
+
+		if (rec->dtrd_size !=
+		    DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
+			fail(0, "bad quantize size in aggregation record");
+
+		/* LINTED - alignment */
+		quantized = (uint64_t *)(data + rec->dtrd_offset);
+
+		for (i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
+		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++)
+			lsrec->ls_hist[j] = quantized[i];
+
+		goto out;
+	}
+
+	/* dtagd_rec[0] is skipped; the key fields start at index 1 */
+	lsrec_fill(lsrec, &aggdesc->dtagd_rec[1],
+	    aggdesc->dtagd_nrecs - 1, data);
+
+	rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
+
+	if (rec->dtrd_size != 2 * sizeof (uint64_t))
+		fail(0, "bad avg size in aggregation record");
+
+	/* LINTED - alignment */
+	avg = (uint64_t *)(data + rec->dtrd_offset);
+	lsrec->ls_count = (uint32_t)avg[0];
+	/* ls_time is 64 bits wide: no narrowing cast, which would lose */
+	/* the upper half wherever uintptr_t is 32 bits */
+	lsrec->ls_time = avg[1];
+
+	/* with histograms, the @hist entry completes and advances the record */
+	if (g_recsize >= LS_HIST)
+		return (DTRACE_AGGWALK_NEXT);
+
+out:
+	lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
+	lsdata->lsd_count++;
+
+	return (DTRACE_AGGWALK_NEXT);
+}
+
+/*
+ * Trace-consumer callback: decode one traced probe firing into the next
+ * free record of the lsdata_t cursor passed as 'arg' and advance the
+ * cursor by g_recsize bytes.  Once g_nrecs records are held, further
+ * firings are ignored.  Always returns DTRACE_CONSUME_NEXT.
+ */
+static int
+process_trace(const dtrace_probedata_t *pdata, void *arg)
+{
+	lsdata_t *cursor = arg;
+	lsrec_t *slot = cursor->lsd_next;
+
+	/* buffer full: drop this firing */
+	if (cursor->lsd_count >= g_nrecs)
+		return (DTRACE_CONSUME_NEXT);
+
+	lsrec_fill(slot, pdata->dtpda_edesc->dtepd_rec,
+	    pdata->dtpda_edesc->dtepd_nrecs, pdata->dtpda_data);
+
+	cursor->lsd_next = (lsrec_t *)((uintptr_t)slot + g_recsize);
+	cursor->lsd_count++;
+
+	return (DTRACE_CONSUME_NEXT);
+}
+
+static int
+process_data(FILE *out, char *data)
+{
+	lsdata_t lsdata;
+
+	/* LINTED - alignment */
+	lsdata.lsd_next = (lsrec_t *)data;
+	lsdata.lsd_count = 0;
+
+	if (g_tracing) {
+		if (dtrace_consume(g_dtp, out,
+		    process_trace, NULL, &lsdata) != 0)
+			dfail("failed to consume buffer");
+
+		return (lsdata.lsd_count);
+	}

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200906181725.n5IHPc0o017945>