Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 9 Jun 2010 12:48:57 +0200
From:      Ulrich =?utf-8?B?U3DDtnJsZWlu?= <uqs@spoerlein.net>
To:        freebsd-current@freebsd.org
Subject:   Patch for really fixing bsnmpd hrProcessorLoad values
Message-ID:  <20100609104857.GM39829@acme.spoerlein.net>

next in thread | raw e-mail | index | archive | help

--Izn7cH1Com+I3R9J
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Hi guys,

I finally got fed up with bsnmpd no longer returning the right CPU load
values when running under ULE. After taking a look at how top(1) does
it, I came up with the following initial patch to bsnmpd, that seems to
DTRT.

A few technical questions remain:

- Is the idle state guaranteed to always be in the last cp_time column?
- Is handling overflow of kern.cp_times worth it? There's only a 60s
  window of reporting wrong stats, better than what we have now anyway.
- Why is kern.cp_times often times *way* longer than hw.ncpu *
  CPUSTATES? Is it guaranteed that the first n entries correspond to the
  first n*CPUSTATES values in kern.cp_times? Can there be holes?

Patch has been tested on 8-STABLE, 4xCPUs and SCHED_ULE. Other reports
would be very much appreciated.

Regards,
Uli


--Izn7cH1Com+I3R9J
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="bsnmpd.diff"

Index: usr.sbin/bsnmpd/modules/snmp_hostres/hostres_processor_tbl.c
===================================================================
--- usr.sbin/bsnmpd/modules/snmp_hostres/hostres_processor_tbl.c	(revision 208628)
+++ usr.sbin/bsnmpd/modules/snmp_hostres/hostres_processor_tbl.c	(working copy)
@@ -63,6 +63,7 @@
 
 	/* the samples from the last minute, as required by MIB */
 	double		samples[MAX_CPU_SAMPLES];
+	long		states[MAX_CPU_SAMPLES][CPUSTATES];
 
 	/* current sample to fill in next time, must be < MAX_CPU_SAMPLES */
 	uint32_t	cur_sample_idx;
@@ -112,6 +113,43 @@
 	return ((int)floor((double)sum/(double)e->sample_cnt));
 }
 
+static int
+get_avg_usage(struct processor_entry *e)
+{
+	u_int i, oldest;
+	long delta = 0;
+	double load = 0.0;
+
+	assert(e != NULL);
+
+	/* Need two samples to perform delta calculation */
+	if (e->sample_cnt <= 1)
+		return (0);
+
+	/* oldest usable index */
+	if (e->sample_cnt == MAX_CPU_SAMPLES)
+		oldest = (e->cur_sample_idx + 1) % MAX_CPU_SAMPLES;
+	else
+		oldest = 0;
+
+	/* FIXME handle wrap around */
+	for (i = 0; i < CPUSTATES; i++) {
+		delta += e->states[e->cur_sample_idx][i];
+		delta -= e->states[oldest][i];
+	}
+	if (delta == 0)
+		return 0;
+
+	/* XXX idle time is in the last index always?!? */
+	load = (double)(e->states[e->cur_sample_idx][CPUSTATES-1] -
+	    e->states[oldest][CPUSTATES-1]) / delta;
+	load = 100 - (load*100);
+	HRDBG("CPU no. %d delta ticks %ld pct usage %.2f", e->cpu_no,
+	    delta, load);
+
+	return (floor(load));
+}
+
 /*
  * Stolen from /usr/src/bin/ps/print.c. The idle process should never
  * be swapped out :-)
@@ -132,11 +170,15 @@
  * Save a new sample
  */
 static void
-save_sample(struct processor_entry *e, struct kinfo_proc *kp)
+save_sample(struct processor_entry *e, struct kinfo_proc *kp, long *cp_times)
 {
+	int i;
 
+	for (i = 0; cp_times != NULL && i < CPUSTATES; i++)
+		e->states[e->cur_sample_idx][i] = cp_times[i];
+
 	e->samples[e->cur_sample_idx] = 100.0 - processor_getpcpu(kp);
-	e->load = get_avg_load(e);
+	e->load = get_avg_usage(e);
 	e->cur_sample_idx = (e->cur_sample_idx + 1) % MAX_CPU_SAMPLES;
 
 	if (++e->sample_cnt > MAX_CPU_SAMPLES)
@@ -241,8 +283,6 @@
 		entry->idle_pid = kp->ki_pid;
 		HRDBG("CPU no. %d with SNMP index=%d has idle PID %d",
 		    entry->cpu_no, entry->index, entry->idle_pid);
-
-		save_sample(entry, kp);
 	}
 }
 
@@ -386,12 +426,22 @@
 refresh_processor_tbl(void)
 {
 	struct processor_entry *entry;
-	int need_pids;
+	int need_pids, nproc;
 	struct kinfo_proc *plist;
-	int nproc;
+	size_t size;
 
 	processor_refill_tbl();
 
+	long pcpu_cp_times[hw_ncpu * CPUSTATES];
+	memset(pcpu_cp_times, 0, sizeof(pcpu_cp_times));
+
+	size = hw_ncpu * CPUSTATES * sizeof(long);
+	/* FIXME: assert entry->ncpu <= hw_ncpu <= length of cp_times */
+	if (sysctlbyname("kern.cp_times", pcpu_cp_times, &size, NULL, 0) == -1) {
+		syslog(LOG_ERR, "hrProcessorTable: sysctl(kern.cp_times) failed");
+		return;
+	}
+
 	need_pids = 0;
 	TAILQ_FOREACH(entry, &processor_tbl, link) {
 		if (entry->idle_pid <= 0) {
@@ -410,7 +460,7 @@
 			need_pids = 1;
 			continue;
 		}
-		save_sample(entry, plist);
+		save_sample(entry, plist, &pcpu_cp_times[entry->cpu_no * CPUSTATES]);
 	}
 
 	if (need_pids == 1)
Index: usr.sbin/bsnmpd/modules/snmp_hostres/Makefile
===================================================================
--- usr.sbin/bsnmpd/modules/snmp_hostres/Makefile	(revision 208628)
+++ usr.sbin/bsnmpd/modules/snmp_hostres/Makefile	(working copy)
@@ -48,7 +48,8 @@
 	printcap.c
 
 #Not having NDEBUG defined will enable assertions and a lot of output on stderr
-CFLAGS+= -DNDEBUG -I${LPRSRC}
+WARNS?=	1
+CFLAGS+= -I${LPRSRC}
 XSYM=	host hrStorageOther hrStorageRam hrStorageVirtualMemory \
 	hrStorageFixedDisk hrStorageRemovableDisk hrStorageFloppyDisk \
 	hrStorageCompactDisc hrStorageRamDisk hrStorageFlashMemory \

--Izn7cH1Com+I3R9J--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20100609104857.GM39829>