Date: Wed, 9 Jun 2010 12:48:57 +0200 From: Ulrich =?utf-8?B?U3DDtnJsZWlu?= <uqs@spoerlein.net> To: freebsd-current@freebsd.org Subject: Patch for really fixing bsnmpd hrProcessorLoad values Message-ID: <20100609104857.GM39829@acme.spoerlein.net>
next in thread | raw e-mail | index | archive | help
--Izn7cH1Com+I3R9J
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Hi guys,
I finally got fed up with bsnmpd no longer returning the right CPU load
values when running under ULE. After taking a look at how top(1) does
it, I came up with the following initial patch to bsnmpd, that seems to
DTRT.
A few technical questions remain:
- Is the idle state guaranteed to always be in the last cp_time column?
- Is handling overflow of kern.cp_times worth it? There's only a 60s
window of reporting wrong stats, better than what we have now anyway.
- Why is kern.cp_times often times *way* longer than hw.ncpu *
CPUSTATES? Is it guaranteed that the first n entries correspond to the
first n*CPUSTATES values in kern.cp_times? Can there be holes?
Patch has been tested on 8-STABLE, 4xCPUs and SCHED_ULE. Other reports
would be very much appreciated.
Regards,
Uli
--Izn7cH1Com+I3R9J
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="bsnmpd.diff"
Index: usr.sbin/bsnmpd/modules/snmp_hostres/hostres_processor_tbl.c
===================================================================
--- usr.sbin/bsnmpd/modules/snmp_hostres/hostres_processor_tbl.c (revision 208628)
+++ usr.sbin/bsnmpd/modules/snmp_hostres/hostres_processor_tbl.c (working copy)
@@ -63,6 +63,7 @@
/* the samples from the last minute, as required by MIB */
double samples[MAX_CPU_SAMPLES];
+ long states[MAX_CPU_SAMPLES][CPUSTATES];
/* current sample to fill in next time, must be < MAX_CPU_SAMPLES */
uint32_t cur_sample_idx;
@@ -112,6 +113,43 @@
return ((int)floor((double)sum/(double)e->sample_cnt));
}
+static int
+get_avg_usage(struct processor_entry *e)
+{
+ u_int i, oldest;
+ long delta = 0;
+ double load = 0.0;
+
+ assert(e != NULL);
+
+ /* Need two samples to perform delta calculation */
+ if (e->sample_cnt <= 1)
+ return (0);
+
+ /* oldest usable index */
+ if (e->sample_cnt == MAX_CPU_SAMPLES)
+ oldest = (e->cur_sample_idx + 1) % MAX_CPU_SAMPLES;
+ else
+ oldest = 0;
+
+ /* FIXME handle wrap around */
+ for (i = 0; i < CPUSTATES; i++) {
+ delta += e->states[e->cur_sample_idx][i];
+ delta -= e->states[oldest][i];
+ }
+ if (delta == 0)
+ return 0;
+
+ /* XXX idle time is in the last index always?!? */
+ load = (double)(e->states[e->cur_sample_idx][CPUSTATES-1] -
+ e->states[oldest][CPUSTATES-1]) / delta;
+ load = 100 - (load*100);
+ HRDBG("CPU no. %d delta ticks %ld pct usage %.2f", e->cpu_no,
+ delta, load);
+
+ return (floor(load));
+}
+
/*
* Stolen from /usr/src/bin/ps/print.c. The idle process should never
* be swapped out :-)
@@ -132,11 +170,15 @@
* Save a new sample
*/
static void
-save_sample(struct processor_entry *e, struct kinfo_proc *kp)
+save_sample(struct processor_entry *e, struct kinfo_proc *kp, long *cp_times)
{
+ int i;
+ for (i = 0; cp_times != NULL && i < CPUSTATES; i++)
+ e->states[e->cur_sample_idx][i] = cp_times[i];
+
e->samples[e->cur_sample_idx] = 100.0 - processor_getpcpu(kp);
- e->load = get_avg_load(e);
+ e->load = get_avg_usage(e);
e->cur_sample_idx = (e->cur_sample_idx + 1) % MAX_CPU_SAMPLES;
if (++e->sample_cnt > MAX_CPU_SAMPLES)
@@ -241,8 +283,6 @@
entry->idle_pid = kp->ki_pid;
HRDBG("CPU no. %d with SNMP index=%d has idle PID %d",
entry->cpu_no, entry->index, entry->idle_pid);
-
- save_sample(entry, kp);
}
}
@@ -386,12 +426,22 @@
refresh_processor_tbl(void)
{
struct processor_entry *entry;
- int need_pids;
+ int need_pids, nproc;
struct kinfo_proc *plist;
- int nproc;
+ size_t size;
processor_refill_tbl();
+ long pcpu_cp_times[hw_ncpu * CPUSTATES];
+ memset(pcpu_cp_times, 0, sizeof(pcpu_cp_times));
+
+ size = hw_ncpu * CPUSTATES * sizeof(long);
+ /* FIXME: assert entry->ncpu <= hw_ncpu <= length of cp_times */
+ if (sysctlbyname("kern.cp_times", pcpu_cp_times, &size, NULL, 0) == -1) {
+ syslog(LOG_ERR, "hrProcessorTable: sysctl(kern.cp_times) failed");
+ return;
+ }
+
need_pids = 0;
TAILQ_FOREACH(entry, &processor_tbl, link) {
if (entry->idle_pid <= 0) {
@@ -410,7 +460,7 @@
need_pids = 1;
continue;
}
- save_sample(entry, plist);
+ save_sample(entry, plist, &pcpu_cp_times[entry->cpu_no * CPUSTATES]);
}
if (need_pids == 1)
Index: usr.sbin/bsnmpd/modules/snmp_hostres/Makefile
===================================================================
--- usr.sbin/bsnmpd/modules/snmp_hostres/Makefile (revision 208628)
+++ usr.sbin/bsnmpd/modules/snmp_hostres/Makefile (working copy)
@@ -48,7 +48,8 @@
printcap.c
#Not having NDEBUG defined will enable assertions and a lot of output on stderr
-CFLAGS+= -DNDEBUG -I${LPRSRC}
+WARNS?= 1
+CFLAGS+= -I${LPRSRC}
XSYM= host hrStorageOther hrStorageRam hrStorageVirtualMemory \
hrStorageFixedDisk hrStorageRemovableDisk hrStorageFloppyDisk \
hrStorageCompactDisc hrStorageRamDisk hrStorageFlashMemory \
--Izn7cH1Com+I3R9J--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20100609104857.GM39829>
