Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 15 Jul 2001 23:19:42 +0100
From:      Ian Dowse <iedowse@maths.tcd.ie>
To:        freebsd-current@freebsd.org
Subject:   Load average synchronisation and phantom loads
Message-ID:   <200107152319.aa46183@salmon.maths.tcd.ie>

next in thread | raw e-mail | index | archive | help

There are a few PRs and a number of messages in the mailing list
archives that describe a problem where the load average occasionally
remains at 1.0 or greater even though top(1) reports that the CPU
is nearly 100% idle. The PRs I could find in a quick search are
kern/21155, kern/23448 and kern/27334.

The most probable cause for this effect is a synchronisation between
the load measurement and processes that periodically run for short
amounts of time. The load average is based on samples of the number
of running processes taken at exact 5-second intervals. If some
other process regularly runs with a period that divides into 5
seconds, that process may always be seen as running even though it
may only run for a tiny fraction of the available CPU time.

A very likely candidate process is bufdaemon; it sleeps for 1 second
at a time, so if it happens to get scheduled in the same tick as
the load measurement, but just ahead of it, it will always be seen
as running.

The patch below causes the samples of running processes to be
somewhat randomised; instead of being taken every 5 seconds, the
gap now varies in the range 4 to 6 seconds, so that synchronisation
should no longer occur. Would there be any objections to my committing
this?

Two comments on the patch:
- This patch removes the SSLEEP case in loadav(), because in the
  existing code, p->p_slptime has always just been incremented in
  schedcpu() so this case never made a difference. To keep the same
  load average behaviour when loadav() is called at different times,
  this case needs to be removed.

- The load average calculation now has really nothing to do with
  the VM system, so it could be moved elsewhere. I've just left
  it in vm_meter.c because that's where it's always been.

Ian

Index: vm/vm_meter.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/vm/vm_meter.c,v
retrieving revision 1.57
diff -u -r1.57 vm_meter.c
--- vm/vm_meter.c	2001/07/04 19:00:12	1.57
+++ vm/vm_meter.c	2001/07/15 20:54:38
@@ -53,8 +53,11 @@
 #include <vm/vm_object.h>
 #include <sys/sysctl.h>
 
+static void loadav_init(void);
+
 struct loadavg averunnable =
 	{ {0, 0, 0}, FSCALE };	/* load average, of runnable procs */
+static struct callout loadav_callout;
 
 struct vmmeter cnt;
 
@@ -75,19 +78,17 @@
  * 1, 5 and 15 minute intervals.
  */
 static void
-loadav(struct loadavg *avg)
+loadav(void *arg)
 {
 	int i, nrun;
+	struct loadavg *avg;
 	struct proc *p;
 
+	avg = (struct loadavg *)arg;
 	sx_slock(&allproc_lock);
-	for (nrun = 0, p = LIST_FIRST(&allproc); p != 0; p = LIST_NEXT(p, p_list)) {
+	for (nrun = 0, p = LIST_FIRST(&allproc); p != 0;
+	     p = LIST_NEXT(p, p_list)) {
 		switch (p->p_stat) {
-		case SSLEEP:
-			if (p->p_pri.pri_level > PZERO ||
-			    p->p_slptime != 0)
-				continue;
-			/* FALLTHROUGH */
 		case SRUN:
 			if ((p->p_flag & P_NOLOAD) != 0)
 				continue;
@@ -100,15 +101,24 @@
 	for (i = 0; i < 3; i++)
 		avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
 		    nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
+
+	/*
+	 * Schedule the next update to occur in 5 seconds, but add a
+	 * random variation to help avoid synchronisation with
+	 * processes that run at regular intervals.
+	 */
+	callout_reset(&loadav_callout, hz * 4 + (int)(random() % (hz * 2)),
+	    loadav, arg);
 }
 
-void
-vmmeter()
+static void
+loadav_init()
 {
-
-	if (time_second % 5 == 0)
-		loadav(&averunnable);
+	callout_init(&loadav_callout, 0);
+	loadav(&averunnable);
 }
+SYSINIT(loadav, SI_SUB_PSEUDO, SI_ORDER_ANY, loadav_init, NULL)
+
 
 SYSCTL_UINT(_vm, VM_V_FREE_MIN, v_free_min,
 	CTLFLAG_RW, &cnt.v_free_min, 0, "");
Index: vm/vm_extern.h
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/vm/vm_extern.h,v
retrieving revision 1.47
diff -u -r1.47 vm_extern.h
--- vm/vm_extern.h	2000/03/13 10:47:24	1.47
+++ vm/vm_extern.h	2001/07/15 20:36:14
@@ -84,7 +84,6 @@
 int vm_mmap __P((vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, void *, vm_ooffset_t));
 vm_offset_t vm_page_alloc_contig __P((vm_offset_t, vm_offset_t, vm_offset_t, vm_offset_t));
 void vm_set_page_size __P((void));
-void vmmeter __P((void));
 struct vmspace *vmspace_alloc __P((vm_offset_t, vm_offset_t));
 struct vmspace *vmspace_fork __P((struct vmspace *));
 void vmspace_exec __P((struct proc *));
Index: kern/kern_synch.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/kern/kern_synch.c,v
retrieving revision 1.148
diff -u -r1.148 kern_synch.c
--- kern/kern_synch.c	2001/07/06 01:16:42	1.148
+++ kern/kern_synch.c	2001/07/15 20:38:13
@@ -56,8 +56,6 @@
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/vmmeter.h>
-#include <vm/vm.h>
-#include <vm/vm_extern.h>
 #ifdef KTRACE
 #include <sys/uio.h>
 #include <sys/ktrace.h>
@@ -295,7 +293,6 @@
 		mtx_unlock_spin(&sched_lock);
 	}
 	sx_sunlock(&allproc_lock);
-	vmmeter();
 	wakeup((caddr_t)&lbolt);
 	callout_reset(&schedcpu_callout, hz, schedcpu, NULL);
 }

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200107152319.aa46183>