Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 17 Sep 2007 14:15:36 -0700 (PDT)
From:      Jeff Roberson <jroberson@chesapeake.net>
To:        current@freebsd.org
Subject:   Fix ULE swapping.
Message-ID:  <20070917141407.B558@10.0.0.1>

next in thread | raw e-mail | index | archive | help

[-- Attachment #1 --]
The enclosed patch changes the way the kernel detects how long a thread 
has been swapped out or sleeping so that it is more compatible with ULE. 
For those of you who have run into swapping problems with ULE leading to 
poor interactivity, please test and report back your findings.

Thanks,
Jeff
[-- Attachment #2 --]
Index: lib/libkvm/kvm_proc.c
===================================================================
RCS file: /home/ncvs/src/lib/libkvm/kvm_proc.c,v
retrieving revision 1.93
diff -u -p -r1.93 kvm_proc.c
--- lib/libkvm/kvm_proc.c	17 Sep 2007 05:27:18 -0000	1.93
+++ lib/libkvm/kvm_proc.c	17 Sep 2007 05:58:09 -0000
@@ -85,6 +85,9 @@ __FBSDID("$FreeBSD: src/lib/libkvm/kvm_p
 #define KREAD(kd, addr, obj) \
 	(kvm_read(kd, addr, (char *)(obj), sizeof(*obj)) != sizeof(*obj))
 
+int ticks;
+int hz;
+
 /*
  * Read proc's from memory file into buffer bp, which has space to hold
  * at most maxcnt procs.
@@ -368,7 +371,7 @@ nopgrp:
 		kp->ki_acflag = proc.p_acflag;
 		kp->ki_lock = proc.p_lock;
 		if (proc.p_state != PRS_ZOMBIE) {
-			kp->ki_swtime = proc.p_swtime;
+			kp->ki_swtime = (ticks - proc.p_swtick) / hz;
 			kp->ki_flag = proc.p_flag;
 			kp->ki_sflag = 0;
 			kp->ki_nice = proc.p_nice;
@@ -535,12 +538,14 @@ kvm_getprocs(kd, op, arg, cnt)
 liveout:
 		nprocs = size == 0 ? 0 : size / kd->procbase->ki_structsize;
 	} else {
-		struct nlist nl[4], *p;
+		struct nlist nl[6], *p;
 
 		nl[0].n_name = "_nprocs";
 		nl[1].n_name = "_allproc";
 		nl[2].n_name = "_zombproc";
-		nl[3].n_name = 0;
+		nl[3].n_name = "_ticks";
+		nl[4].n_name = "_hz";
+		nl[5].n_name = 0;
 
 		if (kvm_nlist(kd, nl) != 0) {
 			for (p = nl; p->n_type != 0; ++p)
@@ -553,6 +558,14 @@ liveout:
 			_kvm_err(kd, kd->program, "can't read nprocs");
 			return (0);
 		}
+		if (KREAD(kd, nl[3].n_value, &ticks)) {
+			_kvm_err(kd, kd->program, "can't read ticks");
+			return (0);
+		}
+		if (KREAD(kd, nl[4].n_value, &hz)) {
+			_kvm_err(kd, kd->program, "can't read hz");
+			return (0);
+		}
 		size = nprocs * sizeof(struct kinfo_proc);
 		kd->procbase = (struct kinfo_proc *)_kvm_malloc(kd, size);
 		if (kd->procbase == 0)
Index: sys/kern/kern_fork.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.281
diff -u -p -r1.281 kern_fork.c
--- sys/kern/kern_fork.c	17 Sep 2007 05:27:20 -0000	1.281
+++ sys/kern/kern_fork.c	17 Sep 2007 05:48:53 -0000
@@ -500,6 +500,7 @@ again:
 	 * Increase reference counts on shared objects.
 	 */
 	p2->p_flag = P_INMEM;
+	p2->p_swtick = ticks;
 	if (p1->p_flag & P_PROFIL)
 		startprofclock(p2);
 	td2->td_ucred = crhold(p2->p_ucred);
Index: sys/kern/kern_proc.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_proc.c,v
retrieving revision 1.251
diff -u -p -r1.251 kern_proc.c
--- sys/kern/kern_proc.c	17 Sep 2007 05:27:20 -0000	1.251
+++ sys/kern/kern_proc.c	17 Sep 2007 06:03:21 -0000
@@ -694,7 +694,8 @@ fill_kinfo_proc_only(struct proc *p, str
 		kp->ki_sflag = PS_INMEM;
 	else
 		kp->ki_sflag = 0;
-	kp->ki_swtime = p->p_swtime;
+	/* Calculate legacy swtime as seconds since 'swtick'. */
+	kp->ki_swtime = (ticks - p->p_swtick) / hz;
 	kp->ki_pid = p->p_pid;
 	kp->ki_nice = p->p_nice;
 	rufetch(p, &kp->ki_rusage);
@@ -812,7 +813,7 @@ fill_kinfo_thread(struct thread *td, str
 	kp->ki_kstack = (void *)td->td_kstack;
 	kp->ki_pctcpu = sched_pctcpu(td);
 	kp->ki_estcpu = td->td_estcpu;
-	kp->ki_slptime = td->td_slptime;
+	kp->ki_slptime = (ticks - td->td_slptick) / hz;
 	kp->ki_pri.pri_class = td->td_pri_class;
 	kp->ki_pri.pri_user = td->td_user_pri;
 
Index: sys/kern/sched_4bsd.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/sched_4bsd.c,v
retrieving revision 1.104
diff -u -p -r1.104 sched_4bsd.c
--- sys/kern/sched_4bsd.c	17 Sep 2007 05:27:20 -0000	1.104
+++ sys/kern/sched_4bsd.c	17 Sep 2007 06:06:39 -0000
@@ -84,6 +84,7 @@ struct td_sched {
 	fixpt_t		ts_pctcpu;	/* (j) %cpu during p_swtime. */
 	u_char		ts_rqindex;	/* (j) Run queue index. */
 	int		ts_cpticks;	/* (j) Ticks of cpu time. */
+	int		ts_slptime;	/* (j) Seconds !RUNNING. */
 	struct runq	*ts_runq;	/* runq the thread is currently on */
 };
 
@@ -379,11 +380,6 @@ schedcpu(void)
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_SLOCK(p);
-		/*
-		 * Increment time in/out of memory.  We ignore overflow; with
-		 * 16-bit int's (remember them?) overflow takes 45 days.
-		 */
-		p->p_swtime++;
 		FOREACH_THREAD_IN_PROC(p, td) { 
 			awake = 0;
 			thread_lock(td);
@@ -440,7 +436,7 @@ XXX  this is broken
 
 			 */
 			if (awake) {
-				if (td->td_slptime > 1) {
+				if (ts->ts_slptime > 1) {
 					/*
 					 * In an ideal world, this should not
 					 * happen, because whoever woke us
@@ -452,10 +448,10 @@ XXX  this is broken
 					 */
 					updatepri(td);
 				}
-				td->td_slptime = 0;
+				ts->ts_slptime = 0;
 			} else
-				td->td_slptime++;
-			if (td->td_slptime > 1) {
+				ts->ts_slptime++;
+			if (ts->ts_slptime > 1) {
 				thread_unlock(td);
 				continue;
 			}
@@ -490,16 +486,18 @@ schedcpu_thread(void)
 static void
 updatepri(struct thread *td)
 {
-	register fixpt_t loadfac;
-	register unsigned int newcpu;
+	struct td_sched *ts;
+	fixpt_t loadfac;
+	unsigned int newcpu;
 
+	ts = td->td_sched;
 	loadfac = loadfactor(averunnable.ldavg[0]);
-	if (td->td_slptime > 5 * loadfac)
+	if (ts->ts_slptime > 5 * loadfac)
 		td->td_estcpu = 0;
 	else {
 		newcpu = td->td_estcpu;
-		td->td_slptime--;	/* was incremented in schedcpu() */
-		while (newcpu && --td->td_slptime)
+		ts->ts_slptime--;	/* was incremented in schedcpu() */
+		while (newcpu && --ts->ts_slptime)
 			newcpu = decay_cpu(loadfac, newcpu);
 		td->td_estcpu = newcpu;
 	}
@@ -827,7 +825,8 @@ sched_sleep(struct thread *td)
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
-	td->td_slptime = 0;
+	td->td_slptick = ticks;
+	td->td_sched->ts_slptime = 0;
 }
 
 void
@@ -939,12 +938,16 @@ sched_switch(struct thread *td, struct t
 void
 sched_wakeup(struct thread *td)
 {
+	struct td_sched *ts;
+
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
-	if (td->td_slptime > 1) {
+	ts = td->td_sched;
+	if (ts->ts_slptime > 1) {
 		updatepri(td);
 		resetpriority(td);
 	}
-	td->td_slptime = 0;
+	td->td_slptick = ticks;
+	ts->ts_slptime = 0;
 	sched_add(td, SRQ_BORING);
 }
 
Index: sys/kern/sched_ule.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/sched_ule.c,v
retrieving revision 1.206
diff -u -p -r1.206 sched_ule.c
--- sys/kern/sched_ule.c	17 Sep 2007 05:27:20 -0000	1.206
+++ sys/kern/sched_ule.c	17 Sep 2007 06:07:30 -0000
@@ -88,7 +88,6 @@ struct td_sched {	
 	short		ts_flags;	/* TSF_* flags. */
 	u_char		ts_rqindex;	/* Run queue index. */
 	u_char		ts_cpu;		/* CPU that we have affinity for. */
-	int		ts_slptick;	/* Tick when we went to sleep. */
 	int		ts_slice;	/* Ticks of slice remaining. */
 	u_int		ts_slptime;	/* Number of ticks we vol. slept */
 	u_int		ts_runtime;	/* Number of ticks we were running */
@@ -1914,7 +1913,7 @@ sched_sleep(struct thread *td)
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 
-	td->td_sched->ts_slptick = ticks;
+	td->td_slptick = ticks;
 }
 
 /*
@@ -1933,8 +1932,8 @@ sched_wakeup(struct thread *td)
 	 * If we slept for more than a tick update our interactivity and
 	 * priority.
 	 */
-	slptick = ts->ts_slptick;
-	ts->ts_slptick = 0;
+	slptick = td->td_slptick;
+	td->td_slptick = 0;
 	if (slptick && slptick != ticks) {
 		u_int hzticks;
 
@@ -2435,7 +2434,6 @@ sched_pctcpu(struct thread *td)
 		rtick = min(SCHED_TICK_HZ(ts) / SCHED_TICK_SECS, hz);
 		pctcpu = (FSCALE * ((FSCALE * rtick)/hz)) >> FSHIFT;
 	}
-	td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick;
 	thread_unlock(td);
 
 	return (pctcpu);
Index: sys/sys/proc.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/proc.h,v
retrieving revision 1.490
diff -u -p -r1.490 proc.h
--- sys/sys/proc.h	17 Sep 2007 05:27:21 -0000	1.490
+++ sys/sys/proc.h	17 Sep 2007 06:00:50 -0000
@@ -242,7 +242,7 @@ struct thread {
 	struct thread	*td_standin;	/* (k + a) Use this for an upcall. */
 	struct kse_upcall *td_upcall;	/* (k + t) Upcall structure. */
 	u_int		td_estcpu;	/* (t) estimated cpu utilization */
-	u_int		td_slptime;	/* (t) How long completely blocked. */
+	u_int		td_slptick;	/* (t) Time at sleep. */
 	struct rusage	td_ru;		/* (t) rusage information */
 	uint64_t	td_runtime;	/* (t) How many cpu ticks we've run. */
 	u_int 		td_pticks;	/* (t) Statclock hits for profiling */
@@ -520,7 +520,7 @@ struct proc {
 #define	p_startzero	p_oppid
 	pid_t		p_oppid;	/* (c + e) Save ppid in ptrace. XXX */
 	struct vmspace	*p_vmspace;	/* (b) Address space. */
-	u_int		p_swtime;	/* (j) Time swapped in or out. */
+	u_int		p_swtick;	/* (j) Tick when swapped in or out. */
 	struct itimerval p_realtimer;	/* (c) Alarm timer. */
 	struct rusage	p_ru;		/* (a) Exit information. */
 	struct rusage_ext p_rux;	/* (cj) Internal resource usage. */
Index: sys/vm/vm_glue.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_glue.c,v
retrieving revision 1.224
diff -u -p -r1.224 vm_glue.c
--- sys/vm/vm_glue.c	17 Sep 2007 05:27:21 -0000	1.224
+++ sys/vm/vm_glue.c	17 Sep 2007 06:05:07 -0000
@@ -636,7 +636,7 @@ faultin(p)
 		PROC_LOCK(p);
 		PROC_SLOCK(p);
 		swapclear(p);
-		p->p_swtime = 0;
+		p->p_swtick = ticks;
 		PROC_SUNLOCK(p);
 
 		wakeup(&p->p_flag);
@@ -663,9 +663,11 @@ scheduler(dummy)
 {
 	struct proc *p;
 	struct thread *td;
-	int pri;
 	struct proc *pp;
+	int slptime;
+	int swtime;
 	int ppri;
+	int pri;
 
 	mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED);
 	mtx_unlock(&Giant);
@@ -688,6 +690,7 @@ loop:
 			PROC_UNLOCK(p);
 			continue;
 		}
+		swtime = (ticks - p->p_swtick) / hz;
 		PROC_SLOCK(p);
 		FOREACH_THREAD_IN_PROC(p, td) {
 			/*
@@ -697,7 +700,8 @@ loop:
 			 */
 			thread_lock(td);
 			if (td->td_inhibitors == TDI_SWAPPED) {
-				pri = p->p_swtime + td->td_slptime;
+				slptime = (ticks - td->td_slptick) / hz;
+				pri = swtime + slptime;
 				if ((td->td_flags & TDF_SWAPINREQ) == 0)
 					pri -= p->p_nice * 8;
 				/*
@@ -816,6 +820,7 @@ retry:
 	FOREACH_PROC_IN_SYSTEM(p) {
 		struct vmspace *vm;
 		int minslptime = 100000;
+		int slptime;
 		
 		/*
 		 * Watch out for a process in
@@ -882,12 +887,12 @@ retry:
 					thread_unlock(td);
 					goto nextproc;
 				}
-
+				slptime = (ticks - td->td_slptick) / hz;
 				/*
 				 * Guarantee swap_idle_threshold1
 				 * time in memory.
 				 */
-				if (td->td_slptime < swap_idle_threshold1) {
+				if (slptime < swap_idle_threshold1) {
 					thread_unlock(td);
 					goto nextproc;
 				}
@@ -914,13 +919,13 @@ retry:
 				 */
 				if (((action & VM_SWAP_NORMAL) == 0) &&
 				    (((action & VM_SWAP_IDLE) == 0) ||
-				    (td->td_slptime < swap_idle_threshold2))) {
+				    (slptime < swap_idle_threshold2))) {
 					thread_unlock(td);
 					goto nextproc;
 				}
 
-				if (minslptime > td->td_slptime)
-					minslptime = td->td_slptime;
+				if (minslptime > slptime)
+					minslptime = slptime;
 				thread_unlock(td);
 			}
 
@@ -1038,7 +1043,7 @@ swapout(p)
 	PROC_LOCK(p);
 	p->p_flag &= ~P_SWAPPINGOUT;
 	PROC_SLOCK(p);
-	p->p_swtime = 0;
+	p->p_swtick = ticks;
 	return (0);
 }
 #endif /* !NO_SWAPPING */

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20070917141407.B558>