From: Alexander Motin <mav@FreeBSD.org>
Message-Id: <201208091810.q79IA0eG037352@svn.freebsd.org>
Date: Thu, 9 Aug 2012 18:10:00 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r239153 - head/sys/kern

Author: mav
Date: Thu Aug  9 18:09:59 2012
New Revision: 239153
URL: http://svn.freebsd.org/changeset/base/239153

Log:
  The SCHED_4BSD scheduling quantum mechanism appears to have been broken
  for some time.  Because the switchticks variable is reset each time a
  thread is preempted (which interrupt threads do regularly), the
  scheduling quantum may never expire.  This went unnoticed because
  several other factors still trigger context switches regularly.

  Handle the problem by replacing that mechanism with its equivalent
  from SCHED_ULE, the time slice.  It is effectively the same, just
  measured in units of stathz instead of hz.  Some unification between
  the schedulers is probably not a bad thing.

Modified:
  head/sys/kern/sched_4bsd.c

Modified: head/sys/kern/sched_4bsd.c
==============================================================================
--- head/sys/kern/sched_4bsd.c	Thu Aug  9 16:38:17 2012	(r239152)
+++ head/sys/kern/sched_4bsd.c	Thu Aug  9 18:09:59 2012	(r239153)
@@ -94,6 +94,7 @@ struct td_sched {
 	fixpt_t		ts_pctcpu;	/* (j) %cpu during p_swtime. */
 	int		ts_cpticks;	/* (j) Ticks of cpu time. */
 	int		ts_slptime;	/* (j) Seconds !RUNNING. */
+	int		ts_slice;	/* Remaining part of time slice. */
 	int		ts_flags;
 	struct runq	*ts_runq;	/* runq the thread is currently on */
 #ifdef KTR
@@ -117,9 +118,9 @@ struct td_sched {
 static struct td_sched td_sched0;
 struct mtx sched_lock;
 
+static int	realstathz;	/* stathz is sometimes 0 and run off of hz. */
 static int	sched_tdcnt;	/* Total runnable threads in the system. */
-static int	sched_quantum;	/* Roundrobin scheduling quantum in ticks. */
-#define	SCHED_QUANTUM	(hz / 10)	/* Default sched quantum */
+static int	sched_slice = 1; /* Thread run time before rescheduling. */
 
 static void	setup_runqs(void);
 static void	schedcpu(void);
@@ -145,6 +146,10 @@ SYSINIT(schedcpu, SI_SUB_RUN_SCHEDULER, 
     &sched_kp);
 SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
 
+static void sched_initticks(void *dummy);
+SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks,
+    NULL);
+
 /*
  * Global run queue.
  */
@@ -179,31 +184,12 @@ setup_runqs(void)
 	runq_init(&runq);
 }
 
-static int
-sysctl_kern_quantum(SYSCTL_HANDLER_ARGS)
-{
-	int error, new_val;
-
-	new_val = sched_quantum * tick;
-	error = sysctl_handle_int(oidp, &new_val, 0, req);
-	if (error != 0 || req->newptr == NULL)
-		return (error);
-	if (new_val < tick)
-		return (EINVAL);
-	sched_quantum = new_val / tick;
-	hogticks = 2 * sched_quantum;
-	return (0);
-}
-
 SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD, 0, "Scheduler");
 
 SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "4BSD", 0,
     "Scheduler name");
-
-SYSCTL_PROC(_kern_sched, OID_AUTO, quantum, CTLTYPE_INT | CTLFLAG_RW,
-    0, sizeof sched_quantum, sysctl_kern_quantum, "I",
-    "Roundrobin scheduling quantum in microseconds");
-
+SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0,
+    "Slice size for timeshare threads");
 #ifdef SMP
 /* Enable forwarding of wakeups to all other cpus */
 static SYSCTL_NODE(_kern_sched, OID_AUTO, ipiwakeup, CTLFLAG_RD, NULL,
@@ -471,9 +457,8 @@ schedcpu(void)
 	struct thread *td;
 	struct proc *p;
 	struct td_sched *ts;
-	int awake, realstathz;
+	int awake;
 
-	realstathz = stathz ? stathz : hz;
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_LOCK(p);
@@ -645,14 +630,28 @@ sched_setup(void *dummy)
 {
 	setup_runqs();
 
-	if (sched_quantum == 0)
-		sched_quantum = SCHED_QUANTUM;
-	hogticks = 2 * sched_quantum;
+	/*
+	 * To avoid divide-by-zero, set realstathz to a dummy value in
+	 * case sched_clock() is called before sched_initticks().
+	 */
+	realstathz = hz;
+	sched_slice = realstathz / 10;	/* ~100ms */
 
 	/* Account for thread0. */
 	sched_load_add();
 }
 
+/*
+ * This routine determines the sched_slice after stathz and hz are set up.
+ */
+static void
+sched_initticks(void *dummy)
+{
+
+	realstathz = stathz ? stathz : hz;
+	sched_slice = realstathz / 10;	/* ~100ms */
+}
+
 /* External interfaces start here */
 
 /*
@@ -670,6 +669,7 @@ schedinit(void)
 	proc0.p_sched = NULL; /* XXX */
 	thread0.td_sched = &td_sched0;
 	thread0.td_lock = &sched_lock;
+	td_sched0.ts_slice = sched_slice;
 	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
 }
 
@@ -686,9 +686,9 @@ sched_runnable(void)
 int
 sched_rr_interval(void)
 {
-	if (sched_quantum == 0)
-		sched_quantum = SCHED_QUANTUM;
-	return (sched_quantum);
+
+	/* Convert sched_slice from stathz to hz. */
+	return (hz / (realstathz / sched_slice));
 }
 
 /*
@@ -725,9 +725,10 @@ sched_clock(struct thread *td)
 	 * Force a context switch if the current thread has used up a full
 	 * quantum (default quantum is 100ms).
 	 */
-	if (!TD_IS_IDLETHREAD(td) &&
-	    ticks - PCPU_GET(switchticks) >= sched_quantum)
+	if (!TD_IS_IDLETHREAD(td) && (--ts->ts_slice <= 0)) {
+		ts->ts_slice = sched_slice;
 		td->td_flags |= TDF_NEEDRESCHED;
+	}
 
 	stat = DPCPU_PTR(idlestat);
 	stat->oldidlecalls = stat->idlecalls;
@@ -781,6 +782,7 @@ sched_fork_thread(struct thread *td, str
 	ts = childtd->td_sched;
 	bzero(ts, sizeof(*ts));
 	ts->ts_flags |= (td->td_sched->ts_flags & TSF_AFFINITY);
+	ts->ts_slice = 1;
 }
 
 void
@@ -1077,6 +1079,7 @@ sched_wakeup(struct thread *td)
 	}
 	td->td_slptick = 0;
 	ts->ts_slptime = 0;
+	ts->ts_slice = sched_slice;
 	sched_add(td, SRQ_BORING);
 }
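
For readers curious about the mechanics, the following stand-alone sketch
demonstrates the failure mode described in the log.  It is not kernel code:
HZ, STATDIV, PREEMPT_EVERY, and the simulated preemption pattern are
illustrative assumptions, not values taken from this commit.  An "interrupt
thread" preempts the running thread every few ticks, resetting switchticks,
so the old elapsed-time check never accumulates a full quantum, while the
per-thread slice countdown introduced here expires on schedule.

/*
 * User-space sketch of the bug fixed by r239153.  Not kernel code:
 * HZ, STATDIV and PREEMPT_EVERY are illustrative stand-ins for the
 * kernel's hz, stathz and ithread preemption behavior.
 */
#include <stdio.h>

#define	HZ		1000	/* simulated clock ticks per second */
#define	STATDIV		8	/* hz ticks per stathz tick (stathz = 125) */
#define	PREEMPT_EVERY	3	/* an ithread preempts us every 3 ticks */

int
main(void)
{
	int sched_quantum = HZ / 10;		/* old: ~100ms in hz ticks */
	int sched_slice = (HZ / STATDIV) / 10;	/* new: ~100ms in stathz ticks */
	int switchticks = 0, ts_slice = sched_slice;
	int old_resched = 0, new_resched = 0;
	int ticks;

	for (ticks = 1; ticks <= 2 * HZ; ticks++) {
		/* Old scheme: elapsed ticks since the last context switch. */
		if (ticks - switchticks >= sched_quantum)
			old_resched++;
		/* Preemption resets switchticks, starving the check above. */
		if (ticks % PREEMPT_EVERY == 0)
			switchticks = ticks;
		/* New scheme: sched_clock() decrements a per-thread slice. */
		if (ticks % STATDIV == 0 && --ts_slice <= 0) {
			ts_slice = sched_slice;
			new_resched++;
		}
	}
	printf("old quantum expirations: %d\n", old_resched);	/* 0 */
	printf("new slice expirations:   %d\n", new_resched);	/* 20 */
	return (0);
}

Compiled and run, this prints 0 old-style quantum expirations against 20
slice expirations over two simulated seconds, i.e. the ~100ms round-robin
behavior the commit restores.  Note also that the new kern.sched.slice
sysctl added above is expressed in stathz ticks, not in microseconds as the
removed kern.sched.quantum was.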