From owner-svn-src-stable-8@FreeBSD.ORG Sun Jan 15 22:53:40 2012 Return-Path: Delivered-To: svn-src-stable-8@FreeBSD.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id D411B106564A; Sun, 15 Jan 2012 22:53:40 +0000 (UTC) (envelope-from avg@FreeBSD.org) Received: from citadel.icyb.net.ua (citadel.icyb.net.ua [212.40.38.140]) by mx1.freebsd.org (Postfix) with ESMTP id 036DA8FC0A; Sun, 15 Jan 2012 22:53:39 +0000 (UTC) Received: from porto.starpoint.kiev.ua (porto-e.starpoint.kiev.ua [212.40.38.100]) by citadel.icyb.net.ua (8.8.8p3/ICyb-2.3exp) with ESMTP id AAA11389; Mon, 16 Jan 2012 00:36:44 +0200 (EET) (envelope-from avg@FreeBSD.org) Received: from localhost ([127.0.0.1]) by porto.starpoint.kiev.ua with esmtp (Exim 4.34 (FreeBSD)) id 1RmYgu-000P8B-2n; Mon, 16 Jan 2012 00:36:44 +0200 Message-ID: <4F1354FA.1070300@FreeBSD.org> Date: Mon, 16 Jan 2012 00:36:42 +0200 From: Andriy Gapon User-Agent: Mozilla/5.0 (X11; FreeBSD amd64; rv:9.0) Gecko/20111222 Thunderbird/9.0 MIME-Version: 1.0 To: src-committers@FreeBSD.org, svn-src-all@FreeBSD.org, svn-src-stable@FreeBSD.org, svn-src-stable-8@FreeBSD.org References: <201201152218.q0FMIsMS003166@svn.freebsd.org> In-Reply-To: <201201152218.q0FMIsMS003166@svn.freebsd.org> X-Enigmail-Version: undefined Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit Cc: Subject: Re: svn commit: r230174 - stable/8/sys/kern X-BeenThere: svn-src-stable-8@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 8-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 15 Jan 2012 22:53:41 -0000 on 16/01/2012 00:18 Andriy Gapon said the following: > Author: avg > Date: Sun Jan 15 22:18:54 2012 > New Revision: 230174 > URL: http://svn.freebsd.org/changeset/base/230174 > > Log: > MFC r228718: ule: ensure that batch timeshare threads are scheduled > fairly Apologies, 
this commit is screwed up. Here is what happened. There was a merge conflict. I initially chose the postpone option. Then I edited the file to get a correct merge. Then I ran svn resolve. It complained that it needed the --accept option, so I used --accept=base. I didn't do another svn diff after that before committing the change, and that was my mistake. Apparently svn resolve --accept=base changed the file. > Modified: > stable/8/sys/kern/sched_ule.c > Directory Properties: > stable/8/sys/ (props changed) > stable/8/sys/amd64/include/xen/ (props changed) > stable/8/sys/cddl/contrib/opensolaris/ (props changed) > stable/8/sys/contrib/dev/acpica/ (props changed) > stable/8/sys/contrib/pf/ (props changed) > > Modified: stable/8/sys/kern/sched_ule.c > ============================================================================== > --- stable/8/sys/kern/sched_ule.c Sun Jan 15 22:10:35 2012 (r230173) > +++ stable/8/sys/kern/sched_ule.c Sun Jan 15 22:18:54 2012 (r230174) > @@ -62,10 +62,6 @@ __FBSDID("$FreeBSD$"); > #include > #include > #include > -#ifdef KTRACE > -#include > -#include > -#endif > > #ifdef HWPMC_HOOKS > #include > @@ -80,7 +76,7 @@ dtrace_vtime_switch_func_t dtrace_vtime_ > #include > #include > > -#if defined(__sparc64__) > +#if defined(__powerpc__) && defined(E500) > #error "This architecture is not currently compatible with ULE" > #endif > > @@ -88,7 +84,7 @@ dtrace_vtime_switch_func_t dtrace_vtime_ > > #define TS_NAME_LEN (MAXCOMLEN + sizeof(" td ") + sizeof(__XSTRING(UINT_MAX))) > #define TDQ_NAME_LEN (sizeof("sched lock ") + sizeof(__XSTRING(MAXCPU))) > -#define TDQ_LOADNAME_LEN (PCPU_NAME_LEN + sizeof(" load")) > +#define TDQ_LOADNAME_LEN (sizeof("CPU ") + sizeof(__XSTRING(MAXCPU)) - 1 + sizeof(" load")) > > /* > * Thread scheduler specific section. All fields are protected > @@ -122,11 +118,17 @@ static struct td_sched td_sched0; > > /* > * Priority ranges used for interactive and non-interactive timeshare > - * threads. 
Interactive threads use realtime priorities. > - */ > -#define PRI_MIN_INTERACT PRI_MIN_REALTIME > -#define PRI_MAX_INTERACT PRI_MAX_REALTIME > -#define PRI_MIN_BATCH PRI_MIN_TIMESHARE > + * threads. The timeshare priorities are split up into four ranges. > + * The first range handles interactive threads. The last three ranges > + * (NHALF, x, and NHALF) handle non-interactive threads with the outer > + * ranges supporting nice values. > + */ > +#define PRI_TIMESHARE_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) > +#define PRI_INTERACT_RANGE ((PRI_TIMESHARE_RANGE - SCHED_PRI_NRESV) / 2) > + > +#define PRI_MIN_INTERACT PRI_MIN_TIMESHARE > +#define PRI_MAX_INTERACT (PRI_MIN_TIMESHARE + PRI_INTERACT_RANGE - 1) > +#define PRI_MIN_BATCH (PRI_MIN_TIMESHARE + PRI_INTERACT_RANGE) > #define PRI_MAX_BATCH PRI_MAX_TIMESHARE > > /* > @@ -209,7 +211,7 @@ static int preempt_thresh = 0; > #endif > static int static_boost = PRI_MIN_BATCH; > static int sched_idlespins = 10000; > -static int sched_idlespinthresh = 4; > +static int sched_idlespinthresh = 16; > > /* > * tdq - per processor runqs and statistics. All fields are protected by the > @@ -221,6 +223,7 @@ struct tdq { > struct mtx tdq_lock; /* run queue lock. */ > struct cpu_group *tdq_cg; /* Pointer to cpu topology. */ > volatile int tdq_load; /* Aggregate load. */ > + volatile int tdq_cpu_idle; /* cpu_idle() is active. */ > int tdq_sysload; /* For loadavg, !ITHD load. */ > int tdq_transferable; /* Transferable thread count. */ > short tdq_switchcnt; /* Switches this tick. 
*/ > @@ -561,7 +564,7 @@ struct cpu_search { > > #define CPUSET_FOREACH(cpu, mask) \ > for ((cpu) = 0; (cpu) <= mp_maxid; (cpu)++) \ > - if ((mask) & 1 << (cpu)) > + if (CPU_ISSET(cpu, &mask)) > > static __inline int cpu_search(struct cpu_group *cg, struct cpu_search *low, > struct cpu_search *high, const int match); > @@ -836,6 +839,7 @@ sched_balance_pair(struct tdq *high, str > int low_load; > int moved; > int move; > + int cpu; > int diff; > int i; > > @@ -857,10 +861,14 @@ sched_balance_pair(struct tdq *high, str > for (i = 0; i < move; i++) > moved += tdq_move(high, low); > /* > - * IPI the target cpu to force it to reschedule with the new > - * workload. > + * In case the target isn't the current cpu IPI it to force a > + * reschedule with the new workload. > */ > - ipi_cpu(TDQ_ID(low), IPI_PREEMPT); > + cpu = TDQ_ID(low); > + sched_pin(); > + if (cpu != PCPU_GET(cpuid)) > + ipi_cpu(cpu, IPI_PREEMPT); > + sched_unpin(); > } > tdq_unlock_pair(high, low); > return (moved); > @@ -979,7 +987,7 @@ tdq_notify(struct tdq *tdq, struct threa > * If the MD code has an idle wakeup routine try that before > * falling back to IPI. 
> */ > - if (cpu_idle_wakeup(cpu)) > + if (!tdq->tdq_cpu_idle || cpu_idle_wakeup(cpu)) > return; > } > tdq->tdq_ipipending = 1; > @@ -1426,8 +1434,7 @@ sched_priority(struct thread *td) > } else { > pri = SCHED_PRI_MIN; > if (td->td_sched->ts_ticks) > - pri += min(SCHED_PRI_TICKS(td->td_sched), > - SCHED_PRI_RANGE); > + pri += SCHED_PRI_TICKS(td->td_sched); > pri += SCHED_PRI_NICE(td->td_proc->p_nice); > KASSERT(pri >= PRI_MIN_BATCH && pri <= PRI_MAX_BATCH, > ("sched_priority: invalid priority %d: nice %d, " > @@ -1688,39 +1695,24 @@ sched_prio(struct thread *td, u_char pri > void > sched_user_prio(struct thread *td, u_char prio) > { > - u_char oldprio; > > td->td_base_user_pri = prio; > - if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio) > - return; > - oldprio = td->td_user_pri; > + if (td->td_lend_user_pri <= prio) > + return; > td->td_user_pri = prio; > } > > void > sched_lend_user_prio(struct thread *td, u_char prio) > { > - u_char oldprio; > > THREAD_LOCK_ASSERT(td, MA_OWNED); > - td->td_flags |= TDF_UBORROWING; > - oldprio = td->td_user_pri; > - td->td_user_pri = prio; > -} > - > -void > -sched_unlend_user_prio(struct thread *td, u_char prio) > -{ > - u_char base_pri; > - > - THREAD_LOCK_ASSERT(td, MA_OWNED); > - base_pri = td->td_base_user_pri; > - if (prio >= base_pri) { > - td->td_flags &= ~TDF_UBORROWING; > - sched_user_prio(td, base_pri); > - } else { > - sched_lend_user_prio(td, prio); > - } > + td->td_lend_user_pri = prio; > + td->td_user_pri = min(prio, td->td_base_user_pri); > + if (td->td_priority > td->td_user_pri) > + sched_prio(td, td->td_user_pri); > + else if (td->td_priority != td->td_user_pri) > + td->td_flags |= TDF_NEEDRESCHED; > } > > /* > @@ -1913,6 +1905,8 @@ sched_sleep(struct thread *td, int prio) > td->td_slptick = ticks; > if (TD_IS_SUSPENDED(td) || prio >= PSOCK) > td->td_flags |= TDF_CANSWAP; > + if (PRI_BASE(td->td_pri_class) != PRI_TIMESHARE) > + return; > if (static_boost == 1 && prio) > sched_prio(td, prio); > else 
if (static_boost && td->td_priority > static_boost) > @@ -2179,7 +2173,7 @@ sched_clock(struct thread *td) > * is easier than trying to scale based on stathz. > */ > void > -sched_tick(void) > +sched_tick(int cnt) > { > struct td_sched *ts; > > @@ -2191,7 +2185,7 @@ sched_tick(void) > if (ts->ts_incrtick == ticks) > return; > /* Adjust ticks for pctcpu */ > - ts->ts_ticks += 1 << SCHED_TICK_SHIFT; > + ts->ts_ticks += cnt << SCHED_TICK_SHIFT; > ts->ts_ltick = ticks; > ts->ts_incrtick = ticks; > /* > @@ -2562,8 +2556,14 @@ sched_idletd(void *dummy) > } > } > switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt; > - if (tdq->tdq_load == 0) > - cpu_idle(switchcnt > 1); > + if (tdq->tdq_load == 0) { > + tdq->tdq_cpu_idle = 1; > + if (tdq->tdq_load == 0) { > + cpu_idle(switchcnt > sched_idlespinthresh * 4); > + tdq->tdq_switchcnt++; > + } > + tdq->tdq_cpu_idle = 0; > + } > if (tdq->tdq_load) { > thread_lock(td); > mi_switch(SW_VOL | SWT_IDLE, NULL); > @@ -2586,8 +2586,6 @@ sched_throw(struct thread *td) > /* Correct spinlock nesting and acquire the correct lock. 
*/ > TDQ_LOCK(tdq); > spinlock_exit(); > - PCPU_SET(switchtime, cpu_ticks()); > - PCPU_SET(switchticks, ticks); > } else { > MPASS(td->td_lock == TDQ_LOCKPTR(tdq)); > tdq_load_rem(tdq, td); > @@ -2596,6 +2594,8 @@ sched_throw(struct thread *td) > KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count")); > newtd = choosethread(); > TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd; > + PCPU_SET(switchtime, cpu_ticks()); > + PCPU_SET(switchticks, ticks); > cpu_throw(td, newtd); /* doesn't return */ > } > > @@ -2655,15 +2655,16 @@ static int > sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg, > int indent) > { > + char cpusetbuf[CPUSETBUFSIZ]; > int i, first; > > sbuf_printf(sb, "%*s\n", indent, > "", 1 + indent / 2, cg->cg_level); > - sbuf_printf(sb, "%*s ", indent, "", > - cg->cg_count, cg->cg_mask); > + sbuf_printf(sb, "%*s ", indent, "", > + cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask)); > first = TRUE; > for (i = 0; i < MAXCPU; i++) { > - if ((cg->cg_mask & (1 << i)) != 0) { > + if (CPU_ISSET(i, &cg->cg_mask)) { > if (!first) > sbuf_printf(sb, ", "); > else > @@ -2722,6 +2723,7 @@ sysctl_kern_sched_topology_spec(SYSCTL_H > sbuf_delete(topo); > return (err); > } > + > #endif > > SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); > @@ -2758,6 +2760,7 @@ SYSCTL_INT(_kern_sched, OID_AUTO, steal_ > SYSCTL_PROC(_kern_sched, OID_AUTO, topology_spec, CTLTYPE_STRING | > CTLFLAG_RD, NULL, 0, sysctl_kern_sched_topology_spec, "A", > "XML dump of detected CPU topology"); > + > #endif > > /* ps compat. All cpu percentages from ULE are weighted. */ -- Andriy Gapon