From owner-svn-src-stable@FreeBSD.ORG Sun Jan 15 22:43:34 2012
From: Andriy Gapon <avg@FreeBSD.org>
Date: Sun, 15 Jan 2012 22:43:34 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Message-Id: <201201152243.q0FMhYra004130@svn.freebsd.org>
Subject: svn commit: r230177 - stable/8/sys/kern
List-Id: SVN commit messages for all the -stable branches of the src tree

Author: avg
Date: Sun Jan 15 22:43:34 2012
New Revision: 230177
URL: http://svn.freebsd.org/changeset/base/230177

Log:
  Revert r230174, a botched MFC

  This is a direct commit to the branch for obvious reasons.

  Pointyhat to:	avg

Modified:
  stable/8/sys/kern/sched_ule.c

Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)

Modified: stable/8/sys/kern/sched_ule.c
==============================================================================
--- stable/8/sys/kern/sched_ule.c	Sun Jan 15 22:23:41 2012	(r230176)
+++ stable/8/sys/kern/sched_ule.c	Sun Jan 15 22:43:34 2012	(r230177)
@@ -62,6 +62,10 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#ifdef KTRACE
+#include 
+#include 
+#endif
 
 #ifdef HWPMC_HOOKS
 #include 
@@ -76,7 +80,7 @@ dtrace_vtime_switch_func_t dtrace_vtime_
 #include 
 #include 
 
-#if defined(__powerpc__) && defined(E500)
+#if defined(__sparc64__)
 #error "This architecture is not currently compatible with ULE"
 #endif
 
@@ -84,7 +88,7 @@ dtrace_vtime_switch_func_t dtrace_vtime_
 
 #define	TS_NAME_LEN (MAXCOMLEN + sizeof(" td ") + sizeof(__XSTRING(UINT_MAX)))
 #define	TDQ_NAME_LEN	(sizeof("sched lock ") + sizeof(__XSTRING(MAXCPU)))
-#define	TDQ_LOADNAME_LEN	(sizeof("CPU ") + sizeof(__XSTRING(MAXCPU)) - 1 + sizeof(" load"))
+#define	TDQ_LOADNAME_LEN	(PCPU_NAME_LEN + sizeof(" load"))
 
 /*
  * Thread scheduler specific section. All fields are protected
@@ -118,17 +122,11 @@ static struct td_sched td_sched0;
 
 /*
  * Priority ranges used for interactive and non-interactive timeshare
- * threads. The timeshare priorities are split up into four ranges.
- * The first range handles interactive threads. The last three ranges
- * (NHALF, x, and NHALF) handle non-interactive threads with the outer
- * ranges supporting nice values.
- */
-#define	PRI_TIMESHARE_RANGE	(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
-#define	PRI_INTERACT_RANGE	((PRI_TIMESHARE_RANGE - SCHED_PRI_NRESV) / 2)
-
-#define	PRI_MIN_INTERACT	PRI_MIN_TIMESHARE
-#define	PRI_MAX_INTERACT	(PRI_MIN_TIMESHARE + PRI_INTERACT_RANGE - 1)
-#define	PRI_MIN_BATCH		(PRI_MIN_TIMESHARE + PRI_INTERACT_RANGE)
+ * threads. Interactive threads use realtime priorities.
+ */
+#define	PRI_MIN_INTERACT	PRI_MIN_REALTIME
+#define	PRI_MAX_INTERACT	PRI_MAX_REALTIME
+#define	PRI_MIN_BATCH		PRI_MIN_TIMESHARE
 #define	PRI_MAX_BATCH		PRI_MAX_TIMESHARE
 
 /*
@@ -211,7 +209,7 @@ static int preempt_thresh = 0;
 #endif
 static int static_boost = PRI_MIN_BATCH;
 static int sched_idlespins = 10000;
-static int sched_idlespinthresh = 16;
+static int sched_idlespinthresh = 4;
 
 /*
  * tdq - per processor runqs and statistics. All fields are protected by the
@@ -223,7 +221,6 @@ struct tdq {
 	struct mtx	tdq_lock;		/* run queue lock. */
 	struct cpu_group *tdq_cg;		/* Pointer to cpu topology. */
 	volatile int	tdq_load;		/* Aggregate load. */
-	volatile int	tdq_cpu_idle;		/* cpu_idle() is active. */
 	int		tdq_sysload;		/* For loadavg, !ITHD load. */
 	int		tdq_transferable;	/* Transferable thread count. */
 	short		tdq_switchcnt;		/* Switches this tick. */
@@ -564,7 +561,7 @@ struct cpu_search {
 
 #define	CPUSET_FOREACH(cpu, mask)				\
 	for ((cpu) = 0; (cpu) <= mp_maxid; (cpu)++)		\
-		if (CPU_ISSET(cpu, &mask))
+		if ((mask) & 1 << (cpu))
 
 static __inline int cpu_search(struct cpu_group *cg, struct cpu_search *low,
     struct cpu_search *high, const int match);
@@ -839,7 +836,6 @@ sched_balance_pair(struct tdq *high, str
 	int low_load;
 	int moved;
 	int move;
-	int cpu;
 	int diff;
 	int i;
 
@@ -861,14 +857,10 @@ sched_balance_pair(struct tdq *high, str
 		for (i = 0; i < move; i++)
 			moved += tdq_move(high, low);
 		/*
-		 * In case the target isn't the current cpu IPI it to force a
-		 * reschedule with the new workload.
+		 * IPI the target cpu to force it to reschedule with the new
+		 * workload.
 		 */
-		cpu = TDQ_ID(low);
-		sched_pin();
-		if (cpu != PCPU_GET(cpuid))
-			ipi_cpu(cpu, IPI_PREEMPT);
-		sched_unpin();
+		ipi_cpu(TDQ_ID(low), IPI_PREEMPT);
 	}
 	tdq_unlock_pair(high, low);
 	return (moved);
@@ -987,7 +979,7 @@ tdq_notify(struct tdq *tdq, struct threa
 		 * If the MD code has an idle wakeup routine try that before
 		 * falling back to IPI.
 		 */
-		if (!tdq->tdq_cpu_idle || cpu_idle_wakeup(cpu))
+		if (cpu_idle_wakeup(cpu))
 			return;
 	}
 	tdq->tdq_ipipending = 1;
@@ -1434,7 +1426,8 @@ sched_priority(struct thread *td)
 	} else {
 		pri = SCHED_PRI_MIN;
 		if (td->td_sched->ts_ticks)
-			pri += SCHED_PRI_TICKS(td->td_sched);
+			pri += min(SCHED_PRI_TICKS(td->td_sched),
+			    SCHED_PRI_RANGE);
 		pri += SCHED_PRI_NICE(td->td_proc->p_nice);
 		KASSERT(pri >= PRI_MIN_BATCH && pri <= PRI_MAX_BATCH,
 		    ("sched_priority: invalid priority %d: nice %d, "
@@ -1695,24 +1688,39 @@ sched_prio(struct thread *td, u_char pri
 void
 sched_user_prio(struct thread *td, u_char prio)
 {
+	u_char oldprio;
 
 	td->td_base_user_pri = prio;
-	if (td->td_lend_user_pri <= prio)
-		return;
+	if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio)
+		return;
+	oldprio = td->td_user_pri;
 	td->td_user_pri = prio;
 }
 
 void
 sched_lend_user_prio(struct thread *td, u_char prio)
 {
+	u_char oldprio;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
-	td->td_lend_user_pri = prio;
-	td->td_user_pri = min(prio, td->td_base_user_pri);
-	if (td->td_priority > td->td_user_pri)
-		sched_prio(td, td->td_user_pri);
-	else if (td->td_priority != td->td_user_pri)
-		td->td_flags |= TDF_NEEDRESCHED;
+	td->td_flags |= TDF_UBORROWING;
+	oldprio = td->td_user_pri;
+	td->td_user_pri = prio;
+}
+
+void
+sched_unlend_user_prio(struct thread *td, u_char prio)
+{
+	u_char base_pri;
+
+	THREAD_LOCK_ASSERT(td, MA_OWNED);
+	base_pri = td->td_base_user_pri;
+	if (prio >= base_pri) {
+		td->td_flags &= ~TDF_UBORROWING;
+		sched_user_prio(td, base_pri);
+	} else {
+		sched_lend_user_prio(td, prio);
+	}
 }
 
 /*
@@ -1905,8 +1913,6 @@ sched_sleep(struct thread *td, int prio)
 	td->td_slptick = ticks;
 	if (TD_IS_SUSPENDED(td) || prio >= PSOCK)
 		td->td_flags |= TDF_CANSWAP;
-	if (PRI_BASE(td->td_pri_class) != PRI_TIMESHARE)
-		return;
 	if (static_boost == 1 && prio)
 		sched_prio(td, prio);
 	else if (static_boost && td->td_priority > static_boost)
@@ -2173,7 +2179,7 @@ sched_clock(struct thread *td)
  * is easier than trying to scale based on stathz.
  */
 void
-sched_tick(int cnt)
+sched_tick(void)
 {
 	struct td_sched *ts;
 
@@ -2185,7 +2191,7 @@ sched_tick(void)
 	if (ts->ts_incrtick == ticks)
 		return;
 	/* Adjust ticks for pctcpu */
-	ts->ts_ticks += cnt << SCHED_TICK_SHIFT;
+	ts->ts_ticks += 1 << SCHED_TICK_SHIFT;
 	ts->ts_ltick = ticks;
 	ts->ts_incrtick = ticks;
 	/*
@@ -2556,14 +2562,8 @@ sched_idletd(void *dummy)
 			}
 		}
 		switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
-		if (tdq->tdq_load == 0) {
-			tdq->tdq_cpu_idle = 1;
-			if (tdq->tdq_load == 0) {
-				cpu_idle(switchcnt > sched_idlespinthresh * 4);
-				tdq->tdq_switchcnt++;
-			}
-			tdq->tdq_cpu_idle = 0;
-		}
+		if (tdq->tdq_load == 0)
+			cpu_idle(switchcnt > 1);
 		if (tdq->tdq_load) {
 			thread_lock(td);
 			mi_switch(SW_VOL | SWT_IDLE, NULL);
@@ -2586,6 +2586,8 @@ sched_throw(struct thread *td)
 		/* Correct spinlock nesting and acquire the correct lock. */
 		TDQ_LOCK(tdq);
 		spinlock_exit();
+		PCPU_SET(switchtime, cpu_ticks());
+		PCPU_SET(switchticks, ticks);
 	} else {
 		MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
 		tdq_load_rem(tdq, td);
@@ -2594,8 +2596,6 @@ sched_throw(struct thread *td)
 	KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
 	newtd = choosethread();
 	TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd;
-	PCPU_SET(switchtime, cpu_ticks());
-	PCPU_SET(switchticks, ticks);
 	cpu_throw(td, newtd);		/* doesn't return */
 }
 
@@ -2655,16 +2655,15 @@ static int
 sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg,
     int indent)
 {
-	char cpusetbuf[CPUSETBUFSIZ];
 	int i, first;
 
 	sbuf_printf(sb, "%*s\n", indent, "",
 	    1 + indent / 2, cg->cg_level);
-	sbuf_printf(sb, "%*s ", indent, "",
-	    cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask));
+	sbuf_printf(sb, "%*s ", indent, "",
+	    cg->cg_count, cg->cg_mask);
 	first = TRUE;
 	for (i = 0; i < MAXCPU; i++) {
-		if (CPU_ISSET(i, &cg->cg_mask)) {
+		if ((cg->cg_mask & (1 << i)) != 0) {
 			if (!first)
 				sbuf_printf(sb, ", ");
 			else
@@ -2723,7 +2722,6 @@ sysctl_kern_sched_topology_spec(SYSCTL_H
 	sbuf_delete(topo);
 	return (err);
 }
-
 #endif
 
 SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");
@@ -2760,7 +2758,6 @@ SYSCTL_INT(_kern_sched, OID_AUTO, steal_
 SYSCTL_PROC(_kern_sched, OID_AUTO, topology_spec, CTLTYPE_STRING |
     CTLFLAG_RD, NULL, 0, sysctl_kern_sched_topology_spec, "A",
     "XML dump of detected CPU topology");
-
 #endif
 
 /* ps compat. All cpu percentages from ULE are weighted. */
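
One detail that may help when reading the CPUSET_FOREACH and topology-sysctl hunks above: the revert takes stable/8 back from the newer cpuset_t accessors (CPU_ISSET(), cpusetobj_strprint()) to the branch's original flat integer cpumask handling, where membership is tested with a plain shift-and-AND. The stand-alone C sketch below contrasts the two styles of testing whether a CPU is in a mask; it uses toy types and names invented for the example, not the kernel's.

/*
 * Toy illustration (not kernel code) of the two CPU-mask styles in the
 * hunks above: the flat integer mask restored on stable/8
 * ("mask & (1 << cpu)") versus a CPU_ISSET()-style set API.
 * toy_cpuset_t, TOY_CPU_SET and TOY_CPU_ISSET are made-up names.
 */
#include <stdio.h>

/* Newer style: an opaque set type accessed only through macros. */
typedef struct toy_cpuset {
	unsigned long bits;
} toy_cpuset_t;

#define	TOY_CPU_SET(cpu, setp)		((setp)->bits |= 1UL << (cpu))
#define	TOY_CPU_ISSET(cpu, setp)	(((setp)->bits & (1UL << (cpu))) != 0)

int
main(void)
{
	/* Older stable/8 style: the mask is just an integer. */
	unsigned long mask = 0;
	toy_cpuset_t set = { 0 };
	int cpu;

	/* Mark CPUs 0 and 2 as present in both representations. */
	mask |= 1UL << 0;
	mask |= 1UL << 2;
	TOY_CPU_SET(0, &set);
	TOY_CPU_SET(2, &set);

	for (cpu = 0; cpu < 4; cpu++) {
		printf("cpu%d: flat-mask %d, set-style %d\n", cpu,
		    (mask & (1UL << cpu)) != 0, TOY_CPU_ISSET(cpu, &set));
	}
	return (0);
}

The set-style API matters once the CPU count can exceed the width of a single integer, which is why head moved away from the flat mask; the revert simply keeps stable/8 on its original representation.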