Skip site navigation (1) Skip section navigation (2)
Date:      Fri, 9 Jun 2006 04:16:11 GMT
From:      Kip Macy <kmacy@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 98846 for review
Message-ID:  <200606090416.k594GB44017948@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=98846

Change 98846 by kmacy@kmacy_storage:sun4v_work on 2006/06/09 04:15:34

	Eliminate sched_lock acquisition from the common-case code paths in the timer interrupt handler.

Affected files ...

.. //depot/projects/kmacy_sun4v/src/sys/kern/init_main.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_clock.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_shutdown.c#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_sig.c#7 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_synch.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_thr.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_thread.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_time.c#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_prof.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_sleepqueue.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_smp.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_trap.c#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_turnstile.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sys_generic.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sys_process.c#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sys_socket.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/uipc_usrreq.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/vfs_vnops.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/posix4/ksched.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/security/mac_lomac/mac_lomac.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/vm/vm_glue.c#5 edit

Differences ...

==== //depot/projects/kmacy_sun4v/src/sys/kern/init_main.c#5 (text+ko) ====

@@ -746,9 +746,7 @@
 	PROC_UNLOCK(initproc);
 	crfree(oldcred);
 	cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
-	mtx_lock_spin(&sched_lock);
-	initproc->p_sflag |= PS_INMEM;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&initproc->p_sflag, PS_INMEM);
 	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
 }
 SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL)

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_clock.c#5 (text+ko) ====

@@ -196,29 +196,30 @@
 	struct pstats *pstats;
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
+	int sflag = 0;
 
 	/*
 	 * Run current process's virtual and profile time, as needed.
 	 */
-	mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
+	mtx_lock_spin_flags(&timer_lock, MTX_QUIET);
 	pstats = p->p_stats;
 	if (usermode &&
-	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
-	    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
-		p->p_sflag |= PS_ALRMPEND;
-		td->td_flags |= TDF_ASTPENDING;
-	}
+	        timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+	    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 
+		sflag = PS_ALRMPEND;
 	if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
-	    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
-		p->p_sflag |= PS_PROFPEND;
-		td->td_flags |= TDF_ASTPENDING;
-	}
-	mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
-
+	    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 
+		sflag = PS_PROFPEND;
+	mtx_unlock_spin_flags(&timer_lock, MTX_QUIET);
 #ifdef	HWPMC_HOOKS
 	if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
 		PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
 #endif
+	if (!sflag)
+		return;
+
+	atomic_set_int(&p->p_sflag, sflag);
+	atomic_set_int(&td->td_flags, TDF_ASTPENDING);
 }
 
 /*
@@ -404,7 +405,6 @@
 	td = curthread;
 	p = td->td_proc;
 
-	mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
 	if (usermode) {
 		/*
 		 * Charge the time as appropriate.
@@ -456,7 +456,7 @@
 	rss = pgtok(vmspace_resident_count(vm));
 	if (ru->ru_maxrss < rss)
 		ru->ru_maxrss = rss;
-	mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
+
 }
 
 void
@@ -536,7 +536,7 @@
 
 /*
  * Handle a watchdog timeout by dumping interrupt information and
- * then either dropping to DDB or panicing.
+ * then either dropping to DDB or panicking.
  */
 static void
 watchdog_fire(void)

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_shutdown.c#4 (text+ko) ====

@@ -557,9 +557,7 @@
 	}
 #endif
 #endif
-	mtx_lock_spin(&sched_lock);
-	td->td_flags |= TDF_INPANIC;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&td->td_flags, TDF_INPANIC);
 	if (!sync_on_panic)
 		bootopt |= RB_NOSYNC;
 	boot(bootopt);

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_sig.c#7 (text+ko) ====

@@ -584,9 +584,7 @@
 	if (! SIGISEMPTY(set))
 		sigqueue_move_set(&p->p_sigqueue, &td->td_sigqueue, &set);
 	if (SIGPENDING(td)) {
-		mtx_lock_spin(&sched_lock);
-		td->td_flags |= TDF_NEEDSIGCHK | TDF_ASTPENDING;
-		mtx_unlock_spin(&sched_lock);
+		atomic_set_int(&td->td_flags, (TDF_NEEDSIGCHK|TDF_ASTPENDING));
 	}
 }
 
@@ -2361,7 +2359,7 @@
 			thread_suspend_one(td2);
 		} else {
 			if (sending || td != td2)
-				td2->td_flags |= TDF_ASTPENDING;
+				atomic_set_int(&td2->td_flags, TDF_ASTPENDING);
 #ifdef SMP
 			if (TD_IS_RUNNING(td2) && td2 != td)
 				forward_signal(td2);
@@ -2379,15 +2377,11 @@
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
 	    &p->p_mtx.mtx_object, "Stopping for traced signal");
 
-	mtx_lock_spin(&sched_lock);
-	td->td_flags |= TDF_XSIG;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&td->td_flags, TDF_XSIG);
 	td->td_xsig = sig;
 	while ((p->p_flag & P_TRACED) && (td->td_flags & TDF_XSIG)) {
 		if (p->p_flag & P_SINGLE_EXIT) {
-			mtx_lock_spin(&sched_lock);
-			td->td_flags &= ~TDF_XSIG;
-			mtx_unlock_spin(&sched_lock);
+			atomic_clear_int(&td->td_flags, TDF_XSIG);
 			return (sig);
 		}
 		/*

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#6 (text+ko) ====

@@ -99,7 +99,7 @@
 		/* Shutting down, run idlethread on AP's */
 		td = PCPU_GET(idlethread);
 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
-		td->td_kse->ke_flags |= KEF_DIDRUN;
+		atomic_set_int(&td->td_kse->ke_flags, KEF_DIDRUN);
 		TD_SET_RUNNING(td);
 		return (td);
 	}
@@ -115,7 +115,7 @@
 		td = PCPU_GET(idlethread);
 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
 	}
-	td->td_kse->ke_flags |= KEF_DIDRUN;
+	atomic_set_int(&td->td_kse->ke_flags, KEF_DIDRUN);
 
 	/*
 	 * If we are in panic, only allow system threads,

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_synch.c#6 (text+ko) ====

@@ -393,8 +393,8 @@
 	 */
 	if (p->p_cpulimit != RLIM_INFINITY &&
 	    p->p_rux.rux_runtime >= p->p_cpulimit * cpu_tickrate()) {
-		p->p_sflag |= PS_XCPU;
-		td->td_flags |= TDF_ASTPENDING;
+		atomic_set_int(&p->p_sflag, PS_XCPU);
+		atomic_set_int(&td->td_flags, TDF_ASTPENDING);
 	}
 
 	/*
@@ -474,7 +474,7 @@
 	}
 	if ((p->p_sflag & PS_INMEM) == 0) {
 		if ((p->p_sflag & PS_SWAPPINGIN) == 0) {
-			p->p_sflag |= PS_SWAPINREQ;
+			atomic_set_int(&p->p_sflag, PS_SWAPINREQ);
 			/*
 			 * due to a LOR between sched_lock and
 			 * the sleepqueue chain locks, use

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_thr.c#5 (text+ko) ====

@@ -46,6 +46,7 @@
 #include <sys/limits.h>
 
 #include <machine/frame.h>
+#include "opt_global.h"
 
 extern int max_threads_per_proc;
 extern int max_groups_per_proc;
@@ -88,6 +89,19 @@
 		return (EINVAL);
 	if ((error = copyin(uap->param, &param, sizeof(param))))
 		return (error);
+#ifndef __NO_STRICT_ALIGNMENT
+#ifdef SUN4V
+	if ((param.stack_size & (64-1)) != 0)
+		return (EINVAL);
+	if (((u_long)param.stack_base & (64-1)) != 0)
+		return (EINVAL);
+#else
+	if ((param.stack_size & (sizeof(void *)-1)) != 0)
+		return (EINVAL);
+	if (((u_long)param.stack_base & (sizeof(void *)-1)) != 0)
+		return (EINVAL);
+#endif
+#endif
 	error = create_thread(td, NULL, param.start_func, param.arg,
 		param.stack_base, param.stack_size, param.tls_base,
 		param.child_tid, param.parent_tid, param.flags);
@@ -301,9 +315,7 @@
 		error = msleep((void *)td, &td->td_proc->p_mtx, PCATCH, "lthr",
 		    hz);
 	if (td->td_flags & TDF_THRWAKEUP) {
-		mtx_lock_spin(&sched_lock);
-		td->td_flags &= ~TDF_THRWAKEUP;
-		mtx_unlock_spin(&sched_lock);
+		atomic_clear_int(&td->td_flags, TDF_THRWAKEUP);
 		PROC_UNLOCK(td->td_proc);
 		return (0);
 	}
@@ -331,9 +343,7 @@
 		PROC_UNLOCK(p);
 		return (ESRCH);
 	}
-	mtx_lock_spin(&sched_lock);
-	ttd->td_flags |= TDF_THRWAKEUP;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&ttd->td_flags, TDF_THRWAKEUP);
 	wakeup((void *)ttd);
 	PROC_UNLOCK(p);
 	return (0);

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_thread.c#5 (text+ko) ====

@@ -544,12 +544,12 @@
 		FOREACH_THREAD_IN_PROC(p, td2) {
 			if (td2 == td)
 				continue;
-			td2->td_flags |= TDF_ASTPENDING;
+			atomic_set_int(&td2->td_flags, TDF_ASTPENDING);
 			if (TD_IS_INHIBITED(td2)) {
 				switch (mode) {
 				case SINGLE_EXIT:
 					if (td->td_flags & TDF_DBSUSPEND)
-						td->td_flags &= ~TDF_DBSUSPEND;
+						atomic_clear_int(&td->td_flags, TDF_DBSUSPEND);
 					if (TD_IS_SUSPENDED(td2))
 						thread_unsuspend_one(td2);
 					if (TD_ON_SLEEPQ(td2) &&
@@ -717,7 +717,7 @@
 		thread_suspend_one(td);
 		if (return_instead == 0) {
 			p->p_boundary_count++;
-			td->td_flags |= TDF_BOUNDARY;
+			atomic_set_int(&td->td_flags, TDF_BOUNDARY);
 		}
 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 			if (p->p_numthreads == p->p_suspcount) 
@@ -727,7 +727,7 @@
 		mi_switch(SW_INVOL, NULL);
 		if (return_instead == 0) {
 			p->p_boundary_count--;
-			td->td_flags &= ~TDF_BOUNDARY;
+			atomic_clear_int(&td->td_flags, TDF_BOUNDARY);
 		}
 		mtx_unlock_spin(&sched_lock);
 		PROC_LOCK(p);

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_time.c#4 (text+ko) ====

@@ -588,9 +588,9 @@
 				timevalsub(&aitv->it_value, &ctv);
 		}
 	} else {
-		mtx_lock_spin(&sched_lock);
+		mtx_lock_spin(&timer_lock);
 		*aitv = p->p_stats->p_timer[which];
-		mtx_unlock_spin(&sched_lock);
+		mtx_unlock_spin(&timer_lock);
 	}
 	return (0);
 }
@@ -663,10 +663,10 @@
 				timevalsub(&oitv->it_value, &ctv);
 		}
 	} else {
-		mtx_lock_spin(&sched_lock);
+		mtx_lock_spin(&timer_lock);
 		*oitv = p->p_stats->p_timer[which];
 		p->p_stats->p_timer[which] = *aitv;
-		mtx_unlock_spin(&sched_lock);
+		mtx_unlock_spin(&timer_lock);
 	}
 	return (0);
 }

==== //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#6 (text+ko) ====

@@ -259,7 +259,7 @@
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	if (td->td_priority < curthread->td_priority)
-		curthread->td_flags |= TDF_NEEDRESCHED;
+		atomic_set_int(&curthread->td_flags, TDF_NEEDRESCHED);
 }
 
 /*
@@ -408,14 +408,14 @@
 			 */
 			if (ke->ke_state == KES_ONRUNQ) {
 				awake = 1;
-				ke->ke_flags &= ~KEF_DIDRUN;
+				atomic_clear_int(&ke->ke_flags, KEF_DIDRUN);
 			} else if ((ke->ke_state == KES_THREAD) &&
 			    (TD_IS_RUNNING(td))) {
 				awake = 1;
 				/* Do not clear KEF_DIDRUN */
 			} else if (ke->ke_flags & KEF_DIDRUN) {
 				awake = 1;
-				ke->ke_flags &= ~KEF_DIDRUN;
+				atomic_clear_int(&ke->ke_flags, KEF_DIDRUN);
 			}
 
 			/*
@@ -626,14 +626,15 @@
 {
 	struct kse *ke;
 
-	mtx_assert(&sched_lock, MA_OWNED);
 	ke = td->td_kse;
 
-	ke->ke_cpticks++;
+	atomic_add_int(&ke->ke_cpticks, 1);
 	td->td_estcpu = ESTCPULIM(td->td_estcpu + 1);
 	if ((td->td_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
+		mtx_lock_spin(&sched_lock);
 		resetpriority(td);
 		resetpriority_thread(td);
+		mtx_unlock_spin(&sched_lock);
 	}
 }
 
@@ -709,7 +710,7 @@
 sched_lend_prio(struct thread *td, u_char prio)
 {
 
-	td->td_flags |= TDF_BORROWING;
+	atomic_set_int(&td->td_flags, TDF_BORROWING);
 	sched_priority(td, prio);
 }
 
@@ -732,7 +733,7 @@
 	else
 		base_pri = td->td_base_pri;
 	if (prio >= base_pri) {
-		td->td_flags &= ~TDF_BORROWING;
+		atomic_clear_int(&td->td_flags, TDF_BORROWING);
 		sched_prio(td, base_pri);
 	} else
 		sched_lend_prio(td, prio);
@@ -778,19 +779,22 @@
 {
 	struct kse *ke;
 	struct proc *p;
+	struct thread *choosetd;
 
 	ke = td->td_kse;
 	p = td->td_proc;
+	choosetd = NULL;
 
-	mtx_assert(&sched_lock, MA_OWNED);
+	if (newtd == NULL)
+		choosetd = choosethread();
 
 	if ((p->p_flag & P_NOLOAD) == 0)
 		sched_load_rem();
 	if (newtd) 
-		newtd->td_flags |= (td->td_flags & TDF_NEEDRESCHED);
+		atomic_set_int(&newtd->td_flags, (td->td_flags & TDF_NEEDRESCHED));
 
 	td->td_lastcpu = td->td_oncpu;
-	td->td_flags &= ~TDF_NEEDRESCHED;
+	atomic_clear_int(&td->td_flags, TDF_NEEDRESCHED);
 	td->td_owepreempt = 0;
 	td->td_oncpu = NOCPU;
 	/*
@@ -819,21 +823,12 @@
 		 */
 		KASSERT((newtd->td_inhibitors == 0),
 			("trying to run inhibitted thread"));
-		newtd->td_kse->ke_flags |= KEF_DIDRUN;
+		atomic_set_int(&newtd->td_kse->ke_flags, KEF_DIDRUN);
         	TD_SET_RUNNING(newtd);
 		if ((newtd->td_proc->p_flag & P_NOLOAD) == 0)
 			sched_load_add();
 	} else {
-#if 0
-		spinlock_enter();
-		mtx_unlock_spin(&sched_lock);
-#endif
-		newtd = choosethread();
-#if 0
-		mtx_lock_spin(&sched_lock);
-		spinlock_exit();
-#endif
-
+		newtd = choosetd;
 	}
 
 	if (td != newtd) {
@@ -948,8 +943,6 @@
 		ipi_selected(map, IPI_AST);
 		return (1);
 	}
-	if (cpunum == NOCPU)
-		printf("forward_wakeup: Idle processor not found\n");
 	return (0);
 }
 #endif
@@ -982,7 +975,7 @@
 	}
 #endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */
 
-	pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
+	atomic_set_int(&pcpu->pc_curthread->td_flags, TDF_NEEDRESCHED);
 	ipi_selected( pcpu->pc_cpumask , IPI_AST);
 	return;
 }
@@ -1175,7 +1168,7 @@
 	KASSERT(TD_IS_RUNNING(td),
 	    ("sched_bind: cannot bind non-running thread"));
 	ke = td->td_kse;
-	ke->ke_flags |= KEF_BOUND;
+	atomic_set_int(&ke->ke_flags, KEF_BOUND);
 #ifdef SMP
 	ke->ke_runq = &runq_pcpu[cpu];
 	if (PCPU_GET(cpuid) == cpu)
@@ -1189,15 +1182,13 @@
 sched_unbind(struct thread* td)
 {
 
-	mtx_assert(&sched_lock, MA_OWNED);
-	td->td_kse->ke_flags &= ~KEF_BOUND;
+	atomic_clear_int(&td->td_kse->ke_flags, KEF_BOUND);
 }
 
 int
 sched_is_bound(struct thread *td)
 {
 
-	mtx_assert(&sched_lock, MA_OWNED);
 	return (td->td_kse->ke_flags & KEF_BOUND);
 }
 

==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_prof.c#3 (text+ko) ====

@@ -484,9 +484,7 @@
 		td->td_profil_addr = pc;
 		td->td_profil_ticks = ticks;
 		td->td_pflags |= TDP_OWEUPC;
-		mtx_lock_spin(&sched_lock);
-		td->td_flags |= TDF_ASTPENDING;
-		mtx_unlock_spin(&sched_lock);
+		atomic_set_int(&td->td_flags, TDF_ASTPENDING);
 	}
 }
 

==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_sleepqueue.c#5 (text+ko) ====

@@ -315,14 +315,13 @@
 	}
 	TAILQ_INSERT_TAIL(&sq->sq_blocked, td, td_slpq);
 	td->td_sleepqueue = NULL;
-	mtx_lock_spin(&sched_lock);
 	td->td_wchan = wchan;
 	td->td_wmesg = wmesg;
 	if (flags & SLEEPQ_INTERRUPTIBLE) {
-		td->td_flags |= TDF_SINTR;
-		td->td_flags &= ~TDF_SLEEPABORT;
+		atomic_set_int(&td->td_flags, TDF_SINTR);
+		atomic_clear_int(&td->td_flags, TDF_SLEEPABORT);
 	}
-	mtx_unlock_spin(&sched_lock);
+
 }
 
 /*
@@ -468,7 +467,7 @@
 	 * If TDF_TIMEOUT is set, we timed out.
 	 */
 	if (td->td_flags & TDF_TIMEOUT) {
-		td->td_flags &= ~TDF_TIMEOUT;
+		atomic_clear_int(&td->td_flags, TDF_TIMEOUT);
 		return (EWOULDBLOCK);
 	}
 
@@ -477,15 +476,16 @@
 	 * already been woken up.
 	 */
 	if (td->td_flags & TDF_TIMOFAIL)
-		td->td_flags &= ~TDF_TIMOFAIL;
-
+		atomic_clear_int(&td->td_flags, TDF_TIMOFAIL);
+	
 	/*
 	 * If callout_stop() fails, then the timeout is running on
 	 * another CPU, so synchronize with it to avoid having it
 	 * accidentally wake up a subsequent sleep.
 	 */
 	else if (callout_stop(&td->td_slpcallout) == 0) {
-		td->td_flags |= TDF_TIMEOUT;
+		atomic_set_int(&td->td_flags, TDF_TIMEOUT);
+		mtx_lock_spin(&sched_lock);
 		TD_SET_SLEEPING(td);
 		mi_switch(SW_INVOL, NULL);
 	}
@@ -500,15 +500,14 @@
 {
 	struct thread *td;
 
-	mtx_assert(&sched_lock, MA_OWNED);
 	td = curthread;
 
 	/* We are no longer in an interruptible sleep. */
 	if (td->td_flags & TDF_SINTR)
-		td->td_flags &= ~TDF_SINTR;
+		atomic_clear_int(&td->td_flags, TDF_SINTR);
 
 	if (td->td_flags & TDF_SLEEPABORT) {
-		td->td_flags &= ~TDF_SLEEPABORT;
+		atomic_clear_int(&td->td_flags, TDF_SLEEPABORT);
 		return (td->td_intrval);
 	}
 
@@ -547,7 +546,7 @@
 	else
 		sleepq_release(wchan);
 	rval = sleepq_check_signals();
-	mtx_unlock_spin(&sched_lock); 
+	mtx_unlock_spin(&sched_lock);
 	if (rcatch)
 		return (rcatch);
 	return (rval);
@@ -632,7 +631,7 @@
 
 	td->td_wmesg = NULL;
 	td->td_wchan = NULL;
-	td->td_flags &= ~TDF_SINTR;
+	atomic_clear_int(&td->td_flags, TDF_SINTR);
 
 	/*
 	 * Note that thread td might not be sleeping if it is running
@@ -759,7 +758,7 @@
 	if (TD_ON_SLEEPQ(td)) {
 		MPASS(td->td_wchan == wchan);
 		MPASS(sq != NULL);
-		td->td_flags |= TDF_TIMEOUT;
+		atomic_set_int(&td->td_flags, TDF_TIMEOUT);
 		sleepq_resume_thread(sq, td, -1);
 		mtx_unlock_spin(&sched_lock);
 		sleepq_release(wchan);
@@ -778,11 +777,11 @@
 	 */
 	if (td->td_flags & TDF_TIMEOUT) {
 		MPASS(TD_IS_SLEEPING(td));
-		td->td_flags &= ~TDF_TIMEOUT;
+		atomic_clear_int(&td->td_flags, TDF_TIMEOUT);
 		TD_CLR_SLEEPING(td);
 		setrunnable(td);
 	} else
-		td->td_flags |= TDF_TIMOFAIL;
+		atomic_set_int(&td->td_flags, TDF_TIMOFAIL);
 	mtx_unlock_spin(&sched_lock);
 }
 
@@ -846,7 +845,7 @@
 	wchan = td->td_wchan;
 	if (wchan != NULL) {
 		td->td_intrval = intrval;
-		td->td_flags |= TDF_SLEEPABORT;
+		atomic_set_int(&td->td_flags, TDF_SLEEPABORT);
 	}
 	mtx_unlock_spin(&sched_lock);
 	sleepq_remove(td, wchan);

==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_smp.c#3 (text+ko) ====

@@ -202,7 +202,7 @@
 		id = pc->pc_cpumask;
 		if (id != me && (id & stopped_cpus) == 0 &&
 		    td != pc->pc_idlethread) {
-			td->td_flags |= TDF_NEEDRESCHED;
+			atomic_set_int(&td->td_flags, TDF_NEEDRESCHED);
 			map |= id;
 		}
 	}

==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_trap.c#4 (text+ko) ====

@@ -77,7 +77,7 @@
 userret(struct thread *td, struct trapframe *frame)
 {
 	struct proc *p = td->td_proc;
-
+	
 	CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid,
             p->p_comm);
 #ifdef DIAGNOSTIC
@@ -150,6 +150,7 @@
 	td = curthread;
 	p = td->td_proc;
 
+
 	CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid,
             p->p_comm);
 	KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
@@ -166,19 +167,18 @@
 	 * AST's saved in sflag, the astpending flag will be set and
 	 * ast() will be called again.
 	 */
-	mtx_lock_spin(&sched_lock);
+
 	flags = td->td_flags;
 	sflag = p->p_sflag;
 	if (p->p_sflag & (PS_ALRMPEND | PS_PROFPEND | PS_XCPU))
-		p->p_sflag &= ~(PS_ALRMPEND | PS_PROFPEND | PS_XCPU);
+		atomic_clear_int(&p->p_sflag, (PS_ALRMPEND | PS_PROFPEND | PS_XCPU));
 #ifdef MAC
 	if (p->p_sflag & PS_MACPEND)
-		p->p_sflag &= ~PS_MACPEND;
+		atomic_clear_int(&p->p_sflag, PS_MACPEND);
 #endif
-	td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK |
-	    TDF_NEEDRESCHED | TDF_INTERRUPT);
 	cnt.v_soft++;
-	mtx_unlock_spin(&sched_lock);
+	atomic_clear_int(&td->td_flags, (TDF_ASTPENDING | TDF_NEEDSIGCHK |
+	    TDF_NEEDRESCHED | TDF_INTERRUPT));
 
 	/*
 	 * XXXKSE While the fact that we owe a user profiling

==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_turnstile.c#5 (text+ko) ====

@@ -640,15 +640,13 @@
 	td->td_turnstile = NULL;
 	mtx_unlock_spin(&tc->tc_lock);
 
-	mtx_lock_spin(&sched_lock);
 	/*
 	 * Handle race condition where a thread on another CPU that owns
 	 * lock 'lock' could have woken us in between us dropping the
 	 * turnstile chain lock and acquiring the sched_lock.
 	 */
 	if (td->td_flags & TDF_TSNOBLOCK) {
-		td->td_flags &= ~TDF_TSNOBLOCK;
-		mtx_unlock_spin(&sched_lock);
+		atomic_clear_int(&td->td_flags, TDF_TSNOBLOCK);
 		return;
 	}
 		
@@ -668,7 +666,7 @@
 		}
 	}
 #endif
-
+	mtx_lock_spin(&sched_lock);
 	/* Save who we are blocked on and switch. */
 	td->td_tsqueue = queue;
 	td->td_blocked = ts;
@@ -871,7 +869,7 @@
 			MPASS(TD_CAN_RUN(td));
 			setrunqueue(td, SRQ_BORING);
 		} else {
-			td->td_flags |= TDF_TSNOBLOCK;
+			atomic_set_int(&td->td_flags, TDF_TSNOBLOCK);
 			MPASS(TD_IS_RUNNING(td) || TD_ON_RUNQ(td));
 		}
 	}

==== //depot/projects/kmacy_sun4v/src/sys/kern/sys_generic.c#3 (text+ko) ====

@@ -755,9 +755,7 @@
 	mtx_lock(&sellock);
 retry:
 	ncoll = nselcoll;
-	mtx_lock_spin(&sched_lock);
-	td->td_flags |= TDF_SELECT;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&td->td_flags, TDF_SELECT);
 	mtx_unlock(&sellock);
 
 	error = selscan(td, ibits, obits, nd);
@@ -797,9 +795,7 @@
 
 done:
 	clear_selinfo_list(td);
-	mtx_lock_spin(&sched_lock);
-	td->td_flags &= ~TDF_SELECT;
-	mtx_unlock_spin(&sched_lock);
+	atomic_clear_int(&td->td_flags, TDF_SELECT);
 	mtx_unlock(&sellock);
 
 done_nosellock:
@@ -935,9 +931,7 @@
 	mtx_lock(&sellock);
 retry:
 	ncoll = nselcoll;
-	mtx_lock_spin(&sched_lock);
-	td->td_flags |= TDF_SELECT;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&td->td_flags, TDF_SELECT);
 	mtx_unlock(&sellock);
 
 	error = pollscan(td, bits, nfds);
@@ -958,12 +952,9 @@
 	 * sellock, so check TDF_SELECT and the number of collisions
 	 * and rescan the file descriptors if necessary.
 	 */
-	mtx_lock_spin(&sched_lock);
 	if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
-		mtx_unlock_spin(&sched_lock);
 		goto retry;
 	}
-	mtx_unlock_spin(&sched_lock);
 
 	if (timo > 0)
 		error = cv_timedwait_sig(&selwait, &sellock, timo);
@@ -975,9 +966,7 @@
 
 done:
 	clear_selinfo_list(td);
-	mtx_lock_spin(&sched_lock);
-	td->td_flags &= ~TDF_SELECT;
-	mtx_unlock_spin(&sched_lock);
+	atomic_clear_int(&td->td_flags, TDF_SELECT);
 	mtx_unlock(&sellock);
 
 done_nosellock:
@@ -1150,9 +1139,7 @@
 	}
 	TAILQ_REMOVE(&td->td_selq, sip, si_thrlist);
 	sip->si_thread = NULL;
-	mtx_lock_spin(&sched_lock);
-	td->td_flags &= ~TDF_SELECT;
-	mtx_unlock_spin(&sched_lock);
+	atomic_clear_int(&td->td_flags, TDF_SELECT);
 	sleepq_remove(td, &selwait);
 	mtx_unlock(&sellock);
 }

==== //depot/projects/kmacy_sun4v/src/sys/kern/sys_process.c#4 (text+ko) ====

@@ -708,15 +708,11 @@
 		break;
 
 	case PT_SUSPEND:
-		mtx_lock_spin(&sched_lock);
-		td2->td_flags |= TDF_DBSUSPEND;
-		mtx_unlock_spin(&sched_lock);
+		atomic_set_int(&td2->td_flags, TDF_DBSUSPEND);
 		break;
 
 	case PT_RESUME:
-		mtx_lock_spin(&sched_lock);
-		td2->td_flags &= ~TDF_DBSUSPEND;
-		mtx_unlock_spin(&sched_lock);
+		atomic_clear_int(&td2->td_flags, TDF_DBSUSPEND);
 		break;
 
 	case PT_STEP:
@@ -787,9 +783,7 @@
 			proctree_locked = 0;
 		}
 		/* deliver or queue signal */
-		mtx_lock_spin(&sched_lock);
-		td2->td_flags &= ~TDF_XSIG;
-		mtx_unlock_spin(&sched_lock);
+		atomic_clear_int(&td2->td_flags, TDF_XSIG);
 		td2->td_xsig = data;
 		p->p_xstat = data;
 		p->p_xthread = NULL;
@@ -798,7 +792,8 @@
 			if (req == PT_DETACH) {
 				struct thread *td3;
 				FOREACH_THREAD_IN_PROC(p, td3)
-					td3->td_flags &= ~TDF_DBSUSPEND; 
+					atomic_clear_int(&td3->td_flags, TDF_DBSUSPEND);
+
 			}
 			/*
 			 * unsuspend all threads, to not let a thread run,

==== //depot/projects/kmacy_sun4v/src/sys/kern/sys_socket.c#3 (text+ko) ====

@@ -63,7 +63,7 @@
 	.fo_kqfilter = soo_kqfilter,
 	.fo_stat = soo_stat,
 	.fo_close = soo_close,
-	.fo_flags = DFLAG_PASSABLE
+	.fo_flags = DFLAG_PASSABLE | DFLAG_MPSAFE
 };
 
 /* ARGSUSED */

==== //depot/projects/kmacy_sun4v/src/sys/kern/uipc_usrreq.c#5 (text+ko) ====

@@ -88,32 +88,99 @@
 struct mbuf *unp_addsockcred(struct thread *, struct mbuf *);
 
 /*
- * Currently, UNIX domain sockets are protected by a single subsystem lock,
- * which covers global data structures and variables, the contents of each
- * per-socket unpcb structure, and the so_pcb field in sockets attached to
- * the UNIX domain.  This provides for a moderate degree of paralellism, as
- * receive operations on UNIX domain sockets do not need to acquire the
- * subsystem lock.  Finer grained locking to permit send() without acquiring
- * a global lock would be a logical next step.
+ * Both send and receive buffers are allocated PIPSIZ bytes of buffering
+ * for stream sockets, although the total for sender and receiver is
+ * actually only PIPSIZ.
+ * Datagram sockets really use the sendspace as the maximum datagram size,
+ * and don't really want to reserve the sendspace.  Their recvspace should
+ * be large enough for at least one max-size datagram plus address.
+ */
+#ifndef PIPSIZ
+#define	PIPSIZ	8192
+#endif
+static u_long	unpst_sendspace = PIPSIZ;
+static u_long	unpst_recvspace = PIPSIZ;
+static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
+static u_long	unpdg_recvspace = 4*1024;
+
+static int	unp_rights;			/* file descriptors in flight */
+
+SYSCTL_DECL(_net_local_stream);
+SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
+	   &unpst_sendspace, 0, "");
+SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
+	   &unpst_recvspace, 0, "");
+SYSCTL_DECL(_net_local_dgram);
+SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
+	   &unpdg_sendspace, 0, "");
+SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
+	   &unpdg_recvspace, 0, "");
+SYSCTL_DECL(_net_local);
+SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
+
+/*
+ * Locking and synchronization:
+ *
+ * A global UNIX domain socket mutex protects all global variables in the
+ * implementation, as well as the linked lists tracking the set of allocated
+ * UNIX domain sockets.  These variables/fields may be read lockless using
+ * atomic operations if stale values are permissible; otherwise the global
+ * mutex is required to read or read-modify-write.  The global mutex also
+ * serves to prevent deadlock when multiple PCB locks may be acquired at once
+ * (see below).  Finally, the global mutex protects uncounted references from
+ * vnodes to sockets bound to those vnodes: to safely dereference the
+ * v_socket pointer, the global mutex must be held while a full reference is
+ * acquired.
+ *
+ * UNIX domain sockets each have one unpcb PCB associated with them from
+ * pru_attach() to pru_detach() via the so_pcb pointer.  The validity of that
+ * reference is an invariant for the lifetime of the socket, so no lock is
+ * required to dereference the so_pcb pointer if a valid socket reference is
+ * held.
+ *
+ * Each PCB has a back-pointer to its socket, unp_socket.  This pointer may
+ * only be safely dereferenced as long as a valid reference to the PCB is
+ * held.  Typically, this reference will be from the socket, or from another
+ * PCB when the referring PCB's lock is held (in order that the reference not
+ * be invalidated during use).  In particular, to follow
+ * unp->unp_conn->unp_socket, you need to hold the lock on unp, not unp_conn.
+ *
+ * Fields of PCBs are locked using a per-unpcb lock, unp_mtx.  Individual
+ * atomic reads without the lock may be performed "lockless", but more
+ * complex reads and read-modify-writes require the mutex to be held.  No
+ * lock order is defined between PCB locks -- multiple PCB locks may be
+ * acquired at the same time only when holding the global UNIX domain socket
+ * mutex, which prevents deadlocks.  To prevent inter-PCB references from
+ * becoming invalid, the lock protecting the reference must be held for the
+ * lifetime of use of the reference.
  *
- * The UNIX domain socket lock preceds all socket layer locks, including the
- * socket lock and socket buffer lock, permitting UNIX domain socket code to
- * call into socket support routines without releasing its locks.
+ * Blocking with UNIX domain sockets is a tricky issue: unlike most network
+ * protocols, bind() is a non-atomic operation, and connect() requires
+ * potential sleeping in the protocol, due to potentially waiting on local or
+ * distributed file systems.  We try to separate "lookup" operations, which
+ * may sleep, and the IPC operations themselves, which typically can occur
+ * with relative atomicity as locks can be held over the entire operation.
  *
- * Some caution is required in areas where the UNIX domain socket code enters
- * VFS in order to create or find rendezvous points.  This results in
- * dropping of the UNIX domain socket subsystem lock, acquisition of the
- * Giant lock, and potential sleeping.  This increases the chances of races,
- * and exposes weaknesses in the socket->protocol API by offering poor
- * failure modes.
+ * Another tricky issue is simultaneous multi-threaded or multi-process
+ * access to a single UNIX domain socket.  These are handled by the flags
+ * UNP_CONNECTING and UNP_BINDING.
  */
-static struct mtx unp_mtx;
-#define	UNP_LOCK_INIT() \
-	mtx_init(&unp_mtx, "unp", NULL, MTX_DEF)
-#define	UNP_LOCK()		mtx_lock(&unp_mtx)
-#define	UNP_UNLOCK()		mtx_unlock(&unp_mtx)
-#define	UNP_LOCK_ASSERT()	mtx_assert(&unp_mtx, MA_OWNED)
-#define	UNP_UNLOCK_ASSERT()	mtx_assert(&unp_mtx, MA_NOTOWNED)
+static struct mtx	unp_global_mtx;
+
+#define	UNP_GLOBAL_LOCK_INIT()		mtx_init(&unp_global_mtx,	\
+					    "unp_global_mtx", NULL, MTX_DEF)
+#define	UNP_GLOBAL_LOCK()		mtx_lock(&unp_global_mtx)
+#define	UNP_GLOBAL_UNLOCK()		mtx_unlock(&unp_global_mtx)
+#define	UNP_GLOBAL_UNLOCK_ASSERT()	mtx_assert(&unp_global_mtx, MA_NOTOWNED)
+#define	UNP_GLOBAL_LOCK_ASSERT()	mtx_assert(&unp_global_mtx, MA_OWNED)
+
+#define UNP_PCB_LOCK_INIT(unp)		mtx_init(&(unp)->unp_mtx,	\
+					    "unp_mtx", "unp_mtx",	\
+					    MTX_DUPOK|MTX_DEF|MTX_RECURSE)
+#define	UNP_PCB_LOCK_DESTROY(unp)	mtx_destroy(&(unp)->unp_mtx)
+#define	UNP_PCB_LOCK(unp)		mtx_lock(&(unp)->unp_mtx)
+#define	UNP_PCB_UNLOCK(unp)		mtx_unlock(&(unp)->unp_mtx)
+#define	UNP_PCB_LOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_OWNED)
 
 /*
  * Garbage collection of cyclic file descriptor/socket references occurs
@@ -123,12 +190,10 @@
  */
 static struct task	unp_gc_task;
 
-static int     unp_attach(struct socket *);
 static void    unp_detach(struct unpcb *);
-static int     unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
 static int     unp_connect(struct socket *,struct sockaddr *, struct thread *);
 static int     unp_connect2(struct socket *so, struct socket *so2, int);
-static void    unp_disconnect(struct unpcb *);
+static void    unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
 static void    unp_shutdown(struct unpcb *);
 static void    unp_drop(struct unpcb *, int);
 static void    unp_gc(__unused void *, int);
@@ -137,8 +202,6 @@
 static void    unp_discard(struct file *);
 static void    unp_freerights(struct file **, int);
 static int     unp_internalize(struct mbuf **, struct thread *);
-static int     unp_listen(struct socket *, struct unpcb *, int,
-		   struct thread *);
 
 static void
 uipc_abort(struct socket *so)
@@ -147,83 +210,238 @@
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
-	UNP_LOCK();
+
+	UNP_GLOBAL_LOCK();

>>> TRUNCATED FOR MAIL (1000 lines) <<<



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200606090416.k594GB44017948>