Date: Fri, 9 Jun 2006 04:16:11 GMT
From: Kip Macy <kmacy@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 98846 for review
Message-ID: <200606090416.k594GB44017948@repoman.freebsd.org>
http://perforce.freebsd.org/chv.cgi?CH=98846

Change 98846 by kmacy@kmacy_storage:sun4v_work on 2006/06/09 04:15:34

	eliminate sched_lock acquisition from the common case code paths
	in the timer interrupt handler

Affected files ...

.. //depot/projects/kmacy_sun4v/src/sys/kern/init_main.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_clock.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_shutdown.c#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_sig.c#7 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_synch.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_thr.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_thread.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_time.c#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_prof.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_sleepqueue.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_smp.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_trap.c#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_turnstile.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sys_generic.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sys_process.c#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sys_socket.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/uipc_usrreq.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/vfs_vnops.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/posix4/ksched.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/security/mac_lomac/mac_lomac.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/vm/vm_glue.c#5 edit

Differences ...

==== //depot/projects/kmacy_sun4v/src/sys/kern/init_main.c#5 (text+ko) ====

@@ -746,9 +746,7 @@
 	PROC_UNLOCK(initproc);
 	crfree(oldcred);
 	cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
-	mtx_lock_spin(&sched_lock);
-	initproc->p_sflag |= PS_INMEM;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&initproc->p_sflag, PS_INMEM);
 	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
 }
 SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL)

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_clock.c#5 (text+ko) ====

@@ -196,29 +196,30 @@
 	struct pstats *pstats;
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
+	int sflag = 0;

 	/*
 	 * Run current process's virtual and profile time, as needed.
 	 */
-	mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
+	mtx_lock_spin_flags(&timer_lock, MTX_QUIET);
 	pstats = p->p_stats;
 	if (usermode &&
-	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
-	    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
-		p->p_sflag |= PS_ALRMPEND;
-		td->td_flags |= TDF_ASTPENDING;
-	}
+	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+	    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
+		sflag = PS_ALRMPEND;
 	if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
-	    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
-		p->p_sflag |= PS_PROFPEND;
-		td->td_flags |= TDF_ASTPENDING;
-	}
-	mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
-
+	    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
+		sflag = PS_PROFPEND;
+	mtx_unlock_spin_flags(&timer_lock, MTX_QUIET);
 #ifdef HWPMC_HOOKS
 	if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
 		PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
 #endif
+	if (!sflag)
+		return;
+
+	atomic_set_int(&p->p_sflag, sflag);
+	atomic_set_int(&td->td_flags, TDF_ASTPENDING);
 }

 /*
@@ -404,7 +405,6 @@

 	td = curthread;
 	p = td->td_proc;
-	mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
 	if (usermode) {
 		/*
 		 * Charge the time as appropriate.
@@ -456,7 +456,7 @@
 	rss = pgtok(vmspace_resident_count(vm));
 	if (ru->ru_maxrss < rss)
 		ru->ru_maxrss = rss;
-	mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
+
 }

 void
@@ -536,7 +536,7 @@

 /*
  * Handle a watchdog timeout by dumping interrupt information and
- * then either dropping to DDB or panicing.
+ * then either dropping to DDB or panicking.
  */
 static void
 watchdog_fire(void)

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_shutdown.c#4 (text+ko) ====

@@ -557,9 +557,7 @@
 	}
 #endif
 #endif
-	mtx_lock_spin(&sched_lock);
-	td->td_flags |= TDF_INPANIC;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&td->td_flags, TDF_INPANIC);
 	if (!sync_on_panic)
 		bootopt |= RB_NOSYNC;
 	boot(bootopt);

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_sig.c#7 (text+ko) ====

@@ -584,9 +584,7 @@
 	if (!SIGISEMPTY(set))
 		sigqueue_move_set(&p->p_sigqueue, &td->td_sigqueue, &set);
 	if (SIGPENDING(td)) {
-		mtx_lock_spin(&sched_lock);
-		td->td_flags |= TDF_NEEDSIGCHK | TDF_ASTPENDING;
-		mtx_unlock_spin(&sched_lock);
+		atomic_set_int(&td->td_flags, (TDF_NEEDSIGCHK|TDF_ASTPENDING));
 	}
 }

@@ -2361,7 +2359,7 @@
 			thread_suspend_one(td2);
 		} else {
 			if (sending || td != td2)
-				td2->td_flags |= TDF_ASTPENDING;
+				atomic_set_int(&td2->td_flags, TDF_ASTPENDING);
 #ifdef SMP
 			if (TD_IS_RUNNING(td2) && td2 != td)
 				forward_signal(td2);
@@ -2379,15 +2377,11 @@
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &p->p_mtx.mtx_object,
 	    "Stopping for traced signal");
-	mtx_lock_spin(&sched_lock);
-	td->td_flags |= TDF_XSIG;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&td->td_flags, TDF_XSIG);
 	td->td_xsig = sig;
 	while ((p->p_flag & P_TRACED) && (td->td_flags & TDF_XSIG)) {
 		if (p->p_flag & P_SINGLE_EXIT) {
-			mtx_lock_spin(&sched_lock);
-			td->td_flags &= ~TDF_XSIG;
-			mtx_unlock_spin(&sched_lock);
+			atomic_clear_int(&td->td_flags, TDF_XSIG);
 			return (sig);
 		}
 		/*

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#6 (text+ko) ====

@@ -99,7 +99,7 @@
 		/* Shutting down, run idlethread on AP's */
 		td = PCPU_GET(idlethread);
 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
-		td->td_kse->ke_flags |= KEF_DIDRUN;
+		atomic_set_int(&td->td_kse->ke_flags, KEF_DIDRUN);
 		TD_SET_RUNNING(td);
 		return (td);
 	}
@@ -115,7 +115,7 @@
 		td = PCPU_GET(idlethread);
 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
 	}
-	td->td_kse->ke_flags |= KEF_DIDRUN;
+	atomic_set_int(&td->td_kse->ke_flags, KEF_DIDRUN);

 	/*
 	 * If we are in panic, only allow system threads,

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_synch.c#6 (text+ko) ====

@@ -393,8 +393,8 @@
 	 */
 	if (p->p_cpulimit != RLIM_INFINITY &&
 	    p->p_rux.rux_runtime >= p->p_cpulimit * cpu_tickrate()) {
-		p->p_sflag |= PS_XCPU;
-		td->td_flags |= TDF_ASTPENDING;
+		atomic_set_int(&p->p_sflag, PS_XCPU);
+		atomic_set_int(&td->td_flags, TDF_ASTPENDING);
 	}

 	/*
@@ -474,7 +474,7 @@
 	}
 	if ((p->p_sflag & PS_INMEM) == 0) {
 		if ((p->p_sflag & PS_SWAPPINGIN) == 0) {
-			p->p_sflag |= PS_SWAPINREQ;
+			atomic_set_int(&p->p_sflag, PS_SWAPINREQ);
 			/*
 			 * due to a LOR between sched_lock and
 			 * the sleepqueue chain locks, use

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_thr.c#5 (text+ko) ====

@@ -46,6 +46,7 @@
 #include <sys/limits.h>

 #include <machine/frame.h>
+#include "opt_global.h"

 extern int max_threads_per_proc;
 extern int max_groups_per_proc;
@@ -88,6 +89,19 @@
 		return (EINVAL);
 	if ((error = copyin(uap->param, &param, sizeof(param))))
 		return (error);
+#ifndef __NO_STRICT_ALIGNMENT
+#ifdef SUN4V
+	if ((param.stack_size & (64-1)) != 0)
+		return (EINVAL);
+	if (((u_long)param.stack_base & (64-1)) != 0)
+		return (EINVAL);
+#else
+	if ((param.stack_size & (sizeof(void *)-1)) != 0)
+		return (EINVAL);
+	if (((u_long)param.stack_base & (sizeof(void *)-1)) != 0)
+		return (EINVAL);
+#endif
+#endif
 	error = create_thread(td, NULL, param.start_func, param.arg,
 	    param.stack_base, param.stack_size, param.tls_base,
 	    param.child_tid, param.parent_tid, param.flags);
@@ -301,9 +315,7 @@
 		error = msleep((void *)td, &td->td_proc->p_mtx, PCATCH, "lthr", hz);
 		if (td->td_flags & TDF_THRWAKEUP) {
-			mtx_lock_spin(&sched_lock);
-			td->td_flags &= ~TDF_THRWAKEUP;
-			mtx_unlock_spin(&sched_lock);
+			atomic_clear_int(&td->td_flags, TDF_THRWAKEUP);
 			PROC_UNLOCK(td->td_proc);
 			return (0);
 		}
@@ -331,9 +343,7 @@
 		PROC_UNLOCK(p);
 		return (ESRCH);
 	}
-	mtx_lock_spin(&sched_lock);
-	ttd->td_flags |= TDF_THRWAKEUP;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&ttd->td_flags, TDF_THRWAKEUP);
 	wakeup((void *)ttd);
 	PROC_UNLOCK(p);
 	return (0);

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_thread.c#5 (text+ko) ====

@@ -544,12 +544,12 @@
 	FOREACH_THREAD_IN_PROC(p, td2) {
 		if (td2 == td)
 			continue;
-		td2->td_flags |= TDF_ASTPENDING;
+		atomic_set_int(&td2->td_flags, TDF_ASTPENDING);
 		if (TD_IS_INHIBITED(td2)) {
 			switch (mode) {
 			case SINGLE_EXIT:
 				if (td->td_flags & TDF_DBSUSPEND)
-					td->td_flags &= ~TDF_DBSUSPEND;
+					atomic_clear_int(&td->td_flags, TDF_DBSUSPEND);
 				if (TD_IS_SUSPENDED(td2))
 					thread_unsuspend_one(td2);
 				if (TD_ON_SLEEPQ(td2) &&
@@ -717,7 +717,7 @@
 		thread_suspend_one(td);
 		if (return_instead == 0) {
 			p->p_boundary_count++;
-			td->td_flags |= TDF_BOUNDARY;
+			atomic_set_int(&td->td_flags, TDF_BOUNDARY);
 		}
 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 			if (p->p_numthreads == p->p_suspcount)
@@ -727,7 +727,7 @@
 		mi_switch(SW_INVOL, NULL);
 		if (return_instead == 0) {
 			p->p_boundary_count--;
-			td->td_flags &= ~TDF_BOUNDARY;
+			atomic_clear_int(&td->td_flags, TDF_BOUNDARY);
 		}
 		mtx_unlock_spin(&sched_lock);
 		PROC_LOCK(p);

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_time.c#4 (text+ko) ====

@@ -588,9 +588,9 @@
 			timevalsub(&aitv->it_value, &ctv);
 		}
 	} else {
-		mtx_lock_spin(&sched_lock);
+		mtx_lock_spin(&timer_lock);
 		*aitv = p->p_stats->p_timer[which];
-		mtx_unlock_spin(&sched_lock);
+		mtx_unlock_spin(&timer_lock);
 	}
 	return (0);
 }
@@ -663,10 +663,10 @@
 			timevalsub(&oitv->it_value, &ctv);
 		}
 	} else {
-		mtx_lock_spin(&sched_lock);
+		mtx_lock_spin(&timer_lock);
 		*oitv = p->p_stats->p_timer[which];
 		p->p_stats->p_timer[which] = *aitv;
-		mtx_unlock_spin(&sched_lock);
+		mtx_unlock_spin(&timer_lock);
 	}
 	return (0);
 }

==== //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#6 (text+ko) ====

@@ -259,7 +259,7 @@

 	mtx_assert(&sched_lock, MA_OWNED);
 	if (td->td_priority < curthread->td_priority)
-		curthread->td_flags |= TDF_NEEDRESCHED;
+		atomic_set_int(&curthread->td_flags, TDF_NEEDRESCHED);
 }

 /*
@@ -408,14 +408,14 @@
 		 */
 		if (ke->ke_state == KES_ONRUNQ) {
 			awake = 1;
-			ke->ke_flags &= ~KEF_DIDRUN;
+			atomic_clear_int(&ke->ke_flags, KEF_DIDRUN);
 		} else if ((ke->ke_state == KES_THREAD) &&
 		    (TD_IS_RUNNING(td))) {
 			awake = 1;
 			/* Do not clear KEF_DIDRUN */
 		} else if (ke->ke_flags & KEF_DIDRUN) {
 			awake = 1;
-			ke->ke_flags &= ~KEF_DIDRUN;
+			atomic_clear_int(&ke->ke_flags, KEF_DIDRUN);
 		}

 		/*
@@ -626,14 +626,15 @@
 {
 	struct kse *ke;

-	mtx_assert(&sched_lock, MA_OWNED);
 	ke = td->td_kse;
-	ke->ke_cpticks++;
+	atomic_add_int(&ke->ke_cpticks, 1);
 	td->td_estcpu = ESTCPULIM(td->td_estcpu + 1);
 	if ((td->td_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
+		mtx_lock_spin(&sched_lock);
 		resetpriority(td);
 		resetpriority_thread(td);
+		mtx_unlock_spin(&sched_lock);
 	}
 }

@@ -709,7 +710,7 @@
 sched_lend_prio(struct thread *td, u_char prio)
 {

-	td->td_flags |= TDF_BORROWING;
+	atomic_set_int(&td->td_flags, TDF_BORROWING);
 	sched_priority(td, prio);
 }

@@ -732,7 +733,7 @@
 	else
 		base_pri = td->td_base_pri;
 	if (prio >= base_pri) {
-		td->td_flags &= ~TDF_BORROWING;
+		atomic_clear_int(&td->td_flags, TDF_BORROWING);
 		sched_prio(td, base_pri);
 	} else
 		sched_lend_prio(td, prio);
@@ -778,19 +779,22 @@
 {
 	struct kse *ke;
 	struct proc *p;
+	struct thread *choosetd;

 	ke = td->td_kse;
 	p = td->td_proc;
+	choosetd = NULL;

-	mtx_assert(&sched_lock, MA_OWNED);
+	if (newtd == NULL)
+		choosetd = choosethread();

 	if ((p->p_flag & P_NOLOAD) == 0)
 		sched_load_rem();

 	if (newtd)
-		newtd->td_flags |= (td->td_flags & TDF_NEEDRESCHED);
+		atomic_set_int(&newtd->td_flags, (td->td_flags & TDF_NEEDRESCHED));

 	td->td_lastcpu = td->td_oncpu;
-	td->td_flags &= ~TDF_NEEDRESCHED;
+	atomic_clear_int(&td->td_flags, TDF_NEEDRESCHED);
 	td->td_owepreempt = 0;
 	td->td_oncpu = NOCPU;
 	/*
@@ -819,21 +823,12 @@
 		 */
 		KASSERT((newtd->td_inhibitors == 0),
 		    ("trying to run inhibitted thread"));
-		newtd->td_kse->ke_flags |= KEF_DIDRUN;
+		atomic_set_int(&newtd->td_kse->ke_flags, KEF_DIDRUN);
 		TD_SET_RUNNING(newtd);
 		if ((newtd->td_proc->p_flag & P_NOLOAD) == 0)
 			sched_load_add();
 	} else {
-#if 0
-		spinlock_enter();
-		mtx_unlock_spin(&sched_lock);
-#endif
-		newtd = choosethread();
-#if 0
-		mtx_lock_spin(&sched_lock);
-		spinlock_exit();
-#endif
-
+		newtd = choosetd;
 	}

 	if (td != newtd) {
@@ -948,8 +943,6 @@
 		ipi_selected(map, IPI_AST);
 		return (1);
 	}
-	if (cpunum == NOCPU)
-		printf("forward_wakeup: Idle processor not found\n");
 	return (0);
 }
 #endif
@@ -982,7 +975,7 @@
 	}
 #endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */

-	pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
+	atomic_set_int(&pcpu->pc_curthread->td_flags, TDF_NEEDRESCHED);
 	ipi_selected( pcpu->pc_cpumask , IPI_AST);
 	return;
 }
@@ -1175,7 +1168,7 @@
 	KASSERT(TD_IS_RUNNING(td),
 	    ("sched_bind: cannot bind non-running thread"));
 	ke = td->td_kse;
-	ke->ke_flags |= KEF_BOUND;
+	atomic_set_int(&ke->ke_flags, KEF_BOUND);
 #ifdef SMP
 	ke->ke_runq = &runq_pcpu[cpu];
 	if (PCPU_GET(cpuid) == cpu)
@@ -1189,15 +1182,13 @@
 sched_unbind(struct thread* td)
 {
-	mtx_assert(&sched_lock, MA_OWNED);
-	td->td_kse->ke_flags &= ~KEF_BOUND;
+	atomic_clear_int(&td->td_kse->ke_flags, KEF_BOUND);
 }

 int
 sched_is_bound(struct thread *td)
 {
-	mtx_assert(&sched_lock, MA_OWNED);
 	return (td->td_kse->ke_flags & KEF_BOUND);
 }

==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_prof.c#3 (text+ko) ====

@@ -484,9 +484,7 @@
 		td->td_profil_addr = pc;
 		td->td_profil_ticks = ticks;
 		td->td_pflags |= TDP_OWEUPC;
-		mtx_lock_spin(&sched_lock);
-		td->td_flags |= TDF_ASTPENDING;
-		mtx_unlock_spin(&sched_lock);
+		atomic_set_int(&td->td_flags, TDF_ASTPENDING);
 	}
 }

==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_sleepqueue.c#5 (text+ko) ====

@@ -315,14 +315,13 @@
 	}
 	TAILQ_INSERT_TAIL(&sq->sq_blocked, td, td_slpq);
 	td->td_sleepqueue = NULL;
-	mtx_lock_spin(&sched_lock);
 	td->td_wchan = wchan;
 	td->td_wmesg = wmesg;
 	if (flags & SLEEPQ_INTERRUPTIBLE) {
-		td->td_flags |= TDF_SINTR;
-		td->td_flags &= ~TDF_SLEEPABORT;
+		atomic_set_int(&td->td_flags, TDF_SINTR);
+		atomic_clear_int(&td->td_flags, TDF_SLEEPABORT);
 	}
-	mtx_unlock_spin(&sched_lock);
 }

 /*
@@ -468,7 +467,7 @@
 	 * If TDF_TIMEOUT is set, we timed out.
 	 */
 	if (td->td_flags & TDF_TIMEOUT) {
-		td->td_flags &= ~TDF_TIMEOUT;
+		atomic_clear_int(&td->td_flags, TDF_TIMEOUT);
 		return (EWOULDBLOCK);
 	}

@@ -477,15 +476,16 @@
 	 * already been woken up.
 	 */
 	if (td->td_flags & TDF_TIMOFAIL)
-		td->td_flags &= ~TDF_TIMOFAIL;
-
+		atomic_clear_int(&td->td_flags, TDF_TIMOFAIL);
+
 	/*
 	 * If callout_stop() fails, then the timeout is running on
 	 * another CPU, so synchronize with it to avoid having it
 	 * accidentally wake up a subsequent sleep.
 	 */
 	else if (callout_stop(&td->td_slpcallout) == 0) {
-		td->td_flags |= TDF_TIMEOUT;
+		atomic_set_int(&td->td_flags, TDF_TIMEOUT);
+		mtx_lock_spin(&sched_lock);
 		TD_SET_SLEEPING(td);
 		mi_switch(SW_INVOL, NULL);
 	}
@@ -500,15 +500,14 @@
 {
 	struct thread *td;

-	mtx_assert(&sched_lock, MA_OWNED);
 	td = curthread;

 	/* We are no longer in an interruptible sleep. */
 	if (td->td_flags & TDF_SINTR)
-		td->td_flags &= ~TDF_SINTR;
+		atomic_clear_int(&td->td_flags, TDF_SINTR);

 	if (td->td_flags & TDF_SLEEPABORT) {
-		td->td_flags &= ~TDF_SLEEPABORT;
+		atomic_clear_int(&td->td_flags, TDF_SLEEPABORT);
 		return (td->td_intrval);
 	}

@@ -547,7 +546,7 @@
 	else
 		sleepq_release(wchan);
 	rval = sleepq_check_signals();
-	mtx_unlock_spin(&sched_lock); 
+	mtx_unlock_spin(&sched_lock);
 	if (rcatch)
 		return (rcatch);
 	return (rval);
@@ -632,7 +631,7 @@

 	td->td_wmesg = NULL;
 	td->td_wchan = NULL;
-	td->td_flags &= ~TDF_SINTR;
+	atomic_clear_int(&td->td_flags, TDF_SINTR);

 	/*
 	 * Note that thread td might not be sleeping if it is running
@@ -759,7 +758,7 @@
 	if (TD_ON_SLEEPQ(td)) {
 		MPASS(td->td_wchan == wchan);
 		MPASS(sq != NULL);
-		td->td_flags |= TDF_TIMEOUT;
+		atomic_set_int(&td->td_flags, TDF_TIMEOUT);
 		sleepq_resume_thread(sq, td, -1);
 		mtx_unlock_spin(&sched_lock);
 		sleepq_release(wchan);
@@ -778,11 +777,11 @@
 	 */
 	if (td->td_flags & TDF_TIMEOUT) {
 		MPASS(TD_IS_SLEEPING(td));
-		td->td_flags &= ~TDF_TIMEOUT;
+		atomic_clear_int(&td->td_flags, TDF_TIMEOUT);
 		TD_CLR_SLEEPING(td);
 		setrunnable(td);
 	} else
-		td->td_flags |= TDF_TIMOFAIL;
+		atomic_set_int(&td->td_flags, TDF_TIMOFAIL);
 	mtx_unlock_spin(&sched_lock);
 }

@@ -846,7 +845,7 @@
 	wchan = td->td_wchan;
 	if (wchan != NULL) {
 		td->td_intrval = intrval;
-		td->td_flags |= TDF_SLEEPABORT;
+		atomic_set_int(&td->td_flags, TDF_SLEEPABORT);
 	}
 	mtx_unlock_spin(&sched_lock);
 	sleepq_remove(td, wchan);

==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_smp.c#3 (text+ko) ====

@@ -202,7 +202,7 @@
 		id = pc->pc_cpumask;
 		if (id != me && (id & stopped_cpus) == 0 &&
 		    td != pc->pc_idlethread) {
-			td->td_flags |= TDF_NEEDRESCHED;
+			atomic_set_int(&td->td_flags, TDF_NEEDRESCHED);
 			map |= id;
 		}
 	}

==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_trap.c#4 (text+ko) ====

@@ -77,7 +77,7 @@
 userret(struct thread *td, struct trapframe *frame)
 {
 	struct proc *p = td->td_proc;
-	
+
 	CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid,
 	    p->p_comm);
 #ifdef DIAGNOSTIC
@@ -150,6 +150,7 @@

 	td = curthread;
 	p = td->td_proc;
+
 	CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid,
 	    p->p_comm);
 	KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
@@ -166,19 +167,18 @@
 	 * AST's saved in sflag, the astpending flag will be set and
 	 * ast() will be called again.
 	 */
-	mtx_lock_spin(&sched_lock);
+
 	flags = td->td_flags;
 	sflag = p->p_sflag;
 	if (p->p_sflag & (PS_ALRMPEND | PS_PROFPEND | PS_XCPU))
-		p->p_sflag &= ~(PS_ALRMPEND | PS_PROFPEND | PS_XCPU);
+		atomic_clear_int(&p->p_sflag, (PS_ALRMPEND | PS_PROFPEND | PS_XCPU));
 #ifdef MAC
 	if (p->p_sflag & PS_MACPEND)
-		p->p_sflag &= ~PS_MACPEND;
+		atomic_clear_int(&p->p_sflag, PS_MACPEND);
 #endif
-	td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK |
-	    TDF_NEEDRESCHED | TDF_INTERRUPT);
 	cnt.v_soft++;
-	mtx_unlock_spin(&sched_lock);
+	atomic_clear_int(&td->td_flags, (TDF_ASTPENDING | TDF_NEEDSIGCHK |
+	    TDF_NEEDRESCHED | TDF_INTERRUPT));

 	/*
 	 * XXXKSE While the fact that we owe a user profiling

==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_turnstile.c#5 (text+ko) ====

@@ -640,15 +640,13 @@
 	td->td_turnstile = NULL;
 	mtx_unlock_spin(&tc->tc_lock);

-	mtx_lock_spin(&sched_lock);
 	/*
 	 * Handle race condition where a thread on another CPU that owns
 	 * lock 'lock' could have woken us in between us dropping the
 	 * turnstile chain lock and acquiring the sched_lock.
 	 */
 	if (td->td_flags & TDF_TSNOBLOCK) {
-		td->td_flags &= ~TDF_TSNOBLOCK;
-		mtx_unlock_spin(&sched_lock);
+		atomic_clear_int(&td->td_flags, TDF_TSNOBLOCK);
 		return;
 	}

@@ -668,7 +666,7 @@
 		}
 	}
 #endif
-
+	mtx_lock_spin(&sched_lock);
 	/* Save who we are blocked on and switch. */
 	td->td_tsqueue = queue;
 	td->td_blocked = ts;
@@ -871,7 +869,7 @@
 			MPASS(TD_CAN_RUN(td));
 			setrunqueue(td, SRQ_BORING);
 		} else {
-			td->td_flags |= TDF_TSNOBLOCK;
+			atomic_set_int(&td->td_flags, TDF_TSNOBLOCK);
 			MPASS(TD_IS_RUNNING(td) || TD_ON_RUNQ(td));
 		}
 	}

==== //depot/projects/kmacy_sun4v/src/sys/kern/sys_generic.c#3 (text+ko) ====

@@ -755,9 +755,7 @@
 	mtx_lock(&sellock);
 retry:
 	ncoll = nselcoll;
-	mtx_lock_spin(&sched_lock);
-	td->td_flags |= TDF_SELECT;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&td->td_flags, TDF_SELECT);
 	mtx_unlock(&sellock);

 	error = selscan(td, ibits, obits, nd);
@@ -797,9 +795,7 @@

done:
 	clear_selinfo_list(td);
-	mtx_lock_spin(&sched_lock);
-	td->td_flags &= ~TDF_SELECT;
-	mtx_unlock_spin(&sched_lock);
+	atomic_clear_int(&td->td_flags, TDF_SELECT);
 	mtx_unlock(&sellock);

done_nosellock:
@@ -935,9 +931,7 @@
 	mtx_lock(&sellock);
retry:
 	ncoll = nselcoll;
-	mtx_lock_spin(&sched_lock);
-	td->td_flags |= TDF_SELECT;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&td->td_flags, TDF_SELECT);
 	mtx_unlock(&sellock);

 	error = pollscan(td, bits, nfds);
@@ -958,12 +952,9 @@
 	 * sellock, so check TDF_SELECT and the number of collisions
 	 * and rescan the file descriptors if necessary.
 	 */
-	mtx_lock_spin(&sched_lock);
 	if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
-		mtx_unlock_spin(&sched_lock);
 		goto retry;
 	}
-	mtx_unlock_spin(&sched_lock);

 	if (timo > 0)
 		error = cv_timedwait_sig(&selwait, &sellock, timo);
@@ -975,9 +966,7 @@

done:
 	clear_selinfo_list(td);
-	mtx_lock_spin(&sched_lock);
-	td->td_flags &= ~TDF_SELECT;
-	mtx_unlock_spin(&sched_lock);
+	atomic_clear_int(&td->td_flags, TDF_SELECT);
 	mtx_unlock(&sellock);

done_nosellock:
@@ -1150,9 +1139,7 @@
 	}
 	TAILQ_REMOVE(&td->td_selq, sip, si_thrlist);
 	sip->si_thread = NULL;
-	mtx_lock_spin(&sched_lock);
-	td->td_flags &= ~TDF_SELECT;
-	mtx_unlock_spin(&sched_lock);
+	atomic_clear_int(&td->td_flags, TDF_SELECT);
 	sleepq_remove(td, &selwait);
 	mtx_unlock(&sellock);
 }

==== //depot/projects/kmacy_sun4v/src/sys/kern/sys_process.c#4 (text+ko) ====

@@ -708,15 +708,11 @@
 		break;

 	case PT_SUSPEND:
-		mtx_lock_spin(&sched_lock);
-		td2->td_flags |= TDF_DBSUSPEND;
-		mtx_unlock_spin(&sched_lock);
+		atomic_set_int(&td2->td_flags, TDF_DBSUSPEND);
 		break;

 	case PT_RESUME:
-		mtx_lock_spin(&sched_lock);
-		td2->td_flags &= ~TDF_DBSUSPEND;
-		mtx_unlock_spin(&sched_lock);
+		atomic_clear_int(&td2->td_flags, TDF_DBSUSPEND);
 		break;

 	case PT_STEP:
@@ -787,9 +783,7 @@
 			proctree_locked = 0;
 		}
 		/* deliver or queue signal */
-		mtx_lock_spin(&sched_lock);
-		td2->td_flags &= ~TDF_XSIG;
-		mtx_unlock_spin(&sched_lock);
+		atomic_clear_int(&td2->td_flags, TDF_XSIG);
 		td2->td_xsig = data;
 		p->p_xstat = data;
 		p->p_xthread = NULL;
@@ -798,7 +792,8 @@
 		if (req == PT_DETACH) {
 			struct thread *td3;
 			FOREACH_THREAD_IN_PROC(p, td3)
-				td3->td_flags &= ~TDF_DBSUSPEND;
+				atomic_clear_int(&td3->td_flags, TDF_DBSUSPEND);
+
 		}
 		/*
 		 * unsuspend all threads, to not let a thread run,

==== //depot/projects/kmacy_sun4v/src/sys/kern/sys_socket.c#3 (text+ko) ====

@@ -63,7 +63,7 @@
 	.fo_kqfilter = soo_kqfilter,
 	.fo_stat = soo_stat,
 	.fo_close = soo_close,
-	.fo_flags = DFLAG_PASSABLE
+	.fo_flags = DFLAG_PASSABLE | DFLAG_MPSAFE
 };

 /* ARGSUSED */

==== //depot/projects/kmacy_sun4v/src/sys/kern/uipc_usrreq.c#5 (text+ko) ====

@@ -88,32 +88,99 @@
 struct mbuf *unp_addsockcred(struct thread *, struct mbuf *);

 /*
- * Currently, UNIX domain sockets are protected by a single subsystem lock,
- * which covers global data structures and variables, the contents of each
- * per-socket unpcb structure, and the so_pcb field in sockets attached to
- * the UNIX domain.  This provides for a moderate degree of paralellism, as
- * receive operations on UNIX domain sockets do not need to acquire the
- * subsystem lock.  Finer grained locking to permit send() without acquiring
- * a global lock would be a logical next step.
+ * Both send and receive buffers are allocated PIPSIZ bytes of buffering
+ * for stream sockets, although the total for sender and receiver is
+ * actually only PIPSIZ.
+ * Datagram sockets really use the sendspace as the maximum datagram size,
+ * and don't really want to reserve the sendspace.  Their recvspace should
+ * be large enough for at least one max-size datagram plus address.
+ */
+#ifndef PIPSIZ
+#define PIPSIZ  8192
+#endif
+static u_long unpst_sendspace = PIPSIZ;
+static u_long unpst_recvspace = PIPSIZ;
+static u_long unpdg_sendspace = 2*1024; /* really max datagram size */
+static u_long unpdg_recvspace = 4*1024;
+
+static int unp_rights;  /* file descriptors in flight */
+
+SYSCTL_DECL(_net_local_stream);
+SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
+    &unpst_sendspace, 0, "");
+SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
+    &unpst_recvspace, 0, "");
+SYSCTL_DECL(_net_local_dgram);
+SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
+    &unpdg_sendspace, 0, "");
+SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
+    &unpdg_recvspace, 0, "");
+SYSCTL_DECL(_net_local);
+SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
+
+/*
+ * Locking and synchronization:
+ *
+ * A global UNIX domain socket mutex protects all global variables in the
+ * implementation, as well as the linked lists tracking the set of allocated
+ * UNIX domain sockets.  These variables/fields may be read lockless using
+ * atomic operations if stale values are permissible; otherwise the global
+ * mutex is required to read or read-modify-write.  The global mutex also
+ * serves to prevent deadlock when multiple PCB locks may be acquired at once
+ * (see below).  Finally, the global mutex protects uncounted references from
+ * vnodes to sockets bound to those vnodes: to safely dereference the
+ * v_socket pointer, the global mutex must be held while a full reference is
+ * acquired.
+ *
+ * UNIX domain sockets each have one unpcb PCB associated with them from
+ * pru_attach() to pru_detach() via the so_pcb pointer.  The validity of that
+ * reference is an invariant for the lifetime of the socket, so no lock is
+ * required to dereference the so_pcb pointer if a valid socket reference is
+ * held.
+ *
+ * Each PCB has a back-pointer to its socket, unp_socket.  This pointer may
+ * only be safely dereferenced as long as a valid reference to the PCB is
+ * held.  Typically, this reference will be from the socket, or from another
+ * PCB when the referring PCB's lock is held (in order that the reference not
+ * be invalidated during use).  In particular, to follow
+ * unp->unp_conn->unp_socket, you need unlock the lock on unp, not unp_conn.
+ *
+ * Fields of PCBs are locked using a per-unpcb lock, unp_mtx.  Individual
+ * atomic reads without the lock may be performed "lockless", but more
+ * complex reads and read-modify-writes require the mutex to be held.  No
+ * lock order is defined between PCB locks -- multiple PCB locks may be
+ * acquired at the same time only when holding the global UNIX domain socket
+ * mutex, which prevents deadlocks.  To prevent inter-PCB references from
+ * becoming invalid, the lock protecting the reference must be held for the
+ * lifetime of use of the reference.
  *
- * The UNIX domain socket lock preceds all socket layer locks, including the
- * socket lock and socket buffer lock, permitting UNIX domain socket code to
- * call into socket support routines without releasing its locks.
+ * Blocking with UNIX domain sockets is a tricky issue: unlike most network
+ * protocols, bind() is a non-atomic operation, and connect() requires
+ * potential sleeping in the protocol, due to potentially waiting on local or
+ * distributed file systems.  We try to separate "lookup" operations, which
+ * may sleep, and the IPC operations themselves, which typically can occur
+ * with relative atomicity as locks can be held over the entire operation.
  *
- * Some caution is required in areas where the UNIX domain socket code enters
- * VFS in order to create or find rendezvous points.  This results in
- * dropping of the UNIX domain socket subsystem lock, acquisition of the
- * Giant lock, and potential sleeping.  This increases the chances of races,
- * and exposes weaknesses in the socket->protocol API by offering poor
- * failure modes.
+ * Another tricky issue is simultaneous multi-threaded or multi-process
+ * access to a single UNIX domain socket.  These are handled by the flags
+ * UNP_CONNECTING and UNP_BINDING.
 */
-static struct mtx unp_mtx;
-#define UNP_LOCK_INIT() \
-	mtx_init(&unp_mtx, "unp", NULL, MTX_DEF)
-#define UNP_LOCK()		mtx_lock(&unp_mtx)
-#define UNP_UNLOCK()		mtx_unlock(&unp_mtx)
-#define UNP_LOCK_ASSERT()	mtx_assert(&unp_mtx, MA_OWNED)
-#define UNP_UNLOCK_ASSERT()	mtx_assert(&unp_mtx, MA_NOTOWNED)
+static struct mtx unp_global_mtx;
+
+#define UNP_GLOBAL_LOCK_INIT()		mtx_init(&unp_global_mtx, \
+					    "unp_global_mtx", NULL, MTX_DEF)
+#define UNP_GLOBAL_LOCK()		mtx_lock(&unp_global_mtx)
+#define UNP_GLOBAL_UNLOCK()		mtx_unlock(&unp_global_mtx)
+#define UNP_GLOBAL_UNLOCK_ASSERT()	mtx_assert(&unp_global_mtx, MA_NOTOWNED)
+#define UNP_GLOBAL_LOCK_ASSERT()	mtx_assert(&unp_global_mtx, MA_OWNED)
+
+#define UNP_PCB_LOCK_INIT(unp)		mtx_init(&(unp)->unp_mtx, \
+					    "unp_mtx", "unp_mtx", \
+					    MTX_DUPOK|MTX_DEF|MTX_RECURSE)
+#define UNP_PCB_LOCK_DESTROY(unp)	mtx_destroy(&(unp)->unp_mtx)
+#define UNP_PCB_LOCK(unp)		mtx_lock(&(unp)->unp_mtx)
+#define UNP_PCB_UNLOCK(unp)		mtx_unlock(&(unp)->unp_mtx)
+#define UNP_PCB_LOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_OWNED)

 /*
  * Garbage collection of cyclic file descriptor/socket references occurs
@@ -123,12 +190,10 @@
  */
 static struct task	unp_gc_task;

-static int	unp_attach(struct socket *);
 static void	unp_detach(struct unpcb *);
-static int	unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
 static int	unp_connect(struct socket *,struct sockaddr *, struct thread *);
 static int	unp_connect2(struct socket *so, struct socket *so2, int);
-static void	unp_disconnect(struct unpcb *);
+static void	unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
 static void	unp_shutdown(struct unpcb *);
 static void	unp_drop(struct unpcb *, int);
 static void	unp_gc(__unused void *, int);
@@ -137,8 +202,6 @@
 static void	unp_discard(struct file *);
 static void	unp_freerights(struct file **, int);
 static int	unp_internalize(struct mbuf **, struct thread *);
-static int	unp_listen(struct socket *, struct unpcb *, int,
-		    struct thread *);

 static void
 uipc_abort(struct socket *so)
@@ -147,83 +210,238 @@

 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
-	UNP_LOCK();
+
+	UNP_GLOBAL_LOCK();

>>> TRUNCATED FOR MAIL (1000 lines) <<<
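The recurring transformation in this change is mechanical: a flag update of
the form "lock sched_lock; td->td_flags |= TDF_ASTPENDING; unlock sched_lock"
becomes a single atomic read-modify-write, atomic_set_int(&td->td_flags,
TDF_ASTPENDING), so the common timer-interrupt path never touches sched_lock.
Below is a minimal userland sketch of that pattern; the field and flag names
mirror the kernel's, and GCC __atomic builtins stand in for FreeBSD's
atomic_set_int()/atomic_clear_int().  This is an illustration under those
assumptions, not the kernel code itself:

	#include <stdio.h>

	#define TDF_ASTPENDING  0x0001
	#define PS_ALRMPEND     0x0001
	#define PS_PROFPEND     0x0002

	struct thread { volatile unsigned td_flags; };
	struct proc   { volatile unsigned p_sflag;  };

	/* stand-ins for the kernel's atomic_set_int()/atomic_clear_int() */
	static void
	atomic_set_int(volatile unsigned *p, unsigned bits)
	{
		__atomic_fetch_or(p, bits, __ATOMIC_SEQ_CST);
	}

	static void
	atomic_clear_int(volatile unsigned *p, unsigned bits)
	{
		__atomic_fetch_and(p, ~bits, __ATOMIC_SEQ_CST);
	}

	/*
	 * hardclock-style path: compute the pending work into a local
	 * first and touch shared state only if something is actually
	 * pending -- the common case writes nothing and takes no lock.
	 */
	static void
	timer_tick(struct proc *p, struct thread *td, int alrm, int prof)
	{
		unsigned sflag = 0;

		if (alrm)
			sflag |= PS_ALRMPEND;
		if (prof)
			sflag |= PS_PROFPEND;
		if (sflag == 0)
			return;         /* common case: lock-free exit */
		atomic_set_int(&p->p_sflag, sflag);
		atomic_set_int(&td->td_flags, TDF_ASTPENDING);
	}

	int
	main(void)
	{
		struct proc p = { 0 };
		struct thread td = { 0 };

		timer_tick(&p, &td, 1, 0);
		printf("p_sflag=%#x td_flags=%#x\n", p.p_sflag, td.td_flags);
		/* the consumer (ast()-style code) clears with the inverse op */
		atomic_clear_int(&td.td_flags, TDF_ASTPENDING);
		printf("td_flags after clear=%#x\n", td.td_flags);
		return (0);
	}

The design point is that the producer publishes flag bits with one atomic
OR and the consumer retires them with one atomic AND-NOT, so neither side
needs the scheduler lock unless it actually has scheduling work to do.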
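The new uipc_usrreq.c comment describes a two-level locking scheme: per-PCB
mutexes with no defined order between them, made deadlock-free by requiring
the global UNIX domain socket mutex whenever more than one PCB lock is held.
Here is a small pthread-based model of that discipline; pthread mutexes stand
in for mtx(9), and the function names are illustrative, not the kernel's:

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t unp_global_mtx = PTHREAD_MUTEX_INITIALIZER;

	struct unpcb {
		pthread_mutex_t unp_mtx;
		int             unp_count;
	};

	/*
	 * connect2-style operation: both PCBs must be locked at once, so
	 * the global lock is taken first; since no thread ever holds two
	 * PCB locks without it, the unordered pair acquisition cannot
	 * deadlock.
	 */
	static void
	pcb_pair_op(struct unpcb *a, struct unpcb *b)
	{
		pthread_mutex_lock(&unp_global_mtx);
		pthread_mutex_lock(&a->unp_mtx);
		pthread_mutex_lock(&b->unp_mtx);
		a->unp_count++;
		b->unp_count++;
		pthread_mutex_unlock(&b->unp_mtx);
		pthread_mutex_unlock(&a->unp_mtx);
		pthread_mutex_unlock(&unp_global_mtx);
	}

	/* single-PCB operation: its own lock suffices, no global lock */
	static void
	pcb_op(struct unpcb *p)
	{
		pthread_mutex_lock(&p->unp_mtx);
		p->unp_count++;
		pthread_mutex_unlock(&p->unp_mtx);
	}

	int
	main(void)
	{
		struct unpcb x = { PTHREAD_MUTEX_INITIALIZER, 0 };
		struct unpcb y = { PTHREAD_MUTEX_INITIALIZER, 0 };

		pcb_op(&x);
		pcb_pair_op(&x, &y);
		printf("x=%d y=%d\n", x.unp_count, y.unp_count);
		return (0);
	}

The trade-off mirrors the comment above: single-socket send/receive paths
pay only for their own lock, while the rarer rendezvous operations
(connect, disconnect) serialize on the global mutex.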