From owner-p4-projects@FreeBSD.ORG  Tue Jun  6 21:59:45 2006
Date: Tue, 6 Jun 2006 21:35:16 GMT
From: Kip Macy <kmacy@freebsd.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 98698 for review

http://perforce.freebsd.org/chv.cgi?CH=98698

Change 98698 by kmacy@kmacy_storage:sun4v_work_test on 2006/06/06 21:34:38

	reduce idle thread contention by moving choosethread (largely)
	out from under sched_lock

Affected files ...

.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_idle.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_witness.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/include/runq.h#2 edit
.. //depot/projects/kmacy_sun4v/src/sys/sys/runq.h#3 edit

Differences ...

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_idle.c#5 (text+ko) ====

@@ -104,9 +104,7 @@
 	p = td->td_proc;
 #ifdef SMP
 	mycpu = PCPU_GET(cpumask);
-	mtx_lock_spin(&sched_lock);
-	idle_cpus_mask |= mycpu;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&idle_cpus_mask, mycpu);
 #endif
 	for (;;) {
 		mtx_assert(&Giant, MA_NOTOWNED);
@@ -114,15 +112,21 @@
 		while (sched_runnable() == 0)
 			cpu_idle();
 
-		mtx_lock_spin(&sched_lock);
+
 #ifdef SMP
-		idle_cpus_mask &= ~mycpu;
+		atomic_clear_int(&idle_cpus_mask, mycpu);
 #endif
-		if ((td = choosethread()) != curthread)
-			sched_switch(curthread, td, SW_VOL);
+		spinlock_enter(); /* avoid preemption after choosethread */
+		if ((td = choosethread()) != curthread) {
+			mtx_lock_spin(&sched_lock);
+			spinlock_exit();
+			sched_switch(curthread, td, SW_VOL);
+			mtx_unlock_spin(&sched_lock);
+		} else
+			spinlock_exit();
 #ifdef SMP
-		idle_cpus_mask |= mycpu;
+		atomic_set_int(&idle_cpus_mask, mycpu);
 #endif
-		mtx_unlock_spin(&sched_lock);
+
 	}
 }
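The kern_idle.c hunk above makes two independent changes: the sched_lock-protected
read-modify-write of idle_cpus_mask becomes a single atomic op, and choosethread()
now runs with only preemption disabled (spinlock_enter()), taking sched_lock just
for the switch itself. As a minimal userland sketch of the bitmask half of that
pattern -- C11 atomics standing in for the kernel's atomic_set_int()/
atomic_clear_int(), all names here illustrative:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned int idle_mask;  /* stands in for idle_cpus_mask */

static void
cpu_enter_idle(unsigned int mycpu)
{
        /* one atomic RMW replaces the lock / "|=" / unlock sequence */
        atomic_fetch_or_explicit(&idle_mask, mycpu, memory_order_acq_rel);
}

static void
cpu_exit_idle(unsigned int mycpu)
{
        atomic_fetch_and_explicit(&idle_mask, ~mycpu, memory_order_acq_rel);
}

int
main(void)
{
        cpu_enter_idle(1 << 2);
        cpu_enter_idle(1 << 5);
        cpu_exit_idle(1 << 2);
        printf("idle mask: %#x\n", atomic_load(&idle_mask));   /* 0x20 */
        return (0);
}

The win is that idle CPUs updating the mask no longer bounce sched_lock between
packages just to advertise that they are idle.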
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#5 (text+ko) ====

@@ -47,6 +47,11 @@
 #if defined(SMP) && defined(SCHED_4BSD)
 #include <sys/smp.h>
 #endif
+#ifndef SMP
+#error "use SMP!"
+#define runq_lock(a, b)
+#define runq_unlock(a, b)
+#endif
 
 /* Uncomment this to enable logging of critical_enter/exit. */
 #if 0
@@ -330,9 +335,17 @@
 		rqb->rqb_bits[RQB_WORD(pri)],
 		rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
 		RQB_BIT(pri), RQB_WORD(pri));
-	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
+	atomic_clear_long(&rqb->rqb_bits[RQB_WORD(pri)], RQB_BIT(pri));
 }
 
+static __inline int
+runq_isset(struct runq *rq, int pri)
+{
+	struct rqbits *rqb;
+
+	rqb = &rq->rq_status;
+	return ((rqb->rqb_bits[RQB_WORD(pri)] & RQB_BIT(pri)) ? 1 : 0);
+}
 /*
  * Find the index of the first non-empty run queue.  This is done by
  * scanning the status bits, a set bit indicates a non-empty queue.
@@ -343,11 +356,30 @@
 	struct rqbits *rqb;
 	int pri;
 	int i;
-
+#ifdef SMP
+	u_long lockbits;
+#endif
 	rqb = &rq->rq_status;
 	for (i = 0; i < RQB_LEN; i++)
 		if (rqb->rqb_bits[i]) {
 			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
+#ifdef SMP
+			lockbits = rq->rq_lockbits[i];
+			if (!atomic_cmpset_acq_long(&rq->rq_lockbits[i],
+			    (lockbits & ~RQB_BIT(pri)),
+			    (lockbits | RQB_BIT(pri))))
+			{
+				i = 0;
+				continue;
+			}
+			if (!runq_isset(rq, pri)) {
+				atomic_clear_rel_long(&rq->rq_lockbits[RQB_WORD(pri)],
+				    RQB_BIT(pri));
+				i = 0;
+				continue;
+			}
+			runq_clrbit(rq, pri);
+#endif
 			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
 			    rqb->rqb_bits[i], i, pri);
 			return (pri);
@@ -370,7 +402,8 @@
 		rqb->rqb_bits[RQB_WORD(pri)],
 		rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
 		RQB_BIT(pri), RQB_WORD(pri));
-	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
+	/* XXX only works on 64-bit - 32 bit will need a mutex */
+	atomic_set_long(&rqb->rqb_bits[RQB_WORD(pri)], RQB_BIT(pri));
 }
 
 /*
@@ -385,14 +418,18 @@
 
 	pri = ke->ke_thread->td_priority / RQ_PPQ;
 	ke->ke_rqindex = pri;
-	runq_setbit(rq, pri);
 	rqh = &rq->rq_queues[pri];
 	CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
 	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
+	runq_lock(ke->ke_runq, ke);
 	if (flags & SRQ_PREEMPTED)
 		TAILQ_INSERT_HEAD(rqh, ke, ke_procq);
 	else
 		TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
+	runq_unlock(ke->ke_runq, ke);
+#ifndef SMP
+	runq_setbit(rq, pri);
+#endif
 }
 
 /*
@@ -423,6 +460,30 @@
 SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW,
     &runq_fuzz, 0, "");
 #endif
 
+static struct kse *
+runq_check_lastcpu(struct rqhead *rqh, int count)
+{
+	/*
+	 * In the first couple of entries, check if
+	 * there is one for our CPU as a preference.
+	 */
+	int cpu = PCPU_GET(cpuid);
+	struct kse *ke, *ke2;
+	ke2 = ke = TAILQ_FIRST(rqh);
+
+	while (count-- && ke2) {
+		if (ke->ke_thread->td_lastcpu == cpu) {
+			ke = ke2;
+			break;
+		}
+		ke2 = TAILQ_NEXT(ke2, ke_procq);
+	}
+	KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
+	CTR2(KTR_RUNQ,
+	    "runq_choose: kse=%p rqh=%p", ke, rqh);
+	return (ke);
+}
+
 /*
  * Find the highest priority process on the run queue.
  */
@@ -433,31 +494,21 @@
 	struct kse *ke;
 	int pri;
 
-	mtx_assert(&sched_lock, MA_OWNED);
 	while ((pri = runq_findbit(rq)) != -1) {
 		rqh = &rq->rq_queues[pri];
 #if defined(SMP) && defined(SCHED_4BSD)
 		/* fuzz == 1 is normal.. 0 or less are ignored */
-		if (runq_fuzz > 1) {
-			/*
-			 * In the first couple of entries, check if
-			 * there is one for our CPU as a preference.
-			 */
-			int count = runq_fuzz;
-			int cpu = PCPU_GET(cpuid);
-			struct kse *ke2;
-			ke2 = ke = TAILQ_FIRST(rqh);
-
-			while (count-- && ke2) {
-				if (ke->ke_thread->td_lastcpu == cpu) {
-					ke = ke2;
-					break;
-				}
-				ke2 = TAILQ_NEXT(ke2, ke_procq);
-			}
-		} else
+		if (runq_fuzz > 1)
+			ke = runq_check_lastcpu(rqh, runq_fuzz);
+		else
 #endif
 			ke = TAILQ_FIRST(rqh);
+		if (ke) {
+			runq_remove_unlocked(rq, ke);
+			runq_unlock(rq, ke);
+		} else
+			panic("bit set but runq empty for bit %d - lockbits=0x%lx availbits=0x%lx",
+			    pri, rq->rq_lockbits[0], rq->rq_status.rqb_bits[0]);
 		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
 		CTR3(KTR_RUNQ,
 		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
@@ -465,7 +516,7 @@
 	}
 	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
 
-	return (NULL);
+	return (NULL);
 }
 
 /*
@@ -473,8 +524,8 @@
  * corresponding status bit if the queue becomes empty.
  * Caller must set ke->ke_state afterwards.
  */
-void
-runq_remove(struct runq *rq, struct kse *ke)
+static __inline void
+_runq_remove(struct runq *rq, struct kse *ke)
 {
 	struct rqhead *rqh;
 	int pri;
@@ -487,12 +538,56 @@
 	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
 	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
 	TAILQ_REMOVE(rqh, ke, ke_procq);
+#ifndef SMP
 	if (TAILQ_EMPTY(rqh)) {
 		CTR0(KTR_RUNQ, "runq_remove: empty");
 		runq_clrbit(rq, pri);
 	}
+#endif
 }
 
+void
+runq_remove(struct runq *rq, struct kse *ke)
+{
+	runq_lock(rq, ke);
+	_runq_remove(rq, ke);
+	runq_unlock(rq, ke);
+}
+
+void
+runq_remove_unlocked(struct runq *rq, struct kse *ke)
+{
+	_runq_remove(rq, ke);
+}
+
+#ifdef SMP
+void
+runq_lock(struct runq *rq, struct kse *ke)
+{
+	int pri;
+	u_long lockbits;
+
+	pri = ke->ke_rqindex;
+	do {
+		lockbits = (rq->rq_lockbits[RQB_WORD(pri)] & ~RQB_BIT(pri));
+	} while (!atomic_cmpset_acq_long(&rq->rq_lockbits[RQB_WORD(pri)],
+	    lockbits, (lockbits | RQB_BIT(pri))));
+	runq_clrbit(rq, pri);
+}
+
+void
+runq_unlock(struct runq *rq, struct kse *ke)
+{
+	struct rqhead *rqh;
+	int pri;
+
+	pri = ke->ke_rqindex;
+	rqh = &rq->rq_queues[pri];
+	if (!TAILQ_EMPTY(rqh))
+		runq_setbit(rq, pri);
+	atomic_clear_rel_long(&rq->rq_lockbits[RQB_WORD(pri)], RQB_BIT(pri));
+}
+#endif
 
 /****** functions that are temporarily here ***********/
 #include
 extern struct mtx kse_zombie_lock;
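The heart of the kern_switch.c change is the new rq_lockbits word: each priority
bucket gets a bit lock that is claimed with a compare-and-set acquire loop
(runq_lock()) and dropped with a releasing clear (runq_unlock()), so buckets can
be locked independently instead of serializing every CPU on sched_lock. A sketch
of that claim/release protocol, with C11 atomics in place of
atomic_cmpset_acq_long()/atomic_clear_rel_long() and illustrative names:

#include <stdatomic.h>
#include <stdio.h>

/* One word of per-bucket lock bits, as in rq_lockbits[]. */
static _Atomic unsigned long lockbits;

static void
bitlock_acquire(int pri)
{
        unsigned long old;

        do {
                /*
                 * The expected value always has our bit clear, so the
                 * CAS fails (and we spin) while another CPU holds the
                 * bit.  This mirrors the "lockbits & ~RQB_BIT(pri)"
                 * trick in runq_lock().
                 */
                old = atomic_load(&lockbits) & ~(1UL << pri);
        } while (!atomic_compare_exchange_weak_explicit(&lockbits, &old,
            old | (1UL << pri), memory_order_acquire, memory_order_relaxed));
}

static void
bitlock_release(int pri)
{
        atomic_fetch_and_explicit(&lockbits, ~(1UL << pri),
            memory_order_release);
}

int
main(void)
{
        bitlock_acquire(7);
        /* ... a queue like rq_queues[7] could be mutated safely here ... */
        bitlock_release(7);
        printf("lockbits after: %#lx\n", atomic_load(&lockbits));
        return (0);
}

One consequence visible in runq_findbit() above: the status bit for a bucket is
cleared while its lock bit is held, and runq_unlock() re-sets it only if the
queue is still non-empty, which is why runq_findbit() must re-check with
runq_isset() after winning the CAS.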
==== //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#5 (text+ko) ====

@@ -824,7 +824,16 @@
 		if ((newtd->td_proc->p_flag & P_NOLOAD) == 0)
 			sched_load_add();
 	} else {
+#if 0
+		spinlock_enter();
+		mtx_unlock_spin(&sched_lock);
+#endif
 		newtd = choosethread();
+#if 0
+		mtx_lock_spin(&sched_lock);
+		spinlock_exit();
+#endif
+
 	}
 
 	if (td != newtd) {
@@ -1110,14 +1119,15 @@
 struct thread *
 sched_choose(void)
 {
-	struct kse *ke;
+	struct kse *ke, *kesel;
 	struct runq *rq;
+	struct thread *td = NULL;
 
 #ifdef SMP
 	struct kse *kecpu;
 
 	rq = &runq;
-	ke = runq_choose(&runq);
+	kesel = ke = runq_choose(&runq);
 	kecpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]);
 
 	if (ke == NULL ||
@@ -1125,26 +1135,27 @@
 	     kecpu->ke_thread->td_priority < ke->ke_thread->td_priority)) {
 		CTR2(KTR_RUNQ, "choosing kse %p from pcpu runq %d", kecpu,
 		     PCPU_GET(cpuid));
-		ke = kecpu;
+		kesel = kecpu;
 		rq = &runq_pcpu[PCPU_GET(cpuid)];
+		if (ke)
+			runq_add(rq, ke, SRQ_PREEMPTED);
 	} else {
+		if (kecpu)
+			runq_add(rq, kecpu, SRQ_PREEMPTED);
 		CTR1(KTR_RUNQ, "choosing kse %p from main runq", ke);
 	}
-
 #else
 	rq = &runq;
-	ke = runq_choose(&runq);
+	kesel = ke = runq_choose(&runq);
 #endif
+	if (kesel) {
+		kesel->ke_state = KES_THREAD;
 
-	if (ke) {
-		runq_remove(rq, ke);
-		ke->ke_state = KES_THREAD;
-
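With runq_choose() now dequeueing its result under the bucket lock, sched_choose()
receives two already-removed candidates (global and per-CPU runq) and has to
requeue whichever one loses the priority comparison; the patch does this with
runq_add(..., SRQ_PREEMPTED) so the loser goes back at the head of its queue. A
toy model of that pick-and-requeue flow (plain lists, no locking, the loser is
returned to its own queue; lower number means higher priority):

#include <stdio.h>

struct item { int pri; struct item *next; };

static struct item *
pop(struct item **q)
{
        struct item *it = *q;

        if (it != NULL)
                *q = it->next;
        return (it);
}

static void
push_head(struct item **q, struct item *it)
{
        it->next = *q;
        *q = it;
}

static struct item *
choose(struct item **global, struct item **percpu)
{
        /* both candidates are dequeued up front, like runq_choose() */
        struct item *g = pop(global), *p = pop(percpu);

        if (g == NULL || (p != NULL && p->pri < g->pri)) {
                if (g != NULL)
                        push_head(global, g);   /* requeue the loser */
                return (p);
        }
        if (p != NULL)
                push_head(percpu, p);
        return (g);
}

int
main(void)
{
        struct item a = { 10, NULL }, b = { 4, NULL };
        struct item *global = &a, *percpu = &b;
        struct item *win = choose(&global, &percpu);

        printf("chose pri %d\n", win ? win->pri : -1);  /* chose pri 4 */
        return (0);
}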
 		KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM,
 		    ("sched_choose: process swapped out"));
-		return (ke->ke_thread);
+		td = kesel->ke_thread;
 	}
-	return (NULL);
+
+	return (td);
 }
 
 void

==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_witness.c#5 (text+ko) ====

@@ -400,6 +400,7 @@
 	{ "vm page queue free mutex", &lock_class_mtx_spin },
 	{ "icu", &lock_class_mtx_spin },
 #ifdef SMP
+	{ "runq lock", &lock_class_mtx_spin },
 	{ "smp rendezvous", &lock_class_mtx_spin },
 #if defined(__i386__) || defined(__amd64__)
 	{ "tlb", &lock_class_mtx_spin },

==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/runq.h#2 (text+ko) ====

@@ -42,17 +42,19 @@
  * Type of run queue status word.
  */
 typedef	u_int64_t	rqb_word_t;
+static int ffslut64[] = {
+	64,  1, 48,  2, 57, 49, 28,  3,
+	61, 58, 50, 42, 38, 29, 17,  4,
+	62, 55, 59, 36, 53, 51, 43, 22,
+	45, 39, 33, 30, 24, 18, 12,  5,
+	63, 47, 56, 27, 60, 41, 37, 16,
+	54, 35, 52, 21, 44, 32, 23, 11,
+	46, 26, 40, 15, 34, 20, 31, 10,
+	25, 14, 19,  9, 13,  8,  7,  6
+};
 
-static __inline u_long
-ffs64(u_long mask)
+static inline u_long ffs64(uint64_t mask)
 {
-	u_long bit;
-
-	if (mask == 0)
-		return (0);
-	for (bit = 1; (mask & 1UL) == 0; bit++)
-		mask >>= 1UL;
-	return (bit);
+	return mask ? ffslut64[((mask & (-mask)) * 0x07EF3AE369961512) >> 58] : 0;
 }
-
 #endif

==== //depot/projects/kmacy_sun4v/src/sys/sys/runq.h#3 (text+ko) ====

@@ -59,6 +59,7 @@
  */
 struct runq {
 	struct	rqbits rq_status;
+	rqb_word_t rq_lockbits[RQB_LEN];
 	struct	rqhead rq_queues[RQ_NQS];
 };
 
@@ -67,5 +68,8 @@
 struct kse *runq_choose(struct runq *);
 void	runq_init(struct runq *);
 void	runq_remove(struct runq *, struct kse *);
+void	runq_remove_unlocked(struct runq *, struct kse *);
+void	runq_lock(struct runq *, struct kse *);
+void	runq_unlock(struct runq *, struct kse *);
 
 #endif
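Finally, the sun4v ffs64() rewrite replaces the bit-at-a-time loop with a de
Bruijn-style multiply plus a 64-entry lookup table: mask & (-mask) isolates the
lowest set bit, multiplying by the magic constant moves a distinct 6-bit pattern
into the top bits for each possible bit position, and the shift by 58 extracts
that index. A standalone harness to cross-check the table version against the
deleted loop; testing single-bit masks covers every case, since mask & (-mask)
collapses any input to one bit:

#include <stdio.h>
#include <stdint.h>

/* Table and multiplier copied from the sun4v runq.h hunk above. */
static const int ffslut64[] = {
        64,  1, 48,  2, 57, 49, 28,  3,
        61, 58, 50, 42, 38, 29, 17,  4,
        62, 55, 59, 36, 53, 51, 43, 22,
        45, 39, 33, 30, 24, 18, 12,  5,
        63, 47, 56, 27, 60, 41, 37, 16,
        54, 35, 52, 21, 44, 32, 23, 11,
        46, 26, 40, 15, 34, 20, 31, 10,
        25, 14, 19,  9, 13,  8,  7,  6
};

static unsigned long
ffs64_lut(uint64_t mask)
{
        /* isolate the lowest set bit, then hash it to a table index */
        return mask ? ffslut64[((mask & (-mask)) *
            0x07EF3AE369961512ULL) >> 58] : 0;
}

/* The loop the patch deletes, kept as a reference implementation. */
static unsigned long
ffs64_ref(uint64_t mask)
{
        unsigned long bit;

        if (mask == 0)
                return (0);
        for (bit = 1; (mask & 1) == 0; bit++)
                mask >>= 1;
        return (bit);
}

int
main(void)
{
        for (int i = 0; i < 64; i++) {
                uint64_t m = 1ULL << i;

                if (ffs64_lut(m) != ffs64_ref(m))
                        printf("mismatch at bit %d\n", i);
        }
        printf("done\n");
        return (0);
}

The payoff is a branch-light constant-time RQB_FFS on a 64-bit status word,
instead of a loop whose cost grows with the index of the first runnable queue.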