Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 27 Jan 2012 20:30:48 -0500
From:      Ryan Stone <rysto32@gmail.com>
To:        freebsd-hackers@freebsd.org
Subject:   Kernel threads inherit CPU affinity from random sibling
Message-ID:  <CAFMmRNxF1uMOr39BbZkpPN=uM7G09dtcckAYw8ag6n6bi=FeOw@mail.gmail.com>

next in thread | raw e-mail | index | archive | help
Right now, whenever a thread is spawned, it inherits CPU affinity from
its "parent" thread.  I put parent in scare quotes because as far as I
can tell, for a kernel thread the parent is essentially chosen
arbitrarily (it looks like it is the most recent thread spawned in
that kernel process).  Inheriting affinity is arguably correct for
userland threads (at the very least, an explicit design decision to
implement inheritance was clearly implemented).  However for kernel
threads, this behaviour leads to bizarre results that clearly weren't
intended.  For example, I added a printf to sched_fork_thread() that
prints a message every time a thread inherits affinity from its
"parent".  Here's the results from booting a dual-core VM with
net.isr.bindthreads=1:

Thread 100006 inheriting affinity from parent swi1: netisr 0(100005)
Thread 100007 inheriting affinity from parent swi3: vm(100006)
Thread 100008 inheriting affinity from parent swi4: clock(100007)
Thread 100014 inheriting affinity from parent swi4: clock(100008)
Thread 100017 inheriting affinity from parent swi5: +(100014)
Thread 100018 inheriting affinity from parent swi6: Giant taskq(100017)
Thread 100022 inheriting affinity from parent swi6: task queue(100018)
Thread 100025 inheriting affinity from parent swi2: cambio(100022)
Thread 100026 inheriting affinity from parent irq14: ata0(100025)
Thread 100027 inheriting affinity from parent irq15: ata1(100026)
Thread 100028 inheriting affinity from parent irq19: le0(100027)
Thread 100029 inheriting affinity from parent irq21: pcm0(100028)
Thread 100034 inheriting affinity from parent irq22: ohci0(100029)
Thread 100035 inheriting affinity from parent irq1: atkbd0(100034)

The result is that every thread in intr kernel process ends up
inheriting the affinity(my favourite part is that both softclock
threads are now forced to fight over the same CPU).  I am working on
the following patch that adds a flag to
sched_fork()/sched_fork_thread() that can be passed to force the child
thread to *not* inherit affinity.  However, this doesn't address
affinity inheritance from calls to kproc_create().  I suppose that I
could push the flag up into fork1(), but I'd prefer not to.  I am
wondering if it even makes sense for affinity to be inherited from a
parent process even for userland processes.

What are peoples thoughts?  Is this the right approach?

[rstone@pcbsd-1039 ~/freebsd/head]$ svn diff
Index: sys/sys/sched.h
===================================================================
--- sys/sys/sched.h     (revision 230616)
+++ sys/sys/sched.h     (working copy)
@@ -80,7 +80,7 @@
  * Proc related scheduling hooks.
  */
 void   sched_exit(struct proc *p, struct thread *childtd);
-void   sched_fork(struct thread *td, struct thread *childtd);
+void   sched_fork(struct thread *td, struct thread *childtd, int flags);
 void   sched_fork_exit(struct thread *td);
 void   sched_class(struct thread *td, int class);
 void   sched_nice(struct proc *p, int nice);
@@ -90,7 +90,7 @@
  * priorities inherited from their procs, and use up cpu time.
  */
 void   sched_exit_thread(struct thread *td, struct thread *child);
-void   sched_fork_thread(struct thread *td, struct thread *child);
+void   sched_fork_thread(struct thread *td, struct thread *child, int flags);
 void   sched_lend_prio(struct thread *td, u_char prio);
 void   sched_lend_user_prio(struct thread *td, u_char pri);
 fixpt_t        sched_pctcpu(struct thread *td);
@@ -105,6 +105,13 @@
 void   sched_preempt(struct thread *td);

 /*
+ * Flags for sched_fork() and sched_fork_thread().
+ */
+
+/* New child thread will not inherit affinity of parent. */
+#define SCHED_NO_AFFINITY 0x01
+
+/*
  * Threads are moved on and off of run queues
  */
 void   sched_add(struct thread *td, int flags);
Index: sys/kern/sched_ule.c
===================================================================
--- sys/kern/sched_ule.c        (revision 230616)
+++ sys/kern/sched_ule.c        (working copy)
@@ -1951,10 +1951,10 @@
  * priority.
  */
 void
-sched_fork(struct thread *td, struct thread *child)
+sched_fork(struct thread *td, struct thread *child, int flags)
 {
        THREAD_LOCK_ASSERT(td, MA_OWNED);
-       sched_fork_thread(td, child);
+       sched_fork_thread(td, child, flags);
        /*
         * Penalize the parent and child for forking.
         */
@@ -1969,7 +1969,7 @@
  * Fork a new thread, may be within the same process.
  */
 void
-sched_fork_thread(struct thread *td, struct thread *child)
+sched_fork_thread(struct thread *td, struct thread *child, int flags)
 {
        struct td_sched *ts;
        struct td_sched *ts2;
@@ -2004,6 +2004,9 @@
 #ifdef KTR
        bzero(ts2->ts_name, sizeof(ts2->ts_name));
 #endif
+
+       if (flags & SCHED_NO_AFFINITY)
+               cpuset_setthread(child->td_tid, cpuset_root);
 }

 /*
Index: sys/kern/kern_thr.c
===================================================================
--- sys/kern/kern_thr.c (revision 230616)
+++ sys/kern/kern_thr.c (working copy)
@@ -257,7 +257,7 @@
        bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name));
        thread_lock(td);
        /* let the scheduler know about these things. */
-       sched_fork_thread(td, newtd);
+       sched_fork_thread(td, newtd, 0);
        thread_unlock(td);
        if (P_SHOULDSTOP(p))
                newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
Index: sys/kern/sched_4bsd.c
===================================================================
--- sys/kern/sched_4bsd.c       (revision 230616)
+++ sys/kern/sched_4bsd.c       (working copy)
@@ -58,6 +58,9 @@
 #include <machine/pcb.h>
 #include <machine/smp.h>

+
+#include <sys/kdb.h>
+
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 #endif
@@ -743,13 +746,13 @@
 }

 void
-sched_fork(struct thread *td, struct thread *childtd)
+sched_fork(struct thread *td, struct thread *childtd, int flags)
 {
-       sched_fork_thread(td, childtd);
+       sched_fork_thread(td, childtd, flags);
 }

 void
-sched_fork_thread(struct thread *td, struct thread *childtd)
+sched_fork_thread(struct thread *td, struct thread *childtd, int flags)
 {
        struct td_sched *ts;

@@ -759,7 +762,10 @@
        childtd->td_priority = childtd->td_base_pri;
        ts = childtd->td_sched;
        bzero(ts, sizeof(*ts));
-       ts->ts_flags |= (td->td_sched->ts_flags & TSF_AFFINITY);
+       if (flags & SCHED_NO_AFFINITY)
+               cpuset_setthread(childtd->td_tid, cpuset_root);
+       else
+               ts->ts_flags |= (td->td_sched->ts_flags & TSF_AFFINITY);
 }

 void
@@ -1613,7 +1619,6 @@
        return (td->td_name);
 #endif
 }
-
 void
 sched_affinity(struct thread *td)
 {
@@ -1642,6 +1647,10 @@
        if (!(ts->ts_flags & TSF_AFFINITY))
                return;

+/*
+       printf("Thread %s(%d) having affinity applied.\n",
td->td_name, td->td_tid);
+       kdb_backtrace();*/
+
        /* Pinned threads and bound threads should be left alone. */
        if (td->td_pinned != 0 || td->td_flags & TDF_BOUND)
                return;
Index: sys/kern/kern_kthread.c
===================================================================
--- sys/kern/kern_kthread.c     (revision 230616)
+++ sys/kern/kern_kthread.c     (working copy)
@@ -29,6 +29,7 @@

 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/cpuset.h>
 #include <sys/kthread.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
@@ -292,7 +293,7 @@
        thread_link(newtd, p);
        thread_lock(oldtd);
        /* let the scheduler know about these things. */
-       sched_fork_thread(oldtd, newtd);
+       sched_fork_thread(oldtd, newtd, SCHED_NO_AFFINITY);
        TD_SET_CAN_RUN(newtd);
        thread_unlock(oldtd);
        PROC_UNLOCK(p);
Index: sys/kern/kern_intr.c
===================================================================
--- sys/kern/kern_intr.c        (revision 230616)
+++ sys/kern/kern_intr.c        (working copy)
@@ -456,6 +456,7 @@
        thread_lock(td);
        sched_class(td, PRI_ITHD);
        TD_SET_IWAIT(td);
+
        thread_unlock(td);
        td->td_pflags |= TDP_ITHREAD;
        ithd->it_thread = td;
Index: sys/kern/kern_fork.c
===================================================================
--- sys/kern/kern_fork.c        (revision 230616)
+++ sys/kern/kern_fork.c        (working copy)
@@ -488,7 +488,7 @@
         * Allow the scheduler to initialize the child.
         */
        thread_lock(td);
-       sched_fork(td, td2);
+       sched_fork(td, td2, 0);
        thread_unlock(td);

        /*



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAFMmRNxF1uMOr39BbZkpPN=uM7G09dtcckAYw8ag6n6bi=FeOw>