Date: Wed, 19 Dec 2007 21:19:32 -1000 (HST) From: Jeff Roberson <jroberson@chesapeake.net> To: arch@freebsd.org, brde@optusnet.com.au Subject: Linux compatible setaffinity. Message-ID: <20071219211025.T899@desktop>
next in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
I have implemented a linux compatible sched_setaffinity() call which is
somewhat crippled. This allows a userspace process to supply a bitmask of
processors which it will run on. I have copied the linux interface such
that it should be api compatible because I believe it is a sensible
interface and they beat us to it by 3 years.
My implementation is crippled in that it supports binding by curthread
only and to a single cpu only. Neither of the schedulers presently
supports binding to multiple cpus or binding a non-curthread thread. This
property is not inherited by forked threads and does not affect other
threads in the same process. These two limitations can gradually be
weakened without affecting the syscall api.
The linux api is:
int sched_setaffinity(pid_t pid, unsigned int cpusetsize, cpu_set_t
*mask);
The cpu_set_t is the same as an fd_set for select. The cpusetsize argument
is used to determine the size of the array in mask.
I'm mostly interested in feedback on how best to reduce the namespace
pollution and avoid pulling the sched.h file into the generated syscall
files (sysproto.h, etc). Anyone who feels this is a terrible interface
for such a thing should speak up now.
I also feel that in the medium term we will have to deal with machines
with more cores than bits in their native word. Using these CPU_SET,
CPU_CLR macros is a fine way to deal with this issue.
I also have a primitive 'taskset'; although I don't like the name, it
allows you to run arbitrary programs bound to a single cpu.
Thanks,
Jeff
[-- Attachment #2 --]
Index: kern/kern_resource.c
===================================================================
RCS file: /CVS/CVS_IPSO/src/sys/kern/kern_resource.c,v
retrieving revision 1.2.10.2
diff -u -r1.2.10.2 kern_resource.c
--- kern/kern_resource.c 17 Nov 2007 01:01:39 -0000 1.2.10.2
+++ kern/kern_resource.c 20 Dec 2007 07:09:11 -0000
@@ -52,6 +52,7 @@
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
+#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
@@ -731,6 +732,45 @@
return (error);
}
+#ifndef _SYS_SYSPROTO_H_
+struct sched_setaffinity_args { /* argument block handed to sched_setaffinity() as uap */
+ pid_t pid; /* target; sched_setaffinity() rejects anything but 0 with EPERM */
+ unsigned int cpusetsize; /* caller's set size; must equal CPU_SETSIZE or EINVAL is returned */
+ cpu_set_t *mask; /* source address given to copyin() for the requested CPU mask */
+};
+#endif /* !_SYS_SYSPROTO_H_ */
+
+/*
+ * Bind the calling thread to the single CPU named in a user-supplied mask.
+ *
+ * Only the current thread (uap->pid == 0) and a mask with exactly one bit
+ * set are supported.
+ *
+ * Returns 0 on success; EPERM if uap->pid is non-zero; EINVAL if
+ * uap->cpusetsize is not CPU_SETSIZE, if the mask is empty or names more
+ * than one CPU, or if the CPU is out of range or reported absent by
+ * CPU_ABSENT(); otherwise the error returned by copyin().
+ */
+int
+sched_setaffinity(struct thread *td, struct sched_setaffinity_args *uap)
+{
+	cpu_set_t mask;
+	int error;
+	int cpu;
+	int i;
+
+	if (uap->pid != 0)
+		return (EPERM);
+	/*
+	 * NOTE(review): Linux callers pass the mask size in bytes, while
+	 * CPU_SETSIZE is a bit count -- confirm which unit is intended.
+	 */
+	if (uap->cpusetsize != CPU_SETSIZE)
+		return (EINVAL);
+	error = copyin(uap->mask, &mask, sizeof(mask));
+	if (error)
+		return (error);
+	/*
+	 * Find the one requested CPU.  'cpu' holds (index + 1) while
+	 * scanning so that 0 can stand for "no bit seen yet".
+	 */
+	for (cpu = 0, i = 0; i < CPU_SETSIZE; i++) {
+		if (!CPU_ISSET(i, &mask))
+			continue;
+		if (cpu)
+			return (EINVAL);	/* more than one CPU requested */
+		cpu = i + 1;
+	}
+	/* An empty mask must not fall through as CPU -1. */
+	if (cpu == 0)
+		return (EINVAL);
+	cpu--;
+	/*
+	 * Range-check before CPU_ABSENT(): the mask can name ids up to
+	 * CPU_SETSIZE - 1, far beyond what the system may have.
+	 */
+	if (cpu > mp_maxid || CPU_ABSENT(cpu))
+		return (EINVAL);
+	thread_lock(curthread);
+	sched_bind(curthread, cpu);
+	thread_unlock(curthread);
+	return (0);
+}
+
/*
* Transform the running time and tick information for children of proc p
* into user and system time usage.
Index: kern/makesyscalls.sh
===================================================================
RCS file: /CVS/CVS_IPSO/src/sys/kern/makesyscalls.sh,v
retrieving revision 1.1
diff -u -r1.1 makesyscalls.sh
--- kern/makesyscalls.sh 10 Feb 2006 03:54:18 -0000 1.1
+++ kern/makesyscalls.sh 20 Dec 2007 07:09:11 -0000
@@ -117,6 +117,8 @@
printf "#define\t%s\n\n", sysproto_h > sysarg
printf "#include <sys/signal.h>\n" > sysarg
printf "#include <sys/acl.h>\n" > sysarg
+ printf "#include <sys/proc.h>\n" > sysarg
+ printf "#include <sys/sched.h>\n" > sysarg
printf "#include <sys/thr.h>\n" > sysarg
printf "#include <sys/umtx.h>\n" > sysarg
printf "#include <posix4/_semaphore.h>\n\n" > sysarg
Index: kern/sched_4bsd.c
===================================================================
RCS file: /CVS/CVS_IPSO/src/sys/kern/sched_4bsd.c,v
retrieving revision 1.7.6.2
diff -u -r1.7.6.2 sched_4bsd.c
--- kern/sched_4bsd.c 29 Nov 2007 01:53:51 -0000 1.7.6.2
+++ kern/sched_4bsd.c 20 Dec 2007 07:09:11 -0000
@@ -1442,6 +1442,7 @@
cpu_idle();
}
mtx_lock_spin(&sched_lock);
+ SCHED_STAT_INC(switch_idle);
mi_switch(SW_VOL, NULL);
mtx_unlock_spin(&sched_lock);
}
Index: kern/syscalls.master
===================================================================
RCS file: /CVS/CVS_IPSO/src/sys/kern/syscalls.master,v
retrieving revision 1.2
diff -u -r1.2 syscalls.master
--- kern/syscalls.master 21 Feb 2007 06:34:30 -0000 1.2
+++ kern/syscalls.master 20 Dec 2007 07:09:12 -0000
@@ -793,6 +793,8 @@
long id, void *uaddr, void *uaddr2); }
455 AUE_NULL MSTD { int thr_new(struct thr_param *param, \
int param_size); }
+456 AUE_NULL MSTD { int sched_setaffinity(pid_t pid, \
+ unsigned int cpusetsize, cpu_set_t *mask); }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
Index: sys/sched.h
===================================================================
RCS file: /CVS/CVS_IPSO/src/sys/sys/sched.h,v
retrieving revision 1.2.10.2
diff -u -r1.2.10.2 sched.h
--- sys/sched.h 3 Dec 2007 21:47:09 -0000 1.2.10.2
+++ sys/sched.h 20 Dec 2007 07:09:18 -0000
@@ -198,6 +198,37 @@
int sched_priority;
};
+typedef unsigned long __cpu_mask; /* one word of the CPU bitmap */
+/* Number of CPU bits in a cpu_set_t; honoured if already defined before this point. */
+#ifndef CPU_SETSIZE
+#define CPU_SETSIZE 1024U
+#endif
+
+#define _NCPUBITS (sizeof(__cpu_mask) * 8) /* bits per mask */
+/* Ceiling division: how many y-sized units are needed to hold x. */
+#ifndef _howmany
+#define _howmany(x, y) (((x) + ((y) - 1)) / (y))
+#endif
+/* Fixed-size bitmap, one bit per CPU, stored as an array of __cpu_mask words. */
+typedef struct cpu_set {
+ __cpu_mask __cpus_bits[_howmany(CPU_SETSIZE, _NCPUBITS)];
+} cpu_set_t;
+/*
+ * Bit accessors.  __cpuset_mask(n) is the bit for CPU n within its word.
+ * CPU_CLR, CPU_ISSET and CPU_SET expand n more than once, so n should be
+ * free of side effects.  None of the macros range-check n.
+ */
+#define __cpuset_mask(n) ((__cpu_mask)1 << ((n) % _NCPUBITS))
+#define CPU_CLR(n, p) ((p)->__cpus_bits[(n)/_NCPUBITS] &= ~__cpuset_mask(n))
+#define CPU_COPY(f, t) (void)(*(t) = *(f))
+#define CPU_ISSET(n, p) (((p)->__cpus_bits[(n)/_NCPUBITS] & __cpuset_mask(n)) != 0)
+#define CPU_SET(n, p) ((p)->__cpus_bits[(n)/_NCPUBITS] |= __cpuset_mask(n))
+#define CPU_ZERO(p) do { /* clear every word of *p; p is evaluated once */ \
+ cpu_set_t *_p; \
+ __size_t _n; \
+ \
+ _p = (p); \
+ _n = _howmany(CPU_SETSIZE, _NCPUBITS); \
+ while (_n > 0) \
+ _p->__cpus_bits[--_n] = 0; \
+} while (0)
+
/*
* POSIX scheduling declarations for userland.
*/
@@ -213,6 +244,8 @@
struct timespec;
__BEGIN_DECLS
+int sched_setaffinity(pid_t pid, unsigned int cpusetsize, cpu_set_t *mask);
+int sched_getaffinity(pid_t pid, unsigned int cpusetsize, cpu_set_t *mask);
int sched_get_priority_max(int);
int sched_get_priority_min(int);
int sched_getparam(pid_t, struct sched_param *);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20071219211025.T899>
