Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 5 Jun 2014 03:31:22 +0300
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        "Alexander V. Chernikov" <melifaro@FreeBSD.org>
Cc:        freebsd-hackers@freebsd.org, hackers@freebsd.org
Subject:   Re: Permit init(8) use its own cpuset group.
Message-ID:  <20140605003122.GF3991@kib.kiev.ua>
In-Reply-To: <538F70AB.5030701@FreeBSD.org>
References:  <538C8F9A.4020301@FreeBSD.org> <20140602164850.GS3991@kib.kiev.ua> <201406041106.11659.jhb@freebsd.org> <538F70AB.5030701@FreeBSD.org>

next in thread | previous in thread | raw e-mail | index | archive | help

--0h4+rw7qdMK7T5x1
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Wed, Jun 04, 2014 at 11:16:59PM +0400, Alexander V. Chernikov wrote:
> On 04.06.2014 19:06, John Baldwin wrote:
> > On Monday, June 02, 2014 12:48:50 pm Konstantin Belousov wrote:
> >> On Mon, Jun 02, 2014 at 06:52:10PM +0400, Alexander V. Chernikov wrote:
> >>> Hello list!
> >>>
> >>> Currently init(8) uses group 1 which is root group.
> >>> Modifications of this group affects both kernel and userland threads.
> >>> Additionally, such modifications are impossible, for example, in pres=
ence
> >>> of multi-queue NIC drivers (like igb or ixgbe) which binds their thre=
ads
> > to
> >>> particular cpus.
> >>>
> >>> Proposed change ("init_cpuset" loader tunable) permits changing cpu
> >>> masks for
> >>> userland more easily. Restricting user processes to migrate to/from C=
PU
> >>> cores
> >>> used for network traffic processing is one of the cases.
> >>>
> >>> Phabricator: https://phabric.freebsd.org/D141 (the same version attac=
hed
> >>> inline)
> >>>
> >>> If there are no objections, I'll commit this next week.
> >> Why is the tunable needed ?
> > Because some people already depend on doing 'cpuset -l 0 -s 1'.  It is =
also
> > documented in our manpages that processes start in cpuset 1 by default =
so
> > that you can use 'cpuset -l 0 -s 1' to move all processes, etc.
> >
> > For the stated problem (bound ithreads in drivers), I would actually li=
ke to
> > fix ithreads that are bound to a specific CPU to create a different cpu=
set
> > instead so they don't conflict with set 1.
> Yes, this seem to be much better approach.
> Please take a look on the new patch (here or in the phabricator).
> Comments:
>=20
> Use different approach for modifyable root set:
>=20
>   * Make sets in 0..15 internal
>   * Define CPUSET_SET1 & CPUSET_ITHREAD in that range
>   * Add cpuset_lookup_builtin() to retrieve such cpu sets by id
>   * Create additional root set for ithreads
>   * Use this set in ithread_create()
>   * Change intr_setaffinity() to use cpuset_iroot (do we really need this=
)?
>=20
> We can probably do the same for kprocs, but I'm unsure if we really need =
it.
I think this is fine.  See below for the style comments.

With the proliferation of the special cpuset ids, it is probably time
to add an assert to cpuset_rel() and cpuset_rel_defer() that it never=20
try to free  the preallocated set.

Since you use static pre-known set id for ithreads, it should be documented
along with the set 1 in the man page ?

>=20
>=20
> >
>=20

> Index: sys/kern/kern_cpuset.c
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- sys/kern/kern_cpuset.c
> +++ sys/kern/kern_cpuset.c
> @@ -112,7 +112,7 @@
>  SYSCTL_INT(_kern_sched, OID_AUTO, cpusetsize, CTLFLAG_RD,
>  	0, sizeof(cpuset_t), "sizeof(cpuset_t)");
> =20
> -cpuset_t *cpuset_root;
> +cpuset_t *cpuset_root, *cpuset_iroot;
> =20
>  /*
>   * Acquire a reference to a cpuset, all pointers must be tracked with re=
fs.
> @@ -213,7 +213,7 @@
>   * Find a set based on an id.  Returns it with a ref.
>   */
>  static struct cpuset *
> -cpuset_lookup(cpusetid_t setid, struct thread *td)
> +cpuset_lookup_id(cpusetid_t setid)
>  {
>  	struct cpuset *set;
> =20
> @@ -227,6 +227,17 @@
>  		cpuset_ref(set);
>  	mtx_unlock_spin(&cpuset_lock);
> =20
> +	return (set);
> +}
> +
> +static struct cpuset *
> +cpuset_lookup(cpusetid_t setid, struct thread *td)
> +{
> +	struct cpuset *set;
> +
> +	set =3D cpuset_lookup_id(setid);
> +
> +
Too many emtpy lines.  The statement above should probably go after the
KASSERT() line below.

>  	KASSERT(td !=3D NULL, ("[%s:%d] td is NULL", __func__, __LINE__));
>  	if (set !=3D NULL && jailed(td->td_ucred)) {
>  		struct cpuset *jset, *tset;
> @@ -245,6 +256,25 @@
>  }
> =20
>  /*
> + * Find a set based on an id.  Returns it with a ref.
> + */
> +struct cpuset *
> +cpuset_lookup_builtin(cpusetid_t setid)
> +{
> +	struct cpuset *set;
> +
> +	KASSERT(setid <=3D CPUSET_RESERVED_MAX,
> +	    ("[%s:%d] wrong set id: %d", __func__, __LINE__, setid));
> +
> +	set =3D cpuset_lookup_id(setid);
> +
> +	KASSERT(set !=3D NULL,
> +	    ("[%s:%d] set id %d not found", __func__, __LINE__, setid));
> +
> +	return (set);
Again the empty lines are not needed.

> +}
> +
> +/*
>   * Create a set in the space provided in 'set' with the provided paramet=
ers.
>   * The set is returned with a single ref.  May return EDEADLK if the set
>   * will have no valid cpu based on restrictions from the parent.
> @@ -725,9 +755,10 @@
>   * 1 - The default set which all processes are a member of until changed.
>   *     This allows an administrator to move all threads off of given cpu=
s to
>   *     dedicate them to high priority tasks or save power etc.
> + * 2 - The root set which should be used for all interruot/ithreads.
Typo in the 'interrupt'.

>   */
> -struct cpuset *
> -cpuset_thread0(void)
> +static void
> +cpuset_init_sets(void)
>  {
>  	struct cpuset *set;
>  	int error;
> @@ -751,14 +782,31 @@
>  	 * Now derive a default, modifiable set from that to give out.
>  	 */
>  	set =3D uma_zalloc(cpuset_zone, M_WAITOK);
> -	error =3D _cpuset_create(set, cpuset_zero, &cpuset_zero->cs_mask, 1);
> +	error =3D _cpuset_create(set, cpuset_zero, &cpuset_zero->cs_mask,
> +	    CPUSET_SET1);
>  	KASSERT(error =3D=3D 0, ("Error creating default set: %d\n", error));
>  	/*
> -	 * Initialize the unit allocator. 0 and 1 are allocated above.
> +	 * Create another root set for interrupt bindings.
>  	 */
> -	cpuset_unr =3D new_unrhdr(2, INT_MAX, NULL);
> +	set =3D uma_zalloc(cpuset_zone, M_WAITOK);
> +	error =3D _cpuset_create(set, cpuset_zero, &cpuset_zero->cs_mask,
> +	    CPUSET_ITHREAD);
> +	KASSERT(error =3D=3D 0, ("Error creating ithread cpuset: %d\n", error));
> +	set->cs_flags |=3D CPU_SET_ROOT;
> +	cpuset_iroot =3D &set->cs_mask;
> +	/*
> +	 * Initialize the unit allocator. Reserve 0..15 for special sets.
> +	 */
> +	cpuset_unr =3D new_unrhdr(CPUSET_RESERVED_MAX + 1, INT_MAX, NULL);
Style requests that multi-line block comments have empty line before.
Spelling CPUSET_RESERVED_MAX as 15 in the comment looks strange.

> +}
> =20
> -	return (set);
> +struct cpuset *
> +cpuset_thread0(void)
> +{
> +
> +	cpuset_init_sets();
> +
Empty line.

> +	return (cpuset_lookup_builtin(CPUSET_SET1));
>  }
> =20
>  /*
> Index: sys/kern/kern_intr.c
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- sys/kern/kern_intr.c
> +++ sys/kern/kern_intr.c
> @@ -318,7 +318,7 @@
>  	if (ie->ie_thread !=3D NULL) {
>  		CPU_ZERO(&mask);
>  		if (cpu =3D=3D NOCPU)
> -			CPU_COPY(cpuset_root, &mask);
> +			CPU_COPY(cpuset_iroot, &mask);
>  		else
>  			CPU_SET(cpu, &mask);
>  		id =3D ie->ie_thread->it_thread->td_tid;
> @@ -334,7 +334,7 @@
>  		if (ie->ie_thread !=3D NULL) {
>  			CPU_ZERO(&mask);
>  			if (ie->ie_cpu =3D=3D NOCPU)
> -				CPU_COPY(cpuset_root, &mask);
> +				CPU_COPY(cpuset_iroot, &mask);
>  			else
>  				CPU_SET(ie->ie_cpu, &mask);
>  			id =3D ie->ie_thread->it_thread->td_tid;
> @@ -381,7 +381,7 @@
>  	 * If we're setting all cpus we can unbind.  Otherwise make sure
>  	 * only one cpu is in the set.
>  	 */
> -	if (CPU_CMP(cpuset_root, mask)) {
> +	if (CPU_CMP(cpuset_iroot, mask)) {
>  		for (n =3D 0; n < CPU_SETSIZE; n++) {
>  			if (!CPU_ISSET(n, mask))
>  				continue;
> @@ -409,7 +409,7 @@
>  	CPU_ZERO(mask);
>  	mtx_lock(&ie->ie_lock);
>  	if (ie->ie_cpu =3D=3D NOCPU)
> -		CPU_COPY(cpuset_root, mask);
> +		CPU_COPY(cpuset_iroot, mask);
>  	else
>  		CPU_SET(ie->ie_cpu, mask);
>  	mtx_unlock(&ie->ie_lock);
> @@ -461,6 +461,7 @@
>  	TD_SET_IWAIT(td);
>  	thread_unlock(td);
>  	td->td_pflags |=3D TDP_ITHREAD;
> +	td->td_cpuset =3D cpuset_lookup_builtin(CPUSET_ITHREAD);
>  	ithd->it_thread =3D td;
>  	CTR2(KTR_INTR, "%s: created %s", __func__, name);
>  	return (ithd);
> @@ -485,6 +486,7 @@
>  	TD_SET_IWAIT(td);
>  	thread_unlock(td);
>  	td->td_pflags |=3D TDP_ITHREAD;
> +	td->td_cpuset =3D cpuset_lookup_builtin(CPUSET_ITHREAD);
>  	ithd->it_thread =3D td;
>  	CTR2(KTR_INTR, "%s: created %s", __func__, name);
>  	return (ithd);
> Index: sys/sys/cpuset.h
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- sys/sys/cpuset.h
> +++ sys/sys/cpuset.h
> @@ -81,6 +81,10 @@
>   */
>  #define	CPUSET_INVALID	-1
>  #define	CPUSET_DEFAULT	0
> +#define	CPUSET_SET1	1
> +#define	CPUSET_ITHREAD	2
> +
> +#define	CPUSET_RESERVED_MAX	15
> =20
>  #ifdef _KERNEL
>  LIST_HEAD(setlist, cpuset);
> @@ -110,11 +114,12 @@
>  #define CPU_SET_ROOT    0x0001  /* Set is a root set. */
>  #define CPU_SET_RDONLY  0x0002  /* No modification allowed. */
> =20
> -extern cpuset_t *cpuset_root;
> +extern cpuset_t *cpuset_root, *cpuset_iroot;
>  struct prison;
>  struct proc;
> =20
>  struct cpuset *cpuset_thread0(void);
> +struct cpuset *cpuset_lookup_builtin(cpusetid_t setid);
>  struct cpuset *cpuset_ref(struct cpuset *);
>  void	cpuset_rel(struct cpuset *);
>  int	cpuset_setthread(lwpid_t id, cpuset_t *);


--0h4+rw7qdMK7T5x1
Content-Type: application/pgp-signature

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2

iQIcBAEBAgAGBQJTj7pZAAoJEJDCuSvBvK1BuRAP/RLySYCcEj1/8IlCnZV0wscv
T+d8qXmRMGeaSgijhCUqxYIvO4uKjguGgGg9PJyYSqKAdjg+HhWR+EYENrJ5xzD1
VWP4UM5f98icqwKO3DlACLAYiKhHVI8Wn8fxHrdPWWJmM+3ORaeHuUgqEwqNlD0i
snFxOXPrJeVESV8bKQYJpKkeMv8ows4wH2KAHHN4sOo+Ue52JoTctZUbeL2ilrMq
6+t3lxckG8/QN/B7iDjON/jQwVHb51zYVVcAn1ZSyO9KWQvztI1aURiSw5LUwcKR
ySPRf9+RtvWR0dB4N/cjR1n/XVPIg31A21t8ikCBNDsqXYesAe20KlfIRqd8Ryhk
qEyoFaQD3uB1SyLliUo+LS1v0YAXDvqo/uOnxZkIyUexJAYYzTCFmq24YNiV8NIH
k0UFsUNTyVvw7P/BrDimDrUwaHDeiWgNOZmllB1VDtuxkO+tWnioOjwCIBDVamWu
yZICzU7gxzi6Hqe/CQvswHKuE0p454fy4SD3gQAXWuN4nzY8B61l7n8P9VkzUddI
p0hFiNq5it7KrjrCx+LajpcMOl9aAqamFv1SWYC4iXmf5I2D0qnOgWtmZoVyi8LL
oFqPRvaXOXw7TGPYpCqLg9xmrwtEXcS3gMPDVRJ0IXyAcf7tzJm07NbwRBNonvb7
XkDskKPwvpkgG8MEDDd1
=QChH
-----END PGP SIGNATURE-----

--0h4+rw7qdMK7T5x1--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20140605003122.GF3991>