From owner-freebsd-current Sat Feb 16 13:56:42 2002 Delivered-To: freebsd-current@freebsd.org Received: from apollo.backplane.com (apollo.backplane.com [216.240.41.2]) by hub.freebsd.org (Postfix) with ESMTP id C36F937B405; Sat, 16 Feb 2002 13:56:34 -0800 (PST) Received: (from dillon@localhost) by apollo.backplane.com (8.11.6/8.9.1) id g1GLuX639707; Sat, 16 Feb 2002 13:56:33 -0800 (PST) (envelope-from dillon) Date: Sat, 16 Feb 2002 13:56:33 -0800 (PST) From: Matthew Dillon Message-Id: <200202162156.g1GLuX639707@apollo.backplane.com> To: John Baldwin Cc: Terry Lambert , Julian Elischer , bde@FreeBSD.ORG, current@FreeBSD.ORG, Alfred Perlstein Subject: tentitive ucred mutex cleanup patch, MATT's version (was Re: ucred holding patch, BDE version) References: Sender: owner-freebsd-current@FreeBSD.ORG Precedence: bulk List-ID: List-Archive: (Web Archive) List-Help: (List Instructions) List-Subscribe: List-Unsubscribe: X-Loop: FreeBSD.ORG Please review this patch. I haven't looked at BDE's patch but this was so simple I decided to just go do it. There are many situations where the allocation of a system structure can be safely done with Giant, but deallocation occurs dangerously deep in the call stack. For these situations I believe it makes sense to have a recycle queue (maybe even a global recycle queue) to handle freeing the refcount->0 structures at a later time (like in the allocation code where we might already need Giant). At least until MALLOC/FREE are really made MP safe. In anycase, here is the patch. It is VERY simple and it also cuts the size of the ucred structure down by a huge amount. As a bonus, here are the gettimeofday() loop test results with this code and the gettimeofday() giant removal: CURRENT, gettimeofday() SMP patch, ucred patch: 1 TG running: 396365 calls/sec 2 TGs running: 322000 calls/sec each. Now with my gdb -k on /dev/mem I note that the contention is occuring in userret(), where Giant is being obtained for the signal processing. If we could optimize that I'll bet the call rate would go above 400K calls per second with two running. -Matt Index: sys/ucred.h =================================================================== RCS file: /home/ncvs/src/sys/sys/ucred.h,v retrieving revision 1.26 diff -u -r1.26 ucred.h --- sys/ucred.h 11 Oct 2001 23:38:17 -0000 1.26 +++ sys/ucred.h 16 Feb 2002 21:36:58 -0000 @@ -60,8 +60,9 @@ struct uidinfo *cr_uidinfo; /* per euid resource consumption */ struct uidinfo *cr_ruidinfo; /* per ruid resource consumption */ struct prison *cr_prison; /* jail(4) */ -#define cr_endcopy cr_mtx - struct mtx cr_mtx; /* protect refcount */ +#define cr_endcopy cr_mtxp + struct mtx *cr_mtxp; /* protect refcount */ + struct ucred *cr_freenext; /* next on free list */ }; #define cr_gid cr_groups[0] #define NOCRED ((struct ucred *)0) /* no credential available */ Index: kern/kern_prot.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_prot.c,v retrieving revision 1.133 diff -u -r1.133 kern_prot.c --- kern/kern_prot.c 16 Jan 2002 06:55:30 -0000 1.133 +++ kern/kern_prot.c 16 Feb 2002 21:42:17 -0000 @@ -72,6 +72,9 @@ int dummy; }; #endif + +static struct ucred *ucred_free_list; + /* * MPSAFE */ @@ -1582,11 +1585,40 @@ struct ucred * crget() { - register struct ucred *cr; + struct ucred *cr; + + GIANT_REQUIRED; + + mtx_pool_lock(&ucred_free_list); + if (ucred_free_list) { + cr = ucred_free_list; + ucred_free_list = cr->cr_freenext; + cr->cr_freenext = NULL; + mtx_pool_unlock(&ucred_free_list); - MALLOC(cr, struct ucred *, sizeof(*cr), M_CRED, M_WAITOK | M_ZERO); + /* + * Cleanup previous user of this recycled ucred + * + * Some callers of crget(), such as nfs_statfs(), + * allocate a temporary credential, but don't + * allocate a uidinfo structure. + */ + if (cr->cr_uidinfo != NULL) + uifree(cr->cr_uidinfo); + if (cr->cr_ruidinfo != NULL) + uifree(cr->cr_ruidinfo); + /* + * Free a prison, if any. + */ + if (jailed(cr)) + prison_free(cr->cr_prison); + } else { + mtx_pool_unlock(&ucred_free_list); + MALLOC(cr, struct ucred *, sizeof(*cr), M_CRED, M_WAITOK); + } + bzero(cr, sizeof(struct ucred)); cr->cr_ref = 1; - mtx_init(&cr->cr_mtx, "ucred", MTX_DEF); + cr->cr_mtxp = mtx_pool_find(cr); return (cr); } @@ -1598,42 +1630,36 @@ struct ucred *cr; { - mtx_lock(&cr->cr_mtx); + mtx_lock(cr->cr_mtxp); cr->cr_ref++; - mtx_unlock(&cr->cr_mtx); + mtx_unlock(cr->cr_mtxp); return (cr); } /* * Free a cred structure. * Throws away space when ref count gets to 0. + * + * XXX we should not have to obtain the mutex lock if cr_ref is 1, since + * we are the last user. */ void crfree(cr) struct ucred *cr; { + struct mtx *mtxp = cr->cr_mtxp; - mtx_lock(&cr->cr_mtx); + mtx_lock(mtxp); KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref)); if (--cr->cr_ref == 0) { - mtx_destroy(&cr->cr_mtx); - /* - * Some callers of crget(), such as nfs_statfs(), - * allocate a temporary credential, but don't - * allocate a uidinfo structure. - */ - if (cr->cr_uidinfo != NULL) - uifree(cr->cr_uidinfo); - if (cr->cr_ruidinfo != NULL) - uifree(cr->cr_ruidinfo); - /* - * Free a prison, if any. - */ - if (jailed(cr)) - prison_free(cr->cr_prison); - FREE((caddr_t)cr, M_CRED); - } else - mtx_unlock(&cr->cr_mtx); + mtx_unlock(mtxp); + mtx_pool_lock(&ucred_free_list); + cr->cr_freenext = ucred_free_list; + ucred_free_list = cr; + mtx_pool_unlock(&ucred_free_list); + } else { + mtx_unlock(mtxp); + } } /* @@ -1645,9 +1671,9 @@ { int shared; - mtx_lock(&cr->cr_mtx); + mtx_lock(cr->cr_mtxp); shared = (cr->cr_ref > 1); - mtx_unlock(&cr->cr_mtx); + mtx_unlock(cr->cr_mtxp); return (shared); } Index: i386/i386/trap.c =================================================================== RCS file: /home/ncvs/src/sys/i386/i386/trap.c,v retrieving revision 1.211 diff -u -r1.211 trap.c --- i386/i386/trap.c 10 Jan 2002 11:49:54 -0000 1.211 +++ i386/i386/trap.c 16 Feb 2002 21:46:05 -0000 @@ -1099,9 +1099,13 @@ */ STOPEVENT(p, S_SCX, code); +#if 0 mtx_lock(&Giant); +#endif crfree(td->td_ucred); +#if 0 mtx_unlock(&Giant); +#endif td->td_ucred = NULL; #ifdef WITNESS if (witness_list(td)) { To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-current" in the body of the message