From owner-p4-projects@FreeBSD.ORG Mon Jun 15 20:55:58 2009 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id E5A5C1065672; Mon, 15 Jun 2009 20:55:57 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id A4430106566B for ; Mon, 15 Jun 2009 20:55:57 +0000 (UTC) (envelope-from trasz@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 90EA68FC16 for ; Mon, 15 Jun 2009 20:55:57 +0000 (UTC) (envelope-from trasz@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.3/8.14.3) with ESMTP id n5FKtvu2007480 for ; Mon, 15 Jun 2009 20:55:57 GMT (envelope-from trasz@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.3/8.14.3/Submit) id n5FKtvvo007478 for perforce@freebsd.org; Mon, 15 Jun 2009 20:55:57 GMT (envelope-from trasz@freebsd.org) Date: Mon, 15 Jun 2009 20:55:57 GMT Message-Id: <200906152055.n5FKtvvo007478@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to trasz@freebsd.org using -f From: Edward Tomasz Napierala To: Perforce Change Reviews Cc: Subject: PERFORCE change 164465 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 15 Jun 2009 20:55:58 -0000 http://perforce.freebsd.org/chv.cgi?CH=164465 Change 164465 by trasz@trasz_victim on 2009/06/15 20:55:11 Code for per-jail and per-group resource accounting. Not really tested. XXX: What about hierarchical jails? XXX2: This is going to be really slow, unless I invent something clever. ;-/ Affected files ... .. //depot/projects/soc2009/trasz_limits/sys/kern/init_main.c#4 edit .. 
//depot/projects/soc2009/trasz_limits/sys/kern/kern_hrl.c#9 edit .. //depot/projects/soc2009/trasz_limits/sys/kern/kern_proc.c#4 edit .. //depot/projects/soc2009/trasz_limits/sys/kern/kern_prot.c#6 edit .. //depot/projects/soc2009/trasz_limits/sys/kern/kern_resource.c#6 edit .. //depot/projects/soc2009/trasz_limits/sys/sys/hrl.h#8 edit .. //depot/projects/soc2009/trasz_limits/sys/sys/jail.h#3 edit .. //depot/projects/soc2009/trasz_limits/sys/sys/resourcevar.h#4 edit .. //depot/projects/soc2009/trasz_limits/sys/sys/ucred.h#3 edit .. //depot/projects/soc2009/trasz_limits/usr.sbin/hrl/hrl.c#8 edit Differences ... ==== //depot/projects/soc2009/trasz_limits/sys/kern/init_main.c#4 (text+ko) ==== @@ -453,6 +453,7 @@ /* Create credentials. */ p->p_ucred = crget(); p->p_ucred->cr_ngroups = 1; /* group 0 */ + p->p_ucred->cr_gidinfos[0] = gifind(0); p->p_ucred->cr_uidinfo = uifind(0); p->p_ucred->cr_ruidinfo = uifind(0); p->p_ucred->cr_prison = &prison0; ==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_hrl.c#9 (text+ko) ==== @@ -31,10 +31,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -110,6 +112,7 @@ */ p->p_accounting.ha_resources[resource] += amount; p->p_ucred->cr_ruidinfo->ui_accounting.ha_resources[resource] += amount; + p->p_ucred->cr_prison->pr_accounting.ha_resources[resource] += amount; /* * XXX: When denying, return proper errno - EFSIZ, ENOMEM etc. 
@@ -126,6 +129,7 @@ diff = amount - p->p_accounting.ha_resources[resource]; p->p_accounting.ha_resources[resource] += diff; p->p_ucred->cr_ruidinfo->ui_accounting.ha_resources[resource] += diff; + p->p_ucred->cr_prison->pr_accounting.ha_resources[resource] += diff; /* * XXX: Make sure process can lower its resource consumption, @@ -143,6 +147,7 @@ p->p_accounting.ha_resources[resource] -= amount; p->p_ucred->cr_ruidinfo->ui_accounting.ha_resources[resource] -= amount; + p->p_ucred->cr_prison->pr_accounting.ha_resources[resource] -= amount; } int @@ -292,12 +297,12 @@ } static int -hrl_get_acc_uid(struct thread *td, id_t pid, void *bufp, size_t buflen) +hrl_get_acc_uid(struct thread *td, id_t uid, void *bufp, size_t buflen) { int error; struct uidinfo *uip; - uip = uifind(pid); + uip = uifind(uid); if (uip == NULL) return (ESRCH); error = copyout(&uip->ui_accounting, bufp, sizeof(uip->ui_accounting)); @@ -306,35 +311,73 @@ return (error); } -int -hrl(struct thread *td, struct hrl_args *uap) +static int +hrl_get_acc_gid(struct thread *td, id_t gid, void *bufp, size_t buflen) { int error; - id_t id; + struct gidinfo *gip; + + gip = gifind(gid); + if (gip == NULL) + return (ESRCH); + error = copyout(&gip->gi_accounting, bufp, sizeof(gip->gi_accounting)); + gifree(gip); + + return (error); +} - if (uap->op == HRL_OP_GET_RULES) - return (hrl_get_rules(td, uap->outbufp, uap->outbuflen)); +static int +hrl_get_acc_jid(struct thread *td, id_t jid, void *bufp, size_t buflen) +{ + int error; + struct prison *pr; - if (uap->inbuflen != sizeof(id_t)) - return (EINVAL); + sx_xlock(&allprison_lock); + pr = prison_find(jid); + if (pr == NULL) { + sx_xunlock(&allprison_lock); + return (ENOENT); + } + error = copyout(&pr->pr_accounting, bufp, sizeof(pr->pr_accounting)); + prison_free(pr); + sx_xunlock(&allprison_lock); - error = copyin(uap->inbufp, &id, sizeof(id_t)); - if (error) - return (error); + return (error); +} - if (id <= 0) - return (EINVAL); +int +hrl(struct thread *td, 
struct hrl_args *uap) +{ + int error; + id_t id; - if (uap->outbuflen < sizeof(struct hrl_acc)) - return (EFBIG); + if (uap->op != HRL_OP_GET_RULES) { + if (uap->inbuflen != sizeof(id_t)) + return (EINVAL); + error = copyin(uap->inbufp, &id, sizeof(id_t)); + if (error) + return (error); + if (id <= 0) + return (EINVAL); + if (uap->outbuflen < sizeof(struct hrl_acc)) + return (EFBIG); + } - if (uap->op == HRL_OP_GET_ACC_PID) + switch (uap->op) { + case HRL_OP_GET_RULES: + return (hrl_get_rules(td, uap->outbufp, uap->outbuflen)); + case HRL_OP_GET_ACC_PID: return (hrl_get_acc_pid(td, id, uap->outbufp, uap->outbuflen)); - - if (uap->op == HRL_OP_GET_ACC_UID) + case HRL_OP_GET_ACC_UID: return (hrl_get_acc_uid(td, id, uap->outbufp, uap->outbuflen)); - - return (EINVAL); + case HRL_OP_GET_ACC_GID: + return (hrl_get_acc_gid(td, id, uap->outbufp, uap->outbuflen)); + case HRL_OP_GET_ACC_JAILID: + return (hrl_get_acc_jid(td, id, uap->outbufp, uap->outbuflen)); + default: + return (EINVAL); + } + /* NOTREACHED */ } static void ==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_proc.c#4 (text+ko) ==== @@ -165,6 +165,7 @@ proc_ctor, proc_dtor, proc_init, proc_fini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uihashinit(); + gihashinit(); } /* ==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_prot.c#6 (text+ko) ==== @@ -807,7 +807,7 @@ { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; - int error; + int i, error; if (ngrp > NGROUPS) return (EINVAL); @@ -839,9 +839,15 @@ * when running non-BSD software if we do not do the same. 
*/ newcred->cr_ngroups = 1; + for (i = 1; i < newcred->cr_ngroups; i++) + gifree(newcred->cr_gidinfos[i]); } else { + for (i = 0; i < newcred->cr_ngroups; i++) + gifree(newcred->cr_gidinfos[i]); bcopy(groups, newcred->cr_groups, ngrp * sizeof(gid_t)); newcred->cr_ngroups = ngrp; + for (i = 0; i < newcred->cr_ngroups; i++) + newcred->cr_gidinfos[i] = gifind(newcred->cr_groups[i]); } setsugid(p); p->p_ucred = newcred; @@ -1802,6 +1808,7 @@ void crfree(struct ucred *cr) { + int i; KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref)); KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred")); @@ -1815,6 +1822,8 @@ uifree(cr->cr_uidinfo); if (cr->cr_ruidinfo != NULL) uifree(cr->cr_ruidinfo); + for (i = 0; i < cr->cr_ngroups; i++) + gifree(cr->cr_gidinfos[i]); /* * Free a prison, if any. */ @@ -1851,6 +1860,7 @@ void crcopy(struct ucred *dest, struct ucred *src) { + int i; KASSERT(crshared(dest) == 0, ("crcopy of shared ucred")); bcopy(&src->cr_startcopy, &dest->cr_startcopy, @@ -1858,6 +1868,8 @@ (caddr_t)&src->cr_startcopy)); uihold(dest->cr_uidinfo); uihold(dest->cr_ruidinfo); + for (i = 0; i < dest->cr_ngroups; i++) + gihold(dest->cr_gidinfos[i]); prison_hold(dest->cr_prison); #ifdef VIMAGE KASSERT(src->cr_vimage != NULL, ("cr_vimage == NULL")); @@ -2014,7 +2026,9 @@ change_egid(struct ucred *newcred, gid_t egid) { + gifree(newcred->cr_gidinfos[0]); newcred->cr_groups[0] = egid; + newcred->cr_gidinfos[0] = gifind(egid); } /*- ==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_resource.c#6 (text+ko) ==== @@ -72,11 +72,17 @@ static struct rwlock uihashtbl_lock; static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; static u_long uihash; /* size of hash table - 1 */ +static MALLOC_DEFINE(M_GIDINFO, "gidinfo", "gidinfo structures"); +#define GIHASH(gid) (&gihashtbl[(gid) & gihash]) +static struct rwlock gihashtbl_lock; +static LIST_HEAD(gihashhead, gidinfo) *gihashtbl; +static u_long gihash; /* size of hash table - 1 */ static void calcru1(struct 
proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp); static int donice(struct thread *td, struct proc *chgp, int n); static struct uidinfo *uilookup(uid_t uid); +static struct gidinfo *gilookup(gid_t gid); /* * Resource controls and accounting. @@ -1358,6 +1364,128 @@ } /* + * Find the gidinfo structure for a gid. This structure is used to + * track the total resource consumption (process count, socket buffer + * size, etc.) for the gid and impose limits. + */ +void +gihashinit() +{ + + gihashtbl = hashinit(maxproc / 16, M_GIDINFO, &gihash); + rw_init(&gihashtbl_lock, "gidinfo hash"); +} + +/* + * Look up a gidinfo struct for the parameter gid. + * gihashtbl_lock must be locked. + */ +static struct gidinfo * +gilookup(gid) + gid_t gid; +{ + struct gihashhead *gipp; + struct gidinfo *gip; + + rw_assert(&gihashtbl_lock, RA_LOCKED); + gipp = GIHASH(gid); + LIST_FOREACH(gip, gipp, gi_hash) + if (gip->gi_gid == gid) + break; + + return (gip); +} + +/* + * Find or allocate a struct gidinfo for a particular gid. + * Increase refcount on gidinfo struct returned. + * gifree() should be called on a struct gidinfo when released. + */ +struct gidinfo * +gifind(gid) + gid_t gid; +{ + struct gidinfo *old_gip, *gip; + + rw_rlock(&gihashtbl_lock); + gip = gilookup(gid); + if (gip == NULL) { + rw_runlock(&gihashtbl_lock); + gip = malloc(sizeof(*gip), M_GIDINFO, M_WAITOK | M_ZERO); + rw_wlock(&gihashtbl_lock); + /* + * There's a chance someone created our gidinfo while we + * were in malloc and not holding the lock, so we have to + * make sure we don't insert a duplicate gidinfo. + */ + if ((old_gip = gilookup(gid)) != NULL) { + /* Someone else beat us to it. */ + free(gip, M_GIDINFO); + gip = old_gip; + } else { + refcount_init(&gip->gi_ref, 0); + gip->gi_gid = gid; + LIST_INSERT_HEAD(GIHASH(gid), gip, gi_hash); + } + } + gihold(gip); + rw_unlock(&gihashtbl_lock); + return (gip); +} + +/* + * Place another refcount on a gidinfo struct. 
+ */ +void +gihold(gip) + struct gidinfo *gip; +{ + + refcount_acquire(&gip->gi_ref); +} + +/*- + * Since gidinfo structs have a long lifetime, we use an + * opportunistic refcounting scheme to avoid locking the lookup hash + * for each release. + * + * If the refcount hits 0, we need to free the structure, + * which means we need to lock the hash. + * Optimal case: + * After locking the struct and lowering the refcount, if we find + * that we don't need to free, simply unlock and return. + * Suboptimal case: + * If refcount lowering results in need to free, bump the count + * back up, lose the lock and acquire the locks in the proper + * order to try again. + */ +void +gifree(gip) + struct gidinfo *gip; +{ + int old; + + /* Prepare for optimal case. */ + old = gip->gi_ref; + if (old > 1 && atomic_cmpset_int(&gip->gi_ref, old, old - 1)) + return; + + /* Prepare for suboptimal case. */ + rw_wlock(&gihashtbl_lock); + if (refcount_release(&gip->gi_ref)) { + LIST_REMOVE(gip, gi_hash); + rw_wunlock(&gihashtbl_lock); + free(gip, M_GIDINFO); + return; + } + /* + * Someone added a reference between atomic_cmpset_int() and + * rw_wlock(&gihashtbl_lock). + */ + rw_wunlock(&gihashtbl_lock); +} + +/* * Change the count associated with number of processes * a given user is using. 
When 'max' is 0, don't enforce a limit */ ==== //depot/projects/soc2009/trasz_limits/sys/sys/hrl.h#8 (text+ko) ==== @@ -96,6 +96,7 @@ #define HRL_OP_GET_ACC_PID 2 #define HRL_OP_GET_ACC_UID 3 #define HRL_OP_GET_ACC_GID 4 +#define HRL_OP_GET_ACC_JAILID 5 /* * 'hrl_acc' defines resource consumption for a particular ==== //depot/projects/soc2009/trasz_limits/sys/sys/jail.h#3 (text+ko) ==== @@ -30,6 +30,8 @@ #ifndef _SYS_JAIL_H_ #define _SYS_JAIL_H_ +#include + #ifdef _KERNEL struct jail_v0 { u_int32_t version; @@ -171,6 +173,7 @@ char pr_domain[MAXHOSTNAMELEN]; /* (p) jail domainname */ char pr_uuid[HOSTUUIDLEN]; /* (p) jail hostuuid */ unsigned long pr_hostid; /* (p) jail hostid */ + struct hrl_acc pr_accounting; /* (*) HRL resource accounting */ }; #endif /* _KERNEL || _WANT_PRISON */ ==== //depot/projects/soc2009/trasz_limits/sys/sys/resourcevar.h#4 (text+ko) ==== @@ -98,6 +98,21 @@ struct hrl_acc ui_accounting; /* (*) HRL resource accounting */ }; +/* + * Per gid resource consumption + * + * Locking guide: + * (a) Constant from inception + * (b) Lockless, updated using atomics + * (c) Locked by global gihashtbl_lock + */ +struct gidinfo { + LIST_ENTRY(gidinfo) gi_hash; /* (c) hash chain of gidinfos */ + gid_t gi_gid; /* (a) gid */ + u_int gi_ref; /* (b) reference count */ + struct hrl_acc gi_accounting; /* (*) HRL resource accounting */ +}; + struct proc; struct rusage_ext; struct thread; @@ -134,6 +149,11 @@ void uifree(struct uidinfo *uip); void uihashinit(void); void uihold(struct uidinfo *uip); +struct gidinfo + *gifind(gid_t gid); +void gifree(struct gidinfo *gip); +void gihashinit(void); +void gihold(struct gidinfo *gip); #endif /* _KERNEL */ #endif /* !_SYS_RESOURCEVAR_H_ */ ==== //depot/projects/soc2009/trasz_limits/sys/sys/ucred.h#3 (text+ko) ==== @@ -50,6 +50,7 @@ uid_t cr_svuid; /* saved user id */ short cr_ngroups; /* number of groups */ gid_t cr_groups[NGROUPS]; /* groups */ + struct gidinfo *cr_gidinfos[NGROUPS]; /* group resource consumption */ gid_t
cr_rgid; /* real group id */ gid_t cr_svgid; /* saved group id */ struct uidinfo *cr_uidinfo; /* per euid resource consumption */ ==== //depot/projects/soc2009/trasz_limits/usr.sbin/hrl/hrl.c#8 (text+ko) ==== @@ -302,18 +302,19 @@ usage(void) { - fprintf(stderr, "usage: hrl [-u user | -g group | -p pid]\n"); + fprintf(stderr, "usage: hrl [-u user | -g group | -p pid | -j jailid]\n"); + exit(1); } int main(int argc __unused, char **argv __unused) { - int ch, op, pflag = 0, uflag = 0, gflag = 0; + int ch, op, pflag = 0, uflag = 0, gflag = 0, jflag = 0; id_t id = 0; op = HRL_OP_GET_RULES; - while ((ch = getopt(argc, argv, "p:u:g:")) != -1) { + while ((ch = getopt(argc, argv, "p:u:g:j:")) != -1) { switch (ch) { case 'p': pflag = 1; @@ -330,14 +331,19 @@ op = HRL_OP_GET_ACC_GID; id = parse_group(optarg); break; + case 'j': + jflag = 1; + op = HRL_OP_GET_ACC_JAILID; + id = parse_group(optarg); + break; case '?': default: usage(); } } - if (pflag + uflag + gflag > 1) - errx(1, "only one of the -p, -u and -g may be specified " + if (pflag + uflag + gflag + jflag > 1) + errx(1, "only one of the -p, -u, -g and -j may be specified " "at the same time"); switch (op) { @@ -348,6 +354,7 @@ case HRL_OP_GET_ACC_PID: case HRL_OP_GET_ACC_UID: case HRL_OP_GET_ACC_GID: + case HRL_OP_GET_ACC_JAILID: print_accounting(op, id); break; }