Date: Thu, 27 Jan 2011 21:11:15 GMT
From: Edward Tomasz Napierala <trasz@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 188243 for review
Message-ID: <201101272111.p0RLBFiG093232@skunkworks.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://p4web.freebsd.org/@@188243?ac=10 Change 188243 by trasz@trasz_victim on 2011/01/27 21:10:21 Don't embed "struct container" inside "struct proc" et al; it's somewhat big, and we don't want #ifdefs in structures, so embedding it would be pessimisation for people who don't want to use containers. While here, clean up includes somewhat. Affected files ... .. //depot/projects/soc2009/trasz_limits/sys/kern/init_main.c#37 edit .. //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#66 edit .. //depot/projects/soc2009/trasz_limits/sys/kern/kern_descrip.c#20 edit .. //depot/projects/soc2009/trasz_limits/sys/kern/kern_jail.c#33 edit .. //depot/projects/soc2009/trasz_limits/sys/kern/kern_loginclass.c#28 edit .. //depot/projects/soc2009/trasz_limits/sys/kern/kern_rctl.c#18 edit .. //depot/projects/soc2009/trasz_limits/sys/kern/kern_resource.c#60 edit .. //depot/projects/soc2009/trasz_limits/sys/kern/vfs_vnops.c#22 edit .. //depot/projects/soc2009/trasz_limits/sys/sys/container.h#25 edit .. //depot/projects/soc2009/trasz_limits/sys/sys/jail.h#20 edit .. //depot/projects/soc2009/trasz_limits/sys/sys/kernel.h#7 edit .. //depot/projects/soc2009/trasz_limits/sys/sys/loginclass.h#13 edit .. //depot/projects/soc2009/trasz_limits/sys/sys/proc.h#30 edit .. //depot/projects/soc2009/trasz_limits/sys/sys/resourcevar.h#25 edit Differences ... ==== //depot/projects/soc2009/trasz_limits/sys/kern/init_main.c#37 (text+ko) ==== @@ -526,6 +526,9 @@ p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem; p->p_cpulimit = RLIM_INFINITY; + /* Initialize resource accounting structures. */ + container_create(&p->p_container); + p->p_stats = pstats_alloc(); /* Allocate a prototype map so we have something to fork. 
*/ ==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#66 (text+ko) ==== @@ -67,6 +67,8 @@ static struct mtx container_lock; MTX_SYSINIT(container_lock, &container_lock, "container lock", MTX_DEF); +static uma_zone_t container_zone; + static void container_sub(struct container *dest, const struct container *src); static void rusage_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount); static void rusage_add_cred_locked(struct ucred *cred, int resource, uint64_t amount); @@ -261,26 +263,29 @@ } void -container_create(struct container *container) +container_create(struct container **containerp) { - int i; + + SDT_PROBE(container, kernel, container, create, containerp, 0, 0, 0, 0); - SDT_PROBE(container, kernel, container, create, container, 0, 0, 0, 0); + KASSERT(*containerp == NULL, ("container already allocated")); - for (i = 0; i <= RUSAGE_MAX; i++) - KASSERT(container->c_resources[i] == 0, - ("container->c_resources[%d] != 0", i)); + *containerp = uma_zalloc(container_zone, M_WAITOK | M_ZERO); } static void -container_destroy_locked(struct container *container) +container_destroy_locked(struct container **containerp) { int i; + struct container *container; - SDT_PROBE(container, kernel, container, destroy, container, 0, 0, 0, 0); + SDT_PROBE(container, kernel, container, destroy, containerp, 0, 0, 0, 0); mtx_assert(&container_lock, MA_OWNED); - KASSERT(container != NULL, ("NULL container")); + KASSERT(containerp != NULL, ("NULL containerp")); + KASSERT(*containerp != NULL, ("NULL container")); + + container = *containerp; for (i = 0; i <= RUSAGE_MAX; i++) { if (rusage_is_sloppy(i)) @@ -294,10 +299,12 @@ "%ju allocated for resource %d\n", container->c_resources[i], i)); } + uma_zfree(container_zone, container); + *containerp = NULL; } void -container_destroy(struct container *container) +container_destroy(struct container **container) { mtx_lock(&container_lock); @@ -349,6 +356,7 @@ PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(amount >= 0, 
("rusage_add: invalid amount for resource %d: %ju", resource, amount)); + KASSERT(p->p_container != NULL, ("rusage_add: NULL container for proc %p", p)); mtx_lock(&container_lock); #ifdef RCTL @@ -359,7 +367,7 @@ return (error); } #endif - container_alloc_resource(&p->p_container, resource, amount); + container_alloc_resource(p->p_container, resource, amount); rusage_add_cred_locked(p->p_ucred, resource, amount); mtx_unlock(&container_lock); @@ -376,10 +384,10 @@ KASSERT(amount >= 0, ("rusage_add_cred: invalid amount for resource %d: %ju", resource, amount)); - container_alloc_resource(&cred->cr_ruidinfo->ui_container, resource, amount); + container_alloc_resource(cred->cr_ruidinfo->ui_container, resource, amount); for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) - container_alloc_resource(&pr->pr_container, resource, amount); - container_alloc_resource(&cred->cr_loginclass->lc_container, resource, amount); + container_alloc_resource(pr->pr_container, resource, amount); + container_alloc_resource(cred->cr_loginclass->lc_container, resource, amount); } /* @@ -416,9 +424,10 @@ PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(amount >= 0, ("rusage_add_force: invalid amount for resource %d: %ju", resource, amount)); + KASSERT(p->p_container != NULL, ("rusage_add_force: NULL container for proc %p", p)); mtx_lock(&container_lock); - container_alloc_resource(&p->p_container, resource, amount); + container_alloc_resource(p->p_container, resource, amount); mtx_unlock(&container_lock); rusage_add_cred(p->p_ucred, resource, amount); } @@ -442,8 +451,9 @@ PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(amount >= 0, ("rusage_set: invalid amount for resource %d: %ju", resource, amount)); + KASSERT(p->p_container != NULL, ("rusage_set_locked: NULL container for proc %p", p)); - diff = amount - p->p_container.c_resources[resource]; + diff = amount - p->p_container->c_resources[resource]; #ifdef notyet KASSERT(diff >= 0 || rusage_is_reclaimable(resource), ("rusage_set: usage of 
non-reclaimable resource %d dropping", @@ -458,7 +468,7 @@ } } #endif - container_alloc_resource(&p->p_container, resource, diff); + container_alloc_resource(p->p_container, resource, diff); if (diff > 0) rusage_add_cred_locked(p->p_ucred, resource, diff); else if (diff < 0) @@ -499,12 +509,13 @@ * We need proc lock to dereference p->p_ucred. */ PROC_LOCK_ASSERT(p, MA_OWNED); - KASSERT(amount >= 0, ("rusage_set: invalid amount for resource %d: %ju", + KASSERT(amount >= 0, ("rusage_set_force: invalid amount for resource %d: %ju", resource, amount)); + KASSERT(p->p_container != NULL, ("rusage_set_force: NULL container for proc %p", p)); mtx_lock(&container_lock); - diff = amount - p->p_container.c_resources[resource]; - container_alloc_resource(&p->p_container, resource, diff); + diff = amount - p->p_container->c_resources[resource]; + container_alloc_resource(p->p_container, resource, diff); if (diff > 0) rusage_add_cred_locked(p->p_ucred, resource, diff); else if (diff < 0) @@ -564,16 +575,17 @@ PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(amount >= 0, ("rusage_sub: invalid amount for resource %d: %ju", resource, amount)); + KASSERT(p->p_container != NULL, ("rusage_sub: NULL container for proc %p", p)); KASSERT(rusage_is_reclaimable(resource), ("rusage_sub: called for non-reclaimable resource %d", resource)); mtx_lock(&container_lock); - KASSERT(amount <= p->p_container.c_resources[resource], + KASSERT(amount <= p->p_container->c_resources[resource], ("rusage_sub: freeing %ju of resource %d, which is more than allocated " "%ld for %s (pid %d)", amount, resource, - p->p_container.c_resources[resource], p->p_comm, p->p_pid)); + p->p_container->c_resources[resource], p->p_comm, p->p_pid)); - container_alloc_resource(&p->p_container, resource, -amount); + container_alloc_resource(p->p_container, resource, -amount); rusage_sub_cred_locked(p->p_ucred, resource, amount); mtx_unlock(&container_lock); } @@ -592,10 +604,10 @@ ("rusage_sub_cred: called for non-reclaimable resource 
%d", resource)); #endif - container_alloc_resource(&cred->cr_ruidinfo->ui_container, resource, -amount); + container_alloc_resource(cred->cr_ruidinfo->ui_container, resource, -amount); for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) - container_alloc_resource(&pr->pr_container, resource, -amount); - container_alloc_resource(&cred->cr_loginclass->lc_container, resource, -amount); + container_alloc_resource(pr->pr_container, resource, -amount); + container_alloc_resource(cred->cr_loginclass->lc_container, resource, -amount); } /* @@ -620,6 +632,11 @@ int i, error = 0; /* + * Create container for the child process. + */ + container_create(&child->p_container); + + /* * No resource accounting for kernel processes. */ if (child->p_flag & P_SYSTEM) @@ -629,49 +646,46 @@ PROC_LOCK(child); mtx_lock(&container_lock); - /* - * Create container for the child process. - */ - bzero(&child->p_container, sizeof(child->p_container)); - container_create(&child->p_container); + if (parent->p_container != NULL) { + /* + * Inherit resource usage. + */ + for (i = 0; i <= RUSAGE_MAX; i++) { + if (parent->p_container->c_resources[i] == 0 || + !rusage_is_inheritable(i)) + continue; - /* - * Inherit resource usage. - */ - for (i = 0; i <= RUSAGE_MAX; i++) { - if (parent->p_container.c_resources[i] == 0 || - !rusage_is_inheritable(i)) - continue; - - error = rusage_set_locked(child, i, parent->p_container.c_resources[i]); - if (error != 0) { - /* - * XXX: The only purpose of these two lines is to prevent from - * tripping checks in container_destroy(). - */ - for (i = 0; i <= RUSAGE_MAX; i++) - rusage_set_locked(child, i, 0); - container_destroy_locked(&child->p_container); - goto out; + error = rusage_set_locked(child, i, parent->p_container->c_resources[i]); + if (error != 0) { + /* + * XXX: The only purpose of these two lines is to prevent from + * tripping checks in container_destroy(). 
+ */ + for (i = 0; i <= RUSAGE_MAX; i++) + rusage_set_locked(child, i, 0); + goto out; + } } + } else { + KASSERT(parent->p_flag & P_SYSTEM, + ("non-system process without container; p = %p", parent)); } -out: #ifdef RCTL - if (error == 0) { - error = rctl_proc_fork(parent, child); - if (error != 0) { - /* - * XXX: The only purpose of these two lines is to prevent from - * tripping checks in container_destroy(). - */ - for (i = 0; i <= RUSAGE_MAX; i++) - rusage_set_locked(child, i, 0); - container_destroy_locked(&child->p_container); - } + error = rctl_proc_fork(parent, child); + if (error != 0) { + /* + * XXX: The only purpose of these two lines is to prevent from + * tripping checks in container_destroy(). + */ + for (i = 0; i <= RUSAGE_MAX; i++) + rusage_set_locked(child, i, 0); } #endif +out: + if (error != 0) + container_destroy_locked(&child->p_container); mtx_unlock(&container_lock); PROC_UNLOCK(child); PROC_UNLOCK(parent); @@ -737,18 +751,18 @@ mtx_lock(&container_lock); if (newuip != olduip) { - container_sub(&olduip->ui_container, &p->p_container); - container_add(&newuip->ui_container, &p->p_container); + container_sub(olduip->ui_container, p->p_container); + container_add(newuip->ui_container, p->p_container); } if (newlc != oldlc) { - container_sub(&oldlc->lc_container, &p->p_container); - container_add(&newlc->lc_container, &p->p_container); + container_sub(oldlc->lc_container, p->p_container); + container_add(newlc->lc_container, p->p_container); } if (newpr != oldpr) { for (pr = oldpr; pr != NULL; pr = pr->pr_parent) - container_sub(&pr->pr_container, &p->p_container); + container_sub(pr->pr_container, p->p_container); for (pr = newpr; pr != NULL; pr = pr->pr_parent) - container_add(&pr->pr_container, &p->p_container); + container_add(pr->pr_container, p->p_container); } mtx_unlock(&container_lock); @@ -833,6 +847,11 @@ timevalsub(&wallclock, &p->p_stats->p_start); pctcpu_limit = rusage_get_available(p, RUSAGE_PCTCPU); PROC_LOCK(p); + if (p->p_flag 
& P_SYSTEM) { + PROC_UNLOCK(p); + continue; + } + PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { ruxagg(p, td); @@ -856,7 +875,7 @@ rusage_throttle(p, 0); mtx_lock(&container_lock); rusage_set_locked(p, RUSAGE_CPU, runtime); - p->p_container.c_resources[RUSAGE_PCTCPU] = 0; + p->p_container->c_resources[RUSAGE_PCTCPU] = 0; rusage_set_locked(p, RUSAGE_PCTCPU, pctcpu); rusage_set_locked(p, RUSAGE_WALLCLOCK, wallclock.tv_sec * 1000000 + wallclock.tv_usec); @@ -873,7 +892,7 @@ containerd, NULL }; -SYSINIT(containerd, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, kproc_start, &containerd_kp); +SYSINIT(containerd, SI_SUB_CONTAINERD, SI_ORDER_FIRST, kproc_start, &containerd_kp); static void container_proc_fork_sched(void *arg __unused, struct proc *p1, @@ -894,10 +913,16 @@ container_init(void) { + container_zone = uma_zcreate("container", sizeof(struct container), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); EVENTHANDLER_REGISTER(process_fork, container_proc_fork_sched, NULL, EVENTHANDLER_PRI_ANY); + /* + * XXX: Move this somewhere. 
+ */ + container_create(&prison0.pr_container); } -SYSINIT(container, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, container_init, NULL); +SYSINIT(container, SI_SUB_CONTAINER, SI_ORDER_FIRST, container_init, NULL); #else /* !CONTAINERS */ @@ -945,12 +970,12 @@ } void -container_create(struct container *container) +container_create(struct container **containerp) { } void -container_destroy(struct container *container) +container_destroy(struct container **containerp) { } ==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_descrip.c#20 (text+ko) ==== @@ -45,6 +45,7 @@ #include <sys/systm.h> #include <sys/conf.h> +#include <sys/container.h> #include <sys/domain.h> #include <sys/fcntl.h> #include <sys/file.h> ==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_jail.c#33 (text+ko) ==== @@ -38,6 +38,7 @@ #include <sys/types.h> #include <sys/kernel.h> #include <sys/systm.h> +#include <sys/container.h> #include <sys/errno.h> #include <sys/sysproto.h> #include <sys/malloc.h> @@ -4270,7 +4271,7 @@ sx_slock(&allprison_lock); TAILQ_FOREACH(pr, &allprison, pr_list) - (callback)(&pr->pr_container, arg2, arg3); + (callback)(pr->pr_container, arg2, arg3); sx_sunlock(&allprison_lock); } ==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_loginclass.c#28 (text+ko) ==== @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> +#include <sys/container.h> #include <sys/eventhandler.h> #include <sys/kernel.h> #include <sys/lock.h> @@ -116,6 +117,7 @@ ("loginclass_find: got too long name")); newlc = malloc(sizeof(*newlc), M_LOGINCLASS, M_ZERO | M_WAITOK); + container_create(&newlc->lc_container); mtx_lock(&loginclasses_lock); LIST_FOREACH(lc, &loginclasses, lc_next) { @@ -125,12 +127,12 @@ /* Found loginclass with a matching name? */ loginclass_acquire(lc); mtx_unlock(&loginclasses_lock); + container_destroy(&newlc->lc_container); free(newlc, M_LOGINCLASS); return (lc); } /* Add new loginclass. 
*/ - container_create(&newlc->lc_container); strcpy(newlc->lc_name, name); refcount_init(&newlc->lc_refcount, 1); LIST_INSERT_HEAD(&loginclasses, newlc, lc_next); @@ -222,7 +224,7 @@ mtx_lock(&loginclasses_lock); LIST_FOREACH(lc, &loginclasses, lc_next) - (callback)(&lc->lc_container, arg2, arg3); + (callback)(lc->lc_container, arg2, arg3); mtx_unlock(&loginclasses_lock); } ==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_rctl.c#18 (text+ko) ==== @@ -135,7 +135,7 @@ { NULL, -1 }}; static void rctl_init(void); -SYSINIT(rctl, SI_SUB_CPU, SI_ORDER_FIRST, rctl_init, NULL); +SYSINIT(rctl, SI_SUB_CONTAINER, SI_ORDER_FIRST, rctl_init, NULL); static uma_zone_t rctl_rule_link_zone; static uma_zone_t rctl_rule_zone; @@ -203,19 +203,19 @@ switch (rule->rr_per) { case RCTL_SUBJECT_TYPE_PROCESS: available = rule->rr_amount - - p->p_container.c_resources[resource]; + p->p_container->c_resources[resource]; break; case RCTL_SUBJECT_TYPE_USER: available = rule->rr_amount - - cred->cr_ruidinfo->ui_container.c_resources[resource]; + cred->cr_ruidinfo->ui_container->c_resources[resource]; break; case RCTL_SUBJECT_TYPE_LOGINCLASS: available = rule->rr_amount - - cred->cr_loginclass->lc_container.c_resources[resource]; + cred->cr_loginclass->lc_container->c_resources[resource]; break; case RCTL_SUBJECT_TYPE_JAIL: available = rule->rr_amount - - cred->cr_prison->pr_container.c_resources[resource]; + cred->cr_prison->pr_container->c_resources[resource]; break; default: panic("rctl_compute_available: unknown per %d", @@ -278,7 +278,7 @@ * There may be more than one matching rule; go through all of them. * Denial should be done last, after logging and sending signals. */ - LIST_FOREACH(link, &p->p_container.c_rule_links, rrl_next) { + LIST_FOREACH(link, &p->p_container->c_rule_links, rrl_next) { rule = link->rrl_rule; if (rule->rr_resource != resource) continue; @@ -357,7 +357,7 @@ * There may be more than one matching rule; go through all of them. 
* Denial should be done last, after logging and sending signals. */ - LIST_FOREACH(link, &p->p_container.c_rule_links, rrl_next) { + LIST_FOREACH(link, &p->p_container->c_rule_links, rrl_next) { rule = link->rrl_rule; if (rule->rr_resource != resource) continue; @@ -387,7 +387,7 @@ * There may be more than one matching rule; go through all of them. * Denial should be done last, after logging and sending signals. */ - LIST_FOREACH(link, &p->p_container.c_rule_links, rrl_next) { + LIST_FOREACH(link, &p->p_container->c_rule_links, rrl_next) { rule = link->rrl_rule; if (rule->rr_resource != resource) continue; @@ -403,7 +403,7 @@ /* * XXX: Think about this _hard_. */ - allocated = p->p_container.c_resources[resource]; + allocated = p->p_container->c_resources[resource]; if (minavailable < INT64_MAX - allocated) minavailable += allocated; if (minavailable < 0) @@ -923,7 +923,7 @@ case RCTL_SUBJECT_TYPE_PROCESS: p = rule->rr_subject.rs_proc; KASSERT(p != NULL, ("rctl_rule_add: NULL proc")); - rctl_container_add_rule(&p->p_container, rule); + rctl_container_add_rule(p->p_container, rule); /* * In case of per-process rule, we don't have anything more * to do. 
@@ -933,19 +933,19 @@ case RCTL_SUBJECT_TYPE_USER: uip = rule->rr_subject.rs_uip; KASSERT(uip != NULL, ("rctl_rule_add: NULL uip")); - rctl_container_add_rule(&uip->ui_container, rule); + rctl_container_add_rule(uip->ui_container, rule); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: lc = rule->rr_subject.hr_loginclass; KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass")); - rctl_container_add_rule(&lc->lc_container, rule); + rctl_container_add_rule(lc->lc_container, rule); break; case RCTL_SUBJECT_TYPE_JAIL: pr = rule->rr_subject.rs_prison; KASSERT(pr != NULL, ("rctl_rule_add: NULL pr")); - rctl_container_add_rule(&pr->pr_container, rule); + rctl_container_add_rule(pr->pr_container, rule); break; default: @@ -986,7 +986,7 @@ rule->rr_subject_type); } - rctl_container_add_rule(&p->p_container, rule); + rctl_container_add_rule(p->p_container, rule); } return (0); @@ -1018,7 +1018,7 @@ filter->rr_subject.rs_proc != NULL) { p = filter->rr_subject.rs_proc; rw_wlock(&rctl_lock); - found = rctl_container_remove_rules(&p->p_container, filter); + found = rctl_container_remove_rules(p->p_container, filter); rw_wunlock(&rctl_lock); if (found) return (0); @@ -1035,7 +1035,7 @@ sx_assert(&allproc_lock, SA_LOCKED); rw_wlock(&rctl_lock); FOREACH_PROC_IN_SYSTEM(p) { - found += rctl_container_remove_rules(&p->p_container, filter); + found += rctl_container_remove_rules(p->p_container, filter); } rw_wunlock(&rctl_lock); @@ -1195,7 +1195,7 @@ error = EINVAL; goto out; } - outputsbuf = rctl_container_to_sbuf(&p->p_container, 0); + outputsbuf = rctl_container_to_sbuf(p->p_container, 0); break; case RCTL_SUBJECT_TYPE_USER: uip = filter->rr_subject.rs_uip; @@ -1203,7 +1203,7 @@ error = EINVAL; goto out; } - outputsbuf = rctl_container_to_sbuf(&uip->ui_container, 1); + outputsbuf = rctl_container_to_sbuf(uip->ui_container, 1); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: lc = filter->rr_subject.hr_loginclass; @@ -1211,7 +1211,7 @@ error = EINVAL; goto out; } - outputsbuf = 
rctl_container_to_sbuf(&lc->lc_container, 1); + outputsbuf = rctl_container_to_sbuf(lc->lc_container, 1); break; case RCTL_SUBJECT_TYPE_JAIL: pr = filter->rr_subject.rs_prison; @@ -1219,7 +1219,7 @@ error = EINVAL; goto out; } - outputsbuf = rctl_container_to_sbuf(&pr->pr_container, 1); + outputsbuf = rctl_container_to_sbuf(pr->pr_container, 1); break; default: error = EINVAL; @@ -1283,7 +1283,7 @@ sx_assert(&allproc_lock, SA_LOCKED); FOREACH_PROC_IN_SYSTEM(p) { rw_rlock(&rctl_lock); - LIST_FOREACH(link, &p->p_container.c_rule_links, rrl_next) { + LIST_FOREACH(link, &p->p_container->c_rule_links, rrl_next) { /* * Non-process rules will be added to the buffer later. * Adding them here would result in duplicated output. @@ -1366,7 +1366,7 @@ KASSERT(sb != NULL, ("sbuf_new failed")); rw_rlock(&rctl_lock); - LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_container.c_rule_links, rrl_next) { + LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_container->c_rule_links, rrl_next) { rctl_rule_to_sbuf(sb, link->rrl_rule); sbuf_printf(sb, ","); } @@ -1498,16 +1498,16 @@ */ rulecnt = 0; rw_rlock(&rctl_lock); - LIST_FOREACH(link, &p->p_container.c_rule_links, rrl_next) { + LIST_FOREACH(link, &p->p_container->c_rule_links, rrl_next) { if (link->rrl_rule->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS) rulecnt++; } - LIST_FOREACH(link, &newuip->ui_container.c_rule_links, rrl_next) + LIST_FOREACH(link, &newuip->ui_container->c_rule_links, rrl_next) rulecnt++; - LIST_FOREACH(link, &newlc->lc_container.c_rule_links, rrl_next) + LIST_FOREACH(link, &newlc->lc_container->c_rule_links, rrl_next) rulecnt++; - LIST_FOREACH(link, &newpr->pr_container.c_rule_links, rrl_next) + LIST_FOREACH(link, &newpr->pr_container->c_rule_links, rrl_next) rulecnt++; rw_runlock(&rctl_lock); @@ -1527,7 +1527,7 @@ * Assign rules to the newly allocated list entries. 
*/ rw_wlock(&rctl_lock); - LIST_FOREACH(link, &p->p_container.c_rule_links, rrl_next) { + LIST_FOREACH(link, &p->p_container->c_rule_links, rrl_next) { if (link->rrl_rule->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS) { if (newlink == NULL) @@ -1539,7 +1539,7 @@ } } - LIST_FOREACH(link, &newuip->ui_container.c_rule_links, rrl_next) { + LIST_FOREACH(link, &newuip->ui_container->c_rule_links, rrl_next) { if (newlink == NULL) goto goaround; rctl_rule_acquire(link->rrl_rule); @@ -1548,7 +1548,7 @@ rulecnt--; } - LIST_FOREACH(link, &newlc->lc_container.c_rule_links, rrl_next) { + LIST_FOREACH(link, &newlc->lc_container->c_rule_links, rrl_next) { if (newlink == NULL) goto goaround; rctl_rule_acquire(link->rrl_rule); @@ -1557,7 +1557,7 @@ rulecnt--; } - LIST_FOREACH(link, &newpr->pr_container.c_rule_links, rrl_next) { + LIST_FOREACH(link, &newpr->pr_container->c_rule_links, rrl_next) { if (newlink == NULL) goto goaround; rctl_rule_acquire(link->rrl_rule); @@ -1570,8 +1570,8 @@ /* * Free the old rule list. */ - while (!LIST_EMPTY(&p->p_container.c_rule_links)) { - link = LIST_FIRST(&p->p_container.c_rule_links); + while (!LIST_EMPTY(&p->p_container->c_rule_links)) { + link = LIST_FIRST(&p->p_container->c_rule_links); LIST_REMOVE(link, rrl_next); rctl_rule_release(link->rrl_rule); uma_zfree(rctl_rule_link_zone, link); @@ -1586,7 +1586,7 @@ while (!LIST_EMPTY(&newrules)) { newlink = LIST_FIRST(&newrules); LIST_REMOVE(newlink, rrl_next); - LIST_INSERT_HEAD(&p->p_container.c_rule_links, + LIST_INSERT_HEAD(&p->p_container->c_rule_links, newlink, rrl_next); } @@ -1623,12 +1623,23 @@ struct rctl_rule_link *link; struct rctl_rule *rule; + LIST_INIT(&child->p_container->c_rule_links); + /* * No limits for kernel processes. */ if (child->p_flag & P_SYSTEM) return (0); + /* + * Nothing to inherit from P_SYSTEM parents. 
+ */ + if (parent->p_container == NULL) { + KASSERT(parent->p_flag & P_SYSTEM, + ("non-system process without container; p = %p", parent)); + return (0); + } + rw_wlock(&rctl_lock); /* @@ -1636,7 +1647,7 @@ * Rules with 'process' subject have to be duplicated in order to make their * rr_subject point to the new process. */ - LIST_FOREACH(link, &parent->p_container.c_rule_links, rrl_next) { + LIST_FOREACH(link, &parent->p_container->c_rule_links, rrl_next) { if (link->rrl_rule->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS) { rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT); if (rule == NULL) @@ -1644,12 +1655,12 @@ KASSERT(rule->rr_subject.rs_proc == parent, ("rule->rr_subject.rs_proc == parent")); rule->rr_subject.rs_proc = child; - error = rctl_container_add_rule_locked(&child->p_container, rule); + error = rctl_container_add_rule_locked(child->p_container, rule); rctl_rule_release(rule); if (error != 0) goto fail; } else { - error = rctl_container_add_rule_locked(&child->p_container, link->rrl_rule); + error = rctl_container_add_rule_locked(child->p_container, link->rrl_rule); if (error != 0) goto fail; } @@ -1659,8 +1670,8 @@ return (0); fail: - while (!LIST_EMPTY(&child->p_container.c_rule_links)) { - link = LIST_FIRST(&child->p_container.c_rule_links); + while (!LIST_EMPTY(&child->p_container->c_rule_links)) { + link = LIST_FIRST(&child->p_container->c_rule_links); LIST_REMOVE(link, rrl_next); rctl_rule_release(link->rrl_rule); uma_zfree(rctl_rule_link_zone, link); @@ -1678,8 +1689,8 @@ struct rctl_rule_link *link; rw_wlock(&rctl_lock); - while (!LIST_EMPTY(&p->p_container.c_rule_links)) { - link = LIST_FIRST(&p->p_container.c_rule_links); + while (!LIST_EMPTY(&p->p_container->c_rule_links)) { + link = LIST_FIRST(&p->p_container->c_rule_links); LIST_REMOVE(link, rrl_next); rctl_rule_release(link->rrl_rule); uma_zfree(rctl_rule_link_zone, link); ==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_resource.c#60 (text+ko) ==== @@ -1205,6 +1205,7 @@ if 
(uip == NULL) { rw_runlock(&uihashtbl_lock); uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO); + container_create(&uip->ui_container); rw_wlock(&uihashtbl_lock); /* * There's a chance someone created our uidinfo while we @@ -1213,6 +1214,7 @@ */ if ((old_uip = uilookup(uid)) != NULL) { /* Someone else beat us to it. */ + container_destroy(&uip->ui_container); free(uip, M_UIDINFO); uip = old_uip; } else { @@ -1221,7 +1223,6 @@ mtx_init(&uip->ui_vmsize_mtx, "ui_vmsize", NULL, MTX_DEF); LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash); - container_create(&uip->ui_container); } } uihold(uip); @@ -1302,7 +1303,7 @@ rw_rlock(&uihashtbl_lock); for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) { LIST_FOREACH(uip, uih, ui_hash) { - (callback)(&uip->ui_container, arg2, arg3); + (callback)(uip->ui_container, arg2, arg3); } } rw_runlock(&uihashtbl_lock); ==== //depot/projects/soc2009/trasz_limits/sys/kern/vfs_vnops.c#22 (text+ko) ==== @@ -39,6 +39,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/container.h> #include <sys/fcntl.h> #include <sys/file.h> #include <sys/kdb.h> ==== //depot/projects/soc2009/trasz_limits/sys/sys/container.h#25 (text+ko) ==== @@ -104,8 +104,8 @@ uint64_t rusage_get_limit(struct proc *p, int resource); uint64_t rusage_get_available(struct proc *p, int resource); -void container_create(struct container *container); -void container_destroy(struct container *container); +void container_create(struct container **containerp); +void container_destroy(struct container **containerp); int container_proc_fork(struct proc *parent, struct proc *child); void container_proc_exit(struct proc *p); ==== //depot/projects/soc2009/trasz_limits/sys/sys/jail.h#20 (text+ko) ==== @@ -30,8 +30,6 @@ #ifndef _SYS_JAIL_H_ #define _SYS_JAIL_H_ -#include <sys/container.h> - #ifdef _KERNEL struct jail_v0 { u_int32_t version; @@ -137,6 +135,8 @@ #define HOSTUUIDLEN 64 +struct container; + /* * This structure describes a prison. 
It is pointed to by all struct * ucreds's of the inmates. pr_ref keeps track of them and is used to @@ -181,7 +181,7 @@ char pr_hostname[MAXHOSTNAMELEN]; /* (p) jail hostname */ char pr_domainname[MAXHOSTNAMELEN]; /* (p) jail domainname */ char pr_hostuuid[HOSTUUIDLEN]; /* (p) jail hostuuid */ - struct container pr_container; /* (*) resource accounting */ + struct container *pr_container; /* (c) resource accounting */ }; #endif /* _KERNEL || _WANT_PRISON */ ==== //depot/projects/soc2009/trasz_limits/sys/sys/kernel.h#7 (text+ko) ==== @@ -109,6 +109,7 @@ SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */ SI_SUB_KLD = 0x2000000, /* KLD and module setup */ SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/ + SI_SUB_CONTAINER = 0x2110000, /* resource accounting */ SI_SUB_RANDOM = 0x2120000, /* random number generator */ SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */ SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */ @@ -169,6 +170,7 @@ SI_SUB_KTHREAD_UPDATE = 0xec00000, /* update daemon*/ SI_SUB_KTHREAD_IDLE = 0xee00000, /* idle procs*/ SI_SUB_SMP = 0xf000000, /* start the APs*/ + SI_SUB_CONTAINERD = 0xf100000, /* start containerd*/ SI_SUB_RUN_SCHEDULER = 0xfffffff /* scheduler*/ }; ==== //depot/projects/soc2009/trasz_limits/sys/sys/loginclass.h#13 (text+ko) ==== @@ -27,7 +27,7 @@ #ifndef _SYS_LOGINCLASS_H_ #define _SYS_LOGINCLASS_H_ -#include <sys/container.h> +struct container; /* * Exactly one of these structures exists per login class. 
@@ -36,7 +36,7 @@ LIST_ENTRY(loginclass) lc_next; char lc_name[MAXLOGNAME]; u_int lc_refcount; - struct container lc_container; + struct container *lc_container; }; void loginclass_acquire(struct loginclass *lc); ==== //depot/projects/soc2009/trasz_limits/sys/sys/proc.h#30 (text+ko) ==== @@ -44,7 +44,6 @@ #ifndef _KERNEL #include <sys/filedesc.h> #endif -#include <sys/container.h> #include <sys/queue.h> #include <sys/_lock.h> #include <sys/lock_profile.h> @@ -158,6 +157,7 @@ * either lock is sufficient for read access, but both locks must be held * for write access. */ +struct container; struct kaudit_record; struct td_sched; struct nlminfo; @@ -526,7 +526,6 @@ int p_boundary_count;/* (c) Num threads at user boundary */ int p_pendingcnt; /* how many signals are pending */ struct itimers *p_itimers; /* (c) POSIX interval timers. */ - uint64_t p_prev_runtime; /* (c) Resource usage accounting. */ /* End area that is zeroed on creation. */ #define p_endzero p_magic @@ -560,7 +559,8 @@ LIST_HEAD(, mqueue_notifier) p_mqnotifier; /* (c) mqueue notifiers.*/ struct kdtrace_proc *p_dtrace; /* (*) DTrace-specific data. */ struct cv p_pwait; /* (*) wait cv for exit/exec */ - struct container p_container; /* (*) Resource usage accounting. */ + uint64_t p_prev_runtime; /* (c) Resource usage accounting. */ + struct container *p_container; /* (b) Resource usage accounting. */ }; #define p_session p_pgrp->pg_session ==== //depot/projects/soc2009/trasz_limits/sys/sys/resourcevar.h#25 (text+ko) ==== @@ -38,7 +38,6 @@ #ifdef _KERNEL #include <sys/_lock.h> #include <sys/_mutex.h> -#include <sys/container.h> #endif /* @@ -80,6 +79,8 @@ int pl_refcnt; /* number of references */ }; +struct container; + /*- * Per uid resource consumption. 
This structure is used to track * the total resource consumption (process count, socket buffer size, @@ -100,7 +101,7 @@ long ui_ptscnt; /* (b) number of pseudo-terminals */ uid_t ui_uid; /* (a) uid */ u_int ui_ref; /* (b) reference count */ - struct container ui_container; /* (*) resource usage accounting */ + struct container *ui_container; /* (a) resource usage accounting */ }; >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201101272111.p0RLBFiG093232>