From owner-p4-projects@FreeBSD.ORG Wed Jan 19 18:22:50 2011
Return-Path:
Delivered-To: p4-projects@freebsd.org
Received: by hub.freebsd.org (Postfix, from userid 32767)
	id 5B00A1065673; Wed, 19 Jan 2011 18:22:50 +0000 (UTC)
Delivered-To: perforce@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 1D96D106566B
	for ; Wed, 19 Jan 2011 18:22:50 +0000 (UTC)
	(envelope-from trasz@freebsd.org)
Received: from skunkworks.freebsd.org (skunkworks.freebsd.org [IPv6:2001:4f8:fff6::2d])
	by mx1.freebsd.org (Postfix) with ESMTP id 0A7388FC1D
	for ; Wed, 19 Jan 2011 18:22:50 +0000 (UTC)
Received: from skunkworks.freebsd.org (localhost [127.0.0.1])
	by skunkworks.freebsd.org (8.14.4/8.14.4) with ESMTP id p0JIMnsK001907
	for ; Wed, 19 Jan 2011 18:22:49 GMT
	(envelope-from trasz@freebsd.org)
Received: (from perforce@localhost)
	by skunkworks.freebsd.org (8.14.4/8.14.4/Submit) id p0JIMnD6001904
	for perforce@freebsd.org; Wed, 19 Jan 2011 18:22:49 GMT
	(envelope-from trasz@freebsd.org)
Date: Wed, 19 Jan 2011 18:22:49 GMT
Message-Id: <201101191822.p0JIMnD6001904@skunkworks.freebsd.org>
X-Authentication-Warning: skunkworks.freebsd.org: perforce set sender to trasz@freebsd.org using -f
From: Edward Tomasz Napierala
To: Perforce Change Reviews
Precedence: bulk
Cc:
Subject: PERFORCE change 187961 for review
X-BeenThere: p4-projects@freebsd.org
X-Mailman-Version: 2.1.5
List-Id: p4 projects tree changes
List-Unsubscribe: ,
List-Archive:
List-Post:
List-Help:
List-Subscribe: ,
X-List-Received-Date: Wed, 19 Jan 2011 18:22:50 -0000

http://p4web.freebsd.org/@@187961?ac=10

Change 187961 by trasz@trasz_victim on 2011/01/19 18:22:01

	Improve RSS enforcement.

Affected files ...

.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#56 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_rctl.c#12 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/sysv_sem.c#10 edit
..
//depot/projects/soc2009/trasz_limits/sys/sys/container.h#22 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/rctl.h#6 edit
.. //depot/projects/soc2009/trasz_limits/sys/vm/vm_pageout.c#23 edit

Differences ...

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#56 (text+ko) ====

@@ -477,6 +477,23 @@
 }
 
 /*
+ * Returns amount of 'resource' the process 'p' can keep allocated.
+ * Allocating more than that would be denied, unless the resource
+ * is marked undeniable. Amount of already allocated resource does
+ * matter.
+ */
+uint64_t
+rusage_get_available(struct proc *p, int resource)
+{
+
+#ifdef RCTL
+	return (rctl_get_available(p, resource));
+#else
+	return (UINT64_MAX);
+#endif
+}
+
+/*
  * Decrease allocation of 'resource' by 'amount' for process 'p'.
  */
 void
@@ -616,6 +633,7 @@
 	rusage_set(p, RUSAGE_FSIZE, 0);
 	rusage_set(p, RUSAGE_NPTS, 0);
 	rusage_set(p, RUSAGE_NTHR, 0);
+	rusage_set(p, RUSAGE_RSS, 0);
 
 #ifdef RCTL
 	rctl_proc_exit(p);

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_rctl.c#12 (text+ko) ====

@@ -389,6 +389,45 @@
 	return (amount);
 }
 
+uint64_t
+rctl_get_available(struct proc *p, int resource)
+{
+	struct rctl_rule *rule;
+	struct rctl_rule_link *link;
+	int64_t available, minavailable, allocated;
+
+	minavailable = INT64_MAX;
+
+	rw_rlock(&rctl_lock);
+
+	/*
+	 * There may be more than one matching rule; go through all of them.
+	 * Denial should be done last, after logging and sending signals.
+	 */
+	LIST_FOREACH(link, &p->p_container.c_rule_links, rrl_next) {
+		rule = link->rrl_rule;
+		if (rule->rr_resource != resource)
+			continue;
+		if (rule->rr_action != RCTL_ACTION_DENY)
+			continue;
+		available = rctl_available_resource(p, rule);
+		if (available < minavailable)
+			minavailable = available;
+	}
+
+	rw_runlock(&rctl_lock);
+
+	/*
+	 * XXX: Think about this _hard_.
+	 */
+	allocated = p->p_container.c_resources[resource];
+	if (minavailable < INT64_MAX - allocated)
+		minavailable += allocated;
+	if (minavailable < 0)
+		minavailable = 0;
+	return (minavailable);
+}
+
 static int
 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
 {

==== //depot/projects/soc2009/trasz_limits/sys/kern/sysv_sem.c#10 (text+ko) ====

@@ -1018,7 +1018,7 @@
 		DPRINTF(("too many sops (max=%d, nsops=%d)\n", seminfo.semopm,
 		    nsops));
 		return (E2BIG);
-	} else if (nsops > rusage_get_limit(td->td_proc, RUSAGE_NSEMOP)) {
+	} else if (nsops > rusage_get_available(td->td_proc, RUSAGE_NSEMOP)) {
 		return (E2BIG);
 	} else
 		sops = malloc(nsops * sizeof(*sops), M_TEMP, M_WAITOK);

==== //depot/projects/soc2009/trasz_limits/sys/sys/container.h#22 (text+ko) ====

@@ -102,6 +102,7 @@
 void rusage_sub(struct proc *p, int resource, uint64_t amount);
 void rusage_sub_cred(struct ucred *cred, int resource, uint64_t amount);
 uint64_t rusage_get_limit(struct proc *p, int resource);
+uint64_t rusage_get_available(struct proc *p, int resource);
 
 void container_create(struct container *container);
 void container_destroy(struct container *container);

==== //depot/projects/soc2009/trasz_limits/sys/sys/rctl.h#6 (text+ko) ====

@@ -121,6 +121,7 @@
 
 int rctl_enforce(struct proc *p, int resource, uint64_t amount);
 uint64_t rctl_get_limit(struct proc *p, int resource);
+uint64_t rctl_get_available(struct proc *p, int resource);
 const char *rctl_resource_name(int resource);
 int rctl_proc_fork(struct proc *parent, struct proc *child);
 void rctl_proc_exit(struct proc *p);

==== //depot/projects/soc2009/trasz_limits/sys/vm/vm_pageout.c#23 (text+ko) ====

@@ -1631,8 +1631,8 @@
 	struct proc *p;
 	struct thread *td;
 	struct vmspace *vm;
-	int breakout, swapout_flags;
-	uint64_t maxsize;
+	int breakout, swapout_flags, tryagain;
+	uint64_t rsize, ravailable;
 
 	while (TRUE) {
 		mtx_lock(&vm_daemon_mtx);
@@ -1651,6 +1651,8 @@
 		 * scan the processes for exceeding their rlimits or if
 		 * process
is swapped out -- deactivate pages
 		 */
+again:
+		tryagain = 0;
 		sx_slock(&allproc_lock);
 		FOREACH_PROC_IN_SYSTEM(p) {
 			vm_pindex_t limit, size;
@@ -1708,29 +1710,39 @@
 				vm_pageout_map_deactivate_pages(
 				    &vm->vm_map, limit);
 			}
-			rusage_set(p, RUSAGE_RSS, IDX_TO_OFF(size));
-			maxsize = OFF_TO_IDX(rusage_get_limit(p, RUSAGE_RSS));
-			if (size > maxsize) {
+			rsize = IDX_TO_OFF(size);
+			rusage_set(p, RUSAGE_RSS, rsize);
+			ravailable = rusage_get_available(p, RUSAGE_RSS);
+			if (rsize > ravailable) {
 				/*
 				 * Don't be overly aggressive; this might be
 				 * an innocent process, and the limit could've
 				 * been exceeded by some memory hog. Don't
-				 * try to deactivate more than half of process'
+				 * try to deactivate more than 1/4th of process'
 				 * resident set size.
 				 *
 				 * XXX: Reconsider.
 				 */
-				if (maxsize < size / 2)
-					maxsize = size / 2;
+				if (ravailable < rsize - (rsize / 4))
+					ravailable = rsize - (rsize / 4);
 				vm_pageout_map_deactivate_pages(
-				    &vm->vm_map, maxsize);
+				    &vm->vm_map, OFF_TO_IDX(ravailable));
 				/* Update RSS usage after paging out. */
 				size = vmspace_resident_count(vm);
-				rusage_set(p, RUSAGE_RSS, IDX_TO_OFF(size));
+				rsize = IDX_TO_OFF(size);
+				rusage_set(p, RUSAGE_RSS, rsize);
+				if (rsize > ravailable)
+					tryagain++;
+				if (tryagain > 20) {
+					printf("still too much: rsize = %ld, ravailable = %zd\n", rsize, ravailable);
+					tryagain = 0;
+				}
 			}
 			vmspace_free(vm);
 		}
 		sx_sunlock(&allproc_lock);
+		if (tryagain != 0)
+			goto again;
 	}
 }
 #endif /* !defined(NO_SWAPPING) */