From owner-p4-projects@FreeBSD.ORG Mon Dec 27 10:01:19 2010 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id D96231065693; Mon, 27 Dec 2010 10:01:18 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 908641065679 for ; Mon, 27 Dec 2010 10:01:18 +0000 (UTC) (envelope-from trasz@freebsd.org) Received: from skunkworks.freebsd.org (skunkworks.freebsd.org [IPv6:2001:4f8:fff6::2d]) by mx1.freebsd.org (Postfix) with ESMTP id 7D0918FC15 for ; Mon, 27 Dec 2010 10:01:18 +0000 (UTC) Received: from skunkworks.freebsd.org (localhost [127.0.0.1]) by skunkworks.freebsd.org (8.14.4/8.14.4) with ESMTP id oBRA1IZ9094481 for ; Mon, 27 Dec 2010 10:01:18 GMT (envelope-from trasz@freebsd.org) Received: (from perforce@localhost) by skunkworks.freebsd.org (8.14.4/8.14.4/Submit) id oBRA1Iqm094474 for perforce@freebsd.org; Mon, 27 Dec 2010 10:01:18 GMT (envelope-from trasz@freebsd.org) Date: Mon, 27 Dec 2010 10:01:18 GMT Message-Id: <201012271001.oBRA1Iqm094474@skunkworks.freebsd.org> X-Authentication-Warning: skunkworks.freebsd.org: perforce set sender to trasz@freebsd.org using -f From: Edward Tomasz Napierala To: Perforce Change Reviews Precedence: bulk Cc: Subject: PERFORCE change 187209 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 27 Dec 2010 10:01:19 -0000 http://p4web.freebsd.org/@@187209?ac=10 Change 187209 by trasz@trasz_victim on 2010/12/27 10:01:12 Christmas fixes. Affected files ... .. //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#45 edit .. //depot/projects/soc2009/trasz_limits/sys/sys/proc.h#28 edit .. //depot/projects/soc2009/trasz_limits/sys/vm/vm_pageout.c#20 edit Differences ... ==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#45 (text+ko) ==== @@ -34,7 +34,9 @@ #include "opt_kdtrace.h" +#include #include +#include #include #include #include @@ -619,35 +621,37 @@ #endif } -/* - * Stuff below runs from a "containerd" kernel process. - */ static void -rusage_throttle(struct thread *td, int throttle) +rusage_throttle(struct proc *p, int throttle) { + struct thread *td; u_char oldpri; u_char newpri; int type; if (throttle) { - td->td_flags |= TDF_THROTTLED; + p->p_flag |= P_THROTTLED; newpri = PRI_MIN_IDLE; type = RTP_PRIO_IDLE; - } else if (td->td_flags & TDF_THROTTLED) { - td->td_flags &= ~TDF_THROTTLED; + } else if (p->p_flag & P_THROTTLED) { + p->p_flag &= ~P_THROTTLED; newpri = PRI_MIN_TIMESHARE; type = RTP_PRIO_NORMAL; } else return; - /* Mostly copied from rtp_to_pri(). */ - sched_class(td, type); /* XXX fix */ - oldpri = td->td_user_pri; - sched_user_prio(td, newpri); - if (TD_IS_RUNNING(td) || TD_CAN_RUN(td)) - sched_prio(td, td->td_user_pri); /* XXX dubious */ - if (TD_ON_UPILOCK(td) && oldpri != newpri) - umtx_pi_adjust(td, oldpri); + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + /* Mostly copied from rtp_to_pri(). */ + sched_class(td, type); /* XXX fix */ + oldpri = td->td_user_pri; + sched_user_prio(td, newpri); + if (TD_IS_RUNNING(td) || TD_CAN_RUN(td)) + sched_prio(td, td->td_user_pri); /* XXX dubious */ + if (TD_ON_UPILOCK(td) && oldpri != newpri) + umtx_pi_adjust(td, oldpri); + thread_unlock(td); + } } static void @@ -663,29 +667,24 @@ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { pctcpu_limit = rusage_get_limit(p, RUSAGE_PCTCPU); + pctcpu = 0; PROC_SLOCK(p); - pctcpu = 0; FOREACH_THREAD_IN_PROC(p, td) { ruxagg(p, td); thread_lock(td); pctcpu += sched_pctcpu(td); - /* - * We are making this decision based on data from - * the previous run. The assumption is that this runs - * so often it doesn't matter. - */ - if (pctcpu > pctcpu_limit) - rusage_throttle(td, 1); - else - rusage_throttle(td, 0); thread_unlock(td); } + pctcpu = ((pctcpu * 10000 + FSCALE / 2) >> FSHIFT) / 100; + if (pctcpu > pctcpu_limit) + rusage_throttle(p, 1); + else + rusage_throttle(p, 0); PROC_SUNLOCK(p); rusage_set(p, RUSAGE_CPU, cputick2usec(p->p_rux.rux_runtime)); microuptime(&wallclock); timevalsub(&wallclock, &p->p_stats->p_start); rusage_set(p, RUSAGE_WALLCLOCK, wallclock.tv_sec * 1000000 + wallclock.tv_usec); - pctcpu = ((pctcpu * 10000 + FSCALE / 2) >> FSHIFT) / 100; rusage_set(p, RUSAGE_PCTCPU, pctcpu); } sx_sunlock(&allproc_lock); @@ -700,6 +699,30 @@ }; SYSINIT(containerd, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, kproc_start, &containerd_kp); +static void +container_proc_fork_sched(void *arg __unused, struct proc *p1, + struct proc *newproc, int flags) +{ + uint64_t pctcpu_limit; + + /* + * Newly created process may already be over the %CPU limit. Throttle + * it immediately after fork instead of waiting for containerd. + */ + pctcpu_limit = rusage_get_limit(newproc, RUSAGE_PCTCPU); + if (pctcpu_limit <= 0) + rusage_throttle(newproc, 1); +} + +static void +container_init(void) +{ + + EVENTHANDLER_REGISTER(process_fork, container_proc_fork_sched, NULL, + EVENTHANDLER_PRI_ANY); +} +SYSINIT(container, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, container_init, NULL); + #else /* !CONTAINERS */ int ==== //depot/projects/soc2009/trasz_limits/sys/sys/proc.h#28 (text+ko) ==== @@ -355,7 +355,7 @@ #define TDF_NEEDRESCHED 0x00010000 /* Thread needs to yield. */ #define TDF_NEEDSIGCHK 0x00020000 /* Thread may need signal delivery. */ #define TDF_NOLOAD 0x00040000 /* Ignore during load avg calculations. */ -#define TDF_THROTTLED 0x00080000 /* Throttled due to %cpu usage */ +#define TDF_UNUSED19 0x00080000 /* --available-- */ #define TDF_THRWAKEUP 0x00100000 /* Libthr thread must not suspend itself. */ #define TDF_UNUSED21 0x00200000 /* --available-- */ #define TDF_SWAPINREQ 0x00400000 /* Swapin request due to wakeup. */ @@ -603,6 +603,7 @@ #define P_INMEM 0x10000000 /* Loaded into memory. */ #define P_SWAPPINGOUT 0x20000000 /* Process is being swapped out. */ #define P_SWAPPINGIN 0x40000000 /* Process is being swapped in. */ +#define P_THROTTLED 0x80000000 /* Throttled due to %cpu usage */ #define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE) #define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED) ==== //depot/projects/soc2009/trasz_limits/sys/vm/vm_pageout.c#20 (text+ko) ==== @@ -1711,6 +1711,17 @@ rusage_set(p, RUSAGE_RSS, IDX_TO_OFF(size)); maxsize = OFF_TO_IDX(rusage_get_limit(p, RUSAGE_RSS)); if (size > maxsize) { + /* + * Don't be overly aggressive; this might be + * an innocent process, and the limit could've + * been exceeded by some memory hog. Don't + * try to deactivate more than half of process' + * resident set size. + * + * XXX: Reconsider. + */ + if (maxsize < size / 2) + maxsize = size / 2; vm_pageout_map_deactivate_pages( &vm->vm_map, maxsize); /* Update RSS usage after paging out. */