Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 03 May 2010 20:13:00 +0000
From:      Alexander Krizhanovsky <ak@natsys-lab.com>
To:        Kostik Belousov <kostikbel@gmail.com>
Cc:        freebsd-hackers@freebsd.org, ur4ina@gmail.com
Subject:   Re: [PATCH] RUSAGE_THREAD
Message-ID:  <4BDF2E4C.4090009@natsys-lab.com>
In-Reply-To: <20100425145929.GJ2422@deviant.kiev.zoral.com.ua>
References:  <4BCBA7F8.3060109@natsys-lab.com> <20100425145929.GJ2422@deviant.kiev.zoral.com.ua>

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------090003080800020009070304
Content-Type: text/plain; charset=KOI8-R; format=flowed
Content-Transfer-Encoding: 7bit

Kostik,

thank you very much for the review!

Kostik Belousov wrote:
> On Mon, Apr 19, 2010 at 12:46:48AM +0000, Alexander Krizhanovsky wrote:
>   
>> Hi all!
>>
>> This patch implements per-thread rusage statistic (like RUSAGE_THREAD in 
>> Linux and RUSAGE_LWP in OpenSolaris).
>>
>> Unfortunately, we have to acquire a number of locks to read/update 
>> system and user times for current thread rusage information because it's 
>> also used for whole process statistic and needs to be zeroed.
>>
>> Any comments are very appreciated.
>>
>> It's tested against 8.0-RELEASE. Appropriate PR is submitted.
>>
>> -- 
>> Alexander Krizhanovsky
>> NatSys Lab. (http://natsys-lab.com)
>> tel: +7 (916) 717-3899, +7 (499) 747-6304
>> e-mail: ak@natsys-lab.com
>>
>>     
> I think this should be somewhat modified before commit.
>
> First, please separate patch into two pieces. One would introduce
> ruxagg_tlock() helper and use it in existing locations instead of
> three-liners. Possibly, add K&R->ANSI C kern_getrusage definition
> change.
>
> Second would actually add RUSAGE_THREAD. There, please follow
> the style(9), in particular, do not initialize the local variables
> at the declaration, instead, use explicit initialization in the code.
>   
Done. I have also introduced third patch for casting microseconds to 
timeval and replaced a few appropriate places in whole kernel code.
patch-rusage-thread-03052010.txt is incremental for 
patch-ruxagg_tlock-03052010.txt, which is by turn incremental for 
patch-usec2timeval-03052010.txt.

I have made following change for calcru1():
-       if ((int64_t)tu < 0) {
-               /* XXX: this should be an assert /phk */
-               printf("calcru: negative runtime of %jd usec for pid %d 
(%s)\n",
-                   (intmax_t)tu, p->p_pid, p->p_comm);
-               tu = ruxp->rux_tu;
-       }
+       KASSERT((int64_t)tu < 0, ("calcru: negative runtime of %jd usec "
+                               "for pid %d (%s)\n",
+                               (intmax_t)tu, p->p_pid, p->p_comm));

tu can become negative at about 300 thousand years of uptime, so this 
condition seems quite unrealistic and indeed should be replaced by an 
assertion. However I didn't understand why it isn't done so far and the 
comment only was added. Did I miss something?

> Should calctru() share the code with calcru1() ? I am esp. concerned
> with sanity check like negative runtime etc. Possibly, this code
> from calcru1() should be separated into function used from both
> calcru1() and calctru().
>
> Man page for getrusage(2) should be updated.
>   
It's added to patch-rusage-thread-03052010.txt.

In the end - shoud I raise a new PR (the original one is kern/145813)?

> Thanks for the submission.
>
>   
>> --- sys/sys/resource.h.orig	2009-10-25 01:10:29.000000000 +0000
>> +++ sys/sys/resource.h	2010-04-11 23:31:14.000000000 +0000
>> @@ -56,6 +56,7 @@
>>  
>>  #define	RUSAGE_SELF	0
>>  #define	RUSAGE_CHILDREN	-1
>> +#define	RUSAGE_THREAD	1
>>  
>>  struct rusage {
>>  	struct timeval ru_utime;	/* user time used */
>> --- sys/kern/kern_resource.c.orig	2009-10-25 01:10:29.000000000 +0000
>> +++ sys/kern/kern_resource.c	2010-04-18 23:49:04.000000000 +0000
>> @@ -76,6 +76,7 @@ static void	calcru1(struct proc *p, stru
>>  		    struct timeval *up, struct timeval *sp);
>>  static int	donice(struct thread *td, struct proc *chgp, int n);
>>  static struct uidinfo *uilookup(uid_t uid);
>> +static void ruxagg_tlock(struct proc *p, struct thread *td);
>>  
>>  /*
>>   * Resource controls and accounting.
>> @@ -623,9 +624,7 @@ lim_cb(void *arg)
>>  		return;
>>  	PROC_SLOCK(p);
>>  	FOREACH_THREAD_IN_PROC(p, td) {
>> -		thread_lock(td);
>> -		ruxagg(&p->p_rux, td);
>> -		thread_unlock(td);
>> +		ruxagg_tlock(p, td);
>>  	}
>>  	PROC_SUNLOCK(p);
>>  	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
>> @@ -836,9 +835,7 @@ calcru(struct proc *p, struct timeval *u
>>  	FOREACH_THREAD_IN_PROC(p, td) {
>>  		if (td->td_incruntime == 0)
>>  			continue;
>> -		thread_lock(td);
>> -		ruxagg(&p->p_rux, td);
>> -		thread_unlock(td);
>> +		ruxagg_tlock(p, td);
>>  	}
>>  	calcru1(p, &p->p_rux, up, sp);
>>  }
>> @@ -918,6 +915,29 @@ calcru1(struct proc *p, struct rusage_ex
>>  	sp->tv_usec = su % 1000000;
>>  }
>>  
>> +static void
>> +calctru(struct thread *td)
>> +{
>> +	u_int64_t tu = cputick2usec(td->td_incruntime);
>> +	u_int64_t ut = td->td_uticks;
>> +	u_int64_t it = td->td_iticks;
>> +	u_int64_t st = td->td_sticks;
>> +	u_int64_t tt, uu, su;
>> +
>> +	tt = ut + st + it;
>> +	if (!tt) {
>> +		/* Avoid divide by zero */
>> +		st = 1;
>> +		tt = 1;
>> +	}
>> +	uu = td->td_ru.ru_utime.tv_usec + (ut * tu) / tt;
>> +	su = td->td_ru.ru_stime.tv_usec + (st * tu) / tt;
>> +	td->td_ru.ru_utime.tv_sec += uu / 1000000;
>> +	td->td_ru.ru_utime.tv_usec = uu % 1000000;
>> +	td->td_ru.ru_stime.tv_sec += su / 1000000;
>> +	td->td_ru.ru_stime.tv_usec = su % 1000000;
>> +}
>> +
>>  #ifndef _SYS_SYSPROTO_H_
>>  struct getrusage_args {
>>  	int	who;
>> @@ -939,10 +959,7 @@ getrusage(td, uap)
>>  }
>>  
>>  int
>> -kern_getrusage(td, who, rup)
>> -	struct thread *td;
>> -	int who;
>> -	struct rusage *rup;
>> +kern_getrusage(struct thread *td, int who, struct rusage *rup)
>>  {
>>  	struct proc *p;
>>  	int error;
>> @@ -961,6 +978,13 @@ kern_getrusage(td, who, rup)
>>  		calccru(p, &rup->ru_utime, &rup->ru_stime);
>>  		break;
>>  
>> +	case RUSAGE_THREAD:
>> +		PROC_SLOCK(p);
>> +		ruxagg_tlock(p, td);
>> +		PROC_SUNLOCK(p);
>> +		*rup = td->td_ru;
>> +		break;
>> +
>>  	default:
>>  		error = EINVAL;
>>  	}
>> @@ -1010,12 +1034,24 @@ ruxagg(struct rusage_ext *rux, struct th
>>  	rux->rux_uticks += td->td_uticks;
>>  	rux->rux_sticks += td->td_sticks;
>>  	rux->rux_iticks += td->td_iticks;
>> +
>> +	/* update thread rusage before ticks counters cleaning */
>> +	calctru(td);
>> +
>>  	td->td_incruntime = 0;
>>  	td->td_uticks = 0;
>>  	td->td_iticks = 0;
>>  	td->td_sticks = 0;
>>  }
>>  
>> +static void
>> +ruxagg_tlock(struct proc *p, struct thread *td)
>> +{
>> +	thread_lock(td);
>> +	ruxagg(&p->p_rux, td);
>> +	thread_unlock(td);
>> +}
>> +
>>  /*
>>   * Update the rusage_ext structure and fetch a valid aggregate rusage
>>   * for proc p if storage for one is supplied.
>> @@ -1030,9 +1066,7 @@ rufetch(struct proc *p, struct rusage *r
>>  	*ru = p->p_ru;
>>  	if (p->p_numthreads > 0)  {
>>  		FOREACH_THREAD_IN_PROC(p, td) {
>> -			thread_lock(td);
>> -			ruxagg(&p->p_rux, td);
>> -			thread_unlock(td);
>> +			ruxagg_tlock(p, td);
>>  			rucollect(ru, &td->td_ru);
>>  		}
>>  	}
>>     
>
>   
>> _______________________________________________
>> freebsd-hackers@freebsd.org mailing list
>> http://lists.freebsd.org/mailman/listinfo/freebsd-hackers
>> To unsubscribe, send any mail to "freebsd-hackers-unsubscribe@freebsd.org"
>>     


-- 
Alexander Krizhanovsky
NatSys Lab. (http://natsys-lab.com)
tel: +7 (916) 717-3899, +7 (499) 747-6304
e-mail: ak@natsys-lab.com


--------------090003080800020009070304
Content-Type: text/plain;
 name="patch-rusage-thread-03052010.txt"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="patch-rusage-thread-03052010.txt"

--- src/sys/sys/resource.h.orig	2010-05-02 16:37:26.537274709 +0000
+++ src/sys/sys/resource.h	2010-05-03 19:45:46.357159359 +0000
@@ -56,6 +56,7 @@
 
 #define	RUSAGE_SELF	0
 #define	RUSAGE_CHILDREN	-1
+#define	RUSAGE_THREAD	1
 
 struct rusage {
 	struct timeval ru_utime;	/* user time used */
--- src/sys/kern/kern_resource.c.orig	2010-05-03 19:42:08.474513380 +0000
+++ src/sys/kern/kern_resource.c	2010-05-03 19:45:46.358157251 +0000
@@ -840,29 +840,35 @@ calcru(struct proc *p, struct timeval *u
 	calcru1(p, &p->p_rux, up, sp);
 }
 
+static __inline uint64_t
+calcru_adjust_time(uint64_t it, uint64_t ut, uint64_t *st)
+{
+	uint64_t tt;
+
+	tt = it + ut + *st;
+	if (tt == 0) {
+		/* Avoid divide by zero. */
+		*st = 1;
+		tt = 1;
+	}
+	return (tt);
+}
+
 static void
 calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
     struct timeval *sp)
 {
 	/* {user, system, interrupt, total} {ticks, usec}: */
-	u_int64_t ut, uu, st, su, it, tt, tu;
+	uint64_t ut, uu, st, su, it, tt, tu;
 
 	ut = ruxp->rux_uticks;
 	st = ruxp->rux_sticks;
 	it = ruxp->rux_iticks;
-	tt = ut + st + it;
-	if (tt == 0) {
-		/* Avoid divide by zero */
-		st = 1;
-		tt = 1;
-	}
+	tt = calcru_adjust_time(it, ut, &st);
 	tu = cputick2usec(ruxp->rux_runtime);
-	if ((int64_t)tu < 0) {
-		/* XXX: this should be an assert /phk */
-		printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
-		    (intmax_t)tu, p->p_pid, p->p_comm);
-		tu = ruxp->rux_tu;
-	}
+	KASSERT((int64_t)tu < 0, ("calcru: negative runtime of %jd usec "
+				"for pid %d (%s)\n",
+				(intmax_t)tu, p->p_pid, p->p_comm));
 
 	if (tu >= ruxp->rux_tu) {
 		/*
@@ -913,6 +919,25 @@ calcru1(struct proc *p, struct rusage_ex
 	usec2timeval(su, sp);
 }
 
+static void
+calctru(struct thread *td)
+{
+	uint64_t ut, uu, st, su, it, tt, tu;
+
+	ut = td->td_uticks;
+	it = td->td_iticks;
+	st = td->td_sticks;
+	tt = calcru_adjust_time(it, ut, &st);
+	tu = cputick2usec(td->td_incruntime);
+	KASSERT((int64_t)tu < 0, ("calctru: negative runtime of %jd usec "
+				"for tid %d\n", (intmax_t)tu, td->td_tid));
+
+	uu = td->td_ru.ru_utime.tv_usec + (ut * tu) / tt;
+	su = td->td_ru.ru_stime.tv_usec + (st * tu) / tt;
+	usec2timeval(&td->td_ru.ru_utime, uu);
+	usec2timeval(&td->td_ru.ru_stime, su);
+}
+
 #ifndef _SYS_SYSPROTO_H_
 struct getrusage_args {
 	int	who;
@@ -953,6 +978,13 @@ kern_getrusage(struct thread *td, int wh
 		calccru(p, &rup->ru_utime, &rup->ru_stime);
 		break;
 
+	case RUSAGE_THREAD:
+		PROC_SLOCK(p);
+		ruxagg_tlock(p, td);
+		PROC_SUNLOCK(p);
+		*rup = td->td_ru;
+		break;
+
 	default:
 		error = EINVAL;
 	}
@@ -1002,6 +1034,10 @@ ruxagg(struct rusage_ext *rux, struct th
 	rux->rux_uticks += td->td_uticks;
 	rux->rux_sticks += td->td_sticks;
 	rux->rux_iticks += td->td_iticks;
+
+	/* Update thread rusage before ticks counters cleaning. */
+	calctru(td);
+
 	td->td_incruntime = 0;
 	td->td_uticks = 0;
 	td->td_iticks = 0;
--- src/lib/libc/sys/getrusage.2.orig	2009-10-25 01:10:29.000000000 +0000
+++ src/lib/libc/sys/getrusage.2	2010-05-03 19:45:46.359155143 +0000
@@ -42,6 +42,7 @@
 .In sys/resource.h
 .Fd "#define	RUSAGE_SELF	 0"
 .Fd "#define	RUSAGE_CHILDREN	-1"
+.Fd "#define	RUSAGE_THREAD	1"
 .Ft int
 .Fn getrusage "int who" "struct rusage *rusage"
 .Sh DESCRIPTION
@@ -49,11 +50,13 @@ The
 .Fn getrusage
 system call
 returns information describing the resources utilized by the current
-process, or all its terminated child processes.
-The
+process, the current thread or all terminated children of the
+current process.
+The corresponding
 .Fa who
 argument is either
-.Dv RUSAGE_SELF
+.Dv RUSAGE_SELF ,
+.Dv RUSAGE_THREAD
 or
 .Dv RUSAGE_CHILDREN .
 The buffer to which

--------------090003080800020009070304
Content-Type: text/plain;
 name="patch-ruxagg_tlock-03052010.txt"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="patch-ruxagg_tlock-03052010.txt"

--- src/sys/kern/kern_resource.c.orig	2010-05-03 19:31:45.051576945 +0000
+++ src/sys/kern/kern_resource.c	2010-05-03 19:42:08.474513380 +0000
@@ -76,6 +76,7 @@ static void	calcru1(struct proc *p, stru
 		    struct timeval *up, struct timeval *sp);
 static int	donice(struct thread *td, struct proc *chgp, int n);
 static struct uidinfo *uilookup(uid_t uid);
+static void	ruxagg_tlock(struct proc *p, struct thread *td);
 
 /*
  * Resource controls and accounting.
@@ -623,9 +624,7 @@ lim_cb(void *arg)
 		return;
 	PROC_SLOCK(p);
 	FOREACH_THREAD_IN_PROC(p, td) {
-		thread_lock(td);
-		ruxagg(&p->p_rux, td);
-		thread_unlock(td);
+		ruxagg_tlock(p, td);
 	}
 	PROC_SUNLOCK(p);
 	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
@@ -836,9 +835,7 @@ calcru(struct proc *p, struct timeval *u
 	FOREACH_THREAD_IN_PROC(p, td) {
 		if (td->td_incruntime == 0)
 			continue;
-		thread_lock(td);
-		ruxagg(&p->p_rux, td);
-		thread_unlock(td);
+		ruxagg_tlock(p, td);
 	}
 	calcru1(p, &p->p_rux, up, sp);
 }
@@ -937,10 +934,7 @@ getrusage(td, uap)
 }
 
 int
-kern_getrusage(td, who, rup)
-	struct thread *td;
-	int who;
-	struct rusage *rup;
+kern_getrusage(struct thread *td, int who, struct rusage *rup)
 {
 	struct proc *p;
 	int error;
@@ -1014,6 +1008,15 @@ ruxagg(struct rusage_ext *rux, struct th
 	td->td_sticks = 0;
 }
 
+static void
+ruxagg_tlock(struct proc *p, struct thread *td)
+{
+
+	thread_lock(td);
+	ruxagg(&p->p_rux, td);
+	thread_unlock(td);
+}
+
 /*
  * Update the rusage_ext structure and fetch a valid aggregate rusage
  * for proc p if storage for one is supplied.
@@ -1028,9 +1031,7 @@ rufetch(struct proc *p, struct rusage *r
 	*ru = p->p_ru;
 	if (p->p_numthreads > 0)  {
 		FOREACH_THREAD_IN_PROC(p, td) {
-			thread_lock(td);
-			ruxagg(&p->p_rux, td);
-			thread_unlock(td);
+			ruxagg_tlock(p, td);
 			rucollect(ru, &td->td_ru);
 		}
 	}

--------------090003080800020009070304
Content-Type: text/plain;
 name="patch-usec2timeval-03052010.txt"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="patch-usec2timeval-03052010.txt"

--- src/sys/sys/time.h.orig	2009-10-25 01:10:29.000000000 +0000
+++ src/sys/sys/time.h	2010-05-03 19:31:45.008535442 +0000
@@ -178,6 +178,14 @@ timeval2bintime(const struct timeval *tv
 
 /* timevaladd and timevalsub are not inlined */
 
+static __inline void
+usec2timeval(uint64_t msec, struct timeval *tv)
+{
+
+	tv->tv_sec = msec / 1000000;
+	tv->tv_usec = msec % 1000000;
+}
+
 #endif /* _KERNEL */
 
 #ifndef _KERNEL			/* NetBSD/OpenBSD compatible interfaces */
--- src/sys/dev/sound/midi/sequencer.c.orig	2009-10-25 01:10:29.000000000 +0000
+++ src/sys/dev/sound/midi/sequencer.c	2010-05-03 19:31:45.037557566 +0000
@@ -65,6 +65,7 @@ __FBSDID("$FreeBSD: src/sys/dev/sound/mi
 #include <sys/kthread.h>
 #include <sys/unistd.h>
 #include <sys/selinfo.h>
+#include <sys/time.h>
 
 #ifdef HAVE_KERNEL_OPTION_HEADERS
 #include "opt_snd.h"
@@ -364,8 +365,7 @@ timer_wait(struct seq_softc *t, int tick
 
 	i = ticks * 60ull * 1000000ull / (t->tempo * t->timerbase);
 
-	when.tv_sec = i / 1000000;
-	when.tv_usec = i % 1000000;
+	usec2timeval(i, &when);
 
 #if 0
 	printf("timer_wait tempo %d timerbase %d ticks %d abs %d u_sec %llu\n",
--- src/sys/kern/kern_ntptime.c.orig	2009-10-25 01:10:29.000000000 +0000
+++ src/sys/kern/kern_ntptime.c	2010-05-03 19:31:45.048574050 +0000
@@ -952,8 +952,7 @@ kern_adjtime(struct thread *td, struct t
 
 	mtx_lock(&Giant);
 	if (olddelta) {
-		atv.tv_sec = time_adjtime / 1000000;
-		atv.tv_usec = time_adjtime % 1000000;
+		usec2timeval(time_adjtime, &atv);
 		if (atv.tv_usec < 0) {
 			atv.tv_usec += 1000000;
 			atv.tv_sec--;
--- src/sys/kern/kern_resource.c.orig	2009-10-25 01:10:29.000000000 +0000
+++ src/sys/kern/kern_resource.c	2010-05-03 19:31:45.051576945 +0000
@@ -912,10 +912,8 @@ calcru1(struct proc *p, struct rusage_ex
 	ruxp->rux_su = su;
 	ruxp->rux_tu = tu;
 
-	up->tv_sec = uu / 1000000;
-	up->tv_usec = uu % 1000000;
-	sp->tv_sec = su / 1000000;
-	sp->tv_usec = su % 1000000;
+	usec2timeval(uu, up);
+	usec2timeval(su, sp);
 }
 
 #ifndef _SYS_SYSPROTO_H_
--- src/sys/netinet/sctp_timer.c.orig	2009-10-25 01:10:29.000000000 +0000
+++ src/sys/netinet/sctp_timer.c	2010-05-03 19:31:45.058544591 +0000
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD: src/sys/netinet/sctp
 #include <netinet/sctp.h>
 #include <netinet/sctp_uio.h>
 #include <netinet/udp.h>
+#include <sys/time.h>
 
 
 void
@@ -75,8 +76,7 @@ sctp_early_fr_timer(struct sctp_inpcb *i
 		cur_rtt = SCTP_BASE_SYSCTL(sctp_early_fr_msec);
 	}
 	cur_rtt *= 1000;
-	tv.tv_sec = cur_rtt / 1000000;
-	tv.tv_usec = cur_rtt % 1000000;
+	usec2timeval(cur_rtt, &tv);
 	min_wait = now;
 	timevalsub(&min_wait, &tv);
 	if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) {

--------------090003080800020009070304--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4BDF2E4C.4090009>