Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 27 Sep 2012 08:30:11 GMT
From:      "Jukka A. Ukkonen" <jau@oxit.fi>
To:        freebsd-standards@FreeBSD.org
Subject:   Re: standards/170346: Changes to support waitid() and related stuff
Message-ID:  <201209270830.q8R8UBhL054685@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help
The following reply was made to PR standards/170346; it has been noted by GNATS.

From: "Jukka A. Ukkonen" <jau@oxit.fi>
To: bug-followup@FreeBSD.org, jau@iki.fi
Cc:  
Subject: Re: standards/170346: Changes to support waitid() and related stuff
Date: Thu, 27 Sep 2012 11:19:00 +0300

 This is a multi-part message in MIME format.
 --------------040300050600080800080009
 Content-Type: text/plain; charset=ISO-8859-1; format=flowed
 Content-Transfer-Encoding: 7bit
 
 
 Right,
 
 Unless someone finds some really major trouble in this version of the 
 patch I
 will try to avoid any further changes.
 On the whole I consider this now mature enough that it deserves also a 
 32 bit
 shim for 64 bit systems to be included.
 
 Previously Jilles proposed allowing resource usage statistics to be 
 collected
 not only from the child process but also from its children. I had had the
 same idea myself before, but initially I discarded it, just thinking
 "Nah... it would not be frequently used anyhow." Being reminded of the idea
 made me think it over from the point of view that even a potentially rarely
 used feature can be enabling technology for purposes I cannot see yet.
 So, I decided to add the feature. For a while I thought I would simply
 replace the old pointer to struct rusage with a pointer to an array of two
 of these rusage structures, but then it dawned on me that it would be error
 prone. For a compiler a pointer to one structure or a pointer to a two slot
 array of the same type of structures would be pretty much the same thing.
 So, I decided to introduce a completely new struct wrusage (for wider
 rusage) and changed wait6() to take a pointer to one of those instead of
 the old rusage. The new struct wrusage contains two fields
 
 struct rusage    wru_self;
 struct rusage    wru_children;
 
 
 This allows compilers to make a distinction between pointers to two
 distinct structures potentially avoiding some confusion and errors.
 
 I had received some notes offline via e-mail from kib. I have tried to pay
 attention to those comments as well.
 
 When trying this, start with a clean 9.1-prerelease source, apply the patch,
 and run these...
 
 ( cd /usr/src/sys/kern ; make sysent )
 ( cd /usr/src/sys/compat/freebsd32 ; make sysent )
 
 
 Without those "make sysent" commands the build will fail.
 
 Cheers,
 // jau
 
 
 --------------040300050600080800080009
 Content-Type: text/plain; charset=UTF-8;
  name="wait6-waitid-9.1.patch"
 Content-Transfer-Encoding: 7bit
 Content-Disposition: attachment;
  filename="wait6-waitid-9.1.patch"
 
 --- sys/sys/wait.h.orig	2011-09-23 03:51:37.000000000 +0300
 +++ sys/sys/wait.h	2012-09-27 08:17:41.000000000 +0300
 @@ -80,6 +80,8 @@
  #define	WSTOPPED	WUNTRACED   /* SUS compatibility */
  #define	WCONTINUED	4	/* Report a job control continued process. */
  #define	WNOWAIT		8	/* Poll only. Don't delete the proc entry. */
 +#define	WEXITED		16	/* Wait for exited processes. (SUS) */
 +#define	WTRAPPED	32	/* Wait for a process to hit a trap or a breakpoint. (Solaris) */
  
  #if __BSD_VISIBLE
  #define	WLINUXCLONE 0x80000000	/* Wait for kthread spawned from linux_clone. */
 @@ -87,6 +89,8 @@
  
  /*
   * Tokens for special values of the "pid" parameter to wait4.
 + * Extended struct wrusage to collect rusage for both the target
 + * process and its children within one wait6() call.
   */
  #if __BSD_VISIBLE
  #define	WAIT_ANY	(-1)	/* any process */
 @@ -97,12 +101,18 @@
  #include <sys/types.h>
  
  __BEGIN_DECLS
 +struct __siginfo;
  pid_t	wait(int *);
  pid_t	waitpid(pid_t, int *, int);
 +#if __POSIX_VISIBLE >= 200112
 +int	waitid(idtype_t, id_t, struct __siginfo *, int);
 +#endif
  #if __BSD_VISIBLE
  struct rusage;
 +struct wrusage;
  pid_t	wait3(int *, int, struct rusage *);
  pid_t	wait4(pid_t, int *, int, struct rusage *);
 +pid_t	wait6(idtype_t, id_t, int *, int, struct wrusage *, struct __siginfo *);
  #endif
  __END_DECLS
  #endif /* !_KERNEL */
 --- sys/sys/syscallsubr.h.orig	2012-01-06 21:29:16.000000000 +0200
 +++ sys/sys/syscallsubr.h	2012-09-27 08:17:41.000000000 +0300
 @@ -43,6 +43,7 @@
  struct msqid_ds;
  struct rlimit;
  struct rusage;
 +struct wrusage;
  union semun;
  struct sockaddr;
  struct stat;
 @@ -233,6 +234,8 @@
  	    enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg);
  int	kern_wait(struct thread *td, pid_t pid, int *status, int options,
  	    struct rusage *rup);
 +int	kern_wait6(struct thread *td, idtype_t idtype, id_t id, int *status,
 +		   int options, struct wrusage *wrup, siginfo_t *sip);
  int	kern_writev(struct thread *td, int fd, struct uio *auio);
  int	kern_socketpair(struct thread *td, int domain, int type, int protocol,
  	    int *rsv);
 --- sys/bsm/audit_kevents.h.orig	2011-09-23 03:51:37.000000000 +0300
 +++ sys/bsm/audit_kevents.h	2012-09-27 08:17:41.000000000 +0300
 @@ -602,6 +602,7 @@
  #define	AUE_PDKILL		43198	/* FreeBSD. */
  #define	AUE_PDGETPID		43199	/* FreeBSD. */
  #define	AUE_PDWAIT		43200	/* FreeBSD. */
 +#define	AUE_WAIT6		43201	/* FreeBSD. */
  
  /*
   * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
 --- lib/libc/include/namespace.h.orig	2011-09-23 03:51:37.000000000 +0300
 +++ lib/libc/include/namespace.h	2012-09-27 08:17:41.000000000 +0300
 @@ -229,6 +229,7 @@
  #define		socketpair			_socketpair
  #define		usleep				_usleep
  #define		wait4				_wait4
 +#define		wait6				_wait6
  #define		waitpid				_waitpid
  #define		write				_write
  #define		writev				_writev
 --- lib/libc/include/un-namespace.h.orig	2011-09-23 03:51:37.000000000 +0300
 +++ lib/libc/include/un-namespace.h	2012-09-27 08:17:41.000000000 +0300
 @@ -210,6 +210,7 @@
  #undef		socketpair
  #undef		usleep
  #undef		wait4
 +#undef		wait6
  #undef		waitpid
  #undef		write
  #undef		writev
 --- lib/libc/gen/Makefile.inc.orig	2012-03-05 13:43:27.000000000 +0200
 +++ lib/libc/gen/Makefile.inc	2012-09-27 08:17:41.000000000 +0300
 @@ -34,7 +34,7 @@
  	syslog.c telldir.c termios.c time.c times.c timezone.c tls.c \
  	ttyname.c ttyslot.c ualarm.c ulimit.c uname.c unvis.c \
  	usleep.c utime.c utxdb.c valloc.c vis.c wait.c wait3.c waitpid.c \
 -	wordexp.c
 +	waitid.c wordexp.c
  
  CANCELPOINTS_SRCS=sem.c sem_new.c
  .for src in ${CANCELPOINTS_SRCS}
 --- sys/cddl/contrib/opensolaris/uts/common/sys/procset.h.orig	2008-03-29 00:16:13.000000000 +0200
 +++ sys/cddl/contrib/opensolaris/uts/common/sys/procset.h	2012-09-27 08:17:41.000000000 +0300
 @@ -51,6 +51,7 @@
  #define	P_INITUID	0
  #define	P_INITPGID	0
  
 +#ifndef _IDTYPE_T_DECLARED
  
  /*
   *	The following defines the values for an identifier type.  It
 @@ -81,6 +82,9 @@
  	P_PSETID	/* Processor set identifier		*/
  } idtype_t;
  
 +#define	_IDTYPE_T_DECLARED
 +
 +#endif
  
  /*
   *	The following defines the operations which can be performed to
 --- sys/sys/proc.h.orig	2012-08-29 18:21:27.000000000 +0300
 +++ sys/sys/proc.h	2012-09-27 08:17:41.000000000 +0300
 @@ -884,8 +884,7 @@
  void	procinit(void);
  void	proc_linkup0(struct proc *p, struct thread *td);
  void	proc_linkup(struct proc *p, struct thread *td);
 -void	proc_reap(struct thread *td, struct proc *p, int *status, int options,
 -	    struct rusage *rusage);
 +void	proc_reap(struct thread *td, struct proc *p, int *status, int options);
  void	proc_reparent(struct proc *child, struct proc *newparent);
  struct	pstats *pstats_alloc(void);
  void	pstats_fork(struct pstats *src, struct pstats *dst);
 --- lib/libc/gen/Symbol.map.orig	2012-02-21 23:18:59.000000000 +0200
 +++ lib/libc/gen/Symbol.map	2012-09-27 08:17:41.000000000 +0300
 @@ -384,6 +384,8 @@
  	 fdlopen;
  	__FreeBSD_libc_enter_restricted_mode;
  	getcontextx;
 +	waitid;
 +	wait6;
  };
  
  FBSDprivate_1.0 {
 --- sys/sys/types.h.orig	2012-01-02 18:14:52.000000000 +0200
 +++ sys/sys/types.h	2012-09-27 08:17:41.000000000 +0300
 @@ -142,6 +142,44 @@
  #define	_ID_T_DECLARED
  #endif
  
 +#ifndef _IDTYPE_T_DECLARED
 +
 +typedef enum
 +#if defined(__BSD_VISIBLE)
 +	idtype		/* pollutes XPG4.2 namespace */
 +#endif
 +		{
 +	/*
 +	 * These names were mostly lifted from Solaris source code
 +	 * and still use Solaris style naming to avoid breaking any
 +	 * OpenSolaris code which has been ported to FreeBSD.
 +	 * There is no clear FreeBSD counterpart for all of the names.
 +	 * OTOH some have a clear correspondence to FreeBSD entities.
 +	 */
 +	
 +	P_PID,		/* A process identifier.		*/
 +	P_PPID,		/* A parent process identifier.		*/
 +	P_PGID,		/* A process group (job control group)	*/
 +			/* identifier.				*/
 +	P_SID,		/* A session identifier.		*/
 +	P_CID,		/* A scheduling class identifier.	*/
 +	P_UID,		/* A user identifier.			*/
 +	P_GID,		/* A group identifier.			*/
 +	P_ALL,		/* All processes.			*/
 +	P_LWPID,	/* An LWP identifier.			*/
 +	P_TASKID,	/* A task identifier.			*/
 +	P_PROJID,	/* A project identifier.		*/
 +	P_POOLID,	/* A pool identifier.			*/
 +	P_ZONEID,	/* A zone identifier.			*/
 +	P_CTID,		/* A (process) contract identifier.	*/
 +	P_CPUID,	/* CPU identifier.			*/
 +	P_PSETID	/* Processor set identifier		*/
 +} idtype_t;		/* The type of id_t we are using.	*/
 +
 +#define	_IDTYPE_T_DECLARED
 +#endif
 +
 +
  #ifndef _INO_T_DECLARED
  typedef	__ino_t		ino_t;		/* inode number */
  #define	_INO_T_DECLARED
 --- lib/libc/sys/wait.2.orig	2011-09-23 03:51:37.000000000 +0300
 +++ lib/libc/sys/wait.2	2012-09-27 09:30:36.000000000 +0300
 @@ -34,9 +34,11 @@
  .Sh NAME
  .Nm wait ,
  .Nm waitpid ,
 +.Nm waitid ,
 +.Nm wait3 ,
  .Nm wait4 ,
 -.Nm wait3
 -.Nd wait for process termination
 +.Nm wait6
 +.Nd wait for processes to change status
  .Sh LIBRARY
  .Lb libc
  .Sh SYNOPSIS
 @@ -46,12 +48,17 @@
  .Fn wait "int *status"
  .Ft pid_t
  .Fn waitpid "pid_t wpid" "int *status" "int options"
 +.In sys/signal.h
 +.Ft int
 +.Fn waitid "idtype_t idtype" "id_t id" "siginfo_t *info" "int options"
  .In sys/time.h
  .In sys/resource.h
  .Ft pid_t
  .Fn wait3 "int *status" "int options" "struct rusage *rusage"
  .Ft pid_t
  .Fn wait4 "pid_t wpid" "int *status" "int options" "struct rusage *rusage"
 +.Ft pid_t
 +.Fn wait6 "idtype_t idtype" "id_t id" "int *status" "int options" "struct wrusage *wrusage" "siginfo_t *infop"
  .Sh DESCRIPTION
  The
  .Fn wait
 @@ -89,25 +96,207 @@
  The other wait functions are implemented using
  .Fn wait4 .
  .Pp
 +The broadest interface of all functions in this family is
 +.Fn wait6
 +which is otherwise very much like
 +.Fn wait4
 +but with a few very important distinctions.
 +.br
 +It will not wait for exited processes unless the option flag 
 +.Dv WEXITED
 +is explicitly specified.
 +This allows for waiting for processes which have experienced other
 +status changes without having to handle also the exit status from
 +the terminated processes.
 +.br
 +The traditional argument 
 +.Dv rusage
 +has been replaced with a pointer to a new structure
 +.Bd -literal
 +struct wrusage {
 +        struct rusage   wru_self;
 +	struct rusage   wru_children;
 +};
 +.Ed
 +.sp
 +This allows the calling process to collect resource usage statistics
 +from both its own child process and its grandchildren.
 +When no resource usage statistics are needed this pointer can be
 +.Dv NULL .
 +.br
 +Another important difference is the new last argument
 +which must be either 
 +.Dv NULL
 +or a pointer to a
 +.Fa siginfo_t
 +structure.
 +.br
 +Additionally the old
 +.Fa pid_t
 +argument has been split into two separate arguments
 +.Fa idtype_t
 +and
 +.Fa id_t .
 +.br
 +Allowing for the distinction in how the
 +PID or PGID
 +is passed to the routine, calling
 +.Fn wait6
 +with the bits
 +.Dv WEXITED
 +and
 +.Dv WTRAPPED
 +set in the
 +.Fa options
 +and with
 +.Fa infop
 +set to
 +.Dv NULL ,
 +is still functionally quite analogous to calling
 +.Fn wait4 .
 +The separation of
 +.Fa idtype
 +and
 +.Fa id
 +arguments has the benefit, though, that many other types of
 +IDs can be supported as well in addition to PID and PGID.
 +.sp
 +Notice that
 +.Fn wait6
 +is not required by any standard nor is it common in other
 +operating systems.
 +It is simply a generalized API to support in one function call
 +interface any and all of the functionality available through 
 +any of the other
 +.Fn wait*
 +functions and a superset of them all.
 +Do not use it unless you fully accept the implied
 +limitations to the portability of your code.
 +.Pp
  The
 +.Fa idtype
 +and
 +.Fa id
 +arguments specify which processes
 +.Fn waitid
 +and
 +.Fn wait6
 +shall wait for.
 +.Bl -bullet -offset indent
 +.It
 +If
 +.Fa idtype
 +is 
 +.Dv P_PID ,
 +.Fn waitid
 +and
 +.Fn wait6
 +wait for the child process with a process ID equal to
 +.Dv (pid_t)id .
 +.It
 +If
 +.Fa idtype
 +is 
 +.Dv P_PGID ,
 +.Fn waitid
 +and
 +.Fn wait6
 +wait for the child process with a process group ID equal to
 +.Dv (pid_t)id .
 +.It
 +If
 +.Fa idtype
 +is 
 +.Dv P_ALL ,
 +.Fn waitid
 +and
 +.Fn wait6
 +wait for any child process and the
 +.Dv id
 +is ignored.
 +.It
 +If
 +.Fa idtype
 +is 
 +.Dv P_PID
 +or
 +.Dv P_PGID
 +and the
 +.Dv id
 +is zero,
 +.Fn waitid
 +and
 +.Fn wait6
 +wait for any child process in the same process group as the caller.
 +.It
 +While no standard actually requires such functionality,
 +this implementation also supports waiting on other types of IDs.
 +.br
 +Notice anyhow that using any of these non-standard features will
 +most likely seriously degrade the portability of your code.
 +Consider such use only as enabling technology for new creative
 +experimentation locked into its original environment.
 +.br
 +Use
 +.Fa idtype
 +value
 +.Dv P_UID
 +to filter processes based on their effective UID,
 +.Dv P_GID
 +to filter processes based on their effective GID.
 +.br
 +.Dv P_SID
 +could be used to filter based on the session ID.
 +In case the child process started its own new session,
 +SID will be the same as its own PID.
 +Otherwise the SID of a child process will match the caller's SID.
 +.br
 +.Dv P_ZONEID
 +facilitates waiting for processes within a certain jail.
 +.br
 +There could be still more meaningful ID types to wait for
 +like
 +.Dv P_PSETID
 +for processes restricted to a certain set of CPUs,
 +.Dv P_CID
 +to wait for processes in a certain scheduling class or
 +.Dv P_CPUID
 +to wait for processes nailed to a certain CPU.
 +These three
 +have not been implemented at the time of this writing,
 +because the data stored in the thread structures seems
 +to be zeroed when a process terminates before the parent
 +gets to wait for the zombie.
 +They are mentioned here as potentially useful extensions.
 +.El
 +.Pp
 +For all the other
 +.Fn wait*
 +variants the
  .Fa wpid
  argument specifies the set of child processes for which to wait.
 +.Bl -bullet -offset indent
 +.It
  If
  .Fa wpid
  is -1, the call waits for any child process.
 +.It
  If
  .Fa wpid
  is 0,
  the call waits for any child process in the process group of the caller.
 +.It
  If
  .Fa wpid
  is greater than zero, the call waits for the process with process id
  .Fa wpid .
 +.It
  If
  .Fa wpid
  is less than -1, the call waits for any process whose process group id
  equals the absolute value of
  .Fa wpid .
 +.El
  .Pp
  The
  .Fa status
 @@ -116,41 +305,107 @@
  The
  .Fa options
  argument contains the bitwise OR of any of the following options.
 -The
 -.Dv WCONTINUED
 -option indicates that children of the current process that
 +.Bl -tag -width Ds
 +.It Dv WCONTINUED
 +indicates that children of the current process that
  have continued from a job control stop, by receiving a
  .Dv SIGCONT
  signal, should also have their status reported.
 -The
 -.Dv WNOHANG
 -option
 -is used to indicate that the call should not block if
 -there are no processes that wish to report status.
 -If the
 -.Dv WUNTRACED
 -option is set,
 -children of the current process that are stopped
 +.It Dv WNOHANG
 +is used to indicate that the call should not block when
 +there are no processes wishing to report status.
 +.It Dv WUNTRACED
 +indicates that children of the current process which are stopped
  due to a
  .Dv SIGTTIN , SIGTTOU , SIGTSTP ,
  or
  .Dv SIGSTOP
 -signal also have their status reported.
 -The
 -.Dv WSTOPPED
 -option is an alias for
 +signal shall have their status reported.
 +.It Dv WSTOPPED
 +is an alias for
  .Dv WUNTRACED .
 -The
 -.Dv WNOWAIT
 -option keeps the process whose status is returned in a waitable state.
 +.It Dv WTRAPPED
 +allows waiting for processes which have trapped or reached a breakpoint.
 +.It Dv WEXITED
 +indicates that the caller wants to receive status reports from
 +terminated processes.
 +.br
 +This bit is implicitly set for the older functions
 +.Fn wait ,
 +.Fn waitpid ,
 +.Fn wait3 ,
 +and
 +.Fn wait4 
 +to avoid changing their traditional functionality.
 +.br
 +For the more recent APIs 
 +.Fn waitid
 +and
 +.Fn wait6
 +this bit has to be explicitly included in the 
 +.Fa options ,
 +if status reports from terminated processes are expected.
 +.br
 +This has the benefit that while using the latter two APIs
 +it is possible to request status reports only for processes
 +which have experienced some other status change, but which
 +have not terminated.
 +So, it is possible to avoid receiving reports for terminated
 +processes, in those parts of a program which are not able
 +to properly handle zombies and delay zombie processing to
 +other parts which can handle them properly.
 +.It Dv WNOWAIT
 +keeps the process whose status is returned in a waitable state.
  The process may be waited for again after this call completes.
 +.El
 +.sp
 +For the more recent APIs 
 +.Fn waitid
 +and
 +.Fn wait6
 +at least one of the options
 +.Dv WEXITED ,
 +.Dv WUNTRACED ,
 +.Dv WSTOPPED ,
 +.Dv WTRAPPED ,
 +or
 +.Dv WCONTINUED
 +must be specified.
 +Otherwise there will be no events for the call to report.
 +To avoid hanging indefinitely in such a case these functions currently
 +return -1 with errno set to
 +.Dv EINVAL .
  .Pp
  If
  .Fa rusage
  is non-zero, a summary of the resources used by the terminated
  process and all its
 -children is returned (this information is currently not available
 -for stopped or continued processes).
 +children is returned.
 +.Pp
 +If
 +.Fa infop
 +is non-null, it must point to a 
 +.Dv siginfo_t
 +structure which will be filled such that the
 +.Dv si_signo
 +field will always be
 +.Dv SIGCHLD
 +and the field
 +.Dv si_pid
 +will be non-zero, if there is a status change to report.
 +If there are no status changes to report and WNOHANG is applied,
 +both of these fields will be zero.
 +.br
 +When using the
 +.Fn waitid
 +API with the
 +.Dv WNOHANG
 +option set checking these fields is the only way to know whether
 +there were any status changes to report, because the return value
 +from
 +.Fn waitid
 +will be zero as it is for any successful return from
 +.Fn waitid .
  .Pp
  When the
  .Dv WNOHANG
 @@ -284,6 +539,7 @@
  is set to indicate the error.
  .Pp
  If
 +.Fn wait6 ,
  .Fn wait4 ,
  .Fn wait3 ,
  or
 @@ -306,6 +562,18 @@
  is returned and
  .Va errno
  is set to indicate the error.
 +.Pp
 +If
 +.Fn waitid
 +returns because one or more processes have a state change to report,
 +0 is returned.
 +To indicate an error, -1 will be returned and
 +.Dv errno
 +set to an appropriate value.
 +If
 +.Dv WNOHANG
 +was used, 0 can be returned indicating no error, but no processes
 +may have changed state either, if si_signo and/or si_pid are zero.
  .Sh ERRORS
  The
  .Fn wait
 @@ -335,6 +603,14 @@
  or the signal did not have the
  .Dv SA_RESTART
  flag set.
 +.It Bq Er EINVAL
 +An invalid value was specified for
 +.Fa options ,
 +or
 +.Fa idtype
 +and
 +.Fa id
 +do not specify a valid set of processes.
  .El
  .Sh SEE ALSO
  .Xr _exit 2 ,
 @@ -344,11 +620,13 @@
  .Xr siginfo 3
  .Sh STANDARDS
  The
 -.Fn wait
 +.Fn wait ,
 +.Fn waitpid ,
  and
 -.Fn waitpid
 +.Fn waitid
  functions are defined by POSIX;
 -.Fn wait4
 +.Fn wait6 ,
 +.Fn wait4 ,
  and
  .Fn wait3
  are not specified by POSIX.
 --- sys/kern/kern_exit.c.orig	2012-04-05 13:33:39.000000000 +0300
 +++ sys/kern/kern_exit.c	2012-09-27 08:17:41.000000000 +0300
 @@ -684,7 +684,7 @@
   * The dirty work is handled by kern_wait().
   */
  int
 -sys_wait4(struct thread *td, struct wait_args *uap)
 +sys_wait4(struct thread *td, struct wait4_args *uap)
  {
  	struct rusage ru, *rup;
  	int error, status;
 @@ -693,6 +693,7 @@
  		rup = &ru;
  	else
  		rup = NULL;
 +
  	error = kern_wait(td, uap->pid, &status, uap->options, rup);
  	if (uap->status != NULL && error == 0)
  		error = copyout(&status, uap->status, sizeof(status));
 @@ -701,14 +702,50 @@
  	return (error);
  }
  
 +int
 +sys_wait6(struct thread *td, struct wait6_args *uap)
 +{
 +	struct wrusage wru, *wrup;
 +	siginfo_t  si, *sip;
 +	int error, status;
 +	idtype_t idtype;
 +	id_t id;
 +
 +	idtype = uap->idtype;
 +	id = uap->id;
 +
 +	if (uap->wrusage != NULL)
 +		wrup = &wru;
 +	else
 +		wrup = NULL;
 +
 +	if (uap->info != NULL)
 +		sip = &si;
 +	else
 +		sip = NULL;
 +
 +	/*
 +	 *  We expect all callers of wait6()
 +	 *  to know about WEXITED and WTRAPPED!
 +	 */
 +	error = kern_wait6(td, idtype, id, &status, uap->options, wrup, sip);
 +
 +	if (uap->status != NULL && error == 0)
 +		error = copyout(&status, uap->status, sizeof(status));
 +	if (uap->wrusage != NULL && error == 0)
 +		error = copyout(&wru, uap->wrusage, sizeof(wru));
 +	if (uap->info != NULL && error == 0)
 +		error = copyout(&si, uap->info, sizeof(si));
 +	return (error);
 +}
 +
  /*
   * Reap the remains of a zombie process and optionally return status and
   * rusage.  Asserts and will release both the proctree_lock and the process
   * lock as part of its work.
   */
  void
 -proc_reap(struct thread *td, struct proc *p, int *status, int options,
 -    struct rusage *rusage)
 +proc_reap(struct thread *td, struct proc *p, int *status, int options)
  {
  	struct proc *q, *t;
  
 @@ -718,10 +755,7 @@
  	KASSERT(p->p_state == PRS_ZOMBIE, ("proc_reap: !PRS_ZOMBIE"));
  
  	q = td->td_proc;
 -	if (rusage) {
 -		*rusage = p->p_ru;
 -		calcru(p, &rusage->ru_utime, &rusage->ru_stime);
 -	}
 +
  	PROC_SUNLOCK(p);
  	td->td_retval[0] = p->p_pid;
  	if (status)
 @@ -834,8 +868,10 @@
  }
  
  static int
 -proc_to_reap(struct thread *td, struct proc *p, pid_t pid, int *status,
 -    int options, struct rusage *rusage)
 +proc_to_reap(struct thread *td, struct proc *p,
 +	     idtype_t idtype, id_t id, 
 +	     int *status, int options,
 +	     struct wrusage *wrusage, siginfo_t *siginfo)
  {
  	struct proc *q;
  
 @@ -843,15 +879,105 @@
  
  	q = td->td_proc;
  	PROC_LOCK(p);
 -	if (pid != WAIT_ANY && p->p_pid != pid && p->p_pgid != -pid) {
 +
 +	switch (idtype) {
 +	case P_ALL:
 +		break;
 +	case P_PID:
 +		if (p->p_pid != (pid_t) id) {
 +			PROC_UNLOCK(p);
 +			return (0);
 +		}
 +		break;
 +	case P_PGID:
 +		if (p->p_pgid != (pid_t) id) {
 +			PROC_UNLOCK(p);
 +			return (0);
 +		}
 +		break;
 +	case P_SID:
 +		if (p->p_session->s_sid != (pid_t) id) {
 +			PROC_UNLOCK(p);
 +			return (0);
 +		}
 +		break;
 +	case P_UID:
 +		if (p->p_ucred->cr_uid != (uid_t) id) {
 +			PROC_UNLOCK(p);
 +			return (0);
 +		}
 +		break;
 +	case P_GID:
 +		if (p->p_ucred->cr_gid != (gid_t) id) {
 +			PROC_UNLOCK(p);
 +			return (0);
 +		}
 +		break;
 +	case P_ZONEID:	/* jail */
 +		if (! p->p_ucred->cr_prison ||
 +		    (p->p_ucred->cr_prison->pr_id != (int) id)) {
 +			PROC_UNLOCK(p);
 +			return (0);
 +		}
 +		break;
 +#if 0
 +		/*
 +		 * It seems that the thread structures get zeroed out
 +		 * at process exit.
 +		 * This makes toast of all useful info related to
 +		 * CPU, CPU set, and scheduling priority class.
 +		 */
 +	case P_PSETID:
 +		{
 +			struct thread	*td1;
 +
 +			td1 = FIRST_THREAD_IN_PROC(p);
 +			if (td1->td_cpuset->cs_id != (cpusetid_t) id) {
 +				PROC_UNLOCK(p);
 +				return (0);
 +			}
 +		}
 +		break;
 +	case P_CID:
 +		{
 +			struct thread	*td1;
 +
 +			td1 = FIRST_THREAD_IN_PROC(p);
 +			if (td1->td_pri_class != (unsigned) id) {
 +				PROC_UNLOCK(p);
 +				return (0);
 +			}
 +		}
 +		break;
 +	case P_CPUID:
 +		{
 +			struct thread	*td1;
 +
 +			td1 = FIRST_THREAD_IN_PROC(p);
 +			if (td1->td_lastcpu != (unsigned) id) {
 +				PROC_UNLOCK(p);
 +				return (0);
 +			}
 +		}
 +		break;
 +#endif
 +
 +	default:
  		PROC_UNLOCK(p);
  		return (0);
 +		break;
  	}
 +
  	if (p_canwait(td, p)) {
  		PROC_UNLOCK(p);
  		return (0);
  	}
  
 +	if (((options & WEXITED) == 0) && (p->p_state == PRS_ZOMBIE)) {
 +		PROC_UNLOCK(p);
 +		return (0);
 +	}
 +		
  	/*
  	 * This special case handles a kthread spawned by linux_clone
  	 * (see linux_misc.c).  The linux_wait4 and linux_waitpid
 @@ -867,8 +993,63 @@
  	}
  
  	PROC_SLOCK(p);
 +
 +	if (siginfo) {
 +		bzero (siginfo, sizeof (*siginfo));
 +		siginfo->si_signo = SIGCHLD;
 +		siginfo->si_errno = 0;
 +
 +		/*
 +		 *  Right, this is still a rough estimate.
 +		 *  We will fix the cases TRAPPED, STOPPED,
 +		 *  and CONTINUED later.
 +		 */
 +
 +		if (WCOREDUMP(p->p_xstat))
 +			siginfo->si_code = CLD_DUMPED;
 +		else if (WIFSIGNALED(p->p_xstat))
 +			siginfo->si_code = CLD_KILLED;
 +		else
 +			siginfo->si_code = CLD_EXITED;
 +
 +		siginfo->si_pid = p->p_pid;
 +		siginfo->si_uid = p->p_ucred->cr_uid;
 +		siginfo->si_status = p->p_xstat;
 +
 +		/*
 +		 * The si_addr field would be useful additional detail,
 +		 * but apparently the PC value may be lost when we reach
 +		 * this point.
 +		 * bzero() above sets siginfo->si_addr to NULL.
 +		 */
 +	}
 +
 +	/*
 +	 * There should be no reason to limit resources usage info
 +	 * to exited processes only.
 +	 * A snapshot about any resources used by a stopped process
 +	 * may be exactly what is needed.
 +	 * We are now within the same PROC_SLOCK anyway.
 +	 */
 +
 +	if (wrusage) {
 +#if 0
 +		*rusage = p->p_ru;
 +		calcru(p, &rusage->ru_utime, &rusage->ru_stime);
 +#endif
 +		struct rusage *rup;
 +
 +		rup = &wrusage->wru_self;
 +		*rup = p->p_ru;
 +		calcru(p, &rup->ru_utime, &rup->ru_stime);
 +
 +		rup = &wrusage->wru_children;
 +		*rup = p->p_stats->p_cru;
 +		calccru(p, &rup->ru_utime, &rup->ru_stime);
 +	}
 +
  	if (p->p_state == PRS_ZOMBIE) {
 -		proc_reap(td, p, status, options, rusage);
 +		proc_reap(td, p, status, options);
  		return (-1);
  	}
  	PROC_SUNLOCK(p);
 @@ -877,24 +1058,83 @@
  }
  
  int
 -kern_wait(struct thread *td, pid_t pid, int *status, int options,
 -    struct rusage *rusage)
 +kern_wait(struct thread *td, pid_t pid,
 +	  int *status, int options, struct rusage *rusage)
 +{
 +	struct wrusage wru, *wrup;
 +	idtype_t idtype;
 +	id_t id;
 +	int ret;
 +
 +	if (pid == WAIT_ANY) {
 +		idtype = P_ALL;
 +		id = 0;
 +	}
 +	else if (pid <= 0) {
 +		idtype = P_PGID;
 +		id = (id_t)-pid;
 +	}
 +	else {
 +		idtype = P_PID;
 +		id = (id_t)pid;
 +	}
 +
 +	if (rusage)
 +		wrup = &wru;
 +	else
 +		wrup = NULL;
 +
 +	/*
 +	 *  For backward compatibility we implicitly
 +	 *  add flags WEXITED and WTRAPPED here.
 +	 */
 +	options |= (WEXITED | WTRAPPED);
 +
 +	ret = kern_wait6 (td, idtype, id, status, options, wrup, NULL);
 +
 +	if (rusage)
 +		*rusage = wru.wru_self;
 +
 +	return (ret);
 +}
 +
 +int
 +kern_wait6(struct thread *td, idtype_t idtype, id_t id,
 +	   int *status, int options,
 +	   struct wrusage *wrusage, siginfo_t *siginfo)
  {
  	struct proc *p, *q;
  	int error, nfound, ret;
  
 -	AUDIT_ARG_PID(pid);
 +	AUDIT_ARG_VALUE((int)idtype);	/* XXX - This is likely wrong! */
 +	AUDIT_ARG_PID((pid_t)id);	/* XXX - This may be wrong! */
  	AUDIT_ARG_VALUE(options);
  
  	q = td->td_proc;
 -	if (pid == 0) {
 +
 +	if (((pid_t)id == WAIT_MYPGRP) &&
 +	    ((idtype == P_PID) || (idtype == P_PGID))) {
  		PROC_LOCK(q);
 -		pid = -q->p_pgid;
 +		id = (id_t)q->p_pgid;
  		PROC_UNLOCK(q);
 +		idtype = P_PGID;
  	}
 +
  	/* If we don't know the option, just return. */
 -	if (options & ~(WUNTRACED|WNOHANG|WCONTINUED|WNOWAIT|WLINUXCLONE))
 +	if (options & ~(WUNTRACED|WNOHANG|WCONTINUED|
 +			WNOWAIT|WEXITED|WTRAPPED|WLINUXCLONE))
  		return (EINVAL);
 +
 +	if ((options & (WEXITED|WUNTRACED|WCONTINUED|WTRAPPED)) == 0) {
 +		/*
 +		 *  We will be unable to find any matching processes,
 +		 *  because there are no known events to look for.
 +		 *  Tell the programmer (s)he is doing something
 +		 *  patently dysfunctional.
 +		 */
 +		return (EINVAL);
 +	}
 +
  loop:
  	if (q->p_flag & P_STATCHILD) {
  		PROC_LOCK(q);
 @@ -904,7 +1144,8 @@
  	nfound = 0;
  	sx_xlock(&proctree_lock);
  	LIST_FOREACH(p, &q->p_children, p_sibling) {
 -		ret = proc_to_reap(td, p, pid, status, options, rusage);
 +		ret = proc_to_reap(td, p, idtype, id,
 +				   status, options, wrusage, siginfo);
  		if (ret == 0)
  			continue;
  		else if (ret == 1)
 @@ -914,37 +1155,84 @@
  
  		PROC_LOCK(p);
  		PROC_SLOCK(p);
 -		if ((p->p_flag & P_STOPPED_SIG) &&
 +
 +		if ((options & WTRAPPED) &&
 +		    (p->p_flag & P_TRACED) &&
 +		    (p->p_flag & (P_STOPPED_TRACE | P_STOPPED_SIG)) &&
  		    (p->p_suspcount == p->p_numthreads) &&
 -		    (p->p_flag & P_WAITED) == 0 &&
 -		    (p->p_flag & P_TRACED || options & WUNTRACED)) {
 +		    ((p->p_flag & P_WAITED) == 0)) {
  			PROC_SUNLOCK(p);
 -			p->p_flag |= P_WAITED;
 +
 +			if ((options & WNOWAIT) == 0)
 +				p->p_flag |= P_WAITED;
 +
  			sx_xunlock(&proctree_lock);
  			td->td_retval[0] = p->p_pid;
 +
  			if (status)
  				*status = W_STOPCODE(p->p_xstat);
 +			if (siginfo) {
 +				siginfo->si_status = W_STOPCODE(p->p_xstat);
 +				siginfo->si_code = CLD_TRAPPED;
 +			}
 +			if ((options & WNOWAIT) == 0) {
 +				PROC_LOCK(q);
 +				sigqueue_take(p->p_ksi);
 +				PROC_UNLOCK(q);
 +			}
  
 -			PROC_LOCK(q);
 -			sigqueue_take(p->p_ksi);
 -			PROC_UNLOCK(q);
  			PROC_UNLOCK(p);
 +			return (0);
 +		}
 +		if ((options & WUNTRACED) &&
 +		    (p->p_flag & P_STOPPED_SIG) &&
 +		    (p->p_suspcount == p->p_numthreads) &&
 +		    ((p->p_flag & P_WAITED) == 0)) {
 +			PROC_SUNLOCK(p);
 +
 +			if ((options & WNOWAIT) == 0)
 +				p->p_flag |= P_WAITED;
 +
 +			sx_xunlock(&proctree_lock);
 +			td->td_retval[0] = p->p_pid;
 +
 +			if (status)
 +				*status = W_STOPCODE(p->p_xstat);
 +			if (siginfo) {
 +				siginfo->si_status = W_STOPCODE(p->p_xstat);
 +				siginfo->si_code = CLD_STOPPED;
 +			}
 +			if ((options & WNOWAIT) == 0) {
 +				PROC_LOCK(q);
 +				sigqueue_take(p->p_ksi);
 +				PROC_UNLOCK(q);
 +			}
  
 +			PROC_UNLOCK(p);
  			return (0);
  		}
  		PROC_SUNLOCK(p);
  		if (options & WCONTINUED && (p->p_flag & P_CONTINUED)) {
  			sx_xunlock(&proctree_lock);
  			td->td_retval[0] = p->p_pid;
 -			p->p_flag &= ~P_CONTINUED;
  
 -			PROC_LOCK(q);
 -			sigqueue_take(p->p_ksi);
 -			PROC_UNLOCK(q);
 +			if ((options & WNOWAIT) == 0) {
 +				p->p_flag &= ~P_CONTINUED;
 +
 +				PROC_LOCK(q);
 +				sigqueue_take(p->p_ksi);
 +				PROC_UNLOCK(q);
 +			}
 +
  			PROC_UNLOCK(p);
  
  			if (status)
  				*status = SIGCONT;
 +			if (siginfo) {
 +				siginfo->si_status = SIGCONT;
 +				siginfo->si_code = CLD_CONTINUED;
 +			}
 +
  			return (0);
  		}
  		PROC_UNLOCK(p);
 @@ -963,7 +1251,8 @@
  	 * to successfully wait until the child becomes a zombie.
  	 */
  	LIST_FOREACH(p, &q->p_orphans, p_orphan) {
 -		ret = proc_to_reap(td, p, pid, status, options, rusage);
 +		ret = proc_to_reap(td, p, idtype, id,
 +				   status, options, wrusage, siginfo);
  		if (ret == 0)
  			continue;
  		else if (ret == 1)
 @@ -977,6 +1266,20 @@
  	}
  	if (options & WNOHANG) {
  		sx_xunlock(&proctree_lock);
 +		/*
 +		 * The only way for the caller of waitid() to know,
 +		 * whether a process was found or not, is by checking
 +		 * the siginfo fields si_signo and si_pid.
 +		 * According to SUS waitid() shall return only 0,
 +		 * not the pid, when a process has changed state.
 +		 * Thus the 0 return value does not differentiate
 +		 * between found or not, but only whether there was
 +		 * an error or not.
 +		 * Because proc_to_reap() above may have modified
 +		 * siginfo we clear it now.
 +		 */
 +		if (siginfo)
 +			bzero (siginfo, sizeof (*siginfo));
  		td->td_retval[0] = 0;
  		return (0);
  	}
 --- lib/libc/sys/Makefile.inc.orig	2012-09-22 15:38:19.000000000 +0300
 +++ lib/libc/sys/Makefile.inc	2012-09-27 08:17:41.000000000 +0300
 @@ -214,5 +214,5 @@
  MLINKS+=truncate.2 ftruncate.2
  MLINKS+=unlink.2 unlinkat.2
  MLINKS+=utimes.2 futimes.2 utimes.2 futimesat.2 utimes.2 lutimes.2
 -MLINKS+=wait.2 wait3.2 wait.2 wait4.2 wait.2 waitpid.2
 +MLINKS+=wait.2 wait3.2 wait.2 wait4.2 wait.2 waitpid.2 wait.2 waitid.2 wait.2 wait6.2
  MLINKS+=write.2 pwrite.2 write.2 pwritev.2 write.2 writev.2
 --- sys/compat/freebsd32/syscalls.master.orig	2012-08-22 22:43:46.000000000 +0300
 +++ sys/compat/freebsd32/syscalls.master	2012-09-27 08:17:41.000000000 +0300
 @@ -997,3 +997,8 @@
  				    uint32_t offset1, uint32_t offset2,\
  				    uint32_t len1, uint32_t len2, \
  				    int advice); }
 +532	AUE_WAIT6	STD	{ int freebsd32_wait6(int idtype, int id, \
 +					    int *status, int options, \
 +					    struct wrusage32 *wrusage, \
 +					    siginfo_t *info); }
 +
 --- sys/kern/syscalls.master.orig	2012-01-06 21:29:16.000000000 +0200
 +++ sys/kern/syscalls.master	2012-09-27 08:17:41.000000000 +0300
 @@ -72,7 +72,7 @@
  6	AUE_CLOSE	STD	{ int close(int fd); }
  7	AUE_WAIT4	STD	{ int wait4(int pid, int *status, \
  				    int options, struct rusage *rusage); } \
 -				    wait4 wait_args int
 +				    wait4 wait4_args int
  8	AUE_CREAT	COMPAT	{ int creat(char *path, int mode); }
  9	AUE_LINK	STD	{ int link(char *path, char *link); }
  10	AUE_UNLINK	STD	{ int unlink(char *path); }
 @@ -949,5 +949,10 @@
  				    off_t offset, off_t len); }
  531	AUE_NULL	STD	{ int posix_fadvise(int fd, off_t offset, \
  				    off_t len, int advice); }
 +532	AUE_WAIT6	STD	{ int wait6(int idtype, int id, \
 +					    int *status, int options, \
 +					    struct wrusage *wrusage, \
 +					    siginfo_t *info); } \
 +					wait6 wait6_args int
  ; Please copy any additions and changes to the following compatability tables:
  ; sys/compat/freebsd32/syscalls.master
 --- sys/compat/freebsd32/freebsd32.h.orig	2012-05-30 07:51:23.000000000 +0300
 +++ sys/compat/freebsd32/freebsd32.h	2012-09-27 08:17:41.000000000 +0300
 @@ -79,6 +79,11 @@
  	int32_t	ru_nivcsw;
  };
  
 +struct wrusage32 {
 +	struct rusage32	wru_self;
 +	struct rusage32 wru_children;
 +};
 +
  struct itimerval32 {
  	struct timeval32 it_interval;
  	struct timeval32 it_value;
 --- sys/compat/freebsd32/freebsd32_misc.c.orig	2012-06-15 13:38:14.000000000 +0300
 +++ sys/compat/freebsd32/freebsd32_misc.c	2012-09-27 08:17:41.000000000 +0300
 @@ -174,6 +174,43 @@
  	return (error);
  }
  
 +int
 +freebsd32_wait6(struct thread *td, struct freebsd32_wait6_args *uap)
 +{
 +	int error, status;
 +	struct wrusage32 wru32;
 +	struct wrusage wru, *wrup;
 +	struct siginfo32 si32;
 +	struct __siginfo si, *sip;
 +
 +	if (uap->wrusage != NULL)
 +		wrup = &wru;
 +	else
 +		wrup = NULL;
 +
 +	if (uap->info != NULL)
 +		sip = &si;
 +	else
 +		sip = NULL;
 +
 +	error = kern_wait6(td, uap->idtype, uap->id, 
 +			   &status, uap->options, wrup, sip);
 +	if (error)
 +		return (error);
 +	if (uap->status != NULL)
 +		error = copyout(&status, uap->status, sizeof(status));
 +	if (uap->wrusage != NULL && error == 0) {
 +		freebsd32_rusage_out(&wru.wru_self, &wru32.wru_self);
 +		freebsd32_rusage_out(&wru.wru_children, &wru32.wru_children);
 +		error = copyout(&wru32, uap->wrusage, sizeof(wru32));
 +	}
 +	if (uap->info != NULL && error == 0) {
 +		siginfo_to_siginfo32 (&si, &si32);
 +		error = copyout(&si32, uap->info, sizeof(si32));
 +	}
 +	return (error);
 +}
 +
  #ifdef COMPAT_FREEBSD4
  static void
  copy_statfs(struct statfs *in, struct statfs32 *out)
 --- sys/sys/resource.h.orig	2012-01-06 18:45:44.000000000 +0200
 +++ sys/sys/resource.h	2012-09-27 08:17:41.000000000 +0300
 @@ -79,6 +79,13 @@
  #define	ru_last		ru_nivcsw
  };
  
 +#if __BSD_VISIBLE
 +struct wrusage {
 +	struct rusage	wru_self;
 +	struct rusage	wru_children;
 +};
 +#endif
 +
  /*
   * Resource limits
   */
 --- /dev/null	2012-09-01 09:22:00.000000000 +0300
 +++ lib/libc/gen/waitid.c	2012-09-01 09:30:16.000000000 +0300
 @@ -0,0 +1,56 @@
 +
 +#include "namespace.h"
 +#include <sys/types.h>
 +#include <sys/wait.h>
 +#include <stddef.h>
 +#include <string.h>
 +#include <signal.h>
 +#include <errno.h>
 +#include "un-namespace.h"
 +
 +int
 +__waitid (idtype, id, info, flags)
 +	idtype_t	idtype;
 +	id_t		id;
 +	siginfo_t	*info;
 +	int		flags;
 +{
 +	int	status;
 +	pid_t	ret;
 +
 +	if (info) {
 +		memset (info, '\0', sizeof (*info));
 +	}
 +
 +	/*
 +	 * In case you wish to start waiting for processes
 +	 * - in a certain scheduling class,
 +	 * - running on a certain CPU,
 +	 * - nailed to a certain CPU set,
 +	 * - etc.,
 +	 * you will have to extend kern_wait6() in
 +	 * the kernel to support such idtype_t flavours.
 +	 */
 +
 +	ret = _wait6 (idtype, id, &status, flags, NULL, info);
 +
 +	/*
 +	 * According to SUS, waitid() shall return only 0, not the PID,
 +	 * when a process is found.
 +	 * If a process was actually found, the siginfo_t fields si_signo
 +	 * and si_pid will be non-zero. If WNOHANG was set in the flags
 +	 * and no process was found, those fields will be set to zero.
 +	 */
 +
 +	if (ret < 0) {
 +		ret = -1;
 +	}
 +	else {
 +		ret = 0;
 +	}
 +
 +	return ((int)ret);
 +}
 +
 +__weak_reference(__waitid, waitid);
 +__weak_reference(__waitid, _waitid);
 
 --------------040300050600080800080009--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201209270830.q8R8UBhL054685>