Date: Fri, 5 Sep 2025 13:33:22 +0000 (UTC) From: "Bjoern A. Zeeb" <bz@FreeBSD.org> To: Jamie Gritton <jamie@FreeBSD.org> Cc: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: Re: git: 851dc7f859c2 - main - jail: add jail descriptors Message-ID: <7nnn754n-7r54-79rp-438n-12nsp9non865@SerrOFQ.bet> In-Reply-To: <202509042031.584KVpxY000408@gitrepo.freebsd.org> References: <202509042031.584KVpxY000408@gitrepo.freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
On Thu, 4 Sep 2025, Jamie Gritton wrote: > The branch main has been updated by jamie: > > URL: https://cgit.FreeBSD.org/src/commit/?id=851dc7f859c23cab09a348bca03ab655534fb7e0 > > commit 851dc7f859c23cab09a348bca03ab655534fb7e0 > Author: Jamie Gritton <jamie@FreeBSD.org> > AuthorDate: 2025-09-04 20:27:47 +0000 > Commit: Jamie Gritton <jamie@FreeBSD.org> > CommitDate: 2025-09-04 20:27:47 +0000 > > jail: add jail descriptors > > Similar to process descriptors, jail descriptors allow jail > administration using the file descriptor interface instead of JIDs. > They come from and can be used by jail_set(2) and jail_get(2), > and there are two new system calls, jail_attach_jd(2) and > jail_remove_jd(2). > > Reviewed by: bz, brooks Just for the record, I looked at this more than a year ago but I have not done any (final) review. Still happy that this landed. > Relnotes: yes > Differential Revision: https://reviews.freebsd.org/D43696 > --- > lib/libjail/jail.c | 64 +++- > lib/libsys/Symbol.sys.map | 2 + > lib/libsys/_libsys.h | 4 + > lib/libsys/jail.2 | 267 ++++++++++++++++- > lib/libsys/syscalls.map | 4 + > sys/compat/freebsd32/freebsd32_syscall.h | 4 +- > sys/compat/freebsd32/freebsd32_syscalls.c | 2 + > sys/compat/freebsd32/freebsd32_sysent.c | 2 + > sys/compat/freebsd32/freebsd32_systrace_args.c | 44 +++ > sys/conf/files | 1 + > sys/kern/init_sysent.c | 2 + > sys/kern/kern_descrip.c | 2 + > sys/kern/kern_jail.c | 396 +++++++++++++++++++++++-- > sys/kern/kern_jaildesc.c | 337 +++++++++++++++++++++ > sys/kern/syscalls.c | 2 + > sys/kern/syscalls.master | 10 + > sys/kern/systrace_args.c | 44 +++ > sys/sys/file.h | 1 + > sys/sys/jail.h | 15 +- > sys/sys/jaildesc.h | 85 ++++++ > sys/sys/syscall.h | 4 +- > sys/sys/syscall.mk | 4 +- > sys/sys/sysproto.h | 10 + > sys/sys/user.h | 4 + > 24 files changed, 1256 insertions(+), 54 deletions(-) > > diff --git a/lib/libjail/jail.c b/lib/libjail/jail.c > index 30282e67866c..931391055919 100644 > --- a/lib/libjail/jail.c > +++ 
b/lib/libjail/jail.c > @@ -75,8 +75,9 @@ int > jail_setv(int flags, ...) > { > va_list ap, tap; > - struct jailparam *jp; > - const char *name, *value; > + struct jailparam *jp, *jp_desc; > + const char *name; > + char *value, *desc_value; > int njp, jid; > > /* Create the parameter list and import the parameters. */ > @@ -86,15 +87,24 @@ jail_setv(int flags, ...) > (void)va_arg(tap, char *); > va_end(tap); > jp = alloca(njp * sizeof(struct jailparam)); > - for (njp = 0; (name = va_arg(ap, char *)) != NULL;) { > + jp_desc = NULL; > + desc_value = NULL; > + for (njp = 0; (name = va_arg(ap, char *)) != NULL; njp++) { > value = va_arg(ap, char *); > if (jailparam_init(jp + njp, name) < 0) > goto error; > - if (jailparam_import(jp + njp++, value) < 0) > + if (jailparam_import(jp + njp, value) < 0) > goto error; > + if (!strcmp(name, "desc") > + && (flags & (JAIL_GET_DESC | JAIL_OWN_DESC))) { > + jp_desc = jp + njp; > + desc_value = value; > + } > } > va_end(ap); > jid = jailparam_set(jp, njp, flags); > + if (jid > 0 && jp_desc != NULL) > + sprintf(desc_value, "%d", *(int *)jp_desc->jp_value); > jailparam_free(jp, njp); > return (jid); > > @@ -112,9 +122,10 @@ int > jail_getv(int flags, ...) > { > va_list ap, tap; > - struct jailparam *jp, *jp_lastjid, *jp_jid, *jp_name, *jp_key; > + struct jailparam *jp, *jp_desc, *jp_lastjid, *jp_jid, *jp_name, *jp_key; > char *valarg, *value; > - const char *name, *key_value, *lastjid_value, *jid_value, *name_value; > + const char *name, *key_value, *desc_value, *lastjid_value, *jid_value; > + const char *name_value; > int njp, i, jid; > > /* Create the parameter list and find the key. */ > @@ -126,15 +137,19 @@ jail_getv(int flags, ...) 
> > jp = alloca(njp * sizeof(struct jailparam)); > va_copy(tap, ap); > - jp_lastjid = jp_jid = jp_name = NULL; > - lastjid_value = jid_value = name_value = NULL; > + jp_desc = jp_lastjid = jp_jid = jp_name = NULL; > + desc_value = lastjid_value = jid_value = name_value = NULL; > for (njp = 0; (name = va_arg(tap, char *)) != NULL; njp++) { > value = va_arg(tap, char *); > if (jailparam_init(jp + njp, name) < 0) { > va_end(tap); > goto error; > } > - if (!strcmp(jp[njp].jp_name, "lastjid")) { > + if (!strcmp(jp[njp].jp_name, "desc") > + && (flags & (JAIL_USE_DESC | JAIL_AT_DESC))) { > + jp_desc = jp + njp; > + desc_value = value; > + } else if (!strcmp(jp[njp].jp_name, "lastjid")) { > jp_lastjid = jp + njp; > lastjid_value = value; > } else if (!strcmp(jp[njp].jp_name, "jid")) { > @@ -147,7 +162,10 @@ jail_getv(int flags, ...) > } > va_end(tap); > /* Import the key parameter. */ > - if (jp_lastjid != NULL) { > + if (jp_desc != NULL && (flags & JAIL_USE_DESC)) { > + jp_key = jp_desc; > + key_value = desc_value; > + } else if (jp_lastjid != NULL) { > jp_key = jp_lastjid; > key_value = lastjid_value; > } else if (jp_jid != NULL && strtol(jid_value, NULL, 10) != 0) { > @@ -163,6 +181,9 @@ jail_getv(int flags, ...) > } > if (jailparam_import(jp_key, key_value) < 0) > goto error; > + if (jp_desc != NULL && jp_desc != jp_key > + && jailparam_import(jp_desc, desc_value) < 0) > + goto error; > /* Get the jail and export the parameters. */ > jid = jailparam_get(jp, njp, flags); > if (jid < 0) > @@ -571,7 +592,7 @@ int > jailparam_get(struct jailparam *jp, unsigned njp, int flags) > { > struct iovec *jiov; > - struct jailparam *jp_lastjid, *jp_jid, *jp_name, *jp_key; > + struct jailparam *jp_desc, *jp_lastjid, *jp_jid, *jp_name, *jp_key; > int i, ai, ki, jid, arrays, sanity; > unsigned j; > > @@ -580,10 +601,13 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags) > * Find the key and any array parameters. 
> */ > jiov = alloca(sizeof(struct iovec) * 2 * (njp + 1)); > - jp_lastjid = jp_jid = jp_name = NULL; > + jp_desc = jp_lastjid = jp_jid = jp_name = NULL; > arrays = 0; > for (ai = j = 0; j < njp; j++) { > - if (!strcmp(jp[j].jp_name, "lastjid")) > + if (!strcmp(jp[j].jp_name, "desc") > + && (flags & (JAIL_USE_DESC | JAIL_AT_DESC))) > + jp_desc = jp + j; > + else if (!strcmp(jp[j].jp_name, "lastjid")) > jp_lastjid = jp + j; > else if (!strcmp(jp[j].jp_name, "jid")) > jp_jid = jp + j; > @@ -599,7 +623,9 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags) > ai++; > } > } > - jp_key = jp_lastjid ? jp_lastjid : > + jp_key = jp_desc && jp_desc->jp_valuelen == sizeof(int) && > + jp_desc->jp_value && (flags & JAIL_USE_DESC) ? jp_desc : > + jp_lastjid ? jp_lastjid : > jp_jid && jp_jid->jp_valuelen == sizeof(int) && > jp_jid->jp_value && *(int *)jp_jid->jp_value ? jp_jid : jp_name; > if (jp_key == NULL || jp_key->jp_value == NULL) { > @@ -622,6 +648,14 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags) > jiov[ki].iov_len = JAIL_ERRMSGLEN; > ki++; > jail_errmsg[0] = 0; > + if (jp_desc != NULL && jp_desc != jp_key) { > + jiov[ki].iov_base = jp_desc->jp_name; > + jiov[ki].iov_len = strlen(jp_desc->jp_name) + 1; > + ki++; > + jiov[ki].iov_base = jp_desc->jp_value; > + jiov[ki].iov_len = jp_desc->jp_valuelen; > + ki++; > + } > if (arrays && jail_get(jiov, ki, flags) < 0) { > if (!jail_errmsg[0]) > snprintf(jail_errmsg, sizeof(jail_errmsg), > @@ -649,7 +683,7 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags) > jiov[ai].iov_base = jp[j].jp_value; > memset(jiov[ai].iov_base, 0, jiov[ai].iov_len); > ai++; > - } else if (jp + j != jp_key) { > + } else if (jp + j != jp_key && jp + j != jp_desc) { > jiov[i].iov_base = jp[j].jp_name; > jiov[i].iov_len = strlen(jp[j].jp_name) + 1; > i++; > diff --git a/lib/libsys/Symbol.sys.map b/lib/libsys/Symbol.sys.map > index 1a297f9df581..e3fd8ac10621 100644 > --- a/lib/libsys/Symbol.sys.map > +++ 
b/lib/libsys/Symbol.sys.map > @@ -382,6 +382,8 @@ FBSD_1.8 { > getrlimitusage; > inotify_add_watch_at; > inotify_rm_watch; > + jail_attach_jd; > + jail_remove_jd; > kcmp; > setcred; > setgroups; > diff --git a/lib/libsys/_libsys.h b/lib/libsys/_libsys.h > index 34eebc1aa67a..6bd768708a78 100644 > --- a/lib/libsys/_libsys.h > +++ b/lib/libsys/_libsys.h > @@ -468,6 +468,8 @@ typedef int (__sys_inotify_add_watch_at_t)(int, int, const char *, uint32_t); > typedef int (__sys_inotify_rm_watch_t)(int, int); > typedef int (__sys_getgroups_t)(int, gid_t *); > typedef int (__sys_setgroups_t)(int, const gid_t *); > +typedef int (__sys_jail_attach_jd_t)(int); > +typedef int (__sys_jail_remove_jd_t)(int); > > _Noreturn void __sys__exit(int rval); > int __sys_fork(void); > @@ -872,6 +874,8 @@ int __sys_inotify_add_watch_at(int fd, int dfd, const char * path, uint32_t mask > int __sys_inotify_rm_watch(int fd, int wd); > int __sys_getgroups(int gidsetsize, gid_t * gidset); > int __sys_setgroups(int gidsetsize, const gid_t * gidset); > +int __sys_jail_attach_jd(int fd); > +int __sys_jail_remove_jd(int fd); > __END_DECLS > > #endif /* __LIBSYS_H_ */ > diff --git a/lib/libsys/jail.2 b/lib/libsys/jail.2 > index 8f8b9925c712..a0f47cc61cb3 100644 > --- a/lib/libsys/jail.2 > +++ b/lib/libsys/jail.2 > @@ -23,7 +23,7 @@ > .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF > .\" SUCH DAMAGE. 
> .\" > -.Dd November 29, 2023 > +.Dd September 4, 2025 > .Dt JAIL 2 > .Os > .Sh NAME > @@ -31,7 +31,9 @@ > .Nm jail_get , > .Nm jail_set , > .Nm jail_remove , > -.Nm jail_attach > +.Nm jail_attach , > +.Nm jail_remove_jd , > +.Nm jail_attach_jd > .Nd create and manage system jails > .Sh LIBRARY > .Lb libc > @@ -44,6 +46,10 @@ > .Fn jail_attach "int jid" > .Ft int > .Fn jail_remove "int jid" > +.Ft int > +.Fn jail_attach_jd "int fd" > +.Ft int > +.Fn jail_remove_jd "int fd" > .In sys/uio.h > .Ft int > .Fn jail_get "struct iovec *iov" "u_int niov" "int flags" > @@ -188,6 +194,29 @@ system call. > This is deprecated in > .Fn jail_set > and has no effect. > +.It Dv JAIL_USE_DESC > +Identify the jail by a descriptor in the > +.Va desc > +parameter. > +.It Dv JAIL_AT_DESC > +Operate in the context of the jail described by the > +.Va desc > +parameter, instead of the current jail. > +Only one of > +.Dv JAIL_USE_DESC > +or > +.Dv JAIL_AT_DESC > +may be specified. > +.It Dv JAIL_GET_DESC > +Return a new jail descriptor for the jail in the > +.Va desc > +parameter. > +.It Dv JAIL_OWN_DESC > +Return an > +.Dq owning > +jail descriptor in the > +.Va desc > +parameter. > .El > .Pp > The > @@ -221,6 +250,9 @@ arguments consists of one or more following flags: > .Bl -tag -width indent > .It Dv JAIL_DYING > Allow getting a jail that is in the process of being removed. > +.It Dv JAIL_USE_DESC , Dv JAIL_AT_DESC , Dv JAIL_GET_DESC , Dv JAIL_OWN_DESC > +These have the same meaning as they do in > +.Fn jail_set . > .El > .Pp > The > @@ -238,6 +270,101 @@ system call removes the jail identified by > .Fa jid . > It will kill all processes belonging to the jail, and remove any children > of that jail. > +.Pp > +The > +.Fn jail_attach_jd > +and > +.Fn jail_remove_jd > +system calls work the same as > +.Fn jail_attach > +and > +.Fn jail_remove , > +except that they operate on the jail identified by jail descriptor > +.Fa fd . 
> +.Ss Jail Descriptors > +In addition to the jail ID, > +jails can be referred to using a jail descriptor, > +a type of file descriptor tied to a particular jail. > +Jail descriptors are created by calling > +.Fn jail_set > +or > +.Fn jail_get > +with the special parameter > +.Va desc , > +and either the > +.Dv JAIL_GET_DESC > +or > +.Dv JAIL_OWN_DESC > +flags set. > +The difference between the two flags is that descriptors created with > +.Dv JAIL_OWN_DESC > +.Po > +called > +.Dq owning > +descriptors > +.Pc > +will automatically remove the jail when the descriptor is closed. > +.Pp > +Jail descriptors can be passed back to > +.Fn jail_set > +or > +.Fn jail_get > +with the > +.Va desc > +parameter, > +and either the > +.Dv JAIL_USE_DESC > +or > +.Dv JAIL_AT_DESC > +flags set. > +With > +.Dv JAIL_USE_DESC , > +the descriptor identifies the jail to operate on, > +instead of the > +.Va jid > +or > +.Va name > +parameter. > +With > +.Dv JAIL_AT_DESC , > +the descriptor is used in place of the current jail, > +allowing accessing or creating jails that are children of the > +descriptor jail. > +.Pp > +The system calls > +.Fn jail_attach_jd > +and > +.Fn jail_remove_jd > +work the same as > +.Fn jail_attach > +and > +.Fn jail_remove , > +except that they operate on the jail referred to by the passed descriptor. > +.Pp > +Jail operations via descriptors can be done by processes that do not > +normally have permission to see or affect the jail, > +as long as they are allowed by the file permissions of the jail > +descriptor itself. > +These permissions can be changed by the descriptor owner via > +.Xr fchmod 2 > +and > +.Xr fchown 2 . > +.Fn jail_get > +requires read permission, > +.Fn jail_set > +and > +.Fn jail_remove > +require write permission, > +and > +.Fn jail_attach > +requires execute permission. > +Also, use of a descriptor with the > +.Dv JAIL_AT_DESC > +flag requires execute permission. 
> +An owning descriptor is identified by the > +.Em sticky bit , > +which may also be changed via > +.Xr fchmod 2 . > .Sh RETURN VALUES > If successful, > .Fn jail , > @@ -249,7 +376,7 @@ They return \-1 on failure, and set > .Va errno > to indicate the error. > .Pp > -.Rv -std jail_attach jail_remove > +.Rv -std jail_attach jail_remove jail_attach_jd jail_remove_jd > .Sh ERRORS > The > .Fn jail > @@ -275,12 +402,44 @@ The > system call > will fail if: > .Bl -tag -width Er > +.It Bq Er EBADF > +The > +.Va desc > +parameter does not refer to a valid jail descriptor, > +and either the > +.Dv JAIL_USE_DESC > +or > +.Dv JAIL_AT_DESC > +flag was set. > +.It Bq Er EACCES > +Write permission is denied on the jail descriptor in the > +.Va desc > +parameter, > +and the > +.Dv JAIL_USE_DESC > +flag was set. > +.It Bq Er EACCES > +Execute permission is denied on the jail descriptor in the > +.Va desc > +parameter, > +and either the > +.Dv JAIL_AT_DESC > +or > +.Dv JAIL_ATTACH > +flag was set. > .It Bq Er EPERM > This process is not allowed to create a jail, either because it is not > the super-user, or because it would exceed the jail's > .Va children.max > limit. > .It Bq Er EPERM > +The jail descriptor in the > +.Va desc > +parameter was created by a user other than the super-user, > +and the > +.Dv JAIL_USE_DESC > +flag was set. > +.It Bq Er EPERM > A jail parameter was set to a less restrictive value then the current > environment. > .It Bq Er EFAULT > @@ -298,8 +457,12 @@ flag is not set. > .It Bq Er ENOENT > The jail referred to by a > .Va jid > -is not accessible by the process, because the process is in a different > -jail. > +parameter is not accessible by the process, because the process is in a > +different jail. > +.It Bq Er ENOENT > +The jail referred to by a > +.Va desc > +parameter has been removed. > .It Bq Er EEXIST > The jail referred to by a > .Va jid > @@ -326,6 +489,24 @@ flags is not set. > A supplied string parameter is longer than allowed. 
> .It Bq Er EAGAIN > There are no jail IDs left. > +.It Bq Er EMFILE > +A jail descriptor could not be created for the > +.Va desc > +parameter with either the > +.Dv JAIL_GET_DESC > +or > +.Dv JAIL_OWN_DESC > +flag set, > +because the process has already reached its limit for open file descriptors. > +.It Bq Er ENFILE > +A jail descriptor could not be created for the > +.Va desc > +parameter with either the > +.Dv JAIL_GET_DESC > +or > +.Dv JAIL_OWN_DESC > +flag set, > +because the system file table is full. > .El > .Pp > The > @@ -333,6 +514,29 @@ The > system call > will fail if: > .Bl -tag -width Er > +.It Bq Er EBADF > +The > +.Va desc > +parameter does not refer to a valid jail descriptor, > +and either the > +.Dv JAIL_USE_DESC > +or > +.Dv JAIL_AT_DESC > +flag was set. > +.It Bq Er EACCES > +Read permission is denied on the jail descriptor in the > +.Va desc > +parameter, > +and the > +.Dv JAIL_USE_DESC > +flag was set. > +.It Bq Er EACCES > +Execute permission is denied on the jail descriptor in the > +.Va desc > +parameter, > +and the > +.Dv JAIL_AT_DESC > +flag was set. > .It Bq Er EFAULT > .Fa Iov , > or one of the addresses contained within it, > @@ -352,10 +556,33 @@ jail. > The > .Va lastjid > parameter is greater than the highest current jail ID. > +.It Bq Er ENOENT > +The jail referred to by a > +.Va desc > +parameter has been removed > +.Pq even if the Dv JAIL_CREATE flag has been set . > .It Bq Er EINVAL > A supplied parameter is the wrong size. > .It Bq Er EINVAL > A supplied parameter name does not match any known parameters. > +.It Bq Er EMFILE > +A jail descriptor could not be created for the > +.Va desc > +parameter with either the > +.Dv JAIL_GET_DESC > +or > +.Dv JAIL_OWN_DESC > +flag set, > +because the process has already reached its limit for open file descriptors. 
> +.It Bq Er ENFILE > +A jail descriptor could not be created for the > +.Va desc > +parameter with either the > +.Dv JAIL_GET_DESC > +or > +.Dv JAIL_OWN_DESC > +flag set, > +because the system file table is full. > .El > .Pp > The > @@ -373,11 +600,39 @@ The jail specified by > does not exist. > .El > .Pp > +The > +.Fn jail_attach_jd > +and > +.Fn jail_remove_jd > +system calls > +will fail if: > +.Bl -tag -width Er > +.It Bq Er EBADF > +The > +.Fa fd > +argument is not a valid jail descriptor. > +.It Bq Er EACCES > +Permission is denied on the jail descriptor > +.Po > +execute permission for > +.Fn jail_attach_jd , > +or write permission for > +.Fn jail_remove_jd > +.Pc . > +.It Bq Er EPERM > +The jail descriptor was created by a user other than the super-user. > +.It Bq Er EINVAL > +The jail specified by > +.Fa fd > +has been removed. > +.El > +.Pp > Further > .Fn jail , > .Fn jail_set , > +.Fn jail_attach , > and > -.Fn jail_attach > +.Fn jail_attach_jd > call > .Xr chroot 2 > internally, so they can fail for all the same reasons. 
> diff --git a/lib/libsys/syscalls.map b/lib/libsys/syscalls.map > index 4cf80a2ffc69..b5400b9849b3 100644 > --- a/lib/libsys/syscalls.map > +++ b/lib/libsys/syscalls.map > @@ -813,4 +813,8 @@ FBSDprivate_1.0 { > __sys_getgroups; > _setgroups; > __sys_setgroups; > + _jail_attach_jd; > + __sys_jail_attach_jd; > + _jail_remove_jd; > + __sys_jail_remove_jd; > }; > diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h > index 90cd21a80923..54063150eef9 100644 > --- a/sys/compat/freebsd32/freebsd32_syscall.h > +++ b/sys/compat/freebsd32/freebsd32_syscall.h > @@ -515,4 +515,6 @@ > #define FREEBSD32_SYS_inotify_rm_watch 594 > #define FREEBSD32_SYS_getgroups 595 > #define FREEBSD32_SYS_setgroups 596 > -#define FREEBSD32_SYS_MAXSYSCALL 597 > +#define FREEBSD32_SYS_jail_attach_jd 597 > +#define FREEBSD32_SYS_jail_remove_jd 598 > +#define FREEBSD32_SYS_MAXSYSCALL 599 > diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c > index f0f8d26554b5..f7cc4c284e4d 100644 > --- a/sys/compat/freebsd32/freebsd32_syscalls.c > +++ b/sys/compat/freebsd32/freebsd32_syscalls.c > @@ -602,4 +602,6 @@ const char *freebsd32_syscallnames[] = { > "inotify_rm_watch", /* 594 = inotify_rm_watch */ > "getgroups", /* 595 = getgroups */ > "setgroups", /* 596 = setgroups */ > + "jail_attach_jd", /* 597 = jail_attach_jd */ > + "jail_remove_jd", /* 598 = jail_remove_jd */ > }; > diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c > index 12f1a346c3e9..18f809ef04e3 100644 > --- a/sys/compat/freebsd32/freebsd32_sysent.c > +++ b/sys/compat/freebsd32/freebsd32_sysent.c > @@ -664,4 +664,6 @@ struct sysent freebsd32_sysent[] = { > { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ > { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t 
*)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */ > { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */ > + { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */ > + { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */ > }; > diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c > index e471c5148021..29a5497e9efa 100644 > --- a/sys/compat/freebsd32/freebsd32_systrace_args.c > +++ b/sys/compat/freebsd32/freebsd32_systrace_args.c > @@ -3413,6 +3413,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) > *n_args = 2; > break; > } > + /* jail_attach_jd */ > + case 597: { > + struct jail_attach_jd_args *p = params; > + iarg[a++] = p->fd; /* int */ > + *n_args = 1; > + break; > + } > + /* jail_remove_jd */ > + case 598: { > + struct jail_remove_jd_args *p = params; > + iarg[a++] = p->fd; /* int */ > + *n_args = 1; > + break; > + } > default: > *n_args = 0; > break; > @@ -9222,6 +9236,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) > break; > }; > break; > + /* jail_attach_jd */ > + case 597: > + switch (ndx) { > + case 0: > + p = "int"; > + break; > + default: > + break; > + }; > + break; > + /* jail_remove_jd */ > + case 598: > + switch (ndx) { > + case 0: > + p = "int"; > + break; > + default: > + break; > + }; > + break; > default: > break; > }; > @@ -11130,6 +11164,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) > if (ndx == 0 || ndx == 1) > p = "int"; > break; > + /* jail_attach_jd 
*/ > + case 597: > + if (ndx == 0 || ndx == 1) > + p = "int"; > + break; > + /* jail_remove_jd */ > + case 598: > + if (ndx == 0 || ndx == 1) > + p = "int"; > + break; > default: > break; > }; > diff --git a/sys/conf/files b/sys/conf/files > index d89813c70355..9661bafea8f9 100644 > --- a/sys/conf/files > +++ b/sys/conf/files > @@ -3808,6 +3808,7 @@ kern/kern_hhook.c standard > kern/kern_idle.c standard > kern/kern_intr.c standard > kern/kern_jail.c standard > +kern/kern_jaildesc.c standard > kern/kern_jailmeta.c standard > kern/kern_kcov.c optional kcov \ > compile-with "${NOSAN_C} ${MSAN_CFLAGS}" > diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c > index fcd232cde21e..e42e7dcf8b44 100644 > --- a/sys/kern/init_sysent.c > +++ b/sys/kern/init_sysent.c > @@ -663,4 +663,6 @@ struct sysent sysent[] = { > { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ > { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */ > { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */ > + { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */ > + { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */ > }; > diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c > index a27ab33b34da..057235574eb5 100644 > --- a/sys/kern/kern_descrip.c > +++ b/sys/kern/kern_descrip.c > @@ -5250,6 +5250,8 @@ file_type_to_name(short type) > return 
("eventfd"); > case DTYPE_TIMERFD: > return ("timerfd"); > + case DTYPE_JAILDESC: > + return ("jail"); > default: > return ("unkn"); > } > diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c > index 52210553016b..5a1fbe23ddeb 100644 > --- a/sys/kern/kern_jail.c > +++ b/sys/kern/kern_jail.c > @@ -39,6 +39,7 @@ > #include <sys/kernel.h> > #include <sys/systm.h> > #include <sys/errno.h> > +#include <sys/file.h> > #include <sys/sysproto.h> > #include <sys/malloc.h> > #include <sys/osd.h> > @@ -49,6 +50,7 @@ > #include <sys/taskqueue.h> > #include <sys/fcntl.h> > #include <sys/jail.h> > +#include <sys/jaildesc.h> > #include <sys/linker.h> > #include <sys/lock.h> > #include <sys/mman.h> > @@ -988,6 +990,8 @@ prison_ip_cnt(const struct prison *pr, const pr_family_t af) > int > kern_jail_set(struct thread *td, struct uio *optuio, int flags) > { > + struct file *jfp_out; > + struct jaildesc *desc_in; > struct nameidata nd; > #ifdef INET > struct prison_ip *ip4; > @@ -998,6 +1002,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) > struct vfsopt *opt; > struct vfsoptlist *opts; > struct prison *pr, *deadpr, *dinspr, *inspr, *mypr, *ppr, *tpr; > + struct ucred *jdcred; > struct vnode *root; > char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid; > char *g_path, *osrelstr; > @@ -1011,7 +1016,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) > int created, cuflags, descend, drflags, enforce; > int error, errmsg_len, errmsg_pos; > int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel; > - int deadid, jid, jsys, len, level; > + int deadid, jfd_in, jfd_out, jfd_pos, jid, jsys, len, level; > int childmax, osreldt, rsnum, slevel; > #ifdef INET > int ip4s; > @@ -1027,17 +1032,26 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) > unsigned tallow; > char numbuf[12]; > > - error = priv_check(td, PRIV_JAIL_SET); > - if (!error && (flags & JAIL_ATTACH)) > - error = priv_check(td, PRIV_JAIL_ATTACH); > - if (error) > - 
return (error); > mypr = td->td_ucred->cr_prison; > - if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) > + if (((flags & (JAIL_CREATE | JAIL_AT_DESC)) == JAIL_CREATE) > + && mypr->pr_childmax == 0) > return (EPERM); > if (flags & ~JAIL_SET_MASK) > return (EINVAL); > + if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC)) > + == (JAIL_USE_DESC | JAIL_AT_DESC)) > + return (EINVAL); > + prison_hold(mypr); > > +#ifdef INET > + ip4 = NULL; > +#endif > +#ifdef INET6 > + ip6 = NULL; > +#endif > + g_path = NULL; > + jfp_out = NULL; > + jfd_out = -1; > /* > * Check all the parameters before committing to anything. Not all > * errors can be caught early, but we may as well try. Also, this > @@ -1050,14 +1064,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) > */ > error = vfs_buildopts(optuio, &opts); > if (error) > - return (error); > -#ifdef INET > - ip4 = NULL; > -#endif > -#ifdef INET6 > - ip6 = NULL; > -#endif > - g_path = NULL; > + goto done_free; > > cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); > if (!cuflags) { > @@ -1066,6 +1073,72 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) > goto done_errmsg; > } > > + error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in)); > + if (error == ENOENT) { > + if (flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | > + JAIL_OWN_DESC)) { > + vfs_opterror(opts, "missing desc"); > + goto done_errmsg; > + } > + jfd_in = -1; > + } else if (error != 0) > + goto done_free; > + else { > + if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | > + JAIL_OWN_DESC))) { > + vfs_opterror(opts, "unexpected desc"); > + goto done_errmsg; > + } > + if (flags & JAIL_AT_DESC) { > + /* > + * Look up and create jails based on the > + * descriptor's prison. > + */ > + prison_free(mypr); > + error = jaildesc_find(td, jfd_in, &desc_in, &mypr, > + NULL); > + if (error != 0) { > + vfs_opterror(opts, error == ENOENT > + ? 
"descriptor to dead jail" > + : "not a jail descriptor"); > + goto done_errmsg; > + } > + /* > + * Check file permissions using the current > + * credentials, and operation permissions > + * using the descriptor's credentials. > + */ > + error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid, > + desc_in->jd_gid, VEXEC, td->td_ucred); > + JAILDESC_UNLOCK(desc_in); > + if (error != 0) > + goto done_free; > + if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) { > + error = EPERM; > + goto done_free; > + } > + } > + if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) { > + /* Allocate a jail descriptor to return later. */ > + error = jaildesc_alloc(td, &jfp_out, &jfd_out, > + flags & JAIL_OWN_DESC); > + if (error) > + goto done_free; > + } > + } > + > + /* > + * Delay the permission check if using a jail descriptor, > + * until we get the descriptor's credentials. > + */ > + if (!(flags & JAIL_USE_DESC)) { > + error = priv_check(td, PRIV_JAIL_SET); > + if (error == 0 && (flags & JAIL_ATTACH)) > + error = priv_check(td, PRIV_JAIL_ATTACH); > + if (error) > + goto done_free; > + } > + > error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); > if (error == ENOENT) > jid = 0; > @@ -1441,7 +1514,57 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) > error = EAGAIN; > goto done_deref; > } > - if (jid != 0) { > + if (flags & JAIL_USE_DESC) { > + /* Get the jail from its descriptor. */ > + error = jaildesc_find(td, jfd_in, &desc_in, &pr, &jdcred); > + if (error) { > + vfs_opterror(opts, error == ENOENT > + ? "descriptor to dead jail" > + : "not a jail descriptor"); > + goto done_deref; > + } > + drflags |= PD_DEREF; > + /* > + * Check file permissions using the current credentials, > + * and operation permissions using the descriptor's > + * credentials. 
> + */ > + error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid, > + desc_in->jd_gid, VWRITE, td->td_ucred); > + if (error == 0 && (flags & JAIL_ATTACH)) > + error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid, > + desc_in->jd_gid, VEXEC, td->td_ucred); > + JAILDESC_UNLOCK(desc_in); > + if (error == 0) > + error = priv_check_cred(jdcred, PRIV_JAIL_SET); > + if (error == 0 && (flags & JAIL_ATTACH)) > + error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH); > + crfree(jdcred); > + if (error) > + goto done_deref; > + mtx_lock(&pr->pr_mtx); > + drflags |= PD_LOCKED; > + if (cuflags == JAIL_CREATE) { > + error = EEXIST; > + vfs_opterror(opts, "jail %d already exists", > + pr->pr_id); > + goto done_deref; > + } > + if (!prison_isalive(pr)) { > + /* While a jid can be resurrected, the prison > + * itself cannot. > + */ > + error = ENOENT; > + vfs_opterror(opts, "jail %d is dying", pr->pr_id); > *** 1065 LINES SKIPPED *** > -- Bjoern A. Zeeb r15:7
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?7nnn754n-7r54-79rp-438n-12nsp9non865>