Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 25 Apr 2016 23:28:00 +0300
From:      Subbsd <subbsd@gmail.com>
To:        Jamie Gritton <jamie@freebsd.org>
Cc:        src-committers@freebsd.org, svn-src-all@freebsd.org,  svn-src-head@freebsd.org
Subject:   Re: svn commit: r298585 - in head: sys/kern usr.sbin/jail
Message-ID:  <CAFt_eMqw9JwBb8VJcDtJQZAJUPxMZ-heymKvE7Dm%2B3M%2BJHLx0Q@mail.gmail.com>
In-Reply-To: <201604251706.u3PH6okj031018@repo.freebsd.org>
References:  <201604251706.u3PH6okj031018@repo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
I do not know how this works in jails, but it looks like this commit
breaks SysV shared memory (SHM) on the host. Since this commit, every
Qt-based application fails with:

EP Get failed: 'Feature not implemented.
The feature requested is not implemented by the recipient or server
and therefore cannot be processed.' (501)"
[23:05:55] W:QNativeImage: Unable to attach to shared memory segment.
Segmentation fault (core dumped)


and the trace stops at:
shmget(0x0,0x634e0,0x3c0)                        = 65580 (0x1002c)
shmat(0x1002c,0x0,0x0)                           ERR#22 'Invalid argument'

On Mon, Apr 25, 2016 at 8:06 PM, Jamie Gritton <jamie@freebsd.org> wrote:
> Author: jamie
> Date: Mon Apr 25 17:06:50 2016
> New Revision: 298585
> URL: https://svnweb.freebsd.org/changeset/base/298585
>
> Log:
>   Encapsulate SYSV IPC objects in jails.  Define per-module parameters
>   sysvmsg, sysvsem, and sysvshm, with the following behavior:
>
>   inherit: allow full access to the IPC primitives.  This is the same as
>   the current setup with allow.sysvipc on.  Jails and the base system
>   can see (and modify) each other's objects, which is generally considered
>   a bad thing (though it may be useful in some circumstances).
>
>   disable: allow no access, same as the current setup with allow.sysvipc off.
>
>   new: A jail may see and use the IPC objects that it has created.  It also
>   gets its own IPC key namespace, so different jails may have their own
>   objects using the same key value.  The parent jail (or base system) can
>   see the jail's IPC objects, but not its keys.
>
>   PR:           48471
>   Submitted by: based on work by kikuchan98@gmail.com
>   MFC after:    5 days
>
> Modified:
>   head/sys/kern/sysv_msg.c
>   head/sys/kern/sysv_sem.c
>   head/sys/kern/sysv_shm.c
>   head/usr.sbin/jail/jail.8
>
> Modified: head/sys/kern/sysv_msg.c
> ==============================================================================
> --- head/sys/kern/sysv_msg.c    Mon Apr 25 17:01:13 2016        (r298584)
> +++ head/sys/kern/sysv_msg.c    Mon Apr 25 17:06:50 2016        (r298585)
> @@ -62,8 +62,11 @@ __FBSDID("$FreeBSD$");
>  #include <sys/lock.h>
>  #include <sys/mutex.h>
>  #include <sys/module.h>
> +#include <sys/mount.h>
>  #include <sys/msg.h>
>  #include <sys/racct.h>
> +#include <sys/sbuf.h>
> +#include <sys/sx.h>
>  #include <sys/syscall.h>
>  #include <sys/syscallsubr.h>
>  #include <sys/sysent.h>
> @@ -80,6 +83,14 @@ static MALLOC_DEFINE(M_MSG, "msg", "SVID
>  static int msginit(void);
>  static int msgunload(void);
>  static int sysvmsg_modload(struct module *, int, void *);
> +static void msq_remove(struct msqid_kernel *);
> +static struct prison *msg_find_prison(struct ucred *);
> +static int msq_prison_cansee(struct prison *, struct msqid_kernel *);
> +static int msg_prison_check(void *, void *);
> +static int msg_prison_set(void *, void *);
> +static int msg_prison_get(void *, void *);
> +static int msg_prison_remove(void *, void *);
> +static void msg_prison_cleanup(struct prison *);
>
>
>  #ifdef MSG_DEBUG
> @@ -155,6 +166,7 @@ static struct msgmap *msgmaps;      /* MSGSEG
>  static struct msg *msghdrs;    /* MSGTQL msg headers */
>  static struct msqid_kernel *msqids;    /* MSGMNI msqid_kernel struct's */
>  static struct mtx msq_mtx;     /* global mutex for message queues. */
> +static unsigned msg_prison_slot;/* prison OSD slot */
>
>  static struct syscall_helper_data msg_syscalls[] = {
>         SYSCALL_INIT_HELPER(msgctl),
> @@ -194,7 +206,15 @@ static struct syscall_helper_data msg32_
>  static int
>  msginit()
>  {
> +       struct prison *pr;
> +       void *rsv;
>         int i, error;
> +       osd_method_t methods[PR_MAXMETHOD] = {
> +           [PR_METHOD_CHECK] =         msg_prison_check,
> +           [PR_METHOD_SET] =           msg_prison_set,
> +           [PR_METHOD_GET] =           msg_prison_get,
> +           [PR_METHOD_REMOVE] =        msg_prison_remove,
> +       };
>
>         msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
>         msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
> @@ -252,6 +272,29 @@ msginit()
>         }
>         mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
>
> +       /* Set current prisons according to their allow.sysvipc. */
> +       msg_prison_slot = osd_jail_register(NULL, methods);
> +       rsv = osd_reserve(msg_prison_slot);
> +       prison_lock(&prison0);
> +       (void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, &prison0);
> +       prison_unlock(&prison0);
> +       rsv = NULL;
> +       sx_slock(&allprison_lock);
> +       TAILQ_FOREACH(pr, &allprison, pr_list) {
> +               if (rsv == NULL)
> +                       rsv = osd_reserve(msg_prison_slot);
> +               prison_lock(pr);
> +               if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
> +                       (void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
> +                           &prison0);
> +                       rsv = NULL;
> +               }
> +               prison_unlock(pr);
> +       }
> +       if (rsv != NULL)
> +               osd_free_reserved(rsv);
> +       sx_sunlock(&allprison_lock);
> +
>         error = syscall_helper_register(msg_syscalls, SY_THR_STATIC_KLD);
>         if (error != 0)
>                 return (error);
> @@ -292,6 +335,8 @@ msgunload()
>         if (msqid != msginfo.msgmni)
>                 return (EBUSY);
>
> +       if (msg_prison_slot != 0)
> +               osd_jail_deregister(msg_prison_slot);
>  #ifdef MAC
>         for (i = 0; i < msginfo.msgtql; i++)
>                 mac_sysvmsg_destroy(&msghdrs[i]);
> @@ -366,6 +411,67 @@ msg_freehdr(msghdr)
>  #endif
>  }
>
> +static void
> +msq_remove(struct msqid_kernel *msqkptr)
> +{
> +       struct msg *msghdr;
> +
> +       racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
> +       racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
> +       racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
> +       crfree(msqkptr->cred);
> +       msqkptr->cred = NULL;
> +
> +       /* Free the message headers */
> +       msghdr = msqkptr->u.msg_first;
> +       while (msghdr != NULL) {
> +               struct msg *msghdr_tmp;
> +
> +               /* Free the segments of each message */
> +               msqkptr->u.msg_cbytes -= msghdr->msg_ts;
> +               msqkptr->u.msg_qnum--;
> +               msghdr_tmp = msghdr;
> +               msghdr = msghdr->msg_next;
> +               msg_freehdr(msghdr_tmp);
> +       }
> +
> +       if (msqkptr->u.msg_cbytes != 0)
> +               panic("msg_cbytes is screwed up");
> +       if (msqkptr->u.msg_qnum != 0)
> +               panic("msg_qnum is screwed up");
> +
> +       msqkptr->u.msg_qbytes = 0;      /* Mark it as free */
> +
> +#ifdef MAC
> +       mac_sysvmsq_cleanup(msqkptr);
> +#endif
> +
> +       wakeup(msqkptr);
> +}
> +
> +static struct prison *
> +msg_find_prison(struct ucred *cred)
> +{
> +       struct prison *pr, *rpr;
> +
> +       pr = cred->cr_prison;
> +       prison_lock(pr);
> +       rpr = osd_jail_get(pr, msg_prison_slot);
> +       prison_unlock(pr);
> +       return rpr;
> +}
> +
> +static int
> +msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr)
> +{
> +
> +       if (msqkptr->cred == NULL ||
> +           !(rpr == msqkptr->cred->cr_prison ||
> +             prison_ischild(rpr, msqkptr->cred->cr_prison)))
> +               return (EINVAL);
> +       return (0);
> +}
> +
>  #ifndef _SYS_SYSPROTO_H_
>  struct msgctl_args {
>         int     msqid;
> @@ -402,8 +508,10 @@ kern_msgctl(td, msqid, cmd, msqbuf)
>  {
>         int rval, error, msqix;
>         register struct msqid_kernel *msqkptr;
> +       struct prison *rpr;
>
> -       if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
> +       rpr = msg_find_prison(td->td_ucred);
> +       if (rpr == NULL)
>                 return (ENOSYS);
>
>         msqix = IPCID_TO_IX(msqid);
> @@ -427,6 +535,13 @@ kern_msgctl(td, msqid, cmd, msqbuf)
>                 error = EINVAL;
>                 goto done2;
>         }
> +
> +       error = msq_prison_cansee(rpr, msqkptr);
> +       if (error != 0) {
> +               DPRINTF(("requester can't see prison\n"));
> +               goto done2;
> +       }
> +
>  #ifdef MAC
>         error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
>         if (error != 0)
> @@ -440,7 +555,9 @@ kern_msgctl(td, msqid, cmd, msqbuf)
>
>         case IPC_RMID:
>         {
> +#ifdef MAC
>                 struct msg *msghdr;
> +#endif
>                 if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
>                         goto done2;
>
> @@ -462,37 +579,7 @@ kern_msgctl(td, msqid, cmd, msqbuf)
>                 }
>  #endif
>
> -               racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
> -               racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
> -               racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
> -               crfree(msqkptr->cred);
> -               msqkptr->cred = NULL;
> -
> -               /* Free the message headers */
> -               msghdr = msqkptr->u.msg_first;
> -               while (msghdr != NULL) {
> -                       struct msg *msghdr_tmp;
> -
> -                       /* Free the segments of each message */
> -                       msqkptr->u.msg_cbytes -= msghdr->msg_ts;
> -                       msqkptr->u.msg_qnum--;
> -                       msghdr_tmp = msghdr;
> -                       msghdr = msghdr->msg_next;
> -                       msg_freehdr(msghdr_tmp);
> -               }
> -
> -               if (msqkptr->u.msg_cbytes != 0)
> -                       panic("msg_cbytes is screwed up");
> -               if (msqkptr->u.msg_qnum != 0)
> -                       panic("msg_qnum is screwed up");
> -
> -               msqkptr->u.msg_qbytes = 0;      /* Mark it as free */
> -
> -#ifdef MAC
> -               mac_sysvmsq_cleanup(msqkptr);
> -#endif
> -
> -               wakeup(msqkptr);
> +               msq_remove(msqkptr);
>         }
>
>                 break;
> @@ -529,6 +616,8 @@ kern_msgctl(td, msqid, cmd, msqbuf)
>                         goto done2;
>                 }
>                 *msqbuf = msqkptr->u;
> +               if (td->td_ucred->cr_prison != msqkptr->cred->cr_prison)
> +                       msqbuf->msg_perm.key = IPC_PRIVATE;
>                 break;
>
>         default:
> @@ -564,7 +653,7 @@ sys_msgget(td, uap)
>
>         DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
>
> -       if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
> +       if (msg_find_prison(cred) == NULL)
>                 return (ENOSYS);
>
>         mtx_lock(&msq_mtx);
> @@ -572,6 +661,8 @@ sys_msgget(td, uap)
>                 for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
>                         msqkptr = &msqids[msqid];
>                         if (msqkptr->u.msg_qbytes != 0 &&
> +                           msqkptr->cred != NULL &&
> +                           msqkptr->cred->cr_prison == cred->cr_prison &&
>                             msqkptr->u.msg_perm.key == key)
>                                 break;
>                 }
> @@ -684,12 +775,14 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgf
>         int msqix, segs_needed, error = 0;
>         register struct msqid_kernel *msqkptr;
>         register struct msg *msghdr;
> +       struct prison *rpr;
>         short next;
>  #ifdef RACCT
>         size_t saved_msgsz;
>  #endif
>
> -       if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
> +       rpr = msg_find_prison(td->td_ucred);
> +       if (rpr == NULL)
>                 return (ENOSYS);
>
>         mtx_lock(&msq_mtx);
> @@ -714,6 +807,11 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgf
>                 goto done2;
>         }
>
> +       if ((error = msq_prison_cansee(rpr, msqkptr))) {
> +               DPRINTF(("requester can't see prison\n"));
> +               goto done2;
> +       }
> +
>         if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
>                 DPRINTF(("requester doesn't have write access\n"));
>                 goto done2;
> @@ -1052,10 +1150,12 @@ kern_msgrcv(td, msqid, msgp, msgsz, msgt
>         size_t len;
>         register struct msqid_kernel *msqkptr;
>         register struct msg *msghdr;
> +       struct prison *rpr;
>         int msqix, error = 0;
>         short next;
>
> -       if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
> +       rpr = msg_find_prison(td->td_ucred);
> +       if (rpr == NULL)
>                 return (ENOSYS);
>
>         msqix = IPCID_TO_IX(msqid);
> @@ -1079,6 +1179,11 @@ kern_msgrcv(td, msqid, msgp, msgsz, msgt
>                 goto done2;
>         }
>
> +       if ((error = msq_prison_cansee(rpr, msqkptr))) {
> +               DPRINTF(("requester can't see prison\n"));
> +               goto done2;
> +       }
> +
>         if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
>                 DPRINTF(("requester doesn't have read access\n"));
>                 goto done2;
> @@ -1318,9 +1423,39 @@ sys_msgrcv(td, uap)
>  static int
>  sysctl_msqids(SYSCTL_HANDLER_ARGS)
>  {
> +       struct sbuf sb;
> +       struct msqid_kernel tmp, empty;
> +       struct msqid_kernel *msqkptr;
> +       struct prison *rpr;
> +       int error, i;
> +
> +       error = sysctl_wire_old_buffer(req, 0);
> +       if (error != 0)
> +               goto done;
> +       rpr = msg_find_prison(req->td->td_ucred);
> +       sbuf_new_for_sysctl(&sb, NULL, sizeof(struct msqid_kernel) *
> +           msginfo.msgmni, req);
> +
> +       bzero(&empty, sizeof(empty));
> +       for (i = 0; i < msginfo.msgmni; i++) {
> +               msqkptr = &msqids[i];
> +               if (msqkptr->u.msg_qbytes == 0 || rpr == NULL ||
> +                   msq_prison_cansee(rpr, msqkptr) != 0) {
> +                       msqkptr = &empty;
> +               } else if (req->td->td_ucred->cr_prison !=
> +                   msqkptr->cred->cr_prison) {
> +                       bcopy(msqkptr, &tmp, sizeof(tmp));
> +                       msqkptr = &tmp;
> +                       msqkptr->u.msg_perm.key = IPC_PRIVATE;
> +               }
>
> -       return (SYSCTL_OUT(req, msqids,
> -           sizeof(struct msqid_kernel) * msginfo.msgmni));
> +               sbuf_bcat(&sb, msqkptr, sizeof(*msqkptr));
> +       }
> +       error = sbuf_finish(&sb);
> +       sbuf_delete(&sb);
> +
> +done:
> +       return (error);
>  }
>
>  SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
> @@ -1338,6 +1473,181 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg,
>  SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLTYPE_OPAQUE | CTLFLAG_RD,
>      NULL, 0, sysctl_msqids, "", "Message queue IDs");
>
> +static int
> +msg_prison_check(void *obj, void *data)
> +{
> +       struct prison *pr = obj;
> +       struct prison *prpr;
> +       struct vfsoptlist *opts = data;
> +       int error, jsys;
> +
> +       /*
> +        * sysvmsg is a jailsys integer.
> +        * It must be "disable" if the parent jail is disabled.
> +        */
> +       error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys));
> +       if (error != ENOENT) {
> +               if (error != 0)
> +                       return (error);
> +               switch (jsys) {
> +               case JAIL_SYS_DISABLE:
> +                       break;
> +               case JAIL_SYS_NEW:
> +               case JAIL_SYS_INHERIT:
> +                       prison_lock(pr->pr_parent);
> +                       prpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
> +                       prison_unlock(pr->pr_parent);
> +                       if (prpr == NULL)
> +                               return (EPERM);
> +                       break;
> +               default:
> +                       return (EINVAL);
> +               }
> +       }
> +
> +       return (0);
> +}
> +
> +static int
> +msg_prison_set(void *obj, void *data)
> +{
> +       struct prison *pr = obj;
> +       struct prison *tpr, *orpr, *nrpr, *trpr;
> +       struct vfsoptlist *opts = data;
> +       void *rsv;
> +       int jsys, descend;
> +
> +       /*
> +        * sysvmsg controls which jail is the root of the associated msgs (this
> +        * jail or same as the parent), or if the feature is available at all.
> +        */
> +       if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == ENOENT)
> +               jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
> +                   ? JAIL_SYS_INHERIT
> +                   : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
> +                   ? JAIL_SYS_DISABLE
> +                   : -1;
> +       if (jsys == JAIL_SYS_DISABLE) {
> +               prison_lock(pr);
> +               orpr = osd_jail_get(pr, msg_prison_slot);
> +               if (orpr != NULL)
> +                       osd_jail_del(pr, msg_prison_slot);
> +               prison_unlock(pr);
> +               if (orpr != NULL) {
> +                       if (orpr == pr)
> +                               msg_prison_cleanup(pr);
> +                       /* Disable all child jails as well. */
> +                       FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
> +                               prison_lock(tpr);
> +                               trpr = osd_jail_get(tpr, msg_prison_slot);
> +                               if (trpr != NULL) {
> +                                       osd_jail_del(tpr, msg_prison_slot);
> +                                       prison_unlock(tpr);
> +                                       if (trpr == tpr)
> +                                               msg_prison_cleanup(tpr);
> +                               } else {
> +                                       prison_unlock(tpr);
> +                                       descend = 0;
> +                               }
> +                       }
> +               }
> +       } else if (jsys != -1) {
> +               if (jsys == JAIL_SYS_NEW)
> +                       nrpr = pr;
> +               else {
> +                       prison_lock(pr->pr_parent);
> +                       nrpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
> +                       prison_unlock(pr->pr_parent);
> +               }
> +               rsv = osd_reserve(msg_prison_slot);
> +               prison_lock(pr);
> +               orpr = osd_jail_get(pr, msg_prison_slot);
> +               if (orpr != nrpr)
> +                       (void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
> +                           nrpr);
> +               else
> +                       osd_free_reserved(rsv);
> +               prison_unlock(pr);
> +               if (orpr != nrpr) {
> +                       if (orpr == pr)
> +                               msg_prison_cleanup(pr);
> +                       if (orpr != NULL) {
> +                               /* Change child jails matching the old root, */
> +                               FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
> +                                       prison_lock(tpr);
> +                                       trpr = osd_jail_get(tpr,
> +                                           msg_prison_slot);
> +                                       if (trpr == orpr) {
> +                                               (void)osd_jail_set(tpr,
> +                                                   msg_prison_slot, nrpr);
> +                                               prison_unlock(tpr);
> +                                               if (trpr == tpr)
> +                                                       msg_prison_cleanup(tpr);
> +                                       } else {
> +                                               prison_unlock(tpr);
> +                                               descend = 0;
> +                                       }
> +                               }
> +                       }
> +               }
> +       }
> +
> +       return (0);
> +}
> +
> +static int
> +msg_prison_get(void *obj, void *data)
> +{
> +       struct prison *pr = obj;
> +       struct prison *rpr;
> +       struct vfsoptlist *opts = data;
> +       int error, jsys;
> +
> +       /* Set sysvmsg based on the jail's root prison. */
> +       prison_lock(pr);
> +       rpr = osd_jail_get(pr, msg_prison_slot);
> +       prison_unlock(pr);
> +       jsys = rpr == NULL ? JAIL_SYS_DISABLE
> +           : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
> +       error = vfs_setopt(opts, "sysvmsg", &jsys, sizeof(jsys));
> +       if (error == ENOENT)
> +               error = 0;
> +       return (error);
> +}
> +
> +static int
> +msg_prison_remove(void *obj, void *data __unused)
> +{
> +       struct prison *pr = obj;
> +       struct prison *rpr;
> +
> +       prison_lock(pr);
> +       rpr = osd_jail_get(pr, msg_prison_slot);
> +       prison_unlock(pr);
> +       if (rpr == pr)
> +               msg_prison_cleanup(pr);
> +       return (0);
> +}
> +
> +static void
> +msg_prison_cleanup(struct prison *pr)
> +{
> +       struct msqid_kernel *msqkptr;
> +       int i;
> +
> +       /* Remove any msqs that belong to this jail. */
> +       mtx_lock(&msq_mtx);
> +       for (i = 0; i < msginfo.msgmni; i++) {
> +               msqkptr = &msqids[i];
> +               if (msqkptr->u.msg_qbytes != 0 &&
> +                   msqkptr->cred != NULL && msqkptr->cred->cr_prison == pr)
> +                       msq_remove(msqkptr);
> +       }
> +       mtx_unlock(&msq_mtx);
> +}
> +
> +SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message queues");
> +
>  #ifdef COMPAT_FREEBSD32
>  int
>  freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap)
> @@ -1516,8 +1826,6 @@ sys_msgsys(td, uap)
>  {
>         int error;
>
> -       if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
> -               return (ENOSYS);
>         if (uap->which < 0 || uap->which >= nitems(msgcalls))
>                 return (EINVAL);
>         error = (*msgcalls[uap->which])(td, &uap->a2);
>
> Modified: head/sys/kern/sysv_sem.c
> ==============================================================================
> --- head/sys/kern/sysv_sem.c    Mon Apr 25 17:01:13 2016        (r298584)
> +++ head/sys/kern/sysv_sem.c    Mon Apr 25 17:06:50 2016        (r298585)
> @@ -52,7 +52,9 @@ __FBSDID("$FreeBSD$");
>  #include <sys/module.h>
>  #include <sys/mutex.h>
>  #include <sys/racct.h>
> +#include <sys/sbuf.h>
>  #include <sys/sem.h>
> +#include <sys/sx.h>
>  #include <sys/syscall.h>
>  #include <sys/syscallsubr.h>
>  #include <sys/sysent.h>
> @@ -78,7 +80,16 @@ static int sysvsem_modload(struct module
>  static int semunload(void);
>  static void semexit_myhook(void *arg, struct proc *p);
>  static int sysctl_sema(SYSCTL_HANDLER_ARGS);
> -static int semvalid(int semid, struct semid_kernel *semakptr);
> +static int semvalid(int semid, struct prison *rpr,
> +    struct semid_kernel *semakptr);
> +static void sem_remove(int semidx, struct ucred *cred);
> +static struct prison *sem_find_prison(struct ucred *);
> +static int sem_prison_cansee(struct prison *, struct semid_kernel *);
> +static int sem_prison_check(void *, void *);
> +static int sem_prison_set(void *, void *);
> +static int sem_prison_get(void *, void *);
> +static int sem_prison_remove(void *, void *);
> +static void sem_prison_cleanup(struct prison *);
>
>  #ifndef _SYS_SYSPROTO_H_
>  struct __semctl_args;
> @@ -104,6 +115,7 @@ LIST_HEAD(, sem_undo) semu_list;    /* list
>  LIST_HEAD(, sem_undo) semu_free_list;  /* list of free undo structures */
>  static int     *semu;          /* undo structure pool */
>  static eventhandler_tag semexit_tag;
> +static unsigned sem_prison_slot;       /* prison OSD slot */
>
>  #define SEMUNDO_MTX            sem_undo_mtx
>  #define SEMUNDO_LOCK()         mtx_lock(&SEMUNDO_MTX);
> @@ -247,7 +259,15 @@ static struct syscall_helper_data sem32_
>  static int
>  seminit(void)
>  {
> +       struct prison *pr;
> +       void *rsv;
>         int i, error;
> +       osd_method_t methods[PR_MAXMETHOD] = {
> +           [PR_METHOD_CHECK] =         sem_prison_check,
> +           [PR_METHOD_SET] =           sem_prison_set,
> +           [PR_METHOD_GET] =           sem_prison_get,
> +           [PR_METHOD_REMOVE] =        sem_prison_remove,
> +       };
>
>         sem = malloc(sizeof(struct sem) * seminfo.semmns, M_SEM, M_WAITOK);
>         sema = malloc(sizeof(struct semid_kernel) * seminfo.semmni, M_SEM,
> @@ -278,6 +298,29 @@ seminit(void)
>         semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL,
>             EVENTHANDLER_PRI_ANY);
>
> +       /* Set current prisons according to their allow.sysvipc. */
> +       sem_prison_slot = osd_jail_register(NULL, methods);
> +       rsv = osd_reserve(sem_prison_slot);
> +       prison_lock(&prison0);
> +       (void)osd_jail_set_reserved(&prison0, sem_prison_slot, rsv, &prison0);
> +       prison_unlock(&prison0);
> +       rsv = NULL;
> +       sx_slock(&allprison_lock);
> +       TAILQ_FOREACH(pr, &allprison, pr_list) {
> +               if (rsv == NULL)
> +                       rsv = osd_reserve(sem_prison_slot);
> +               prison_lock(pr);
> +               if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
> +                       (void)osd_jail_set_reserved(pr, sem_prison_slot, rsv,
> +                           &prison0);
> +                       rsv = NULL;
> +               }
> +               prison_unlock(pr);
> +       }
> +       if (rsv != NULL)
> +               osd_free_reserved(rsv);
> +       sx_sunlock(&allprison_lock);
> +
>         error = syscall_helper_register(sem_syscalls, SY_THR_STATIC_KLD);
>         if (error != 0)
>                 return (error);
> @@ -303,6 +346,8 @@ semunload(void)
>  #endif
>         syscall_helper_unregister(sem_syscalls);
>         EVENTHANDLER_DEREGISTER(process_exit, semexit_tag);
> +       if (sem_prison_slot != 0)
> +               osd_jail_deregister(sem_prison_slot);
>  #ifdef MAC
>         for (i = 0; i < seminfo.semmni; i++)
>                 mac_sysvsem_destroy(&sema[i]);
> @@ -489,11 +534,74 @@ semundo_clear(int semid, int semnum)
>  }
>
>  static int
> -semvalid(int semid, struct semid_kernel *semakptr)
> +semvalid(int semid, struct prison *rpr, struct semid_kernel *semakptr)
>  {
>
>         return ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
> -           semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ? EINVAL : 0);
> +           semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ||
> +           sem_prison_cansee(rpr, semakptr) ? EINVAL : 0);
> +}
> +
> +static void
> +sem_remove(int semidx, struct ucred *cred)
> +{
> +       struct semid_kernel *semakptr;
> +       int i;
> +
> +       KASSERT(semidx >= 0 && semidx < seminfo.semmni,
> +               ("semidx out of bounds"));
> +       semakptr = &sema[semidx];
> +       semakptr->u.sem_perm.cuid = cred ? cred->cr_uid : 0;
> +       semakptr->u.sem_perm.uid = cred ? cred->cr_uid : 0;
> +       semakptr->u.sem_perm.mode = 0;
> +       racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems);
> +       crfree(semakptr->cred);
> +       semakptr->cred = NULL;
> +       SEMUNDO_LOCK();
> +       semundo_clear(semidx, -1);
> +       SEMUNDO_UNLOCK();
> +#ifdef MAC
> +       mac_sysvsem_cleanup(semakptr);
> +#endif
> +       wakeup(semakptr);
> +       for (i = 0; i < seminfo.semmni; i++) {
> +               if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
> +                   sema[i].u.sem_base > semakptr->u.sem_base)
> +                       mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
> +       }
> +       for (i = semakptr->u.sem_base - sem; i < semtot; i++)
> +               sem[i] = sem[i + semakptr->u.sem_nsems];
> +       for (i = 0; i < seminfo.semmni; i++) {
> +               if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
> +                   sema[i].u.sem_base > semakptr->u.sem_base) {
> +                       sema[i].u.sem_base -= semakptr->u.sem_nsems;
> +                       mtx_unlock(&sema_mtx[i]);
> +               }
> +       }
> +       semtot -= semakptr->u.sem_nsems;
> +}
> +
> +static struct prison *
> +sem_find_prison(struct ucred *cred)
> +{
> +       struct prison *pr, *rpr;
> +
> +       pr = cred->cr_prison;
> +       prison_lock(pr);
> +       rpr = osd_jail_get(pr, sem_prison_slot);
> +       prison_unlock(pr);
> +       return rpr;
> +}
> +
> +static int
> +sem_prison_cansee(struct prison *rpr, struct semid_kernel *semakptr)
> +{
> +
> +       if (semakptr->cred == NULL ||
> +           !(rpr == semakptr->cred->cr_prison ||
> +             prison_ischild(rpr, semakptr->cred->cr_prison)))
> +               return (EINVAL);
> +       return (0);
>  }
>
>  /*
> @@ -572,6 +680,7 @@ kern_semctl(struct thread *td, int semid
>         u_short *array;
>         struct ucred *cred = td->td_ucred;
>         int i, error;
> +       struct prison *rpr;
>         struct semid_ds *sbuf;
>         struct semid_kernel *semakptr;
>         struct mtx *sema_mtxp;
> @@ -580,7 +689,9 @@ kern_semctl(struct thread *td, int semid
>
>         DPRINTF(("call to semctl(%d, %d, %d, 0x%p)\n",
>             semid, semnum, cmd, arg));
> -       if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
> +
> +       rpr = sem_find_prison(td->td_ucred);
> +       if (sem == NULL)
>                 return (ENOSYS);
>
>         array = NULL;
> @@ -600,6 +711,8 @@ kern_semctl(struct thread *td, int semid
>                         error = EINVAL;
>                         goto done2;
>                 }
> +               if ((error = sem_prison_cansee(rpr, semakptr)))
> +                       goto done2;
>                 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
>                         goto done2;
>  #ifdef MAC
> @@ -608,6 +721,8 @@ kern_semctl(struct thread *td, int semid
>                         goto done2;
>  #endif
>                 bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
> +               if (cred->cr_prison != semakptr->cred->cr_prison)
> +                       arg->buf->sem_perm.key = IPC_PRIVATE;
>                 *rval = IXSEQ_TO_IPCID(semid, semakptr->u.sem_perm);
>                 mtx_unlock(sema_mtxp);
>                 return (0);
> @@ -622,6 +737,7 @@ kern_semctl(struct thread *td, int semid
>         if (cmd == IPC_RMID)
>                 mtx_lock(&sem_mtx);
>         mtx_lock(sema_mtxp);
> +
>  #ifdef MAC
>         error = mac_sysvsem_check_semctl(cred, semakptr, cmd);
>         if (error != 0)
> @@ -633,42 +749,15 @@ kern_semctl(struct thread *td, int semid
>
>         switch (cmd) {
>         case IPC_RMID:
> -               if ((error = semvalid(semid, semakptr)) != 0)
> +               if ((error = semvalid(semid, rpr, semakptr)) != 0)
>                         goto done2;
>                 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
>                         goto done2;
> -               semakptr->u.sem_perm.cuid = cred->cr_uid;
> -               semakptr->u.sem_perm.uid = cred->cr_uid;
> -               semakptr->u.sem_perm.mode = 0;
> -               racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems);
> -               crfree(semakptr->cred);
> -               semakptr->cred = NULL;
> -               SEMUNDO_LOCK();
> -               semundo_clear(semidx, -1);
> -               SEMUNDO_UNLOCK();
> -#ifdef MAC
> -               mac_sysvsem_cleanup(semakptr);
> -#endif
> -               wakeup(semakptr);
> -               for (i = 0; i < seminfo.semmni; i++) {
> -                       if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
> -                           sema[i].u.sem_base > semakptr->u.sem_base)
> -                               mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
> -               }
> -               for (i = semakptr->u.sem_base - sem; i < semtot; i++)
> -                       sem[i] = sem[i + semakptr->u.sem_nsems];
> -               for (i = 0; i < seminfo.semmni; i++) {
> -                       if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
> -                           sema[i].u.sem_base > semakptr->u.sem_base) {
> -                               sema[i].u.sem_base -= semakptr->u.sem_nsems;
> -                               mtx_unlock(&sema_mtx[i]);
> -                       }
> -               }
> -               semtot -= semakptr->u.sem_nsems;
> +               sem_remove(semidx, cred);
>                 break;
>
>         case IPC_SET:
> -               if ((error = semvalid(semid, semakptr)) != 0)
> +               if ((error = semvalid(semid, rpr, semakptr)) != 0)
>                         goto done2;
>                 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
>                         goto done2;
> @@ -681,15 +770,17 @@ kern_semctl(struct thread *td, int semid
>                 break;
>
>         case IPC_STAT:
> -               if ((error = semvalid(semid, semakptr)) != 0)
> +               if ((error = semvalid(semid, rpr, semakptr)) != 0)
>                         goto done2;
>                 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
>                         goto done2;
>                 bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
> +               if (cred->cr_prison != semakptr->cred->cr_prison)
> +                       arg->buf->sem_perm.key = IPC_PRIVATE;
>                 break;
>
>         case GETNCNT:
> -               if ((error = semvalid(semid, semakptr)) != 0)
> +               if ((error = semvalid(semid, rpr, semakptr)) != 0)
>                         goto done2;
>                 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
>                         goto done2;
> @@ -701,7 +792,7 @@ kern_semctl(struct thread *td, int semid
>                 break;
>
>         case GETPID:
> -               if ((error = semvalid(semid, semakptr)) != 0)
> +               if ((error = semvalid(semid, rpr, semakptr)) != 0)
>                         goto done2;
>                 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
>                         goto done2;
> @@ -713,7 +804,7 @@ kern_semctl(struct thread *td, int semid
>                 break;
>
>         case GETVAL:
> -               if ((error = semvalid(semid, semakptr)) != 0)
> +               if ((error = semvalid(semid, rpr, semakptr)) != 0)
>                         goto done2;
>                 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
>                         goto done2;
> @@ -749,7 +840,7 @@ kern_semctl(struct thread *td, int semid
>                 mtx_unlock(sema_mtxp);
>                 array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK);
>                 mtx_lock(sema_mtxp);
> -               if ((error = semvalid(semid, semakptr)) != 0)
> +               if ((error = semvalid(semid, rpr, semakptr)) != 0)
>                         goto done2;
>                 KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
>                 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
> @@ -762,7 +853,7 @@ kern_semctl(struct thread *td, int semid
>                 break;
>
>         case GETZCNT:
> -               if ((error = semvalid(semid, semakptr)) != 0)
> +               if ((error = semvalid(semid, rpr, semakptr)) != 0)
>                         goto done2;
>                 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
>                         goto done2;
> @@ -774,7 +865,7 @@ kern_semctl(struct thread *td, int semid
>                 break;
>
>         case SETVAL:
> -               if ((error = semvalid(semid, semakptr)) != 0)
> +               if ((error = semvalid(semid, rpr, semakptr)) != 0)
>                         goto done2;
>                 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
>                         goto done2;
> @@ -805,7 +896,7 @@ kern_semctl(struct thread *td, int semid
>                 mtx_lock(sema_mtxp);
>                 if (error)
>                         break;
> -               if ((error = semvalid(semid, semakptr)) != 0)
> +               if ((error = semvalid(semid, rpr, semakptr)) != 0)
>                         goto done2;
>                 KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
>                 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
> @@ -855,13 +946,16 @@ sys_semget(struct thread *td, struct sem
>         struct ucred *cred = td->td_ucred;
>
>         DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
> -       if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
> +
> +       if (sem_find_prison(cred) == NULL)
>                 return (ENOSYS);
>
>         mtx_lock(&sem_mtx);
>         if (key != IPC_PRIVATE) {
>                 for (semid = 0; semid < seminfo.semmni; semid++) {
>                         if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) &&
> +                           sema[semid].cred != NULL &&
> +                           sema[semid].cred->cr_prison == cred->cr_prison &&
>                             sema[semid].u.sem_perm.key == key)
>                                 break;
>                 }
> @@ -978,6 +1072,7 @@ sys_semop(struct thread *td, struct semo
>         struct sembuf small_sops[SMALL_SOPS];
>         int semid = uap->semid;
>         size_t nsops = uap->nsops;
> +       struct prison *rpr;
>         struct sembuf *sops;
>         struct semid_kernel *semakptr;
>         struct sembuf *sopptr = NULL;
> @@ -994,7 +1089,8 @@ sys_semop(struct thread *td, struct semo
>  #endif
>         DPRINTF(("call to semop(%d, %p, %u)\n", semid, sops, nsops));
>
> -       if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
> +       rpr = sem_find_prison(td->td_ucred);
> +       if (sem == NULL)
>                 return (ENOSYS);
>
>         semid = IPCID_TO_IX(semid);     /* Convert back to zero origin */
> @@ -1044,6 +1140,8 @@ sys_semop(struct thread *td, struct semo
>                 error = EINVAL;
>                 goto done2;
>         }
> +       if ((error = sem_prison_cansee(rpr, semakptr)) != 0)
> +               goto done2;
>         /*
>          * Initial pass thru sops to see what permissions are needed.
>          * Also perform any checks that don't need repeating on each
> @@ -1367,11 +1465,217 @@ semexit_myhook(void *arg, struct proc *p
>  static int
>  sysctl_sema(SYSCTL_HANDLER_ARGS)
>  {
> +       struct prison *rpr;
> +       struct sbuf sb;
> +       struct semid_kernel tmp, empty;
> +       struct semid_kernel *semakptr;
> +       int error, i;
> +
> +       error = sysctl_wire_old_buffer(req, 0);
> +       if (error != 0)
> +               goto done;
> +       rpr = sem_find_prison(req->td->td_ucred);
> +       sbuf_new_for_sysctl(&sb, NULL, sizeof(struct semid_kernel) *
> +           seminfo.semmni, req);
> +
> +       bzero(&empty, sizeof(empty));
> +       for (i = 0; i < seminfo.semmni; i++) {
> +               semakptr = &sema[i];
> +               if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
> +                   rpr == NULL || sem_prison_cansee(rpr, semakptr) != 0) {
> +                       semakptr = &empty;
> +               } else if (req->td->td_ucred->cr_prison !=
> +                   semakptr->cred->cr_prison) {
> +                       bcopy(semakptr, &tmp, sizeof(tmp));
> +                       semakptr = &tmp;
> +                       semakptr->u.sem_perm.key = IPC_PRIVATE;
> +               }
>
> -       return (SYSCTL_OUT(req, sema,
> -           sizeof(struct semid_kernel) * seminfo.semmni));
> +               sbuf_bcat(&sb, semakptr, sizeof(*semakptr));
> +       }
> +       error = sbuf_finish(&sb);
> +       sbuf_delete(&sb);
> +
> +done:
> +       return (error);
>  }
>
> +static int
> +sem_prison_check(void *obj, void *data)
> +{
> +       struct prison *pr = obj;
> +       struct prison *prpr;
> +       struct vfsoptlist *opts = data;
> +       int error, jsys;
> +
> +       /*
> +        * sysvsem is a jailsys integer.
> +        * It must be "disable" if the parent jail is disabled.
> +        */
> +       error = vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys));
> +       if (error != ENOENT) {
> +               if (error != 0)
> +                       return (error);
> +               switch (jsys) {
> +               case JAIL_SYS_DISABLE:
> +                       break;
> +               case JAIL_SYS_NEW:
> +               case JAIL_SYS_INHERIT:
> +                       prison_lock(pr->pr_parent);
> +                       prpr = osd_jail_get(pr->pr_parent, sem_prison_slot);
> +                       prison_unlock(pr->pr_parent);
> +                       if (prpr == NULL)
> +                               return (EPERM);
> +                       break;
> +               default:
> +                       return (EINVAL);
> +               }
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
> _______________________________________________
> svn-src-head@freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAFt_eMqw9JwBb8VJcDtJQZAJUPxMZ-heymKvE7Dm%2B3M%2BJHLx0Q>