Date: Mon, 25 Apr 2016 16:22:22 -0600 From: James Gritton <jamie@freebsd.org> To: Subbsd <subbsd@gmail.com> Cc: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: Re: svn commit: r298585 - in head: sys/kern usr.sbin/jail Message-ID: <5a7428bcd2767e3fd3b8e2c16828e3f1@gritton.org> In-Reply-To: <CAFt_eMqw9JwBb8VJcDtJQZAJUPxMZ-heymKvE7Dm+3M+JHLx0Q@mail.gmail.com> References: <201604251706.u3PH6okj031018@repo.freebsd.org> <CAFt_eMqw9JwBb8VJcDtJQZAJUPxMZ-heymKvE7Dm+3M+JHLx0Q@mail.gmail.com>
next in thread | previous in thread | raw e-mail | index | archive | help
That's a big oops on my part - msg and sem had nearly identical logic, but I got it backwards on shm. I'll put a fix in shortly. - Jamie On 2016-04-25 14:28, Subbsd wrote: > I do not know how it works for/in jails, but looks like this breaks > work SHM in host. I've got on any QT-based application after this > commit: > > EP Get failed: 'Feature not implemented. > The feature requested is not implemented by the recipient or server > and therefore cannot be processed.' (501)" > [23:05:55] W:QNativeImage: Unable to attach to shared memory segment. > Segmentation fault (core dumped) > > > and tracing stops on: > shmget(0x0,0x634e0,0x3c0) = 65580 (0x1002c) > shmat(0x1002c,0x0,0x0) ERR#22 'Invalid > argument' > > On Mon, Apr 25, 2016 at 8:06 PM, Jamie Gritton <jamie@freebsd.org> > wrote: >> Author: jamie >> Date: Mon Apr 25 17:06:50 2016 >> New Revision: 298585 >> URL: https://svnweb.freebsd.org/changeset/base/298585 >> >> Log: >> Encapsulate SYSV IPC objects in jails. Define per-module parameters >> sysvmsg, sysvsem, and sysvshm, with the following bahavior: >> >> inherit: allow full access to the IPC primitives. This is the same >> as >> the current setup with allow.sysvipc is on. Jails and the base >> system >> can see (and moduly) each other's objects, which is generally >> considered >> a bad thing (though may be useful in some circumstances). >> >> disable: all no access, same as the current setup with allow.sysvipc >> off. >> >> new: A jail may see use the IPC objects that it has created. It >> also >> gets its own IPC key namespace, so different jails may have their >> own >> objects using the same key value. The parent jail (or base system) >> can >> see the jail's IPC objects, but not its keys. 
>> >> PR: 48471 >> Submitted by: based on work by kikuchan98@gmail.com >> MFC after: 5 days >> >> Modified: >> head/sys/kern/sysv_msg.c >> head/sys/kern/sysv_sem.c >> head/sys/kern/sysv_shm.c >> head/usr.sbin/jail/jail.8 >> >> Modified: head/sys/kern/sysv_msg.c >> ============================================================================== >> --- head/sys/kern/sysv_msg.c Mon Apr 25 17:01:13 2016 >> (r298584) >> +++ head/sys/kern/sysv_msg.c Mon Apr 25 17:06:50 2016 >> (r298585) >> @@ -62,8 +62,11 @@ __FBSDID("$FreeBSD$"); >> #include <sys/lock.h> >> #include <sys/mutex.h> >> #include <sys/module.h> >> +#include <sys/mount.h> >> #include <sys/msg.h> >> #include <sys/racct.h> >> +#include <sys/sbuf.h> >> +#include <sys/sx.h> >> #include <sys/syscall.h> >> #include <sys/syscallsubr.h> >> #include <sys/sysent.h> >> @@ -80,6 +83,14 @@ static MALLOC_DEFINE(M_MSG, "msg", "SVID >> static int msginit(void); >> static int msgunload(void); >> static int sysvmsg_modload(struct module *, int, void *); >> +static void msq_remove(struct msqid_kernel *); >> +static struct prison *msg_find_prison(struct ucred *); >> +static int msq_prison_cansee(struct prison *, struct msqid_kernel *); >> +static int msg_prison_check(void *, void *); >> +static int msg_prison_set(void *, void *); >> +static int msg_prison_get(void *, void *); >> +static int msg_prison_remove(void *, void *); >> +static void msg_prison_cleanup(struct prison *); >> >> >> #ifdef MSG_DEBUG >> @@ -155,6 +166,7 @@ static struct msgmap *msgmaps; /* MSGSEG >> static struct msg *msghdrs; /* MSGTQL msg headers */ >> static struct msqid_kernel *msqids; /* MSGMNI msqid_kernel >> struct's */ >> static struct mtx msq_mtx; /* global mutex for message queues. 
*/ >> +static unsigned msg_prison_slot;/* prison OSD slot */ >> >> static struct syscall_helper_data msg_syscalls[] = { >> SYSCALL_INIT_HELPER(msgctl), >> @@ -194,7 +206,15 @@ static struct syscall_helper_data msg32_ >> static int >> msginit() >> { >> + struct prison *pr; >> + void *rsv; >> int i, error; >> + osd_method_t methods[PR_MAXMETHOD] = { >> + [PR_METHOD_CHECK] = msg_prison_check, >> + [PR_METHOD_SET] = msg_prison_set, >> + [PR_METHOD_GET] = msg_prison_get, >> + [PR_METHOD_REMOVE] = msg_prison_remove, >> + }; >> >> msginfo.msgmax = msginfo.msgseg * msginfo.msgssz; >> msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK); >> @@ -252,6 +272,29 @@ msginit() >> } >> mtx_init(&msq_mtx, "msq", NULL, MTX_DEF); >> >> + /* Set current prisons according to their allow.sysvipc. */ >> + msg_prison_slot = osd_jail_register(NULL, methods); >> + rsv = osd_reserve(msg_prison_slot); >> + prison_lock(&prison0); >> + (void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, >> &prison0); >> + prison_unlock(&prison0); >> + rsv = NULL; >> + sx_slock(&allprison_lock); >> + TAILQ_FOREACH(pr, &allprison, pr_list) { >> + if (rsv == NULL) >> + rsv = osd_reserve(msg_prison_slot); >> + prison_lock(pr); >> + if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > >> 0) { >> + (void)osd_jail_set_reserved(pr, >> msg_prison_slot, rsv, >> + &prison0); >> + rsv = NULL; >> + } >> + prison_unlock(pr); >> + } >> + if (rsv != NULL) >> + osd_free_reserved(rsv); >> + sx_sunlock(&allprison_lock); >> + >> error = syscall_helper_register(msg_syscalls, >> SY_THR_STATIC_KLD); >> if (error != 0) >> return (error); >> @@ -292,6 +335,8 @@ msgunload() >> if (msqid != msginfo.msgmni) >> return (EBUSY); >> >> + if (msg_prison_slot != 0) >> + osd_jail_deregister(msg_prison_slot); >> #ifdef MAC >> for (i = 0; i < msginfo.msgtql; i++) >> mac_sysvmsg_destroy(&msghdrs[i]); >> @@ -366,6 +411,67 @@ msg_freehdr(msghdr) >> #endif >> } >> >> +static void >> +msq_remove(struct msqid_kernel *msqkptr) >> +{ >> + struct msg 
*msghdr; >> + >> + racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1); >> + racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, >> msqkptr->u.msg_qnum); >> + racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, >> msqkptr->u.msg_cbytes); >> + crfree(msqkptr->cred); >> + msqkptr->cred = NULL; >> + >> + /* Free the message headers */ >> + msghdr = msqkptr->u.msg_first; >> + while (msghdr != NULL) { >> + struct msg *msghdr_tmp; >> + >> + /* Free the segments of each message */ >> + msqkptr->u.msg_cbytes -= msghdr->msg_ts; >> + msqkptr->u.msg_qnum--; >> + msghdr_tmp = msghdr; >> + msghdr = msghdr->msg_next; >> + msg_freehdr(msghdr_tmp); >> + } >> + >> + if (msqkptr->u.msg_cbytes != 0) >> + panic("msg_cbytes is screwed up"); >> + if (msqkptr->u.msg_qnum != 0) >> + panic("msg_qnum is screwed up"); >> + >> + msqkptr->u.msg_qbytes = 0; /* Mark it as free */ >> + >> +#ifdef MAC >> + mac_sysvmsq_cleanup(msqkptr); >> +#endif >> + >> + wakeup(msqkptr); >> +} >> + >> +static struct prison * >> +msg_find_prison(struct ucred *cred) >> +{ >> + struct prison *pr, *rpr; >> + >> + pr = cred->cr_prison; >> + prison_lock(pr); >> + rpr = osd_jail_get(pr, msg_prison_slot); >> + prison_unlock(pr); >> + return rpr; >> +} >> + >> +static int >> +msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr) >> +{ >> + >> + if (msqkptr->cred == NULL || >> + !(rpr == msqkptr->cred->cr_prison || >> + prison_ischild(rpr, msqkptr->cred->cr_prison))) >> + return (EINVAL); >> + return (0); >> +} >> + >> #ifndef _SYS_SYSPROTO_H_ >> struct msgctl_args { >> int msqid; >> @@ -402,8 +508,10 @@ kern_msgctl(td, msqid, cmd, msqbuf) >> { >> int rval, error, msqix; >> register struct msqid_kernel *msqkptr; >> + struct prison *rpr; >> >> - if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >> + rpr = msg_find_prison(td->td_ucred); >> + if (rpr == NULL) >> return (ENOSYS); >> >> msqix = IPCID_TO_IX(msqid); >> @@ -427,6 +535,13 @@ kern_msgctl(td, msqid, cmd, msqbuf) >> error = EINVAL; >> goto done2; >> } >> + >> + error 
= msq_prison_cansee(rpr, msqkptr); >> + if (error != 0) { >> + DPRINTF(("requester can't see prison\n")); >> + goto done2; >> + } >> + >> #ifdef MAC >> error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd); >> if (error != 0) >> @@ -440,7 +555,9 @@ kern_msgctl(td, msqid, cmd, msqbuf) >> >> case IPC_RMID: >> { >> +#ifdef MAC >> struct msg *msghdr; >> +#endif >> if ((error = ipcperm(td, &msqkptr->u.msg_perm, >> IPC_M))) >> goto done2; >> >> @@ -462,37 +579,7 @@ kern_msgctl(td, msqid, cmd, msqbuf) >> } >> #endif >> >> - racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1); >> - racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, >> msqkptr->u.msg_qnum); >> - racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, >> msqkptr->u.msg_cbytes); >> - crfree(msqkptr->cred); >> - msqkptr->cred = NULL; >> - >> - /* Free the message headers */ >> - msghdr = msqkptr->u.msg_first; >> - while (msghdr != NULL) { >> - struct msg *msghdr_tmp; >> - >> - /* Free the segments of each message */ >> - msqkptr->u.msg_cbytes -= msghdr->msg_ts; >> - msqkptr->u.msg_qnum--; >> - msghdr_tmp = msghdr; >> - msghdr = msghdr->msg_next; >> - msg_freehdr(msghdr_tmp); >> - } >> - >> - if (msqkptr->u.msg_cbytes != 0) >> - panic("msg_cbytes is screwed up"); >> - if (msqkptr->u.msg_qnum != 0) >> - panic("msg_qnum is screwed up"); >> - >> - msqkptr->u.msg_qbytes = 0; /* Mark it as free */ >> - >> -#ifdef MAC >> - mac_sysvmsq_cleanup(msqkptr); >> -#endif >> - >> - wakeup(msqkptr); >> + msq_remove(msqkptr); >> } >> >> break; >> @@ -529,6 +616,8 @@ kern_msgctl(td, msqid, cmd, msqbuf) >> goto done2; >> } >> *msqbuf = msqkptr->u; >> + if (td->td_ucred->cr_prison != >> msqkptr->cred->cr_prison) >> + msqbuf->msg_perm.key = IPC_PRIVATE; >> break; >> >> default: >> @@ -564,7 +653,7 @@ sys_msgget(td, uap) >> >> DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg)); >> >> - if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >> + if (msg_find_prison(cred) == NULL) >> return (ENOSYS); >> >> mtx_lock(&msq_mtx); >> @@ -572,6 +661,8 @@ 
sys_msgget(td, uap) >> for (msqid = 0; msqid < msginfo.msgmni; msqid++) { >> msqkptr = &msqids[msqid]; >> if (msqkptr->u.msg_qbytes != 0 && >> + msqkptr->cred != NULL && >> + msqkptr->cred->cr_prison == >> cred->cr_prison && >> msqkptr->u.msg_perm.key == key) >> break; >> } >> @@ -684,12 +775,14 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgf >> int msqix, segs_needed, error = 0; >> register struct msqid_kernel *msqkptr; >> register struct msg *msghdr; >> + struct prison *rpr; >> short next; >> #ifdef RACCT >> size_t saved_msgsz; >> #endif >> >> - if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >> + rpr = msg_find_prison(td->td_ucred); >> + if (rpr == NULL) >> return (ENOSYS); >> >> mtx_lock(&msq_mtx); >> @@ -714,6 +807,11 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgf >> goto done2; >> } >> >> + if ((error = msq_prison_cansee(rpr, msqkptr))) { >> + DPRINTF(("requester can't see prison\n")); >> + goto done2; >> + } >> + >> if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) { >> DPRINTF(("requester doesn't have write access\n")); >> goto done2; >> @@ -1052,10 +1150,12 @@ kern_msgrcv(td, msqid, msgp, msgsz, msgt >> size_t len; >> register struct msqid_kernel *msqkptr; >> register struct msg *msghdr; >> + struct prison *rpr; >> int msqix, error = 0; >> short next; >> >> - if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >> + rpr = msg_find_prison(td->td_ucred); >> + if (rpr == NULL) >> return (ENOSYS); >> >> msqix = IPCID_TO_IX(msqid); >> @@ -1079,6 +1179,11 @@ kern_msgrcv(td, msqid, msgp, msgsz, msgt >> goto done2; >> } >> >> + if ((error = msq_prison_cansee(rpr, msqkptr))) { >> + DPRINTF(("requester can't see prison\n")); >> + goto done2; >> + } >> + >> if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) { >> DPRINTF(("requester doesn't have read access\n")); >> goto done2; >> @@ -1318,9 +1423,39 @@ sys_msgrcv(td, uap) >> static int >> sysctl_msqids(SYSCTL_HANDLER_ARGS) >> { >> + struct sbuf sb; >> + struct msqid_kernel tmp, empty; >> + struct msqid_kernel 
*msqkptr; >> + struct prison *rpr; >> + int error, i; >> + >> + error = sysctl_wire_old_buffer(req, 0); >> + if (error != 0) >> + goto done; >> + rpr = msg_find_prison(req->td->td_ucred); >> + sbuf_new_for_sysctl(&sb, NULL, sizeof(struct msqid_kernel) * >> + msginfo.msgmni, req); >> + >> + bzero(&empty, sizeof(empty)); >> + for (i = 0; i < msginfo.msgmni; i++) { >> + msqkptr = &msqids[i]; >> + if (msqkptr->u.msg_qbytes == 0 || rpr == NULL || >> + msq_prison_cansee(rpr, msqkptr) != 0) { >> + msqkptr = &empty; >> + } else if (req->td->td_ucred->cr_prison != >> + msqkptr->cred->cr_prison) { >> + bcopy(msqkptr, &tmp, sizeof(tmp)); >> + msqkptr = &tmp; >> + msqkptr->u.msg_perm.key = IPC_PRIVATE; >> + } >> >> - return (SYSCTL_OUT(req, msqids, >> - sizeof(struct msqid_kernel) * msginfo.msgmni)); >> + sbuf_bcat(&sb, msqkptr, sizeof(*msqkptr)); >> + } >> + error = sbuf_finish(&sb); >> + sbuf_delete(&sb); >> + >> +done: >> + return (error); >> } >> >> SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, >> 0, >> @@ -1338,6 +1473,181 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, >> SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLTYPE_OPAQUE | CTLFLAG_RD, >> NULL, 0, sysctl_msqids, "", "Message queue IDs"); >> >> +static int >> +msg_prison_check(void *obj, void *data) >> +{ >> + struct prison *pr = obj; >> + struct prison *prpr; >> + struct vfsoptlist *opts = data; >> + int error, jsys; >> + >> + /* >> + * sysvmsg is a jailsys integer. >> + * It must be "disable" if the parent jail is disabled.
>> + */ >> + error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)); >> + if (error != ENOENT) { >> + if (error != 0) >> + return (error); >> + switch (jsys) { >> + case JAIL_SYS_DISABLE: >> + break; >> + case JAIL_SYS_NEW: >> + case JAIL_SYS_INHERIT: >> + prison_lock(pr->pr_parent); >> + prpr = osd_jail_get(pr->pr_parent, >> msg_prison_slot); >> + prison_unlock(pr->pr_parent); >> + if (prpr == NULL) >> + return (EPERM); >> + break; >> + default: >> + return (EINVAL); >> + } >> + } >> + >> + return (0); >> +} >> + >> +static int >> +msg_prison_set(void *obj, void *data) >> +{ >> + struct prison *pr = obj; >> + struct prison *tpr, *orpr, *nrpr, *trpr; >> + struct vfsoptlist *opts = data; >> + void *rsv; >> + int jsys, descend; >> + >> + /* >> + * sysvmsg controls which jail is the root of the associated >> msgs (this >> + * jail or same as the parent), or if the feature is available >> at all. >> + */ >> + if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == >> ENOENT) >> + jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0) >> + ? JAIL_SYS_INHERIT >> + : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0) >> + ? JAIL_SYS_DISABLE >> + : -1; >> + if (jsys == JAIL_SYS_DISABLE) { >> + prison_lock(pr); >> + orpr = osd_jail_get(pr, msg_prison_slot); >> + if (orpr != NULL) >> + osd_jail_del(pr, msg_prison_slot); >> + prison_unlock(pr); >> + if (orpr != NULL) { >> + if (orpr == pr) >> + msg_prison_cleanup(pr); >> + /* Disable all child jails as well. 
*/ >> + FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { >> + prison_lock(tpr); >> + trpr = osd_jail_get(tpr, >> msg_prison_slot); >> + if (trpr != NULL) { >> + osd_jail_del(tpr, >> msg_prison_slot); >> + prison_unlock(tpr); >> + if (trpr == tpr) >> + >> msg_prison_cleanup(tpr); >> + } else { >> + prison_unlock(tpr); >> + descend = 0; >> + } >> + } >> + } >> + } else if (jsys != -1) { >> + if (jsys == JAIL_SYS_NEW) >> + nrpr = pr; >> + else { >> + prison_lock(pr->pr_parent); >> + nrpr = osd_jail_get(pr->pr_parent, >> msg_prison_slot); >> + prison_unlock(pr->pr_parent); >> + } >> + rsv = osd_reserve(msg_prison_slot); >> + prison_lock(pr); >> + orpr = osd_jail_get(pr, msg_prison_slot); >> + if (orpr != nrpr) >> + (void)osd_jail_set_reserved(pr, >> msg_prison_slot, rsv, >> + nrpr); >> + else >> + osd_free_reserved(rsv); >> + prison_unlock(pr); >> + if (orpr != nrpr) { >> + if (orpr == pr) >> + msg_prison_cleanup(pr); >> + if (orpr != NULL) { >> + /* Change child jails matching the old >> root, */ >> + FOREACH_PRISON_DESCENDANT(pr, tpr, >> descend) { >> + prison_lock(tpr); >> + trpr = osd_jail_get(tpr, >> + msg_prison_slot); >> + if (trpr == orpr) { >> + >> (void)osd_jail_set(tpr, >> + msg_prison_slot, >> nrpr); >> + prison_unlock(tpr); >> + if (trpr == tpr) >> + >> msg_prison_cleanup(tpr); >> + } else { >> + prison_unlock(tpr); >> + descend = 0; >> + } >> + } >> + } >> + } >> + } >> + >> + return (0); >> +} >> + >> +static int >> +msg_prison_get(void *obj, void *data) >> +{ >> + struct prison *pr = obj; >> + struct prison *rpr; >> + struct vfsoptlist *opts = data; >> + int error, jsys; >> + >> + /* Set sysvmsg based on the jail's root prison. */ >> + prison_lock(pr); >> + rpr = osd_jail_get(pr, msg_prison_slot); >> + prison_unlock(pr); >> + jsys = rpr == NULL ? JAIL_SYS_DISABLE >> + : rpr == pr ? 
JAIL_SYS_NEW : JAIL_SYS_INHERIT; >> + error = vfs_setopt(opts, "sysvmsg", &jsys, sizeof(jsys)); >> + if (error == ENOENT) >> + error = 0; >> + return (error); >> +} >> + >> +static int >> +msg_prison_remove(void *obj, void *data __unused) >> +{ >> + struct prison *pr = obj; >> + struct prison *rpr; >> + >> + prison_lock(pr); >> + rpr = osd_jail_get(pr, msg_prison_slot); >> + prison_unlock(pr); >> + if (rpr == pr) >> + msg_prison_cleanup(pr); >> + return (0); >> +} >> + >> +static void >> +msg_prison_cleanup(struct prison *pr) >> +{ >> + struct msqid_kernel *msqkptr; >> + int i; >> + >> + /* Remove any msqs that belong to this jail. */ >> + mtx_lock(&msq_mtx); >> + for (i = 0; i < msginfo.msgmni; i++) { >> + msqkptr = &msqids[i]; >> + if (msqkptr->u.msg_qbytes != 0 && >> + msqkptr->cred != NULL && msqkptr->cred->cr_prison >> == pr) >> + msq_remove(msqkptr); >> + } >> + mtx_unlock(&msq_mtx); >> +} >> + >> +SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message >> queues"); >> + >> #ifdef COMPAT_FREEBSD32 >> int >> freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args >> *uap) >> @@ -1516,8 +1826,6 @@ sys_msgsys(td, uap) >> { >> int error; >> >> - if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >> - return (ENOSYS); >> if (uap->which < 0 || uap->which >= nitems(msgcalls)) >> return (EINVAL); >> error = (*msgcalls[uap->which])(td, &uap->a2); >> >> Modified: head/sys/kern/sysv_sem.c >> ============================================================================== >> --- head/sys/kern/sysv_sem.c Mon Apr 25 17:01:13 2016 >> (r298584) >> +++ head/sys/kern/sysv_sem.c Mon Apr 25 17:06:50 2016 >> (r298585) >> @@ -52,7 +52,9 @@ __FBSDID("$FreeBSD$"); >> #include <sys/module.h> >> #include <sys/mutex.h> >> #include <sys/racct.h> >> +#include <sys/sbuf.h> >> #include <sys/sem.h> >> +#include <sys/sx.h> >> #include <sys/syscall.h> >> #include <sys/syscallsubr.h> >> #include <sys/sysent.h> >> @@ -78,7 +80,16 @@ static int sysvsem_modload(struct module >> 
static int semunload(void); >> static void semexit_myhook(void *arg, struct proc *p); >> static int sysctl_sema(SYSCTL_HANDLER_ARGS); >> -static int semvalid(int semid, struct semid_kernel *semakptr); >> +static int semvalid(int semid, struct prison *rpr, >> + struct semid_kernel *semakptr); >> +static void sem_remove(int semidx, struct ucred *cred); >> +static struct prison *sem_find_prison(struct ucred *); >> +static int sem_prison_cansee(struct prison *, struct semid_kernel *); >> +static int sem_prison_check(void *, void *); >> +static int sem_prison_set(void *, void *); >> +static int sem_prison_get(void *, void *); >> +static int sem_prison_remove(void *, void *); >> +static void sem_prison_cleanup(struct prison *); >> >> #ifndef _SYS_SYSPROTO_H_ >> struct __semctl_args; >> @@ -104,6 +115,7 @@ LIST_HEAD(, sem_undo) semu_list; /* list >> LIST_HEAD(, sem_undo) semu_free_list; /* list of free undo >> structures */ >> static int *semu; /* undo structure pool */ >> static eventhandler_tag semexit_tag; >> +static unsigned sem_prison_slot; /* prison OSD slot */ >> >> #define SEMUNDO_MTX sem_undo_mtx >> #define SEMUNDO_LOCK() mtx_lock(&SEMUNDO_MTX); >> @@ -247,7 +259,15 @@ static struct syscall_helper_data sem32_ >> static int >> seminit(void) >> { >> + struct prison *pr; >> + void *rsv; >> int i, error; >> + osd_method_t methods[PR_MAXMETHOD] = { >> + [PR_METHOD_CHECK] = sem_prison_check, >> + [PR_METHOD_SET] = sem_prison_set, >> + [PR_METHOD_GET] = sem_prison_get, >> + [PR_METHOD_REMOVE] = sem_prison_remove, >> + }; >> >> sem = malloc(sizeof(struct sem) * seminfo.semmns, M_SEM, >> M_WAITOK); >> sema = malloc(sizeof(struct semid_kernel) * seminfo.semmni, >> M_SEM, >> @@ -278,6 +298,29 @@ seminit(void) >> semexit_tag = EVENTHANDLER_REGISTER(process_exit, >> semexit_myhook, NULL, >> EVENTHANDLER_PRI_ANY); >> >> + /* Set current prisons according to their allow.sysvipc. 
*/ >> + sem_prison_slot = osd_jail_register(NULL, methods); >> + rsv = osd_reserve(sem_prison_slot); >> + prison_lock(&prison0); >> + (void)osd_jail_set_reserved(&prison0, sem_prison_slot, rsv, >> &prison0); >> + prison_unlock(&prison0); >> + rsv = NULL; >> + sx_slock(&allprison_lock); >> + TAILQ_FOREACH(pr, &allprison, pr_list) { >> + if (rsv == NULL) >> + rsv = osd_reserve(sem_prison_slot); >> + prison_lock(pr); >> + if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > >> 0) { >> + (void)osd_jail_set_reserved(pr, >> sem_prison_slot, rsv, >> + &prison0); >> + rsv = NULL; >> + } >> + prison_unlock(pr); >> + } >> + if (rsv != NULL) >> + osd_free_reserved(rsv); >> + sx_sunlock(&allprison_lock); >> + >> error = syscall_helper_register(sem_syscalls, >> SY_THR_STATIC_KLD); >> if (error != 0) >> return (error); >> @@ -303,6 +346,8 @@ semunload(void) >> #endif >> syscall_helper_unregister(sem_syscalls); >> EVENTHANDLER_DEREGISTER(process_exit, semexit_tag); >> + if (sem_prison_slot != 0) >> + osd_jail_deregister(sem_prison_slot); >> #ifdef MAC >> for (i = 0; i < seminfo.semmni; i++) >> mac_sysvsem_destroy(&sema[i]); >> @@ -489,11 +534,74 @@ semundo_clear(int semid, int semnum) >> } >> >> static int >> -semvalid(int semid, struct semid_kernel *semakptr) >> +semvalid(int semid, struct prison *rpr, struct semid_kernel >> *semakptr) >> { >> >> return ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 || >> - semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ? EINVAL : >> 0); >> + semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) || >> + sem_prison_cansee(rpr, semakptr) ? EINVAL : 0); >> +} >> + >> +static void >> +sem_remove(int semidx, struct ucred *cred) >> +{ >> + struct semid_kernel *semakptr; >> + int i; >> + >> + KASSERT(semidx >= 0 && semidx < seminfo.semmni, >> + ("semidx out of bounds")); >> + semakptr = &sema[semidx]; >> + semakptr->u.sem_perm.cuid = cred ? cred->cr_uid : 0; >> + semakptr->u.sem_perm.uid = cred ? 
cred->cr_uid : 0; >> + semakptr->u.sem_perm.mode = 0; >> + racct_sub_cred(semakptr->cred, RACCT_NSEM, >> semakptr->u.sem_nsems); >> + crfree(semakptr->cred); >> + semakptr->cred = NULL; >> + SEMUNDO_LOCK(); >> + semundo_clear(semidx, -1); >> + SEMUNDO_UNLOCK(); >> +#ifdef MAC >> + mac_sysvsem_cleanup(semakptr); >> +#endif >> + wakeup(semakptr); >> + for (i = 0; i < seminfo.semmni; i++) { >> + if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && >> + sema[i].u.sem_base > semakptr->u.sem_base) >> + mtx_lock_flags(&sema_mtx[i], LOP_DUPOK); >> + } >> + for (i = semakptr->u.sem_base - sem; i < semtot; i++) >> + sem[i] = sem[i + semakptr->u.sem_nsems]; >> + for (i = 0; i < seminfo.semmni; i++) { >> + if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && >> + sema[i].u.sem_base > semakptr->u.sem_base) { >> + sema[i].u.sem_base -= semakptr->u.sem_nsems; >> + mtx_unlock(&sema_mtx[i]); >> + } >> + } >> + semtot -= semakptr->u.sem_nsems; >> +} >> + >> +static struct prison * >> +sem_find_prison(struct ucred *cred) >> +{ >> + struct prison *pr, *rpr; >> + >> + pr = cred->cr_prison; >> + prison_lock(pr); >> + rpr = osd_jail_get(pr, sem_prison_slot); >> + prison_unlock(pr); >> + return rpr; >> +} >> + >> +static int >> +sem_prison_cansee(struct prison *rpr, struct semid_kernel *semakptr) >> +{ >> + >> + if (semakptr->cred == NULL || >> + !(rpr == semakptr->cred->cr_prison || >> + prison_ischild(rpr, semakptr->cred->cr_prison))) >> + return (EINVAL); >> + return (0); >> } >> >> /* >> @@ -572,6 +680,7 @@ kern_semctl(struct thread *td, int semid >> u_short *array; >> struct ucred *cred = td->td_ucred; >> int i, error; >> + struct prison *rpr; >> struct semid_ds *sbuf; >> struct semid_kernel *semakptr; >> struct mtx *sema_mtxp; >> @@ -580,7 +689,9 @@ kern_semctl(struct thread *td, int semid >> >> DPRINTF(("call to semctl(%d, %d, %d, 0x%p)\n", >> semid, semnum, cmd, arg)); >> - if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >> + >> + rpr = sem_find_prison(td->td_ucred); >> + if (sem == NULL) >> 
return (ENOSYS); >> >> array = NULL; >> @@ -600,6 +711,8 @@ kern_semctl(struct thread *td, int semid >> error = EINVAL; >> goto done2; >> } >> + if ((error = sem_prison_cansee(rpr, semakptr))) >> + goto done2; >> if ((error = ipcperm(td, &semakptr->u.sem_perm, >> IPC_R))) >> goto done2; >> #ifdef MAC >> @@ -608,6 +721,8 @@ kern_semctl(struct thread *td, int semid >> goto done2; >> #endif >> bcopy(&semakptr->u, arg->buf, sizeof(struct >> semid_ds)); >> + if (cred->cr_prison != semakptr->cred->cr_prison) >> + arg->buf->sem_perm.key = IPC_PRIVATE; >> *rval = IXSEQ_TO_IPCID(semid, semakptr->u.sem_perm); >> mtx_unlock(sema_mtxp); >> return (0); >> @@ -622,6 +737,7 @@ kern_semctl(struct thread *td, int semid >> if (cmd == IPC_RMID) >> mtx_lock(&sem_mtx); >> mtx_lock(sema_mtxp); >> + >> #ifdef MAC >> error = mac_sysvsem_check_semctl(cred, semakptr, cmd); >> if (error != 0) >> @@ -633,42 +749,15 @@ kern_semctl(struct thread *td, int semid >> >> switch (cmd) { >> case IPC_RMID: >> - if ((error = semvalid(semid, semakptr)) != 0) >> + if ((error = semvalid(semid, rpr, semakptr)) != 0) >> goto done2; >> if ((error = ipcperm(td, &semakptr->u.sem_perm, >> IPC_M))) >> goto done2; >> - semakptr->u.sem_perm.cuid = cred->cr_uid; >> - semakptr->u.sem_perm.uid = cred->cr_uid; >> - semakptr->u.sem_perm.mode = 0; >> - racct_sub_cred(semakptr->cred, RACCT_NSEM, >> semakptr->u.sem_nsems); >> - crfree(semakptr->cred); >> - semakptr->cred = NULL; >> - SEMUNDO_LOCK(); >> - semundo_clear(semidx, -1); >> - SEMUNDO_UNLOCK(); >> -#ifdef MAC >> - mac_sysvsem_cleanup(semakptr); >> -#endif >> - wakeup(semakptr); >> - for (i = 0; i < seminfo.semmni; i++) { >> - if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && >> - sema[i].u.sem_base > semakptr->u.sem_base) >> - mtx_lock_flags(&sema_mtx[i], >> LOP_DUPOK); >> - } >> - for (i = semakptr->u.sem_base - sem; i < semtot; i++) >> - sem[i] = sem[i + semakptr->u.sem_nsems]; >> - for (i = 0; i < seminfo.semmni; i++) { >> - if ((sema[i].u.sem_perm.mode & SEM_ALLOC) 
&& >> - sema[i].u.sem_base > semakptr->u.sem_base) >> { >> - sema[i].u.sem_base -= >> semakptr->u.sem_nsems; >> - mtx_unlock(&sema_mtx[i]); >> - } >> - } >> - semtot -= semakptr->u.sem_nsems; >> + sem_remove(semidx, cred); >> break; >> >> case IPC_SET: >> - if ((error = semvalid(semid, semakptr)) != 0) >> + if ((error = semvalid(semid, rpr, semakptr)) != 0) >> goto done2; >> if ((error = ipcperm(td, &semakptr->u.sem_perm, >> IPC_M))) >> goto done2; >> @@ -681,15 +770,17 @@ kern_semctl(struct thread *td, int semid >> break; >> >> case IPC_STAT: >> - if ((error = semvalid(semid, semakptr)) != 0) >> + if ((error = semvalid(semid, rpr, semakptr)) != 0) >> goto done2; >> if ((error = ipcperm(td, &semakptr->u.sem_perm, >> IPC_R))) >> goto done2; >> bcopy(&semakptr->u, arg->buf, sizeof(struct >> semid_ds)); >> + if (cred->cr_prison != semakptr->cred->cr_prison) >> + arg->buf->sem_perm.key = IPC_PRIVATE; >> break; >> >> case GETNCNT: >> - if ((error = semvalid(semid, semakptr)) != 0) >> + if ((error = semvalid(semid, rpr, semakptr)) != 0) >> goto done2; >> if ((error = ipcperm(td, &semakptr->u.sem_perm, >> IPC_R))) >> goto done2; >> @@ -701,7 +792,7 @@ kern_semctl(struct thread *td, int semid >> break; >> >> case GETPID: >> - if ((error = semvalid(semid, semakptr)) != 0) >> + if ((error = semvalid(semid, rpr, semakptr)) != 0) >> goto done2; >> if ((error = ipcperm(td, &semakptr->u.sem_perm, >> IPC_R))) >> goto done2; >> @@ -713,7 +804,7 @@ kern_semctl(struct thread *td, int semid >> break; >> >> case GETVAL: >> - if ((error = semvalid(semid, semakptr)) != 0) >> + if ((error = semvalid(semid, rpr, semakptr)) != 0) >> goto done2; >> if ((error = ipcperm(td, &semakptr->u.sem_perm, >> IPC_R))) >> goto done2; >> @@ -749,7 +840,7 @@ kern_semctl(struct thread *td, int semid >> mtx_unlock(sema_mtxp); >> array = malloc(sizeof(*array) * count, M_TEMP, >> M_WAITOK); >> mtx_lock(sema_mtxp); >> - if ((error = semvalid(semid, semakptr)) != 0) >> + if ((error = semvalid(semid, rpr, 
semakptr)) != 0) >> goto done2; >> KASSERT(count == semakptr->u.sem_nsems, ("nsems >> changed")); >> if ((error = ipcperm(td, &semakptr->u.sem_perm, >> IPC_R))) >> @@ -762,7 +853,7 @@ kern_semctl(struct thread *td, int semid >> break; >> >> case GETZCNT: >> - if ((error = semvalid(semid, semakptr)) != 0) >> + if ((error = semvalid(semid, rpr, semakptr)) != 0) >> goto done2; >> if ((error = ipcperm(td, &semakptr->u.sem_perm, >> IPC_R))) >> goto done2; >> @@ -774,7 +865,7 @@ kern_semctl(struct thread *td, int semid >> break; >> >> case SETVAL: >> - if ((error = semvalid(semid, semakptr)) != 0) >> + if ((error = semvalid(semid, rpr, semakptr)) != 0) >> goto done2; >> if ((error = ipcperm(td, &semakptr->u.sem_perm, >> IPC_W))) >> goto done2; >> @@ -805,7 +896,7 @@ kern_semctl(struct thread *td, int semid >> mtx_lock(sema_mtxp); >> if (error) >> break; >> - if ((error = semvalid(semid, semakptr)) != 0) >> + if ((error = semvalid(semid, rpr, semakptr)) != 0) >> goto done2; >> KASSERT(count == semakptr->u.sem_nsems, ("nsems >> changed")); >> if ((error = ipcperm(td, &semakptr->u.sem_perm, >> IPC_W))) >> @@ -855,13 +946,16 @@ sys_semget(struct thread *td, struct sem >> struct ucred *cred = td->td_ucred; >> >> DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg)); >> - if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >> + >> + if (sem_find_prison(cred) == NULL) >> return (ENOSYS); >> >> mtx_lock(&sem_mtx); >> if (key != IPC_PRIVATE) { >> for (semid = 0; semid < seminfo.semmni; semid++) { >> if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) >> && >> + sema[semid].cred != NULL && >> + sema[semid].cred->cr_prison == >> cred->cr_prison && >> sema[semid].u.sem_perm.key == key) >> break; >> } >> @@ -978,6 +1072,7 @@ sys_semop(struct thread *td, struct semo >> struct sembuf small_sops[SMALL_SOPS]; >> int semid = uap->semid; >> size_t nsops = uap->nsops; >> + struct prison *rpr; >> struct sembuf *sops; >> struct semid_kernel *semakptr; >> struct sembuf *sopptr = NULL; >> @@ -994,7 
+1089,8 @@ sys_semop(struct thread *td, struct semo >> #endif >> DPRINTF(("call to semop(%d, %p, %u)\n", semid, sops, nsops)); >> >> - if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >> + rpr = sem_find_prison(td->td_ucred); >> + if (sem == NULL) >> return (ENOSYS); >> >> semid = IPCID_TO_IX(semid); /* Convert back to zero origin >> */ >> @@ -1044,6 +1140,8 @@ sys_semop(struct thread *td, struct semo >> error = EINVAL; >> goto done2; >> } >> + if ((error = sem_prison_cansee(rpr, semakptr)) != 0) >> + goto done2; >> /* >> * Initial pass thru sops to see what permissions are needed. >> * Also perform any checks that don't need repeating on each >> @@ -1367,11 +1465,217 @@ semexit_myhook(void *arg, struct proc *p >> static int >> sysctl_sema(SYSCTL_HANDLER_ARGS) >> { >> + struct prison *rpr; >> + struct sbuf sb; >> + struct semid_kernel tmp, empty; >> + struct semid_kernel *semakptr; >> + int error, i; >> + >> + error = sysctl_wire_old_buffer(req, 0); >> + if (error != 0) >> + goto done; >> + rpr = sem_find_prison(req->td->td_ucred); >> + sbuf_new_for_sysctl(&sb, NULL, sizeof(struct semid_kernel) * >> + seminfo.semmni, req); >> + >> + bzero(&empty, sizeof(empty)); >> + for (i = 0; i < seminfo.semmni; i++) { >> + semakptr = &sema[i]; >> + if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 || >> + rpr == NULL || sem_prison_cansee(rpr, semakptr) != >> 0) { >> + semakptr = &empty; >> + } else if (req->td->td_ucred->cr_prison != >> + semakptr->cred->cr_prison) { >> + bcopy(semakptr, &tmp, sizeof(tmp)); >> + semakptr = &tmp; >> + semakptr->u.sem_perm.key = IPC_PRIVATE; >> + } >> >> - return (SYSCTL_OUT(req, sema, >> - sizeof(struct semid_kernel) * seminfo.semmni)); >> + sbuf_bcat(&sb, semakptr, sizeof(*semakptr)); >> + } >> + error = sbuf_finish(&sb); >> + sbuf_delete(&sb); >> + >> +done: >> + return (error); >> } >> >> +static int >> +sem_prison_check(void *obj, void *data) >> +{ >> + struct prison *pr = obj; >> + struct prison *prpr; >> + struct vfsoptlist *opts = data; >> +
int error, jsys; >> + >> + /* >> + * sysvsem is a jailsys integer. >> + * It must be "disable" if the parent jail is disabled. >> + */ >> + error = vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys)); >> + if (error != ENOENT) { >> + if (error != 0) >> + return (error); >> + switch (jsys) { >> + case JAIL_SYS_DISABLE: >> + break; >> + case JAIL_SYS_NEW: >> + case JAIL_SYS_INHERIT: >> + prison_lock(pr->pr_parent); >> + prpr = osd_jail_get(pr->pr_parent, >> sem_prison_slot); >> + prison_unlock(pr->pr_parent); >> + if (prpr == NULL) >> + return (EPERM); >> + break; >> + default: >> + return (EINVAL); >> + } >> >> *** DIFF OUTPUT TRUNCATED AT 1000 LINES *** >> _______________________________________________ >> svn-src-head@freebsd.org mailing list >> https://lists.freebsd.org/mailman/listinfo/svn-src-head >> To unsubscribe, send any mail to >> "svn-src-head-unsubscribe@freebsd.org"
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?5a7428bcd2767e3fd3b8e2c16828e3f1>