Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 30 Apr 2016 04:02:32 +0000 (UTC)
From:      Jamie Gritton <jamie@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r298835 - in stable/10: sys/kern usr.sbin/jail
Message-ID:  <201604300402.u3U42WBM056084@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jamie
Date: Sat Apr 30 04:02:32 2016
New Revision: 298835
URL: https://svnweb.freebsd.org/changeset/base/298835

Log:
  MFC r298584:
  
    Note the existence of module-specific jail parameters, starting with the
    linux.* parameters when linux emulation is loaded.
  
  MFC r298585:
  
    Encapsulate SYSV IPC objects in jails.  Define per-module parameters
    sysvmsg, sysvsem, and sysvshm, with the following behavior:
  
    inherit: allow full access to the IPC primitives.  This is the same as
    the current setup when allow.sysvipc is on.  Jails and the base system
    can see (and modify) each other's objects, which is generally considered
    a bad thing (though may be useful in some circumstances).
  
    disable: allow no access, same as the current setup with allow.sysvipc off.
  
    new: A jail may see and use the IPC objects that it has created.  It also
    gets its own IPC key namespace, so different jails may have their own
    objects using the same key value.  The parent jail (or base system) can
    see the jail's IPC objects, but not its keys.
  
  PR:		48471

Modified:
  stable/10/sys/kern/sysv_msg.c
  stable/10/sys/kern/sysv_sem.c
  stable/10/sys/kern/sysv_shm.c
  stable/10/usr.sbin/jail/jail.8
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/kern/sysv_msg.c
==============================================================================
--- stable/10/sys/kern/sysv_msg.c	Sat Apr 30 04:01:22 2016	(r298834)
+++ stable/10/sys/kern/sysv_msg.c	Sat Apr 30 04:02:32 2016	(r298835)
@@ -62,8 +62,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/module.h>
+#include <sys/mount.h>
 #include <sys/msg.h>
 #include <sys/racct.h>
+#include <sys/sx.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
@@ -80,6 +82,14 @@ static MALLOC_DEFINE(M_MSG, "msg", "SVID
 static int msginit(void);
 static int msgunload(void);
 static int sysvmsg_modload(struct module *, int, void *);
+static void msq_remove(struct msqid_kernel *);
+static struct prison *msg_find_prison(struct ucred *);
+static int msq_prison_cansee(struct prison *, struct msqid_kernel *);
+static int msg_prison_check(void *, void *);
+static int msg_prison_set(void *, void *);
+static int msg_prison_get(void *, void *);
+static int msg_prison_remove(void *, void *);
+static void msg_prison_cleanup(struct prison *);
 
 
 #ifdef MSG_DEBUG
@@ -155,6 +165,7 @@ static struct msgmap *msgmaps;	/* MSGSEG
 static struct msg *msghdrs;	/* MSGTQL msg headers */
 static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
 static struct mtx msq_mtx;	/* global mutex for message queues. */
+static unsigned msg_prison_slot;/* prison OSD slot */
 
 static struct syscall_helper_data msg_syscalls[] = {
 	SYSCALL_INIT_HELPER(msgctl),
@@ -194,7 +205,15 @@ static struct syscall_helper_data msg32_
 static int
 msginit()
 {
+	struct prison *pr;
+	void *rsv;
 	int i, error;
+	osd_method_t methods[PR_MAXMETHOD] = {
+	    [PR_METHOD_CHECK] =		msg_prison_check,
+	    [PR_METHOD_SET] =		msg_prison_set,
+	    [PR_METHOD_GET] =		msg_prison_get,
+	    [PR_METHOD_REMOVE] =	msg_prison_remove,
+	};
 
 	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
 	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
@@ -258,6 +277,29 @@ msginit()
 	}
 	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
 
+	/* Set current prisons according to their allow.sysvipc. */
+	msg_prison_slot = osd_jail_register(NULL, methods);
+	rsv = osd_reserve(msg_prison_slot);
+	prison_lock(&prison0);
+	(void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, &prison0);
+	prison_unlock(&prison0);
+	rsv = NULL;
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(pr, &allprison, pr_list) {
+		if (rsv == NULL)
+			rsv = osd_reserve(msg_prison_slot);
+		prison_lock(pr);
+		if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
+			(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
+			    &prison0);
+			rsv = NULL;
+		}
+		prison_unlock(pr);
+	}
+	if (rsv != NULL)
+		osd_free_reserved(rsv);
+	sx_sunlock(&allprison_lock);
+
 	error = syscall_helper_register(msg_syscalls);
 	if (error != 0)
 		return (error);
@@ -298,6 +340,8 @@ msgunload()
 	if (msqid != msginfo.msgmni)
 		return (EBUSY);
 
+	if (msg_prison_slot != 0)
+		osd_jail_deregister(msg_prison_slot);
 #ifdef MAC
 	for (i = 0; i < msginfo.msgtql; i++)
 		mac_sysvmsg_destroy(&msghdrs[i]);
@@ -372,6 +416,67 @@ msg_freehdr(msghdr)
 #endif
 }
 
+static void
+msq_remove(struct msqid_kernel *msqkptr)
+{
+	struct msg *msghdr;
+
+	racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
+	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
+	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
+	crfree(msqkptr->cred);
+	msqkptr->cred = NULL;
+
+	/* Free the message headers */
+	msghdr = msqkptr->u.msg_first;
+	while (msghdr != NULL) {
+		struct msg *msghdr_tmp;
+
+		/* Free the segments of each message */
+		msqkptr->u.msg_cbytes -= msghdr->msg_ts;
+		msqkptr->u.msg_qnum--;
+		msghdr_tmp = msghdr;
+		msghdr = msghdr->msg_next;
+		msg_freehdr(msghdr_tmp);
+	}
+
+	if (msqkptr->u.msg_cbytes != 0)
+		panic("msg_cbytes is screwed up");
+	if (msqkptr->u.msg_qnum != 0)
+		panic("msg_qnum is screwed up");
+
+	msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
+
+#ifdef MAC
+	mac_sysvmsq_cleanup(msqkptr);
+#endif
+
+	wakeup(msqkptr);
+}
+
+static struct prison *
+msg_find_prison(struct ucred *cred)
+{
+	struct prison *pr, *rpr;
+
+	pr = cred->cr_prison;
+	prison_lock(pr);
+	rpr = osd_jail_get(pr, msg_prison_slot);
+	prison_unlock(pr);
+	return rpr;
+}
+
+static int
+msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr)
+{
+
+	if (msqkptr->cred == NULL ||
+	    !(rpr == msqkptr->cred->cr_prison ||
+	      prison_ischild(rpr, msqkptr->cred->cr_prison)))
+		return (EINVAL);
+	return (0);
+}
+
 #ifndef _SYS_SYSPROTO_H_
 struct msgctl_args {
 	int	msqid;
@@ -408,8 +513,10 @@ kern_msgctl(td, msqid, cmd, msqbuf)
 {
 	int rval, error, msqix;
 	register struct msqid_kernel *msqkptr;
+	struct prison *rpr;
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	rpr = msg_find_prison(td->td_ucred);
+	if (rpr == NULL)
 		return (ENOSYS);
 
 	msqix = IPCID_TO_IX(msqid);
@@ -433,6 +540,13 @@ kern_msgctl(td, msqid, cmd, msqbuf)
 		error = EINVAL;
 		goto done2;
 	}
+
+	error = msq_prison_cansee(rpr, msqkptr);
+	if (error != 0) {
+		DPRINTF(("requester can't see prison\n"));
+		goto done2;
+	}
+
 #ifdef MAC
 	error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
 	if (error != 0)
@@ -446,7 +560,9 @@ kern_msgctl(td, msqid, cmd, msqbuf)
 
 	case IPC_RMID:
 	{
+#ifdef MAC
 		struct msg *msghdr;
+#endif
 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
 			goto done2;
 
@@ -468,37 +584,7 @@ kern_msgctl(td, msqid, cmd, msqbuf)
 		}
 #endif
 
-		racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
-		racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
-		racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
-		crfree(msqkptr->cred);
-		msqkptr->cred = NULL;
-
-		/* Free the message headers */
-		msghdr = msqkptr->u.msg_first;
-		while (msghdr != NULL) {
-			struct msg *msghdr_tmp;
-
-			/* Free the segments of each message */
-			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
-			msqkptr->u.msg_qnum--;
-			msghdr_tmp = msghdr;
-			msghdr = msghdr->msg_next;
-			msg_freehdr(msghdr_tmp);
-		}
-
-		if (msqkptr->u.msg_cbytes != 0)
-			panic("msg_cbytes is screwed up");
-		if (msqkptr->u.msg_qnum != 0)
-			panic("msg_qnum is screwed up");
-
-		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
-
-#ifdef MAC
-		mac_sysvmsq_cleanup(msqkptr);
-#endif
-
-		wakeup(msqkptr);
+		msq_remove(msqkptr);
 	}
 
 		break;
@@ -535,6 +621,8 @@ kern_msgctl(td, msqid, cmd, msqbuf)
 			goto done2;
 		}
 		*msqbuf = msqkptr->u;
+		if (td->td_ucred->cr_prison != msqkptr->cred->cr_prison)
+			msqbuf->msg_perm.key = IPC_PRIVATE;
 		break;
 
 	default:
@@ -570,7 +658,7 @@ sys_msgget(td, uap)
 
 	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	if (msg_find_prison(cred) == NULL)
 		return (ENOSYS);
 
 	mtx_lock(&msq_mtx);
@@ -578,6 +666,8 @@ sys_msgget(td, uap)
 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
 			msqkptr = &msqids[msqid];
 			if (msqkptr->u.msg_qbytes != 0 &&
+			    msqkptr->cred != NULL &&
+			    msqkptr->cred->cr_prison == cred->cr_prison &&
 			    msqkptr->u.msg_perm.key == key)
 				break;
 		}
@@ -690,12 +780,14 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgf
 	int msqix, segs_needed, error = 0;
 	register struct msqid_kernel *msqkptr;
 	register struct msg *msghdr;
+	struct prison *rpr;
 	short next;
 #ifdef RACCT
 	size_t saved_msgsz;
 #endif
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	rpr = msg_find_prison(td->td_ucred);
+	if (rpr == NULL)
 		return (ENOSYS);
 
 	mtx_lock(&msq_mtx);
@@ -720,6 +812,11 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgf
 		goto done2;
 	}
 
+	if ((error = msq_prison_cansee(rpr, msqkptr))) {
+		DPRINTF(("requester can't see prison\n"));
+		goto done2;
+	}
+
 	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
 		DPRINTF(("requester doesn't have write access\n"));
 		goto done2;
@@ -1058,10 +1155,12 @@ kern_msgrcv(td, msqid, msgp, msgsz, msgt
 	size_t len;
 	register struct msqid_kernel *msqkptr;
 	register struct msg *msghdr;
+	struct prison *rpr;
 	int msqix, error = 0;
 	short next;
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	rpr = msg_find_prison(td->td_ucred);
+	if (rpr == NULL)
 		return (ENOSYS);
 
 	msqix = IPCID_TO_IX(msqid);
@@ -1085,6 +1184,11 @@ kern_msgrcv(td, msqid, msgp, msgsz, msgt
 		goto done2;
 	}
 
+	if ((error = msq_prison_cansee(rpr, msqkptr))) {
+		DPRINTF(("requester can't see prison\n"));
+		goto done2;
+	}
+
 	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
 		DPRINTF(("requester doesn't have read access\n"));
 		goto done2;
@@ -1324,9 +1428,29 @@ sys_msgrcv(td, uap)
 static int
 sysctl_msqids(SYSCTL_HANDLER_ARGS)
 {
+	struct msqid_kernel tmsqk;
+	struct prison *pr, *rpr;
+	int error, i;
 
-	return (SYSCTL_OUT(req, msqids,
-	    sizeof(struct msqid_kernel) * msginfo.msgmni));
+	pr = req->td->td_ucred->cr_prison;
+	rpr = msg_find_prison(req->td->td_ucred);
+	error = 0;
+	for (i = 0; i < msginfo.msgmni; i++) {
+		mtx_lock(&msq_mtx);
+		if (msqids[i].u.msg_qbytes == 0 || rpr == NULL ||
+		    msq_prison_cansee(rpr, &msqids[i]) != 0)
+			bzero(&tmsqk, sizeof(tmsqk));
+		else {
+			tmsqk = msqids[i];
+			if (tmsqk.cred->cr_prison != pr)
+				tmsqk.u.msg_perm.key = IPC_PRIVATE;
+		}
+		mtx_unlock(&msq_mtx);
+		error = SYSCTL_OUT(req, &tmsqk, sizeof(tmsqk));
+		if (error != 0)
+			break;
+	}
+	return (error);
 }
 
 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
@@ -1341,9 +1465,185 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, 
     "Size of a message segment");
 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
     "Number of message segments");
-SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLTYPE_OPAQUE | CTLFLAG_RD,
+SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids,
+    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
     NULL, 0, sysctl_msqids, "", "Message queue IDs");
 
+static int
+msg_prison_check(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct prison *prpr;
+	struct vfsoptlist *opts = data;
+	int error, jsys;
+
+	/*
+	 * sysvmsg is a jailsys integer.
+	 * It must be "disable" if the parent jail is disabled.
+	 */
+	error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys));
+	if (error != ENOENT) {
+		if (error != 0)
+			return (error);
+		switch (jsys) {
+		case JAIL_SYS_DISABLE:
+			break;
+		case JAIL_SYS_NEW:
+		case JAIL_SYS_INHERIT:
+			prison_lock(pr->pr_parent);
+			prpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
+			prison_unlock(pr->pr_parent);
+			if (prpr == NULL)
+				return (EPERM);
+			break;
+		default:
+			return (EINVAL);
+		}
+	}
+
+	return (0);
+}
+
+static int
+msg_prison_set(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct prison *tpr, *orpr, *nrpr, *trpr;
+	struct vfsoptlist *opts = data;
+	void *rsv;
+	int jsys, descend;
+
+	/*
+	 * sysvmsg controls which jail is the root of the associated msgs (this
+	 * jail or same as the parent), or if the feature is available at all.
+	 */
+	if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == ENOENT)
+		jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
+		    ? JAIL_SYS_INHERIT
+		    : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
+		    ? JAIL_SYS_DISABLE
+		    : -1;
+	if (jsys == JAIL_SYS_DISABLE) {
+		prison_lock(pr);
+		orpr = osd_jail_get(pr, msg_prison_slot);
+		if (orpr != NULL)
+			osd_jail_del(pr, msg_prison_slot);
+		prison_unlock(pr);
+		if (orpr != NULL) {
+			if (orpr == pr)
+				msg_prison_cleanup(pr);
+			/* Disable all child jails as well. */
+			FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+				prison_lock(tpr);
+				trpr = osd_jail_get(tpr, msg_prison_slot);
+				if (trpr != NULL) {
+					osd_jail_del(tpr, msg_prison_slot);
+					prison_unlock(tpr);
+					if (trpr == tpr)
+						msg_prison_cleanup(tpr);
+				} else {
+					prison_unlock(tpr);
+					descend = 0;
+				}
+			}
+		}
+	} else if (jsys != -1) {
+		if (jsys == JAIL_SYS_NEW)
+			nrpr = pr;
+		else {
+			prison_lock(pr->pr_parent);
+			nrpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
+			prison_unlock(pr->pr_parent);
+		}
+		rsv = osd_reserve(msg_prison_slot);
+		prison_lock(pr);
+		orpr = osd_jail_get(pr, msg_prison_slot);
+		if (orpr != nrpr)
+			(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
+			    nrpr);
+		else
+			osd_free_reserved(rsv);
+		prison_unlock(pr);
+		if (orpr != nrpr) {
+			if (orpr == pr)
+				msg_prison_cleanup(pr);
+			if (orpr != NULL) {
+				/* Change child jails matching the old root, */
+				FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+					prison_lock(tpr);
+					trpr = osd_jail_get(tpr,
+					    msg_prison_slot);
+					if (trpr == orpr) {
+						(void)osd_jail_set(tpr,
+						    msg_prison_slot, nrpr);
+						prison_unlock(tpr);
+						if (trpr == tpr)
+							msg_prison_cleanup(tpr);
+					} else {
+						prison_unlock(tpr);
+						descend = 0;
+					}
+				}
+			}
+		}
+	}
+
+	return (0);
+}
+
+static int
+msg_prison_get(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct prison *rpr;
+	struct vfsoptlist *opts = data;
+	int error, jsys;
+
+	/* Set sysvmsg based on the jail's root prison. */
+	prison_lock(pr);
+	rpr = osd_jail_get(pr, msg_prison_slot);
+	prison_unlock(pr);
+	jsys = rpr == NULL ? JAIL_SYS_DISABLE
+	    : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
+	error = vfs_setopt(opts, "sysvmsg", &jsys, sizeof(jsys));
+	if (error == ENOENT)
+		error = 0;
+	return (error);
+}
+
+static int
+msg_prison_remove(void *obj, void *data __unused)
+{
+	struct prison *pr = obj;
+	struct prison *rpr;
+
+	prison_lock(pr);
+	rpr = osd_jail_get(pr, msg_prison_slot);
+	prison_unlock(pr);
+	if (rpr == pr)
+		msg_prison_cleanup(pr);
+	return (0);
+}
+
+static void
+msg_prison_cleanup(struct prison *pr)
+{
+	struct msqid_kernel *msqkptr;
+	int i;
+
+	/* Remove any msqs that belong to this jail. */
+	mtx_lock(&msq_mtx);
+	for (i = 0; i < msginfo.msgmni; i++) {
+		msqkptr = &msqids[i];
+		if (msqkptr->u.msg_qbytes != 0 &&
+		    msqkptr->cred != NULL && msqkptr->cred->cr_prison == pr)
+			msq_remove(msqkptr);
+	}
+	mtx_unlock(&msq_mtx);
+}
+
+SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message queues");
+
 #ifdef COMPAT_FREEBSD32
 int
 freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap)
@@ -1522,8 +1822,6 @@ sys_msgsys(td, uap)
 {
 	int error;
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
-		return (ENOSYS);
 	if (uap->which < 0 ||
 	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
 		return (EINVAL);

Modified: stable/10/sys/kern/sysv_sem.c
==============================================================================
--- stable/10/sys/kern/sysv_sem.c	Sat Apr 30 04:01:22 2016	(r298834)
+++ stable/10/sys/kern/sysv_sem.c	Sat Apr 30 04:02:32 2016	(r298835)
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mutex.h>
 #include <sys/racct.h>
 #include <sys/sem.h>
+#include <sys/sx.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
@@ -78,7 +79,16 @@ static int sysvsem_modload(struct module
 static int semunload(void);
 static void semexit_myhook(void *arg, struct proc *p);
 static int sysctl_sema(SYSCTL_HANDLER_ARGS);
-static int semvalid(int semid, struct semid_kernel *semakptr);
+static int semvalid(int semid, struct prison *rpr,
+    struct semid_kernel *semakptr);
+static void sem_remove(int semidx, struct ucred *cred);
+static struct prison *sem_find_prison(struct ucred *);
+static int sem_prison_cansee(struct prison *, struct semid_kernel *);
+static int sem_prison_check(void *, void *);
+static int sem_prison_set(void *, void *);
+static int sem_prison_get(void *, void *);
+static int sem_prison_remove(void *, void *);
+static void sem_prison_cleanup(struct prison *);
 
 #ifndef _SYS_SYSPROTO_H_
 struct __semctl_args;
@@ -104,6 +114,7 @@ LIST_HEAD(, sem_undo) semu_list;	/* list
 LIST_HEAD(, sem_undo) semu_free_list;	/* list of free undo structures */
 static int	*semu;		/* undo structure pool */
 static eventhandler_tag semexit_tag;
+static unsigned sem_prison_slot;	/* prison OSD slot */
 
 #define SEMUNDO_MTX		sem_undo_mtx
 #define SEMUNDO_LOCK()		mtx_lock(&SEMUNDO_MTX);
@@ -208,7 +219,8 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, semvmx, 
     "Semaphore maximum value");
 SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RW, &seminfo.semaem, 0,
     "Adjust on exit max value");
-SYSCTL_PROC(_kern_ipc, OID_AUTO, sema, CTLTYPE_OPAQUE | CTLFLAG_RD,
+SYSCTL_PROC(_kern_ipc, OID_AUTO, sema,
+    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
     NULL, 0, sysctl_sema, "", "Semaphore id pool");
 
 static struct syscall_helper_data sem_syscalls[] = {
@@ -247,7 +259,15 @@ static struct syscall_helper_data sem32_
 static int
 seminit(void)
 {
+	struct prison *pr;
+	void *rsv;
 	int i, error;
+	osd_method_t methods[PR_MAXMETHOD] = {
+	    [PR_METHOD_CHECK] =		sem_prison_check,
+	    [PR_METHOD_SET] =		sem_prison_set,
+	    [PR_METHOD_GET] =		sem_prison_get,
+	    [PR_METHOD_REMOVE] =	sem_prison_remove,
+	};
 
 	TUNABLE_INT_FETCH("kern.ipc.semmni", &seminfo.semmni);
 	TUNABLE_INT_FETCH("kern.ipc.semmns", &seminfo.semmns);
@@ -288,6 +308,29 @@ seminit(void)
 	semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL,
 	    EVENTHANDLER_PRI_ANY);
 
+	/* Set current prisons according to their allow.sysvipc. */
+	sem_prison_slot = osd_jail_register(NULL, methods);
+	rsv = osd_reserve(sem_prison_slot);
+	prison_lock(&prison0);
+	(void)osd_jail_set_reserved(&prison0, sem_prison_slot, rsv, &prison0);
+	prison_unlock(&prison0);
+	rsv = NULL;
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(pr, &allprison, pr_list) {
+		if (rsv == NULL)
+			rsv = osd_reserve(sem_prison_slot);
+		prison_lock(pr);
+		if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
+			(void)osd_jail_set_reserved(pr, sem_prison_slot, rsv,
+			    &prison0);
+			rsv = NULL;
+		}
+		prison_unlock(pr);
+	}
+	if (rsv != NULL)
+		osd_free_reserved(rsv);
+	sx_sunlock(&allprison_lock);
+
 	error = syscall_helper_register(sem_syscalls);
 	if (error != 0)
 		return (error);
@@ -313,6 +356,8 @@ semunload(void)
 #endif
 	syscall_helper_unregister(sem_syscalls);
 	EVENTHANDLER_DEREGISTER(process_exit, semexit_tag);
+	if (sem_prison_slot != 0)
+		osd_jail_deregister(sem_prison_slot);
 #ifdef MAC
 	for (i = 0; i < seminfo.semmni; i++)
 		mac_sysvsem_destroy(&sema[i]);
@@ -499,11 +544,74 @@ semundo_clear(int semid, int semnum)
 }
 
 static int
-semvalid(int semid, struct semid_kernel *semakptr)
+semvalid(int semid, struct prison *rpr, struct semid_kernel *semakptr)
 {
 
 	return ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
-	    semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ? EINVAL : 0);
+	    semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ||
+	    sem_prison_cansee(rpr, semakptr) ? EINVAL : 0);
+}
+
+static void
+sem_remove(int semidx, struct ucred *cred)
+{
+	struct semid_kernel *semakptr;
+	int i;
+
+	KASSERT(semidx >= 0 && semidx < seminfo.semmni,
+		("semidx out of bounds"));
+	semakptr = &sema[semidx];
+	semakptr->u.sem_perm.cuid = cred ? cred->cr_uid : 0;
+	semakptr->u.sem_perm.uid = cred ? cred->cr_uid : 0;
+	semakptr->u.sem_perm.mode = 0;
+	racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems);
+	crfree(semakptr->cred);
+	semakptr->cred = NULL;
+	SEMUNDO_LOCK();
+	semundo_clear(semidx, -1);
+	SEMUNDO_UNLOCK();
+#ifdef MAC
+	mac_sysvsem_cleanup(semakptr);
+#endif
+	wakeup(semakptr);
+	for (i = 0; i < seminfo.semmni; i++) {
+		if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
+		    sema[i].u.sem_base > semakptr->u.sem_base)
+			mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
+	}
+	for (i = semakptr->u.sem_base - sem; i < semtot; i++)
+		sem[i] = sem[i + semakptr->u.sem_nsems];
+	for (i = 0; i < seminfo.semmni; i++) {
+		if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
+		    sema[i].u.sem_base > semakptr->u.sem_base) {
+			sema[i].u.sem_base -= semakptr->u.sem_nsems;
+			mtx_unlock(&sema_mtx[i]);
+		}
+	}
+	semtot -= semakptr->u.sem_nsems;
+}
+
+static struct prison *
+sem_find_prison(struct ucred *cred)
+{
+	struct prison *pr, *rpr;
+
+	pr = cred->cr_prison;
+	prison_lock(pr);
+	rpr = osd_jail_get(pr, sem_prison_slot);
+	prison_unlock(pr);
+	return rpr;
+}
+
+static int
+sem_prison_cansee(struct prison *rpr, struct semid_kernel *semakptr)
+{
+
+	if (semakptr->cred == NULL ||
+	    !(rpr == semakptr->cred->cr_prison ||
+	      prison_ischild(rpr, semakptr->cred->cr_prison)))
+		return (EINVAL);
+	return (0);
 }
 
 /*
@@ -582,6 +690,7 @@ kern_semctl(struct thread *td, int semid
 	u_short *array;
 	struct ucred *cred = td->td_ucred;
 	int i, error;
+	struct prison *rpr;
 	struct semid_ds *sbuf;
 	struct semid_kernel *semakptr;
 	struct mtx *sema_mtxp;
@@ -590,7 +699,9 @@ kern_semctl(struct thread *td, int semid
 
 	DPRINTF(("call to semctl(%d, %d, %d, 0x%p)\n",
 	    semid, semnum, cmd, arg));
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+
+	rpr = sem_find_prison(td->td_ucred);
+	if (sem == NULL)
 		return (ENOSYS);
 
 	array = NULL;
@@ -610,6 +721,8 @@ kern_semctl(struct thread *td, int semid
 			error = EINVAL;
 			goto done2;
 		}
+		if ((error = sem_prison_cansee(rpr, semakptr)))
+			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 #ifdef MAC
@@ -618,6 +731,8 @@ kern_semctl(struct thread *td, int semid
 			goto done2;
 #endif
 		bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
+		if (cred->cr_prison != semakptr->cred->cr_prison)
+			arg->buf->sem_perm.key = IPC_PRIVATE;
 		*rval = IXSEQ_TO_IPCID(semid, semakptr->u.sem_perm);
 		mtx_unlock(sema_mtxp);
 		return (0);
@@ -632,6 +747,7 @@ kern_semctl(struct thread *td, int semid
 	if (cmd == IPC_RMID)
 		mtx_lock(&sem_mtx);
 	mtx_lock(sema_mtxp);
+
 #ifdef MAC
 	error = mac_sysvsem_check_semctl(cred, semakptr, cmd);
 	if (error != 0)
@@ -643,42 +759,15 @@ kern_semctl(struct thread *td, int semid
 
 	switch (cmd) {
 	case IPC_RMID:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
 			goto done2;
-		semakptr->u.sem_perm.cuid = cred->cr_uid;
-		semakptr->u.sem_perm.uid = cred->cr_uid;
-		semakptr->u.sem_perm.mode = 0;
-		racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems);
-		crfree(semakptr->cred);
-		semakptr->cred = NULL;
-		SEMUNDO_LOCK();
-		semundo_clear(semidx, -1);
-		SEMUNDO_UNLOCK();
-#ifdef MAC
-		mac_sysvsem_cleanup(semakptr);
-#endif
-		wakeup(semakptr);
-		for (i = 0; i < seminfo.semmni; i++) {
-			if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
-			    sema[i].u.sem_base > semakptr->u.sem_base)
-				mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
-		}
-		for (i = semakptr->u.sem_base - sem; i < semtot; i++)
-			sem[i] = sem[i + semakptr->u.sem_nsems];
-		for (i = 0; i < seminfo.semmni; i++) {
-			if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
-			    sema[i].u.sem_base > semakptr->u.sem_base) {
-				sema[i].u.sem_base -= semakptr->u.sem_nsems;
-				mtx_unlock(&sema_mtx[i]);
-			}
-		}
-		semtot -= semakptr->u.sem_nsems;
+		sem_remove(semidx, cred);
 		break;
 
 	case IPC_SET:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
 			goto done2;
@@ -691,15 +780,17 @@ kern_semctl(struct thread *td, int semid
 		break;
 
 	case IPC_STAT:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
+		if (cred->cr_prison != semakptr->cred->cr_prison)
+			arg->buf->sem_perm.key = IPC_PRIVATE;
 		break;
 
 	case GETNCNT:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
@@ -711,7 +802,7 @@ kern_semctl(struct thread *td, int semid
 		break;
 
 	case GETPID:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
@@ -723,7 +814,7 @@ kern_semctl(struct thread *td, int semid
 		break;
 
 	case GETVAL:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
@@ -759,7 +850,7 @@ kern_semctl(struct thread *td, int semid
 		mtx_unlock(sema_mtxp);		    
 		array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK);
 		mtx_lock(sema_mtxp);
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
@@ -772,7 +863,7 @@ kern_semctl(struct thread *td, int semid
 		break;
 
 	case GETZCNT:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
@@ -784,7 +875,7 @@ kern_semctl(struct thread *td, int semid
 		break;
 
 	case SETVAL:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
 			goto done2;
@@ -815,7 +906,7 @@ kern_semctl(struct thread *td, int semid
 		mtx_lock(sema_mtxp);
 		if (error)
 			break;
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
@@ -865,13 +956,16 @@ sys_semget(struct thread *td, struct sem
 	struct ucred *cred = td->td_ucred;
 
 	DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+
+	if (sem_find_prison(cred) == NULL)
 		return (ENOSYS);
 
 	mtx_lock(&sem_mtx);
 	if (key != IPC_PRIVATE) {
 		for (semid = 0; semid < seminfo.semmni; semid++) {
 			if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) &&
+			    sema[semid].cred != NULL &&
+			    sema[semid].cred->cr_prison == cred->cr_prison &&
 			    sema[semid].u.sem_perm.key == key)
 				break;
 		}
@@ -988,6 +1082,7 @@ sys_semop(struct thread *td, struct semo
 	struct sembuf small_sops[SMALL_SOPS];
 	int semid = uap->semid;
 	size_t nsops = uap->nsops;
+	struct prison *rpr;
 	struct sembuf *sops;
 	struct semid_kernel *semakptr;
 	struct sembuf *sopptr = 0;
@@ -1004,7 +1099,8 @@ sys_semop(struct thread *td, struct semo
 #endif
 	DPRINTF(("call to semop(%d, %p, %u)\n", semid, sops, nsops));
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	rpr = sem_find_prison(td->td_ucred);
+	if (sem == NULL)
 		return (ENOSYS);
 
 	semid = IPCID_TO_IX(semid);	/* Convert back to zero origin */
@@ -1054,6 +1150,8 @@ sys_semop(struct thread *td, struct semo
 		error = EINVAL;
 		goto done2;
 	}
+	if ((error = sem_prison_cansee(rpr, semakptr)) != 0)
+		goto done2;
 	/*
 	 * Initial pass thru sops to see what permissions are needed.
 	 * Also perform any checks that don't need repeating on each
@@ -1377,11 +1475,207 @@ semexit_myhook(void *arg, struct proc *p
 static int
 sysctl_sema(SYSCTL_HANDLER_ARGS)
 {
+	struct prison *pr, *rpr;
+	struct semid_kernel tsemak;
+	int error, i;
 
-	return (SYSCTL_OUT(req, sema,
-	    sizeof(struct semid_kernel) * seminfo.semmni));
+	pr = req->td->td_ucred->cr_prison;
+	rpr = sem_find_prison(req->td->td_ucred);
+	error = 0;
+	for (i = 0; i < seminfo.semmni; i++) {
+		mtx_lock(&sema_mtx[i]);
+		if ((sema[i].u.sem_perm.mode & SEM_ALLOC) == 0 ||
+		    rpr == NULL || sem_prison_cansee(rpr, &sema[i]) != 0)
+			bzero(&tsemak, sizeof(tsemak));
+		else {
+			tsemak = sema[i];
+			if (tsemak.cred->cr_prison != pr)
+				tsemak.u.sem_perm.key = IPC_PRIVATE;
+		}
+		mtx_unlock(&sema_mtx[i]);
+		error = SYSCTL_OUT(req, &tsemak, sizeof(tsemak));
+		if (error != 0)
+			break;
+	}
+	return (error);
 }
 
+static int
+sem_prison_check(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct prison *prpr;
+	struct vfsoptlist *opts = data;
+	int error, jsys;
+
+	/*
+	 * sysvsem is a jailsys integer.
+	 * It must be "disable" if the parent jail is disabled.
+	 */
+	error = vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys));
+	if (error != ENOENT) {
+		if (error != 0)
+			return (error);
+		switch (jsys) {
+		case JAIL_SYS_DISABLE:
+			break;
+		case JAIL_SYS_NEW:
+		case JAIL_SYS_INHERIT:
+			prison_lock(pr->pr_parent);
+			prpr = osd_jail_get(pr->pr_parent, sem_prison_slot);
+			prison_unlock(pr->pr_parent);
+			if (prpr == NULL)
+				return (EPERM);
+			break;
+		default:
+			return (EINVAL);
+		}
+	}
+
+	return (0);
+}
+
+static int
+sem_prison_set(void *obj, void *data)
+{

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201604300402.u3U42WBM056084>