Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 15 Nov 2017 22:45:13 +0000 (UTC)
From:      Gordon Tetlow <gordon@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-releng@freebsd.org
Subject:   svn commit: r325873 - in releng/10.3: share/man/man9 sys/kern sys/sys
Message-ID:  <201711152245.vAFMjDvc010815@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gordon
Date: Wed Nov 15 22:45:13 2017
New Revision: 325873
URL: https://svnweb.freebsd.org/changeset/base/325873

Log:
  Fix namespace issue in POSIX shm implementation for jails. [SA-17:09]
  
  Approved by:	so
  Security:	FreeBSD-SA-17:09.shm
  Security:	CVE-2017-1087

Modified:
  releng/10.3/share/man/man9/osd.9
  releng/10.3/sys/kern/kern_osd.c
  releng/10.3/sys/kern/uipc_mqueue.c
  releng/10.3/sys/kern/uipc_sem.c
  releng/10.3/sys/kern/uipc_shm.c
  releng/10.3/sys/sys/osd.h

Modified: releng/10.3/share/man/man9/osd.9
==============================================================================
--- releng/10.3/share/man/man9/osd.9	Wed Nov 15 22:42:20 2017	(r325872)
+++ releng/10.3/share/man/man9/osd.9	Wed Nov 15 22:45:13 2017	(r325873)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd January 5, 2011
+.Dd March 30, 2016
 .Dt OSD 9
 .Os
 .Sh NAME
@@ -33,6 +33,9 @@
 .Nm osd_register ,
 .Nm osd_deregister ,
 .Nm osd_set ,
+.Nm osd_reserve ,
+.Nm osd_set_reserved ,
+.Nm osd_free_reserved ,
 .Nm osd_get ,
 .Nm osd_del ,
 .Nm osd_call ,
@@ -63,6 +66,22 @@
 .Fa "void *value"
 .Fc
 .Ft void *
+.Fo osd_reserve
+.Fa "u_int slot"
+.Fc
+.Ft int
+.Fo osd_set_reserved
+.Fa "u_int type"
+.Fa "struct osd *osd"
+.Fa "u_int slot"
+.Fa "void *rsv"
+.Fa "void *value"
+.Fc
+.Ft void
+.Fo osd_free_reserved
+.Fa "void *rsv"
+.Fc
+.Ft void *
 .Fo osd_get
 .Fa "u_int type"
 .Fa "struct osd *osd"
@@ -198,6 +217,15 @@ argument points to a data object to associate with
 .Fa osd .
 .Pp
 The
+.Fn osd_set_reserved
+function does the same as
+.Fn osd_set ,
+but with an extra argument
+.Fa rsv
+that is internal-use memory previously allocated via
+.Fn osd_reserve .
+.Pp
+The
 .Fn osd_get
 function returns the data pointer associated with a kernel data structure's
 .Vt struct osd
@@ -324,6 +352,24 @@ will proceed without any
 .Xr realloc 9
 calls.
 .Pp
+It is possible for
+.Fn osd_set
+to fail to allocate this array.  To ensure that such allocation succeeds,
+.Fn osd_reserve
+may be called (in a sleepable context), and it will pre-allocate the
+memory via
+.Xr malloc 9
+with M_WAITOK.
+Then this pre-allocated memory is passed to
+.Fn osd_set_reserved ,
+which will use it if necessary or otherwise discard it.
+The memory may also be explicitly discarded by calling
+.Fn osd_free_reserved .
+As this method always allocates memory whether or not it is ultimately needed,
+it should be used only rarely, such as in the unlikely event that
+.Fn osd_set
+fails.
+.Pp
 The
 .Nm
 API is geared towards slot identifiers storing pointers to the same underlying
@@ -359,14 +405,26 @@ the kernel including most fast paths.
 returns the slot identifier for the newly registered data type.
 .Pp
 .Fn osd_set
-returns zero on success or ENOMEM if the specified type/slot identifier pair
+and
+.Fn osd_set_reserved
+return zero on success or ENOMEM if the specified type/slot identifier pair
 triggered an internal
 .Xr realloc 9
-which failed.
+which failed
+.Fn ( osd_set_reserved
+will always succeed when
+.Fa rsv
+is non-NULL).
 .Pp
 .Fn osd_get
 returns the data pointer for the specified type/slot identifier pair, or NULL if
 the slot has not been initialised yet.
+.Pp
+.Fn osd_reserve
+returns a pointer suitable for passing to
+.Fn osd_set_reserved
+or
+.Fn osd_free_reserved .
 .Pp
 .Fn osd_call
 returns zero if no method is run or the method for each slot runs successfully.

Modified: releng/10.3/sys/kern/kern_osd.c
==============================================================================
--- releng/10.3/sys/kern/kern_osd.c	Wed Nov 15 22:42:20 2017	(r325872)
+++ releng/10.3/sys/kern/kern_osd.c	Wed Nov 15 22:45:13 2017	(r325873)
@@ -44,6 +44,23 @@ __FBSDID("$FreeBSD$");
 
 /* OSD (Object Specific Data) */
 
+/*
+ * Lock key:
+ *  (m) osd_module_lock
+ *  (o) osd_object_lock
+ *  (l) osd_list_lock
+ */
+struct osd_master {
+	struct sx		 osd_module_lock;
+	struct rmlock		 osd_object_lock;
+	struct mtx		 osd_list_lock;
+	LIST_HEAD(, osd)	 osd_list;		/* (l) */
+	osd_destructor_t	*osd_destructors;	/* (o) */
+	osd_method_t		*osd_methods;		/* (m) */
+	u_int			 osd_ntslots;		/* (m) */
+	const u_int		 osd_nmethods;
+};
+
 static MALLOC_DEFINE(M_OSD, "osd", "Object Specific Data");
 
 static int osd_debug = 0;
@@ -62,25 +79,12 @@ static void do_osd_del(u_int type, struct osd *osd, u_
     int list_locked);
 
 /*
- * Lists of objects with OSD.
- *
- * Lock key:
- *  (m) osd_module_lock
- *  (o) osd_object_lock
- *  (l) osd_list_lock
+ * List of objects with OSD.
  */
-static LIST_HEAD(, osd)	osd_list[OSD_LAST + 1];		/* (m) */
-static osd_method_t *osd_methods[OSD_LAST + 1];		/* (m) */
-static u_int osd_nslots[OSD_LAST + 1];			/* (m) */
-static osd_destructor_t *osd_destructors[OSD_LAST + 1];	/* (o) */
-static const u_int osd_nmethods[OSD_LAST + 1] = {
-	[OSD_JAIL] = PR_MAXMETHOD,
+struct osd_master osdm[OSD_LAST + 1] = {
+	[OSD_JAIL] = { .osd_nmethods = PR_MAXMETHOD },
 };
 
-static struct sx osd_module_lock[OSD_LAST + 1];
-static struct rmlock osd_object_lock[OSD_LAST + 1];
-static struct mtx osd_list_lock[OSD_LAST + 1];
-
 static void
 osd_default_destructor(void *value __unused)
 {
@@ -102,12 +106,12 @@ osd_register(u_int type, osd_destructor_t destructor, 
 	if (destructor == NULL)
 		destructor = osd_default_destructor;
 
-	sx_xlock(&osd_module_lock[type]);
+	sx_xlock(&osdm[type].osd_module_lock);
 	/*
 	 * First, we try to find unused slot.
 	 */
-	for (i = 0; i < osd_nslots[type]; i++) {
-		if (osd_destructors[type][i] == NULL) {
+	for (i = 0; i < osdm[type].osd_ntslots; i++) {
+		if (osdm[type].osd_destructors[i] == NULL) {
 			OSD_DEBUG("Unused slot found (type=%u, slot=%u).",
 			    type, i);
 			break;
@@ -116,31 +120,31 @@ osd_register(u_int type, osd_destructor_t destructor, 
 	/*
 	 * If no unused slot was found, allocate one.
 	 */
-	if (i == osd_nslots[type]) {
-		osd_nslots[type]++;
-		if (osd_nmethods[type] != 0)
-			osd_methods[type] = realloc(osd_methods[type],
-			    sizeof(osd_method_t) * osd_nslots[type] *
-			    osd_nmethods[type], M_OSD, M_WAITOK);
-		newptr = malloc(sizeof(osd_destructor_t) * osd_nslots[type],
-		    M_OSD, M_WAITOK);
-		rm_wlock(&osd_object_lock[type]);
-		bcopy(osd_destructors[type], newptr,
+	if (i == osdm[type].osd_ntslots) {
+		osdm[type].osd_ntslots++;
+		if (osdm[type].osd_nmethods != 0)
+			osdm[type].osd_methods = realloc(osdm[type].osd_methods,
+			    sizeof(osd_method_t) * osdm[type].osd_ntslots *
+			    osdm[type].osd_nmethods, M_OSD, M_WAITOK);
+		newptr = malloc(sizeof(osd_destructor_t) *
+		    osdm[type].osd_ntslots, M_OSD, M_WAITOK);
+		rm_wlock(&osdm[type].osd_object_lock);
+		bcopy(osdm[type].osd_destructors, newptr,
 		    sizeof(osd_destructor_t) * i);
-		free(osd_destructors[type], M_OSD);
-		osd_destructors[type] = newptr;
-		rm_wunlock(&osd_object_lock[type]);
+		free(osdm[type].osd_destructors, M_OSD);
+		osdm[type].osd_destructors = newptr;
+		rm_wunlock(&osdm[type].osd_object_lock);
 		OSD_DEBUG("New slot allocated (type=%u, slot=%u).",
 		    type, i + 1);
 	}
 
-	osd_destructors[type][i] = destructor;
-	if (osd_nmethods[type] != 0) {
-		for (m = 0; m < osd_nmethods[type]; m++)
-			osd_methods[type][i * osd_nmethods[type] + m] =
-			    methods != NULL ? methods[m] : NULL;
+	osdm[type].osd_destructors[i] = destructor;
+	if (osdm[type].osd_nmethods != 0) {
+		for (m = 0; m < osdm[type].osd_nmethods; m++)
+			osdm[type].osd_methods[i * osdm[type].osd_nmethods + m]
+			    = methods != NULL ? methods[m] : NULL;
 	}
-	sx_xunlock(&osd_module_lock[type]);
+	sx_xunlock(&osdm[type].osd_module_lock);
 	return (i + 1);
 }
 
@@ -151,105 +155,142 @@ osd_deregister(u_int type, u_int slot)
 
 	KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
 	KASSERT(slot > 0, ("Invalid slot."));
-	KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+	KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
 
-	sx_xlock(&osd_module_lock[type]);
-	rm_wlock(&osd_object_lock[type]);
+	sx_xlock(&osdm[type].osd_module_lock);
+	rm_wlock(&osdm[type].osd_object_lock);
 	/*
 	 * Free all OSD for the given slot.
 	 */
-	mtx_lock(&osd_list_lock[type]);
-	LIST_FOREACH_SAFE(osd, &osd_list[type], osd_next, tosd)
+	mtx_lock(&osdm[type].osd_list_lock);
+	LIST_FOREACH_SAFE(osd, &osdm[type].osd_list, osd_next, tosd)
 		do_osd_del(type, osd, slot, 1);
-	mtx_unlock(&osd_list_lock[type]);
+	mtx_unlock(&osdm[type].osd_list_lock);
 	/*
 	 * Set destructor to NULL to free the slot.
 	 */
-	osd_destructors[type][slot - 1] = NULL;
-	if (slot == osd_nslots[type]) {
-		osd_nslots[type]--;
-		osd_destructors[type] = realloc(osd_destructors[type],
-		    sizeof(osd_destructor_t) * osd_nslots[type], M_OSD,
+	osdm[type].osd_destructors[slot - 1] = NULL;
+	if (slot == osdm[type].osd_ntslots) {
+		osdm[type].osd_ntslots--;
+		osdm[type].osd_destructors = realloc(osdm[type].osd_destructors,
+		    sizeof(osd_destructor_t) * osdm[type].osd_ntslots, M_OSD,
 		    M_NOWAIT | M_ZERO);
-		if (osd_nmethods[type] != 0)
-			osd_methods[type] = realloc(osd_methods[type],
-			    sizeof(osd_method_t) * osd_nslots[type] *
-			    osd_nmethods[type], M_OSD, M_NOWAIT | M_ZERO);
+		if (osdm[type].osd_nmethods != 0)
+			osdm[type].osd_methods = realloc(osdm[type].osd_methods,
+			    sizeof(osd_method_t) * osdm[type].osd_ntslots *
+			    osdm[type].osd_nmethods, M_OSD, M_NOWAIT | M_ZERO);
 		/*
 		 * We always reallocate to smaller size, so we assume it will
 		 * always succeed.
 		 */
-		KASSERT(osd_destructors[type] != NULL &&
-		    (osd_nmethods[type] == 0 || osd_methods[type] != NULL),
-		    ("realloc() failed"));
+		KASSERT(osdm[type].osd_destructors != NULL &&
+		    (osdm[type].osd_nmethods == 0 ||
+		     osdm[type].osd_methods != NULL), ("realloc() failed"));
 		OSD_DEBUG("Deregistration of the last slot (type=%u, slot=%u).",
 		    type, slot);
 	} else {
 		OSD_DEBUG("Slot deregistration (type=%u, slot=%u).",
 		    type, slot);
 	}
-	rm_wunlock(&osd_object_lock[type]);
-	sx_xunlock(&osd_module_lock[type]);
+	rm_wunlock(&osdm[type].osd_object_lock);
+	sx_xunlock(&osdm[type].osd_module_lock);
 }
 
 int
 osd_set(u_int type, struct osd *osd, u_int slot, void *value)
 {
+
+	return (osd_set_reserved(type, osd, slot, NULL, value));
+}
+
+void *
+osd_reserve(u_int slot)
+{
+
+	KASSERT(slot > 0, ("Invalid slot."));
+
+	OSD_DEBUG("Reserving slot array (slot=%u).", slot);
+	return (malloc(sizeof(void *) * slot, M_OSD, M_WAITOK | M_ZERO));
+}
+
+int
+osd_set_reserved(u_int type, struct osd *osd, u_int slot, void *rsv,
+    void *value)
+{
 	struct rm_priotracker tracker;
 
 	KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
 	KASSERT(slot > 0, ("Invalid slot."));
-	KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+	KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
 
-	rm_rlock(&osd_object_lock[type], &tracker);
+	rm_rlock(&osdm[type].osd_object_lock, &tracker);
 	if (slot > osd->osd_nslots) {
+		void *newptr;
+
 		if (value == NULL) {
 			OSD_DEBUG(
 			    "Not allocating null slot (type=%u, slot=%u).",
 			    type, slot);
-			rm_runlock(&osd_object_lock[type], &tracker);
+			rm_runlock(&osdm[type].osd_object_lock, &tracker);
+			if (rsv)
+				osd_free_reserved(rsv);
 			return (0);
-		} else if (osd->osd_nslots == 0) {
+		}
+
+		/*
+		 * Too few slots allocated here, so we need to extend or create
+		 * the array.
+		 */
+		if (rsv) {
 			/*
-			 * First OSD for this object, so we need to allocate
-			 * space and put it onto the list.
+			 * Use the reserve passed in (assumed to be
+			 * the right size).
 			 */
-			osd->osd_slots = malloc(sizeof(void *) * slot, M_OSD,
-			    M_NOWAIT | M_ZERO);
-			if (osd->osd_slots == NULL) {
-				rm_runlock(&osd_object_lock[type], &tracker);
-				return (ENOMEM);
+			newptr = rsv;
+			if (osd->osd_nslots != 0) {
+				memcpy(newptr, osd->osd_slots,
+				    sizeof(void *) * osd->osd_nslots);
+				free(osd->osd_slots, M_OSD);
 			}
-			osd->osd_nslots = slot;
-			mtx_lock(&osd_list_lock[type]);
-			LIST_INSERT_HEAD(&osd_list[type], osd, osd_next);
-			mtx_unlock(&osd_list_lock[type]);
-			OSD_DEBUG("Setting first slot (type=%u).", type);
 		} else {
-			void *newptr;
-
-			/*
-			 * Too few slots allocated here, needs to extend
-			 * the array.
-			 */
 			newptr = realloc(osd->osd_slots, sizeof(void *) * slot,
 			    M_OSD, M_NOWAIT | M_ZERO);
 			if (newptr == NULL) {
-				rm_runlock(&osd_object_lock[type], &tracker);
+				rm_runlock(&osdm[type].osd_object_lock,
+				    &tracker);
 				return (ENOMEM);
 			}
-			osd->osd_slots = newptr;
-			osd->osd_nslots = slot;
-			OSD_DEBUG("Growing slots array (type=%u).", type);
 		}
-	}
+		if (osd->osd_nslots == 0) {
+			/*
+			 * First OSD for this object, so we need to put it
+			 * onto the list.
+			 */
+			mtx_lock(&osdm[type].osd_list_lock);
+			LIST_INSERT_HEAD(&osdm[type].osd_list, osd, osd_next);
+			mtx_unlock(&osdm[type].osd_list_lock);
+			OSD_DEBUG("Setting first slot (type=%u).", type);
+		} else
+			OSD_DEBUG("Growing slots array (type=%u).", type);
+		osd->osd_slots = newptr;
+		osd->osd_nslots = slot;
+	} else if (rsv)
+		osd_free_reserved(rsv);
 	OSD_DEBUG("Setting slot value (type=%u, slot=%u, value=%p).", type,
 	    slot, value);
 	osd->osd_slots[slot - 1] = value;
-	rm_runlock(&osd_object_lock[type], &tracker);
+	rm_runlock(&osdm[type].osd_object_lock, &tracker);
 	return (0);
 }
 
+void
+osd_free_reserved(void *rsv)
+{
+
+	OSD_DEBUG("Discarding reserved slot array.");
+	free(rsv, M_OSD);
+}
+
 void *
 osd_get(u_int type, struct osd *osd, u_int slot)
 {
@@ -258,9 +299,9 @@ osd_get(u_int type, struct osd *osd, u_int slot)
 
 	KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
 	KASSERT(slot > 0, ("Invalid slot."));
-	KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+	KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
 
-	rm_rlock(&osd_object_lock[type], &tracker);
+	rm_rlock(&osdm[type].osd_object_lock, &tracker);
 	if (slot > osd->osd_nslots) {
 		value = NULL;
 		OSD_DEBUG("Slot doesn't exist (type=%u, slot=%u).", type, slot);
@@ -269,7 +310,7 @@ osd_get(u_int type, struct osd *osd, u_int slot)
 		OSD_DEBUG("Returning slot value (type=%u, slot=%u, value=%p).",
 		    type, slot, value);
 	}
-	rm_runlock(&osd_object_lock[type], &tracker);
+	rm_runlock(&osdm[type].osd_object_lock, &tracker);
 	return (value);
 }
 
@@ -278,9 +319,9 @@ osd_del(u_int type, struct osd *osd, u_int slot)
 {
 	struct rm_priotracker tracker;
 
-	rm_rlock(&osd_object_lock[type], &tracker);
+	rm_rlock(&osdm[type].osd_object_lock, &tracker);
 	do_osd_del(type, osd, slot, 0);
-	rm_runlock(&osd_object_lock[type], &tracker);
+	rm_runlock(&osdm[type].osd_object_lock, &tracker);
 }
 
 static void
@@ -290,7 +331,7 @@ do_osd_del(u_int type, struct osd *osd, u_int slot, in
 
 	KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
 	KASSERT(slot > 0, ("Invalid slot."));
-	KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+	KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
 
 	OSD_DEBUG("Deleting slot (type=%u, slot=%u).", type, slot);
 
@@ -299,7 +340,7 @@ do_osd_del(u_int type, struct osd *osd, u_int slot, in
 		return;
 	}
 	if (osd->osd_slots[slot - 1] != NULL) {
-		osd_destructors[type][slot - 1](osd->osd_slots[slot - 1]);
+		osdm[type].osd_destructors[slot - 1](osd->osd_slots[slot - 1]);
 		osd->osd_slots[slot - 1] = NULL;
 	}
 	for (i = osd->osd_nslots - 1; i >= 0; i--) {
@@ -313,10 +354,10 @@ do_osd_del(u_int type, struct osd *osd, u_int slot, in
 		/* No values left for this object. */
 		OSD_DEBUG("No more slots left (type=%u).", type);
 		if (!list_locked)
-			mtx_lock(&osd_list_lock[type]);
+			mtx_lock(&osdm[type].osd_list_lock);
 		LIST_REMOVE(osd, osd_next);
 		if (!list_locked)
-			mtx_unlock(&osd_list_lock[type]);
+			mtx_unlock(&osdm[type].osd_list_lock);
 		free(osd->osd_slots, M_OSD);
 		osd->osd_slots = NULL;
 		osd->osd_nslots = 0;
@@ -342,21 +383,21 @@ osd_call(u_int type, u_int method, void *obj, void *da
 	int error, i;
 
 	KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
-	KASSERT(method < osd_nmethods[type], ("Invalid method."));
+	KASSERT(method < osdm[type].osd_nmethods, ("Invalid method."));
 
 	/*
 	 * Call this method for every slot that defines it, stopping if an
 	 * error is encountered.
 	 */
 	error = 0;
-	sx_slock(&osd_module_lock[type]);
-	for (i = 0; i < osd_nslots[type]; i++) {
-		methodfun =
-		    osd_methods[type][i * osd_nmethods[type] + method];
+	sx_slock(&osdm[type].osd_module_lock);
+	for (i = 0; i < osdm[type].osd_ntslots; i++) {
+		methodfun = osdm[type].osd_methods[i * osdm[type].osd_nmethods +
+		    method];
 		if (methodfun != NULL && (error = methodfun(obj, data)) != 0)
 			break;
 	}
-	sx_sunlock(&osd_module_lock[type]);
+	sx_sunlock(&osdm[type].osd_module_lock);
 	return (error);
 }
 
@@ -374,14 +415,14 @@ osd_exit(u_int type, struct osd *osd)
 		return;
 	}
 
-	rm_rlock(&osd_object_lock[type], &tracker);
+	rm_rlock(&osdm[type].osd_object_lock, &tracker);
 	for (i = 1; i <= osd->osd_nslots; i++) {
-		if (osd_destructors[type][i - 1] != NULL)
+		if (osdm[type].osd_destructors[i - 1] != NULL)
 			do_osd_del(type, osd, i, 0);
 		else
 			OSD_DEBUG("Unused slot (type=%u, slot=%u).", type, i);
 	}
-	rm_runlock(&osd_object_lock[type], &tracker);
+	rm_runlock(&osdm[type].osd_object_lock, &tracker);
 	OSD_DEBUG("Object exit (type=%u).", type);
 }
 
@@ -391,13 +432,13 @@ osd_init(void *arg __unused)
 	u_int i;
 
 	for (i = OSD_FIRST; i <= OSD_LAST; i++) {
-		osd_nslots[i] = 0;
-		LIST_INIT(&osd_list[i]);
-		sx_init(&osd_module_lock[i], "osd_module");
-		rm_init(&osd_object_lock[i], "osd_object");
-		mtx_init(&osd_list_lock[i], "osd_list", NULL, MTX_DEF);
-		osd_destructors[i] = NULL;
-		osd_methods[i] = NULL;
+		sx_init(&osdm[i].osd_module_lock, "osd_module");
+		rm_init(&osdm[i].osd_object_lock, "osd_object");
+		mtx_init(&osdm[i].osd_list_lock, "osd_list", NULL, MTX_DEF);
+		LIST_INIT(&osdm[i].osd_list);
+		osdm[i].osd_destructors = NULL;
+		osdm[i].osd_ntslots = 0;
+		osdm[i].osd_methods = NULL;
 	}
 }
 SYSINIT(osd, SI_SUB_LOCK, SI_ORDER_ANY, osd_init, NULL);

Modified: releng/10.3/sys/kern/uipc_mqueue.c
==============================================================================
--- releng/10.3/sys/kern/uipc_mqueue.c	Wed Nov 15 22:42:20 2017	(r325872)
+++ releng/10.3/sys/kern/uipc_mqueue.c	Wed Nov 15 22:45:13 2017	(r325873)
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/limits.h>
+#include <sys/malloc.h>
 #include <sys/buf.h>
 #include <sys/capsicum.h>
 #include <sys/dirent.h>
@@ -60,8 +61,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
+#include <sys/jail.h>
 #include <sys/lock.h>
-#include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mount.h>
 #include <sys/mqueue.h>
@@ -131,6 +132,7 @@ struct mqfs_node {
 	LIST_HEAD(,mqfs_node)	mn_children;
 	LIST_ENTRY(mqfs_node)	mn_sibling;
 	LIST_HEAD(,mqfs_vdata)	mn_vnodes;
+	const void		*mn_pr_root;
 	int			mn_refcount;
 	mqfs_type_t		mn_type;
 	int			mn_deleted;
@@ -151,6 +153,11 @@ struct mqfs_node {
 #define	FPTOMQ(fp)	((struct mqueue *)(((struct mqfs_node *) \
 				(fp)->f_data)->mn_data))
 
+struct mqfs_osd {
+	struct task	mo_task;
+	const void	*mo_pr_root;
+};
+
 TAILQ_HEAD(msgq, mqueue_msg);
 
 struct mqueue;
@@ -218,6 +225,7 @@ static uma_zone_t		mvdata_zone;
 static uma_zone_t		mqnoti_zone;
 static struct vop_vector	mqfs_vnodeops;
 static struct fileops		mqueueops;
+static unsigned			mqfs_osd_jail_slot;
 
 /*
  * Directory structure construction and manipulation
@@ -235,6 +243,9 @@ static int	mqfs_destroy(struct mqfs_node *mn);
 static void	mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn);
 static void	mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn);
 static int	mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn);
+static int	mqfs_prison_create(void *obj, void *data);
+static void	mqfs_prison_destructor(void *data);
+static void	mqfs_prison_remove_task(void *context, int pending);
 
 /*
  * Message queue construction and maniplation
@@ -435,6 +446,7 @@ mqfs_create_node(const char *name, int namelen, struct
 
 	node = mqnode_alloc();
 	strncpy(node->mn_name, name, namelen);
+	node->mn_pr_root = cred->cr_prison->pr_root;
 	node->mn_type = nodetype;
 	node->mn_refcount = 1;
 	vfs_timestamp(&node->mn_birth);
@@ -643,6 +655,10 @@ mqfs_init(struct vfsconf *vfc)
 {
 	struct mqfs_node *root;
 	struct mqfs_info *mi;
+	struct prison *pr;
+	osd_method_t methods[PR_MAXMETHOD] = {
+	    [PR_METHOD_CREATE] = mqfs_prison_create,
+	};
 
 	mqnode_zone = uma_zcreate("mqnode", sizeof(struct mqfs_node),
 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
@@ -669,6 +685,13 @@ mqfs_init(struct vfsconf *vfc)
 	    EVENTHANDLER_PRI_ANY);
 	mq_fdclose = mqueue_fdclose;
 	p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING);
+
+	/* Note current jails. */
+	mqfs_osd_jail_slot = osd_jail_register(mqfs_prison_destructor, methods);
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(pr, &allprison, pr_list)
+		(void)mqfs_prison_create(pr, NULL);
+	sx_sunlock(&allprison_lock);
 	return (0);
 }
 
@@ -678,10 +701,14 @@ mqfs_init(struct vfsconf *vfc)
 static int
 mqfs_uninit(struct vfsconf *vfc)
 {
+	unsigned slot;
 	struct mqfs_info *mi;
 
 	if (!unloadable)
 		return (EOPNOTSUPP);
+	slot = mqfs_osd_jail_slot;
+	mqfs_osd_jail_slot = 0;
+	osd_jail_deregister(slot);
 	EVENTHANDLER_DEREGISTER(process_exit, exit_tag);
 	mi = &mqfs_data;
 	mqfs_destroy(mi->mi_root);
@@ -799,13 +826,17 @@ found:
  * Search a directory entry
  */
 static struct mqfs_node *
-mqfs_search(struct mqfs_node *pd, const char *name, int len)
+mqfs_search(struct mqfs_node *pd, const char *name, int len, struct ucred *cred)
 {
 	struct mqfs_node *pn;
+	const void *pr_root;
 
 	sx_assert(&pd->mn_info->mi_lock, SX_LOCKED);
+	pr_root = cred->cr_prison->pr_root;
 	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
-		if (strncmp(pn->mn_name, name, len) == 0 &&
+		/* Only match names within the same prison root directory */
+		if ((pn->mn_pr_root == NULL || pn->mn_pr_root == pr_root) &&
+		    strncmp(pn->mn_name, name, len) == 0 &&
 		    pn->mn_name[len] == '\0')
 			return (pn);
 	}
@@ -877,7 +908,7 @@ mqfs_lookupx(struct vop_cachedlookup_args *ap)
 
 	/* named node */
 	sx_xlock(&mqfs->mi_lock);
-	pn = mqfs_search(pd, pname, namelen);
+	pn = mqfs_search(pd, pname, namelen, cnp->cn_cred);
 	if (pn != NULL)
 		mqnode_addref(pn);
 	sx_xunlock(&mqfs->mi_lock);
@@ -1362,6 +1393,7 @@ mqfs_readdir(struct vop_readdir_args *ap)
 	struct mqfs_node *pn;
 	struct dirent entry;
 	struct uio *uio;
+	const void *pr_root;
 	int *tmp_ncookies = NULL;
 	off_t offset;
 	int error, i;
@@ -1386,10 +1418,18 @@ mqfs_readdir(struct vop_readdir_args *ap)
 	error = 0;
 	offset = 0;
 
+	pr_root = ap->a_cred->cr_prison->pr_root;
 	sx_xlock(&mi->mi_lock);
 
 	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
 		entry.d_reclen = sizeof(entry);
+
+		/*
+		 * Only show names within the same prison root directory
+		 * (or not associated with a prison, e.g. "." and "..").
+		 */
+		if (pn->mn_pr_root != NULL && pn->mn_pr_root != pr_root)
+			continue;
 		if (!pn->mn_fileno)
 			mqfs_fileno_alloc(mi, pn);
 		entry.d_fileno = pn->mn_fileno;
@@ -1522,7 +1562,82 @@ mqfs_rmdir(struct vop_rmdir_args *ap)
 
 #endif /* notyet */
 
+
 /*
+ * Set a destructor task with the prison's root
+ */
+static int
+mqfs_prison_create(void *obj, void *data __unused)
+{
+	struct prison *pr = obj;
+	struct mqfs_osd *mo;
+	void *rsv;
+
+	if (pr->pr_root == pr->pr_parent->pr_root)
+		return(0);
+
+	mo = malloc(sizeof(struct mqfs_osd), M_PRISON, M_WAITOK);
+	rsv = osd_reserve(mqfs_osd_jail_slot);
+	TASK_INIT(&mo->mo_task, 0, mqfs_prison_remove_task, mo);
+	mtx_lock(&pr->pr_mtx);
+	mo->mo_pr_root = pr->pr_root;
+	(void)osd_jail_set_reserved(pr, mqfs_osd_jail_slot, rsv, mo);
+	mtx_unlock(&pr->pr_mtx);
+	return (0);
+}
+
+/*
+ * Queue the task for after jail/OSD locks are released
+ */
+static void
+mqfs_prison_destructor(void *data)
+{
+	struct mqfs_osd *mo = data;
+
+	if (mqfs_osd_jail_slot != 0)
+		taskqueue_enqueue(taskqueue_thread, &mo->mo_task);
+	else
+		free(mo, M_PRISON);
+}
+
+/*
+ * See if this prison root is obsolete, and clean up associated queues if it is
+ */
+static void
+mqfs_prison_remove_task(void *context, int pending)
+{
+	struct mqfs_osd *mo = context;
+	struct mqfs_node *pn, *tpn;
+	const struct prison *pr;
+	const void *pr_root;
+	int found;
+
+	pr_root = mo->mo_pr_root;
+	found = 0;
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(pr, &allprison, pr_list) {
+		if (pr->pr_root == pr_root)
+			found = 1;
+	}
+	sx_sunlock(&allprison_lock);
+	if (!found) {
+		/*
+		 * No jails are rooted in this directory anymore,
+		 * so no queues should be either.
+		 */
+		sx_xlock(&mqfs_data.mi_lock);
+		LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
+		    mn_sibling, tpn) {
+			if (pn->mn_pr_root == pr_root)
+				(void)do_unlink(pn, curthread->td_ucred);
+		}
+		sx_xunlock(&mqfs_data.mi_lock);
+	}
+	free(mo, M_PRISON);
+}
+
+
+/*
  * Allocate a message queue
  */
 static struct mqueue *
@@ -1982,7 +2097,7 @@ kern_kmq_open(struct thread *td, const char *upath, in
 		return (error);
 
 	sx_xlock(&mqfs_data.mi_lock);
-	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1);
+	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
 	if (pn == NULL) {
 		if (!(flags & O_CREAT)) {
 			error = ENOENT;
@@ -2077,7 +2192,7 @@ sys_kmq_unlink(struct thread *td, struct kmq_unlink_ar
 		return (EINVAL);
 
 	sx_xlock(&mqfs_data.mi_lock);
-	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1);
+	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
 	if (pn != NULL)
 		error = do_unlink(pn, td->td_ucred);
 	else

Modified: releng/10.3/sys/kern/uipc_sem.c
==============================================================================
--- releng/10.3/sys/kern/uipc_sem.c	Wed Nov 15 22:42:20 2017	(r325872)
+++ releng/10.3/sys/kern/uipc_sem.c	Wed Nov 15 22:45:13 2017	(r325873)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/fnv_hash.h>
+#include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/ksem.h>
 #include <sys/lock.h>
@@ -444,12 +445,24 @@ ksem_remove(char *path, Fnv32_t fnv, struct ucred *ucr
 static void
 ksem_info_impl(struct ksem *ks, char *path, size_t size, uint32_t *value)
 {
+	const char *ks_path, *pr_path;
+	size_t pr_pathlen;
 
 	if (ks->ks_path == NULL)
 		return;
 	sx_slock(&ksem_dict_lock);
-	if (ks->ks_path != NULL)
-		strlcpy(path, ks->ks_path, size);
+	ks_path = ks->ks_path;
+	if (ks_path != NULL) {
+		pr_path = curthread->td_ucred->cr_prison->pr_path;
+		if (strcmp(pr_path, "/") != 0) {
+			/* Return the jail-rooted pathname. */
+			pr_pathlen = strlen(pr_path);
+			if (strncmp(ks_path, pr_path, pr_pathlen) == 0 &&
+			    ks_path[pr_pathlen] == '/')
+				ks_path += pr_pathlen;
+		}
+		strlcpy(path, ks_path, size);
+	}
 	if (value != NULL)
 		*value = ks->ks_value;
 	sx_sunlock(&ksem_dict_lock);
@@ -493,6 +506,8 @@ ksem_create(struct thread *td, const char *name, semid
 	struct ksem *ks;
 	struct file *fp;
 	char *path;
+	const char *pr_path;
+	size_t pr_pathlen;
 	Fnv32_t fnv;
 	int error, fd;
 
@@ -529,10 +544,16 @@ ksem_create(struct thread *td, const char *name, semid
 			ks->ks_flags |= KS_ANONYMOUS;
 	} else {
 		path = malloc(MAXPATHLEN, M_KSEM, M_WAITOK);
-		error = copyinstr(name, path, MAXPATHLEN, NULL);
+		pr_path = td->td_ucred->cr_prison->pr_path;
 
+		/* Construct a full pathname for jailed callers. */
+		pr_pathlen = strcmp(pr_path, "/") == 0 ? 0
+		    : strlcpy(path, pr_path, MAXPATHLEN);
+		error = copyinstr(name, path + pr_pathlen,
+		    MAXPATHLEN - pr_pathlen, NULL);
+
 		/* Require paths to start with a '/' character. */
-		if (error == 0 && path[0] != '/')
+		if (error == 0 && path[pr_pathlen] != '/')
 			error = EINVAL;
 		if (error) {
 			fdclose(fdp, fp, fd, td);
@@ -668,11 +689,17 @@ int
 sys_ksem_unlink(struct thread *td, struct ksem_unlink_args *uap)
 {
 	char *path;
+	const char *pr_path;
+	size_t pr_pathlen;
 	Fnv32_t fnv;
 	int error;
 
 	path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
-	error = copyinstr(uap->name, path, MAXPATHLEN, NULL);
+	pr_path = td->td_ucred->cr_prison->pr_path;
+	pr_pathlen = strcmp(pr_path, "/") == 0 ? 0
+	    : strlcpy(path, pr_path, MAXPATHLEN);
+	error = copyinstr(uap->name, path + pr_pathlen, MAXPATHLEN - pr_pathlen,
+	    NULL);
 	if (error) {
 		free(path, M_TEMP);
 		return (error);

Modified: releng/10.3/sys/kern/uipc_shm.c
==============================================================================
--- releng/10.3/sys/kern/uipc_shm.c	Wed Nov 15 22:42:20 2017	(r325872)
+++ releng/10.3/sys/kern/uipc_shm.c	Wed Nov 15 22:45:13 2017	(r325873)
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/uio.h>
 #include <sys/signal.h>
+#include <sys/jail.h>
 #include <sys/ktrace.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
@@ -711,6 +712,8 @@ sys_shm_open(struct thread *td, struct shm_open_args *
 	struct shmfd *shmfd;
 	struct file *fp;
 	char *path;
+	const char *pr_path;
+	size_t pr_pathlen;
 	Fnv32_t fnv;
 	mode_t cmode;
 	int fd, error;
@@ -748,13 +751,19 @@ sys_shm_open(struct thread *td, struct shm_open_args *
 		shmfd = shm_alloc(td->td_ucred, cmode);
 	} else {
 		path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK);
-		error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
+		pr_path = td->td_ucred->cr_prison->pr_path;
+
+		/* Construct a full pathname for jailed callers. */
+		pr_pathlen = strcmp(pr_path, "/") == 0 ? 0
+		    : strlcpy(path, pr_path, MAXPATHLEN);
+		error = copyinstr(uap->path, path + pr_pathlen,
+		    MAXPATHLEN - pr_pathlen, NULL);
 #ifdef KTRACE
 		if (error == 0 && KTRPOINT(curthread, KTR_NAMEI))
 			ktrnamei(path);
 #endif
 		/* Require paths to start with a '/' character. */
-		if (error == 0 && path[0] != '/')
+		if (error == 0 && path[pr_pathlen] != '/')
 			error = EINVAL;
 		if (error) {
 			fdclose(fdp, fp, fd, td);
@@ -841,11 +850,17 @@ int
 sys_shm_unlink(struct thread *td, struct shm_unlink_args *uap)
 {
 	char *path;
+	const char *pr_path;
+	size_t pr_pathlen;
 	Fnv32_t fnv;
 	int error;
 
 	path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
-	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
+	pr_path = td->td_ucred->cr_prison->pr_path;
+	pr_pathlen = strcmp(pr_path, "/") == 0 ? 0
+	    : strlcpy(path, pr_path, MAXPATHLEN);
+	error = copyinstr(uap->path, path + pr_pathlen, MAXPATHLEN - pr_pathlen,
+	    NULL);
 	if (error) {
 		free(path, M_TEMP);
 		return (error);
@@ -1052,11 +1067,23 @@ shm_unmap(struct file *fp, void *mem, size_t size)
 void
 shm_path(struct shmfd *shmfd, char *path, size_t size)
 {
+	const char *shm_path, *pr_path;
+	size_t pr_pathlen;
 
 	if (shmfd->shm_path == NULL)
 		return;
 	sx_slock(&shm_dict_lock);
-	if (shmfd->shm_path != NULL)
-		strlcpy(path, shmfd->shm_path, size);
+	shm_path = shmfd->shm_path;
+	if (shm_path != NULL) {
+		pr_path = curthread->td_ucred->cr_prison->pr_path;
+		if (strcmp(pr_path, "/") != 0) {
+			/* Return the jail-rooted pathname. */
+			pr_pathlen = strlen(pr_path);
+			if (strncmp(shm_path, pr_path, pr_pathlen) == 0 &&
+			    shm_path[pr_pathlen] == '/')
+				shm_path += pr_pathlen;
+		}
+		strlcpy(path, shm_path, size);
+	}
 	sx_sunlock(&shm_dict_lock);
 }

Modified: releng/10.3/sys/sys/osd.h
==============================================================================
--- releng/10.3/sys/sys/osd.h	Wed Nov 15 22:42:20 2017	(r325872)
+++ releng/10.3/sys/sys/osd.h	Wed Nov 15 22:45:13 2017	(r325873)
@@ -59,6 +59,10 @@ int osd_register(u_int type, osd_destructor_t destruct
 void osd_deregister(u_int type, u_int slot);
 
 int osd_set(u_int type, struct osd *osd, u_int slot, void *value);
+void *osd_reserve(u_int slot);
+int osd_set_reserved(u_int type, struct osd *osd, u_int slot, void *rsv,
+    void *value);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201711152245.vAFMjDvc010815>