Skip site navigation (1) Skip section navigation (2)
Date:      Wed, 27 May 2009 14:11:23 +0000 (UTC)
From:      Jamie Gritton <jamie@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r192895 - in head: . lib/libc/sys sys/compat/freebsd32 sys/compat/linux sys/contrib/ipfilter/netinet sys/fs/procfs sys/kern sys/net sys/netinet sys/netinet6 sys/nfsserver sys/security/m...
Message-ID:  <200905271411.n4REBNKa099209@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jamie
Date: Wed May 27 14:11:23 2009
New Revision: 192895
URL: http://svn.freebsd.org/changeset/base/192895

Log:
  Add hierarchical jails.  A jail may further virtualize its environment
  by creating a child jail, which is visible to that jail and to any
  parent jails.  Child jails may be restricted more than their parents,
  but never less.  Jail names reflect this hierarchy, being MIB-style
  dot-separated strings.
  
  Every thread now points to a jail, the default being prison0, which
  contains information about the physical system.  Prison0's root
  directory is the same as rootvnode; its hostname is the same as the
  global hostname, and its securelevel replaces the global securelevel.
  Note that the variable "securelevel" has actually gone away, which
  should not cause any problems for code that properly uses
  securelevel_gt() and securelevel_ge().
  
  Some jail-related permissions that were kept in global variables and
  set via sysctls are now per-jail settings.  The sysctls still exist for
  backward compatibility, used only by the now-deprecated jail(2) system
  call.
  
  Approved by:	bz (mentor)

Modified:
  head/UPDATING
  head/lib/libc/sys/jail.2
  head/sys/compat/freebsd32/freebsd32_misc.c
  head/sys/compat/linux/linux_mib.c
  head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
  head/sys/contrib/ipfilter/netinet/ip_nat.c
  head/sys/fs/procfs/procfs_status.c
  head/sys/kern/init_main.c
  head/sys/kern/kern_cpuset.c
  head/sys/kern/kern_descrip.c
  head/sys/kern/kern_exit.c
  head/sys/kern/kern_fork.c
  head/sys/kern/kern_jail.c
  head/sys/kern/kern_linker.c
  head/sys/kern/kern_mib.c
  head/sys/kern/kern_proc.c
  head/sys/kern/kern_prot.c
  head/sys/kern/sysv_msg.c
  head/sys/kern/sysv_sem.c
  head/sys/kern/sysv_shm.c
  head/sys/kern/vfs_lookup.c
  head/sys/kern/vfs_mount.c
  head/sys/kern/vfs_subr.c
  head/sys/kern/vfs_syscalls.c
  head/sys/net/rtsock.c
  head/sys/netinet/in_pcb.c
  head/sys/netinet/udp_usrreq.c
  head/sys/netinet6/in6.c
  head/sys/netinet6/in6_ifattach.c
  head/sys/netinet6/in6_pcb.c
  head/sys/nfsserver/nfs_srvsock.c
  head/sys/security/mac_bsdextended/mac_bsdextended.c
  head/sys/sys/cpuset.h
  head/sys/sys/jail.h
  head/sys/sys/param.h
  head/sys/sys/syscallsubr.h
  head/sys/sys/systm.h
  head/sys/ufs/ufs/ufs_vnops.c

Modified: head/UPDATING
==============================================================================
--- head/UPDATING	Wed May 27 13:59:17 2009	(r192894)
+++ head/UPDATING	Wed May 27 14:11:23 2009	(r192895)
@@ -22,6 +22,10 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.
 	to maximize performance.  (To disable malloc debugging, run
 	ln -s aj /etc/malloc.conf.)
 
+20090527:
+	Add support for hierarchical jails.  Remove global securelevel.
+	Bump __FreeBSD_version to 800091.
+
 20090523:
 	The layout of struct vnet_net has changed, therefore modules
 	need to be rebuilt.

Modified: head/lib/libc/sys/jail.2
==============================================================================
--- head/lib/libc/sys/jail.2	Wed May 27 13:59:17 2009	(r192894)
+++ head/lib/libc/sys/jail.2	Wed May 27 14:11:23 2009	(r192895)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd April 29, 2009
+.Dd May 27, 2009
 .Dt JAIL 2
 .Os
 .Sh NAME
@@ -283,7 +283,7 @@ of the jail for the given address family
 It is possible to identify a process as jailed by examining
 .Dq Li /proc/<pid>/status :
 it will show a field near the end of the line, either as
-a single hyphen for a process at large, or the hostname currently
+a single hyphen for a process at large, or the name currently
 set for the prison for jailed processes.
 .Sh ERRORS
 The
@@ -292,7 +292,10 @@ system call
 will fail if:
 .Bl -tag -width Er
 .It Bq Er EPERM
-This process is not allowed to create a jail.
+This process is not allowed to create a jail, either because it is not
+the super-user, or because it is in a jail where the
+.Va allow.jails
+parameter is not set.
 .It Bq Er EFAULT
 .Fa jail
 points to an address outside the allocated address space of the process.
@@ -308,7 +311,10 @@ system call
 will fail if:
 .Bl -tag -width Er
 .It Bq Er EPERM
-This process is not allowed to create a jail.
+This process is not allowed to create a jail, either because it is not
+the super-user, or because it is in a jail where the
+.Va allow.jails
+parameter is not set.
 .It Bq Er EPERM
 A jail parameter was set to a less restrictive value then the current
 environment.
@@ -324,6 +330,11 @@ or
 parameter does not exist, and the
 .Dv JAIL_CREATE
 flag is not set.
+.It Bq Er ENOENT
+The jail referred to by a
+.Va jid
+is not accessible by the process, because the process is in a different
+jail. 
 .It Bq Er EEXIST
 The jail referred to by a
 .Va jid
@@ -368,6 +379,11 @@ or
 .Va name
 parameter does not exist.
 .It Bq Er ENOENT
+The jail referred to by a
+.Va jid
+is not accessible by the process, because the process is in a different
+jail. 
+.It Bq Er ENOENT
 The
 .Va lastjid
 parameter is greater than the highest current jail ID.
@@ -429,4 +445,4 @@ for R&D Associates
 who contributed it to
 .Fx .
 .An James Gritton
-added the extensible jail parameters.
+added the extensible jail parameters and hierarchical jails.

Modified: head/sys/compat/freebsd32/freebsd32_misc.c
==============================================================================
--- head/sys/compat/freebsd32/freebsd32_misc.c	Wed May 27 13:59:17 2009	(r192894)
+++ head/sys/compat/freebsd32/freebsd32_misc.c	Wed May 27 14:11:23 2009	(r192895)
@@ -112,8 +112,6 @@ CTASSERT(sizeof(struct msghdr32) == 28);
 CTASSERT(sizeof(struct stat32) == 96);
 CTASSERT(sizeof(struct sigaction32) == 24);
 
-extern int jail_max_af_ips;
-
 static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count);
 static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count);
 
@@ -2044,17 +2042,9 @@ freebsd32_sysctl(struct thread *td, stru
 int
 freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap)
 {
-	struct iovec optiov[10];
-	struct uio opt;
-	char *u_path, *u_hostname, *u_name;
-#ifdef INET
-	struct in_addr *u_ip4;
-#endif
-#ifdef INET6
-	struct in6_addr *u_ip6;
-#endif
 	uint32_t version;
 	int error;
+	struct jail j;
 
 	error = copyin(uap->jail, &version, sizeof(uint32_t));
 	if (error)
@@ -2066,45 +2056,14 @@ freebsd32_jail(struct thread *td, struct
 		/* FreeBSD single IPv4 jails. */
 		struct jail32_v0 j32_v0;
 
+		bzero(&j, sizeof(struct jail));
 		error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0));
 		if (error)
 			return (error);
-		u_path = malloc(MAXPATHLEN + MAXHOSTNAMELEN, M_TEMP, M_WAITOK);
-		u_hostname = u_path + MAXPATHLEN;
-		opt.uio_iov = optiov;
-		opt.uio_iovcnt = 4;
-		opt.uio_offset = -1;
-		opt.uio_resid = -1;
-		opt.uio_segflg = UIO_SYSSPACE;
-		opt.uio_rw = UIO_READ;
-		opt.uio_td = td;
-		optiov[0].iov_base = "path";
-		optiov[0].iov_len = sizeof("path");
-		optiov[1].iov_base = u_path;
-		error = copyinstr(PTRIN(j32_v0.path), u_path, MAXPATHLEN,
-		    &optiov[1].iov_len);
-		if (error) {
-			free(u_path, M_TEMP);
-			return (error);
-		}
-		optiov[2].iov_base = "host.hostname";
-		optiov[2].iov_len = sizeof("host.hostname");
-		optiov[3].iov_base = u_hostname;
-		error = copyinstr(PTRIN(j32_v0.hostname), u_hostname,
-		    MAXHOSTNAMELEN, &optiov[3].iov_len);
-		if (error) {
-			free(u_path, M_TEMP);
-			return (error);
-		}
-#ifdef INET
-		optiov[opt.uio_iovcnt].iov_base = "ip4.addr";
-		optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr");
-		opt.uio_iovcnt++;
-		optiov[opt.uio_iovcnt].iov_base = &j32_v0.ip_number;
-		j32_v0.ip_number = htonl(j32_v0.ip_number);
-		optiov[opt.uio_iovcnt].iov_len = sizeof(j32_v0.ip_number);
-		opt.uio_iovcnt++;
-#endif
+		CP(j32_v0, j, version);
+		PTRIN_CP(j32_v0, j, path);
+		PTRIN_CP(j32_v0, j, hostname);
+		j.ip4s = j32_v0.ip_number;
 		break;
 	}
 
@@ -2119,109 +2078,18 @@ freebsd32_jail(struct thread *td, struct
 	{
 		/* FreeBSD multi-IPv4/IPv6,noIP jails. */
 		struct jail32 j32;
-		size_t tmplen;
 
 		error = copyin(uap->jail, &j32, sizeof(struct jail32));
 		if (error)
 			return (error);
-		tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN;
-#ifdef INET
-		if (j32.ip4s > jail_max_af_ips)
-			return (EINVAL);
-		tmplen += j32.ip4s * sizeof(struct in_addr);
-#else
-		if (j32.ip4s > 0)
-			return (EINVAL);
-#endif
-#ifdef INET6
-		if (j32.ip6s > jail_max_af_ips)
-			return (EINVAL);
-		tmplen += j32.ip6s * sizeof(struct in6_addr);
-#else
-		if (j32.ip6s > 0)
-			return (EINVAL);
-#endif
-		u_path = malloc(tmplen, M_TEMP, M_WAITOK);
-		u_hostname = u_path + MAXPATHLEN;
-		u_name = u_hostname + MAXHOSTNAMELEN;
-#ifdef INET
-		u_ip4 =  (struct in_addr *)(u_name + MAXHOSTNAMELEN);
-#endif
-#ifdef INET6
-#ifdef INET
-		u_ip6 = (struct in6_addr *)(u_ip4 + j32.ip4s);
-#else
-		u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN);
-#endif
-#endif
-		opt.uio_iov = optiov;
-		opt.uio_iovcnt = 4;
-		opt.uio_offset = -1;
-		opt.uio_resid = -1;
-		opt.uio_segflg = UIO_SYSSPACE;
-		opt.uio_rw = UIO_READ;
-		opt.uio_td = td;
-		optiov[0].iov_base = "path";
-		optiov[0].iov_len = sizeof("path");
-		optiov[1].iov_base = u_path;
-		error = copyinstr(PTRIN(j32.path), u_path, MAXPATHLEN,
-		    &optiov[1].iov_len);
-		if (error) {
-			free(u_path, M_TEMP);
-			return (error);
-		}
-		optiov[2].iov_base = "host.hostname";
-		optiov[2].iov_len = sizeof("host.hostname");
-		optiov[3].iov_base = u_hostname;
-		error = copyinstr(PTRIN(j32.hostname), u_hostname,
-		    MAXHOSTNAMELEN, &optiov[3].iov_len);
-		if (error) {
-			free(u_path, M_TEMP);
-			return (error);
-		}
-		if (PTRIN(j32.jailname) != NULL) {
-			optiov[opt.uio_iovcnt].iov_base = "name";
-			optiov[opt.uio_iovcnt].iov_len = sizeof("name");
-			opt.uio_iovcnt++;
-			optiov[opt.uio_iovcnt].iov_base = u_name;
-			error = copyinstr(PTRIN(j32.jailname), u_name,
-			    MAXHOSTNAMELEN, &optiov[opt.uio_iovcnt].iov_len);
-			if (error) {
-				free(u_path, M_TEMP);
-				return (error);
-			}
-			opt.uio_iovcnt++;
-		}
-#ifdef INET
-		optiov[opt.uio_iovcnt].iov_base = "ip4.addr";
-		optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr");
-		opt.uio_iovcnt++;
-		optiov[opt.uio_iovcnt].iov_base = u_ip4;
-		optiov[opt.uio_iovcnt].iov_len =
-		    j32.ip4s * sizeof(struct in_addr);
-		error = copyin(PTRIN(j32.ip4), u_ip4,
-		    optiov[opt.uio_iovcnt].iov_len);
-		if (error) {
-			free(u_path, M_TEMP);
-			return (error);
-		}
-		opt.uio_iovcnt++;
-#endif
-#ifdef INET6
-		optiov[opt.uio_iovcnt].iov_base = "ip6.addr";
-		optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr");
-		opt.uio_iovcnt++;
-		optiov[opt.uio_iovcnt].iov_base = u_ip6;
-		optiov[opt.uio_iovcnt].iov_len =
-		    j32.ip6s * sizeof(struct in6_addr);
-		error = copyin(PTRIN(j32.ip6), u_ip6,
-		    optiov[opt.uio_iovcnt].iov_len);
-		if (error) {
-			free(u_path, M_TEMP);
-			return (error);
-		}
-		opt.uio_iovcnt++;
-#endif
+		CP(j32, j, version);
+		PTRIN_CP(j32, j, path);
+		PTRIN_CP(j32, j, hostname);
+		PTRIN_CP(j32, j, jailname);
+		CP(j32, j, ip4s);
+		CP(j32, j, ip6s);
+		PTRIN_CP(j32, j, ip4);
+		PTRIN_CP(j32, j, ip6);
 		break;
 	}
 
@@ -2229,9 +2097,7 @@ freebsd32_jail(struct thread *td, struct
 		/* Sci-Fi jails are not supported, sorry. */
 		return (EINVAL);
 	}
-	error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH);
-	free(u_path, M_TEMP);
-	return (error);
+	return (kern_jail(td, &j));
 }
 
 int

Modified: head/sys/compat/linux/linux_mib.c
==============================================================================
--- head/sys/compat/linux/linux_mib.c	Wed May 27 13:59:17 2009	(r192894)
+++ head/sys/compat/linux/linux_mib.c	Wed May 27 14:11:23 2009	(r192895)
@@ -57,16 +57,18 @@ struct linux_prison {
 	int	pr_osrel;
 };
 
+static struct linux_prison lprison0 = {
+	.pr_osname =		"Linux",
+	.pr_osrelease =		"2.6.16",
+	.pr_oss_version =	0x030600,
+	.pr_osrel =		2006016
+};
+
 static unsigned linux_osd_jail_slot;
 
 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW, 0,
 	    "Linux mode");
 
-static struct mtx osname_lock;
-MTX_SYSINIT(linux_osname, &osname_lock, "linux osname", MTX_DEF);
-
-static char	linux_osname[LINUX_MAX_UTSNAME] = "Linux";
-
 static int
 linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
 {
@@ -86,9 +88,6 @@ SYSCTL_PROC(_compat_linux, OID_AUTO, osn
 	    0, 0, linux_sysctl_osname, "A",
 	    "Linux kernel OS name");
 
-static char	linux_osrelease[LINUX_MAX_UTSNAME] = "2.6.16";
-static int	linux_osrel = 2006016;
-
 static int
 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
 {
@@ -108,8 +107,6 @@ SYSCTL_PROC(_compat_linux, OID_AUTO, osr
 	    0, 0, linux_sysctl_osrelease, "A",
 	    "Linux kernel OS release");
 
-static int	linux_oss_version = 0x030600;
-
 static int
 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
 {
@@ -161,69 +158,74 @@ linux_map_osrel(char *osrelease, int *os
 }
 
 /*
- * Returns holding the prison mutex if return non-NULL.
+ * Find a prison with Linux info.
+ * Return the Linux info and the (locked) prison.
  */
 static struct linux_prison *
-linux_get_prison(struct thread *td, struct prison **prp)
+linux_find_prison(struct prison *spr, struct prison **prp)
 {
 	struct prison *pr;
 	struct linux_prison *lpr;
 
-	KASSERT(td == curthread, ("linux_get_prison() called on !curthread"));
-	*prp = pr = td->td_ucred->cr_prison;
-	if (pr == NULL || !linux_osd_jail_slot)
-		return (NULL);
-	mtx_lock(&pr->pr_mtx);
-	lpr = osd_jail_get(pr, linux_osd_jail_slot);
-	if (lpr == NULL)
+	if (!linux_osd_jail_slot)
+		/* In case osd_register failed. */
+		spr = &prison0;
+	for (pr = spr;; pr = pr->pr_parent) {
+		mtx_lock(&pr->pr_mtx);
+		lpr = (pr == &prison0)
+		    ? &lprison0
+		    : osd_jail_get(pr, linux_osd_jail_slot);
+		if (lpr != NULL)
+			break;
 		mtx_unlock(&pr->pr_mtx);
+	}
+	*prp = pr;
 	return (lpr);
 }
 
 /*
- * Ensure a prison has its own Linux info.  The prison should be locked on
- * entrance and will be locked on exit (though it may get unlocked in the
- * interrim).
+ * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
+ * the Linux info and lock the prison.
  */
 static int
 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
 {
+	struct prison *ppr;
 	struct linux_prison *lpr, *nlpr;
 	int error;
 
 	/* If this prison already has Linux info, return that. */
 	error = 0;
-	mtx_assert(&pr->pr_mtx, MA_OWNED);
-	lpr = osd_jail_get(pr, linux_osd_jail_slot);
-	if (lpr != NULL)
+	lpr = linux_find_prison(pr, &ppr);
+	if (ppr == pr)
 		goto done;
 	/*
 	 * Allocate a new info record.  Then check again, in case something
 	 * changed during the allocation.
 	 */
-	mtx_unlock(&pr->pr_mtx);
+	mtx_unlock(&ppr->pr_mtx);
 	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
-	mtx_lock(&pr->pr_mtx);
-	lpr = osd_jail_get(pr, linux_osd_jail_slot);
-	if (lpr != NULL) {
+	lpr = linux_find_prison(pr, &ppr);
+	if (ppr == pr) {
 		free(nlpr, M_PRISON);
 		goto done;
 	}
+	/* Inherit the initial values from the ancestor. */
+	mtx_lock(&pr->pr_mtx);
 	error = osd_jail_set(pr, linux_osd_jail_slot, nlpr);
-	if (error)
-		free(nlpr, M_PRISON);
-	else {
+	if (error == 0) {
+		bcopy(lpr, nlpr, sizeof(*lpr));
 		lpr = nlpr;
-		mtx_lock(&osname_lock);
-		strncpy(lpr->pr_osname, linux_osname, LINUX_MAX_UTSNAME);
-		strncpy(lpr->pr_osrelease, linux_osrelease, LINUX_MAX_UTSNAME);
-		lpr->pr_oss_version = linux_oss_version;
-		lpr->pr_osrel = linux_osrel;
-		mtx_unlock(&osname_lock);
+	} else {
+		free(nlpr, M_PRISON);
+		lpr = NULL;
 	}
-done:
+	mtx_unlock(&ppr->pr_mtx);
+ done:
 	if (lprp != NULL)
 		*lprp = lpr;
+	else
+		mtx_unlock(&pr->pr_mtx);
 	return (error);
 }
 
@@ -233,7 +235,6 @@ done:
 static int
 linux_prison_create(void *obj, void *data)
 {
-	int error;
 	struct prison *pr = obj;
 	struct vfsoptlist *opts = data;
 
@@ -243,10 +244,7 @@ linux_prison_create(void *obj, void *dat
 	 * Inherit a prison's initial values from its parent
 	 * (different from NULL which also inherits changes).
 	 */
-	mtx_lock(&pr->pr_mtx);
-	error = linux_alloc_prison(pr, NULL);
-	mtx_unlock(&pr->pr_mtx);
-	return (error);
+	return linux_alloc_prison(pr, NULL);
 }
 
 static int
@@ -254,7 +252,7 @@ linux_prison_check(void *obj __unused, v
 {
 	struct vfsoptlist *opts = data;
 	char *osname, *osrelease;
-	int error, len, oss_version;
+	int error, len, osrel, oss_version;
 
 	/* Check that the parameters are correct. */
 	(void)vfs_flagopt(opts, "linux", NULL, 0);
@@ -280,6 +278,11 @@ linux_prison_check(void *obj __unused, v
 			vfs_opterror(opts, "linux.osrelease too long");
 			return (ENAMETOOLONG);
 		}
+		error = linux_map_osrel(osrelease, &osrel);
+		if (error != 0) {
+			vfs_opterror(opts, "linux.osrelease format error");
+			return (error);
+		}
 	}
 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
 	    sizeof(oss_version));
@@ -310,7 +313,7 @@ linux_prison_set(void *obj, void *data)
 		yeslinux = 1;
 	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
 	    sizeof(oss_version));
-	gotversion = error == 0;
+	gotversion = (error == 0);
 	yeslinux |= gotversion;
 	if (nolinux) {
 		/* "nolinux": inherit the parent's Linux info. */
@@ -322,7 +325,6 @@ linux_prison_set(void *obj, void *data)
 		 * "linux" or "linux.*":
 		 * the prison gets its own Linux info.
 		 */
-		mtx_lock(&pr->pr_mtx);
 		error = linux_alloc_prison(pr, &lpr);
 		if (error) {
 			mtx_unlock(&pr->pr_mtx);
@@ -360,14 +362,16 @@ static int
 linux_prison_get(void *obj, void *data)
 {
 	struct linux_prison *lpr;
+	struct prison *ppr;
 	struct prison *pr = obj;
 	struct vfsoptlist *opts = data;
 	int error, i;
 
-	mtx_lock(&pr->pr_mtx);
-	/* Tell whether this prison has its own Linux info. */
-	lpr = osd_jail_get(pr, linux_osd_jail_slot);
-	i = lpr != NULL;
+	static int version0;
+
+	/* See if this prison is the one with the Linux info. */
+	lpr = linux_find_prison(pr, &ppr);
+	i = (ppr == pr);
 	error = vfs_setopt(opts, "linux", &i, sizeof(i));
 	if (error != 0 && error != ENOENT)
 		goto done;
@@ -375,39 +379,37 @@ linux_prison_get(void *obj, void *data)
 	error = vfs_setopt(opts, "nolinux", &i, sizeof(i));
 	if (error != 0 && error != ENOENT)
 		goto done;
-	/*
-	 * It's kind of bogus to give the root info, but leave it to the caller
-	 * to check the above flag.
-	 */
-	if (lpr != NULL) {
-		error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
+	if (i) {
+		/*
+		 * If this prison is inheriting its Linux info, report
+		 * empty/zero parameters.
+		 */
+		error = vfs_setopts(opts, "linux.osname", "");
 		if (error != 0 && error != ENOENT)
 			goto done;
-		error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
+		error = vfs_setopts(opts, "linux.osrelease", "");
 		if (error != 0 && error != ENOENT)
 			goto done;
-		error = vfs_setopt(opts, "linux.oss_version",
-		    &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
+		error = vfs_setopt(opts, "linux.oss_version", &version0,
+		    sizeof(lpr->pr_oss_version));
 		if (error != 0 && error != ENOENT)
 			goto done;
 	} else {
-		mtx_lock(&osname_lock);
-		error = vfs_setopts(opts, "linux.osname", linux_osname);
+		error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
 		if (error != 0 && error != ENOENT)
 			goto done;
-		error = vfs_setopts(opts, "linux.osrelease", linux_osrelease);
+		error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
 		if (error != 0 && error != ENOENT)
 			goto done;
 		error = vfs_setopt(opts, "linux.oss_version",
-		    &linux_oss_version, sizeof(linux_oss_version));
+		    &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
 		if (error != 0 && error != ENOENT)
 			goto done;
-		mtx_unlock(&osname_lock);
 	}
 	error = 0;
 
  done:
-	mtx_unlock(&pr->pr_mtx);
+	mtx_unlock(&ppr->pr_mtx);
 	return (error);
 }
 
@@ -434,11 +436,8 @@ linux_osd_jail_register(void)
 	if (linux_osd_jail_slot > 0) {
 		/* Copy the system linux info to any current prisons. */
 		sx_xlock(&allprison_lock);
-		TAILQ_FOREACH(pr, &allprison, pr_list) {
-			mtx_lock(&pr->pr_mtx);
+		TAILQ_FOREACH(pr, &allprison, pr_list)
 			(void)linux_alloc_prison(pr, NULL);
-			mtx_unlock(&pr->pr_mtx);
-		}
 		sx_xunlock(&allprison_lock);
 	}
 }
@@ -457,15 +456,9 @@ linux_get_osname(struct thread *td, char
 	struct prison *pr;
 	struct linux_prison *lpr;
 
-	lpr = linux_get_prison(td, &pr);
-	if (lpr != NULL) {
-		bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
-		mtx_unlock(&pr->pr_mtx);
-	} else {
-		mtx_lock(&osname_lock);
-		bcopy(linux_osname, dst, LINUX_MAX_UTSNAME);
-		mtx_unlock(&osname_lock);
-	}
+	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
+	bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
+	mtx_unlock(&pr->pr_mtx);
 }
 
 int
@@ -474,16 +467,9 @@ linux_set_osname(struct thread *td, char
 	struct prison *pr;
 	struct linux_prison *lpr;
 
-	lpr = linux_get_prison(td, &pr);
-	if (lpr != NULL) {
-		strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
-		mtx_unlock(&pr->pr_mtx);
-	} else {
-		mtx_lock(&osname_lock);
-		strcpy(linux_osname, osname);
-		mtx_unlock(&osname_lock);
-	}
-
+	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
+	strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
+	mtx_unlock(&pr->pr_mtx);
 	return (0);
 }
 
@@ -493,15 +479,9 @@ linux_get_osrelease(struct thread *td, c
 	struct prison *pr;
 	struct linux_prison *lpr;
 
-	lpr = linux_get_prison(td, &pr);
-	if (lpr != NULL) {
-		bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
-		mtx_unlock(&pr->pr_mtx);
-	} else {
-		mtx_lock(&osname_lock);
-		bcopy(linux_osrelease, dst, LINUX_MAX_UTSNAME);
-		mtx_unlock(&osname_lock);
-	}
+	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
+	bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
+	mtx_unlock(&pr->pr_mtx);
 }
 
 int
@@ -511,12 +491,9 @@ linux_kernver(struct thread *td)
 	struct linux_prison *lpr;
 	int osrel;
 
-	lpr = linux_get_prison(td, &pr);
-	if (lpr != NULL) {
-		osrel = lpr->pr_osrel;
-		mtx_unlock(&pr->pr_mtx);
-	} else
-		osrel = linux_osrel;
+	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
+	osrel = lpr->pr_osrel;
+	mtx_unlock(&pr->pr_mtx);
 	return (osrel);
 }
 
@@ -527,27 +504,12 @@ linux_set_osrelease(struct thread *td, c
 	struct linux_prison *lpr;
 	int error;
 
-	lpr = linux_get_prison(td, &pr);
-	if (lpr != NULL) {
-		error = linux_map_osrel(osrelease, &lpr->pr_osrel);
-		if (error) {
-			mtx_unlock(&pr->pr_mtx);
-			return (error);
-		}
+	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
+	error = linux_map_osrel(osrelease, &lpr->pr_osrel);
+	if (error == 0)
 		strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
-		mtx_unlock(&pr->pr_mtx);
-	} else {
-		mtx_lock(&osname_lock);
-		error = linux_map_osrel(osrelease, &linux_osrel);
-		if (error) {
-			mtx_unlock(&osname_lock);
-			return (error);
-		}
-		strcpy(linux_osrelease, osrelease);
-		mtx_unlock(&osname_lock);
-	}
-
-	return (0);
+	mtx_unlock(&pr->pr_mtx);
+	return (error);
 }
 
 int
@@ -557,12 +519,9 @@ linux_get_oss_version(struct thread *td)
 	struct linux_prison *lpr;
 	int version;
 
-	lpr = linux_get_prison(td, &pr);
-	if (lpr != NULL) {
-		version = lpr->pr_oss_version;
-		mtx_unlock(&pr->pr_mtx);
-	} else
-		version = linux_oss_version;
+	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
+	version = lpr->pr_oss_version;
+	mtx_unlock(&pr->pr_mtx);
 	return (version);
 }
 
@@ -572,16 +531,9 @@ linux_set_oss_version(struct thread *td,
 	struct prison *pr;
 	struct linux_prison *lpr;
 
-	lpr = linux_get_prison(td, &pr);
-	if (lpr != NULL) {
-		lpr->pr_oss_version = oss_version;
-		mtx_unlock(&pr->pr_mtx);
-	} else {
-		mtx_lock(&osname_lock);
-		linux_oss_version = oss_version;
-		mtx_unlock(&osname_lock);
-	}
-
+	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
+	lpr->pr_oss_version = oss_version;
+	mtx_unlock(&pr->pr_mtx);
 	return (0);
 }
 

Modified: head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
==============================================================================
--- head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c	Wed May 27 13:59:17 2009	(r192894)
+++ head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c	Wed May 27 14:11:23 2009	(r192895)
@@ -318,8 +318,10 @@ int iplioctl(dev, cmd, data, mode
 #  if (__FreeBSD_version >= 500024)
 struct thread *p;
 #   if (__FreeBSD_version >= 500043)
+#    define	p_cred	td_ucred
 #    define	p_uid	td_ucred->cr_ruid
 #   else
+#    define	p_cred	t_proc->p_cred
 #    define	p_uid	t_proc->p_cred->p_ruid
 #   endif
 #  else
@@ -342,7 +344,11 @@ int mode;
 	SPL_INT(s);
 
 #if (BSD >= 199306) && defined(_KERNEL)
+# if (__FreeBSD_version >= 500034)
+	if (securelevel_ge(p->p_cred, 3) && (mode & FWRITE))
+# else
 	if ((securelevel >= 3) && (mode & FWRITE))
+# endif
 		return EPERM;
 #endif
 

Modified: head/sys/contrib/ipfilter/netinet/ip_nat.c
==============================================================================
--- head/sys/contrib/ipfilter/netinet/ip_nat.c	Wed May 27 13:59:17 2009	(r192894)
+++ head/sys/contrib/ipfilter/netinet/ip_nat.c	Wed May 27 14:11:23 2009	(r192895)
@@ -662,7 +662,11 @@ void *ctx;
 		return EPERM;
 	}
 # else
+#  if defined(__FreeBSD_version) && (__FreeBSD_version >= 500034)
+	if (securelevel_ge(curthread->td_ucred, 3) && (mode & FWRITE)) {
+#  else
 	if ((securelevel >= 3) && (mode & FWRITE)) {
+#  endif
 		return EPERM;
 	}
 # endif

Modified: head/sys/fs/procfs/procfs_status.c
==============================================================================
--- head/sys/fs/procfs/procfs_status.c	Wed May 27 13:59:17 2009	(r192894)
+++ head/sys/fs/procfs/procfs_status.c	Wed May 27 14:11:23 2009	(r192895)
@@ -151,10 +151,11 @@ procfs_doprocstatus(PFS_FILL_ARGS)
 		sbuf_printf(sb, ",%lu", (u_long)cr->cr_groups[i]);
 	}
 
-	if (jailed(p->p_ucred)) {
-		mtx_lock(&p->p_ucred->cr_prison->pr_mtx);
-		sbuf_printf(sb, " %s", p->p_ucred->cr_prison->pr_host);
-		mtx_unlock(&p->p_ucred->cr_prison->pr_mtx);
+	if (jailed(cr)) {
+		mtx_lock(&cr->cr_prison->pr_mtx);
+		sbuf_printf(sb, " %s",
+		    prison_name(td->td_ucred->cr_prison, cr->cr_prison));
+		mtx_unlock(&cr->cr_prison->pr_mtx);
 	} else {
 		sbuf_printf(sb, " -");
 	}

Modified: head/sys/kern/init_main.c
==============================================================================
--- head/sys/kern/init_main.c	Wed May 27 13:59:17 2009	(r192894)
+++ head/sys/kern/init_main.c	Wed May 27 14:11:23 2009	(r192895)
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/exec.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
+#include <sys/jail.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mount.h>
@@ -436,6 +437,7 @@ proc0_init(void *dummy __unused)
 	td->td_oncpu = 0;
 	td->td_flags = TDF_INMEM|TDP_KTHREAD;
 	td->td_cpuset = cpuset_thread0();
+	prison0.pr_cpuset = cpuset_ref(td->td_cpuset);
 	p->p_peers = 0;
 	p->p_leader = p;
 
@@ -452,7 +454,7 @@ proc0_init(void *dummy __unused)
 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
 	p->p_ucred->cr_uidinfo = uifind(0);
 	p->p_ucred->cr_ruidinfo = uifind(0);
-	p->p_ucred->cr_prison = NULL;	/* Don't jail it. */
+	p->p_ucred->cr_prison = &prison0;
 #ifdef VIMAGE
 	KASSERT(LIST_FIRST(&vimage_head) != NULL, ("vimage_head empty"));
 	P_TO_VIMAGE(p) =  LIST_FIRST(&vimage_head); /* set ucred->cr_vimage */

Modified: head/sys/kern/kern_cpuset.c
==============================================================================
--- head/sys/kern/kern_cpuset.c	Wed May 27 13:59:17 2009	(r192894)
+++ head/sys/kern/kern_cpuset.c	Wed May 27 14:11:23 2009	(r192895)
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
+#include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
@@ -53,7 +54,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/limits.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
-#include <sys/jail.h>		/* Must come after sys/proc.h */
 
 #include <vm/uma.h>
 
@@ -225,23 +225,16 @@ cpuset_lookup(cpusetid_t setid, struct t
 
 	KASSERT(td != NULL, ("[%s:%d] td is NULL", __func__, __LINE__));
 	if (set != NULL && jailed(td->td_ucred)) {
-		struct cpuset *rset, *jset;
-		struct prison *pr;
-
-		rset = cpuset_refroot(set);
-
-		pr = td->td_ucred->cr_prison;
-		mtx_lock(&pr->pr_mtx);
-		cpuset_ref(pr->pr_cpuset);
-		jset = pr->pr_cpuset;
-		mtx_unlock(&pr->pr_mtx);
+		struct cpuset *jset, *tset;
 
-		if (jset->cs_id != rset->cs_id) {
+		jset = td->td_ucred->cr_prison->pr_cpuset;
+		for (tset = set; tset != NULL; tset = tset->cs_parent)
+			if (tset == jset)
+				break;
+		if (tset == NULL) {
 			cpuset_rel(set);
 			set = NULL;
 		}
-		cpuset_rel(jset);
-		cpuset_rel(rset);
 	}
 
 	return (set);
@@ -456,25 +449,14 @@ cpuset_which(cpuwhich_t which, id_t id, 
 		struct prison *pr;
 
 		sx_slock(&allprison_lock);
-		pr = prison_find(id);
+		pr = prison_find_child(curthread->td_ucred->cr_prison, id);
 		sx_sunlock(&allprison_lock);
 		if (pr == NULL)
 			return (ESRCH);
-		if (jailed(curthread->td_ucred)) {
-			if (curthread->td_ucred->cr_prison == pr) {
-				cpuset_ref(pr->pr_cpuset);
-				set = pr->pr_cpuset;
-			}
-		} else {
-			cpuset_ref(pr->pr_cpuset);
-			set = pr->pr_cpuset;
-		}
+		cpuset_ref(pr->pr_cpuset);
+		*setp = pr->pr_cpuset;
 		mtx_unlock(&pr->pr_mtx);
-		if (set) {
-			*setp = set;
-			return (0);
-		}
-		return (ESRCH);
+		return (0);
 	}
 	case CPU_WHICH_IRQ:
 		return (0);
@@ -731,21 +713,15 @@ cpuset_thread0(void)
  * In case of no error, returns the set in *setp locked with a reference.
  */
 int
-cpuset_create_root(struct thread *td, struct cpuset **setp)
+cpuset_create_root(struct prison *pr, struct cpuset **setp)
 {
-	struct cpuset *root;
 	struct cpuset *set;
 	int error;
 
-	KASSERT(td != NULL, ("[%s:%d] invalid td", __func__, __LINE__));
+	KASSERT(pr != NULL, ("[%s:%d] invalid pr", __func__, __LINE__));
 	KASSERT(setp != NULL, ("[%s:%d] invalid setp", __func__, __LINE__));
 
-	thread_lock(td);
-	root = cpuset_refroot(td->td_cpuset);
-	thread_unlock(td);
-
-	error = cpuset_create(setp, td->td_cpuset, &root->cs_mask);
-	cpuset_rel(root);
+	error = cpuset_create(setp, pr->pr_cpuset, &pr->pr_cpuset->cs_mask);
 	if (error)
 		return (error);
 

Modified: head/sys/kern/kern_descrip.c
==============================================================================
--- head/sys/kern/kern_descrip.c	Wed May 27 13:59:17 2009	(r192894)
+++ head/sys/kern/kern_descrip.c	Wed May 27 14:11:23 2009	(r192895)
@@ -2416,24 +2416,25 @@ dupfdopen(struct thread *td, struct file
 }
 
 /*
- * Scan all active processes to see if any of them have a current or root
- * directory of `olddp'. If so, replace them with the new mount point.
+ * Scan all active processes and prisons to see if any of them have a current
+ * or root directory of `olddp'. If so, replace them with the new mount point.
  */
 void
 mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
 {
 	struct filedesc *fdp;
+	struct prison *pr;
 	struct proc *p;
 	int nrele;
 
 	if (vrefcnt(olddp) == 1)
 		return;
+	nrele = 0;
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		fdp = fdhold(p);
 		if (fdp == NULL)
 			continue;
-		nrele = 0;
 		FILEDESC_XLOCK(fdp);
 		if (fdp->fd_cdir == olddp) {
 			vref(newdp);
@@ -2445,17 +2446,40 @@ mountcheckdirs(struct vnode *olddp, stru
 			fdp->fd_rdir = newdp;
 			nrele++;
 		}
+		if (fdp->fd_jdir == olddp) {
+			vref(newdp);
+			fdp->fd_jdir = newdp;
+			nrele++;
+		}
 		FILEDESC_XUNLOCK(fdp);
 		fddrop(fdp);
-		while (nrele--)
-			vrele(olddp);
 	}
 	sx_sunlock(&allproc_lock);
 	if (rootvnode == olddp) {
-		vrele(rootvnode);
 		vref(newdp);
 		rootvnode = newdp;
+		nrele++;
+	}
+	mtx_lock(&prison0.pr_mtx);
+	if (prison0.pr_root == olddp) {
+		vref(newdp);
+		prison0.pr_root = newdp;
+		nrele++;
+	}
+	mtx_unlock(&prison0.pr_mtx);
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(pr, &allprison, pr_list) {
+		mtx_lock(&pr->pr_mtx);
+		if (pr->pr_root == olddp) {
+			vref(newdp);
+			pr->pr_root = newdp;
+			nrele++;
+		}
+		mtx_unlock(&pr->pr_mtx);
 	}
+	sx_sunlock(&allprison_lock);
+	while (nrele--)
+		vrele(olddp);
 }
 
 struct filedesc_to_leader *

Modified: head/sys/kern/kern_exit.c
==============================================================================
--- head/sys/kern/kern_exit.c	Wed May 27 13:59:17 2009	(r192894)
+++ head/sys/kern/kern_exit.c	Wed May 27 14:11:23 2009	(r192895)
@@ -455,9 +455,8 @@ exit1(struct thread *td, int rv)
 	p->p_xstat = rv;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200905271411.n4REBNKa099209>