Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 1 Apr 2012 20:54:17 -0400 (EDT)
From:      Rick Macklem <rmacklem@uoguelph.ca>
To:        FS List <freebsd-fs@freebsd.org>
Cc:        alc@freebsd.org, Joel Ray Holveck <joelh@juniper.net>, David Wolfskill <david@catwhisker.org>
Subject:   RFC: which patch is preferred for fix to PR#165923
Message-ID:  <1184428460.2076443.1333328057090.JavaMail.root@erie.cs.uoguelph.ca>
In-Reply-To: <302424888.2076325.1333327897003.JavaMail.root@erie.cs.uoguelph.ca>

index | next in thread | previous in thread | raw e-mail

[-- Attachment #1 --]
First, a little background w.r.t. an offlist discussion of
PR# 165923. A critical part of the problem reported is the
credentials used to to the Write RPCs in the NFS client's
VOP_PUTPAGES(). Currently the code simply uses the credentials
of the thread that called VOP_PUTPAGES(), which is often incorrect,
especially when the caller is "root" and the NFS server is doing
"root squashing" (ie mapping root->nobody or similar).

Although there is no correct answer w.r.t. what credentials should
be used, there seemed to be rough consensus that the credentials of
the process that did the mmap() call would be best. (If there are
multiple writers, you could use either the 1st or most recent mmap(),
but since all writers had write permissions at open(), I don't think
it matters much which you use, but others can comment on this.)

However, there seems to be some disagreement as to how a patch to
use the mmap() credentials should be done.

putpages-a.patch - This one captures the credentials during the mmap()
   call by using the property that only mmap() calls vnode_pager_alloc()
   with "prot != 0" and references them in a new field in vm_object_t.

putpages-b.patch - This one adds a new VOP_PUTPAGES_NOTIFY() call, which
   is done by mmap(). For all file systems except NFS clients, it is a
   null op. For the NFS clients, they reference the credentials in a
   new field of the NFS specific part of the vnode.

- I don't have a patch for the third one, but I believe Joel was proposing
   something similar to putpages-a.patch, except that the credentials are
   passed as an extra argument to VOP_PUTPAGES(), instead of the NFS client's
   VOP_PUTPAGES() looking in the vnode's vm_object_t for the cred.

I've attached the first 2, in case anyone wants to look at them.

Here's my take on the advantages/disadvantages of each patch:
        Advantages                          Disadvantages
patch-a simple, doesn't require VOP
        changes and, therefore, can
        be MFC'd easily                     saves a largely NFS specific item
                                            in vm_object_t
                                            depends on the fact that only
                                            mmap() calls vnode_pager_alloc() with
                                            prot != 0
patch-b basically has the advantages/disadvantages of patch1 reversed, although
        it is pretty simple, too

patch3  although it sounds cleaner to pass "cred" as an extra argument to
        VOP_PUTPAGES(), that makes it iffy (I'll let Joel discuss this) to MFC

Sorry this was so long winded, but I figured it needed some explanation. I am
hoping others have opinions w.r.t. which patch is more appropriate and that it
comes to a rough consensus, so I'll know which one to prepare for a commit to
head (and possible MFC). Hopefully Joel and Kostik will post, explaining their
preferences. (Alternately, if they give me permission, I can re-post their previous
offlist comments.)

Thanks in advance for any comments on this, rick

[-- Attachment #2 --]
--- vm/vnode_pager.c.sav	2012-03-13 20:23:03.000000000 -0400
+++ vm/vnode_pager.c	2012-03-15 09:41:02.000000000 -0400
@@ -62,6 +62,7 @@ __FBSDID("$FreeBSD: projects/nfsv4.1-cli
 #include <sys/buf.h>
 #include <sys/vmmeter.h>
 #include <sys/limits.h>
+#include <sys/mman.h>
 #include <sys/conf.h>
 #include <sys/sf_buf.h>
 
@@ -223,6 +224,10 @@ retry:
 
 		object->un_pager.vnp.vnp_size = size;
 		object->un_pager.vnp.writemappings = 0;
+		if ((prot & PROT_WRITE) != 0)
+			object->un_pager.vnp.cred = crhold(cred);
+		else
+			object->un_pager.vnp.cred = NULL;
 
 		object->handle = handle;
 		VI_LOCK(vp);
@@ -238,6 +243,9 @@ retry:
 		VI_UNLOCK(vp);
 	} else {
 		object->ref_count++;
+		if (object->un_pager.vnp.cred == NULL &&
+		    (prot & PROT_WRITE) != 0)
+			object->un_pager.vnp.cred = crhold(cred);
 		VM_OBJECT_UNLOCK(object);
 	}
 	vref(vp);
@@ -253,6 +261,7 @@ vnode_pager_dealloc(object)
 {
 	struct vnode *vp;
 	int refs;
+	struct ucred *cred;
 
 	vp = object->handle;
 	if (vp == NULL)
@@ -264,6 +273,8 @@ vnode_pager_dealloc(object)
 
 	object->handle = NULL;
 	object->type = OBJT_DEAD;
+	cred = object->un_pager.vnp.cred;
+	object->un_pager.vnp.cred = NULL;
 	if (object->flags & OBJ_DISCONNECTWNT) {
 		vm_object_clear_flag(object, OBJ_DISCONNECTWNT);
 		wakeup(object);
@@ -276,6 +287,8 @@ vnode_pager_dealloc(object)
 	vp->v_object = NULL;
 	vp->v_vflag &= ~VV_TEXT;
 	VM_OBJECT_UNLOCK(object);
+	if (cred != NULL)
+		crfree(cred);
 	while (refs-- > 0)
 		vunref(vp);
 	VM_OBJECT_LOCK(object);
--- vm/vm_object.h.sav	2012-03-13 20:21:26.000000000 -0400
+++ vm/vm_object.h	2012-03-13 20:27:04.000000000 -0400
@@ -109,10 +109,12 @@ struct vm_object {
 		 * VNode pager
 		 *
 		 *	vnp_size - current size of file
+		 *	cred - credentials for NFS I/O
 		 */
 		struct {
 			off_t vnp_size;
 			vm_ooffset_t writemappings;
+			struct ucred *cred;
 		} vnp;
 
 		/*
--- nfsclient/nfs_bio.c.sav	2012-03-13 20:53:20.000000000 -0400
+++ nfsclient/nfs_bio.c	2012-03-15 09:58:13.000000000 -0400
@@ -271,11 +271,11 @@ nfs_putpages(struct vop_putpages_args *a
 	struct nfsmount *nmp;
 	struct nfsnode *np;
 	vm_page_t *pages;
+	vm_object_t object;
 
 	vp = ap->a_vp;
 	np = VTONFS(vp);
 	td = curthread;				/* XXX */
-	cred = curthread->td_ucred;		/* XXX */
 	nmp = VFSTONFS(vp->v_mount);
 	pages = ap->a_m;
 	count = ap->a_count;
@@ -283,6 +283,19 @@ nfs_putpages(struct vop_putpages_args *a
 	npages = btoc(count);
 	offset = IDX_TO_OFF(pages[0]->pindex);
 	
+	if ((object = vp->v_object) == NULL) {
+		nfs_printf("nfs_putpages: called with non-merged cache vnode??\n");
+		return (VM_PAGER_ERROR);
+	}
+	/*
+	 * I don't think acquiring a reference count on cred is required here,
+	 * but do it anyhow, just in case.
+	 */
+	if (object->un_pager.vnp.cred != NULL)
+		cred = crhold(object->un_pager.vnp.cred);
+	else
+		cred = crhold(curthread->td_ucred);	/* XXX */
+
 	mtx_lock(&nmp->nm_mtx);
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
@@ -339,6 +352,7 @@ nfs_putpages(struct vop_putpages_args *a
 	    iomode = NFSV3WRITE_FILESYNC;
 
 	error = (nmp->nm_rpcops->nr_writerpc)(vp, &uio, cred, &iomode, &must_commit);
+	crfree(cred);
 
 	pmap_qremove(kva, npages);
 	relpbuf(bp, &nfs_pbuf_freecnt);
--- fs/nfsclient/nfs_clbio.c.sav	2012-03-13 20:32:48.000000000 -0400
+++ fs/nfsclient/nfs_clbio.c	2012-03-15 09:58:43.000000000 -0400
@@ -276,11 +276,11 @@ ncl_putpages(struct vop_putpages_args *a
 	struct nfsmount *nmp;
 	struct nfsnode *np;
 	vm_page_t *pages;
+	vm_object_t object;
 
 	vp = ap->a_vp;
 	np = VTONFS(vp);
 	td = curthread;				/* XXX */
-	cred = curthread->td_ucred;		/* XXX */
 	nmp = VFSTONFS(vp->v_mount);
 	pages = ap->a_m;
 	count = ap->a_count;
@@ -288,6 +288,19 @@ ncl_putpages(struct vop_putpages_args *a
 	npages = btoc(count);
 	offset = IDX_TO_OFF(pages[0]->pindex);
 	
+	if ((object = vp->v_object) == NULL) {
+		ncl_printf("nfs_putpages: called with non-merged cache vnode??\n");
+		return (VM_PAGER_ERROR);
+	}
+	/*
+	 * I don't think acquiring a reference count on cred is required here,
+	 * but do it anyhow, just in case.
+	 */
+	if (object->un_pager.vnp.cred != NULL)
+		cred = crhold(object->un_pager.vnp.cred);
+	else
+		cred = crhold(curthread->td_ucred);	/* XXX */
+
 	mtx_lock(&nmp->nm_mtx);
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
@@ -344,6 +357,7 @@ ncl_putpages(struct vop_putpages_args *a
 	    iomode = NFSWRITE_FILESYNC;
 
 	error = ncl_writerpc(vp, &uio, cred, &iomode, &must_commit, 0);
+	crfree(cred);
 
 	pmap_qremove(kva, npages);
 	relpbuf(bp, &ncl_pbuf_freecnt);

[-- Attachment #3 --]
--- nfsclient/nfs_vnops.c.sav	2012-03-22 20:01:07.000000000 -0400
+++ nfsclient/nfs_vnops.c	2012-03-22 20:34:41.000000000 -0400
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD: head/sys/nfsclient/n
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/mman.h>
 #include <sys/namei.h>
 #include <sys/socket.h>
 #include <sys/vnode.h>
@@ -147,6 +148,7 @@ static vop_readlink_t	nfs_readlink;
 static vop_print_t	nfs_print;
 static vop_advlock_t	nfs_advlock;
 static vop_advlockasync_t nfs_advlockasync;
+static vop_mmap_notify_t nfs_mmap_notify;
 
 /*
  * Global vfs data structures for nfs
@@ -180,6 +182,7 @@ struct vop_vector nfs_vnodeops = {
 	.vop_strategy =		nfs_strategy,
 	.vop_symlink =		nfs_symlink,
 	.vop_write =		nfs_write,
+	.vop_mmap_notify =	nfs_mmap_notify,
 };
 
 struct vop_vector nfs_fifoops = {
@@ -3525,3 +3528,14 @@ struct buf_ops buf_ops_nfs = {
 	.bop_sync	=	bufsync,
 	.bop_bdflush	=	bufbdflush,
 };
+
+static int
+nfs_mmap_notify(struct vop_mmap_notify_args *ap)
+{
+	struct nfsnode *np = VTONFS(ap->a_vp);
+
+	if (np->n_ppcred == NULL && (ap->a_prot & PROT_WRITE) != 0)
+		np->n_ppcred = crhold(ap->a_cred);
+	return (0);
+}
+
--- nfsclient/nfsnode.h.sav	2012-03-22 20:05:07.000000000 -0400
+++ nfsclient/nfsnode.h	2012-03-22 20:06:00.000000000 -0400
@@ -128,6 +128,7 @@ struct nfsnode {
 	uint32_t		n_namelen;
 	int			n_directio_opens;
 	int                     n_directio_asyncwr;
+	struct ucred		*n_ppcred;	/* Cred. for putpages */
 };
 
 #define n_atim		n_un1.nf_atim
--- nfsclient/nfs_node.c.sav	2012-03-22 20:06:09.000000000 -0400
+++ nfsclient/nfs_node.c	2012-03-22 20:07:04.000000000 -0400
@@ -208,6 +208,7 @@ nfs_inactive(struct vop_inactive_args *a
 	struct nfsnode *np;
 	struct sillyrename *sp;
 	struct thread *td = curthread;	/* XXX */
+	struct ucred *cred;
 
 	np = VTONFS(ap->a_vp);
 	mtx_lock(&np->n_mtx);
@@ -229,7 +230,11 @@ nfs_inactive(struct vop_inactive_args *a
 		mtx_lock(&np->n_mtx);
 	}
 	np->n_flag &= NMODIFIED;
+	cred = np->n_ppcred;
+	np->n_ppcred = NULL;
 	mtx_unlock(&np->n_mtx);
+	if (cred != NULL)
+		crfree(cred);
 	return (0);
 }
 
@@ -270,6 +275,8 @@ nfs_reclaim(struct vop_reclaim_args *ap)
 			free((caddr_t)dp2, M_NFSDIROFF);
 		}
 	}
+	if (np->n_ppcred != NULL)
+		crfree(np->n_ppcred);
 	if (np->n_fhsize > NFS_SMALLFH) {
 		free((caddr_t)np->n_fhp, M_NFSBIGFH);
 	}
--- nfsclient/nfs_bio.c.sav	2012-03-22 20:11:04.000000000 -0400
+++ nfsclient/nfs_bio.c	2012-03-22 20:40:13.000000000 -0400
@@ -299,6 +299,9 @@ nfs_putpages(struct vop_putpages_args *a
 		mtx_lock(&np->n_mtx);
 	}
 
+	/* Set the cred to n_ppcred for the write rpcs. */
+	if (np->n_ppcred != NULL)
+		cred = np->n_ppcred;
 	for (i = 0; i < npages; i++)
 		rtvals[i] = VM_PAGER_ERROR;
 
--- kern/vnode_if.src.sav	2012-03-22 10:18:00.000000000 -0400
+++ kern/vnode_if.src	2012-03-22 10:38:54.000000000 -0400
@@ -660,6 +660,16 @@ vop_unp_detach {
 	IN struct vnode *vp;
 };
 
+%% mmap_notify	vp	L L L
+
+vop_mmap_notify {
+	IN struct vnode *vp;
+	IN vm_offset_t foff;
+	IN int flags;
+	IN vm_prot_t prot;
+	IN struct ucred *cred;
+};
+
 # The VOPs below are spares at the end of the table to allow new VOPs to be
 # added in stable branches without breaking the KBI.  New VOPs in HEAD should
 # be added above these spares.  When merging a new VOP to a stable branch,
--- kern/vfs_default.c.sav	2012-03-22 10:42:08.000000000 -0400
+++ kern/vfs_default.c	2012-03-22 10:46:41.000000000 -0400
@@ -126,6 +126,7 @@ struct vop_vector default_vnodeops = {
 	.vop_unp_bind =		vop_stdunp_bind,
 	.vop_unp_connect =	vop_stdunp_connect,
 	.vop_unp_detach =	vop_stdunp_detach,
+	.vop_mmap_notify =	VOP_NULL,
 };
 
 /*
--- fs/nfsclient/nfs_clvnops.c.sav	2012-03-22 19:45:22.000000000 -0400
+++ fs/nfsclient/nfs_clvnops.c	2012-03-22 20:35:11.000000000 -0400
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD: head/sys/fs/nfsclien
 #include <sys/systm.h>
 #include <sys/resourcevar.h>
 #include <sys/proc.h>
+#include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
@@ -150,6 +151,7 @@ static vop_advlock_t	nfs_advlock;
 static vop_advlockasync_t nfs_advlockasync;
 static vop_getacl_t nfs_getacl;
 static vop_setacl_t nfs_setacl;
+static vop_mmap_notify_t nfs_mmap_notify;
 
 /*
  * Global vfs data structures for nfs
@@ -187,6 +189,7 @@ struct vop_vector newnfs_vnodeops = {
 	.vop_write =		ncl_write,
 	.vop_getacl =		nfs_getacl,
 	.vop_setacl =		nfs_setacl,
+	.vop_mmap_notify =	nfs_mmap_notify,
 };
 
 struct vop_vector newnfs_fifoops = {
@@ -3474,3 +3477,13 @@ nfs_pathconf(struct vop_pathconf_args *a
 	return (error);
 }
 
+static int
+nfs_mmap_notify(struct vop_mmap_notify_args *ap)
+{
+	struct nfsnode *np = VTONFS(ap->a_vp);
+
+	if (np->n_ppcred == NULL && (ap->a_prot & PROT_WRITE) != 0)
+		np->n_ppcred = crhold(ap->a_cred);
+	return (0);
+}
+
--- fs/nfsclient/nfsnode.h.sav	2012-03-22 19:52:31.000000000 -0400
+++ fs/nfsclient/nfsnode.h	2012-03-22 19:53:56.000000000 -0400
@@ -123,6 +123,7 @@ struct nfsnode {
 	int                     n_directio_asyncwr;
 	u_int64_t		 n_change;	/* old Change attribute */
 	struct nfsv4node	*n_v4;		/* extra V4 stuff */
+	struct ucred		*n_ppcred;	/* Cred. for putpages */
 };
 
 #define	n_atim		n_un1.nf_atim
--- fs/nfsclient/nfs_clnode.c.sav	2012-03-22 19:54:44.000000000 -0400
+++ fs/nfsclient/nfs_clnode.c	2012-03-22 20:00:28.000000000 -0400
@@ -210,6 +210,7 @@ ncl_inactive(struct vop_inactive_args *a
 	struct nfsnode *np;
 	struct sillyrename *sp;
 	struct vnode *vp = ap->a_vp;
+	struct ucred *cred;
 
 	np = VTONFS(vp);
 
@@ -243,7 +244,11 @@ ncl_inactive(struct vop_inactive_args *a
 		mtx_lock(&np->n_mtx);
 	}
 	np->n_flag &= NMODIFIED;
+	cred = np->n_ppcred;
+	np->n_ppcred = NULL;
 	mtx_unlock(&np->n_mtx);
+	if (cred != NULL)
+		crfree(cred);
 	return (0);
 }
 
@@ -300,6 +305,8 @@ ncl_reclaim(struct vop_reclaim_args *ap)
 			FREE((caddr_t)dp2, M_NFSDIROFF);
 		}
 	}
+	if (np->n_ppcred != NULL)
+		crfree(np->n_ppcred);
 	FREE((caddr_t)np->n_fhp, M_NFSFH);
 	if (np->n_v4 != NULL)
 		FREE((caddr_t)np->n_v4, M_NFSV4NODE);
--- fs/nfsclient/nfs_clbio.c.sav	2012-03-22 20:07:56.000000000 -0400
+++ fs/nfsclient/nfs_clbio.c	2012-03-22 20:10:47.000000000 -0400
@@ -304,6 +304,9 @@ ncl_putpages(struct vop_putpages_args *a
 		mtx_lock(&np->n_mtx);
 	}
 
+	/* Set the cred to n_ppcred for the write rpcs. */
+	if (np->n_ppcred != NULL)
+		cred = np->n_ppcred;
 	for (i = 0; i < npages; i++)
 		rtvals[i] = VM_PAGER_ERROR;
 
--- vm/vm_mmap.c.sav	2012-03-22 10:33:22.000000000 -0400
+++ vm/vm_mmap.c	2012-03-22 10:36:35.000000000 -0400
@@ -1289,6 +1289,8 @@ vm_mmap_vnode(struct thread *td, vm_size
 	}
 	if ((error = VOP_GETATTR(vp, &va, cred)))
 		goto done;
+	if ((error = VOP_MMAP_NOTIFY(vp, foff, flags, prot, cred)) != 0)
+		goto done;
 #ifdef MAC
 	error = mac_vnode_check_mmap(cred, vp, prot, flags);
 	if (error != 0)
--- sys/vnode.h.sav	2012-03-22 11:00:49.000000000 -0400
+++ sys/vnode.h	2012-03-22 11:01:25.000000000 -0400
@@ -43,6 +43,8 @@
 #include <sys/acl.h>
 #include <sys/ktr.h>
 
+#include <vm/vm.h>
+
 /*
  * The vnode is the focus of all file activity in UNIX.  There is a
  * unique vnode allocated for each active file, each current directory,
help

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?1184428460.2076443.1333328057090.JavaMail.root>