Date: Sun, 1 Apr 2012 20:54:17 -0400 (EDT) From: Rick Macklem <rmacklem@uoguelph.ca> To: FS List <freebsd-fs@freebsd.org> Cc: alc@freebsd.org, Joel Ray Holveck <joelh@juniper.net>, David Wolfskill <david@catwhisker.org> Subject: RFC: which patch is preferred for fix to PR#165923 Message-ID: <1184428460.2076443.1333328057090.JavaMail.root@erie.cs.uoguelph.ca> In-Reply-To: <302424888.2076325.1333327897003.JavaMail.root@erie.cs.uoguelph.ca>
index | next in thread | previous in thread | raw e-mail
[-- Attachment #1 --]
First, a little background w.r.t. an offlist discussion of
PR# 165923. A critical part of the problem reported is the
credentials used to to the Write RPCs in the NFS client's
VOP_PUTPAGES(). Currently the code simply uses the credentials
of the thread that called VOP_PUTPAGES(), which is often incorrect,
especially when the caller is "root" and the NFS server is doing
"root squashing" (ie mapping root->nobody or similar).
Although there is no correct answer w.r.t. what credentials should
be used, there seemed to be rough consensus that the credentials of
the process that did the mmap() call would be best. (If there are
multiple writers, you could use either the 1st or most recent mmap(),
but since all writers had write permissions at open(), I don't think
it matters much which you use, but others can comment on this.)
However, there seems to be some disagreement as to how a patch to
use the mmap() credentials should be done.
putpages-a.patch - This one captures the credentials during the mmap()
call by using the property that only mmap() calls vnode_pager_alloc()
with "prot != 0" and references them in a new field in vm_object_t.
putpages-b.patch - This one adds a new VOP_PUTPAGES_NOTIFY() call, which
is done by mmap(). For all file systems except NFS clients, it is a
null op. For the NFS clients, they reference the credentials in a
new field of the NFS specific part of the vnode.
- I don't have a patch for the third one, but I believe Joel was proposing
something similar to putpages-a.patch, except that the credentials are
passed as an extra argument to VOP_PUTPAGES(), instead of the NFS client's
VOP_PUTPAGES() looking in the vnode's vm_object_t for the cred.
I've attached the first 2, in case anyone wants to look at them.
Here's my take on the advantages/disadvantages of each patch:
Advantages Disadvantages
patch-a simple, doesn't require VOP
changes and, therefore, can
be MFC'd easily saves a largely NFS specific item
in vm_object_t
depends on the fact that only
mmap() calls vnode_pager_alloc() with
prot != 0
patch-b basically has the advantages/disadvantages of patch1 reversed, although
it is pretty simple, too
patch3 although it sounds cleaner to pass "cred" as an extra argument to
VOP_PUTPAGES(), that makes it iffy (I'll let Joel discuss this) to MFC
Sorry this was so long winded, but I figured it needed some explanation. I am
hoping others have opinions w.r.t. which patch is more appropriate and that it
comes to a rough consensus, so I'll know which one to prepare for a commit to
head (and possible MFC). Hopefully Joel and Kostik will post, explaining their
preferences. (Alternately, if they give me permission, I can re-post their previous
offlist comments.)
Thanks in advance for any comments on this, rick
[-- Attachment #2 --]
--- vm/vnode_pager.c.sav 2012-03-13 20:23:03.000000000 -0400
+++ vm/vnode_pager.c 2012-03-15 09:41:02.000000000 -0400
@@ -62,6 +62,7 @@ __FBSDID("$FreeBSD: projects/nfsv4.1-cli
#include <sys/buf.h>
#include <sys/vmmeter.h>
#include <sys/limits.h>
+#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/sf_buf.h>
@@ -223,6 +224,10 @@ retry:
object->un_pager.vnp.vnp_size = size;
object->un_pager.vnp.writemappings = 0;
+ if ((prot & PROT_WRITE) != 0)
+ object->un_pager.vnp.cred = crhold(cred);
+ else
+ object->un_pager.vnp.cred = NULL;
object->handle = handle;
VI_LOCK(vp);
@@ -238,6 +243,9 @@ retry:
VI_UNLOCK(vp);
} else {
object->ref_count++;
+ if (object->un_pager.vnp.cred == NULL &&
+ (prot & PROT_WRITE) != 0)
+ object->un_pager.vnp.cred = crhold(cred);
VM_OBJECT_UNLOCK(object);
}
vref(vp);
@@ -253,6 +261,7 @@ vnode_pager_dealloc(object)
{
struct vnode *vp;
int refs;
+ struct ucred *cred;
vp = object->handle;
if (vp == NULL)
@@ -264,6 +273,8 @@ vnode_pager_dealloc(object)
object->handle = NULL;
object->type = OBJT_DEAD;
+ cred = object->un_pager.vnp.cred;
+ object->un_pager.vnp.cred = NULL;
if (object->flags & OBJ_DISCONNECTWNT) {
vm_object_clear_flag(object, OBJ_DISCONNECTWNT);
wakeup(object);
@@ -276,6 +287,8 @@ vnode_pager_dealloc(object)
vp->v_object = NULL;
vp->v_vflag &= ~VV_TEXT;
VM_OBJECT_UNLOCK(object);
+ if (cred != NULL)
+ crfree(cred);
while (refs-- > 0)
vunref(vp);
VM_OBJECT_LOCK(object);
--- vm/vm_object.h.sav 2012-03-13 20:21:26.000000000 -0400
+++ vm/vm_object.h 2012-03-13 20:27:04.000000000 -0400
@@ -109,10 +109,12 @@ struct vm_object {
* VNode pager
*
* vnp_size - current size of file
+ * cred - credentials for NFS I/O
*/
struct {
off_t vnp_size;
vm_ooffset_t writemappings;
+ struct ucred *cred;
} vnp;
/*
--- nfsclient/nfs_bio.c.sav 2012-03-13 20:53:20.000000000 -0400
+++ nfsclient/nfs_bio.c 2012-03-15 09:58:13.000000000 -0400
@@ -271,11 +271,11 @@ nfs_putpages(struct vop_putpages_args *a
struct nfsmount *nmp;
struct nfsnode *np;
vm_page_t *pages;
+ vm_object_t object;
vp = ap->a_vp;
np = VTONFS(vp);
td = curthread; /* XXX */
- cred = curthread->td_ucred; /* XXX */
nmp = VFSTONFS(vp->v_mount);
pages = ap->a_m;
count = ap->a_count;
@@ -283,6 +283,19 @@ nfs_putpages(struct vop_putpages_args *a
npages = btoc(count);
offset = IDX_TO_OFF(pages[0]->pindex);
+ if ((object = vp->v_object) == NULL) {
+ nfs_printf("nfs_putpages: called with non-merged cache vnode??\n");
+ return (VM_PAGER_ERROR);
+ }
+ /*
+ * I don't think acquiring a reference count on cred is required here,
+ * but do it anyhow, just in case.
+ */
+ if (object->un_pager.vnp.cred != NULL)
+ cred = crhold(object->un_pager.vnp.cred);
+ else
+ cred = crhold(curthread->td_ucred); /* XXX */
+
mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
(nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
@@ -339,6 +352,7 @@ nfs_putpages(struct vop_putpages_args *a
iomode = NFSV3WRITE_FILESYNC;
error = (nmp->nm_rpcops->nr_writerpc)(vp, &uio, cred, &iomode, &must_commit);
+ crfree(cred);
pmap_qremove(kva, npages);
relpbuf(bp, &nfs_pbuf_freecnt);
--- fs/nfsclient/nfs_clbio.c.sav 2012-03-13 20:32:48.000000000 -0400
+++ fs/nfsclient/nfs_clbio.c 2012-03-15 09:58:43.000000000 -0400
@@ -276,11 +276,11 @@ ncl_putpages(struct vop_putpages_args *a
struct nfsmount *nmp;
struct nfsnode *np;
vm_page_t *pages;
+ vm_object_t object;
vp = ap->a_vp;
np = VTONFS(vp);
td = curthread; /* XXX */
- cred = curthread->td_ucred; /* XXX */
nmp = VFSTONFS(vp->v_mount);
pages = ap->a_m;
count = ap->a_count;
@@ -288,6 +288,19 @@ ncl_putpages(struct vop_putpages_args *a
npages = btoc(count);
offset = IDX_TO_OFF(pages[0]->pindex);
+ if ((object = vp->v_object) == NULL) {
+ ncl_printf("nfs_putpages: called with non-merged cache vnode??\n");
+ return (VM_PAGER_ERROR);
+ }
+ /*
+ * I don't think acquiring a reference count on cred is required here,
+ * but do it anyhow, just in case.
+ */
+ if (object->un_pager.vnp.cred != NULL)
+ cred = crhold(object->un_pager.vnp.cred);
+ else
+ cred = crhold(curthread->td_ucred); /* XXX */
+
mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
(nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
@@ -344,6 +357,7 @@ ncl_putpages(struct vop_putpages_args *a
iomode = NFSWRITE_FILESYNC;
error = ncl_writerpc(vp, &uio, cred, &iomode, &must_commit, 0);
+ crfree(cred);
pmap_qremove(kva, npages);
relpbuf(bp, &ncl_pbuf_freecnt);
[-- Attachment #3 --]
--- nfsclient/nfs_vnops.c.sav 2012-03-22 20:01:07.000000000 -0400
+++ nfsclient/nfs_vnops.c 2012-03-22 20:34:41.000000000 -0400
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD: head/sys/nfsclient/n
#include <sys/jail.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/mman.h>
#include <sys/namei.h>
#include <sys/socket.h>
#include <sys/vnode.h>
@@ -147,6 +148,7 @@ static vop_readlink_t nfs_readlink;
static vop_print_t nfs_print;
static vop_advlock_t nfs_advlock;
static vop_advlockasync_t nfs_advlockasync;
+static vop_mmap_notify_t nfs_mmap_notify;
/*
* Global vfs data structures for nfs
@@ -180,6 +182,7 @@ struct vop_vector nfs_vnodeops = {
.vop_strategy = nfs_strategy,
.vop_symlink = nfs_symlink,
.vop_write = nfs_write,
+ .vop_mmap_notify = nfs_mmap_notify,
};
struct vop_vector nfs_fifoops = {
@@ -3525,3 +3528,14 @@ struct buf_ops buf_ops_nfs = {
.bop_sync = bufsync,
.bop_bdflush = bufbdflush,
};
+
+static int
+nfs_mmap_notify(struct vop_mmap_notify_args *ap)
+{
+ struct nfsnode *np = VTONFS(ap->a_vp);
+
+ if (np->n_ppcred == NULL && (ap->a_prot & PROT_WRITE) != 0)
+ np->n_ppcred = crhold(ap->a_cred);
+ return (0);
+}
+
--- nfsclient/nfsnode.h.sav 2012-03-22 20:05:07.000000000 -0400
+++ nfsclient/nfsnode.h 2012-03-22 20:06:00.000000000 -0400
@@ -128,6 +128,7 @@ struct nfsnode {
uint32_t n_namelen;
int n_directio_opens;
int n_directio_asyncwr;
+ struct ucred *n_ppcred; /* Cred. for putpages */
};
#define n_atim n_un1.nf_atim
--- nfsclient/nfs_node.c.sav 2012-03-22 20:06:09.000000000 -0400
+++ nfsclient/nfs_node.c 2012-03-22 20:07:04.000000000 -0400
@@ -208,6 +208,7 @@ nfs_inactive(struct vop_inactive_args *a
struct nfsnode *np;
struct sillyrename *sp;
struct thread *td = curthread; /* XXX */
+ struct ucred *cred;
np = VTONFS(ap->a_vp);
mtx_lock(&np->n_mtx);
@@ -229,7 +230,11 @@ nfs_inactive(struct vop_inactive_args *a
mtx_lock(&np->n_mtx);
}
np->n_flag &= NMODIFIED;
+ cred = np->n_ppcred;
+ np->n_ppcred = NULL;
mtx_unlock(&np->n_mtx);
+ if (cred != NULL)
+ crfree(cred);
return (0);
}
@@ -270,6 +275,8 @@ nfs_reclaim(struct vop_reclaim_args *ap)
free((caddr_t)dp2, M_NFSDIROFF);
}
}
+ if (np->n_ppcred != NULL)
+ crfree(np->n_ppcred);
if (np->n_fhsize > NFS_SMALLFH) {
free((caddr_t)np->n_fhp, M_NFSBIGFH);
}
--- nfsclient/nfs_bio.c.sav 2012-03-22 20:11:04.000000000 -0400
+++ nfsclient/nfs_bio.c 2012-03-22 20:40:13.000000000 -0400
@@ -299,6 +299,9 @@ nfs_putpages(struct vop_putpages_args *a
mtx_lock(&np->n_mtx);
}
+ /* Set the cred to n_ppcred for the write rpcs. */
+ if (np->n_ppcred != NULL)
+ cred = np->n_ppcred;
for (i = 0; i < npages; i++)
rtvals[i] = VM_PAGER_ERROR;
--- kern/vnode_if.src.sav 2012-03-22 10:18:00.000000000 -0400
+++ kern/vnode_if.src 2012-03-22 10:38:54.000000000 -0400
@@ -660,6 +660,16 @@ vop_unp_detach {
IN struct vnode *vp;
};
+%% mmap_notify vp L L L
+
+vop_mmap_notify {
+ IN struct vnode *vp;
+ IN vm_offset_t foff;
+ IN int flags;
+ IN vm_prot_t prot;
+ IN struct ucred *cred;
+};
+
# The VOPs below are spares at the end of the table to allow new VOPs to be
# added in stable branches without breaking the KBI. New VOPs in HEAD should
# be added above these spares. When merging a new VOP to a stable branch,
--- kern/vfs_default.c.sav 2012-03-22 10:42:08.000000000 -0400
+++ kern/vfs_default.c 2012-03-22 10:46:41.000000000 -0400
@@ -126,6 +126,7 @@ struct vop_vector default_vnodeops = {
.vop_unp_bind = vop_stdunp_bind,
.vop_unp_connect = vop_stdunp_connect,
.vop_unp_detach = vop_stdunp_detach,
+ .vop_mmap_notify = VOP_NULL,
};
/*
--- fs/nfsclient/nfs_clvnops.c.sav 2012-03-22 19:45:22.000000000 -0400
+++ fs/nfsclient/nfs_clvnops.c 2012-03-22 20:35:11.000000000 -0400
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD: head/sys/fs/nfsclien
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
+#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
@@ -150,6 +151,7 @@ static vop_advlock_t nfs_advlock;
static vop_advlockasync_t nfs_advlockasync;
static vop_getacl_t nfs_getacl;
static vop_setacl_t nfs_setacl;
+static vop_mmap_notify_t nfs_mmap_notify;
/*
* Global vfs data structures for nfs
@@ -187,6 +189,7 @@ struct vop_vector newnfs_vnodeops = {
.vop_write = ncl_write,
.vop_getacl = nfs_getacl,
.vop_setacl = nfs_setacl,
+ .vop_mmap_notify = nfs_mmap_notify,
};
struct vop_vector newnfs_fifoops = {
@@ -3474,3 +3477,13 @@ nfs_pathconf(struct vop_pathconf_args *a
return (error);
}
+static int
+nfs_mmap_notify(struct vop_mmap_notify_args *ap)
+{
+ struct nfsnode *np = VTONFS(ap->a_vp);
+
+ if (np->n_ppcred == NULL && (ap->a_prot & PROT_WRITE) != 0)
+ np->n_ppcred = crhold(ap->a_cred);
+ return (0);
+}
+
--- fs/nfsclient/nfsnode.h.sav 2012-03-22 19:52:31.000000000 -0400
+++ fs/nfsclient/nfsnode.h 2012-03-22 19:53:56.000000000 -0400
@@ -123,6 +123,7 @@ struct nfsnode {
int n_directio_asyncwr;
u_int64_t n_change; /* old Change attribute */
struct nfsv4node *n_v4; /* extra V4 stuff */
+ struct ucred *n_ppcred; /* Cred. for putpages */
};
#define n_atim n_un1.nf_atim
--- fs/nfsclient/nfs_clnode.c.sav 2012-03-22 19:54:44.000000000 -0400
+++ fs/nfsclient/nfs_clnode.c 2012-03-22 20:00:28.000000000 -0400
@@ -210,6 +210,7 @@ ncl_inactive(struct vop_inactive_args *a
struct nfsnode *np;
struct sillyrename *sp;
struct vnode *vp = ap->a_vp;
+ struct ucred *cred;
np = VTONFS(vp);
@@ -243,7 +244,11 @@ ncl_inactive(struct vop_inactive_args *a
mtx_lock(&np->n_mtx);
}
np->n_flag &= NMODIFIED;
+ cred = np->n_ppcred;
+ np->n_ppcred = NULL;
mtx_unlock(&np->n_mtx);
+ if (cred != NULL)
+ crfree(cred);
return (0);
}
@@ -300,6 +305,8 @@ ncl_reclaim(struct vop_reclaim_args *ap)
FREE((caddr_t)dp2, M_NFSDIROFF);
}
}
+ if (np->n_ppcred != NULL)
+ crfree(np->n_ppcred);
FREE((caddr_t)np->n_fhp, M_NFSFH);
if (np->n_v4 != NULL)
FREE((caddr_t)np->n_v4, M_NFSV4NODE);
--- fs/nfsclient/nfs_clbio.c.sav 2012-03-22 20:07:56.000000000 -0400
+++ fs/nfsclient/nfs_clbio.c 2012-03-22 20:10:47.000000000 -0400
@@ -304,6 +304,9 @@ ncl_putpages(struct vop_putpages_args *a
mtx_lock(&np->n_mtx);
}
+ /* Set the cred to n_ppcred for the write rpcs. */
+ if (np->n_ppcred != NULL)
+ cred = np->n_ppcred;
for (i = 0; i < npages; i++)
rtvals[i] = VM_PAGER_ERROR;
--- vm/vm_mmap.c.sav 2012-03-22 10:33:22.000000000 -0400
+++ vm/vm_mmap.c 2012-03-22 10:36:35.000000000 -0400
@@ -1289,6 +1289,8 @@ vm_mmap_vnode(struct thread *td, vm_size
}
if ((error = VOP_GETATTR(vp, &va, cred)))
goto done;
+ if ((error = VOP_MMAP_NOTIFY(vp, foff, flags, prot, cred)) != 0)
+ goto done;
#ifdef MAC
error = mac_vnode_check_mmap(cred, vp, prot, flags);
if (error != 0)
--- sys/vnode.h.sav 2012-03-22 11:00:49.000000000 -0400
+++ sys/vnode.h 2012-03-22 11:01:25.000000000 -0400
@@ -43,6 +43,8 @@
#include <sys/acl.h>
#include <sys/ktr.h>
+#include <vm/vm.h>
+
/*
* The vnode is the focus of all file activity in UNIX. There is a
* unique vnode allocated for each active file, each current directory,
help
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?1184428460.2076443.1333328057090.JavaMail.root>
