Skip site navigation (1) | Skip section navigation (2)
Date:      Mon, 30 Jul 2012 19:05:41 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
Subject:   svn commit: r238913 - in stable/7/sys: kern nfsclient sys
Message-ID:  <201207301905.q6UJ5flk027689@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Mon Jul 30 19:05:41 2012
New Revision: 238913
URL: http://svn.freebsd.org/changeset/base/238913

Log:
  MFC 194358,230394,230441,230489,230552,232116,232420:
  - For dotdot lookups in nfs_lookup(), inline vn_vget_ino() to avoid
    operating on an unmounted mount point and freed mount data if a forced
    unmount is performed while dvp is unlocked to nget the target vnode.
  - Close a race in NFS lookup processing that could result in stale name cache
    entries on one client when a directory was renamed on another client.  The
    root cause for the stale entry being trusted is that each per-vnode nfsnode
    structure has a single 'n_ctime' timestamp used to validate positive name
    cache entries.  However, if there are multiple entries for a single vnode,
    they all share a single timestamp.  To fix this, extend the name cache
    to allow filesystems to optionally store a timestamp value in each name
    cache entry.  The NFS clients now fetch the timestamp associated with
    each name cache entry and use that to validate cache hits instead of the
    timestamps previously stored in the nfsnode.  Another part of the fix is
    that the NFS clients now use timestamps from the post-op attributes of
    RPCs when adding name cache entries rather than pulling the timestamps out
    of the file's attribute cache.  The latter is subject to races with other
    lookups updating the attribute cache concurrently.
  - Adjust the nfs_skip_wcc_data_onerr setting so that it does not block
    post-op attributes for ENOENT errors now that the name caching logic
    depends on working post-op attributes.
  
  Tested by:	Mark Saad  nonesuch longcount org

Modified:
  stable/7/sys/kern/vfs_cache.c
  stable/7/sys/nfsclient/nfs_socket.c
  stable/7/sys/nfsclient/nfs_subs.c
  stable/7/sys/nfsclient/nfs_vnops.c
  stable/7/sys/nfsclient/nfsm_subs.h
  stable/7/sys/nfsclient/nfsnode.h
  stable/7/sys/sys/vnode.h
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/kern/vfs_cache.c
==============================================================================
--- stable/7/sys/kern/vfs_cache.c	Mon Jul 30 18:21:48 2012	(r238912)
+++ stable/7/sys/kern/vfs_cache.c	Mon Jul 30 19:05:41 2012	(r238913)
@@ -75,6 +75,36 @@ struct	namecache {
 };
 
 /*
+ * struct namecache_ts repeats struct namecache layout up to the
+ * nc_nlen member.
+ * struct namecache_ts is used in place of struct namecache when time(s) need
+ * to be stored.  The nc_dotdottime field is used when a cache entry is mapping
+ * both a non-dotdot directory name plus dotdot for the directory's
+ * parent.
+ */
+struct	namecache_ts {
+	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
+	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
+	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
+	struct	vnode *nc_dvp;		/* vnode of parent of name */
+	struct	vnode *nc_vp;		/* vnode the name refers to */
+	u_char	nc_flag;		/* flag bits */
+	u_char	nc_nlen;		/* length of name */
+	struct	timespec nc_time;	/* timespec provided by fs */
+	struct	timespec nc_dotdottime;	/* dotdot timespec provided by fs */
+	int	nc_ticks;		/* ticks value when entry was added */
+	char	nc_name[0];		/* segment name + nul */
+};
+
+/*
+ * Flags in namecache.nc_flag
+ */
+#define NCF_WHITE	0x01
+#define NCF_ISDOTDOT	0x02
+#define	NCF_TS		0x04
+#define	NCF_DTS		0x08
+
+/*
  * Name caching works as follows:
  *
  * Names found by directory scans are retained in a cache
@@ -128,19 +158,71 @@ MTX_SYSINIT(vfscache, &cache_lock, "Name
  * fit in the small cache.
  */
 static uma_zone_t cache_zone_small;
+static uma_zone_t cache_zone_small_ts;
 static uma_zone_t cache_zone_large;
+static uma_zone_t cache_zone_large_ts;
 
 #define	CACHE_PATH_CUTOFF	32
-#define	CACHE_ZONE_SMALL	(sizeof(struct namecache) + CACHE_PATH_CUTOFF)
-#define	CACHE_ZONE_LARGE	(sizeof(struct namecache) + NAME_MAX)
 
-#define cache_alloc(len)	uma_zalloc(((len) <= CACHE_PATH_CUTOFF) ? \
-	cache_zone_small : cache_zone_large, M_WAITOK)
-#define cache_free(ncp)		do { \
-	if (ncp != NULL) \
-		uma_zfree(((ncp)->nc_nlen <= CACHE_PATH_CUTOFF) ? \
-		    cache_zone_small : cache_zone_large, (ncp)); \
-} while (0)
+static struct namecache *
+cache_alloc(int len, int ts)
+{
+
+	if (len > CACHE_PATH_CUTOFF) {
+		if (ts)
+			return (uma_zalloc(cache_zone_large_ts, M_WAITOK));
+		else
+			return (uma_zalloc(cache_zone_large, M_WAITOK));
+	}
+	if (ts)
+		return (uma_zalloc(cache_zone_small_ts, M_WAITOK));
+	else
+		return (uma_zalloc(cache_zone_small, M_WAITOK));
+}
+
+static void
+cache_free(struct namecache *ncp)
+{
+	int ts;
+
+	if (ncp == NULL)
+		return;
+	ts = ncp->nc_flag & NCF_TS;
+	if (ncp->nc_nlen <= CACHE_PATH_CUTOFF) {
+		if (ts)
+			uma_zfree(cache_zone_small_ts, ncp);
+		else
+			uma_zfree(cache_zone_small, ncp);
+	} else if (ts)
+		uma_zfree(cache_zone_large_ts, ncp);
+	else
+		uma_zfree(cache_zone_large, ncp);
+}
+
+static char *
+nc_get_name(struct namecache *ncp)
+{
+	struct namecache_ts *ncp_ts;
+
+	if ((ncp->nc_flag & NCF_TS) == 0)
+		return (ncp->nc_name);
+	ncp_ts = (struct namecache_ts *)ncp;
+	return (ncp_ts->nc_name);
+}
+
+static void
+cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp)
+{
+
+	KASSERT((ncp->nc_flag & NCF_TS) != 0 ||
+	    (tsp == NULL && ticksp == NULL),
+	    ("No NCF_TS"));
+
+	if (tsp != NULL)
+		*tsp = ((struct namecache_ts *)ncp)->nc_time;
+	if (ticksp != NULL)
+		*ticksp = ((struct namecache_ts *)ncp)->nc_ticks;
+}
 
 static int	doingcache = 1;		/* 1 => enable the cache */
 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
@@ -179,12 +261,6 @@ static int vn_fullpath1(struct thread *t
 
 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
 
-/*
- * Flags in namecache.nc_flag
- */
-#define NCF_WHITE	0x01
-#define NCF_ISDOTDOT	0x02
-
 #ifdef DIAGNOSTIC
 /*
  * Grab an atomic snapshot of the name cache hash chain lengths
@@ -326,10 +402,12 @@ cache_zap(ncp)
  */
 
 int
-cache_lookup(dvp, vpp, cnp)
+cache_lookup_times(dvp, vpp, cnp, tsp, ticksp)
 	struct vnode *dvp;
 	struct vnode **vpp;
 	struct componentname *cnp;
+	struct timespec *tsp;
+	int *ticksp;
 {
 	struct namecache *ncp;
 	struct thread *td;
@@ -351,6 +429,10 @@ retry:
 			CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .",
 			    dvp, cnp->cn_nameptr);
 			dothits++;
+			if (tsp != NULL)
+				timespecclear(tsp);
+			if (ticksp != NULL)
+				*ticksp = ticks;
 			goto success;
 		}
 		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
@@ -366,17 +448,21 @@ retry:
 				CACHE_UNLOCK();
 				return (0);
 			}
-			if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT)
-				*vpp = dvp->v_cache_dd->nc_vp;
+			ncp = dvp->v_cache_dd;
+			if (ncp->nc_flag & NCF_ISDOTDOT)
+				*vpp = ncp->nc_vp;
 			else
-				*vpp = dvp->v_cache_dd->nc_dvp;
+				*vpp = ncp->nc_dvp;
 			/* Return failure if negative entry was found. */
-			if (*vpp == NULL) {
-				ncp = dvp->v_cache_dd;
+			if (*vpp == NULL)
 				goto negative_success;
-			}
 			CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..",
 			    dvp, cnp->cn_nameptr, *vpp);
+			cache_out_ts(ncp, tsp, ticksp);
+			if ((ncp->nc_flag & (NCF_ISDOTDOT | NCF_DTS)) ==
+			    NCF_DTS && tsp != NULL)
+				*tsp = ((struct namecache_ts *)ncp)->
+				    nc_dotdottime;
 			goto success;
 		}
 	}
@@ -386,7 +472,7 @@ retry:
 	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		numchecks++;
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
-		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
+		    !bcmp(nc_get_name(ncp), cnp->cn_nameptr, ncp->nc_nlen))
 			break;
 	}
 
@@ -418,6 +504,7 @@ retry:
 		*vpp = ncp->nc_vp;
 		CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
 		    dvp, cnp->cn_nameptr, *vpp, ncp);
+		cache_out_ts(ncp, tsp, ticksp);
 		goto success;
 	}
 
@@ -498,12 +585,15 @@ success:
  * Add an entry to the cache.
  */
 void
-cache_enter(dvp, vp, cnp)
+cache_enter_time(dvp, vp, cnp, tsp, dtsp)
 	struct vnode *dvp;
 	struct vnode *vp;
 	struct componentname *cnp;
+	struct timespec *tsp;
+	struct timespec *dtsp;
 {
 	struct namecache *ncp, *n2;
+	struct namecache_ts *n3;
 	struct nchashhead *ncpp;
 	u_int32_t hash;
 	int flag;
@@ -566,13 +656,23 @@ cache_enter(dvp, vp, cnp)
 	 * Calculate the hash key and setup as much of the new
 	 * namecache entry as possible before acquiring the lock.
 	 */
-	ncp = cache_alloc(cnp->cn_namelen);
+	ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
 	ncp->nc_vp = vp;
 	ncp->nc_dvp = dvp;
 	ncp->nc_flag = flag;
+	if (tsp != NULL) {
+		n3 = (struct namecache_ts *)ncp;
+		n3->nc_time = *tsp;
+		n3->nc_ticks = ticks;
+		n3->nc_flag |= NCF_TS;
+		if (dtsp != NULL) {
+			n3->nc_dotdottime = *dtsp;
+			n3->nc_flag |= NCF_DTS;
+		}
+	}
 	len = ncp->nc_nlen = cnp->cn_namelen;
 	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
-	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
+	bcopy(cnp->cn_nameptr, nc_get_name(ncp), len);
 	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
 	CACHE_LOCK();
 
@@ -585,7 +685,22 @@ cache_enter(dvp, vp, cnp)
 	LIST_FOREACH(n2, ncpp, nc_hash) {
 		if (n2->nc_dvp == dvp &&
 		    n2->nc_nlen == cnp->cn_namelen &&
-		    !bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) {
+		    !bcmp(nc_get_name(n2), cnp->cn_nameptr, n2->nc_nlen)) {
+			if (tsp != NULL) {
+				KASSERT((n2->nc_flag & NCF_TS) != 0,
+				    ("no NCF_TS"));
+				n3 = (struct namecache_ts *)n2;
+				n3->nc_time =
+				    ((struct namecache_ts *)ncp)->nc_time;
+				n3->nc_ticks =
+				    ((struct namecache_ts *)ncp)->nc_ticks;
+				if (dtsp != NULL) {
+					n3->nc_dotdottime =
+					    ((struct namecache_ts *)ncp)->
+					    nc_dotdottime;
+					n3->nc_flag |= NCF_DTS;
+				}
+			}
 			CACHE_UNLOCK();
 			cache_free(ncp);
 			return;
@@ -614,6 +729,11 @@ cache_enter(dvp, vp, cnp)
 			ncp->nc_flag |= NCF_WHITE;
 	} else if (vp->v_type == VDIR) {
 		if (flag != NCF_ISDOTDOT) {
+			/*
+			 * For this case, the cache entry maps both the
+			 * directory name in it and the name ".." for the
+			 * directory's parent.
+			 */
 			if ((n2 = vp->v_cache_dd) != NULL &&
 			    (n2->nc_flag & NCF_ISDOTDOT) != 0)
 				cache_zap(n2);
@@ -666,10 +786,18 @@ nchinit(void *dummy __unused)
 
 	TAILQ_INIT(&ncneg);
 
-	cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL,
-	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
-	cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL,
-	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
+	cache_zone_small = uma_zcreate("S VFS Cache",
+	    sizeof(struct namecache) + CACHE_PATH_CUTOFF,
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
+	cache_zone_small_ts = uma_zcreate("STS VFS Cache",
+	    sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF,
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
+	cache_zone_large = uma_zcreate("L VFS Cache",
+	    sizeof(struct namecache) + NAME_MAX,
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
+	cache_zone_large_ts = uma_zcreate("LTS VFS Cache",
+	    sizeof(struct namecache_ts) + NAME_MAX,
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
 
 	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
 }
@@ -925,7 +1053,7 @@ static int
 vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
     char *buf, char **retbuf, u_int buflen)
 {
-	char *bp;
+	char *bp, *nc_name;
 	int error, i, slash_prefixed;
 	struct namecache *ncp;
 
@@ -943,8 +1071,9 @@ vn_fullpath1(struct thread *td, struct v
 			CACHE_UNLOCK();
 			return (ENOENT);
 		}
+		nc_name = nc_get_name(ncp);
 		for (i = ncp->nc_nlen - 1; i >= 0 && bp > buf; i--)
-			*--bp = ncp->nc_name[i];
+			*--bp = nc_name[i];
 		if (bp == buf) {
 			numfullpathfail4++;
 			CACHE_UNLOCK();
@@ -976,8 +1105,9 @@ vn_fullpath1(struct thread *td, struct v
 			error = ENOENT;
 			break;
 		}
+		nc_name = nc_get_name(ncp);
 		for (i = ncp->nc_nlen - 1; i >= 0 && bp != buf; i--)
-			*--bp = ncp->nc_name[i];
+			*--bp = nc_name[i];
 		if (bp == buf) {
 			numfullpathfail4++;
 			error = ENOMEM;
@@ -1006,3 +1136,26 @@ vn_fullpath1(struct thread *td, struct v
 	*retbuf = bp;
 	return (0);
 }
+
+/* ABI compat shims for old kernel modules. */
+#undef cache_enter
+#undef cache_lookup
+
+void	cache_enter(struct vnode *dvp, struct vnode *vp,
+	    struct componentname *cnp);
+int	cache_lookup(struct vnode *dvp, struct vnode **vpp,
+	    struct componentname *cnp);
+
+void
+cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
+{
+
+	cache_enter_time(dvp, vp, cnp, NULL, NULL);
+}
+
+int
+cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
+{
+
+	return (cache_lookup_times(dvp, vpp, cnp, NULL, NULL));
+}

Modified: stable/7/sys/nfsclient/nfs_socket.c
==============================================================================
--- stable/7/sys/nfsclient/nfs_socket.c	Mon Jul 30 18:21:48 2012	(r238912)
+++ stable/7/sys/nfsclient/nfs_socket.c	Mon Jul 30 19:05:41 2012	(r238913)
@@ -1365,11 +1365,16 @@ wait_for_pinned_req:
 			if (error == ESTALE)
 				cache_purge(vp);
 			/*
-			 * Skip wcc data on NFS errors for now. NetApp filers return corrupt
-			 * postop attrs in the wcc data for NFS err EROFS. Not sure if they 
-			 * could return corrupt postop attrs for others errors.
+			 * Skip wcc data on non-ENOENT NFS errors for
+			 * now.  NetApp filers return corrupt postop
+			 * attrs in the wcc data for NFS err EROFS.
+			 * Not sure if they could return corrupt
+			 * postop attrs for others errors.  Blocking
+			 * ENOENT post-op attributes breaks negative
+			 * name caching, so always allow it through.
 			 */
-			if ((nmp->nm_flag & NFSMNT_NFSV3) && !nfs_skip_wcc_data_onerr) {
+			if ((nmp->nm_flag & NFSMNT_NFSV3) &&
+			    (!nfs_skip_wcc_data_onerr || error == ENOENT)) {
 				*mrp = mrep;
 				*mdp = md;
 				*dposp = dpos;

Modified: stable/7/sys/nfsclient/nfs_subs.c
==============================================================================
--- stable/7/sys/nfsclient/nfs_subs.c	Mon Jul 30 18:21:48 2012	(r238912)
+++ stable/7/sys/nfsclient/nfs_subs.c	Mon Jul 30 19:05:41 2012	(r238913)
@@ -1025,8 +1025,8 @@ nfsm_loadattr_xx(struct vnode **v, struc
 }
 
 int
-nfsm_postop_attr_xx(struct vnode **v, int *f, struct mbuf **md,
-		    caddr_t *dpos)
+nfsm_postop_attr_xx(struct vnode **v, int *f, struct vattr *va,
+		    struct mbuf **md, caddr_t *dpos)
 {
 	u_int32_t *tl;
 	int t1;
@@ -1037,7 +1037,7 @@ nfsm_postop_attr_xx(struct vnode **v, in
 		return EBADRPC;
 	*f = fxdr_unsigned(int, *tl);
 	if (*f != 0) {
-		t1 = nfs_loadattrcache(&ttvp, md, dpos, NULL, 1);
+		t1 = nfs_loadattrcache(&ttvp, md, dpos, va, 1);
 		if (t1 != 0) {
 			*f = 0;
 			return t1;
@@ -1067,7 +1067,7 @@ nfsm_wcc_data_xx(struct vnode **v, int *
 				  VTONFS(*v)->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3))); 
 		mtx_unlock(&(VTONFS(*v))->n_mtx);
 	}
-	t1 = nfsm_postop_attr_xx(v, &ttattrf, md, dpos);
+	t1 = nfsm_postop_attr_xx(v, &ttattrf, NULL, md, dpos);
 	if (t1)
 		return t1;
 	if (*f)

Modified: stable/7/sys/nfsclient/nfs_vnops.c
==============================================================================
--- stable/7/sys/nfsclient/nfs_vnops.c	Mon Jul 30 18:21:48 2012	(r238912)
+++ stable/7/sys/nfsclient/nfs_vnops.c	Mon Jul 30 19:05:41 2012	(r238913)
@@ -860,8 +860,9 @@ nfs_lookup(struct vop_lookup_args *ap)
 	struct componentname *cnp = ap->a_cnp;
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
-	struct vattr vattr;
-	struct timespec dmtime;
+	struct mount *mp = dvp->v_mount;
+	struct vattr dvattr, vattr;
+	struct timespec nctime;
 	int flags = cnp->cn_flags;
 	struct vnode *newvp;
 	struct nfsmount *nmp;
@@ -870,27 +871,40 @@ nfs_lookup(struct vop_lookup_args *ap)
 	long len;
 	nfsfh_t *fhp;
 	struct nfsnode *np, *newnp;
-	int error = 0, attrflag, fhsize;
+	int error = 0, attrflag, dattrflag, fhsize, ltype, ncticks;
 	int v3 = NFS_ISV3(dvp);
 	struct thread *td = cnp->cn_thread;
 
 	*vpp = NULLVP;
-	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
-	nmp = VFSTONFS(dvp->v_mount);
+	nmp = VFSTONFS(mp);
 	np = VTONFS(dvp);
 	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) {
 		*vpp = NULLVP;
 		return (error);
 	}
-	error = cache_lookup(dvp, vpp, cnp);
+	error = cache_lookup_times(dvp, vpp, cnp, &nctime, &ncticks);
 	if (error > 0 && error != ENOENT)
 		return (error);
 	if (error == -1) {
 		/*
+		 * Lookups of "." are special and always return the
+		 * current directory.  cache_lookup() already handles
+		 * associated locking bookkeeping, etc.
+		 */
+		if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
+			/* XXX: Is this really correct? */
+			if (cnp->cn_nameiop != LOOKUP &&
+			    (flags & ISLASTCN))
+				cnp->cn_flags |= SAVENAME;
+			return (0);
+		}
+
+		/*
 		 * We only accept a positive hit in the cache if the
 		 * change time of the file matches our cached copy.
 		 * Otherwise, we discard the cache entry and fallback
@@ -914,7 +928,7 @@ nfs_lookup(struct vop_lookup_args *ap)
 			mtx_unlock(&newnp->n_mtx);
 		}
 		if (VOP_GETATTR(newvp, &vattr, cnp->cn_cred, td) == 0 &&
-		    timespeccmp(&vattr.va_ctime, &newnp->n_ctime, ==)) {
+		    timespeccmp(&vattr.va_ctime, &nctime, ==)) {
 			nfsstats.lookupcache_hits++;
 			if (cnp->cn_nameiop != LOOKUP &&
 			    (flags & ISLASTCN))
@@ -931,35 +945,22 @@ nfs_lookup(struct vop_lookup_args *ap)
 		/*
 		 * We only accept a negative hit in the cache if the
 		 * modification time of the parent directory matches
-		 * our cached copy.  Otherwise, we discard all of the
-		 * negative cache entries for this directory. We also
-		 * only trust -ve cache entries for less than
-		 * negnametimeo seconds.
+		 * the cached copy in the name cache entry.
+		 * Otherwise, we discard all of the negative cache
+		 * entries for this directory.  We also only trust
+		 * negative cache entries for up to negnametimeo
+		 * seconds.
 		 */
-		if ((u_int)(ticks - np->n_dmtime_ticks) < (negnametimeo * hz) &&
+		if ((u_int)(ticks - ncticks) < (negnametimeo * hz) &&
 		    VOP_GETATTR(dvp, &vattr, cnp->cn_cred, td) == 0 &&
-		    timespeccmp(&vattr.va_mtime, &np->n_dmtime, ==)) {
+		    timespeccmp(&vattr.va_mtime, &nctime, ==)) {
 			nfsstats.lookupcache_hits++;
 			return (ENOENT);
 		}
 		cache_purge_negative(dvp);
-		mtx_lock(&np->n_mtx);
-		timespecclear(&np->n_dmtime);
-		mtx_unlock(&np->n_mtx);
 	}
 
-	/*
-	 * Cache the modification time of the parent directory in case
-	 * the lookup fails and results in adding the first negative
-	 * name cache entry for the directory.  Since this is reading
-	 * a single time_t, don't bother with locking.  The
-	 * modification time may be a bit stale, but it must be read
-	 * before performing the lookup RPC to prevent a race where
-	 * another lookup updates the timestamp on the directory after
-	 * the lookup RPC has been performed on the server but before
-	 * n_dmtime is set at the end of this function.
-	 */
-	dmtime = np->n_vattr.va_mtime;
+	attrflag = dattrflag = 0;
 	error = 0;
 	newvp = NULLVP;
 	nfsstats.lookupcache_misses++;
@@ -974,7 +975,7 @@ nfs_lookup(struct vop_lookup_args *ap)
 	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred);
 	if (error) {
 		if (v3) {
-			nfsm_postop_attr(dvp, attrflag);
+			nfsm_postop_attr_va(dvp, dattrflag, &vattr);
 			m_freem(mrep);
 		}
 		goto nfsmout;
@@ -989,7 +990,7 @@ nfs_lookup(struct vop_lookup_args *ap)
 			m_freem(mrep);
 			return (EISDIR);
 		}
-		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
+		error = nfs_nget(mp, fhp, fhsize, &np, LK_EXCLUSIVE);
 		if (error) {
 			m_freem(mrep);
 			return (error);
@@ -1007,17 +1008,45 @@ nfs_lookup(struct vop_lookup_args *ap)
 	}
 
 	if (flags & ISDOTDOT) {
+		ltype = VOP_ISLOCKED(dvp, td);
+		error = vfs_busy(mp, LK_NOWAIT, NULL, td);
+		if (error != 0) {
+			VOP_UNLOCK(dvp, 0, td);
+			error = vfs_busy(mp, 0, NULL, td);
+			vn_lock(dvp, ltype | LK_RETRY, td);
+			if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
+				vfs_unbusy(mp, td);
+				error = ENOENT;
+			}
+			if (error != 0) {
+				m_freem(mrep);
+				return (error);
+			}
+		}
 		VOP_UNLOCK(dvp, 0, td);
-		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags);
-		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
-		if (error)
+		error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
+		if (error == 0)
+			newvp = NFSTOV(np);
+		vfs_unbusy(mp, td);
+		vn_lock(dvp, ltype | LK_RETRY, td);
+		if (dvp->v_iflag & VI_DOOMED) {
+			if (error == 0) {
+				if (newvp == dvp)
+					vrele(newvp);
+				else
+					vput(newvp);
+			}
+			error = ENOENT;
+		}
+		if (error) {
+			m_freem(mrep);
 			return (error);
-		newvp = NFSTOV(np);
+		}
 	} else if (NFS_CMPFH(np, fhp, fhsize)) {
 		VREF(dvp);
 		newvp = dvp;
 	} else {
-		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags);
+		error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
 		if (error) {
 			m_freem(mrep);
 			return (error);
@@ -1038,17 +1067,19 @@ nfs_lookup(struct vop_lookup_args *ap)
 		}
 	}
 	if (v3) {
-		nfsm_postop_attr(newvp, attrflag);
-		nfsm_postop_attr(dvp, attrflag);
-	} else
-		nfsm_loadattr(newvp, NULL);
+		nfsm_postop_attr_va(newvp, attrflag, &vattr);
+		nfsm_postop_attr_va(dvp, dattrflag, &dvattr);
+	} else {
+		nfsm_loadattr(newvp, &vattr);
+		attrflag = 1;
+	}
 	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 		cnp->cn_flags |= SAVENAME;
 	if ((cnp->cn_flags & MAKEENTRY) &&
-	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
-		np->n_ctime = np->n_vattr.va_ctime;
-		cache_enter(dvp, newvp, cnp);
-	}
+	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) &&
+	    attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0))
+		cache_enter_time(dvp, newvp, cnp, &vattr.va_ctime,
+		    newvp->v_type != VDIR ? NULL : &dvattr.va_ctime);
 	*vpp = newvp;
 	m_freem(mrep);
 nfsmout:
@@ -1069,36 +1100,28 @@ nfsmout:
 			 * VWRITE) here instead of just checking
 			 * MNT_RDONLY.
 			 */
-			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
+			if (mp->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			cnp->cn_flags |= SAVENAME;
 			return (EJUSTRETURN);
 		}
 
-		if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) {
+		if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE &&
+		    dattrflag) {
 			/*
-			 * Maintain n_dmtime as the modification time
-			 * of the parent directory when the oldest -ve
-			 * name cache entry for this directory was
-			 * added.  If a -ve cache entry has already
-			 * been added with a newer modification time
-			 * by a concurrent lookup, then don't bother
-			 * adding a cache entry.  The modification
-			 * time of the directory might have changed
-			 * due to the file this lookup failed to find
-			 * being created.  In that case a subsequent
-			 * lookup would incorrectly use the entry
-			 * added here instead of doing an extra
-			 * lookup.
+			 * Cache the modification time of the parent
+			 * directory from the post-op attributes in
+			 * the name cache entry.  The negative cache
+			 * entry will be ignored once the directory
+			 * has changed.  Don't bother adding the entry
+			 * if the directory has already changed.
 			 */
 			mtx_lock(&np->n_mtx);
-			if (timespeccmp(&np->n_dmtime, &dmtime, <=)) {
-				if (!timespecisset(&np->n_dmtime)) {
-					np->n_dmtime = dmtime;
-					np->n_dmtime_ticks = ticks;
-				}
+			if (timespeccmp(&np->n_vattr.va_mtime,
+			    &vattr.va_mtime, ==)) {
 				mtx_unlock(&np->n_mtx);
-				cache_enter(dvp, NULL, cnp);
+				cache_enter_time(dvp, NULL, cnp,
+				    &vattr.va_mtime, NULL);
 			} else
 				mtx_unlock(&np->n_mtx);
 		}
@@ -1449,8 +1472,6 @@ nfsmout:
 		if (newvp)
 			vput(newvp);
 	} else {
-		if (cnp->cn_flags & MAKEENTRY)
-			cache_enter(dvp, newvp, cnp);
 		*vpp = newvp;
 	}
 	mtx_lock(&(VTONFS(dvp))->n_mtx);
@@ -1579,8 +1600,6 @@ nfsmout:
 			vput(newvp);
 	}
 	if (!error) {
-		if (cnp->cn_flags & MAKEENTRY)
-			cache_enter(dvp, newvp, cnp);
 		*ap->a_vpp = newvp;
 	}
 	mtx_lock(&(VTONFS(dvp))->n_mtx);
@@ -2363,10 +2382,11 @@ nfs_readdirplusrpc(struct vnode *vp, str
 	nfsuint64 cookie;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	struct nfsnode *dnp = VTONFS(vp), *np;
+	struct vattr vattr, dvattr;
 	nfsfh_t *fhp;
 	u_quad_t fileno;
 	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
-	int attrflag, fhsize;
+	int attrflag, dattrflag, fhsize;
 
 #ifndef nolint
 	dp = NULL;
@@ -2413,7 +2433,7 @@ nfs_readdirplusrpc(struct vnode *vp, str
 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
 		*tl = txdr_unsigned(nmp->nm_rsize);
 		nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred);
-		nfsm_postop_attr(vp, attrflag);
+		nfsm_postop_attr_va(vp, dattrflag, &dvattr);
 		if (error) {
 			m_freem(mrep);
 			goto nfsmout;
@@ -2544,18 +2564,16 @@ nfs_readdirplusrpc(struct vnode *vp, str
 				dpos = dpossav1;
 				mdsav2 = md;
 				md = mdsav1;
-				nfsm_loadattr(newvp, NULL);
+				nfsm_loadattr(newvp, &vattr);
 				dpos = dpossav2;
 				md = mdsav2;
-				dp->d_type =
-				    IFTODT(VTTOIF(np->n_vattr.va_type));
+				dp->d_type = IFTODT(VTTOIF(vattr.va_type));
 				ndp->ni_vp = newvp;
-				/*
-				 * Update n_ctime so subsequent lookup
-				 * doesn't purge entry.
-				 */
-				np->n_ctime = np->n_vattr.va_ctime;
-			        cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
+				if (newvp->v_type != VDIR || dattrflag != 0)
+				    cache_enter_time(ndp->ni_dvp, ndp->ni_vp,
+					cnp, &vattr.va_ctime,
+					newvp->v_type != VDIR ? NULL :
+					&dvattr.va_ctime);
 			    }
 			} else {
 			    /* Just skip over the file handle */

Modified: stable/7/sys/nfsclient/nfsm_subs.h
==============================================================================
--- stable/7/sys/nfsclient/nfsm_subs.h	Mon Jul 30 18:21:48 2012	(r238912)
+++ stable/7/sys/nfsclient/nfsm_subs.h	Mon Jul 30 19:05:41 2012	(r238913)
@@ -167,8 +167,8 @@ int	nfsm_getfh_xx(nfsfh_t **f, int *s, i
 	    caddr_t *dpos);
 int	nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md,
 	    caddr_t *dpos);
-int	nfsm_postop_attr_xx(struct vnode **v, int *f, struct mbuf **md,
-	    caddr_t *dpos);
+int	nfsm_postop_attr_xx(struct vnode **v, int *f, struct vattr *va,
+	    struct mbuf **md, caddr_t *dpos);
 int	nfsm_wcc_data_xx(struct vnode **v, int *f, struct mbuf **md,
 	    caddr_t *dpos);
 
@@ -196,7 +196,14 @@ do { \
 #define	nfsm_postop_attr(v, f) \
 do { \
 	int32_t t1; \
-	t1 = nfsm_postop_attr_xx(&v, &f, &md, &dpos); \
+	t1 = nfsm_postop_attr_xx(&v, &f, NULL, &md, &dpos);	\
+	nfsm_dcheck(t1, mrep); \
+} while (0)
+
+#define	nfsm_postop_attr_va(v, f, va)		\
+do { \
+	int32_t t1; \
+	t1 = nfsm_postop_attr_xx(&v, &f, va, &md, &dpos);	\
 	nfsm_dcheck(t1, mrep); \
 } while (0)
 

Modified: stable/7/sys/nfsclient/nfsnode.h
==============================================================================
--- stable/7/sys/nfsclient/nfsnode.h	Mon Jul 30 18:21:48 2012	(r238912)
+++ stable/7/sys/nfsclient/nfsnode.h	Mon Jul 30 19:05:41 2012	(r238913)
@@ -108,7 +108,7 @@ struct nfsnode {
 	struct timespec		n_mtime;	/* Prev modify time. */
 	time_t			n_unused0;
 	time_t			n_unused1;
-	int			n_dmtime_ticks;	/* Tick of -ve cache entry */
+	int			n_unused3;
 	time_t			n_unused2;
 	nfsfh_t			*n_fhp;		/* NFS File Handle */
 	struct vnode		*n_vnode;	/* associated vnode */
@@ -137,8 +137,8 @@ struct nfsnode {
 	int			n_directio_opens;
 	int                     n_directio_asyncwr;
 	struct nfs_attrcache_timestamp n_unused;
-	struct timespec		n_ctime;	/* Prev create time. */
-	struct timespec		n_dmtime;	/* Prev dir modify time. */
+	struct timespec		n_ctime;	/* NFSv4 only */
+	struct timespec		n_unused5;
 };
 
 #define n_atim		n_un1.nf_atim

Modified: stable/7/sys/sys/vnode.h
==============================================================================
--- stable/7/sys/sys/vnode.h	Mon Jul 30 18:21:48 2012	(r238912)
+++ stable/7/sys/sys/vnode.h	Mon Jul 30 19:05:41 2012	(r238913)
@@ -560,10 +560,15 @@ struct vnode;
 extern int	(*lease_check_hook)(struct vop_lease_args *);
 
 /* cache_* may belong in namei.h. */
-void	cache_enter(struct vnode *dvp, struct vnode *vp,
-	    struct componentname *cnp);
-int	cache_lookup(struct vnode *dvp, struct vnode **vpp,
-	    struct componentname *cnp);
+#define	cache_enter(dvp, vp, cnp)					\
+	cache_enter_time(dvp, vp, cnp, NULL, NULL)
+void	cache_enter_time(struct vnode *dvp, struct vnode *vp,
+	    struct componentname *cnp, struct timespec *tsp,
+	    struct timespec *dtsp);
+#define	cache_lookup(dvp, vpp, cnp)					\
+	cache_lookup_times(dvp, vpp, cnp, NULL, NULL)
+int	cache_lookup_times(struct vnode *dvp, struct vnode **vpp,
+	    struct componentname *cnp, struct timespec *tsp, int *ticksp);
 void	cache_purge(struct vnode *vp);
 void	cache_purge_negative(struct vnode *vp);
 void	cache_purgevfs(struct mount *mp);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201207301905.q6UJ5flk027689>