Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 1 Dec 2011 18:46:29 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r228185 - in head/sys: fs/nfsserver nfsserver
Message-ID:  <201112011846.pB1IkTEc057395@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Thu Dec  1 18:46:28 2011
New Revision: 228185
URL: http://svn.freebsd.org/changeset/base/228185

Log:
  Enhance the sequential access heuristic used to perform readahead in the
  NFS server and reuse it for writes as well to allow writes to the backing
  store to be clustered.
  - Use a prime number for the size of the heuristic table (1017 is not
    prime).
  - Move the logic to locate a heuristic entry from the table and compute
    the sequential count out of VOP_READ() and into a separate routine.
  - Use the logic from sequential_heuristic() in vfs_vnops.c to update the
    seqcount when a sequential access is performed rather than just
    increasing seqcount by 1.  This lets the clustering count ramp up
    faster.
  - Allow for some reordering of RPCs and, if it is detected, leave the
    current seqcount as-is rather than dropping back to a seqcount of 1.
    Also, when out-of-order access is encountered, cut seqcount in half
    rather than dropping it all the way back to 1 to further aid with
    reordering.
  - Fix the new NFS server to properly update the next offset after a
    successful VOP_READ() so that the readahead actually works.
  
  Some of these changes came from an earlier patch by Bjorn Gronwall that was
  forwarded to me by bde@.
  
  Discussed with:	bde, rmacklem, fs@
  Submitted by:	Bjorn Gronwall (1, 4)
  MFC after:	2 weeks

Modified:
  head/sys/fs/nfsserver/nfs_nfsdport.c
  head/sys/nfsserver/nfs_serv.c

Modified: head/sys/fs/nfsserver/nfs_nfsdport.c
==============================================================================
--- head/sys/fs/nfsserver/nfs_nfsdport.c	Thu Dec  1 15:33:58 2011	(r228184)
+++ head/sys/fs/nfsserver/nfs_nfsdport.c	Thu Dec  1 18:46:28 2011	(r228185)
@@ -90,20 +90,78 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_de
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
     &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
 
-#define	NUM_HEURISTIC		1017
+#define	MAX_REORDERED_RPC	16
+#define	NUM_HEURISTIC		1031
 #define	NHUSE_INIT		64
 #define	NHUSE_INC		16
 #define	NHUSE_MAX		2048
 
 static struct nfsheur {
 	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
-	off_t nh_nextr;		/* next offset for sequential detection */
+	off_t nh_nextoff;	/* next offset for sequential detection */
 	int nh_use;		/* use count for selection */
 	int nh_seqcount;	/* heuristic */
 } nfsheur[NUM_HEURISTIC];
 
 
 /*
+ * Heuristic to detect sequential operation.
+ */
+static struct nfsheur *
+nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
+{
+	struct nfsheur *nh;
+	int hi, try;
+
+	/* Locate best candidate. */
+	try = 32;
+	hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
+	nh = &nfsheur[hi];
+	while (try--) {
+		if (nfsheur[hi].nh_vp == vp) {
+			nh = &nfsheur[hi];
+			break;
+		}
+		if (nfsheur[hi].nh_use > 0)
+			--nfsheur[hi].nh_use;
+		hi = (hi + 1) % NUM_HEURISTIC;
+		if (nfsheur[hi].nh_use < nh->nh_use)
+			nh = &nfsheur[hi];
+	}
+
+	/* Initialize hint if this is a new file. */
+	if (nh->nh_vp != vp) {
+		nh->nh_vp = vp;
+		nh->nh_nextoff = uio->uio_offset;
+		nh->nh_use = NHUSE_INIT;
+		if (uio->uio_offset == 0)
+			nh->nh_seqcount = 4;
+		else
+			nh->nh_seqcount = 1;
+	}
+
+	/* Calculate heuristic. */
+	if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
+	    uio->uio_offset == nh->nh_nextoff) {
+		/* See comments in vfs_vnops.c:sequential_heuristic(). */
+		nh->nh_seqcount += howmany(uio->uio_resid, 16384);
+		if (nh->nh_seqcount > IO_SEQMAX)
+			nh->nh_seqcount = IO_SEQMAX;
+	} else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
+	    imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
+		/* Probably a reordered RPC, leave seqcount alone. */
+	} else if (nh->nh_seqcount > 1) {
+		nh->nh_seqcount /= 2;
+	} else {
+		nh->nh_seqcount = 0;
+	}
+	nh->nh_use += NHUSE_INC;
+	if (nh->nh_use > NHUSE_MAX)
+		nh->nh_use = NHUSE_MAX;
+	return (nh);
+}
+
+/*
  * Get attributes into nfsvattr structure.
  */
 int
@@ -567,60 +625,11 @@ nfsvno_read(struct vnode *vp, off_t off,
 	int i;
 	struct iovec *iv;
 	struct iovec *iv2;
-	int error = 0, len, left, siz, tlen, ioflag = 0, hi, try = 32;
+	int error = 0, len, left, siz, tlen, ioflag = 0;
 	struct mbuf *m2 = NULL, *m3;
 	struct uio io, *uiop = &io;
 	struct nfsheur *nh;
 
-	/*
-	 * Calculate seqcount for heuristic
-	 */
-	/*
-	 * Locate best candidate
-	 */
-
-	hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
-	nh = &nfsheur[hi];
-
-	while (try--) {
-		if (nfsheur[hi].nh_vp == vp) {
-			nh = &nfsheur[hi];
-			break;
-		}
-		if (nfsheur[hi].nh_use > 0)
-			--nfsheur[hi].nh_use;
-		hi = (hi + 1) % NUM_HEURISTIC;
-		if (nfsheur[hi].nh_use < nh->nh_use)
-			nh = &nfsheur[hi];
-	}
-
-	if (nh->nh_vp != vp) {
-		nh->nh_vp = vp;
-		nh->nh_nextr = off;
-		nh->nh_use = NHUSE_INIT;
-		if (off == 0)
-			nh->nh_seqcount = 4;
-		else
-			nh->nh_seqcount = 1;
-	}
-
-	/*
-	 * Calculate heuristic
-	 */
-
-	if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
-		if (++nh->nh_seqcount > IO_SEQMAX)
-			nh->nh_seqcount = IO_SEQMAX;
-	} else if (nh->nh_seqcount > 1) {
-		nh->nh_seqcount = 1;
-	} else {
-		nh->nh_seqcount = 0;
-	}
-	nh->nh_use += NHUSE_INC;
-	if (nh->nh_use > NHUSE_MAX)
-		nh->nh_use = NHUSE_MAX;
-	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
-
 	len = left = NFSM_RNDUP(cnt);
 	m3 = NULL;
 	/*
@@ -665,6 +674,8 @@ nfsvno_read(struct vnode *vp, off_t off,
 	uiop->uio_resid = len;
 	uiop->uio_rw = UIO_READ;
 	uiop->uio_segflg = UIO_SYSSPACE;
+	nh = nfsrv_sequential_heuristic(uiop, vp);
+	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
 	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
 	FREE((caddr_t)iv2, M_TEMP);
 	if (error) {
@@ -672,6 +683,7 @@ nfsvno_read(struct vnode *vp, off_t off,
 		*mpp = NULL;
 		goto out;
 	}
+	nh->nh_nextoff = uiop->uio_offset;
 	tlen = len - uiop->uio_resid;
 	cnt = cnt < tlen ? cnt : tlen;
 	tlen = NFSM_RNDUP(cnt);
@@ -700,6 +712,7 @@ nfsvno_write(struct vnode *vp, off_t off
 	struct iovec *iv;
 	int ioflags, error;
 	struct uio io, *uiop = &io;
+	struct nfsheur *nh;
 
 	MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
 	    M_WAITOK);
@@ -733,7 +746,11 @@ nfsvno_write(struct vnode *vp, off_t off
 	uiop->uio_segflg = UIO_SYSSPACE;
 	NFSUIOPROC(uiop, p);
 	uiop->uio_offset = off;
+	nh = nfsrv_sequential_heuristic(uiop, vp);
+	ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
 	error = VOP_WRITE(vp, uiop, ioflags, cred);
+	if (error == 0)
+		nh->nh_nextoff = uiop->uio_offset;
 	FREE((caddr_t)iv, M_TEMP);
 
 	NFSEXITCODE(error);

Modified: head/sys/nfsserver/nfs_serv.c
==============================================================================
--- head/sys/nfsserver/nfs_serv.c	Thu Dec  1 15:33:58 2011	(r228184)
+++ head/sys/nfsserver/nfs_serv.c	Thu Dec  1 18:46:28 2011	(r228185)
@@ -107,14 +107,15 @@ FEATURE(nfsserver, "NFS server");
 
 #define MAX_COMMIT_COUNT	(1024 * 1024)
 
-#define NUM_HEURISTIC		1017
+#define	MAX_REORDERED_RPC	16
+#define NUM_HEURISTIC		1031
 #define NHUSE_INIT		64
 #define NHUSE_INC		16
 #define NHUSE_MAX		2048
 
 static struct nfsheur {
 	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
-	off_t nh_nextr;		/* next offset for sequential detection */
+	off_t nh_nextoff;	/* next offset for sequential detection */
 	int nh_use;		/* use count for selection */
 	int nh_seqcount;	/* heuristic */
 } nfsheur[NUM_HEURISTIC];
@@ -187,6 +188,63 @@ nfsrv_lockedpair_nd(int vfs1, struct nam
 }
 
 /*
+ * Heuristic to detect sequential operation.
+ */
+static struct nfsheur *
+nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
+{
+	struct nfsheur *nh;
+	int hi, try;
+
+	/* Locate best candidate. */
+	try = 32;
+	hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
+	nh = &nfsheur[hi];
+	while (try--) {
+		if (nfsheur[hi].nh_vp == vp) {
+			nh = &nfsheur[hi];
+			break;
+		}
+		if (nfsheur[hi].nh_use > 0)
+			--nfsheur[hi].nh_use;
+		hi = (hi + 1) % NUM_HEURISTIC;
+		if (nfsheur[hi].nh_use < nh->nh_use)
+			nh = &nfsheur[hi];
+	}
+
+	/* Initialize hint if this is a new file. */
+	if (nh->nh_vp != vp) {
+		nh->nh_vp = vp;
+		nh->nh_nextoff = uio->uio_offset;
+		nh->nh_use = NHUSE_INIT;
+		if (uio->uio_offset == 0)
+			nh->nh_seqcount = 4;
+		else
+			nh->nh_seqcount = 1;
+	}
+
+	/* Calculate heuristic. */
+	if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
+	    uio->uio_offset == nh->nh_nextoff) {
+		/* See comments in vfs_vnops.c:sequential_heuristic(). */
+		nh->nh_seqcount += howmany(uio->uio_resid, 16384);
+		if (nh->nh_seqcount > IO_SEQMAX)
+			nh->nh_seqcount = IO_SEQMAX;
+	} else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
+	    imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
+		/* Probably a reordered RPC, leave seqcount alone. */
+	} else if (nh->nh_seqcount > 1) {
+		nh->nh_seqcount /= 2;
+	} else {
+		nh->nh_seqcount = 0;
+	}
+	nh->nh_use += NHUSE_INC;
+	if (nh->nh_use > NHUSE_MAX)
+		nh->nh_use = NHUSE_MAX;
+	return (nh);
+}
+
+/*
  * nfs v3 access service
  */
 int
@@ -843,7 +901,6 @@ nfsrv_read(struct nfsrv_descript *nfsd, 
 	/*
 	 * Calculate byte count to read
 	 */
-
 	if (off >= vap->va_size)
 		cnt = 0;
 	else if ((off + reqlen) > vap->va_size)
@@ -851,61 +908,6 @@ nfsrv_read(struct nfsrv_descript *nfsd, 
 	else
 		cnt = reqlen;
 
-	/*
-	 * Calculate seqcount for heuristic
-	 */
-
-	{
-		int hi;
-		int try = 32;
-
-		/*
-		 * Locate best candidate
-		 */
-
-		hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
-		nh = &nfsheur[hi];
-
-		while (try--) {
-			if (nfsheur[hi].nh_vp == vp) {
-				nh = &nfsheur[hi];
-				break;
-			}
-			if (nfsheur[hi].nh_use > 0)
-				--nfsheur[hi].nh_use;
-			hi = (hi + 1) % NUM_HEURISTIC;
-			if (nfsheur[hi].nh_use < nh->nh_use)
-				nh = &nfsheur[hi];
-		}
-
-		if (nh->nh_vp != vp) {
-			nh->nh_vp = vp;
-			nh->nh_nextr = off;
-			nh->nh_use = NHUSE_INIT;
-			if (off == 0)
-				nh->nh_seqcount = 4;
-			else
-				nh->nh_seqcount = 1;
-		}
-
-		/*
-		 * Calculate heuristic
-		 */
-
-		if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
-			if (++nh->nh_seqcount > IO_SEQMAX)
-				nh->nh_seqcount = IO_SEQMAX;
-		} else if (nh->nh_seqcount > 1) {
-			nh->nh_seqcount = 1;
-		} else {
-			nh->nh_seqcount = 0;
-		}
-		nh->nh_use += NHUSE_INC;
-		if (nh->nh_use > NHUSE_MAX)
-			nh->nh_use = NHUSE_MAX;
-		ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
-        }
-
 	nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
 	if (v3) {
 		tl = nfsm_build(u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
@@ -963,9 +965,11 @@ nfsrv_read(struct nfsrv_descript *nfsd, 
 		uiop->uio_resid = len;
 		uiop->uio_rw = UIO_READ;
 		uiop->uio_segflg = UIO_SYSSPACE;
+		nh = nfsrv_sequential_heuristic(uiop, vp);
+		ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
 		error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
-		off = uiop->uio_offset;
-		nh->nh_nextr = off;
+		if (error == 0)
+			nh->nh_nextoff = uiop->uio_offset;
 		free((caddr_t)iv2, M_TEMP);
 		if (error || (getret = VOP_GETATTR(vp, vap, cred))) {
 			if (!error)
@@ -1030,6 +1034,7 @@ nfsrv_write(struct nfsrv_descript *nfsd,
 	int v3 = (nfsd->nd_flag & ND_NFSV3);
 	struct mbuf *mb, *mreq;
 	struct vnode *vp = NULL;
+	struct nfsheur *nh;
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	struct uio io, *uiop = &io;
@@ -1170,7 +1175,11 @@ nfsrv_write(struct nfsrv_descript *nfsd,
 	    uiop->uio_segflg = UIO_SYSSPACE;
 	    uiop->uio_td = NULL;
 	    uiop->uio_offset = off;
+	    nh = nfsrv_sequential_heuristic(uiop, vp);
+	    ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
 	    error = VOP_WRITE(vp, uiop, ioflags, cred);
+	    if (error == 0)
+		    nh->nh_nextoff = uiop->uio_offset;
 	    /* Unlocked write. */
 	    nfsrvstats.srvvop_writes++;
 	    free((caddr_t)iv, M_TEMP);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201112011846.pB1IkTEc057395>