From owner-svn-src-head@FreeBSD.ORG Thu Dec 1 18:46:29 2011 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 41748106566C; Thu, 1 Dec 2011 18:46:29 +0000 (UTC) (envelope-from jhb@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 3058A8FC13; Thu, 1 Dec 2011 18:46:29 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id pB1IkT83057398; Thu, 1 Dec 2011 18:46:29 GMT (envelope-from jhb@svn.freebsd.org) Received: (from jhb@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id pB1IkTEc057395; Thu, 1 Dec 2011 18:46:29 GMT (envelope-from jhb@svn.freebsd.org) Message-Id: <201112011846.pB1IkTEc057395@svn.freebsd.org> From: John Baldwin Date: Thu, 1 Dec 2011 18:46:29 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r228185 - in head/sys: fs/nfsserver nfsserver X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 01 Dec 2011 18:46:29 -0000 Author: jhb Date: Thu Dec 1 18:46:28 2011 New Revision: 228185 URL: http://svn.freebsd.org/changeset/base/228185 Log: Enhance the sequential access heuristic used to perform readahead in the NFS server and reuse it for writes as well to allow writes to the backing store to be clustered. - Use a prime number for the size of the heuristic table (1017 is not prime). - Move the logic to locate a heuristic entry from the table and compute the sequential count out of VOP_READ() and into a separate routine. - Use the logic from sequential_heuristic() in vfs_vnops.c to update the seqcount when a sequential access is performed rather than just increasing seqcount by 1. This lets the clustering count ramp up faster. - Allow for some reordering of RPCs and if it is detected leave the current seqcount as-is rather than dropping back to a seqcount of 1. Also, when out of order access is encountered, cut seqcount in half rather than dropping it all the way back to 1 to further aid with reordering. - Fix the new NFS server to properly update the next offset after a successful VOP_READ() so that the readahead actually works. Some of these changes came from an earlier patch by Bjorn Gronwall that was forwarded to me by bde@. Discussed with: bde, rmacklem, fs@ Submitted by: Bjorn Gronwall (1, 4) MFC after: 2 weeks Modified: head/sys/fs/nfsserver/nfs_nfsdport.c head/sys/nfsserver/nfs_serv.c Modified: head/sys/fs/nfsserver/nfs_nfsdport.c ============================================================================== --- head/sys/fs/nfsserver/nfs_nfsdport.c Thu Dec 1 15:33:58 2011 (r228184) +++ head/sys/fs/nfsserver/nfs_nfsdport.c Thu Dec 1 18:46:28 2011 (r228185) @@ -90,20 +90,78 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_de SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); -#define NUM_HEURISTIC 1017 +#define MAX_REORDERED_RPC 16 +#define NUM_HEURISTIC 1031 #define NHUSE_INIT 64 #define NHUSE_INC 16 #define NHUSE_MAX 2048 static struct nfsheur { struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ - off_t nh_nextr; /* next offset for sequential detection */ + off_t nh_nextoff; /* next offset for sequential detection */ int nh_use; /* use count for selection */ int nh_seqcount; /* heuristic */ } nfsheur[NUM_HEURISTIC]; /* + * Heuristic to detect sequential operation. + */ +static struct nfsheur * +nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) +{ + struct nfsheur *nh; + int hi, try; + + /* Locate best candidate. */ + try = 32; + hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; + nh = &nfsheur[hi]; + while (try--) { + if (nfsheur[hi].nh_vp == vp) { + nh = &nfsheur[hi]; + break; + } + if (nfsheur[hi].nh_use > 0) + --nfsheur[hi].nh_use; + hi = (hi + 1) % NUM_HEURISTIC; + if (nfsheur[hi].nh_use < nh->nh_use) + nh = &nfsheur[hi]; + } + + /* Initialize hint if this is a new file. */ + if (nh->nh_vp != vp) { + nh->nh_vp = vp; + nh->nh_nextoff = uio->uio_offset; + nh->nh_use = NHUSE_INIT; + if (uio->uio_offset == 0) + nh->nh_seqcount = 4; + else + nh->nh_seqcount = 1; + } + + /* Calculate heuristic. */ + if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || + uio->uio_offset == nh->nh_nextoff) { + /* See comments in vfs_vnops.c:sequential_heuristic(). */ + nh->nh_seqcount += howmany(uio->uio_resid, 16384); + if (nh->nh_seqcount > IO_SEQMAX) + nh->nh_seqcount = IO_SEQMAX; + } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * + imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { + /* Probably a reordered RPC, leave seqcount alone. */ + } else if (nh->nh_seqcount > 1) { + nh->nh_seqcount /= 2; + } else { + nh->nh_seqcount = 0; + } + nh->nh_use += NHUSE_INC; + if (nh->nh_use > NHUSE_MAX) + nh->nh_use = NHUSE_MAX; + return (nh); +} + +/* * Get attributes into nfsvattr structure. */ int @@ -567,60 +625,11 @@ nfsvno_read(struct vnode *vp, off_t off, int i; struct iovec *iv; struct iovec *iv2; - int error = 0, len, left, siz, tlen, ioflag = 0, hi, try = 32; + int error = 0, len, left, siz, tlen, ioflag = 0; struct mbuf *m2 = NULL, *m3; struct uio io, *uiop = &io; struct nfsheur *nh; - /* - * Calculate seqcount for heuristic - */ - /* - * Locate best candidate - */ - - hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; - nh = &nfsheur[hi]; - - while (try--) { - if (nfsheur[hi].nh_vp == vp) { - nh = &nfsheur[hi]; - break; - } - if (nfsheur[hi].nh_use > 0) - --nfsheur[hi].nh_use; - hi = (hi + 1) % NUM_HEURISTIC; - if (nfsheur[hi].nh_use < nh->nh_use) - nh = &nfsheur[hi]; - } - - if (nh->nh_vp != vp) { - nh->nh_vp = vp; - nh->nh_nextr = off; - nh->nh_use = NHUSE_INIT; - if (off == 0) - nh->nh_seqcount = 4; - else - nh->nh_seqcount = 1; - } - - /* - * Calculate heuristic - */ - - if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) { - if (++nh->nh_seqcount > IO_SEQMAX) - nh->nh_seqcount = IO_SEQMAX; - } else if (nh->nh_seqcount > 1) { - nh->nh_seqcount = 1; - } else { - nh->nh_seqcount = 0; - } - nh->nh_use += NHUSE_INC; - if (nh->nh_use > NHUSE_MAX) - nh->nh_use = NHUSE_MAX; - ioflag |= nh->nh_seqcount << IO_SEQSHIFT; - len = left = NFSM_RNDUP(cnt); m3 = NULL; /* @@ -665,6 +674,8 @@ nfsvno_read(struct vnode *vp, off_t off, uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; + nh = nfsrv_sequential_heuristic(uiop, vp); + ioflag |= nh->nh_seqcount << IO_SEQSHIFT; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); FREE((caddr_t)iv2, M_TEMP); if (error) { @@ -672,6 +683,7 @@ nfsvno_read(struct vnode *vp, off_t off, *mpp = NULL; goto out; } + nh->nh_nextoff = uiop->uio_offset; tlen = len - uiop->uio_resid; cnt = cnt < tlen ? cnt : tlen; tlen = NFSM_RNDUP(cnt); @@ -700,6 +712,7 @@ nfsvno_write(struct vnode *vp, off_t off struct iovec *iv; int ioflags, error; struct uio io, *uiop = &io; + struct nfsheur *nh; MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); @@ -733,7 +746,11 @@ nfsvno_write(struct vnode *vp, off_t off uiop->uio_segflg = UIO_SYSSPACE; NFSUIOPROC(uiop, p); uiop->uio_offset = off; + nh = nfsrv_sequential_heuristic(uiop, vp); + ioflags |= nh->nh_seqcount << IO_SEQSHIFT; error = VOP_WRITE(vp, uiop, ioflags, cred); + if (error == 0) + nh->nh_nextoff = uiop->uio_offset; FREE((caddr_t)iv, M_TEMP); NFSEXITCODE(error); Modified: head/sys/nfsserver/nfs_serv.c ============================================================================== --- head/sys/nfsserver/nfs_serv.c Thu Dec 1 15:33:58 2011 (r228184) +++ head/sys/nfsserver/nfs_serv.c Thu Dec 1 18:46:28 2011 (r228185) @@ -107,14 +107,15 @@ FEATURE(nfsserver, "NFS server"); #define MAX_COMMIT_COUNT (1024 * 1024) -#define NUM_HEURISTIC 1017 +#define MAX_REORDERED_RPC 16 +#define NUM_HEURISTIC 1031 #define NHUSE_INIT 64 #define NHUSE_INC 16 #define NHUSE_MAX 2048 static struct nfsheur { struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ - off_t nh_nextr; /* next offset for sequential detection */ + off_t nh_nextoff; /* next offset for sequential detection */ int nh_use; /* use count for selection */ int nh_seqcount; /* heuristic */ } nfsheur[NUM_HEURISTIC]; @@ -187,6 +188,63 @@ nfsrv_lockedpair_nd(int vfs1, struct nam } /* + * Heuristic to detect sequential operation. + */ +static struct nfsheur * +nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) +{ + struct nfsheur *nh; + int hi, try; + + /* Locate best candidate. */ + try = 32; + hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; + nh = &nfsheur[hi]; + while (try--) { + if (nfsheur[hi].nh_vp == vp) { + nh = &nfsheur[hi]; + break; + } + if (nfsheur[hi].nh_use > 0) + --nfsheur[hi].nh_use; + hi = (hi + 1) % NUM_HEURISTIC; + if (nfsheur[hi].nh_use < nh->nh_use) + nh = &nfsheur[hi]; + } + + /* Initialize hint if this is a new file. */ + if (nh->nh_vp != vp) { + nh->nh_vp = vp; + nh->nh_nextoff = uio->uio_offset; + nh->nh_use = NHUSE_INIT; + if (uio->uio_offset == 0) + nh->nh_seqcount = 4; + else + nh->nh_seqcount = 1; + } + + /* Calculate heuristic. */ + if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || + uio->uio_offset == nh->nh_nextoff) { + /* See comments in vfs_vnops.c:sequential_heuristic(). */ + nh->nh_seqcount += howmany(uio->uio_resid, 16384); + if (nh->nh_seqcount > IO_SEQMAX) + nh->nh_seqcount = IO_SEQMAX; + } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * + imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { + /* Probably a reordered RPC, leave seqcount alone. */ + } else if (nh->nh_seqcount > 1) { + nh->nh_seqcount /= 2; + } else { + nh->nh_seqcount = 0; + } + nh->nh_use += NHUSE_INC; + if (nh->nh_use > NHUSE_MAX) + nh->nh_use = NHUSE_MAX; + return (nh); +} + +/* * nfs v3 access service */ int @@ -843,7 +901,6 @@ nfsrv_read(struct nfsrv_descript *nfsd, /* * Calculate byte count to read */ - if (off >= vap->va_size) cnt = 0; else if ((off + reqlen) > vap->va_size) @@ -851,61 +908,6 @@ nfsrv_read(struct nfsrv_descript *nfsd, else cnt = reqlen; - /* - * Calculate seqcount for heuristic - */ - - { - int hi; - int try = 32; - - /* - * Locate best candidate - */ - - hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; - nh = &nfsheur[hi]; - - while (try--) { - if (nfsheur[hi].nh_vp == vp) { - nh = &nfsheur[hi]; - break; - } - if (nfsheur[hi].nh_use > 0) - --nfsheur[hi].nh_use; - hi = (hi + 1) % NUM_HEURISTIC; - if (nfsheur[hi].nh_use < nh->nh_use) - nh = &nfsheur[hi]; - } - - if (nh->nh_vp != vp) { - nh->nh_vp = vp; - nh->nh_nextr = off; - nh->nh_use = NHUSE_INIT; - if (off == 0) - nh->nh_seqcount = 4; - else - nh->nh_seqcount = 1; - } - - /* - * Calculate heuristic - */ - - if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) { - if (++nh->nh_seqcount > IO_SEQMAX) - nh->nh_seqcount = IO_SEQMAX; - } else if (nh->nh_seqcount > 1) { - nh->nh_seqcount = 1; - } else { - nh->nh_seqcount = 0; - } - nh->nh_use += NHUSE_INC; - if (nh->nh_use > NHUSE_MAX) - nh->nh_use = NHUSE_MAX; - ioflag |= nh->nh_seqcount << IO_SEQSHIFT; - } - nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt)); if (v3) { tl = nfsm_build(u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED); @@ -963,9 +965,11 @@ nfsrv_read(struct nfsrv_descript *nfsd, uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; + nh = nfsrv_sequential_heuristic(uiop, vp); + ioflag |= nh->nh_seqcount << IO_SEQSHIFT; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); - off = uiop->uio_offset; - nh->nh_nextr = off; + if (error == 0) + nh->nh_nextoff = uiop->uio_offset; free((caddr_t)iv2, M_TEMP); if (error || (getret = VOP_GETATTR(vp, vap, cred))) { if (!error) @@ -1030,6 +1034,7 @@ nfsrv_write(struct nfsrv_descript *nfsd, int v3 = (nfsd->nd_flag & ND_NFSV3); struct mbuf *mb, *mreq; struct vnode *vp = NULL; + struct nfsheur *nh; nfsfh_t nfh; fhandle_t *fhp; struct uio io, *uiop = &io; @@ -1170,7 +1175,11 @@ nfsrv_write(struct nfsrv_descript *nfsd, uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; uiop->uio_offset = off; + nh = nfsrv_sequential_heuristic(uiop, vp); + ioflags |= nh->nh_seqcount << IO_SEQSHIFT; error = VOP_WRITE(vp, uiop, ioflags, cred); + if (error == 0) + nh->nh_nextoff = uiop->uio_offset; /* Unlocked write. */ nfsrvstats.srvvop_writes++; free((caddr_t)iv, M_TEMP);