Date: Mon, 3 Jul 2017 19:46:12 +0000 (UTC)
From: Rick Macklem <rmacklem@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject: svn commit: r320615 - in stable/11/sys: fs/nfs fs/nfsclient kern sys
Message-ID: <201707031946.v63JkCUS072818@repo.freebsd.org>
Author: rmacklem
Date: Mon Jul 3 19:46:12 2017
New Revision: 320615
URL: https://svnweb.freebsd.org/changeset/base/320615

Log:
  MFC: r319882, r320062, r320070, r320126
  Make MAXBCACHEBUF a tunable called vfs.maxbcachebuf.

  By making MAXBCACHEBUF a tunable, it can be increased to allow for
  larger read/write data sizes for the NFS client. The tunable is limited
  to MAXPHYS, which is currently 128K. Making MAXPHYS a tunable or
  increasing its value is being discussed, since it would be nice to
  support a read/write data size of 1Mbyte for the NFS client when
  mounting the AmazonEFS file service.

  Also, define NFS_MAXXDR as the upper bound on XDR overhead in an NFS RPC.

Modified:
  stable/11/sys/fs/nfs/nfs_commonkrpc.c
  stable/11/sys/fs/nfs/nfsport.h
  stable/11/sys/fs/nfs/nfsproto.h
  stable/11/sys/fs/nfsclient/nfs_clrpcops.c
  stable/11/sys/kern/vfs_bio.c
  stable/11/sys/sys/buf.h
  stable/11/sys/sys/param.h

Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/fs/nfs/nfs_commonkrpc.c
==============================================================================
--- stable/11/sys/fs/nfs/nfs_commonkrpc.c	Mon Jul 3 19:39:58 2017	(r320614)
+++ stable/11/sys/fs/nfs/nfs_commonkrpc.c	Mon Jul 3 19:46:12 2017	(r320615)
@@ -161,7 +161,7 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq
     struct ucred *cred, NFSPROC_T *p, int callback_retry_mult)
 {
     int rcvreserve, sndreserve;
-    int pktscale;
+    int pktscale, pktscalesav;
     struct sockaddr *saddr;
     struct ucred *origcred;
     CLIENT *client;
@@ -210,6 +210,7 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq
         pktscale = 2;
     if (pktscale > 64)
         pktscale = 64;
+    pktscalesav = pktscale;
     /*
      * soreserve() can fail if sb_max is too small, so shrink pktscale
      * and try again if there is an error.
@@ -228,8 +229,12 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq
         goto out;
     }
     do {
-        if (error != 0 && pktscale > 2)
+        if (error != 0 && pktscale > 2) {
+            if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
+                pktscale == pktscalesav)
+                printf("Consider increasing kern.ipc.maxsockbuf\n");
             pktscale--;
+        }
         if (nrp->nr_sotype == SOCK_DGRAM) {
             if (nmp != NULL) {
                 sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
@@ -243,15 +248,19 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq
             if (nrp->nr_sotype != SOCK_STREAM)
                 panic("nfscon sotype");
             if (nmp != NULL) {
-                sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
+                sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
                     sizeof (u_int32_t)) * pktscale;
-                rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
+                rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
                     sizeof (u_int32_t)) * pktscale;
             } else {
                 sndreserve = rcvreserve = 1024 * pktscale;
             }
         }
         error = soreserve(so, sndreserve, rcvreserve);
+        if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
+            pktscale <= 2)
+            printf("Must increase kern.ipc.maxsockbuf or reduce"
+                " rsize, wsize\n");
     } while (error != 0 && pktscale > 2);
     soclose(so);
     if (error) {

Modified: stable/11/sys/fs/nfs/nfsport.h
==============================================================================
--- stable/11/sys/fs/nfs/nfsport.h	Mon Jul 3 19:39:58 2017	(r320614)
+++ stable/11/sys/fs/nfs/nfsport.h	Mon Jul 3 19:46:12 2017	(r320615)
@@ -1022,7 +1022,7 @@ struct nfsreq {
 };

 #ifndef NFS_MAXBSIZE
-#define NFS_MAXBSIZE	MAXBCACHEBUF
+#define NFS_MAXBSIZE	(maxbcachebuf)
 #endif

 /*
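For a TCP mount, the socket buffer reservation above now scales with the mount's I/O size, because NFS_MAXBSIZE follows the new vfs.maxbcachebuf tunable and NFS_MAXXDR replaces NFS_MAXPKTHDR in the calculation. The standalone userland sketch below is not part of the commit; the maxbcachebuf value is an assumption, and the kernel's real soreserve() check also discounts kern.ipc.maxsockbuf for per-mbuf overhead, so treat the numbers as estimates. It just shows the arithmetic behind the "Consider increasing kern.ipc.maxsockbuf" message printed when the initial reservation cannot be granted.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define NFS_MAXXDR	4096	/* from nfsproto.h in this commit */

int
main(void)
{
    int maxbcachebuf = 65536;       /* assumed vfs.maxbcachebuf value */
    int pktscales[] = { 64, 2 };    /* highest and lowest scale newnfs_connect() tries */
    size_t resv;
    unsigned int i;

    for (i = 0; i < sizeof(pktscales) / sizeof(pktscales[0]); i++) {
        /* Mirrors (NFS_MAXBSIZE + NFS_MAXXDR + sizeof(u_int32_t)) * pktscale. */
        resv = ((size_t)maxbcachebuf + NFS_MAXXDR + sizeof(uint32_t)) *
            pktscales[i];
        printf("pktscale %2d: sndreserve = rcvreserve = %zu bytes\n",
            pktscales[i], resv);
    }
    return (0);
}

Because newnfs_connect() starts at the larger pktscale and only backs off toward 2 after soreserve() failures, the first request can be tens of times larger than the final one, which is when the message above is printed.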
Modified: stable/11/sys/fs/nfs/nfsproto.h
==============================================================================
--- stable/11/sys/fs/nfs/nfsproto.h	Mon Jul 3 19:39:58 2017	(r320614)
+++ stable/11/sys/fs/nfs/nfsproto.h	Mon Jul 3 19:46:12 2017	(r320615)
@@ -56,8 +56,22 @@
 #define NFS_MAXDGRAMDATA 16384
 #define NFS_MAXPATHLEN  1024
 #define NFS_MAXNAMLEN   255
+/*
+ * Calculating the maximum XDR overhead for an NFS RPC isn't easy.
+ * NFS_MAXPKTHDR is antiquated and assumes AUTH_SYS over UDP.
+ * NFS_MAXXDR should be sufficient for all NFS versions over TCP.
+ * It includes:
+ * - Maximum RPC message header. It can include 2 400byte authenticators plus
+ *   a machine name of unlimited length, although it is usually relatively
+ *   small.
+ * - XDR overheads for the NFSv4 compound. This can include Owner and
+ *   Owner_group strings, which are usually fairly small, but are allowed
+ *   to be up to 1024 bytes each.
+ * 4096 is overkill, but should always be sufficient.
+ */
 #define NFS_MAXPKTHDR   404
-#define NFS_MAXPACKET   (NFS_SRVMAXIO + 2048)
+#define NFS_MAXXDR      4096
+#define NFS_MAXPACKET   (NFS_SRVMAXIO + NFS_MAXXDR)
 #define NFS_MINPACKET   20
 #define NFS_FABLKSIZE   512     /* Size in bytes of a block wrt fa_blocks */
 #define NFSV4_MINORVERSION      0       /* V4 Minor version */

Modified: stable/11/sys/fs/nfsclient/nfs_clrpcops.c
==============================================================================
--- stable/11/sys/fs/nfsclient/nfs_clrpcops.c	Mon Jul 3 19:39:58 2017	(r320614)
+++ stable/11/sys/fs/nfsclient/nfs_clrpcops.c	Mon Jul 3 19:46:12 2017	(r320615)
@@ -4615,7 +4615,7 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsc
     struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
     NFSPROC_T *p)
 {
-    uint32_t crflags, *tl;
+    uint32_t crflags, maxval, *tl;
     struct nfsrv_descript nfsd;
     struct nfsrv_descript *nd = &nfsd;
     int error, irdcnt;
@@ -4633,8 +4633,8 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsc
     /* Fill in fore channel attributes. */
     NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
     *tl++ = 0;                          /* Header pad size */
-    *tl++ = txdr_unsigned(100000);      /* Max request size */
-    *tl++ = txdr_unsigned(100000);      /* Max response size */
+    *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */
+    *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */
     *tl++ = txdr_unsigned(4096);        /* Max response size cached */
     *tl++ = txdr_unsigned(20);          /* Max operations */
     *tl++ = txdr_unsigned(64);          /* Max slots */
@@ -4681,7 +4681,26 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsc

     /* Get the fore channel slot count. */
     NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
-    tl += 3;            /* Skip the other counts. */
+    tl++;               /* Skip the header pad size. */
+
+    /* Make sure nm_wsize is small enough. */
+    maxval = fxdr_unsigned(uint32_t, *tl++);
+    while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
+        if (nmp->nm_wsize > 8096)
+            nmp->nm_wsize /= 2;
+        else
+            break;
+    }
+
+    /* Make sure nm_rsize is small enough. */
+    maxval = fxdr_unsigned(uint32_t, *tl++);
+    while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
+        if (nmp->nm_rsize > 8096)
+            nmp->nm_rsize /= 2;
+        else
+            break;
+    }
+
     sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
     tl++;
     sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
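The CREATE_SESSION change above advertises fore channel maximum request and reply sizes derived from nm_wsize/nm_rsize plus NFS_MAXXDR, then halves the mount's I/O sizes until they fit whatever maxima the server returns. A standalone sketch of that clamping follows; the helper name clamp_iosize() and the 128K sample values are illustrative only, not from the commit.

#include <stdio.h>
#include <stdint.h>

#define NFS_MAXXDR	4096

static unsigned int
clamp_iosize(unsigned int iosize, uint32_t srv_max)
{
    /* Halve iosize until iosize + NFS_MAXXDR fits the server's maximum. */
    while (srv_max < iosize + NFS_MAXXDR) {
        if (iosize > 8096)      /* same lower bound as the loop above */
            iosize /= 2;
        else
            break;
    }
    return (iosize);
}

int
main(void)
{
    uint32_t srv_maxreq = 131072;   /* hypothetical server max request size */
    unsigned int wsize = 131072;    /* hypothetical client nm_wsize */

    /* 131072 + 4096 exceeds 131072, so one halving leaves 65536. */
    printf("wsize clamped from %u to %u\n", wsize,
        clamp_iosize(wsize, srv_maxreq));
    return (0);
}

A server that advertises a limit of 1Mbyte or more would leave the mount's rsize/wsize untouched, since the while condition never holds.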
Modified: stable/11/sys/kern/vfs_bio.c
==============================================================================
--- stable/11/sys/kern/vfs_bio.c	Mon Jul 3 19:39:58 2017	(r320614)
+++ stable/11/sys/kern/vfs_bio.c	Mon Jul 3 19:46:12 2017	(r320615)
@@ -131,6 +131,7 @@ static void bufkva_reclaim(vmem_t *, int);
 static void bufkva_free(struct buf *);
 static int buf_import(void *, void **, int, int);
 static void buf_release(void *, void **, int);
+static void maxbcachebuf_adjust(void);

 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
@@ -245,6 +246,9 @@ SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW,
 SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD,
     &unmapped_buf_allowed, 0,
     "Permit the use of the unmapped i/o");
+int maxbcachebuf = MAXBCACHEBUF;
+SYSCTL_INT(_vfs, OID_AUTO, maxbcachebuf, CTLFLAG_RDTUN, &maxbcachebuf, 0,
+    "Maximum size of a buffer cache block");

 /*
  * This lock synchronizes access to bd_request.
@@ -856,6 +860,29 @@ bd_wakeup(void)
 }

 /*
+ * Adjust the maxbcachbuf tunable.
+ */
+static void
+maxbcachebuf_adjust(void)
+{
+    int i;
+
+    /*
+     * maxbcachebuf must be a power of 2 >= MAXBSIZE.
+     */
+    i = 2;
+    while (i * 2 <= maxbcachebuf)
+        i *= 2;
+    maxbcachebuf = i;
+    if (maxbcachebuf < MAXBSIZE)
+        maxbcachebuf = MAXBSIZE;
+    if (maxbcachebuf > MAXPHYS)
+        maxbcachebuf = MAXPHYS;
+    if (bootverbose != 0 && maxbcachebuf != MAXBCACHEBUF)
+        printf("maxbcachebuf=%d\n", maxbcachebuf);
+}
+
+/*
  * bd_speedup - speedup the buffer cache flushing code
  */
 void
@@ -902,6 +929,7 @@ kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est)
      */
     physmem_est = physmem_est * (PAGE_SIZE / 1024);

+    maxbcachebuf_adjust();
     /*
      * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
      * For the first 64MB of ram nominally allocate sufficient buffers to
@@ -1012,7 +1040,9 @@ bufinit(void)
     struct buf *bp;
     int i;

-    CTASSERT(MAXBCACHEBUF >= MAXBSIZE);
+    KASSERT(maxbcachebuf >= MAXBSIZE,
+        ("maxbcachebuf (%d) must be >= MAXBSIZE (%d)\n", maxbcachebuf,
+        MAXBSIZE));
     mtx_init(&bqlocks[QUEUE_DIRTY], "bufq dirty lock", NULL, MTX_DEF);
     mtx_init(&bqlocks[QUEUE_EMPTY], "bufq empty lock", NULL, MTX_DEF);
     for (i = QUEUE_CLEAN; i < QUEUE_CLEAN + CLEAN_QUEUES; i++)
@@ -1059,7 +1089,7 @@ bufinit(void)
      * PAGE_SIZE.
      */
     maxbufspace = (long)nbuf * BKVASIZE;
-    hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBCACHEBUF * 10);
+    hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - maxbcachebuf * 10);
     lobufspace = (hibufspace / 20) * 19; /* 95% */
     bufspacethresh = lobufspace + (hibufspace - lobufspace) / 2;

@@ -1071,9 +1101,9 @@ bufinit(void)
      * The lower 1 MiB limit is the historical upper limit for
      * hirunningspace.
      */
-    hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBCACHEBUF),
+    hirunningspace = lmax(lmin(roundup(hibufspace / 64, maxbcachebuf),
         16 * 1024 * 1024), 1024 * 1024);
-    lorunningspace = roundup((hirunningspace * 2) / 3, MAXBCACHEBUF);
+    lorunningspace = roundup((hirunningspace * 2) / 3, maxbcachebuf);

     /*
      * Limit the amount of malloc memory since it is wired permanently into
@@ -3498,9 +3528,9 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int
     KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC,
         ("GB_KVAALLOC only makes sense with GB_UNMAPPED"));
     ASSERT_VOP_LOCKED(vp, "getblk");
-    if (size > MAXBCACHEBUF)
-        panic("getblk: size(%d) > MAXBCACHEBUF(%d)\n", size,
-            MAXBCACHEBUF);
+    if (size > maxbcachebuf)
+        panic("getblk: size(%d) > maxbcachebuf(%d)\n", size,
+            maxbcachebuf);
     if (!unmapped_buf_allowed)
         flags &= ~(GB_UNMAPPED | GB_KVAALLOC);

Modified: stable/11/sys/sys/buf.h
==============================================================================
--- stable/11/sys/sys/buf.h	Mon Jul 3 19:39:58 2017	(r320614)
+++ stable/11/sys/sys/buf.h	Mon Jul 3 19:46:12 2017	(r320615)
@@ -459,6 +459,7 @@ buf_countdeps(struct buf *bp, int i)
 extern int  nbuf;           /* The number of buffer headers */
 extern long maxswzone;      /* Max KVA for swap structures */
 extern long maxbcache;      /* Max KVA for buffer cache */
+extern int  maxbcachebuf;   /* Max buffer cache block size */
 extern long runningbufspace;
 extern long hibufspace;
 extern int  dirtybufthresh;

Modified: stable/11/sys/sys/param.h
==============================================================================
--- stable/11/sys/sys/param.h	Mon Jul 3 19:39:58 2017	(r320614)
+++ stable/11/sys/sys/param.h	Mon Jul 3 19:46:12 2017	(r320615)
@@ -244,9 +244,7 @@
  * Filesystems can of course request smaller chunks. Actual
  * backing memory uses a chunk size of a page (PAGE_SIZE).
  * The default value here can be overridden on a per-architecture
- * basis by defining it in <machine/param.h>. This should
- * probably be done to increase its value, when MAXBCACHEBUF is
- * defined as a larger value in <machine/param.h>.
+ * basis by defining it in <machine/param.h>.
  *
  * If you make BKVASIZE too small you risk seriously fragmenting
  * the buffer KVM map which may slow things down a bit. If you
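As a standalone illustration of the rounding that maxbcachebuf_adjust() applies to the new tunable, the sketch below is not from the commit; the sample inputs are made up, and MAXBSIZE and MAXPHYS are given their usual stable/11 amd64 values of 64K and 128K, which is an assumption.

#include <stdio.h>

#define MAXBSIZE	65536		/* assumed, as on stable/11 amd64 */
#define MAXPHYS		(128 * 1024)	/* assumed, as on stable/11 amd64 */

static int
adjust(int maxbcachebuf)
{
    int i;

    /* Round down to a power of 2, then clamp to [MAXBSIZE, MAXPHYS]. */
    i = 2;
    while (i * 2 <= maxbcachebuf)
        i *= 2;
    maxbcachebuf = i;
    if (maxbcachebuf < MAXBSIZE)
        maxbcachebuf = MAXBSIZE;
    if (maxbcachebuf > MAXPHYS)
        maxbcachebuf = MAXPHYS;
    return (maxbcachebuf);
}

int
main(void)
{
    int samples[] = { 100000, 131072, 262144, 4096 };
    unsigned int i;

    for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
        printf("vfs.maxbcachebuf=%d -> %d\n", samples[i],
            adjust(samples[i]));
    return (0);
}

Since the sysctl is declared CTLFLAG_RDTUN, vfs.maxbcachebuf is meant to be set as a boot-time tunable (for example in /boot/loader.conf); an NFS mount can then use a matching rsize/wsize, and, as the messages added to newnfs_connect() suggest, kern.ipc.maxsockbuf may need to be raised as well.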