Date: Mon, 2 Jul 2018 19:21:34 +0000 (UTC) From: Rick Macklem <rmacklem@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r335870 - in head/sys/fs: nfs nfsserver Message-ID: <201807021921.w62JLYcf006879@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: rmacklem Date: Mon Jul 2 19:21:33 2018 New Revision: 335870 URL: https://svnweb.freebsd.org/changeset/base/335870 Log: Add an optional feature to the pNFS server. Without this patch, the pNFS server distributes the data storage files across all of the specified DSs. A tester noted that it would be nice if a system administrator could control which DSs are used to store the file data for a given exported MDS file system. This patch adds the kernel support to do this. It also makes a slight semantic change to nfsv4_findmirror(), since some uses of it no longer require that the DS being searched for have a current mirror. A patch that will be committed in a few minutes will modify the nfsd daemon to support this feature. The patch should only affect sites using the pNFS server (specified via the "-p" command line option for nfsd). Suggested by: james.rose@framestore.com Modified: head/sys/fs/nfs/nfs.h head/sys/fs/nfs/nfs_commonsubs.c head/sys/fs/nfs/nfsrvstate.h head/sys/fs/nfsserver/nfs_nfsdport.c head/sys/fs/nfsserver/nfs_nfsdstate.c Modified: head/sys/fs/nfs/nfs.h ============================================================================== --- head/sys/fs/nfs/nfs.h Mon Jul 2 18:23:43 2018 (r335869) +++ head/sys/fs/nfs/nfs.h Mon Jul 2 19:21:33 2018 (r335870) @@ -185,6 +185,8 @@ struct nfsd_nfsd_args { int dnshostlen; /* Length of DNS names */ char *dspath; /* DS Mount path on MDS */ int dspathlen; /* Length of DS Mount path on MDS */ + char *mdspath; /* MDS mount for DS path on MDS */ + int mdspathlen; /* Length of MDS mount for DS path on MDS */ int mirrorcnt; /* Number of mirrors to create on DSs */ }; Modified: head/sys/fs/nfs/nfs_commonsubs.c ============================================================================== --- head/sys/fs/nfs/nfs_commonsubs.c Mon Jul 2 18:23:43 2018 (r335869) +++ head/sys/fs/nfs/nfs_commonsubs.c Mon Jul 2 19:21:33 2018 (r335870) @@ -4692,37 +4692,26 @@ nfsv4_freeslot(struct nfsclsession *sep, int slot) } /* - * Search for 
a matching pnfsd mirror device structure, base on the nmp arg. + * Search for a matching pnfsd DS, based on the nmp arg. * Return one if found, NULL otherwise. */ struct nfsdevice * nfsv4_findmirror(struct nfsmount *nmp) { - struct nfsdevice *ds, *fndds; - int fndmirror; + struct nfsdevice *ds; mtx_assert(NFSDDSMUTEXPTR, MA_OWNED); /* * Search the DS server list for a match with nmp. - * Remove the DS entry if found and there is a mirror. */ - fndds = NULL; - fndmirror = 0; if (nfsrv_devidcnt == 0) - return (fndds); + return (NULL); TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp == nmp) { - NFSCL_DEBUG(4, "fnd main ds\n"); - fndds = ds; - } else if (ds->nfsdev_nmp != NULL) - fndmirror = 1; - if (fndds != NULL && fndmirror != 0) + NFSCL_DEBUG(4, "nfsv4_findmirror: fnd main ds\n"); break; + } } - if (fndmirror == 0) { - NFSCL_DEBUG(4, "no mirror for DS\n"); - return (NULL); - } - return (fndds); + return (ds); } Modified: head/sys/fs/nfs/nfsrvstate.h ============================================================================== --- head/sys/fs/nfs/nfsrvstate.h Mon Jul 2 18:23:43 2018 (r335869) +++ head/sys/fs/nfs/nfsrvstate.h Mon Jul 2 19:21:33 2018 (r335870) @@ -345,9 +345,11 @@ struct nfsdevice { uint16_t nfsdev_hostnamelen; uint16_t nfsdev_fileaddrlen; uint16_t nfsdev_flexaddrlen; + uint16_t nfsdev_mdsisset; char *nfsdev_fileaddr; char *nfsdev_flexaddr; char *nfsdev_host; + fsid_t nfsdev_mdsfsid; uint32_t nfsdev_nextdir; vnode_t nfsdev_dsdir[0]; }; Modified: head/sys/fs/nfsserver/nfs_nfsdport.c ============================================================================== --- head/sys/fs/nfsserver/nfs_nfsdport.c Mon Jul 2 18:23:43 2018 (r335869) +++ head/sys/fs/nfsserver/nfs_nfsdport.c Mon Jul 2 19:21:33 2018 (r335870) @@ -3355,6 +3355,10 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap nfsdarg.addrlen = 0; nfsdarg.dnshost = NULL; nfsdarg.dnshostlen = 0; + nfsdarg.dspath = NULL; + nfsdarg.dspathlen = 0; + nfsdarg.mdspath = NULL; + 
nfsdarg.mdspathlen = 0; nfsdarg.mirrorcnt = 1; } } else @@ -3364,14 +3368,15 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && + nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 && nfsdarg.mirrorcnt >= 1 && nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS && nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && - nfsdarg.dspath != NULL) { + nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) { NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" - " mirrorcnt=%d\n", nfsdarg.addrlen, + " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen, nfsdarg.dspathlen, nfsdarg.dnshostlen, - nfsdarg.mirrorcnt); + nfsdarg.mdspathlen, nfsdarg.mirrorcnt); cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); if (error != 0) { @@ -3399,6 +3404,17 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap } cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */ nfsdarg.dspath = cp; + cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK); + error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen); + if (error != 0) { + free(nfsdarg.addr, M_TEMP); + free(nfsdarg.dnshost, M_TEMP); + free(nfsdarg.dspath, M_TEMP); + free(cp, M_TEMP); + goto out; + } + cp[nfsdarg.mdspathlen] = '\0'; /* Ensure nul term. 
*/ + nfsdarg.mdspath = cp; } else { nfsdarg.addr = NULL; nfsdarg.addrlen = 0; @@ -3406,12 +3422,15 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap nfsdarg.dnshostlen = 0; nfsdarg.dspath = NULL; nfsdarg.dspathlen = 0; + nfsdarg.mdspath = NULL; + nfsdarg.mdspathlen = 0; nfsdarg.mirrorcnt = 1; } error = nfsrvd_nfsd(td, &nfsdarg); free(nfsdarg.addr, M_TEMP); free(nfsdarg.dnshost, M_TEMP); free(nfsdarg.dspath, M_TEMP); + free(nfsdarg.mdspath, M_TEMP); } else if (uap->flag & NFSSVC_PNFSDS) { error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg)); if (error == 0 && pnfsdarg.op == PNFSDOP_DELDSSERVER) { @@ -3846,9 +3865,12 @@ nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, /* Get a DS server directory in a round-robin order. */ mirrorcnt = 1; + mp = vp->v_mount; NFSDDSLOCK(); TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { - if (ds->nfsdev_nmp != NULL) + if (ds->nfsdev_nmp != NULL && (ds->nfsdev_mdsisset == 0 || + (mp->mnt_stat.f_fsid.val[0] == ds->nfsdev_mdsfsid.val[0] && + mp->mnt_stat.f_fsid.val[1] == ds->nfsdev_mdsfsid.val[1]))) break; } if (ds == NULL) { @@ -3862,7 +3884,12 @@ nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, mds = TAILQ_NEXT(ds, nfsdev_list); if (nfsrv_maxpnfsmirror > 1 && mds != NULL) { TAILQ_FOREACH_FROM(mds, &nfsrv_devidhead, nfsdev_list) { - if (mds->nfsdev_nmp != NULL) { + if (mds->nfsdev_nmp != NULL && + (mds->nfsdev_mdsisset == 0 || + (mp->mnt_stat.f_fsid.val[0] == + mds->nfsdev_mdsfsid.val[0] && + mp->mnt_stat.f_fsid.val[1] == + mds->nfsdev_mdsfsid.val[1]))) { dsdir[mirrorcnt] = i; dvp[mirrorcnt] = mds->nfsdev_dsdir[i]; mirrorcnt++; @@ -4464,6 +4491,7 @@ nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char struct nfsmount *curnmp, int *ippos, int *dsdirp) { struct vnode *dvp, *nvp, **tdvpp; + struct mount *mp; struct nfsmount *nmp, *newnmp; struct sockaddr *sad; struct sockaddr_in *sin; @@ -4485,6 +4513,7 @@ nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char newnmp = *newnmpp; else newnmp = NULL; + mp = vp->v_mount; 
error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", buflenp, buf, p); mirrorcnt = *buflenp / sizeof(*pf); @@ -4545,7 +4574,13 @@ nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char fndds = ds; else if (newnmpp != NULL && newnmp == NULL && - (*newnmpp == NULL || fndds == NULL)) + (*newnmpp == NULL || + fndds == NULL) && + (ds->nfsdev_mdsisset == 0 || + (ds->nfsdev_mdsfsid.val[0] == + mp->mnt_stat.f_fsid.val[0] && + ds->nfsdev_mdsfsid.val[1] == + mp->mnt_stat.f_fsid.val[1]))) /* * Return a destination for the * copy in newnmpp. Choose the Modified: head/sys/fs/nfsserver/nfs_nfsdstate.c ============================================================================== --- head/sys/fs/nfsserver/nfs_nfsdstate.c Mon Jul 2 18:23:43 2018 (r335869) +++ head/sys/fs/nfsserver/nfs_nfsdstate.c Mon Jul 2 19:21:33 2018 (r335870) @@ -210,7 +210,7 @@ static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t int iomode); static void nfsrv_freealllayouts(void); static void nfsrv_freedevid(struct nfsdevice *ds); -static int nfsrv_setdsserver(char *dspathp, NFSPROC_T *p, +static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p, struct nfsdevice **dsp); static int nfsrv_delds(char *devid, NFSPROC_T *p); static void nfsrv_deleteds(struct nfsdevice *fndds); @@ -232,6 +232,7 @@ static int nfsrv_dontlayout(fhandle_t *fhp); static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf, vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p, vnode_t *tvpp); +static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp); /* * Scan the client list for a match and either return the current one, @@ -7369,10 +7370,12 @@ nfsrv_freealllayouts(void) * Look up the mount path for the DS server. 
*/ static int -nfsrv_setdsserver(char *dspathp, NFSPROC_T *p, struct nfsdevice **dsp) +nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p, + struct nfsdevice **dsp) { struct nameidata nd; struct nfsdevice *ds; + struct mount *mp; int error, i; char *dsdirpath; size_t dsdirsize; @@ -7400,6 +7403,9 @@ nfsrv_setdsserver(char *dspathp, NFSPROC_T *p, struct * Allocate a DS server structure with the NFS mounted directory * vnode reference counted, so that a non-forced dismount will * fail with EBUSY. + * This structure is always linked into the list, even if an error + * is being returned. The caller will free the entire list upon + * an error return. */ *dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t), M_NFSDSTATE, M_WAITOK | M_ZERO); @@ -7435,6 +7441,36 @@ nfsrv_setdsserver(char *dspathp, NFSPROC_T *p, struct } free(dsdirpath, M_TEMP); + if (strlen(mdspathp) > 0) { + /* + * This DS stores file for a specific MDS exported file + * system. + */ + NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, + UIO_SYSSPACE, mdspathp, p); + error = namei(&nd); + NFSD_DEBUG(4, "mds lookup=%d\n", error); + if (error != 0) + goto out; + if (nd.ni_vp->v_type != VDIR) { + vput(nd.ni_vp); + error = ENOTDIR; + NFSD_DEBUG(4, "mdspath not dir\n"); + goto out; + } + mp = nd.ni_vp->v_mount; + if ((mp->mnt_flag & MNT_EXPORTED) == 0) { + vput(nd.ni_vp); + error = ENXIO; + NFSD_DEBUG(4, "mdspath not an exported fs\n"); + goto out; + } + ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid; + ds->nfsdev_mdsisset = 1; + vput(nd.ni_vp); + } + +out: TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); atomic_add_int(&nfsrv_devidcnt, 1); return (error); @@ -7514,11 +7550,7 @@ nfsrv_deldsnmp(struct nfsmount *nmp, NFSPROC_T *p) NFSD_DEBUG(4, "deldsdvp\n"); NFSDDSLOCK(); - if (nfsrv_faildscnt <= 0) { - NFSDDSUNLOCK(); - return (NULL); - } - fndds = nfsv4_findmirror(nmp); + fndds = nfsrv_findmirroredds(nmp); if (fndds != NULL) nfsrv_deleteds(fndds); NFSDDSUNLOCK(); @@ -7551,21 +7583,35 
@@ nfsrv_delds(char *devid, NFSPROC_T *p) nmp = NULL; fndmirror = 0; NFSDDSLOCK(); - if (nfsrv_faildscnt <= 0) { - NFSDDSUNLOCK(); - return (ENXIO); - } TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 && ds->nfsdev_nmp != NULL) { NFSD_DEBUG(4, "fnd main ds\n"); fndds = ds; - } else if (ds->nfsdev_nmp != NULL) - fndmirror = 1; - if (fndds != NULL && fndmirror != 0) break; + } } - if (fndds != NULL && fndmirror != 0) { + if (fndds == NULL) { + NFSDDSUNLOCK(); + return (ENXIO); + } + if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0) + fndmirror = 1; + else { + /* For the fsid is set case, search for a mirror. */ + TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { + if (ds != fndds && ds->nfsdev_nmp != NULL && + ds->nfsdev_mdsisset != 0 && + ds->nfsdev_mdsfsid.val[0] == + fndds->nfsdev_mdsfsid.val[0] && + ds->nfsdev_mdsfsid.val[1] == + fndds->nfsdev_mdsfsid.val[1]) { + fndmirror = 1; + break; + } + } + } + if (fndmirror != 0) { nmp = fndds->nfsdev_nmp; NFSLOCKMNT(nmp); if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | @@ -7579,7 +7625,7 @@ nfsrv_delds(char *devid, NFSPROC_T *p) } } NFSDDSUNLOCK(); - if (fndds != NULL && nmp != NULL) { + if (nmp != NULL) { nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p); printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host); nfsrv_killrpcs(nmp); @@ -7601,7 +7647,8 @@ nfsrv_deleteds(struct nfsdevice *fndds) NFSD_DEBUG(4, "deleteds: deleting a mirror\n"); fndds->nfsdev_nmp = NULL; - nfsrv_faildscnt--; + if (fndds->nfsdev_mdsisset == 0) + nfsrv_faildscnt--; } /* @@ -7687,24 +7734,27 @@ int nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p) { struct nfsdevice *ds; - char *addrp, *dnshostp, *dspathp; + char *addrp, *dnshostp, *dspathp, *mdspathp; int error, i; addrp = args->addr; dnshostp = args->dnshost; dspathp = args->dspath; + mdspathp = args->mdspath; nfsrv_maxpnfsmirror = args->mirrorcnt; - if (addrp == NULL || dnshostp == NULL || dspathp == NULL) + if 
(addrp == NULL || dnshostp == NULL || dspathp == NULL || + mdspathp == NULL) return (0); /* * Loop around for each nul-terminated string in args->addr, - * args->dnshost and args->dnspath. + * args->dnshost, args->dnspath and args->mdspath. */ while (addrp < (args->addr + args->addrlen) && dnshostp < (args->dnshost + args->dnshostlen) && - dspathp < (args->dspath + args->dspathlen)) { - error = nfsrv_setdsserver(dspathp, p, &ds); + dspathp < (args->dspath + args->dspathlen) && + mdspathp < (args->mdspath + args->mdspathlen)) { + error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds); if (error != 0) { /* Free all DS servers. */ nfsrv_freealldevids(); @@ -7715,6 +7765,7 @@ nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPRO addrp += (strlen(addrp) + 1); dnshostp += (strlen(dnshostp) + 1); dspathp += (strlen(dspathp) + 1); + mdspathp += (strlen(mdspathp) + 1); } if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) { /* Free all DS servers. */ @@ -8299,9 +8350,15 @@ nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *c } nmp = VFSTONFS(nd.ni_vp->v_mount); - /* Search the nfsdev list for a match. */ + /* + * Search the nfsdevice list for a match. If curnmp == NULL, + * this is a recovery and there must be a mirror. + */ NFSDDSLOCK(); - *dsp = nfsv4_findmirror(nmp); + if (curnmp == NULL) + *dsp = nfsrv_findmirroredds(nmp); + else + *dsp = nfsv4_findmirror(nmp); NFSDDSUNLOCK(); if (*dsp == NULL) { vput(nd.ni_vp); @@ -8331,7 +8388,7 @@ nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *c if (error == 0 && nmp != NULL) { /* Search the nfsdev list for a match. */ NFSDDSLOCK(); - *dsp = nfsv4_findmirror(nmp); + *dsp = nfsrv_findmirroredds(nmp); NFSDDSUNLOCK(); } if (error == 0 && (nmp == NULL || *dsp == NULL)) { @@ -8374,5 +8431,56 @@ nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *c } else vput(vp); return (error); +} + +/* + * Search for a matching pnfsd mirror device structure, base on the nmp arg. + * Return one if found, NULL otherwise. 
+ */ +static struct nfsdevice * +nfsrv_findmirroredds(struct nfsmount *nmp) +{ + struct nfsdevice *ds, *fndds; + int fndmirror; + + mtx_assert(NFSDDSMUTEXPTR, MA_OWNED); + /* + * Search the DS server list for a match with nmp. + * Remove the DS entry if found and there is a mirror. + */ + fndds = NULL; + fndmirror = 0; + if (nfsrv_devidcnt == 0) + return (fndds); + TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { + if (ds->nfsdev_nmp == nmp) { + NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n"); + fndds = ds; + break; + } + } + if (fndds == NULL) + return (fndds); + if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0) + fndmirror = 1; + else { + /* For the fsid is set case, search for a mirror. */ + TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { + if (ds != fndds && ds->nfsdev_nmp != NULL && + ds->nfsdev_mdsisset != 0 && + ds->nfsdev_mdsfsid.val[0] == + fndds->nfsdev_mdsfsid.val[0] && + ds->nfsdev_mdsfsid.val[1] == + fndds->nfsdev_mdsfsid.val[1]) { + fndmirror = 1; + break; + } + } + } + if (fndmirror == 0) { + NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n"); + return (NULL); + } + return (fndds); }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201807021921.w62JLYcf006879>