Date: Mon, 14 Sep 2009 23:07:42 GMT From: Gleb Kurtsou <gk@FreeBSD.org> To: Perforce Change Reviews <perforce@FreeBSD.org> Subject: PERFORCE change 168553 for review Message-ID: <200909142307.n8EN7gZi091173@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=168553 Change 168553 by gk@gk_h1 on 2009/09/14 23:07:07 implement mapped read and write Affected files ... .. //depot/projects/soc2009/gk_pefs/sys/fs/pefs/pefs_vnops.c#14 edit Differences ... ==== //depot/projects/soc2009/gk_pefs/sys/fs/pefs/pefs_vnops.c#14 (text+ko) ==== @@ -51,18 +51,23 @@ #include <sys/mount.h> #include <sys/mutex.h> #include <sys/namei.h> +#include <sys/sf_buf.h> #include <sys/sysctl.h> #include <sys/vnode.h> #include <sys/dirent.h> #include <sys/limits.h> - -#include <fs/pefs/pefs.h> +#include <sys/proc.h> +#include <sys/sched.h> #include <vm/vm.h> #include <vm/vm_extern.h> #include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_pager.h> #include <vm/vnode_pager.h> +#include <fs/pefs/pefs.h> + static int pefs_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ SYSCTL_INT(_debug, OID_AUTO, pefs_bug_bypass, CTLFLAG_RW, &pefs_bug_bypass, 0, ""); @@ -631,32 +636,49 @@ } static int -pefs_vreg_grow(struct vnode *vp, u_quad_t nsize, struct ucred *cred) +pefs_tryextend(struct vnode *vp, u_quad_t nsize, struct ucred *cred) { struct vnode *lvp = PEFS_LOWERVP(vp); - struct vattr o_va; + struct vattr va; struct uio *puio; struct pefs_node *pn = VP_TO_PN(vp); struct pefs_chunk pc; struct pefs_ctx *ctx; + u_quad_t osize; off_t offset; size_t bsize, size; int error; - error = VOP_GETATTR(lvp, &o_va, cred); + MPASS(vp->v_type == VREG); + + error = VOP_GETATTR(lvp, &va, cred); if (error) return (error); + osize = va.va_size; - PEFSDEBUG("pefs_vreg_grow: old size %jd, new size %jd\n", - nsize, o_va.va_size); - if (nsize <= o_va.va_size) + if (nsize <= osize) return (0); - if (nsize - o_va.va_size >= INT_MAX) - return (EINVAL); - size = nsize - o_va.va_size; + if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { + vn_lock(vp, LK_UPGRADE | LK_RETRY); + error = VOP_GETATTR(lvp, &va, cred); + if (error) + return (error); + osize = va.va_size; + if (nsize <= osize) + return (0); + } + + PEFSDEBUG("pefs_tryextend: old size 0x%jx, new size 0x%jx\n", osize, nsize); + + VATTR_NULL(&va); + va.va_size = nsize; + VOP_SETATTR(lvp, &va, cred); + vnode_pager_setsize(vp, nsize); + + size = nsize - osize; bsize = qmin(size, DFLTPHYS); - offset = o_va.va_size; + offset = osize; pefs_chunk_create(&pc, pn, bsize); ctx = pefs_ctx_get(); @@ -665,14 +687,13 @@ pefs_chunk_zero(&pc); pefs_data_encrypt_update(ctx, &pn->pn_tkey, &pc); puio = pefs_chunk_uio(&pc, offset, UIO_WRITE); - PEFSDEBUG("pefs_vreg_grow: resizing file; filling with zeros: offset=%jd, resid=%jd\n", offset, bsize); + PEFSDEBUG("pefs_tryextend: resizing file; filling with zeros: offset=0x%jx, resid=0x%jx\n", offset, bsize); error = VOP_WRITE(lvp, puio, 0, cred); if (error) { /* try to reset */ - size = o_va.va_size; - VATTR_NULL(&o_va); - o_va.va_size = size; - VOP_SETATTR(lvp, &o_va, cred); + VATTR_NULL(&va); + va.va_size = osize; + VOP_SETATTR(lvp, &va, cred); break; } offset += bsize; @@ -695,7 +716,6 @@ pefs_setattr(struct vop_setattr_args *ap) { struct vnode *vp = ap->a_vp; - struct vnode *lvp; struct ucred *cred = ap->a_cred; struct vattr *vap = ap->a_vap; int error; @@ -723,22 +743,23 @@ * Disallow write attempts if the filesystem is * mounted read-only. */ - if (vp->v_mount->mnt_flag & MNT_RDONLY) + if ((vp->v_mount->mnt_flag & MNT_RDONLY) || + pefs_no_keys(vp)) return (EROFS); if (vp->v_type == VREG) - error = pefs_vreg_grow(vp, vap->va_size, cred); + error = pefs_tryextend(vp, vap->va_size, cred); else - error = EOPNOTSUPP; // TODO pefs_vlnk_chsize + error = EOPNOTSUPP; /* TODO */ if (error) return (error); + vnode_pager_setsize(vp, vap->va_size); break; default: return (EOPNOTSUPP); } } - lvp = PEFS_LOWERVP(vp); - return (VOP_SETATTR(lvp, vap, cred)); + return (VOP_SETATTR(PEFS_LOWERVP(vp), vap, cred)); } /* @@ -1041,6 +1062,15 @@ pefs_node_buf_free(pn); VI_UNLOCK(vp); + if (vp->v_object != NULL) { + if (vp->v_object->resident_page_count > 0) + PEFSDEBUG("pefs_inactive: vobject has dirty pages: vp=%p count=%d\n", + vp, vp->v_object->resident_page_count); + VM_OBJECT_LOCK(vp->v_object); + vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); + VM_OBJECT_UNLOCK(vp->v_object); + } + if ((pn->pn_flags & PN_WANTRECYCLE) || (pn->pn_flags & PN_HASKEY) == 0) vrecycle(vp, td); @@ -1206,7 +1236,6 @@ if (error) break; - /* Nothing was written.. somehow */ if (pc.pc_size == puio->uio_resid) break; pefs_chunk_setsize(&pc, pc.pc_size - puio->uio_resid); @@ -1536,6 +1565,33 @@ return (error); } +static inline int +pefs_getsize(struct vnode *vp, u_quad_t *sizep, struct ucred *cred) +{ + struct vattr va; + int error; + + error = VOP_GETATTR(PEFS_LOWERVP(vp), &va, cred); + if (error == 0) + *sizep = va.va_size; + + return (error); +} + +static inline int +pefs_ismapped(struct vnode *vp) +{ + vm_object_t object = vp->v_object; + + if (object == NULL) + return (0); + + if (object->resident_page_count > 0 || object->cache != NULL || + object->root != NULL) + return (1); + return (0); +} + static int pefs_read(struct vop_read_args *ap) { @@ -1543,16 +1599,21 @@ struct vnode *lvp = PEFS_LOWERVP(vp); struct uio *uio = ap->a_uio; struct uio *puio; + struct ucred *cred = ap->a_cred; struct pefs_node *pn = VP_TO_PN(vp); struct pefs_chunk pc; struct pefs_ctx *ctx; - ssize_t bsize, done; - int error = 0; + vm_page_t m; + vm_offset_t moffset; + u_quad_t fsize; + ssize_t bsize, msize, done; + int ioflag = ap->a_ioflag; + int error = 0, mapped, restart_decrypt; if (vp->v_type == VDIR) return (EISDIR); if (!(pn->pn_flags & PN_HASKEY) || vp->v_type == VFIFO) - return (VOP_READ(lvp, uio, ap->a_ioflag, ap->a_cred)); + return (VOP_READ(lvp, uio, ioflag, cred)); if (vp->v_type != VREG) return (EOPNOTSUPP); if (uio->uio_resid == 0) @@ -1560,40 +1621,72 @@ if (uio->uio_offset < 0) return (EINVAL); - bsize = qmin(uio->uio_resid, DFLTPHYS); + mapped = pefs_ismapped(vp); + bsize = qmin(uio->uio_resid, mapped ? PAGE_SIZE : DFLTPHYS); + error = pefs_getsize(vp, &fsize, cred); + if (error != 0) + return (error); ctx = pefs_ctx_get(); - pefs_data_decrypt_start(ctx, &pn->pn_tkey, uio->uio_offset); pefs_chunk_create(&pc, pn, bsize); - while (uio->uio_resid > 0) { + restart_decrypt = 1; + while (uio->uio_resid > 0 && uio->uio_offset < fsize) { + bsize = qmin(uio->uio_resid, bsize); + bsize = qmin(fsize - uio->uio_offset, bsize); + pefs_chunk_setsize(&pc, bsize); + + if (mapped) { + moffset = uio->uio_offset & PAGE_MASK; + msize = qmin(PAGE_SIZE - moffset, bsize); + + VM_OBJECT_LOCK(vp->v_object); +lookupvpg: + m = vm_page_lookup(vp->v_object, + OFF_TO_IDX(uio->uio_offset)); + if (m != NULL && vm_page_is_valid(m, moffset, msize)) { + if (vm_page_sleep_if_busy(m, FALSE, "pefsmr")) + goto lookupvpg; + vm_page_busy(m); + VM_OBJECT_UNLOCK(vp->v_object); + PEFSDEBUG("pefs_read: mapped: offset=0x%jx moffset=0x%jx msize=0x%jx\n", + uio->uio_offset, moffset, msize); + error = uiomove_fromphys(&m, moffset, msize, uio); + VM_OBJECT_LOCK(vp->v_object); + vm_page_wakeup(m); + VM_OBJECT_UNLOCK(vp->v_object); + if (error != 0) + break; + restart_decrypt = 1; + continue; + } else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) { + /* FIXME: UIO_NOCOPY is not supported */ + VM_OBJECT_UNLOCK(vp->v_object); + return (EIO); + } + VM_OBJECT_UNLOCK(vp->v_object); + /* Page not cached. Make next read page-aligned. */ + pefs_chunk_setsize(&pc, msize); + } + + PEFSDEBUG("pefs_read: mapped=%d m=%d offset=0x%jx size=0x%jx\n", + mapped, m != NULL, uio->uio_offset, pc.pc_size); puio = pefs_chunk_uio(&pc, uio->uio_offset, uio->uio_rw); - error = VOP_READ(lvp, puio, ap->a_ioflag, ap->a_cred); - if (error != 0) { + error = VOP_READ(lvp, puio, ioflag, cred); + if (error != 0) break; - } done = pc.pc_size - puio->uio_resid; -#if 0 - error = VOP_GETATTR(lvp, &va, ap->a_cred); - if (error != 0) { - pefs_chunk_free(&pc, pn); - return (error); - } - if (va.va_size < uio->uio_offset) { - /* Read past end of file */ - done -= uio->uio_offset - va.va_size; - MPASS(done >= 0); - } -#endif if (done <= 0) break; pefs_chunk_setsize(&pc, done); + if (restart_decrypt) { + restart_decrypt = 0; + pefs_data_decrypt_start(ctx, &pn->pn_tkey, + uio->uio_offset); + } pefs_data_decrypt_update(ctx, &pn->pn_tkey, &pc); pefs_chunk_copy(&pc, uio); - - bsize = qmin(uio->uio_resid, bsize); - pefs_chunk_setsize(&pc, bsize); } pefs_ctx_free(ctx); pefs_chunk_free(&pc, pn); @@ -1606,19 +1699,28 @@ { struct vnode *vp = ap->a_vp; struct vnode *lvp = PEFS_LOWERVP(vp); + struct ucred *cred = ap->a_cred; struct uio *uio = ap->a_uio; struct uio *puio; + struct sf_buf *sf; struct pefs_node *pn = VP_TO_PN(vp); struct pefs_chunk pc; struct pefs_ctx *ctx; + vm_page_t m = NULL; + vm_offset_t moffset; + vm_pindex_t idx; + u_quad_t nsize; + char *ma; off_t offset; - ssize_t resid, bsize; - int error = 0; + ssize_t resid, bsize, msize; + int ioflag = ap->a_ioflag; + int restart_encrypt; + int error = 0, mapped; if (vp->v_type == VDIR) return (EISDIR); if (vp->v_type == VFIFO) - return (error = VOP_WRITE(lvp, uio, ap->a_ioflag, ap->a_cred)); + return (VOP_WRITE(lvp, uio, ioflag, cred)); if (vp->v_type != VREG) return (EOPNOTSUPP); if (uio->uio_resid == 0) @@ -1629,36 +1731,117 @@ if (!(pn->pn_flags & PN_HASKEY)) return (EROFS); + error = pefs_getsize(vp, &nsize, cred); + if (error != 0) + return (error); + + if (ioflag & IO_APPEND) { + uio->uio_offset = nsize; + ioflag &= ~IO_APPEND; + } + offset = uio->uio_offset; resid = uio->uio_resid; - error = pefs_vreg_grow(vp, offset, ap->a_cred); - if (error != 0) - return (error); + if (offset > nsize) { + error = pefs_tryextend(vp, offset, cred); + if (error != 0) + return (error); + } + + mapped = pefs_ismapped(vp); + bsize = qmin(resid, mapped ? PAGE_SIZE : DFLTPHYS); + + if (offset + resid > nsize) { + PEFSDEBUG("pefs_write: extend: 0x%jx (old size: 0x%jx)\n", offset + resid, nsize); + nsize = offset + resid; + vnode_pager_setsize(vp, nsize); + } - bsize = qmin(resid, DFLTPHYS); ctx = pefs_ctx_get(); - pefs_data_encrypt_start(ctx, &pn->pn_tkey, uio->uio_offset); - pefs_chunk_create(&pc, pn, bsize); + restart_encrypt = 1; + pefs_chunk_create(&pc, pn, mapped ? PAGE_SIZE : bsize); while (resid > 0) { + bsize = qmin(resid, bsize); + if (mapped) { + moffset = offset & PAGE_MASK; + msize = qmin(PAGE_SIZE - moffset, bsize); + msize = qmin(nsize - offset, msize); + pefs_chunk_setsize(&pc, moffset + msize); + + VM_OBJECT_LOCK(vp->v_object); +lookupvpg: + idx = OFF_TO_IDX(offset); + m = vm_page_lookup(vp->v_object, idx); + if (m != NULL && vm_page_is_valid(m, 0, moffset + msize)) { + if (vm_page_sleep_if_busy(m, FALSE, "pefsmw")) + goto lookupvpg; + vm_page_busy(m); + vm_page_lock_queues(); + vm_page_undirty(m); + vm_page_unlock_queues(); + VM_OBJECT_UNLOCK(vp->v_object); + PEFSDEBUG("pefs_write: mapped: offset=0x%jx moffset=0x%jx msize=0x%jx\n", + offset, moffset, msize); + sched_pin(); + sf = sf_buf_alloc(m, SFB_CPUPRIVATE); + ma = (char *)sf_buf_kva(sf); + error = uiomove(ma + moffset, msize, uio); + memcpy(pc.pc_base, ma, pc.pc_size); + sf_buf_free(sf); + sched_unpin(); + VM_OBJECT_LOCK(vp->v_object); + vm_page_wakeup(m); + VM_OBJECT_UNLOCK(vp->v_object); + if (error != 0) { + break; + } + if (moffset != 0) { + resid += moffset; + offset -= moffset; + restart_encrypt = 1; + } + goto lower_update; + } else if (__predict_false(vp->v_object->cache != NULL)) { + PEFSDEBUG("pefs_write: free cache: 0x%jx\n", offset - moffset); + vm_page_cache_free(vp->v_object, idx, + idx + 1); + } + MPASS(m == NULL || + !vm_page_is_valid(m, moffset, msize)); + VM_OBJECT_UNLOCK(vp->v_object); + /* Page align consequent writes */ + pefs_chunk_setsize(&pc, msize); + } else { + pefs_chunk_setsize(&pc, bsize); + } pefs_chunk_copy(&pc, uio); +lower_update: + PEFSDEBUG("pefs_write: mapped=%d m=%d offset=0x%jx size=0x%jx\n", + mapped, m != NULL, offset, pc.pc_size); + if (restart_encrypt) { + restart_encrypt = 0; + pefs_data_encrypt_start(ctx, &pn->pn_tkey, offset); + } pefs_data_encrypt_update(ctx, &pn->pn_tkey, &pc); puio = pefs_chunk_uio(&pc, offset, uio->uio_rw); - error = VOP_WRITE(lvp, puio, ap->a_ioflag, ap->a_cred); + /* IO_APPEND handled above to prevent offset change races. */ + error = VOP_WRITE(lvp, puio, ioflag, cred); if (error != 0) break; MPASS(puio->uio_resid == 0); - resid -= bsize; - offset += bsize; + resid -= pc.pc_size; + offset += pc.pc_size; - bsize = qmin(resid, bsize); - pefs_chunk_setsize(&pc, bsize); } pefs_ctx_free(ctx); pefs_chunk_free(&pc, pn); + MPASS(resid == uio->uio_resid); + MPASS(offset == uio->uio_offset); + return (error); }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200909142307.n8EN7gZi091173>