From owner-svn-src-stable@freebsd.org Thu Mar 15 12:05:18 2018 Return-Path: Delivered-To: svn-src-stable@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id BB454F4C106; Thu, 15 Mar 2018 12:05:18 +0000 (UTC) (envelope-from avg@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 6C41B7D53A; Thu, 15 Mar 2018 12:05:18 +0000 (UTC) (envelope-from avg@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 62EDB15E08; Thu, 15 Mar 2018 12:05:18 +0000 (UTC) (envelope-from avg@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id w2FC5IHG040478; Thu, 15 Mar 2018 12:05:18 GMT (envelope-from avg@FreeBSD.org) Received: (from avg@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id w2FC5I0l040474; Thu, 15 Mar 2018 12:05:18 GMT (envelope-from avg@FreeBSD.org) Message-Id: <201803151205.w2FC5I0l040474@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: avg set sender to avg@FreeBSD.org using -f From: Andriy Gapon Date: Thu, 15 Mar 2018 12:05:18 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r330991 - in stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys X-SVN-Group: stable-11 X-SVN-Commit-Author: avg X-SVN-Commit-Paths: in stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys X-SVN-Commit-Revision: 330991 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.25 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 15 Mar 2018 12:05:19 -0000 Author: avg Date: Thu Mar 15 12:05:17 2018 New Revision: 330991 URL: https://svnweb.freebsd.org/changeset/base/330991 Log: MFC r329363: read-behind / read-ahead support for zfs_getpages() Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c ============================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c Thu Mar 15 11:06:04 2018 (r330990) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c Thu Mar 15 12:05:17 2018 (r330991) @@ -1518,6 +1518,188 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_ dmu_buf_rele_array(dbp, numbufs, FTAG); return (err); } + +int +dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, + int *rbehind, int *rahead, int last_size) +{ + struct sf_buf *sf; + vm_object_t vmobj; + vm_page_t m; + dmu_buf_t **dbp; + dmu_buf_t *db; + caddr_t va; + int numbufs, i; + int bufoff, pgoff, tocpy; + int mi, di; + int err; + + ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex); + ASSERT(last_size <= PAGE_SIZE); + + err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex), + IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp); + if (err != 0) + return (err); + +#ifdef DEBUG + IMPLY(last_size < PAGE_SIZE, *rahead == 0); + if (dbp[0]->db_offset != 0 || numbufs > 1) { + for (i = 0; i < numbufs; i++) { + ASSERT(ISP2(dbp[i]->db_size)); + ASSERT((dbp[i]->db_offset % dbp[i]->db_size) == 0); + ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size); + } + } +#endif + + vmobj = ma[0]->object; + zfs_vmobject_wlock(vmobj); + + db = dbp[0]; + for (i = 0; i < *rbehind; i++) { + m = vm_page_grab(vmobj, ma[0]->pindex - 1 - i, + VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_NOBUSY); + if (m == NULL) + break; + if (m->valid != 0) { + ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); + break; + } + ASSERT(m->dirty == 0); + ASSERT(!pmap_page_is_mapped(m)); + + ASSERT(db->db_size > PAGE_SIZE); + bufoff = IDX_TO_OFF(m->pindex) % db->db_size; + va = zfs_map_page(m, &sf); + bcopy((char *)db->db_data + bufoff, va, PAGESIZE); + zfs_unmap_page(sf); + m->valid = VM_PAGE_BITS_ALL; + vm_page_lock(m); + if ((m->busy_lock & VPB_BIT_WAITERS) != 0) + vm_page_activate(m); + else + vm_page_deactivate(m); + vm_page_unlock(m); + } + *rbehind = i; + + bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size; + pgoff = 0; + for (mi = 0, di = 0; mi < count && di < numbufs; ) { + if (pgoff == 0) { + m = ma[mi]; + vm_page_assert_xbusied(m); + ASSERT(m->valid == 0); + ASSERT(m->dirty == 0); + ASSERT(!pmap_page_is_mapped(m)); + va = zfs_map_page(m, &sf); + } + if (bufoff == 0) + db = dbp[di]; + + ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==, + db->db_offset + bufoff); + + /* + * We do not need to clamp the copy size by the file + * size as the last block is zero-filled beyond the + * end of file anyway. + */ + tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff); + bcopy((char *)db->db_data + bufoff, va + pgoff, tocpy); + + pgoff += tocpy; + ASSERT(pgoff <= PAGESIZE); + if (pgoff == PAGESIZE) { + zfs_unmap_page(sf); + m->valid = VM_PAGE_BITS_ALL; + ASSERT(mi < count); + mi++; + pgoff = 0; + } + + bufoff += tocpy; + ASSERT(bufoff <= db->db_size); + if (bufoff == db->db_size) { + ASSERT(di < numbufs); + di++; + bufoff = 0; + } + } + +#ifdef DEBUG + /* + * Three possibilities: + * - last requested page ends at a buffer boundary and , thus, + * all pages and buffers have been iterated; + * - all requested pages are filled, but the last buffer + * has not been exhausted; + * the read-ahead is possible only in this case; + * - all buffers have been read, but the last page has not been + * fully filled; + * this is only possible if the file has only a single buffer + * with a size that is not a multiple of the page size. + */ + if (mi == count) { + ASSERT(di >= numbufs - 1); + IMPLY(*rahead != 0, di == numbufs - 1); + IMPLY(*rahead != 0, bufoff != 0); + ASSERT(pgoff == 0); + } + if (di == numbufs) { + ASSERT(mi >= count - 1); + ASSERT(*rahead == 0); + IMPLY(pgoff == 0, mi == count); + if (pgoff != 0) { + ASSERT(mi == count - 1); + ASSERT((dbp[0]->db_size & PAGE_MASK) != 0); + } + } +#endif + if (pgoff != 0) { + bzero(va + pgoff, PAGESIZE - pgoff); + zfs_unmap_page(sf); + m->valid = VM_PAGE_BITS_ALL; + } + + for (i = 0; i < *rahead; i++) { + m = vm_page_grab(vmobj, ma[count - 1]->pindex + 1 + i, + VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_NOBUSY); + if (m == NULL) + break; + if (m->valid != 0) { + ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); + break; + } + ASSERT(m->dirty == 0); + ASSERT(!pmap_page_is_mapped(m)); + + ASSERT(db->db_size > PAGE_SIZE); + bufoff = IDX_TO_OFF(m->pindex) % db->db_size; + tocpy = MIN(db->db_size - bufoff, PAGESIZE); + va = zfs_map_page(m, &sf); + bcopy((char *)db->db_data + bufoff, va, tocpy); + if (tocpy < PAGESIZE) { + ASSERT(i == *rahead - 1); + ASSERT((db->db_size & PAGE_MASK) != 0); + bzero(va + tocpy, PAGESIZE - tocpy); + } + zfs_unmap_page(sf); + m->valid = VM_PAGE_BITS_ALL; + vm_page_lock(m); + if ((m->busy_lock & VPB_BIT_WAITERS) != 0) + vm_page_activate(m); + else + vm_page_deactivate(m); + vm_page_unlock(m); + } + *rahead = i; + zfs_vmobject_wunlock(vmobj); + + dmu_buf_rele_array(dbp, numbufs, FTAG); + return (0); +} #endif /* illumos */ #endif /* _KERNEL */ Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h ============================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h Thu Mar 15 11:06:04 2018 (r330990) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h Thu Mar 15 12:05:17 2018 (r330991) @@ -753,6 +753,8 @@ int dmu_write_pages(objset_t *os, uint64_t object, uin #else int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, struct vm_page **ppa, dmu_tx_t *tx); +int dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, + int *rbehind, int *rahead, int last_size); #endif #endif struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c ============================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Thu Mar 15 11:06:04 2018 (r330990) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Thu Mar 15 12:05:17 2018 (r330991) @@ -4515,81 +4515,85 @@ zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int fla } static int -zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int *rbehind, +zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, int *rahead) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = zp->z_zfsvfs->z_os; - vm_page_t mlast; + rl_t *rl; vm_object_t object; - caddr_t va; - struct sf_buf *sf; - off_t startoff, endoff; - int i, error; - vm_pindex_t reqstart, reqend; - int lsize, size; + off_t start, end, obj_size; + uint_t blksz; + int pgsin_b, pgsin_a; + int error; - object = m[0]->object; - error = 0; - ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); - zfs_vmobject_wlock(object); - if (m[count - 1]->valid != 0 && --count == 0) { - zfs_vmobject_wunlock(object); - goto out; + start = IDX_TO_OFF(ma[0]->pindex); + end = IDX_TO_OFF(ma[count - 1]->pindex + 1); + + /* + * Lock a range covering all required and optional pages. + * Note that we need to handle the case of the block size growing. + */ + for (;;) { + blksz = zp->z_blksz; + rl = zfs_range_lock(zp, rounddown(start, blksz), + roundup(end, blksz) - rounddown(start, blksz), RL_READER); + if (blksz == zp->z_blksz) + break; + zfs_range_unlock(rl); } - mlast = m[count - 1]; - - if (IDX_TO_OFF(mlast->pindex) >= - object->un_pager.vnp.vnp_size) { - zfs_vmobject_wunlock(object); + object = ma[0]->object; + zfs_vmobject_wlock(object); + obj_size = object->un_pager.vnp.vnp_size; + zfs_vmobject_wunlock(object); + if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) { + zfs_range_unlock(rl); ZFS_EXIT(zfsvfs); return (zfs_vm_pagerret_bad); } - PCPU_INC(cnt.v_vnodein); - PCPU_ADD(cnt.v_vnodepgsin, count); + pgsin_b = 0; + if (rbehind != NULL) { + pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz)); + pgsin_b = MIN(*rbehind, pgsin_b); + } - lsize = PAGE_SIZE; - if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size) - lsize = object->un_pager.vnp.vnp_size - - IDX_TO_OFF(mlast->pindex); - zfs_vmobject_wunlock(object); - - for (i = 0; i < count; i++) { - size = PAGE_SIZE; - if (i == count - 1) - size = lsize; - va = zfs_map_page(m[i], &sf); - error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex), - size, va, DMU_READ_PREFETCH); - if (size != PAGE_SIZE) - bzero(va + size, PAGE_SIZE - size); - zfs_unmap_page(sf); - if (error != 0) - goto out; + pgsin_a = 0; + if (rahead != NULL) { + pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end); + if (end + IDX_TO_OFF(pgsin_a) >= obj_size) + pgsin_a = OFF_TO_IDX(round_page(obj_size) - end); + pgsin_a = MIN(*rahead, pgsin_a); } - zfs_vmobject_wlock(object); - for (i = 0; i < count; i++) - m[i]->valid = VM_PAGE_BITS_ALL; - zfs_vmobject_wunlock(object); + /* + * NB: we need to pass the exact byte size of the data that we expect + * to read after accounting for the file size. This is required because + * ZFS will panic if we request DMU to read beyond the end of the last + * allocated block. + */ + error = dmu_read_pages(os, zp->z_id, ma, count, &pgsin_b, &pgsin_a, + MIN(end, obj_size) - (end - PAGE_SIZE)); -out: + zfs_range_unlock(rl); ZFS_ACCESSTIME_STAMP(zfsvfs, zp); ZFS_EXIT(zfsvfs); - if (error == 0) { - if (rbehind) - *rbehind = 0; - if (rahead) - *rahead = 0; - return (zfs_vm_pagerret_ok); - } else + + if (error != 0) return (zfs_vm_pagerret_error); + + PCPU_INC(cnt.v_vnodein); + PCPU_ADD(cnt.v_vnodepgsin, count + pgsin_b + pgsin_a); + if (rbehind != NULL) + *rbehind = pgsin_b; + if (rahead != NULL) + *rahead = pgsin_a; + return (zfs_vm_pagerret_ok); } static int