Date: Fri, 17 Apr 2015 22:16:36 +0000 (UTC) From: Xin LI <delphij@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r281668 - in user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys Message-ID: <201504172216.t3HMGaLT026329@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: delphij Date: Fri Apr 17 22:16:35 2015 New Revision: 281668 URL: https://svnweb.freebsd.org/changeset/base/281668 Log: MFV r277430: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h: Add two offset/LBA-based AVL trees to the vdev queue object. sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h: Add a second AVL node within each ZIO so that vdev_queue.c can sort ZIOs by both type and priority. sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c: Combine reads and writes, irrespective of their priorities, into unified, offset-sorted trees. Selection of the ZIO to issue is unchanged, but aggregation now uses the unified tree of the appropriate type so that aggregation across priority classes is possible. Original author: Justin T. Gibbs justing@spectralogic.com Illumos issue: 5313 Allow I/Os to be aggregated across ZIO priority classes Modified: user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c Directory Properties: user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/ (props changed) Modified: user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h ============================================================================== --- user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h Fri Apr 17 21:21:11 2015 (r281667) +++ user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h Fri Apr 17 22:16:35 2015 (r281668) @@ -113,6 +113,8 @@ struct vdev_queue { vdev_t *vq_vdev; vdev_queue_class_t vq_class[ZIO_PRIORITY_NUM_QUEUEABLE]; avl_tree_t vq_active_tree; + avl_tree_t vq_read_offset_tree; + avl_tree_t vq_write_offset_tree; uint64_t vq_last_offset; hrtime_t vq_io_complete_ts; /* time last i/o completed */ kmutex_t 
vq_lock; Modified: user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h ============================================================================== --- user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h Fri Apr 17 21:21:11 2015 (r281667) +++ user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h Fri Apr 17 22:16:35 2015 (r281668) @@ -454,6 +454,7 @@ struct zio { uint64_t io_offset; hrtime_t io_timestamp; avl_node_t io_queue_node; + avl_node_t io_offset_node; /* Internal pipeline state */ enum zio_flag io_flags; Modified: user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c ============================================================================== --- user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c Fri Apr 17 21:21:11 2015 (r281667) +++ user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c Fri Apr 17 22:16:35 2015 (r281668) @@ -290,6 +290,22 @@ vdev_queue_offset_compare(const void *x1 return (0); } +static inline avl_tree_t * +vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p) +{ + return (&vq->vq_class[p].vqc_queued_tree); +} + +static inline avl_tree_t * +vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t) +{ + ASSERT(t == ZIO_TYPE_READ || t == ZIO_TYPE_WRITE); + if (t == ZIO_TYPE_READ) + return (&vq->vq_read_offset_tree); + else + return (&vq->vq_write_offset_tree); +} + int vdev_queue_timestamp_compare(const void *x1, const void *x2) { @@ -324,19 +340,27 @@ vdev_queue_init(vdev_t *vd) avl_create(&vq->vq_active_tree, vdev_queue_offset_compare, sizeof (zio_t), offsetof(struct zio, io_queue_node)); + avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ), + vdev_queue_offset_compare, sizeof (zio_t), + offsetof(struct zio, io_offset_node)); + avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE), + vdev_queue_offset_compare, sizeof (zio_t), + 
offsetof(struct zio, io_offset_node)); for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { + int (*compfn) (const void *, const void *); + /* - * The synchronous i/o queues are FIFO rather than LBA ordered. - * This provides more consistent latency for these i/os, and - * they tend to not be tightly clustered anyway so there is - * little to no throughput loss. + * The synchronous i/o queues are dispatched in FIFO rather + * than LBA order. This provides more consistent latency for + * these i/os. */ - boolean_t fifo = (p == ZIO_PRIORITY_SYNC_READ || - p == ZIO_PRIORITY_SYNC_WRITE); - avl_create(&vq->vq_class[p].vqc_queued_tree, - fifo ? vdev_queue_timestamp_compare : - vdev_queue_offset_compare, + if (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE) + compfn = vdev_queue_timestamp_compare; + else + compfn = vdev_queue_offset_compare; + + avl_create(vdev_queue_class_tree(vq, p), compfn, sizeof (zio_t), offsetof(struct zio, io_queue_node)); } @@ -349,8 +373,10 @@ vdev_queue_fini(vdev_t *vd) vdev_queue_t *vq = &vd->vdev_queue; for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) - avl_destroy(&vq->vq_class[p].vqc_queued_tree); + avl_destroy(vdev_queue_class_tree(vq, p)); avl_destroy(&vq->vq_active_tree); + avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ)); + avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE)); mutex_destroy(&vq->vq_lock); } @@ -361,7 +387,8 @@ vdev_queue_io_add(vdev_queue_t *vq, zio_ spa_t *spa = zio->io_spa; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); - avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio); + avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio); + avl_add(vdev_queue_type_tree(vq, zio->io_type), zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); @@ -378,7 +405,8 @@ vdev_queue_io_remove(vdev_queue_t *vq, z spa_t *spa = zio->io_spa; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); - 
avl_remove(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio); + avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio); + avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); @@ -551,7 +579,7 @@ vdev_queue_class_to_issue(vdev_queue_t * /* find a queue that has not reached its minimum # outstanding i/os */ for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { - if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 && + if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 && vq->vq_class[p].vqc_active < vdev_queue_class_min_active(p)) return (p); @@ -562,7 +590,7 @@ vdev_queue_class_to_issue(vdev_queue_t * * maximum # outstanding i/os. */ for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { - if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 && + if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 && vq->vq_class[p].vqc_active < vdev_queue_class_max_active(spa, p)) return (p); @@ -588,8 +616,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, z uint64_t maxgap = 0; uint64_t size; boolean_t stretch = B_FALSE; - vdev_queue_class_t *vqc = &vq->vq_class[zio->io_priority]; - avl_tree_t *t = &vqc->vqc_queued_tree; + avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type); enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT; ASSERT(MUTEX_HELD(&vq->vq_lock)); @@ -597,15 +624,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, z if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE) return (NULL); - /* - * The synchronous i/o queues are not sorted by LBA, so we can't - * find adjacent i/os. These i/os tend to not be tightly clustered, - * or too large to aggregate, so this has little impact on performance. 
- */ - if (zio->io_priority == ZIO_PRIORITY_SYNC_READ || - zio->io_priority == ZIO_PRIORITY_SYNC_WRITE) - return (NULL); - first = last = zio; if (zio->io_type == ZIO_TYPE_READ) @@ -737,7 +755,7 @@ vdev_queue_io_to_issue(vdev_queue_t *vq) zio_t *zio, *aio; zio_priority_t p; avl_index_t idx; - vdev_queue_class_t *vqc; + avl_tree_t *tree; zio_t search; again: @@ -756,13 +774,13 @@ again: * * For FIFO queues (sync), issue the i/o with the lowest timestamp. */ - vqc = &vq->vq_class[p]; + tree = vdev_queue_class_tree(vq, p); search.io_timestamp = 0; search.io_offset = vq->vq_last_offset + 1; - VERIFY3P(avl_find(&vqc->vqc_queued_tree, &search, &idx), ==, NULL); - zio = avl_nearest(&vqc->vqc_queued_tree, idx, AVL_AFTER); + VERIFY3P(avl_find(tree, &search, &idx), ==, NULL); + zio = avl_nearest(tree, idx, AVL_AFTER); if (zio == NULL) - zio = avl_first(&vqc->vqc_queued_tree); + zio = avl_first(tree); ASSERT3U(zio->io_priority, ==, p); aio = vdev_queue_aggregate(vq, zio);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201504172216.t3HMGaLT026329>