Date: Wed, 14 Jun 2017 16:44:11 +0000 (UTC) From: Andriy Gapon <avg@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org Subject: svn commit: r319950 - vendor-sys/illumos/dist/uts/common/fs/zfs Message-ID: <201706141644.v5EGiBA4075699@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: avg Date: Wed Jun 14 16:44:10 2017 New Revision: 319950 URL: https://svnweb.freebsd.org/changeset/base/319950 Log: 5220 L2ARC does not support devices that do not provide 512B access illumos/illumos-gate@403a8da73c64ff9dfb6230ba045c765a242213fb https://github.com/illumos/illumos-gate/commit/403a8da73c64ff9dfb6230ba045c765a242213fb https://www.illumos.org/issues/5220 There are disk devices that have a logical sector size larger than 512B, for example 4KB. That is, their physical sector size is larger than 512B and they do not provide emulation for 512B sector sizes. For such devices both the data offset and the data size must be properly aligned. L2ARC should arrange that because it uses physical I/O. zio_vdev_io_start() performs the necessary transformation if io_size is not aligned to vdev_ashift, but that is done only for logical I/O. Something similar should be done in L2ARC code. * a temporary write buffer should be allocated if the original buffer is not going to be compressed and its size is not aligned * the size of a temporary compression buffer should be ashift aligned * for the reads, if the size of a target buffer is not sufficiently large and it is not aligned then a temporary read buffer should be allocated Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Dan Kimmel <dan.kimmel@delphix.com> Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com> Approved by: Dan McDonald <danmcd@joyent.com> Author: Andriy Gapon <avg@FreeBSD.org> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c Wed Jun 14 16:42:38 2017 (r319949) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c Wed Jun 14 16:44:10 2017 (r319950) @@ -1075,6 +1075,7 @@ typedef struct l2arc_read_callback { blkptr_t l2rcb_bp; /* original blkptr */ zbookmark_phys_t l2rcb_zb; /* original bookmark */ 
int l2rcb_flags; /* original flags */ + abd_t *l2rcb_abd; /* temporary buffer */ } l2arc_read_callback_t; typedef struct l2arc_write_callback { @@ -5048,6 +5049,8 @@ top: !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) && !(l2arc_noprefetch && HDR_PREFETCH(hdr))) { l2arc_read_callback_t *cb; + abd_t *abd; + uint64_t asize; DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr); ARCSTAT_BUMP(arcstat_l2_hits); @@ -5059,8 +5062,17 @@ top: cb->l2rcb_zb = *zb; cb->l2rcb_flags = zio_flags; + asize = vdev_psize_to_asize(vd, size); + if (asize != size) { + abd = abd_alloc_for_io(asize, + HDR_ISTYPE_METADATA(hdr)); + cb->l2rcb_abd = abd; + } else { + abd = hdr->b_l1hdr.b_pabd; + } + ASSERT(addr >= VDEV_LABEL_START_SIZE && - addr + lsize < vd->vdev_psize - + addr + asize <= vd->vdev_psize - VDEV_LABEL_END_SIZE); /* @@ -5072,7 +5084,7 @@ top: ASSERT3U(HDR_GET_COMPRESS(hdr), !=, ZIO_COMPRESS_EMPTY); rzio = zio_read_phys(pio, vd, addr, - size, hdr->b_l1hdr.b_pabd, + asize, abd, ZIO_CHECKSUM_OFF, l2arc_read_done, cb, priority, zio_flags | ZIO_FLAG_DONT_CACHE | @@ -6566,6 +6578,33 @@ l2arc_read_done(zio_t *zio) mutex_enter(hash_lock); ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); + /* + * If the data was read into a temporary buffer, + * move it and free the buffer. + */ + if (cb->l2rcb_abd != NULL) { + ASSERT3U(arc_hdr_size(hdr), <, zio->io_size); + if (zio->io_error == 0) { + abd_copy(hdr->b_l1hdr.b_pabd, cb->l2rcb_abd, + arc_hdr_size(hdr)); + } + + /* + * The following must be done regardless of whether + * there was an error: + * - free the temporary buffer + * - point zio to the real ARC buffer + * - set zio size accordingly + * These are required because zio is either re-used for + * an I/O of the block in the case of the error + * or the zio is passed to arc_read_done() and it + * needs real data. 
+ */ + abd_free(cb->l2rcb_abd); + zio->io_size = zio->io_orig_size = arc_hdr_size(hdr); + zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd; + } + ASSERT3P(zio->io_abd, !=, NULL); /* @@ -6903,23 +6942,34 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint * Normally the L2ARC can use the hdr's data, but if * we're sharing data between the hdr and one of its * bufs, L2ARC needs its own copy of the data so that - * the ZIO below can't race with the buf consumer. To - * ensure that this copy will be available for the + * the ZIO below can't race with the buf consumer. + * Another case where we need to create a copy of the + * data is when the buffer size is not device-aligned + * and we need to pad the block to make it such. + * That also keeps the clock hand suitably aligned. + * + * To ensure that the copy will be available for the * lifetime of the ZIO and be cleaned up afterwards, we * add it to the l2arc_free_on_write queue. */ + uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, + size); abd_t *to_write; - if (!HDR_SHARED_DATA(hdr)) { + if (!HDR_SHARED_DATA(hdr) && size == asize) { to_write = hdr->b_l1hdr.b_pabd; } else { - to_write = abd_alloc_for_io(size, + to_write = abd_alloc_for_io(asize, HDR_ISTYPE_METADATA(hdr)); abd_copy(to_write, hdr->b_l1hdr.b_pabd, size); + if (asize != size) { + abd_zero_off(to_write, size, + asize - size); + } l2arc_free_abd_on_write(to_write, size, arc_buf_type(hdr)); } wzio = zio_write_phys(pio, dev->l2ad_vdev, - hdr->b_l2hdr.b_daddr, size, to_write, + hdr->b_l2hdr.b_daddr, asize, to_write, ZIO_CHECKSUM_OFF, NULL, hdr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_CANFAIL, B_FALSE); @@ -6929,11 +6979,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint zio_t *, wzio); write_asize += size; - /* - * Keep the clock hand suitably device-aligned. - */ - uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, - size); write_psize += asize; dev->l2ad_hand += asize;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201706141644.v5EGiBA4075699>