Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 14 Jun 2017 16:44:11 +0000 (UTC)
From:      Andriy Gapon <avg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org
Subject:   svn commit: r319950 - vendor-sys/illumos/dist/uts/common/fs/zfs
Message-ID:  <201706141644.v5EGiBA4075699@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: avg
Date: Wed Jun 14 16:44:10 2017
New Revision: 319950
URL: https://svnweb.freebsd.org/changeset/base/319950

Log:
  5220 L2ARC does not support devices that do not provide 512B access
  
  illumos/illumos-gate@403a8da73c64ff9dfb6230ba045c765a242213fb
  https://github.com/illumos/illumos-gate/commit/403a8da73c64ff9dfb6230ba045c765a242213fb
  
  https://www.illumos.org/issues/5220
    There are disk devices that have a logical sector size larger than 512B, for
    example 4KB. That is, their physical sector size is larger than 512B and they
    do not provide emulation for 512B sector sizes. For such devices both the data
    offset and the data size must be properly aligned. L2ARC has to arrange for
    that itself because it uses physical I/O.
    zio_vdev_io_start() performs the necessary transformation if io_size is not
    aligned to vdev_ashift, but that is done only for logical I/O. Something
    similar should be done in the L2ARC code:
        * a temporary write buffer should be allocated if the original buffer is
          not going to be compressed and its size is not aligned
        * the size of a temporary compression buffer should be ashift-aligned
        * for reads, if the size of the target buffer is not sufficiently large and
          is not aligned, then a temporary read buffer should be allocated
  
  Reviewed by: George Wilson <george.wilson@delphix.com>
  Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
  Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
  Approved by: Dan McDonald <danmcd@joyent.com>
  Author: Andriy Gapon <avg@FreeBSD.org>

Modified:
  vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c	Wed Jun 14 16:42:38 2017	(r319949)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c	Wed Jun 14 16:44:10 2017	(r319950)
@@ -1075,6 +1075,7 @@ typedef struct l2arc_read_callback {
 	blkptr_t		l2rcb_bp;		/* original blkptr */
 	zbookmark_phys_t	l2rcb_zb;		/* original bookmark */
 	int			l2rcb_flags;		/* original flags */
+	abd_t			*l2rcb_abd;		/* temporary buffer */
 } l2arc_read_callback_t;
 
 typedef struct l2arc_write_callback {
@@ -5048,6 +5049,8 @@ top:
 			    !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
 			    !(l2arc_noprefetch && HDR_PREFETCH(hdr))) {
 				l2arc_read_callback_t *cb;
+				abd_t *abd;
+				uint64_t asize;
 
 				DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
 				ARCSTAT_BUMP(arcstat_l2_hits);
@@ -5059,8 +5062,17 @@ top:
 				cb->l2rcb_zb = *zb;
 				cb->l2rcb_flags = zio_flags;
 
+				asize = vdev_psize_to_asize(vd, size);
+				if (asize != size) {
+					abd = abd_alloc_for_io(asize,
+					    HDR_ISTYPE_METADATA(hdr));
+					cb->l2rcb_abd = abd;
+				} else {
+					abd = hdr->b_l1hdr.b_pabd;
+				}
+
 				ASSERT(addr >= VDEV_LABEL_START_SIZE &&
-				    addr + lsize < vd->vdev_psize -
+				    addr + asize <= vd->vdev_psize -
 				    VDEV_LABEL_END_SIZE);
 
 				/*
@@ -5072,7 +5084,7 @@ top:
 				ASSERT3U(HDR_GET_COMPRESS(hdr), !=,
 				    ZIO_COMPRESS_EMPTY);
 				rzio = zio_read_phys(pio, vd, addr,
-				    size, hdr->b_l1hdr.b_pabd,
+				    asize, abd,
 				    ZIO_CHECKSUM_OFF,
 				    l2arc_read_done, cb, priority,
 				    zio_flags | ZIO_FLAG_DONT_CACHE |
@@ -6566,6 +6578,33 @@ l2arc_read_done(zio_t *zio)
 	mutex_enter(hash_lock);
 	ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
 
+	/*
+	 * If the data was read into a temporary buffer,
+	 * move it and free the buffer.
+	 */
+	if (cb->l2rcb_abd != NULL) {
+		ASSERT3U(arc_hdr_size(hdr), <, zio->io_size);
+		if (zio->io_error == 0) {
+			abd_copy(hdr->b_l1hdr.b_pabd, cb->l2rcb_abd,
+			    arc_hdr_size(hdr));
+		}
+
+		/*
+		 * The following must be done regardless of whether
+		 * there was an error:
+		 * - free the temporary buffer
+		 * - point zio to the real ARC buffer
+		 * - set zio size accordingly
+		 * These are required because zio is either re-used for
+		 * an I/O of the block in the case of the error
+		 * or the zio is passed to arc_read_done() and it
+		 * needs real data.
+		 */
+		abd_free(cb->l2rcb_abd);
+		zio->io_size = zio->io_orig_size = arc_hdr_size(hdr);
+		zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd;
+	}
+
 	ASSERT3P(zio->io_abd, !=, NULL);
 
 	/*
@@ -6903,23 +6942,34 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint
 			 * Normally the L2ARC can use the hdr's data, but if
 			 * we're sharing data between the hdr and one of its
 			 * bufs, L2ARC needs its own copy of the data so that
-			 * the ZIO below can't race with the buf consumer. To
-			 * ensure that this copy will be available for the
+			 * the ZIO below can't race with the buf consumer.
+			 * Another case where we need to create a copy of the
+			 * data is when the buffer size is not device-aligned
+			 * and we need to pad the block to make it such.
+			 * That also keeps the clock hand suitably aligned.
+			 *
+			 * To ensure that the copy will be available for the
 			 * lifetime of the ZIO and be cleaned up afterwards, we
 			 * add it to the l2arc_free_on_write queue.
 			 */
+			uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
+			    size);
 			abd_t *to_write;
-			if (!HDR_SHARED_DATA(hdr)) {
+			if (!HDR_SHARED_DATA(hdr) && size == asize) {
 				to_write = hdr->b_l1hdr.b_pabd;
 			} else {
-				to_write = abd_alloc_for_io(size,
+				to_write = abd_alloc_for_io(asize,
 				    HDR_ISTYPE_METADATA(hdr));
 				abd_copy(to_write, hdr->b_l1hdr.b_pabd, size);
+				if (asize != size) {
+					abd_zero_off(to_write, size,
+					    asize - size);
+				}
 				l2arc_free_abd_on_write(to_write, size,
 				    arc_buf_type(hdr));
 			}
 			wzio = zio_write_phys(pio, dev->l2ad_vdev,
-			    hdr->b_l2hdr.b_daddr, size, to_write,
+			    hdr->b_l2hdr.b_daddr, asize, to_write,
 			    ZIO_CHECKSUM_OFF, NULL, hdr,
 			    ZIO_PRIORITY_ASYNC_WRITE,
 			    ZIO_FLAG_CANFAIL, B_FALSE);
@@ -6929,11 +6979,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint
 			    zio_t *, wzio);
 
 			write_asize += size;
-			/*
-			 * Keep the clock hand suitably device-aligned.
-			 */
-			uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
-			    size);
 			write_psize += asize;
 			dev->l2ad_hand += asize;
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201706141644.v5EGiBA4075699>