Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 14 Oct 2015 07:28:36 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org
Subject:   svn commit: r289294 - vendor-sys/illumos/dist/uts/common/fs/zfs
Message-ID:  <201510140728.t9E7SaTF053107@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Wed Oct 14 07:28:36 2015
New Revision: 289294
URL: https://svnweb.freebsd.org/changeset/base/289294

Log:
  5219 l2arc_write_buffers() may write beyond target_sz
  
  Reviewed by: Matthew Ahrens <mahrens@delphix.com>
  Reviewed by: Saso Kiselkov <skiselkov@gmail.com>
  Reviewed by: George Wilson <george@delphix.com>
  Reviewed by: Steven Hartland <steven.hartland@multiplay.co.uk>
  Reviewed by: Justin Gibbs <gibbs@FreeBSD.org>
  Approved by: Matthew Ahrens <mahrens@delphix.com>
  Author: Andriy Gapon <avg@freebsd.org>
  
  illumos/illumos-gate@d7d9a6d919f92d74ea0510a53f8441396048e800

Modified:
  vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c	Wed Oct 14 06:57:28 2015	(r289293)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c	Wed Oct 14 07:28:36 2015	(r289294)
@@ -5902,7 +5902,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
     boolean_t *headroom_boost)
 {
 	arc_buf_hdr_t *hdr, *hdr_prev, *head;
-	uint64_t write_asize, write_psize, write_sz, headroom,
+	uint64_t write_asize, write_sz, headroom,
 	    buf_compress_minsz;
 	void *buf_data;
 	boolean_t full;
@@ -5917,7 +5917,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
 	*headroom_boost = B_FALSE;
 
 	pio = NULL;
-	write_sz = write_asize = write_psize = 0;
+	write_sz = write_asize = 0;
 	full = B_FALSE;
 	head = kmem_cache_alloc(hdr_l2only_cache, KM_PUSHPAGE);
 	head->b_flags |= ARC_FLAG_L2_WRITE_HEAD;
@@ -5954,6 +5954,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
 		for (; hdr; hdr = hdr_prev) {
 			kmutex_t *hash_lock;
 			uint64_t buf_sz;
+			uint64_t buf_a_sz;
 
 			if (arc_warm == B_FALSE)
 				hdr_prev = multilist_sublist_next(mls, hdr);
@@ -5982,7 +5983,15 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
 				continue;
 			}
 
-			if ((write_sz + hdr->b_size) > target_sz) {
+			/*
+			 * Assume that the buffer is not going to be compressed
+			 * and could take more space on disk because of a larger
+			 * disk block size.
+			 */
+			buf_sz = hdr->b_size;
+			buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
+
+			if ((write_asize + buf_a_sz) > target_sz) {
 				full = B_TRUE;
 				mutex_exit(hash_lock);
 				break;
@@ -6046,7 +6055,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
 			 */
 			hdr->b_l2hdr.b_daddr = L2ARC_ADDR_UNSET;
 
-			buf_sz = hdr->b_size;
 			hdr->b_flags |= ARC_FLAG_HAS_L2HDR;
 
 			mutex_enter(&dev->l2ad_mtx);
@@ -6063,6 +6071,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
 			mutex_exit(hash_lock);
 
 			write_sz += buf_sz;
+			write_asize += buf_a_sz;
 		}
 
 		multilist_sublist_unlock(mls);
@@ -6082,6 +6091,19 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
 	mutex_enter(&dev->l2ad_mtx);
 
 	/*
+	 * Note that elsewhere in this file arcstat_l2_asize
+	 * and the used space on l2ad_vdev are updated using b_asize,
+	 * which is not necessarily rounded up to the device block size.
+	 * To keep accounting consistent we do the same here as well:
+	 * stats_size accumulates the sum of b_asize of the written buffers,
+	 * while write_asize accumulates the sum of b_asize rounded up
+	 * to the device block size.
+	 * The latter sum is used only to validate the correctness of the code.
+	 */
+	uint64_t stats_size = 0;
+	write_asize = 0;
+
+	/*
 	 * Now start writing the buffers. We're starting at the write head
 	 * and work backwards, retracing the course of the buffer selector
 	 * loop above.
@@ -6134,7 +6156,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
 
 		/* Compression may have squashed the buffer to zero length. */
 		if (buf_sz != 0) {
-			uint64_t buf_p_sz;
+			uint64_t buf_a_sz;
 
 			wzio = zio_write_phys(pio, dev->l2ad_vdev,
 			    dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF,
@@ -6145,14 +6167,14 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
 			    zio_t *, wzio);
 			(void) zio_nowait(wzio);
 
-			write_asize += buf_sz;
+			stats_size += buf_sz;
 
 			/*
 			 * Keep the clock hand suitably device-aligned.
 			 */
-			buf_p_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
-			write_psize += buf_p_sz;
-			dev->l2ad_hand += buf_p_sz;
+			buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
+			write_asize += buf_a_sz;
+			dev->l2ad_hand += buf_a_sz;
 		}
 	}
 
@@ -6162,8 +6184,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
 	ARCSTAT_BUMP(arcstat_l2_writes_sent);
 	ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize);
 	ARCSTAT_INCR(arcstat_l2_size, write_sz);
-	ARCSTAT_INCR(arcstat_l2_asize, write_asize);
-	vdev_space_update(dev->l2ad_vdev, write_asize, 0, 0);
+	ARCSTAT_INCR(arcstat_l2_asize, stats_size);
+	vdev_space_update(dev->l2ad_vdev, stats_size, 0, 0);
 
 	/*
 	 * Bump device hand to the device start if it is approaching the end.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201510140728.t9E7SaTF053107>