From owner-svn-src-head@FreeBSD.ORG Thu Mar 21 10:29:06 2013 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by hub.freebsd.org (Postfix) with ESMTP id BBBC59D2; Thu, 21 Mar 2013 10:29:06 +0000 (UTC) (envelope-from smh@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 9E732ABB; Thu, 21 Mar 2013 10:29:06 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.6/8.14.6) with ESMTP id r2LAT6PX031402; Thu, 21 Mar 2013 10:29:06 GMT (envelope-from smh@svn.freebsd.org) Received: (from smh@localhost) by svn.freebsd.org (8.14.6/8.14.5/Submit) id r2LAT6ZB031399; Thu, 21 Mar 2013 10:29:06 GMT (envelope-from smh@svn.freebsd.org) Message-Id: <201303211029.r2LAT6ZB031399@svn.freebsd.org> From: Steven Hartland Date: Thu, 21 Mar 2013 10:29:06 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r248575 - in head/sys/cddl: compat/opensolaris/sys contrib/opensolaris/uts/common/fs/zfs X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 21 Mar 2013 10:29:06 -0000 Author: smh Date: Thu Mar 21 10:29:05 2013 New Revision: 248575 URL: http://svnweb.freebsd.org/changeset/base/248575 Log: TRIM cache devices based on time instead of TXGs. Currently, the trim module uses the same algorithm for data and cache devices when deciding to issue TRIM requests, based on how far in the past the TXG is. Unfortunately, this is not ideal for cache devices, because the L2ARC doesn't use the concept of TXGs at all. 
In fact, when using a pool for reading only, the L2ARC is written but the TXG counter doesn't increase, and so no new TRIM requests are issued to the cache device. This patch fixes the issue by using time instead of the TXG number as the criteria for trimming on cache devices. The basic delay principle stays the same, but parameters are expressed in seconds instead of TXGs. The new parameters are named trim_l2arc_limit and trim_l2arc_batch, and both default to 30 seconds. Reviewed by: pjd (mentor) Approved by: pjd (mentor) Obtained from: https://github.com/dechamps/zfs/commit/17122c31ac7f82875e837019205c21651c05f8cd MFC after: 2 weeks Modified: head/sys/cddl/compat/opensolaris/sys/time.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c Modified: head/sys/cddl/compat/opensolaris/sys/time.h ============================================================================== --- head/sys/cddl/compat/opensolaris/sys/time.h Thu Mar 21 10:16:10 2013 (r248574) +++ head/sys/cddl/compat/opensolaris/sys/time.h Thu Mar 21 10:29:05 2013 (r248575) @@ -35,6 +35,7 @@ #define MILLISEC 1000 #define MICROSEC 1000000 #define NANOSEC 1000000000 +#define TIME_MAX LLONG_MAX typedef longlong_t hrtime_t; Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c Thu Mar 21 10:16:10 2013 (r248574) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c Thu Mar 21 10:29:05 2013 (r248575) @@ -27,6 +27,7 @@ #include #include #include +#include /* * Calculate the zio end, upgrading based on ashift which would be @@ -54,6 +55,7 @@ typedef struct trim_seg { uint64_t ts_start; /* Starting offset of this segment. */ uint64_t ts_end; /* Ending offset (non-inclusive). */ uint64_t ts_txg; /* Segment creation txg. */ + hrtime_t ts_time; /* Segment creation time. 
*/ } trim_seg_t; extern boolean_t zfs_notrim; @@ -65,6 +67,11 @@ TUNABLE_INT("vfs.zfs.trim_txg_limit", &t SYSCTL_INT(_vfs_zfs, OID_AUTO, trim_txg_limit, CTLFLAG_RW, &trim_txg_limit, 0, "Delay TRIMs by that many TXGs."); +static int trim_l2arc_limit = 30; +TUNABLE_INT("vfs.zfs.trim_l2arc_limit", &trim_l2arc_limit); +SYSCTL_INT(_vfs_zfs, OID_AUTO, trim_l2arc_limit, CTLFLAG_RWTUN, &trim_l2arc_limit, 0, + "Delay TRIMs by this many seconds for cache devices."); + static void trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd); static int @@ -176,10 +183,12 @@ trim_map_segment_add(trim_map_t *tm, uin avl_index_t where; trim_seg_t tsearch, *ts_before, *ts_after, *ts; boolean_t merge_before, merge_after; + hrtime_t time; ASSERT(MUTEX_HELD(&tm->tm_lock)); VERIFY(start < end); + time = gethrtime(); tsearch.ts_start = start; tsearch.ts_end = end; @@ -214,6 +223,7 @@ trim_map_segment_add(trim_map_t *tm, uin ts->ts_start = start; ts->ts_end = end; ts->ts_txg = txg; + ts->ts_time = time; avl_insert(&tm->tm_queued_frees, ts, where); list_insert_tail(&tm->tm_head, ts); } @@ -236,6 +246,7 @@ trim_map_segment_remove(trim_map_t *tm, nts->ts_start = end; nts->ts_end = ts->ts_end; nts->ts_txg = ts->ts_txg; + nts->ts_time = ts->ts_time; ts->ts_end = start; avl_insert_here(&tm->tm_queued_frees, nts, ts, AVL_AFTER); list_insert_after(&tm->tm_head, ts, nts); @@ -359,17 +370,18 @@ trim_map_write_done(zio_t *zio) /* * Return the oldest segment (the one with the lowest txg) or false if * the list is empty or the first element's txg is greater than txg given - * as function argument. 
+ * as function argument, or the first element's time is greater than time + * given as function argument */ static trim_seg_t * -trim_map_first(trim_map_t *tm, uint64_t txg) +trim_map_first(trim_map_t *tm, uint64_t txg, hrtime_t time) { trim_seg_t *ts; ASSERT(MUTEX_HELD(&tm->tm_lock)); ts = list_head(&tm->tm_head); - if (ts != NULL && ts->ts_txg <= txg) + if (ts != NULL && ts->ts_txg <= txg && ts->ts_time <= time) return (ts); return (NULL); } @@ -380,20 +392,28 @@ trim_map_vdev_commit(spa_t *spa, zio_t * trim_map_t *tm = vd->vdev_trimmap; trim_seg_t *ts; uint64_t start, size, txglimit; + hrtime_t timelimit; ASSERT(vd->vdev_ops->vdev_op_leaf); if (tm == NULL) return; - txglimit = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa)) - - trim_txg_limit; + if (vd->vdev_isl2cache) { + timelimit = gethrtime() - trim_l2arc_limit * NANOSEC; + txglimit = UINT64_MAX; + } else { + timelimit = TIME_MAX; + txglimit = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa)) - + trim_txg_limit; + } mutex_enter(&tm->tm_lock); /* - * Loop until we send all frees up to the txglimit. + * Loop until we send all frees up to the txglimit + * or time limit if this is a cache device. */ - while ((ts = trim_map_first(tm, txglimit)) != NULL) { + while ((ts = trim_map_first(tm, txglimit, timelimit)) != NULL) { list_remove(&tm->tm_head, ts); avl_remove(&tm->tm_queued_frees, ts); avl_add(&tm->tm_inflight_frees, ts);