Date: Fri, 18 Jul 2014 22:04:22 +0000 (UTC) From: Xin LI <delphij@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r268858 - in head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys Message-ID: <201407182204.s6IM4Mex017576@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: delphij Date: Fri Jul 18 22:04:21 2014 New Revision: 268858 URL: http://svnweb.freebsd.org/changeset/base/268858 Log: MFV r268850: Change the interaction between the DMU and ARC so that when the DMU is shutting down an objset, we do not evict the data from the ARC. Instead we simply coordinate the destruction of the DMU's data with the ARC. The only case where we actually need to explicitly evict from the ARC is when dbuf_rele_and_unlock() determines that the administrator has requested that it not be kept in memory, via the primarycache/secondarycache properties. In this case, we evict the data from the ARC by its blkptr_t, the same way as when a block is freed we explicitly evict it from the ARC. Illumos issue: 4631 zvol_get_stats triggering too many reads MFC after: 2 weeks Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h Directory Properties: head/sys/cddl/contrib/opensolaris/ (props changed) Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Fri Jul 18 21:29:59 2014 (r268857) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Fri Jul 18 22:04:21 2014 (r268858) @@ -104,7 +104,7 @@ * with the buffer may be evicted prior to the callback. The callback * must be made with *no locks held* (to prevent deadlock). Additionally, * the users of callbacks must ensure that their private data is - * protected from simultaneous callbacks from arc_buf_evict() + * protected from simultaneous callbacks from arc_clear_callback() * and arc_do_user_evicts(). 
* * Note that the majority of the performance stats are manipulated @@ -1645,8 +1645,12 @@ arc_buf_data_free(arc_buf_t *buf, void ( } } +/* + * Free up buf->b_data and if 'remove' is set, then pull the + * arc_buf_t off of the the arc_buf_hdr_t's list and free it. + */ static void -arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all) +arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove) { arc_buf_t **bufp; @@ -1699,7 +1703,7 @@ arc_buf_destroy(arc_buf_t *buf, boolean_ } /* only remove the buf if requested */ - if (!all) + if (!remove) return; /* remove the buf from the hdr list */ @@ -2353,7 +2357,7 @@ restart: mutex_exit(&buf->b_evict_lock); if (buf->b_efunc != NULL) - VERIFY(buf->b_efunc(buf) == 0); + VERIFY0(buf->b_efunc(buf->b_private)); buf->b_efunc = NULL; buf->b_private = NULL; @@ -3488,16 +3492,25 @@ arc_freed(spa_t *spa, const blkptr_t *bp } /* - * This is used by the DMU to let the ARC know that a buffer is - * being evicted, so the ARC should clean up. If this arc buf - * is not yet in the evicted state, it will be put there. + * Clear the user eviction callback set by arc_set_callback(), first calling + * it if it exists. Because the presence of a callback keeps an arc_buf cached + * clearing the callback may result in the arc_buf being destroyed. However, + * it will not result in the *last* arc_buf being destroyed, hence the data + * will remain cached in the ARC. We make a copy of the arc buffer here so + * that we can process the callback without holding any locks. + * + * It's possible that the callback is already in the process of being cleared + * by another thread. In this case we can not clear the callback. + * + * Returns B_TRUE if the callback was successfully called and cleared. 
*/ -int -arc_buf_evict(arc_buf_t *buf) +boolean_t +arc_clear_callback(arc_buf_t *buf) { arc_buf_hdr_t *hdr; kmutex_t *hash_lock; - arc_buf_t **bufp; + arc_evict_func_t *efunc = buf->b_efunc; + void *private = buf->b_private; list_t *list, *evicted_list; kmutex_t *lock, *evicted_lock; @@ -3509,17 +3522,16 @@ arc_buf_evict(arc_buf_t *buf) */ ASSERT(buf->b_data == NULL); mutex_exit(&buf->b_evict_lock); - return (0); + return (B_FALSE); } else if (buf->b_data == NULL) { - arc_buf_t copy = *buf; /* structure assignment */ /* * We are on the eviction list; process this buffer now * but let arc_do_user_evicts() do the reaping. */ buf->b_efunc = NULL; mutex_exit(&buf->b_evict_lock); - VERIFY(copy.b_efunc(&copy) == 0); - return (1); + VERIFY0(efunc(private)); + return (B_TRUE); } hash_lock = HDR_LOCK(hdr); mutex_enter(hash_lock); @@ -3529,50 +3541,21 @@ arc_buf_evict(arc_buf_t *buf) ASSERT3U(refcount_count(&hdr->b_refcnt), <, hdr->b_datacnt); ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu); - /* - * Pull this buffer off of the hdr - */ - bufp = &hdr->b_buf; - while (*bufp != buf) - bufp = &(*bufp)->b_next; - *bufp = buf->b_next; - - ASSERT(buf->b_data != NULL); - arc_buf_destroy(buf, FALSE, FALSE); - - if (hdr->b_datacnt == 0) { - arc_state_t *old_state = hdr->b_state; - arc_state_t *evicted_state; - - ASSERT(hdr->b_buf == NULL); - ASSERT(refcount_is_zero(&hdr->b_refcnt)); - - evicted_state = - (old_state == arc_mru) ? 
arc_mru_ghost : arc_mfu_ghost; - - get_buf_info(hdr, old_state, &list, &lock); - get_buf_info(hdr, evicted_state, &evicted_list, &evicted_lock); - mutex_enter(lock); - mutex_enter(evicted_lock); - - arc_change_state(evicted_state, hdr, hash_lock); - ASSERT(HDR_IN_HASH_TABLE(hdr)); - hdr->b_flags |= ARC_IN_HASH_TABLE; - hdr->b_flags &= ~ARC_BUF_AVAILABLE; + buf->b_efunc = NULL; + buf->b_private = NULL; - mutex_exit(evicted_lock); - mutex_exit(lock); + if (hdr->b_datacnt > 1) { + mutex_exit(&buf->b_evict_lock); + arc_buf_destroy(buf, FALSE, TRUE); + } else { + ASSERT(buf == hdr->b_buf); + hdr->b_flags |= ARC_BUF_AVAILABLE; + mutex_exit(&buf->b_evict_lock); } - mutex_exit(hash_lock); - mutex_exit(&buf->b_evict_lock); - VERIFY(buf->b_efunc(buf) == 0); - buf->b_efunc = NULL; - buf->b_private = NULL; - buf->b_hdr = NULL; - buf->b_next = NULL; - kmem_cache_free(buf_cache, buf); - return (1); + mutex_exit(hash_lock); + VERIFY0(efunc(private)); + return (B_TRUE); } /* @@ -3722,17 +3705,6 @@ arc_released(arc_buf_t *buf) return (released); } -int -arc_has_callback(arc_buf_t *buf) -{ - int callback; - - mutex_enter(&buf->b_evict_lock); - callback = (buf->b_efunc != NULL); - mutex_exit(&buf->b_evict_lock); - return (callback); -} - #ifdef ZFS_DEBUG int arc_referenced(arc_buf_t *buf) Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Fri Jul 18 21:29:59 2014 (r268857) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Fri Jul 18 22:04:21 2014 (r268858) @@ -181,8 +181,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db) } /* - * Remove an entry from the hash table. This operation will - * fail if there are any existing holds on the db. + * Remove an entry from the hash table. It must be in the EVICTING state. 
*/ static void dbuf_hash_remove(dmu_buf_impl_t *db) @@ -194,7 +193,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db) dmu_buf_impl_t *dbf, **dbp; /* - * We musn't hold db_mtx to maintin lock ordering: + * We musn't hold db_mtx to maintain lock ordering: * DBUF_HASH_MUTEX > db_mtx. */ ASSERT(refcount_is_zero(&db->db_holds)); @@ -431,7 +430,6 @@ static void dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf) { ASSERT(MUTEX_HELD(&db->db_mtx)); - ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf)); db->db_buf = buf; if (buf != NULL) { ASSERT(buf->b_data != NULL); @@ -1544,12 +1542,15 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, a * when we are not holding the dn_dbufs_mtx, we can't clear the * entry in the dn_dbufs list. We have to wait until dbuf_destroy() * in this case. For callers from the DMU we will usually see: - * dbuf_clear()->arc_buf_evict()->dbuf_do_evict()->dbuf_destroy() + * dbuf_clear()->arc_clear_callback()->dbuf_do_evict()->dbuf_destroy() * For the arc callback, we will usually see: * dbuf_do_evict()->dbuf_clear();dbuf_destroy() * Sometimes, though, we will get a mix of these two: - * DMU: dbuf_clear()->arc_buf_evict() + * DMU: dbuf_clear()->arc_clear_callback() * ARC: dbuf_do_evict()->dbuf_destroy() + * + * This routine will dissociate the dbuf from the arc, by calling + * arc_clear_callback(), but will not evict the data from the ARC. 
*/ void dbuf_clear(dmu_buf_impl_t *db) @@ -1557,7 +1558,7 @@ dbuf_clear(dmu_buf_impl_t *db) dnode_t *dn; dmu_buf_impl_t *parent = db->db_parent; dmu_buf_impl_t *dndb; - int dbuf_gone = FALSE; + boolean_t dbuf_gone = B_FALSE; ASSERT(MUTEX_HELD(&db->db_mtx)); ASSERT(refcount_is_zero(&db->db_holds)); @@ -1603,7 +1604,7 @@ dbuf_clear(dmu_buf_impl_t *db) } if (db->db_buf) - dbuf_gone = arc_buf_evict(db->db_buf); + dbuf_gone = arc_clear_callback(db->db_buf); if (!dbuf_gone) mutex_exit(&db->db_mtx); @@ -1771,8 +1772,7 @@ dbuf_create(dnode_t *dn, uint8_t level, static int dbuf_do_evict(void *private) { - arc_buf_t *buf = private; - dmu_buf_impl_t *db = buf->b_private; + dmu_buf_impl_t *db = private; if (!MUTEX_HELD(&db->db_mtx)) mutex_enter(&db->db_mtx); @@ -2135,11 +2135,23 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, * block on-disk. If so, then we simply evict * ourselves. */ - if (!DBUF_IS_CACHEABLE(db) || - arc_buf_eviction_needed(db->db_buf)) + if (!DBUF_IS_CACHEABLE(db)) { + if (db->db_blkptr != NULL && + !BP_IS_HOLE(db->db_blkptr) && + !BP_IS_EMBEDDED(db->db_blkptr)) { + spa_t *spa = + dmu_objset_spa(db->db_objset); + blkptr_t bp = *db->db_blkptr; + dbuf_clear(db); + arc_freed(spa, &bp); + } else { + dbuf_clear(db); + } + } else if (arc_buf_eviction_needed(db->db_buf)) { dbuf_clear(db); - else + } else { mutex_exit(&db->db_mtx); + } } } else { mutex_exit(&db->db_mtx); Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h Fri Jul 18 21:29:59 2014 (r268857) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h Fri Jul 18 22:04:21 2014 (r268858) @@ -95,7 +95,6 @@ boolean_t arc_buf_remove_ref(arc_buf_t * int arc_buf_size(arc_buf_t *buf); void arc_release(arc_buf_t *buf, void *tag); int arc_released(arc_buf_t *buf); -int arc_has_callback(arc_buf_t *buf); void arc_buf_freeze(arc_buf_t *buf); void 
arc_buf_thaw(arc_buf_t *buf); boolean_t arc_buf_eviction_needed(arc_buf_t *buf); @@ -114,7 +113,7 @@ zio_t *arc_write(zio_t *pio, spa_t *spa, void arc_freed(spa_t *spa, const blkptr_t *bp); void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *priv); -int arc_buf_evict(arc_buf_t *buf); +boolean_t arc_clear_callback(arc_buf_t *buf); void arc_flush(spa_t *spa); void arc_tempreserve_clear(uint64_t reserve);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201407182204.s6IM4Mex017576>