Date: Mon, 14 Dec 2009 03:52:21 +0000 (UTC) From: Kip Macy <kmacy@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r200501 - in user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys Message-ID: <200912140352.nBE3qLw1007017@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kmacy Date: Mon Dec 14 03:52:21 2009 New Revision: 200501 URL: http://svn.freebsd.org/changeset/base/200501 Log: - reduce changes to arc.c to including zfs_bio.h and not recycling buffers whose pages are cacheable - streamline zfs_bio.c interfaces so that it can be a drop in backend for zio buf allocation - prefix original zio_buf functions with '_' so that all callers to the zio_buf function are now routed through the page cache logic if page caching is not disabled - change zbio_ functions to zio_ except where a naming conflict could occur in which case they're renamed to zfs_bio - add zio_cache_validate to zio_done to mark pages as valid on read completion - move conditional logic for call to zio_cache_sync and zio_cache_validate in to inline function to minimize churn in core ZFS code Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c ============================================================================== --- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Mon Dec 14 02:50:04 2009 (r200500) +++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Mon Dec 14 03:52:21 2009 (r200501) @@ -122,7 +122,6 @@ #include <sys/zio_checksum.h> #include <sys/zfs_context.h> #include <sys/arc.h> -#include <sys/zfs_bio.h> #include <sys/refcount.h> #include <sys/vdev.h> #ifdef _KERNEL @@ -446,32 +445,28 @@ struct arc_write_callback { arc_buf_t *awcb_buf; }; -/* - * Keep initial ordering in-sync with zbio_buf_hdr - */ struct arc_buf_hdr { /* protected by hash lock */ dva_t b_dva; uint64_t b_birth; - uint32_t b_flags; - uint32_t b_datacnt; - - /* immutable */ - arc_buf_contents_t b_type; - uint64_t b_size; - spa_t *b_spa; + uint64_t b_cksum0; - /* protected by hash lock */ kmutex_t b_freeze_lock; zio_cksum_t *b_freeze_cksum; arc_buf_hdr_t *b_hash_next; arc_buf_t *b_buf; - uint64_t b_cksum0; + uint32_t b_flags; + uint32_t b_datacnt; arc_callback_t *b_acb; kcondvar_t b_cv; + /* immutable */ + arc_buf_contents_t b_type; + uint64_t b_size; + spa_t *b_spa; + /* protected by arc state mutex */ arc_state_t *b_state; list_node_t b_arc_node; @@ -637,14 +632,12 @@ struct l2arc_buf_hdr { typedef struct l2arc_data_free { /* protected by l2arc_free_on_write_mtx */ - arc_buf_t *l2df_buf; + void *l2df_data; size_t l2df_size; - void (*l2df_func)(arc_buf_t *, size_t); + void (*l2df_func)(void *, size_t); list_node_t l2df_list_node; } l2arc_data_free_t; -extern int zfs_page_cache_disable; - static kmutex_t l2arc_feed_thr_lock; static kcondvar_t l2arc_feed_thr_cv; static uint8_t l2arc_thread_exit; @@ -1241,7 +1234,6 @@ arc_buf_clone(arc_buf_t *from) buf->b_private = NULL; buf->b_next = hdr->b_buf; hdr->b_buf = buf; - hdr->b_flags |= ZBIO_BUF_CLONING; arc_get_data_buf(buf); bcopy(from->b_data, buf->b_data, size); hdr->b_datacnt += 1; @@ -1285,13 +1277,13 @@ arc_buf_add_ref(arc_buf_t *buf, void* ta * the buffer is placed on l2arc_free_on_write to be freed later. */ static void -arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(arc_buf_t *, size_t), - arc_buf_t *buf, size_t size) +arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t), + void *data, size_t size) { if (HDR_L2_WRITING(hdr)) { l2arc_data_free_t *df; df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP); - df->l2df_buf = buf; + df->l2df_data = data; df->l2df_size = size; df->l2df_func = free_func; mutex_enter(&l2arc_free_on_write_mtx); @@ -1299,7 +1291,7 @@ arc_buf_data_free(arc_buf_hdr_t *hdr, vo mutex_exit(&l2arc_free_on_write_mtx); ARCSTAT_BUMP(arcstat_l2_free_on_write); } else { - free_func(buf, size); + free_func(data, size); } } @@ -1317,13 +1309,13 @@ arc_buf_destroy(arc_buf_t *buf, boolean_ arc_cksum_verify(buf); if (!recycle) { if (type == ARC_BUFC_METADATA) { - arc_buf_data_free(buf->b_hdr, zbio_relse, - buf, size); + arc_buf_data_free(buf->b_hdr, zio_buf_free, + buf->b_data, size); arc_space_return(size); } else { ASSERT(type == ARC_BUFC_DATA); arc_buf_data_free(buf->b_hdr, - zbio_relse, buf, size); + zio_data_buf_free, buf->b_data, size); atomic_add_64(&arc_size, -size); } } @@ -1541,15 +1533,8 @@ arc_evict(arc_state_t *state, spa_t *spa ASSERT(state == arc_mru || state == arc_mfu); evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost; + recycle = (bytes & PAGE_MASK) ? recycle : FALSE; -#ifdef _KERNEL - /* - * don't recycle page cache bufs - * - */ - if (recycle && ((bytes & PAGE_MASK) != 0) && !zfs_page_cache_disable) - recycle = FALSE; -#endif if (type == ARC_BUFC_METADATA) { offset = 0; list_count = ARC_BUFC_NUMMETADATALISTS; @@ -1973,6 +1958,7 @@ arc_reclaim_needed(void) if (arc_size <= arc_c_min) return (0); +#if 0 /* * If pages are needed or we're within 2048 pages * of needing to page need to reclaim @@ -1980,7 +1966,7 @@ arc_reclaim_needed(void) if (vm_pages_needed || (vm_paging_target() > -2048)) return (1); -#if 0 + /* * take 'desfree' extra pages, so we reclaim sooner, rather than later */ @@ -2284,11 +2270,11 @@ arc_get_data_buf(arc_buf_t *buf) */ if (!arc_evict_needed(type)) { if (type == ARC_BUFC_METADATA) { - zbio_getblk(buf); + buf->b_data = zio_buf_alloc(size); arc_space_consume(size); } else { ASSERT(type == ARC_BUFC_DATA); - zbio_data_getblk(buf); + buf->b_data = zio_data_buf_alloc(size); atomic_add_64(&arc_size, size); } goto out; @@ -2315,11 +2301,11 @@ arc_get_data_buf(arc_buf_t *buf) } if ((buf->b_data = arc_evict(state, NULL, size, TRUE, type)) == NULL) { if (type == ARC_BUFC_METADATA) { - zbio_getblk(buf); + buf->b_data = zio_buf_alloc(size); arc_space_consume(size); } else { ASSERT(type == ARC_BUFC_DATA); - zbio_data_getblk(buf); + buf->b_data = zio_data_buf_alloc(size); atomic_add_64(&arc_size, size); } if (size & PAGE_MASK) @@ -4006,9 +3992,9 @@ l2arc_do_free_on_write() for (df = list_tail(buflist); df; df = df_prev) { df_prev = list_prev(buflist, df); - ASSERT(df->l2df_buf != NULL); + ASSERT(df->l2df_data != NULL); ASSERT(df->l2df_func != NULL); - df->l2df_func(df->l2df_buf, df->l2df_size); + df->l2df_func(df->l2df_data, df->l2df_size); list_remove(buflist, df); kmem_free(df, sizeof (l2arc_data_free_t)); } Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h ============================================================================== --- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h Mon Dec 14 02:50:04 2009 (r200500) +++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h Mon Dec 14 03:52:21 2009 (r200501) @@ -52,9 +52,6 @@ struct arc_buf { void *b_data; arc_evict_func_t *b_efunc; void *b_private; -#ifdef _KERNEL - struct buf *b_bp; -#endif }; typedef enum arc_buf_contents { Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h ============================================================================== --- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h Mon Dec 14 02:50:04 2009 (r200500) +++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h Mon Dec 14 03:52:21 2009 (r200501) @@ -31,30 +31,43 @@ $FreeBSD$ #ifndef _SYS_ZFS_BIO_H #define _SYS_ZFS_BIO_H +#include <sys/vdev_impl.h> /* vd->vdev_vnode */ +#include <sys/zfs_context.h> -#define ZBIO_BUF_CLONING (1 << 30) /* is being cloned */ +extern int zfs_page_cache_disable; -int zbio_sync_cache(spa_t *spa, blkptr_t *bp, uint64_t txg, void *data, uint64_t size, int bio_op); -void zbio_getblk(arc_buf_t *buf); -void zbio_data_getblk(arc_buf_t *buf); -void zbio_relse(arc_buf_t *buf, size_t size); - -typedef struct zbio_buf_hdr zbio_buf_hdr_t; -struct zbio_buf_hdr { - /* protected by hash lock */ - dva_t b_dva; - uint64_t b_birth; - uint32_t b_flags; - uint32_t b_datacnt; - - /* immutable */ - arc_buf_contents_t b_type; - uint64_t b_size; - spa_t *b_spa; -}; +void _zio_cache_valid(void *data, uint64_t size); +int _zio_sync_cache(spa_t *spa, blkptr_t *bp, uint64_t txg, void *data, + uint64_t size, zio_type_t type); + +static __inline int +zio_sync_cache(spa_t *spa, blkptr_t *bp, uint64_t txg, void *data, + uint64_t size, zio_type_t type, vdev_t *vd) +{ + int io_bypass = 0; + + if (!zfs_page_cache_disable && + ((vd != NULL) && (vd->vdev_vnode != NULL)) && + ((type == ZIO_TYPE_WRITE) || (type == ZIO_TYPE_READ))) + io_bypass = _zio_sync_cache(spa, bp, txg, data, size, type); + + return (io_bypass); +} + +static __inline void +zio_cache_valid(void *data, uint64_t size, zio_type_t type, vdev_t *vd) +{ + + if ((vd != NULL) && (type == ZIO_TYPE_READ) && + (vd->vdev_vnode != NULL) && (size & PAGE_MASK) == 0) + _zio_cache_valid(data, size); +} + +void *zio_getblk(uint64_t size, int flags); +void zio_relse(void *data, size_t size); #ifdef _KERNEL -void zbio_init(void); -void zbio_fini(void); +void zfs_bio_init(void); +void zfs_bio_fini(void); #endif #endif Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h ============================================================================== --- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h Mon Dec 14 02:50:04 2009 (r200500) +++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h Mon Dec 14 03:52:21 2009 (r200501) @@ -371,6 +371,11 @@ extern void zio_buf_free(void *buf, size extern void *zio_data_buf_alloc(size_t size); extern void zio_data_buf_free(void *buf, size_t size); +extern void *_zio_buf_alloc(size_t size); +extern void _zio_buf_free(void *buf, size_t size); +extern void *_zio_data_buf_alloc(size_t size); +extern void _zio_data_buf_free(void *buf, size_t size); + extern void zio_resubmit_stage_async(void *); extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd, Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c ============================================================================== --- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c Mon Dec 14 02:50:04 2009 (r200500) +++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c Mon Dec 14 03:52:21 2009 (r200501) @@ -29,7 +29,7 @@ POSSIBILITY OF SUCH DAMAGE. /************************************************************************** This module integrates the caching af pages associated with ARC buffers in a -per-SPA vm object. Each SPA also has an associated "zbio_state_t" which +per-SPA vm object. Each SPA also has an associated "zio_state_t" which tracks bufs allocated for the SPA in two splay trees. The first splay tree tracks bufs by the data pointer's virtual address. @@ -86,16 +86,14 @@ __FBSDID("$FreeBSD$"); #include <sys/zio_checksum.h> #include <sys/zfs_context.h> #include <sys/arc.h> -#include <sys/zfs_bio.h> #include <sys/refcount.h> #include <sys/vdev.h> #include <sys/callb.h> #include <sys/kstat.h> #include <sys/sdt.h> - #include <sys/sf_buf.h> -#include <vm/vm_pageout.h> +#include <sys/zfs_bio.h> #ifdef _KERNEL @@ -110,16 +108,15 @@ TUNABLE_INT("vfs.zfs.page_cache_disable" SYSCTL_INT(_vfs_zfs, OID_AUTO, page_cache_disable, CTLFLAG_RDTUN, &zfs_page_cache_disable, 0, "Disable backing ARC with page cache "); -static eventhandler_tag zbio_event_shutdown = NULL; -struct zbio_state; -typedef struct zbio_state zbio_state_t; +static eventhandler_tag zfs_bio_event_shutdown = NULL; +struct zio_state; +typedef struct zio_state zio_state_t; typedef struct buf buf_t; -typedef uint64_t zbio_pindex_t; MALLOC_DEFINE(M_ZFS_BIO, "zfs_bio", "zfs buffer cache / vm"); #define B_EVICTED B_00000800 -#define B_CLONED B_00001000 +#define B_DATA B_00001000 #define B_ASSIGNED B_00004000 #define ZB_EVICT_ALL 0x1 @@ -133,10 +130,9 @@ MALLOC_DEFINE(M_ZFS_BIO, "zfs_bio", "zfs #define btos(nbytes) ((nbytes)>>DEV_BSHIFT) #define stob(nsectors) ((nsectors)<<DEV_BSHIFT) -#define b_arc_buf b_fsprivate2 #define b_state b_fsprivate3 -struct zbio_state { +struct zio_state { struct mtx mtx; buf_t *blkno_root; /* track buf by blkno */ buf_t *va_root; /* track buf by data address */ @@ -147,19 +143,21 @@ struct zbio_state { TAILQ_HEAD(, buf) va_memq; /* list of resident buffers */ }; -#define ZBIO_STATE_LOCK(zs) mtx_lock(&(zs)->mtx) -#define ZBIO_STATE_UNLOCK(zs) mtx_unlock(&(zs)->mtx) +static zio_state_t global_state; -#define spa_get_bio_state(spa) ((zbio_state_t *)spa_get_vnode((spa))->v_data) +#define ZIO_STATE_LOCK(zs) mtx_lock(&(zs)->mtx) +#define ZIO_STATE_UNLOCK(zs) mtx_unlock(&(zs)->mtx) + +#define spa_get_zio_state(spa) ((zio_state_t *)spa_get_vnode((spa))->v_data) #define spa_get_vm_object(spa) spa_get_vnode((spa))->v_object -#define zbio_buf_get_spa(bp) (((zbio_buf_hdr_t *)((arc_buf_t *)(bp->b_arc_buf))->b_hdr)->b_spa) -#define zbio_buf_get_vm_object(bp) spa_get_vm_object(zbio_buf_get_spa((bp))) +#define zio_buf_get_spa(bp) (((zio_state_t *)bp->b_state)->spa) +#define zio_buf_get_vm_object(bp) spa_get_vm_object(zio_buf_get_spa((bp))) -static void zbio_buf_blkno_remove(buf_t *bp); -static void zbio_buf_va_insert(buf_t *bp, zbio_state_t *object); +static void zio_buf_blkno_remove(buf_t *bp); +static void zio_buf_va_insert(buf_t *bp); /* - * zbio_buf_blkno_splay: [ internal use only ] + * zio_buf_blkno_splay: [ internal use only ] * * Implements Sleator and Tarjan's top-down splay algorithm. Returns * the buf containing the given lblkno. If, however, that @@ -167,7 +165,7 @@ static void zbio_buf_va_insert(buf_t *bp * adjacent to the pindex, coming before or after it. */ static buf_t * -zbio_buf_blkno_splay(daddr_t blkno, buf_t *root) +zio_buf_blkno_splay(daddr_t blkno, buf_t *root) { buf_t dummy; buf_t *lefttreemax, *righttreemin, *y; @@ -216,7 +214,7 @@ zbio_buf_blkno_splay(daddr_t blkno, buf_ } static buf_t * -zbio_buf_va_splay(caddr_t va, buf_t *root) +zio_buf_va_splay(caddr_t va, buf_t *root) { buf_t dummy; buf_t *lefttreemax, *righttreemin, *y; @@ -265,7 +263,7 @@ zbio_buf_va_splay(caddr_t va, buf_t *roo } /* - * zbio_buf_blkno_insert: [ internal use only ] + * zio_buf_blkno_insert: [ internal use only ] * * Inserts the given buf into the state splay tree and state list. * @@ -273,7 +271,7 @@ zbio_buf_va_splay(caddr_t va, buf_t *roo * This routine may not block. */ static void -zbio_buf_blkno_insert(buf_t *bp, zbio_state_t *object) +zio_buf_blkno_insert(buf_t *bp, zio_state_t *object) { buf_t *root; daddr_t root_blkno_end, blkno, blkno_end; @@ -287,7 +285,7 @@ zbio_buf_blkno_insert(buf_t *bp, zbio_st bp->b_right = NULL; TAILQ_INSERT_TAIL(&object->blkno_memq, bp, b_bobufs); } else { - root = zbio_buf_blkno_splay(bp->b_blkno, root); + root = zio_buf_blkno_splay(bp->b_blkno, root); root_blkno_end = root->b_blkno + btos(root->b_bcount); if (blkno < root->b_blkno) { @@ -297,7 +295,7 @@ zbio_buf_blkno_insert(buf_t *bp, zbio_st root->b_left = NULL; TAILQ_INSERT_BEFORE(root, bp, b_bobufs); } else if (blkno == root->b_blkno) { - panic("zbio_buf_blkno_insert: blkno already allocated"); + panic("zio_buf_blkno_insert: blkno already allocated"); } else { KASSERT(root_blkno_end <= blkno, ("buffer overlap!")); @@ -317,7 +315,7 @@ zbio_buf_blkno_insert(buf_t *bp, zbio_st } /* - * zbio_buf_insert: [ internal use only ] + * zio_buf_insert: [ internal use only ] * * Inserts the given buf into the state splay tree and state list. * @@ -325,26 +323,26 @@ zbio_buf_blkno_insert(buf_t *bp, zbio_st * This routine may not block. */ static void -zbio_buf_va_insert(buf_t *bp, zbio_state_t *object) +zio_buf_va_insert(buf_t *bp) { buf_t *root; caddr_t va = bp->b_data; + zio_state_t *object = &global_state; - bp->b_state = object; root = object->va_root; if (root == NULL) { bp->b_left = NULL; bp->b_right = NULL; TAILQ_INSERT_TAIL(&object->va_memq, bp, b_bobufs); } else { - root = zbio_buf_va_splay(bp->b_data, root); + root = zio_buf_va_splay(bp->b_data, root); if (va < root->b_data) { bp->b_left = root->b_left; bp->b_right = root; root->b_left = NULL; TAILQ_INSERT_BEFORE(root, bp, b_bobufs); } else if (va == root->b_data) { - panic("zbio_buf_va_insert: address already allocated"); + panic("zio_buf_va_insert: address already allocated"); } else { bp->b_right = root->b_right; bp->b_left = root; @@ -362,7 +360,7 @@ zbio_buf_va_insert(buf_t *bp, zbio_state } /* - * zbio_buf_remove: + * zio_buf_remove: * * Removes the given buf from the spa's state tree * buf list @@ -371,9 +369,9 @@ zbio_buf_va_insert(buf_t *bp, zbio_state * This routine may not block. */ static void -zbio_buf_blkno_remove(buf_t *bp) +zio_buf_blkno_remove(buf_t *bp) { - zbio_state_t *state; + zio_state_t *state; buf_t *root; daddr_t blkno, blkno_end; @@ -384,11 +382,11 @@ zbio_buf_blkno_remove(buf_t *bp) * Now remove from the object's list of backed pages. */ if (bp != state->blkno_root) - zbio_buf_blkno_splay(bp->b_blkno, state->blkno_root); + zio_buf_blkno_splay(bp->b_blkno, state->blkno_root); if (bp->b_left == NULL) root = bp->b_right; else { - root = zbio_buf_blkno_splay(bp->b_blkno, bp->b_left); + root = zio_buf_blkno_splay(bp->b_blkno, bp->b_left); root->b_right = bp->b_right; } state->blkno_root = root; @@ -402,7 +400,7 @@ zbio_buf_blkno_remove(buf_t *bp) } /* - * zbio_buf_va_remove: + * zio_buf_va_remove: * * Removes the given buf from the spa's state tree * buf list @@ -411,9 +409,9 @@ zbio_buf_blkno_remove(buf_t *bp) * This routine may not block. */ static void -zbio_buf_va_remove(buf_t *bp) +zio_buf_va_remove(buf_t *bp) { - zbio_state_t *state; + zio_state_t *state; buf_t *root; vm_offset_t va; @@ -424,11 +422,11 @@ zbio_buf_va_remove(buf_t *bp) * Now remove from the object's list of backed pages. */ if (bp != state->va_root) - zbio_buf_va_splay(bp->b_data, state->va_root); + zio_buf_va_splay(bp->b_data, state->va_root); if (bp->b_left == NULL) root = bp->b_right; else { - root = zbio_buf_va_splay(bp->b_data, bp->b_left); + root = zio_buf_va_splay(bp->b_data, bp->b_left); root->b_right = bp->b_right; } state->va_root = root; @@ -442,7 +440,7 @@ zbio_buf_va_remove(buf_t *bp) } /* - * zbio_buf_va_lookup: + * zio_buf_va_lookup: * * Returns the range associated with the object/offset * pair specified; if none is found, NULL is returned. @@ -452,13 +450,13 @@ zbio_buf_va_remove(buf_t *bp) * This is a critical path routine */ static buf_t * -zbio_buf_va_lookup(zbio_state_t *state, caddr_t va) +zio_buf_va_lookup(caddr_t va) { buf_t *bp; - if ((bp = state->va_root) != NULL && bp->b_data != va) { - bp = zbio_buf_va_splay(va, bp); - if ((state->va_root = bp)->b_data != va) + if ((bp = global_state.va_root) != NULL && bp->b_data != va) { + bp = zio_buf_va_splay(va, bp); + if ((global_state.va_root = bp)->b_data != va) bp = NULL; } return (bp); @@ -466,7 +464,7 @@ zbio_buf_va_lookup(zbio_state_t *state, /* - * zbio_buf_blkno_lookup: + * zio_buf_blkno_lookup: * * Returns the range associated with the object/offset * pair specified; if none is found, NULL is returned. @@ -476,12 +474,12 @@ zbio_buf_va_lookup(zbio_state_t *state, * This is a critical path routine */ static buf_t * -zbio_buf_blkno_lookup(zbio_state_t *state, daddr_t blkno) +zio_buf_blkno_lookup(zio_state_t *state, daddr_t blkno) { buf_t *bp; if ((bp = state->blkno_root) != NULL && bp->b_blkno != blkno) { - bp = zbio_buf_blkno_splay(blkno, bp); + bp = zio_buf_blkno_splay(blkno, bp); if ((state->blkno_root = bp)->b_blkno != blkno) bp = NULL; } @@ -489,7 +487,7 @@ zbio_buf_blkno_lookup(zbio_state_t *stat } static void -zbio_buf_vm_object_copy(buf_t *bp, int direction) +zio_buf_vm_object_copy(buf_t *bp, int direction) { vm_object_t object; vm_pindex_t start, end; @@ -501,7 +499,7 @@ zbio_buf_vm_object_copy(buf_t *bp, int d vm_page_t m; struct sf_buf *sf; - object = zbio_buf_get_vm_object(bp); + object = zio_buf_get_vm_object(bp); byte_offset = stob(bp->b_blkno); page_offset = byte_offset & PAGE_MASK; start = OFF_TO_IDX(byte_offset); @@ -542,26 +540,26 @@ done: } static void -zbio_buf_vm_object_copyout(buf_t *bp) +zio_buf_vm_object_copyout(buf_t *bp) { - zbio_buf_vm_object_copy(bp, ZB_COPYOUT); + zio_buf_vm_object_copy(bp, ZB_COPYOUT); } static void -zbio_buf_vm_object_copyin(buf_t *bp) +zio_buf_vm_object_copyin(buf_t *bp) { - zbio_buf_vm_object_copy(bp, ZB_COPYIN); + zio_buf_vm_object_copy(bp, ZB_COPYIN); } static void -zbio_buf_vm_object_evict(buf_t *bp) +zio_buf_vm_object_evict(buf_t *bp) { int i; vm_page_t m; - VM_OBJECT_LOCK_ASSERT(zbio_buf_get_vm_object(bp), MA_OWNED); + VM_OBJECT_LOCK_ASSERT(zio_buf_get_vm_object(bp), MA_OWNED); vm_page_lock_queues(); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; @@ -580,7 +578,7 @@ zbio_buf_vm_object_evict(buf_t *bp) } static void -zbio_buf_vm_object_insert_locked(buf_t *bp, struct vnode *vp, +zio_buf_vm_object_insert_locked(buf_t *bp, struct vnode *vp, vm_object_t object, int valid) { vm_page_t m; @@ -606,19 +604,19 @@ zbio_buf_vm_object_insert_locked(buf_t * } static void -zbio_buf_vm_object_insert(buf_t *bp, int valid) +zio_buf_vm_object_insert(buf_t *bp, int valid) { - spa_t *spa = zbio_buf_get_spa(bp); + spa_t *spa = zio_buf_get_spa(bp); struct vnode *vp = spa_get_vnode(spa); vm_object_t object = vp->v_object; VM_OBJECT_LOCK(object); - zbio_buf_vm_object_insert_locked(bp, vp, object, valid); + zio_buf_vm_object_insert_locked(bp, vp, object, valid); VM_OBJECT_UNLOCK(object); } /* - * zbio_buf_evict_overlap: [ internal use only ] + * zio_buf_evict_overlap: [ internal use only ] * * Evict the pages of any buffers overlapping with this range * @@ -629,7 +627,7 @@ zbio_buf_vm_object_insert(buf_t *bp, int * This routine may not block. */ static void -zbio_buf_evict_overlap_locked(daddr_t blkno, int size, zbio_state_t *state, +zio_buf_evict_overlap_locked(daddr_t blkno, int size, zio_state_t *state, uint64_t txg, int evict_op, vm_object_t object) { buf_t *root, *tmpbp; @@ -643,7 +641,7 @@ zbio_buf_evict_overlap_locked(daddr_t bl goto done; collisions = 0; - root = zbio_buf_blkno_splay(blkno, root); + root = zio_buf_blkno_splay(blkno, root); TAILQ_INIT(&clh); if (blkno < root->b_blkno) tmpbp = TAILQ_PREV(root, cluster_list_head, b_bobufs); @@ -655,8 +653,8 @@ zbio_buf_evict_overlap_locked(daddr_t bl while (tmpbp != NULL && tmpbp->b_blkno < blkno_end) { tmpblkno = tmpbp->b_blkno; tmpblkno_end = tmpblkno + btos(tmpbp->b_bcount); - tmptxg = ((zbio_buf_hdr_t *)((arc_buf_t *)tmpbp->b_arc_buf)->b_hdr)->b_birth; - + tmptxg = tmpbp->b_birth; + if (((tmpblkno >= blkno) && (tmpblkno < blkno_end)) || (tmpblkno_end > blkno) && (tmpblkno_end <= blkno_end) && ((txg == NO_TXG) || (tmptxg < txg))) { @@ -668,7 +666,7 @@ zbio_buf_evict_overlap_locked(daddr_t bl while (!TAILQ_EMPTY(&clh)) { tmpbp = TAILQ_FIRST(&clh); TAILQ_REMOVE(&clh, tmpbp, b_freelist); - zbio_buf_vm_object_evict(tmpbp); + zio_buf_vm_object_evict(tmpbp); KASSERT(tmpbp->b_flags & B_EVICTED == 0, ("buffer has already been evicted")); @@ -677,8 +675,8 @@ zbio_buf_evict_overlap_locked(daddr_t bl /* * move buffer to the unmanaged tree */ - zbio_buf_blkno_remove(tmpbp); - zbio_buf_va_insert(tmpbp, state); + zio_buf_blkno_remove(tmpbp); + zio_buf_va_insert(tmpbp); } done: if (!(collisions == 1 && tmpbp->b_blkno == blkno && tmpbp->b_bcount == size) @@ -697,13 +695,13 @@ done: } static void -zbio_buf_evict_overlap(daddr_t blkno, int size, zbio_state_t *state, +zio_buf_evict_overlap(daddr_t blkno, int size, zio_state_t *state, uint64_t txg, int evict_op) { vm_object_t object = spa_get_vm_object(state->spa); VM_OBJECT_LOCK(object); - zbio_buf_evict_overlap_locked(blkno, size, state, txg, evict_op, object); + zio_buf_evict_overlap_locked(blkno, size, state, txg, evict_op, object); VM_OBJECT_UNLOCK(object); } @@ -742,100 +740,62 @@ D) !B_MALLOC / address is known */ static buf_t * -_zbio_getblk_malloc(zbio_buf_hdr_t *hdr, int flags) +_zio_getblk_malloc(uint64_t size, int flags) { - buf_t *newbp, *tmpbp; + buf_t *newbp; void *data; - daddr_t blkno; - uint64_t size = hdr->b_size; - uint64_t txg = hdr->b_birth; - zbio_state_t *state = spa_get_bio_state(hdr->b_spa); if (flags & GB_NODUMP) - data = zio_data_buf_alloc(size); + data = _zio_data_buf_alloc(size); else - data = zio_buf_alloc(size); + data = _zio_buf_alloc(size); newbp = malloc(sizeof(struct buf), M_ZFS_BIO, M_WAITOK|M_ZERO); newbp->b_data = data; newbp->b_flags = (B_MALLOC|B_INVAL); newbp->b_bcount = size; - - if (hdr->b_flags & ZBIO_BUF_CLONING) { - newbp->b_flags |= B_CLONED; - hdr->b_flags &= ~ZBIO_BUF_CLONING; - } - zbio_buf_va_insert(newbp, state); } static buf_t * -_zbio_getblk_vmio(zbio_buf_hdr_t *hdr, int flags) +_zio_getblk_vmio(uint64_t size, int flags) { buf_t *newbp; - uint64_t size = hdr->b_size; - spa_t *spa = hdr->b_spa; - zbio_state_t *state = spa_get_bio_state(spa); newbp = geteblk(size, flags); - zbio_buf_va_insert(newbp, state); BUF_KERNPROC(newbp); return (newbp); } -static void -_zbio_getblk(arc_buf_t *buf, int flags) +void * +zio_getblk(uint64_t size, int flags) { - zbio_buf_hdr_t *hdr = (zbio_buf_hdr_t *)buf->b_hdr; - uint64_t size = hdr->b_size; - buf_t *newbp; - - if (zfs_page_cache_disable) { - buf->b_data = zio_buf_alloc(size); - hdr->b_flags &= ~ZBIO_BUF_CLONING; - return; - } + buf_t *newbp; - if ((size & PAGE_MASK) || (hdr->b_flags & ZBIO_BUF_CLONING)) - newbp = _zbio_getblk_malloc(hdr, flags); + if (size & PAGE_MASK) + newbp = _zio_getblk_malloc(size, flags); else - newbp = _zbio_getblk_vmio(hdr, flags); - - buf->b_bp = newbp; - buf->b_data = newbp->b_data; - newbp->b_arc_buf = buf; -} - -void -zbio_getblk(arc_buf_t *buf) -{ - - _zbio_getblk(buf, 0); -} + newbp = _zio_getblk_vmio(size, flags); -void -zbio_data_getblk(arc_buf_t *buf) -{ - - _zbio_getblk(buf, GB_NODUMP); + zio_buf_va_insert(newbp); + return (newbp->b_data); } void -zbio_relse(arc_buf_t *buf, size_t size) +zio_relse(void *data, size_t size) { - struct buf *bp = buf->b_bp; + buf_t *bp; - if (zfs_page_cache_disable) { - zio_buf_free(buf->b_data, size); - return; - } + bp = zio_buf_va_lookup(data); + zio_buf_va_remove(bp); if (bp->b_flags & B_ASSIGNED) - zbio_buf_blkno_remove(bp); - else - zbio_buf_va_remove(bp); + zio_buf_blkno_remove(bp); if (bp->b_flags & B_MALLOC) { - zio_buf_free(bp->b_data, size); + if (bp->b_flags & B_DATA) + _zio_data_buf_free(bp->b_data, size); + else + _zio_buf_free(bp->b_data, size); free(bp, M_ZFS_BIO); } else { CTR4(KTR_SPARE2, "arc_brelse() bp=%p flags %X" @@ -848,11 +808,11 @@ zbio_relse(arc_buf_t *buf, size_t size) } int -zbio_sync_cache(spa_t *spa, blkptr_t *blkp, uint64_t txg, void *data, - uint64_t size, int bio_op) +_zio_sync_cache(spa_t *spa, blkptr_t *blkp, uint64_t txg, void *data, + uint64_t size, zio_type_t bio_op) { buf_t *bp; - zbio_state_t *state = spa_get_bio_state(spa); + zio_state_t *state = spa_get_zio_state(spa); dva_t dva = *BP_IDENTITY(blkp); daddr_t blkno = dva.dva_word[1] & ~(1ULL<<63); struct vnode *vp = spa_get_vnode(spa); @@ -861,47 +821,44 @@ zbio_sync_cache(spa_t *spa, blkptr_t *bl vm_page_t m; int i, io_bypass = FALSE; - if (zfs_page_cache_disable) - return (FALSE); - /* * XXX incomplete */ - if ((bp = zbio_buf_va_lookup(state, data)) != NULL) { - KASSERT(bp->b_flags & (B_CLONED|B_EVICTED) == 0, + if ((bp = zio_buf_va_lookup(data)) != NULL) { + KASSERT(bp->b_flags & B_EVICTED == 0, ("doing I/O with cloned or evicted buffer 0x%x", bp->b_flags)); if (bp->b_flags & B_MALLOC) { - zbio_buf_evict_overlap(blkno, size, state, txg, ZB_EVICT_BUFFERED); + zio_buf_evict_overlap(blkno, size, state, txg, ZB_EVICT_BUFFERED); if (bio_op == BIO_READ) { /* * if page resident - copy in * update zio pipeline */ - zbio_buf_vm_object_copyin(bp); + zio_buf_vm_object_copyin(bp); if (bp->b_flags & B_CACHE) { /* update zio pipeline */ io_bypass = TRUE; } } else { - zbio_buf_vm_object_copyout(bp); + zio_buf_vm_object_copyout(bp); } } else { - zbio_buf_va_remove(bp); + zio_buf_va_remove(bp); VM_OBJECT_LOCK(object); - zbio_buf_evict_overlap_locked(blkno, size, state, NO_TXG, + zio_buf_evict_overlap_locked(blkno, size, state, NO_TXG, ZB_EVICT_ALL, object); bp->b_blkno = bp->b_lblkno = blkno; bp->b_flags |= (B_VMIO|B_ASSIGNED); - zbio_buf_blkno_insert(bp, state); - zbio_buf_vm_object_insert_locked(bp, vp, object, bio_op == BIO_WRITE); + zio_buf_blkno_insert(bp, state); + zio_buf_vm_object_insert_locked(bp, vp, object, bio_op == BIO_WRITE); VM_OBJECT_UNLOCK(object); } } else { - bp = zbio_buf_blkno_lookup(state, blkno); + bp = zio_buf_blkno_lookup(state, blkno); if (bio_op == BIO_READ && (bp->b_flags & (B_CACHE|B_INVAL)) == B_CACHE) io_bypass = TRUE; KASSERT(bp != NULL, ("blkno=%ld data=%p unmanaged", blkno, bp->b_data)); @@ -910,8 +867,15 @@ zbio_sync_cache(spa_t *spa, blkptr_t *bl return (io_bypass); } +void +_zio_cache_valid(void *data, uint64_t size) +{ + + +} + static void -zbio_shutdown(void *arg __unused, int howto __unused) +zfs_bio_shutdown(void *arg __unused, int howto __unused) { struct mount *mp, *tmpmp; int error; @@ -949,55 +913,48 @@ zbio_shutdown(void *arg __unused, int ho } void -zbio_init(void) +zfs_bio_init(void) { if (zfs_page_cache_disable) return; - zbio_event_shutdown = EVENTHANDLER_REGISTER(shutdown_pre_sync, - zbio_shutdown, NULL, EVENTHANDLER_PRI_FIRST); + zfs_bio_event_shutdown = EVENTHANDLER_REGISTER(shutdown_pre_sync, + zfs_bio_shutdown, NULL, EVENTHANDLER_PRI_FIRST); } void -zbio_fini(void) +zfs_bio_fini(void) { - if (zbio_event_shutdown != NULL) - EVENTHANDLER_DEREGISTER(shutdown_pre_sync, zbio_event_shutdown); + if (zfs_bio_event_shutdown != NULL) + EVENTHANDLER_DEREGISTER(shutdown_pre_sync, zfs_bio_event_shutdown); } #else /* !_KERNEL */ -void -zbio_getblk(arc_buf_t *buf) +void * +zio_getblk(uint64_t size) { - zbio_buf_hdr_t *hdr = (zbio_buf_hdr_t *)buf->b_hdr; - uint64_t size = hdr->b_size; - - buf->b_data = zio_buf_alloc(size); - hdr->b_flags &= ~ZBIO_BUF_CLONING; + return (zio_buf_alloc(size)); } void -zbio_data_getblk(arc_buf_t *buf) *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200912140352.nBE3qLw1007017>