From owner-svn-src-all@freebsd.org Fri Jul 24 17:34:06 2020 Return-Path: Delivered-To: svn-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 0D80236D479; Fri, 24 Jul 2020 17:34:06 +0000 (UTC) (envelope-from cem@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4BCxC16dmTz3bs2; Fri, 24 Jul 2020 17:34:05 +0000 (UTC) (envelope-from cem@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id C8473DEDA; Fri, 24 Jul 2020 17:34:05 +0000 (UTC) (envelope-from cem@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id 06OHY5Zu080450; Fri, 24 Jul 2020 17:34:05 GMT (envelope-from cem@FreeBSD.org) Received: (from cem@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id 06OHY53J080448; Fri, 24 Jul 2020 17:34:05 GMT (envelope-from cem@FreeBSD.org) Message-Id: <202007241734.06OHY53J080448@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: cem set sender to cem@FreeBSD.org using -f From: Conrad Meyer Date: Fri, 24 Jul 2020 17:34:05 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r363482 - in head/sys: kern sys X-SVN-Group: head X-SVN-Commit-Author: cem X-SVN-Commit-Paths: in head/sys: kern sys X-SVN-Commit-Revision: 363482 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.33 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 24 Jul 2020 17:34:06 -0000 Author: cem Date: Fri Jul 24 17:34:04 2020 New Revision: 363482 URL: https://svnweb.freebsd.org/changeset/base/363482 Log: Add unlocked/SMR fast path to getblk() Convert the bufobj tries to an SMR zone/PCTRIE and add a gbincore_unlocked() API wrapping this functionality. Use it for a fast path in getblkx(), falling back to locked lookup if we raced a thread changing the buf's identity. Reported by: Attilio Reviewed by: kib, markj Testing: pho (in progress) Sponsored by: Isilon Differential Revision: https://reviews.freebsd.org/D25782 Modified: head/sys/kern/vfs_bio.c head/sys/kern/vfs_subr.c head/sys/sys/buf.h Modified: head/sys/kern/vfs_bio.c ============================================================================== --- head/sys/kern/vfs_bio.c Fri Jul 24 17:32:10 2020 (r363481) +++ head/sys/kern/vfs_bio.c Fri Jul 24 17:34:04 2020 (r363482) @@ -3849,7 +3849,7 @@ getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkn struct buf *bp; struct bufobj *bo; daddr_t d_blkno; - int bsize, error, maxsize, vmio; + int bsize, error, maxsize, vmio, lockflags; off_t offset; CTR3(KTR_BUF, "getblk(%p, %ld, %d)", vp, (long)blkno, size); @@ -3864,11 +3864,33 @@ getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkn bo = &vp->v_bufobj; d_blkno = dblkno; + + /* Attempt lockless lookup first. */ + bp = gbincore_unlocked(bo, blkno); + if (bp == NULL) + goto newbuf_unlocked; + + lockflags = LK_EXCLUSIVE | LK_SLEEPFAIL | + ((flags & GB_LOCK_NOWAIT) ? LK_NOWAIT : 0); + + error = BUF_TIMELOCK(bp, lockflags, NULL, "getblku", slpflag, + slptimeo); + if (error == EINTR || error == ERESTART) + return (error); + else if (error != 0) + goto loop; + + /* Verify buf identify has not changed since lookup. */ + if (bp->b_bufobj == bo && bp->b_lblkno == blkno) + goto foundbuf_fastpath; + + /* It changed, fallback to locked lookup. */ + BUF_UNLOCK_RAW(bp); + loop: BO_RLOCK(bo); bp = gbincore(bo, blkno); if (bp != NULL) { - int lockflags; /* * Buffer is in-core. If the buffer is not busy nor managed, * it must be on a queue. @@ -3890,8 +3912,10 @@ loop: /* We timed out or were interrupted. */ else if (error != 0) return (error); + +foundbuf_fastpath: /* If recursed, assume caller knows the rules. */ - else if (BUF_LOCKRECURSED(bp)) + if (BUF_LOCKRECURSED(bp)) goto end; /* @@ -3989,6 +4013,7 @@ loop: * buffer is also considered valid (not marked B_INVAL). */ BO_RUNLOCK(bo); +newbuf_unlocked: /* * If the user does not want us to create the buffer, bail out * here. Modified: head/sys/kern/vfs_subr.c ============================================================================== --- head/sys/kern/vfs_subr.c Fri Jul 24 17:32:10 2020 (r363481) +++ head/sys/kern/vfs_subr.c Fri Jul 24 17:34:04 2020 (r363482) @@ -234,6 +234,7 @@ static struct mtx __exclusive_cache_line vnode_list_mt struct nfs_public nfs_pub; static uma_zone_t buf_trie_zone; +static smr_t buf_trie_smr; /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ static uma_zone_t vnode_zone; @@ -491,17 +492,16 @@ static int vnsz2log; static void * buf_trie_alloc(struct pctrie *ptree) { - - return uma_zalloc(buf_trie_zone, M_NOWAIT); + return (uma_zalloc_smr(buf_trie_zone, M_NOWAIT)); } static void buf_trie_free(struct pctrie *ptree, void *node) { - - uma_zfree(buf_trie_zone, node); + uma_zfree_smr(buf_trie_zone, node); } -PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free); +PCTRIE_DEFINE_SMR(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free, + buf_trie_smr); /* * Initialize the vnode management data structures. @@ -675,7 +675,8 @@ vntblinit(void *dummy __unused) */ buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, - UMA_ZONE_NOFREE); + UMA_ZONE_NOFREE | UMA_ZONE_SMR); + buf_trie_smr = uma_zone_get_smr(buf_trie_zone); uma_prealloc(buf_trie_zone, nbuf); vnodes_created = counter_u64_alloc(M_WAITOK); @@ -2330,7 +2331,25 @@ gbincore(struct bufobj *bo, daddr_t lblkno) bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno); if (bp != NULL) return (bp); - return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno); + return (BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno)); +} + +/* + * Look up a buf using the buffer tries, without the bufobj lock. This relies + * on SMR for safe lookup, and bufs being in a no-free zone to provide type + * stability of the result. Like other lockless lookups, the found buf may + * already be invalid by the time this function returns. + */ +struct buf * +gbincore_unlocked(struct bufobj *bo, daddr_t lblkno) +{ + struct buf *bp; + + ASSERT_BO_UNLOCKED(bo); + bp = BUF_PCTRIE_LOOKUP_UNLOCKED(&bo->bo_clean.bv_root, lblkno); + if (bp != NULL) + return (bp); + return (BUF_PCTRIE_LOOKUP_UNLOCKED(&bo->bo_dirty.bv_root, lblkno)); } /* Modified: head/sys/sys/buf.h ============================================================================== --- head/sys/sys/buf.h Fri Jul 24 17:32:10 2020 (r363481) +++ head/sys/sys/buf.h Fri Jul 24 17:34:04 2020 (r363482) @@ -326,6 +326,9 @@ extern const char *buf_wmesg; /* Default buffer lock KASSERT(((bp)->b_flags & B_REMFREE) == 0, \ ("BUF_UNLOCK %p while B_REMFREE is still set.", (bp))); \ \ + BUF_UNLOCK_RAW((bp)); \ +} while (0) +#define BUF_UNLOCK_RAW(bp) do { \ (void)_lockmgr_args(&(bp)->b_lock, LK_RELEASE, NULL, \ LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, \ LOCK_FILE, LOCK_LINE); \ @@ -547,6 +550,7 @@ void vfs_busy_pages_acquire(struct buf *bp); void vfs_busy_pages_release(struct buf *bp); struct buf *incore(struct bufobj *, daddr_t); struct buf *gbincore(struct bufobj *, daddr_t); +struct buf *gbincore_unlocked(struct bufobj *, daddr_t); struct buf *getblk(struct vnode *, daddr_t, int, int, int, int); int getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag, int slptimeo, int flags, struct buf **bpp);