Date: Tue, 28 Jul 2020 21:41:52 +0300 From: Konstantin Belousov <kostikbel@gmail.com> To: Conrad Meyer <cem@freebsd.org> Cc: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: Re: svn commit: r363482 - in head/sys: kern sys Message-ID: <20200728184152.GH2551@kib.kiev.ua> In-Reply-To: <202007241734.06OHY53J080448@repo.freebsd.org> References: <202007241734.06OHY53J080448@repo.freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
On Fri, Jul 24, 2020 at 05:34:05PM +0000, Conrad Meyer wrote: > Author: cem > Date: Fri Jul 24 17:34:04 2020 > New Revision: 363482 > URL: https://svnweb.freebsd.org/changeset/base/363482 > > Log: > Add unlocked/SMR fast path to getblk() > > Convert the bufobj tries to an SMR zone/PCTRIE and add a gbincore_unlocked() > API wrapping this functionality. Use it for a fast path in getblkx(), > falling back to locked lookup if we raced a thread changing the buf's > identity. > > Reported by: Attilio > Reviewed by: kib, markj > Testing: pho (in progress) > Sponsored by: Isilon > Differential Revision: https://reviews.freebsd.org/D25782 > > Modified: > head/sys/kern/vfs_bio.c > head/sys/kern/vfs_subr.c > head/sys/sys/buf.h > > Modified: head/sys/kern/vfs_bio.c > ============================================================================== > --- head/sys/kern/vfs_bio.c Fri Jul 24 17:32:10 2020 (r363481) > +++ head/sys/kern/vfs_bio.c Fri Jul 24 17:34:04 2020 (r363482) > @@ -3849,7 +3849,7 @@ getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkn > struct buf *bp; > struct bufobj *bo; > daddr_t d_blkno; > - int bsize, error, maxsize, vmio; > + int bsize, error, maxsize, vmio, lockflags; > off_t offset; > > CTR3(KTR_BUF, "getblk(%p, %ld, %d)", vp, (long)blkno, size); > @@ -3864,11 +3864,33 @@ getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkn > > bo = &vp->v_bufobj; > d_blkno = dblkno; > + > + /* Attempt lockless lookup first. */ > + bp = gbincore_unlocked(bo, blkno); > + if (bp == NULL) > + goto newbuf_unlocked; > + > + lockflags = LK_EXCLUSIVE | LK_SLEEPFAIL | > + ((flags & GB_LOCK_NOWAIT) ? LK_NOWAIT : 0); > + > + error = BUF_TIMELOCK(bp, lockflags, NULL, "getblku", slpflag, > + slptimeo); I realized that this is not safe. There is an ordering between buffer types that defines which order buffer locks should obey. For instance, on UFS the critical order is inode buffer -> snaplk -> cg buffer, or data block -> indirect data block. Since buffer identity can change under us, we might end up waiting for a lock of type that is incompatible with the currently owned lock. I think the easiest fix is to use LK_NOWAIT always, after all it is lockless path. ERESTART/EINTR checks below than can be removed. > + if (error == EINTR || error == ERESTART) > + return (error); > + else if (error != 0) > + goto loop; > + > + /* Verify buf identify has not changed since lookup. */ > + if (bp->b_bufobj == bo && bp->b_lblkno == blkno) > + goto foundbuf_fastpath; > + > + /* It changed, fallback to locked lookup. */ > + BUF_UNLOCK_RAW(bp); > + > loop: > BO_RLOCK(bo); > bp = gbincore(bo, blkno); > if (bp != NULL) { > - int lockflags; > /* > * Buffer is in-core. If the buffer is not busy nor managed, > * it must be on a queue. > @@ -3890,8 +3912,10 @@ loop: > /* We timed out or were interrupted. */ > else if (error != 0) > return (error); > + > +foundbuf_fastpath: > /* If recursed, assume caller knows the rules. */ > - else if (BUF_LOCKRECURSED(bp)) > + if (BUF_LOCKRECURSED(bp)) > goto end; > > /* > @@ -3989,6 +4013,7 @@ loop: > * buffer is also considered valid (not marked B_INVAL). > */ > BO_RUNLOCK(bo); > +newbuf_unlocked: > /* > * If the user does not want us to create the buffer, bail out > * here. > > Modified: head/sys/kern/vfs_subr.c > ============================================================================== > --- head/sys/kern/vfs_subr.c Fri Jul 24 17:32:10 2020 (r363481) > +++ head/sys/kern/vfs_subr.c Fri Jul 24 17:34:04 2020 (r363482) > @@ -234,6 +234,7 @@ static struct mtx __exclusive_cache_line vnode_list_mt > struct nfs_public nfs_pub; > > static uma_zone_t buf_trie_zone; > +static smr_t buf_trie_smr; > > /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ > static uma_zone_t vnode_zone; > @@ -491,17 +492,16 @@ static int vnsz2log; > static void * > buf_trie_alloc(struct pctrie *ptree) > { > - > - return uma_zalloc(buf_trie_zone, M_NOWAIT); > + return (uma_zalloc_smr(buf_trie_zone, M_NOWAIT)); > } > > static void > buf_trie_free(struct pctrie *ptree, void *node) > { > - > - uma_zfree(buf_trie_zone, node); > + uma_zfree_smr(buf_trie_zone, node); > } > -PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free); > +PCTRIE_DEFINE_SMR(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free, > + buf_trie_smr); > > /* > * Initialize the vnode management data structures. > @@ -675,7 +675,8 @@ vntblinit(void *dummy __unused) > */ > buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(), > NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, > - UMA_ZONE_NOFREE); > + UMA_ZONE_NOFREE | UMA_ZONE_SMR); > + buf_trie_smr = uma_zone_get_smr(buf_trie_zone); > uma_prealloc(buf_trie_zone, nbuf); > > vnodes_created = counter_u64_alloc(M_WAITOK); > @@ -2330,7 +2331,25 @@ gbincore(struct bufobj *bo, daddr_t lblkno) > bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno); > if (bp != NULL) > return (bp); > - return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno); > + return (BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno)); > +} > + > +/* > + * Look up a buf using the buffer tries, without the bufobj lock. This relies > + * on SMR for safe lookup, and bufs being in a no-free zone to provide type > + * stability of the result. Like other lockless lookups, the found buf may > + * already be invalid by the time this function returns. > + */ > +struct buf * > +gbincore_unlocked(struct bufobj *bo, daddr_t lblkno) > +{ > + struct buf *bp; > + > + ASSERT_BO_UNLOCKED(bo); > + bp = BUF_PCTRIE_LOOKUP_UNLOCKED(&bo->bo_clean.bv_root, lblkno); > + if (bp != NULL) > + return (bp); > + return (BUF_PCTRIE_LOOKUP_UNLOCKED(&bo->bo_dirty.bv_root, lblkno)); > } > > /* > > Modified: head/sys/sys/buf.h > ============================================================================== > --- head/sys/sys/buf.h Fri Jul 24 17:32:10 2020 (r363481) > +++ head/sys/sys/buf.h Fri Jul 24 17:34:04 2020 (r363482) > @@ -326,6 +326,9 @@ extern const char *buf_wmesg; /* Default buffer lock > KASSERT(((bp)->b_flags & B_REMFREE) == 0, \ > ("BUF_UNLOCK %p while B_REMFREE is still set.", (bp))); \ > \ > + BUF_UNLOCK_RAW((bp)); \ > +} while (0) > +#define BUF_UNLOCK_RAW(bp) do { \ > (void)_lockmgr_args(&(bp)->b_lock, LK_RELEASE, NULL, \ > LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, \ > LOCK_FILE, LOCK_LINE); \ > @@ -547,6 +550,7 @@ void vfs_busy_pages_acquire(struct buf *bp); > void vfs_busy_pages_release(struct buf *bp); > struct buf *incore(struct bufobj *, daddr_t); > struct buf *gbincore(struct bufobj *, daddr_t); > +struct buf *gbincore_unlocked(struct bufobj *, daddr_t); > struct buf *getblk(struct vnode *, daddr_t, int, int, int, int); > int getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkno, int size, > int slpflag, int slptimeo, int flags, struct buf **bpp);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20200728184152.GH2551>