Date:      Thu, 23 Jul 2015 19:13:42 +0000 (UTC)
From:      Jeff Roberson <jeff@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r285819 - in head/sys: cam dev/nvme kern sys ufs/ffs vm
Message-ID:  <201507231913.t6NJDglX010830@repo.freebsd.org>

Author: jeff
Date: Thu Jul 23 19:13:41 2015
New Revision: 285819
URL: https://svnweb.freebsd.org/changeset/base/285819

Log:
  Refactor unmapped buffer address handling.
   - Use pointer assignment rather than a combination of pointers and
     flags to switch buffers between unmapped and mapped.  This eliminates
     multiple flags and generally simplifies the logic.
   - Eliminate b_saveaddr since it is only used with pager bufs which have
     their b_data re-initialized on each allocation.
   - Gather up some convenience routines in the buffer cache for
     manipulating buf space and buf malloc space.
   - Add an inline, buf_mapped(), to standardize checks around unmapped
     buffers; a short before/after sketch follows this log message.
  
  In collaboration with: mlaier
  Reviewed by:	kib
  Tested by:	pho (many small revisions ago)
  Sponsored by:	EMC / Isilon Storage Division
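
  The heart of the change, condensed into a minimal before/after sketch (this
  is not part of the committed diff below; it only uses identifiers that the
  patch itself adds or removes):

	/* Before: the unmapped state was tracked with a flag. */
	if ((bp->b_flags & B_UNMAPPED) == 0)
		pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
		    bp->b_pages, bp->b_npages);

	/*
	 * After: the pointer itself is the state; buf_mapped() is the new
	 * inline in sys/buf.h comparing b_data against unmapped_buf.
	 */
	if (buf_mapped(bp))
		pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
		    bp->b_pages, bp->b_npages);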

Modified:
  head/sys/cam/cam_periph.c
  head/sys/dev/nvme/nvme_ctrlr.c
  head/sys/kern/vfs_bio.c
  head/sys/kern/vfs_cluster.c
  head/sys/sys/buf.h
  head/sys/ufs/ffs/ffs_rawread.c
  head/sys/ufs/ffs/ffs_vfsops.c
  head/sys/ufs/ffs/ffs_vnops.c
  head/sys/vm/swap_pager.c
  head/sys/vm/vm_pager.c
  head/sys/vm/vnode_pager.c

Modified: head/sys/cam/cam_periph.c
==============================================================================
--- head/sys/cam/cam_periph.c	Thu Jul 23 18:11:52 2015	(r285818)
+++ head/sys/cam/cam_periph.c	Thu Jul 23 19:13:41 2015	(r285819)
@@ -855,12 +855,12 @@ cam_periph_mapmem(union ccb *ccb, struct
 		 */
 		mapinfo->bp[i] = getpbuf(NULL);
 
-		/* save the buffer's data address */
-		mapinfo->bp[i]->b_saveaddr = mapinfo->bp[i]->b_data;
-
 		/* put our pointer in the data slot */
 		mapinfo->bp[i]->b_data = *data_ptrs[i];
 
+		/* save the user's data address */
+		mapinfo->bp[i]->b_caller1 = *data_ptrs[i];
+
 		/* set the transfer length, we know it's < MAXPHYS */
 		mapinfo->bp[i]->b_bufsize = lengths[i];
 
@@ -877,7 +877,7 @@ cam_periph_mapmem(union ccb *ccb, struct
 		 */
 		if (vmapbuf(mapinfo->bp[i], 1) < 0) {
 			for (j = 0; j < i; ++j) {
-				*data_ptrs[j] = mapinfo->bp[j]->b_saveaddr;
+				*data_ptrs[j] = mapinfo->bp[j]->b_caller1;
 				vunmapbuf(mapinfo->bp[j]);
 				relpbuf(mapinfo->bp[j], NULL);
 			}
@@ -958,7 +958,7 @@ cam_periph_unmapmem(union ccb *ccb, stru
 
 	for (i = 0; i < numbufs; i++) {
 		/* Set the user's pointer back to the original value */
-		*data_ptrs[i] = mapinfo->bp[i]->b_saveaddr;
+		*data_ptrs[i] = mapinfo->bp[i]->b_caller1;
 
 		/* unmap the buffer */
 		vunmapbuf(mapinfo->bp[i]);
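
The two cam_periph.c hunks above replace the b_saveaddr bookkeeping with
b_caller1: the driver stashes the user's original pointer itself, since a
pager buf's b_data is re-initialized on every allocation anyway.  A condensed
sketch of the resulting map/unmap pattern (error handling and CCB details
trimmed; not a verbatim excerpt from the file):

	struct buf *bp;

	bp = getpbuf(NULL);		/* pager buffer with its own KVA */
	bp->b_data = *data_ptrs[i];	/* user address to wire and map */
	bp->b_caller1 = *data_ptrs[i];	/* remembered for the unmap path */
	bp->b_bufsize = lengths[i];
	if (vmapbuf(bp, 1) < 0) {	/* wire pages, map at the pbuf's KVA */
		relpbuf(bp, NULL);
		return (EACCES);	/* illustrative error value */
	}

	/* ... I/O runs against bp ... */

	*data_ptrs[i] = bp->b_caller1;	/* hand the original pointer back */
	vunmapbuf(bp);			/* b_data now points at unmapped_buf */
	relpbuf(bp, NULL);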

Modified: head/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c	Thu Jul 23 18:11:52 2015	(r285818)
+++ head/sys/dev/nvme/nvme_ctrlr.c	Thu Jul 23 19:13:41 2015	(r285819)
@@ -840,7 +840,6 @@ nvme_ctrlr_passthrough_cmd(struct nvme_c
 			 */
 			PHOLD(curproc);
 			buf = getpbuf(NULL);
-			buf->b_saveaddr = buf->b_data;
 			buf->b_data = pt->buf;
 			buf->b_bufsize = pt->len;
 			buf->b_iocmd = pt->is_read ? BIO_READ : BIO_WRITE;

Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c	Thu Jul 23 18:11:52 2015	(r285818)
+++ head/sys/kern/vfs_bio.c	Thu Jul 23 19:13:41 2015	(r285819)
@@ -137,12 +137,11 @@ SYSCTL_PROC(_vfs, OID_AUTO, bufspace, CT
     &bufspace, 0, sysctl_bufspace, "L", "Virtual memory used for buffers");
 #else
 SYSCTL_LONG(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0,
-    "Virtual memory used for buffers");
+    "Physical memory used for buffers");
 #endif
-static long unmapped_bufspace;
-SYSCTL_LONG(_vfs, OID_AUTO, unmapped_bufspace, CTLFLAG_RD,
-    &unmapped_bufspace, 0,
-    "Amount of unmapped buffers, inclusive in the bufspace");
+static long bufkvaspace;
+SYSCTL_LONG(_vfs, OID_AUTO, bufkvaspace, CTLFLAG_RD, &bufkvaspace, 0,
+    "Kernel virtual memory used for buffers");
 static long maxbufspace;
 SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0,
     "Maximum allowed value of bufspace (including buf_daemon)");
@@ -454,15 +453,14 @@ bdirtyadd(void)
  *	sufficient buffer space.  Buffer space becomes recoverable when 
  *	bp's get placed back in the queues.
  */
-
 static __inline void
 bufspacewakeup(void)
 {
 	int need_wakeup, on;
 
 	/*
-	 * If someone is waiting for BUF space, wake them up.  Even
-	 * though we haven't freed the kva space yet, the waiting
+	 * If someone is waiting for bufspace, wake them up.  Even
+	 * though we may not have freed the kva space yet, the waiting
 	 * process will be able to now.
 	 */
 	rw_rlock(&nblock);
@@ -482,6 +480,50 @@ bufspacewakeup(void)
 }
 
 /*
+ *	bufspaceadjust:
+ *
+ *	Adjust the reported bufspace for a KVA managed buffer, possibly
+ * 	waking any waiters.
+ */
+static void
+bufspaceadjust(struct buf *bp, int bufsize)
+{
+	int diff;
+
+	KASSERT((bp->b_flags & B_MALLOC) == 0,
+	    ("bufspaceadjust: malloc buf %p", bp));
+	diff = bufsize - bp->b_bufsize;
+	if (diff < 0) {
+		atomic_subtract_long(&bufspace, -diff);
+		bufspacewakeup();
+	} else
+		atomic_add_long(&bufspace, diff);
+	bp->b_bufsize = bufsize;
+}
+
+/*
+ *	bufmallocadjust:
+ *
+ *	Adjust the reported bufspace for a malloc managed buffer, possibly
+ *	waking any waiters.
+ */
+static void
+bufmallocadjust(struct buf *bp, int bufsize)
+{
+	int diff;
+
+	KASSERT((bp->b_flags & B_MALLOC) != 0,
+	    ("bufmallocadjust: non-malloc buf %p", bp));
+	diff = bufsize - bp->b_bufsize;
+	if (diff < 0) {
+		atomic_subtract_long(&bufmallocspace, -diff);
+		bufspacewakeup();
+	} else
+		atomic_add_long(&bufmallocspace, diff);
+	bp->b_bufsize = bufsize;
+}
+
+/*
  *	runningwakeup:
  *
  *	Wake up processes that are waiting on asynchronous writes to fall
@@ -817,6 +859,8 @@ bufinit(void)
 	for (i = 0; i < BUFFER_QUEUES; i++)
 		TAILQ_INIT(&bufqueues[i]);
 
+	unmapped_buf = (caddr_t)kva_alloc(MAXPHYS);
+
 	/* finally, initialize each buffer header and stick on empty q */
 	for (i = 0; i < nbuf; i++) {
 		bp = &buf[i];
@@ -826,6 +870,7 @@ bufinit(void)
 		bp->b_wcred = NOCRED;
 		bp->b_qindex = QUEUE_EMPTY;
 		bp->b_xflags = 0;
+		bp->b_data = bp->b_kvabase = unmapped_buf;
 		LIST_INIT(&bp->b_dep);
 		BUF_LOCKINIT(bp);
 		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
@@ -900,7 +945,6 @@ bufinit(void)
 
 	bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ |
 	    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
-	unmapped_buf = (caddr_t)kva_alloc(MAXPHYS);
 }
 
 #ifdef INVARIANTS
@@ -908,8 +952,6 @@ static inline void
 vfs_buf_check_mapped(struct buf *bp)
 {
 
-	KASSERT((bp->b_flags & B_UNMAPPED) == 0,
-	    ("mapped buf %p %x", bp, bp->b_flags));
 	KASSERT(bp->b_kvabase != unmapped_buf,
 	    ("mapped buf: b_kvabase was not updated %p", bp));
 	KASSERT(bp->b_data != unmapped_buf,
@@ -920,10 +962,6 @@ static inline void
 vfs_buf_check_unmapped(struct buf *bp)
 {
 
-	KASSERT((bp->b_flags & B_UNMAPPED) == B_UNMAPPED,
-	    ("unmapped buf %p %x", bp, bp->b_flags));
-	KASSERT(bp->b_kvabase == unmapped_buf,
-	    ("unmapped buf: corrupted b_kvabase %p", bp));
 	KASSERT(bp->b_data == unmapped_buf,
 	    ("unmapped buf: corrupted b_data %p", bp));
 }
@@ -952,37 +990,6 @@ bpmap_qenter(struct buf *bp)
 }
 
 /*
- * bfreekva() - free the kva allocation for a buffer.
- *
- *	Since this call frees up buffer space, we call bufspacewakeup().
- */
-static void
-bfreekva(struct buf *bp)
-{
-
-	if (bp->b_kvasize == 0)
-		return;
-
-	atomic_add_int(&buffreekvacnt, 1);
-	atomic_subtract_long(&bufspace, bp->b_kvasize);
-	if ((bp->b_flags & B_UNMAPPED) == 0) {
-		BUF_CHECK_MAPPED(bp);
-		vmem_free(buffer_arena, (vm_offset_t)bp->b_kvabase,
-		    bp->b_kvasize);
-	} else {
-		BUF_CHECK_UNMAPPED(bp);
-		if ((bp->b_flags & B_KVAALLOC) != 0) {
-			vmem_free(buffer_arena, (vm_offset_t)bp->b_kvaalloc,
-			    bp->b_kvasize);
-		}
-		atomic_subtract_long(&unmapped_bufspace, bp->b_kvasize);
-		bp->b_flags &= ~(B_UNMAPPED | B_KVAALLOC);
-	}
-	bp->b_kvasize = 0;
-	bufspacewakeup();
-}
-
-/*
  *	binsfree:
  *
  *	Insert the buffer into the appropriate free list.
@@ -1104,6 +1111,75 @@ bremfreel(struct buf *bp)
 }
 
 /*
+ *	bufkvafree:
+ *
+ *	Free the kva allocation for a buffer.
+ *
+ */
+static void
+bufkvafree(struct buf *bp)
+{
+
+#ifdef INVARIANTS
+	if (bp->b_kvasize == 0) {
+		KASSERT(bp->b_kvabase == unmapped_buf &&
+		    bp->b_data == unmapped_buf,
+		    ("Leaked KVA space on %p", bp));
+	} else if (buf_mapped(bp))
+		BUF_CHECK_MAPPED(bp);
+	else
+		BUF_CHECK_UNMAPPED(bp);
+#endif
+	if (bp->b_kvasize == 0)
+		return;
+
+	vmem_free(buffer_arena, (vm_offset_t)bp->b_kvabase, bp->b_kvasize);
+	atomic_subtract_long(&bufkvaspace, bp->b_kvasize);
+	atomic_add_int(&buffreekvacnt, 1);
+	bp->b_data = bp->b_kvabase = unmapped_buf;
+	bp->b_kvasize = 0;
+}
+
+/*
+ *	bufkvaalloc:
+ *
+ *	Allocate the buffer KVA and set b_kvasize and b_kvabase.
+ */
+static int
+bufkvaalloc(struct buf *bp, int maxsize, int gbflags)
+{
+	vm_offset_t addr;
+	int error;
+
+	KASSERT((gbflags & GB_UNMAPPED) == 0 || (gbflags & GB_KVAALLOC) != 0,
+	    ("Invalid gbflags 0x%x in %s", gbflags, __func__));
+
+	bufkvafree(bp);
+
+	addr = 0;
+	error = vmem_alloc(buffer_arena, maxsize, M_BESTFIT | M_NOWAIT, &addr);
+	if (error != 0) {
+		/*
+		 * Buffer map is too fragmented.  Request the caller
+		 * to defragment the map.
+		 */
+		atomic_add_int(&bufdefragcnt, 1);
+		return (error);
+	}
+	bp->b_kvabase = (caddr_t)addr;
+	bp->b_kvasize = maxsize;
+	atomic_add_long(&bufkvaspace, bp->b_kvasize);
+	if ((gbflags & GB_UNMAPPED) != 0) {
+		bp->b_data = unmapped_buf;
+		BUF_CHECK_UNMAPPED(bp);
+	} else {
+		bp->b_data = bp->b_kvabase;
+		BUF_CHECK_MAPPED(bp);
+	}
+	return (0);
+}
+
+/*
  * Attempt to initiate asynchronous I/O on read-ahead blocks.  We must
  * clear BIO_ERROR and B_INVAL prior to initiating I/O . If B_CACHE is set,
  * the buffer is valid and we do not have to do anything.
@@ -1715,7 +1791,8 @@ brelse(struct buf *bp)
 				}
 				VM_OBJECT_RUNLOCK(obj);
 
-				if ((bp->b_flags & (B_INVAL | B_UNMAPPED)) == 0) {
+				if ((bp->b_flags & B_INVAL) == 0 &&
+				    buf_mapped(bp)) {
 					BUF_CHECK_MAPPED(bp);
 					pmap_qenter(
 					    trunc_page((vm_offset_t)bp->b_data),
@@ -1872,7 +1949,7 @@ vfs_vmio_release(struct buf *bp)
 	vm_page_t m;
 	int i;
 
-	if ((bp->b_flags & B_UNMAPPED) == 0) {
+	if (buf_mapped(bp)) {
 		BUF_CHECK_MAPPED(bp);
 		pmap_qremove(trunc_page((vm_offset_t)bp->b_data), bp->b_npages);
 	} else
@@ -1905,10 +1982,8 @@ vfs_vmio_release(struct buf *bp)
 	if (obj != NULL)
 		VM_OBJECT_WUNLOCK(obj);
 	
-	if (bp->b_bufsize) {
-		bufspacewakeup();
-		bp->b_bufsize = 0;
-	}
+	if (bp->b_bufsize)
+		bufspaceadjust(bp, 0);
 	bp->b_npages = 0;
 	bp->b_flags &= ~B_VMIO;
 	if (bp->b_vp)
@@ -1977,7 +2052,7 @@ vfs_bio_awrite(struct buf *bp)
 	int gbflags;
 
 	bo = &vp->v_bufobj;
-	gbflags = (bp->b_flags & B_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
+	gbflags = (bp->b_data == unmapped_buf) ? GB_UNMAPPED : 0;
 	/*
 	 * right now we support clustered writing only to regular files.  If
 	 * we find a clusterable block we could be in the middle of a cluster
@@ -2026,49 +2101,6 @@ vfs_bio_awrite(struct buf *bp)
 	return (nwritten);
 }
 
-static void
-setbufkva(struct buf *bp, vm_offset_t addr, int maxsize, int gbflags)
-{
-
-	KASSERT((bp->b_flags & (B_UNMAPPED | B_KVAALLOC)) == 0 &&
-	    bp->b_kvasize == 0, ("call bfreekva(%p)", bp));
-	if ((gbflags & GB_UNMAPPED) == 0) {
-		bp->b_kvabase = (caddr_t)addr;
-	} else if ((gbflags & GB_KVAALLOC) != 0) {
-		KASSERT((gbflags & GB_UNMAPPED) != 0,
-		    ("GB_KVAALLOC without GB_UNMAPPED"));
-		bp->b_kvaalloc = (caddr_t)addr;
-		bp->b_flags |= B_UNMAPPED | B_KVAALLOC;
-		atomic_add_long(&unmapped_bufspace, bp->b_kvasize);
-	}
-	bp->b_kvasize = maxsize;
-}
-
-/*
- * Allocate the buffer KVA and set b_kvasize. Also set b_kvabase if
- * needed.
- */
-static int
-allocbufkva(struct buf *bp, int maxsize, int gbflags)
-{
-	vm_offset_t addr;
-
-	bfreekva(bp);
-	addr = 0;
-
-	if (vmem_alloc(buffer_arena, maxsize, M_BESTFIT | M_NOWAIT, &addr)) {
-		/*
-		 * Buffer map is too fragmented.  Request the caller
-		 * to defragment the map.
-		 */
-		atomic_add_int(&bufdefragcnt, 1);
-		return (1);
-	}
-	setbufkva(bp, addr, maxsize, gbflags);
-	atomic_add_long(&bufspace, bp->b_kvasize);
-	return (0);
-}
-
 /*
  * Ask the bufdaemon for help, or act as bufdaemon itself, when a
  * locked vnode is supplied.
@@ -2192,7 +2224,7 @@ getnewbuf_reuse_bp(struct buf *bp, int q
 	if (bp->b_bufsize)
 		allocbuf(bp, 0);
 
-	bp->b_flags &= B_UNMAPPED | B_KVAALLOC;
+	bp->b_flags = 0;
 	bp->b_ioflags = 0;
 	bp->b_xflags = 0;
 	KASSERT((bp->b_flags & B_INFREECNT) == 0,
@@ -2328,14 +2360,11 @@ restart:
 		}
 		/*
 		 * If we are defragging then we need a buffer with 
-		 * b_kvasize != 0.  XXX this situation should no longer
-		 * occur, if defrag is non-zero the buffer's b_kvasize
-		 * should also be non-zero at this point.  XXX
+		 * b_kvasize != 0.  This situation occurs when we
+		 * have many unmapped bufs.
 		 */
-		if (defrag && bp->b_kvasize == 0) {
-			printf("Warning: defrag empty buffer %p\n", bp);
+		if (defrag && bp->b_kvasize == 0)
 			continue;
-		}
 
 		/*
 		 * Start freeing the bp.  This is somewhat involved.  nbp
@@ -2380,7 +2409,7 @@ restart:
 		 */
 		if (defrag) {
 			bp->b_flags |= B_INVAL;
-			bfreekva(bp);
+			bufkvafree(bp);
 			brelse(bp);
 			defrag = 0;
 			goto restart;
@@ -2392,7 +2421,7 @@ restart:
 		 */
 		if (qindex == QUEUE_CLEAN && BUF_LOCKWAITERS(bp)) {
 			bp->b_flags |= B_INVAL;
-			bfreekva(bp);
+			bufkvafree(bp);
 			brelse(bp);
 			goto restart;
 		}
@@ -2409,7 +2438,7 @@ restart:
 			flushingbufs = 1;
 		if (flushingbufs && bp->b_kvasize != 0) {
 			bp->b_flags |= B_INVAL;
-			bfreekva(bp);
+			bufkvafree(bp);
 			brelse(bp);
 			goto restart;
 		}
@@ -2480,65 +2509,27 @@ restart:
 	} else if ((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == GB_UNMAPPED) {
 		mtx_assert(&bqclean, MA_NOTOWNED);
 
-		bfreekva(bp);
-		bp->b_flags |= B_UNMAPPED;
-		bp->b_kvabase = bp->b_data = unmapped_buf;
-		bp->b_kvasize = maxsize;
-		atomic_add_long(&bufspace, bp->b_kvasize);
-		atomic_add_long(&unmapped_bufspace, bp->b_kvasize);
+		bufkvafree(bp);
 		atomic_add_int(&bufreusecnt, 1);
 	} else {
 		mtx_assert(&bqclean, MA_NOTOWNED);
 
 		/*
 		 * We finally have a valid bp.  We aren't quite out of the
-		 * woods, we still have to reserve kva space.  In order
-		 * to keep fragmentation sane we only allocate kva in
-		 * BKVASIZE chunks.
+		 * woods, we still have to reserve kva space. In order to
+		 * keep fragmentation sane we only allocate kva in BKVASIZE
+		 * chunks.
 		 */
 		maxsize = (maxsize + BKVAMASK) & ~BKVAMASK;
 
-		if (maxsize != bp->b_kvasize || (bp->b_flags & (B_UNMAPPED |
-		    B_KVAALLOC)) == B_UNMAPPED) {
-			if (allocbufkva(bp, maxsize, gbflags)) {
-				defrag = 1;
-				bp->b_flags |= B_INVAL;
-				brelse(bp);
-				goto restart;
-			}
-			atomic_add_int(&bufreusecnt, 1);
-		} else if ((bp->b_flags & B_KVAALLOC) != 0 &&
-		    (gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == 0) {
-			/*
-			 * If the reused buffer has KVA allocated,
-			 * reassign b_kvaalloc to b_kvabase.
-			 */
-			bp->b_kvabase = bp->b_kvaalloc;
-			bp->b_flags &= ~B_KVAALLOC;
-			atomic_subtract_long(&unmapped_bufspace,
-			    bp->b_kvasize);
-			atomic_add_int(&bufreusecnt, 1);
-		} else if ((bp->b_flags & (B_UNMAPPED | B_KVAALLOC)) == 0 &&
-		    (gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == (GB_UNMAPPED |
-		    GB_KVAALLOC)) {
-			/*
-			 * The case of reused buffer already have KVA
-			 * mapped, but the request is for unmapped
-			 * buffer with KVA allocated.
-			 */
-			bp->b_kvaalloc = bp->b_kvabase;
-			bp->b_data = bp->b_kvabase = unmapped_buf;
-			bp->b_flags |= B_UNMAPPED | B_KVAALLOC;
-			atomic_add_long(&unmapped_bufspace,
-			    bp->b_kvasize);
-			atomic_add_int(&bufreusecnt, 1);
-		}
-		if ((gbflags & GB_UNMAPPED) == 0) {
-			bp->b_saveaddr = bp->b_kvabase;
-			bp->b_data = bp->b_saveaddr;
-			bp->b_flags &= ~B_UNMAPPED;
-			BUF_CHECK_MAPPED(bp);
+		if (maxsize != bp->b_kvasize &&
+		    bufkvaalloc(bp, maxsize, gbflags)) {
+			defrag = 1;
+			bp->b_flags |= B_INVAL;
+			brelse(bp);
+			goto restart;
 		}
+		atomic_add_int(&bufreusecnt, 1);
 	}
 	return (bp);
 }
@@ -2968,9 +2959,9 @@ vfs_setdirty_locked_object(struct buf *b
 }
 
 /*
- * Allocate the KVA mapping for an existing buffer. It handles the
- * cases of both B_UNMAPPED buffer, and buffer with the preallocated
- * KVA which is not mapped (B_KVAALLOC).
+ * Allocate the KVA mapping for an existing buffer.
+ * If an unmapped buffer is provided but a mapped buffer is requested, take
+ * also care to properly setup mappings between pages and KVA.
  */
 static void
 bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags)
@@ -2979,25 +2970,22 @@ bp_unmapped_get_kva(struct buf *bp, dadd
 	int bsize, maxsize, need_mapping, need_kva;
 	off_t offset;
 
-	need_mapping = (bp->b_flags & B_UNMAPPED) != 0 &&
+	need_mapping = bp->b_data == unmapped_buf &&
 	    (gbflags & GB_UNMAPPED) == 0;
-	need_kva = (bp->b_flags & (B_KVAALLOC | B_UNMAPPED)) == B_UNMAPPED &&
+	need_kva = bp->b_kvabase == unmapped_buf &&
+	    bp->b_data == unmapped_buf &&
 	    (gbflags & GB_KVAALLOC) != 0;
 	if (!need_mapping && !need_kva)
 		return;
 
 	BUF_CHECK_UNMAPPED(bp);
 
-	if (need_mapping && (bp->b_flags & B_KVAALLOC) != 0) {
+	if (need_mapping && bp->b_kvabase != unmapped_buf) {
 		/*
 		 * Buffer is not mapped, but the KVA was already
 		 * reserved at the time of the instantiation.  Use the
 		 * allocated space.
 		 */
-		bp->b_flags &= ~B_KVAALLOC;
-		KASSERT(bp->b_kvaalloc != 0, ("kvaalloc == 0"));
-		bp->b_kvabase = bp->b_kvaalloc;
-		atomic_subtract_long(&unmapped_bufspace, bp->b_kvasize);
 		goto has_addr;
 	}
 
@@ -3012,7 +3000,7 @@ bp_unmapped_get_kva(struct buf *bp, dadd
 	maxsize = imax(maxsize, bsize);
 
 mapping_loop:
-	if (allocbufkva(bp, maxsize, gbflags)) {
+	if (bufkvaalloc(bp, maxsize, gbflags)) {
 		/*
 		 * Request defragmentation. getnewbuf() returns us the
 		 * allocated space by the scratch buffer KVA.
@@ -3025,31 +3013,31 @@ mapping_loop:
 				 * XXXKIB: defragmentation cannot
 				 * succeed, not sure what else to do.
 				 */
-				panic("GB_NOWAIT_BD and B_UNMAPPED %p", bp);
+				panic("GB_NOWAIT_BD and GB_UNMAPPED %p", bp);
 			}
 			atomic_add_int(&mappingrestarts, 1);
 			goto mapping_loop;
 		}
-		KASSERT((scratch_bp->b_flags & B_KVAALLOC) != 0,
-		    ("scratch bp !B_KVAALLOC %p", scratch_bp));
-		setbufkva(bp, (vm_offset_t)scratch_bp->b_kvaalloc,
-		    scratch_bp->b_kvasize, gbflags);
+		KASSERT(scratch_bp->b_kvabase != unmapped_buf,
+		    ("scratch bp has no KVA %p", scratch_bp));
+		/* Grab pointers. */
+		bp->b_kvabase = scratch_bp->b_kvabase;
+		bp->b_kvasize = scratch_bp->b_kvasize;
+		bp->b_data = scratch_bp->b_data;
 
 		/* Get rid of the scratch buffer. */
 		scratch_bp->b_kvasize = 0;
 		scratch_bp->b_flags |= B_INVAL;
-		scratch_bp->b_flags &= ~(B_UNMAPPED | B_KVAALLOC);
+		scratch_bp->b_data = scratch_bp->b_kvabase = unmapped_buf;
 		brelse(scratch_bp);
 	}
-	if (!need_mapping)
-		return;
-
 has_addr:
-	bp->b_saveaddr = bp->b_kvabase;
-	bp->b_data = bp->b_saveaddr; /* b_offset is handled by bpmap_qenter */
-	bp->b_flags &= ~B_UNMAPPED;
-	BUF_CHECK_MAPPED(bp);
-	bpmap_qenter(bp);
+	if (need_mapping) {
+		/* b_offset is handled by bpmap_qenter. */
+		bp->b_data = bp->b_kvabase;
+		BUF_CHECK_MAPPED(bp);
+		bpmap_qenter(bp);
+	}
 }
 
 /*
@@ -3265,7 +3253,7 @@ loop:
 		} else {
 			maxsize = size;
 			/* Do not allow non-VMIO notmapped buffers. */
-			flags &= ~GB_UNMAPPED;
+			flags &= ~(GB_UNMAPPED | GB_KVAALLOC);
 		}
 		maxsize = imax(maxsize, bsize);
 
@@ -3358,7 +3346,6 @@ geteblk(int size, int flags)
 	return (bp);
 }
 
-
 /*
  * This code constitutes the buffer memory from either anonymous system
  * memory (in the case of non-VMIO operations) or from an associated
@@ -3382,7 +3369,7 @@ allocbuf(struct buf *bp, int size)
 
 	BUF_ASSERT_HELD(bp);
 
-	if (bp->b_kvasize < size)
+	if (bp->b_kvasize != 0 && bp->b_kvasize < size)
 		panic("allocbuf: buffer too small");
 
 	if ((bp->b_flags & B_VMIO) == 0) {
@@ -3407,15 +3394,8 @@ allocbuf(struct buf *bp, int size)
 					bp->b_bcount = size;
 				} else {
 					free(bp->b_data, M_BIOBUF);
-					if (bp->b_bufsize) {
-						atomic_subtract_long(
-						    &bufmallocspace,
-						    bp->b_bufsize);
-						bufspacewakeup();
-						bp->b_bufsize = 0;
-					}
-					bp->b_saveaddr = bp->b_kvabase;
-					bp->b_data = bp->b_saveaddr;
+					bufmallocadjust(bp, 0);
+					bp->b_data = bp->b_kvabase;
 					bp->b_bcount = 0;
 					bp->b_flags &= ~B_MALLOC;
 				}
@@ -3434,33 +3414,28 @@ allocbuf(struct buf *bp, int size)
 			 * is probably extremely rare and not worth worrying
 			 * over.
 			 */
-			if ( (bufmallocspace < maxbufmallocspace) &&
+			if ((bufmallocspace < maxbufmallocspace) &&
 				(bp->b_bufsize == 0) &&
 				(mbsize <= PAGE_SIZE/2)) {
 
 				bp->b_data = malloc(mbsize, M_BIOBUF, M_WAITOK);
-				bp->b_bufsize = mbsize;
 				bp->b_bcount = size;
 				bp->b_flags |= B_MALLOC;
-				atomic_add_long(&bufmallocspace, mbsize);
+				bufmallocadjust(bp, mbsize);
 				return 1;
 			}
 			origbuf = NULL;
 			origbufsize = 0;
 			/*
-			 * If the buffer is growing on its other-than-first allocation,
-			 * then we revert to the page-allocation scheme.
+			 * If the buffer is growing on its other-than-first
+			 * allocation then we revert to the page-allocation
+			 * scheme.
 			 */
 			if (bp->b_flags & B_MALLOC) {
 				origbuf = bp->b_data;
 				origbufsize = bp->b_bufsize;
 				bp->b_data = bp->b_kvabase;
-				if (bp->b_bufsize) {
-					atomic_subtract_long(&bufmallocspace,
-					    bp->b_bufsize);
-					bufspacewakeup();
-					bp->b_bufsize = 0;
-				}
+				bufmallocadjust(bp, 0);
 				bp->b_flags &= ~B_MALLOC;
 				newbsize = round_page(newbsize);
 			}
@@ -3498,7 +3473,7 @@ allocbuf(struct buf *bp, int size)
 			if (desiredpages < bp->b_npages) {
 				vm_page_t m;
 
-				if ((bp->b_flags & B_UNMAPPED) == 0) {
+				if (buf_mapped(bp)) {
 					BUF_CHECK_MAPPED(bp);
 					pmap_qremove((vm_offset_t)trunc_page(
 					    (vm_offset_t)bp->b_data) +
@@ -3611,18 +3586,18 @@ allocbuf(struct buf *bp, int size)
 			VM_OBJECT_WUNLOCK(obj);
 
 			/*
-			 * Step 3, fixup the KVM pmap.
+			 * Step 3, fixup the KVA pmap.
 			 */
-			if ((bp->b_flags & B_UNMAPPED) == 0)
+			if (buf_mapped(bp))
 				bpmap_qenter(bp);
 			else
 				BUF_CHECK_UNMAPPED(bp);
 		}
 	}
-	if (newbsize < bp->b_bufsize)
-		bufspacewakeup();
-	bp->b_bufsize = newbsize;	/* actual buffer allocation	*/
-	bp->b_bcount = size;		/* requested buffer size	*/
+	/* Record changes in allocation size. */
+	if (bp->b_bufsize != newbsize)
+		bufspaceadjust(bp, newbsize);
+	bp->b_bcount = size;		/* requested buffer size. */
 	return 1;
 }
 
@@ -3919,7 +3894,7 @@ bufdone_finish(struct buf *bp)
 		}
 		vm_object_pip_wakeupn(obj, 0);
 		VM_OBJECT_WUNLOCK(obj);
-		if (bogus && (bp->b_flags & B_UNMAPPED) == 0) {
+		if (bogus && buf_mapped(bp)) {
 			BUF_CHECK_MAPPED(bp);
 			pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
 			    bp->b_pages, bp->b_npages);
@@ -3966,7 +3941,7 @@ vfs_unbusy_pages(struct buf *bp)
 			if (!m)
 				panic("vfs_unbusy_pages: page missing\n");
 			bp->b_pages[i] = m;
-			if ((bp->b_flags & B_UNMAPPED) == 0) {
+			if (buf_mapped(bp)) {
 				BUF_CHECK_MAPPED(bp);
 				pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
 				    bp->b_pages, bp->b_npages);
@@ -4140,7 +4115,7 @@ vfs_busy_pages(struct buf *bp, int clear
 		foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
 	}
 	VM_OBJECT_WUNLOCK(obj);
-	if (bogus && (bp->b_flags & B_UNMAPPED) == 0) {
+	if (bogus && buf_mapped(bp)) {
 		BUF_CHECK_MAPPED(bp);
 		pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
 		    bp->b_pages, bp->b_npages);
@@ -4260,7 +4235,7 @@ vfs_bio_bzero_buf(struct buf *bp, int ba
 	vm_page_t m;
 	int i, n;
 
-	if ((bp->b_flags & B_UNMAPPED) == 0) {
+	if (buf_mapped(bp)) {
 		BUF_CHECK_MAPPED(bp);
 		bzero(bp->b_data + base, size);
 	} else {
@@ -4353,11 +4328,12 @@ vm_hold_free_pages(struct buf *bp, int n
  * be valid, a race or a smaller-file mapped into a larger space may
  * actually cause vmapbuf() to fail, so all callers of vmapbuf() MUST
  * check the return value.
+ *
+ * This function only works with pager buffers.
  */
 int
 vmapbuf(struct buf *bp, int mapbuf)
 {
-	caddr_t kva;
 	vm_prot_t prot;
 	int pidx;
 
@@ -4371,24 +4347,20 @@ vmapbuf(struct buf *bp, int mapbuf)
 	    btoc(MAXPHYS))) < 0)
 		return (-1);
 	bp->b_npages = pidx;
+	bp->b_offset = ((vm_offset_t)bp->b_data) & PAGE_MASK;
 	if (mapbuf || !unmapped_buf_allowed) {
-		pmap_qenter((vm_offset_t)bp->b_saveaddr, bp->b_pages, pidx);
-		kva = bp->b_saveaddr;
-		bp->b_saveaddr = bp->b_data;
-		bp->b_data = kva + (((vm_offset_t)bp->b_data) & PAGE_MASK);
-		bp->b_flags &= ~B_UNMAPPED;
-	} else {
-		bp->b_flags |= B_UNMAPPED;
-		bp->b_offset = ((vm_offset_t)bp->b_data) & PAGE_MASK;
-		bp->b_saveaddr = bp->b_data;
+		pmap_qenter((vm_offset_t)bp->b_kvabase, bp->b_pages, pidx);
+		bp->b_data = bp->b_kvabase + bp->b_offset;
+	} else
 		bp->b_data = unmapped_buf;
-	}
 	return(0);
 }
 
 /*
  * Free the io map PTEs associated with this IO operation.
  * We also invalidate the TLB entries and restore the original b_addr.
+ *
+ * This function only works with pager buffers.
  */
 void
 vunmapbuf(struct buf *bp)
@@ -4396,13 +4368,11 @@ vunmapbuf(struct buf *bp)
 	int npages;
 
 	npages = bp->b_npages;
-	if (bp->b_flags & B_UNMAPPED)
-		bp->b_flags &= ~B_UNMAPPED;
-	else
+	if (buf_mapped(bp))
 		pmap_qremove(trunc_page((vm_offset_t)bp->b_data), npages);
 	vm_page_unhold_pages(bp->b_pages, npages);
-	
-	bp->b_data = bp->b_saveaddr;
+
+	bp->b_data = unmapped_buf;
 }
 
 void
@@ -4543,7 +4513,7 @@ void
 bdata2bio(struct buf *bp, struct bio *bip)
 {
 
-	if ((bp->b_flags & B_UNMAPPED) != 0) {
+	if (!buf_mapped(bp)) {
 		KASSERT(unmapped_buf_allowed, ("unmapped"));
 		bip->bio_ma = bp->b_pages;
 		bip->bio_ma_n = bp->b_npages;
@@ -4586,6 +4556,8 @@ DB_SHOW_COMMAND(buffer, db_show_buffer)
 	    bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid,
 	    bp->b_bufobj, bp->b_data, (intmax_t)bp->b_blkno,
 	    (intmax_t)bp->b_lblkno, bp->b_dep.lh_first);
+	db_printf("b_kvabase = %p, b_kvasize = %d\n",
+	    bp->b_kvabase, bp->b_kvasize);
 	if (bp->b_npages) {
 		int i;
 		db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages);

Modified: head/sys/kern/vfs_cluster.c
==============================================================================
--- head/sys/kern/vfs_cluster.c	Thu Jul 23 18:11:52 2015	(r285818)
+++ head/sys/kern/vfs_cluster.c	Thu Jul 23 19:13:41 2015	(r285819)
@@ -354,7 +354,6 @@ cluster_rbuild(struct vnode *vp, u_quad_
 	 */
 	bp->b_flags = B_ASYNC | B_CLUSTER | B_VMIO;
 	if ((gbflags & GB_UNMAPPED) != 0) {
-		bp->b_flags |= B_UNMAPPED;
 		bp->b_data = unmapped_buf;
 	} else {
 		bp->b_data = (char *)((vm_offset_t)bp->b_data |
@@ -517,9 +516,8 @@ clean_sbusy:
 	if (bp->b_bufsize > bp->b_kvasize)
 		panic("cluster_rbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
 		    bp->b_bufsize, bp->b_kvasize);
-	bp->b_kvasize = bp->b_bufsize;
 
-	if ((bp->b_flags & B_UNMAPPED) == 0) {
+	if (buf_mapped(bp)) {
 		pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
 		    (vm_page_t *)bp->b_pages, bp->b_npages);
 	}
@@ -545,7 +543,7 @@ cluster_callback(bp)
 	if (bp->b_ioflags & BIO_ERROR)
 		error = bp->b_error;
 
-	if ((bp->b_flags & B_UNMAPPED) == 0) {
+	if (buf_mapped(bp)) {
 		pmap_qremove(trunc_page((vm_offset_t) bp->b_data),
 		    bp->b_npages);
 	}
@@ -871,7 +869,6 @@ cluster_wbuild(struct vnode *vp, long si
 			bp->b_data = (char *)((vm_offset_t)bp->b_data |
 			    ((vm_offset_t)tbp->b_data & PAGE_MASK));
 		} else {
-			bp->b_flags |= B_UNMAPPED;
 			bp->b_data = unmapped_buf;
 		}
 		bp->b_flags |= B_CLUSTER | (tbp->b_flags & (B_VMIO |
@@ -1004,7 +1001,7 @@ cluster_wbuild(struct vnode *vp, long si
 				tbp, b_cluster.cluster_entry);
 		}
 	finishcluster:
-		if ((bp->b_flags & B_UNMAPPED) == 0) {
+		if (buf_mapped(bp)) {
 			pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
 			    (vm_page_t *)bp->b_pages, bp->b_npages);
 		}
@@ -1012,7 +1009,6 @@ cluster_wbuild(struct vnode *vp, long si
 			panic(
 			    "cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
 			    bp->b_bufsize, bp->b_kvasize);
-		bp->b_kvasize = bp->b_bufsize;
 		totalwritten += bp->b_bufsize;
 		bp->b_dirtyoff = 0;
 		bp->b_dirtyend = bp->b_bufsize;

Modified: head/sys/sys/buf.h
==============================================================================
--- head/sys/sys/buf.h	Thu Jul 23 18:11:52 2015	(r285818)
+++ head/sys/sys/buf.h	Thu Jul 23 19:13:41 2015	(r285819)
@@ -112,17 +112,15 @@ struct buf {
 	b_xflags_t b_xflags;		/* extra flags */
 	struct lock b_lock;		/* Buffer lock */
 	long	b_bufsize;		/* Allocated buffer size. */
-	long	b_runningbufspace;	/* when I/O is running, pipelining */
-	caddr_t	b_kvabase;		/* base kva for buffer */
-	caddr_t	b_kvaalloc;		/* allocated kva for B_KVAALLOC */
+	int	b_runningbufspace;	/* when I/O is running, pipelining */
 	int	b_kvasize;		/* size of kva for buffer */
-	daddr_t b_lblkno;		/* Logical block number. */
-	struct	vnode *b_vp;		/* Device vnode. */
 	int	b_dirtyoff;		/* Offset in buffer of dirty region. */
 	int	b_dirtyend;		/* Offset of end of dirty region. */
+	caddr_t	b_kvabase;		/* base kva for buffer */
+	daddr_t b_lblkno;		/* Logical block number. */
+	struct	vnode *b_vp;		/* Device vnode. */
 	struct	ucred *b_rcred;		/* Read credentials reference. */
 	struct	ucred *b_wcred;		/* Write credentials reference. */
-	void	*b_saveaddr;		/* Original b_addr for physio. */
 	union {
 		TAILQ_ENTRY(buf) bu_freelist; /* (Q) */
 		struct {
@@ -206,8 +204,8 @@ struct buf {
 #define	B_PERSISTENT	0x00000100	/* Perm. ref'ed while EXT2FS mounted. */
 #define	B_DONE		0x00000200	/* I/O completed. */
 #define	B_EINTR		0x00000400	/* I/O was interrupted */
-#define	B_UNMAPPED	0x00000800	/* KVA is not mapped. */
-#define	B_KVAALLOC	0x00001000	/* But allocated. */
+#define	B_00000800	0x00000800	/* Available flag. */
+#define	B_00001000	0x00001000	/* Available flag. */
 #define	B_INVAL		0x00002000	/* Does not contain valid info. */
 #define	B_BARRIER	0x00004000	/* Write this and all preceeding first. */
 #define	B_NOCACHE	0x00008000	/* Do not cache block after use. */
@@ -231,7 +229,7 @@ struct buf {
 #define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34managed" \
 	"\33paging\32infreecnt\31nocopy\30b23\27relbuf\26dirty\25b20" \
 	"\24b19\23b18\22clusterok\21malloc\20nocache\17b14\16inval" \
-	"\15kvaalloc\14unmapped\13eintr\12done\11persist\10delwri" \
+	"\15b12\14b11\13eintr\12done\11persist\10delwri" \
 	"\7validsuspwrt\6cache\5deferred\4direct\3async\2needcommit\1age"
 
 /*
@@ -374,15 +372,11 @@ struct buf_queue_head {
 };
 
 /*
- * This structure describes a clustered I/O.  It is stored in the b_saveaddr
- * field of the buffer on which I/O is done.  At I/O completion, cluster
- * callback uses the structure to parcel I/O's to individual buffers, and
- * then free's this structure.
+ * This structure describes a clustered I/O. 
  */
 struct cluster_save {
 	long	bs_bcount;		/* Saved b_bcount. */
 	long	bs_bufsize;		/* Saved b_bufsize. */
-	void	*bs_saveaddr;		/* Saved b_addr. */
 	int	bs_nchildren;		/* Number of associated buffers. */
 	struct buf **bs_children;	/* List of associated buffers. */
 };
@@ -478,7 +472,14 @@ extern int	cluster_pbuf_freecnt;	/* Numb
 extern int	vnode_pbuf_freecnt;	/* Number of pbufs for vnode pager */
 extern int	vnode_async_pbuf_freecnt; /* Number of pbufs for vnode pager,
 					     asynchronous reads */
-extern caddr_t	unmapped_buf;
+extern caddr_t	unmapped_buf;	/* Data address for unmapped buffers. */
+
+static inline int
+buf_mapped(struct buf *bp)
+{
+
+	return (bp->b_data != unmapped_buf);
+}
 
 void	runningbufwakeup(struct buf *);
 void	waitrunningbufspace(void);

Modified: head/sys/ufs/ffs/ffs_rawread.c
==============================================================================
--- head/sys/ufs/ffs/ffs_rawread.c	Thu Jul 23 18:11:52 2015	(r285818)
+++ head/sys/ufs/ffs/ffs_rawread.c	Thu Jul 23 19:13:41 2015	(r285819)
@@ -62,8 +62,7 @@ static int ffs_rawread_readahead(struct 
 				 off_t offset,
 				 size_t len,
 				 struct thread *td,
-				 struct buf *bp,
-				 caddr_t sa);
+				 struct buf *bp);
 static int ffs_rawread_main(struct vnode *vp,
 			    struct uio *uio);
 
@@ -190,8 +189,7 @@ ffs_rawread_readahead(struct vnode *vp,
 		      off_t offset,
 		      size_t len,
 		      struct thread *td,
-		      struct buf *bp,
-		      caddr_t sa)
+		      struct buf *bp)
 {
 	int error;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


