Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 27 Mar 2015 08:51:20 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r280742 - stable/10/usr.sbin/bhyve
Message-ID:  <201503270851.t2R8pKBq022585@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Fri Mar 27 08:51:20 2015
New Revision: 280742
URL: https://svnweb.freebsd.org/changeset/base/280742

Log:
  MFC r280004: Give block I/O interface multiple (8) execution threads.
  
  On parallel random I/O this allows better utilization of wide storage pools.
  To avoid confusing the prefetcher on linear I/O, consecutive requests are
  executed sequentially, following the same logic that was earlier implemented
  in CTL.
  
  Benchmarks of a virtual AHCI disk, backed by a ZVOL on a RAID10 pool of
  4 HDDs, show a ~3.5x improvement in random read performance, with no
  degradation on linear I/O.

Modified:
  stable/10/usr.sbin/bhyve/block_if.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/usr.sbin/bhyve/block_if.c
==============================================================================
--- stable/10/usr.sbin/bhyve/block_if.c	Fri Mar 27 08:50:26 2015	(r280741)
+++ stable/10/usr.sbin/bhyve/block_if.c	Fri Mar 27 08:51:20 2015	(r280742)
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
 #define BLOCKIF_SIG	0xb109b109
 
 #define BLOCKIF_MAXREQ	33
+#define BLOCKIF_NUMTHR	8
 
 enum blockop {
 	BOP_READ,
@@ -65,6 +66,7 @@ enum blockop {
 
 enum blockstat {
 	BST_FREE,
+	BST_BLOCK,
 	BST_PEND,
 	BST_BUSY,
 	BST_DONE
@@ -76,6 +78,7 @@ struct blockif_elem {
 	enum blockop	     be_op;
 	enum blockstat	     be_status;
 	pthread_t            be_tid;
+	off_t		     be_block;
 };
 
 struct blockif_ctxt {
@@ -88,16 +91,15 @@ struct blockif_ctxt {
 	int			bc_sectsz;
 	int			bc_psectsz;
 	int			bc_psectoff;
-	pthread_t		bc_btid;
+	int			bc_closing;
+	pthread_t		bc_btid[BLOCKIF_NUMTHR];
         pthread_mutex_t		bc_mtx;
         pthread_cond_t		bc_cond;
-	int			bc_closing;
 
 	/* Request elements and free/pending/busy queues */
 	TAILQ_HEAD(, blockif_elem) bc_freeq;       
 	TAILQ_HEAD(, blockif_elem) bc_pendq;
 	TAILQ_HEAD(, blockif_elem) bc_busyq;
-	u_int			bc_req_count;
 	struct blockif_elem	bc_reqs[BLOCKIF_MAXREQ];
 };
 
@@ -116,58 +118,83 @@ static int
 blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
 		enum blockop op)
 {
-	struct blockif_elem *be;
-
-	assert(bc->bc_req_count < BLOCKIF_MAXREQ);
+	struct blockif_elem *be, *tbe;
+	off_t off;
+	int i;
 
 	be = TAILQ_FIRST(&bc->bc_freeq);
 	assert(be != NULL);
 	assert(be->be_status == BST_FREE);
-
 	TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
-	be->be_status = BST_PEND;
 	be->be_req = breq;
 	be->be_op = op;
+	switch (op) {
+	case BOP_READ:
+	case BOP_WRITE:
+	case BOP_DELETE:
+		off = breq->br_offset;
+		for (i = 0; i < breq->br_iovcnt; i++)
+			off += breq->br_iov[i].iov_len;
+		break;
+	default:
+		off = OFF_MAX;
+	}
+	be->be_block = off;
+	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
+		if (tbe->be_block == breq->br_offset)
+			break;
+	}
+	if (tbe == NULL) {
+		TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) {
+			if (tbe->be_block == breq->br_offset)
+				break;
+		}
+	}
+	if (tbe == NULL)
+		be->be_status = BST_PEND;
+	else
+		be->be_status = BST_BLOCK;
 	TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
-
-	bc->bc_req_count++;
-
-	return (0);
+	return (be->be_status == BST_PEND);
 }
 
 static int
-blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem **bep)
+blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
 {
 	struct blockif_elem *be;
 
-	if (bc->bc_req_count == 0)
-		return (ENOENT);
-
-	be = TAILQ_FIRST(&bc->bc_pendq);
-	assert(be != NULL);
-	assert(be->be_status == BST_PEND);
+	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
+		if (be->be_status == BST_PEND)
+			break;
+		assert(be->be_status == BST_BLOCK);
+	}
+	if (be == NULL)
+		return (0);
 	TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
 	be->be_status = BST_BUSY;
-	be->be_tid = bc->bc_btid;
+	be->be_tid = t;
 	TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
-
 	*bep = be;
-
-	return (0);
+	return (1);
 }
 
 static void
 blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
 {
-	assert(be->be_status == BST_DONE);
+	struct blockif_elem *tbe;
 
-	TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
+	if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
+		TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
+	else
+		TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
+	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
+		if (tbe->be_req->br_offset == be->be_block)
+			tbe->be_status = BST_PEND;
+	}
 	be->be_tid = 0;
 	be->be_status = BST_FREE;
 	be->be_req = NULL;
 	TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
-
-	bc->bc_req_count--;
 }
 
 static void
@@ -228,28 +255,27 @@ blockif_thr(void *arg)
 {
 	struct blockif_ctxt *bc;
 	struct blockif_elem *be;
+	pthread_t t;
 
 	bc = arg;
+	t = pthread_self();
 
+	pthread_mutex_lock(&bc->bc_mtx);
 	for (;;) {
-		pthread_mutex_lock(&bc->bc_mtx);
-		while (!blockif_dequeue(bc, &be)) {
+		while (blockif_dequeue(bc, t, &be)) {
 			pthread_mutex_unlock(&bc->bc_mtx);
 			blockif_proc(bc, be);
 			pthread_mutex_lock(&bc->bc_mtx);
 			blockif_complete(bc, be);
 		}
-		pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
-		pthread_mutex_unlock(&bc->bc_mtx);
-
-		/*
-		 * Check ctxt status here to see if exit requested
-		 */
+		/* Check ctxt status here to see if exit requested */
 		if (bc->bc_closing)
-			pthread_exit(NULL);
+			break;
+		pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
 	}
+	pthread_mutex_unlock(&bc->bc_mtx);
 
-	/* Not reached */
+	pthread_exit(NULL);
 	return (NULL);
 }
 
@@ -388,16 +414,16 @@ blockif_open(const char *optstr, const c
 	TAILQ_INIT(&bc->bc_freeq);
 	TAILQ_INIT(&bc->bc_pendq);
 	TAILQ_INIT(&bc->bc_busyq);
-	bc->bc_req_count = 0;
 	for (i = 0; i < BLOCKIF_MAXREQ; i++) {
 		bc->bc_reqs[i].be_status = BST_FREE;
 		TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
 	}
 
-	pthread_create(&bc->bc_btid, NULL, blockif_thr, bc);
-
-	snprintf(tname, sizeof(tname), "blk-%s", ident);
-	pthread_set_name_np(bc->bc_btid, tname);
+	for (i = 0; i < BLOCKIF_NUMTHR; i++) {
+		pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
+		snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
+		pthread_set_name_np(bc->bc_btid[i], tname);
+	}
 
 	return (bc);
 }
@@ -411,13 +437,13 @@ blockif_request(struct blockif_ctxt *bc,
 	err = 0;
 
 	pthread_mutex_lock(&bc->bc_mtx);
-	if (bc->bc_req_count < BLOCKIF_MAXREQ) {
+	if (!TAILQ_EMPTY(&bc->bc_freeq)) {
 		/*
 		 * Enqueue and inform the block i/o thread
 		 * that there is work available
 		 */
-		blockif_enqueue(bc, breq, op);
-		pthread_cond_signal(&bc->bc_cond);
+		if (blockif_enqueue(bc, breq, op))
+			pthread_cond_signal(&bc->bc_cond);
 	} else {
 		/*
 		 * Callers are not allowed to enqueue more than
@@ -483,11 +509,7 @@ blockif_cancel(struct blockif_ctxt *bc, 
 		/*
 		 * Found it.
 		 */
-		TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
-		be->be_status = BST_FREE;
-		be->be_req = NULL;
-		TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
-		bc->bc_req_count--;
+		blockif_complete(bc, be);
 		pthread_mutex_unlock(&bc->bc_mtx);
 
 		return (0);
@@ -548,7 +570,7 @@ int
 blockif_close(struct blockif_ctxt *bc)
 {
 	void *jval;
-	int err;
+	int err, i;
 
 	err = 0;
 
@@ -558,8 +580,9 @@ blockif_close(struct blockif_ctxt *bc)
 	 * Stop the block i/o thread
 	 */
 	bc->bc_closing = 1;
-	pthread_cond_signal(&bc->bc_cond);
-	pthread_join(bc->bc_btid, &jval);
+	pthread_cond_broadcast(&bc->bc_cond);
+	for (i = 0; i < BLOCKIF_NUMTHR; i++)
+		pthread_join(bc->bc_btid[i], &jval);
 
 	/* XXX Cancel queued i/o's ??? */
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201503270851.t2R8pKBq022585>