Skip site navigation (1) Skip section navigation (2)
Date:      Mon, 17 Oct 2016 07:16:04 +0000 (UTC)
From:      Sepherosa Ziehau <sephe@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r307486 - in stable/11/sys/dev/hyperv: include storvsc vmbus
Message-ID:  <201610170716.u9H7G4Wt005854@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: sephe
Date: Mon Oct 17 07:16:04 2016
New Revision: 307486
URL: https://svnweb.freebsd.org/changeset/base/307486

Log:
  MFC 304790,304791
  
  304790
      hyperv/vmbus: Add function to calculate max # of elements in a bufring.
  
      Sponsored by:   Microsoft
  
  304791
      hyperv/storvsc: Increase queue depth and rework channel selection.
  
      - Increasing queue depth gives ~100% performance improvement for
        randwrite fio test in Azure.
      - New channel selection, which takes LUN id and the current cpuid
        into consideration, gives additional ~20% performance improvement
      for randwrite fio test in Azure.
  
      Submitted by:   Hongzhang Jiang <honzhan microsoft com>
      Modified by:    sephe
      Sponsored by:   Microsoft
      Differential Revision:  https://reviews.freebsd.org/D7622

Modified:
  stable/11/sys/dev/hyperv/include/vmbus.h
  stable/11/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
  stable/11/sys/dev/hyperv/storvsc/hv_vstorage.h
  stable/11/sys/dev/hyperv/vmbus/vmbus_brvar.h
  stable/11/sys/dev/hyperv/vmbus/vmbus_chan.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/dev/hyperv/include/vmbus.h
==============================================================================
--- stable/11/sys/dev/hyperv/include/vmbus.h	Mon Oct 17 07:13:48 2016	(r307485)
+++ stable/11/sys/dev/hyperv/include/vmbus.h	Mon Oct 17 07:16:04 2016	(r307486)
@@ -161,5 +161,7 @@ uint32_t	vmbus_chan_subidx(const struct 
 bool		vmbus_chan_is_primary(const struct vmbus_channel *chan);
 const struct hyperv_guid *
 		vmbus_chan_guid_inst(const struct vmbus_channel *chan);
+int		vmbus_chan_prplist_nelem(int br_size, int prpcnt_max,
+		    int dlen_max);
 
 #endif	/* !_VMBUS_H_ */

Modified: stable/11/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
==============================================================================
--- stable/11/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c	Mon Oct 17 07:13:48 2016	(r307485)
+++ stable/11/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c	Mon Oct 17 07:16:04 2016	(r307486)
@@ -75,11 +75,9 @@ __FBSDID("$FreeBSD$");
 
 #include <dev/hyperv/include/hyperv.h>
 #include <dev/hyperv/include/vmbus.h>
-
 #include "hv_vstorage.h"
 #include "vmbus_if.h"
 
-#define STORVSC_RINGBUFFER_SIZE		(20*PAGE_SIZE)
 #define STORVSC_MAX_LUNS_PER_TARGET	(64)
 #define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
 #define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
@@ -121,8 +119,6 @@ struct hv_sgl_page_pool{
 	boolean_t                is_init;
 } g_hv_sgl_page_pool;
 
-#define STORVSC_MAX_SG_PAGE_CNT STORVSC_MAX_IO_REQUESTS * STORVSC_DATA_SEGCNT_MAX
-
 enum storvsc_request_type {
 	WRITE_TYPE,
 	READ_TYPE,
@@ -130,17 +126,35 @@ enum storvsc_request_type {
 };
 
 SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
-    "Hyper-V storage interface");
+	"Hyper-V storage interface");
+
+static u_int hv_storvsc_use_win8ext_flags = 1;
+SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_win8ext_flags, CTLFLAG_RW,
+	&hv_storvsc_use_win8ext_flags, 0,
+	"Use win8 extension flags or not");
 
 static u_int hv_storvsc_use_pim_unmapped = 1;
-SYSCTL_INT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN,
-    &hv_storvsc_use_pim_unmapped, 0,
-    "Optimize storvsc by using unmapped I/O");
+SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN,
+	&hv_storvsc_use_pim_unmapped, 0,
+	"Optimize storvsc by using unmapped I/O");
+
+static u_int hv_storvsc_ringbuffer_size = (64 * PAGE_SIZE);
+SYSCTL_UINT(_hw_storvsc, OID_AUTO, ringbuffer_size, CTLFLAG_RDTUN,
+	&hv_storvsc_ringbuffer_size, 0, "Hyper-V storage ringbuffer size");
+
+static u_int hv_storvsc_max_io = 512;
+SYSCTL_UINT(_hw_storvsc, OID_AUTO, max_io, CTLFLAG_RDTUN,
+	&hv_storvsc_max_io, 0, "Hyper-V storage max io limit");
+
+#define STORVSC_MAX_IO						\
+	vmbus_chan_prplist_nelem(hv_storvsc_ringbuffer_size,	\
+	   STORVSC_DATA_SEGCNT_MAX, VSTOR_PKT_SIZE)
 
 struct hv_storvsc_sysctl {
 	u_long		data_bio_cnt;
 	u_long		data_vaddr_cnt;
 	u_long		data_sg_cnt;
+	u_long		chan_send_cnt[MAXCPU];
 };
 
 struct storvsc_gpa_range {
@@ -184,10 +198,18 @@ struct storvsc_softc {
 	device_t			hs_dev;
 	bus_dma_tag_t			storvsc_req_dtag;
 	struct hv_storvsc_sysctl	sysctl_data;
-
-	struct vmbus_channel		*hs_cpu2chan[MAXCPU];
+	uint32_t			hs_nchan;
+	struct vmbus_channel		*hs_sel_chan[MAXCPU];
 };
 
+/*
+ * The size of the vmscsi_request has changed in win8. The
+ * additional size is for the newly added elements in the
+ * structure. These elements are valid only when we are talking
+ * to a win8 host.
+ * Track the correct size we need to apply.
+ */
+static int vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
 
 /**
  * HyperV storvsc timeout testing cases:
@@ -211,7 +233,7 @@ struct storvsc_driver_props {
 	char		*drv_name;
 	char		*drv_desc;
 	uint8_t		drv_max_luns_per_target;
-	uint8_t		drv_max_ios_per_target;
+	uint32_t	drv_max_ios_per_target;
 	uint32_t	drv_ringbuffer_size;
 };
 
@@ -240,10 +262,10 @@ static const struct hyperv_guid gBlkVscD
 static struct storvsc_driver_props g_drv_props_table[] = {
 	{"blkvsc", "Hyper-V IDE Storage Interface",
 	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
-	 STORVSC_RINGBUFFER_SIZE},
+	 20*PAGE_SIZE},
 	{"storvsc", "Hyper-V SCSI Storage Interface",
 	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
-	 STORVSC_RINGBUFFER_SIZE}
+	 20*PAGE_SIZE}
 };
 
 /*
@@ -253,14 +275,6 @@ static struct storvsc_driver_props g_drv
 static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
 
 /*
- * The size of the vmscsi_request has changed in win8. The
- * additional size is for the newly added elements in the
- * structure. These elements are valid only when we are talking
- * to a win8 host.
- * Track the correct size we need to apply.
- */
-static int vmscsi_size_delta;
-/*
  * The storage protocol version is determined during the
  * initial exchange with the host.  It will indicate which
  * storage functionality is available in the host.
@@ -413,6 +427,9 @@ storvsc_send_multichannel_request(struct
 		return;
 	}
 
+	/* Update channel count */
+	sc->hs_nchan = request_channels_cnt + 1;
+
 	/* Wait for sub-channels setup to complete. */
 	subchan = vmbus_subchan_get(sc->hs_chan, request_channels_cnt);
 
@@ -585,7 +602,6 @@ hv_storvsc_channel_init(struct storvsc_s
 	 */
 	if (support_multichannel)
 		storvsc_send_multichannel_request(sc, max_chans);
-
 cleanup:
 	sema_destroy(&request->synch_sema);
 	return (ret);
@@ -624,7 +640,6 @@ hv_storvsc_connect_vsp(struct storvsc_so
 	}
 
 	ret = hv_storvsc_channel_init(sc);
-
 	return (ret);
 }
 
@@ -686,7 +701,7 @@ hv_storvsc_io_request(struct storvsc_sof
 {
 	struct vstor_packet *vstor_packet = &request->vstor_packet;
 	struct vmbus_channel* outgoing_channel = NULL;
-	int ret = 0;
+	int ret = 0, ch_sel;
 
 	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
 
@@ -700,7 +715,8 @@ hv_storvsc_io_request(struct storvsc_sof
 
 	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
 
-	outgoing_channel = sc->hs_cpu2chan[curcpu];
+	ch_sel = (vstor_packet->u.vm_srb.lun + curcpu) % sc->hs_nchan;
+	outgoing_channel = sc->hs_sel_chan[ch_sel];
 
 	mtx_unlock(&request->softc->hs_lock);
 	if (request->prp_list.gpa_range.gpa_len) {
@@ -712,6 +728,10 @@ hv_storvsc_io_request(struct storvsc_sof
 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
 	}
+	/* statistic for successful request sending on each channel */
+	if (!ret) {
+		sc->sysctl_data.chan_send_cnt[ch_sel]++;
+	}
 	mtx_lock(&request->softc->hs_lock);
 
 	if (ret != 0) {
@@ -907,17 +927,20 @@ storvsc_probe(device_t dev)
 }
 
 static void
-storvsc_create_cpu2chan(struct storvsc_softc *sc)
+storvsc_create_chan_sel(struct storvsc_softc *sc)
 {
-	int cpu;
+	struct vmbus_channel **subch;
+	int i, nsubch;
 
-	CPU_FOREACH(cpu) {
-		sc->hs_cpu2chan[cpu] = vmbus_chan_cpu2chan(sc->hs_chan, cpu);
-		if (bootverbose) {
-			device_printf(sc->hs_dev, "cpu%d -> chan%u\n",
-			    cpu, vmbus_chan_id(sc->hs_cpu2chan[cpu]));
-		}
-	}
+	sc->hs_sel_chan[0] = sc->hs_chan;
+	nsubch = sc->hs_nchan - 1;
+	if (nsubch == 0)
+		return;
+
+	subch = vmbus_subchan_get(sc->hs_chan, nsubch);
+	for (i = 0; i < nsubch; i++)
+		sc->hs_sel_chan[i + 1] = subch[i];
+	vmbus_subchan_rel(subch, nsubch);
 }
 
 static int
@@ -977,7 +1000,10 @@ storvsc_sysctl(device_t dev)
 {
 	struct sysctl_oid_list *child;
 	struct sysctl_ctx_list *ctx;
+	struct sysctl_oid *ch_tree, *chid_tree;
 	struct storvsc_softc *sc;
+	char name[16];
+	int i;
 
 	sc = device_get_softc(dev);
 	ctx = device_get_sysctl_ctx(dev);
@@ -989,6 +1015,28 @@ storvsc_sysctl(device_t dev)
 		&sc->sysctl_data.data_vaddr_cnt, "# of vaddr data block");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt", CTLFLAG_RW,
 		&sc->sysctl_data.data_sg_cnt, "# of sg data block");
+
+	/* dev.storvsc.UNIT.channel */
+	ch_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "channel",
+		CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+	if (ch_tree == NULL)
+		return;
+
+	for (i = 0; i < sc->hs_nchan; i++) {
+		uint32_t ch_id;
+
+		ch_id = vmbus_chan_id(sc->hs_sel_chan[i]);
+		snprintf(name, sizeof(name), "%d", ch_id);
+		/* dev.storvsc.UNIT.channel.CHID */
+		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
+			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+		if (chid_tree == NULL)
+			return;
+		/* dev.storvsc.UNIT.channel.CHID.send_req */
+		SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
+			"send_req", CTLFLAG_RD, &sc->sysctl_data.chan_send_cnt[i],
+			"# of request sending from this channel");
+	}
 }
 
 /**
@@ -1019,6 +1067,7 @@ storvsc_attach(device_t dev)
 	root_mount_token = root_mount_hold("storvsc");
 
 	sc = device_get_softc(dev);
+	sc->hs_nchan = 1;
 	sc->hs_chan = vmbus_get_channel(dev);
 
 	stor_type = storvsc_get_storage_type(dev);
@@ -1030,7 +1079,14 @@ storvsc_attach(device_t dev)
 
 	/* fill in driver specific properties */
 	sc->hs_drv_props = &g_drv_props_table[stor_type];
-
+	sc->hs_drv_props->drv_ringbuffer_size = hv_storvsc_ringbuffer_size;
+	sc->hs_drv_props->drv_max_ios_per_target =
+		MIN(STORVSC_MAX_IO, hv_storvsc_max_io);
+	if (bootverbose) {
+		printf("storvsc ringbuffer size: %d, max_io: %d\n",
+			sc->hs_drv_props->drv_ringbuffer_size,
+			sc->hs_drv_props->drv_max_ios_per_target);
+	}
 	/* fill in device specific properties */
 	sc->hs_unit	= device_get_unit(dev);
 	sc->hs_dev	= dev;
@@ -1052,7 +1108,7 @@ storvsc_attach(device_t dev)
 		 * STORVSC_DATA_SEGCNT_MAX segments, each
 		 * segment has one page buffer
 		 */
-		for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) {
+		for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; i++) {
 	        	sgl_node = malloc(sizeof(struct hv_sgl_node),
 			    M_DEVBUF, M_WAITOK|M_ZERO);
 
@@ -1083,7 +1139,7 @@ storvsc_attach(device_t dev)
 	}
 
 	/* Construct cpu to channel mapping */
-	storvsc_create_cpu2chan(sc);
+	storvsc_create_chan_sel(sc);
 
 	/*
 	 * Create the device queue.
@@ -1840,19 +1896,37 @@ create_storvsc_request(union ccb *ccb, s
 			csio->cdb_len);
 	}
 
+	if (hv_storvsc_use_win8ext_flags) {
+		reqp->vstor_packet.u.vm_srb.win8_extension.time_out_value = 60;
+		reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
+			SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
+	}
 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
 	case CAM_DIR_OUT:
-		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;	
+		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
+		if (hv_storvsc_use_win8ext_flags) {
+			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
+				SRB_FLAGS_DATA_OUT;
+		}
 		break;
 	case CAM_DIR_IN:
 		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
+		if (hv_storvsc_use_win8ext_flags) {
+			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
+				SRB_FLAGS_DATA_IN;
+		}
 		break;
 	case CAM_DIR_NONE:
 		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
+		if (hv_storvsc_use_win8ext_flags) {
+			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
+				SRB_FLAGS_NO_DATA_TRANSFER;
+		}
 		break;
 	default:
-		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
-		break;
+		printf("Error: unexpected data direction: 0x%x\n",
+			ccb->ccb_h.flags & CAM_DIR_MASK);
+		return (EINVAL);
 	}
 
 	reqp->sense_data     = &csio->sense_data;

Modified: stable/11/sys/dev/hyperv/storvsc/hv_vstorage.h
==============================================================================
--- stable/11/sys/dev/hyperv/storvsc/hv_vstorage.h	Mon Oct 17 07:13:48 2016	(r307485)
+++ stable/11/sys/dev/hyperv/storvsc/hv_vstorage.h	Mon Oct 17 07:16:04 2016	(r307486)
@@ -253,6 +253,22 @@ struct vstor_packet {
 #define SRB_STATUS_AUTOSENSE_VALID      0x80
 #define SRB_STATUS_INVALID_LUN          0X20
 
+/*
+ * SRB Flag Bits
+ */
+
+#define SRB_FLAGS_QUEUE_ACTION_ENABLE           0x00000002
+#define SRB_FLAGS_DISABLE_DISCONNECT            0x00000004
+#define SRB_FLAGS_DISABLE_SYNCH_TRANSFER        0x00000008
+#define SRB_FLAGS_BYPASS_FROZEN_QUEUE           0x00000010
+#define SRB_FLAGS_DISABLE_AUTOSENSE             0x00000020
+#define SRB_FLAGS_DATA_IN                       0x00000040
+#define SRB_FLAGS_DATA_OUT                      0x00000080
+#define SRB_FLAGS_NO_DATA_TRANSFER              0x00000000
+#define SRB_FLAGS_UNSPECIFIED_DIRECTION (SRB_FLAGS_DATA_IN | SRB_FLAGS_DATA_OUT)
+#define SRB_FLAGS_NO_QUEUE_FREEZE               0x00000100
+#define SRB_FLAGS_ADAPTER_CACHE_ENABLE          0x00000200
+#define SRB_FLAGS_FREE_SENSE_BUFFER             0x00000400
 /**
  *  Packet flags
  */

Modified: stable/11/sys/dev/hyperv/vmbus/vmbus_brvar.h
==============================================================================
--- stable/11/sys/dev/hyperv/vmbus/vmbus_brvar.h	Mon Oct 17 07:13:48 2016	(r307485)
+++ stable/11/sys/dev/hyperv/vmbus/vmbus_brvar.h	Mon Oct 17 07:16:04 2016	(r307486)
@@ -74,6 +74,7 @@ struct sysctl_oid;
 static __inline int
 vmbus_txbr_maxpktsz(const struct vmbus_txbr *tbr)
 {
+
 	/*
 	 * - 64 bits for the trailing start index (- sizeof(uint64_t)).
 	 * - The rindex and windex can't be same (- 1).  See
@@ -82,6 +83,17 @@ vmbus_txbr_maxpktsz(const struct vmbus_t
 	return (tbr->txbr_dsize - sizeof(uint64_t) - 1);
 }
 
+static __inline int
+vmbus_br_nelem(int br_size, int elem_size)
+{
+
+	/* Strip bufring header */
+	br_size -= sizeof(struct vmbus_bufring);
+	/* Add per-element trailing index */
+	elem_size += sizeof(uint64_t);
+	return (br_size / elem_size);
+}
+
 void		vmbus_br_sysctl_create(struct sysctl_ctx_list *ctx,
 		    struct sysctl_oid *br_tree, struct vmbus_br *br,
 		    const char *name);

Modified: stable/11/sys/dev/hyperv/vmbus/vmbus_chan.c
==============================================================================
--- stable/11/sys/dev/hyperv/vmbus/vmbus_chan.c	Mon Oct 17 07:13:48 2016	(r307485)
+++ stable/11/sys/dev/hyperv/vmbus/vmbus_chan.c	Mon Oct 17 07:16:04 2016	(r307486)
@@ -1411,3 +1411,16 @@ vmbus_chan_guid_inst(const struct vmbus_
 {
 	return &chan->ch_guid_inst;
 }
+
+int
+vmbus_chan_prplist_nelem(int br_size, int prpcnt_max, int dlen_max)
+{
+	int elem_size;
+
+	elem_size = __offsetof(struct vmbus_chanpkt_prplist,
+	    cp_range[0].gpa_page[prpcnt_max]);
+	elem_size += dlen_max;
+	elem_size = VMBUS_CHANPKT_TOTLEN(elem_size);
+
+	return (vmbus_br_nelem(br_size, elem_size));
+}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201610170716.u9H7G4Wt005854>