Date:      Sat, 12 Sep 2015 20:14:55 +0000 (UTC)
From:      Warner Losh <imp@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r287722 - in projects/iosched: cddl/contrib/opensolaris/cmd/sgs/tools/common share/man/man4 sys/cam/ctl sys/cam/scsi sys/cddl/contrib/opensolaris/common/avl sys/cddl/contrib/opensolaris...
Message-ID:  <201509122014.t8CKEtjh027158@repo.freebsd.org>

Author: imp
Date: Sat Sep 12 20:14:54 2015
New Revision: 287722
URL: https://svnweb.freebsd.org/changeset/base/287722

Log:
  MFC@287721

Modified:
  projects/iosched/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c
  projects/iosched/share/man/man4/ctl.4
  projects/iosched/share/man/man4/geom_fox.4
  projects/iosched/sys/cam/ctl/README.ctl.txt
  projects/iosched/sys/cam/ctl/ctl.c
  projects/iosched/sys/cam/ctl/ctl_cmd_table.c
  projects/iosched/sys/cam/ctl/ctl_tpc.c
  projects/iosched/sys/cam/ctl/ctl_tpc_local.c
  projects/iosched/sys/cam/scsi/scsi_all.c
  projects/iosched/sys/cddl/contrib/opensolaris/common/avl/avl.c
  projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
  projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
  projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c
  projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
  projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
  projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
  projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h
  projects/iosched/sys/dev/dwc/if_dwc.c
  projects/iosched/sys/netinet/sctp.h
  projects/iosched/sys/netinet/sctp_auth.c
  projects/iosched/sys/netinet/sctp_header.h
  projects/iosched/sys/netinet/sctp_indata.c
  projects/iosched/sys/netinet/sctp_input.c
  projects/iosched/usr.sbin/ctladm/ctladm.8
  projects/iosched/usr.sbin/pw/pw_user.c
Directory Properties:
  projects/iosched/   (props changed)
  projects/iosched/cddl/   (props changed)
  projects/iosched/cddl/contrib/opensolaris/   (props changed)
  projects/iosched/share/   (props changed)
  projects/iosched/share/man/man4/   (props changed)
  projects/iosched/sys/   (props changed)
  projects/iosched/sys/cddl/contrib/opensolaris/   (props changed)

Modified: projects/iosched/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c
==============================================================================
--- projects/iosched/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c	Sat Sep 12 20:06:22 2015	(r287721)
+++ projects/iosched/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c	Sat Sep 12 20:14:54 2015	(r287722)
@@ -132,6 +132,8 @@ typedef struct msg_string {
 static msg_string	*msg_head;
 static msg_string	*msg_tail;
 
+int	aok;
+
 /*
  * message_append() is responsible for both inserting strings into
  * the master Str_tbl as well as maintaining a list of the

Modified: projects/iosched/share/man/man4/ctl.4
==============================================================================
--- projects/iosched/share/man/man4/ctl.4	Sat Sep 12 20:06:22 2015	(r287721)
+++ projects/iosched/share/man/man4/ctl.4	Sat Sep 12 20:14:54 2015	(r287722)
@@ -1,4 +1,5 @@
 .\" Copyright (c) 2013 Edward Tomasz Napierala
+.\" Copyright (c) 2015 Alexander Motin <mav@FreeBSD.org>
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
@@ -23,7 +24,7 @@
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
-.Dd August 9, 2015
+.Dd September 12, 2015
 .Dt CTL 4
 .Os
 .Sh NAME
@@ -80,6 +81,8 @@ Mode sense/select support
 .It
 Error injection support
 .It
+High Availability clustering support with ALUA
+.It
 All I/O handled in-kernel, no userland context switch overhead
 .El
 .Pp
@@ -99,9 +102,57 @@ log commands with errors;
 .It 2
 log all commands;
 .It 4
-log received data for commands except READ/WRITE.
+log data for commands other then READ/WRITE.
 .El
 Defaults to 0.
+.It Va kern.cam.ctl.ha_id
+Specifies unique position of this node within High Availability cluster.
+Default is 0 -- no HA, 1 and 2 -- HA enabled at specified position.
+.It Va kern.cam.ctl.ha_mode
+Specifies High Availability cluster operation mode:
+.Bl -tag -offset indent -compact
+.It 0
+Active/Standby -- primary node has backend access and processes requests,
+while secondary can only do basic LUN discovery and reservation;
+.It 1
+Active/Active -- both nodes have backend access and process requests,
+while secondary node synchronizes processing with primary one;
+.It 2
+Active/Active -- primary node has backend access and processes requests,
+while secondary node forwards all requests and data to primary one;
+.El
+All above modes require established connection between HA cluster nodes.
+If connection is not configured, secondary node will report Unavailable
+state; if configured but not established -- Transitioning state.
+Defaults to 0.
+.It Va kern.cam.ctl.ha_peer
+String value, specifying method to establish connection to peer HA node.
+Can be "listen IP:port", "connect IP:port" or empty.
+.It Va kern.cam.ctl.ha_link
+Reports present state of connection between HA cluster nodes:
+.Bl -tag -offset indent -compact
+.It 0
+not configured;
+.It 1
+configured but not established;
+.It 2
+established.
+.El
+.It Va kern.cam.ctl.ha_role
+Specifies default role of this node:
+.Bl -tag -offset indent -compact
+.It 0
+primary;
+.It 1
+secondary.
+.El
+This role can be overriden on per-LUN basis using "ha_role" LUN option,
+so that for one LUN one node is primary, while for another -- another.
+Role change from primary to secondary for HA modes 0 and 2 closes backends,
+the opposite change -- opens.
+If there is no primary node (both nodes are secondary, or secondary node has
+no connection to primary one), secondary node(s) report Transitioning state.
+State with two primary nodes is illegal (split brain condition).
 .It Va kern.cam.ctl.iscsi.debug
 Verbosity level for log messages from the kernel part of iSCSI target.
 Set to 0 to disable logging or 1 to warn about potential problems.
@@ -132,5 +183,7 @@ subsystem first appeared in
 .Sh AUTHORS
 The
 .Nm
-subsystem was written by
+subsystem was originally written by
 .An Kenneth Merry Aq Mt ken@FreeBSD.org .
+Later work was done by
+.An Alexander Motin Aq Mt mav@FreeBSD.org .

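For orientation, the new High Availability tunables documented in ctl.4 above combine roughly as follows. This is only a sketch for one node of a two-node cluster; the peer address and port are placeholders, not values from the commit:

	kern.cam.ctl.ha_id=1                          # this node occupies position 1 in the cluster
	kern.cam.ctl.ha_mode=2                        # Active/Active; secondary forwards requests to primary
	kern.cam.ctl.ha_role=0                        # default role for this node: primary
	kern.cam.ctl.ha_peer="listen 192.0.2.1:7777"  # wait for the peer to connect

The peer node would use ha_id=2, a matching "connect IP:port" value for kern.cam.ctl.ha_peer, and (unless overridden per LUN with the "ha_role" LUN option) ha_role=1, so that exactly one node acts as primary.
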
Modified: projects/iosched/share/man/man4/geom_fox.4
==============================================================================
--- projects/iosched/share/man/man4/geom_fox.4	Sat Sep 12 20:06:22 2015	(r287721)
+++ projects/iosched/share/man/man4/geom_fox.4	Sat Sep 12 20:14:54 2015	(r287722)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd January 2, 2005
+.Dd September 12, 2015
 .Dt GEOM_FOX 4
 .Os
 .Sh NAME
@@ -46,6 +46,13 @@ module at boot time, place the following
 geom_fox_load="YES"
 .Ed
 .Sh DESCRIPTION
+.Bf -symbolic
+This driver is obsolete.
+Users are advised to use
+.Xr gmultipath 8
+instead.
+.Ef
+.Pp
 The intent of the
 .Nm
 framework is to provide basic multipathing support to access direct

Modified: projects/iosched/sys/cam/ctl/README.ctl.txt
==============================================================================
--- projects/iosched/sys/cam/ctl/README.ctl.txt	Sat Sep 12 20:06:22 2015	(r287721)
+++ projects/iosched/sys/cam/ctl/README.ctl.txt	Sat Sep 12 20:14:54 2015	(r287722)
@@ -40,25 +40,24 @@ Features:
  - Support for multiple ports
  - Support for multiple simultaneous initiators
  - Support for multiple simultaneous backing stores
+ - Support for VMWare VAAI: COMPARE AND WRITE, XCOPY, WRITE SAME and
+   UNMAP commands
+ - Support for Microsoft ODX: POPULATE TOKEN/WRITE USING TOKEN, WRITE SAME
+   and UNMAP commands
  - Persistent reservation support
  - Mode sense/select support
  - Error injection support
- - High Availability support
+ - High Availability clustering support with ALUA
  - All I/O handled in-kernel, no userland context switch overhead.
 
 Configuring and Running CTL:
 ===========================
 
- - After applying the CTL patchset to your tree, build world and install it
-   on your target system.
-
- - Add 'device ctl' to your kernel configuration file.
+ - Add 'device ctl' to your kernel configuration file or load the module.
 
  - If you're running with a 8Gb or 4Gb Qlogic FC board, add
-   'options ISP_TARGET_MODE' to your kernel config file.  Keep in mind that
-   the isp(4) driver can run in target or initiator mode, but not both on
-   the same machine.  'device ispfw' or loading the ispfw module is also
-   recommended.
+   'options ISP_TARGET_MODE' to your kernel config file. 'device ispfw' or
+   loading the ispfw module is also recommended.
 
  - Rebuild and install a new kernel.
 

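Spelled out, the kernel configuration additions that the updated README describes amount to something like the snippet below; ISP_TARGET_MODE and ispfw apply only to the 8Gb/4Gb Qlogic FC case mentioned above, and ctl can instead be loaded as a module:

	device    ctl               # CAM Target Layer
	options   ISP_TARGET_MODE   # target mode for 8Gb/4Gb Qlogic FC boards
	device    ispfw             # isp(4) firmware, recommended
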
Modified: projects/iosched/sys/cam/ctl/ctl.c
==============================================================================
--- projects/iosched/sys/cam/ctl/ctl.c	Sat Sep 12 20:06:22 2015	(r287721)
+++ projects/iosched/sys/cam/ctl/ctl.c	Sat Sep 12 20:14:54 2015	(r287722)
@@ -673,7 +673,10 @@ ctl_isc_ha_link_down(struct ctl_softc *s
 	mtx_lock(&softc->ctl_lock);
 	STAILQ_FOREACH(lun, &softc->lun_list, links) {
 		mtx_lock(&lun->lun_lock);
-		lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY;
+		if (lun->flags & CTL_LUN_PEER_SC_PRIMARY) {
+			lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY;
+			ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
+		}
 		mtx_unlock(&lun->lun_lock);
 
 		mtx_unlock(&softc->ctl_lock);
@@ -700,8 +703,11 @@ ctl_isc_ua(struct ctl_softc *softc, unio
 	struct ctl_lun *lun;
 	uint32_t iid = ctl_get_initindex(&msg->hdr.nexus);
 
+	mtx_lock(&softc->ctl_lock);
 	if (msg->hdr.nexus.targ_lun < CTL_MAX_LUNS &&
-	    (lun = softc->ctl_luns[msg->hdr.nexus.targ_lun]) != NULL) {
+	    (lun = softc->ctl_luns[msg->hdr.nexus.targ_mapped_lun]) != NULL) {
+		mtx_lock(&lun->lun_lock);
+		mtx_unlock(&softc->ctl_lock);
 		if (msg->ua.ua_all) {
 			if (msg->ua.ua_set)
 				ctl_est_ua_all(lun, iid, msg->ua.ua_type);
@@ -713,7 +719,9 @@ ctl_isc_ua(struct ctl_softc *softc, unio
 			else
 				ctl_clr_ua(lun, iid, msg->ua.ua_type);
 		}
-	}
+		mtx_unlock(&lun->lun_lock);
+	} else
+		mtx_unlock(&softc->ctl_lock);
 }
 
 static void
@@ -722,58 +730,69 @@ ctl_isc_lun_sync(struct ctl_softc *softc
 	struct ctl_lun *lun;
 	struct ctl_ha_msg_lun_pr_key pr_key;
 	int i, k;
+	ctl_lun_flags oflags;
+	uint32_t targ_lun;
 
-	lun = softc->ctl_luns[msg->hdr.nexus.targ_lun];
-	if (lun == NULL) {
-		CTL_DEBUG_PRINT(("%s: Unknown LUN %d\n", __func__,
-		    msg->hdr.nexus.targ_lun));
+	targ_lun = msg->hdr.nexus.targ_mapped_lun;
+	mtx_lock(&softc->ctl_lock);
+	if ((targ_lun >= CTL_MAX_LUNS) ||
+	    ((lun = softc->ctl_luns[targ_lun]) == NULL)) {
+		mtx_unlock(&softc->ctl_lock);
+		return;
+	}
+	mtx_lock(&lun->lun_lock);
+	mtx_unlock(&softc->ctl_lock);
+	if (lun->flags & CTL_LUN_DISABLED) {
+		mtx_unlock(&lun->lun_lock);
+		return;
+	}
+	i = (lun->lun_devid != NULL) ? lun->lun_devid->len : 0;
+	if (msg->lun.lun_devid_len != i || (i > 0 &&
+	    memcmp(&msg->lun.data[0], lun->lun_devid->data, i) != 0)) {
+		mtx_unlock(&lun->lun_lock);
+		printf("%s: Received conflicting HA LUN %d\n",
+		    __func__, msg->hdr.nexus.targ_lun);
+		return;
 	} else {
-		mtx_lock(&lun->lun_lock);
-		i = (lun->lun_devid != NULL) ? lun->lun_devid->len : 0;
-		if (msg->lun.lun_devid_len != i || (i > 0 &&
-		    memcmp(&msg->lun.data[0], lun->lun_devid->data, i) != 0)) {
-			mtx_unlock(&lun->lun_lock);
-			printf("%s: Received conflicting HA LUN %d\n",
-			    __func__, msg->hdr.nexus.targ_lun);
-			return;
-		} else {
-			/* Record whether peer is primary. */
-			if ((msg->lun.flags & CTL_LUN_PRIMARY_SC) &&
-			    (msg->lun.flags & CTL_LUN_DISABLED) == 0)
-				lun->flags |= CTL_LUN_PEER_SC_PRIMARY;
-			else
-				lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY;
-
-			/* If peer is primary and we are not -- use data */
-			if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 &&
-			    (lun->flags & CTL_LUN_PEER_SC_PRIMARY)) {
-				lun->PRGeneration = msg->lun.pr_generation;
-				lun->pr_res_idx = msg->lun.pr_res_idx;
-				lun->res_type = msg->lun.pr_res_type;
-				lun->pr_key_count = msg->lun.pr_key_count;
-				for (k = 0; k < CTL_MAX_INITIATORS; k++)
-					ctl_clr_prkey(lun, k);
-				for (k = 0; k < msg->lun.pr_key_count; k++) {
-					memcpy(&pr_key, &msg->lun.data[i],
-					    sizeof(pr_key));
-					ctl_alloc_prkey(lun, pr_key.pr_iid);
-					ctl_set_prkey(lun, pr_key.pr_iid,
-					    pr_key.pr_key);
-					i += sizeof(pr_key);
-				}
+		/* Record whether peer is primary. */
+		oflags = lun->flags;
+		if ((msg->lun.flags & CTL_LUN_PRIMARY_SC) &&
+		    (msg->lun.flags & CTL_LUN_DISABLED) == 0)
+			lun->flags |= CTL_LUN_PEER_SC_PRIMARY;
+		else
+			lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY;
+		if (oflags != lun->flags)
+			ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
+
+		/* If peer is primary and we are not -- use data */
+		if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 &&
+		    (lun->flags & CTL_LUN_PEER_SC_PRIMARY)) {
+			lun->PRGeneration = msg->lun.pr_generation;
+			lun->pr_res_idx = msg->lun.pr_res_idx;
+			lun->res_type = msg->lun.pr_res_type;
+			lun->pr_key_count = msg->lun.pr_key_count;
+			for (k = 0; k < CTL_MAX_INITIATORS; k++)
+				ctl_clr_prkey(lun, k);
+			for (k = 0; k < msg->lun.pr_key_count; k++) {
+				memcpy(&pr_key, &msg->lun.data[i],
+				    sizeof(pr_key));
+				ctl_alloc_prkey(lun, pr_key.pr_iid);
+				ctl_set_prkey(lun, pr_key.pr_iid,
+				    pr_key.pr_key);
+				i += sizeof(pr_key);
 			}
-
-			mtx_unlock(&lun->lun_lock);
-			CTL_DEBUG_PRINT(("%s: Known LUN %d, peer is %s\n",
-			    __func__, msg->hdr.nexus.targ_lun,
-			    (msg->lun.flags & CTL_LUN_PRIMARY_SC) ?
-			    "primary" : "secondary"));
-
-			/* If we are primary but peer doesn't know -- notify */
-			if ((lun->flags & CTL_LUN_PRIMARY_SC) &&
-			    (msg->lun.flags & CTL_LUN_PEER_SC_PRIMARY) == 0)
-				ctl_isc_announce_lun(lun);
 		}
+
+		mtx_unlock(&lun->lun_lock);
+		CTL_DEBUG_PRINT(("%s: Known LUN %d, peer is %s\n",
+		    __func__, msg->hdr.nexus.targ_lun,
+		    (msg->lun.flags & CTL_LUN_PRIMARY_SC) ?
+		    "primary" : "secondary"));
+
+		/* If we are primary but peer doesn't know -- notify */
+		if ((lun->flags & CTL_LUN_PRIMARY_SC) &&
+		    (msg->lun.flags & CTL_LUN_PEER_SC_PRIMARY) == 0)
+			ctl_isc_announce_lun(lun);
 	}
 }
 
@@ -1730,20 +1749,24 @@ ctl_serialize_other_sc_cmd(struct ctl_sc
 	softc = control_softc;
 
 	targ_lun = ctsio->io_hdr.nexus.targ_mapped_lun;
+	mtx_lock(&softc->ctl_lock);
 	if ((targ_lun < CTL_MAX_LUNS) &&
 	    ((lun = softc->ctl_luns[targ_lun]) != NULL)) {
+		mtx_lock(&lun->lun_lock);
+		mtx_unlock(&softc->ctl_lock);
 		/*
 		 * If the LUN is invalid, pretend that it doesn't exist.
 		 * It will go away as soon as all pending I/O has been
 		 * completed.
 		 */
-		mtx_lock(&lun->lun_lock);
 		if (lun->flags & CTL_LUN_DISABLED) {
 			mtx_unlock(&lun->lun_lock);
 			lun = NULL;
 		}
-	} else
+	} else {
+		mtx_unlock(&softc->ctl_lock);
 		lun = NULL;
+	}
 	if (lun == NULL) {
 		/*
 		 * The other node would not send this request to us unless
@@ -2514,6 +2537,7 @@ ctl_ioctl(struct cdev *dev, u_long cmd, 
 		if (lun == NULL) {
 			mtx_unlock(&softc->ctl_lock);
 			sync_info->status = CTL_GS_SYNC_NO_LUN;
+			break;
 		}
 		/*
 		 * Get or set the sync interval.  We're not bounds checking
@@ -4531,8 +4555,8 @@ ctl_lun_primary(struct ctl_be_lun *be_lu
 
 	mtx_lock(&lun->lun_lock);
 	lun->flags |= CTL_LUN_PRIMARY_SC;
-	mtx_unlock(&lun->lun_lock);
 	ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
+	mtx_unlock(&lun->lun_lock);
 	ctl_isc_announce_lun(lun);
 	return (0);
 }
@@ -4544,8 +4568,8 @@ ctl_lun_secondary(struct ctl_be_lun *be_
 
 	mtx_lock(&lun->lun_lock);
 	lun->flags &= ~CTL_LUN_PRIMARY_SC;
-	mtx_unlock(&lun->lun_lock);
 	ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
+	mtx_unlock(&lun->lun_lock);
 	ctl_isc_announce_lun(lun);
 	return (0);
 }
@@ -8380,13 +8404,23 @@ ctl_hndl_per_res_out_on_other_sc(union c
 	struct ctl_lun *lun;
 	struct ctl_softc *softc;
 	int i;
-	uint32_t targ_lun;
+	uint32_t residx, targ_lun;
 
 	softc = control_softc;
-
 	targ_lun = msg->hdr.nexus.targ_mapped_lun;
-	lun = softc->ctl_luns[targ_lun];
+	mtx_lock(&softc->ctl_lock);
+	if ((targ_lun >= CTL_MAX_LUNS) ||
+	    ((lun = softc->ctl_luns[targ_lun]) == NULL)) {
+		mtx_unlock(&softc->ctl_lock);
+		return;
+	}
 	mtx_lock(&lun->lun_lock);
+	mtx_unlock(&softc->ctl_lock);
+	if (lun->flags & CTL_LUN_DISABLED) {
+		mtx_unlock(&lun->lun_lock);
+		return;
+	}
+	residx = ctl_get_initindex(&msg->hdr.nexus);
 	switch(msg->pr.pr_info.action) {
 	case CTL_PR_REG_KEY:
 		ctl_alloc_prkey(lun, msg->pr.pr_info.residx);
@@ -8451,8 +8485,9 @@ ctl_hndl_per_res_out_on_other_sc(union c
 		if (lun->res_type != SPR_TYPE_EX_AC
 		 && lun->res_type != SPR_TYPE_WR_EX) {
 			for (i = softc->init_min; i < softc->init_max; i++)
-				if (ctl_get_prkey(lun, i) != 0)
-					ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
+				if (i == residx || ctl_get_prkey(lun, i) == 0)
+					continue;
+				ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
 		}
 
 		lun->flags &= ~CTL_LUN_PR_RESERVED;
@@ -11481,13 +11516,24 @@ ctl_i_t_nexus_reset(union ctl_io *io)
 	struct ctl_lun *lun;
 	uint32_t initidx;
 
+	if (!(io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) {
+		union ctl_ha_msg msg_info;
+
+		msg_info.hdr.nexus = io->io_hdr.nexus;
+		msg_info.task.task_action = CTL_TASK_I_T_NEXUS_RESET;
+		msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS;
+		msg_info.hdr.original_sc = NULL;
+		msg_info.hdr.serializing_sc = NULL;
+		ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
+		    sizeof(msg_info.task), M_WAITOK);
+	}
+
 	initidx = ctl_get_initindex(&io->io_hdr.nexus);
 	mtx_lock(&softc->ctl_lock);
 	STAILQ_FOREACH(lun, &softc->lun_list, links) {
 		mtx_lock(&lun->lun_lock);
 		ctl_abort_tasks_lun(lun, io->io_hdr.nexus.targ_port,
-		    io->io_hdr.nexus.initid,
-		    (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) != 0);
+		    io->io_hdr.nexus.initid, 1);
 #ifdef CTL_WITH_CA
 		ctl_clear_mask(lun->have_ca, initidx);
 #endif

Modified: projects/iosched/sys/cam/ctl/ctl_cmd_table.c
==============================================================================
--- projects/iosched/sys/cam/ctl/ctl_cmd_table.c	Sat Sep 12 20:06:22 2015	(r287721)
+++ projects/iosched/sys/cam/ctl/ctl_cmd_table.c	Sat Sep 12 20:14:54 2015	(r287722)
@@ -486,7 +486,7 @@ const struct ctl_cmd_entry ctl_cmd_table
 						CTL_FLAG_DATA_IN |
 						CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
  CTL_LUN_PAT_NONE,
- 12, {0x0a, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
+ 12, {0xea, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
 
 /* 0B */
 {NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},

Modified: projects/iosched/sys/cam/ctl/ctl_tpc.c
==============================================================================
--- projects/iosched/sys/cam/ctl/ctl_tpc.c	Sat Sep 12 20:06:22 2015	(r287721)
+++ projects/iosched/sys/cam/ctl/ctl_tpc.c	Sat Sep 12 20:14:54 2015	(r287722)
@@ -394,8 +394,7 @@ ctl_inquiry_evpd_tpc(struct ctl_scsiio *
 	scsi_ulto2b(0, rtfb_ptr->optimal_length_granularity);
 	scsi_u64to8b(0, rtfb_ptr->maximum_bytes);
 	scsi_u64to8b(0, rtfb_ptr->optimal_bytes);
-	scsi_u64to8b(TPC_MAX_IOCHUNK_SIZE,
-	    rtfb_ptr->optimal_bytes_to_token_per_segment);
+	scsi_u64to8b(UINT64_MAX, rtfb_ptr->optimal_bytes_to_token_per_segment);
 	scsi_u64to8b(TPC_MAX_IOCHUNK_SIZE,
 	    rtfb_ptr->optimal_bytes_from_token_per_segment);
 
@@ -1590,6 +1589,10 @@ ctl_extended_copy_lid1(struct ctl_scsiio
 	cdb = (struct scsi_extended_copy *)ctsio->cdb;
 	len = scsi_4btoul(cdb->length);
 
+	if (len == 0) {
+		ctl_set_success(ctsio);
+		goto done;
+	}
 	if (len < sizeof(struct scsi_extended_copy_lid1_data) ||
 	    len > sizeof(struct scsi_extended_copy_lid1_data) +
 	    TPC_MAX_LIST + TPC_MAX_INLINE) {
@@ -1620,20 +1623,22 @@ ctl_extended_copy_lid1(struct ctl_scsiio
 	lencscd = scsi_2btoul(data->cscd_list_length);
 	lenseg = scsi_4btoul(data->segment_list_length);
 	leninl = scsi_4btoul(data->inline_data_length);
-	if (len < sizeof(struct scsi_extended_copy_lid1_data) +
-	    lencscd + lenseg + leninl ||
-	    leninl > TPC_MAX_INLINE) {
-		ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0,
-		    /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0);
-		goto done;
-	}
 	if (lencscd > TPC_MAX_CSCDS * sizeof(struct scsi_ec_cscd)) {
 		ctl_set_sense(ctsio, /*current_error*/ 1,
 		    /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
 		    /*asc*/ 0x26, /*ascq*/ 0x06, SSD_ELEM_NONE);
 		goto done;
 	}
-	if (lencscd + lenseg > TPC_MAX_LIST) {
+	if (lenseg > TPC_MAX_SEGS * sizeof(struct scsi_ec_segment)) {
+		ctl_set_sense(ctsio, /*current_error*/ 1,
+		    /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
+		    /*asc*/ 0x26, /*ascq*/ 0x08, SSD_ELEM_NONE);
+		goto done;
+	}
+	if (lencscd + lenseg > TPC_MAX_LIST ||
+	    leninl > TPC_MAX_INLINE ||
+	    len < sizeof(struct scsi_extended_copy_lid1_data) +
+	     lencscd + lenseg + leninl) {
 		ctl_set_param_len_error(ctsio);
 		goto done;
 	}
@@ -1717,6 +1722,10 @@ ctl_extended_copy_lid4(struct ctl_scsiio
 	cdb = (struct scsi_extended_copy *)ctsio->cdb;
 	len = scsi_4btoul(cdb->length);
 
+	if (len == 0) {
+		ctl_set_success(ctsio);
+		goto done;
+	}
 	if (len < sizeof(struct scsi_extended_copy_lid4_data) ||
 	    len > sizeof(struct scsi_extended_copy_lid4_data) +
 	    TPC_MAX_LIST + TPC_MAX_INLINE) {
@@ -1747,20 +1756,22 @@ ctl_extended_copy_lid4(struct ctl_scsiio
 	lencscd = scsi_2btoul(data->cscd_list_length);
 	lenseg = scsi_2btoul(data->segment_list_length);
 	leninl = scsi_2btoul(data->inline_data_length);
-	if (len < sizeof(struct scsi_extended_copy_lid4_data) +
-	    lencscd + lenseg + leninl ||
-	    leninl > TPC_MAX_INLINE) {
-		ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0,
-		    /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0);
-		goto done;
-	}
 	if (lencscd > TPC_MAX_CSCDS * sizeof(struct scsi_ec_cscd)) {
 		ctl_set_sense(ctsio, /*current_error*/ 1,
 		    /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
 		    /*asc*/ 0x26, /*ascq*/ 0x06, SSD_ELEM_NONE);
 		goto done;
 	}
-	if (lencscd + lenseg > TPC_MAX_LIST) {
+	if (lenseg > TPC_MAX_SEGS * sizeof(struct scsi_ec_segment)) {
+		ctl_set_sense(ctsio, /*current_error*/ 1,
+		    /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
+		    /*asc*/ 0x26, /*ascq*/ 0x08, SSD_ELEM_NONE);
+		goto done;
+	}
+	if (lencscd + lenseg > TPC_MAX_LIST ||
+	    leninl > TPC_MAX_INLINE ||
+	    len < sizeof(struct scsi_extended_copy_lid1_data) +
+	     lencscd + lenseg + leninl) {
 		ctl_set_param_len_error(ctsio);
 		goto done;
 	}

Modified: projects/iosched/sys/cam/ctl/ctl_tpc_local.c
==============================================================================
--- projects/iosched/sys/cam/ctl/ctl_tpc_local.c	Sat Sep 12 20:06:22 2015	(r287721)
+++ projects/iosched/sys/cam/ctl/ctl_tpc_local.c	Sat Sep 12 20:14:54 2015	(r287722)
@@ -281,7 +281,8 @@ tpcl_resolve(struct ctl_softc *softc, in
 	struct ctl_lun *lun;
 	uint64_t lunid = UINT64_MAX;
 
-	if (cscd->type_code != EC_CSCD_ID)
+	if (cscd->type_code != EC_CSCD_ID ||
+	    (cscd->luidt_pdt & EC_LUIDT_MASK) != EC_LUIDT_LUN)
 		return (lunid);
 
 	cscdid = (struct scsi_ec_cscd_id *)cscd;

Modified: projects/iosched/sys/cam/scsi/scsi_all.c
==============================================================================
--- projects/iosched/sys/cam/scsi/scsi_all.c	Sat Sep 12 20:06:22 2015	(r287721)
+++ projects/iosched/sys/cam/scsi/scsi_all.c	Sat Sep 12 20:14:54 2015	(r287722)
@@ -509,7 +509,8 @@ static struct op_table_entry scsi_op_cod
 	/* 99 */
 	/* 9A */
 	/* 9B */
-	/* 9C */
+	/* 9C  O              WRITE ATOMIC(16) */
+	{ 0x9C, D, "WRITE ATOMIC(16)" },
 	/* 9D */
 	/* XXX KDM ALL for this?  op-num.txt defines it for none.. */
 	/* 9E                  SERVICE ACTION IN(16) */

Modified: projects/iosched/sys/cddl/contrib/opensolaris/common/avl/avl.c
==============================================================================
--- projects/iosched/sys/cddl/contrib/opensolaris/common/avl/avl.c	Sat Sep 12 20:06:22 2015	(r287721)
+++ projects/iosched/sys/cddl/contrib/opensolaris/common/avl/avl.c	Sat Sep 12 20:14:54 2015	(r287722)
@@ -25,6 +25,7 @@
 
 /*
  * Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  */
 
 /*
@@ -635,14 +636,17 @@ avl_add(avl_tree_t *tree, void *new_node
 	/*
 	 * This is unfortunate.  We want to call panic() here, even for
 	 * non-DEBUG kernels.  In userland, however, we can't depend on anything
-	 * in libc or else the rtld build process gets confused.  So, all we can
-	 * do in userland is resort to a normal ASSERT().
+	 * in libc or else the rtld build process gets confused.
+	 * Thankfully, rtld provides us with its own assfail() so we can use
+	 * that here.  We use assfail() directly to get a nice error message
+	 * in the core - much like what panic() does for crashdumps.
 	 */
 	if (avl_find(tree, new_node, &where) != NULL)
 #ifdef _KERNEL
 		panic("avl_find() succeeded inside avl_add()");
 #else
-		ASSERT(0);
+		(void) assfail("avl_find() succeeded inside avl_add()",
+		    __FILE__, __LINE__);
 #endif
 	avl_insert(tree, new_node, where);
 }

Modified: projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Sat Sep 12 20:06:22 2015	(r287721)
+++ projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Sat Sep 12 20:14:54 2015	(r287722)
@@ -213,7 +213,7 @@ static int		arc_min_prefetch_lifespan;
 int arc_lotsfree_percent = 10;
 
 static int arc_dead;
-extern int zfs_prefetch_disable;
+extern boolean_t zfs_prefetch_disable;
 
 /*
  * The arc has filled available memory and has now warmed up.
@@ -582,6 +582,8 @@ typedef struct arc_stats {
 	kstat_named_t arcstat_meta_limit;
 	kstat_named_t arcstat_meta_max;
 	kstat_named_t arcstat_meta_min;
+	kstat_named_t arcstat_sync_wait_for_async;
+	kstat_named_t arcstat_demand_hit_predictive_prefetch;
 } arc_stats_t;
 
 static arc_stats_t arc_stats = {
@@ -680,7 +682,9 @@ static arc_stats_t arc_stats = {
 	{ "arc_meta_used",		KSTAT_DATA_UINT64 },
 	{ "arc_meta_limit",		KSTAT_DATA_UINT64 },
 	{ "arc_meta_max",		KSTAT_DATA_UINT64 },
-	{ "arc_meta_min",		KSTAT_DATA_UINT64 }
+	{ "arc_meta_min",		KSTAT_DATA_UINT64 },
+	{ "sync_wait_for_async",	KSTAT_DATA_UINT64 },
+	{ "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 },
 };
 
 #define	ARCSTAT(stat)	(arc_stats.stat.value.ui64)
@@ -844,6 +848,7 @@ typedef struct l2arc_buf_hdr {
 	uint64_t		b_daddr;	/* disk address, offset byte */
 	/* real alloc'd buffer size depending on b_compress applied */
 	int32_t			b_asize;
+	uint8_t			b_compress;
 
 	list_node_t		b_l2node;
 } l2arc_buf_hdr_t;
@@ -923,15 +928,6 @@ static arc_buf_hdr_t arc_eviction_hdr;
 #define	HDR_HAS_L1HDR(hdr)	((hdr)->b_flags & ARC_FLAG_HAS_L1HDR)
 #define	HDR_HAS_L2HDR(hdr)	((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)
 
-/* For storing compression mode in b_flags */
-#define	HDR_COMPRESS_OFFSET	24
-#define	HDR_COMPRESS_NBITS	7
-
-#define	HDR_GET_COMPRESS(hdr)	((enum zio_compress)BF32_GET(hdr->b_flags, \
-	    HDR_COMPRESS_OFFSET, HDR_COMPRESS_NBITS))
-#define	HDR_SET_COMPRESS(hdr, cmp) BF32_SET(hdr->b_flags, \
-	    HDR_COMPRESS_OFFSET, HDR_COMPRESS_NBITS, (cmp))
-
 /*
  * Other sizes
  */
@@ -2222,7 +2218,7 @@ arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr
 	 * separately compressed buffer, so there's nothing to free (it
 	 * points to the same buffer as the arc_buf_t's b_data field).
 	 */
-	if (HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_OFF) {
+	if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_OFF) {
 		hdr->b_l1hdr.b_tmp_cdata = NULL;
 		return;
 	}
@@ -2231,12 +2227,12 @@ arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr
 	 * There's nothing to free since the buffer was all zero's and
 	 * compressed to a zero length buffer.
 	 */
-	if (HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_EMPTY) {
+	if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_EMPTY) {
 		ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
 		return;
 	}
 
-	ASSERT(L2ARC_IS_VALID_COMPRESS(HDR_GET_COMPRESS(hdr)));
+	ASSERT(L2ARC_IS_VALID_COMPRESS(hdr->b_l2hdr.b_compress));
 
 	arc_buf_free_on_write(hdr->b_l1hdr.b_tmp_cdata,
 	    hdr->b_size, zio_data_buf_free);
@@ -4250,6 +4246,36 @@ top:
 
 		if (HDR_IO_IN_PROGRESS(hdr)) {
 
+			if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) &&
+			    priority == ZIO_PRIORITY_SYNC_READ) {
+				/*
+				 * This sync read must wait for an
+				 * in-progress async read (e.g. a predictive
+				 * prefetch).  Async reads are queued
+				 * separately at the vdev_queue layer, so
+				 * this is a form of priority inversion.
+				 * Ideally, we would "inherit" the demand
+				 * i/o's priority by moving the i/o from
+				 * the async queue to the synchronous queue,
+				 * but there is currently no mechanism to do
+				 * so.  Track this so that we can evaluate
+				 * the magnitude of this potential performance
+				 * problem.
+				 *
+				 * Note that if the prefetch i/o is already
+				 * active (has been issued to the device),
+				 * the prefetch improved performance, because
+				 * we issued it sooner than we would have
+				 * without the prefetch.
+				 */
+				DTRACE_PROBE1(arc__sync__wait__for__async,
+				    arc_buf_hdr_t *, hdr);
+				ARCSTAT_BUMP(arcstat_sync_wait_for_async);
+			}
+			if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
+				hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH;
+			}
+
 			if (*arc_flags & ARC_FLAG_WAIT) {
 				cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);
 				mutex_exit(hash_lock);
@@ -4258,7 +4284,7 @@ top:
 			ASSERT(*arc_flags & ARC_FLAG_NOWAIT);
 
 			if (done) {
-				arc_callback_t	*acb = NULL;
+				arc_callback_t *acb = NULL;
 
 				acb = kmem_zalloc(sizeof (arc_callback_t),
 				    KM_SLEEP);
@@ -4283,6 +4309,19 @@ top:
 		    hdr->b_l1hdr.b_state == arc_mfu);
 
 		if (done) {
+			if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
+				/*
+				 * This is a demand read which does not have to
+				 * wait for i/o because we did a predictive
+				 * prefetch i/o for it, which has completed.
+				 */
+				DTRACE_PROBE1(
+				    arc__demand__hit__predictive__prefetch,
+				    arc_buf_hdr_t *, hdr);
+				ARCSTAT_BUMP(
+				    arcstat_demand_hit_predictive_prefetch);
+				hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH;
+			}
 			add_reference(hdr, hash_lock, private);
 			/*
 			 * If this block is already in use, create a new
@@ -4345,12 +4384,16 @@ top:
 				goto top; /* restart the IO request */
 			}
 
-			/* if this is a prefetch, we don't have a reference */
-			if (*arc_flags & ARC_FLAG_PREFETCH) {
+			/*
+			 * If there is a callback, we pass our reference to
+			 * it; otherwise we remove our reference.
+			 */
+			if (done == NULL) {
 				(void) remove_reference(hdr, hash_lock,
 				    private);
-				hdr->b_flags |= ARC_FLAG_PREFETCH;
 			}
+			if (*arc_flags & ARC_FLAG_PREFETCH)
+				hdr->b_flags |= ARC_FLAG_PREFETCH;
 			if (*arc_flags & ARC_FLAG_L2CACHE)
 				hdr->b_flags |= ARC_FLAG_L2CACHE;
 			if (*arc_flags & ARC_FLAG_L2COMPRESS)
@@ -4373,11 +4416,13 @@ top:
 			ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
 			ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
 
-			/* if this is a prefetch, we don't have a reference */
+			/*
+			 * If there is a callback, we pass a reference to it.
+			 */
+			if (done != NULL)
+				add_reference(hdr, hash_lock, private);
 			if (*arc_flags & ARC_FLAG_PREFETCH)
 				hdr->b_flags |= ARC_FLAG_PREFETCH;
-			else
-				add_reference(hdr, hash_lock, private);
 			if (*arc_flags & ARC_FLAG_L2CACHE)
 				hdr->b_flags |= ARC_FLAG_L2CACHE;
 			if (*arc_flags & ARC_FLAG_L2COMPRESS)
@@ -4395,6 +4440,8 @@ top:
 			arc_access(hdr, hash_lock);
 		}
 
+		if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH)
+			hdr->b_flags |= ARC_FLAG_PREDICTIVE_PREFETCH;
 		ASSERT(!GHOST_STATE(hdr->b_l1hdr.b_state));
 
 		acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
@@ -4409,7 +4456,7 @@ top:
 		    (vd = hdr->b_l2hdr.b_dev->l2ad_vdev) != NULL) {
 			devw = hdr->b_l2hdr.b_dev->l2ad_writing;
 			addr = hdr->b_l2hdr.b_daddr;
-			b_compress = HDR_GET_COMPRESS(hdr);
+			b_compress = hdr->b_l2hdr.b_compress;
 			b_asize = hdr->b_l2hdr.b_asize;
 			/*
 			 * Lock out device removal.
@@ -4437,6 +4484,11 @@ top:
 		curthread->td_ru.ru_inblock++;
 #endif
 
+		if (priority == ZIO_PRIORITY_ASYNC_READ)
+			hdr->b_flags |= ARC_FLAG_PRIO_ASYNC_READ;
+		else
+			hdr->b_flags &= ~ARC_FLAG_PRIO_ASYNC_READ;
+
 		if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) {
 			/*
 			 * Read from the L2ARC if the following are true:
@@ -5965,6 +6017,8 @@ l2arc_read_done(zio_t *zio)
 	if (cb->l2rcb_compress != ZIO_COMPRESS_OFF)
 		l2arc_decompress_zio(zio, hdr, cb->l2rcb_compress);
 	ASSERT(zio->io_data != NULL);
+	ASSERT3U(zio->io_size, ==, hdr->b_size);
+	ASSERT3U(BP_GET_LSIZE(&cb->l2rcb_bp), ==, hdr->b_size);
 
 	/*
 	 * Check this survived the L2ARC journey.
@@ -6001,7 +6055,7 @@ l2arc_read_done(zio_t *zio)
 			ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL);
 
 			zio_nowait(zio_read(pio, cb->l2rcb_spa, &cb->l2rcb_bp,
-			    buf->b_data, zio->io_size, arc_read_done, buf,
+			    buf->b_data, hdr->b_size, arc_read_done, buf,
 			    zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb));
 		}
 	}
@@ -6318,7 +6372,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
 			 * can't access without holding the ARC list locks
 			 * (which we want to avoid during compression/writing).
 			 */
-			HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
+			hdr->b_l2hdr.b_compress = ZIO_COMPRESS_OFF;
 			hdr->b_l2hdr.b_asize = hdr->b_size;
 			hdr->b_l1hdr.b_tmp_cdata = hdr->b_l1hdr.b_buf->b_data;
 
@@ -6520,7 +6574,7 @@ l2arc_compress_buf(arc_buf_hdr_t *hdr)
 	l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
 
 	ASSERT(HDR_HAS_L1HDR(hdr));
-	ASSERT(HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_OFF);
+	ASSERT3S(l2hdr->b_compress, ==, ZIO_COMPRESS_OFF);
 	ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL);
 
 	len = l2hdr->b_asize;
@@ -6532,7 +6586,7 @@ l2arc_compress_buf(arc_buf_hdr_t *hdr)
 	if (csize == 0) {
 		/* zero block, indicate that there's nothing to write */
 		zio_data_buf_free(cdata, len);
-		HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_EMPTY);
+		l2hdr->b_compress = ZIO_COMPRESS_EMPTY;
 		l2hdr->b_asize = 0;
 		hdr->b_l1hdr.b_tmp_cdata = NULL;
 		ARCSTAT_BUMP(arcstat_l2_compress_zeros);
@@ -6550,7 +6604,7 @@ l2arc_compress_buf(arc_buf_hdr_t *hdr)
 			bzero((char *)cdata + csize, rounded - csize);
 			csize = rounded;
 		}
-		HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_LZ4);
+		l2hdr->b_compress = ZIO_COMPRESS_LZ4;
 		l2hdr->b_asize = csize;
 		hdr->b_l1hdr.b_tmp_cdata = cdata;
 		ARCSTAT_BUMP(arcstat_l2_compress_successes);
@@ -6637,7 +6691,8 @@ l2arc_decompress_zio(zio_t *zio, arc_buf
 static void
 l2arc_release_cdata_buf(arc_buf_hdr_t *hdr)
 {
-	enum zio_compress comp = HDR_GET_COMPRESS(hdr);
+	ASSERT(HDR_HAS_L2HDR(hdr));
+	enum zio_compress comp = hdr->b_l2hdr.b_compress;
 
 	ASSERT(HDR_HAS_L1HDR(hdr));
 	ASSERT(comp == ZIO_COMPRESS_OFF || L2ARC_IS_VALID_COMPRESS(comp));

Modified: projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Sat Sep 12 20:06:22 2015	(r287721)
+++ projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Sat Sep 12 20:14:54 2015	(r287722)
@@ -618,7 +618,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *bu
 }
 
 static void
-dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
+dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 {
 	dnode_t *dn;
 	zbookmark_phys_t zb;
@@ -664,7 +664,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t
 		    db->db.db_size, db, type));
 		bzero(db->db.db_data, db->db.db_size);
 		db->db_state = DB_CACHED;
-		*flags |= DB_RF_CACHED;
 		mutex_exit(&db->db_mtx);
 		return;
 	}
@@ -687,10 +686,8 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t
 
 	(void) arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
 	    dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
-	    (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
+	    (flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
 	    &aflags, &zb);
-	if (aflags & ARC_FLAG_CACHED)
-		*flags |= DB_RF_CACHED;
 }
 
 int
@@ -723,8 +720,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio
 	if (db->db_state == DB_CACHED) {
 		mutex_exit(&db->db_mtx);
 		if (prefetch)
-			dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
-			    db->db.db_size, TRUE);
+			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1);
 		if ((flags & DB_RF_HAVESTRUCT) == 0)
 			rw_exit(&dn->dn_struct_rwlock);
 		DB_DNODE_EXIT(db);
@@ -733,13 +729,12 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio
 
 		if (zio == NULL)
 			zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
-		dbuf_read_impl(db, zio, &flags);
+		dbuf_read_impl(db, zio, flags);
 
 		/* dbuf_read_impl has dropped db_mtx for us */
 
 		if (prefetch)
-			dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
-			    db->db.db_size, flags & DB_RF_CACHED);
+			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1);
 
 		if ((flags & DB_RF_HAVESTRUCT) == 0)
 			rw_exit(&dn->dn_struct_rwlock);
@@ -758,8 +753,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio
 		 */
 		mutex_exit(&db->db_mtx);
 		if (prefetch)
-			dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
-			    db->db.db_size, TRUE);
+			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1);
 		if ((flags & DB_RF_HAVESTRUCT) == 0)
 			rw_exit(&dn->dn_struct_rwlock);
 		DB_DNODE_EXIT(db);
@@ -2059,6 +2053,9 @@ dbuf_prefetch(dnode_t *dn, int64_t level
 	ASSERT(blkid != DMU_BONUS_BLKID);
 	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
 
+	if (blkid > dn->dn_maxblkid)
+		return;
+
 	if (dnode_block_freed(dn, blkid))
 		return;
 

Modified: projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
==============================================================================
--- projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	Sat Sep 12 20:06:22 2015	(r287721)
+++ projects/iosched/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	Sat Sep 12 20:14:54 2015	(r287722)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
  */
 /* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. */
 /* Copyright (c) 2013, Joyent, Inc. All rights reserved. */
@@ -389,7 +389,7 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus
  */
 static int
 dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
-    int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
+    boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
 {
 	dmu_buf_t **dbp;
 	uint64_t blkid, nblks, i;
@@ -399,15 +399,19 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn,
 
 	ASSERT(length <= DMU_MAX_ACCESS);
 
-	dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT;
-	if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
-		dbuf_flags |= DB_RF_NOPREFETCH;
+	/*
+	 * Note: We directly notify the prefetch code of this read, so that
+	 * we can tell it about the multi-block read.  dbuf_read() only knows
+	 * about the one block it is accessing.
+	 */
+	dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT |
+	    DB_RF_NOPREFETCH;
 
 	rw_enter(&dn->dn_struct_rwlock, RW_READER);
 	if (dn->dn_datablkshift) {
 		int blkshift = dn->dn_datablkshift;
-		nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) -
-		    P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift;
+		nblks = (P2ROUNDUP(offset + length, 1ULL << blkshift) -
+		    P2ALIGN(offset, 1ULL << blkshift)) >> blkshift;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


