From owner-svn-src-stable-10@FreeBSD.ORG Thu Feb 19 14:36:05 2015 Return-Path: Delivered-To: svn-src-stable-10@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 34B01A64; Thu, 19 Feb 2015 14:36:05 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 15A8FD68; Thu, 19 Feb 2015 14:36:05 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.9/8.14.9) with ESMTP id t1JEa4IJ004588; Thu, 19 Feb 2015 14:36:04 GMT (envelope-from mav@FreeBSD.org) Received: (from mav@localhost) by svn.freebsd.org (8.14.9/8.14.9/Submit) id t1JEa4ox004579; Thu, 19 Feb 2015 14:36:04 GMT (envelope-from mav@FreeBSD.org) Message-Id: <201502191436.t1JEa4ox004579@svn.freebsd.org> X-Authentication-Warning: svn.freebsd.org: mav set sender to mav@FreeBSD.org using -f From: Alexander Motin Date: Thu, 19 Feb 2015 14:36:04 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r279004 - stable/10/sys/cam/ctl X-SVN-Group: stable-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable-10@freebsd.org X-Mailman-Version: 2.1.18-1 Precedence: list List-Id: SVN commit messages for only the 10-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 19 Feb 2015 14:36:05 -0000 Author: mav Date: Thu Feb 19 14:36:03 2015 New Revision: 279004 URL: https://svnweb.freebsd.org/changeset/base/279004 Log: MFC r278625: Make XCOPY and WUT commands respect physical block size/offset. 
This change improves the performance of misaligned XCOPY and WUT commands by 2-3 times by avoiding unneeded read-modify-write cycles inside ZFS. Modified: stable/10/sys/cam/ctl/ctl_backend_block.c stable/10/sys/cam/ctl/ctl_tpc.c stable/10/sys/cam/ctl/ctl_tpc.h stable/10/sys/cam/ctl/ctl_tpc_local.c Directory Properties: stable/10/ (props changed) Modified: stable/10/sys/cam/ctl/ctl_backend_block.c ============================================================================== --- stable/10/sys/cam/ctl/ctl_backend_block.c Thu Feb 19 14:33:46 2015 (r279003) +++ stable/10/sys/cam/ctl/ctl_backend_block.c Thu Feb 19 14:36:03 2015 (r279004) @@ -1190,7 +1190,8 @@ ctl_be_block_cw_dispatch_ws(struct ctl_b struct ctl_be_block_io *beio; struct ctl_be_block_softc *softc; struct ctl_lba_len_flags *lbalen; - uint64_t len_left, lba, pb, pbo, adj; + uint64_t len_left, lba; + uint32_t pb, pbo, adj; int i, seglen; uint8_t *buf, *end; @@ -1244,8 +1245,11 @@ ctl_be_block_cw_dispatch_ws(struct ctl_b DPRINTF("WRITE SAME at LBA %jx len %u\n", (uintmax_t)lbalen->lba, lbalen->len); - pb = (uint64_t)be_lun->blocksize << be_lun->pblockexp; - pbo = pb - (uint64_t)be_lun->blocksize * be_lun->pblockoff; + pb = be_lun->blocksize << be_lun->pblockexp; + if (be_lun->pblockoff > 0) + pbo = pb - be_lun->blocksize * be_lun->pblockoff; + else + pbo = 0; len_left = (uint64_t)lbalen->len * be_lun->blocksize; for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { Modified: stable/10/sys/cam/ctl/ctl_tpc.c ============================================================================== --- stable/10/sys/cam/ctl/ctl_tpc.c Thu Feb 19 14:33:46 2015 (r279003) +++ stable/10/sys/cam/ctl/ctl_tpc.c Thu Feb 19 14:36:03 2015 (r279004) @@ -785,18 +785,25 @@ ctl_copy_operation_abort(struct ctl_scsi } static uint64_t -tpc_resolve(struct tpc_list *list, uint16_t idx, uint32_t *ss) +tpc_resolve(struct tpc_list *list, uint16_t idx, uint32_t *ss, + uint32_t *pb, uint32_t *pbo) { if (idx == 0xffff) { if (ss && 
list->lun->be_lun) *ss = list->lun->be_lun->blocksize; + if (pb && list->lun->be_lun) + *pb = list->lun->be_lun->blocksize << + list->lun->be_lun->pblockexp; + if (pbo && list->lun->be_lun) + *pbo = list->lun->be_lun->blocksize * + list->lun->be_lun->pblockoff; return (list->lun->lun); } if (idx >= list->ncscd) return (UINT64_MAX); return (tpcl_resolve(list->lun->ctl_softc, - list->init_port, &list->cscd[idx], ss)); + list->init_port, &list->cscd[idx], ss, pb, pbo)); } static int @@ -809,7 +816,7 @@ tpc_process_b2b(struct tpc_list *list) uint64_t sl, dl; off_t srclba, dstlba, numbytes, donebytes, roundbytes; int numlba; - uint32_t srcblock, dstblock; + uint32_t srcblock, dstblock, pb, pbo, adj; if (list->stage == 1) { while ((tior = TAILQ_FIRST(&list->allio)) != NULL) { @@ -834,14 +841,16 @@ tpc_process_b2b(struct tpc_list *list) TAILQ_INIT(&list->allio); seg = (struct scsi_ec_segment_b2b *)list->seg[list->curseg]; - sl = tpc_resolve(list, scsi_2btoul(seg->src_cscd), &srcblock); - dl = tpc_resolve(list, scsi_2btoul(seg->dst_cscd), &dstblock); + sl = tpc_resolve(list, scsi_2btoul(seg->src_cscd), &srcblock, NULL, NULL); + dl = tpc_resolve(list, scsi_2btoul(seg->dst_cscd), &dstblock, &pb, &pbo); if (sl >= CTL_MAX_LUNS || dl >= CTL_MAX_LUNS) { ctl_set_sense(list->ctsio, /*current_error*/ 1, /*sense_key*/ SSD_KEY_COPY_ABORTED, /*asc*/ 0x08, /*ascq*/ 0x04, SSD_ELEM_NONE); return (CTL_RETVAL_ERROR); } + if (pbo > 0) + pbo = pb - pbo; sdstp = &list->cscd[scsi_2btoul(seg->src_cscd)].dtsp; if (scsi_3btoul(sdstp->block_length) != 0) srcblock = scsi_3btoul(sdstp->block_length); @@ -878,7 +887,16 @@ tpc_process_b2b(struct tpc_list *list) prun = &run; list->tbdio = 1; while (donebytes < numbytes) { - roundbytes = MIN(numbytes - donebytes, TPC_MAX_IO_SIZE); + roundbytes = numbytes - donebytes; + if (roundbytes > TPC_MAX_IO_SIZE) { + roundbytes = TPC_MAX_IO_SIZE; + roundbytes -= roundbytes % dstblock; + if (pb > dstblock) { + adj = (dstlba * dstblock + roundbytes - pbo) % pb; + if 
(roundbytes > adj) + roundbytes -= adj; + } + } tior = malloc(sizeof(*tior), M_CTL, M_WAITOK | M_ZERO); TAILQ_INIT(&tior->run); @@ -891,7 +909,7 @@ tpc_process_b2b(struct tpc_list *list) /*read_op*/ 1, /*byte2*/ 0, /*minimum_cdb_size*/ 0, - /*lba*/ srclba + donebytes / srcblock, + /*lba*/ srclba, /*num_blocks*/ roundbytes / srcblock, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); @@ -910,7 +928,7 @@ tpc_process_b2b(struct tpc_list *list) /*read_op*/ 0, /*byte2*/ 0, /*minimum_cdb_size*/ 0, - /*lba*/ dstlba + donebytes / dstblock, + /*lba*/ dstlba, /*num_blocks*/ roundbytes / dstblock, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); @@ -922,6 +940,8 @@ tpc_process_b2b(struct tpc_list *list) TAILQ_INSERT_TAIL(prun, tior, rlinks); prun = &tior->run; donebytes += roundbytes; + srclba += roundbytes / srcblock; + dstlba += roundbytes / dstblock; } while ((tior = TAILQ_FIRST(&run)) != NULL) { @@ -961,7 +981,7 @@ tpc_process_verify(struct tpc_list *list TAILQ_INIT(&list->allio); seg = (struct scsi_ec_segment_verify *)list->seg[list->curseg]; - sl = tpc_resolve(list, scsi_2btoul(seg->src_cscd), NULL); + sl = tpc_resolve(list, scsi_2btoul(seg->src_cscd), NULL, NULL, NULL); if (sl >= CTL_MAX_LUNS) { ctl_set_sense(list->ctsio, /*current_error*/ 1, /*sense_key*/ SSD_KEY_COPY_ABORTED, @@ -1019,7 +1039,7 @@ tpc_process_register_key(struct tpc_list TAILQ_INIT(&list->allio); seg = (struct scsi_ec_segment_register_key *)list->seg[list->curseg]; - dl = tpc_resolve(list, scsi_2btoul(seg->dst_cscd), NULL); + dl = tpc_resolve(list, scsi_2btoul(seg->dst_cscd), NULL, NULL, NULL); if (dl >= CTL_MAX_LUNS) { ctl_set_sense(list->ctsio, /*current_error*/ 1, /*sense_key*/ SSD_KEY_COPY_ABORTED, @@ -1090,7 +1110,7 @@ tpc_process_wut(struct tpc_list *list) int drange, srange; off_t doffset, soffset; off_t srclba, dstlba, numbytes, donebytes, roundbytes; - uint32_t srcblock, dstblock; + uint32_t srcblock, dstblock, pb, pbo, adj; if (list->stage > 0) { /* Cleanup after previous rounds. 
*/ @@ -1118,6 +1138,11 @@ tpc_process_wut(struct tpc_list *list) &drange, &doffset) != 0) return (CTL_RETVAL_COMPLETE); dstblock = list->lun->be_lun->blocksize; + pb = dstblock << list->lun->be_lun->pblockexp; + if (list->lun->be_lun->pblockoff > 0) + pbo = pb - dstblock * list->lun->be_lun->pblockoff; + else + pbo = 0; /* Check where we are on source ranges list. */ srcblock = list->token->blocksize; @@ -1131,12 +1156,20 @@ tpc_process_wut(struct tpc_list *list) } srclba = scsi_8btou64(list->token->range[srange].lba) + soffset; - numbytes = srcblock * omin(TPC_MAX_IOCHUNK_SIZE / srcblock, - (scsi_4btoul(list->token->range[srange].length) - soffset)); dstlba = scsi_8btou64(list->range[drange].lba) + doffset; - numbytes = omin(numbytes, - dstblock * omin(TPC_MAX_IOCHUNK_SIZE / dstblock, - (scsi_4btoul(list->range[drange].length) - doffset))); + numbytes = srcblock * + (scsi_4btoul(list->token->range[srange].length) - soffset); + numbytes = omin(numbytes, dstblock * + (scsi_4btoul(list->range[drange].length) - doffset)); + if (numbytes > TPC_MAX_IOCHUNK_SIZE) { + numbytes = TPC_MAX_IOCHUNK_SIZE; + numbytes -= numbytes % dstblock; + if (pb > dstblock) { + adj = (dstlba * dstblock + numbytes - pbo) % pb; + if (numbytes > adj) + numbytes -= adj; + } + } if (numbytes % srcblock != 0 || numbytes % dstblock != 0) { ctl_set_sense(list->ctsio, /*current_error*/ 1, @@ -1157,7 +1190,16 @@ tpc_process_wut(struct tpc_list *list) list->tbdio = 1; TAILQ_INIT(&list->allio); while (donebytes < numbytes) { - roundbytes = MIN(numbytes - donebytes, TPC_MAX_IO_SIZE); + roundbytes = numbytes - donebytes; + if (roundbytes > TPC_MAX_IO_SIZE) { + roundbytes = TPC_MAX_IO_SIZE; + roundbytes -= roundbytes % dstblock; + if (pb > dstblock) { + adj = (dstlba * dstblock + roundbytes - pbo) % pb; + if (roundbytes > adj) + roundbytes -= adj; + } + } tior = malloc(sizeof(*tior), M_CTL, M_WAITOK | M_ZERO); TAILQ_INIT(&tior->run); @@ -1170,7 +1212,7 @@ tpc_process_wut(struct tpc_list *list) /*read_op*/ 
1, /*byte2*/ 0, /*minimum_cdb_size*/ 0, - /*lba*/ srclba + donebytes / srcblock, + /*lba*/ srclba, /*num_blocks*/ roundbytes / srcblock, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); @@ -1189,7 +1231,7 @@ tpc_process_wut(struct tpc_list *list) /*read_op*/ 0, /*byte2*/ 0, /*minimum_cdb_size*/ 0, - /*lba*/ dstlba + donebytes / dstblock, + /*lba*/ dstlba, /*num_blocks*/ roundbytes / dstblock, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); @@ -1201,6 +1243,8 @@ tpc_process_wut(struct tpc_list *list) TAILQ_INSERT_TAIL(prun, tior, rlinks); prun = &tior->run; donebytes += roundbytes; + srclba += roundbytes / srcblock; + dstlba += roundbytes / dstblock; } while ((tior = TAILQ_FIRST(&run)) != NULL) { Modified: stable/10/sys/cam/ctl/ctl_tpc.h ============================================================================== --- stable/10/sys/cam/ctl/ctl_tpc.h Thu Feb 19 14:33:46 2015 (r279003) +++ stable/10/sys/cam/ctl/ctl_tpc.h Thu Feb 19 14:36:03 2015 (r279004) @@ -32,7 +32,7 @@ void tpc_done(union ctl_io *io); uint64_t tpcl_resolve(struct ctl_softc *softc, int init_port, - struct scsi_ec_cscd *cscd, uint32_t *ss); + struct scsi_ec_cscd *cscd, uint32_t *ss, uint32_t *ps, uint32_t *pso); union ctl_io * tpcl_alloc_io(void); int tpcl_queue(union ctl_io *io, uint64_t lun); Modified: stable/10/sys/cam/ctl/ctl_tpc_local.c ============================================================================== --- stable/10/sys/cam/ctl/ctl_tpc_local.c Thu Feb 19 14:33:46 2015 (r279003) +++ stable/10/sys/cam/ctl/ctl_tpc_local.c Thu Feb 19 14:36:03 2015 (r279004) @@ -309,7 +309,7 @@ tpcl_done(union ctl_io *io) uint64_t tpcl_resolve(struct ctl_softc *softc, int init_port, - struct scsi_ec_cscd *cscd, uint32_t *ss) + struct scsi_ec_cscd *cscd, uint32_t *ss, uint32_t *ps, uint32_t *pso) { struct scsi_ec_cscd_id *cscdid; struct ctl_port *port; @@ -337,6 +337,12 @@ tpcl_resolve(struct ctl_softc *softc, in lunid = lun->lun; if (ss && lun->be_lun) *ss = lun->be_lun->blocksize; + if (ps && lun->be_lun) + 
*ps = lun->be_lun->blocksize << + lun->be_lun->pblockexp; + if (pso && lun->be_lun) + *pso = lun->be_lun->blocksize * + lun->be_lun->pblockoff; break; } }