From owner-svn-src-stable@FreeBSD.ORG Mon Mar 23 14:36:55 2015 Return-Path: Delivered-To: svn-src-stable@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 366F416D; Mon, 23 Mar 2015 14:36:55 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 20C6F68F; Mon, 23 Mar 2015 14:36:55 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.9/8.14.9) with ESMTP id t2NEaseG007988; Mon, 23 Mar 2015 14:36:55 GMT (envelope-from mav@FreeBSD.org) Received: (from mav@localhost) by svn.freebsd.org (8.14.9/8.14.9/Submit) id t2NEas9w007985; Mon, 23 Mar 2015 14:36:54 GMT (envelope-from mav@FreeBSD.org) Message-Id: <201503231436.t2NEas9w007985@svn.freebsd.org> X-Authentication-Warning: svn.freebsd.org: mav set sender to mav@FreeBSD.org using -f From: Alexander Motin Date: Mon, 23 Mar 2015 14:36:54 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r280370 - stable/10/usr.sbin/bhyve X-SVN-Group: stable-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.18-1 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 23 Mar 2015 14:36:55 -0000 Author: mav Date: Mon Mar 23 14:36:53 2015 New Revision: 280370 URL: https://svnweb.freebsd.org/changeset/base/280370 Log: MFC r279957, r280017: Add DSM TRIM command support for virtual AHCI disks. It works only for virtual disks backed by ZVOLs and raw devices supporting BIO_DELETE. Virtual disks backed by files won't report this capability. Relnotes: yes Modified: stable/10/usr.sbin/bhyve/block_if.c stable/10/usr.sbin/bhyve/block_if.h stable/10/usr.sbin/bhyve/pci_ahci.c Directory Properties: stable/10/ (props changed) Modified: stable/10/usr.sbin/bhyve/block_if.c ============================================================================== --- stable/10/usr.sbin/bhyve/block_if.c Mon Mar 23 13:38:33 2015 (r280369) +++ stable/10/usr.sbin/bhyve/block_if.c Mon Mar 23 14:36:53 2015 (r280370) @@ -59,7 +59,8 @@ __FBSDID("$FreeBSD$"); enum blockop { BOP_READ, BOP_WRITE, - BOP_FLUSH + BOP_FLUSH, + BOP_DELETE }; enum blockstat { @@ -81,6 +82,7 @@ struct blockif_ctxt { int bc_magic; int bc_fd; int bc_ischr; + int bc_candelete; int bc_rdonly; off_t bc_size; int bc_sectsz; @@ -172,6 +174,7 @@ static void blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) { struct blockif_req *br; + off_t arg[2]; int err; br = be->be_req; @@ -197,6 +200,19 @@ blockif_proc(struct blockif_ctxt *bc, st } else if (fsync(bc->bc_fd)) err = errno; break; + case BOP_DELETE: + if (!bc->bc_candelete) + err = EOPNOTSUPP; + else if (bc->bc_rdonly) + err = EROFS; + else if (bc->bc_ischr) { + arg[0] = br->br_offset; + arg[1] = br->br_iov[0].iov_len; + if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) + err = errno; + } else + err = EOPNOTSUPP; + break; default: err = EINVAL; break; @@ -276,9 +292,10 @@ blockif_open(const char *optstr, const c char *nopt, *xopts; struct blockif_ctxt *bc; struct stat sbuf; + struct diocgattr_arg arg; off_t size, psectsz, psectoff; int extra, fd, i, sectsz; - int nocache, sync, ro; + int nocache, sync, ro, candelete; pthread_once(&blockif_once, blockif_init); @@ -332,6 +349,7 @@ blockif_open(const char *optstr, const c size = sbuf.st_size; sectsz = DEV_BSIZE; psectsz = psectoff = 0; + candelete = 0; if (S_ISCHR(sbuf.st_mode)) { if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || ioctl(fd, DIOCGSECTORSIZE, §sz)) { @@ -343,6 +361,10 @@ blockif_open(const char *optstr, const c assert(sectsz != 0); if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); + strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); + arg.len = sizeof(arg.value.i); + if (ioctl(fd, DIOCGATTR, &arg) == 0) + candelete = arg.value.i; } else psectsz = sbuf.st_blksize; @@ -355,6 +377,7 @@ blockif_open(const char *optstr, const c bc->bc_magic = BLOCKIF_SIG; bc->bc_fd = fd; bc->bc_ischr = S_ISCHR(sbuf.st_mode); + bc->bc_candelete = candelete; bc->bc_rdonly = ro; bc->bc_size = size; bc->bc_sectsz = sectsz; @@ -434,6 +457,14 @@ blockif_flush(struct blockif_ctxt *bc, s } int +blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (blockif_request(bc, breq, BOP_DELETE)); +} + +int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) { struct blockif_elem *be; @@ -634,3 +665,11 @@ blockif_is_ro(struct blockif_ctxt *bc) assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_rdonly); } + +int +blockif_candelete(struct blockif_ctxt *bc) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (bc->bc_candelete); +} Modified: stable/10/usr.sbin/bhyve/block_if.h ============================================================================== --- stable/10/usr.sbin/bhyve/block_if.h Mon Mar 23 13:38:33 2015 (r280369) +++ stable/10/usr.sbin/bhyve/block_if.h Mon Mar 23 14:36:53 2015 (r280370) @@ -58,9 +58,11 @@ int blockif_sectsz(struct blockif_ctxt * void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off); int blockif_queuesz(struct blockif_ctxt *bc); int blockif_is_ro(struct blockif_ctxt *bc); +int blockif_candelete(struct blockif_ctxt *bc); int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq); +int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_close(struct blockif_ctxt *bc); Modified: stable/10/usr.sbin/bhyve/pci_ahci.c ============================================================================== --- stable/10/usr.sbin/bhyve/pci_ahci.c Mon Mar 23 13:38:33 2015 (r280369) +++ stable/10/usr.sbin/bhyve/pci_ahci.c Mon Mar 23 14:36:53 2015 (r280370) @@ -644,6 +644,100 @@ ahci_handle_flush(struct ahci_port *p, i } static inline void +read_prdt(struct ahci_port *p, int slot, uint8_t *cfis, + void *buf, int size) +{ + struct ahci_cmd_hdr *hdr; + struct ahci_prdt_entry *prdt; + void *to; + int i, len; + + hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); + len = size; + to = buf; + prdt = (struct ahci_prdt_entry *)(cfis + 0x80); + for (i = 0; i < hdr->prdtl && len; i++) { + uint8_t *ptr; + uint32_t dbcsz; + int sublen; + + dbcsz = (prdt->dbc & DBCMASK) + 1; + ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz); + sublen = len < dbcsz ? len : dbcsz; + memcpy(to, ptr, sublen); + len -= sublen; + to += sublen; + prdt++; + } +} + +static void +ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done) +{ + struct ahci_ioreq *aior; + struct blockif_req *breq; + uint8_t *entry; + uint64_t elba; + uint32_t len, elen; + int err; + uint8_t buf[512]; + + len = (uint16_t)cfis[13] << 8 | cfis[12]; + len *= 512; + read_prdt(p, slot, cfis, buf, sizeof(buf)); + +next: + entry = &buf[done]; + elba = ((uint64_t)entry[5] << 40) | + ((uint64_t)entry[4] << 32) | + ((uint64_t)entry[3] << 24) | + ((uint64_t)entry[2] << 16) | + ((uint64_t)entry[1] << 8) | + entry[0]; + elen = (uint16_t)entry[7] << 8 | entry[6]; + done += 8; + if (elen == 0) { + if (done >= len) { + ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); + p->pending &= ~(1 << slot); + ahci_check_stopped(p); + return; + } + goto next; + } + + /* + * Pull request off free list + */ + aior = STAILQ_FIRST(&p->iofhd); + assert(aior != NULL); + STAILQ_REMOVE_HEAD(&p->iofhd, io_flist); + aior->cfis = cfis; + aior->slot = slot; + aior->len = len; + aior->done = done; + aior->prdtl = 0; + + breq = &aior->io_req; + breq->br_offset = elba * blockif_sectsz(p->bctx); + breq->br_iovcnt = 1; + breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx); + + /* + * Mark this command in-flight. + */ + p->pending |= 1 << slot; + + /* + * Stuff request onto busy list + */ + TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); + + err = blockif_delete(p->bctx, breq); + assert(err == 0); +} + +static inline void write_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf, int size) { @@ -684,10 +778,12 @@ handle_identify(struct ahci_port *p, int } else { uint16_t buf[256]; uint64_t sectors; - int sectsz, psectsz, psectoff; + int sectsz, psectsz, psectoff, candelete, ro; uint16_t cyl; uint8_t sech, heads; + ro = blockif_is_ro(p->bctx); + candelete = blockif_candelete(p->bctx); sectsz = blockif_sectsz(p->bctx); sectors = blockif_size(p->bctx) / sectsz; blockif_chs(p->bctx, &cyl, &heads, &sech); @@ -718,6 +814,7 @@ handle_identify(struct ahci_port *p, int buf[66] = 100; buf[67] = 100; buf[68] = 100; + buf[69] = 0; buf[75] = 31; buf[76] = (1 << 8 | 1 << 2); buf[80] = 0x1f0; @@ -736,6 +833,11 @@ handle_identify(struct ahci_port *p, int buf[101] = (sectors >> 16); buf[102] = (sectors >> 32); buf[103] = (sectors >> 48); + if (candelete && !ro) { + buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT; + buf[105] = 1; + buf[169] = ATA_SUPPORT_DSM_TRIM; + } buf[106] = 0x4000; buf[209] = 0x4000; if (psectsz > sectsz) { @@ -1394,6 +1496,15 @@ ahci_handle_cmd(struct ahci_port *p, int case ATA_FLUSHCACHE48: ahci_handle_flush(p, slot, cfis); break; + case ATA_DATA_SET_MANAGEMENT: + if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM && + cfis[13] == 0 && cfis[12] == 1) { + ahci_handle_dsm_trim(p, slot, cfis, 0); + break; + } + ahci_write_fis_d2h(p, slot, cfis, + (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); + break; case ATA_STANDBY_CMD: break; case ATA_NOP: @@ -1505,7 +1616,7 @@ ata_ioreq_cb(struct blockif_req *br, int struct pci_ahci_softc *sc; uint32_t tfd; uint8_t *cfis; - int pending, slot, ncq; + int pending, slot, ncq, dsm; DPRINTF("%s %d\n", __func__, err); @@ -1521,6 +1632,8 @@ ata_ioreq_cb(struct blockif_req *br, int if (cfis[2] == ATA_WRITE_FPDMA_QUEUED || cfis[2] == ATA_READ_FPDMA_QUEUED) ncq = 1; + if (cfis[2] == ATA_DATA_SET_MANAGEMENT) + dsm = 1; pthread_mutex_lock(&sc->mtx); @@ -1534,10 +1647,17 @@ ata_ioreq_cb(struct blockif_req *br, int */ STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist); - if (pending && !err) { - ahci_handle_dma(p, slot, cfis, aior->done, - hdr->prdtl - pending); - goto out; + if (dsm) { + if (aior->done != aior->len && !err) { + ahci_handle_dsm_trim(p, slot, cfis, aior->done); + goto out; + } + } else { + if (pending && !err) { + ahci_handle_dma(p, slot, cfis, aior->done, + hdr->prdtl - pending); + goto out; + } } if (!err && aior->done == aior->len) {