From owner-svn-src-head@freebsd.org Thu Jul 16 16:32:17 2020 Return-Path: Delivered-To: svn-src-head@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 52BF3366B02; Thu, 16 Jul 2020 16:32:17 +0000 (UTC) (envelope-from allanjude@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4B70CP1YXzz3Vs2; Thu, 16 Jul 2020 16:32:17 +0000 (UTC) (envelope-from allanjude@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 19A49B0E4; Thu, 16 Jul 2020 16:32:17 +0000 (UTC) (envelope-from allanjude@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id 06GGWGaw034871; Thu, 16 Jul 2020 16:32:16 GMT (envelope-from allanjude@FreeBSD.org) Received: (from allanjude@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id 06GGWGIR034870; Thu, 16 Jul 2020 16:32:16 GMT (envelope-from allanjude@FreeBSD.org) Message-Id: <202007161632.06GGWGIR034870@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: allanjude set sender to allanjude@FreeBSD.org using -f From: Allan Jude Date: Thu, 16 Jul 2020 16:32:16 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r363255 - head/sys/dev/virtio/block X-SVN-Group: head X-SVN-Commit-Author: allanjude X-SVN-Commit-Paths: head/sys/dev/virtio/block X-SVN-Commit-Revision: 363255 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.33 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 16 Jul 2020 16:32:17 -0000 Author: allanjude Date: Thu Jul 16 16:32:16 2020 New Revision: 363255 URL: https://svnweb.freebsd.org/changeset/base/363255 Log: Add VIRTIO_BLK_T_DISCARD support to the virtio-blk driver If the hypervisor advertises support for the DISCARD command then the guest can perform TRIM commands, freeing space on the backing store. If VIRTIO_BLK_F_DISCARD is enabled, advertise DISKFLAG_CANDELETE Tested with FreeBSD guests on bhyve and KVM Reviewed by: jhb Tested by: freqlabs MFC after: 1 month Relnotes: yes Sponsored by: Klara Inc. Differential Revision: https://reviews.freebsd.org/D21708 Modified: head/sys/dev/virtio/block/virtio_blk.c head/sys/dev/virtio/block/virtio_blk.h Modified: head/sys/dev/virtio/block/virtio_blk.c ============================================================================== --- head/sys/dev/virtio/block/virtio_blk.c Thu Jul 16 15:12:52 2020 (r363254) +++ head/sys/dev/virtio/block/virtio_blk.c Thu Jul 16 16:32:16 2020 (r363255) @@ -81,6 +81,7 @@ struct vtblk_softc { #define VTBLK_FLAG_SUSPEND 0x0008 #define VTBLK_FLAG_BARRIER 0x0010 #define VTBLK_FLAG_WC_CONFIG 0x0020 +#define VTBLK_FLAG_DISCARD 0x0040 struct virtqueue *vtblk_vq; struct sglist *vtblk_sglist; @@ -112,6 +113,7 @@ static struct virtio_feature_desc vtblk_feature_desc[] { VIRTIO_BLK_F_WCE, "WriteCache" }, { VIRTIO_BLK_F_TOPOLOGY, "Topology" }, { VIRTIO_BLK_F_CONFIG_WCE, "ConfigWCE" }, + { VIRTIO_BLK_F_DISCARD, "Discard" }, { 0, NULL } }; @@ -210,6 +212,7 @@ TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writeca VIRTIO_BLK_F_WCE | \ VIRTIO_BLK_F_TOPOLOGY | \ VIRTIO_BLK_F_CONFIG_WCE | \ + VIRTIO_BLK_F_DISCARD | \ VIRTIO_RING_F_INDIRECT_DESC) #define VTBLK_MTX(_sc) &(_sc)->vtblk_mtx @@ -459,7 +462,7 @@ vtblk_config_change(device_t dev) vtblk_read_config(sc, &blkcfg); /* Capacity is always in 512-byte units. */ - capacity = blkcfg.capacity * 512; + capacity = blkcfg.capacity * VTBLK_BSIZE; if (sc->vtblk_disk->d_mediasize != capacity) vtblk_resize_disk(sc, capacity); @@ -544,13 +547,14 @@ vtblk_strategy(struct bio *bp) * be a better way to report our readonly'ness to GEOM above. */ if (sc->vtblk_flags & VTBLK_FLAG_READONLY && - (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) { + (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH || + bp->bio_cmd == BIO_DELETE)) { vtblk_bio_done(sc, bp, EROFS); return; } if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) && - (bp->bio_cmd != BIO_FLUSH)) { + (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) { vtblk_bio_done(sc, bp, EOPNOTSUPP); return; } @@ -563,6 +567,13 @@ vtblk_strategy(struct bio *bp) return; } + if ((bp->bio_cmd == BIO_DELETE) && + !(sc->vtblk_flags & VTBLK_FLAG_DISCARD)) { + VTBLK_UNLOCK(sc); + vtblk_bio_done(sc, bp, EOPNOTSUPP); + return; + } + bioq_insert_tail(&sc->vtblk_bioq, bp); vtblk_startio(sc); @@ -598,6 +609,8 @@ vtblk_setup_features(struct vtblk_softc *sc) sc->vtblk_flags |= VTBLK_FLAG_BARRIER; if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE)) sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG; + if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) + sc->vtblk_flags |= VTBLK_FLAG_DISCARD; } static int @@ -687,12 +700,12 @@ vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio dp->d_dump = vtblk_dump; /* Capacity is always in 512-byte units. */ - dp->d_mediasize = blkcfg->capacity * 512; + dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE; if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE)) dp->d_sectorsize = blkcfg->blk_size; else - dp->d_sectorsize = 512; + dp->d_sectorsize = VTBLK_BSIZE; /* * The VirtIO maximum I/O size is given in terms of segments. @@ -726,6 +739,11 @@ vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio dp->d_stripesize; } + if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) { + dp->d_flags |= DISKFLAG_CANDELETE; + dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE; + } + if (vtblk_write_cache_enabled(sc, blkcfg) != 0) sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK; else @@ -876,12 +894,16 @@ vtblk_request_bio(struct vtblk_softc *sc) break; case BIO_READ: req->vbr_hdr.type = VIRTIO_BLK_T_IN; - req->vbr_hdr.sector = bp->bio_offset / 512; + req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE; break; case BIO_WRITE: req->vbr_hdr.type = VIRTIO_BLK_T_OUT; - req->vbr_hdr.sector = bp->bio_offset / 512; + req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE; break; + case BIO_DELETE: + req->vbr_hdr.type = VIRTIO_BLK_T_DISCARD; + req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE; + break; default: panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd); } @@ -935,6 +957,20 @@ vtblk_request_execute(struct vtblk_softc *sc, struct v /* BIO_READ means the host writes into our buffer. */ if (bp->bio_cmd == BIO_READ) writable = sg->sg_nseg - 1; + } else if (bp->bio_cmd == BIO_DELETE) { + struct virtio_blk_discard_write_zeroes *discard; + + discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO); + if (discard == NULL) + return (ENOMEM); + discard->sector = bp->bio_offset / VTBLK_BSIZE; + discard->num_sectors = bp->bio_bcount / VTBLK_BSIZE; + bp->bio_driver1 = discard; + error = sglist_append(sg, discard, sizeof(*discard)); + if (error || sg->sg_nseg == sg->sg_maxseg) { + panic("%s: bio %p data buffer too big %d", + __func__, bp, error); + } } writable++; @@ -1095,6 +1131,11 @@ vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, bp->bio_flags |= BIO_ERROR; } + if (bp->bio_driver1 != NULL) { + free(bp->bio_driver1, M_DEVBUF); + bp->bio_driver1 = NULL; + } + biodone(bp); } @@ -1124,7 +1165,12 @@ vtblk_read_config(struct vtblk_softc *sc, struct virti VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, geometry, blkcfg); VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg); VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, topology, blkcfg); - VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, writeback, blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors, + blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment, + blkcfg); } #undef VTBLK_GET_CONFIG @@ -1282,7 +1328,7 @@ vtblk_dump_write(struct vtblk_softc *sc, void *virtual req->vbr_ack = -1; req->vbr_hdr.type = VIRTIO_BLK_T_OUT; req->vbr_hdr.ioprio = 1; - req->vbr_hdr.sector = offset / 512; + req->vbr_hdr.sector = offset / VTBLK_BSIZE; req->vbr_bp = &buf; g_reset_bio(&buf); @@ -1331,7 +1377,7 @@ vtblk_set_write_cache(struct vtblk_softc *sc, int wc) /* Set either writeback (1) or writethrough (0) mode. */ virtio_write_dev_config_1(sc->vtblk_dev, - offsetof(struct virtio_blk_config, writeback), wc); + offsetof(struct virtio_blk_config, wce), wc); } static int @@ -1346,7 +1392,7 @@ vtblk_write_cache_enabled(struct vtblk_softc *sc, if (wc >= 0 && wc < VTBLK_CACHE_MAX) vtblk_set_write_cache(sc, wc); else - wc = blkcfg->writeback; + wc = blkcfg->wce; } else wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE); Modified: head/sys/dev/virtio/block/virtio_blk.h ============================================================================== --- head/sys/dev/virtio/block/virtio_blk.h Thu Jul 16 15:12:52 2020 (r363254) +++ head/sys/dev/virtio/block/virtio_blk.h Thu Jul 16 16:32:16 2020 (r363255) @@ -33,20 +33,27 @@ #ifndef _VIRTIO_BLK_H #define _VIRTIO_BLK_H +#define VTBLK_BSIZE 512 + /* Feature bits */ -#define VIRTIO_BLK_F_BARRIER 0x0001 /* Does host support barriers? */ -#define VIRTIO_BLK_F_SIZE_MAX 0x0002 /* Indicates maximum segment size */ -#define VIRTIO_BLK_F_SEG_MAX 0x0004 /* Indicates maximum # of segments */ -#define VIRTIO_BLK_F_GEOMETRY 0x0010 /* Legacy geometry available */ -#define VIRTIO_BLK_F_RO 0x0020 /* Disk is read-only */ -#define VIRTIO_BLK_F_BLK_SIZE 0x0040 /* Block size of disk is available*/ -#define VIRTIO_BLK_F_SCSI 0x0080 /* Supports scsi command passthru */ -#define VIRTIO_BLK_F_WCE 0x0200 /* Writeback mode enabled after reset */ -#define VIRTIO_BLK_F_TOPOLOGY 0x0400 /* Topology information is available */ -#define VIRTIO_BLK_F_CONFIG_WCE 0x0800 /* Writeback mode available in config */ -#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */ +#define VIRTIO_BLK_F_BARRIER 0x0001 /* Does host support barriers? */ +#define VIRTIO_BLK_F_SIZE_MAX 0x0002 /* Indicates maximum segment size */ +#define VIRTIO_BLK_F_SEG_MAX 0x0004 /* Indicates maximum # of segments */ +#define VIRTIO_BLK_F_GEOMETRY 0x0010 /* Legacy geometry available */ +#define VIRTIO_BLK_F_RO 0x0020 /* Disk is read-only */ +#define VIRTIO_BLK_F_BLK_SIZE 0x0040 /* Block size of disk is available*/ +#define VIRTIO_BLK_F_SCSI 0x0080 /* Supports scsi command passthru */ +#define VIRTIO_BLK_F_FLUSH 0x0200 /* Flush command supported */ +#define VIRTIO_BLK_F_WCE 0x0200 /* Legacy alias for FLUSH */ +#define VIRTIO_BLK_F_TOPOLOGY 0x0400 /* Topology information is available */ +#define VIRTIO_BLK_F_CONFIG_WCE 0x0800 /* Writeback mode available in config */ +#define VIRTIO_BLK_F_MQ 0x1000 /* Support more than one vq */ +#define VIRTIO_BLK_F_DISCARD 0x2000 /* Trim blocks */ +#define VIRTIO_BLK_F_WRITE_ZEROES 0x4000 /* Write zeros */ +#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */ + struct virtio_blk_config { /* The capacity (in 512-byte sectors). */ uint64_t capacity; @@ -66,15 +73,29 @@ struct virtio_blk_config { /* Topology of the device (if VIRTIO_BLK_F_TOPOLOGY) */ struct virtio_blk_topology { + /* Exponent for physical block per logical block. */ uint8_t physical_block_exp; + /* Alignment offset in logical blocks. */ uint8_t alignment_offset; + /* Minimum I/O size without performance penalty in logical + * blocks. */ uint16_t min_io_size; + /* Optimal sustained I/O size in logical blocks. */ uint32_t opt_io_size; } topology; /* Writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */ - uint8_t writeback; - + uint8_t wce; + uint8_t unused; + /* Number of vqs, only available when VIRTIO_BLK_F_MQ is set */ + uint16_t num_queues; + uint32_t max_discard_sectors; + uint32_t max_discard_seg; + uint32_t discard_sector_alignment; + uint32_t max_write_zeroes_sectors; + uint32_t max_write_zeroes_seg; + uint8_t write_zeroes_may_unmap; + uint8_t unused1[3]; } __packed; /* @@ -89,24 +110,35 @@ struct virtio_blk_config { */ /* These two define direction. */ -#define VIRTIO_BLK_T_IN 0 -#define VIRTIO_BLK_T_OUT 1 +#define VIRTIO_BLK_T_IN 0 +#define VIRTIO_BLK_T_OUT 1 /* This bit says it's a scsi command, not an actual read or write. */ -#define VIRTIO_BLK_T_SCSI_CMD 2 +#define VIRTIO_BLK_T_SCSI_CMD 2 +#define VIRTIO_BLK_T_SCSI_CMD_OUT 3 /* Cache flush command */ -#define VIRTIO_BLK_T_FLUSH 4 +#define VIRTIO_BLK_T_FLUSH 4 +#define VIRTIO_BLK_T_FLUSH_OUT 5 /* Get device ID command */ -#define VIRTIO_BLK_T_GET_ID 8 +#define VIRTIO_BLK_T_GET_ID 8 +/* Discard command */ +#define VIRTIO_BLK_T_DISCARD 11 + +/* Write zeros command */ +#define VIRTIO_BLK_T_WRITE_ZEROES 13 + /* Barrier before this op. */ -#define VIRTIO_BLK_T_BARRIER 0x80000000 +#define VIRTIO_BLK_T_BARRIER 0x80000000 /* ID string length */ -#define VIRTIO_BLK_ID_BYTES 20 +#define VIRTIO_BLK_ID_BYTES 20 +/* Unmap this range (only valid for write zeroes command) */ +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP 0x00000001 + /* This is the first element of the read scatter-gather list. */ struct virtio_blk_outhdr { /* VIRTIO_BLK_T* */ @@ -115,6 +147,15 @@ struct virtio_blk_outhdr { uint32_t ioprio; /* Sector (ie. 512 byte offset) */ uint64_t sector; +}; + +struct virtio_blk_discard_write_zeroes { + uint64_t sector; + uint32_t num_sectors; + struct { + uint32_t unmap:1; + uint32_t reserved:31; + } flags; }; struct virtio_scsi_inhdr {