Date: Mon, 28 Dec 2009 20:08:01 +0000 (UTC) From: Alexander Motin <mav@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r201139 - in head/sys: cam/ata geom sys Message-ID: <200912282008.nBSK81bw099996@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mav Date: Mon Dec 28 20:08:01 2009 New Revision: 201139 URL: http://svn.freebsd.org/changeset/base/201139 Log: Add BIO_DELETE support to ada(4): - For SSDs use TRIM feature of DATA SET MANAGEMENT command, as defined by ACS-2 specification working draft. - For CompactFlash use CFA ERASE command, same as ad(4) does. With this patch, `newfs -E /dev/ada1` was able to restore write speed of my heavily weared OCZ Vertex SSD (firmware 1.4) up to the initial level for the most part of it's capacity. Previous 1.3 firmware, even reportiong TRIM capabilty bit set, was not working, reporting ABORT error for every DSM command. I have no idea whether it is normal, but for some reason it takes 200ms to handle any TRIM command on this drive, that was making delete extremely slow. But TRIM command is able to accept long list of LBAs and the length of that list seems doesn't affect it's execution time. Implemented request clusting algorithm allowed me to rise delete rate up to reasonable numbers, when many parallel DELETE requests running. Modified: head/sys/cam/ata/ata_all.c head/sys/cam/ata/ata_da.c head/sys/geom/geom_dev.c head/sys/sys/ata.h Modified: head/sys/cam/ata/ata_all.c ============================================================================== --- head/sys/cam/ata/ata_all.c Mon Dec 28 18:59:13 2009 (r201138) +++ head/sys/cam/ata/ata_all.c Mon Dec 28 20:08:01 2009 (r201139) @@ -75,6 +75,11 @@ ata_op_string(struct ata_cmd *cmd) switch (cmd->command) { case 0x00: return ("NOP"); case 0x03: return ("CFA_REQUEST_EXTENDED_ERROR"); + case 0x06: + switch (cmd->features) { + case 0x01: return ("DSM TRIM"); + } + return "DSM"; case 0x08: return ("DEVICE_RESET"); case 0x20: return ("READ"); case 0x24: return ("READ48"); @@ -338,7 +343,8 @@ ata_48bit_cmd(struct ccb_ataio *ataio, u cmd == ATA_WRITE_DMA_FUA48 || cmd == ATA_WRITE_DMA_QUEUED48 || cmd == ATA_WRITE_DMA_QUEUED_FUA48 || - cmd == ATA_WRITE_STREAM_DMA48) + cmd == ATA_WRITE_STREAM_DMA48 || + cmd == ATA_DATA_SET_MANAGEMENT) ataio->cmd.flags |= CAM_ATAIO_DMA; ataio->cmd.command = cmd; ataio->cmd.features = features; Modified: head/sys/cam/ata/ata_da.c ============================================================================== --- head/sys/cam/ata/ata_da.c Mon Dec 28 18:59:13 2009 (r201138) +++ head/sys/cam/ata/ata_da.c Mon Dec 28 20:08:01 2009 (r201139) @@ -74,8 +74,10 @@ typedef enum { ADA_FLAG_CAN_DMA = 0x010, ADA_FLAG_NEED_OTAG = 0x020, ADA_FLAG_WENT_IDLE = 0x040, + ADA_FLAG_CAN_TRIM = 0x080, ADA_FLAG_OPEN = 0x100, - ADA_FLAG_SCTX_INIT = 0x200 + ADA_FLAG_SCTX_INIT = 0x200, + ADA_FLAG_CAN_CFA = 0x400 } ada_flags; typedef enum { @@ -86,6 +88,7 @@ typedef enum { ADA_CCB_BUFFER_IO = 0x03, ADA_CCB_WAITING = 0x04, ADA_CCB_DUMP = 0x05, + ADA_CCB_TRIM = 0x06, ADA_CCB_TYPE_MASK = 0x0F, } ada_ccb_state; @@ -101,13 +104,23 @@ struct disk_params { u_int64_t sectors; /* Total number sectors */ }; +#define TRIM_MAX_BLOCKS 4 +#define TRIM_MAX_RANGES TRIM_MAX_BLOCKS * 64 +struct trim_request { + uint8_t data[TRIM_MAX_RANGES * 8]; + struct bio *bps[TRIM_MAX_RANGES]; +}; + struct ada_softc { struct bio_queue_head bio_queue; + struct bio_queue_head trim_queue; ada_state state; ada_flags flags; ada_quirks quirks; int ordered_tag_count; int outstanding_cmds; + int trim_max_ranges; + int trim_running; struct disk_params params; struct disk *disk; union ccb saved_ccb; @@ -115,6 +128,7 @@ struct ada_softc { struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; struct callout sendordered_c; + struct trim_request trim_req; }; struct ada_quirk_entry { @@ -309,6 +323,18 @@ adaclose(struct disk *dp) return (0); } +static void +adaschedule(struct cam_periph *periph) +{ + struct ada_softc *softc = (struct ada_softc *)periph->softc; + + if (bioq_first(&softc->bio_queue) || + (!softc->trim_running && bioq_first(&softc->trim_queue))) { + /* Have more work to do, so ensure we stay scheduled */ + xpt_schedule(periph, CAM_PRIORITY_NORMAL); + } +} + /* * Actually translate the requested transfer into one the physical driver * can understand. The transfer is described by a buf and will include @@ -341,12 +367,16 @@ adastrategy(struct bio *bp) /* * Place it in the queue of disk activities for this disk */ - bioq_disksort(&softc->bio_queue, bp); + if (bp->bio_cmd == BIO_DELETE && + (softc->flags & ADA_FLAG_CAN_TRIM)) + bioq_disksort(&softc->trim_queue, bp); + else + bioq_disksort(&softc->bio_queue, bp); /* * Schedule ourselves for performing the work. */ - xpt_schedule(periph, CAM_PRIORITY_NORMAL); + adaschedule(periph); cam_periph_unlock(periph); return; @@ -485,6 +515,7 @@ adaoninvalidate(struct cam_periph *perip * with XPT_ABORT_CCB. */ bioq_flush(&softc->bio_queue, NULL, ENXIO); + bioq_flush(&softc->trim_queue, NULL, ENXIO); disk_gone(softc->disk); xpt_print(periph->path, "lost device\n"); @@ -618,6 +649,7 @@ adaregister(struct cam_periph *periph, v } bioq_init(&softc->bio_queue); + bioq_init(&softc->trim_queue); if (cgd->ident_data.capabilities1 & ATA_SUPPORT_DMA) softc->flags |= ADA_FLAG_CAN_DMA; @@ -628,6 +660,17 @@ adaregister(struct cam_periph *periph, v if (cgd->ident_data.satacapabilities & ATA_SUPPORT_NCQ && cgd->inq_flags & SID_CmdQue) softc->flags |= ADA_FLAG_CAN_NCQ; + if (cgd->ident_data.support_dsm & ATA_SUPPORT_DSM_TRIM) { + softc->flags |= ADA_FLAG_CAN_TRIM; + softc->trim_max_ranges = TRIM_MAX_RANGES; + if (cgd->ident_data.max_dsm_blocks != 0) { + softc->trim_max_ranges = + min(cgd->ident_data.max_dsm_blocks * 64, + softc->trim_max_ranges); + } + } + if (cgd->ident_data.support.command2 & ATA_SUPPORT_CFA) + softc->flags |= ADA_FLAG_CAN_CFA; softc->state = ADA_STATE_NORMAL; periph->softc = softc; @@ -672,7 +715,7 @@ adaregister(struct cam_periph *periph, v maxio = DFLTPHYS; /* traditional default */ else if (maxio > MAXPHYS) maxio = MAXPHYS; /* for safety */ - if (cgd->ident_data.support.command2 & ATA_SUPPORT_ADDRESS48) + if (softc->flags & ADA_FLAG_CAN_48BIT) maxio = min(maxio, 65536 * softc->params.secsize); else /* 28bit ATA command limit */ maxio = min(maxio, 256 * softc->params.secsize); @@ -681,6 +724,10 @@ adaregister(struct cam_periph *periph, v softc->disk->d_flags = 0; if (softc->flags & ADA_FLAG_CAN_FLUSHCACHE) softc->disk->d_flags |= DISKFLAG_CANFLUSHCACHE; + if ((softc->flags & ADA_FLAG_CAN_TRIM) || + ((softc->flags & ADA_FLAG_CAN_CFA) && + !(softc->flags & ADA_FLAG_CAN_48BIT))) + softc->disk->d_flags |= DISKFLAG_CANDELETE; strlcpy(softc->disk->d_ident, cgd->serial_num, MIN(sizeof(softc->disk->d_ident), cgd->serial_num_len + 1)); @@ -743,13 +790,10 @@ adastart(struct cam_periph *periph, unio switch (softc->state) { case ADA_STATE_NORMAL: { - /* Pull a buffer from the queue and get going on it */ struct bio *bp; + u_int8_t tag_code; - /* - * See if there is a buf with work for us to do.. - */ - bp = bioq_first(&softc->bio_queue); + /* Execute immediate CCB if waiting. */ if (periph->immediate_priority <= periph->pinfo.priority) { CAM_DEBUG_PRINT(CAM_DEBUG_SUBTRACE, ("queuing for immediate ccb\n")); @@ -758,115 +802,188 @@ adastart(struct cam_periph *periph, unio periph_links.sle); periph->immediate_priority = CAM_PRIORITY_NONE; wakeup(&periph->ccb_list); - } else if (bp == NULL) { + /* Have more work to do, so ensure we stay scheduled */ + adaschedule(periph); + break; + } + /* Run TRIM if not running yet. */ + if (!softc->trim_running && + (bp = bioq_first(&softc->trim_queue)) != 0) { + struct trim_request *req = &softc->trim_req; + struct bio *bp1; + int bps = 0, ranges = 0; + + softc->trim_running = 1; + bzero(req, sizeof(*req)); + bp1 = bp; + do { + uint64_t lba = bp1->bio_pblkno; + int count = bp1->bio_bcount / + softc->params.secsize; + + bioq_remove(&softc->trim_queue, bp1); + while (count > 0) { + int c = min(count, 0xffff); + int off = ranges * 8; + + req->data[off + 0] = lba & 0xff; + req->data[off + 1] = (lba >> 8) & 0xff; + req->data[off + 2] = (lba >> 16) & 0xff; + req->data[off + 3] = (lba >> 24) & 0xff; + req->data[off + 4] = (lba >> 32) & 0xff; + req->data[off + 5] = (lba >> 40) & 0xff; + req->data[off + 6] = c & 0xff; + req->data[off + 7] = (c >> 8) & 0xff; + lba += c; + count -= c; + ranges++; + } + req->bps[bps++] = bp1; + bp1 = bioq_first(&softc->trim_queue); + if (bp1 == NULL || + bp1->bio_bcount / softc->params.secsize > + (softc->trim_max_ranges - ranges) * 0xffff) + break; + } while (1); + cam_fill_ataio(ataio, + ada_retry_count, + adadone, + CAM_DIR_OUT, + 0, + req->data, + ((ranges + 63) / 64) * 512, + ada_default_timeout * 1000); + ata_48bit_cmd(ataio, ATA_DATA_SET_MANAGEMENT, + ATA_DSM_TRIM, 0, (ranges + 63) / 64); + start_ccb->ccb_h.ccb_state = ADA_CCB_TRIM; + goto out; + } + /* Run regular command. */ + bp = bioq_first(&softc->bio_queue); + if (bp == NULL) { xpt_release_ccb(start_ccb); - } else { - u_int8_t tag_code; - - bioq_remove(&softc->bio_queue, bp); - - if ((softc->flags & ADA_FLAG_NEED_OTAG) != 0) { - softc->flags &= ~ADA_FLAG_NEED_OTAG; - softc->ordered_tag_count++; - tag_code = 0; - } else { - tag_code = 1; - } - switch (bp->bio_cmd) { - case BIO_READ: - case BIO_WRITE: - { - uint64_t lba = bp->bio_pblkno; - uint16_t count = bp->bio_bcount / softc->params.secsize; + break; + } + bioq_remove(&softc->bio_queue, bp); - cam_fill_ataio(ataio, - ada_retry_count, - adadone, - bp->bio_cmd == BIO_READ ? - CAM_DIR_IN : CAM_DIR_OUT, - tag_code, - bp->bio_data, - bp->bio_bcount, - ada_default_timeout*1000); + if ((softc->flags & ADA_FLAG_NEED_OTAG) != 0) { + softc->flags &= ~ADA_FLAG_NEED_OTAG; + softc->ordered_tag_count++; + tag_code = 0; + } else { + tag_code = 1; + } + switch (bp->bio_cmd) { + case BIO_READ: + case BIO_WRITE: + { + uint64_t lba = bp->bio_pblkno; + uint16_t count = bp->bio_bcount / softc->params.secsize; - if ((softc->flags & ADA_FLAG_CAN_NCQ) && tag_code) { + cam_fill_ataio(ataio, + ada_retry_count, + adadone, + bp->bio_cmd == BIO_READ ? + CAM_DIR_IN : CAM_DIR_OUT, + tag_code, + bp->bio_data, + bp->bio_bcount, + ada_default_timeout*1000); + + if ((softc->flags & ADA_FLAG_CAN_NCQ) && tag_code) { + if (bp->bio_cmd == BIO_READ) { + ata_ncq_cmd(ataio, ATA_READ_FPDMA_QUEUED, + lba, count); + } else { + ata_ncq_cmd(ataio, ATA_WRITE_FPDMA_QUEUED, + lba, count); + } + } else if ((softc->flags & ADA_FLAG_CAN_48BIT) && + (lba + count >= ATA_MAX_28BIT_LBA || + count > 256)) { + if (softc->flags & ADA_FLAG_CAN_DMA) { + if (bp->bio_cmd == BIO_READ) { + ata_48bit_cmd(ataio, ATA_READ_DMA48, + 0, lba, count); + } else { + ata_48bit_cmd(ataio, ATA_WRITE_DMA48, + 0, lba, count); + } + } else { if (bp->bio_cmd == BIO_READ) { - ata_ncq_cmd(ataio, ATA_READ_FPDMA_QUEUED, - lba, count); + ata_48bit_cmd(ataio, ATA_READ_MUL48, + 0, lba, count); } else { - ata_ncq_cmd(ataio, ATA_WRITE_FPDMA_QUEUED, - lba, count); + ata_48bit_cmd(ataio, ATA_WRITE_MUL48, + 0, lba, count); } - } else if ((softc->flags & ADA_FLAG_CAN_48BIT) && - (lba + count >= ATA_MAX_28BIT_LBA || - count > 256)) { - if (softc->flags & ADA_FLAG_CAN_DMA) { - if (bp->bio_cmd == BIO_READ) { - ata_48bit_cmd(ataio, ATA_READ_DMA48, - 0, lba, count); - } else { - ata_48bit_cmd(ataio, ATA_WRITE_DMA48, - 0, lba, count); - } + } + } else { + if (count == 256) + count = 0; + if (softc->flags & ADA_FLAG_CAN_DMA) { + if (bp->bio_cmd == BIO_READ) { + ata_28bit_cmd(ataio, ATA_READ_DMA, + 0, lba, count); } else { - if (bp->bio_cmd == BIO_READ) { - ata_48bit_cmd(ataio, ATA_READ_MUL48, - 0, lba, count); - } else { - ata_48bit_cmd(ataio, ATA_WRITE_MUL48, - 0, lba, count); - } + ata_28bit_cmd(ataio, ATA_WRITE_DMA, + 0, lba, count); } } else { - if (count == 256) - count = 0; - if (softc->flags & ADA_FLAG_CAN_DMA) { - if (bp->bio_cmd == BIO_READ) { - ata_28bit_cmd(ataio, ATA_READ_DMA, - 0, lba, count); - } else { - ata_28bit_cmd(ataio, ATA_WRITE_DMA, - 0, lba, count); - } + if (bp->bio_cmd == BIO_READ) { + ata_28bit_cmd(ataio, ATA_READ_MUL, + 0, lba, count); } else { - if (bp->bio_cmd == BIO_READ) { - ata_28bit_cmd(ataio, ATA_READ_MUL, - 0, lba, count); - } else { - ata_28bit_cmd(ataio, ATA_WRITE_MUL, - 0, lba, count); - } + ata_28bit_cmd(ataio, ATA_WRITE_MUL, + 0, lba, count); } } } - break; - case BIO_FLUSH: - cam_fill_ataio(ataio, - 1, - adadone, - CAM_DIR_NONE, - 0, - NULL, - 0, - ada_default_timeout*1000); + break; + } + case BIO_DELETE: + { + uint64_t lba = bp->bio_pblkno; + uint16_t count = bp->bio_bcount / softc->params.secsize; - if (softc->flags & ADA_FLAG_CAN_48BIT) - ata_48bit_cmd(ataio, ATA_FLUSHCACHE48, 0, 0, 0); - else - ata_28bit_cmd(ataio, ATA_FLUSHCACHE, 0, 0, 0); - break; - } - start_ccb->ccb_h.ccb_state = ADA_CCB_BUFFER_IO; - start_ccb->ccb_h.ccb_bp = bp; - softc->outstanding_cmds++; - xpt_action(start_ccb); - bp = bioq_first(&softc->bio_queue); + cam_fill_ataio(ataio, + ada_retry_count, + adadone, + CAM_DIR_NONE, + 0, + NULL, + 0, + ada_default_timeout*1000); + + if (count >= 256) + count = 0; + ata_28bit_cmd(ataio, ATA_CFA_ERASE, 0, lba, count); + break; } - - if (bp != NULL) { - /* Have more work to do, so ensure we stay scheduled */ - xpt_schedule(periph, CAM_PRIORITY_NORMAL); + case BIO_FLUSH: + cam_fill_ataio(ataio, + 1, + adadone, + CAM_DIR_NONE, + 0, + NULL, + 0, + ada_default_timeout*1000); + + if (softc->flags & ADA_FLAG_CAN_48BIT) + ata_48bit_cmd(ataio, ATA_FLUSHCACHE48, 0, 0, 0); + else + ata_28bit_cmd(ataio, ATA_FLUSHCACHE, 0, 0, 0); + break; } + start_ccb->ccb_h.ccb_state = ADA_CCB_BUFFER_IO; +out: + start_ccb->ccb_h.ccb_bp = bp; + softc->outstanding_cmds++; + xpt_action(start_ccb); + + /* May have more work to do, so ensure we stay scheduled */ + adaschedule(periph); break; } } @@ -882,6 +999,7 @@ adadone(struct cam_periph *periph, union ataio = &done_ccb->ataio; switch (ataio->ccb_h.ccb_state & ADA_CCB_TYPE_MASK) { case ADA_CCB_BUFFER_IO: + case ADA_CCB_TRIM: { struct bio *bp; @@ -908,13 +1026,6 @@ adadone(struct cam_periph *periph, union "Invalidating pack\n"); softc->flags |= ADA_FLAG_PACK_INVALID; } - - /* - * return all queued I/O with EIO, so that - * the client can retry these I/Os in the - * proper order should it attempt to recover. - */ - bioq_flush(&softc->bio_queue, NULL, EIO); bp->bio_error = error; bp->bio_resid = bp->bio_bcount; bp->bio_flags |= BIO_ERROR; @@ -940,8 +1051,27 @@ adadone(struct cam_periph *periph, union softc->outstanding_cmds--; if (softc->outstanding_cmds == 0) softc->flags |= ADA_FLAG_WENT_IDLE; - - biodone(bp); + if ((ataio->ccb_h.ccb_state & ADA_CCB_TYPE_MASK) == + ADA_CCB_TRIM) { + struct trim_request *req = + (struct trim_request *)ataio->data_ptr; + int i; + + for (i = 1; i < softc->trim_max_ranges && + req->bps[i]; i++) { + struct bio *bp1 = req->bps[i]; + + bp1->bio_resid = bp->bio_resid; + bp1->bio_error = bp->bio_error; + if (bp->bio_flags & BIO_ERROR) + bp1->bio_flags |= BIO_ERROR; + biodone(bp1); + } + softc->trim_running = 0; + biodone(bp); + adaschedule(periph); + } else + biodone(bp); break; } case ADA_CCB_WAITING: Modified: head/sys/geom/geom_dev.c ============================================================================== --- head/sys/geom/geom_dev.c Mon Dec 28 18:59:13 2009 (r201138) +++ head/sys/geom/geom_dev.c Mon Dec 28 20:08:01 2009 (r201139) @@ -299,8 +299,8 @@ g_dev_ioctl(struct cdev *dev, u_long cmd } while (length > 0) { chunk = length; - if (chunk > 1024 * cp->provider->sectorsize) - chunk = 1024 * cp->provider->sectorsize; + if (chunk > 65536 * cp->provider->sectorsize) + chunk = 65536 * cp->provider->sectorsize; error = g_delete_data(cp, offset, chunk); length -= chunk; offset += chunk; Modified: head/sys/sys/ata.h ============================================================================== --- head/sys/sys/ata.h Mon Dec 28 18:59:13 2009 (r201138) +++ head/sys/sys/ata.h Mon Dec 28 20:08:01 2009 (r201139) @@ -101,7 +101,9 @@ struct ata_params { /*066*/ u_int16_t mwdmarec; /* rec. M/W DMA time ns */ /*067*/ u_int16_t pioblind; /* min. PIO cycle w/o flow */ /*068*/ u_int16_t pioiordy; /* min. PIO cycle IORDY flow */ - u_int16_t reserved69; +/*069*/ u_int16_t support3; +#define ATA_SUPPORT_RZAT 0x0020 +#define ATA_SUPPORT_DRAT 0x4000 u_int16_t reserved70; /*071*/ u_int16_t rlsovlap; /* rel time (us) for overlap */ /*072*/ u_int16_t rlsservice; /* rel time (us) for service */ @@ -204,7 +206,8 @@ struct ata_params { u_int16_t lba_size48_2; u_int16_t lba_size48_3; u_int16_t lba_size48_4; - u_int16_t reserved104[2]; + u_int16_t reserved104; +/*105*/ u_int16_t max_dsm_blocks; /*106*/ u_int16_t pss; #define ATA_PSS_LSPPS 0x000F #define ATA_PSS_LSSABOVE512 0x1000 @@ -230,7 +233,10 @@ struct ata_params { /*162*/ u_int16_t cfa_kms_support; /*163*/ u_int16_t cfa_trueide_modes; /*164*/ u_int16_t cfa_memory_modes; - u_int16_t reserved165[11]; + u_int16_t reserved165[4]; +/*169*/ u_int16_t support_dsm; +#define ATA_SUPPORT_DSM_TRIM 0x0001 + u_int16_t reserved170[6]; /*176*/ u_int8_t media_serial[60]; /*206*/ u_int16_t sct; u_int16_t reserved206[2]; @@ -284,6 +290,8 @@ struct ata_params { #define ATA_NOP 0x00 /* NOP */ #define ATA_NF_FLUSHQUEUE 0x00 /* flush queued cmd's */ #define ATA_NF_AUTOPOLL 0x01 /* start autopoll function */ +#define ATA_DATA_SET_MANAGEMENT 0x06 +#define ATA_DSM_TRIM 0x01 #define ATA_DEVICE_RESET 0x08 /* reset device */ #define ATA_READ 0x20 /* read */ #define ATA_READ48 0x24 /* read 48bit LBA */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200912282008.nBSK81bw099996>