Date: Mon, 25 Oct 2010 15:55:07 +0000 From: Mark Johnston <mjohnston@sandvine.com> To: "freebsd-scsi@freebsd.org" <freebsd-scsi@freebsd.org> Subject: [Patch] camcontrol - determine the defect list length Message-ID: <649630C24D5E884F85DD13645FE903A7640B244F@wtl-exch-2.sandvine.com>
next in thread | raw e-mail | index | archive | help
Hi, Below is a patch for camcontrol from the tree at my work. It was introduced= several years ago to resolve some issues that we were seeing in Adaptec's = RAID controller firmware. The change is to the readdefects() function - whe= n sending the request to read the defect list, instead of using a 65000-ent= ry buffer, we wrote a new function to query the drive and determine the siz= e of the defect list before allocating any memory. Apparently, if we send a= maximum length longer than the number of bytes in the defect list, the cam= call returns an overflow error due to what is probably a firmware bug. It's probable that this issue has been fixed in Adaptec's firmware for a wh= ile now, and that this change is no longer necessary to us. However, it's c= onceivable that other devices could have the same problem, and as far as I = can see, this patch shouldn't introduce any regressions - it essentially co= nsists of sending an additional CAM request before getting the defect list. I'd like to know whether this patch is worth trying to get committed into F= reeBSD. If anyone has some comments or suggestions on the patch, I'd highly= appreciate it. Thanks, -Mark --- camcontrol.c@@/main/RELENG_8/RELENG_8_1/svos_8_1/0 2010-10-21 10:15:24.= 000000000 -0400 +++ camcontrol.c@@/main/RELENG_8/RELENG_8_1/svos_8_1/1 2010-10-21 15:39:04.= 000000000 -0400 @@ -202,6 +202,8 @@ static int scanlun_or_reset_dev(int bus, #ifndef MINIMALISTIC static int readdefects(struct cam_device *device, int argc, char **argv, char *combinedopt, int retry_count, int timeout); +static int readdefectlen(struct cam_device *device, int retry_count,=20 + int timeout, cam_argmask args, int *ret_len); static void modepage(struct cam_device *device, int argc, char **argv, char *combinedopt, int retry_count, int timeout); static int scsicmd(struct cam_device *device, int argc, char **argv,=20 @@ -1722,7 +1724,7 @@ readdefects(struct cam_device *device, i union ccb *ccb =3D NULL; struct scsi_read_defect_data_10 *rdd_cdb; u_int8_t *defect_list =3D NULL; - u_int32_t dlist_length =3D 65000; + u_int32_t dlist_length =3D 0; u_int32_t returned_length =3D 0; u_int32_t num_returned =3D 0; u_int8_t returned_format; @@ -1764,12 +1766,23 @@ readdefects(struct cam_device *device, i =20 ccb =3D cam_getccb(device); =20 - /* - * Hopefully 65000 bytes is enough to hold the defect list. If it - * isn't, the disk is probably dead already. We'd have to go with - * 12 byte command (i.e. alloc_length is 32 bits instead of 16) - * to hold them all. - */ + /* get the defect list length + * We need to do this as a workaround for a bug in either the Adaptec RAI= D or Hitachi scsi + * firmware. For some reason, if we give them a max length greater than t= he number of bytes + * in the dlist + the header length, the cam call returns an overflow err= or. + * It doesn't look like it's in the driver, as this error seems to be pop= ulated by the SRB + * error, and the SRB is an interpreted byte array stored in the FIB, lea= ding me to believe + * that it is directly populated by the firmware. + */ + if((error =3D readdefectlen(device, retry_count, timeout, arglist, &dlist= _length))) + { + warnx("unable to get defect list length"); + goto defect_bailout; + } +=09 + /* the +=3D4 is to hold the header */ + dlist_length +=3D4; +=09 defect_list =3D malloc(dlist_length); if (defect_list =3D=3D NULL) { warnx("can't malloc memory for defect list"); @@ -2004,6 +2017,170 @@ defect_bailout: =20 return(error); } + +/* returns the number of bytes in the appropriate defect list */ +static int readdefectlen(struct cam_device *device, int retry_count,=20 + int timeout, cam_argmask args, int *ret_len) +{ + struct scsi_read_defect_data_10 *rdd_cdb; + u_int8_t *defect_list =3D NULL; + union ccb *ccb =3D NULL; + + u_int8_t returned_format; + /*4 bytes will hold the defect header*/ + u_int32_t dlist_length =3D 4; + u_int32_t returned_length =3D 0; +=09 + int error =3D 0; + int lists_specified =3D 0; + + ccb =3D cam_getccb(device); + + defect_list =3D malloc(dlist_length); + if (defect_list =3D=3D NULL) { + warnx("can't malloc memory for defect list"); + error =3D 1; + goto defect_len_bailout; + } + + rdd_cdb =3D(struct scsi_read_defect_data_10 *)&ccb->csio.cdb_io.cdb_bytes= ; + + /*Essentially just delete stuff from here on*/ + /* + * cam_getccb() zeros the CCB header only. So we need to zero the + * payload portion of the ccb. + */ + bzero(&(&ccb->ccb_h)[1], + sizeof(struct ccb_scsiio) - sizeof(struct ccb_hdr)); + + cam_fill_csio(&ccb->csio, + /*retries*/ retry_count, + /*cbfcnp*/ NULL, + /*flags*/ CAM_DIR_IN | ((arglist & CAM_ARG_ERR_RECOVER) ? + CAM_PASS_ERR_RECOVER : 0), + /*tag_action*/ MSG_SIMPLE_Q_TAG, + /*data_ptr*/ defect_list, + /*dxfer_len*/ dlist_length, + /*sense_len*/ SSD_FULL_SIZE, + /*cdb_len*/ sizeof(struct scsi_read_defect_data_10), + /*timeout*/ timeout ? timeout : 5000); + + rdd_cdb->opcode =3D READ_DEFECT_DATA_10; + if (args & CAM_ARG_FORMAT_BLOCK) + rdd_cdb->format =3D SRDD10_BLOCK_FORMAT; + else if (args & CAM_ARG_FORMAT_BFI) + rdd_cdb->format =3D SRDD10_BYTES_FROM_INDEX_FORMAT; + else if (args & CAM_ARG_FORMAT_PHYS) + rdd_cdb->format =3D SRDD10_PHYSICAL_SECTOR_FORMAT; + else { + error =3D 1; + warnx("no defect list format specified"); + goto defect_len_bailout; + } + if (args & CAM_ARG_PLIST) { + rdd_cdb->format |=3D SRDD10_PLIST; + lists_specified++; + } + + if (args & CAM_ARG_GLIST) { + rdd_cdb->format |=3D SRDD10_GLIST; + lists_specified++; + } + + scsi_ulto2b(dlist_length, rdd_cdb->alloc_length); + + /* Disable freezing the device queue */ + ccb->ccb_h.flags |=3D CAM_DEV_QFRZDIS; + + if (cam_send_ccb(device, ccb) < 0) { + perror("error reading defect list"); + + if (args & CAM_ARG_VERBOSE) { + cam_error_print(device, ccb, CAM_ESF_ALL, + CAM_EPF_ALL, stderr); + } + + error =3D 1; + goto defect_len_bailout; + } + + returned_length =3D scsi_2btoul(((struct + scsi_read_defect_data_hdr_10 *)defect_list)->length); + + returned_format =3D ((struct scsi_read_defect_data_hdr_10 *) + defect_list)->format; + + if (((ccb->ccb_h.status & CAM_STATUS_MASK) =3D=3D CAM_SCSI_STATUS_ERROR) + && (ccb->csio.scsi_status =3D=3D SCSI_STATUS_CHECK_COND) + && ((ccb->ccb_h.status & CAM_AUTOSNS_VALID) !=3D 0)) { + struct scsi_sense_data *sense; + int error_code, sense_key, asc, ascq; + + sense =3D &ccb->csio.sense_data; + scsi_extract_sense(sense, &error_code, &sense_key, &asc, &ascq); + + /* + * According to the SCSI spec, if the disk doesn't support + * the requested format, it will generally return a sense + * key of RECOVERED ERROR, and an additional sense code + * of "DEFECT LIST NOT FOUND". So, we check for that, and + * also check to make sure that the returned length is + * greater than 0, and then print out whatever format the + * disk gave us. + */ + if ((sense_key =3D=3D SSD_KEY_RECOVERED_ERROR) + && (asc =3D=3D 0x1c) && (ascq =3D=3D 0x00) + && (returned_length > 0)) { + warnx("requested defect format not available"); + switch(returned_format & SRDDH10_DLIST_FORMAT_MASK) { + case SRDD10_BLOCK_FORMAT: + warnx("Device returned block format"); + break; + case SRDD10_BYTES_FROM_INDEX_FORMAT: + warnx("Device returned bytes from index" + " format"); + break; + case SRDD10_PHYSICAL_SECTOR_FORMAT: + warnx("Device returned physical sector format"); + break; + default: + error =3D 1; + warnx("Device returned unknown defect" + " data format %#x", returned_format); + goto defect_len_bailout; + break; /* NOTREACHED */ + } + } else { + error =3D 1; + warnx("Error returned from read defect data command"); + if (args & CAM_ARG_VERBOSE) + cam_error_print(device, ccb, CAM_ESF_ALL, + CAM_EPF_ALL, stderr); + goto defect_len_bailout; + } + } else if ((ccb->ccb_h.status & CAM_STATUS_MASK) !=3D CAM_REQ_CMP) { + error =3D 1; + warnx("Error returned from read defect data command"); + if (args & CAM_ARG_VERBOSE) + cam_error_print(device, ccb, CAM_ESF_ALL, + CAM_EPF_ALL, stderr); + goto defect_len_bailout; + } + + *ret_len =3D returned_length; + return 0; +defect_len_bailout: + + if (defect_list !=3D NULL) + free(defect_list); + + if (ccb !=3D NULL) + cam_freeccb(ccb); + + return(error); + +} + #endif /* MINIMALISTIC */ =20 #if 0
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?649630C24D5E884F85DD13645FE903A7640B244F>