Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 2 Nov 2012 10:00:08 GMT
From:      Steven Hartland <steven.hartland@multiplay.co.uk>
To:        freebsd-gnats-submit@FreeBSD.org
Subject:   kern/173291: mfi corrupts JBOD disks >2TB due to LBA overflow
Message-ID:  <201211021000.qA2A086R091199@red.freebsd.org>
Resent-Message-ID: <201211021010.qA2AA02w001897@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         173291
>Category:       kern
>Synopsis:       mfi corrupts JBOD disks >2TB due to LBA overflow
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Fri Nov 02 10:10:00 UTC 2012
>Closed-Date:
>Last-Modified:
>Originator:     Steven Hartland
>Release:        8.3-RELEASE
>Organization:
Multiplay
>Environment:
FreeBSD dev 8.3-RELEASE-p4 FreeBSD 8.3-RELEASE-p4 #22: Mon Sep 17 17:18:32 UTC 2012     root@dev:/usr/obj/usr/src/sys/MULTIPLAY  amd64
>Description:
The current code in mfi for SYSPD aka JBOD disks corrupts data due to an LBA overflow.

This is caused because for the syspd case mfi always uses READ 10 / WRITE 10 requests for accessing the device, so a write over 2^32 wraps and starts writing at the beginning of the disk.
>How-To-Repeat:
1. zero out block 1 on the disk
dd if=/dev/zero bs=512 count=1 of=/dev/mfisyspd0
1+0 records in
1+0 records out
512 bytes transferred in 0.000728 secs (703171 bytes/sec)

2. confirm the first block is zeros
dd if=/dev/mfisyspd0 bs=512 count=1 | hexdump -C
1+0 records in
1+0 records out
512 bytes transferred in 0.000250 secs (2047172 bytes/sec)
00000000  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
*
00000200

3. write 1 block random after the 2TB boundary
dd if=/dev/random bs=512 count=1 of=/dev/mfisyspd0 oseek=4294967296
1+0 records in
1+0 records out
512 bytes transferred in 0.000717 secs (714162 bytes/sec)

4. first block of the disk now contains random data
dd if=/dev/mfisyspd0 bs=512 count=8 | hexdump -C
00000000  9c d1 d2 1d 9f 2c fc 30  ab 09 7a f7 64 16 2a 58  |.....,.0..z.d.*X|
00000010  18 27 9d 1f ae 4d 27 53  1a 50 e7 c1 b1 3a 9b e4  |.'...M'S.P...:..|
00000020  c3 7c d0 25 83 e2 bd 85  33 f2 33 8e 71 55 70 7c  |.|.%....3.3.qUp||
00000030
>Fix:
Apply the attached patch

Patch attached with submission follows:

Adds support for READ/WRITE 6, 10, 12 & 16 to syspd aka JBOD disks
based on sys/cam/scsi_all.c scsi_read_write.

Prior to this disks greater than 2^32 sectors would result in data
corrupt when sectors greater than 2^32 where accessed.

It may be a good idea at some point to extract the CDB creation logic
from scsi_read_write so it can be directly used in cases like this.
--- sys/dev/mfi/mfi.c.orig	2012-11-02 00:44:20.799257878 +0000
+++ sys/dev/mfi/mfi.c	2012-11-02 00:48:14.480092782 +0000
@@ -78,6 +78,7 @@
 #include <machine/bus.h>
 #include <machine/resource.h>
 
+#include <cam/scsi/scsi_all.h>
 #include <dev/mfi/mfireg.h>
 #include <dev/mfi/mfi_ioctl.h>
 #include <dev/mfi/mfivar.h>
@@ -105,6 +106,8 @@
 static struct mfi_command * mfi_bio_command(struct mfi_softc *);
 static void	mfi_bio_complete(struct mfi_command *);
 static struct mfi_command *mfi_build_ldio(struct mfi_softc *,struct bio*);
+static int mfi_build_syspd_cdb(struct mfi_pass_frame *pass, uint32_t block_count,
+		   uint64_t lba, uint8_t byte2, int readop);
 static struct mfi_command *mfi_build_syspdio(struct mfi_softc *,struct bio*);
 static int	mfi_send_frame(struct mfi_softc *, struct mfi_command *);
 static int	mfi_abort(struct mfi_softc *, struct mfi_command *);
@@ -1988,13 +1991,78 @@
 	    mfi_enqueue_bio(sc, bio);
 	return cm;
 }
+
+static int
+mfi_build_syspd_cdb(struct mfi_pass_frame *pass, uint32_t block_count,
+    uint64_t lba, uint8_t byte2, int readop)
+{
+	int cdb_len;
+
+	if (((lba & 0x1fffff) == lba)
+         && ((block_count & 0xff) == block_count)
+         && (byte2 == 0)) {
+		/* We can fit in a 6 byte cdb */
+		struct scsi_rw_6 *scsi_cmd;
+
+		scsi_cmd = (struct scsi_rw_6 *)&pass->cdb;
+		scsi_cmd->opcode = readop ? READ_6 : WRITE_6;
+		scsi_ulto3b(lba, scsi_cmd->addr);
+		scsi_cmd->length = block_count & 0xff;
+		scsi_cmd->control = 0;
+		cdb_len = sizeof(*scsi_cmd);
+	} else if (((block_count & 0xffff) == block_count) && ((lba & 0xffffffff) == lba)) {
+		/* Need a 10 byte CDB */
+		struct scsi_rw_10 *scsi_cmd;
+
+		scsi_cmd = (struct scsi_rw_10 *)&pass->cdb;
+		scsi_cmd->opcode = readop ? READ_10 : WRITE_10;
+		scsi_cmd->byte2 = byte2;
+		scsi_ulto4b(lba, scsi_cmd->addr);
+		scsi_cmd->reserved = 0;
+		scsi_ulto2b(block_count, scsi_cmd->length);
+		scsi_cmd->control = 0;
+		cdb_len = sizeof(*scsi_cmd);
+	} else if (((block_count & 0xffffffff) == block_count) &&
+	    ((lba & 0xffffffff) == lba)) {
+		/* Block count is too big for 10 byte CDB use a 12 byte CDB */
+		struct scsi_rw_12 *scsi_cmd;
+
+		scsi_cmd = (struct scsi_rw_12 *)&pass->cdb;
+		scsi_cmd->opcode = readop ? READ_12 : WRITE_12;
+		scsi_cmd->byte2 = byte2;
+		scsi_ulto4b(lba, scsi_cmd->addr);
+		scsi_cmd->reserved = 0;
+		scsi_ulto4b(block_count, scsi_cmd->length);
+		scsi_cmd->control = 0;
+		cdb_len = sizeof(*scsi_cmd);
+	} else {
+		/*
+		 * 16 byte CDB.  We'll only get here if the LBA is larger
+		 * than 2^32
+		 */
+		struct scsi_rw_16 *scsi_cmd;
+
+		scsi_cmd = (struct scsi_rw_16 *)&pass->cdb;
+		scsi_cmd->opcode = readop ? READ_16 : WRITE_16;
+		scsi_cmd->byte2 = byte2;
+		scsi_u64to8b(lba, scsi_cmd->addr);
+		scsi_cmd->reserved = 0;
+		scsi_ulto4b(block_count, scsi_cmd->length);
+		scsi_cmd->control = 0;
+		cdb_len = sizeof(*scsi_cmd);
+	}
+
+	return cdb_len;
+}
+
 static struct mfi_command *
 mfi_build_syspdio(struct mfi_softc *sc, struct bio *bio)
 {
 	struct mfi_command *cm;
 	struct mfi_pass_frame *pass;
-	int flags = 0, blkcount = 0;
-	uint32_t context = 0;
+	int flags = 0;
+	uint8_t cdb_len;
+	uint32_t block_count, context = 0;
 
 	if ((cm = mfi_dequeue_free(sc)) == NULL)
 	    return (NULL);
@@ -2008,35 +2076,29 @@
 	pass->header.cmd = MFI_CMD_PD_SCSI_IO;
 	switch (bio->bio_cmd & 0x03) {
 	case BIO_READ:
-#define SCSI_READ 0x28
-		pass->cdb[0] = SCSI_READ;
 		flags = MFI_CMD_DATAIN;
 		break;
 	case BIO_WRITE:
-#define SCSI_WRITE 0x2a
-		pass->cdb[0] = SCSI_WRITE;
 		flags = MFI_CMD_DATAOUT;
 		break;
 	default:
-		panic("Invalid bio command");
+		/* TODO: what about BIO_DELETE??? */
+		panic("Unsupported bio command");
 	}
 
 	/* Cheat with the sector length to avoid a non-constant division */
-	blkcount = (bio->bio_bcount + MFI_SECTOR_LEN - 1) / MFI_SECTOR_LEN;
+	block_count = (bio->bio_bcount + MFI_SECTOR_LEN - 1) / MFI_SECTOR_LEN;
 	/* Fill the LBA and Transfer length in CDB */
-	pass->cdb[2] = (bio->bio_pblkno & 0xff000000) >> 24;
-	pass->cdb[3] = (bio->bio_pblkno & 0x00ff0000) >> 16;
-	pass->cdb[4] = (bio->bio_pblkno & 0x0000ff00) >> 8;
-	pass->cdb[5] = bio->bio_pblkno & 0x000000ff;
-	pass->cdb[7] = (blkcount & 0xff00) >> 8;
-	pass->cdb[8] = (blkcount & 0x00ff);
+	cdb_len = mfi_build_syspd_cdb(pass, block_count, bio->bio_pblkno, 0,
+	    flags == MFI_CMD_DATAIN);
+
 	pass->header.target_id = (uintptr_t)bio->bio_driver1;
 	pass->header.timeout = 0;
 	pass->header.flags = 0;
 	pass->header.scsi_status = 0;
 	pass->header.sense_len = MFI_SENSE_LEN;
 	pass->header.data_len = bio->bio_bcount;
-	pass->header.cdb_len = 10;
+	pass->header.cdb_len = cdb_len;
 	pass->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr;
 	pass->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32);
 	cm->cm_complete = mfi_bio_complete;
@@ -2075,7 +2137,8 @@
 		flags = MFI_CMD_DATAOUT;
 		break;
 	default:
-		panic("Invalid bio command");
+		/* TODO: what about BIO_DELETE??? */
+		panic("Unsupported bio command");
 	}
 
 	/* Cheat with the sector length to avoid a non-constant division */
@@ -2466,7 +2529,7 @@
 	struct mfi_command *cm;
 	struct mfi_pass_frame *pass;
 	int error;
-	int blkcount = 0;
+	uint32_t blkcount;
 
 	if ((cm = mfi_dequeue_free(sc)) == NULL)
 		return (EBUSY);
@@ -2474,21 +2537,14 @@
 	pass = &cm->cm_frame->pass;
 	bzero(pass->cdb, 16);
 	pass->header.cmd = MFI_CMD_PD_SCSI_IO;
-	pass->cdb[0] = SCSI_WRITE;
-	pass->cdb[2] = (lba & 0xff000000) >> 24;
-	pass->cdb[3] = (lba & 0x00ff0000) >> 16;
-	pass->cdb[4] = (lba & 0x0000ff00) >> 8;
-	pass->cdb[5] = (lba & 0x000000ff);
 	blkcount = (len + MFI_SECTOR_LEN - 1) / MFI_SECTOR_LEN;
-	pass->cdb[7] = (blkcount & 0xff00) >> 8;
-	pass->cdb[8] = (blkcount & 0x00ff);
 	pass->header.target_id = id;
 	pass->header.timeout = 0;
 	pass->header.flags = 0;
 	pass->header.scsi_status = 0;
 	pass->header.sense_len = MFI_SENSE_LEN;
 	pass->header.data_len = len;
-	pass->header.cdb_len = 10;
+	pass->header.cdb_len = mfi_build_syspd_cdb(pass, blkcount, lba, 0, 0);
 	pass->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr;
 	pass->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32);
 	cm->cm_data = virt;


>Release-Note:
>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201211021000.qA2A086R091199>