Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 12 Apr 2005 23:57:17 -0400
From:      David Sze <dsze@alumni.uwaterloo.ca>
To:        Scott Long <scottl@samsco.org>
Cc:        mb@imp.ch
Subject:   Re: [PATCH] Stability fixes for IPS driver for 4.x
Message-ID:  <6.2.1.2.2.20050412234622.05a6daf8@mail.distrust.net>
In-Reply-To: <425C12E3.5050205@samsco.org>
References:  <4257F20C.70004@samsco.org> <6.2.1.2.2.20050411005214.065dc018@mail.distrust.net> <425A0BB2.10704@samsco.org> <6.2.1.2.2.20050411234713.069afb28@mail.distrust.net> <425C12E3.5050205@samsco.org>

next in thread | previous in thread | raw e-mail | index | archive | help

[-- Attachment #1 --]
At 12:26 PM 12/04/2005 -0600, Scott Long wrote this to All:
>David Sze wrote:
>>At 11:31 PM 10/04/2005 -0600, Scott Long wrote this to All:
>>
>>>Making a driver PAE-ified means either teaching it to do 64-bit
>>>scatter-gather (assuming that the peripheral hardware can do this
>>>and that it's documented), or teaching the driver to correctly handle
>>>EINPROGRESS from bus_dmamap_load() along with using the proper busdma
>>>tag limits.  The strategy I took with 6.x/5.x was the second one since
>>>I didn't have good IPS docs in front of me and I wanted it follow the
>>>APIs correctly.  I did test it with 8GB of memory and it performed
>>>correctly under load.  I haven't taken a close enough look at your
>>>MFC patch to say for sure if it's correct or not.  I'm not sure if
>>>I'll have time to take another look in the next few days, unfortunately.
>>>Is there any chance you could test 5.x/6.0 under load with PAE just to
>>>validate the assertion that it works correctly there?
>>
>>I had a chance to test 5.4-RC1 (i386) today with GENERIC, SMP, PAE, and 
>>SMP-PAE kernels (the last one is just PAE with "options SMP").
>>To recap, the hardware is an IBM xSeries 346, Dual Xeon 3GHz (non-E64MT), 
>>ServeRAID-7K.
>>GENERIC and SMP survived "make buildkernel", but PAE and SMP-PAE paniced 
>>reproducibly doing the same.  The DDB stack trace doesn't appear to be 
>>anywhere near the IPS driver though, so I'm way out of my league.
>
>Darnit, hard to say if this is an existing bug in 5.4 or if it's a 
>bug/corruption in ips.Can you re-run with PAE disabled?

Works fine with PAE disabled (or at least I couldn't get it to panic), both 
UP and SMP kernels.


>Would you be
>willing to put the Giant lock back on top of the driver?  This would
>mean modifying the call to bus_intr_config(), adding the D_GIANTNEEDED
>flag to the disk structure in disk_create(), and switching the mutex
>argument in bus_dma_tag_create() for the sg_dmatag tag.

I put Giant back in as you described (patch attached), but it still 
panic'ed with PAE enabled, both UP and SMP kernels.  The stack trace was 
very similar; the fault address (0x24) and the top three stack frames were 
the same as without Giant:

         propagate_priority
         turnstile_wait
         _mtx_lock_sleep

At this point I no longer have access to the hardware, the customer wanted 
his servers back.  They're going into the datacenter with RELENG_4 (w/IPS 
stability patch), without PAE (so the top ~900MB of his 4GB RAM is lost to 
PCI-X address space).



[-- Attachment #2 --]
diff -u sys/dev/ips.orig/ips.c sys/dev/ips/ips.c
--- sys/dev/ips.orig/ips.c	Sun Jan 30 14:41:12 2005
+++ sys/dev/ips/ips.c	Tue Apr 12 12:59:31 2005
@@ -349,8 +349,8 @@
 				/* maxsegsize*/	IPS_COMMAND_LEN + 
 						    IPS_MAX_SG_LEN,
 				/* flags     */	0,
-				/* lockfunc  */ NULL,
-				/* lockarg   */ NULL,
+				/* lockfunc  */ busdma_lock_mutex,
+				/* lockarg   */ &Giant,
 				&sc->command_dmatag) != 0) {
                 device_printf(sc->dev, "can't alloc command dma tag\n");
 		goto error;
@@ -367,7 +367,7 @@
 				/* maxsegsize*/	IPS_MAX_IOBUF_SIZE,
 				/* flags     */	0,
 				/* lockfunc  */ busdma_lock_mutex,
-				/* lockarg   */ &sc->queue_mtx,
+				/* lockarg   */ &Giant,
 				&sc->sg_dmatag) != 0) {
 		device_printf(sc->dev, "can't alloc SG dma tag\n");
 		goto error;
@@ -584,8 +584,8 @@
 				/* numsegs   */	1,
 				/* maxsegsize*/	sizeof(ips_copper_queue_t),
 				/* flags     */	0,
-				/* lockfunc  */ NULL,
-				/* lockarg   */ NULL,
+				/* lockfunc  */ busdma_lock_mutex,
+				/* lockarg   */ &Giant,
 				&dmatag) != 0) {
                 device_printf(sc->dev, "can't alloc dma tag for statue queue\n");
 		error = ENOMEM;
diff -u sys/dev/ips.orig/ips_commands.c sys/dev/ips/ips_commands.c
--- sys/dev/ips.orig/ips_commands.c	Sun Jan 30 14:41:12 2005
+++ sys/dev/ips/ips_commands.c	Tue Apr 12 13:02:19 2005
@@ -209,8 +209,8 @@
 				/* numsegs   */	1,
 				/* maxsegsize*/	IPS_ADAPTER_INFO_LEN,
 				/* flags     */	0,
-				/* lockfunc  */ NULL,
-				/* lockarg   */ NULL,
+				/* lockfunc  */ busdma_lock_mutex,
+				/* lockarg   */ &Giant,
 				&command->data_dmatag) != 0) {
                 printf("ips: can't alloc dma tag for adapter status\n");
 		error = ENOMEM;
@@ -308,8 +308,8 @@
 				/* numsegs   */	1,
 				/* maxsegsize*/	IPS_DRIVE_INFO_LEN,
 				/* flags     */	0,
-				/* lockfunc  */ NULL,
-				/* lockarg   */ NULL,
+				/* lockfunc  */ busdma_lock_mutex,
+				/* lockarg   */ &Giant,
 				&command->data_dmatag) != 0) {
                 printf("ips: can't alloc dma tag for drive status\n");
 		error = ENOMEM;
@@ -547,8 +547,8 @@
 				/* numsegs   */	1,
 				/* maxsegsize*/	IPS_NVRAM_PAGE_SIZE,
 				/* flags     */	0,
-				/* lockfunc  */ NULL,
-				/* lockarg   */ NULL,
+				/* lockfunc  */ busdma_lock_mutex,
+				/* lockarg   */ &Giant,
 				&command->data_dmatag) != 0) {
                 printf("ips: can't alloc dma tag for nvram\n");
 		error = ENOMEM;
diff -u sys/dev/ips.orig/ips_disk.c sys/dev/ips/ips_disk.c
--- sys/dev/ips.orig/ips_disk.c	Sun Jan 30 14:41:12 2005
+++ sys/dev/ips/ips_disk.c	Tue Apr 12 13:04:44 2005
@@ -155,7 +155,7 @@
 	dsc->ipsd_disk->d_sectorsize = IPS_BLKSIZE;
 	dsc->ipsd_disk->d_mediasize = (off_t)totalsectors * IPS_BLKSIZE;
 	dsc->ipsd_disk->d_unit = dsc->unit;
-	dsc->ipsd_disk->d_flags = 0;
+	dsc->ipsd_disk->d_flags = DISKFLAG_NEEDSGIANT;
 	disk_create(dsc->ipsd_disk, DISK_VERSION);
 
 	device_printf(dev, "Logical Drive  (%dMB)\n",
diff -u sys/dev/ips.orig/ips_ioctl.c sys/dev/ips/ips_ioctl.c
--- sys/dev/ips.orig/ips_ioctl.c	Sun Jan 30 14:41:12 2005
+++ sys/dev/ips/ips_ioctl.c	Tue Apr 12 13:02:36 2005
@@ -99,8 +99,8 @@
 			/* numsegs   */	1,
 			/* maxsegsize*/	ioctl_cmd->datasize,
 			/* flags     */	0,
-			/* lockfunc  */ NULL,
-			/* lockarg   */ NULL,
+			/* lockfunc  */ busdma_lock_mutex,
+			/* lockarg   */ &Giant,
 			&ioctl_cmd->dmatag) != 0) {
 		return ENOMEM;
         }
diff -u sys/dev/ips.orig/ips_pci.c sys/dev/ips/ips_pci.c
--- sys/dev/ips.orig/ips_pci.c	Sun Jan 30 14:41:12 2005
+++ sys/dev/ips/ips_pci.c	Tue Apr 12 13:03:31 2005
@@ -126,7 +126,7 @@
                 device_printf(dev, "irq allocation failed\n");
                 goto error;
         }
-	if(bus_setup_intr(dev, sc->irqres, INTR_TYPE_BIO|INTR_MPSAFE, sc->ips_adapter_intr, sc, &sc->irqcookie)){
+	if(bus_setup_intr(dev, sc->irqres, INTR_TYPE_BIO, sc->ips_adapter_intr, sc, &sc->irqcookie)){
                 device_printf(dev, "irq setup failed\n");
                 goto error;
         }
@@ -141,8 +141,8 @@
 				/* numsegs   */	IPS_MAX_SG_ELEMENTS,
 				/* maxsegsize*/	BUS_SPACE_MAXSIZE_32BIT,
 				/* flags     */	0,
-				/* lockfunc  */ NULL,
-				/* lockarg   */ NULL,
+				/* lockfunc  */ busdma_lock_mutex,
+				/* lockarg   */ &Giant,
 				&sc->adapter_dmatag) != 0) {
                 printf("IPS can't alloc dma tag\n");
                 goto error;

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?6.2.1.2.2.20050412234622.05a6daf8>