Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 20 Jun 1997 17:46:37 +0200
From:      Tor Egge <Tor.Egge@idi.ntnu.no>
To:        Tor.Egge@idi.ntnu.no
Cc:        freebsd-scsi@FreeBSD.ORG
Subject:   Re: scsi recovery code causes system freeze
Message-ID:  <199706201546.RAA11875@pat.idi.ntnu.no>
In-Reply-To: Your message of "Mon, 09 Jun 1997 21:16:39 +0200"
References:  <199706091916.VAA16067@pat.idi.ntnu.no>

next in thread | previous in thread | raw e-mail | index | archive | help

[I wrote]

> I have some problems with heavy write activity on a scsi bus causing
> scsi timeouts. Sometimes the machine freezes during the error 
> recovery.

Due to several (>5) freezes I ended up writing a workaround.

This workaround is not very well tested (the freezes do not occur
*that* often), but I believe it is mostly correct.


  Jun 20 16:43:13 ikke /kernel: ahc1: Issued Channel A Bus Reset. 11 SCBs aborted
  Jun 20 16:43:13 ikke /kernel: Clearing bus reset
  Jun 20 16:43:13 ikke /kernel: sd7: Will resubmit scsi cmd
  Jun 20 16:43:13 ikke /kernel: Clearing 'in-reset' flag
  Jun 20 16:43:13 ikke /kernel: sd6: no longer in timeout
  Jun 20 16:43:13 ikke /kernel: sd8: no longer in timeout
  Jun 20 16:43:13 ikke /kernel: sd7: UNIT ATTENTION asc:29,2 
  Jun 20 16:43:13 ikke /kernel: , retries:3
  Jun 20 16:43:13 ikke /kernel: sd6: UNIT ATTENTION asc:29,2 
  Jun 20 16:43:13 ikke /kernel: , retries:3
  Jun 20 16:43:13 ikke /kernel: sd8: UNIT ATTENTION asc:29,2 
  Jun 20 16:43:14 ikke /kernel: , retries:3
  Jun 20 16:43:14 ikke /kernel: sd9: UNIT ATTENTION asc:29,2 
  Jun 20 16:43:14 ikke /kernel: , retries:4
  Jun 20 16:43:14 ikke /kernel: sd10: UNIT ATTENTION asc:29,2 
  Jun 20 16:43:14 ikke /kernel: , retries:4
  Jun 20 16:43:14 ikke /kernel: sd7: Resubmitting scsi cmd

----------
Index: aic7xxx.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/scsi/aic7xxx.c,v
retrieving revision 1.118
diff -u -r1.118 aic7xxx.c
--- aic7xxx.c	1997/04/26 05:03:18	1.118
+++ aic7xxx.c	1997/06/20 15:12:35
@@ -2355,6 +2355,56 @@
 #endif
 }
 
+static void ahc_resubmit __P((void *data));
+static void ahc_resubmit(data)
+     void *data;
+{
+	struct scsi_xfer *xs;
+	struct scsi_link *sc_link;
+	int retval;
+	int s;
+	
+	xs = (struct scsi_xfer *) data;
+	sc_link = xs->sc_link;
+
+	sc_print_addr(sc_link);
+	printf("Resubmitting scsi cmd\n");
+	s = splbio();
+	retval = (*(sc_link->adapter->scsi_cmd)) (xs);
+	splx(s);
+
+	switch (retval) {
+	case SUCCESSFULLY_QUEUED:
+	  /* 
+	   * Finally queued properly, or a new resubmit has been scheduled 
+	   */
+	  return;
+	case TRY_AGAIN_LATER:
+	  /*
+	   * Ran out of SCBs. Schedule a new retry in 1 second.
+	   */
+	  xs->error = XS_NOERROR;
+	  xs->flags &= ~ITSDONE;
+	  timeout(ahc_resubmit,(caddr_t) xs,hz);
+	  return;
+	case COMPLETE:
+	  /* 
+	   * Ran out of DMA segments. (aic7xxx.c specific) 
+	   */
+	  xs->flags |= ITSDONE;
+	  s = splbio();
+	  scsi_done(xs);
+	  splx(s);
+	  return;
+	case HAD_ERROR:
+	default:
+	  /* 
+	   * Should not happen (aic7xxx.c specific)
+	   */
+	  panic("ahc_resubmit: Unexpected return code (%d)",retval);
+	}
+}
+
 /*
  * start a scsi operation given the command and
  * the data address, target, and lun all of which
@@ -2387,6 +2437,17 @@
 		  && (ahc->in_reset & CHANNEL_B_RESET) != 0)
 		 || (!IS_SCSIBUS_B(ahc, xs->sc_link)
 		  && (ahc->in_reset & CHANNEL_A_RESET) != 0)) {
+			if ((flags & SCSI_NOMASK) == 0) {
+				sc_print_addr(xs->sc_link);
+				printf("Will resubmit scsi cmd\n");
+				timeout(ahc_resubmit,(caddr_t) xs,hz);
+				return SUCCESSFULLY_QUEUED;
+			}
+			/* 
+			 * This is broken, since it will cause an infinite loop
+			 * of retries while timeouts are blocked.
+			 */
+			printf("Warning: Freeze imminent\n");
 			/* Ick, but I don't want it to abort this */
 			xs->retries++;
 		 	xs->error = XS_BUSY;
--------

- Tor Egge



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199706201546.RAA11875>