Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 8 Oct 2012 09:35:46 -0600
From:      "Kenneth D. Merry" <ken@freebsd.org>
To:        Benjamin Close <Benjamin.Close@clearchain.com>
Cc:        freebsd-stable@freebsd.org
Subject:   Re: usb port issue in 9.1-Prerelease (Possibly Cam related)
Message-ID:  <20121008153546.GA7850@nargothrond.kdm.org>
In-Reply-To: <504DED78.2010203@clearchain.com>
References:  <504DED78.2010203@clearchain.com>

next in thread | previous in thread | raw e-mail | index | archive | help

--EVF5PPMfhYS0aIcm
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Mon, Sep 10, 2012 at 23:09:04 +0930, Benjamin Close wrote:
> Hi Folks,
>     I've facing an intermittent hang with a USB port which seems cam 
> related:
> 
> Event's that happen are:
> 
>     o USB modem (HUAWEI E220) plugged into PC
> 
> ugen3.2: <HUA WEI> at usbus3
> u3g0: <3G Modem> on usbus3
> u3g0: Found 3 ports.
> umass0: <USB MASS STORAGE> on usbus3
> umass0:  SCSI over Bulk-Only; quirks = 0x0000
> umass0:6:0:-1: Attached to scbus6
> umass1: <USB MASS STORAGE> on usbus3
> umass1:  SCSI over Bulk-Only; quirks = 0x0000
> umass1:7:1:-1: Attached to scbus7
> cd1 at umass-sim0 bus 0 scbus6 target 0 lun 0
> cd1: <HUAWEI Mass Storage 2.31> Removable CD-ROM SCSI-2 device
> cd1: 1.000MB/s transfers
> cd1: Attempt to query device size failed: NOT READY, Medium not present
> da0 at umass-sim1 bus 1 scbus7 target 0 lun 0
> da0: <HUAWEI SD Storage 2.31> Removable Direct Access SCSI-2 device
> da0: 1.000MB/s transfers
> da0: Attempt to query device size failed: NOT READY, Medium not present
> 
> 
>     o Time Elapses....many packets passed, no da0 or cd1 used.
> 
> 
>     o USB Modem drops off the bus
>        (It does this occasionally as it resets itself)
> 
>     o Causes USB bus to lose devices
> 
> ugen3.2: <HUA WEI> at usbus3 (disconnected)
> u3g0: at uhub3, port 1, addr 2 (disconnected)
> (cd1:umass-sim0:0:0:0): lost device, 1 refs
> (cd1:umass-sim0:0:0:0): removing device entry
> (pass4:umass-sim0:0:0:0): passdevgonecb: devfs entry is gone
> (da0:umass-sim1:1:0:0): lost device - 0 outstanding, 1 refs
> (da0:umass-sim1:1:0:0): removing device entry
> (pass5:umass-sim1:1:0:0): passdevgonecb: devfs entry is gone
> umass0: at uhub3, port 1, addr 2 (disconnected)
> 
> 
> At this point that particular USB port is effectively useless. Plugging 
> anything into the ports shows no device showing up.
> 
> Running usbconfig hangs with:
> 
>   PID    TID COMM             TDNAME KSTACK
> 48562 101874 usbconfig        -                mi_switch+0x186 
> sleepq_wait+0x42 _sx_xlock_hard+0x426 usbd_enum_lock+0xac 
> usb_ref_device+0x21c usb_open+0xc7 devfs_open+0x197 vn_open_cred+0x2ff 
> kern_openat+0x20a amd64_syscall+0x540 Xfast_syscall+0xf7
> 
> Controller is:
> 
> uhci0@pci0:0:26:0:      class=0x0c0300 card=0x02091028 chip=0x28348086 
> rev=0x02 hdr=0x00
>     vendor     = 'Intel Corporation'
>     device     = '82801H (ICH8 Family) USB UHCI Controller'
>     class      = serial bus
>     subclass   = USB
> uhci1@pci0:0:26:1:      class=0x0c0300 card=0x02091028 chip=0x28358086 
> rev=0x02 hdr=0x00
>     vendor     = 'Intel Corporation'
>     device     = '82801H (ICH8 Family) USB UHCI Controller'
>     class      = serial bus
>     subclass   = USB
> ehci0@pci0:0:26:7:      class=0x0c0320 card=0x02091028 chip=0x283a8086 
> rev=0x02 hdr=0x00
>     vendor     = 'Intel Corporation'
>     device     = '82801H (ICH8 Family) USB2 EHCI Controller'
>     class      = serial bus
>     subclass   = USB
> 
> It does however seem related to cam as looking at the various threads 
> for the usb hub I find:
> 
> (kgdb) bt
> #0  sched_switch (td=0xfffffe0002650000, newtd=0xfffffe000227f000, 
> flags=Variable "flags" is not available.
> ) at /usr/src/sys/kern/sched_ule.c:1927
> #1  0xffffffff808f34c6 in mi_switch (flags=260, newtd=0x0) at 
> /usr/src/sys/kern/kern_synch.c:485
> #2  0xffffffff8092bfd2 in sleepq_wait (wchan=0xfffffe001ec2a900, pri=92) 
> at /usr/src/sys/kern/subr_sleepqueue.c:623
> #3  0xffffffff808f3c69 in _sleep (ident=0xfffffe001ec2a900, 
> lock=0xfffffe00371e9210, priority=Variable "priority" is not available.
> ) at /usr/src/sys/kern/kern_synch.c:250
> #4  0xffffffff802bea02 in cam_sim_free (sim=0xfffffe001ec2a900, 
> free_devq=1) at /usr/src/sys/cam/cam_sim.c:112
> #5  0xffffffff8074f8ba in umass_detach (dev=Variable "dev" is not available.
> ) at /usr/src/sys/dev/usb/storage/umass.c:2183
> #6  0xffffffff8091a054 in device_detach (dev=0xfffffe001ec2e900) at 
> device_if.h:214
> #7  0xffffffff8075c458 in usb_detach_device (udev=0xfffffe0007ce8800, 
> iface_index=32 ' ', flag=Variable "flag" is not available.
> ) at /usr/src/sys/dev/usb/usb_device.c:1065
> #8  0xffffffff8075c5f4 in usb_unconfigure (udev=0xfffffe0007ce8800, 
> flag=Variable "flag" is not available.
> ) at /usr/src/sys/dev/usb/usb_device.c:455
> #9  0xffffffff8075c88e in usb_free_device (udev=0xfffffe0007ce8800, 
> flag=Variable "flag" is not available.
> ) at /usr/src/sys/dev/usb/usb_device.c:2093
> #10 0xffffffff80764e5e in uhub_explore (udev=0xfffffe0007353800) at 
> /usr/src/sys/dev/usb/usb_hub.c:358
> #11 0xffffffff8074f536 in usb_bus_explore (pm=Variable "pm" is not 
> available.
> ) at /usr/src/sys/dev/usb/controller/usb_controller.c:359
> #12 0xffffffff80769173 in usb_process (arg=Variable "arg" is not available.
> ) at /usr/src/sys/dev/usb/usb_process.c:170
> #13 0xffffffff808bc2df in fork_exit (callout=0xffffffff807690a0 
> <usb_process>, arg=0xffffff80007c0e88, frame=0xffffff804743cc40) at 
> /usr/src/sys/kern/kern_fork.c:992
> #14 0xffffffff80bc216e in fork_trampoline () at 
> /usr/src/sys/amd64/amd64/exception.S:602
> 
> 
> From:   cam_sim_free(struct cam_sim *sim, int free_devq)
> 
> (kgdb) l
> 107     {
> 108             int error;
> 109
> 110             sim->refcount--;
> 111             if (sim->refcount > 0) {
> 
> 112                     error = msleep(sim, sim->mtx, PRIBIO, "simfree", 0);
> 
> 113                     KASSERT(error == 0, ("invalid error value for 
> msleep(9)"));
> 114             }
> 115
> 116             KASSERT(sim->refcount == 0, ("sim->refcount == 0"));
> 
> 
> It almost appears as if the cam layer is trying to finalise a write of 
> some sort but the physical device has already gone.
> 
> (kgdb) p sim->refcount
> $1 = 1
> 
> Not sure why refcount is still ==1.
> 
> The same device in a box running 9.0-Stable (as of about 3 months ago) 
> with a ICH9 controller works as expected.
> 
> Any suggestions?

Do you happen to have 'camcontrol devlist -v' output for the above
scenario?

I have attached a patch for the pass(4) driver that might help things
somewhat.  The pass(4) driver won't go away completely if there is a
context that has the driver open when the underlying device goes away.

That might be what is holding up the SIM refcount, although I'm not
certain.

Ken
-- 
Kenneth Merry
ken@FreeBSD.ORG

--EVF5PPMfhYS0aIcm
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="scsi_pass.c.refcount.20121008.1.txt"

*** //depot/vendor/FreeBSD/stable/9/sys/cam/scsi/scsi_pass.c.orig
--- //depot/vendor/FreeBSD/stable/9/sys/cam/scsi/scsi_pass.c
***************
*** 76,81 ****
--- 76,82 ----
  	pass_flags	 flags;
  	u_int8_t	 pd_type;
  	union ccb	 saved_ccb;
+ 	int		 open_count;
  	struct devstat	*device_stats;
  	struct cdev	*dev;
  	struct cdev	*alias_dev;
***************
*** 140,151 ****
  static void
  passdevgonecb(void *arg)
  {
  	struct cam_periph *periph;
  
  	periph = (struct cam_periph *)arg;
  
! 	xpt_print(periph->path, "%s: devfs entry is gone\n", __func__);
! 	cam_periph_release(periph);
  }
  
  static void
--- 141,183 ----
  static void
  passdevgonecb(void *arg)
  {
+ 	struct cam_sim    *sim;
  	struct cam_periph *periph;
+ 	struct pass_softc *softc;
+ 	int i;
  
  	periph = (struct cam_periph *)arg;
+ 	sim = periph->sim;
+ 	softc = (struct pass_softc *)periph->softc;
+ 
+ 	KASSERT(softc->open_count >= 0, ("Negative open count %d",
+ 		softc->open_count));
+ 
+ 	mtx_lock(sim->mtx);
+ 
+ 	/*
+ 	 * When we get this callback, we will get no more close calls from
+ 	 * devfs.  So if we have any dangling opens, we need to release the
+ 	 * reference held for that particular context.
+ 	 */
+ 	for (i = 0; i < softc->open_count; i++)
+ 		cam_periph_release_locked(periph);
+ 
+ 	softc->open_count = 0;
  
! 	/*
! 	 * Release the reference held for the device node, it is gone now.
! 	 */
! 	cam_periph_release_locked(periph);
! 
! 	/*
! 	 * We reference the SIM lock directly here, instead of using
! 	 * cam_periph_unlock().  The reason is that the final call to
! 	 * cam_periph_release_locked() above could result in the periph
! 	 * getting freed.  If that is the case, dereferencing the periph
! 	 * with a cam_periph_unlock() call would cause a page fault.
! 	 */
! 	mtx_unlock(sim->mtx);
  }
  
  static void
***************
*** 219,231 ****
  	}
  	cam_periph_unlock(periph);
  	physpath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK);
  	if (xpt_getattr(physpath, MAXPATHLEN,
  			"GEOM::physpath", periph->path) == 0
  	 && strlen(physpath) != 0) {
  
  		make_dev_physpath_alias(MAKEDEV_WAITOK, &softc->alias_dev,
  					softc->dev, softc->alias_dev, physpath);
! 	}
  	free(physpath, M_DEVBUF);
  
  	/*
--- 251,268 ----
  	}
  	cam_periph_unlock(periph);
  	physpath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK);
+ 
+ 	cam_periph_lock(periph);
  	if (xpt_getattr(physpath, MAXPATHLEN,
  			"GEOM::physpath", periph->path) == 0
  	 && strlen(physpath) != 0) {
+ 		cam_periph_unlock(periph);
  
  		make_dev_physpath_alias(MAKEDEV_WAITOK, &softc->alias_dev,
  					softc->dev, softc->alias_dev, physpath);
! 	} else
! 		cam_periph_unlock(periph);
! 
  	free(physpath, M_DEVBUF);
  
  	/*
***************
*** 371,377 ****
  	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
  		xpt_print(periph->path, "%s: lost periph during "
  			  "registration!\n", __func__);
! 		mtx_lock(periph->sim->mtx);
  		return (CAM_REQ_CMP_ERR);
  	}
  
--- 408,414 ----
  	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
  		xpt_print(periph->path, "%s: lost periph during "
  			  "registration!\n", __func__);
! 		cam_periph_lock(periph);
  		return (CAM_REQ_CMP_ERR);
  	}
  
***************
*** 464,469 ****
--- 501,508 ----
  		return(EINVAL);
  	}
  
+ 	softc->open_count++;
+ 
  	cam_periph_unlock(periph);
  
  	return (error);
***************
*** 472,484 ****
  static int
  passclose(struct cdev *dev, int flag, int fmt, struct thread *td)
  {
  	struct 	cam_periph *periph;
  
  	periph = (struct cam_periph *)dev->si_drv1;
  	if (periph == NULL)
  		return (ENXIO);	
  
! 	cam_periph_release(periph);
  
  	return (0);
  }
--- 511,546 ----
  static int
  passclose(struct cdev *dev, int flag, int fmt, struct thread *td)
  {
+ 	struct  cam_sim    *sim;
  	struct 	cam_periph *periph;
+ 	struct  pass_softc *softc;
  
  	periph = (struct cam_periph *)dev->si_drv1;
  	if (periph == NULL)
  		return (ENXIO);	
  
! 	sim = periph->sim;
! 	softc = periph->softc;
! 
! 	mtx_lock(sim->mtx);
! 
! 	softc->open_count--;
! 
! 	cam_periph_release_locked(periph);
! 
! 	/*
! 	 * We reference the SIM lock directly here, instead of using
! 	 * cam_periph_unlock().  The reason is that the call to
! 	 * cam_periph_release_locked() above could result in the periph
! 	 * getting freed.  If that is the case, dereferencing the periph
! 	 * with a cam_periph_unlock() call would cause a page fault.
! 	 *
! 	 * cam_periph_release() avoids this problem using the same method,
! 	 * but we're manually acquiring and dropping the lock here to
! 	 * protect the open count and avoid another lock acquisition and
! 	 * release.
! 	 */
! 	mtx_unlock(sim->mtx);
  
  	return (0);
  }

--EVF5PPMfhYS0aIcm--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20121008153546.GA7850>