Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 17 Dec 2018 17:11:00 +0000 (UTC)
From:      Andriy Gapon <avg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r342170 - in head/sys: dev/pci kern sys x86/x86
Message-ID:  <201812171711.wBHHB0OO084416@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: avg
Date: Mon Dec 17 17:11:00 2018
New Revision: 342170
URL: https://svnweb.freebsd.org/changeset/base/342170

Log:
  add support for marking interrupt handlers as suspended
  
  The goal of this change is to fix a problem with PCI shared interrupts
  during suspend and resume.
  
  I have observed a couple of variations of the following scenario.
  Devices A and B are on the same PCI bus and share the same interrupt.
  Device A's driver is suspended first and the device is powered down.
  Device B generates an interrupt. Interrupt handlers of both drivers are
  called. Device A's interrupt handler accesses registers of the powered
  down device and gets back bogus values (I assume all 0xff). That data is
  interpreted as interrupt status bits, etc. So, the interrupt handler
  gets confused and may produce some noise or enter an infinite loop, etc.
  
  This change affects only PCI devices.  The pci(4) bus driver marks a
  child's interrupt handler as suspended after the child's suspend method
  is called and before the device is powered down.  This is done only for
  traditional PCI interrupts, because only they can be shared.
  
  At the moment the change is only for x86.
  
  Notable changes in core subsystems / interfaces:
  - BUS_SUSPEND_INTR and BUS_RESUME_INTR methods are added to bus
    interface along with convenience functions bus_suspend_intr and
    bus_resume_intr;
  - rman_set_irq_cookie and rman_get_irq_cookie functions are added to
    provide a way to associate an interrupt resource with an interrupt
    cookie;
  - intr_event_suspend_handler and intr_event_resume_handler functions
    are added to the MI interrupt handler interface.
  
  I added two new interrupt handler flags, IH_SUSP and IH_CHANGED, to
  implement the new intr_event functions.  IH_SUSP marks a suspended
  interrupt handler.  IH_CHANGED is used to implement a barrier that
  ensures that a change to the interrupt handler's state is visible
  to future interrupts.
  While there, I fixed some whitespace issues in comments and changed a
  couple of logically boolean variables to be bool.
  
  MFC after:	1 month (maybe)
  Differential Revision: https://reviews.freebsd.org/D15755

Modified:
  head/sys/dev/pci/pci.c
  head/sys/kern/bus_if.m
  head/sys/kern/kern_intr.c
  head/sys/kern/subr_bus.c
  head/sys/kern/subr_rman.c
  head/sys/sys/bus.h
  head/sys/sys/interrupt.h
  head/sys/sys/rman.h
  head/sys/x86/x86/nexus.c

Modified: head/sys/dev/pci/pci.c
==============================================================================
--- head/sys/dev/pci/pci.c	Mon Dec 17 16:01:37 2018	(r342169)
+++ head/sys/dev/pci/pci.c	Mon Dec 17 17:11:00 2018	(r342170)
@@ -4467,6 +4467,7 @@ int
 pci_suspend_child(device_t dev, device_t child)
 {
 	struct pci_devinfo *dinfo;
+	struct resource_list_entry *rle;
 	int error;
 
 	dinfo = device_get_ivars(child);
@@ -4483,8 +4484,20 @@ pci_suspend_child(device_t dev, device_t child)
 	if (error)
 		return (error);
 
-	if (pci_do_power_suspend)
+	if (pci_do_power_suspend) {
+		/*
+		 * Make sure this device's interrupt handler is not invoked
+		 * in the case the device uses a shared interrupt that can
+		 * be raised by some other device.
+		 * This is applicable only to regular (legacy) PCI interrupts
+		 * as MSI/MSI-X interrupts are never shared.
+		 */
+		rle = resource_list_find(&dinfo->resources,
+		    SYS_RES_IRQ, 0);
+		if (rle != NULL && rle->res != NULL)
+			(void)bus_suspend_intr(child, rle->res);
 		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
+	}
 
 	return (0);
 }
@@ -4493,6 +4506,7 @@ int
 pci_resume_child(device_t dev, device_t child)
 {
 	struct pci_devinfo *dinfo;
+	struct resource_list_entry *rle;
 
 	if (pci_do_power_resume)
 		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
@@ -4503,6 +4517,16 @@ pci_resume_child(device_t dev, device_t child)
 		pci_cfg_save(child, dinfo, 1);
 
 	bus_generic_resume_child(dev, child);
+
+	/*
+	 * Allow interrupts only after fully resuming the driver and hardware.
+	 */
+	if (pci_do_power_suspend) {
+		/* See pci_suspend_child for details. */
+		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
+		if (rle != NULL && rle->res != NULL)
+			(void)bus_resume_intr(child, rle->res);
+	}
 
 	return (0);
 }

Modified: head/sys/kern/bus_if.m
==============================================================================
--- head/sys/kern/bus_if.m	Mon Dec 17 16:01:37 2018	(r342169)
+++ head/sys/kern/bus_if.m	Mon Dec 17 17:11:00 2018	(r342170)
@@ -472,6 +472,44 @@ METHOD int teardown_intr {
 };
 
 /**
+ * @brief Suspend an interrupt handler
+ *
+ * This method is used to mark a handler as suspended in the case
+ * that the associated device is powered down and cannot be a source
+ * for the, typically shared, interrupt.
+ * The value of @p _irq must be the interrupt resource passed
+ * to a previous call to BUS_SETUP_INTR().
+ * 
+ * @param _dev		the parent device of @p _child
+ * @param _child	the device which allocated the resource
+ * @param _irq		the resource representing the interrupt
+ */
+METHOD int suspend_intr {
+	device_t	_dev;
+	device_t	_child;
+	struct resource *_irq;
+} DEFAULT bus_generic_suspend_intr;
+
+/**
+ * @brief Resume an interrupt handler
+ *
+ * This method is used to clear suspended state of a handler when
+ * the associated device is powered up and can be an interrupt source
+ * again.
+ * The value of @p _irq must be the interrupt resource passed
+ * to a previous call to BUS_SETUP_INTR().
+ * 
+ * @param _dev		the parent device of @p _child
+ * @param _child	the device which allocated the resource
+ * @param _irq		the resource representing the interrupt
+ */
+METHOD int resume_intr {
+	device_t	_dev;
+	device_t	_child;
+	struct resource *_irq;
+} DEFAULT bus_generic_resume_intr;
+
+/**
  * @brief Define a resource which can be allocated with
  * BUS_ALLOC_RESOURCE().
  *

Modified: head/sys/kern/kern_intr.c
==============================================================================
--- head/sys/kern/kern_intr.c	Mon Dec 17 16:01:37 2018	(r342169)
+++ head/sys/kern/kern_intr.c	Mon Dec 17 17:11:00 2018	(r342170)
@@ -721,6 +721,28 @@ intr_event_barrier(struct intr_event *ie)
 	atomic_thread_fence_acq();
 }
 
+static void
+intr_handler_barrier(struct intr_handler *handler)
+{
+	struct intr_event *ie;
+
+	ie = handler->ih_event;
+	mtx_assert(&ie->ie_lock, MA_OWNED);
+	KASSERT((handler->ih_flags & IH_DEAD) == 0,
+	    ("update for a removed handler"));
+
+	if (ie->ie_thread == NULL) {
+		intr_event_barrier(ie);
+		return;
+	}
+	if ((handler->ih_flags & IH_CHANGED) == 0) {
+		handler->ih_flags |= IH_CHANGED;
+		intr_event_schedule_thread(ie);
+	}
+	while ((handler->ih_flags & IH_CHANGED) != 0)
+		msleep(handler, &ie->ie_lock, 0, "ih_barr", 0);
+}
+
 /*
  * Sleep until an ithread finishes executing an interrupt handler.
  *
@@ -842,6 +864,49 @@ intr_event_remove_handler(void *cookie)
 	return (0);
 }
 
+int
+intr_event_suspend_handler(void *cookie)
+{
+	struct intr_handler *handler = (struct intr_handler *)cookie;
+	struct intr_event *ie;
+
+	if (handler == NULL)
+		return (EINVAL);
+	ie = handler->ih_event;
+	KASSERT(ie != NULL,
+	    ("interrupt handler \"%s\" has a NULL interrupt event",
+	    handler->ih_name));
+	mtx_lock(&ie->ie_lock);
+	handler->ih_flags |= IH_SUSP;
+	intr_handler_barrier(handler);
+	mtx_unlock(&ie->ie_lock);
+	return (0);
+}
+
+int
+intr_event_resume_handler(void *cookie)
+{
+	struct intr_handler *handler = (struct intr_handler *)cookie;
+	struct intr_event *ie;
+
+	if (handler == NULL)
+		return (EINVAL);
+	ie = handler->ih_event;
+	KASSERT(ie != NULL,
+	    ("interrupt handler \"%s\" has a NULL interrupt event",
+	    handler->ih_name));
+
+	/*
+	 * intr_handler_barrier() acts not only as a barrier,
+	 * it also allows to check for any pending interrupts.
+	 */
+	mtx_lock(&ie->ie_lock);
+	handler->ih_flags &= ~IH_SUSP;
+	intr_handler_barrier(handler);
+	mtx_unlock(&ie->ie_lock);
+	return (0);
+}
+
 static int
 intr_event_schedule_thread(struct intr_event *ie)
 {
@@ -1016,10 +1081,21 @@ intr_event_execute_handlers(struct proc *p, struct int
 		 */
 		ihp = ih;
 
+		if ((ih->ih_flags & IH_CHANGED) != 0) {
+			mtx_lock(&ie->ie_lock);
+			ih->ih_flags &= ~IH_CHANGED;
+			wakeup(ih);
+			mtx_unlock(&ie->ie_lock);
+		}
+
 		/* Skip filter only handlers */
 		if (ih->ih_handler == NULL)
 			continue;
 
+		/* Skip suspended handlers */
+		if ((ih->ih_flags & IH_SUSP) != 0)
+			continue;
+
 		/*
 		 * For software interrupt threads, we only execute
 		 * handlers that have their need flag set.  Hardware
@@ -1178,8 +1254,9 @@ intr_event_handle(struct intr_event *ie, struct trapfr
 	struct intr_handler *ih;
 	struct trapframe *oldframe;
 	struct thread *td;
-	int ret, thread;
 	int phase;
+	int ret;
+	bool filter, thread;
 
 	td = curthread;
 
@@ -1198,7 +1275,8 @@ intr_event_handle(struct intr_event *ie, struct trapfr
 	 * a trapframe as its argument.
 	 */
 	td->td_intr_nesting_level++;
-	thread = 0;
+	filter = false;
+	thread = false;
 	ret = 0;
 	critical_enter();
 	oldframe = td->td_intr_frame;
@@ -1214,8 +1292,10 @@ intr_event_handle(struct intr_event *ie, struct trapfr
 	atomic_thread_fence_seq_cst();
 
 	CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
+		if ((ih->ih_flags & IH_SUSP) != 0)
+			continue;
 		if (ih->ih_filter == NULL) {
-			thread = 1;
+			thread = true;
 			continue;
 		}
 		CTR4(KTR_INTR, "%s: exec %p(%p) for %s", __func__,
@@ -1230,24 +1310,25 @@ intr_event_handle(struct intr_event *ie, struct trapfr
 		    (ret & ~(FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) == 0),
 		    ("%s: incorrect return value %#x from %s", __func__, ret,
 		    ih->ih_name));
+		filter = filter || ret == FILTER_HANDLED;
 
-		/* 
+		/*
 		 * Wrapper handler special handling:
 		 *
-		 * in some particular cases (like pccard and pccbb), 
+		 * in some particular cases (like pccard and pccbb),
 		 * the _real_ device handler is wrapped in a couple of
 		 * functions - a filter wrapper and an ithread wrapper.
-		 * In this case (and just in this case), the filter wrapper 
+		 * In this case (and just in this case), the filter wrapper
 		 * could ask the system to schedule the ithread and mask
 		 * the interrupt source if the wrapped handler is composed
 		 * of just an ithread handler.
 		 *
-		 * TODO: write a generic wrapper to avoid people rolling 
-		 * their own
+		 * TODO: write a generic wrapper to avoid people rolling
+		 * their own.
 		 */
 		if (!thread) {
 			if (ret == FILTER_SCHEDULE_THREAD)
-				thread = 1;
+				thread = true;
 		}
 	}
 	atomic_add_rel_int(&ie->ie_active[phase], -1);
@@ -1271,6 +1352,11 @@ intr_event_handle(struct intr_event *ie, struct trapfr
 	}
 	critical_exit();
 	td->td_intr_nesting_level--;
+#ifdef notyet
+	/* The interrupt is not aknowledged by any filter and has no ithread. */
+	if (!thread && !filter)
+		return (EINVAL);
+#endif
 	return (0);
 }
 

Modified: head/sys/kern/subr_bus.c
==============================================================================
--- head/sys/kern/subr_bus.c	Mon Dec 17 16:01:37 2018	(r342169)
+++ head/sys/kern/subr_bus.c	Mon Dec 17 17:11:00 2018	(r342170)
@@ -4057,6 +4057,36 @@ bus_generic_teardown_intr(device_t dev, device_t child
 }
 
 /**
+ * @brief Helper function for implementing BUS_SUSPEND_INTR().
+ *
+ * This simple implementation of BUS_SUSPEND_INTR() simply calls the
+ * BUS_SUSPEND_INTR() method of the parent of @p dev.
+ */
+int
+bus_generic_suspend_intr(device_t dev, device_t child, struct resource *irq)
+{
+	/* Propagate up the bus hierarchy until someone handles it. */
+	if (dev->parent)
+		return (BUS_SUSPEND_INTR(dev->parent, child, irq));
+	return (EINVAL);
+}
+
+/**
+ * @brief Helper function for implementing BUS_RESUME_INTR().
+ *
+ * This simple implementation of BUS_RESUME_INTR() simply calls the
+ * BUS_RESUME_INTR() method of the parent of @p dev.
+ */
+int
+bus_generic_resume_intr(device_t dev, device_t child, struct resource *irq)
+{
+	/* Propagate up the bus hierarchy until someone handles it. */
+	if (dev->parent)
+		return (BUS_RESUME_INTR(dev->parent, child, irq));
+	return (EINVAL);
+}
+
+/**
  * @brief Helper function for implementing BUS_ADJUST_RESOURCE().
  *
  * This simple implementation of BUS_ADJUST_RESOURCE() simply calls the
@@ -4621,6 +4651,34 @@ bus_teardown_intr(device_t dev, struct resource *r, vo
 	if (dev->parent == NULL)
 		return (EINVAL);
 	return (BUS_TEARDOWN_INTR(dev->parent, dev, r, cookie));
+}
+
+/**
+ * @brief Wrapper function for BUS_SUSPEND_INTR().
+ *
+ * This function simply calls the BUS_SUSPEND_INTR() method of the
+ * parent of @p dev.
+ */
+int
+bus_suspend_intr(device_t dev, struct resource *r)
+{
+	if (dev->parent == NULL)
+		return (EINVAL);
+	return (BUS_SUSPEND_INTR(dev->parent, dev, r));
+}
+
+/**
+ * @brief Wrapper function for BUS_RESUME_INTR().
+ *
+ * This function simply calls the BUS_RESUME_INTR() method of the
+ * parent of @p dev.
+ */
+int
+bus_resume_intr(device_t dev, struct resource *r)
+{
+	if (dev->parent == NULL)
+		return (EINVAL);
+	return (BUS_RESUME_INTR(dev->parent, dev, r));
 }
 
 /**

Modified: head/sys/kern/subr_rman.c
==============================================================================
--- head/sys/kern/subr_rman.c	Mon Dec 17 16:01:37 2018	(r342169)
+++ head/sys/kern/subr_rman.c	Mon Dec 17 17:11:00 2018	(r342170)
@@ -94,6 +94,7 @@ struct resource_i {
 	rman_res_t	r_end;		/* index of the last entry (inclusive) */
 	u_int	r_flags;
 	void	*r_virtual;	/* virtual address of this resource */
+	void	*r_irq_cookie;	/* interrupt cookie for this (interrupt) resource */
 	device_t r_dev;	/* device which has allocated this resource */
 	struct rman *r_rm;	/* resource manager from whence this came */
 	int	r_rid;		/* optional rid for this resource. */
@@ -866,6 +867,20 @@ rman_get_virtual(struct resource *r)
 {
 
 	return (r->__r_i->r_virtual);
+}
+
+void
+rman_set_irq_cookie(struct resource *r, void *c)
+{
+
+	r->__r_i->r_irq_cookie = c;
+}
+
+void *
+rman_get_irq_cookie(struct resource *r)
+{
+
+	return (r->__r_i->r_irq_cookie);
 }
 
 void

Modified: head/sys/sys/bus.h
==============================================================================
--- head/sys/sys/bus.h	Mon Dec 17 16:01:37 2018	(r342169)
+++ head/sys/sys/bus.h	Mon Dec 17 17:11:00 2018	(r342170)
@@ -485,6 +485,10 @@ int	bus_generic_suspend(device_t dev);
 int	bus_generic_suspend_child(device_t dev, device_t child);
 int	bus_generic_teardown_intr(device_t dev, device_t child,
 				  struct resource *irq, void *cookie);
+int	bus_generic_suspend_intr(device_t dev, device_t child,
+				  struct resource *irq);
+int	bus_generic_resume_intr(device_t dev, device_t child,
+				  struct resource *irq);
 int	bus_generic_unmap_resource(device_t dev, device_t child, int type,
 				   struct resource *r,
 				   struct resource_map *map);
@@ -535,6 +539,8 @@ int	bus_setup_intr(device_t dev, struct resource *r, i
 		       driver_filter_t filter, driver_intr_t handler, 
 		       void *arg, void **cookiep);
 int	bus_teardown_intr(device_t dev, struct resource *r, void *cookie);
+int	bus_suspend_intr(device_t dev, struct resource *r);
+int	bus_resume_intr(device_t dev, struct resource *r);
 int	bus_bind_intr(device_t dev, struct resource *r, int cpu);
 int	bus_describe_intr(device_t dev, struct resource *irq, void *cookie,
 			  const char *fmt, ...) __printflike(4, 5);

Modified: head/sys/sys/interrupt.h
==============================================================================
--- head/sys/sys/interrupt.h	Mon Dec 17 16:01:37 2018	(r342169)
+++ head/sys/sys/interrupt.h	Mon Dec 17 17:11:00 2018	(r342170)
@@ -62,6 +62,8 @@ struct intr_handler {
 #define	IH_EXCLUSIVE	0x00000002	/* Exclusive interrupt. */
 #define	IH_ENTROPY	0x00000004	/* Device is a good entropy source. */
 #define	IH_DEAD		0x00000008	/* Handler should be removed. */
+#define	IH_SUSP		0x00000010	/* Device is powered down. */
+#define	IH_CHANGED	0x40000000	/* Handler state is changed. */
 #define	IH_MPSAFE	0x80000000	/* Handler does not need Giant. */
 
 /*
@@ -184,6 +186,8 @@ int	intr_event_describe_handler(struct intr_event *ie,
 int	intr_event_destroy(struct intr_event *ie);
 int	intr_event_handle(struct intr_event *ie, struct trapframe *frame);
 int	intr_event_remove_handler(void *cookie);
+int	intr_event_suspend_handler(void *cookie);
+int	intr_event_resume_handler(void *cookie);
 int	intr_getaffinity(int irq, int mode, void *mask);
 void	*intr_handler_source(void *cookie);
 int	intr_setaffinity(int irq, int mode, void *mask);

Modified: head/sys/sys/rman.h
==============================================================================
--- head/sys/sys/rman.h	Mon Dec 17 16:01:37 2018	(r342169)
+++ head/sys/sys/rman.h	Mon Dec 17 17:11:00 2018	(r342170)
@@ -131,6 +131,7 @@ bus_space_tag_t rman_get_bustag(struct resource *);
 rman_res_t	rman_get_end(struct resource *);
 device_t rman_get_device(struct resource *);
 u_int	rman_get_flags(struct resource *);
+void   *rman_get_irq_cookie(struct resource *);
 void	rman_get_mapping(struct resource *, struct resource_map *);
 int	rman_get_rid(struct resource *);
 rman_res_t	rman_get_size(struct resource *);
@@ -155,6 +156,7 @@ void	rman_set_bushandle(struct resource *_r, bus_space
 void	rman_set_bustag(struct resource *_r, bus_space_tag_t _t);
 void	rman_set_device(struct resource *_r, device_t _dev);
 void	rman_set_end(struct resource *_r, rman_res_t _end);
+void	rman_set_irq_cookie(struct resource *_r, void *_c);
 void	rman_set_mapping(struct resource *, struct resource_map *);
 void	rman_set_rid(struct resource *_r, int _rid);
 void	rman_set_start(struct resource *_r, rman_res_t _start);

Modified: head/sys/x86/x86/nexus.c
==============================================================================
--- head/sys/x86/x86/nexus.c	Mon Dec 17 16:01:37 2018	(r342169)
+++ head/sys/x86/x86/nexus.c	Mon Dec 17 17:11:00 2018	(r342170)
@@ -124,6 +124,8 @@ static	int nexus_setup_intr(device_t, device_t, struct
 			      void **);
 static	int nexus_teardown_intr(device_t, device_t, struct resource *,
 				void *);
+static	int nexus_suspend_intr(device_t, device_t, struct resource *);
+static	int nexus_resume_intr(device_t, device_t, struct resource *);
 static struct resource_list *nexus_get_reslist(device_t dev, device_t child);
 static	int nexus_set_resource(device_t, device_t, int, int,
 			       rman_res_t, rman_res_t);
@@ -161,6 +163,8 @@ static device_method_t nexus_methods[] = {
 	DEVMETHOD(bus_unmap_resource,	nexus_unmap_resource),
 	DEVMETHOD(bus_setup_intr,	nexus_setup_intr),
 	DEVMETHOD(bus_teardown_intr,	nexus_teardown_intr),
+	DEVMETHOD(bus_suspend_intr,	nexus_suspend_intr),
+	DEVMETHOD(bus_resume_intr,	nexus_resume_intr),
 #ifdef SMP
 	DEVMETHOD(bus_bind_intr,	nexus_bind_intr),
 #endif
@@ -595,6 +599,8 @@ nexus_setup_intr(device_t bus, device_t child, struct 
 
 	error = intr_add_handler(device_get_nameunit(child),
 	    rman_get_start(irq), filter, ihand, arg, flags, cookiep, domain);
+	if (error == 0)
+		rman_set_irq_cookie(irq, *cookiep);
 
 	return (error);
 }
@@ -602,7 +608,24 @@ nexus_setup_intr(device_t bus, device_t child, struct 
 static int
 nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih)
 {
-	return (intr_remove_handler(ih));
+	int error;
+
+	error = intr_remove_handler(ih);
+	if (error == 0)
+		rman_set_irq_cookie(r, NULL);
+	return (error);
+}
+
+static int
+nexus_suspend_intr(device_t dev, device_t child, struct resource *irq)
+{
+	return (intr_event_suspend_handler(rman_get_irq_cookie(irq)));
+}
+
+static int
+nexus_resume_intr(device_t dev, device_t child, struct resource *irq)
+{
+	return (intr_event_resume_handler(rman_get_irq_cookie(irq)));
 }
 
 #ifdef SMP



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201812171711.wBHHB0OO084416>