Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 01 Jul 2012 20:48:23 +0300
From:      Mikolaj Golub <trociny@freebsd.org>
To:        John Baldwin <jhb@freebsd.org>
Cc:        svn-src-head@freebsd.org, Bjoern Zeeb <bz@freebsd.org>, svn-src-all@freebsd.org, src-committers@freebsd.org
Subject:   Re: svn commit: r228969 - head/sys/netinet
Message-ID:  <86sjdbnziw.fsf@kopusha.home.net>
In-Reply-To: <201204020848.04775.jhb@freebsd.org> (John Baldwin's message of "Mon, 2 Apr 2012 08:48:04 -0400")
References:  <201112292041.pBTKfGkj071711@svn.freebsd.org> <CAOnPXZ-JfJK-mJSSYpt3o4K5AD2rCXM3yXY6pXqEU_TfWMy%2BkA@mail.gmail.com> <201204020848.04775.jhb@freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
--=-=-=


On Mon, 2 Apr 2012 08:48:04 -0400 John Baldwin wrote:

 JB> On Sunday, April 01, 2012 8:05:00 am Mikolaj Golub wrote:
 >> Hi,
 >> 
 >> On 12/29/11, John Baldwin <jhb@freebsd.org> wrote:
 >> > Author: jhb
 >> > Date: Thu Dec 29 20:41:16 2011
 >> > New Revision: 228969
 >> > URL: http://svn.freebsd.org/changeset/base/228969
 >> >
 >> > Log:
 >> >   Defer the work of freeing IPv4 multicast options from a socket to an
 >> >   asychronous task.  This avoids tearing down multicast state including
 >> >   sending IGMP leave messages and reprogramming MAC filters while holding
 >> >   the per-protocol global pcbinfo lock that is used in the receive path of
 >> >   packet processing.
 >> >
 >> >   Reviewed by:        rwatson
 >> >   MFC after:        1 month
 >> >
 >> > Modified:
 >> >   head/sys/netinet/in_mcast.c
 >> >   head/sys/netinet/ip_var.h
 >> >
 >> > Modified: head/sys/netinet/in_mcast.c
 >> > ==============================================================================
 >> > --- head/sys/netinet/in_mcast.c        Thu Dec 29 19:01:29 2011        (r228968)
 >> > +++ head/sys/netinet/in_mcast.c        Thu Dec 29 20:41:16 2011        (r228969)
 >> > @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
 >> >  #include <sys/protosw.h>
 >> >  #include <sys/sysctl.h>
 >> >  #include <sys/ktr.h>
 >> > +#include <sys/taskqueue.h>
 >> >  #include <sys/tree.h>
 >> >
 >> >  #include <net/if.h>
 >> > @@ -144,6 +145,8 @@ static void        inm_purge(struct in_multi *)
 >> >  static void        inm_reap(struct in_multi *);
 >> >  static struct ip_moptions *
 >> >                  inp_findmoptions(struct inpcb *);
 >> > +static void        inp_freemoptions_internal(struct ip_moptions *);
 >> > +static void        inp_gcmoptions(void *, int);
 >> >  static int        inp_get_source_filters(struct inpcb *, struct sockopt *);
 >> >  static int        inp_join_group(struct inpcb *, struct sockopt *);
 >> >  static int        inp_leave_group(struct inpcb *, struct sockopt *);
 >> > @@ -179,6 +182,10 @@ static SYSCTL_NODE(_net_inet_ip_mcast, O
 >> >      CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
 >> >      "Per-interface stack-wide source filters");
 >> >
 >> > +static STAILQ_HEAD(, ip_moptions) imo_gc_list =
 >> > +    STAILQ_HEAD_INITIALIZER(imo_gc_list);
 >> > +static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);
 >> > +
 >> >  /*
 >> >   * Inline function which wraps assertions for a valid ifp.
 >> >   * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
 >> > @@ -1518,17 +1525,29 @@ inp_findmoptions(struct inpcb *inp)
 >> >  }
 >> >
 >> >  /*
 >> > - * Discard the IP multicast options (and source filters).
 >> > + * Discard the IP multicast options (and source filters).  To minimize
 >> > + * the amount of work done while holding locks such as the INP's
 >> > + * pcbinfo lock (which is used in the receive path), the free
 >> > + * operation is performed asynchronously in a separate task.
 >> >   *
 >> >   * SMPng: NOTE: assumes INP write lock is held.
 >> >   */
 >> >  void
 >> >  inp_freemoptions(struct ip_moptions *imo)
 >> >  {
 >> > -        struct in_mfilter        *imf;
 >> > -        size_t                         idx, nmships;
 >> >
 >> >          KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
 >> > +        IN_MULTI_LOCK();
 >> > +        STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
 >> > +        IN_MULTI_UNLOCK();
 >> > +        taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
 >> > +}
 >> > +
 >> > +static void
 >> > +inp_freemoptions_internal(struct ip_moptions *imo)
 >> > +{
 >> > +        struct in_mfilter        *imf;
 >> > +        size_t                         idx, nmships;
 >> >
 >> >          nmships = imo->imo_num_memberships;
 >> >          for (idx = 0; idx < nmships; ++idx) {
 >> > @@ -1546,6 +1565,22 @@ inp_freemoptions(struct ip_moptions *imo
 >> >          free(imo, M_IPMOPTS);
 >> >  }
 >> >
 >> > +static void
 >> > +inp_gcmoptions(void *context, int pending)
 >> > +{
 >> > +        struct ip_moptions *imo;
 >> > +
 >> > +        IN_MULTI_LOCK();
 >> > +        while (!STAILQ_EMPTY(&imo_gc_list)) {
 >> > +                imo = STAILQ_FIRST(&imo_gc_list);
 >> > +                STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
 >> > +                IN_MULTI_UNLOCK();
 >> > +                inp_freemoptions_internal(imo);
 >> > +                IN_MULTI_LOCK();
 >> > +        }
 >> > +        IN_MULTI_UNLOCK();
 >> > +}
 >> > +
 >> >  /*
 >> >   * Atomically get source filters on a socket for an IPv4 multicast group.
 >> >   * Called with INP lock held; returns with lock released.
 >> >
 >> > Modified: head/sys/netinet/ip_var.h
 >> > ==============================================================================
 >> > --- head/sys/netinet/ip_var.h        Thu Dec 29 19:01:29 2011        (r228968)
 >> > +++ head/sys/netinet/ip_var.h        Thu Dec 29 20:41:16 2011        (r228969)
 >> > @@ -93,6 +93,7 @@ struct ip_moptions {
 >> >          u_short        imo_max_memberships;        /* max memberships this socket */
 >> >          struct        in_multi **imo_membership;        /* group memberships */
 >> >          struct        in_mfilter *imo_mfilters;        /* source filters */
 >> > +        STAILQ_ENTRY(ip_moptions) imo_link;
 >> >  };
 >> >
 >> >  struct        ipstat {
 >> >
 >> 
 >> I have been observing panics like below after recent upgrade on VIMAGE kernel:
 >> 
 >> #0  doadump (textdump=-2022567936) at pcpu.h:244
 >> #1  0x8051b739 in db_fncall (dummy1=1, dummy2=0, dummy3=-2127531040,
 >> dummy4=0x872b2920 "")
 >>     at /home/golub/freebsd/base/head/sys/ddb/db_command.c:573
 >> #2  0x8051bb31 in db_command (last_cmdp=0x8112eefc, cmd_table=0x0, dopager=1)
 >>     at /home/golub/freebsd/base/head/sys/ddb/db_command.c:449
 >> #3  0x8051bc8a in db_command_loop () at
 >> /home/golub/freebsd/base/head/sys/ddb/db_command.c:502
 >> #4  0x8051dc7d in db_trap (type=12, code=0)
 >>     at /home/golub/freebsd/base/head/sys/ddb/db_main.c:229
 >> #5  0x80a82566 in kdb_trap (type=12, code=0, tf=0x872b2bbc)
 >>     at /home/golub/freebsd/base/head/sys/kern/subr_kdb.c:629
 >> #6  0x80ddd26f in trap_fatal (frame=0x872b2bbc, eva=24)
 >>     at /home/golub/freebsd/base/head/sys/i386/i386/trap.c:1014
 >> #7  0x80ddd347 in trap_pfault (frame=0x872b2bbc, usermode=0, eva=24)
 >>     at /home/golub/freebsd/base/head/sys/i386/i386/trap.c:835
 >> #8  0x80dde411 in trap (frame=0x872b2bbc)
 >>     at /home/golub/freebsd/base/head/sys/i386/i386/trap.c:547
 >> #9  0x80dc7c6c in calltrap () at
 >> /home/golub/freebsd/base/head/sys/i386/i386/exception.s:169
 >> #10 0x80b6f1fd in igmp_change_state (inm=0x8ae70480)
 >>     at /home/golub/freebsd/base/head/sys/netinet/igmp.c:2595
 >> #11 0x80b76f68 in in_leavegroup_locked (inm=0x8ae70480, imf=0x8a655a00)
 >>     at /home/golub/freebsd/base/head/sys/netinet/in_mcast.c:1239
 >> #12 0x80b76fbd in in_leavegroup (inm=0x8ae70480, imf=0x8a655a00)
 >>     at /home/golub/freebsd/base/head/sys/netinet/in_mcast.c:1184
 >> #13 0x80b770b4 in inp_gcmoptions (context=0x0, pending=1)
 >>     at /home/golub/freebsd/base/head/sys/netinet/in_mcast.c:1554
 >> #14 0x80a8ff2b in taskqueue_run_locked (queue=0x87594880)
 >>     at /home/golub/freebsd/base/head/sys/kern/subr_taskqueue.c:308
 >> #15 0x80a90987 in taskqueue_thread_loop (arg=0x81186bcc)
 >>     at /home/golub/freebsd/base/head/sys/kern/subr_taskqueue.c:497
 >> #16 0x80a1b2d8 in fork_exit (callout=0x80a90920
 >> <taskqueue_thread_loop>, arg=0x81186bcc,
 >>     frame=0x872b2d28) at /home/golub/freebsd/base/head/sys/kern/kern_fork.c:992
 >> #17 0x80dc7d14 in fork_trampoline ()
 >>     at /home/golub/freebsd/base/head/sys/i386/i386/exception.s:276
 >> (kgdb) fr 10
 >> #10 0x80b6f1fd in igmp_change_state (inm=0x8ae70480)
 >>     at /home/golub/freebsd/base/head/sys/netinet/igmp.c:2595
 >> 2595                                    V_state_change_timers_running = 1;
 >> 
 >> (kgdb) l
 >> 2590                                        ("%s: enqueue record =
 >> %d", __func__,
 >> 2591                                         retval));
 >> 2592
 >> 2593                                    inm->inm_state = IGMP_LEAVING_MEMBER;
 >> 2594                                    inm->inm_sctimer = 1;
 >> 2595                                    V_state_change_timers_running = 1;
 >> 2596                                    syncstates = 0;
 >> 2597                            }
 >> 2598                            break;
 >> 2599                    }
 >> 
 >> VNET context is not set at that point.
 >> 
 >> The attached patch fixes the issue for me. Not sure about inm->inm_ifp
 >> != NULL assumption but I need interface to get vnet :-).

 JB> I will to defer to bz@ (cc'd) on how best to fix this.  Another option would
 JB> be to save the current vnet in the 'ip_moptions' struct (would have to add a
 JB> new field) when queueing this imo to be free'd via the task.  You could
 JB> then do the curvnet set/restore at a higher level without any locks held, etc.

Hi, do you have any news here? I would really appreciate to have this fixed in
any way, as currently to avoid the crash I always have to remember to apply
the patch when compiling VIMAGE kernels.

Here is another version of the patch. It fixes it in the way suggested above
(storing vnet in ip_moptions).

The thing that worries me though is the case when vnet is destroyed and we
still have options that refer it in the queue. I expect panic in this case.

BTW, isn't the same problem with stale pointer dereferencing possible when
removing interface?

-- 
Mikolaj Golub


--=-=-=
Content-Type: text/x-patch
Content-Disposition: inline; filename=in_mcast.c.inp_gcmoptions.1.patch

Index: sys/netinet/ip_var.h
===================================================================
--- sys/netinet/ip_var.h	(revision 237918)
+++ sys/netinet/ip_var.h	(working copy)
@@ -93,6 +93,7 @@ struct ip_moptions {
 	u_short	imo_max_memberships;	/* max memberships this socket */
 	struct	in_multi **imo_membership;	/* group memberships */
 	struct	in_mfilter *imo_mfilters;	/* source filters */
+	struct	vnet *imo_vnet;		/* pointer to network stack instance */
 	STAILQ_ENTRY(ip_moptions) imo_link;
 };
 
Index: sys/netinet/in_mcast.c
===================================================================
--- sys/netinet/in_mcast.c	(revision 237918)
+++ sys/netinet/in_mcast.c	(working copy)
@@ -1504,6 +1504,7 @@ inp_findmoptions(struct inpcb *inp)
 	imo->imo_num_memberships = 0;
 	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
 	imo->imo_membership = immp;
+	imo->imo_vnet = NULL;
 
 	/* Initialize per-group source filters. */
 	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
@@ -1535,6 +1536,7 @@ inp_freemoptions(struct ip_moptions *imo)
 
 	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
 	IN_MULTI_LOCK();
+	imo->imo_vnet = curvnet;
 	STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
 	IN_MULTI_UNLOCK();
 	taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
@@ -1572,7 +1574,9 @@ inp_gcmoptions(void *context, int pending)
 		imo = STAILQ_FIRST(&imo_gc_list);
 		STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
 		IN_MULTI_UNLOCK();
+		CURVNET_SET(imo->imo_vnet);
 		inp_freemoptions_internal(imo);
+		CURVNET_RESTORE();
 		IN_MULTI_LOCK();
 	}
 	IN_MULTI_UNLOCK();

--=-=-=--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?86sjdbnziw.fsf>