Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 9 Jun 2005 01:44:51 +0200
From:      Max Laier <max@love2party.net>
To:        "Grooms, Matthew" <MGrooms@seton.org>
Cc:        pf@freebsd.org, glebius@freebsd.org, freebsd-stable@freebsd.org, Palle Girgensohn <girgen@pingpong.net>, Kris Kennaway <kris@obsecurity.org>
Subject:   Re: 5.4-RELEASE lockups on amd64 SMP
Message-ID:  <200506090145.00312.max@love2party.net>
In-Reply-To: <28FCC7CB4CF6EA43AF83BCA2096E97D013E563@AUSEX2VS1.seton.org>
References:  <28FCC7CB4CF6EA43AF83BCA2096E97D013E563@AUSEX2VS1.seton.org>

next in thread | previous in thread | raw e-mail | index | archive | help
--nextPart1672662.2lLyzc9e6N
Content-Type: multipart/mixed;
  boundary="Boundary-01=_3L4pC7cVZTD1gQi"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

--Boundary-01=_3L4pC7cVZTD1gQi
Content-Type: text/plain;
  charset="iso-8859-6"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: inline

On Thursday 09 June 2005 01:23, Grooms, Matthew wrote:
> Max,
>
>      With your patch applied, I get a panic very quickly during the boot
> cycle with output that looks like this ...

My bad, missed the mtx_init() ...=20
| @@ -216,6 +219,9 @@
|         callout_init(&sc->sc_tmo, 0);
|         callout_init(&sc->sc_bulk_tmo, 0);
|         callout_init(&sc->sc_bulkfail_tmo, 0);
| +       callout_init(&sc->sc_send_tmo, 0);
| +       mtx_init(&sc->sc_ifq.ifq_mtx, ifp->if_xname, "pfsync send queue",
| +           MTX_DEF);
|         if_attach(&sc->sc_if);
| =20
|         LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);

Complete updated patch attached and uploaded to:
http://people.freebsd.org/~mlaier/if_pfsync.senddef5.diff

Sorry.

> net.inet.carp.preempt: 0 -> 1
> Setting hostname: ---.
> em: Link is up 100 Mbps Full Duplex
> panic: mtx_lock() of spin mutex (null) @ ../../../net/if.c:1983
> cpuid =3D 1
> KDB: enter: panic
> [thread pid 282 tid 100157 ]
> Stopped at      kdb_enter+0x2f: nop
> db> trace
> Tracing pid 282 tid 100157 td 0xffffff000af78280
> kdb_enter() at kdb_enter+0x2f
> panic() at panic+0x249
> _mtx_lock_flags() at _mtx_lock_flags+0xd6
> if_handoff() at if_handoff+0x49
> pfsync_sendout() at pfsync_sendout+0x268
> pfsyncioctl() at pfsyncioctl+0x497
> in_control() at in_control+0x8cb
> ifioctl() at ifioctl+0x178
> soo_ioctl() at soo_ioctl+0x2d6
> ioctl() at ioctl+0xfc
> syscall() at syscall+0x4ab
> Xfast_syscall() at Xfast_syscall+0xa8
> --- syscall (54, FreeBSD ELF64, ioctl), rip =3D 0x800793340, rsp =3D
> 0x7fffffffeca8, rbp =3D 0x7fffffffef8b ---
> db> show locks
> exclusive sleep mutex pf task mtx r =3D 0 (0xffffffff80752f60) locked @
> contrib/pf/net/if_pfsync.c:973
>
> Rebooting the machine with the same kernel produces an identical panic. Let
> me know what else I can do to help. Right now I have just been rebooting
> back to a UP kernel which has never shown any sign of problems.
>
> Matthew Grooms
>
> -----Original Message-----
> From: Grooms, Matthew
> Sent: Wed 6/8/2005 6:22 PM
> To: Max Laier
> Cc: Palle Girgensohn; Kris Kennaway; freebsd-stable@freebsd.org;
> glebius@freebsd.org; pf@freebsd.org
> Subject: RE: 5.4-RELEASE lockups on amd64 SMP
>
> Matthew,
>
> can you try the attached diff.  Available for 5 and CURRENT.  I recall that
> this problem was seen before, strange that I didn't see the problem.=20
> Sounds familiar to you?  Please try the patch and let me know if that
> helps.  Thanks a lot.
>
> On Wednesday 08 June 2005 01:35, Matthew Grooms wrote:
> > Once again, here are the backtraces for the panic and lor ...
> >
> > Tracing pid 110 tid 100089 td 0xffffff012f3f0c80
> > kdb_enter() at kdb_enter+0x2f
> > panic() at panic+0x249
> > uma_dbg_free() at uma_dbg_free+0x188
> > uma_zfree_arg() at uma_zfree_arg+0x1b0
> > pf_purge_expired_states() at pf_purge_expired_states+0x41
> > pfsync_input() at pfsync_input+0xb35
> > ip_input() at ip_input+0x10f
> > netisr_processqueue() at netisr_processqueue+0x17
> > swi_net() at swi_net+0xa8
> > ithread_loop() at ithread_loop+0xd9
> > fork_exit() at fork_exit+0xc3
> > fork_trampoline() at fork_trampoline+0xe
> > --- trap 0, rip =3D 0, rsp =3D 0xffffffffb44f9d00, rbp =3D 0 ---
> > db> continue
> > boot() called on cpu#0
> > Uptime: 13h42m43s
> > Dumping 4864 MB
> >   16 32 ...
> >
> > lock order reversal
>
> ...
>
> > alltraps_with_regs_pushed() at alltraps_with_regs_pushed+0x5
> > pf_state_tree_lan_ext_RB_REMOVE() at
> > pf_state_tree_lan_ext_RB_REMOVE+0x10c
>
> This LOR is a consequence of the fault, so it can be disregarded.

=2D-=20
/"\  Best regards,                      | mlaier@freebsd.org
\ /  Max Laier                          | ICQ #67774661
 X   http://pf4freebsd.love2party.net/  | mlaier@EFnet
/ \  ASCII Ribbon Campaign              | Against HTML Mail and News

--Boundary-01=_3L4pC7cVZTD1gQi
Content-Type: text/x-diff; charset="iso-8859-6"; name="if_pfsync.senddef5.diff"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
	filename="if_pfsync.senddef5.diff"

Index: if_pfsync.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /usr/store/mlaier/fcvs/src/sys/contrib/pf/net/if_pfsync.c,v
retrieving revision 1.11.2.2
diff -u -r1.11.2.2 if_pfsync.c
=2D-- if_pfsync.c	19 May 2005 10:59:22 -0000	1.11.2.2
+++ if_pfsync.c	8 Jun 2005 23:42:45 -0000
@@ -130,6 +130,7 @@
=20
 static void	pfsync_clone_destroy(struct ifnet *);
 static int	pfsync_clone_create(struct if_clone *, int);
+static void	pfsync_senddef(void *);
 #else
 void	pfsyncattach(int);
 #endif
@@ -170,6 +171,8 @@
 	callout_stop(&sc->sc_bulk_tmo);
 	callout_stop(&sc->sc_bulkfail_tmo);
=20
+	callout_stop(&sc->sc_send_tmo);
+
 #if NBPFILTER > 0
         bpfdetach(ifp);
 #endif
@@ -216,6 +219,9 @@
 	callout_init(&sc->sc_tmo, 0);
 	callout_init(&sc->sc_bulk_tmo, 0);
 	callout_init(&sc->sc_bulkfail_tmo, 0);
+	callout_init(&sc->sc_send_tmo, 0);
+	mtx_init(&sc->sc_ifq.ifq_mtx, ifp->if_xname, "pfsync send queue",
+	    MTX_DEF);
 	if_attach(&sc->sc_if);
=20
 	LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);
@@ -913,6 +919,7 @@
 		if (pfsyncr.pfsyncr_maxupdates > 255)
 			return (EINVAL);
 #ifdef __FreeBSD__
+		callout_drain(&sc->sc_send_tmo);
 		PF_LOCK();
 #endif
 		sc->sc_maxupdates =3D pfsyncr.pfsyncr_maxupdates;
@@ -1634,15 +1641,14 @@
 #endif
=20
 		pfsyncstats.pfsyncs_opackets++;
=2D
 #ifdef __FreeBSD__
=2D		PF_UNLOCK();
=2D#endif
+		if (IF_HANDOFF(&sc->sc_ifq, m, NULL))
+			pfsyncstats.pfsyncs_oerrors++;
+		else
+			callout_reset(&sc->sc_send_tmo, 1, pfsync_senddef, sc);
+#else
 		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
 			pfsyncstats.pfsyncs_oerrors++;
=2D
=2D#ifdef __FreeBSD__
=2D		PF_LOCK();
 #endif
 	} else
 		m_freem(m);
@@ -1652,6 +1658,22 @@
=20
=20
 #ifdef __FreeBSD__
+static void
+pfsync_senddef(void *arg)
+{
+	struct pfsync_softc *sc =3D (struct pfsync_softc *)arg;
+	struct mbuf *m;
+
+	for(;;) {
+		IF_DEQUEUE(&sc->sc_ifq, m);
+		if (m =3D=3D NULL)
+			break;
+		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
+			pfsyncstats.pfsyncs_oerrors++;
+	}
+}
+
+
 static int
 pfsync_modevent(module_t mod, int type, void *data)
 {
Index: if_pfsync.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /usr/store/mlaier/fcvs/src/sys/contrib/pf/net/if_pfsync.h,v
retrieving revision 1.4
diff -u -r1.4 if_pfsync.h
=2D-- if_pfsync.h	16 Jun 2004 23:24:00 -0000	1.4
+++ if_pfsync.h	8 Jun 2005 23:42:59 -0000
@@ -158,8 +158,12 @@
 	struct timeout		 sc_bulkfail_tmo;
 #endif
 	struct in_addr		 sc_sendaddr;
=2D	struct mbuf		*sc_mbuf;	/* current cummulative mbuf */
=2D	struct mbuf		*sc_mbuf_net;	/* current cummulative mbuf */
+	struct mbuf		*sc_mbuf;	/* current cumulative mbuf */
+	struct mbuf		*sc_mbuf_net;	/* current cumulative mbuf */
+#ifdef __FreeBSD__
+	struct ifqueue		 sc_ifq;
+	struct callout		 sc_send_tmo;
+#endif
 	union sc_statep		 sc_statep;
 	union sc_statep		 sc_statep_net;
 	u_int32_t		 sc_ureq_received;

--Boundary-01=_3L4pC7cVZTD1gQi--

--nextPart1672662.2lLyzc9e6N
Content-Type: application/pgp-signature

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.1 (FreeBSD)

iD8DBQBCp4L8XyyEoT62BG0RAnthAJ9AmLndEOMBEkELsSzdDeFL0/2HPwCfVPy5
BibjHx55kNPwyxCAAXAQZTc=
=HmTx
-----END PGP SIGNATURE-----

--nextPart1672662.2lLyzc9e6N--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200506090145.00312.max>