Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 14 Mar 2017 21:39:47 -0500
From:      "Mike Karels" <mike@karels.net>
To:        "Andrey V. Elsukov" <bu7cher@yandex.ru>
Cc:        freebsd-net@FreeBSD.org, "Alexander V. Chernikov" <melifaro@freebsd.org>,  "Eugene Grosbein" <eugen@freebsd.org>, karels@FreeBSD.org
Subject:   Re: LLE reference leak in the L2 cache
Message-ID:  <70D2287B-664C-48E4-9E8B-68B574BE6CE6@karels.net>
In-Reply-To: <3a4c5d87-d42e-5615-5d2b-2a8801376600@yandex.ru>
References:  <201703140840.v2E8ecH2040827@mail.karels.net> <3a4c5d87-d42e-5615-5d2b-2a8801376600@yandex.ru>

next in thread | previous in thread | raw e-mail | index | archive | help

--=_MailMate_E26E402B-89C4-40C9-81B5-0FB9D144830A_=
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: quoted-printable
X-MIME-Autoconverted: from 8bit to quoted-printable by mail.karels.net id v2F2dvBA044590



On 14 Mar 2017, at 3:50, Andrey V. Elsukov wrote:

> On 14.03.2017 11:40, Mike Karels wrote:
>>> Hi All,
>>
>>> Eugene has reported about the following assertion in the ARP code:
>>> 	http://www.grosbein.net/freebsd/crash/arp-kassert.txt
>>
>>> After some investigation I found that L2 cache has reference leak, th=
at
>>> can lead to integer overflow and this assertion.
>>> The one of the ways to reproduce this overflow can be demonstrated wi=
th
>>> simple IP forwarding, when ip_forward() is used (not ip_tryforward).
>>
>>> I asked olivier@ to reproduce this leak and he got this result:
>>> 	http://slexy.org/view/s21ql7nA0q
>>
>>> After further investigation I found similar leak in the IPv6 TCP path.
>>> Simple iperf test shows these results:
>>
>>> # dtrace -n 'fbt::in6_lltable_dump_entry:entry {printf("%d",
>>> args[1]->lle_refcnt);}'
>>> dtrace: description 'fbt::in6_lltable_dump_entry:entry ' matched 1 pr=
obe
>>> CPU     ID                    FUNCTION:NAME
>>>  51  18589     in6_lltable_dump_entry:entry 55721
>>>  51  18589     in6_lltable_dump_entry:entry 1
>>>  51  18589     in6_lltable_dump_entry:entry 1
>>>  51  18589     in6_lltable_dump_entry:entry 2
>>>  38  18589     in6_lltable_dump_entry:entry 111417
>>>  38  18589     in6_lltable_dump_entry:entry 1
>>>  38  18589     in6_lltable_dump_entry:entry 1
>>
>>> --
>>> WBR, Andrey V. Elsukov
>>
>> Thanks!  Could you try the following patch (compiles, but untested):
>>
>> Index: netinet/ip_input.c
>> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
>> --- netinet/ip_input.c	(revision 315160)
>> +++ netinet/ip_input.c	(working copy)
>> @@ -60,6 +60,7 @@
>>  #include <net/if_types.h>
>>  #include <net/if_var.h>
>>  #include <net/if_dl.h>
>> +#include <net/if_llatbl.h>
>>  #include <net/route.h>
>>  #include <net/netisr.h>
>>  #include <net/rss_config.h>
>> @@ -1066,6 +1067,8 @@
>>  	if (error =3D=3D EMSGSIZE && ro.ro_rt)
>>  		mtu =3D ro.ro_rt->rt_mtu;
>>  	RO_RTFREE(&ro);
>> +	if (ro.ro_lle)
>> +		LLE_FREE(ro.ro_lle);
>>
>>  	if (error)
>>  		IPSTAT_INC(ips_cantforward);
>
> I think it would be better to set RT_LLE_CACHE flag only for protocols
> that expect presence of L2 cache. I.e. only for the TCP and UDP and do
> it in the corresponding protocol output routine, not in the ip[6]_outpu=
t.

Hmm, let me think about that.  TCP and UDP know nothing about L2 cache,
they just use the in_pcb cache which handles it.  L3 caching was removed
earlier by someone who thought of it as a layering violation, which is wh=
y
I tried to keep in the IP layer for the most part.  I can probably find a
way to encapsulate it.

>
>> Index: netinet6/ip6_forward.c
>> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
>> --- netinet6/ip6_forward.c	(revision 315160)
>> +++ netinet6/ip6_forward.c	(working copy)
>> @@ -52,6 +52,7 @@
>>  #include <net/if.h>
>>  #include <net/if_var.h>
>>  #include <net/netisr.h>
>> +#include <net/if_llatbl.h>
>>  #include <net/route.h>
>>  #include <net/pfil.h>
>>
>> @@ -431,4 +432,6 @@
>>  out:
>>  	if (rt !=3D NULL)
>>  		RTFREE(rt);
>> +	if (rin6.ro_lle)
>> +		LLE_FREE(rin6.ro_lle);
>>  }
>
> I don't think this chunk will help. ip6_forward() doesn't use
> ip6_output(). And IPv6 forwarding is not affected by this problem. Look
> at the tcp_output(), it uses local route variable for IPv6 output.

Ah, right, I obviously didn=E2=80=99t read closely enough earlier.  I=E2=80=
=99ve attached
a patch that should fix this, as well as adding route caching for TCP/IPv=
6.

> I'm not sure, but probably SCTP also can be affected by this problem.

Probably true.  SCTP could probably benefit from L2 caching, but this als=
o
argues for making this more transparent.

		Mike
--=_MailMate_E26E402B-89C4-40C9-81B5-0FB9D144830A_=
Content-Disposition: attachment; filename=patch
Content-Transfer-Encoding: quoted-printable

Index: netinet/ip_input.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- netinet/ip_input.c	(revision 315160)
+++ netinet/ip_input.c	(working copy)
@@ -60,6 +60,7 @@
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
+#include <net/if_llatbl.h>
 #include <net/route.h>
 #include <net/netisr.h>
 #include <net/rss_config.h>
@@ -1066,6 +1067,8 @@
 	if (error =3D=3D EMSGSIZE && ro.ro_rt)
 		mtu =3D ro.ro_rt->rt_mtu;
 	RO_RTFREE(&ro);
+	if (ro.ro_lle)
+		LLE_FREE(ro.ro_lle);
 =

 	if (error)
 		IPSTAT_INC(ips_cantforward);
Index: netinet/tcp_output.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- netinet/tcp_output.c	(revision 315160)
+++ netinet/tcp_output.c	(working copy)
@@ -1377,9 +1377,6 @@
 	 */
 #ifdef INET6
 	if (isipv6) {
-		struct route_in6 ro;
-
-		bzero(&ro, sizeof(ro));
 		/*
 		 * we separately set hoplimit for every segment, since the
 		 * user might want to change the value via setsockopt.
@@ -1411,13 +1408,13 @@
 #endif
 =

 		/* TODO: IPv6 IP6TOS_ECT bit on */
-		error =3D ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro,
+		error =3D ip6_output(m, tp->t_inpcb->in6p_outputopts,
+		    &tp->t_inpcb->inp_route6,
 		    ((so->so_options & SO_DONTROUTE) ?  IP_ROUTETOIF : 0),
 		    NULL, NULL, tp->t_inpcb);
 =

-		if (error =3D=3D EMSGSIZE && ro.ro_rt !=3D NULL)
-			mtu =3D ro.ro_rt->rt_mtu;
-		RO_RTFREE(&ro);
+		if (error =3D=3D EMSGSIZE && tp->t_inpcb->inp_route6.ro_rt !=3D NULL)
+			mtu =3D tp->t_inpcb->inp_route6.ro_rt->rt_mtu;
 	}
 #endif /* INET6 */
 #if defined(INET) && defined(INET6)

--=_MailMate_E26E402B-89C4-40C9-81B5-0FB9D144830A_=--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?70D2287B-664C-48E4-9E8B-68B574BE6CE6>