Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 18 Sep 2004 15:50:51 -0700
From:      Brooks Davis <brooks@one-eyed-alien.net>
To:        Brooks Davis <brooks@one-eyed-alien.net>
Cc:        re@FreeBSD.org
Subject:   Re: 5.3-RELEASE TODO
Message-ID:  <20040918225051.GA31249@odin.ac.hmc.edu>
In-Reply-To: <20040918212441.GA22566@odin.ac.hmc.edu>
References:  <20040918171216.GA27533@odin.ac.hmc.edu> <Pine.NEB.3.96L.1040918134100.91851A-100000@fledge.watson.org> <20040918212441.GA22566@odin.ac.hmc.edu>

next in thread | previous in thread | raw e-mail | index | archive | help

--UugvWAfsgieZRqgk
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Sat, Sep 18, 2004 at 02:24:41PM -0700, Brooks Davis wrote:
> On Sat, Sep 18, 2004 at 01:42:24PM -0400, Robert Watson wrote:
> >=20
> > On Sat, 18 Sep 2004, Brooks Davis wrote:
> >=20
> > > > Have you tried seeing just how many addresses you can add before
> > > > getifaddrs() fails to return the complete list?  128k seems like a =
lot,
> > > > but I instrumented ifconf() locally a couple of weeks ago when I fir=
st
> > > > became aware of this problem, and discovered that even on my notebo=
ok
> > > > (which has a wireless card with one IP, and an unused ethernet card=
) that
> > > > I see moderately large buffers being read from user space:
> > > >=20
> > > > ifconf: 16384 space
> > >=20
> > > Those allocations don't seem to make any sense.  The actual space
> > > required is quite small.  All you do is copy one struct ifreq out for
> > > each address, plus one for each interface with no addresses.  The base
> > > size of a struct ifreq is 32 bytes and it extends to 34 for IPv6
> > > addresses.  The maximum size allowed by the data types is 273 (for a =
255
> > > byte address).  Since I think IPv6 are the largest addresses used in
> > > practice, MAXPHYS is probably not too bad, though it does put a new c=
ap
> > > on the number of interfaces at ~4k.=20
> > >=20
> > > > If we want to keep kernel allocations small and allow all the interfa=
ces
> > > > to be reliably reported, we probably need to go back to my original
> > > > plan where we loop repeatedly.  I might do it differently by allocati=
ng
> > > up to MAXPHYS and only reallocating if we overflow.  That would avoid
> > > doing it twice (or more) on normal machines while still being correct=
.=20
> >=20
> > I'm not too worried about theory, mostly about practice.  I.e., if you =
add
> > a few thousand IP addresses to a tap device, does all go happily?
>=20
> After adding the missing sbuf_delete(), I was able to create 2000
> aliases on a vlan.  I think the current practical limit is ~4090.
> I'm working on a new version that initially caps the allocation at
> MAXPHYS and then retries if it doesn't have enough space.

Here's a new and improved patch that let me allocate 8128 aliases on a
vlan.  It's a bit ugly because I had to fall back to looping if we don't
allocate enough space, but I think this is as good as it gets for this
rather poorly designed interface.

It's worth noting that the userland code that guesses the buffer size is
really bogus in this situation.  It guesses very badly (a few K when it
needs >100k) and then compounds the error by doing a linear search for
the right buffer size.  This means that instead of being O(addresses),
it's actually O(addresses^2).

-- Brooks

--- /home/brooks/working/freebsd/p4/freebsd/sys/net/if.c	Fri Sep 17 22:11:1=
3 2004
+++ sys/net/if.c	Sat Sep 18 15:41:03 2004
@@ -36,9 +36,11 @@
 #include "opt_mac.h"
=20
 #include <sys/param.h>
+#include <sys/types.h>
 #include <sys/conf.h>
 #include <sys/mac.h>
 #include <sys/malloc.h>
+#include <sys/sbuf.h>
 #include <sys/bus.h>
 #include <sys/mbuf.h>
 #include <sys/systm.h>
@@ -1483,28 +1485,34 @@
 	struct ifconf *ifc =3D (struct ifconf *)data;
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
-	struct ifreq ifr, *ifrp;
-	int space =3D ifc->ifc_len, error =3D 0;
+	struct ifreq ifr;
+	struct sbuf *sb;
+	int error, full =3D 0, valid_len, max_len;
+
+	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
+	max_len =3D MAXPHYS - 1;
+
+again:
+	if (ifc->ifc_len <=3D max_len) {
+		max_len =3D ifc->ifc_len;
+		full =3D 1;
+	}
+	sb =3D sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
+	max_len =3D 0;
+	valid_len =3D 0;
=20
-	ifrp =3D ifc->ifc_req;
 	IFNET_RLOCK();		/* could sleep XXX */
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		int addrs;
=20
-		if (space < sizeof(ifr))
-			break;
 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
-		    >=3D sizeof(ifr.ifr_name)) {
-			error =3D ENAMETOOLONG;
-			break;
-		}
+		    >=3D sizeof(ifr.ifr_name))
+			return (ENAMETOOLONG);
=20
 		addrs =3D 0;
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			struct sockaddr *sa =3D ifa->ifa_addr;
=20
-			if (space < sizeof(ifr))
-				break;
 			if (jailed(curthread->td_ucred) &&
 			    prison_if(curthread->td_ucred, sa))
 				continue;
@@ -1515,47 +1523,49 @@
 					 (struct osockaddr *)&ifr.ifr_addr;
 				ifr.ifr_addr =3D *sa;
 				osa->sa_family =3D sa->sa_family;
-				error =3D copyout((caddr_t)&ifr, (caddr_t)ifrp,
-						sizeof (ifr));
-				ifrp++;
+				sbuf_bcat(sb, &ifr, sizeof(ifr));
+				max_len +=3D sizeof(ifr);
 			} else
 #endif
 			if (sa->sa_len <=3D sizeof(*sa)) {
 				ifr.ifr_addr =3D *sa;
-				error =3D copyout((caddr_t)&ifr, (caddr_t)ifrp,
-						sizeof (ifr));
-				ifrp++;
+				sbuf_bcat(sb, &ifr, sizeof(ifr));
+				max_len +=3D sizeof(ifr);
 			} else {
-				if (space < sizeof (ifr) + sa->sa_len -
-					    sizeof(*sa))
-					break;
-				space -=3D sa->sa_len - sizeof(*sa);
-				error =3D copyout((caddr_t)&ifr, (caddr_t)ifrp,
-						sizeof (ifr.ifr_name));
-				if (error =3D=3D 0)
-				    error =3D copyout((caddr_t)sa,
-				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
-				ifrp =3D (struct ifreq *)
-					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
+				sbuf_bcat(sb, &ifr,
+				    offsetof(struct ifreq, ifr_addr));
+				max_len +=3D offsetof(struct ifreq, ifr_addr);
+				sbuf_bcat(sb, sa, sa->sa_len);
+				max_len +=3D sa->sa_len;
 			}
-			if (error)
-				break;
-			space -=3D sizeof (ifr);
+
+			if (!sbuf_overflowed(sb))
+				valid_len =3D sbuf_len(sb);
 		}
-		if (error)
-			break;
-		if (!addrs) {
+		if (addrs =3D=3D 0) {
 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
-			error =3D copyout((caddr_t)&ifr, (caddr_t)ifrp,
-			    sizeof (ifr));
-			if (error)
-				break;
-			space -=3D sizeof (ifr);
-			ifrp++;
+			sbuf_bcat(sb, &ifr, sizeof(ifr));
+			max_len +=3D sizeof(ifr);
+
+			if (!sbuf_overflowed(sb))
+				valid_len =3D sbuf_len(sb);
 		}
 	}
 	IFNET_RUNLOCK();
-	ifc->ifc_len -=3D space;
+
+	/*
+	 * If we didn't allocate enough space (uncommon), try again.  If
+	 * we have already allocated as much space as we are allowed,
+	 * return what we've got.
+	 */
+	if (valid_len !=3D max_len && !full) {
+		sbuf_delete(sb);
+		goto again;
+	}
+
+	ifc->ifc_len =3D valid_len;
+	error =3D copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
+	sbuf_delete(sb);
 	return (error);
 }
=20

--=20
Any statement of the form "X is the one, true Y" is FALSE.
PGP fingerprint 655D 519C 26A7 82E7 2529  9BF0 5D8E 8BE9 F238 1AD4

--UugvWAfsgieZRqgk
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.1 (GNU/Linux)

iD8DBQFBTLvLXY6L6fI4GtQRAv2MAJ4h2YoKwo109ygXSBdTpzuOFkAEjACfQ3Eo
zysJdFMj4n3DUNyT88Y6bv8=
=UGmm
-----END PGP SIGNATURE-----

--UugvWAfsgieZRqgk--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20040918225051.GA31249>