Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 15 Jan 2002 14:40:45 +0200
From:      Ruslan Ermilov <ru@FreeBSD.ORG>
To:        Luigi Rizzo <rizzo@icir.org>
Cc:        chkno@dork.com, freebsd-ipfw@FreeBSD.ORG, Ari Suutari <ari@suutari.iki.fi>, Brian Somers <brian@FreeBSD.ORG>
Subject:   Re: ip_dummynet.c:"*** OUCH! pipe should have been idle!"
Message-ID:  <20020115144045.J46269@sunbay.com>
In-Reply-To: <20020115021839.A74391@iguana.icir.org>
References:  <20020114141539.A70340@iguana.icir.org> <200201142246.g0EMkXC05128@chk.phattydomain.com> <20020114145849.A70496@iguana.icir.org> <20020115120531.C46269@sunbay.com> <20020115021839.A74391@iguana.icir.org>

next in thread | previous in thread | raw e-mail | index | archive | help

--gj572EiMnwbLXET9
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Tue, Jan 15, 2002 at 02:18:40AM -0800, Luigi Rizzo wrote:
> On Tue, Jan 15, 2002 at 12:05:31PM +0200, Ruslan Ermilov wrote:
> > On Mon, Jan 14, 2002 at 02:58:49PM -0800, Luigi Rizzo wrote:
> > > When the queue overflows, you get an ENOBUFS error, which natd
> > > detects and handles by retransmitting the packet at a later time
> > > (not a very good idea, I'd say that natd should just drop the packet
> > > in this case. The "fix" might be trapping the ENOBUFS error in
> > > usr/src/sbin/natd.c:FlushPacketBuffer() and returning as if the
> > > transmission was successful).
> > > 
> > ENOBUFS may be returned by sendto(2) when no socket buffer space
> > is available.  In this case, natd(8) retransmits the packet only
> > after select(2) says that the socket is writable again.  Dropping
> > the packet in this case would be bogus, and natd.c has fixed this
> > in revision 1.2.
> 
> Sorry Ruslan, but I think you are wrong.
> 
> For udp/datagram sockets, the i[output] socket buffer has no role
> other than limit the max datagram size, and it is entirely bypassed
> by the sendto/write routines. So, select() sees the socket as always
> writable, and natd in this case loop forever. This is why i said
> retrying is broken. And it is severely broken, indeed.
> 
Right, this is well documented in Stevens TCP/IP Illustrated Vol. 2
in section 16.7, subsection "Unreliable Protocol Buffering".

> Look at the sendto() manpage:
> 
>    ERRORS
> 	...
> 
>      [ENOBUFS]          The system was unable to allocate an internal buffer.
>                         The operation may succeed when buffers become avail-
>                         able.
> 
>      [ENOBUFS]          The output queue for a network interface was full.
>                         This generally indicates that the interface has
>                         stopped sending, but may be caused by transient con-
>                         gestion.
> 
> and also at the source code for sendto().
> 
Been there, confirmed that.  Even for TCP, sendto() doesn't allocate
a socket buffer space -- it's the responsibility of the protocol's
output function, tcp_output().

> The reason why you cannot queue in the socket buffer is that there
> is no way to trigger the transmission when the device becomes available
> again -- for TCP this is supported by incoming acks or timeouts, but
> there is no equivalent for UDP.
> 
> Sure you could think of queueing the socket on the output device, but
> apart that there is no infrastructure for that, which interface to
> use depends on routing info, and then you might have multiple
> ones because of multicast...
> 
That makes pretty much sense, thanks for the info!

Please find the patch attached (which backs the corresponding
rev. 1.2 changes out).

Also CC:ed to Brian Somers (who committed that change) and
Ari Suutari who originally wrote natd(8).


Cheers,
Ruslan (who now wonders if the problem fixed in natd.c,v 1.2 was false).

--gj572EiMnwbLXET9
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=p

Index: natd.c
===================================================================
RCS file: /home/ncvs/src/sbin/natd/natd.c,v
retrieving revision 1.38
diff -u -p -r1.38 natd.c
--- natd.c	2001/11/27 11:06:02	1.38
+++ natd.c	2002/01/15 12:37:04
@@ -97,7 +97,6 @@ static int      StrToPortRange (const ch
 static int 	StrToProto (const char* str);
 static int      StrToAddrAndPortRange (const char* str, struct in_addr* addr, char* proto, port_range *portRange);
 static void	ParseArgs (int argc, char** argv);
-static void	FlushPacketBuffer (int fd);
 static void	SetupPunchFW(const char *strValue);
 
 /*
@@ -118,11 +117,6 @@ static 	int			dynamicMode;
 static  int			ifMTU;
 static	int			aliasOverhead;
 static 	int			icmpSock;
-static	char			packetBuf[IP_MAXPACKET];
-static 	int			packetLen;
-static	struct sockaddr_in	packetAddr;
-static 	int			packetSock;
-static 	int			packetDirection;
 static  int			dropIgnoredIncoming;
 static  int			logDropped;
 static	int			logFacility;
@@ -136,7 +130,6 @@ int main (int argc, char** argv)
 	int			routeSock;
 	struct sockaddr_in	addr;
 	fd_set			readMask;
-	fd_set			writeMask;
 	int			fdMax;
 /* 
  * Initialize packet aliasing software.
@@ -162,11 +155,6 @@ int main (int argc, char** argv)
  	logDropped		= 0;
  	logFacility		= LOG_DAEMON;
 	logIpfwDenied		= -1;
-/*
- * Mark packet buffer empty.
- */
-	packetSock		= -1;
-	packetDirection		= DONT_KNOW;
 
 	ParseArgs (argc, argv);
 /*
@@ -330,7 +318,7 @@ int main (int argc, char** argv)
 
 	while (running) {
 
-		if (divertInOut != -1 && !ifName && packetSock == -1) {
+		if (divertInOut != -1 && !ifName) {
 /*
  * When using only one socket, just call 
  * DoAliasing repeatedly to process packets.
@@ -342,30 +330,17 @@ int main (int argc, char** argv)
  * Build read mask from socket descriptors to select.
  */
 		FD_ZERO (&readMask);
-		FD_ZERO (&writeMask);
-
-/*
- * If there is unsent packet in buffer, use select
- * to check when socket comes writable again.
- */
-		if (packetSock != -1) {
-
-			FD_SET (packetSock, &writeMask);
-		}
-		else {
 /*
- * No unsent packet exists - safe to check if
- * new ones are available.
+ * Check if new packets are available.
  */
-			if (divertIn != -1)
-				FD_SET (divertIn, &readMask);
+		if (divertIn != -1)
+			FD_SET (divertIn, &readMask);
 
-			if (divertOut != -1)
-				FD_SET (divertOut, &readMask);
+		if (divertOut != -1)
+			FD_SET (divertOut, &readMask);
 
-			if (divertInOut != -1)
-				FD_SET (divertInOut, &readMask);
-		}
+		if (divertInOut != -1)
+			FD_SET (divertInOut, &readMask);
 /*
  * Routing info is processed always.
  */
@@ -374,8 +349,8 @@ int main (int argc, char** argv)
 
 		if (select (fdMax + 1,
 			    &readMask,
-			    &writeMask,
 			    NULL,
+			    NULL,
 			    NULL) == -1) {
 
 			if (errno == EINTR)
@@ -384,10 +359,6 @@ int main (int argc, char** argv)
 			Quit ("Select failed.");
 		}
 
-		if (packetSock != -1)
-			if (FD_ISSET (packetSock, &writeMask))
-				FlushPacketBuffer (packetSock);
-
 		if (divertIn != -1)
 			if (FD_ISSET (divertIn, &readMask))
 				DoAliasing (divertIn, INPUT);
@@ -470,9 +441,13 @@ static void DoAliasing (int fd, int dire
 {
 	int			bytes;
 	int			origBytes;
+	char			buf[IP_MAXPACKET];
+	struct sockaddr_in	addr;
+	int			wrote;
 	int			status;
 	int			addrSize;
 	struct ip*		ip;
+	char			msgBuf[80];
 
 	if (assignAliasAddr) {
 
@@ -482,12 +457,12 @@ static void DoAliasing (int fd, int dire
 /*
  * Get packet from socket.
  */
-	addrSize  = sizeof packetAddr;
+	addrSize  = sizeof addr;
 	origBytes = recvfrom (fd,
-			      packetBuf,
-			      sizeof packetBuf,
+			      buf,
+			      sizeof buf,
 			      0,
-			      (struct sockaddr*) &packetAddr,
+			      (struct sockaddr*) &addr,
 			      &addrSize);
 
 	if (origBytes == -1) {
@@ -500,9 +475,9 @@ static void DoAliasing (int fd, int dire
 /*
  * This is a IP packet.
  */
-	ip = (struct ip*) packetBuf;
+	ip = (struct ip*) buf;
 	if (direction == DONT_KNOW) {
-		if (packetAddr.sin_addr.s_addr == INADDR_ANY)
+		if (addr.sin_addr.s_addr == INADDR_ANY)
 			direction = OUTPUT;
 		else
 			direction = INPUT;
@@ -541,14 +516,14 @@ static void DoAliasing (int fd, int dire
 /*
  * Outgoing packets. Do aliasing.
  */
-		PacketAliasOut (packetBuf, IP_MAXPACKET);
+		PacketAliasOut (buf, IP_MAXPACKET);
 	}
 	else {
 
 /*
  * Do aliasing.
  */	
-		status = PacketAliasIn (packetBuf, IP_MAXPACKET);
+		status = PacketAliasIn (buf, IP_MAXPACKET);
 		if (status == PKT_ALIAS_IGNORED &&
 		    dropIgnoredIncoming) {
 
@@ -583,42 +558,24 @@ static void DoAliasing (int fd, int dire
 		printf ("\n");
 	}
 
-	packetLen  	= bytes;
-	packetSock 	= fd;
-	packetDirection = direction;
-
-	FlushPacketBuffer (fd);
-}
-
-static void FlushPacketBuffer (int fd)
-{
-	int			wrote;
-	char			msgBuf[80];
 /*
  * Put packet back for processing.
  */
 	wrote = sendto (fd, 
-		        packetBuf,
-	    		packetLen,
+		        buf,
+	    		bytes,
 	    		0,
-	    		(struct sockaddr*) &packetAddr,
-	    		sizeof packetAddr);
+	    		(struct sockaddr*) &addr,
+	    		sizeof addr);
 	
-	if (wrote != packetLen) {
-/*
- * If buffer space is not available,
- * just return. Main loop will take care of 
- * retrying send when space becomes available.
- */
-		if (errno == ENOBUFS)
-			return;
+	if (wrote != bytes) {
 
 		if (errno == EMSGSIZE) {
 
-			if (packetDirection == OUTPUT &&
+			if (direction == OUTPUT &&
 			    ifMTU != -1)
 				SendNeedFragIcmp (icmpSock,
-						  (struct ip*) packetBuf,
+						  (struct ip*) buf,
 						  ifMTU - aliasOverhead);
 		}
 		else if (errno == EACCES && logIpfwDenied) {
@@ -627,8 +584,6 @@ static void FlushPacketBuffer (int fd)
 			Warn (msgBuf);
 		}
 	}
-
-	packetSock = -1;
 }
 
 static void HandleRoutingInfo (int fd)

--gj572EiMnwbLXET9--

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-ipfw" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20020115144045.J46269>