Date: Tue, 15 Jan 2002 14:40:45 +0200 From: Ruslan Ermilov <ru@FreeBSD.ORG> To: Luigi Rizzo <rizzo@icir.org> Cc: chkno@dork.com, freebsd-ipfw@FreeBSD.ORG, Ari Suutari <ari@suutari.iki.fi>, Brian Somers <brian@FreeBSD.ORG> Subject: Re: ip_dummynet.c:"*** OUCH! pipe should have been idle!" Message-ID: <20020115144045.J46269@sunbay.com> In-Reply-To: <20020115021839.A74391@iguana.icir.org> References: <20020114141539.A70340@iguana.icir.org> <200201142246.g0EMkXC05128@chk.phattydomain.com> <20020114145849.A70496@iguana.icir.org> <20020115120531.C46269@sunbay.com> <20020115021839.A74391@iguana.icir.org>
next in thread | previous in thread | raw e-mail | index | archive | help
--gj572EiMnwbLXET9 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline On Tue, Jan 15, 2002 at 02:18:40AM -0800, Luigi Rizzo wrote: > On Tue, Jan 15, 2002 at 12:05:31PM +0200, Ruslan Ermilov wrote: > > On Mon, Jan 14, 2002 at 02:58:49PM -0800, Luigi Rizzo wrote: > > > When the queue overflows, you get an ENOBUFS error, which natd > > > detects and handles by retransmitting the packet at a later time > > > (not a very good idea, I'd say that natd should just drop the packet > > > in this case. The "fix" might be trapping the ENOBUFS error in > > > usr/src/sbin/natd.c:FlushPacketBuffer() and returning as if the > > > transmission was successful). > > > > > ENOBUFS may be returned by sendto(2) when no socket buffer space > > is available. In this case, natd(8) retransmits the packet only > > after select(2) says that the socket is writable again. Dropping > > the packet in this case would be bogus, and natd.c has fixed this > > in revision 1.2. > > Sorry Ruslan, but I think you are wrong. > > For udp/datagram sockets, the i[output] socket buffer has no role > other than limit the max datagram size, and it is entirely bypassed > by the sendto/write routines. So, select() sees the socket as always > writable, and natd in this case loop forever. This is why i said > retrying is broken. And it is severely broken, indeed. > Right, this is well documented in Stevens TCP/IP Illustrated Vol. 2 in section 16.7, subsection "Unreliable Protocol Buffering". > Look at the sendto() manpage: > > ERRORS > ... > > [ENOBUFS] The system was unable to allocate an internal buffer. > The operation may succeed when buffers become avail- > able. > > [ENOBUFS] The output queue for a network interface was full. > This generally indicates that the interface has > stopped sending, but may be caused by transient con- > gestion. > > and also at the source code for sendto(). > Been there, confirmed that. Even for TCP, sendto() doesn't allocate a socket buffer space -- it's the responsibility of the protocol's output function, tcp_output(). > The reason why you cannot queue in the socket buffer is that there > is no way to trigger the transmission when the device becomes available > again -- for TCP this is supported by incoming acks or timeouts, but > there is no equivalent for UDP. > > Sure you could think of queueing the socket on the output device, but > apart that there is no infrastructure for that, which interface to > use depends on routing info, and then you might have multiple > ones because of multicast... > That makes pretty much sense, thanks for the info! Please find the patch attached (which backs the corresponding rev. 1.2 changes out). Also CC:ed to Brian Somers (who committed that change) and Ari Suutari who originally wrote natd(8). Cheers, Ruslan (who now wonders if the problem fixed in natd.c,v 1.2 was false). --gj572EiMnwbLXET9 Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename=p Index: natd.c =================================================================== RCS file: /home/ncvs/src/sbin/natd/natd.c,v retrieving revision 1.38 diff -u -p -r1.38 natd.c --- natd.c 2001/11/27 11:06:02 1.38 +++ natd.c 2002/01/15 12:37:04 @@ -97,7 +97,6 @@ static int StrToPortRange (const ch static int StrToProto (const char* str); static int StrToAddrAndPortRange (const char* str, struct in_addr* addr, char* proto, port_range *portRange); static void ParseArgs (int argc, char** argv); -static void FlushPacketBuffer (int fd); static void SetupPunchFW(const char *strValue); /* @@ -118,11 +117,6 @@ static int dynamicMode; static int ifMTU; static int aliasOverhead; static int icmpSock; -static char packetBuf[IP_MAXPACKET]; -static int packetLen; -static struct sockaddr_in packetAddr; -static int packetSock; -static int packetDirection; static int dropIgnoredIncoming; static int logDropped; static int logFacility; @@ -136,7 +130,6 @@ int main (int argc, char** argv) int routeSock; struct sockaddr_in addr; fd_set readMask; - fd_set writeMask; int fdMax; /* * Initialize packet aliasing software. @@ -162,11 +155,6 @@ int main (int argc, char** argv) logDropped = 0; logFacility = LOG_DAEMON; logIpfwDenied = -1; -/* - * Mark packet buffer empty. - */ - packetSock = -1; - packetDirection = DONT_KNOW; ParseArgs (argc, argv); /* @@ -330,7 +318,7 @@ int main (int argc, char** argv) while (running) { - if (divertInOut != -1 && !ifName && packetSock == -1) { + if (divertInOut != -1 && !ifName) { /* * When using only one socket, just call * DoAliasing repeatedly to process packets. @@ -342,30 +330,17 @@ int main (int argc, char** argv) * Build read mask from socket descriptors to select. */ FD_ZERO (&readMask); - FD_ZERO (&writeMask); - -/* - * If there is unsent packet in buffer, use select - * to check when socket comes writable again. - */ - if (packetSock != -1) { - - FD_SET (packetSock, &writeMask); - } - else { /* - * No unsent packet exists - safe to check if - * new ones are available. + * Check if new packets are available. */ - if (divertIn != -1) - FD_SET (divertIn, &readMask); + if (divertIn != -1) + FD_SET (divertIn, &readMask); - if (divertOut != -1) - FD_SET (divertOut, &readMask); + if (divertOut != -1) + FD_SET (divertOut, &readMask); - if (divertInOut != -1) - FD_SET (divertInOut, &readMask); - } + if (divertInOut != -1) + FD_SET (divertInOut, &readMask); /* * Routing info is processed always. */ @@ -374,8 +349,8 @@ int main (int argc, char** argv) if (select (fdMax + 1, &readMask, - &writeMask, NULL, + NULL, NULL) == -1) { if (errno == EINTR) @@ -384,10 +359,6 @@ int main (int argc, char** argv) Quit ("Select failed."); } - if (packetSock != -1) - if (FD_ISSET (packetSock, &writeMask)) - FlushPacketBuffer (packetSock); - if (divertIn != -1) if (FD_ISSET (divertIn, &readMask)) DoAliasing (divertIn, INPUT); @@ -470,9 +441,13 @@ static void DoAliasing (int fd, int dire { int bytes; int origBytes; + char buf[IP_MAXPACKET]; + struct sockaddr_in addr; + int wrote; int status; int addrSize; struct ip* ip; + char msgBuf[80]; if (assignAliasAddr) { @@ -482,12 +457,12 @@ static void DoAliasing (int fd, int dire /* * Get packet from socket. */ - addrSize = sizeof packetAddr; + addrSize = sizeof addr; origBytes = recvfrom (fd, - packetBuf, - sizeof packetBuf, + buf, + sizeof buf, 0, - (struct sockaddr*) &packetAddr, + (struct sockaddr*) &addr, &addrSize); if (origBytes == -1) { @@ -500,9 +475,9 @@ static void DoAliasing (int fd, int dire /* * This is a IP packet. */ - ip = (struct ip*) packetBuf; + ip = (struct ip*) buf; if (direction == DONT_KNOW) { - if (packetAddr.sin_addr.s_addr == INADDR_ANY) + if (addr.sin_addr.s_addr == INADDR_ANY) direction = OUTPUT; else direction = INPUT; @@ -541,14 +516,14 @@ static void DoAliasing (int fd, int dire /* * Outgoing packets. Do aliasing. */ - PacketAliasOut (packetBuf, IP_MAXPACKET); + PacketAliasOut (buf, IP_MAXPACKET); } else { /* * Do aliasing. */ - status = PacketAliasIn (packetBuf, IP_MAXPACKET); + status = PacketAliasIn (buf, IP_MAXPACKET); if (status == PKT_ALIAS_IGNORED && dropIgnoredIncoming) { @@ -583,42 +558,24 @@ static void DoAliasing (int fd, int dire printf ("\n"); } - packetLen = bytes; - packetSock = fd; - packetDirection = direction; - - FlushPacketBuffer (fd); -} - -static void FlushPacketBuffer (int fd) -{ - int wrote; - char msgBuf[80]; /* * Put packet back for processing. */ wrote = sendto (fd, - packetBuf, - packetLen, + buf, + bytes, 0, - (struct sockaddr*) &packetAddr, - sizeof packetAddr); + (struct sockaddr*) &addr, + sizeof addr); - if (wrote != packetLen) { -/* - * If buffer space is not available, - * just return. Main loop will take care of - * retrying send when space becomes available. - */ - if (errno == ENOBUFS) - return; + if (wrote != bytes) { if (errno == EMSGSIZE) { - if (packetDirection == OUTPUT && + if (direction == OUTPUT && ifMTU != -1) SendNeedFragIcmp (icmpSock, - (struct ip*) packetBuf, + (struct ip*) buf, ifMTU - aliasOverhead); } else if (errno == EACCES && logIpfwDenied) { @@ -627,8 +584,6 @@ static void FlushPacketBuffer (int fd) Warn (msgBuf); } } - - packetSock = -1; } static void HandleRoutingInfo (int fd) --gj572EiMnwbLXET9-- To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-ipfw" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20020115144045.J46269>