Date: Tue, 15 Jan 2002 14:40:45 +0200 From: Ruslan Ermilov <ru@FreeBSD.ORG> To: Luigi Rizzo <rizzo@icir.org> Cc: chkno@dork.com, freebsd-ipfw@FreeBSD.ORG, Ari Suutari <ari@suutari.iki.fi>, Brian Somers <brian@FreeBSD.ORG> Subject: Re: ip_dummynet.c:"*** OUCH! pipe should have been idle!" Message-ID: <20020115144045.J46269@sunbay.com> In-Reply-To: <20020115021839.A74391@iguana.icir.org> References: <20020114141539.A70340@iguana.icir.org> <200201142246.g0EMkXC05128@chk.phattydomain.com> <20020114145849.A70496@iguana.icir.org> <20020115120531.C46269@sunbay.com> <20020115021839.A74391@iguana.icir.org>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
On Tue, Jan 15, 2002 at 02:18:40AM -0800, Luigi Rizzo wrote:
> On Tue, Jan 15, 2002 at 12:05:31PM +0200, Ruslan Ermilov wrote:
> > On Mon, Jan 14, 2002 at 02:58:49PM -0800, Luigi Rizzo wrote:
> > > When the queue overflows, you get an ENOBUFS error, which natd
> > > detects and handles by retransmitting the packet at a later time
> > > (not a very good idea, I'd say that natd should just drop the packet
> > > in this case. The "fix" might be trapping the ENOBUFS error in
> > > usr/src/sbin/natd.c:FlushPacketBuffer() and returning as if the
> > > transmission was successful).
> > >
> > ENOBUFS may be returned by sendto(2) when no socket buffer space
> > is available. In this case, natd(8) retransmits the packet only
> > after select(2) says that the socket is writable again. Dropping
> > the packet in this case would be bogus, and natd.c has fixed this
> > in revision 1.2.
>
> Sorry Ruslan, but I think you are wrong.
>
> For udp/datagram sockets, the i[output] socket buffer has no role
> other than limit the max datagram size, and it is entirely bypassed
> by the sendto/write routines. So, select() sees the socket as always
> writable, and natd in this case loop forever. This is why i said
> retrying is broken. And it is severely broken, indeed.
>
Right, this is well documented in Stevens TCP/IP Illustrated Vol. 2
in section 16.7, subsection "Unreliable Protocol Buffering".
> Look at the sendto() manpage:
>
> ERRORS
> ...
>
> [ENOBUFS] The system was unable to allocate an internal buffer.
> The operation may succeed when buffers become avail-
> able.
>
> [ENOBUFS] The output queue for a network interface was full.
> This generally indicates that the interface has
> stopped sending, but may be caused by transient con-
> gestion.
>
> and also at the source code for sendto().
>
Been there, confirmed that. Even for TCP, sendto() doesn't allocate
a socket buffer space -- it's the responsibility of the protocol's
output function, tcp_output().
> The reason why you cannot queue in the socket buffer is that there
> is no way to trigger the transmission when the device becomes available
> again -- for TCP this is supported by incoming acks or timeouts, but
> there is no equivalent for UDP.
>
> Sure you could think of queueing the socket on the output device, but
> apart that there is no infrastructure for that, which interface to
> use depends on routing info, and then you might have multiple
> ones because of multicast...
>
That makes pretty much sense, thanks for the info!
Please find the patch attached (which backs the corresponding
rev. 1.2 changes out).
Also CC:ed to Brian Somers (who committed that change) and
Ari Suutari who originally wrote natd(8).
Cheers,
Ruslan (who now wonders if the problem fixed in natd.c,v 1.2 was false).
[-- Attachment #2 --]
Index: natd.c
===================================================================
RCS file: /home/ncvs/src/sbin/natd/natd.c,v
retrieving revision 1.38
diff -u -p -r1.38 natd.c
--- natd.c 2001/11/27 11:06:02 1.38
+++ natd.c 2002/01/15 12:37:04
@@ -97,7 +97,6 @@ static int StrToPortRange (const ch
static int StrToProto (const char* str);
static int StrToAddrAndPortRange (const char* str, struct in_addr* addr, char* proto, port_range *portRange);
static void ParseArgs (int argc, char** argv);
-static void FlushPacketBuffer (int fd);
static void SetupPunchFW(const char *strValue);
/*
@@ -118,11 +117,6 @@ static int dynamicMode;
static int ifMTU;
static int aliasOverhead;
static int icmpSock;
-static char packetBuf[IP_MAXPACKET];
-static int packetLen;
-static struct sockaddr_in packetAddr;
-static int packetSock;
-static int packetDirection;
static int dropIgnoredIncoming;
static int logDropped;
static int logFacility;
@@ -136,7 +130,6 @@ int main (int argc, char** argv)
int routeSock;
struct sockaddr_in addr;
fd_set readMask;
- fd_set writeMask;
int fdMax;
/*
* Initialize packet aliasing software.
@@ -162,11 +155,6 @@ int main (int argc, char** argv)
logDropped = 0;
logFacility = LOG_DAEMON;
logIpfwDenied = -1;
-/*
- * Mark packet buffer empty.
- */
- packetSock = -1;
- packetDirection = DONT_KNOW;
ParseArgs (argc, argv);
/*
@@ -330,7 +318,7 @@ int main (int argc, char** argv)
while (running) {
- if (divertInOut != -1 && !ifName && packetSock == -1) {
+ if (divertInOut != -1 && !ifName) {
/*
* When using only one socket, just call
* DoAliasing repeatedly to process packets.
@@ -342,30 +330,17 @@ int main (int argc, char** argv)
* Build read mask from socket descriptors to select.
*/
FD_ZERO (&readMask);
- FD_ZERO (&writeMask);
-
-/*
- * If there is unsent packet in buffer, use select
- * to check when socket comes writable again.
- */
- if (packetSock != -1) {
-
- FD_SET (packetSock, &writeMask);
- }
- else {
/*
- * No unsent packet exists - safe to check if
- * new ones are available.
+ * Check if new packets are available.
*/
- if (divertIn != -1)
- FD_SET (divertIn, &readMask);
+ if (divertIn != -1)
+ FD_SET (divertIn, &readMask);
- if (divertOut != -1)
- FD_SET (divertOut, &readMask);
+ if (divertOut != -1)
+ FD_SET (divertOut, &readMask);
- if (divertInOut != -1)
- FD_SET (divertInOut, &readMask);
- }
+ if (divertInOut != -1)
+ FD_SET (divertInOut, &readMask);
/*
* Routing info is processed always.
*/
@@ -374,8 +349,8 @@ int main (int argc, char** argv)
if (select (fdMax + 1,
&readMask,
- &writeMask,
NULL,
+ NULL,
NULL) == -1) {
if (errno == EINTR)
@@ -384,10 +359,6 @@ int main (int argc, char** argv)
Quit ("Select failed.");
}
- if (packetSock != -1)
- if (FD_ISSET (packetSock, &writeMask))
- FlushPacketBuffer (packetSock);
-
if (divertIn != -1)
if (FD_ISSET (divertIn, &readMask))
DoAliasing (divertIn, INPUT);
@@ -470,9 +441,13 @@ static void DoAliasing (int fd, int dire
{
int bytes;
int origBytes;
+ char buf[IP_MAXPACKET];
+ struct sockaddr_in addr;
+ int wrote;
int status;
int addrSize;
struct ip* ip;
+ char msgBuf[80];
if (assignAliasAddr) {
@@ -482,12 +457,12 @@ static void DoAliasing (int fd, int dire
/*
* Get packet from socket.
*/
- addrSize = sizeof packetAddr;
+ addrSize = sizeof addr;
origBytes = recvfrom (fd,
- packetBuf,
- sizeof packetBuf,
+ buf,
+ sizeof buf,
0,
- (struct sockaddr*) &packetAddr,
+ (struct sockaddr*) &addr,
&addrSize);
if (origBytes == -1) {
@@ -500,9 +475,9 @@ static void DoAliasing (int fd, int dire
/*
* This is a IP packet.
*/
- ip = (struct ip*) packetBuf;
+ ip = (struct ip*) buf;
if (direction == DONT_KNOW) {
- if (packetAddr.sin_addr.s_addr == INADDR_ANY)
+ if (addr.sin_addr.s_addr == INADDR_ANY)
direction = OUTPUT;
else
direction = INPUT;
@@ -541,14 +516,14 @@ static void DoAliasing (int fd, int dire
/*
* Outgoing packets. Do aliasing.
*/
- PacketAliasOut (packetBuf, IP_MAXPACKET);
+ PacketAliasOut (buf, IP_MAXPACKET);
}
else {
/*
* Do aliasing.
*/
- status = PacketAliasIn (packetBuf, IP_MAXPACKET);
+ status = PacketAliasIn (buf, IP_MAXPACKET);
if (status == PKT_ALIAS_IGNORED &&
dropIgnoredIncoming) {
@@ -583,42 +558,24 @@ static void DoAliasing (int fd, int dire
printf ("\n");
}
- packetLen = bytes;
- packetSock = fd;
- packetDirection = direction;
-
- FlushPacketBuffer (fd);
-}
-
-static void FlushPacketBuffer (int fd)
-{
- int wrote;
- char msgBuf[80];
/*
* Put packet back for processing.
*/
wrote = sendto (fd,
- packetBuf,
- packetLen,
+ buf,
+ bytes,
0,
- (struct sockaddr*) &packetAddr,
- sizeof packetAddr);
+ (struct sockaddr*) &addr,
+ sizeof addr);
- if (wrote != packetLen) {
-/*
- * If buffer space is not available,
- * just return. Main loop will take care of
- * retrying send when space becomes available.
- */
- if (errno == ENOBUFS)
- return;
+ if (wrote != bytes) {
if (errno == EMSGSIZE) {
- if (packetDirection == OUTPUT &&
+ if (direction == OUTPUT &&
ifMTU != -1)
SendNeedFragIcmp (icmpSock,
- (struct ip*) packetBuf,
+ (struct ip*) buf,
ifMTU - aliasOverhead);
}
else if (errno == EACCES && logIpfwDenied) {
@@ -627,8 +584,6 @@ static void FlushPacketBuffer (int fd)
Warn (msgBuf);
}
}
-
- packetSock = -1;
}
static void HandleRoutingInfo (int fd)
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20020115144045.J46269>
