From owner-freebsd-bugs@FreeBSD.ORG Sat May 22 22:10:02 2010 Return-Path: Delivered-To: freebsd-bugs@hub.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 7BBC9106566C for ; Sat, 22 May 2010 22:10:02 +0000 (UTC) (envelope-from gnats@FreeBSD.org) Received: from freefall.freebsd.org (unknown [IPv6:2001:4f8:fff6::28]) by mx1.freebsd.org (Postfix) with ESMTP id 3F5F78FC14 for ; Sat, 22 May 2010 22:10:02 +0000 (UTC) Received: from freefall.freebsd.org (localhost [127.0.0.1]) by freefall.freebsd.org (8.14.4/8.14.4) with ESMTP id o4MMA14Z044281 for ; Sat, 22 May 2010 22:10:01 GMT (envelope-from gnats@freefall.freebsd.org) Received: (from gnats@localhost) by freefall.freebsd.org (8.14.4/8.14.4/Submit) id o4MMA1hl044280; Sat, 22 May 2010 22:10:01 GMT (envelope-from gnats) Resent-Date: Sat, 22 May 2010 22:10:01 GMT Resent-Message-Id: <201005222210.o4MMA1hl044280@freefall.freebsd.org> Resent-From: FreeBSD-gnats-submit@FreeBSD.org (GNATS Filer) Resent-To: freebsd-bugs@FreeBSD.org Resent-Reply-To: FreeBSD-gnats-submit@FreeBSD.org, Anton Lavrentiev Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id A1D48106566B for ; Sat, 22 May 2010 22:05:29 +0000 (UTC) (envelope-from nobody@FreeBSD.org) Received: from www.freebsd.org (www.freebsd.org [IPv6:2001:4f8:fff6::21]) by mx1.freebsd.org (Postfix) with ESMTP id 906138FC12 for ; Sat, 22 May 2010 22:05:29 +0000 (UTC) Received: from www.freebsd.org (localhost [127.0.0.1]) by www.freebsd.org (8.14.3/8.14.3) with ESMTP id o4MM5TVQ074886 for ; Sat, 22 May 2010 22:05:29 GMT (envelope-from nobody@www.freebsd.org) Received: (from nobody@localhost) by www.freebsd.org (8.14.3/8.14.3/Submit) id o4MM5Trw074885; Sat, 22 May 2010 22:05:29 GMT (envelope-from nobody) Message-Id: <201005222205.o4MM5Trw074885@www.freebsd.org> Date: Sat, 22 May 2010 22:05:29 GMT From: Anton Lavrentiev To: freebsd-gnats-submit@FreeBSD.org X-Send-Pr-Version: www-3.1 Cc: Subject: misc/146845: close() return error 54 (connection reset by peer) wrongly X-BeenThere: freebsd-bugs@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Bug reports List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 22 May 2010 22:10:02 -0000 >Number: 146845 >Category: misc >Synopsis: close() return error 54 (connection reset by peer) wrongly >Confidential: no >Severity: serious >Priority: low >Responsible: freebsd-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: sw-bug >Submitter-Id: current-users >Arrival-Date: Sat May 22 22:10:01 UTC 2010 >Closed-Date: >Last-Modified: >Originator: Anton Lavrentiev >Release: 8.0 >Organization: NCBI/NLM/NIH >Environment: FreeBSD beastie3.ncbi.nlm.nih.gov 8.0-RELEASE-p2 FreeBSD 8.0-RELEASE-p2 #0: Tue Jan 5 16:02:27 UTC 2010 root@i386-builder.daemonology.net:/usr/obj/usr/src/sys/GENERIC i386 >Description: When an INET socket is being closed from two ends almost simultaneously, with both sides using a series of SO_LINGER, shutdown and close, and one side being slightly behind the other, the side closing last gets -1 from close() on such a socket, and errno 54, Connection reset by peer, even though there was no data in the socket (so it more likely seems that the FIN that the late shutdown() was assumed to send was counted towards the actual data, but it should not have). No other UNIX flavor (and we have many here at NCBI) exhibits this kind of problem. Also, the problem is not 100% reproducible on FreeBSD (but is pretty frequent, nonetheless). We have seen this very same problem on both version 6 and 8 of the OS (reporting here for 8). >How-To-Repeat: In the attached, there is a server-client C program that reproduces the problem (although, the program itself has to be started a few times before the actual error occurs). At the bottom of the attached C program, a ktrace of the communicating processes, and errno 54 coming out of close(). select(0,...) is used to slowdown one process to imitate some of its midwork before it is ready to proceed with actual socket disposal. If either shutdown() or setsockopt(SO_LINGER) are excluded from the code, the problem in close() seems to be gone. while ktrace -i ./a.out; do date; done kdump >Fix: There are workarounds (as to not to use either setsockopt(SO_LINGER) or shutdown(SHUT_WR), or both) but they do not fix the problem in general. Patch attached with submission follows: #include #include #include #include #include #include #include #include #include #define BUG int main(void) { char buf[40]; int a, s, len; struct sockaddr_in sin; struct linger lg; pid_t pid; if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) { perror("socket"); return 1; } memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof(sin); sin.sin_family = AF_INET; sin.sin_port = htons(7890); if (bind(s, (struct sockaddr*) &sin, sizeof(sin)) < 0) { perror("bind"); return 1; } if (listen(s, 100) < 0) { perror("listen"); return 1; } if (pid = fork()) { len = sizeof(sin); memset(&sin, 0, sizeof(sin)); if ((a = accept(s, (struct sockaddr*) &sin, &len)) < 0) { perror("accept"); return 1; } close(s); s = a; } else { close(s); if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) { perror("socket"); return 1; } if (connect(s, (struct sockaddr*) &sin, sizeof(sin)) < 0) { perror("connect"); return 1; } } if (write(s, "Hello\n", 6) < 6) { perror("write"); return 1; } if ((len = read(s, buf, sizeof(buf))) < 0) { perror("read"); return 1; } lg.l_linger = 5; lg.l_onoff = 1; if (setsockopt(s, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg)) < 0) { perror("setsockopt"); return 1; } if (pid) { struct timeval tv = {0, 350000} ; select(0, 0, 0, 0, &tv); } #ifdef BUG if (shutdown(s, SHUT_WR) < 0) { perror("shutdown"); return 1; } #endif errno = 0; a = close(s); if (a < 0) perror("close"); printf("%.*s %d %d\n", len, buf, a, errno); return a; } #if 0 68009 a.out CALL socket(PF_INET,SOCK_STREAM,IPPROTO_IP) 68009 a.out RET socket 3 68009 a.out CALL bind(0x3,0xbfbfe9dc,0x10) 68009 a.out STRU struct sockaddr { AF_INET, 0.0.0.0:7890 } 68009 a.out RET bind 0 68009 a.out CALL listen(0x3,0x64) 68009 a.out RET listen 0 68009 a.out CALL fork 68009 a.out RET fork 68010/0x109aa 68010 a.out RET fork 0 68009 a.out CALL accept(0x3,0xbfbfe9dc,0xbfbfe9ec) 68010 a.out CALL close(0x3) 68010 a.out RET close 0 68010 a.out CALL socket(PF_INET,SOCK_STREAM,IPPROTO_IP) 68010 a.out RET socket 3 68010 a.out CALL connect(0x3,0xbfbfe9dc,0x10) 68010 a.out STRU struct sockaddr { AF_INET, 0.0.0.0:7890 } 68010 a.out RET connect 0 68010 a.out CALL write(0x3,0x8048bc3,0x6) 68010 a.out GIO fd 3 wrote 6 bytes "Hello " 68009 a.out STRU struct sockaddr { AF_INET, 127.0.0.1:12597 } 68010 a.out RET write 6 68009 a.out RET accept 4 68010 a.out CALL read(0x3,0xbfbfe9f0,0x28) 68009 a.out CALL close(0x3) 68009 a.out RET close 0 68009 a.out CALL write(0x4,0x8048bc3,0x6) 68009 a.out GIO fd 4 wrote 6 bytes "Hello " 68009 a.out RET write 6 68009 a.out CALL read(0x4,0xbfbfe9f0,0x28) 68010 a.out GIO fd 3 read 6 bytes "Hello " 68010 a.out RET read 6 68009 a.out GIO fd 4 read 6 bytes "Hello " 68009 a.out RET read 6 68010 a.out CALL setsockopt(0x3,SOL_SOCKET,SO_LINGER,0xbfbfe9d4,0x8) 68010 a.out RET setsockopt 0 68009 a.out CALL setsockopt(0x4,SOL_SOCKET,SO_LINGER,0xbfbfe9d4,0x8) 68009 a.out RET setsockopt 0 68010 a.out CALL shutdown(0x3,) 68009 a.out CALL select(0,0,0,0,0xbfbfe9cc) 68010 a.out RET shutdown 0 68010 a.out CALL close(0x3) 68010 a.out RET close 0 68010 a.out CALL write(0x1,0x28201000,0x6) 68010 a.out GIO fd 1 wrote 6 bytes "Hello " 68010 a.out RET write 6 68010 a.out CALL write(0x1,0x28201000,0x5) 68010 a.out GIO fd 1 wrote 5 bytes " 0 0 " 68010 a.out RET write 5 68010 a.out CALL exit(0) 68009 a.out RET select 0 68009 a.out CALL shutdown(0x4,) 68009 a.out RET shutdown 0 68009 a.out CALL close(0x4) 68009 a.out RET close -1 errno 54 Connection reset by peer <<<=== BUG!!! 68009 a.out CALL writev(0x2,0xbfbfe168,0x4) 68009 a.out GIO fd 2 wrote 32 bytes "close: Connection reset by peer " 68009 a.out RET writev 32/0x20 68009 a.out CALL write(0x1,0x28201000,0x6) 68009 a.out GIO fd 1 wrote 6 bytes "Hello " 68009 a.out RET write 6 68009 a.out CALL write(0x1,0x28201000,0x7) 68009 a.out GIO fd 1 wrote 7 bytes " -1 54 " 68009 a.out RET write 7 68009 a.out CALL exit(0xffffffff) #endif >Release-Note: >Audit-Trail: >Unformatted: