Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 18 Feb 2010 14:23:13 GMT
From:      Mikolaj Golub <to.my.trociny@gmail.com>
To:        freebsd-gnats-submit@FreeBSD.org
Subject:   kern/144061: race on unix socket close
Message-ID:  <201002181423.o1IENDMJ077444@www.freebsd.org>
Resent-Message-ID: <201002181430.o1IEU2UQ099628@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         144061
>Category:       kern
>Synopsis:       race on unix socket close
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Thu Feb 18 14:30:02 UTC 2010
>Closed-Date:
>Last-Modified:
>Originator:     Mikolaj Golub
>Release:        8.0-STABLE, 7.2-RELEASE-p6
>Organization:
>Environment:
FreeBSD zhuzha.ua1 8.0-STABLE FreeBSD 8.0-STABLE #8: Thu Feb 18 15:48:46 EET 2010     root@zhuzha.ua1:/usr/obj/usr/src/sys/GENERIC  i386
>Description:
This issue was discussed on freebsd-hackers@ under the subject "unix socket: race on close?"

http://lists.freebsd.org/pipermail/freebsd-hackers/2010-February/030741.html

Below is a simple test code with unix sockets: the client does
connect()/close() in loop and the server -- accept()/close().

--------------------------------------------------------------------------------

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <strings.h>
#include <string.h>
#include <unistd.h>
#include <sys/select.h>
#include <err.h>

#define UNIXSTR_PATH "/tmp/mytest.socket"
#define USLEEP  100

/*
 * Reproducer for a race between close() calls on the two ends of a
 * connected unix-domain stream socket.
 *
 * The process forks.  The parent acts as the server: it binds a listening
 * AF_LOCAL socket at UNIXSTR_PATH and then loops on accept()/close().  The
 * child acts as the client: it loops on socket()/connect()/close().  Neither
 * side transfers any data, so both ends of every connection are closed at
 * almost the same moment.
 *
 * Every failure is reported with errx(), printing the numeric errno value,
 * and terminates the reporting process with exit status 1.  Both loops are
 * infinite, so the trailing "return 0" is never reached.
 *
 * argc and argv are not used.
 */
int main(int argc, char **argv)
{
	int			listenfd, connfd, pid;
	struct sockaddr_un	servaddr;
	
	pid = fork();
	if (-1 == pid)
		errx(1, "fork(): %d", errno);

	if (0 != pid) { /* parent */

		if ((listenfd = socket(AF_LOCAL, SOCK_STREAM, 0)) < 0)
			errx(1, "parent: socket error: %d", errno);

		/* Remove any socket file left by a previous run; the result is
		 * deliberately not checked. */
		unlink(UNIXSTR_PATH);
		bzero(&servaddr, sizeof(servaddr));
		servaddr.sun_family = AF_LOCAL;
		strcpy(servaddr.sun_path, UNIXSTR_PATH);

		if (bind(listenfd, (struct sockaddr *) &servaddr, sizeof(servaddr)) < 0)
			errx(1, "parent: bind error: %d", errno);

		if (listen(listenfd, 1024) < 0)
			errx(1, "parent: listen error: %d", errno);
		
		/* Server loop: accept a connection and close it immediately. */
		for ( ; ; ) {
			/* The peer address is not requested (both out-arguments NULL). */
			if ((connfd = accept(listenfd, (struct sockaddr *) NULL, NULL)) < 0)
				errx(1, "parent: accept error: %d", errno);

			//usleep(USLEEP / 2); // (I) uncomment this or (II) below to avoid the race
			
	        	if (close(connfd) < 0)
				errx(1, "parent: close error: %d", errno);
		}
		
	} else { /* child */

		sleep(1); /* give the parent some time to create the socket */

		/* Client loop: open a fresh socket, connect, and close it immediately. */
		for ( ; ; ) {

			if ((connfd = socket(AF_LOCAL, SOCK_STREAM, 0)) < 0)
				errx(1, "child: socket error: %d", errno);

			bzero(&servaddr, sizeof(servaddr));
			servaddr.sun_family = AF_LOCAL;
			strcpy(servaddr.sun_path, UNIXSTR_PATH);

			if (connect(connfd, (struct sockaddr *) &servaddr, sizeof(servaddr)) < 0)
				errx(1, "child: connect error %d", errno);
			
			// usleep(USLEEP); // (II) uncomment this or (I) above to avoid the race

			if (close(connfd) != 0) 
				errx(1, "child: close error: %d", errno);

			/* Pause between iterations; this delay comes after close(),
			 * so it does not separate the two racing close() calls. */
			usleep(USLEEP);
		}
	}

	return 0;
}

--------------------------------------------------------------------------------

Sometimes close() fails with 'Socket is not connected' error:

a.out: parent: close error: 57

or

a.out: child: close error: 57

It looks like a race in close(). Looking at uipc_socket.c:soclose():

int
soclose(struct socket *so)
{
        int error = 0;

        KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter"));

        CURVNET_SET(so->so_vnet);
        funsetown(&so->so_sigio);
        if (so->so_state & SS_ISCONNECTED) {
                if ((so->so_state & SS_ISDISCONNECTING) == 0) {
                        error = sodisconnect(so);
                        if (error)
                                goto drop;
                }

so_state is checked without locking, and then sodisconnect() is called, which
closes both sockets of the connection. So if close() is called on both ends simultaneously, it is possible that sodisconnect() will be called for both ends, and for one of them ENOTCONN will be returned.

I made the following modifications (suggested by Robert Watson) to the code to have some confirmation:

1) just log the error when sodisconnect() returns an error:

--- uipc_socket.c.orig	2010-02-18 14:25:25.000000000 +0200
+++ uipc_socket.c	2010-02-18 14:55:26.000000000 +0200
@@ -120,6 +120,7 @@ __FBSDID("$FreeBSD: src/sys/kern/uipc_so
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
+#include <sys/syslog.h>
 #include <sys/socketvar.h>
 #include <sys/resourcevar.h>
 #include <net/route.h>
@@ -136,6 +137,7 @@ __FBSDID("$FreeBSD: src/sys/kern/uipc_so
 
 #include <vm/uma.h>
 
+
 #ifdef COMPAT_IA32
 #include <sys/mount.h>
 #include <sys/sysent.h>
@@ -657,7 +659,7 @@ soclose(struct socket *so)
 		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
 			error = sodisconnect(so);
 			if (error)
-				goto drop;
+				log(LOG_INFO, "soclose: sodisconnect error: %d\n", error);
 		}
 		if (so->so_options & SO_LINGER) {
 			if ((so->so_state & SS_ISDISCONNECTING) &&


Then, on every error exit of the test application, such as this one:

a.out: parent: close error: 57

I see a corresponding message in the log:

Feb 18 15:35:32 zhuzha kernel: soclose: sodisconnect error: 57

2) log the error when sodisconnect() returns an error, and ignore the error if it is ENOTCONN:

--- uipc_socket.c.orig	2010-02-18 14:25:25.000000000 +0200
+++ uipc_socket.c	2010-02-18 15:41:07.000000000 +0200
@@ -120,6 +120,7 @@ __FBSDID("$FreeBSD: src/sys/kern/uipc_so
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
+#include <sys/syslog.h>
 #include <sys/socketvar.h>
 #include <sys/resourcevar.h>
 #include <net/route.h>
@@ -136,6 +137,7 @@ __FBSDID("$FreeBSD: src/sys/kern/uipc_so
 
 #include <vm/uma.h>
 
+
 #ifdef COMPAT_IA32
 #include <sys/mount.h>
 #include <sys/sysent.h>
@@ -656,8 +658,11 @@ soclose(struct socket *so)
 	if (so->so_state & SS_ISCONNECTED) {
 		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
 			error = sodisconnect(so);
-			if (error)
-				goto drop;
+			if (error) {
+				log(LOG_INFO, "soclose: sodisconnect error: %d\n", error);
+				if (error == ENOTCONN)
+					error = 0;
+			}
 		}
 		if (so->so_options & SO_LINGER) {
 			if ((so->so_state & SS_ISDISCONNECTING) &&

After this change the test application no longer exits, and I see these errors in the message log:

Feb 18 16:02:37 zhuzha kernel: soclose: sodisconnect error: 57
Feb 18 16:03:31 zhuzha kernel: soclose: sodisconnect error: 57
Feb 18 16:05:49 zhuzha last message repeated 4 times
Feb 18 16:15:50 zhuzha last message repeated 13 times
>How-To-Repeat:

>Fix:


>Release-Note:
>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201002181423.o1IENDMJ077444>