Date: Tue, 6 Aug 2002 16:16:03 -0700 (PDT) From: Don Lewis <dl-freebsd@catspoiler.org> To: jhay@icomtek.csir.co.za, hsu@FreeBSD.org, ume@FreeBSD.org, suz@kame.net, current@FreeBSD.org Subject: CFR - patch for TCPv6 accept lock violation bug Message-ID: <200208062316.g76NG3wr091139@gw.catspoiler.org>
next in thread | raw e-mail | index | archive | help
FreeBSD -current detects a lock violation in the IPv6 TCP accept code in tcp6_usr_accept() caused by calling in6_mapped_peeraddr(), which calls MALLOC() which may block, while a lock is being held. Instead of taking the same approach to solve this problem as was done in tcp_usr_accept(), which would result in a lot of duplicated code, it looks like a better solution is to make a local copy of the address and port information before dropping the lock, and then perform all the potentially blocking operations after the lock as been dropped. If this same approach is used in other places where in_setsockaddr(), in_setpeeraddr(), in6_setsockaddr(), and in6_setpeeraddr() are called, then we can replace a bunch of duplicated code with calls to a few new functions that accept the address and port information and convert it to the sockaddr structure. I've still got a couple of questions, though. Why don't in6_setpeeraddr() and in6_setsockaddr() handle the IPv4 mapping done in in6_mapped_peeraddr() and in6_mapped_sockaddr()? If they did, we wouldn't need the latter routines. I don't see any reason to have both sets of routines. Can the INP_IPV4 flag ever be set in the tcp6_usr_accept() case? If not, the expanded code in tcp6_usr_accept() could be simplified. The function prototypes in in_pcb.h and in6_pcb.h don't seem to be sorted in any consistent order. Index: netinet/in_pcb.h =================================================================== RCS file: /home/ncvs/src/sys/netinet/in_pcb.h,v retrieving revision 1.51 diff -u -r1.51 in_pcb.h --- netinet/in_pcb.h 22 Jul 2002 15:51:02 -0000 1.51 +++ netinet/in_pcb.h 6 Aug 2002 21:43:03 -0000 @@ -344,6 +344,8 @@ void in_pcbrehash(struct inpcb *); int in_setpeeraddr(struct socket *so, struct sockaddr **nam, struct inpcbinfo *pcbinfo); int in_setsockaddr(struct socket *so, struct sockaddr **nam, struct inpcbinfo *pcbinfo);; +struct sockaddr * + in_sockaddr(in_port_t port, struct in_addr *addr); void in_pcbremlists(struct inpcb *inp); int prison_xinpcb(struct thread *td, struct inpcb *inp); #endif /* _KERNEL */ Index: netinet/in_pcb.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/in_pcb.c,v retrieving revision 1.109 diff -u -r1.109 in_pcb.c --- netinet/in_pcb.c 25 Jul 2002 17:40:44 -0000 1.109 +++ netinet/in_pcb.c 6 Aug 2002 21:42:45 -0000 @@ -577,6 +577,23 @@ uma_zfree(ipi->ipi_zone, inp); } +struct sockaddr * +in_sockaddr(port, addr_p) + in_port_t port; + struct in_addr *addr_p; +{ + struct sockaddr_in *sin; + + MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, + M_WAITOK | M_ZERO); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr = *addr_p; + sin->sin_port = port; + + return (struct sockaddr *)sin; +} + /* * The wrapper function will pass down the pcbinfo for this function to lock. * The socket must have a valid @@ -593,15 +610,8 @@ { int s; register struct inpcb *inp; - register struct sockaddr_in *sin; - - /* - * Do the malloc first in case it blocks. - */ - MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, - M_WAITOK | M_ZERO); - sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); + struct in_addr addr; + in_port_t port; s = splnet(); INP_INFO_RLOCK(pcbinfo); @@ -609,17 +619,16 @@ if (!inp) { INP_INFO_RUNLOCK(pcbinfo); splx(s); - free(sin, M_SONAME); return ECONNRESET; } INP_LOCK(inp); - sin->sin_port = inp->inp_lport; - sin->sin_addr = inp->inp_laddr; + port = inp->inp_lport; + addr = inp->inp_laddr; INP_UNLOCK(inp); INP_INFO_RUNLOCK(pcbinfo); splx(s); - *nam = (struct sockaddr *)sin; + *nam = in_sockaddr(port, &addr); return 0; } @@ -634,15 +643,8 @@ { int s; register struct inpcb *inp; - register struct sockaddr_in *sin; - - /* - * Do the malloc first in case it blocks. - */ - MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, - M_WAITOK | M_ZERO); - sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); + struct in_addr addr; + in_port_t port; s = splnet(); INP_INFO_RLOCK(pcbinfo); @@ -650,17 +652,16 @@ if (!inp) { INP_INFO_RUNLOCK(pcbinfo); splx(s); - free(sin, M_SONAME); return ECONNRESET; } INP_LOCK(inp); - sin->sin_port = inp->inp_fport; - sin->sin_addr = inp->inp_faddr; + port = inp->inp_fport; + addr = inp->inp_faddr; INP_UNLOCK(inp); INP_INFO_RUNLOCK(pcbinfo); splx(s); - *nam = (struct sockaddr *)sin; + *nam = in_sockaddr(port, &addr); return 0; } Index: netinet/tcp_usrreq.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.79 diff -u -r1.79 tcp_usrreq.c --- netinet/tcp_usrreq.c 29 Jul 2002 09:01:39 -0000 1.79 +++ netinet/tcp_usrreq.c 6 Aug 2002 22:05:29 -0000 @@ -467,8 +467,8 @@ int error = 0; struct inpcb *inp = NULL; struct tcpcb *tp = NULL; - struct sockaddr_in *sin; - const int inirw = INI_READ; + struct in_addr addr; + in_port_t port = 0; TCPDEBUG0; if (so->so_state & SS_ISDISCONNECTED) { @@ -476,21 +476,12 @@ goto out; } - /* - * Do the malloc first in case it blocks. - */ - MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, - M_WAITOK | M_ZERO); - sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); - s = splnet(); INP_INFO_RLOCK(&tcbinfo); inp = sotoinpcb(so); if (!inp) { INP_INFO_RUNLOCK(&tcbinfo); splx(s); - free(sin, M_SONAME); return (EINVAL); } INP_LOCK(inp); @@ -499,14 +490,20 @@ TCPDEBUG1(); /* - * We inline in_setpeeraddr here, because we have already done - * the locking and the malloc. + * We inline in_setpeeraddr and COMMON_END here, so that we can + * copy the data of interest and defer the malloc until after we + * release the lock. */ - sin->sin_port = inp->inp_fport; - sin->sin_addr = inp->inp_faddr; - *nam = (struct sockaddr *)sin; + port = inp->inp_fport; + addr = inp->inp_faddr; - COMMON_END(PRU_ACCEPT); +out: TCPDEBUG2(PRU_ACCEPT); + if (tp) + INP_UNLOCK(inp); + splx(s); + if (error == 0) + *nam = in_sockaddr(port, &addr); + return error; } #ifdef INET6 @@ -517,7 +514,10 @@ struct inpcb *inp = NULL; int error = 0; struct tcpcb *tp = NULL; - const int inirw = INI_READ; + struct in_addr addr; + struct in6_addr addr6; + in_port_t port = 0; + int v4 = 0; TCPDEBUG0; if (so->so_state & SS_ISDISCONNECTED) { @@ -537,8 +537,31 @@ INP_INFO_RUNLOCK(&tcbinfo); tp = intotcpcb(inp); TCPDEBUG1(); - in6_mapped_peeraddr(so, nam); - COMMON_END(PRU_ACCEPT); + /* + * We inline in6_mapped_peeraddr and COMMON_END here, so that we can + * copy the data of interest and defer the malloc until after we + * release the lock. + */ + if (inp->inp_vflag & INP_IPV4) { + v4 = 1; + port = inp->inp_fport; + addr = inp->inp_faddr; + } else { + port = inp->inp_fport; + addr6 = inp->in6p_faddr; + } + +out: TCPDEBUG2(PRU_ACCEPT); + if (tp) + INP_UNLOCK(inp); + splx(s); + if (error == 0) { + if (v4) + *nam = in6_v4mapsin6_sockaddr(port, &addr); + else + *nam = in6_sockaddr(port, &addr6); + } + return error; } #endif /* INET6 */ Index: netinet6/in6_pcb.h =================================================================== RCS file: /home/ncvs/src/sys/netinet6/in6_pcb.h,v retrieving revision 1.8 diff -u -r1.8 in6_pcb.h --- netinet6/in6_pcb.h 14 Jun 2002 08:35:21 -0000 1.8 +++ netinet6/in6_pcb.h 6 Aug 2002 21:44:56 -0000 @@ -95,6 +95,10 @@ struct inpcb *(*)(struct inpcb *, int))); struct inpcb * in6_rtchange __P((struct inpcb *, int)); +struct sockaddr * + in6_sockaddr __P((in_port_t port, struct in6_addr *addr_p)); +struct sockaddr * + in6_v4mapsin6_sockaddr __P((in_port_t port, struct in_addr *addr_p)); int in6_setpeeraddr __P((struct socket *so, struct sockaddr **nam)); int in6_setsockaddr __P((struct socket *so, struct sockaddr **nam)); int in6_mapped_sockaddr __P((struct socket *so, struct sockaddr **nam)); Index: netinet6/in6_pcb.c =================================================================== RCS file: /home/ncvs/src/sys/netinet6/in6_pcb.c,v retrieving revision 1.31 diff -u -r1.31 in6_pcb.c --- netinet6/in6_pcb.c 14 Jun 2002 08:35:21 -0000 1.31 +++ netinet6/in6_pcb.c 6 Aug 2002 22:23:26 -0000 @@ -626,6 +626,50 @@ uma_zfree(ipi->ipi_zone, inp); } +struct sockaddr * +in6_sockaddr(port, addr_p) + in_port_t port; + struct in6_addr *addr_p; +{ + struct sockaddr_in6 *sin6; + + MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK); + bzero(sin6, sizeof *sin6); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_port = port; + sin6->sin6_addr = *addr_p; + if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) + sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); + else + sin6->sin6_scope_id = 0; /*XXX*/ + if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) + sin6->sin6_addr.s6_addr16[1] = 0; + + return (struct sockaddr *)sin6; +} + +struct sockaddr * +in6_v4mapsin6_sockaddr(port, addr_p) + in_port_t port; + struct in_addr *addr_p; +{ + struct sockaddr_in sin; + struct sockaddr_in6 *sin6_p; + + bzero(&sin, sizeof sin); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(sin); + sin.sin_port = port; + sin.sin_addr = *addr_p; + + MALLOC(sin6_p, struct sockaddr_in6 *, sizeof *sin6_p, M_SONAME, + M_WAITOK); + in6_sin_2_v4mapsin6(&sin, sin6_p); + + return (struct sockaddr *)sin6_p; +} + /* * The calling convention of in6_setsockaddr() and in6_setpeeraddr() was * modified to match the pru_sockaddr() and pru_peeraddr() entry points @@ -643,34 +687,20 @@ { int s; register struct inpcb *inp; - register struct sockaddr_in6 *sin6; - - /* - * Do the malloc first in case it blocks. - */ - MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK); - bzero(sin6, sizeof *sin6); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(*sin6); + struct in6_addr addr; + in_port_t port; s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); - free(sin6, M_SONAME); return EINVAL; } - sin6->sin6_port = inp->inp_lport; - sin6->sin6_addr = inp->in6p_laddr; + port = inp->inp_lport; + addr = inp->in6p_laddr; splx(s); - if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) - sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); - else - sin6->sin6_scope_id = 0; /*XXX*/ - if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) - sin6->sin6_addr.s6_addr16[1] = 0; - *nam = (struct sockaddr *)sin6; + *nam = in6_sockaddr(port, &addr); return 0; } @@ -681,34 +711,20 @@ { int s; struct inpcb *inp; - register struct sockaddr_in6 *sin6; - - /* - * Do the malloc first in case it blocks. - */ - MALLOC(sin6, struct sockaddr_in6 *, sizeof(*sin6), M_SONAME, M_WAITOK); - bzero((caddr_t)sin6, sizeof (*sin6)); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(struct sockaddr_in6); + struct in6_addr addr; + in_port_t port; s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); - free(sin6, M_SONAME); return EINVAL; } - sin6->sin6_port = inp->inp_fport; - sin6->sin6_addr = inp->in6p_faddr; + port = inp->inp_fport; + addr = inp->in6p_faddr; splx(s); - if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) - sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); - else - sin6->sin6_scope_id = 0; /*XXX*/ - if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) - sin6->sin6_addr.s6_addr16[1] = 0; - *nam = (struct sockaddr *)sin6; + *nam = in6_sockaddr(port, &addr); return 0; } To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-current" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200208062316.g76NG3wr091139>