Date: Mon, 22 Dec 2003 16:45:11 -0800 (PST) From: Sam Leffler <sam@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 44208 for review Message-ID: <200312230045.hBN0jAkd009419@repoman.freebsd.org>
index | next in thread | raw e-mail
http://perforce.freebsd.org/chv.cgi?CH=44208 Change 44208 by sam@sam_ebb on 2003/12/22 16:44:11 Redo soreceive's handling of MT_CONTROL mbufs to eliminate a LOR between so_rcv and the file descriptor lock. Instead of calling the externalize method as each MT_CONTROL mbuf is identified, hold them until after the sockbuf lock has been dropped and then process them. This potentially changes the semantics of soreceive when errors occur; must monitor the effect of this change carefully. While here fix some bugs and cleanup some code: o handle sodupsockaddr failing; return ENOMEM if it happens o handle m_copy (aka m_copym) failing; return ENOBUFS if it happens o optimize the handling of controlp based on the knowledged that MT_CONTROL mbufs come one at a time o simplify MT_CONTROL processing by using if { do { } while } and hoisting code out of the loop o add a KASSERT to validate an assumption about error that's used to optimize some subsequent code o note that the goto restart case at the bottom of soreceive causes msg recv statistics to be wrong Open issue: it's unclear what to do if the externalize returns an error after retrieving data from the socket. This is likely to confuse applications. Affected files ... .. //depot/projects/netperf+sockets/sys/kern/uipc_socket.c#8 edit Differences ... ==== //depot/projects/netperf+sockets/sys/kern/uipc_socket.c#8 (text+ko) ==== @@ -775,14 +775,19 @@ int flags, len, error, offset; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; + struct mbuf *cm, **cme; int moff, type = 0; int orig_resid = uio->uio_resid; mp = mp0; if (psa) *psa = 0; - if (controlp) + cm = NULL; + if (controlp) { *controlp = 0; + cme = &cm; + } else + cme = &cm; /* XXX to silence gcc */ if (flagsp) flags = *flagsp &~ MSG_EOR; else @@ -893,6 +898,7 @@ goto restart; } dontblock: + KASSERT(error == 0, ("unexpected state, error %u", error)); if (uio->uio_td) uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++; SBLASTRECORDCHK(&so->so_rcv); @@ -901,10 +907,14 @@ if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); - orig_resid = 0; - if (psa) + if (psa) { *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_NOWAIT); /* XXX */ + if (*psa == NULL) { + error = ENOMEM; + goto release; + } + } if (flags & MSG_PEEK) { m = m->m_next; } else { @@ -912,30 +922,39 @@ so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; } + orig_resid = 0; } - while (m && m->m_type == MT_CONTROL && error == 0) { - if (flags & MSG_PEEK) { - if (controlp) - *controlp = m_copy(m, 0, m->m_len); - m = m->m_next; - } else { - sbfree(&so->so_rcv, m); - so->so_rcv.sb_mb = m->m_next; - m->m_next = NULL; - if (pr->pr_domain->dom_externalize) - error = - (*pr->pr_domain->dom_externalize)(m, controlp); - else if (controlp) - *controlp = m; - else - m_freem(m); - m = so->so_rcv.sb_mb; - } - if (controlp) { - orig_resid = 0; - while (*controlp != NULL) - controlp = &(*controlp)->m_next; - } + if (m && m->m_type == MT_CONTROL) { + do { + if (flags & MSG_PEEK) { + if (controlp) { + *controlp = m_copy(m, 0, m->m_len); + if (*controlp == NULL) { + error = ENOBUFS; + goto release; + } + controlp = &(*controlp)->m_next; + } + m = m->m_next; + } else { + sbfree(&so->so_rcv, m); + so->so_rcv.sb_mb = m->m_next; + m->m_next = NULL; + if (controlp) { + /* + * Link mbufs together for processing + * below. See the comments there for + * an explanation of why we delay the + * work. + */ + *cme = m; + cme = &(*cme)->m_next; + } else + m_free(m); + m = so->so_rcv.sb_mb; + } + } while (m && m->m_type == MT_CONTROL); + orig_resid = 0; } if (m) { if ((flags & MSG_PEEK) == 0) { @@ -1132,7 +1151,7 @@ } if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) - goto restart; + goto restart; /* XXX multi-counts msgs */ if (flagsp) *flagsp |= flags; @@ -1140,6 +1159,20 @@ sbunlock(&so->so_rcv); out: SOCKBUF_UNLOCK(&so->so_rcv); + if (cm != NULL) { + /* + * Deal with control data now that we've dopped the + * sockbuf lock. This is important as otherwise, for + * unix domain sockets, we create a LOR between so_rcv + * and the file descriptor lock. Note we assume the + * externalize method handles a list of mbufs. + */ + if (error == 0 && pr->pr_domain->dom_externalize) { + /* XXX ignore error? */ + error = (*pr->pr_domain->dom_externalize)(cm, controlp); + } else + m_freem(cm); + } return (error); }help
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200312230045.hBN0jAkd009419>
