Date: Thu, 7 Nov 2019 21:23:07 +0000 (UTC) From: Gleb Smirnoff <glebius@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r354483 - in head/sys/netinet: . tcp_stacks Message-ID: <201911072123.xA7LN7MA096129@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: glebius Date: Thu Nov 7 21:23:07 2019 New Revision: 354483 URL: https://svnweb.freebsd.org/changeset/base/354483 Log: Now that all of the tcp_input() and all its branches are executed in the network epoch, we can greatly simplify synchronization. Remove all unneccesary epoch enters hidden under INP_INFO_RLOCK macro. Remove some unneccesary assertions and convert necessary ones into the NET_EPOCH_ASSERT macro. Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_stacks/rack_bbr_common.c Modified: head/sys/netinet/tcp_input.c ============================================================================== --- head/sys/netinet/tcp_input.c Thu Nov 7 21:14:59 2019 (r354482) +++ head/sys/netinet/tcp_input.c Thu Nov 7 21:23:07 2019 (r354483) @@ -561,7 +561,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto) int rstreason = 0; /* For badport_bandlim accounting purposes */ uint8_t iptos; struct m_tag *fwd_tag = NULL; - struct epoch_tracker et; #ifdef INET6 struct ip6_hdr *ip6 = NULL; int isipv6; @@ -570,7 +569,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto) #endif /* INET6 */ struct tcpopt to; /* options in this segment */ char *s = NULL; /* address and port logging */ - int ti_locked; #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, @@ -581,6 +579,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto) short ostate = 0; #endif + NET_EPOCH_ASSERT(); + #ifdef INET6 isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; #endif @@ -747,19 +747,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto) drop_hdrlen = off0 + off; /* - * Locate pcb for segment; if we're likely to add or remove a - * connection then first acquire pcbinfo lock. There are three cases - * where we might discover later we need a write lock despite the - * flags: ACKs moving a connection out of the syncache, ACKs for a - * connection in TIMEWAIT and SYNs not targeting a listening socket. - */ - if ((thflags & (TH_FIN | TH_RST)) != 0) { - INP_INFO_RLOCK_ET(&V_tcbinfo, et); - ti_locked = TI_RLOCKED; - } else - ti_locked = TI_UNLOCKED; - - /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ if ( @@ -776,13 +763,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto) fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); findpcb: -#ifdef INVARIANTS - if (ti_locked == TI_RLOCKED) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - } else { - INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo); - } -#endif #ifdef INET6 if (isipv6 && fwd_tag != NULL) { struct sockaddr_in6 *next_hop6; @@ -942,12 +922,6 @@ findpcb: * XXXRW: It may be time to rethink timewait locking. */ if (inp->inp_flags & INP_TIMEWAIT) { - if (ti_locked == TI_UNLOCKED) { - INP_INFO_RLOCK_ET(&V_tcbinfo, et); - ti_locked = TI_RLOCKED; - } - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - if (thflags & TH_SYN) tcp_dooptions(&to, optp, optlen, TO_SYN); /* @@ -955,7 +929,6 @@ findpcb: */ if (tcp_twcheck(inp, &to, th, m, tlen)) goto findpcb; - INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return (IPPROTO_DONE); } /* @@ -977,27 +950,6 @@ findpcb: } #endif - /* - * We've identified a valid inpcb, but it could be that we need an - * inpcbinfo write lock but don't hold it. In this case, attempt to - * acquire using the same strategy as the TIMEWAIT case above. If we - * relock, we have to jump back to 'relocked' as the connection might - * now be in TIMEWAIT. - */ -#ifdef INVARIANTS - if ((thflags & (TH_FIN | TH_RST)) != 0) - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); -#endif - if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) || - (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) && - !IS_FASTOPEN(tp->t_flags)))) { - if (ti_locked == TI_UNLOCKED) { - INP_INFO_RLOCK_ET(&V_tcbinfo, et); - ti_locked = TI_RLOCKED; - } - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - } - #ifdef MAC INP_WLOCK_ASSERT(inp); if (mac_inpcb_check_deliver(inp, m)) @@ -1052,7 +1004,6 @@ findpcb: */ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* * Parse the TCP options here because * syncookies need access to the reflected @@ -1130,8 +1081,6 @@ tfo_socket_result: TCP_PROBE5(receive, NULL, tp, m, tp, th); tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos); - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return (IPPROTO_DONE); } /* @@ -1334,10 +1283,6 @@ tfo_socket_result: * Entry added to syncache and mbuf consumed. * Only the listen socket is unlocked by syncache_add(). */ - if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); - ti_locked = TI_UNLOCKED; - } INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo); return (IPPROTO_DONE); } else if (tp->t_state == TCPS_LISTEN) { @@ -1370,25 +1315,11 @@ tfo_socket_result: * the inpcb, and unlocks pcbinfo. */ tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos); - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return (IPPROTO_DONE); dropwithreset: TCP_PROBE5(receive, NULL, tp, m, tp, th); - if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); - ti_locked = TI_UNLOCKED; - } -#ifdef INVARIANTS - else { - KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset " - "ti_locked: %d", __func__, ti_locked)); - INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo); - } -#endif - if (inp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); INP_WUNLOCK(inp); @@ -1401,18 +1332,6 @@ dropunlock: if (m != NULL) TCP_PROBE5(receive, NULL, tp, m, tp, th); - if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); - ti_locked = TI_UNLOCKED; - } -#ifdef INVARIANTS - else { - KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock " - "ti_locked: %d", __func__, ti_locked)); - INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo); - } -#endif - if (inp != NULL) INP_WUNLOCK(inp); @@ -1501,7 +1420,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru struct mbuf *mfree; struct tcpopt to; int tfo_syn; - + #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, @@ -1516,16 +1435,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru tp->sackhint.last_sack_ack = 0; sack_changed = 0; nsegs = max(1, m->m_pkthdr.lro_nsegs); - /* - * If this is either a state-changing packet or current state isn't - * established, we require a write lock on tcbinfo. Otherwise, we - * allow the tcbinfo to be in either alocked or unlocked, as the - * caller may have unnecessarily acquired a write lock due to a race. - */ - if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || - tp->t_state != TCPS_ESTABLISHED) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - } + + NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", __func__)); @@ -2047,7 +1958,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru tcp_state_change(tp, TCPS_SYN_RECEIVED); } - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); /* @@ -2120,7 +2030,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); KASSERT(tp->t_state != TCPS_SYN_SENT, ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", __func__, th, tp)); @@ -2163,8 +2072,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru */ if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT && tp->t_state != TCPS_SYN_RECEIVED) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - TCPSTAT_INC(tcps_badsyn); if (V_tcp_insecure_syn && SEQ_GEQ(th->th_seq, tp->last_ack_sent) && @@ -2288,8 +2195,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru */ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && tlen) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data " "after socket was closed, " @@ -2875,7 +2780,6 @@ process_ACK: */ case TCPS_CLOSING: if (ourfinisacked) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); m_freem(m); return; @@ -2890,7 +2794,6 @@ process_ACK: */ case TCPS_LAST_ACK: if (ourfinisacked) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tp = tcp_close(tp); goto drop; } @@ -3139,8 +3042,6 @@ dodata: /* XXX */ * standard timers. */ case TCPS_FIN_WAIT_2: - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - tcp_twstart(tp); return; } Modified: head/sys/netinet/tcp_stacks/bbr.c ============================================================================== --- head/sys/netinet/tcp_stacks/bbr.c Thu Nov 7 21:14:59 2019 (r354482) +++ head/sys/netinet/tcp_stacks/bbr.c Thu Nov 7 21:23:07 2019 (r354483) @@ -8618,7 +8618,6 @@ dodata: /* XXX */ bbr->rc_timer_first = 1; bbr_timer_cancel(bbr, __LINE__, bbr->r_ctl.rc_rcvtime); - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); tcp_twstart(tp); return (1); @@ -9619,7 +9618,6 @@ bbr_check_data_after_close(struct mbuf *m, struct tcp_ struct tcpcb *tp, int32_t * tlen, struct tcphdr *th, struct socket *so) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); if (bbr->rc_allow_data_af_clo == 0) { close_now: tp = tcp_close(tp); @@ -9861,7 +9859,6 @@ bbr_do_closing(struct mbuf *m, struct tcphdr *th, stru return (ret_val); } if (ourfinisacked) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); m_freem(m); return (1); @@ -9974,7 +9971,6 @@ bbr_do_lastack(struct mbuf *m, struct tcphdr *th, stru return (ret_val); } if (ourfinisacked) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tp = tcp_close(tp); ctf_do_drop(m, tp); return (1); Modified: head/sys/netinet/tcp_stacks/rack.c ============================================================================== --- head/sys/netinet/tcp_stacks/rack.c Thu Nov 7 21:14:59 2019 (r354482) +++ head/sys/netinet/tcp_stacks/rack.c Thu Nov 7 21:23:07 2019 (r354483) @@ -5875,7 +5875,6 @@ dodata: /* XXX */ case TCPS_FIN_WAIT_2: rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); return (1); } @@ -6353,7 +6352,6 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, st tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); tcp_state_change(tp, TCPS_SYN_RECEIVED); } - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); /* * Advance th->th_seq to correspond to first data byte. If data, @@ -6847,7 +6845,6 @@ rack_check_data_after_close(struct mbuf *m, { struct tcp_rack *rack; - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); rack = (struct tcp_rack *)tp->t_fb_ptr; if (rack->rc_allow_data_af_clo == 0) { close_now: @@ -7079,7 +7076,6 @@ rack_do_closing(struct mbuf *m, struct tcphdr *th, str return (ret_val); } if (ourfinisacked) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); m_freem(m); return (1); @@ -7187,7 +7183,6 @@ rack_do_lastack(struct mbuf *m, struct tcphdr *th, str return (ret_val); } if (ourfinisacked) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tp = tcp_close(tp); ctf_do_drop(m, tp); return (1); @@ -7650,16 +7645,8 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr kern_prefetch(rack, &prev_state); prev_state = 0; thflags = th->th_flags; - /* - * If this is either a state-changing packet or current state isn't - * established, we require a read lock on tcbinfo. Otherwise, we - * allow the tcbinfo to be in either locked or unlocked, as the - * caller may have unnecessarily acquired a lock due to a race. - */ - if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || - tp->t_state != TCPS_ESTABLISHED) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - } + + NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", __func__)); Modified: head/sys/netinet/tcp_stacks/rack_bbr_common.c ============================================================================== --- head/sys/netinet/tcp_stacks/rack_bbr_common.c Thu Nov 7 21:14:59 2019 (r354482) +++ head/sys/netinet/tcp_stacks/rack_bbr_common.c Thu Nov 7 21:23:07 2019 (r354483) @@ -253,7 +253,6 @@ ctf_process_inbound_raw(struct tcpcb *tp, struct socke */ struct mbuf *m_save; struct ether_header *eh; - struct epoch_tracker et; struct tcphdr *th; #ifdef INET6 struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */ @@ -268,14 +267,8 @@ ctf_process_inbound_raw(struct tcpcb *tp, struct socke uint16_t drop_hdrlen; uint8_t iptos, no_vn=0, bpf_req=0; - /* - * This is a bit deceptive, we get the - * "info epoch" which is really the network - * epoch. This covers us on both any INP - * type change but also if the ifp goes - * away it covers us as well. - */ - INP_INFO_RLOCK_ET(&V_tcbinfo, et); + NET_EPOCH_ASSERT(); + if (m && m->m_pkthdr.rcvif) ifp = m->m_pkthdr.rcvif; else @@ -445,7 +438,6 @@ skip_vnet: } if (no_vn == 0) CURVNET_RESTORE(); - INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return(retval); } skipped_pkt: @@ -453,7 +445,6 @@ skipped_pkt: } if (no_vn == 0) CURVNET_RESTORE(); - INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return(retval); } @@ -680,7 +671,6 @@ ctf_process_rst(struct mbuf *m, struct tcphdr *th, str SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); KASSERT(tp->t_state != TCPS_SYN_SENT, ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", __func__, th, tp)); @@ -732,7 +722,8 @@ ctf_process_rst(struct mbuf *m, struct tcphdr *th, str void ctf_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ret_val) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + + NET_EPOCH_ASSERT(); TCPSTAT_INC(tcps_badsyn); if (V_tcp_insecure_syn &&
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201911072123.xA7LN7MA096129>