From owner-svn-src-head@freebsd.org Tue Mar 21 06:39:52 2017 Return-Path: Delivered-To: svn-src-head@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 9ED4ED164CF; Tue, 21 Mar 2017 06:39:52 +0000 (UTC) (envelope-from glebius@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 3CEC01ADF; Tue, 21 Mar 2017 06:39:52 +0000 (UTC) (envelope-from glebius@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id v2L6dp9j055539; Tue, 21 Mar 2017 06:39:51 GMT (envelope-from glebius@FreeBSD.org) Received: (from glebius@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id v2L6dnRf055522; Tue, 21 Mar 2017 06:39:49 GMT (envelope-from glebius@FreeBSD.org) Message-Id: <201703210639.v2L6dnRf055522@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: glebius set sender to glebius@FreeBSD.org using -f From: Gleb Smirnoff Date: Tue, 21 Mar 2017 06:39:49 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r315662 - in head: contrib/bsnmp/snmp_mibII contrib/ipfilter/ipsend lib/libprocstat sys/netinet sys/sys usr.bin/netstat usr.bin/sockstat usr.bin/systat usr.sbin/tcpdrop usr.sbin/trpt X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 21 Mar 2017 06:39:52 -0000 Author: glebius Date: Tue Mar 21 06:39:49 2017 New Revision: 315662 URL: https://svnweb.freebsd.org/changeset/base/315662 Log: Hide struct inpcb, struct tcpcb from the userland. This is a painful change, but it is needed. On the one hand, we avoid modifying them, and this slows down some ideas, on the other hand we still eventually modify them and tools like netstat(1) never work on next version of FreeBSD. We maintain a ton of spares in them, and we already got some ifdef hell at the end of tcpcb. Details: - Hide struct inpcb, struct tcpcb under _KERNEL || _WANT_FOO. - Make struct xinpcb, struct xtcpcb pure API structures, not including kernel structures inpcb and tcpcb inside. Export into these structures the fields from inpcb and tcpcb that are known to be used, and put there a ton of spare space. - Make kernel and userland utilities compilable after these changes. - Bump __FreeBSD_version. Reviewed by: rrs, gnn Differential Revision: D10018 Modified: head/contrib/bsnmp/snmp_mibII/mibII_tcp.c head/contrib/bsnmp/snmp_mibII/mibII_udp.c head/contrib/ipfilter/ipsend/sock.c head/lib/libprocstat/libprocstat.c head/sys/netinet/in_pcb.c head/sys/netinet/in_pcb.h head/sys/netinet/ip_divert.c head/sys/netinet/raw_ip.c head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_syncache.c head/sys/netinet/tcp_timer.c head/sys/netinet/tcp_timer.h head/sys/netinet/tcp_var.h head/sys/netinet/udp_usrreq.c head/sys/sys/param.h head/usr.bin/netstat/inet.c head/usr.bin/sockstat/sockstat.c head/usr.bin/systat/extern.h head/usr.bin/systat/netcmds.c head/usr.bin/systat/netstat.c head/usr.sbin/tcpdrop/tcpdrop.c head/usr.sbin/trpt/trpt.c Modified: head/contrib/bsnmp/snmp_mibII/mibII_tcp.c ============================================================================== --- head/contrib/bsnmp/snmp_mibII/mibII_tcp.c Tue Mar 21 05:15:10 2017 (r315661) +++ head/contrib/bsnmp/snmp_mibII/mibII_tcp.c Tue Mar 21 06:39:49 2017 (r315662) @@ -310,7 +310,7 @@ op_tcpconn(struct snmp_context *ctx __un switch (value->var.subs[sub - 1]) { case LEAF_tcpConnState: - switch (tcpoids[i].tp->xt_tp.t_state) { + switch (tcpoids[i].tp->t_state) { case TCPS_CLOSED: value->v.integer = 1; Modified: head/contrib/bsnmp/snmp_mibII/mibII_udp.c ============================================================================== --- head/contrib/bsnmp/snmp_mibII/mibII_udp.c Tue Mar 21 05:15:10 2017 (r315661) +++ head/contrib/bsnmp/snmp_mibII/mibII_udp.c Tue Mar 21 06:39:49 2017 (r315662) @@ -105,8 +105,8 @@ fetch_udp(void) ptr->xig_len > sizeof(struct xinpgen); ptr = (struct xinpgen *)(void *)((char *)ptr + ptr->xig_len)) { inp = (struct xinpcb *)ptr; - if (inp->xi_inp.inp_gencnt > xinpgen->xig_gen || - (inp->xi_inp.inp_vflag & INP_IPV4) == 0) + if (inp->inp_gencnt > xinpgen->xig_gen || + (inp->inp_vflag & INP_IPV4) == 0) continue; udp_total++; @@ -128,17 +128,17 @@ fetch_udp(void) ptr->xig_len > sizeof(struct xinpgen); ptr = (struct xinpgen *)(void *)((char *)ptr + ptr->xig_len)) { inp = (struct xinpcb *)ptr; - if (inp->xi_inp.inp_gencnt > xinpgen->xig_gen || - (inp->xi_inp.inp_vflag & INP_IPV4) == 0) + if (inp->inp_gencnt > xinpgen->xig_gen || + (inp->inp_vflag & INP_IPV4) == 0) continue; oid->inp = inp; oid->index.len = 5; - inaddr = ntohl(inp->xi_inp.inp_laddr.s_addr); + inaddr = ntohl(inp->inp_laddr.s_addr); oid->index.subs[0] = (inaddr >> 24) & 0xff; oid->index.subs[1] = (inaddr >> 16) & 0xff; oid->index.subs[2] = (inaddr >> 8) & 0xff; oid->index.subs[3] = (inaddr >> 0) & 0xff; - oid->index.subs[4] = ntohs(inp->xi_inp.inp_lport); + oid->index.subs[4] = ntohs(inp->inp_lport); oid++; } Modified: head/contrib/ipfilter/ipsend/sock.c ============================================================================== --- head/contrib/ipfilter/ipsend/sock.c Tue Mar 21 05:15:10 2017 (r315661) +++ head/contrib/ipfilter/ipsend/sock.c Tue Mar 21 06:39:49 2017 (r315662) @@ -78,8 +78,10 @@ typedef int boolean_t; # include #endif #include +#define _WANT_INPCB #include #include +#define _WANT_TCPCB #include #include #include Modified: head/lib/libprocstat/libprocstat.c ============================================================================== --- head/lib/libprocstat/libprocstat.c Tue Mar 21 05:15:10 2017 (r315661) +++ head/lib/libprocstat/libprocstat.c Tue Mar 21 06:39:49 2017 (r315662) @@ -82,6 +82,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#define _WANT_INPCB #include #include Modified: head/sys/netinet/in_pcb.c ============================================================================== --- head/sys/netinet/in_pcb.c Tue Mar 21 05:15:10 2017 (r315661) +++ head/sys/netinet/in_pcb.c Tue Mar 21 06:39:49 2017 (r315662) @@ -2434,6 +2434,41 @@ so_sototcpcb(struct socket *so) return (sototcpcb(so)); } +/* + * Create an external-format (``xinpcb'') structure using the information in + * the kernel-format in_pcb structure pointed to by inp. This is done to + * reduce the spew of irrelevant information over this interface, to isolate + * user code from changes in the kernel structure, and potentially to provide + * information-hiding if we decide that some of this information should be + * hidden from users. + */ +void +in_pcbtoxinpcb(const struct inpcb *inp, struct xinpcb *xi) +{ + + xi->xi_len = sizeof(struct xinpcb); + if (inp->inp_socket) + sotoxsocket(inp->inp_socket, &xi->xi_socket); + else + bzero(&xi->xi_socket, sizeof(struct xsocket)); + bcopy(&inp->inp_inc, &xi->inp_inc, sizeof(struct in_conninfo)); + xi->inp_gencnt = inp->inp_gencnt; + xi->inp_ppcb = inp->inp_ppcb; + xi->inp_flow = inp->inp_flow; + xi->inp_flowid = inp->inp_flowid; + xi->inp_flowtype = inp->inp_flowtype; + xi->inp_flags = inp->inp_flags; + xi->inp_flags2 = inp->inp_flags2; + xi->inp_rss_listen_bucket = inp->inp_rss_listen_bucket; + xi->in6p_cksum = inp->in6p_cksum; + xi->in6p_hops = inp->in6p_hops; + xi->inp_ip_tos = inp->inp_ip_tos; + xi->inp_vflag = inp->inp_vflag; + xi->inp_ip_ttl = inp->inp_ip_ttl; + xi->inp_ip_p = inp->inp_ip_p; + xi->inp_ip_minttl = inp->inp_ip_minttl; +} + #ifdef DDB static void db_print_indent(int indent) Modified: head/sys/netinet/in_pcb.h ============================================================================== --- head/sys/netinet/in_pcb.h Tue Mar 21 05:15:10 2017 (r315661) +++ head/sys/netinet/in_pcb.h Tue Mar 21 06:39:49 2017 (r315662) @@ -53,7 +53,6 @@ #define in6pcb inpcb /* for KAME src sync over BSD*'s */ #define in6p_sp inp_sp /* for KAME src sync over BSD*'s */ -struct inpcbpolicy; /* * struct inpcb is the common protocol control block structure used in most @@ -65,7 +64,7 @@ struct inpcbpolicy; */ LIST_HEAD(inpcbhead, inpcb); LIST_HEAD(inpcbporthead, inpcbport); -typedef u_quad_t inp_gen_t; +typedef uint64_t inp_gen_t; /* * PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet. @@ -130,9 +129,8 @@ struct in_conninfo { #define inc6_laddr inc_ie.ie6_laddr #define inc6_zoneid inc_ie.ie6_zoneid -struct icmp6_filter; - -/*- +#if defined(_KERNEL) || defined(_WANT_INPCB) +/* * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and * IPv6 sockets. In the case of TCP and UDP, further per-connection state is * hung off of inp_ppcb most of the time. Almost all fields of struct inpcb @@ -181,6 +179,8 @@ struct icmp6_filter; * read-lock usage during modification, this model can be applied to other * protocols (especially SCTP). */ +struct icmp6_filter; +struct inpcbpolicy; struct m_snd_tag; struct inpcb { LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */ @@ -204,10 +204,8 @@ struct inpcb { uint32_t inp_flowid; /* (x) flow id / queue id */ u_int inp_refcount; /* (i) refcount */ struct m_snd_tag *inp_snd_tag; /* (i) send tag for outgoing mbufs */ - void *inp_pspare[4]; /* (x) general use */ uint32_t inp_flowtype; /* (x) M_HASHTYPE value */ uint32_t inp_rss_listen_bucket; /* (x) overridden RSS listen bucket */ - u_int inp_ispare[4]; /* (x) user cookie / general use */ /* Local and foreign ports, local and foreign addr. */ struct in_conninfo inp_inc; /* (i) list for PCB's local port */ @@ -218,23 +216,23 @@ struct inpcb { /* Protocol-dependent part; options. */ struct { - u_char inp4_ip_tos; /* (i) type of service proto */ - struct mbuf *inp4_options; /* (i) IP options */ - struct ip_moptions *inp4_moptions; /* (i) IP mcast options */ - } inp_depend4; + u_char inp_ip_tos; /* (i) type of service proto */ + struct mbuf *inp_options; /* (i) IP options */ + struct ip_moptions *inp_moptions; /* (i) mcast options */ + }; struct { /* (i) IP options */ - struct mbuf *inp6_options; + struct mbuf *in6p_options; /* (i) IP6 options for outgoing packets */ - struct ip6_pktopts *inp6_outputopts; + struct ip6_pktopts *in6p_outputopts; /* (i) IP multicast options */ - struct ip6_moptions *inp6_moptions; + struct ip6_moptions *in6p_moptions; /* (i) ICMPv6 code type filter */ - struct icmp6_filter *inp6_icmp6filt; + struct icmp6_filter *in6p_icmp6filt; /* (i) IPV6_CHECKSUM setsockopt */ - int inp6_cksum; - short inp6_hops; - } inp_depend6; + int in6p_cksum; + short in6p_hops; + }; LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */ struct inpcbport *inp_phd; /* (i/h) head of this list */ #define inp_zero_size offsetof(struct inpcb, inp_gencnt) @@ -249,24 +247,17 @@ struct inpcb { #define inp_route inp_rtu.inpu_route #define inp_route6 inp_rtu.inpu_route6 }; +#endif /* _KERNEL */ + #define inp_fport inp_inc.inc_fport #define inp_lport inp_inc.inc_lport #define inp_faddr inp_inc.inc_faddr #define inp_laddr inp_inc.inc_laddr -#define inp_ip_tos inp_depend4.inp4_ip_tos -#define inp_options inp_depend4.inp4_options -#define inp_moptions inp_depend4.inp4_moptions #define in6p_faddr inp_inc.inc6_faddr #define in6p_laddr inp_inc.inc6_laddr #define in6p_zoneid inp_inc.inc6_zoneid -#define in6p_hops inp_depend6.inp6_hops /* default hop limit */ #define in6p_flowinfo inp_flow -#define in6p_options inp_depend6.inp6_options -#define in6p_outputopts inp_depend6.inp6_outputopts -#define in6p_moptions inp_depend6.inp6_moptions -#define in6p_icmp6filt inp_depend6.inp6_icmp6filt -#define in6p_cksum inp_depend6.inp6_cksum #define inp_vnet inp_pcbinfo->ipi_vnet @@ -280,21 +271,53 @@ struct inpcb { /* * Interface exported to userland by various protocols which use inpcbs. Hack * alert -- only define if struct xsocket is in scope. + * Fields prefixed with "xi_" are unique to this structure, and the rest + * match fields in the struct inpcb, to ease coding and porting. + * + * Legend: + * (s) - used by userland utilities in src + * (p) - used by utilities in ports + * (3) - is known to be used by third party software not in ports + * (n) - no known usage */ #ifdef _SYS_SOCKETVAR_H_ -struct xinpcb { - size_t xi_len; /* length of this structure */ - struct inpcb xi_inp; - struct xsocket xi_socket; - u_quad_t xi_alignment_hack; -}; - -struct xinpgen { - size_t xig_len; /* length of this structure */ - u_int xig_count; /* number of PCBs at this time */ - inp_gen_t xig_gen; /* generation count at this time */ - so_gen_t xig_sogen; /* socket generation count at this time */ +struct xinpcb { + size_t xi_len; /* length of this structure */ + struct xsocket xi_socket; /* (s,p) */ + struct in_conninfo inp_inc; /* (s,p) */ + uint64_t inp_gencnt; /* (s,p) */ + union { + void *inp_ppcb; /* (s) netstat(1) */ + int64_t ph_ppcb; + }; + int64_t inp_spare64[4]; + uint32_t inp_flow; /* (s) */ + uint32_t inp_flowid; /* (s) */ + uint32_t inp_flowtype; /* (s) */ + int32_t inp_flags; /* (s,p) */ + int32_t inp_flags2; /* (s) */ + int32_t inp_rss_listen_bucket; /* (n) */ + int32_t in6p_cksum; /* (n) */ + int32_t inp_spare32[4]; + uint16_t in6p_hops; /* (n) */ + uint8_t inp_ip_tos; /* (n) */ + int8_t pad8; + uint8_t inp_vflag; /* (s,p) */ + uint8_t inp_ip_ttl; /* (n) */ + uint8_t inp_ip_p; /* (n) */ + uint8_t inp_ip_minttl; /* (n) */ + int8_t inp_spare8[4]; +} __aligned(8); + +struct xinpgen { + size_t xig_len; /* length of this structure */ + u_int xig_count; /* number of PCBs at this time */ + inp_gen_t xig_gen; /* generation count at this time */ + so_gen_t xig_sogen; /* socket generation count this time */ }; +#ifdef _KERNEL +void in_pcbtoxinpcb(const struct inpcb *, struct xinpcb *); +#endif #endif /* _SYS_SOCKETVAR_H_ */ struct inpcbport { Modified: head/sys/netinet/ip_divert.c ============================================================================== --- head/sys/netinet/ip_divert.c Tue Mar 21 05:15:10 2017 (r315661) +++ head/sys/netinet/ip_divert.c Tue Mar 21 06:39:49 2017 (r315662) @@ -691,12 +691,8 @@ div_pcblist(SYSCTL_HANDLER_ARGS) INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; - bzero(&xi, sizeof(xi)); - xi.xi_len = sizeof xi; - /* XXX should avoid extra copy */ - bcopy(inp, &xi.xi_inp, sizeof *inp); - if (inp->inp_socket) - sotoxsocket(inp->inp_socket, &xi.xi_socket); + + in_pcbtoxinpcb(inp, &xi); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else Modified: head/sys/netinet/raw_ip.c ============================================================================== --- head/sys/netinet/raw_ip.c Tue Mar 21 05:15:10 2017 (r315661) +++ head/sys/netinet/raw_ip.c Tue Mar 21 06:39:49 2017 (r315662) @@ -1077,12 +1077,7 @@ rip_pcblist(SYSCTL_HANDLER_ARGS) if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; - bzero(&xi, sizeof(xi)); - xi.xi_len = sizeof xi; - /* XXX should avoid extra copy */ - bcopy(inp, &xi.xi_inp, sizeof *inp); - if (inp->inp_socket) - sotoxsocket(inp->inp_socket, &xi.xi_socket); + in_pcbtoxinpcb(inp, &xi); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else Modified: head/sys/netinet/tcp_subr.c ============================================================================== --- head/sys/netinet/tcp_subr.c Tue Mar 21 05:15:10 2017 (r315661) +++ head/sys/netinet/tcp_subr.c Tue Mar 21 06:39:49 2017 (r315662) @@ -1773,30 +1773,8 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xtcpcb xt; - void *inp_ppcb; - bzero(&xt, sizeof(xt)); - xt.xt_len = sizeof xt; - /* XXX should avoid extra copy */ - bcopy(inp, &xt.xt_inp, sizeof *inp); - inp_ppcb = inp->inp_ppcb; - if (inp_ppcb == NULL) - bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); - else if (inp->inp_flags & INP_TIMEWAIT) { - bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); - xt.xt_tp.t_state = TCPS_TIME_WAIT; - } else { - bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); - if (xt.xt_tp.t_timers) - tcp_timer_to_xtimer(&xt.xt_tp, xt.xt_tp.t_timers, &xt.xt_timer); - } - if (inp->inp_socket != NULL) - sotoxsocket(inp->inp_socket, &xt.xt_socket); - else { - bzero(&xt.xt_socket, sizeof xt.xt_socket); - xt.xt_socket.xso_protocol = IPPROTO_TCP; - } - xt.xt_inp.inp_gencnt = inp->inp_gencnt; + tcp_inptoxtp(inp, &xt); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xt, sizeof xt); } else @@ -2765,3 +2743,53 @@ tcp_state_change(struct tcpcb *tp, int n tp->t_state = newstate; TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate); } + +/* + * Create an external-format (``xtcpcb'') structure using the information in + * the kernel-format tcpcb structure pointed to by tp. This is done to + * reduce the spew of irrelevant information over this interface, to isolate + * user code from changes in the kernel structure, and potentially to provide + * information-hiding if we decide that some of this information should be + * hidden from users. + */ +void +tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *xt) +{ + struct tcpcb *tp = intotcpcb(inp); + sbintime_t now; + + if (inp->inp_flags & INP_TIMEWAIT) { + bzero(xt, sizeof(struct xtcpcb)); + xt->t_state = TCPS_TIME_WAIT; + } else { + xt->t_state = tp->t_state; + xt->t_flags = tp->t_flags; + xt->t_sndzerowin = tp->t_sndzerowin; + xt->t_sndrexmitpack = tp->t_sndrexmitpack; + xt->t_rcvoopack = tp->t_rcvoopack; + + now = getsbinuptime(); +#define COPYTIMER(ttt) do { \ + if (callout_active(&tp->t_timers->ttt)) \ + xt->ttt = (tp->t_timers->ttt.c_time - now) / \ + SBT_1MS; \ + else \ + xt->ttt = 0; \ +} while (0) + COPYTIMER(tt_delack); + COPYTIMER(tt_rexmt); + COPYTIMER(tt_persist); + COPYTIMER(tt_keep); + COPYTIMER(tt_2msl); +#undef COPYTIMER + xt->t_rcvtime = 1000 * (ticks - tp->t_rcvtime) / hz; + + bcopy(tp->t_fb->tfb_tcp_block_name, xt->xt_stack, + TCP_FUNCTION_NAME_LEN_MAX); + } + + xt->xt_len = sizeof(struct xtcpcb); + in_pcbtoxinpcb(inp, &xt->xt_inp); + if (inp->inp_socket == NULL) + xt->xt_inp.xi_socket.xso_protocol = IPPROTO_TCP; +} Modified: head/sys/netinet/tcp_syncache.c ============================================================================== --- head/sys/netinet/tcp_syncache.c Tue Mar 21 05:15:10 2017 (r315661) +++ head/sys/netinet/tcp_syncache.c Tue Mar 21 06:39:49 2017 (r315662) @@ -2217,13 +2217,13 @@ syncache_pcblist(struct sysctl_req *req, xt.xt_inp.inp_vflag = INP_IPV6; else xt.xt_inp.inp_vflag = INP_IPV4; - bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc, sizeof (struct in_conninfo)); - xt.xt_tp.t_inpcb = &xt.xt_inp; - xt.xt_tp.t_state = TCPS_SYN_RECEIVED; - xt.xt_socket.xso_protocol = IPPROTO_TCP; - xt.xt_socket.xso_len = sizeof (struct xsocket); - xt.xt_socket.so_type = SOCK_STREAM; - xt.xt_socket.so_state = SS_ISCONNECTING; + bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc, + sizeof (struct in_conninfo)); + xt.t_state = TCPS_SYN_RECEIVED; + xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP; + xt.xt_inp.xi_socket.xso_len = sizeof (struct xsocket); + xt.xt_inp.xi_socket.so_type = SOCK_STREAM; + xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING; error = SYSCTL_OUT(req, &xt, sizeof xt); if (error) { SCH_UNLOCK(sch); Modified: head/sys/netinet/tcp_timer.c ============================================================================== --- head/sys/netinet/tcp_timer.c Tue Mar 21 05:15:10 2017 (r315661) +++ head/sys/netinet/tcp_timer.c Tue Mar 21 06:39:49 2017 (r315662) @@ -1006,28 +1006,3 @@ tcp_timer_stop(struct tcpcb *tp, uint32_ tp->t_timers->tt_draincnt++; } } - -#define ticks_to_msecs(t) (1000*(t) / hz) - -void -tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, - struct xtcp_timer *xtimer) -{ - sbintime_t now; - - bzero(xtimer, sizeof(*xtimer)); - if (timer == NULL) - return; - now = getsbinuptime(); - if (callout_active(&timer->tt_delack)) - xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS; - if (callout_active(&timer->tt_rexmt)) - xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS; - if (callout_active(&timer->tt_persist)) - xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS; - if (callout_active(&timer->tt_keep)) - xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS; - if (callout_active(&timer->tt_2msl)) - xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS; - xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime); -} Modified: head/sys/netinet/tcp_timer.h ============================================================================== --- head/sys/netinet/tcp_timer.h Tue Mar 21 05:15:10 2017 (r315661) +++ head/sys/netinet/tcp_timer.h Tue Mar 21 06:39:49 2017 (r315662) @@ -210,8 +210,6 @@ void tcp_timer_keep(void *xtp); void tcp_timer_persist(void *xtp); void tcp_timer_rexmt(void *xtp); void tcp_timer_delack(void *xtp); -void tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, - struct xtcp_timer *xtimer); #endif /* _KERNEL */ Modified: head/sys/netinet/tcp_var.h ============================================================================== --- head/sys/netinet/tcp_var.h Tue Mar 21 05:15:10 2017 (r315661) +++ head/sys/netinet/tcp_var.h Tue Mar 21 06:39:49 2017 (r315662) @@ -39,15 +39,9 @@ #ifdef _KERNEL #include #include +#endif -/* - * Kernel variables for tcp. - */ -VNET_DECLARE(int, tcp_do_rfc1323); -#define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323) - -#endif /* _KERNEL */ - +#if defined(_KERNEL) || defined(_WANT_TCPCB) /* TCP segment queue entry */ struct tseg_qent { LIST_ENTRY(tseg_qent) tqe_q; @@ -83,90 +77,12 @@ struct sackhint { uint64_t _pad[1]; /* TBD */ }; -struct tcptemp { - u_char tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */ - struct tcphdr tt_t; -}; - -#define tcp6cb tcpcb /* for KAME src sync over BSD*'s */ - -/* - * TODO: We yet need to brave plowing in - * to tcp_input() and the pru_usrreq() block. - * Right now these go to the old standards which - * are somewhat ok, but in the long term may - * need to be changed. If we do tackle tcp_input() - * then we need to get rid of the tcp_do_segment() - * function below. - */ -/* Flags for tcp functions */ -#define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */ -struct tcpcb; -struct inpcb; -struct sockopt; -struct socket; - -/* - * If defining the optional tcp_timers, in the - * tfb_tcp_timer_stop call you must use the - * callout_async_drain() function with the - * tcp_timer_discard callback. You should check - * the return of callout_async_drain() and if 0 - * increment tt_draincnt. Since the timer sub-system - * does not know your callbacks you must provide a - * stop_all function that loops through and calls - * tcp_timer_stop() with each of your defined timers. - * Adding a tfb_tcp_handoff_ok function allows the socket - * option to change stacks to query you even if the - * connection is in a later stage. You return 0 to - * say you can take over and run your stack, you return - * non-zero (an error number) to say no you can't. - * If the function is undefined you can only change - * in the early states (before connect or listen). - * tfb_tcp_fb_fini is changed to add a flag to tell - * the old stack if the tcb is being destroyed or - * not. A one in the flag means the TCB is being - * destroyed, a zero indicates its transitioning to - * another stack (via socket option). - */ -struct tcp_function_block { - char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX]; - int (*tfb_tcp_output)(struct tcpcb *); - void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *, - struct socket *, struct tcpcb *, - int, int, uint8_t, - int); - int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt, - struct inpcb *inp, struct tcpcb *tp); - /* Optional memory allocation/free routine */ - void (*tfb_tcp_fb_init)(struct tcpcb *); - void (*tfb_tcp_fb_fini)(struct tcpcb *, int); - /* Optional timers, must define all if you define one */ - int (*tfb_tcp_timer_stop_all)(struct tcpcb *); - void (*tfb_tcp_timer_activate)(struct tcpcb *, - uint32_t, u_int); - int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t); - void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t); - void (*tfb_tcp_rexmit_tmr)(struct tcpcb *); - int (*tfb_tcp_handoff_ok)(struct tcpcb *); - volatile uint32_t tfb_refcnt; - uint32_t tfb_flags; -}; - -struct tcp_function { - TAILQ_ENTRY(tcp_function) tf_next; - struct tcp_function_block *tf_fb; -}; - -TAILQ_HEAD(tcp_funchead, tcp_function); - /* * Tcp control block, one per tcp; fields: * Organized for 16 byte cacheline efficiency. */ struct tcpcb { struct tsegqe_head t_segq; /* segment reassembly queue */ - void *t_pspare[2]; /* new reassembly queue */ int t_segqlen; /* segment reassembly queue length */ int t_dupacks; /* consecutive dup acks recd */ @@ -197,12 +113,10 @@ struct tcpcb { uint32_t snd_wnd; /* send window */ uint32_t snd_cwnd; /* congestion-controlled window */ - u_long snd_spare1; /* unused */ uint32_t snd_ssthresh; /* snd_cwnd size threshold for * for slow start exponential to * linear switch */ - u_long snd_spare2; /* unused */ tcp_seq snd_recover; /* for use in NewReno Fast Recovery */ u_int t_rcvtime; /* inactivity time */ @@ -210,9 +124,6 @@ struct tcpcb { u_int t_rtttime; /* RTT measurement start time */ tcp_seq t_rtseq; /* sequence number being timed */ - u_int t_bw_spare1; /* unused */ - tcp_seq t_bw_spare2; /* unused */ - int t_rxtcur; /* current retransmit value (ticks) */ u_int t_maxseg; /* maximum segment size */ u_int t_pmtud_saved_maxseg; /* pre-blackhole MSS */ @@ -276,32 +187,97 @@ struct tcpcb { u_int t_tsomaxsegcount; /* TSO maximum segment count */ u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */ u_int t_flags2; /* More tcpcb flags storage */ -#if defined(_KERNEL) && defined(TCP_RFC7413) - uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */ - uint64_t t_tfo_cookie; /* TCP Fast Open cookie */ -#else - uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */ -#endif struct tcp_function_block *t_fb;/* TCP function call block */ void *t_fb_ptr; /* Pointer to t_fb specific data */ -#if defined(_KERNEL) && defined(TCP_RFC7413) +#ifdef TCP_RFC7413 + uint64_t t_tfo_cookie; /* TCP Fast Open cookie */ unsigned int *t_tfo_pending; /* TCP Fast Open pending counter */ - void *t_pspare2[1]; /* 1 TCP_SIGNATURE */ -#else - void *t_pspare2[2]; /* 1 TCP_SIGNATURE, 1 TBD */ #endif -#if defined(_KERNEL) && defined(TCPPCAP) +#ifdef TCPPCAP struct mbufq t_inpkts; /* List of saved input packets. */ struct mbufq t_outpkts; /* List of saved output packets. */ -#ifdef _LP64 - uint64_t _pad[0]; /* all used! */ -#else - uint64_t _pad[2]; /* 2 are available */ -#endif /* _LP64 */ -#else - uint64_t _pad[6]; -#endif /* defined(_KERNEL) && defined(TCPPCAP) */ +#endif }; +#endif /* _KERNEL || _WANT_TCPCB */ + +#ifdef _KERNEL +/* + * Kernel variables for tcp. + */ +VNET_DECLARE(int, tcp_do_rfc1323); +#define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323) + +struct tcptemp { + u_char tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */ + struct tcphdr tt_t; +}; + +/* + * TODO: We yet need to brave plowing in + * to tcp_input() and the pru_usrreq() block. + * Right now these go to the old standards which + * are somewhat ok, but in the long term may + * need to be changed. If we do tackle tcp_input() + * then we need to get rid of the tcp_do_segment() + * function below. + */ +/* Flags for tcp functions */ +#define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */ + +/* + * If defining the optional tcp_timers, in the + * tfb_tcp_timer_stop call you must use the + * callout_async_drain() function with the + * tcp_timer_discard callback. You should check + * the return of callout_async_drain() and if 0 + * increment tt_draincnt. Since the timer sub-system + * does not know your callbacks you must provide a + * stop_all function that loops through and calls + * tcp_timer_stop() with each of your defined timers. + * Adding a tfb_tcp_handoff_ok function allows the socket + * option to change stacks to query you even if the + * connection is in a later stage. You return 0 to + * say you can take over and run your stack, you return + * non-zero (an error number) to say no you can't. + * If the function is undefined you can only change + * in the early states (before connect or listen). + * tfb_tcp_fb_fini is changed to add a flag to tell + * the old stack if the tcb is being destroyed or + * not. A one in the flag means the TCB is being + * destroyed, a zero indicates its transitioning to + * another stack (via socket option). + */ +struct tcp_function_block { + char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX]; + int (*tfb_tcp_output)(struct tcpcb *); + void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *, + struct socket *, struct tcpcb *, + int, int, uint8_t, + int); + int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt, + struct inpcb *inp, struct tcpcb *tp); + /* Optional memory allocation/free routine */ + void (*tfb_tcp_fb_init)(struct tcpcb *); + void (*tfb_tcp_fb_fini)(struct tcpcb *, int); + /* Optional timers, must define all if you define one */ + int (*tfb_tcp_timer_stop_all)(struct tcpcb *); + void (*tfb_tcp_timer_activate)(struct tcpcb *, + uint32_t, u_int); + int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t); + void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t); + void (*tfb_tcp_rexmit_tmr)(struct tcpcb *); + int (*tfb_tcp_handoff_ok)(struct tcpcb *); + volatile uint32_t tfb_refcnt; + uint32_t tfb_flags; +}; + +struct tcp_function { + TAILQ_ENTRY(tcp_function) tf_next; + struct tcp_function_block *tf_fb; +}; + +TAILQ_HEAD(tcp_funchead, tcp_function); +#endif /* _KERNEL */ /* * Flags and utility macros for the t_flags field. @@ -656,26 +632,41 @@ struct tcp_hhook_data { /* * TCB structure exported to user-land via sysctl(3). + * + * Fields prefixed with "xt_" are unique to the export structure, and fields + * with "t_" or other prefixes match corresponding fields of 'struct tcpcb'. + * + * Legend: + * (s) - used by userland utilities in src + * (p) - used by utilities in ports + * (3) - is known to be used by third party software not in ports + * (n) - no known usage + * * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been * included. Not all of our clients do. */ #if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_) -struct xtcp_timer { - int tt_rexmt; /* retransmit timer */ - int tt_persist; /* retransmit persistence */ - int tt_keep; /* keepalive */ - int tt_2msl; /* 2*msl TIME_WAIT timer */ - int tt_delack; /* delayed ACK timer */ - int t_rcvtime; /* Time since last packet received */ -}; -struct xtcpcb { - size_t xt_len; - struct inpcb xt_inp; - struct tcpcb xt_tp; - struct xsocket xt_socket; - struct xtcp_timer xt_timer; - u_quad_t xt_alignment_hack; -}; +struct xtcpcb { + size_t xt_len; /* length of this structure */ + struct xinpcb xt_inp; + char xt_stack[TCP_FUNCTION_NAME_LEN_MAX]; /* (n) */ + int64_t spare64[8]; + int32_t t_state; /* (s,p) */ + uint32_t t_flags; /* (s,p) */ + int32_t t_sndzerowin; /* (s) */ + int32_t t_sndrexmitpack; /* (s) */ + int32_t t_rcvoopack; /* (s) */ + int32_t t_rcvtime; /* (s) */ + int32_t tt_rexmt; /* (s) */ + int32_t tt_persist; /* (s) */ + int32_t tt_keep; /* (s) */ + int32_t tt_2msl; /* (s) */ + int32_t tt_delack; /* (s) */ + int32_t spare32[32]; +} __aligned(8); +#ifdef _KERNEL +void tcp_inptoxtp(const struct inpcb *, struct xtcpcb *); +#endif #endif /* Modified: head/sys/netinet/udp_usrreq.c ============================================================================== --- head/sys/netinet/udp_usrreq.c Tue Mar 21 05:15:10 2017 (r315661) +++ head/sys/netinet/udp_usrreq.c Tue Mar 21 06:39:49 2017 (r315662) @@ -905,13 +905,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; - bzero(&xi, sizeof(xi)); - xi.xi_len = sizeof xi; - /* XXX should avoid extra copy */ - bcopy(inp, &xi.xi_inp, sizeof *inp); - if (inp->inp_socket) - sotoxsocket(inp->inp_socket, &xi.xi_socket); - xi.xi_inp.inp_gencnt = inp->inp_gencnt; + in_pcbtoxinpcb(inp, &xi); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else Modified: head/sys/sys/param.h ============================================================================== --- head/sys/sys/param.h Tue Mar 21 05:15:10 2017 (r315661) +++ head/sys/sys/param.h Tue Mar 21 06:39:49 2017 (r315662) @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1200025 /* Master, propagated to newvers */ +#define __FreeBSD_version 1200026 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, Modified: head/usr.bin/netstat/inet.c ============================================================================== --- head/usr.bin/netstat/inet.c Tue Mar 21 05:15:10 2017 (r315661) +++ head/usr.bin/netstat/inet.c Tue Mar 21 06:39:49 2017 (r315662) @@ -91,7 +91,7 @@ static int udp_done, tcp_done, sdp_done; #endif /* INET6 */ static int -pcblist_sysctl(int proto, const char *name, char **bufp, int istcp __unused) +pcblist_sysctl(int proto, const char *name, char **bufp) { const char *mibvar; char *buf; @@ -181,120 +181,6 @@ sotoxsocket(struct socket *so, struct xs return (0); } -static int -pcblist_kvm(u_long off, char **bufp, int istcp) -{ - struct inpcbinfo pcbinfo; - struct inpcbhead listhead; - struct inpcb *inp; - struct xinpcb xi; - struct xinpgen xig; - struct xtcpcb xt; - struct socket so; - struct xsocket *xso; - char *buf, *p; - size_t len; - - if (off == 0) - return (0); - kread(off, &pcbinfo, sizeof(pcbinfo)); - if (istcp) - len = 2 * sizeof(xig) + - (pcbinfo.ipi_count + pcbinfo.ipi_count / 8) * - sizeof(struct xtcpcb); - else - len = 2 * sizeof(xig) + - (pcbinfo.ipi_count + pcbinfo.ipi_count / 8) * - sizeof(struct xinpcb); - if ((buf = malloc(len)) == NULL) { - xo_warnx("malloc %lu bytes", (u_long)len); - return (0); - } - p = buf; - -#define COPYOUT(obj, size) do { \ - if (len < (size)) { \ - xo_warnx("buffer size exceeded"); \ - goto fail; \ - } \ - bcopy((obj), p, (size)); \ - len -= (size); \ - p += (size); \ -} while (0) - -#define KREAD(off, buf, len) do { \ - if (kread((uintptr_t)(off), (buf), (len)) != 0) \ - goto fail; \ -} while (0) - - /* Write out header. */ - xig.xig_len = sizeof xig; - xig.xig_count = pcbinfo.ipi_count; - xig.xig_gen = pcbinfo.ipi_gencnt; - xig.xig_sogen = 0; - COPYOUT(&xig, sizeof xig); - - /* Walk the PCB list. */ - xt.xt_len = sizeof xt; - xi.xi_len = sizeof xi; - if (istcp) - xso = &xt.xt_socket; - else - xso = &xi.xi_socket; - KREAD(pcbinfo.ipi_listhead, &listhead, sizeof(listhead)); - LIST_FOREACH(inp, &listhead, inp_list) { - if (istcp) { - KREAD(inp, &xt.xt_inp, sizeof(*inp)); - inp = &xt.xt_inp; - } else { - KREAD(inp, &xi.xi_inp, sizeof(*inp)); - inp = &xi.xi_inp; - } - - if (inp->inp_gencnt > pcbinfo.ipi_gencnt) - continue; - - if (istcp) { - if (inp->inp_ppcb == NULL) - bzero(&xt.xt_tp, sizeof xt.xt_tp); - else if (inp->inp_flags & INP_TIMEWAIT) { - bzero(&xt.xt_tp, sizeof xt.xt_tp); - xt.xt_tp.t_state = TCPS_TIME_WAIT; - } else - KREAD(inp->inp_ppcb, &xt.xt_tp, - sizeof xt.xt_tp); - } - if (inp->inp_socket) { - KREAD(inp->inp_socket, &so, sizeof(so)); - if (sotoxsocket(&so, xso) != 0) - goto fail; - } else { - bzero(xso, sizeof(*xso)); - if (istcp) - xso->xso_protocol = IPPROTO_TCP; - } - if (istcp) - COPYOUT(&xt, sizeof xt); - else - COPYOUT(&xi, sizeof xi); - } - - /* Reread the pcbinfo and write out the footer. */ - kread(off, &pcbinfo, sizeof(pcbinfo)); - xig.xig_count = pcbinfo.ipi_count; - xig.xig_gen = pcbinfo.ipi_gencnt; - COPYOUT(&xig, sizeof xig); - - *bufp = buf; - return (1); - -fail: - free(buf); - return (0); -#undef COPYOUT -#undef KREAD -} - /* * Print a summary of connections related to an Internet * protocol. For TCP, also give state of connection. @@ -304,15 +190,14 @@ fail: void protopr(u_long off, const char *name, int af1, int proto) { - int istcp; static int first = 1; + int istcp; char *buf; const char *vchar; - struct tcpcb *tp = NULL; - struct inpcb *inp; + struct xtcpcb *tp; + struct xinpcb *inp; struct xinpgen *xig, *oxig; struct xsocket *so; - struct xtcp_timer *timer; istcp = 0; switch (proto) { @@ -341,28 +226,21 @@ protopr(u_long off, const char *name, in #endif break; } - if (live) { - if (!pcblist_sysctl(proto, name, &buf, istcp)) - return; - } else { - if (!pcblist_kvm(off, &buf, istcp)) - return; - } + + if (!pcblist_sysctl(proto, name, &buf)) + return; oxig = xig = (struct xinpgen *)buf; for (xig = (struct xinpgen *)((char *)xig + xig->xig_len); xig->xig_len > sizeof(struct xinpgen); xig = (struct xinpgen *)((char *)xig + xig->xig_len)) { if (istcp) { - timer = &((struct xtcpcb *)xig)->xt_timer; - tp = &((struct xtcpcb *)xig)->xt_tp; - inp = &((struct xtcpcb *)xig)->xt_inp; - so = &((struct xtcpcb *)xig)->xt_socket; + tp = (struct xtcpcb *)xig; + inp = &tp->xt_inp; } else { - inp = &((struct xinpcb *)xig)->xi_inp; - so = &((struct xinpcb *)xig)->xi_socket; - timer = NULL; + inp = (struct xinpcb *)xig; } + so = &inp->xi_socket; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***