Date: Sat, 6 Nov 2010 11:51:25 GMT From: Aman Jassal <aman@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 185431 for review Message-ID: <201011061151.oA6BpP5P006453@skunkworks.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://p4web.freebsd.org/@@185431?ac=10 Change 185431 by aman@src on 2010/11/06 11:50:50 After quite some time, adding some sysctl support for the routing tables in libnetstat. The support is still in a somewhat experimental state, though: it cannot export the masks associated with a subnet because of kernel page faults, and the size of struct rttable_perfib_header should be reviewed as well to make sure it is a power of 2. Submitted by: aman Reviewed by: pgj Affected files ... .. //depot/projects/soc2009/pgj_libstat/src/lib/libnetstat/netstat_internal.h#69 edit .. //depot/projects/soc2009/pgj_libstat/src/lib/libnetstat/netstat_route.c#9 edit .. //depot/projects/soc2009/pgj_libstat/src/sys/net/route.h#6 edit .. //depot/projects/soc2009/pgj_libstat/src/sys/net/rtsock.c#7 edit .. //depot/projects/soc2009/pgj_libstat/src/usr.bin/netstat/main.c#50 edit Differences ... ==== //depot/projects/soc2009/pgj_libstat/src/lib/libnetstat/netstat_internal.h#69 (text+ko) ==== @@ -682,6 +682,8 @@ struct netisr_work *_netstat_nw_allocate(struct netisr_work_list *); struct routeaddr_type *extract_address(void *, void *, int); +struct routeaddr_type *extract_saddress(void *, void *, int); + const char *resolve_val2str_name(int, const struct val2str *); /* XXX: merge these into a common address resolution routine. 
*/ const char *routename(in_addr_t in, int numeric); ==== //depot/projects/soc2009/pgj_libstat/src/lib/libnetstat/netstat_route.c#9 (text+ko) ==== @@ -33,10 +33,12 @@ #include <net/if.h> #include <net/if_dl.h> #include <net/if_var.h> +#include <net/route.h> #include <netinet/in.h> #include <arpa/inet.h> #include <err.h> +#include <errno.h> #include <kvm.h> #include <nlist.h> #include <stdio.h> @@ -64,6 +66,7 @@ static void process_tree(kvm_t *, struct route_type_list *, struct radix_node *, int, int); static void extract_rtentry_data(struct rtentry *, struct route_type *); +static void extract_srtentry_data(struct rttable_perfib_data *, struct route_type *, int); static void extract_node(struct radix_node *, struct routenode_type *, int); int @@ -307,11 +310,76 @@ int route_tree_sysctl(struct route_type_list *list, __unused int fib, - __unused int domain, __unused int flags) + int domain, __unused int flags) { - /* XXX: unsupported */ - list->rtl_error = NETSTAT_ERROR_UNSUPPORTED; - return (-1); + /* + * AJ : Adding sysctl routing tables support, after + * quite some time... 
+ */ + char *mibvar_rttables; + char *buffer_rtdata, *p; + size_t rtdata_len; + struct rttable_stream_header *rshp; + struct rttable_perfib_header *rphp; + struct rttable_perfib_data *rpdp; + struct route_type *rtp; + uint32_t i, j; + + if (domain == AF_INET) + mibvar_rttables = "net.route.inet_dump"; + else if (domain == AF_INET6) + mibvar_rttables = "net.route.inet6_dump"; + else + mibvar_rttables = NULL; + + if (sysctlbyname(mibvar_rttables, 0, &rtdata_len, 0, 0) < 0) { + if (errno != ENOENT) + warn("sysctl: mibvar estimate"); + goto end; + } + if ((buffer_rtdata = malloc(rtdata_len)) == 0) { + warnx("malloc %lu bytes", (u_long)rtdata_len); + goto end; + } + if (sysctlbyname(mibvar_rttables, buffer_rtdata, &rtdata_len, 0, 0) < 0) { + warn("sysctl: mibvar retrieval"); + goto out_rtdata; + } + if (rtdata_len < sizeof(*rshp)) { + list->rtl_error = NETSTAT_ERROR_VERSION; + goto out_rtdata; + } + + p = buffer_rtdata; + rshp = (struct rttable_stream_header *)p; + p += sizeof(*rshp); + + if (rshp->rsh_version != RTTABLE_STREAM_VERSION) { + list->rtl_error = NETSTAT_ERROR_VERSION; + goto out_rtdata; + } + + for (i = 0; i < rshp->rsh_count; i++) { + /* + * Decapsulate per-fib header. 
+ */ + rphp = (struct rttable_perfib_header *)p; + p += sizeof(*rphp); + + for (j = 0; j < rphp->rph_count; j++) { + rpdp = (struct rttable_perfib_data *)p; + p += sizeof(*rpdp); + + rtp = _netstat_rt_allocate(list); + extract_srtentry_data(rpdp, rtp, domain); + } + + } + +out_rtdata: + free(buffer_rtdata); +end: + return (0); } #define CNV_FLAG(X, Y) \ @@ -363,10 +431,64 @@ } } rtp->rt_mtu = rte->rt_rmx.rmx_mtu; - rtp->rt_fib = rte->rt_fibnum; +} +#undef CNV_FLAG + +#define CNV_FLAG(X, Y) \ + if (rpdp->rpd_flags & (X)) \ + rpdp->rpd_flags |= (Y) + +void +extract_srtentry_data(struct rttable_perfib_data *rpdp, struct route_type *rtp, int domain) +{ + time_t expire_time; + struct timespec uptime; + struct sockaddr_storage *sa, *mk; + + CNV_FLAG(RTF_UP, NETSTAT_RT_UP); + CNV_FLAG(RTF_GATEWAY, NETSTAT_RT_GATEWAY); + CNV_FLAG(RTF_HOST, NETSTAT_RT_HOST); + CNV_FLAG(RTF_REJECT, NETSTAT_RT_REJECT); + CNV_FLAG(RTF_DYNAMIC, NETSTAT_RT_DYNAMIC); + CNV_FLAG(RTF_MODIFIED, NETSTAT_RT_MODIFIED); + CNV_FLAG(RTF_DONE, NETSTAT_RT_DONE); + CNV_FLAG(RTF_XRESOLVE, NETSTAT_RT_XRESOLVE); + CNV_FLAG(RTF_LLINFO, NETSTAT_RT_LLINFO); + CNV_FLAG(RTF_STATIC, NETSTAT_RT_STATIC); + CNV_FLAG(RTF_PROTO1, NETSTAT_RT_PROTO1); + CNV_FLAG(RTF_PROTO2, NETSTAT_RT_PROTO2); + CNV_FLAG(RTF_PROTO3, NETSTAT_RT_PROTO3); + CNV_FLAG(RTF_BLACKHOLE, NETSTAT_RT_BLACKHOLE); + CNV_FLAG(RTF_BROADCAST, NETSTAT_RT_BROADCAST); + + rtp->rt_fib = rpdp->rpd_fib; + rtp->rt_family = (rpdp->rpd_dst).ss_family; + sa = &rpdp->rpd_dst; + mk = &rpdp->rpd_mask; + rtp->rt_destination = extract_saddress(sa, mk, rpdp->rpd_flags); + sa = &rpdp->rpd_gw; + rtp->rt_gateway = extract_saddress(sa, NULL, RTF_HOST); + rtp->rt_refs = rpdp->rpd_refs; + rtp->rt_used = rpdp->rpd_used; + if (rpdp->rpd_interface_name != NULL) + rtp->rt_interface = strdup(rpdp->rpd_interface_name); + else + rtp->rt_interface = strdup("---"); + if (rpdp->rpd_expire > 0) { + if (clock_gettime(CLOCK_UPTIME, &uptime) < 0) { + warn("netstat_route: clock_gettime() 
failed"); + } + expire_time = rpdp->rpd_expire - uptime.tv_sec; + if (expire_time > 0) { + rtp->rt_flags |= NETSTAT_RT_EXPIRES; + rtp->rt_expire = expire_time; + } + } + rtp->rt_mtu = rpdp->rpd_mtu; } #undef CNV_FLAG + struct routeaddr_type * extract_address(void *saddr, void *maddr, int flags) { @@ -523,6 +645,163 @@ return (rap); } + +struct routeaddr_type * +extract_saddress(void *saddr, void *maddr, int flags) +{ + struct routeaddr_type *rap; + struct sockaddr *sa, *mask; + struct sockaddr_in *sa_in, *mk_in; +#ifdef INET6 + struct sockaddr_in6 *sa_in6, *mk_in6; + struct in6_addr *in6; +#endif + struct sockaddr_dl *sa_dl; + char *cp, *cq, *cqlim, *p; + int n; + char workbuf[128]; + u_char *s, *slim; + + if (saddr == NULL) + return (NULL); + + sa = (struct sockaddr *)saddr; + mask = (struct sockaddr *)maddr; + sa_in = (struct sockaddr_in *)sa; + mk_in = (struct sockaddr_in *)mask; +#ifdef INET6 + sa_in6 = (struct sockaddr_in6 *)sa; + in6 = &sa_in6->sin6_addr; + mk_in6 = (struct sockaddr_in6 *)mask; +#endif + sa_dl = (struct sockaddr_dl *)sa; + + rap = _netstat_rat_allocate(sa->sa_family, sa, sizeof(struct sockaddr)); + if (rap == NULL) + return (NULL); + + switch (sa->sa_family) { + case PF_INET: + if ((sa_in->sin_addr.s_addr == INADDR_ANY) && + (mask != NULL) && + (ntohl(mk_in->sin_addr.s_addr) == 0L)) { + rap->rat_address = strdup("default"); + strlcpy(rap->rat_ni_address, "default", + sizeof(rap->rat_ni_address)); + } else if (flags & RTF_HOST) { + inet_ntop(PF_INET, &sa_in->sin_addr, + rap->rat_ni_address, sizeof(rap->rat_ni_address)); + rap->rat_address = + strdup(routename(sa_in->sin_addr.s_addr, 0)); + } else if (mask != NULL) { + strlcpy(rap->rat_ni_address, + netname(sa_in->sin_addr.s_addr, + ntohl(mk_in->sin_addr.s_addr), 1), + sizeof(rap->rat_ni_address)); + rap->rat_address = + strdup(netname(sa_in->sin_addr.s_addr, + ntohl(mk_in->sin_addr.s_addr), 0)); + } else { + rap->rat_address = + strdup(netname(sa_in->sin_addr.s_addr, + 0L, 0)); + 
strlcpy(rap->rat_ni_address, rap->rat_address, + sizeof(rap->rat_ni_address)); + } + rap->rat_data = malloc(sizeof(struct sockaddr_in)); + if (rap->rat_data != NULL) { + rap->rat_data_len = sizeof(struct sockaddr_in); + memcpy(rap->rat_data, sa_in, rap->rat_data_len); + } + break; +#ifdef INET6 + case PF_INET6: + /* + * XXX: This is a special workaround for KAME kernels. + * sin6_scope_id field of SA should be set in the future. + */ + if (IN6_IS_ADDR_LINKLOCAL(in6) || + IN6_IS_ADDR_MC_LINKLOCAL(in6)) { + sa_in6->sin6_scope_id = + (u_int32_t)ntohs(*(u_short *)&in6->s6_addr[2]); + *(u_short *)&in6->s6_addr[2] = 0; + } + if (flags & RTF_HOST) { + rap->rat_address = strdup(routename6(sa_in6, 0)); + strlcpy(rap->rat_ni_address, routename6(sa_in6, 1), + sizeof(rap->rat_ni_address)); + } + else if (mask != NULL) { + rap->rat_address = strdup(netname6(sa_in6, + &mk_in6->sin6_addr, 0)); + strlcpy(rap->rat_ni_address, + netname6(sa_in6, &mk_in6->sin6_addr, 1), + sizeof(rap->rat_ni_address)); + } else { + rap->rat_address = strdup(netname6(sa_in6, 0L, 0)); + strlcpy(rap->rat_ni_address, rap->rat_address, + sizeof(rap->rat_ni_address)); + } + rap->rat_data = malloc(sizeof(struct sockaddr_in6)); + if (rap->rat_data != NULL) { + rap->rat_data_len = sizeof(struct sockaddr_in6); + memcpy(rap->rat_data, sa_in6, rap->rat_data_len); + } + break; +#endif + case PF_IPX: + break; + case PF_APPLETALK: + break; + case PF_NETGRAPH: + break; + case PF_LINK: + if (sa_dl->sdl_nlen == 0 && sa_dl->sdl_alen == 0 && + sa_dl->sdl_slen == 0) { + sprintf(rap->rat_ni_address, "<Link#%d>", + sa_dl->sdl_index); + } else { + cp = (char *)LLADDR(sa_dl); + n = sa_dl->sdl_alen; + p = rap->rat_address; + while (--n >= 0) { + sprintf(p, "%02x%s", *cp++ & 0xff, + n > 0 ? 
":" : ""); + p += 3; + } + } + rap->rat_address = strdup(rap->rat_ni_address); + rap->rat_data = malloc(sizeof(struct sockaddr_dl)); + if (rap->rat_data != NULL) { + rap->rat_data_len = sizeof(struct sockaddr_dl); + memcpy(rap->rat_data, sa_dl, rap->rat_data_len); + } + break; + default: + s = (u_char *)sa->sa_data; + cq = workbuf; + slim = sa->sa_len + (u_char *)sa; + cqlim = cq + sizeof(workbuf) - 6; + cq += sprintf(cq, "(%d)", sa->sa_family); + while (s < slim && cq < cqlim) { + cq += sprintf(cq, " %02x", *s++); + if (s < slim) + cq += sprintf(cq, "%02x", *s++); + } + rap->rat_address = strdup(workbuf); + strlcpy(rap->rat_ni_address, workbuf, + sizeof(rap->rat_ni_address)); + rap->rat_data = malloc(sizeof(struct sockaddr)); + if (rap->rat_data != NULL) { + rap->rat_data_len = sizeof(struct sockaddr); + memcpy(rap->rat_data, sa, rap->rat_data_len); + } + break; + } + + return (rap); +} + void extract_node(struct radix_node *rn, struct routenode_type *rnp, int mkcnt) { ==== //depot/projects/soc2009/pgj_libstat/src/sys/net/route.h#6 (text+ko) ==== @@ -309,6 +309,46 @@ }; /* + * Statistics structures to be used by user space monitoring tools. + */ +#define RID_MAX_NAME 16 +#define RTTABLE_STREAM_VERSION 0x00000001 + +struct rttable_stream_header { + uint32_t rsh_version; /* Stream format version */ + uint32_t rsh_count; /* Number of fibs */ + uint32_t rsh_family; /* Address family */ + uint32_t _rsh_pad; /* Padding/Reserved field for future use. */ +}; + +struct rttable_perfib_header { + uint32_t rph_fib; /* Fib instance */ + uint32_t rph_count; /* Number of nodes for the given fib */ + uint32_t _rph_pad[2]; /* Padding/Reserved field for future use. */ +}; + +struct rttable_perfib_data { + /* + * Struct sockaddr_storage to store the sockets' information. 
+ */ + struct sockaddr_storage rpd_dst; // Destination subnet + struct sockaddr_storage rpd_mask; // Destination subnet's netmask + struct sockaddr_storage rpd_gw; // Gateway + + /* + * Remaining struct rtentry fields + */ + uint32_t rpd_fib; + uint32_t rpd_flags; + uint64_t rpd_refs; + uint64_t rpd_used; + char rpd_interface_name[RID_MAX_NAME]; + uint64_t rpd_expire; + uint32_t rpd_mtu; + uint8_t _rpid_pad[16]; +}; + +/* * This macro returns the size of a struct sockaddr when passed * through a routing socket. Basically we round up sa_len to * a multiple of sizeof(long), with a minimum of sizeof(long). ==== //depot/projects/soc2009/pgj_libstat/src/sys/net/rtsock.c#7 (text+ko) ==== @@ -46,6 +46,7 @@ #include <sys/proc.h> #include <sys/protosw.h> #include <sys/rwlock.h> +#include <sys/sbuf.h> #include <sys/signalvar.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -1666,6 +1667,211 @@ SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); +static int +sysctl_export_rttable(SYSCTL_HANDLER_ARGS) +{ + int af, buflen, error, fib_instance, rt_entry_counter_total, i; + int rt_entry_counter[RT_NUMFIBS]; + char *buffer; + struct sbuf sbuf; + struct rttable_stream_header rsh; + struct rttable_perfib_header rph; + struct rttable_perfib_data rpd; + struct radix_node_head *rt_table = NULL; + struct radix_node *base, *next; + register struct radix_node *rn; + struct rtentry *rtp; + + for (i = 0; i < RT_NUMFIBS; i++) + rt_entry_counter[i] = 0; + rt_entry_counter_total = 0; + error = 0; + buflen = 0; + + switch ((intptr_t)arg1) { + case AF_INET: + af = AF_INET; + break; + case AF_INET6: + af = AF_INET6; + break; + default: + error = EAFNOSUPPORT; + return(0); + } + + for (fib_instance = 0; fib_instance < RT_NUMFIBS; fib_instance++) { + + rt_table = rt_tables_get_rnh(fib_instance, af); + + RADIX_NODE_HEAD_RLOCK(rt_table); + rn = rt_table->rnh_treetop; + + while (rn->rn_bit >= 0) + rn = rn->rn_left; + for (;;) { + base = rn; + /* If at right child go back 
up, otherwise, go right */ + while (rn->rn_parent->rn_right == rn + && (rn->rn_flags & RNF_ROOT) == 0) + rn = rn->rn_parent; + + /* Find the next *leaf* since next node might vanish, too */ + for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) + rn = rn->rn_left; + next = rn; + + /* Process leaves */ + while ((rn = base)) { + base = rn->rn_dupedkey; + if (!(rn->rn_flags & RNF_ROOT)) { + rt_entry_counter[fib_instance]++; + rt_entry_counter_total++; + } + } + + rn = next; + if (rn->rn_flags & RNF_ROOT) + break; + } + + RADIX_NODE_HEAD_RUNLOCK(rt_table); + } + + buflen = sizeof(rsh) + RT_NUMFIBS * sizeof(rph) + rt_entry_counter_total * sizeof(rpd) + 1; + buffer = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); + + sbuf_new(&sbuf, buffer, buflen, SBUF_FIXEDLEN); + + /* + * Insert stream header. + */ + bzero(&rsh, sizeof(rsh)); + rsh.rsh_version = RTTABLE_STREAM_VERSION; + rsh.rsh_count = RT_NUMFIBS; + rsh.rsh_family = af; + + if (sbuf_bcat(&sbuf, &rsh, sizeof(rsh)) < 0) { + error = ENOMEM; + goto out; + } + + for (fib_instance = 0; fib_instance < RT_NUMFIBS; fib_instance++) { + + i = 0; + rt_table = rt_tables_get_rnh(fib_instance, af); + + /* + * Insert headers per-fib. + * The second header indicates the number of routing + * entries in the routing table, the counting was performed + * earlier in rt_entry_counter. + */ + bzero(&rph, sizeof(rph)); + rph.rph_fib = fib_instance; + rph.rph_count = rt_entry_counter[fib_instance]; + + if (sbuf_bcat(&sbuf, &rph, sizeof(rph)) < 0) { + error = ENOMEM; + goto out; + } + + /* + * AJ : The radix tree is protected by a rwlock. + * Since we only need to go through the + * radix tree and read all the nodes, holding + * the rwlock in read mode should be enough. + * + * Here, we fill all the rttable_perfib_data + * structures and build up the data stream. + * If we reach the maximum number of entries + * initially allocated, we stop processing the + * radix tree and go on to the next one. 
+ * The aim here is to make a quick snapshot of + * the routing table. + */ + RADIX_NODE_HEAD_RLOCK(rt_table); + rn = rt_table->rnh_treetop; + + while (rn->rn_bit >= 0) + rn = rn->rn_left; + + for (;;) { + base = rn; + /* If at right child go back up, otherwise, go right */ + while (rn->rn_parent->rn_right == rn + && (rn->rn_flags & RNF_ROOT) == 0) + rn = rn->rn_parent; + + /* Find the next *leaf* since next node might vanish, too */ + for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) + rn = rn->rn_left; + next = rn; + + /* Process leaves */ + while ((rn = base)) { + base = rn->rn_dupedkey; + if (!(rn->rn_flags & RNF_ROOT)) { + rtp = (struct rtentry *)rn; + + rpd.rpd_fib = rtp->rt_fibnum; + rpd.rpd_flags = rtp->rt_flags; + rpd.rpd_refs = rtp->rt_refcnt; + rpd.rpd_used = rtp->rt_rmx.rmx_pksent; + bcopy(rtp->rt_ifp->if_xname, rpd.rpd_interface_name, RID_MAX_NAME); + rpd.rpd_expire = rtp->rt_rmx.rmx_expire; + rpd.rpd_mtu = rtp->rt_rmx.rmx_mtu; + + rpd.rpd_dst = *((struct sockaddr_storage *)rt_key(rtp)); + rpd.rpd_gw = *((struct sockaddr_storage *)rtp->rt_gateway); + + /* + * AJ : For some reason, calling rt_mask() ALWAYS ends up + * in a kernel page fault, this is seriously irritating + * because without the netmask, handling the subnet + * is meaningless. 
+ */ + //rpd.rpd_mask = *((struct sockaddr_storage *)rt_mask(rtp)); + + if (sbuf_bcat(&sbuf, &rpd, sizeof(rpd)) < 0) { + RADIX_NODE_HEAD_RUNLOCK(rt_table); + error = ENOMEM; + goto out; + } + + if (af == AF_INET && i < rph.rph_count) + i++; + else if (af == AF_INET6 && i < rph.rph_count) + i++; + else + goto rt_out; + } + } + + rn = next; + if (rn->rn_flags & RNF_ROOT) + break; + } +rt_out: + RADIX_NODE_HEAD_RUNLOCK(rt_table); + } + + sbuf_finish(&sbuf); + error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf)); +out: + sbuf_delete(&sbuf); + free(buffer, M_TEMP); + return(error); +} + +SYSCTL_PROC(_net_route, OID_AUTO, inet_dump, CTLFLAG_RD|CTLTYPE_STRUCT, + (caddr_t)(long)AF_INET, 0, sysctl_export_rttable, "s, struct rttable_perfib_header", + "Inet Routing Tables (stream)"); + +SYSCTL_PROC(_net_route, OID_AUTO, inet6_dump, CTLFLAG_RD|CTLTYPE_STRUCT, + (caddr_t)(long)AF_INET6, 0, sysctl_export_rttable, "s, struct rttable_perfib_header", + "Inet6 Routing Tables (stream)"); + /* * Definitions of protocols supported in the ROUTE domain. */ ==== //depot/projects/soc2009/pgj_libstat/src/usr.bin/netstat/main.c#50 (text+ko) ==== @@ -477,7 +477,8 @@ netstat_session_free(session); } else { - kread(0, NULL, 0); + if (!live) + kread(0, NULL, 0); session = netstat_session_new(kvmd); routepr(session, af); netstat_session_free(session);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201011061151.oA6BpP5P006453>