From owner-svn-src-user@FreeBSD.ORG Tue Apr 14 04:43:26 2009 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 09A611065670; Tue, 14 Apr 2009 04:43:26 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id EC5A28FC0C; Tue, 14 Apr 2009 04:43:25 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n3E4hPxq094361; Tue, 14 Apr 2009 04:43:25 GMT (envelope-from kmacy@svn.freebsd.org) Received: (from kmacy@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n3E4hPxw094359; Tue, 14 Apr 2009 04:43:25 GMT (envelope-from kmacy@svn.freebsd.org) Message-Id: <200904140443.n3E4hPxw094359@svn.freebsd.org> From: Kip Macy Date: Tue, 14 Apr 2009 04:43:25 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r191040 - in user/kmacy/releng_7_net_backport/sys: netinet sys X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 14 Apr 2009 04:43:26 -0000 Author: kmacy Date: Tue Apr 14 04:43:25 2009 New Revision: 191040 URL: http://svn.freebsd.org/changeset/base/191040 Log: update netinet with igmp changes Modified: user/kmacy/releng_7_net_backport/sys/netinet/igmp.c user/kmacy/releng_7_net_backport/sys/netinet/igmp.h user/kmacy/releng_7_net_backport/sys/netinet/igmp_var.h user/kmacy/releng_7_net_backport/sys/netinet/in.c user/kmacy/releng_7_net_backport/sys/netinet/in_mcast.c user/kmacy/releng_7_net_backport/sys/netinet/in_proto.c user/kmacy/releng_7_net_backport/sys/netinet/in_var.h user/kmacy/releng_7_net_backport/sys/netinet/ip_input.c user/kmacy/releng_7_net_backport/sys/netinet/ip_var.h user/kmacy/releng_7_net_backport/sys/netinet/udp_usrreq.c user/kmacy/releng_7_net_backport/sys/netinet/udp_var.h user/kmacy/releng_7_net_backport/sys/netinet/vinet.h user/kmacy/releng_7_net_backport/sys/sys/mbuf.h user/kmacy/releng_7_net_backport/sys/sys/tree.h Modified: user/kmacy/releng_7_net_backport/sys/netinet/igmp.c ============================================================================== --- user/kmacy/releng_7_net_backport/sys/netinet/igmp.c Tue Apr 14 04:15:56 2009 (r191039) +++ user/kmacy/releng_7_net_backport/sys/netinet/igmp.c Tue Apr 14 04:43:25 2009 (r191040) @@ -1,4 +1,5 @@ /*- + * Copyright (c) 2007-2009 Bruce Simpson. * Copyright (c) 1988 Stephen Deering. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. @@ -35,11 +36,13 @@ /* * Internet Group Management Protocol (IGMP) routines. + * [RFC1112, RFC2236, RFC3376] * * Written by Steve Deering, Stanford, May 1988. * Modified by Rosen Sharma, Stanford, Aug 1994. * Modified by Bill Fenner, Xerox PARC, Feb 1995. * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995. + * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson. * * MULTICAST Revision: 3.5.1.4 */ @@ -52,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -59,8 +63,11 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include +#include #include #include @@ -78,464 +85,3626 @@ __FBSDID("$FreeBSD$"); #include -static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state"); +#ifndef KTR_IGMPV3 +#define KTR_IGMPV3 KTR_SUBSYS +#endif + +static struct igmp_ifinfo * + igi_alloc_locked(struct ifnet *); +static void igi_delete_locked(const struct ifnet *); +static void igmp_dispatch_queue(struct ifqueue *, int, const int); +static void igmp_fasttimo_vnet(void); +static void igmp_final_leave(struct in_multi *, struct igmp_ifinfo *); +static int igmp_handle_state_change(struct in_multi *, + struct igmp_ifinfo *); +static int igmp_initial_join(struct in_multi *, struct igmp_ifinfo *); +static int igmp_input_v1_query(struct ifnet *, const struct ip *); +static int igmp_input_v2_query(struct ifnet *, const struct ip *, + const struct igmp *); +static int igmp_input_v3_query(struct ifnet *, const struct ip *, + /*const*/ struct igmpv3 *); +static int igmp_input_v3_group_query(struct in_multi *, + struct igmp_ifinfo *, int, /*const*/ struct igmpv3 *); +static int igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *, + /*const*/ struct igmp *); +static int igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *, + /*const*/ struct igmp *); +static void igmp_intr(struct mbuf *); +static int igmp_isgroupreported(const struct in_addr); +static struct mbuf * + igmp_ra_alloc(void); +#ifdef KTR +static char * igmp_rec_type_to_str(const int); +#endif +static void igmp_set_version(struct igmp_ifinfo *, const int); +static void igmp_slowtimo_vnet(void); +static void igmp_sysinit(void); +static int igmp_v1v2_queue_report(struct in_multi *, const int); +static void igmp_v1v2_process_group_timer(struct in_multi *, const int); +static void igmp_v1v2_process_querier_timers(struct igmp_ifinfo *); +static void igmp_v2_update_group(struct in_multi *, const int); +static void igmp_v3_cancel_link_timers(struct igmp_ifinfo *); +static void igmp_v3_dispatch_general_query(struct igmp_ifinfo *); +static struct mbuf * + igmp_v3_encap_report(struct ifnet *, struct mbuf *); +static int igmp_v3_enqueue_group_record(struct ifqueue *, + struct in_multi *, const int, const int, const int); +static int igmp_v3_enqueue_filter_change(struct ifqueue *, + struct in_multi *); +static void igmp_v3_process_group_timers(struct igmp_ifinfo *, + struct ifqueue *, struct ifqueue *, struct in_multi *, + const int); +static int igmp_v3_merge_state_changes(struct in_multi *, + struct ifqueue *); +static void igmp_v3_suppress_group_record(struct in_multi *); +static int sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS); +static int sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS); +static int sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS); + +#ifdef VIMAGE +static vnet_attach_fn vnet_igmp_iattach; +static vnet_detach_fn vnet_igmp_idetach; +#else +static int vnet_igmp_iattach(const void *); +static int vnet_igmp_idetach(const void *); +#endif /* VIMAGE */ + +/* + * System-wide globals. + * + * Unlocked access to these is OK, except for the global IGMP output + * queue. The IGMP subsystem lock ends up being system-wide for the moment, + * because all VIMAGEs have to share a global output queue, as netisrs + * themselves are not virtualized. + * + * Locking: + * * The permitted lock order is: IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK. + * Any may be taken independently; if any are held at the same + * time, the above lock order must be followed. + * * All output is delegated to the netisr to handle IFF_NEEDSGIANT. + * Most of the time, direct dispatch will be fine. + * * IN_MULTI_LOCK covers in_multi. + * * IGMP_LOCK covers igmp_ifinfo and any global variables in this file, + * including the output queue. + * * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of + * per-link state iterators. + * * igmp_ifinfo is valid as long as PF_INET is attached to the interface, + * therefore it is not refcounted. + * We allow unlocked reads of igmp_ifinfo when accessed via in_multi. + * + * Reference counting + * * IGMP acquires its own reference every time an in_multi is passed to + * it and the group is being joined for the first time. + * * IGMP releases its reference(s) on in_multi in a deferred way, + * because the operations which process the release run as part of + * a loop whose control variables are directly affected by the release + * (that, and not recursing on the IF_ADDR_LOCK). + * + * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds + * to a vnet in ifp->if_vnet. + * + * SMPng: XXX We may potentially race operations on ifma_protospec. + * The problem is that we currently lack a clean way of taking the + * IF_ADDR_LOCK() between the ifnet and in layers w/o recursing, + * as anything which modifies ifma needs to be covered by that lock. + * So check for ifma_protospec being NULL before proceeding. + */ +struct mtx igmp_mtx; +int mpsafe_igmp = 0; +SYSCTL_INT(_debug, OID_AUTO, mpsafe_igmp, CTLFLAG_RDTUN, &mpsafe_igmp, 0, + "Enable SMP-safe IGMPv3"); + +struct mbuf *m_raopt; /* Router Alert option */ +MALLOC_DEFINE(M_IGMP, "igmp", "igmp state"); + +/* + * Global netisr output queue. + * This is only used as a last resort if we cannot directly dispatch. + * As IN_MULTI_LOCK is no longer in the bottom half of IP, we can do + * this, providing mpsafe_igmp is set. If it is not, we take Giant, + * and queueing is forced. + */ +struct ifqueue igmpoq; + +/* + * VIMAGE-wide globals. + * + * The IGMPv3 timers themselves need to run per-image, however, + * protosw timers run globally (see tcp). + * An ifnet can only be in one vimage at a time, and the loopback + * ifnet, loif, is itself virtualized. + * It would otherwise be possible to seriously hose IGMP state, + * and create inconsistencies in upstream multicast routing, if you have + * multiple VIMAGEs running on the same link joining different multicast + * groups, UNLESS the "primary IP address" is different. This is because + * IGMP for IPv4 does not force link-local addresses to be used for each + * node, unlike MLD for IPv6. + * Obviously the IGMPv3 per-interface state has per-vimage granularity + * also as a result. + * + * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection + * policy to control the address used by IGMP on the link. + */ +#ifdef VIMAGE_GLOBALS +int interface_timers_running; /* IGMPv3 general query response */ +int state_change_timers_running; /* IGMPv3 state-change retransmit */ +int current_state_timers_running; /* IGMPv1/v2 host report; + * IGMPv3 g/sg query response */ + +LIST_HEAD(, igmp_ifinfo) igi_head; +struct igmpstat igmpstat; +struct timeval igmp_gsrdelay; + +int igmp_recvifkludge; +int igmp_sendra; +int igmp_sendlocal; +int igmp_v1enable; +int igmp_v2enable; +int igmp_legacysupp; +int igmp_default_version; +#endif /* VIMAGE_GLOBALS */ + +/* + * Virtualized sysctls. + */ +SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_igmp, IGMPCTL_STATS, stats, + CTLFLAG_RW, igmpstat, igmpstat, ""); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, recvifkludge, + CTLFLAG_RW, igmp_recvifkludge, 0, + "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, sendra, + CTLFLAG_RW, igmp_sendra, 0, + "Send IP Router Alert option in IGMPv2/v3 messages"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, sendlocal, + CTLFLAG_RW, igmp_sendlocal, 0, + "Send IGMP membership reports for 224.0.0.0/24 groups"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, v1enable, + CTLFLAG_RW, igmp_v1enable, 0, + "Enable backwards compatibility with IGMPv1"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, v2enable, + CTLFLAG_RW, igmp_v2enable, 0, + "Enable backwards compatibility with IGMPv2"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, legacysupp, + CTLFLAG_RW, igmp_legacysupp, 0, + "Allow v1/v2 reports to suppress v3 group responses"); +SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, default_version, + CTLTYPE_INT | CTLFLAG_RW , igmp_default_version, 0, + sysctl_igmp_default_version, "I", + "Default version of IGMP to run on each interface"); +SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, gsrdelay, + CTLTYPE_INT | CTLFLAG_RW , igmp_gsrdelay.tv_sec, 0, + sysctl_igmp_gsr, "I", + "Rate limit for IGMPv3 Group-and-Source queries in seconds"); + +/* + * Non-virtualized sysctls. + */ +SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD, + sysctl_igmp_ifinfo, "Per-interface IGMPv3 state"); + +static __inline void +igmp_save_context(struct mbuf *m, struct ifnet *ifp) +{ + +#ifdef VIMAGE + m->m_pkthdr.header = ifp->if_vnet; +#endif /* VIMAGE */ + m->m_pkthdr.flowid = ifp->if_index; +} + +static __inline void +igmp_scrub_context(struct mbuf *m) +{ + + m->m_pkthdr.header = NULL; + m->m_pkthdr.flowid = 0; +} + +#ifdef KTR +static __inline char * +inet_ntoa_haddr(in_addr_t haddr) +{ + struct in_addr ia; + + ia.s_addr = htonl(haddr); + return (inet_ntoa(ia)); +} +#endif + +/* + * Restore context from a queued IGMP output chain. + * Return saved ifindex. + * + * VIMAGE: The assertion is there to make sure that we + * actually called CURVNET_SET() with what's in the mbuf chain. + */ +static __inline uint32_t +igmp_restore_context(struct mbuf *m) +{ + +#ifdef notyet +#if defined(VIMAGE) && defined(INVARIANTS) + KASSERT(curvnet == (m->m_pkthdr.header), + ("%s: called when curvnet was not restored", __func__)); +#endif +#endif + return (m->m_pkthdr.flowid); +} + +/* + * Retrieve or set default IGMP version. + * + * VIMAGE: Assume curvnet set by caller. + * SMPng: NOTE: Serialized by IGMP lock. + */ +static int +sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS) +{ + int error; + int new; + + error = sysctl_wire_old_buffer(req, sizeof(int)); + if (error) + return (error); + + IGMP_LOCK(); + + new = V_igmp_default_version; + + error = sysctl_handle_int(oidp, &new, 0, req); + if (error || !req->newptr) + goto out_locked; + + if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) { + error = EINVAL; + goto out_locked; + } + + CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d", + V_igmp_default_version, new); + + V_igmp_default_version = new; + +out_locked: + IGMP_UNLOCK(); + return (error); +} + +/* + * Retrieve or set threshold between group-source queries in seconds. + * + * VIMAGE: Assume curvnet set by caller. + * SMPng: NOTE: Serialized by IGMP lock. + */ +static int +sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS) +{ + int error; + int i; + + error = sysctl_wire_old_buffer(req, sizeof(int)); + if (error) + return (error); + + IGMP_LOCK(); + + i = V_igmp_gsrdelay.tv_sec; + + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || !req->newptr) + goto out_locked; + + if (i < -1 || i >= 60) { + error = EINVAL; + goto out_locked; + } + + CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d", + V_igmp_gsrdelay.tv_sec, i); + V_igmp_gsrdelay.tv_sec = i; + +out_locked: + IGMP_UNLOCK(); + return (error); +} + +/* + * Expose struct igmp_ifinfo to userland, keyed by ifindex. + * For use by ifmcstat(8). + * + * SMPng: NOTE: Does an unlocked ifindex space read. + * VIMAGE: Assume curvnet set by caller. The node handler itself + * is not directly virtualized. + */ +static int +sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS) +{ + INIT_VNET_NET(curvnet); + int *name; + int error; + u_int namelen; + struct ifnet *ifp; + struct igmp_ifinfo *igi; + + name = (int *)arg1; + namelen = arg2; + + if (req->newptr != NULL) + return (EPERM); + + if (namelen != 1) + return (EINVAL); + + error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo)); + if (error) + return (error); + + IN_MULTI_LOCK(); + IGMP_LOCK(); + + if (name[0] <= 0 || name[0] > V_if_index) { + error = ENOENT; + goto out_locked; + } + + error = ENOENT; + + ifp = ifnet_byindex(name[0]); + if (ifp == NULL) + goto out_locked; + + LIST_FOREACH(igi, &V_igi_head, igi_link) { + if (ifp == igi->igi_ifp) { + error = SYSCTL_OUT(req, igi, + sizeof(struct igmp_ifinfo)); + break; + } + } + +out_locked: + IGMP_UNLOCK(); + IN_MULTI_UNLOCK(); + return (error); +} + +/* + * Dispatch an entire queue of pending packet chains + * using the netisr. + * VIMAGE: Assumes the vnet pointer has been set. + */ +static void +igmp_dispatch_queue(struct ifqueue *ifq, int limit, const int loop) +{ + struct mbuf *m; + + for (;;) { + _IF_DEQUEUE(ifq, m); + if (m == NULL) + break; + CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, ifq, m); + if (loop) + m->m_flags |= M_IGMP_LOOP; + netisr_dispatch(NETISR_IGMP, m); + if (--limit == 0) + break; + } +} + +/* + * Filter outgoing IGMP report state by group. + * + * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1). + * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are + * disabled for all groups in the 224.0.0.0/24 link-local scope. However, + * this may break certain IGMP snooping switches which rely on the old + * report behaviour. + * + * Return zero if the given group is one for which IGMP reports + * should be suppressed, or non-zero if reports should be issued. + */ +static __inline int +igmp_isgroupreported(const struct in_addr addr) +{ + + if (in_allhosts(addr) || + ((!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) + return (0); + + return (1); +} + +/* + * Construct a Router Alert option to use in outgoing packets. + */ +static struct mbuf * +igmp_ra_alloc(void) +{ + struct mbuf *m; + struct ipoption *p; + + MGET(m, M_DONTWAIT, MT_DATA); + p = mtod(m, struct ipoption *); + p->ipopt_dst.s_addr = INADDR_ANY; + p->ipopt_list[0] = IPOPT_RA; /* Router Alert Option */ + p->ipopt_list[1] = 0x04; /* 4 bytes long */ + p->ipopt_list[2] = IPOPT_EOL; /* End of IP option list */ + p->ipopt_list[3] = 0x00; /* pad byte */ + m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1]; + + return (m); +} + +/* + * Attach IGMP when PF_INET is attached to an interface. + * + * VIMAGE: Currently we set the vnet pointer, although it is + * likely that it was already set by our caller. + */ +struct igmp_ifinfo * +igmp_domifattach(struct ifnet *ifp) +{ + struct igmp_ifinfo *igi; + + CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", + __func__, ifp, ifp->if_xname); + + CURVNET_SET(ifp->if_vnet); + IGMP_LOCK(); + + igi = igi_alloc_locked(ifp); + if (!(ifp->if_flags & IFF_MULTICAST)) + igi->igi_flags |= IGIF_SILENT; + + IGMP_UNLOCK(); + CURVNET_RESTORE(); + + return (igi); +} + +/* + * VIMAGE: assume curvnet set by caller. + */ +static struct igmp_ifinfo * +igi_alloc_locked(/*const*/ struct ifnet *ifp) +{ + struct igmp_ifinfo *igi; + + IGMP_LOCK_ASSERT(); + + igi = malloc(sizeof(struct igmp_ifinfo), M_IGMP, M_NOWAIT|M_ZERO); + if (igi == NULL) + goto out; + + igi->igi_ifp = ifp; + igi->igi_version = V_igmp_default_version; + igi->igi_flags = 0; + igi->igi_rv = IGMP_RV_INIT; + igi->igi_qi = IGMP_QI_INIT; + igi->igi_qri = IGMP_QRI_INIT; + igi->igi_uri = IGMP_URI_INIT; + + SLIST_INIT(&igi->igi_relinmhead); + + /* + * Responses to general queries are subject to bounds. + */ + IFQ_SET_MAXLEN(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS); + + LIST_INSERT_HEAD(&V_igi_head, igi, igi_link); + + CTR2(KTR_IGMPV3, "allocate igmp_ifinfo for ifp %p(%s)", + ifp, ifp->if_xname); + +out: + return (igi); +} + +/* + * Hook for ifdetach. + * + * NOTE: Some finalization tasks need to run before the protocol domain + * is detached, but also before the link layer does its cleanup. + * + * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK(). + * XXX This is also bitten by unlocked ifma_protospec access. + * + * VIMAGE: curvnet should have been set by caller, but let's not assume + * that for now. + */ +void +igmp_ifdetach(struct ifnet *ifp) +{ + struct igmp_ifinfo *igi; + struct ifmultiaddr *ifma; + struct in_multi *inm, *tinm; + + CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp, + ifp->if_xname); + + CURVNET_SET(ifp->if_vnet); + + IGMP_LOCK(); + + igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; + if (igi->igi_version == IGMP_VERSION_3) { + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_INET || + ifma->ifma_protospec == NULL) + continue; +#if 0 + KASSERT(ifma->ifma_protospec != NULL, + ("%s: ifma_protospec is NULL", __func__)); +#endif + inm = (struct in_multi *)ifma->ifma_protospec; + if (inm->inm_state == IGMP_LEAVING_MEMBER) { + SLIST_INSERT_HEAD(&igi->igi_relinmhead, + inm, inm_nrele); + } + inm_clear_recorded(inm); + } + IF_ADDR_UNLOCK(ifp); + /* + * Free the in_multi reference(s) for this IGMP lifecycle. + */ + SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele, + tinm) { + SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele); + inm_release_locked(inm); + } + } + + IGMP_UNLOCK(); + +#ifdef VIMAGE + /* + * Plug the potential race which may occur when a VIMAGE + * is detached and we are forced to queue pending IGMP output for + * output netisr processing due to !mpsafe_igmp. In this case it + * is possible that igmp_intr() is about to see mbuf chains with + * invalid cached curvnet pointers. + * This is a rare condition, so just blow them all away. + * FUTURE: This may in fact not be needed, because IFF_NEEDSGIANT + * is being removed in 8.x and the netisr may then be eliminated; + * it is needed only if VIMAGE and IFF_NEEDSGIANT need to co-exist + */ + if (!mpsafe_igmp) { + int drops; + + IF_LOCK(&igmpoq); + drops = igmpoq.ifq_len; + _IF_DRAIN(&igmpoq); + IF_UNLOCK(&igmpoq); + if (bootverbose && drops) { + printf("%s: dropped %d pending IGMP output packets\n", + __func__, drops); + } + } +#endif /* VIMAGE */ + + CURVNET_RESTORE(); +} + +/* + * Hook for domifdetach. + * + * VIMAGE: curvnet should have been set by caller, but let's not assume + * that for now. + */ +void +igmp_domifdetach(struct ifnet *ifp) +{ + struct igmp_ifinfo *igi; + + CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", + __func__, ifp, ifp->if_xname); + + CURVNET_SET(ifp->if_vnet); + IGMP_LOCK(); + + igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; + igi_delete_locked(ifp); + + IGMP_UNLOCK(); + CURVNET_RESTORE(); +} + +static void +igi_delete_locked(const struct ifnet *ifp) +{ + struct igmp_ifinfo *igi, *tigi; + + CTR3(KTR_IGMPV3, "%s: freeing igmp_ifinfo for ifp %p(%s)", + __func__, ifp, ifp->if_xname); + + IGMP_LOCK_ASSERT(); + + LIST_FOREACH_SAFE(igi, &V_igi_head, igi_link, tigi) { + if (igi->igi_ifp == ifp) { + /* + * Free deferred General Query responses. + */ + _IF_DRAIN(&igi->igi_gq); + + LIST_REMOVE(igi, igi_link); + + KASSERT(SLIST_EMPTY(&igi->igi_relinmhead), + ("%s: there are dangling in_multi references", + __func__)); + + free(igi, M_IGMP); + return; + } + } + +#ifdef INVARIANTS + panic("%s: igmp_ifinfo not found for ifp %p\n", __func__, ifp); +#endif +} + +/* + * Process a received IGMPv1 query. + * Return non-zero if the message should be dropped. + * + * VIMAGE: The curvnet pointer is derived from the input ifp. + */ +static int +igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip) +{ + INIT_VNET_INET(ifp->if_vnet); + struct ifmultiaddr *ifma; + struct igmp_ifinfo *igi; + struct in_multi *inm; + + /* + * IGMPv1 General Queries SHOULD always addressed to 224.0.0.1. + * igmp_group is always ignored. Do not drop it as a userland + * daemon may wish to see it. + */ + if (!in_allhosts(ip->ip_dst)) { + IGMPSTAT_INC(igps_rcv_badqueries); + return (0); + } + + IGMPSTAT_INC(igps_rcv_gen_queries); + + /* + * Switch to IGMPv1 host compatibility mode. + */ + IN_MULTI_LOCK(); + IGMP_LOCK(); + + igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; + KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp)); + + if (igi->igi_flags & IGIF_LOOPBACK) { + CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)", + ifp, ifp->if_xname); + goto out_locked; + } + + igmp_set_version(igi, IGMP_VERSION_1); + + CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname); + + /* + * Start the timers in all of our group records + * for the interface on which the query arrived, + * except those which are already running. + */ + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_INET || + ifma->ifma_protospec == NULL) + continue; + inm = (struct in_multi *)ifma->ifma_protospec; + if (inm->inm_timer != 0) + continue; + switch (inm->inm_state) { + case IGMP_NOT_MEMBER: + case IGMP_SILENT_MEMBER: + break; + case IGMP_G_QUERY_PENDING_MEMBER: + case IGMP_SG_QUERY_PENDING_MEMBER: + case IGMP_REPORTING_MEMBER: + case IGMP_IDLE_MEMBER: + case IGMP_LAZY_MEMBER: + case IGMP_SLEEPING_MEMBER: + case IGMP_AWAKENING_MEMBER: + inm->inm_state = IGMP_REPORTING_MEMBER; + inm->inm_timer = IGMP_RANDOM_DELAY( + IGMP_V1V2_MAX_RI * PR_FASTHZ); + V_current_state_timers_running = 1; + break; + case IGMP_LEAVING_MEMBER: + break; + } + } + IF_ADDR_UNLOCK(ifp); + +out_locked: + IGMP_UNLOCK(); + IN_MULTI_UNLOCK(); + + return (0); +} + +/* + * Process a received IGMPv2 general or group-specific query. + */ +static int +igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, + const struct igmp *igmp) +{ + struct ifmultiaddr *ifma; + struct igmp_ifinfo *igi; + struct in_multi *inm; + uint16_t timer; + + /* + * Perform lazy allocation of IGMP link info if required, + * and switch to IGMPv2 host compatibility mode. + */ + IN_MULTI_LOCK(); + IGMP_LOCK(); + + igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; + KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp)); + + if (igi->igi_flags & IGIF_LOOPBACK) { + CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)", + ifp, ifp->if_xname); + goto out_locked; + } + + igmp_set_version(igi, IGMP_VERSION_2); + + timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE; + if (timer == 0) + timer = 1; + + if (!in_nullhost(igmp->igmp_group)) { + /* + * IGMPv2 Group-Specific Query. + * If this is a group-specific IGMPv2 query, we need only + * look up the single group to process it. + */ + inm = inm_lookup(ifp, igmp->igmp_group); + if (inm != NULL) { + CTR3(KTR_IGMPV3, "process v2 query %s on ifp %p(%s)", + inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); + igmp_v2_update_group(inm, timer); + } + IGMPSTAT_INC(igps_rcv_group_queries); + } else { + /* + * IGMPv2 General Query. + * If this was not sent to the all-hosts group, ignore it. + */ + if (in_allhosts(ip->ip_dst)) { + /* + * For each reporting group joined on this + * interface, kick the report timer. + */ + CTR2(KTR_IGMPV3, + "process v2 general query on ifp %p(%s)", + ifp, ifp->if_xname); + + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_INET || + ifma->ifma_protospec == NULL) + continue; + inm = (struct in_multi *)ifma->ifma_protospec; + igmp_v2_update_group(inm, timer); + } + IF_ADDR_UNLOCK(ifp); + } + IGMPSTAT_INC(igps_rcv_gen_queries); + } + +out_locked: + IGMP_UNLOCK(); + IN_MULTI_UNLOCK(); + + return (0); +} + +/* + * Update the report timer on a group in response to an IGMPv2 query. + * + * If we are becoming the reporting member for this group, start the timer. + * If we already are the reporting member for this group, and timer is + * below the threshold, reset it. + * + * We may be updating the group for the first time since we switched + * to IGMPv3. If we are, then we must clear any recorded source lists, + * and transition to REPORTING state; the group timer is overloaded + * for group and group-source query responses. + * + * Unlike IGMPv3, the delay per group should be jittered + * to avoid bursts of IGMPv2 reports. + */ +static void +igmp_v2_update_group(struct in_multi *inm, const int timer) +{ + + CTR4(KTR_IGMPV3, "%s: %s/%s timer=%d", __func__, + inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname, timer); + + IN_MULTI_LOCK_ASSERT(); + + switch (inm->inm_state) { + case IGMP_NOT_MEMBER: + case IGMP_SILENT_MEMBER: + break; + case IGMP_REPORTING_MEMBER: + if (inm->inm_timer != 0 && + inm->inm_timer <= timer) { + CTR1(KTR_IGMPV3, "%s: REPORTING and timer running, " + "skipping.", __func__); + break; + } + /* FALLTHROUGH */ + case IGMP_SG_QUERY_PENDING_MEMBER: + case IGMP_G_QUERY_PENDING_MEMBER: + case IGMP_IDLE_MEMBER: + case IGMP_LAZY_MEMBER: + case IGMP_AWAKENING_MEMBER: + CTR1(KTR_IGMPV3, "%s: ->REPORTING", __func__); + inm->inm_state = IGMP_REPORTING_MEMBER; + inm->inm_timer = IGMP_RANDOM_DELAY(timer); + V_current_state_timers_running = 1; + break; + case IGMP_SLEEPING_MEMBER: + CTR1(KTR_IGMPV3, "%s: ->AWAKENING", __func__); + inm->inm_state = IGMP_AWAKENING_MEMBER; + break; + case IGMP_LEAVING_MEMBER: + break; + } +} + +/* + * Process a received IGMPv3 general, group-specific or + * group-and-source-specific query. + * Assumes m has already been pulled up to the full IGMP message length. + * Return 0 if successful, otherwise an appropriate error code is returned. + */ +static int +igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, + /*const*/ struct igmpv3 *igmpv3) +{ + struct igmp_ifinfo *igi; + struct in_multi *inm; + uint32_t maxresp, nsrc, qqi; + uint16_t timer; + uint8_t qrv; + + CTR2(KTR_IGMPV3, "process v3 query on ifp %p(%s)", ifp, ifp->if_xname); + + maxresp = igmpv3->igmp_code; /* in 1/10ths of a second */ + if (maxresp >= 128) { + maxresp = IGMP_MANT(igmpv3->igmp_code) << + (IGMP_EXP(igmpv3->igmp_code) + 3); + } + + /* + * Robustness must never be less than 2 for on-wire IGMPv3. + * FIXME: Check if ifp has IGIF_LOOPBACK set, as we make + * an exception for interfaces whose IGMPv3 state changes + * are redirected to loopback (e.g. MANET). + */ + qrv = IGMP_QRV(igmpv3->igmp_misc); + if (qrv < 2) { + CTR3(KTR_IGMPV3, "%s: clamping qrv %d to %d", __func__, + qrv, IGMP_RV_INIT); + qrv = IGMP_RV_INIT; + } + + qqi = igmpv3->igmp_qqi; + if (qqi >= 128) { + qqi = IGMP_MANT(igmpv3->igmp_qqi) << + (IGMP_EXP(igmpv3->igmp_qqi) + 3); + } + + timer = maxresp * PR_FASTHZ / IGMP_TIMER_SCALE; + if (timer == 0) + timer = 1; + + nsrc = ntohs(igmpv3->igmp_numsrc); + + IN_MULTI_LOCK(); + IGMP_LOCK(); + + igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; + KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp)); + + if (igi->igi_flags & IGIF_LOOPBACK) { + CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)", + ifp, ifp->if_xname); + goto out_locked; + } + + igmp_set_version(igi, IGMP_VERSION_3); + + igi->igi_rv = qrv; + igi->igi_qi = qqi; + igi->igi_qri = maxresp; + + CTR4(KTR_IGMPV3, "%s: qrv %d qi %d qri %d", __func__, qrv, qqi, + maxresp); + + if (in_nullhost(igmpv3->igmp_group)) { + /* + * IGMPv3 General Query. *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***