Skip site navigation (1) Skip section navigation (2)
Date:      Mon, 30 Mar 2026 16:15:40 +0000
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 4c486fe40267 - main - ip_mroute: FIBify
Message-ID:  <69caa1ac.3c306.1426a84@gitrepo.freebsd.org>

index | next in thread | raw e-mail

The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=4c486fe402673c49443293cfb70ad4da61d39916

commit 4c486fe402673c49443293cfb70ad4da61d39916
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2026-03-30 13:31:29 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2026-03-30 16:08:21 +0000

    ip_mroute: FIBify
    
    Modify the control plane (ioctl and socket option handlers) to use the
    routing socket FIB to index into the mfctable array.  Modify the
    forwarding plane to use the mbuf's FIB to determine which routing table
    to use.
    
    MFC after:      2 weeks
    Sponsored by:   Stormshield
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D55241
---
 sys/netinet/in_mcast.c  |   3 +-
 sys/netinet/ip_mroute.c | 319 +++++++++++++++++++++++++-----------------------
 sys/netinet/ip_mroute.h |   5 +-
 sys/netinet/ip_output.c |  11 +-
 sys/netinet/raw_ip.c    |   4 +-
 5 files changed, 180 insertions(+), 162 deletions(-)

diff --git a/sys/netinet/in_mcast.c b/sys/netinet/in_mcast.c
index 20e6f9d8b322..6b9bb599a475 100644
--- a/sys/netinet/in_mcast.c
+++ b/sys/netinet/in_mcast.c
@@ -2702,7 +2702,8 @@ inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
 		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
 		if (error)
 			break;
-		if (!legal_vif_num(vifi) && (vifi != -1)) {
+		if (!legal_vif_num(inp->inp_socket->so_fibnum, vifi) &&
+		    vifi != -1) {
 			error = EINVAL;
 			break;
 		}
diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
index 9ecaa9a669d6..6f2bf8248dd8 100644
--- a/sys/netinet/ip_mroute.c
+++ b/sys/netinet/ip_mroute.c
@@ -306,32 +306,35 @@ static struct pim_encap_pimhdr pim_encap_pimhdr = {
  * Private variables.
  */
 
-static u_long	X_ip_mcast_src(int);
+static u_long	X_ip_mcast_src(int, int);
 static int	X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *,
 		    struct ip_moptions *);
 static void	X_ip_mrouter_done(struct socket *);
 static int	X_ip_mrouter_get(struct socket *, struct sockopt *);
 static int	X_ip_mrouter_set(struct socket *, struct sockopt *);
-static int	X_legal_vif_num(int);
+static int	X_legal_vif_num(int, int);
 static int	X_mrt_ioctl(u_long, caddr_t, int);
 
-static int	add_bw_upcall(struct bw_upcall *);
-static int	add_mfc(struct mfcctl2 *);
-static int	add_vif(struct vifctl *);
+static int	add_bw_upcall(struct mfctable *, struct bw_upcall *);
+static int	add_mfc(struct mfctable *, struct mfcctl2 *);
+static int	add_vif(struct mfctable *, int, struct vifctl *);
 static void	bw_meter_prepare_upcall(struct bw_meter *, struct timeval *);
 static void	bw_meter_geq_receive_packet(struct bw_meter *, int,
 		    struct timeval *);
-static void	bw_upcalls_send(void);
-static int	del_bw_upcall(struct bw_upcall *);
-static int	del_mfc(struct mfcctl2 *);
-static int	del_vif(vifi_t);
-static int	del_vif_locked(vifi_t, struct ifnet **, struct ifnet **);
+static void	bw_upcalls_send(struct mfctable *);
+static void	bw_upcalls_send_all(void);
+static int	del_bw_upcall(struct mfctable *, struct bw_upcall *);
+static int	del_mfc(struct mfctable *, struct mfcctl2 *);
+static int	del_vif(struct mfctable *, vifi_t);
+static int	del_vif_locked(struct mfctable *, vifi_t, struct ifnet **,
+		    struct ifnet **);
 static void	expire_bw_upcalls_send(void *);
 static void	expire_mfc(struct mfc *);
-static void	expire_upcalls(void *);
+static void	expire_upcalls(struct mfctable *);
+static void	expire_upcalls_all(void *);
 static void	free_bw_list(struct bw_meter *);
-static int	get_sg_cnt(struct sioc_sg_req *);
-static int	get_vif_cnt(struct sioc_vif_req *);
+static int	get_sg_cnt(struct mfctable *, struct sioc_sg_req *);
+static int	get_vif_cnt(struct mfctable *, struct sioc_vif_req *);
 static void	if_detached_event(void *, struct ifnet *);
 static int	ip_mdq(struct mfctable *, struct mbuf *, struct ifnet *,
 		    struct mfc *, vifi_t);
@@ -344,13 +347,13 @@ static struct mbuf *
 		pim_register_prepare(struct ip *, struct mbuf *);
 static int	pim_register_send(struct mfctable *, struct ip *, struct vif *,
 		    struct mbuf *, struct mfc *);
-static int	pim_register_send_rp(struct ip *, struct vif *,
-		    struct mbuf *, struct mfc *);
-static int	pim_register_send_upcall(struct ip *, struct vif *,
-		    struct mbuf *, struct mfc *);
+static int	pim_register_send_rp(struct mfctable *, struct ip *,
+		    struct vif *, struct mbuf *, struct mfc *);
+static int	pim_register_send_upcall(struct mfctable *, struct ip *,
+		    struct vif *, struct mbuf *, struct mfc *);
 static void	send_packet(struct vif *, struct mbuf *);
-static int	set_api_config(uint32_t *);
-static int	set_assert(int);
+static int	set_api_config(struct mfctable *, uint32_t *);
+static int	set_assert(struct mfctable *, int);
 static int	socket_send(struct socket *, struct mbuf *,
 		    struct sockaddr_in *);
 
@@ -406,6 +409,17 @@ mfc_alloc(void)
 	return rt;
 }
 
+static struct mfctable *
+somfctable(struct socket *so)
+{
+	int fib;
+
+	fib = atomic_load_int(&so->so_fibnum);
+	KASSERT(fib >= 0 && fib < V_nmfctables,
+	    ("%s: so_fibnum %d out of range", __func__, fib));
+	return (&V_mfctables[fib]);
+}
+
 /*
  * Handle MRT setsockopt commands to modify the multicast forwarding tables.
  */
@@ -420,7 +434,7 @@ X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
 	struct bw_upcall bw_upcall;
 	uint32_t i;
 
-	mfct = &V_mfctables[0];
+	mfct = somfctable(so);
 	if (so != mfct->router && sopt->sopt_name != MRT_INIT)
 		return EPERM;
 
@@ -439,13 +453,13 @@ X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
 		error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc);
 		if (error)
 			break;
-		error = add_vif(&vifc);
+		error = add_vif(mfct, so->so_fibnum, &vifc);
 		break;
 	case MRT_DEL_VIF:
 		error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
 		if (error)
 			break;
-		error = del_vif(vifi);
+		error = del_vif(mfct, vifi);
 		break;
 	case MRT_ADD_MFC:
 	case MRT_DEL_MFC:
@@ -465,22 +479,22 @@ X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
 		if (error)
 			break;
 		if (sopt->sopt_name == MRT_ADD_MFC)
-			error = add_mfc(&mfc);
+			error = add_mfc(mfct, &mfc);
 		else
-			error = del_mfc(&mfc);
+			error = del_mfc(mfct, &mfc);
 		break;
 
 	case MRT_ASSERT:
 		error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 		if (error)
 			break;
-		set_assert(optval);
+		set_assert(mfct, optval);
 		break;
 
 	case MRT_API_CONFIG:
 		error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
 		if (!error)
-			error = set_api_config(&i);
+			error = set_api_config(mfct, &i);
 		if (!error)
 			error = sooptcopyout(sopt, &i, sizeof i);
 		break;
@@ -492,9 +506,9 @@ X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
 		if (error)
 			break;
 		if (sopt->sopt_name == MRT_ADD_BW_UPCALL)
-			error = add_bw_upcall(&bw_upcall);
+			error = add_bw_upcall(mfct, &bw_upcall);
 		else
-			error = del_bw_upcall(&bw_upcall);
+			error = del_bw_upcall(mfct, &bw_upcall);
 		break;
 
 	default:
@@ -513,8 +527,7 @@ X_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
 	struct mfctable *mfct;
 	int error;
 
-	mfct = &V_mfctables[0];
-
+	mfct = somfctable(so);
 	switch (sopt->sopt_name) {
 	case MRT_VERSION:
 		error = sooptcopyout(sopt, &mrt_api_version,
@@ -543,20 +556,23 @@ X_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
  * Handle ioctl commands to obtain information from the cache
  */
 static int
-X_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused)
+X_mrt_ioctl(u_long cmd, caddr_t data, int fibnum)
 {
+	struct mfctable *mfct;
 	int error;
 
 	error = priv_check(curthread, PRIV_NETINET_MROUTE);
 	if (error)
 		return (error);
+
+	mfct = &V_mfctables[fibnum];
 	switch (cmd) {
-	case (SIOCGETVIFCNT):
-		error = get_vif_cnt((struct sioc_vif_req *)data);
+	case SIOCGETVIFCNT:
+		error = get_vif_cnt(mfct, (struct sioc_vif_req *)data);
 		break;
 
-	case (SIOCGETSGCNT):
-		error = get_sg_cnt((struct sioc_sg_req *)data);
+	case SIOCGETSGCNT:
+		error = get_sg_cnt(mfct, (struct sioc_sg_req *)data);
 		break;
 
 	default:
@@ -570,12 +586,12 @@ X_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused)
  * returns the packet, byte, rpf-failure count for the source group provided
  */
 static int
-get_sg_cnt(struct sioc_sg_req *req)
+get_sg_cnt(struct mfctable *mfct, struct sioc_sg_req *req)
 {
 	struct mfc *rt;
 
 	MRW_RLOCK();
-	rt = mfc_find(&V_mfctables[0], &req->src, &req->grp);
+	rt = mfc_find(mfct, &req->src, &req->grp);
 	if (rt == NULL) {
 		MRW_RUNLOCK();
 		req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
@@ -592,13 +608,11 @@ get_sg_cnt(struct sioc_sg_req *req)
  * returns the input and output packet and byte counts on the vif provided
  */
 static int
-get_vif_cnt(struct sioc_vif_req *req)
+get_vif_cnt(struct mfctable *mfct, struct sioc_vif_req *req)
 {
-	struct mfctable *mfct;
 	struct vif *vif;
 	vifi_t vifi;
 
-	mfct = &V_mfctables[0];
 	vifi = req->vifi;
 
 	MRW_RLOCK();
@@ -619,35 +633,26 @@ get_vif_cnt(struct sioc_vif_req *req)
 	return 0;
 }
 
-static void
-if_detached_event(void *arg __unused, struct ifnet *ifp)
+/*
+ * Tear down multicast forwarder state associated with this ifnet.
+ * 1. Walk the vif list, matching vifs against this ifnet.
+ * 2. Walk the multicast forwarding cache (mfc) looking for
+ *    inner matches with this vif's index.
+ * 3. Expire any matching multicast forwarding cache entries.
+ * 4. Free vif state. This should disable ALLMULTI on the interface.
+ */
+static int
+detach_ifnet(struct mfctable *mfct, struct ifnet *ifp)
 {
-	struct mfctable *mfct;
-	vifi_t vifi;
-	u_long i, vifi_cnt = 0;
 	struct ifnet *free_ptr, *multi_leave;
+	int count;
 
-	mfct = &V_mfctables[0];
-
-	MRW_WLOCK();
-	if (!V_ip_mrouting_enabled) {
-		MRW_WUNLOCK();
-		return;
-	}
-
-	/*
-	 * Tear down multicast forwarder state associated with this ifnet.
-	 * 1. Walk the vif list, matching vifs against this ifnet.
-	 * 2. Walk the multicast forwarding cache (mfc) looking for
-	 *    inner matches with this vif's index.
-	 * 3. Expire any matching multicast forwarding cache entries.
-	 * 4. Free vif state. This should disable ALLMULTI on the interface.
-	 */
+	count = 0;
 restart:
-	for (vifi = 0; vifi < mfct->numvifs; vifi++) {
+	for (vifi_t vifi = 0; vifi < mfct->numvifs; vifi++) {
 		if (mfct->viftable[vifi].v_ifp != ifp)
 			continue;
-		for (i = 0; i < mfchashsize; i++) {
+		for (u_long i = 0; i < mfchashsize; i++) {
 			struct mfc *rt, *nrt;
 
 			LIST_FOREACH_SAFE(rt, &mfct->mfchashtbl[i], mfc_hash,
@@ -657,9 +662,9 @@ restart:
 				}
 			}
 		}
-		del_vif_locked(vifi, &multi_leave, &free_ptr);
+		del_vif_locked(mfct, vifi, &multi_leave, &free_ptr);
 		if (free_ptr != NULL)
-			vifi_cnt++;
+			count++;
 		if (multi_leave) {
 			MRW_WUNLOCK();
 			if_allmulti(multi_leave, 0);
@@ -667,15 +672,26 @@ restart:
 			goto restart;
 		}
 	}
+	return (count);
+}
+
+static void
+if_detached_event(void *arg __unused, struct ifnet *ifp)
+{
+	int count;
+
+	MRW_WLOCK();
+	if (!V_ip_mrouting_enabled) {
+		MRW_WUNLOCK();
+		return;
+	}
 
+	count = 0;
+	for (int i = 0; i < V_nmfctables; i++)
+		count += detach_ifnet(&V_mfctables[i], ifp);
 	MRW_WUNLOCK();
 
-	/*
-	 * Free IFP. We don't have to use free_ptr here as it is the same
-	 * that ifp. Perform free as many times as required in case
-	 * refcount is greater than 1.
-	 */
-	for (i = 0; i < vifi_cnt; i++)
+	for (int i = 0; i < count; i++)
 		if_free(ifp);
 }
 
@@ -685,7 +701,7 @@ ip_mrouter_upcall_thread(void *arg, int pending __unused)
 	CURVNET_SET((struct vnet *) arg);
 
 	MRW_WLOCK();
-	bw_upcalls_send();
+	bw_upcalls_send_all();
 	MRW_WUNLOCK();
 
 	CURVNET_RESTORE();
@@ -704,7 +720,7 @@ ip_mrouter_init(struct socket *so, int version)
 	if (version != 1)
 		return ENOPROTOOPT;
 
-	mfct = &V_mfctables[0];
+	mfct = somfctable(so);
 	MRW_TEARDOWN_WLOCK();
 	MRW_WLOCK();
 
@@ -743,7 +759,7 @@ ip_mrouter_init(struct socket *so, int version)
 	taskqueue_cancel(V_task_queue, &V_task, NULL);
 	taskqueue_unblock(V_task_queue);
 
-	callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls,
+	callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls_all,
 	    curvnet);
 	callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send,
 	    curvnet);
@@ -774,7 +790,7 @@ X_ip_mrouter_done(struct socket *so)
 	vifi_t vifi;
 	struct bw_upcall *bu;
 
-	mfct = &V_mfctables[0];
+	mfct = somfctable(so);
 
 	MRW_TEARDOWN_WLOCK();
 	if (so != mfct->router) {
@@ -835,9 +851,6 @@ X_ip_mrouter_done(struct socket *so)
 	mfct->numvifs = 0;
 	mfct->pim_assert_enabled = 0;
 
-	callout_stop(&V_expire_upcalls_ch);
-	callout_stop(&V_bw_upcalls_ch);
-
 	/*
 	 * Free all multicast forwarding cache entries.
 	 * Do not use hashdestroy(), as we must perform other cleanup.
@@ -873,12 +886,12 @@ X_ip_mrouter_done(struct socket *so)
  * Set PIM assert processing global
  */
 static int
-set_assert(int i)
+set_assert(struct mfctable *mfct, int i)
 {
 	if ((i != 1) && (i != 0))
 		return EINVAL;
 
-	V_mfctables[0].pim_assert_enabled = i;
+	mfct->pim_assert_enabled = i;
 
 	return 0;
 }
@@ -887,9 +900,8 @@ set_assert(int i)
  * Configure API capabilities
  */
 int
-set_api_config(uint32_t *apival)
+set_api_config(struct mfctable *mfct, uint32_t *apival)
 {
-	struct mfctable *mfct;
 	u_long i;
 
 	/*
@@ -899,7 +911,6 @@ set_api_config(uint32_t *apival)
 	 *  - pim_assert is not enabled
 	 *  - the MFC table is empty
 	 */
-	mfct = &V_mfctables[0];
 	if (mfct->numvifs > 0) {
 		*apival = 0;
 		return EPERM;
@@ -931,9 +942,8 @@ set_api_config(uint32_t *apival)
  * Add a vif to the vif table
  */
 static int
-add_vif(struct vifctl *vifcp)
+add_vif(struct mfctable *mfct, int fibnum, struct vifctl *vifcp)
 {
-	struct mfctable *mfct;
 	struct vif *vifp;
 	struct sockaddr_in sin = {sizeof sin, AF_INET};
 	struct ifaddr *ifa;
@@ -943,7 +953,6 @@ add_vif(struct vifctl *vifcp)
 	if (vifcp->vifc_vifi >= MAXVIFS)
 		return EINVAL;
 
-	mfct = &V_mfctables[0];
 	vifp = &mfct->viftable[vifcp->vifc_vifi];
 
 	/* rate limiting is no longer supported by this code */
@@ -974,6 +983,10 @@ add_vif(struct vifctl *vifcp)
 			return EADDRNOTAVAIL;
 		}
 		ifp = ifa->ifa_ifp;
+		if (ifp->if_fib != fibnum) {
+			NET_EPOCH_EXIT(et);
+			return EADDRNOTAVAIL;
+		}
 		/* XXX FIXME we need to take a ref on ifp and cleanup properly! */
 		NET_EPOCH_EXIT(et);
 	}
@@ -1039,12 +1052,11 @@ add_vif(struct vifctl *vifcp)
  * Delete a vif from the vif table
  */
 static int
-del_vif_locked(vifi_t vifi, struct ifnet **ifp_multi_leave, struct ifnet **ifp_free)
+del_vif_locked(struct mfctable *mfct, vifi_t vifi,
+    struct ifnet **ifp_multi_leave, struct ifnet **ifp_free)
 {
-	struct mfctable *mfct;
 	struct vif *vifp;
 
-	mfct = &V_mfctables[0];
 	*ifp_free = NULL;
 	*ifp_multi_leave = NULL;
 
@@ -1086,13 +1098,13 @@ del_vif_locked(vifi_t vifi, struct ifnet **ifp_multi_leave, struct ifnet **ifp_f
 }
 
 static int
-del_vif(vifi_t vifi)
+del_vif(struct mfctable *mfct, vifi_t vifi)
 {
 	int cc;
 	struct ifnet *free_ptr, *multi_leave;
 
 	MRW_WLOCK();
-	cc = del_vif_locked(vifi, &multi_leave, &free_ptr);
+	cc = del_vif_locked(mfct, vifi, &multi_leave, &free_ptr);
 	MRW_WUNLOCK();
 
 	if (multi_leave)
@@ -1170,17 +1182,14 @@ expire_mfc(struct mfc *rt)
  * Add an mfc entry
  */
 static int
-add_mfc(struct mfcctl2 *mfccp)
+add_mfc(struct mfctable *mfct, struct mfcctl2 *mfccp)
 {
 	struct mfc *rt;
-	struct mfctable *mfct;
 	struct rtdetq *rte;
 	u_long hash = 0;
 	u_short nstl;
 	struct epoch_tracker et;
 
-	mfct = &V_mfctables[0];
-
 	MRW_WLOCK();
 	rt = mfc_find(mfct, &mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp);
 
@@ -1273,9 +1282,8 @@ add_mfc(struct mfcctl2 *mfccp)
  * Delete an mfc entry
  */
 static int
-del_mfc(struct mfcctl2 *mfccp)
+del_mfc(struct mfctable *mfct, struct mfcctl2 *mfccp)
 {
-	struct mfctable *mfct;
 	struct in_addr origin;
 	struct in_addr mcastgrp;
 	struct mfc *rt;
@@ -1286,7 +1294,6 @@ del_mfc(struct mfcctl2 *mfccp)
 	CTR3(KTR_IPMF, "%s: delete mfc orig 0x%08x group %lx", __func__,
 			ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr));
 
-	mfct = &V_mfctables[0];
 	MRW_WLOCK();
 
 	LIST_FOREACH(rt, &mfct->mfchashtbl[MFCHASH(origin, mcastgrp)],
@@ -1371,12 +1378,15 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
 		return (1);
 	}
 
-	mfct = &V_mfctables[0];
+	mfct = &V_mfctables[M_GETFIB(m)];
 
 	/*
 	 * BEGIN: MCAST ROUTING HOT PATH
 	 */
 	MRW_RLOCK();
+	if (__predict_false(mfct->router == NULL))
+		return (EADDRNOTAVAIL);
+
 	if (imo && ((vifi = imo->imo_multicast_vif) < mfct->numvifs)) {
 		if (ip->ip_ttl < MAXTTL)
 			ip->ip_ttl++; /* compensate for -1 in *_send routines */
@@ -1556,21 +1566,10 @@ mfc_find_retry:
 	return 0;
 }
 
-/*
- * Clean up the cache entry if upcall is not serviced
- */
 static void
-expire_upcalls(void *arg)
+expire_upcalls(struct mfctable *mfct)
 {
-	struct mfctable *mfct;
-	u_long i;
-
-	CURVNET_SET((struct vnet *) arg);
-
-	MRW_LOCK_ASSERT();
-
-	mfct = &V_mfctables[0];
-	for (i = 0; i < mfchashsize; i++) {
+	for (u_long i = 0; i < mfchashsize; i++) {
 		struct mfc *rt, *nrt;
 
 		if (mfct->nexpire[i] == 0)
@@ -1591,8 +1590,22 @@ expire_upcalls(void *arg)
 			expire_mfc(rt);
 		}
 	}
+}
+
+/*
+ * Clean up the cache entry if upcall is not serviced
+ */
+static void
+expire_upcalls_all(void *arg)
+{
+	CURVNET_SET((struct vnet *)arg);
+
+	MRW_LOCK_ASSERT();
+
+	for (int i = 0; i < V_nmfctables; i++)
+		expire_upcalls(&V_mfctables[i]);
 
-	callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls,
+	callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls_all,
 	    curvnet);
 
 	CURVNET_RESTORE();
@@ -1757,16 +1770,18 @@ ip_mdq(struct mfctable *mfct, struct mbuf *m, struct ifnet *ifp, struct mfc *rt,
  * Check if a vif number is legal/ok. This is used by in_mcast.c.
  */
 static int
-X_legal_vif_num(int vif)
+X_legal_vif_num(int fibnum, int vif)
 {
+	struct mfctable *mfct;
 	int ret;
 
 	ret = 0;
 	if (vif < 0)
 		return (ret);
 
+	mfct = &V_mfctables[fibnum];
 	MRW_RLOCK();
-	if (vif < V_mfctables[0].numvifs)
+	if (vif < mfct->numvifs)
 		ret = 1;
 	MRW_RUNLOCK();
 
@@ -1777,17 +1792,19 @@ X_legal_vif_num(int vif)
  * Return the local address used by this vif
  */
 static u_long
-X_ip_mcast_src(int vifi)
+X_ip_mcast_src(int fibnum, int vifi)
 {
+	struct mfctable *mfct;
 	in_addr_t addr;
 
 	addr = INADDR_ANY;
 	if (vifi < 0)
 		return (addr);
 
+	mfct = &V_mfctables[fibnum];
 	MRW_RLOCK();
-	if (vifi < V_mfctables[0].numvifs)
-		addr = V_mfctables[0].viftable[vifi].v_lcl_addr.s_addr;
+	if (vifi < mfct->numvifs)
+		addr = mfct->viftable[vifi].v_lcl_addr.s_addr;
 	MRW_RUNLOCK();
 
 	return (addr);
@@ -1838,8 +1855,6 @@ send_packet(struct vif *vifp, struct mbuf *m)
 	 * the loopback interface, thus preventing looping.
 	 */
 	error = ip_output(m, NULL, NULL, IP_FORWARDING, &imo, NULL);
-	CTR3(KTR_IPMF, "%s: vif %td err %d", __func__,
-	    (ptrdiff_t)(vifp - V_mfctables[0].viftable), error);
 }
 
 /*
@@ -1945,9 +1960,8 @@ expire_bw_meter_leq(void *arg)
  * Add a bw_meter entry
  */
 static int
-add_bw_upcall(struct bw_upcall *req)
+add_bw_upcall(struct mfctable *mfct, struct bw_upcall *req)
 {
-	struct mfctable *mfct;
 	struct mfc *mfc;
 	struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC,
 	BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC };
@@ -1955,8 +1969,6 @@ add_bw_upcall(struct bw_upcall *req)
 	struct bw_meter *x, **bwm_ptr;
 	uint32_t flags;
 
-	mfct = &V_mfctables[0];
-
 	if (!(mfct->api_config & MRT_MFC_BW_UPCALL))
 		return EOPNOTSUPP;
 
@@ -1978,7 +1990,7 @@ add_bw_upcall(struct bw_upcall *req)
 	 * Find if we have already same bw_meter entry
 	 */
 	MRW_WLOCK();
-	mfc = mfc_find(&V_mfctables[0], &req->bu_src, &req->bu_dst);
+	mfc = mfc_find(mfct, &req->bu_src, &req->bu_dst);
 	if (mfc == NULL) {
 		MRW_WUNLOCK();
 		return EADDRNOTAVAIL;
@@ -2022,6 +2034,7 @@ add_bw_upcall(struct bw_upcall *req)
 	x->bm_flags = flags;
 	x->bm_time_next = NULL;
 	x->bm_mfc = mfc;
+	x->bm_mfctable = mfct;
 	x->arg = curvnet;
 	sprintf(x->bm_mtx_name, "BM mtx %p", x);
 	mtx_init(&x->bm_mtx, x->bm_mtx_name, NULL, MTX_DEF);
@@ -2063,14 +2076,11 @@ free_bw_list(struct bw_meter *list)
  * Delete one or multiple bw_meter entries
  */
 static int
-del_bw_upcall(struct bw_upcall *req)
+del_bw_upcall(struct mfctable *mfct, struct bw_upcall *req)
 {
-	struct mfctable *mfct;
 	struct mfc *mfc;
 	struct bw_meter *x, **bwm_ptr;
 
-	mfct = &V_mfctables[0];
-
 	if (!(mfct->api_config & MRT_MFC_BW_UPCALL))
 		return EOPNOTSUPP;
 
@@ -2191,14 +2201,11 @@ bw_meter_geq_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp)
 static void
 bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp)
 {
-	struct mfctable *mfct;
 	struct timeval delta;
 	struct bw_upcall *u;
 
 	MRW_LOCK_ASSERT();
 
-	mfct = &V_mfctables[0];
-
 	/*
 	 * Compute the measured time interval
 	 */
@@ -2231,9 +2238,9 @@ bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp)
 	if (x->bm_flags & BW_METER_LEQ)
 		u->bu_flags |= BW_UPCALL_LEQ;
 
-	if (buf_ring_enqueue(mfct->bw_upcalls, u))
+	if (buf_ring_enqueue(x->bm_mfctable->bw_upcalls, u))
 		log(LOG_WARNING, "bw_meter_prepare_upcall: cannot enqueue upcall\n");
-	if (buf_ring_count(mfct->bw_upcalls) > (BW_UPCALLS_MAX / 2)) {
+	if (buf_ring_count(x->bm_mfctable->bw_upcalls) > (BW_UPCALLS_MAX / 2)) {
 		taskqueue_enqueue(V_task_queue, &V_task);
 	}
 }
@@ -2241,9 +2248,8 @@ bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp)
  * Send the pending bandwidth-related upcalls
  */
 static void
-bw_upcalls_send(void)
+bw_upcalls_send(struct mfctable *mfct)
 {
-	struct mfctable *mfct;
 	struct mbuf *m;
 	int len = 0;
 	struct bw_upcall *bu;
@@ -2259,8 +2265,6 @@ bw_upcalls_send(void)
 		{ 0 }		/* im_dst  */
 	};
 
-	mfct = &V_mfctables[0];
-
 	MRW_LOCK_ASSERT();
 
 	if (buf_ring_empty(mfct->bw_upcalls))
@@ -2295,6 +2299,18 @@ bw_upcalls_send(void)
 	}
 }
 
+static void
+bw_upcalls_send_all(void)
+{
+	for (int i = 0; i < V_nmfctables; i++) {
+		struct mfctable *mfct;
+
+		mfct = &V_mfctables[i];
+		if (mfct->router != NULL)
+			bw_upcalls_send(mfct);
+	}
+}
+
 /*
  * A periodic function for sending all upcalls that are pending delivery
  */
@@ -2305,7 +2321,7 @@ expire_bw_upcalls_send(void *arg)
 
 	/* This callout is run with MRW_RLOCK taken */
 
-	bw_upcalls_send();
+	bw_upcalls_send_all();
 
 	callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send,
 	    curvnet);
@@ -2350,9 +2366,10 @@ pim_register_send(struct mfctable *mfct, struct ip *ip, struct vif *vifp,
 			ip = mtod(mm, struct ip *);
 			if ((mfct->api_config & MRT_MFC_RP) &&
 			    !in_nullhost(rt->mfc_rp)) {
-				pim_register_send_rp(ip, vifp, mm, rt);
+				pim_register_send_rp(mfct, ip, vifp, mm, rt);
 			} else {
-				pim_register_send_upcall(ip, vifp, mm, rt);
+				pim_register_send_upcall(mfct, ip, vifp, mm,
+				    rt);
 			}
 		}
 	}
@@ -2414,10 +2431,9 @@ pim_register_prepare(struct ip *ip, struct mbuf *m)
  * Send an upcall with the data packet to the user-level process.
  */
 static int
-pim_register_send_upcall(struct ip *ip, struct vif *vifp,
+pim_register_send_upcall(struct mfctable *mfct, struct ip *ip, struct vif *vifp,
     struct mbuf *mb_copy, struct mfc *rt)
 {
-	struct mfctable *mfct;
 	struct mbuf *mb_first;
 	int len = ntohs(ip->ip_len);
 	struct igmpmsg *im;
@@ -2425,8 +2441,6 @@ pim_register_send_upcall(struct ip *ip, struct vif *vifp,
 
 	MRW_LOCK_ASSERT();
 
-	mfct = &V_mfctables[0];
-
 	/*
 	 * Add a new mbuf with an upcall header
 	 */
@@ -2469,10 +2483,9 @@ pim_register_send_upcall(struct ip *ip, struct vif *vifp,
  * Encapsulate the data packet in PIM Register message and send it to the RP.
  */
 static int
-pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
-    struct mfc *rt)
+pim_register_send_rp(struct mfctable *mfct, struct ip *ip, struct vif *vifp,
+    struct mbuf *mb_copy, struct mfc *rt)
 {
-	struct mfctable *mfct;
 	struct mbuf *mb_first;
 	struct ip *ip_outer;
 	struct pim_encap_pimhdr *pimhdr;
@@ -2481,7 +2494,6 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
 
 	MRW_LOCK_ASSERT();
 
-	mfct = &V_mfctables[0];
 	if (vifi >= mfct->numvifs ||
 	    in_nullhost(mfct->viftable[vifi].v_lcl_addr)) {
 		m_freem(mb_copy);
@@ -2572,7 +2584,7 @@ pim_input(struct mbuf *m, int off, int proto, void *arg __unused)
 	int datalen = ntohs(ip->ip_len) - iphlen;
 	int ip_tos;
 
-	mfct = &V_mfctables[0];
+	mfct = &V_mfctables[M_GETFIB(m)];
 
 	/* Keep statistics */
 	PIMSTAT_INC(pims_rcv_total_msgs);
@@ -2789,7 +2801,7 @@ sysctl_mfctable(SYSCTL_HANDLER_ARGS)
 
 	if (req->newptr)
 		return (EPERM);
-	mfct = &V_mfctables[0];
+	mfct = &V_mfctables[curthread->td_proc->p_fibnum];
 	if (mfct->mfchashtbl == NULL)	/* XXX unlocked */
 		return (0);
 	error = sysctl_wire_old_buffer(req, 0);
@@ -2828,7 +2840,7 @@ sysctl_viflist(SYSCTL_HANDLER_ARGS)
 	if (error)
 		return (error);
 
-	mfct = &V_mfctables[0];
+	mfct = &V_mfctables[curthread->td_proc->p_fibnum];
 	MRW_RLOCK();
 	/* Copy out user-visible portion of vif entry. */
 	for (i = 0; i < MAXVIFS; i++) {
@@ -2909,6 +2921,9 @@ vnet_mroute_uninit(const void *unused __unused)
 		free(mfct->nexpire, M_MRTABLE);
 	}
 	free(V_mfctables, M_MRTABLE);
+
+	callout_drain(&V_expire_upcalls_ch);
+	callout_drain(&V_bw_upcalls_ch);
 }
 VNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE,
     vnet_mroute_uninit, NULL);
diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h
index 5c2527ea64e5..d7e53b678474 100644
--- a/sys/netinet/ip_mroute.h
+++ b/sys/netinet/ip_mroute.h
@@ -348,6 +348,7 @@ struct bw_meter {
 	struct bw_data	bm_measured;		/* the measured bw	     */
 	struct timeval	bm_start_time;		/* abs. time		     */
 #ifdef _KERNEL
+	struct mfctable *bm_mfctable;		/* Routing table             */
 	struct callout	bm_meter_callout;	/* Periodic callout          */
 	void*		arg;			/* custom argument           */
 	struct mtx 	bm_mtx;			/* meter lock                */
@@ -366,7 +367,7 @@ struct mbuf;
 struct socket;
 struct sockopt;
 
-extern u_long	(*ip_mcast_src)(int);
+extern u_long	(*ip_mcast_src)(int, int);
 extern int	(*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 		    struct ip_moptions *);
 extern void	(*ip_mrouter_done)(struct socket *);
@@ -376,7 +377,7 @@ extern int	(*ip_mrouter_set)(struct socket *, struct sockopt *);
 extern void	(*ip_rsvp_force_done)(struct socket *);
 extern int	(*ip_rsvp_vif)(struct socket *, struct sockopt *);
 
-extern int	(*legal_vif_num)(int);
+extern int	(*legal_vif_num)(int, int);
 extern int	(*mrt_ioctl)(u_long, caddr_t, int);
 extern int	(*rsvp_input_p)(struct mbuf **, int *, int);
 #endif /* _KERNEL */
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index 9431d5f0d7b2..200f281f34a7 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -561,12 +561,13 @@ again:
 		 * See if the caller provided any multicast options
 		 */
 		if (imo != NULL) {
+			int vif;
+
+			vif = imo->imo_multicast_vif;
 			ip->ip_ttl = imo->imo_multicast_ttl;
-			if (imo->imo_multicast_vif != -1)
-				ip->ip_src.s_addr =
-				    ip_mcast_src ?
-				    ip_mcast_src(imo->imo_multicast_vif) :
-				    INADDR_ANY;
+			if (vif != -1)
+				ip->ip_src.s_addr = ip_mcast_src ?
+				    ip_mcast_src(fibnum, vif) : INADDR_ANY;
 		} else
 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
 		/*
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 9b17d0d80327..a8a4fc1df9e2 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -118,8 +118,8 @@ void (*ip_mrouter_done)(struct socket *);
 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 		   struct ip_moptions *);
 int (*mrt_ioctl)(u_long, caddr_t, int);
-int (*legal_vif_num)(int);
-u_long (*ip_mcast_src)(int);
+int (*legal_vif_num)(int, int);
+u_long (*ip_mcast_src)(int, int);
 
 int (*rsvp_input_p)(struct mbuf **, int *, int);
 int (*ip_rsvp_vif)(struct socket *, struct sockopt *);


home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69caa1ac.3c306.1426a84>