Date: Wed, 27 Aug 2003 15:11:49 -0700 (PDT) From: Sam Leffler <sam@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 37040 for review Message-ID: <200308272211.h7RMBnYn026778@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=37040 Change 37040 by sam@sam_ebb on 2003/08/27 15:11:34 Checkpoint multicast routing locking. From the code: We use two locks: one for the virtual interface table and one for the forwarding table. These locks may be nested in which case the VIF lock must always be taken first. Note that each lock is used to cover not only the specific data structure but also related data structures. It may be better to add more fine-grained locking later; it's not clear to me how performance-critical this code is. Needs thorough review and testing. Affected files ... .. //depot/projects/netperf/sys/netinet/ip_mroute.c#6 edit Differences ... ==== //depot/projects/netperf/sys/netinet/ip_mroute.c#6 (text+ko) ==== @@ -81,6 +81,15 @@ static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables"); +/* + * Locking. We use two locks: one for the virtual interface table and + * one for the forwarding table. These locks may be nested in which case + * the VIF lock must always be taken first. Note that each lock is used + * to cover not only the specific data structure but also related data + * structures. It may be better to add more fine-grained locking later; + * it's not clear to me how performance-critical this code is. 
+ */ + static struct mrtstat mrtstat; SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW, &mrtstat, mrtstat, @@ -91,14 +100,28 @@ &mfctable, sizeof(mfctable), "S,*mfc[MFCTBLSIZ]", "Multicast Forwarding Table (struct *mfc[MFCTBLSIZ], netinet/ip_mroute.h)"); +static struct mtx mfc_mtx; +#define MFC_LOCK() mtx_lock(&mfc_mtx) +#define MFC_UNLOCK() mtx_unlock(&mfc_mtx) +#define MFC_LOCK_ASSERT() mtx_assert(&mfc_mtx, MA_OWNED) +#define MFC_LOCK_INIT() mtx_init(&mfc_mtx, "mroute mfc table", NULL, MTX_DEF) +#define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx) + static struct vif viftable[MAXVIFS]; SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD, &viftable, sizeof(viftable), "S,vif[MAXVIFS]", "Multicast Virtual Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)"); +static struct mtx vif_mtx; +#define VIF_LOCK() mtx_lock(&vif_mtx) +#define VIF_UNLOCK() mtx_unlock(&vif_mtx) +#define VIF_LOCK_ASSERT() mtx_assert(&vif_mtx, MA_OWNED) +#define VIF_LOCK_INIT() mtx_init(&vif_mtx, "mroute vif table", NULL, MTX_DEF) +#define VIF_LOCK_DESTROY() mtx_destroy(&vif_mtx) + static u_char nexpire[MFCTBLSIZ]; -static struct callout_handle expire_upcalls_ch; +static struct callout expire_upcalls_ch; #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ #define UPCALL_EXPIRE 6 /* number of timeouts */ @@ -149,7 +172,7 @@ */ #define BW_METER_BUCKETS 1024 static struct bw_meter *bw_meter_timers[BW_METER_BUCKETS]; -static struct callout_handle bw_meter_ch; +static struct callout bw_meter_ch; #define BW_METER_PERIOD (hz) /* periodical handling of bw meters */ /* @@ -158,7 +181,7 @@ */ static struct bw_upcall bw_upcalls[BW_UPCALLS_MAX]; static u_int bw_upcalls_n; /* # of pending upcalls */ -static struct callout_handle bw_upcalls_ch; +static struct callout bw_upcalls_ch; #define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */ #ifdef PIM @@ -226,6 +249,11 @@ static u_long last_encap_src; static struct vif *last_encap_vif; +/* + * Callout for queue processing. 
+ */ +static struct callout tbf_reprocess_ch; + static u_long X_ip_mcast_src(int vifi); static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo); @@ -321,6 +349,8 @@ { struct mfc *rt; + MFC_LOCK_ASSERT(); + for (rt = mfctable[MFCHASH(o,g)]; rt; rt = rt->mfc_next) if ((rt->mfc_origin.s_addr == o) && (rt->mfc_mcastgrp.s_addr == g) && (rt->mfc_stall == NULL)) @@ -515,12 +545,11 @@ static int get_sg_cnt(struct sioc_sg_req *req) { - int s; struct mfc *rt; - s = splnet(); + MFC_LOCK(); rt = mfc_find(req->src.s_addr, req->grp.s_addr); - splx(s); + MFC_UNLOCK(); if (rt == NULL) { req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; return EADDRNOTAVAIL; @@ -572,16 +601,24 @@ ip_mrouter = so; bzero((caddr_t)mfctable, sizeof(mfctable)); + MFC_LOCK_INIT(); + VIF_LOCK_INIT(); bzero((caddr_t)nexpire, sizeof(nexpire)); pim_assert = 0; - expire_upcalls_ch = timeout(expire_upcalls, NULL, EXPIRE_TIMEOUT); + callout_init(&expire_upcalls_ch, CALLOUT_MPSAFE); + callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL); bw_upcalls_n = 0; bzero((caddr_t)bw_meter_timers, sizeof(bw_meter_timers)); - bw_upcalls_ch = timeout(expire_bw_upcalls_send, NULL, BW_UPCALLS_PERIOD); - bw_meter_ch = timeout(expire_bw_meter_process, NULL, BW_METER_PERIOD); + callout_init(&bw_upcalls_ch, CALLOUT_MPSAFE); + callout_reset(&bw_upcalls_ch, BW_UPCALLS_PERIOD, + expire_bw_upcalls_send, NULL); + callout_init(&bw_meter_ch, CALLOUT_MPSAFE); + callout_reset(&bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, NULL); + + callout_init(&tbf_reprocess_ch, CALLOUT_MPSAFE); mrt_api_config = 0; @@ -603,9 +640,19 @@ struct ifreq ifr; struct mfc *rt; struct rtdetq *rte; - int s; + + /* + * Detach/disable hooks to the rest of the system. 
+ */ + ip_mrouter = NULL; + mrt_api_config = 0; - s = splnet(); + VIF_LOCK(); + if (encap_cookie) { + encap_detach(encap_cookie); + encap_cookie = NULL; + } + callout_stop(&tbf_reprocess_ch); /* * For each phyint in use, disable promiscuous reception of all IP @@ -627,17 +674,17 @@ bzero((caddr_t)viftable, sizeof(viftable)); numvifs = 0; pim_assert = 0; - - untimeout(expire_upcalls, NULL, expire_upcalls_ch); - - mrt_api_config = 0; - bw_upcalls_n = 0; - untimeout(expire_bw_upcalls_send, NULL, bw_upcalls_ch); - untimeout(expire_bw_meter_process, NULL, bw_meter_ch); + VIF_UNLOCK(); + VIF_LOCK_DESTROY(); /* * Free all multicast forwarding cache entries. */ + MFC_LOCK(); + callout_stop(&expire_upcalls_ch); + callout_stop(&bw_upcalls_ch); + callout_stop(&bw_meter_ch); + for (i = 0; i < MFCTBLSIZ; i++) { for (rt = mfctable[i]; rt != NULL; ) { struct mfc *nr = rt->mfc_next; @@ -654,10 +701,11 @@ rt = nr; } } - bzero((caddr_t)mfctable, sizeof(mfctable)); - + bw_upcalls_n = 0; bzero(bw_meter_timers, sizeof(bw_meter_timers)); + MFC_UNLOCK(); + MFC_LOCK_DESTROY(); /* * Reset de-encapsulation cache @@ -668,15 +716,6 @@ reg_vif_num = VIFI_INVALID; #endif - if (encap_cookie) { - encap_detach(encap_cookie); - encap_cookie = NULL; - } - - ip_mrouter = NULL; - - splx(s); - if (mrtdebug) log(LOG_DEBUG, "ip_mrouter_done\n"); @@ -824,15 +863,22 @@ struct sockaddr_in sin = {sizeof sin, AF_INET}; struct ifaddr *ifa; struct ifnet *ifp; - int error, s; + int error; struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; - if (vifcp->vifc_vifi >= MAXVIFS) + VIF_LOCK(); + if (vifcp->vifc_vifi >= MAXVIFS) { + VIF_UNLOCK(); return EINVAL; - if (vifp->v_lcl_addr.s_addr != INADDR_ANY) + } + if (vifp->v_lcl_addr.s_addr != INADDR_ANY) { + VIF_UNLOCK(); return EADDRINUSE; - if (vifcp->vifc_lcl_addr.s_addr == INADDR_ANY) + } + if (vifcp->vifc_lcl_addr.s_addr == INADDR_ANY) { + VIF_UNLOCK(); return EADDRNOTAVAIL; + } /* Find the interface with an address in AF_INET family */ #ifdef PIM @@ -861,17 +907,20 
@@ * to encapsulated packets. */ if (encap_cookie == NULL) { + int i; + encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4, mroute_encapcheck, (struct protosw *)&mroute_encap_protosw, NULL); if (encap_cookie == NULL) { printf("ip_mroute: unable to attach encap\n"); + VIF_UNLOCK(); return EIO; /* XXX */ } - for (s = 0; s < MAXVIFS; ++s) { - multicast_decap_if[s].if_name = "mdecap"; - multicast_decap_if[s].if_unit = s; + for (i = 0; i < MAXVIFS; ++i) { + multicast_decap_if[i].if_name = "mdecap"; + multicast_decap_if[i].if_unit = i; } } /* @@ -884,6 +933,7 @@ bzero(&vifp->v_route, sizeof(vifp->v_route)); } else { log(LOG_ERR, "source routed tunnels not supported\n"); + VIF_UNLOCK(); return EOPNOTSUPP; } #ifdef PIM @@ -901,18 +951,19 @@ } #endif } else { /* Make sure the interface supports multicast */ - if ((ifp->if_flags & IFF_MULTICAST) == 0) + if ((ifp->if_flags & IFF_MULTICAST) == 0) { + VIF_UNLOCK(); return EOPNOTSUPP; + } /* Enable promiscuous reception of all IP multicasts from the if */ - s = splnet(); error = if_allmulti(ifp, 1); - splx(s); - if (error) + if (error) { + VIF_UNLOCK(); return error; + } } - s = splnet(); /* define parameters for the tbf structure */ vifp->v_tbf = v_tbf; GET_TIME(vifp->v_tbf->tbf_last_pkt_t); @@ -935,11 +986,12 @@ vifp->v_pkt_out = 0; vifp->v_bytes_in = 0; vifp->v_bytes_out = 0; - splx(s); /* Adjust numvifs up if the vifi is higher than numvifs */ if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; + VIF_UNLOCK(); + if (mrtdebug) log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n", vifcp->vifc_vifi, @@ -959,16 +1011,19 @@ del_vif(vifi_t vifi) { struct vif *vifp; - int s; + + VIF_LOCK(); - if (vifi >= numvifs) + if (vifi >= numvifs) { + VIF_UNLOCK(); return EINVAL; + } vifp = &viftable[vifi]; - if (vifp->v_lcl_addr.s_addr == INADDR_ANY) + if (vifp->v_lcl_addr.s_addr == INADDR_ANY) { + VIF_UNLOCK(); return EADDRNOTAVAIL; + } - s = splnet(); - if (!(vifp->v_flags & (VIFF_TUNNEL | 
VIFF_REGISTER))) if_allmulti(vifp->v_ifp, 0); @@ -1004,7 +1059,7 @@ break; numvifs = vifi; - splx(s); + VIF_UNLOCK(); return 0; } @@ -1061,6 +1116,8 @@ u_short nstl; int s; + MFC_LOCK(); + rt = mfc_find(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); /* If an entry already exists, just update the fields */ @@ -1071,16 +1128,14 @@ (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent); - s = splnet(); update_mfc_params(rt, mfccp); - splx(s); + MFC_UNLOCK(); return 0; } /* * Find the entry for which the upcall was made and update */ - s = splnet(); hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) { @@ -1156,7 +1211,7 @@ mfctable[hash] = rt; } } - splx(s); + MFC_UNLOCK(); return 0; } @@ -1171,7 +1226,6 @@ struct mfc *rt; struct mfc **nptr; u_long hash; - int s; struct bw_meter *list; origin = mfccp->mfcc_origin; @@ -1181,7 +1235,7 @@ log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n", (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr)); - s = splnet(); + MFC_LOCK(); hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); for (nptr = &mfctable[hash]; (rt = *nptr) != NULL; nptr = &rt->mfc_next) @@ -1190,12 +1244,14 @@ rt->mfc_stall == NULL) break; if (rt == NULL) { - splx(s); + MFC_UNLOCK(); return EADDRNOTAVAIL; } *nptr = rt->mfc_next; + MFC_UNLOCK(); + /* * free the bw_meter entries */ @@ -1204,8 +1260,6 @@ free(rt, M_MRTABLE); - splx(s); - free_bw_list(list); return 0; @@ -1245,7 +1299,7 @@ struct ip_moptions *imo) { struct mfc *rt; - int s; + int error; vifi_t vifi; if (mrtdebug & DEBUG_FORWARD) @@ -1274,6 +1328,7 @@ return 1; } + VIF_LOCK(); if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) { if (ip->ip_ttl < 255) ip->ip_ttl++; /* compensate for -1 in *_send routines */ @@ -1286,7 +1341,9 @@ (vifp->v_flags & VIFF_TUNNEL) ? 
"tunnel on " : "", vifp->v_ifp->if_name, vifp->v_ifp->if_unit); } - return ip_mdq(m, ifp, NULL, vifi); + error = ip_mdq(m, ifp, NULL, vifi); + VIF_UNLOCK(); + return error; } if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n", @@ -1299,20 +1356,24 @@ * Don't forward a packet with time-to-live of zero or one, * or a packet destined to a local-only group. */ - if (ip->ip_ttl <= 1 || ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) + if (ip->ip_ttl <= 1 || ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) { + VIF_UNLOCK(); return 0; + } /* * Determine forwarding vifs from the forwarding cache table */ - s = splnet(); + MFC_LOCK(); ++mrtstat.mrts_mfc_lookups; rt = mfc_find(ip->ip_src.s_addr, ip->ip_dst.s_addr); /* Entry exists, so forward if necessary */ if (rt != NULL) { - splx(s); - return ip_mdq(m, ifp, rt, -1); + MFC_UNLOCK(); + error = ip_mdq(m, ifp, rt, -1); + VIF_UNLOCK(); + return error; } else { /* * If we don't have a route for packet's origin, @@ -1339,7 +1400,8 @@ */ rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT); if (rte == NULL) { - splx(s); + MFC_UNLOCK(); + VIF_UNLOCK(); return ENOBUFS; } mb0 = m_copypacket(m, M_DONTWAIT); @@ -1347,7 +1409,8 @@ mb0 = m_pullup(mb0, hlen); if (mb0 == NULL) { free(rte, M_MRTABLE); - splx(s); + MFC_UNLOCK(); + VIF_UNLOCK(); return ENOBUFS; } @@ -1405,7 +1468,8 @@ fail: free(rte, M_MRTABLE); m_freem(mb0); - splx(s); + MFC_UNLOCK(); + VIF_UNLOCK(); return ENOBUFS; } @@ -1447,7 +1511,8 @@ non_fatal: free(rte, M_MRTABLE); m_freem(mb0); - splx(s); + MFC_UNLOCK(); + VIF_UNLOCK(); return 0; } @@ -1459,7 +1524,8 @@ rte->ifp = ifp; rte->next = NULL; - splx(s); + MFC_UNLOCK(); + VIF_UNLOCK(); return 0; } @@ -1474,9 +1540,8 @@ struct rtdetq *rte; struct mfc *mfc, **nptr; int i; - int s; - s = splnet(); + MFC_LOCK(); for (i = 0; i < MFCTBLSIZ; i++) { if (nexpire[i] == 0) continue; @@ -1524,8 +1589,9 @@ } } } - splx(s); - expire_upcalls_ch = 
timeout(expire_upcalls, NULL, EXPIRE_TIMEOUT); + MFC_UNLOCK(); + + callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL); } /* @@ -1538,6 +1604,7 @@ vifi_t vifi; int plen = ip->ip_len; + VIF_LOCK_ASSERT(); /* * Macro to send packet on vif. Since RSVP packets don't get counted on * input, they shouldn't get counted on output, so statistics keeping is @@ -1675,8 +1742,10 @@ struct timeval now; GET_TIME(now); + MFC_LOCK(); for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) bw_meter_receive_packet(x, plen, &now); + MFC_UNLOCK(); } return 0; @@ -1688,6 +1757,7 @@ static int X_legal_vif_num(int vif) { + /* XXX unlocked, matter? */ return (vif >= 0 && vif < numvifs); } @@ -1697,6 +1767,7 @@ static u_long X_ip_mcast_src(int vifi) { + /* XXX unlocked, matter? */ if (vifi >= 0 && vifi < numvifs) return viftable[vifi].v_lcl_addr.s_addr; else @@ -1709,6 +1780,8 @@ struct mbuf *mb_copy; int hlen = ip->ip_hl << 2; + VIF_LOCK_ASSERT(); + /* * Make a new reference to the packet; make sure that * the IP header is actually copied, not just referenced, @@ -1733,6 +1806,8 @@ struct ip *ip_copy; int i, len = ip->ip_len; + VIF_LOCK_ASSERT(); + /* Take care of delayed checksums */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m); @@ -1806,6 +1881,8 @@ { struct tbf *t = vifp->v_tbf; + VIF_LOCK_ASSERT(); + if (p_len > MAX_BKT_SIZE) { /* drop if packet is too large */ mrtstat.mrts_pkt2large++; m_freem(m); @@ -1820,7 +1897,8 @@ tbf_send_packet(vifp, m); } else { /* no, queue packet and try later */ tbf_queue(vifp, m); - timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); + callout_reset(&tbf_reprocess_ch, TBF_REPROCESS, + tbf_reprocess_q, vifp); } } else if (t->tbf_q_len < t->tbf_max_q_len) { /* finite queue length, so queue pkts and process queue */ @@ -1844,9 +1922,10 @@ static void tbf_queue(struct vif *vifp, struct mbuf *m) { - int s = splnet(); struct tbf *t = vifp->v_tbf; + VIF_LOCK_ASSERT(); + if (t->tbf_t == NULL) /* Queue was empty */ 
t->tbf_q = m; else /* Insert at tail */ @@ -1862,8 +1941,6 @@ m->m_act = NULL; t->tbf_q_len++; - - splx(s); } /* @@ -1872,9 +1949,10 @@ static void tbf_process_q(struct vif *vifp) { - int s = splnet(); struct tbf *t = vifp->v_tbf; + VIF_LOCK_ASSERT(); + /* loop through the queue at the interface and send as many packets * as possible */ @@ -1895,7 +1973,6 @@ m->m_act = NULL; tbf_send_packet(vifp, m); } - splx(s); } static void @@ -1905,10 +1982,12 @@ if (ip_mrouter == NULL) return; + VIF_LOCK(); tbf_update_tokens(vifp); tbf_process_q(vifp); if (vifp->v_tbf->tbf_q_len) - timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); + callout_reset(&tbf_reprocess_ch, TBF_REPROCESS, tbf_reprocess_q, vifp); + VIF_UNLOCK(); } /* function that will selectively discard a member of the queue @@ -1917,12 +1996,13 @@ static int tbf_dq_sel(struct vif *vifp, struct ip *ip) { - int s = splnet(); u_int p; struct mbuf *m, *last; struct mbuf **np; struct tbf *t = vifp->v_tbf; + VIF_LOCK_ASSERT(); + p = priority(vifp, ip); np = &t->tbf_q; @@ -1937,21 +2017,19 @@ /* It's impossible for the queue to be empty, but check anyways. 
*/ if (--t->tbf_q_len == 0) t->tbf_t = NULL; - splx(s); mrtstat.mrts_drop_sel++; return 1; } np = &m->m_act; last = m; } - splx(s); return 0; } static void tbf_send_packet(struct vif *vifp, struct mbuf *m) { - int s = splnet(); + VIF_LOCK_ASSERT(); if (vifp->v_flags & VIFF_TUNNEL) /* If tunnel options */ ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, NULL, NULL); @@ -1977,7 +2055,6 @@ log(LOG_DEBUG, "phyint_send on vif %d err %d\n", (int)(vifp - viftable), error); } - splx(s); } /* determine the current time and then @@ -1989,9 +2066,10 @@ { struct timeval tp; u_long tm; - int s = splnet(); struct tbf *t = vifp->v_tbf; + VIF_LOCK_ASSERT(); + GET_TIME(tp); TV_DELTA(tp, t->tbf_last_pkt_t, tm); @@ -2010,8 +2088,6 @@ if (t->tbf_n_tok > MAX_BKT_SIZE) t->tbf_n_tok = MAX_BKT_SIZE; - - splx(s); } static int @@ -2054,7 +2130,7 @@ static int X_ip_rsvp_vif(struct socket *so, struct sockopt *sopt) { - int error, vifi, s; + int error, vifi; if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) return EOPNOTSUPP; @@ -2063,17 +2139,17 @@ if (error) return error; - s = splnet(); + VIF_LOCK(); if (vifi < 0 || vifi >= numvifs) { /* Error if vif is invalid */ - splx(s); + VIF_UNLOCK(); return EADDRNOTAVAIL; } if (sopt->sopt_name == IP_RSVP_VIF_ON) { /* Check if socket is available. */ if (viftable[vifi].v_rsvpd != NULL) { - splx(s); + VIF_UNLOCK(); return EADDRINUSE; } @@ -2101,7 +2177,7 @@ rsvp_on--; } } - splx(s); + VIF_UNLOCK(); return 0; } @@ -2109,13 +2185,12 @@ X_ip_rsvp_force_done(struct socket *so) { int vifi; - int s; /* Don't bother if it is not the right type of socket. */ if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) return; - s = splnet(); + VIF_LOCK(); /* The socket may be attached to more than one vif...this * is perfectly legal. 
@@ -2133,7 +2208,7 @@ } } - splx(s); + VIF_UNLOCK(); } static void @@ -2142,7 +2217,6 @@ int vifi; struct ip *ip = mtod(m, struct ip *); struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET }; - int s; struct ifnet *ifp; if (rsvpdebug) @@ -2157,8 +2231,6 @@ return; } - s = splnet(); - if (rsvpdebug) printf("rsvp_input: check vifs\n"); @@ -2167,6 +2239,8 @@ #endif ifp = m->m_pkthdr.rcvif; + + VIF_LOCK(); /* Find which vif the packet arrived on. */ for (vifi = 0; vifi < numvifs; vifi++) if (viftable[vifi].v_ifp == ifp) @@ -2174,6 +2248,12 @@ if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) { /* + * Drop the lock here to avoid holding it across rip_input. + * This could make rsvpdebug printfs wrong. If you care, + * record the state of stuff before dropping the lock. + */ + VIF_UNLOCK(); + /* * If the old-style non-vif-associated socket is set, * then use it. Otherwise, drop packet since there * is no specific socket for this vif. @@ -2189,7 +2269,6 @@ printf("rsvp_input: No socket defined for vif %d\n",vifi); m_freem(m); } - splx(s); return; } rsvp_src.sin_addr = ip->ip_src; @@ -2205,8 +2284,7 @@ if (rsvpdebug) printf("rsvp_input: send packet up\n"); } - - splx(s); + VIF_UNLOCK(); } /* @@ -2249,7 +2327,6 @@ struct timeval now; struct bw_meter *x; uint32_t flags; - int s; if (!(mrt_api_config & MRT_MFC_BW_UPCALL)) return EOPNOTSUPP; @@ -2272,10 +2349,10 @@ /* * Find if we have already same bw_meter entry */ - s = splnet(); + MFC_LOCK(); mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr); if (mfc == NULL) { - splx(s); + MFC_UNLOCK(); return EADDRNOTAVAIL; } for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) { @@ -2284,16 +2361,17 @@ (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && (x->bm_flags & BW_METER_USER_FLAGS) == flags) { - splx(s); + MFC_UNLOCK(); return 0; /* XXX Already installed */ } } - splx(s); /* Allocate the new bw_meter entry */ x = (struct bw_meter 
*)malloc(sizeof(*x), M_BWMETER, M_NOWAIT); - if (x == NULL) + if (x == NULL) { + MFC_UNLOCK(); return ENOBUFS; + } /* Set the new bw_meter entry */ x->bm_threshold.b_time = req->bu_threshold.b_time; @@ -2308,12 +2386,11 @@ x->bm_time_hash = BW_METER_BUCKETS; /* Add the new bw_meter entry to the front of entries for this MFC */ - s = splnet(); x->bm_mfc = mfc; x->bm_mfc_next = mfc->mfc_bw_meter; mfc->mfc_bw_meter = x; schedule_bw_meter(x, &now); - splx(s); + MFC_UNLOCK(); return 0; } @@ -2338,16 +2415,15 @@ { struct mfc *mfc; struct bw_meter *x; - int s; if (!(mrt_api_config & MRT_MFC_BW_UPCALL)) return EOPNOTSUPP; - s = splnet(); + MFC_LOCK(); /* Find the corresponding MFC entry */ mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr); if (mfc == NULL) { - splx(s); + MFC_UNLOCK(); return EADDRNOTAVAIL; } else if (req->bu_flags & BW_UPCALL_DELETE_ALL) { /* @@ -2357,7 +2433,7 @@ list = mfc->mfc_bw_meter; mfc->mfc_bw_meter = NULL; - splx(s); + MFC_UNLOCK(); free_bw_list(list); return 0; } else { /* Delete a single bw_meter entry */ @@ -2381,14 +2457,14 @@ prev->bm_mfc_next = x->bm_mfc_next; /* remove from middle*/ else x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */ - splx(s); + MFC_UNLOCK(); unschedule_bw_meter(x); /* Free the bw_meter entry */ free(x, M_BWMETER); return 0; } else { - splx(s); + MFC_UNLOCK(); return EINVAL; } } @@ -2402,9 +2478,9 @@ bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp) { struct timeval delta; - int s; - s = splnet(); + MFC_LOCK_ASSERT(); + delta = *nowp; BW_TIMEVALDECR(&delta, &x->bm_start_time); @@ -2485,7 +2561,6 @@ x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; } } - splx(s); } >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200308272211.h7RMBnYn026778>