Date:      Fri, 25 May 2012 14:11:03 +0000 (UTC)
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r235991 - projects/pf/head/sys/contrib/pf/net
Message-ID:  <201205251411.q4PEB3Dt014289@svn.freebsd.org>

Author: glebius
Date: Fri May 25 14:11:02 2012
New Revision: 235991
URL: http://svn.freebsd.org/changeset/base/235991

Log:
  1) Locking kifs. In r234651 I made rule expiry safe. However, states
     may also reference kifs directly, and rules and some structures hanging
     off rules may reference kifs as well. Now states no longer update the
     refcount on kifs; only rules do. When the refcount on a kif reaches zero,
     and the kif doesn't represent an existing interface or group, it is moved
     to a list of unlinked kifs protected by a separate mutex. These unlinked
     kifs are purged via a naive mark-and-sweep run, similar to unlinked rule
     expiry (a condensed sketch follows this log message).
  
  2) Apart from rules, several more structures hang off the rules and are
     read by the packet processing path: struct pfi_kif, struct pfi_dynaddr,
     struct pfr_ktable, struct pf_pool, struct pf_pooladdr. Reading these
     requires the reader lock on rules, and modifying them the writer lock.
     - Convert PF_LOCK to PF_RULES_WLOCK() in the appropriate ioctls.
     - Acquire PF_RULES_WLOCK() in pf_free_rule(). To avoid a LOR with the
       unlinked rules mutex, use a temporary list on the stack (sketched
       below, after this log message).
     - Assert the pf rules lock in many functions that operate on tables or
       struct pf_addr_wrap.
     - Remove the separate uma(9) zone for dynaddrs; there is no reason for it.
  
  3) In many ioctl paths we used to obtain locks quite early and thus weren't
     able to use M_WAITOK. Make a modest attempt at improving the situation:
     - Do some parameter validation prior to actually processing the request.
     - Pre-allocate struct pf_rule and struct pfi_kif with M_WAITOK prior to
       obtaining locks. Free them on failure. (A sketch of this pattern
       follows the list of modified files below.)
     Unfortunately, converting all pf(4) ioctls to M_WAITOK is quite difficult,
     especially when configuring pf tables, where rn_inithead() is called,
     which uses M_NOWAIT.
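  
  For quick reference, the unlinked-kif handling from item 1, condensed out
  of the pf_if.c hunks below (an illustration of the mechanism, not
  additional code; all names are from the patch itself):
  
	void
	pfi_kif_unref(struct pfi_kif *kif)
	{
	
		PF_RULES_WASSERT();
		KASSERT(kif->pfik_rulerefs > 0,
		    ("%s: %p has zero refs", __func__, kif));
	
		kif->pfik_rulerefs--;
		if (kif->pfik_rulerefs > 0)
			return;
	
		/* A kif backing an existing ifnet or group stays in the tree. */
		if (kif->pfik_ifp != NULL || kif->pfik_group != NULL ||
		    kif == V_pfi_all)
			return;
	
		/* Unlink it and let the purge thread free it later. */
		RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif);
		kif->pfik_flags |= PFI_IFLAG_REFS;	/* survives the first sweep */
	
		mtx_lock(&pfi_unlnkdkifs_mtx);
		LIST_INSERT_HEAD(&V_pfi_unlinked_kifs, kif, pfik_list);
		mtx_unlock(&pfi_unlnkdkifs_mtx);
	}
	
	void
	pfi_kif_purge(void)
	{
		struct pfi_kif *kif, *kif1;
	
		/*
		 * Naive mark-and-sweep: pf_purge_expired_states() raises
		 * PFI_IFLAG_REFS on kifs still referenced by states, so
		 * anything left unmarked here is garbage.
		 */
		mtx_lock(&pfi_unlnkdkifs_mtx);
		LIST_FOREACH_SAFE(kif, &V_pfi_unlinked_kifs, pfik_list, kif1) {
			if (!(kif->pfik_flags & PFI_IFLAG_REFS)) {
				LIST_REMOVE(kif, pfik_list);
				free(kif, PFI_MTYPE);
			} else
				kif->pfik_flags &= ~PFI_IFLAG_REFS;
		}
		mtx_unlock(&pfi_unlnkdkifs_mtx);
	}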

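  The temporary-queue trick from item 2, used to avoid the LOR between the
  unlinked-rules mutex and the rules write lock, condensed from the
  pf_purge_unlinked_rules() hunk in pf.c below:
  
	static void
	pf_purge_unlinked_rules(void)
	{
		struct pf_rulequeue tmpq;
		struct pf_rule *r, *r1;
	
		/*
		 * Collect unreferenced rules into a queue on the stack while
		 * holding only the unlinked-rules mutex ...
		 */
		TAILQ_INIT(&tmpq);
		PF_UNLNKDRULES_LOCK();
		TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) {
			if (!(r->rule_flag & PFRULE_REFS)) {
				TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries);
				TAILQ_INSERT_TAIL(&tmpq, r, entries);
			} else
				r->rule_flag &= ~PFRULE_REFS;
		}
		PF_UNLNKDRULES_UNLOCK();
	
		/*
		 * ... then free them under the rules write lock, which
		 * pf_free_rule() now asserts.  The two locks are never held
		 * at the same time, so no lock order is established.
		 */
		if (!TAILQ_EMPTY(&tmpq)) {
			PF_RULES_WLOCK();
			TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) {
				TAILQ_REMOVE(&tmpq, r, entries);
				pf_free_rule(r);
			}
			PF_RULES_WUNLOCK();
		}
	}
  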
Modified:
  projects/pf/head/sys/contrib/pf/net/if_pfsync.c
  projects/pf/head/sys/contrib/pf/net/pf.c
  projects/pf/head/sys/contrib/pf/net/pf_if.c
  projects/pf/head/sys/contrib/pf/net/pf_ioctl.c
  projects/pf/head/sys/contrib/pf/net/pf_table.c
  projects/pf/head/sys/contrib/pf/net/pfvar.h
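
  And the pre-allocation pattern from item 3, condensed from the
  pfi_attach_ifnet() hunk in pf_if.c below; the ioctl paths follow the same
  shape, only with more involved error handling:

	static void
	pfi_attach_ifnet(struct ifnet *ifp)
	{
		struct pfi_kif *kif;
	
		/* Allocate with M_WAITOK before any pf lock is taken. */
		kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
	
		PF_RULES_WLOCK();
		V_pfi_update++;
		/* pfi_kif_attach() frees the preallocated kif if one already exists. */
		kif = pfi_kif_attach(kif, ifp->if_xname);
	
		kif->pfik_ifp = ifp;
		ifp->if_pf_kif = kif;
	
		pfi_kif_update(kif);
		PF_RULES_WUNLOCK();
	}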

Modified: projects/pf/head/sys/contrib/pf/net/if_pfsync.c
==============================================================================
--- projects/pf/head/sys/contrib/pf/net/if_pfsync.c	Fri May 25 11:14:08 2012	(r235990)
+++ projects/pf/head/sys/contrib/pf/net/if_pfsync.c	Fri May 25 14:11:02 2012	(r235991)
@@ -419,7 +419,7 @@ pfsync_state_import(struct pfsync_state 
 	struct pfi_kif	*kif;
 	int error;
 
-	PF_LOCK_ASSERT();
+	PF_RULES_RASSERT();
 
 	if (sp->creatorid == 0 && V_pf_status.debug >= PF_DEBUG_MISC) {
 		printf("%s: invalid creator id: %08x\n", __func__,
@@ -427,7 +427,7 @@ pfsync_state_import(struct pfsync_state 
 		return (EINVAL);
 	}
 
-	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
+	if ((kif = pfi_kif_find(sp->ifname)) == NULL) {
 		if (V_pf_status.debug >= PF_DEBUG_MISC)
 			printf("%s: unknown interface: %s\n", __func__,
 			    sp->ifname);
@@ -629,6 +629,12 @@ pfsync_input(struct mbuf *m, __unused in
 	pkt.src = ip->ip_src;
 	pkt.flags = 0;
 
+	PF_LOCK();
+	/*
+	 * Trusting pf_chksum during packet processing, as well as seeking
+	 * in interface name tree, require holding PF_RULES_RLOCK().
+	 */
+	PF_RULES_RLOCK();
 	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
 		pkt.flags |= PFSYNC_SI_CKSUM;
 
@@ -639,17 +645,24 @@ pfsync_input(struct mbuf *m, __unused in
 
 		if (subh.action >= PFSYNC_ACT_MAX) {
 			V_pfsyncstats.pfsyncs_badact++;
+			PF_RULES_RUNLOCK();
+			PF_UNLOCK();
 			goto done;
 		}
 
 		count = ntohs(subh.count);
 		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
 		rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count);
-		if (rv == -1)
+		if (rv == -1) {
+			PF_RULES_RUNLOCK();
+			PF_UNLOCK();
 			return;
+		}
 
 		offset += rv;
 	}
+	PF_RULES_RUNLOCK();
+	PF_UNLOCK();
 
 done:
 	m_freem(m);
@@ -671,12 +684,11 @@ pfsync_in_clr(struct pfsync_pkt *pkt, st
 	}
 	clr = (struct pfsync_clr *)(mp->m_data + offp);
 
-	PF_LOCK();
 	for (i = 0; i < count; i++) {
 		creatorid = clr[i].creatorid;
 
 		if (clr[i].ifname[0] != '\0' &&
-		    pfi_kif_get(clr[i].ifname) == NULL)
+		    pfi_kif_find(clr[i].ifname) == NULL)
 			continue;
 
 		for (int i = 0; i <= V_pf_hashmask; i++) {
@@ -694,7 +706,6 @@ relock:
 			PF_HASHROW_UNLOCK(ih);
 		}
 	}
-	PF_UNLOCK();
 
 	return (len);
 }
@@ -714,7 +725,6 @@ pfsync_in_ins(struct pfsync_pkt *pkt, st
 	}
 	sa = (struct pfsync_state *)(mp->m_data + offp);
 
-	PF_LOCK();
 	for (i = 0; i < count; i++) {
 		sp = &sa[i];
 
@@ -734,7 +744,6 @@ pfsync_in_ins(struct pfsync_pkt *pkt, st
 			/* Drop out, but process the rest of the actions. */
 			break;
 	}
-	PF_UNLOCK();
 
 	return (len);
 }
@@ -756,7 +765,6 @@ pfsync_in_iack(struct pfsync_pkt *pkt, s
 	}
 	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
 
-	PF_LOCK();
 	for (i = 0; i < count; i++) {
 		ia = &iaa[i];
 
@@ -771,7 +779,6 @@ pfsync_in_iack(struct pfsync_pkt *pkt, s
 		}
 		PF_STATE_UNLOCK(st);
 	}
-	PF_UNLOCK();
 	/*
 	 * XXX this is not yet implemented, but we know the size of the
 	 * message so we can skip it.
@@ -835,7 +842,6 @@ pfsync_in_upd(struct pfsync_pkt *pkt, st
 	}
 	sa = (struct pfsync_state *)(mp->m_data + offp);
 
-	PF_LOCK();
 	for (i = 0; i < count; i++) {
 		sp = &sa[i];
 
@@ -903,7 +909,6 @@ pfsync_in_upd(struct pfsync_pkt *pkt, st
 		st->pfsync_time = time_uptime;
 		PF_STATE_UNLOCK(st);
 	}
-	PF_UNLOCK();
 
 	return (len);
 }
@@ -928,7 +933,6 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, 
 	}
 	ua = (struct pfsync_upd_c *)(mp->m_data + offp);
 
-	PF_LOCK();
 	for (i = 0; i < count; i++) {
 		up = &ua[i];
 
@@ -997,7 +1001,6 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, 
 		st->pfsync_time = time_uptime;
 		PF_STATE_UNLOCK(st);
 	}
-	PF_UNLOCK();
 
 	return (len);
 }
@@ -1019,7 +1022,6 @@ pfsync_in_ureq(struct pfsync_pkt *pkt, s
 	}
 	ura = (struct pfsync_upd_req *)(mp->m_data + offp);
 
-	PF_LOCK();
 	for (i = 0; i < count; i++) {
 		ur = &ura[i];
 
@@ -1040,7 +1042,6 @@ pfsync_in_ureq(struct pfsync_pkt *pkt, s
 			PF_STATE_UNLOCK(st);
 		}
 	}
-	PF_UNLOCK();
 
 	return (len);
 }
@@ -1061,7 +1062,6 @@ pfsync_in_del(struct pfsync_pkt *pkt, st
 	}
 	sa = (struct pfsync_state *)(mp->m_data + offp);
 
-	PF_LOCK();
 	for (i = 0; i < count; i++) {
 		sp = &sa[i];
 
@@ -1073,7 +1073,6 @@ pfsync_in_del(struct pfsync_pkt *pkt, st
 		st->state_flags |= PFSTATE_NOSYNC;
 		pf_unlink_state(st, PF_ENTER_LOCKED);
 	}
-	PF_UNLOCK();
 
 	return (len);
 }
@@ -1094,7 +1093,6 @@ pfsync_in_del_c(struct pfsync_pkt *pkt, 
 	}
 	sa = (struct pfsync_del_c *)(mp->m_data + offp);
 
-	PF_LOCK();
 	for (i = 0; i < count; i++) {
 		sp = &sa[i];
 
@@ -1107,7 +1105,6 @@ pfsync_in_del_c(struct pfsync_pkt *pkt, 
 		st->state_flags |= PFSTATE_NOSYNC;
 		pf_unlink_state(st, PF_ENTER_LOCKED);
 	}
-	PF_UNLOCK();
 
 	return (len);
 }
@@ -1193,10 +1190,8 @@ pfsync_in_tdb(struct pfsync_pkt *pkt, st
 	}
 	tp = (struct pfsync_tdb *)(mp->m_data + offp);
 
-	PF_LOCK();
 	for (i = 0; i < count; i++)
 		pfsync_update_net_tdb(&tp[i]);
-	PF_UNLOCK();
 #endif
 
 	return (len);
@@ -1662,8 +1657,6 @@ pfsync_insert_state(struct pf_state *st)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 
-	PF_LOCK_ASSERT();
-
 	if (st->state_flags & PFSTATE_NOSYNC)
 		return;
 

Modified: projects/pf/head/sys/contrib/pf/net/pf.c
==============================================================================
--- projects/pf/head/sys/contrib/pf/net/pf.c	Fri May 25 11:14:08 2012	(r235990)
+++ projects/pf/head/sys/contrib/pf/net/pf.c	Fri May 25 14:11:02 2012	(r235991)
@@ -730,8 +730,6 @@ pf_initialize()
 	    sizeof(struct pfr_kentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
 	    0);
 	V_pf_limits[PF_LIMIT_TABLE_ENTRIES].zone = V_pfr_kentry_z;
-	V_pfi_addr_z = uma_zcreate("pf pfi_dynaddr", sizeof(struct pfi_dynaddr),
-	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 }
 
 void
@@ -778,7 +776,6 @@ pf_cleanup()
 	uma_zdestroy(V_pf_pooladdr_z);
 	uma_zdestroy(V_pfr_ktable_z);
 	uma_zdestroy(V_pfr_kentry_z);
-	uma_zdestroy(V_pfi_addr_z);
 }
 
 static int
@@ -1051,7 +1048,6 @@ pf_state_insert(struct pfi_kif *kif, str
 
 	V_pf_status.fcounters[FCNT_STATE_INSERT]++;
 	V_pf_status.states++;
-	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
 	if (pfsync_insert_state_ptr != NULL)
 		pfsync_insert_state_ptr(s);
 
@@ -1312,10 +1308,15 @@ pf_purge_thread(void *v)
 
 		/* Purge other expired types every PFTM_INTERVAL seconds. */
 		if (fullrun) {
-			/* Order important: rules should be the last. */
+			/*
+			 * Order is important:
+			 * - states and src nodes reference rules
+			 * - states and rules reference kifs
+			 */
 			pf_purge_expired_fragments();
 			pf_purge_expired_src_nodes();
 			pf_purge_unlinked_rules();
+			pfi_kif_purge();
 		}
 
 		PF_UNLOCK();
@@ -1471,7 +1472,6 @@ pf_free_state(struct pf_state *cur)
 	if (cur->anchor.ptr != NULL)
 		--cur->anchor.ptr->states_cur;
 	pf_normalize_tcp_cleanup(cur);
-	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
 	if (cur->tag)
 		pf_tag_unref(cur->tag);
 	uma_zfree(V_pf_state_z, cur);
@@ -1515,7 +1515,9 @@ relock:
 				s->nat_rule.ptr->rule_flag |= PFRULE_REFS;
 			if (s->anchor.ptr != NULL)
 				s->anchor.ptr->rule_flag |= PFRULE_REFS;
-
+			s->kif->pfik_flags |= PFI_IFLAG_REFS;
+			if (s->rt_kif)
+				s->rt_kif->pfik_flags |= PFI_IFLAG_REFS;
 		}
 		PF_HASHROW_UNLOCK(ih);
 		i++;
@@ -1528,22 +1530,36 @@ relock:
 static void
 pf_purge_unlinked_rules()
 {
+	struct pf_rulequeue tmpq;
 	struct pf_rule *r, *r1;
 
 	/*
 	 * Do naive mark-and-sweep garbage collecting of old rules.
 	 * Reference flag is raised by pf_purge_expired_states()
 	 * and pf_purge_expired_src_nodes().
+	 *
+	 * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK,
+	 * use a temporary queue.
 	 */
+	TAILQ_INIT(&tmpq);
 	PF_UNLNKDRULES_LOCK();
 	TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) {
 		if (!(r->rule_flag & PFRULE_REFS)) {
 			TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries);
-			pf_free_rule(r);
+			TAILQ_INSERT_TAIL(&tmpq, r, entries);
 		} else
 			r->rule_flag &= ~PFRULE_REFS;
 	}
 	PF_UNLNKDRULES_UNLOCK();
+
+	if (!TAILQ_EMPTY(&tmpq)) {
+		PF_RULES_WLOCK();
+		TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) {
+			TAILQ_REMOVE(&tmpq, r, entries);
+			pf_free_rule(r);
+		}
+		PF_RULES_WUNLOCK();
+	}
 }
 
 void

Modified: projects/pf/head/sys/contrib/pf/net/pf_if.c
==============================================================================
--- projects/pf/head/sys/contrib/pf/net/pf_if.c	Fri May 25 11:14:08 2012	(r235990)
+++ projects/pf/head/sys/contrib/pf/net/pf_if.c	Fri May 25 14:11:02 2012	(r235991)
@@ -65,17 +65,18 @@ __FBSDID("$FreeBSD$");
 #endif /* INET6 */
 
 VNET_DEFINE(struct pfi_kif *,	 pfi_all);
-VNET_DEFINE(uma_zone_t,		 pfi_addr_z);
-VNET_DEFINE(struct pfi_ifhead,	 pfi_ifs);
-#define	V_pfi_ifs		 VNET(pfi_ifs)
-VNET_DEFINE(long,		 pfi_update);
-#define	V_pfi_update		 VNET(pfi_update)
-VNET_DEFINE(struct pfr_addr *,	 pfi_buffer);
+static VNET_DEFINE(long, pfi_update);
+#define	V_pfi_update	VNET(pfi_update)
+#define PFI_BUFFER_MAX	0x10000
+
+/* XXXGL */
+static VNET_DEFINE(struct pfr_addr *, pfi_buffer);
+static VNET_DEFINE(int, pfi_buffer_cnt);
+static VNET_DEFINE(int,	pfi_buffer_max);
 #define	V_pfi_buffer		 VNET(pfi_buffer)
-VNET_DEFINE(int,		 pfi_buffer_cnt);
 #define	V_pfi_buffer_cnt	 VNET(pfi_buffer_cnt)
-VNET_DEFINE(int,		 pfi_buffer_max);
 #define	V_pfi_buffer_max	 VNET(pfi_buffer_max)
+
 eventhandler_tag	 pfi_attach_cookie;
 eventhandler_tag	 pfi_detach_cookie;
 eventhandler_tag	 pfi_attach_group_cookie;
@@ -84,16 +85,12 @@ eventhandler_tag	 pfi_detach_group_cooki
 eventhandler_tag	 pfi_ifaddr_event_cookie;
 
 static void	 pfi_attach_ifnet(struct ifnet *);
-static void	 pfi_detach_ifnet(struct ifnet *);
 static void	 pfi_attach_ifgroup(struct ifg_group *);
-static void	 pfi_detach_ifgroup(struct ifg_group *);
-static void	 pfi_group_change(const char *);
 
 static void	 pfi_kif_update(struct pfi_kif *);
 static void	 pfi_dynaddr_update(struct pfi_dynaddr *dyn);
 static void	 pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int,
 		    int);
-static void	 pfi_kifaddr_update(void *);
 static void	 pfi_instance_add(struct ifnet *, int, int);
 static void	 pfi_address_add(struct sockaddr *, int, int);
 static int	 pfi_if_compare(struct pfi_kif *, struct pfi_kif *);
@@ -106,26 +103,37 @@ static void	 pfi_change_group_event(void
 static void	 pfi_detach_group_event(void *, struct ifg_group *);
 static void	 pfi_ifaddr_event(void * __unused, struct ifnet *);
 
-RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
-RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
-
-#define PFI_BUFFER_MAX		0x10000
-#define PFI_MTYPE		M_IFADDR
+RB_HEAD(pfi_ifhead, pfi_kif);
+static RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
+static RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
+static VNET_DEFINE(struct pfi_ifhead, pfi_ifs);
+#define	V_pfi_ifs	VNET(pfi_ifs)
+
+#define	PFI_BUFFER_MAX		0x10000
+MALLOC_DEFINE(PFI_MTYPE, "pf ifnets", "pf interface database");
+
+LIST_HEAD(pfi_list, pfi_kif);
+static VNET_DEFINE(struct pfi_list, pfi_unlinked_kifs);
+#define	V_pfi_unlinked_kifs	VNET(pfi_unlinked_kifs)
+static struct mtx pfi_unlnkdkifs_mtx;
 
 void
 pfi_initialize(void)
 {
-	if (V_pfi_all != NULL)	/* already initialized */
-		return;
+	struct ifg_group *ifg;
+	struct ifnet *ifp;
+	struct pfi_kif *kif;
 
 	V_pfi_buffer_max = 64;
 	V_pfi_buffer = malloc(V_pfi_buffer_max * sizeof(*V_pfi_buffer),
 	    PFI_MTYPE, M_WAITOK);
 
-	if ((V_pfi_all = pfi_kif_get(IFG_ALL)) == NULL)
-		panic("pfi_kif_get for pfi_all failed");
-	struct ifg_group *ifg;
-	struct ifnet *ifp;
+	mtx_init(&pfi_unlnkdkifs_mtx, "pf unlinked interfaces", NULL, MTX_DEF);
+
+	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
+	PF_RULES_WLOCK();
+	V_pfi_all = pfi_kif_attach(kif, IFG_ALL);
+	PF_RULES_WUNLOCK();
 
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
@@ -153,53 +161,59 @@ pfi_cleanup(void)
 {
 	struct pfi_kif *p;
 
-	PF_UNLOCK();
 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie);
 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie);
 	EVENTHANDLER_DEREGISTER(group_attach_event, pfi_attach_group_cookie);
 	EVENTHANDLER_DEREGISTER(group_change_event, pfi_change_group_cookie);
 	EVENTHANDLER_DEREGISTER(group_detach_event, pfi_detach_group_cookie);
 	EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie);
-	PF_LOCK();
 
 	V_pfi_all = NULL;
 	while ((p = RB_MIN(pfi_ifhead, &V_pfi_ifs))) {
-		if (p->pfik_rules || p->pfik_states) {
-			printf("pfi_cleanup: dangling refs for %s\n",
-			    p->pfik_name);
-		}
-
 		RB_REMOVE(pfi_ifhead, &V_pfi_ifs, p);
 		free(p, PFI_MTYPE);
 	}
 
+	while ((p = LIST_FIRST(&V_pfi_unlinked_kifs))) {
+		LIST_REMOVE(p, pfik_list);
+		free(p, PFI_MTYPE);
+	}
+
+	mtx_destroy(&pfi_unlnkdkifs_mtx);
+
 	free(V_pfi_buffer, PFI_MTYPE);
 }
 
 struct pfi_kif *
-pfi_kif_get(const char *kif_name)
+pfi_kif_find(const char *kif_name)
 {
-	struct pfi_kif		*kif;
-	struct pfi_kif_cmp	 s;
+	struct pfi_kif_cmp s;
+
+	PF_RULES_ASSERT();
 
 	bzero(&s, sizeof(s));
 	strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name));
-	if ((kif = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s)) != NULL)
-		return (kif);
 
-	/* create new one */
-	if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL)
-		return (NULL);
+	return (RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s));
+}
+
+struct pfi_kif *
+pfi_kif_attach(struct pfi_kif *kif, const char *kif_name)
+{
+	struct pfi_kif *kif1;
 
+	PF_RULES_WASSERT();
+	KASSERT(kif != NULL, ("%s: null kif", __func__));
+
+	kif1 = pfi_kif_find(kif_name);
+	if (kif1 != NULL) {
+		free(kif, PFI_MTYPE);
+		return (kif1);
+	}
+
+	bzero(kif, sizeof(*kif));
 	strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name));
-	/*
-	 * It seems that the value of time_second is in unintialzied state
-	 * when pf sets interface statistics clear time in boot phase if pf
-	 * was statically linked to kernel. Instead of setting the bogus
-	 * time value have pfi_get_ifaces handle this case. In
-	 * pfi_get_ifaces it uses boottime.tv_sec if it sees the time is 0.
-	 */
-	kif->pfik_tzero = time_second > 1 ? time_second : 0;
+	kif->pfik_tzero = time_second;
 	TAILQ_INIT(&kif->pfik_dynaddrs);
 
 	RB_INSERT(pfi_ifhead, &V_pfi_ifs, kif);
@@ -208,55 +222,56 @@ pfi_kif_get(const char *kif_name)
 }
 
 void
-pfi_kif_ref(struct pfi_kif *kif, enum pfi_kif_refs what)
+pfi_kif_ref(struct pfi_kif *kif)
 {
-	switch (what) {
-	case PFI_KIF_REF_RULE:
-		kif->pfik_rules++;
-		break;
-	case PFI_KIF_REF_STATE:
-		kif->pfik_states++;
-		break;
-	default:
-		panic("pfi_kif_ref with unknown type");
-	}
+
+	PF_RULES_WASSERT();
+	kif->pfik_rulerefs++;
 }
 
 void
-pfi_kif_unref(struct pfi_kif *kif, enum pfi_kif_refs what)
+pfi_kif_unref(struct pfi_kif *kif)
 {
-	if (kif == NULL)
-		return;
 
-	switch (what) {
-	case PFI_KIF_REF_NONE:
-		break;
-	case PFI_KIF_REF_RULE:
-		if (kif->pfik_rules <= 0) {
-			printf("pfi_kif_unref: rules refcount <= 0\n");
-			return;
-		}
-		kif->pfik_rules--;
-		break;
-	case PFI_KIF_REF_STATE:
-		if (kif->pfik_states <= 0) {
-			printf("pfi_kif_unref: state refcount <= 0\n");
-			return;
-		}
-		kif->pfik_states--;
-		break;
-	default:
-		panic("pfi_kif_unref with unknown type");
-	}
+	PF_RULES_WASSERT();
+	KASSERT(kif->pfik_rulerefs > 0, ("%s: %p has zero refs", __func__, kif));
 
-	if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all)
+	kif->pfik_rulerefs--;
+
+	if (kif->pfik_rulerefs > 0)
 		return;
 
-	if (kif->pfik_rules || kif->pfik_states)
+	/* kif referencing an existing ifnet or group should exist. */
+	if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all)
 		return;
 
 	RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif);
-	free(kif, PFI_MTYPE);
+
+	kif->pfik_flags |= PFI_IFLAG_REFS;
+
+	mtx_lock(&pfi_unlnkdkifs_mtx);
+	LIST_INSERT_HEAD(&V_pfi_unlinked_kifs, kif, pfik_list);
+	mtx_unlock(&pfi_unlnkdkifs_mtx);
+}
+
+void
+pfi_kif_purge(void)
+{
+	struct pfi_kif *kif, *kif1;
+
+	/*
+	 * Do naive mark-and-sweep garbage collecting of old kifs.
+	 * Reference flag is raised by pf_purge_expired_states().
+	 */
+	mtx_lock(&pfi_unlnkdkifs_mtx);
+	LIST_FOREACH_SAFE(kif, &V_pfi_unlinked_kifs, pfik_list, kif1) {
+		if (!(kif->pfik_flags & PFI_IFLAG_REFS)) {
+			LIST_REMOVE(kif, pfik_list);
+			free(kif, PFI_MTYPE);
+		} else
+			kif->pfik_flags &= ~PFI_IFLAG_REFS;
+	}
+	mtx_unlock(&pfi_unlnkdkifs_mtx);
 }
 
 int
@@ -268,6 +283,7 @@ pfi_kif_match(struct pfi_kif *rule_kif, 
 		return (1);
 
 	if (rule_kif->pfik_group != NULL)
+		/* XXXGL: locking? */
 		TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next)
 			if (p->ifgl_group == rule_kif->pfik_group)
 				return (1);
@@ -278,75 +294,35 @@ pfi_kif_match(struct pfi_kif *rule_kif, 
 static void
 pfi_attach_ifnet(struct ifnet *ifp)
 {
-	struct pfi_kif		*kif;
+	struct pfi_kif *kif;
+
+	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
 
-	pfi_initialize();
+	PF_RULES_WLOCK();
 	V_pfi_update++;
-	if ((kif = pfi_kif_get(ifp->if_xname)) == NULL)
-		panic("pfi_kif_get failed");
+	kif = pfi_kif_attach(kif, ifp->if_xname);
 
 	kif->pfik_ifp = ifp;
-	ifp->if_pf_kif = (caddr_t)kif;
-
-
-	pfi_kif_update(kif);
-}
-
-static void
-pfi_detach_ifnet(struct ifnet *ifp)
-{
-	struct pfi_kif		*kif;
-
-	if ((kif = (struct pfi_kif *)ifp->if_pf_kif) == NULL)
-		return;
+	ifp->if_pf_kif = kif;
 
-	V_pfi_update++;
 	pfi_kif_update(kif);
-
-	kif->pfik_ifp = NULL;
-	ifp->if_pf_kif = NULL;
-	pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+	PF_RULES_WUNLOCK();
 }
 
 static void
 pfi_attach_ifgroup(struct ifg_group *ifg)
 {
-	struct pfi_kif	*kif;
-
-	pfi_initialize();
-	V_pfi_update++;
-	if ((kif = pfi_kif_get(ifg->ifg_group)) == NULL)
-		panic("pfi_kif_get failed");
-
-	kif->pfik_group = ifg;
-	ifg->ifg_pf_kif = (caddr_t)kif;
-}
-
-static void
-pfi_detach_ifgroup(struct ifg_group *ifg)
-{
-	struct pfi_kif	*kif;
-
-	if ((kif = (struct pfi_kif *)ifg->ifg_pf_kif) == NULL)
-		return;
-
-	V_pfi_update++;
-
-	kif->pfik_group = NULL;
-	ifg->ifg_pf_kif = NULL;
-	pfi_kif_unref(kif, PFI_KIF_REF_NONE);
-}
+	struct pfi_kif *kif;
 
-static void
-pfi_group_change(const char *group)
-{
-	struct pfi_kif		*kif;
+	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
 
+	PF_RULES_WLOCK();
 	V_pfi_update++;
-	if ((kif = pfi_kif_get(group)) == NULL)
-		panic("pfi_kif_get failed");
+	kif = pfi_kif_attach(kif, ifg->ifg_group);
 
-	pfi_kif_update(kif);
+	kif->pfik_group = ifg;
+	ifg->ifg_pf_kif = kif;
+	PF_RULES_WUNLOCK();
 }
 
 int
@@ -390,24 +366,27 @@ pfi_dynaddr_setup(struct pf_addr_wrap *a
 	struct pfi_dynaddr	*dyn;
 	char			 tblname[PF_TABLE_NAME_SIZE];
 	struct pf_ruleset	*ruleset = NULL;
+	struct pfi_kif		*kif;
 	int			 rv = 0;
 
+	PF_RULES_WASSERT();
 	KASSERT(aw->type == PF_ADDR_DYNIFTL, ("%s: type %u",
 	    __func__, aw->type));
 	KASSERT(aw->p.dyn == NULL, ("%s: dyn is %p", __func__, aw->p.dyn));
 
-	if ((dyn = uma_zalloc(V_pfi_addr_z, M_NOWAIT | M_ZERO)) == NULL)
+	if ((dyn = malloc(sizeof(*dyn), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL)
 		return (ENOMEM);
 
+	if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT)) == NULL) {
+		free(dyn, PFI_MTYPE);
+		return (ENOMEM);
+	}
+
 	if (!strcmp(aw->v.ifname, "self"))
-		dyn->pfid_kif = pfi_kif_get(IFG_ALL);
+		dyn->pfid_kif = pfi_kif_attach(kif, IFG_ALL);
 	else
-		dyn->pfid_kif = pfi_kif_get(aw->v.ifname);
-	if (dyn->pfid_kif == NULL) {
-		rv = ENOENT;
-		goto _bad;
-	}
-	pfi_kif_ref(dyn->pfid_kif, PFI_KIF_REF_RULE);
+		dyn->pfid_kif = pfi_kif_attach(kif, aw->v.ifname);
+	pfi_kif_ref(dyn->pfid_kif);
 
 	dyn->pfid_net = pfi_unmask(&aw->v.a.mask);
 	if (af == AF_INET && dyn->pfid_net == 32)
@@ -450,8 +429,8 @@ _bad:
 	if (ruleset != NULL)
 		pf_remove_if_empty_ruleset(ruleset);
 	if (dyn->pfid_kif != NULL)
-		pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE);
-	uma_zfree(V_pfi_addr_z, dyn);
+		pfi_kif_unref(dyn->pfid_kif);
+	free(dyn, PFI_MTYPE);
 
 	return (rv);
 }
@@ -462,15 +441,20 @@ pfi_kif_update(struct pfi_kif *kif)
 	struct ifg_list		*ifgl;
 	struct pfi_dynaddr	*p;
 
+	PF_RULES_WASSERT();
+
 	/* update all dynaddr */
 	TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry)
 		pfi_dynaddr_update(p);
 
 	/* again for all groups kif is member of */
-	if (kif->pfik_ifp != NULL)
+	if (kif->pfik_ifp != NULL) {
+		IF_ADDR_RLOCK(kif->pfik_ifp);
 		TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next)
 			pfi_kif_update((struct pfi_kif *)
 			    ifgl->ifgl_group->ifg_pf_kif);
+		IF_ADDR_RUNLOCK(kif->pfik_ifp);
+	}
 }
 
 static void
@@ -479,8 +463,9 @@ pfi_dynaddr_update(struct pfi_dynaddr *d
 	struct pfi_kif		*kif;
 	struct pfr_ktable	*kt;
 
-	if (dyn == NULL || dyn->pfid_kif == NULL || dyn->pfid_kt == NULL)
-		panic("pfi_dynaddr_update");
+	PF_RULES_WASSERT();
+	KASSERT(dyn && dyn->pfid_kif && dyn->pfid_kt,
+	    ("%s: bad argument", __func__));
 
 	kif = dyn->pfid_kif;
 	kt = dyn->pfid_kt;
@@ -503,14 +488,17 @@ pfi_table_update(struct pfr_ktable *kt, 
 
 	if (kif->pfik_ifp != NULL)
 		pfi_instance_add(kif->pfik_ifp, net, flags);
-	else if (kif->pfik_group != NULL)
+	else if (kif->pfik_group != NULL) {
+		IFNET_RLOCK();
 		TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next)
 			pfi_instance_add(ifgm->ifgm_ifp, net, flags);
+		IFNET_RUNLOCK();
+	}
 
 	if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2,
 	    NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK)))
-		printf("pfi_table_update: cannot set %d new addresses "
-		    "into table %s: %d\n", V_pfi_buffer_cnt, kt->pfrkt_name, e);
+		printf("%s: cannot set %d new addresses into table %s: %d\n",
+		    __func__, V_pfi_buffer_cnt, kt->pfrkt_name, e);
 }
 
 static void
@@ -520,9 +508,8 @@ pfi_instance_add(struct ifnet *ifp, int 
 	int		 got4 = 0, got6 = 0;
 	int		 net2, af;
 
-	if (ifp == NULL)
-		return;
-	TAILQ_FOREACH(ia, &ifp->if_addrlist, ifa_list) {
+	IF_ADDR_RLOCK(ifp);
+	TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_list) {
 		if (ia->ifa_addr == NULL)
 			continue;
 		af = ia->ifa_addr->sa_family;
@@ -578,6 +565,7 @@ pfi_instance_add(struct ifnet *ifp, int 
 		else
 			pfi_address_add(ia->ifa_addr, af, net2);
 	}
+	IF_ADDR_RUNLOCK(ifp);
 }
 
 static void
@@ -590,15 +578,15 @@ pfi_address_add(struct sockaddr *sa, int
 		int		 new_max = V_pfi_buffer_max * 2;
 
 		if (new_max > PFI_BUFFER_MAX) {
-			printf("pfi_address_add: address buffer full (%d/%d)\n",
+			printf("%s: address buffer full (%d/%d)\n", __func__,
 			    V_pfi_buffer_cnt, PFI_BUFFER_MAX);
 			return;
 		}
 		p = malloc(new_max * sizeof(*V_pfi_buffer), PFI_MTYPE,
 		    M_NOWAIT);
 		if (p == NULL) {
-			printf("pfi_address_add: no memory to grow buffer "
-			    "(%d/%d)\n", V_pfi_buffer_cnt, PFI_BUFFER_MAX);
+			printf("%s: no memory to grow buffer (%d/%d)\n",
+			    __func__, V_pfi_buffer_cnt, PFI_BUFFER_MAX);
 			return;
 		}
 		memcpy(V_pfi_buffer, p, V_pfi_buffer_cnt * sizeof(*V_pfi_buffer));
@@ -635,9 +623,9 @@ pfi_dynaddr_remove(struct pfi_dynaddr *d
 	KASSERT(dyn->pfid_kt != NULL, ("%s: null pfid_kt", __func__));
 
 	TAILQ_REMOVE(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry);
-	pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE);
+	pfi_kif_unref(dyn->pfid_kif);
 	pfr_detach_table(dyn->pfid_kt);
-	uma_zfree(V_pfi_addr_z, dyn);
+	free(dyn, PFI_MTYPE);
 }
 
 void
@@ -652,15 +640,6 @@ pfi_dynaddr_copyout(struct pf_addr_wrap 
 	aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6;
 }
 
-static void
-pfi_kifaddr_update(void *v)
-{
-	struct pfi_kif		*kif = (struct pfi_kif *)v;
-
-	V_pfi_update++;
-	pfi_kif_update(kif);
-}
-
 static int
 pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q)
 {
@@ -808,8 +787,8 @@ pfi_attach_ifnet_event(void *arg __unuse
 {
 
 	CURVNET_SET(ifp->if_vnet);
-	PF_LOCK();
 	pfi_attach_ifnet(ifp);
+	PF_LOCK();
 #ifdef ALTQ
 	pf_altq_ifnet_event(ifp, 0);
 #endif
@@ -820,10 +799,18 @@ pfi_attach_ifnet_event(void *arg __unuse
 static void
 pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp)
 {
+	struct pfi_kif *kif = (struct pfi_kif *)ifp->if_pf_kif;
 
 	CURVNET_SET(ifp->if_vnet);
+	PF_RULES_WLOCK();
+	V_pfi_update++;
+	pfi_kif_update(kif);
+
+	kif->pfik_ifp = NULL;
+	ifp->if_pf_kif = NULL;
+	PF_RULES_WUNLOCK();
+
 	PF_LOCK();
-	pfi_detach_ifnet(ifp);
 #ifdef ALTQ
 	pf_altq_ifnet_event(ifp, 1);
 #endif
@@ -836,31 +823,38 @@ pfi_attach_group_event(void *arg , struc
 {
 
 	CURVNET_SET((struct vnet *)arg);
-	PF_LOCK();
 	pfi_attach_ifgroup(ifg);
-	PF_UNLOCK();
 	CURVNET_RESTORE();
 }
 
 static void
 pfi_change_group_event(void *arg, char *gname)
 {
+	struct pfi_kif *kif;
+
+	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
 
 	CURVNET_SET((struct vnet *)arg);
-	PF_LOCK();
-	pfi_group_change(gname);
-	PF_UNLOCK();
+	PF_RULES_WLOCK();
+	V_pfi_update++;
+	kif = pfi_kif_attach(kif, gname);
+	pfi_kif_update(kif);
+	PF_RULES_WUNLOCK();
 	CURVNET_RESTORE();
 }
 
 static void
 pfi_detach_group_event(void *arg, struct ifg_group *ifg)
 {
+	struct pfi_kif *kif = (struct pfi_kif *)ifg->ifg_pf_kif;
 
 	CURVNET_SET((struct vnet *)arg);
-	PF_LOCK();
-	pfi_detach_ifgroup(ifg);
-	PF_UNLOCK();
+	PF_RULES_WLOCK();
+	V_pfi_update++;
+
+	kif->pfik_group = NULL;
+	ifg->ifg_pf_kif = NULL;
+	PF_RULES_WUNLOCK();
 	CURVNET_RESTORE();
 }
 
@@ -869,9 +863,11 @@ pfi_ifaddr_event(void *arg __unused, str
 {
 
 	CURVNET_SET(ifp->if_vnet);
-	PF_LOCK();
-	if (ifp && ifp->if_pf_kif)
-		pfi_kifaddr_update(ifp->if_pf_kif);
-	PF_UNLOCK();
+	PF_RULES_WLOCK();
+	if (ifp && ifp->if_pf_kif) {
+		V_pfi_update++;
+		pfi_kif_update(ifp->if_pf_kif);
+	}
+	PF_RULES_WUNLOCK();
 	CURVNET_RESTORE();
 }

Modified: projects/pf/head/sys/contrib/pf/net/pf_ioctl.c
==============================================================================
--- projects/pf/head/sys/contrib/pf/net/pf_ioctl.c	Fri May 25 11:14:08 2012	(r235990)
+++ projects/pf/head/sys/contrib/pf/net/pf_ioctl.c	Fri May 25 14:11:02 2012	(r235991)
@@ -379,7 +379,8 @@ pf_empty_pool(struct pf_palist *poola)
 			pfr_detach_table(pa->addr.p.tbl);
 			break;
 		}
-		pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE);
+		if (pa->kif)
+			pfi_kif_unref(pa->kif);
 		TAILQ_REMOVE(poola, pa, entries);
 		uma_zfree(V_pf_pooladdr_z, pa);
 	}
@@ -403,6 +404,8 @@ void
 pf_free_rule(struct pf_rule *rule)
 {
 
+	PF_RULES_WASSERT();
+
 	pf_tag_unref(rule->tag);
 	pf_tag_unref(rule->match_tag);
 #ifdef ALTQ
@@ -428,7 +431,8 @@ pf_free_rule(struct pf_rule *rule)
 	}
 	if (rule->overload_tbl)
 		pfr_detach_table(rule->overload_tbl);
-	pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE);
+	if (rule->kif)
+		pfi_kif_unref(rule->kif);
 	pf_anchor_remove(rule);
 	pf_empty_pool(&rule->rpool.list);
 	uma_zfree(V_pf_rule_z, rule);
@@ -1016,8 +1020,6 @@ pf_addr_copyout(struct pf_addr_wrap *add
 static int
 pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
 {
-	struct pf_pooladdr	*pa = NULL;
-	struct pf_pool		*pool = NULL;
 	int			 error = 0;
 
 	CURVNET_SET(TD_TO_VNET(td));
@@ -1179,75 +1181,57 @@ pfioctl(struct cdev *dev, u_long cmd, ca
 		struct pf_ruleset	*ruleset;
 		struct pf_rule		*rule, *tail;
 		struct pf_pooladdr	*pa;
+		struct pfi_kif		*kif = NULL;
 		int			 rs_num;
 
-		PF_RULES_WLOCK();
-		pr->anchor[sizeof(pr->anchor) - 1] = 0;
-		ruleset = pf_find_ruleset(pr->anchor);
-		if (ruleset == NULL) {
-			PF_RULES_WUNLOCK();
+		if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
 			error = EINVAL;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


