Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 30 Aug 2003 14:56:12 -0700 (PDT)
From:      Sam Leffler <sam@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 37245 for review
Message-ID:  <200308302156.h7ULuCsN072305@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=37245

Change 37245 by sam@sam_ebb on 2003/08/30 14:56:07

	Checkpoint:
	
	o split bridge callbacks into input + output so each can be
	  optimized and to clean up locking
	o clean up locking in bridge module unload (there may still be a
	  race against the timer routine)
	o combine bridge packet analysis code with input processing; it
	  is unlikely anyone is going to use the existing code to do
	  packet analysis in a driver using a partially received packet
	  and combining the two leads to useful optimizations
	o move bridge processing work from ether input to the bridge module
	o replace some m_copy calls with m_copypacket (optimization)
	o move layer2 ipfw processing logic from ethernet code to ipfw
	o revamp API to bridge callbacks for consistency and to eliminate
	  confusing cases where sometimes mbufs are reclaimed and sometimes not
	o revamp ipfw2 layer2 processing callback to eliminate mbuf reclaiming
	o remove use of MT_TAG pseudo-mbufs
	o move net.link.ether.ipfw sysctl to ipfw module; this means it
	  will not be defined unless the module is loaded
	o reimplement net.link.ether.ipfw to remove a comparison previously
	  required for each packet
	o replace 6's with ETHER_ADDR_LEN
	o revamp divert sockets to use real m_tag's
	o revamp dummynet to use real m_tag's
	o clean up dummynet-bridge integration
	o redo ip reassembly queue handling to propagate tags instead of
	  recording divert socket state in the queue header
	o redo ipfw forward handling with m_tag's to eliminate MT_TAG use
	  and parameters to ip_dooptions and ip_forward
	o revamp dummynet i/o callback method to take out-of-band parameters
	  from the mbuf chain (stored in m_tag)
	o remove PACKET_TAG_IPFORWARD; no longer needed as info is stored
	  instead in PACKET_TAG_IPFW
	
	This stuff compiles but needs lots more work.  Some of this can be
	culled and integrated (especially bridge cleanup).

Affected files ...

.. //depot/projects/tagcleanup/sys/net/bridge.c#2 edit
.. //depot/projects/tagcleanup/sys/net/bridge.h#2 edit
.. //depot/projects/tagcleanup/sys/net/if_ethersubr.c#2 edit
.. //depot/projects/tagcleanup/sys/netinet/ip_divert.c#2 edit
.. //depot/projects/tagcleanup/sys/netinet/ip_dummynet.c#2 edit
.. //depot/projects/tagcleanup/sys/netinet/ip_dummynet.h#2 edit
.. //depot/projects/tagcleanup/sys/netinet/ip_fw.h#2 edit
.. //depot/projects/tagcleanup/sys/netinet/ip_fw2.c#2 edit
.. //depot/projects/tagcleanup/sys/netinet/ip_input.c#2 edit
.. //depot/projects/tagcleanup/sys/netinet/ip_output.c#2 edit
.. //depot/projects/tagcleanup/sys/netinet/ip_var.h#2 edit
.. //depot/projects/tagcleanup/sys/netinet/tcp_input.c#2 edit
.. //depot/projects/tagcleanup/sys/sys/mbuf.h#2 edit

Differences ...

==== //depot/projects/tagcleanup/sys/net/bridge.c#2 (text+ko) ====

@@ -619,10 +619,10 @@
 static void
 bdg_timeout(void *dummy)
 {
+    BDG_LOCK();
     if (do_bridge) {
 	int l, i;
 
-	BDG_LOCK();
 	/*
 	 * age entries in the forwarding table.
 	 */
@@ -650,9 +650,10 @@
 	    bridge_on();	/* we just need unmute, really */
 	    bdg_loops = 0;
 	}
-	BDG_UNLOCK();
     }
-    callout_reset(&bdg_callout, 2*hz, bdg_timeout, NULL);
+    if (bridge_in_ptr)
+	callout_reset(&bdg_callout, 2*hz, bdg_timeout, NULL);
+    BDG_UNLOCK();
 }
 
 /*
@@ -693,6 +694,9 @@
     case 1:
 	if (ETHER_ADDR_EQ(c->my_macs[0].etheraddr, eh->ether_dhost))
 	    return BDG_LOCAL;
+    case 0:
+	/* XXX BDG_UNKOWN? */
+	break;
     }
     /*
      * Look for a possible destination in table
@@ -704,125 +708,6 @@
 	return BDG_UNKNOWN;
 }
 
-/**
- * bridge_in() is invoked to perform bridging decision on input packets.
- *
- * On Input:
- *   eh		Ethernet header of the incoming packet.
- *   ifp	interface the packet is coming from.
- *
- * On Return: destination of packet, one of
- *   BDG_BCAST	broadcast
- *   BDG_MCAST  multicast
- *   BDG_LOCAL  is only for a local address (do not forward)
- *   BDG_DROP   drop the packet
- *   ifp	ifp of the destination interface.
- *
- * Forwarding is not done directly to give a chance to some drivers
- * to fetch more of the packet, or simply drop it completely.
- */
-
-static struct ifnet *
-bridge_in(struct ifnet *ifp, struct ether_header *eh)
-{
-    int index;
-    struct ifnet *dst, *old;
-    bdg_hash_table *bt;			/* location in hash table */
-    int dropit = BDG_MUTED(ifp);
-
-    /*
-     * hash the source address
-     */
-    BDG_LOCK();
-    index = HASH_FN(eh->ether_shost);
-    bt = &BDG_CLUSTER(ifp)->ht[index];
-    bt->used = 1;
-    old = bt->name;
-    if (old) {				/* the entry is valid */
-	if (!ETHER_ADDR_EQ(eh->ether_shost, bt->etheraddr)) {
-	    bdg_ipfw_colls++;
-	    bt->name = NULL;		/* NB: will overwrite below */
-	} else if (old != ifp) {
-	    /*
-	     * Found a loop. Either a machine has moved, or there
-	     * is a misconfiguration/reconfiguration of the network.
-	     * First, do not forward this packet!
-	     * Record the relocation anyways; then, if loops persist,
-	     * suspect a reconfiguration and disable forwarding
-	     * from the old interface.
-	     */
-	    bt->name = ifp;		/* relocate address */
-	    printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n",
-			bdg_loops, eh->ether_shost, ".",
-			ifp->if_name, ifp->if_unit,
-			old->if_name, old->if_unit,
-			BDG_MUTED(old) ? "muted":"active");
-	    dropit = 1;
-	    if (!BDG_MUTED(old)) {
-		if (bdg_loops++ > 10)
-		    BDG_MUTE(old);
-	    }
-	}
-    }
-
-    /*
-     * now write the source address into the table
-     */
-    if (bt->name == NULL) {
-	DPRINTF(("%s: new addr %6D at %d for %s%d\n",
-	    __func__, eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit));
-	ETHER_ADDR_COPY(bt->etheraddr, eh->ether_shost);
-	bt->name = ifp;
-    }
-    dst = bridge_dst_lookup(eh, BDG_CLUSTER(ifp));
-    BDG_UNLOCK();
-
-    /*
-     * bridge_dst_lookup can return the following values:
-     *   BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp.
-     * For muted interfaces, or when we detect a loop, the first 3 are
-     * changed in BDG_LOCAL (we still listen to incoming traffic),
-     * and others to BDG_DROP (no use for the local host).
-     * Also, for incoming packets, ifp is changed to BDG_DROP if ifp == src.
-     * These changes are not necessary for outgoing packets from ether_output().
-     */
-    BDG_STAT(ifp, BDG_IN);
-    switch ((uintptr_t)dst) {
-    case (uintptr_t)BDG_BCAST:
-    case (uintptr_t)BDG_MCAST:
-    case (uintptr_t)BDG_LOCAL:
-    case (uintptr_t)BDG_UNKNOWN:
-    case (uintptr_t)BDG_DROP:
-	BDG_STAT(ifp, dst);
-	break;
-    default:
-	if (dst == ifp || dropit)
-	    BDG_STAT(ifp, BDG_DROP);
-	else
-	    BDG_STAT(ifp, BDG_FORWARD);
-	break;
-    }
-
-    if (dropit) {
-	if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL)
-	    dst = BDG_LOCAL;
-	else
-	    dst = BDG_DROP;
-    } else {
-	if (dst == ifp)
-	    dst = BDG_DROP;
-    }
-    DPRINTF(("%s: %6D ->%6D ty 0x%04x dst %s%d\n", __func__,
-	eh->ether_shost, ".",
-	eh->ether_dhost, ".",
-	ntohs(eh->ether_type),
-	(dst <= BDG_FORWARD) ? bdg_dst_names[(int)dst] :
-		dst->if_name,
-	(dst <= BDG_FORWARD) ? 0 : dst->if_unit));
-
-    return dst;
-}
-
 /*
  * Return 1 if it's ok to send a packet out the specified interface.
  * The interface must be:
@@ -832,6 +717,8 @@
  *	up and running,
  *	not the source interface, and
  *	belong to the same cluster as the 'real_dst'.
+ *
+ * NB: src may be NULL when applying this check for outbound frames.
  */
 static __inline int
 bridge_ifok(struct ifnet *ifp, struct ifnet *src, struct ifnet *dst)
@@ -845,84 +732,103 @@
 }
 
 /*
- * Forward a packet to dst -- which can be a single interface or
- * an entire cluster. The src port and muted interfaces are excluded.
- *
- * If src == NULL, the pkt comes from ether_output, and dst is the real
- * interface the packet is originally sent to. In this case, we must forward
- * it to the whole cluster.
- * We never call bdg_forward from ether_output on interfaces which are
- * not part of a cluster.
- *
- * If possible (i.e. we can determine that the caller does not need
- * a copy), the packet is consumed here, and bdg_forward returns NULL.
- * Otherwise, a pointer to a copy of the packet is returned.
+ * Forward a packet to dst -- which can be a single interface or an
+ * entire cluster.  The src port and muted interfaces are excluded.
+ * src is the interface on which the packet originated (if any) and
+ * real_dst is the interface to which the frame was directed (only
+ * meaningful for outbound frames).
  */
 static struct mbuf *
-bdg_forward(struct mbuf *m0, struct ifnet *dst)
+bdg_forward(struct ifnet *dst, struct mbuf *m0,
+    struct ifnet *real_dst, struct ifnet *src)
 {
-#define	EH_RESTORE(_m) do {						   \
-    M_PREPEND((_m), ETHER_HDR_LEN, M_DONTWAIT);			   	   \
-    if ((_m) == NULL) {							   \
-	bdg_dropped++;							   \
-	return NULL;							   \
-    }									   \
-    if (eh != mtod((_m), struct ether_header *))			   \
-	bcopy(&save_eh, mtod((_m), struct ether_header *), ETHER_HDR_LEN); \
-    else								   \
-	bdg_predict++;							   \
-} while (0);
-    struct ether_header *eh;
-    struct ifnet *src;
     struct ifnet *ifp, *last;
-    int shared = bdg_copy;		/* someone else is using the mbuf */
-    struct ifnet *real_dst = dst;	/* real dst from ether_output */
-    struct ip_fw_args args;
-#ifdef PFIL_HOOKS
-    struct packet_filter_hook *pfh;
-    int rv;
-#endif /* PFIL_HOOKS */
-    struct ether_header save_eh;
+    int shared;
     struct mbuf *m;
 
-    DDB(quad_t ticks; ticks = rdtsc();)
-
-    args.rule = NULL;		/* did we match a firewall rule ? */
-    /* Fetch state from dummynet tag, ignore others */
-    for (;m0->m_type == MT_TAG; m0 = m0->m_next)
-	if (m0->_m_tag_id == PACKET_TAG_DUMMYNET) {
-	    args.rule = ((struct dn_pkt *)m0)->rule;
-	    shared = 0;			/* For sure this is our own mbuf. */
-	}
-    if (args.rule == NULL)
-	bdg_thru++;			/* count 1st time through bdg_forward */
-
+    KASSERT(dst != BDG_DROP && dst != BDG_LOCAL,
+	("forwarding bogus frame; dst %u", (int) dst));
     /*
-     * The packet arrives with the Ethernet header at the front.
+     * We need to make a copy if the packet goes to multiple
+     * destinations or we've been configured explicitly to
+     * copy frames (is this still useful?).
      */
-    eh = mtod(m0, struct ether_header *);
+    shared = (dst == BDG_BCAST || dst == BDG_MCAST) | bdg_copy;
 
-    src = m0->m_pkthdr.rcvif;
-    if (src == NULL) {			/* packet from ether_output */
-	BDG_LOCK();
-	dst = bridge_dst_lookup(eh, BDG_CLUSTER(real_dst));
-	BDG_UNLOCK();
+    last = NULL;
+    if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) {
+	/*
+	 * Scan all ports and send copies to all but the last.
+	 */
+	IFNET_RLOCK();		/* XXX replace with generation # */
+	TAILQ_FOREACH(ifp, &ifnet, if_link) {
+	    if (bridge_ifok(ifp, src, real_dst)) {
+		if (last) {
+		    /*
+		     * At this point we know two interfaces need a copy
+		     * of the packet (last + ifp) so we must create a
+		     * copy to handoff to last.
+		     */
+		    m = m_copypacket(m0, M_DONTWAIT);
+		    if (m == NULL) {
+			IFNET_RUNLOCK();
+			/* XXX  rate-limit */
+			printf("%s: m_copypacket failed!\n", __func__);
+			bdg_dropped++;
+			return m0;	/* the original is still there... */
+		    }
+		    if (IF_HANDOFF(&last->if_snd, m, last))
+			BDG_STAT(last, BDG_OUT);
+		    else
+			bdg_dropped++;
+		}
+		last = ifp;
+	    }
+	}
+	IFNET_RUNLOCK();
+    } else {
+	if (bridge_ifok(dst, src, real_dst))
+	    last = dst;
+    }
+    if (last) {
+	if (shared) {			/* need to copy */
+	    m = m_copypacket(m0, M_DONTWAIT);
+	    if (m == NULL) {
+		/* XXX  rate-limit */
+		printf("%s: m_copypacket failed!\n", __func__);
+		bdg_dropped++ ;
+		return m0;		/* the original is still there... */
+	    }
+	} else {			/* consume original */
+	    m = m0, m0 = NULL;
+	}
+	if (IF_HANDOFF(&last->if_snd, m, last))
+	    BDG_STAT(last, BDG_OUT);
+	else
+	    bdg_dropped++;
     }
+    return m0;
+}
+
+/*
+ * Apply packet filtering rules to a packet destined for forwarding.
+ * This is terribly ugly because the packet filtering engines all
+ * expect an IP packet so we must temporarily strip the Ethernet header
+ * and then restore it on return.  This gets complicated if this procedure
+ * requires reallocation of the mbuf chain and not just a simple adjustment
+ * of pointers to account for the header removal.
+ *
+ * Life would be much better if the packet filtering engines took a
+ * packet and knew to skip the fixed size Ethernet header.
+ */
+static struct mbuf *
+bdg_filter_and_forward(struct ifnet *dst, struct mbuf *m0, struct ifnet *src)
+{
+#ifdef PFIL_HOOKS
+    struct packet_filter_hook *pfh;
+#endif /* PFIL_HOOKS */
 
-    if (dst == BDG_DROP) {		/* this should not happen */
-	printf("xx bdg_forward for BDG_DROP\n");
-	m_freem(m0);
-	bdg_dropped++;
-	return NULL;
-    }
-    if (dst == BDG_LOCAL) {		/* this should not happen as well */
-	printf("xx ouch, bdg_forward for local pkt\n");
-	return m0;
-    }
-    if (dst == BDG_BCAST || dst == BDG_MCAST) {
-	 /* need a copy for the local stack */
-	 shared = 1;
-    }
+    DDB(quad_t ticks; ticks = rdtsc();)
 
     /*
      * Do filtering in a very similar way to what is done in ip_output.
@@ -931,21 +837,54 @@
      * Additional restrictions may apply e.g. non-IP, short packets,
      * and pkts already gone through a pipe.
      */
-    if (src != NULL && (
+    if ((IPFW_LOADED && bdg_ipfw != 0)
 #ifdef PFIL_HOOKS
-	((pfh = pfil_hook_get(PFIL_IN, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh)) != NULL && bdg_ipf !=0) ||
+	|| ((pfh = pfil_hook_get(PFIL_IN, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh)) != NULL && bdg_ipf !=0)
 #endif
-	(IPFW_LOADED && bdg_ipfw != 0))) {
+	) {
+#define	EH_RESTORE(_m) do {						   \
+    M_PREPEND((_m), ETHER_HDR_LEN, M_DONTWAIT);			   	   \
+    if ((_m) == NULL) {							   \
+	bdg_dropped++;							   \
+	return NULL;							   \
+    }									   \
+    if (eh != mtod((_m), struct ether_header *))			   \
+	bcopy(&save_eh, mtod((_m), struct ether_header *), ETHER_HDR_LEN); \
+    else								   \
+	bdg_predict++;							   \
+} while (0);
+	struct ether_header *eh;
+	struct ether_header save_eh;
+	struct ip_fw_args args;
+	struct mbuf *m;
+	struct m_tag *mtag;
+	int i, shared;
 
-	int i;
+	shared = (dst == BDG_BCAST || dst == BDG_MCAST) | bdg_copy;
 
-	if (args.rule != NULL && fw_one_pass)
-	    goto forward; /* packet already partially processed */
+	/*
+	 * Yech, check for dummynet state.  If the packet is tagged
+	 * and a rule has been applied then this is a subsequent pass
+	 * through the bridge and we need to bypass processing when
+	 * ``one pass'' is enabled.  Otherwise we need to record the
+	 * rule number for use by ipfw.
+	 *
+	 * This is bogus, ipfw should check itself for a previous rule
+	 * being applied.
+	 */
+	mtag = m_tag_find(m0, PACKET_TAG_DUMMYNET, NULL);
+	if (mtag != NULL) {
+	    args.rule = ((struct dn_pkt_tag *)(mtag+1))->rule;
+	    if (args.rule != NULL && fw_one_pass)
+		goto forward;	/* packet already partially processed */
+	    shared = 0;		/* for sure this is our own mbuif */
+	} else
+	    args.rule = NULL;
 	/*
 	 * i need some amt of data to be contiguous, and in case others need
 	 * the packet (shared==1) also better be in the first mbuf.
 	 */
-	i = min(m0->m_pkthdr.len, max_protohdr) ;
+	i = min(m0->m_pkthdr.len, max_protohdr);
 	if (shared || m0->m_len < i) {
 	    m0 = m_pullup(m0, i);
 	    if (m0 == NULL) {
@@ -953,8 +892,11 @@
 		bdg_dropped++;
 		return NULL;
 	    }
-	    eh = mtod(m0, struct ether_header *);
 	}
+	/*
+	 * The packet arrives with the Ethernet header at the front.
+	 */
+	eh = mtod(m0, struct ether_header *);
 
 	/*
 	 * Processing below expects the Ethernet header is stripped.
@@ -983,7 +925,7 @@
 
 	    do {
 		if (pfh->pfil_func) {
-		    rv = pfh->pfil_func(ip, ip->ip_hl << 2, src, 0, &m0);
+		    int rv = pfh->pfil_func(ip, ip->ip_hl << 2, src, 0, &m0);
 		    if (m0 == NULL) {
 			bdg_dropped++;
 			return NULL;
@@ -1046,12 +988,12 @@
 		    return NULL;
 		}
 	    } else {
-		m = m0 ; /* pass the original to dummynet */
-		m0 = NULL ; /* and nothing back to the caller */
+		m = m0;		/* pass the original to dummynet */
+		m0 = NULL;	/* and nothing back to the caller */
 	    }
 
-	    args.oif = real_dst;
-	    ip_dn_io_ptr(m, (i & 0xffff),DN_TO_BDG_FWD, &args);
+	    args.oif = dst;
+	    ip_dn_io_ptr(m, (i & 0xffff),DN_TO_BDG_FWD);
 	    return m0;
 	}
 	/*
@@ -1060,88 +1002,169 @@
 	 */
 	bdg_ipfw_drops++;
 	return m0;
+#undef EH_RESTORE
     }
 forward:
     /*
-     * Again, bring up the headers in case of shared bufs to avoid
-     * corruptions in the future.
+     * Do the actual forwarding.  We pass src as the real destination
+     * to avoid forwarding to ourself--this will happen through the
+     * normal input path by returning a copy of the mbuf chain.
      */
-    if (shared) {
-	int i = min(m0->m_pkthdr.len, max_protohdr);
+    bdg_thru++;
+    return bdg_forward(dst, m0, src, src);
+}
+
+/*
+ * Process a packet received on the input path.  We assume
+ * the first mbuf has the contiguous Ethernet header.
+ */
+static struct mbuf *
+bdg_input(struct ifnet *ifp, struct mbuf *m)
+{
+    struct ether_header *eh = mtod(m, struct ether_header *);
+    int dropit, index;
+    struct ifnet *dst, *old;
+    bdg_hash_table *bt;			/* location in hash table */
+
+    KASSERT(ifp == m->m_pkthdr.rcvif, ("rcvif mismatch"));
+    KASSERT(m->m_len >= sizeof(*eh), ("bogus mbuf, len %u", m->m_len));
 
-	m0 = m_pullup(m0, i);
-	if (m0 == NULL) {
-	    bdg_dropped++;
-	    return NULL;
+    /*
+     * hash the source address
+     */
+    BDG_LOCK();
+    dropit = BDG_MUTED(ifp);
+    index = HASH_FN(eh->ether_shost);
+    bt = &BDG_CLUSTER(ifp)->ht[index];
+    bt->used = 1;
+    old = bt->name;
+    if (old) {				/* the entry is valid */
+	if (!ETHER_ADDR_EQ(eh->ether_shost, bt->etheraddr)) {
+	    bdg_ipfw_colls++;
+	    bt->name = NULL;		/* NB: will overwrite below */
+	} else if (old != ifp) {
+	    /*
+	     * Found a loop. Either a machine has moved, or there
+	     * is a misconfiguration/reconfiguration of the network.
+	     * First, do not forward this packet!
+	     * Record the relocation anyways; then, if loops persist,
+	     * suspect a reconfiguration and disable forwarding
+	     * from the old interface.
+	     */
+	    bt->name = ifp;		/* relocate address */
+	    printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n",
+			bdg_loops, eh->ether_shost, ".",
+			ifp->if_name, ifp->if_unit,
+			old->if_name, old->if_unit,
+			BDG_MUTED(old) ? "muted":"active");
+	    dropit = 1;
+	    if (!BDG_MUTED(old)) {
+		if (bdg_loops++ > 10)
+		    BDG_MUTE(old);
+	    }
 	}
-	/* NB: eh is not used below; no need to recalculate it */
     }
 
     /*
-     * now real_dst is used to determine the cluster where to forward.
-     * For packets coming from ether_input, this is the one of the 'src'
-     * interface, whereas for locally generated packets (src==NULL) it
-     * is the cluster of the original destination interface, which
-     * was already saved into real_dst.
+     * now write the source address into the table
      */
-    if (src != NULL)
-	real_dst = src;
+    if (bt->name == NULL) {
+	DPRINTF(("%s: new addr %6D at %d for %s%d\n",
+	    __func__, eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit));
+	ETHER_ADDR_COPY(bt->etheraddr, eh->ether_shost);
+	bt->name = ifp;
+    }
+    dst = bridge_dst_lookup(eh, BDG_CLUSTER(ifp));
+    BDG_UNLOCK();
+
+    DPRINTF(("%s: %6D ->%6D ty 0x%04x dst %s%d\n", __func__,
+	eh->ether_shost, ".",
+	eh->ether_dhost, ".",
+	ntohs(eh->ether_type),
+	(dst <= BDG_FORWARD) ? bdg_dst_names[(int)dst] :
+		dst->if_name,
+	(dst <= BDG_FORWARD) ? 0 : dst->if_unit));
 
-    last = NULL;
-    if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) {
+    /*
+     * bridge_dst_lookup can return the following values:
+     *   BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp.
+     * For muted interfaces, or when we detect a loop, the first 3 are
+     * changed in BDG_LOCAL (we still listen to incoming traffic),
+     * and others to BDG_DROP (no use for the local host).
+     * Also, for incoming packets, ifp is changed to BDG_DROP if ifp == src.
+     * These changes are not necessary for outgoing packets from ether_output().
+     */
+    BDG_STAT(ifp, BDG_IN);
+    switch ((uintptr_t)dst) {
+    case (uintptr_t)BDG_UNKNOWN:
+	/*
+	 * Unknown destination, if interface is muted, drop
+	 * the frame.  Otherwise, treat it as local.
+	 */
+	if (dropit)
+	    goto drop;
+	/* fall thru... */
+    case (uintptr_t)BDG_LOCAL:		/* known local frame */
+	BDG_STAT(ifp, dst);
+	break;
+    case (uintptr_t)BDG_BCAST:		/* broadcast frame */
+    case (uintptr_t)BDG_MCAST:		/* multicast frame */
+	/*
+	 * Broad/multicast.  Forward to cluster and
+	 * return a copy for local dispatch.
+	 */
+	BDG_STAT(ifp, dst);
+	m = bdg_filter_and_forward(dst, m, ifp);
+	if (m == NULL)
+	    if_printf(ifp, "bridge dropped %s packet\n",
+		    dst == BDG_BCAST ? "broadcast" : "multicast");
+	break;
+    case (uintptr_t)BDG_DROP:		/* discard/drop frame */
+	goto drop;
+    default:				/* forward to specified interface */
 	/*
-	 * Scan all ports and send copies to all but the last.
+	 * Unicast; if to ourself or interface is muted, discard.
 	 */
-	IFNET_RLOCK();		/* XXX replace with generation # */
-	TAILQ_FOREACH(ifp, &ifnet, if_link) {
-	    if (bridge_ifok(ifp, src, real_dst)) {
-		if (last) {
-		    /*
-		     * At this point we know two interfaces need a copy
-		     * of the packet (last + ifp) so we must create a
-		     * copy to handoff to last.
-		     */
-		    m = m_copypacket(m0, M_DONTWAIT);
-		    if (m == NULL) {
-			IFNET_RUNLOCK();
-			printf("%s: , m_copypacket failed!\n", __func__);
-			bdg_dropped++;
-			return m0;	/* the original is still there... */
-		    }
-		    if (IF_HANDOFF(&last->if_snd, m, last))
-			BDG_STAT(last, BDG_OUT);
-		    else
-			bdg_dropped++;
-		}
-		last = ifp;
-	    }
-	}
-	IFNET_RUNLOCK();
-    } else {
-	if (bridge_ifok(dst, src, real_dst))
-	    last = dst;
+	if (dst == ifp || dropit)
+	    goto drop;
+	BDG_STAT(ifp, BDG_FORWARD);
+	m = bdg_filter_and_forward(dst, m, ifp);
+	goto drop1;			/* XXX must discard ourself */
     }
-    if (last) {
-	if (shared) {			/* need to copy */
-	    m = m_copypacket(m0, M_DONTWAIT);
-	    if (m == NULL) {
-		printf("%s: sorry, m_copypacket failed!\n", __func__);
-		bdg_dropped++ ;
-		return m0;		/* the original is still there... */
-	    }
-	} else {			/* consume original */
-	    m = m0, m0 = NULL;
-	}
-	if (IF_HANDOFF(&last->if_snd, m, last))
-	    BDG_STAT(last, BDG_OUT);
-	else
-	    bdg_dropped++;
+    return m;				/* return for local processing */
+drop:
+    BDG_STAT(ifp, BDG_DROP);
+drop1:
+    if (m != NULL)
+	m_freem(m);
+    return NULL;
+}
+
+/*
+ * Forward a packet from the output path.  The interface specified
+ * is where the frame is destined; we forward it to the whole cluster.
+ * We assume the first mbuf has the contiguous Ethernet header.
+ */
+static struct mbuf *
+bdg_output(struct ifnet *ifp, struct mbuf *m)
+{
+    struct ether_header *eh = mtod(m, struct ether_header *);
+    struct ifnet *dst;
+
+    KASSERT(m->m_len >= sizeof(*eh), ("bogus mbuf, len %u", m->m_len));
+
+    /* XXX LOR */
+    BDG_LOCK();
+    dst = bridge_dst_lookup(eh, BDG_CLUSTER(ifp));
+    BDG_UNLOCK();
+    if (dst == BDG_LOCAL) {			/* this should not happen */
+	printf("xx ouch, bdg_forward for local pkt\n");
+	return m;
     }
-
-    DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;
-	if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; )
-    return m0;
-#undef EH_RESTORE
+    m = bdg_forward(dst, m, ifp, NULL);		/* NB: nul src is ok */
+    if (m)
+	m_freem(m);
+    return NULL;
 }
 
 /*
@@ -1166,8 +1189,8 @@
 
     bzero(&bdg_stats, sizeof(bdg_stats));
 
-    bridge_in_ptr = bridge_in;
-    bdg_forward_ptr = bdg_forward;
+    bridge_in_ptr = bdg_input;
+    bridge_out_ptr = bdg_output;
     bdgtakeifaces_ptr = reconfigure_bridge;
 
     bdgtakeifaces_ptr();		/* XXX does this do anything? */
@@ -1177,16 +1200,21 @@
     return 0 ;
 }
 
-#ifdef KLD_MODULE
 static void
 bdgdestroy(void)
 {
+    BDG_LOCK();
+    do_bridge = 0;
     bridge_in_ptr = NULL;
-    bdg_forward_ptr = NULL;
+    bridge_out_ptr = NULL;
+    callout_stop(&bdg_callout);
+    BDG_UNLOCK();
+
     bdgtakeifaces_ptr = NULL;
 
-    callout_stop(&bdg_callout);
+    BDG_LOCK();
     bridge_off();
+    BDG_UNLOCK();
 
     if (clusters)
 	free(clusters, M_IFADDR);
@@ -1194,9 +1222,9 @@
 	free(ifp2sc, M_IFADDR);
 	ifp2sc = NULL;
     }
+    /* XXX race with bdg_timeout? */
     mtx_destroy(&bdg_mtx);
 }
-#endif /* KLD_MODULE */
 
 /*
  * initialization code, both for static and dynamic loading.
@@ -1204,28 +1232,19 @@
 static int
 bridge_modevent(module_t mod, int type, void *unused)
 {
-	int err = 0 ;
+	int err;
 
 	switch (type) {
 	case MOD_LOAD:
-		if (BDG_LOADED) {
-			err = EEXIST;
-			break ;
-		}
 		err = bdginit();
 		break;
 	case MOD_UNLOAD:
-#if !defined(KLD_MODULE)
-		printf("bridge statically compiled, cannot unload\n");
-		err = EINVAL ;
-#else
 		/* XXX lock */
-		do_bridge = 0;
 		bdgdestroy();
-#endif
+		err = 0;
 		break;
 	default:
-		err = EINVAL ;
+		err = EINVAL;
 		break;
 	}
 	return err;

==== //depot/projects/tagcleanup/sys/net/bridge.h#2 (text+ko) ====

@@ -101,12 +101,12 @@
 #define BDG_STAT(ifp, type) bdg_stats.s[ifp->if_index].p_in[(uintptr_t)type]++ 
  
 #ifdef _KERNEL
-typedef	struct ifnet *bridge_in_t(struct ifnet *, struct ether_header *);
+typedef	struct mbuf *bridge_in_t(struct ifnet *, struct mbuf *);
 /* bdg_forward frees the mbuf if necessary, returning null */
-typedef	struct mbuf *bdg_forward_t(struct mbuf *, struct ifnet *);
+typedef	struct mbuf *bridge_out_t(struct ifnet *, struct mbuf *);
 typedef	void bdgtakeifaces_t(void);
 extern	bridge_in_t *bridge_in_ptr;
-extern	bdg_forward_t *bdg_forward_ptr;
+extern	bridge_out_t *bridge_out_ptr;
 extern	bdgtakeifaces_t *bdgtakeifaces_ptr;
 
 #define	BDG_LOADED	(bdgtakeifaces_ptr != NULL)

==== //depot/projects/tagcleanup/sys/net/if_ethersubr.c#2 (text+ko) ====

@@ -107,7 +107,7 @@
 /* bridge support */
 int do_bridge;
 bridge_in_t *bridge_in_ptr;
-bdg_forward_t *bdg_forward_ptr;
+bridge_out_t *bridge_out_ptr;
 bdgtakeifaces_t *bdgtakeifaces_ptr;
 struct bdg_softc *ifp2sc;
 
@@ -120,10 +120,7 @@
 #define senderr(e) do { error = (e); goto bad;} while (0)
 #define IFP2AC(IFP) ((struct arpcom *)IFP)
 
-int
-ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst,
-	struct ip_fw **rule, int shared);
-static int ether_ipfw;
+ether_fw_chk_t *ether_fw_chk_ptr;
 
 /*
  * Ethernet output routine.
@@ -141,7 +138,7 @@
 {
 	short type;
 	int error = 0, hdrcmplt = 0;
- 	u_char esrc[6], edst[6];
+ 	u_char esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN];
 	struct rtentry *rt;
 	struct ether_header *eh;
 	int loop_copy = 0;
@@ -229,13 +226,13 @@
 	case pseudo_AF_HDRCMPLT:
 		hdrcmplt = 1;
 		eh = (struct ether_header *)dst->sa_data;
-		(void)memcpy(esrc, eh->ether_shost, sizeof (esrc));
+		(void)memcpy(esrc, eh->ether_shost, ETHER_ADDR_LEN);
 		/* FALLTHROUGH */
 
 	case AF_UNSPEC:
 		loop_copy = -1; /* if this is for us, don't do it */
 		eh = (struct ether_header *)dst->sa_data;
- 		(void)memcpy(edst, eh->ether_dhost, sizeof (edst));
+ 		(void)memcpy(edst, eh->ether_dhost, ETHER_ADDR_LEN);
 		type = eh->ether_type;
 		break;
 
@@ -252,9 +249,8 @@
 	if (m == 0)
 		senderr(ENOBUFS);
 	eh = mtod(m, struct ether_header *);
-	(void)memcpy(&eh->ether_type, &type,
-		sizeof(eh->ether_type));
- 	(void)memcpy(eh->ether_dhost, edst, sizeof (edst));
+	(void)memcpy(&eh->ether_type, &type, sizeof(eh->ether_type));
+ 	(void)memcpy(eh->ether_dhost, edst, ETHER_ADDR_LEN);
 	if (hdrcmplt)
 		(void)memcpy(eh->ether_shost, esrc,
 			sizeof(eh->ether_shost));
@@ -282,7 +278,7 @@
 		if ((m->m_flags & M_BCAST) || (loop_copy > 0)) {
 			struct mbuf *n;
 
-			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
+			if ((n = m_copypacket(m, M_DONTWAIT)) != NULL) {
 				n->m_pkthdr.csum_flags |= csum_flags;
 				if (csum_flags & CSUM_DATA_VALID)
 					n->m_pkthdr.csum_data = 0xffff;
@@ -323,135 +319,27 @@
 int
 ether_output_frame(struct ifnet *ifp, struct mbuf *m)
 {
-	struct ip_fw *rule = NULL;
-
-	/* Extract info from dummynet tag, ignore others */
-	for (; m->m_type == MT_TAG; m = m->m_next)
-		if (m->m_flags == PACKET_TAG_DUMMYNET)
-			rule = ((struct dn_pkt *)m)->rule;
-
-	if (rule == NULL && BDG_ACTIVE(ifp)) {
-		/*
-		 * Beware, the bridge code notices the null rcvif and
-		 * uses that identify that it's being called from
-		 * ether_output as opposd to ether_input.  Yech.
-		 */
-		m->m_pkthdr.rcvif = NULL;
-		m = bdg_forward_ptr(m, ifp);
-		if (m != NULL)
-			m_freem(m);
-		return (0);
-	}
-	if (IPFW_LOADED && ether_ipfw != 0) {
-		if (ether_ipfw_chk(&m, ifp, &rule, 0) == 0) {
-			if (m) {
-				m_freem(m);
-				return ENOBUFS;	/* pkt dropped */
-			} else
-				return 0;	/* consumed e.g. in a pipe */
-		}
-	}
+	int error;
 
 	/*
-	 * Queue message on interface, update output statistics if
-	 * successful, and start output if interface not yet active.
+	 * Pass to bridge if active.
 	 */
-	return (IF_HANDOFF(&ifp->if_snd, m, ifp) ? 0 : ENOBUFS);
-}
-
-/*
- * ipfw processing for ethernet packets (in and out).
- * The second parameter is NULL from ether_demux, and ifp from
- * ether_output_frame. This section of code could be used from
- * bridge.c as well as long as we use some extra info
- * to distinguish that case from ether_output_frame();
- */
-int
-ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst,
-	struct ip_fw **rule, int shared)
-{
-	struct ether_header *eh;
-	struct ether_header save_eh;
-	struct mbuf *m;
-	int i;
-	struct ip_fw_args args;
-
-	if (*rule != NULL && fw_one_pass)
-		return 1; /* dummynet packet, already partially processed */
-
+	if (bridge_out_ptr && (m = (*bridge_out_ptr)(ifp, m)) == NULL)
+		return 0;
 	/*
-	 * I need some amt of data to be contiguous, and in case others need
-	 * the packet (shared==1) also better be in the first mbuf.
+	 * Pass to ipfw if active.  Note that we explicitly return
+	 * an error code returned to us by ipfw when the packet is
+	 * discarded.  This is mostly a hack for when dummynet consumes
+	 * packets inside a pipe and we want the transmission to look
+	 * successful.  It's not clear if this is necessary.
 	 */
-	m = *m0;
-	i = min( m->m_pkthdr.len, max_protohdr);
-	if ( shared || m->m_len < i) {
-		m = m_pullup(m, i);
-		if (m == NULL) {
-			*m0 = m;
-			return 0;
-		}
-	}
-	eh = mtod(m, struct ether_header *);
-	save_eh = *eh;			/* save copy for restore below */
-	m_adj(m, ETHER_HDR_LEN);	/* strip ethernet header */
-
-	args.m = m;		/* the packet we are looking at		*/
-	args.oif = dst;		/* destination, if any			*/
-	args.divert_rule = 0;	/* we do not support divert yet		*/
-	args.rule = *rule;	/* matching rule to restart		*/
-	args.next_hop = NULL;	/* we do not support forward yet	*/
-	args.eh = &save_eh;	/* MAC header for bridged/MAC packets	*/
-	i = ip_fw_chk_ptr(&args);
-	m = args.m;

>>> TRUNCATED FOR MAIL (1000 lines) <<<



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200308302156.h7ULuCsN072305>