Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 25 Mar 2008 08:52:13 GMT
From:      Robert Watson <rwatson@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 138503 for review
Message-ID:  <200803250852.m2P8qDha019772@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=138503

Change 138503 by rwatson@rwatson_cinnamon on 2008/03/25 08:52:05

	Maintain and observe a ZBUF_FLAG_IMMUTABLE flag on zero-copy BPF
	buffer kernel descriptors, which is used to allow the buffer
	currently in the BPF "store" position to be assigned to userspace
	when it fills, even if userspace hasn't acknowledged the buffer
	in the "hold" position yet.  To implement this, notify the buffer
	model when a buffer becomes full, and check that the store buffer
	is writable, not just for it being full, before trying to append
	new packet data.  Shared memory buffers will be assigned to
	userspace at most once per fill, be it in the store or in the
	hold position.
	
	This removes the restriction that at most one shared memory buffer
	can be owned by userspace, reducing the chances that userspace will
	need to call select() after acknowledging one buffer in order to
	wait for the next buffer when under high load.  This more fully
	realizes the goal of zero system calls in order to process a
	high-speed packet stream from BPF.

Affected files ...

.. //depot/projects/zcopybpf/src/sys/net/bpf.c#52 edit
.. //depot/projects/zcopybpf/src/sys/net/bpf_zerocopy.c#37 edit
.. //depot/projects/zcopybpf/src/sys/net/bpf_zerocopy.h#14 edit

Differences ...

==== //depot/projects/zcopybpf/src/sys/net/bpf.c#52 (text+ko) ====

@@ -218,6 +218,45 @@
 	return (0);
 }
 
+/*
+ * Allow the buffer model to indicate that the current store buffer is
+ * immutable, regardless of the appearance of space.  Return (1) if the
+ * buffer is writable, and (0) if not.
+ */
+static int
+bpf_canwritebuf(struct bpf_d *d)
+{
+
+	BPFD_LOCK_ASSERT(d);
+
+	switch (d->bd_bufmode) {
+	case BPF_BUFMODE_ZBUF:
+		return (bpf_zerocopy_canwritebuf(d));
+	}
+	return (1);
+}
+
+/*
+ * Notify buffer model that an attempt to write to the store buffer has
+ * resulted in a dropped packet, in which case the buffer may be considered
+ * full.
+ */
+static void
+bpf_buffull(struct bpf_d *d)
+{
+
+	BPFD_LOCK_ASSERT(d);
+
+	switch (d->bd_bufmode) {
+	case BPF_BUFMODE_ZBUF:
+		bpf_zerocopy_buffull(d);
+		break;
+	}
+}
+
+/*
+ * Notify the buffer model that a buffer has moved into the hold position.
+ */
 void
 bpf_bufheld(struct bpf_d *d)
 {
@@ -1676,7 +1715,7 @@
 	 * run this check if we need the space), but for now it's a reliable
 	 * spot to do it.
 	 */
-	if (bpf_canfreebuf(d)) {
+	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
 		d->bd_fbuf = d->bd_hbuf;
 		d->bd_hbuf = NULL;
 		d->bd_hlen = 0;
@@ -1694,27 +1733,28 @@
 
 	/*
 	 * Round up the end of the previous packet to the next longword.
+	 *
+	 * Drop the packet if there's no room and no hope of room.
+	 * If the packet would overflow the storage buffer or the storage
+	 * buffer is considered immutable by the buffer model, try to rotate
+	 * the buffer and wakeup pending processes.
 	 */
 	curlen = BPF_WORDALIGN(d->bd_slen);
-	if (curlen + totlen > d->bd_bufsize) {
-		/*
-		 * This packet will overflow the storage buffer.
-		 * Rotate the buffers if we can, then wakeup any
-		 * pending reads.
-		 */
+	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
 		if (d->bd_fbuf == NULL) {
 			/*
-			 * We haven't completed the previous read yet,
-			 * so drop the packet.
+			 * There's no room in the store buffer, and no
+			 * prospect of room, so drop the packet.  Notify the
+			 * buffer model.
 			 */
+			bpf_buffull(d);
 			++d->bd_dcount;
 			return;
 		}
 		ROTATE_BUFFERS(d);
 		do_wakeup = 1;
 		curlen = 0;
-	}
-	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
+	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
 		/*
 		 * Immediate mode is set, or the read timeout has already
 		 * expired during a select call.  A packet arrived, so the

==== //depot/projects/zcopybpf/src/sys/net/bpf_zerocopy.c#37 (text+ko) ====

@@ -84,7 +84,7 @@
  * scatter-gather copying.  One significant mitigating factor is that on
  * systems with a direct memory map, we can avoid TLB misses.
  *
- * At the front of the shared memor region is a bpf_zbuf_header, which
+ * At the front of the shared memory region is a bpf_zbuf_header, which
  * contains shared control data to allow user space and the kernel to
  * synchronize; this is included in zb_size, but not bpf_bufsize, so that BPF
  * knows that the space is not available.
@@ -93,11 +93,19 @@
 	vm_offset_t	 zb_uaddr;	/* User address, may be stale. */
 	size_t		 zb_size;	/* Size of buffer, incl. header. */
 	u_int		 zb_numpages;	/* Number of pages. */
+	int		 zb_flags;	/* Flags on zbuf. */
 	struct sf_buf	**zb_pages;	/* Pages themselves. */
 	struct bpf_zbuf_header	*zb_header;	/* Shared header. */
 };
 
 /*
+ * When a buffer has been assigned to userspace, flag it as such, as the
+ * buffer may remain in the store position as a result of the user process
+ * not yet having acknowledged the buffer in the hold position.
+ */
+#define	ZBUF_FLAG_IMMUTABLE	0x00000001	/* Set when owned by user. */
+
+/*
  * Release a page we've previously wired.
  */
 static void
@@ -253,6 +261,9 @@
 	src_bytes = (u_char *)src;
 	zb = (struct zbuf *)buf;
 
+	KASSERT((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0,
+	    ("bpf_zerocopy_append_bytes: ZBUF_FLAG_IMMUTABLE"));
+
 	/*
 	 * Scatter-gather copy to user pages mapped into kernel address space
 	 * using sf_bufs: copy up to a page at a time.
@@ -302,6 +313,9 @@
 	m = (struct mbuf *)src;
 	zb = (struct zbuf *)buf;
 
+	KASSERT((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0,
+	    ("bpf_zerocopy_append_mbuf: ZBUF_FLAG_IMMUTABLE"));
+
 	/*
 	 * Scatter gather both from an mbuf chain and to a user page set
 	 * mapped into kernel address space using sf_bufs.  If we're lucky,
@@ -343,9 +357,38 @@
 }
 
 /*
+ * Notification from the BPF framework that a buffer in the store position is
+ * rejecting packets and may be considered full.  We mark the buffer as
+ * immutable and assign to userspace so that it is immediately available for
+ * the user process to access.
+ */
+void
+bpf_zerocopy_buffull(struct bpf_d *d)
+{
+	struct zbuf *zb;
+
+	KASSERT(d->bd_bufmode == BPF_BUFMODE_ZBUF,
+	    ("bpf_zerocopy_buffull: not in zbuf mode"));
+
+	zb = (struct zbuf *)d->bd_sbuf;
+	KASSERT(zb != NULL, ("bpf_zerocopy_buffull: zb == NULL"));
+
+	if ((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0) {
+		zb->zb_flags |= ZBUF_FLAG_IMMUTABLE;
+		zb->zb_header->bzh_kernel_len = d->bd_slen;
+		atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1);
+	}
+}
+
+/*
  * Notification from the BPF framework that a buffer has moved into the held
  * slot on a descriptor.  Zero-copy BPF will update the shared page to let
- * the user process know.
+ * the user process know and flag the buffer as immutable if it hasn't
+ * already been marked immutable due to filling while it was in the store
+ * position.
+ *
+ * Note: identical logic as in bpf_zerocopy_buffull(), except that we operate
+ * on bd_hbuf and bd_hlen.
  */
 void
 bpf_zerocopy_bufheld(struct bpf_d *d)
@@ -357,8 +400,12 @@
 
 	zb = (struct zbuf *)d->bd_hbuf;
 	KASSERT(zb != NULL, ("bpf_zerocopy_bufheld: zb == NULL"));
-	zb->zb_header->bzh_kernel_len = d->bd_hlen;
-	atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1);
+
+	if ((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0) {
+		zb->zb_flags |= ZBUF_FLAG_IMMUTABLE;
+		zb->zb_header->bzh_kernel_len = d->bd_hlen;
+		atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1);
+	}
 }
 
 /*
@@ -385,6 +432,28 @@
 }
 
 /*
+ * Query from the BPF framework as to whether or not the buffer currently
+ * the store position can actually be written to.  This may return false if
+ * the store buffer is assigned to userspace before the hold buffer is
+ * acknowledged.
+ */
+int
+bpf_zerocopy_canwritebuf(struct bpf_d *d)
+{
+	struct zbuf *zb;
+
+	KASSERT(d->bd_bufmode == BPF_BUFMODE_ZBUF,
+	    ("bpf_zerocopy_canwritebuf: not in zbuf mode"));
+
+	zb = (struct zbuf *)d->bd_sbuf;
+	KASSERT(zb != NULL, ("bpf_zerocopy_canwritebuf: bd_sbuf NULL"));
+
+	if (zb->zb_flags & ZBUF_FLAG_IMMUTABLE)
+		return (0);
+	return (1);
+}
+
+/*
  * Free zero copy buffers at request of descriptor.
  */
 void

==== //depot/projects/zcopybpf/src/sys/net/bpf_zerocopy.h#14 (text+ko) ====

@@ -40,8 +40,10 @@
 	    void *src, u_int len);
 void	bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
 	    void *src, u_int len);
+void	bpf_zerocopy_buffull(struct bpf_d *);
 void	bpf_zerocopy_bufheld(struct bpf_d *);
 int	bpf_zerocopy_canfreebuf(struct bpf_d *);
+int	bpf_zerocopy_canwritebuf(struct bpf_d *);
 void	bpf_zerocopy_free(struct bpf_d *d);
 int	bpf_zerocopy_ioctl_getzmax(struct thread *td, struct bpf_d *d,
 	    size_t *i);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200803250852.m2P8qDha019772>