Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 23 Dec 2007 23:52:03 GMT
From:      Robert Watson <rwatson@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 131498 for review
Message-ID:  <200712232352.lBNNq3LE040945@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=131498

Change 131498 by rwatson@rwatson_cinnamon on 2007/12/23 23:51:06

	Rework libpcap interactions with zero-copy BPF:
	
	- Move to simply assigning generation numbers and comparing for
	  equality rather than incrementing and using >.  As a result, we
	  can remove the XXX (and concern) about serial number
	  arithmetic.  The invariant is that if the two numbers are
	  identical, the kernel owns the buffer, and if they differ,
	  user space owns the buffer.
	
	- Introduce a new pcap_t field, zbuffer, which tracks the last
	  buffer to be "used", if any.  That way we always know what the
	  next buffer should be.
	
	- Check shared memory buffers for data *before* calling select(),
	  in order to avoid the select() call if we already have data
	  waiting.
	
	- Pull out shared memory checks for data in pcap_next_zbuf() and
	  perform them more regularly so as to avoid more system calls.
	  For example, don't rotate the buffers unless there's no pending
	  data we own after select() returns.
	
	This requires review and more testing, but should avoid one
	system call per buffer when BPF is able to keep the pipeline
	full.

Affected files ...

.. //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#13 edit
.. //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#7 edit

Differences ...

==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#13 (text+ko) ====

@@ -143,29 +143,70 @@
 
 #ifdef BIOCGETBUFMODE
 /*
+ * Simple version of pcap_next_zbuf() -- do the shared memory part, but no
+ * blocking.
+ */
+static int
+pcap_next_zbuf_shm(pcap_t *p, u_int *cc)
+{
+	struct bpf_zbuf_header *bzh;
+
+	/*
+	 * If we've never used a buffer before, or if the last buffer was
+	 * zbuf2, try zbuf1.
+	 */
+	if (p->zbuffer == p->zbuf2 || p->zbuffer == NULL) {
+		bzh = (struct bpf_zbuf_header *)p->zbuf1;
+		if (bzh->bzh_kernel_gen != bzh->bzh_user_gen) {
+			p->bzh = bzh;
+			p->zbuffer = (u_char *)p->zbuf1;
+			p->buffer = p->zbuffer + sizeof(*bzh);
+			*cc = bzh->bzh_kernel_len;
+			return (1);
+		}
+	} else if (p->zbuffer == p->zbuf1) {
+		bzh = (struct bpf_zbuf_header *)p->zbuf2;
+		if (bzh->bzh_kernel_gen != bzh->bzh_user_gen) {
+			p->bzh = bzh;
+			p->zbuffer = (u_char *)p->zbuf2;
+			p->buffer = p->zbuffer + sizeof(*bzh);
+			*cc = bzh->bzh_kernel_len;
+			return (1);
+		}
+	}
+	return (0);
+}
+
+/*
  * Selection routine for zero-copy BPF: identify the next completed buffer,
  * if any.  Try shared memory first, and if that doesn't work, make a system
  * call, which may dislodge a buffer.
  *
  * Return (1) if the buffer is found, (0) if a retry is required, and (-1) if
  * there is an unrecoverable error.
- *
- * XXXRW: Check to make sure the version comparison we're doing here is
- * really the right thing -- maybe use serial number arithmetic?
  */
 static int
 pcap_next_zbuf(pcap_t *p, u_int *cc)
 {
-	struct bpf_zbuf_header *bzh;
 	struct bpf_zbuf bz;
 	struct timeval tv;
 	fd_set r_set;
-	int r;
+	int data, r;
+
+	/*
+	 * Start out by seeing whether anything is waiting by checking the
+	 * next shared memory buffer for data.
+	 */
+	data = pcap_next_zbuf_shm(p, cc);
+	if (data)
+		return (data);
 
+	/*
+	 * No data in the buffer, so must use select() to wait for data or
+	 * the next timeout.
+	 */
 	FD_ZERO(&r_set);
 	FD_SET(p->fd, &r_set);
-	p->bzh = NULL;
-	p->buffer = NULL;
 	if (p->to_ms != 0) {
 		tv.tv_sec = p->to_ms / 1000;
 		tv.tv_usec = (p->to_ms * 1000) % 1000000;
@@ -178,67 +219,29 @@
 		    "select: %s", strerror(errno));
 		return (-1);
 	}
+
 	/*
-	 * Handle timeouts here
+	 * Check again for data, which may exist now that we've either been
+	 * woken up as a result of data or timed out.  Try the "there's data"		 * case first since it doesn't require a system call.
 	 */
-	if (r == 0) {
-		if (ioctl(p->fd, BIOCROTZBUF, &bz) < 0) {
-			(void) snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
-			    "BIOCROTZBUF: %s", strerror(errno));
-			return (-1);
-		}
-		/*
-		 * select(2) woke us up due to a timeout, and there was no
-		 * data to be processed in the store buffer.  Tell pcap to
-		 * to wait again.
-		 */
-		if (bz.bz_bufa == NULL)
-			return (0);
-	}
-	/* XXXCSJP should we check FD_ISSET()? */
+	data = pcap_next_zbuf_shm(p, cc);
+	if (data)
+		return (data);
+
 	/*
-	 * If we have made it this far, chances are select(2) returned because
-	 * there is data ready to be processed in the hold buffer.  Compare the
-	 * user generation numbers against the kernels.  If there are any
-	 * differences, process the packet data.
+	 * Try forcing a buffer rotation to dislodge timed out or immediate
+	 * data.
 	 */
-	bzh = (struct bpf_zbuf_header *)p->zbuf1;
-	if (bzh->bzh_kernel_gen > bzh->bzh_user_gen) {
-		p->bzh = bzh;
-		p->buffer = (u_char *)p->zbuf1;
-		p->buffer += sizeof(*bzh);
-		*cc = bzh->bzh_kernel_len;
-		return (1);
-	}
-	bzh = (struct bpf_zbuf_header *)p->zbuf2;
-	if (bzh->bzh_kernel_gen > bzh->bzh_user_gen) {
-		p->bzh = bzh;
-		p->buffer = (u_char *)p->zbuf2;
-		p->buffer += sizeof(*bzh);
-		*cc = bzh->bzh_kernel_len;
-		return (1);
-	}
-	/*
-	 * If the generation numbers were the same for both buffers, then it
-	 * is possible that we woke up because of BIOCIMMEDIATE.  In either
-	 * case, manually rotate the buffers.
-	 */
 	if (ioctl(p->fd, BIOCROTZBUF, &bz) < 0) {
 		(void) snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
 		    "BIOCROTZBUF: %s", strerror(errno));
 		return (-1);
 	}
+
 	/*
-	 * It's possible that we were unable to rotate the buffer because the
-	 * user generation numbers have not been modified, in which case retry.
+	 * Last chance for data.
 	 */
-	if (bz.bz_bufa == NULL)
-		return (0);
-	p->bzh = (struct bpf_zbuf_header *)bz.bz_bufa;
-	p->buffer = (u_char *)bz.bz_bufa;
-	p->buffer += sizeof(*bzh);
-	*cc = bz.bz_buflen;
-	return (1);
+	return (pcap_next_zbuf_shm(p, cc));
 }
 
 static int
@@ -246,7 +249,7 @@
 {
 	struct bpf_zbuf bz;
 
-	p->bzh->bzh_user_gen++;
+	p->bzh->bzh_user_gen = p->bzh->bzh_kernel_gen;
 	p->bzh = NULL;
 	p->buffer = NULL;
 	return (0);

==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#7 (text+ko) ====

@@ -184,9 +184,11 @@
 	 * alternative between these two actual mmap'd buffers as required.
 	 * As there is a header on the front size of the mmap'd buffer, only
 	 * some of the buffer is exposed to libpcap as a whole via bufsize;
-	 * zbufsize is the true size.
+	 * zbufsize is the true size.  zbuffer tracks the current zbuf
+	 * assocated with buffer so that it can be used to decide which the
+	 * next buffer to read will be.
 	 */
-	u_char *zbuf1, *zbuf2;
+	u_char *zbuf1, *zbuf2, *zbuffer;
 	u_int zbufsize;
 	u_int timeout;
 	u_int zerocopy;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200712232352.lBNNq3LE040945>