From owner-p4-projects@FreeBSD.ORG Sun Dec 23 23:52:03 2007 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id B0F3B16A479; Sun, 23 Dec 2007 23:52:03 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 5207716A476 for ; Sun, 23 Dec 2007 23:52:03 +0000 (UTC) (envelope-from bb+lists.freebsd.perforce@cyrus.watson.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 5AB0113C455 for ; Sun, 23 Dec 2007 23:52:03 +0000 (UTC) (envelope-from bb+lists.freebsd.perforce@cyrus.watson.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.1/8.14.1) with ESMTP id lBNNq3Se041001 for ; Sun, 23 Dec 2007 23:52:03 GMT (envelope-from bb+lists.freebsd.perforce@cyrus.watson.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.1/8.14.1/Submit) id lBNNq3LE040945 for perforce@freebsd.org; Sun, 23 Dec 2007 23:52:03 GMT (envelope-from bb+lists.freebsd.perforce@cyrus.watson.org) Date: Sun, 23 Dec 2007 23:52:03 GMT Message-Id: <200712232352.lBNNq3LE040945@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to bb+lists.freebsd.perforce@cyrus.watson.org using -f From: Robert Watson To: Perforce Change Reviews Cc: Subject: PERFORCE change 131498 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 23 Dec 2007 23:52:04 -0000 http://perforce.freebsd.org/chv.cgi?CH=131498 Change 131498 by rwatson@rwatson_cinnamon on 2007/12/23 23:51:06 Rework libpcap interactions with zero-copy BPF: - Move to simply assigning generation numbers and comparing for equality rather than incrementing and using >. As a result, we can remove the XXX (and concern) about serial number arithmetic. The invariant is that if the two numbers are identical, the kernel owns the buffer, and if they differ, user space owns the buffer. - Introduce a new pcap_t field, zbuffer, which tracks the last buffer to be "used", if any. That way we always know what the next buffer should be. - Check shared memory buffers for data *before* calling select(), in order to avoid the select() call if we already have data waiting. - Pull out shared memory checks for data in pcap_next_zbuf() and perform them more regularly so as to avoid more system calls. For example, don't rotate the buffers unless there's no pending data we own after select() returns. This requires review and more testing, but should avoid one system call per buffer when BPF is able to keep the pipeline full. Affected files ... .. //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#13 edit .. //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#7 edit Differences ... ==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#13 (text+ko) ==== @@ -143,29 +143,70 @@ #ifdef BIOCGETBUFMODE /* + * Simple version of pcap_next_zbuf() -- do the shared memory part, but no + * blocking. + */ +static int +pcap_next_zbuf_shm(pcap_t *p, u_int *cc) +{ + struct bpf_zbuf_header *bzh; + + /* + * If we've never used a buffer before, or if the last buffer was + * zbuf2, try zbuf1. + */ + if (p->zbuffer == p->zbuf2 || p->zbuffer == NULL) { + bzh = (struct bpf_zbuf_header *)p->zbuf1; + if (bzh->bzh_kernel_gen != bzh->bzh_user_gen) { + p->bzh = bzh; + p->zbuffer = (u_char *)p->zbuf1; + p->buffer = p->zbuffer + sizeof(*bzh); + *cc = bzh->bzh_kernel_len; + return (1); + } + } else if (p->zbuffer == p->zbuf1) { + bzh = (struct bpf_zbuf_header *)p->zbuf2; + if (bzh->bzh_kernel_gen != bzh->bzh_user_gen) { + p->bzh = bzh; + p->zbuffer = (u_char *)p->zbuf2; + p->buffer = p->zbuffer + sizeof(*bzh); + *cc = bzh->bzh_kernel_len; + return (1); + } + } + return (0); +} + +/* * Selection routine for zero-copy BPF: identify the next completed buffer, * if any. Try shared memory first, and if that doesn't work, make a system * call, which may dislodge a buffer. * * Return (1) if the buffer is found, (0) if a retry is required, and (-1) if * there is an unrecoverable error. - * - * XXXRW: Check to make sure the version comparison we're doing here is - * really the right thing -- maybe use serial number arithmetic? */ static int pcap_next_zbuf(pcap_t *p, u_int *cc) { - struct bpf_zbuf_header *bzh; struct bpf_zbuf bz; struct timeval tv; fd_set r_set; - int r; + int data, r; + + /* + * Start out by seeing whether anything is waiting by checking the + * next shared memory buffer for data. + */ + data = pcap_next_zbuf_shm(p, cc); + if (data) + return (data); + /* + * No data in the buffer, so must use select() to wait for data or + * the next timeout. + */ FD_ZERO(&r_set); FD_SET(p->fd, &r_set); - p->bzh = NULL; - p->buffer = NULL; if (p->to_ms != 0) { tv.tv_sec = p->to_ms / 1000; tv.tv_usec = (p->to_ms * 1000) % 1000000; @@ -178,67 +219,29 @@ "select: %s", strerror(errno)); return (-1); } + /* - * Handle timeouts here + * Check again for data, which may exist now that we've either been + * woken up as a result of data or timed out. Try the "there's data" * case first since it doesn't require a system call. */ - if (r == 0) { - if (ioctl(p->fd, BIOCROTZBUF, &bz) < 0) { - (void) snprintf(p->errbuf, PCAP_ERRBUF_SIZE, - "BIOCROTZBUF: %s", strerror(errno)); - return (-1); - } - /* - * select(2) woke us up due to a timeout, and there was no - * data to be processed in the store buffer. Tell pcap to - * to wait again. - */ - if (bz.bz_bufa == NULL) - return (0); - } - /* XXXCSJP should we check FD_ISSET()? */ + data = pcap_next_zbuf_shm(p, cc); + if (data) + return (data); + /* - * If we have made it this far, chances are select(2) returned because - * there is data ready to be processed in the hold buffer. Compare the - * user generation numbers against the kernels. If there are any - * differences, process the packet data. + * Try forcing a buffer rotation to dislodge timed out or immediate + * data. */ - bzh = (struct bpf_zbuf_header *)p->zbuf1; - if (bzh->bzh_kernel_gen > bzh->bzh_user_gen) { - p->bzh = bzh; - p->buffer = (u_char *)p->zbuf1; - p->buffer += sizeof(*bzh); - *cc = bzh->bzh_kernel_len; - return (1); - } - bzh = (struct bpf_zbuf_header *)p->zbuf2; - if (bzh->bzh_kernel_gen > bzh->bzh_user_gen) { - p->bzh = bzh; - p->buffer = (u_char *)p->zbuf2; - p->buffer += sizeof(*bzh); - *cc = bzh->bzh_kernel_len; - return (1); - } - /* - * If the generation numbers were the same for both buffers, then it - * is possible that we woke up because of BIOCIMMEDIATE. In either - * case, manually rotate the buffers. - */ if (ioctl(p->fd, BIOCROTZBUF, &bz) < 0) { (void) snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "BIOCROTZBUF: %s", strerror(errno)); return (-1); } + /* - * It's possible that we were unable to rotate the buffer because the - * user generation numbers have not been modified, in which case retry. + * Last chance for data. */ - if (bz.bz_bufa == NULL) - return (0); - p->bzh = (struct bpf_zbuf_header *)bz.bz_bufa; - p->buffer = (u_char *)bz.bz_bufa; - p->buffer += sizeof(*bzh); - *cc = bz.bz_buflen; - return (1); + return (pcap_next_zbuf_shm(p, cc)); } static int @@ -246,7 +249,7 @@ { struct bpf_zbuf bz; - p->bzh->bzh_user_gen++; + p->bzh->bzh_user_gen = p->bzh->bzh_kernel_gen; p->bzh = NULL; p->buffer = NULL; return (0); ==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#7 (text+ko) ==== @@ -184,9 +184,11 @@ * alternative between these two actual mmap'd buffers as required. * As there is a header on the front size of the mmap'd buffer, only * some of the buffer is exposed to libpcap as a whole via bufsize; - * zbufsize is the true size. + * zbufsize is the true size. zbuffer tracks the current zbuf + * assocated with buffer so that it can be used to decide which the + * next buffer to read will be. */ - u_char *zbuf1, *zbuf2; + u_char *zbuf1, *zbuf2, *zbuffer; u_int zbufsize; u_int timeout; u_int zerocopy;