Date: Thu, 1 Feb 2007 15:25:37 GMT From: Robert Watson <rwatson@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 113839 for review Message-ID: <200702011525.l11FPbRU040778@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=113839 Change 113839 by rwatson@rwatson_cinnamon on 2007/02/01 15:25:28 Further chicken scratchings at teaching libpcap about shared memory headers for zero-copy BPF. This sort of works; the lack of timeouts is still an issue, and there appears to be a problem under high use leading to a crash (likely a pointer/buffer bug somewhere in the code I've added). Affected files ... .. //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#3 edit .. //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#3 edit Differences ... ==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-bpf.c#3 (text+ko) ==== @@ -141,7 +141,98 @@ return (0); } +#ifdef BIOCGETBUFMODE +/* + * Selection routine for zero-copy BPF: identify the next completed buffer, + * if any. Try shared memory first, and if that doesn't work, make a system + * call, which may dislodge a buffer. + * + * Return (1) if the buffer is found, (0) if a retry is required, and (-1) if + * there is an unrecoverable error. + * + * XXXRW: Check to make sure the version comparison we're doing here is + * really the right thing -- maybe use serial number arithmetic? + */ static int +pcap_next_zbuf(pcap_t *p, u_int *cc) +{ + struct bpf_zbuf_header *bzh; + struct pollfd pollfd; + struct bpf_zbuf bz; + int i; + + p->bzh = NULL; + + /* + * First try directly accessing the zero-copy buffer headers. + */ + bzh = (struct bpf_zbuf_header *)p->zbuf1; + if (bzh->bzh_kernel_gen > bzh->bzh_user_gen) { + printf("pcap_next_zbuf: zbuf1 gen\n"); + goto found; + } + bzh = (struct bpf_zbuf_header *)p->zbuf2; + if (bzh->bzh_kernel_gen > bzh->bzh_user_gen) { + printf("pcap_next_zbuf: zbuf2 gen\n"); + goto found; + } + + /* + * Next, try asking the kernel, which may dislodge a buffer in + * immediate mode. + */ + bzero(&bz, sizeof(bz)); + if (ioctl(p->fd, BIOCGETZNEXT, &bz) < 0) { + snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "BIOCGETZNEXT: %s", + pcap_strerror(errno)); + return (-1); + } + bzh = bz.bz_bufa; + if (bzh != NULL) { + printf("pcap_next_zbuf getznext\n"); + goto found; + } + + printf("poll timeout %d\n", p->timeout); + bzero(&pollfd, sizeof(pollfd)); + pollfd.fd = p->fd; + pollfd.events = POLLIN; + i = poll(&pollfd, 1, p->timeout == 0 ? INFTIM : p->timeout); + if (i < 0 && errno != EINTR) { + snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "poll: %s", + pcap_strerror(errno)); + return (-1); + } + return (0); +found: + p->bzh = bzh; + *cc = bzh->bzh_kernel_len; + p->buffer = (u_char *)(bzh + 1); + return (1); +} + +static int +pcap_ack_zbuf(pcap_t *p) +{ + struct bpf_zbuf bz; + + p->bzh->bzh_user_gen++; +#if 0 + bzero(&bz, sizeof(bz)); + bz.bz_bufa = (u_char *)p->bzh; + if (ioctl(p->fd, BIOCACKZBUF, &bz) < 0) { + snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "BIOCACKZBUF: %s", + pcap_strerror(errno)); + return (-1); + } +#endif + p->bzh = NULL; + p->buffer = NULL; + return (0); +} +#endif + +static int pcap_read_bpf(pcap_t *p, int cnt, pcap_handler callback, u_char *user) { int cc; @@ -149,13 +240,12 @@ register u_char *bp, *ep; u_char *datap; struct bpf_insn *fcode; +#ifdef BIOCSETBUFMODE + int i; +#endif #ifdef PCAP_FDDIPAD register int pad; #endif -#ifdef BIOCSETBUFMODE - struct pollfd pollfd; - struct bpf_zbuf bz; -#endif fcode = p->md.use_bpf ? NULL : p->fcode.bf_insns; again: @@ -174,51 +264,14 @@ cc = p->cc; if (p->cc == 0) { #ifdef BIOCSETBUFMODE - /* - * XXXRW: All of this could use serious revision. - */ if (p->zbuf1 != NULL) { - if (p->buffer != NULL) { - bzero(&bz, sizeof(bz)); - bz.bz_bufa = p->buffer; - bz.bz_buflen = p->bufsize; - if (ioctl(p->fd, BIOCACKZBUF, &bz) < 0) { - snprintf(p->errbuf, PCAP_ERRBUF_SIZE, - "BIOCGETZNEXT: %s", - pcap_strerror(errno)); - return (-1); - } - p->buffer = NULL; - } - bzero(&bz, sizeof(bz)); - if (ioctl(p->fd, BIOCGETZNEXT, &bz) < 0) { - snprintf(p->errbuf, PCAP_ERRBUF_SIZE, - "BIOCGETZNEXT: %s", - pcap_strerror(errno)); + if (p->buffer != NULL) + pcap_ack_zbuf(p); + i = pcap_next_zbuf(p, &cc); + if (i == 0) + goto again; + if (i < 0) return (-1); - } - printf("getznext returned %p\n", bz.bz_bufa); - if (bz.bz_bufa != NULL) { - p->buffer = bz.bz_bufa; - cc = bz.bz_buflen; - } else { - /* - * XXXRW: Need to implement non-blocking - * operation -- query fd with fcntl? - */ - bzero(&pollfd, sizeof(pollfd)); - pollfd.fd = p->fd; - pollfd.events = POLLIN; - printf("poll returned %d\n", - poll(&pollfd, 1, p->timeout == 0 ? INFTIM - : p->timeout)); - printf("pollfd.revents = 0x%x\n", - pollfd.revents); - - /* XXXRW: Should force buffer rotation here. */ - - goto again; - } } else #endif cc = read(p->fd, (char *)p->buffer, p->bufsize); @@ -727,21 +780,21 @@ /* * XXXRW: This logic should be revisited. */ - v = 32768; - if (v % getpagesize() != 0) - v = getpagesize(); - if (v > zbufmax) - v = zbufmax; + p->zbufsize = 32768; + if (p->zbufsize % getpagesize() != 0) + p->zbufsize = getpagesize(); + if (p->zbufsize > zbufmax) + p->zbufsize = zbufmax; - p->zbuf1 = mmap(NULL, v, PROT_READ | PROT_WRITE, MAP_ANON, - -1, 0); - p->zbuf2 = mmap(NULL, v, PROT_READ | PROT_WRITE, MAP_ANON, - -1, 0); + p->zbuf1 = mmap(NULL, p->zbufsize, PROT_READ | PROT_WRITE, + MAP_ANON, -1, 0); + p->zbuf2 = mmap(NULL, p->zbufsize, PROT_READ | PROT_WRITE, + MAP_ANON, -1, 0); if (p->zbuf1 == MAP_FAILED || p->zbuf2 == MAP_FAILED) { if (p->zbuf1 != MAP_FAILED) - munmap(p->zbuf1, v); + munmap(p->zbuf1, p->zbufsize); if (p->zbuf2 != MAP_FAILED) - munmap(p->zbuf1, v); + munmap(p->zbuf1, p->zbufsize); snprintf(ebuf, PCAP_ERRBUF_SIZE, "mmap: %s", pcap_strerror(errno)); } @@ -749,7 +802,7 @@ bzero(&bz, sizeof(bz)); bz.bz_bufa = p->zbuf1; bz.bz_bufb = p->zbuf2; - bz.bz_buflen = v; + bz.bz_buflen = p->zbufsize; if (ioctl(fd, BIOCSETZBUF, (caddr_t)&bz) < 0) { snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSETZBUF: %s", @@ -763,6 +816,8 @@ device, pcap_strerror(errno)); goto bad; } + + v = p->zbufsize - sizeof(struct bpf_zbuf_header); } else { #endif ==== //depot/projects/zcopybpf/src/contrib/libpcap/pcap-int.h#3 (text+ko) ==== @@ -162,11 +162,22 @@ * * Zero-copy read buffer -- for zero-copy BPF. 'buffer' above will * alternative between these two actual mmap'd buffers as required. + * As there is a header on the front size of the mmap'd buffer, only + * some of the buffer is exposed to libpcap as a whole via bufsize; + * zbufsize is the true size. */ u_char *zbuf1, *zbuf2; + u_int zbufsize; u_int timeout; /* + * If there's currently a buffer being actively processed, then it is + * referenced here; 'buffer' is also pointed at it, but offset by the + * size of the header. + */ + struct bpf_zbuf_header *bzh; + + /* * Place holder for pcap_next(). */ u_char *pkt;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200702011525.l11FPbRU040778>