Date: Thu, 22 May 2008 12:02:33 GMT From: Oleg <Oleg.Dolgov@gmail.com> To: freebsd-gnats-submit@FreeBSD.org Subject: kern/123892: if_tap [bug] No buffer space available Message-ID: <200805221202.m4MC2Xcr031047@www.freebsd.org> Resent-Message-ID: <200805221210.m4MCA134082766@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
>Number: 123892 >Category: kern >Synopsis: if_tap [bug] No buffer space available >Confidential: no >Severity: non-critical >Priority: medium >Responsible: freebsd-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: sw-bug >Submitter-Id: current-users >Arrival-Date: Thu May 22 12:10:01 UTC 2008 >Closed-Date: >Last-Modified: >Originator: Oleg >Release: FreeBSD 7-RELEASE amd64 >Organization: Sunbay >Environment: >Description: Hello First, I meet this bug along time ago http://lists.freebsd.org/pipermail/freebsd-net/2007-July/014941.html and write small program, similar to http://www.freebsd.org/cgi/query-pr.cgi?pr=95665 but, that work with tap device, to reproduce bug. And no feedback come. Later, I make small research while debugging kernel http://lists.freebsd.org/pipermail/freebsd-net/2007-April/014064.html and again no help :( Today I got this bug agan but with another way: You need to have multicore machine, FreeBSD 7 kernel with options SCHED_ULE options PREEMPTION options SMP >From NOTES # PREEMPTION allows the threads that are in the kernel to be preempted # by higher priority threads. It helps with interactivity and # allows interrupt threads to run sooner rather than waiting. # WARNING! Only tested on amd64 and i386. When preempt is possible tap-device have 2 unsafe functions (if_tap.c): tapifstart and tapread. static int tapread(struct cdev *dev, struct uio *uio, int flag) { // // skipped // .. /* sleep until we get a packet */ do { s = splimp(); IF_DEQUEUE(&ifp->if_snd, m); splx(s); // (1) if (m == NULL) { if (flag & O_NONBLOCK) return (EWOULDBLOCK); mtx_lock(&tp->tap_mtx); tp->tap_flags |= TAP_RWAIT; mtx_unlock(&tp->tap_mtx); // (2) error = tsleep(tp,PCATCH|(PZERO+1),"taprd",0); if (error) return (error); } } while (m == NULL); // // skipped // .. } static void tapifstart(struct ifnet *ifp) { // // skipped // .. s = splimp(); ifp->if_drv_flags |= IFF_DRV_OACTIVE; if (ifp->if_snd.ifq_len != 0) { mtx_lock(&tp->tap_mtx); if (tp->tap_flags & TAP_RWAIT) { tp->tap_flags &= ~TAP_RWAIT; // (3) wakeup(tp); } if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) { mtx_unlock(&tp->tap_mtx); pgsigio(&tp->tap_sigio, SIGIO, 0); } else mtx_unlock(&tp->tap_mtx); selwakeuppri(&tp->tap_rsel, PZERO+1); KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0); ifp->if_opackets ++; /* obytes are counted in ether_output */ } ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; splx(s); } So, when some process make block read on tap-device, tapread called, it check if_snd, and if it empty make sleep until we get a packet for sending (tapifstart called). Now imagine, that tapread thread see that queue are empty and going to sleep, but between (1) and (2) interrupted. While that thread rest interrupted someplace between (1)-(2) network stack got or generate many packets for tap-device (50 e.g. >= if_snd.ifq_maxlen), tapifstart called for each packet, it make wakeup (3) for tapread thread, but they are lost, because no threads sleeping. Now really if_snd queue are full. Next, our tapread thread execution returned and go sleep - never wakeup, because if_snd queue are full, tcp/ip stack will drop new packets for tap-device and will not call tapifstart. p.s. tun device have same problem. Best regards, Oleg Dolgov. >How-To-Repeat: >Fix: Here are patch, condition variable with if_snd.ifq_mtx make necessary synchronization. Patch attached with submission follows: Patches for if_tap.c and if_tapvar.h ========================================================================= if_tap.c.orig: $FreeBSD: src/sys/net/if_tap.c,v 1.71 2007/03/19 18:17:31 bms Exp $ --- if_tap.c.orig 2008-05-22 13:13:47.000000000 +0300 +++ if_tap.c.patched 2008-05-22 13:13:47.000000000 +0300 @@ -58,6 +58,7 @@ #include <sys/ttycom.h> #include <sys/uio.h> #include <sys/queue.h> +#include <sys/condvar.h> #include <net/bpf.h> #include <net/ethernet.h> @@ -226,6 +227,7 @@ if_free_type(ifp, IFT_ETHER); splx(s); + cv_destroy(&tp->tap_start_cv); mtx_destroy(&tp->tap_mtx); free(tp, M_TAP); } @@ -413,6 +415,7 @@ /* allocate driver storage and create device */ MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK | M_ZERO); mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF); + cv_init(&tp->tap_start_cv, "tap_start_cv"); mtx_lock(&tapmtx); SLIST_INSERT_HEAD(&taphead, tp, tap_next); mtx_unlock(&tapmtx); @@ -674,29 +677,22 @@ } mtx_unlock(&tp->tap_mtx); - s = splimp(); - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - - if (ifp->if_snd.ifq_len != 0) { - mtx_lock(&tp->tap_mtx); - if (tp->tap_flags & TAP_RWAIT) { - tp->tap_flags &= ~TAP_RWAIT; - wakeup(tp); - } + mtx_lock(&tp->tap_mtx); + if (tp->tap_flags & TAP_RWAIT) { + tp->tap_flags &= ~TAP_RWAIT; + cv_broadcast(&tp->tap_start_cv); + } - if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) { - mtx_unlock(&tp->tap_mtx); - pgsigio(&tp->tap_sigio, SIGIO, 0); - } else - mtx_unlock(&tp->tap_mtx); + if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) { + mtx_unlock(&tp->tap_mtx); + pgsigio(&tp->tap_sigio, SIGIO, 0); + } else + mtx_unlock(&tp->tap_mtx); - selwakeuppri(&tp->tap_rsel, PZERO+1); - KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0); - ifp->if_opackets ++; /* obytes are counted in ether_output */ - } + selwakeuppri(&tp->tap_rsel, PZERO+1); + KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0); + ifp->if_opackets ++; /* obytes are counted in ether_output */ - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - splx(s); } /* tapifstart */ @@ -860,23 +856,31 @@ mtx_unlock(&tp->tap_mtx); /* sleep until we get a packet */ + s = splimp(); + IF_LOCK(&ifp->if_snd); do { - s = splimp(); - IF_DEQUEUE(&ifp->if_snd, m); - splx(s); + _IF_DEQUEUE(&ifp->if_snd, m); if (m == NULL) { - if (flag & O_NONBLOCK) + if (flag & O_NONBLOCK) { + IF_UNLOCK(&ifp->if_snd); + splx(s); return (EWOULDBLOCK); + } mtx_lock(&tp->tap_mtx); tp->tap_flags |= TAP_RWAIT; mtx_unlock(&tp->tap_mtx); - error = tsleep(tp,PCATCH|(PZERO+1),"taprd",0); - if (error) + error = cv_wait_sig(&tp->tap_start_cv, &ifp->if_snd.ifq_mtx); + if (error) { + IF_UNLOCK(&ifp->if_snd); + splx(s); return (error); + } } } while (m == NULL); + IF_UNLOCK(&ifp->if_snd); + splx(s); /* feed packet to bpf */ BPF_MTAP(ifp, m); ========================================================================= if_tapvar.h.orig: $FreeBSD: src/sys/net/if_tapvar.h,v 1.10 2005/06/10 16:49:18 brooks Exp $ --- if_tapvar.h.orig 2008-05-22 13:13:47.000000000 +0300 +++ if_tapvar.h.patched 2008-05-22 13:13:47.000000000 +0300 @@ -64,6 +64,7 @@ SLIST_ENTRY(tap_softc) tap_next; /* next device in chain */ struct cdev *tap_dev; struct mtx tap_mtx; /* per-softc mutex */ + struct cv tap_start_cv; /* wake up readers */ }; #endif /* !_NET_IF_TAPVAR_H_ */ >Release-Note: >Audit-Trail: >Unformatted:
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200805221202.m4MC2Xcr031047>