From owner-freebsd-net@FreeBSD.ORG Fri Mar 11 11:02:41 2005 Return-Path: Delivered-To: freebsd-net@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 4047B16A4CE; Fri, 11 Mar 2005 11:02:41 +0000 (GMT) Received: from relay.bestcom.ru (relay.bestcom.ru [217.72.144.5]) by mx1.FreeBSD.org (Postfix) with ESMTP id 28F9043D41; Fri, 11 Mar 2005 11:02:40 +0000 (GMT) (envelope-from glebius@FreeBSD.org) Received: from cell.sick.ru (root@cell.sick.ru [217.72.144.68]) by relay.bestcom.ru (8.13.1/8.12.9) with ESMTP id j2BB2bwd086145 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=FAIL); Fri, 11 Mar 2005 14:02:37 +0300 (MSK) (envelope-from glebius@FreeBSD.org) Received: from cell.sick.ru (glebius@localhost [127.0.0.1]) by cell.sick.ru (8.13.1/8.12.8) with ESMTP id j2BB2a8R087338 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO); Fri, 11 Mar 2005 14:02:36 +0300 (MSK) (envelope-from glebius@FreeBSD.org) Received: (from glebius@localhost) by cell.sick.ru (8.13.1/8.13.1/Submit) id j2BB2YQf087337; Fri, 11 Mar 2005 14:02:35 +0300 (MSK) (envelope-from glebius@FreeBSD.org) X-Authentication-Warning: cell.sick.ru: glebius set sender to glebius@FreeBSD.org using -f Date: Fri, 11 Mar 2005 14:02:34 +0300 From: Gleb Smirnoff To: Luigi Rizzo , John Baldwin , dima <_pppp@mail.ru> Message-ID: <20050311110234.GA87255@cell.sick.ru> References: <20050217161031.A46700@xorpc.icir.org> <200503011642.48813.jhb@FreeBSD.org> <20050301162949.A64187@xorpc.icir.org> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="X1bOJ3K7DJ5YkBrT" Content-Disposition: inline In-Reply-To: <20050301162949.A64187@xorpc.icir.org> User-Agent: Mutt/1.5.6i X-Virus-Scanned: ClamAV version devel-20050125, clamav-milter version 0.80ff on relay.bestcom.ru X-Virus-Status: Clean cc: pjd@FreeBSD.org cc: rwatson@FreeBSD.org cc: ru@FreeBSD.org cc: net@FreeBSD.org Subject: Re: Giant-free polling [PATCH] X-BeenThere: freebsd-net@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: Networking and TCP/IP with FreeBSD List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 11 Mar 2005 11:02:41 -0000 --X1bOJ3K7DJ5YkBrT Content-Type: text/plain; charset=koi8-r Content-Disposition: inline Collegues, On Tue, Mar 01, 2005 at 04:29:49PM -0800, Luigi Rizzo wrote: L> [cc-ing net@freebsd.org to get more opinions] this good, that you have CC'ed net@, otherwise we would continue working in parallel without collaboration. Here is attached patch made in a collaboration by ru, pjd and me. We use separate mutex for polling, and we drop it before calling the poll handler to avoid LOR and contention on this mutex. We have definite evidence that now idle_poll and ISR poll can run in parallel: if we remove PRF_RUNNING flag check, we got panics because poll handlers of many drivers (e.g. if_em) are not reentrable. I think this patch is a step forward in direction of parallel polling on SMP, since we now have two polling instances (ISR + idle) working in parallel. -- Totus tuus, Glebius. GLEBIUS-RIPN GLEB-RIPE --X1bOJ3K7DJ5YkBrT Content-Type: text/plain; charset=koi8-r Content-Disposition: attachment; filename="kern_poll.LIST.diff" Index: kern_poll.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_poll.c,v retrieving revision 1.19 diff -u -r1.19 kern_poll.c --- kern_poll.c 25 Feb 2005 22:07:50 -0000 1.19 +++ kern_poll.c 11 Mar 2005 10:49:47 -0000 @@ -33,6 +33,7 @@ #include #include /* needed by net/if.h */ #include +#include #include /* for IFF_* flags */ #include /* for NETISR_POLL */ @@ -144,7 +145,7 @@ SYSCTL_INT(_kern_polling, OID_AUTO, residual_burst, CTLFLAG_RW, &residual_burst, 0, "# of residual cycles in burst"); -static u_int32_t poll_handlers; /* next free entry in pr[]. */ +static u_int32_t poll_handlers; SYSCTL_UINT(_kern_polling, OID_AUTO, handlers, CTLFLAG_RD, &poll_handlers, 0, "Number of registered poll handlers"); @@ -169,20 +170,33 @@ &idlepoll_sleeping, 0, "idlepoll is sleeping"); -#define POLL_LIST_LEN 128 struct pollrec { poll_handler_t *handler; struct ifnet *ifp; +#define PRF_RUNNING 0x1 +#define PRF_PINNED 0x2 + uint32_t flags; + LIST_ENTRY(pollrec) next; }; -static struct pollrec pr[POLL_LIST_LEN]; +#define PR_VALID(pr) ((pr)->handler != NULL && \ + !((pr)->flags & PRF_RUNNING) && \ + ((pr)->ifp->if_flags & (IFF_UP|IFF_RUNNING)) == \ + (IFF_UP|IFF_RUNNING)) + +static LIST_HEAD(, pollrec) poll_list = LIST_HEAD_INITIALIZER(&poll_list); + +static struct mtx poll_mtx; static void init_device_poll(void) { - netisr_register(NETISR_POLL, (netisr_t *)netisr_poll, NULL, 0); - netisr_register(NETISR_POLLMORE, (netisr_t *)netisr_pollmore, NULL, 0); + mtx_init(&poll_mtx, "polling", NULL, MTX_DEF); + netisr_register(NETISR_POLL, (netisr_t *)netisr_poll, NULL, + NETISR_MPSAFE); + netisr_register(NETISR_POLLMORE, (netisr_t *)netisr_pollmore, NULL, + NETISR_MPSAFE); } SYSINIT(device_poll, SI_SUB_CLOCKS, SI_ORDER_MIDDLE, init_device_poll, NULL) @@ -244,17 +258,26 @@ void ether_poll(int count) { - int i; - - mtx_lock(&Giant); + struct pollrec *pr, *pr1; if (count > poll_each_burst) count = poll_each_burst; - for (i = 0 ; i < poll_handlers ; i++) - if (pr[i].handler && (IFF_UP|IFF_RUNNING) == - (pr[i].ifp->if_flags & (IFF_UP|IFF_RUNNING)) ) - pr[i].handler(pr[i].ifp, 0, count); /* quick check */ - mtx_unlock(&Giant); + + mtx_lock(&poll_mtx); + LIST_FOREACH_SAFE(pr, &poll_list, next, pr1) { + if (PR_VALID(pr)) { + pr->flags |= PRF_RUNNING; + if (pr1 != NULL) + pr1->flags |= PRF_PINNED; + mtx_unlock(&poll_mtx); + pr->handler(pr->ifp, 0, count); + mtx_lock(&poll_mtx); + if (pr1 != NULL) + pr1->flags &= ~PRF_PINNED; + pr->flags &= ~PRF_RUNNING; + } + } + mtx_unlock(&poll_mtx); } /* @@ -320,17 +343,17 @@ /* * netisr_poll is scheduled by schednetisr when appropriate, typically once - * per tick. It is called at splnet() so first thing to do is to upgrade to - * splimp(), and call all registered handlers. + * per tick. */ static void netisr_poll(void) { + struct pollrec *pr, *pr1; static int reg_frac_count; - int i, cycles; + int cycles; enum poll_cmd arg = POLL_ONLY; - mtx_lock(&Giant); + mtx_lock(&poll_mtx); phase = 3; if (residual_burst == 0) { /* first call in this tick */ microuptime(&poll_start_t); @@ -372,25 +395,36 @@ residual_burst -= cycles; if (polling) { - for (i = 0 ; i < poll_handlers ; i++) - if (pr[i].handler && (IFF_UP|IFF_RUNNING) == - (pr[i].ifp->if_flags & (IFF_UP|IFF_RUNNING)) ) - pr[i].handler(pr[i].ifp, arg, cycles); + LIST_FOREACH_SAFE(pr, &poll_list, next, pr1) { + if (PR_VALID(pr)) { + pr->flags |= PRF_RUNNING; + mtx_unlock(&poll_mtx); + pr->handler(pr->ifp, arg, cycles); + mtx_lock(&poll_mtx); + pr->flags &= ~PRF_RUNNING; + } else if (pr->handler == NULL && + !(pr->flags & PRF_PINNED)) { + LIST_REMOVE(pr, next); + free(pr, M_TEMP); + } + } } else { /* unregister */ - for (i = 0 ; i < poll_handlers ; i++) { - if (pr[i].handler && - pr[i].ifp->if_flags & IFF_RUNNING) { - pr[i].ifp->if_flags &= ~IFF_POLLING; - pr[i].handler(pr[i].ifp, POLL_DEREGISTER, 1); + LIST_FOREACH_SAFE(pr, &poll_list, next, pr1) { + if (pr->handler != NULL && + pr->ifp->if_flags & IFF_RUNNING) { + pr->ifp->if_flags &= ~IFF_POLLING; + mtx_unlock(&poll_mtx); + pr->handler(pr->ifp, POLL_DEREGISTER, 1); + mtx_lock(&poll_mtx); } - pr[i].handler=NULL; + pr->handler = NULL; } residual_burst = 0; poll_handlers = 0; } /* on -stable, schednetisr(NETISR_POLLMORE); */ phase = 4; - mtx_unlock(&Giant); + mtx_unlock(&poll_mtx); } /* @@ -399,12 +433,12 @@ * A device is not supposed to register itself multiple times. * * This is called from within the *_intr() functions, so we do not need - * further locking. + * further ifnet locking. */ int ether_poll_register(poll_handler_t *h, struct ifnet *ifp) { - int s; + struct pollrec *pr; if (polling == 0) /* polling disabled, cannot register */ return 0; @@ -415,30 +449,27 @@ if (ifp->if_flags & IFF_POLLING) /* already polling */ return 0; - s = splhigh(); - if (poll_handlers >= POLL_LIST_LEN) { - /* - * List full, cannot register more entries. - * This should never happen; if it does, it is probably a - * broken driver trying to register multiple times. Checking - * this at runtime is expensive, and won't solve the problem - * anyways, so just report a few times and then give up. - */ - static int verbose = 10 ; - splx(s); - if (verbose >0) { - printf("poll handlers list full, " - "maybe a broken driver ?\n"); - verbose--; + mtx_lock(&poll_mtx); + LIST_FOREACH(pr, &poll_list, next) + if (pr->ifp == ifp && pr->handler != NULL) { + mtx_unlock(&poll_mtx); + log(LOG_DEBUG, "ether_poll_register: %s: handler" + " already registered\n", ifp->if_xname); + return (0); } - return 0; /* no polling for you */ + pr = malloc(sizeof(*pr), M_TEMP, M_NOWAIT); + if (pr == NULL) { + mtx_unlock(&poll_mtx); + log(LOG_DEBUG, "ether_poll_register: malloc() failed\n"); + return (0); } - pr[poll_handlers].handler = h; - pr[poll_handlers].ifp = ifp; + pr->handler = h; + pr->ifp = ifp; + LIST_INSERT_HEAD(&poll_list, pr, next); poll_handlers++; ifp->if_flags |= IFF_POLLING; - splx(s); + mtx_unlock(&poll_mtx); if (idlepoll_sleeping) wakeup(&idlepoll_sleeping); return 1; /* polling enabled in next call */ @@ -453,29 +484,24 @@ int ether_poll_deregister(struct ifnet *ifp) { - int i; + struct pollrec *pr; - mtx_lock(&Giant); if ( !ifp || !(ifp->if_flags & IFF_POLLING) ) { - mtx_unlock(&Giant); return 0; } - for (i = 0 ; i < poll_handlers ; i++) - if (pr[i].ifp == ifp) /* found it */ - break; - ifp->if_flags &= ~IFF_POLLING; /* found or not... */ - if (i == poll_handlers) { - mtx_unlock(&Giant); - printf("ether_poll_deregister: ifp not found!!!\n"); - return 0; - } - poll_handlers--; - if (i < poll_handlers) { /* Last entry replaces this one. */ - pr[i].handler = pr[poll_handlers].handler; - pr[i].ifp = pr[poll_handlers].ifp; - } - mtx_unlock(&Giant); - return 1; + mtx_lock(&poll_mtx); + LIST_FOREACH(pr, &poll_list, next) + if (pr->ifp == ifp && pr->handler != NULL) { + pr->handler = NULL; + poll_handlers--; + mtx_unlock(&poll_mtx); + ifp->if_flags &= ~IFF_POLLING; + return (1); + } + mtx_unlock(&poll_mtx); + log(LOG_DEBUG, "ether_poll_deregister: %s: not found!!!\n", + ifp->if_xname); + return (0); } static void @@ -495,10 +521,7 @@ for (;;) { if (poll_in_idle_loop && poll_handlers > 0) { idlepoll_sleeping = 0; - mtx_lock(&Giant); ether_poll(poll_each_burst); - mtx_unlock(&Giant); - mtx_assert(&Giant, MA_NOTOWNED); mtx_lock_spin(&sched_lock); mi_switch(SW_VOL, NULL); mtx_unlock_spin(&sched_lock); --X1bOJ3K7DJ5YkBrT--