Date: Fri, 11 Mar 2005 16:55:25 +0300 From: dima <_pppp@mail.ru> To: Gleb Smirnoff <glebius@FreeBSD.org> Cc: ru@FreeBSD.org Subject: Re[2]: Giant-free polling [PATCH] Message-ID: <E1D9kbt-000FAj-00._pppp-mail-ru@f22.mail.ru> In-Reply-To: <20050311110234.GA87255@cell.sick.ru>
index | next in thread | previous in thread | raw e-mail
[-- Attachment #1 --]
> Collegues,
>
> On Tue, Mar 01, 2005 at 04:29:49PM -0800, Luigi Rizzo wrote:
> L> [cc-ing net@freebsd.org to get more opinions]
>
> this good, that you have CC'ed net@, otherwise we would continue
> working in parallel without collaboration.
It's a pity you didn't read the complete thread,
thus the original version of my patch isn't valid anymore.
>
> Here is attached patch made in a collaboration by ru, pjd and me.
I thought about using list also, but considered it to bring
too much overhead to the code. The original idea of handling arrays
seems to be very elegant.
>
> We use separate mutex for polling, and we drop it before calling the
> poll handler to avoid LOR and contention on this mutex.
The recent version of the patch uses sx lock to protect pr[] and
the array of per-interface mutexes iface_locks[].
So, all CPUs can poll different interfaces at the same time now.
See the complete thread for details.
>
> We have definite evidence that now idle_poll and ISR poll can run
> in parallel: if we remove PRF_RUNNING flag check, we got panics
> because poll handlers of many drivers (e.g. if_em) are not reentrable.
>
> I think this patch is a step forward in direction of parallel polling
> on SMP, since we now have two polling instances (ISR + idle) working
> in parallel.
I also mentioned that poll_idle() isn't called from anywhere in the kernel.
Thus we have only 2 possible sources for polling: hardclock (per-CPU) and traps.
The polling code in trap handler should also be changed a bit to check for
the trap's source.
I attach the most recent version of the patch.
The only difference is locking in ether_poll_register() suggested by Luigi.
>
> --
> Totus tuus, Glebius.
> GLEBIUS-RIPN GLEB-RIPE
[-- Attachment #2 --]
--- orig/kern_poll.c Thu Feb 17 17:11:05 2005
+++ kern_poll.c Fri Mar 4 17:03:33 2005
@@ -41,12 +41,17 @@
#include <sys/resourcevar.h>
#include <sys/kthread.h>
+#include <sys/lock.h> /* for sx(9) */
+#include <sys/sx.h>
+
#ifdef SMP
#ifndef COMPILING_LINT
#error DEVICE_POLLING is not compatible with SMP
#endif
#endif
+struct sx polling_lock;
+
static void netisr_poll(void); /* the two netisr handlers */
static void netisr_pollmore(void);
@@ -182,6 +187,7 @@
};
static struct pollrec pr[POLL_LIST_LEN];
+struct mtx iface_locks[POLL_LIST_LEN];
static void
init_device_poll(void)
@@ -189,6 +195,7 @@
netisr_register(NETISR_POLL, (netisr_t *)netisr_poll, NULL, 0);
netisr_register(NETISR_POLLMORE, (netisr_t *)netisr_pollmore, NULL, 0);
+ sx_init( &polling_lock, "polling" );
}
SYSINIT(device_poll, SI_SUB_CLOCKS, SI_ORDER_MIDDLE, init_device_poll, NULL)
@@ -252,15 +259,19 @@
{
int i;
- mtx_lock(&Giant);
+ sx_slock( &polling_lock );
if (count > poll_each_burst)
count = poll_each_burst;
for (i = 0 ; i < poll_handlers ; i++)
- if (pr[i].handler && (IFF_UP|IFF_RUNNING) ==
- (pr[i].ifp->if_flags & (IFF_UP|IFF_RUNNING)) )
+ if( pr[i].handler && ( IFF_UP | IFF_RUNNING ) ==
+ ( pr[i].ifp->if_flags & ( IFF_UP | IFF_RUNNING ) ) &&
+ mtx_trylock( &iface_locks[i] ) ) {
pr[i].handler(pr[i].ifp, 0, count); /* quick check */
- mtx_unlock(&Giant);
+ mtx_unlock( &iface_locks[i] );
+ }
+
+ sx_sunlock( &polling_lock );
}
/*
@@ -335,7 +346,6 @@
static int reg_frac_count;
int i, cycles;
enum poll_cmd arg = POLL_ONLY;
- mtx_lock(&Giant);
phase = 3;
if (residual_burst == 0) { /* first call in this tick */
@@ -377,26 +387,37 @@
residual_burst : poll_each_burst;
residual_burst -= cycles;
+ sx_slock( &polling_lock );
+
if (polling) {
for (i = 0 ; i < poll_handlers ; i++)
- if (pr[i].handler && (IFF_UP|IFF_RUNNING) ==
- (pr[i].ifp->if_flags & (IFF_UP|IFF_RUNNING)) )
+ if( pr[i].handler &&
+ ( IFF_UP | IFF_RUNNING ) ==
+ ( pr[i].ifp->if_flags & (IFF_UP | IFF_RUNNING) ) &&
+ mtx_trylock( &iface_locks[i] ) ) {
pr[i].handler(pr[i].ifp, arg, cycles);
+ mtx_unlock( &iface_locks[i] );
+ }
} else { /* unregister */
for (i = 0 ; i < poll_handlers ; i++) {
- if (pr[i].handler &&
- pr[i].ifp->if_flags & IFF_RUNNING) {
+ if( pr[i].handler &&
+ pr[i].ifp->if_flags & IFF_RUNNING &&
+ mtx_trylock( &iface_locks[i] ) ) {
pr[i].ifp->if_flags &= ~IFF_POLLING;
pr[i].handler(pr[i].ifp, POLL_DEREGISTER, 1);
+ mtx_unlock( &iface_locks[i] );
+ mtx_destroy( &iface_locks[i] );
}
pr[i].handler=NULL;
}
residual_burst = 0;
poll_handlers = 0;
}
+
+ sx_sunlock( &polling_lock );
+
/* on -stable, schednetisr(NETISR_POLLMORE); */
phase = 4;
- mtx_unlock(&Giant);
}
/*
@@ -440,9 +461,15 @@
return 0; /* no polling for you */
}
+ sx_xlock( &polling_lock );
+
pr[poll_handlers].handler = h;
pr[poll_handlers].ifp = ifp;
+ mtx_init( &iface_locks[poll_handlers], ifp->if_xname, NULL, MTX_DEF );
poll_handlers++;
+
+ sx_xunlock( &polling_lock );
+
ifp->if_flags |= IFF_POLLING;
splx(s);
if (idlepoll_sleeping)
@@ -461,17 +488,19 @@
{
int i;
- mtx_lock(&Giant);
+
if ( !ifp || !(ifp->if_flags & IFF_POLLING) ) {
- mtx_unlock(&Giant);
return 0;
}
+
+ sx_xlock( &polling_lock );
+
for (i = 0 ; i < poll_handlers ; i++)
if (pr[i].ifp == ifp) /* found it */
break;
ifp->if_flags &= ~IFF_POLLING; /* found or not... */
if (i == poll_handlers) {
- mtx_unlock(&Giant);
+ sx_xunlock( &polling_lock );
printf("ether_poll_deregister: ifp not found!!!\n");
return 0;
}
@@ -479,8 +508,10 @@
if (i < poll_handlers) { /* Last entry replaces this one. */
pr[i].handler = pr[poll_handlers].handler;
pr[i].ifp = pr[poll_handlers].ifp;
+ mtx_destroy( &iface_locks[i] );
+ iface_locks[i] = iface_locks[poll_handlers];
}
- mtx_unlock(&Giant);
+ sx_xunlock( &polling_lock );
return 1;
}
@@ -501,10 +532,10 @@
for (;;) {
if (poll_in_idle_loop && poll_handlers > 0) {
idlepoll_sleeping = 0;
- mtx_lock(&Giant);
+ /* mtx_lock(&Giant); */
ether_poll(poll_each_burst);
- mtx_unlock(&Giant);
- mtx_assert(&Giant, MA_NOTOWNED);
+ /* mtx_unlock(&Giant);
+ mtx_assert(&Giant, MA_NOTOWNED); */
mtx_lock_spin(&sched_lock);
mi_switch(SW_VOL, NULL);
mtx_unlock_spin(&sched_lock);
help
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?E1D9kbt-000FAj-00._pppp-mail-ru>
