Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 11 Mar 2005 16:55:25 +0300
From:      dima <_pppp@mail.ru>
To:        Gleb Smirnoff <glebius@FreeBSD.org>
Cc:        ru@FreeBSD.org
Subject:   Re[2]: Giant-free polling [PATCH]
Message-ID:  <E1D9kbt-000FAj-00._pppp-mail-ru@f22.mail.ru>
In-Reply-To: <20050311110234.GA87255@cell.sick.ru>

index | next in thread | previous in thread | raw e-mail

[-- Attachment #1 --]
>   Collegues,
> 
> On Tue, Mar 01, 2005 at 04:29:49PM -0800, Luigi Rizzo wrote:
> L> [cc-ing net@freebsd.org to get more opinions]
> 
> this good, that you have CC'ed net@, otherwise we would continue
> working in parallel without collaboration.

It's a pity you didn't read the complete thread,
thus the original version of my patch isn't valid anymore.

> 
> Here is attached patch made in a collaboration by ru, pjd and me.

I thought about using list also, but considered it to bring
too much overhead to the code. The original idea of handling arrays
seems to be very elegant.

> 
> We use separate mutex for polling, and we drop it before calling the
> poll handler to avoid LOR and contention on this mutex.

The recent version of the patch uses sx lock to protect pr[] and
the array of per-interface mutexes iface_locks[].
So, all CPUs can poll different interfaces at the same time now.
See the complete thread for details.

> 
> We have definite evidence that now idle_poll and ISR poll can run
> in parallel: if we remove PRF_RUNNING flag check, we got panics
> because poll handlers of many drivers (e.g. if_em) are not reentrable.
> 
> I think this patch is a step forward in direction of parallel polling
> on SMP, since we now have two polling instances (ISR + idle) working
> in parallel.

I also mentioned that poll_idle() isn't called from anywhere in the kernel.
Thus we have only 2 possible sources for polling: hardclock (per-CPU) and traps.
The polling code in trap handler should also be changed a bit to check for
the trap's source.

I attach the most recent version of the patch.
The only difference is locking in ether_poll_register() suggested by Luigi.

> 
> -- 
> Totus tuus, Glebius.
> GLEBIUS-RIPN GLEB-RIPE


[-- Attachment #2 --]
--- orig/kern_poll.c	Thu Feb 17 17:11:05 2005
+++ kern_poll.c	Fri Mar  4 17:03:33 2005
@@ -41,12 +41,17 @@
 #include <sys/resourcevar.h>
 #include <sys/kthread.h>
 
+#include <sys/lock.h>			/* for sx(9)			*/
+#include <sys/sx.h>
+
 #ifdef SMP
 #ifndef COMPILING_LINT
 #error DEVICE_POLLING is not compatible with SMP
 #endif
 #endif
 
+struct sx polling_lock;
+
 static void netisr_poll(void);		/* the two netisr handlers      */
 static void netisr_pollmore(void);
 
@@ -182,6 +187,7 @@
 };
 
 static struct pollrec pr[POLL_LIST_LEN];
+struct mtx iface_locks[POLL_LIST_LEN];
 
 static void
 init_device_poll(void)
@@ -189,6 +195,7 @@
 
 	netisr_register(NETISR_POLL, (netisr_t *)netisr_poll, NULL, 0);
 	netisr_register(NETISR_POLLMORE, (netisr_t *)netisr_pollmore, NULL, 0);
+	sx_init( &polling_lock, "polling" );
 }
 SYSINIT(device_poll, SI_SUB_CLOCKS, SI_ORDER_MIDDLE, init_device_poll, NULL)
 
@@ -252,15 +259,19 @@
 {
 	int i;
 
-	mtx_lock(&Giant);
+	sx_slock( &polling_lock );
 
 	if (count > poll_each_burst)
 		count = poll_each_burst;
 	for (i = 0 ; i < poll_handlers ; i++)
-		if (pr[i].handler && (IFF_UP|IFF_RUNNING) ==
-		    (pr[i].ifp->if_flags & (IFF_UP|IFF_RUNNING)) )
+		if( pr[i].handler && ( IFF_UP | IFF_RUNNING ) ==
+		    ( pr[i].ifp->if_flags & ( IFF_UP | IFF_RUNNING ) ) &&
+		    mtx_trylock( &iface_locks[i] ) ) {
 			pr[i].handler(pr[i].ifp, 0, count); /* quick check */
-	mtx_unlock(&Giant);
+			mtx_unlock( &iface_locks[i] );
+		}
+
+	sx_sunlock( &polling_lock );
 }
 
 /*
@@ -335,7 +346,6 @@
 	static int reg_frac_count;
 	int i, cycles;
 	enum poll_cmd arg = POLL_ONLY;
-	mtx_lock(&Giant);
 
 	phase = 3;
 	if (residual_burst == 0) { /* first call in this tick */
@@ -377,26 +387,37 @@
 		residual_burst : poll_each_burst;
 	residual_burst -= cycles;
 
+	sx_slock( &polling_lock );
+
 	if (polling) {
 		for (i = 0 ; i < poll_handlers ; i++)
-			if (pr[i].handler && (IFF_UP|IFF_RUNNING) ==
-			    (pr[i].ifp->if_flags & (IFF_UP|IFF_RUNNING)) )
+			if( pr[i].handler &&
+			    ( IFF_UP | IFF_RUNNING ) ==
+			    ( pr[i].ifp->if_flags & (IFF_UP | IFF_RUNNING) ) &&
+			    mtx_trylock( &iface_locks[i] ) ) {
 				pr[i].handler(pr[i].ifp, arg, cycles);
+				mtx_unlock( &iface_locks[i] );
+			}
 	} else {	/* unregister */
 		for (i = 0 ; i < poll_handlers ; i++) {
-			if (pr[i].handler &&
-			    pr[i].ifp->if_flags & IFF_RUNNING) {
+			if( pr[i].handler &&
+			    pr[i].ifp->if_flags & IFF_RUNNING &&
+			    mtx_trylock( &iface_locks[i] ) ) {
 				pr[i].ifp->if_flags &= ~IFF_POLLING;
 				pr[i].handler(pr[i].ifp, POLL_DEREGISTER, 1);
+				mtx_unlock( &iface_locks[i] );
+				mtx_destroy( &iface_locks[i] );
 			}
 			pr[i].handler=NULL;
 		}
 		residual_burst = 0;
 		poll_handlers = 0;
 	}
+
+	sx_sunlock( &polling_lock );
+
 	/* on -stable, schednetisr(NETISR_POLLMORE); */
 	phase = 4;
-	mtx_unlock(&Giant);
 }
 
 /*
@@ -440,9 +461,15 @@
 		return 0; /* no polling for you */
 	}
 
+	sx_xlock( &polling_lock );
+
 	pr[poll_handlers].handler = h;
 	pr[poll_handlers].ifp = ifp;
+	mtx_init( &iface_locks[poll_handlers], ifp->if_xname, NULL, MTX_DEF );
 	poll_handlers++;
+
+	sx_xunlock( &polling_lock );
+
 	ifp->if_flags |= IFF_POLLING;
 	splx(s);
 	if (idlepoll_sleeping)
@@ -461,17 +488,19 @@
 {
 	int i;
 
-	mtx_lock(&Giant);
+
 	if ( !ifp || !(ifp->if_flags & IFF_POLLING) ) {
-		mtx_unlock(&Giant);
 		return 0;
 	}
+
+	sx_xlock( &polling_lock );
+
 	for (i = 0 ; i < poll_handlers ; i++)
 		if (pr[i].ifp == ifp) /* found it */
 			break;
 	ifp->if_flags &= ~IFF_POLLING; /* found or not... */
 	if (i == poll_handlers) {
-		mtx_unlock(&Giant);
+		sx_xunlock( &polling_lock );
 		printf("ether_poll_deregister: ifp not found!!!\n");
 		return 0;
 	}
@@ -479,8 +508,10 @@
 	if (i < poll_handlers) { /* Last entry replaces this one. */
 		pr[i].handler = pr[poll_handlers].handler;
 		pr[i].ifp = pr[poll_handlers].ifp;
+		mtx_destroy( &iface_locks[i] );
+		iface_locks[i] = iface_locks[poll_handlers];
 	}
-	mtx_unlock(&Giant);
+	sx_xunlock( &polling_lock );
 	return 1;
 }
 
@@ -501,10 +532,10 @@
 	for (;;) {
 		if (poll_in_idle_loop && poll_handlers > 0) {
 			idlepoll_sleeping = 0;
-			mtx_lock(&Giant);
+			/* mtx_lock(&Giant); */
 			ether_poll(poll_each_burst);
-			mtx_unlock(&Giant);
-			mtx_assert(&Giant, MA_NOTOWNED);
+			/* mtx_unlock(&Giant);
+			mtx_assert(&Giant, MA_NOTOWNED); */
 			mtx_lock_spin(&sched_lock);
 			mi_switch(SW_VOL, NULL);
 			mtx_unlock_spin(&sched_lock);
help

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?E1D9kbt-000FAj-00._pppp-mail-ru>