Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 17 Feb 2011 16:23:53 +0600
From:      Eugene Grosbein <egrosbein@rdtc.ru>
To:        Ed Maste <emaste@freebsd.org>
Cc:        freebsd-net@freebsd.org, Paul Thornton <prt@prt.org>
Subject:   Re: Polling slows down bandwidth
Message-ID:  <4D5CF739.10303@rdtc.ru>
In-Reply-To: <20101030174152.GB41042@sandvine.com>
References:  <1519248747.20101028232111@yandex.ru>	<1452146D-A590-4676-A662-14D0EEE82152@mac.com>	<606859717.20101029093926@yandex.ru>	<FA2E3C9B-A1A1-4FB3-80AD-26F5518C1ABF@mac.com>	<4CCC0243.8060507@prt.org> <20101030174152.GB41042@sandvine.com>

next in thread | previous in thread | raw e-mail | index | archive | help
On 31.10.2010 00:41, Ed Maste wrote:

>> I've been doing testing with FreeBSD 8 and em interfaces recently, and
>> my experience agrees with Chuck's statement - that polling makes things
>> worse when you use new (anything in the last 2 or 3 years) hardware with
>> good quality gigabit ethernet interfaces.
> 
> There are some deficiencies in the current polling algorithm that will
> cause it to perform less than optimally (it will temporarily stop
> processing packets even though it is consuming less CPU than requested).
> I have some changes that I plan to bring into the tree to improve this
> situation.  For recent high quality hardware though I expect you'll get
> roughly equivalent performance from polling and standard operation.

Also note that the current em(4) and igb(4) drivers limit the maximum number
of interrupts per second that one adapter is allowed to generate to 8000.

This number is inadequate for modern hardware like Core i3-based systems
and leads to significant packet forwarding delays (hundreds of extra milliseconds).

igb(4) in -CURRENT has a loader tunable to change this limit, but em(4) does not.
I've made a patch to change this limit at run time (per-device) and raise it up to 32000.
My em NICs generate up to 24000 interrupts per second and the CPU is still pretty idle.

For example, here is the patch for 8.2-PRERELEASE's em(4).
It adds a "hw.em.max_interrupt_rate" loader tunable, just like in igb(4),
plus per-device r/w sysctls named "dev.em.X.max_interrupt_rate"
(the loader tunable sets the default for the sysctls). There is a similar patch
for igb(4) to make this a run-time sysctl. A manual ifconfig down/up is required
after a change of the boot-time value for it to take effect.

--- sys/dev/e1000/if_em.h.orig	2010-12-30 11:18:14.000000000 +0600
+++ sys/dev/e1000/if_em.h	2010-12-30 11:27:17.000000000 +0600
@@ -436,6 +436,12 @@
 	unsigned long	link_irq;
 
 	struct e1000_hw_stats stats;
+	
+	/* Bring link down when interface goes down */
+	int		down_disables_link;
+
+	/* Maximum interrupt rate */
+	int		max_interrupt_rate;
 };
 
 /********************************************************************************
--- sys/dev/e1000/if_em.c.orig	2010-12-30 11:18:14.000000000 +0600
+++ sys/dev/e1000/if_em.c	2010-12-30 11:27:17.000000000 +0600
@@ -365,6 +365,10 @@
 static int em_fc_setting = e1000_fc_full;
 TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
 
+#define MAX_INTS_PER_SEC	8000
+static unsigned em_max_interrupt_rate = MAX_INTS_PER_SEC;
+TUNABLE_INT("hw.em.max_interrupt_rate", &em_max_interrupt_rate);
+
 /* Global used in WOL setup with multiport cards */
 static int global_quad_port_a = 0;
 
@@ -452,6 +456,20 @@
 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
 	    em_sysctl_debug_info, "I", "Debug Information");
 
+	adapter->down_disables_link = 0;
+	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "down_disables_link", CTLTYPE_INT|CTLFLAG_RW,
+	    &adapter->down_disables_link, adapter->down_disables_link,
+	    "Bring link down when interface goes down");
+
+	adapter->max_interrupt_rate = em_max_interrupt_rate;
+	SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "max_interrupt_rate", CTLTYPE_UINT|CTLFLAG_RW,
+	    &adapter->max_interrupt_rate, adapter->max_interrupt_rate,
+	    "Maximum interrupt rate");
+
 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
 
 	/* Determine hardware and mac info */
@@ -1266,6 +1284,8 @@
 	}
 
 	/* Initialize the hardware */
+	if (!adapter->hw.phy.reset_disable)
+	        e1000_phy_hw_reset(&adapter->hw);
 	em_reset(adapter);
 	em_update_link_status(adapter);
 
@@ -2319,6 +2339,19 @@
 
 	e1000_led_off(&adapter->hw);
 	e1000_cleanup_led(&adapter->hw);
+
+	if(adapter->down_disables_link) {
+		/* Bring physical link down by powering the phy down */
+/*		e1000_power_down_phy(&adapter->hw); */
+		e1000_power_down_phy_copper(&adapter->hw);
+
+		/* Update system interface state */
+		adapter->hw.mac.get_link_status = 1;
+		em_update_link_status(adapter);
+
+		/* Reset the phy next time init gets called */
+		adapter->hw.phy.reset_disable = FALSE;
+	}
 }
 
 
@@ -4046,8 +4079,6 @@
  *  Enable receive unit.
  *
  **********************************************************************/
-#define MAX_INTS_PER_SEC	8000
-#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
 
 static void
 em_initialize_receive_unit(struct adapter *adapter)
@@ -4071,18 +4102,24 @@
 	    adapter->rx_abs_int_delay.value);
 	/*
 	 * Set the interrupt throttling rate. Value is calculated
-	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
+	 * as 1/(max_interrupt_rate * 256ns)
 	 */
-	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
+	if (adapter->max_interrupt_rate > 0) {
+	E1000_WRITE_REG(hw, E1000_ITR, 
+	    1000000000/256/adapter->max_interrupt_rate);
+	}
 
 	/*
 	** When using MSIX interrupts we need to throttle
 	** using the EITR register (82574 only)
 	*/
 	if (hw->mac.type == e1000_82574)
-		for (int i = 0; i < 4; i++)
+		for (int i = 0; i < 4; i++) {
+		    if (adapter->max_interrupt_rate > 0) {
 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
-			    DEFAULT_ITR);
+			    1000000000/256/adapter->max_interrupt_rate);
+		    }
+		}
 
 	/* Disable accelerated ackknowledge */
 	if (adapter->hw.mac.type == e1000_82574)




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4D5CF739.10303>