Date: Tue, 2 Apr 2013 19:40:17 +0400 From: Gleb Smirnoff <glebius@FreeBSD.org> To: Luigi Rizzo <rizzo@iet.unipi.it> Cc: arch@FreeBSD.org, kib@FreeBSD.org Subject: Re: [CFR][CFT] counter(9): new API for faster and raceless counters Message-ID: <20130402154017.GN76816@FreeBSD.org> In-Reply-To: <20130402145722.GA9161@onelab2.iet.unipi.it> References: <20130401115128.GZ76816@FreeBSD.org> <20130402013758.GA96904@onelab2.iet.unipi.it> <20130402141717.GK76816@glebius.int.ru> <20130402145722.GA9161@onelab2.iet.unipi.it>
index | next in thread | previous in thread | raw e-mail
[-- Attachment #1 --]
Luigi,
On Tue, Apr 02, 2013 at 04:57:22PM +0200, Luigi Rizzo wrote:
L> > Here is patch for review. It adds 4 more primitives:
L> >
L> > counter_enter();
L> > counter_u64_add_prot(c, x);
L> > counter_u64_subtract_prot(c, x);
L> > counter_exit();
L>
L> thanks for the patch. I have three more comments:
L>
L> - is it really necessary to have the "subtract" version ?
L> Couldn't one just make "x" an int64_t ? or it gives
L> too many warnings at runtime maybe ?
Agreed. See patch.
L> - (this can be fixed later) in the i386 version, counter_enter()
L> and counter_exit() have an if statement which may become quite
L> expensive if mispredicted. Also, the test is repeated 3 times in
L> counter_u64_add() (enter/add/exit). Hopefully the compiler
L> optimizes out the extra calls, but the previous version seemed
L> more readable. Anyways, at some point we should figure out
L> whether putting likely/unlikely annotations on the result of
L> (cpu_feature & CPUID_CX8) may improve performance where it matters.
Agreed. See patch.
L> - do you plan to provide an API to initialize a counter to 0 or a
L> specific value ? I suppose this is done implicitly on allocation,
L> but there are cases (e.g. ipfw) where the application explicitly
L> zeroes counters.
There already is counter_u64_zero().
L> > So a 63% speedup, not to mention the fact that in such a tight loop 98% of
L> > parallel updates are lost on the racy counter :)
L> >
L> > A tight loop with atomic_add() is 22 times (twenty two times) slower than
L> > new counter. I didn't bother to run ministat :)
L>
L> yes i think this really does justice to the improvements of the new code
L> (i am a bit unclear on what actual test you ran / how many counter_u64_add()
L> per syscall you have, but i do expect the racy counters to be much slower
L> and much less reliable, and the 20x slowdown with atomics is completely
L> expected.)
The test made 2 * 10^6 iterations of updating a counter in a for loop.
--
Totus tuus, Glebius.
[-- Attachment #2 --]
Index: sys/sparc64/include/counter.h
===================================================================
--- sys/sparc64/include/counter.h (revision 249002)
+++ sys/sparc64/include/counter.h (working copy)
@@ -31,22 +31,21 @@
#include <sys/pcpu.h>
-static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
-{
+#define counter_enter() critical_enter()
+#define counter_exit() critical_exit()
- critical_enter();
- *(uint64_t *)zpcpu_get(c) += inc;
- critical_exit();
-}
+#define counter_u64_add_prot(c, inc) do { \
+ CRITICAL_ASSERT(td); \
+ *(uint64_t *)zpcpu_get(c) += (inc); \
+} while (0)
static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
+counter_u64_add(counter_u64_t c, int64_t inc)
{
- critical_enter();
- *(uint64_t *)zpcpu_get(c) -= dec;
- critical_exit();
+ counter_enter();
+ counter_u64_add_prot(c, inc);
+ counter_exit();
}
#endif /* ! __MACHINE_COUNTER_H__ */
Index: sys/ia64/include/counter.h
===================================================================
--- sys/ia64/include/counter.h (revision 249002)
+++ sys/ia64/include/counter.h (working copy)
@@ -31,22 +31,21 @@
#include <sys/pcpu.h>
-static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
-{
+#define counter_enter() critical_enter()
+#define counter_exit() critical_exit()
- critical_enter();
- *(uint64_t *)zpcpu_get(c) += inc;
- critical_exit();
-}
+#define counter_u64_add_prot(c, inc) do { \
+ CRITICAL_ASSERT(td); \
+ *(uint64_t *)zpcpu_get(c) += (inc); \
+} while (0)
static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
+counter_u64_add(counter_u64_t c, int64_t inc)
{
- critical_enter();
- *(uint64_t *)zpcpu_get(c) -= dec;
- critical_exit();
+ counter_enter();
+ counter_u64_add_prot(c, inc);
+ counter_exit();
}
#endif /* ! __MACHINE_COUNTER_H__ */
Index: sys/netinet/ip_input.c
===================================================================
--- sys/netinet/ip_input.c (revision 249002)
+++ sys/netinet/ip_input.c (working copy)
@@ -293,7 +293,7 @@ void
kmod_ipstat_dec(int statnum)
{
- counter_u64_subtract((counter_u64_t )&V_ipstatp + statnum, 1);
+ counter_u64_add((counter_u64_t )&V_ipstatp + statnum, -1);
}
static int
Index: sys/netinet/ip_var.h
===================================================================
--- sys/netinet/ip_var.h (revision 249002)
+++ sys/netinet/ip_var.h (working copy)
@@ -175,7 +175,7 @@ VNET_DECLARE(struct ipstat_p, ipstatp);
#define IPSTAT_ADD(name, val) counter_u64_add(V_ipstatp.name, (val))
#define IPSTAT_SUB(name, val) counter_u64_subtract(V_ipstatp.name, (val))
#define IPSTAT_INC(name) IPSTAT_ADD(name, 1)
-#define IPSTAT_DEC(name) IPSTAT_SUB(name, 1)
+#define IPSTAT_DEC(name) IPSTAT_ADD(name, -1)
/*
* Kernel module consumers must use this accessor macro.
Index: sys/i386/include/counter.h
===================================================================
--- sys/i386/include/counter.h (revision 249002)
+++ sys/i386/include/counter.h (working copy)
@@ -33,8 +33,18 @@
#include <machine/md_var.h>
#include <machine/specialreg.h>
+#define counter_enter() do { \
+ if ((cpu_feature & CPUID_CX8) == 0) \
+ critical_enter(); \
+} while (0)
+
+#define counter_exit() do { \
+ if ((cpu_feature & CPUID_CX8) == 0) \
+ critical_exit(); \
+} while (0)
+
static inline void
-counter_64_inc_8b(uint64_t *p, uint64_t inc)
+counter_64_inc_8b(uint64_t *p, int64_t inc)
{
__asm __volatile(
@@ -52,8 +62,16 @@ static inline void
: "memory", "cc", "eax", "edx", "ebx", "ecx");
}
+#define counter_u64_add_prot(c, inc) do { \
+ if ((cpu_feature & CPUID_CX8) == 0) { \
+ CRITICAL_ASSERT(td); \
+ *(uint64_t *)zpcpu_get(c) += (inc); \
+ } else \
+ counter_64_inc_8b((c), (inc)); \
+} while (0)
+
static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
+counter_u64_add(counter_u64_t c, int64_t inc)
{
if ((cpu_feature & CPUID_CX8) == 0) {
@@ -65,11 +83,4 @@ static inline void
}
}
-static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
-{
-
- counter_u64_add(c, -(int64_t)dec);
-}
-
#endif /* ! __MACHINE_COUNTER_H__ */
Index: sys/amd64/include/counter.h
===================================================================
--- sys/amd64/include/counter.h (revision 249002)
+++ sys/amd64/include/counter.h (working copy)
@@ -33,8 +33,13 @@
extern struct pcpu __pcpu[1];
+#define counter_enter() do {} while (0)
+#define counter_exit() do {} while (0)
+
+#define counter_u64_add_prot(c, i) counter_u64_add(c, i)
+
static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
+counter_u64_add(counter_u64_t c, int64_t inc)
{
__asm __volatile("addq\t%1,%%gs:(%0)"
@@ -43,14 +48,4 @@ static inline void
: "memory", "cc");
}
-static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
-{
-
- __asm __volatile("subq\t%1,%%gs:(%0)"
- :
- : "r" ((char *)c - (char *)&__pcpu[0]), "r" (dec)
- : "memory", "cc");
-}
-
#endif /* ! __MACHINE_COUNTER_H__ */
Index: sys/arm/include/counter.h
===================================================================
--- sys/arm/include/counter.h (revision 249002)
+++ sys/arm/include/counter.h (working copy)
@@ -31,22 +31,21 @@
#include <sys/pcpu.h>
-static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
-{
+#define counter_enter() critical_enter()
+#define counter_exit() critical_exit()
- critical_enter();
- *(uint64_t *)zpcpu_get(c) += inc;
- critical_exit();
-}
+#define counter_u64_add_prot(c, inc) do { \
+ CRITICAL_ASSERT(td); \
+ *(uint64_t *)zpcpu_get(c) += (inc); \
+} while (0)
static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
+counter_u64_add(counter_u64_t c, int64_t inc)
{
- critical_enter();
- *(uint64_t *)zpcpu_get(c) -= dec;
- critical_exit();
+ counter_enter();
+ counter_u64_add_prot(c, inc);
+ counter_exit();
}
#endif /* ! __MACHINE_COUNTER_H__ */
Index: sys/powerpc/include/counter.h
===================================================================
--- sys/powerpc/include/counter.h (revision 249002)
+++ sys/powerpc/include/counter.h (working copy)
@@ -33,8 +33,13 @@
#if defined(AIM) && defined(__powerpc64__)
+#define counter_enter() do {} while (0)
+#define counter_exit() do {} while (0)
+
+#define counter_u64_add_prot(c, i) counter_u64_add(c, i)
+
static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
+counter_u64_add(counter_u64_t c, int64_t inc)
{
uint64_t ccpu, old;
@@ -50,33 +55,23 @@ static inline void
: "cc", "memory");
}
-static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
-{
-
- counter_u64_add(c, -dec);
-}
-
#else /* !AIM || !64bit */
-static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
-{
+#define counter_enter() critical_enter()
+#define counter_exit() critical_exit()
- critical_enter();
- *(uint64_t *)zpcpu_get(c) += inc;
- critical_exit();
-}
+#define counter_u64_add_prot(c, inc) do { \
+ CRITICAL_ASSERT(td); \
+ *(uint64_t *)zpcpu_get(c) += (inc); \
+} while (0)
static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
+counter_u64_add(counter_u64_t c, int64_t inc)
{
- critical_enter();
- *(uint64_t *)zpcpu_get(c) -= dec;
- critical_exit();
+ counter_enter();
+ counter_u64_add_prot(c, inc);
+ counter_exit();
}
#endif /* AIM 64bit */
#endif /* ! __MACHINE_COUNTER_H__ */
Index: sys/mips/include/counter.h
===================================================================
--- sys/mips/include/counter.h (revision 249002)
+++ sys/mips/include/counter.h (working copy)
@@ -31,22 +31,21 @@
#include <sys/pcpu.h>
-static inline void
-counter_u64_add(counter_u64_t c, uint64_t inc)
-{
+#define counter_enter() critical_enter()
+#define counter_exit() critical_exit()
- critical_enter();
- *(uint64_t *)zpcpu_get(c) += inc;
- critical_exit();
-}
+#define counter_u64_add_prot(c, inc) do { \
+ CRITICAL_ASSERT(td); \
+ *(uint64_t *)zpcpu_get(c) += (inc); \
+} while (0)
static inline void
-counter_u64_subtract(counter_u64_t c, uint64_t dec)
+counter_u64_add(counter_u64_t c, int64_t inc)
{
- critical_enter();
- *(uint64_t *)zpcpu_get(c) -= dec;
- critical_exit();
+ counter_enter();
+ counter_u64_add_prot(c, inc);
+ counter_exit();
}
#endif /* ! __MACHINE_COUNTER_H__ */
help
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20130402154017.GN76816>
