Date: Fri, 14 Mar 2014 18:31:08 +0000 From: "Meyer, Conrad" <conrad.meyer@isilon.com> To: Bryan Drewery <bdrewery@FreeBSD.org> Cc: "freebsd-hackers@freebsd.org" <freebsd-hackers@freebsd.org>, "Meyer, Conrad" <conrad.meyer@isilon.com> Subject: [PATCH] amd64/pcpu.h: Use Clang builtins for clarity when referencing thread's pcpu Message-ID: <1394821826-19412-1-git-send-email-conrad.meyer@isilon.com>
next in thread | raw e-mail | index | archive | help
We can efficiently reference thread-local pcpu members via the %gs register with Clang-annotated C code, in place of inline GNU assembly. Motivations: - Use C in leiu of inline assembly for clarity - Clang's static analyser may be better able to understand PCPU_* macros using the C constructs rather than inline assembly (unverified) Sponsored by: EMC/Isilon storage division Signed-off-by: Conrad Meyer <conrad.meyer@isilon.com> Reviewed-by: Max Laier <mlaier@FreeBSD.org> --- This is more of a "what do you think?" than a pull request. It seems like u= sing annotated C instead of asm is nice (in particular, Clang detects casts from pointers typed with one segment to another, or unsegmented type). On the ot= her hand, this is code that doesn't change frequently, and we may still need to support GCC for some time. So adding a second, parallel implementation just doubles room for bugs. Open questions: - How long is GCC intended to be supported as a compiler? - How atomic does PCPU_INC() need to be? It looks like it updates cpu-loc= al counters; as long as it's a single asm instruction, should it be fine w.r.t. interrupts? The existing implementation does NOT use the 'lock; = ' prefix. See the following simple example: $ cat gstest.c #include <sys/cdefs.h> #include <stdint.h> #define GS_RELATIVE __attribute__((address_space(256))) struct pcpu { void *curthread; struct pcpu *self; }; struct pcpu * __curpcpu(void) { volatile struct pcpu * GS_RELATIVE *res =3D (volatile struct pcpu * GS_RELATIVE *) __offsetof(struct pcpu, self); return (*res); } $ clang -Wall -Wextra -O1 -c gstest.c $ objdump -d gstest.o gstest.o: file format elf64-x86-64 Disassembly of section .text: 0000000000000000 <__curpcpu>: 0: 65 48 8b 04 25 08 00 mov %gs:0x8,%rax 7: 00 00 9: c3 retq Support has been present since at least April 9, 2009 (when documentation of the feature was first added to LanguageExtensions.html). So all Clang versions in BSD core (BSD9, BSD10) should support the feature. --- sys/amd64/include/pcpu.h | 98 +++++++++++++++++++++++++++++++++++++++++---= ---- 1 file changed, 84 insertions(+), 14 deletions(-) diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index fe898e9..68892fc 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -81,7 +81,7 @@ extern struct pcpu *pcpup; #define PCPU_PTR(member) (&pcpup->pc_ ## member) #define PCPU_SET(member, val) (pcpup->pc_ ## member =3D (val)) =20 -#elif defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) +#elif defined(__clang__) || (defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE= ___TYPEOF)) =20 /* * Evaluates to the byte offset of the per-cpu variable name. @@ -95,6 +95,80 @@ extern struct pcpu *pcpup; #define __pcpu_type(name) \ __typeof(((struct pcpu *)0)->name) =20 +#if defined(__clang__) + +#define __GS_RELATIVE __attribute__((address_space(256))) + +/* + * Evaluates to the address of the per-cpu variable name. + */ +#define __PCPU_PTR(name) __extension__ ({ \ + volatile __pcpu_type(name) __GS_RELATIVE *__p; \ + \ + __p =3D (volatile __pcpu_type(name) __GS_RELATIVE *)__pcpu_offset(name); = \ + __p; \ +}) + +#define __PCPU_PTRX(name) __extension__ ({ \ + volatile __pcpu_type(pc_prvspace) __GS_RELATIVE *__p; \ + __pcpu_type(name) *__mp; \ + \ + __p =3D __PCPU_PTR(pc_prvspace); \ + __mp =3D &(*__p)->name; \ + __mp; \ +}) +#define PCPU_PTR(member) __PCPU_PTRX(pc_ ## member) + +/* + * Evaluates to the value of the per-cpu variable name. + */ +#define __PCPU_GET(name) __extension__ ({ \ + *__PCPU_PTR(name); \ +}) + +/* + * Adds the value to the per-cpu counter name. The implementation + * must be atomic with respect to interrupts. + */ +#define __PCPU_ADD(name, val) do { \ + __pcpu_type(name) __val; \ + volatile __pcpu_type(name) __GS_RELATIVE *__ptr; \ + \ + __val =3D (val); \ + __ptr =3D __PCPU_PTR(name); \ + *__ptr +=3D __val; \ +} while (0) + +/* + * Increments the value of the per-cpu counter name. The implementation + * must be atomic with respect to interrupts. + */ +#define __PCPU_INC(name) __PCPU_ADD(name, 1) + +/* + * Sets the value of the per-cpu variable name to value val. + */ +#define __PCPU_SET(name, val) do { \ + __pcpu_type(name) __val; \ + volatile __pcpu_type(name) __GS_RELATIVE *__ptr; \ + \ + __val =3D (val); \ + __ptr =3D __PCPU_PTR(name); \ + *__ptr =3D __val; \ +} while (0) + +#define curthread __extension__ ({ \ + *((volatile __pcpu_type(pc_curthread) __GS_RELATIVE *) \ + __pcpu_offset(pc_curthread)); \ +}) + +#define curpcb __extension__ ({ \ + *((volatile __pcpu_type(pc_curpcb) __GS_RELATIVE *) \ + __pcpu_offset(pc_curpcb)); \ +}) + +#else /* !__clang__ */ + /* * Evaluates to the address of the per-cpu variable name. */ @@ -200,17 +274,7 @@ extern struct pcpu *pcpup; } \ } =20 -#define PCPU_GET(member) __PCPU_GET(pc_ ## member) -#define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val) -#define PCPU_INC(member) __PCPU_INC(pc_ ## member) -#define PCPU_PTR(member) __PCPU_PTR(pc_ ## member) -#define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val) - #define OFFSETOF_CURTHREAD 0 -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wnull-dereference" -#endif static __inline __pure2 struct thread * __curthread(void) { @@ -220,9 +284,6 @@ __curthread(void) : "m" (*(char *)OFFSETOF_CURTHREAD)); return (td); } -#ifdef __clang__ -#pragma clang diagnostic pop -#endif #define curthread (__curthread()) =20 #define OFFSETOF_CURPCB 32 @@ -236,6 +297,15 @@ __curpcb(void) } #define curpcb (__curpcb()) =20 +#define PCPU_PTR(member) __PCPU_PTR(pc_ ## member) + +#endif /* __clang__ */ + +#define PCPU_GET(member) __PCPU_GET(pc_ ## member) +#define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val) +#define PCPU_INC(member) __PCPU_INC(pc_ ## member) +#define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val) + #define IS_BSP() (PCPU_GET(cpuid) =3D=3D 0) =20 #else /* !lint || defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) = */ --=20 1.8.5.3
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?1394821826-19412-1-git-send-email-conrad.meyer>