Date: Tue, 14 Apr 2015 11:31:18 -0400
From: Eric van Gyzen <vangyzen@FreeBSD.org>
To: current@freebsd.org
Subject: Re: SSE in libthr
Message-ID: <552D32C6.7020200@FreeBSD.org>
In-Reply-To: <20150328173613.GE51048@funkthat.com>
References: <5515AED9.8040408@FreeBSD.org> <3A96AAEC-9C1C-444E-9A73-3CD2AED33116@me.com> <5515CF1C.8010409@FreeBSD.org> <20150328173613.GE51048@funkthat.com>
next in thread | previous in thread | raw e-mail | index | archive | help
Below is an updated patch to incorporate everyone's feedback so far.

I recognize all of the counter-arguments, and I agree with them in general. Indeed, as applications use more SIMD, this kind of patch goes in the wrong direction. However, there are applications that do not use enough SSE to offset the extra context-switch cost. SSE does not provide a clear benefit in the current libthr code with the current compiler, but it does provide a clear loss in some cases. Therefore, disabling SSE in libthr is a non-loss for most, and a gain for some.

I refrained from disabling SSE in libc--as was suggested--because I can't make the above argument for libc. It provides such a variety of code that SSE might be a net win in some cases. I wish I had time to identify and benchmark the interesting cases.

Thanks in advance for your further review and comments.

Eric

Index: head/lib/libthr/arch/amd64/Makefile.inc
===================================================================
--- head/lib/libthr/arch/amd64/Makefile.inc (revision 281473)
+++ head/lib/libthr/arch/amd64/Makefile.inc (working copy)
@@ -1,3 +1,9 @@
 #$FreeBSD$
 
 SRCS+= _umtx_op_err.S
+
+# With the current compiler and libthr code, using SSE in libthr
+# does not provide enough performance improvement to outweigh
+# the extra context switch cost. This can measurably impact
+# performance when the application also does not use enough SSE.
+CFLAGS+=${CFLAGS_NO_SIMD}
Index: head/lib/libthr/arch/i386/Makefile.inc
===================================================================
--- head/lib/libthr/arch/i386/Makefile.inc (revision 281473)
+++ head/lib/libthr/arch/i386/Makefile.inc (working copy)
@@ -1,3 +1,9 @@
 # $FreeBSD$
 
 SRCS+= _umtx_op_err.S
+
+# With the current compiler and libthr code, using SSE in libthr
+# does not provide enough performance improvement to outweigh
+# the extra context switch cost. This can measurably impact
+# performance when the application also does not use enough SSE.
+CFLAGS+=${CFLAGS_NO_SIMD}
Index: head/libexec/rtld-elf/amd64/Makefile.inc
===================================================================
--- head/libexec/rtld-elf/amd64/Makefile.inc (revision 281473)
+++ head/libexec/rtld-elf/amd64/Makefile.inc (working copy)
@@ -1,6 +1,6 @@
 # $FreeBSD$
 
-CFLAGS+= -mno-mmx -mno-3dnow -mno-sse -mno-sse2 -mno-sse3 -msoft-float
+CFLAGS+= ${CFLAGS_NO_SIMD} -msoft-float
 # Uncomment this to build the dynamic linker as an executable instead
 # of a shared library:
 #LDSCRIPT= ${.CURDIR}/${MACHINE_CPUARCH}/elf_rtld.x
Index: head/libexec/rtld-elf/i386/Makefile.inc
===================================================================
--- head/libexec/rtld-elf/i386/Makefile.inc (revision 281473)
+++ head/libexec/rtld-elf/i386/Makefile.inc (working copy)
@@ -1,6 +1,6 @@
 # $FreeBSD$
 
-CFLAGS+= -mno-mmx -mno-3dnow -mno-sse -mno-sse2 -mno-sse3 -msoft-float
+CFLAGS+= ${CFLAGS_NO_SIMD} -msoft-float
 # Uncomment this to build the dynamic linker as an executable instead
 # of a shared library:
 #LDSCRIPT= ${.CURDIR}/${MACHINE_CPUARCH}/elf_rtld.x
Index: head/share/mk/bsd.sys.mk
===================================================================
--- head/share/mk/bsd.sys.mk (revision 281473)
+++ head/share/mk/bsd.sys.mk (working copy)
@@ -153,6 +153,26 @@ SSP_CFLAGS?= -fstack-protector
 CFLAGS+= ${SSP_CFLAGS}
 .endif # SSP && !ARM && !MIPS
 
+#
+# Prohibit the compiler from emitting SIMD instructions.
+# These flags are added to CFLAGS in areas where the extra context-switch
+# cost outweighs the advantages of SIMD instructions.
+#
+# gcc:
+# Setting -mno-mmx implies -mno-3dnow
+# Setting -mno-sse implies -mno-sse2, -mno-sse3, -mno-ssse3 and -mfpmath=387
+#
+# clang:
+# Setting -mno-mmx implies -mno-3dnow and -mno-3dnowa
+# Setting -mno-sse implies -mno-sse2, -mno-sse3, -mno-ssse3, -mno-sse41 and
+# -mno-sse42
+# (-mfpmath= is not supported)
+#
+.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
+CFLAGS_NO_SIMD.clang= -mno-avx
+CFLAGS_NO_SIMD= -mno-mmx -mno-sse ${CFLAGS_NO_SIMD.${COMPILER_TYPE}}
+.endif
+
 # Allow user-specified additional warning flags, plus compiler specific flag overrides.
 # Unless we've overriden this...
 .if ${MK_WARNS} != "no"
Index: head/sys/conf/kern.mk
===================================================================
--- head/sys/conf/kern.mk (revision 281473)
+++ head/sys/conf/kern.mk (working copy)
@@ -75,18 +75,10 @@ FORMAT_EXTENSIONS= -fformat-extensions
 # operations inside the kernel itself. These operations are exclusively
 # reserved for user applications.
 #
-# gcc:
-# Setting -mno-mmx implies -mno-3dnow
-# Setting -mno-sse implies -mno-sse2, -mno-sse3 and -mno-ssse3
-#
-# clang:
-# Setting -mno-mmx implies -mno-3dnow and -mno-3dnowa
-# Setting -mno-sse implies -mno-sse2, -mno-sse3, -mno-ssse3, -mno-sse41 and -mno-sse42
-#
 .if ${MACHINE_CPUARCH} == "i386"
 CFLAGS.gcc+= -mno-align-long-strings -mpreferred-stack-boundary=2
-CFLAGS.clang+= -mno-aes -mno-avx
-CFLAGS+= -mno-mmx -mno-sse -msoft-float
+CFLAGS.clang+= -mno-aes
+CFLAGS+= ${CFLAGS_NO_SIMD} -msoft-float
 INLINE_LIMIT?= 8000
 .endif
 
@@ -111,18 +103,9 @@ INLINE_LIMIT?= 15000
 # operations inside the kernel itself. These operations are exclusively
 # reserved for user applications.
 #
-# gcc:
-# Setting -mno-mmx implies -mno-3dnow
-# Setting -mno-sse implies -mno-sse2, -mno-sse3, -mno-ssse3 and -mfpmath=387
-#
-# clang:
-# Setting -mno-mmx implies -mno-3dnow and -mno-3dnowa
-# Setting -mno-sse implies -mno-sse2, -mno-sse3, -mno-ssse3, -mno-sse41 and -mno-sse42
-# (-mfpmath= is not supported)
-#
 .if ${MACHINE_CPUARCH} == "amd64"
-CFLAGS.clang+= -mno-aes -mno-avx
-CFLAGS+= -mcmodel=kernel -mno-red-zone -mno-mmx -mno-sse -msoft-float \
+CFLAGS.clang+= -mno-aes
+CFLAGS+= -mcmodel=kernel -mno-red-zone ${CFLAGS_NO_SIMD} -msoft-float \
  -fno-asynchronous-unwind-tables
 INLINE_LIMIT?= 8000
 .endif
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?552D32C6.7020200>