Skip site navigation (1) Skip section navigation (2)
Date:      Thu, 15 Aug 2013 23:01:27 +0000 (UTC)
From:      Brendan Fabeny <bf@FreeBSD.org>
To:        ports-committers@freebsd.org, svn-ports-all@freebsd.org, svn-ports-head@freebsd.org
Subject:   svn commit: r324787 - in head/math: . sfft sfft/files
Message-ID:  <201308152301.r7FN1RXV058436@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: bf
Date: Thu Aug 15 23:01:27 2013
New Revision: 324787
URL: http://svnweb.freebsd.org/changeset/ports/324787

Log:
  Add sfft 0.1.0, optimized Sparse Fast Fourier Transform.

Added:
  head/math/sfft/
  head/math/sfft/Makefile   (contents, props changed)
  head/math/sfft/distinfo   (contents, props changed)
  head/math/sfft/files/
  head/math/sfft/files/patch-src__computefourier-1.0-2.0.cc   (contents, props changed)
  head/math/sfft/files/patch-src__computefourier-3.0.cc   (contents, props changed)
  head/math/sfft/pkg-descr   (contents, props changed)
Modified:
  head/math/Makefile

Modified: head/math/Makefile
==============================================================================
--- head/math/Makefile	Thu Aug 15 22:13:42 2013	(r324786)
+++ head/math/Makefile	Thu Aug 15 23:01:27 2013	(r324787)
@@ -610,6 +610,7 @@
     SUBDIR += sdpa-gmp
     SUBDIR += sdpara
     SUBDIR += sedumi
+    SUBDIR += sfft
     SUBDIR += simd-viterbi
     SUBDIR += slatec
     SUBDIR += slgrace

Added: head/math/sfft/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/math/sfft/Makefile	Thu Aug 15 23:01:27 2013	(r324787)
@@ -0,0 +1,123 @@
+# Created by: bf@FreeBSD.org
+# $FreeBSD$
+
+PORTNAME=	sfft
+PORTVERSION=	0.1.0
+CATEGORIES=	math
+MASTER_SITES=	http://spiral.net/software/sfft/ LOCAL/bf
+DISTFILES=	${DISTNAME}${EXTRACT_SUFX}
+EXTRACT_ONLY=	${DISTNAME}${EXTRACT_SUFX}
+
+MAINTAINER=	bf@FreeBSD.org
+COMMENT=	Optimized Sparse Fast Fourier Transform
+
+LICENSE=	GPLv2
+
+LIB_DEPENDS=	libfftw3.so:${PORTSDIR}/math/fftw3
+
+USE_GCC=	yes
+USE_LDCONFIG=	yes
+USE_ZIP=	yes
+
+OPTIONS_DEFINE=		OPTIMIZED_CFLAGS PROFILE
+OPTIONS_DEFAULT=	OPTIMIZED_CFLAGS
+
+BUILD_WRKSRC=	${WRKSRC}/src
+INSTALL_WRKSRC=	${BUILD_WRKSRC}
+CFLAGS+=	-fopenmp -msse2 -Iflopcount -I${LOCALBASE}/include
+LDFLAGS+=	-L${LOCALBASE}/lib
+HEADERS=	sfft.h
+HDIR=		include/sfft
+MAKE_ENV=	LDADD="-lfftw3 ${LIBM}" LIB=sfft SHLIB_MAJOR="${SHLIB_MAJOR}" \
+		SRCCONF=/dev/null SRCS="${SRCS}"
+
+PLIST_DIRS=	${HDIR}
+PLIST_FILES=	${HEADERS:S|^|${HDIR}/|} lib/libsfft.a lib/libsfft.so \
+		lib/libsfft.so.${SHLIB_MAJOR}
+
+SHLIB_MAJOR=	1
+SRCS=	common.cc computefourier-1.0-2.0.cc \
+	computefourier-3.0.cc fftw.cc filters.cc parameters.cc \
+	sfft.cc simulation.cc timer.cc utils.cc
+
+.include <bsd.port.options.mk>
+
+.if !${ARCH:Mamd64} && !${MACHINE_CPU:Msse2}
+IGNORE=	this port requires SSE2, and benefits from SSE3 -- set CPUTYPE\
+appropriately
+.endif
+
+LIBM=	-lm
+.if ${OSVERSION} < 1000034
+LIB_DEPENDS+=	libmissing.so:${PORTSDIR}/math/libmissing
+LIBM+=		-lmissing
+.endif
+
+.if ${PORT_OPTIONS:MDOCS} || make(makesum)
+DISTFILES+=	sfft-doc.pdf
+PORTDOCS=	sfft-doc.pdf
+.endif
+
+.if ${PORT_OPTIONS:MOPTIMIZED_CFLAGS}
+CFLAGS+=	-O3 -ffast-math
+.endif
+
+.if ${PORT_OPTIONS:MPROFILE}
+.if defined(NOPROFILE) || defined(NO_PROFILE) || defined(WITHOUT_PROFILE)
+IGNORE =	you have defined WITH_PROFILE, but have also defined\
+WITHOUT_PROFILE, NOPROFILE, or NO_PROFILE
+.elif !exists(/usr/lib/libc_p.a)
+IGNORE  =	you have chosen WITH_PROFILE, but have not installed the\
+base system profiling libraries
+.endif
+PLIST_FILES+=	lib/libsfft_p.a
+.else
+MAKE_ENV+=	NO_PROFILE=yes
+.endif
+
+post-extract:
+	@${CP} /usr/include/complex.h ${BUILD_WRKSRC}/sfftcomplex.h
+	@${PRINTF} "LIBDIR=\t${PREFIX}/lib\n.include <bsd.lib.mk>\n" > \
+		${BUILD_WRKSRC}/Makefile
+
+post-patch:
+	@${REINPLACE_CMD} -e 's/string\.h/cstring/' \
+		${WRKSRC}/src/utils.cc
+.if ${OSVERSION} < 1000034
+	@${REINPLACE_CMD} -e '\|<complex.h>|{x; \
+	s|^.*$$|#include "missing_complex.h"|; H; x;}' \
+		${WRKSRC}/src/fft.h
+.endif
+	@${REINPLACE_CMD} -E -e '/<complex\.h>/ \
+	{s/<complex\.h>/ "sfftcomplex.h"/; x ; \
+	s|^.*$$|#endif|; G; x; \
+	s|^.*$$|extern "C" {|; G; x; \
+	s|^.*$$|#ifdef __cplusplus|; G; x; \
+	s|^.*$$|#ifdef __cplusplus|; H; \
+	s|^.*$$|}|; H; \
+	s|^.*$$|#endif|; H; x;}' \
+		${WRKSRC}/src/computefourier-1.0-2.0.h \
+		${WRKSRC}/src/computefourier-3.0.h \
+		${WRKSRC}/src/fft.h
+
+CORELIMIT?=	/usr/bin/limits -Sc 0
+
+check regression-test test: build
+	@cd ${BUILD_WRKSRC}; \
+	${CXX} ${CXXFLAGS} -o sfft-verification verification.cc \
+	${LDFLAGS} libsfft.a -lfftw3 ${LIBM} ; \
+	for _v in 1 2 3 ; do \
+	for _k in 5 10 50; do \
+	echo "Checking sfft version $${_v} with $${_k} frequency components:"; \
+	${CORELIMIT} ./sfft-verification -k $${_k} -r 3 -v $${_v} || ${TRUE} ; \
+	done ; done
+
+post-install:
+	@${MKDIR} ${PREFIX}/${HDIR}
+	@cd ${BUILD_WRKSRC}; ${INSTALL_DATA} ${HEADERS} ${PREFIX}/${HDIR}
+.if ${PORT_OPTIONS:MDOCS}
+	@${MKDIR} ${DOCSDIR}
+	@${INSTALL_DATA} ${_DISTDIR}/${PORTDOCS} ${DOCSDIR}
+.endif
+
+.include <bsd.port.mk>

Added: head/math/sfft/distinfo
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/math/sfft/distinfo	Thu Aug 15 23:01:27 2013	(r324787)
@@ -0,0 +1,4 @@
+SHA256 (sfft-0.1.0.zip) = b52d53f020e82f67cc7da2ad9cacb428752ec3229ce00f435a527d6180ddd494
+SIZE (sfft-0.1.0.zip) = 466599
+SHA256 (sfft-doc.pdf) = d9416944b2ca7bd068320a1b45ed0cc4a311b0bfbac5ed43e68f46bdc3a93454
+SIZE (sfft-doc.pdf) = 122378

Added: head/math/sfft/files/patch-src__computefourier-1.0-2.0.cc
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/math/sfft/files/patch-src__computefourier-1.0-2.0.cc	Thu Aug 15 23:01:27 2013	(r324787)
@@ -0,0 +1,55 @@
+--- src/computefourier-1.0-2.0.cc.orig	2013-06-13 08:12:25.000000000 -0400
++++ src/computefourier-1.0-2.0.cc	2013-08-09 00:26:54.000000000 -0400
+@@ -248,8 +248,13 @@
+           __m128d ad_bc = _mm_mul_pd(ab, dc);
+           __m128d ac_mbd = _mm_mul_pd(ac_bd, signs);
+ 
++#ifdef __SSE3__
+           __m128d ab_times_cd = _mm_hadd_pd(ac_mbd, ad_bc);
+-
++#else
++          __m128d ab_times_cd_lo = _mm_shuffle_pd(ac_mbd, ad_bc, 0);
++          __m128d ab_times_cd_hi = _mm_shuffle_pd(ac_mbd, ad_bc, 3);
++          __m128d ab_times_cd = _mm_add_pd(ab_times_cd_lo, ab_times_cd_hi);
++#endif
+           unsigned int i_mod_B_p_offset = (i & B2_m_1) + offset;
+           __m128d xy = _mm_load_pd(d_x_sampt + i_mod_B_p_offset);
+           __m128d st = _mm_add_pd(xy, ab_times_cd);
+@@ -283,7 +288,13 @@
+       __m128d ab_square = _mm_mul_pd(ab, ab);
+       __m128d cd_square = _mm_mul_pd(cd, cd);
+ 
++#ifdef __SSE3__
+       __m128d r = _mm_hadd_pd(ab_square, cd_square);
++#else
++      __m128d r_lo = _mm_shuffle_pd(ab_square, cd_square, 0);
++      __m128d r_hi = _mm_shuffle_pd(ab_square, cd_square, 3);
++      __m128d r = _mm_add_pd(r_lo, r_hi);
++#endif
+ 
+       _mm_store_pd(samples + j, r);
+     }
+@@ -390,11 +401,23 @@
+           __m128d ad_bc = _mm_mul_pd(ab, dc);
+           __m128d mad_bc = _mm_mul_pd(ad_bc, signs);
+ 
++#ifdef __SSE3__
+           __m128d acpbd_bcmad = _mm_hadd_pd(ac_bd, mad_bc);
++#else
++          __m128d acpbd_bcmad_lo = _mm_shuffle_pd(ac_bd, mad_bc, 0);
++          __m128d acpbd_bcmad_hi = _mm_shuffle_pd(ac_bd, mad_bc, 3);
++          __m128d acpbd_bcmad = _mm_add_pd(acpbd_bcmad_lo, acpbd_bcmad_hi);
++#endif
+ 
+           __m128d cd_squares = _mm_mul_pd(cd, cd);
++
++#ifdef __SSE3__
+           __m128d cd_squares_sum =
+             _mm_hadd_pd(cd_squares, cd_squares);
++#else
++          __m128d cd_squares_flip = _mm_shuffle_pd(cd_squares, cd_squares, 1);
++          __m128d cd_squares_sum = _mm_add_pd(cd_squares, cd_squares_flip);
++#endif
+ 
+           __m128d r = _mm_div_pd(acpbd_bcmad, cd_squares_sum);
+ 

Added: head/math/sfft/files/patch-src__computefourier-3.0.cc
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/math/sfft/files/patch-src__computefourier-3.0.cc	Thu Aug 15 23:01:27 2013	(r324787)
@@ -0,0 +1,132 @@
+--- src/computefourier-3.0.cc.orig	2013-06-13 08:12:26.000000000 -0400
++++ src/computefourier-3.0.cc	2013-08-10 17:02:52.000000000 -0400
+@@ -416,27 +416,64 @@
+ 
+   __m128d t1r = _mm_mul_pd(v1r, ab31);
+   __m128d t1i = _mm_mul_pd(v1i, ba31);
++
++#ifdef __SSE3__
+   __m128d remove1 = _mm_addsub_pd(t1r, t1i);
++#else
++__m128i mask_fliplo = _mm_set_epi32(0, 0, 0x80000000, 0);
++__m128d t1i_fliplo = _mm_xor_pd(t1i, _mm_castsi128_pd(mask_fliplo));
++__m128d remove1 = _mm_add_pd(t1r, t1i_fliplo);
++#endif
+ 
+   __m128d t2r = _mm_mul_pd(v1r, ab32);
+   __m128d t2i = _mm_mul_pd(v1i, ba32);
++
++#ifdef __SSE3__
+   __m128d remove2 = _mm_addsub_pd(t2r, t2i);
++#else
++__m128d t2i_fliplo = _mm_xor_pd(t2i, _mm_castsi128_pd(mask_fliplo));
++__m128d remove2 = _mm_add_pd(t2r, t2i_fliplo);
++#endif
+ 
+   __m128d t3r = _mm_mul_pd(v1r, ab33);
+   __m128d t3i = _mm_mul_pd(v1i, ba33);
++  
++#ifdef __SSE3__
+  __m128d remove3 = _mm_addsub_pd(t3r, t3i);
++#else
++__m128d t3i_fliplo = _mm_xor_pd(t3i, _mm_castsi128_pd(mask_fliplo));
++__m128d remove3 = _mm_add_pd(t3r, t3i_fliplo);
++#endif
+ 
+   __m128d t4r = _mm_mul_pd(v2r, ab31);
+   __m128d t4i = _mm_mul_pd(v2i, ba31);
++
++#ifdef __SSE3__
+   __m128d remove4 = _mm_addsub_pd(t4r, t4i);
++#else
++__m128d t4i_fliplo = _mm_xor_pd(t4i, _mm_castsi128_pd(mask_fliplo));
++__m128d remove4 = _mm_add_pd(t4r, t4i_fliplo);
++#endif
+ 
+   __m128d t5r = _mm_mul_pd(v2r, ab32);
+   __m128d t5i = _mm_mul_pd(v2i, ba32);
++
++#ifdef __SSE3__
+   __m128d remove5 = _mm_addsub_pd(t5r, t5i);
++#else
++__m128d t5i_fliplo = _mm_xor_pd(t5i, _mm_castsi128_pd(mask_fliplo));
++__m128d remove5 = _mm_add_pd(t5r, t5i_fliplo);
++#endif
+ 
+   __m128d t6r = _mm_mul_pd(v2r, ab33);
+   __m128d t6i = _mm_mul_pd(v2i, ba33);
++
++#ifdef __SSE3__
+   __m128d remove6 = _mm_addsub_pd(t6r, t6i);
++#else
++__m128d t6i_fliplo = _mm_xor_pd(t6i, _mm_castsi128_pd(mask_fliplo));
++__m128d remove6 = _mm_add_pd(t6r, t6i_fliplo);
++#endif
+ 
+   FLOPCOUNT_INCREMENT(6 * (4 + 2));
+ 
+@@ -524,11 +561,28 @@
+       __m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3);
+       FLOPCOUNT_INCREMENT(8);
+ 
++#ifdef __SSE3__
+       __m128d c0c1 = _mm_hadd_pd(a0b0_sq, a1b1_sq);
+       __m128d c2c3 = _mm_hadd_pd(a2b2_sq, a3b3_sq);
++#else
++      __m128d c0c1_lo = _mm_shuffle_pd(a0b0_sq, a1b1_sq, 0);
++      __m128d c0c1_hi = _mm_shuffle_pd(a0b0_sq, a1b1_sq, 3);
++      __m128d c0c1 = _mm_add_pd(c0c1_lo, c0c1_hi);
++      __m128d c2c3_lo = _mm_shuffle_pd(a2b2_sq, a3b3_sq, 0);
++      __m128d c2c3_hi = _mm_shuffle_pd(a2b2_sq, a3b3_sq, 3);
++      __m128d c2c3 = _mm_add_pd(c2c3_lo, c2c3_hi);
++#endif
++
+       FLOPCOUNT_INCREMENT(4);
+ 
++#ifdef __SSE3__
+       __m128d zbc = _mm_hadd_pd(c0c1, c2c3);
++#else
++      __m128d zbc_lo = _mm_shuffle_pd(c0c1, c2c3, 0);
++      __m128d zbc_hi = _mm_shuffle_pd(c0c1, c2c3, 3);
++      __m128d zbc = _mm_add_pd(zbc_lo, zbc_hi);
++#endif
++
+       FLOPCOUNT_INCREMENT(1);
+ 
+       _mm_store_pd(zero_buck_check, zbc);
+@@ -681,13 +735,35 @@
+       __m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3);
+       FLOPCOUNT_INCREMENT(8);
+ 
++#ifdef __SSE3__
+       __m128d c0c1 = _mm_hadd_pd(a0b0_sq, a1b1_sq);
++#else
++      __m128d c0c1_lo = _mm_shuffle_pd(a0b0_sq, a1b1_sq, 0);
++      __m128d c0c1_hi = _mm_shuffle_pd(a0b0_sq, a1b1_sq, 3);
++      __m128d c0c1 = _mm_add_pd(c0c1_lo, c0c1_hi);
++#endif
++
+       __m128d c0c1_normed = _mm_mul_pd(c0c1, norm2vec);
++      
++#ifdef __SSE3__
+       __m128d c2c3 = _mm_hadd_pd(a2b2_sq, a3b3_sq);
++#else
++      __m128d c2c3_lo = _mm_shuffle_pd(a2b2_sq, a3b3_sq, 0);
++      __m128d c2c3_hi = _mm_shuffle_pd(a2b2_sq, a3b3_sq, 3);
++      __m128d c2c3 = _mm_add_pd(c2c3_lo, c2c3_hi);
++#endif
++
+       __m128d c2c3_normed = _mm_mul_pd(c2c3, norm2vec);
+       FLOPCOUNT_INCREMENT(8);
+ 
++#ifdef __SSE3__
+       __m128d zbc = _mm_hadd_pd(c0c1_normed, c2c3_normed);
++#else
++      __m128d zbc_lo = _mm_shuffle_pd(c0c1_normed, c2c3_normed, 0);
++      __m128d zbc_hi = _mm_shuffle_pd(c0c1_normed, c2c3_normed, 3);
++      __m128d zbc = _mm_add_pd(zbc_lo, zbc_hi);
++#endif
++
+       FLOPCOUNT_INCREMENT(1);
+ 
+       _mm_store_pd(zero_buck_check, zbc);

Added: head/math/sfft/pkg-descr
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/math/sfft/pkg-descr	Thu Aug 15 23:01:27 2013	(r324787)
@@ -0,0 +1,8 @@
+sfft is a library to compute discrete Fourier transforms of signals with
+a sparse frequency domain, using an algorithm that is more efficient than
+other known FFT algorithms. It was developed by Haitham Hassanieh, Piotr
+Indyk, Dina Katabi, and Eric Price at the Computer Science and Artificial
+Intelligence Lab at MIT. Performance optimizations were developed by J.
+Schumacher at the Computer Science Department of ETH Zurich in 2013.
+
+WWW: http://spiral.net/software/sfft.html



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201308152301.r7FN1RXV058436>