Date: Mon, 18 Dec 2023 00:31:27 GMT From: Mikhail Teterin <mi@FreeBSD.org> To: ports-committers@FreeBSD.org, dev-commits-ports-all@FreeBSD.org, dev-commits-ports-main@FreeBSD.org Subject: git: 00d132c04a29 - main - graphics/lepton: upgrade and fix package-building Message-ID: <202312180031.3BI0VRc4010447@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by mi: URL: https://cgit.FreeBSD.org/ports/commit/?id=00d132c04a294dae8b2993e987f0652152fdd8fa commit 00d132c04a294dae8b2993e987f0652152fdd8fa Author: Mikhail Teterin <mi@FreeBSD.org> AuthorDate: 2023-12-18 00:28:15 +0000 Commit: Mikhail Teterin <mi@FreeBSD.org> CommitDate: 2023-12-18 00:28:15 +0000 graphics/lepton: upgrade and fix package-building The upstream addressed our earlier BSD-specific concerns, so some of our patches are no longer needed. We now explicitly add -mssse3 to the CFLAGS. The SSSE3-capability has always been a requirement, but without it set (either explicitly or via -march), parts of the code would not compile: PR: 275197 --- graphics/lepton/Makefile | 25 ++- graphics/lepton/distinfo | 6 +- graphics/lepton/files/patch-base-dependencies | 92 +++-------- graphics/lepton/files/patch-bsd | 148 ------------------ graphics/lepton/files/patch-cpu | 217 -------------------------- graphics/lepton/files/patch-warnings | 123 +++++++++++---- 6 files changed, 137 insertions(+), 474 deletions(-) diff --git a/graphics/lepton/Makefile b/graphics/lepton/Makefile index 755138f2d056..cac23b80bf24 100644 --- a/graphics/lepton/Makefile +++ b/graphics/lepton/Makefile @@ -1,5 +1,5 @@ PORTNAME= lepton -PORTVERSION= 0.2016.07.16 +PORTVERSION= 0.2022.04.13 CATEGORIES= graphics devel MAINTAINER= mi@aldan.algebra.com @@ -8,14 +8,20 @@ WWW= https://github.com/dropbox/lepton LICENSE= APACHE20 +LIB_DEPENDS= libbrotlienc.so:archivers/brotli + +USES= autoreconf localbase ssl shebangfix python:test USE_GITHUB= yes GH_ACCOUNT= dropbox -GH_TAGNAME= a34ee2f4b0a6454eff8ebe334750dd008d57de35 +GH_TAGNAME= 429fe88 -USES= autoreconf GNU_CONFIGURE= yes +CONFIGURE_ARGS+=--enable-system-dependencies --disable-native-opt TEST_TARGET= check -CFLAGS+= -DBSD -DGIT_REVISION='\"${GH_TAGNAME}\"' +SHEBANG_FILES= test_suite/*.py +CFLAGS+= -DBSD -DGIT_REVISION='\"${GH_TAGNAME}\"' \ + -DUSE_SYSTEM_DEPENDENCIES -DUSE_SYSTEM_MD5_DEPENDENCY \ + 
-Wno-deprecated-declarations .ifndef WITH_DEBUG CFLAGS+= -DNDEBUG .endif @@ -24,9 +30,18 @@ PLIST_FILES= bin/lepton .if !${MACHINE_CPU:Mssse3} IGNORE= requires SSSE3 features in the processor +.else +CFLAGS+= -mssse3 .endif do-install: ${INSTALL_PROGRAM} ${WRKSRC}/lepton ${STAGEDIR}${PREFIX}/bin -.include <bsd.port.mk> +.include <bsd.port.pre.mk> + +.if ${OPENSSLBASE} != /usr +LDFLAGS+=-L${OPENSSLLIB} ${OPENSSL_LDFLAGS} +CFLAGS+=-isystem ${OPENSSLINC} +.endif + +.include <bsd.port.post.mk> diff --git a/graphics/lepton/distinfo b/graphics/lepton/distinfo index c17f3cd5735f..cf8eb2ba3d84 100644 --- a/graphics/lepton/distinfo +++ b/graphics/lepton/distinfo @@ -1,3 +1,3 @@ -TIMESTAMP = 1468524984 -SHA256 (dropbox-lepton-0.2016.07.16-a34ee2f4b0a6454eff8ebe334750dd008d57de35_GH0.tar.gz) = 9e1941c7cc72b50a20f4f7a9495df42f7978d4cd1b9764c74787af997bd386f9 -SIZE (dropbox-lepton-0.2016.07.16-a34ee2f4b0a6454eff8ebe334750dd008d57de35_GH0.tar.gz) = 50216342 +TIMESTAMP = 1702853189 +SHA256 (dropbox-lepton-0.2022.04.13-429fe88_GH0.tar.gz) = c6e7bba595bc5f9ece5eca86ad33b2fe1487e1a0209788173fd74e0a79d79315 +SIZE (dropbox-lepton-0.2022.04.13-429fe88_GH0.tar.gz) = 52639694 diff --git a/graphics/lepton/files/patch-base-dependencies b/graphics/lepton/files/patch-base-dependencies index bad2808b9dfd..3fbf68a27cdb 100644 --- a/graphics/lepton/files/patch-base-dependencies +++ b/graphics/lepton/files/patch-base-dependencies @@ -1,28 +1,8 @@ -Do not use the zlib and md5 implementations bundled by -the author(s). Use base-system's... +Do not use the brotli, md5, and zlib implementations bundled by +the author(s). Use port's or the base-system's... 
-mi -+++ src/io/ZlibCompression.hh -@@ -28,7 +28,7 @@ - */ - - #include "Reader.hh" --#include "../../dependencies/zlib/zlib.h" -+#include <zlib.h> - namespace Sirikata{ - class SIRIKATA_EXPORT ZlibDecoderDecompressionReader : public DecoderReader { - protected: -+++ src/lepton/bitops.cc -@@ -38,7 +38,7 @@ reading and writing of arrays - #include <algorithm> - #include <assert.h> - extern "C" { --#include "../../dependencies/md5/md5.h" -+#include <openssl/md5.h> - } - #include "bitops.hh" - +++ src/io/ioutil.cc @@ -12,5 +12,5 @@ #include "Reader.hh" @@ -31,69 +11,41 @@ the author(s). Use base-system's... +#include <openssl/md5.h> #ifdef _WIN32 #include <Windows.h> -+++ Makefile.am -@@ -1,13 +1,13 @@ --includes = -I$(srcdir)/dependencies/xz/src/liblzma/api -I$(srcdir)/src/vp8/util -I$(srcdir)/src/vp8/model -I$(srcdir)/src/vp8/decoder -I$(srcdir)/src/vp8/encoder -I$(srcdir)/dependencies -I$(srcdir)/dependencies -I$(srcdir)/dependencies/xz/src/liblzma/common -I$(srcdir)/dependencies/xz/src/common -I$(srcdir)/dependencies/xz/src/liblzma/lzma -I$(srcdir)/dependencies/xz/src/liblzma/lz -I$(srcdir)/dependencies/xz/src/liblzma/check -I$(srcdir)/dependencies/xz/src/liblzma/rangecoder -I$(srcdir)/dependencies/xz/src/liblzma/api -I$(srcdir)/dependencies/xz/src/liblzma/simple -I$(srcdir)/dependencies/xz/src/liblzma/delta -I$(srcdir)/dependencies/xz/src/liblzma +--- Makefile.am 2022-04-13 03:01:03.000000000 -0400 ++++ Makefile.am 2023-12-17 17:58:27.679341000 -0500 +@@ -1,3 +1,3 @@ +-includes = -I$(srcdir)/dependencies/xz/src/liblzma/api -I$(srcdir)/src/vp8/util -I$(srcdir)/src/vp8/model -I$(srcdir)/src/vp8/decoder -I$(srcdir)/src/vp8/encoder -I$(srcdir)/dependencies -I$(srcdir)/dependencies -I$(srcdir)/dependencies/brotli/c/include -I$(srcdir)/dependencies/xz/src/liblzma/common -I$(srcdir)/dependencies/xz/src/common -I$(srcdir)/dependencies/xz/src/liblzma/lzma -I$(srcdir)/dependencies/xz/src/liblzma/lz -I$(srcdir)/dependencies/xz/src/liblzma/check 
-I$(srcdir)/dependencies/xz/src/liblzma/rangecoder -I$(srcdir)/dependencies/xz/src/liblzma/api -I$(srcdir)/dependencies/xz/src/liblzma/simple -I$(srcdir)/dependencies/xz/src/liblzma/delta -I$(srcdir)/dependencies/xz/src/liblzma +includes = -I$(srcdir)/src/vp8/util -I$(srcdir)/src/vp8/model -I$(srcdir)/src/vp8/decoder -I$(srcdir)/src/vp8/encoder --AM_CXXFLAGS = $(CXX11_FLAGS) $(PICKY_CXXFLAGS) $(ARCH_FLAGS) $(SANITIZE_FLAGS) $(NODEBUG_CXXFLAGS) -DGIT_REVISION=\"$(shell git describe --dirty --always)\" $(includes) + +@@ -5,14 +5,14 @@ + liblocalmd5_a_SOURCES = dependencies/md5/md5.c + +-AM_CXXFLAGS = $(CXX11_FLAGS) $(CODEC_FLAGS) $(SYSTEM_DEPENDENCIES_CFLAGS) $(ARITHMETIC_CODER_CFLAGS) $(MEMORY_MANAGEMENT_CFLAGS) $(THREAD_FLAGS) $(BILLING_FLAGS) $(PICKY_CXXFLAGS) $(BENCHMARK_CFLAGS) $(ARCH_FLAGS) $(SANITIZE_FLAGS) $(NODEBUG_CXXFLAGS) -DGIT_REVISION=\"$(shell git describe --dirty --always 2> /dev/null || basename `pwd`)\" $(includes) +AM_CXXFLAGS = $(CXX11_FLAGS) $(PICKY_CXXFLAGS) $(ARCH_FLAGS) $(SANITIZE_FLAGS) $(NODEBUG_CXXFLAGS) $(includes) - AM_CFLAGS = $(C99_FLAGS) $(PICKY_CFLAGS) $(ARCH_FLAGS) $(SANITIZE_FLAGS) $(NODEBUG_CXXFLAGS) + AM_CFLAGS = $(C99_FLAGS) $(CODEC_FLAGS) $(THREAD_FLAGS) $(BILLING_FLAGS) $(BENCHMARK_CFLAGS) $(PICKY_CFLAGS) $(ARCH_FLAGS) $(SANITIZE_FLAGS) $(NODEBUG_CXXFLAGS) -I$(srcdir)/dependencies/brotli/c/include --noinst_LIBRARIES = liblocalzlib.a libtestdriver.a liblocalmd5.a +-noinst_LIBRARIES = liblocalzlib.a liblocalmd5.a libtestdriver.a liblocalbrotli.a +noinst_LIBRARIES = libtestdriver.a - bin_PROGRAMS = lepton test_suite/test_invariants + bin_PROGRAMS = lepton + noinst_PROGRAMS = test_suite/test_invariants --lepton_LDADD = liblocalmd5.a liblocalzlib.a -lpthread -+lepton_LDADD = -lz -lcrypto -lpthread +-lepton_LDADD = liblocalmd5.a liblocalbrotli.a $(SYSTEM_DEPENDENCIES_LDFLAGS) -lpthread ++lepton_LDADD = -lz -lcrypto -lpthread -L${LOCALBASE}/lib -lbrotlidec -lbrotlienc lepton_SOURCES = \ -@@ -80,5 +80,4 @@ +@@ -92,5 +92,4 @@ 
src/vp8/model/model.cc \ src/vp8/model/model.hh \ - src/vp8/model/numeric.cc \ src/vp8/model/numeric.hh \ src/vp8/model/jpeg_meta.hh \ -@@ -94,33 +93,4 @@ - src/vp8/decoder/vpx_bool_reader.hh - --liblocalmd5_a_SOURCES = dependencies/md5/md5.c -- --liblocalzlib_a_SOURCES = dependencies/zlib/inflate.c \ --dependencies/zlib/inflate.h \ --dependencies/zlib/gzguts.h \ --dependencies/zlib/infback.c \ --dependencies/zlib/trees.c \ --dependencies/zlib/adler32.c \ --dependencies/zlib/gzclose.c \ --dependencies/zlib/inftrees.h \ --dependencies/zlib/zconf.h \ --dependencies/zlib/compress.c \ --dependencies/zlib/crc32.c \ --dependencies/zlib/crc32.h \ --dependencies/zlib/trees.h \ --dependencies/zlib/inftrees.c \ --dependencies/zlib/zutil.c \ --dependencies/zlib/gzwrite.c \ --dependencies/zlib/zutil.h \ --dependencies/zlib/zlib.h \ --dependencies/zlib/inffixed.h \ --dependencies/zlib/deflate.c \ --dependencies/zlib/inffast.h \ --dependencies/zlib/inffast.c \ --dependencies/zlib/uncompr.c \ --dependencies/zlib/gzread.c \ --dependencies/zlib/deflate.h \ --dependencies/zlib/gzlib.c -- - libtestdriver_a_SOURCES = test_suite/timing_driver.cc - -@@ -142,5 +112,5 @@ +@@ -232,5 +231,5 @@ src/io/DecoderPlatform.hh --test_suite_test_invariants_LDADD = liblocalzlib.a liblocalmd5.a -+test_suite_test_invariants_LDADD = -lz -lcrypto +-test_suite_test_invariants_LDADD = $(SYSTEM_DEPENDENCIES_LDFLAGS) liblocalmd5.a ++test_suite_test_invariants_LDADD = $(SYSTEM_DEPENDENCIES_LDFLAGS) -lcrypto - check_PROGRAMS = test_suite/test_recode_memory_bound test_suite/test_truncate_lowmem test_suite/test_android_lowmem test_suite/test_invariants test_suite/test_baseline_ujg test_suite/test_baseline test_suite/test_misc test_suite/test_iphone test_suite/test_phone_outdoor test_suite/test_truncate_ujg test_suite/test_truncate test_suite/test_SLR test_suite/test_progressive_ujg test_suite/test_progressive_disallowed test_suite/test_progressive test_suite/test_arithmetic_failfast test_suite/test_hq 
test_suite/test_baseline_unjailed test_suite/test_baseline_unjailed_thread test_suite/test_baseline_unjailed_decode test_suite/test_baseline_unjailed_decode_thread test_suite/test_seccomp_encode_main test_suite/test_seccomp_encode_thread test_suite/test_seccomp_decode_main test_suite/test_seccomp_decode_thread test_suite/test_nofsync test_suite/test_colorswap test_suite/test_odd_rst test_suite/test_trailing_header test_suite/test_trailing_rst test_suite/test_gray2sf test_suite/test_truncated_zero_run test_suite/test_bad_zero_run + check_PROGRAMS = test_suite/test_recode_memory_bound test_suite/test_truncate_lowmem test_suite/test_android_lowmem test_suite/test_invariants test_suite/test_baseline_ujg test_suite/test_baseline test_suite/test_misc test_suite/test_iphone test_suite/test_phone_outdoor test_suite/test_truncate test_suite/test_single_row_truncate test_suite/test_SLR test_suite/test_progressive_ujg test_suite/test_progressive_disallowed test_suite/test_progressive test_suite/test_arithmetic_failfast test_suite/test_hq test_suite/test_baseline_unjailed test_suite/test_baseline_unjailed_thread test_suite/test_baseline_unjailed_decode test_suite/test_baseline_unjailed_decode_thread test_suite/test_seccomp_encode_main test_suite/test_seccomp_encode_thread test_suite/test_seccomp_decode_main test_suite/test_seccomp_decode_thread test_suite/test_nofsync test_suite/test_colorswap test_suite/test_odd_rst test_suite/test_trailing_header test_suite/test_trailing_rst test_suite/test_gray2sf test_suite/test_truncated_zero_run test_suite/test_bad_zero_run diff --git a/graphics/lepton/files/patch-bsd b/graphics/lepton/files/patch-bsd deleted file mode 100644 index cac857193116..000000000000 --- a/graphics/lepton/files/patch-bsd +++ /dev/null @@ -1,148 +0,0 @@ -Make buildable on BSD-systems other than Apple. Mostly this -simply replaces #ifdef __APPLE__ with #ifdef BSD, but not -only...
- - -mi - -+++ src/io/DecoderPlatform.hh -@@ -26,7 +26,7 @@ - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ --#if defined (__linux) || defined (__APPLE__) -+#if defined (__linux) || defined (BSD) - #define SIRIKATA_FUNCTION_EXPORT __attribute__ ((visibility("default"))) - #define SIRIKATA_EXPORT __attribute__ ((visibility("default"))) - #define SIRIKATA_PLUGIN_EXPORT __attribute__ ((visibility("default"))) -+++ src/io/MemMgrAllocator.cc -@@ -41,7 +41,7 @@ - #include <cstdint> - #include "DecoderPlatform.hh" - #include "MemMgrAllocator.hh" --#if (defined(__APPLE__) || __cplusplus <= 199711L) && !defined(_WIN32) -+#if (defined(BSD) || __cplusplus <= 199711L) && !defined(_WIN32) - #define THREAD_LOCAL_STORAGE __thread - #else - #include <atomic> -@@ -151,7 +151,7 @@ void setup_memmgr(MemMgrState& memmgr, u - } - void memmgr_init(size_t main_thread_pool_size, size_t worker_thread_pool_size, size_t num_workers, size_t x_min_pool_alloc_quantas, bool needs_huge_pages) - { --#ifdef __APPLE__ -+#ifdef BSD - // in apple, the thread_local storage winds up different when destroying the thread - num_workers *= 2; - #endif -+++ src/lepton/fork_serve.cc -@@ -10,7 +10,7 @@ - #include <fcntl.h> - #include <unistd.h> - #include <algorithm> --#ifndef __APPLE__ -+#ifndef BSD - #include <wait.h> - #else - #include <sys/wait.h> -+++ src/lepton/socket_serve.cc -@@ -11,7 +11,7 @@ - #include <algorithm> - #include <netinet/in.h> - #include <sys/time.h> --#ifndef __APPLE__ -+#ifndef BSD - #include <sys/signalfd.h> - #include <wait.h> - #else -@@ -127,7 +127,7 @@ int should_wait_bitmask(size_t children_ - - int make_sigchld_fd() { - int fd = -1; --#ifndef __APPLE__ -+#ifndef BSD - sigset_t sigset; - int err = sigemptyset(&sigset); - always_assert(err == 0); -@@ -233,7 +233,7 @@ void serving_loop(int unix_domain_socket - if (fds[i].revents & POLLIN) { - fds[i].revents = 0; - if (fds[i].fd == 
sigchild_fd) { --#ifndef __APPLE__ -+#ifndef BSD - struct signalfd_siginfo info; - ssize_t ignore = read(fds[i].fd, &info, sizeof(info)); - (void)ignore; -+++ src/vp8/model/jpeg_meta.hh -@@ -3,7 +3,7 @@ - - #include <vector> - #include <type_traits> --#ifndef __APPLE__ -+#ifndef BSD - #ifndef _WIN32 - #include <endian.h> - #endif -+++ src/vp8/util/memory.cc -@@ -13,7 +13,7 @@ - #ifdef _WIN32 - #define USE_STANDARD_MEMORY_ALLOCATORS - #endif --#if defined(__APPLE__) || (__cplusplus <= 199711L && !defined(_WIN32)) -+#if defined(BSD) || (__cplusplus <= 199711L && !defined(_WIN32)) - #define THREAD_LOCAL_STORAGE __thread - #else - #define THREAD_LOCAL_STORAGE thread_local -+++ src/vp8/util/vpx_config.hh -@@ -44,6 +44,6 @@ - #ifndef _BSD_SOURCE - #define _BSD_SOURCE /* See feature_test_macros(7) */ - #endif --#include <endian.h> -+#include <sys/endian.h> - #endif - #endif -+++ test_suite/timing_driver.cc -@@ -6,11 +6,13 @@ - #include <unistd.h> - #include <assert.h> - #include <errno.h> -+#include <stdlib.h> - #include <string.h> - #include <sys/types.h> - #include <sys/wait.h> - #include <fcntl.h> - #include <sys/select.h> -+#include <sys/signal.h> - #include <sys/time.h> - #include "smalljpg.hh" - -+++ src/lepton/jpgcoder.cc -@@ -77,11 +77,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBI - int g_argc = 0; - const char** g_argv = NULL; - #ifndef GIT_REVISION --#include "version.hh" --#ifndef GIT_REVISION - #define GIT_REVISION "unknown" - #endif --#endif - bool fast_exit = true; - #ifdef SKIP_VALIDATION - bool g_skip_validation = true; -+++ src/io/ioutil.cc -@@ -325,13 +325,13 @@ - int input_tee_flags = 0; - int copy_to_storage_flags = 0; --#ifndef __APPLE__ -+#ifndef BSD - input_tee_flags = fcntl(input_tee, F_GETFL, 0); - #endif - fcntl(input_tee, F_SETFL, input_tee_flags | O_NONBLOCK); --#ifndef __APPLE__ -+#ifndef BSD - copy_to_input_tee_flags = fcntl(copy_to_input_tee, F_GETFL, 0); - #endif - fcntl(copy_to_input_tee, F_SETFL, copy_to_input_tee_flags | O_NONBLOCK); 
--#ifndef __APPLE__ -+#ifndef BSD - copy_to_storage_flags = fcntl(copy_to_storage, F_GETFL, 0); - #endif diff --git a/graphics/lepton/files/patch-cpu b/graphics/lepton/files/patch-cpu deleted file mode 100644 index 4ee98d914194..000000000000 --- a/graphics/lepton/files/patch-cpu +++ /dev/null @@ -1,217 +0,0 @@ -Make -- or attempt to -- the code work on CPUs with only SSSE3 -instruction set... - - -mi - ---- src/lepton/idct.cc -+++ src/lepton/idct.cc -@@ -1,8 +1,7 @@ - /* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ --#include <emmintrin.h> --#include <smmintrin.h> - #include <immintrin.h> - #include "../vp8/util/aligned_block.hh" -+#include "../vp8/util/mm_mullo_epi32.hh" - - namespace idct_local{ - enum { -@@ -23,7 +21,10 @@ enum { - r2 = 181 // 256/sqrt(2) - }; - } --void idct_scalar(const AlignedBlock &block, const uint16_t q[64], int16_t outp[64], bool ignore_dc) { -+ -+#ifndef __SSE2__ -+static void -+idct_scalar(const AlignedBlock &block, const uint16_t q[64], int16_t outp[64], bool ignore_dc) { - int32_t intermed[64]; - using namespace idct_local; - // Horizontal 1-D IDCT. 
-@@ -149,6 +150,8 @@ void idct_scalar(const AlignedBlock &blo - //outp[i]>>=3; - } - } -+#else /* At least SSE2 is available { */ -+ - template<int which_vec, int offset, int stride> __m128i vget_raster(const AlignedBlock&block) { - return _mm_set_epi32(block.coefficients_raster(which_vec + 3 * stride + offset), - block.coefficients_raster(which_vec + 2 * stride + offset), -@@ -162,8 +165,8 @@ template<int offset, int stride> __m128i - q[which_vec + offset])); - } - -- --__m128i epi32l_to_epi16(__m128i lowvec) { -+static __m128i -+epi32l_to_epi16(__m128i lowvec) { - return _mm_shuffle_epi8(lowvec, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, - 0xd, 0xc, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0)); - } -@@ -181,9 +184,8 @@ __m128i epi32l_to_epi16(__m128i lowvec) - }while(0) - - -- -- --void idct_sse(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) { -+static void -+idct_sse(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) { - - char vintermed_storage[64 * sizeof(int32_t) + 16]; - // align intermediate storage to 16 bytes -@@ -202,7 +204,12 @@ void idct_sse(const AlignedBlock &block, - xv6 = vget_raster<0, 5, 8>(block); - xv7 = vget_raster<0, 3, 8>(block); - if (__builtin_expect(ignore_dc, true)) { -+#ifdef __SSE4_1__ - xv0 = _mm_insert_epi32(xv0, 0, 0); -+#else -+// See http://stackoverflow.com/questions/38384520/is-there-a-sse2-equivalent-for-mm-insert-epi32 -+ xv0 = _mm_and_si128(xv0, _mm_set_epi32(-1,-1,-1, 0)); -+#endif - } - } else { - xv0 = vget_raster<32, 0, 8>(block); -@@ -378,7 +385,8 @@ __m128i m256_to_epi16(__m256i vec) { - - }*/ - #if __AVX2__ --void idct_avx(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) { -+static void -+idct_avx(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) { - // align intermediate storage to 16 bytes - using namespace idct_local; - // Horizontal 1-D IDCT. 
-@@ -589,11 +597,16 @@ void idct_avx(const AlignedBlock &block, - #endif - } - } --#else --void idct_avx(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) { -- idct_sse(block, q, voutp, ignore_dc); --} - #endif --void idct(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) { -+#endif /* } SSE2 or higher is available */ -+ -+void -+idct(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) { -+#ifdef __AVX2__ - idct_avx(block, q, voutp, ignore_dc); -+#elif __SSE2__ -+ idct_sse(block, q, voutp, ignore_dc); -+#else -+ idct_scalar(block, q, voutp, ignore_dc); -+#endif - } ---- src/lepton/vp8_encoder.cc -+++ src/lepton/vp8_encoder.cc -@@ -150,29 +150,34 @@ void VP8ComponentEncoder::process_row(Pr - } - } - uint32_t aligned_block_cost(const AlignedBlock &block) { -- uint32_t cost = 16; // .25 cost for zeros -- if (VECTORIZE) { -- for (int i = 0; i < 64; i+= 8) { -- __m128i val = _mm_abs_epi16(_mm_load_si128((const __m128i*)(const char*)(block.raw_data() + i))); -- __m128i v_cost = _mm_set1_epi16(0); -- while (!_mm_test_all_zeros(val, val)) { -- __m128i mask = _mm_cmpgt_epi16(val, _mm_setzero_si128()); -- v_cost = _mm_add_epi16(v_cost, _mm_and_si128(mask, _mm_set1_epi16(2))); -- val = _mm_srli_epi16(val, 1); -- } -- __m128i sum = _mm_add_epi16(v_cost, _mm_srli_si128(v_cost, 8)); -- sum = _mm_add_epi16(sum ,_mm_srli_si128(sum, 4)); -- sum = _mm_add_epi16(sum, _mm_srli_si128(sum, 2)); -- cost += _mm_extract_epi16(sum, 0); -- } -- } else { -- uint32_t scost = 0; -- for (int i = 0; i < 64; ++i) { -- scost += 1 + 2 * uint16bit_length(abs(block.raw_data()[i])); -+#ifdef __SSE2__ /* SSE2 or higher instruction set available { */ -+ const __m128i zero = _mm_setzero_si128(); -+ __m128i v_cost; -+ for (int i = 0; i < 64; i+= 8) { -+ __m128i val = _mm_abs_epi16(_mm_load_si128((const __m128i*)(const char*)(block.raw_data() + i))); -+ v_cost = _mm_set1_epi16(0); -+#ifndef __SSE4_1__ -+ 
while (_mm_movemask_epi8(_mm_cmpeq_epi32(val, zero)) != 0xFFFF) -+#else -+ while (!_mm_test_all_zeros(val, val)) -+#endif -+ { -+ __m128i mask = _mm_cmpgt_epi16(val, zero); -+ v_cost = _mm_add_epi16(v_cost, _mm_and_si128(mask, _mm_set1_epi16(2))); -+ val = _mm_srli_epi16(val, 1); - } -- cost = scost; -+ v_cost = _mm_add_epi16(v_cost, _mm_srli_si128(v_cost, 8)); -+ v_cost = _mm_add_epi16(v_cost ,_mm_srli_si128(v_cost, 4)); -+ v_cost = _mm_add_epi16(v_cost, _mm_srli_si128(v_cost, 2)); - } -- return cost; -+ return 16 + _mm_extract_epi16(v_cost, 0); -+#else /* } No SSE2 instructions { */ -+ uint32_t scost = 0; -+ for (int i = 0; i < 64; ++i) { -+ scost += 1 + 2 * uint16bit_length(abs(block.raw_data()[i])); -+ } -+ return scost; -+#endif /* } */ - } - - #ifdef ALLOW_FOUR_COLORS ---- src/vp8/model/model.hh -+++ src/vp8/model/model.hh -@@ -11,9 +11,7 @@ - #include "branch.hh" - #include "../util/aligned_block.hh" - #include "../util/block_based_image.hh" --#include <smmintrin.h> --#include <immintrin.h> --#include <emmintrin.h> -+#include "../util/mm_mullo_epi32.hh" - - class BoolEncoder; - constexpr bool advanced_dc_prediction = true; ---- src/vp8/model/numeric.hh -+++ src/vp8/model/numeric.hh -@@ -8,8 +8,8 @@ - // for std::min - #include <algorithm> - #include <assert.h> --#include <smmintrin.h> --#include <emmintrin.h> -+#include <immintrin.h> -+#include "../util/mm_mullo_epi32.hh" - - #ifdef _WIN32 - #include <intrin.h> ---- src/vp8/util/mm_mullo_epi32.hh -+++ src/vp8/util/mm_mullo_epi32.hh -@@ -0,0 +1,16 @@ -+#if defined(__SSE2__) && !defined(__SSE4_1__) && !defined(MM_MULLO_EPI32_H) -+#define MM_MULLO_EPI32_H -+#include <immintrin.h> -+// See: http://stackoverflow.com/questions/10500766/sse-multiplication-of-4-32-bit-integers -+// and https://software.intel.com/en-us/forums/intel-c-compiler/topic/288768 -+static inline __m128i -+_mm_mullo_epi32(const __m128i &a, const __m128i &b) -+{ -+ __m128i tmp1 = _mm_mul_epu32(a,b); /* mul 2,0*/ -+ __m128i tmp2 = 
_mm_mul_epu32(_mm_srli_si128(a,4), -+ _mm_srli_si128(b,4)); /* mul 3,1 */ -+ return _mm_unpacklo_epi32( /* shuffle results to [63..0] and pack */ -+ _mm_shuffle_epi32(tmp1, _MM_SHUFFLE (0,0,2,0)), -+ _mm_shuffle_epi32(tmp2, _MM_SHUFFLE (0,0,2,0))); -+} -+#endif -+++ src/lepton/recoder.cc -@@ -99,5 +99,5 @@ - - static bool aligned_memchr16ff(const unsigned char *local_huff_data) { --#if 1 -+#if !defined(__i386__) - __m128i buf = _mm_load_si128((__m128i const*)local_huff_data); - __m128i ff = _mm_set1_epi8(-1); diff --git a/graphics/lepton/files/patch-warnings b/graphics/lepton/files/patch-warnings index ed3155b80426..992380cb67b5 100644 --- a/graphics/lepton/files/patch-warnings +++ b/graphics/lepton/files/patch-warnings @@ -1,19 +1,4 @@ -+++ src/io/MemMgrAllocator.cc -@@ -185,5 +185,5 @@ - } - if (!data) { -- fprintf(stderr, "Insufficient memory: unable to mmap or calloc %ld bytes\n", total_size); -+ fprintf(stderr, "Insufficient memory: unable to mmap or calloc %zu bytes\n", total_size); - fflush(stderr); - exit(37); +++ test_suite/timing_driver.cc -@@ -79,5 +81,5 @@ - } - } else { -- fprintf(stderr, "Files differ in size %ld != %ld\n", data_size, roundtrip_size); -+ fprintf(stderr, "Files differ in size %zu != %zu\n", data_size, roundtrip_size); - } - int status; @@ -396,5 +398,5 @@ leptonBuffer.size()); if (result != testImage.size()) { @@ -21,13 +6,6 @@ + fprintf(stderr, "Output Size %zu != %zu\n", result, testImage.size()); } always_assert(result == (size_t)testImage.size() && -@@ -508,5 +510,5 @@ - for (std::vector<const char *>::const_iterator filename = filenames.begin(); filename != filenames.end(); ++filename) { - testImage = load(*filename); -- fprintf(stderr, "Loading iPhone %ld\n", testImage.size()); -+ fprintf(stderr, "Loading iPhone %u\n", testImage.size()); - int retval = run_test(testImage, - use_lepton, jailed, inject_syscall_level, allow_progressive_files, multithread, +++ src/lepton/validation.cc @@ -159,5 +159,5 @@ } @@ -40,13 +18,96 @@ @@ 
-339,3 +339,3 @@ static_assert(sizeof(buffer) >= header.size(), "Buffer must be able to hold header"); - uint32_t cursor = 0; -+ ssize_t cursor = 0; ++ size_t cursor = 0; bool finished = false; -+++ src/lepton/jpgcoder.cc -@@ -1101,5 +1098,5 @@ - if (false) { - fprintf(stderr, -- "Predicted Decompress %ld\nAllocated This Run %ld vs Max allocated %ld\nMax Peak Size %ld vs %ld\naug-gbg %ld, garbage %ld\nbit_writer %ld\nmux %d\n", -+ "Predicted Decompress %zu\nAllocated This Run %zu vs Max allocated %zu\nMax Peak Size %zu vs %zu\naug-gbg %zu, garbage %zu\nbit_writer %zu\nmux %d\n", - decom_memory_bound, - Sirikata::memmgr_size_allocated(), +@@ -576,5 +576,5 @@ + if (del > 0) { + //fprintf(stderr, "D\n"); +- if (del < cursor) { ++ if ((size_t)del < cursor) { + //fprintf(stderr, "E %ld %ld\n", del, cursor - del); + memmove(buffer, buffer + del, cursor - del); +--- src/vp8/encoder/encoder.cc 2022-04-13 03:01:03.000000000 -0400 ++++ src/vp8/encoder/encoder.cc 2023-12-17 18:33:33.889333000 -0500 +@@ -380,12 +380,4 @@ + } + +- double delta = 0; +- for (int i = 0; i < 64; ++i) { +- delta += outp[i] - outp_sans_dc[i]; +- //fprintf (stderr, "%d + %d = %d\n", outp_sans_dc[i], context.here().dc(), outp[i]); +- } +- delta /= 64; +- //fprintf (stderr, "==== %f = %f =?= %d\n", delta, delta * 8, context.here().dc()); +- + int debug_width = LeptonDebug::getDebugWidth((int)color); + int offset = k_debug_block[(int)color]; +--- test_suite/test_invariants.cc 2022-04-13 03:01:03.000000000 -0400 ++++ test_suite/test_invariants.cc 2023-12-17 18:37:42.915625000 -0500 +@@ -464,5 +464,5 @@ + uint8_t* d =&aligned7d.at(0, 2, 1, 3, 2, 1, 0); + *d = 4; +- size_t offset = d - (uint8_t*)nullptr; ++ size_t offset = (uintptr_t)d; + always_assert(0 == (offset & 15) && "Must have alignment"); + always_assert(aligned7d.at(0, 2, 1, 3, 2, 1, 0) == 4); +@@ -470,5 +470,5 @@ + uint8_t* d2 =&a7.at(0, 2, 1, 3, 2, 1, 0); + *d2 = 5; +- offset = d2 - (uint8_t*)nullptr; ++ offset = (uintptr_t)d2; + if (offset & 
15) { + fprintf(stderr, "Array7d array doesn't require alignment"); +--- src/vp8/model/model.cc 2022-04-13 03:01:03.000000000 -0400 ++++ src/vp8/model/model.cc 2023-12-17 18:40:51.433134000 -0500 +@@ -44,8 +44,8 @@ + __m256i r1 = _mm256_loadu_si256((const __m256i*)(data + 32)); + __m256i r2 = _mm256_loadu_si256((const __m256i*)(data + 64)); +- size_t offset = data - (char*)0; ++ size_t offset = (uintptr_t)data; + size_t align = 32 - (offset % 32); + char * dataend = (char*)end; +- size_t offsetend = dataend - (char*)0; ++ size_t offsetend = (uintptr_t)dataend; + __m256i *write_end = (__m256i*)(dataend - (offsetend % 32)); + __m256i *write_cursor = (__m256i*)(data + align); +--- src/vp8/util/block_based_image.hh 2022-04-13 03:01:03.000000000 -0400 ++++ src/vp8/util/block_based_image.hh 2023-12-17 18:41:56.054633000 -0500 +@@ -67,5 +67,5 @@ + nblocks_ = nblocks; + storage_ = (uint8_t*)custom_calloc(nblocks * sizeof(Block) + 31); +- size_t offset = storage_ - (uint8_t*)nullptr; ++ size_t offset = (uintptr_t)storage_; + if (offset & 31) { //needs alignment adjustment + image_ = (Block*)(storage_ + 32 - (offset & 31)); +--- src/lepton/idct.cc 2022-04-13 03:01:03.000000000 -0400 ++++ src/lepton/idct.cc 2023-12-17 18:46:58.471208000 -0500 +@@ -198,5 +198,5 @@ + char vintermed_storage[64 * sizeof(int32_t) + 16]; + // align intermediate storage to 16 bytes +- int32_t *vintermed = (int32_t*) (vintermed_storage + 16 - ((vintermed_storage - (char*)nullptr) &0xf)); ++ int32_t *vintermed = (int32_t*) (vintermed_storage + 16 - ((uintptr_t)vintermed_storage &0xf)); + using namespace idct_local; + // Horizontal 1-D IDCT. 
+--- src/io/Zlib0.hh 2022-04-13 03:01:03.000000000 -0400 ++++ src/io/Zlib0.hh 2023-12-17 18:48:25.076584000 -0500 +@@ -39,5 +39,4 @@ + uint32_t mAdler32; // adler32 sum + bool mClosed; +- uint16_t mBilledBytesLeft; + std::pair<uint32, JpegError> writeHeader(); + +--- src/lepton/jpgcoder.cc 2022-04-13 03:01:03.000000000 -0400 ++++ src/lepton/jpgcoder.cc 2023-12-17 18:58:02.650346000 -0500 +@@ -1245,11 +1245,4 @@ + current_run_size = cumulative_buffer_size; + +- size_t bit_writer_augmentation = 0; +- if (g_allow_progressive) { +- for (size_t cur_size = jpgfilesize - 1; cur_size; cur_size >>=1) { +- bit_writer_augmentation |= cur_size; +- } +- bit_writer_augmentation += 1; // this is used to compute the buffer size of the abit_writer for writing +- } + size_t garbage_augmentation = 0; + for (size_t cur_size = hdrs - 1; cur_size; cur_size >>=1) {
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202312180031.3BI0VRc4010447>