Date: Mon, 2 Mar 2009 12:44:21 +0100 From: Tomek <tomek@slimaczek.pl> To: freebsd-multimedia@freebsd.org Subject: x264 patch Message-ID: <972337066.20090302124421@slimaczek.pl>
next in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
Hello
I was trying to compile a new x264 package from source and found some
difficulties:
- there is no log2f() function in the math library
- some asm code (SSSE3) fails to compile -- GCC 4.2.1 shipped
with FreeBSD doesn't understand instructions such as 'pmaddubsw'
I've made a patch to solve these problems:
- add a log2f() function - fast but less accurate (thanks to Sun Microsystems)
(added files: bsdlog.h, bsdlogf.c)
- add --disable-asm-gcc-ssse3 to the configure script
How to apply:
$ git clone git://git.videolan.org/x264.git
$ patch -p0 < x264.bsd.patch
$ cd x264
$ ./configure --extra-cflags="-mmmx -msse -msse2 -msse3 -I/usr/local/include -fPIC" --extra-ldflags="-L/usr/local/lib" --enable-shared --enable-mp4-output --enable-pthread --disable-asm-gcc-ssse3
If your CPU doesn't have SSE3, simply remove -msse3 from --extra-cflags
$ gmake
# gmake install
$ x264 --version
x264 0.66.1115M 11863ac
built on Mar 1 2009, gcc: 4.2.1 20070719 [FreeBSD]
Tested on i386 and amd64 with FreeBSD 7.1. On big-endian platforms the
ieee_double_shape_type union (declared in bsdlog.h) must be changed,
but I don't have such a platform to test on.
--
Tomasz Sowa
[-- Attachment #2 --]
diff -urN x264/Makefile x264.new/Makefile
--- x264/Makefile 2009-03-01 09:05:23.000000000 +0000
+++ x264.new/Makefile 2009-03-01 09:06:13.000000000 +0000
@@ -10,7 +10,8 @@
common/quant.c common/vlc.c \
encoder/analyse.c encoder/me.c encoder/ratecontrol.c \
encoder/set.c encoder/macroblock.c encoder/cabac.c \
- encoder/cavlc.c encoder/encoder.c
+ encoder/cavlc.c encoder/encoder.c \
+ bsdlogf.c
SRCCLI = x264.c matroska.c muxers.c
diff -urN x264/bsdlog.h x264.new/bsdlog.h
--- x264/bsdlog.h 1970-01-01 00:00:00.000000000 +0000
+++ x264.new/bsdlog.h 2009-03-01 09:06:13.000000000 +0000
@@ -0,0 +1,143 @@
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+#ifndef bsdlog_headerfile
+#define bsdlog_headerfile
+
+#include <math.h>
+#include <sys/types.h>
+
+/* Base-2 logarithm of a float; the definition lives in bsdlogf.c. */
+float log2f(float);
+
+
+/* Overlay of a double with two 32 bit words, used by the word-access macros. */
+/* Little-endian layout: the less significant word is the first struct member. */
+#if (__BYTE_ORDER == __LITTLE_ENDIAN) && !defined(__arm__)
+
+typedef union
+{
+double value;
+struct
+{
+u_int32_t lsw; /* less significant 32 bits */
+u_int32_t msw; /* more significant 32 bits */
+} parts;
+} ieee_double_shape_type;
+
+#endif
+
+
+
+
+#if (__BYTE_ORDER == __BIG_ENDIAN) || defined(__arm__)
+/* Big-endian layout: the union below puts msw before lsw. */
+/* It is left commented out because the *_ENDIAN macros were unreliable with */
+/* the author's compiler, so a big-endian build must enable it by hand. */
+/* NOTE(review): names undefined in #if evaluate to 0 -- confirm the macros. */
+/*
+typedef union
+{
+double value;
+struct
+{
+u_int32_t msw;
+u_int32_t lsw;
+} parts;
+} ieee_double_shape_type;
+*/
+
+#endif
+
+
+
+/* Each accessor below copies the double through ieee_double_shape_type. */
+/* Get both 32 bit ints from a double: ix0 = more significant, ix1 = less. */
+#define EXTRACT_WORDS(ix0,ix1,d) \
+do { \
+ieee_double_shape_type ew_u; \
+ew_u.value = (d); \
+(ix0) = ew_u.parts.msw; \
+(ix1) = ew_u.parts.lsw; \
+} while (0)
+
+/* Get the more significant 32 bit int from a double. */
+#define GET_HIGH_WORD(i,d) \
+do { \
+ieee_double_shape_type gh_u; \
+gh_u.value = (d); \
+(i) = gh_u.parts.msw; \
+} while (0)
+
+/* Get the less significant 32 bit int from a double. */
+
+#define GET_LOW_WORD(i,d) \
+do { \
+ieee_double_shape_type gl_u; \
+gl_u.value = (d); \
+(i) = gl_u.parts.lsw; \
+} while (0)
+
+/* NOTE: no macro that sets a double from two 32 bit ints is defined here. */
+
+
+/* Set the more significant 32 bits of a double from an int. */
+/* d is read, patched and assigned back, so it must be a modifiable lvalue. */
+#define SET_HIGH_WORD(d,v) \
+do { \
+ieee_double_shape_type sh_u; \
+sh_u.value = (d); \
+sh_u.parts.msw = (v); \
+(d) = sh_u.value; \
+} while (0)
+
+/* Set the less significant 32 bits of a double from an int. */
+/* d is read, patched and assigned back, so it must be a modifiable lvalue. */
+#define SET_LOW_WORD(d,v) \
+do { \
+ieee_double_shape_type sl_u; \
+sl_u.value = (d); \
+sl_u.parts.lsw = (v); \
+(d) = sl_u.value; \
+} while (0)
+
+/* NOTE(review): the macros below assume float and u_int32_t share a size -- confirm. */
+
+/* A union which permits us to convert between a float and a 32 bit
+int. */
+
+typedef union
+{
+float value; /* the number viewed as a float */
+u_int32_t word; /* the same storage viewed as an unsigned 32 bit int */
+} ieee_float_shape_type;
+
+/* Get a 32 bit int from a float. */
+
+#define GET_FLOAT_WORD(i,d) \
+do { \
+ieee_float_shape_type gf_u; \
+gf_u.value = (d); \
+(i) = gf_u.word; \
+} while (0)
+
+/* Set a float from a 32 bit int. */
+
+#define SET_FLOAT_WORD(d,i) \
+do { \
+ieee_float_shape_type sf_u; \
+sf_u.word = (i); \
+(d) = sf_u.value; \
+} while (0)
+
+
+
+#endif
diff -urN x264/bsdlogf.c x264.new/bsdlogf.c
--- x264/bsdlogf.c 1970-01-01 00:00:00.000000000 +0000
+++ x264.new/bsdlogf.c 2009-03-01 09:06:13.000000000 +0000
@@ -0,0 +1,77 @@
+/*
+ * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@xxxxxxxxxxx
+ */
+
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+#include "bsdlog.h"
+
+/* Constants for log2f; the hex notes give the float bit pattern of each value. */
+static const float
+ln2 = 0.6931471805599452862268, /* natural logarithm of 2 */
+two25 = 3.355443200e+07, /* 0x4c000000, i.e. 2**25 */
+Lg1 = 6.6666668653e-01, /* 3F2AAAAB */
+Lg2 = 4.0000000596e-01, /* 3ECCCCCD */
+Lg3 = 2.8571429849e-01, /* 3E924925 */
+Lg4 = 2.2222198546e-01, /* 3E638E29 */
+Lg5 = 1.8183572590e-01, /* 3E3A3325 */
+Lg6 = 1.5313838422e-01, /* 3E1CD04F */
+Lg7 = 1.4798198640e-01; /* 3E178897 */
+/* Lg1..Lg7 are the polynomial coefficients used for t1 and t2 in log2f. */
+static const float zero = 0.0; /* divisor for the -inf/NaN results; also compared with f */
+/* log2f: base-2 logarithm of x in single precision. */
+/* Special cases: +-0 gives -inf, negative x gives NaN, and a bit pattern */
+/* at or above 0x7f800000 (inf/NaN in IEEE-754 single) returns x+x. */
+float log2f(float x)
+{
+ float hfsq,f,s,z,R,w,t1,t2,dk;
+ int32_t k,ix,i,j;
+
+ GET_FLOAT_WORD(ix,x); /* ix = bit pattern of x as a signed 32 bit int */
+
+ k=0; /* k accumulates the binary exponent of x */
+ if (ix < 0x00800000) { /* x < 2**-126 */
+ if ((ix&0x7fffffff)==0)
+ return -two25/zero; /* log(+-0)=-inf */
+ if (ix<0) return (x-x)/zero; /* log(-#) = NaN */
+ k -= 25; x *= two25; /* subnormal number, scale up x */
+ GET_FLOAT_WORD(ix,x);
+ }
+ if (ix >= 0x7f800000) return x+x; /* inf or NaN: propagate the input */
+ k += (ix>>23)-127; /* remove the exponent bias */
+ ix &= 0x007fffff; /* keep only the 23 fraction bits */
+ i = (ix+(0x95f64<<3))&0x800000; /* 0x800000 if the fraction carries past the threshold, else 0 */
+ SET_FLOAT_WORD(x,ix|(i^0x3f800000)); /* normalize x or x/2 */
+ k += (i>>23); /* compensate the exponent when x was halved */
+ dk = (float)k;
+ f = x-(float)1.0; /* f = normalized x minus 1 */
+ if((0x007fffff&(15+ix))<16) { /* |f| < 2**-20 */
+ if (f==zero)
+ return (dk); /* normalized x is exactly 1, so the result is the exponent */
+ R = f*f*((float)0.5-(float)0.33333333333333333*f);
+ return (dk-(R-f)/ln2); /* near 1: low-order polynomial in f, divided by ln2 */
+ }
+ s = f/((float)2.0+f);
+ z = s*s;
+ i = ix-(0x6147a<<3);
+ w = z*z;
+ j = (0x6b851<<3)-ix;
+ t1= w*(Lg2+w*(Lg4+w*Lg6)); /* even-power terms of the polynomial in s */
+ t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); /* odd-power terms of the polynomial in s */
+ i |= j; /* i > 0 only when both range tests on ix are positive */
+ R = t2+t1;
+ if(i>0) {
+ hfsq=(float)0.5*f*f;
+ return (dk-(hfsq-s*(hfsq+R)-f)/ln2);
+ } else
+ return (dk-((s*(f-R))-f)/ln2);
+}
diff -urN x264/common/x86/predict-c.c x264.new/common/x86/predict-c.c
--- x264/common/x86/predict-c.c 2009-03-01 09:05:24.000000000 +0000
+++ x264.new/common/x86/predict-c.c 2009-03-01 09:53:47.000000000 +0000
@@ -111,6 +111,7 @@
PREDICT_16x16_P( sse2 )
#ifdef __GNUC__
+#ifndef DISABLE_ASM_GCC_SSSE3
static void predict_16x16_p_ssse3( uint8_t *src )
{
int a, b, c, i00;
@@ -146,6 +147,7 @@
predict_16x16_p_core_sse2( src, i00, b, c );
}
#endif
+#endif
#define PREDICT_8x8_P(name)\
static void predict_8x8c_p_##name( uint8_t *src )\
@@ -171,6 +173,7 @@
PREDICT_8x8_P( sse2 )
#ifdef __GNUC__
+#ifndef DISABLE_ASM_GCC_SSSE3
static void predict_8x8c_p_ssse3( uint8_t *src )
{
int a, b, c, i00;
@@ -199,6 +202,7 @@
predict_8x8c_p_core_sse2( src, i00, b, c );
}
#endif
+#endif
#define PREDICT_16x16_DC(name)\
static void predict_16x16_dc_##name( uint8_t *src )\
@@ -395,8 +399,10 @@
return;
pf[I_PRED_16x16_H] = predict_16x16_h_ssse3;
#ifdef __GNUC__
+#ifndef DISABLE_ASM_GCC_SSSE3
pf[I_PRED_16x16_P] = predict_16x16_p_ssse3;
#endif
+#endif
}
void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
@@ -422,8 +428,10 @@
return;
pf[I_PRED_CHROMA_H] = predict_8x8c_h_ssse3;
#ifdef __GNUC__
+#ifndef DISABLE_ASM_GCC_SSSE3
pf[I_PRED_CHROMA_P] = predict_8x8c_p_ssse3;
#endif
+#endif
}
void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
diff -urN x264/configure x264.new/configure
--- x264/configure 2009-03-01 09:05:24.000000000 +0000
+++ x264.new/configure 2009-03-01 09:49:31.000000000 +0000
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/local/bin/bash
if test x"$1" = x"-h" -o x"$1" = x"--help" ; then
@@ -10,6 +10,7 @@
echo " --disable-avis-input disables avisynth input (win32 only)"
echo " --disable-mp4-output disables mp4 output (using gpac)"
echo " --disable-pthread disables multithreaded encoding"
+echo " --disable-asm-gcc-ssse3 disables SSSE3 (SSE3 extension) in GCC on x86 (used on older GCC than 4.3)"
echo " --disable-asm disables assembly optimizations on x86"
echo " --enable-debug adds -g, doesn't strip"
echo " --enable-gprof adds -pg, doesn't strip"
@@ -92,6 +93,9 @@
--enable-asm)
asm="yes"
;;
+ --disable-asm-gcc-ssse3)
+ CFLAGS="$CFLAGS -DDISABLE_ASM_GCC_SSSE3"
+ ;;
--disable-asm)
asm="no"
;;
diff -urN x264/encoder/analyse.c x264.new/encoder/analyse.c
--- x264/encoder/analyse.c 2009-03-01 09:05:24.000000000 +0000
+++ x264.new/encoder/analyse.c 2009-03-01 09:42:27.000000000 +0000
@@ -36,6 +36,7 @@
#include "ratecontrol.h"
#include "analyse.h"
#include "rdo.c"
+#include "../bsdlog.h"
typedef struct
{
diff -urN x264/version.sh x264.new/version.sh
--- x264/version.sh 2009-03-01 09:05:24.000000000 +0000
+++ x264.new/version.sh 2009-03-01 09:21:51.000000000 +0000
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/local/bin/bash
git rev-list HEAD | sort > config.git-hash
LOCALVER=`wc -l config.git-hash | awk '{print $1}'`
if [ $LOCALVER \> 1 ] ; then
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?972337066.20090302124421>
