Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 2 Mar 2009 12:44:21 +0100
From:      Tomek <tomek@slimaczek.pl>
To:        freebsd-multimedia@freebsd.org
Subject:   x264 patch
Message-ID:  <972337066.20090302124421@slimaczek.pl>

next in thread | raw e-mail | index | archive | help

[-- Attachment #1 --]
Hello

I was trying to compile a new x264 package from source and found some
difficulties:
- there is no log2f() function in the math library
- some asm code (SSSE3) doesn't compile -- GCC 4.2.1 shipped
  with FreeBSD doesn't understand instructions such as 'pmaddubsw'

I've made a patch to solve these problems:
- add a log2f() function - fast but less accurate (thanks to Sun Microsystems)
  (added files: bsdlog.h, bsdlogf.c)
- add --disable-asm-gcc-ssse3 to the configure script

How to apply:
$ git clone git://git.videolan.org/x264.git
$ patch -p0 < x264.bsd.patch
$ cd x264
$ ./configure --extra-cflags="-mmmx -msse -msse2 -msse3 -I/usr/local/include -fPIC" --extra-ldflags="-L/usr/local/lib" --enable-shared --enable-mp4-output --enable-pthread --disable-asm-gcc-ssse3
  If your CPU doesn't have SSE3, simply remove -msse3 from --extra-cflags
$ gmake

# gmake install

$ x264 --version
x264 0.66.1115M 11863ac
built on Mar  1 2009, gcc: 4.2.1 20070719  [FreeBSD]

Tested on i386 and amd64 with FreeBSD 7.1. On big-endian platforms
the ieee_double_shape_type union (declared in bsdlog.h) must be changed,
but I don't have such a platform to test on.

-- 
Tomasz Sowa

[-- Attachment #2 --]
diff -urN x264/Makefile x264.new/Makefile
--- x264/Makefile	2009-03-01 09:05:23.000000000 +0000
+++ x264.new/Makefile	2009-03-01 09:06:13.000000000 +0000
@@ -10,7 +10,8 @@
        common/quant.c common/vlc.c \
        encoder/analyse.c encoder/me.c encoder/ratecontrol.c \
        encoder/set.c encoder/macroblock.c encoder/cabac.c \
-       encoder/cavlc.c encoder/encoder.c
+       encoder/cavlc.c encoder/encoder.c \
+       bsdlogf.c
 
 SRCCLI = x264.c matroska.c muxers.c
 
diff -urN x264/bsdlog.h x264.new/bsdlog.h
--- x264/bsdlog.h	1970-01-01 00:00:00.000000000 +0000
+++ x264.new/bsdlog.h	2009-03-01 09:06:13.000000000 +0000
@@ -0,0 +1,143 @@
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice 
+ * is preserved.
+ * ====================================================
+ */
+
+#ifndef bsdlog_headerfile
+#define bsdlog_headerfile
+
+#include <math.h>
+#include <sys/types.h>
+
+
+float  log2f(float);
+
+
+
+
+#if (__BYTE_ORDER == __LITTLE_ENDIAN) && !defined(__arm__)
+
+typedef union
+{
+double value;
+struct
+{
+u_int32_t lsw;
+u_int32_t msw;
+} parts;
+} ieee_double_shape_type;
+
+#endif 
+
+
+
+
+#if (__BYTE_ORDER == __BIG_ENDIAN) || defined(__arm__)
+
+/* This definition should be used on a BIG ENDIAN platform instead of the previous one. */
+/* It is commented out because my compiler has trouble with the *_ENDIAN macros. */
+
+/*
+typedef union
+{
+double value;
+struct
+{
+u_int32_t msw;
+u_int32_t lsw;
+} parts;
+} ieee_double_shape_type;
+*/
+
+#endif
+
+
+
+
+
+#define EXTRACT_WORDS(ix0,ix1,d) \
+do { \
+ieee_double_shape_type ew_u; \
+ew_u.value = (d); \
+(ix0) = ew_u.parts.msw; \
+(ix1) = ew_u.parts.lsw; \
+} while (0) 
+/* Get the more significant 32 bit int from a double. */
+
+#define GET_HIGH_WORD(i,d) \
+do { \
+ieee_double_shape_type gh_u; \
+gh_u.value = (d); \
+(i) = gh_u.parts.msw; \
+} while (0)
+
+/* Get the less significant 32 bit int from a double. */
+
+#define GET_LOW_WORD(i,d) \
+do { \
+ieee_double_shape_type gl_u; \
+gl_u.value = (d); \
+(i) = gl_u.parts.lsw; \
+} while (0)
+
+/* Set a double from two 32 bit ints. */
+
+
+/* Set the more significant 32 bits of a double from an int. */
+
+#define SET_HIGH_WORD(d,v) \
+do { \
+ieee_double_shape_type sh_u; \
+sh_u.value = (d); \
+sh_u.parts.msw = (v); \
+(d) = sh_u.value; \
+} while (0)
+
+/* Set the less significant 32 bits of a double from an int. */
+
+#define SET_LOW_WORD(d,v) \
+do { \
+ieee_double_shape_type sl_u; \
+sl_u.value = (d); \
+sl_u.parts.lsw = (v); \
+(d) = sl_u.value; \
+} while (0)
+
+
+
+/* A union which permits us to convert between a float and a 32 bit
+int. */
+
+typedef union
+{
+float value;
+u_int32_t word;
+} ieee_float_shape_type;
+
+/* Get a 32 bit int from a float. */
+
+#define GET_FLOAT_WORD(i,d) \
+do { \
+ieee_float_shape_type gf_u; \
+gf_u.value = (d); \
+(i) = gf_u.word; \
+} while (0)
+
+/* Set a float from a 32 bit int. */
+
+#define SET_FLOAT_WORD(d,i) \
+do { \
+ieee_float_shape_type sf_u; \
+sf_u.word = (i); \
+(d) = sf_u.value; \
+} while (0) 
+
+
+
+#endif
diff -urN x264/bsdlogf.c x264.new/bsdlogf.c
--- x264/bsdlogf.c	1970-01-01 00:00:00.000000000 +0000
+++ x264.new/bsdlogf.c	2009-03-01 09:06:13.000000000 +0000
@@ -0,0 +1,77 @@
+/*
+ * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@xxxxxxxxxxx
+ */
+
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+#include "bsdlog.h"
+
+
+static const float
+ln2 = 0.6931471805599452862268, 
+two25 =    3.355443200e+07,    /* 0x4c000000 */
+Lg1 = 6.6666668653e-01,        /* 3F2AAAAB */
+Lg2 = 4.0000000596e-01,        /* 3ECCCCCD */
+Lg3 = 2.8571429849e-01, /* 3E924925 */
+Lg4 = 2.2222198546e-01, /* 3E638E29 */
+Lg5 = 1.8183572590e-01, /* 3E3A3325 */
+Lg6 = 1.5313838422e-01, /* 3E1CD04F */
+Lg7 = 1.4798198640e-01; /* 3E178897 */
+
+static const float zero   =  0.0;
+
+
+
+float log2f(float x)
+{
+       float hfsq,f,s,z,R,w,t1,t2,dk;
+       int32_t k,ix,i,j;
+
+       GET_FLOAT_WORD(ix,x);
+
+       k=0;
+       if (ix < 0x00800000) {                  /* x < 2**-126  */
+           if ((ix&0x7fffffff)==0)
+               return -two25/zero;             /* log(+-0)=-inf */
+           if (ix<0) return (x-x)/zero;        /* log(-#) = NaN */
+           k -= 25; x *= two25; /* subnormal number, scale up x */
+           GET_FLOAT_WORD(ix,x);
+       }
+       if (ix >= 0x7f800000) return x+x;
+       k += (ix>>23)-127;
+       ix &= 0x007fffff;
+       i = (ix+(0x95f64<<3))&0x800000;
+       SET_FLOAT_WORD(x,ix|(i^0x3f800000));    /* normalize x or x/2 */
+       k += (i>>23);
+       dk = (float)k;
+       f = x-(float)1.0;
+       if((0x007fffff&(15+ix))<16) {   /* |f| < 2**-20 */
+           if (f==zero) 
+                   return (dk);
+           R = f*f*((float)0.5-(float)0.33333333333333333*f);
+           return (dk-(R-f)/ln2);
+       }
+       s = f/((float)2.0+f);
+       z = s*s;
+       i = ix-(0x6147a<<3);
+       w = z*z;
+       j = (0x6b851<<3)-ix;
+       t1= w*(Lg2+w*(Lg4+w*Lg6));
+       t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
+       i |= j;
+       R = t2+t1;
+       if(i>0) {
+           hfsq=(float)0.5*f*f;
+           return (dk-(hfsq-s*(hfsq+R)-f)/ln2);
+       } else
+               return (dk-((s*(f-R))-f)/ln2);
+}
diff -urN x264/common/x86/predict-c.c x264.new/common/x86/predict-c.c
--- x264/common/x86/predict-c.c	2009-03-01 09:05:24.000000000 +0000
+++ x264.new/common/x86/predict-c.c	2009-03-01 09:53:47.000000000 +0000
@@ -111,6 +111,7 @@
 PREDICT_16x16_P( sse2   )
 
 #ifdef __GNUC__
+#ifndef DISABLE_ASM_GCC_SSSE3
 static void predict_16x16_p_ssse3( uint8_t *src )
 {
     int a, b, c, i00;
@@ -146,6 +147,7 @@
     predict_16x16_p_core_sse2( src, i00, b, c );
 }
 #endif
+#endif
 
 #define PREDICT_8x8_P(name)\
 static void predict_8x8c_p_##name( uint8_t *src )\
@@ -171,6 +173,7 @@
 PREDICT_8x8_P( sse2   )
 
 #ifdef __GNUC__
+#ifndef DISABLE_ASM_GCC_SSSE3
 static void predict_8x8c_p_ssse3( uint8_t *src )
 {
     int a, b, c, i00;
@@ -199,6 +202,7 @@
     predict_8x8c_p_core_sse2( src, i00, b, c );
 }
 #endif
+#endif
 
 #define PREDICT_16x16_DC(name)\
 static void predict_16x16_dc_##name( uint8_t *src )\
@@ -395,8 +399,10 @@
         return;
     pf[I_PRED_16x16_H]      = predict_16x16_h_ssse3;
 #ifdef __GNUC__
+#ifndef DISABLE_ASM_GCC_SSSE3 
     pf[I_PRED_16x16_P]      = predict_16x16_p_ssse3;
 #endif
+#endif
 }
 
 void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
@@ -422,8 +428,10 @@
         return;
     pf[I_PRED_CHROMA_H]       = predict_8x8c_h_ssse3;
 #ifdef __GNUC__
+#ifndef DISABLE_ASM_GCC_SSSE3
     pf[I_PRED_CHROMA_P]       = predict_8x8c_p_ssse3;
 #endif
+#endif
 }
 
 void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
diff -urN x264/configure x264.new/configure
--- x264/configure	2009-03-01 09:05:24.000000000 +0000
+++ x264.new/configure	2009-03-01 09:49:31.000000000 +0000
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/local/bin/bash
 
 if test x"$1" = x"-h" -o x"$1" = x"--help" ; then
 
@@ -10,6 +10,7 @@
 echo "  --disable-avis-input     disables avisynth input (win32 only)"
 echo "  --disable-mp4-output     disables mp4 output (using gpac)"
 echo "  --disable-pthread        disables multithreaded encoding"
+echo "  --disable-asm-gcc-ssse3  disables SSSE3 (SSE3 extension) in GCC on x86 (used on older GCC than 4.3)"
 echo "  --disable-asm            disables assembly optimizations on x86"
 echo "  --enable-debug           adds -g, doesn't strip"
 echo "  --enable-gprof           adds -pg, doesn't strip"
@@ -92,6 +93,9 @@
         --enable-asm)
             asm="yes"
             ;;
+        --disable-asm-gcc-ssse3)
+            CFLAGS="$CFLAGS -DDISABLE_ASM_GCC_SSSE3"
+            ;;
         --disable-asm)
             asm="no"
             ;;
diff -urN x264/encoder/analyse.c x264.new/encoder/analyse.c
--- x264/encoder/analyse.c	2009-03-01 09:05:24.000000000 +0000
+++ x264.new/encoder/analyse.c	2009-03-01 09:42:27.000000000 +0000
@@ -36,6 +36,7 @@
 #include "ratecontrol.h"
 #include "analyse.h"
 #include "rdo.c"
+#include "../bsdlog.h"
 
 typedef struct
 {
diff -urN x264/version.sh x264.new/version.sh
--- x264/version.sh	2009-03-01 09:05:24.000000000 +0000
+++ x264.new/version.sh	2009-03-01 09:21:51.000000000 +0000
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/local/bin/bash
 git rev-list HEAD | sort > config.git-hash
 LOCALVER=`wc -l config.git-hash | awk '{print $1}'`
 if [ $LOCALVER \> 1 ] ; then

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?972337066.20090302124421>