From owner-svn-ports-all@freebsd.org  Sat Apr  2 12:58:35 2016
Return-Path: <owner-svn-ports-all@freebsd.org>
Delivered-To: svn-ports-all@mailman.ysv.freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org
 [IPv6:2001:1900:2254:206a::19:1])
 by mailman.ysv.freebsd.org (Postfix) with ESMTP id 99EAFB00B51;
 Sat,  2 Apr 2016 12:58:35 +0000 (UTC) (envelope-from kwm@FreeBSD.org)
Received: from repo.freebsd.org (repo.freebsd.org
 [IPv6:2610:1c1:1:6068::e6a:0])
 (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
 (Client did not present a certificate)
 by mx1.freebsd.org (Postfix) with ESMTPS id 306C812FB;
 Sat,  2 Apr 2016 12:58:35 +0000 (UTC) (envelope-from kwm@FreeBSD.org)
Received: from repo.freebsd.org ([127.0.1.37])
 by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u32CwYrD087461;
 Sat, 2 Apr 2016 12:58:34 GMT (envelope-from kwm@FreeBSD.org)
Received: (from kwm@localhost)
 by repo.freebsd.org (8.15.2/8.15.2/Submit) id u32CwY50087457;
 Sat, 2 Apr 2016 12:58:34 GMT (envelope-from kwm@FreeBSD.org)
Message-Id: <201604021258.u32CwY50087457@repo.freebsd.org>
X-Authentication-Warning: repo.freebsd.org: kwm set sender to kwm@FreeBSD.org
 using -f
From: Koop Mast <kwm@FreeBSD.org>
Date: Sat, 2 Apr 2016 12:58:34 +0000 (UTC)
To: ports-committers@freebsd.org, svn-ports-all@freebsd.org,
 svn-ports-head@freebsd.org
Subject: svn commit: r412405 - in head/lang/beignet: . files
X-SVN-Group: ports-head
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
X-BeenThere: svn-ports-all@freebsd.org
X-Mailman-Version: 2.1.21
Precedence: list
List-Id: SVN commit messages for the ports tree <svn-ports-all.freebsd.org>
List-Unsubscribe: <https://lists.freebsd.org/mailman/options/svn-ports-all>,
 <mailto:svn-ports-all-request@freebsd.org?subject=unsubscribe>
List-Archive: <http://lists.freebsd.org/pipermail/svn-ports-all/>
List-Post: <mailto:svn-ports-all@freebsd.org>
List-Help: <mailto:svn-ports-all-request@freebsd.org?subject=help>
List-Subscribe: <https://lists.freebsd.org/mailman/listinfo/svn-ports-all>,
 <mailto:svn-ports-all-request@freebsd.org?subject=subscribe>
X-List-Received-Date: Sat, 02 Apr 2016 12:58:35 -0000

Author: kwm
Date: Sat Apr  2 12:58:34 2016
New Revision: 412405
URL: https://svnweb.freebsd.org/changeset/ports/412405

Log:
  Add two patches from upstream to support llvm37, and switch to llvm37
  so that beignet uses the same LLVM version as the Mesa ports.
  
  Obtained from:	beignet upstream

Added:
  head/lang/beignet/files/llvm37-27522f9   (contents, props changed)
  head/lang/beignet/files/llvm37-68b5180   (contents, props changed)
Modified:
  head/lang/beignet/Makefile
  head/lang/beignet/pkg-plist

Modified: head/lang/beignet/Makefile
==============================================================================
--- head/lang/beignet/Makefile	Sat Apr  2 12:52:08 2016	(r412404)
+++ head/lang/beignet/Makefile	Sat Apr  2 12:58:34 2016	(r412405)
@@ -3,6 +3,7 @@
 
 PORTNAME=	beignet
 PORTVERSION=	1.1.1
+PORTREVISION=	1
 CATEGORIES=	lang
 MASTER_SITES=	https://01.org/sites/default/files/
 DISTVERSIONSUFFIX=	-source
@@ -10,11 +11,12 @@ DISTVERSIONSUFFIX=	-source
 MAINTAINER=	x11@FreeBSD.org
 COMMENT=	OpenCL library for Intel GPUs
 
-BUILD_DEPENDS=	clang${LLVMVER}:lang/clang${LLVMVER} \
+BUILD_DEPENDS=	clang${LLVMVER}:devel/llvm${LLVMVER} \
 		opencl>=0:devel/opencl
 LIB_DEPENDS=	libOpenCL.so:devel/ocl-icd \
 		libdrm.so:graphics/libdrm
-RUN_DEPENDS=	opencl>=0:devel/opencl
+RUN_DEPENDS=	opencl>=0:devel/opencl \
+		clang${LLVMVER}:devel/llvm${LLVMVER}
 
 WRKSRC=		${WRKDIR}/Beignet-${PORTVERSION}-Source
 
@@ -22,7 +24,7 @@ USES=		cmake gmake pkgconfig shebangfix
 USE_XORG=	sm ice x11 xext xfixes
 USE_GL=		gl
 SHEBANG_FILES=	src/git_sha1.sh backend/kernels/compile.sh
-LLVMVER=	36
+LLVMVER=	37
 
 CMAKE_ARGS+=	-DLLVM_CONFIG_EXECUTABLE=${LOCALBASE}/bin/llvm-config${LLVMVER}
 
@@ -35,6 +37,9 @@ BROKEN_FreeBSD_9=	Beignet is only suppor
 #OPTIONS_DEFINE=	TESTS
 #TESTS_DESC=	Build and run the test suite
 
+EXTRA_PATCHES=	${PATCHDIR}/llvm37-68b5180:-p1 \
+		${PATCHDIR}/llvm37-27522f9:-p1		
+
 .include <bsd.port.options.mk>
 
 .if ${OPSYS} == FreeBSD && ${OSVERSION} >= 1000000 && ${OSVERSION} < 1002000

Added: head/lang/beignet/files/llvm37-27522f9
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/lang/beignet/files/llvm37-27522f9	Sat Apr  2 12:58:34 2016	(r412405)
@@ -0,0 +1,1185 @@
+From 27522f9c83303078be7d927a23f2a43c587efc9a Mon Sep 17 00:00:00 2001
+From: Yang Rong <rong.r.yang@intel.com>
+Date: Wed, 16 Sep 2015 16:49:35 +0800
+Subject: GBE: use opencl c to implement llvm.memset and llvm.memcpy.
+
+LLVM 3.7 changed the LLVM IR, so two copies would be needed if llvm.memset
+and llvm.memcpy were still implemented in LLVM IR. OpenCL C is also clearer.
+
+Signed-off-by: Yang Rong <rong.r.yang@intel.com>
+Reviewed-by: Ruiling Song <ruiling.song@intel.com>
+Reviewed-by: Igor Gnatenko <i.gnatenko.brain@gmail.com>
+
+diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
+index 0cd1eef..0fffd9b 100644
+--- a/backend/src/libocl/CMakeLists.txt
++++ b/backend/src/libocl/CMakeLists.txt
+@@ -52,7 +52,8 @@ FOREACH(M ${OCL_COPY_HEADERS})
+     COPY_THE_HEADER(${M})
+ ENDFOREACH(M) 
+ 
+-SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_misc ocl_vload ocl_geometric ocl_image)
++SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_memcpy
++                      ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image)
+ FOREACH(M ${OCL_COPY_MODULES})
+     COPY_THE_HEADER(${M})
+     COPY_THE_SOURCE(${M})
+@@ -181,7 +182,7 @@ MACRO(ADD_LL_TO_BC_TARGET M)
+ 	)
+ ENDMACRO(ADD_LL_TO_BC_TARGET)
+ 
+-SET (OCL_LL_MODULES ocl_barrier ocl_memcpy ocl_memset ocl_clz)
++SET (OCL_LL_MODULES ocl_barrier ocl_clz)
+ FOREACH(f ${OCL_LL_MODULES})
+     COPY_THE_LL(${f})
+     ADD_LL_TO_BC_TARGET(${f})
+diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h
+index a4af4aa..7897567 100644
+--- a/backend/src/libocl/include/ocl.h
++++ b/backend/src/libocl/include/ocl.h
+@@ -30,6 +30,7 @@
+ #include "ocl_image.h"
+ #include "ocl_integer.h"
+ #include "ocl_math.h"
++#include "ocl_memcpy.h"
+ #include "ocl_misc.h"
+ #include "ocl_printf.h"
+ #include "ocl_relational.h"
+diff --git a/backend/src/libocl/include/ocl_memcpy.h b/backend/src/libocl/include/ocl_memcpy.h
+new file mode 100644
+index 0000000..2672298
+--- /dev/null
++++ b/backend/src/libocl/include/ocl_memcpy.h
+@@ -0,0 +1,51 @@
++/*
++ * Copyright © 2012 - 2014 Intel Corporation
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
++ *
++ */
++#ifndef __OCL_MEMCPY_H__
++#define __OCL_MEMCPY_H__
++#include "ocl_types.h"
++
++/////////////////////////////////////////////////////////////////////////////
++// memcopy functions
++/////////////////////////////////////////////////////////////////////////////
++void __gen_memcpy_gg_align(__global uchar* dst, __global uchar* src, size_t size);
++void __gen_memcpy_gp_align(__global uchar* dst, __private uchar* src, size_t size);
++void __gen_memcpy_gl_align(__global uchar* dst, __local uchar* src, size_t size);
++void __gen_memcpy_gc_align(__global uchar* dst, __constant uchar* src, size_t size);
++void __gen_memcpy_pg_align(__private uchar* dst, __global uchar* src, size_t size);
++void __gen_memcpy_pp_align(__private uchar* dst, __private uchar* src, size_t size);
++void __gen_memcpy_pl_align(__private uchar* dst, __local uchar* src, size_t size);
++void __gen_memcpy_pc_align(__private uchar* dst, __constant uchar* src, size_t size);
++void __gen_memcpy_lg_align(__local uchar* dst, __global uchar* src, size_t size);
++void __gen_memcpy_lp_align(__local uchar* dst, __private uchar* src, size_t size);
++void __gen_memcpy_ll_align(__local uchar* dst, __local uchar* src, size_t size);
++void __gen_memcpy_lc_align(__local uchar* dst, __constant uchar* src, size_t size);
++
++void __gen_memcpy_gg(__global uchar* dst, __global uchar* src, size_t size);
++void __gen_memcpy_gp(__global uchar* dst, __private uchar* src, size_t size);
++void __gen_memcpy_gl(__global uchar* dst, __local uchar* src, size_t size);
++void __gen_memcpy_gc(__global uchar* dst, __constant uchar* src, size_t size);
++void __gen_memcpy_pg(__private uchar* dst, __global uchar* src, size_t size);
++void __gen_memcpy_pp(__private uchar* dst, __private uchar* src, size_t size);
++void __gen_memcpy_pl(__private uchar* dst, __local uchar* src, size_t size);
++void __gen_memcpy_pc(__private uchar* dst, __constant uchar* src, size_t size);
++void __gen_memcpy_lg(__local uchar* dst, __global uchar* src, size_t size);
++void __gen_memcpy_lp(__local uchar* dst, __private uchar* src, size_t size);
++void __gen_memcpy_ll(__local uchar* dst, __local uchar* src, size_t size);
++void __gen_memcpy_lc(__local uchar* dst, __constant uchar* src, size_t size);
++
++#endif  /* __OCL_MEMCPY_H__ */
+diff --git a/backend/src/libocl/include/ocl_memset.h b/backend/src/libocl/include/ocl_memset.h
+new file mode 100644
+index 0000000..2d444ad
+--- /dev/null
++++ b/backend/src/libocl/include/ocl_memset.h
+@@ -0,0 +1,33 @@
++/*
++ * Copyright © 2012 - 2014 Intel Corporation
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
++ *
++ */
++#ifndef __OCL_MEMSET_H__
++#define __OCL_MEMSET_H__
++#include "ocl_types.h"
++
++/////////////////////////////////////////////////////////////////////////////
+// memset functions
++/////////////////////////////////////////////////////////////////////////////
++void __gen_memset_g_align(__global uchar* dst, uchar val, size_t size);
++void __gen_memset_p_align(__private uchar* dst, uchar val, size_t size);
++void __gen_memset_l_align(__local uchar* dst, uchar val, size_t size);
++
++void __gen_memset_g(__global uchar* dst, uchar val, size_t size);
++void __gen_memset_p(__private uchar* dst, uchar val, size_t size);
++void __gen_memset_l(__local uchar* dst, uchar val, size_t size);
++
++#endif  /* __OCL_MEMSET_H__ */
+diff --git a/backend/src/libocl/src/ocl_memcpy.cl b/backend/src/libocl/src/ocl_memcpy.cl
+new file mode 100644
+index 0000000..85f490f
+--- /dev/null
++++ b/backend/src/libocl/src/ocl_memcpy.cl
+@@ -0,0 +1,49 @@
++/*
++ * Copyright © 2012 - 2014 Intel Corporation
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
++ *
++ */
++#include "ocl_memcpy.h"
++
++#define DECL_TWO_SPACE_MEMCOPY_FN(NAME, DST_SPACE, SRC_SPACE) \
++void __gen_memcpy_ ##NAME## _align (DST_SPACE uchar* dst, SRC_SPACE uchar* src, size_t size) { \
++  size_t index = 0; \
++  while((index + 4) <= size) { \
++    *((DST_SPACE uint *)(dst + index)) = *((SRC_SPACE uint *)(src + index)); \
++    index += 4; \
++  } \
++  while(index < size) { \
++    dst[index] = src[index]; \
++    index++; \
++  } \
++} \
++void __gen_memcpy_ ##NAME (DST_SPACE uchar* dst, SRC_SPACE uchar* src, size_t size) { \
++  size_t index = 0; \
++  while(index < size) { \
++    dst[index] = src[index]; \
++    index++; \
++  } \
++}
++
++#define DECL_ONE_SPACE_MEMCOPY_FN(NAME, DST_SPACE) \
++  DECL_TWO_SPACE_MEMCOPY_FN( NAME## g, DST_SPACE, __global) \
++  DECL_TWO_SPACE_MEMCOPY_FN( NAME## l, DST_SPACE, __local) \
++  DECL_TWO_SPACE_MEMCOPY_FN( NAME## p, DST_SPACE, __private) \
++  DECL_TWO_SPACE_MEMCOPY_FN( NAME## c, DST_SPACE, __constant)
++
++DECL_ONE_SPACE_MEMCOPY_FN(g, __global)
++DECL_ONE_SPACE_MEMCOPY_FN(l, __local)
++DECL_ONE_SPACE_MEMCOPY_FN(p, __private)
++
+diff --git a/backend/src/libocl/src/ocl_memcpy.ll b/backend/src/libocl/src/ocl_memcpy.ll
+deleted file mode 100644
+index b3fadb2..0000000
+--- a/backend/src/libocl/src/ocl_memcpy.ll
++++ /dev/null
+@@ -1,729 +0,0 @@
+-;The memcpy's source code.
+-; INLINE_OVERLOADABLE void __gen_memcpy_align(uchar* dst, uchar* src, size_t size) {
+-;   size_t index = 0;
+-;   while((index + 4) <= size) {
+-;     *((uint *)(dst + index)) = *((uint *)(src + index));
+-;     index += 4;
+-;   }
+-;   while(index < size) {
+-;     dst[index] = src[index];
+-;     index++;
+-;   }
+-; }
+-
+-define void @__gen_memcpy_gg_align(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  br label %while.cond
+-
+-while.cond:                                       ; preds = %while.body, %entry
+-  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+-  %add = add i32 %index.0, 4
+-  %cmp = icmp ugt i32 %add, %size
+-  br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body:                                       ; preds = %while.cond
+-  %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
+-  %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+-  %1 = load i32 addrspace(1)* %0, align 4
+-  %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+-  %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+-  store i32 %1, i32 addrspace(1)* %2, align 4
+-  br label %while.cond
+-
+-while.cond3:                                      ; preds = %while.cond, %while.body5
+-  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+-  %cmp4 = icmp ult i32 %index.1, %size
+-  br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5:                                      ; preds = %while.cond3
+-  %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
+-  %3 = load i8 addrspace(1)* %arrayidx, align 1
+-  %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+-  store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+-  %inc = add i32 %index.1, 1
+-  br label %while.cond3
+-
+-while.end7:                                       ; preds = %while.cond3
+-  ret void
+-}
+-
+-define void @__gen_memcpy_gp_align(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  br label %while.cond
+-
+-while.cond:                                       ; preds = %while.body, %entry
+-  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+-  %add = add i32 %index.0, 4
+-  %cmp = icmp ugt i32 %add, %size
+-  br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body:                                       ; preds = %while.cond
+-  %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
+-  %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
+-  %1 = load i32 addrspace(0)* %0, align 4
+-  %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+-  %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+-  store i32 %1, i32 addrspace(1)* %2, align 4
+-  br label %while.cond
+-
+-while.cond3:                                      ; preds = %while.cond, %while.body5
+-  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+-  %cmp4 = icmp ult i32 %index.1, %size
+-  br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5:                                      ; preds = %while.cond3
+-  %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
+-  %3 = load i8 addrspace(0)* %arrayidx, align 1
+-  %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+-  store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+-  %inc = add i32 %index.1, 1
+-  br label %while.cond3
+-
+-while.end7:                                       ; preds = %while.cond3
+-  ret void
+-}
+-
+-define void @__gen_memcpy_gl_align(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  br label %while.cond
+-
+-while.cond:                                       ; preds = %while.body, %entry
+-  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+-  %add = add i32 %index.0, 4
+-  %cmp = icmp ugt i32 %add, %size
+-  br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body:                                       ; preds = %while.cond
+-  %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
+-  %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+-  %1 = load i32 addrspace(3)* %0, align 4
+-  %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+-  %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+-  store i32 %1, i32 addrspace(1)* %2, align 4
+-  br label %while.cond
+-
+-while.cond3:                                      ; preds = %while.cond, %while.body5
+-  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+-  %cmp4 = icmp ult i32 %index.1, %size
+-  br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5:                                      ; preds = %while.cond3
+-  %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
+-  %3 = load i8 addrspace(3)* %arrayidx, align 1
+-  %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+-  store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+-  %inc = add i32 %index.1, 1
+-  br label %while.cond3
+-
+-while.end7:                                       ; preds = %while.cond3
+-  ret void
+-}
+-
+-define void @__gen_memcpy_pg_align(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  br label %while.cond
+-
+-while.cond:                                       ; preds = %while.body, %entry
+-  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+-  %add = add i32 %index.0, 4
+-  %cmp = icmp ugt i32 %add, %size
+-  br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body:                                       ; preds = %while.cond
+-  %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
+-  %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+-  %1 = load i32 addrspace(1)* %0, align 4
+-  %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+-  %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+-  store i32 %1, i32 addrspace(0)* %2, align 4
+-  br label %while.cond
+-
+-while.cond3:                                      ; preds = %while.cond, %while.body5
+-  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+-  %cmp4 = icmp ult i32 %index.1, %size
+-  br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5:                                      ; preds = %while.cond3
+-  %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
+-  %3 = load i8 addrspace(1)* %arrayidx, align 1
+-  %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+-  store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+-  %inc = add i32 %index.1, 1
+-  br label %while.cond3
+-
+-while.end7:                                       ; preds = %while.cond3
+-  ret void
+-}
+-
+-define void @__gen_memcpy_pp_align(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  br label %while.cond
+-
+-while.cond:                                       ; preds = %while.body, %entry
+-  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+-  %add = add i32 %index.0, 4
+-  %cmp = icmp ugt i32 %add, %size
+-  br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body:                                       ; preds = %while.cond
+-  %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
+-  %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
+-  %1 = load i32 addrspace(0)* %0, align 4
+-  %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+-  %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+-  store i32 %1, i32 addrspace(0)* %2, align 4
+-  br label %while.cond
+-
+-while.cond3:                                      ; preds = %while.cond, %while.body5
+-  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+-  %cmp4 = icmp ult i32 %index.1, %size
+-  br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5:                                      ; preds = %while.cond3
+-  %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
+-  %3 = load i8 addrspace(0)* %arrayidx, align 1
+-  %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+-  store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+-  %inc = add i32 %index.1, 1
+-  br label %while.cond3
+-
+-while.end7:                                       ; preds = %while.cond3
+-  ret void
+-}
+-
+-define void @__gen_memcpy_pl_align(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  br label %while.cond
+-
+-while.cond:                                       ; preds = %while.body, %entry
+-  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+-  %add = add i32 %index.0, 4
+-  %cmp = icmp ugt i32 %add, %size
+-  br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body:                                       ; preds = %while.cond
+-  %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
+-  %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+-  %1 = load i32 addrspace(3)* %0, align 4
+-  %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+-  %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+-  store i32 %1, i32 addrspace(0)* %2, align 4
+-  br label %while.cond
+-
+-while.cond3:                                      ; preds = %while.cond, %while.body5
+-  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+-  %cmp4 = icmp ult i32 %index.1, %size
+-  br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5:                                      ; preds = %while.cond3
+-  %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
+-  %3 = load i8 addrspace(3)* %arrayidx, align 1
+-  %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+-  store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+-  %inc = add i32 %index.1, 1
+-  br label %while.cond3
+-
+-while.end7:                                       ; preds = %while.cond3
+-  ret void
+-}
+-
+-define void @__gen_memcpy_lg_align(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  br label %while.cond
+-
+-while.cond:                                       ; preds = %while.body, %entry
+-  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+-  %add = add i32 %index.0, 4
+-  %cmp = icmp ugt i32 %add, %size
+-  br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body:                                       ; preds = %while.cond
+-  %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0
+-  %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
+-  %1 = load i32 addrspace(1)* %0, align 4
+-  %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+-  %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+-  store i32 %1, i32 addrspace(3)* %2, align 4
+-  br label %while.cond
+-
+-while.cond3:                                      ; preds = %while.cond, %while.body5
+-  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+-  %cmp4 = icmp ult i32 %index.1, %size
+-  br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5:                                      ; preds = %while.cond3
+-  %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1
+-  %3 = load i8 addrspace(1)* %arrayidx, align 1
+-  %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+-  store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+-  %inc = add i32 %index.1, 1
+-  br label %while.cond3
+-
+-while.end7:                                       ; preds = %while.cond3
+-  ret void
+-}
+-
+-define void @__gen_memcpy_lp_align(i8 addrspace(3)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  br label %while.cond
+-
+-while.cond:                                       ; preds = %while.body, %entry
+-  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+-  %add = add i32 %index.0, 4
+-  %cmp = icmp ugt i32 %add, %size
+-  br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body:                                       ; preds = %while.cond
+-  %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0
+-  %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)*
+-  %1 = load i32 addrspace(0)* %0, align 4
+-  %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+-  %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+-  store i32 %1, i32 addrspace(3)* %2, align 4
+-  br label %while.cond
+-
+-while.cond3:                                      ; preds = %while.cond, %while.body5
+-  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+-  %cmp4 = icmp ult i32 %index.1, %size
+-  br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5:                                      ; preds = %while.cond3
+-  %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1
+-  %3 = load i8 addrspace(0)* %arrayidx, align 1
+-  %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+-  store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+-  %inc = add i32 %index.1, 1
+-  br label %while.cond3
+-
+-while.end7:                                       ; preds = %while.cond3
+-  ret void
+-}
+-
+-define void @__gen_memcpy_ll_align(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  br label %while.cond
+-
+-while.cond:                                       ; preds = %while.body, %entry
+-  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+-  %add = add i32 %index.0, 4
+-  %cmp = icmp ugt i32 %add, %size
+-  br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body:                                       ; preds = %while.cond
+-  %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0
+-  %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)*
+-  %1 = load i32 addrspace(3)* %0, align 4
+-  %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+-  %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+-  store i32 %1, i32 addrspace(3)* %2, align 4
+-  br label %while.cond
+-
+-while.cond3:                                      ; preds = %while.cond, %while.body5
+-  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+-  %cmp4 = icmp ult i32 %index.1, %size
+-  br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5:                                      ; preds = %while.cond3
+-  %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1
+-  %3 = load i8 addrspace(3)* %arrayidx, align 1
+-  %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+-  store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+-  %inc = add i32 %index.1, 1
+-  br label %while.cond3
+-
+-while.end7:                                       ; preds = %while.cond3
+-  ret void
+-}
+-
+-;The memcpy's source code.
+-; INLINE_OVERLOADABLE void __gen_memcpy(uchar* dst, uchar* src, size_t size) {
+-;   size_t index = 0;
+-;   while(index < size) {
+-;     dst[index] = src[index];
+-;     index++;
+-;   }
+-; }
+-
+-define void @__gen_memcpy_gg(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  %cmp4 = icmp eq i32 %size, 0
+-  br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body:                                       ; preds = %entry, %while.body
+-  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+-  %0 = ptrtoint i8 addrspace(1)* %src to i32
+-  %1 = add i32 %0, %index.05
+-  %2 = inttoptr i32 %1 to i8 addrspace(1)*
+-  %3 = load i8 addrspace(1)* %2, align 1
+-  %4 = ptrtoint i8 addrspace(1)* %dst to i32
+-  %5 = add i32 %4, %index.05
+-  %6 = inttoptr i32 %5 to i8 addrspace(1)*
+-  store i8 %3, i8 addrspace(1)* %6, align 1
+-  %inc = add i32 %index.05, 1
+-  %cmp = icmp ult i32 %inc, %size
+-  br i1 %cmp, label %while.body, label %while.end
+-
+-while.end:                                        ; preds = %while.body, %entry
+-  ret void
+-}
+-
+-define void @__gen_memcpy_gp(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  %cmp4 = icmp eq i32 %size, 0
+-  br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body:                                       ; preds = %entry, %while.body
+-  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+-  %0 = ptrtoint i8 addrspace(0)* %src to i32
+-  %1 = add i32 %0, %index.05
+-  %2 = inttoptr i32 %1 to i8 addrspace(0)*
+-  %3 = load i8 addrspace(0)* %2, align 1
+-  %4 = ptrtoint i8 addrspace(1)* %dst to i32
+-  %5 = add i32 %4, %index.05
+-  %6 = inttoptr i32 %5 to i8 addrspace(1)*
+-  store i8 %3, i8 addrspace(1)* %6, align 1
+-  %inc = add i32 %index.05, 1
+-  %cmp = icmp ult i32 %inc, %size
+-  br i1 %cmp, label %while.body, label %while.end
+-
+-while.end:                                        ; preds = %while.body, %entry
+-  ret void
+-}
+-
+-define void @__gen_memcpy_gl(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  %cmp4 = icmp eq i32 %size, 0
+-  br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body:                                       ; preds = %entry, %while.body
+-  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+-  %0 = ptrtoint i8 addrspace(3)* %src to i32
+-  %1 = add i32 %0, %index.05
+-  %2 = inttoptr i32 %1 to i8 addrspace(3)*
+-  %3 = load i8 addrspace(3)* %2, align 1
+-  %4 = ptrtoint i8 addrspace(1)* %dst to i32
+-  %5 = add i32 %4, %index.05
+-  %6 = inttoptr i32 %5 to i8 addrspace(1)*
+-  store i8 %3, i8 addrspace(1)* %6, align 1
+-  %inc = add i32 %index.05, 1
+-  %cmp = icmp ult i32 %inc, %size
+-  br i1 %cmp, label %while.body, label %while.end
+-
+-while.end:                                        ; preds = %while.body, %entry
+-  ret void
+-}
+-
+-define void @__gen_memcpy_pg(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  %cmp4 = icmp eq i32 %size, 0
+-  br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body:                                       ; preds = %entry, %while.body
+-  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+-  %0 = ptrtoint i8 addrspace(1)* %src to i32
+-  %1 = add i32 %0, %index.05
+-  %2 = inttoptr i32 %1 to i8 addrspace(1)*
+-  %3 = load i8 addrspace(1)* %2, align 1
+-  %4 = ptrtoint i8 addrspace(0)* %dst to i32
+-  %5 = add i32 %4, %index.05
+-  %6 = inttoptr i32 %5 to i8 addrspace(0)*
+-  store i8 %3, i8 addrspace(0)* %6, align 1
+-  %inc = add i32 %index.05, 1
+-  %cmp = icmp ult i32 %inc, %size
+-  br i1 %cmp, label %while.body, label %while.end
+-
+-while.end:                                        ; preds = %while.body, %entry
+-  ret void
+-}
+-
+-define void @__gen_memcpy_pp(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  %cmp4 = icmp eq i32 %size, 0
+-  br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body:                                       ; preds = %entry, %while.body
+-  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+-  %0 = ptrtoint i8 addrspace(0)* %src to i32
+-  %1 = add i32 %0, %index.05
+-  %2 = inttoptr i32 %1 to i8 addrspace(0)*
+-  %3 = load i8 addrspace(0)* %2, align 1
+-  %4 = ptrtoint i8 addrspace(0)* %dst to i32
+-  %5 = add i32 %4, %index.05
+-  %6 = inttoptr i32 %5 to i8 addrspace(0)*
+-  store i8 %3, i8 addrspace(0)* %6, align 1
+-  %inc = add i32 %index.05, 1
+-  %cmp = icmp ult i32 %inc, %size
+-  br i1 %cmp, label %while.body, label %while.end
+-
+-while.end:                                        ; preds = %while.body, %entry
+-  ret void
+-}
+-
+-define void @__gen_memcpy_pl(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  %cmp4 = icmp eq i32 %size, 0
+-  br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body:                                       ; preds = %entry, %while.body
+-  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+-  %0 = ptrtoint i8 addrspace(3)* %src to i32
+-  %1 = add i32 %0, %index.05
+-  %2 = inttoptr i32 %1 to i8 addrspace(3)*
+-  %3 = load i8 addrspace(3)* %2, align 1
+-  %4 = ptrtoint i8 addrspace(0)* %dst to i32
+-  %5 = add i32 %4, %index.05
+-  %6 = inttoptr i32 %5 to i8 addrspace(0)*
+-  store i8 %3, i8 addrspace(0)* %6, align 1
+-  %inc = add i32 %index.05, 1
+-  %cmp = icmp ult i32 %inc, %size
+-  br i1 %cmp, label %while.body, label %while.end
+-
+-while.end:                                        ; preds = %while.body, %entry
+-  ret void
+-}
+-
+-define void @__gen_memcpy_lg(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  %cmp4 = icmp eq i32 %size, 0
+-  br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body:                                       ; preds = %entry, %while.body
+-  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+-  %0 = ptrtoint i8 addrspace(1)* %src to i32
+-  %1 = add i32 %0, %index.05
+-  %2 = inttoptr i32 %1 to i8 addrspace(1)*
+-  %3 = load i8 addrspace(1)* %2, align 1
+-  %4 = ptrtoint i8 addrspace(3)* %dst to i32
+-  %5 = add i32 %4, %index.05
+-  %6 = inttoptr i32 %5 to i8 addrspace(3)*
+-  store i8 %3, i8 addrspace(3)* %6, align 1
+-  %inc = add i32 %index.05, 1
+-  %cmp = icmp ult i32 %inc, %size
+-  br i1 %cmp, label %while.body, label %while.end
+-
+-while.end:                                        ; preds = %while.body, %entry
+-  ret void
+-}
+-
+-define void @__gen_memcpy_lp(i8 addrspace(3)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  %cmp4 = icmp eq i32 %size, 0
+-  br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body:                                       ; preds = %entry, %while.body
+-  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+-  %0 = ptrtoint i8 addrspace(0)* %src to i32
+-  %1 = add i32 %0, %index.05
+-  %2 = inttoptr i32 %1 to i8 addrspace(0)*
+-  %3 = load i8 addrspace(0)* %2, align 1
+-  %4 = ptrtoint i8 addrspace(3)* %dst to i32
+-  %5 = add i32 %4, %index.05
+-  %6 = inttoptr i32 %5 to i8 addrspace(3)*
+-  store i8 %3, i8 addrspace(3)* %6, align 1
+-  %inc = add i32 %index.05, 1
+-  %cmp = icmp ult i32 %inc, %size
+-  br i1 %cmp, label %while.body, label %while.end
+-
+-while.end:                                        ; preds = %while.body, %entry
+-  ret void
+-}
+-
+-define void @__gen_memcpy_ll(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  %cmp4 = icmp eq i32 %size, 0
+-  br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body:                                       ; preds = %entry, %while.body
+-  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+-  %0 = ptrtoint i8 addrspace(3)* %src to i32
+-  %1 = add i32 %0, %index.05
+-  %2 = inttoptr i32 %1 to i8 addrspace(3)*
+-  %3 = load i8 addrspace(3)* %2, align 1
+-  %4 = ptrtoint i8 addrspace(3)* %dst to i32
+-  %5 = add i32 %4, %index.05
+-  %6 = inttoptr i32 %5 to i8 addrspace(3)*
+-  store i8 %3, i8 addrspace(3)* %6, align 1
+-  %inc = add i32 %index.05, 1
+-  %cmp = icmp ult i32 %inc, %size
+-  br i1 %cmp, label %while.body, label %while.end
+-
+-while.end:                                        ; preds = %while.body, %entry
+-  ret void
+-}
+-
+-define void @__gen_memcpy_gc_align(i8 addrspace(1)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  br label %while.cond
+-
+-while.cond:                                       ; preds = %while.body, %entry
+-  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+-  %add = add i32 %index.0, 4
+-  %cmp = icmp ugt i32 %add, %size
+-  br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body:                                       ; preds = %while.cond
+-  %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0
+-  %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)*
+-  %1 = load i32 addrspace(2)* %0, align 4
+-  %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0
+-  %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
+-  store i32 %1, i32 addrspace(1)* %2, align 4
+-  br label %while.cond
+-
+-while.cond3:                                      ; preds = %while.cond, %while.body5
+-  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+-  %cmp4 = icmp ult i32 %index.1, %size
+-  br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5:                                      ; preds = %while.cond3
+-  %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1
+-  %3 = load i8 addrspace(2)* %arrayidx, align 1
+-  %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1
+-  store i8 %3, i8 addrspace(1)* %arrayidx6, align 1
+-  %inc = add i32 %index.1, 1
+-  br label %while.cond3
+-
+-while.end7:                                       ; preds = %while.cond3
+-  ret void
+-}
+-
+-define void @__gen_memcpy_pc_align(i8 addrspace(0)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  br label %while.cond
+-
+-while.cond:                                       ; preds = %while.body, %entry
+-  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+-  %add = add i32 %index.0, 4
+-  %cmp = icmp ugt i32 %add, %size
+-  br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body:                                       ; preds = %while.cond
+-  %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0
+-  %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)*
+-  %1 = load i32 addrspace(2)* %0, align 4
+-  %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0
+-  %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)*
+-  store i32 %1, i32 addrspace(0)* %2, align 4
+-  br label %while.cond
+-
+-while.cond3:                                      ; preds = %while.cond, %while.body5
+-  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+-  %cmp4 = icmp ult i32 %index.1, %size
+-  br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5:                                      ; preds = %while.cond3
+-  %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1
+-  %3 = load i8 addrspace(2)* %arrayidx, align 1
+-  %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1
+-  store i8 %3, i8 addrspace(0)* %arrayidx6, align 1
+-  %inc = add i32 %index.1, 1
+-  br label %while.cond3
+-
+-while.end7:                                       ; preds = %while.cond3
+-  ret void
+-}
+-
+-define void @__gen_memcpy_lc_align(i8 addrspace(3)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  br label %while.cond
+-
+-while.cond:                                       ; preds = %while.body, %entry
+-  %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ]
+-  %add = add i32 %index.0, 4
+-  %cmp = icmp ugt i32 %add, %size
+-  br i1 %cmp, label %while.cond3, label %while.body
+-
+-while.body:                                       ; preds = %while.cond
+-  %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0
+-  %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)*
+-  %1 = load i32 addrspace(2)* %0, align 4
+-  %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0
+-  %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)*
+-  store i32 %1, i32 addrspace(3)* %2, align 4
+-  br label %while.cond
+-
+-while.cond3:                                      ; preds = %while.cond, %while.body5
+-  %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ]
+-  %cmp4 = icmp ult i32 %index.1, %size
+-  br i1 %cmp4, label %while.body5, label %while.end7
+-
+-while.body5:                                      ; preds = %while.cond3
+-  %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1
+-  %3 = load i8 addrspace(2)* %arrayidx, align 1
+-  %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1
+-  store i8 %3, i8 addrspace(3)* %arrayidx6, align 1
+-  %inc = add i32 %index.1, 1
+-  br label %while.cond3
+-
+-while.end7:                                       ; preds = %while.cond3
+-  ret void
+-}
+-
+-define void @__gen_memcpy_pc(i8 addrspace(0)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  %cmp4 = icmp eq i32 %size, 0
+-  br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body:                                       ; preds = %entry, %while.body
+-  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+-  %0 = ptrtoint i8 addrspace(2)* %src to i32
+-  %1 = add i32 %0, %index.05
+-  %2 = inttoptr i32 %1 to i8 addrspace(2)*
+-  %3 = load i8 addrspace(2)* %2, align 1
+-  %4 = ptrtoint i8 addrspace(0)* %dst to i32
+-  %5 = add i32 %4, %index.05
+-  %6 = inttoptr i32 %5 to i8 addrspace(0)*
+-  store i8 %3, i8 addrspace(0)* %6, align 1
+-  %inc = add i32 %index.05, 1
+-  %cmp = icmp ult i32 %inc, %size
+-  br i1 %cmp, label %while.body, label %while.end
+-
+-while.end:                                        ; preds = %while.body, %entry
+-  ret void
+-}
+-
+-define void @__gen_memcpy_gc(i8 addrspace(1)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  %cmp4 = icmp eq i32 %size, 0
+-  br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body:                                       ; preds = %entry, %while.body
+-  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+-  %0 = ptrtoint i8 addrspace(2)* %src to i32
+-  %1 = add i32 %0, %index.05
+-  %2 = inttoptr i32 %1 to i8 addrspace(2)*
+-  %3 = load i8 addrspace(2)* %2, align 1
+-  %4 = ptrtoint i8 addrspace(1)* %dst to i32
+-  %5 = add i32 %4, %index.05
+-  %6 = inttoptr i32 %5 to i8 addrspace(1)*
+-  store i8 %3, i8 addrspace(1)* %6, align 1
+-  %inc = add i32 %index.05, 1
+-  %cmp = icmp ult i32 %inc, %size
+-  br i1 %cmp, label %while.body, label %while.end
+-
+-while.end:                                        ; preds = %while.body, %entry
+-  ret void
+-}
+-
+-define void @__gen_memcpy_lc(i8 addrspace(3)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline {
+-entry:
+-  %cmp4 = icmp eq i32 %size, 0
+-  br i1 %cmp4, label %while.end, label %while.body
+-
+-while.body:                                       ; preds = %entry, %while.body
+-  %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+-  %0 = ptrtoint i8 addrspace(2)* %src to i32
+-  %1 = add i32 %0, %index.05
+-  %2 = inttoptr i32 %1 to i8 addrspace(2)*
+-  %3 = load i8 addrspace(2)* %2, align 1
+-  %4 = ptrtoint i8 addrspace(3)* %dst to i32
+-  %5 = add i32 %4, %index.05
+-  %6 = inttoptr i32 %5 to i8 addrspace(3)*
+-  store i8 %3, i8 addrspace(3)* %6, align 1
+-  %inc = add i32 %index.05, 1
+-  %cmp = icmp ult i32 %inc, %size
+-  br i1 %cmp, label %while.body, label %while.end
+-
+-while.end:                                        ; preds = %while.body, %entry
+-  ret void
+-}
+diff --git a/backend/src/libocl/src/ocl_memset.cl b/backend/src/libocl/src/ocl_memset.cl
+new file mode 100644
+index 0000000..b41851a
+--- /dev/null
++++ b/backend/src/libocl/src/ocl_memset.cl
+@@ -0,0 +1,44 @@
++/*
++ * Copyright © 2012 - 2014 Intel Corporation
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***