From owner-svn-ports-all@freebsd.org Sat Apr 2 12:58:35 2016 Return-Path: Delivered-To: svn-ports-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 99EAFB00B51; Sat, 2 Apr 2016 12:58:35 +0000 (UTC) (envelope-from kwm@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 306C812FB; Sat, 2 Apr 2016 12:58:35 +0000 (UTC) (envelope-from kwm@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u32CwYrD087461; Sat, 2 Apr 2016 12:58:34 GMT (envelope-from kwm@FreeBSD.org) Received: (from kwm@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u32CwY50087457; Sat, 2 Apr 2016 12:58:34 GMT (envelope-from kwm@FreeBSD.org) Message-Id: <201604021258.u32CwY50087457@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: kwm set sender to kwm@FreeBSD.org using -f From: Koop Mast Date: Sat, 2 Apr 2016 12:58:34 +0000 (UTC) To: ports-committers@freebsd.org, svn-ports-all@freebsd.org, svn-ports-head@freebsd.org Subject: svn commit: r412405 - in head/lang/beignet: . files X-SVN-Group: ports-head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-ports-all@freebsd.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: SVN commit messages for the ports tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 02 Apr 2016 12:58:35 -0000 Author: kwm Date: Sat Apr 2 12:58:34 2016 New Revision: 412405 URL: https://svnweb.freebsd.org/changeset/ports/412405 Log: Add two patches from upstream to support llvm37. And switch to using llvm37 so beignet is now using the same llvm version as the Mesa ports. 
Obtained from: beignet upstream Added: head/lang/beignet/files/llvm37-27522f9 (contents, props changed) head/lang/beignet/files/llvm37-68b5180 (contents, props changed) Modified: head/lang/beignet/Makefile head/lang/beignet/pkg-plist Modified: head/lang/beignet/Makefile ============================================================================== --- head/lang/beignet/Makefile Sat Apr 2 12:52:08 2016 (r412404) +++ head/lang/beignet/Makefile Sat Apr 2 12:58:34 2016 (r412405) @@ -3,6 +3,7 @@ PORTNAME= beignet PORTVERSION= 1.1.1 +PORTREVISION= 1 CATEGORIES= lang MASTER_SITES= https://01.org/sites/default/files/ DISTVERSIONSUFFIX= -source @@ -10,11 +11,12 @@ DISTVERSIONSUFFIX= -source MAINTAINER= x11@FreeBSD.org COMMENT= OpenCL library for Intel GPUs -BUILD_DEPENDS= clang${LLVMVER}:lang/clang${LLVMVER} \ +BUILD_DEPENDS= clang${LLVMVER}:devel/llvm${LLVMVER} \ opencl>=0:devel/opencl LIB_DEPENDS= libOpenCL.so:devel/ocl-icd \ libdrm.so:graphics/libdrm -RUN_DEPENDS= opencl>=0:devel/opencl +RUN_DEPENDS= opencl>=0:devel/opencl \ + clang${LLVMVER}:devel/llvm${LLVMVER} WRKSRC= ${WRKDIR}/Beignet-${PORTVERSION}-Source @@ -22,7 +24,7 @@ USES= cmake gmake pkgconfig shebangfix USE_XORG= sm ice x11 xext xfixes USE_GL= gl SHEBANG_FILES= src/git_sha1.sh backend/kernels/compile.sh -LLVMVER= 36 +LLVMVER= 37 CMAKE_ARGS+= -DLLVM_CONFIG_EXECUTABLE=${LOCALBASE}/bin/llvm-config${LLVMVER} @@ -35,6 +37,9 @@ BROKEN_FreeBSD_9= Beignet is only suppor #OPTIONS_DEFINE= TESTS #TESTS_DESC= Build and run the test suite +EXTRA_PATCHES= ${PATCHDIR}/llvm37-68b5180:-p1 \ + ${PATCHDIR}/llvm37-27522f9:-p1 + .include .if ${OPSYS} == FreeBSD && ${OSVERSION} >= 1000000 && ${OSVERSION} < 1002000 Added: head/lang/beignet/files/llvm37-27522f9 ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/lang/beignet/files/llvm37-27522f9 Sat Apr 2 12:58:34 2016 (r412405) @@ -0,0 +1,1185 @@ +From 
27522f9c83303078be7d927a23f2a43c587efc9a Mon Sep 17 00:00:00 2001 +From: Yang Rong +Date: Wed, 16 Sep 2015 16:49:35 +0800 +Subject: GBE: use opencl c to implement llvm.memset and llvm.memcpy. + +llvm 3.7 change to llvm IR, need two copies if still use the llvm IR +to implement llvm.memset and llvm.memcpy. And opencl c is more clearly. + +Signed-off-by: Yang Rong +Reviewed-by: Ruiling Song +Reviewed-by: Igor Gnatenko + +diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt +index 0cd1eef..0fffd9b 100644 +--- a/backend/src/libocl/CMakeLists.txt ++++ b/backend/src/libocl/CMakeLists.txt +@@ -52,7 +52,8 @@ FOREACH(M ${OCL_COPY_HEADERS}) + COPY_THE_HEADER(${M}) + ENDFOREACH(M) + +-SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_misc ocl_vload ocl_geometric ocl_image) ++SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_memcpy ++ ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image) + FOREACH(M ${OCL_COPY_MODULES}) + COPY_THE_HEADER(${M}) + COPY_THE_SOURCE(${M}) +@@ -181,7 +182,7 @@ MACRO(ADD_LL_TO_BC_TARGET M) + ) + ENDMACRO(ADD_LL_TO_BC_TARGET) + +-SET (OCL_LL_MODULES ocl_barrier ocl_memcpy ocl_memset ocl_clz) ++SET (OCL_LL_MODULES ocl_barrier ocl_clz) + FOREACH(f ${OCL_LL_MODULES}) + COPY_THE_LL(${f}) + ADD_LL_TO_BC_TARGET(${f}) +diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h +index a4af4aa..7897567 100644 +--- a/backend/src/libocl/include/ocl.h ++++ b/backend/src/libocl/include/ocl.h +@@ -30,6 +30,7 @@ + #include "ocl_image.h" + #include "ocl_integer.h" + #include "ocl_math.h" ++#include "ocl_memcpy.h" + #include "ocl_misc.h" + #include "ocl_printf.h" + #include "ocl_relational.h" +diff --git a/backend/src/libocl/include/ocl_memcpy.h b/backend/src/libocl/include/ocl_memcpy.h +new file mode 100644 +index 0000000..2672298 +--- /dev/null ++++ b/backend/src/libocl/include/ocl_memcpy.h +@@ -0,0 +1,51 @@ ++/* ++ * Copyright © 2012 - 2014 Intel Corporation ++ * ++ * This 
library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library. If not, see . ++ * ++ */ ++#ifndef __OCL_MEMCPY_H__ ++#define __OCL_MEMCPY_H__ ++#include "ocl_types.h" ++ ++///////////////////////////////////////////////////////////////////////////// ++// memcopy functions ++///////////////////////////////////////////////////////////////////////////// ++void __gen_memcpy_gg_align(__global uchar* dst, __global uchar* src, size_t size); ++void __gen_memcpy_gp_align(__global uchar* dst, __private uchar* src, size_t size); ++void __gen_memcpy_gl_align(__global uchar* dst, __local uchar* src, size_t size); ++void __gen_memcpy_gc_align(__global uchar* dst, __constant uchar* src, size_t size); ++void __gen_memcpy_pg_align(__private uchar* dst, __global uchar* src, size_t size); ++void __gen_memcpy_pp_align(__private uchar* dst, __private uchar* src, size_t size); ++void __gen_memcpy_pl_align(__private uchar* dst, __local uchar* src, size_t size); ++void __gen_memcpy_pc_align(__private uchar* dst, __constant uchar* src, size_t size); ++void __gen_memcpy_lg_align(__local uchar* dst, __global uchar* src, size_t size); ++void __gen_memcpy_lp_align(__local uchar* dst, __private uchar* src, size_t size); ++void __gen_memcpy_ll_align(__local uchar* dst, __local uchar* src, size_t size); ++void __gen_memcpy_lc_align(__local uchar* dst, __constant uchar* src, size_t size); ++ ++void __gen_memcpy_gg(__global uchar* dst, 
__global uchar* src, size_t size); ++void __gen_memcpy_gp(__global uchar* dst, __private uchar* src, size_t size); ++void __gen_memcpy_gl(__global uchar* dst, __local uchar* src, size_t size); ++void __gen_memcpy_gc(__global uchar* dst, __constant uchar* src, size_t size); ++void __gen_memcpy_pg(__private uchar* dst, __global uchar* src, size_t size); ++void __gen_memcpy_pp(__private uchar* dst, __private uchar* src, size_t size); ++void __gen_memcpy_pl(__private uchar* dst, __local uchar* src, size_t size); ++void __gen_memcpy_pc(__private uchar* dst, __constant uchar* src, size_t size); ++void __gen_memcpy_lg(__local uchar* dst, __global uchar* src, size_t size); ++void __gen_memcpy_lp(__local uchar* dst, __private uchar* src, size_t size); ++void __gen_memcpy_ll(__local uchar* dst, __local uchar* src, size_t size); ++void __gen_memcpy_lc(__local uchar* dst, __constant uchar* src, size_t size); ++ ++#endif /* __OCL_MEMCPY_H__ */ +diff --git a/backend/src/libocl/include/ocl_memset.h b/backend/src/libocl/include/ocl_memset.h +new file mode 100644 +index 0000000..2d444ad +--- /dev/null ++++ b/backend/src/libocl/include/ocl_memset.h +@@ -0,0 +1,33 @@ ++/* ++ * Copyright © 2012 - 2014 Intel Corporation ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library. If not, see . 
++ * ++ */ ++#ifndef __OCL_MEMSET_H__ ++#define __OCL_MEMSET_H__ ++#include "ocl_types.h" ++ ++///////////////////////////////////////////////////////////////////////////// ++// memset functions ++///////////////////////////////////////////////////////////////////////////// ++void __gen_memset_g_align(__global uchar* dst, uchar val, size_t size); ++void __gen_memset_p_align(__private uchar* dst, uchar val, size_t size); ++void __gen_memset_l_align(__local uchar* dst, uchar val, size_t size); ++ ++void __gen_memset_g(__global uchar* dst, uchar val, size_t size); ++void __gen_memset_p(__private uchar* dst, uchar val, size_t size); ++void __gen_memset_l(__local uchar* dst, uchar val, size_t size); ++ ++#endif /* __OCL_MEMSET_H__ */ +diff --git a/backend/src/libocl/src/ocl_memcpy.cl b/backend/src/libocl/src/ocl_memcpy.cl +new file mode 100644 +index 0000000..85f490f +--- /dev/null ++++ b/backend/src/libocl/src/ocl_memcpy.cl +@@ -0,0 +1,49 @@ ++/* ++ * Copyright © 2012 - 2014 Intel Corporation ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library. If not, see . 
++ * ++ */ ++#include "ocl_memcpy.h" ++ ++#define DECL_TWO_SPACE_MEMCOPY_FN(NAME, DST_SPACE, SRC_SPACE) \ ++void __gen_memcpy_ ##NAME## _align (DST_SPACE uchar* dst, SRC_SPACE uchar* src, size_t size) { \ ++ size_t index = 0; \ ++ while((index + 4) <= size) { \ ++ *((DST_SPACE uint *)(dst + index)) = *((SRC_SPACE uint *)(src + index)); \ ++ index += 4; \ ++ } \ ++ while(index < size) { \ ++ dst[index] = src[index]; \ ++ index++; \ ++ } \ ++} \ ++void __gen_memcpy_ ##NAME (DST_SPACE uchar* dst, SRC_SPACE uchar* src, size_t size) { \ ++ size_t index = 0; \ ++ while(index < size) { \ ++ dst[index] = src[index]; \ ++ index++; \ ++ } \ ++} ++ ++#define DECL_ONE_SPACE_MEMCOPY_FN(NAME, DST_SPACE) \ ++ DECL_TWO_SPACE_MEMCOPY_FN( NAME## g, DST_SPACE, __global) \ ++ DECL_TWO_SPACE_MEMCOPY_FN( NAME## l, DST_SPACE, __local) \ ++ DECL_TWO_SPACE_MEMCOPY_FN( NAME## p, DST_SPACE, __private) \ ++ DECL_TWO_SPACE_MEMCOPY_FN( NAME## c, DST_SPACE, __constant) ++ ++DECL_ONE_SPACE_MEMCOPY_FN(g, __global) ++DECL_ONE_SPACE_MEMCOPY_FN(l, __local) ++DECL_ONE_SPACE_MEMCOPY_FN(p, __private) ++ +diff --git a/backend/src/libocl/src/ocl_memcpy.ll b/backend/src/libocl/src/ocl_memcpy.ll +deleted file mode 100644 +index b3fadb2..0000000 +--- a/backend/src/libocl/src/ocl_memcpy.ll ++++ /dev/null +@@ -1,729 +0,0 @@ +-;The memcpy's source code. 
+-; INLINE_OVERLOADABLE void __gen_memcpy_align(uchar* dst, uchar* src, size_t size) { +-; size_t index = 0; +-; while((index + 4) <= size) { +-; *((uint *)(dst + index)) = *((uint *)(src + index)); +-; index += 4; +-; } +-; while(index < size) { +-; dst[index] = src[index]; +-; index++; +-; } +-; } +- +-define void @__gen_memcpy_gg_align(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* +- %1 = load i32 addrspace(1)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* +- store i32 %1, i32 addrspace(1)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1 +- %3 = load i8 addrspace(1)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_gp_align(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = 
add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* +- %1 = load i32 addrspace(0)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* +- store i32 %1, i32 addrspace(1)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 +- %3 = load i8 addrspace(0)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_gl_align(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* +- %1 = load i32 addrspace(3)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* +- store i32 %1, i32 addrspace(1)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = 
%while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 +- %3 = load i8 addrspace(3)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_pg_align(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* +- %1 = load i32 addrspace(1)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* +- store i32 %1, i32 addrspace(0)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1 +- %3 = load i8 addrspace(1)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = 
%while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_pp_align(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* +- %1 = load i32 addrspace(0)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* +- store i32 %1, i32 addrspace(0)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 +- %3 = load i8 addrspace(0)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_pl_align(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 
addrspace(3)* +- %1 = load i32 addrspace(3)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* +- store i32 %1, i32 addrspace(0)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 +- %3 = load i8 addrspace(3)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_lg_align(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* +- %1 = load i32 addrspace(1)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* +- store i32 %1, i32 addrspace(3)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 
addrspace(1)* %src, i32 %index.1 +- %3 = load i8 addrspace(1)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_lp_align(i8 addrspace(3)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* +- %1 = load i32 addrspace(0)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* +- store i32 %1, i32 addrspace(3)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 +- %3 = load i8 addrspace(0)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_ll_align(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, 
%while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* +- %1 = load i32 addrspace(3)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* +- store i32 %1, i32 addrspace(3)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 +- %3 = load i8 addrspace(3)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-;The memcpy's source code. 
+-; INLINE_OVERLOADABLE void __gen_memcpy(uchar* dst, uchar* src, size_t size) { +-; size_t index = 0; +-; while(index < size) { +-; dst[index] = src[index]; +-; index++; +-; } +-; } +- +-define void @__gen_memcpy_gg(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(1)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(1)* +- %3 = load i8 addrspace(1)* %2, align 1 +- %4 = ptrtoint i8 addrspace(1)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(1)* +- store i8 %3, i8 addrspace(1)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_gp(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(0)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(0)* +- %3 = load i8 addrspace(0)* %2, align 1 +- %4 = ptrtoint i8 addrspace(1)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(1)* +- store i8 %3, i8 addrspace(1)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_gl(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, 
label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(3)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(3)* +- %3 = load i8 addrspace(3)* %2, align 1 +- %4 = ptrtoint i8 addrspace(1)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(1)* +- store i8 %3, i8 addrspace(1)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_pg(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(1)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(1)* +- %3 = load i8 addrspace(1)* %2, align 1 +- %4 = ptrtoint i8 addrspace(0)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(0)* +- store i8 %3, i8 addrspace(0)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_pp(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(0)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(0)* +- %3 = load i8 addrspace(0)* %2, align 1 +- %4 = ptrtoint i8 addrspace(0)* %dst to i32 +- %5 
= add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(0)* +- store i8 %3, i8 addrspace(0)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_pl(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(3)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(3)* +- %3 = load i8 addrspace(3)* %2, align 1 +- %4 = ptrtoint i8 addrspace(0)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(0)* +- store i8 %3, i8 addrspace(0)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_lg(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(1)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(1)* +- %3 = load i8 addrspace(1)* %2, align 1 +- %4 = ptrtoint i8 addrspace(3)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(3)* +- store i8 %3, i8 addrspace(3)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_lp(i8 addrspace(3)* %dst, i8 
addrspace(0)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(0)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(0)* +- %3 = load i8 addrspace(0)* %2, align 1 +- %4 = ptrtoint i8 addrspace(3)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(3)* +- store i8 %3, i8 addrspace(3)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_ll(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(3)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(3)* +- %3 = load i8 addrspace(3)* %2, align 1 +- %4 = ptrtoint i8 addrspace(3)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(3)* +- store i8 %3, i8 addrspace(3)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_gc_align(i8 addrspace(1)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond 
+- %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* +- %1 = load i32 addrspace(2)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* +- store i32 %1, i32 addrspace(1)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1 +- %3 = load i8 addrspace(2)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_pc_align(i8 addrspace(0)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* +- %1 = load i32 addrspace(2)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* +- store i32 %1, i32 addrspace(0)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, 
label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1 +- %3 = load i8 addrspace(2)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_lc_align(i8 addrspace(3)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +-entry: +- br label %while.cond +- +-while.cond: ; preds = %while.body, %entry +- %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] +- %add = add i32 %index.0, 4 +- %cmp = icmp ugt i32 %add, %size +- br i1 %cmp, label %while.cond3, label %while.body +- +-while.body: ; preds = %while.cond +- %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0 +- %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* +- %1 = load i32 addrspace(2)* %0, align 4 +- %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 +- %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* +- store i32 %1, i32 addrspace(3)* %2, align 4 +- br label %while.cond +- +-while.cond3: ; preds = %while.cond, %while.body5 +- %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] +- %cmp4 = icmp ult i32 %index.1, %size +- br i1 %cmp4, label %while.body5, label %while.end7 +- +-while.body5: ; preds = %while.cond3 +- %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1 +- %3 = load i8 addrspace(2)* %arrayidx, align 1 +- %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 +- store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 +- %inc = add i32 %index.1, 1 +- br label %while.cond3 +- +-while.end7: ; preds = %while.cond3 +- ret void +-} +- +-define void @__gen_memcpy_pc(i8 addrspace(0)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +-entry: +- 
%cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(2)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(2)* +- %3 = load i8 addrspace(2)* %2, align 1 +- %4 = ptrtoint i8 addrspace(0)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(0)* +- store i8 %3, i8 addrspace(0)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_gc(i8 addrspace(1)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(2)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(2)* +- %3 = load i8 addrspace(2)* %2, align 1 +- %4 = ptrtoint i8 addrspace(1)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(1)* +- store i8 %3, i8 addrspace(1)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +- +-define void @__gen_memcpy_lc(i8 addrspace(3)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +-entry: +- %cmp4 = icmp eq i32 %size, 0 +- br i1 %cmp4, label %while.end, label %while.body +- +-while.body: ; preds = %entry, %while.body +- %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] +- %0 = ptrtoint i8 addrspace(2)* %src to i32 +- %1 = add i32 %0, %index.05 +- %2 = inttoptr i32 %1 to i8 addrspace(2)* +- %3 = load i8 addrspace(2)* %2, align 1 +- %4 = 
ptrtoint i8 addrspace(3)* %dst to i32 +- %5 = add i32 %4, %index.05 +- %6 = inttoptr i32 %5 to i8 addrspace(3)* +- store i8 %3, i8 addrspace(3)* %6, align 1 +- %inc = add i32 %index.05, 1 +- %cmp = icmp ult i32 %inc, %size +- br i1 %cmp, label %while.body, label %while.end +- +-while.end: ; preds = %while.body, %entry +- ret void +-} +diff --git a/backend/src/libocl/src/ocl_memset.cl b/backend/src/libocl/src/ocl_memset.cl +new file mode 100644 +index 0000000..b41851a +--- /dev/null ++++ b/backend/src/libocl/src/ocl_memset.cl +@@ -0,0 +1,44 @@ ++/* ++ * Copyright © 2012 - 2014 Intel Corporation ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***