Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 18 Mar 2020 18:35:00 +0000 (UTC)
From:      Li-Wen Hsu <lwhsu@FreeBSD.org>
To:        ports-committers@freebsd.org, svn-ports-all@freebsd.org, svn-ports-head@freebsd.org
Subject:   svn commit: r528660 - in head/editors/libreoffice: . files
Message-ID:  <202003181835.02IIZ00T021153@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: lwhsu
Date: Wed Mar 18 18:35:00 2020
New Revision: 528660
URL: https://svnweb.freebsd.org/changeset/ports/528660

Log:
  Fix build with clang10
  
  PR:		244850
  Reported by:	cy
  Submitted by:	Trond.Endrestol@ximalas.info
  Obtained from:	https://github.com/LibreOffice/core (partially)

Added:
  head/editors/libreoffice/files/patch-clang10   (contents, props changed)
Modified:
  head/editors/libreoffice/Makefile

Modified: head/editors/libreoffice/Makefile
==============================================================================
--- head/editors/libreoffice/Makefile	Wed Mar 18 17:54:29 2020	(r528659)
+++ head/editors/libreoffice/Makefile	Wed Mar 18 18:35:00 2020	(r528660)
@@ -1,6 +1,6 @@
 # $FreeBSD$
 
-PORTREVISION=	3
+PORTREVISION=	4
 
 .include "${.CURDIR}/Makefile.common"
 

Added: head/editors/libreoffice/files/patch-clang10
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/editors/libreoffice/files/patch-clang10	Wed Mar 18 18:35:00 2020	(r528660)
@@ -0,0 +1,931 @@
+--- vcl/inc/unx/saltype.h	2019-12-05 20:59:23.000000000 +0100
++++ vcl/inc/unx/saltype.h	2020-03-17 18:23:05.585171000 +0100
+@@ -18,8 +18,8 @@
+ public:
+     explicit SalX11Screen(unsigned int nXScreen) : mnXScreen( nXScreen ) {}
+     unsigned int getXScreen() const { return mnXScreen; }
+-    bool operator==(const SalX11Screen &rOther) { return rOther.mnXScreen == mnXScreen; }
+-    bool operator!=(const SalX11Screen &rOther) { return rOther.mnXScreen != mnXScreen; }
++    bool operator==(const SalX11Screen &rOther) const { return rOther.mnXScreen == mnXScreen; }
++    bool operator!=(const SalX11Screen &rOther) const { return rOther.mnXScreen != mnXScreen; }
+ };
+ 
+ #endif // INCLUDED_VCL_INC_UNX_SALTYPE_H
+--- sd/source/ui/framework/factories/BasicPaneFactory.cxx	2019-12-05 20:59:23.000000000 +0100
++++ sd/source/ui/framework/factories/BasicPaneFactory.cxx	2020-03-17 20:51:22.331805000 +0100
+@@ -324,7 +324,7 @@
+ void SAL_CALL BasicPaneFactory::disposing (
+     const lang::EventObject& rEventObject)
+ {
+-    if (mxConfigurationControllerWeak == rEventObject.Source)
++    if (mxConfigurationControllerWeak.get() == rEventObject.Source)
+     {
+         mxConfigurationControllerWeak.clear();
+     }
+--- sd/inc/OutlinerIterator.hxx	2019-12-05 20:59:23.000000000 +0100
++++ sd/inc/OutlinerIterator.hxx	2020-03-17 21:20:36.906085000 +0100
+@@ -122,7 +122,7 @@
+         @return
+             Returns <TRUE/> when both iterators point to the same object.
+     */
+-    bool operator== (const Iterator& rIterator);
++    bool operator== (const Iterator& rIterator) const;
+     /** Test whether two iterators point to different objects.  This is just
+         the negation of the result of the equality operator.
+         @param rIterator
+@@ -130,7 +130,7 @@
+         @return
+             Returns <TRUE/> when both iterators point to the different objects.
+     */
+-    bool operator!= (const Iterator& rIterator);
++    bool operator!= (const Iterator& rIterator) const;
+     /** Reverse the direction of iteration.  The position of the iterator is
+         not changed.  Thus calling this method twice returns to the old state.
+     */
+--- sd/source/ui/view/OutlinerIterator.cxx.orig	2019-12-05 20:59:23.000000000 +0100
++++ sd/source/ui/view/OutlinerIterator.cxx	2020-03-17 21:24:11.082383000 +0100
+@@ -110,7 +110,7 @@
+     return *this;
+ }
+ 
+-bool Iterator::operator== (const Iterator& rIterator)
++bool Iterator::operator== (const Iterator& rIterator) const
+ {
+     if (!mxIterator || !rIterator.mxIterator)
+         return mxIterator.get() == rIterator.mxIterator.get();
+@@ -118,7 +118,7 @@
+         return *mxIterator == *rIterator.mxIterator;
+ }
+ 
+-bool Iterator::operator!= (const Iterator& rIterator)
++bool Iterator::operator!= (const Iterator& rIterator) const
+ {
+     return ! operator==(rIterator);
+ }
+--- compilerplugins/clang/simplifybool.cxx	2019-12-05 20:59:23.000000000 +0100
++++ compilerplugins/clang/simplifybool.cxx	2020-03-17 22:03:11.369300000 +0100
+@@ -241,7 +241,30 @@
+             << expr->getSourceRange();
+         return true;
+     }
+-    if (auto binaryOp = dyn_cast<BinaryOperator>(expr->getSubExpr()->IgnoreParenImpCasts())) {
++    auto sub = expr->getSubExpr()->IgnoreParenImpCasts();
++    auto reversed = false;
++#if CLANG_VERSION >= 100000
++    if (auto const rewritten = dyn_cast<CXXRewrittenBinaryOperator>(sub)) {
++        if (rewritten->isReversed()) {
++            if (rewritten->getOperator() == BO_EQ) {
++                auto const sem = rewritten->getSemanticForm();
++                bool match;
++                if (auto const op1 = dyn_cast<BinaryOperator>(sem)) {
++                    match = op1->getOpcode() == BO_EQ;
++                } else if (auto const op2 = dyn_cast<CXXOperatorCallExpr>(sem)) {
++                    match = op2->getOperator() == OO_EqualEqual;
++                } else {
++                    match = false;
++                }
++                if (match) {
++                    sub = sem;
++                    reversed = true;
++                }
++            }
++        }
++    }
++#endif
++    if (auto binaryOp = dyn_cast<BinaryOperator>(sub)) {
+         // Ignore macros, otherwise
+         //    OSL_ENSURE(!b, ...);
+         // triggers.
+@@ -289,7 +312,7 @@
+                     << binaryOp->getSourceRange();
+         }
+     }
+-    if (auto binaryOp = dyn_cast<CXXOperatorCallExpr>(expr->getSubExpr()->IgnoreParenImpCasts())) {
++    if (auto binaryOp = dyn_cast<CXXOperatorCallExpr>(sub)) {
+         // Ignore macros, otherwise
+         //    OSL_ENSURE(!b, ...);
+         // triggers.
+@@ -301,8 +324,8 @@
+         if (!(op == OO_EqualEqual || op == OO_ExclaimEqual))
+             return true;
+         BinaryOperator::Opcode negatedOpcode = BinaryOperator::negateComparisonOp(BinaryOperator::getOverloadedOpcode(op));
+-        auto lhs = binaryOp->getArg(0)->IgnoreImpCasts()->getType()->getUnqualifiedDesugaredType();
+-        auto rhs = binaryOp->getArg(1)->IgnoreImpCasts()->getType()->getUnqualifiedDesugaredType();
++        auto lhs = binaryOp->getArg(reversed ? 1 : 0)->IgnoreImpCasts()->getType()->getUnqualifiedDesugaredType();
++        auto rhs = binaryOp->getArg(reversed ? 0 : 1)->IgnoreImpCasts()->getType()->getUnqualifiedDesugaredType();
+         auto const negOp = findOperator(compiler, negatedOpcode, lhs, rhs);
+         if (!negOp)
+             return true;
+@@ -323,8 +346,10 @@
+             << expr->getSourceRange();
+         if (negOp != ASSUME_OPERATOR_EXISTS)
+             report(
+-                DiagnosticsEngine::Note, "the presumed corresponding negated operator is declared here",
++                DiagnosticsEngine::Note, "the presumed corresponding negated operator for %0 and %1 is declared here",
+                 negOp->getLocation())
++                << binaryOp->getArg(reversed ? 1 : 0)->IgnoreImpCasts()->getType()
++                << binaryOp->getArg(reversed ? 0 : 1)->IgnoreImpCasts()->getType()
+                 << negOp->getSourceRange();
+     }
+     return true;
+--- cui/source/tabpages/tpline.cxx	2019-12-05 20:59:23.000000000 +0100
++++ cui/source/tabpages/tpline.cxx	2020-03-17 22:06:49.493222000 +0100
+@@ -491,7 +491,7 @@
+             else if( m_pLineEndList->Count() > static_cast<long>( nPos - 1 ) )
+                 pItem.reset(new XLineStartItem( m_xLbStartStyle->get_active_text(), m_pLineEndList->GetLineEnd( nPos - 1 )->GetLineEnd() ));
+             pOld = GetOldItem( *rAttrs, XATTR_LINESTART );
+-            if( pItem && ( !pOld || !( *static_cast<const XLineEndItem*>(pOld) == *pItem ) ) )
++            if( pItem && ( !pOld || *pOld != *pItem ) )
+             {
+                 rAttrs->Put( *pItem );
+                 bModified = true;
+--- sc/source/ui/view/viewfunc.cxx.orig	2019-12-05 20:59:23.000000000 +0100
++++ sc/source/ui/view/viewfunc.cxx	2020-03-17 23:58:50.978995000 +0100
+@@ -958,7 +958,7 @@
+ 
+     //  this should be intercepted by the pool: ?!??!??
+ 
+-    if (bFrame && rNewOuter == rOldOuter && rNewInner == rOldInner)
++    if (bFrame && &rNewOuter == &rOldOuter && &rNewInner == &rOldInner)
+         bFrame = false;
+ 
+     bFrame =   bFrame
+--- sc/source/core/opencl/formulagroupcl.cxx	2019-12-05 20:59:23.000000000 +0100
++++ sc/source/core/opencl/formulagroupcl.cxx	2020-03-18 00:44:08.091710000 +0100
+@@ -1026,9 +1026,6 @@
+ /// Handling a Double Vector that is used as a sliding window input
+ /// to either a sliding window average or sum-of-products
+ /// Generate a sequential loop for reductions
+-class OpAverage;
+-class OpCount;
+-
+ template<class Base>
+ class DynamicKernelSlidingArgument : public Base
+ {
+@@ -1335,186 +1332,8 @@
+     }
+ 
+     /// Emit the definition for the auxiliary reduction kernel
+-    virtual void GenSlidingWindowFunction( std::stringstream& ss )
+-    {
+-        if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
+-        {
+-            std::string name = Base::GetName();
+-            ss << "__kernel void " << name;
+-            ss << "_reduction(__global double* A, "
+-                "__global double *result,int arrayLength,int windowSize){\n";
+-            ss << "    double tmp, current_result =" <<
+-                mpCodeGen->GetBottom();
+-            ss << ";\n";
+-            ss << "    int writePos = get_group_id(1);\n";
+-            ss << "    int lidx = get_local_id(0);\n";
+-            ss << "    __local double shm_buf[256];\n";
+-            if (mpDVR->IsStartFixed())
+-                ss << "    int offset = 0;\n";
+-            else // if (!mpDVR->IsStartFixed())
+-                ss << "    int offset = get_group_id(1);\n";
+-            if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize;\n";
+-            else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+-                ss << "    int end = offset + windowSize;\n";
+-            else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize + get_group_id(1);\n";
+-            else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize;\n";
+-            ss << "    end = min(end, arrayLength);\n";
++    virtual void GenSlidingWindowFunction( std::stringstream& ss );
+ 
+-            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    int loop = arrayLength/512 + 1;\n";
+-            ss << "    for (int l=0; l<loop; l++){\n";
+-            ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
+-            ss << "    int loopOffset = l*512;\n";
+-            ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
+-            ss << "        tmp = legalize(" << mpCodeGen->Gen2(
+-                "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
+-            ss << "        tmp = legalize(" << mpCodeGen->Gen2(
+-                "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
+-            ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
+-            ss << "        tmp = legalize(" << mpCodeGen->Gen2(
+-                "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
+-            ss << "    shm_buf[lidx] = tmp;\n";
+-            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    for (int i = 128; i >0; i/=2) {\n";
+-            ss << "        if (lidx < i)\n";
+-            ss << "            shm_buf[lidx] = ";
+-            // Special case count
+-            if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+-                ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+-            else
+-                ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
+-            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    }\n";
+-            ss << "        if (lidx == 0)\n";
+-            ss << "            current_result =";
+-            if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+-                ss << "current_result + shm_buf[0]";
+-            else
+-                ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
+-            ss << ";\n";
+-            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    }\n";
+-            ss << "    if (lidx == 0)\n";
+-            ss << "        result[writePos] = current_result;\n";
+-            ss << "}\n";
+-        }
+-        else
+-        {
+-            std::string name = Base::GetName();
+-            /*sum reduction*/
+-            ss << "__kernel void " << name << "_sum";
+-            ss << "_reduction(__global double* A, "
+-                "__global double *result,int arrayLength,int windowSize){\n";
+-            ss << "    double tmp, current_result =" <<
+-                mpCodeGen->GetBottom();
+-            ss << ";\n";
+-            ss << "    int writePos = get_group_id(1);\n";
+-            ss << "    int lidx = get_local_id(0);\n";
+-            ss << "    __local double shm_buf[256];\n";
+-            if (mpDVR->IsStartFixed())
+-                ss << "    int offset = 0;\n";
+-            else // if (!mpDVR->IsStartFixed())
+-                ss << "    int offset = get_group_id(1);\n";
+-            if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize;\n";
+-            else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+-                ss << "    int end = offset + windowSize;\n";
+-            else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize + get_group_id(1);\n";
+-            else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize;\n";
+-            ss << "    end = min(end, arrayLength);\n";
+-            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    int loop = arrayLength/512 + 1;\n";
+-            ss << "    for (int l=0; l<loop; l++){\n";
+-            ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
+-            ss << "    int loopOffset = l*512;\n";
+-            ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
+-            ss << "        tmp = legalize(";
+-            ss << "(A[loopOffset + lidx + offset]+ tmp)";
+-            ss << ", tmp);\n";
+-            ss << "        tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
+-            ss << ", tmp);\n";
+-            ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
+-            ss << "        tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
+-            ss << ", tmp);\n";
+-            ss << "    shm_buf[lidx] = tmp;\n";
+-            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    for (int i = 128; i >0; i/=2) {\n";
+-            ss << "        if (lidx < i)\n";
+-            ss << "            shm_buf[lidx] = ";
+-            ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+-            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    }\n";
+-            ss << "        if (lidx == 0)\n";
+-            ss << "            current_result =";
+-            ss << "current_result + shm_buf[0]";
+-            ss << ";\n";
+-            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    }\n";
+-            ss << "    if (lidx == 0)\n";
+-            ss << "        result[writePos] = current_result;\n";
+-            ss << "}\n";
+-            /*count reduction*/
+-            ss << "__kernel void " << name << "_count";
+-            ss << "_reduction(__global double* A, "
+-                "__global double *result,int arrayLength,int windowSize){\n";
+-            ss << "    double tmp, current_result =" <<
+-                mpCodeGen->GetBottom();
+-            ss << ";\n";
+-            ss << "    int writePos = get_group_id(1);\n";
+-            ss << "    int lidx = get_local_id(0);\n";
+-            ss << "    __local double shm_buf[256];\n";
+-            if (mpDVR->IsStartFixed())
+-                ss << "    int offset = 0;\n";
+-            else // if (!mpDVR->IsStartFixed())
+-                ss << "    int offset = get_group_id(1);\n";
+-            if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize;\n";
+-            else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+-                ss << "    int end = offset + windowSize;\n";
+-            else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize + get_group_id(1);\n";
+-            else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize;\n";
+-            ss << "    end = min(end, arrayLength);\n";
+-            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    int loop = arrayLength/512 + 1;\n";
+-            ss << "    for (int l=0; l<loop; l++){\n";
+-            ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
+-            ss << "    int loopOffset = l*512;\n";
+-            ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
+-            ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
+-            ss << ", tmp);\n";
+-            ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
+-            ss << ", tmp);\n";
+-            ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
+-            ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
+-            ss << ", tmp);\n";
+-            ss << "    shm_buf[lidx] = tmp;\n";
+-            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    for (int i = 128; i >0; i/=2) {\n";
+-            ss << "        if (lidx < i)\n";
+-            ss << "            shm_buf[lidx] = ";
+-            ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+-            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    }\n";
+-            ss << "        if (lidx == 0)\n";
+-            ss << "            current_result =";
+-            ss << "current_result + shm_buf[0];";
+-            ss << ";\n";
+-            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    }\n";
+-            ss << "    if (lidx == 0)\n";
+-            ss << "        result[writePos] = current_result;\n";
+-            ss << "}\n";
+-        }
+-
+-    }
+-
+     virtual std::string GenSlidingWindowDeclRef( bool ) const
+     {
+         std::stringstream ss;
+@@ -1527,195 +1346,10 @@
+ 
+     /// Controls how the elements in the DoubleVectorRef are traversed
+     size_t GenReductionLoopHeader(
+-        std::stringstream& ss, int nResultSize, bool& needBody )
+-    {
+-        assert(mpDVR);
+-        size_t nCurWindowSize = mpDVR->GetRefRowSize();
+-        std::string temp = Base::GetName() + "[gid0]";
+-        ss << "tmp = ";
+-        // Special case count
+-        if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
+-        {
+-            ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
+-            ss << "nCount = nCount-1;\n";
+-            ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
+-            ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
+-        }
+-        else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+-            ss << temp << "+ tmp";
+-        else
+-            ss << mpCodeGen->Gen2(temp, "tmp");
+-        ss << ";\n\t";
+-        needBody = false;
+-        return nCurWindowSize;
+-    }
++        std::stringstream& ss, int nResultSize, bool& needBody );
+ 
+-    virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
+-    {
+-        assert(Base::mpClmem == nullptr);
++    virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram );
+ 
+-        openclwrapper::KernelEnv kEnv;
+-        openclwrapper::setKernelEnv(&kEnv);
+-        cl_int err;
+-        size_t nInput = mpDVR->GetArrayLength();
+-        size_t nCurWindowSize = mpDVR->GetRefRowSize();
+-        // create clmem buffer
+-        if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr)
+-            throw Unhandled(__FILE__, __LINE__);
+-        double* pHostBuffer = const_cast<double*>(
+-            mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
+-        size_t szHostBuffer = nInput * sizeof(double);
+-        Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
+-            cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
+-            szHostBuffer,
+-            pHostBuffer, &err);
+-        SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
+-
+-        mpClmem2 = clCreateBuffer(kEnv.mpkContext,
+-            CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+-            sizeof(double) * w, nullptr, nullptr);
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+-        SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
+-
+-        // reproduce the reduction function name
+-        std::string kernelName;
+-        if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
+-            kernelName = Base::GetName() + "_reduction";
+-        else
+-            kernelName = Base::GetName() + "_sum_reduction";
+-        cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
+-        if (err != CL_SUCCESS)
+-            throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
+-        SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
+-
+-        // set kernel arg of reduction kernel
+-        // TODO(Wei Wei): use unique name for kernel
+-        cl_mem buf = Base::GetCLBuffer();
+-        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
+-        err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
+-            static_cast<void*>(&buf));
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
+-        err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
+-        err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
+-        err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-        // set work group size and execute
+-        size_t global_work_size[] = { 256, static_cast<size_t>(w) };
+-        size_t const local_work_size[] = { 256, 1 };
+-        SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
+-        err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
+-            global_work_size, local_work_size, 0, nullptr, nullptr);
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
+-        err = clFinish(kEnv.mpkCmdQueue);
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clFinish", err, __FILE__, __LINE__);
+-        if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
+-        {
+-            /*average need more reduction kernel for count computing*/
+-            std::unique_ptr<double[]> pAllBuffer(new double[2 * w]);
+-            double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
+-                mpClmem2,
+-                CL_TRUE, CL_MAP_READ, 0,
+-                sizeof(double) * w, 0, nullptr, nullptr,
+-                &err));
+-            if (err != CL_SUCCESS)
+-                throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
+-
+-            for (int i = 0; i < w; i++)
+-                pAllBuffer[i] = resbuf[i];
+-            err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
+-            if (err != CL_SUCCESS)
+-                throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
+-
+-            kernelName = Base::GetName() + "_count_reduction";
+-            redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
+-            if (err != CL_SUCCESS)
+-                throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
+-            SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
+-
+-            // set kernel arg of reduction kernel
+-            buf = Base::GetCLBuffer();
+-            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
+-            err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
+-                static_cast<void*>(&buf));
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
+-            err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
+-            err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
+-            err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-            // set work group size and execute
+-            size_t global_work_size1[] = { 256, static_cast<size_t>(w) };
+-            size_t const local_work_size1[] = { 256, 1 };
+-            SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
+-            err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
+-                global_work_size1, local_work_size1, 0, nullptr, nullptr);
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
+-            err = clFinish(kEnv.mpkCmdQueue);
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clFinish", err, __FILE__, __LINE__);
+-            resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
+-                mpClmem2,
+-                CL_TRUE, CL_MAP_READ, 0,
+-                sizeof(double) * w, 0, nullptr, nullptr,
+-                &err));
+-            if (err != CL_SUCCESS)
+-                throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
+-            for (int i = 0; i < w; i++)
+-                pAllBuffer[i + w] = resbuf[i];
+-            err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
+-            // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
+-            if (CL_SUCCESS != err)
+-                SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
+-            if (mpClmem2)
+-            {
+-                err = clReleaseMemObject(mpClmem2);
+-                SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
+-                mpClmem2 = nullptr;
+-            }
+-            mpClmem2 = clCreateBuffer(kEnv.mpkContext,
+-                cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
+-                w * sizeof(double) * 2, pAllBuffer.get(), &err);
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+-            SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
+-        }
+-        // set kernel arg
+-        SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
+-        err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2);
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-        return 1;
+-    }
+-
+     ~ParallelReductionVectorRef()
+     {
+         if (mpClmem2)
+@@ -2314,6 +1948,380 @@
+     }
+     virtual std::string BinFuncName() const override { return "fsop"; }
+ };
++
++template<class Base>
++void ParallelReductionVectorRef<Base>::GenSlidingWindowFunction( std::stringstream& ss )
++{
++    if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
++    {
++        std::string name = Base::GetName();
++        ss << "__kernel void " << name;
++        ss << "_reduction(__global double* A, "
++            "__global double *result,int arrayLength,int windowSize){\n";
++        ss << "    double tmp, current_result =" <<
++            mpCodeGen->GetBottom();
++        ss << ";\n";
++        ss << "    int writePos = get_group_id(1);\n";
++        ss << "    int lidx = get_local_id(0);\n";
++        ss << "    __local double shm_buf[256];\n";
++        if (mpDVR->IsStartFixed())
++            ss << "    int offset = 0;\n";
++        else // if (!mpDVR->IsStartFixed())
++            ss << "    int offset = get_group_id(1);\n";
++        if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++            ss << "    int end = windowSize;\n";
++        else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++            ss << "    int end = offset + windowSize;\n";
++        else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++            ss << "    int end = windowSize + get_group_id(1);\n";
++        else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++            ss << "    int end = windowSize;\n";
++        ss << "    end = min(end, arrayLength);\n";
++
++        ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
++        ss << "    int loop = arrayLength/512 + 1;\n";
++        ss << "    for (int l=0; l<loop; l++){\n";
++        ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
++        ss << "    int loopOffset = l*512;\n";
++        ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
++        ss << "        tmp = legalize(" << mpCodeGen->Gen2(
++            "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
++        ss << "        tmp = legalize(" << mpCodeGen->Gen2(
++            "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
++        ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
++        ss << "        tmp = legalize(" << mpCodeGen->Gen2(
++            "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
++        ss << "    shm_buf[lidx] = tmp;\n";
++        ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
++        ss << "    for (int i = 128; i >0; i/=2) {\n";
++        ss << "        if (lidx < i)\n";
++        ss << "            shm_buf[lidx] = ";
++        // Special case count
++        if (dynamic_cast<OpCount*>(mpCodeGen.get()))
++            ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
++        else
++            ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
++        ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
++        ss << "    }\n";
++        ss << "        if (lidx == 0)\n";
++        ss << "            current_result =";
++        if (dynamic_cast<OpCount*>(mpCodeGen.get()))
++            ss << "current_result + shm_buf[0]";
++        else
++            ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
++        ss << ";\n";
++        ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
++        ss << "    }\n";
++        ss << "    if (lidx == 0)\n";
++        ss << "        result[writePos] = current_result;\n";
++        ss << "}\n";
++    }
++    else
++    {
++        std::string name = Base::GetName();
++        /*sum reduction*/
++        ss << "__kernel void " << name << "_sum";
++        ss << "_reduction(__global double* A, "
++            "__global double *result,int arrayLength,int windowSize){\n";
++        ss << "    double tmp, current_result =" <<
++            mpCodeGen->GetBottom();
++        ss << ";\n";
++        ss << "    int writePos = get_group_id(1);\n";
++        ss << "    int lidx = get_local_id(0);\n";
++        ss << "    __local double shm_buf[256];\n";
++        if (mpDVR->IsStartFixed())
++            ss << "    int offset = 0;\n";
++        else // if (!mpDVR->IsStartFixed())
++            ss << "    int offset = get_group_id(1);\n";
++        if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++            ss << "    int end = windowSize;\n";
++        else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++            ss << "    int end = offset + windowSize;\n";
++        else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++            ss << "    int end = windowSize + get_group_id(1);\n";
++        else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++            ss << "    int end = windowSize;\n";
++        ss << "    end = min(end, arrayLength);\n";
++        ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
++        ss << "    int loop = arrayLength/512 + 1;\n";
++        ss << "    for (int l=0; l<loop; l++){\n";
++        ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
++        ss << "    int loopOffset = l*512;\n";
++        ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
++        ss << "        tmp = legalize(";
++        ss << "(A[loopOffset + lidx + offset]+ tmp)";
++        ss << ", tmp);\n";
++        ss << "        tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
++        ss << ", tmp);\n";
++        ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
++        ss << "        tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
++        ss << ", tmp);\n";
++        ss << "    shm_buf[lidx] = tmp;\n";
++        ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
++        ss << "    for (int i = 128; i >0; i/=2) {\n";
++        ss << "        if (lidx < i)\n";
++        ss << "            shm_buf[lidx] = ";
++        ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
++        ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
++        ss << "    }\n";
++        ss << "        if (lidx == 0)\n";
++        ss << "            current_result =";
++        ss << "current_result + shm_buf[0]";
++        ss << ";\n";
++        ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
++        ss << "    }\n";
++        ss << "    if (lidx == 0)\n";
++        ss << "        result[writePos] = current_result;\n";
++        ss << "}\n";
++        /*count reduction*/
++        ss << "__kernel void " << name << "_count";
++        ss << "_reduction(__global double* A, "
++            "__global double *result,int arrayLength,int windowSize){\n";
++        ss << "    double tmp, current_result =" <<
++            mpCodeGen->GetBottom();
++        ss << ";\n";
++        ss << "    int writePos = get_group_id(1);\n";
++        ss << "    int lidx = get_local_id(0);\n";
++        ss << "    __local double shm_buf[256];\n";
++        if (mpDVR->IsStartFixed())
++            ss << "    int offset = 0;\n";
++        else // if (!mpDVR->IsStartFixed())
++            ss << "    int offset = get_group_id(1);\n";
++        if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++            ss << "    int end = windowSize;\n";
++        else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++            ss << "    int end = offset + windowSize;\n";
++        else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++            ss << "    int end = windowSize + get_group_id(1);\n";
++        else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++            ss << "    int end = windowSize;\n";
++        ss << "    end = min(end, arrayLength);\n";
++        ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
++        ss << "    int loop = arrayLength/512 + 1;\n";
++        ss << "    for (int l=0; l<loop; l++){\n";
++        ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
++        ss << "    int loopOffset = l*512;\n";
++        ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
++        ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
++        ss << ", tmp);\n";
++        ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
++        ss << ", tmp);\n";
++        ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
++        ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
++        ss << ", tmp);\n";
++        ss << "    shm_buf[lidx] = tmp;\n";
++        ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
++        ss << "    for (int i = 128; i >0; i/=2) {\n";
++        ss << "        if (lidx < i)\n";
++        ss << "            shm_buf[lidx] = ";
++        ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
++        ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
++        ss << "    }\n";
++        ss << "        if (lidx == 0)\n";
++        ss << "            current_result =";
++        ss << "current_result + shm_buf[0];";
++        ss << ";\n";
++        ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
++        ss << "    }\n";
++        ss << "    if (lidx == 0)\n";
++        ss << "        result[writePos] = current_result;\n";
++        ss << "}\n";
++    }
++
++}
++
++template<class Base>
++size_t ParallelReductionVectorRef<Base>::GenReductionLoopHeader(
++    std::stringstream& ss, int nResultSize, bool& needBody )
++{
++    assert(mpDVR);
++    size_t nCurWindowSize = mpDVR->GetRefRowSize();
++    std::string temp = Base::GetName() + "[gid0]";
++    ss << "tmp = ";
++    // Special case count
++    if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
++    {
++        ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
++        ss << "nCount = nCount-1;\n";
++        ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
++        ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
++    }
++    else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
++        ss << temp << "+ tmp";
++    else
++        ss << mpCodeGen->Gen2(temp, "tmp");
++    ss << ";\n\t";
++    needBody = false;
++    return nCurWindowSize;
++}
++
++template<class Base>
++size_t ParallelReductionVectorRef<Base>::Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
++{
++    assert(Base::mpClmem == nullptr);
++
++    openclwrapper::KernelEnv kEnv;
++    openclwrapper::setKernelEnv(&kEnv);
++    cl_int err;
++    size_t nInput = mpDVR->GetArrayLength();
++    size_t nCurWindowSize = mpDVR->GetRefRowSize();
++    // create clmem buffer
++    if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr)
++        throw Unhandled(__FILE__, __LINE__);
++    double* pHostBuffer = const_cast<double*>(
++        mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
++    size_t szHostBuffer = nInput * sizeof(double);
++    Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
++        cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
++        szHostBuffer,
++        pHostBuffer, &err);
++    SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
++
++    mpClmem2 = clCreateBuffer(kEnv.mpkContext,
++        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
++        sizeof(double) * w, nullptr, nullptr);
++    if (CL_SUCCESS != err)
++        throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
++    SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
++
++    // reproduce the reduction function name
++    std::string kernelName;
++    if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
++        kernelName = Base::GetName() + "_reduction";
++    else
++        kernelName = Base::GetName() + "_sum_reduction";
++    cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
++    if (err != CL_SUCCESS)
++        throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
++    SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
++
++    // set kernel arg of reduction kernel
++    // TODO(Wei Wei): use unique name for kernel
++    cl_mem buf = Base::GetCLBuffer();
++    SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
++    err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
++        static_cast<void*>(&buf));
++    if (CL_SUCCESS != err)
++        throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++    SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
++    err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
++    if (CL_SUCCESS != err)
++        throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++    SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
++    err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
++    if (CL_SUCCESS != err)
++        throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++    SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
++    err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
++    if (CL_SUCCESS != err)
++        throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++    // set work group size and execute
++    size_t global_work_size[] = { 256, static_cast<size_t>(w) };
++    size_t const local_work_size[] = { 256, 1 };
++    SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
++    err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
++        global_work_size, local_work_size, 0, nullptr, nullptr);
++    if (CL_SUCCESS != err)
++        throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
++    err = clFinish(kEnv.mpkCmdQueue);
++    if (CL_SUCCESS != err)
++        throw OpenCLError("clFinish", err, __FILE__, __LINE__);
++    if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
++    {
++        /*average need more reduction kernel for count computing*/
++        std::unique_ptr<double[]> pAllBuffer(new double[2 * w]);
++        double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
++            mpClmem2,
++            CL_TRUE, CL_MAP_READ, 0,
++            sizeof(double) * w, 0, nullptr, nullptr,
++            &err));
++        if (err != CL_SUCCESS)
++            throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
++
++        for (int i = 0; i < w; i++)
++            pAllBuffer[i] = resbuf[i];
++        err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
++        if (err != CL_SUCCESS)
++            throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
++
++        kernelName = Base::GetName() + "_count_reduction";
++        redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
++        if (err != CL_SUCCESS)
++            throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
++        SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
++
++        // set kernel arg of reduction kernel
++        buf = Base::GetCLBuffer();
++        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
++        err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
++            static_cast<void*>(&buf));
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
++        err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
++        err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
++        err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++        // set work group size and execute
++        size_t global_work_size1[] = { 256, static_cast<size_t>(w) };
++        size_t const local_work_size1[] = { 256, 1 };
++        SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
++        err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
++            global_work_size1, local_work_size1, 0, nullptr, nullptr);
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
++        err = clFinish(kEnv.mpkCmdQueue);
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clFinish", err, __FILE__, __LINE__);
++        resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
++            mpClmem2,
++            CL_TRUE, CL_MAP_READ, 0,
++            sizeof(double) * w, 0, nullptr, nullptr,
++            &err));
++        if (err != CL_SUCCESS)
++            throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
++        for (int i = 0; i < w; i++)
++            pAllBuffer[i + w] = resbuf[i];
++        err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
++        // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
++        if (CL_SUCCESS != err)
++            SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
++        if (mpClmem2)
++        {
++            err = clReleaseMemObject(mpClmem2);
++            SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
++            mpClmem2 = nullptr;
++        }
++        mpClmem2 = clCreateBuffer(kEnv.mpkContext,
++            cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
++            w * sizeof(double) * 2, pAllBuffer.get(), &err);
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
++        SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
++    }
++    // set kernel arg
++    SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
++    err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2);
++    if (CL_SUCCESS != err)
++        throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++    return 1;
++}
++
+ namespace {
+ struct SumIfsArgs
+ {



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202003181835.02IIZ00T021153>