// Boost.uBLAS // // Copyright (c) 2018 Fady Essam // Copyright (c) 2018 Stefan Seefeld // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or // copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef boost_numeric_ublas_opencl_misc_hpp_ #define boost_numeric_ublas_opencl_misc_hpp_ #include <boost/numeric/ublas/opencl/library.hpp> #include <boost/numeric/ublas/opencl/vector.hpp> #include <boost/numeric/ublas/opencl/matrix.hpp> namespace boost { namespace numeric { namespace ublas { namespace opencl { template <typename T> typename std::enable_if<is_numeric<T>::value, T>::type a_sum(ublas::vector<T, opencl::storage> const &v, compute::command_queue& queue) { compute::vector<T> scratch_buffer(v.size(), queue.get_context()); compute::vector<T> result_buffer(1, queue.get_context()); cl_event event; if (std::is_same<T, float>::value) clblasSasum(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same<T, double>::value) clblasDasum(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same<T, std::complex<float>>::value) clblasScasum(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same<T, std::complex<double>>::value) clblasDzasum(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event clWaitForEvents(1, &event); return result_buffer[0]; } template <typename T, typename A> typename std::enable_if<is_numeric<T>::value, T>::type a_sum(ublas::vector<T, A> const &v, compute::command_queue& queue) { ublas::vector<T, opencl::storage> vdev(v, queue); return a_sum(vdev, queue); } template <typename T> typename std::enable_if<std::is_same<T, float>::value | std::is_same<T, double>::value, T>::type norm_1(ublas::vector<T, opencl::storage> const &v, compute::command_queue& queue) { return a_sum(v, queue); } template <typename T, typename A> typename std::enable_if<std::is_same<T, float>::value | std::is_same<T, double>::value, T>::type norm_1(ublas::vector<T, A> const &v, compute::command_queue& queue) { ublas::vector<T, opencl::storage> vdev(v, queue); return norm_1(vdev, queue); } template <typename T> typename std::enable_if<is_numeric<T>::value, T>::type norm_2(ublas::vector<T, opencl::storage> const &v, compute::command_queue& queue) { compute::vector<T> scratch_buffer(2*v.size(), queue.get_context()); compute::vector<T> result_buffer(1, queue.get_context()); cl_event event; if (std::is_same<T, float>::value) clblasSnrm2(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same<T, double>::value) clblasDnrm2(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same<T, std::complex<float>>::value) clblasScnrm2(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same<T, std::complex<double>>::value) clblasDznrm2(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event clWaitForEvents(1, &event); return result_buffer[0]; } template <typename T, typename A> typename std::enable_if<is_numeric<T>::value, T>::type norm_2(ublas::vector<T, A> const &v, compute::command_queue& queue) { ublas::vector<T, opencl::storage> vdev(v, queue); return norm_2(vdev, queue); } }}}} #endif