123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881 |
- //---------------------------------------------------------------------------//
- // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
- //
- // Distributed under the Boost Software License, Version 1.0
- // See accompanying file LICENSE_1_0.txt or copy at
- // http://www.boost.org/LICENSE_1_0.txt
- //
- // See http://boostorg.github.com/compute for more information.
- //---------------------------------------------------------------------------//
- #ifndef BOOST_COMPUTE_ALGORITHM_COPY_HPP
- #define BOOST_COMPUTE_ALGORITHM_COPY_HPP
- #include <algorithm>
- #include <iterator>
- #include <boost/utility/enable_if.hpp>
- #include <boost/mpl/and.hpp>
- #include <boost/mpl/not.hpp>
- #include <boost/mpl/or.hpp>
- #include <boost/compute/buffer.hpp>
- #include <boost/compute/system.hpp>
- #include <boost/compute/command_queue.hpp>
- #include <boost/compute/algorithm/detail/copy_on_device.hpp>
- #include <boost/compute/algorithm/detail/copy_to_device.hpp>
- #include <boost/compute/algorithm/detail/copy_to_host.hpp>
- #include <boost/compute/async/future.hpp>
- #include <boost/compute/container/mapped_view.hpp>
- #include <boost/compute/detail/device_ptr.hpp>
- #include <boost/compute/detail/is_contiguous_iterator.hpp>
- #include <boost/compute/detail/iterator_range_size.hpp>
- #include <boost/compute/detail/parameter_cache.hpp>
- #include <boost/compute/iterator/buffer_iterator.hpp>
- #include <boost/compute/type_traits/type_name.hpp>
- #include <boost/compute/type_traits/is_device_iterator.hpp>
- namespace boost {
- namespace compute {
- namespace detail {
- namespace mpl = boost::mpl;
- // meta-function returning true if copy() between InputIterator and
- // OutputIterator can be implemented with clEnqueueCopyBuffer().
- template<class InputIterator, class OutputIterator>
- struct can_copy_with_copy_buffer :
- mpl::and_<
- mpl::or_<
- boost::is_same<
- InputIterator,
- buffer_iterator<typename InputIterator::value_type>
- >,
- boost::is_same<
- InputIterator,
- detail::device_ptr<typename InputIterator::value_type>
- >
- >,
- mpl::or_<
- boost::is_same<
- OutputIterator,
- buffer_iterator<typename OutputIterator::value_type>
- >,
- boost::is_same<
- OutputIterator,
- detail::device_ptr<typename OutputIterator::value_type>
- >
- >,
- boost::is_same<
- typename InputIterator::value_type,
- typename OutputIterator::value_type
- >
- >::type {};
- // meta-function returning true if value_types of HostIterator and
- // DeviceIterator are same
- template<class HostIterator, class DeviceIterator>
- struct is_same_value_type :
- boost::is_same<
- typename boost::remove_cv<
- typename std::iterator_traits<HostIterator>::value_type
- >::type,
- typename boost::remove_cv<
- typename DeviceIterator::value_type
- >::type
- >::type {};
- // meta-function returning true if value_type of HostIterator is bool
- template<class HostIterator>
- struct is_bool_value_type :
- boost::is_same<
- typename boost::remove_cv<
- typename std::iterator_traits<HostIterator>::value_type
- >::type,
- bool
- >::type {};
- // host -> device (async)
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- dispatch_copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- mpl::not_<
- is_device_iterator<InputIterator>
- >,
- is_device_iterator<OutputIterator>,
- is_same_value_type<InputIterator, OutputIterator>
- >
- >::type* = 0)
- {
- BOOST_STATIC_ASSERT_MSG(
- is_contiguous_iterator<InputIterator>::value,
- "copy_async() is only supported for contiguous host iterators"
- );
- return copy_to_device_async(first, last, result, queue, events);
- }
- // host -> device (async)
- // Type mismatch between InputIterator and OutputIterator value_types
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- dispatch_copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- mpl::not_<
- is_device_iterator<InputIterator>
- >,
- is_device_iterator<OutputIterator>,
- mpl::not_<
- is_same_value_type<InputIterator, OutputIterator>
- >
- >
- >::type* = 0)
- {
- BOOST_STATIC_ASSERT_MSG(
- is_contiguous_iterator<InputIterator>::value,
- "copy_async() is only supported for contiguous host iterators"
- );
- typedef typename std::iterator_traits<InputIterator>::value_type input_type;
- const context &context = queue.get_context();
- size_t count = iterator_range_size(first, last);
- if(count < size_t(1)) {
- return future<OutputIterator>();
- }
- // map [first; last) to device and run copy kernel
- // on device for copying & casting
- ::boost::compute::mapped_view<input_type> mapped_host(
- // make sure it's a pointer to constant data
- // to force read only mapping
- const_cast<const input_type*>(
- ::boost::addressof(*first)
- ),
- count,
- context
- );
- return copy_on_device_async(
- mapped_host.begin(), mapped_host.end(), result, queue, events
- );
- }
- // host -> device
- // InputIterator is a contiguous iterator
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- mpl::not_<
- is_device_iterator<InputIterator>
- >,
- is_device_iterator<OutputIterator>,
- is_same_value_type<InputIterator, OutputIterator>,
- is_contiguous_iterator<InputIterator>
- >
- >::type* = 0)
- {
- return copy_to_device(first, last, result, queue, events);
- }
- // host -> device
- // Type mismatch between InputIterator and OutputIterator value_types
- // InputIterator is a contiguous iterator
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- mpl::not_<
- is_device_iterator<InputIterator>
- >,
- is_device_iterator<OutputIterator>,
- mpl::not_<
- is_same_value_type<InputIterator, OutputIterator>
- >,
- is_contiguous_iterator<InputIterator>
- >
- >::type* = 0)
- {
- typedef typename OutputIterator::value_type output_type;
- typedef typename std::iterator_traits<InputIterator>::value_type input_type;
- const device &device = queue.get_device();
- // loading parameters
- std::string cache_key =
- std::string("__boost_compute_copy_to_device_")
- + type_name<input_type>() + "_" + type_name<output_type>();
- boost::shared_ptr<parameter_cache> parameters =
- detail::parameter_cache::get_global_cache(device);
- uint_ map_copy_threshold;
- uint_ direct_copy_threshold;
- // calculate default values of thresholds
- if (device.type() & device::gpu) {
- // GPUs
- map_copy_threshold = 524288; // 0.5 MB
- direct_copy_threshold = 52428800; // 50 MB
- }
- else {
- // CPUs and other devices
- map_copy_threshold = 134217728; // 128 MB
- direct_copy_threshold = 0; // it's never efficient for CPUs
- }
- // load thresholds
- map_copy_threshold =
- parameters->get(
- cache_key, "map_copy_threshold", map_copy_threshold
- );
- direct_copy_threshold =
- parameters->get(
- cache_key, "direct_copy_threshold", direct_copy_threshold
- );
- // select copy method based on thresholds & input_size_bytes
- size_t count = iterator_range_size(first, last);
- size_t input_size_bytes = count * sizeof(input_type);
- // [0; map_copy_threshold) -> copy_to_device_map()
- if(input_size_bytes < map_copy_threshold) {
- return copy_to_device_map(first, last, result, queue, events);
- }
- // [map_copy_threshold; direct_copy_threshold) -> convert [first; last)
- // on host and then perform copy_to_device()
- else if(input_size_bytes < direct_copy_threshold) {
- std::vector<output_type> vector(first, last);
- return copy_to_device(
- vector.begin(), vector.end(), result, queue, events
- );
- }
- // [direct_copy_threshold; inf) -> map [first; last) to device and
- // run copy kernel on device for copying & casting
- // At this point we are sure that count > 1 (first != last).
- // Perform async copy to device, wait for it to be finished and
- // return the result.
- // At this point we are sure that count > 1 (first != last), so event
- // returned by dispatch_copy_async() must be valid.
- return dispatch_copy_async(first, last, result, queue, events).get();
- }
- // host -> device
- // InputIterator is NOT a contiguous iterator
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- mpl::not_<
- is_device_iterator<InputIterator>
- >,
- is_device_iterator<OutputIterator>,
- mpl::not_<
- is_contiguous_iterator<InputIterator>
- >
- >
- >::type* = 0)
- {
- typedef typename OutputIterator::value_type output_type;
- typedef typename std::iterator_traits<InputIterator>::value_type input_type;
- const device &device = queue.get_device();
- // loading parameters
- std::string cache_key =
- std::string("__boost_compute_copy_to_device_")
- + type_name<input_type>() + "_" + type_name<output_type>();
- boost::shared_ptr<parameter_cache> parameters =
- detail::parameter_cache::get_global_cache(device);
- uint_ map_copy_threshold;
- uint_ direct_copy_threshold;
- // calculate default values of thresholds
- if (device.type() & device::gpu) {
- // GPUs
- map_copy_threshold = 524288; // 0.5 MB
- direct_copy_threshold = 52428800; // 50 MB
- }
- else {
- // CPUs and other devices
- map_copy_threshold = 134217728; // 128 MB
- direct_copy_threshold = 0; // it's never efficient for CPUs
- }
- // load thresholds
- map_copy_threshold =
- parameters->get(
- cache_key, "map_copy_threshold", map_copy_threshold
- );
- direct_copy_threshold =
- parameters->get(
- cache_key, "direct_copy_threshold", direct_copy_threshold
- );
- // select copy method based on thresholds & input_size_bytes
- size_t input_size = iterator_range_size(first, last);
- size_t input_size_bytes = input_size * sizeof(input_type);
- // [0; map_copy_threshold) -> copy_to_device_map()
- //
- // if direct_copy_threshold is less than map_copy_threshold
- // copy_to_device_map() is used for every input
- if(input_size_bytes < map_copy_threshold
- || direct_copy_threshold <= map_copy_threshold) {
- return copy_to_device_map(first, last, result, queue, events);
- }
- // [map_copy_threshold; inf) -> convert [first; last)
- // on host and then perform copy_to_device()
- std::vector<output_type> vector(first, last);
- return copy_to_device(vector.begin(), vector.end(), result, queue, events);
- }
- // device -> host (async)
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- dispatch_copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- mpl::not_<
- is_device_iterator<OutputIterator>
- >,
- is_same_value_type<OutputIterator, InputIterator>
- >
- >::type* = 0)
- {
- BOOST_STATIC_ASSERT_MSG(
- is_contiguous_iterator<OutputIterator>::value,
- "copy_async() is only supported for contiguous host iterators"
- );
- return copy_to_host_async(first, last, result, queue, events);
- }
- // device -> host (async)
- // Type mismatch between InputIterator and OutputIterator value_types
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- dispatch_copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- mpl::not_<
- is_device_iterator<OutputIterator>
- >,
- mpl::not_<
- is_same_value_type<OutputIterator, InputIterator>
- >
- >
- >::type* = 0)
- {
- BOOST_STATIC_ASSERT_MSG(
- is_contiguous_iterator<OutputIterator>::value,
- "copy_async() is only supported for contiguous host iterators"
- );
- typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
- const context &context = queue.get_context();
- size_t count = iterator_range_size(first, last);
- if(count < size_t(1)) {
- return future<OutputIterator>();
- }
- // map host memory to device
- buffer mapped_host(
- context,
- count * sizeof(output_type),
- buffer::write_only | buffer::use_host_ptr,
- static_cast<void*>(
- ::boost::addressof(*result)
- )
- );
- // copy async on device
- ::boost::compute::future<buffer_iterator<output_type> > future =
- copy_on_device_async(
- first,
- last,
- make_buffer_iterator<output_type>(mapped_host),
- queue,
- events
- );
- // update host memory asynchronously by maping and unmaping memory
- event map_event;
- void* ptr = queue.enqueue_map_buffer_async(
- mapped_host,
- CL_MAP_READ,
- 0,
- count * sizeof(output_type),
- map_event,
- future.get_event()
- );
- event unmap_event =
- queue.enqueue_unmap_buffer(mapped_host, ptr, map_event);
- return make_future(result + count, unmap_event);
- }
- // device -> host
- // OutputIterator is a contiguous iterator
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- mpl::not_<
- is_device_iterator<OutputIterator>
- >,
- is_same_value_type<OutputIterator, InputIterator>,
- is_contiguous_iterator<OutputIterator>,
- mpl::not_<
- is_bool_value_type<OutputIterator>
- >
- >
- >::type* = 0)
- {
- return copy_to_host(first, last, result, queue, events);
- }
- // device -> host
- // Type mismatch between InputIterator and OutputIterator value_types
- // OutputIterator is NOT a contiguous iterator or value_type of OutputIterator
- // is a boolean type.
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- mpl::not_<
- is_device_iterator<OutputIterator>
- >,
- mpl::or_<
- mpl::not_<
- is_contiguous_iterator<OutputIterator>
- >,
- is_bool_value_type<OutputIterator>
- >
- >
- >::type* = 0)
- {
- typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
- typedef typename InputIterator::value_type input_type;
- const device &device = queue.get_device();
- // loading parameters
- std::string cache_key =
- std::string("__boost_compute_copy_to_host_")
- + type_name<input_type>() + "_" + type_name<output_type>();
- boost::shared_ptr<parameter_cache> parameters =
- detail::parameter_cache::get_global_cache(device);
- uint_ map_copy_threshold;
- uint_ direct_copy_threshold;
- // calculate default values of thresholds
- if (device.type() & device::gpu) {
- // GPUs
- map_copy_threshold = 33554432; // 30 MB
- direct_copy_threshold = 0; // it's never efficient for GPUs
- }
- else {
- // CPUs and other devices
- map_copy_threshold = 134217728; // 128 MB
- direct_copy_threshold = 0; // it's never efficient for CPUs
- }
- // load thresholds
- map_copy_threshold =
- parameters->get(
- cache_key, "map_copy_threshold", map_copy_threshold
- );
- direct_copy_threshold =
- parameters->get(
- cache_key, "direct_copy_threshold", direct_copy_threshold
- );
- // select copy method based on thresholds & input_size_bytes
- size_t count = iterator_range_size(first, last);
- size_t input_size_bytes = count * sizeof(input_type);
- // [0; map_copy_threshold) -> copy_to_host_map()
- //
- // if direct_copy_threshold is less than map_copy_threshold
- // copy_to_host_map() is used for every input
- if(input_size_bytes < map_copy_threshold
- || direct_copy_threshold <= map_copy_threshold) {
- return copy_to_host_map(first, last, result, queue, events);
- }
- // [map_copy_threshold; inf) -> copy [first;last) to temporary vector
- // then copy (and convert) to result using std::copy()
- std::vector<input_type> vector(count);
- copy_to_host(first, last, vector.begin(), queue, events);
- return std::copy(vector.begin(), vector.end(), result);
- }
- // device -> host
- // Type mismatch between InputIterator and OutputIterator value_types
- // OutputIterator is a contiguous iterator
- // value_type of OutputIterator is NOT a boolean type
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- mpl::not_<
- is_device_iterator<OutputIterator>
- >,
- mpl::not_<
- is_same_value_type<OutputIterator, InputIterator>
- >,
- is_contiguous_iterator<OutputIterator>,
- mpl::not_<
- is_bool_value_type<OutputIterator>
- >
- >
- >::type* = 0)
- {
- typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
- typedef typename InputIterator::value_type input_type;
- const device &device = queue.get_device();
- // loading parameters
- std::string cache_key =
- std::string("__boost_compute_copy_to_host_")
- + type_name<input_type>() + "_" + type_name<output_type>();
- boost::shared_ptr<parameter_cache> parameters =
- detail::parameter_cache::get_global_cache(device);
- uint_ map_copy_threshold;
- uint_ direct_copy_threshold;
- // calculate default values of thresholds
- if (device.type() & device::gpu) {
- // GPUs
- map_copy_threshold = 524288; // 0.5 MB
- direct_copy_threshold = 52428800; // 50 MB
- }
- else {
- // CPUs and other devices
- map_copy_threshold = 134217728; // 128 MB
- direct_copy_threshold = 0; // it's never efficient for CPUs
- }
- // load thresholds
- map_copy_threshold =
- parameters->get(
- cache_key, "map_copy_threshold", map_copy_threshold
- );
- direct_copy_threshold =
- parameters->get(
- cache_key, "direct_copy_threshold", direct_copy_threshold
- );
- // select copy method based on thresholds & input_size_bytes
- size_t count = iterator_range_size(first, last);
- size_t input_size_bytes = count * sizeof(input_type);
- // [0; map_copy_threshold) -> copy_to_host_map()
- if(input_size_bytes < map_copy_threshold) {
- return copy_to_host_map(first, last, result, queue, events);
- }
- // [map_copy_threshold; direct_copy_threshold) -> copy [first;last) to
- // temporary vector then copy (and convert) to result using std::copy()
- else if(input_size_bytes < direct_copy_threshold) {
- std::vector<input_type> vector(count);
- copy_to_host(first, last, vector.begin(), queue, events);
- return std::copy(vector.begin(), vector.end(), result);
- }
- // [direct_copy_threshold; inf) -> map [result; result + input_size) to
- // device and run copy kernel on device for copying & casting
- // map host memory to device.
- // Perform async copy to host, wait for it to be finished and
- // return the result.
- // At this point we are sure that count > 1 (first != last), so event
- // returned by dispatch_copy_async() must be valid.
- return dispatch_copy_async(first, last, result, queue, events).get();
- }
- // device -> device
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- is_device_iterator<OutputIterator>,
- mpl::not_<
- can_copy_with_copy_buffer<
- InputIterator, OutputIterator
- >
- >
- >
- >::type* = 0)
- {
- return copy_on_device(first, last, result, queue, events);
- }
- // device -> device (specialization for buffer iterators)
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- is_device_iterator<OutputIterator>,
- can_copy_with_copy_buffer<
- InputIterator, OutputIterator
- >
- >
- >::type* = 0)
- {
- typedef typename std::iterator_traits<InputIterator>::value_type value_type;
- typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
- difference_type n = std::distance(first, last);
- if(n < 1){
- // nothing to copy
- return result;
- }
- queue.enqueue_copy_buffer(first.get_buffer(),
- result.get_buffer(),
- first.get_index() * sizeof(value_type),
- result.get_index() * sizeof(value_type),
- static_cast<size_t>(n) * sizeof(value_type),
- events);
- return result + n;
- }
- // device -> device (async)
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- dispatch_copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- is_device_iterator<OutputIterator>,
- mpl::not_<
- can_copy_with_copy_buffer<
- InputIterator, OutputIterator
- >
- >
- >
- >::type* = 0)
- {
- return copy_on_device_async(first, last, result, queue, events);
- }
- // device -> device (async, specialization for buffer iterators)
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- dispatch_copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if<
- mpl::and_<
- is_device_iterator<InputIterator>,
- is_device_iterator<OutputIterator>,
- can_copy_with_copy_buffer<
- InputIterator, OutputIterator
- >
- >
- >::type* = 0)
- {
- typedef typename std::iterator_traits<InputIterator>::value_type value_type;
- typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
- difference_type n = std::distance(first, last);
- if(n < 1){
- // nothing to copy
- return make_future(result, event());
- }
- event event_ =
- queue.enqueue_copy_buffer(
- first.get_buffer(),
- result.get_buffer(),
- first.get_index() * sizeof(value_type),
- result.get_index() * sizeof(value_type),
- static_cast<size_t>(n) * sizeof(value_type),
- events
- );
- return make_future(result + n, event_);
- }
- // host -> host
- template<class InputIterator, class OutputIterator>
- inline OutputIterator
- dispatch_copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue,
- const wait_list &events,
- typename boost::enable_if_c<
- !is_device_iterator<InputIterator>::value &&
- !is_device_iterator<OutputIterator>::value
- >::type* = 0)
- {
- (void) queue;
- (void) events;
- return std::copy(first, last, result);
- }
- } // end detail namespace
- /// Copies the values in the range [\p first, \p last) to the range
- /// beginning at \p result.
- ///
- /// The generic copy() function can be used for a variety of data
- /// transfer tasks and provides a standard interface to the following
- /// OpenCL functions:
- ///
- /// \li \c clEnqueueReadBuffer()
- /// \li \c clEnqueueWriteBuffer()
- /// \li \c clEnqueueCopyBuffer()
- ///
- /// Unlike the aforementioned OpenCL functions, copy() will also work
- /// with non-contiguous data-structures (e.g. \c std::list<T>) as
- /// well as with "fancy" iterators (e.g. transform_iterator).
- ///
- /// \param first first element in the range to copy
- /// \param last last element in the range to copy
- /// \param result first element in the result range
- /// \param queue command queue to perform the operation
- ///
- /// \return \c OutputIterator to the end of the result range
- ///
- /// For example, to copy an array of \c int values on the host to a vector on
- /// the device:
- /// \code
- /// // array on the host
- /// int data[] = { 1, 2, 3, 4 };
- ///
- /// // vector on the device
- /// boost::compute::vector<int> vec(4, context);
- ///
- /// // copy values to the device vector
- /// boost::compute::copy(data, data + 4, vec.begin(), queue);
- /// \endcode
- ///
- /// The copy algorithm can also be used with standard containers such as
- /// \c std::vector<T>:
- /// \code
- /// std::vector<int> host_vector = ...
- /// boost::compute::vector<int> device_vector = ...
- ///
- /// // copy from the host to the device
- /// boost::compute::copy(
- /// host_vector.begin(), host_vector.end(), device_vector.begin(), queue
- /// );
- ///
- /// // copy from the device to the host
- /// boost::compute::copy(
- /// device_vector.begin(), device_vector.end(), host_vector.begin(), queue
- /// );
- /// \endcode
- ///
- /// Space complexity: \Omega(1)
- ///
- /// \see copy_n(), copy_if(), copy_async()
- template<class InputIterator, class OutputIterator>
- inline OutputIterator copy(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue = system::default_queue(),
- const wait_list &events = wait_list())
- {
- return detail::dispatch_copy(first, last, result, queue, events);
- }
- /// Copies the values in the range [\p first, \p last) to the range
- /// beginning at \p result. The copy is performed asynchronously.
- ///
- /// \see copy()
- template<class InputIterator, class OutputIterator>
- inline future<OutputIterator>
- copy_async(InputIterator first,
- InputIterator last,
- OutputIterator result,
- command_queue &queue = system::default_queue(),
- const wait_list &events = wait_list())
- {
- return detail::dispatch_copy_async(first, last, result, queue, events);
- }
- } // end compute namespace
- } // end boost namespace
- #endif // BOOST_COMPUTE_ALGORITHM_COPY_HPP
|