  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14. // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15. // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  16. // Third party copyrights are property of their respective owners.
  17. //
  18. // Redistribution and use in source and binary forms, with or without modification,
  19. // are permitted provided that the following conditions are met:
  20. //
  21. // * Redistribution's of source code must retain the above copyright notice,
  22. // this list of conditions and the following disclaimer.
  23. //
  24. // * Redistribution's in binary form must reproduce the above copyright notice,
  25. // this list of conditions and the following disclaimer in the documentation
  26. // and/or other materials provided with the distribution.
  27. //
  28. // * The name of the copyright holders may not be used to endorse or promote products
  29. // derived from this software without specific prior written permission.
  30. //
  31. // This software is provided by the copyright holders and contributors "as is" and
  32. // any express or implied warranties, including, but not limited to, the implied
  33. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  34. // In no event shall the Intel Corporation or contributors be liable for any direct,
  35. // indirect, incidental, special, exemplary, or consequential damages
  36. // (including, but not limited to, procurement of substitute goods or services;
  37. // loss of use, data, or profits; or business interruption) however caused
  38. // and on any theory of liability, whether in contract, strict liability,
  39. // or tort (including negligence or otherwise) arising in any way out of
  40. // the use of this software, even if advised of the possibility of such damage.
  41. //
  42. //M*/
  43. #ifndef OPENCV_CORE_CUDA_HPP
  44. #define OPENCV_CORE_CUDA_HPP
  45. #ifndef __cplusplus
  46. # error cuda.hpp header must be compiled as C++
  47. #endif
  48. #include "opencv2/core.hpp"
  49. #include "opencv2/core/cuda_types.hpp"
  50. /**
  51. @defgroup cuda CUDA-accelerated Computer Vision
  52. @{
  53. @defgroup cudacore Core part
  54. @{
  55. @defgroup cudacore_init Initialization and Information
  56. @defgroup cudacore_struct Data Structures
  57. @}
  58. @}
  59. */
  60. namespace cv { namespace cuda {
  61. //! @addtogroup cudacore_struct
  62. //! @{
//===================================================================================
// GpuMat
//===================================================================================

/** @brief Base storage class for GPU memory with reference counting.

Its interface matches the Mat interface with the following limitations:

-   no arbitrary dimensions support (only 2D)
-   no functions that return references to their data (because references on GPU are not valid for
    CPU)
-   no expression templates technique support

Beware that the latter limitation may lead to overloaded matrix operators that cause memory
allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
passed directly to the kernel.

@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are
aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.

@note You are not recommended to leave static or global GpuMat variables allocated, that is, to rely
on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory
release function returns error if the CUDA context has been destroyed before.

Some member functions are described as a "Blocking Call" while some are described as a
"Non-Blocking Call". Blocking functions are synchronous to host. It is guaranteed that the GPU
operation is finished when the function returns. However, non-blocking functions are asynchronous to
host. Those functions may return even if the GPU operation is not finished.

Compared to their blocking counterpart, non-blocking functions accept Stream as an additional
argument. If a non-default stream is passed, the GPU operation may overlap with operations in other
streams.

@sa Mat
*/
class CV_EXPORTS_W GpuMat
{
public:
    //! Pluggable device-memory allocator interface used by GpuMat.
    class CV_EXPORTS_W Allocator
    {
    public:
        virtual ~Allocator() {}

        // allocator must fill data, step and refcount fields
        virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
        virtual void free(GpuMat* mat) = 0;
    };

    //! default allocator
    CV_WRAP static GpuMat::Allocator* defaultAllocator();
    CV_WRAP static void setDefaultAllocator(GpuMat::Allocator* allocator);

    //! default constructor
    CV_WRAP explicit GpuMat(GpuMat::Allocator* allocator = GpuMat::defaultAllocator());

    //! constructs GpuMat of the specified size and type
    CV_WRAP GpuMat(int rows, int cols, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
    CV_WRAP GpuMat(Size size, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());

    //! constructs GpuMat and fills it with the specified value _s
    CV_WRAP GpuMat(int rows, int cols, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
    CV_WRAP GpuMat(Size size, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());

    //! copy constructor
    CV_WRAP GpuMat(const GpuMat& m);

    //! constructor for GpuMat headers pointing to user-allocated data
    GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
    GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);

    //! creates a GpuMat header for a part of the bigger matrix
    CV_WRAP GpuMat(const GpuMat& m, Range rowRange, Range colRange);
    CV_WRAP GpuMat(const GpuMat& m, Rect roi);

    //! builds GpuMat from host memory (Blocking call)
    CV_WRAP explicit GpuMat(InputArray arr, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());

    //! destructor - calls release()
    ~GpuMat();

    //! assignment operators
    GpuMat& operator =(const GpuMat& m);

    //! allocates new GpuMat data unless the GpuMat already has specified size and type
    CV_WRAP void create(int rows, int cols, int type);
    CV_WRAP void create(Size size, int type);

    //! decreases reference counter, deallocate the data when reference counter reaches 0
    CV_WRAP void release();

    //! swaps with other smart pointer
    CV_WRAP void swap(GpuMat& mat);

    /** @brief Performs data upload to GpuMat (Blocking call)

    This function copies data from host memory to device memory. As being a blocking call, it is
    guaranteed that the copy operation is finished when this function returns.
    */
    CV_WRAP void upload(InputArray arr);

    /** @brief Performs data upload to GpuMat (Non-Blocking call)

    This function copies data from host memory to device memory. As being a non-blocking call, this
    function may return even if the copy operation is not finished.

    The copy operation may be overlapped with operations in other non-default streams if \p stream is
    not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option.
    */
    CV_WRAP void upload(InputArray arr, Stream& stream);

    /** @brief Performs data download from GpuMat (Blocking call)

    This function copies data from device memory to host memory. As being a blocking call, it is
    guaranteed that the copy operation is finished when this function returns.
    */
    CV_WRAP void download(OutputArray dst) const;

    /** @brief Performs data download from GpuMat (Non-Blocking call)

    This function copies data from device memory to host memory. As being a non-blocking call, this
    function may return even if the copy operation is not finished.

    The copy operation may be overlapped with operations in other non-default streams if \p stream is
    not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option.
    */
    CV_WRAP void download(OutputArray dst, Stream& stream) const;

    //! returns deep copy of the GpuMat, i.e. the data is copied
    CV_WRAP GpuMat clone() const;

    //! copies the GpuMat content to device memory (Blocking call)
    void copyTo(OutputArray dst) const;
    //! bindings overload which copies the GpuMat content to device memory (Blocking call)
    CV_WRAP void copyTo(CV_OUT GpuMat& dst) const {
        copyTo(static_cast<OutputArray>(dst));
    }
    //! copies the GpuMat content to device memory (Non-Blocking call)
    void copyTo(OutputArray dst, Stream& stream) const;
    //! bindings overload which copies the GpuMat content to device memory (Non-Blocking call)
    CV_WRAP void copyTo(CV_OUT GpuMat& dst, Stream& stream) const {
        copyTo(static_cast<OutputArray>(dst), stream);
    }
    //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call)
    void copyTo(OutputArray dst, InputArray mask) const;
    //! bindings overload which copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call)
    CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask) const {
        copyTo(static_cast<OutputArray>(dst), static_cast<InputArray>(mask));
    }
    //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call)
    void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
    //! bindings overload which copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call)
    CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask, Stream& stream) const {
        copyTo(static_cast<OutputArray>(dst), static_cast<InputArray>(mask), stream);
    }

    //! sets some of the GpuMat elements to s (Blocking call)
    CV_WRAP GpuMat& setTo(Scalar s);
    //! sets some of the GpuMat elements to s (Non-Blocking call)
    CV_WRAP GpuMat& setTo(Scalar s, Stream& stream);
    //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
    CV_WRAP GpuMat& setTo(Scalar s, InputArray mask);
    //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
    CV_WRAP GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);

    //! converts GpuMat to another datatype (Blocking call)
    void convertTo(OutputArray dst, int rtype) const;
    //! converts GpuMat to another datatype (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, Stream& stream) const;
    //! bindings overload which converts GpuMat to another datatype (Non-Blocking call)
    CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, Stream& stream) const {
        convertTo(static_cast<OutputArray>(dst), rtype, stream);
    }
    //! converts GpuMat to another datatype with scaling (Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
    //! bindings overload which converts GpuMat to another datatype with scaling(Blocking call)
    CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha = 1.0, double beta = 0.0) const {
        convertTo(static_cast<OutputArray>(dst), rtype, alpha, beta);
    }
    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
    //! bindings overload which converts GpuMat to another datatype with scaling (Non-Blocking call)
    CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha, double beta, Stream& stream) const {
        convertTo(static_cast<OutputArray>(dst), rtype, alpha, beta, stream);
    }

    //! copies this GpuMat to "m", converting to the given type if type >= 0
    CV_WRAP void assignTo(GpuMat& m, int type = -1) const;

    //! returns pointer to y-th row
    uchar* ptr(int y = 0);
    const uchar* ptr(int y = 0) const;

    //! template version of the above method
    template<typename _Tp> _Tp* ptr(int y = 0);
    template<typename _Tp> const _Tp* ptr(int y = 0) const;

    //! implicit conversions so a GpuMat can be passed directly to CUDA kernels
    template <typename _Tp> operator PtrStepSz<_Tp>() const;
    template <typename _Tp> operator PtrStep<_Tp>() const;

    //! returns a new GpuMat header for the specified row
    CV_WRAP GpuMat row(int y) const;
    //! returns a new GpuMat header for the specified column
    CV_WRAP GpuMat col(int x) const;
    //! ... for the specified row span
    CV_WRAP GpuMat rowRange(int startrow, int endrow) const;
    CV_WRAP GpuMat rowRange(Range r) const;
    //! ... for the specified column span
    CV_WRAP GpuMat colRange(int startcol, int endcol) const;
    CV_WRAP GpuMat colRange(Range r) const;

    //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
    GpuMat operator ()(Range rowRange, Range colRange) const;
    GpuMat operator ()(Rect roi) const;

    //! creates alternative GpuMat header for the same data, with different
    //! number of channels and/or different number of rows
    CV_WRAP GpuMat reshape(int cn, int rows = 0) const;

    //! locates GpuMat header within a parent GpuMat
    CV_WRAP void locateROI(Size& wholeSize, Point& ofs) const;

    //! moves/resizes the current GpuMat ROI inside the parent GpuMat
    CV_WRAP GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);

    //! returns true iff the GpuMat data is continuous
    //! (i.e. when there are no gaps between successive rows)
    CV_WRAP bool isContinuous() const;

    //! returns element size in bytes
    CV_WRAP size_t elemSize() const;

    //! returns the size of element channel in bytes
    CV_WRAP size_t elemSize1() const;

    //! returns element type
    CV_WRAP int type() const;

    //! returns element depth
    CV_WRAP int depth() const;

    //! returns number of channels
    CV_WRAP int channels() const;

    //! returns step/elemSize1()
    CV_WRAP size_t step1() const;

    //! returns GpuMat size : width == number of columns, height == number of rows
    CV_WRAP Size size() const;

    //! returns true if GpuMat data is NULL
    CV_WRAP bool empty() const;

    //! returns pointer to cuda memory
    CV_WRAP void* cudaPtr() const;

    //! internal use method: updates the continuity flag
    CV_WRAP void updateContinuityFlag();

    /*! includes several bit-fields:
    - the magic signature
    - continuity flag
    - depth
    - number of channels
    */
    int flags;

    //! the number of rows and columns
    int rows, cols;

    //! a distance between successive rows in bytes; includes the gap if any
    CV_PROP size_t step;

    //! pointer to the data
    uchar* data;

    //! pointer to the reference counter;
    //! when GpuMat points to user-allocated data, the pointer is NULL
    int* refcount;

    //! helper fields used in locateROI and adjustROI
    uchar* datastart;
    const uchar* dataend;

    //! allocator
    Allocator* allocator;
};
/** @brief Internal holder for a raw block of GPU memory.

Non-copyable and non-movable; GpuMatND shares ownership of a GpuData instance
via std::shared_ptr to implement reference counting of the underlying buffer.
*/
struct CV_EXPORTS_W GpuData
{
    //! constructs a buffer of _size bytes (allocation performed in the implementation)
    explicit GpuData(size_t _size);
    //! releases the owned buffer
    ~GpuData();

    // ownership is unique to the GpuData object; sharing happens via shared_ptr<GpuData>
    GpuData(const GpuData&) = delete;
    GpuData& operator=(const GpuData&) = delete;

    GpuData(GpuData&&) = delete;
    GpuData& operator=(GpuData&&) = delete;

    //! pointer to the first byte of the buffer
    uchar* data;
    //! size of the buffer in bytes
    size_t size;
};
/** @brief n-dimensional GPU array with reference counting (the n-dim counterpart of GpuMat). */
class CV_EXPORTS_W GpuMatND
{
public:
    using SizeArray = std::vector<int>;
    using StepArray = std::vector<size_t>;
    using IndexArray = std::vector<int>;

    //! destructor
    ~GpuMatND();

    //! default constructor
    GpuMatND();

    /** @overload
    @param size Array of integers specifying an n-dimensional array shape.
    @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
    */
    GpuMatND(SizeArray size, int type);

    /** @overload
    @param size Array of integers specifying an n-dimensional array shape.
    @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
    data, which means that no data is copied. This operation is very efficient and can be used to
    process external data using OpenCV functions. The external data is not automatically deallocated, so
    you should take care of it.
    @param step Array of _size.size()-1 steps in case of a multi-dimensional array (the last step is always
    set to the element size). If not specified, the matrix is assumed to be continuous.
    */
    GpuMatND(SizeArray size, int type, void* data, StepArray step = StepArray());

    /** @brief Allocates GPU memory.
    Suppose there is some GPU memory already allocated. In that case, this method may choose to reuse that
    GPU memory under the specific condition: it must be of the same size and type, not externally allocated,
    the GPU memory is continuous(i.e., isContinuous() is true), and is not a sub-matrix of another GpuMatND
    (i.e., isSubmatrix() is false). In other words, this method guarantees that the GPU memory allocated by
    this method is always continuous and is not a sub-region of another GpuMatND.
    */
    void create(SizeArray size, int type);

    //! decrements the reference count and resets this header
    void release();

    //! swaps all fields with another GpuMatND
    void swap(GpuMatND& m) noexcept;

    /** @brief Creates a full copy of the array and the underlying data.
    The method creates a full copy of the array. It mimics the behavior of Mat::clone(), i.e.
    the original step is not taken into account. So, the array copy is a continuous array
    occupying total()\*elemSize() bytes.
    */
    GpuMatND clone() const;

    /** @overload
    This overload is non-blocking, so it may return even if the copy operation is not finished.
    */
    GpuMatND clone(Stream& stream) const;

    /** @brief Extracts a sub-matrix.
    The operator makes a new header for the specified sub-array of \*this.
    The operator is an O(1) operation, that is, no matrix data is copied.
    @param ranges Array of selected ranges along each dimension.
    */
    GpuMatND operator()(const std::vector<Range>& ranges) const;

    /** @brief Creates a GpuMat header for a 2D plane part of an n-dim matrix.
    @note The returned GpuMat is constructed with the constructor for user-allocated data.
    That is, It does not perform reference counting.
    @note This function does not increment this GpuMatND's reference counter.
    */
    GpuMat createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const;

    /** @overload
    Creates a GpuMat header if this GpuMatND is effectively 2D.
    @note The returned GpuMat is constructed with the constructor for user-allocated data.
    That is, It does not perform reference counting.
    @note This function does not increment this GpuMatND's reference counter.
    */
    GpuMat createGpuMatHeader() const;

    /** @brief Extracts a 2D plane part of an n-dim matrix.
    It differs from createGpuMatHeader(IndexArray, Range, Range) in that it clones a part of this
    GpuMatND to the returned GpuMat.
    @note This operator does not increment this GpuMatND's reference counter;
    */
    GpuMat operator()(IndexArray idx, Range rowRange, Range colRange) const;

    /** @brief Extracts a 2D plane part of an n-dim matrix if this GpuMatND is effectively 2D.
    It differs from createGpuMatHeader() in that it clones a part of this GpuMatND.
    @note This operator does not increment this GpuMatND's reference counter;
    */
    operator GpuMat() const;

    GpuMatND(const GpuMatND&) = default;
    GpuMatND& operator=(const GpuMatND&) = default;

#if defined(__GNUC__) && __GNUC__ < 5
    // error: function '...' defaulted on its first declaration with an exception-specification
    // that differs from the implicit declaration '...'
    GpuMatND(GpuMatND&&) = default;
    GpuMatND& operator=(GpuMatND&&) = default;
#else
    GpuMatND(GpuMatND&&) noexcept = default;
    GpuMatND& operator=(GpuMatND&&) noexcept = default;
#endif

    //! copies data from host memory to this GpuMatND (blocking / non-blocking with a stream,
    //! mirroring GpuMat::upload)
    void upload(InputArray src);
    void upload(InputArray src, Stream& stream);
    //! copies data from this GpuMatND to host memory (blocking / non-blocking with a stream,
    //! mirroring GpuMat::download)
    void download(OutputArray dst) const;
    void download(OutputArray dst, Stream& stream) const;

    //! returns true iff the GpuMatND data is continuous
    //! (i.e. when there are no gaps between successive rows)
    bool isContinuous() const;

    //! returns true if the matrix is a sub-matrix of another matrix
    bool isSubmatrix() const;

    //! returns element size in bytes
    size_t elemSize() const;

    //! returns the size of element channel in bytes
    size_t elemSize1() const;

    //! returns true if data is null
    bool empty() const;

    //! returns true if not empty and points to external(user-allocated) gpu memory
    bool external() const;

    //! returns pointer to the first byte of the GPU memory
    uchar* getDevicePtr() const;

    //! returns the total number of array elements
    size_t total() const;

    //! returns the size of underlying memory in bytes
    size_t totalMemSize() const;

    //! returns element type
    int type() const;

private:
    //! internal use
    void setFields(SizeArray size, int type, StepArray step = StepArray());

public:
    /*! includes several bit-fields:
    - the magic signature
    - continuity flag
    - depth
    - number of channels
    */
    int flags;

    //! matrix dimensionality
    int dims;

    //! shape of this array
    SizeArray size;

    /*! step values
    Their semantics is identical to the semantics of step for Mat.
    */
    StepArray step;

private:
    /*! internal use
    If this GpuMatND holds external memory, this is empty.
    */
    std::shared_ptr<GpuData> data_;

    /*! internal use
    If this GpuMatND manages memory with reference counting, this value is
    always equal to data_->data. If this GpuMatND holds external memory,
    data_ is empty and data points to the external memory.
    */
    uchar* data;

    /*! internal use
    If this GpuMatND is a sub-matrix of a larger matrix, this value is the
    difference of the first byte between the sub-matrix and the whole matrix.
    */
    size_t offset;
};
/** @brief Creates a continuous matrix.

@param rows Row count.
@param cols Column count.
@param type Type of the matrix.
@param arr Destination matrix. This parameter changes only if it has a proper type and area (
\f$\texttt{rows} \times \texttt{cols}\f$ ).

Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
end of each row.
*/
CV_EXPORTS_W void createContinuous(int rows, int cols, int type, OutputArray arr);

/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.

@param rows Minimum desired number of rows.
@param cols Minimum desired number of columns.
@param type Desired matrix type.
@param arr Destination matrix.

The function does not reallocate memory if the matrix has proper attributes already.
*/
CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
  466. /** @brief Bindings overload to create a GpuMat from existing GPU memory.
  467. @param rows Row count.
  468. @param cols Column count.
  469. @param type Type of the matrix.
  470. @param cudaMemoryAddress Address of the allocated GPU memory on the device. This does not allocate matrix data. Instead, it just initializes the matrix header that points to the specified \a cudaMemoryAddress, which means that no data is copied. This operation is very efficient and can be used to process external data using OpenCV functions. The external data is not automatically deallocated, so you should take care of it.
  471. @param step Number of bytes each matrix row occupies. The value should include the padding bytes at the end of each row, if any. If the parameter is missing (set to Mat::AUTO_STEP ), no padding is assumed and the actual step is calculated as cols*elemSize(). See GpuMat::elemSize.
  472. @note Overload for generation of bindings only, not exported or intended for use internally from C++.
  473. */
  474. CV_EXPORTS_W GpuMat inline createGpuMatFromCudaMemory(int rows, int cols, int type, size_t cudaMemoryAddress, size_t step = Mat::AUTO_STEP) {
  475. return GpuMat(rows, cols, type, reinterpret_cast<void*>(cudaMemoryAddress), step);
  476. }
  477. /** @overload
  478. @param size 2D array size: Size(cols, rows). In the Size() constructor, the number of rows and the number of columns go in the reverse order.
  479. @param type Type of the matrix.
  480. @param cudaMemoryAddress Address of the allocated GPU memory on the device. This does not allocate matrix data. Instead, it just initializes the matrix header that points to the specified \a cudaMemoryAddress, which means that no data is copied. This operation is very efficient and can be used to process external data using OpenCV functions. The external data is not automatically deallocated, so you should take care of it.
  481. @param step Number of bytes each matrix row occupies. The value should include the padding bytes at the end of each row, if any. If the parameter is missing (set to Mat::AUTO_STEP ), no padding is assumed and the actual step is calculated as cols*elemSize(). See GpuMat::elemSize.
  482. @note Overload for generation of bindings only, not exported or intended for use internally from C++.
  483. */
  484. CV_EXPORTS_W inline GpuMat createGpuMatFromCudaMemory(Size size, int type, size_t cudaMemoryAddress, size_t step = Mat::AUTO_STEP) {
  485. return GpuMat(size, type, reinterpret_cast<void*>(cudaMemoryAddress), step);
  486. }
  487. /** @brief BufferPool for use with CUDA streams
  488. BufferPool utilizes Stream's allocator to create new buffers for GpuMat's. It is
  489. only useful when enabled with #setBufferPoolUsage.
  490. @code
  491. setBufferPoolUsage(true);
  492. @endcode
  493. @note #setBufferPoolUsage must be called \em before any Stream declaration.
  494. Users may specify custom allocator for Stream and may implement their own stream based
  495. functions utilizing the same underlying GPU memory management.
  496. If custom allocator is not specified, BufferPool utilizes StackAllocator by
  497. default. StackAllocator allocates a chunk of GPU device memory beforehand,
  498. and when GpuMat is declared later on, it is given the pre-allocated memory.
  499. This kind of strategy reduces the number of calls for memory allocating APIs
  500. such as cudaMalloc or cudaMallocPitch.
  501. Below is an example that utilizes BufferPool with StackAllocator:
  502. @code
  503. #include <opencv2/opencv.hpp>
  504. using namespace cv;
using namespace cv::cuda;
  506. int main()
  507. {
  508. setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
  509. setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
  510. Stream stream1, stream2; // Each stream uses 1 stack
  511. BufferPool pool1(stream1), pool2(stream2);
  512. GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
  513. GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
  514. GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
  515. GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
  516. cvtColor(d_src1, d_dst1, cv::COLOR_GRAY2BGR, 0, stream1);
  517. cvtColor(d_src2, d_dst2, cv::COLOR_GRAY2BGR, 0, stream2);
  518. }
  519. @endcode
  520. If we allocate another GpuMat on pool1 in the above example, it will be carried out by
  521. the DefaultAllocator since the stack for pool1 is full.
  522. @code
  523. GpuMat d_add1 = pool1.getBuffer(1024, 1024, CV_8UC1); // Stack for pool1 is full, memory is allocated with DefaultAllocator
  524. @endcode
  525. If a third stream is declared in the above example, allocating with #getBuffer
  526. within that stream will also be carried out by the DefaultAllocator because we've run out of
  527. stacks.
  528. @code
  529. Stream stream3; // Only 2 stacks were allocated, we've run out of stacks
  530. BufferPool pool3(stream3);
  531. GpuMat d_src3 = pool3.getBuffer(1024, 1024, CV_8UC1); // Memory is allocated with DefaultAllocator
  532. @endcode
  533. @warning When utilizing StackAllocator, deallocation order is important.
  534. Just like a stack, deallocation must be done in LIFO order. Below is an example of
  535. erroneous usage that violates LIFO rule. If OpenCV is compiled in Debug mode, this
  536. sample code will emit CV_Assert error.
  537. @code
  538. int main()
  539. {
  540. setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
  541. Stream stream; // A default size (10 MB) stack is allocated to this stream
  542. BufferPool pool(stream);
  543. GpuMat mat1 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat1 (1MB)
  544. GpuMat mat2 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat2 (1MB)
  545. mat1.release(); // erroneous usage : mat2 must be deallocated before mat1
  546. }
  547. @endcode
  548. Since C++ local variables are destroyed in the reverse order of construction,
  549. the code sample below satisfies the LIFO rule. Local GpuMat's are deallocated
  550. and the corresponding memory is automatically returned to the pool for later usage.
  551. @code
  552. int main()
  553. {
  554. setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
  555. setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
  556. Stream stream1, stream2; // Each stream uses 1 stack
  557. BufferPool pool1(stream1), pool2(stream2);
  558. for (int i = 0; i < 10; i++)
  559. {
  560. GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
  561. GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
  562. GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
  563. GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
  564. d_src1.setTo(Scalar(i), stream1);
  565. d_src2.setTo(Scalar(i), stream2);
  566. cvtColor(d_src1, d_dst1, cv::COLOR_GRAY2BGR, 0, stream1);
  567. cvtColor(d_src2, d_dst2, cv::COLOR_GRAY2BGR, 0, stream2);
  568. // The order of destruction of the local variables is:
  569. // d_dst2 => d_src2 => d_dst1 => d_src1
  570. // LIFO rule is satisfied, this code runs without error
  571. }
  572. }
  573. @endcode
  574. */
class CV_EXPORTS_W BufferPool
{
public:

    //! Gets the BufferPool for the given stream.
    CV_WRAP explicit BufferPool(Stream& stream);

    //! Allocates a new GpuMat of given size and type.
    CV_WRAP GpuMat getBuffer(int rows, int cols, int type);

    // WARNING: unreachable code using Ninja
#if defined _MSC_VER && _MSC_VER >= 1920
#pragma warning(push)
#pragma warning(disable: 4702)
#endif
    //! Allocates a new GpuMat of given size and type.
    //! Size stores (width, height), so it is forwarded as (rows = height, cols = width).
    CV_WRAP GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
#if defined _MSC_VER && _MSC_VER >= 1920
#pragma warning(pop)
#endif

    //! Returns the allocator associated with the stream.
    CV_WRAP Ptr<GpuMat::Allocator> getAllocator() const { return allocator_; }

private:
    Ptr<GpuMat::Allocator> allocator_;  //!< Allocator backing getBuffer(); see getAllocator().
};
//! BufferPool management (must be called before Stream creation)
CV_EXPORTS_W void setBufferPoolUsage(bool on);

//! Configures the BufferPool stacks for the given device (see BufferPool docs above).
//! @param deviceId   CUDA device to configure (e.g. the value returned by getDevice()).
//! @param stackSize  Size in bytes of each pre-allocated stack (default is 10 MB).
//! @param stackCount Number of stacks; each Stream uses one stack (default is 5).
CV_EXPORTS_W void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
  600. //===================================================================================
  601. // HostMem
  602. //===================================================================================
  603. /** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
  604. Its interface is also Mat-like but with additional memory type parameters.
  605. - **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
  606. uploading/downloading data from/to GPU.
  607. - **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
  608. address space, if supported.
  609. - **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
  610. used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
  611. utilization.
  612. @note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
  613. Pinned Memory APIs* document or *CUDA C Programming Guide*.
  614. */
class CV_EXPORTS_W HostMem
{
public:
    enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };

    //! returns the MatAllocator associated with the given allocation type
    static MatAllocator* getAllocator(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);

    CV_WRAP explicit HostMem(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);

    HostMem(const HostMem& m);

    CV_WRAP HostMem(int rows, int cols, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
    CV_WRAP HostMem(Size size, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);

    //! creates from host memory with copying data
    CV_WRAP explicit HostMem(InputArray arr, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);

    ~HostMem();

    HostMem& operator =(const HostMem& m);

    //! swaps with other smart pointer
    CV_WRAP void swap(HostMem& b);

    //! returns deep copy of the matrix, i.e. the data is copied
    CV_WRAP HostMem clone() const;

    //! allocates new matrix data unless the matrix already has specified size and type.
    CV_WRAP void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! creates alternative HostMem header for the same data, with different
    //! number of channels and/or different number of rows
    CV_WRAP HostMem reshape(int cn, int rows = 0) const;

    //! decrements reference counter and releases memory if needed.
    void release();

    //! returns matrix header with disabled reference counting for HostMem data.
    CV_WRAP Mat createMatHeader() const;

    /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
    for it.

    This can be done only if memory was allocated with the SHARED flag and if it is supported by the
    hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
    eliminates an extra copy.
     */
    GpuMat createGpuMatHeader() const;

    // Please see cv::Mat for descriptions
    CV_WRAP bool isContinuous() const;
    CV_WRAP size_t elemSize() const;
    CV_WRAP size_t elemSize1() const;
    CV_WRAP int type() const;
    CV_WRAP int depth() const;
    CV_WRAP int channels() const;
    CV_WRAP size_t step1() const;
    CV_WRAP Size size() const;
    CV_WRAP bool empty() const;

    // Please see cv::Mat for descriptions
    int flags;
    int rows, cols;
    CV_PROP size_t step;

    uchar* data;
    int* refcount;

    uchar* datastart;
    const uchar* dataend;

    AllocType alloc_type;
};
/** @brief Page-locks the memory of matrix and maps it for the device(s).

@param m Input matrix.
@sa unregisterPageLocked
 */
CV_EXPORTS_W void registerPageLocked(Mat& m);

/** @brief Unmaps the memory of matrix and makes it pageable again.

@param m Input matrix.
@sa registerPageLocked
 */
CV_EXPORTS_W void unregisterPageLocked(Mat& m);
  677. //===================================================================================
  678. // Stream
  679. //===================================================================================
  680. /** @brief This class encapsulates a queue of asynchronous calls.
  681. @note Currently, you may face problems if an operation is enqueued twice with different data. Some
  682. functions use the constant GPU memory, and next call may update the memory before the previous one
  683. has been finished. But calling different operations asynchronously is safe because each operation
  684. has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
  685. also safe.
  686. @note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads.
  687. @code
  688. void thread1()
  689. {
  690. cv::cuda::Stream stream1;
  691. cv::cuda::func1(..., stream1);
  692. }
  693. void thread2()
  694. {
  695. cv::cuda::Stream stream2;
  696. cv::cuda::func2(..., stream2);
  697. }
  698. @endcode
  699. @note By default all CUDA routines are launched in Stream::Null() object, if the stream is not specified by user.
  700. In multi-threading environment the stream objects must be passed explicitly (see previous note).
  701. */
class CV_EXPORTS_W Stream
{
    // Safe-bool idiom: lets "if (stream)" work via operator bool_type() below
    // while the member-pointer type prevents accidental comparisons between streams.
    typedef void (Stream::*bool_type)() const;
    void this_type_does_not_support_comparisons() const {}

public:
    typedef void (*StreamCallback)(int status, void* userData);

    //! creates a new asynchronous stream
    CV_WRAP Stream();

    //! creates a new asynchronous stream with custom allocator
    CV_WRAP Stream(const Ptr<GpuMat::Allocator>& allocator);

    /** @brief creates a new Stream using the cudaFlags argument to determine the behaviors of the stream

    @note The cudaFlags parameter is passed to the underlying api cudaStreamCreateWithFlags() and
    supports the same parameter values.
    @code
    // creates an OpenCV cuda::Stream that manages an asynchronous, non-blocking,
    // non-default CUDA stream
    cv::cuda::Stream cvStream(cudaStreamNonBlocking);
    @endcode
     */
    CV_WRAP Stream(const size_t cudaFlags);

    /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
    */
    CV_WRAP bool queryIfComplete() const;

    /** @brief Blocks the current CPU thread until all operations in the stream are complete.
    */
    CV_WRAP void waitForCompletion();

    /** @brief Makes a compute stream wait on an event.
    */
    CV_WRAP void waitEvent(const Event& event);

    /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
    completed.

    @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
    that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
    Callbacks without a mandated order (in independent streams) execute in undefined order and may be
    serialized.
     */
    void enqueueHostCallback(StreamCallback callback, void* userData);

    //! return Stream object for default CUDA stream
    CV_WRAP static Stream& Null();

    //! returns true if stream object is not default (!= 0)
    operator bool_type() const;

    //! return Pointer to CUDA stream
    CV_WRAP void* cudaPtr() const;

    class Impl;

private:
    Ptr<Impl> impl_;  //!< Pimpl: shared implementation holding the underlying CUDA stream state.
    Stream(const Ptr<Impl>& impl);

    friend struct StreamAccessor;
    friend class BufferPool;
    friend class DefaultDeviceInitializer;
};
/** @brief Bindings overload to create a Stream object from the address stored in an existing CUDA Runtime API stream pointer (cudaStream_t).

@param cudaStreamMemoryAddress Memory address stored in a CUDA Runtime API stream pointer (cudaStream_t). The created Stream object does not perform any allocation or deallocation and simply wraps existing raw CUDA Runtime API stream pointer.

@note Overload for generation of bindings only, not exported or intended for use internally from C++.
@sa Stream
 */
CV_EXPORTS_W Stream wrapStream(size_t cudaStreamMemoryAddress);
class CV_EXPORTS_W Event
{
public:
    enum CreateFlags
    {
        DEFAULT        = 0x00,  /**< Default event flag */
        BLOCKING_SYNC  = 0x01,  /**< Event uses blocking synchronization */
        DISABLE_TIMING = 0x02,  /**< Event will not record timing data */
        INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DisableTiming must be set */
    };

    CV_WRAP explicit Event(const Event::CreateFlags flags = Event::CreateFlags::DEFAULT);

    //! records an event
    CV_WRAP void record(Stream& stream = Stream::Null());

    //! queries an event's status
    CV_WRAP bool queryIfComplete() const;

    //! waits for an event to complete
    CV_WRAP void waitForCompletion();

    //! computes the elapsed time between events
    CV_WRAP static float elapsedTime(const Event& start, const Event& end);

    class Impl;

private:
    Ptr<Impl> impl_;  //!< Pimpl: shared implementation holding the underlying CUDA event state.
    Event(const Ptr<Impl>& impl);

    friend struct EventAccessor;
};

//! Enables bitwise combination (OR/AND) of Event::CreateFlags values.
CV_ENUM_FLAGS(Event::CreateFlags)
  784. //! @} cudacore_struct
  785. //===================================================================================
  786. // Initialization & Info
  787. //===================================================================================
  788. //! @addtogroup cudacore_init
  789. //! @{
/** @brief Returns the number of installed CUDA-enabled devices.

Use this function before any other CUDA functions calls. If OpenCV is compiled without CUDA support,
this function returns 0. If the CUDA driver is not installed, or is incompatible, this function
returns -1.
 */
CV_EXPORTS_W int getCudaEnabledDeviceCount();

/** @brief Sets a device and initializes it for the current thread.

@param device System index of a CUDA device starting with 0.

If the call of this function is omitted, a default device is initialized at the first CUDA usage.
 */
CV_EXPORTS_W void setDevice(int device);

/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
 */
CV_EXPORTS_W int getDevice();

/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
process.

Any subsequent API call to this device will reinitialize the device.
 */
CV_EXPORTS_W void resetDevice();
/** @brief Enumeration providing CUDA computing features.
 */
enum FeatureSet
{
    FEATURE_SET_COMPUTE_10 = 10,
    FEATURE_SET_COMPUTE_11 = 11,
    FEATURE_SET_COMPUTE_12 = 12,
    FEATURE_SET_COMPUTE_13 = 13,
    FEATURE_SET_COMPUTE_20 = 20,
    FEATURE_SET_COMPUTE_21 = 21,
    FEATURE_SET_COMPUTE_30 = 30,
    FEATURE_SET_COMPUTE_32 = 32,
    FEATURE_SET_COMPUTE_35 = 35,
    FEATURE_SET_COMPUTE_50 = 50,

    // Named feature aliases mapped onto the compute-capability level that provides them.
    GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
    SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
    NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
    WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
    DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};

//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
  831. /** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
  832. built for.
  833. According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
  834. capability can always be compiled to binary code of greater or equal compute capability".
  835. */
class CV_EXPORTS_W TargetArchs
{
public:
    /** @brief The following method checks whether the module was built with the support of the given feature:

    @param feature_set Features to be checked. See :ocvcuda::FeatureSet.
     */
    static bool builtWith(FeatureSet feature_set);

    /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
    code for the given architecture(s):

    @param major Major compute capability version.
    @param minor Minor compute capability version.
     */
    CV_WRAP static bool has(int major, int minor);
    CV_WRAP static bool hasPtx(int major, int minor);
    CV_WRAP static bool hasBin(int major, int minor);
    CV_WRAP static bool hasEqualOrLessPtx(int major, int minor);
    CV_WRAP static bool hasEqualOrGreater(int major, int minor);
    CV_WRAP static bool hasEqualOrGreaterPtx(int major, int minor);
    CV_WRAP static bool hasEqualOrGreaterBin(int major, int minor);
};
  856. /** @brief Class providing functionality for querying the specified GPU properties.
  857. */
class CV_EXPORTS_W DeviceInfo
{
public:
    //! creates DeviceInfo object for the current GPU
    CV_WRAP DeviceInfo();

    /** @brief The constructors.

    @param device_id System index of the CUDA device starting with 0.

    Constructs the DeviceInfo object for the specified device. If device_id parameter is missed, it
    constructs an object for the current device.
     */
    CV_WRAP DeviceInfo(int device_id);

    /** @brief Returns system index of the CUDA device starting with 0.
    */
    CV_WRAP int deviceID() const;

    //! ASCII string identifying device
    const char* name() const;

    //! global memory available on device in bytes
    CV_WRAP size_t totalGlobalMem() const;

    //! shared memory available per block in bytes
    CV_WRAP size_t sharedMemPerBlock() const;

    //! 32-bit registers available per block
    CV_WRAP int regsPerBlock() const;

    //! warp size in threads
    CV_WRAP int warpSize() const;

    //! maximum pitch in bytes allowed by memory copies
    CV_WRAP size_t memPitch() const;

    //! maximum number of threads per block
    CV_WRAP int maxThreadsPerBlock() const;

    //! maximum size of each dimension of a block
    CV_WRAP Vec3i maxThreadsDim() const;

    //! maximum size of each dimension of a grid
    CV_WRAP Vec3i maxGridSize() const;

    //! clock frequency in kilohertz
    CV_WRAP int clockRate() const;

    //! constant memory available on device in bytes
    CV_WRAP size_t totalConstMem() const;

    //! major compute capability
    CV_WRAP int majorVersion() const;

    //! minor compute capability
    CV_WRAP int minorVersion() const;

    //! alignment requirement for textures
    CV_WRAP size_t textureAlignment() const;

    //! pitch alignment requirement for texture references bound to pitched memory
    CV_WRAP size_t texturePitchAlignment() const;

    //! number of multiprocessors on device
    CV_WRAP int multiProcessorCount() const;

    //! specifies whether there is a run time limit on kernels
    CV_WRAP bool kernelExecTimeoutEnabled() const;

    //! device is integrated as opposed to discrete
    CV_WRAP bool integrated() const;

    //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
    CV_WRAP bool canMapHostMemory() const;

    enum ComputeMode
    {
        ComputeModeDefault,         /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
        ComputeModeExclusive,       /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
        ComputeModeProhibited,      /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
        ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
    };

    //! compute mode
    CV_WRAP DeviceInfo::ComputeMode computeMode() const;

    //! maximum 1D texture size
    CV_WRAP int maxTexture1D() const;

    //! maximum 1D mipmapped texture size
    CV_WRAP int maxTexture1DMipmap() const;

    //! maximum size for 1D textures bound to linear memory
    CV_WRAP int maxTexture1DLinear() const;

    //! maximum 2D texture dimensions
    CV_WRAP Vec2i maxTexture2D() const;

    //! maximum 2D mipmapped texture dimensions
    CV_WRAP Vec2i maxTexture2DMipmap() const;

    //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
    CV_WRAP Vec3i maxTexture2DLinear() const;

    //! maximum 2D texture dimensions if texture gather operations have to be performed
    CV_WRAP Vec2i maxTexture2DGather() const;

    //! maximum 3D texture dimensions
    CV_WRAP Vec3i maxTexture3D() const;

    //! maximum Cubemap texture dimensions
    CV_WRAP int maxTextureCubemap() const;

    //! maximum 1D layered texture dimensions
    CV_WRAP Vec2i maxTexture1DLayered() const;

    //! maximum 2D layered texture dimensions
    CV_WRAP Vec3i maxTexture2DLayered() const;

    //! maximum Cubemap layered texture dimensions
    CV_WRAP Vec2i maxTextureCubemapLayered() const;

    //! maximum 1D surface size
    CV_WRAP int maxSurface1D() const;

    //! maximum 2D surface dimensions
    CV_WRAP Vec2i maxSurface2D() const;

    //! maximum 3D surface dimensions
    CV_WRAP Vec3i maxSurface3D() const;

    //! maximum 1D layered surface dimensions
    CV_WRAP Vec2i maxSurface1DLayered() const;

    //! maximum 2D layered surface dimensions
    CV_WRAP Vec3i maxSurface2DLayered() const;

    //! maximum Cubemap surface dimensions
    CV_WRAP int maxSurfaceCubemap() const;

    //! maximum Cubemap layered surface dimensions
    CV_WRAP Vec2i maxSurfaceCubemapLayered() const;

    //! alignment requirements for surfaces
    CV_WRAP size_t surfaceAlignment() const;

    //! device can possibly execute multiple kernels concurrently
    CV_WRAP bool concurrentKernels() const;

    //! device has ECC support enabled
    CV_WRAP bool ECCEnabled() const;

    //! PCI bus ID of the device
    CV_WRAP int pciBusID() const;

    //! PCI device ID of the device
    CV_WRAP int pciDeviceID() const;

    //! PCI domain ID of the device
    CV_WRAP int pciDomainID() const;

    //! true if device is a Tesla device using TCC driver, false otherwise
    CV_WRAP bool tccDriver() const;

    //! number of asynchronous engines
    CV_WRAP int asyncEngineCount() const;

    //! device shares a unified address space with the host
    CV_WRAP bool unifiedAddressing() const;

    //! peak memory clock frequency in kilohertz
    CV_WRAP int memoryClockRate() const;

    //! global memory bus width in bits
    CV_WRAP int memoryBusWidth() const;

    //! size of L2 cache in bytes
    CV_WRAP int l2CacheSize() const;

    //! maximum resident threads per multiprocessor
    CV_WRAP int maxThreadsPerMultiProcessor() const;

    //! gets free and total device memory
    CV_WRAP void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
    CV_WRAP size_t freeMemory() const;
    CV_WRAP size_t totalMemory() const;

    /** @brief Provides information on CUDA feature support.

    @param feature_set Features to be checked. See cuda::FeatureSet.

    This function returns true if the device has the specified CUDA feature. Otherwise, it returns false
     */
    bool supports(FeatureSet feature_set) const;

    /** @brief Checks the CUDA module and device compatibility.

    This function returns true if the CUDA module can be run on the specified device. Otherwise, it
    returns false .
     */
    CV_WRAP bool isCompatible() const;

private:
    int device_id_;  //!< System index of the device this object describes.
};
//! Prints information about the given CUDA device (full report; details depend on the implementation).
CV_EXPORTS_W void printCudaDeviceInfo(int device);
//! Prints a short summary about the given CUDA device.
CV_EXPORTS_W void printShortCudaDeviceInfo(int device);

/** @brief Converts an array to half precision floating number.

@param _src input array.
@param _dst output array.
@param stream Stream for the asynchronous version.
@sa convertFp16
 */
CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());
  1009. //! @} cudacore_init
  1010. }} // namespace cv { namespace cuda {
  1011. #include "opencv2/core/cuda.inl.hpp"
  1012. #endif /* OPENCV_CORE_CUDA_HPP */