dlpack.h 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. /*!
  2. * Copyright (c) 2017 by Contributors
  3. * \file dlpack.h
  4. * \brief The common header of DLPack.
  5. */
  6. #ifndef DLPACK_DLPACK_H_
  7. #define DLPACK_DLPACK_H_
  /*!
   * \brief Linkage specifier for declarations.
   *  Expands to extern "C" when compiled as C++ and to nothing when compiled
   *  as C.
   */
  #ifdef __cplusplus
  #define DLPACK_EXTERN_C extern "C"
  #else
  #define DLPACK_EXTERN_C
  #endif
  /*! \brief The current version of dlpack */
  #define DLPACK_VERSION 60
  /*!
   * \brief DLPACK_DLL prefix for windows.
   *  On _WIN32 it expands to __declspec(dllexport) when DLPACK_EXPORTS is
   *  defined and to __declspec(dllimport) otherwise. On every other platform
   *  it expands to nothing.
   */
  #ifdef _WIN32
  #  ifdef DLPACK_EXPORTS
  #    define DLPACK_DLL __declspec(dllexport)
  #  else
  #    define DLPACK_DLL __declspec(dllimport)
  #  endif
  #else
  #  define DLPACK_DLL
  #endif
  25. #include <stddef.h>
  26. #include <stdint.h>
  27. #ifdef __cplusplus
  28. extern "C" {
  29. #endif
  /*!
   * \brief The device type in DLDevice.
   *
   * The numeric values are fixed explicitly; values 5 and 6 are not assigned
   * by this enum.
   */
  typedef enum {
    /*! \brief CPU device */
    kDLCPU = 1,
    /*! \brief CUDA GPU device */
    kDLCUDA = 2,
    /*!
     * \brief Pinned CUDA CPU memory by cudaMallocHost
     */
    kDLCUDAHost = 3,
    /*! \brief OpenCL devices. */
    kDLOpenCL = 4,
    /*! \brief Vulkan buffer for next generation graphics. */
    kDLVulkan = 7,
    /*! \brief Metal for Apple GPU. */
    kDLMetal = 8,
    /*! \brief Verilog simulator buffer */
    kDLVPI = 9,
    /*! \brief ROCm GPUs for AMD GPUs */
    kDLROCM = 10,
    /*!
     * \brief Pinned ROCm CPU memory allocated by hipMallocHost
     */
    kDLROCMHost = 11,
    /*!
     * \brief Reserved extension device type,
     * used to quickly test an extension device.
     * The semantics can differ depending on the implementation.
     */
    kDLExtDev = 12,
    /*!
     * \brief CUDA managed/unified memory allocated by cudaMallocManaged
     */
    kDLCUDAManaged = 13,
  } DLDeviceType;
  67. /*!
  68. * \brief A Device for Tensor and operator.
  69. */
  70. typedef struct {
  71. /*! \brief The device type used in the device. */
  72. DLDeviceType device_type;
  73. /*!
  74. * \brief The device index.
  75. * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
  76. */
  77. int device_id;
  78. } DLDevice;
  /*!
   * \brief The type code options of DLDataType.
   */
  typedef enum {
    /*! \brief signed integer */
    kDLInt = 0U,
    /*! \brief unsigned integer */
    kDLUInt = 1U,
    /*! \brief IEEE floating point */
    kDLFloat = 2U,
    /*!
     * \brief Opaque handle type, reserved for testing purposes.
     * Frameworks need to agree on the handle data type for the exchange to be
     * well-defined.
     */
    kDLOpaqueHandle = 3U,
    /*! \brief bfloat16 */
    kDLBfloat = 4U,
    /*!
     * \brief complex number
     * (C/C++/Python layout: compact struct per complex number)
     */
    kDLComplex = 5U,
  } DLDataTypeCode;
  /*!
   * \brief The data type the tensor can hold.
   *
   * Examples
   * - float: type_code = 2, bits = 32, lanes=1
   * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
   * - int8: type_code = 0, bits = 8, lanes=1
   * - std::complex<float>: type_code = 5, bits = 64, lanes = 1
   */
  typedef struct {
    /*!
     * \brief Type code of base types.
     * We keep it uint8_t instead of DLDataTypeCode for minimal memory
     * footprint, but the value should be one of DLDataTypeCode enum values.
     */
    uint8_t code;
    /*!
     * \brief Number of bits, common choices are 8, 16, 32.
     */
    uint8_t bits;
    /*! \brief Number of lanes in the type, used for vector types. */
    uint16_t lanes;
  } DLDataType;
  126. /*!
  127. * \brief Plain C Tensor object, does not manage memory.
  128. */
  129. typedef struct {
  130. /*!
  131. * \brief The opaque data pointer points to the allocated data. This will be
  132. * CUDA device pointer or cl_mem handle in OpenCL. This pointer is always
  133. * aligned to 256 bytes as in CUDA.
  134. *
  135. * For given DLTensor, the size of memory required to store the contents of
  136. * data is calculated as follows:
  137. *
  138. * \code{.c}
  139. * static inline size_t GetDataSize(const DLTensor* t) {
  140. * size_t size = 1;
  141. * for (tvm_index_t i = 0; i < t->ndim; ++i) {
  142. * size *= t->shape[i];
  143. * }
  144. * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
  145. * return size;
  146. * }
  147. * \endcode
  148. */
  149. void* data;
  150. /*! \brief The device of the tensor */
  151. DLDevice device;
  152. /*! \brief Number of dimensions */
  153. int ndim;
  154. /*! \brief The data type of the pointer*/
  155. DLDataType dtype;
  156. /*! \brief The shape of the tensor */
  157. int64_t* shape;
  158. /*!
  159. * \brief strides of the tensor (in number of elements, not bytes)
  160. * can be NULL, indicating tensor is compact and row-majored.
  161. */
  162. int64_t* strides;
  163. /*! \brief The offset in bytes to the beginning pointer to data */
  164. uint64_t byte_offset;
  165. } DLTensor;
  166. /*!
  167. * \brief C Tensor object, manage memory of DLTensor. This data structure is
  168. * intended to facilitate the borrowing of DLTensor by another framework. It is
  169. * not meant to transfer the tensor. When the borrowing framework doesn't need
  170. * the tensor, it should call the deleter to notify the host that the resource
  171. * is no longer needed.
  172. */
  173. typedef struct DLManagedTensor {
  174. /*! \brief DLTensor which is being memory managed */
  175. DLTensor dl_tensor;
  176. /*! \brief the context of the original host framework of DLManagedTensor in
  177. * which DLManagedTensor is used in the framework. It can also be NULL.
  178. */
  179. void* manager_ctx;
  180. /*! \brief Destructor signature void (*)(void*) - this should be called
  181. * to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
  182. * if there is no way for the caller to provide a reasonable destructor.
  183. * The destructors deletes the argument self as well.
  184. */
  185. void (*deleter)(struct DLManagedTensor* self);
  186. } DLManagedTensor;
  187. #ifdef __cplusplus
  188. } // DLPACK_EXTERN_C
  189. #endif
  190. #endif // DLPACK_DLPACK_H_