arrays.pyx 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. """
  2. Cython implementations for internal ExtensionArrays.
  3. """
  4. cimport cython
  5. import numpy as np
  6. cimport numpy as cnp
  7. from cpython cimport PyErr_Clear
  8. from numpy cimport ndarray
  9. cnp.import_array()
  10. @cython.freelist(16)
  11. cdef class NDArrayBacked:
  12. """
  13. Implementing these methods in cython improves performance quite a bit.
  14. import pandas as pd
  15. from pandas._libs.arrays import NDArrayBacked as cls
  16. dti = pd.date_range("2016-01-01", periods=3)
  17. dta = dti._data
  18. arr = dta._ndarray
  19. obj = cls._simple_new(arr, arr.dtype)
  20. # for foo in [arr, dta, obj]: ...
  21. %timeit foo.copy()
  22. 299 ns ± 30 ns per loop # <-- arr underlying ndarray (for reference)
  23. 530 ns ± 9.24 ns per loop # <-- dta with cython NDArrayBacked
  24. 1.66 µs ± 46.3 ns per loop # <-- dta without cython NDArrayBacked
  25. 328 ns ± 5.29 ns per loop # <-- obj with NDArrayBacked.__cinit__
  26. 371 ns ± 6.97 ns per loop # <-- obj with NDArrayBacked._simple_new
  27. %timeit foo.T
  28. 125 ns ± 6.27 ns per loop # <-- arr underlying ndarray (for reference)
  29. 226 ns ± 7.66 ns per loop # <-- dta with cython NDArrayBacked
  30. 911 ns ± 16.6 ns per loop # <-- dta without cython NDArrayBacked
  31. 215 ns ± 4.54 ns per loop # <-- obj with NDArrayBacked._simple_new
  32. """
  33. # TODO: implement take in terms of cnp.PyArray_TakeFrom
  34. # TODO: implement concat_same_type in terms of cnp.PyArray_Concatenate
  35. # cdef:
  36. # readonly ndarray _ndarray
  37. # readonly object _dtype
  38. def __init__(self, ndarray values, object dtype):
  39. self._ndarray = values
  40. self._dtype = dtype
  41. @classmethod
  42. def _simple_new(cls, ndarray values, object dtype):
  43. cdef:
  44. NDArrayBacked obj
  45. obj = NDArrayBacked.__new__(cls)
  46. obj._ndarray = values
  47. obj._dtype = dtype
  48. return obj
  49. cpdef NDArrayBacked _from_backing_data(self, ndarray values):
  50. """
  51. Construct a new ExtensionArray `new_array` with `arr` as its _ndarray.
  52. This should round-trip:
  53. self == self._from_backing_data(self._ndarray)
  54. """
  55. # TODO: re-reuse simple_new if/when it can be cpdef
  56. cdef:
  57. NDArrayBacked obj
  58. obj = NDArrayBacked.__new__(type(self))
  59. obj._ndarray = values
  60. obj._dtype = self._dtype
  61. return obj
  62. cpdef __setstate__(self, state):
  63. if isinstance(state, dict):
  64. if "_data" in state:
  65. data = state.pop("_data")
  66. elif "_ndarray" in state:
  67. data = state.pop("_ndarray")
  68. else:
  69. raise ValueError # pragma: no cover
  70. self._ndarray = data
  71. self._dtype = state.pop("_dtype")
  72. for key, val in state.items():
  73. setattr(self, key, val)
  74. elif isinstance(state, tuple):
  75. if len(state) != 3:
  76. if len(state) == 1 and isinstance(state[0], dict):
  77. self.__setstate__(state[0])
  78. return
  79. raise NotImplementedError(state) # pragma: no cover
  80. data, dtype = state[:2]
  81. if isinstance(dtype, np.ndarray):
  82. dtype, data = data, dtype
  83. self._ndarray = data
  84. self._dtype = dtype
  85. if isinstance(state[2], dict):
  86. for key, val in state[2].items():
  87. setattr(self, key, val)
  88. else:
  89. raise NotImplementedError(state) # pragma: no cover
  90. else:
  91. raise NotImplementedError(state) # pragma: no cover
  92. def __len__(self) -> int:
  93. return len(self._ndarray)
  94. @property
  95. def shape(self):
  96. # object cast bc _ndarray.shape is npy_intp*
  97. return (<object>(self._ndarray)).shape
  98. @property
  99. def ndim(self) -> int:
  100. return self._ndarray.ndim
  101. @property
  102. def size(self) -> int:
  103. return self._ndarray.size
  104. @property
  105. def nbytes(self) -> int:
  106. return self._ndarray.nbytes
  107. def copy(self, order="C"):
  108. cdef:
  109. cnp.NPY_ORDER order_code
  110. int success
  111. success = cnp.PyArray_OrderConverter(order, &order_code)
  112. if not success:
  113. # clear exception so that we don't get a SystemError
  114. PyErr_Clear()
  115. # same message used by numpy
  116. msg = f"order must be one of 'C', 'F', 'A', or 'K' (got '{order}')"
  117. raise ValueError(msg)
  118. res_values = cnp.PyArray_NewCopy(self._ndarray, order_code)
  119. return self._from_backing_data(res_values)
  120. def delete(self, loc, axis=0):
  121. res_values = np.delete(self._ndarray, loc, axis=axis)
  122. return self._from_backing_data(res_values)
  123. def swapaxes(self, axis1, axis2):
  124. res_values = cnp.PyArray_SwapAxes(self._ndarray, axis1, axis2)
  125. return self._from_backing_data(res_values)
  126. # TODO: pass NPY_MAXDIMS equiv to axis=None?
  127. def repeat(self, repeats, axis: int | np.integer = 0):
  128. if axis is None:
  129. axis = 0
  130. res_values = cnp.PyArray_Repeat(self._ndarray, repeats, <int>axis)
  131. return self._from_backing_data(res_values)
  132. def reshape(self, *args, **kwargs):
  133. res_values = self._ndarray.reshape(*args, **kwargs)
  134. return self._from_backing_data(res_values)
  135. def ravel(self, order="C"):
  136. # cnp.PyArray_OrderConverter(PyObject* obj, NPY_ORDER* order)
  137. # res_values = cnp.PyArray_Ravel(self._ndarray, order)
  138. res_values = self._ndarray.ravel(order)
  139. return self._from_backing_data(res_values)
  140. @property
  141. def T(self):
  142. res_values = self._ndarray.T
  143. return self._from_backing_data(res_values)
  144. def transpose(self, *axes):
  145. res_values = self._ndarray.transpose(*axes)
  146. return self._from_backing_data(res_values)