reshape.pyx 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. cimport cython
  2. from cython cimport Py_ssize_t
  3. from numpy cimport (
  4. int64_t,
  5. ndarray,
  6. uint8_t,
  7. )
  8. import numpy as np
  9. cimport numpy as cnp
  10. cnp.import_array()
  11. from pandas._libs.dtypes cimport numeric_object_t
  12. from pandas._libs.lib cimport c_is_list_like
  13. @cython.wraparound(False)
  14. @cython.boundscheck(False)
  15. def unstack(numeric_object_t[:, :] values, const uint8_t[:] mask,
  16. Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width,
  17. numeric_object_t[:, :] new_values, uint8_t[:, :] new_mask) -> None:
  18. """
  19. Transform long values to wide new_values.
  20. Parameters
  21. ----------
  22. values : typed ndarray
  23. mask : np.ndarray[bool]
  24. stride : int
  25. length : int
  26. width : int
  27. new_values : np.ndarray[bool]
  28. result array
  29. new_mask : np.ndarray[bool]
  30. result mask
  31. """
  32. cdef:
  33. Py_ssize_t i, j, w, nulls, s, offset
  34. if numeric_object_t is not object:
  35. # evaluated at compile-time
  36. with nogil:
  37. for i in range(stride):
  38. nulls = 0
  39. for j in range(length):
  40. for w in range(width):
  41. offset = j * width + w
  42. if mask[offset]:
  43. s = i * width + w
  44. new_values[j, s] = values[offset - nulls, i]
  45. new_mask[j, s] = 1
  46. else:
  47. nulls += 1
  48. else:
  49. # object-dtype, identical to above but we cannot use nogil
  50. for i in range(stride):
  51. nulls = 0
  52. for j in range(length):
  53. for w in range(width):
  54. offset = j * width + w
  55. if mask[offset]:
  56. s = i * width + w
  57. new_values[j, s] = values[offset - nulls, i]
  58. new_mask[j, s] = 1
  59. else:
  60. nulls += 1
  61. @cython.wraparound(False)
  62. @cython.boundscheck(False)
  63. def explode(ndarray[object] values):
  64. """
  65. transform array list-likes to long form
  66. preserve non-list entries
  67. Parameters
  68. ----------
  69. values : ndarray[object]
  70. Returns
  71. -------
  72. ndarray[object]
  73. result
  74. ndarray[int64_t]
  75. counts
  76. """
  77. cdef:
  78. Py_ssize_t i, j, count, n
  79. object v
  80. ndarray[object] result
  81. ndarray[int64_t] counts
  82. # find the resulting len
  83. n = len(values)
  84. counts = np.zeros(n, dtype="int64")
  85. for i in range(n):
  86. v = values[i]
  87. if c_is_list_like(v, True):
  88. if len(v):
  89. counts[i] += len(v)
  90. else:
  91. # empty list-like, use a nan marker
  92. counts[i] += 1
  93. else:
  94. counts[i] += 1
  95. result = np.empty(counts.sum(), dtype="object")
  96. count = 0
  97. for i in range(n):
  98. v = values[i]
  99. if c_is_list_like(v, True):
  100. if len(v):
  101. v = list(v)
  102. for j in range(len(v)):
  103. result[count] = v[j]
  104. count += 1
  105. else:
  106. # empty list-like, use a nan marker
  107. result[count] = np.nan
  108. count += 1
  109. else:
  110. # replace with the existing scalar
  111. result[count] = v
  112. count += 1
  113. return result, counts