test_engines.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. import re
  2. import numpy as np
  3. import pytest
  4. from pandas._libs import index as libindex
  5. import pandas as pd
  6. @pytest.fixture(
  7. params=[
  8. (libindex.Int64Engine, np.int64),
  9. (libindex.Int32Engine, np.int32),
  10. (libindex.Int16Engine, np.int16),
  11. (libindex.Int8Engine, np.int8),
  12. (libindex.UInt64Engine, np.uint64),
  13. (libindex.UInt32Engine, np.uint32),
  14. (libindex.UInt16Engine, np.uint16),
  15. (libindex.UInt8Engine, np.uint8),
  16. (libindex.Float64Engine, np.float64),
  17. (libindex.Float32Engine, np.float32),
  18. ],
  19. ids=lambda x: x[0].__name__,
  20. )
  21. def numeric_indexing_engine_type_and_dtype(request):
  22. return request.param
  23. class TestDatetimeEngine:
  24. @pytest.mark.parametrize(
  25. "scalar",
  26. [
  27. pd.Timedelta(pd.Timestamp("2016-01-01").asm8.view("m8[ns]")),
  28. pd.Timestamp("2016-01-01")._value,
  29. pd.Timestamp("2016-01-01").to_pydatetime(),
  30. pd.Timestamp("2016-01-01").to_datetime64(),
  31. ],
  32. )
  33. def test_not_contains_requires_timestamp(self, scalar):
  34. dti1 = pd.date_range("2016-01-01", periods=3)
  35. dti2 = dti1.insert(1, pd.NaT) # non-monotonic
  36. dti3 = dti1.insert(3, dti1[0]) # non-unique
  37. dti4 = pd.date_range("2016-01-01", freq="ns", periods=2_000_000)
  38. dti5 = dti4.insert(0, dti4[0]) # over size threshold, not unique
  39. msg = "|".join([re.escape(str(scalar)), re.escape(repr(scalar))])
  40. for dti in [dti1, dti2, dti3, dti4, dti5]:
  41. with pytest.raises(TypeError, match=msg):
  42. scalar in dti._engine
  43. with pytest.raises(KeyError, match=msg):
  44. dti._engine.get_loc(scalar)
  45. class TestTimedeltaEngine:
  46. @pytest.mark.parametrize(
  47. "scalar",
  48. [
  49. pd.Timestamp(pd.Timedelta(days=42).asm8.view("datetime64[ns]")),
  50. pd.Timedelta(days=42)._value,
  51. pd.Timedelta(days=42).to_pytimedelta(),
  52. pd.Timedelta(days=42).to_timedelta64(),
  53. ],
  54. )
  55. def test_not_contains_requires_timedelta(self, scalar):
  56. tdi1 = pd.timedelta_range("42 days", freq="9h", periods=1234)
  57. tdi2 = tdi1.insert(1, pd.NaT) # non-monotonic
  58. tdi3 = tdi1.insert(3, tdi1[0]) # non-unique
  59. tdi4 = pd.timedelta_range("42 days", freq="ns", periods=2_000_000)
  60. tdi5 = tdi4.insert(0, tdi4[0]) # over size threshold, not unique
  61. msg = "|".join([re.escape(str(scalar)), re.escape(repr(scalar))])
  62. for tdi in [tdi1, tdi2, tdi3, tdi4, tdi5]:
  63. with pytest.raises(TypeError, match=msg):
  64. scalar in tdi._engine
  65. with pytest.raises(KeyError, match=msg):
  66. tdi._engine.get_loc(scalar)
  67. class TestNumericEngine:
  68. def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype):
  69. engine_type, dtype = numeric_indexing_engine_type_and_dtype
  70. num = 1000
  71. arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
  72. # monotonic increasing
  73. engine = engine_type(arr)
  74. assert engine.is_monotonic_increasing is True
  75. assert engine.is_monotonic_decreasing is False
  76. # monotonic decreasing
  77. engine = engine_type(arr[::-1])
  78. assert engine.is_monotonic_increasing is False
  79. assert engine.is_monotonic_decreasing is True
  80. # neither monotonic increasing or decreasing
  81. arr = np.array([1] * num + [2] * num + [1] * num, dtype=dtype)
  82. engine = engine_type(arr[::-1])
  83. assert engine.is_monotonic_increasing is False
  84. assert engine.is_monotonic_decreasing is False
  85. def test_is_unique(self, numeric_indexing_engine_type_and_dtype):
  86. engine_type, dtype = numeric_indexing_engine_type_and_dtype
  87. # unique
  88. arr = np.array([1, 3, 2], dtype=dtype)
  89. engine = engine_type(arr)
  90. assert engine.is_unique is True
  91. # not unique
  92. arr = np.array([1, 2, 1], dtype=dtype)
  93. engine = engine_type(arr)
  94. assert engine.is_unique is False
  95. def test_get_loc(self, numeric_indexing_engine_type_and_dtype):
  96. engine_type, dtype = numeric_indexing_engine_type_and_dtype
  97. # unique
  98. arr = np.array([1, 2, 3], dtype=dtype)
  99. engine = engine_type(arr)
  100. assert engine.get_loc(2) == 1
  101. # monotonic
  102. num = 1000
  103. arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
  104. engine = engine_type(arr)
  105. assert engine.get_loc(2) == slice(1000, 2000)
  106. # not monotonic
  107. arr = np.array([1, 2, 3] * num, dtype=dtype)
  108. engine = engine_type(arr)
  109. expected = np.array([False, True, False] * num, dtype=bool)
  110. result = engine.get_loc(2)
  111. assert (result == expected).all()
  112. class TestObjectEngine:
  113. engine_type = libindex.ObjectEngine
  114. dtype = np.object_
  115. values = list("abc")
  116. def test_is_monotonic(self):
  117. num = 1000
  118. arr = np.array(["a"] * num + ["a"] * num + ["c"] * num, dtype=self.dtype)
  119. # monotonic increasing
  120. engine = self.engine_type(arr)
  121. assert engine.is_monotonic_increasing is True
  122. assert engine.is_monotonic_decreasing is False
  123. # monotonic decreasing
  124. engine = self.engine_type(arr[::-1])
  125. assert engine.is_monotonic_increasing is False
  126. assert engine.is_monotonic_decreasing is True
  127. # neither monotonic increasing or decreasing
  128. arr = np.array(["a"] * num + ["b"] * num + ["a"] * num, dtype=self.dtype)
  129. engine = self.engine_type(arr[::-1])
  130. assert engine.is_monotonic_increasing is False
  131. assert engine.is_monotonic_decreasing is False
  132. def test_is_unique(self):
  133. # unique
  134. arr = np.array(self.values, dtype=self.dtype)
  135. engine = self.engine_type(arr)
  136. assert engine.is_unique is True
  137. # not unique
  138. arr = np.array(["a", "b", "a"], dtype=self.dtype)
  139. engine = self.engine_type(arr)
  140. assert engine.is_unique is False
  141. def test_get_loc(self):
  142. # unique
  143. arr = np.array(self.values, dtype=self.dtype)
  144. engine = self.engine_type(arr)
  145. assert engine.get_loc("b") == 1
  146. # monotonic
  147. num = 1000
  148. arr = np.array(["a"] * num + ["b"] * num + ["c"] * num, dtype=self.dtype)
  149. engine = self.engine_type(arr)
  150. assert engine.get_loc("b") == slice(1000, 2000)
  151. # not monotonic
  152. arr = np.array(self.values * num, dtype=self.dtype)
  153. engine = self.engine_type(arr)
  154. expected = np.array([False, True, False] * num, dtype=bool)
  155. result = engine.get_loc("b")
  156. assert (result == expected).all()