test_misc.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. import sys
  2. import numpy as np
  3. import pytest
  4. from pandas.compat import (
  5. IS64,
  6. PYPY,
  7. )
  8. from pandas.core.dtypes.common import (
  9. is_categorical_dtype,
  10. is_dtype_equal,
  11. is_object_dtype,
  12. )
  13. import pandas as pd
  14. from pandas import (
  15. Index,
  16. Series,
  17. )
  18. import pandas._testing as tm
  19. def test_isnull_notnull_docstrings():
  20. # GH#41855 make sure its clear these are aliases
  21. doc = pd.DataFrame.notnull.__doc__
  22. assert doc.startswith("\nDataFrame.notnull is an alias for DataFrame.notna.\n")
  23. doc = pd.DataFrame.isnull.__doc__
  24. assert doc.startswith("\nDataFrame.isnull is an alias for DataFrame.isna.\n")
  25. doc = Series.notnull.__doc__
  26. assert doc.startswith("\nSeries.notnull is an alias for Series.notna.\n")
  27. doc = Series.isnull.__doc__
  28. assert doc.startswith("\nSeries.isnull is an alias for Series.isna.\n")
  29. @pytest.mark.parametrize(
  30. "op_name, op",
  31. [
  32. ("add", "+"),
  33. ("sub", "-"),
  34. ("mul", "*"),
  35. ("mod", "%"),
  36. ("pow", "**"),
  37. ("truediv", "/"),
  38. ("floordiv", "//"),
  39. ],
  40. )
  41. def test_binary_ops_docstring(frame_or_series, op_name, op):
  42. # not using the all_arithmetic_functions fixture with _get_opstr
  43. # as _get_opstr is used internally in the dynamic implementation of the docstring
  44. klass = frame_or_series
  45. operand1 = klass.__name__.lower()
  46. operand2 = "other"
  47. expected_str = " ".join([operand1, op, operand2])
  48. assert expected_str in getattr(klass, op_name).__doc__
  49. # reverse version of the binary ops
  50. expected_str = " ".join([operand2, op, operand1])
  51. assert expected_str in getattr(klass, "r" + op_name).__doc__
  52. def test_ndarray_compat_properties(index_or_series_obj):
  53. obj = index_or_series_obj
  54. # Check that we work.
  55. for p in ["shape", "dtype", "T", "nbytes"]:
  56. assert getattr(obj, p, None) is not None
  57. # deprecated properties
  58. for p in ["strides", "itemsize", "base", "data"]:
  59. assert not hasattr(obj, p)
  60. msg = "can only convert an array of size 1 to a Python scalar"
  61. with pytest.raises(ValueError, match=msg):
  62. obj.item() # len > 1
  63. assert obj.ndim == 1
  64. assert obj.size == len(obj)
  65. assert Index([1]).item() == 1
  66. assert Series([1]).item() == 1
  67. @pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
  68. def test_memory_usage(index_or_series_obj):
  69. obj = index_or_series_obj
  70. res = obj.memory_usage()
  71. res_deep = obj.memory_usage(deep=True)
  72. is_ser = isinstance(obj, Series)
  73. is_object = is_object_dtype(obj) or (
  74. isinstance(obj, Series) and is_object_dtype(obj.index)
  75. )
  76. is_categorical = is_categorical_dtype(obj.dtype) or (
  77. isinstance(obj, Series) and is_categorical_dtype(obj.index.dtype)
  78. )
  79. is_object_string = is_dtype_equal(obj, "string[python]") or (
  80. is_ser and is_dtype_equal(obj.index.dtype, "string[python]")
  81. )
  82. if len(obj) == 0:
  83. if isinstance(obj, Index):
  84. expected = 0
  85. else:
  86. expected = 108 if IS64 else 64
  87. assert res_deep == res == expected
  88. elif is_object or is_categorical or is_object_string:
  89. # only deep will pick them up
  90. assert res_deep > res
  91. else:
  92. assert res == res_deep
  93. # sys.getsizeof will call the .memory_usage with
  94. # deep=True, and add on some GC overhead
  95. diff = res_deep - sys.getsizeof(obj)
  96. assert abs(diff) < 100
  97. def test_memory_usage_components_series(series_with_simple_index):
  98. series = series_with_simple_index
  99. total_usage = series.memory_usage(index=True)
  100. non_index_usage = series.memory_usage(index=False)
  101. index_usage = series.index.memory_usage()
  102. assert total_usage == non_index_usage + index_usage
  103. @pytest.mark.parametrize("dtype", tm.NARROW_NP_DTYPES)
  104. def test_memory_usage_components_narrow_series(dtype):
  105. series = tm.make_rand_series(name="a", dtype=dtype)
  106. total_usage = series.memory_usage(index=True)
  107. non_index_usage = series.memory_usage(index=False)
  108. index_usage = series.index.memory_usage()
  109. assert total_usage == non_index_usage + index_usage
  110. def test_searchsorted(request, index_or_series_obj):
  111. # numpy.searchsorted calls obj.searchsorted under the hood.
  112. # See gh-12238
  113. obj = index_or_series_obj
  114. if isinstance(obj, pd.MultiIndex):
  115. # See gh-14833
  116. request.node.add_marker(
  117. pytest.mark.xfail(
  118. reason="np.searchsorted doesn't work on pd.MultiIndex: GH 14833"
  119. )
  120. )
  121. elif obj.dtype.kind == "c" and isinstance(obj, Index):
  122. # TODO: Should Series cases also raise? Looks like they use numpy
  123. # comparison semantics https://github.com/numpy/numpy/issues/15981
  124. mark = pytest.mark.xfail(reason="complex objects are not comparable")
  125. request.node.add_marker(mark)
  126. max_obj = max(obj, default=0)
  127. index = np.searchsorted(obj, max_obj)
  128. assert 0 <= index <= len(obj)
  129. index = np.searchsorted(obj, max_obj, sorter=range(len(obj)))
  130. assert 0 <= index <= len(obj)
  131. def test_access_by_position(index_flat):
  132. index = index_flat
  133. if len(index) == 0:
  134. pytest.skip("Test doesn't make sense on empty data")
  135. series = Series(index)
  136. assert index[0] == series.iloc[0]
  137. assert index[5] == series.iloc[5]
  138. assert index[-1] == series.iloc[-1]
  139. size = len(index)
  140. assert index[-1] == index[size - 1]
  141. msg = f"index {size} is out of bounds for axis 0 with size {size}"
  142. if is_dtype_equal(index.dtype, "string[pyarrow]"):
  143. msg = "index out of bounds"
  144. with pytest.raises(IndexError, match=msg):
  145. index[size]
  146. msg = "single positional indexer is out-of-bounds"
  147. with pytest.raises(IndexError, match=msg):
  148. series.iloc[size]