import sys

import numpy as np
import pytest

from pandas.compat import (
    IS64,
    PYPY,
)

from pandas.core.dtypes.common import (
    is_categorical_dtype,
    is_dtype_equal,
    is_object_dtype,
)

import pandas as pd
from pandas import (
    Index,
    Series,
)
import pandas._testing as tm


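# Assorted API tests shared between Series and Index: docstring aliases,
# ndarray-compat attributes, memory_usage, searchsorted, and positional access.
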
def test_isnull_notnull_docstrings():
    # GH#41855 make sure it's clear these are aliases
    doc = pd.DataFrame.notnull.__doc__
    assert doc.startswith("\nDataFrame.notnull is an alias for DataFrame.notna.\n")
    doc = pd.DataFrame.isnull.__doc__
    assert doc.startswith("\nDataFrame.isnull is an alias for DataFrame.isna.\n")

    doc = Series.notnull.__doc__
    assert doc.startswith("\nSeries.notnull is an alias for Series.notna.\n")
    doc = Series.isnull.__doc__
    assert doc.startswith("\nSeries.isnull is an alias for Series.isna.\n")


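# The arithmetic methods get their docstrings generated dynamically; each one
# should show the expression with the operands in the expected order, and the
# reflected ("r"-prefixed) variant should show them swapped.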
@pytest.mark.parametrize(
    "op_name, op",
    [
        ("add", "+"),
        ("sub", "-"),
        ("mul", "*"),
        ("mod", "%"),
        ("pow", "**"),
        ("truediv", "/"),
        ("floordiv", "//"),
    ],
)
def test_binary_ops_docstring(frame_or_series, op_name, op):
    # not using the all_arithmetic_functions fixture with _get_opstr
    # as _get_opstr is used internally in the dynamic implementation of the docstring
    klass = frame_or_series

    operand1 = klass.__name__.lower()
    operand2 = "other"
    expected_str = " ".join([operand1, op, operand2])
    assert expected_str in getattr(klass, op_name).__doc__

    # reverse version of the binary ops
    expected_str = " ".join([operand2, op, operand1])
    assert expected_str in getattr(klass, "r" + op_name).__doc__


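# Series/Index should expose the core ndarray-like attributes, reject the
# removed low-level ndarray properties, and support .item() only for
# length-1 objects.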
def test_ndarray_compat_properties(index_or_series_obj):
    obj = index_or_series_obj

    # these ndarray-compat attributes should all be available
    for p in ["shape", "dtype", "T", "nbytes"]:
        assert getattr(obj, p, None) is not None

    # deprecated properties
    for p in ["strides", "itemsize", "base", "data"]:
        assert not hasattr(obj, p)

    msg = "can only convert an array of size 1 to a Python scalar"
    with pytest.raises(ValueError, match=msg):
        obj.item()  # len > 1

    assert obj.ndim == 1
    assert obj.size == len(obj)

    assert Index([1]).item() == 1
    assert Series([1]).item() == 1


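# memory_usage(deep=True) should exceed the shallow result only when the values
# or index hold object, categorical, or python-backed string data; otherwise
# the two must agree.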
@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
def test_memory_usage(index_or_series_obj):
    obj = index_or_series_obj
    res = obj.memory_usage()
    res_deep = obj.memory_usage(deep=True)

    is_ser = isinstance(obj, Series)
    is_object = is_object_dtype(obj) or (
        isinstance(obj, Series) and is_object_dtype(obj.index)
    )
    is_categorical = is_categorical_dtype(obj.dtype) or (
        isinstance(obj, Series) and is_categorical_dtype(obj.index.dtype)
    )
    is_object_string = is_dtype_equal(obj, "string[python]") or (
        is_ser and is_dtype_equal(obj.index.dtype, "string[python]")
    )

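    # empty objects: an empty Index reports zero bytes, while an empty Series
    # still carries a small platform-dependent footprint from its (empty) index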
    if len(obj) == 0:
        if isinstance(obj, Index):
            expected = 0
        else:
            expected = 108 if IS64 else 64
        assert res_deep == res == expected
    elif is_object or is_categorical or is_object_string:
        # only deep will pick them up
        assert res_deep > res
    else:
        assert res == res_deep

    # sys.getsizeof calls memory_usage(deep=True) and adds some GC overhead
    diff = res_deep - sys.getsizeof(obj)
    assert abs(diff) < 100


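# memory_usage(index=True) should decompose exactly into the values' usage
# plus the index's own memory_usage.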
def test_memory_usage_components_series(series_with_simple_index):
    series = series_with_simple_index
    total_usage = series.memory_usage(index=True)
    non_index_usage = series.memory_usage(index=False)
    index_usage = series.index.memory_usage()
    assert total_usage == non_index_usage + index_usage


- @pytest.mark.parametrize("dtype", tm.NARROW_NP_DTYPES)
- def test_memory_usage_components_narrow_series(dtype):
- series = tm.make_rand_series(name="a", dtype=dtype)
- total_usage = series.memory_usage(index=True)
- non_index_usage = series.memory_usage(index=False)
- index_usage = series.index.memory_usage()
- assert total_usage == non_index_usage + index_usage
def test_searchsorted(request, index_or_series_obj):
    # numpy.searchsorted calls obj.searchsorted under the hood.
    # See gh-12238
    obj = index_or_series_obj

    if isinstance(obj, pd.MultiIndex):
        # See gh-14833
        request.node.add_marker(
            pytest.mark.xfail(
                reason="np.searchsorted doesn't work on pd.MultiIndex: GH 14833"
            )
        )
    elif obj.dtype.kind == "c" and isinstance(obj, Index):
        # TODO: Should Series cases also raise? Looks like they use numpy
        #  comparison semantics https://github.com/numpy/numpy/issues/15981
        mark = pytest.mark.xfail(reason="complex objects are not comparable")
        request.node.add_marker(mark)

    max_obj = max(obj, default=0)
    index = np.searchsorted(obj, max_obj)
    assert 0 <= index <= len(obj)

    index = np.searchsorted(obj, max_obj, sorter=range(len(obj)))
    assert 0 <= index <= len(obj)


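# Positional indexing through Index.__getitem__ and Series.iloc should agree,
# and out-of-bounds positions should raise IndexError.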
def test_access_by_position(index_flat):
    index = index_flat

    if len(index) == 0:
        pytest.skip("Test doesn't make sense on empty data")

    series = Series(index)
    assert index[0] == series.iloc[0]
    assert index[5] == series.iloc[5]
    assert index[-1] == series.iloc[-1]

    size = len(index)
    assert index[-1] == index[size - 1]

    msg = f"index {size} is out of bounds for axis 0 with size {size}"
    if is_dtype_equal(index.dtype, "string[pyarrow]"):
        msg = "index out of bounds"
    with pytest.raises(IndexError, match=msg):
        index[size]
    msg = "single positional indexer is out-of-bounds"
    with pytest.raises(IndexError, match=msg):
        series.iloc[size]