missing.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. import pandas._testing as tm
  5. from pandas.api.types import is_sparse
  6. from pandas.tests.extension.base.base import BaseExtensionTests
  7. class BaseMissingTests(BaseExtensionTests):
  8. def test_isna(self, data_missing):
  9. expected = np.array([True, False])
  10. result = pd.isna(data_missing)
  11. tm.assert_numpy_array_equal(result, expected)
  12. result = pd.Series(data_missing).isna()
  13. expected = pd.Series(expected)
  14. self.assert_series_equal(result, expected)
  15. # GH 21189
  16. result = pd.Series(data_missing).drop([0, 1]).isna()
  17. expected = pd.Series([], dtype=bool)
  18. self.assert_series_equal(result, expected)
  19. @pytest.mark.parametrize("na_func", ["isna", "notna"])
  20. def test_isna_returns_copy(self, data_missing, na_func):
  21. result = pd.Series(data_missing)
  22. expected = result.copy()
  23. mask = getattr(result, na_func)()
  24. if is_sparse(mask):
  25. mask = np.array(mask)
  26. mask[:] = True
  27. self.assert_series_equal(result, expected)
  28. def test_dropna_array(self, data_missing):
  29. result = data_missing.dropna()
  30. expected = data_missing[[1]]
  31. self.assert_extension_array_equal(result, expected)
  32. def test_dropna_series(self, data_missing):
  33. ser = pd.Series(data_missing)
  34. result = ser.dropna()
  35. expected = ser.iloc[[1]]
  36. self.assert_series_equal(result, expected)
  37. def test_dropna_frame(self, data_missing):
  38. df = pd.DataFrame({"A": data_missing})
  39. # defaults
  40. result = df.dropna()
  41. expected = df.iloc[[1]]
  42. self.assert_frame_equal(result, expected)
  43. # axis = 1
  44. result = df.dropna(axis="columns")
  45. expected = pd.DataFrame(index=pd.RangeIndex(2), columns=pd.Index([]))
  46. self.assert_frame_equal(result, expected)
  47. # multiple
  48. df = pd.DataFrame({"A": data_missing, "B": [1, np.nan]})
  49. result = df.dropna()
  50. expected = df.iloc[:0]
  51. self.assert_frame_equal(result, expected)
  52. def test_fillna_scalar(self, data_missing):
  53. valid = data_missing[1]
  54. result = data_missing.fillna(valid)
  55. expected = data_missing.fillna(valid)
  56. self.assert_extension_array_equal(result, expected)
  57. def test_fillna_limit_pad(self, data_missing):
  58. arr = data_missing.take([1, 0, 0, 0, 1])
  59. result = pd.Series(arr).fillna(method="ffill", limit=2)
  60. expected = pd.Series(data_missing.take([1, 1, 1, 0, 1]))
  61. self.assert_series_equal(result, expected)
  62. def test_fillna_limit_backfill(self, data_missing):
  63. arr = data_missing.take([1, 0, 0, 0, 1])
  64. result = pd.Series(arr).fillna(method="backfill", limit=2)
  65. expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
  66. self.assert_series_equal(result, expected)
  67. def test_fillna_no_op_returns_copy(self, data):
  68. data = data[~data.isna()]
  69. valid = data[0]
  70. result = data.fillna(valid)
  71. assert result is not data
  72. self.assert_extension_array_equal(result, data)
  73. result = data.fillna(method="backfill")
  74. assert result is not data
  75. self.assert_extension_array_equal(result, data)
  76. def test_fillna_series(self, data_missing):
  77. fill_value = data_missing[1]
  78. ser = pd.Series(data_missing)
  79. result = ser.fillna(fill_value)
  80. expected = pd.Series(
  81. data_missing._from_sequence(
  82. [fill_value, fill_value], dtype=data_missing.dtype
  83. )
  84. )
  85. self.assert_series_equal(result, expected)
  86. # Fill with a series
  87. result = ser.fillna(expected)
  88. self.assert_series_equal(result, expected)
  89. # Fill with a series not affecting the missing values
  90. result = ser.fillna(ser)
  91. self.assert_series_equal(result, ser)
  92. def test_fillna_series_method(self, data_missing, fillna_method):
  93. fill_value = data_missing[1]
  94. if fillna_method == "ffill":
  95. data_missing = data_missing[::-1]
  96. result = pd.Series(data_missing).fillna(method=fillna_method)
  97. expected = pd.Series(
  98. data_missing._from_sequence(
  99. [fill_value, fill_value], dtype=data_missing.dtype
  100. )
  101. )
  102. self.assert_series_equal(result, expected)
  103. def test_fillna_frame(self, data_missing):
  104. fill_value = data_missing[1]
  105. result = pd.DataFrame({"A": data_missing, "B": [1, 2]}).fillna(fill_value)
  106. expected = pd.DataFrame(
  107. {
  108. "A": data_missing._from_sequence(
  109. [fill_value, fill_value], dtype=data_missing.dtype
  110. ),
  111. "B": [1, 2],
  112. }
  113. )
  114. self.assert_frame_equal(result, expected)
  115. def test_fillna_fill_other(self, data):
  116. result = pd.DataFrame({"A": data, "B": [np.nan] * len(data)}).fillna({"B": 0.0})
  117. expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)})
  118. self.assert_frame_equal(result, expected)
  119. def test_use_inf_as_na_no_effect(self, data_missing):
  120. ser = pd.Series(data_missing)
  121. expected = ser.isna()
  122. with pd.option_context("mode.use_inf_as_na", True):
  123. result = ser.isna()
  124. self.assert_series_equal(result, expected)