test_missing.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import MultiIndex
  5. import pandas._testing as tm
  6. def test_fillna(idx):
  7. # GH 11343
  8. msg = "isna is not defined for MultiIndex"
  9. with pytest.raises(NotImplementedError, match=msg):
  10. idx.fillna(idx[0])
  11. def test_dropna():
  12. # GH 6194
  13. idx = MultiIndex.from_arrays(
  14. [
  15. [1, np.nan, 3, np.nan, 5],
  16. [1, 2, np.nan, np.nan, 5],
  17. ["a", "b", "c", np.nan, "e"],
  18. ]
  19. )
  20. exp = MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]])
  21. tm.assert_index_equal(idx.dropna(), exp)
  22. tm.assert_index_equal(idx.dropna(how="any"), exp)
  23. exp = MultiIndex.from_arrays(
  24. [[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]]
  25. )
  26. tm.assert_index_equal(idx.dropna(how="all"), exp)
  27. msg = "invalid how option: xxx"
  28. with pytest.raises(ValueError, match=msg):
  29. idx.dropna(how="xxx")
  30. # GH26408
  31. # test if missing values are dropped for multiindex constructed
  32. # from codes and values
  33. idx = MultiIndex(
  34. levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]],
  35. codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]],
  36. )
  37. expected = MultiIndex.from_arrays([["128", 2], ["128", 2]])
  38. tm.assert_index_equal(idx.dropna(), expected)
  39. tm.assert_index_equal(idx.dropna(how="any"), expected)
  40. expected = MultiIndex.from_arrays(
  41. [[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]]
  42. )
  43. tm.assert_index_equal(idx.dropna(how="all"), expected)
  44. def test_nulls(idx):
  45. # this is really a smoke test for the methods
  46. # as these are adequately tested for function elsewhere
  47. msg = "isna is not defined for MultiIndex"
  48. with pytest.raises(NotImplementedError, match=msg):
  49. idx.isna()
  50. @pytest.mark.xfail(reason="isna is not defined for MultiIndex")
  51. def test_hasnans_isnans(idx):
  52. # GH 11343, added tests for hasnans / isnans
  53. index = idx.copy()
  54. # cases in indices doesn't include NaN
  55. expected = np.array([False] * len(index), dtype=bool)
  56. tm.assert_numpy_array_equal(index._isnan, expected)
  57. assert index.hasnans is False
  58. index = idx.copy()
  59. values = index.values
  60. values[1] = np.nan
  61. index = type(idx)(values)
  62. expected = np.array([False] * len(index), dtype=bool)
  63. expected[1] = True
  64. tm.assert_numpy_array_equal(index._isnan, expected)
  65. assert index.hasnans is True
  66. def test_nan_stays_float():
  67. # GH 7031
  68. idx0 = MultiIndex(levels=[["A", "B"], []], codes=[[1, 0], [-1, -1]], names=[0, 1])
  69. idx1 = MultiIndex(levels=[["C"], ["D"]], codes=[[0], [0]], names=[0, 1])
  70. idxm = idx0.join(idx1, how="outer")
  71. assert pd.isna(idx0.get_level_values(1)).all()
  72. # the following failed in 0.14.1
  73. assert pd.isna(idxm.get_level_values(1)[:-1]).all()
  74. df0 = pd.DataFrame([[1, 2]], index=idx0)
  75. df1 = pd.DataFrame([[3, 4]], index=idx1)
  76. dfm = df0 - df1
  77. assert pd.isna(df0.index.get_level_values(1)).all()
  78. # the following failed in 0.14.1
  79. assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()
  80. def test_tuples_have_na():
  81. index = MultiIndex(
  82. levels=[[1, 0], [0, 1, 2, 3]],
  83. codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]],
  84. )
  85. assert pd.isna(index[4][0])
  86. assert pd.isna(index.values[4][0])