test_get_numeric_data.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. import numpy as np
  2. import pandas as pd
  3. from pandas import (
  4. Categorical,
  5. DataFrame,
  6. Index,
  7. Series,
  8. Timestamp,
  9. )
  10. import pandas._testing as tm
  11. from pandas.core.arrays import IntervalArray
  12. class TestGetNumericData:
  13. def test_get_numeric_data_preserve_dtype(self):
  14. # get the numeric data
  15. obj = DataFrame({"A": [1, "2", 3.0]})
  16. result = obj._get_numeric_data()
  17. expected = DataFrame(dtype=object, index=pd.RangeIndex(3), columns=[])
  18. tm.assert_frame_equal(result, expected)
  19. def test_get_numeric_data(self):
  20. datetime64name = np.dtype("M8[ns]").name
  21. objectname = np.dtype(np.object_).name
  22. df = DataFrame(
  23. {"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")},
  24. index=np.arange(10),
  25. )
  26. result = df.dtypes
  27. expected = Series(
  28. [
  29. np.dtype("float64"),
  30. np.dtype("int64"),
  31. np.dtype(objectname),
  32. np.dtype(datetime64name),
  33. ],
  34. index=["a", "b", "c", "f"],
  35. )
  36. tm.assert_series_equal(result, expected)
  37. df = DataFrame(
  38. {
  39. "a": 1.0,
  40. "b": 2,
  41. "c": "foo",
  42. "d": np.array([1.0] * 10, dtype="float32"),
  43. "e": np.array([1] * 10, dtype="int32"),
  44. "f": np.array([1] * 10, dtype="int16"),
  45. "g": Timestamp("20010102"),
  46. },
  47. index=np.arange(10),
  48. )
  49. result = df._get_numeric_data()
  50. expected = df.loc[:, ["a", "b", "d", "e", "f"]]
  51. tm.assert_frame_equal(result, expected)
  52. only_obj = df.loc[:, ["c", "g"]]
  53. result = only_obj._get_numeric_data()
  54. expected = df.loc[:, []]
  55. tm.assert_frame_equal(result, expected)
  56. df = DataFrame.from_dict({"a": [1, 2], "b": ["foo", "bar"], "c": [np.pi, np.e]})
  57. result = df._get_numeric_data()
  58. expected = DataFrame.from_dict({"a": [1, 2], "c": [np.pi, np.e]})
  59. tm.assert_frame_equal(result, expected)
  60. df = result.copy()
  61. result = df._get_numeric_data()
  62. expected = df
  63. tm.assert_frame_equal(result, expected)
  64. def test_get_numeric_data_mixed_dtype(self):
  65. # numeric and object columns
  66. df = DataFrame(
  67. {
  68. "a": [1, 2, 3],
  69. "b": [True, False, True],
  70. "c": ["foo", "bar", "baz"],
  71. "d": [None, None, None],
  72. "e": [3.14, 0.577, 2.773],
  73. }
  74. )
  75. result = df._get_numeric_data()
  76. tm.assert_index_equal(result.columns, Index(["a", "b", "e"]))
  77. def test_get_numeric_data_extension_dtype(self):
  78. # GH#22290
  79. df = DataFrame(
  80. {
  81. "A": pd.array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"),
  82. "B": Categorical(list("abcabc")),
  83. "C": pd.array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"),
  84. "D": IntervalArray.from_breaks(range(7)),
  85. }
  86. )
  87. result = df._get_numeric_data()
  88. expected = df.loc[:, ["A", "C"]]
  89. tm.assert_frame_equal(result, expected)