casting.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. import numpy as np
  2. import pytest
  3. from pandas.compat import np_version_under1p21
  4. import pandas.util._test_decorators as td
  5. import pandas as pd
  6. from pandas.core.internals import ObjectBlock
  7. from pandas.tests.extension.base.base import BaseExtensionTests
  8. class BaseCastingTests(BaseExtensionTests):
  9. """Casting to and from ExtensionDtypes"""
  10. def test_astype_object_series(self, all_data):
  11. ser = pd.Series(all_data, name="A")
  12. result = ser.astype(object)
  13. assert result.dtype == np.dtype(object)
  14. if hasattr(result._mgr, "blocks"):
  15. assert isinstance(result._mgr.blocks[0], ObjectBlock)
  16. assert isinstance(result._mgr.array, np.ndarray)
  17. assert result._mgr.array.dtype == np.dtype(object)
  18. def test_astype_object_frame(self, all_data):
  19. df = pd.DataFrame({"A": all_data})
  20. result = df.astype(object)
  21. if hasattr(result._mgr, "blocks"):
  22. blk = result._data.blocks[0]
  23. assert isinstance(blk, ObjectBlock), type(blk)
  24. assert isinstance(result._mgr.arrays[0], np.ndarray)
  25. assert result._mgr.arrays[0].dtype == np.dtype(object)
  26. # earlier numpy raises TypeError on e.g. np.dtype(np.int64) == "Int64"
  27. if not np_version_under1p21:
  28. # check that we can compare the dtypes
  29. comp = result.dtypes == df.dtypes
  30. assert not comp.any()
  31. def test_tolist(self, data):
  32. result = pd.Series(data).tolist()
  33. expected = list(data)
  34. assert result == expected
  35. def test_astype_str(self, data):
  36. result = pd.Series(data[:5]).astype(str)
  37. expected = pd.Series([str(x) for x in data[:5]], dtype=str)
  38. self.assert_series_equal(result, expected)
  39. @pytest.mark.parametrize(
  40. "nullable_string_dtype",
  41. [
  42. "string[python]",
  43. pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
  44. ],
  45. )
  46. def test_astype_string(self, data, nullable_string_dtype):
  47. # GH-33465, GH#45326 as of 2.0 we decode bytes instead of calling str(obj)
  48. result = pd.Series(data[:5]).astype(nullable_string_dtype)
  49. expected = pd.Series(
  50. [str(x) if not isinstance(x, bytes) else x.decode() for x in data[:5]],
  51. dtype=nullable_string_dtype,
  52. )
  53. self.assert_series_equal(result, expected)
  54. def test_to_numpy(self, data):
  55. expected = np.asarray(data)
  56. result = data.to_numpy()
  57. self.assert_equal(result, expected)
  58. result = pd.Series(data).to_numpy()
  59. self.assert_equal(result, expected)
  60. def test_astype_empty_dataframe(self, dtype):
  61. # https://github.com/pandas-dev/pandas/issues/33113
  62. df = pd.DataFrame()
  63. result = df.astype(dtype)
  64. self.assert_frame_equal(result, df)
  65. @pytest.mark.parametrize("copy", [True, False])
  66. def test_astype_own_type(self, data, copy):
  67. # ensure that astype returns the original object for equal dtype and copy=False
  68. # https://github.com/pandas-dev/pandas/issues/28488
  69. result = data.astype(data.dtype, copy=copy)
  70. assert (result is data) is (not copy)
  71. self.assert_extension_array_equal(result, data)