# constructors.py (5.5 KB)
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas.api.extensions import ExtensionArray
  5. from pandas.core.internals.blocks import EABackedBlock
  6. from pandas.tests.extension.base.base import BaseExtensionTests
  7. class BaseConstructorsTests(BaseExtensionTests):
  8. def test_from_sequence_from_cls(self, data):
  9. result = type(data)._from_sequence(data, dtype=data.dtype)
  10. self.assert_extension_array_equal(result, data)
  11. data = data[:0]
  12. result = type(data)._from_sequence(data, dtype=data.dtype)
  13. self.assert_extension_array_equal(result, data)
  14. def test_array_from_scalars(self, data):
  15. scalars = [data[0], data[1], data[2]]
  16. result = data._from_sequence(scalars)
  17. assert isinstance(result, type(data))
  18. def test_series_constructor(self, data):
  19. result = pd.Series(data, copy=False)
  20. assert result.dtype == data.dtype
  21. assert len(result) == len(data)
  22. if hasattr(result._mgr, "blocks"):
  23. assert isinstance(result._mgr.blocks[0], EABackedBlock)
  24. assert result._mgr.array is data
  25. # Series[EA] is unboxed / boxed correctly
  26. result2 = pd.Series(result)
  27. assert result2.dtype == data.dtype
  28. if hasattr(result._mgr, "blocks"):
  29. assert isinstance(result2._mgr.blocks[0], EABackedBlock)
  30. def test_series_constructor_no_data_with_index(self, dtype, na_value):
  31. result = pd.Series(index=[1, 2, 3], dtype=dtype)
  32. expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype)
  33. self.assert_series_equal(result, expected)
  34. # GH 33559 - empty index
  35. result = pd.Series(index=[], dtype=dtype)
  36. expected = pd.Series([], index=pd.Index([], dtype="object"), dtype=dtype)
  37. self.assert_series_equal(result, expected)
  38. def test_series_constructor_scalar_na_with_index(self, dtype, na_value):
  39. result = pd.Series(na_value, index=[1, 2, 3], dtype=dtype)
  40. expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype)
  41. self.assert_series_equal(result, expected)
  42. def test_series_constructor_scalar_with_index(self, data, dtype):
  43. scalar = data[0]
  44. result = pd.Series(scalar, index=[1, 2, 3], dtype=dtype)
  45. expected = pd.Series([scalar] * 3, index=[1, 2, 3], dtype=dtype)
  46. self.assert_series_equal(result, expected)
  47. result = pd.Series(scalar, index=["foo"], dtype=dtype)
  48. expected = pd.Series([scalar], index=["foo"], dtype=dtype)
  49. self.assert_series_equal(result, expected)
  50. @pytest.mark.parametrize("from_series", [True, False])
  51. def test_dataframe_constructor_from_dict(self, data, from_series):
  52. if from_series:
  53. data = pd.Series(data)
  54. result = pd.DataFrame({"A": data})
  55. assert result.dtypes["A"] == data.dtype
  56. assert result.shape == (len(data), 1)
  57. if hasattr(result._mgr, "blocks"):
  58. assert isinstance(result._mgr.blocks[0], EABackedBlock)
  59. assert isinstance(result._mgr.arrays[0], ExtensionArray)
  60. def test_dataframe_from_series(self, data):
  61. result = pd.DataFrame(pd.Series(data))
  62. assert result.dtypes[0] == data.dtype
  63. assert result.shape == (len(data), 1)
  64. if hasattr(result._mgr, "blocks"):
  65. assert isinstance(result._mgr.blocks[0], EABackedBlock)
  66. assert isinstance(result._mgr.arrays[0], ExtensionArray)
  67. def test_series_given_mismatched_index_raises(self, data):
  68. msg = r"Length of values \(3\) does not match length of index \(5\)"
  69. with pytest.raises(ValueError, match=msg):
  70. pd.Series(data[:3], index=[0, 1, 2, 3, 4])
  71. def test_from_dtype(self, data):
  72. # construct from our dtype & string dtype
  73. dtype = data.dtype
  74. expected = pd.Series(data)
  75. result = pd.Series(list(data), dtype=dtype)
  76. self.assert_series_equal(result, expected)
  77. result = pd.Series(list(data), dtype=str(dtype))
  78. self.assert_series_equal(result, expected)
  79. # gh-30280
  80. expected = pd.DataFrame(data).astype(dtype)
  81. result = pd.DataFrame(list(data), dtype=dtype)
  82. self.assert_frame_equal(result, expected)
  83. result = pd.DataFrame(list(data), dtype=str(dtype))
  84. self.assert_frame_equal(result, expected)
  85. def test_pandas_array(self, data):
  86. # pd.array(extension_array) should be idempotent...
  87. result = pd.array(data)
  88. self.assert_extension_array_equal(result, data)
  89. def test_pandas_array_dtype(self, data):
  90. # ... but specifying dtype will override idempotency
  91. result = pd.array(data, dtype=np.dtype(object))
  92. expected = pd.arrays.PandasArray(np.asarray(data, dtype=object))
  93. self.assert_equal(result, expected)
  94. def test_construct_empty_dataframe(self, dtype):
  95. # GH 33623
  96. result = pd.DataFrame(columns=["a"], dtype=dtype)
  97. expected = pd.DataFrame(
  98. {"a": pd.array([], dtype=dtype)}, index=pd.RangeIndex(0)
  99. )
  100. self.assert_frame_equal(result, expected)
  101. def test_empty(self, dtype):
  102. cls = dtype.construct_array_type()
  103. result = cls._empty((4,), dtype=dtype)
  104. assert isinstance(result, cls)
  105. assert result.dtype == dtype
  106. assert result.shape == (4,)
  107. # GH#19600 method on ExtensionDtype
  108. result2 = dtype.empty((4,))
  109. assert isinstance(result2, cls)
  110. assert result2.dtype == dtype
  111. assert result2.shape == (4,)
  112. result2 = dtype.empty(4)
  113. assert isinstance(result2, cls)
  114. assert result2.dtype == dtype
  115. assert result2.shape == (4,)