# test_from_dict.py
from collections import OrderedDict
import re

import numpy as np
import pytest

from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    RangeIndex,
    Series,
)
import pandas._testing as tm
  12. class TestFromDict:
  13. # Note: these tests are specific to the from_dict method, not for
  14. # passing dictionaries to DataFrame.__init__
  15. def test_constructor_list_of_odicts(self):
  16. data = [
  17. OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
  18. OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
  19. OrderedDict([["a", 1.5], ["d", 6]]),
  20. OrderedDict(),
  21. OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
  22. OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
  23. ]
  24. result = DataFrame(data)
  25. expected = DataFrame.from_dict(
  26. dict(zip(range(len(data)), data)), orient="index"
  27. )
  28. tm.assert_frame_equal(result, expected.reindex(result.index))
  29. def test_constructor_single_row(self):
  30. data = [OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]])]
  31. result = DataFrame(data)
  32. expected = DataFrame.from_dict(dict(zip([0], data)), orient="index").reindex(
  33. result.index
  34. )
  35. tm.assert_frame_equal(result, expected)
  36. def test_constructor_list_of_series(self):
  37. data = [
  38. OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
  39. OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
  40. ]
  41. sdict = OrderedDict(zip(["x", "y"], data))
  42. idx = Index(["a", "b", "c"])
  43. # all named
  44. data2 = [
  45. Series([1.5, 3, 4], idx, dtype="O", name="x"),
  46. Series([1.5, 3, 6], idx, name="y"),
  47. ]
  48. result = DataFrame(data2)
  49. expected = DataFrame.from_dict(sdict, orient="index")
  50. tm.assert_frame_equal(result, expected)
  51. # some unnamed
  52. data2 = [
  53. Series([1.5, 3, 4], idx, dtype="O", name="x"),
  54. Series([1.5, 3, 6], idx),
  55. ]
  56. result = DataFrame(data2)
  57. sdict = OrderedDict(zip(["x", "Unnamed 0"], data))
  58. expected = DataFrame.from_dict(sdict, orient="index")
  59. tm.assert_frame_equal(result, expected)
  60. # none named
  61. data = [
  62. OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
  63. OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
  64. OrderedDict([["a", 1.5], ["d", 6]]),
  65. OrderedDict(),
  66. OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
  67. OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
  68. ]
  69. data = [Series(d) for d in data]
  70. result = DataFrame(data)
  71. sdict = OrderedDict(zip(range(len(data)), data))
  72. expected = DataFrame.from_dict(sdict, orient="index")
  73. tm.assert_frame_equal(result, expected.reindex(result.index))
  74. result2 = DataFrame(data, index=np.arange(6, dtype=np.int64))
  75. tm.assert_frame_equal(result, result2)
  76. result = DataFrame([Series(dtype=object)])
  77. expected = DataFrame(index=[0])
  78. tm.assert_frame_equal(result, expected)
  79. data = [
  80. OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
  81. OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
  82. ]
  83. sdict = OrderedDict(zip(range(len(data)), data))
  84. idx = Index(["a", "b", "c"])
  85. data2 = [Series([1.5, 3, 4], idx, dtype="O"), Series([1.5, 3, 6], idx)]
  86. result = DataFrame(data2)
  87. expected = DataFrame.from_dict(sdict, orient="index")
  88. tm.assert_frame_equal(result, expected)
  89. def test_constructor_orient(self, float_string_frame):
  90. data_dict = float_string_frame.T._series
  91. recons = DataFrame.from_dict(data_dict, orient="index")
  92. expected = float_string_frame.reindex(index=recons.index)
  93. tm.assert_frame_equal(recons, expected)
  94. # dict of sequence
  95. a = {"hi": [32, 3, 3], "there": [3, 5, 3]}
  96. rs = DataFrame.from_dict(a, orient="index")
  97. xp = DataFrame.from_dict(a).T.reindex(list(a.keys()))
  98. tm.assert_frame_equal(rs, xp)
  99. def test_constructor_from_ordered_dict(self):
  100. # GH#8425
  101. a = OrderedDict(
  102. [
  103. ("one", OrderedDict([("col_a", "foo1"), ("col_b", "bar1")])),
  104. ("two", OrderedDict([("col_a", "foo2"), ("col_b", "bar2")])),
  105. ("three", OrderedDict([("col_a", "foo3"), ("col_b", "bar3")])),
  106. ]
  107. )
  108. expected = DataFrame.from_dict(a, orient="columns").T
  109. result = DataFrame.from_dict(a, orient="index")
  110. tm.assert_frame_equal(result, expected)
  111. def test_from_dict_columns_parameter(self):
  112. # GH#18529
  113. # Test new columns parameter for from_dict that was added to make
  114. # from_items(..., orient='index', columns=[...]) easier to replicate
  115. result = DataFrame.from_dict(
  116. OrderedDict([("A", [1, 2]), ("B", [4, 5])]),
  117. orient="index",
  118. columns=["one", "two"],
  119. )
  120. expected = DataFrame([[1, 2], [4, 5]], index=["A", "B"], columns=["one", "two"])
  121. tm.assert_frame_equal(result, expected)
  122. msg = "cannot use columns parameter with orient='columns'"
  123. with pytest.raises(ValueError, match=msg):
  124. DataFrame.from_dict(
  125. {"A": [1, 2], "B": [4, 5]},
  126. orient="columns",
  127. columns=["one", "two"],
  128. )
  129. with pytest.raises(ValueError, match=msg):
  130. DataFrame.from_dict({"A": [1, 2], "B": [4, 5]}, columns=["one", "two"])
  131. @pytest.mark.parametrize(
  132. "data_dict, orient, expected",
  133. [
  134. ({}, "index", RangeIndex(0)),
  135. (
  136. [{("a",): 1}, {("a",): 2}],
  137. "columns",
  138. Index([("a",)], tupleize_cols=False),
  139. ),
  140. (
  141. [OrderedDict([(("a",), 1), (("b",), 2)])],
  142. "columns",
  143. Index([("a",), ("b",)], tupleize_cols=False),
  144. ),
  145. ([{("a", "b"): 1}], "columns", Index([("a", "b")], tupleize_cols=False)),
  146. ],
  147. )
  148. def test_constructor_from_dict_tuples(self, data_dict, orient, expected):
  149. # GH#16769
  150. df = DataFrame.from_dict(data_dict, orient)
  151. result = df.columns
  152. tm.assert_index_equal(result, expected)
  153. def test_frame_dict_constructor_empty_series(self):
  154. s1 = Series(
  155. [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)])
  156. )
  157. s2 = Series(
  158. [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)])
  159. )
  160. s3 = Series(dtype=object)
  161. # it works!
  162. DataFrame({"foo": s1, "bar": s2, "baz": s3})
  163. DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2})
  164. def test_from_dict_scalars_requires_index(self):
  165. msg = "If using all scalar values, you must pass an index"
  166. with pytest.raises(ValueError, match=msg):
  167. DataFrame.from_dict(OrderedDict([("b", 8), ("a", 5), ("a", 6)]))
  168. def test_from_dict_orient_invalid(self):
  169. msg = (
  170. "Expected 'index', 'columns' or 'tight' for orient parameter. "
  171. "Got 'abc' instead"
  172. )
  173. with pytest.raises(ValueError, match=msg):
  174. DataFrame.from_dict({"foo": 1, "baz": 3, "bar": 2}, orient="abc")