test_iteration.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. import datetime
  2. import numpy as np
  3. from pandas.compat import (
  4. IS64,
  5. is_platform_windows,
  6. )
  7. from pandas import (
  8. Categorical,
  9. DataFrame,
  10. Series,
  11. date_range,
  12. )
  13. import pandas._testing as tm
  14. class TestIteration:
  15. def test_keys(self, float_frame):
  16. assert float_frame.keys() is float_frame.columns
  17. def test_iteritems(self):
  18. df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
  19. for k, v in df.items():
  20. assert isinstance(v, DataFrame._constructor_sliced)
  21. def test_items(self):
  22. # GH#17213, GH#13918
  23. cols = ["a", "b", "c"]
  24. df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols)
  25. for c, (k, v) in zip(cols, df.items()):
  26. assert c == k
  27. assert isinstance(v, Series)
  28. assert (df[k] == v).all()
  29. def test_items_names(self, float_string_frame):
  30. for k, v in float_string_frame.items():
  31. assert v.name == k
  32. def test_iter(self, float_frame):
  33. assert tm.equalContents(list(float_frame), float_frame.columns)
  34. def test_iterrows(self, float_frame, float_string_frame):
  35. for k, v in float_frame.iterrows():
  36. exp = float_frame.loc[k]
  37. tm.assert_series_equal(v, exp)
  38. for k, v in float_string_frame.iterrows():
  39. exp = float_string_frame.loc[k]
  40. tm.assert_series_equal(v, exp)
  41. def test_iterrows_iso8601(self):
  42. # GH#19671
  43. s = DataFrame(
  44. {
  45. "non_iso8601": ["M1701", "M1802", "M1903", "M2004"],
  46. "iso8601": date_range("2000-01-01", periods=4, freq="M"),
  47. }
  48. )
  49. for k, v in s.iterrows():
  50. exp = s.loc[k]
  51. tm.assert_series_equal(v, exp)
  52. def test_iterrows_corner(self):
  53. # GH#12222
  54. df = DataFrame(
  55. {
  56. "a": [datetime.datetime(2015, 1, 1)],
  57. "b": [None],
  58. "c": [None],
  59. "d": [""],
  60. "e": [[]],
  61. "f": [set()],
  62. "g": [{}],
  63. }
  64. )
  65. expected = Series(
  66. [datetime.datetime(2015, 1, 1), None, None, "", [], set(), {}],
  67. index=list("abcdefg"),
  68. name=0,
  69. dtype="object",
  70. )
  71. _, result = next(df.iterrows())
  72. tm.assert_series_equal(result, expected)
  73. def test_itertuples(self, float_frame):
  74. for i, tup in enumerate(float_frame.itertuples()):
  75. ser = DataFrame._constructor_sliced(tup[1:])
  76. ser.name = tup[0]
  77. expected = float_frame.iloc[i, :].reset_index(drop=True)
  78. tm.assert_series_equal(ser, expected)
  79. df = DataFrame(
  80. {"floats": np.random.randn(5), "ints": range(5)}, columns=["floats", "ints"]
  81. )
  82. for tup in df.itertuples(index=False):
  83. assert isinstance(tup[1], int)
  84. df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]})
  85. dfaa = df[["a", "a"]]
  86. assert list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)]
  87. # repr with int on 32-bit/windows
  88. if not (is_platform_windows() or not IS64):
  89. assert (
  90. repr(list(df.itertuples(name=None)))
  91. == "[(0, 1, 4), (1, 2, 5), (2, 3, 6)]"
  92. )
  93. tup = next(df.itertuples(name="TestName"))
  94. assert tup._fields == ("Index", "a", "b")
  95. assert (tup.Index, tup.a, tup.b) == tup
  96. assert type(tup).__name__ == "TestName"
  97. df.columns = ["def", "return"]
  98. tup2 = next(df.itertuples(name="TestName"))
  99. assert tup2 == (0, 1, 4)
  100. assert tup2._fields == ("Index", "_1", "_2")
  101. df3 = DataFrame({"f" + str(i): [i] for i in range(1024)})
  102. # will raise SyntaxError if trying to create namedtuple
  103. tup3 = next(df3.itertuples())
  104. assert isinstance(tup3, tuple)
  105. assert hasattr(tup3, "_fields")
  106. # GH#28282
  107. df_254_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(254)}])
  108. result_254_columns = next(df_254_columns.itertuples(index=False))
  109. assert isinstance(result_254_columns, tuple)
  110. assert hasattr(result_254_columns, "_fields")
  111. df_255_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(255)}])
  112. result_255_columns = next(df_255_columns.itertuples(index=False))
  113. assert isinstance(result_255_columns, tuple)
  114. assert hasattr(result_255_columns, "_fields")
  115. def test_sequence_like_with_categorical(self):
  116. # GH#7839
  117. # make sure can iterate
  118. df = DataFrame(
  119. {"id": [1, 2, 3, 4, 5, 6], "raw_grade": ["a", "b", "b", "a", "a", "e"]}
  120. )
  121. df["grade"] = Categorical(df["raw_grade"])
  122. # basic sequencing testing
  123. result = list(df.grade.values)
  124. expected = np.array(df.grade.values).tolist()
  125. tm.assert_almost_equal(result, expected)
  126. # iteration
  127. for t in df.itertuples(index=False):
  128. str(t)
  129. for row, s in df.iterrows():
  130. str(s)
  131. for c, col in df.items():
  132. str(col)