test_iterator.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. """
  2. Tests that work on both the Python and C engines but do not have a
  3. specific classification into the other test modules.
  4. """
  5. from io import StringIO
  6. import pytest
  7. from pandas import (
  8. DataFrame,
  9. concat,
  10. )
  11. import pandas._testing as tm
  12. pytestmark = pytest.mark.usefixtures("pyarrow_skip")
  13. def test_iterator(all_parsers):
  14. # see gh-6607
  15. data = """index,A,B,C,D
  16. foo,2,3,4,5
  17. bar,7,8,9,10
  18. baz,12,13,14,15
  19. qux,12,13,14,15
  20. foo2,12,13,14,15
  21. bar2,12,13,14,15
  22. """
  23. parser = all_parsers
  24. kwargs = {"index_col": 0}
  25. expected = parser.read_csv(StringIO(data), **kwargs)
  26. with parser.read_csv(StringIO(data), iterator=True, **kwargs) as reader:
  27. first_chunk = reader.read(3)
  28. tm.assert_frame_equal(first_chunk, expected[:3])
  29. last_chunk = reader.read(5)
  30. tm.assert_frame_equal(last_chunk, expected[3:])
  31. def test_iterator2(all_parsers):
  32. parser = all_parsers
  33. data = """A,B,C
  34. foo,1,2,3
  35. bar,4,5,6
  36. baz,7,8,9
  37. """
  38. with parser.read_csv(StringIO(data), iterator=True) as reader:
  39. result = list(reader)
  40. expected = DataFrame(
  41. [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  42. index=["foo", "bar", "baz"],
  43. columns=["A", "B", "C"],
  44. )
  45. tm.assert_frame_equal(result[0], expected)
  46. def test_iterator_stop_on_chunksize(all_parsers):
  47. # gh-3967: stopping iteration when chunksize is specified
  48. parser = all_parsers
  49. data = """A,B,C
  50. foo,1,2,3
  51. bar,4,5,6
  52. baz,7,8,9
  53. """
  54. with parser.read_csv(StringIO(data), chunksize=1) as reader:
  55. result = list(reader)
  56. assert len(result) == 3
  57. expected = DataFrame(
  58. [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  59. index=["foo", "bar", "baz"],
  60. columns=["A", "B", "C"],
  61. )
  62. tm.assert_frame_equal(concat(result), expected)
  63. @pytest.mark.parametrize(
  64. "kwargs", [{"iterator": True, "chunksize": 1}, {"iterator": True}, {"chunksize": 1}]
  65. )
  66. def test_iterator_skipfooter_errors(all_parsers, kwargs):
  67. msg = "'skipfooter' not supported for iteration"
  68. parser = all_parsers
  69. data = "a\n1\n2"
  70. with pytest.raises(ValueError, match=msg):
  71. with parser.read_csv(StringIO(data), skipfooter=1, **kwargs) as _:
  72. pass
  73. def test_iteration_open_handle(all_parsers):
  74. parser = all_parsers
  75. kwargs = {"header": None}
  76. with tm.ensure_clean() as path:
  77. with open(path, "w") as f:
  78. f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG")
  79. with open(path) as f:
  80. for line in f:
  81. if "CCC" in line:
  82. break
  83. result = parser.read_csv(f, **kwargs)
  84. expected = DataFrame({0: ["DDD", "EEE", "FFF", "GGG"]})
  85. tm.assert_frame_equal(result, expected)