# test_to_csv.py (6.1 KB)
  1. from datetime import datetime
  2. from io import StringIO
  3. import numpy as np
  4. import pytest
  5. import pandas as pd
  6. from pandas import Series
  7. import pandas._testing as tm
  8. from pandas.io.common import get_handle
  9. class TestSeriesToCSV:
  10. def read_csv(self, path, **kwargs):
  11. params = {"index_col": 0, "header": None}
  12. params.update(**kwargs)
  13. header = params.get("header")
  14. out = pd.read_csv(path, **params).squeeze("columns")
  15. if header is None:
  16. out.name = out.index.name = None
  17. return out
  18. def test_from_csv(self, datetime_series, string_series):
  19. # freq doesn't round-trip
  20. datetime_series.index = datetime_series.index._with_freq(None)
  21. with tm.ensure_clean() as path:
  22. datetime_series.to_csv(path, header=False)
  23. ts = self.read_csv(path, parse_dates=True)
  24. tm.assert_series_equal(datetime_series, ts, check_names=False)
  25. assert ts.name is None
  26. assert ts.index.name is None
  27. # see gh-10483
  28. datetime_series.to_csv(path, header=True)
  29. ts_h = self.read_csv(path, header=0)
  30. assert ts_h.name == "ts"
  31. string_series.to_csv(path, header=False)
  32. series = self.read_csv(path)
  33. tm.assert_series_equal(string_series, series, check_names=False)
  34. assert series.name is None
  35. assert series.index.name is None
  36. string_series.to_csv(path, header=True)
  37. series_h = self.read_csv(path, header=0)
  38. assert series_h.name == "series"
  39. with open(path, "w") as outfile:
  40. outfile.write("1998-01-01|1.0\n1999-01-01|2.0")
  41. series = self.read_csv(path, sep="|", parse_dates=True)
  42. check_series = Series(
  43. {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}
  44. )
  45. tm.assert_series_equal(check_series, series)
  46. series = self.read_csv(path, sep="|", parse_dates=False)
  47. check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0})
  48. tm.assert_series_equal(check_series, series)
  49. def test_to_csv(self, datetime_series):
  50. with tm.ensure_clean() as path:
  51. datetime_series.to_csv(path, header=False)
  52. with open(path, newline=None) as f:
  53. lines = f.readlines()
  54. assert lines[1] != "\n"
  55. datetime_series.to_csv(path, index=False, header=False)
  56. arr = np.loadtxt(path)
  57. tm.assert_almost_equal(arr, datetime_series.values)
  58. def test_to_csv_unicode_index(self):
  59. buf = StringIO()
  60. s = Series(["\u05d0", "d2"], index=["\u05d0", "\u05d1"])
  61. s.to_csv(buf, encoding="UTF-8", header=False)
  62. buf.seek(0)
  63. s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
  64. tm.assert_series_equal(s, s2)
  65. def test_to_csv_float_format(self):
  66. with tm.ensure_clean() as filename:
  67. ser = Series([0.123456, 0.234567, 0.567567])
  68. ser.to_csv(filename, float_format="%.2f", header=False)
  69. rs = self.read_csv(filename)
  70. xp = Series([0.12, 0.23, 0.57])
  71. tm.assert_series_equal(rs, xp)
  72. def test_to_csv_list_entries(self):
  73. s = Series(["jack and jill", "jesse and frank"])
  74. split = s.str.split(r"\s+and\s+")
  75. buf = StringIO()
  76. split.to_csv(buf, header=False)
  77. def test_to_csv_path_is_none(self):
  78. # GH 8215
  79. # Series.to_csv() was returning None, inconsistent with
  80. # DataFrame.to_csv() which returned string
  81. s = Series([1, 2, 3])
  82. csv_str = s.to_csv(path_or_buf=None, header=False)
  83. assert isinstance(csv_str, str)
  84. @pytest.mark.parametrize(
  85. "s,encoding",
  86. [
  87. (
  88. Series([0.123456, 0.234567, 0.567567], index=["A", "B", "C"], name="X"),
  89. None,
  90. ),
  91. # GH 21241, 21118
  92. (Series(["abc", "def", "ghi"], name="X"), "ascii"),
  93. (Series(["123", "你好", "世界"], name="中文"), "gb2312"),
  94. (Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), "cp737"),
  95. ],
  96. )
  97. def test_to_csv_compression(self, s, encoding, compression):
  98. with tm.ensure_clean() as filename:
  99. s.to_csv(filename, compression=compression, encoding=encoding, header=True)
  100. # test the round trip - to_csv -> read_csv
  101. result = pd.read_csv(
  102. filename,
  103. compression=compression,
  104. encoding=encoding,
  105. index_col=0,
  106. ).squeeze("columns")
  107. tm.assert_series_equal(s, result)
  108. # test the round trip using file handle - to_csv -> read_csv
  109. with get_handle(
  110. filename, "w", compression=compression, encoding=encoding
  111. ) as handles:
  112. s.to_csv(handles.handle, encoding=encoding, header=True)
  113. result = pd.read_csv(
  114. filename,
  115. compression=compression,
  116. encoding=encoding,
  117. index_col=0,
  118. ).squeeze("columns")
  119. tm.assert_series_equal(s, result)
  120. # explicitly ensure file was compressed
  121. with tm.decompress_file(filename, compression) as fh:
  122. text = fh.read().decode(encoding or "utf8")
  123. assert s.name in text
  124. with tm.decompress_file(filename, compression) as fh:
  125. tm.assert_series_equal(
  126. s,
  127. pd.read_csv(fh, index_col=0, encoding=encoding).squeeze("columns"),
  128. )
  129. def test_to_csv_interval_index(self):
  130. # GH 28210
  131. s = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3))
  132. with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path:
  133. s.to_csv(path, header=False)
  134. result = self.read_csv(path, index_col=0)
  135. # can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
  136. expected = s.copy()
  137. expected.index = expected.index.astype(str)
  138. tm.assert_series_equal(result, expected)