
  1. """
  2. Tests for the pandas.io.common functionalities
  3. """
  4. import codecs
  5. import errno
  6. from functools import partial
  7. from io import (
  8. BytesIO,
  9. StringIO,
  10. UnsupportedOperation,
  11. )
  12. import mmap
  13. import os
  14. from pathlib import Path
  15. import pickle
  16. import tempfile
  17. import pytest
  18. from pandas.compat import is_platform_windows
  19. import pandas.util._test_decorators as td
  20. import pandas as pd
  21. import pandas._testing as tm
  22. import pandas.io.common as icom
  23. class CustomFSPath:
  24. """For testing fspath on unknown objects"""
  25. def __init__(self, path) -> None:
  26. self.path = path
  27. def __fspath__(self):
  28. return self.path
  29. # Functions that consume a string path and return a string or path-like object
  30. path_types = [str, CustomFSPath, Path]
  31. try:
  32. from py.path import local as LocalPath
  33. path_types.append(LocalPath)
  34. except ImportError:
  35. pass
  36. HERE = os.path.abspath(os.path.dirname(__file__))

# https://github.com/cython/cython/issues/1720
class TestCommonIOCapabilities:
    data1 = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""

    def test_expand_user(self):
        filename = "~/sometest"
        expanded_name = icom._expand_user(filename)

        assert expanded_name != filename
        assert os.path.isabs(expanded_name)
        assert os.path.expanduser(filename) == expanded_name

    def test_expand_user_normal_path(self):
        filename = "/somefolder/sometest"
        expanded_name = icom._expand_user(filename)

        assert expanded_name == filename
        assert os.path.expanduser(filename) == expanded_name

    def test_stringify_path_pathlib(self):
        rel_path = icom.stringify_path(Path("."))
        assert rel_path == "."
        redundant_path = icom.stringify_path(Path("foo//bar"))
        assert redundant_path == os.path.join("foo", "bar")

    @td.skip_if_no("py.path")
    def test_stringify_path_localpath(self):
        path = os.path.join("foo", "bar")
        abs_path = os.path.abspath(path)
        lpath = LocalPath(path)
        assert icom.stringify_path(lpath) == abs_path

    def test_stringify_path_fspath(self):
        p = CustomFSPath("foo/bar.csv")
        result = icom.stringify_path(p)
        assert result == "foo/bar.csv"

    def test_stringify_file_and_path_like(self):
        # GH 38125: do not stringify file objects that are also path-like
        fsspec = pytest.importorskip("fsspec")
        with tm.ensure_clean() as path:
            with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj:
                assert fsspec_obj == icom.stringify_path(fsspec_obj)

    @pytest.mark.parametrize("path_type", path_types)
    def test_infer_compression_from_path(self, compression_format, path_type):
        extension, expected = compression_format
        path = path_type("foo/bar.csv" + extension)
        compression = icom.infer_compression(path, compression="infer")
        assert compression == expected

    @pytest.mark.parametrize("path_type", [str, CustomFSPath, Path])
    def test_get_handle_with_path(self, path_type):
        # ignore LocalPath: it creates strange paths: /absolute/~/sometest
        with tempfile.TemporaryDirectory(dir=Path.home()) as tmp:
            filename = path_type("~/" + Path(tmp).name + "/sometest")
            with icom.get_handle(filename, "w") as handles:
                assert Path(handles.handle.name).is_absolute()
                assert os.path.expanduser(filename) == handles.handle.name
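
    # get_handle is expected to hand a caller-owned buffer back unchanged and
    # must not close it; only the enclosing StringIO context closes the buffer.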
    def test_get_handle_with_buffer(self):
        with StringIO() as input_buffer:
            with icom.get_handle(input_buffer, "r") as handles:
                assert handles.handle == input_buffer
            assert not input_buffer.closed
        assert input_buffer.closed

    # Test that BytesIOWrapper(get_handle) returns the correct number of bytes
    # on every read
    def test_bytesiowrapper_returns_correct_bytes(self):
        # Test latin1, ucs-2, and ucs-4 chars
        data = """a,b,c
1,2,3
©,®,®
Look,a snake,🐍"""
        with icom.get_handle(StringIO(data), "rb", is_text=False) as handles:
            result = b""
            chunksize = 5
            while True:
                chunk = handles.handle.read(chunksize)
                # Make sure each chunk is the correct number of bytes
                assert len(chunk) <= chunksize
                if len(chunk) < chunksize:
                    # A shorter chunk is allowed only at EOF,
                    # which happens when read returns empty
                    assert len(handles.handle.read()) == 0
                    result += chunk
                    break
                result += chunk
            assert result == data.encode("utf-8")

    # Test that pyarrow can handle a file opened with get_handle
    @td.skip_if_no("pyarrow")
    def test_get_handle_pyarrow_compat(self):
        from pyarrow import csv

        # Test latin1, ucs-2, and ucs-4 chars
        data = """a,b,c
1,2,3
©,®,®
Look,a snake,🐍"""
        expected = pd.DataFrame(
            {"a": ["1", "©", "Look"], "b": ["2", "®", "a snake"], "c": ["3", "®", "🐍"]}
        )
        s = StringIO(data)
        with icom.get_handle(s, "rb", is_text=False) as handles:
            df = csv.read_csv(handles.handle).to_pandas()
            tm.assert_frame_equal(df, expected)
            assert not s.closed

    def test_iterator(self):
        with pd.read_csv(StringIO(self.data1), chunksize=1) as reader:
            result = pd.concat(reader, ignore_index=True)
        expected = pd.read_csv(StringIO(self.data1))
        tm.assert_frame_equal(result, expected)

        # GH12153
        with pd.read_csv(StringIO(self.data1), chunksize=1) as it:
            first = next(it)
            tm.assert_frame_equal(first, expected.iloc[[0]])
            tm.assert_frame_equal(pd.concat(it), expected.iloc[1:])
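
    # Each reader should raise its error_class for a missing file; the msg
    # alternatives below cover library-specific wording and locale-translated
    # ENOENT messages.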
    @pytest.mark.parametrize(
        "reader, module, error_class, fn_ext",
        [
            (pd.read_csv, "os", FileNotFoundError, "csv"),
            (pd.read_fwf, "os", FileNotFoundError, "txt"),
            (pd.read_excel, "xlrd", FileNotFoundError, "xlsx"),
            (pd.read_feather, "pyarrow", OSError, "feather"),
            (pd.read_hdf, "tables", FileNotFoundError, "h5"),
            (pd.read_stata, "os", FileNotFoundError, "dta"),
            (pd.read_sas, "os", FileNotFoundError, "sas7bdat"),
            (pd.read_json, "os", FileNotFoundError, "json"),
            (pd.read_pickle, "os", FileNotFoundError, "pickle"),
        ],
    )
    def test_read_non_existent(self, reader, module, error_class, fn_ext):
        pytest.importorskip(module)

        path = os.path.join(HERE, "data", "does_not_exist." + fn_ext)
        msg1 = rf"File (b')?.+does_not_exist\.{fn_ext}'? does not exist"
        msg2 = rf"\[Errno 2\] No such file or directory: '.+does_not_exist\.{fn_ext}'"
        msg3 = "Expected object or value"
        msg4 = "path_or_buf needs to be a string file path or file-like"
        msg5 = (
            rf"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist: "
            rf"'.+does_not_exist\.{fn_ext}'"
        )
        msg6 = rf"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'"
        msg7 = (
            rf"\[Errno 2\] File o directory non esistente: '.+does_not_exist\.{fn_ext}'"
        )
        msg8 = rf"Failed to open local file.+does_not_exist\.{fn_ext}"

        with pytest.raises(
            error_class,
            match=rf"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6}|{msg7}|{msg8})",
        ):
            reader(path)

    @pytest.mark.parametrize(
        "method, module, error_class, fn_ext",
        [
            (pd.DataFrame.to_csv, "os", OSError, "csv"),
            (pd.DataFrame.to_html, "os", OSError, "html"),
            (pd.DataFrame.to_excel, "xlrd", OSError, "xlsx"),
            (pd.DataFrame.to_feather, "pyarrow", OSError, "feather"),
            (pd.DataFrame.to_parquet, "pyarrow", OSError, "parquet"),
            (pd.DataFrame.to_stata, "os", OSError, "dta"),
            (pd.DataFrame.to_json, "os", OSError, "json"),
            (pd.DataFrame.to_pickle, "os", OSError, "pickle"),
        ],
    )
    # NOTE: Missing parent directory for pd.DataFrame.to_hdf is handled by PyTables
    def test_write_missing_parent_directory(self, method, module, error_class, fn_ext):
        pytest.importorskip(module)

        dummy_frame = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5]})
        path = os.path.join(HERE, "data", "missing_folder", "does_not_exist." + fn_ext)

        with pytest.raises(
            error_class,
            match=r"Cannot save file into a non-existent directory: .*missing_folder",
        ):
            method(dummy_frame, path)

    @pytest.mark.parametrize(
        "reader, module, error_class, fn_ext",
        [
            (pd.read_csv, "os", FileNotFoundError, "csv"),
            (pd.read_table, "os", FileNotFoundError, "csv"),
            (pd.read_fwf, "os", FileNotFoundError, "txt"),
            (pd.read_excel, "xlrd", FileNotFoundError, "xlsx"),
            (pd.read_feather, "pyarrow", OSError, "feather"),
            (pd.read_hdf, "tables", FileNotFoundError, "h5"),
            (pd.read_stata, "os", FileNotFoundError, "dta"),
            (pd.read_sas, "os", FileNotFoundError, "sas7bdat"),
            (pd.read_json, "os", FileNotFoundError, "json"),
            (pd.read_pickle, "os", FileNotFoundError, "pickle"),
        ],
    )
    def test_read_expands_user_home_dir(
        self, reader, module, error_class, fn_ext, monkeypatch
    ):
        pytest.importorskip(module)

        path = os.path.join("~", "does_not_exist." + fn_ext)
        monkeypatch.setattr(icom, "_expand_user", lambda x: os.path.join("foo", x))

        msg1 = rf"File (b')?.+does_not_exist\.{fn_ext}'? does not exist"
        msg2 = rf"\[Errno 2\] No such file or directory: '.+does_not_exist\.{fn_ext}'"
        msg3 = "Unexpected character found when decoding 'false'"
        msg4 = "path_or_buf needs to be a string file path or file-like"
        msg5 = (
            rf"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist: "
            rf"'.+does_not_exist\.{fn_ext}'"
        )
        msg6 = rf"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'"
        msg7 = (
            rf"\[Errno 2\] File o directory non esistente: '.+does_not_exist\.{fn_ext}'"
        )
        msg8 = rf"Failed to open local file.+does_not_exist\.{fn_ext}"

        with pytest.raises(
            error_class,
            match=rf"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6}|{msg7}|{msg8})",
        ):
            reader(path)

    @pytest.mark.parametrize(
        "reader, module, path",
        [
            (pd.read_csv, "os", ("io", "data", "csv", "iris.csv")),
            (pd.read_table, "os", ("io", "data", "csv", "iris.csv")),
            (
                pd.read_fwf,
                "os",
                ("io", "data", "fixed_width", "fixed_width_format.txt"),
            ),
            (pd.read_excel, "xlrd", ("io", "data", "excel", "test1.xlsx")),
            (
                pd.read_feather,
                "pyarrow",
                ("io", "data", "feather", "feather-0_3_1.feather"),
            ),
            (
                pd.read_hdf,
                "tables",
                ("io", "data", "legacy_hdf", "datetimetz_object.h5"),
            ),
            (pd.read_stata, "os", ("io", "data", "stata", "stata10_115.dta")),
            (pd.read_sas, "os", ("io", "sas", "data", "test1.sas7bdat")),
            (pd.read_json, "os", ("io", "json", "data", "tsframe_v012.json")),
            (
                pd.read_pickle,
                "os",
                ("io", "data", "pickle", "categorical.0.25.0.pickle"),
            ),
        ],
    )
    def test_read_fspath_all(self, reader, module, path, datapath):
        pytest.importorskip(module)
        path = datapath(*path)

        mypath = CustomFSPath(path)
        result = reader(mypath)
        expected = reader(path)

        if path.endswith(".pickle"):
            # categorical
            tm.assert_categorical_equal(result, expected)
        else:
            tm.assert_frame_equal(result, expected)
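
    # Writers are exercised with both a plain string path and a CustomFSPath;
    # apart from Excel (which embeds creation metadata), the two outputs are
    # compared byte-for-byte.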
    @pytest.mark.parametrize(
        "writer_name, writer_kwargs, module",
        [
            ("to_csv", {}, "os"),
            ("to_excel", {"engine": "openpyxl"}, "openpyxl"),
            ("to_feather", {}, "pyarrow"),
            ("to_html", {}, "os"),
            ("to_json", {}, "os"),
            ("to_latex", {}, "os"),
            ("to_pickle", {}, "os"),
            ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"),
        ],
    )
    def test_write_fspath_all(self, writer_name, writer_kwargs, module):
        if writer_name in ["to_latex"]:  # uses Styler implementation
            pytest.importorskip("jinja2")
        p1 = tm.ensure_clean("string")
        p2 = tm.ensure_clean("fspath")
        df = pd.DataFrame({"A": [1, 2]})

        with p1 as string, p2 as fspath:
            pytest.importorskip(module)
            mypath = CustomFSPath(fspath)
            writer = getattr(df, writer_name)

            writer(string, **writer_kwargs)
            writer(mypath, **writer_kwargs)
            with open(string, "rb") as f_str, open(fspath, "rb") as f_path:
                if writer_name == "to_excel":
                    # binary representation of excel contains time creation
                    # data that causes flaky CI failures
                    result = pd.read_excel(f_str, **writer_kwargs)
                    expected = pd.read_excel(f_path, **writer_kwargs)
                    tm.assert_frame_equal(result, expected)
                else:
                    result = f_str.read()
                    expected = f_path.read()
                    assert result == expected

    def test_write_fspath_hdf5(self):
        # Same test as write_fspath_all, except HDF5 files aren't
        # necessarily byte-for-byte identical for a given dataframe, so we'll
        # have to read and compare equality
        pytest.importorskip("tables")

        df = pd.DataFrame({"A": [1, 2]})
        p1 = tm.ensure_clean("string")
        p2 = tm.ensure_clean("fspath")

        with p1 as string, p2 as fspath:
            mypath = CustomFSPath(fspath)
            df.to_hdf(mypath, key="bar")
            df.to_hdf(string, key="bar")

            result = pd.read_hdf(fspath, key="bar")
            expected = pd.read_hdf(string, key="bar")

        tm.assert_frame_equal(result, expected)


@pytest.fixture
def mmap_file(datapath):
    return datapath("io", "data", "csv", "test_mmap.csv")


class TestMMapWrapper:
    def test_constructor_bad_file(self, mmap_file):
        non_file = StringIO("I am not a file")
        non_file.fileno = lambda: -1

        # the error raised is different on Windows
        if is_platform_windows():
            msg = "The parameter is incorrect"
            err = OSError
        else:
            msg = "[Errno 22]"
            err = mmap.error

        with pytest.raises(err, match=msg):
            icom._maybe_memory_map(non_file, True)

        with open(mmap_file) as target:
            pass

        msg = "I/O operation on closed file"
        with pytest.raises(ValueError, match=msg):
            icom._maybe_memory_map(target, True)

    def test_next(self, mmap_file):
        with open(mmap_file) as target:
            lines = target.readlines()

            with icom.get_handle(
                target, "r", is_text=True, memory_map=True
            ) as wrappers:
                wrapper = wrappers.handle
                assert isinstance(wrapper.buffer.buffer, mmap.mmap)

                for line in lines:
                    next_line = next(wrapper)
                    assert next_line.strip() == line.strip()

                with pytest.raises(StopIteration, match=r"^$"):
                    next(wrapper)

    def test_unknown_engine(self):
        with tm.ensure_clean() as path:
            df = tm.makeDataFrame()
            df.to_csv(path)
            with pytest.raises(ValueError, match="Unknown engine"):
                pd.read_csv(path, engine="pyt")

    def test_binary_mode(self):
        """
        'encoding' shouldn't be passed to 'open' in binary mode.

        GH 35058
        """
        with tm.ensure_clean() as path:
            df = tm.makeDataFrame()
            df.to_csv(path, mode="w+b")
            tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
  387. @pytest.mark.parametrize("encoding", ["utf-16", "utf-32"])
  388. @pytest.mark.parametrize("compression_", ["bz2", "xz"])
  389. def test_warning_missing_utf_bom(self, encoding, compression_):
  390. """
  391. bz2 and xz do not write the byte order mark (BOM) for utf-16/32.
  392. https://stackoverflow.com/questions/55171439
  393. GH 35681
  394. """
  395. df = tm.makeDataFrame()
  396. with tm.ensure_clean() as path:
  397. with tm.assert_produces_warning(UnicodeWarning):
  398. df.to_csv(path, compression=compression_, encoding=encoding)
  399. # reading should fail (otherwise we wouldn't need the warning)
  400. msg = r"UTF-\d+ stream does not start with BOM"
  401. with pytest.raises(UnicodeError, match=msg):
  402. pd.read_csv(path, compression=compression_, encoding=encoding)


def test_is_fsspec_url():
    assert icom.is_fsspec_url("gcs://pandas/somethingelse.com")
    assert icom.is_fsspec_url("gs://pandas/somethingelse.com")
    # the following is the only remote URL that is handled without fsspec
    assert not icom.is_fsspec_url("http://pandas/somethingelse.com")
    assert not icom.is_fsspec_url("random:pandas/somethingelse.com")
    assert not icom.is_fsspec_url("/local/path")
    assert not icom.is_fsspec_url("relative/local/path")
    # fsspec URL in string should not be recognized
    assert not icom.is_fsspec_url("this is not fsspec://url")
    assert not icom.is_fsspec_url("{'url': 'gs://pandas/somethingelse.com'}")
    # accept everything that conforms to RFC 3986 schema
    assert icom.is_fsspec_url("RFC-3986+compliant.spec://something")
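

# The two tests below exercise handles opened through the codecs module rather
# than the builtin open (GH39247).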
  416. @pytest.mark.parametrize("encoding", [None, "utf-8"])
  417. @pytest.mark.parametrize("format", ["csv", "json"])
  418. def test_codecs_encoding(encoding, format):
  419. # GH39247
  420. expected = tm.makeDataFrame()
  421. with tm.ensure_clean() as path:
  422. with codecs.open(path, mode="w", encoding=encoding) as handle:
  423. getattr(expected, f"to_{format}")(handle)
  424. with codecs.open(path, mode="r", encoding=encoding) as handle:
  425. if format == "csv":
  426. df = pd.read_csv(handle, index_col=0)
  427. else:
  428. df = pd.read_json(handle)
  429. tm.assert_frame_equal(expected, df)


def test_codecs_get_writer_reader():
    # GH39247
    expected = tm.makeDataFrame()
    with tm.ensure_clean() as path:
        with open(path, "wb") as handle:
            with codecs.getwriter("utf-8")(handle) as encoded:
                expected.to_csv(encoded)
        with open(path, "rb") as handle:
            with codecs.getreader("utf-8")(handle) as encoded:
                df = pd.read_csv(encoded, index_col=0)
    tm.assert_frame_equal(expected, df)


@pytest.mark.parametrize(
    "io_class,mode,msg",
    [
        (BytesIO, "t", "a bytes-like object is required, not 'str'"),
        (StringIO, "b", "string argument expected, got 'bytes'"),
    ],
)
def test_explicit_encoding(io_class, mode, msg):
    # GH39247: if the user provides mode="*t" or "*b", it must be honored;
    # here the deliberately wrong mode for the buffer triggers a TypeError.
    expected = tm.makeDataFrame()
    with io_class() as buffer:
        with pytest.raises(TypeError, match=msg):
            expected.to_csv(buffer, mode=f"w{mode}")
  456. @pytest.mark.parametrize("encoding_errors", [None, "strict", "replace"])
  457. @pytest.mark.parametrize("format", ["csv", "json"])
  458. def test_encoding_errors(encoding_errors, format):
  459. # GH39450
  460. msg = "'utf-8' codec can't decode byte"
  461. bad_encoding = b"\xe4"
  462. if format == "csv":
  463. content = b"," + bad_encoding + b"\n" + bad_encoding * 2 + b"," + bad_encoding
  464. reader = partial(pd.read_csv, index_col=0)
  465. else:
  466. content = (
  467. b'{"'
  468. + bad_encoding * 2
  469. + b'": {"'
  470. + bad_encoding
  471. + b'":"'
  472. + bad_encoding
  473. + b'"}}'
  474. )
  475. reader = partial(pd.read_json, orient="index")
  476. with tm.ensure_clean() as path:
  477. file = Path(path)
  478. file.write_bytes(content)
  479. if encoding_errors != "replace":
  480. with pytest.raises(UnicodeDecodeError, match=msg):
  481. reader(path, encoding_errors=encoding_errors)
  482. else:
  483. df = reader(path, encoding_errors=encoding_errors)
  484. decoded = bad_encoding.decode(errors=encoding_errors)
  485. expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
  486. tm.assert_frame_equal(df, expected)


def test_bad_encoding_errors():
    # GH 39777
    with tm.ensure_clean() as path:
        with pytest.raises(LookupError, match="unknown error handler name"):
            icom.get_handle(path, "w", errors="bad")


def test_errno_attribute():
    # GH 13872
    with pytest.raises(FileNotFoundError, match="\\[Errno 2\\]") as err:
        pd.read_csv("doesnt_exist")
    assert err.value.errno == errno.ENOENT
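

# BytesIO has no real file descriptor (fileno raises UnsupportedOperation),
# so requesting memory_map=True on it should fail cleanly.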
def test_fail_mmap():
    with pytest.raises(UnsupportedOperation, match="fileno"):
        with BytesIO() as buffer:
            icom.get_handle(buffer, "rb", memory_map=True)
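

# Errors raised while closing handles created by get_handle should propagate
# when the context exits.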
def test_close_on_error():
    # GH 47136
    class TestError:
        def close(self):
            raise OSError("test")

    with pytest.raises(OSError, match="test"):
        with BytesIO() as buffer:
            with icom.get_handle(buffer, "rb") as handles:
                handles.created_handles.append(TestError())


@pytest.mark.parametrize(
    "reader",
    [
        pd.read_csv,
        pd.read_fwf,
        pd.read_excel,
        pd.read_feather,
        pd.read_hdf,
        pd.read_stata,
        pd.read_sas,
        pd.read_json,
        pd.read_pickle,
    ],
)
def test_pickle_reader(reader):
    # GH 22265
    with BytesIO() as buffer:
        pickle.dump(reader, buffer)