test_clipboard.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477
  1. import os
  2. import subprocess
  3. from textwrap import dedent
  4. import numpy as np
  5. import pytest
  6. from pandas.compat import (
  7. is_ci_environment,
  8. is_platform_linux,
  9. is_platform_mac,
  10. )
  11. from pandas.errors import (
  12. PyperclipException,
  13. PyperclipWindowsException,
  14. )
  15. import pandas as pd
  16. from pandas import (
  17. NA,
  18. DataFrame,
  19. Series,
  20. get_option,
  21. read_clipboard,
  22. )
  23. import pandas._testing as tm
  24. from pandas.core.arrays import (
  25. ArrowStringArray,
  26. StringArray,
  27. )
  28. from pandas.io.clipboard import (
  29. CheckedCall,
  30. _stringifyText,
  31. clipboard_get,
  32. clipboard_set,
  33. )
  34. def build_kwargs(sep, excel):
  35. kwargs = {}
  36. if excel != "default":
  37. kwargs["excel"] = excel
  38. if sep != "default":
  39. kwargs["sep"] = sep
  40. return kwargs
  41. @pytest.fixture(
  42. params=[
  43. "delims",
  44. "utf8",
  45. "utf16",
  46. "string",
  47. "long",
  48. "nonascii",
  49. "colwidth",
  50. "mixed",
  51. "float",
  52. "int",
  53. ]
  54. )
  55. def df(request):
  56. data_type = request.param
  57. if data_type == "delims":
  58. return DataFrame({"a": ['"a,\t"b|c', "d\tef´"], "b": ["hi'j", "k''lm"]})
  59. elif data_type == "utf8":
  60. return DataFrame({"a": ["µasd", "Ωœ∑´"], "b": ["øπ∆˚¬", "œ∑´®"]})
  61. elif data_type == "utf16":
  62. return DataFrame(
  63. {"a": ["\U0001f44d\U0001f44d", "\U0001f44d\U0001f44d"], "b": ["abc", "def"]}
  64. )
  65. elif data_type == "string":
  66. return tm.makeCustomDataframe(
  67. 5, 3, c_idx_type="s", r_idx_type="i", c_idx_names=[None], r_idx_names=[None]
  68. )
  69. elif data_type == "long":
  70. max_rows = get_option("display.max_rows")
  71. return tm.makeCustomDataframe(
  72. max_rows + 1,
  73. 3,
  74. data_gen_f=lambda *args: np.random.randint(2),
  75. c_idx_type="s",
  76. r_idx_type="i",
  77. c_idx_names=[None],
  78. r_idx_names=[None],
  79. )
  80. elif data_type == "nonascii":
  81. return DataFrame({"en": "in English".split(), "es": "en español".split()})
  82. elif data_type == "colwidth":
  83. _cw = get_option("display.max_colwidth") + 1
  84. return tm.makeCustomDataframe(
  85. 5,
  86. 3,
  87. data_gen_f=lambda *args: "x" * _cw,
  88. c_idx_type="s",
  89. r_idx_type="i",
  90. c_idx_names=[None],
  91. r_idx_names=[None],
  92. )
  93. elif data_type == "mixed":
  94. return DataFrame(
  95. {
  96. "a": np.arange(1.0, 6.0) + 0.01,
  97. "b": np.arange(1, 6).astype(np.int64),
  98. "c": list("abcde"),
  99. }
  100. )
  101. elif data_type == "float":
  102. return tm.makeCustomDataframe(
  103. 5,
  104. 3,
  105. data_gen_f=lambda r, c: float(r) + 0.01,
  106. c_idx_type="s",
  107. r_idx_type="i",
  108. c_idx_names=[None],
  109. r_idx_names=[None],
  110. )
  111. elif data_type == "int":
  112. return tm.makeCustomDataframe(
  113. 5,
  114. 3,
  115. data_gen_f=lambda *args: np.random.randint(2),
  116. c_idx_type="s",
  117. r_idx_type="i",
  118. c_idx_names=[None],
  119. r_idx_names=[None],
  120. )
  121. else:
  122. raise ValueError
  123. @pytest.fixture
  124. def mock_ctypes(monkeypatch):
  125. """
  126. Mocks WinError to help with testing the clipboard.
  127. """
  128. def _mock_win_error():
  129. return "Window Error"
  130. # Set raising to False because WinError won't exist on non-windows platforms
  131. with monkeypatch.context() as m:
  132. m.setattr("ctypes.WinError", _mock_win_error, raising=False)
  133. yield
  134. @pytest.mark.usefixtures("mock_ctypes")
  135. def test_checked_call_with_bad_call(monkeypatch):
  136. """
  137. Give CheckCall a function that returns a falsey value and
  138. mock get_errno so it returns false so an exception is raised.
  139. """
  140. def _return_false():
  141. return False
  142. monkeypatch.setattr("pandas.io.clipboard.get_errno", lambda: True)
  143. msg = f"Error calling {_return_false.__name__} \\(Window Error\\)"
  144. with pytest.raises(PyperclipWindowsException, match=msg):
  145. CheckedCall(_return_false)()
  146. @pytest.mark.usefixtures("mock_ctypes")
  147. def test_checked_call_with_valid_call(monkeypatch):
  148. """
  149. Give CheckCall a function that returns a truthy value and
  150. mock get_errno so it returns true so an exception is not raised.
  151. The function should return the results from _return_true.
  152. """
  153. def _return_true():
  154. return True
  155. monkeypatch.setattr("pandas.io.clipboard.get_errno", lambda: False)
  156. # Give CheckedCall a callable that returns a truthy value s
  157. checked_call = CheckedCall(_return_true)
  158. assert checked_call() is True
  159. @pytest.mark.parametrize(
  160. "text",
  161. [
  162. "String_test",
  163. True,
  164. 1,
  165. 1.0,
  166. 1j,
  167. ],
  168. )
  169. def test_stringify_text(text):
  170. valid_types = (str, int, float, bool)
  171. if isinstance(text, valid_types):
  172. result = _stringifyText(text)
  173. assert result == str(text)
  174. else:
  175. msg = (
  176. "only str, int, float, and bool values "
  177. f"can be copied to the clipboard, not {type(text).__name__}"
  178. )
  179. with pytest.raises(PyperclipException, match=msg):
  180. _stringifyText(text)
  181. @pytest.fixture
  182. def mock_clipboard(monkeypatch, request):
  183. """Fixture mocking clipboard IO.
  184. This mocks pandas.io.clipboard.clipboard_get and
  185. pandas.io.clipboard.clipboard_set.
  186. This uses a local dict for storing data. The dictionary
  187. key used is the test ID, available with ``request.node.name``.
  188. This returns the local dictionary, for direct manipulation by
  189. tests.
  190. """
  191. # our local clipboard for tests
  192. _mock_data = {}
  193. def _mock_set(data):
  194. _mock_data[request.node.name] = data
  195. def _mock_get():
  196. return _mock_data[request.node.name]
  197. monkeypatch.setattr("pandas.io.clipboard.clipboard_set", _mock_set)
  198. monkeypatch.setattr("pandas.io.clipboard.clipboard_get", _mock_get)
  199. yield _mock_data
  200. @pytest.mark.clipboard
  201. def test_mock_clipboard(mock_clipboard):
  202. import pandas.io.clipboard
  203. pandas.io.clipboard.clipboard_set("abc")
  204. assert "abc" in set(mock_clipboard.values())
  205. result = pandas.io.clipboard.clipboard_get()
  206. assert result == "abc"
  207. @pytest.mark.single_cpu
  208. @pytest.mark.clipboard
  209. @pytest.mark.usefixtures("mock_clipboard")
  210. class TestClipboard:
  211. def check_round_trip_frame(self, data, excel=None, sep=None, encoding=None):
  212. data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
  213. result = read_clipboard(sep=sep or "\t", index_col=0, encoding=encoding)
  214. tm.assert_frame_equal(data, result)
  215. # Test that default arguments copy as tab delimited
  216. def test_round_trip_frame(self, df):
  217. self.check_round_trip_frame(df)
  218. # Test that explicit delimiters are respected
  219. @pytest.mark.parametrize("sep", ["\t", ",", "|"])
  220. def test_round_trip_frame_sep(self, df, sep):
  221. self.check_round_trip_frame(df, sep=sep)
  222. # Test white space separator
  223. def test_round_trip_frame_string(self, df):
  224. df.to_clipboard(excel=False, sep=None)
  225. result = read_clipboard()
  226. assert df.to_string() == result.to_string()
  227. assert df.shape == result.shape
  228. # Two character separator is not supported in to_clipboard
  229. # Test that multi-character separators are not silently passed
  230. def test_excel_sep_warning(self, df):
  231. with tm.assert_produces_warning(
  232. UserWarning,
  233. match="to_clipboard in excel mode requires a single character separator.",
  234. check_stacklevel=False,
  235. ):
  236. df.to_clipboard(excel=True, sep=r"\t")
  237. # Separator is ignored when excel=False and should produce a warning
  238. def test_copy_delim_warning(self, df):
  239. with tm.assert_produces_warning():
  240. df.to_clipboard(excel=False, sep="\t")
  241. # Tests that the default behavior of to_clipboard is tab
  242. # delimited and excel="True"
  243. @pytest.mark.parametrize("sep", ["\t", None, "default"])
  244. @pytest.mark.parametrize("excel", [True, None, "default"])
  245. def test_clipboard_copy_tabs_default(self, sep, excel, df, request, mock_clipboard):
  246. kwargs = build_kwargs(sep, excel)
  247. df.to_clipboard(**kwargs)
  248. assert mock_clipboard[request.node.name] == df.to_csv(sep="\t")
  249. # Tests reading of white space separated tables
  250. @pytest.mark.parametrize("sep", [None, "default"])
  251. @pytest.mark.parametrize("excel", [False])
  252. def test_clipboard_copy_strings(self, sep, excel, df):
  253. kwargs = build_kwargs(sep, excel)
  254. df.to_clipboard(**kwargs)
  255. result = read_clipboard(sep=r"\s+")
  256. assert result.to_string() == df.to_string()
  257. assert df.shape == result.shape
  258. def test_read_clipboard_infer_excel(self, request, mock_clipboard):
  259. # gh-19010: avoid warnings
  260. clip_kwargs = {"engine": "python"}
  261. text = dedent(
  262. """
  263. John James\tCharlie Mingus
  264. 1\t2
  265. 4\tHarry Carney
  266. """.strip()
  267. )
  268. mock_clipboard[request.node.name] = text
  269. df = read_clipboard(**clip_kwargs)
  270. # excel data is parsed correctly
  271. assert df.iloc[1][1] == "Harry Carney"
  272. # having diff tab counts doesn't trigger it
  273. text = dedent(
  274. """
  275. a\t b
  276. 1 2
  277. 3 4
  278. """.strip()
  279. )
  280. mock_clipboard[request.node.name] = text
  281. res = read_clipboard(**clip_kwargs)
  282. text = dedent(
  283. """
  284. a b
  285. 1 2
  286. 3 4
  287. """.strip()
  288. )
  289. mock_clipboard[request.node.name] = text
  290. exp = read_clipboard(**clip_kwargs)
  291. tm.assert_frame_equal(res, exp)
  292. def test_infer_excel_with_nulls(self, request, mock_clipboard):
  293. # GH41108
  294. text = "col1\tcol2\n1\tred\n\tblue\n2\tgreen"
  295. mock_clipboard[request.node.name] = text
  296. df = read_clipboard()
  297. df_expected = DataFrame(
  298. data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]}
  299. )
  300. # excel data is parsed correctly
  301. tm.assert_frame_equal(df, df_expected)
  302. @pytest.mark.parametrize(
  303. "multiindex",
  304. [
  305. ( # Can't use `dedent` here as it will remove the leading `\t`
  306. "\n".join(
  307. [
  308. "\t\t\tcol1\tcol2",
  309. "A\t0\tTrue\t1\tred",
  310. "A\t1\tTrue\t\tblue",
  311. "B\t0\tFalse\t2\tgreen",
  312. ]
  313. ),
  314. [["A", "A", "B"], [0, 1, 0], [True, True, False]],
  315. ),
  316. (
  317. "\n".join(
  318. ["\t\tcol1\tcol2", "A\t0\t1\tred", "A\t1\t\tblue", "B\t0\t2\tgreen"]
  319. ),
  320. [["A", "A", "B"], [0, 1, 0]],
  321. ),
  322. ],
  323. )
  324. def test_infer_excel_with_multiindex(self, request, mock_clipboard, multiindex):
  325. # GH41108
  326. mock_clipboard[request.node.name] = multiindex[0]
  327. df = read_clipboard()
  328. df_expected = DataFrame(
  329. data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]},
  330. index=multiindex[1],
  331. )
  332. # excel data is parsed correctly
  333. tm.assert_frame_equal(df, df_expected)
  334. def test_invalid_encoding(self, df):
  335. msg = "clipboard only supports utf-8 encoding"
  336. # test case for testing invalid encoding
  337. with pytest.raises(ValueError, match=msg):
  338. df.to_clipboard(encoding="ascii")
  339. with pytest.raises(NotImplementedError, match=msg):
  340. read_clipboard(encoding="ascii")
  341. @pytest.mark.parametrize("enc", ["UTF-8", "utf-8", "utf8"])
  342. def test_round_trip_valid_encodings(self, enc, df):
  343. self.check_round_trip_frame(df, encoding=enc)
  344. @pytest.mark.single_cpu
  345. @pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑´...", "abcd..."])
  346. @pytest.mark.xfail(
  347. os.environ.get("DISPLAY") is None and not is_platform_mac(),
  348. reason="Cannot be runed if a headless system is not put in place with Xvfb",
  349. strict=True,
  350. )
  351. def test_raw_roundtrip(self, data):
  352. # PR #25040 wide unicode wasn't copied correctly on PY3 on windows
  353. clipboard_set(data)
  354. assert data == clipboard_get()
  355. if is_ci_environment() and is_platform_linux():
  356. # Clipboard can sometimes keep previous param causing flaky CI failures
  357. subprocess.run(["xsel", "--delete", "--clipboard"], check=True)
  358. @pytest.mark.parametrize("engine", ["c", "python"])
  359. def test_read_clipboard_dtype_backend(
  360. self, request, mock_clipboard, string_storage, dtype_backend, engine
  361. ):
  362. # GH#50502
  363. if string_storage == "pyarrow" or dtype_backend == "pyarrow":
  364. pa = pytest.importorskip("pyarrow")
  365. if string_storage == "python":
  366. string_array = StringArray(np.array(["x", "y"], dtype=np.object_))
  367. string_array_na = StringArray(np.array(["x", NA], dtype=np.object_))
  368. else:
  369. string_array = ArrowStringArray(pa.array(["x", "y"]))
  370. string_array_na = ArrowStringArray(pa.array(["x", None]))
  371. text = """a,b,c,d,e,f,g,h,i
  372. x,1,4.0,x,2,4.0,,True,False
  373. y,2,5.0,,,,,False,"""
  374. mock_clipboard[request.node.name] = text
  375. with pd.option_context("mode.string_storage", string_storage):
  376. result = read_clipboard(sep=",", dtype_backend=dtype_backend, engine=engine)
  377. expected = DataFrame(
  378. {
  379. "a": string_array,
  380. "b": Series([1, 2], dtype="Int64"),
  381. "c": Series([4.0, 5.0], dtype="Float64"),
  382. "d": string_array_na,
  383. "e": Series([2, NA], dtype="Int64"),
  384. "f": Series([4.0, NA], dtype="Float64"),
  385. "g": Series([NA, NA], dtype="Int64"),
  386. "h": Series([True, False], dtype="boolean"),
  387. "i": Series([False, NA], dtype="boolean"),
  388. }
  389. )
  390. if dtype_backend == "pyarrow":
  391. from pandas.arrays import ArrowExtensionArray
  392. expected = DataFrame(
  393. {
  394. col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True))
  395. for col in expected.columns
  396. }
  397. )
  398. expected["g"] = ArrowExtensionArray(pa.array([None, None]))
  399. tm.assert_frame_equal(result, expected)
  400. def test_invalid_dtype_backend(self):
  401. msg = (
  402. "dtype_backend numpy is invalid, only 'numpy_nullable' and "
  403. "'pyarrow' are allowed."
  404. )
  405. with pytest.raises(ValueError, match=msg):
  406. read_clipboard(dtype_backend="numpy")