123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477 |
- import os
- import subprocess
- from textwrap import dedent
- import numpy as np
- import pytest
- from pandas.compat import (
- is_ci_environment,
- is_platform_linux,
- is_platform_mac,
- )
- from pandas.errors import (
- PyperclipException,
- PyperclipWindowsException,
- )
- import pandas as pd
- from pandas import (
- NA,
- DataFrame,
- Series,
- get_option,
- read_clipboard,
- )
- import pandas._testing as tm
- from pandas.core.arrays import (
- ArrowStringArray,
- StringArray,
- )
- from pandas.io.clipboard import (
- CheckedCall,
- _stringifyText,
- clipboard_get,
- clipboard_set,
- )
- def build_kwargs(sep, excel):
- kwargs = {}
- if excel != "default":
- kwargs["excel"] = excel
- if sep != "default":
- kwargs["sep"] = sep
- return kwargs
- @pytest.fixture(
- params=[
- "delims",
- "utf8",
- "utf16",
- "string",
- "long",
- "nonascii",
- "colwidth",
- "mixed",
- "float",
- "int",
- ]
- )
- def df(request):
- data_type = request.param
- if data_type == "delims":
- return DataFrame({"a": ['"a,\t"b|c', "d\tef´"], "b": ["hi'j", "k''lm"]})
- elif data_type == "utf8":
- return DataFrame({"a": ["µasd", "Ωœ∑´"], "b": ["øπ∆˚¬", "œ∑´®"]})
- elif data_type == "utf16":
- return DataFrame(
- {"a": ["\U0001f44d\U0001f44d", "\U0001f44d\U0001f44d"], "b": ["abc", "def"]}
- )
- elif data_type == "string":
- return tm.makeCustomDataframe(
- 5, 3, c_idx_type="s", r_idx_type="i", c_idx_names=[None], r_idx_names=[None]
- )
- elif data_type == "long":
- max_rows = get_option("display.max_rows")
- return tm.makeCustomDataframe(
- max_rows + 1,
- 3,
- data_gen_f=lambda *args: np.random.randint(2),
- c_idx_type="s",
- r_idx_type="i",
- c_idx_names=[None],
- r_idx_names=[None],
- )
- elif data_type == "nonascii":
- return DataFrame({"en": "in English".split(), "es": "en español".split()})
- elif data_type == "colwidth":
- _cw = get_option("display.max_colwidth") + 1
- return tm.makeCustomDataframe(
- 5,
- 3,
- data_gen_f=lambda *args: "x" * _cw,
- c_idx_type="s",
- r_idx_type="i",
- c_idx_names=[None],
- r_idx_names=[None],
- )
- elif data_type == "mixed":
- return DataFrame(
- {
- "a": np.arange(1.0, 6.0) + 0.01,
- "b": np.arange(1, 6).astype(np.int64),
- "c": list("abcde"),
- }
- )
- elif data_type == "float":
- return tm.makeCustomDataframe(
- 5,
- 3,
- data_gen_f=lambda r, c: float(r) + 0.01,
- c_idx_type="s",
- r_idx_type="i",
- c_idx_names=[None],
- r_idx_names=[None],
- )
- elif data_type == "int":
- return tm.makeCustomDataframe(
- 5,
- 3,
- data_gen_f=lambda *args: np.random.randint(2),
- c_idx_type="s",
- r_idx_type="i",
- c_idx_names=[None],
- r_idx_names=[None],
- )
- else:
- raise ValueError
- @pytest.fixture
- def mock_ctypes(monkeypatch):
- """
- Mocks WinError to help with testing the clipboard.
- """
- def _mock_win_error():
- return "Window Error"
- # Set raising to False because WinError won't exist on non-windows platforms
- with monkeypatch.context() as m:
- m.setattr("ctypes.WinError", _mock_win_error, raising=False)
- yield
- @pytest.mark.usefixtures("mock_ctypes")
- def test_checked_call_with_bad_call(monkeypatch):
- """
- Give CheckCall a function that returns a falsey value and
- mock get_errno so it returns false so an exception is raised.
- """
- def _return_false():
- return False
- monkeypatch.setattr("pandas.io.clipboard.get_errno", lambda: True)
- msg = f"Error calling {_return_false.__name__} \\(Window Error\\)"
- with pytest.raises(PyperclipWindowsException, match=msg):
- CheckedCall(_return_false)()
- @pytest.mark.usefixtures("mock_ctypes")
- def test_checked_call_with_valid_call(monkeypatch):
- """
- Give CheckCall a function that returns a truthy value and
- mock get_errno so it returns true so an exception is not raised.
- The function should return the results from _return_true.
- """
- def _return_true():
- return True
- monkeypatch.setattr("pandas.io.clipboard.get_errno", lambda: False)
- # Give CheckedCall a callable that returns a truthy value s
- checked_call = CheckedCall(_return_true)
- assert checked_call() is True
- @pytest.mark.parametrize(
- "text",
- [
- "String_test",
- True,
- 1,
- 1.0,
- 1j,
- ],
- )
- def test_stringify_text(text):
- valid_types = (str, int, float, bool)
- if isinstance(text, valid_types):
- result = _stringifyText(text)
- assert result == str(text)
- else:
- msg = (
- "only str, int, float, and bool values "
- f"can be copied to the clipboard, not {type(text).__name__}"
- )
- with pytest.raises(PyperclipException, match=msg):
- _stringifyText(text)
- @pytest.fixture
- def mock_clipboard(monkeypatch, request):
- """Fixture mocking clipboard IO.
- This mocks pandas.io.clipboard.clipboard_get and
- pandas.io.clipboard.clipboard_set.
- This uses a local dict for storing data. The dictionary
- key used is the test ID, available with ``request.node.name``.
- This returns the local dictionary, for direct manipulation by
- tests.
- """
- # our local clipboard for tests
- _mock_data = {}
- def _mock_set(data):
- _mock_data[request.node.name] = data
- def _mock_get():
- return _mock_data[request.node.name]
- monkeypatch.setattr("pandas.io.clipboard.clipboard_set", _mock_set)
- monkeypatch.setattr("pandas.io.clipboard.clipboard_get", _mock_get)
- yield _mock_data
- @pytest.mark.clipboard
- def test_mock_clipboard(mock_clipboard):
- import pandas.io.clipboard
- pandas.io.clipboard.clipboard_set("abc")
- assert "abc" in set(mock_clipboard.values())
- result = pandas.io.clipboard.clipboard_get()
- assert result == "abc"
- @pytest.mark.single_cpu
- @pytest.mark.clipboard
- @pytest.mark.usefixtures("mock_clipboard")
- class TestClipboard:
- def check_round_trip_frame(self, data, excel=None, sep=None, encoding=None):
- data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
- result = read_clipboard(sep=sep or "\t", index_col=0, encoding=encoding)
- tm.assert_frame_equal(data, result)
- # Test that default arguments copy as tab delimited
- def test_round_trip_frame(self, df):
- self.check_round_trip_frame(df)
- # Test that explicit delimiters are respected
- @pytest.mark.parametrize("sep", ["\t", ",", "|"])
- def test_round_trip_frame_sep(self, df, sep):
- self.check_round_trip_frame(df, sep=sep)
- # Test white space separator
- def test_round_trip_frame_string(self, df):
- df.to_clipboard(excel=False, sep=None)
- result = read_clipboard()
- assert df.to_string() == result.to_string()
- assert df.shape == result.shape
- # Two character separator is not supported in to_clipboard
- # Test that multi-character separators are not silently passed
- def test_excel_sep_warning(self, df):
- with tm.assert_produces_warning(
- UserWarning,
- match="to_clipboard in excel mode requires a single character separator.",
- check_stacklevel=False,
- ):
- df.to_clipboard(excel=True, sep=r"\t")
- # Separator is ignored when excel=False and should produce a warning
- def test_copy_delim_warning(self, df):
- with tm.assert_produces_warning():
- df.to_clipboard(excel=False, sep="\t")
- # Tests that the default behavior of to_clipboard is tab
- # delimited and excel="True"
- @pytest.mark.parametrize("sep", ["\t", None, "default"])
- @pytest.mark.parametrize("excel", [True, None, "default"])
- def test_clipboard_copy_tabs_default(self, sep, excel, df, request, mock_clipboard):
- kwargs = build_kwargs(sep, excel)
- df.to_clipboard(**kwargs)
- assert mock_clipboard[request.node.name] == df.to_csv(sep="\t")
- # Tests reading of white space separated tables
- @pytest.mark.parametrize("sep", [None, "default"])
- @pytest.mark.parametrize("excel", [False])
- def test_clipboard_copy_strings(self, sep, excel, df):
- kwargs = build_kwargs(sep, excel)
- df.to_clipboard(**kwargs)
- result = read_clipboard(sep=r"\s+")
- assert result.to_string() == df.to_string()
- assert df.shape == result.shape
- def test_read_clipboard_infer_excel(self, request, mock_clipboard):
- # gh-19010: avoid warnings
- clip_kwargs = {"engine": "python"}
- text = dedent(
- """
- John James\tCharlie Mingus
- 1\t2
- 4\tHarry Carney
- """.strip()
- )
- mock_clipboard[request.node.name] = text
- df = read_clipboard(**clip_kwargs)
- # excel data is parsed correctly
- assert df.iloc[1][1] == "Harry Carney"
- # having diff tab counts doesn't trigger it
- text = dedent(
- """
- a\t b
- 1 2
- 3 4
- """.strip()
- )
- mock_clipboard[request.node.name] = text
- res = read_clipboard(**clip_kwargs)
- text = dedent(
- """
- a b
- 1 2
- 3 4
- """.strip()
- )
- mock_clipboard[request.node.name] = text
- exp = read_clipboard(**clip_kwargs)
- tm.assert_frame_equal(res, exp)
- def test_infer_excel_with_nulls(self, request, mock_clipboard):
- # GH41108
- text = "col1\tcol2\n1\tred\n\tblue\n2\tgreen"
- mock_clipboard[request.node.name] = text
- df = read_clipboard()
- df_expected = DataFrame(
- data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]}
- )
- # excel data is parsed correctly
- tm.assert_frame_equal(df, df_expected)
- @pytest.mark.parametrize(
- "multiindex",
- [
- ( # Can't use `dedent` here as it will remove the leading `\t`
- "\n".join(
- [
- "\t\t\tcol1\tcol2",
- "A\t0\tTrue\t1\tred",
- "A\t1\tTrue\t\tblue",
- "B\t0\tFalse\t2\tgreen",
- ]
- ),
- [["A", "A", "B"], [0, 1, 0], [True, True, False]],
- ),
- (
- "\n".join(
- ["\t\tcol1\tcol2", "A\t0\t1\tred", "A\t1\t\tblue", "B\t0\t2\tgreen"]
- ),
- [["A", "A", "B"], [0, 1, 0]],
- ),
- ],
- )
- def test_infer_excel_with_multiindex(self, request, mock_clipboard, multiindex):
- # GH41108
- mock_clipboard[request.node.name] = multiindex[0]
- df = read_clipboard()
- df_expected = DataFrame(
- data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]},
- index=multiindex[1],
- )
- # excel data is parsed correctly
- tm.assert_frame_equal(df, df_expected)
- def test_invalid_encoding(self, df):
- msg = "clipboard only supports utf-8 encoding"
- # test case for testing invalid encoding
- with pytest.raises(ValueError, match=msg):
- df.to_clipboard(encoding="ascii")
- with pytest.raises(NotImplementedError, match=msg):
- read_clipboard(encoding="ascii")
- @pytest.mark.parametrize("enc", ["UTF-8", "utf-8", "utf8"])
- def test_round_trip_valid_encodings(self, enc, df):
- self.check_round_trip_frame(df, encoding=enc)
- @pytest.mark.single_cpu
- @pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑´...", "abcd..."])
- @pytest.mark.xfail(
- os.environ.get("DISPLAY") is None and not is_platform_mac(),
- reason="Cannot be runed if a headless system is not put in place with Xvfb",
- strict=True,
- )
- def test_raw_roundtrip(self, data):
- # PR #25040 wide unicode wasn't copied correctly on PY3 on windows
- clipboard_set(data)
- assert data == clipboard_get()
- if is_ci_environment() and is_platform_linux():
- # Clipboard can sometimes keep previous param causing flaky CI failures
- subprocess.run(["xsel", "--delete", "--clipboard"], check=True)
- @pytest.mark.parametrize("engine", ["c", "python"])
- def test_read_clipboard_dtype_backend(
- self, request, mock_clipboard, string_storage, dtype_backend, engine
- ):
- # GH#50502
- if string_storage == "pyarrow" or dtype_backend == "pyarrow":
- pa = pytest.importorskip("pyarrow")
- if string_storage == "python":
- string_array = StringArray(np.array(["x", "y"], dtype=np.object_))
- string_array_na = StringArray(np.array(["x", NA], dtype=np.object_))
- else:
- string_array = ArrowStringArray(pa.array(["x", "y"]))
- string_array_na = ArrowStringArray(pa.array(["x", None]))
- text = """a,b,c,d,e,f,g,h,i
- x,1,4.0,x,2,4.0,,True,False
- y,2,5.0,,,,,False,"""
- mock_clipboard[request.node.name] = text
- with pd.option_context("mode.string_storage", string_storage):
- result = read_clipboard(sep=",", dtype_backend=dtype_backend, engine=engine)
- expected = DataFrame(
- {
- "a": string_array,
- "b": Series([1, 2], dtype="Int64"),
- "c": Series([4.0, 5.0], dtype="Float64"),
- "d": string_array_na,
- "e": Series([2, NA], dtype="Int64"),
- "f": Series([4.0, NA], dtype="Float64"),
- "g": Series([NA, NA], dtype="Int64"),
- "h": Series([True, False], dtype="boolean"),
- "i": Series([False, NA], dtype="boolean"),
- }
- )
- if dtype_backend == "pyarrow":
- from pandas.arrays import ArrowExtensionArray
- expected = DataFrame(
- {
- col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True))
- for col in expected.columns
- }
- )
- expected["g"] = ArrowExtensionArray(pa.array([None, None]))
- tm.assert_frame_equal(result, expected)
- def test_invalid_dtype_backend(self):
- msg = (
- "dtype_backend numpy is invalid, only 'numpy_nullable' and "
- "'pyarrow' are allowed."
- )
- with pytest.raises(ValueError, match=msg):
- read_clipboard(dtype_backend="numpy")
|