clipboards.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. """ io on the clipboard """
  2. from __future__ import annotations
  3. from io import StringIO
  4. from typing import TYPE_CHECKING
  5. import warnings
  6. from pandas._libs import lib
  7. from pandas.util._exceptions import find_stack_level
  8. from pandas.util._validators import check_dtype_backend
  9. from pandas.core.dtypes.generic import ABCDataFrame
  10. from pandas import (
  11. get_option,
  12. option_context,
  13. )
  14. if TYPE_CHECKING:
  15. from pandas._typing import DtypeBackend
  16. def read_clipboard(
  17. sep: str = r"\s+",
  18. dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
  19. **kwargs,
  20. ): # pragma: no cover
  21. r"""
  22. Read text from clipboard and pass to read_csv.
  23. Parameters
  24. ----------
  25. sep : str, default '\s+'
  26. A string or regex delimiter. The default of '\s+' denotes
  27. one or more whitespace characters.
  28. dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
  29. Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
  30. arrays, nullable dtypes are used for all dtypes that have a nullable
  31. implementation when "numpy_nullable" is set, pyarrow is used for all
  32. dtypes if "pyarrow" is set.
  33. The dtype_backends are still experimential.
  34. .. versionadded:: 2.0
  35. **kwargs
  36. See read_csv for the full argument list.
  37. Returns
  38. -------
  39. DataFrame
  40. A parsed DataFrame object.
  41. """
  42. encoding = kwargs.pop("encoding", "utf-8")
  43. # only utf-8 is valid for passed value because that's what clipboard
  44. # supports
  45. if encoding is not None and encoding.lower().replace("-", "") != "utf8":
  46. raise NotImplementedError("reading from clipboard only supports utf-8 encoding")
  47. check_dtype_backend(dtype_backend)
  48. from pandas.io.clipboard import clipboard_get
  49. from pandas.io.parsers import read_csv
  50. text = clipboard_get()
  51. # Try to decode (if needed, as "text" might already be a string here).
  52. try:
  53. text = text.decode(kwargs.get("encoding") or get_option("display.encoding"))
  54. except AttributeError:
  55. pass
  56. # Excel copies into clipboard with \t separation
  57. # inspect no more then the 10 first lines, if they
  58. # all contain an equal number (>0) of tabs, infer
  59. # that this came from excel and set 'sep' accordingly
  60. lines = text[:10000].split("\n")[:-1][:10]
  61. # Need to remove leading white space, since read_csv
  62. # accepts:
  63. # a b
  64. # 0 1 2
  65. # 1 3 4
  66. counts = {x.lstrip(" ").count("\t") for x in lines}
  67. if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
  68. sep = "\t"
  69. # check the number of leading tabs in the first line
  70. # to account for index columns
  71. index_length = len(lines[0]) - len(lines[0].lstrip(" \t"))
  72. if index_length != 0:
  73. kwargs.setdefault("index_col", list(range(index_length)))
  74. # Edge case where sep is specified to be None, return to default
  75. if sep is None and kwargs.get("delim_whitespace") is None:
  76. sep = r"\s+"
  77. # Regex separator currently only works with python engine.
  78. # Default to python if separator is multi-character (regex)
  79. if len(sep) > 1 and kwargs.get("engine") is None:
  80. kwargs["engine"] = "python"
  81. elif len(sep) > 1 and kwargs.get("engine") == "c":
  82. warnings.warn(
  83. "read_clipboard with regex separator does not work properly with c engine.",
  84. stacklevel=find_stack_level(),
  85. )
  86. return read_csv(StringIO(text), sep=sep, dtype_backend=dtype_backend, **kwargs)
  87. def to_clipboard(
  88. obj, excel: bool | None = True, sep: str | None = None, **kwargs
  89. ) -> None: # pragma: no cover
  90. """
  91. Attempt to write text representation of object to the system clipboard
  92. The clipboard can be then pasted into Excel for example.
  93. Parameters
  94. ----------
  95. obj : the object to write to the clipboard
  96. excel : bool, defaults to True
  97. if True, use the provided separator, writing in a csv
  98. format for allowing easy pasting into excel.
  99. if False, write a string representation of the object
  100. to the clipboard
  101. sep : optional, defaults to tab
  102. other keywords are passed to to_csv
  103. Notes
  104. -----
  105. Requirements for your platform
  106. - Linux: xclip, or xsel (with PyQt4 modules)
  107. - Windows:
  108. - OS X:
  109. """
  110. encoding = kwargs.pop("encoding", "utf-8")
  111. # testing if an invalid encoding is passed to clipboard
  112. if encoding is not None and encoding.lower().replace("-", "") != "utf8":
  113. raise ValueError("clipboard only supports utf-8 encoding")
  114. from pandas.io.clipboard import clipboard_set
  115. if excel is None:
  116. excel = True
  117. if excel:
  118. try:
  119. if sep is None:
  120. sep = "\t"
  121. buf = StringIO()
  122. # clipboard_set (pyperclip) expects unicode
  123. obj.to_csv(buf, sep=sep, encoding="utf-8", **kwargs)
  124. text = buf.getvalue()
  125. clipboard_set(text)
  126. return
  127. except TypeError:
  128. warnings.warn(
  129. "to_clipboard in excel mode requires a single character separator.",
  130. stacklevel=find_stack_level(),
  131. )
  132. elif sep is not None:
  133. warnings.warn(
  134. "to_clipboard with excel=False ignores the sep argument.",
  135. stacklevel=find_stack_level(),
  136. )
  137. if isinstance(obj, ABCDataFrame):
  138. # str(df) has various unhelpful defaults, like truncation
  139. with option_context("display.max_colwidth", None):
  140. objstr = obj.to_string(**kwargs)
  141. else:
  142. objstr = str(obj)
  143. clipboard_set(objstr)