# pickle.py
  1. """ pickle compat """
  2. from __future__ import annotations
  3. import pickle
  4. from typing import Any
  5. import warnings
  6. from pandas._typing import (
  7. CompressionOptions,
  8. FilePath,
  9. ReadPickleBuffer,
  10. StorageOptions,
  11. WriteBuffer,
  12. )
  13. from pandas.compat import pickle_compat as pc
  14. from pandas.util._decorators import doc
  15. from pandas.core.shared_docs import _shared_docs
  16. from pandas.io.common import get_handle


@doc(
    storage_options=_shared_docs["storage_options"],
    compression_options=_shared_docs["compression_options"] % "filepath_or_buffer",
)
def to_pickle(
    obj: Any,
    filepath_or_buffer: FilePath | WriteBuffer[bytes],
    compression: CompressionOptions = "infer",
    protocol: int = pickle.HIGHEST_PROTOCOL,
    storage_options: StorageOptions = None,
) -> None:
  28. """
  29. Pickle (serialize) object to file.
  30. Parameters
  31. ----------
  32. obj : any object
  33. Any python object.
  34. filepath_or_buffer : str, path object, or file-like object
  35. String, path object (implementing ``os.PathLike[str]``), or file-like
  36. object implementing a binary ``write()`` function.
  37. Also accepts URL. URL has to be of S3 or GCS.
  38. {compression_options}
  39. .. versionchanged:: 1.4.0 Zstandard support.
  40. protocol : int
  41. Int which indicates which protocol should be used by the pickler,
  42. default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
  43. values for this parameter depend on the version of Python. For Python
  44. 2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
  45. For Python >= 3.4, 4 is a valid value. A negative value for the
  46. protocol parameter is equivalent to setting its value to
  47. HIGHEST_PROTOCOL.
  48. {storage_options}
  49. .. versionadded:: 1.2.0
  50. .. [1] https://docs.python.org/3/library/pickle.html
  51. See Also
  52. --------
  53. read_pickle : Load pickled pandas object (or any object) from file.
  54. DataFrame.to_hdf : Write DataFrame to an HDF5 file.
  55. DataFrame.to_sql : Write DataFrame to a SQL database.
  56. DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
  57. Examples
  58. --------
  59. >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP
  60. >>> original_df # doctest: +SKIP
  61. foo bar
  62. 0 0 5
  63. 1 1 6
  64. 2 2 7
  65. 3 3 8
  66. 4 4 9
  67. >>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP
  68. >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP
  69. >>> unpickled_df # doctest: +SKIP
  70. foo bar
  71. 0 0 5
  72. 1 1 6
  73. 2 2 7
  74. 3 3 8
  75. 4 4 9
  76. """ # noqa: E501
    if protocol < 0:
        protocol = pickle.HIGHEST_PROTOCOL

    with get_handle(
        filepath_or_buffer,
        "wb",
        compression=compression,
        is_text=False,
        storage_options=storage_options,
    ) as handles:
        # letting pickle write directly to the buffer is more memory-efficient
        pickle.dump(obj, handles.handle, protocol=protocol)
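
# Illustrative usage sketch (not part of the original module; the DataFrame and
# file paths below are hypothetical). With the default ``compression="infer"``,
# the compression codec is inferred from the target file extension:
#
#     import pandas as pd
#     df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
#     to_pickle(df, "dummy.pkl")              # uncompressed pickle
#     to_pickle(df, "dummy.pkl.gz")           # gzip inferred from ".gz"
#     to_pickle(df, "dummy.pkl", protocol=4)  # pin a specific pickle protocol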


@doc(
    storage_options=_shared_docs["storage_options"],
    decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer",
)
def read_pickle(
    filepath_or_buffer: FilePath | ReadPickleBuffer,
    compression: CompressionOptions = "infer",
    storage_options: StorageOptions = None,
):
  97. """
  98. Load pickled pandas object (or any object) from file.
  99. .. warning::
  100. Loading pickled data received from untrusted sources can be
  101. unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.
  102. Parameters
  103. ----------
  104. filepath_or_buffer : str, path object, or file-like object
  105. String, path object (implementing ``os.PathLike[str]``), or file-like
  106. object implementing a binary ``readlines()`` function.
  107. Also accepts URL. URL is not limited to S3 and GCS.
  108. {decompression_options}
  109. .. versionchanged:: 1.4.0 Zstandard support.
  110. {storage_options}
  111. .. versionadded:: 1.2.0
  112. Returns
  113. -------
  114. same type as object stored in file
  115. See Also
  116. --------
  117. DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
  118. Series.to_pickle : Pickle (serialize) Series object to file.
  119. read_hdf : Read HDF5 file into a DataFrame.
  120. read_sql : Read SQL query or database table into a DataFrame.
  121. read_parquet : Load a parquet object, returning a DataFrame.
  122. Notes
  123. -----
  124. read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3
  125. provided the object was serialized with to_pickle.
  126. Examples
  127. --------
  128. >>> original_df = pd.DataFrame(
  129. ... {{"foo": range(5), "bar": range(5, 10)}}
  130. ... ) # doctest: +SKIP
  131. >>> original_df # doctest: +SKIP
  132. foo bar
  133. 0 0 5
  134. 1 1 6
  135. 2 2 7
  136. 3 3 8
  137. 4 4 9
  138. >>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP
  139. >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP
  140. >>> unpickled_df # doctest: +SKIP
  141. foo bar
  142. 0 0 5
  143. 1 1 6
  144. 2 2 7
  145. 3 3 8
  146. 4 4 9
  147. """
    excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError, TypeError)
    with get_handle(
        filepath_or_buffer,
        "rb",
        compression=compression,
        is_text=False,
        storage_options=storage_options,
    ) as handles:

        # 1) try standard library Pickle
        # 2) try pickle_compat (older pandas version) to handle subclass changes
        # 3) try pickle_compat with latin-1 encoding upon a UnicodeDecodeError

        try:
            # TypeError for Cython complaints about object.__new__ vs Tick.__new__
            try:
                with warnings.catch_warnings(record=True):
                    # We want to silence any warnings about, e.g. moved modules.
                    warnings.simplefilter("ignore", Warning)
                    return pickle.load(handles.handle)
            except excs_to_catch:
                # e.g.
                #  "No module named 'pandas.core.sparse.series'"
                #  "Can't get attribute '__nat_unpickle' on <module 'pandas._libs.tslib"
                return pc.load(handles.handle, encoding=None)
        except UnicodeDecodeError:
            # e.g. can occur for files written in py27; see GH#28645 and GH#31988
            return pc.load(handles.handle, encoding="latin-1")
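

# The sketch below is illustrative only and not part of the original module:
# ``_round_trip_example`` is a hypothetical helper showing how ``to_pickle``
# and ``read_pickle`` pair up, with compression inferred from the file
# extension on both sides. It is defined here but never called.
def _round_trip_example() -> None:
    import pandas as pd

    original = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
    # gzip compression is inferred from the ".gz" suffix on write and on read
    to_pickle(original, "dummy.pkl.gz")
    restored = read_pickle("dummy.pkl.gz")
    pd.testing.assert_frame_equal(original, restored)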