"""
Patched ``BZ2File`` and ``LZMAFile`` to handle pickle protocol 5.
"""
from __future__ import annotations

import bz2
from pickle import PickleBuffer

from pandas.compat._constants import PY310
  8. try:
  9. import lzma
  10. has_lzma = True
  11. except ImportError:
  12. has_lzma = False
  13. def flatten_buffer(
  14. b: bytes | bytearray | memoryview | PickleBuffer,
  15. ) -> bytes | bytearray | memoryview:
  16. """
  17. Return some 1-D `uint8` typed buffer.
  18. Coerces anything that does not match that description to one that does
  19. without copying if possible (otherwise will copy).
  20. """
  21. if isinstance(b, (bytes, bytearray)):
  22. return b
  23. if not isinstance(b, PickleBuffer):
  24. b = PickleBuffer(b)
  25. try:
  26. # coerce to 1-D `uint8` C-contiguous `memoryview` zero-copy
  27. return b.raw()
  28. except BufferError:
  29. # perform in-memory copy if buffer is not contiguous
  30. return memoryview(b).tobytes("A")
  31. class BZ2File(bz2.BZ2File):
  32. if not PY310:
  33. def write(self, b) -> int:
  34. # Workaround issue where `bz2.BZ2File` expects `len`
  35. # to return the number of bytes in `b` by converting
  36. # `b` into something that meets that constraint with
  37. # minimal copying.
  38. #
  39. # Note: This is fixed in Python 3.10.
  40. return super().write(flatten_buffer(b))
  41. if has_lzma:
  42. class LZMAFile(lzma.LZMAFile):
  43. if not PY310:
  44. def write(self, b) -> int:
  45. # Workaround issue where `lzma.LZMAFile` expects `len`
  46. # to return the number of bytes in `b` by converting
  47. # `b` into something that meets that constraint with
  48. # minimal copying.
  49. #
  50. # Note: This is fixed in Python 3.10.
  51. return super().write(flatten_buffer(b))