test_compression.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
from io import (
    BytesIO,
    StringIO,
)

import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm
from pandas.tests.io.test_compression import _compression_to_extension
  7. def test_compression_roundtrip(compression):
  8. df = pd.DataFrame(
  9. [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
  10. index=["A", "B"],
  11. columns=["X", "Y", "Z"],
  12. )
  13. with tm.ensure_clean() as path:
  14. df.to_json(path, compression=compression)
  15. tm.assert_frame_equal(df, pd.read_json(path, compression=compression))
  16. # explicitly ensure file was compressed.
  17. with tm.decompress_file(path, compression) as fh:
  18. result = fh.read().decode("utf8")
  19. tm.assert_frame_equal(df, pd.read_json(result))
  20. def test_read_zipped_json(datapath):
  21. uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json")
  22. uncompressed_df = pd.read_json(uncompressed_path)
  23. compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip")
  24. compressed_df = pd.read_json(compressed_path, compression="zip")
  25. tm.assert_frame_equal(uncompressed_df, compressed_df)
  26. @td.skip_if_not_us_locale
  27. @pytest.mark.single_cpu
  28. def test_with_s3_url(compression, s3_resource, s3so):
  29. # Bucket "pandas-test" created in tests/io/conftest.py
  30. df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
  31. with tm.ensure_clean() as path:
  32. df.to_json(path, compression=compression)
  33. with open(path, "rb") as f:
  34. s3_resource.Bucket("pandas-test").put_object(Key="test-1", Body=f)
  35. roundtripped_df = pd.read_json(
  36. "s3://pandas-test/test-1", compression=compression, storage_options=s3so
  37. )
  38. tm.assert_frame_equal(df, roundtripped_df)
  39. def test_lines_with_compression(compression):
  40. with tm.ensure_clean() as path:
  41. df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
  42. df.to_json(path, orient="records", lines=True, compression=compression)
  43. roundtripped_df = pd.read_json(path, lines=True, compression=compression)
  44. tm.assert_frame_equal(df, roundtripped_df)
  45. def test_chunksize_with_compression(compression):
  46. with tm.ensure_clean() as path:
  47. df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
  48. df.to_json(path, orient="records", lines=True, compression=compression)
  49. with pd.read_json(
  50. path, lines=True, chunksize=1, compression=compression
  51. ) as res:
  52. roundtripped_df = pd.concat(res)
  53. tm.assert_frame_equal(df, roundtripped_df)
  54. def test_write_unsupported_compression_type():
  55. df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
  56. with tm.ensure_clean() as path:
  57. msg = "Unrecognized compression type: unsupported"
  58. with pytest.raises(ValueError, match=msg):
  59. df.to_json(path, compression="unsupported")
  60. def test_read_unsupported_compression_type():
  61. with tm.ensure_clean() as path:
  62. msg = "Unrecognized compression type: unsupported"
  63. with pytest.raises(ValueError, match=msg):
  64. pd.read_json(path, compression="unsupported")
  65. @pytest.mark.parametrize("to_infer", [True, False])
  66. @pytest.mark.parametrize("read_infer", [True, False])
  67. def test_to_json_compression(compression_only, read_infer, to_infer):
  68. # see gh-15008
  69. compression = compression_only
  70. # We'll complete file extension subsequently.
  71. filename = "test."
  72. filename += _compression_to_extension[compression]
  73. df = pd.DataFrame({"A": [1]})
  74. to_compression = "infer" if to_infer else compression
  75. read_compression = "infer" if read_infer else compression
  76. with tm.ensure_clean(filename) as path:
  77. df.to_json(path, compression=to_compression)
  78. result = pd.read_json(path, compression=read_compression)
  79. tm.assert_frame_equal(result, df)
  80. def test_to_json_compression_mode(compression):
  81. # GH 39985 (read_json does not support user-provided binary files)
  82. expected = pd.DataFrame({"A": [1]})
  83. with BytesIO() as buffer:
  84. expected.to_json(buffer, compression=compression)
  85. # df = pd.read_json(buffer, compression=compression)
  86. # tm.assert_frame_equal(expected, df)