test_s3.py 1.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. from io import BytesIO
  2. import os
  3. import pytest
  4. import pandas.util._test_decorators as td
  5. from pandas import read_csv
  6. import pandas._testing as tm
  7. def test_streaming_s3_objects():
  8. # GH17135
  9. # botocore gained iteration support in 1.10.47, can now be used in read_*
  10. pytest.importorskip("botocore", minversion="1.10.47")
  11. from botocore.response import StreamingBody
  12. data = [b"foo,bar,baz\n1,2,3\n4,5,6\n", b"just,the,header\n"]
  13. for el in data:
  14. body = StreamingBody(BytesIO(el), content_length=len(el))
  15. read_csv(body)
  16. @td.skip_if_no("s3fs")
  17. @pytest.mark.network
  18. @tm.network
  19. def test_read_without_creds_from_pub_bucket():
  20. # GH 34626
  21. # Use Amazon Open Data Registry - https://registry.opendata.aws/gdelt
  22. result = read_csv("s3://gdelt-open-data/events/1981.csv", nrows=3)
  23. assert len(result) == 3
  24. @td.skip_if_no("s3fs")
  25. @pytest.mark.network
  26. @tm.network
  27. def test_read_with_creds_from_pub_bucket():
  28. # Ensure we can read from a public bucket with credentials
  29. # GH 34626
  30. # Use Amazon Open Data Registry - https://registry.opendata.aws/gdelt
  31. with tm.ensure_safe_environment_variables():
  32. # temporary workaround as moto fails for botocore >= 1.11 otherwise,
  33. # see https://github.com/spulec/moto/issues/1924 & 1952
  34. os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key")
  35. os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret")
  36. df = read_csv(
  37. "s3://gdelt-open-data/events/1981.csv", nrows=5, sep="\t", header=None
  38. )
  39. assert len(df) == 5