123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350 |
- import os
- import pytest
- from tempfile import mkdtemp, mkstemp, NamedTemporaryFile
- from shutil import rmtree
- import numpy.lib._datasource as datasource
- from numpy.testing import assert_, assert_equal, assert_raises
- import urllib.request as urllib_request
- from urllib.parse import urlparse
- from urllib.error import URLError
def urlopen_stub(url, data=None):
    """Stand-in for ``urlopen`` used while this module's tests run.

    Only the URL returned by ``valid_httpurl()`` is treated as reachable;
    for it a fresh ``NamedTemporaryFile`` plays the role of the response
    object.  Every other URL raises ``URLError``.  ``data`` is accepted
    but unused.
    """
    if url != valid_httpurl():
        raise URLError('Name or service not known')
    return NamedTemporaryFile(prefix='urltmp_')
# Module-level setup and teardown state.
# Placeholder for the real ``urlopen``; filled in by ``setup_module``.
old_urlopen = None
def setup_module():
    """Install ``urlopen_stub`` in place of ``urllib.request.urlopen``.

    The callable that was installed before is remembered in the
    module-level ``old_urlopen``.
    """
    global old_urlopen
    # The right-hand side is evaluated first, so the current urlopen is
    # captured before the stub overwrites it.
    old_urlopen, urllib_request.urlopen = urllib_request.urlopen, urlopen_stub
def teardown_module():
    """Put the callable stored in ``old_urlopen`` back on ``urllib.request``."""
    setattr(urllib_request, 'urlopen', old_urlopen)
# Base URL and file name that together form the "valid" test URL.
http_path = 'http://www.google.com/'
http_file = 'index.html'

# Base URL and file name that together form the "invalid" test URL.
http_fakepath = 'http://fake.abc.web/site/'
http_fakefile = 'fake.txt'

# File names used by the sandboxing tests, in POSIX and Windows flavours.
malicious_files = [
    '/etc/shadow',
    '../../shadow',
    '..\\system.dat',
    'c:\\windows\\system.dat',
]

# Payload written to and read back from the compressed test files.
magic_line = b'three is the magic number'
# Utility functions used by many tests
def valid_textfile(filedir):
    """Create an empty ``dstmp_*.txt`` file inside *filedir* and return its path.

    The descriptor handed back by ``mkstemp`` is closed right away, so the
    caller receives only the path of an existing, closed file.
    """
    handle, filename = mkstemp(
        dir=filedir, prefix='dstmp_', suffix='.txt', text=True)
    os.close(handle)
    return filename
def invalid_textfile(filedir):
    """Return a ``dstmp_*.txt`` path inside *filedir* that does not exist.

    A temporary file is created to obtain a unique name and is then
    closed and removed, leaving only the path.
    """
    handle, filename = mkstemp(dir=filedir, prefix='dstmp_', suffix='.txt')
    os.close(handle)
    os.remove(filename)
    return filename
def valid_httpurl():
    """Return the full "valid" URL: ``http_path`` followed by ``http_file``."""
    return f'{http_path}{http_file}'
def invalid_httpurl():
    """Return the full "invalid" URL: ``http_fakepath`` plus ``http_fakefile``."""
    return f'{http_fakepath}{http_fakefile}'
def valid_baseurl():
    """Return the base URL (``http_path``) of the "valid" test site."""
    return http_path
def invalid_baseurl():
    """Return the base URL (``http_fakepath``) of the "invalid" test site."""
    return http_fakepath
def valid_httpfile():
    """Return the file name (``http_file``) of the "valid" test URL."""
    return http_file
def invalid_httpfile():
    """Return the file name (``http_fakefile``) of the "invalid" test URL."""
    return http_fakefile
class TestDataSourceOpen:
    """Tests for ``DataSource.open`` with URLs, local files and compressed files.

    Every handle obtained in a test is managed by a ``with`` block so it is
    closed even when an assertion fails; a leaked handle could otherwise
    keep ``teardown_method`` from removing the temporary directory on
    platforms that refuse to delete open files.
    """

    def setup_method(self):
        # Each test gets its own destination directory and DataSource.
        self.tmpdir = mkdtemp()
        self.ds = datasource.DataSource(self.tmpdir)

    def teardown_method(self):
        rmtree(self.tmpdir)
        del self.ds

    def test_ValidHTTP(self):
        with self.ds.open(valid_httpurl()) as fh:
            assert_(fh)

    def test_InvalidHTTP(self):
        url = invalid_httpurl()
        # A single call both proves that OSError is raised and captures the
        # exception, so the errno check below can never be skipped silently.
        with pytest.raises(OSError) as excinfo:
            self.ds.open(url)
        # Regression test for bug fixed in r4342.
        assert_(excinfo.value.errno is None)

    def test_InvalidHTTPCacheURLError(self):
        assert_raises(URLError, self.ds._cache, invalid_httpurl())

    def test_ValidFile(self):
        local_file = valid_textfile(self.tmpdir)
        with self.ds.open(local_file) as fh:
            assert_(fh)

    def test_InvalidFile(self):
        invalid_file = invalid_textfile(self.tmpdir)
        assert_raises(OSError, self.ds.open, invalid_file)

    def test_ValidGzipFile(self):
        # Skip (with a reason) when gzip support is not available.
        gzip = pytest.importorskip(
            'gzip', reason="gzip capabilities are not available")
        # Test datasource's internal file_opener for Gzip files.
        filepath = os.path.join(self.tmpdir, 'foobar.txt.gz')
        with gzip.open(filepath, 'w') as fp:
            fp.write(magic_line)
        with self.ds.open(filepath) as fp:
            result = fp.readline()
        assert_equal(magic_line, result)

    def test_ValidBz2File(self):
        # Skip (with a reason) when bz2 support is not available.
        bz2 = pytest.importorskip(
            'bz2', reason="bz2 capabilities are not available")
        # Test datasource's internal file_opener for BZip2 files.
        filepath = os.path.join(self.tmpdir, 'foobar.txt.bz2')
        with bz2.BZ2File(filepath, 'w') as fp:
            fp.write(magic_line)
        with self.ds.open(filepath) as fp:
            result = fp.readline()
        assert_equal(magic_line, result)
class TestDataSourceExists:
    """Tests for ``DataSource.exists`` with URLs and local files."""

    def setup_method(self):
        # Each test gets its own destination directory and DataSource.
        self.tmpdir = mkdtemp()
        self.ds = datasource.DataSource(self.tmpdir)

    def teardown_method(self):
        rmtree(self.tmpdir)
        del self.ds

    def test_ValidHTTP(self):
        assert_(self.ds.exists(valid_httpurl()))

    def test_InvalidHTTP(self):
        assert_equal(self.ds.exists(invalid_httpurl()), False)

    def test_ValidFile(self):
        # Test valid file in destpath
        tmpfile = valid_textfile(self.tmpdir)
        assert_(self.ds.exists(tmpfile))
        # Test valid local file not in destpath
        localdir = mkdtemp()
        try:
            tmpfile = valid_textfile(localdir)
            assert_(self.ds.exists(tmpfile))
        finally:
            # teardown_method only removes self.tmpdir, so this extra
            # directory must be removed here even if the assertion fails.
            rmtree(localdir)

    def test_InvalidFile(self):
        tmpfile = invalid_textfile(self.tmpdir)
        assert_equal(self.ds.exists(tmpfile), False)
class TestDataSourceAbspath:
    """Tests for ``DataSource.abspath``: path mapping and sandboxing."""

    def setup_method(self):
        self.tmpdir = os.path.abspath(mkdtemp())
        self.ds = datasource.DataSource(self.tmpdir)

    def teardown_method(self):
        rmtree(self.tmpdir)
        del self.ds

    def test_ValidHTTP(self):
        # Expected location: <tmpdir>/<netloc>/<url path without separators>.
        parts = urlparse(valid_httpurl())
        relative = parts.path.strip(os.sep).strip('/')
        expected = os.path.join(self.tmpdir, parts.netloc, relative)
        assert_equal(expected, self.ds.abspath(valid_httpurl()))

    def test_ValidFile(self):
        full_path = valid_textfile(self.tmpdir)
        bare_name = os.path.basename(full_path)
        # Both the bare file name and the complete path must resolve to
        # the same file.
        for candidate in (bare_name, full_path):
            assert_equal(full_path, self.ds.abspath(candidate))

    def test_InvalidHTTP(self):
        # The location derived from the fake URL must differ from the
        # location reported for the valid URL.
        parts = urlparse(invalid_httpurl())
        relative = parts.path.strip(os.sep).strip('/')
        fake_location = os.path.join(self.tmpdir, parts.netloc, relative)
        assert_(fake_location != self.ds.abspath(valid_httpurl()))

    def test_InvalidFile(self):
        other_file = valid_textfile(self.tmpdir)
        full_path = valid_textfile(self.tmpdir)
        bare_name = os.path.basename(full_path)
        # Neither spelling of the second file may resolve to the first one.
        for candidate in (bare_name, full_path):
            assert_(other_file != self.ds.abspath(candidate))

    def test_sandboxing(self):
        full_path = valid_textfile(self.tmpdir)
        bare_name = os.path.basename(full_path)

        def resolved(name):
            return os.path.abspath(self.ds.abspath(name))

        for candidate in (valid_httpurl(), invalid_httpurl(),
                          full_path, bare_name):
            assert_(resolved(candidate).startswith(self.tmpdir))
        # Malicious names, with and without a URL prefix, must also stay
        # inside the destination directory.
        for fn in malicious_files:
            assert_(resolved(http_path+fn).startswith(self.tmpdir))
            assert_(resolved(fn).startswith(self.tmpdir))

    def test_windows_os_sep(self):
        # Re-run the path tests with os.sep temporarily set to a backslash.
        saved_sep = os.sep
        try:
            os.sep = '\\'
            for check in (self.test_ValidHTTP, self.test_ValidFile,
                          self.test_InvalidHTTP, self.test_InvalidFile,
                          self.test_sandboxing):
                check()
        finally:
            os.sep = saved_sep
class TestRepositoryAbspath:
    """Tests for ``Repository.abspath``: path mapping and sandboxing."""

    def setup_method(self):
        self.tmpdir = os.path.abspath(mkdtemp())
        self.repos = datasource.Repository(valid_baseurl(), self.tmpdir)

    def teardown_method(self):
        rmtree(self.tmpdir)
        del self.repos

    def test_ValidHTTP(self):
        # A bare file name is expected to map to
        # <destpath>/<netloc>/<url path> of the full valid URL.
        parts = urlparse(valid_httpurl())
        relative = parts.path.strip(os.sep).strip('/')
        expected = os.path.join(self.repos._destpath, parts.netloc, relative)
        actual = self.repos.abspath(valid_httpfile())
        assert_equal(expected, actual)

    def test_sandboxing(self):
        def resolved(name):
            return os.path.abspath(self.repos.abspath(name))

        assert_(resolved(valid_httpfile()).startswith(self.tmpdir))
        # Malicious names, with and without a URL prefix, must also stay
        # inside the destination directory.
        for fn in malicious_files:
            assert_(resolved(http_path+fn).startswith(self.tmpdir))
            assert_(resolved(fn).startswith(self.tmpdir))

    def test_windows_os_sep(self):
        # Re-run the path tests with os.sep temporarily set to a backslash.
        saved_sep = os.sep
        try:
            os.sep = '\\'
            for check in (self.test_ValidHTTP, self.test_sandboxing):
                check()
        finally:
            os.sep = saved_sep
class TestRepositoryExists:
    """Tests for ``Repository.exists`` with local, remote and cached files."""

    def setup_method(self):
        self.tmpdir = mkdtemp()
        self.repos = datasource.Repository(valid_baseurl(), self.tmpdir)

    def teardown_method(self):
        rmtree(self.tmpdir)
        del self.repos

    def test_ValidFile(self):
        # A freshly created local temp file must be reported as existing.
        existing = valid_textfile(self.tmpdir)
        assert_(self.repos.exists(existing))

    def test_InvalidFile(self):
        missing = invalid_textfile(self.tmpdir)
        assert_equal(self.repos.exists(missing), False)

    def test_RemoveHTTPFile(self):
        assert_(self.repos.exists(valid_httpurl()))

    def test_CachedHTTPFile(self):
        # Create a locally cached temp file with an URL based
        # directory structure.  This is similar to what Repository.open
        # would do.
        netloc = urlparse(valid_httpurl()).netloc
        cache_dir = os.path.join(self.repos._destpath, netloc)
        os.mkdir(cache_dir, 0o0700)
        cached = valid_textfile(cache_dir)
        assert_(self.repos.exists(cached))
class TestOpenFunc:
    """Tests for the module-level ``datasource.open`` convenience function."""

    def setup_method(self):
        self.tmpdir = mkdtemp()

    def teardown_method(self):
        rmtree(self.tmpdir)

    def test_DataSourceOpen(self):
        local_file = valid_textfile(self.tmpdir)
        # ``with`` closes each handle even if the assertion fails, so the
        # temporary directory can always be removed in teardown.
        # Test case where destpath is passed in
        with datasource.open(local_file, destpath=self.tmpdir) as fp:
            assert_(fp)
        # Test case where default destpath is used
        with datasource.open(local_file) as fp:
            assert_(fp)
def test_del_attr_handling():
    """Check that ``DataSource.__del__`` tolerates a partially built object.

    ``__del__`` can run even when ``__init__`` failed, if the caller
    catches the exception (as happens in the refguide_check
    ``is_deprecated()`` function).
    """
    source = datasource.DataSource()
    # Simulate a failed __init__ by dropping an attribute that __init__
    # creates and __del__ expects.
    del source._istmpdest
    # Must not raise AttributeError if __del__ handles the failed
    # __init__ gracefully.
    source.__del__()
|