_fetchers.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. from numpy import array, frombuffer, load
  2. from ._registry import registry, registry_urls
  3. try:
  4. import pooch
  5. except ImportError:
  6. pooch = None
  7. data_fetcher = None
  8. else:
  9. data_fetcher = pooch.create(
  10. # Use the default cache folder for the operating system
  11. # Pooch uses appdirs (https://github.com/ActiveState/appdirs) to
  12. # select an appropriate directory for the cache on each platform.
  13. path=pooch.os_cache("scipy-data"),
  14. # The remote data is on Github
  15. # base_url is a required param, even though we override this
  16. # using individual urls in the registry.
  17. base_url="https://github.com/scipy/",
  18. registry=registry,
  19. urls=registry_urls
  20. )
  21. def fetch_data(dataset_name, data_fetcher=data_fetcher):
  22. if data_fetcher is None:
  23. raise ImportError("Missing optional dependency 'pooch' required "
  24. "for scipy.datasets module. Please use pip or "
  25. "conda to install 'pooch'.")
  26. # The "fetch" method returns the full path to the downloaded data file.
  27. return data_fetcher.fetch(dataset_name)
  28. def ascent():
  29. """
  30. Get an 8-bit grayscale bit-depth, 512 x 512 derived image for easy
  31. use in demos.
  32. The image is derived from accent-to-the-top.jpg at
  33. http://www.public-domain-image.com/people-public-domain-images-pictures/
  34. Parameters
  35. ----------
  36. None
  37. Returns
  38. -------
  39. ascent : ndarray
  40. convenient image to use for testing and demonstration
  41. Examples
  42. --------
  43. >>> import scipy.datasets
  44. >>> ascent = scipy.datasets.ascent()
  45. >>> ascent.shape
  46. (512, 512)
  47. >>> ascent.max()
  48. 255
  49. >>> import matplotlib.pyplot as plt
  50. >>> plt.gray()
  51. >>> plt.imshow(ascent)
  52. >>> plt.show()
  53. """
  54. import pickle
  55. # The file will be downloaded automatically the first time this is run,
  56. # returning the path to the downloaded file. Afterwards, Pooch finds
  57. # it in the local cache and doesn't repeat the download.
  58. fname = fetch_data("ascent.dat")
  59. # Now we just need to load it with our standard Python tools.
  60. with open(fname, 'rb') as f:
  61. ascent = array(pickle.load(f))
  62. return ascent
  63. def electrocardiogram():
  64. """
  65. Load an electrocardiogram as an example for a 1-D signal.
  66. The returned signal is a 5 minute long electrocardiogram (ECG), a medical
  67. recording of the heart's electrical activity, sampled at 360 Hz.
  68. Returns
  69. -------
  70. ecg : ndarray
  71. The electrocardiogram in millivolt (mV) sampled at 360 Hz.
  72. Notes
  73. -----
  74. The provided signal is an excerpt (19:35 to 24:35) from the `record 208`_
  75. (lead MLII) provided by the MIT-BIH Arrhythmia Database [1]_ on
  76. PhysioNet [2]_. The excerpt includes noise induced artifacts, typical
  77. heartbeats as well as pathological changes.
  78. .. _record 208: https://physionet.org/physiobank/database/html/mitdbdir/records.htm#208
  79. .. versionadded:: 1.1.0
  80. References
  81. ----------
  82. .. [1] Moody GB, Mark RG. The impact of the MIT-BIH Arrhythmia Database.
  83. IEEE Eng in Med and Biol 20(3):45-50 (May-June 2001).
  84. (PMID: 11446209); :doi:`10.13026/C2F305`
  85. .. [2] Goldberger AL, Amaral LAN, Glass L, Hausdorff JM, Ivanov PCh,
  86. Mark RG, Mietus JE, Moody GB, Peng C-K, Stanley HE. PhysioBank,
  87. PhysioToolkit, and PhysioNet: Components of a New Research Resource
  88. for Complex Physiologic Signals. Circulation 101(23):e215-e220;
  89. :doi:`10.1161/01.CIR.101.23.e215`
  90. Examples
  91. --------
  92. >>> from scipy.datasets import electrocardiogram
  93. >>> ecg = electrocardiogram()
  94. >>> ecg
  95. array([-0.245, -0.215, -0.185, ..., -0.405, -0.395, -0.385])
  96. >>> ecg.shape, ecg.mean(), ecg.std()
  97. ((108000,), -0.16510875, 0.5992473991177294)
  98. As stated the signal features several areas with a different morphology.
  99. E.g., the first few seconds show the electrical activity of a heart in
  100. normal sinus rhythm as seen below.
  101. >>> import numpy as np
  102. >>> import matplotlib.pyplot as plt
  103. >>> fs = 360
  104. >>> time = np.arange(ecg.size) / fs
  105. >>> plt.plot(time, ecg)
  106. >>> plt.xlabel("time in s")
  107. >>> plt.ylabel("ECG in mV")
  108. >>> plt.xlim(9, 10.2)
  109. >>> plt.ylim(-1, 1.5)
  110. >>> plt.show()
  111. After second 16, however, the first premature ventricular contractions,
  112. also called extrasystoles, appear. These have a different morphology
  113. compared to typical heartbeats. The difference can easily be observed
  114. in the following plot.
  115. >>> plt.plot(time, ecg)
  116. >>> plt.xlabel("time in s")
  117. >>> plt.ylabel("ECG in mV")
  118. >>> plt.xlim(46.5, 50)
  119. >>> plt.ylim(-2, 1.5)
  120. >>> plt.show()
  121. At several points large artifacts disturb the recording, e.g.:
  122. >>> plt.plot(time, ecg)
  123. >>> plt.xlabel("time in s")
  124. >>> plt.ylabel("ECG in mV")
  125. >>> plt.xlim(207, 215)
  126. >>> plt.ylim(-2, 3.5)
  127. >>> plt.show()
  128. Finally, examining the power spectrum reveals that most of the biosignal is
  129. made up of lower frequencies. At 60 Hz the noise induced by the mains
  130. electricity can be clearly observed.
  131. >>> from scipy.signal import welch
  132. >>> f, Pxx = welch(ecg, fs=fs, nperseg=2048, scaling="spectrum")
  133. >>> plt.semilogy(f, Pxx)
  134. >>> plt.xlabel("Frequency in Hz")
  135. >>> plt.ylabel("Power spectrum of the ECG in mV**2")
  136. >>> plt.xlim(f[[0, -1]])
  137. >>> plt.show()
  138. """
  139. fname = fetch_data("ecg.dat")
  140. with load(fname) as file:
  141. ecg = file["ecg"].astype(int) # np.uint16 -> int
  142. # Convert raw output of ADC to mV: (ecg - adc_zero) / adc_gain
  143. ecg = (ecg - 1024) / 200.0
  144. return ecg
  145. def face(gray=False):
  146. """
  147. Get a 1024 x 768, color image of a raccoon face.
  148. raccoon-procyon-lotor.jpg at http://www.public-domain-image.com
  149. Parameters
  150. ----------
  151. gray : bool, optional
  152. If True return 8-bit grey-scale image, otherwise return a color image
  153. Returns
  154. -------
  155. face : ndarray
  156. image of a racoon face
  157. Examples
  158. --------
  159. >>> import scipy.datasets
  160. >>> face = scipy.datasets.face()
  161. >>> face.shape
  162. (768, 1024, 3)
  163. >>> face.max()
  164. 255
  165. >>> face.dtype
  166. dtype('uint8')
  167. >>> import matplotlib.pyplot as plt
  168. >>> plt.gray()
  169. >>> plt.imshow(face)
  170. >>> plt.show()
  171. """
  172. import bz2
  173. fname = fetch_data("face.dat")
  174. with open(fname, 'rb') as f:
  175. rawdata = f.read()
  176. face_data = bz2.decompress(rawdata)
  177. face = frombuffer(face_data, dtype='uint8')
  178. face.shape = (768, 1024, 3)
  179. if gray is True:
  180. face = (0.21 * face[:, :, 0] + 0.71 * face[:, :, 1] +
  181. 0.07 * face[:, :, 2]).astype('uint8')
  182. return face