conftest.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. DataFrame,
  5. NaT,
  6. date_range,
  7. )
  8. import pandas._testing as tm
  9. @pytest.fixture
  10. def float_frame_with_na():
  11. """
  12. Fixture for DataFrame of floats with index of unique strings
  13. Columns are ['A', 'B', 'C', 'D']; some entries are missing
  14. A B C D
  15. ABwBzA0ljw -1.128865 -0.897161 0.046603 0.274997
  16. DJiRzmbyQF 0.728869 0.233502 0.722431 -0.890872
  17. neMgPD5UBF 0.486072 -1.027393 -0.031553 1.449522
  18. 0yWA4n8VeX -1.937191 -1.142531 0.805215 -0.462018
  19. 3slYUbbqU1 0.153260 1.164691 1.489795 -0.545826
  20. soujjZ0A08 NaN NaN NaN NaN
  21. 7W6NLGsjB9 NaN NaN NaN NaN
  22. ... ... ... ... ...
  23. uhfeaNkCR1 -0.231210 -0.340472 0.244717 -0.901590
  24. n6p7GYuBIV -0.419052 1.922721 -0.125361 -0.727717
  25. ZhzAeY6p1y 1.234374 -1.425359 -0.827038 -0.633189
  26. uWdPsORyUh 0.046738 -0.980445 -1.102965 0.605503
  27. 3DJA6aN590 -0.091018 -1.684734 -1.100900 0.215947
  28. 2GBPAzdbMk -2.883405 -1.021071 1.209877 1.633083
  29. sHadBoyVHw -2.223032 -0.326384 0.258931 0.245517
  30. [30 rows x 4 columns]
  31. """
  32. df = DataFrame(tm.getSeriesData())
  33. # set some NAs
  34. df.iloc[5:10] = np.nan
  35. df.iloc[15:20, -2:] = np.nan
  36. return df
  37. @pytest.fixture
  38. def bool_frame_with_na():
  39. """
  40. Fixture for DataFrame of booleans with index of unique strings
  41. Columns are ['A', 'B', 'C', 'D']; some entries are missing
  42. A B C D
  43. zBZxY2IDGd False False False False
  44. IhBWBMWllt False True True True
  45. ctjdvZSR6R True False True True
  46. AVTujptmxb False True False True
  47. G9lrImrSWq False False False True
  48. sFFwdIUfz2 NaN NaN NaN NaN
  49. s15ptEJnRb NaN NaN NaN NaN
  50. ... ... ... ... ...
  51. UW41KkDyZ4 True True False False
  52. l9l6XkOdqV True False False False
  53. X2MeZfzDYA False True False False
  54. xWkIKU7vfX False True False True
  55. QOhL6VmpGU False False False True
  56. 22PwkRJdat False True False False
  57. kfboQ3VeIK True False True False
  58. [30 rows x 4 columns]
  59. """
  60. df = DataFrame(tm.getSeriesData()) > 0
  61. df = df.astype(object)
  62. # set some NAs
  63. df.iloc[5:10] = np.nan
  64. df.iloc[15:20, -2:] = np.nan
  65. # For `any` tests we need to have at least one True before the first NaN
  66. # in each column
  67. for i in range(4):
  68. df.iloc[i, i] = True
  69. return df
  70. @pytest.fixture
  71. def float_string_frame():
  72. """
  73. Fixture for DataFrame of floats and strings with index of unique strings
  74. Columns are ['A', 'B', 'C', 'D', 'foo'].
  75. A B C D foo
  76. w3orJvq07g -1.594062 -1.084273 -1.252457 0.356460 bar
  77. PeukuVdmz2 0.109855 -0.955086 -0.809485 0.409747 bar
  78. ahp2KvwiM8 -1.533729 -0.142519 -0.154666 1.302623 bar
  79. 3WSJ7BUCGd 2.484964 0.213829 0.034778 -2.327831 bar
  80. khdAmufk0U -0.193480 -0.743518 -0.077987 0.153646 bar
  81. LE2DZiFlrE -0.193566 -1.343194 -0.107321 0.959978 bar
  82. HJXSJhVn7b 0.142590 1.257603 -0.659409 -0.223844 bar
  83. ... ... ... ... ... ...
  84. 9a1Vypttgw -1.316394 1.601354 0.173596 1.213196 bar
  85. h5d1gVFbEy 0.609475 1.106738 -0.155271 0.294630 bar
  86. mK9LsTQG92 1.303613 0.857040 -1.019153 0.369468 bar
  87. oOLksd9gKH 0.558219 -0.134491 -0.289869 -0.951033 bar
  88. 9jgoOjKyHg 0.058270 -0.496110 -0.413212 -0.852659 bar
  89. jZLDHclHAO 0.096298 1.267510 0.549206 -0.005235 bar
  90. lR0nxDp1C2 -2.119350 -0.794384 0.544118 0.145849 bar
  91. [30 rows x 5 columns]
  92. """
  93. df = DataFrame(tm.getSeriesData())
  94. df["foo"] = "bar"
  95. return df
  96. @pytest.fixture
  97. def mixed_float_frame():
  98. """
  99. Fixture for DataFrame of different float types with index of unique strings
  100. Columns are ['A', 'B', 'C', 'D'].
  101. A B C D
  102. GI7bbDaEZe -0.237908 -0.246225 -0.468506 0.752993
  103. KGp9mFepzA -1.140809 -0.644046 -1.225586 0.801588
  104. VeVYLAb1l2 -1.154013 -1.677615 0.690430 -0.003731
  105. kmPME4WKhO 0.979578 0.998274 -0.776367 0.897607
  106. CPyopdXTiz 0.048119 -0.257174 0.836426 0.111266
  107. 0kJZQndAj0 0.274357 -0.281135 -0.344238 0.834541
  108. tqdwQsaHG8 -0.979716 -0.519897 0.582031 0.144710
  109. ... ... ... ... ...
  110. 7FhZTWILQj -2.906357 1.261039 -0.780273 -0.537237
  111. 4pUDPM4eGq -2.042512 -0.464382 -0.382080 1.132612
  112. B8dUgUzwTi -1.506637 -0.364435 1.087891 0.297653
  113. hErlVYjVv9 1.477453 -0.495515 -0.713867 1.438427
  114. 1BKN3o7YLs 0.127535 -0.349812 -0.881836 0.489827
  115. 9S4Ekn7zga 1.445518 -2.095149 0.031982 0.373204
  116. xN1dNn6OV6 1.425017 -0.983995 -0.363281 -0.224502
  117. [30 rows x 4 columns]
  118. """
  119. df = DataFrame(tm.getSeriesData())
  120. df.A = df.A.astype("float32")
  121. df.B = df.B.astype("float32")
  122. df.C = df.C.astype("float16")
  123. df.D = df.D.astype("float64")
  124. return df
  125. @pytest.fixture
  126. def mixed_int_frame():
  127. """
  128. Fixture for DataFrame of different int types with index of unique strings
  129. Columns are ['A', 'B', 'C', 'D'].
  130. A B C D
  131. mUrCZ67juP 0 1 2 2
  132. rw99ACYaKS 0 1 0 0
  133. 7QsEcpaaVU 0 1 1 1
  134. xkrimI2pcE 0 1 0 0
  135. dz01SuzoS8 0 1 255 255
  136. ccQkqOHX75 -1 1 0 0
  137. DN0iXaoDLd 0 1 0 0
  138. ... .. .. ... ...
  139. Dfb141wAaQ 1 1 254 254
  140. IPD8eQOVu5 0 1 0 0
  141. CcaKulsCmv 0 1 0 0
  142. rIBa8gu7E5 0 1 0 0
  143. RP6peZmh5o 0 1 1 1
  144. NMb9pipQWQ 0 1 0 0
  145. PqgbJEzjib 0 1 3 3
  146. [30 rows x 4 columns]
  147. """
  148. df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
  149. df.A = df.A.astype("int32")
  150. df.B = np.ones(len(df.B), dtype="uint64")
  151. df.C = df.C.astype("uint8")
  152. df.D = df.C.astype("int64")
  153. return df
  154. @pytest.fixture
  155. def timezone_frame():
  156. """
  157. Fixture for DataFrame of date_range Series with different time zones
  158. Columns are ['A', 'B', 'C']; some entries are missing
  159. A B C
  160. 0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00
  161. 1 2013-01-02 NaT NaT
  162. 2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00
  163. """
  164. df = DataFrame(
  165. {
  166. "A": date_range("20130101", periods=3),
  167. "B": date_range("20130101", periods=3, tz="US/Eastern"),
  168. "C": date_range("20130101", periods=3, tz="CET"),
  169. }
  170. )
  171. df.iloc[1, 1] = NaT
  172. df.iloc[1, 2] = NaT
  173. return df
  174. @pytest.fixture
  175. def uint64_frame():
  176. """
  177. Fixture for DataFrame with uint64 values
  178. Columns are ['A', 'B']
  179. """
  180. return DataFrame(
  181. {"A": np.arange(3), "B": [2**63, 2**63 + 5, 2**63 + 10]}, dtype=np.uint64
  182. )
  183. @pytest.fixture
  184. def simple_frame():
  185. """
  186. Fixture for simple 3x3 DataFrame
  187. Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c'].
  188. one two three
  189. a 1.0 2.0 3.0
  190. b 4.0 5.0 6.0
  191. c 7.0 8.0 9.0
  192. """
  193. arr = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
  194. return DataFrame(arr, columns=["one", "two", "three"], index=["a", "b", "c"])
  195. @pytest.fixture
  196. def frame_of_index_cols():
  197. """
  198. Fixture for DataFrame of columns that can be used for indexing
  199. Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')];
  200. 'A' & 'B' contain duplicates (but are jointly unique), the rest are unique.
  201. A B C D E (tuple, as, label)
  202. 0 foo one a 0.608477 -0.012500 -1.664297
  203. 1 foo two b -0.633460 0.249614 -0.364411
  204. 2 foo three c 0.615256 2.154968 -0.834666
  205. 3 bar one d 0.234246 1.085675 0.718445
  206. 4 bar two e 0.533841 -0.005702 -3.533912
  207. """
  208. df = DataFrame(
  209. {
  210. "A": ["foo", "foo", "foo", "bar", "bar"],
  211. "B": ["one", "two", "three", "one", "two"],
  212. "C": ["a", "b", "c", "d", "e"],
  213. "D": np.random.randn(5),
  214. "E": np.random.randn(5),
  215. ("tuple", "as", "label"): np.random.randn(5),
  216. }
  217. )
  218. return df