test_nanops.py 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316
  1. from functools import partial
  2. import operator
  3. import warnings
  4. import numpy as np
  5. import pytest
  6. import pandas.util._test_decorators as td
  7. from pandas.core.dtypes.common import is_integer_dtype
  8. import pandas as pd
  9. from pandas import (
  10. Series,
  11. isna,
  12. )
  13. import pandas._testing as tm
  14. from pandas.core import nanops
  15. from pandas.core.arrays import DatetimeArray
  # Bottleneck setting captured at import time; ``teardown_method`` in
  # ``TestnanopsDataFrame`` writes it back to ``nanops._USE_BOTTLENECK``
  # after ``setup_method`` has switched it off.
  use_bn = nanops._USE_BOTTLENECK
  @pytest.fixture
  def disable_bottleneck(monkeypatch):
      """Run the requesting test with ``nanops._USE_BOTTLENECK`` set to False.

      The attribute is patched inside a ``monkeypatch.context()`` block that
      stays open while the test runs (the fixture yields inside it).
      """
      with monkeypatch.context() as patcher:
          patcher.setattr(nanops, "_USE_BOTTLENECK", False)
          yield
  @pytest.fixture
  def arr_shape():
      """Return the (rows, columns) shape used by the array fixtures."""
      n_rows, n_cols = 11, 7
      return n_rows, n_cols
  @pytest.fixture
  def arr_float(arr_shape):
      """Return a standard-normal float array of shape ``arr_shape``.

      numpy's global RNG is re-seeded first, so the values are reproducible.
      """
      np.random.seed(11235)
      values = np.random.randn(*arr_shape)
      return values
  @pytest.fixture
  def arr_complex(arr_float):
      """Return a complex array whose real and imaginary parts are ``arr_float``."""
      imaginary = arr_float * 1j
      return arr_float + imaginary
  @pytest.fixture
  def arr_int(arr_shape):
      """Return seeded random integers in ``[-10, 10)`` with shape ``arr_shape``."""
      np.random.seed(11235)
      return np.random.randint(low=-10, high=10, size=arr_shape)
  @pytest.fixture
  def arr_bool(arr_shape):
      """Return a seeded random boolean array of shape ``arr_shape``.

      An element is True where the underlying 0/1 draw equals 0.
      """
      np.random.seed(11235)
      draws = np.random.randint(low=0, high=2, size=arr_shape)
      return draws == 0
  @pytest.fixture
  def arr_str(arr_float):
      """Return ``abs(arr_float)`` converted to a bytes-string ("S") array."""
      magnitudes = np.abs(arr_float)
      return magnitudes.astype("S")
  @pytest.fixture
  def arr_utf(arr_float):
      """Return ``abs(arr_float)`` converted to a unicode-string ("U") array."""
      magnitudes = np.abs(arr_float)
      return magnitudes.astype("U")
  @pytest.fixture
  def arr_date(arr_shape):
      """Return seeded random integers in ``[0, 20000)`` viewed as ``M8[ns]``."""
      np.random.seed(11235)
      ticks = np.random.randint(low=0, high=20000, size=arr_shape)
      return ticks.astype("M8[ns]")
  @pytest.fixture
  def arr_tdelta(arr_shape):
      """Return seeded random integers in ``[0, 20000)`` viewed as ``m8[ns]``."""
      np.random.seed(11235)
      ticks = np.random.randint(low=0, high=20000, size=arr_shape)
      return ticks.astype("m8[ns]")
  @pytest.fixture
  def arr_nan(arr_shape):
      """Return an all-NaN array of shape ``arr_shape``."""
      fill_value = np.nan
      return np.tile(fill_value, arr_shape)
  @pytest.fixture
  def arr_float_nan(arr_float, arr_nan):
      """Return ``arr_float`` with the all-NaN block stacked below it."""
      blocks = (arr_float, arr_nan)
      return np.vstack(blocks)
  @pytest.fixture
  def arr_nan_float1(arr_nan, arr_float):
      """Return the all-NaN block with ``arr_float`` stacked below it."""
      blocks = (arr_nan, arr_float)
      return np.vstack(blocks)
  @pytest.fixture
  def arr_nan_nan(arr_nan):
      """Return two copies of the all-NaN block stacked vertically."""
      blocks = (arr_nan, arr_nan)
      return np.vstack(blocks)
  @pytest.fixture
  def arr_inf(arr_float):
      """Return ``arr_float`` multiplied element-wise by ``np.inf``."""
      infinity = np.inf
      return arr_float * infinity
  @pytest.fixture
  def arr_float_inf(arr_float, arr_inf):
      """Return ``arr_float`` with the infinite block stacked below it."""
      blocks = (arr_float, arr_inf)
      return np.vstack(blocks)
  @pytest.fixture
  def arr_nan_inf(arr_nan, arr_inf):
      """Return the all-NaN block with the infinite block stacked below it."""
      blocks = (arr_nan, arr_inf)
      return np.vstack(blocks)
  @pytest.fixture
  def arr_float_nan_inf(arr_float, arr_nan, arr_inf):
      """Return the float, all-NaN and infinite blocks stacked in that order."""
      blocks = (arr_float, arr_nan, arr_inf)
      return np.vstack(blocks)
  @pytest.fixture
  def arr_nan_nan_inf(arr_nan, arr_inf):
      """Return two all-NaN blocks followed by the infinite block."""
      blocks = (arr_nan, arr_nan, arr_inf)
      return np.vstack(blocks)
  @pytest.fixture
  def arr_obj(
      arr_float, arr_int, arr_bool, arr_complex, arr_str, arr_utf, arr_date, arr_tdelta
  ):
      """Return all typed fixtures cast to object dtype and stacked vertically.

      The stacking order is float, int, bool, complex, bytes, unicode,
      datetime, timedelta.
      """
      typed_blocks = (
          arr_float,
          arr_int,
          arr_bool,
          arr_complex,
          arr_str,
          arr_utf,
          arr_date,
          arr_tdelta,
      )
      return np.vstack([block.astype("O") for block in typed_blocks])
  @pytest.fixture
  def arr_nan_nanj(arr_nan):
      """Return the complex array ``arr_nan + arr_nan * 1j``.

      Floating-point "invalid" warnings are suppressed while it is built.
      """
      with np.errstate(invalid="ignore"):
          imaginary = arr_nan * 1j
          return arr_nan + imaginary
  @pytest.fixture
  def arr_complex_nan(arr_complex, arr_nan_nanj):
      """Return ``arr_complex`` with the complex-NaN block stacked below it."""
      blocks = (arr_complex, arr_nan_nanj)
      with np.errstate(invalid="ignore"):
          return np.vstack(blocks)
  @pytest.fixture
  def arr_nan_infj(arr_inf):
      """Return ``arr_inf * 1j`` with "invalid" warnings suppressed."""
      unit_imaginary = 1j
      with np.errstate(invalid="ignore"):
          return arr_inf * unit_imaginary
  @pytest.fixture
  def arr_complex_nan_infj(arr_complex, arr_nan_infj):
      """Return ``arr_complex`` with the ``arr_nan_infj`` block stacked below it."""
      blocks = (arr_complex, arr_nan_infj)
      with np.errstate(invalid="ignore"):
          return np.vstack(blocks)
  @pytest.fixture
  def arr_float_1d(arr_float):
      """Return the first column of ``arr_float``."""
      first_column = 0
      return arr_float[:, first_column]
  @pytest.fixture
  def arr_nan_1d(arr_nan):
      """Return the first column of ``arr_nan``."""
      first_column = 0
      return arr_nan[:, first_column]
  @pytest.fixture
  def arr_float_nan_1d(arr_float_nan):
      """Return the first column of ``arr_float_nan``."""
      first_column = 0
      return arr_float_nan[:, first_column]
  @pytest.fixture
  def arr_float1_nan_1d(arr_float1_nan):
      """Return the first column of ``arr_float1_nan``."""
      # NOTE(review): no ``arr_float1_nan`` fixture is defined in the visible
      # part of this module; confirm that it is provided elsewhere (e.g. a
      # conftest), otherwise requesting this fixture will fail.
      first_column = 0
      return arr_float1_nan[:, first_column]
  @pytest.fixture
  def arr_nan_float1_1d(arr_nan_float1):
      """Return the first column of ``arr_nan_float1``."""
      first_column = 0
      return arr_nan_float1[:, first_column]
  128. class TestnanopsDataFrame:
  def setup_method(self):
      """Build the array attributes used by the tests and disable bottleneck.

      Seeds numpy's global RNG, sets ``nanops._USE_BOTTLENECK`` to False and
      stores on ``self``: float/complex/int/bool/string/datetime/timedelta
      arrays, their NaN- and inf-padded stacks, an object-dtype stack, and
      ``*_2d`` / ``*_1d`` variants of the float and NaN arrays.
      """
      np.random.seed(11235)
      nanops._USE_BOTTLENECK = False

      shape = (11, 7)
      stack = np.vstack

      # The draws below consume the seeded global RNG in sequence; their
      # order must not change or the generated values will differ.
      self.arr_float = floats = np.random.randn(*shape)
      self.arr_float1 = floats1 = np.random.randn(*shape)
      self.arr_complex = floats + floats1 * 1j
      self.arr_int = np.random.randint(-10, 10, shape)
      self.arr_bool = np.random.randint(0, 2, shape) == 0
      self.arr_str = np.abs(floats).astype("S")
      self.arr_utf = np.abs(floats).astype("U")
      self.arr_date = np.random.randint(0, 20000, shape).astype("M8[ns]")
      self.arr_tdelta = np.random.randint(0, 20000, shape).astype("m8[ns]")

      # NaN-padded stacks
      self.arr_nan = nans = np.tile(np.nan, shape)
      self.arr_float_nan = stack([floats, nans])
      self.arr_float1_nan = stack([floats1, nans])
      self.arr_nan_float1 = stack([nans, floats1])
      self.arr_nan_nan = stack([nans, nans])

      # inf-padded stacks
      self.arr_inf = infs = floats * np.inf
      self.arr_float_inf = stack([floats, infs])
      self.arr_nan_inf = stack([nans, infs])
      self.arr_float_nan_inf = stack([floats, nans, infs])
      self.arr_nan_nan_inf = stack([nans, nans, infs])

      # every typed array, cast to object and stacked
      typed = (
          self.arr_float,
          self.arr_int,
          self.arr_bool,
          self.arr_complex,
          self.arr_str,
          self.arr_utf,
          self.arr_date,
          self.arr_tdelta,
      )
      self.arr_obj = stack([arr.astype("O") for arr in typed])

      with np.errstate(invalid="ignore"):
          self.arr_nan_nanj = nans + nans * 1j
          self.arr_complex_nan = stack([self.arr_complex, self.arr_nan_nanj])

          self.arr_nan_infj = infs * 1j
          self.arr_complex_nan_infj = stack([self.arr_complex, self.arr_nan_infj])

      # ``*_2d`` is the full array, ``*_1d`` its first column
      for name in (
          "arr_float",
          "arr_float1",
          "arr_nan",
          "arr_float_nan",
          "arr_float1_nan",
          "arr_nan_float1",
      ):
          full = getattr(self, name)
          setattr(self, name + "_2d", full)
          setattr(self, name + "_1d", full[:, 0])
  def teardown_method(self):
      """Write the module-level ``use_bn`` value back to ``nanops``."""
      setattr(nanops, "_USE_BOTTLENECK", use_bn)
  def check_results(self, targ, res, axis, check_dtype=True):
      """Assert that ``res`` (nearly) equals the reference value ``targ``.

      ``res`` is first replaced by its ``asm8`` attribute when it has one.
      For ``axis != 0`` a result whose shape differs from a non-scalar target
      is cut down to the first ``targ.shape[0]`` rows.  When the direct
      comparison fails and ``res`` has complex or object dtype, the real and
      imaginary parts are compared separately; in every other case the
      original ``AssertionError`` is re-raised.
      """
      res = getattr(res, "asm8", res)

      needs_trim = (
          axis != 0
          and hasattr(targ, "shape")
          and targ.ndim
          and targ.shape != res.shape
      )
      if needs_trim:
          res = np.split(res, [targ.shape[0]], axis=0)[0]

      try:
          tm.assert_almost_equal(targ, res, check_dtype=check_dtype)
      except AssertionError:
          # handle timedelta dtypes
          if hasattr(targ, "dtype") and targ.dtype == "m8[ns]":
              raise

          # There are sometimes rounding errors with complex and object
          # dtypes.  Anything else is a genuine failure.
          res_kind = getattr(getattr(res, "dtype", None), "kind", None)
          if res_kind not in ("c", "O"):
              raise

          if res_kind == "O":
              # convert object dtypes to something that can be split into
              # real and imaginary parts
              if targ.dtype.kind == "O":
                  common = "c16" if hasattr(np, "complex128") else "f8"
                  res = res.astype(common)
                  targ = targ.astype(common)
              else:
                  res = res.astype(targ.dtype)
          elif targ.dtype.kind == "O":
              # there should never be a case where numpy returns an object
              # but nanops doesn't, so make that an exception
              raise

          for component in (np.real, np.imag):
              tm.assert_almost_equal(
                  component(targ), component(res), check_dtype=check_dtype
              )
  def check_fun_data(
      self,
      testfunc,
      targfunc,
      testarval,
      targarval,
      skipna,
      check_dtype=True,
      empty_targfunc=None,
      **kwargs,
  ):
      """Compare ``testfunc`` with ``targfunc`` over every axis, then recurse.

      For each axis of ``targarval`` plus ``axis=None`` the reference value
      is computed with ``targfunc`` (or with ``empty_targfunc`` when
      ``skipna`` is set and the reference input is entirely NA) and checked
      against ``testfunc`` called with the explicit and the defaulted
      ``axis`` / ``skipna`` spellings.  Afterwards the same check runs on the
      index-0 slice along the last axis until the input is 1-D.
      """
      # With skipna the reference is computed on ``targarval``; otherwise
      # it is computed on the same array that is given to ``testfunc``.
      reference_input = targarval if skipna else testarval
      is_any_or_all = targfunc is np.any or targfunc is np.all

      for axis in [*range(targarval.ndim), None]:
          if skipna and empty_targfunc and isna(reference_input).all():
              targ = empty_targfunc(reference_input, axis=axis, **kwargs)
          else:
              targ = targfunc(reference_input, axis=axis, **kwargs)

          if reference_input.dtype == object and is_any_or_all:
              # GH#12863 the numpy functions will retain e.g. floatiness
              targ = targ.astype(bool) if isinstance(targ, np.ndarray) else bool(targ)

          # Keyword spellings of the call that must all give ``targ``.
          variants = [{"axis": axis, "skipna": skipna}]
          if skipna:
              variants.append({"axis": axis})
          if axis is None:
              variants.append({"skipna": skipna})
              if skipna:
                  variants.append({})

          for spelling in variants:
              res = testfunc(testarval, **spelling, **kwargs)
              self.check_results(targ, res, axis, check_dtype=check_dtype)

      if testarval.ndim <= 1:
          return

      # Recurse on lower-dimension
      self.check_fun_data(
          testfunc,
          targfunc,
          np.take(testarval, 0, axis=-1),
          np.take(targarval, 0, axis=-1),
          skipna=skipna,
          check_dtype=check_dtype,
          empty_targfunc=empty_targfunc,
          **kwargs,
      )
  def check_fun(
      self, testfunc, targfunc, testar, skipna, empty_targfunc=None, **kwargs
  ):
      """Look up the arrays named by ``testar`` and run ``check_fun_data``.

      ``testar`` names the attribute given to ``testfunc``.  When the name
      ends in ``"_nan"`` and the name without that suffix is also an
      attribute of ``self``, that attribute is used as the reference input;
      otherwise the reference input is the test array itself.
      """
      suffix = "_nan"
      stripped = testar[: -len(suffix)]
      has_clean_twin = testar.endswith(suffix) and hasattr(self, stripped)
      targar = stripped if has_clean_twin else testar

      self.check_fun_data(
          testfunc,
          targfunc,
          getattr(self, testar),
          getattr(self, targar),
          skipna=skipna,
          empty_targfunc=empty_targfunc,
          **kwargs,
      )
  def check_funs(
      self,
      testfunc,
      targfunc,
      skipna,
      allow_complex=True,
      allow_all_nan=True,
      allow_date=True,
      allow_tdelta=True,
      allow_obj=True,
      **kwargs,
  ):
      """Run ``check_fun`` for every array family the ``allow_*`` flags permit.

      Float, float-with-NaN, int and bool arrays are always checked.  The
      all-NaN, complex, datetime, timedelta and object arrays are added
      according to the flags.  ``self.arr_obj`` is rebuilt from the object
      casts of the arrays that were actually checked.  With
      ``allow_obj="convert"`` the reference function is wrapped in
      ``_badobj_wrap`` for the object check.
      """

      def run(name, reference=targfunc):
          self.check_fun(testfunc, reference, name, skipna, **kwargs)

      for name in ("arr_float", "arr_float_nan", "arr_int", "arr_bool"):
          run(name)
      objs = [arr.astype("O") for arr in (self.arr_float, self.arr_int, self.arr_bool)]

      if allow_all_nan:
          run("arr_nan")

      if allow_complex:
          run("arr_complex")
          run("arr_complex_nan")
          if allow_all_nan:
              run("arr_nan_nanj")
          objs.append(self.arr_complex.astype("O"))

      if allow_date:
          # Result unused: the call only has to succeed on datetimes.
          targfunc(self.arr_date)
          run("arr_date")
          objs.append(self.arr_date.astype("O"))

      if allow_tdelta:
          # Timedeltas are skipped when the reference function rejects them.
          try:
              targfunc(self.arr_tdelta)
          except TypeError:
              tdelta_supported = False
          else:
              tdelta_supported = True
          if tdelta_supported:
              run("arr_tdelta")
              objs.append(self.arr_tdelta.astype("O"))

      if not allow_obj:
          return

      self.arr_obj = np.vstack(objs)
      reference = targfunc
      if allow_obj == "convert":
          # some nanops handle object dtypes better than their numpy
          # counterparts, so the numpy functions need to be given something
          # else
          reference = partial(
              self._badobj_wrap, func=targfunc, allow_complex=allow_complex
          )
      run("arr_obj", reference)
  337. def _badobj_wrap(self, value, func, allow_complex=True, **kwargs):
  338. if value.dtype.kind == "O":
  339. if allow_complex:
  340. value = value.astype("c16")
  341. else:
  342. value = value.astype("f8")
  343. return func(value, **kwargs)
  @pytest.mark.parametrize(
      "nan_op,np_op", [(nanops.nanany, np.any), (nanops.nanall, np.all)]
  )
  def test_nan_funcs(self, nan_op, np_op, skipna):
      """Check ``nanany`` / ``nanall`` against ``np.any`` / ``np.all``.

      All-NaN and datetime inputs are excluded.
      """
      exclusions = {"allow_all_nan": False, "allow_date": False}
      self.check_funs(nan_op, np_op, skipna, **exclusions)
  349. def test_nansum(self, skipna):
  350. self.check_funs(
  351. nanops.nansum,
  352. np.sum,
  353. skipna,
  354. allow_date=False,
  355. check_dtype=False,
  356. empty_targfunc=np.nansum,
  357. )
  358. def test_nanmean(self, skipna):
  359. self.check_funs(
  360. nanops.nanmean, np.mean, skipna, allow_obj=False, allow_date=False
  361. )
  362. def test_nanmedian(self, skipna):
  363. with warnings.catch_warnings(record=True):
  364. warnings.simplefilter("ignore", RuntimeWarning)
  365. self.check_funs(
  366. nanops.nanmedian,
  367. np.median,
  368. skipna,
  369. allow_complex=False,
  370. allow_date=False,
  371. allow_obj="convert",
  372. )
  @pytest.mark.parametrize("ddof", range(3))
  def test_nanvar(self, ddof, skipna):
      """Check ``nanops.nanvar`` against ``np.var`` for ``ddof`` 0, 1 and 2."""
      restrictions = {
          "allow_complex": False,
          "allow_date": False,
          "allow_obj": "convert",
      }
      self.check_funs(nanops.nanvar, np.var, skipna, ddof=ddof, **restrictions)
  @pytest.mark.parametrize("ddof", range(3))
  def test_nanstd(self, ddof, skipna):
      """Check ``nanops.nanstd`` against ``np.std`` for ``ddof`` 0, 1 and 2."""
      restrictions = {
          "allow_complex": False,
          "allow_date": False,
          "allow_obj": "convert",
      }
      self.check_funs(nanops.nanstd, np.std, skipna, ddof=ddof, **restrictions)
  @td.skip_if_no_scipy
  @pytest.mark.parametrize("ddof", range(3))
  def test_nansem(self, ddof, skipna):
      """Check ``nanops.nansem`` against ``scipy.stats.sem``.

      Complex, datetime and timedelta inputs are excluded; floating-point
      "invalid" warnings are suppressed during the check.
      """
      from scipy.stats import sem

      restrictions = {
          "allow_complex": False,
          "allow_date": False,
          "allow_tdelta": False,
          "allow_obj": "convert",
      }
      with np.errstate(invalid="ignore"):
          self.check_funs(nanops.nansem, sem, skipna, ddof=ddof, **restrictions)
  @pytest.mark.parametrize(
      "nan_op,np_op", [(nanops.nanmin, np.min), (nanops.nanmax, np.max)]
  )
  def test_nanops_with_warnings(self, nan_op, np_op, skipna):
      """Check ``nanmin`` / ``nanmax`` against numpy, ignoring RuntimeWarning."""
      with warnings.catch_warnings(record=True):
          warnings.simplefilter("ignore", category=RuntimeWarning)
          self.check_funs(nan_op, np_op, skipna, allow_obj=False)
  417. def _argminmax_wrap(self, value, axis=None, func=None):
  418. res = func(value, axis)
  419. nans = np.min(value, axis)
  420. nullnan = isna(nans)
  421. if res.ndim:
  422. res[nullnan] = -1
  423. elif (
  424. hasattr(nullnan, "all")
  425. and nullnan.all()
  426. or not hasattr(nullnan, "all")
  427. and nullnan
  428. ):
  429. res = -1
  430. return res
  431. def test_nanargmax(self, skipna):
  432. with warnings.catch_warnings(record=True):
  433. warnings.simplefilter("ignore", RuntimeWarning)
  434. func = partial(self._argminmax_wrap, func=np.argmax)
  435. self.check_funs(nanops.nanargmax, func, skipna, allow_obj=False)
  436. def test_nanargmin(self, skipna):
  437. with warnings.catch_warnings(record=True):
  438. warnings.simplefilter("ignore", RuntimeWarning)
  439. func = partial(self._argminmax_wrap, func=np.argmin)
  440. self.check_funs(nanops.nanargmin, func, skipna, allow_obj=False)
  441. def _skew_kurt_wrap(self, values, axis=None, func=None):
  442. if not isinstance(values.dtype.type, np.floating):
  443. values = values.astype("f8")
  444. result = func(values, axis=axis, bias=False)
  445. # fix for handling cases where all elements in an axis are the same
  446. if isinstance(result, np.ndarray):
  447. result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0
  448. return result
  449. elif np.max(values) == np.min(values):
  450. return 0.0
  451. return result
  @td.skip_if_no_scipy
  def test_nanskew(self, skipna):
      """Check ``nanops.nanskew`` against ``scipy.stats.skew``.

      The scipy function is routed through ``_skew_kurt_wrap``.  Complex,
      datetime and timedelta inputs are excluded.
      """
      from scipy.stats import skew

      reference = partial(self._skew_kurt_wrap, func=skew)
      exclusions = {
          "allow_complex": False,
          "allow_date": False,
          "allow_tdelta": False,
      }
      with np.errstate(invalid="ignore"):
          self.check_funs(nanops.nanskew, reference, skipna, **exclusions)
  @td.skip_if_no_scipy
  def test_nankurt(self, skipna):
      """Check ``nanops.nankurt`` against ``scipy.stats.kurtosis(fisher=True)``.

      The scipy function is routed through ``_skew_kurt_wrap``.  Complex,
      datetime and timedelta inputs are excluded.
      """
      from scipy.stats import kurtosis

      fisher_kurtosis = partial(kurtosis, fisher=True)
      reference = partial(self._skew_kurt_wrap, func=fisher_kurtosis)
      exclusions = {
          "allow_complex": False,
          "allow_date": False,
          "allow_tdelta": False,
      }
      with np.errstate(invalid="ignore"):
          self.check_funs(nanops.nankurt, reference, skipna, **exclusions)
  479. def test_nanprod(self, skipna):
  480. self.check_funs(
  481. nanops.nanprod,
  482. np.prod,
  483. skipna,
  484. allow_date=False,
  485. allow_tdelta=False,
  486. empty_targfunc=np.nanprod,
  487. )
  def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs):
      """Exercise ``checkfun`` on the 2-D arrays and compare with the targets.

      ``targ0`` is expected for the plain float pair, ``targ1`` for the
      NaN-padded pair (each with and without ``min_periods``), and NaN for
      every pairing that involves the all-NaN array, the misaligned NaN
      stacks, or a ``min_periods`` above the number of rows.
      """
      n_rows = len(self.arr_float_2d)
      one_short = {"min_periods": n_rows - 1}
      one_over = {"min_periods": n_rows + 1}

      def evaluate(cases):
          # all results of a group are computed before any is asserted
          return [checkfun(a, b, **extra, **kwargs) for a, b, extra in cases]

      def expect(target, results):
          for res in results:
              tm.assert_almost_equal(target, res)

      plain = evaluate(
          [
              (self.arr_float_2d, self.arr_float1_2d, {}),
              (self.arr_float_2d, self.arr_float1_2d, one_short),
          ]
      )
      expect(targ0, plain)

      padded = evaluate(
          [
              (self.arr_float_nan_2d, self.arr_float1_nan_2d, {}),
              (self.arr_float_nan_2d, self.arr_float1_nan_2d, one_short),
          ]
      )
      expect(targ1, padded)

      undefined = evaluate(
          [
              (self.arr_nan_2d, self.arr_float1_2d, {}),
              (self.arr_float_2d, self.arr_nan_2d, {}),
              (self.arr_nan_2d, self.arr_nan_2d, {}),
              (self.arr_float_nan_2d, self.arr_nan_float1_2d, {}),
              (self.arr_float_nan_2d, self.arr_nan_float1_2d, one_short),
              (self.arr_float_2d, self.arr_float1_2d, one_over),
          ]
      )
      expect(np.nan, undefined)
  def check_nancorr_nancov_1d(self, checkfun, targ0, targ1, **kwargs):
      """Exercise ``checkfun`` on the 1-D arrays and compare with the targets.

      ``targ0`` is expected for the plain float pair, ``targ1`` for the
      NaN-padded pair (each with and without ``min_periods``), and NaN for
      every pairing that involves the all-NaN array, the misaligned NaN
      stacks, or a ``min_periods`` above the number of elements.
      """
      n_obs = len(self.arr_float_1d)
      one_short = {"min_periods": n_obs - 1}
      one_over = {"min_periods": n_obs + 1}

      def evaluate(cases):
          # all results of a group are computed before any is asserted
          return [checkfun(a, b, **extra, **kwargs) for a, b, extra in cases]

      def expect(target, results):
          for res in results:
              tm.assert_almost_equal(target, res)

      plain = evaluate(
          [
              (self.arr_float_1d, self.arr_float1_1d, {}),
              (self.arr_float_1d, self.arr_float1_1d, one_short),
          ]
      )
      expect(targ0, plain)

      padded = evaluate(
          [
              (self.arr_float_nan_1d, self.arr_float1_nan_1d, {}),
              (self.arr_float_nan_1d, self.arr_float1_nan_1d, one_short),
          ]
      )
      expect(targ1, padded)

      undefined = evaluate(
          [
              (self.arr_nan_1d, self.arr_float1_1d, {}),
              (self.arr_float_1d, self.arr_nan_1d, {}),
              (self.arr_nan_1d, self.arr_nan_1d, {}),
              (self.arr_float_nan_1d, self.arr_nan_float1_1d, {}),
              (self.arr_float_nan_1d, self.arr_nan_float1_1d, one_short),
              (self.arr_float_1d, self.arr_float1_1d, one_over),
          ]
      )
      expect(np.nan, undefined)
  572. def test_nancorr(self):
  573. targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
  574. targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
  575. self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1)
  576. targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
  577. targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
  578. self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson")
  579. def test_nancorr_pearson(self):
  580. targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
  581. targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
  582. self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="pearson")
  583. targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
  584. targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
  585. self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson")
  @td.skip_if_no_scipy
  def test_nancorr_kendall(self):
      """Check ``nanops.nancorr(method="kendall")`` against scipy's ``kendalltau``."""
      from scipy.stats import kendalltau

      def tau(left, right):
          return kendalltau(left, right)[0]

      full_2d = tau(self.arr_float_2d, self.arr_float1_2d)
      flat_2d = tau(self.arr_float_2d.flat, self.arr_float1_2d.flat)
      self.check_nancorr_nancov_2d(nanops.nancorr, full_2d, flat_2d, method="kendall")

      full_1d = tau(self.arr_float_1d, self.arr_float1_1d)
      flat_1d = tau(self.arr_float_1d.flat, self.arr_float1_1d.flat)
      self.check_nancorr_nancov_1d(nanops.nancorr, full_1d, flat_1d, method="kendall")
  @td.skip_if_no_scipy
  def test_nancorr_spearman(self):
      """Check ``nanops.nancorr(method="spearman")`` against scipy's ``spearmanr``."""
      from scipy.stats import spearmanr

      def rho(left, right):
          return spearmanr(left, right)[0]

      full_2d = rho(self.arr_float_2d, self.arr_float1_2d)
      flat_2d = rho(self.arr_float_2d.flat, self.arr_float1_2d.flat)
      self.check_nancorr_nancov_2d(nanops.nancorr, full_2d, flat_2d, method="spearman")

      full_1d = rho(self.arr_float_1d, self.arr_float1_1d)
      flat_1d = rho(self.arr_float_1d.flat, self.arr_float1_1d.flat)
      self.check_nancorr_nancov_1d(nanops.nancorr, full_1d, flat_1d, method="spearman")
  @td.skip_if_no_scipy
  def test_invalid_method(self):
      """An unknown correlation method must raise ``ValueError``."""
      left_2d, right_2d = self.arr_float_2d, self.arr_float1_2d
      targ0 = np.corrcoef(left_2d, right_2d)[0, 1]
      targ1 = np.corrcoef(left_2d.flat, right_2d.flat)[0, 1]
      msg = "Unknown method 'foo', expected one of 'kendall', 'spearman'"
      with pytest.raises(ValueError, match=msg):
          self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="foo")
  611. def test_nancov(self):
  612. targ0 = np.cov(self.arr_float_2d, self.arr_float1_2d)[0, 1]
  613. targ1 = np.cov(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
  614. self.check_nancorr_nancov_2d(nanops.nancov, targ0, targ1)
  615. targ0 = np.cov(self.arr_float_1d, self.arr_float1_1d)[0, 1]
  616. targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
  617. self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1)
  @pytest.mark.parametrize(
      "op,nanop",
      [
          (operator.eq, nanops.naneq),
          (operator.ne, nanops.nanne),
          (operator.gt, nanops.nangt),
          (operator.ge, nanops.nange),
          (operator.lt, nanops.nanlt),
          (operator.le, nanops.nanle),
      ],
  )
  def test_nan_comparison(request, op, nanop, disable_bottleneck):
      """Check each nan-aware comparison against its ``operator`` counterpart.

      The comparison is verified on the full arrays and then repeatedly on
      the index-0 slice along the last axis until the reference result is
      0-dimensional.
      """
      fetch = request.getfixturevalue
      # NOTE(review): "float1" and "float1_nan" are fetched from the same
      # fixtures as "float" and "float_nan", so both operands are equal in
      # value -- confirm that this is intended.
      arrays = {
          "float": fetch("arr_float"),
          "float1": fetch("arr_float"),
          "nan": fetch("arr_nan"),
          "nan_nan": fetch("arr_nan_nan"),
          "float_nan": fetch("arr_float_nan"),
          "float1_nan": fetch("arr_float_nan"),
          "nan_float1": fetch("arr_nan_float1"),
      }
      expected = op(arrays["float"], arrays["float1"])

      while expected.ndim:
          plain = nanop(arrays["float"], arrays["float1"])
          tm.assert_almost_equal(expected, plain)

          # NaN-padded operands: reference result followed by NaNs
          join = np.vstack if expected.ndim > 1 else np.hstack
          expected_padded = join([expected, arrays["nan"]])
          padded = nanop(arrays["float_nan"], arrays["float1_nan"])
          tm.assert_numpy_array_equal(expected_padded, padded, check_dtype=False)

          # operands whose NaN halves do not line up: everything is NaN
          misaligned = nanop(arrays["float_nan"], arrays["nan_float1"])
          tm.assert_numpy_array_equal(arrays["nan_nan"], misaligned, check_dtype=False)

          # Lower dimension for next step in the loop
          arrays = {key: np.take(arr, 0, axis=-1) for key, arr in arrays.items()}
          expected = np.take(expected, 0, axis=-1)
  @pytest.mark.parametrize(
      "arr, correct",
      [
          # ("arr_complex", False) used to be listed twice; the redundant
          # second entry only repeated the same case and has been dropped.
          ("arr_complex", False),
          ("arr_int", False),
          ("arr_bool", False),
          ("arr_str", False),
          ("arr_utf", False),
          ("arr_complex_nan", False),
          ("arr_nan_nanj", False),
          ("arr_nan_infj", True),
          ("arr_complex_nan_infj", True),
      ],
  )
  def test_has_infs_non_float(request, arr, correct, disable_bottleneck):
      """Check ``nanops._has_infs`` on non-float fixtures at every dimension.

      ``arr`` names the fixture to load and ``correct`` is the expected
      truthiness of ``_has_infs``.  The check is repeated on the index-0
      slice along the last axis until the value is 0-dimensional.
      """
      val = request.getfixturevalue(arr)
      while getattr(val, "ndim", True):
          res0 = nanops._has_infs(val)
          if correct:
              assert res0
          else:
              assert not res0

          if not hasattr(val, "ndim"):
              break

          # Reduce dimension for next step in the loop
          val = np.take(val, 0, axis=-1)
  @pytest.mark.parametrize(
      "arr, correct",
      [
          ("arr_float", False),
          ("arr_nan", False),
          ("arr_float_nan", False),
          ("arr_nan_nan", False),
          ("arr_float_inf", True),
          ("arr_inf", True),
          ("arr_nan_inf", True),
          ("arr_float_nan_inf", True),
          ("arr_nan_nan_inf", True),
      ],
  )
  @pytest.mark.parametrize("astype", [None, "f4", "f2"])
  def test_has_infs_floats(request, arr, correct, astype, disable_bottleneck):
      """Check ``nanops._has_infs`` on float fixtures at every dimension.

      The fixture named by ``arr`` is optionally cast to ``astype`` and the
      check is repeated on the index-0 slice along the last axis until the
      value is 0-dimensional.
      """
      current = request.getfixturevalue(arr)
      if astype is not None:
          current = current.astype(astype)

      while getattr(current, "ndim", True):
          found = nanops._has_infs(current)
          assert bool(found) == correct

          if not hasattr(current, "ndim"):
              break

          # Reduce dimension for next step in the loop
          current = np.take(current, 0, axis=-1)
  @pytest.mark.parametrize(
      "fixture", ["arr_float", "arr_complex", "arr_int", "arr_bool", "arr_str", "arr_utf"]
  )
  def test_bn_ok_dtype(fixture, request, disable_bottleneck):
      """``nanops._bn_ok_dtype`` must be truthy for these fixtures' dtypes."""
      dtype = request.getfixturevalue(fixture).dtype
      assert nanops._bn_ok_dtype(dtype, "test")
  @pytest.mark.parametrize(
      "fixture",
      [
          "arr_date",
          "arr_tdelta",
          "arr_obj",
      ],
  )
  def test_bn_not_ok_dtype(fixture, request, disable_bottleneck):
      """``nanops._bn_ok_dtype`` must be falsy for these fixtures' dtypes."""
      dtype = request.getfixturevalue(fixture).dtype
      assert not nanops._bn_ok_dtype(dtype, "test")
  class TestEnsureNumeric:
      """Exercise ``nanops._ensure_numeric`` with scalars, arrays and bad input."""

      def test_numeric_values(self):
          """Integer, float and complex scalars compare equal to the input."""
          for scalar in (1, 1.1, 1 + 2j):
              assert nanops._ensure_numeric(scalar) == scalar

      def test_ndarray(self):
          """Numeric-like arrays convert; arbitrary strings raise ``TypeError``."""
          expected = np.array([1, 2, 3])

          # Numeric ndarray, object ndarray and convertible string ndarray.
          convertible = (
              expected,
              expected.astype(object),
              np.array(["1", "2", "3"], dtype=object),
          )
          for arr in convertible:
              assert np.allclose(nanops._ensure_numeric(arr), expected)

          # Non-convertible string ndarray.
          not_numeric = np.array(["foo", "bar", "baz"], dtype=object)
          with pytest.raises(TypeError, match=r"Could not convert .* to numeric"):
              nanops._ensure_numeric(not_numeric)

      def test_convertable_values(self):
          """Numeric strings convert to values close to the expected number."""
          cases = (("1", 1.0), ("1.1", 1.1), ("1+1j", 1 + 1j))
          for text, number in cases:
              assert np.allclose(nanops._ensure_numeric(text), number)

      def test_non_convertable_values(self):
          """Non-numeric strings and unsupported types raise ``TypeError``."""
          with pytest.raises(TypeError, match="Could not convert foo to numeric"):
              nanops._ensure_numeric("foo")

          # with the wrong type, python raises TypeError for us
          wrong_type_msg = "argument must be a string or a number"
          for bad_value in ({}, []):
              with pytest.raises(TypeError, match=wrong_type_msg):
                  nanops._ensure_numeric(bad_value)
  class TestNanvarFixedValues:
      """Check ``nanops.nanvar`` / ``nanops.nanstd`` against known variances.

      xref GH10242
      """

      # Samples from a normal distribution.
      @pytest.fixture
      def variance(self):
          """Variance used to scale the normal samples."""
          return 3.0

      @pytest.fixture
      def samples(self, variance):
          """100000 normal draws with standard deviation ``variance**0.5``."""
          return self.prng.normal(scale=variance**0.5, size=100000)

      def test_nanvar_all_finite(self, samples, variance):
          actual_variance = nanops.nanvar(samples)
          tm.assert_almost_equal(actual_variance, variance, rtol=1e-2)

      def test_nanvar_nans(self, samples, variance):
          # Interleave the samples with NaN so every other entry is missing.
          samples_test = np.nan * np.ones(2 * samples.shape[0])
          samples_test[::2] = samples

          actual_variance = nanops.nanvar(samples_test, skipna=True)
          tm.assert_almost_equal(actual_variance, variance, rtol=1e-2)

          actual_variance = nanops.nanvar(samples_test, skipna=False)
          tm.assert_almost_equal(actual_variance, np.nan, rtol=1e-2)

      def test_nanstd_nans(self, samples, variance):
          # Interleave the samples with NaN so every other entry is missing.
          samples_test = np.nan * np.ones(2 * samples.shape[0])
          samples_test[::2] = samples

          actual_std = nanops.nanstd(samples_test, skipna=True)
          tm.assert_almost_equal(actual_std, variance**0.5, rtol=1e-2)

          # Use nanstd here as well: this test previously called nanvar, which
          # left nanstd(skipna=False) unchecked.
          actual_std = nanops.nanstd(samples_test, skipna=False)
          tm.assert_almost_equal(actual_std, np.nan, rtol=1e-2)

      def test_nanvar_axis(self, samples, variance):
          # Generate some sample data.
          samples_unif = self.prng.uniform(size=samples.shape[0])
          samples = np.vstack([samples, samples_unif])

          actual_variance = nanops.nanvar(samples, axis=1)
          # Row 0 is compared to the normal variance, row 1 to 1/12.
          tm.assert_almost_equal(
              actual_variance, np.array([variance, 1.0 / 12]), rtol=1e-2
          )

      def test_nanvar_ddof(self):
          n = 5
          samples = self.prng.uniform(size=(10000, n + 1))
          samples[:, -1] = np.nan  # Force use of our own algorithm.

          variance_0 = nanops.nanvar(samples, axis=1, skipna=True, ddof=0).mean()
          variance_1 = nanops.nanvar(samples, axis=1, skipna=True, ddof=1).mean()
          variance_2 = nanops.nanvar(samples, axis=1, skipna=True, ddof=2).mean()

          # The unbiased estimate.
          var = 1.0 / 12
          tm.assert_almost_equal(variance_1, var, rtol=1e-2)

          # The underestimated variance.
          tm.assert_almost_equal(variance_0, (n - 1.0) / n * var, rtol=1e-2)

          # The overestimated variance.
          tm.assert_almost_equal(variance_2, (n - 1.0) / (n - 2.0) * var, rtol=1e-2)

      @pytest.mark.parametrize("axis", range(2))
      @pytest.mark.parametrize("ddof", range(3))
      def test_ground_truth(self, axis, ddof):
          # Test against values that were precomputed with Numpy.
          samples = np.empty((4, 4))
          samples[:3, :3] = np.array(
              [
                  [0.97303362, 0.21869576, 0.55560287],
                  [0.72980153, 0.03109364, 0.99155171],
                  [0.09317602, 0.60078248, 0.15871292],
              ]
          )
          # Pad the last row and last column with NaN.
          samples[3] = samples[:, 3] = np.nan

          # Actual variances along axis=0, 1 for ddof=0, 1, 2
          variance = np.array(
              [
                  [
                      [0.13762259, 0.05619224, 0.11568816],
                      [0.20643388, 0.08428837, 0.17353224],
                      [0.41286776, 0.16857673, 0.34706449],
                  ],
                  [
                      [0.09519783, 0.16435395, 0.05082054],
                      [0.14279674, 0.24653093, 0.07623082],
                      [0.28559348, 0.49306186, 0.15246163],
                  ],
              ]
          )

          # Test nanvar.
          var = nanops.nanvar(samples, skipna=True, axis=axis, ddof=ddof)
          tm.assert_almost_equal(var[:3], variance[axis, ddof])
          assert np.isnan(var[3])

          # Test nanstd.
          std = nanops.nanstd(samples, skipna=True, axis=axis, ddof=ddof)
          tm.assert_almost_equal(std[:3], variance[axis, ddof] ** 0.5)
          assert np.isnan(std[3])

      @pytest.mark.parametrize("ddof", range(3))
      def test_nanstd_roundoff(self, ddof):
          # Regression test for GH 10242 (test data taken from GH 10489). Ensure
          # that variance is stable.
          data = Series(766897346 * np.ones(10))
          result = data.std(ddof=ddof)
          assert result == 0.0

      @property
      def prng(self):
          """Return a new ``RandomState`` seeded with 1234 on every access."""
          return np.random.RandomState(1234)
  class TestNanskewFixedValues:
      """Check ``nanops.nanskew`` against fixed reference data.

      xref GH 11974
      """

      @property
      def prng(self):
          """Return a new ``RandomState`` seeded with 1234 on every access."""
          return np.random.RandomState(1234)

      # Test data + skewness value (computed with scipy.stats.skew)
      @pytest.fixture
      def samples(self):
          grid = np.linspace(0, 1, 200)
          return np.sin(grid)

      @pytest.fixture
      def actual_skew(self):
          return -0.1875895205961754

      @pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5])
      def test_constant_series(self, val):
          # xref GH 11974
          constant = val * np.ones(300)
          assert nanops.nanskew(constant) == 0.0

      def test_all_finite(self):
          left_tailed = self.prng.beta(0.3, 0.1, size=100)
          assert nanops.nanskew(left_tailed) < 0

          right_tailed = self.prng.beta(0.1, 0.3, size=100)
          assert nanops.nanskew(right_tailed) > 0

      def test_ground_truth(self, samples, actual_skew):
          tm.assert_almost_equal(nanops.nanskew(samples), actual_skew)

      def test_axis(self, samples, actual_skew):
          # Second row is all-NaN.
          nan_row = np.nan * np.ones(len(samples))
          stacked = np.vstack([samples, nan_row])
          expected = np.array([actual_skew, np.nan])
          tm.assert_almost_equal(nanops.nanskew(stacked, axis=1), expected)

      def test_nans(self, samples):
          with_nan = np.hstack([samples, np.nan])
          assert np.isnan(nanops.nanskew(with_nan, skipna=False))

      def test_nans_skipna(self, samples, actual_skew):
          with_nan = np.hstack([samples, np.nan])
          tm.assert_almost_equal(nanops.nanskew(with_nan, skipna=True), actual_skew)
  class TestNankurtFixedValues:
      """Check ``nanops.nankurt`` against fixed reference data.

      xref GH 11974
      """

      @property
      def prng(self):
          """Return a new ``RandomState`` seeded with 1234 on every access."""
          return np.random.RandomState(1234)

      # Test data + kurtosis value (computed with scipy.stats.kurtosis)
      @pytest.fixture
      def samples(self):
          grid = np.linspace(0, 1, 200)
          return np.sin(grid)

      @pytest.fixture
      def actual_kurt(self):
          return -1.2058303433799713

      @pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5])
      def test_constant_series(self, val):
          # xref GH 11974
          constant = val * np.ones(300)
          assert nanops.nankurt(constant) == 0.0

      def test_all_finite(self):
          left_tailed = self.prng.beta(0.3, 0.1, size=100)
          assert nanops.nankurt(left_tailed) < 0

          right_tailed = self.prng.beta(0.1, 0.3, size=100)
          assert nanops.nankurt(right_tailed) > 0

      def test_ground_truth(self, samples, actual_kurt):
          tm.assert_almost_equal(nanops.nankurt(samples), actual_kurt)

      def test_axis(self, samples, actual_kurt):
          # Second row is all-NaN.
          nan_row = np.nan * np.ones(len(samples))
          stacked = np.vstack([samples, nan_row])
          expected = np.array([actual_kurt, np.nan])
          tm.assert_almost_equal(nanops.nankurt(stacked, axis=1), expected)

      def test_nans(self, samples):
          with_nan = np.hstack([samples, np.nan])
          assert np.isnan(nanops.nankurt(with_nan, skipna=False))

      def test_nans_skipna(self, samples, actual_kurt):
          with_nan = np.hstack([samples, np.nan])
          tm.assert_almost_equal(nanops.nankurt(with_nan, skipna=True), actual_kurt)
  class TestDatetime64NaNOps:
      """``nanops.nanmean`` on datetime64/timedelta64 data in several units."""

      @pytest.fixture(params=["s", "ms", "us", "ns"])
      def unit(self, request):
          """Resolution string the test data is built with."""
          return request.param

      # Enabling mean changes the behavior of DataFrame.mean
      # See https://github.com/pandas-dev/pandas/issues/24752
      def test_nanmean(self, unit):
          dti = pd.date_range("2016-01-01", periods=3).as_unit(unit)
          expected = dti[1]

          # Same expectation with and without a NaT inserted at position 1.
          for index in (dti, dti.insert(1, pd.NaT)):
              for obj in (index, DatetimeArray(index), Series(index)):
                  assert nanops.nanmean(obj) == expected

      @pytest.mark.parametrize("constructor", ["M8", "m8"])
      def test_nanmean_skipna_false(self, constructor, unit):
          dtype = f"{constructor}[{unit}]"
          arr = np.arange(12).astype(np.int64).view(dtype).reshape(4, 3)
          arr[-1, -1] = "NaT"

          # Full reduction.
          overall = nanops.nanmean(arr, skipna=False)
          assert np.isnat(overall)
          assert overall.dtype == dtype

          # Reduction over axis 0.
          by_column = nanops.nanmean(arr, axis=0, skipna=False)
          tm.assert_numpy_array_equal(
              by_column, np.array([4, 5, "NaT"], dtype=arr.dtype)
          )

          # Reduction over axis 1.
          by_row = nanops.nanmean(arr, axis=1, skipna=False)
          tm.assert_numpy_array_equal(
              by_row, np.array([arr[0, 1], arr[1, 1], arr[2, 1], arr[-1, -1]])
          )
  def test_use_bottleneck():
      """The ``use_bottleneck`` option can be set both ways via ``option_context``.

      Nothing is checked when ``nanops._BOTTLENECK_INSTALLED`` is falsy.
      """
      if not nanops._BOTTLENECK_INSTALLED:
          return

      for flag in (True, False):
          with pd.option_context("use_bottleneck", flag):
              assert bool(pd.get_option("use_bottleneck")) == flag
  @pytest.mark.parametrize(
      ("numpy_op", "expected"),
      [
          (np.sum, 10),
          (np.nansum, 10),
          (np.mean, 2.5),
          (np.nanmean, 2.5),
          (np.median, 2.5),
          (np.nanmedian, 2.5),
          (np.min, 1),
          (np.max, 4),
          (np.nanmin, 1),
          (np.nanmax, 4),
      ],
  )
  def test_numpy_ops(numpy_op, expected):
      """NumPy reductions applied to a Series give the expected scalar (GH8383)."""
      ser = Series([1, 2, 3, 4])
      assert numpy_op(ser) == expected
  @pytest.mark.parametrize(
      "operation",
      [
          nanops.nanany,
          nanops.nanall,
          nanops.nansum,
          nanops.nanmean,
          nanops.nanmedian,
          nanops.nanstd,
          nanops.nanvar,
          nanops.nansem,
          nanops.nanargmax,
          nanops.nanargmin,
          nanops.nanmax,
          nanops.nanmin,
          nanops.nanskew,
          nanops.nankurt,
          nanops.nanprod,
      ],
  )
  def test_nanops_independent_of_mask_param(operation):
      """Passing an explicit NaN mask must not change an operation's result.

      GH22764
      """
      ser = Series([1, 2, np.nan, 3, np.nan, 4])
      na_mask = ser.isna()

      without_mask = operation(ser)
      with_mask = operation(ser, mask=na_mask)
      assert without_mask == with_mask
  @pytest.mark.parametrize("min_count", [-1, 0])
  def test_check_below_min_count_negative_or_zero_min_count(min_count):
      """A non-positive ``min_count`` with no mask gives ``False`` (GH35227)."""
      expected = False
      outcome = nanops.check_below_min_count((21, 37), None, min_count)
      assert outcome == expected
  @pytest.mark.parametrize(
      "mask", [None, np.array([False, False, True]), np.array([True] + 9 * [False])]
  )
  @pytest.mark.parametrize(
      ("min_count", "expected_result"), [(1, False), (101, True)]
  )
  def test_check_below_min_count_positive_min_count(mask, min_count, expected_result):
      """Check ``check_below_min_count`` for a (10, 10) shape (GH35227)."""
      outcome = nanops.check_below_min_count((10, 10), mask, min_count)
      assert outcome == expected_result
  @td.skip_if_windows
  @td.skip_if_32bit
  @pytest.mark.parametrize(
      ("min_count", "expected_result"), [(1, False), (2812191852, True)]
  )
  def test_check_below_min_count_large_shape(min_count, expected_result):
      """Check ``check_below_min_count`` with a very large shape and no mask.

      GH35227 large shape used to show that the issue is fixed
      """
      large_shape = (2244367, 1253)
      outcome = nanops.check_below_min_count(
          large_shape, mask=None, min_count=min_count
      )
      assert outcome == expected_result
  @pytest.mark.parametrize("func", ["nanmean", "nansum"])
  def test_check_bottleneck_disallow(any_real_numpy_dtype, func):
      """``_bn_ok_dtype`` must be falsy for ``nanmean``/``nansum`` on real dtypes."""
      # GH 42878 bottleneck sometimes produces unreliable results for mean and sum
      scalar_type = np.dtype(any_real_numpy_dtype).type
      assert not nanops._bn_ok_dtype(scalar_type, func)
  @pytest.mark.parametrize("val", [2**55, -(2**55), 20150515061816532])
  def test_nanmean_overflow(disable_bottleneck, val):
      """Mean of a constant int64 Series equals the constant and NumPy's mean.

      GH 10155: in the previous implementation mean can overflow for int
      dtypes, it is now consistent with numpy.
      """
      ser = Series(val, index=range(500), dtype=np.int64)

      pandas_mean = ser.mean()
      numpy_mean = ser.values.mean()

      assert pandas_mean == val
      assert pandas_mean == numpy_mean
      assert pandas_mean.dtype == np.float64
  @pytest.mark.parametrize(
      "dtype",
      [
          np.int16,
          np.int32,
          np.int64,
          np.float32,
          np.float64,
          getattr(np, "float128", None),
      ],
  )
  @pytest.mark.parametrize("method", ["mean", "std", "var", "skew", "kurt", "min", "max"])
  def test_returned_dtype(disable_bottleneck, dtype, method):
      """Check the dtype of the scalar returned by each Series reduction.

      Integer input is expected to give float64 for every method except
      ``min``/``max``; all other combinations are expected to keep the input
      dtype.
      """
      if dtype is None:
          pytest.skip("np.float128 not available")

      ser = Series(range(10), dtype=dtype)
      reduced = getattr(ser, method)()

      keeps_dtype = method in ["min", "max"] or not is_integer_dtype(dtype)
      expected_dtype = dtype if keeps_dtype else np.float64
      assert reduced.dtype == expected_dtype