# test_bin_groupby.py (1.7 KB)
import numpy as np
import pytest

from pandas._libs import lib
import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm
  7. def assert_block_lengths(x):
  8. assert len(x) == len(x._mgr.blocks[0].mgr_locs)
  9. return 0
  10. def cumsum_max(x):
  11. x.cumsum().max()
  12. return 0
  13. @pytest.mark.parametrize(
  14. "func",
  15. [
  16. cumsum_max,
  17. pytest.param(assert_block_lengths, marks=td.skip_array_manager_invalid_test),
  18. ],
  19. )
  20. def test_mgr_locs_updated(func):
  21. # https://github.com/pandas-dev/pandas/issues/31802
  22. # Some operations may require creating new blocks, which requires
  23. # valid mgr_locs
  24. df = pd.DataFrame({"A": ["a", "a", "a"], "B": ["a", "b", "b"], "C": [1, 1, 1]})
  25. result = df.groupby(["A", "B"]).agg(func)
  26. expected = pd.DataFrame(
  27. {"C": [0, 0]},
  28. index=pd.MultiIndex.from_product([["a"], ["a", "b"]], names=["A", "B"]),
  29. )
  30. tm.assert_frame_equal(result, expected)
  31. @pytest.mark.parametrize(
  32. "binner,closed,expected",
  33. [
  34. (
  35. np.array([0, 3, 6, 9], dtype=np.int64),
  36. "left",
  37. np.array([2, 5, 6], dtype=np.int64),
  38. ),
  39. (
  40. np.array([0, 3, 6, 9], dtype=np.int64),
  41. "right",
  42. np.array([3, 6, 6], dtype=np.int64),
  43. ),
  44. (np.array([0, 3, 6], dtype=np.int64), "left", np.array([2, 5], dtype=np.int64)),
  45. (
  46. np.array([0, 3, 6], dtype=np.int64),
  47. "right",
  48. np.array([3, 6], dtype=np.int64),
  49. ),
  50. ],
  51. )
  52. def test_generate_bins(binner, closed, expected):
  53. values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
  54. result = lib.generate_bins_dt64(values, binner, closed=closed)
  55. tm.assert_numpy_array_equal(result, expected)