# test_pipe.py
  1. import numpy as np
  2. import pandas as pd
  3. from pandas import (
  4. DataFrame,
  5. Index,
  6. )
  7. import pandas._testing as tm
  8. def test_pipe():
  9. # Test the pipe method of DataFrameGroupBy.
  10. # Issue #17871
  11. random_state = np.random.RandomState(1234567890)
  12. df = DataFrame(
  13. {
  14. "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
  15. "B": random_state.randn(8),
  16. "C": random_state.randn(8),
  17. }
  18. )
  19. def f(dfgb):
  20. return dfgb.B.max() - dfgb.C.min().min()
  21. def square(srs):
  22. return srs**2
  23. # Note that the transformations are
  24. # GroupBy -> Series
  25. # Series -> Series
  26. # This then chains the GroupBy.pipe and the
  27. # NDFrame.pipe methods
  28. result = df.groupby("A").pipe(f).pipe(square)
  29. index = Index(["bar", "foo"], dtype="object", name="A")
  30. expected = pd.Series([8.99110003361, 8.17516964785], name="B", index=index)
  31. tm.assert_series_equal(expected, result)
  32. def test_pipe_args():
  33. # Test passing args to the pipe method of DataFrameGroupBy.
  34. # Issue #17871
  35. df = DataFrame(
  36. {
  37. "group": ["A", "A", "B", "B", "C"],
  38. "x": [1.0, 2.0, 3.0, 2.0, 5.0],
  39. "y": [10.0, 100.0, 1000.0, -100.0, -1000.0],
  40. }
  41. )
  42. def f(dfgb, arg1):
  43. filtered = dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False)
  44. return filtered.groupby("group")
  45. def g(dfgb, arg2):
  46. return dfgb.sum() / dfgb.sum().sum() + arg2
  47. def h(df, arg3):
  48. return df.x + df.y - arg3
  49. result = df.groupby("group").pipe(f, 0).pipe(g, 10).pipe(h, 100)
  50. # Assert the results here
  51. index = Index(["A", "B"], name="group")
  52. expected = pd.Series([-79.5160891089, -78.4839108911], index=index)
  53. tm.assert_series_equal(result, expected)
  54. # test SeriesGroupby.pipe
  55. ser = pd.Series([1, 1, 2, 2, 3, 3])
  56. result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count())
  57. expected = pd.Series([4, 8, 12], index=Index([1, 2, 3], dtype=np.int64))
  58. tm.assert_series_equal(result, expected)