test_frame_apply_relabeling.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. import numpy as np
  2. import pytest
  3. from pandas.compat.numpy import np_version_gte1p25
  4. import pandas as pd
  5. import pandas._testing as tm
  6. def test_agg_relabel():
  7. # GH 26513
  8. df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
  9. # simplest case with one column, one func
  10. result = df.agg(foo=("B", "sum"))
  11. expected = pd.DataFrame({"B": [10]}, index=pd.Index(["foo"]))
  12. tm.assert_frame_equal(result, expected)
  13. # test on same column with different methods
  14. result = df.agg(foo=("B", "sum"), bar=("B", "min"))
  15. expected = pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"]))
  16. tm.assert_frame_equal(result, expected)
  17. def test_agg_relabel_multi_columns_multi_methods():
  18. # GH 26513, test on multiple columns with multiple methods
  19. df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
  20. result = df.agg(
  21. foo=("A", "sum"),
  22. bar=("B", "mean"),
  23. cat=("A", "min"),
  24. dat=("B", "max"),
  25. f=("A", "max"),
  26. g=("C", "min"),
  27. )
  28. expected = pd.DataFrame(
  29. {
  30. "A": [6.0, np.nan, 1.0, np.nan, 2.0, np.nan],
  31. "B": [np.nan, 2.5, np.nan, 4.0, np.nan, np.nan],
  32. "C": [np.nan, np.nan, np.nan, np.nan, np.nan, 3.0],
  33. },
  34. index=pd.Index(["foo", "bar", "cat", "dat", "f", "g"]),
  35. )
  36. tm.assert_frame_equal(result, expected)
  37. @pytest.mark.xfail(np_version_gte1p25, reason="name of min now equals name of np.min")
  38. def test_agg_relabel_partial_functions():
  39. # GH 26513, test on partial, functools or more complex cases
  40. df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
  41. result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
  42. expected = pd.DataFrame(
  43. {"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])
  44. )
  45. tm.assert_frame_equal(result, expected)
  46. result = df.agg(
  47. foo=("A", min),
  48. bar=("A", np.min),
  49. cat=("B", max),
  50. dat=("C", "min"),
  51. f=("B", np.sum),
  52. kk=("B", lambda x: min(x)),
  53. )
  54. expected = pd.DataFrame(
  55. {
  56. "A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan],
  57. "B": [np.nan, np.nan, 4.0, np.nan, 10.0, 1.0],
  58. "C": [np.nan, np.nan, np.nan, 3.0, np.nan, np.nan],
  59. },
  60. index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"]),
  61. )
  62. tm.assert_frame_equal(result, expected)
  63. def test_agg_namedtuple():
  64. # GH 26513
  65. df = pd.DataFrame({"A": [0, 1], "B": [1, 2]})
  66. result = df.agg(
  67. foo=pd.NamedAgg("B", "sum"),
  68. bar=pd.NamedAgg("B", min),
  69. cat=pd.NamedAgg(column="B", aggfunc="count"),
  70. fft=pd.NamedAgg("B", aggfunc="max"),
  71. )
  72. expected = pd.DataFrame(
  73. {"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"])
  74. )
  75. tm.assert_frame_equal(result, expected)
  76. result = df.agg(
  77. foo=pd.NamedAgg("A", "min"),
  78. bar=pd.NamedAgg(column="B", aggfunc="max"),
  79. cat=pd.NamedAgg(column="A", aggfunc="max"),
  80. )
  81. expected = pd.DataFrame(
  82. {"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]},
  83. index=pd.Index(["foo", "bar", "cat"]),
  84. )
  85. tm.assert_frame_equal(result, expected)