test_apply_mutate.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. import numpy as np
  2. import pandas as pd
  3. import pandas._testing as tm
  4. def test_group_by_copy():
  5. # GH#44803
  6. df = pd.DataFrame(
  7. {
  8. "name": ["Alice", "Bob", "Carl"],
  9. "age": [20, 21, 20],
  10. }
  11. ).set_index("name")
  12. grp_by_same_value = df.groupby(["age"], group_keys=False).apply(lambda group: group)
  13. grp_by_copy = df.groupby(["age"], group_keys=False).apply(
  14. lambda group: group.copy()
  15. )
  16. tm.assert_frame_equal(grp_by_same_value, grp_by_copy)
  17. def test_mutate_groups():
  18. # GH3380
  19. df = pd.DataFrame(
  20. {
  21. "cat1": ["a"] * 8 + ["b"] * 6,
  22. "cat2": ["c"] * 2
  23. + ["d"] * 2
  24. + ["e"] * 2
  25. + ["f"] * 2
  26. + ["c"] * 2
  27. + ["d"] * 2
  28. + ["e"] * 2,
  29. "cat3": [f"g{x}" for x in range(1, 15)],
  30. "val": np.random.randint(100, size=14),
  31. }
  32. )
  33. def f_copy(x):
  34. x = x.copy()
  35. x["rank"] = x.val.rank(method="min")
  36. return x.groupby("cat2")["rank"].min()
  37. def f_no_copy(x):
  38. x["rank"] = x.val.rank(method="min")
  39. return x.groupby("cat2")["rank"].min()
  40. grpby_copy = df.groupby("cat1").apply(f_copy)
  41. grpby_no_copy = df.groupby("cat1").apply(f_no_copy)
  42. tm.assert_series_equal(grpby_copy, grpby_no_copy)
  43. def test_no_mutate_but_looks_like():
  44. # GH 8467
  45. # first show's mutation indicator
  46. # second does not, but should yield the same results
  47. df = pd.DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)})
  48. result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].key)
  49. result2 = df.groupby("key", group_keys=True).apply(lambda x: x.key)
  50. tm.assert_series_equal(result1, result2)
  51. def test_apply_function_with_indexing():
  52. # GH: 33058
  53. df = pd.DataFrame(
  54. {"col1": ["A", "A", "A", "B", "B", "B"], "col2": [1, 2, 3, 4, 5, 6]}
  55. )
  56. def fn(x):
  57. x.loc[x.index[-1], "col2"] = 0
  58. return x.col2
  59. result = df.groupby(["col1"], as_index=False).apply(fn)
  60. expected = pd.Series(
  61. [1, 2, 0, 4, 5, 0],
  62. index=pd.MultiIndex.from_tuples(
  63. [(0, 0), (0, 1), (0, 2), (1, 3), (1, 4), (1, 5)]
  64. ),
  65. name="col2",
  66. )
  67. tm.assert_series_equal(result, expected)
  68. def test_apply_mutate_columns_multiindex():
  69. # GH 12652
  70. df = pd.DataFrame(
  71. {
  72. ("C", "julian"): [1, 2, 3],
  73. ("B", "geoffrey"): [1, 2, 3],
  74. ("A", "julian"): [1, 2, 3],
  75. ("B", "julian"): [1, 2, 3],
  76. ("A", "geoffrey"): [1, 2, 3],
  77. ("C", "geoffrey"): [1, 2, 3],
  78. },
  79. columns=pd.MultiIndex.from_tuples(
  80. [
  81. ("A", "julian"),
  82. ("A", "geoffrey"),
  83. ("B", "julian"),
  84. ("B", "geoffrey"),
  85. ("C", "julian"),
  86. ("C", "geoffrey"),
  87. ]
  88. ),
  89. )
  90. def add_column(grouped):
  91. name = grouped.columns[0][1]
  92. grouped["sum", name] = grouped.sum(axis=1)
  93. return grouped
  94. result = df.groupby(level=1, axis=1).apply(add_column)
  95. expected = pd.DataFrame(
  96. [
  97. [1, 1, 1, 3, 1, 1, 1, 3],
  98. [2, 2, 2, 6, 2, 2, 2, 6],
  99. [
  100. 3,
  101. 3,
  102. 3,
  103. 9,
  104. 3,
  105. 3,
  106. 3,
  107. 9,
  108. ],
  109. ],
  110. columns=pd.MultiIndex.from_tuples(
  111. [
  112. ("geoffrey", "A", "geoffrey"),
  113. ("geoffrey", "B", "geoffrey"),
  114. ("geoffrey", "C", "geoffrey"),
  115. ("geoffrey", "sum", "geoffrey"),
  116. ("julian", "A", "julian"),
  117. ("julian", "B", "julian"),
  118. ("julian", "C", "julian"),
  119. ("julian", "sum", "julian"),
  120. ]
  121. ),
  122. )
  123. tm.assert_frame_equal(result, expected)