test_pivot.py 86 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563
  1. from datetime import (
  2. date,
  3. datetime,
  4. timedelta,
  5. )
  6. from itertools import product
  7. import numpy as np
  8. import pytest
  9. from pandas.errors import PerformanceWarning
  10. import pandas as pd
  11. from pandas import (
  12. Categorical,
  13. DataFrame,
  14. Grouper,
  15. Index,
  16. MultiIndex,
  17. Series,
  18. concat,
  19. date_range,
  20. )
  21. import pandas._testing as tm
  22. from pandas.api.types import CategoricalDtype as CDT
  23. from pandas.core.reshape import reshape as reshape_lib
  24. from pandas.core.reshape.pivot import pivot_table
  25. @pytest.fixture(params=[True, False])
  26. def dropna(request):
  27. return request.param
  28. @pytest.fixture(params=[([0] * 4, [1] * 4), (range(0, 3), range(1, 4))])
  29. def interval_values(request, closed):
  30. left, right = request.param
  31. return Categorical(pd.IntervalIndex.from_arrays(left, right, closed))
  32. class TestPivotTable:
  33. @pytest.fixture
  34. def data(self):
  35. return DataFrame(
  36. {
  37. "A": [
  38. "foo",
  39. "foo",
  40. "foo",
  41. "foo",
  42. "bar",
  43. "bar",
  44. "bar",
  45. "bar",
  46. "foo",
  47. "foo",
  48. "foo",
  49. ],
  50. "B": [
  51. "one",
  52. "one",
  53. "one",
  54. "two",
  55. "one",
  56. "one",
  57. "one",
  58. "two",
  59. "two",
  60. "two",
  61. "one",
  62. ],
  63. "C": [
  64. "dull",
  65. "dull",
  66. "shiny",
  67. "dull",
  68. "dull",
  69. "shiny",
  70. "shiny",
  71. "dull",
  72. "shiny",
  73. "shiny",
  74. "shiny",
  75. ],
  76. "D": np.random.randn(11),
  77. "E": np.random.randn(11),
  78. "F": np.random.randn(11),
  79. }
  80. )
  81. def test_pivot_table(self, observed, data):
  82. index = ["A", "B"]
  83. columns = "C"
  84. table = pivot_table(
  85. data, values="D", index=index, columns=columns, observed=observed
  86. )
  87. table2 = data.pivot_table(
  88. values="D", index=index, columns=columns, observed=observed
  89. )
  90. tm.assert_frame_equal(table, table2)
  91. # this works
  92. pivot_table(data, values="D", index=index, observed=observed)
  93. if len(index) > 1:
  94. assert table.index.names == tuple(index)
  95. else:
  96. assert table.index.name == index[0]
  97. if len(columns) > 1:
  98. assert table.columns.names == columns
  99. else:
  100. assert table.columns.name == columns[0]
  101. expected = data.groupby(index + [columns])["D"].agg(np.mean).unstack()
  102. tm.assert_frame_equal(table, expected)
  103. def test_pivot_table_categorical_observed_equal(self, observed):
  104. # issue #24923
  105. df = DataFrame(
  106. {"col1": list("abcde"), "col2": list("fghij"), "col3": [1, 2, 3, 4, 5]}
  107. )
  108. expected = df.pivot_table(
  109. index="col1", values="col3", columns="col2", aggfunc=np.sum, fill_value=0
  110. )
  111. expected.index = expected.index.astype("category")
  112. expected.columns = expected.columns.astype("category")
  113. df.col1 = df.col1.astype("category")
  114. df.col2 = df.col2.astype("category")
  115. result = df.pivot_table(
  116. index="col1",
  117. values="col3",
  118. columns="col2",
  119. aggfunc=np.sum,
  120. fill_value=0,
  121. observed=observed,
  122. )
  123. tm.assert_frame_equal(result, expected)
  124. def test_pivot_table_nocols(self):
  125. df = DataFrame(
  126. {"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]}
  127. )
  128. rs = df.pivot_table(columns="cols", aggfunc=np.sum)
  129. xp = df.pivot_table(index="cols", aggfunc=np.sum).T
  130. tm.assert_frame_equal(rs, xp)
  131. rs = df.pivot_table(columns="cols", aggfunc={"values": "mean"})
  132. xp = df.pivot_table(index="cols", aggfunc={"values": "mean"}).T
  133. tm.assert_frame_equal(rs, xp)
  134. def test_pivot_table_dropna(self):
  135. df = DataFrame(
  136. {
  137. "amount": {0: 60000, 1: 100000, 2: 50000, 3: 30000},
  138. "customer": {0: "A", 1: "A", 2: "B", 3: "C"},
  139. "month": {0: 201307, 1: 201309, 2: 201308, 3: 201310},
  140. "product": {0: "a", 1: "b", 2: "c", 3: "d"},
  141. "quantity": {0: 2000000, 1: 500000, 2: 1000000, 3: 1000000},
  142. }
  143. )
  144. pv_col = df.pivot_table(
  145. "quantity", "month", ["customer", "product"], dropna=False
  146. )
  147. pv_ind = df.pivot_table(
  148. "quantity", ["customer", "product"], "month", dropna=False
  149. )
  150. m = MultiIndex.from_tuples(
  151. [
  152. ("A", "a"),
  153. ("A", "b"),
  154. ("A", "c"),
  155. ("A", "d"),
  156. ("B", "a"),
  157. ("B", "b"),
  158. ("B", "c"),
  159. ("B", "d"),
  160. ("C", "a"),
  161. ("C", "b"),
  162. ("C", "c"),
  163. ("C", "d"),
  164. ],
  165. names=["customer", "product"],
  166. )
  167. tm.assert_index_equal(pv_col.columns, m)
  168. tm.assert_index_equal(pv_ind.index, m)
  169. def test_pivot_table_categorical(self):
  170. cat1 = Categorical(
  171. ["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True
  172. )
  173. cat2 = Categorical(
  174. ["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True
  175. )
  176. df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
  177. result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
  178. exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
  179. expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index)
  180. tm.assert_frame_equal(result, expected)
  181. def test_pivot_table_dropna_categoricals(self, dropna):
  182. # GH 15193
  183. categories = ["a", "b", "c", "d"]
  184. df = DataFrame(
  185. {
  186. "A": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
  187. "B": [1, 2, 3, 1, 2, 3, 1, 2, 3],
  188. "C": range(0, 9),
  189. }
  190. )
  191. df["A"] = df["A"].astype(CDT(categories, ordered=False))
  192. result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
  193. expected_columns = Series(["a", "b", "c"], name="A")
  194. expected_columns = expected_columns.astype(CDT(categories, ordered=False))
  195. expected_index = Series([1, 2, 3], name="B")
  196. expected = DataFrame(
  197. [[0, 3, 6], [1, 4, 7], [2, 5, 8]],
  198. index=expected_index,
  199. columns=expected_columns,
  200. )
  201. if not dropna:
  202. # add back the non observed to compare
  203. expected = expected.reindex(columns=Categorical(categories)).astype("float")
  204. tm.assert_frame_equal(result, expected)
  205. def test_pivot_with_non_observable_dropna(self, dropna):
  206. # gh-21133
  207. df = DataFrame(
  208. {
  209. "A": Categorical(
  210. [np.nan, "low", "high", "low", "high"],
  211. categories=["low", "high"],
  212. ordered=True,
  213. ),
  214. "B": [0.0, 1.0, 2.0, 3.0, 4.0],
  215. }
  216. )
  217. result = df.pivot_table(index="A", values="B", dropna=dropna)
  218. expected = DataFrame(
  219. {"B": [2.0, 3.0]},
  220. index=Index(
  221. Categorical.from_codes(
  222. [0, 1], categories=["low", "high"], ordered=True
  223. ),
  224. name="A",
  225. ),
  226. )
  227. tm.assert_frame_equal(result, expected)
  228. def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
  229. # gh-21378
  230. df = DataFrame(
  231. {
  232. "A": Categorical(
  233. ["left", "low", "high", "low", "high"],
  234. categories=["low", "high", "left"],
  235. ordered=True,
  236. ),
  237. "B": range(5),
  238. }
  239. )
  240. result = df.pivot_table(index="A", values="B", dropna=dropna)
  241. expected = DataFrame(
  242. {"B": [2, 3, 0]},
  243. index=Index(
  244. Categorical.from_codes(
  245. [0, 1, 2], categories=["low", "high", "left"], ordered=True
  246. ),
  247. name="A",
  248. ),
  249. )
  250. if not dropna:
  251. expected["B"] = expected["B"].astype(float)
  252. tm.assert_frame_equal(result, expected)
  253. def test_pivot_with_interval_index(self, interval_values, dropna):
  254. # GH 25814
  255. df = DataFrame({"A": interval_values, "B": 1})
  256. result = df.pivot_table(index="A", values="B", dropna=dropna)
  257. expected = DataFrame({"B": 1}, index=Index(interval_values.unique(), name="A"))
  258. if not dropna:
  259. expected = expected.astype(float)
  260. tm.assert_frame_equal(result, expected)
  261. def test_pivot_with_interval_index_margins(self):
  262. # GH 25815
  263. ordered_cat = pd.IntervalIndex.from_arrays([0, 0, 1, 1], [1, 1, 2, 2])
  264. df = DataFrame(
  265. {
  266. "A": np.arange(4, 0, -1, dtype=np.intp),
  267. "B": ["a", "b", "a", "b"],
  268. "C": Categorical(ordered_cat, ordered=True).sort_values(
  269. ascending=False
  270. ),
  271. }
  272. )
  273. pivot_tab = pivot_table(
  274. df, index="C", columns="B", values="A", aggfunc="sum", margins=True
  275. )
  276. result = pivot_tab["All"]
  277. expected = Series(
  278. [3, 7, 10],
  279. index=Index([pd.Interval(0, 1), pd.Interval(1, 2), "All"], name="C"),
  280. name="All",
  281. dtype=np.intp,
  282. )
  283. tm.assert_series_equal(result, expected)
  284. def test_pass_array(self, data):
  285. result = data.pivot_table("D", index=data.A, columns=data.C)
  286. expected = data.pivot_table("D", index="A", columns="C")
  287. tm.assert_frame_equal(result, expected)
  288. def test_pass_function(self, data):
  289. result = data.pivot_table("D", index=lambda x: x // 5, columns=data.C)
  290. expected = data.pivot_table("D", index=data.index // 5, columns="C")
  291. tm.assert_frame_equal(result, expected)
  292. def test_pivot_table_multiple(self, data):
  293. index = ["A", "B"]
  294. columns = "C"
  295. table = pivot_table(data, index=index, columns=columns)
  296. expected = data.groupby(index + [columns]).agg(np.mean).unstack()
  297. tm.assert_frame_equal(table, expected)
  298. def test_pivot_dtypes(self):
  299. # can convert dtypes
  300. f = DataFrame(
  301. {
  302. "a": ["cat", "bat", "cat", "bat"],
  303. "v": [1, 2, 3, 4],
  304. "i": ["a", "b", "a", "b"],
  305. }
  306. )
  307. assert f.dtypes["v"] == "int64"
  308. z = pivot_table(
  309. f, values="v", index=["a"], columns=["i"], fill_value=0, aggfunc=np.sum
  310. )
  311. result = z.dtypes
  312. expected = Series([np.dtype("int64")] * 2, index=Index(list("ab"), name="i"))
  313. tm.assert_series_equal(result, expected)
  314. # cannot convert dtypes
  315. f = DataFrame(
  316. {
  317. "a": ["cat", "bat", "cat", "bat"],
  318. "v": [1.5, 2.5, 3.5, 4.5],
  319. "i": ["a", "b", "a", "b"],
  320. }
  321. )
  322. assert f.dtypes["v"] == "float64"
  323. z = pivot_table(
  324. f, values="v", index=["a"], columns=["i"], fill_value=0, aggfunc=np.mean
  325. )
  326. result = z.dtypes
  327. expected = Series([np.dtype("float64")] * 2, index=Index(list("ab"), name="i"))
  328. tm.assert_series_equal(result, expected)
  329. @pytest.mark.parametrize(
  330. "columns,values",
  331. [
  332. ("bool1", ["float1", "float2"]),
  333. ("bool1", ["float1", "float2", "bool1"]),
  334. ("bool2", ["float1", "float2", "bool1"]),
  335. ],
  336. )
  337. def test_pivot_preserve_dtypes(self, columns, values):
  338. # GH 7142 regression test
  339. v = np.arange(5, dtype=np.float64)
  340. df = DataFrame(
  341. {"float1": v, "float2": v + 2.0, "bool1": v <= 2, "bool2": v <= 3}
  342. )
  343. df_res = df.reset_index().pivot_table(
  344. index="index", columns=columns, values=values
  345. )
  346. result = dict(df_res.dtypes)
  347. expected = {col: np.dtype("float64") for col in df_res}
  348. assert result == expected
  349. def test_pivot_no_values(self):
  350. # GH 14380
  351. idx = pd.DatetimeIndex(
  352. ["2011-01-01", "2011-02-01", "2011-01-02", "2011-01-01", "2011-01-02"]
  353. )
  354. df = DataFrame({"A": [1, 2, 3, 4, 5]}, index=idx)
  355. res = df.pivot_table(index=df.index.month, columns=df.index.day)
  356. exp_columns = MultiIndex.from_tuples([("A", 1), ("A", 2)])
  357. exp_columns = exp_columns.set_levels(
  358. exp_columns.levels[1].astype(np.int32), level=1
  359. )
  360. exp = DataFrame(
  361. [[2.5, 4.0], [2.0, np.nan]],
  362. index=Index([1, 2], dtype=np.int32),
  363. columns=exp_columns,
  364. )
  365. tm.assert_frame_equal(res, exp)
  366. df = DataFrame(
  367. {
  368. "A": [1, 2, 3, 4, 5],
  369. "dt": date_range("2011-01-01", freq="D", periods=5),
  370. },
  371. index=idx,
  372. )
  373. res = df.pivot_table(index=df.index.month, columns=Grouper(key="dt", freq="M"))
  374. exp_columns = MultiIndex.from_tuples([("A", pd.Timestamp("2011-01-31"))])
  375. exp_columns.names = [None, "dt"]
  376. exp = DataFrame(
  377. [3.25, 2.0], index=Index([1, 2], dtype=np.int32), columns=exp_columns
  378. )
  379. tm.assert_frame_equal(res, exp)
  380. res = df.pivot_table(
  381. index=Grouper(freq="A"), columns=Grouper(key="dt", freq="M")
  382. )
  383. exp = DataFrame(
  384. [3], index=pd.DatetimeIndex(["2011-12-31"], freq="A"), columns=exp_columns
  385. )
  386. tm.assert_frame_equal(res, exp)
  387. def test_pivot_multi_values(self, data):
  388. result = pivot_table(
  389. data, values=["D", "E"], index="A", columns=["B", "C"], fill_value=0
  390. )
  391. expected = pivot_table(
  392. data.drop(["F"], axis=1), index="A", columns=["B", "C"], fill_value=0
  393. )
  394. tm.assert_frame_equal(result, expected)
  395. def test_pivot_multi_functions(self, data):
  396. f = lambda func: pivot_table(
  397. data, values=["D", "E"], index=["A", "B"], columns="C", aggfunc=func
  398. )
  399. result = f([np.mean, np.std])
  400. means = f(np.mean)
  401. stds = f(np.std)
  402. expected = concat([means, stds], keys=["mean", "std"], axis=1)
  403. tm.assert_frame_equal(result, expected)
  404. # margins not supported??
  405. f = lambda func: pivot_table(
  406. data,
  407. values=["D", "E"],
  408. index=["A", "B"],
  409. columns="C",
  410. aggfunc=func,
  411. margins=True,
  412. )
  413. result = f([np.mean, np.std])
  414. means = f(np.mean)
  415. stds = f(np.std)
  416. expected = concat([means, stds], keys=["mean", "std"], axis=1)
  417. tm.assert_frame_equal(result, expected)
  418. @pytest.mark.parametrize("method", [True, False])
  419. def test_pivot_index_with_nan(self, method):
  420. # GH 3588
  421. nan = np.nan
  422. df = DataFrame(
  423. {
  424. "a": ["R1", "R2", nan, "R4"],
  425. "b": ["C1", "C2", "C3", "C4"],
  426. "c": [10, 15, 17, 20],
  427. }
  428. )
  429. if method:
  430. result = df.pivot(index="a", columns="b", values="c")
  431. else:
  432. result = pd.pivot(df, index="a", columns="b", values="c")
  433. expected = DataFrame(
  434. [
  435. [nan, nan, 17, nan],
  436. [10, nan, nan, nan],
  437. [nan, 15, nan, nan],
  438. [nan, nan, nan, 20],
  439. ],
  440. index=Index([nan, "R1", "R2", "R4"], name="a"),
  441. columns=Index(["C1", "C2", "C3", "C4"], name="b"),
  442. )
  443. tm.assert_frame_equal(result, expected)
  444. tm.assert_frame_equal(df.pivot(index="b", columns="a", values="c"), expected.T)
  445. @pytest.mark.parametrize("method", [True, False])
  446. def test_pivot_index_with_nan_dates(self, method):
  447. # GH9491
  448. df = DataFrame(
  449. {
  450. "a": date_range("2014-02-01", periods=6, freq="D"),
  451. "c": 100 + np.arange(6),
  452. }
  453. )
  454. df["b"] = df["a"] - pd.Timestamp("2014-02-02")
  455. df.loc[1, "a"] = df.loc[3, "a"] = np.nan
  456. df.loc[1, "b"] = df.loc[4, "b"] = np.nan
  457. if method:
  458. pv = df.pivot(index="a", columns="b", values="c")
  459. else:
  460. pv = pd.pivot(df, index="a", columns="b", values="c")
  461. assert pv.notna().values.sum() == len(df)
  462. for _, row in df.iterrows():
  463. assert pv.loc[row["a"], row["b"]] == row["c"]
  464. if method:
  465. result = df.pivot(index="b", columns="a", values="c")
  466. else:
  467. result = pd.pivot(df, index="b", columns="a", values="c")
  468. tm.assert_frame_equal(result, pv.T)
  469. @pytest.mark.parametrize("method", [True, False])
  470. def test_pivot_with_tz(self, method):
  471. # GH 5878
  472. df = DataFrame(
  473. {
  474. "dt1": [
  475. datetime(2013, 1, 1, 9, 0),
  476. datetime(2013, 1, 2, 9, 0),
  477. datetime(2013, 1, 1, 9, 0),
  478. datetime(2013, 1, 2, 9, 0),
  479. ],
  480. "dt2": [
  481. datetime(2014, 1, 1, 9, 0),
  482. datetime(2014, 1, 1, 9, 0),
  483. datetime(2014, 1, 2, 9, 0),
  484. datetime(2014, 1, 2, 9, 0),
  485. ],
  486. "data1": np.arange(4, dtype="int64"),
  487. "data2": np.arange(4, dtype="int64"),
  488. }
  489. )
  490. df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d, tz="US/Pacific"))
  491. df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d, tz="Asia/Tokyo"))
  492. exp_col1 = Index(["data1", "data1", "data2", "data2"])
  493. exp_col2 = pd.DatetimeIndex(
  494. ["2014/01/01 09:00", "2014/01/02 09:00"] * 2, name="dt2", tz="Asia/Tokyo"
  495. )
  496. exp_col = MultiIndex.from_arrays([exp_col1, exp_col2])
  497. expected = DataFrame(
  498. [[0, 2, 0, 2], [1, 3, 1, 3]],
  499. index=pd.DatetimeIndex(
  500. ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
  501. ),
  502. columns=exp_col,
  503. )
  504. if method:
  505. pv = df.pivot(index="dt1", columns="dt2")
  506. else:
  507. pv = pd.pivot(df, index="dt1", columns="dt2")
  508. tm.assert_frame_equal(pv, expected)
  509. expected = DataFrame(
  510. [[0, 2], [1, 3]],
  511. index=pd.DatetimeIndex(
  512. ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
  513. ),
  514. columns=pd.DatetimeIndex(
  515. ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
  516. ),
  517. )
  518. if method:
  519. pv = df.pivot(index="dt1", columns="dt2", values="data1")
  520. else:
  521. pv = pd.pivot(df, index="dt1", columns="dt2", values="data1")
  522. tm.assert_frame_equal(pv, expected)
  523. def test_pivot_tz_in_values(self):
  524. # GH 14948
  525. df = DataFrame(
  526. [
  527. {
  528. "uid": "aa",
  529. "ts": pd.Timestamp("2016-08-12 13:00:00-0700", tz="US/Pacific"),
  530. },
  531. {
  532. "uid": "aa",
  533. "ts": pd.Timestamp("2016-08-12 08:00:00-0700", tz="US/Pacific"),
  534. },
  535. {
  536. "uid": "aa",
  537. "ts": pd.Timestamp("2016-08-12 14:00:00-0700", tz="US/Pacific"),
  538. },
  539. {
  540. "uid": "aa",
  541. "ts": pd.Timestamp("2016-08-25 11:00:00-0700", tz="US/Pacific"),
  542. },
  543. {
  544. "uid": "aa",
  545. "ts": pd.Timestamp("2016-08-25 13:00:00-0700", tz="US/Pacific"),
  546. },
  547. ]
  548. )
  549. df = df.set_index("ts").reset_index()
  550. mins = df.ts.map(lambda x: x.replace(hour=0, minute=0, second=0, microsecond=0))
  551. result = pivot_table(
  552. df.set_index("ts").reset_index(),
  553. values="ts",
  554. index=["uid"],
  555. columns=[mins],
  556. aggfunc=np.min,
  557. )
  558. expected = DataFrame(
  559. [
  560. [
  561. pd.Timestamp("2016-08-12 08:00:00-0700", tz="US/Pacific"),
  562. pd.Timestamp("2016-08-25 11:00:00-0700", tz="US/Pacific"),
  563. ]
  564. ],
  565. index=Index(["aa"], name="uid"),
  566. columns=pd.DatetimeIndex(
  567. [
  568. pd.Timestamp("2016-08-12 00:00:00", tz="US/Pacific"),
  569. pd.Timestamp("2016-08-25 00:00:00", tz="US/Pacific"),
  570. ],
  571. name="ts",
  572. ),
  573. )
  574. tm.assert_frame_equal(result, expected)
  575. @pytest.mark.parametrize("method", [True, False])
  576. def test_pivot_periods(self, method):
  577. df = DataFrame(
  578. {
  579. "p1": [
  580. pd.Period("2013-01-01", "D"),
  581. pd.Period("2013-01-02", "D"),
  582. pd.Period("2013-01-01", "D"),
  583. pd.Period("2013-01-02", "D"),
  584. ],
  585. "p2": [
  586. pd.Period("2013-01", "M"),
  587. pd.Period("2013-01", "M"),
  588. pd.Period("2013-02", "M"),
  589. pd.Period("2013-02", "M"),
  590. ],
  591. "data1": np.arange(4, dtype="int64"),
  592. "data2": np.arange(4, dtype="int64"),
  593. }
  594. )
  595. exp_col1 = Index(["data1", "data1", "data2", "data2"])
  596. exp_col2 = pd.PeriodIndex(["2013-01", "2013-02"] * 2, name="p2", freq="M")
  597. exp_col = MultiIndex.from_arrays([exp_col1, exp_col2])
  598. expected = DataFrame(
  599. [[0, 2, 0, 2], [1, 3, 1, 3]],
  600. index=pd.PeriodIndex(["2013-01-01", "2013-01-02"], name="p1", freq="D"),
  601. columns=exp_col,
  602. )
  603. if method:
  604. pv = df.pivot(index="p1", columns="p2")
  605. else:
  606. pv = pd.pivot(df, index="p1", columns="p2")
  607. tm.assert_frame_equal(pv, expected)
  608. expected = DataFrame(
  609. [[0, 2], [1, 3]],
  610. index=pd.PeriodIndex(["2013-01-01", "2013-01-02"], name="p1", freq="D"),
  611. columns=pd.PeriodIndex(["2013-01", "2013-02"], name="p2", freq="M"),
  612. )
  613. if method:
  614. pv = df.pivot(index="p1", columns="p2", values="data1")
  615. else:
  616. pv = pd.pivot(df, index="p1", columns="p2", values="data1")
  617. tm.assert_frame_equal(pv, expected)
  618. def test_pivot_periods_with_margins(self):
  619. # GH 28323
  620. df = DataFrame(
  621. {
  622. "a": [1, 1, 2, 2],
  623. "b": [
  624. pd.Period("2019Q1"),
  625. pd.Period("2019Q2"),
  626. pd.Period("2019Q1"),
  627. pd.Period("2019Q2"),
  628. ],
  629. "x": 1.0,
  630. }
  631. )
  632. expected = DataFrame(
  633. data=1.0,
  634. index=Index([1, 2, "All"], name="a"),
  635. columns=Index([pd.Period("2019Q1"), pd.Period("2019Q2"), "All"], name="b"),
  636. )
  637. result = df.pivot_table(index="a", columns="b", values="x", margins=True)
  638. tm.assert_frame_equal(expected, result)
  639. @pytest.mark.parametrize(
  640. "values",
  641. [
  642. ["baz", "zoo"],
  643. np.array(["baz", "zoo"]),
  644. Series(["baz", "zoo"]),
  645. Index(["baz", "zoo"]),
  646. ],
  647. )
  648. @pytest.mark.parametrize("method", [True, False])
  649. def test_pivot_with_list_like_values(self, values, method):
  650. # issue #17160
  651. df = DataFrame(
  652. {
  653. "foo": ["one", "one", "one", "two", "two", "two"],
  654. "bar": ["A", "B", "C", "A", "B", "C"],
  655. "baz": [1, 2, 3, 4, 5, 6],
  656. "zoo": ["x", "y", "z", "q", "w", "t"],
  657. }
  658. )
  659. if method:
  660. result = df.pivot(index="foo", columns="bar", values=values)
  661. else:
  662. result = pd.pivot(df, index="foo", columns="bar", values=values)
  663. data = [[1, 2, 3, "x", "y", "z"], [4, 5, 6, "q", "w", "t"]]
  664. index = Index(data=["one", "two"], name="foo")
  665. columns = MultiIndex(
  666. levels=[["baz", "zoo"], ["A", "B", "C"]],
  667. codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
  668. names=[None, "bar"],
  669. )
  670. expected = DataFrame(data=data, index=index, columns=columns, dtype="object")
  671. tm.assert_frame_equal(result, expected)
  672. @pytest.mark.parametrize(
  673. "values",
  674. [
  675. ["bar", "baz"],
  676. np.array(["bar", "baz"]),
  677. Series(["bar", "baz"]),
  678. Index(["bar", "baz"]),
  679. ],
  680. )
  681. @pytest.mark.parametrize("method", [True, False])
  682. def test_pivot_with_list_like_values_nans(self, values, method):
  683. # issue #17160
  684. df = DataFrame(
  685. {
  686. "foo": ["one", "one", "one", "two", "two", "two"],
  687. "bar": ["A", "B", "C", "A", "B", "C"],
  688. "baz": [1, 2, 3, 4, 5, 6],
  689. "zoo": ["x", "y", "z", "q", "w", "t"],
  690. }
  691. )
  692. if method:
  693. result = df.pivot(index="zoo", columns="foo", values=values)
  694. else:
  695. result = pd.pivot(df, index="zoo", columns="foo", values=values)
  696. data = [
  697. [np.nan, "A", np.nan, 4],
  698. [np.nan, "C", np.nan, 6],
  699. [np.nan, "B", np.nan, 5],
  700. ["A", np.nan, 1, np.nan],
  701. ["B", np.nan, 2, np.nan],
  702. ["C", np.nan, 3, np.nan],
  703. ]
  704. index = Index(data=["q", "t", "w", "x", "y", "z"], name="zoo")
  705. columns = MultiIndex(
  706. levels=[["bar", "baz"], ["one", "two"]],
  707. codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
  708. names=[None, "foo"],
  709. )
  710. expected = DataFrame(data=data, index=index, columns=columns, dtype="object")
  711. tm.assert_frame_equal(result, expected)
  712. def test_pivot_columns_none_raise_error(self):
  713. # GH 30924
  714. df = DataFrame({"col1": ["a", "b", "c"], "col2": [1, 2, 3], "col3": [1, 2, 3]})
  715. msg = r"pivot\(\) missing 1 required keyword-only argument: 'columns'"
  716. with pytest.raises(TypeError, match=msg):
  717. df.pivot(index="col1", values="col3") # pylint: disable=missing-kwoa
  718. @pytest.mark.xfail(
  719. reason="MultiIndexed unstack with tuple names fails with KeyError GH#19966"
  720. )
  721. @pytest.mark.parametrize("method", [True, False])
  722. def test_pivot_with_multiindex(self, method):
  723. # issue #17160
  724. index = Index(data=[0, 1, 2, 3, 4, 5])
  725. data = [
  726. ["one", "A", 1, "x"],
  727. ["one", "B", 2, "y"],
  728. ["one", "C", 3, "z"],
  729. ["two", "A", 4, "q"],
  730. ["two", "B", 5, "w"],
  731. ["two", "C", 6, "t"],
  732. ]
  733. columns = MultiIndex(
  734. levels=[["bar", "baz"], ["first", "second"]],
  735. codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
  736. )
  737. df = DataFrame(data=data, index=index, columns=columns, dtype="object")
  738. if method:
  739. result = df.pivot(
  740. index=("bar", "first"),
  741. columns=("bar", "second"),
  742. values=("baz", "first"),
  743. )
  744. else:
  745. result = pd.pivot(
  746. df,
  747. index=("bar", "first"),
  748. columns=("bar", "second"),
  749. values=("baz", "first"),
  750. )
  751. data = {
  752. "A": Series([1, 4], index=["one", "two"]),
  753. "B": Series([2, 5], index=["one", "two"]),
  754. "C": Series([3, 6], index=["one", "two"]),
  755. }
  756. expected = DataFrame(data)
  757. tm.assert_frame_equal(result, expected)
  758. @pytest.mark.parametrize("method", [True, False])
  759. def test_pivot_with_tuple_of_values(self, method):
  760. # issue #17160
  761. df = DataFrame(
  762. {
  763. "foo": ["one", "one", "one", "two", "two", "two"],
  764. "bar": ["A", "B", "C", "A", "B", "C"],
  765. "baz": [1, 2, 3, 4, 5, 6],
  766. "zoo": ["x", "y", "z", "q", "w", "t"],
  767. }
  768. )
  769. with pytest.raises(KeyError, match=r"^\('bar', 'baz'\)$"):
  770. # tuple is seen as a single column name
  771. if method:
  772. df.pivot(index="zoo", columns="foo", values=("bar", "baz"))
  773. else:
  774. pd.pivot(df, index="zoo", columns="foo", values=("bar", "baz"))
  775. def _check_output(
  776. self,
  777. result,
  778. values_col,
  779. data,
  780. index=["A", "B"],
  781. columns=["C"],
  782. margins_col="All",
  783. ):
  784. col_margins = result.loc[result.index[:-1], margins_col]
  785. expected_col_margins = data.groupby(index)[values_col].mean()
  786. tm.assert_series_equal(col_margins, expected_col_margins, check_names=False)
  787. assert col_margins.name == margins_col
  788. result = result.sort_index()
  789. index_margins = result.loc[(margins_col, "")].iloc[:-1]
  790. expected_ix_margins = data.groupby(columns)[values_col].mean()
  791. tm.assert_series_equal(index_margins, expected_ix_margins, check_names=False)
  792. assert index_margins.name == (margins_col, "")
  793. grand_total_margins = result.loc[(margins_col, ""), margins_col]
  794. expected_total_margins = data[values_col].mean()
  795. assert grand_total_margins == expected_total_margins
  796. def test_margins(self, data):
  797. # column specified
  798. result = data.pivot_table(
  799. values="D", index=["A", "B"], columns="C", margins=True, aggfunc=np.mean
  800. )
  801. self._check_output(result, "D", data)
  802. # Set a different margins_name (not 'All')
  803. result = data.pivot_table(
  804. values="D",
  805. index=["A", "B"],
  806. columns="C",
  807. margins=True,
  808. aggfunc=np.mean,
  809. margins_name="Totals",
  810. )
  811. self._check_output(result, "D", data, margins_col="Totals")
  812. # no column specified
  813. table = data.pivot_table(
  814. index=["A", "B"], columns="C", margins=True, aggfunc=np.mean
  815. )
  816. for value_col in table.columns.levels[0]:
  817. self._check_output(table[value_col], value_col, data)
  818. def test_no_col(self, data):
  819. # no col
  820. # to help with a buglet
  821. data.columns = [k * 2 for k in data.columns]
  822. with pytest.raises(TypeError, match="Could not convert"):
  823. data.pivot_table(index=["AA", "BB"], margins=True, aggfunc=np.mean)
  824. table = data.drop(columns="CC").pivot_table(
  825. index=["AA", "BB"], margins=True, aggfunc=np.mean
  826. )
  827. for value_col in table.columns:
  828. totals = table.loc[("All", ""), value_col]
  829. assert totals == data[value_col].mean()
  830. with pytest.raises(TypeError, match="Could not convert"):
  831. data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean")
  832. table = data.drop(columns="CC").pivot_table(
  833. index=["AA", "BB"], margins=True, aggfunc="mean"
  834. )
  835. for item in ["DD", "EE", "FF"]:
  836. totals = table.loc[("All", ""), item]
  837. assert totals == data[item].mean()
  838. @pytest.mark.parametrize(
  839. "columns, aggfunc, values, expected_columns",
  840. [
  841. (
  842. "A",
  843. np.mean,
  844. [[5.5, 5.5, 2.2, 2.2], [8.0, 8.0, 4.4, 4.4]],
  845. Index(["bar", "All", "foo", "All"], name="A"),
  846. ),
  847. (
  848. ["A", "B"],
  849. "sum",
  850. [
  851. [9, 13, 22, 5, 6, 11],
  852. [14, 18, 32, 11, 11, 22],
  853. ],
  854. MultiIndex.from_tuples(
  855. [
  856. ("bar", "one"),
  857. ("bar", "two"),
  858. ("bar", "All"),
  859. ("foo", "one"),
  860. ("foo", "two"),
  861. ("foo", "All"),
  862. ],
  863. names=["A", "B"],
  864. ),
  865. ),
  866. ],
  867. )
  868. def test_margin_with_only_columns_defined(
  869. self, columns, aggfunc, values, expected_columns
  870. ):
  871. # GH 31016
  872. df = DataFrame(
  873. {
  874. "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
  875. "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
  876. "C": [
  877. "small",
  878. "large",
  879. "large",
  880. "small",
  881. "small",
  882. "large",
  883. "small",
  884. "small",
  885. "large",
  886. ],
  887. "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
  888. "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
  889. }
  890. )
  891. if aggfunc != "sum":
  892. with pytest.raises(TypeError, match="Could not convert"):
  893. df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc)
  894. if "B" not in columns:
  895. df = df.drop(columns="B")
  896. result = df.drop(columns="C").pivot_table(
  897. columns=columns, margins=True, aggfunc=aggfunc
  898. )
  899. expected = DataFrame(values, index=Index(["D", "E"]), columns=expected_columns)
  900. tm.assert_frame_equal(result, expected)
  901. def test_margins_dtype(self, data):
  902. # GH 17013
  903. df = data.copy()
  904. df[["D", "E", "F"]] = np.arange(len(df) * 3).reshape(len(df), 3).astype("i8")
  905. mi_val = list(product(["bar", "foo"], ["one", "two"])) + [("All", "")]
  906. mi = MultiIndex.from_tuples(mi_val, names=("A", "B"))
  907. expected = DataFrame(
  908. {"dull": [12, 21, 3, 9, 45], "shiny": [33, 0, 36, 51, 120]}, index=mi
  909. ).rename_axis("C", axis=1)
  910. expected["All"] = expected["dull"] + expected["shiny"]
  911. result = df.pivot_table(
  912. values="D",
  913. index=["A", "B"],
  914. columns="C",
  915. margins=True,
  916. aggfunc=np.sum,
  917. fill_value=0,
  918. )
  919. tm.assert_frame_equal(expected, result)
  920. def test_margins_dtype_len(self, data):
  921. mi_val = list(product(["bar", "foo"], ["one", "two"])) + [("All", "")]
  922. mi = MultiIndex.from_tuples(mi_val, names=("A", "B"))
  923. expected = DataFrame(
  924. {"dull": [1, 1, 2, 1, 5], "shiny": [2, 0, 2, 2, 6]}, index=mi
  925. ).rename_axis("C", axis=1)
  926. expected["All"] = expected["dull"] + expected["shiny"]
  927. result = data.pivot_table(
  928. values="D",
  929. index=["A", "B"],
  930. columns="C",
  931. margins=True,
  932. aggfunc=len,
  933. fill_value=0,
  934. )
  935. tm.assert_frame_equal(expected, result)
  936. @pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)])
  937. def test_pivot_table_multiindex_only(self, cols):
  938. # GH 17038
  939. df2 = DataFrame({cols[0]: [1, 2, 3], cols[1]: [1, 2, 3], "v": [4, 5, 6]})
  940. result = df2.pivot_table(values="v", columns=cols)
  941. expected = DataFrame(
  942. [[4, 5, 6]],
  943. columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols),
  944. index=Index(["v"]),
  945. )
  946. tm.assert_frame_equal(result, expected)
  947. def test_pivot_table_retains_tz(self):
  948. dti = date_range("2016-01-01", periods=3, tz="Europe/Amsterdam")
  949. df = DataFrame({"A": np.random.randn(3), "B": np.random.randn(3), "C": dti})
  950. result = df.pivot_table(index=["B", "C"], dropna=False)
  951. # check tz retention
  952. assert result.index.levels[1].equals(dti)
  953. def test_pivot_integer_columns(self):
  954. # caused by upstream bug in unstack
  955. d = date.min
  956. data = list(
  957. product(
  958. ["foo", "bar"],
  959. ["A", "B", "C"],
  960. ["x1", "x2"],
  961. [d + timedelta(i) for i in range(20)],
  962. [1.0],
  963. )
  964. )
  965. df = DataFrame(data)
  966. table = df.pivot_table(values=4, index=[0, 1, 3], columns=[2])
  967. df2 = df.rename(columns=str)
  968. table2 = df2.pivot_table(values="4", index=["0", "1", "3"], columns=["2"])
  969. tm.assert_frame_equal(table, table2, check_names=False)
  970. def test_pivot_no_level_overlap(self):
  971. # GH #1181
  972. data = DataFrame(
  973. {
  974. "a": ["a", "a", "a", "a", "b", "b", "b", "b"] * 2,
  975. "b": [0, 0, 0, 0, 1, 1, 1, 1] * 2,
  976. "c": (["foo"] * 4 + ["bar"] * 4) * 2,
  977. "value": np.random.randn(16),
  978. }
  979. )
  980. table = data.pivot_table("value", index="a", columns=["b", "c"])
  981. grouped = data.groupby(["a", "b", "c"])["value"].mean()
  982. expected = grouped.unstack("b").unstack("c").dropna(axis=1, how="all")
  983. tm.assert_frame_equal(table, expected)
  984. def test_pivot_columns_lexsorted(self):
  985. n = 10000
  986. dtype = np.dtype(
  987. [
  988. ("Index", object),
  989. ("Symbol", object),
  990. ("Year", int),
  991. ("Month", int),
  992. ("Day", int),
  993. ("Quantity", int),
  994. ("Price", float),
  995. ]
  996. )
  997. products = np.array(
  998. [
  999. ("SP500", "ADBE"),
  1000. ("SP500", "NVDA"),
  1001. ("SP500", "ORCL"),
  1002. ("NDQ100", "AAPL"),
  1003. ("NDQ100", "MSFT"),
  1004. ("NDQ100", "GOOG"),
  1005. ("FTSE", "DGE.L"),
  1006. ("FTSE", "TSCO.L"),
  1007. ("FTSE", "GSK.L"),
  1008. ],
  1009. dtype=[("Index", object), ("Symbol", object)],
  1010. )
  1011. items = np.empty(n, dtype=dtype)
  1012. iproduct = np.random.randint(0, len(products), n)
  1013. items["Index"] = products["Index"][iproduct]
  1014. items["Symbol"] = products["Symbol"][iproduct]
  1015. dr = date_range(date(2000, 1, 1), date(2010, 12, 31))
  1016. dates = dr[np.random.randint(0, len(dr), n)]
  1017. items["Year"] = dates.year
  1018. items["Month"] = dates.month
  1019. items["Day"] = dates.day
  1020. items["Price"] = np.random.lognormal(4.0, 2.0, n)
  1021. df = DataFrame(items)
  1022. pivoted = df.pivot_table(
  1023. "Price",
  1024. index=["Month", "Day"],
  1025. columns=["Index", "Symbol", "Year"],
  1026. aggfunc="mean",
  1027. )
  1028. assert pivoted.columns.is_monotonic_increasing
  1029. def test_pivot_complex_aggfunc(self, data):
  1030. f = {"D": ["std"], "E": ["sum"]}
  1031. expected = data.groupby(["A", "B"]).agg(f).unstack("B")
  1032. result = data.pivot_table(index="A", columns="B", aggfunc=f)
  1033. tm.assert_frame_equal(result, expected)
  1034. def test_margins_no_values_no_cols(self, data):
  1035. # Regression test on pivot table: no values or cols passed.
  1036. result = data[["A", "B"]].pivot_table(
  1037. index=["A", "B"], aggfunc=len, margins=True
  1038. )
  1039. result_list = result.tolist()
  1040. assert sum(result_list[:-1]) == result_list[-1]
  1041. def test_margins_no_values_two_rows(self, data):
  1042. # Regression test on pivot table: no values passed but rows are a
  1043. # multi-index
  1044. result = data[["A", "B", "C"]].pivot_table(
  1045. index=["A", "B"], columns="C", aggfunc=len, margins=True
  1046. )
  1047. assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0]
  1048. def test_margins_no_values_one_row_one_col(self, data):
  1049. # Regression test on pivot table: no values passed but row and col
  1050. # defined
  1051. result = data[["A", "B"]].pivot_table(
  1052. index="A", columns="B", aggfunc=len, margins=True
  1053. )
  1054. assert result.All.tolist() == [4.0, 7.0, 11.0]
  1055. def test_margins_no_values_two_row_two_cols(self, data):
  1056. # Regression test on pivot table: no values passed but rows and cols
  1057. # are multi-indexed
  1058. data["D"] = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"]
  1059. result = data[["A", "B", "C", "D"]].pivot_table(
  1060. index=["A", "B"], columns=["C", "D"], aggfunc=len, margins=True
  1061. )
  1062. assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0]
  1063. @pytest.mark.parametrize("margin_name", ["foo", "one", 666, None, ["a", "b"]])
  1064. def test_pivot_table_with_margins_set_margin_name(self, margin_name, data):
  1065. # see gh-3335
  1066. msg = (
  1067. f'Conflicting name "{margin_name}" in margins|'
  1068. "margins_name argument must be a string"
  1069. )
  1070. with pytest.raises(ValueError, match=msg):
  1071. # multi-index index
  1072. pivot_table(
  1073. data,
  1074. values="D",
  1075. index=["A", "B"],
  1076. columns=["C"],
  1077. margins=True,
  1078. margins_name=margin_name,
  1079. )
  1080. with pytest.raises(ValueError, match=msg):
  1081. # multi-index column
  1082. pivot_table(
  1083. data,
  1084. values="D",
  1085. index=["C"],
  1086. columns=["A", "B"],
  1087. margins=True,
  1088. margins_name=margin_name,
  1089. )
  1090. with pytest.raises(ValueError, match=msg):
  1091. # non-multi-index index/column
  1092. pivot_table(
  1093. data,
  1094. values="D",
  1095. index=["A"],
  1096. columns=["B"],
  1097. margins=True,
  1098. margins_name=margin_name,
  1099. )
  1100. def test_pivot_timegrouper(self, using_array_manager):
  1101. df = DataFrame(
  1102. {
  1103. "Branch": "A A A A A A A B".split(),
  1104. "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(),
  1105. "Quantity": [1, 3, 5, 1, 8, 1, 9, 3],
  1106. "Date": [
  1107. datetime(2013, 1, 1),
  1108. datetime(2013, 1, 1),
  1109. datetime(2013, 10, 1),
  1110. datetime(2013, 10, 2),
  1111. datetime(2013, 10, 1),
  1112. datetime(2013, 10, 2),
  1113. datetime(2013, 12, 2),
  1114. datetime(2013, 12, 2),
  1115. ],
  1116. }
  1117. ).set_index("Date")
  1118. expected = DataFrame(
  1119. np.array([10, 18, 3], dtype="int64").reshape(1, 3),
  1120. index=pd.DatetimeIndex([datetime(2013, 12, 31)], freq="A"),
  1121. columns="Carl Joe Mark".split(),
  1122. )
  1123. expected.index.name = "Date"
  1124. expected.columns.name = "Buyer"
  1125. result = pivot_table(
  1126. df,
  1127. index=Grouper(freq="A"),
  1128. columns="Buyer",
  1129. values="Quantity",
  1130. aggfunc=np.sum,
  1131. )
  1132. tm.assert_frame_equal(result, expected)
  1133. result = pivot_table(
  1134. df,
  1135. index="Buyer",
  1136. columns=Grouper(freq="A"),
  1137. values="Quantity",
  1138. aggfunc=np.sum,
  1139. )
  1140. tm.assert_frame_equal(result, expected.T)
  1141. expected = DataFrame(
  1142. np.array([1, np.nan, 3, 9, 18, np.nan]).reshape(2, 3),
  1143. index=pd.DatetimeIndex(
  1144. [datetime(2013, 1, 1), datetime(2013, 7, 1)], freq="6MS"
  1145. ),
  1146. columns="Carl Joe Mark".split(),
  1147. )
  1148. expected.index.name = "Date"
  1149. expected.columns.name = "Buyer"
  1150. if using_array_manager:
  1151. # INFO(ArrayManager) column without NaNs can preserve int dtype
  1152. expected["Carl"] = expected["Carl"].astype("int64")
  1153. result = pivot_table(
  1154. df,
  1155. index=Grouper(freq="6MS"),
  1156. columns="Buyer",
  1157. values="Quantity",
  1158. aggfunc=np.sum,
  1159. )
  1160. tm.assert_frame_equal(result, expected)
  1161. result = pivot_table(
  1162. df,
  1163. index="Buyer",
  1164. columns=Grouper(freq="6MS"),
  1165. values="Quantity",
  1166. aggfunc=np.sum,
  1167. )
  1168. tm.assert_frame_equal(result, expected.T)
  1169. # passing the name
  1170. df = df.reset_index()
  1171. result = pivot_table(
  1172. df,
  1173. index=Grouper(freq="6MS", key="Date"),
  1174. columns="Buyer",
  1175. values="Quantity",
  1176. aggfunc=np.sum,
  1177. )
  1178. tm.assert_frame_equal(result, expected)
  1179. result = pivot_table(
  1180. df,
  1181. index="Buyer",
  1182. columns=Grouper(freq="6MS", key="Date"),
  1183. values="Quantity",
  1184. aggfunc=np.sum,
  1185. )
  1186. tm.assert_frame_equal(result, expected.T)
  1187. msg = "'The grouper name foo is not found'"
  1188. with pytest.raises(KeyError, match=msg):
  1189. pivot_table(
  1190. df,
  1191. index=Grouper(freq="6MS", key="foo"),
  1192. columns="Buyer",
  1193. values="Quantity",
  1194. aggfunc=np.sum,
  1195. )
  1196. with pytest.raises(KeyError, match=msg):
  1197. pivot_table(
  1198. df,
  1199. index="Buyer",
  1200. columns=Grouper(freq="6MS", key="foo"),
  1201. values="Quantity",
  1202. aggfunc=np.sum,
  1203. )
  1204. # passing the level
  1205. df = df.set_index("Date")
  1206. result = pivot_table(
  1207. df,
  1208. index=Grouper(freq="6MS", level="Date"),
  1209. columns="Buyer",
  1210. values="Quantity",
  1211. aggfunc=np.sum,
  1212. )
  1213. tm.assert_frame_equal(result, expected)
  1214. result = pivot_table(
  1215. df,
  1216. index="Buyer",
  1217. columns=Grouper(freq="6MS", level="Date"),
  1218. values="Quantity",
  1219. aggfunc=np.sum,
  1220. )
  1221. tm.assert_frame_equal(result, expected.T)
  1222. msg = "The level foo is not valid"
  1223. with pytest.raises(ValueError, match=msg):
  1224. pivot_table(
  1225. df,
  1226. index=Grouper(freq="6MS", level="foo"),
  1227. columns="Buyer",
  1228. values="Quantity",
  1229. aggfunc=np.sum,
  1230. )
  1231. with pytest.raises(ValueError, match=msg):
  1232. pivot_table(
  1233. df,
  1234. index="Buyer",
  1235. columns=Grouper(freq="6MS", level="foo"),
  1236. values="Quantity",
  1237. aggfunc=np.sum,
  1238. )
  1239. def test_pivot_timegrouper_double(self):
  1240. # double grouper
  1241. df = DataFrame(
  1242. {
  1243. "Branch": "A A A A A A A B".split(),
  1244. "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(),
  1245. "Quantity": [1, 3, 5, 1, 8, 1, 9, 3],
  1246. "Date": [
  1247. datetime(2013, 11, 1, 13, 0),
  1248. datetime(2013, 9, 1, 13, 5),
  1249. datetime(2013, 10, 1, 20, 0),
  1250. datetime(2013, 10, 2, 10, 0),
  1251. datetime(2013, 11, 1, 20, 0),
  1252. datetime(2013, 10, 2, 10, 0),
  1253. datetime(2013, 10, 2, 12, 0),
  1254. datetime(2013, 12, 5, 14, 0),
  1255. ],
  1256. "PayDay": [
  1257. datetime(2013, 10, 4, 0, 0),
  1258. datetime(2013, 10, 15, 13, 5),
  1259. datetime(2013, 9, 5, 20, 0),
  1260. datetime(2013, 11, 2, 10, 0),
  1261. datetime(2013, 10, 7, 20, 0),
  1262. datetime(2013, 9, 5, 10, 0),
  1263. datetime(2013, 12, 30, 12, 0),
  1264. datetime(2013, 11, 20, 14, 0),
  1265. ],
  1266. }
  1267. )
  1268. result = pivot_table(
  1269. df,
  1270. index=Grouper(freq="M", key="Date"),
  1271. columns=Grouper(freq="M", key="PayDay"),
  1272. values="Quantity",
  1273. aggfunc=np.sum,
  1274. )
  1275. expected = DataFrame(
  1276. np.array(
  1277. [
  1278. np.nan,
  1279. 3,
  1280. np.nan,
  1281. np.nan,
  1282. 6,
  1283. np.nan,
  1284. 1,
  1285. 9,
  1286. np.nan,
  1287. 9,
  1288. np.nan,
  1289. np.nan,
  1290. np.nan,
  1291. np.nan,
  1292. 3,
  1293. np.nan,
  1294. ]
  1295. ).reshape(4, 4),
  1296. index=pd.DatetimeIndex(
  1297. [
  1298. datetime(2013, 9, 30),
  1299. datetime(2013, 10, 31),
  1300. datetime(2013, 11, 30),
  1301. datetime(2013, 12, 31),
  1302. ],
  1303. freq="M",
  1304. ),
  1305. columns=pd.DatetimeIndex(
  1306. [
  1307. datetime(2013, 9, 30),
  1308. datetime(2013, 10, 31),
  1309. datetime(2013, 11, 30),
  1310. datetime(2013, 12, 31),
  1311. ],
  1312. freq="M",
  1313. ),
  1314. )
  1315. expected.index.name = "Date"
  1316. expected.columns.name = "PayDay"
  1317. tm.assert_frame_equal(result, expected)
  1318. result = pivot_table(
  1319. df,
  1320. index=Grouper(freq="M", key="PayDay"),
  1321. columns=Grouper(freq="M", key="Date"),
  1322. values="Quantity",
  1323. aggfunc=np.sum,
  1324. )
  1325. tm.assert_frame_equal(result, expected.T)
  1326. tuples = [
  1327. (datetime(2013, 9, 30), datetime(2013, 10, 31)),
  1328. (datetime(2013, 10, 31), datetime(2013, 9, 30)),
  1329. (datetime(2013, 10, 31), datetime(2013, 11, 30)),
  1330. (datetime(2013, 10, 31), datetime(2013, 12, 31)),
  1331. (datetime(2013, 11, 30), datetime(2013, 10, 31)),
  1332. (datetime(2013, 12, 31), datetime(2013, 11, 30)),
  1333. ]
  1334. idx = MultiIndex.from_tuples(tuples, names=["Date", "PayDay"])
  1335. expected = DataFrame(
  1336. np.array(
  1337. [3, np.nan, 6, np.nan, 1, np.nan, 9, np.nan, 9, np.nan, np.nan, 3]
  1338. ).reshape(6, 2),
  1339. index=idx,
  1340. columns=["A", "B"],
  1341. )
  1342. expected.columns.name = "Branch"
  1343. result = pivot_table(
  1344. df,
  1345. index=[Grouper(freq="M", key="Date"), Grouper(freq="M", key="PayDay")],
  1346. columns=["Branch"],
  1347. values="Quantity",
  1348. aggfunc=np.sum,
  1349. )
  1350. tm.assert_frame_equal(result, expected)
  1351. result = pivot_table(
  1352. df,
  1353. index=["Branch"],
  1354. columns=[Grouper(freq="M", key="Date"), Grouper(freq="M", key="PayDay")],
  1355. values="Quantity",
  1356. aggfunc=np.sum,
  1357. )
  1358. tm.assert_frame_equal(result, expected.T)
  1359. def test_pivot_datetime_tz(self):
  1360. dates1 = [
  1361. "2011-07-19 07:00:00",
  1362. "2011-07-19 08:00:00",
  1363. "2011-07-19 09:00:00",
  1364. "2011-07-19 07:00:00",
  1365. "2011-07-19 08:00:00",
  1366. "2011-07-19 09:00:00",
  1367. ]
  1368. dates2 = [
  1369. "2013-01-01 15:00:00",
  1370. "2013-01-01 15:00:00",
  1371. "2013-01-01 15:00:00",
  1372. "2013-02-01 15:00:00",
  1373. "2013-02-01 15:00:00",
  1374. "2013-02-01 15:00:00",
  1375. ]
  1376. df = DataFrame(
  1377. {
  1378. "label": ["a", "a", "a", "b", "b", "b"],
  1379. "dt1": dates1,
  1380. "dt2": dates2,
  1381. "value1": np.arange(6, dtype="int64"),
  1382. "value2": [1, 2] * 3,
  1383. }
  1384. )
  1385. df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d, tz="US/Pacific"))
  1386. df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d, tz="Asia/Tokyo"))
  1387. exp_idx = pd.DatetimeIndex(
  1388. ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"],
  1389. tz="US/Pacific",
  1390. name="dt1",
  1391. )
  1392. exp_col1 = Index(["value1", "value1"])
  1393. exp_col2 = Index(["a", "b"], name="label")
  1394. exp_col = MultiIndex.from_arrays([exp_col1, exp_col2])
  1395. expected = DataFrame([[0, 3], [1, 4], [2, 5]], index=exp_idx, columns=exp_col)
  1396. result = pivot_table(df, index=["dt1"], columns=["label"], values=["value1"])
  1397. tm.assert_frame_equal(result, expected)
  1398. exp_col1 = Index(["sum", "sum", "sum", "sum", "mean", "mean", "mean", "mean"])
  1399. exp_col2 = Index(["value1", "value1", "value2", "value2"] * 2)
  1400. exp_col3 = pd.DatetimeIndex(
  1401. ["2013-01-01 15:00:00", "2013-02-01 15:00:00"] * 4,
  1402. tz="Asia/Tokyo",
  1403. name="dt2",
  1404. )
  1405. exp_col = MultiIndex.from_arrays([exp_col1, exp_col2, exp_col3])
  1406. expected = DataFrame(
  1407. np.array(
  1408. [
  1409. [0, 3, 1, 2, 0, 3, 1, 2],
  1410. [1, 4, 2, 1, 1, 4, 2, 1],
  1411. [2, 5, 1, 2, 2, 5, 1, 2],
  1412. ],
  1413. dtype="int64",
  1414. ),
  1415. index=exp_idx,
  1416. columns=exp_col,
  1417. )
  1418. result = pivot_table(
  1419. df,
  1420. index=["dt1"],
  1421. columns=["dt2"],
  1422. values=["value1", "value2"],
  1423. aggfunc=[np.sum, np.mean],
  1424. )
  1425. tm.assert_frame_equal(result, expected)
  1426. def test_pivot_dtaccessor(self):
  1427. # GH 8103
  1428. dates1 = [
  1429. "2011-07-19 07:00:00",
  1430. "2011-07-19 08:00:00",
  1431. "2011-07-19 09:00:00",
  1432. "2011-07-19 07:00:00",
  1433. "2011-07-19 08:00:00",
  1434. "2011-07-19 09:00:00",
  1435. ]
  1436. dates2 = [
  1437. "2013-01-01 15:00:00",
  1438. "2013-01-01 15:00:00",
  1439. "2013-01-01 15:00:00",
  1440. "2013-02-01 15:00:00",
  1441. "2013-02-01 15:00:00",
  1442. "2013-02-01 15:00:00",
  1443. ]
  1444. df = DataFrame(
  1445. {
  1446. "label": ["a", "a", "a", "b", "b", "b"],
  1447. "dt1": dates1,
  1448. "dt2": dates2,
  1449. "value1": np.arange(6, dtype="int64"),
  1450. "value2": [1, 2] * 3,
  1451. }
  1452. )
  1453. df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d))
  1454. df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d))
  1455. result = pivot_table(
  1456. df, index="label", columns=df["dt1"].dt.hour, values="value1"
  1457. )
  1458. exp_idx = Index(["a", "b"], name="label")
  1459. expected = DataFrame(
  1460. {7: [0, 3], 8: [1, 4], 9: [2, 5]},
  1461. index=exp_idx,
  1462. columns=Index([7, 8, 9], dtype=np.int32, name="dt1"),
  1463. )
  1464. tm.assert_frame_equal(result, expected)
  1465. result = pivot_table(
  1466. df, index=df["dt2"].dt.month, columns=df["dt1"].dt.hour, values="value1"
  1467. )
  1468. expected = DataFrame(
  1469. {7: [0, 3], 8: [1, 4], 9: [2, 5]},
  1470. index=Index([1, 2], dtype=np.int32, name="dt2"),
  1471. columns=Index([7, 8, 9], dtype=np.int32, name="dt1"),
  1472. )
  1473. tm.assert_frame_equal(result, expected)
  1474. result = pivot_table(
  1475. df,
  1476. index=df["dt2"].dt.year.values,
  1477. columns=[df["dt1"].dt.hour, df["dt2"].dt.month],
  1478. values="value1",
  1479. )
  1480. exp_col = MultiIndex.from_arrays(
  1481. [
  1482. np.array([7, 7, 8, 8, 9, 9], dtype=np.int32),
  1483. np.array([1, 2] * 3, dtype=np.int32),
  1484. ],
  1485. names=["dt1", "dt2"],
  1486. )
  1487. expected = DataFrame(
  1488. np.array([[0, 3, 1, 4, 2, 5]], dtype="int64"),
  1489. index=Index([2013], dtype=np.int32),
  1490. columns=exp_col,
  1491. )
  1492. tm.assert_frame_equal(result, expected)
  1493. result = pivot_table(
  1494. df,
  1495. index=np.array(["X", "X", "X", "X", "Y", "Y"]),
  1496. columns=[df["dt1"].dt.hour, df["dt2"].dt.month],
  1497. values="value1",
  1498. )
  1499. expected = DataFrame(
  1500. np.array(
  1501. [[0, 3, 1, np.nan, 2, np.nan], [np.nan, np.nan, np.nan, 4, np.nan, 5]]
  1502. ),
  1503. index=["X", "Y"],
  1504. columns=exp_col,
  1505. )
  1506. tm.assert_frame_equal(result, expected)
  1507. @pytest.mark.parametrize("i", range(1, 367))
  1508. def test_daily(self, i):
  1509. rng = date_range("1/1/2000", "12/31/2004", freq="D")
  1510. ts = Series(np.random.randn(len(rng)), index=rng)
  1511. annual = pivot_table(
  1512. DataFrame(ts), index=ts.index.year, columns=ts.index.dayofyear
  1513. )
  1514. annual.columns = annual.columns.droplevel(0)
  1515. doy = np.asarray(ts.index.dayofyear)
  1516. subset = ts[doy == i]
  1517. subset.index = subset.index.year
  1518. result = annual[i].dropna()
  1519. tm.assert_series_equal(result, subset, check_names=False)
  1520. assert result.name == i
  1521. @pytest.mark.parametrize("i", range(1, 13))
  1522. def test_monthly(self, i):
  1523. rng = date_range("1/1/2000", "12/31/2004", freq="M")
  1524. ts = Series(np.random.randn(len(rng)), index=rng)
  1525. annual = pivot_table(DataFrame(ts), index=ts.index.year, columns=ts.index.month)
  1526. annual.columns = annual.columns.droplevel(0)
  1527. month = ts.index.month
  1528. subset = ts[month == i]
  1529. subset.index = subset.index.year
  1530. result = annual[i].dropna()
  1531. tm.assert_series_equal(result, subset, check_names=False)
  1532. assert result.name == i
  1533. def test_pivot_table_with_iterator_values(self, data):
  1534. # GH 12017
  1535. aggs = {"D": "sum", "E": "mean"}
  1536. pivot_values_list = pivot_table(
  1537. data, index=["A"], values=list(aggs.keys()), aggfunc=aggs
  1538. )
  1539. pivot_values_keys = pivot_table(
  1540. data, index=["A"], values=aggs.keys(), aggfunc=aggs
  1541. )
  1542. tm.assert_frame_equal(pivot_values_keys, pivot_values_list)
  1543. agg_values_gen = (value for value in aggs)
  1544. pivot_values_gen = pivot_table(
  1545. data, index=["A"], values=agg_values_gen, aggfunc=aggs
  1546. )
  1547. tm.assert_frame_equal(pivot_values_gen, pivot_values_list)
  1548. def test_pivot_table_margins_name_with_aggfunc_list(self):
  1549. # GH 13354
  1550. margins_name = "Weekly"
  1551. costs = DataFrame(
  1552. {
  1553. "item": ["bacon", "cheese", "bacon", "cheese"],
  1554. "cost": [2.5, 4.5, 3.2, 3.3],
  1555. "day": ["M", "M", "T", "T"],
  1556. }
  1557. )
  1558. table = costs.pivot_table(
  1559. index="item",
  1560. columns="day",
  1561. margins=True,
  1562. margins_name=margins_name,
  1563. aggfunc=[np.mean, max],
  1564. )
  1565. ix = Index(["bacon", "cheese", margins_name], dtype="object", name="item")
  1566. tups = [
  1567. ("mean", "cost", "M"),
  1568. ("mean", "cost", "T"),
  1569. ("mean", "cost", margins_name),
  1570. ("max", "cost", "M"),
  1571. ("max", "cost", "T"),
  1572. ("max", "cost", margins_name),
  1573. ]
  1574. cols = MultiIndex.from_tuples(tups, names=[None, None, "day"])
  1575. expected = DataFrame(table.values, index=ix, columns=cols)
  1576. tm.assert_frame_equal(table, expected)
  1577. def test_categorical_margins(self, observed, request):
  1578. if observed:
  1579. request.node.add_marker(
  1580. pytest.mark.xfail(
  1581. reason="GH#17035 (np.mean of ints is casted back to ints)"
  1582. )
  1583. )
  1584. # GH 10989
  1585. df = DataFrame(
  1586. {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2}
  1587. )
  1588. expected = DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]])
  1589. expected.index = Index([0, 1, "All"], name="y")
  1590. expected.columns = Index([0, 1, "All"], name="z")
  1591. table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
  1592. tm.assert_frame_equal(table, expected)
  1593. def test_categorical_margins_category(self, observed, request):
  1594. if observed:
  1595. request.node.add_marker(
  1596. pytest.mark.xfail(
  1597. reason="GH#17035 (np.mean of ints is casted back to ints)"
  1598. )
  1599. )
  1600. df = DataFrame(
  1601. {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2}
  1602. )
  1603. expected = DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]])
  1604. expected.index = Index([0, 1, "All"], name="y")
  1605. expected.columns = Index([0, 1, "All"], name="z")
  1606. df.y = df.y.astype("category")
  1607. df.z = df.z.astype("category")
  1608. table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
  1609. tm.assert_frame_equal(table, expected)
  1610. def test_margins_casted_to_float(self):
  1611. # GH 24893
  1612. df = DataFrame(
  1613. {
  1614. "A": [2, 4, 6, 8],
  1615. "B": [1, 4, 5, 8],
  1616. "C": [1, 3, 4, 6],
  1617. "D": ["X", "X", "Y", "Y"],
  1618. }
  1619. )
  1620. result = pivot_table(df, index="D", margins=True)
  1621. expected = DataFrame(
  1622. {"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]},
  1623. index=Index(["X", "Y", "All"], name="D"),
  1624. )
  1625. tm.assert_frame_equal(result, expected)
  1626. def test_pivot_with_categorical(self, observed, ordered):
  1627. # gh-21370
  1628. idx = [np.nan, "low", "high", "low", np.nan]
  1629. col = [np.nan, "A", "B", np.nan, "A"]
  1630. df = DataFrame(
  1631. {
  1632. "In": Categorical(idx, categories=["low", "high"], ordered=ordered),
  1633. "Col": Categorical(col, categories=["A", "B"], ordered=ordered),
  1634. "Val": range(1, 6),
  1635. }
  1636. )
  1637. # case with index/columns/value
  1638. result = df.pivot_table(
  1639. index="In", columns="Col", values="Val", observed=observed
  1640. )
  1641. expected_cols = pd.CategoricalIndex(["A", "B"], ordered=ordered, name="Col")
  1642. expected = DataFrame(data=[[2.0, np.nan], [np.nan, 3.0]], columns=expected_cols)
  1643. expected.index = Index(
  1644. Categorical(["low", "high"], categories=["low", "high"], ordered=ordered),
  1645. name="In",
  1646. )
  1647. tm.assert_frame_equal(result, expected)
  1648. # case with columns/value
  1649. result = df.pivot_table(columns="Col", values="Val", observed=observed)
  1650. expected = DataFrame(
  1651. data=[[3.5, 3.0]], columns=expected_cols, index=Index(["Val"])
  1652. )
  1653. tm.assert_frame_equal(result, expected)
  1654. def test_categorical_aggfunc(self, observed):
  1655. # GH 9534
  1656. df = DataFrame(
  1657. {"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]}
  1658. )
  1659. df["C1"] = df["C1"].astype("category")
  1660. result = df.pivot_table(
  1661. "V", index="C1", columns="C2", dropna=observed, aggfunc="count"
  1662. )
  1663. expected_index = pd.CategoricalIndex(
  1664. ["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1"
  1665. )
  1666. expected_columns = Index(["a", "b"], name="C2")
  1667. expected_data = np.array([[1, 0], [1, 0], [0, 2]], dtype=np.int64)
  1668. expected = DataFrame(
  1669. expected_data, index=expected_index, columns=expected_columns
  1670. )
  1671. tm.assert_frame_equal(result, expected)
  1672. def test_categorical_pivot_index_ordering(self, observed):
  1673. # GH 8731
  1674. df = DataFrame(
  1675. {
  1676. "Sales": [100, 120, 220],
  1677. "Month": ["January", "January", "January"],
  1678. "Year": [2013, 2014, 2013],
  1679. }
  1680. )
  1681. months = [
  1682. "January",
  1683. "February",
  1684. "March",
  1685. "April",
  1686. "May",
  1687. "June",
  1688. "July",
  1689. "August",
  1690. "September",
  1691. "October",
  1692. "November",
  1693. "December",
  1694. ]
  1695. df["Month"] = df["Month"].astype("category").cat.set_categories(months)
  1696. result = df.pivot_table(
  1697. values="Sales",
  1698. index="Month",
  1699. columns="Year",
  1700. observed=observed,
  1701. aggfunc="sum",
  1702. )
  1703. expected_columns = Index([2013, 2014], name="Year", dtype="int64")
  1704. expected_index = pd.CategoricalIndex(
  1705. months, categories=months, ordered=False, name="Month"
  1706. )
  1707. expected_data = [[320, 120]] + [[0, 0]] * 11
  1708. expected = DataFrame(
  1709. expected_data, index=expected_index, columns=expected_columns
  1710. )
  1711. if observed:
  1712. expected = expected.loc[["January"]]
  1713. tm.assert_frame_equal(result, expected)
  1714. def test_pivot_table_not_series(self):
  1715. # GH 4386
  1716. # pivot_table always returns a DataFrame
  1717. # when values is not list like and columns is None
  1718. # and aggfunc is not instance of list
  1719. df = DataFrame({"col1": [3, 4, 5], "col2": ["C", "D", "E"], "col3": [1, 3, 9]})
  1720. result = df.pivot_table("col1", index=["col3", "col2"], aggfunc=np.sum)
  1721. m = MultiIndex.from_arrays([[1, 3, 9], ["C", "D", "E"]], names=["col3", "col2"])
  1722. expected = DataFrame([3, 4, 5], index=m, columns=["col1"])
  1723. tm.assert_frame_equal(result, expected)
  1724. result = df.pivot_table("col1", index="col3", columns="col2", aggfunc=np.sum)
  1725. expected = DataFrame(
  1726. [[3, np.NaN, np.NaN], [np.NaN, 4, np.NaN], [np.NaN, np.NaN, 5]],
  1727. index=Index([1, 3, 9], name="col3"),
  1728. columns=Index(["C", "D", "E"], name="col2"),
  1729. )
  1730. tm.assert_frame_equal(result, expected)
  1731. result = df.pivot_table("col1", index="col3", aggfunc=[np.sum])
  1732. m = MultiIndex.from_arrays([["sum"], ["col1"]])
  1733. expected = DataFrame([3, 4, 5], index=Index([1, 3, 9], name="col3"), columns=m)
  1734. tm.assert_frame_equal(result, expected)
  1735. def test_pivot_margins_name_unicode(self):
  1736. # issue #13292
  1737. greek = "\u0394\u03bf\u03ba\u03b9\u03bc\u03ae"
  1738. frame = DataFrame({"foo": [1, 2, 3]})
  1739. table = pivot_table(
  1740. frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek
  1741. )
  1742. index = Index([1, 2, 3, greek], dtype="object", name="foo")
  1743. expected = DataFrame(index=index, columns=[])
  1744. tm.assert_frame_equal(table, expected)
  1745. def test_pivot_string_as_func(self):
  1746. # GH #18713
  1747. # for correctness purposes
  1748. data = DataFrame(
  1749. {
  1750. "A": [
  1751. "foo",
  1752. "foo",
  1753. "foo",
  1754. "foo",
  1755. "bar",
  1756. "bar",
  1757. "bar",
  1758. "bar",
  1759. "foo",
  1760. "foo",
  1761. "foo",
  1762. ],
  1763. "B": [
  1764. "one",
  1765. "one",
  1766. "one",
  1767. "two",
  1768. "one",
  1769. "one",
  1770. "one",
  1771. "two",
  1772. "two",
  1773. "two",
  1774. "one",
  1775. ],
  1776. "C": range(11),
  1777. }
  1778. )
  1779. result = pivot_table(data, index="A", columns="B", aggfunc="sum")
  1780. mi = MultiIndex(
  1781. levels=[["C"], ["one", "two"]], codes=[[0, 0], [0, 1]], names=[None, "B"]
  1782. )
  1783. expected = DataFrame(
  1784. {("C", "one"): {"bar": 15, "foo": 13}, ("C", "two"): {"bar": 7, "foo": 20}},
  1785. columns=mi,
  1786. ).rename_axis("A")
  1787. tm.assert_frame_equal(result, expected)
  1788. result = pivot_table(data, index="A", columns="B", aggfunc=["sum", "mean"])
  1789. mi = MultiIndex(
  1790. levels=[["sum", "mean"], ["C"], ["one", "two"]],
  1791. codes=[[0, 0, 1, 1], [0, 0, 0, 0], [0, 1, 0, 1]],
  1792. names=[None, None, "B"],
  1793. )
  1794. expected = DataFrame(
  1795. {
  1796. ("mean", "C", "one"): {"bar": 5.0, "foo": 3.25},
  1797. ("mean", "C", "two"): {"bar": 7.0, "foo": 6.666666666666667},
  1798. ("sum", "C", "one"): {"bar": 15, "foo": 13},
  1799. ("sum", "C", "two"): {"bar": 7, "foo": 20},
  1800. },
  1801. columns=mi,
  1802. ).rename_axis("A")
  1803. tm.assert_frame_equal(result, expected)
  1804. @pytest.mark.parametrize(
  1805. "f, f_numpy",
  1806. [
  1807. ("sum", np.sum),
  1808. ("mean", np.mean),
  1809. ("std", np.std),
  1810. (["sum", "mean"], [np.sum, np.mean]),
  1811. (["sum", "std"], [np.sum, np.std]),
  1812. (["std", "mean"], [np.std, np.mean]),
  1813. ],
  1814. )
  1815. def test_pivot_string_func_vs_func(self, f, f_numpy, data):
  1816. # GH #18713
  1817. # for consistency purposes
  1818. data = data.drop(columns="C")
  1819. result = pivot_table(data, index="A", columns="B", aggfunc=f)
  1820. expected = pivot_table(data, index="A", columns="B", aggfunc=f_numpy)
  1821. tm.assert_frame_equal(result, expected)
  1822. @pytest.mark.slow
  1823. def test_pivot_number_of_levels_larger_than_int32(self, monkeypatch):
  1824. # GH 20601
  1825. # GH 26314: Change ValueError to PerformanceWarning
  1826. class MockUnstacker(reshape_lib._Unstacker):
  1827. def __init__(self, *args, **kwargs) -> None:
  1828. # __init__ will raise the warning
  1829. super().__init__(*args, **kwargs)
  1830. raise Exception("Don't compute final result.")
  1831. with monkeypatch.context() as m:
  1832. m.setattr(reshape_lib, "_Unstacker", MockUnstacker)
  1833. df = DataFrame(
  1834. {"ind1": np.arange(2**16), "ind2": np.arange(2**16), "count": 0}
  1835. )
  1836. msg = "The following operation may generate"
  1837. with tm.assert_produces_warning(PerformanceWarning, match=msg):
  1838. with pytest.raises(Exception, match="Don't compute final result."):
  1839. df.pivot_table(
  1840. index="ind1", columns="ind2", values="count", aggfunc="count"
  1841. )
  1842. def test_pivot_table_aggfunc_dropna(self, dropna):
  1843. # GH 22159
  1844. df = DataFrame(
  1845. {
  1846. "fruit": ["apple", "peach", "apple"],
  1847. "size": [1, 1, 2],
  1848. "taste": [7, 6, 6],
  1849. }
  1850. )
  1851. def ret_one(x):
  1852. return 1
  1853. def ret_sum(x):
  1854. return sum(x)
  1855. def ret_none(x):
  1856. return np.nan
  1857. result = pivot_table(
  1858. df, columns="fruit", aggfunc=[ret_sum, ret_none, ret_one], dropna=dropna
  1859. )
  1860. data = [[3, 1, np.nan, np.nan, 1, 1], [13, 6, np.nan, np.nan, 1, 1]]
  1861. col = MultiIndex.from_product(
  1862. [["ret_sum", "ret_none", "ret_one"], ["apple", "peach"]],
  1863. names=[None, "fruit"],
  1864. )
  1865. expected = DataFrame(data, index=["size", "taste"], columns=col)
  1866. if dropna:
  1867. expected = expected.dropna(axis="columns")
  1868. tm.assert_frame_equal(result, expected)
  1869. def test_pivot_table_aggfunc_scalar_dropna(self, dropna):
  1870. # GH 22159
  1871. df = DataFrame(
  1872. {"A": ["one", "two", "one"], "x": [3, np.nan, 2], "y": [1, np.nan, np.nan]}
  1873. )
  1874. result = pivot_table(df, columns="A", aggfunc=np.mean, dropna=dropna)
  1875. data = [[2.5, np.nan], [1, np.nan]]
  1876. col = Index(["one", "two"], name="A")
  1877. expected = DataFrame(data, index=["x", "y"], columns=col)
  1878. if dropna:
  1879. expected = expected.dropna(axis="columns")
  1880. tm.assert_frame_equal(result, expected)
  1881. @pytest.mark.parametrize("margins", [True, False])
  1882. def test_pivot_table_empty_aggfunc(self, margins):
  1883. # GH 9186 & GH 13483 & GH 49240
  1884. df = DataFrame(
  1885. {
  1886. "A": [2, 2, 3, 3, 2],
  1887. "id": [5, 6, 7, 8, 9],
  1888. "C": ["p", "q", "q", "p", "q"],
  1889. "D": [None, None, None, None, None],
  1890. }
  1891. )
  1892. result = df.pivot_table(
  1893. index="A", columns="D", values="id", aggfunc=np.size, margins=margins
  1894. )
  1895. exp_cols = Index([], name="D")
  1896. expected = DataFrame(index=Index([], dtype="int64", name="A"), columns=exp_cols)
  1897. tm.assert_frame_equal(result, expected)
  1898. def test_pivot_table_no_column_raises(self):
  1899. # GH 10326
  1900. def agg(arr):
  1901. return np.mean(arr)
  1902. df = DataFrame({"X": [0, 0, 1, 1], "Y": [0, 1, 0, 1], "Z": [10, 20, 30, 40]})
  1903. with pytest.raises(KeyError, match="notpresent"):
  1904. df.pivot_table("notpresent", "X", "Y", aggfunc=agg)
  1905. def test_pivot_table_multiindex_columns_doctest_case(self):
  1906. # The relevant characteristic is that the call
  1907. # to maybe_downcast_to_dtype(agged[v], data[v].dtype) in
  1908. # __internal_pivot_table has `agged[v]` a DataFrame instead of Series,
  1909. # In this case this is because agged.columns is a MultiIndex and 'v'
  1910. # is only indexing on its first level.
  1911. df = DataFrame(
  1912. {
  1913. "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
  1914. "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
  1915. "C": [
  1916. "small",
  1917. "large",
  1918. "large",
  1919. "small",
  1920. "small",
  1921. "large",
  1922. "small",
  1923. "small",
  1924. "large",
  1925. ],
  1926. "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
  1927. "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
  1928. }
  1929. )
  1930. table = pivot_table(
  1931. df,
  1932. values=["D", "E"],
  1933. index=["A", "C"],
  1934. aggfunc={"D": np.mean, "E": [min, max, np.mean]},
  1935. )
  1936. cols = MultiIndex.from_tuples(
  1937. [("D", "mean"), ("E", "max"), ("E", "mean"), ("E", "min")]
  1938. )
  1939. index = MultiIndex.from_tuples(
  1940. [("bar", "large"), ("bar", "small"), ("foo", "large"), ("foo", "small")],
  1941. names=["A", "C"],
  1942. )
  1943. vals = np.array(
  1944. [
  1945. [5.5, 9.0, 7.5, 6.0],
  1946. [5.5, 9.0, 8.5, 8.0],
  1947. [2.0, 5.0, 4.5, 4.0],
  1948. [2.33333333, 6.0, 4.33333333, 2.0],
  1949. ]
  1950. )
  1951. expected = DataFrame(vals, columns=cols, index=index)
  1952. expected[("E", "min")] = expected[("E", "min")].astype(np.int64)
  1953. expected[("E", "max")] = expected[("E", "max")].astype(np.int64)
  1954. tm.assert_frame_equal(table, expected)
  1955. def test_pivot_table_sort_false(self):
  1956. # GH#39143
  1957. df = DataFrame(
  1958. {
  1959. "a": ["d1", "d4", "d3"],
  1960. "col": ["a", "b", "c"],
  1961. "num": [23, 21, 34],
  1962. "year": ["2018", "2018", "2019"],
  1963. }
  1964. )
  1965. result = df.pivot_table(
  1966. index=["a", "col"], columns="year", values="num", aggfunc="sum", sort=False
  1967. )
  1968. expected = DataFrame(
  1969. [[23, np.nan], [21, np.nan], [np.nan, 34]],
  1970. columns=Index(["2018", "2019"], name="year"),
  1971. index=MultiIndex.from_arrays(
  1972. [["d1", "d4", "d3"], ["a", "b", "c"]], names=["a", "col"]
  1973. ),
  1974. )
  1975. tm.assert_frame_equal(result, expected)
  1976. def test_pivot_table_nullable_margins(self):
  1977. # GH#48681
  1978. df = DataFrame(
  1979. {"a": "A", "b": [1, 2], "sales": Series([10, 11], dtype="Int64")}
  1980. )
  1981. result = df.pivot_table(index="b", columns="a", margins=True, aggfunc="sum")
  1982. expected = DataFrame(
  1983. [[10, 10], [11, 11], [21, 21]],
  1984. index=Index([1, 2, "All"], name="b"),
  1985. columns=MultiIndex.from_tuples(
  1986. [("sales", "A"), ("sales", "All")], names=[None, "a"]
  1987. ),
  1988. dtype="Int64",
  1989. )
  1990. tm.assert_frame_equal(result, expected)
  1991. def test_pivot_table_sort_false_with_multiple_values(self):
  1992. df = DataFrame(
  1993. {
  1994. "firstname": ["John", "Michael"],
  1995. "lastname": ["Foo", "Bar"],
  1996. "height": [173, 182],
  1997. "age": [47, 33],
  1998. }
  1999. )
  2000. result = df.pivot_table(
  2001. index=["lastname", "firstname"], values=["height", "age"], sort=False
  2002. )
  2003. expected = DataFrame(
  2004. [[173, 47], [182, 33]],
  2005. columns=["height", "age"],
  2006. index=MultiIndex.from_tuples(
  2007. [("Foo", "John"), ("Bar", "Michael")],
  2008. names=["lastname", "firstname"],
  2009. ),
  2010. )
  2011. tm.assert_frame_equal(result, expected)
  2012. def test_pivot_table_with_margins_and_numeric_columns(self):
  2013. # GH 26568
  2014. df = DataFrame([["a", "x", 1], ["a", "y", 2], ["b", "y", 3], ["b", "z", 4]])
  2015. df.columns = [10, 20, 30]
  2016. result = df.pivot_table(
  2017. index=10, columns=20, values=30, aggfunc="sum", fill_value=0, margins=True
  2018. )
  2019. expected = DataFrame([[1, 2, 0, 3], [0, 3, 4, 7], [1, 5, 4, 10]])
  2020. expected.columns = ["x", "y", "z", "All"]
  2021. expected.index = ["a", "b", "All"]
  2022. expected.columns.name = 20
  2023. expected.index.name = 10
  2024. tm.assert_frame_equal(result, expected)
  2025. @pytest.mark.parametrize("dropna", [True, False])
  2026. def test_pivot_ea_dtype_dropna(self, dropna):
  2027. # GH#47477
  2028. df = DataFrame({"x": "a", "y": "b", "age": Series([20, 40], dtype="Int64")})
  2029. result = df.pivot_table(
  2030. index="x", columns="y", values="age", aggfunc="mean", dropna=dropna
  2031. )
  2032. expected = DataFrame(
  2033. [[30]],
  2034. index=Index(["a"], name="x"),
  2035. columns=Index(["b"], name="y"),
  2036. dtype="Float64",
  2037. )
  2038. tm.assert_frame_equal(result, expected)
  2039. def test_pivot_table_datetime_warning(self):
  2040. # GH#48683
  2041. df = DataFrame(
  2042. {
  2043. "a": "A",
  2044. "b": [1, 2],
  2045. "date": pd.Timestamp("2019-12-31"),
  2046. "sales": [10.0, 11],
  2047. }
  2048. )
  2049. with tm.assert_produces_warning(None):
  2050. result = df.pivot_table(
  2051. index=["b", "date"], columns="a", margins=True, aggfunc="sum"
  2052. )
  2053. expected = DataFrame(
  2054. [[10.0, 10.0], [11.0, 11.0], [21.0, 21.0]],
  2055. index=MultiIndex.from_arrays(
  2056. [
  2057. Index([1, 2, "All"], name="b"),
  2058. Index(
  2059. [pd.Timestamp("2019-12-31"), pd.Timestamp("2019-12-31"), ""],
  2060. dtype=object,
  2061. name="date",
  2062. ),
  2063. ]
  2064. ),
  2065. columns=MultiIndex.from_tuples(
  2066. [("sales", "A"), ("sales", "All")], names=[None, "a"]
  2067. ),
  2068. )
  2069. tm.assert_frame_equal(result, expected)
  2070. def test_pivot_table_with_mixed_nested_tuples(self, using_array_manager):
  2071. # GH 50342
  2072. df = DataFrame(
  2073. {
  2074. "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
  2075. "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
  2076. "C": [
  2077. "small",
  2078. "large",
  2079. "large",
  2080. "small",
  2081. "small",
  2082. "large",
  2083. "small",
  2084. "small",
  2085. "large",
  2086. ],
  2087. "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
  2088. "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
  2089. ("col5",): [
  2090. "foo",
  2091. "foo",
  2092. "foo",
  2093. "foo",
  2094. "foo",
  2095. "bar",
  2096. "bar",
  2097. "bar",
  2098. "bar",
  2099. ],
  2100. ("col6", 6): [
  2101. "one",
  2102. "one",
  2103. "one",
  2104. "two",
  2105. "two",
  2106. "one",
  2107. "one",
  2108. "two",
  2109. "two",
  2110. ],
  2111. (7, "seven"): [
  2112. "small",
  2113. "large",
  2114. "large",
  2115. "small",
  2116. "small",
  2117. "large",
  2118. "small",
  2119. "small",
  2120. "large",
  2121. ],
  2122. }
  2123. )
  2124. result = pivot_table(
  2125. df, values="D", index=["A", "B"], columns=[(7, "seven")], aggfunc=np.sum
  2126. )
  2127. expected = DataFrame(
  2128. [[4.0, 5.0], [7.0, 6.0], [4.0, 1.0], [np.nan, 6.0]],
  2129. columns=Index(["large", "small"], name=(7, "seven")),
  2130. index=MultiIndex.from_arrays(
  2131. [["bar", "bar", "foo", "foo"], ["one", "two"] * 2], names=["A", "B"]
  2132. ),
  2133. )
  2134. if using_array_manager:
  2135. # INFO(ArrayManager) column without NaNs can preserve int dtype
  2136. expected["small"] = expected["small"].astype("int64")
  2137. tm.assert_frame_equal(result, expected)
  2138. class TestPivot:
  2139. def test_pivot(self):
  2140. data = {
  2141. "index": ["A", "B", "C", "C", "B", "A"],
  2142. "columns": ["One", "One", "One", "Two", "Two", "Two"],
  2143. "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0],
  2144. }
  2145. frame = DataFrame(data)
  2146. pivoted = frame.pivot(index="index", columns="columns", values="values")
  2147. expected = DataFrame(
  2148. {
  2149. "One": {"A": 1.0, "B": 2.0, "C": 3.0},
  2150. "Two": {"A": 1.0, "B": 2.0, "C": 3.0},
  2151. }
  2152. )
  2153. expected.index.name, expected.columns.name = "index", "columns"
  2154. tm.assert_frame_equal(pivoted, expected)
  2155. # name tracking
  2156. assert pivoted.index.name == "index"
  2157. assert pivoted.columns.name == "columns"
  2158. # don't specify values
  2159. pivoted = frame.pivot(index="index", columns="columns")
  2160. assert pivoted.index.name == "index"
  2161. assert pivoted.columns.names == (None, "columns")
  2162. def test_pivot_duplicates(self):
  2163. data = DataFrame(
  2164. {
  2165. "a": ["bar", "bar", "foo", "foo", "foo"],
  2166. "b": ["one", "two", "one", "one", "two"],
  2167. "c": [1.0, 2.0, 3.0, 3.0, 4.0],
  2168. }
  2169. )
  2170. with pytest.raises(ValueError, match="duplicate entries"):
  2171. data.pivot(index="a", columns="b", values="c")
  2172. def test_pivot_empty(self):
  2173. df = DataFrame(columns=["a", "b", "c"])
  2174. result = df.pivot(index="a", columns="b", values="c")
  2175. expected = DataFrame(index=[], columns=[])
  2176. tm.assert_frame_equal(result, expected, check_names=False)
  2177. def test_pivot_integer_bug(self):
  2178. df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")])
  2179. result = df.pivot(index=1, columns=0, values=2)
  2180. repr(result)
  2181. tm.assert_index_equal(result.columns, Index(["A", "B"], name=0))
  2182. def test_pivot_index_none(self):
  2183. # GH#3962
  2184. data = {
  2185. "index": ["A", "B", "C", "C", "B", "A"],
  2186. "columns": ["One", "One", "One", "Two", "Two", "Two"],
  2187. "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0],
  2188. }
  2189. frame = DataFrame(data).set_index("index")
  2190. result = frame.pivot(columns="columns", values="values")
  2191. expected = DataFrame(
  2192. {
  2193. "One": {"A": 1.0, "B": 2.0, "C": 3.0},
  2194. "Two": {"A": 1.0, "B": 2.0, "C": 3.0},
  2195. }
  2196. )
  2197. expected.index.name, expected.columns.name = "index", "columns"
  2198. tm.assert_frame_equal(result, expected)
  2199. # omit values
  2200. result = frame.pivot(columns="columns")
  2201. expected.columns = MultiIndex.from_tuples(
  2202. [("values", "One"), ("values", "Two")], names=[None, "columns"]
  2203. )
  2204. expected.index.name = "index"
  2205. tm.assert_frame_equal(result, expected, check_names=False)
  2206. assert result.index.name == "index"
  2207. assert result.columns.names == (None, "columns")
  2208. expected.columns = expected.columns.droplevel(0)
  2209. result = frame.pivot(columns="columns", values="values")
  2210. expected.columns.name = "columns"
  2211. tm.assert_frame_equal(result, expected)
  2212. def test_pivot_index_list_values_none_immutable_args(self):
  2213. # GH37635
  2214. df = DataFrame(
  2215. {
  2216. "lev1": [1, 1, 1, 2, 2, 2],
  2217. "lev2": [1, 1, 2, 1, 1, 2],
  2218. "lev3": [1, 2, 1, 2, 1, 2],
  2219. "lev4": [1, 2, 3, 4, 5, 6],
  2220. "values": [0, 1, 2, 3, 4, 5],
  2221. }
  2222. )
  2223. index = ["lev1", "lev2"]
  2224. columns = ["lev3"]
  2225. result = df.pivot(index=index, columns=columns)
  2226. expected = DataFrame(
  2227. np.array(
  2228. [
  2229. [1.0, 2.0, 0.0, 1.0],
  2230. [3.0, np.nan, 2.0, np.nan],
  2231. [5.0, 4.0, 4.0, 3.0],
  2232. [np.nan, 6.0, np.nan, 5.0],
  2233. ]
  2234. ),
  2235. index=MultiIndex.from_arrays(
  2236. [(1, 1, 2, 2), (1, 2, 1, 2)], names=["lev1", "lev2"]
  2237. ),
  2238. columns=MultiIndex.from_arrays(
  2239. [("lev4", "lev4", "values", "values"), (1, 2, 1, 2)],
  2240. names=[None, "lev3"],
  2241. ),
  2242. )
  2243. tm.assert_frame_equal(result, expected)
  2244. assert index == ["lev1", "lev2"]
  2245. assert columns == ["lev3"]
  2246. def test_pivot_columns_not_given(self):
  2247. # GH#48293
  2248. df = DataFrame({"a": [1], "b": 1})
  2249. with pytest.raises(TypeError, match="missing 1 required keyword-only argument"):
  2250. df.pivot() # pylint: disable=missing-kwoa
  2251. def test_pivot_columns_is_none(self):
  2252. # GH#48293
  2253. df = DataFrame({None: [1], "b": 2, "c": 3})
  2254. result = df.pivot(columns=None)
  2255. expected = DataFrame({("b", 1): [2], ("c", 1): 3})
  2256. tm.assert_frame_equal(result, expected)
  2257. result = df.pivot(columns=None, index="b")
  2258. expected = DataFrame({("c", 1): 3}, index=Index([2], name="b"))
  2259. tm.assert_frame_equal(result, expected)
  2260. result = df.pivot(columns=None, index="b", values="c")
  2261. expected = DataFrame({1: 3}, index=Index([2], name="b"))
  2262. tm.assert_frame_equal(result, expected)
  2263. def test_pivot_index_is_none(self):
  2264. # GH#48293
  2265. df = DataFrame({None: [1], "b": 2, "c": 3})
  2266. result = df.pivot(columns="b", index=None)
  2267. expected = DataFrame({("c", 2): 3}, index=[1])
  2268. expected.columns.names = [None, "b"]
  2269. tm.assert_frame_equal(result, expected)
  2270. result = df.pivot(columns="b", index=None, values="c")
  2271. expected = DataFrame(3, index=[1], columns=Index([2], name="b"))
  2272. tm.assert_frame_equal(result, expected)
  2273. def test_pivot_values_is_none(self):
  2274. # GH#48293
  2275. df = DataFrame({None: [1], "b": 2, "c": 3})
  2276. result = df.pivot(columns="b", index="c", values=None)
  2277. expected = DataFrame(
  2278. 1, index=Index([3], name="c"), columns=Index([2], name="b")
  2279. )
  2280. tm.assert_frame_equal(result, expected)
  2281. result = df.pivot(columns="b", values=None)
  2282. expected = DataFrame(1, index=[0], columns=Index([2], name="b"))
  2283. tm.assert_frame_equal(result, expected)
  2284. def test_pivot_not_changing_index_name(self):
  2285. # GH#52692
  2286. df = DataFrame({"one": ["a"], "two": 0, "three": 1})
  2287. expected = df.copy(deep=True)
  2288. df.pivot(index="one", columns="two", values="three")
  2289. tm.assert_frame_equal(df, expected)