test_indexing.py 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853
  1. from collections import namedtuple
  2. from datetime import (
  3. datetime,
  4. timedelta,
  5. )
  6. from decimal import Decimal
  7. import re
  8. import numpy as np
  9. import pytest
  10. from pandas._libs import iNaT
  11. from pandas.errors import (
  12. InvalidIndexError,
  13. PerformanceWarning,
  14. SettingWithCopyError,
  15. )
  16. import pandas.util._test_decorators as td
  17. from pandas.core.dtypes.common import is_integer
  18. import pandas as pd
  19. from pandas import (
  20. Categorical,
  21. DataFrame,
  22. DatetimeIndex,
  23. Index,
  24. MultiIndex,
  25. Series,
  26. Timestamp,
  27. date_range,
  28. isna,
  29. notna,
  30. to_datetime,
  31. )
  32. import pandas._testing as tm
  33. # We pass through a TypeError raised by numpy
  34. _slice_msg = "slice indices must be integers or None or have an __index__ method"
  35. class TestDataFrameIndexing:
  36. def test_getitem(self, float_frame):
  37. # Slicing
  38. sl = float_frame[:20]
  39. assert len(sl.index) == 20
  40. # Column access
  41. for _, series in sl.items():
  42. assert len(series.index) == 20
  43. assert tm.equalContents(series.index, sl.index)
  44. for key, _ in float_frame._series.items():
  45. assert float_frame[key] is not None
  46. assert "random" not in float_frame
  47. with pytest.raises(KeyError, match="random"):
  48. float_frame["random"]
  49. def test_getitem_numeric_should_not_fallback_to_positional(self, any_numeric_dtype):
  50. # GH51053
  51. dtype = any_numeric_dtype
  52. idx = Index([1, 0, 1], dtype=dtype)
  53. df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=idx)
  54. result = df[1]
  55. expected = DataFrame([[1, 3], [4, 6]], columns=Index([1, 1], dtype=dtype))
  56. tm.assert_frame_equal(result, expected, check_exact=True)
  57. def test_getitem2(self, float_frame):
  58. df = float_frame.copy()
  59. df["$10"] = np.random.randn(len(df))
  60. ad = np.random.randn(len(df))
  61. df["@awesome_domain"] = ad
  62. with pytest.raises(KeyError, match=re.escape("'df[\"$10\"]'")):
  63. df.__getitem__('df["$10"]')
  64. res = df["@awesome_domain"]
  65. tm.assert_numpy_array_equal(ad, res.values)
  66. def test_setitem_numeric_should_not_fallback_to_positional(self, any_numeric_dtype):
  67. # GH51053
  68. dtype = any_numeric_dtype
  69. idx = Index([1, 0, 1], dtype=dtype)
  70. df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=idx)
  71. df[1] = 10
  72. expected = DataFrame([[10, 2, 10], [10, 5, 10]], columns=idx)
  73. tm.assert_frame_equal(df, expected, check_exact=True)
  74. def test_setitem_list(self, float_frame):
  75. float_frame["E"] = "foo"
  76. data = float_frame[["A", "B"]]
  77. float_frame[["B", "A"]] = data
  78. tm.assert_series_equal(float_frame["B"], data["A"], check_names=False)
  79. tm.assert_series_equal(float_frame["A"], data["B"], check_names=False)
  80. msg = "Columns must be same length as key"
  81. with pytest.raises(ValueError, match=msg):
  82. data[["A"]] = float_frame[["A", "B"]]
  83. newcolumndata = range(len(data.index) - 1)
  84. msg = (
  85. rf"Length of values \({len(newcolumndata)}\) "
  86. rf"does not match length of index \({len(data)}\)"
  87. )
  88. with pytest.raises(ValueError, match=msg):
  89. data["A"] = newcolumndata
  90. def test_setitem_list2(self):
  91. df = DataFrame(0, index=range(3), columns=["tt1", "tt2"], dtype=np.int_)
  92. df.loc[1, ["tt1", "tt2"]] = [1, 2]
  93. result = df.loc[df.index[1], ["tt1", "tt2"]]
  94. expected = Series([1, 2], df.columns, dtype=np.int_, name=1)
  95. tm.assert_series_equal(result, expected)
  96. df["tt1"] = df["tt2"] = "0"
  97. df.loc[df.index[1], ["tt1", "tt2"]] = ["1", "2"]
  98. result = df.loc[df.index[1], ["tt1", "tt2"]]
  99. expected = Series(["1", "2"], df.columns, name=1)
  100. tm.assert_series_equal(result, expected)
  101. def test_getitem_boolean(self, mixed_float_frame, mixed_int_frame, datetime_frame):
  102. # boolean indexing
  103. d = datetime_frame.index[10]
  104. indexer = datetime_frame.index > d
  105. indexer_obj = indexer.astype(object)
  106. subindex = datetime_frame.index[indexer]
  107. subframe = datetime_frame[indexer]
  108. tm.assert_index_equal(subindex, subframe.index)
  109. with pytest.raises(ValueError, match="Item wrong length"):
  110. datetime_frame[indexer[:-1]]
  111. subframe_obj = datetime_frame[indexer_obj]
  112. tm.assert_frame_equal(subframe_obj, subframe)
  113. with pytest.raises(ValueError, match="Boolean array expected"):
  114. datetime_frame[datetime_frame]
  115. # test that Series work
  116. indexer_obj = Series(indexer_obj, datetime_frame.index)
  117. subframe_obj = datetime_frame[indexer_obj]
  118. tm.assert_frame_equal(subframe_obj, subframe)
  119. # test that Series indexers reindex
  120. # we are producing a warning that since the passed boolean
  121. # key is not the same as the given index, we will reindex
  122. # not sure this is really necessary
  123. with tm.assert_produces_warning(UserWarning):
  124. indexer_obj = indexer_obj.reindex(datetime_frame.index[::-1])
  125. subframe_obj = datetime_frame[indexer_obj]
  126. tm.assert_frame_equal(subframe_obj, subframe)
  127. # test df[df > 0]
  128. for df in [
  129. datetime_frame,
  130. mixed_float_frame,
  131. mixed_int_frame,
  132. ]:
  133. data = df._get_numeric_data()
  134. bif = df[df > 0]
  135. bifw = DataFrame(
  136. {c: np.where(data[c] > 0, data[c], np.nan) for c in data.columns},
  137. index=data.index,
  138. columns=data.columns,
  139. )
  140. # add back other columns to compare
  141. for c in df.columns:
  142. if c not in bifw:
  143. bifw[c] = df[c]
  144. bifw = bifw.reindex(columns=df.columns)
  145. tm.assert_frame_equal(bif, bifw, check_dtype=False)
  146. for c in df.columns:
  147. if bif[c].dtype != bifw[c].dtype:
  148. assert bif[c].dtype == df[c].dtype
  149. def test_getitem_boolean_casting(self, datetime_frame):
  150. # don't upcast if we don't need to
  151. df = datetime_frame.copy()
  152. df["E"] = 1
  153. df["E"] = df["E"].astype("int32")
  154. df["E1"] = df["E"].copy()
  155. df["F"] = 1
  156. df["F"] = df["F"].astype("int64")
  157. df["F1"] = df["F"].copy()
  158. casted = df[df > 0]
  159. result = casted.dtypes
  160. expected = Series(
  161. [np.dtype("float64")] * 4
  162. + [np.dtype("int32")] * 2
  163. + [np.dtype("int64")] * 2,
  164. index=["A", "B", "C", "D", "E", "E1", "F", "F1"],
  165. )
  166. tm.assert_series_equal(result, expected)
  167. # int block splitting
  168. df.loc[df.index[1:3], ["E1", "F1"]] = 0
  169. casted = df[df > 0]
  170. result = casted.dtypes
  171. expected = Series(
  172. [np.dtype("float64")] * 4
  173. + [np.dtype("int32")]
  174. + [np.dtype("float64")]
  175. + [np.dtype("int64")]
  176. + [np.dtype("float64")],
  177. index=["A", "B", "C", "D", "E", "E1", "F", "F1"],
  178. )
  179. tm.assert_series_equal(result, expected)
  180. @pytest.mark.parametrize(
  181. "lst", [[True, False, True], [True, True, True], [False, False, False]]
  182. )
  183. def test_getitem_boolean_list(self, lst):
  184. df = DataFrame(np.arange(12).reshape(3, 4))
  185. result = df[lst]
  186. expected = df.loc[df.index[lst]]
  187. tm.assert_frame_equal(result, expected)
  188. def test_getitem_boolean_iadd(self):
  189. arr = np.random.randn(5, 5)
  190. df = DataFrame(arr.copy(), columns=["A", "B", "C", "D", "E"])
  191. df[df < 0] += 1
  192. arr[arr < 0] += 1
  193. tm.assert_almost_equal(df.values, arr)
  194. def test_boolean_index_empty_corner(self):
  195. # #2096
  196. blah = DataFrame(np.empty([0, 1]), columns=["A"], index=DatetimeIndex([]))
  197. # both of these should succeed trivially
  198. k = np.array([], bool)
  199. blah[k]
  200. blah[k] = 0
  201. def test_getitem_ix_mixed_integer(self):
  202. df = DataFrame(
  203. np.random.randn(4, 3), index=[1, 10, "C", "E"], columns=[1, 2, 3]
  204. )
  205. result = df.iloc[:-1]
  206. expected = df.loc[df.index[:-1]]
  207. tm.assert_frame_equal(result, expected)
  208. result = df.loc[[1, 10]]
  209. expected = df.loc[Index([1, 10])]
  210. tm.assert_frame_equal(result, expected)
  211. def test_getitem_ix_mixed_integer2(self):
  212. # 11320
  213. df = DataFrame(
  214. {
  215. "rna": (1.5, 2.2, 3.2, 4.5),
  216. -1000: [11, 21, 36, 40],
  217. 0: [10, 22, 43, 34],
  218. 1000: [0, 10, 20, 30],
  219. },
  220. columns=["rna", -1000, 0, 1000],
  221. )
  222. result = df[[1000]]
  223. expected = df.iloc[:, [3]]
  224. tm.assert_frame_equal(result, expected)
  225. result = df[[-1000]]
  226. expected = df.iloc[:, [1]]
  227. tm.assert_frame_equal(result, expected)
  228. def test_getattr(self, float_frame):
  229. tm.assert_series_equal(float_frame.A, float_frame["A"])
  230. msg = "'DataFrame' object has no attribute 'NONEXISTENT_NAME'"
  231. with pytest.raises(AttributeError, match=msg):
  232. float_frame.NONEXISTENT_NAME
  233. def test_setattr_column(self):
  234. df = DataFrame({"foobar": 1}, index=range(10))
  235. df.foobar = 5
  236. assert (df.foobar == 5).all()
  237. def test_setitem(self, float_frame, using_copy_on_write):
  238. # not sure what else to do here
  239. series = float_frame["A"][::2]
  240. float_frame["col5"] = series
  241. assert "col5" in float_frame
  242. assert len(series) == 15
  243. assert len(float_frame) == 30
  244. exp = np.ravel(np.column_stack((series.values, [np.nan] * 15)))
  245. exp = Series(exp, index=float_frame.index, name="col5")
  246. tm.assert_series_equal(float_frame["col5"], exp)
  247. series = float_frame["A"]
  248. float_frame["col6"] = series
  249. tm.assert_series_equal(series, float_frame["col6"], check_names=False)
  250. # set ndarray
  251. arr = np.random.randn(len(float_frame))
  252. float_frame["col9"] = arr
  253. assert (float_frame["col9"] == arr).all()
  254. float_frame["col7"] = 5
  255. assert (float_frame["col7"] == 5).all()
  256. float_frame["col0"] = 3.14
  257. assert (float_frame["col0"] == 3.14).all()
  258. float_frame["col8"] = "foo"
  259. assert (float_frame["col8"] == "foo").all()
  260. # this is partially a view (e.g. some blocks are view)
  261. # so raise/warn
  262. smaller = float_frame[:2]
  263. msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
  264. if using_copy_on_write:
  265. # With CoW, adding a new column doesn't raise a warning
  266. smaller["col10"] = ["1", "2"]
  267. else:
  268. with pytest.raises(SettingWithCopyError, match=msg):
  269. smaller["col10"] = ["1", "2"]
  270. assert smaller["col10"].dtype == np.object_
  271. assert (smaller["col10"] == ["1", "2"]).all()
  272. def test_setitem2(self):
  273. # dtype changing GH4204
  274. df = DataFrame([[0, 0]])
  275. df.iloc[0] = np.nan
  276. expected = DataFrame([[np.nan, np.nan]])
  277. tm.assert_frame_equal(df, expected)
  278. df = DataFrame([[0, 0]])
  279. df.loc[0] = np.nan
  280. tm.assert_frame_equal(df, expected)
  281. def test_setitem_boolean(self, float_frame):
  282. df = float_frame.copy()
  283. values = float_frame.values.copy()
  284. df[df["A"] > 0] = 4
  285. values[values[:, 0] > 0] = 4
  286. tm.assert_almost_equal(df.values, values)
  287. # test that column reindexing works
  288. series = df["A"] == 4
  289. series = series.reindex(df.index[::-1])
  290. df[series] = 1
  291. values[values[:, 0] == 4] = 1
  292. tm.assert_almost_equal(df.values, values)
  293. df[df > 0] = 5
  294. values[values > 0] = 5
  295. tm.assert_almost_equal(df.values, values)
  296. df[df == 5] = 0
  297. values[values == 5] = 0
  298. tm.assert_almost_equal(df.values, values)
  299. # a df that needs alignment first
  300. df[df[:-1] < 0] = 2
  301. np.putmask(values[:-1], values[:-1] < 0, 2)
  302. tm.assert_almost_equal(df.values, values)
  303. # indexed with same shape but rows-reversed df
  304. df[df[::-1] == 2] = 3
  305. values[values == 2] = 3
  306. tm.assert_almost_equal(df.values, values)
  307. msg = "Must pass DataFrame or 2-d ndarray with boolean values only"
  308. with pytest.raises(TypeError, match=msg):
  309. df[df * 0] = 2
  310. # index with DataFrame
  311. df_orig = df.copy()
  312. mask = df > np.abs(df)
  313. df[df > np.abs(df)] = np.nan
  314. values = df_orig.values.copy()
  315. values[mask.values] = np.nan
  316. expected = DataFrame(values, index=df_orig.index, columns=df_orig.columns)
  317. tm.assert_frame_equal(df, expected)
  318. # set from DataFrame
  319. df[df > np.abs(df)] = df * 2
  320. np.putmask(values, mask.values, df.values * 2)
  321. expected = DataFrame(values, index=df_orig.index, columns=df_orig.columns)
  322. tm.assert_frame_equal(df, expected)
  323. def test_setitem_cast(self, float_frame):
  324. float_frame["D"] = float_frame["D"].astype("i8")
  325. assert float_frame["D"].dtype == np.int64
  326. # #669, should not cast?
  327. # this is now set to int64, which means a replacement of the column to
  328. # the value dtype (and nothing to do with the existing dtype)
  329. float_frame["B"] = 0
  330. assert float_frame["B"].dtype == np.int64
  331. # cast if pass array of course
  332. float_frame["B"] = np.arange(len(float_frame))
  333. assert issubclass(float_frame["B"].dtype.type, np.integer)
  334. float_frame["foo"] = "bar"
  335. float_frame["foo"] = 0
  336. assert float_frame["foo"].dtype == np.int64
  337. float_frame["foo"] = "bar"
  338. float_frame["foo"] = 2.5
  339. assert float_frame["foo"].dtype == np.float64
  340. float_frame["something"] = 0
  341. assert float_frame["something"].dtype == np.int64
  342. float_frame["something"] = 2
  343. assert float_frame["something"].dtype == np.int64
  344. float_frame["something"] = 2.5
  345. assert float_frame["something"].dtype == np.float64
  346. def test_setitem_corner(self, float_frame):
  347. # corner case
  348. df = DataFrame({"B": [1.0, 2.0, 3.0], "C": ["a", "b", "c"]}, index=np.arange(3))
  349. del df["B"]
  350. df["B"] = [1.0, 2.0, 3.0]
  351. assert "B" in df
  352. assert len(df.columns) == 2
  353. df["A"] = "beginning"
  354. df["E"] = "foo"
  355. df["D"] = "bar"
  356. df[datetime.now()] = "date"
  357. df[datetime.now()] = 5.0
  358. # what to do when empty frame with index
  359. dm = DataFrame(index=float_frame.index)
  360. dm["A"] = "foo"
  361. dm["B"] = "bar"
  362. assert len(dm.columns) == 2
  363. assert dm.values.dtype == np.object_
  364. # upcast
  365. dm["C"] = 1
  366. assert dm["C"].dtype == np.int64
  367. dm["E"] = 1.0
  368. assert dm["E"].dtype == np.float64
  369. # set existing column
  370. dm["A"] = "bar"
  371. assert "bar" == dm["A"][0]
  372. dm = DataFrame(index=np.arange(3))
  373. dm["A"] = 1
  374. dm["foo"] = "bar"
  375. del dm["foo"]
  376. dm["foo"] = "bar"
  377. assert dm["foo"].dtype == np.object_
  378. dm["coercible"] = ["1", "2", "3"]
  379. assert dm["coercible"].dtype == np.object_
  380. def test_setitem_corner2(self):
  381. data = {
  382. "title": ["foobar", "bar", "foobar"] + ["foobar"] * 17,
  383. "cruft": np.random.random(20),
  384. }
  385. df = DataFrame(data)
  386. ix = df[df["title"] == "bar"].index
  387. df.loc[ix, ["title"]] = "foobar"
  388. df.loc[ix, ["cruft"]] = 0
  389. assert df.loc[1, "title"] == "foobar"
  390. assert df.loc[1, "cruft"] == 0
  391. def test_setitem_ambig(self):
  392. # Difficulties with mixed-type data
  393. # Created as float type
  394. dm = DataFrame(index=range(3), columns=range(3))
  395. coercable_series = Series([Decimal(1) for _ in range(3)], index=range(3))
  396. uncoercable_series = Series(["foo", "bzr", "baz"], index=range(3))
  397. dm[0] = np.ones(3)
  398. assert len(dm.columns) == 3
  399. dm[1] = coercable_series
  400. assert len(dm.columns) == 3
  401. dm[2] = uncoercable_series
  402. assert len(dm.columns) == 3
  403. assert dm[2].dtype == np.object_
  404. def test_setitem_None(self, float_frame):
  405. # GH #766
  406. float_frame[None] = float_frame["A"]
  407. tm.assert_series_equal(
  408. float_frame.iloc[:, -1], float_frame["A"], check_names=False
  409. )
  410. tm.assert_series_equal(
  411. float_frame.loc[:, None], float_frame["A"], check_names=False
  412. )
  413. tm.assert_series_equal(float_frame[None], float_frame["A"], check_names=False)
  414. repr(float_frame)
  415. def test_loc_setitem_boolean_mask_allfalse(self):
  416. # GH 9596
  417. df = DataFrame(
  418. {"a": ["1", "2", "3"], "b": ["11", "22", "33"], "c": ["111", "222", "333"]}
  419. )
  420. result = df.copy()
  421. result.loc[result.b.isna(), "a"] = result.a
  422. tm.assert_frame_equal(result, df)
  423. def test_getitem_fancy_slice_integers_step(self):
  424. df = DataFrame(np.random.randn(10, 5))
  425. # this is OK
  426. result = df.iloc[:8:2] # noqa
  427. df.iloc[:8:2] = np.nan
  428. assert isna(df.iloc[:8:2]).values.all()
  429. def test_getitem_setitem_integer_slice_keyerrors(self):
  430. df = DataFrame(np.random.randn(10, 5), index=range(0, 20, 2))
  431. # this is OK
  432. cp = df.copy()
  433. cp.iloc[4:10] = 0
  434. assert (cp.iloc[4:10] == 0).values.all()
  435. # so is this
  436. cp = df.copy()
  437. cp.iloc[3:11] = 0
  438. assert (cp.iloc[3:11] == 0).values.all()
  439. result = df.iloc[2:6]
  440. result2 = df.loc[3:11]
  441. expected = df.reindex([4, 6, 8, 10])
  442. tm.assert_frame_equal(result, expected)
  443. tm.assert_frame_equal(result2, expected)
  444. # non-monotonic, raise KeyError
  445. df2 = df.iloc[list(range(5)) + list(range(5, 10))[::-1]]
  446. with pytest.raises(KeyError, match=r"^3$"):
  447. df2.loc[3:11]
  448. with pytest.raises(KeyError, match=r"^3$"):
  449. df2.loc[3:11] = 0
  450. @td.skip_array_manager_invalid_test # already covered in test_iloc_col_slice_view
  451. def test_fancy_getitem_slice_mixed(
  452. self, float_frame, float_string_frame, using_copy_on_write
  453. ):
  454. sliced = float_string_frame.iloc[:, -3:]
  455. assert sliced["D"].dtype == np.float64
  456. # get view with single block
  457. # setting it triggers setting with copy
  458. original = float_frame.copy()
  459. sliced = float_frame.iloc[:, -3:]
  460. assert np.shares_memory(sliced["C"]._values, float_frame["C"]._values)
  461. sliced.loc[:, "C"] = 4.0
  462. if not using_copy_on_write:
  463. assert (float_frame["C"] == 4).all()
  464. # with the enforcement of GH#45333 in 2.0, this remains a view
  465. np.shares_memory(sliced["C"]._values, float_frame["C"]._values)
  466. else:
  467. tm.assert_frame_equal(float_frame, original)
  468. def test_getitem_setitem_non_ix_labels(self):
  469. df = tm.makeTimeDataFrame()
  470. start, end = df.index[[5, 10]]
  471. result = df.loc[start:end]
  472. result2 = df[start:end]
  473. expected = df[5:11]
  474. tm.assert_frame_equal(result, expected)
  475. tm.assert_frame_equal(result2, expected)
  476. result = df.copy()
  477. result.loc[start:end] = 0
  478. result2 = df.copy()
  479. result2[start:end] = 0
  480. expected = df.copy()
  481. expected[5:11] = 0
  482. tm.assert_frame_equal(result, expected)
  483. tm.assert_frame_equal(result2, expected)
  484. def test_ix_multi_take(self):
  485. df = DataFrame(np.random.randn(3, 2))
  486. rs = df.loc[df.index == 0, :]
  487. xp = df.reindex([0])
  488. tm.assert_frame_equal(rs, xp)
  489. # GH#1321
  490. df = DataFrame(np.random.randn(3, 2))
  491. rs = df.loc[df.index == 0, df.columns == 1]
  492. xp = df.reindex(index=[0], columns=[1])
  493. tm.assert_frame_equal(rs, xp)
  494. def test_getitem_fancy_scalar(self, float_frame):
  495. f = float_frame
  496. ix = f.loc
  497. # individual value
  498. for col in f.columns:
  499. ts = f[col]
  500. for idx in f.index[::5]:
  501. assert ix[idx, col] == ts[idx]
  502. @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
  503. def test_setitem_fancy_scalar(self, float_frame):
  504. f = float_frame
  505. expected = float_frame.copy()
  506. ix = f.loc
  507. # individual value
  508. for j, col in enumerate(f.columns):
  509. ts = f[col] # noqa
  510. for idx in f.index[::5]:
  511. i = f.index.get_loc(idx)
  512. val = np.random.randn()
  513. expected.iloc[i, j] = val
  514. ix[idx, col] = val
  515. tm.assert_frame_equal(f, expected)
  516. def test_getitem_fancy_boolean(self, float_frame):
  517. f = float_frame
  518. ix = f.loc
  519. expected = f.reindex(columns=["B", "D"])
  520. result = ix[:, [False, True, False, True]]
  521. tm.assert_frame_equal(result, expected)
  522. expected = f.reindex(index=f.index[5:10], columns=["B", "D"])
  523. result = ix[f.index[5:10], [False, True, False, True]]
  524. tm.assert_frame_equal(result, expected)
  525. boolvec = f.index > f.index[7]
  526. expected = f.reindex(index=f.index[boolvec])
  527. result = ix[boolvec]
  528. tm.assert_frame_equal(result, expected)
  529. result = ix[boolvec, :]
  530. tm.assert_frame_equal(result, expected)
  531. result = ix[boolvec, f.columns[2:]]
  532. expected = f.reindex(index=f.index[boolvec], columns=["C", "D"])
  533. tm.assert_frame_equal(result, expected)
  534. @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
  535. def test_setitem_fancy_boolean(self, float_frame):
  536. # from 2d, set with booleans
  537. frame = float_frame.copy()
  538. expected = float_frame.copy()
  539. values = expected.values.copy()
  540. mask = frame["A"] > 0
  541. frame.loc[mask] = 0.0
  542. values[mask.values] = 0.0
  543. expected = DataFrame(values, index=expected.index, columns=expected.columns)
  544. tm.assert_frame_equal(frame, expected)
  545. frame = float_frame.copy()
  546. expected = float_frame.copy()
  547. values = expected.values.copy()
  548. frame.loc[mask, ["A", "B"]] = 0.0
  549. values[mask.values, :2] = 0.0
  550. expected = DataFrame(values, index=expected.index, columns=expected.columns)
  551. tm.assert_frame_equal(frame, expected)
  552. def test_getitem_fancy_ints(self, float_frame):
  553. result = float_frame.iloc[[1, 4, 7]]
  554. expected = float_frame.loc[float_frame.index[[1, 4, 7]]]
  555. tm.assert_frame_equal(result, expected)
  556. result = float_frame.iloc[:, [2, 0, 1]]
  557. expected = float_frame.loc[:, float_frame.columns[[2, 0, 1]]]
  558. tm.assert_frame_equal(result, expected)
  559. def test_getitem_setitem_boolean_misaligned(self, float_frame):
  560. # boolean index misaligned labels
  561. mask = float_frame["A"][::-1] > 1
  562. result = float_frame.loc[mask]
  563. expected = float_frame.loc[mask[::-1]]
  564. tm.assert_frame_equal(result, expected)
  565. cp = float_frame.copy()
  566. expected = float_frame.copy()
  567. cp.loc[mask] = 0
  568. expected.loc[mask] = 0
  569. tm.assert_frame_equal(cp, expected)
  570. def test_getitem_setitem_boolean_multi(self):
  571. df = DataFrame(np.random.randn(3, 2))
  572. # get
  573. k1 = np.array([True, False, True])
  574. k2 = np.array([False, True])
  575. result = df.loc[k1, k2]
  576. expected = df.loc[[0, 2], [1]]
  577. tm.assert_frame_equal(result, expected)
  578. expected = df.copy()
  579. df.loc[np.array([True, False, True]), np.array([False, True])] = 5
  580. expected.loc[[0, 2], [1]] = 5
  581. tm.assert_frame_equal(df, expected)
  582. def test_getitem_setitem_float_labels(self, using_array_manager):
  583. index = Index([1.5, 2, 3, 4, 5])
  584. df = DataFrame(np.random.randn(5, 5), index=index)
  585. result = df.loc[1.5:4]
  586. expected = df.reindex([1.5, 2, 3, 4])
  587. tm.assert_frame_equal(result, expected)
  588. assert len(result) == 4
  589. result = df.loc[4:5]
  590. expected = df.reindex([4, 5]) # reindex with int
  591. tm.assert_frame_equal(result, expected, check_index_type=False)
  592. assert len(result) == 2
  593. result = df.loc[4:5]
  594. expected = df.reindex([4.0, 5.0]) # reindex with float
  595. tm.assert_frame_equal(result, expected)
  596. assert len(result) == 2
  597. # loc_float changes this to work properly
  598. result = df.loc[1:2]
  599. expected = df.iloc[0:2]
  600. tm.assert_frame_equal(result, expected)
  601. df.loc[1:2] = 0
  602. result = df[1:2]
  603. assert (result == 0).all().all()
  604. # #2727
  605. index = Index([1.0, 2.5, 3.5, 4.5, 5.0])
  606. df = DataFrame(np.random.randn(5, 5), index=index)
  607. # positional slicing only via iloc!
  608. msg = (
  609. "cannot do positional indexing on Index with "
  610. r"these indexers \[1.0\] of type float"
  611. )
  612. with pytest.raises(TypeError, match=msg):
  613. df.iloc[1.0:5]
  614. result = df.iloc[4:5]
  615. expected = df.reindex([5.0])
  616. tm.assert_frame_equal(result, expected)
  617. assert len(result) == 1
  618. cp = df.copy()
  619. with pytest.raises(TypeError, match=_slice_msg):
  620. cp.iloc[1.0:5] = 0
  621. with pytest.raises(TypeError, match=msg):
  622. result = cp.iloc[1.0:5] == 0
  623. assert result.values.all()
  624. assert (cp.iloc[0:1] == df.iloc[0:1]).values.all()
  625. cp = df.copy()
  626. cp.iloc[4:5] = 0
  627. assert (cp.iloc[4:5] == 0).values.all()
  628. assert (cp.iloc[0:4] == df.iloc[0:4]).values.all()
  629. # float slicing
  630. result = df.loc[1.0:5]
  631. expected = df
  632. tm.assert_frame_equal(result, expected)
  633. assert len(result) == 5
  634. result = df.loc[1.1:5]
  635. expected = df.reindex([2.5, 3.5, 4.5, 5.0])
  636. tm.assert_frame_equal(result, expected)
  637. assert len(result) == 4
  638. result = df.loc[4.51:5]
  639. expected = df.reindex([5.0])
  640. tm.assert_frame_equal(result, expected)
  641. assert len(result) == 1
  642. result = df.loc[1.0:5.0]
  643. expected = df.reindex([1.0, 2.5, 3.5, 4.5, 5.0])
  644. tm.assert_frame_equal(result, expected)
  645. assert len(result) == 5
  646. cp = df.copy()
  647. cp.loc[1.0:5.0] = 0
  648. result = cp.loc[1.0:5.0]
  649. assert (result == 0).values.all()
  650. def test_setitem_single_column_mixed_datetime(self):
  651. df = DataFrame(
  652. np.random.randn(5, 3),
  653. index=["a", "b", "c", "d", "e"],
  654. columns=["foo", "bar", "baz"],
  655. )
  656. df["timestamp"] = Timestamp("20010102")
  657. # check our dtypes
  658. result = df.dtypes
  659. expected = Series(
  660. [np.dtype("float64")] * 3 + [np.dtype("datetime64[ns]")],
  661. index=["foo", "bar", "baz", "timestamp"],
  662. )
  663. tm.assert_series_equal(result, expected)
  664. # GH#16674 iNaT is treated as an integer when given by the user
  665. df.loc["b", "timestamp"] = iNaT
  666. assert not isna(df.loc["b", "timestamp"])
  667. assert df["timestamp"].dtype == np.object_
  668. assert df.loc["b", "timestamp"] == iNaT
  669. # allow this syntax (as of GH#3216)
  670. df.loc["c", "timestamp"] = np.nan
  671. assert isna(df.loc["c", "timestamp"])
  672. # allow this syntax
  673. df.loc["d", :] = np.nan
  674. assert not isna(df.loc["c", :]).all()
  675. def test_setitem_mixed_datetime(self):
  676. # GH 9336
  677. expected = DataFrame(
  678. {
  679. "a": [0, 0, 0, 0, 13, 14],
  680. "b": [
  681. datetime(2012, 1, 1),
  682. 1,
  683. "x",
  684. "y",
  685. datetime(2013, 1, 1),
  686. datetime(2014, 1, 1),
  687. ],
  688. }
  689. )
  690. df = DataFrame(0, columns=list("ab"), index=range(6))
  691. df["b"] = pd.NaT
  692. df.loc[0, "b"] = datetime(2012, 1, 1)
  693. df.loc[1, "b"] = 1
  694. df.loc[[2, 3], "b"] = "x", "y"
  695. A = np.array(
  696. [
  697. [13, np.datetime64("2013-01-01T00:00:00")],
  698. [14, np.datetime64("2014-01-01T00:00:00")],
  699. ]
  700. )
  701. df.loc[[4, 5], ["a", "b"]] = A
  702. tm.assert_frame_equal(df, expected)
  703. def test_setitem_frame_float(self, float_frame):
  704. piece = float_frame.loc[float_frame.index[:2], ["A", "B"]]
  705. float_frame.loc[float_frame.index[-2] :, ["A", "B"]] = piece.values
  706. result = float_frame.loc[float_frame.index[-2:], ["A", "B"]].values
  707. expected = piece.values
  708. tm.assert_almost_equal(result, expected)
  709. def test_setitem_frame_mixed(self, float_string_frame):
  710. # GH 3216
  711. # already aligned
  712. f = float_string_frame.copy()
  713. piece = DataFrame(
  714. [[1.0, 2.0], [3.0, 4.0]], index=f.index[0:2], columns=["A", "B"]
  715. )
  716. key = (f.index[slice(None, 2)], ["A", "B"])
  717. f.loc[key] = piece
  718. tm.assert_almost_equal(f.loc[f.index[0:2], ["A", "B"]].values, piece.values)
  719. def test_setitem_frame_mixed_rows_unaligned(self, float_string_frame):
  720. # GH#3216 rows unaligned
  721. f = float_string_frame.copy()
  722. piece = DataFrame(
  723. [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]],
  724. index=list(f.index[0:2]) + ["foo", "bar"],
  725. columns=["A", "B"],
  726. )
  727. key = (f.index[slice(None, 2)], ["A", "B"])
  728. f.loc[key] = piece
  729. tm.assert_almost_equal(
  730. f.loc[f.index[0:2:], ["A", "B"]].values, piece.values[0:2]
  731. )
  732. def test_setitem_frame_mixed_key_unaligned(self, float_string_frame):
  733. # GH#3216 key is unaligned with values
  734. f = float_string_frame.copy()
  735. piece = f.loc[f.index[:2], ["A"]]
  736. piece.index = f.index[-2:]
  737. key = (f.index[slice(-2, None)], ["A", "B"])
  738. f.loc[key] = piece
  739. piece["B"] = np.nan
  740. tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values)
  741. def test_setitem_frame_mixed_ndarray(self, float_string_frame):
  742. # GH#3216 ndarray
  743. f = float_string_frame.copy()
  744. piece = float_string_frame.loc[f.index[:2], ["A", "B"]]
  745. key = (f.index[slice(-2, None)], ["A", "B"])
  746. f.loc[key] = piece.values
  747. tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values)
  748. def test_setitem_frame_upcast(self):
  749. # needs upcasting
  750. df = DataFrame([[1, 2, "foo"], [3, 4, "bar"]], columns=["A", "B", "C"])
  751. df2 = df.copy()
  752. df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5
  753. expected = df.reindex(columns=["A", "B"])
  754. expected += 0.5
  755. expected["C"] = df["C"]
  756. tm.assert_frame_equal(df2, expected)
  757. def test_setitem_frame_align(self, float_frame):
  758. piece = float_frame.loc[float_frame.index[:2], ["A", "B"]]
  759. piece.index = float_frame.index[-2:]
  760. piece.columns = ["A", "B"]
  761. float_frame.loc[float_frame.index[-2:], ["A", "B"]] = piece
  762. result = float_frame.loc[float_frame.index[-2:], ["A", "B"]].values
  763. expected = piece.values
  764. tm.assert_almost_equal(result, expected)
  765. def test_getitem_setitem_ix_duplicates(self):
  766. # #1201
  767. df = DataFrame(np.random.randn(5, 3), index=["foo", "foo", "bar", "baz", "bar"])
  768. result = df.loc["foo"]
  769. expected = df[:2]
  770. tm.assert_frame_equal(result, expected)
  771. result = df.loc["bar"]
  772. expected = df.iloc[[2, 4]]
  773. tm.assert_frame_equal(result, expected)
  774. result = df.loc["baz"]
  775. expected = df.iloc[3]
  776. tm.assert_series_equal(result, expected)
  777. def test_getitem_ix_boolean_duplicates_multiple(self):
  778. # #1201
  779. df = DataFrame(np.random.randn(5, 3), index=["foo", "foo", "bar", "baz", "bar"])
  780. result = df.loc[["bar"]]
  781. exp = df.iloc[[2, 4]]
  782. tm.assert_frame_equal(result, exp)
  783. result = df.loc[df[1] > 0]
  784. exp = df[df[1] > 0]
  785. tm.assert_frame_equal(result, exp)
  786. result = df.loc[df[0] > 0]
  787. exp = df[df[0] > 0]
  788. tm.assert_frame_equal(result, exp)
  @pytest.mark.parametrize("bool_value", [True, False])
  def test_getitem_setitem_ix_bool_keyerror(self, bool_value):
      """#2199: a bare bool is rejected as a ``.loc`` key for get and set."""
      df = DataFrame({"a": [1, 2, 3]})

      getitem_msg = (
          f"{bool_value}: boolean label can not be used without a boolean index"
      )
      with pytest.raises(KeyError, match=getitem_msg):
          df.loc[bool_value]

      setitem_msg = "cannot use a single bool to index into setitem"
      with pytest.raises(KeyError, match=setitem_msg):
          df.loc[bool_value] = 0
  799. # TODO: rename? remove?
  800. def test_single_element_ix_dont_upcast(self, float_frame):
  801. float_frame["E"] = 1
  802. assert issubclass(float_frame["E"].dtype.type, (int, np.integer))
  803. result = float_frame.loc[float_frame.index[5], "E"]
  804. assert is_integer(result)
  805. # GH 11617
  806. df = DataFrame({"a": [1.23]})
  807. df["b"] = 666
  808. result = df.loc[0, "b"]
  809. assert is_integer(result)
  810. expected = Series([666], [0], name="b")
  811. result = df.loc[[0], "b"]
  812. tm.assert_series_equal(result, expected)
  813. def test_iloc_row(self):
  814. df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2))
  815. result = df.iloc[1]
  816. exp = df.loc[2]
  817. tm.assert_series_equal(result, exp)
  818. result = df.iloc[2]
  819. exp = df.loc[4]
  820. tm.assert_series_equal(result, exp)
  821. # slice
  822. result = df.iloc[slice(4, 8)]
  823. expected = df.loc[8:14]
  824. tm.assert_frame_equal(result, expected)
  825. # list of integers
  826. result = df.iloc[[1, 2, 4, 6]]
  827. expected = df.reindex(df.index[[1, 2, 4, 6]])
  828. tm.assert_frame_equal(result, expected)
  829. def test_iloc_row_slice_view(self, using_copy_on_write, request):
  830. df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2))
  831. original = df.copy()
  832. # verify slice is view
  833. # setting it makes it raise/warn
  834. subset = df.iloc[slice(4, 8)]
  835. assert np.shares_memory(df[2], subset[2])
  836. exp_col = original[2].copy()
  837. subset.loc[:, 2] = 0.0
  838. if not using_copy_on_write:
  839. subset.loc[:, 2] = 0.0
  840. exp_col._values[4:8] = 0.0
  841. # With the enforcement of GH#45333 in 2.0, this remains a view
  842. assert np.shares_memory(df[2], subset[2])
  843. tm.assert_series_equal(df[2], exp_col)
  844. def test_iloc_col(self):
  845. df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2))
  846. result = df.iloc[:, 1]
  847. exp = df.loc[:, 2]
  848. tm.assert_series_equal(result, exp)
  849. result = df.iloc[:, 2]
  850. exp = df.loc[:, 4]
  851. tm.assert_series_equal(result, exp)
  852. # slice
  853. result = df.iloc[:, slice(4, 8)]
  854. expected = df.loc[:, 8:14]
  855. tm.assert_frame_equal(result, expected)
  856. # list of integers
  857. result = df.iloc[:, [1, 2, 4, 6]]
  858. expected = df.reindex(columns=df.columns[[1, 2, 4, 6]])
  859. tm.assert_frame_equal(result, expected)
  860. def test_iloc_col_slice_view(self, using_array_manager, using_copy_on_write):
  861. df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2))
  862. original = df.copy()
  863. subset = df.iloc[:, slice(4, 8)]
  864. if not using_array_manager and not using_copy_on_write:
  865. # verify slice is view
  866. assert np.shares_memory(df[8]._values, subset[8]._values)
  867. subset.loc[:, 8] = 0.0
  868. assert (df[8] == 0).all()
  869. # with the enforcement of GH#45333 in 2.0, this remains a view
  870. assert np.shares_memory(df[8]._values, subset[8]._values)
  871. else:
  872. if using_copy_on_write:
  873. # verify slice is view
  874. assert np.shares_memory(df[8]._values, subset[8]._values)
  875. subset[8] = 0.0
  876. # subset changed
  877. assert (subset[8] == 0).all()
  878. # but df itself did not change (setitem replaces full column)
  879. tm.assert_frame_equal(df, original)
  880. def test_loc_duplicates(self):
  881. # gh-17105
  882. # insert a duplicate element to the index
  883. trange = date_range(
  884. start=Timestamp(year=2017, month=1, day=1),
  885. end=Timestamp(year=2017, month=1, day=5),
  886. )
  887. trange = trange.insert(loc=5, item=Timestamp(year=2017, month=1, day=5))
  888. df = DataFrame(0, index=trange, columns=["A", "B"])
  889. bool_idx = np.array([False, False, False, False, False, True])
  890. # assignment
  891. df.loc[trange[bool_idx], "A"] = 6
  892. expected = DataFrame(
  893. {"A": [0, 0, 0, 0, 6, 6], "B": [0, 0, 0, 0, 0, 0]}, index=trange
  894. )
  895. tm.assert_frame_equal(df, expected)
  896. # in-place
  897. df = DataFrame(0, index=trange, columns=["A", "B"])
  898. df.loc[trange[bool_idx], "A"] += 6
  899. tm.assert_frame_equal(df, expected)
  900. def test_setitem_with_unaligned_tz_aware_datetime_column(self):
  901. # GH 12981
  902. # Assignment of unaligned offset-aware datetime series.
  903. # Make sure timezone isn't lost
  904. column = Series(date_range("2015-01-01", periods=3, tz="utc"), name="dates")
  905. df = DataFrame({"dates": column})
  906. df["dates"] = column[[1, 0, 2]]
  907. tm.assert_series_equal(df["dates"], column)
  908. df = DataFrame({"dates": column})
  909. df.loc[[0, 1, 2], "dates"] = column[[1, 0, 2]]
  910. tm.assert_series_equal(df["dates"], column)
  911. def test_loc_setitem_datetimelike_with_inference(self):
  912. # GH 7592
  913. # assignment of timedeltas with NaT
  914. one_hour = timedelta(hours=1)
  915. df = DataFrame(index=date_range("20130101", periods=4))
  916. df["A"] = np.array([1 * one_hour] * 4, dtype="m8[ns]")
  917. df.loc[:, "B"] = np.array([2 * one_hour] * 4, dtype="m8[ns]")
  918. df.loc[df.index[:3], "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]")
  919. df.loc[:, "D"] = np.array([4 * one_hour] * 4, dtype="m8[ns]")
  920. df.loc[df.index[:3], "E"] = np.array([5 * one_hour] * 3, dtype="m8[ns]")
  921. df["F"] = np.timedelta64("NaT")
  922. df.loc[df.index[:-1], "F"] = np.array([6 * one_hour] * 3, dtype="m8[ns]")
  923. df.loc[df.index[-3] :, "G"] = date_range("20130101", periods=3)
  924. df["H"] = np.datetime64("NaT")
  925. result = df.dtypes
  926. expected = Series(
  927. [np.dtype("timedelta64[ns]")] * 6 + [np.dtype("datetime64[ns]")] * 2,
  928. index=list("ABCDEFGH"),
  929. )
  930. tm.assert_series_equal(result, expected)
  def test_getitem_boolean_indexing_mixed(self):
      """Boolean-frame setitem on mixed int/float data, then with a string column."""
      nan = np.nan
      df = DataFrame(
          {
              0: {35: nan, 40: nan, 43: nan, 49: nan, 50: nan},
              1: {
                  35: nan,
                  40: 0.32632316859446198,
                  43: nan,
                  49: 0.32632316859446198,
                  50: 0.39114724480578139,
              },
              2: {35: nan, 40: nan, 43: 0.29012581014105987, 49: nan, 50: nan},
              3: {35: nan, 40: nan, 43: nan, 49: nan, 50: nan},
              4: {35: 0.34215328467153283, 40: nan, 43: nan, 49: nan, 50: nan},
              "y": {35: 0, 40: 0, 43: 0, 49: 0, 50: 1},
          }
      )

      # mixed int/float ok
      df2 = df.copy()
      df2[df2 > 0.3] = 1

      expected = df.copy()
      for row, col in [(40, 1), (49, 1), (50, 1), (35, 4)]:
          expected.loc[row, col] = 1
      tm.assert_frame_equal(df2, expected)

      # a string column makes the comparison itself fail
      df["foo"] = "test"
      msg = "not supported between instances|unorderable types"
      with pytest.raises(TypeError, match=msg):
          df[df > 0.3] = 1
  def test_type_error_multiindex(self):
      """gh-12218: tuple keys holding a slice on MultiIndex columns."""
      mi = MultiIndex.from_product([["x", "y"], [0, 1]], names=[None, "c"])
      dg = DataFrame(
          [[1, 1, 2, 2], [3, 3, 4, 4]], columns=mi, index=Index([0, 1], name="i")
      )

      # __getitem__ rejects a (slice, label) tuple
      with pytest.raises(InvalidIndexError, match="slice"):
          dg[:, 0]

      row_index = Index(range(2), name="i")

      # .loc accepts the same selection
      expected_cols = MultiIndex(
          levels=[["x", "y"], [0, 1]], codes=[[0, 1], [0, 0]], names=[None, "c"]
      )
      expected = DataFrame([[1, 2], [3, 4]], columns=expected_cols, index=row_index)
      tm.assert_frame_equal(dg.loc[:, (slice(None), 0)], expected)

      # a full tuple key selects a single column as a Series
      name = ("x", 0)
      expected = Series([1, 3], index=Index(range(2), name="i"), name=name)
      tm.assert_series_equal(dg["x", 0], expected)
  993. def test_getitem_interval_index_partial_indexing(self):
  994. # GH#36490
  995. df = DataFrame(
  996. np.ones((3, 4)), columns=pd.IntervalIndex.from_breaks(np.arange(5))
  997. )
  998. expected = df.iloc[:, 0]
  999. res = df[0.5]
  1000. tm.assert_series_equal(res, expected)
  1001. res = df.loc[:, 0.5]
  1002. tm.assert_series_equal(res, expected)
  1003. def test_setitem_array_as_cell_value(self):
  1004. # GH#43422
  1005. df = DataFrame(columns=["a", "b"], dtype=object)
  1006. df.loc[0] = {"a": np.zeros((2,)), "b": np.zeros((2, 2))}
  1007. expected = DataFrame({"a": [np.zeros((2,))], "b": [np.zeros((2, 2))]})
  1008. tm.assert_frame_equal(df, expected)
  1009. def test_iloc_setitem_nullable_2d_values(self):
  1010. df = DataFrame({"A": [1, 2, 3]}, dtype="Int64")
  1011. orig = df.copy()
  1012. df.loc[:] = df.values[:, ::-1]
  1013. tm.assert_frame_equal(df, orig)
  1014. df.loc[:] = pd.core.arrays.PandasArray(df.values[:, ::-1])
  1015. tm.assert_frame_equal(df, orig)
  1016. df.iloc[:] = df.iloc[:, :]
  1017. tm.assert_frame_equal(df, orig)
  1018. def test_getitem_segfault_with_empty_like_object(self):
  1019. # GH#46848
  1020. df = DataFrame(np.empty((1, 1), dtype=object))
  1021. df[0] = np.empty_like(df[0])
  1022. # this produces the segfault
  1023. df[[0]]
  @pytest.mark.parametrize(
      "null", [pd.NaT, pd.NaT.to_numpy("M8[ns]"), pd.NaT.to_numpy("m8[ns]")]
  )
  def test_setting_mismatched_na_into_nullable_fails(
      self, null, any_numeric_ea_dtype
  ):
      """GH#44514: datetime-like nulls are not cast to pd.NA on setitem."""
      df = DataFrame({"A": [1, 2, 3]}, dtype=any_numeric_ea_dtype)
      ser = df["A"]
      arr = ser._values

      accepted_messages = [
          r"timedelta64\[ns\] cannot be converted to (Floating|Integer)Dtype",
          r"datetime64\[ns\] cannot be converted to (Floating|Integer)Dtype",
          "'values' contains non-numeric NA",
          r"Invalid value '.*' for dtype (U?Int|Float)\d{1,2}",
      ]
      msg = "|".join(accepted_messages)

      # scalar write followed by a two-element write, at each layer in turn:
      # the array, the Series, Series.iloc and DataFrame.iloc
      with pytest.raises(TypeError, match=msg):
          arr[0] = null
      with pytest.raises(TypeError, match=msg):
          arr[:2] = [null, null]

      with pytest.raises(TypeError, match=msg):
          ser[0] = null
      with pytest.raises(TypeError, match=msg):
          ser[:2] = [null, null]

      with pytest.raises(TypeError, match=msg):
          ser.iloc[0] = null
      with pytest.raises(TypeError, match=msg):
          ser.iloc[:2] = [null, null]

      with pytest.raises(TypeError, match=msg):
          df.iloc[0, 0] = null
      with pytest.raises(TypeError, match=msg):
          df.iloc[:2, 0] = [null, null]

      # Multi-Block
      two_col = df.copy()
      two_col["B"] = ser.copy()
      with pytest.raises(TypeError, match=msg):
          two_col.iloc[0, 0] = null
      with pytest.raises(TypeError, match=msg):
          two_col.iloc[:2, 0] = [null, null]
  1065. def test_loc_expand_empty_frame_keep_index_name(self):
  1066. # GH#45621
  1067. df = DataFrame(columns=["b"], index=Index([], name="a"))
  1068. df.loc[0] = 1
  1069. expected = DataFrame({"b": [1]}, index=Index([0], name="a"))
  1070. tm.assert_frame_equal(df, expected)
  1071. def test_loc_expand_empty_frame_keep_midx_names(self):
  1072. # GH#46317
  1073. df = DataFrame(
  1074. columns=["d"], index=MultiIndex.from_tuples([], names=["a", "b", "c"])
  1075. )
  1076. df.loc[(1, 2, 3)] = "foo"
  1077. expected = DataFrame(
  1078. {"d": ["foo"]},
  1079. index=MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"]),
  1080. )
  1081. tm.assert_frame_equal(df, expected)
  @pytest.mark.parametrize("val", ["x", 1])
  @pytest.mark.parametrize("idxr", ["a", ["a"]])
  def test_loc_setitem_rhs_frame(self, idxr, val):
      """GH#47578: a DataFrame right-hand side is aligned on its index, no warning."""
      df = DataFrame({"a": [1, 2]})

      # the right-hand side is labelled 1 and 2, while df is labelled 0 and 1
      rhs = DataFrame({"a": [val, 11]}, index=[1, 2])
      with tm.assert_produces_warning(None):
          df.loc[:, idxr] = rhs

      expected = DataFrame({"a": [np.nan, val]})
      tm.assert_frame_equal(df, expected)
  @td.skip_array_manager_invalid_test
  def test_iloc_setitem_enlarge_no_warning(self):
      """GH#47381: ``iloc`` column assignment on an empty frame must not warn."""
      df = DataFrame(columns=["a", "b"])
      untouched = df.copy()
      view = df[:]

      new_values = np.array([1, 2], dtype=np.float64)
      with tm.assert_produces_warning(None):
          df.iloc[:, 0] = new_values

      # the slice taken before the write is expected to stay empty
      tm.assert_frame_equal(view, untouched)
  1100. def test_loc_internals_not_updated_correctly(self):
  1101. # GH#47867 all steps are necessary to reproduce the initial bug
  1102. df = DataFrame(
  1103. {"bool_col": True, "a": 1, "b": 2.5},
  1104. index=MultiIndex.from_arrays([[1, 2], [1, 2]], names=["idx1", "idx2"]),
  1105. )
  1106. idx = [(1, 1)]
  1107. df["c"] = 3
  1108. df.loc[idx, "c"] = 0
  1109. df.loc[idx, "c"]
  1110. df.loc[idx, ["a", "b"]]
  1111. df.loc[idx, "c"] = 15
  1112. result = df.loc[idx, "c"]
  1113. expected = df = Series(
  1114. 15,
  1115. index=MultiIndex.from_arrays([[1], [1]], names=["idx1", "idx2"]),
  1116. name="c",
  1117. )
  1118. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize("val", [None, [None], pd.NA, [pd.NA]])
  def test_iloc_setitem_string_list_na(self, val):
      """GH#45469: NA-likes set through a list row indexer on a string frame."""
      frame = DataFrame({"a": ["a", "b", "c"]}, dtype="string")

      frame.iloc[[0], :] = val

      expected = DataFrame({"a": [pd.NA, "b", "c"]}, dtype="string")
      tm.assert_frame_equal(frame, expected)
  @pytest.mark.parametrize("val", [None, pd.NA])
  def test_iloc_setitem_string_na(self, val):
      """GH#45469: NA-likes set through a scalar row indexer on a string frame."""
      frame = DataFrame({"a": ["a", "b", "c"]}, dtype="string")

      frame.iloc[0, :] = val

      expected = DataFrame({"a": [pd.NA, "b", "c"]}, dtype="string")
      tm.assert_frame_equal(frame, expected)
  @pytest.mark.parametrize("func", [list, Series, np.array])
  def test_iloc_setitem_ea_null_slice_length_one_list(self, func):
      """GH#48016: null row slice plus a length-one list-like column indexer."""
      frame = DataFrame({"a": [1, 2, 3]}, dtype="Int64")

      # the column indexer is a list, Series or ndarray holding just [0]
      col_indexer = func([0])
      frame.iloc[:, col_indexer] = 5

      expected = DataFrame({"a": [5, 5, 5]}, dtype="Int64")
      tm.assert_frame_equal(frame, expected)
  1140. def test_loc_named_tuple_for_midx(self):
  1141. # GH#48124
  1142. df = DataFrame(
  1143. index=MultiIndex.from_product(
  1144. [["A", "B"], ["a", "b", "c"]], names=["first", "second"]
  1145. )
  1146. )
  1147. indexer_tuple = namedtuple("Indexer", df.index.names)
  1148. idxr = indexer_tuple(first="A", second=["a", "b"])
  1149. result = df.loc[idxr, :]
  1150. expected = DataFrame(
  1151. index=MultiIndex.from_tuples(
  1152. [("A", "a"), ("A", "b")], names=["first", "second"]
  1153. )
  1154. )
  1155. tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize("indexer", [["a"], "a"])
  @pytest.mark.parametrize("col", [{}, {"b": 1}])
  def test_set_2d_casting_date_to_int(self, col, indexer):
      """GH#49159: row-list ``.loc`` assignment of datetimes keeps them datetimes."""
      first, second = Timestamp("2022-12-29"), Timestamp("2022-12-30")
      df = DataFrame({"a": [first, second], **col})

      # the right-hand side is the whole column shifted one day; row 1 is written
      shifted = df["a"] + pd.Timedelta(days=1)
      df.loc[[1], indexer] = shifted

      expected = DataFrame(
          {"a": [Timestamp("2022-12-29"), Timestamp("2022-12-31")], **col},
      )
      tm.assert_frame_equal(df, expected)
  @pytest.mark.parametrize("col", [{}, {"name": "a"}])
  def test_loc_setitem_reordering_with_all_true_indexer(self, col):
      """GH#48701: an all-True row mask must not reorder the assigned values."""
      n = 17
      df = DataFrame({**col, "x": range(n), "y": range(n)})
      expected = df.copy()

      all_rows = n * [True]
      df.loc[all_rows, ["x", "y"]] = df[["x", "y"]]

      tm.assert_frame_equal(df, expected)
  1176. def test_loc_rhs_empty_warning(self):
  1177. # GH48480
  1178. df = DataFrame(columns=["a", "b"])
  1179. expected = df.copy()
  1180. rhs = DataFrame(columns=["a"])
  1181. with tm.assert_produces_warning(None):
  1182. df.loc[:, "a"] = rhs
  1183. tm.assert_frame_equal(df, expected)
  1184. def test_iloc_ea_series_indexer(self):
  1185. # GH#49521
  1186. df = DataFrame([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])
  1187. indexer = Series([0, 1], dtype="Int64")
  1188. row_indexer = Series([1], dtype="Int64")
  1189. result = df.iloc[row_indexer, indexer]
  1190. expected = DataFrame([[5, 6]], index=[1])
  1191. tm.assert_frame_equal(result, expected)
  1192. result = df.iloc[row_indexer.values, indexer.values]
  1193. tm.assert_frame_equal(result, expected)
  def test_iloc_ea_series_indexer_with_na(self):
      """GH#49521: an Int64 indexer holding NA is rejected by ``iloc``."""
      df = DataFrame([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])
      na_indexer = Series([0, pd.NA], dtype="Int64")
      msg = "cannot convert"

      # the Series itself
      with pytest.raises(ValueError, match=msg):
          df.iloc[:, na_indexer]

      # the underlying array
      with pytest.raises(ValueError, match=msg):
          df.iloc[:, na_indexer.values]
  @pytest.mark.parametrize("indexer", [True, (True,)])
  @pytest.mark.parametrize("dtype", [bool, "boolean"])
  def test_loc_bool_multiindex(self, dtype, indexer):
      """GH#47687: a boolean label selects on the first level of a bool MultiIndex."""
      level_a = Series([True, True, False, False], dtype=dtype)
      level_b = Series([True, False, True, False], dtype=dtype)
      midx = MultiIndex.from_arrays([level_a, level_b], names=["a", "b"])
      df = DataFrame({"c": [1, 2, 3, 4]}, index=midx)

      # a PerformanceWarning is allowed only for the tuple form of the key
      may_warn = isinstance(indexer, tuple)
      with tm.maybe_produces_warning(PerformanceWarning, may_warn):
          result = df.loc[indexer]

      expected_index = Index([True, False], name="b", dtype=dtype)
      expected = DataFrame({"c": [1, 2]}, index=expected_index)
      tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize("utc", [False, True])
  @pytest.mark.parametrize("indexer", ["date", ["date"]])
  def test_loc_datetime_assignment_dtype_does_not_change(self, utc, indexer):
      """GH#49837: writing a datetime column back through a mask changes nothing."""
      dates = to_datetime([datetime(2022, 1, 20), datetime(2022, 1, 22)], utc=utc)
      df = DataFrame({"date": dates, "update": [True, False]})
      expected = df.copy(deep=True)

      # write the masked rows' own dates back into the same rows
      update_df = df[df["update"]]
      df.loc[df["update"], indexer] = update_df["date"]

      tm.assert_frame_equal(df, expected)
  @pytest.mark.parametrize("indexer, idx", [(tm.loc, 1), (tm.iloc, 2)])
  def test_setitem_value_coercing_dtypes(self, indexer, idx):
      """GH#50467: a numeric frame written into an object frame stays numeric."""
      df = DataFrame([["1", np.nan], ["2", np.nan], ["3", np.nan]], dtype=object)
      rhs = DataFrame([[1, np.nan], [2, np.nan]])

      # idx is paired with the indexer so both select the first two rows
      indexer(df)[:idx, :] = rhs

      expected = DataFrame([[1, np.nan], [2, np.nan], ["3", np.nan]], dtype=object)
      tm.assert_frame_equal(df, expected)
  1245. class TestDataFrameIndexingUInt64:
  1246. def test_setitem(self, uint64_frame):
  1247. df = uint64_frame
  1248. idx = df["A"].rename("foo")
  1249. # setitem
  1250. assert "C" not in df.columns
  1251. df["C"] = idx
  1252. tm.assert_series_equal(df["C"], Series(idx, name="C"))
  1253. assert "D" not in df.columns
  1254. df["D"] = "foo"
  1255. df["D"] = idx
  1256. tm.assert_series_equal(df["D"], Series(idx, name="D"))
  1257. del df["D"]
  1258. # With NaN: because uint64 has no NaN element,
  1259. # the column should be cast to object.
  1260. df2 = df.copy()
  1261. df2.iloc[1, 1] = pd.NaT
  1262. df2.iloc[1, 2] = pd.NaT
  1263. result = df2["B"]
  1264. tm.assert_series_equal(notna(result), Series([True, False, True], name="B"))
  1265. tm.assert_series_equal(
  1266. df2.dtypes,
  1267. Series(
  1268. [np.dtype("uint64"), np.dtype("O"), np.dtype("O")],
  1269. index=["A", "B", "C"],
  1270. ),
  1271. )
  1272. def test_object_casting_indexing_wraps_datetimelike(using_array_manager):
  1273. # GH#31649, check the indexing methods all the way down the stack
  1274. df = DataFrame(
  1275. {
  1276. "A": [1, 2],
  1277. "B": date_range("2000", periods=2),
  1278. "C": pd.timedelta_range("1 Day", periods=2),
  1279. }
  1280. )
  1281. ser = df.loc[0]
  1282. assert isinstance(ser.values[1], Timestamp)
  1283. assert isinstance(ser.values[2], pd.Timedelta)
  1284. ser = df.iloc[0]
  1285. assert isinstance(ser.values[1], Timestamp)
  1286. assert isinstance(ser.values[2], pd.Timedelta)
  1287. ser = df.xs(0, axis=0)
  1288. assert isinstance(ser.values[1], Timestamp)
  1289. assert isinstance(ser.values[2], pd.Timedelta)
  1290. if using_array_manager:
  1291. # remainder of the test checking BlockManager internals
  1292. return
  1293. mgr = df._mgr
  1294. mgr._rebuild_blknos_and_blklocs()
  1295. arr = mgr.fast_xs(0).array
  1296. assert isinstance(arr[1], Timestamp)
  1297. assert isinstance(arr[2], pd.Timedelta)
  1298. blk = mgr.blocks[mgr.blknos[1]]
  1299. assert blk.dtype == "M8[ns]" # we got the right block
  1300. val = blk.iget((0, 0))
  1301. assert isinstance(val, Timestamp)
  1302. blk = mgr.blocks[mgr.blknos[2]]
  1303. assert blk.dtype == "m8[ns]" # we got the right block
  1304. val = blk.iget((0, 0))
  1305. assert isinstance(val, pd.Timedelta)
  # Regex used by the tests below when a value outside the categories is set;
  # the parenthesised detail after "new category" is optional.
  msg1 = r"Cannot setitem on a Categorical with a new category( \(.*\))?, set the"
  # Message used when a Categorical with different categories is assigned.
  msg2 = "Cannot set a Categorical with another, without identical categories"
  1308. class TestLocILocDataFrameCategorical:
  @pytest.fixture
  def orig(self):
      """Baseline frame: seven rows, every cat "a" and every value 1."""
      cats = Categorical(["a"] * 7, categories=["a", "b"])
      idx = Index(list("hijklmn"))
      values = [1] * 7
      return DataFrame({"cats": cats, "values": values}, index=idx)
  @pytest.fixture
  def exp_single_row(self):
      """The expected values if we change a single row (row "j")."""
      labels = ["a"] * 7
      numbers = [1] * 7
      # position 2 is row "j"
      labels[2] = "b"
      numbers[2] = 2

      cats1 = Categorical(labels, categories=["a", "b"])
      idx1 = Index(list("hijklmn"))
      return DataFrame({"cats": cats1, "values": numbers}, index=idx1)
  @pytest.fixture
  def exp_multi_row(self):
      """Expected frame after assigning multiple rows (mixed values) to "j"/"k"."""
      # changed multiple rows: positions 2 and 3
      labels = ["a", "a"] + ["b", "b"] + ["a", "a", "a"]
      numbers = [1, 1] + [2, 2] + [1, 1, 1]

      cats2 = Categorical(labels, categories=["a", "b"])
      idx2 = Index(list("hijklmn"))
      return DataFrame({"cats": cats2, "values": numbers}, index=idx2)
  @pytest.fixture
  def exp_parts_cats_col(self):
      """Expected frame after changing part of the cats column only."""
      # rows "j" and "k" switch to "b"; the values column is untouched
      labels = ["a", "a"] + ["b", "b"] + ["a", "a", "a"]

      cats3 = Categorical(labels, categories=["a", "b"])
      idx3 = Index(list("hijklmn"))
      return DataFrame({"cats": cats3, "values": [1] * 7}, index=idx3)
  @pytest.fixture
  def exp_single_cats_value(self):
      """Expected frame after changing a single value in the cats column."""
      labels = ["a"] * 7
      # only row "j" (position 2) switches to "b"
      labels[2] = "b"

      cats4 = Categorical(labels, categories=["a", "b"])
      idx4 = Index(list("hijklmn"))
      return DataFrame({"cats": cats4, "values": [1] * 7}, index=idx4)
  @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
  def test_loc_iloc_setitem_list_of_lists(self, orig, exp_multi_row, indexer):
      """Assign multiple rows (mixed values) as a list of lists -> exp_multi_row."""
      # label slice for loc, positional slice for iloc
      key = slice("j", "k") if indexer is tm.loc else slice(2, 4)

      df = orig.copy()
      indexer(df)[key, :] = [["b", 2], ["b", 2]]
      tm.assert_frame_equal(df, exp_multi_row)

      # "c" is not one of the categories
      df = orig.copy()
      with pytest.raises(TypeError, match=msg1):
          indexer(df)[key, :] = [["c", 2], ["c", 2]]
  @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc, tm.at, tm.iat])
  def test_loc_iloc_at_iat_setitem_single_value_in_categories(
      self, orig, exp_single_cats_value, indexer
  ):
      """Assign a single value -> exp_single_cats_value."""
      df = orig.copy()

      # label-based indexers get labels, the positional ones get positions
      if indexer in [tm.loc, tm.at]:
          key = (df.index[2], df.columns[0])
      else:
          key = (2, 0)

      # "b" is among the categories for df["cat"]
      indexer(df)[key] = "b"
      tm.assert_frame_equal(df, exp_single_cats_value)

      # "c" is not among the categories for df["cat"]
      with pytest.raises(TypeError, match=msg1):
          indexer(df)[key] = "c"
  @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
  def test_loc_iloc_setitem_mask_single_value_in_categories(
      self, orig, exp_single_cats_value, indexer
  ):
      """Set through a row mask with a single True -> exp_single_cats_value."""
      df = orig.copy()

      # mask with single True
      mask = df.index == "j"

      # first column: by label for loc, by position for iloc
      key = df.columns[0] if indexer is tm.loc else 0

      indexer(df)[mask, key] = "b"
      tm.assert_frame_equal(df, exp_single_cats_value)
  @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
  def test_loc_iloc_setitem_full_row_non_categorical_rhs(
      self, orig, exp_single_row, indexer
  ):
      """Assign a complete row (mixed values) -> exp_single_row."""
      df = orig.copy()

      # third row: by label for loc, by position for iloc
      key = df.index[2] if indexer is tm.loc else 2

      # not categorical dtype, but "b" _is_ among the categories for df["cat"]
      indexer(df)[key, :] = ["b", 2]
      tm.assert_frame_equal(df, exp_single_row)

      # "c" is not among the categories for df["cat"]
      with pytest.raises(TypeError, match=msg1):
          indexer(df)[key, :] = ["c", 2]
  @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
  def test_loc_iloc_setitem_partial_col_categorical_rhs(
      self, orig, exp_parts_cats_col, indexer
  ):
      """Assign part of a categorical column from a Categorical.

      The expected result of the successful write is exp_parts_cats_col.
      """
      df = orig.copy()

      if indexer is tm.loc:
          key = (slice("j", "k"), df.columns[0])
      else:
          key = (slice(2, 4), 0)

      # same categories as we currently have in df["cats"]
      compat = Categorical(["b", "b"], categories=["a", "b"])
      indexer(df)[key] = compat
      tm.assert_frame_equal(df, exp_parts_cats_col)

      # categories do not match df["cat"]'s, but "b" is among them
      semi_compat = Categorical(list("bb"), categories=list("abc"))
      with pytest.raises(TypeError, match=msg2):
          # different categories but holdable values
          # -> not sure if this should fail or pass
          indexer(df)[key] = semi_compat

      # categories do not match df["cat"]'s, and "c" is not among them
      incompat = Categorical(list("cc"), categories=list("abc"))
      with pytest.raises(TypeError, match=msg2):
          # different values
          indexer(df)[key] = incompat
  @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
  def test_loc_iloc_setitem_non_categorical_rhs(
      self, orig, exp_parts_cats_col, indexer
  ):
      # Assign part of a column from a plain (non-categorical) list; the
      # successful assignment must produce exp_parts_cats_col.
      df = orig.copy()

      by_label = indexer is tm.loc
      rows = slice("j", "k") if by_label else slice(2, 4)
      col = df.columns[0] if by_label else 0
      key = (rows, col)

      # "b" is among the categories of the target column
      indexer(df)[key] = ["b", "b"]
      tm.assert_frame_equal(df, exp_parts_cats_col)

      # "c" is not part of the categories
      rejected = ["c", "c"]
      with pytest.raises(TypeError, match=msg1):
          indexer(df)[key] = rejected
  @pytest.mark.parametrize("indexer", [tm.getitem, tm.loc, tm.iloc])
  def test_getitem_preserve_object_index_with_dates(self, indexer):
      # https://github.com/pandas-dev/pandas/pull/42950 - when selecting a column
      # from dataframe, don't try to infer object dtype index on Series construction
      object_index = date_range("2012", periods=3).astype(object)
      df = DataFrame({0: [1, 2, 3]}, index=object_index)
      assert df.index.dtype == object

      # plain __getitem__ takes the column key alone; loc/iloc take
      # (all rows, column 0), i.e. the same key that ``[:, 0]`` produces
      key = 0 if indexer is tm.getitem else (slice(None), 0)
      ser = indexer(df)[key]

      assert ser.index.dtype == object
  1457. def test_loc_on_multiindex_one_level(self):
  1458. # GH#45779
  1459. df = DataFrame(
  1460. data=[[0], [1]],
  1461. index=MultiIndex.from_tuples([("a",), ("b",)], names=["first"]),
  1462. )
  1463. expected = DataFrame(
  1464. data=[[0]], index=MultiIndex.from_tuples([("a",)], names=["first"])
  1465. )
  1466. result = df.loc["a"]
  1467. tm.assert_frame_equal(result, expected)
  class TestDeprecatedIndexers:
      # GH#42825 enforced in 2.0: every test here expects ``.loc`` to raise
      # TypeError when a set or a dict appears anywhere in the key.

      # fragment of the error message every test matches against
      _unsupported_msg = "as an indexer is not supported"

      # keys used against a frame with the default row index
      _flat_keys = [
          {1},
          {1: 1},
          ({1}, "a"),
          ({1: 1}, "a"),
          (1, {"a"}),
          (1, {"a": "a"}),
      ]

      # keys used against a frame with a two-level MultiIndex on the rows
      _multiindex_keys = [
          {1},
          {1: 1},
          (({1}, 2), "a"),
          (({1: 1}, 2), "a"),
          ((1, 2), {"a"}),
          ((1, 2), {"a": "a"}),
      ]

      @pytest.mark.parametrize("key", _flat_keys)
      def test_getitem_dict_and_set_deprecated(self, key):
          # reading through .loc with a set/dict key must raise
          frame = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
          with pytest.raises(TypeError, match=self._unsupported_msg):
              frame.loc[key]

      @pytest.mark.parametrize("key", _multiindex_keys)
      def test_getitem_dict_and_set_deprecated_multiindex(self, key):
          # same as above, with MultiIndex rows
          row_index = MultiIndex.from_tuples([(1, 2), (3, 4)])
          frame = DataFrame([[1, 2], [3, 4]], columns=["a", "b"], index=row_index)
          with pytest.raises(TypeError, match=self._unsupported_msg):
              frame.loc[key]

      @pytest.mark.parametrize("key", _flat_keys)
      def test_setitem_dict_and_set_disallowed(self, key):
          # writing through .loc with a set/dict key must raise
          frame = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
          with pytest.raises(TypeError, match=self._unsupported_msg):
              frame.loc[key] = 1

      @pytest.mark.parametrize("key", _multiindex_keys)
      def test_setitem_dict_and_set_disallowed_multiindex(self, key):
          # same as above, with MultiIndex rows
          row_index = MultiIndex.from_tuples([(1, 2), (3, 4)])
          frame = DataFrame([[1, 2], [3, 4]], columns=["a", "b"], index=row_index)
          with pytest.raises(TypeError, match=self._unsupported_msg):
              frame.loc[key] = 1