# test_internals.py (49 KB)
  1. from datetime import (
  2. date,
  3. datetime,
  4. )
  5. import itertools
  6. import re
  7. import numpy as np
  8. import pytest
  9. from pandas._libs.internals import BlockPlacement
  10. from pandas.compat import IS64
  11. import pandas.util._test_decorators as td
  12. from pandas.core.dtypes.common import is_scalar
  13. import pandas as pd
  14. from pandas import (
  15. Categorical,
  16. DataFrame,
  17. DatetimeIndex,
  18. Index,
  19. IntervalIndex,
  20. Series,
  21. Timedelta,
  22. Timestamp,
  23. period_range,
  24. )
  25. import pandas._testing as tm
  26. import pandas.core.algorithms as algos
  27. from pandas.core.arrays import (
  28. DatetimeArray,
  29. SparseArray,
  30. TimedeltaArray,
  31. )
  32. from pandas.core.internals import (
  33. BlockManager,
  34. SingleBlockManager,
  35. make_block,
  36. )
  37. from pandas.core.internals.blocks import (
  38. ensure_block_shape,
  39. new_block,
  40. )
# This file contains BlockManager-specific tests.
# TODO(ArrayManager) factor out interleave_dtype tests

# pytest applies a module-level ``pytestmark`` to every test collected from
# this module.
# NOTE(review): judging by its name, this mark skips tests that are invalid
# under ArrayManager -- confirm in pandas.util._test_decorators.
pytestmark = td.skip_array_manager_invalid_test
  44. @pytest.fixture(params=[new_block, make_block])
  45. def block_maker(request):
  46. """
  47. Fixture to test both the internal new_block and pseudo-public make_block.
  48. """
  49. return request.param
  50. @pytest.fixture
  51. def mgr():
  52. return create_mgr(
  53. "a: f8; b: object; c: f8; d: object; e: f8;"
  54. "f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;"
  55. "k: M8[ns, US/Eastern]; l: M8[ns, CET];"
  56. )
  57. def assert_block_equal(left, right):
  58. tm.assert_numpy_array_equal(left.values, right.values)
  59. assert left.dtype == right.dtype
  60. assert isinstance(left.mgr_locs, BlockPlacement)
  61. assert isinstance(right.mgr_locs, BlockPlacement)
  62. tm.assert_numpy_array_equal(left.mgr_locs.as_array, right.mgr_locs.as_array)
  63. def get_numeric_mat(shape):
  64. arr = np.arange(shape[0])
  65. return np.lib.stride_tricks.as_strided(
  66. x=arr, shape=shape, strides=(arr.itemsize,) + (0,) * (len(shape) - 1)
  67. ).copy()
  68. N = 10
  69. def create_block(typestr, placement, item_shape=None, num_offset=0, maker=new_block):
  70. """
  71. Supported typestr:
  72. * float, f8, f4, f2
  73. * int, i8, i4, i2, i1
  74. * uint, u8, u4, u2, u1
  75. * complex, c16, c8
  76. * bool
  77. * object, string, O
  78. * datetime, dt, M8[ns], M8[ns, tz]
  79. * timedelta, td, m8[ns]
  80. * sparse (SparseArray with fill_value=0.0)
  81. * sparse_na (SparseArray with fill_value=np.nan)
  82. * category, category2
  83. """
  84. placement = BlockPlacement(placement)
  85. num_items = len(placement)
  86. if item_shape is None:
  87. item_shape = (N,)
  88. shape = (num_items,) + item_shape
  89. mat = get_numeric_mat(shape)
  90. if typestr in (
  91. "float",
  92. "f8",
  93. "f4",
  94. "f2",
  95. "int",
  96. "i8",
  97. "i4",
  98. "i2",
  99. "i1",
  100. "uint",
  101. "u8",
  102. "u4",
  103. "u2",
  104. "u1",
  105. ):
  106. values = mat.astype(typestr) + num_offset
  107. elif typestr in ("complex", "c16", "c8"):
  108. values = 1.0j * (mat.astype(typestr) + num_offset)
  109. elif typestr in ("object", "string", "O"):
  110. values = np.reshape([f"A{i:d}" for i in mat.ravel() + num_offset], shape)
  111. elif typestr in ("b", "bool"):
  112. values = np.ones(shape, dtype=np.bool_)
  113. elif typestr in ("datetime", "dt", "M8[ns]"):
  114. values = (mat * 1e9).astype("M8[ns]")
  115. elif typestr.startswith("M8[ns"):
  116. # datetime with tz
  117. m = re.search(r"M8\[ns,\s*(\w+\/?\w*)\]", typestr)
  118. assert m is not None, f"incompatible typestr -> {typestr}"
  119. tz = m.groups()[0]
  120. assert num_items == 1, "must have only 1 num items for a tz-aware"
  121. values = DatetimeIndex(np.arange(N) * 10**9, tz=tz)._data
  122. values = ensure_block_shape(values, ndim=len(shape))
  123. elif typestr in ("timedelta", "td", "m8[ns]"):
  124. values = (mat * 1).astype("m8[ns]")
  125. elif typestr in ("category",):
  126. values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
  127. elif typestr in ("category2",):
  128. values = Categorical(["a", "a", "a", "a", "b", "b", "c", "c", "c", "d"])
  129. elif typestr in ("sparse", "sparse_na"):
  130. if shape[-1] != 10:
  131. # We also are implicitly assuming this in the category cases above
  132. raise NotImplementedError
  133. assert all(s == 1 for s in shape[:-1])
  134. if typestr.endswith("_na"):
  135. fill_value = np.nan
  136. else:
  137. fill_value = 0.0
  138. values = SparseArray(
  139. [fill_value, fill_value, 1, 2, 3, fill_value, 4, 5, fill_value, 6],
  140. fill_value=fill_value,
  141. )
  142. arr = values.sp_values.view()
  143. arr += num_offset - 1
  144. else:
  145. raise ValueError(f'Unsupported typestr: "{typestr}"')
  146. return maker(values, placement=placement, ndim=len(shape))
  147. def create_single_mgr(typestr, num_rows=None):
  148. if num_rows is None:
  149. num_rows = N
  150. return SingleBlockManager(
  151. create_block(typestr, placement=slice(0, num_rows), item_shape=()),
  152. Index(np.arange(num_rows)),
  153. )
  154. def create_mgr(descr, item_shape=None):
  155. """
  156. Construct BlockManager from string description.
  157. String description syntax looks similar to np.matrix initializer. It looks
  158. like this::
  159. a,b,c: f8; d,e,f: i8
  160. Rules are rather simple:
  161. * see list of supported datatypes in `create_block` method
  162. * components are semicolon-separated
  163. * each component is `NAME,NAME,NAME: DTYPE_ID`
  164. * whitespace around colons & semicolons are removed
  165. * components with same DTYPE_ID are combined into single block
  166. * to force multiple blocks with same dtype, use '-SUFFIX'::
  167. 'a:f8-1; b:f8-2; c:f8-foobar'
  168. """
  169. if item_shape is None:
  170. item_shape = (N,)
  171. offset = 0
  172. mgr_items = []
  173. block_placements = {}
  174. for d in descr.split(";"):
  175. d = d.strip()
  176. if not len(d):
  177. continue
  178. names, blockstr = d.partition(":")[::2]
  179. blockstr = blockstr.strip()
  180. names = names.strip().split(",")
  181. mgr_items.extend(names)
  182. placement = list(np.arange(len(names)) + offset)
  183. try:
  184. block_placements[blockstr].extend(placement)
  185. except KeyError:
  186. block_placements[blockstr] = placement
  187. offset += len(names)
  188. mgr_items = Index(mgr_items)
  189. blocks = []
  190. num_offset = 0
  191. for blockstr, placement in block_placements.items():
  192. typestr = blockstr.split("-")[0]
  193. blocks.append(
  194. create_block(
  195. typestr, placement, item_shape=item_shape, num_offset=num_offset
  196. )
  197. )
  198. num_offset += len(placement)
  199. sblocks = sorted(blocks, key=lambda b: b.mgr_locs[0])
  200. return BlockManager(
  201. tuple(sblocks),
  202. [mgr_items] + [Index(np.arange(n)) for n in item_shape],
  203. )
  204. @pytest.fixture
  205. def fblock():
  206. return create_block("float", [0, 2, 4])
  207. class TestBlock:
  208. def test_constructor(self):
  209. int32block = create_block("i4", [0])
  210. assert int32block.dtype == np.int32
  211. @pytest.mark.parametrize(
  212. "typ, data",
  213. [
  214. ["float", [0, 2, 4]],
  215. ["complex", [7]],
  216. ["object", [1, 3]],
  217. ["bool", [5]],
  218. ],
  219. )
  220. def test_pickle(self, typ, data):
  221. blk = create_block(typ, data)
  222. assert_block_equal(tm.round_trip_pickle(blk), blk)
  223. def test_mgr_locs(self, fblock):
  224. assert isinstance(fblock.mgr_locs, BlockPlacement)
  225. tm.assert_numpy_array_equal(
  226. fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.intp)
  227. )
  228. def test_attrs(self, fblock):
  229. assert fblock.shape == fblock.values.shape
  230. assert fblock.dtype == fblock.values.dtype
  231. assert len(fblock) == len(fblock.values)
  232. def test_copy(self, fblock):
  233. cop = fblock.copy()
  234. assert cop is not fblock
  235. assert_block_equal(fblock, cop)
  236. def test_delete(self, fblock):
  237. newb = fblock.copy()
  238. locs = newb.mgr_locs
  239. nb = newb.delete(0)[0]
  240. assert newb.mgr_locs is locs
  241. assert nb is not newb
  242. tm.assert_numpy_array_equal(
  243. nb.mgr_locs.as_array, np.array([2, 4], dtype=np.intp)
  244. )
  245. assert not (newb.values[0] == 1).all()
  246. assert (nb.values[0] == 1).all()
  247. newb = fblock.copy()
  248. locs = newb.mgr_locs
  249. nb = newb.delete(1)
  250. assert len(nb) == 2
  251. assert newb.mgr_locs is locs
  252. tm.assert_numpy_array_equal(
  253. nb[0].mgr_locs.as_array, np.array([0], dtype=np.intp)
  254. )
  255. tm.assert_numpy_array_equal(
  256. nb[1].mgr_locs.as_array, np.array([4], dtype=np.intp)
  257. )
  258. assert not (newb.values[1] == 2).all()
  259. assert (nb[1].values[0] == 2).all()
  260. newb = fblock.copy()
  261. nb = newb.delete(2)
  262. assert len(nb) == 1
  263. tm.assert_numpy_array_equal(
  264. nb[0].mgr_locs.as_array, np.array([0, 2], dtype=np.intp)
  265. )
  266. assert (nb[0].values[1] == 1).all()
  267. newb = fblock.copy()
  268. with pytest.raises(IndexError, match=None):
  269. newb.delete(3)
  270. def test_delete_datetimelike(self):
  271. # dont use np.delete on values, as that will coerce from DTA/TDA to ndarray
  272. arr = np.arange(20, dtype="i8").reshape(5, 4).view("m8[ns]")
  273. df = DataFrame(arr)
  274. blk = df._mgr.blocks[0]
  275. assert isinstance(blk.values, TimedeltaArray)
  276. nb = blk.delete(1)
  277. assert len(nb) == 2
  278. assert isinstance(nb[0].values, TimedeltaArray)
  279. assert isinstance(nb[1].values, TimedeltaArray)
  280. df = DataFrame(arr.view("M8[ns]"))
  281. blk = df._mgr.blocks[0]
  282. assert isinstance(blk.values, DatetimeArray)
  283. nb = blk.delete([1, 3])
  284. assert len(nb) == 2
  285. assert isinstance(nb[0].values, DatetimeArray)
  286. assert isinstance(nb[1].values, DatetimeArray)
  287. def test_split(self):
  288. # GH#37799
  289. values = np.random.randn(3, 4)
  290. blk = new_block(values, placement=[3, 1, 6], ndim=2)
  291. result = blk._split()
  292. # check that we get views, not copies
  293. values[:] = -9999
  294. assert (blk.values == -9999).all()
  295. assert len(result) == 3
  296. expected = [
  297. new_block(values[[0]], placement=[3], ndim=2),
  298. new_block(values[[1]], placement=[1], ndim=2),
  299. new_block(values[[2]], placement=[6], ndim=2),
  300. ]
  301. for res, exp in zip(result, expected):
  302. assert_block_equal(res, exp)
  303. class TestBlockManager:
  304. def test_attrs(self):
  305. mgr = create_mgr("a,b,c: f8-1; d,e,f: f8-2")
  306. assert mgr.nblocks == 2
  307. assert len(mgr) == 6
  308. def test_duplicate_ref_loc_failure(self):
  309. tmp_mgr = create_mgr("a:bool; a: f8")
  310. axes, blocks = tmp_mgr.axes, tmp_mgr.blocks
  311. blocks[0].mgr_locs = BlockPlacement(np.array([0]))
  312. blocks[1].mgr_locs = BlockPlacement(np.array([0]))
  313. # test trying to create block manager with overlapping ref locs
  314. msg = "Gaps in blk ref_locs"
  315. with pytest.raises(AssertionError, match=msg):
  316. mgr = BlockManager(blocks, axes)
  317. mgr._rebuild_blknos_and_blklocs()
  318. blocks[0].mgr_locs = BlockPlacement(np.array([0]))
  319. blocks[1].mgr_locs = BlockPlacement(np.array([1]))
  320. mgr = BlockManager(blocks, axes)
  321. mgr.iget(1)
  322. def test_pickle(self, mgr):
  323. mgr2 = tm.round_trip_pickle(mgr)
  324. tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  325. # GH2431
  326. assert hasattr(mgr2, "_is_consolidated")
  327. assert hasattr(mgr2, "_known_consolidated")
  328. # reset to False on load
  329. assert not mgr2._is_consolidated
  330. assert not mgr2._known_consolidated
  331. @pytest.mark.parametrize("mgr_string", ["a,a,a:f8", "a: f8; a: i8"])
  332. def test_non_unique_pickle(self, mgr_string):
  333. mgr = create_mgr(mgr_string)
  334. mgr2 = tm.round_trip_pickle(mgr)
  335. tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  336. def test_categorical_block_pickle(self):
  337. mgr = create_mgr("a: category")
  338. mgr2 = tm.round_trip_pickle(mgr)
  339. tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  340. smgr = create_single_mgr("category")
  341. smgr2 = tm.round_trip_pickle(smgr)
  342. tm.assert_series_equal(Series(smgr), Series(smgr2))
  343. def test_iget(self):
  344. cols = Index(list("abc"))
  345. values = np.random.rand(3, 3)
  346. block = new_block(
  347. values=values.copy(),
  348. placement=np.arange(3, dtype=np.intp),
  349. ndim=values.ndim,
  350. )
  351. mgr = BlockManager(blocks=(block,), axes=[cols, Index(np.arange(3))])
  352. tm.assert_almost_equal(mgr.iget(0).internal_values(), values[0])
  353. tm.assert_almost_equal(mgr.iget(1).internal_values(), values[1])
  354. tm.assert_almost_equal(mgr.iget(2).internal_values(), values[2])
  355. def test_set(self):
  356. mgr = create_mgr("a,b,c: int", item_shape=(3,))
  357. mgr.insert(len(mgr.items), "d", np.array(["foo"] * 3))
  358. mgr.iset(1, np.array(["bar"] * 3))
  359. tm.assert_numpy_array_equal(mgr.iget(0).internal_values(), np.array([0] * 3))
  360. tm.assert_numpy_array_equal(
  361. mgr.iget(1).internal_values(), np.array(["bar"] * 3, dtype=np.object_)
  362. )
  363. tm.assert_numpy_array_equal(mgr.iget(2).internal_values(), np.array([2] * 3))
  364. tm.assert_numpy_array_equal(
  365. mgr.iget(3).internal_values(), np.array(["foo"] * 3, dtype=np.object_)
  366. )
  367. def test_set_change_dtype(self, mgr):
  368. mgr.insert(len(mgr.items), "baz", np.zeros(N, dtype=bool))
  369. mgr.iset(mgr.items.get_loc("baz"), np.repeat("foo", N))
  370. idx = mgr.items.get_loc("baz")
  371. assert mgr.iget(idx).dtype == np.object_
  372. mgr2 = mgr.consolidate()
  373. mgr2.iset(mgr2.items.get_loc("baz"), np.repeat("foo", N))
  374. idx = mgr2.items.get_loc("baz")
  375. assert mgr2.iget(idx).dtype == np.object_
  376. mgr2.insert(len(mgr2.items), "quux", np.random.randn(N).astype(int))
  377. idx = mgr2.items.get_loc("quux")
  378. assert mgr2.iget(idx).dtype == np.int_
  379. mgr2.iset(mgr2.items.get_loc("quux"), np.random.randn(N))
  380. assert mgr2.iget(idx).dtype == np.float_
  381. def test_copy(self, mgr):
  382. cp = mgr.copy(deep=False)
  383. for blk, cp_blk in zip(mgr.blocks, cp.blocks):
  384. # view assertion
  385. tm.assert_equal(cp_blk.values, blk.values)
  386. if isinstance(blk.values, np.ndarray):
  387. assert cp_blk.values.base is blk.values.base
  388. else:
  389. # DatetimeTZBlock has DatetimeIndex values
  390. assert cp_blk.values._ndarray.base is blk.values._ndarray.base
  391. # copy(deep=True) consolidates, so the block-wise assertions will
  392. # fail is mgr is not consolidated
  393. mgr._consolidate_inplace()
  394. cp = mgr.copy(deep=True)
  395. for blk, cp_blk in zip(mgr.blocks, cp.blocks):
  396. bvals = blk.values
  397. cpvals = cp_blk.values
  398. tm.assert_equal(cpvals, bvals)
  399. if isinstance(cpvals, np.ndarray):
  400. lbase = cpvals.base
  401. rbase = bvals.base
  402. else:
  403. lbase = cpvals._ndarray.base
  404. rbase = bvals._ndarray.base
  405. # copy assertion we either have a None for a base or in case of
  406. # some blocks it is an array (e.g. datetimetz), but was copied
  407. if isinstance(cpvals, DatetimeArray):
  408. assert (lbase is None and rbase is None) or (lbase is not rbase)
  409. elif not isinstance(cpvals, np.ndarray):
  410. assert lbase is not rbase
  411. else:
  412. assert lbase is None and rbase is None
  413. def test_sparse(self):
  414. mgr = create_mgr("a: sparse-1; b: sparse-2")
  415. assert mgr.as_array().dtype == np.float64
  416. def test_sparse_mixed(self):
  417. mgr = create_mgr("a: sparse-1; b: sparse-2; c: f8")
  418. assert len(mgr.blocks) == 3
  419. assert isinstance(mgr, BlockManager)
  420. @pytest.mark.parametrize(
  421. "mgr_string, dtype",
  422. [("c: f4; d: f2", np.float32), ("c: f4; d: f2; e: f8", np.float64)],
  423. )
  424. def test_as_array_float(self, mgr_string, dtype):
  425. mgr = create_mgr(mgr_string)
  426. assert mgr.as_array().dtype == dtype
  427. @pytest.mark.parametrize(
  428. "mgr_string, dtype",
  429. [
  430. ("a: bool-1; b: bool-2", np.bool_),
  431. ("a: i8-1; b: i8-2; c: i4; d: i2; e: u1", np.int64),
  432. ("c: i4; d: i2; e: u1", np.int32),
  433. ],
  434. )
  435. def test_as_array_int_bool(self, mgr_string, dtype):
  436. mgr = create_mgr(mgr_string)
  437. assert mgr.as_array().dtype == dtype
  438. def test_as_array_datetime(self):
  439. mgr = create_mgr("h: datetime-1; g: datetime-2")
  440. assert mgr.as_array().dtype == "M8[ns]"
  441. def test_as_array_datetime_tz(self):
  442. mgr = create_mgr("h: M8[ns, US/Eastern]; g: M8[ns, CET]")
  443. assert mgr.iget(0).dtype == "datetime64[ns, US/Eastern]"
  444. assert mgr.iget(1).dtype == "datetime64[ns, CET]"
  445. assert mgr.as_array().dtype == "object"
  446. @pytest.mark.parametrize("t", ["float16", "float32", "float64", "int32", "int64"])
  447. def test_astype(self, t):
  448. # coerce all
  449. mgr = create_mgr("c: f4; d: f2; e: f8")
  450. t = np.dtype(t)
  451. tmgr = mgr.astype(t)
  452. assert tmgr.iget(0).dtype.type == t
  453. assert tmgr.iget(1).dtype.type == t
  454. assert tmgr.iget(2).dtype.type == t
  455. # mixed
  456. mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8")
  457. t = np.dtype(t)
  458. tmgr = mgr.astype(t, errors="ignore")
  459. assert tmgr.iget(2).dtype.type == t
  460. assert tmgr.iget(4).dtype.type == t
  461. assert tmgr.iget(5).dtype.type == t
  462. assert tmgr.iget(6).dtype.type == t
  463. assert tmgr.iget(0).dtype.type == np.object_
  464. assert tmgr.iget(1).dtype.type == np.object_
  465. if t != np.int64:
  466. assert tmgr.iget(3).dtype.type == np.datetime64
  467. else:
  468. assert tmgr.iget(3).dtype.type == t
  469. def test_convert(self):
  470. def _compare(old_mgr, new_mgr):
  471. """compare the blocks, numeric compare ==, object don't"""
  472. old_blocks = set(old_mgr.blocks)
  473. new_blocks = set(new_mgr.blocks)
  474. assert len(old_blocks) == len(new_blocks)
  475. # compare non-numeric
  476. for b in old_blocks:
  477. found = False
  478. for nb in new_blocks:
  479. if (b.values == nb.values).all():
  480. found = True
  481. break
  482. assert found
  483. for b in new_blocks:
  484. found = False
  485. for ob in old_blocks:
  486. if (b.values == ob.values).all():
  487. found = True
  488. break
  489. assert found
  490. # noops
  491. mgr = create_mgr("f: i8; g: f8")
  492. new_mgr = mgr.convert(copy=True)
  493. _compare(mgr, new_mgr)
  494. # convert
  495. mgr = create_mgr("a,b,foo: object; f: i8; g: f8")
  496. mgr.iset(0, np.array(["1"] * N, dtype=np.object_))
  497. mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
  498. mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
  499. new_mgr = mgr.convert(copy=True)
  500. assert new_mgr.iget(0).dtype == np.object_
  501. assert new_mgr.iget(1).dtype == np.object_
  502. assert new_mgr.iget(2).dtype == np.object_
  503. assert new_mgr.iget(3).dtype == np.int64
  504. assert new_mgr.iget(4).dtype == np.float64
  505. mgr = create_mgr(
  506. "a,b,foo: object; f: i4; bool: bool; dt: datetime; i: i8; g: f8; h: f2"
  507. )
  508. mgr.iset(0, np.array(["1"] * N, dtype=np.object_))
  509. mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
  510. mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
  511. new_mgr = mgr.convert(copy=True)
  512. assert new_mgr.iget(0).dtype == np.object_
  513. assert new_mgr.iget(1).dtype == np.object_
  514. assert new_mgr.iget(2).dtype == np.object_
  515. assert new_mgr.iget(3).dtype == np.int32
  516. assert new_mgr.iget(4).dtype == np.bool_
  517. assert new_mgr.iget(5).dtype.type, np.datetime64
  518. assert new_mgr.iget(6).dtype == np.int64
  519. assert new_mgr.iget(7).dtype == np.float64
  520. assert new_mgr.iget(8).dtype == np.float16
  521. def test_invalid_ea_block(self):
  522. with pytest.raises(ValueError, match="need to split"):
  523. create_mgr("a: category; b: category")
  524. with pytest.raises(ValueError, match="need to split"):
  525. create_mgr("a: category2; b: category2")
  526. def test_interleave(self):
  527. # self
  528. for dtype in ["f8", "i8", "object", "bool", "complex", "M8[ns]", "m8[ns]"]:
  529. mgr = create_mgr(f"a: {dtype}")
  530. assert mgr.as_array().dtype == dtype
  531. mgr = create_mgr(f"a: {dtype}; b: {dtype}")
  532. assert mgr.as_array().dtype == dtype
  533. @pytest.mark.parametrize(
  534. "mgr_string, dtype",
  535. [
  536. ("a: category", "i8"),
  537. ("a: category; b: category", "i8"),
  538. ("a: category; b: category2", "object"),
  539. ("a: category2", "object"),
  540. ("a: category2; b: category2", "object"),
  541. ("a: f8", "f8"),
  542. ("a: f8; b: i8", "f8"),
  543. ("a: f4; b: i8", "f8"),
  544. ("a: f4; b: i8; d: object", "object"),
  545. ("a: bool; b: i8", "object"),
  546. ("a: complex", "complex"),
  547. ("a: f8; b: category", "object"),
  548. ("a: M8[ns]; b: category", "object"),
  549. ("a: M8[ns]; b: bool", "object"),
  550. ("a: M8[ns]; b: i8", "object"),
  551. ("a: m8[ns]; b: bool", "object"),
  552. ("a: m8[ns]; b: i8", "object"),
  553. ("a: M8[ns]; b: m8[ns]", "object"),
  554. ],
  555. )
  556. def test_interleave_dtype(self, mgr_string, dtype):
  557. # will be converted according the actual dtype of the underlying
  558. mgr = create_mgr("a: category")
  559. assert mgr.as_array().dtype == "i8"
  560. mgr = create_mgr("a: category; b: category2")
  561. assert mgr.as_array().dtype == "object"
  562. mgr = create_mgr("a: category2")
  563. assert mgr.as_array().dtype == "object"
  564. # combinations
  565. mgr = create_mgr("a: f8")
  566. assert mgr.as_array().dtype == "f8"
  567. mgr = create_mgr("a: f8; b: i8")
  568. assert mgr.as_array().dtype == "f8"
  569. mgr = create_mgr("a: f4; b: i8")
  570. assert mgr.as_array().dtype == "f8"
  571. mgr = create_mgr("a: f4; b: i8; d: object")
  572. assert mgr.as_array().dtype == "object"
  573. mgr = create_mgr("a: bool; b: i8")
  574. assert mgr.as_array().dtype == "object"
  575. mgr = create_mgr("a: complex")
  576. assert mgr.as_array().dtype == "complex"
  577. mgr = create_mgr("a: f8; b: category")
  578. assert mgr.as_array().dtype == "f8"
  579. mgr = create_mgr("a: M8[ns]; b: category")
  580. assert mgr.as_array().dtype == "object"
  581. mgr = create_mgr("a: M8[ns]; b: bool")
  582. assert mgr.as_array().dtype == "object"
  583. mgr = create_mgr("a: M8[ns]; b: i8")
  584. assert mgr.as_array().dtype == "object"
  585. mgr = create_mgr("a: m8[ns]; b: bool")
  586. assert mgr.as_array().dtype == "object"
  587. mgr = create_mgr("a: m8[ns]; b: i8")
  588. assert mgr.as_array().dtype == "object"
  589. mgr = create_mgr("a: M8[ns]; b: m8[ns]")
  590. assert mgr.as_array().dtype == "object"
  591. def test_consolidate_ordering_issues(self, mgr):
  592. mgr.iset(mgr.items.get_loc("f"), np.random.randn(N))
  593. mgr.iset(mgr.items.get_loc("d"), np.random.randn(N))
  594. mgr.iset(mgr.items.get_loc("b"), np.random.randn(N))
  595. mgr.iset(mgr.items.get_loc("g"), np.random.randn(N))
  596. mgr.iset(mgr.items.get_loc("h"), np.random.randn(N))
  597. # we have datetime/tz blocks in mgr
  598. cons = mgr.consolidate()
  599. assert cons.nblocks == 4
  600. cons = mgr.consolidate().get_numeric_data()
  601. assert cons.nblocks == 1
  602. assert isinstance(cons.blocks[0].mgr_locs, BlockPlacement)
  603. tm.assert_numpy_array_equal(
  604. cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.intp)
  605. )
  606. def test_reindex_items(self):
  607. # mgr is not consolidated, f8 & f8-2 blocks
  608. mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2")
  609. reindexed = mgr.reindex_axis(["g", "c", "a", "d"], axis=0)
  610. # reindex_axis does not consolidate_inplace, as that risks failing to
  611. # invalidate _item_cache
  612. assert not reindexed.is_consolidated()
  613. tm.assert_index_equal(reindexed.items, Index(["g", "c", "a", "d"]))
  614. tm.assert_almost_equal(
  615. mgr.iget(6).internal_values(), reindexed.iget(0).internal_values()
  616. )
  617. tm.assert_almost_equal(
  618. mgr.iget(2).internal_values(), reindexed.iget(1).internal_values()
  619. )
  620. tm.assert_almost_equal(
  621. mgr.iget(0).internal_values(), reindexed.iget(2).internal_values()
  622. )
  623. tm.assert_almost_equal(
  624. mgr.iget(3).internal_values(), reindexed.iget(3).internal_values()
  625. )
  626. def test_get_numeric_data(self, using_copy_on_write):
  627. mgr = create_mgr(
  628. "int: int; float: float; complex: complex;"
  629. "str: object; bool: bool; obj: object; dt: datetime",
  630. item_shape=(3,),
  631. )
  632. mgr.iset(5, np.array([1, 2, 3], dtype=np.object_))
  633. numeric = mgr.get_numeric_data()
  634. tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"]))
  635. tm.assert_almost_equal(
  636. mgr.iget(mgr.items.get_loc("float")).internal_values(),
  637. numeric.iget(numeric.items.get_loc("float")).internal_values(),
  638. )
  639. # Check sharing
  640. numeric.iset(
  641. numeric.items.get_loc("float"),
  642. np.array([100.0, 200.0, 300.0]),
  643. inplace=True,
  644. )
  645. if using_copy_on_write:
  646. tm.assert_almost_equal(
  647. mgr.iget(mgr.items.get_loc("float")).internal_values(),
  648. np.array([1.0, 1.0, 1.0]),
  649. )
  650. else:
  651. tm.assert_almost_equal(
  652. mgr.iget(mgr.items.get_loc("float")).internal_values(),
  653. np.array([100.0, 200.0, 300.0]),
  654. )
  655. numeric2 = mgr.get_numeric_data(copy=True)
  656. tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"]))
  657. numeric2.iset(
  658. numeric2.items.get_loc("float"),
  659. np.array([1000.0, 2000.0, 3000.0]),
  660. inplace=True,
  661. )
  662. if using_copy_on_write:
  663. tm.assert_almost_equal(
  664. mgr.iget(mgr.items.get_loc("float")).internal_values(),
  665. np.array([1.0, 1.0, 1.0]),
  666. )
  667. else:
  668. tm.assert_almost_equal(
  669. mgr.iget(mgr.items.get_loc("float")).internal_values(),
  670. np.array([100.0, 200.0, 300.0]),
  671. )
  def test_get_bool_data(self, using_copy_on_write):
      """
      Check which item get_bool_data() selects and what the parent manager
      holds for "bool" after writing into the managers it returns.
      """
      mgr = create_mgr(
          "int: int; float: float; complex: complex;"
          "str: object; bool: bool; obj: object; dt: datetime",
          item_shape=(3,),
      )
      # Overwrite position 6 with object-dtype booleans; the assertion below
      # expects only the "bool" item to be selected regardless.
      mgr.iset(6, np.array([True, False, True], dtype=np.object_))

      def parent_bool_values():
          # Values the parent manager currently stores for the "bool" item.
          return mgr.iget(mgr.items.get_loc("bool")).internal_values()

      bool_only = mgr.get_bool_data()
      tm.assert_index_equal(bool_only.items, Index(["bool"]))
      tm.assert_almost_equal(
          parent_bool_values(),
          bool_only.iget(bool_only.items.get_loc("bool")).internal_values(),
      )

      # The parent is expected to hold the same values after each of the two
      # writes below; which values depends on the copy-on-write mode.
      if using_copy_on_write:
          expected_parent = np.array([True, True, True])
      else:
          expected_parent = np.array([True, False, True])

      bool_only.iset(0, np.array([True, False, True]), inplace=True)
      tm.assert_numpy_array_equal(parent_bool_values(), expected_parent)

      # Check sharing
      copied = mgr.get_bool_data(copy=True)
      copied.iset(0, np.array([False, True, False]))
      tm.assert_numpy_array_equal(parent_bool_values(), expected_parent)
  def test_unicode_repr_doesnt_raise(self):
      """repr() of a manager with a non-ASCII item label must not raise."""
      mgr = create_mgr("b,\u05d0: object")
      repr(mgr)
  @pytest.mark.parametrize(
      "mgr_string", ["a,b,c: i8-1; d,e,f: i8-2", "a,a,a: i8-1; b,b,b: i8-2"]
  )
  def test_equals(self, mgr_string):
      """A manager must equal one built from its blocks in reverse order."""
      # parametrized over both unique and repeated item labels
      original = create_mgr(mgr_string)
      reversed_blocks = original.blocks[::-1]
      flipped = BlockManager(reversed_blocks, original.axes)
      assert original.equals(flipped)
  @pytest.mark.parametrize(
      "mgr_string",
      [
          "a:i8;b:f8",  # basic case
          "a:i8;b:f8;c:c8;d:b",  # many types
          "a:i8;e:dt;f:td;g:string",  # more types
          "a:i8;b:category;c:category2",  # categories
          "c:sparse;d:sparse_na;b:f8",  # sparse
      ],
  )
  def test_equals_block_order_different_dtypes(self, mgr_string):
      """equals() must hold in both directions for every ordering of the blocks."""
      # GH 9330
      reference = create_mgr(mgr_string)
      for ordering in itertools.permutations(reference.blocks):
          shuffled = BlockManager(tuple(ordering), reference.axes)
          assert reference.equals(shuffled)
          assert shuffled.equals(reference)
  def test_single_mgr_ctor(self):
      """A 5-row "f8" single manager must expose the values 0.0 through 4.0."""
      single = create_single_mgr("f8", num_rows=5)
      values = single.external_values().tolist()
      assert values == [0.0, 1.0, 2.0, 3.0, 4.0]
  @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0])
  def test_validate_bool_args(self, value):
      """replace_list must raise ValueError when ``inplace`` is not a bool."""
      mgr = create_mgr("a,b,c: i8-1; d,e,f: i8-2")

      # The message names the type that was actually received.
      expected_msg = (
          'For argument "inplace" expected type bool, '
          f"received type {type(value).__name__}."
      )
      with pytest.raises(ValueError, match=expected_msg):
          mgr.replace_list([1], [2], inplace=value)
  def test_iset_split_block(self):
      """Check blklocs, blknos and the block count after _iset_split_block(0, [0])."""
      mgr = create_mgr("a,b,c: i8; d: f8")
      mgr._iset_split_block(0, np.array([0]))

      # Both expected arrays use the platform-dependent index dtype.
      index_dtype = "int64" if IS64 else "int32"

      tm.assert_numpy_array_equal(
          mgr.blklocs, np.array([0, 0, 1, 0], dtype=index_dtype)
      )
      # NOTE(review): the original comment here was cut off mid-sentence
      # ("First indexer currently does not have a block associated with it
      # in case"); presumably it refers to the call above passing no
      # replacement values -- confirm.
      tm.assert_numpy_array_equal(
          mgr.blknos, np.array([0, 0, 0, 1], dtype=index_dtype)
      )
      assert len(mgr.blocks) == 2
  def test_iset_split_block_values(self):
      """Check blklocs, blknos and the block count when new values are supplied."""
      mgr = create_mgr("a,b,c: i8; d: f8")
      replacement = np.array([list(range(10))])
      mgr._iset_split_block(0, np.array([0]), replacement)

      # Both expected arrays use the platform-dependent index dtype.
      index_dtype = "int64" if IS64 else "int32"

      tm.assert_numpy_array_equal(
          mgr.blklocs, np.array([0, 0, 1, 0], dtype=index_dtype)
      )
      # NOTE(review): the original comment here was cut off mid-sentence
      # ("First indexer currently does not have a block associated with it
      # in case") and looks copied from the test without values -- confirm.
      tm.assert_numpy_array_equal(
          mgr.blknos, np.array([0, 2, 2, 1], dtype=index_dtype)
      )
      assert len(mgr.blocks) == 3
  771. def _as_array(mgr):
  772. if mgr.ndim == 1:
  773. return mgr.external_values()
  774. return mgr.as_array().T
  class TestIndexing:
      # Nosetests-style data-driven tests.
      #
      # Each test applies an indexing routine to a block manager and compares
      # the outcome to the result of the same operation on an np.ndarray.
      #
      # NOTE: sparse (SparseBlock with fill_value != np.nan) fail a lot of tests
      #       and are disabled.

      MANAGERS = [create_single_mgr(dtype, N) for dtype in ("f8", "i8")] + [
          # 2-dim
          create_mgr(spec, item_shape=(N,))
          for spec in (
              "a,b,c,d,e,f: f8",
              "a,b,c,d,e,f: i8",
              "a,b: f8; c,d: i8; e,f: string",
              "a,b: f8; c,d: i8; e,f: f8",
          )
      ]

      @pytest.mark.parametrize("mgr", MANAGERS)
      def test_get_slice(self, mgr):
          """Slicing a manager must match indexing its dense array."""

          def assert_slice_ok(mgr, axis, slobj):
              dense = _as_array(mgr)

              # we maybe using an ndarray to test slicing and
              # might not be the full length of the axis
              if isinstance(slobj, np.ndarray):
                  labels = mgr.axes[axis]
                  if len(labels) and len(slobj) and len(slobj) != len(labels):
                      padding = np.zeros(len(labels) - len(slobj), dtype=bool)
                      slobj = np.concatenate([slobj, padding])

              if isinstance(slobj, slice):
                  result = mgr.get_slice(slobj, axis=axis)
              elif mgr.ndim == 1 and axis == 0:
                  result = mgr.getitem_mgr(slobj)
              else:
                  # BlockManager doesn't support non-slice, SingleBlockManager
                  #  doesn't support axis > 0
                  return

              # Address `axis` of the dense array, leaving earlier axes whole.
              dense_key = (slice(None),) * axis + (slobj,)
              tm.assert_numpy_array_equal(
                  dense[dense_key], _as_array(result), check_dtype=False
              )
              tm.assert_index_equal(mgr.axes[axis][slobj], result.axes[axis])

          assert mgr.ndim <= 2, mgr.ndim

          plain_slices = (
              slice(None),
              slice(3),
              slice(100),
              slice(1, 4),
              slice(3, 0, -2),
          )
          for ax in range(mgr.ndim):
              # slice
              for slc in plain_slices:
                  assert_slice_ok(mgr, ax, slc)

              if mgr.ndim >= 2:
                  # 2D only support slice objects
                  continue

              size = mgr.shape[ax]

              # boolean mask
              assert_slice_ok(mgr, ax, np.array([], dtype=np.bool_))
              assert_slice_ok(mgr, ax, np.ones(size, dtype=np.bool_))
              assert_slice_ok(mgr, ax, np.zeros(size, dtype=np.bool_))

              if size >= 3:
                  assert_slice_ok(mgr, ax, np.arange(size) % 3 == 0)
                  assert_slice_ok(
                      mgr, ax, np.array([True, True, False], dtype=np.bool_)
                  )

              # fancy indexer
              assert_slice_ok(mgr, ax, [])
              assert_slice_ok(mgr, ax, list(range(size)))

              if size >= 3:
                  assert_slice_ok(mgr, ax, [0, 1, 2])
                  assert_slice_ok(mgr, ax, [-1, -2, -3])

      @pytest.mark.parametrize("mgr", MANAGERS)
      def test_take(self, mgr):
          """Manager.take must match np.take on the dense array."""

          def assert_take_ok(mgr, axis, indexer):
              dense = _as_array(mgr)
              taken = mgr.take(indexer, axis)
              tm.assert_numpy_array_equal(
                  np.take(dense, indexer, axis), _as_array(taken), check_dtype=False
              )
              tm.assert_index_equal(mgr.axes[axis].take(indexer), taken.axes[axis])

          for ax in range(mgr.ndim):
              # take/fancy indexer
              indexers = [[], [0, 0, 0], list(range(mgr.shape[ax]))]
              if mgr.shape[ax] >= 3:
                  indexers += [[0, 1, 2], [-1, -2, -3]]

              for indexer in indexers:
                  assert_take_ok(mgr, ax, indexer=indexer)

      @pytest.mark.parametrize("mgr", MANAGERS)
      @pytest.mark.parametrize("fill_value", [None, np.nan, 100.0])
      def test_reindex_axis(self, fill_value, mgr):
          """reindex_axis must match algos.take_nd applied to the dense array."""

          def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value):
              dense = _as_array(mgr)
              positions = mgr.axes[axis].get_indexer_for(new_labels)

              reindexed = mgr.reindex_axis(new_labels, axis, fill_value=fill_value)
              expected = algos.take_nd(dense, positions, axis, fill_value=fill_value)
              tm.assert_numpy_array_equal(
                  expected, _as_array(reindexed), check_dtype=False
              )
              tm.assert_index_equal(reindexed.axes[axis], new_labels)

          for ax in range(mgr.ndim):
              labels = mgr.axes[ax]
              candidates = [
                  Index([]),
                  labels,
                  labels[[0, 0, 0]],
                  Index(["foo", "bar", "baz"]),
                  Index(["foo", labels[0], "baz"]),
              ]
              if mgr.shape[ax] >= 3:
                  candidates += [
                      labels[:-3],
                      labels[-3::-1],
                      labels[[0, 1, 2, 0, 1, 2]],
                  ]

              for new_labels in candidates:
                  assert_reindex_axis_is_ok(mgr, ax, new_labels, fill_value)

      @pytest.mark.parametrize("mgr", MANAGERS)
      @pytest.mark.parametrize("fill_value", [None, np.nan, 100.0])
      def test_reindex_indexer(self, fill_value, mgr):
          """reindex_indexer must match algos.take_nd with the same indexer."""

          def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value):
              dense = _as_array(mgr)
              expected = algos.take_nd(dense, indexer, axis, fill_value=fill_value)
              reindexed = mgr.reindex_indexer(
                  new_labels, indexer, axis, fill_value=fill_value
              )
              tm.assert_numpy_array_equal(
                  expected, _as_array(reindexed), check_dtype=False
              )
              tm.assert_index_equal(reindexed.axes[axis], new_labels)

          for ax in range(mgr.ndim):
              labels = mgr.axes[ax]
              size = mgr.shape[ax]

              # (new labels, positional indexer) pairs, checked in order
              cases = [
                  (Index([]), np.array([], dtype=np.intp)),
                  (labels, np.arange(size)),
                  (Index(["foo"] * size), np.arange(size)),
                  (labels[::-1], np.arange(size)),
                  (labels, np.arange(size)[::-1]),
                  (Index(["foo", "bar", "baz"]), np.array([0, 0, 0])),
                  (Index(["foo", "bar", "baz"]), np.array([-1, 0, -1])),
                  (Index(["foo", labels[0], "baz"]), np.array([-1, -1, -1])),
              ]
              if size >= 3:
                  cases.append((Index(["foo", "bar", "baz"]), np.array([0, 1, 2])))

              for new_labels, indexer in cases:
                  assert_reindex_indexer_is_ok(
                      mgr, ax, new_labels, indexer, fill_value
                  )
  class TestBlockPlacement:
      """Tests for BlockPlacement construction, conversion and ``add``."""

      @pytest.mark.parametrize(
          "slc, expected",
          [
              (slice(0, 4), 4),
              (slice(0, 4, 2), 2),
              (slice(0, 3, 2), 2),
              (slice(0, 1, 2), 1),
              (slice(1, 0, -1), 1),
          ],
      )
      def test_slice_len(self, slc, expected):
          """len() of a slice-backed placement must equal the expected count."""
          assert len(BlockPlacement(slc)) == expected

      @pytest.mark.parametrize("slc", [slice(1, 1, 0), slice(1, 2, 0)])
      def test_zero_step_raises(self, slc):
          """A slice with step 0 must be rejected with ValueError."""
          msg = "slice step cannot be zero"
          with pytest.raises(ValueError, match=msg):
              BlockPlacement(slc)

      def test_slice_canonize_negative_stop(self):
          """slice(3, -1, -2) must be stored as slice(3, None, -2)."""
          # GH#37524 negative stop is OK with negative step and positive start
          slc = slice(3, -1, -2)

          bp = BlockPlacement(slc)
          assert bp.indexer == slice(3, None, -2)

      @pytest.mark.parametrize(
          "slc",
          [
              slice(None, None),
              slice(10, None),
              slice(None, None, -1),
              slice(None, 10, -1),
              # These are "unbounded" because negative index will
              #  change depending on container shape.
              slice(-1, None),
              slice(None, -1),
              slice(-1, -1),
              slice(-1, None, -1),
              slice(None, -1, -1),
              slice(-1, -1, -1),
          ],
      )
      def test_unbounded_slice_raises(self, slc):
          """Each listed slice must be rejected as an unbounded slice."""
          msg = "unbounded slice"
          with pytest.raises(ValueError, match=msg):
              BlockPlacement(slc)

      @pytest.mark.parametrize(
          "slc",
          [
              slice(0, 0),
              slice(100, 0),
              slice(100, 100),
              slice(100, 100, -1),
              slice(0, 100, -1),
          ],
      )
      def test_not_slice_like_slices(self, slc):
          """Each listed slice must not be reported as slice-like."""
          assert not BlockPlacement(slc).is_slice_like

      @pytest.mark.parametrize(
          "arr, slc",
          [
              ([0], slice(0, 1, 1)),
              ([100], slice(100, 101, 1)),
              ([0, 1, 2], slice(0, 3, 1)),
              ([0, 5, 10], slice(0, 15, 5)),
              ([0, 100], slice(0, 200, 100)),
              ([2, 1], slice(2, 0, -1)),
          ],
      )
      def test_array_to_slice_conversion(self, arr, slc):
          """as_slice of a list-backed placement must equal the expected slice."""
          assert BlockPlacement(arr).as_slice == slc

      @pytest.mark.parametrize(
          "arr",
          [
              [],
              [-1],
              [-1, -2, -3],
              [-10],
              # a second, identical [-1] entry used to follow here; it only
              # repeated the case above, so it was dropped
              [-1, 0, 1, 2],
              [-2, 0, 2, 4],
              [1, 0, -1],
              [1, 1, 1],
          ],
      )
      def test_not_slice_like_arrays(self, arr):
          """Each listed array must not be reported as slice-like."""
          assert not BlockPlacement(arr).is_slice_like

      @pytest.mark.parametrize(
          "slc, expected",
          [(slice(0, 3), [0, 1, 2]), (slice(0, 0), []), (slice(3, 0), [])],
      )
      def test_slice_iter(self, slc, expected):
          """Iterating a slice-backed placement must yield the expected positions."""
          assert list(BlockPlacement(slc)) == expected

      @pytest.mark.parametrize(
          "slc, arr",
          [
              (slice(0, 3), [0, 1, 2]),
              (slice(0, 0), []),
              (slice(3, 0), []),
              (slice(3, 0, -1), [3, 2, 1]),
          ],
      )
      def test_slice_to_array_conversion(self, slc, arr):
          """as_array must equal the expected positions as an intp ndarray."""
          tm.assert_numpy_array_equal(
              BlockPlacement(slc).as_array, np.asarray(arr, dtype=np.intp)
          )

      def test_blockplacement_add(self):
          """add() with a scalar and with arrays must give the expected positions."""
          bpl = BlockPlacement(slice(0, 5))
          assert bpl.add(1).as_slice == slice(1, 6, 1)
          assert bpl.add(np.arange(5)).as_slice == slice(0, 10, 2)
          assert list(bpl.add(np.arange(5, 0, -1))) == [5, 5, 5, 5, 5]

      @pytest.mark.parametrize(
          "val, inc, expected",
          [
              (slice(0, 0), 0, []),
              (slice(1, 4), 0, [1, 2, 3]),
              (slice(3, 0, -1), 0, [3, 2, 1]),
              ([1, 2, 4], 0, [1, 2, 4]),
              (slice(0, 0), 10, []),
              (slice(1, 4), 10, [11, 12, 13]),
              (slice(3, 0, -1), 10, [13, 12, 11]),
              ([1, 2, 4], 10, [11, 12, 14]),
              (slice(0, 0), -1, []),
              (slice(1, 4), -1, [0, 1, 2]),
              ([1, 2, 4], -1, [0, 1, 3]),
          ],
      )
      def test_blockplacement_add_int(self, val, inc, expected):
          """add() with an int must shift every position by that amount."""
          assert list(BlockPlacement(val).add(inc)) == expected

      @pytest.mark.parametrize("val", [slice(1, 4), [1, 2, 4]])
      def test_blockplacement_add_int_raises(self, val):
          """add(-10) on these placements must raise ValueError."""
          msg = "iadd causes length change"
          with pytest.raises(ValueError, match=msg):
              BlockPlacement(val).add(-10)
  class TestCanHoldElement:
      @pytest.fixture(
          params=[
              lambda x: x,
              lambda x: x.to_series(),
              lambda x: x._data,
              lambda x: list(x),
              lambda x: x.astype(object),
              lambda x: np.asarray(x),
              lambda x: x[0],
              lambda x: x[:0],
          ]
      )
      def element(self, request):
          """
          Functions that take an Index and return an element that should have
          blk._can_hold_element(element) for a Block with this index's dtype.
          """
          return request.param

      def test_datetime_block_can_hold_element(self):
          """_can_hold_element on a datetime block must agree with __setitem__."""
          block = create_block("datetime", [0])

          assert block._can_hold_element([])

          # We will check that block._can_hold_element iff arr.__setitem__ works
          arr = pd.array(block.values.ravel())

          # coerce None
          assert block._can_hold_element(None)
          arr[0] = None
          assert arr[0] is pd.NaT

          # coerce different types of datetime objects
          for accepted in (np.datetime64("2010-10-10"), datetime(2010, 10, 10)):
              assert block._can_hold_element(accepted)
              arr[0] = accepted

          # a plain date is refused by the block and by the array
          rejected = date(2010, 10, 10)
          assert not block._can_hold_element(rejected)

          msg = (
              "value should be a 'Timestamp', 'NaT', "
              "or array of those. Got 'date' instead."
          )
          with pytest.raises(TypeError, match=msg):
              arr[0] = rejected

      @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
      def test_interval_can_hold_element_emptylist(self, dtype, element):
          """An interval block must accept an empty list."""
          # NOTE(review): the `element` fixture is requested but never used in
          # this test -- confirm whether that is intentional.
          breaks = np.array([1, 3, 4], dtype=dtype)
          ii = IntervalIndex.from_breaks(breaks)
          blk = new_block(ii._data, [1], ndim=2)

          assert blk._can_hold_element([])
          # TODO: check this holds for all blocks

      @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
      def test_interval_can_hold_element(self, dtype, element):
          """Elements from the same index are held; mismatched intervals are not."""
          breaks = np.array([1, 3, 4, 9], dtype=dtype)
          ii = IntervalIndex.from_breaks(breaks)
          blk = new_block(ii._data, [1], ndim=2)

          matching = element(ii)
          self.check_series_setitem(matching, ii, True)
          assert blk._can_hold_element(matching)

          # Careful: to get the expected Series-inplace behavior we need
          # `elem` to not have the same length as `arr`
          mismatched = (
              IntervalIndex.from_breaks(breaks[:-1], closed="neither"),
              IntervalIndex.from_breaks([Timestamp(1), Timestamp(3), Timestamp(4)]),
              IntervalIndex.from_breaks([Timedelta(1), Timedelta(3), Timedelta(4)]),
          )
          for other in mismatched:
              elem = element(other)
              self.check_series_setitem(elem, ii, False)
              assert not blk._can_hold_element(elem)

      def test_period_can_hold_element_emptylist(self):
          """A period block must accept an empty list."""
          pi = period_range("2016", periods=3, freq="A")
          blk = new_block(pi._data.reshape(1, 3), [1], ndim=2)

          assert blk._can_hold_element([])

      def test_period_can_hold_element(self, element):
          """Elements from the same index set in place; other indexes do not."""
          pi = period_range("2016", periods=3, freq="A")

          matching = element(pi)
          self.check_series_setitem(matching, pi, True)

          # Careful: to get the expected Series-inplace behavior we need
          # `elem` to not have the same length as `arr`
          mismatched = (pi.asfreq("D")[:-1], pi.to_timestamp("S")[:-1])
          for other in mismatched:
              elem = element(other)
              self.check_series_setitem(elem, pi, False)

      def check_can_hold_element(self, obj, elem, inplace: bool):
          """Assert that obj's first block holds ``elem`` iff ``inplace`` is True."""
          blk = obj._mgr.blocks[0]
          can_hold = blk._can_hold_element(elem)
          assert can_hold if inplace else not can_hold

      def check_series_setitem(self, elem, index: Index, inplace: bool):
          """
          Set ``elem`` into a Series wrapping a copy of ``index._data`` and check
          that the assignment was in place (or produced object dtype otherwise).
          """
          arr = index._data.copy()
          ser = Series(arr, copy=False)

          self.check_can_hold_element(ser, elem, inplace)

          # scalars go to the first position, anything else to a leading slice
          key = 0 if is_scalar(elem) else slice(len(elem))
          ser[key] = elem

          if inplace:
              assert ser.array is arr  # i.e. setting was done inplace
          else:
              assert ser.dtype == object
  class TestShouldStore:
      def test_should_store_categorical(self):
          """should_store on a categorical block must accept only its own dtype."""
          cat = Categorical(["A", "B", "C"])
          blk = DataFrame(cat)._mgr.blocks[0]

          # matching dtype
          for same_dtype in (cat, cat[:-1]):
              assert blk.should_store(same_dtype)

          # different dtype, then ndarray instead of Categorical
          for other in (cat.as_ordered(), np.asarray(cat)):
              assert not blk.should_store(other)
  def test_validate_ndim(block_maker):
      """Requesting ndim=2 for 1-dim values must raise ValueError."""
      one_dim_values = np.array([1.0, 2.0])
      locs = slice(2)

      with pytest.raises(
          ValueError,
          match=r"Wrong number of dimensions. values.ndim != ndim \[1 != 2\]",
      ):
          block_maker(one_dim_values, locs, ndim=2)
  def test_block_shape():
      """Reindexed int and categorical Series must share the same block indexer."""
      target = Index([0, 1, 2, 3, 4])
      plain = Series([1, 2, 3]).reindex(target)
      categorical = Series(Categorical([1, 2, 3])).reindex(target)

      plain_locs = plain._mgr.blocks[0].mgr_locs
      categorical_locs = categorical._mgr.blocks[0].mgr_locs
      assert plain_locs.indexer == categorical_locs.indexer
  def test_make_block_no_pandas_array(block_maker):
      """Blocks built from a PandasArray must have an integer dtype kind."""
      # https://github.com/pandas-dev/pandas/pull/24866
      arr = pd.arrays.PandasArray(np.array([1, 2]))
      placement = slice(len(arr))
      integer_kinds = ["i", "u"]

      # PandasArray, no dtype
      result = block_maker(arr, placement, ndim=arr.ndim)
      assert result.dtype.kind in integer_kinds

      if block_maker is not make_block:
          # new_block requires caller to unwrap PandasArray and no longer
          # takes the dtype keyword, so the remaining checks are skipped
          return

      assert result.is_extension is False

      # PandasArray, PandasDtype
      result = block_maker(arr, placement, dtype=arr.dtype, ndim=arr.ndim)
      assert result.dtype.kind in integer_kinds
      assert result.is_extension is False

      # ndarray, PandasDtype
      result = block_maker(arr.to_numpy(), placement, dtype=arr.dtype, ndim=arr.ndim)
      assert result.dtype.kind in integer_kinds
      assert result.is_extension is False