12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437 |
- from datetime import (
- date,
- datetime,
- )
- import itertools
- import re
- import numpy as np
- import pytest
- from pandas._libs.internals import BlockPlacement
- from pandas.compat import IS64
- import pandas.util._test_decorators as td
- from pandas.core.dtypes.common import is_scalar
- import pandas as pd
- from pandas import (
- Categorical,
- DataFrame,
- DatetimeIndex,
- Index,
- IntervalIndex,
- Series,
- Timedelta,
- Timestamp,
- period_range,
- )
- import pandas._testing as tm
- import pandas.core.algorithms as algos
- from pandas.core.arrays import (
- DatetimeArray,
- SparseArray,
- TimedeltaArray,
- )
- from pandas.core.internals import (
- BlockManager,
- SingleBlockManager,
- make_block,
- )
- from pandas.core.internals.blocks import (
- ensure_block_shape,
- new_block,
- )
# this file contains BlockManager specific tests
# TODO(ArrayManager) factor out interleave_dtype tests
# Module-level pytest mark: pytest applies ``pytestmark`` to every test collected
# from this module.
# NOTE(review): presumably this skips the tests when the ArrayManager is in use --
# confirm against pandas.util._test_decorators.
pytestmark = td.skip_array_manager_invalid_test
@pytest.fixture(params=[new_block, make_block])
def block_maker(request):
    """
    Parametrized fixture yielding each block constructor in turn.

    Tests using it run once with the internal ``new_block`` and once with
    the pseudo-public ``make_block``.
    """
    constructor = request.param
    return constructor
@pytest.fixture
def mgr():
    """
    Mixed-dtype manager with items ``a`` through ``l``.

    The description covers float, object, bool, int, complex, two separately
    forced datetime blocks and two tz-aware datetime blocks.
    """
    description = (
        "a: f8; b: object; c: f8; d: object; e: f8;"
        "f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;"
        "k: M8[ns, US/Eastern]; l: M8[ns, CET];"
    )
    return create_mgr(description)
def assert_block_equal(left, right):
    """
    Assert that two blocks hold the same values, dtype and placement.

    Both ``mgr_locs`` attributes must be ``BlockPlacement`` instances; their
    ``as_array`` representations are compared element-wise.
    """
    tm.assert_numpy_array_equal(left.values, right.values)
    assert left.dtype == right.dtype

    # Check the placement type on both sides before comparing contents.
    for blk in (left, right):
        assert isinstance(blk.mgr_locs, BlockPlacement)

    left_locs = left.mgr_locs.as_array
    right_locs = right.mgr_locs.as_array
    tm.assert_numpy_array_equal(left_locs, right_locs)
def get_numeric_mat(shape):
    """
    Build an integer array in which every entry equals its index along axis 0.

    Parameters
    ----------
    shape : tuple of int
        Shape of the array to create; must have at least one dimension.

    Returns
    -------
    np.ndarray
        A freshly allocated (writable) array of ``shape`` where
        ``result[i, ...] == i``.
    """
    arr = np.arange(shape[0])
    # Give ``arr`` one trailing length-1 axis per extra dimension so that it
    # broadcasts along everything except axis 0.  This replaces the manual
    # zero-stride ``as_strided`` view with the safe, documented equivalent.
    column = arr.reshape((shape[0],) + (1,) * (len(shape) - 1))
    # ``broadcast_to`` returns a read-only view; copy to get an independent,
    # writable array.
    return np.broadcast_to(column, shape).copy()
# Default length used by the factories in this file: the default ``item_shape``
# is ``(N,)`` and the default ``num_rows`` of ``create_single_mgr`` is ``N``.
N = 10
def create_block(typestr, placement, item_shape=None, num_offset=0, maker=new_block):
    """
    Build one block of the kind named by ``typestr``.

    Supported typestr:

        * float, f8, f4, f2
        * int, i8, i4, i2, i1
        * uint, u8, u4, u2, u1
        * complex, c16, c8
        * bool, b
        * object, string, O
        * datetime, dt, M8[ns], M8[ns, tz]
        * timedelta, td, m8[ns]
        * sparse (SparseArray with fill_value=0.0)
        * sparse_na (SparseArray with fill_value=np.nan)
        * category, category2

    Parameters
    ----------
    typestr : str
        One of the identifiers listed above.
    placement : object
        Passed to ``BlockPlacement``; its length is the number of items.
    item_shape : tuple, optional
        Shape of a single item; defaults to ``(N,)``.
    num_offset : int, default 0
        Offset added to the generated values for the numeric, complex,
        object and sparse kinds.
    maker : callable, default new_block
        Called as ``maker(values, placement=..., ndim=...)`` to build the block.

    Raises
    ------
    NotImplementedError
        For sparse kinds when the last dimension of the shape is not 10.
    ValueError
        For an unrecognised ``typestr``.
    """
    placement = BlockPlacement(placement)
    num_items = len(placement)
    trailing_shape = (N,) if item_shape is None else item_shape
    shape = (num_items,) + trailing_shape

    mat = get_numeric_mat(shape)

    real_numeric = (
        "float",
        "f8",
        "f4",
        "f2",
        "int",
        "i8",
        "i4",
        "i2",
        "i1",
        "uint",
        "u8",
        "u4",
        "u2",
        "u1",
    )

    if typestr in real_numeric:
        values = mat.astype(typestr) + num_offset
    elif typestr in ("complex", "c16", "c8"):
        values = 1.0j * (mat.astype(typestr) + num_offset)
    elif typestr in ("object", "string", "O"):
        labels = [f"A{i:d}" for i in mat.ravel() + num_offset]
        values = np.reshape(labels, shape)
    elif typestr in ("b", "bool"):
        values = np.ones(shape, dtype=np.bool_)
    elif typestr in ("datetime", "dt", "M8[ns]"):
        values = (mat * 1e9).astype("M8[ns]")
    elif typestr.startswith("M8[ns"):
        # tz-aware datetime: the timezone is parsed out of the typestr
        tz_match = re.search(r"M8\[ns,\s*(\w+\/?\w*)\]", typestr)
        assert tz_match is not None, f"incompatible typestr -> {typestr}"
        tz = tz_match.group(1)
        assert num_items == 1, "must have only 1 num items for a tz-aware"
        tz_values = DatetimeIndex(np.arange(N) * 10**9, tz=tz)._data
        values = ensure_block_shape(tz_values, ndim=len(shape))
    elif typestr in ("timedelta", "td", "m8[ns]"):
        values = (mat * 1).astype("m8[ns]")
    elif typestr == "category":
        # fixed 10-element data; the requested shape is not consulted here
        values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
    elif typestr == "category2":
        values = Categorical(["a", "a", "a", "a", "b", "b", "c", "c", "c", "d"])
    elif typestr in ("sparse", "sparse_na"):
        if shape[-1] != 10:
            # The category branches above implicitly assume this as well
            raise NotImplementedError

        assert all(dim == 1 for dim in shape[:-1])
        fill_value = np.nan if typestr.endswith("_na") else 0.0
        values = SparseArray(
            [fill_value, fill_value, 1, 2, 3, fill_value, 4, 5, fill_value, 6],
            fill_value=fill_value,
        )
        # shift the stored (non-fill) values in place through a view
        stored = values.sp_values.view()
        stored += num_offset - 1
    else:
        raise ValueError(f'Unsupported typestr: "{typestr}"')

    return maker(values, placement=placement, ndim=len(shape))
def create_single_mgr(typestr, num_rows=None):
    """
    Build a ``SingleBlockManager`` holding one block of kind ``typestr``.

    Parameters
    ----------
    typestr : str
        Block kind, forwarded to ``create_block``.
    num_rows : int, optional
        Number of rows; defaults to ``N``.
    """
    n_rows = N if num_rows is None else num_rows
    block = create_block(typestr, placement=slice(0, n_rows), item_shape=())
    index = Index(np.arange(n_rows))
    return SingleBlockManager(block, index)
def create_mgr(descr, item_shape=None):
    """
    Build a BlockManager from a compact string description.

    The description resembles an np.matrix initializer, for example::

        a,b,c: f8; d,e,f: i8

    Rules:

    * the supported datatype identifiers are those of `create_block`
    * components are separated by semicolons
    * every component has the form `NAME,NAME,NAME: DTYPE_ID`
    * whitespace around colons & semicolons is stripped
    * components sharing a DTYPE_ID end up in one block
    * append '-SUFFIX' to force separate blocks of the same dtype::

        'a:f8-1; b:f8-2; c:f8-foobar'

    Parameters
    ----------
    descr : str
        The description string.
    item_shape : tuple, optional
        Shape of a single item; defaults to ``(N,)``.
    """
    if item_shape is None:
        item_shape = (N,)

    item_names = []
    placements_by_block = {}
    for component in descr.split(";"):
        component = component.strip()
        if not component:
            continue
        raw_names, _, blockstr = component.partition(":")
        blockstr = blockstr.strip()
        names = raw_names.strip().split(",")

        # positions of these names = number of items collected so far onwards
        start = len(item_names)
        item_names.extend(names)
        locs = list(np.arange(len(names)) + start)
        placements_by_block.setdefault(blockstr, []).extend(locs)

    items = Index(item_names)

    blocks = []
    values_offset = 0
    for blockstr, locs in placements_by_block.items():
        # anything after the first '-' is only a suffix to keep blocks apart
        typestr = blockstr.partition("-")[0]
        block = create_block(
            typestr, locs, item_shape=item_shape, num_offset=values_offset
        )
        blocks.append(block)
        values_offset += len(locs)

    # order blocks by the first manager location they occupy
    blocks.sort(key=lambda blk: blk.mgr_locs[0])
    axes = [items]
    axes.extend(Index(np.arange(n)) for n in item_shape)
    return BlockManager(tuple(blocks), axes)
@pytest.fixture
def fblock():
    """Float block placed at manager locations 0, 2 and 4."""
    locations = [0, 2, 4]
    return create_block("float", locations)
class TestBlock:
    """Tests exercising single blocks created by ``create_block``/``new_block``."""

    def test_constructor(self):
        """An ``"i4"`` block reports an int32 dtype."""
        blk = create_block("i4", [0])
        assert blk.dtype == np.int32

    @pytest.mark.parametrize(
        "typ, data",
        [
            ("float", [0, 2, 4]),
            ("complex", [7]),
            ("object", [1, 3]),
            ("bool", [5]),
        ],
    )
    def test_pickle(self, typ, data):
        """A block survives a pickle round trip unchanged."""
        original = create_block(typ, data)
        restored = tm.round_trip_pickle(original)
        assert_block_equal(restored, original)

    def test_mgr_locs(self, fblock):
        """``mgr_locs`` is a BlockPlacement holding the requested positions."""
        locs = fblock.mgr_locs
        assert isinstance(locs, BlockPlacement)
        expected = np.array([0, 2, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(fblock.mgr_locs.as_array, expected)

    def test_attrs(self, fblock):
        """shape, dtype and len mirror those of the underlying values."""
        values = fblock.values
        assert fblock.shape == values.shape
        assert fblock.dtype == values.dtype
        assert len(fblock) == len(values)

    def test_copy(self, fblock):
        """``copy`` returns a distinct but equal block."""
        duplicate = fblock.copy()
        assert duplicate is not fblock
        assert_block_equal(fblock, duplicate)

    def test_delete(self, fblock):
        """``delete`` returns new block(s) and leaves the source untouched."""
        # delete the first row
        source = fblock.copy()
        source_locs = source.mgr_locs
        remaining = source.delete(0)[0]
        assert source.mgr_locs is source_locs

        assert remaining is not source

        tm.assert_numpy_array_equal(
            remaining.mgr_locs.as_array, np.array([2, 4], dtype=np.intp)
        )
        assert not (source.values[0] == 1).all()
        assert (remaining.values[0] == 1).all()

        # delete the middle row: the result is split in two blocks
        source = fblock.copy()
        source_locs = source.mgr_locs
        pieces = source.delete(1)
        assert len(pieces) == 2
        assert source.mgr_locs is source_locs

        tm.assert_numpy_array_equal(
            pieces[0].mgr_locs.as_array, np.array([0], dtype=np.intp)
        )
        tm.assert_numpy_array_equal(
            pieces[1].mgr_locs.as_array, np.array([4], dtype=np.intp)
        )
        assert not (source.values[1] == 2).all()
        assert (pieces[1].values[0] == 2).all()

        # delete the last row
        source = fblock.copy()
        pieces = source.delete(2)
        assert len(pieces) == 1
        tm.assert_numpy_array_equal(
            pieces[0].mgr_locs.as_array, np.array([0, 2], dtype=np.intp)
        )
        assert (pieces[0].values[1] == 1).all()

        # out-of-bounds position
        source = fblock.copy()
        with pytest.raises(IndexError, match=None):
            source.delete(3)

    def test_delete_datetimelike(self):
        """Deleting from timedelta/datetime blocks keeps the array type."""
        # np.delete on the values would coerce DTA/TDA to ndarray, so the
        # pieces are checked for their array type explicitly
        raw = np.arange(20, dtype="i8").reshape(5, 4).view("m8[ns]")

        td_frame = DataFrame(raw)
        td_block = td_frame._mgr.blocks[0]
        assert isinstance(td_block.values, TimedeltaArray)

        pieces = td_block.delete(1)
        assert len(pieces) == 2
        for piece in (pieces[0], pieces[1]):
            assert isinstance(piece.values, TimedeltaArray)

        dt_frame = DataFrame(raw.view("M8[ns]"))
        dt_block = dt_frame._mgr.blocks[0]
        assert isinstance(dt_block.values, DatetimeArray)

        pieces = dt_block.delete([1, 3])
        assert len(pieces) == 2
        for piece in (pieces[0], pieces[1]):
            assert isinstance(piece.values, DatetimeArray)

    def test_split(self):
        """``_split`` yields one block per row, sharing the parent's data."""
        # GH#37799
        data = np.random.randn(3, 4)
        parent = new_block(data, placement=[3, 1, 6], ndim=2)
        pieces = parent._split()

        # writing through the original array must be visible (views, not copies)
        data[:] = -9999
        assert (parent.values == -9999).all()

        assert len(pieces) == 3
        expected = [
            new_block(data[[row]], placement=[loc], ndim=2)
            for row, loc in enumerate([3, 1, 6])
        ]
        for piece, exp in zip(pieces, expected):
            assert_block_equal(piece, exp)
class TestBlockManager:
    """Tests for ``BlockManager`` objects built from ``create_mgr`` descriptions."""

    def test_attrs(self):
        """Two forced f8 blocks of three items each: nblocks == 2, len == 6."""
        mgr = create_mgr("a,b,c: f8-1; d,e,f: f8-2")
        assert mgr.nblocks == 2
        assert len(mgr) == 6

    def test_duplicate_ref_loc_failure(self):
        """Blocks placed at the same location are rejected; distinct ones work."""
        tmp_mgr = create_mgr("a:bool; a: f8")

        axes, blocks = tmp_mgr.axes, tmp_mgr.blocks

        blocks[0].mgr_locs = BlockPlacement(np.array([0]))
        blocks[1].mgr_locs = BlockPlacement(np.array([0]))

        # test trying to create block manager with overlapping ref locs

        msg = "Gaps in blk ref_locs"

        with pytest.raises(AssertionError, match=msg):
            mgr = BlockManager(blocks, axes)
            mgr._rebuild_blknos_and_blklocs()

        blocks[0].mgr_locs = BlockPlacement(np.array([0]))
        blocks[1].mgr_locs = BlockPlacement(np.array([1]))
        mgr = BlockManager(blocks, axes)
        mgr.iget(1)

    def test_pickle(self, mgr):
        """A manager round-trips through pickle with consolidation flags reset."""
        mgr2 = tm.round_trip_pickle(mgr)
        tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))

        # GH2431
        assert hasattr(mgr2, "_is_consolidated")
        assert hasattr(mgr2, "_known_consolidated")

        # reset to False on load
        assert not mgr2._is_consolidated
        assert not mgr2._known_consolidated

    @pytest.mark.parametrize("mgr_string", ["a,a,a:f8", "a: f8; a: i8"])
    def test_non_unique_pickle(self, mgr_string):
        """Managers with duplicate item names round-trip through pickle."""
        mgr = create_mgr(mgr_string)
        mgr2 = tm.round_trip_pickle(mgr)
        tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))

    def test_categorical_block_pickle(self):
        """Categorical data round-trips in both 2D and single-block managers."""
        mgr = create_mgr("a: category")
        mgr2 = tm.round_trip_pickle(mgr)
        tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))

        smgr = create_single_mgr("category")
        smgr2 = tm.round_trip_pickle(smgr)
        tm.assert_series_equal(Series(smgr), Series(smgr2))

    def test_iget(self):
        """``iget(i)`` returns the i-th row of the block values."""
        cols = Index(list("abc"))
        values = np.random.rand(3, 3)
        block = new_block(
            values=values.copy(),
            placement=np.arange(3, dtype=np.intp),
            ndim=values.ndim,
        )
        mgr = BlockManager(blocks=(block,), axes=[cols, Index(np.arange(3))])

        tm.assert_almost_equal(mgr.iget(0).internal_values(), values[0])
        tm.assert_almost_equal(mgr.iget(1).internal_values(), values[1])
        tm.assert_almost_equal(mgr.iget(2).internal_values(), values[2])

    def test_set(self):
        """``insert`` and ``iset`` with string data leave other items intact."""
        mgr = create_mgr("a,b,c: int", item_shape=(3,))

        mgr.insert(len(mgr.items), "d", np.array(["foo"] * 3))
        mgr.iset(1, np.array(["bar"] * 3))
        tm.assert_numpy_array_equal(mgr.iget(0).internal_values(), np.array([0] * 3))
        tm.assert_numpy_array_equal(
            mgr.iget(1).internal_values(), np.array(["bar"] * 3, dtype=np.object_)
        )
        tm.assert_numpy_array_equal(mgr.iget(2).internal_values(), np.array([2] * 3))
        tm.assert_numpy_array_equal(
            mgr.iget(3).internal_values(), np.array(["foo"] * 3, dtype=np.object_)
        )

    def test_set_change_dtype(self, mgr):
        """``iset`` with values of another dtype changes the item's dtype."""
        mgr.insert(len(mgr.items), "baz", np.zeros(N, dtype=bool))

        mgr.iset(mgr.items.get_loc("baz"), np.repeat("foo", N))
        idx = mgr.items.get_loc("baz")
        assert mgr.iget(idx).dtype == np.object_

        mgr2 = mgr.consolidate()
        mgr2.iset(mgr2.items.get_loc("baz"), np.repeat("foo", N))
        idx = mgr2.items.get_loc("baz")
        assert mgr2.iget(idx).dtype == np.object_

        mgr2.insert(len(mgr2.items), "quux", np.random.randn(N).astype(int))
        idx = mgr2.items.get_loc("quux")
        assert mgr2.iget(idx).dtype == np.int_

        mgr2.iset(mgr2.items.get_loc("quux"), np.random.randn(N))
        # ``np.float_`` was removed in NumPy 2.0; ``np.float64`` is the same type
        assert mgr2.iget(idx).dtype == np.float64

    def test_copy(self, mgr):
        """Shallow copies share memory with the source; deep copies do not."""
        cp = mgr.copy(deep=False)
        for blk, cp_blk in zip(mgr.blocks, cp.blocks):
            # view assertion
            tm.assert_equal(cp_blk.values, blk.values)
            if isinstance(blk.values, np.ndarray):
                assert cp_blk.values.base is blk.values.base
            else:
                # DatetimeTZBlock has DatetimeIndex values
                assert cp_blk.values._ndarray.base is blk.values._ndarray.base

        # copy(deep=True) consolidates, so the block-wise assertions will
        #  fail if mgr is not consolidated
        mgr._consolidate_inplace()
        cp = mgr.copy(deep=True)
        for blk, cp_blk in zip(mgr.blocks, cp.blocks):
            bvals = blk.values
            cpvals = cp_blk.values

            tm.assert_equal(cpvals, bvals)

            if isinstance(cpvals, np.ndarray):
                lbase = cpvals.base
                rbase = bvals.base
            else:
                lbase = cpvals._ndarray.base
                rbase = bvals._ndarray.base

            # copy assertion we either have a None for a base or in case of
            # some blocks it is an array (e.g. datetimetz), but was copied
            if isinstance(cpvals, DatetimeArray):
                assert (lbase is None and rbase is None) or (lbase is not rbase)
            elif not isinstance(cpvals, np.ndarray):
                assert lbase is not rbase
            else:
                assert lbase is None and rbase is None

    def test_sparse(self):
        """Two sparse blocks interleave to float64."""
        mgr = create_mgr("a: sparse-1; b: sparse-2")
        assert mgr.as_array().dtype == np.float64

    def test_sparse_mixed(self):
        """Sparse and float items stay in three separate blocks."""
        mgr = create_mgr("a: sparse-1; b: sparse-2; c: f8")
        assert len(mgr.blocks) == 3
        assert isinstance(mgr, BlockManager)

    @pytest.mark.parametrize(
        "mgr_string, dtype",
        [("c: f4; d: f2", np.float32), ("c: f4; d: f2; e: f8", np.float64)],
    )
    def test_as_array_float(self, mgr_string, dtype):
        """``as_array`` of mixed float widths uses the widest float dtype."""
        mgr = create_mgr(mgr_string)
        assert mgr.as_array().dtype == dtype

    @pytest.mark.parametrize(
        "mgr_string, dtype",
        [
            ("a: bool-1; b: bool-2", np.bool_),
            ("a: i8-1; b: i8-2; c: i4; d: i2; e: u1", np.int64),
            ("c: i4; d: i2; e: u1", np.int32),
        ],
    )
    def test_as_array_int_bool(self, mgr_string, dtype):
        """``as_array`` dtype for all-bool and mixed-width integer managers."""
        mgr = create_mgr(mgr_string)
        assert mgr.as_array().dtype == dtype

    def test_as_array_datetime(self):
        """Two naive datetime blocks interleave to ``M8[ns]``."""
        mgr = create_mgr("h: datetime-1; g: datetime-2")
        assert mgr.as_array().dtype == "M8[ns]"

    def test_as_array_datetime_tz(self):
        """tz-aware items keep their dtype; ``as_array`` falls back to object."""
        mgr = create_mgr("h: M8[ns, US/Eastern]; g: M8[ns, CET]")
        assert mgr.iget(0).dtype == "datetime64[ns, US/Eastern]"
        assert mgr.iget(1).dtype == "datetime64[ns, CET]"
        assert mgr.as_array().dtype == "object"

    @pytest.mark.parametrize("t", ["float16", "float32", "float64", "int32", "int64"])
    def test_astype(self, t):
        """``astype`` on all-float managers and, with errors ignored, mixed ones."""
        # coerce all
        mgr = create_mgr("c: f4; d: f2; e: f8")

        t = np.dtype(t)
        tmgr = mgr.astype(t)
        assert tmgr.iget(0).dtype.type == t
        assert tmgr.iget(1).dtype.type == t
        assert tmgr.iget(2).dtype.type == t

        # mixed
        mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8")

        t = np.dtype(t)
        tmgr = mgr.astype(t, errors="ignore")
        assert tmgr.iget(2).dtype.type == t
        assert tmgr.iget(4).dtype.type == t
        assert tmgr.iget(5).dtype.type == t
        assert tmgr.iget(6).dtype.type == t

        assert tmgr.iget(0).dtype.type == np.object_
        assert tmgr.iget(1).dtype.type == np.object_
        if t != np.int64:
            assert tmgr.iget(3).dtype.type == np.datetime64
        else:
            assert tmgr.iget(3).dtype.type == t

    def test_convert(self):
        """``convert`` leaves non-object dtypes and non-numeric strings alone."""

        def _compare(old_mgr, new_mgr):
            """Assert every block's values appear in the other manager too."""
            old_blocks = set(old_mgr.blocks)
            new_blocks = set(new_mgr.blocks)
            assert len(old_blocks) == len(new_blocks)

            # every old block must match some new block ...
            for b in old_blocks:
                found = False
                for nb in new_blocks:
                    if (b.values == nb.values).all():
                        found = True
                        break
                assert found

            # ... and every new block must match some old block
            for b in new_blocks:
                found = False
                for ob in old_blocks:
                    if (b.values == ob.values).all():
                        found = True
                        break
                assert found

        # noops
        mgr = create_mgr("f: i8; g: f8")
        new_mgr = mgr.convert(copy=True)
        _compare(mgr, new_mgr)

        # convert
        mgr = create_mgr("a,b,foo: object; f: i8; g: f8")
        mgr.iset(0, np.array(["1"] * N, dtype=np.object_))
        mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
        mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
        new_mgr = mgr.convert(copy=True)
        assert new_mgr.iget(0).dtype == np.object_
        assert new_mgr.iget(1).dtype == np.object_
        assert new_mgr.iget(2).dtype == np.object_
        assert new_mgr.iget(3).dtype == np.int64
        assert new_mgr.iget(4).dtype == np.float64

        mgr = create_mgr(
            "a,b,foo: object; f: i4; bool: bool; dt: datetime; i: i8; g: f8; h: f2"
        )
        mgr.iset(0, np.array(["1"] * N, dtype=np.object_))
        mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
        mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
        new_mgr = mgr.convert(copy=True)
        assert new_mgr.iget(0).dtype == np.object_
        assert new_mgr.iget(1).dtype == np.object_
        assert new_mgr.iget(2).dtype == np.object_
        assert new_mgr.iget(3).dtype == np.int32
        assert new_mgr.iget(4).dtype == np.bool_
        # compare with ``==``: the former ``assert x, y`` form used ``y`` only as
        # the failure message and therefore never checked the dtype
        assert new_mgr.iget(5).dtype.type == np.datetime64
        assert new_mgr.iget(6).dtype == np.int64
        assert new_mgr.iget(7).dtype == np.float64
        assert new_mgr.iget(8).dtype == np.float16

    def test_invalid_ea_block(self):
        """Two categorical items sharing one block description are rejected."""
        with pytest.raises(ValueError, match="need to split"):
            create_mgr("a: category; b: category")

        with pytest.raises(ValueError, match="need to split"):
            create_mgr("a: category2; b: category2")

    def test_interleave(self):
        """One or two items of the same dtype interleave to that dtype."""
        # self
        for dtype in ["f8", "i8", "object", "bool", "complex", "M8[ns]", "m8[ns]"]:
            mgr = create_mgr(f"a: {dtype}")
            assert mgr.as_array().dtype == dtype
            mgr = create_mgr(f"a: {dtype}; b: {dtype}")
            assert mgr.as_array().dtype == dtype

    @pytest.mark.parametrize(
        "mgr_string, dtype",
        [
            ("a: category", "i8"),
            ("a: category; b: category", "i8"),
            ("a: category; b: category2", "object"),
            ("a: category2", "object"),
            ("a: category2; b: category2", "object"),
            ("a: f8", "f8"),
            ("a: f8; b: i8", "f8"),
            ("a: f4; b: i8", "f8"),
            ("a: f4; b: i8; d: object", "object"),
            ("a: bool; b: i8", "object"),
            ("a: complex", "complex"),
            ("a: f8; b: category", "object"),
            ("a: M8[ns]; b: category", "object"),
            ("a: M8[ns]; b: bool", "object"),
            ("a: M8[ns]; b: i8", "object"),
            ("a: m8[ns]; b: bool", "object"),
            ("a: m8[ns]; b: i8", "object"),
            ("a: M8[ns]; b: m8[ns]", "object"),
        ],
    )
    def test_interleave_dtype(self, mgr_string, dtype):
        """
        Check the dtype ``as_array`` picks for various block combinations.

        NOTE(review): ``mgr_string`` and ``dtype`` are never used; the body runs
        the same hard-coded checks for every parameter set.  The parameter list
        cannot simply be substituted in: it contains "a: category; b: category",
        which ``test_invalid_ea_block`` expects ``create_mgr`` to reject, and it
        expects "object" for "a: f8; b: category" while the body asserts "f8".
        Reconcile the two before switching the body to the parameters.
        """
        # will be converted according the actual dtype of the underlying
        mgr = create_mgr("a: category")
        assert mgr.as_array().dtype == "i8"
        mgr = create_mgr("a: category; b: category2")
        assert mgr.as_array().dtype == "object"
        mgr = create_mgr("a: category2")
        assert mgr.as_array().dtype == "object"

        # combinations
        mgr = create_mgr("a: f8")
        assert mgr.as_array().dtype == "f8"
        mgr = create_mgr("a: f8; b: i8")
        assert mgr.as_array().dtype == "f8"
        mgr = create_mgr("a: f4; b: i8")
        assert mgr.as_array().dtype == "f8"
        mgr = create_mgr("a: f4; b: i8; d: object")
        assert mgr.as_array().dtype == "object"
        mgr = create_mgr("a: bool; b: i8")
        assert mgr.as_array().dtype == "object"
        mgr = create_mgr("a: complex")
        assert mgr.as_array().dtype == "complex"
        mgr = create_mgr("a: f8; b: category")
        assert mgr.as_array().dtype == "f8"
        mgr = create_mgr("a: M8[ns]; b: category")
        assert mgr.as_array().dtype == "object"
        mgr = create_mgr("a: M8[ns]; b: bool")
        assert mgr.as_array().dtype == "object"
        mgr = create_mgr("a: M8[ns]; b: i8")
        assert mgr.as_array().dtype == "object"
        mgr = create_mgr("a: m8[ns]; b: bool")
        assert mgr.as_array().dtype == "object"
        mgr = create_mgr("a: m8[ns]; b: i8")
        assert mgr.as_array().dtype == "object"
        mgr = create_mgr("a: M8[ns]; b: m8[ns]")
        assert mgr.as_array().dtype == "object"

    def test_consolidate_ordering_issues(self, mgr):
        """Consolidation after several ``iset`` calls gives ordered placements."""
        mgr.iset(mgr.items.get_loc("f"), np.random.randn(N))
        mgr.iset(mgr.items.get_loc("d"), np.random.randn(N))
        mgr.iset(mgr.items.get_loc("b"), np.random.randn(N))
        mgr.iset(mgr.items.get_loc("g"), np.random.randn(N))
        mgr.iset(mgr.items.get_loc("h"), np.random.randn(N))

        # we have datetime/tz blocks in mgr
        cons = mgr.consolidate()
        assert cons.nblocks == 4
        cons = mgr.consolidate().get_numeric_data()
        assert cons.nblocks == 1
        assert isinstance(cons.blocks[0].mgr_locs, BlockPlacement)
        tm.assert_numpy_array_equal(
            cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.intp)
        )

    def test_reindex_items(self):
        """``reindex_axis`` on the items axis reorders items without consolidating."""
        # mgr is not consolidated, f8 & f8-2 blocks
        mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2")

        reindexed = mgr.reindex_axis(["g", "c", "a", "d"], axis=0)
        # reindex_axis does not consolidate_inplace, as that risks failing to
        #  invalidate _item_cache
        assert not reindexed.is_consolidated()

        tm.assert_index_equal(reindexed.items, Index(["g", "c", "a", "d"]))
        tm.assert_almost_equal(
            mgr.iget(6).internal_values(), reindexed.iget(0).internal_values()
        )
        tm.assert_almost_equal(
            mgr.iget(2).internal_values(), reindexed.iget(1).internal_values()
        )
        tm.assert_almost_equal(
            mgr.iget(0).internal_values(), reindexed.iget(2).internal_values()
        )
        tm.assert_almost_equal(
            mgr.iget(3).internal_values(), reindexed.iget(3).internal_values()
        )

    def test_get_numeric_data(self, using_copy_on_write):
        """``get_numeric_data`` selects numeric items; writes propagate per CoW."""
        mgr = create_mgr(
            "int: int; float: float; complex: complex;"
            "str: object; bool: bool; obj: object; dt: datetime",
            item_shape=(3,),
        )
        mgr.iset(5, np.array([1, 2, 3], dtype=np.object_))

        numeric = mgr.get_numeric_data()
        tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"]))
        tm.assert_almost_equal(
            mgr.iget(mgr.items.get_loc("float")).internal_values(),
            numeric.iget(numeric.items.get_loc("float")).internal_values(),
        )

        # Check sharing
        numeric.iset(
            numeric.items.get_loc("float"),
            np.array([100.0, 200.0, 300.0]),
            inplace=True,
        )
        if using_copy_on_write:
            tm.assert_almost_equal(
                mgr.iget(mgr.items.get_loc("float")).internal_values(),
                np.array([1.0, 1.0, 1.0]),
            )
        else:
            tm.assert_almost_equal(
                mgr.iget(mgr.items.get_loc("float")).internal_values(),
                np.array([100.0, 200.0, 300.0]),
            )

        numeric2 = mgr.get_numeric_data(copy=True)
        # check the copy itself (previously ``numeric`` was re-checked here)
        tm.assert_index_equal(
            numeric2.items, Index(["int", "float", "complex", "bool"])
        )
        numeric2.iset(
            numeric2.items.get_loc("float"),
            np.array([1000.0, 2000.0, 3000.0]),
            inplace=True,
        )
        # writing into the copy must not change ``mgr``
        if using_copy_on_write:
            tm.assert_almost_equal(
                mgr.iget(mgr.items.get_loc("float")).internal_values(),
                np.array([1.0, 1.0, 1.0]),
            )
        else:
            tm.assert_almost_equal(
                mgr.iget(mgr.items.get_loc("float")).internal_values(),
                np.array([100.0, 200.0, 300.0]),
            )

    def test_get_bool_data(self, using_copy_on_write):
        """``get_bool_data`` selects only bool-dtype items; writes follow CoW."""
        mgr = create_mgr(
            "int: int; float: float; complex: complex;"
            "str: object; bool: bool; obj: object; dt: datetime",
            item_shape=(3,),
        )
        mgr.iset(6, np.array([True, False, True], dtype=np.object_))

        bools = mgr.get_bool_data()
        tm.assert_index_equal(bools.items, Index(["bool"]))
        tm.assert_almost_equal(
            mgr.iget(mgr.items.get_loc("bool")).internal_values(),
            bools.iget(bools.items.get_loc("bool")).internal_values(),
        )

        # Check sharing
        bools.iset(0, np.array([True, False, True]), inplace=True)
        if using_copy_on_write:
            tm.assert_numpy_array_equal(
                mgr.iget(mgr.items.get_loc("bool")).internal_values(),
                np.array([True, True, True]),
            )
        else:
            tm.assert_numpy_array_equal(
                mgr.iget(mgr.items.get_loc("bool")).internal_values(),
                np.array([True, False, True]),
            )

        # writing into an explicit copy must not change ``mgr``
        bools2 = mgr.get_bool_data(copy=True)
        bools2.iset(0, np.array([False, True, False]))
        if using_copy_on_write:
            tm.assert_numpy_array_equal(
                mgr.iget(mgr.items.get_loc("bool")).internal_values(),
                np.array([True, True, True]),
            )
        else:
            tm.assert_numpy_array_equal(
                mgr.iget(mgr.items.get_loc("bool")).internal_values(),
                np.array([True, False, True]),
            )

    def test_unicode_repr_doesnt_raise(self):
        """``repr`` works for a manager with a non-ASCII item name."""
        repr(create_mgr("b,\u05d0: object"))

    @pytest.mark.parametrize(
        "mgr_string", ["a,b,c: i8-1; d,e,f: i8-2", "a,a,a: i8-1; b,b,b: i8-2"]
    )
    def test_equals(self, mgr_string):
        """``equals`` ignores block order, for unique and duplicate item names."""
        bm1 = create_mgr(mgr_string)
        bm2 = BlockManager(bm1.blocks[::-1], bm1.axes)
        assert bm1.equals(bm2)

    @pytest.mark.parametrize(
        "mgr_string",
        [
            "a:i8;b:f8",  # basic case
            "a:i8;b:f8;c:c8;d:b",  # many types
            "a:i8;e:dt;f:td;g:string",  # more types
            "a:i8;b:category;c:category2",  # categories
            "c:sparse;d:sparse_na;b:f8",  # sparse
        ],
    )
    def test_equals_block_order_different_dtypes(self, mgr_string):
        """``equals`` holds in both directions for every block permutation."""
        # GH 9330
        bm = create_mgr(mgr_string)
        block_perms = itertools.permutations(bm.blocks)
        for bm_perm in block_perms:
            bm_this = BlockManager(tuple(bm_perm), bm.axes)
            assert bm.equals(bm_this)
            assert bm_this.equals(bm)

    def test_single_mgr_ctor(self):
        """A 5-row f8 single-block manager holds the values 0.0 through 4.0."""
        mgr = create_single_mgr("f8", num_rows=5)
        assert mgr.external_values().tolist() == [0.0, 1.0, 2.0, 3.0, 4.0]

    @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0])
    def test_validate_bool_args(self, value):
        """``replace_list`` rejects non-bool ``inplace`` arguments."""
        bm1 = create_mgr("a,b,c: i8-1; d,e,f: i8-2")

        msg = (
            'For argument "inplace" expected type bool, '
            f"received type {type(value).__name__}."
        )
        with pytest.raises(ValueError, match=msg):
            bm1.replace_list([1], [2], inplace=value)

    def test_iset_split_block(self):
        """``_iset_split_block`` without a value updates blklocs and blknos."""
        bm = create_mgr("a,b,c: i8; d: f8")
        bm._iset_split_block(0, np.array([0]))
        tm.assert_numpy_array_equal(
            bm.blklocs, np.array([0, 0, 1, 0], dtype="int64" if IS64 else "int32")
        )
        # First indexer currently does not have a block associated with it in case
        tm.assert_numpy_array_equal(
            bm.blknos, np.array([0, 0, 0, 1], dtype="int64" if IS64 else "int32")
        )
        assert len(bm.blocks) == 2

    def test_iset_split_block_values(self):
        """``_iset_split_block`` with a value adds a third block."""
        bm = create_mgr("a,b,c: i8; d: f8")
        bm._iset_split_block(0, np.array([0]), np.array([list(range(10))]))
        tm.assert_numpy_array_equal(
            bm.blklocs, np.array([0, 0, 1, 0], dtype="int64" if IS64 else "int32")
        )
        # First indexer currently does not have a block associated with it in case
        tm.assert_numpy_array_equal(
            bm.blknos, np.array([0, 2, 2, 1], dtype="int64" if IS64 else "int32")
        )
        assert len(bm.blocks) == 3
- def _as_array(mgr):
- if mgr.ndim == 1:
- return mgr.external_values()
- return mgr.as_array().T
class TestIndexing:
    # Data-driven tests in the nosetests style.
    #
    # Every test runs an indexing routine against a block manager and checks
    # the outcome against the same operation applied to an np.ndarray.
    #
    # NOTE: sparse (SparseBlock with fill_value != np.nan) fail a lot of tests
    #  and are disabled.

    MANAGERS = [
        create_single_mgr("f8", N),
        create_single_mgr("i8", N),
        # 2-dim
        create_mgr("a,b,c,d,e,f: f8", item_shape=(N,)),
        create_mgr("a,b,c,d,e,f: i8", item_shape=(N,)),
        create_mgr("a,b: f8; c,d: i8; e,f: string", item_shape=(N,)),
        create_mgr("a,b: f8; c,d: i8; e,f: f8", item_shape=(N,)),
    ]

    @pytest.mark.parametrize("mgr", MANAGERS)
    def test_get_slice(self, mgr):
        """Slicing a manager along each axis matches slicing its ndarray form."""

        def check_slice(manager, axis, key):
            source = _as_array(manager)

            # An ndarray key may be shorter than the axis it is applied to;
            # pad it with False entries up to the full axis length.
            if isinstance(key, np.ndarray):
                labels = manager.axes[axis]
                missing = len(labels) - len(key)
                if len(labels) and len(key) and missing != 0:
                    key = np.concatenate([key, np.zeros(missing, dtype=bool)])

            is_slice = isinstance(key, slice)
            if not is_slice and not (manager.ndim == 1 and axis == 0):
                # BlockManager doesn't support non-slice, SingleBlockManager
                #  doesn't support axis > 0
                return

            if is_slice:
                result = manager.get_slice(key, axis=axis)
            else:
                result = manager.getitem_mgr(key)

            ndarray_key = (slice(None),) * axis + (key,)
            tm.assert_numpy_array_equal(
                source[ndarray_key], _as_array(result), check_dtype=False
            )
            tm.assert_index_equal(manager.axes[axis][key], result.axes[axis])

        assert mgr.ndim <= 2, mgr.ndim

        slice_keys = (
            slice(None),
            slice(3),
            slice(100),
            slice(1, 4),
            slice(3, 0, -2),
        )
        for ax in range(mgr.ndim):
            # slice
            for key in slice_keys:
                check_slice(mgr, ax, key)

            if mgr.ndim >= 2:
                # 2D only support slice objects
                continue

            size = mgr.shape[ax]

            # boolean mask
            mask_keys = [
                np.array([], dtype=np.bool_),
                np.ones(size, dtype=np.bool_),
                np.zeros(size, dtype=np.bool_),
            ]
            if size >= 3:
                mask_keys.append(np.arange(size) % 3 == 0)
                mask_keys.append(np.array([True, True, False], dtype=np.bool_))

            # fancy indexer
            fancy_keys = [[], list(range(size))]
            if size >= 3:
                fancy_keys.append([0, 1, 2])
                fancy_keys.append([-1, -2, -3])

            for key in mask_keys + fancy_keys:
                check_slice(mgr, ax, key)

    @pytest.mark.parametrize("mgr", MANAGERS)
    def test_take(self, mgr):
        """``mgr.take`` along each axis matches ``np.take`` on the ndarray form."""

        def check_take(manager, axis, indexer):
            source = _as_array(manager)
            result = manager.take(indexer, axis)
            expected = np.take(source, indexer, axis)
            tm.assert_numpy_array_equal(
                expected, _as_array(result), check_dtype=False
            )
            tm.assert_index_equal(
                manager.axes[axis].take(indexer), result.axes[axis]
            )

        for ax in range(mgr.ndim):
            size = mgr.shape[ax]

            # take/fancy indexer
            indexers = [[], [0, 0, 0], list(range(size))]
            if size >= 3:
                indexers.append([0, 1, 2])
                indexers.append([-1, -2, -3])

            for indexer in indexers:
                check_take(mgr, ax, indexer)

    @pytest.mark.parametrize("mgr", MANAGERS)
    @pytest.mark.parametrize("fill_value", [None, np.nan, 100.0])
    def test_reindex_axis(self, fill_value, mgr):
        """``reindex_axis`` matches ``algos.take_nd`` with the computed indexer."""

        def check_reindex_axis(manager, axis, new_labels, fill):
            source = _as_array(manager)
            indexer = manager.axes[axis].get_indexer_for(new_labels)

            result = manager.reindex_axis(new_labels, axis, fill_value=fill)
            expected = algos.take_nd(source, indexer, axis, fill_value=fill)
            tm.assert_numpy_array_equal(
                expected, _as_array(result), check_dtype=False
            )
            tm.assert_index_equal(result.axes[axis], new_labels)

        for ax in range(mgr.ndim):
            labels = mgr.axes[ax]
            targets = [
                Index([]),
                labels,
                labels[[0, 0, 0]],
                Index(["foo", "bar", "baz"]),
                Index(["foo", labels[0], "baz"]),
            ]
            if mgr.shape[ax] >= 3:
                targets.append(labels[:-3])
                targets.append(labels[-3::-1])
                targets.append(labels[[0, 1, 2, 0, 1, 2]])

            for target in targets:
                check_reindex_axis(mgr, ax, target, fill_value)

    @pytest.mark.parametrize("mgr", MANAGERS)
    @pytest.mark.parametrize("fill_value", [None, np.nan, 100.0])
    def test_reindex_indexer(self, fill_value, mgr):
        """``reindex_indexer`` matches ``algos.take_nd`` with the same indexer."""

        def check_reindex_indexer(manager, axis, new_labels, indexer, fill):
            source = _as_array(manager)
            expected = algos.take_nd(source, indexer, axis, fill_value=fill)
            result = manager.reindex_indexer(
                new_labels, indexer, axis, fill_value=fill
            )
            tm.assert_numpy_array_equal(
                expected, _as_array(result), check_dtype=False
            )
            tm.assert_index_equal(result.axes[axis], new_labels)

        for ax in range(mgr.ndim):
            labels = mgr.axes[ax]
            size = mgr.shape[ax]

            # (new labels, positional indexer) pairs, in execution order
            cases = [
                (Index([]), np.array([], dtype=np.intp)),
                (labels, np.arange(size)),
                (Index(["foo"] * size), np.arange(size)),
                (labels[::-1], np.arange(size)),
                (labels, np.arange(size)[::-1]),
                (Index(["foo", "bar", "baz"]), np.array([0, 0, 0])),
                (Index(["foo", "bar", "baz"]), np.array([-1, 0, -1])),
                (Index(["foo", labels[0], "baz"]), np.array([-1, -1, -1])),
            ]
            if size >= 3:
                cases.append((Index(["foo", "bar", "baz"]), np.array([0, 1, 2])))

            for new_labels, indexer in cases:
                check_reindex_indexer(mgr, ax, new_labels, indexer, fill_value)
class TestBlockPlacement:
    """Tests for ``BlockPlacement`` construction, conversion and ``add``."""

    @pytest.mark.parametrize(
        "slc, expected",
        [
            (slice(0, 4), 4),
            (slice(0, 4, 2), 2),
            (slice(0, 3, 2), 2),
            (slice(0, 1, 2), 1),
            (slice(1, 0, -1), 1),
        ],
    )
    def test_slice_len(self, slc, expected):
        """``len`` of a slice-backed placement equals the expected count."""
        assert len(BlockPlacement(slc)) == expected

    @pytest.mark.parametrize("slc", [slice(1, 1, 0), slice(1, 2, 0)])
    def test_zero_step_raises(self, slc):
        """A slice with step 0 is rejected with a ValueError."""
        msg = "slice step cannot be zero"
        with pytest.raises(ValueError, match=msg):
            BlockPlacement(slc)

    def test_slice_canonize_negative_stop(self):
        """A ``-1`` stop with negative step is stored as an open-ended stop."""
        # GH#37524 negative stop is OK with negative step and positive start
        slc = slice(3, -1, -2)

        bp = BlockPlacement(slc)
        assert bp.indexer == slice(3, None, -2)

    @pytest.mark.parametrize(
        "slc",
        [
            slice(None, None),
            slice(10, None),
            slice(None, None, -1),
            slice(None, 10, -1),
            # These are "unbounded" because negative index will
            #  change depending on container shape.
            slice(-1, None),
            slice(None, -1),
            slice(-1, -1),
            slice(-1, None, -1),
            slice(None, -1, -1),
            slice(-1, -1, -1),
        ],
    )
    def test_unbounded_slice_raises(self, slc):
        """Each listed slice is rejected with an "unbounded slice" ValueError."""
        msg = "unbounded slice"
        with pytest.raises(ValueError, match=msg):
            BlockPlacement(slc)

    @pytest.mark.parametrize(
        "slc",
        [
            slice(0, 0),
            slice(100, 0),
            slice(100, 100),
            slice(100, 100, -1),
            slice(0, 100, -1),
        ],
    )
    def test_not_slice_like_slices(self, slc):
        """Placements built from these slices report ``is_slice_like`` False."""
        assert not BlockPlacement(slc).is_slice_like

    @pytest.mark.parametrize(
        "arr, slc",
        [
            ([0], slice(0, 1, 1)),
            ([100], slice(100, 101, 1)),
            ([0, 1, 2], slice(0, 3, 1)),
            ([0, 5, 10], slice(0, 15, 5)),
            ([0, 100], slice(0, 200, 100)),
            ([2, 1], slice(2, 0, -1)),
        ],
    )
    def test_array_to_slice_conversion(self, arr, slc):
        """``as_slice`` of an array-backed placement equals the paired slice."""
        assert BlockPlacement(arr).as_slice == slc

    @pytest.mark.parametrize(
        "arr",
        [
            [],
            [-1],
            [-1, -2, -3],
            [-10],
            # a second ``[-1]`` entry used to follow here; it only produced a
            # duplicate test case and has been dropped
            [-1, 0, 1, 2],
            [-2, 0, 2, 4],
            [1, 0, -1],
            [1, 1, 1],
        ],
    )
    def test_not_slice_like_arrays(self, arr):
        """Placements built from these arrays report ``is_slice_like`` False."""
        assert not BlockPlacement(arr).is_slice_like

    @pytest.mark.parametrize(
        "slc, expected",
        [(slice(0, 3), [0, 1, 2]), (slice(0, 0), []), (slice(3, 0), [])],
    )
    def test_slice_iter(self, slc, expected):
        """Iterating a slice-backed placement yields the expected positions."""
        assert list(BlockPlacement(slc)) == expected

    @pytest.mark.parametrize(
        "slc, arr",
        [
            (slice(0, 3), [0, 1, 2]),
            (slice(0, 0), []),
            (slice(3, 0), []),
            (slice(3, 0, -1), [3, 2, 1]),
        ],
    )
    def test_slice_to_array_conversion(self, slc, arr):
        """``as_array`` of a slice-backed placement is the matching intp array."""
        tm.assert_numpy_array_equal(
            BlockPlacement(slc).as_array, np.asarray(arr, dtype=np.intp)
        )

    def test_blockplacement_add(self):
        """``add`` accepts both a scalar and an array increment."""
        bpl = BlockPlacement(slice(0, 5))
        assert bpl.add(1).as_slice == slice(1, 6, 1)
        assert bpl.add(np.arange(5)).as_slice == slice(0, 10, 2)
        assert list(bpl.add(np.arange(5, 0, -1))) == [5, 5, 5, 5, 5]

    @pytest.mark.parametrize(
        "val, inc, expected",
        [
            (slice(0, 0), 0, []),
            (slice(1, 4), 0, [1, 2, 3]),
            (slice(3, 0, -1), 0, [3, 2, 1]),
            ([1, 2, 4], 0, [1, 2, 4]),
            (slice(0, 0), 10, []),
            (slice(1, 4), 10, [11, 12, 13]),
            (slice(3, 0, -1), 10, [13, 12, 11]),
            ([1, 2, 4], 10, [11, 12, 14]),
            (slice(0, 0), -1, []),
            (slice(1, 4), -1, [0, 1, 2]),
            ([1, 2, 4], -1, [0, 1, 3]),
        ],
    )
    def test_blockplacement_add_int(self, val, inc, expected):
        """Adding an integer shifts every position by that amount."""
        assert list(BlockPlacement(val).add(inc)) == expected

    @pytest.mark.parametrize("val", [slice(1, 4), [1, 2, 4]])
    def test_blockplacement_add_int_raises(self, val):
        """Adding -10 to these placements raises "iadd causes length change"."""
        msg = "iadd causes length change"
        with pytest.raises(ValueError, match=msg):
            BlockPlacement(val).add(-10)
class TestCanHoldElement:
    @pytest.fixture(
        params=[
            lambda idx: idx,
            lambda idx: idx.to_series(),
            lambda idx: idx._data,
            lambda idx: list(idx),
            lambda idx: idx.astype(object),
            lambda idx: np.asarray(idx),
            lambda idx: idx[0],
            lambda idx: idx[:0],
        ]
    )
    def element(self, request):
        """
        Callables that map an Index to an element for which
        blk._can_hold_element(element) should hold when the Block has that
        index's dtype.
        """
        return request.param

    def test_datetime_block_can_hold_element(self):
        """``_can_hold_element`` on a datetime block agrees with array setitem."""
        block = create_block("datetime", [0])

        assert block._can_hold_element([])

        # block._can_hold_element should hold iff arr.__setitem__ works
        arr = pd.array(block.values.ravel())

        # None is coerced to NaT
        assert block._can_hold_element(None)
        arr[0] = None
        assert arr[0] is pd.NaT

        # different flavours of datetime objects are coerced as well
        for accepted in (np.datetime64("2010-10-10"), datetime(2010, 10, 10)):
            assert block._can_hold_element(accepted)
            arr[0] = accepted

        # a plain date is refused by the block and by the array setter
        rejected = date(2010, 10, 10)
        assert not block._can_hold_element(rejected)

        msg = (
            "value should be a 'Timestamp', 'NaT', "
            "or array of those. Got 'date' instead."
        )
        with pytest.raises(TypeError, match=msg):
            arr[0] = rejected

    @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
    def test_interval_can_hold_element_emptylist(self, dtype, element):
        """An interval block can hold an empty list."""
        # NOTE(review): ``element`` is requested but never used here, so this
        # test simply repeats once per fixture param.
        breaks = np.array([1, 3, 4], dtype=dtype)
        intervals = IntervalIndex.from_breaks(breaks)
        block = new_block(intervals._data, [1], ndim=2)

        assert block._can_hold_element([])
        # TODO: check this holds for all blocks

    @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
    def test_interval_can_hold_element(self, dtype, element):
        """Interval blocks hold matching intervals and refuse mismatched ones."""
        breaks = np.array([1, 3, 4, 9], dtype=dtype)
        intervals = IntervalIndex.from_breaks(breaks)
        block = new_block(intervals._data, [1], ndim=2)

        matching = element(intervals)
        self.check_series_setitem(matching, intervals, True)
        assert block._can_hold_element(matching)

        # Careful: to get the expected Series-inplace behavior we need
        # `elem` to not have the same length as `arr`
        mismatched_indexes = [
            IntervalIndex.from_breaks(breaks[:-1], closed="neither"),
            IntervalIndex.from_breaks([Timestamp(1), Timestamp(3), Timestamp(4)]),
            IntervalIndex.from_breaks([Timedelta(1), Timedelta(3), Timedelta(4)]),
        ]
        for other in mismatched_indexes:
            mismatched = element(other)
            self.check_series_setitem(mismatched, intervals, False)
            assert not block._can_hold_element(mismatched)

    def test_period_can_hold_element_emptylist(self):
        """A period block can hold an empty list."""
        periods = period_range("2016", periods=3, freq="A")
        block = new_block(periods._data.reshape(1, 3), [1], ndim=2)

        assert block._can_hold_element([])

    def test_period_can_hold_element(self, element):
        """Period data is set in place only for elements of the same dtype."""
        periods = period_range("2016", periods=3, freq="A")

        self.check_series_setitem(element(periods), periods, True)

        # Careful: to get the expected Series-inplace behavior we need
        # `elem` to not have the same length as `arr`
        daily = periods.asfreq("D")[:-1]
        self.check_series_setitem(element(daily), periods, False)

        timestamps = periods.to_timestamp("S")[:-1]
        self.check_series_setitem(element(timestamps), periods, False)

    def check_can_hold_element(self, obj, elem, inplace: bool):
        """Assert the first block of ``obj`` holds ``elem`` iff ``inplace``."""
        block = obj._mgr.blocks[0]
        can_hold = block._can_hold_element(elem)
        assert can_hold if inplace else not can_hold

    def check_series_setitem(self, elem, index: Index, inplace: bool):
        """
        Set ``elem`` into a Series built on a copy of ``index._data`` and
        check whether the assignment happened in place.
        """
        values = index._data.copy()
        series = Series(values, copy=False)

        self.check_can_hold_element(series, elem, inplace)

        # scalars go to the first position, listlikes to the leading slice
        target = 0 if is_scalar(elem) else slice(None, len(elem))
        series[target] = elem

        if not inplace:
            assert series.dtype == object
            return
        assert series.array is values  # i.e. setting was done inplace
class TestShouldStore:
    def test_should_store_categorical(self):
        """A categorical block should store only values of its exact dtype."""
        categorical = Categorical(["A", "B", "C"])
        block = DataFrame(categorical)._mgr.blocks[0]

        # same dtype, whether full length or shortened
        for same_dtype in (categorical, categorical[:-1]):
            assert block.should_store(same_dtype)

        # ordered variant has a different dtype
        assert not block.should_store(categorical.as_ordered())

        # plain ndarray rather than a Categorical
        assert not block.should_store(np.asarray(categorical))
def test_validate_ndim(block_maker):
    """Passing 1-dim values together with ``ndim=2`` raises a ValueError."""
    one_dim_values = np.array([1.0, 2.0])
    expected_msg = r"Wrong number of dimensions. values.ndim != ndim \[1 != 2\]"

    with pytest.raises(ValueError, match=expected_msg):
        block_maker(one_dim_values, slice(2), ndim=2)
def test_block_shape():
    """Reindexed int and categorical Series share the same block placement."""
    target = Index([0, 1, 2, 3, 4])
    plain = Series([1, 2, 3]).reindex(target)
    categorical = Series(Categorical([1, 2, 3])).reindex(target)

    plain_locs = plain._mgr.blocks[0].mgr_locs.indexer
    categorical_locs = categorical._mgr.blocks[0].mgr_locs.indexer
    assert plain_locs == categorical_locs
def test_make_block_no_pandas_array(block_maker):
    """Blocks built from a PandasArray end up holding a plain integer dtype."""
    # https://github.com/pandas-dev/pandas/pull/24866
    wrapped = pd.arrays.PandasArray(np.array([1, 2]))
    placement = slice(len(wrapped))

    # PandasArray, no dtype
    block = block_maker(wrapped, placement, ndim=wrapped.ndim)
    assert block.dtype.kind in ["i", "u"]

    if block_maker is not make_block:
        # new_block requires caller to unwrap PandasArray and no longer takes
        # the dtype keyword, so the remaining checks only apply to make_block
        return

    assert block.is_extension is False

    # PandasArray, PandasDtype
    block = block_maker(wrapped, placement, dtype=wrapped.dtype, ndim=wrapped.ndim)
    assert block.dtype.kind in ["i", "u"]
    assert block.is_extension is False

    # ndarray, PandasDtype
    block = block_maker(
        wrapped.to_numpy(), placement, dtype=wrapped.dtype, ndim=wrapped.ndim
    )
    assert block.dtype.kind in ["i", "u"]
    assert block.is_extension is False
|