# Tests for constructing CategoricalIndex objects.
import numpy as np
import pytest

from pandas import (
    Categorical,
    CategoricalDtype,
    CategoricalIndex,
    Index,
)
import pandas._testing as tm
class TestCategoricalIndexConstructors:
    """Constructor tests for ``CategoricalIndex``.

    Exercises construction from plain lists, ``Categorical`` objects, existing
    indexes and explicit ``CategoricalDtype`` instances, together with the
    argument combinations that are expected to raise.
    """

    def test_construction_disallows_scalar(self):
        """A scalar or omitted ``data`` argument raises ``TypeError``."""
        msg = "must be called with a collection of some kind"
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(data=1, categories=list("abcd"), ordered=False)
        # leaving ``data`` out entirely is expected to fail with the same message
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(categories=list("abcd"), ordered=False)

    def test_construction(self):
        """Check categories, codes and orderedness across input kinds."""
        ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False)
        categories = ci.categories

        # wrapping in ``Index`` keeps the categorical index intact
        result = Index(ci)
        tm.assert_index_equal(result, ci, exact=True)
        assert not result.ordered

        result = Index(ci.values)
        tm.assert_index_equal(result, ci, exact=True)
        assert not result.ordered

        # empty
        result = CategoricalIndex([], categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(result.codes, np.array([], dtype="int8"))
        assert not result.ordered

        # passing categories
        result = CategoricalIndex(list("aabbca"), categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )

        # from a Categorical: categories are taken from the Categorical itself
        c = Categorical(list("aabbca"))
        result = CategoricalIndex(c)
        tm.assert_index_equal(result.categories, Index(list("abc")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        # from a Categorical with explicit categories
        result = CategoricalIndex(c, categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        # from another CategoricalIndex
        ci = CategoricalIndex(c, categories=list("abcd"))
        result = CategoricalIndex(ci)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        # narrowing the categories: "c" is no longer a category, so its
        # position is expected to carry the code -1
        result = CategoricalIndex(ci, categories=list("ab"))
        tm.assert_index_equal(result.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert not result.ordered

        # same narrowing, but requesting an ordered result
        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        tm.assert_index_equal(result.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert result.ordered

        # additionally passing dtype="category" must not change the outcome
        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        expected = CategoricalIndex(
            ci, categories=list("ab"), ordered=True, dtype="category"
        )
        tm.assert_index_equal(result, expected, exact=True)

        # turn me to an Index
        result = Index(np.array(ci))
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

    def test_construction_with_dtype(self):
        """``Index(..., dtype="category")`` reproduces a ``CategoricalIndex``."""
        # specify dtype
        ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False)

        result = Index(np.array(ci), dtype="category")
        tm.assert_index_equal(result, ci, exact=True)

        result = Index(np.array(ci).tolist(), dtype="category")
        tm.assert_index_equal(result, ci, exact=True)

        # these are generally only equal when the categories are reordered
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        result = Index(np.array(ci), dtype="category").reorder_categories(ci.categories)
        tm.assert_index_equal(result, ci, exact=True)

        # make sure indexes are handled
        idx = Index(range(3))
        expected = CategoricalIndex([0, 1, 2], categories=idx, ordered=True)
        result = CategoricalIndex(idx, categories=idx, ordered=True)
        tm.assert_index_equal(result, expected, exact=True)

    def test_construction_empty_with_bool_categories(self):
        """Boolean categories are retained when the data is empty."""
        # see GH#22702
        cat = CategoricalIndex([], categories=[True, False])
        # sort before comparing so the check does not depend on category order
        categories = sorted(cat.categories.tolist())
        assert categories == [False, True]

    def test_construction_with_categorical_dtype(self):
        """A ``CategoricalDtype`` is equivalent to ``categories`` + ``ordered``."""
        # construction with CategoricalDtype
        # GH#18109
        data, cats, ordered = "a a b b".split(), "c b a".split(), True
        dtype = CategoricalDtype(categories=cats, ordered=ordered)

        result = CategoricalIndex(data, dtype=dtype)
        expected = CategoricalIndex(data, categories=cats, ordered=ordered)
        tm.assert_index_equal(result, expected, exact=True)

        # GH#19032
        result = Index(data, dtype=dtype)
        tm.assert_index_equal(result, expected, exact=True)

        # error when combining categories/ordered and dtype kwargs
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, categories=cats, dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, ordered=ordered, dtype=dtype)
|