# test_constructors.py (5.4 KB)
import numpy as np
import pytest

from pandas import (
    Categorical,
    CategoricalDtype,
    CategoricalIndex,
    Index,
)
import pandas._testing as tm
  10. class TestCategoricalIndexConstructors:
  11. def test_construction_disallows_scalar(self):
  12. msg = "must be called with a collection of some kind"
  13. with pytest.raises(TypeError, match=msg):
  14. CategoricalIndex(data=1, categories=list("abcd"), ordered=False)
  15. with pytest.raises(TypeError, match=msg):
  16. CategoricalIndex(categories=list("abcd"), ordered=False)
  17. def test_construction(self):
  18. ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False)
  19. categories = ci.categories
  20. result = Index(ci)
  21. tm.assert_index_equal(result, ci, exact=True)
  22. assert not result.ordered
  23. result = Index(ci.values)
  24. tm.assert_index_equal(result, ci, exact=True)
  25. assert not result.ordered
  26. # empty
  27. result = CategoricalIndex([], categories=categories)
  28. tm.assert_index_equal(result.categories, Index(categories))
  29. tm.assert_numpy_array_equal(result.codes, np.array([], dtype="int8"))
  30. assert not result.ordered
  31. # passing categories
  32. result = CategoricalIndex(list("aabbca"), categories=categories)
  33. tm.assert_index_equal(result.categories, Index(categories))
  34. tm.assert_numpy_array_equal(
  35. result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
  36. )
  37. c = Categorical(list("aabbca"))
  38. result = CategoricalIndex(c)
  39. tm.assert_index_equal(result.categories, Index(list("abc")))
  40. tm.assert_numpy_array_equal(
  41. result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
  42. )
  43. assert not result.ordered
  44. result = CategoricalIndex(c, categories=categories)
  45. tm.assert_index_equal(result.categories, Index(categories))
  46. tm.assert_numpy_array_equal(
  47. result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
  48. )
  49. assert not result.ordered
  50. ci = CategoricalIndex(c, categories=list("abcd"))
  51. result = CategoricalIndex(ci)
  52. tm.assert_index_equal(result.categories, Index(categories))
  53. tm.assert_numpy_array_equal(
  54. result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
  55. )
  56. assert not result.ordered
  57. result = CategoricalIndex(ci, categories=list("ab"))
  58. tm.assert_index_equal(result.categories, Index(list("ab")))
  59. tm.assert_numpy_array_equal(
  60. result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
  61. )
  62. assert not result.ordered
  63. result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
  64. tm.assert_index_equal(result.categories, Index(list("ab")))
  65. tm.assert_numpy_array_equal(
  66. result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
  67. )
  68. assert result.ordered
  69. result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
  70. expected = CategoricalIndex(
  71. ci, categories=list("ab"), ordered=True, dtype="category"
  72. )
  73. tm.assert_index_equal(result, expected, exact=True)
  74. # turn me to an Index
  75. result = Index(np.array(ci))
  76. assert isinstance(result, Index)
  77. assert not isinstance(result, CategoricalIndex)
  78. def test_construction_with_dtype(self):
  79. # specify dtype
  80. ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False)
  81. result = Index(np.array(ci), dtype="category")
  82. tm.assert_index_equal(result, ci, exact=True)
  83. result = Index(np.array(ci).tolist(), dtype="category")
  84. tm.assert_index_equal(result, ci, exact=True)
  85. # these are generally only equal when the categories are reordered
  86. ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
  87. result = Index(np.array(ci), dtype="category").reorder_categories(ci.categories)
  88. tm.assert_index_equal(result, ci, exact=True)
  89. # make sure indexes are handled
  90. idx = Index(range(3))
  91. expected = CategoricalIndex([0, 1, 2], categories=idx, ordered=True)
  92. result = CategoricalIndex(idx, categories=idx, ordered=True)
  93. tm.assert_index_equal(result, expected, exact=True)
  94. def test_construction_empty_with_bool_categories(self):
  95. # see GH#22702
  96. cat = CategoricalIndex([], categories=[True, False])
  97. categories = sorted(cat.categories.tolist())
  98. assert categories == [False, True]
  99. def test_construction_with_categorical_dtype(self):
  100. # construction with CategoricalDtype
  101. # GH#18109
  102. data, cats, ordered = "a a b b".split(), "c b a".split(), True
  103. dtype = CategoricalDtype(categories=cats, ordered=ordered)
  104. result = CategoricalIndex(data, dtype=dtype)
  105. expected = CategoricalIndex(data, categories=cats, ordered=ordered)
  106. tm.assert_index_equal(result, expected, exact=True)
  107. # GH#19032
  108. result = Index(data, dtype=dtype)
  109. tm.assert_index_equal(result, expected, exact=True)
  110. # error when combining categories/ordered and dtype kwargs
  111. msg = "Cannot specify `categories` or `ordered` together with `dtype`."
  112. with pytest.raises(ValueError, match=msg):
  113. CategoricalIndex(data, categories=cats, dtype=dtype)
  114. with pytest.raises(ValueError, match=msg):
  115. CategoricalIndex(data, ordered=ordered, dtype=dtype)