test_upcast.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. import numpy as np
  2. import pytest
  3. from pandas._libs.parsers import ( # type: ignore[attr-defined]
  4. _maybe_upcast,
  5. na_values,
  6. )
  7. import pandas.util._test_decorators as td
  8. import pandas as pd
  9. from pandas import NA
  10. import pandas._testing as tm
  11. from pandas.core.arrays import (
  12. ArrowStringArray,
  13. BooleanArray,
  14. FloatingArray,
  15. IntegerArray,
  16. StringArray,
  17. )
  18. def test_maybe_upcast(any_real_numpy_dtype):
  19. # GH#36712
  20. dtype = np.dtype(any_real_numpy_dtype)
  21. na_value = na_values[dtype]
  22. arr = np.array([1, 2, na_value], dtype=dtype)
  23. result = _maybe_upcast(arr, use_dtype_backend=True)
  24. expected_mask = np.array([False, False, True])
  25. if issubclass(dtype.type, np.integer):
  26. expected = IntegerArray(arr, mask=expected_mask)
  27. else:
  28. expected = FloatingArray(arr, mask=expected_mask)
  29. tm.assert_extension_array_equal(result, expected)
  30. def test_maybe_upcast_no_na(any_real_numpy_dtype):
  31. # GH#36712
  32. if any_real_numpy_dtype == "float32":
  33. pytest.skip()
  34. arr = np.array([1, 2, 3], dtype=any_real_numpy_dtype)
  35. result = _maybe_upcast(arr, use_dtype_backend=True)
  36. expected_mask = np.array([False, False, False])
  37. if issubclass(np.dtype(any_real_numpy_dtype).type, np.integer):
  38. expected = IntegerArray(arr, mask=expected_mask)
  39. else:
  40. expected = FloatingArray(arr, mask=expected_mask)
  41. tm.assert_extension_array_equal(result, expected)
  42. def test_maybe_upcaste_bool():
  43. # GH#36712
  44. dtype = np.bool_
  45. na_value = na_values[dtype]
  46. arr = np.array([True, False, na_value], dtype="uint8").view(dtype)
  47. result = _maybe_upcast(arr, use_dtype_backend=True)
  48. expected_mask = np.array([False, False, True])
  49. expected = BooleanArray(arr, mask=expected_mask)
  50. tm.assert_extension_array_equal(result, expected)
  51. def test_maybe_upcaste_bool_no_nan():
  52. # GH#36712
  53. dtype = np.bool_
  54. arr = np.array([True, False, False], dtype="uint8").view(dtype)
  55. result = _maybe_upcast(arr, use_dtype_backend=True)
  56. expected_mask = np.array([False, False, False])
  57. expected = BooleanArray(arr, mask=expected_mask)
  58. tm.assert_extension_array_equal(result, expected)
  59. def test_maybe_upcaste_all_nan():
  60. # GH#36712
  61. dtype = np.int64
  62. na_value = na_values[dtype]
  63. arr = np.array([na_value, na_value], dtype=dtype)
  64. result = _maybe_upcast(arr, use_dtype_backend=True)
  65. expected_mask = np.array([True, True])
  66. expected = IntegerArray(arr, mask=expected_mask)
  67. tm.assert_extension_array_equal(result, expected)
  68. @td.skip_if_no("pyarrow")
  69. @pytest.mark.parametrize("val", [na_values[np.object_], "c"])
  70. def test_maybe_upcast_object(val, string_storage):
  71. # GH#36712
  72. import pyarrow as pa
  73. with pd.option_context("mode.string_storage", string_storage):
  74. arr = np.array(["a", "b", val], dtype=np.object_)
  75. result = _maybe_upcast(arr, use_dtype_backend=True)
  76. if string_storage == "python":
  77. exp_val = "c" if val == "c" else NA
  78. expected = StringArray(np.array(["a", "b", exp_val], dtype=np.object_))
  79. else:
  80. exp_val = "c" if val == "c" else None
  81. expected = ArrowStringArray(pa.array(["a", "b", exp_val]))
  82. tm.assert_extension_array_equal(result, expected)