123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
- import numpy as np
- import pytest
- from pandas._libs.parsers import ( # type: ignore[attr-defined]
- _maybe_upcast,
- na_values,
- )
- import pandas.util._test_decorators as td
- import pandas as pd
- from pandas import NA
- import pandas._testing as tm
- from pandas.core.arrays import (
- ArrowStringArray,
- BooleanArray,
- FloatingArray,
- IntegerArray,
- StringArray,
- )
- def test_maybe_upcast(any_real_numpy_dtype):
- # GH#36712
- dtype = np.dtype(any_real_numpy_dtype)
- na_value = na_values[dtype]
- arr = np.array([1, 2, na_value], dtype=dtype)
- result = _maybe_upcast(arr, use_dtype_backend=True)
- expected_mask = np.array([False, False, True])
- if issubclass(dtype.type, np.integer):
- expected = IntegerArray(arr, mask=expected_mask)
- else:
- expected = FloatingArray(arr, mask=expected_mask)
- tm.assert_extension_array_equal(result, expected)
- def test_maybe_upcast_no_na(any_real_numpy_dtype):
- # GH#36712
- if any_real_numpy_dtype == "float32":
- pytest.skip()
- arr = np.array([1, 2, 3], dtype=any_real_numpy_dtype)
- result = _maybe_upcast(arr, use_dtype_backend=True)
- expected_mask = np.array([False, False, False])
- if issubclass(np.dtype(any_real_numpy_dtype).type, np.integer):
- expected = IntegerArray(arr, mask=expected_mask)
- else:
- expected = FloatingArray(arr, mask=expected_mask)
- tm.assert_extension_array_equal(result, expected)
- def test_maybe_upcaste_bool():
- # GH#36712
- dtype = np.bool_
- na_value = na_values[dtype]
- arr = np.array([True, False, na_value], dtype="uint8").view(dtype)
- result = _maybe_upcast(arr, use_dtype_backend=True)
- expected_mask = np.array([False, False, True])
- expected = BooleanArray(arr, mask=expected_mask)
- tm.assert_extension_array_equal(result, expected)
- def test_maybe_upcaste_bool_no_nan():
- # GH#36712
- dtype = np.bool_
- arr = np.array([True, False, False], dtype="uint8").view(dtype)
- result = _maybe_upcast(arr, use_dtype_backend=True)
- expected_mask = np.array([False, False, False])
- expected = BooleanArray(arr, mask=expected_mask)
- tm.assert_extension_array_equal(result, expected)
- def test_maybe_upcaste_all_nan():
- # GH#36712
- dtype = np.int64
- na_value = na_values[dtype]
- arr = np.array([na_value, na_value], dtype=dtype)
- result = _maybe_upcast(arr, use_dtype_backend=True)
- expected_mask = np.array([True, True])
- expected = IntegerArray(arr, mask=expected_mask)
- tm.assert_extension_array_equal(result, expected)
- @td.skip_if_no("pyarrow")
- @pytest.mark.parametrize("val", [na_values[np.object_], "c"])
- def test_maybe_upcast_object(val, string_storage):
- # GH#36712
- import pyarrow as pa
- with pd.option_context("mode.string_storage", string_storage):
- arr = np.array(["a", "b", val], dtype=np.object_)
- result = _maybe_upcast(arr, use_dtype_backend=True)
- if string_storage == "python":
- exp_val = "c" if val == "c" else NA
- expected = StringArray(np.array(["a", "b", exp_val], dtype=np.object_))
- else:
- exp_val = "c" if val == "c" else None
- expected = ArrowStringArray(pa.array(["a", "b", exp_val]))
- tm.assert_extension_array_equal(result, expected)
|