# Tests for Series.str case conversion, padding, zero-fill and wrapping.
from datetime import datetime
import operator

import numpy as np
import pytest

from pandas import (
    Series,
    _testing as tm,
)
def test_title(any_string_dtype):
    """Check that ``Series.str.title`` title-cases every string and keeps NaN.

    Parameters
    ----------
    any_string_dtype
        Dtype passed to the ``Series`` constructor for both the input and the
        expected result (presumably a fixture provided by the test suite).
    """
    s = Series(["FOO", "BAR", np.nan, "Blah", "blurg"], dtype=any_string_dtype)
    result = s.str.title()
    expected = Series(["Foo", "Bar", np.nan, "Blah", "Blurg"], dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)
def test_title_mixed_object():
    """Check ``Series.str.title`` on a mixed-type Series.

    Only the ``str`` elements are expected to be title-cased; every other
    element (NaN, bool, datetime, None, int, float) is expected to be missing.
    """
    s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0])
    result = s.str.title()
    # Pin object dtype so the expectation does not rely on constructor
    # inference for a list that holds only strings and NaN.
    expected = Series(
        ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", np.nan, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_almost_equal(result, expected)
def test_lower_upper(any_string_dtype):
    """Check that ``str.upper`` followed by ``str.lower`` round-trips.

    Parameters
    ----------
    any_string_dtype
        Dtype passed to the ``Series`` constructor (presumably a fixture
        provided by the test suite).
    """
    s = Series(["om", np.nan, "nom", "nom"], dtype=any_string_dtype)

    result = s.str.upper()
    expected = Series(["OM", np.nan, "NOM", "NOM"], dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)

    # Lower-casing the upper-cased values must give back the all-lowercase input.
    result = result.str.lower()
    tm.assert_series_equal(result, s)
def test_lower_upper_mixed_object():
    """Check ``str.upper`` and ``str.lower`` on a mixed-type Series.

    Only the ``str`` elements are expected to be converted; every other
    element is expected to be missing in the result.
    """
    s = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0])

    result = s.str.upper()
    # Object dtype is pinned so the expectation does not depend on inference.
    expected = Series(
        ["A", np.nan, "B", np.nan, np.nan, "FOO", np.nan, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)

    result = s.str.lower()
    expected = Series(
        ["a", np.nan, "b", np.nan, np.nan, "foo", np.nan, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "data, expected",
    [
        (
            ["FOO", "BAR", np.nan, "Blah", "blurg"],
            ["Foo", "Bar", np.nan, "Blah", "Blurg"],
        ),
        (["a", "b", "c"], ["A", "B", "C"]),
        (["a b", "a bc. de"], ["A b", "A bc. de"]),
    ],
)
def test_capitalize(data, expected, any_string_dtype):
    """Check ``Series.str.capitalize`` against the parametrized input/output pairs.

    Parameters
    ----------
    data : list
        Raw values used to build the input Series.
    expected : list
        Values the capitalized Series must contain.
    any_string_dtype
        Dtype passed to both ``Series`` constructors (presumably a fixture
        provided by the test suite).
    """
    s = Series(data, dtype=any_string_dtype)
    result = s.str.capitalize()
    expected = Series(expected, dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)
def test_capitalize_mixed_object():
    """Check ``Series.str.capitalize`` on a mixed-type Series.

    Only the ``str`` elements are expected to be capitalized; every other
    element is expected to be missing in the result.
    """
    s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0])
    result = s.str.capitalize()
    # Object dtype is pinned so the expectation does not depend on inference.
    expected = Series(
        ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", np.nan, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)
def test_swapcase(any_string_dtype):
    """Check that ``Series.str.swapcase`` flips the case of every letter.

    Parameters
    ----------
    any_string_dtype
        Dtype passed to the ``Series`` constructor (presumably a fixture
        provided by the test suite).
    """
    s = Series(["FOO", "BAR", np.nan, "Blah", "blurg"], dtype=any_string_dtype)
    result = s.str.swapcase()
    expected = Series(["foo", "bar", np.nan, "bLAH", "BLURG"], dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)
def test_swapcase_mixed_object():
    """Check ``Series.str.swapcase`` on a mixed-type Series.

    Only the ``str`` elements are expected to have their case swapped; every
    other element is expected to be missing in the result.
    """
    s = Series(["FOO", np.nan, "bar", True, datetime.today(), "Blah", None, 1, 2.0])
    result = s.str.swapcase()
    # Object dtype is pinned so the expectation does not depend on inference.
    expected = Series(
        ["foo", np.nan, "BAR", np.nan, np.nan, "bLAH", np.nan, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)
def test_casefold():
    """Check that ``Series.str.casefold`` folds the German sharp s to ``ss``."""
    # GH25405
    expected = Series(["ss", np.nan, "case", "ssd"])
    s = Series(["ß", np.nan, "case", "ßd"])
    result = s.str.casefold()

    tm.assert_series_equal(result, expected)
def test_casemethods(any_string_dtype):
    """Check that each ``.str`` case method agrees with the builtin ``str`` method.

    Parameters
    ----------
    any_string_dtype
        Dtype passed to the ``Series`` constructor (presumably a fixture
        provided by the test suite).
    """
    values = ["aaa", "bbb", "CCC", "Dddd", "eEEE"]
    s = Series(values, dtype=any_string_dtype)
    # Each accessor result is compared element-wise with plain Python strings.
    assert s.str.lower().tolist() == [v.lower() for v in values]
    assert s.str.upper().tolist() == [v.upper() for v in values]
    assert s.str.title().tolist() == [v.title() for v in values]
    assert s.str.capitalize().tolist() == [v.capitalize() for v in values]
    assert s.str.swapcase().tolist() == [v.swapcase() for v in values]
def test_pad(any_string_dtype):
    """Check ``Series.str.pad`` to width 5 on the left, right and both sides.

    Parameters
    ----------
    any_string_dtype
        Dtype passed to the ``Series`` constructor (presumably a fixture
        provided by the test suite).
    """
    s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)

    # width=5: one-character strings gain four spaces; "eeeeee" is already
    # longer than the width and is expected unchanged.
    result = s.str.pad(5, side="left")
    expected = Series(
        ["    a", "    b", np.nan, "    c", np.nan, "eeeeee"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)

    result = s.str.pad(5, side="right")
    expected = Series(
        ["a    ", "b    ", np.nan, "c    ", np.nan, "eeeeee"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)

    result = s.str.pad(5, side="both")
    expected = Series(
        ["  a  ", "  b  ", np.nan, "  c  ", np.nan, "eeeeee"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)
def test_pad_mixed_object():
    """Check ``Series.str.pad`` to width 5 on a mixed-type Series.

    Only the ``str`` elements are expected to be padded; every other element
    is expected to be missing in the result.
    """
    s = Series(["a", np.nan, "b", True, datetime.today(), "ee", None, 1, 2.0])

    # Object dtype is pinned so the expectations do not depend on inference.
    result = s.str.pad(5, side="left")
    expected = Series(
        ["    a", np.nan, "    b", np.nan, np.nan, "   ee", np.nan, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)

    result = s.str.pad(5, side="right")
    expected = Series(
        ["a    ", np.nan, "b    ", np.nan, np.nan, "ee   ", np.nan, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)

    # Centering "ee" in five columns puts the odd extra space on the left,
    # matching Python's ``str.center``.
    result = s.str.pad(5, side="both")
    expected = Series(
        ["  a  ", np.nan, "  b  ", np.nan, np.nan, "  ee ", np.nan, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)
def test_pad_fillchar(any_string_dtype):
    """Check ``Series.str.pad`` with ``fillchar="X"`` on each side.

    Parameters
    ----------
    any_string_dtype
        Dtype passed to the ``Series`` constructor (presumably a fixture
        provided by the test suite).
    """
    s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)

    result = s.str.pad(5, side="left", fillchar="X")
    expected = Series(
        ["XXXXa", "XXXXb", np.nan, "XXXXc", np.nan, "eeeeee"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)

    result = s.str.pad(5, side="right", fillchar="X")
    expected = Series(
        ["aXXXX", "bXXXX", np.nan, "cXXXX", np.nan, "eeeeee"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)

    result = s.str.pad(5, side="both", fillchar="X")
    expected = Series(
        ["XXaXX", "XXbXX", np.nan, "XXcXX", np.nan, "eeeeee"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)
def test_pad_fillchar_bad_arg_raises(any_string_dtype):
    """Check that ``Series.str.pad`` rejects a ``fillchar`` that is not one character.

    Parameters
    ----------
    any_string_dtype
        Dtype passed to the ``Series`` constructor (presumably a fixture
        provided by the test suite).
    """
    s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)

    # A two-character string is not a single character.
    msg = "fillchar must be a character, not str"
    with pytest.raises(TypeError, match=msg):
        s.str.pad(5, fillchar="XY")

    # A non-string fill value is rejected as well.
    msg = "fillchar must be a character, not int"
    with pytest.raises(TypeError, match=msg):
        s.str.pad(5, fillchar=5)
@pytest.mark.parametrize("method_name", ["center", "ljust", "rjust", "zfill", "pad"])
def test_pad_width_bad_arg_raises(method_name, any_string_dtype):
    """Check that every padding method rejects a non-integer ``width``.

    Parameters
    ----------
    method_name : str
        Name of the ``.str`` accessor method under test.
    any_string_dtype
        Dtype passed to the ``Series`` constructor (presumably a fixture
        provided by the test suite).
    """
    # see gh-13598
    s = Series(["1", "22", "a", "bb"], dtype=any_string_dtype)
    # Build a callable that invokes ``<accessor>.<method_name>("f")``.
    op = operator.methodcaller(method_name, "f")

    msg = "width must be of integer type, not str"
    with pytest.raises(TypeError, match=msg):
        op(s.str)
def test_center_ljust_rjust(any_string_dtype):
    """Check ``str.center``, ``str.ljust`` and ``str.rjust`` with width 5.

    Parameters
    ----------
    any_string_dtype
        Dtype passed to the ``Series`` constructor (presumably a fixture
        provided by the test suite).
    """
    s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)

    # width=5: one-character strings gain four spaces; "eeeeee" is already
    # longer than the width and is expected unchanged.
    result = s.str.center(5)
    expected = Series(
        ["  a  ", "  b  ", np.nan, "  c  ", np.nan, "eeeeee"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)

    result = s.str.ljust(5)
    expected = Series(
        ["a    ", "b    ", np.nan, "c    ", np.nan, "eeeeee"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)

    result = s.str.rjust(5)
    expected = Series(
        ["    a", "    b", np.nan, "    c", np.nan, "eeeeee"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)
def test_center_ljust_rjust_mixed_object():
    """Check ``str.center``/``ljust``/``rjust`` (width 5) on a mixed-type Series.

    Only the ``str`` elements are expected to be padded; every other element
    is expected to be missing in the result.
    """
    s = Series(["a", np.nan, "b", True, datetime.today(), "c", "eee", None, 1, 2.0])

    # Object dtype is pinned so the expectations do not depend on inference.
    result = s.str.center(5)
    expected = Series(
        [
            "  a  ",
            np.nan,
            "  b  ",
            np.nan,
            np.nan,
            "  c  ",
            " eee ",
            np.nan,
            np.nan,
            np.nan,
        ],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)

    result = s.str.ljust(5)
    expected = Series(
        [
            "a    ",
            np.nan,
            "b    ",
            np.nan,
            np.nan,
            "c    ",
            "eee  ",
            np.nan,
            np.nan,
            np.nan,
        ],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)

    result = s.str.rjust(5)
    expected = Series(
        [
            "    a",
            np.nan,
            "    b",
            np.nan,
            np.nan,
            "    c",
            "  eee",
            np.nan,
            np.nan,
            np.nan,
        ],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)
def test_center_ljust_rjust_fillchar(any_string_dtype):
    """Check ``str.center``/``ljust``/``rjust`` with ``fillchar="X"``.

    Each accessor result is compared both with hard-coded values and with
    what the matching builtin ``str`` method returns element by element.

    Parameters
    ----------
    any_string_dtype
        Dtype passed to the ``Series`` constructor (presumably a fixture
        provided by the test suite).
    """
    s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)

    result = s.str.center(5, fillchar="X")
    expected = Series(
        ["XXaXX", "XXbbX", "Xcccc", "ddddd", "eeeeee"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)
    # Cross-check against the builtin string method.
    expected = np.array([v.center(5, "X") for v in np.array(s)], dtype=np.object_)
    tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)

    result = s.str.ljust(5, fillchar="X")
    expected = Series(
        ["aXXXX", "bbXXX", "ccccX", "ddddd", "eeeeee"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)
    expected = np.array([v.ljust(5, "X") for v in np.array(s)], dtype=np.object_)
    tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)

    result = s.str.rjust(5, fillchar="X")
    expected = Series(
        ["XXXXa", "XXXbb", "Xcccc", "ddddd", "eeeeee"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)
    expected = np.array([v.rjust(5, "X") for v in np.array(s)], dtype=np.object_)
    tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
def test_center_ljust_rjust_fillchar_bad_arg_raises(any_string_dtype):
    """Check that ``center``/``ljust``/``rjust`` reject an invalid ``fillchar``.

    Parameters
    ----------
    any_string_dtype
        Dtype passed to the ``Series`` constructor (presumably a fixture
        provided by the test suite).
    """
    s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)

    # If fillchar is not a character, normal str raises TypeError
    # 'aaa'.ljust(5, 'XY')
    # TypeError: must be char, not str
    template = "fillchar must be a character, not {dtype}"

    with pytest.raises(TypeError, match=template.format(dtype="str")):
        s.str.center(5, fillchar="XY")

    with pytest.raises(TypeError, match=template.format(dtype="str")):
        s.str.ljust(5, fillchar="XY")

    with pytest.raises(TypeError, match=template.format(dtype="str")):
        s.str.rjust(5, fillchar="XY")

    with pytest.raises(TypeError, match=template.format(dtype="int")):
        s.str.center(5, fillchar=1)

    with pytest.raises(TypeError, match=template.format(dtype="int")):
        s.str.ljust(5, fillchar=1)

    with pytest.raises(TypeError, match=template.format(dtype="int")):
        s.str.rjust(5, fillchar=1)
def test_zfill(any_string_dtype):
    """Check ``Series.str.zfill`` for widths 5 and 3, with and without NaN.

    Results are compared with hard-coded values and, for the NaN-free input,
    with the builtin ``str.zfill`` applied element by element.

    Parameters
    ----------
    any_string_dtype
        Dtype passed to the ``Series`` constructor (presumably a fixture
        provided by the test suite).
    """
    s = Series(["1", "22", "aaa", "333", "45678"], dtype=any_string_dtype)

    result = s.str.zfill(5)
    expected = Series(
        ["00001", "00022", "00aaa", "00333", "45678"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)
    expected = np.array([v.zfill(5) for v in np.array(s)], dtype=np.object_)
    tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)

    # Strings already at or above the width are expected unchanged.
    result = s.str.zfill(3)
    expected = Series(["001", "022", "aaa", "333", "45678"], dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)
    expected = np.array([v.zfill(3) for v in np.array(s)], dtype=np.object_)
    tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)

    # Missing values are expected to stay missing.
    s = Series(["1", np.nan, "aaa", np.nan, "45678"], dtype=any_string_dtype)
    result = s.str.zfill(5)
    expected = Series(
        ["00001", np.nan, "00aaa", np.nan, "45678"], dtype=any_string_dtype
    )
    tm.assert_series_equal(result, expected)
def test_wrap(any_string_dtype):
    """Check ``Series.str.wrap`` at width 12 with ``break_long_words=True``.

    Parameters
    ----------
    any_string_dtype
        Dtype passed to the ``Series`` constructor (presumably a fixture
        provided by the test suite).
    """
    # test values are: two words less than width, two words equal to width,
    # two words greater than width, one word less than width, one word
    # equal to width, one word greater than width, multiple tokens with
    # trailing whitespace equal to width
    s = Series(
        [
            "hello world",
            "hello world!",
            "hello world!!",
            "abcdefabcde",
            "abcdefabcdef",
            "abcdefabcdefa",
            "ab ab ab ab ",
            "ab ab ab ab a",
            "\t",
        ],
        dtype=any_string_dtype,
    )

    # expected values
    expected = Series(
        [
            "hello world",
            "hello world!",
            "hello\nworld!!",
            "abcdefabcde",
            "abcdefabcdef",
            "abcdefabcdef\na",
            "ab ab ab ab",
            "ab ab ab ab\na",
            "",
        ],
        dtype=any_string_dtype,
    )

    result = s.str.wrap(12, break_long_words=True)
    tm.assert_series_equal(result, expected)
def test_wrap_unicode(any_string_dtype):
    """Check ``Series.str.wrap`` at width 6 with whitespace, NaN and non-ASCII text.

    Parameters
    ----------
    any_string_dtype
        Dtype passed to the ``Series`` constructor (presumably a fixture
        provided by the test suite).
    """
    # test with pre and post whitespace (non-unicode), NaN, and non-ascii Unicode
    # NOTE(review): the " pre " literals may have had wider whitespace before
    # this text was extracted; they are kept as found -- confirm upstream.
    s = Series(
        [" pre ", np.nan, "\xac\u20ac\U00008000 abadcafe"], dtype=any_string_dtype
    )
    expected = Series(
        [" pre", np.nan, "\xac\u20ac\U00008000 ab\nadcafe"], dtype=any_string_dtype
    )
    result = s.str.wrap(6)
    tm.assert_series_equal(result, expected)