# test_case_justify.py (13 KB)
  1. from datetime import datetime
  2. import operator
  3. import numpy as np
  4. import pytest
  5. from pandas import (
  6. Series,
  7. _testing as tm,
  8. )
  9. def test_title(any_string_dtype):
  10. s = Series(["FOO", "BAR", np.nan, "Blah", "blurg"], dtype=any_string_dtype)
  11. result = s.str.title()
  12. expected = Series(["Foo", "Bar", np.nan, "Blah", "Blurg"], dtype=any_string_dtype)
  13. tm.assert_series_equal(result, expected)
  14. def test_title_mixed_object():
  15. s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0])
  16. result = s.str.title()
  17. expected = Series(
  18. ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", np.nan, np.nan, np.nan]
  19. )
  20. tm.assert_almost_equal(result, expected)
  21. def test_lower_upper(any_string_dtype):
  22. s = Series(["om", np.nan, "nom", "nom"], dtype=any_string_dtype)
  23. result = s.str.upper()
  24. expected = Series(["OM", np.nan, "NOM", "NOM"], dtype=any_string_dtype)
  25. tm.assert_series_equal(result, expected)
  26. result = result.str.lower()
  27. tm.assert_series_equal(result, s)
  28. def test_lower_upper_mixed_object():
  29. s = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0])
  30. result = s.str.upper()
  31. expected = Series(["A", np.nan, "B", np.nan, np.nan, "FOO", np.nan, np.nan, np.nan])
  32. tm.assert_series_equal(result, expected)
  33. result = s.str.lower()
  34. expected = Series(["a", np.nan, "b", np.nan, np.nan, "foo", np.nan, np.nan, np.nan])
  35. tm.assert_series_equal(result, expected)
  36. @pytest.mark.parametrize(
  37. "data, expected",
  38. [
  39. (
  40. ["FOO", "BAR", np.nan, "Blah", "blurg"],
  41. ["Foo", "Bar", np.nan, "Blah", "Blurg"],
  42. ),
  43. (["a", "b", "c"], ["A", "B", "C"]),
  44. (["a b", "a bc. de"], ["A b", "A bc. de"]),
  45. ],
  46. )
  47. def test_capitalize(data, expected, any_string_dtype):
  48. s = Series(data, dtype=any_string_dtype)
  49. result = s.str.capitalize()
  50. expected = Series(expected, dtype=any_string_dtype)
  51. tm.assert_series_equal(result, expected)
  52. def test_capitalize_mixed_object():
  53. s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0])
  54. result = s.str.capitalize()
  55. expected = Series(
  56. ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", np.nan, np.nan, np.nan]
  57. )
  58. tm.assert_series_equal(result, expected)
  59. def test_swapcase(any_string_dtype):
  60. s = Series(["FOO", "BAR", np.nan, "Blah", "blurg"], dtype=any_string_dtype)
  61. result = s.str.swapcase()
  62. expected = Series(["foo", "bar", np.nan, "bLAH", "BLURG"], dtype=any_string_dtype)
  63. tm.assert_series_equal(result, expected)
  64. def test_swapcase_mixed_object():
  65. s = Series(["FOO", np.nan, "bar", True, datetime.today(), "Blah", None, 1, 2.0])
  66. result = s.str.swapcase()
  67. expected = Series(
  68. ["foo", np.nan, "BAR", np.nan, np.nan, "bLAH", np.nan, np.nan, np.nan]
  69. )
  70. tm.assert_series_equal(result, expected)
  71. def test_casefold():
  72. # GH25405
  73. expected = Series(["ss", np.nan, "case", "ssd"])
  74. s = Series(["ß", np.nan, "case", "ßd"])
  75. result = s.str.casefold()
  76. tm.assert_series_equal(result, expected)
  77. def test_casemethods(any_string_dtype):
  78. values = ["aaa", "bbb", "CCC", "Dddd", "eEEE"]
  79. s = Series(values, dtype=any_string_dtype)
  80. assert s.str.lower().tolist() == [v.lower() for v in values]
  81. assert s.str.upper().tolist() == [v.upper() for v in values]
  82. assert s.str.title().tolist() == [v.title() for v in values]
  83. assert s.str.capitalize().tolist() == [v.capitalize() for v in values]
  84. assert s.str.swapcase().tolist() == [v.swapcase() for v in values]
  85. def test_pad(any_string_dtype):
  86. s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
  87. result = s.str.pad(5, side="left")
  88. expected = Series(
  89. [" a", " b", np.nan, " c", np.nan, "eeeeee"], dtype=any_string_dtype
  90. )
  91. tm.assert_series_equal(result, expected)
  92. result = s.str.pad(5, side="right")
  93. expected = Series(
  94. ["a ", "b ", np.nan, "c ", np.nan, "eeeeee"], dtype=any_string_dtype
  95. )
  96. tm.assert_series_equal(result, expected)
  97. result = s.str.pad(5, side="both")
  98. expected = Series(
  99. [" a ", " b ", np.nan, " c ", np.nan, "eeeeee"], dtype=any_string_dtype
  100. )
  101. tm.assert_series_equal(result, expected)
  102. def test_pad_mixed_object():
  103. s = Series(["a", np.nan, "b", True, datetime.today(), "ee", None, 1, 2.0])
  104. result = s.str.pad(5, side="left")
  105. expected = Series(
  106. [" a", np.nan, " b", np.nan, np.nan, " ee", np.nan, np.nan, np.nan]
  107. )
  108. tm.assert_series_equal(result, expected)
  109. result = s.str.pad(5, side="right")
  110. expected = Series(
  111. ["a ", np.nan, "b ", np.nan, np.nan, "ee ", np.nan, np.nan, np.nan]
  112. )
  113. tm.assert_series_equal(result, expected)
  114. result = s.str.pad(5, side="both")
  115. expected = Series(
  116. [" a ", np.nan, " b ", np.nan, np.nan, " ee ", np.nan, np.nan, np.nan]
  117. )
  118. tm.assert_series_equal(result, expected)
  119. def test_pad_fillchar(any_string_dtype):
  120. s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
  121. result = s.str.pad(5, side="left", fillchar="X")
  122. expected = Series(
  123. ["XXXXa", "XXXXb", np.nan, "XXXXc", np.nan, "eeeeee"], dtype=any_string_dtype
  124. )
  125. tm.assert_series_equal(result, expected)
  126. result = s.str.pad(5, side="right", fillchar="X")
  127. expected = Series(
  128. ["aXXXX", "bXXXX", np.nan, "cXXXX", np.nan, "eeeeee"], dtype=any_string_dtype
  129. )
  130. tm.assert_series_equal(result, expected)
  131. result = s.str.pad(5, side="both", fillchar="X")
  132. expected = Series(
  133. ["XXaXX", "XXbXX", np.nan, "XXcXX", np.nan, "eeeeee"], dtype=any_string_dtype
  134. )
  135. tm.assert_series_equal(result, expected)
  136. def test_pad_fillchar_bad_arg_raises(any_string_dtype):
  137. s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
  138. msg = "fillchar must be a character, not str"
  139. with pytest.raises(TypeError, match=msg):
  140. s.str.pad(5, fillchar="XY")
  141. msg = "fillchar must be a character, not int"
  142. with pytest.raises(TypeError, match=msg):
  143. s.str.pad(5, fillchar=5)
  144. @pytest.mark.parametrize("method_name", ["center", "ljust", "rjust", "zfill", "pad"])
  145. def test_pad_width_bad_arg_raises(method_name, any_string_dtype):
  146. # see gh-13598
  147. s = Series(["1", "22", "a", "bb"], dtype=any_string_dtype)
  148. op = operator.methodcaller(method_name, "f")
  149. msg = "width must be of integer type, not str"
  150. with pytest.raises(TypeError, match=msg):
  151. op(s.str)
  152. def test_center_ljust_rjust(any_string_dtype):
  153. s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
  154. result = s.str.center(5)
  155. expected = Series(
  156. [" a ", " b ", np.nan, " c ", np.nan, "eeeeee"], dtype=any_string_dtype
  157. )
  158. tm.assert_series_equal(result, expected)
  159. result = s.str.ljust(5)
  160. expected = Series(
  161. ["a ", "b ", np.nan, "c ", np.nan, "eeeeee"], dtype=any_string_dtype
  162. )
  163. tm.assert_series_equal(result, expected)
  164. result = s.str.rjust(5)
  165. expected = Series(
  166. [" a", " b", np.nan, " c", np.nan, "eeeeee"], dtype=any_string_dtype
  167. )
  168. tm.assert_series_equal(result, expected)
  169. def test_center_ljust_rjust_mixed_object():
  170. s = Series(["a", np.nan, "b", True, datetime.today(), "c", "eee", None, 1, 2.0])
  171. result = s.str.center(5)
  172. expected = Series(
  173. [
  174. " a ",
  175. np.nan,
  176. " b ",
  177. np.nan,
  178. np.nan,
  179. " c ",
  180. " eee ",
  181. np.nan,
  182. np.nan,
  183. np.nan,
  184. ]
  185. )
  186. tm.assert_series_equal(result, expected)
  187. result = s.str.ljust(5)
  188. expected = Series(
  189. [
  190. "a ",
  191. np.nan,
  192. "b ",
  193. np.nan,
  194. np.nan,
  195. "c ",
  196. "eee ",
  197. np.nan,
  198. np.nan,
  199. np.nan,
  200. ]
  201. )
  202. tm.assert_series_equal(result, expected)
  203. result = s.str.rjust(5)
  204. expected = Series(
  205. [
  206. " a",
  207. np.nan,
  208. " b",
  209. np.nan,
  210. np.nan,
  211. " c",
  212. " eee",
  213. np.nan,
  214. np.nan,
  215. np.nan,
  216. ]
  217. )
  218. tm.assert_series_equal(result, expected)
  219. def test_center_ljust_rjust_fillchar(any_string_dtype):
  220. s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)
  221. result = s.str.center(5, fillchar="X")
  222. expected = Series(
  223. ["XXaXX", "XXbbX", "Xcccc", "ddddd", "eeeeee"], dtype=any_string_dtype
  224. )
  225. tm.assert_series_equal(result, expected)
  226. expected = np.array([v.center(5, "X") for v in np.array(s)], dtype=np.object_)
  227. tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
  228. result = s.str.ljust(5, fillchar="X")
  229. expected = Series(
  230. ["aXXXX", "bbXXX", "ccccX", "ddddd", "eeeeee"], dtype=any_string_dtype
  231. )
  232. tm.assert_series_equal(result, expected)
  233. expected = np.array([v.ljust(5, "X") for v in np.array(s)], dtype=np.object_)
  234. tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
  235. result = s.str.rjust(5, fillchar="X")
  236. expected = Series(
  237. ["XXXXa", "XXXbb", "Xcccc", "ddddd", "eeeeee"], dtype=any_string_dtype
  238. )
  239. tm.assert_series_equal(result, expected)
  240. expected = np.array([v.rjust(5, "X") for v in np.array(s)], dtype=np.object_)
  241. tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
  242. def test_center_ljust_rjust_fillchar_bad_arg_raises(any_string_dtype):
  243. s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)
  244. # If fillchar is not a character, normal str raises TypeError
  245. # 'aaa'.ljust(5, 'XY')
  246. # TypeError: must be char, not str
  247. template = "fillchar must be a character, not {dtype}"
  248. with pytest.raises(TypeError, match=template.format(dtype="str")):
  249. s.str.center(5, fillchar="XY")
  250. with pytest.raises(TypeError, match=template.format(dtype="str")):
  251. s.str.ljust(5, fillchar="XY")
  252. with pytest.raises(TypeError, match=template.format(dtype="str")):
  253. s.str.rjust(5, fillchar="XY")
  254. with pytest.raises(TypeError, match=template.format(dtype="int")):
  255. s.str.center(5, fillchar=1)
  256. with pytest.raises(TypeError, match=template.format(dtype="int")):
  257. s.str.ljust(5, fillchar=1)
  258. with pytest.raises(TypeError, match=template.format(dtype="int")):
  259. s.str.rjust(5, fillchar=1)
  260. def test_zfill(any_string_dtype):
  261. s = Series(["1", "22", "aaa", "333", "45678"], dtype=any_string_dtype)
  262. result = s.str.zfill(5)
  263. expected = Series(
  264. ["00001", "00022", "00aaa", "00333", "45678"], dtype=any_string_dtype
  265. )
  266. tm.assert_series_equal(result, expected)
  267. expected = np.array([v.zfill(5) for v in np.array(s)], dtype=np.object_)
  268. tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
  269. result = s.str.zfill(3)
  270. expected = Series(["001", "022", "aaa", "333", "45678"], dtype=any_string_dtype)
  271. tm.assert_series_equal(result, expected)
  272. expected = np.array([v.zfill(3) for v in np.array(s)], dtype=np.object_)
  273. tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
  274. s = Series(["1", np.nan, "aaa", np.nan, "45678"], dtype=any_string_dtype)
  275. result = s.str.zfill(5)
  276. expected = Series(
  277. ["00001", np.nan, "00aaa", np.nan, "45678"], dtype=any_string_dtype
  278. )
  279. tm.assert_series_equal(result, expected)
  280. def test_wrap(any_string_dtype):
  281. # test values are: two words less than width, two words equal to width,
  282. # two words greater than width, one word less than width, one word
  283. # equal to width, one word greater than width, multiple tokens with
  284. # trailing whitespace equal to width
  285. s = Series(
  286. [
  287. "hello world",
  288. "hello world!",
  289. "hello world!!",
  290. "abcdefabcde",
  291. "abcdefabcdef",
  292. "abcdefabcdefa",
  293. "ab ab ab ab ",
  294. "ab ab ab ab a",
  295. "\t",
  296. ],
  297. dtype=any_string_dtype,
  298. )
  299. # expected values
  300. expected = Series(
  301. [
  302. "hello world",
  303. "hello world!",
  304. "hello\nworld!!",
  305. "abcdefabcde",
  306. "abcdefabcdef",
  307. "abcdefabcdef\na",
  308. "ab ab ab ab",
  309. "ab ab ab ab\na",
  310. "",
  311. ],
  312. dtype=any_string_dtype,
  313. )
  314. result = s.str.wrap(12, break_long_words=True)
  315. tm.assert_series_equal(result, expected)
  316. def test_wrap_unicode(any_string_dtype):
  317. # test with pre and post whitespace (non-unicode), NaN, and non-ascii Unicode
  318. s = Series(
  319. [" pre ", np.nan, "\xac\u20ac\U00008000 abadcafe"], dtype=any_string_dtype
  320. )
  321. expected = Series(
  322. [" pre", np.nan, "\xac\u20ac\U00008000 ab\nadcafe"], dtype=any_string_dtype
  323. )
  324. result = s.str.wrap(6)
  325. tm.assert_series_equal(result, expected)