test_constructors.py 77 KB


  1. from collections import OrderedDict
  2. from datetime import (
  3. datetime,
  4. timedelta,
  5. )
  6. from typing import Iterator
  7. from dateutil.tz import tzoffset
  8. import numpy as np
  9. from numpy import ma
  10. import pytest
  11. from pandas._libs import (
  12. iNaT,
  13. lib,
  14. )
  15. from pandas.errors import IntCastingNaNError
  16. import pandas.util._test_decorators as td
  17. from pandas.core.dtypes.common import (
  18. is_categorical_dtype,
  19. is_datetime64tz_dtype,
  20. )
  21. from pandas.core.dtypes.dtypes import CategoricalDtype
  22. import pandas as pd
  23. from pandas import (
  24. Categorical,
  25. DataFrame,
  26. DatetimeIndex,
  27. Index,
  28. Interval,
  29. IntervalIndex,
  30. MultiIndex,
  31. NaT,
  32. Period,
  33. RangeIndex,
  34. Series,
  35. Timestamp,
  36. date_range,
  37. isna,
  38. period_range,
  39. timedelta_range,
  40. )
  41. import pandas._testing as tm
  42. from pandas.core.arrays import (
  43. IntegerArray,
  44. IntervalArray,
  45. period_array,
  46. )
  47. from pandas.core.internals.blocks import NumericBlock
  48. class TestSeriesConstructors:
  49. def test_from_ints_with_non_nano_dt64_dtype(self, index_or_series):
  50. values = np.arange(10)
  51. res = index_or_series(values, dtype="M8[s]")
  52. expected = index_or_series(values.astype("M8[s]"))
  53. tm.assert_equal(res, expected)
  54. res = index_or_series(list(values), dtype="M8[s]")
  55. tm.assert_equal(res, expected)
  56. def test_from_na_value_and_interval_of_datetime_dtype(self):
  57. # GH#41805
  58. ser = Series([None], dtype="interval[datetime64[ns]]")
  59. assert ser.isna().all()
  60. assert ser.dtype == "interval[datetime64[ns], right]"
  61. def test_infer_with_date_and_datetime(self):
  62. # GH#49341 pre-2.0 we inferred datetime-and-date to datetime64, which
  63. # was inconsistent with Index behavior
  64. ts = Timestamp(2016, 1, 1)
  65. vals = [ts.to_pydatetime(), ts.date()]
  66. ser = Series(vals)
  67. expected = Series(vals, dtype=object)
  68. tm.assert_series_equal(ser, expected)
  69. idx = Index(vals)
  70. expected = Index(vals, dtype=object)
  71. tm.assert_index_equal(idx, expected)
  def test_unparseable_strings_with_dt64_dtype(self):
      """An unparseable string with a datetime64 dtype raises ``ValueError``."""
      # pre-2.0 these would be silently ignored and come back with object dtype
      bad = ["aa"]
      pattern = "^Unknown datetime string format, unable to parse: aa, at position 0$"

      # list input
      with pytest.raises(ValueError, match=pattern):
          Series(bad, dtype="datetime64[ns]")

      # object-dtype ndarray input
      with pytest.raises(ValueError, match=pattern):
          Series(np.array(bad, dtype=object), dtype="datetime64[ns]")
  @pytest.mark.parametrize(
      "constructor",
      [
          # NOTE: some overlap with test_constructor_empty but that test does not
          # test for None or an empty generator.
          # test_constructor_pass_none tests None but only with the index also
          # passed.
          lambda idx: Series(index=idx),
          lambda idx: Series(None, index=idx),
          lambda idx: Series({}, index=idx),
          lambda idx: Series((), index=idx),
          lambda idx: Series([], index=idx),
          lambda idx: Series((_ for _ in []), index=idx),
          lambda idx: Series(data=None, index=idx),
          lambda idx: Series(data={}, index=idx),
          lambda idx: Series(data=(), index=idx),
          lambda idx: Series(data=[], index=idx),
          lambda idx: Series(data=(_ for _ in []), index=idx),
      ],
  )
  @pytest.mark.parametrize("empty_index", [None, []])
  def test_empty_constructor(self, constructor, empty_index):
      """Every spelling of "no data" yields the same empty object-dtype Series.

      Each constructor is compared against ``Series(index=empty_index)``,
      including the index type.
      """
      # GH 49573 (addition of empty_index parameter)
      reference = Series(index=empty_index)
      built = constructor(empty_index)

      assert built.dtype == object
      assert len(built.index) == 0
      tm.assert_series_equal(built, reference, check_index_type=True)
  def test_invalid_dtype(self):
      """GH15520: each unsupported ``dtype`` argument raises ``TypeError``."""
      pattern = "not understood"
      for bad_dtype in (Timestamp, "Timestamp", list):
          with pytest.raises(TypeError, match=pattern):
              Series([], name="time", dtype=bad_dtype)
  def test_invalid_compound_dtype(self):
      """GH#13296: an ndarray with a compound dtype raises ``ValueError``."""
      compound = np.dtype([("a", "i8"), ("b", "f4")])
      records = np.array([(1, 0.4), (256, -13)], dtype=compound)

      with pytest.raises(ValueError, match="Use DataFrame instead"):
          Series(records, index=["A", "B"])
  121. def test_scalar_conversion(self):
  122. # Pass in scalar is disabled
  123. scalar = Series(0.5)
  124. assert not isinstance(scalar, float)
  125. def test_scalar_extension_dtype(self, ea_scalar_and_dtype):
  126. # GH 28401
  127. ea_scalar, ea_dtype = ea_scalar_and_dtype
  128. ser = Series(ea_scalar, index=range(3))
  129. expected = Series([ea_scalar] * 3, dtype=ea_dtype)
  130. assert ser.dtype == ea_dtype
  131. tm.assert_series_equal(ser, expected)
  def test_constructor(self, datetime_series):
      """Exercise several basic ``Series`` construction paths.

      Covers: construction from an existing Series (index object is reused),
      a mixed string/NaN list (object dtype), rejection of 2-D ndarrays
      (GH35744), propagation of ``name``, and rejection of a ``MultiIndex``
      passed as data (GH4187).

      ``datetime_series`` is a fixture; this test only relies on its index
      reporting ``_is_all_dates``.
      """
      empty_series = Series()
      assert datetime_series.index._is_all_dates

      # Pass in Series
      derived = Series(datetime_series)
      assert derived.index._is_all_dates
      assert tm.equalContents(derived.index, datetime_series.index)

      # Ensure new index is not created. Use an identity check rather than
      # comparing id() values, which can collide for short-lived objects.
      assert datetime_series.index is derived.index

      # Mixed type Series. ``np.nan`` is used because the ``np.NaN`` alias
      # no longer exists in NumPy 2.0.
      mixed = Series(["hello", np.nan], index=[0, 1])
      assert mixed.dtype == np.object_
      assert np.isnan(mixed[1])

      assert not empty_series.index._is_all_dates
      assert not Series().index._is_all_dates

      # exception raised is of type ValueError GH35744
      with pytest.raises(
          ValueError,
          match=r"Data must be 1-dimensional, got ndarray of shape \(3, 3\) instead",
      ):
          Series(np.random.randn(3, 3), index=np.arange(3))

      # the name survives re-wrapping in a new Series
      mixed.name = "Series"
      rs = Series(mixed).name
      xp = "Series"
      assert rs == xp

      # raise on MultiIndex GH4187
      m = MultiIndex.from_arrays([[1, 2], [3, 4]])
      msg = "initializing a Series from a MultiIndex is not supported"
      with pytest.raises(NotImplementedError, match=msg):
          Series(m)
  def test_constructor_index_ndim_gt_1_raises(self):
      """GH#18579: passing a DataFrame as ``index`` raises ``ValueError``."""
      frame = DataFrame([[1, 2], [3, 4], [5, 6]], index=[3, 6, 9])

      with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
          Series([1, 3, 2], index=frame)
  @pytest.mark.parametrize("input_class", [list, dict, OrderedDict])
  def test_constructor_empty(self, input_class):
      """An empty container gives the same result as passing no data at all.

      Checked with the default dtype, ``float64`` and ``category``; for
      non-list containers also with an explicit index.
      """
      no_data = Series()
      from_container = Series(input_class())
      # these are Index() and RangeIndex() which don't compare type equal
      # but are just .equals
      tm.assert_series_equal(no_data, from_container, check_index_type=False)

      # With explicit dtype, then GH 18515 : with dtype=category
      for dtype in ("float64", "category"):
          no_data = Series(dtype=dtype)
          from_container = Series(input_class(), dtype=dtype)
          tm.assert_series_equal(no_data, from_container, check_index_type=False)

      if input_class is not list:
          # With index:
          no_data = Series(index=range(10))
          from_container = Series(input_class(), index=range(10))
          tm.assert_series_equal(no_data, from_container)

          # With index and dtype float64:
          no_data = Series(np.nan, index=range(10))
          from_container = Series(input_class(), index=range(10), dtype="float64")
          tm.assert_series_equal(no_data, from_container)

          # GH 19853 : with empty string, index and dtype str
          # NOTE(review): the source this was taken from had lost its
          # indentation; this section is assumed to sit inside the
          # ``if`` block -- confirm against the canonical file.
          with_dtype = Series("", dtype=str, index=range(3))
          without_dtype = Series("", index=range(3))
          tm.assert_series_equal(with_dtype, without_dtype)
  @pytest.mark.parametrize("input_arg", [np.nan, float("nan")])
  def test_constructor_nan(self, input_arg):
      """A NaN scalar over an index equals a data-less float64 Series."""
      no_data = Series(dtype="float64", index=range(10))
      from_nan = Series(input_arg, index=range(10))

      tm.assert_series_equal(no_data, from_nan, check_index_type=False)
  @pytest.mark.parametrize(
      "dtype",
      ["f8", "i8", "M8[ns]", "m8[ns]", "category", "object", "datetime64[ns, UTC]"],
  )
  @pytest.mark.parametrize("index", [None, Index([])])
  def test_constructor_dtype_only(self, dtype, index):
      """GH-20865: with only a dtype, the result is empty and has that dtype."""
      ser = Series(dtype=dtype, index=index)

      assert ser.dtype == dtype
      assert len(ser) == 0
  210. def test_constructor_no_data_index_order(self):
  211. result = Series(index=["b", "a", "c"])
  212. assert result.index.tolist() == ["b", "a", "c"]
  213. def test_constructor_no_data_string_type(self):
  214. # GH 22477
  215. result = Series(index=[1], dtype=str)
  216. assert np.isnan(result.iloc[0])
  @pytest.mark.parametrize("item", ["entry", "ѐ", 13])
  def test_constructor_string_element_string_type(self, item):
      """GH 22477: a scalar with ``dtype=str`` is stored as ``str(item)``."""
      ser = Series(item, index=[1], dtype=str)
      stored = ser.iloc[0]
      assert stored == str(item)
  222. def test_constructor_dtype_str_na_values(self, string_dtype):
  223. # https://github.com/pandas-dev/pandas/issues/21083
  224. ser = Series(["x", None], dtype=string_dtype)
  225. result = ser.isna()
  226. expected = Series([False, True])
  227. tm.assert_series_equal(result, expected)
  228. assert ser.iloc[1] is None
  229. ser = Series(["x", np.nan], dtype=string_dtype)
  230. assert np.isnan(ser.iloc[1])
  231. def test_constructor_series(self):
  232. index1 = ["d", "b", "a", "c"]
  233. index2 = sorted(index1)
  234. s1 = Series([4, 7, -5, 3], index=index1)
  235. s2 = Series(s1, index=index2)
  236. tm.assert_series_equal(s2, s1.sort_index())
  237. def test_constructor_iterable(self):
  238. # GH 21987
  239. class Iter:
  240. def __iter__(self) -> Iterator:
  241. yield from range(10)
  242. expected = Series(list(range(10)), dtype="int64")
  243. result = Series(Iter(), dtype="int64")
  244. tm.assert_series_equal(result, expected)
  245. def test_constructor_sequence(self):
  246. # GH 21987
  247. expected = Series(list(range(10)), dtype="int64")
  248. result = Series(range(10), dtype="int64")
  249. tm.assert_series_equal(result, expected)
  250. def test_constructor_single_str(self):
  251. # GH 21987
  252. expected = Series(["abc"])
  253. result = Series("abc")
  254. tm.assert_series_equal(result, expected)
  255. def test_constructor_list_like(self):
  256. # make sure that we are coercing different
  257. # list-likes to standard dtypes and not
  258. # platform specific
  259. expected = Series([1, 2, 3], dtype="int64")
  260. for obj in [[1, 2, 3], (1, 2, 3), np.array([1, 2, 3], dtype="int64")]:
  261. result = Series(obj, index=[0, 1, 2])
  262. tm.assert_series_equal(result, expected)
  263. def test_constructor_boolean_index(self):
  264. # GH#18579
  265. s1 = Series([1, 2, 3], index=[4, 5, 6])
  266. index = s1 == 2
  267. result = Series([1, 3, 2], index=index)
  268. expected = Series([1, 3, 2], index=[False, True, False])
  269. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize("dtype", ["bool", "int32", "int64", "float64"])
  def test_constructor_index_dtype(self, dtype):
      """GH 17088: an explicit dtype is honoured when the data is an ``Index``."""
      source = Index([0, 2, 4])
      ser = Series(source, dtype=dtype)
      assert ser.dtype == dtype
  @pytest.mark.parametrize(
      "input_vals",
      [
          [1, 2],
          ["1", "2"],
          list(date_range("1/1/2011", periods=2, freq="H")),
          list(date_range("1/1/2011", periods=2, freq="H", tz="US/Eastern")),
          [Interval(left=0, right=5)],
      ],
  )
  def test_constructor_list_str(self, input_vals, string_dtype):
      """Constructing with a string dtype matches constructing then ``astype``."""
      # GH 16605
      # Ensure that data elements from a list are converted to strings
      # when dtype is str, 'str', or 'U'
      direct = Series(input_vals, dtype=string_dtype)
      via_astype = Series(input_vals).astype(string_dtype)
      tm.assert_series_equal(direct, via_astype)
  292. def test_constructor_list_str_na(self, string_dtype):
  293. result = Series([1.0, 2.0, np.nan], dtype=string_dtype)
  294. expected = Series(["1.0", "2.0", np.nan], dtype=object)
  295. tm.assert_series_equal(result, expected)
  296. assert np.isnan(result[2])
  297. def test_constructor_generator(self):
  298. gen = (i for i in range(10))
  299. result = Series(gen)
  300. exp = Series(range(10))
  301. tm.assert_series_equal(result, exp)
  302. # same but with non-default index
  303. gen = (i for i in range(10))
  304. result = Series(gen, index=range(10, 20))
  305. exp.index = range(10, 20)
  306. tm.assert_series_equal(result, exp)
  307. def test_constructor_map(self):
  308. # GH8909
  309. m = map(lambda x: x, range(10))
  310. result = Series(m)
  311. exp = Series(range(10))
  312. tm.assert_series_equal(result, exp)
  313. # same but with non-default index
  314. m = map(lambda x: x, range(10))
  315. result = Series(m, index=range(10, 20))
  316. exp.index = range(10, 20)
  317. tm.assert_series_equal(result, exp)
  318. def test_constructor_categorical(self):
  319. cat = Categorical([0, 1, 2, 0, 1, 2], ["a", "b", "c"], fastpath=True)
  320. res = Series(cat)
  321. tm.assert_categorical_equal(res.values, cat)
  322. # can cast to a new dtype
  323. result = Series(Categorical([1, 2, 3]), dtype="int64")
  324. expected = Series([1, 2, 3], dtype="int64")
  325. tm.assert_series_equal(result, expected)
  326. def test_construct_from_categorical_with_dtype(self):
  327. # GH12574
  328. cat = Series(Categorical([1, 2, 3]), dtype="category")
  329. assert is_categorical_dtype(cat)
  330. assert is_categorical_dtype(cat.dtype)
  331. def test_construct_intlist_values_category_dtype(self):
  332. ser = Series([1, 2, 3], dtype="category")
  333. assert is_categorical_dtype(ser)
  334. assert is_categorical_dtype(ser.dtype)
  335. def test_constructor_categorical_with_coercion(self):
  336. factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])
  337. # test basic creation / coercion of categoricals
  338. s = Series(factor, name="A")
  339. assert s.dtype == "category"
  340. assert len(s) == len(factor)
  341. str(s.values)
  342. str(s)
  343. # in a frame
  344. df = DataFrame({"A": factor})
  345. result = df["A"]
  346. tm.assert_series_equal(result, s)
  347. result = df.iloc[:, 0]
  348. tm.assert_series_equal(result, s)
  349. assert len(df) == len(factor)
  350. str(df.values)
  351. str(df)
  352. df = DataFrame({"A": s})
  353. result = df["A"]
  354. tm.assert_series_equal(result, s)
  355. assert len(df) == len(factor)
  356. str(df.values)
  357. str(df)
  358. # multiples
  359. df = DataFrame({"A": s, "B": s, "C": 1})
  360. result1 = df["A"]
  361. result2 = df["B"]
  362. tm.assert_series_equal(result1, s)
  363. tm.assert_series_equal(result2, s, check_names=False)
  364. assert result2.name == "B"
  365. assert len(df) == len(factor)
  366. str(df.values)
  367. str(df)
  368. def test_constructor_categorical_with_coercion2(self):
  369. # GH8623
  370. x = DataFrame(
  371. [[1, "John P. Doe"], [2, "Jane Dove"], [1, "John P. Doe"]],
  372. columns=["person_id", "person_name"],
  373. )
  374. x["person_name"] = Categorical(x.person_name) # doing this breaks transform
  375. expected = x.iloc[0].person_name
  376. result = x.person_name.iloc[0]
  377. assert result == expected
  378. result = x.person_name[0]
  379. assert result == expected
  380. result = x.person_name.loc[0]
  381. assert result == expected
  382. def test_constructor_series_to_categorical(self):
  383. # see GH#16524: test conversion of Series to Categorical
  384. series = Series(["a", "b", "c"])
  385. result = Series(series, dtype="category")
  386. expected = Series(["a", "b", "c"], dtype="category")
  387. tm.assert_series_equal(result, expected)
  388. def test_constructor_categorical_dtype(self):
  389. result = Series(
  390. ["a", "b"], dtype=CategoricalDtype(["a", "b", "c"], ordered=True)
  391. )
  392. assert is_categorical_dtype(result.dtype) is True
  393. tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"]))
  394. assert result.cat.ordered
  395. result = Series(["a", "b"], dtype=CategoricalDtype(["b", "a"]))
  396. assert is_categorical_dtype(result.dtype)
  397. tm.assert_index_equal(result.cat.categories, Index(["b", "a"]))
  398. assert result.cat.ordered is False
  399. # GH 19565 - Check broadcasting of scalar with Categorical dtype
  400. result = Series(
  401. "a", index=[0, 1], dtype=CategoricalDtype(["a", "b"], ordered=True)
  402. )
  403. expected = Series(
  404. ["a", "a"], index=[0, 1], dtype=CategoricalDtype(["a", "b"], ordered=True)
  405. )
  406. tm.assert_series_equal(result, expected)
  407. def test_constructor_categorical_string(self):
  408. # GH 26336: the string 'category' maintains existing CategoricalDtype
  409. cdt = CategoricalDtype(categories=list("dabc"), ordered=True)
  410. expected = Series(list("abcabc"), dtype=cdt)
  411. # Series(Categorical, dtype='category') keeps existing dtype
  412. cat = Categorical(list("abcabc"), dtype=cdt)
  413. result = Series(cat, dtype="category")
  414. tm.assert_series_equal(result, expected)
  415. # Series(Series[Categorical], dtype='category') keeps existing dtype
  416. result = Series(result, dtype="category")
  417. tm.assert_series_equal(result, expected)
  418. def test_categorical_sideeffects_free(self):
  419. # Passing a categorical to a Series and then changing values in either
  420. # the series or the categorical should not change the values in the
  421. # other one, IF you specify copy!
  422. cat = Categorical(["a", "b", "c", "a"])
  423. s = Series(cat, copy=True)
  424. assert s.cat is not cat
  425. s = s.cat.rename_categories([1, 2, 3])
  426. exp_s = np.array([1, 2, 3, 1], dtype=np.int64)
  427. exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_)
  428. tm.assert_numpy_array_equal(s.__array__(), exp_s)
  429. tm.assert_numpy_array_equal(cat.__array__(), exp_cat)
  430. # setting
  431. s[0] = 2
  432. exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64)
  433. tm.assert_numpy_array_equal(s.__array__(), exp_s2)
  434. tm.assert_numpy_array_equal(cat.__array__(), exp_cat)
  435. # however, copy is False by default
  436. # so this WILL change values
  437. cat = Categorical(["a", "b", "c", "a"])
  438. s = Series(cat, copy=False)
  439. assert s.values is cat
  440. s = s.cat.rename_categories([1, 2, 3])
  441. assert s.values is not cat
  442. exp_s = np.array([1, 2, 3, 1], dtype=np.int64)
  443. tm.assert_numpy_array_equal(s.__array__(), exp_s)
  444. s[0] = 2
  445. exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64)
  446. tm.assert_numpy_array_equal(s.__array__(), exp_s2)
  447. def test_unordered_compare_equal(self):
  448. left = Series(["a", "b", "c"], dtype=CategoricalDtype(["a", "b"]))
  449. right = Series(Categorical(["a", "b", np.nan], categories=["a", "b"]))
  450. tm.assert_series_equal(left, right)
  451. def test_constructor_maskedarray(self):
  452. data = ma.masked_all((3,), dtype=float)
  453. result = Series(data)
  454. expected = Series([np.nan, np.nan, np.nan])
  455. tm.assert_series_equal(result, expected)
  456. data[0] = 0.0
  457. data[2] = 2.0
  458. index = ["a", "b", "c"]
  459. result = Series(data, index=index)
  460. expected = Series([0.0, np.nan, 2.0], index=index)
  461. tm.assert_series_equal(result, expected)
  462. data[1] = 1.0
  463. result = Series(data, index=index)
  464. expected = Series([0.0, 1.0, 2.0], index=index)
  465. tm.assert_series_equal(result, expected)
  466. data = ma.masked_all((3,), dtype=int)
  467. result = Series(data)
  468. expected = Series([np.nan, np.nan, np.nan], dtype=float)
  469. tm.assert_series_equal(result, expected)
  470. data[0] = 0
  471. data[2] = 2
  472. index = ["a", "b", "c"]
  473. result = Series(data, index=index)
  474. expected = Series([0, np.nan, 2], index=index, dtype=float)
  475. tm.assert_series_equal(result, expected)
  476. data[1] = 1
  477. result = Series(data, index=index)
  478. expected = Series([0, 1, 2], index=index, dtype=int)
  479. tm.assert_series_equal(result, expected)
  480. data = ma.masked_all((3,), dtype=bool)
  481. result = Series(data)
  482. expected = Series([np.nan, np.nan, np.nan], dtype=object)
  483. tm.assert_series_equal(result, expected)
  484. data[0] = True
  485. data[2] = False
  486. index = ["a", "b", "c"]
  487. result = Series(data, index=index)
  488. expected = Series([True, np.nan, False], index=index, dtype=object)
  489. tm.assert_series_equal(result, expected)
  490. data[1] = True
  491. result = Series(data, index=index)
  492. expected = Series([True, True, False], index=index, dtype=bool)
  493. tm.assert_series_equal(result, expected)
  494. data = ma.masked_all((3,), dtype="M8[ns]")
  495. result = Series(data)
  496. expected = Series([iNaT, iNaT, iNaT], dtype="M8[ns]")
  497. tm.assert_series_equal(result, expected)
  498. data[0] = datetime(2001, 1, 1)
  499. data[2] = datetime(2001, 1, 3)
  500. index = ["a", "b", "c"]
  501. result = Series(data, index=index)
  502. expected = Series(
  503. [datetime(2001, 1, 1), iNaT, datetime(2001, 1, 3)],
  504. index=index,
  505. dtype="M8[ns]",
  506. )
  507. tm.assert_series_equal(result, expected)
  508. data[1] = datetime(2001, 1, 2)
  509. result = Series(data, index=index)
  510. expected = Series(
  511. [datetime(2001, 1, 1), datetime(2001, 1, 2), datetime(2001, 1, 3)],
  512. index=index,
  513. dtype="M8[ns]",
  514. )
  515. tm.assert_series_equal(result, expected)
  516. def test_constructor_maskedarray_hardened(self):
  517. # Check numpy masked arrays with hard masks -- from GH24574
  518. data = ma.masked_all((3,), dtype=float).harden_mask()
  519. result = Series(data)
  520. expected = Series([np.nan, np.nan, np.nan])
  521. tm.assert_series_equal(result, expected)
  522. def test_series_ctor_plus_datetimeindex(self):
  523. rng = date_range("20090415", "20090519", freq="B")
  524. data = {k: 1 for k in rng}
  525. result = Series(data, index=rng)
  526. assert result.index is rng
  527. def test_constructor_default_index(self):
  528. s = Series([0, 1, 2])
  529. tm.assert_index_equal(s.index, Index(range(3)), exact=True)
  @pytest.mark.parametrize(
      "input",
      [
          [1, 2, 3],
          (1, 2, 3),
          list(range(3)),
          Categorical(["a", "b", "a"]),
          (i for i in range(3)),
          map(lambda x: x, range(3)),
      ],
  )
  def test_constructor_index_mismatch(self, input):
      """GH 19342: three values with a length-4 index raises ``ValueError``."""
      # test that construction of a Series with an index of different length
      # raises an error
      pattern = r"Length of values \(3\) does not match length of index \(4\)"
      too_long_index = np.arange(4)
      with pytest.raises(ValueError, match=pattern):
          Series(input, index=too_long_index)
  548. def test_constructor_numpy_scalar(self):
  549. # GH 19342
  550. # construction with a numpy scalar
  551. # should not raise
  552. result = Series(np.array(100), index=np.arange(4), dtype="int64")
  553. expected = Series(100, index=np.arange(4), dtype="int64")
  554. tm.assert_series_equal(result, expected)
  def test_constructor_broadcast_list(self):
      """GH 19342: a one-element list is not broadcast over a longer index."""
      # construction with single-element container and index
      # should raise
      pattern = r"Length of values \(1\) does not match length of index \(3\)"
      with pytest.raises(ValueError, match=pattern):
          Series(["foo"], index=["a", "b", "c"])
  562. def test_constructor_corner(self):
  563. df = tm.makeTimeDataFrame()
  564. objs = [df, df]
  565. s = Series(objs, index=[0, 1])
  566. assert isinstance(s, Series)
  def test_constructor_sanitize(self):
      """Whole-number floats cast to ``i8``; a NaN raises ``IntCastingNaNError``."""
      whole = np.array([1.0, 1.0, 8.0])
      ser = Series(whole, dtype="i8")
      assert ser.dtype == np.dtype("i8")

      with_nan = np.array([1.0, 1.0, np.nan])
      pattern = r"Cannot convert non-finite values \(NA or inf\) to integer"
      with pytest.raises(IntCastingNaNError, match=pattern):
          Series(with_nan, copy=True, dtype="i8")
  573. def test_constructor_copy(self):
  574. # GH15125
  575. # test dtype parameter has no side effects on copy=True
  576. for data in [[1.0], np.array([1.0])]:
  577. x = Series(data)
  578. y = Series(x, copy=True, dtype=float)
  579. # copy=True maintains original data in Series
  580. tm.assert_series_equal(x, y)
  581. # changes to origin of copy does not affect the copy
  582. x[0] = 2.0
  583. assert not x.equals(y)
  584. assert x[0] == 2.0
  585. assert y[0] == 1.0
  @td.skip_array_manager_invalid_test  # TODO(ArrayManager) rewrite test
  @pytest.mark.parametrize(
      "index",
      [
          date_range("20170101", periods=3, tz="US/Eastern"),
          date_range("20170101", periods=3),
          timedelta_range("1 day", periods=3),
          period_range("2012Q1", periods=3, freq="Q"),
          Index(list("abc")),
          Index([1, 2, 3]),
          RangeIndex(0, 3),
      ],
      ids=lambda x: type(x).__name__,
  )
  def test_constructor_limit_copies(self, index):
      """GH 17449: the stored block values are not the ``Index`` object itself."""
      # limit copies of input
      ser = Series(index)

      # we make 1 copy; this is just a smoke test here
      stored = ser._mgr.blocks[0].values
      assert stored is not index
  606. def test_constructor_shallow_copy(self):
  607. # constructing a Series from Series with copy=False should still
  608. # give a "shallow" copy (share data, not attributes)
  609. # https://github.com/pandas-dev/pandas/issues/49523
  610. s = Series([1, 2, 3])
  611. s_orig = s.copy()
  612. s2 = Series(s)
  613. assert s2._mgr is not s._mgr
  614. # Overwriting index of s2 doesn't change s
  615. s2.index = ["a", "b", "c"]
  616. tm.assert_series_equal(s, s_orig)
  617. def test_constructor_pass_none(self):
  618. s = Series(None, index=range(5))
  619. assert s.dtype == np.float64
  620. s = Series(None, index=range(5), dtype=object)
  621. assert s.dtype == np.object_
  622. # GH 7431
  623. # inference on the index
  624. s = Series(index=np.array([None]))
  625. expected = Series(index=Index([None]))
  626. tm.assert_series_equal(s, expected)
  627. def test_constructor_pass_nan_nat(self):
  628. # GH 13467
  629. exp = Series([np.nan, np.nan], dtype=np.float64)
  630. assert exp.dtype == np.float64
  631. tm.assert_series_equal(Series([np.nan, np.nan]), exp)
  632. tm.assert_series_equal(Series(np.array([np.nan, np.nan])), exp)
  633. exp = Series([NaT, NaT])
  634. assert exp.dtype == "datetime64[ns]"
  635. tm.assert_series_equal(Series([NaT, NaT]), exp)
  636. tm.assert_series_equal(Series(np.array([NaT, NaT])), exp)
  637. tm.assert_series_equal(Series([NaT, np.nan]), exp)
  638. tm.assert_series_equal(Series(np.array([NaT, np.nan])), exp)
  639. tm.assert_series_equal(Series([np.nan, NaT]), exp)
  640. tm.assert_series_equal(Series(np.array([np.nan, NaT])), exp)
  def test_constructor_cast(self):
      """Non-numeric strings with ``dtype=float`` raise ``ValueError``."""
      pattern = "could not convert string to float"
      letters = ["a", "b", "c"]
      with pytest.raises(ValueError, match=pattern):
          Series(letters, dtype=float)
  def test_constructor_signed_int_overflow_raises(self):
      """Values too large for ``int8`` or ``uint8`` raise instead of wrapping."""
      # GH#41734 disallow silent overflow, enforced in 2.0
      pattern = "Values are too large to be losslessly converted"
      for narrow_dtype in ("int8", "uint8"):
          with pytest.raises(ValueError, match=pattern):
              Series([1, 200, 923442], dtype=narrow_dtype)
  @pytest.mark.parametrize(
      "values",
      [
          np.array([1], dtype=np.uint16),
          np.array([1], dtype=np.uint32),
          np.array([1], dtype=np.uint64),
          [np.uint16(1)],
          [np.uint32(1)],
          [np.uint64(1)],
      ],
  )
  def test_constructor_numpy_uints(self, values):
      """GH#47294: the unsigned dtype of the input element is preserved."""
      original = values[0]

      stored = Series(values)[0]

      assert stored.dtype == original.dtype
      assert stored == original
  def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype):
      """A negative value with an unsigned dtype raises ``OverflowError``."""
      # see gh-15832
      pattern = "Trying to coerce negative values to unsigned integers"
      negative = [-1]
      with pytest.raises(OverflowError, match=pattern):
          Series(negative, dtype=any_unsigned_int_numpy_dtype)
  def test_constructor_floating_data_int_dtype(self, frame_or_series):
      """Float data with an integer dtype raises unless the cast is lossless.

      Covers fractional floats, floats containing NaN, and whole-number
      floats, each as an ndarray and as a list.
      """
      # GH#40110
      floats = np.random.randn(2)

      # Long-standing behavior (for Series, new in 2.0 for DataFrame)
      # has been to ignore the dtype on these;
      # not clear if this is what we want long-term
      # GH#49599 as of 2.0 we raise instead of silently retaining float dtype
      lossy_msg = "Trying to coerce float values to integer"
      with pytest.raises(ValueError, match=lossy_msg):
          frame_or_series(floats, dtype="i8")
      with pytest.raises(ValueError, match=lossy_msg):
          frame_or_series(list(floats), dtype="i8")

      # pre-2.0, when we had NaNs, we silently ignored the integer dtype
      floats[0] = np.nan
      nan_msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
      with pytest.raises(IntCastingNaNError, match=nan_msg):
          frame_or_series(floats, dtype="i8")

      # same behavior if we pass list instead of the ndarray, except that
      # Series raises a different exception and message
      # TODO: try to align these
      if frame_or_series is Series:
          list_exc = ValueError
          list_msg = "cannot convert float NaN to integer"
      else:
          list_exc = IntCastingNaNError
          list_msg = nan_msg
      with pytest.raises(list_exc, match=list_msg):
          frame_or_series(list(floats), dtype="i8")

      # float array that can be losslessly cast to integers
      whole = np.array([1.0, 2.0], dtype="float64")
      expected = frame_or_series(whole.astype("i8"))

      tm.assert_equal(frame_or_series(whole, dtype="i8"), expected)
      tm.assert_equal(frame_or_series(list(whole), dtype="i8"), expected)
  def test_constructor_coerce_float_fail(self, any_int_numpy_dtype):
      """A fractional float with an integer dtype raises for list and ndarray."""
      # see gh-15832
      # Updated: make sure we treat this list the same as we would treat
      # the equivalent ndarray
      # GH#49599 pre-2.0 we silently retained float dtype, in 2.0 we raise
      fractional = [1, 2, 3.5]
      pattern = "Trying to coerce float values to integer"

      with pytest.raises(ValueError, match=pattern):
          Series(fractional, dtype=any_int_numpy_dtype)

      with pytest.raises(ValueError, match=pattern):
          Series(np.array(fractional), dtype=any_int_numpy_dtype)
  719. def test_constructor_coerce_float_valid(self, float_numpy_dtype):
  720. s = Series([1, 2, 3.5], dtype=float_numpy_dtype)
  721. expected = Series([1, 2, 3.5]).astype(float_numpy_dtype)
  722. tm.assert_series_equal(s, expected)
  def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtype):
      """Ints plus NaN with an integer dtype raise, for list and ndarray alike.

      The two input kinds currently raise different exception types.
      """
      # GH 22585
      # Updated: make sure we treat this list the same as we would treat the
      # equivalent ndarray
      with_nan = [1, 2, np.nan]

      # pre-2.0 this would return with a float dtype, in 2.0 we raise
      list_pattern = "cannot convert float NaN to integer"
      with pytest.raises(ValueError, match=list_pattern):
          Series(with_nan, dtype=any_int_numpy_dtype)

      array_pattern = r"Cannot convert non-finite values \(NA or inf\) to integer"
      with pytest.raises(IntCastingNaNError, match=array_pattern):
          Series(np.array(with_nan), dtype=any_int_numpy_dtype)
  735. def test_constructor_dtype_no_cast(self, using_copy_on_write):
  736. # see gh-1572
  737. s = Series([1, 2, 3])
  738. s2 = Series(s, dtype=np.int64)
  739. s2[1] = 5
  740. if using_copy_on_write:
  741. assert s[1] == 2
  742. else:
  743. assert s[1] == 5
  744. def test_constructor_datelike_coercion(self):
  745. # GH 9477
  746. # incorrectly inferring on dateimelike looking when object dtype is
  747. # specified
  748. s = Series([Timestamp("20130101"), "NOV"], dtype=object)
  749. assert s.iloc[0] == Timestamp("20130101")
  750. assert s.iloc[1] == "NOV"
  751. assert s.dtype == object
  752. def test_constructor_datelike_coercion2(self):
  753. # the dtype was being reset on the slicing and re-inferred to datetime
  754. # even thought the blocks are mixed
  755. belly = "216 3T19".split()
  756. wing1 = "2T15 4H19".split()
  757. wing2 = "416 4T20".split()
  758. mat = pd.to_datetime("2016-01-22 2019-09-07".split())
  759. df = DataFrame({"wing1": wing1, "wing2": wing2, "mat": mat}, index=belly)
  760. result = df.loc["3T19"]
  761. assert result.dtype == object
  762. result = df.loc["216"]
  763. assert result.dtype == object
  764. def test_constructor_mixed_int_and_timestamp(self, frame_or_series):
  765. # specifically Timestamp with nanos, not datetimes
  766. objs = [Timestamp(9), 10, NaT._value]
  767. result = frame_or_series(objs, dtype="M8[ns]")
  768. expected = frame_or_series([Timestamp(9), Timestamp(10), NaT])
  769. tm.assert_equal(result, expected)
  770. def test_constructor_datetimes_with_nulls(self):
  771. # gh-15869
  772. for arr in [
  773. np.array([None, None, None, None, datetime.now(), None]),
  774. np.array([None, None, datetime.now(), None]),
  775. ]:
  776. result = Series(arr)
  777. assert result.dtype == "M8[ns]"
  778. def test_constructor_dtype_datetime64(self):
  779. s = Series(iNaT, dtype="M8[ns]", index=range(5))
  780. assert isna(s).all()
  781. # in theory this should be all nulls, but since
  782. # we are not specifying a dtype is ambiguous
  783. s = Series(iNaT, index=range(5))
  784. assert not isna(s).all()
  785. s = Series(np.nan, dtype="M8[ns]", index=range(5))
  786. assert isna(s).all()
  787. s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype="M8[ns]")
  788. assert isna(s[1])
  789. assert s.dtype == "M8[ns]"
  790. s = Series([datetime(2001, 1, 2, 0, 0), np.nan], dtype="M8[ns]")
  791. assert isna(s[1])
  792. assert s.dtype == "M8[ns]"
  793. def test_constructor_dtype_datetime64_10(self):
  794. # GH3416
  795. pydates = [datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3)]
  796. dates = [np.datetime64(x) for x in pydates]
  797. ser = Series(dates)
  798. assert ser.dtype == "M8[ns]"
  799. ser.iloc[0] = np.nan
  800. assert ser.dtype == "M8[ns]"
  801. # GH3414 related
  802. expected = Series(pydates, dtype="datetime64[ms]")
  803. result = Series(Series(dates).view(np.int64) / 1000000, dtype="M8[ms]")
  804. tm.assert_series_equal(result, expected)
  805. result = Series(dates, dtype="datetime64[ms]")
  806. tm.assert_series_equal(result, expected)
  807. expected = Series(
  808. [NaT, datetime(2013, 1, 2), datetime(2013, 1, 3)], dtype="datetime64[ns]"
  809. )
  810. result = Series([np.nan] + dates[1:], dtype="datetime64[ns]")
  811. tm.assert_series_equal(result, expected)
  812. def test_constructor_dtype_datetime64_11(self):
  813. pydates = [datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3)]
  814. dates = [np.datetime64(x) for x in pydates]
  815. dts = Series(dates, dtype="datetime64[ns]")
  816. # valid astype
  817. dts.astype("int64")
  818. # invalid casting
  819. msg = r"Converting from datetime64\[ns\] to int32 is not supported"
  820. with pytest.raises(TypeError, match=msg):
  821. dts.astype("int32")
  822. # ints are ok
  823. # we test with np.int64 to get similar results on
  824. # windows / 32-bit platforms
  825. result = Series(dts, dtype=np.int64)
  826. expected = Series(dts.astype(np.int64))
  827. tm.assert_series_equal(result, expected)
  828. def test_constructor_dtype_datetime64_9(self):
  829. # invalid dates can be help as object
  830. result = Series([datetime(2, 1, 1)])
  831. assert result[0] == datetime(2, 1, 1, 0, 0)
  832. result = Series([datetime(3000, 1, 1)])
  833. assert result[0] == datetime(3000, 1, 1, 0, 0)
  834. def test_constructor_dtype_datetime64_8(self):
  835. # don't mix types
  836. result = Series([Timestamp("20130101"), 1], index=["a", "b"])
  837. assert result["a"] == Timestamp("20130101")
  838. assert result["b"] == 1
  839. def test_constructor_dtype_datetime64_7(self):
  840. # GH6529
  841. # coerce datetime64 non-ns properly
  842. dates = date_range("01-Jan-2015", "01-Dec-2015", freq="M")
  843. values2 = dates.view(np.ndarray).astype("datetime64[ns]")
  844. expected = Series(values2, index=dates)
  845. for unit in ["s", "D", "ms", "us", "ns"]:
  846. dtype = np.dtype(f"M8[{unit}]")
  847. values1 = dates.view(np.ndarray).astype(dtype)
  848. result = Series(values1, dates)
  849. if unit == "D":
  850. # for unit="D" we cast to nearest-supported reso, i.e. "s"
  851. dtype = np.dtype("M8[s]")
  852. assert result.dtype == dtype
  853. tm.assert_series_equal(result, expected.astype(dtype))
  854. # GH 13876
  855. # coerce to non-ns to object properly
  856. expected = Series(values2, index=dates, dtype=object)
  857. for dtype in ["s", "D", "ms", "us", "ns"]:
  858. values1 = dates.view(np.ndarray).astype(f"M8[{dtype}]")
  859. result = Series(values1, index=dates, dtype=object)
  860. tm.assert_series_equal(result, expected)
  861. # leave datetime.date alone
  862. dates2 = np.array([d.date() for d in dates.to_pydatetime()], dtype=object)
  863. series1 = Series(dates2, dates)
  864. tm.assert_numpy_array_equal(series1.values, dates2)
  865. assert series1.dtype == object
  866. def test_constructor_dtype_datetime64_6(self):
  867. # as of 2.0, these no longer infer datetime64 based on the strings,
  868. # matching the Index behavior
  869. ser = Series([None, NaT, "2013-08-05 15:30:00.000001"])
  870. assert ser.dtype == object
  871. ser = Series([np.nan, NaT, "2013-08-05 15:30:00.000001"])
  872. assert ser.dtype == object
  873. ser = Series([NaT, None, "2013-08-05 15:30:00.000001"])
  874. assert ser.dtype == object
  875. ser = Series([NaT, np.nan, "2013-08-05 15:30:00.000001"])
  876. assert ser.dtype == object
  877. def test_constructor_dtype_datetime64_5(self):
  878. # tz-aware (UTC and other tz's)
  879. # GH 8411
  880. dr = date_range("20130101", periods=3)
  881. assert Series(dr).iloc[0].tz is None
  882. dr = date_range("20130101", periods=3, tz="UTC")
  883. assert str(Series(dr).iloc[0].tz) == "UTC"
  884. dr = date_range("20130101", periods=3, tz="US/Eastern")
  885. assert str(Series(dr).iloc[0].tz) == "US/Eastern"
  886. def test_constructor_dtype_datetime64_4(self):
  887. # non-convertible
  888. s = Series([1479596223000, -1479590, NaT])
  889. assert s.dtype == "object"
  890. assert s[2] is NaT
  891. assert "NaT" in str(s)
  892. def test_constructor_dtype_datetime64_3(self):
  893. # if we passed a NaT it remains
  894. s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), NaT])
  895. assert s.dtype == "object"
  896. assert s[2] is NaT
  897. assert "NaT" in str(s)
  898. def test_constructor_dtype_datetime64_2(self):
  899. # if we passed a nan it remains
  900. s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan])
  901. assert s.dtype == "object"
  902. assert s[2] is np.nan
  903. assert "NaN" in str(s)
  904. def test_constructor_with_datetime_tz(self):
  905. # 8260
  906. # support datetime64 with tz
  907. dr = date_range("20130101", periods=3, tz="US/Eastern")
  908. s = Series(dr)
  909. assert s.dtype.name == "datetime64[ns, US/Eastern]"
  910. assert s.dtype == "datetime64[ns, US/Eastern]"
  911. assert is_datetime64tz_dtype(s.dtype)
  912. assert "datetime64[ns, US/Eastern]" in str(s)
  913. # export
  914. result = s.values
  915. assert isinstance(result, np.ndarray)
  916. assert result.dtype == "datetime64[ns]"
  917. exp = DatetimeIndex(result)
  918. exp = exp.tz_localize("UTC").tz_convert(tz=s.dt.tz)
  919. tm.assert_index_equal(dr, exp)
  920. # indexing
  921. result = s.iloc[0]
  922. assert result == Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern")
  923. result = s[0]
  924. assert result == Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern")
  925. result = s[Series([True, True, False], index=s.index)]
  926. tm.assert_series_equal(result, s[0:2])
  927. result = s.iloc[0:1]
  928. tm.assert_series_equal(result, Series(dr[0:1]))
  929. # concat
  930. result = pd.concat([s.iloc[0:1], s.iloc[1:]])
  931. tm.assert_series_equal(result, s)
  932. # short str
  933. assert "datetime64[ns, US/Eastern]" in str(s)
  934. # formatting with NaT
  935. result = s.shift()
  936. assert "datetime64[ns, US/Eastern]" in str(result)
  937. assert "NaT" in str(result)
  938. # long str
  939. t = Series(date_range("20130101", periods=1000, tz="US/Eastern"))
  940. assert "datetime64[ns, US/Eastern]" in str(t)
  941. result = DatetimeIndex(s, freq="infer")
  942. tm.assert_index_equal(result, dr)
  943. def test_constructor_with_datetime_tz4(self):
  944. # inference
  945. s = Series(
  946. [
  947. Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"),
  948. Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"),
  949. ]
  950. )
  951. assert s.dtype == "datetime64[ns, US/Pacific]"
  952. assert lib.infer_dtype(s, skipna=True) == "datetime64"
  953. def test_constructor_with_datetime_tz3(self):
  954. s = Series(
  955. [
  956. Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"),
  957. Timestamp("2013-01-02 14:00:00-0800", tz="US/Eastern"),
  958. ]
  959. )
  960. assert s.dtype == "object"
  961. assert lib.infer_dtype(s, skipna=True) == "datetime"
  962. def test_constructor_with_datetime_tz2(self):
  963. # with all NaT
  964. s = Series(NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]")
  965. expected = Series(DatetimeIndex(["NaT", "NaT"], tz="US/Eastern"))
  966. tm.assert_series_equal(s, expected)
  967. def test_constructor_no_partial_datetime_casting(self):
  968. # GH#40111
  969. vals = [
  970. "nan",
  971. Timestamp("1990-01-01"),
  972. "2015-03-14T16:15:14.123-08:00",
  973. "2019-03-04T21:56:32.620-07:00",
  974. None,
  975. ]
  976. ser = Series(vals)
  977. assert all(ser[i] is vals[i] for i in range(len(vals)))
  978. @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
  979. @pytest.mark.parametrize("kind", ["M", "m"])
  980. @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"])
  981. def test_construction_to_datetimelike_unit(self, arr_dtype, kind, unit):
  982. # tests all units
  983. # gh-19223
  984. # TODO: GH#19223 was about .astype, doesn't belong here
  985. dtype = f"{kind}8[{unit}]"
  986. arr = np.array([1, 2, 3], dtype=arr_dtype)
  987. ser = Series(arr)
  988. result = ser.astype(dtype)
  989. expected = Series(arr.astype(dtype))
  990. if unit in ["ns", "us", "ms", "s"]:
  991. assert result.dtype == dtype
  992. assert expected.dtype == dtype
  993. else:
  994. # Otherwise we cast to nearest-supported unit, i.e. seconds
  995. assert result.dtype == f"{kind}8[s]"
  996. assert expected.dtype == f"{kind}8[s]"
  997. tm.assert_series_equal(result, expected)
  998. @pytest.mark.parametrize("arg", ["2013-01-01 00:00:00", NaT, np.nan, None])
  999. def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg):
  1000. # GH 17415: With naive string
  1001. result = Series([arg], dtype="datetime64[ns, CET]")
  1002. expected = Series(Timestamp(arg)).dt.tz_localize("CET")
  1003. tm.assert_series_equal(result, expected)
  1004. def test_constructor_datetime64_bigendian(self):
  1005. # GH#30976
  1006. ms = np.datetime64(1, "ms")
  1007. arr = np.array([np.datetime64(1, "ms")], dtype=">M8[ms]")
  1008. result = Series(arr)
  1009. expected = Series([Timestamp(ms)]).astype("M8[ms]")
  1010. assert expected.dtype == "M8[ms]"
  1011. tm.assert_series_equal(result, expected)
  1012. @pytest.mark.parametrize("interval_constructor", [IntervalIndex, IntervalArray])
  1013. def test_construction_interval(self, interval_constructor):
  1014. # construction from interval & array of intervals
  1015. intervals = interval_constructor.from_breaks(np.arange(3), closed="right")
  1016. result = Series(intervals)
  1017. assert result.dtype == "interval[int64, right]"
  1018. tm.assert_index_equal(Index(result.values), Index(intervals))
  1019. @pytest.mark.parametrize(
  1020. "data_constructor", [list, np.array], ids=["list", "ndarray[object]"]
  1021. )
  1022. def test_constructor_infer_interval(self, data_constructor):
  1023. # GH 23563: consistent closed results in interval dtype
  1024. data = [Interval(0, 1), Interval(0, 2), None]
  1025. result = Series(data_constructor(data))
  1026. expected = Series(IntervalArray(data))
  1027. assert result.dtype == "interval[float64, right]"
  1028. tm.assert_series_equal(result, expected)
  1029. @pytest.mark.parametrize(
  1030. "data_constructor", [list, np.array], ids=["list", "ndarray[object]"]
  1031. )
  1032. def test_constructor_interval_mixed_closed(self, data_constructor):
  1033. # GH 23563: mixed closed results in object dtype (not interval dtype)
  1034. data = [Interval(0, 1, closed="both"), Interval(0, 2, closed="neither")]
  1035. result = Series(data_constructor(data))
  1036. assert result.dtype == object
  1037. assert result.tolist() == data
  1038. def test_construction_consistency(self):
  1039. # make sure that we are not re-localizing upon construction
  1040. # GH 14928
  1041. ser = Series(date_range("20130101", periods=3, tz="US/Eastern"))
  1042. result = Series(ser, dtype=ser.dtype)
  1043. tm.assert_series_equal(result, ser)
  1044. result = Series(ser.dt.tz_convert("UTC"), dtype=ser.dtype)
  1045. tm.assert_series_equal(result, ser)
  1046. # Pre-2.0 dt64 values were treated as utc, which was inconsistent
  1047. # with DatetimeIndex, which treats them as wall times, see GH#33401
  1048. result = Series(ser.values, dtype=ser.dtype)
  1049. expected = Series(ser.values).dt.tz_localize(ser.dtype.tz)
  1050. tm.assert_series_equal(result, expected)
  1051. with tm.assert_produces_warning(None):
  1052. # one suggested alternative to the deprecated (changed in 2.0) usage
  1053. middle = Series(ser.values).dt.tz_localize("UTC")
  1054. result = middle.dt.tz_convert(ser.dtype.tz)
  1055. tm.assert_series_equal(result, ser)
  1056. with tm.assert_produces_warning(None):
  1057. # the other suggested alternative to the deprecated usage
  1058. result = Series(ser.values.view("int64"), dtype=ser.dtype)
  1059. tm.assert_series_equal(result, ser)
  1060. @pytest.mark.parametrize(
  1061. "data_constructor", [list, np.array], ids=["list", "ndarray[object]"]
  1062. )
  1063. def test_constructor_infer_period(self, data_constructor):
  1064. data = [Period("2000", "D"), Period("2001", "D"), None]
  1065. result = Series(data_constructor(data))
  1066. expected = Series(period_array(data))
  1067. tm.assert_series_equal(result, expected)
  1068. assert result.dtype == "Period[D]"
  1069. @pytest.mark.xfail(reason="PeriodDtype Series not supported yet")
  1070. def test_construct_from_ints_including_iNaT_scalar_period_dtype(self):
  1071. series = Series([0, 1000, 2000, pd._libs.iNaT], dtype="period[D]")
  1072. val = series[3]
  1073. assert isna(val)
  1074. series[2] = val
  1075. assert isna(series[2])
  1076. def test_constructor_period_incompatible_frequency(self):
  1077. data = [Period("2000", "D"), Period("2001", "A")]
  1078. result = Series(data)
  1079. assert result.dtype == object
  1080. assert result.tolist() == data
  1081. def test_constructor_periodindex(self):
  1082. # GH7932
  1083. # converting a PeriodIndex when put in a Series
  1084. pi = period_range("20130101", periods=5, freq="D")
  1085. s = Series(pi)
  1086. assert s.dtype == "Period[D]"
  1087. expected = Series(pi.astype(object))
  1088. tm.assert_series_equal(s, expected)
  1089. def test_constructor_dict(self):
  1090. d = {"a": 0.0, "b": 1.0, "c": 2.0}
  1091. result = Series(d)
  1092. expected = Series(d, index=sorted(d.keys()))
  1093. tm.assert_series_equal(result, expected)
  1094. result = Series(d, index=["b", "c", "d", "a"])
  1095. expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"])
  1096. tm.assert_series_equal(result, expected)
  1097. pidx = tm.makePeriodIndex(100)
  1098. d = {pidx[0]: 0, pidx[1]: 1}
  1099. result = Series(d, index=pidx)
  1100. expected = Series(np.nan, pidx, dtype=np.float64)
  1101. expected.iloc[0] = 0
  1102. expected.iloc[1] = 1
  1103. tm.assert_series_equal(result, expected)
  1104. def test_constructor_dict_list_value_explicit_dtype(self):
  1105. # GH 18625
  1106. d = {"a": [[2], [3], [4]]}
  1107. result = Series(d, index=["a"], dtype="object")
  1108. expected = Series(d, index=["a"])
  1109. tm.assert_series_equal(result, expected)
  1110. def test_constructor_dict_order(self):
  1111. # GH19018
  1112. # initialization ordering: by insertion order if python>= 3.6, else
  1113. # order by value
  1114. d = {"b": 1, "a": 0, "c": 2}
  1115. result = Series(d)
  1116. expected = Series([1, 0, 2], index=list("bac"))
  1117. tm.assert_series_equal(result, expected)
  1118. def test_constructor_dict_extension(self, ea_scalar_and_dtype):
  1119. ea_scalar, ea_dtype = ea_scalar_and_dtype
  1120. d = {"a": ea_scalar}
  1121. result = Series(d, index=["a"])
  1122. expected = Series(ea_scalar, index=["a"], dtype=ea_dtype)
  1123. assert result.dtype == ea_dtype
  1124. tm.assert_series_equal(result, expected)
  1125. @pytest.mark.parametrize("value", [2, np.nan, None, float("nan")])
  1126. def test_constructor_dict_nan_key(self, value):
  1127. # GH 18480
  1128. d = {1: "a", value: "b", float("nan"): "c", 4: "d"}
  1129. result = Series(d).sort_values()
  1130. expected = Series(["a", "b", "c", "d"], index=[1, value, np.nan, 4])
  1131. tm.assert_series_equal(result, expected)
  1132. # MultiIndex:
  1133. d = {(1, 1): "a", (2, np.nan): "b", (3, value): "c"}
  1134. result = Series(d).sort_values()
  1135. expected = Series(
  1136. ["a", "b", "c"], index=Index([(1, 1), (2, np.nan), (3, value)])
  1137. )
  1138. tm.assert_series_equal(result, expected)
  1139. def test_constructor_dict_datetime64_index(self):
  1140. # GH 9456
  1141. dates_as_str = ["1984-02-19", "1988-11-06", "1989-12-03", "1990-03-15"]
  1142. values = [42544017.198965244, 1234565, 40512335.181958228, -1]
  1143. def create_data(constructor):
  1144. return dict(zip((constructor(x) for x in dates_as_str), values))
  1145. data_datetime64 = create_data(np.datetime64)
  1146. data_datetime = create_data(lambda x: datetime.strptime(x, "%Y-%m-%d"))
  1147. data_Timestamp = create_data(Timestamp)
  1148. expected = Series(values, (Timestamp(x) for x in dates_as_str))
  1149. result_datetime64 = Series(data_datetime64)
  1150. result_datetime = Series(data_datetime)
  1151. result_Timestamp = Series(data_Timestamp)
  1152. tm.assert_series_equal(result_datetime64, expected)
  1153. tm.assert_series_equal(result_datetime, expected)
  1154. tm.assert_series_equal(result_Timestamp, expected)
  1155. def test_constructor_dict_tuple_indexer(self):
  1156. # GH 12948
  1157. data = {(1, 1, None): -1.0}
  1158. result = Series(data)
  1159. expected = Series(
  1160. -1.0, index=MultiIndex(levels=[[1], [1], [np.nan]], codes=[[0], [0], [-1]])
  1161. )
  1162. tm.assert_series_equal(result, expected)
  1163. def test_constructor_mapping(self, non_dict_mapping_subclass):
  1164. # GH 29788
  1165. ndm = non_dict_mapping_subclass({3: "three"})
  1166. result = Series(ndm)
  1167. expected = Series(["three"], index=[3])
  1168. tm.assert_series_equal(result, expected)
  1169. def test_constructor_list_of_tuples(self):
  1170. data = [(1, 1), (2, 2), (2, 3)]
  1171. s = Series(data)
  1172. assert list(s) == data
  1173. def test_constructor_tuple_of_tuples(self):
  1174. data = ((1, 1), (2, 2), (2, 3))
  1175. s = Series(data)
  1176. assert tuple(s) == data
  1177. def test_constructor_dict_of_tuples(self):
  1178. data = {(1, 2): 3, (None, 5): 6}
  1179. result = Series(data).sort_values()
  1180. expected = Series([3, 6], index=MultiIndex.from_tuples([(1, 2), (None, 5)]))
  1181. tm.assert_series_equal(result, expected)
  1182. # https://github.com/pandas-dev/pandas/issues/22698
  1183. @pytest.mark.filterwarnings("ignore:elementwise comparison:FutureWarning")
  1184. def test_fromDict(self):
  1185. data = {"a": 0, "b": 1, "c": 2, "d": 3}
  1186. series = Series(data)
  1187. tm.assert_is_sorted(series.index)
  1188. data = {"a": 0, "b": "1", "c": "2", "d": datetime.now()}
  1189. series = Series(data)
  1190. assert series.dtype == np.object_
  1191. data = {"a": 0, "b": "1", "c": "2", "d": "3"}
  1192. series = Series(data)
  1193. assert series.dtype == np.object_
  1194. data = {"a": "0", "b": "1"}
  1195. series = Series(data, dtype=float)
  1196. assert series.dtype == np.float64
  1197. def test_fromValue(self, datetime_series):
  1198. nans = Series(np.NaN, index=datetime_series.index, dtype=np.float64)
  1199. assert nans.dtype == np.float_
  1200. assert len(nans) == len(datetime_series)
  1201. strings = Series("foo", index=datetime_series.index)
  1202. assert strings.dtype == np.object_
  1203. assert len(strings) == len(datetime_series)
  1204. d = datetime.now()
  1205. dates = Series(d, index=datetime_series.index)
  1206. assert dates.dtype == "M8[ns]"
  1207. assert len(dates) == len(datetime_series)
  1208. # GH12336
  1209. # Test construction of categorical series from value
  1210. categorical = Series(0, index=datetime_series.index, dtype="category")
  1211. expected = Series(0, index=datetime_series.index).astype("category")
  1212. assert categorical.dtype == "category"
  1213. assert len(categorical) == len(datetime_series)
  1214. tm.assert_series_equal(categorical, expected)
  1215. def test_constructor_dtype_timedelta64(self):
  1216. # basic
  1217. td = Series([timedelta(days=i) for i in range(3)])
  1218. assert td.dtype == "timedelta64[ns]"
  1219. td = Series([timedelta(days=1)])
  1220. assert td.dtype == "timedelta64[ns]"
  1221. td = Series([timedelta(days=1), timedelta(days=2), np.timedelta64(1, "s")])
  1222. assert td.dtype == "timedelta64[ns]"
  1223. # mixed with NaT
  1224. td = Series([timedelta(days=1), NaT], dtype="m8[ns]")
  1225. assert td.dtype == "timedelta64[ns]"
  1226. td = Series([timedelta(days=1), np.nan], dtype="m8[ns]")
  1227. assert td.dtype == "timedelta64[ns]"
  1228. td = Series([np.timedelta64(300000000), NaT], dtype="m8[ns]")
  1229. assert td.dtype == "timedelta64[ns]"
  1230. # improved inference
  1231. # GH5689
  1232. td = Series([np.timedelta64(300000000), NaT])
  1233. assert td.dtype == "timedelta64[ns]"
  1234. # because iNaT is int, not coerced to timedelta
  1235. td = Series([np.timedelta64(300000000), iNaT])
  1236. assert td.dtype == "object"
  1237. td = Series([np.timedelta64(300000000), np.nan])
  1238. assert td.dtype == "timedelta64[ns]"
  1239. td = Series([NaT, np.timedelta64(300000000)])
  1240. assert td.dtype == "timedelta64[ns]"
  1241. td = Series([np.timedelta64(1, "s")])
  1242. assert td.dtype == "timedelta64[ns]"
  1243. # valid astype
  1244. td.astype("int64")
  1245. # invalid casting
  1246. msg = r"Converting from timedelta64\[ns\] to int32 is not supported"
  1247. with pytest.raises(TypeError, match=msg):
  1248. td.astype("int32")
  1249. # this is an invalid casting
  1250. msg = "|".join(
  1251. [
  1252. "Could not convert object to NumPy timedelta",
  1253. "Could not convert 'foo' to NumPy timedelta",
  1254. ]
  1255. )
  1256. with pytest.raises(ValueError, match=msg):
  1257. Series([timedelta(days=1), "foo"], dtype="m8[ns]")
  1258. # leave as object here
  1259. td = Series([timedelta(days=i) for i in range(3)] + ["foo"])
  1260. assert td.dtype == "object"
  1261. # as of 2.0, these no longer infer timedelta64 based on the strings,
  1262. # matching Index behavior
  1263. ser = Series([None, NaT, "1 Day"])
  1264. assert ser.dtype == object
  1265. ser = Series([np.nan, NaT, "1 Day"])
  1266. assert ser.dtype == object
  1267. ser = Series([NaT, None, "1 Day"])
  1268. assert ser.dtype == object
  1269. ser = Series([NaT, np.nan, "1 Day"])
  1270. assert ser.dtype == object
  1271. # GH 16406
  1272. def test_constructor_mixed_tz(self):
  1273. s = Series([Timestamp("20130101"), Timestamp("20130101", tz="US/Eastern")])
  1274. expected = Series(
  1275. [Timestamp("20130101"), Timestamp("20130101", tz="US/Eastern")],
  1276. dtype="object",
  1277. )
  1278. tm.assert_series_equal(s, expected)
  1279. def test_NaT_scalar(self):
  1280. series = Series([0, 1000, 2000, iNaT], dtype="M8[ns]")
  1281. val = series[3]
  1282. assert isna(val)
  1283. series[2] = val
  1284. assert isna(series[2])
  1285. def test_NaT_cast(self):
  1286. # GH10747
  1287. result = Series([np.nan]).astype("M8[ns]")
  1288. expected = Series([NaT])
  1289. tm.assert_series_equal(result, expected)
  1290. def test_constructor_name_hashable(self):
  1291. for n in [777, 777.0, "name", datetime(2001, 11, 11), (1,), "\u05D0"]:
  1292. for data in [[1, 2, 3], np.ones(3), {"a": 0, "b": 1}]:
  1293. s = Series(data, name=n)
  1294. assert s.name == n
  1295. def test_constructor_name_unhashable(self):
  1296. msg = r"Series\.name must be a hashable type"
  1297. for n in [["name_list"], np.ones(2), {1: 2}]:
  1298. for data in [["name_list"], np.ones(2), {1: 2}]:
  1299. with pytest.raises(TypeError, match=msg):
  1300. Series(data, name=n)
  1301. def test_auto_conversion(self):
  1302. series = Series(list(date_range("1/1/2000", periods=10)))
  1303. assert series.dtype == "M8[ns]"
  1304. def test_convert_non_ns(self):
  1305. # convert from a numpy array of non-ns timedelta64
  1306. arr = np.array([1, 2, 3], dtype="timedelta64[s]")
  1307. ser = Series(arr)
  1308. assert ser.dtype == arr.dtype
  1309. tdi = timedelta_range("00:00:01", periods=3, freq="s").as_unit("s")
  1310. expected = Series(tdi)
  1311. assert expected.dtype == arr.dtype
  1312. tm.assert_series_equal(ser, expected)
  1313. # convert from a numpy array of non-ns datetime64
  1314. arr = np.array(
  1315. ["2013-01-01", "2013-01-02", "2013-01-03"], dtype="datetime64[D]"
  1316. )
  1317. ser = Series(arr)
  1318. expected = Series(date_range("20130101", periods=3, freq="D"), dtype="M8[s]")
  1319. assert expected.dtype == "M8[s]"
  1320. tm.assert_series_equal(ser, expected)
  1321. arr = np.array(
  1322. ["2013-01-01 00:00:01", "2013-01-01 00:00:02", "2013-01-01 00:00:03"],
  1323. dtype="datetime64[s]",
  1324. )
  1325. ser = Series(arr)
  1326. expected = Series(
  1327. date_range("20130101 00:00:01", periods=3, freq="s"), dtype="M8[s]"
  1328. )
  1329. assert expected.dtype == "M8[s]"
  1330. tm.assert_series_equal(ser, expected)
  1331. @pytest.mark.parametrize(
  1332. "index",
  1333. [
  1334. date_range("1/1/2000", periods=10),
  1335. timedelta_range("1 day", periods=10),
  1336. period_range("2000-Q1", periods=10, freq="Q"),
  1337. ],
  1338. ids=lambda x: type(x).__name__,
  1339. )
  1340. def test_constructor_cant_cast_datetimelike(self, index):
  1341. # floats are not ok
  1342. # strip Index to convert PeriodIndex -> Period
  1343. # We don't care whether the error message says
  1344. # PeriodIndex or PeriodArray
  1345. msg = f"Cannot cast {type(index).__name__.rstrip('Index')}.*? to "
  1346. with pytest.raises(TypeError, match=msg):
  1347. Series(index, dtype=float)
  1348. # ints are ok
  1349. # we test with np.int64 to get similar results on
  1350. # windows / 32-bit platforms
  1351. result = Series(index, dtype=np.int64)
  1352. expected = Series(index.astype(np.int64))
  1353. tm.assert_series_equal(result, expected)
  1354. @pytest.mark.parametrize(
  1355. "index",
  1356. [
  1357. date_range("1/1/2000", periods=10),
  1358. timedelta_range("1 day", periods=10),
  1359. period_range("2000-Q1", periods=10, freq="Q"),
  1360. ],
  1361. ids=lambda x: type(x).__name__,
  1362. )
  1363. def test_constructor_cast_object(self, index):
  1364. s = Series(index, dtype=object)
  1365. exp = Series(index).astype(object)
  1366. tm.assert_series_equal(s, exp)
  1367. s = Series(Index(index, dtype=object), dtype=object)
  1368. exp = Series(index).astype(object)
  1369. tm.assert_series_equal(s, exp)
  1370. s = Series(index.astype(object), dtype=object)
  1371. exp = Series(index).astype(object)
  1372. tm.assert_series_equal(s, exp)
  1373. @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
  1374. def test_constructor_generic_timestamp_no_frequency(self, dtype, request):
  1375. # see gh-15524, gh-15987
  1376. msg = "dtype has no unit. Please pass in"
  1377. if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
  1378. mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
  1379. request.node.add_marker(mark)
  1380. with pytest.raises(ValueError, match=msg):
  1381. Series([], dtype=dtype)
  1382. @pytest.mark.parametrize("unit", ["ps", "as", "fs", "Y", "M", "W", "D", "h", "m"])
  1383. @pytest.mark.parametrize("kind", ["m", "M"])
  1384. def test_constructor_generic_timestamp_bad_frequency(self, kind, unit):
  1385. # see gh-15524, gh-15987
  1386. # as of 2.0 we raise on any non-supported unit rather than silently
  1387. # cast to nanos; previously we only raised for frequencies higher
  1388. # than ns
  1389. dtype = f"{kind}8[{unit}]"
  1390. msg = "dtype=.* is not supported. Supported resolutions are"
  1391. with pytest.raises(TypeError, match=msg):
  1392. Series([], dtype=dtype)
  1393. with pytest.raises(TypeError, match=msg):
  1394. # pre-2.0 the DataFrame cast raised but the Series case did not
  1395. DataFrame([[0]], dtype=dtype)
  1396. @pytest.mark.parametrize("dtype", [None, "uint8", "category"])
  1397. def test_constructor_range_dtype(self, dtype):
  1398. # GH 16804
  1399. expected = Series([0, 1, 2, 3, 4], dtype=dtype or "int64")
  1400. result = Series(range(5), dtype=dtype)
  1401. tm.assert_series_equal(result, expected)
  1402. def test_constructor_range_overflows(self):
  1403. # GH#30173 range objects that overflow int64
  1404. rng = range(2**63, 2**63 + 4)
  1405. ser = Series(rng)
  1406. expected = Series(list(rng))
  1407. tm.assert_series_equal(ser, expected)
  1408. assert list(ser) == list(rng)
  1409. assert ser.dtype == np.uint64
  1410. rng2 = range(2**63 + 4, 2**63, -1)
  1411. ser2 = Series(rng2)
  1412. expected2 = Series(list(rng2))
  1413. tm.assert_series_equal(ser2, expected2)
  1414. assert list(ser2) == list(rng2)
  1415. assert ser2.dtype == np.uint64
  1416. rng3 = range(-(2**63), -(2**63) - 4, -1)
  1417. ser3 = Series(rng3)
  1418. expected3 = Series(list(rng3))
  1419. tm.assert_series_equal(ser3, expected3)
  1420. assert list(ser3) == list(rng3)
  1421. assert ser3.dtype == object
  1422. rng4 = range(2**73, 2**73 + 4)
  1423. ser4 = Series(rng4)
  1424. expected4 = Series(list(rng4))
  1425. tm.assert_series_equal(ser4, expected4)
  1426. assert list(ser4) == list(rng4)
  1427. assert ser4.dtype == object
  1428. def test_constructor_tz_mixed_data(self):
  1429. # GH 13051
  1430. dt_list = [
  1431. Timestamp("2016-05-01 02:03:37"),
  1432. Timestamp("2016-04-30 19:03:37-0700", tz="US/Pacific"),
  1433. ]
  1434. result = Series(dt_list)
  1435. expected = Series(dt_list, dtype=object)
  1436. tm.assert_series_equal(result, expected)
  1437. @pytest.mark.parametrize("pydt", [True, False])
  1438. def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture, pydt):
  1439. # GH#25843, GH#41555, GH#33401
  1440. tz = tz_aware_fixture
  1441. ts = Timestamp("2019", tz=tz)
  1442. if pydt:
  1443. ts = ts.to_pydatetime()
  1444. msg = (
  1445. "Cannot convert timezone-aware data to timezone-naive dtype. "
  1446. r"Use pd.Series\(values\).dt.tz_localize\(None\) instead."
  1447. )
  1448. with pytest.raises(ValueError, match=msg):
  1449. Series([ts], dtype="datetime64[ns]")
  1450. with pytest.raises(ValueError, match=msg):
  1451. Series(np.array([ts], dtype=object), dtype="datetime64[ns]")
  1452. with pytest.raises(ValueError, match=msg):
  1453. Series({0: ts}, dtype="datetime64[ns]")
  1454. msg = "Cannot unbox tzaware Timestamp to tznaive dtype"
  1455. with pytest.raises(TypeError, match=msg):
  1456. Series(ts, index=[0], dtype="datetime64[ns]")
  1457. def test_constructor_datetime64(self):
  1458. rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
  1459. dates = np.asarray(rng)
  1460. series = Series(dates)
  1461. assert np.issubdtype(series.dtype, np.dtype("M8[ns]"))
  1462. def test_constructor_datetimelike_scalar_to_string_dtype(
  1463. self, nullable_string_dtype
  1464. ):
  1465. # https://github.com/pandas-dev/pandas/pull/33846
  1466. result = Series("M", index=[1, 2, 3], dtype=nullable_string_dtype)
  1467. expected = Series(["M", "M", "M"], index=[1, 2, 3], dtype=nullable_string_dtype)
  1468. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize(
      "values",
      [
          [np.datetime64("2012-01-01"), np.datetime64("2013-01-01")],
          ["2012-01-01", "2013-01-01"],
      ],
  )
  def test_constructor_sparse_datetime64(self, values):
      # https://github.com/pandas-dev/pandas/issues/35762
      # Passing the sparse dtype to Series must match wrapping a
      # SparseArray that was built with the same dtype.
      sparse_dtype = pd.SparseDtype("datetime64[ns]")
      result = Series(values, dtype=sparse_dtype)
      expected = Series(pd.arrays.SparseArray(values, dtype=sparse_dtype))
      tm.assert_series_equal(result, expected)
  1483. def test_construction_from_ordered_collection(self):
  1484. # https://github.com/pandas-dev/pandas/issues/36044
  1485. result = Series({"a": 1, "b": 2}.keys())
  1486. expected = Series(["a", "b"])
  1487. tm.assert_series_equal(result, expected)
  1488. result = Series({"a": 1, "b": 2}.values())
  1489. expected = Series([1, 2])
  1490. tm.assert_series_equal(result, expected)
  1491. def test_construction_from_large_int_scalar_no_overflow(self):
  1492. # https://github.com/pandas-dev/pandas/issues/36291
  1493. n = 1_000_000_000_000_000_000_000
  1494. result = Series(n, index=[0])
  1495. expected = Series(n)
  1496. tm.assert_series_equal(result, expected)
  1497. def test_constructor_list_of_periods_infers_period_dtype(self):
  1498. series = Series(list(period_range("2000-01-01", periods=10, freq="D")))
  1499. assert series.dtype == "Period[D]"
  1500. series = Series(
  1501. [Period("2011-01-01", freq="D"), Period("2011-02-01", freq="D")]
  1502. )
  1503. assert series.dtype == "Period[D]"
  1504. def test_constructor_subclass_dict(self, dict_subclass):
  1505. data = dict_subclass((x, 10.0 * x) for x in range(10))
  1506. series = Series(data)
  1507. expected = Series(dict(data.items()))
  1508. tm.assert_series_equal(series, expected)
  1509. def test_constructor_ordereddict(self):
  1510. # GH3283
  1511. data = OrderedDict((f"col{i}", np.random.random()) for i in range(12))
  1512. series = Series(data)
  1513. expected = Series(list(data.values()), list(data.keys()))
  1514. tm.assert_series_equal(series, expected)
  1515. # Test with subclass
  1516. class A(OrderedDict):
  1517. pass
  1518. series = Series(A(data))
  1519. tm.assert_series_equal(series, expected)
  1520. def test_constructor_dict_multiindex(self):
  1521. d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}
  1522. _d = sorted(d.items())
  1523. result = Series(d)
  1524. expected = Series(
  1525. [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d])
  1526. )
  1527. tm.assert_series_equal(result, expected)
  1528. d["z"] = 111.0
  1529. _d.insert(0, ("z", d["z"]))
  1530. result = Series(d)
  1531. expected = Series(
  1532. [x[1] for x in _d], index=Index([x[0] for x in _d], tupleize_cols=False)
  1533. )
  1534. result = result.reindex(index=expected.index)
  1535. tm.assert_series_equal(result, expected)
  1536. def test_constructor_dict_multiindex_reindex_flat(self):
  1537. # construction involves reindexing with a MultiIndex corner case
  1538. data = {("i", "i"): 0, ("i", "j"): 1, ("j", "i"): 2, "j": np.nan}
  1539. expected = Series(data)
  1540. result = Series(expected[:-1].to_dict(), index=expected.index)
  1541. tm.assert_series_equal(result, expected)
  1542. def test_constructor_dict_timedelta_index(self):
  1543. # GH #12169 : Resample category data with timedelta index
  1544. # construct Series from dict as data and TimedeltaIndex as index
  1545. # will result NaN in result Series data
  1546. expected = Series(
  1547. data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], unit="s")
  1548. )
  1549. result = Series(
  1550. data={
  1551. pd.to_timedelta(0, unit="s"): "A",
  1552. pd.to_timedelta(10, unit="s"): "B",
  1553. pd.to_timedelta(20, unit="s"): "C",
  1554. },
  1555. index=pd.to_timedelta([0, 10, 20], unit="s"),
  1556. )
  1557. tm.assert_series_equal(result, expected)
  def test_constructor_infer_index_tz(self):
      # An index given as tz-aware datetimes must carry that tzinfo over
      # to the resulting Series index.
      offset = tzoffset(None, 7200)
      stamps = [datetime(2012, 5, 11, hour, tzinfo=offset) for hour in (11, 12)]

      ser = Series(data=[188.5, 328.25], index=stamps)
      assert ser.index.tz == offset

      # it works! GH#2443
      repr(ser.index[0])
  1569. def test_constructor_with_pandas_dtype(self):
  1570. # going through 2D->1D path
  1571. vals = [(1,), (2,), (3,)]
  1572. ser = Series(vals)
  1573. dtype = ser.array.dtype # PandasDtype
  1574. ser2 = Series(vals, dtype=dtype)
  1575. tm.assert_series_equal(ser, ser2)
  1576. def test_constructor_int_dtype_missing_values(self):
  1577. # GH#43017
  1578. result = Series(index=[0], dtype="int64")
  1579. expected = Series(np.nan, index=[0], dtype="float64")
  1580. tm.assert_series_equal(result, expected)
  1581. def test_constructor_bool_dtype_missing_values(self):
  1582. # GH#43018
  1583. result = Series(index=[0], dtype="bool")
  1584. expected = Series(True, index=[0], dtype="bool")
  1585. tm.assert_series_equal(result, expected)
  1586. def test_constructor_int64_dtype(self, any_int_dtype):
  1587. # GH#44923
  1588. result = Series(["0", "1", "2"], dtype=any_int_dtype)
  1589. expected = Series([0, 1, 2], dtype=any_int_dtype)
  1590. tm.assert_series_equal(result, expected)
  def test_constructor_raise_on_lossy_conversion_of_strings(self):
      # GH#44923
      # "128" is one past the int8 maximum, so the cast must be refused.
      msg = "string values cannot be losslessly cast to int8"
      with pytest.raises(ValueError, match=msg):
          Series(["128"], dtype="int8")
  1597. def test_constructor_dtype_timedelta_alternative_construct(self):
  1598. # GH#35465
  1599. result = Series([1000000, 200000, 3000000], dtype="timedelta64[ns]")
  1600. expected = Series(pd.to_timedelta([1000000, 200000, 3000000], unit="ns"))
  1601. tm.assert_series_equal(result, expected)
  @pytest.mark.xfail(
      reason="Not clear what the correct expected behavior should be with "
      "integers now that we support non-nano. ATM (2022-10-08) we treat ints "
      "as nanoseconds, then cast to the requested dtype. xref #48312"
  )
  def test_constructor_dtype_timedelta_ns_s(self):
      # GH#35465
      # Same integers, once as timedelta64[ns] and once as timedelta64[s];
      # marked xfail, see the reason on the decorator.
      raw = [1000000, 200000, 3000000]
      as_ns = Series(raw, dtype="timedelta64[ns]")
      as_s = Series(raw, dtype="timedelta64[s]")
      tm.assert_series_equal(as_ns, as_s)
  @pytest.mark.xfail(
      reason="Not clear what the correct expected behavior should be with "
      "integers now that we support non-nano. ATM (2022-10-08) we treat ints "
      "as nanoseconds, then cast to the requested dtype. xref #48312"
  )
  def test_constructor_dtype_timedelta_ns_s_astype_int64(self):
      # GH#35465
      # Same comparison as the ns-vs-s case, but after casting both sides
      # to int64; marked xfail, see the reason on the decorator.
      raw = [1000000, 200000, 3000000]
      from_ns = Series(raw, dtype="timedelta64[ns]")
      from_s = Series(raw, dtype="timedelta64[s]")
      tm.assert_series_equal(from_ns.astype("int64"), from_s.astype("int64"))
  @pytest.mark.filterwarnings(
      "ignore:elementwise comparison failed:DeprecationWarning"
  )
  @pytest.mark.parametrize("func", [Series, DataFrame, Index, pd.array])
  def test_constructor_mismatched_null_nullable_dtype(
      self, func, any_numeric_ea_dtype
  ):
      # GH#44514
      # Datetime-like nulls must be rejected with TypeError when a nullable
      # numeric dtype is requested; any one of these messages is accepted.
      accepted_messages = [
          "cannot safely cast non-equivalent object",
          r"int\(\) argument must be a string, a bytes-like object "
          "or a (real )?number",
          r"Cannot cast array data from dtype\('O'\) to dtype\('float64'\) "
          "according to the rule 'safe'",
          "object cannot be converted to a FloatingDtype",
          "'values' contains non-numeric NA",
      ]
      msg = "|".join(accepted_messages)

      for null in tm.NP_NAT_OBJECTS + [NaT]:
          with pytest.raises(TypeError, match=msg):
              func([null, 1.0, 3.0], dtype=any_numeric_ea_dtype)
  1648. def test_series_constructor_ea_int_from_bool(self):
  1649. # GH#42137
  1650. result = Series([True, False, True, pd.NA], dtype="Int64")
  1651. expected = Series([1, 0, 1, pd.NA], dtype="Int64")
  1652. tm.assert_series_equal(result, expected)
  1653. result = Series([True, False, True], dtype="Int64")
  1654. expected = Series([1, 0, 1], dtype="Int64")
  1655. tm.assert_series_equal(result, expected)
  def test_series_constructor_ea_int_from_string_bool(self):
      # GH#42137
      # The *strings* "True"/"False" are not integer literals and must fail.
      values = ["True", "False", "True", pd.NA]
      with pytest.raises(ValueError, match="invalid literal"):
          Series(values, dtype="Int64")
  @pytest.mark.parametrize("val", [1, 1.0])
  def test_series_constructor_overflow_uint_ea(self, val):
      # GH#38798
      # A value just below the uint64 maximum next to a small int or float
      # must be stored exactly under the UInt64 dtype.
      near_max = np.iinfo(np.uint64).max - 1
      result = Series([near_max, val], dtype="UInt64")
      raw_expected = np.array([near_max, 1], dtype="uint64")
      tm.assert_series_equal(result, Series(raw_expected, dtype="UInt64"))
  @pytest.mark.parametrize("val", [1, 1.0])
  def test_series_constructor_overflow_uint_ea_with_na(self, val):
      # GH#38798
      # Same near-maximum value as the case without NA, now followed by
      # pd.NA; the expectation is an IntegerArray with the last slot flagged.
      near_max = np.iinfo(np.uint64).max - 1
      result = Series([near_max, val, pd.NA], dtype="UInt64")

      data = np.array([near_max, 1, 0], dtype="uint64")
      mask = np.array([0, 0, 1], dtype=np.bool_)
      tm.assert_series_equal(result, Series(IntegerArray(data, mask)))
  1679. def test_series_constructor_overflow_uint_with_nan(self):
  1680. # GH#38798
  1681. max_val = np.iinfo(np.uint64).max - 1
  1682. result = Series([max_val, np.nan], dtype="UInt64")
  1683. expected = Series(
  1684. IntegerArray(
  1685. np.array([max_val, 1], dtype="uint64"),
  1686. np.array([0, 1], dtype=np.bool_),
  1687. )
  1688. )
  1689. tm.assert_series_equal(result, expected)
  1690. def test_series_constructor_ea_all_na(self):
  1691. # GH#38798
  1692. result = Series([np.nan, np.nan], dtype="UInt64")
  1693. expected = Series(
  1694. IntegerArray(
  1695. np.array([1, 1], dtype="uint64"),
  1696. np.array([1, 1], dtype=np.bool_),
  1697. )
  1698. )
  1699. tm.assert_series_equal(result, expected)
  1700. def test_series_from_index_dtype_equal_does_not_copy(self):
  1701. # GH#52008
  1702. idx = Index([1, 2, 3])
  1703. expected = idx.copy(deep=True)
  1704. ser = Series(idx, dtype="int64")
  1705. ser.iloc[0] = 100
  1706. tm.assert_index_equal(idx, expected)
  class TestSeriesConstructorIndexCoercion:
      # Checks on how the Series constructor treats the ``index`` argument.

      def test_series_constructor_datetimelike_index_coercion(self):
          # Build the business-day index with the public ``date_range``
          # instead of a private testing helper, and take the (irrelevant)
          # values from a seeded local generator so the global numpy RNG
          # state is not consumed.
          idx = date_range("2000-01-01", periods=10000, freq="B")
          values = np.random.default_rng(2).standard_normal(len(idx))
          ser = Series(values, idx.astype(object))
          # as of 2.0, we no longer silently cast the object-dtype index
          #  to DatetimeIndex GH#39307, GH#23598
          assert not isinstance(ser.index, DatetimeIndex)

      def test_series_constructor_infer_multiindex(self):
          # A list of equal-length label sequences passed as ``index`` must
          # be turned into a MultiIndex, for ndarray levels as well as list
          # levels, and for scalar as well as range data.
          index_lists = [["a", "a", "b", "b"], ["x", "y", "x", "y"]]

          multi = Series(1.0, index=[np.array(x) for x in index_lists])
          assert isinstance(multi.index, MultiIndex)

          multi = Series(1.0, index=index_lists)
          assert isinstance(multi.index, MultiIndex)

          multi = Series(range(4), index=index_lists)
          assert isinstance(multi.index, MultiIndex)
  class TestSeriesConstructorInternals:
      # Checks that look at the manager/blocks behind a constructed Series.

      def test_constructor_no_pandas_array(self, using_array_manager):
          # Round-trip through ``ser.array``; outside array-manager mode
          # the first block must be a NumericBlock.
          ser = Series([1, 2, 3])
          result = Series(ser.array)
          tm.assert_series_equal(ser, result)
          if using_array_manager:
              return
          assert isinstance(result._mgr.blocks[0], NumericBlock)

      @td.skip_array_manager_invalid_test
      def test_from_array(self):
          # timedelta64 / datetime64 data built via pd.array must not be
          # stored in an extension block.
          cases = (
              (["1H", "2H"], "timedelta64[ns]"),
              (["2015"], "datetime64[ns]"),
          )
          for values, dtype in cases:
              result = Series(pd.array(values, dtype=dtype))
              assert result._mgr.blocks[0].is_extension is False

      @td.skip_array_manager_invalid_test
      def test_from_list_dtype(self):
          # Same check when a plain list is passed with an explicit dtype.
          cases = (
              (["1H", "2H"], "timedelta64[ns]"),
              (["2015"], "datetime64[ns]"),
          )
          for values, dtype in cases:
              result = Series(values, dtype=dtype)
              assert result._mgr.blocks[0].is_extension is False
  1741. def test_constructor(rand_series_with_duplicate_datetimeindex):
  1742. dups = rand_series_with_duplicate_datetimeindex
  1743. assert isinstance(dups, Series)
  1744. assert isinstance(dups.index, DatetimeIndex)
  @pytest.mark.parametrize(
      "input_dict,expected",
      [
          ({0: 0}, np.array([[0]], dtype=np.int64)),
          ({"a": "a"}, np.array([["a"]], dtype=object)),
          ({1: 1}, np.array([[1]], dtype=np.int64)),
      ],
  )
  def test_numpy_array(input_dict, expected):
      # np.array over a list holding one single-element Series must equal
      # the parametrized 2-D array.
      ser = Series(input_dict)
      tm.assert_numpy_array_equal(np.array([ser]), expected)