test_to_datetime.py 131 KB


  1. """ test to_datetime """
  2. import calendar
  3. from collections import deque
  4. from datetime import (
  5. date,
  6. datetime,
  7. timedelta,
  8. timezone,
  9. )
  10. from decimal import Decimal
  11. import locale
  12. from dateutil.parser import parse
  13. from dateutil.tz.tz import tzoffset
  14. import numpy as np
  15. import pytest
  16. import pytz
  17. from pandas._libs import tslib
  18. from pandas._libs.tslibs import (
  19. iNaT,
  20. parsing,
  21. )
  22. from pandas.errors import (
  23. OutOfBoundsDatetime,
  24. OutOfBoundsTimedelta,
  25. )
  26. import pandas.util._test_decorators as td
  27. from pandas.core.dtypes.common import is_datetime64_ns_dtype
  28. import pandas as pd
  29. from pandas import (
  30. DataFrame,
  31. DatetimeIndex,
  32. Index,
  33. NaT,
  34. Series,
  35. Timestamp,
  36. date_range,
  37. isna,
  38. to_datetime,
  39. )
  40. import pandas._testing as tm
  41. from pandas.core.arrays import DatetimeArray
  42. from pandas.core.tools import datetimes as tools
  43. from pandas.core.tools.datetimes import start_caching_at
  44. from pandas.util.version import Version
# Regex fragment for the hint text that parsing errors are expected to end
# with; it is interpolated into ``pytest.raises(..., match=...)`` patterns.
PARSING_ERR_MSG = (
    r"You might want to try:\n"
    r" - passing `format` if your strings have a consistent format;\n"
    r" - passing `format=\'ISO8601\'` if your strings are all ISO8601 "
    r"but not necessarily in exactly the same format;\n"
    r" - passing `format=\'mixed\'`, and the format will be inferred "
    r"for each element individually. You might want to use `dayfirst` "
    r"alongside this."
)
@pytest.fixture(params=[True, False])
def cache(request):
    """
    cache keyword to pass to to_datetime.

    Parametrized over ``True`` and ``False``; returns the current parameter.
    """
    return request.param
  60. class TestTimeConversionFormats:
  61. @pytest.mark.parametrize("readonly", [True, False])
  62. def test_to_datetime_readonly(self, readonly):
  63. # GH#34857
  64. arr = np.array([], dtype=object)
  65. if readonly:
  66. arr.setflags(write=False)
  67. result = to_datetime(arr)
  68. expected = to_datetime([])
  69. tm.assert_index_equal(result, expected)
  70. @pytest.mark.parametrize(
  71. "format, expected",
  72. [
  73. [
  74. "%d/%m/%Y",
  75. [Timestamp("20000101"), Timestamp("20000201"), Timestamp("20000301")],
  76. ],
  77. [
  78. "%m/%d/%Y",
  79. [Timestamp("20000101"), Timestamp("20000102"), Timestamp("20000103")],
  80. ],
  81. ],
  82. )
  83. def test_to_datetime_format(self, cache, index_or_series, format, expected):
  84. values = index_or_series(["1/1/2000", "1/2/2000", "1/3/2000"])
  85. result = to_datetime(values, format=format, cache=cache)
  86. expected = index_or_series(expected)
  87. if isinstance(expected, Series):
  88. tm.assert_series_equal(result, expected)
  89. else:
  90. tm.assert_index_equal(result, expected)
  91. @pytest.mark.parametrize(
  92. "arg, expected, format",
  93. [
  94. ["1/1/2000", "20000101", "%d/%m/%Y"],
  95. ["1/1/2000", "20000101", "%m/%d/%Y"],
  96. ["1/2/2000", "20000201", "%d/%m/%Y"],
  97. ["1/2/2000", "20000102", "%m/%d/%Y"],
  98. ["1/3/2000", "20000301", "%d/%m/%Y"],
  99. ["1/3/2000", "20000103", "%m/%d/%Y"],
  100. ],
  101. )
  102. def test_to_datetime_format_scalar(self, cache, arg, expected, format):
  103. result = to_datetime(arg, format=format, cache=cache)
  104. expected = Timestamp(expected)
  105. assert result == expected
  106. def test_to_datetime_format_YYYYMMDD(self, cache):
  107. ser = Series([19801222, 19801222] + [19810105] * 5)
  108. expected = Series([Timestamp(x) for x in ser.apply(str)])
  109. result = to_datetime(ser, format="%Y%m%d", cache=cache)
  110. tm.assert_series_equal(result, expected)
  111. result = to_datetime(ser.apply(str), format="%Y%m%d", cache=cache)
  112. tm.assert_series_equal(result, expected)
  113. def test_to_datetime_format_YYYYMMDD_with_nat(self, cache):
  114. # Explicit cast to float to explicit cast when setting np.nan
  115. ser = Series([19801222, 19801222] + [19810105] * 5, dtype="float")
  116. # with NaT
  117. expected = Series(
  118. [Timestamp("19801222"), Timestamp("19801222")] + [Timestamp("19810105")] * 5
  119. )
  120. expected[2] = np.nan
  121. ser[2] = np.nan
  122. result = to_datetime(ser, format="%Y%m%d", cache=cache)
  123. tm.assert_series_equal(result, expected)
  124. # string with NaT
  125. ser2 = ser.apply(str)
  126. ser2[2] = "nat"
  127. with pytest.raises(
  128. ValueError,
  129. match=(
  130. 'unconverted data remains when parsing with format "%Y%m%d": ".0", '
  131. "at position 0"
  132. ),
  133. ):
  134. # https://github.com/pandas-dev/pandas/issues/50051
  135. to_datetime(ser2, format="%Y%m%d", cache=cache)
  136. def test_to_datetime_format_YYYYMM_with_nat(self, cache):
  137. # https://github.com/pandas-dev/pandas/issues/50237
  138. # Explicit cast to float to explicit cast when setting np.nan
  139. ser = Series([198012, 198012] + [198101] * 5, dtype="float")
  140. expected = Series(
  141. [Timestamp("19801201"), Timestamp("19801201")] + [Timestamp("19810101")] * 5
  142. )
  143. expected[2] = np.nan
  144. ser[2] = np.nan
  145. result = to_datetime(ser, format="%Y%m", cache=cache)
  146. tm.assert_series_equal(result, expected)
  147. def test_to_datetime_format_YYYYMMDD_ignore(self, cache):
  148. # coercion
  149. # GH 7930, GH 14487
  150. ser = Series([20121231, 20141231, 99991231])
  151. result = to_datetime(ser, format="%Y%m%d", errors="ignore", cache=cache)
  152. expected = Series(
  153. [20121231, 20141231, 99991231],
  154. dtype=object,
  155. )
  156. tm.assert_series_equal(result, expected)
  157. def test_to_datetime_format_YYYYMMDD_ignore_with_outofbounds(self, cache):
  158. # https://github.com/pandas-dev/pandas/issues/26493
  159. result = to_datetime(
  160. ["15010101", "20150101", np.nan],
  161. format="%Y%m%d",
  162. errors="ignore",
  163. cache=cache,
  164. )
  165. expected = Index(["15010101", "20150101", np.nan])
  166. tm.assert_index_equal(result, expected)
  167. def test_to_datetime_format_YYYYMMDD_coercion(self, cache):
  168. # coercion
  169. # GH 7930
  170. ser = Series([20121231, 20141231, 99991231])
  171. result = to_datetime(ser, format="%Y%m%d", errors="coerce", cache=cache)
  172. expected = Series(["20121231", "20141231", "NaT"], dtype="M8[ns]")
  173. tm.assert_series_equal(result, expected)
  174. @pytest.mark.parametrize(
  175. "input_s",
  176. [
  177. # Null values with Strings
  178. ["19801222", "20010112", None],
  179. ["19801222", "20010112", np.nan],
  180. ["19801222", "20010112", NaT],
  181. ["19801222", "20010112", "NaT"],
  182. # Null values with Integers
  183. [19801222, 20010112, None],
  184. [19801222, 20010112, np.nan],
  185. [19801222, 20010112, NaT],
  186. [19801222, 20010112, "NaT"],
  187. ],
  188. )
  189. def test_to_datetime_format_YYYYMMDD_with_none(self, input_s):
  190. # GH 30011
  191. # format='%Y%m%d'
  192. # with None
  193. expected = Series([Timestamp("19801222"), Timestamp("20010112"), NaT])
  194. result = Series(to_datetime(input_s, format="%Y%m%d"))
  195. tm.assert_series_equal(result, expected)
  196. @pytest.mark.parametrize(
  197. "input_s, expected",
  198. [
  199. # NaN before strings with invalid date values
  200. [
  201. Series(["19801222", np.nan, "20010012", "10019999"]),
  202. Series([Timestamp("19801222"), np.nan, np.nan, np.nan]),
  203. ],
  204. # NaN after strings with invalid date values
  205. [
  206. Series(["19801222", "20010012", "10019999", np.nan]),
  207. Series([Timestamp("19801222"), np.nan, np.nan, np.nan]),
  208. ],
  209. # NaN before integers with invalid date values
  210. [
  211. Series([20190813, np.nan, 20010012, 20019999]),
  212. Series([Timestamp("20190813"), np.nan, np.nan, np.nan]),
  213. ],
  214. # NaN after integers with invalid date values
  215. [
  216. Series([20190813, 20010012, np.nan, 20019999]),
  217. Series([Timestamp("20190813"), np.nan, np.nan, np.nan]),
  218. ],
  219. ],
  220. )
  221. def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected):
  222. # GH 25512
  223. # format='%Y%m%d', errors='coerce'
  224. result = to_datetime(input_s, format="%Y%m%d", errors="coerce")
  225. tm.assert_series_equal(result, expected)
  226. @pytest.mark.parametrize(
  227. "data, format, expected",
  228. [
  229. ([pd.NA], "%Y%m%d%H%M%S", DatetimeIndex(["NaT"])),
  230. ([pd.NA], None, DatetimeIndex(["NaT"])),
  231. (
  232. [pd.NA, "20210202202020"],
  233. "%Y%m%d%H%M%S",
  234. DatetimeIndex(["NaT", "2021-02-02 20:20:20"]),
  235. ),
  236. (["201010", pd.NA], "%y%m%d", DatetimeIndex(["2020-10-10", "NaT"])),
  237. (["201010", pd.NA], "%d%m%y", DatetimeIndex(["2010-10-20", "NaT"])),
  238. ([None, np.nan, pd.NA], None, DatetimeIndex(["NaT", "NaT", "NaT"])),
  239. ([None, np.nan, pd.NA], "%Y%m%d", DatetimeIndex(["NaT", "NaT", "NaT"])),
  240. ],
  241. )
  242. def test_to_datetime_with_NA(self, data, format, expected):
  243. # GH#42957
  244. result = to_datetime(data, format=format)
  245. tm.assert_index_equal(result, expected)
  246. def test_to_datetime_with_NA_with_warning(self):
  247. # GH#42957
  248. result = to_datetime(["201010", pd.NA])
  249. expected = DatetimeIndex(["2010-10-20", "NaT"])
  250. tm.assert_index_equal(result, expected)
  251. def test_to_datetime_format_integer(self, cache):
  252. # GH 10178
  253. ser = Series([2000, 2001, 2002])
  254. expected = Series([Timestamp(x) for x in ser.apply(str)])
  255. result = to_datetime(ser, format="%Y", cache=cache)
  256. tm.assert_series_equal(result, expected)
  257. ser = Series([200001, 200105, 200206])
  258. expected = Series([Timestamp(x[:4] + "-" + x[4:]) for x in ser.apply(str)])
  259. result = to_datetime(ser, format="%Y%m", cache=cache)
  260. tm.assert_series_equal(result, expected)
  261. @pytest.mark.parametrize(
  262. "int_date, expected",
  263. [
  264. # valid date, length == 8
  265. [20121030, datetime(2012, 10, 30)],
  266. # short valid date, length == 6
  267. [199934, datetime(1999, 3, 4)],
  268. # long integer date partially parsed to datetime(2012,1,1), length > 8
  269. [2012010101, 2012010101],
  270. # invalid date partially parsed to datetime(2012,9,9), length == 8
  271. [20129930, 20129930],
  272. # short integer date partially parsed to datetime(2012,9,9), length < 8
  273. [2012993, 2012993],
  274. # short invalid date, length == 4
  275. [2121, 2121],
  276. ],
  277. )
  278. def test_int_to_datetime_format_YYYYMMDD_typeerror(self, int_date, expected):
  279. # GH 26583
  280. result = to_datetime(int_date, format="%Y%m%d", errors="ignore")
  281. assert result == expected
  282. def test_to_datetime_format_microsecond(self, cache):
  283. month_abbr = calendar.month_abbr[4]
  284. val = f"01-{month_abbr}-2011 00:00:01.978"
  285. format = "%d-%b-%Y %H:%M:%S.%f"
  286. result = to_datetime(val, format=format, cache=cache)
  287. exp = datetime.strptime(val, format)
  288. assert result == exp
  289. @pytest.mark.parametrize(
  290. "value, format, dt",
  291. [
  292. ["01/10/2010 15:20", "%m/%d/%Y %H:%M", Timestamp("2010-01-10 15:20")],
  293. ["01/10/2010 05:43", "%m/%d/%Y %I:%M", Timestamp("2010-01-10 05:43")],
  294. [
  295. "01/10/2010 13:56:01",
  296. "%m/%d/%Y %H:%M:%S",
  297. Timestamp("2010-01-10 13:56:01"),
  298. ],
  299. # The 3 tests below are locale-dependent.
  300. # They pass, except when the machine locale is zh_CN or it_IT .
  301. pytest.param(
  302. "01/10/2010 08:14 PM",
  303. "%m/%d/%Y %I:%M %p",
  304. Timestamp("2010-01-10 20:14"),
  305. marks=pytest.mark.xfail(
  306. locale.getlocale()[0] in ("zh_CN", "it_IT"),
  307. reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8",
  308. strict=False,
  309. ),
  310. ),
  311. pytest.param(
  312. "01/10/2010 07:40 AM",
  313. "%m/%d/%Y %I:%M %p",
  314. Timestamp("2010-01-10 07:40"),
  315. marks=pytest.mark.xfail(
  316. locale.getlocale()[0] in ("zh_CN", "it_IT"),
  317. reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8",
  318. strict=False,
  319. ),
  320. ),
  321. pytest.param(
  322. "01/10/2010 09:12:56 AM",
  323. "%m/%d/%Y %I:%M:%S %p",
  324. Timestamp("2010-01-10 09:12:56"),
  325. marks=pytest.mark.xfail(
  326. locale.getlocale()[0] in ("zh_CN", "it_IT"),
  327. reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8",
  328. strict=False,
  329. ),
  330. ),
  331. ],
  332. )
  333. def test_to_datetime_format_time(self, cache, value, format, dt):
  334. assert to_datetime(value, format=format, cache=cache) == dt
  335. @td.skip_if_not_us_locale
  336. def test_to_datetime_with_non_exact(self, cache):
  337. # GH 10834
  338. # 8904
  339. # exact kw
  340. ser = Series(
  341. ["19MAY11", "foobar19MAY11", "19MAY11:00:00:00", "19MAY11 00:00:00Z"]
  342. )
  343. result = to_datetime(ser, format="%d%b%y", exact=False, cache=cache)
  344. expected = to_datetime(
  345. ser.str.extract(r"(\d+\w+\d+)", expand=False), format="%d%b%y", cache=cache
  346. )
  347. tm.assert_series_equal(result, expected)
  348. @pytest.mark.parametrize(
  349. "format, expected",
  350. [
  351. ("%Y-%m-%d", Timestamp(2000, 1, 3)),
  352. ("%Y-%d-%m", Timestamp(2000, 3, 1)),
  353. ("%Y-%m-%d %H", Timestamp(2000, 1, 3, 12)),
  354. ("%Y-%d-%m %H", Timestamp(2000, 3, 1, 12)),
  355. ("%Y-%m-%d %H:%M", Timestamp(2000, 1, 3, 12, 34)),
  356. ("%Y-%d-%m %H:%M", Timestamp(2000, 3, 1, 12, 34)),
  357. ("%Y-%m-%d %H:%M:%S", Timestamp(2000, 1, 3, 12, 34, 56)),
  358. ("%Y-%d-%m %H:%M:%S", Timestamp(2000, 3, 1, 12, 34, 56)),
  359. ("%Y-%m-%d %H:%M:%S.%f", Timestamp(2000, 1, 3, 12, 34, 56, 123456)),
  360. ("%Y-%d-%m %H:%M:%S.%f", Timestamp(2000, 3, 1, 12, 34, 56, 123456)),
  361. (
  362. "%Y-%m-%d %H:%M:%S.%f%z",
  363. Timestamp(2000, 1, 3, 12, 34, 56, 123456, tz="UTC+01:00"),
  364. ),
  365. (
  366. "%Y-%d-%m %H:%M:%S.%f%z",
  367. Timestamp(2000, 3, 1, 12, 34, 56, 123456, tz="UTC+01:00"),
  368. ),
  369. ],
  370. )
  371. def test_non_exact_doesnt_parse_whole_string(self, cache, format, expected):
  372. # https://github.com/pandas-dev/pandas/issues/50412
  373. # the formats alternate between ISO8601 and non-ISO8601 to check both paths
  374. result = to_datetime(
  375. "2000-01-03 12:34:56.123456+01:00", format=format, exact=False
  376. )
  377. assert result == expected
  378. @pytest.mark.parametrize(
  379. "arg",
  380. [
  381. "2012-01-01 09:00:00.000000001",
  382. "2012-01-01 09:00:00.000001",
  383. "2012-01-01 09:00:00.001",
  384. "2012-01-01 09:00:00.001000",
  385. "2012-01-01 09:00:00.001000000",
  386. ],
  387. )
  388. def test_parse_nanoseconds_with_formula(self, cache, arg):
  389. # GH8989
  390. # truncating the nanoseconds when a format was provided
  391. expected = to_datetime(arg, cache=cache)
  392. result = to_datetime(arg, format="%Y-%m-%d %H:%M:%S.%f", cache=cache)
  393. assert result == expected
  394. @pytest.mark.parametrize(
  395. "value,fmt,expected",
  396. [
  397. ["2009324", "%Y%W%w", Timestamp("2009-08-13")],
  398. ["2013020", "%Y%U%w", Timestamp("2013-01-13")],
  399. ],
  400. )
  401. def test_to_datetime_format_weeks(self, value, fmt, expected, cache):
  402. assert to_datetime(value, format=fmt, cache=cache) == expected
  403. @pytest.mark.parametrize(
  404. "fmt,dates,expected_dates",
  405. [
  406. [
  407. "%Y-%m-%d %H:%M:%S %Z",
  408. ["2010-01-01 12:00:00 UTC"] * 2,
  409. [Timestamp("2010-01-01 12:00:00", tz="UTC")] * 2,
  410. ],
  411. [
  412. "%Y-%m-%d %H:%M:%S %Z",
  413. [
  414. "2010-01-01 12:00:00 UTC",
  415. "2010-01-01 12:00:00 GMT",
  416. "2010-01-01 12:00:00 US/Pacific",
  417. ],
  418. [
  419. Timestamp("2010-01-01 12:00:00", tz="UTC"),
  420. Timestamp("2010-01-01 12:00:00", tz="GMT"),
  421. Timestamp("2010-01-01 12:00:00", tz="US/Pacific"),
  422. ],
  423. ],
  424. [
  425. "%Y-%m-%d %H:%M:%S%z",
  426. ["2010-01-01 12:00:00+0100"] * 2,
  427. [
  428. Timestamp(
  429. "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60))
  430. )
  431. ]
  432. * 2,
  433. ],
  434. [
  435. "%Y-%m-%d %H:%M:%S %z",
  436. ["2010-01-01 12:00:00 +0100"] * 2,
  437. [
  438. Timestamp(
  439. "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60))
  440. )
  441. ]
  442. * 2,
  443. ],
  444. [
  445. "%Y-%m-%d %H:%M:%S %z",
  446. ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"],
  447. [
  448. Timestamp(
  449. "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60))
  450. ),
  451. Timestamp(
  452. "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=-60))
  453. ),
  454. ],
  455. ],
  456. [
  457. "%Y-%m-%d %H:%M:%S %z",
  458. ["2010-01-01 12:00:00 Z", "2010-01-01 12:00:00 Z"],
  459. [
  460. Timestamp(
  461. "2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(0)
  462. ), # pytz coerces to UTC
  463. Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(0)),
  464. ],
  465. ],
  466. ],
  467. )
  468. def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates):
  469. # GH 13486
  470. result = to_datetime(dates, format=fmt)
  471. expected = Index(expected_dates)
  472. tm.assert_equal(result, expected)
  473. def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self):
  474. # GH 32792
  475. dates = [
  476. "2010-01-01 12:00:00 +0100",
  477. "2010-01-01 12:00:00 -0100",
  478. "2010-01-01 12:00:00 +0300",
  479. "2010-01-01 12:00:00 +0400",
  480. ]
  481. expected_dates = [
  482. "2010-01-01 11:00:00+00:00",
  483. "2010-01-01 13:00:00+00:00",
  484. "2010-01-01 09:00:00+00:00",
  485. "2010-01-01 08:00:00+00:00",
  486. ]
  487. fmt = "%Y-%m-%d %H:%M:%S %z"
  488. result = to_datetime(dates, format=fmt, utc=True)
  489. expected = DatetimeIndex(expected_dates)
  490. tm.assert_index_equal(result, expected)
  491. @pytest.mark.parametrize(
  492. "offset", ["+0", "-1foo", "UTCbar", ":10", "+01:000:01", ""]
  493. )
  494. def test_to_datetime_parse_timezone_malformed(self, offset):
  495. fmt = "%Y-%m-%d %H:%M:%S %z"
  496. date = "2010-01-01 12:00:00 " + offset
  497. msg = "|".join(
  498. [
  499. r'^time data ".*" doesn\'t match format ".*", at position 0. '
  500. f"{PARSING_ERR_MSG}$",
  501. r'^unconverted data remains when parsing with format ".*": ".*", '
  502. f"at position 0. {PARSING_ERR_MSG}$",
  503. ]
  504. )
  505. with pytest.raises(ValueError, match=msg):
  506. to_datetime([date], format=fmt)
  507. def test_to_datetime_parse_timezone_keeps_name(self):
  508. # GH 21697
  509. fmt = "%Y-%m-%d %H:%M:%S %z"
  510. arg = Index(["2010-01-01 12:00:00 Z"], name="foo")
  511. result = to_datetime(arg, format=fmt)
  512. expected = DatetimeIndex(["2010-01-01 12:00:00"], tz="UTC", name="foo")
  513. tm.assert_index_equal(result, expected)
  514. class TestToDatetime:
  515. @pytest.mark.filterwarnings("ignore:Could not infer format")
  516. def test_to_datetime_overflow(self):
  517. # we should get an OutOfBoundsDatetime, NOT OverflowError
  518. # TODO: Timestamp raises ValueError("could not convert string to Timestamp")
  519. # can we make these more consistent?
  520. arg = "08335394550"
  521. msg = 'Parsing "08335394550" to datetime overflows, at position 0'
  522. with pytest.raises(OutOfBoundsDatetime, match=msg):
  523. to_datetime(arg)
  524. with pytest.raises(OutOfBoundsDatetime, match=msg):
  525. to_datetime([arg])
  526. res = to_datetime(arg, errors="coerce")
  527. assert res is NaT
  528. res = to_datetime([arg], errors="coerce")
  529. tm.assert_index_equal(res, Index([NaT]))
  530. res = to_datetime(arg, errors="ignore")
  531. assert isinstance(res, str) and res == arg
  532. res = to_datetime([arg], errors="ignore")
  533. tm.assert_index_equal(res, Index([arg], dtype=object))
  534. def test_to_datetime_mixed_datetime_and_string(self):
  535. # GH#47018 adapted old doctest with new behavior
  536. d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1)))
  537. d2 = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1)))
  538. res = to_datetime(["2020-01-01 17:00 -0100", d2])
  539. expected = to_datetime([d1, d2]).tz_convert(timezone(timedelta(minutes=-60)))
  540. tm.assert_index_equal(res, expected)
  541. @pytest.mark.parametrize(
  542. "format", ["%Y-%m-%d", "%Y-%d-%m"], ids=["ISO8601", "non-ISO8601"]
  543. )
  544. def test_to_datetime_mixed_date_and_string(self, format):
  545. # https://github.com/pandas-dev/pandas/issues/50108
  546. d1 = date(2020, 1, 2)
  547. res = to_datetime(["2020-01-01", d1], format=format)
  548. expected = DatetimeIndex(["2020-01-01", "2020-01-02"])
  549. tm.assert_index_equal(res, expected)
  550. @pytest.mark.parametrize(
  551. "fmt",
  552. ["%Y-%d-%m %H:%M:%S%z", "%Y-%m-%d %H:%M:%S%z"],
  553. ids=["non-ISO8601 format", "ISO8601 format"],
  554. )
  555. @pytest.mark.parametrize(
  556. "utc, args, expected",
  557. [
  558. pytest.param(
  559. True,
  560. ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-08:00"],
  561. DatetimeIndex(
  562. ["2000-01-01 09:00:00+00:00", "2000-01-01 10:00:00+00:00"],
  563. dtype="datetime64[ns, UTC]",
  564. ),
  565. id="all tz-aware, with utc",
  566. ),
  567. pytest.param(
  568. False,
  569. ["2000-01-01 01:00:00+00:00", "2000-01-01 02:00:00+00:00"],
  570. DatetimeIndex(
  571. ["2000-01-01 01:00:00+00:00", "2000-01-01 02:00:00+00:00"],
  572. ),
  573. id="all tz-aware, without utc",
  574. ),
  575. pytest.param(
  576. True,
  577. ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00+00:00"],
  578. DatetimeIndex(
  579. ["2000-01-01 09:00:00+00:00", "2000-01-01 02:00:00+00:00"],
  580. dtype="datetime64[ns, UTC]",
  581. ),
  582. id="all tz-aware, mixed offsets, with utc",
  583. ),
  584. pytest.param(
  585. False,
  586. ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"],
  587. Index(
  588. [
  589. Timestamp("2000-01-01 01:00:00"),
  590. Timestamp("2000-01-01 02:00:00+0000", tz="UTC"),
  591. ],
  592. ),
  593. id="tz-aware string, naive pydatetime, without utc",
  594. ),
  595. pytest.param(
  596. True,
  597. ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"],
  598. DatetimeIndex(
  599. ["2000-01-01 01:00:00+00:00", "2000-01-01 02:00:00+00:00"],
  600. dtype="datetime64[ns, UTC]",
  601. ),
  602. id="tz-aware string, naive pydatetime, with utc",
  603. ),
  604. ],
  605. )
  606. @pytest.mark.parametrize(
  607. "constructor",
  608. [Timestamp, lambda x: Timestamp(x).to_pydatetime()],
  609. )
  610. def test_to_datetime_mixed_datetime_and_string_with_format(
  611. self, fmt, utc, args, expected, constructor
  612. ):
  613. # https://github.com/pandas-dev/pandas/issues/49298
  614. # https://github.com/pandas-dev/pandas/issues/50254
  615. # note: ISO8601 formats go down a fastpath, so we need to check both
  616. # a ISO8601 format and a non-ISO8601 one
  617. ts1 = constructor(args[0])
  618. ts2 = args[1]
  619. result = to_datetime([ts1, ts2], format=fmt, utc=utc)
  620. tm.assert_index_equal(result, expected)
  621. @pytest.mark.parametrize(
  622. "fmt, utc, expected",
  623. [
  624. pytest.param(
  625. "%Y-%m-%d %H:%M:%S%z",
  626. True,
  627. DatetimeIndex(
  628. ["2000-01-01 08:00:00+00:00", "2000-01-02 00:00:00+00:00", "NaT"],
  629. dtype="datetime64[ns, UTC]",
  630. ),
  631. id="ISO8601, UTC",
  632. ),
  633. pytest.param(
  634. "%Y-%m-%d %H:%M:%S%z",
  635. False,
  636. Index(
  637. [
  638. Timestamp("2000-01-01 09:00:00+0100", tz="UTC+01:00"),
  639. Timestamp("2000-01-02 02:00:00+0200", tz="UTC+02:00"),
  640. NaT,
  641. ]
  642. ),
  643. id="ISO8601, non-UTC",
  644. ),
  645. pytest.param(
  646. "%Y-%d-%m %H:%M:%S%z",
  647. True,
  648. DatetimeIndex(
  649. ["2000-01-01 08:00:00+00:00", "2000-02-01 00:00:00+00:00", "NaT"],
  650. dtype="datetime64[ns, UTC]",
  651. ),
  652. id="non-ISO8601, UTC",
  653. ),
  654. pytest.param(
  655. "%Y-%d-%m %H:%M:%S%z",
  656. False,
  657. Index(
  658. [
  659. Timestamp("2000-01-01 09:00:00+0100", tz="UTC+01:00"),
  660. Timestamp("2000-02-01 02:00:00+0200", tz="UTC+02:00"),
  661. NaT,
  662. ]
  663. ),
  664. id="non-ISO8601, non-UTC",
  665. ),
  666. ],
  667. )
  668. def test_to_datetime_mixed_offsets_with_none(self, fmt, utc, expected):
  669. # https://github.com/pandas-dev/pandas/issues/50071
  670. result = to_datetime(
  671. ["2000-01-01 09:00:00+01:00", "2000-01-02 02:00:00+02:00", None],
  672. format=fmt,
  673. utc=utc,
  674. )
  675. tm.assert_index_equal(result, expected)
  676. @pytest.mark.parametrize(
  677. "fmt",
  678. ["%Y-%d-%m %H:%M:%S%z", "%Y-%m-%d %H:%M:%S%z"],
  679. ids=["non-ISO8601 format", "ISO8601 format"],
  680. )
  681. @pytest.mark.parametrize(
  682. "args",
  683. [
  684. pytest.param(
  685. ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-07:00"],
  686. id="all tz-aware, mixed timezones, without utc",
  687. ),
  688. ],
  689. )
  690. @pytest.mark.parametrize(
  691. "constructor",
  692. [Timestamp, lambda x: Timestamp(x).to_pydatetime()],
  693. )
  694. def test_to_datetime_mixed_datetime_and_string_with_format_raises(
  695. self, fmt, args, constructor
  696. ):
  697. # https://github.com/pandas-dev/pandas/issues/49298
  698. # note: ISO8601 formats go down a fastpath, so we need to check both
  699. # a ISO8601 format and a non-ISO8601 one
  700. ts1 = constructor(args[0])
  701. ts2 = constructor(args[1])
  702. with pytest.raises(
  703. ValueError, match="cannot be converted to datetime64 unless utc=True"
  704. ):
  705. to_datetime([ts1, ts2], format=fmt, utc=False)
  706. def test_to_datetime_np_str(self):
  707. # GH#32264
  708. # GH#48969
  709. value = np.str_("2019-02-04 10:18:46.297000+0000")
  710. ser = Series([value])
  711. exp = Timestamp("2019-02-04 10:18:46.297000", tz="UTC")
  712. assert to_datetime(value) == exp
  713. assert to_datetime(ser.iloc[0]) == exp
  714. res = to_datetime([value])
  715. expected = Index([exp])
  716. tm.assert_index_equal(res, expected)
  717. res = to_datetime(ser)
  718. expected = Series(expected)
  719. tm.assert_series_equal(res, expected)
  720. @pytest.mark.parametrize(
  721. "s, _format, dt",
  722. [
  723. ["2015-1-1", "%G-%V-%u", datetime(2014, 12, 29, 0, 0)],
  724. ["2015-1-4", "%G-%V-%u", datetime(2015, 1, 1, 0, 0)],
  725. ["2015-1-7", "%G-%V-%u", datetime(2015, 1, 4, 0, 0)],
  726. ],
  727. )
  728. def test_to_datetime_iso_week_year_format(self, s, _format, dt):
  729. # See GH#16607
  730. assert to_datetime(s, format=_format) == dt
  731. @pytest.mark.parametrize(
  732. "msg, s, _format",
  733. [
  734. [
  735. "ISO week directive '%V' is incompatible with the year directive "
  736. "'%Y'. Use the ISO year '%G' instead.",
  737. "1999 50",
  738. "%Y %V",
  739. ],
  740. [
  741. "ISO year directive '%G' must be used with the ISO week directive "
  742. "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
  743. "1999 51",
  744. "%G %V",
  745. ],
  746. [
  747. "ISO year directive '%G' must be used with the ISO week directive "
  748. "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
  749. "1999 Monday",
  750. "%G %A",
  751. ],
  752. [
  753. "ISO year directive '%G' must be used with the ISO week directive "
  754. "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
  755. "1999 Mon",
  756. "%G %a",
  757. ],
  758. [
  759. "ISO year directive '%G' must be used with the ISO week directive "
  760. "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
  761. "1999 6",
  762. "%G %w",
  763. ],
  764. [
  765. "ISO year directive '%G' must be used with the ISO week directive "
  766. "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
  767. "1999 6",
  768. "%G %u",
  769. ],
  770. [
  771. "ISO year directive '%G' must be used with the ISO week directive "
  772. "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
  773. "2051",
  774. "%G",
  775. ],
  776. [
  777. "Day of the year directive '%j' is not compatible with ISO year "
  778. "directive '%G'. Use '%Y' instead.",
  779. "1999 51 6 256",
  780. "%G %V %u %j",
  781. ],
  782. [
  783. "ISO week directive '%V' is incompatible with the year directive "
  784. "'%Y'. Use the ISO year '%G' instead.",
  785. "1999 51 Sunday",
  786. "%Y %V %A",
  787. ],
  788. [
  789. "ISO week directive '%V' is incompatible with the year directive "
  790. "'%Y'. Use the ISO year '%G' instead.",
  791. "1999 51 Sun",
  792. "%Y %V %a",
  793. ],
  794. [
  795. "ISO week directive '%V' is incompatible with the year directive "
  796. "'%Y'. Use the ISO year '%G' instead.",
  797. "1999 51 1",
  798. "%Y %V %w",
  799. ],
  800. [
  801. "ISO week directive '%V' is incompatible with the year directive "
  802. "'%Y'. Use the ISO year '%G' instead.",
  803. "1999 51 1",
  804. "%Y %V %u",
  805. ],
  806. [
  807. "ISO week directive '%V' must be used with the ISO year directive "
  808. "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
  809. "20",
  810. "%V",
  811. ],
  812. [
  813. "ISO week directive '%V' must be used with the ISO year directive "
  814. "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
  815. "1999 51 Sunday",
  816. "%V %A",
  817. ],
  818. [
  819. "ISO week directive '%V' must be used with the ISO year directive "
  820. "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
  821. "1999 51 Sun",
  822. "%V %a",
  823. ],
  824. [
  825. "ISO week directive '%V' must be used with the ISO year directive "
  826. "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
  827. "1999 51 1",
  828. "%V %w",
  829. ],
  830. [
  831. "ISO week directive '%V' must be used with the ISO year directive "
  832. "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
  833. "1999 51 1",
  834. "%V %u",
  835. ],
  836. [
  837. "Day of the year directive '%j' is not compatible with ISO year "
  838. "directive '%G'. Use '%Y' instead.",
  839. "1999 50",
  840. "%G %j",
  841. ],
  842. [
  843. "ISO week directive '%V' must be used with the ISO year directive "
  844. "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
  845. "20 Monday",
  846. "%V %A",
  847. ],
  848. ],
  849. )
  850. @pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
  851. def test_error_iso_week_year(self, msg, s, _format, errors):
  852. # See GH#16607, GH#50308
  853. # This test checks for errors thrown when giving the wrong format
  854. # However, as discussed on PR#25541, overriding the locale
  855. # causes a different error to be thrown due to the format being
  856. # locale specific, but the test data is in english.
  857. # Therefore, the tests only run when locale is not overwritten,
  858. # as a sort of solution to this problem.
  859. if locale.getlocale() != ("zh_CN", "UTF-8") and locale.getlocale() != (
  860. "it_IT",
  861. "UTF-8",
  862. ):
  863. with pytest.raises(ValueError, match=msg):
  864. to_datetime(s, format=_format, errors=errors)
  865. @pytest.mark.parametrize("tz", [None, "US/Central"])
  866. def test_to_datetime_dtarr(self, tz):
  867. # DatetimeArray
  868. dti = date_range("1965-04-03", periods=19, freq="2W", tz=tz)
  869. arr = DatetimeArray(dti)
  870. result = to_datetime(arr)
  871. assert result is arr
  872. def test_to_datetime_pydatetime(self):
  873. actual = to_datetime(datetime(2008, 1, 15))
  874. assert actual == datetime(2008, 1, 15)
  875. def test_to_datetime_YYYYMMDD(self):
  876. actual = to_datetime("20080115")
  877. assert actual == datetime(2008, 1, 15)
  878. def test_to_datetime_unparsable_ignore(self):
  879. # unparsable
  880. ser = "Month 1, 1999"
  881. assert to_datetime(ser, errors="ignore") == ser
  882. @td.skip_if_windows # `tm.set_timezone` does not work in windows
  883. def test_to_datetime_now(self):
  884. # See GH#18666
  885. with tm.set_timezone("US/Eastern"):
  886. # GH#18705
  887. now = Timestamp("now")
  888. pdnow = to_datetime("now")
  889. pdnow2 = to_datetime(["now"])[0]
  890. # These should all be equal with infinite perf; this gives
  891. # a generous margin of 10 seconds
  892. assert abs(pdnow._value - now._value) < 1e10
  893. assert abs(pdnow2._value - now._value) < 1e10
  894. assert pdnow.tzinfo is None
  895. assert pdnow2.tzinfo is None
  @td.skip_if_windows  # `tm.set_timezone` does not work in windows
  @pytest.mark.parametrize("tz", ["Pacific/Auckland", "US/Samoa"])
  def test_to_datetime_today(self, tz):
      """Check that parsing "today" agrees with numpy and Timestamp, tz-naive."""
      # See GH#18666
      # One timezone is far ahead of UTC and the other far behind, so one of
      # them will _almost_ always be on a different day from UTC.
      # Unfortunately, between 12 and 1 AM Samoa time both of these timezones
      # _and_ UTC are all on the same day, so during that hour this test
      # will not detect the regression introduced in #18666.
      with tm.set_timezone(tz):
          np_today = np.datetime64("today").astype("datetime64[ns]").astype(np.int64)
          from_scalar = to_datetime("today")
          from_list = to_datetime(["today"])[0]
          ts_from_string = Timestamp("today")
          ts_from_method = Timestamp.today().as_unit("ns")

          # With infinite perf these would be identical; allow a generous
          # margin of 10 seconds.
          assert abs(from_scalar.normalize()._value - np_today) < 1e10
          assert abs(from_list.normalize()._value - np_today) < 1e10
          assert abs(from_scalar._value - ts_from_string._value) < 1e10
          assert abs(from_scalar._value - ts_from_method._value) < 1e10

          assert from_scalar.tzinfo is None
          assert from_list.tzinfo is None
  @pytest.mark.parametrize("arg", ["now", "today"])
  def test_to_datetime_today_now_unicode_bytes(self, arg):
      """Smoke test: a one-element list holding "now"/"today" parses without raising."""
      values = [arg]
      to_datetime(values)
  @pytest.mark.parametrize(
      "format, expected_ds",
      [
          ("%Y-%m-%d %H:%M:%S%z", "2020-01-03"),
          ("%Y-%d-%m %H:%M:%S%z", "2020-03-01"),
          (None, "2020-01-03"),
      ],
  )
  @pytest.mark.parametrize(
      "string, attribute",
      [
          ("now", "utcnow"),
          ("today", "today"),
      ],
  )
  def test_to_datetime_now_with_format(self, format, expected_ds, string, attribute):
      """Check "now"/"today" mixed with a dated string, with and without ``format``."""
      # https://github.com/pandas-dev/pandas/issues/50359
      values = ["2020-01-03 00:00:00Z", string]
      result = to_datetime(values, format=format, utc=True)

      # `attribute` names the Timestamp constructor that yields the
      # reference value for `string`.
      reference = getattr(Timestamp, attribute)()
      expected = DatetimeIndex([expected_ds, reference], dtype="datetime64[ns, UTC]")

      largest_gap = (expected - result).max()
      assert largest_gap.total_seconds() < 1
  @pytest.mark.parametrize(
      "dt", [np.datetime64("2000-01-01"), np.datetime64("2000-01-02")]
  )
  def test_to_datetime_dt64s(self, cache, dt):
      """Check that a ``np.datetime64`` scalar converts to the matching Timestamp."""
      converted = to_datetime(dt, cache=cache)
      assert converted == Timestamp(dt)
  @pytest.mark.parametrize(
      "arg, format",
      [
          ("2001-01-01", "%Y-%m-%d"),
          ("01-01-2001", "%d-%m-%Y"),
      ],
  )
  def test_to_datetime_dt64s_and_str(self, arg, format):
      """Check a string and a ``np.datetime64`` parsed together under ``format``."""
      # https://github.com/pandas-dev/pandas/issues/50036
      mixed_input = [arg, np.datetime64("2020-01-01")]
      result = to_datetime(mixed_input, format=format)

      tm.assert_index_equal(result, DatetimeIndex(["2001-01-01", "2020-01-01"]))
  @pytest.mark.parametrize(
      "dt", [np.datetime64("1000-01-01"), np.datetime64("5000-01-02")]
  )
  @pytest.mark.parametrize("errors", ["raise", "ignore", "coerce"])
  def test_to_datetime_dt64s_out_of_ns_bounds(self, cache, dt, errors):
      """Check that these datetime64 scalars come back as second-unit Timestamps."""
      # GH#50369 We cast to the nearest supported reso, i.e. "s"
      converted = to_datetime(dt, errors=errors, cache=cache)
      assert isinstance(converted, Timestamp)
      assert converted.unit == "s"
      assert converted.asm8 == dt

      # The Timestamp constructor is expected to behave the same way.
      constructed = Timestamp(dt)
      assert constructed.unit == "s"
      assert constructed.asm8 == dt
  def test_to_datetime_dt64d_out_of_bounds(self, cache):
      """Check handling of a day-unit datetime64 built from the int64 maximum."""
      huge_day = np.datetime64(np.iinfo(np.int64).max, "D")
      msg = "Out of bounds nanosecond timestamp"

      with pytest.raises(OutOfBoundsDatetime, match=msg):
          Timestamp(huge_day)
      with pytest.raises(OutOfBoundsDatetime, match=msg):
          to_datetime(huge_day, errors="raise", cache=cache)

      # With coercion the value becomes NaT instead of raising.
      coerced = to_datetime(huge_day, errors="coerce", cache=cache)
      assert coerced is NaT
  @pytest.mark.parametrize("unit", ["s", "D"])
  def test_to_datetime_array_of_dt64s(self, cache, unit):
      """Check lists of datetime64 values, with and without an out-of-bounds entry."""
      # https://github.com/pandas-dev/pandas/issues/31491
      # Need at least 50 to ensure cache is used.
      dts = [
          np.datetime64("2000-01-01", unit),
          np.datetime64("2000-01-02", unit),
      ] * 30

      # Assuming all datetimes are in bounds, to_datetime() returns
      # an array that is equal to Timestamp() parsing
      result = to_datetime(dts, cache=cache)
      # FIXME: behavior should not depend on cache
      in_bounds_dtype = "M8[s]" if cache else "M8[ns]"
      expected = DatetimeIndex([Timestamp(x).asm8 for x in dts], dtype=in_bounds_dtype)
      tm.assert_index_equal(result, expected)

      # A list of datetimes where the last one is out of bounds
      dts_with_oob = dts + [np.datetime64("9999-01-01")]

      # As of GH#?? we do not raise in this case
      to_datetime(dts_with_oob, errors="raise")

      result = to_datetime(dts_with_oob, errors="coerce", cache=cache)
      # FIXME: shouldn't depend on cache!
      if cache:
          expected = DatetimeIndex(np.array(dts_with_oob, dtype="M8[s]"))
      else:
          first_two = [Timestamp(dts_with_oob[0]).asm8, Timestamp(dts_with_oob[1]).asm8]
          expected = DatetimeIndex(first_two * 30 + [NaT])
      tm.assert_index_equal(result, expected)

      # With errors='ignore', out of bounds datetime64s
      # are converted to their .item(), which depending on the version of
      # numpy is either a python datetime.datetime or datetime.date
      result = to_datetime(dts_with_oob, errors="ignore", cache=cache)
      if not cache:
          # FIXME: shouldn't depend on cache!
          expected = Index(dts_with_oob)
      # NOTE(review): with cache=True the `expected` from the coerce step is
      # reused here -- this nesting is reconstructed; confirm against upstream.
      tm.assert_index_equal(result, expected)
  1021. def test_out_of_bounds_errors_ignore(self):
  1022. # https://github.com/pandas-dev/pandas/issues/50587
  1023. result = to_datetime(np.datetime64("9999-01-01"), errors="ignore")
  1024. expected = np.datetime64("9999-01-01")
  1025. assert result == expected
  1026. def test_to_datetime_tz(self, cache):
  1027. # xref 8260
  1028. # uniform returns a DatetimeIndex
  1029. arr = [
  1030. Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"),
  1031. Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"),
  1032. ]
  1033. result = to_datetime(arr, cache=cache)
  1034. expected = DatetimeIndex(
  1035. ["2013-01-01 13:00:00", "2013-01-02 14:00:00"], tz="US/Pacific"
  1036. )
  1037. tm.assert_index_equal(result, expected)
  def test_to_datetime_tz_mixed(self, cache):
      """Check mixed-timezone Timestamps under each ``errors`` mode."""
      # mixed tzs will raise if errors='raise'
      # https://github.com/pandas-dev/pandas/issues/50585
      pacific = Timestamp("2013-01-01 13:00:00", tz="US/Pacific")
      eastern = Timestamp("2013-01-02 14:00:00", tz="US/Eastern")
      mixed = [pacific, eastern]

      msg = (
          "Tz-aware datetime.datetime cannot be "
          "converted to datetime64 unless utc=True"
      )
      with pytest.raises(ValueError, match=msg):
          to_datetime(mixed, cache=cache)

      # errors="ignore": expected to come back as an object-dtype Index.
      ignored = to_datetime(mixed, cache=cache, errors="ignore")
      expected_ignored = Index(
          [
              Timestamp("2013-01-01 13:00:00-08:00"),
              Timestamp("2013-01-02 14:00:00-05:00"),
          ],
          dtype="object",
      )
      tm.assert_index_equal(ignored, expected_ignored)

      # errors="coerce": the second entry is expected to become NaT.
      coerced = to_datetime(mixed, cache=cache, errors="coerce")
      expected_coerced = DatetimeIndex(
          ["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[ns, US/Pacific]"
      )
      tm.assert_index_equal(coerced, expected_coerced)
  def test_to_datetime_different_offsets(self, cache):
      """Check strings with two different UTC offsets against dateutil parsing."""
      # inspired by asv timeseries.ToDatetimeNONISO8601 benchmark
      # see GH-26097 for more
      plus_four = "March 1, 2018 12:00:00+0400"
      plus_five = "March 1, 2018 12:00:00+0500"
      strings = [plus_four] * 5 + [plus_five] * 5

      expected = Index(list(map(parse, strings)))
      result = to_datetime(strings, cache=cache)
      tm.assert_index_equal(result, expected)
  def test_to_datetime_tz_pytz(self, cache):
      """Check pytz-localized datetimes converted with ``utc=True``."""
      # see gh-8260
      us_eastern = pytz.timezone("US/Eastern")
      naive_values = (
          datetime(year=2000, month=1, day=1, hour=3, minute=0),
          datetime(year=2000, month=6, day=1, hour=3, minute=0),
      )
      localized = np.array(
          [us_eastern.localize(naive) for naive in naive_values],
          dtype=object,
      )

      result = to_datetime(localized, utc=True, cache=cache)
      expected = DatetimeIndex(
          ["2000-01-01 08:00:00+00:00", "2000-06-01 07:00:00+00:00"],
          dtype="datetime64[ns, UTC]",
          freq=None,
      )
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize(
      "init_constructor, end_constructor",
      [
          (Index, DatetimeIndex),
          (list, DatetimeIndex),
          (np.array, DatetimeIndex),
          (Series, Series),
      ],
  )
  def test_to_datetime_utc_true(self, cache, init_constructor, end_constructor):
      """Check ``utc=True`` with an explicit format across several containers."""
      # See gh-11934 & gh-6415
      raw_strings = ["20100102 121314", "20100102 121315"]
      container = init_constructor(raw_strings)
      result = to_datetime(container, format="%Y%m%d %H%M%S", utc=True, cache=cache)

      expected = end_constructor(
          [
              Timestamp("2010-01-02 12:13:14", tz="utc"),
              Timestamp("2010-01-02 12:13:15", tz="utc"),
          ]
      )
      tm.assert_equal(result, expected)
  @pytest.mark.parametrize(
      "scalar, expected",
      [
          ["20100102 121314", Timestamp("2010-01-02 12:13:14", tz="utc")],
          ["20100102 121315", Timestamp("2010-01-02 12:13:15", tz="utc")],
      ],
  )
  def test_to_datetime_utc_true_scalar(self, cache, scalar, expected):
      """Scalar counterpart of the ``utc=True`` container test."""
      parse_kwargs = {"format": "%Y%m%d %H%M%S", "utc": True, "cache": cache}
      assert to_datetime(scalar, **parse_kwargs) == expected
  1127. def test_to_datetime_utc_true_with_series_single_value(self, cache):
  1128. # GH 15760 UTC=True with Series
  1129. ts = 1.5e18
  1130. result = to_datetime(Series([ts]), utc=True, cache=cache)
  1131. expected = Series([Timestamp(ts, tz="utc")])
  1132. tm.assert_series_equal(result, expected)
  1133. def test_to_datetime_utc_true_with_series_tzaware_string(self, cache):
  1134. ts = "2013-01-01 00:00:00-01:00"
  1135. expected_ts = "2013-01-01 01:00:00"
  1136. data = Series([ts] * 3)
  1137. result = to_datetime(data, utc=True, cache=cache)
  1138. expected = Series([Timestamp(expected_ts, tz="utc")] * 3)
  1139. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize(
      "date, dtype",
      [
          ("2013-01-01 01:00:00", "datetime64[ns]"),
          ("2013-01-01 01:00:00", "datetime64[ns, UTC]"),
      ],
  )
  def test_to_datetime_utc_true_with_series_datetime_ns(self, cache, date, dtype):
      """Check ``utc=True`` on a Series that already has a datetime64 dtype."""
      expected = Series([Timestamp("2013-01-01 01:00:00", tz="UTC")])
      typed_input = Series([date], dtype=dtype)

      result = to_datetime(typed_input, utc=True, cache=cache)
      tm.assert_series_equal(result, expected)
  @td.skip_if_no("psycopg2")
  def test_to_datetime_tz_psycopg2(self, request, cache):
      """Check conversion of values using psycopg2 fixed-offset timezones."""
      # xref 8260
      import psycopg2

      # https://www.psycopg.org/docs/news.html#what-s-new-in-psycopg-2-9
      installed = Version(psycopg2.__version__.split()[0])
      request.node.add_marker(
          pytest.mark.xfail(
              installed > Version("2.8.7"),
              raises=AttributeError,
              reason="psycopg2.tz is deprecated (and appears dropped) in 2.9",
          )
      )

      # misc cases
      minus_five_hours = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)
      minus_four_hours = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None)
      aware_values = np.array(
          [
              datetime(2000, 1, 1, 3, 0, tzinfo=minus_five_hours),
              datetime(2000, 6, 1, 3, 0, tzinfo=minus_four_hours),
          ],
          dtype=object,
      )

      result = to_datetime(aware_values, errors="coerce", utc=True, cache=cache)
      expected = DatetimeIndex(
          ["2000-01-01 08:00:00+00:00", "2000-06-01 07:00:00+00:00"],
          dtype="datetime64[ns, UTC]",
          freq=None,
      )
      tm.assert_index_equal(result, expected)

      # dtype coercion
      fixed_offset_index = DatetimeIndex(
          ["2000-01-01 08:00:00"],
          tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None),
      )
      assert is_datetime64_ns_dtype(fixed_offset_index)

      # tz coercion
      result = to_datetime(fixed_offset_index, errors="coerce", cache=cache)
      tm.assert_index_equal(result, fixed_offset_index)

      result = to_datetime(fixed_offset_index, errors="coerce", utc=True, cache=cache)
      expected = DatetimeIndex(["2000-01-01 13:00:00"], dtype="datetime64[ns, UTC]")
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize("arg", [True, False])
  def test_datetime_bool(self, cache, arg):
      """Check a bool scalar under each ``errors`` mode."""
      # GH13176
      msg = r"dtype bool cannot be converted to datetime64\[ns\]"
      with pytest.raises(TypeError, match=msg):
          to_datetime(arg)

      coerced = to_datetime(arg, errors="coerce", cache=cache)
      assert coerced is NaT

      ignored = to_datetime(arg, errors="ignore", cache=cache)
      assert ignored is arg
  def test_datetime_bool_arrays_mixed(self, cache):
      """Check list inputs that mix bools with datetime-like values.

      Three cases are exercised: a bool next to a ``datetime`` raises
      TypeError, a bool next to a date string raises ValueError, and with
      ``errors="coerce"`` the ``False`` and ``NaT`` entries are expected to
      become NaT while ``0`` and ``0.0`` match ``to_datetime(0)``.
      """
      # The message names the type of the offending element, which is a bool.
      # Build it from `bool` itself rather than from `type(cache)`: the
      # fixture only produced the right text because it is also a bool.
      msg = f"{bool} is not convertible to datetime"
      with pytest.raises(TypeError, match=msg):
          to_datetime([False, datetime.today()], cache=cache)

      with pytest.raises(
          ValueError,
          match=(
              r'^time data "True" doesn\'t match format "%Y%m%d", '
              f"at position 1. {PARSING_ERR_MSG}$"
          ),
      ):
          to_datetime(["20130101", True], cache=cache)

      tm.assert_index_equal(
          to_datetime([0, False, NaT, 0.0], errors="coerce", cache=cache),
          DatetimeIndex(
              [to_datetime(0, cache=cache), NaT, NaT, to_datetime(0, cache=cache)]
          ),
      )
  @pytest.mark.parametrize("arg", [bool, to_datetime])
  def test_datetime_invalid_datatype(self, arg):
      """Check that a type object or a function raises TypeError."""
      # GH13176
      expected_msg = "is not convertible to datetime"
      with pytest.raises(TypeError, match=expected_msg):
          to_datetime(arg)
  @pytest.mark.parametrize("errors", ["coerce", "raise", "ignore"])
  def test_invalid_format_raises(self, errors):
      """Check that a malformed format string raises in every ``errors`` mode."""
      # https://github.com/pandas-dev/pandas/issues/50255
      bad_format = "H%:M%:S%"
      expected_msg = "':' is a bad directive in format 'H%:M%:S%"
      with pytest.raises(ValueError, match=expected_msg):
          to_datetime(["00:00:00"], format=bad_format, errors=errors)
  @pytest.mark.parametrize("value", ["a", "00:01:99"])
  @pytest.mark.parametrize("format", [None, "%H:%M:%S"])
  def test_datetime_invalid_scalar(self, value, format):
      """Check an unparsable scalar string under each ``errors`` mode."""
      # GH24763
      ignored = to_datetime(value, errors="ignore", format=format)
      assert ignored == value

      coerced = to_datetime(value, errors="coerce", format=format)
      assert coerced is NaT

      # One alternative per (value, format) combination; any may match.
      acceptable_messages = [
          r'^time data "a" doesn\'t match format "%H:%M:%S", at position 0. '
          f"{PARSING_ERR_MSG}$",
          r'^Given date string "a" not likely a datetime, at position 0$',
          r'^unconverted data remains when parsing with format "%H:%M:%S": "9", '
          f"at position 0. {PARSING_ERR_MSG}$",
          r"^second must be in 0..59: 00:01:99, at position 0$",
      ]
      msg = "|".join(acceptable_messages)
      with pytest.raises(ValueError, match=msg):
          to_datetime(value, errors="raise", format=format)
  @pytest.mark.parametrize("value", ["3000/12/11 00:00:00"])
  @pytest.mark.parametrize("format", [None, "%H:%M:%S"])
  def test_datetime_outofbounds_scalar(self, value, format):
      """Check a year-3000 scalar string under each ``errors`` mode."""
      # GH24763
      ignored = to_datetime(value, errors="ignore", format=format)
      assert ignored == value

      coerced = to_datetime(value, errors="coerce", format=format)
      assert coerced is NaT

      # The exception type and message depend on whether a format is given.
      if format is None:
          expected_exc = OutOfBoundsDatetime
          msg = "^Out of bounds .*, at position 0$"
      else:
          expected_exc = ValueError
          msg = r'^time data ".*" doesn\'t match format ".*", at position 0.'

      with pytest.raises(expected_exc, match=msg):
          to_datetime(value, errors="raise", format=format)
  @pytest.mark.parametrize(
      "values", [["a"], ["00:01:99"], ["a", "b", "99:00:00"]]
  )
  @pytest.mark.parametrize("format", [None, "%H:%M:%S"])
  def test_datetime_invalid_index(self, values, format):
      """Check lists of unparsable strings under each ``errors`` mode."""
      # GH24763
      # Not great to have logic in tests, but this one's hard to
      # parametrise over
      warn = UserWarning if format is None and len(values) > 1 else None
      warn_match = "Could not infer format"

      with tm.assert_produces_warning(warn, match=warn_match):
          ignored = to_datetime(values, errors="ignore", format=format)
      tm.assert_index_equal(ignored, Index(values))

      with tm.assert_produces_warning(warn, match=warn_match):
          coerced = to_datetime(values, errors="coerce", format=format)
      tm.assert_index_equal(coerced, DatetimeIndex([NaT] * len(values)))

      # One alternative per (values, format) combination; any may match.
      acceptable_messages = [
          r'^Given date string "a" not likely a datetime, at position 0$',
          r'^time data "a" doesn\'t match format "%H:%M:%S", at position 0. '
          f"{PARSING_ERR_MSG}$",
          r'^unconverted data remains when parsing with format "%H:%M:%S": "9", '
          f"at position 0. {PARSING_ERR_MSG}$",
          r"^second must be in 0..59: 00:01:99, at position 0$",
      ]
      msg = "|".join(acceptable_messages)
      with pytest.raises(ValueError, match=msg), tm.assert_produces_warning(
          warn, match=warn_match
      ):
          to_datetime(values, errors="raise", format=format)
  @pytest.mark.parametrize("utc", [True, None])
  @pytest.mark.parametrize("format", ["%Y%m%d %H:%M:%S", None])
  @pytest.mark.parametrize("constructor", [list, tuple, np.array, Index, deque])
  def test_to_datetime_cache(self, utc, format, constructor):
      """Check that cached and uncached parsing agree on 10**5 repeated strings."""
      date_string = "20130101 00:00:00"
      data = constructor([date_string] * 10**5)

      with_cache = to_datetime(data, utc=utc, format=format, cache=True)
      without_cache = to_datetime(data, utc=utc, format=format, cache=False)
      tm.assert_index_equal(with_cache, without_cache)
  1308. def test_to_datetime_from_deque(self):
  1309. # GH 29403
  1310. result = to_datetime(deque([Timestamp("2010-06-02 09:30:00")] * 51))
  1311. expected = to_datetime([Timestamp("2010-06-02 09:30:00")] * 51)
  1312. tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize("utc", [True, None])
  @pytest.mark.parametrize("format", ["%Y%m%d %H:%M:%S", None])
  def test_to_datetime_cache_series(self, utc, format):
      """Check that cached and uncached parsing agree for a Series input."""
      date_string = "20130101 00:00:00"
      data = Series([date_string] * 10**5)

      with_cache = to_datetime(data, utc=utc, format=format, cache=True)
      without_cache = to_datetime(data, utc=utc, format=format, cache=False)
      tm.assert_series_equal(with_cache, without_cache)
  1322. def test_to_datetime_cache_scalar(self):
  1323. date = "20130101 00:00:00"
  1324. result = to_datetime(date, cache=True)
  1325. expected = Timestamp("20130101 00:00:00")
  1326. assert result == expected
  @pytest.mark.parametrize(
      "datetimelikes,expected_values",
      (
          (
              (None, np.nan) + (NaT,) * start_caching_at,
              (NaT,) * (start_caching_at + 2),
          ),
          (
              (None, Timestamp("2012-07-26")) + (NaT,) * start_caching_at,
              (NaT, Timestamp("2012-07-26")) + (NaT,) * start_caching_at,
          ),
          (
              (None,)
              + (NaT,) * start_caching_at
              + ("2012 July 26", Timestamp("2012-07-26")),
              (NaT,) * (start_caching_at + 1)
              + (Timestamp("2012-07-26"), Timestamp("2012-07-26")),
          ),
      ),
  )
  def test_convert_object_to_datetime_with_cache(
      self, datetimelikes, expected_values
  ):
      """Check coercion of object Series longer than ``start_caching_at``."""
      # GH#39882
      object_series = Series(datetimelikes, dtype="object")
      converted = to_datetime(object_series, errors="coerce")

      expected = Series(expected_values, dtype="datetime64[ns]")
      tm.assert_series_equal(converted, expected)
  @pytest.mark.parametrize("cache", [True, False])
  @pytest.mark.parametrize(
      ("input", "expected"),
      (
          (
              Series([NaT] * 20 + [None] * 20, dtype="object"),
              Series([NaT] * 40, dtype="datetime64[ns]"),
          ),
          (
              Series([NaT] * 60 + [None] * 60, dtype="object"),
              Series([NaT] * 120, dtype="datetime64[ns]"),
          ),
          (Series([None] * 20), Series([NaT] * 20, dtype="datetime64[ns]")),
          (Series([None] * 60), Series([NaT] * 60, dtype="datetime64[ns]")),
          (Series([""] * 20), Series([NaT] * 20, dtype="datetime64[ns]")),
          (Series([""] * 60), Series([NaT] * 60, dtype="datetime64[ns]")),
          (Series([pd.NA] * 20), Series([NaT] * 20, dtype="datetime64[ns]")),
          (Series([pd.NA] * 60), Series([NaT] * 60, dtype="datetime64[ns]")),
          # `np.nan` is the canonical spelling; the `np.NaN` alias was
          # removed in NumPy 2.0 and would fail at collection time.
          (Series([np.nan] * 20), Series([NaT] * 20, dtype="datetime64[ns]")),
          (Series([np.nan] * 60), Series([NaT] * 60, dtype="datetime64[ns]")),
      ),
  )
  def test_to_datetime_converts_null_like_to_nat(self, cache, input, expected):
      """Check that Series of null-like values convert to all-NaT datetime64 Series.

      Each case is provided at two lengths (20/40 and 60/120 elements) and
      covers NaT, None, empty strings, ``pd.NA`` and ``np.nan``.
      """
      # GH35888
      result = to_datetime(input, cache=cache)
      tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize(
      "date, format",
      [
          ("2017-20", "%Y-%W"),
          ("20 Sunday", "%W %A"),
          ("20 Sun", "%W %a"),
          ("2017-21", "%Y-%U"),
          ("20 Sunday", "%U %A"),
          ("20 Sun", "%U %a"),
      ],
  )
  def test_week_without_day_and_calendar_year(self, date, format):
      """Check that %W/%U formats lacking a day or a year raise ValueError."""
      # GH16774
      expected_msg = "Cannot use '%W' or '%U' without day and year"
      with pytest.raises(ValueError, match=expected_msg):
          to_datetime(date, format=format)
  def test_to_datetime_coerce(self):
      """Check ``errors="coerce"`` on two offset strings plus one invalid date."""
      # GH 26122
      inputs = [
          "March 1, 2018 12:00:00+0400",
          "March 1, 2018 12:00:00+0500",
          "20100240",
      ]
      result = to_datetime(inputs, errors="coerce")

      # Offsets are given in seconds: 14400 = +04:00, 18000 = +05:00.
      plus_four = datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 14400))
      plus_five = datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 18000))
      expected = Index([plus_four, plus_five, NaT])
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize(
      "string_arg, format",
      [("March 1, 2018", "%B %d, %Y"), ("2018-03-01", "%Y-%m-%d")],
  )
  @pytest.mark.parametrize(
      "outofbounds",
      [
          datetime(9999, 1, 1),
          date(9999, 1, 1),
          np.datetime64("9999-01-01"),
          "January 1, 9999",
          "9999-01-01",
      ],
  )
  def test_to_datetime_coerce_oob(self, string_arg, format, outofbounds):
      """Check that a year-9999 entry is coerced to NaT beside a valid string."""
      # https://github.com/pandas-dev/pandas/issues/50255
      inputs = [string_arg, outofbounds]
      result = to_datetime(inputs, errors="coerce", format=format)

      tm.assert_index_equal(result, DatetimeIndex([datetime(2018, 3, 1), NaT]))
  @pytest.mark.parametrize(
      "errors, expected",
      [
          ("coerce", Index([NaT, NaT])),
          ("ignore", Index(["200622-12-31", "111111-24-11"])),
      ],
  )
  def test_to_datetime_malformed_no_raise(self, errors, expected):
      """Check malformed strings under the non-raising ``errors`` modes."""
      # GH 28299
      # GH 48633
      malformed = ["200622-12-31", "111111-24-11"]
      warning_check = tm.assert_produces_warning(
          UserWarning, match="Could not infer format"
      )
      with warning_check:
          result = to_datetime(malformed, errors=errors)
      tm.assert_index_equal(result, expected)
  def test_to_datetime_malformed_raise(self):
      """Check that malformed strings raise ValueError with ``errors="raise"``."""
      # GH 48633
      malformed = ["200622-12-31", "111111-24-11"]
      msg = (
          'Parsed string "200622-12-31" gives an invalid tzoffset, which must '
          r"be between -timedelta\(hours=24\) and timedelta\(hours=24\), "
          "at position 0"
      )
      # A format-inference warning is expected alongside the error.
      with pytest.raises(ValueError, match=msg), tm.assert_produces_warning(
          UserWarning, match="Could not infer format"
      ):
          to_datetime(malformed, errors="raise")
  1472. def test_iso_8601_strings_with_same_offset(self):
  1473. # GH 17697, 11736
  1474. ts_str = "2015-11-18 15:30:00+05:30"
  1475. result = to_datetime(ts_str)
  1476. expected = Timestamp(ts_str)
  1477. assert result == expected
  1478. expected = DatetimeIndex([Timestamp(ts_str)] * 2)
  1479. result = to_datetime([ts_str] * 2)
  1480. tm.assert_index_equal(result, expected)
  1481. result = DatetimeIndex([ts_str] * 2)
  1482. tm.assert_index_equal(result, expected)
  def test_iso_8601_strings_with_different_offsets(self):
      """Check ISO 8601 strings with differing offsets plus a NaT entry."""
      # GH 17697, 11736
      inputs = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT]
      result = to_datetime(inputs)

      # Offsets are given in seconds: 19800 = +05:30, 23400 = +06:30.
      expected_values = np.array(
          [
              datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)),
              datetime(2015, 11, 18, 16, 30, tzinfo=tzoffset(None, 23400)),
              NaT,
          ],
          dtype=object,
      )
      # GH 21864
      tm.assert_index_equal(result, Index(expected_values))
  1498. def test_iso_8601_strings_with_different_offsets_utc(self):
  1499. ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT]
  1500. result = to_datetime(ts_strings, utc=True)
  1501. expected = DatetimeIndex(
  1502. [Timestamp(2015, 11, 18, 10), Timestamp(2015, 11, 18, 10), NaT], tz="UTC"
  1503. )
  1504. tm.assert_index_equal(result, expected)
  def test_mixed_offsets_with_native_datetime_raises(self):
      """Check a Series mixing nulls, a Timestamp, offset strings and now/today."""
      # GH 25978
      vals = [
          "nan",
          Timestamp("1990-01-01"),
          "2015-03-14T16:15:14.123-08:00",
          "2019-03-04T21:56:32.620-07:00",
          None,
          "today",
          "now",
      ]
      ser = Series(vals)
      # GH#40111: the Series must hold the original objects, not copies.
      assert all(ser[pos] is val for pos, val in enumerate(vals))

      now = Timestamp("now")
      today = Timestamp("today")
      mixed = to_datetime(ser)

      expected_head = Series(
          [
              "NaT",
              Timestamp("1990-01-01"),
              Timestamp("2015-03-14T16:15:14.123-08:00").to_pydatetime(),
              Timestamp("2019-03-04T21:56:32.620-07:00").to_pydatetime(),
              None,
          ],
          dtype=object,
      )
      tm.assert_series_equal(mixed[:-2], expected_head)

      # mixed[-1] and mixed[-2] should match now and today to within
      # call-timing tolerances.
      # NOTE(review): the difference is signed and the references are sampled
      # before parsing, so this likely only bounds one direction -- confirm
      # whether abs() was intended.
      assert (now - mixed.iloc[-1]).total_seconds() <= 0.1
      assert (today - mixed.iloc[-2]).total_seconds() <= 0.1

      # Re-parsing the converted result is expected to raise.
      with pytest.raises(ValueError, match="Tz-aware datetime.datetime"):
          to_datetime(mixed)
  1538. def test_non_iso_strings_with_tz_offset(self):
  1539. result = to_datetime(["March 1, 2018 12:00:00+0400"] * 2)
  1540. expected = DatetimeIndex(
  1541. [datetime(2018, 3, 1, 12, tzinfo=timezone(timedelta(minutes=240)))] * 2
  1542. )
  1543. tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize(
      "ts, expected",
      [
          (Timestamp("2018-01-01"), Timestamp("2018-01-01", tz="UTC")),
          (
              Timestamp("2018-01-01", tz="US/Pacific"),
              Timestamp("2018-01-01 08:00", tz="UTC"),
          ),
      ],
  )
  def test_timestamp_utc_true(self, ts, expected):
      """Check ``utc=True`` on naive and tz-aware Timestamp scalars."""
      # GH 24415
      converted = to_datetime(ts, utc=True)
      assert converted == expected
  @pytest.mark.parametrize("dt_str", ["00010101", "13000101", "30000101", "99990101"])
  def test_to_datetime_with_format_out_of_bounds(self, dt_str):
      """Check that these dates raise OutOfBoundsDatetime under ``%Y%m%d``."""
      # GH 9107
      expected_msg = "Out of bounds nanosecond timestamp"
      with pytest.raises(OutOfBoundsDatetime, match=expected_msg):
          to_datetime(dt_str, format="%Y%m%d")
  def test_to_datetime_utc(self):
      """Check that ``utc=True`` yields ``timezone.utc`` as the result's tz."""
      parsed = parse("2012-06-13T01:39:00Z")
      object_array = np.array([parsed], dtype=object)

      result = to_datetime(object_array, utc=True)
      assert result.tz is timezone.utc
  def test_to_datetime_fixed_offset(self):
      """Check that datetimes sharing ``fixed_off`` keep that tz on the result."""
      from pandas.tests.indexes.datetimes.test_timezones import fixed_off

      aware_dates = [datetime(2000, 1, day, tzinfo=fixed_off) for day in (1, 2, 3)]
      result = to_datetime(aware_dates)
      assert result.tz == fixed_off
  1577. class TestToDatetimeUnit:
  @pytest.mark.parametrize("unit", ["Y", "M"])
  @pytest.mark.parametrize("item", [150, float(150)])
  def test_to_datetime_month_or_year_unit_int(self, cache, unit, item):
      """Check that ``unit`` "Y"/"M" conversion matches the Timestamp constructor."""
      # GH#50870 Note we have separate tests that pd.Timestamp gets these right
      expected = DatetimeIndex([Timestamp(item, unit=unit)])

      from_list = to_datetime([item], unit=unit, cache=cache)
      tm.assert_index_equal(from_list, expected)

      # TODO: this should also work for a non-object ndarray, i.e.
      #  to_datetime(np.array([item]), unit=unit, cache=cache)

      object_array = np.array([item], dtype=object)
      from_object_array = to_datetime(object_array, unit=unit, cache=cache)
      tm.assert_index_equal(from_object_array, expected)
  @pytest.mark.parametrize("unit", ["Y", "M"])
  def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
      """Check non-round floats (and float-like strings) with ``unit`` "Y"/"M"."""
      # GH#50301
      # Match Timestamp behavior in disallowing non-round floats with
      # Y or M unit
      warn_msg = "strings will be parsed as datetime strings"
      msg = f"Conversion of non-round float with unit={unit} is ambiguous"

      with pytest.raises(ValueError, match=msg):
          to_datetime([1.5], unit=unit, errors="raise")
      with pytest.raises(ValueError, match=msg), tm.assert_produces_warning(
          FutureWarning, match=warn_msg
      ):
          to_datetime(["1.5"], unit=unit, errors="raise")

      # with errors="ignore" we also end up raising within the Timestamp
      # constructor; this may not be ideal
      with pytest.raises(ValueError, match=msg):
          to_datetime([1.5], unit=unit, errors="ignore")
      # TODO: we are NOT consistent with the Timestamp behavior in the
      #  float-like string case: to_datetime(["1.5"], unit=unit,
      #  errors="ignore") does not raise.

      # Coercion gives NaT for both the float and the float-like string.
      all_nat = Index([NaT], dtype="M8[ns]")
      coerced = to_datetime([1.5], unit=unit, errors="coerce")
      tm.assert_index_equal(coerced, all_nat)

      with tm.assert_produces_warning(FutureWarning, match=warn_msg):
          coerced = to_datetime(["1.5"], unit=unit, errors="coerce")
      tm.assert_index_equal(coerced, all_nat)

      # round floats are OK
      from_float = to_datetime([1.0], unit=unit)
      from_int = to_datetime([1], unit=unit)
      tm.assert_index_equal(from_float, from_int)
  def test_unit(self, cache):
      """Check that passing both ``unit`` and ``format`` raises ValueError."""
      # GH 11758
      # test proper behavior with errors
      expected_msg = "cannot specify both format and unit"
      with pytest.raises(ValueError, match=expected_msg):
          to_datetime([1], unit="D", format="%Y%m%d", cache=cache)
  def test_unit_array_mixed_nans(self, cache):
      """Check ``unit="D"`` on a list mixing numbers, null-likes and a huge int."""
      values = [11111111111111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""]

      # errors="ignore": object Index that keeps the huge integer as-is.
      ignored = to_datetime(values, unit="D", errors="ignore", cache=cache)
      one_day = Timestamp("1970-01-02")
      expected_ignored = Index(
          [11111111111111111, one_day, one_day] + [NaT] * 5,
          dtype=object,
      )
      tm.assert_index_equal(ignored, expected_ignored)

      # errors="coerce": the huge integer becomes NaT as well.
      coerced = to_datetime(values, unit="D", errors="coerce", cache=cache)
      expected_coerced = DatetimeIndex(
          ["NaT", "1970-01-02", "1970-01-02", "NaT", "NaT", "NaT", "NaT", "NaT"]
      )
      tm.assert_index_equal(coerced, expected_coerced)

      msg = "cannot convert input 11111111111111111 with the unit 'D'"
      with pytest.raises(OutOfBoundsDatetime, match=msg):
          to_datetime(values, unit="D", errors="raise", cache=cache)
  1652. def test_unit_array_mixed_nans_large_int(self, cache):
  1653. values = [1420043460000000000000000, iNaT, NaT, np.nan, "NaT"]
  1654. result = to_datetime(values, errors="ignore", unit="s", cache=cache)
  1655. expected = Index([1420043460000000000000000, NaT, NaT, NaT, NaT], dtype=object)
  1656. tm.assert_index_equal(result, expected)
  1657. result = to_datetime(values, errors="coerce", unit="s", cache=cache)
  1658. expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"])
  1659. tm.assert_index_equal(result, expected)
  1660. msg = "cannot convert input 1420043460000000000000000 with the unit 's'"
  1661. with pytest.raises(OutOfBoundsDatetime, match=msg):
  1662. to_datetime(values, errors="raise", unit="s", cache=cache)
  1663. def test_to_datetime_invalid_str_not_out_of_bounds_valuerror(self, cache):
  1664. # if we have a string, then we raise a ValueError
  1665. # and NOT an OutOfBoundsDatetime
  1666. msg = "non convertible value foo with the unit 's'"
  1667. with pytest.raises(ValueError, match=msg):
  1668. to_datetime("foo", errors="raise", unit="s", cache=cache)
  1669. @pytest.mark.parametrize("error", ["raise", "coerce", "ignore"])
  1670. def test_unit_consistency(self, cache, error):
  1671. # consistency of conversions
  1672. expected = Timestamp("1970-05-09 14:25:11")
  1673. result = to_datetime(11111111, unit="s", errors=error, cache=cache)
  1674. assert result == expected
  1675. assert isinstance(result, Timestamp)
  1676. @pytest.mark.parametrize("errors", ["ignore", "raise", "coerce"])
  1677. @pytest.mark.parametrize("dtype", ["float64", "int64"])
  1678. def test_unit_with_numeric(self, cache, errors, dtype):
  1679. # GH 13180
  1680. # coercions from floats/ints are ok
  1681. expected = DatetimeIndex(["2015-06-19 05:33:20", "2015-05-27 22:33:20"])
  1682. arr = np.array([1.434692e18, 1.432766e18]).astype(dtype)
  1683. result = to_datetime(arr, errors=errors, cache=cache)
  1684. tm.assert_index_equal(result, expected)
  1685. @pytest.mark.parametrize(
  1686. "exp, arr, warning",
  1687. [
  1688. [
  1689. ["NaT", "2015-06-19 05:33:20", "2015-05-27 22:33:20"],
  1690. ["foo", 1.434692e18, 1.432766e18],
  1691. UserWarning,
  1692. ],
  1693. [
  1694. ["2015-06-19 05:33:20", "2015-05-27 22:33:20", "NaT", "NaT"],
  1695. [1.434692e18, 1.432766e18, "foo", "NaT"],
  1696. None,
  1697. ],
  1698. ],
  1699. )
  1700. def test_unit_with_numeric_coerce(self, cache, exp, arr, warning):
  1701. # but we want to make sure that we are coercing
  1702. # if we have ints/strings
  1703. expected = DatetimeIndex(exp)
  1704. with tm.assert_produces_warning(warning, match="Could not infer format"):
  1705. result = to_datetime(arr, errors="coerce", cache=cache)
  1706. tm.assert_index_equal(result, expected)
  1707. @pytest.mark.parametrize(
  1708. "arr",
  1709. [
  1710. [Timestamp("20130101"), 1.434692e18, 1.432766e18],
  1711. [1.434692e18, 1.432766e18, Timestamp("20130101")],
  1712. ],
  1713. )
  1714. def test_unit_mixed(self, cache, arr):
  1715. # GH#50453 pre-2.0 with mixed numeric/datetimes and errors="coerce"
  1716. # the numeric entries would be coerced to NaT, was never clear exactly
  1717. # why.
  1718. # mixed integers/datetimes
  1719. expected = Index([Timestamp(x) for x in arr], dtype="M8[ns]")
  1720. result = to_datetime(arr, errors="coerce", cache=cache)
  1721. tm.assert_index_equal(result, expected)
  1722. # GH#49037 pre-2.0 this raised, but it always worked with Series,
  1723. # was never clear why it was disallowed
  1724. result = to_datetime(arr, errors="raise", cache=cache)
  1725. tm.assert_index_equal(result, expected)
  1726. result = DatetimeIndex(arr)
  1727. tm.assert_index_equal(result, expected)
  1728. def test_unit_rounding(self, cache):
  1729. # GH 14156 & GH 20445: argument will incur floating point errors
  1730. # but no premature rounding
  1731. result = to_datetime(1434743731.8770001, unit="s", cache=cache)
  1732. expected = Timestamp("2015-06-19 19:55:31.877000192")
  1733. assert result == expected
  1734. def test_unit_ignore_keeps_name(self, cache):
  1735. # GH 21697
  1736. expected = Index([15e9] * 2, name="name")
  1737. result = to_datetime(expected, errors="ignore", unit="s", cache=cache)
  1738. tm.assert_index_equal(result, expected)
  1739. def test_to_datetime_errors_ignore_utc_true(self):
  1740. # GH#23758
  1741. result = to_datetime([1], unit="s", utc=True, errors="ignore")
  1742. expected = DatetimeIndex(["1970-01-01 00:00:01"], tz="UTC")
  1743. tm.assert_index_equal(result, expected)
  1744. # TODO: this is moved from tests.series.test_timeseries, may be redundant
  1745. @pytest.mark.parametrize("dtype", [int, float])
  1746. def test_to_datetime_unit(self, dtype):
  1747. epoch = 1370745748
  1748. ser = Series([epoch + t for t in range(20)]).astype(dtype)
  1749. result = to_datetime(ser, unit="s")
  1750. expected = Series(
  1751. [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)]
  1752. )
  1753. tm.assert_series_equal(result, expected)
  1754. @pytest.mark.parametrize("null", [iNaT, np.nan])
  1755. def test_to_datetime_unit_with_nulls(self, null):
  1756. epoch = 1370745748
  1757. ser = Series([epoch + t for t in range(20)] + [null])
  1758. result = to_datetime(ser, unit="s")
  1759. expected = Series(
  1760. [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)]
  1761. + [NaT]
  1762. )
  1763. tm.assert_series_equal(result, expected)
  1764. def test_to_datetime_unit_fractional_seconds(self):
  1765. # GH13834
  1766. epoch = 1370745748
  1767. ser = Series([epoch + t for t in np.arange(0, 2, 0.25)] + [iNaT]).astype(float)
  1768. result = to_datetime(ser, unit="s")
  1769. expected = Series(
  1770. [
  1771. Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t)
  1772. for t in np.arange(0, 2, 0.25)
  1773. ]
  1774. + [NaT]
  1775. )
  1776. # GH20455 argument will incur floating point errors but no premature rounding
  1777. result = result.round("ms")
  1778. tm.assert_series_equal(result, expected)
  1779. def test_to_datetime_unit_na_values(self):
  1780. result = to_datetime([1, 2, "NaT", NaT, np.nan], unit="D")
  1781. expected = DatetimeIndex(
  1782. [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 3
  1783. )
  1784. tm.assert_index_equal(result, expected)
  1785. @pytest.mark.parametrize("bad_val", ["foo", 111111111])
  1786. def test_to_datetime_unit_invalid(self, bad_val):
  1787. msg = f"{bad_val} with the unit 'D'"
  1788. with pytest.raises(ValueError, match=msg):
  1789. to_datetime([1, 2, bad_val], unit="D")
  1790. @pytest.mark.parametrize("bad_val", ["foo", 111111111])
  1791. def test_to_timestamp_unit_coerce(self, bad_val):
  1792. # coerce we can process
  1793. expected = DatetimeIndex(
  1794. [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 1
  1795. )
  1796. result = to_datetime([1, 2, bad_val], unit="D", errors="coerce")
  1797. tm.assert_index_equal(result, expected)
  1798. def test_float_to_datetime_raise_near_bounds(self):
  1799. # GH50183
  1800. msg = "cannot convert input with unit 'D'"
  1801. oneday_in_ns = 1e9 * 60 * 60 * 24
  1802. tsmax_in_days = 2**63 / oneday_in_ns # 2**63 ns, in days
  1803. # just in bounds
  1804. should_succeed = Series(
  1805. [0, tsmax_in_days - 0.005, -tsmax_in_days + 0.005], dtype=float
  1806. )
  1807. expected = (should_succeed * oneday_in_ns).astype(np.int64)
  1808. for error_mode in ["raise", "coerce", "ignore"]:
  1809. result1 = to_datetime(should_succeed, unit="D", errors=error_mode)
  1810. tm.assert_almost_equal(result1.astype(np.int64), expected, rtol=1e-10)
  1811. # just out of bounds
  1812. should_fail1 = Series([0, tsmax_in_days + 0.005], dtype=float)
  1813. should_fail2 = Series([0, -tsmax_in_days - 0.005], dtype=float)
  1814. with pytest.raises(OutOfBoundsDatetime, match=msg):
  1815. to_datetime(should_fail1, unit="D", errors="raise")
  1816. with pytest.raises(OutOfBoundsDatetime, match=msg):
  1817. to_datetime(should_fail2, unit="D", errors="raise")
  1818. class TestToDatetimeDataFrame:
  1819. @pytest.fixture
  1820. def df(self):
  1821. return DataFrame(
  1822. {
  1823. "year": [2015, 2016],
  1824. "month": [2, 3],
  1825. "day": [4, 5],
  1826. "hour": [6, 7],
  1827. "minute": [58, 59],
  1828. "second": [10, 11],
  1829. "ms": [1, 1],
  1830. "us": [2, 2],
  1831. "ns": [3, 3],
  1832. }
  1833. )
  1834. def test_dataframe(self, df, cache):
  1835. result = to_datetime(
  1836. {"year": df["year"], "month": df["month"], "day": df["day"]}, cache=cache
  1837. )
  1838. expected = Series(
  1839. [Timestamp("20150204 00:00:00"), Timestamp("20160305 00:0:00")]
  1840. )
  1841. tm.assert_series_equal(result, expected)
  1842. # dict-like
  1843. result = to_datetime(df[["year", "month", "day"]].to_dict(), cache=cache)
  1844. tm.assert_series_equal(result, expected)
  1845. def test_dataframe_dict_with_constructable(self, df, cache):
  1846. # dict but with constructable
  1847. df2 = df[["year", "month", "day"]].to_dict()
  1848. df2["month"] = 2
  1849. result = to_datetime(df2, cache=cache)
  1850. expected2 = Series(
  1851. [Timestamp("20150204 00:00:00"), Timestamp("20160205 00:0:00")]
  1852. )
  1853. tm.assert_series_equal(result, expected2)
  1854. @pytest.mark.parametrize(
  1855. "unit",
  1856. [
  1857. {
  1858. "year": "years",
  1859. "month": "months",
  1860. "day": "days",
  1861. "hour": "hours",
  1862. "minute": "minutes",
  1863. "second": "seconds",
  1864. },
  1865. {
  1866. "year": "year",
  1867. "month": "month",
  1868. "day": "day",
  1869. "hour": "hour",
  1870. "minute": "minute",
  1871. "second": "second",
  1872. },
  1873. ],
  1874. )
  1875. def test_dataframe_field_aliases_column_subset(self, df, cache, unit):
  1876. # unit mappings
  1877. result = to_datetime(df[list(unit.keys())].rename(columns=unit), cache=cache)
  1878. expected = Series(
  1879. [Timestamp("20150204 06:58:10"), Timestamp("20160305 07:59:11")]
  1880. )
  1881. tm.assert_series_equal(result, expected)
  1882. def test_dataframe_field_aliases(self, df, cache):
  1883. d = {
  1884. "year": "year",
  1885. "month": "month",
  1886. "day": "day",
  1887. "hour": "hour",
  1888. "minute": "minute",
  1889. "second": "second",
  1890. "ms": "ms",
  1891. "us": "us",
  1892. "ns": "ns",
  1893. }
  1894. result = to_datetime(df.rename(columns=d), cache=cache)
  1895. expected = Series(
  1896. [
  1897. Timestamp("20150204 06:58:10.001002003"),
  1898. Timestamp("20160305 07:59:11.001002003"),
  1899. ]
  1900. )
  1901. tm.assert_series_equal(result, expected)
  1902. def test_dataframe_str_dtype(self, df, cache):
  1903. # coerce back to int
  1904. result = to_datetime(df.astype(str), cache=cache)
  1905. expected = Series(
  1906. [
  1907. Timestamp("20150204 06:58:10.001002003"),
  1908. Timestamp("20160305 07:59:11.001002003"),
  1909. ]
  1910. )
  1911. tm.assert_series_equal(result, expected)
  1912. def test_dataframe_coerce(self, cache):
  1913. # passing coerce
  1914. df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]})
  1915. msg = (
  1916. r'^cannot assemble the datetimes: time data ".+" doesn\'t '
  1917. r'match format "%Y%m%d", at position 1\.'
  1918. )
  1919. with pytest.raises(ValueError, match=msg):
  1920. to_datetime(df2, cache=cache)
  1921. result = to_datetime(df2, errors="coerce", cache=cache)
  1922. expected = Series([Timestamp("20150204 00:00:00"), NaT])
  1923. tm.assert_series_equal(result, expected)
  1924. def test_dataframe_extra_keys_raisesm(self, df, cache):
  1925. # extra columns
  1926. msg = r"extra keys have been passed to the datetime assemblage: \[foo\]"
  1927. with pytest.raises(ValueError, match=msg):
  1928. df2 = df.copy()
  1929. df2["foo"] = 1
  1930. to_datetime(df2, cache=cache)
  1931. @pytest.mark.parametrize(
  1932. "cols",
  1933. [
  1934. ["year"],
  1935. ["year", "month"],
  1936. ["year", "month", "second"],
  1937. ["month", "day"],
  1938. ["year", "day", "second"],
  1939. ],
  1940. )
  1941. def test_dataframe_missing_keys_raises(self, df, cache, cols):
  1942. # not enough
  1943. msg = (
  1944. r"to assemble mappings requires at least that \[year, month, "
  1945. r"day\] be specified: \[.+\] is missing"
  1946. )
  1947. with pytest.raises(ValueError, match=msg):
  1948. to_datetime(df[cols], cache=cache)
  1949. def test_dataframe_duplicate_columns_raises(self, cache):
  1950. # duplicates
  1951. msg = "cannot assemble with duplicate keys"
  1952. df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]})
  1953. df2.columns = ["year", "year", "day"]
  1954. with pytest.raises(ValueError, match=msg):
  1955. to_datetime(df2, cache=cache)
  1956. df2 = DataFrame(
  1957. {"year": [2015, 2016], "month": [2, 20], "day": [4, 5], "hour": [4, 5]}
  1958. )
  1959. df2.columns = ["year", "month", "day", "day"]
  1960. with pytest.raises(ValueError, match=msg):
  1961. to_datetime(df2, cache=cache)
  1962. def test_dataframe_int16(self, cache):
  1963. # GH#13451
  1964. df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
  1965. # int16
  1966. result = to_datetime(df.astype("int16"), cache=cache)
  1967. expected = Series(
  1968. [Timestamp("20150204 00:00:00"), Timestamp("20160305 00:00:00")]
  1969. )
  1970. tm.assert_series_equal(result, expected)
  1971. def test_dataframe_mixed(self, cache):
  1972. # mixed dtypes
  1973. df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
  1974. df["month"] = df["month"].astype("int8")
  1975. df["day"] = df["day"].astype("int8")
  1976. result = to_datetime(df, cache=cache)
  1977. expected = Series(
  1978. [Timestamp("20150204 00:00:00"), Timestamp("20160305 00:00:00")]
  1979. )
  1980. tm.assert_series_equal(result, expected)
  1981. def test_dataframe_float(self, cache):
  1982. # float
  1983. df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]})
  1984. msg = (
  1985. r"^cannot assemble the datetimes: unconverted data remains when parsing "
  1986. r'with format ".*": "1", at position 0.'
  1987. )
  1988. with pytest.raises(ValueError, match=msg):
  1989. to_datetime(df, cache=cache)
  1990. def test_dataframe_utc_true(self):
  1991. # GH#23760
  1992. df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
  1993. result = to_datetime(df, utc=True)
  1994. expected = Series(
  1995. np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[ns]")
  1996. ).dt.tz_localize("UTC")
  1997. tm.assert_series_equal(result, expected)
  1998. class TestToDatetimeMisc:
  1999. def test_to_datetime_barely_out_of_bounds(self):
  2000. # GH#19529
  2001. # GH#19382 close enough to bounds that dropping nanos would result
  2002. # in an in-bounds datetime
  2003. arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object)
  2004. msg = "^Out of bounds nanosecond timestamp: .*, at position 0"
  2005. with pytest.raises(OutOfBoundsDatetime, match=msg):
  2006. to_datetime(arr)
  2007. @pytest.mark.parametrize(
  2008. "arg, exp_str",
  2009. [
  2010. ["2012-01-01 00:00:00", "2012-01-01 00:00:00"],
  2011. ["20121001", "2012-10-01"], # bad iso 8601
  2012. ],
  2013. )
  2014. def test_to_datetime_iso8601(self, cache, arg, exp_str):
  2015. result = to_datetime([arg], cache=cache)
  2016. exp = Timestamp(exp_str)
  2017. assert result[0] == exp
  2018. @pytest.mark.parametrize(
  2019. "input, format",
  2020. [
  2021. ("2012", "%Y-%m"),
  2022. ("2012-01", "%Y-%m-%d"),
  2023. ("2012-01-01", "%Y-%m-%d %H"),
  2024. ("2012-01-01 10", "%Y-%m-%d %H:%M"),
  2025. ("2012-01-01 10:00", "%Y-%m-%d %H:%M:%S"),
  2026. ("2012-01-01 10:00:00", "%Y-%m-%d %H:%M:%S.%f"),
  2027. ("2012-01-01 10:00:00.123", "%Y-%m-%d %H:%M:%S.%f%z"),
  2028. (0, "%Y-%m-%d"),
  2029. ],
  2030. )
  2031. @pytest.mark.parametrize("exact", [True, False])
  2032. def test_to_datetime_iso8601_fails(self, input, format, exact):
  2033. # https://github.com/pandas-dev/pandas/issues/12649
  2034. # `format` is longer than the string, so this fails regardless of `exact`
  2035. with pytest.raises(
  2036. ValueError,
  2037. match=(
  2038. rf"time data \"{input}\" doesn't match format "
  2039. rf"\"{format}\", at position 0"
  2040. ),
  2041. ):
  2042. to_datetime(input, format=format, exact=exact)
  2043. @pytest.mark.parametrize(
  2044. "input, format",
  2045. [
  2046. ("2012-01-01", "%Y-%m"),
  2047. ("2012-01-01 10", "%Y-%m-%d"),
  2048. ("2012-01-01 10:00", "%Y-%m-%d %H"),
  2049. ("2012-01-01 10:00:00", "%Y-%m-%d %H:%M"),
  2050. (0, "%Y-%m-%d"),
  2051. ],
  2052. )
  2053. def test_to_datetime_iso8601_exact_fails(self, input, format):
  2054. # https://github.com/pandas-dev/pandas/issues/12649
  2055. # `format` is shorter than the date string, so only fails with `exact=True`
  2056. msg = "|".join(
  2057. [
  2058. '^unconverted data remains when parsing with format ".*": ".*"'
  2059. f", at position 0. {PARSING_ERR_MSG}$",
  2060. f'^time data ".*" doesn\'t match format ".*", at position 0. '
  2061. f"{PARSING_ERR_MSG}$",
  2062. ]
  2063. )
  2064. with pytest.raises(
  2065. ValueError,
  2066. match=(msg),
  2067. ):
  2068. to_datetime(input, format=format)
  2069. @pytest.mark.parametrize(
  2070. "input, format",
  2071. [
  2072. ("2012-01-01", "%Y-%m"),
  2073. ("2012-01-01 00", "%Y-%m-%d"),
  2074. ("2012-01-01 00:00", "%Y-%m-%d %H"),
  2075. ("2012-01-01 00:00:00", "%Y-%m-%d %H:%M"),
  2076. ],
  2077. )
  2078. def test_to_datetime_iso8601_non_exact(self, input, format):
  2079. # https://github.com/pandas-dev/pandas/issues/12649
  2080. expected = Timestamp(2012, 1, 1)
  2081. result = to_datetime(input, format=format, exact=False)
  2082. assert result == expected
  2083. @pytest.mark.parametrize(
  2084. "input, format",
  2085. [
  2086. ("2020-01", "%Y/%m"),
  2087. ("2020-01-01", "%Y/%m/%d"),
  2088. ("2020-01-01 00", "%Y/%m/%dT%H"),
  2089. ("2020-01-01T00", "%Y/%m/%d %H"),
  2090. ("2020-01-01 00:00", "%Y/%m/%dT%H:%M"),
  2091. ("2020-01-01T00:00", "%Y/%m/%d %H:%M"),
  2092. ("2020-01-01 00:00:00", "%Y/%m/%dT%H:%M:%S"),
  2093. ("2020-01-01T00:00:00", "%Y/%m/%d %H:%M:%S"),
  2094. ],
  2095. )
  2096. def test_to_datetime_iso8601_separator(self, input, format):
  2097. # https://github.com/pandas-dev/pandas/issues/12649
  2098. with pytest.raises(
  2099. ValueError,
  2100. match=(
  2101. rf"time data \"{input}\" doesn\'t match format "
  2102. rf"\"{format}\", at position 0"
  2103. ),
  2104. ):
  2105. to_datetime(input, format=format)
  2106. @pytest.mark.parametrize(
  2107. "input, format",
  2108. [
  2109. ("2020-01", "%Y-%m"),
  2110. ("2020-01-01", "%Y-%m-%d"),
  2111. ("2020-01-01 00", "%Y-%m-%d %H"),
  2112. ("2020-01-01T00", "%Y-%m-%dT%H"),
  2113. ("2020-01-01 00:00", "%Y-%m-%d %H:%M"),
  2114. ("2020-01-01T00:00", "%Y-%m-%dT%H:%M"),
  2115. ("2020-01-01 00:00:00", "%Y-%m-%d %H:%M:%S"),
  2116. ("2020-01-01T00:00:00", "%Y-%m-%dT%H:%M:%S"),
  2117. ("2020-01-01T00:00:00.000", "%Y-%m-%dT%H:%M:%S.%f"),
  2118. ("2020-01-01T00:00:00.000000", "%Y-%m-%dT%H:%M:%S.%f"),
  2119. ("2020-01-01T00:00:00.000000000", "%Y-%m-%dT%H:%M:%S.%f"),
  2120. ],
  2121. )
  2122. def test_to_datetime_iso8601_valid(self, input, format):
  2123. # https://github.com/pandas-dev/pandas/issues/12649
  2124. expected = Timestamp(2020, 1, 1)
  2125. result = to_datetime(input, format=format)
  2126. assert result == expected
  2127. @pytest.mark.parametrize(
  2128. "input, format",
  2129. [
  2130. ("2020-1", "%Y-%m"),
  2131. ("2020-1-1", "%Y-%m-%d"),
  2132. ("2020-1-1 0", "%Y-%m-%d %H"),
  2133. ("2020-1-1T0", "%Y-%m-%dT%H"),
  2134. ("2020-1-1 0:0", "%Y-%m-%d %H:%M"),
  2135. ("2020-1-1T0:0", "%Y-%m-%dT%H:%M"),
  2136. ("2020-1-1 0:0:0", "%Y-%m-%d %H:%M:%S"),
  2137. ("2020-1-1T0:0:0", "%Y-%m-%dT%H:%M:%S"),
  2138. ("2020-1-1T0:0:0.000", "%Y-%m-%dT%H:%M:%S.%f"),
  2139. ("2020-1-1T0:0:0.000000", "%Y-%m-%dT%H:%M:%S.%f"),
  2140. ("2020-1-1T0:0:0.000000000", "%Y-%m-%dT%H:%M:%S.%f"),
  2141. ],
  2142. )
  2143. def test_to_datetime_iso8601_non_padded(self, input, format):
  2144. # https://github.com/pandas-dev/pandas/issues/21422
  2145. expected = Timestamp(2020, 1, 1)
  2146. result = to_datetime(input, format=format)
  2147. assert result == expected
  2148. @pytest.mark.parametrize(
  2149. "input, format",
  2150. [
  2151. ("2020-01-01T00:00:00.000000000+00:00", "%Y-%m-%dT%H:%M:%S.%f%z"),
  2152. ("2020-01-01T00:00:00+00:00", "%Y-%m-%dT%H:%M:%S%z"),
  2153. ("2020-01-01T00:00:00Z", "%Y-%m-%dT%H:%M:%S%z"),
  2154. ],
  2155. )
  2156. def test_to_datetime_iso8601_with_timezone_valid(self, input, format):
  2157. # https://github.com/pandas-dev/pandas/issues/12649
  2158. expected = Timestamp(2020, 1, 1, tzinfo=pytz.UTC)
  2159. result = to_datetime(input, format=format)
  2160. assert result == expected
  2161. def test_to_datetime_default(self, cache):
  2162. rs = to_datetime("2001", cache=cache)
  2163. xp = datetime(2001, 1, 1)
  2164. assert rs == xp
  2165. @pytest.mark.xfail(reason="fails to enforce dayfirst=True, which would raise")
  2166. def test_to_datetime_respects_dayfirst(self, cache):
  2167. # dayfirst is essentially broken
  2168. # The msg here is not important since it isn't actually raised yet.
  2169. msg = "Invalid date specified"
  2170. with pytest.raises(ValueError, match=msg):
  2171. # if dayfirst is respected, then this would parse as month=13, which
  2172. # would raise
  2173. with tm.assert_produces_warning(UserWarning, match="Provide format"):
  2174. to_datetime("01-13-2012", dayfirst=True, cache=cache)
  2175. def test_to_datetime_on_datetime64_series(self, cache):
  2176. # #2699
  2177. ser = Series(date_range("1/1/2000", periods=10))
  2178. result = to_datetime(ser, cache=cache)
  2179. assert result[0] == ser[0]
  2180. def test_to_datetime_with_space_in_series(self, cache):
  2181. # GH 6428
  2182. ser = Series(["10/18/2006", "10/18/2008", " "])
  2183. msg = (
  2184. r'^time data " " doesn\'t match format "%m/%d/%Y", '
  2185. rf"at position 2. {PARSING_ERR_MSG}$"
  2186. )
  2187. with pytest.raises(ValueError, match=msg):
  2188. to_datetime(ser, errors="raise", cache=cache)
  2189. result_coerce = to_datetime(ser, errors="coerce", cache=cache)
  2190. expected_coerce = Series([datetime(2006, 10, 18), datetime(2008, 10, 18), NaT])
  2191. tm.assert_series_equal(result_coerce, expected_coerce)
  2192. result_ignore = to_datetime(ser, errors="ignore", cache=cache)
  2193. tm.assert_series_equal(result_ignore, ser)
  2194. @td.skip_if_not_us_locale
  2195. def test_to_datetime_with_apply(self, cache):
  2196. # this is only locale tested with US/None locales
  2197. # GH 5195
  2198. # with a format and coerce a single item to_datetime fails
  2199. td = Series(["May 04", "Jun 02", "Dec 11"], index=[1, 2, 3])
  2200. expected = to_datetime(td, format="%b %y", cache=cache)
  2201. result = td.apply(to_datetime, format="%b %y", cache=cache)
  2202. tm.assert_series_equal(result, expected)
  2203. def test_to_datetime_timezone_name(self):
  2204. # https://github.com/pandas-dev/pandas/issues/49748
  2205. result = to_datetime("2020-01-01 00:00:00UTC", format="%Y-%m-%d %H:%M:%S%Z")
  2206. expected = Timestamp(2020, 1, 1).tz_localize("UTC")
  2207. assert result == expected
  2208. @td.skip_if_not_us_locale
  2209. @pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
  2210. def test_to_datetime_with_apply_with_empty_str(self, cache, errors):
  2211. # this is only locale tested with US/None locales
  2212. # GH 5195, GH50251
  2213. # with a format and coerce a single item to_datetime fails
  2214. td = Series(["May 04", "Jun 02", ""], index=[1, 2, 3])
  2215. expected = to_datetime(td, format="%b %y", errors=errors, cache=cache)
  2216. result = td.apply(
  2217. lambda x: to_datetime(x, format="%b %y", errors="coerce", cache=cache)
  2218. )
  2219. tm.assert_series_equal(result, expected)
  2220. def test_to_datetime_empty_stt(self, cache):
  2221. # empty string
  2222. result = to_datetime("", cache=cache)
  2223. assert result is NaT
  2224. def test_to_datetime_empty_str_list(self, cache):
  2225. result = to_datetime(["", ""], cache=cache)
  2226. assert isna(result).all()
  2227. def test_to_datetime_zero(self, cache):
  2228. # ints
  2229. result = Timestamp(0)
  2230. expected = to_datetime(0, cache=cache)
  2231. assert result == expected
  2232. def test_to_datetime_strings(self, cache):
  2233. # GH 3888 (strings)
  2234. expected = to_datetime(["2012"], cache=cache)[0]
  2235. result = to_datetime("2012", cache=cache)
  2236. assert result == expected
  2237. def test_to_datetime_strings_variation(self, cache):
  2238. array = ["2012", "20120101", "20120101 12:01:01"]
  2239. expected = [to_datetime(dt_str, cache=cache) for dt_str in array]
  2240. result = [Timestamp(date_str) for date_str in array]
  2241. tm.assert_almost_equal(result, expected)
  2242. @pytest.mark.parametrize("result", [Timestamp("2012"), to_datetime("2012")])
  2243. def test_to_datetime_strings_vs_constructor(self, result):
  2244. expected = Timestamp(2012, 1, 1)
  2245. assert result == expected
  2246. def test_to_datetime_unprocessable_input(self, cache):
  2247. # GH 4928
  2248. # GH 21864
  2249. result = to_datetime([1, "1"], errors="ignore", cache=cache)
  2250. expected = Index(np.array([1, "1"], dtype="O"))
  2251. tm.assert_equal(result, expected)
  2252. msg = '^Given date string "1" not likely a datetime, at position 1$'
  2253. with pytest.raises(ValueError, match=msg):
  2254. to_datetime([1, "1"], errors="raise", cache=cache)
  2255. def test_to_datetime_unhashable_input(self, cache):
  2256. series = Series([["a"]] * 100)
  2257. result = to_datetime(series, errors="ignore", cache=cache)
  2258. tm.assert_series_equal(series, result)
  2259. def test_to_datetime_other_datetime64_units(self):
  2260. # 5/25/2012
  2261. scalar = np.int64(1337904000000000).view("M8[us]")
  2262. as_obj = scalar.astype("O")
  2263. index = DatetimeIndex([scalar])
  2264. assert index[0] == scalar.astype("O")
  2265. value = Timestamp(scalar)
  2266. assert value == as_obj
  2267. def test_to_datetime_list_of_integers(self):
  2268. rng = date_range("1/1/2000", periods=20)
  2269. rng = DatetimeIndex(rng.values)
  2270. ints = list(rng.asi8)
  2271. result = DatetimeIndex(ints)
  2272. tm.assert_index_equal(rng, result)
  2273. def test_to_datetime_overflow(self):
  2274. # gh-17637
  2275. # we are overflowing Timedelta range here
  2276. msg = "Cannot cast 139999 days 00:00:00 to unit='ns' without overflow"
  2277. with pytest.raises(OutOfBoundsTimedelta, match=msg):
  2278. date_range(start="1/1/1700", freq="B", periods=100000)
  2279. def test_string_invalid_operation(self, cache):
  2280. invalid = np.array(["87156549591102612381000001219H5"], dtype=object)
  2281. # GH #51084
  2282. with pytest.raises(ValueError, match="Unknown datetime string format"):
  2283. to_datetime(invalid, errors="raise", cache=cache)
  2284. def test_string_na_nat_conversion(self, cache):
  2285. # GH #999, #858
  2286. strings = np.array(["1/1/2000", "1/2/2000", np.nan, "1/4/2000"], dtype=object)
  2287. expected = np.empty(4, dtype="M8[ns]")
  2288. for i, val in enumerate(strings):
  2289. if isna(val):
  2290. expected[i] = iNaT
  2291. else:
  2292. expected[i] = parse(val)
  2293. result = tslib.array_to_datetime(strings)[0]
  2294. tm.assert_almost_equal(result, expected)
  2295. result2 = to_datetime(strings, cache=cache)
  2296. assert isinstance(result2, DatetimeIndex)
  2297. tm.assert_numpy_array_equal(result, result2.values)
  2298. def test_string_na_nat_conversion_malformed(self, cache):
  2299. malformed = np.array(["1/100/2000", np.nan], dtype=object)
  2300. # GH 10636, default is now 'raise'
  2301. msg = r"Unknown datetime string format"
  2302. with pytest.raises(ValueError, match=msg):
  2303. to_datetime(malformed, errors="raise", cache=cache)
  2304. result = to_datetime(malformed, errors="ignore", cache=cache)
  2305. # GH 21864
  2306. expected = Index(malformed)
  2307. tm.assert_index_equal(result, expected)
  2308. with pytest.raises(ValueError, match=msg):
  2309. to_datetime(malformed, errors="raise", cache=cache)
  2310. def test_string_na_nat_conversion_with_name(self, cache):
  2311. idx = ["a", "b", "c", "d", "e"]
  2312. series = Series(
  2313. ["1/1/2000", np.nan, "1/3/2000", np.nan, "1/5/2000"], index=idx, name="foo"
  2314. )
  2315. dseries = Series(
  2316. [
  2317. to_datetime("1/1/2000", cache=cache),
  2318. np.nan,
  2319. to_datetime("1/3/2000", cache=cache),
  2320. np.nan,
  2321. to_datetime("1/5/2000", cache=cache),
  2322. ],
  2323. index=idx,
  2324. name="foo",
  2325. )
  2326. result = to_datetime(series, cache=cache)
  2327. dresult = to_datetime(dseries, cache=cache)
  2328. expected = Series(np.empty(5, dtype="M8[ns]"), index=idx)
  2329. for i in range(5):
  2330. x = series[i]
  2331. if isna(x):
  2332. expected[i] = NaT
  2333. else:
  2334. expected[i] = to_datetime(x, cache=cache)
  2335. tm.assert_series_equal(result, expected, check_names=False)
  2336. assert result.name == "foo"
  2337. tm.assert_series_equal(dresult, expected, check_names=False)
  2338. assert dresult.name == "foo"
  2339. @pytest.mark.parametrize(
  2340. "unit",
  2341. ["h", "m", "s", "ms", "us", "ns"],
  2342. )
  2343. def test_dti_constructor_numpy_timeunits(self, cache, unit):
  2344. # GH 9114
  2345. dtype = np.dtype(f"M8[{unit}]")
  2346. base = to_datetime(["2000-01-01T00:00", "2000-01-02T00:00", "NaT"], cache=cache)
  2347. values = base.values.astype(dtype)
  2348. if unit in ["h", "m"]:
  2349. # we cast to closest supported unit
  2350. unit = "s"
  2351. exp_dtype = np.dtype(f"M8[{unit}]")
  2352. expected = DatetimeIndex(base.astype(exp_dtype))
  2353. assert expected.dtype == exp_dtype
  2354. tm.assert_index_equal(DatetimeIndex(values), expected)
  2355. tm.assert_index_equal(to_datetime(values, cache=cache), expected)
  2356. def test_dayfirst(self, cache):
  2357. # GH 5917
  2358. arr = ["10/02/2014", "11/02/2014", "12/02/2014"]
  2359. expected = DatetimeIndex(
  2360. [datetime(2014, 2, 10), datetime(2014, 2, 11), datetime(2014, 2, 12)]
  2361. )
  2362. idx1 = DatetimeIndex(arr, dayfirst=True)
  2363. idx2 = DatetimeIndex(np.array(arr), dayfirst=True)
  2364. idx3 = to_datetime(arr, dayfirst=True, cache=cache)
  2365. idx4 = to_datetime(np.array(arr), dayfirst=True, cache=cache)
  2366. idx5 = DatetimeIndex(Index(arr), dayfirst=True)
  2367. idx6 = DatetimeIndex(Series(arr), dayfirst=True)
  2368. tm.assert_index_equal(expected, idx1)
  2369. tm.assert_index_equal(expected, idx2)
  2370. tm.assert_index_equal(expected, idx3)
  2371. tm.assert_index_equal(expected, idx4)
  2372. tm.assert_index_equal(expected, idx5)
  2373. tm.assert_index_equal(expected, idx6)
  2374. def test_dayfirst_warnings_valid_input(self):
  2375. # GH 12585
  2376. warning_msg = (
  2377. "Parsing dates in .* format when dayfirst=.* was specified. "
  2378. "Pass `dayfirst=.*` or specify a format to silence this warning."
  2379. )
  2380. # CASE 1: valid input
  2381. arr = ["31/12/2014", "10/03/2011"]
  2382. expected = DatetimeIndex(
  2383. ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None
  2384. )
  2385. # A. dayfirst arg correct, no warning
  2386. res1 = to_datetime(arr, dayfirst=True)
  2387. tm.assert_index_equal(expected, res1)
  2388. # B. dayfirst arg incorrect, warning
  2389. with tm.assert_produces_warning(UserWarning, match=warning_msg):
  2390. res2 = to_datetime(arr, dayfirst=False)
  2391. tm.assert_index_equal(expected, res2)
  2392. def test_dayfirst_warnings_invalid_input(self):
  2393. # CASE 2: invalid input
  2394. # cannot consistently process with single format
  2395. # ValueError *always* raised
  2396. # first in DD/MM/YYYY, second in MM/DD/YYYY
  2397. arr = ["31/12/2014", "03/30/2011"]
  2398. with pytest.raises(
  2399. ValueError,
  2400. match=(
  2401. r'^time data "03/30/2011" doesn\'t match format '
  2402. rf'"%d/%m/%Y", at position 1. {PARSING_ERR_MSG}$'
  2403. ),
  2404. ):
  2405. to_datetime(arr, dayfirst=True)
  2406. @pytest.mark.parametrize("klass", [DatetimeIndex, DatetimeArray])
  2407. def test_to_datetime_dta_tz(self, klass):
  2408. # GH#27733
  2409. dti = date_range("2015-04-05", periods=3).rename("foo")
  2410. expected = dti.tz_localize("UTC")
  2411. obj = klass(dti)
  2412. expected = klass(expected)
  2413. result = to_datetime(obj, utc=True)
  2414. tm.assert_equal(result, expected)
  2415. class TestGuessDatetimeFormat:
  2416. @pytest.mark.parametrize(
  2417. "test_list",
  2418. [
  2419. [
  2420. "2011-12-30 00:00:00.000000",
  2421. "2011-12-30 00:00:00.000000",
  2422. "2011-12-30 00:00:00.000000",
  2423. ],
  2424. [np.nan, np.nan, "2011-12-30 00:00:00.000000"],
  2425. ["", "2011-12-30 00:00:00.000000"],
  2426. ["NaT", "2011-12-30 00:00:00.000000"],
  2427. ["2011-12-30 00:00:00.000000", "random_string"],
  2428. ["now", "2011-12-30 00:00:00.000000"],
  2429. ["today", "2011-12-30 00:00:00.000000"],
  2430. ],
  2431. )
  2432. def test_guess_datetime_format_for_array(self, test_list):
  2433. expected_format = "%Y-%m-%d %H:%M:%S.%f"
  2434. test_array = np.array(test_list, dtype=object)
  2435. assert tools._guess_datetime_format_for_array(test_array) == expected_format
  2436. @td.skip_if_not_us_locale
  2437. def test_guess_datetime_format_for_array_all_nans(self):
  2438. format_for_string_of_nans = tools._guess_datetime_format_for_array(
  2439. np.array([np.nan, np.nan, np.nan], dtype="O")
  2440. )
  2441. assert format_for_string_of_nans is None
  2442. class TestToDatetimeInferFormat:
  2443. @pytest.mark.parametrize(
  2444. "test_format", ["%m-%d-%Y", "%m/%d/%Y %H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S.%f"]
  2445. )
  2446. def test_to_datetime_infer_datetime_format_consistent_format(
  2447. self, cache, test_format
  2448. ):
  2449. ser = Series(date_range("20000101", periods=50, freq="H"))
  2450. s_as_dt_strings = ser.apply(lambda x: x.strftime(test_format))
  2451. with_format = to_datetime(s_as_dt_strings, format=test_format, cache=cache)
  2452. without_format = to_datetime(s_as_dt_strings, cache=cache)
  2453. # Whether the format is explicitly passed, or
  2454. # it is inferred, the results should all be the same
  2455. tm.assert_series_equal(with_format, without_format)
  2456. def test_to_datetime_inconsistent_format(self, cache):
  2457. data = ["01/01/2011 00:00:00", "01-02-2011 00:00:00", "2011-01-03T00:00:00"]
  2458. ser = Series(np.array(data))
  2459. msg = (
  2460. r'^time data "01-02-2011 00:00:00" doesn\'t match format '
  2461. rf'"%m/%d/%Y %H:%M:%S", at position 1. {PARSING_ERR_MSG}$'
  2462. )
  2463. with pytest.raises(ValueError, match=msg):
  2464. to_datetime(ser, cache=cache)
  2465. def test_to_datetime_consistent_format(self, cache):
  2466. data = ["Jan/01/2011", "Feb/01/2011", "Mar/01/2011"]
  2467. ser = Series(np.array(data))
  2468. result = to_datetime(ser, cache=cache)
  2469. expected = Series(
  2470. ["2011-01-01", "2011-02-01", "2011-03-01"], dtype="datetime64[ns]"
  2471. )
  2472. tm.assert_series_equal(result, expected)
  2473. def test_to_datetime_series_with_nans(self, cache):
  2474. ser = Series(
  2475. np.array(
  2476. ["01/01/2011 00:00:00", np.nan, "01/03/2011 00:00:00", np.nan],
  2477. dtype=object,
  2478. )
  2479. )
  2480. result = to_datetime(ser, cache=cache)
  2481. expected = Series(
  2482. ["2011-01-01", NaT, "2011-01-03", NaT], dtype="datetime64[ns]"
  2483. )
  2484. tm.assert_series_equal(result, expected)
  2485. def test_to_datetime_series_start_with_nans(self, cache):
  2486. ser = Series(
  2487. np.array(
  2488. [
  2489. np.nan,
  2490. np.nan,
  2491. "01/01/2011 00:00:00",
  2492. "01/02/2011 00:00:00",
  2493. "01/03/2011 00:00:00",
  2494. ],
  2495. dtype=object,
  2496. )
  2497. )
  2498. result = to_datetime(ser, cache=cache)
  2499. expected = Series(
  2500. [NaT, NaT, "2011-01-01", "2011-01-02", "2011-01-03"], dtype="datetime64[ns]"
  2501. )
  2502. tm.assert_series_equal(result, expected)
  2503. @pytest.mark.parametrize(
  2504. "tz_name, offset",
  2505. [("UTC", 0), ("UTC-3", 180), ("UTC+3", -180)],
  2506. )
  2507. def test_infer_datetime_format_tz_name(self, tz_name, offset):
  2508. # GH 33133
  2509. ser = Series([f"2019-02-02 08:07:13 {tz_name}"])
  2510. result = to_datetime(ser)
  2511. tz = timezone(timedelta(minutes=offset))
  2512. expected = Series([Timestamp("2019-02-02 08:07:13").tz_localize(tz)])
  2513. tm.assert_series_equal(result, expected)
  2514. @pytest.mark.parametrize(
  2515. "ts,zero_tz",
  2516. [
  2517. ("2019-02-02 08:07:13", "Z"),
  2518. ("2019-02-02 08:07:13", ""),
  2519. ("2019-02-02 08:07:13.012345", "Z"),
  2520. ("2019-02-02 08:07:13.012345", ""),
  2521. ],
  2522. )
  2523. def test_infer_datetime_format_zero_tz(self, ts, zero_tz):
  2524. # GH 41047
  2525. ser = Series([ts + zero_tz])
  2526. result = to_datetime(ser)
  2527. tz = pytz.utc if zero_tz == "Z" else None
  2528. expected = Series([Timestamp(ts, tz=tz)])
  2529. tm.assert_series_equal(result, expected)
  2530. @pytest.mark.parametrize("format", [None, "%Y-%m-%d"])
  2531. def test_to_datetime_iso8601_noleading_0s(self, cache, format):
  2532. # GH 11871
  2533. ser = Series(["2014-1-1", "2014-2-2", "2015-3-3"])
  2534. expected = Series(
  2535. [
  2536. Timestamp("2014-01-01"),
  2537. Timestamp("2014-02-02"),
  2538. Timestamp("2015-03-03"),
  2539. ]
  2540. )
  2541. tm.assert_series_equal(to_datetime(ser, format=format, cache=cache), expected)
  2542. def test_parse_dates_infer_datetime_format_warning(self):
  2543. # GH 49024
  2544. with tm.assert_produces_warning(
  2545. UserWarning,
  2546. match="The argument 'infer_datetime_format' is deprecated",
  2547. ):
  2548. to_datetime(["10-10-2000"], infer_datetime_format=True)
  2549. class TestDaysInMonth:
  2550. # tests for issue #10154
  2551. @pytest.mark.parametrize(
  2552. "arg, format",
  2553. [
  2554. ["2015-02-29", None],
  2555. ["2015-02-29", "%Y-%m-%d"],
  2556. ["2015-02-32", "%Y-%m-%d"],
  2557. ["2015-04-31", "%Y-%m-%d"],
  2558. ],
  2559. )
  2560. def test_day_not_in_month_coerce(self, cache, arg, format):
  2561. assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache))
  2562. def test_day_not_in_month_raise(self, cache):
  2563. msg = "day is out of range for month: 2015-02-29, at position 0"
  2564. with pytest.raises(ValueError, match=msg):
  2565. to_datetime("2015-02-29", errors="raise", cache=cache)
  2566. @pytest.mark.parametrize(
  2567. "arg, format, msg",
  2568. [
  2569. (
  2570. "2015-02-29",
  2571. "%Y-%m-%d",
  2572. f"^day is out of range for month, at position 0. {PARSING_ERR_MSG}$",
  2573. ),
  2574. (
  2575. "2015-29-02",
  2576. "%Y-%d-%m",
  2577. f"^day is out of range for month, at position 0. {PARSING_ERR_MSG}$",
  2578. ),
  2579. (
  2580. "2015-02-32",
  2581. "%Y-%m-%d",
  2582. '^unconverted data remains when parsing with format "%Y-%m-%d": "2", '
  2583. f"at position 0. {PARSING_ERR_MSG}$",
  2584. ),
  2585. (
  2586. "2015-32-02",
  2587. "%Y-%d-%m",
  2588. '^time data "2015-32-02" doesn\'t match format "%Y-%d-%m", '
  2589. f"at position 0. {PARSING_ERR_MSG}$",
  2590. ),
  2591. (
  2592. "2015-04-31",
  2593. "%Y-%m-%d",
  2594. f"^day is out of range for month, at position 0. {PARSING_ERR_MSG}$",
  2595. ),
  2596. (
  2597. "2015-31-04",
  2598. "%Y-%d-%m",
  2599. f"^day is out of range for month, at position 0. {PARSING_ERR_MSG}$",
  2600. ),
  2601. ],
  2602. )
  2603. def test_day_not_in_month_raise_value(self, cache, arg, format, msg):
  2604. # https://github.com/pandas-dev/pandas/issues/50462
  2605. with pytest.raises(ValueError, match=msg):
  2606. to_datetime(arg, errors="raise", format=format, cache=cache)
  2607. @pytest.mark.parametrize(
  2608. "expected, format",
  2609. [
  2610. ["2015-02-29", None],
  2611. ["2015-02-29", "%Y-%m-%d"],
  2612. ["2015-02-29", "%Y-%m-%d"],
  2613. ["2015-04-31", "%Y-%m-%d"],
  2614. ],
  2615. )
  2616. def test_day_not_in_month_ignore(self, cache, expected, format):
  2617. result = to_datetime(expected, errors="ignore", format=format, cache=cache)
  2618. assert result == expected
  2619. class TestDatetimeParsingWrappers:
  2620. @pytest.mark.parametrize(
  2621. "date_str, expected",
  2622. [
  2623. ("2011-01-01", datetime(2011, 1, 1)),
  2624. ("2Q2005", datetime(2005, 4, 1)),
  2625. ("2Q05", datetime(2005, 4, 1)),
  2626. ("2005Q1", datetime(2005, 1, 1)),
  2627. ("05Q1", datetime(2005, 1, 1)),
  2628. ("2011Q3", datetime(2011, 7, 1)),
  2629. ("11Q3", datetime(2011, 7, 1)),
  2630. ("3Q2011", datetime(2011, 7, 1)),
  2631. ("3Q11", datetime(2011, 7, 1)),
  2632. # quarterly without space
  2633. ("2000Q4", datetime(2000, 10, 1)),
  2634. ("00Q4", datetime(2000, 10, 1)),
  2635. ("4Q2000", datetime(2000, 10, 1)),
  2636. ("4Q00", datetime(2000, 10, 1)),
  2637. ("2000q4", datetime(2000, 10, 1)),
  2638. ("2000-Q4", datetime(2000, 10, 1)),
  2639. ("00-Q4", datetime(2000, 10, 1)),
  2640. ("4Q-2000", datetime(2000, 10, 1)),
  2641. ("4Q-00", datetime(2000, 10, 1)),
  2642. ("00q4", datetime(2000, 10, 1)),
  2643. ("2005", datetime(2005, 1, 1)),
  2644. ("2005-11", datetime(2005, 11, 1)),
  2645. ("2005 11", datetime(2005, 11, 1)),
  2646. ("11-2005", datetime(2005, 11, 1)),
  2647. ("11 2005", datetime(2005, 11, 1)),
  2648. ("200511", datetime(2020, 5, 11)),
  2649. ("20051109", datetime(2005, 11, 9)),
  2650. ("20051109 10:15", datetime(2005, 11, 9, 10, 15)),
  2651. ("20051109 08H", datetime(2005, 11, 9, 8, 0)),
  2652. ("2005-11-09 10:15", datetime(2005, 11, 9, 10, 15)),
  2653. ("2005-11-09 08H", datetime(2005, 11, 9, 8, 0)),
  2654. ("2005/11/09 10:15", datetime(2005, 11, 9, 10, 15)),
  2655. ("2005/11/09 10:15:32", datetime(2005, 11, 9, 10, 15, 32)),
  2656. ("2005/11/09 10:15:32 AM", datetime(2005, 11, 9, 10, 15, 32)),
  2657. ("2005/11/09 10:15:32 PM", datetime(2005, 11, 9, 22, 15, 32)),
  2658. ("2005/11/09 08H", datetime(2005, 11, 9, 8, 0)),
  2659. ("Thu Sep 25 10:36:28 2003", datetime(2003, 9, 25, 10, 36, 28)),
  2660. ("Thu Sep 25 2003", datetime(2003, 9, 25)),
  2661. ("Sep 25 2003", datetime(2003, 9, 25)),
  2662. ("January 1 2014", datetime(2014, 1, 1)),
  2663. # GHE10537
  2664. ("2014-06", datetime(2014, 6, 1)),
  2665. ("06-2014", datetime(2014, 6, 1)),
  2666. ("2014-6", datetime(2014, 6, 1)),
  2667. ("6-2014", datetime(2014, 6, 1)),
  2668. ("20010101 12", datetime(2001, 1, 1, 12)),
  2669. ("20010101 1234", datetime(2001, 1, 1, 12, 34)),
  2670. ("20010101 123456", datetime(2001, 1, 1, 12, 34, 56)),
  2671. ],
  2672. )
  2673. def test_parsers(self, date_str, expected, cache):
  2674. # dateutil >= 2.5.0 defaults to yearfirst=True
  2675. # https://github.com/dateutil/dateutil/issues/217
  2676. yearfirst = True
  2677. result1, _ = parsing.parse_datetime_string_with_reso(
  2678. date_str, yearfirst=yearfirst
  2679. )
  2680. result2 = to_datetime(date_str, yearfirst=yearfirst)
  2681. result3 = to_datetime([date_str], yearfirst=yearfirst)
  2682. # result5 is used below
  2683. result4 = to_datetime(
  2684. np.array([date_str], dtype=object), yearfirst=yearfirst, cache=cache
  2685. )
  2686. result6 = DatetimeIndex([date_str], yearfirst=yearfirst)
  2687. # result7 is used below
  2688. result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst)
  2689. result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst)
  2690. for res in [result1, result2]:
  2691. assert res == expected
  2692. for res in [result3, result4, result6, result8, result9]:
  2693. exp = DatetimeIndex([Timestamp(expected)])
  2694. tm.assert_index_equal(res, exp)
  2695. # these really need to have yearfirst, but we don't support
  2696. if not yearfirst:
  2697. result5 = Timestamp(date_str)
  2698. assert result5 == expected
  2699. result7 = date_range(date_str, freq="S", periods=1, yearfirst=yearfirst)
  2700. assert result7 == expected
  2701. def test_na_values_with_cache(
  2702. self, cache, unique_nulls_fixture, unique_nulls_fixture2
  2703. ):
  2704. # GH22305
  2705. expected = Index([NaT, NaT], dtype="datetime64[ns]")
  2706. result = to_datetime([unique_nulls_fixture, unique_nulls_fixture2], cache=cache)
  2707. tm.assert_index_equal(result, expected)
  2708. def test_parsers_nat(self):
  2709. # Test that each of several string-accepting methods return pd.NaT
  2710. result1, _ = parsing.parse_datetime_string_with_reso("NaT")
  2711. result2 = to_datetime("NaT")
  2712. result3 = Timestamp("NaT")
  2713. result4 = DatetimeIndex(["NaT"])[0]
  2714. assert result1 is NaT
  2715. assert result2 is NaT
  2716. assert result3 is NaT
  2717. assert result4 is NaT
  2718. @pytest.mark.parametrize(
  2719. "date_str, dayfirst, yearfirst, expected",
  2720. [
  2721. ("10-11-12", False, False, datetime(2012, 10, 11)),
  2722. ("10-11-12", True, False, datetime(2012, 11, 10)),
  2723. ("10-11-12", False, True, datetime(2010, 11, 12)),
  2724. ("10-11-12", True, True, datetime(2010, 12, 11)),
  2725. ("20/12/21", False, False, datetime(2021, 12, 20)),
  2726. ("20/12/21", True, False, datetime(2021, 12, 20)),
  2727. ("20/12/21", False, True, datetime(2020, 12, 21)),
  2728. ("20/12/21", True, True, datetime(2020, 12, 21)),
  2729. ],
  2730. )
  2731. def test_parsers_dayfirst_yearfirst(
  2732. self, cache, date_str, dayfirst, yearfirst, expected
  2733. ):
  2734. # OK
  2735. # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
  2736. # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00
  2737. # 2.5.3 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
  2738. # OK
  2739. # 2.5.1 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
  2740. # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
  2741. # 2.5.3 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
  2742. # bug fix in 2.5.2
  2743. # 2.5.1 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00
  2744. # 2.5.2 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00
  2745. # 2.5.3 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00
  2746. # OK
  2747. # 2.5.1 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
  2748. # 2.5.2 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
  2749. # 2.5.3 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
  2750. # OK
  2751. # 2.5.1 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
  2752. # 2.5.2 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
  2753. # 2.5.3 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
  2754. # OK
  2755. # 2.5.1 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
  2756. # 2.5.2 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
  2757. # 2.5.3 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
  2758. # revert of bug in 2.5.2
  2759. # 2.5.1 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00
  2760. # 2.5.2 20/12/21 [dayfirst=1, yearfirst=1] -> month must be in 1..12
  2761. # 2.5.3 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00
  2762. # OK
  2763. # 2.5.1 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
  2764. # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
  2765. # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
  2766. # str : dayfirst, yearfirst, expected
  2767. # compare with dateutil result
  2768. dateutil_result = parse(date_str, dayfirst=dayfirst, yearfirst=yearfirst)
  2769. assert dateutil_result == expected
  2770. result1, _ = parsing.parse_datetime_string_with_reso(
  2771. date_str, dayfirst=dayfirst, yearfirst=yearfirst
  2772. )
  2773. # we don't support dayfirst/yearfirst here:
  2774. if not dayfirst and not yearfirst:
  2775. result2 = Timestamp(date_str)
  2776. assert result2 == expected
  2777. result3 = to_datetime(
  2778. date_str, dayfirst=dayfirst, yearfirst=yearfirst, cache=cache
  2779. )
  2780. result4 = DatetimeIndex([date_str], dayfirst=dayfirst, yearfirst=yearfirst)[0]
  2781. assert result1 == expected
  2782. assert result3 == expected
  2783. assert result4 == expected
  2784. @pytest.mark.parametrize(
  2785. "date_str, exp_def",
  2786. [["10:15", datetime(1, 1, 1, 10, 15)], ["9:05", datetime(1, 1, 1, 9, 5)]],
  2787. )
  2788. def test_parsers_timestring(self, date_str, exp_def):
  2789. # must be the same as dateutil result
  2790. exp_now = parse(date_str)
  2791. result1, _ = parsing.parse_datetime_string_with_reso(date_str)
  2792. result2 = to_datetime(date_str)
  2793. result3 = to_datetime([date_str])
  2794. result4 = Timestamp(date_str)
  2795. result5 = DatetimeIndex([date_str])[0]
  2796. # parse time string return time string based on default date
  2797. # others are not, and can't be changed because it is used in
  2798. # time series plot
  2799. assert result1 == exp_def
  2800. assert result2 == exp_now
  2801. assert result3 == exp_now
  2802. assert result4 == exp_now
  2803. assert result5 == exp_now
  2804. @pytest.mark.parametrize(
  2805. "dt_string, tz, dt_string_repr",
  2806. [
  2807. (
  2808. "2013-01-01 05:45+0545",
  2809. timezone(timedelta(minutes=345)),
  2810. "Timestamp('2013-01-01 05:45:00+0545', tz='UTC+05:45')",
  2811. ),
  2812. (
  2813. "2013-01-01 05:30+0530",
  2814. timezone(timedelta(minutes=330)),
  2815. "Timestamp('2013-01-01 05:30:00+0530', tz='UTC+05:30')",
  2816. ),
  2817. ],
  2818. )
  2819. def test_parsers_timezone_minute_offsets_roundtrip(
  2820. self, cache, dt_string, tz, dt_string_repr
  2821. ):
  2822. # GH11708
  2823. base = to_datetime("2013-01-01 00:00:00", cache=cache)
  2824. base = base.tz_localize("UTC").tz_convert(tz)
  2825. dt_time = to_datetime(dt_string, cache=cache)
  2826. assert base == dt_time
  2827. assert dt_string_repr == repr(dt_time)
  2828. @pytest.fixture(params=["D", "s", "ms", "us", "ns"])
  2829. def units(request):
  2830. """Day and some time units.
  2831. * D
  2832. * s
  2833. * ms
  2834. * us
  2835. * ns
  2836. """
  2837. return request.param
  2838. @pytest.fixture
  2839. def epoch_1960():
  2840. """Timestamp at 1960-01-01."""
  2841. return Timestamp("1960-01-01")
  2842. @pytest.fixture
  2843. def units_from_epochs():
  2844. return list(range(5))
  2845. @pytest.fixture(params=["timestamp", "pydatetime", "datetime64", "str_1960"])
  2846. def epochs(epoch_1960, request):
  2847. """Timestamp at 1960-01-01 in various forms.
  2848. * Timestamp
  2849. * datetime.datetime
  2850. * numpy.datetime64
  2851. * str
  2852. """
  2853. assert request.param in {"timestamp", "pydatetime", "datetime64", "str_1960"}
  2854. if request.param == "timestamp":
  2855. return epoch_1960
  2856. elif request.param == "pydatetime":
  2857. return epoch_1960.to_pydatetime()
  2858. elif request.param == "datetime64":
  2859. return epoch_1960.to_datetime64()
  2860. else:
  2861. return str(epoch_1960)
  2862. @pytest.fixture
  2863. def julian_dates():
  2864. return date_range("2014-1-1", periods=10).to_julian_date().values
  2865. class TestOrigin:
  2866. def test_origin_and_unit(self):
  2867. # GH#42624
  2868. ts = to_datetime(1, unit="s", origin=1)
  2869. expected = Timestamp("1970-01-01 00:00:02")
  2870. assert ts == expected
  2871. ts = to_datetime(1, unit="s", origin=1_000_000_000)
  2872. expected = Timestamp("2001-09-09 01:46:41")
  2873. assert ts == expected
  2874. def test_julian(self, julian_dates):
  2875. # gh-11276, gh-11745
  2876. # for origin as julian
  2877. result = Series(to_datetime(julian_dates, unit="D", origin="julian"))
  2878. expected = Series(
  2879. to_datetime(julian_dates - Timestamp(0).to_julian_date(), unit="D")
  2880. )
  2881. tm.assert_series_equal(result, expected)
  2882. def test_unix(self):
  2883. result = Series(to_datetime([0, 1, 2], unit="D", origin="unix"))
  2884. expected = Series(
  2885. [Timestamp("1970-01-01"), Timestamp("1970-01-02"), Timestamp("1970-01-03")]
  2886. )
  2887. tm.assert_series_equal(result, expected)
  2888. def test_julian_round_trip(self):
  2889. result = to_datetime(2456658, origin="julian", unit="D")
  2890. assert result.to_julian_date() == 2456658
  2891. # out-of-bounds
  2892. msg = "1 is Out of Bounds for origin='julian'"
  2893. with pytest.raises(ValueError, match=msg):
  2894. to_datetime(1, origin="julian", unit="D")
  2895. def test_invalid_unit(self, units, julian_dates):
  2896. # checking for invalid combination of origin='julian' and unit != D
  2897. if units != "D":
  2898. msg = "unit must be 'D' for origin='julian'"
  2899. with pytest.raises(ValueError, match=msg):
  2900. to_datetime(julian_dates, unit=units, origin="julian")
  2901. @pytest.mark.parametrize("unit", ["ns", "D"])
  2902. def test_invalid_origin(self, unit):
  2903. # need to have a numeric specified
  2904. msg = "it must be numeric with a unit specified"
  2905. with pytest.raises(ValueError, match=msg):
  2906. to_datetime("2005-01-01", origin="1960-01-01", unit=unit)
  2907. def test_epoch(self, units, epochs, epoch_1960, units_from_epochs):
  2908. expected = Series(
  2909. [pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs]
  2910. )
  2911. result = Series(to_datetime(units_from_epochs, unit=units, origin=epochs))
  2912. tm.assert_series_equal(result, expected)
  2913. @pytest.mark.parametrize(
  2914. "origin, exc",
  2915. [
  2916. ("random_string", ValueError),
  2917. ("epoch", ValueError),
  2918. ("13-24-1990", ValueError),
  2919. (datetime(1, 1, 1), OutOfBoundsDatetime),
  2920. ],
  2921. )
  2922. def test_invalid_origins(self, origin, exc, units, units_from_epochs):
  2923. msg = "|".join(
  2924. [
  2925. f"origin {origin} is Out of Bounds",
  2926. f"origin {origin} cannot be converted to a Timestamp",
  2927. "Cannot cast .* to unit='ns' without overflow",
  2928. ]
  2929. )
  2930. with pytest.raises(exc, match=msg):
  2931. to_datetime(units_from_epochs, unit=units, origin=origin)
  2932. def test_invalid_origins_tzinfo(self):
  2933. # GH16842
  2934. with pytest.raises(ValueError, match="must be tz-naive"):
  2935. to_datetime(1, unit="D", origin=datetime(2000, 1, 1, tzinfo=pytz.utc))
  2936. def test_incorrect_value_exception(self):
  2937. # GH47495
  2938. msg = (
  2939. "Unknown datetime string format, unable to parse: yesterday, at position 1"
  2940. )
  2941. with pytest.raises(ValueError, match=msg):
  2942. to_datetime(["today", "yesterday"])
  2943. @pytest.mark.parametrize(
  2944. "format, warning",
  2945. [
  2946. (None, UserWarning),
  2947. ("%Y-%m-%d %H:%M:%S", None),
  2948. ("%Y-%d-%m %H:%M:%S", None),
  2949. ],
  2950. )
  2951. def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning):
  2952. # see gh-23830
  2953. msg = r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00, at position 0"
  2954. with pytest.raises(OutOfBoundsDatetime, match=msg):
  2955. to_datetime("2417-10-10 00:00:00", format=format)
  2956. @pytest.mark.parametrize(
  2957. "arg, origin, expected_str",
  2958. [
  2959. [200 * 365, "unix", "2169-11-13 00:00:00"],
  2960. [200 * 365, "1870-01-01", "2069-11-13 00:00:00"],
  2961. [300 * 365, "1870-01-01", "2169-10-20 00:00:00"],
  2962. ],
  2963. )
  2964. def test_processing_order(self, arg, origin, expected_str):
  2965. # make sure we handle out-of-bounds *before*
  2966. # constructing the dates
  2967. result = to_datetime(arg, unit="D", origin=origin)
  2968. expected = Timestamp(expected_str)
  2969. assert result == expected
  2970. result = to_datetime(200 * 365, unit="D", origin="1870-01-01")
  2971. expected = Timestamp("2069-11-13 00:00:00")
  2972. assert result == expected
  2973. result = to_datetime(300 * 365, unit="D", origin="1870-01-01")
  2974. expected = Timestamp("2169-10-20 00:00:00")
  2975. assert result == expected
  2976. @pytest.mark.parametrize(
  2977. "offset,utc,exp",
  2978. [
  2979. ["Z", True, "2019-01-01T00:00:00.000Z"],
  2980. ["Z", None, "2019-01-01T00:00:00.000Z"],
  2981. ["-01:00", True, "2019-01-01T01:00:00.000Z"],
  2982. ["-01:00", None, "2019-01-01T00:00:00.000-01:00"],
  2983. ],
  2984. )
  2985. def test_arg_tz_ns_unit(self, offset, utc, exp):
  2986. # GH 25546
  2987. arg = "2019-01-01T00:00:00.000" + offset
  2988. result = to_datetime([arg], unit="ns", utc=utc)
  2989. expected = to_datetime([exp])
  2990. tm.assert_index_equal(result, expected)
  2991. class TestShouldCache:
  2992. @pytest.mark.parametrize(
  2993. "listlike,do_caching",
  2994. [
  2995. ([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], False),
  2996. ([1, 1, 1, 1, 4, 5, 6, 7, 8, 9], True),
  2997. ],
  2998. )
  2999. def test_should_cache(self, listlike, do_caching):
  3000. assert (
  3001. tools.should_cache(listlike, check_count=len(listlike), unique_share=0.7)
  3002. == do_caching
  3003. )
  3004. @pytest.mark.parametrize(
  3005. "unique_share,check_count, err_message",
  3006. [
  3007. (0.5, 11, r"check_count must be in next bounds: \[0; len\(arg\)\]"),
  3008. (10, 2, r"unique_share must be in next bounds: \(0; 1\)"),
  3009. ],
  3010. )
  3011. def test_should_cache_errors(self, unique_share, check_count, err_message):
  3012. arg = [5] * 10
  3013. with pytest.raises(AssertionError, match=err_message):
  3014. tools.should_cache(arg, unique_share, check_count)
  3015. @pytest.mark.parametrize(
  3016. "listlike",
  3017. [
  3018. (deque([Timestamp("2010-06-02 09:30:00")] * 51)),
  3019. ([Timestamp("2010-06-02 09:30:00")] * 51),
  3020. (tuple([Timestamp("2010-06-02 09:30:00")] * 51)),
  3021. ],
  3022. )
  3023. def test_no_slicing_errors_in_should_cache(self, listlike):
  3024. # GH#29403
  3025. assert tools.should_cache(listlike) is True
  3026. def test_nullable_integer_to_datetime():
  3027. # Test for #30050
  3028. ser = Series([1, 2, None, 2**61, None])
  3029. ser = ser.astype("Int64")
  3030. ser_copy = ser.copy()
  3031. res = to_datetime(ser, unit="ns")
  3032. expected = Series(
  3033. [
  3034. np.datetime64("1970-01-01 00:00:00.000000001"),
  3035. np.datetime64("1970-01-01 00:00:00.000000002"),
  3036. np.datetime64("NaT"),
  3037. np.datetime64("2043-01-25 23:56:49.213693952"),
  3038. np.datetime64("NaT"),
  3039. ]
  3040. )
  3041. tm.assert_series_equal(res, expected)
  3042. # Check that ser isn't mutated
  3043. tm.assert_series_equal(ser, ser_copy)
  3044. @pytest.mark.parametrize("klass", [np.array, list])
  3045. def test_na_to_datetime(nulls_fixture, klass):
  3046. if isinstance(nulls_fixture, Decimal):
  3047. with pytest.raises(TypeError, match="not convertible to datetime"):
  3048. to_datetime(klass([nulls_fixture]))
  3049. else:
  3050. result = to_datetime(klass([nulls_fixture]))
  3051. assert result[0] is NaT
  3052. @pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
  3053. @pytest.mark.parametrize(
  3054. "args, format",
  3055. [
  3056. (["03/24/2016", "03/25/2016", ""], "%m/%d/%Y"),
  3057. (["2016-03-24", "2016-03-25", ""], "%Y-%m-%d"),
  3058. ],
  3059. ids=["non-ISO8601", "ISO8601"],
  3060. )
  3061. def test_empty_string_datetime(errors, args, format):
  3062. # GH13044, GH50251
  3063. td = Series(args)
  3064. # coerce empty string to pd.NaT
  3065. result = to_datetime(td, format=format, errors=errors)
  3066. expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[ns]")
  3067. tm.assert_series_equal(expected, result)
  3068. def test_empty_string_datetime_coerce__unit():
  3069. # GH13044
  3070. # coerce empty string to pd.NaT
  3071. result = to_datetime([1, ""], unit="s", errors="coerce")
  3072. expected = DatetimeIndex(["1970-01-01 00:00:01", "NaT"], dtype="datetime64[ns]")
  3073. tm.assert_index_equal(expected, result)
  3074. # verify that no exception is raised even when errors='raise' is set
  3075. result = to_datetime([1, ""], unit="s", errors="raise")
  3076. tm.assert_index_equal(expected, result)
  3077. @td.skip_if_no("xarray")
  3078. def test_xarray_coerce_unit():
  3079. # GH44053
  3080. import xarray as xr
  3081. arr = xr.DataArray([1, 2, 3])
  3082. result = to_datetime(arr, unit="ns")
  3083. expected = DatetimeIndex(
  3084. [
  3085. "1970-01-01 00:00:00.000000001",
  3086. "1970-01-01 00:00:00.000000002",
  3087. "1970-01-01 00:00:00.000000003",
  3088. ],
  3089. dtype="datetime64[ns]",
  3090. freq=None,
  3091. )
  3092. tm.assert_index_equal(result, expected)
  3093. @pytest.mark.parametrize("cache", [True, False])
  3094. def test_to_datetime_monotonic_increasing_index(cache):
  3095. # GH28238
  3096. cstart = start_caching_at
  3097. times = date_range(Timestamp("1980"), periods=cstart, freq="YS")
  3098. times = times.to_frame(index=False, name="DT").sample(n=cstart, random_state=1)
  3099. times.index = times.index.to_series().astype(float) / 1000
  3100. result = to_datetime(times.iloc[:, 0], cache=cache)
  3101. expected = times.iloc[:, 0]
  3102. tm.assert_series_equal(result, expected)
  3103. @pytest.mark.parametrize(
  3104. "series_length",
  3105. [40, start_caching_at, (start_caching_at + 1), (start_caching_at + 5)],
  3106. )
  3107. def test_to_datetime_cache_coerce_50_lines_outofbounds(series_length):
  3108. # GH#45319
  3109. s = Series(
  3110. [datetime.fromisoformat("1446-04-12 00:00:00+00:00")]
  3111. + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length)
  3112. )
  3113. result1 = to_datetime(s, errors="coerce", utc=True)
  3114. expected1 = Series(
  3115. [NaT] + ([Timestamp("1991-10-20 00:00:00+00:00")] * series_length)
  3116. )
  3117. tm.assert_series_equal(result1, expected1)
  3118. result2 = to_datetime(s, errors="ignore", utc=True)
  3119. expected2 = Series(
  3120. [datetime.fromisoformat("1446-04-12 00:00:00+00:00")]
  3121. + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length)
  3122. )
  3123. tm.assert_series_equal(result2, expected2)
  3124. with pytest.raises(OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"):
  3125. to_datetime(s, errors="raise", utc=True)
  3126. def test_to_datetime_format_f_parse_nanos():
  3127. # GH 48767
  3128. timestamp = "15/02/2020 02:03:04.123456789"
  3129. timestamp_format = "%d/%m/%Y %H:%M:%S.%f"
  3130. result = to_datetime(timestamp, format=timestamp_format)
  3131. expected = Timestamp(
  3132. year=2020,
  3133. month=2,
  3134. day=15,
  3135. hour=2,
  3136. minute=3,
  3137. second=4,
  3138. microsecond=123456,
  3139. nanosecond=789,
  3140. )
  3141. assert result == expected
  3142. def test_to_datetime_mixed_iso8601():
  3143. # https://github.com/pandas-dev/pandas/issues/50411
  3144. result = to_datetime(["2020-01-01", "2020-01-01 05:00:00"], format="ISO8601")
  3145. expected = DatetimeIndex(["2020-01-01 00:00:00", "2020-01-01 05:00:00"])
  3146. tm.assert_index_equal(result, expected)
  3147. def test_to_datetime_mixed_other():
  3148. # https://github.com/pandas-dev/pandas/issues/50411
  3149. result = to_datetime(["01/11/2000", "12 January 2000"], format="mixed")
  3150. expected = DatetimeIndex(["2000-01-11", "2000-01-12"])
  3151. tm.assert_index_equal(result, expected)
  3152. @pytest.mark.parametrize("exact", [True, False])
  3153. @pytest.mark.parametrize("format", ["ISO8601", "mixed"])
  3154. def test_to_datetime_mixed_or_iso_exact(exact, format):
  3155. msg = "Cannot use 'exact' when 'format' is 'mixed' or 'ISO8601'"
  3156. with pytest.raises(ValueError, match=msg):
  3157. to_datetime(["2020-01-01"], exact=exact, format=format)
  3158. def test_to_datetime_mixed_not_necessarily_iso8601_raise():
  3159. # https://github.com/pandas-dev/pandas/issues/50411
  3160. with pytest.raises(
  3161. ValueError, match="Time data 01-01-2000 is not ISO8601 format, at position 1"
  3162. ):
  3163. to_datetime(["2020-01-01", "01-01-2000"], format="ISO8601")
  3164. @pytest.mark.parametrize(
  3165. ("errors", "expected"),
  3166. [
  3167. ("coerce", DatetimeIndex(["2020-01-01 00:00:00", NaT])),
  3168. ("ignore", Index(["2020-01-01", "01-01-2000"])),
  3169. ],
  3170. )
  3171. def test_to_datetime_mixed_not_necessarily_iso8601_coerce(errors, expected):
  3172. # https://github.com/pandas-dev/pandas/issues/50411
  3173. result = to_datetime(["2020-01-01", "01-01-2000"], format="ISO8601", errors=errors)
  3174. tm.assert_index_equal(result, expected)
  3175. def test_from_numeric_arrow_dtype(any_numeric_ea_dtype):
  3176. # GH 52425
  3177. pytest.importorskip("pyarrow")
  3178. ser = Series([1, 2], dtype=f"{any_numeric_ea_dtype.lower()}[pyarrow]")
  3179. result = to_datetime(ser)
  3180. expected = Series([1, 2], dtype="datetime64[ns]")
  3181. tm.assert_series_equal(result, expected)