# test_array_to_datetime.py
  1. from datetime import (
  2. date,
  3. datetime,
  4. timedelta,
  5. timezone,
  6. )
  7. from dateutil.tz.tz import tzoffset
  8. import numpy as np
  9. import pytest
  10. from pandas._libs import (
  11. iNaT,
  12. tslib,
  13. )
  14. from pandas import Timestamp
  15. import pandas._testing as tm
  16. @pytest.mark.parametrize(
  17. "data,expected",
  18. [
  19. (
  20. ["01-01-2013", "01-02-2013"],
  21. [
  22. "2013-01-01T00:00:00.000000000",
  23. "2013-01-02T00:00:00.000000000",
  24. ],
  25. ),
  26. (
  27. ["Mon Sep 16 2013", "Tue Sep 17 2013"],
  28. [
  29. "2013-09-16T00:00:00.000000000",
  30. "2013-09-17T00:00:00.000000000",
  31. ],
  32. ),
  33. ],
  34. )
  35. def test_parsing_valid_dates(data, expected):
  36. arr = np.array(data, dtype=object)
  37. result, _ = tslib.array_to_datetime(arr)
  38. expected = np.array(expected, dtype="M8[ns]")
  39. tm.assert_numpy_array_equal(result, expected)
  40. @pytest.mark.parametrize(
  41. "dt_string, expected_tz",
  42. [
  43. ["01-01-2013 08:00:00+08:00", 480],
  44. ["2013-01-01T08:00:00.000000000+0800", 480],
  45. ["2012-12-31T16:00:00.000000000-0800", -480],
  46. ["12-31-2012 23:00:00-01:00", -60],
  47. ],
  48. )
  49. def test_parsing_timezone_offsets(dt_string, expected_tz):
  50. # All of these datetime strings with offsets are equivalent
  51. # to the same datetime after the timezone offset is added.
  52. arr = np.array(["01-01-2013 00:00:00"], dtype=object)
  53. expected, _ = tslib.array_to_datetime(arr)
  54. arr = np.array([dt_string], dtype=object)
  55. result, result_tz = tslib.array_to_datetime(arr)
  56. tm.assert_numpy_array_equal(result, expected)
  57. assert result_tz == timezone(timedelta(minutes=expected_tz))
  58. def test_parsing_non_iso_timezone_offset():
  59. dt_string = "01-01-2013T00:00:00.000000000+0000"
  60. arr = np.array([dt_string], dtype=object)
  61. with tm.assert_produces_warning(None):
  62. # GH#50949 should not get tzlocal-deprecation warning here
  63. result, result_tz = tslib.array_to_datetime(arr)
  64. expected = np.array([np.datetime64("2013-01-01 00:00:00.000000000")])
  65. tm.assert_numpy_array_equal(result, expected)
  66. assert result_tz is timezone.utc
  67. def test_parsing_different_timezone_offsets():
  68. # see gh-17697
  69. data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"]
  70. data = np.array(data, dtype=object)
  71. result, result_tz = tslib.array_to_datetime(data)
  72. expected = np.array(
  73. [
  74. datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)),
  75. datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 23400)),
  76. ],
  77. dtype=object,
  78. )
  79. tm.assert_numpy_array_equal(result, expected)
  80. assert result_tz is None
  81. @pytest.mark.parametrize(
  82. "data", [["-352.737091", "183.575577"], ["1", "2", "3", "4", "5"]]
  83. )
  84. def test_number_looking_strings_not_into_datetime(data):
  85. # see gh-4601
  86. #
  87. # These strings don't look like datetimes, so
  88. # they shouldn't be attempted to be converted.
  89. arr = np.array(data, dtype=object)
  90. result, _ = tslib.array_to_datetime(arr, errors="ignore")
  91. tm.assert_numpy_array_equal(result, arr)
  92. @pytest.mark.parametrize(
  93. "invalid_date",
  94. [
  95. date(1000, 1, 1),
  96. datetime(1000, 1, 1),
  97. "1000-01-01",
  98. "Jan 1, 1000",
  99. np.datetime64("1000-01-01"),
  100. ],
  101. )
  102. @pytest.mark.parametrize("errors", ["coerce", "raise"])
  103. def test_coerce_outside_ns_bounds(invalid_date, errors):
  104. arr = np.array([invalid_date], dtype="object")
  105. kwargs = {"values": arr, "errors": errors}
  106. if errors == "raise":
  107. msg = "^Out of bounds nanosecond timestamp: .*, at position 0$"
  108. with pytest.raises(ValueError, match=msg):
  109. tslib.array_to_datetime(**kwargs)
  110. else: # coerce.
  111. result, _ = tslib.array_to_datetime(**kwargs)
  112. expected = np.array([iNaT], dtype="M8[ns]")
  113. tm.assert_numpy_array_equal(result, expected)
  114. def test_coerce_outside_ns_bounds_one_valid():
  115. arr = np.array(["1/1/1000", "1/1/2000"], dtype=object)
  116. result, _ = tslib.array_to_datetime(arr, errors="coerce")
  117. expected = [iNaT, "2000-01-01T00:00:00.000000000"]
  118. expected = np.array(expected, dtype="M8[ns]")
  119. tm.assert_numpy_array_equal(result, expected)
  120. @pytest.mark.parametrize("errors", ["ignore", "coerce"])
  121. def test_coerce_of_invalid_datetimes(errors):
  122. arr = np.array(["01-01-2013", "not_a_date", "1"], dtype=object)
  123. kwargs = {"values": arr, "errors": errors}
  124. if errors == "ignore":
  125. # Without coercing, the presence of any invalid
  126. # dates prevents any values from being converted.
  127. result, _ = tslib.array_to_datetime(**kwargs)
  128. tm.assert_numpy_array_equal(result, arr)
  129. else: # coerce.
  130. # With coercing, the invalid dates becomes iNaT
  131. result, _ = tslib.array_to_datetime(arr, errors="coerce")
  132. expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT]
  133. tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[ns]"))
  134. def test_to_datetime_barely_out_of_bounds():
  135. # see gh-19382, gh-19529
  136. #
  137. # Close enough to bounds that dropping nanos
  138. # would result in an in-bounds datetime.
  139. arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object)
  140. msg = "^Out of bounds nanosecond timestamp: 2262-04-11 23:47:16, at position 0$"
  141. with pytest.raises(tslib.OutOfBoundsDatetime, match=msg):
  142. tslib.array_to_datetime(arr)
  143. class SubDatetime(datetime):
  144. pass
  145. @pytest.mark.parametrize(
  146. "data,expected",
  147. [
  148. ([SubDatetime(2000, 1, 1)], ["2000-01-01T00:00:00.000000000"]),
  149. ([datetime(2000, 1, 1)], ["2000-01-01T00:00:00.000000000"]),
  150. ([Timestamp(2000, 1, 1)], ["2000-01-01T00:00:00.000000000"]),
  151. ],
  152. )
  153. def test_datetime_subclass(data, expected):
  154. # GH 25851
  155. # ensure that subclassed datetime works with
  156. # array_to_datetime
  157. arr = np.array(data, dtype=object)
  158. result, _ = tslib.array_to_datetime(arr)
  159. expected = np.array(expected, dtype="M8[ns]")
  160. tm.assert_numpy_array_equal(result, expected)