# test_ujson.py -- tests for the ujson encoder/decoder bundled with pandas (pandas._libs.json).
  1. import calendar
  2. import datetime
  3. import decimal
  4. import json
  5. import locale
  6. import math
  7. import re
  8. import time
  9. import dateutil
  10. import numpy as np
  11. import pytest
  12. import pytz
  13. import pandas._libs.json as ujson
  14. from pandas.compat import IS64
  15. from pandas import (
  16. DataFrame,
  17. DatetimeIndex,
  18. Index,
  19. NaT,
  20. PeriodIndex,
  21. Series,
  22. Timedelta,
  23. Timestamp,
  24. date_range,
  25. )
  26. import pandas._testing as tm
  27. def _clean_dict(d):
  28. """
  29. Sanitize dictionary for JSON by converting all keys to strings.
  30. Parameters
  31. ----------
  32. d : dict
  33. The dictionary to convert.
  34. Returns
  35. -------
  36. cleaned_dict : dict
  37. """
  38. return {str(k): v for k, v in d.items()}
  39. @pytest.fixture(
  40. params=[None, "split", "records", "values", "index"] # Column indexed by default.
  41. )
  42. def orient(request):
  43. return request.param
  44. class TestUltraJSONTests:
  45. @pytest.mark.skipif(not IS64, reason="not compliant on 32-bit, xref #15865")
  46. def test_encode_decimal(self):
  47. sut = decimal.Decimal("1337.1337")
  48. encoded = ujson.encode(sut, double_precision=15)
  49. decoded = ujson.decode(encoded)
  50. assert decoded == 1337.1337
  51. sut = decimal.Decimal("0.95")
  52. encoded = ujson.encode(sut, double_precision=1)
  53. assert encoded == "1.0"
  54. decoded = ujson.decode(encoded)
  55. assert decoded == 1.0
  56. sut = decimal.Decimal("0.94")
  57. encoded = ujson.encode(sut, double_precision=1)
  58. assert encoded == "0.9"
  59. decoded = ujson.decode(encoded)
  60. assert decoded == 0.9
  61. sut = decimal.Decimal("1.95")
  62. encoded = ujson.encode(sut, double_precision=1)
  63. assert encoded == "2.0"
  64. decoded = ujson.decode(encoded)
  65. assert decoded == 2.0
  66. sut = decimal.Decimal("-1.95")
  67. encoded = ujson.encode(sut, double_precision=1)
  68. assert encoded == "-2.0"
  69. decoded = ujson.decode(encoded)
  70. assert decoded == -2.0
  71. sut = decimal.Decimal("0.995")
  72. encoded = ujson.encode(sut, double_precision=2)
  73. assert encoded == "1.0"
  74. decoded = ujson.decode(encoded)
  75. assert decoded == 1.0
  76. sut = decimal.Decimal("0.9995")
  77. encoded = ujson.encode(sut, double_precision=3)
  78. assert encoded == "1.0"
  79. decoded = ujson.decode(encoded)
  80. assert decoded == 1.0
  81. sut = decimal.Decimal("0.99999999999999944")
  82. encoded = ujson.encode(sut, double_precision=15)
  83. assert encoded == "1.0"
  84. decoded = ujson.decode(encoded)
  85. assert decoded == 1.0
  86. @pytest.mark.parametrize("ensure_ascii", [True, False])
  87. def test_encode_string_conversion(self, ensure_ascii):
  88. string_input = "A string \\ / \b \f \n \r \t </script> &"
  89. not_html_encoded = '"A string \\\\ \\/ \\b \\f \\n \\r \\t <\\/script> &"'
  90. html_encoded = (
  91. '"A string \\\\ \\/ \\b \\f \\n \\r \\t \\u003c\\/script\\u003e \\u0026"'
  92. )
  93. def helper(expected_output, **encode_kwargs):
  94. output = ujson.encode(
  95. string_input, ensure_ascii=ensure_ascii, **encode_kwargs
  96. )
  97. assert output == expected_output
  98. assert string_input == json.loads(output)
  99. assert string_input == ujson.decode(output)
  100. # Default behavior assumes encode_html_chars=False.
  101. helper(not_html_encoded)
  102. # Make sure explicit encode_html_chars=False works.
  103. helper(not_html_encoded, encode_html_chars=False)
  104. # Make sure explicit encode_html_chars=True does the encoding.
  105. helper(html_encoded, encode_html_chars=True)
  106. @pytest.mark.parametrize(
  107. "long_number", [-4342969734183514, -12345678901234.56789012, -528656961.4399388]
  108. )
  109. def test_double_long_numbers(self, long_number):
  110. sut = {"a": long_number}
  111. encoded = ujson.encode(sut, double_precision=15)
  112. decoded = ujson.decode(encoded)
  113. assert sut == decoded
  114. def test_encode_non_c_locale(self):
  115. lc_category = locale.LC_NUMERIC
  116. # We just need one of these locales to work.
  117. for new_locale in ("it_IT.UTF-8", "Italian_Italy"):
  118. if tm.can_set_locale(new_locale, lc_category):
  119. with tm.set_locale(new_locale, lc_category):
  120. assert ujson.loads(ujson.dumps(4.78e60)) == 4.78e60
  121. assert ujson.loads("4.78", precise_float=True) == 4.78
  122. break
  123. def test_decimal_decode_test_precise(self):
  124. sut = {"a": 4.56}
  125. encoded = ujson.encode(sut)
  126. decoded = ujson.decode(encoded, precise_float=True)
  127. assert sut == decoded
  128. def test_encode_double_tiny_exponential(self):
  129. num = 1e-40
  130. assert num == ujson.decode(ujson.encode(num))
  131. num = 1e-100
  132. assert num == ujson.decode(ujson.encode(num))
  133. num = -1e-45
  134. assert num == ujson.decode(ujson.encode(num))
  135. num = -1e-145
  136. assert np.allclose(num, ujson.decode(ujson.encode(num)))
  137. @pytest.mark.parametrize("unicode_key", ["key1", "بن"])
  138. def test_encode_dict_with_unicode_keys(self, unicode_key):
  139. unicode_dict = {unicode_key: "value1"}
  140. assert unicode_dict == ujson.decode(ujson.encode(unicode_dict))
  141. @pytest.mark.parametrize(
  142. "double_input", [math.pi, -math.pi] # Should work with negatives too.
  143. )
  144. def test_encode_double_conversion(self, double_input):
  145. output = ujson.encode(double_input)
  146. assert round(double_input, 5) == round(json.loads(output), 5)
  147. assert round(double_input, 5) == round(ujson.decode(output), 5)
  148. def test_encode_with_decimal(self):
  149. decimal_input = 1.0
  150. output = ujson.encode(decimal_input)
  151. assert output == "1.0"
  152. def test_encode_array_of_nested_arrays(self):
  153. nested_input = [[[[]]]] * 20
  154. output = ujson.encode(nested_input)
  155. assert nested_input == json.loads(output)
  156. assert nested_input == ujson.decode(output)
  157. def test_encode_array_of_doubles(self):
  158. doubles_input = [31337.31337, 31337.31337, 31337.31337, 31337.31337] * 10
  159. output = ujson.encode(doubles_input)
  160. assert doubles_input == json.loads(output)
  161. assert doubles_input == ujson.decode(output)
  162. def test_double_precision(self):
  163. double_input = 30.012345678901234
  164. output = ujson.encode(double_input, double_precision=15)
  165. assert double_input == json.loads(output)
  166. assert double_input == ujson.decode(output)
  167. for double_precision in (3, 9):
  168. output = ujson.encode(double_input, double_precision=double_precision)
  169. rounded_input = round(double_input, double_precision)
  170. assert rounded_input == json.loads(output)
  171. assert rounded_input == ujson.decode(output)
  172. @pytest.mark.parametrize(
  173. "invalid_val",
  174. [
  175. 20,
  176. -1,
  177. "9",
  178. None,
  179. ],
  180. )
  181. def test_invalid_double_precision(self, invalid_val):
  182. double_input = 30.12345678901234567890
  183. expected_exception = ValueError if isinstance(invalid_val, int) else TypeError
  184. msg = (
  185. r"Invalid value '.*' for option 'double_precision', max is '15'|"
  186. r"an integer is required \(got type |"
  187. r"object cannot be interpreted as an integer"
  188. )
  189. with pytest.raises(expected_exception, match=msg):
  190. ujson.encode(double_input, double_precision=invalid_val)
  191. def test_encode_string_conversion2(self):
  192. string_input = "A string \\ / \b \f \n \r \t"
  193. output = ujson.encode(string_input)
  194. assert string_input == json.loads(output)
  195. assert string_input == ujson.decode(output)
  196. assert output == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"'
  197. @pytest.mark.parametrize(
  198. "unicode_input",
  199. ["Räksmörgås اسامة بن محمد بن عوض بن لادن", "\xe6\x97\xa5\xd1\x88"],
  200. )
  201. def test_encode_unicode_conversion(self, unicode_input):
  202. enc = ujson.encode(unicode_input)
  203. dec = ujson.decode(enc)
  204. assert enc == json.dumps(unicode_input)
  205. assert dec == json.loads(enc)
  206. def test_encode_control_escaping(self):
  207. escaped_input = "\x19"
  208. enc = ujson.encode(escaped_input)
  209. dec = ujson.decode(enc)
  210. assert escaped_input == dec
  211. assert enc == json.dumps(escaped_input)
  212. def test_encode_unicode_surrogate_pair(self):
  213. surrogate_input = "\xf0\x90\x8d\x86"
  214. enc = ujson.encode(surrogate_input)
  215. dec = ujson.decode(enc)
  216. assert enc == json.dumps(surrogate_input)
  217. assert dec == json.loads(enc)
  218. def test_encode_unicode_4bytes_utf8(self):
  219. four_bytes_input = "\xf0\x91\x80\xb0TRAILINGNORMAL"
  220. enc = ujson.encode(four_bytes_input)
  221. dec = ujson.decode(enc)
  222. assert enc == json.dumps(four_bytes_input)
  223. assert dec == json.loads(enc)
  224. def test_encode_unicode_4bytes_utf8highest(self):
  225. four_bytes_input = "\xf3\xbf\xbf\xbfTRAILINGNORMAL"
  226. enc = ujson.encode(four_bytes_input)
  227. dec = ujson.decode(enc)
  228. assert enc == json.dumps(four_bytes_input)
  229. assert dec == json.loads(enc)
  230. def test_encode_unicode_error(self):
  231. string = "'\udac0'"
  232. msg = (
  233. r"'utf-8' codec can't encode character '\\udac0' "
  234. r"in position 1: surrogates not allowed"
  235. )
  236. with pytest.raises(UnicodeEncodeError, match=msg):
  237. ujson.dumps([string])
  238. def test_encode_array_in_array(self):
  239. arr_in_arr_input = [[[[]]]]
  240. output = ujson.encode(arr_in_arr_input)
  241. assert arr_in_arr_input == json.loads(output)
  242. assert output == json.dumps(arr_in_arr_input)
  243. assert arr_in_arr_input == ujson.decode(output)
  244. @pytest.mark.parametrize(
  245. "num_input",
  246. [
  247. 31337,
  248. -31337, # Negative number.
  249. -9223372036854775808, # Large negative number.
  250. ],
  251. )
  252. def test_encode_num_conversion(self, num_input):
  253. output = ujson.encode(num_input)
  254. assert num_input == json.loads(output)
  255. assert output == json.dumps(num_input)
  256. assert num_input == ujson.decode(output)
  257. def test_encode_list_conversion(self):
  258. list_input = [1, 2, 3, 4]
  259. output = ujson.encode(list_input)
  260. assert list_input == json.loads(output)
  261. assert list_input == ujson.decode(output)
  262. def test_encode_dict_conversion(self):
  263. dict_input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4}
  264. output = ujson.encode(dict_input)
  265. assert dict_input == json.loads(output)
  266. assert dict_input == ujson.decode(output)
  267. @pytest.mark.parametrize("builtin_value", [None, True, False])
  268. def test_encode_builtin_values_conversion(self, builtin_value):
  269. output = ujson.encode(builtin_value)
  270. assert builtin_value == json.loads(output)
  271. assert output == json.dumps(builtin_value)
  272. assert builtin_value == ujson.decode(output)
  273. def test_encode_datetime_conversion(self):
  274. datetime_input = datetime.datetime.fromtimestamp(time.time())
  275. output = ujson.encode(datetime_input, date_unit="s")
  276. expected = calendar.timegm(datetime_input.utctimetuple())
  277. assert int(expected) == json.loads(output)
  278. assert int(expected) == ujson.decode(output)
  279. def test_encode_date_conversion(self):
  280. date_input = datetime.date.fromtimestamp(time.time())
  281. output = ujson.encode(date_input, date_unit="s")
  282. tup = (date_input.year, date_input.month, date_input.day, 0, 0, 0)
  283. expected = calendar.timegm(tup)
  284. assert int(expected) == json.loads(output)
  285. assert int(expected) == ujson.decode(output)
  286. @pytest.mark.parametrize(
  287. "test",
  288. [datetime.time(), datetime.time(1, 2, 3), datetime.time(10, 12, 15, 343243)],
  289. )
  290. def test_encode_time_conversion_basic(self, test):
  291. output = ujson.encode(test)
  292. expected = f'"{test.isoformat()}"'
  293. assert expected == output
  294. def test_encode_time_conversion_pytz(self):
  295. # see gh-11473: to_json segfaults with timezone-aware datetimes
  296. test = datetime.time(10, 12, 15, 343243, pytz.utc)
  297. output = ujson.encode(test)
  298. expected = f'"{test.isoformat()}"'
  299. assert expected == output
  300. def test_encode_time_conversion_dateutil(self):
  301. # see gh-11473: to_json segfaults with timezone-aware datetimes
  302. test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc())
  303. output = ujson.encode(test)
  304. expected = f'"{test.isoformat()}"'
  305. assert expected == output
  306. @pytest.mark.parametrize(
  307. "decoded_input", [NaT, np.datetime64("NaT"), np.nan, np.inf, -np.inf]
  308. )
  309. def test_encode_as_null(self, decoded_input):
  310. assert ujson.encode(decoded_input) == "null", "Expected null"
  311. def test_datetime_units(self):
  312. val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504)
  313. stamp = Timestamp(val).as_unit("ns")
  314. roundtrip = ujson.decode(ujson.encode(val, date_unit="s"))
  315. assert roundtrip == stamp._value // 10**9
  316. roundtrip = ujson.decode(ujson.encode(val, date_unit="ms"))
  317. assert roundtrip == stamp._value // 10**6
  318. roundtrip = ujson.decode(ujson.encode(val, date_unit="us"))
  319. assert roundtrip == stamp._value // 10**3
  320. roundtrip = ujson.decode(ujson.encode(val, date_unit="ns"))
  321. assert roundtrip == stamp._value
  322. msg = "Invalid value 'foo' for option 'date_unit'"
  323. with pytest.raises(ValueError, match=msg):
  324. ujson.encode(val, date_unit="foo")
  325. def test_encode_to_utf8(self):
  326. unencoded = "\xe6\x97\xa5\xd1\x88"
  327. enc = ujson.encode(unencoded, ensure_ascii=False)
  328. dec = ujson.decode(enc)
  329. assert enc == json.dumps(unencoded, ensure_ascii=False)
  330. assert dec == json.loads(enc)
  331. def test_decode_from_unicode(self):
  332. unicode_input = '{"obj": 31337}'
  333. dec1 = ujson.decode(unicode_input)
  334. dec2 = ujson.decode(str(unicode_input))
  335. assert dec1 == dec2
  336. def test_encode_recursion_max(self):
  337. # 8 is the max recursion depth
  338. class O2:
  339. member = 0
  340. class O1:
  341. member = 0
  342. decoded_input = O1()
  343. decoded_input.member = O2()
  344. decoded_input.member.member = decoded_input
  345. with pytest.raises(OverflowError, match="Maximum recursion level reached"):
  346. ujson.encode(decoded_input)
  347. def test_decode_jibberish(self):
  348. jibberish = "fdsa sda v9sa fdsa"
  349. msg = "Unexpected character found when decoding 'false'"
  350. with pytest.raises(ValueError, match=msg):
  351. ujson.decode(jibberish)
  352. @pytest.mark.parametrize(
  353. "broken_json",
  354. [
  355. "[", # Broken array start.
  356. "{", # Broken object start.
  357. "]", # Broken array end.
  358. "}", # Broken object end.
  359. ],
  360. )
  361. def test_decode_broken_json(self, broken_json):
  362. msg = "Expected object or value"
  363. with pytest.raises(ValueError, match=msg):
  364. ujson.decode(broken_json)
  365. @pytest.mark.parametrize("too_big_char", ["[", "{"])
  366. def test_decode_depth_too_big(self, too_big_char):
  367. with pytest.raises(ValueError, match="Reached object decoding depth limit"):
  368. ujson.decode(too_big_char * (1024 * 1024))
  369. @pytest.mark.parametrize(
  370. "bad_string",
  371. [
  372. '"TESTING', # Unterminated.
  373. '"TESTING\\"', # Unterminated escape.
  374. "tru", # Broken True.
  375. "fa", # Broken False.
  376. "n", # Broken None.
  377. ],
  378. )
  379. def test_decode_bad_string(self, bad_string):
  380. msg = (
  381. "Unexpected character found when decoding|"
  382. "Unmatched ''\"' when when decoding 'string'"
  383. )
  384. with pytest.raises(ValueError, match=msg):
  385. ujson.decode(bad_string)
  386. @pytest.mark.parametrize(
  387. "broken_json, err_msg",
  388. [
  389. (
  390. '{{1337:""}}',
  391. "Key name of object must be 'string' when decoding 'object'",
  392. ),
  393. ('{{"key":"}', "Unmatched ''\"' when when decoding 'string'"),
  394. ("[[[true", "Unexpected character found when decoding array value (2)"),
  395. ],
  396. )
  397. def test_decode_broken_json_leak(self, broken_json, err_msg):
  398. for _ in range(1000):
  399. with pytest.raises(ValueError, match=re.escape(err_msg)):
  400. ujson.decode(broken_json)
  401. @pytest.mark.parametrize(
  402. "invalid_dict",
  403. [
  404. "{{{{31337}}}}", # No key.
  405. '{{{{"key":}}}}', # No value.
  406. '{{{{"key"}}}}', # No colon or value.
  407. ],
  408. )
  409. def test_decode_invalid_dict(self, invalid_dict):
  410. msg = (
  411. "Key name of object must be 'string' when decoding 'object'|"
  412. "No ':' found when decoding object value|"
  413. "Expected object or value"
  414. )
  415. with pytest.raises(ValueError, match=msg):
  416. ujson.decode(invalid_dict)
  417. @pytest.mark.parametrize(
  418. "numeric_int_as_str", ["31337", "-31337"] # Should work with negatives.
  419. )
  420. def test_decode_numeric_int(self, numeric_int_as_str):
  421. assert int(numeric_int_as_str) == ujson.decode(numeric_int_as_str)
  422. def test_encode_null_character(self):
  423. wrapped_input = "31337 \x00 1337"
  424. output = ujson.encode(wrapped_input)
  425. assert wrapped_input == json.loads(output)
  426. assert output == json.dumps(wrapped_input)
  427. assert wrapped_input == ujson.decode(output)
  428. alone_input = "\x00"
  429. output = ujson.encode(alone_input)
  430. assert alone_input == json.loads(output)
  431. assert output == json.dumps(alone_input)
  432. assert alone_input == ujson.decode(output)
  433. assert '" \\u0000\\r\\n "' == ujson.dumps(" \u0000\r\n ")
  434. def test_decode_null_character(self):
  435. wrapped_input = '"31337 \\u0000 31337"'
  436. assert ujson.decode(wrapped_input) == json.loads(wrapped_input)
  437. def test_encode_list_long_conversion(self):
  438. long_input = [
  439. 9223372036854775807,
  440. 9223372036854775807,
  441. 9223372036854775807,
  442. 9223372036854775807,
  443. 9223372036854775807,
  444. 9223372036854775807,
  445. ]
  446. output = ujson.encode(long_input)
  447. assert long_input == json.loads(output)
  448. assert long_input == ujson.decode(output)
  449. @pytest.mark.parametrize("long_input", [9223372036854775807, 18446744073709551615])
  450. def test_encode_long_conversion(self, long_input):
  451. output = ujson.encode(long_input)
  452. assert long_input == json.loads(output)
  453. assert output == json.dumps(long_input)
  454. assert long_input == ujson.decode(output)
  455. @pytest.mark.parametrize("bigNum", [2**64, -(2**63) - 1])
  456. def test_dumps_ints_larger_than_maxsize(self, bigNum):
  457. encoding = ujson.encode(bigNum)
  458. assert str(bigNum) == encoding
  459. with pytest.raises(
  460. ValueError,
  461. match="Value is too big|Value is too small",
  462. ):
  463. assert ujson.loads(encoding) == bigNum
  464. @pytest.mark.parametrize(
  465. "int_exp", ["1337E40", "1.337E40", "1337E+9", "1.337e+40", "1.337E-4"]
  466. )
  467. def test_decode_numeric_int_exp(self, int_exp):
  468. assert ujson.decode(int_exp) == json.loads(int_exp)
  469. def test_loads_non_str_bytes_raises(self):
  470. msg = "Expected 'str' or 'bytes'"
  471. with pytest.raises(TypeError, match=msg):
  472. ujson.loads(None)
  473. @pytest.mark.parametrize("val", [3590016419, 2**31, 2**32, (2**32) - 1])
  474. def test_decode_number_with_32bit_sign_bit(self, val):
  475. # Test that numbers that fit within 32 bits but would have the
  476. # sign bit set (2**31 <= x < 2**32) are decoded properly.
  477. doc = f'{{"id": {val}}}'
  478. assert ujson.decode(doc)["id"] == val
  479. def test_encode_big_escape(self):
  480. # Make sure no Exception is raised.
  481. for _ in range(10):
  482. base = "\u00e5".encode()
  483. escape_input = base * 1024 * 1024 * 2
  484. ujson.encode(escape_input)
  485. def test_decode_big_escape(self):
  486. # Make sure no Exception is raised.
  487. for _ in range(10):
  488. base = "\u00e5".encode()
  489. quote = b'"'
  490. escape_input = quote + (base * 1024 * 1024 * 2) + quote
  491. ujson.decode(escape_input)
  492. def test_to_dict(self):
  493. d = {"key": 31337}
  494. class DictTest:
  495. def toDict(self):
  496. return d
  497. o = DictTest()
  498. output = ujson.encode(o)
  499. dec = ujson.decode(output)
  500. assert dec == d
  501. def test_default_handler(self):
  502. class _TestObject:
  503. def __init__(self, val) -> None:
  504. self.val = val
  505. @property
  506. def recursive_attr(self):
  507. return _TestObject("recursive_attr")
  508. def __str__(self) -> str:
  509. return str(self.val)
  510. msg = "Maximum recursion level reached"
  511. with pytest.raises(OverflowError, match=msg):
  512. ujson.encode(_TestObject("foo"))
  513. assert '"foo"' == ujson.encode(_TestObject("foo"), default_handler=str)
  514. def my_handler(_):
  515. return "foobar"
  516. assert '"foobar"' == ujson.encode(
  517. _TestObject("foo"), default_handler=my_handler
  518. )
  519. def my_handler_raises(_):
  520. raise TypeError("I raise for anything")
  521. with pytest.raises(TypeError, match="I raise for anything"):
  522. ujson.encode(_TestObject("foo"), default_handler=my_handler_raises)
  523. def my_int_handler(_):
  524. return 42
  525. assert (
  526. ujson.decode(
  527. ujson.encode(_TestObject("foo"), default_handler=my_int_handler)
  528. )
  529. == 42
  530. )
  531. def my_obj_handler(_):
  532. return datetime.datetime(2013, 2, 3)
  533. assert ujson.decode(
  534. ujson.encode(datetime.datetime(2013, 2, 3))
  535. ) == ujson.decode(
  536. ujson.encode(_TestObject("foo"), default_handler=my_obj_handler)
  537. )
  538. obj_list = [_TestObject("foo"), _TestObject("bar")]
  539. assert json.loads(json.dumps(obj_list, default=str)) == ujson.decode(
  540. ujson.encode(obj_list, default_handler=str)
  541. )
  542. def test_encode_object(self):
  543. class _TestObject:
  544. def __init__(self, a, b, _c, d) -> None:
  545. self.a = a
  546. self.b = b
  547. self._c = _c
  548. self.d = d
  549. def e(self):
  550. return 5
  551. # JSON keys should be all non-callable non-underscore attributes, see GH-42768
  552. test_object = _TestObject(a=1, b=2, _c=3, d=4)
  553. assert ujson.decode(ujson.encode(test_object)) == {"a": 1, "b": 2, "d": 4}
  554. class TestNumpyJSONTests:
  555. @pytest.mark.parametrize("bool_input", [True, False])
  556. def test_bool(self, bool_input):
  557. b = bool(bool_input)
  558. assert ujson.decode(ujson.encode(b)) == b
  559. def test_bool_array(self):
  560. bool_array = np.array(
  561. [True, False, True, True, False, True, False, False], dtype=bool
  562. )
  563. output = np.array(ujson.decode(ujson.encode(bool_array)), dtype=bool)
  564. tm.assert_numpy_array_equal(bool_array, output)
  565. def test_int(self, any_int_numpy_dtype):
  566. klass = np.dtype(any_int_numpy_dtype).type
  567. num = klass(1)
  568. assert klass(ujson.decode(ujson.encode(num))) == num
  569. def test_int_array(self, any_int_numpy_dtype):
  570. arr = np.arange(100, dtype=int)
  571. arr_input = arr.astype(any_int_numpy_dtype)
  572. arr_output = np.array(
  573. ujson.decode(ujson.encode(arr_input)), dtype=any_int_numpy_dtype
  574. )
  575. tm.assert_numpy_array_equal(arr_input, arr_output)
  576. def test_int_max(self, any_int_numpy_dtype):
  577. if any_int_numpy_dtype in ("int64", "uint64") and not IS64:
  578. pytest.skip("Cannot test 64-bit integer on 32-bit platform")
  579. klass = np.dtype(any_int_numpy_dtype).type
  580. # uint64 max will always overflow,
  581. # as it's encoded to signed.
  582. if any_int_numpy_dtype == "uint64":
  583. num = np.iinfo("int64").max
  584. else:
  585. num = np.iinfo(any_int_numpy_dtype).max
  586. assert klass(ujson.decode(ujson.encode(num))) == num
  587. def test_float(self, float_numpy_dtype):
  588. klass = np.dtype(float_numpy_dtype).type
  589. num = klass(256.2013)
  590. assert klass(ujson.decode(ujson.encode(num))) == num
  591. def test_float_array(self, float_numpy_dtype):
  592. arr = np.arange(12.5, 185.72, 1.7322, dtype=float)
  593. float_input = arr.astype(float_numpy_dtype)
  594. float_output = np.array(
  595. ujson.decode(ujson.encode(float_input, double_precision=15)),
  596. dtype=float_numpy_dtype,
  597. )
  598. tm.assert_almost_equal(float_input, float_output)
  599. def test_float_max(self, float_numpy_dtype):
  600. klass = np.dtype(float_numpy_dtype).type
  601. num = klass(np.finfo(float_numpy_dtype).max / 10)
  602. tm.assert_almost_equal(
  603. klass(ujson.decode(ujson.encode(num, double_precision=15))), num
  604. )
  605. def test_array_basic(self):
  606. arr = np.arange(96)
  607. arr = arr.reshape((2, 2, 2, 2, 3, 2))
  608. tm.assert_numpy_array_equal(np.array(ujson.decode(ujson.encode(arr))), arr)
  609. @pytest.mark.parametrize("shape", [(10, 10), (5, 5, 4), (100, 1)])
  610. def test_array_reshaped(self, shape):
  611. arr = np.arange(100)
  612. arr = arr.reshape(shape)
  613. tm.assert_numpy_array_equal(np.array(ujson.decode(ujson.encode(arr))), arr)
  614. def test_array_list(self):
  615. arr_list = [
  616. "a",
  617. [],
  618. {},
  619. {},
  620. [],
  621. 42,
  622. 97.8,
  623. ["a", "b"],
  624. {"key": "val"},
  625. ]
  626. arr = np.array(arr_list, dtype=object)
  627. result = np.array(ujson.decode(ujson.encode(arr)), dtype=object)
  628. tm.assert_numpy_array_equal(result, arr)
  629. def test_array_float(self):
  630. dtype = np.float32
  631. arr = np.arange(100.202, 200.202, 1, dtype=dtype)
  632. arr = arr.reshape((5, 5, 4))
  633. arr_out = np.array(ujson.decode(ujson.encode(arr)), dtype=dtype)
  634. tm.assert_almost_equal(arr, arr_out)
  635. def test_0d_array(self):
  636. # gh-18878
  637. msg = re.escape("array(1) (0d array) is not JSON serializable at the moment")
  638. with pytest.raises(TypeError, match=msg):
  639. ujson.encode(np.array(1))
  640. class TestPandasJSONTests:
  641. def test_dataframe(self, orient):
  642. dtype = np.int64
  643. df = DataFrame(
  644. [[1, 2, 3], [4, 5, 6]],
  645. index=["a", "b"],
  646. columns=["x", "y", "z"],
  647. dtype=dtype,
  648. )
  649. encode_kwargs = {} if orient is None else {"orient": orient}
  650. assert (df.dtypes == dtype).all()
  651. output = ujson.decode(ujson.encode(df, **encode_kwargs))
  652. assert (df.dtypes == dtype).all()
  653. # Ensure proper DataFrame initialization.
  654. if orient == "split":
  655. dec = _clean_dict(output)
  656. output = DataFrame(**dec)
  657. else:
  658. output = DataFrame(output)
  659. # Corrections to enable DataFrame comparison.
  660. if orient == "values":
  661. df.columns = [0, 1, 2]
  662. df.index = [0, 1]
  663. elif orient == "records":
  664. df.index = [0, 1]
  665. elif orient == "index":
  666. df = df.transpose()
  667. assert (df.dtypes == dtype).all()
  668. tm.assert_frame_equal(output, df)
  669. def test_dataframe_nested(self, orient):
  670. df = DataFrame(
  671. [[1, 2, 3], [4, 5, 6]], index=["a", "b"], columns=["x", "y", "z"]
  672. )
  673. nested = {"df1": df, "df2": df.copy()}
  674. kwargs = {} if orient is None else {"orient": orient}
  675. exp = {
  676. "df1": ujson.decode(ujson.encode(df, **kwargs)),
  677. "df2": ujson.decode(ujson.encode(df, **kwargs)),
  678. }
  679. assert ujson.decode(ujson.encode(nested, **kwargs)) == exp
  680. def test_series(self, orient):
  681. dtype = np.int64
  682. s = Series(
  683. [10, 20, 30, 40, 50, 60],
  684. name="series",
  685. index=[6, 7, 8, 9, 10, 15],
  686. dtype=dtype,
  687. ).sort_values()
  688. assert s.dtype == dtype
  689. encode_kwargs = {} if orient is None else {"orient": orient}
  690. output = ujson.decode(ujson.encode(s, **encode_kwargs))
  691. assert s.dtype == dtype
  692. if orient == "split":
  693. dec = _clean_dict(output)
  694. output = Series(**dec)
  695. else:
  696. output = Series(output)
  697. if orient in (None, "index"):
  698. s.name = None
  699. output = output.sort_values()
  700. s.index = ["6", "7", "8", "9", "10", "15"]
  701. elif orient in ("records", "values"):
  702. s.name = None
  703. s.index = [0, 1, 2, 3, 4, 5]
  704. assert s.dtype == dtype
  705. tm.assert_series_equal(output, s)
  706. def test_series_nested(self, orient):
  707. s = Series(
  708. [10, 20, 30, 40, 50, 60], name="series", index=[6, 7, 8, 9, 10, 15]
  709. ).sort_values()
  710. nested = {"s1": s, "s2": s.copy()}
  711. kwargs = {} if orient is None else {"orient": orient}
  712. exp = {
  713. "s1": ujson.decode(ujson.encode(s, **kwargs)),
  714. "s2": ujson.decode(ujson.encode(s, **kwargs)),
  715. }
  716. assert ujson.decode(ujson.encode(nested, **kwargs)) == exp
  def test_index(self):
      """Round-trip a named Index through ujson for each orient.

      For the default, "values", "records" and "index" orients the decoded
      payload is wrapped in ``Index(..., name="index")`` before comparison.
      For "split" the decoded dict is expanded into ``Index`` keyword
      arguments and the name is additionally checked explicitly.
      """
      i = Index([23, 45, 18, 98, 43, 11], name="index")

      # Column indexed.
      output = Index(ujson.decode(ujson.encode(i)), name="index")
      tm.assert_index_equal(i, output)

      dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split")))
      output = Index(**dec)
      # These two checks used to appear twice in a row with nothing re-encoded
      # in between; the duplicate added no coverage and was dropped.
      tm.assert_index_equal(i, output)
      assert i.name == output.name

      # The remaining orients share the same decode-and-wrap shape.
      for orient in ("values", "records", "index"):
          output = Index(ujson.decode(ujson.encode(i, orient=orient)), name="index")
          tm.assert_index_equal(i, output)
  def test_datetime_index(self):
      """Round-trip a DatetimeIndex, and a Series indexed by it, with date_unit="ns"."""
      unit = "ns"

      # freq doesn't round-trip
      expected_index = DatetimeIndex(
          list(date_range("1/1/2000", periods=20)), freq=None
      )

      raw_index = ujson.decode(ujson.encode(expected_index, date_unit=unit))
      tm.assert_index_equal(expected_index, DatetimeIndex(np.array(raw_index)))

      expected_series = Series(
          np.random.randn(len(expected_index)), index=expected_index
      )
      result = Series(ujson.decode(ujson.encode(expected_series, date_unit=unit)))
      # Cast the decoded labels to int64 and rebuild a DatetimeIndex from them
      # so the result can be compared against the original Series.
      result.index = DatetimeIndex(result.index.values.astype(np.int64))
      tm.assert_series_equal(expected_series, result)
  @pytest.mark.parametrize(
      "invalid_arr",
      [
          "[31337,]",  # Trailing comma.
          "[,31337]",  # Leading comma.
          "[]]",  # Unmatched bracket.
          "[,]",  # Only comma.
      ],
  )
  def test_decode_invalid_array(self, invalid_arr):
      """Malformed array literals must raise ValueError with a decoder message."""
      # Any one of these decoder messages is acceptable for a malformed array.
      accepted_messages = (
          "Expected object or value",
          "Trailing data",
          "Unexpected character found when decoding array value",
      )
      pattern = "|".join(accepted_messages)
      with pytest.raises(ValueError, match=pattern):
          ujson.decode(invalid_arr)
  @pytest.mark.parametrize("arr", [[], [31337]])
  def test_decode_array(self, arr):
      """Decoding the str() form of a small list yields an equal list."""
      decoded = ujson.decode(str(arr))
      assert arr == decoded
  @pytest.mark.parametrize("extreme_num", [9223372036854775807, -9223372036854775808])
  def test_decode_extreme_numbers(self, extreme_num):
      """The parametrized boundary integers decode back to themselves."""
      decoded = ujson.decode(str(extreme_num))
      assert extreme_num == decoded
  @pytest.mark.parametrize("too_extreme_num", [f"{2**64}", f"{-2**63-1}"])
  def test_decode_too_extreme_numbers(self, too_extreme_num):
      """Out-of-range integer literals must be rejected with a ValueError."""
      overflow_pattern = "Value is too big|Value is too small"
      with pytest.raises(ValueError, match=overflow_pattern):
          ujson.decode(too_extreme_num)
  def test_decode_with_trailing_whitespaces(self):
      """Whitespace after a complete document is ignored by the decoder."""
      decoded = ujson.decode("{}\n\t ")
      assert {} == decoded
  def test_decode_with_trailing_non_whitespaces(self):
      """Non-whitespace text after a complete document raises "Trailing data"."""
      document_with_garbage = "{}\n\t a"
      with pytest.raises(ValueError, match="Trailing data"):
          ujson.decode(document_with_garbage)
  @pytest.mark.parametrize("value", [f"{2**64}", f"{-2**63-1}"])
  def test_decode_array_with_big_int(self, value):
      """``ujson.loads`` must reject out-of-range integer literals with ValueError."""
      overflow_pattern = "Value is too big|Value is too small"
      with pytest.raises(ValueError, match=overflow_pattern):
          ujson.loads(value)
  @pytest.mark.parametrize(
      "float_number",
      [
          1.1234567893,
          1.234567893,
          1.34567893,
          1.4567893,
          1.567893,
          1.67893,
          1.7893,
          1.893,
          1.3,
      ],
  )
  @pytest.mark.parametrize("sign", [-1, 1])
  def test_decode_floating_point(self, sign, float_number):
      """Decoding the str() of a signed float recovers it to within rtol=1e-15."""
      signed_value = sign * float_number
      decoded = ujson.loads(str(signed_value))
      tm.assert_almost_equal(signed_value, decoded, rtol=1e-15)
  def test_encode_big_set(self):
      """Encoding a 100000-element set of ints completes without raising."""
      big_set = set(range(100000))

      # Make sure no Exception is raised.
      ujson.encode(big_set)
  def test_encode_empty_set(self):
      """An empty set is encoded as an empty JSON array."""
      encoded = ujson.encode(set())
      assert "[]" == encoded
  def test_encode_set(self):
      """Encode a set of ints and check that the decoded values match it exactly.

      The decoded result must contain every element of the set once: the
      length check rules out duplicates or dropped items, and the set
      comparison rules out foreign values.
      """
      s = {1, 2, 3, 4, 5, 6, 7, 8, 9}
      enc = ujson.encode(s)
      dec = ujson.decode(enc)

      # Checking membership alone would also pass for an empty or truncated
      # result, so compare the size and the full contents instead.
      assert len(dec) == len(s)
      assert set(dec) == s
  @pytest.mark.parametrize(
      "td",
      [
          Timedelta(days=366),
          Timedelta(days=-1),
          Timedelta(hours=13, minutes=5, seconds=5),
          Timedelta(hours=13, minutes=20, seconds=30),
          Timedelta(days=-1, nanoseconds=5),
          Timedelta(nanoseconds=1),
          Timedelta(microseconds=1, nanoseconds=1),
          Timedelta(milliseconds=1, microseconds=1, nanoseconds=1),
          Timedelta(milliseconds=999, microseconds=999, nanoseconds=999),
      ],
  )
  def test_encode_timedelta_iso(self, td):
      """With iso_dates=True a Timedelta encodes to its quoted isoformat() string."""
      # GH 28256
      expected = f'"{td.isoformat()}"'
      encoded = ujson.encode(td, iso_dates=True)
      assert encoded == expected
  838. def test_encode_periodindex(self):
  839. # GH 46683
  840. p = PeriodIndex(["2022-04-06", "2022-04-07"], freq="D")
  841. df = DataFrame(index=p)
  842. assert df.to_json() == "{}"