import datetime
from datetime import timedelta
from decimal import Decimal
from io import StringIO
import json
import os
import sys
import time

import numpy as np
import pytest

from pandas.compat import IS64
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    NA,
    DataFrame,
    DatetimeIndex,
    Series,
    Timestamp,
    read_json,
)
import pandas._testing as tm
from pandas.core.arrays import (
    ArrowStringArray,
    StringArray,
)
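

# Shared assertion helper: orients that drop axis labels ("records", "values")
# need the expected frame normalized before comparison.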
def assert_json_roundtrip_equal(result, expected, orient):
    if orient in ("records", "values"):
        expected = expected.reset_index(drop=True)
    if orient == "values":
        expected.columns = range(len(expected.columns))
    tm.assert_frame_equal(result, expected)


class TestPandasContainer:
    @pytest.fixture
    def categorical_frame(self):
        _seriesd = tm.getSeriesData()
        _cat_frame = DataFrame(_seriesd)

        cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * (len(_cat_frame) - 15)
        _cat_frame.index = pd.CategoricalIndex(cat, name="E")
        _cat_frame["E"] = list(reversed(cat))
        _cat_frame["sort"] = np.arange(len(_cat_frame), dtype="int64")
        return _cat_frame

    @pytest.fixture
    def datetime_series(self):
        # Same as usual datetime_series, but with index freq set to None,
        # since that doesn't round-trip, see GH#33711
        ser = tm.makeTimeSeries()
        ser.name = "ts"
        ser.index = ser.index._with_freq(None)
        return ser

    @pytest.fixture
    def datetime_frame(self):
        # Same as usual datetime_frame, but with index freq set to None,
        # since that doesn't round-trip, see GH#33711
        df = DataFrame(tm.getTimeSeriesData())
        df.index = df.index._with_freq(None)
        return df

    def test_frame_double_encoded_labels(self, orient):
        df = DataFrame(
            [["a", "b"], ["c", "d"]],
            index=['index " 1', "index / 2"],
            columns=["a \\ b", "y / z"],
        )
        result = read_json(df.to_json(orient=orient), orient=orient)
        expected = df.copy()
        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("orient", ["split", "records", "values"])
    def test_frame_non_unique_index(self, orient):
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])
        result = read_json(df.to_json(orient=orient), orient=orient)
        expected = df.copy()
        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("orient", ["index", "columns"])
    def test_frame_non_unique_index_raises(self, orient):
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])
        msg = f"DataFrame index must be unique for orient='{orient}'"
        with pytest.raises(ValueError, match=msg):
            df.to_json(orient=orient)

    @pytest.mark.parametrize("orient", ["split", "values"])
    @pytest.mark.parametrize(
        "data",
        [
            [["a", "b"], ["c", "d"]],
            [[1.5, 2.5], [3.5, 4.5]],
            [[1, 2.5], [3, 4.5]],
            [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]],
        ],
    )
    def test_frame_non_unique_columns(self, orient, data):
        df = DataFrame(data, index=[1, 2], columns=["x", "x"])

        result = read_json(
            df.to_json(orient=orient), orient=orient, convert_dates=["x"]
        )
        if orient == "values":
            expected = DataFrame(data)
            if expected.iloc[:, 0].dtype == "datetime64[ns]":
                # orient == "values" by default will write Timestamp objects out
                # in milliseconds; these are internally stored in nanoseconds,
                # so divide to get where we need
                # TODO: a to_epoch method would also solve; see GH 14772
                expected.iloc[:, 0] = expected.iloc[:, 0].view(np.int64) // 1000000
        elif orient == "split":
            expected = df
            expected.columns = ["x", "x.1"]

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("orient", ["index", "columns", "records"])
    def test_frame_non_unique_columns_raises(self, orient):
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"])
        msg = f"DataFrame columns must be unique for orient='{orient}'"
        with pytest.raises(ValueError, match=msg):
            df.to_json(orient=orient)

    def test_frame_default_orient(self, float_frame):
        assert float_frame.to_json() == float_frame.to_json(orient="columns")

    @pytest.mark.parametrize("dtype", [False, float])
    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_simple(self, orient, convert_axes, dtype, float_frame):
        data = float_frame.to_json(orient=orient)
        result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)
        expected = float_frame
        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("dtype", [False, np.int64])
    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_intframe(self, orient, convert_axes, dtype, int_frame):
        data = int_frame.to_json(orient=orient)
        result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)
        expected = int_frame
        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("dtype", [None, np.float64, int, "U3"])
    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_str_axes(self, orient, convert_axes, dtype):
        df = DataFrame(
            np.zeros((200, 4)),
            columns=[str(i) for i in range(4)],
            index=[str(i) for i in range(200)],
            dtype=dtype,
        )
        data = df.to_json(orient=orient)
        result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)

        expected = df.copy()
        if not dtype:
            expected = expected.astype(np.int64)

        # index, columns, and records orients cannot fully preserve the string
        # dtype for axes as the index and column labels are used as keys in
        # JSON objects. JSON keys are by definition strings, so there's no way
        # to disambiguate whether those keys actually were strings or numeric
        # beforehand and numeric wins out.
        if convert_axes and (orient in ("index", "columns")):
            expected.columns = expected.columns.astype(np.int64)
            expected.index = expected.index.astype(np.int64)
        elif orient == "records" and convert_axes:
            expected.columns = expected.columns.astype(np.int64)
        elif convert_axes and orient == "split":
            expected.columns = expected.columns.astype(np.int64)

        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_categorical(
        self, request, orient, categorical_frame, convert_axes
    ):
        # TODO: create a better frame to test with and improve coverage
        if orient in ("index", "columns"):
            request.node.add_marker(
                pytest.mark.xfail(
                    reason=f"Can't have duplicate index values for orient '{orient}'"
                )
            )

        data = categorical_frame.to_json(orient=orient)
        result = read_json(data, orient=orient, convert_axes=convert_axes)

        expected = categorical_frame.copy()
        expected.index = expected.index.astype(str)  # Categorical not preserved
        expected.index.name = None  # index names aren't preserved in JSON
        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_empty(self, orient, convert_axes):
        empty_frame = DataFrame()
        data = empty_frame.to_json(orient=orient)
        result = read_json(data, orient=orient, convert_axes=convert_axes)
        if orient == "split":
            idx = pd.Index([], dtype=(float if convert_axes else object))
            expected = DataFrame(index=idx, columns=idx)
        elif orient in ["index", "columns"]:
            expected = DataFrame()
        else:
            expected = empty_frame.copy()

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_timestamp(self, orient, convert_axes, datetime_frame):
        # TODO: improve coverage with date_format parameter
        data = datetime_frame.to_json(orient=orient)
        result = read_json(data, orient=orient, convert_axes=convert_axes)
        expected = datetime_frame.copy()

        if not convert_axes:  # one off for ts handling
            # DTI gets converted to epoch values
            idx = expected.index.view(np.int64) // 1000000
            if orient != "split":  # TODO: handle consistently across orients
                idx = idx.astype(str)
            expected.index = idx

        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_roundtrip_mixed(self, orient, convert_axes):
        index = pd.Index(["a", "b", "c", "d", "e"])
        values = {
            "A": [0.0, 1.0, 2.0, 3.0, 4.0],
            "B": [0.0, 1.0, 0.0, 1.0, 0.0],
            "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
            "D": [True, False, True, False, True],
        }
        df = DataFrame(data=values, index=index)

        data = df.to_json(orient=orient)
        result = read_json(data, orient=orient, convert_axes=convert_axes)

        expected = df.copy()
        expected = expected.assign(**expected.select_dtypes("number").astype(np.int64))

        assert_json_roundtrip_equal(result, expected, orient)

    @pytest.mark.xfail(
        reason="#50456 Column multiindex is stored and loaded differently",
        raises=AssertionError,
    )
    @pytest.mark.parametrize(
        "columns",
        [
            [["2022", "2022"], ["JAN", "FEB"]],
            [["2022", "2023"], ["JAN", "JAN"]],
            [["2022", "2022"], ["JAN", "JAN"]],
        ],
    )
    def test_roundtrip_multiindex(self, columns):
        df = DataFrame(
            [[1, 2], [3, 4]],
            columns=pd.MultiIndex.from_arrays(columns),
        )
        result = read_json(df.to_json(orient="split"), orient="split")
        tm.assert_frame_equal(result, df)

    @pytest.mark.parametrize(
        "data,msg,orient",
        [
            ('{"key":b:a:d}', "Expected object or value", "columns"),
            # too few indices
            (
                '{"columns":["A","B"],'
                '"index":["2","3"],'
                '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
                "|".join(
                    [
                        r"Length of values \(3\) does not match length of index \(2\)",
                    ]
                ),
                "split",
            ),
            # too many columns
            (
                '{"columns":["A","B","C"],'
                '"index":["1","2","3"],'
                '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
                "3 columns passed, passed data had 2 columns",
                "split",
            ),
            # bad key
            (
                '{"badkey":["A","B"],'
                '"index":["2","3"],'
                '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
                r"unexpected key\(s\): badkey",
                "split",
            ),
        ],
    )
    def test_frame_from_json_bad_data_raises(self, data, msg, orient):
        with pytest.raises(ValueError, match=msg):
            read_json(StringIO(data), orient=orient)

    @pytest.mark.parametrize("dtype", [True, False])
    @pytest.mark.parametrize("convert_axes", [True, False])
    def test_frame_from_json_missing_data(self, orient, convert_axes, dtype):
        num_df = DataFrame([[1, 2], [4, 5, 6]])
        result = read_json(
            num_df.to_json(orient=orient),
            orient=orient,
            convert_axes=convert_axes,
            dtype=dtype,
        )
        assert np.isnan(result.iloc[0, 2])

        obj_df = DataFrame([["1", "2"], ["4", "5", "6"]])
        result = read_json(
            obj_df.to_json(orient=orient),
            orient=orient,
            convert_axes=convert_axes,
            dtype=dtype,
        )
        assert np.isnan(result.iloc[0, 2])

    @pytest.mark.parametrize("dtype", [True, False])
    def test_frame_read_json_dtype_missing_value(self, dtype):
        # GH28501 Parse missing values using read_json with dtype=False
        # to NaN instead of None
        result = read_json("[null]", dtype=dtype)
        expected = DataFrame([np.nan])
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("inf", [np.inf, np.NINF])
    @pytest.mark.parametrize("dtype", [True, False])
    def test_frame_infinity(self, inf, dtype):
        # infinities get mapped to nulls which get mapped to NaNs during
        # deserialisation
        df = DataFrame([[1, 2], [4, 5, 6]])
        df.loc[0, 2] = inf
        result = read_json(df.to_json(), dtype=dtype)
        assert np.isnan(result.iloc[0, 2])

    @pytest.mark.skipif(not IS64, reason="not compliant on 32-bit, xref #15865")
    @pytest.mark.parametrize(
        "value,precision,expected_val",
        [
            (0.95, 1, 1.0),
            (1.95, 1, 2.0),
            (-1.95, 1, -2.0),
            (0.995, 2, 1.0),
            (0.9995, 3, 1.0),
            (0.99999999999999944, 15, 1.0),
        ],
    )
    def test_frame_to_json_float_precision(self, value, precision, expected_val):
        df = DataFrame([{"a_float": value}])
        encoded = df.to_json(double_precision=precision)
        assert encoded == f'{{"a_float":{{"0":{expected_val}}}}}'

    def test_frame_to_json_except(self):
        df = DataFrame([1, 2, 3])
        msg = "Invalid value 'garbage' for option 'orient'"
        with pytest.raises(ValueError, match=msg):
            df.to_json(orient="garbage")

    def test_frame_empty(self):
        df = DataFrame(columns=["jim", "joe"])
        assert not df._is_mixed_type

        tm.assert_frame_equal(
            read_json(df.to_json(), dtype=dict(df.dtypes)), df, check_index_type=False
        )

        # GH 7445
        result = DataFrame({"test": []}, index=[]).to_json(orient="columns")
        expected = '{"test":{}}'
        assert result == expected

    def test_frame_empty_mixedtype(self):
        # mixed type
        df = DataFrame(columns=["jim", "joe"])
        df["joe"] = df["joe"].astype("i8")
        assert df._is_mixed_type
        tm.assert_frame_equal(
            read_json(df.to_json(), dtype=dict(df.dtypes)), df, check_index_type=False
        )

    def test_frame_mixedtype_orient(self):  # GH10289
        vals = [
            [10, 1, "foo", 0.1, 0.01],
            [20, 2, "bar", 0.2, 0.02],
            [30, 3, "baz", 0.3, 0.03],
            [40, 4, "qux", 0.4, 0.04],
        ]

        df = DataFrame(
            vals, index=list("abcd"), columns=["1st", "2nd", "3rd", "4th", "5th"]
        )

        assert df._is_mixed_type
        right = df.copy()

        for orient in ["split", "index", "columns"]:
            inp = df.to_json(orient=orient)
            left = read_json(inp, orient=orient, convert_axes=False)
            tm.assert_frame_equal(left, right)

        right.index = pd.RangeIndex(len(df))
        inp = df.to_json(orient="records")
        left = read_json(inp, orient="records", convert_axes=False)
        tm.assert_frame_equal(left, right)

        right.columns = pd.RangeIndex(df.shape[1])
        inp = df.to_json(orient="values")
        left = read_json(inp, orient="values", convert_axes=False)
        tm.assert_frame_equal(left, right)

    def test_v12_compat(self, datapath):
        dti = pd.date_range("2000-01-03", "2000-01-07")
        # freq doesn't roundtrip
        dti = DatetimeIndex(np.asarray(dti), freq=None)
        df = DataFrame(
            [
                [1.56808523, 0.65727391, 1.81021139, -0.17251653],
                [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
                [1.51493992, 0.11805825, 1.629455, -1.31506612],
                [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
                [0.05951614, -2.69652057, 1.28163262, 0.34703478],
            ],
            columns=["A", "B", "C", "D"],
            index=dti,
        )
        df["date"] = Timestamp("19920106 18:21:32.12")
        df.iloc[3, df.columns.get_loc("date")] = Timestamp("20130101")
        df["modified"] = df["date"]
        df.iloc[1, df.columns.get_loc("modified")] = pd.NaT

        dirpath = datapath("io", "json", "data")
        v12_json = os.path.join(dirpath, "tsframe_v012.json")
        df_unser = read_json(v12_json)
        tm.assert_frame_equal(df, df_unser)

        df_iso = df.drop(["modified"], axis=1)
        v12_iso_json = os.path.join(dirpath, "tsframe_iso_v012.json")
        df_unser_iso = read_json(v12_iso_json)
        tm.assert_frame_equal(df_iso, df_unser_iso)

    def test_blocks_compat_GH9037(self):
        index = pd.date_range("20000101", periods=10, freq="H")
        # freq doesn't round-trip
        index = DatetimeIndex(list(index), freq=None)

        df_mixed = DataFrame(
            {
                "float_1": [
                    -0.92077639,
                    0.77434435,
                    1.25234727,
                    0.61485564,
                    -0.60316077,
                    0.24653374,
                    0.28668979,
                    -2.51969012,
                    0.95748401,
                    -1.02970536,
                ],
                "int_1": [
                    19680418,
                    75337055,
                    99973684,
                    65103179,
                    79373900,
                    40314334,
                    21290235,
                    4991321,
                    41903419,
                    16008365,
                ],
                "str_1": [
                    "78c608f1",
                    "64a99743",
                    "13d2ff52",
                    "ca7f4af2",
                    "97236474",
                    "bde7e214",
                    "1a6bde47",
                    "b1190be5",
                    "7a669144",
                    "8d64d068",
                ],
                "float_2": [
                    -0.0428278,
                    -1.80872357,
                    3.36042349,
                    -0.7573685,
                    -0.48217572,
                    0.86229683,
                    1.08935819,
                    0.93898739,
                    -0.03030452,
                    1.43366348,
                ],
                "str_2": [
                    "14f04af9",
                    "d085da90",
                    "4bcfac83",
                    "81504caf",
                    "2ffef4a9",
                    "08e2f5c4",
                    "07e1af03",
                    "addbd4a7",
                    "1f6a09ba",
                    "4bfc4d87",
                ],
                "int_2": [
                    86967717,
                    98098830,
                    51927505,
                    20372254,
                    12601730,
                    20884027,
                    34193846,
                    10561746,
                    24867120,
                    76131025,
                ],
            },
            index=index,
        )

        # JSON deserialisation always creates unicode strings
        df_mixed.columns = df_mixed.columns.astype("unicode")
        df_roundtrip = read_json(df_mixed.to_json(orient="split"), orient="split")
        tm.assert_frame_equal(
            df_mixed,
            df_roundtrip,
            check_index_type=True,
            check_column_type=True,
            by_blocks=True,
            check_exact=True,
        )

    def test_frame_nonprintable_bytes(self):
        # GH14256: failing column caused segfaults, if it is not the last one
        class BinaryThing:
            def __init__(self, hexed) -> None:
                self.hexed = hexed
                self.binary = bytes.fromhex(hexed)

            def __str__(self) -> str:
                return self.hexed

        hexed = "574b4454ba8c5eb4f98a8f45"
        binthing = BinaryThing(hexed)

        # verify the proper conversion of printable content
        df_printable = DataFrame({"A": [binthing.hexed]})
        assert df_printable.to_json() == f'{{"A":{{"0":"{hexed}"}}}}'

        # check if non-printable content throws appropriate Exception
        df_nonprintable = DataFrame({"A": [binthing]})
        msg = "Unsupported UTF-8 sequence length when encoding string"
        with pytest.raises(OverflowError, match=msg):
            df_nonprintable.to_json()

        # the same with multiple columns threw segfaults
        df_mixed = DataFrame({"A": [binthing], "B": [1]}, columns=["A", "B"])
        with pytest.raises(OverflowError, match=msg):
            df_mixed.to_json()

        # default_handler should resolve exceptions for non-string types
        result = df_nonprintable.to_json(default_handler=str)
        expected = f'{{"A":{{"0":"{hexed}"}}}}'
        assert result == expected
        assert (
            df_mixed.to_json(default_handler=str)
            == f'{{"A":{{"0":"{hexed}"}},"B":{{"0":1}}}}'
        )

    def test_label_overflow(self):
        # GH14256: buffer length not checked when writing label
        result = DataFrame({"bar" * 100000: [1], "foo": [1337]}).to_json()
        expected = f'{{"{"bar" * 100000}":{{"0":1}},"foo":{{"0":1337}}}}'
        assert result == expected
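
    # Series counterpart of the frame non-unique-index tests above:
    # orient="index" must raise, while "split" and "records" still round-trip.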
    def test_series_non_unique_index(self):
        s = Series(["a", "b"], index=[1, 1])
        msg = "Series index must be unique for orient='index'"
        with pytest.raises(ValueError, match=msg):
            s.to_json(orient="index")

        tm.assert_series_equal(
            s, read_json(s.to_json(orient="split"), orient="split", typ="series")
        )
        unserialized = read_json(
            s.to_json(orient="records"), orient="records", typ="series"
        )
        tm.assert_numpy_array_equal(s.values, unserialized.values)

    def test_series_default_orient(self, string_series):
        assert string_series.to_json() == string_series.to_json(orient="index")

    def test_series_roundtrip_simple(self, orient, string_series):
        data = string_series.to_json(orient=orient)
        result = read_json(data, typ="series", orient=orient)

        expected = string_series
        if orient in ("values", "records"):
            expected = expected.reset_index(drop=True)
        if orient != "split":
            expected.name = None

        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", [False, None])
    def test_series_roundtrip_object(self, orient, dtype, object_series):
        data = object_series.to_json(orient=orient)
        result = read_json(data, typ="series", orient=orient, dtype=dtype)

        expected = object_series
        if orient in ("values", "records"):
            expected = expected.reset_index(drop=True)
        if orient != "split":
            expected.name = None

        tm.assert_series_equal(result, expected)

    def test_series_roundtrip_empty(self, orient):
        empty_series = Series([], index=[], dtype=np.float64)
        data = empty_series.to_json(orient=orient)
        result = read_json(data, typ="series", orient=orient)

        expected = empty_series.reset_index(drop=True)
        if orient == "split":
            expected.index = expected.index.astype(np.float64)

        tm.assert_series_equal(result, expected)

    def test_series_roundtrip_timeseries(self, orient, datetime_series):
        data = datetime_series.to_json(orient=orient)
        result = read_json(data, typ="series", orient=orient)

        expected = datetime_series
        if orient in ("values", "records"):
            expected = expected.reset_index(drop=True)
        if orient != "split":
            expected.name = None

        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", [np.float64, int])
    def test_series_roundtrip_numeric(self, orient, dtype):
        s = Series(range(6), index=["a", "b", "c", "d", "e", "f"])
        data = s.to_json(orient=orient)
        result = read_json(data, typ="series", orient=orient)

        expected = s.copy()
        if orient in ("values", "records"):
            expected = expected.reset_index(drop=True)

        tm.assert_series_equal(result, expected)

    def test_series_to_json_except(self):
        s = Series([1, 2, 3])
        msg = "Invalid value 'garbage' for option 'orient'"
        with pytest.raises(ValueError, match=msg):
            s.to_json(orient="garbage")

    def test_series_from_json_precise_float(self):
        s = Series([4.56, 4.56, 4.56])
        result = read_json(s.to_json(), typ="series", precise_float=True)
        tm.assert_series_equal(result, s, check_index_type=False)

    def test_series_with_dtype(self):
        # GH 21986
        s = Series([4.56, 4.56, 4.56])
        result = read_json(s.to_json(), typ="series", dtype=np.int64)
        expected = Series([4] * 3)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype,expected",
        [
            (True, Series(["2000-01-01"], dtype="datetime64[ns]")),
            (False, Series([946684800000])),
        ],
    )
    def test_series_with_dtype_datetime(self, dtype, expected):
        s = Series(["2000-01-01"], dtype="datetime64[ns]")
        data = s.to_json()
        result = read_json(data, typ="series", dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_frame_from_json_precise_float(self):
        df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
        result = read_json(df.to_json(), precise_float=True)
        tm.assert_frame_equal(result, df)
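
    # typ=None (rather than the default "frame") still round-trips a Series.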
    def test_typ(self):
        s = Series(range(6), index=["a", "b", "c", "d", "e", "f"], dtype="int64")
        result = read_json(s.to_json(), typ=None)
        tm.assert_series_equal(result, s)

    def test_reconstruction_index(self):
        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())
        tm.assert_frame_equal(result, df)

        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"])
        result = read_json(df.to_json())
        tm.assert_frame_equal(result, df)
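
    # to_json/read_json also accept a filesystem path, not just a JSON string.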
    def test_path(self, float_frame, int_frame, datetime_frame):
        with tm.ensure_clean("test.json") as path:
            for df in [float_frame, int_frame, datetime_frame]:
                df.to_json(path)
                read_json(path)

    def test_axis_dates(self, datetime_series, datetime_frame):
        # frame
        json = datetime_frame.to_json()
        result = read_json(json)
        tm.assert_frame_equal(result, datetime_frame)

        # series
        json = datetime_series.to_json()
        result = read_json(json, typ="series")
        tm.assert_series_equal(result, datetime_series, check_names=False)
        assert result.name is None
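
    # Columns named like dates (e.g. "date") are converted on read by default;
    # passing convert_dates=False keeps the raw epoch values instead.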
    def test_convert_dates(self, datetime_series, datetime_frame):
        # frame
        df = datetime_frame
        df["date"] = Timestamp("20130101")

        json = df.to_json()
        result = read_json(json)
        tm.assert_frame_equal(result, df)

        df["foo"] = 1.0
        json = df.to_json(date_unit="ns")

        result = read_json(json, convert_dates=False)
        expected = df.copy()
        expected["date"] = expected["date"].values.view("i8")
        expected["foo"] = expected["foo"].astype("int64")
        tm.assert_frame_equal(result, expected)

        # series
        ts = Series(Timestamp("20130101"), index=datetime_series.index)
        json = ts.to_json()
        result = read_json(json, typ="series")
        tm.assert_series_equal(result, ts)

    @pytest.mark.parametrize("date_format", ["epoch", "iso"])
    @pytest.mark.parametrize("as_object", [True, False])
    @pytest.mark.parametrize("date_typ", [datetime.date, datetime.datetime, Timestamp])
    def test_date_index_and_values(self, date_format, as_object, date_typ):
        data = [date_typ(year=2020, month=1, day=1), pd.NaT]
        if as_object:
            data.append("a")

        ser = Series(data, index=data)
        result = ser.to_json(date_format=date_format)

        if date_format == "epoch":
            expected = '{"1577836800000":1577836800000,"null":null}'
        else:
            expected = (
                '{"2020-01-01T00:00:00.000":"2020-01-01T00:00:00.000","null":null}'
            )

        if as_object:
            expected = expected.replace("}", ',"a":"a"}')

        assert result == expected

    @pytest.mark.parametrize(
        "infer_word",
        [
            "trade_time",
            "date",
            "datetime",
            "sold_at",
            "modified",
            "timestamp",
            "timestamps",
        ],
    )
    def test_convert_dates_infer(self, infer_word):
        # GH10747
        from pandas.io.json import dumps

        data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}]
        expected = DataFrame(
            [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word]
        )
        result = read_json(dumps(data))[["id", infer_word]]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "date,date_unit",
        [
            ("20130101 20:43:42.123", None),
            ("20130101 20:43:42", "s"),
            ("20130101 20:43:42.123", "ms"),
            ("20130101 20:43:42.123456", "us"),
            ("20130101 20:43:42.123456789", "ns"),
        ],
    )
    def test_date_format_frame(self, date, date_unit, datetime_frame):
        df = datetime_frame

        df["date"] = Timestamp(date)
        df.iloc[1, df.columns.get_loc("date")] = pd.NaT
        df.iloc[5, df.columns.get_loc("date")] = pd.NaT
        if date_unit:
            json = df.to_json(date_format="iso", date_unit=date_unit)
        else:
            json = df.to_json(date_format="iso")

        result = read_json(json)
        expected = df.copy()
        tm.assert_frame_equal(result, expected)

    def test_date_format_frame_raises(self, datetime_frame):
        df = datetime_frame
        msg = "Invalid value 'foo' for option 'date_unit'"
        with pytest.raises(ValueError, match=msg):
            df.to_json(date_format="iso", date_unit="foo")

    @pytest.mark.parametrize(
        "date,date_unit",
        [
            ("20130101 20:43:42.123", None),
            ("20130101 20:43:42", "s"),
            ("20130101 20:43:42.123", "ms"),
            ("20130101 20:43:42.123456", "us"),
            ("20130101 20:43:42.123456789", "ns"),
        ],
    )
    def test_date_format_series(self, date, date_unit, datetime_series):
        ts = Series(Timestamp(date), index=datetime_series.index)
        ts.iloc[1] = pd.NaT
        ts.iloc[5] = pd.NaT
        if date_unit:
            json = ts.to_json(date_format="iso", date_unit=date_unit)
        else:
            json = ts.to_json(date_format="iso")

        result = read_json(json, typ="series")
        expected = ts.copy()
        tm.assert_series_equal(result, expected)

    def test_date_format_series_raises(self, datetime_series):
        ts = Series(Timestamp("20130101 20:43:42.123"), index=datetime_series.index)
        msg = "Invalid value 'foo' for option 'date_unit'"
        with pytest.raises(ValueError, match=msg):
            ts.to_json(date_format="iso", date_unit="foo")

    @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
    def test_date_unit(self, unit, datetime_frame):
        df = datetime_frame
        df["date"] = Timestamp("20130101 20:43:42")
        dl = df.columns.get_loc("date")
        df.iloc[1, dl] = Timestamp("19710101 20:43:42")
        df.iloc[2, dl] = Timestamp("21460101 20:43:42")
        df.iloc[4, dl] = pd.NaT

        json = df.to_json(date_format="epoch", date_unit=unit)

        # force date unit
        result = read_json(json, date_unit=unit)
        tm.assert_frame_equal(result, df)

        # detect date unit
        result = read_json(json, date_unit=None)
        tm.assert_frame_equal(result, df)

    def test_weird_nested_json(self):
        # this used to core dump the parser
        s = r"""{
        "status": "success",
        "data": {
            "posts": [
                {
                    "id": 1,
                    "title": "A blog post",
                    "body": "Some useful content"
                },
                {
                    "id": 2,
                    "title": "Another blog post",
                    "body": "More content"
                }
            ]
        }
        }"""
        read_json(s)

    def test_doc_example(self):
        dfj2 = DataFrame(np.random.randn(5, 2), columns=list("AB"))
        dfj2["date"] = Timestamp("20130101")
        dfj2["ints"] = range(5)
        dfj2["bools"] = True
        dfj2.index = pd.date_range("20130101", periods=5)

        json = dfj2.to_json()
        result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_})
        tm.assert_frame_equal(result, result)

    def test_round_trip_exception_(self, datapath):
        # GH 3867
        path = datapath("io", "json", "data", "teams.csv")
        df = pd.read_csv(path)
        s = df.to_json()
        result = read_json(s)
        tm.assert_frame_equal(result.reindex(index=df.index, columns=df.columns), df)

    @pytest.mark.network
    @tm.network(
        url="https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5",
        check_before_test=True,
    )
    @pytest.mark.parametrize(
        "field,dtype",
        [
            ["created_at", pd.DatetimeTZDtype(tz="UTC")],
            ["closed_at", "datetime64[ns]"],
            ["updated_at", pd.DatetimeTZDtype(tz="UTC")],
        ],
    )
    def test_url(self, field, dtype):
        url = "https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5"
        result = read_json(url, convert_dates=True)
        assert result[field].dtype == dtype
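
    # Timedeltas serialize to epoch milliseconds by default, so reading them
    # back requires an explicit pd.to_timedelta conversion.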
    def test_timedelta(self):
        converter = lambda x: pd.to_timedelta(x, unit="ms")

        ser = Series([timedelta(23), timedelta(seconds=5)])
        assert ser.dtype == "timedelta64[ns]"
        result = read_json(ser.to_json(), typ="series").apply(converter)
        tm.assert_series_equal(result, ser)

        ser = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1]))
        assert ser.dtype == "timedelta64[ns]"
        result = read_json(ser.to_json(), typ="series").apply(converter)
        tm.assert_series_equal(result, ser)

        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        assert frame[0].dtype == "timedelta64[ns]"
        tm.assert_frame_equal(frame, read_json(frame.to_json()).apply(converter))

    def test_timedelta2(self):
        frame = DataFrame(
            {
                "a": [timedelta(days=23), timedelta(seconds=5)],
                "b": [1, 2],
                "c": pd.date_range(start="20130101", periods=2),
            }
        )

        result = read_json(frame.to_json(date_unit="ns"))
        result["a"] = pd.to_timedelta(result.a, unit="ns")
        result["c"] = pd.to_datetime(result.c)
        tm.assert_frame_equal(frame, result)

    def test_mixed_timedelta_datetime(self):
        td = timedelta(23)
        ts = Timestamp("20130101")

        frame = DataFrame({"a": [td, ts]}, dtype=object)
        expected = DataFrame(
            {"a": [pd.Timedelta(td).as_unit("ns")._value, ts.as_unit("ns")._value]}
        )
        result = read_json(frame.to_json(date_unit="ns"), dtype={"a": "int64"})
        tm.assert_frame_equal(result, expected, check_index_type=False)

    @pytest.mark.parametrize("as_object", [True, False])
    @pytest.mark.parametrize("date_format", ["iso", "epoch"])
    @pytest.mark.parametrize("timedelta_typ", [pd.Timedelta, timedelta])
    def test_timedelta_to_json(self, as_object, date_format, timedelta_typ):
        # GH28156: to_json not correctly formatting Timedelta
        data = [timedelta_typ(days=1), timedelta_typ(days=2), pd.NaT]
        if as_object:
            data.append("a")

        ser = Series(data, index=data)
        if date_format == "iso":
            expected = (
                '{"P1DT0H0M0S":"P1DT0H0M0S","P2DT0H0M0S":"P2DT0H0M0S","null":null}'
            )
        else:
            expected = '{"86400000":86400000,"172800000":172800000,"null":null}'

        if as_object:
            expected = expected.replace("}", ',"a":"a"}')

        result = ser.to_json(date_format=date_format)
        assert result == expected

    def test_default_handler(self):
        value = object()
        frame = DataFrame({"a": [7, value]})
        expected = DataFrame({"a": [7, str(value)]})
        result = read_json(frame.to_json(default_handler=str))
        tm.assert_frame_equal(expected, result, check_index_type=False)

    def test_default_handler_indirect(self):
        from pandas.io.json import dumps

        def default(obj):
            if isinstance(obj, complex):
                return [("mathjs", "Complex"), ("re", obj.real), ("im", obj.imag)]
            return str(obj)

        df_list = [
            9,
            DataFrame(
                {"a": [1, "STR", complex(4, -5)], "b": [float("nan"), None, "N/A"]},
                columns=["a", "b"],
            ),
        ]
        expected = (
            '[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
            '["re",4.0],["im",-5.0]],"N\\/A"]]]'
        )
        assert dumps(df_list, default_handler=default, orient="values") == expected

    def test_default_handler_numpy_unsupported_dtype(self):
        # GH12554 to_json raises 'Unhandled numpy dtype 15'
        df = DataFrame(
            {"a": [1, 2.3, complex(4, -5)], "b": [float("nan"), None, complex(1.2, 0)]},
            columns=["a", "b"],
        )
        expected = (
            '[["(1+0j)","(nan+0j)"],'
            '["(2.3+0j)","(nan+0j)"],'
            '["(4-5j)","(1.2+0j)"]]'
        )
        assert df.to_json(default_handler=str, orient="values") == expected

    def test_default_handler_raises(self):
        msg = "raisin"

        def my_handler_raises(obj):
            raise TypeError(msg)

        with pytest.raises(TypeError, match=msg):
            DataFrame({"a": [1, 2, object()]}).to_json(
                default_handler=my_handler_raises
            )
        with pytest.raises(TypeError, match=msg):
            DataFrame({"a": [1, 2, complex(4, -5)]}).to_json(
                default_handler=my_handler_raises
            )

    def test_categorical(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
        df["B"] = df["A"]
        expected = df.to_json()

        df["B"] = df["A"].astype("category")
        assert expected == df.to_json()

        s = df["A"]
        sc = df["B"]
        assert s.to_json() == sc.to_json()

    def test_datetime_tz(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        tz_range = pd.date_range("20130101", periods=3, tz="US/Eastern")
        tz_naive = tz_range.tz_convert("utc").tz_localize(None)

        df = DataFrame({"A": tz_range, "B": pd.date_range("20130101", periods=3)})

        df_naive = df.copy()
        df_naive["A"] = tz_naive
        expected = df_naive.to_json()
        assert expected == df.to_json()

        stz = Series(tz_range)
        s_naive = Series(tz_naive)
        assert stz.to_json() == s_naive.to_json()

    def test_sparse(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        df = DataFrame(np.random.randn(10, 4))
        df.loc[:8] = np.nan

        sdf = df.astype("Sparse")
        expected = df.to_json()
        assert expected == sdf.to_json()

        s = Series(np.random.randn(10))
        s.loc[:8] = np.nan
        ss = s.astype("Sparse")

        expected = s.to_json()
        assert expected == ss.to_json()

    @pytest.mark.parametrize(
        "ts",
        [
            Timestamp("2013-01-10 05:00:00Z"),
            Timestamp("2013-01-10 00:00:00", tz="US/Eastern"),
            Timestamp("2013-01-10 00:00:00-0500"),
        ],
    )
    def test_tz_is_utc(self, ts):
        from pandas.io.json import dumps

        exp = '"2013-01-10T05:00:00.000Z"'

        assert dumps(ts, iso_dates=True) == exp
        dt = ts.to_pydatetime()
        assert dumps(dt, iso_dates=True) == exp

    def test_tz_is_naive(self):
        from pandas.io.json import dumps

        ts = Timestamp("2013-01-10 05:00:00")
        exp = '"2013-01-10T05:00:00.000"'

        assert dumps(ts, iso_dates=True) == exp
        dt = ts.to_pydatetime()
        assert dumps(dt, iso_dates=True) == exp

    @pytest.mark.parametrize(
        "tz_range",
        [
            pd.date_range("2013-01-01 05:00:00Z", periods=2),
            pd.date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern"),
            pd.date_range("2013-01-01 00:00:00-0500", periods=2),
        ],
    )
    def test_tz_range_is_utc(self, tz_range):
        from pandas.io.json import dumps

        exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
        dfexp = (
            '{"DT":{'
            '"0":"2013-01-01T05:00:00.000Z",'
            '"1":"2013-01-02T05:00:00.000Z"}}'
        )

        assert dumps(tz_range, iso_dates=True) == exp
        dti = DatetimeIndex(tz_range)
        # Ensure datetimes in object array are serialized correctly
        # in addition to the normal DTI case
        assert dumps(dti, iso_dates=True) == exp
        assert dumps(dti.astype(object), iso_dates=True) == exp
        df = DataFrame({"DT": dti})
        result = dumps(df, iso_dates=True)
        assert result == dfexp
        assert dumps(df.astype({"DT": object}), iso_dates=True)

    def test_tz_range_is_naive(self):
        from pandas.io.json import dumps

        dti = pd.date_range("2013-01-01 05:00:00", periods=2)

        exp = '["2013-01-01T05:00:00.000","2013-01-02T05:00:00.000"]'
        dfexp = '{"DT":{"0":"2013-01-01T05:00:00.000","1":"2013-01-02T05:00:00.000"}}'

        # Ensure datetimes in object array are serialized correctly
        # in addition to the normal DTI case
        assert dumps(dti, iso_dates=True) == exp
        assert dumps(dti.astype(object), iso_dates=True) == exp
        df = DataFrame({"DT": dti})
        result = dumps(df, iso_dates=True)
        assert result == dfexp
        assert dumps(df.astype({"DT": object}), iso_dates=True)
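
    # lines=True reads newline-delimited JSON (one record per line).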
    def test_read_inline_jsonl(self):
        # GH9180
        result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
        tm.assert_frame_equal(result, expected)

    @pytest.mark.single_cpu
    @td.skip_if_not_us_locale
    def test_read_s3_jsonl(self, s3_resource, s3so):
        # GH17200
        result = read_json(
            "s3n://pandas-test/items.jsonl", lines=True, storage_options=s3so
        )
        expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
        tm.assert_frame_equal(result, expected)

    def test_read_local_jsonl(self):
        # GH17200
        with tm.ensure_clean("tmp_items.json") as path:
            with open(path, "w") as infile:
                infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
            result = read_json(path, lines=True)
            expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
            tm.assert_frame_equal(result, expected)

    def test_read_jsonl_unicode_chars(self):
        # GH15132: non-ascii unicode characters
        # \u201d == RIGHT DOUBLE QUOTATION MARK

        # simulate file handle
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        json = StringIO(json)
        result = read_json(json, lines=True)
        expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
        tm.assert_frame_equal(result, expected)

        # simulate string
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        result = read_json(json, lines=True)
        expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)])
    def test_to_json_large_numbers(self, bigNum):
        # GH34473
        series = Series(bigNum, dtype=object, index=["articleId"])
        json = series.to_json()
        expected = '{"articleId":' + str(bigNum) + "}"
        assert json == expected

        df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0])
        json = df.to_json()
        expected = '{"0":{"articleId":' + str(bigNum) + "}}"
        assert json == expected

    @pytest.mark.parametrize("bigNum", [-(2**63) - 1, 2**64])
    def test_read_json_large_numbers(self, bigNum):
        # GH20599, 26068
        json = StringIO('{"articleId":' + str(bigNum) + "}")
        msg = r"Value is too small|Value is too big"
        with pytest.raises(ValueError, match=msg):
            read_json(json)

        json = StringIO('{"0":{"articleId":' + str(bigNum) + "}}")
        with pytest.raises(ValueError, match=msg):
            read_json(json)

    def test_read_json_large_numbers2(self):
        # GH18842
        json = '{"articleId": "1404366058080022500245"}'
        json = StringIO(json)
        result = read_json(json, typ="series")
        expected = Series(1.404366e21, index=["articleId"])
        tm.assert_series_equal(result, expected)

        json = '{"0": {"articleId": "1404366058080022500245"}}'
        json = StringIO(json)
        result = read_json(json)
        expected = DataFrame(1.404366e21, index=["articleId"], columns=[0])
        tm.assert_frame_equal(result, expected)

    def test_to_jsonl(self):
        # GH9180
        df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n'
        assert result == expected

        df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
        assert result == expected
        tm.assert_frame_equal(read_json(result, lines=True), df)

        # GH15096: escaped characters in columns and data
        df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'
        assert result == expected
        tm.assert_frame_equal(read_json(result, lines=True), df)

    # TODO: there is a near-identical test for pytables; can we share?
    @pytest.mark.xfail(reason="GH#13774 encoding kwarg not supported", raises=TypeError)
    def test_latin_encoding(self):
        # GH 13774
        values = [
            [b"E\xc9, 17", b"", b"a", b"b", b"c"],
            [b"E\xc9, 17", b"a", b"b", b"c"],
            [b"EE, 17", b"", b"a", b"b", b"c"],
            [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"],
            [b"", b"a", b"b", b"c"],
            [b"\xf8\xfc", b"a", b"b", b"c"],
            [b"A\xf8\xfc", b"", b"a", b"b", b"c"],
            [np.nan, b"", b"b", b"c"],
            [b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
        ]

        values = [
            [x.decode("latin-1") if isinstance(x, bytes) else x for x in y]
            for y in values
        ]

        examples = []
        for dtype in ["category", object]:
            for val in values:
                examples.append(Series(val, dtype=dtype))

        def roundtrip(s, encoding="latin-1"):
            with tm.ensure_clean("test.json") as path:
                s.to_json(path, encoding=encoding)
                retr = read_json(path, encoding=encoding)
                tm.assert_series_equal(s, retr, check_categorical=False)

        for s in examples:
            roundtrip(s)

    def test_data_frame_size_after_to_json(self):
        # GH15344
        df = DataFrame({"a": [str(1)]})

        size_before = df.memory_usage(index=True, deep=True).sum()
        df.to_json()
        size_after = df.memory_usage(index=True, deep=True).sum()

        assert size_before == size_after
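
    # orient="table" embeds a Table Schema, so index/column labels and dtypes
    # can survive the round trip without an explicit dtype argument.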
    @pytest.mark.parametrize(
        "index", [None, [1, 2], [1.0, 2.0], ["a", "b"], ["1", "2"], ["1.", "2."]]
    )
    @pytest.mark.parametrize("columns", [["a", "b"], ["1", "2"], ["1.", "2."]])
    def test_from_json_to_json_table_index_and_columns(self, index, columns):
        # GH25433 GH25435
        expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns)
        dfjson = expected.to_json(orient="table")
        result = read_json(dfjson, orient="table")
        tm.assert_frame_equal(result, expected)

    def test_from_json_to_json_table_dtypes(self):
        # GH21345
        expected = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
        dfjson = expected.to_json(orient="table")
        result = read_json(dfjson, orient="table")
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("orient", ["split", "records", "index", "columns"])
    def test_to_json_from_json_columns_dtypes(self, orient):
        # GH21892 GH33205
        expected = DataFrame.from_dict(
            {
                "Integer": Series([1, 2, 3], dtype="int64"),
                "Float": Series([None, 2.0, 3.0], dtype="float64"),
                "Object": Series([None, "", "c"], dtype="object"),
                "Bool": Series([True, False, True], dtype="bool"),
                "Category": Series(["a", "b", None], dtype="category"),
                "Datetime": Series(
                    ["2020-01-01", None, "2020-01-03"], dtype="datetime64[ns]"
                ),
            }
        )
        dfjson = expected.to_json(orient=orient)
        result = read_json(
            dfjson,
            orient=orient,
            dtype={
                "Integer": "int64",
                "Float": "float64",
                "Object": "object",
                "Bool": "bool",
                "Category": "category",
                "Datetime": "datetime64[ns]",
            },
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}])
    def test_read_json_table_dtype_raises(self, dtype):
        # GH21345
        df = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
        dfjson = df.to_json(orient="table")
        msg = "cannot pass both dtype and orient='table'"
        with pytest.raises(ValueError, match=msg):
            read_json(dfjson, orient="table", dtype=dtype)

    def test_read_json_table_convert_axes_raises(self):
        # GH25433 GH25435
        df = DataFrame([[1, 2], [3, 4]], index=[1.0, 2.0], columns=["1.", "2."])
        dfjson = df.to_json(orient="table")
        msg = "cannot pass both convert_axes and orient='table'"
        with pytest.raises(ValueError, match=msg):
            read_json(dfjson, orient="table", convert_axes=True)

    @pytest.mark.parametrize(
        "data, expected",
        [
            (
                DataFrame([[1, 2], [4, 5]], columns=["a", "b"]),
                {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
            ),
            (
                DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo"),
                {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
            ),
            (
                DataFrame(
                    [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]]
                ),
                {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
            ),
            (Series([1, 2, 3], name="A"), {"name": "A", "data": [1, 2, 3]}),
            (
                Series([1, 2, 3], name="A").rename_axis("foo"),
                {"name": "A", "data": [1, 2, 3]},
            ),
            (
                Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]]),
                {"name": "A", "data": [1, 2]},
            ),
        ],
    )
    def test_index_false_to_json_split(self, data, expected):
        # GH 17394
        # Testing index=False in to_json with orient='split'
        result = data.to_json(orient="split", index=False)
        result = json.loads(result)
        assert result == expected

    @pytest.mark.parametrize(
        "data",
        [
            (DataFrame([[1, 2], [4, 5]], columns=["a", "b"])),
            (DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo")),
            (
                DataFrame(
                    [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]]
                )
            ),
            (Series([1, 2, 3], name="A")),
            (Series([1, 2, 3], name="A").rename_axis("foo")),
            (Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]])),
        ],
    )
    def test_index_false_to_json_table(self, data):
        # GH 17394
        # Testing index=False in to_json with orient='table'
        result = data.to_json(orient="table", index=False)
        result = json.loads(result)
        expected = {
            "schema": pd.io.json.build_table_schema(data, index=False),
            "data": DataFrame(data).to_dict(orient="records"),
        }
        assert result == expected

    @pytest.mark.parametrize("orient", ["records", "index", "columns", "values"])
    def test_index_false_error_to_json(self, orient):
        # GH 17394
        # Testing error message from to_json with index=False
        df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"])
        msg = "'index=False' is only valid when 'orient' is 'split' or 'table'"
        with pytest.raises(ValueError, match=msg):
            df.to_json(orient=orient, index=False)

    @pytest.mark.parametrize("orient", ["split", "table"])
    @pytest.mark.parametrize("index", [True, False])
    def test_index_false_from_json_to_json(self, orient, index):
        # GH25170
        # Test index=False in from_json to_json
        expected = DataFrame({"a": [1, 2], "b": [3, 4]})
        dfjson = expected.to_json(orient=orient, index=index)
        result = read_json(dfjson, orient=orient)
        tm.assert_frame_equal(result, expected)

    def test_read_timezone_information(self):
        # GH 25546
        result = read_json(
            '{"2019-01-01T11:00:00.000Z":88}', typ="series", orient="index"
        )
        expected = Series([88], index=DatetimeIndex(["2019-01-01 11:00:00"], tz="UTC"))
        tm.assert_series_equal(result, expected)
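
    # Editor's sketch (assumption extrapolated from the UTC case above):
    # fixed-offset keys should likewise parse to a timezone-aware index.
    def test_read_timezone_offset_sketch(self):
        result = read_json(
            '{"2019-01-01T11:00:00.000+06:00":88}', typ="series", orient="index"
        )
        assert result.index.tz is not None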

    @pytest.mark.parametrize(
        "url",
        [
            "s3://example-fsspec/",
            "gcs://another-fsspec/file.json",
            "https://example-site.com/data",
            "some-protocol://data.txt",
        ],
    )
    def test_read_json_with_url_value(self, url):
        # GH 36271
        result = read_json(f'{{"url":{{"0":"{url}"}}}}')
        expected = DataFrame({"url": [url]})
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "compression",
        ["", ".gz", ".bz2", ".tar"],
    )
    def test_read_json_with_very_long_file_path(self, compression):
        # GH 46718
        long_json_path = f'{"a" * 1000}.json{compression}'
        with pytest.raises(
            FileNotFoundError, match=f"File {long_json_path} does not exist"
        ):
            # path too long for Windows is handled in file_exists() but raises in
            # _get_data_from_filepath()
            read_json(long_json_path)

    @pytest.mark.parametrize(
        "date_format,key", [("epoch", 86400000), ("iso", "P1DT0H0M0S")]
    )
    def test_timedelta_as_label(self, date_format, key):
        df = DataFrame([[1]], columns=[pd.Timedelta("1D")])
        expected = f'{{"{key}":{{"0":1}}}}'
        result = df.to_json(date_format=date_format)
        assert result == expected
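
    # Editor's sketch (hypothetical companion): Timestamp column labels follow
    # the same ``date_format`` switch as the Timedelta labels above.
    @pytest.mark.parametrize(
        "date_format,key",
        [("epoch", 1577836800000), ("iso", "2020-01-01T00:00:00.000")],
    )
    def test_timestamp_as_label_sketch(self, date_format, key):
        df = DataFrame([[1]], columns=[pd.Timestamp("2020-01-01")])
        assert df.to_json(date_format=date_format) == f'{{"{key}":{{"0":1}}}}'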

    @pytest.mark.parametrize(
        "orient,expected",
        [
            ("index", "{\"('a', 'b')\":{\"('c', 'd')\":1}}"),
            ("columns", "{\"('c', 'd')\":{\"('a', 'b')\":1}}"),
            # TODO: the below have separate encoding procedures
            pytest.param(
                "split",
                "",
                marks=pytest.mark.xfail(
                    reason="Produces JSON but not in a consistent manner"
                ),
            ),
            pytest.param(
                "table",
                "",
                marks=pytest.mark.xfail(
                    reason="Produces JSON but not in a consistent manner"
                ),
            ),
        ],
    )
    def test_tuple_labels(self, orient, expected):
        # GH 20500
        df = DataFrame([[1]], index=[("a", "b")], columns=[("c", "d")])
        result = df.to_json(orient=orient)
        assert result == expected

    @pytest.mark.parametrize("indent", [1, 2, 4])
    def test_to_json_indent(self, indent):
        # GH 12004
        df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"])
        result = df.to_json(indent=indent)
        spaces = " " * indent
        expected = f"""{{
{spaces}"a":{{
{spaces}{spaces}"0":"foo",
{spaces}{spaces}"1":"baz"
{spaces}}},
{spaces}"b":{{
{spaces}{spaces}"0":"bar",
{spaces}{spaces}"1":"qux"
{spaces}}}
}}"""
        assert result == expected

    @pytest.mark.parametrize(
        "orient,expected",
        [
            (
                "split",
                """{
    "columns":[
        "a",
        "b"
    ],
    "index":[
        0,
        1
    ],
    "data":[
        [
            "foo",
            "bar"
        ],
        [
            "baz",
            "qux"
        ]
    ]
}""",
            ),
            (
                "records",
                """[
    {
        "a":"foo",
        "b":"bar"
    },
    {
        "a":"baz",
        "b":"qux"
    }
]""",
            ),
            (
                "index",
                """{
    "0":{
        "a":"foo",
        "b":"bar"
    },
    "1":{
        "a":"baz",
        "b":"qux"
    }
}""",
            ),
            (
                "columns",
                """{
    "a":{
        "0":"foo",
        "1":"baz"
    },
    "b":{
        "0":"bar",
        "1":"qux"
    }
}""",
            ),
            (
                "values",
                """[
    [
        "foo",
        "bar"
    ],
    [
        "baz",
        "qux"
    ]
]""",
            ),
            (
                "table",
                """{
    "schema":{
        "fields":[
            {
                "name":"index",
                "type":"integer"
            },
            {
                "name":"a",
                "type":"string"
            },
            {
                "name":"b",
                "type":"string"
            }
        ],
        "primaryKey":[
            "index"
        ],
        "pandas_version":"1.4.0"
    },
    "data":[
        {
            "index":0,
            "a":"foo",
            "b":"bar"
        },
        {
            "index":1,
            "a":"baz",
            "b":"qux"
        }
    ]
}""",
            ),
        ],
    )
    def test_json_indent_all_orients(self, orient, expected):
        # GH 12004
        df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"])
        result = df.to_json(orient=orient, indent=4)
        assert result == expected
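
    # Editor's sketch: indentation is cosmetic, so for each orient above the
    # indented and compact payloads should parse to the same object.
    @pytest.mark.parametrize(
        "orient", ["split", "records", "index", "columns", "values"]
    )
    def test_indent_is_cosmetic_sketch(self, orient):
        df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"])
        compact = df.to_json(orient=orient)
        indented = df.to_json(orient=orient, indent=4)
        assert json.loads(indented) == json.loads(compact)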

    def test_json_negative_indent_raises(self):
        with pytest.raises(ValueError, match="must be a nonnegative integer"):
            DataFrame().to_json(indent=-1)

    def test_ecma_262_nan_inf_support(self):
        # GH 12213
        data = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]'
        result = read_json(data)
        expected = DataFrame(
            ["a", np.nan, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"]
        )
        tm.assert_frame_equal(result, expected)

    def test_frame_int_overflow(self):
        # GH 30320
        encoded_json = json.dumps([{"col": "31900441201190696999"}, {"col": "Text"}])
        expected = DataFrame({"col": ["31900441201190696999", "Text"]})
        result = read_json(encoded_json)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "dataframe,expected",
        [
            (
                DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}),
                '{"(0, \'x\')":1,"(0, \'y\')":"a","(1, \'x\')":2,'
                '"(1, \'y\')":"b","(2, \'x\')":3,"(2, \'y\')":"c"}',
            )
        ],
    )
    def test_json_multiindex(self, dataframe, expected):
        series = dataframe.stack()
        result = series.to_json(orient="index")
        assert result == expected

    @pytest.mark.single_cpu
    def test_to_s3(self, s3_resource, s3so):
        # GH 28375
        mock_bucket_name, target_file = "pandas-test", "test.json"
        df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
        df.to_json(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so)
        timeout = 5
        while True:
            if target_file in (
                obj.key for obj in s3_resource.Bucket(mock_bucket_name).objects.all()
            ):
                break
            time.sleep(0.1)
            timeout -= 0.1
            assert timeout > 0, "Timed out waiting for file to appear on moto"

    def test_json_pandas_nulls(self, nulls_fixture, request):
        # GH 31615
        if isinstance(nulls_fixture, Decimal):
            mark = pytest.mark.xfail(reason="not implemented")
            request.node.add_marker(mark)

        result = DataFrame([[nulls_fixture]]).to_json()
        assert result == '{"0":{"0":null}}'

    def test_readjson_bool_series(self):
        # GH31464
        result = read_json("[true, true, false]", typ="series")
        expected = Series([True, True, False])
        tm.assert_series_equal(result, expected)

    def test_to_json_multiindex_escape(self):
        # GH 15273
        df = DataFrame(
            True,
            index=pd.date_range("2017-01-20", "2017-01-23"),
            columns=["foo", "bar"],
        ).stack()
        result = df.to_json()
        expected = (
            "{\"(Timestamp('2017-01-20 00:00:00'), 'foo')\":true,"
            "\"(Timestamp('2017-01-20 00:00:00'), 'bar')\":true,"
            "\"(Timestamp('2017-01-21 00:00:00'), 'foo')\":true,"
            "\"(Timestamp('2017-01-21 00:00:00'), 'bar')\":true,"
            "\"(Timestamp('2017-01-22 00:00:00'), 'foo')\":true,"
            "\"(Timestamp('2017-01-22 00:00:00'), 'bar')\":true,"
            "\"(Timestamp('2017-01-23 00:00:00'), 'foo')\":true,"
            "\"(Timestamp('2017-01-23 00:00:00'), 'bar')\":true}"
        )
        assert result == expected

    def test_to_json_series_of_objects(self):
        class _TestObject:
            def __init__(self, a, b, _c, d) -> None:
                self.a = a
                self.b = b
                self._c = _c
                self.d = d

            def e(self):
                return 5

        # JSON keys should be all non-callable non-underscore attributes, see GH-42768
        series = Series([_TestObject(a=1, b=2, _c=3, d=4)])
        assert json.loads(series.to_json()) == {"0": {"a": 1, "b": 2, "d": 4}}

    @pytest.mark.parametrize(
        "data,expected",
        [
            (
                Series({0: -6 + 8j, 1: 0 + 1j, 2: 9 - 5j}),
                '{"0":{"imag":8.0,"real":-6.0},'
                '"1":{"imag":1.0,"real":0.0},'
                '"2":{"imag":-5.0,"real":9.0}}',
            ),
            (
                Series({0: -9.39 + 0.66j, 1: 3.95 + 9.32j, 2: 4.03 - 0.17j}),
                '{"0":{"imag":0.66,"real":-9.39},'
                '"1":{"imag":9.32,"real":3.95},'
                '"2":{"imag":-0.17,"real":4.03}}',
            ),
            (
                DataFrame([[-2 + 3j, -1 - 0j], [4 - 3j, -0 - 10j]]),
                '{"0":{"0":{"imag":3.0,"real":-2.0},'
                '"1":{"imag":-3.0,"real":4.0}},'
                '"1":{"0":{"imag":0.0,"real":-1.0},'
                '"1":{"imag":-10.0,"real":0.0}}}',
            ),
            (
                DataFrame(
                    [[-0.28 + 0.34j, -1.08 - 0.39j], [0.41 - 0.34j, -0.78 - 1.35j]]
                ),
                '{"0":{"0":{"imag":0.34,"real":-0.28},'
                '"1":{"imag":-0.34,"real":0.41}},'
                '"1":{"0":{"imag":-0.39,"real":-1.08},'
                '"1":{"imag":-1.35,"real":-0.78}}}',
            ),
        ],
    )
    def test_complex_data_tojson(self, data, expected):
        # GH41174
        result = data.to_json()
        assert result == expected

    def test_json_uint64(self):
        # GH21073
        expected = (
            '{"columns":["col1"],"index":[0,1],'
            '"data":[[13342205958987758245],[12388075603347835679]]}'
        )
        df = DataFrame(data={"col1": [13342205958987758245, 12388075603347835679]})
        result = df.to_json(orient="split")
        assert result == expected

    @pytest.mark.parametrize(
        "orient", ["split", "records", "values", "index", "columns"]
    )
    def test_read_json_dtype_backend(self, string_storage, dtype_backend, orient):
        # GH#50750
        pa = pytest.importorskip("pyarrow")
        df = DataFrame(
            {
                "a": Series([1, np.nan, 3], dtype="Int64"),
                "b": Series([1, 2, 3], dtype="Int64"),
                "c": Series([1.5, np.nan, 2.5], dtype="Float64"),
                "d": Series([1.5, 2.0, 2.5], dtype="Float64"),
                "e": [True, False, None],
                "f": [True, False, True],
                "g": ["a", "b", "c"],
                "h": ["a", "b", None],
            }
        )

        if string_storage == "python":
            string_array = StringArray(np.array(["a", "b", "c"], dtype=np.object_))
            string_array_na = StringArray(np.array(["a", "b", NA], dtype=np.object_))
        else:
            string_array = ArrowStringArray(pa.array(["a", "b", "c"]))
            string_array_na = ArrowStringArray(pa.array(["a", "b", None]))

        out = df.to_json(orient=orient)
        with pd.option_context("mode.string_storage", string_storage):
            result = read_json(out, dtype_backend=dtype_backend, orient=orient)

        expected = DataFrame(
            {
                "a": Series([1, np.nan, 3], dtype="Int64"),
                "b": Series([1, 2, 3], dtype="Int64"),
                "c": Series([1.5, np.nan, 2.5], dtype="Float64"),
                "d": Series([1.5, 2.0, 2.5], dtype="Float64"),
                "e": Series([True, False, NA], dtype="boolean"),
                "f": Series([True, False, True], dtype="boolean"),
                "g": string_array,
                "h": string_array_na,
            }
        )

        if dtype_backend == "pyarrow":
            from pandas.arrays import ArrowExtensionArray

            expected = DataFrame(
                {
                    col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True))
                    for col in expected.columns
                }
            )

        if orient == "values":
            expected.columns = list(range(0, 8))

        tm.assert_frame_equal(result, expected)
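
    # Editor's sketch (hypothetical test name): standalone usage of the option
    # exercised above -- dtype_backend="numpy_nullable" maps JSON nulls to
    # pd.NA-backed extension dtypes.
    def test_dtype_backend_nullable_int_sketch(self):
        result = read_json('{"a":{"0":1,"1":null}}', dtype_backend="numpy_nullable")
        expected = DataFrame({"a": Series([1, NA], dtype="Int64")})
        tm.assert_frame_equal(result, expected)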

    @pytest.mark.parametrize("orient", ["split", "records", "index"])
    def test_read_json_nullable_series(self, string_storage, dtype_backend, orient):
        # GH#50750
        pa = pytest.importorskip("pyarrow")
        ser = Series([1, np.nan, 3], dtype="Int64")

        out = ser.to_json(orient=orient)
        with pd.option_context("mode.string_storage", string_storage):
            result = read_json(
                out, dtype_backend=dtype_backend, orient=orient, typ="series"
            )

        expected = Series([1, np.nan, 3], dtype="Int64")

        if dtype_backend == "pyarrow":
            from pandas.arrays import ArrowExtensionArray

            expected = Series(
                ArrowExtensionArray(pa.array(expected, from_pandas=True))
            )

        tm.assert_series_equal(result, expected)

    def test_invalid_dtype_backend(self):
        msg = (
            "dtype_backend numpy is invalid, only 'numpy_nullable' and "
            "'pyarrow' are allowed."
        )
        with pytest.raises(ValueError, match=msg):
            read_json("test", dtype_backend="numpy")


def test_invalid_engine():
    # GH 48893
    ser = Series(range(1))
    out = ser.to_json()
    with pytest.raises(ValueError, match="The engine type foo"):
        read_json(out, engine="foo")


def test_pyarrow_engine_lines_false():
    # GH 48893
    ser = Series(range(1))
    out = ser.to_json()
    with pytest.raises(ValueError, match="currently pyarrow engine only supports"):
        read_json(out, engine="pyarrow", lines=False)
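

# Editor's sketch (assumptions: pyarrow is installed, and a binary buffer is
# used since the pyarrow engine reads bytes): lines=True is the supported
# counterpart to the failure case above.
def test_pyarrow_engine_lines_true_sketch():
    pytest.importorskip("pyarrow")
    from io import BytesIO

    data = b'{"a": 1}\n{"a": 2}\n'
    result = read_json(BytesIO(data), engine="pyarrow", lines=True)
    tm.assert_frame_equal(result, DataFrame({"a": [1, 2]}))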