# test_json_table_schema.py
  1. """Tests for Table Schema integration."""
from collections import OrderedDict
import json

import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame
import pandas._testing as tm
from pandas.core.dtypes.dtypes import (
    CategoricalDtype,
    DatetimeTZDtype,
    PeriodDtype,
)
from pandas.io.json._table_schema import (
    as_json_table_type,
    build_table_schema,
    convert_json_field_to_pandas_type,
    convert_pandas_type_to_json_field,
    set_default_names,
)
  21. @pytest.fixture
  22. def df_schema():
  23. return DataFrame(
  24. {
  25. "A": [1, 2, 3, 4],
  26. "B": ["a", "b", "c", "c"],
  27. "C": pd.date_range("2016-01-01", freq="d", periods=4),
  28. "D": pd.timedelta_range("1H", periods=4, freq="T"),
  29. },
  30. index=pd.Index(range(4), name="idx"),
  31. )
  32. @pytest.fixture
  33. def df_table():
  34. return DataFrame(
  35. {
  36. "A": [1, 2, 3, 4],
  37. "B": ["a", "b", "c", "c"],
  38. "C": pd.date_range("2016-01-01", freq="d", periods=4),
  39. "D": pd.timedelta_range("1H", periods=4, freq="T"),
  40. "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])),
  41. "F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)),
  42. "G": [1.0, 2.0, 3, 4.0],
  43. "H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"),
  44. },
  45. index=pd.Index(range(4), name="idx"),
  46. )
  47. class TestBuildSchema:
  48. def test_build_table_schema(self, df_schema):
  49. result = build_table_schema(df_schema, version=False)
  50. expected = {
  51. "fields": [
  52. {"name": "idx", "type": "integer"},
  53. {"name": "A", "type": "integer"},
  54. {"name": "B", "type": "string"},
  55. {"name": "C", "type": "datetime"},
  56. {"name": "D", "type": "duration"},
  57. ],
  58. "primaryKey": ["idx"],
  59. }
  60. assert result == expected
  61. result = build_table_schema(df_schema)
  62. assert "pandas_version" in result
  63. def test_series(self):
  64. s = pd.Series([1, 2, 3], name="foo")
  65. result = build_table_schema(s, version=False)
  66. expected = {
  67. "fields": [
  68. {"name": "index", "type": "integer"},
  69. {"name": "foo", "type": "integer"},
  70. ],
  71. "primaryKey": ["index"],
  72. }
  73. assert result == expected
  74. result = build_table_schema(s)
  75. assert "pandas_version" in result
  76. def test_series_unnamed(self):
  77. result = build_table_schema(pd.Series([1, 2, 3]), version=False)
  78. expected = {
  79. "fields": [
  80. {"name": "index", "type": "integer"},
  81. {"name": "values", "type": "integer"},
  82. ],
  83. "primaryKey": ["index"],
  84. }
  85. assert result == expected
  86. def test_multiindex(self, df_schema):
  87. df = df_schema
  88. idx = pd.MultiIndex.from_product([("a", "b"), (1, 2)])
  89. df.index = idx
  90. result = build_table_schema(df, version=False)
  91. expected = {
  92. "fields": [
  93. {"name": "level_0", "type": "string"},
  94. {"name": "level_1", "type": "integer"},
  95. {"name": "A", "type": "integer"},
  96. {"name": "B", "type": "string"},
  97. {"name": "C", "type": "datetime"},
  98. {"name": "D", "type": "duration"},
  99. ],
  100. "primaryKey": ["level_0", "level_1"],
  101. }
  102. assert result == expected
  103. df.index.names = ["idx0", None]
  104. expected["fields"][0]["name"] = "idx0"
  105. expected["primaryKey"] = ["idx0", "level_1"]
  106. result = build_table_schema(df, version=False)
  107. assert result == expected
class TestTableSchemaType:
    """Tests mapping numpy/pandas array data and bare dtypes to Table Schema
    type strings via as_json_table_type."""

    @pytest.mark.parametrize("int_type", [int, np.int16, np.int32, np.int64])
    def test_as_json_table_type_int_data(self, int_type):
        int_data = [1, 2, 3]
        assert as_json_table_type(np.array(int_data, dtype=int_type).dtype) == "integer"

    @pytest.mark.parametrize("float_type", [float, np.float16, np.float32, np.float64])
    def test_as_json_table_type_float_data(self, float_type):
        float_data = [1.0, 2.0, 3.0]
        assert (
            as_json_table_type(np.array(float_data, dtype=float_type).dtype) == "number"
        )

    @pytest.mark.parametrize("bool_type", [bool, np.bool_])
    def test_as_json_table_type_bool_data(self, bool_type):
        bool_data = [True, False]
        assert (
            as_json_table_type(np.array(bool_data, dtype=bool_type).dtype) == "boolean"
        )

    @pytest.mark.parametrize(
        "date_data",
        [
            pd.to_datetime(["2016"]),
            pd.to_datetime(["2016"], utc=True),
            pd.Series(pd.to_datetime(["2016"])),
            pd.Series(pd.to_datetime(["2016"], utc=True)),
            pd.period_range("2016", freq="A", periods=3),
        ],
    )
    def test_as_json_table_type_date_data(self, date_data):
        # Naive/aware datetimes and periods all map to "datetime".
        assert as_json_table_type(date_data.dtype) == "datetime"

    @pytest.mark.parametrize("str_data", [pd.Series(["a", "b"]), pd.Index(["a", "b"])])
    def test_as_json_table_type_string_data(self, str_data):
        assert as_json_table_type(str_data.dtype) == "string"

    @pytest.mark.parametrize(
        "cat_data",
        [
            pd.Categorical(["a"]),
            pd.Categorical([1]),
            pd.Series(pd.Categorical([1])),
            pd.CategoricalIndex([1]),
            pd.Categorical([1]),  # NOTE(review): duplicate of the second case above
        ],
    )
    def test_as_json_table_type_categorical_data(self, cat_data):
        assert as_json_table_type(cat_data.dtype) == "any"

    # ------
    # dtypes
    # ------
    @pytest.mark.parametrize("int_dtype", [int, np.int16, np.int32, np.int64])
    def test_as_json_table_type_int_dtypes(self, int_dtype):
        assert as_json_table_type(int_dtype) == "integer"

    @pytest.mark.parametrize("float_dtype", [float, np.float16, np.float32, np.float64])
    def test_as_json_table_type_float_dtypes(self, float_dtype):
        assert as_json_table_type(float_dtype) == "number"

    @pytest.mark.parametrize("bool_dtype", [bool, np.bool_])
    def test_as_json_table_type_bool_dtypes(self, bool_dtype):
        assert as_json_table_type(bool_dtype) == "boolean"

    @pytest.mark.parametrize(
        "date_dtype",
        [
            np.datetime64,
            np.dtype("<M8[ns]"),
            PeriodDtype("D"),
            DatetimeTZDtype("ns", "US/Central"),
        ],
    )
    def test_as_json_table_type_date_dtypes(self, date_dtype):
        # TODO: datedate.date? datetime.time?
        assert as_json_table_type(date_dtype) == "datetime"

    @pytest.mark.parametrize("td_dtype", [np.timedelta64, np.dtype("<m8[ns]")])
    def test_as_json_table_type_timedelta_dtypes(self, td_dtype):
        assert as_json_table_type(td_dtype) == "duration"

    @pytest.mark.parametrize("str_dtype", [object])  # TODO(GH#14904) flesh out dtypes?
    def test_as_json_table_type_string_dtypes(self, str_dtype):
        assert as_json_table_type(str_dtype) == "string"

    def test_as_json_table_type_categorical_dtypes(self):
        assert as_json_table_type(pd.Categorical(["a"]).dtype) == "any"
        assert as_json_table_type(CategoricalDtype()) == "any"
class TestTableOrient:
    """Content tests for ``to_json(orient="table")`` output and for the
    pandas-type <-> Table Schema field conversion helpers."""

    def test_build_series(self):
        s = pd.Series([1, 2], name="a")
        s.index.name = "id"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [{"name": "id", "type": "integer"}, {"name": "a", "type": "integer"}]
        schema = {"fields": fields, "primaryKey": ["id"]}
        expected = OrderedDict(
            [
                ("schema", schema),
                (
                    "data",
                    [
                        OrderedDict([("id", 0), ("a", 1)]),
                        OrderedDict([("id", 1), ("a", 2)]),
                    ],
                ),
            ]
        )
        assert result == expected

    def test_read_json_from_to_json_results(self):
        # GH32383: non-ASCII column content must survive a to_json round trip.
        df = DataFrame(
            {
                "_id": {"row_0": 0},
                "category": {"row_0": "Goods"},
                "recommender_id": {"row_0": 3},
                "recommender_name_jp": {"row_0": "浦田"},
                "recommender_name_en": {"row_0": "Urata"},
                "name_jp": {"row_0": "博多人形(松尾吉将まつお よしまさ)"},
                "name_en": {"row_0": "Hakata Dolls Matsuo"},
            }
        )
        result1 = pd.read_json(df.to_json())
        result2 = DataFrame.from_dict(json.loads(df.to_json()))
        tm.assert_frame_equal(result1, df)
        tm.assert_frame_equal(result2, df)

    def test_to_json(self, df_table):
        df = df_table
        df.index.name = "idx"
        result = df.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        # Categorical columns carry enum constraints + an "ordered" flag;
        # tz-aware datetimes carry a "tz" entry.
        fields = [
            {"name": "idx", "type": "integer"},
            {"name": "A", "type": "integer"},
            {"name": "B", "type": "string"},
            {"name": "C", "type": "datetime"},
            {"name": "D", "type": "duration"},
            {
                "constraints": {"enum": ["a", "b", "c"]},
                "name": "E",
                "ordered": False,
                "type": "any",
            },
            {
                "constraints": {"enum": ["a", "b", "c"]},
                "name": "F",
                "ordered": True,
                "type": "any",
            },
            {"name": "G", "type": "number"},
            {"name": "H", "type": "datetime", "tz": "US/Central"},
        ]
        schema = {"fields": fields, "primaryKey": ["idx"]}
        data = [
            OrderedDict(
                [
                    ("idx", 0),
                    ("A", 1),
                    ("B", "a"),
                    ("C", "2016-01-01T00:00:00.000"),
                    ("D", "P0DT1H0M0S"),
                    ("E", "a"),
                    ("F", "a"),
                    ("G", 1.0),
                    ("H", "2016-01-01T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 1),
                    ("A", 2),
                    ("B", "b"),
                    ("C", "2016-01-02T00:00:00.000"),
                    ("D", "P0DT1H1M0S"),
                    ("E", "b"),
                    ("F", "b"),
                    ("G", 2.0),
                    ("H", "2016-01-02T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 2),
                    ("A", 3),
                    ("B", "c"),
                    ("C", "2016-01-03T00:00:00.000"),
                    ("D", "P0DT1H2M0S"),
                    ("E", "c"),
                    ("F", "c"),
                    ("G", 3.0),
                    ("H", "2016-01-03T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 3),
                    ("A", 4),
                    ("B", "c"),
                    ("C", "2016-01-04T00:00:00.000"),
                    ("D", "P0DT1H3M0S"),
                    ("E", "c"),
                    ("F", "c"),
                    ("G", 4.0),
                    ("H", "2016-01-04T06:00:00.000Z"),
                ]
            ),
        ]
        expected = OrderedDict([("schema", schema), ("data", data)])
        assert result == expected

    def test_to_json_float_index(self):
        data = pd.Series(1, index=[1.0, 2.0])
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        expected = OrderedDict(
            [
                (
                    "schema",
                    {
                        "fields": [
                            {"name": "index", "type": "number"},
                            {"name": "values", "type": "integer"},
                        ],
                        "primaryKey": ["index"],
                    },
                ),
                (
                    "data",
                    [
                        OrderedDict([("index", 1.0), ("values", 1)]),
                        OrderedDict([("index", 2.0), ("values", 1)]),
                    ],
                ),
            ]
        )
        assert result == expected

    def test_to_json_period_index(self):
        # Periods serialize as the period's start timestamp plus a "freq" field.
        idx = pd.period_range("2016", freq="Q-JAN", periods=2)
        data = pd.Series(1, idx)
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        fields = [
            {"freq": "Q-JAN", "name": "index", "type": "datetime"},
            {"name": "values", "type": "integer"},
        ]
        schema = {"fields": fields, "primaryKey": ["index"]}
        data = [
            OrderedDict([("index", "2015-11-01T00:00:00.000"), ("values", 1)]),
            OrderedDict([("index", "2016-02-01T00:00:00.000"), ("values", 1)]),
        ]
        expected = OrderedDict([("schema", schema), ("data", data)])
        assert result == expected

    def test_to_json_categorical_index(self):
        data = pd.Series(1, pd.CategoricalIndex(["a", "b"]))
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        expected = OrderedDict(
            [
                (
                    "schema",
                    {
                        "fields": [
                            {
                                "name": "index",
                                "type": "any",
                                "constraints": {"enum": ["a", "b"]},
                                "ordered": False,
                            },
                            {"name": "values", "type": "integer"},
                        ],
                        "primaryKey": ["index"],
                    },
                ),
                (
                    "data",
                    [
                        OrderedDict([("index", "a"), ("values", 1)]),
                        OrderedDict([("index", "b"), ("values", 1)]),
                    ],
                ),
            ]
        )
        assert result == expected

    def test_date_format_raises(self, df_table):
        # Table Schema mandates ISO dates; epoch must be rejected.
        msg = (
            "Trying to write with `orient='table'` and `date_format='epoch'`. Table "
            "Schema requires dates to be formatted with `date_format='iso'`"
        )
        with pytest.raises(ValueError, match=msg):
            df_table.to_json(orient="table", date_format="epoch")

        # others work
        df_table.to_json(orient="table", date_format="iso")
        df_table.to_json(orient="table")

    def test_convert_pandas_type_to_json_field_int(self, index_or_series):
        kind = index_or_series
        data = [1, 2, 3]
        result = convert_pandas_type_to_json_field(kind(data, name="name"))
        expected = {"name": "name", "type": "integer"}
        assert result == expected

    def test_convert_pandas_type_to_json_field_float(self, index_or_series):
        kind = index_or_series
        data = [1.0, 2.0, 3.0]
        result = convert_pandas_type_to_json_field(kind(data, name="name"))
        expected = {"name": "name", "type": "number"}
        assert result == expected

    @pytest.mark.parametrize(
        "dt_args,extra_exp", [({}, {}), ({"utc": True}, {"tz": "UTC"})]
    )
    @pytest.mark.parametrize("wrapper", [None, pd.Series])
    def test_convert_pandas_type_to_json_field_datetime(
        self, dt_args, extra_exp, wrapper
    ):
        data = [1.0, 2.0, 3.0]
        data = pd.to_datetime(data, **dt_args)
        if wrapper is pd.Series:
            data = pd.Series(data, name="values")
        result = convert_pandas_type_to_json_field(data)
        expected = {"name": "values", "type": "datetime"}
        expected.update(extra_exp)
        assert result == expected

    def test_convert_pandas_type_to_json_period_range(self):
        arr = pd.period_range("2016", freq="A-DEC", periods=4)
        result = convert_pandas_type_to_json_field(arr)
        expected = {"name": "values", "type": "datetime", "freq": "A-DEC"}
        assert result == expected

    @pytest.mark.parametrize("kind", [pd.Categorical, pd.CategoricalIndex])
    @pytest.mark.parametrize("ordered", [True, False])
    def test_convert_pandas_type_to_json_field_categorical(self, kind, ordered):
        data = ["a", "b", "c"]
        if kind is pd.Categorical:
            arr = pd.Series(kind(data, ordered=ordered), name="cats")
        elif kind is pd.CategoricalIndex:
            arr = kind(data, ordered=ordered, name="cats")
        result = convert_pandas_type_to_json_field(arr)
        expected = {
            "name": "cats",
            "type": "any",
            "constraints": {"enum": data},
            "ordered": ordered,
        }
        assert result == expected

    @pytest.mark.parametrize(
        "inp,exp",
        [
            ({"type": "integer"}, "int64"),
            ({"type": "number"}, "float64"),
            ({"type": "boolean"}, "bool"),
            ({"type": "duration"}, "timedelta64"),
            ({"type": "datetime"}, "datetime64[ns]"),
            ({"type": "datetime", "tz": "US/Hawaii"}, "datetime64[ns, US/Hawaii]"),
            ({"type": "any"}, "object"),
            (
                {
                    "type": "any",
                    "constraints": {"enum": ["a", "b", "c"]},
                    "ordered": False,
                },
                CategoricalDtype(categories=["a", "b", "c"], ordered=False),
            ),
            (
                {
                    "type": "any",
                    "constraints": {"enum": ["a", "b", "c"]},
                    "ordered": True,
                },
                CategoricalDtype(categories=["a", "b", "c"], ordered=True),
            ),
            ({"type": "string"}, "object"),
        ],
    )
    def test_convert_json_field_to_pandas_type(self, inp, exp):
        field = {"name": "foo"}
        field.update(inp)
        assert convert_json_field_to_pandas_type(field) == exp

    @pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"])
    def test_convert_json_field_to_pandas_type_raises(self, inp):
        field = {"type": inp}
        with pytest.raises(
            ValueError, match=f"Unsupported or invalid field type: {inp}"
        ):
            convert_json_field_to_pandas_type(field)

    def test_categorical(self):
        s = pd.Series(pd.Categorical(["a", "b", "a"]))
        s.index.name = "idx"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "idx", "type": "integer"},
            {
                "constraints": {"enum": ["a", "b"]},
                "name": "values",
                "ordered": False,
                "type": "any",
            },
        ]
        expected = OrderedDict(
            [
                ("schema", {"fields": fields, "primaryKey": ["idx"]}),
                (
                    "data",
                    [
                        OrderedDict([("idx", 0), ("values", "a")]),
                        OrderedDict([("idx", 1), ("values", "b")]),
                        OrderedDict([("idx", 2), ("values", "a")]),
                    ],
                ),
            ]
        )
        assert result == expected

    @pytest.mark.parametrize(
        "idx,nm,prop",
        [
            (pd.Index([1]), "index", "name"),
            (pd.Index([1], name="myname"), "myname", "name"),
            (
                pd.MultiIndex.from_product([("a", "b"), ("c", "d")]),
                ["level_0", "level_1"],
                "names",
            ),
            (
                pd.MultiIndex.from_product(
                    [("a", "b"), ("c", "d")], names=["n1", "n2"]
                ),
                ["n1", "n2"],
                "names",
            ),
            (
                pd.MultiIndex.from_product(
                    [("a", "b"), ("c", "d")], names=["n1", None]
                ),
                ["n1", "level_1"],
                "names",
            ),
        ],
    )
    def test_set_names_unset(self, idx, nm, prop):
        # Unnamed levels get the default "index"/"level_N" names.
        data = pd.Series(1, idx)
        result = set_default_names(data)
        assert getattr(result.index, prop) == nm

    @pytest.mark.parametrize(
        "idx",
        [
            pd.Index([], name="index"),
            pd.MultiIndex.from_arrays([["foo"], ["bar"]], names=("level_0", "level_1")),
            pd.MultiIndex.from_arrays([["foo"], ["bar"]], names=("foo", "level_1")),
        ],
    )
    def test_warns_non_roundtrippable_names(self, idx):
        # GH 19130: index names that collide with the defaults can't round-trip.
        df = DataFrame(index=idx)
        df.index.name = "index"
        with tm.assert_produces_warning():
            set_default_names(df)

    def test_timestamp_in_columns(self):
        # Timestamp/Timedelta column labels serialize to ISO strings.
        df = DataFrame(
            [[1, 2]], columns=[pd.Timestamp("2016"), pd.Timedelta(10, unit="s")]
        )
        result = df.to_json(orient="table")
        js = json.loads(result)
        assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000"
        assert js["schema"]["fields"][2]["name"] == "P0DT0H0M10S"

    @pytest.mark.parametrize(
        "case",
        [
            pd.Series([1], index=pd.Index([1], name="a"), name="a"),
            DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
            DataFrame(
                {"A": [1]},
                index=pd.MultiIndex.from_arrays([["a"], [1]], names=["A", "a"]),
            ),
        ],
    )
    def test_overlapping_names(self, case):
        # An index level sharing a name with a column is ambiguous.
        with pytest.raises(ValueError, match="Overlapping"):
            case.to_json(orient="table")

    def test_mi_falsey_name(self):
        # GH 16203
        df = DataFrame(
            np.random.randn(4, 4),
            index=pd.MultiIndex.from_product([("A", "B"), ("a", "b")]),
        )
        result = [x["name"] for x in build_table_schema(df)["fields"]]
        assert result == ["level_0", "level_1", 0, 1, 2, 3]
class TestTableOrientReader:
    """Round-trip tests: read_json(orient="table") on to_json output."""

    @pytest.mark.parametrize(
        "index_nm",
        [None, "idx", pytest.param("index", marks=pytest.mark.xfail), "level_0"],
    )
    @pytest.mark.parametrize(
        "vals",
        [
            {"ints": [1, 2, 3, 4]},
            {"objects": ["a", "b", "c", "d"]},
            {"objects": ["1", "2", "3", "4"]},
            {"date_ranges": pd.date_range("2016-01-01", freq="d", periods=4)},
            {"categoricals": pd.Series(pd.Categorical(["a", "b", "c", "c"]))},
            {
                "ordered_cats": pd.Series(
                    pd.Categorical(["a", "b", "c", "c"], ordered=True)
                )
            },
            {"floats": [1.0, 2.0, 3.0, 4.0]},
            {"floats": [1.1, 2.2, 3.3, 4.4]},
            {"bools": [True, False, False, True]},
            {
                "timezones": pd.date_range(
                    "2016-01-01", freq="d", periods=4, tz="US/Central"
                )  # added in # GH 35973
            },
        ],
    )
    def test_read_json_table_orient(self, index_nm, vals, recwarn):
        df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    @pytest.mark.parametrize("index_nm", [None, "idx", "index"])
    @pytest.mark.parametrize(
        "vals",
        [{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}],
    )
    def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
        # Reading back timedelta columns is not implemented yet.
        df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
        out = df.to_json(orient="table")
        with pytest.raises(NotImplementedError, match="can not yet read "):
            pd.read_json(out, orient="table")

    @pytest.mark.parametrize(
        "index_nm",
        [None, "idx", pytest.param("index", marks=pytest.mark.xfail), "level_0"],
    )
    @pytest.mark.parametrize(
        "vals",
        [
            {"ints": [1, 2, 3, 4]},
            {"objects": ["a", "b", "c", "d"]},
            {"objects": ["1", "2", "3", "4"]},
            {"date_ranges": pd.date_range("2016-01-01", freq="d", periods=4)},
            {"categoricals": pd.Series(pd.Categorical(["a", "b", "c", "c"]))},
            {
                "ordered_cats": pd.Series(
                    pd.Categorical(["a", "b", "c", "c"], ordered=True)
                )
            },
            {"floats": [1.0, 2.0, 3.0, 4.0]},
            {"floats": [1.1, 2.2, 3.3, 4.4]},
            {"bools": [True, False, False, True]},
            {
                "timezones": pd.date_range(
                    "2016-01-01", freq="d", periods=4, tz="US/Central"
                )  # added in # GH 35973
            },
        ],
    )
    def test_read_json_table_period_orient(self, index_nm, vals, recwarn):
        # Same value matrix as above, but with a PeriodIndex.
        df = DataFrame(
            vals,
            index=pd.Index(
                (pd.Period(f"2022Q{q}") for q in range(1, 5)), name=index_nm
            ),
        )
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    @pytest.mark.parametrize(
        "idx",
        [
            pd.Index(range(4)),
            pd.date_range(
                "2020-08-30",
                freq="d",
                periods=4,
            )._with_freq(None),
            pd.date_range(
                "2020-08-30", freq="d", periods=4, tz="US/Central"
            )._with_freq(None),
            pd.MultiIndex.from_product(
                [
                    pd.date_range("2020-08-30", freq="d", periods=2, tz="US/Central"),
                    ["x", "y"],
                ],
            ),
        ],
    )
    @pytest.mark.parametrize(
        "vals",
        [
            {"floats": [1.1, 2.2, 3.3, 4.4]},
            {"dates": pd.date_range("2020-08-30", freq="d", periods=4)},
            {
                "timezones": pd.date_range(
                    "2020-08-30", freq="d", periods=4, tz="Europe/London"
                )
            },
        ],
    )
    def test_read_json_table_timezones_orient(self, idx, vals, recwarn):
        # GH 35973
        df = DataFrame(vals, index=idx)
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    def test_comprehensive(self):
        df = DataFrame(
            {
                "A": [1, 2, 3, 4],
                "B": ["a", "b", "c", "c"],
                "C": pd.date_range("2016-01-01", freq="d", periods=4),
                # 'D': pd.timedelta_range('1H', periods=4, freq='T'),
                "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])),
                "F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)),
                "G": [1.1, 2.2, 3.3, 4.4],
                "H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"),
                "I": [True, False, False, True],
            },
            index=pd.Index(range(4), name="idx"),
        )
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    @pytest.mark.parametrize(
        "index_names",
        [[None, None], ["foo", "bar"], ["foo", None], [None, "foo"], ["index", "foo"]],
    )
    def test_multiindex(self, index_names):
        # GH 18912
        df = DataFrame(
            [["Arr", "alpha", [1, 2, 3, 4]], ["Bee", "Beta", [10, 20, 30, 40]]],
            index=[["A", "B"], ["Null", "Eins"]],
            columns=["Aussprache", "Griechisch", "Args"],
        )
        df.index.names = index_names
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    def test_empty_frame_roundtrip(self):
        # GH 21287
        df = DataFrame(columns=["a", "b", "c"])
        expected = df.copy()
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(expected, result)

    def test_read_json_orient_table_old_schema_version(self):
        # Schemas written by pandas 0.20.0 must still be readable.
        df_json = """
        {
            "schema":{
                "fields":[
                    {"name":"index","type":"integer"},
                    {"name":"a","type":"string"}
                ],
                "primaryKey":["index"],
                "pandas_version":"0.20.0"
            },
            "data":[
                {"index":0,"a":1},
                {"index":1,"a":2.0},
                {"index":2,"a":"s"}
            ]
        }
        """
        expected = DataFrame({"a": [1, 2.0, "s"]})
        result = pd.read_json(df_json, orient="table")
        tm.assert_frame_equal(expected, result)