- """Tests for Table Schema integration."""
from collections import OrderedDict
import json

import numpy as np
import pytest

from pandas.core.dtypes.dtypes import (
    CategoricalDtype,
    DatetimeTZDtype,
    PeriodDtype,
)

import pandas as pd
from pandas import DataFrame
import pandas._testing as tm

from pandas.io.json._table_schema import (
    as_json_table_type,
    build_table_schema,
    convert_json_field_to_pandas_type,
    convert_pandas_type_to_json_field,
    set_default_names,
)
@pytest.fixture
def df_schema():
    return DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "c"],
            "C": pd.date_range("2016-01-01", freq="d", periods=4),
            "D": pd.timedelta_range("1H", periods=4, freq="T"),
        },
        index=pd.Index(range(4), name="idx"),
    )


@pytest.fixture
def df_table():
    return DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "c"],
            "C": pd.date_range("2016-01-01", freq="d", periods=4),
            "D": pd.timedelta_range("1H", periods=4, freq="T"),
            "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])),
            "F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)),
            "G": [1.0, 2.0, 3, 4.0],
            "H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"),
        },
        index=pd.Index(range(4), name="idx"),
    )
class TestBuildSchema:
    def test_build_table_schema(self, df_schema):
        result = build_table_schema(df_schema, version=False)
        expected = {
            "fields": [
                {"name": "idx", "type": "integer"},
                {"name": "A", "type": "integer"},
                {"name": "B", "type": "string"},
                {"name": "C", "type": "datetime"},
                {"name": "D", "type": "duration"},
            ],
            "primaryKey": ["idx"],
        }
        assert result == expected

        result = build_table_schema(df_schema)
        assert "pandas_version" in result

    def test_series(self):
        s = pd.Series([1, 2, 3], name="foo")
        result = build_table_schema(s, version=False)
        expected = {
            "fields": [
                {"name": "index", "type": "integer"},
                {"name": "foo", "type": "integer"},
            ],
            "primaryKey": ["index"],
        }
        assert result == expected

        result = build_table_schema(s)
        assert "pandas_version" in result

    def test_series_unnamed(self):
        result = build_table_schema(pd.Series([1, 2, 3]), version=False)
        expected = {
            "fields": [
                {"name": "index", "type": "integer"},
                {"name": "values", "type": "integer"},
            ],
            "primaryKey": ["index"],
        }
        assert result == expected

    def test_multiindex(self, df_schema):
        df = df_schema
        idx = pd.MultiIndex.from_product([("a", "b"), (1, 2)])
        df.index = idx
        result = build_table_schema(df, version=False)
        expected = {
            "fields": [
                {"name": "level_0", "type": "string"},
                {"name": "level_1", "type": "integer"},
                {"name": "A", "type": "integer"},
                {"name": "B", "type": "string"},
                {"name": "C", "type": "datetime"},
                {"name": "D", "type": "duration"},
            ],
            "primaryKey": ["level_0", "level_1"],
        }
        assert result == expected

        df.index.names = ["idx0", None]
        expected["fields"][0]["name"] = "idx0"
        expected["primaryKey"] = ["idx0", "level_1"]
        result = build_table_schema(df, version=False)
        assert result == expected
class TestTableSchemaType:
    @pytest.mark.parametrize("int_type", [int, np.int16, np.int32, np.int64])
    def test_as_json_table_type_int_data(self, int_type):
        int_data = [1, 2, 3]
        assert (
            as_json_table_type(np.array(int_data, dtype=int_type).dtype) == "integer"
        )

    @pytest.mark.parametrize("float_type", [float, np.float16, np.float32, np.float64])
    def test_as_json_table_type_float_data(self, float_type):
        float_data = [1.0, 2.0, 3.0]
        assert (
            as_json_table_type(np.array(float_data, dtype=float_type).dtype) == "number"
        )

    @pytest.mark.parametrize("bool_type", [bool, np.bool_])
    def test_as_json_table_type_bool_data(self, bool_type):
        bool_data = [True, False]
        assert (
            as_json_table_type(np.array(bool_data, dtype=bool_type).dtype) == "boolean"
        )

    @pytest.mark.parametrize(
        "date_data",
        [
            pd.to_datetime(["2016"]),
            pd.to_datetime(["2016"], utc=True),
            pd.Series(pd.to_datetime(["2016"])),
            pd.Series(pd.to_datetime(["2016"], utc=True)),
            pd.period_range("2016", freq="A", periods=3),
        ],
    )
    def test_as_json_table_type_date_data(self, date_data):
        assert as_json_table_type(date_data.dtype) == "datetime"

    @pytest.mark.parametrize("str_data", [pd.Series(["a", "b"]), pd.Index(["a", "b"])])
    def test_as_json_table_type_string_data(self, str_data):
        assert as_json_table_type(str_data.dtype) == "string"

    @pytest.mark.parametrize(
        "cat_data",
        [
            pd.Categorical(["a"]),
            pd.Categorical([1]),
            pd.Series(pd.Categorical([1])),
            pd.CategoricalIndex([1]),
            pd.Categorical([1]),
        ],
    )
    def test_as_json_table_type_categorical_data(self, cat_data):
        assert as_json_table_type(cat_data.dtype) == "any"

    # ------
    # dtypes
    # ------
    @pytest.mark.parametrize("int_dtype", [int, np.int16, np.int32, np.int64])
    def test_as_json_table_type_int_dtypes(self, int_dtype):
        assert as_json_table_type(int_dtype) == "integer"

    @pytest.mark.parametrize("float_dtype", [float, np.float16, np.float32, np.float64])
    def test_as_json_table_type_float_dtypes(self, float_dtype):
        assert as_json_table_type(float_dtype) == "number"

    @pytest.mark.parametrize("bool_dtype", [bool, np.bool_])
    def test_as_json_table_type_bool_dtypes(self, bool_dtype):
        assert as_json_table_type(bool_dtype) == "boolean"

    @pytest.mark.parametrize(
        "date_dtype",
        [
            np.datetime64,
            np.dtype("<M8[ns]"),
            PeriodDtype("D"),
            DatetimeTZDtype("ns", "US/Central"),
        ],
    )
    def test_as_json_table_type_date_dtypes(self, date_dtype):
        # TODO: datetime.date? datetime.time?
        assert as_json_table_type(date_dtype) == "datetime"

    @pytest.mark.parametrize("td_dtype", [np.timedelta64, np.dtype("<m8[ns]")])
    def test_as_json_table_type_timedelta_dtypes(self, td_dtype):
        assert as_json_table_type(td_dtype) == "duration"

    @pytest.mark.parametrize("str_dtype", [object])  # TODO(GH#14904) flesh out dtypes?
    def test_as_json_table_type_string_dtypes(self, str_dtype):
        assert as_json_table_type(str_dtype) == "string"

    def test_as_json_table_type_categorical_dtypes(self):
        assert as_json_table_type(pd.Categorical(["a"]).dtype) == "any"
        assert as_json_table_type(CategoricalDtype()) == "any"
class TestTableOrient:
    def test_build_series(self):
        s = pd.Series([1, 2], name="a")
        s.index.name = "id"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [{"name": "id", "type": "integer"}, {"name": "a", "type": "integer"}]
        schema = {"fields": fields, "primaryKey": ["id"]}
        expected = OrderedDict(
            [
                ("schema", schema),
                (
                    "data",
                    [
                        OrderedDict([("id", 0), ("a", 1)]),
                        OrderedDict([("id", 1), ("a", 2)]),
                    ],
                ),
            ]
        )
        assert result == expected

    def test_read_json_from_to_json_results(self):
        # GH32383
        df = DataFrame(
            {
                "_id": {"row_0": 0},
                "category": {"row_0": "Goods"},
                "recommender_id": {"row_0": 3},
                "recommender_name_jp": {"row_0": "浦田"},
                "recommender_name_en": {"row_0": "Urata"},
                "name_jp": {"row_0": "博多人形(松尾吉将まつお よしまさ)"},
                "name_en": {"row_0": "Hakata Dolls Matsuo"},
            }
        )
        result1 = pd.read_json(df.to_json())
        result2 = DataFrame.from_dict(json.loads(df.to_json()))
        tm.assert_frame_equal(result1, df)
        tm.assert_frame_equal(result2, df)

    def test_to_json(self, df_table):
        df = df_table
        df.index.name = "idx"
        result = df.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "idx", "type": "integer"},
            {"name": "A", "type": "integer"},
            {"name": "B", "type": "string"},
            {"name": "C", "type": "datetime"},
            {"name": "D", "type": "duration"},
            {
                "constraints": {"enum": ["a", "b", "c"]},
                "name": "E",
                "ordered": False,
                "type": "any",
            },
            {
                "constraints": {"enum": ["a", "b", "c"]},
                "name": "F",
                "ordered": True,
                "type": "any",
            },
            {"name": "G", "type": "number"},
            {"name": "H", "type": "datetime", "tz": "US/Central"},
        ]

        schema = {"fields": fields, "primaryKey": ["idx"]}
        data = [
            OrderedDict(
                [
                    ("idx", 0),
                    ("A", 1),
                    ("B", "a"),
                    ("C", "2016-01-01T00:00:00.000"),
                    ("D", "P0DT1H0M0S"),
                    ("E", "a"),
                    ("F", "a"),
                    ("G", 1.0),
                    ("H", "2016-01-01T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 1),
                    ("A", 2),
                    ("B", "b"),
                    ("C", "2016-01-02T00:00:00.000"),
                    ("D", "P0DT1H1M0S"),
                    ("E", "b"),
                    ("F", "b"),
                    ("G", 2.0),
                    ("H", "2016-01-02T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 2),
                    ("A", 3),
                    ("B", "c"),
                    ("C", "2016-01-03T00:00:00.000"),
                    ("D", "P0DT1H2M0S"),
                    ("E", "c"),
                    ("F", "c"),
                    ("G", 3.0),
                    ("H", "2016-01-03T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 3),
                    ("A", 4),
                    ("B", "c"),
                    ("C", "2016-01-04T00:00:00.000"),
                    ("D", "P0DT1H3M0S"),
                    ("E", "c"),
                    ("F", "c"),
                    ("G", 4.0),
                    ("H", "2016-01-04T06:00:00.000Z"),
                ]
            ),
        ]
        expected = OrderedDict([("schema", schema), ("data", data)])
        assert result == expected

    def test_to_json_float_index(self):
        data = pd.Series(1, index=[1.0, 2.0])
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        expected = OrderedDict(
            [
                (
                    "schema",
                    {
                        "fields": [
                            {"name": "index", "type": "number"},
                            {"name": "values", "type": "integer"},
                        ],
                        "primaryKey": ["index"],
                    },
                ),
                (
                    "data",
                    [
                        OrderedDict([("index", 1.0), ("values", 1)]),
                        OrderedDict([("index", 2.0), ("values", 1)]),
                    ],
                ),
            ]
        )
        assert result == expected

    def test_to_json_period_index(self):
        idx = pd.period_range("2016", freq="Q-JAN", periods=2)
        data = pd.Series(1, idx)
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        fields = [
            {"freq": "Q-JAN", "name": "index", "type": "datetime"},
            {"name": "values", "type": "integer"},
        ]
        schema = {"fields": fields, "primaryKey": ["index"]}
        data = [
            OrderedDict([("index", "2015-11-01T00:00:00.000"), ("values", 1)]),
            OrderedDict([("index", "2016-02-01T00:00:00.000"), ("values", 1)]),
        ]
        expected = OrderedDict([("schema", schema), ("data", data)])
        assert result == expected

    def test_to_json_categorical_index(self):
        data = pd.Series(1, pd.CategoricalIndex(["a", "b"]))
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        expected = OrderedDict(
            [
                (
                    "schema",
                    {
                        "fields": [
                            {
                                "name": "index",
                                "type": "any",
                                "constraints": {"enum": ["a", "b"]},
                                "ordered": False,
                            },
                            {"name": "values", "type": "integer"},
                        ],
                        "primaryKey": ["index"],
                    },
                ),
                (
                    "data",
                    [
                        OrderedDict([("index", "a"), ("values", 1)]),
                        OrderedDict([("index", "b"), ("values", 1)]),
                    ],
                ),
            ]
        )
        assert result == expected

    def test_date_format_raises(self, df_table):
        msg = (
            "Trying to write with `orient='table'` and `date_format='epoch'`. Table "
            "Schema requires dates to be formatted with `date_format='iso'`"
        )
        with pytest.raises(ValueError, match=msg):
            df_table.to_json(orient="table", date_format="epoch")

        # others work
        df_table.to_json(orient="table", date_format="iso")
        df_table.to_json(orient="table")

    def test_convert_pandas_type_to_json_field_int(self, index_or_series):
        kind = index_or_series
        data = [1, 2, 3]
        result = convert_pandas_type_to_json_field(kind(data, name="name"))
        expected = {"name": "name", "type": "integer"}
        assert result == expected

    def test_convert_pandas_type_to_json_field_float(self, index_or_series):
        kind = index_or_series
        data = [1.0, 2.0, 3.0]
        result = convert_pandas_type_to_json_field(kind(data, name="name"))
        expected = {"name": "name", "type": "number"}
        assert result == expected

    @pytest.mark.parametrize(
        "dt_args,extra_exp", [({}, {}), ({"utc": True}, {"tz": "UTC"})]
    )
    @pytest.mark.parametrize("wrapper", [None, pd.Series])
    def test_convert_pandas_type_to_json_field_datetime(
        self, dt_args, extra_exp, wrapper
    ):
        data = [1.0, 2.0, 3.0]
        data = pd.to_datetime(data, **dt_args)
        if wrapper is pd.Series:
            data = pd.Series(data, name="values")
        result = convert_pandas_type_to_json_field(data)
        expected = {"name": "values", "type": "datetime"}
        expected.update(extra_exp)
        assert result == expected

    def test_convert_pandas_type_to_json_period_range(self):
        arr = pd.period_range("2016", freq="A-DEC", periods=4)
        result = convert_pandas_type_to_json_field(arr)
        expected = {"name": "values", "type": "datetime", "freq": "A-DEC"}
        assert result == expected

    @pytest.mark.parametrize("kind", [pd.Categorical, pd.CategoricalIndex])
    @pytest.mark.parametrize("ordered", [True, False])
    def test_convert_pandas_type_to_json_field_categorical(self, kind, ordered):
        data = ["a", "b", "c"]
        if kind is pd.Categorical:
            arr = pd.Series(kind(data, ordered=ordered), name="cats")
        elif kind is pd.CategoricalIndex:
            arr = kind(data, ordered=ordered, name="cats")

        result = convert_pandas_type_to_json_field(arr)
        expected = {
            "name": "cats",
            "type": "any",
            "constraints": {"enum": data},
            "ordered": ordered,
        }
        assert result == expected

    @pytest.mark.parametrize(
        "inp,exp",
        [
            ({"type": "integer"}, "int64"),
            ({"type": "number"}, "float64"),
            ({"type": "boolean"}, "bool"),
            ({"type": "duration"}, "timedelta64"),
            ({"type": "datetime"}, "datetime64[ns]"),
            ({"type": "datetime", "tz": "US/Hawaii"}, "datetime64[ns, US/Hawaii]"),
            ({"type": "any"}, "object"),
            (
                {
                    "type": "any",
                    "constraints": {"enum": ["a", "b", "c"]},
                    "ordered": False,
                },
                CategoricalDtype(categories=["a", "b", "c"], ordered=False),
            ),
            (
                {
                    "type": "any",
                    "constraints": {"enum": ["a", "b", "c"]},
                    "ordered": True,
                },
                CategoricalDtype(categories=["a", "b", "c"], ordered=True),
            ),
            ({"type": "string"}, "object"),
        ],
    )
    def test_convert_json_field_to_pandas_type(self, inp, exp):
        field = {"name": "foo"}
        field.update(inp)
        assert convert_json_field_to_pandas_type(field) == exp

    @pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"])
    def test_convert_json_field_to_pandas_type_raises(self, inp):
        field = {"type": inp}
        with pytest.raises(
            ValueError, match=f"Unsupported or invalid field type: {inp}"
        ):
            convert_json_field_to_pandas_type(field)

    def test_categorical(self):
        s = pd.Series(pd.Categorical(["a", "b", "a"]))
        s.index.name = "idx"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "idx", "type": "integer"},
            {
                "constraints": {"enum": ["a", "b"]},
                "name": "values",
                "ordered": False,
                "type": "any",
            },
        ]
        expected = OrderedDict(
            [
                ("schema", {"fields": fields, "primaryKey": ["idx"]}),
                (
                    "data",
                    [
                        OrderedDict([("idx", 0), ("values", "a")]),
                        OrderedDict([("idx", 1), ("values", "b")]),
                        OrderedDict([("idx", 2), ("values", "a")]),
                    ],
                ),
            ]
        )
        assert result == expected

    @pytest.mark.parametrize(
        "idx,nm,prop",
        [
            (pd.Index([1]), "index", "name"),
            (pd.Index([1], name="myname"), "myname", "name"),
            (
                pd.MultiIndex.from_product([("a", "b"), ("c", "d")]),
                ["level_0", "level_1"],
                "names",
            ),
            (
                pd.MultiIndex.from_product(
                    [("a", "b"), ("c", "d")], names=["n1", "n2"]
                ),
                ["n1", "n2"],
                "names",
            ),
            (
                pd.MultiIndex.from_product(
                    [("a", "b"), ("c", "d")], names=["n1", None]
                ),
                ["n1", "level_1"],
                "names",
            ),
        ],
    )
    def test_set_names_unset(self, idx, nm, prop):
        data = pd.Series(1, idx)
        result = set_default_names(data)
        assert getattr(result.index, prop) == nm

    @pytest.mark.parametrize(
        "idx",
        [
            pd.Index([], name="index"),
            pd.MultiIndex.from_arrays(
                [["foo"], ["bar"]], names=("level_0", "level_1")
            ),
            pd.MultiIndex.from_arrays([["foo"], ["bar"]], names=("foo", "level_1")),
        ],
    )
    def test_warns_non_roundtrippable_names(self, idx):
        # GH 19130
        df = DataFrame(index=idx)
        df.index.name = "index"
        with tm.assert_produces_warning():
            set_default_names(df)

    def test_timestamp_in_columns(self):
        df = DataFrame(
            [[1, 2]], columns=[pd.Timestamp("2016"), pd.Timedelta(10, unit="s")]
        )
        result = df.to_json(orient="table")
        js = json.loads(result)
        assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000"
        assert js["schema"]["fields"][2]["name"] == "P0DT0H0M10S"

    @pytest.mark.parametrize(
        "case",
        [
            pd.Series([1], index=pd.Index([1], name="a"), name="a"),
            DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
            DataFrame(
                {"A": [1]},
                index=pd.MultiIndex.from_arrays([["a"], [1]], names=["A", "a"]),
            ),
        ],
    )
    def test_overlapping_names(self, case):
        with pytest.raises(ValueError, match="Overlapping"):
            case.to_json(orient="table")

    def test_mi_falsey_name(self):
        # GH 16203
        df = DataFrame(
            np.random.randn(4, 4),
            index=pd.MultiIndex.from_product([("A", "B"), ("a", "b")]),
        )
        result = [x["name"] for x in build_table_schema(df)["fields"]]
        assert result == ["level_0", "level_1", 0, 1, 2, 3]
class TestTableOrientReader:
    @pytest.mark.parametrize(
        "index_nm",
        [None, "idx", pytest.param("index", marks=pytest.mark.xfail), "level_0"],
    )
    @pytest.mark.parametrize(
        "vals",
        [
            {"ints": [1, 2, 3, 4]},
            {"objects": ["a", "b", "c", "d"]},
            {"objects": ["1", "2", "3", "4"]},
            {"date_ranges": pd.date_range("2016-01-01", freq="d", periods=4)},
            {"categoricals": pd.Series(pd.Categorical(["a", "b", "c", "c"]))},
            {
                "ordered_cats": pd.Series(
                    pd.Categorical(["a", "b", "c", "c"], ordered=True)
                )
            },
            {"floats": [1.0, 2.0, 3.0, 4.0]},
            {"floats": [1.1, 2.2, 3.3, 4.4]},
            {"bools": [True, False, False, True]},
            {
                "timezones": pd.date_range(
                    "2016-01-01", freq="d", periods=4, tz="US/Central"
                )  # added in GH 35973
            },
        ],
    )
    def test_read_json_table_orient(self, index_nm, vals, recwarn):
        df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    @pytest.mark.parametrize("index_nm", [None, "idx", "index"])
    @pytest.mark.parametrize(
        "vals",
        [{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}],
    )
    def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
        df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
        out = df.to_json(orient="table")
        with pytest.raises(NotImplementedError, match="can not yet read "):
            pd.read_json(out, orient="table")

    @pytest.mark.parametrize(
        "index_nm",
        [None, "idx", pytest.param("index", marks=pytest.mark.xfail), "level_0"],
    )
    @pytest.mark.parametrize(
        "vals",
        [
            {"ints": [1, 2, 3, 4]},
            {"objects": ["a", "b", "c", "d"]},
            {"objects": ["1", "2", "3", "4"]},
            {"date_ranges": pd.date_range("2016-01-01", freq="d", periods=4)},
            {"categoricals": pd.Series(pd.Categorical(["a", "b", "c", "c"]))},
            {
                "ordered_cats": pd.Series(
                    pd.Categorical(["a", "b", "c", "c"], ordered=True)
                )
            },
            {"floats": [1.0, 2.0, 3.0, 4.0]},
            {"floats": [1.1, 2.2, 3.3, 4.4]},
            {"bools": [True, False, False, True]},
            {
                "timezones": pd.date_range(
                    "2016-01-01", freq="d", periods=4, tz="US/Central"
                )  # added in GH 35973
            },
        ],
    )
    def test_read_json_table_period_orient(self, index_nm, vals, recwarn):
        df = DataFrame(
            vals,
            index=pd.Index(
                (pd.Period(f"2022Q{q}") for q in range(1, 5)), name=index_nm
            ),
        )
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    @pytest.mark.parametrize(
        "idx",
        [
            pd.Index(range(4)),
            pd.date_range(
                "2020-08-30",
                freq="d",
                periods=4,
            )._with_freq(None),
            pd.date_range(
                "2020-08-30", freq="d", periods=4, tz="US/Central"
            )._with_freq(None),
            pd.MultiIndex.from_product(
                [
                    pd.date_range("2020-08-30", freq="d", periods=2, tz="US/Central"),
                    ["x", "y"],
                ],
            ),
        ],
    )
    @pytest.mark.parametrize(
        "vals",
        [
            {"floats": [1.1, 2.2, 3.3, 4.4]},
            {"dates": pd.date_range("2020-08-30", freq="d", periods=4)},
            {
                "timezones": pd.date_range(
                    "2020-08-30", freq="d", periods=4, tz="Europe/London"
                )
            },
        ],
    )
    def test_read_json_table_timezones_orient(self, idx, vals, recwarn):
        # GH 35973
        df = DataFrame(vals, index=idx)
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    def test_comprehensive(self):
        df = DataFrame(
            {
                "A": [1, 2, 3, 4],
                "B": ["a", "b", "c", "c"],
                "C": pd.date_range("2016-01-01", freq="d", periods=4),
                # 'D': pd.timedelta_range('1H', periods=4, freq='T'),
                "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])),
                "F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)),
                "G": [1.1, 2.2, 3.3, 4.4],
                "H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"),
                "I": [True, False, False, True],
            },
            index=pd.Index(range(4), name="idx"),
        )
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    @pytest.mark.parametrize(
        "index_names",
        [[None, None], ["foo", "bar"], ["foo", None], [None, "foo"], ["index", "foo"]],
    )
    def test_multiindex(self, index_names):
        # GH 18912
        df = DataFrame(
            [["Arr", "alpha", [1, 2, 3, 4]], ["Bee", "Beta", [10, 20, 30, 40]]],
            index=[["A", "B"], ["Null", "Eins"]],
            columns=["Aussprache", "Griechisch", "Args"],
        )
        df.index.names = index_names
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    def test_empty_frame_roundtrip(self):
        # GH 21287
        df = DataFrame(columns=["a", "b", "c"])
        expected = df.copy()
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(expected, result)

    def test_read_json_orient_table_old_schema_version(self):
        df_json = """
        {
            "schema":{
                "fields":[
                    {"name":"index","type":"integer"},
                    {"name":"a","type":"string"}
                ],
                "primaryKey":["index"],
                "pandas_version":"0.20.0"
            },
            "data":[
                {"index":0,"a":1},
                {"index":1,"a":2.0},
                {"index":2,"a":"s"}
            ]
        }
        """
        expected = DataFrame({"a": [1, 2.0, "s"]})
        result = pd.read_json(df_json, orient="table")
        tm.assert_frame_equal(expected, result)