123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299 |
- """
- Tests that work on both the Python and C engines but do not have a
- specific classification into the other test modules.
- """
- from datetime import datetime
- from io import StringIO
- import os
- import pytest
- from pandas import (
- DataFrame,
- Index,
- MultiIndex,
- )
- import pandas._testing as tm
- xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
- # GH#43650: Some expected failures with the pyarrow engine can occasionally
- # cause a deadlock instead, so we skip these instead of xfailing
- skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
- @pytest.mark.parametrize(
- "data,kwargs,expected",
- [
- (
- """foo,2,3,4,5
- bar,7,8,9,10
- baz,12,13,14,15
- qux,12,13,14,15
- foo2,12,13,14,15
- bar2,12,13,14,15
- """,
- {"index_col": 0, "names": ["index", "A", "B", "C", "D"]},
- DataFrame(
- [
- [2, 3, 4, 5],
- [7, 8, 9, 10],
- [12, 13, 14, 15],
- [12, 13, 14, 15],
- [12, 13, 14, 15],
- [12, 13, 14, 15],
- ],
- index=Index(["foo", "bar", "baz", "qux", "foo2", "bar2"], name="index"),
- columns=["A", "B", "C", "D"],
- ),
- ),
- (
- """foo,one,2,3,4,5
- foo,two,7,8,9,10
- foo,three,12,13,14,15
- bar,one,12,13,14,15
- bar,two,12,13,14,15
- """,
- {"index_col": [0, 1], "names": ["index1", "index2", "A", "B", "C", "D"]},
- DataFrame(
- [
- [2, 3, 4, 5],
- [7, 8, 9, 10],
- [12, 13, 14, 15],
- [12, 13, 14, 15],
- [12, 13, 14, 15],
- ],
- index=MultiIndex.from_tuples(
- [
- ("foo", "one"),
- ("foo", "two"),
- ("foo", "three"),
- ("bar", "one"),
- ("bar", "two"),
- ],
- names=["index1", "index2"],
- ),
- columns=["A", "B", "C", "D"],
- ),
- ),
- ],
- )
- def test_pass_names_with_index(all_parsers, data, kwargs, expected):
- parser = all_parsers
- result = parser.read_csv(StringIO(data), **kwargs)
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize("index_col", [[0, 1], [1, 0]])
- def test_multi_index_no_level_names(all_parsers, index_col):
- data = """index1,index2,A,B,C,D
- foo,one,2,3,4,5
- foo,two,7,8,9,10
- foo,three,12,13,14,15
- bar,one,12,13,14,15
- bar,two,12,13,14,15
- """
- headless_data = "\n".join(data.split("\n")[1:])
- names = ["A", "B", "C", "D"]
- parser = all_parsers
- result = parser.read_csv(
- StringIO(headless_data), index_col=index_col, header=None, names=names
- )
- expected = parser.read_csv(StringIO(data), index_col=index_col)
- # No index names in headless data.
- expected.index.names = [None] * 2
- tm.assert_frame_equal(result, expected)
- @xfail_pyarrow
- def test_multi_index_no_level_names_implicit(all_parsers):
- parser = all_parsers
- data = """A,B,C,D
- foo,one,2,3,4,5
- foo,two,7,8,9,10
- foo,three,12,13,14,15
- bar,one,12,13,14,15
- bar,two,12,13,14,15
- """
- result = parser.read_csv(StringIO(data))
- expected = DataFrame(
- [
- [2, 3, 4, 5],
- [7, 8, 9, 10],
- [12, 13, 14, 15],
- [12, 13, 14, 15],
- [12, 13, 14, 15],
- ],
- columns=["A", "B", "C", "D"],
- index=MultiIndex.from_tuples(
- [
- ("foo", "one"),
- ("foo", "two"),
- ("foo", "three"),
- ("bar", "one"),
- ("bar", "two"),
- ]
- ),
- )
- tm.assert_frame_equal(result, expected)
- @xfail_pyarrow
- @pytest.mark.parametrize(
- "data,expected,header",
- [
- ("a,b", DataFrame(columns=["a", "b"]), [0]),
- (
- "a,b\nc,d",
- DataFrame(columns=MultiIndex.from_tuples([("a", "c"), ("b", "d")])),
- [0, 1],
- ),
- ],
- )
- @pytest.mark.parametrize("round_trip", [True, False])
- def test_multi_index_blank_df(all_parsers, data, expected, header, round_trip):
- # see gh-14545
- parser = all_parsers
- data = expected.to_csv(index=False) if round_trip else data
- result = parser.read_csv(StringIO(data), header=header)
- tm.assert_frame_equal(result, expected)
- @xfail_pyarrow
- def test_no_unnamed_index(all_parsers):
- parser = all_parsers
- data = """ id c0 c1 c2
- 0 1 0 a b
- 1 2 0 c d
- 2 2 2 e f
- """
- result = parser.read_csv(StringIO(data), sep=" ")
- expected = DataFrame(
- [[0, 1, 0, "a", "b"], [1, 2, 0, "c", "d"], [2, 2, 2, "e", "f"]],
- columns=["Unnamed: 0", "id", "c0", "c1", "c2"],
- )
- tm.assert_frame_equal(result, expected)
- def test_read_duplicate_index_explicit(all_parsers):
- data = """index,A,B,C,D
- foo,2,3,4,5
- bar,7,8,9,10
- baz,12,13,14,15
- qux,12,13,14,15
- foo,12,13,14,15
- bar,12,13,14,15
- """
- parser = all_parsers
- result = parser.read_csv(StringIO(data), index_col=0)
- expected = DataFrame(
- [
- [2, 3, 4, 5],
- [7, 8, 9, 10],
- [12, 13, 14, 15],
- [12, 13, 14, 15],
- [12, 13, 14, 15],
- [12, 13, 14, 15],
- ],
- columns=["A", "B", "C", "D"],
- index=Index(["foo", "bar", "baz", "qux", "foo", "bar"], name="index"),
- )
- tm.assert_frame_equal(result, expected)
- @xfail_pyarrow
- def test_read_duplicate_index_implicit(all_parsers):
- data = """A,B,C,D
- foo,2,3,4,5
- bar,7,8,9,10
- baz,12,13,14,15
- qux,12,13,14,15
- foo,12,13,14,15
- bar,12,13,14,15
- """
- parser = all_parsers
- result = parser.read_csv(StringIO(data))
- expected = DataFrame(
- [
- [2, 3, 4, 5],
- [7, 8, 9, 10],
- [12, 13, 14, 15],
- [12, 13, 14, 15],
- [12, 13, 14, 15],
- [12, 13, 14, 15],
- ],
- columns=["A", "B", "C", "D"],
- index=Index(["foo", "bar", "baz", "qux", "foo", "bar"]),
- )
- tm.assert_frame_equal(result, expected)
- @xfail_pyarrow
- def test_read_csv_no_index_name(all_parsers, csv_dir_path):
- parser = all_parsers
- csv2 = os.path.join(csv_dir_path, "test2.csv")
- result = parser.read_csv(csv2, index_col=0, parse_dates=True)
- expected = DataFrame(
- [
- [0.980269, 3.685731, -0.364216805298, -1.159738, "foo"],
- [1.047916, -0.041232, -0.16181208307, 0.212549, "bar"],
- [0.498581, 0.731168, -0.537677223318, 1.346270, "baz"],
- [1.120202, 1.567621, 0.00364077397681, 0.675253, "qux"],
- [-0.487094, 0.571455, -1.6116394093, 0.103469, "foo2"],
- ],
- columns=["A", "B", "C", "D", "E"],
- index=Index(
- [
- datetime(2000, 1, 3),
- datetime(2000, 1, 4),
- datetime(2000, 1, 5),
- datetime(2000, 1, 6),
- datetime(2000, 1, 7),
- ]
- ),
- )
- tm.assert_frame_equal(result, expected)
- @xfail_pyarrow
- def test_empty_with_index(all_parsers):
- # see gh-10184
- data = "x,y"
- parser = all_parsers
- result = parser.read_csv(StringIO(data), index_col=0)
- expected = DataFrame(columns=["y"], index=Index([], name="x"))
- tm.assert_frame_equal(result, expected)
- @skip_pyarrow
- def test_empty_with_multi_index(all_parsers):
- # see gh-10467
- data = "x,y,z"
- parser = all_parsers
- result = parser.read_csv(StringIO(data), index_col=["x", "y"])
- expected = DataFrame(
- columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["x", "y"])
- )
- tm.assert_frame_equal(result, expected)
- @skip_pyarrow
- def test_empty_with_reversed_multi_index(all_parsers):
- data = "x,y,z"
- parser = all_parsers
- result = parser.read_csv(StringIO(data), index_col=[1, 0])
- expected = DataFrame(
- columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["y", "x"])
- )
- tm.assert_frame_equal(result, expected)
|