123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359 |
- from datetime import datetime
- from io import StringIO
- from textwrap import dedent
- import numpy as np
- import pytest
- import pandas.util._test_decorators as td
- from pandas import (
- DataFrame,
- Series,
- option_context,
- to_datetime,
- )
def test_repr_embedded_ndarray():
    """Smoke test: a frame whose object column stores ndarrays can be rendered."""
    records = np.empty(10, dtype=[("err", object)])
    errors = records["err"]
    # Slot ``n`` receives a random vector of length ``n`` (slot 0 is empty).
    for n in range(records.shape[0]):
        errors[n] = np.random.randn(n)

    frame = DataFrame(records)
    repr(frame["err"])
    repr(frame)
    frame.to_string()
def test_repr_tuples():
    """Smoke test: a column of tuples survives ``repr`` and ``to_string``."""
    pairs = [(i, i) for i in range(10)]
    frame = DataFrame({"tups": pairs})
    repr(frame)

    sink = StringIO()
    frame.to_string(col_space=10, buf=sink)
def test_to_string_truncate():
    """Check that ``to_string`` truncates cells only via its own ``max_colwidth``.

    GH 9784 - dont truncate when calling DataFrame.to_string. The
    ``max_colwidth`` display option must leave the output untouched; only the
    explicit ``max_colwidth`` argument shortens long cells.
    """
    long_text = (
        "let's make this a very VERY long line that is longer "
        "than the default 50 character limit"
    )
    df = DataFrame(
        [
            {"a": "foo", "b": "bar", "c": long_text, "d": 1},
            {"a": "foo", "b": "bar", "c": "stuff", "d": 1},
        ]
    )

    # Cells are right-justified to the widest entry of their column, so the
    # expected text is built from the cell width instead of hand-counted
    # runs of blanks.
    width = len(long_text)
    untruncated = "\n".join(
        [
            f"     a    b  {'c':>{width}}  d",
            f"0  foo  bar  {long_text}  1",
            f"1  foo  bar  {'stuff':>{width}}  1",
        ]
    )
    assert df.to_string() == untruncated

    with option_context("max_colwidth", 20):
        # the display option has no effect on the to_string method
        assert df.to_string() == untruncated

    # The shortened cell "let's make this ..." is 19 characters wide.
    truncated = "\n".join(
        [
            f"     a    b  {'c':>19}  d",
            "0  foo  bar  let's make this ...  1",
            f"1  foo  bar  {'stuff':>19}  1",
        ]
    )
    assert df.to_string(max_colwidth=20) == truncated
@pytest.mark.parametrize(
    "input_array, expected",
    [
        ("a", "a"),
        (["a", "b"], "a\nb"),
        ([1, "a"], "1\na"),
        (1, "1"),
        ([0, -1], " 0\n-1"),
        (1.0, "1.0"),
        ([" a", " b"], " a\n b"),
        ([".1", "1"], ".1\n 1"),
        (["10", "-10"], " 10\n-10"),
    ],
)
def test_format_remove_leading_space_series(input_array, expected):
    """Compare ``Series.to_string(index=False)`` with the expected text (GH: 24980)."""
    series = Series(input_array)
    rendered = series.to_string(index=False)
    assert rendered == expected
@pytest.mark.parametrize(
    "input_array, expected",
    [
        ({"A": ["a"]}, "A\na"),
        # Column "B" is two characters wide ("dd" / "aa"); its shorter cells
        # and its header are right-justified, hence the double blanks.
        ({"A": ["a", "b"], "B": ["c", "dd"]}, "A  B\na  c\nb dd"),
        ({"A": ["a", 1], "B": ["aa", 1]}, "A  B\na aa\n1  1"),
    ],
)
def test_format_remove_leading_space_dataframe(input_array, expected):
    """Compare ``DataFrame.to_string(index=False)`` with the expected text (GH: 24980)."""
    rendered = DataFrame(input_array).to_string(index=False)
    assert rendered == expected
# NOTE(review): the padding below was reconstructed on the assumption that,
# once the two-character label "10" is among the displayed columns, every
# column label is left-padded to two characters and the numeric header gains
# one leading blank, making each column three characters wide. Only the
# (1, 1) case, where "0" is the sole label, keeps a two-character column.
# Confirm against real output before relying on the exact widths.
@pytest.mark.parametrize(
    "max_cols, max_rows, expected",
    [
        (
            10,
            None,
            " 0   1   2   3   4   ...  6   7   8   9   10\n"
            "  0   0   0   0   0  ...   0   0   0   0   0\n"
            "  0   0   0   0   0  ...   0   0   0   0   0\n"
            "  0   0   0   0   0  ...   0   0   0   0   0\n"
            "  0   0   0   0   0  ...   0   0   0   0   0",
        ),
        (
            None,
            2,
            " 0   1   2   3   4   5   6   7   8   9   10\n"
            "  0   0   0   0   0   0   0   0   0   0   0\n"
            " ..  ..  ..  ..  ..  ..  ..  ..  ..  ..  ..\n"
            "  0   0   0   0   0   0   0   0   0   0   0",
        ),
        (
            10,
            2,
            " 0   1   2   3   4   ...  6   7   8   9   10\n"
            "  0   0   0   0   0  ...   0   0   0   0   0\n"
            " ..  ..  ..  ..  ..  ...  ..  ..  ..  ..  ..\n"
            "  0   0   0   0   0  ...   0   0   0   0   0",
        ),
        (
            9,
            2,
            " 0   1   2   3   ...  7   8   9   10\n"
            "  0   0   0   0  ...   0   0   0   0\n"
            " ..  ..  ..  ..  ...  ..  ..  ..  ..\n"
            "  0   0   0   0  ...   0   0   0   0",
        ),
        (
            1,
            1,
            " 0  ...\n 0  ...\n..  ...",
        ),
    ],
)
def test_truncation_no_index(max_cols, max_rows, expected):
    """Check row/column truncation markers of ``to_string`` when ``index=False``.

    A 4 x 11 frame of zeros is rendered with the given ``max_cols`` and
    ``max_rows`` limits and compared with the expected text.
    """
    df = DataFrame([[0] * 11] * 4)
    rendered = df.to_string(index=False, max_cols=max_cols, max_rows=max_rows)
    assert rendered == expected
- def test_to_string_unicode_columns(float_frame):
- df = DataFrame({"\u03c3": np.arange(10.0)})
- buf = StringIO()
- df.to_string(buf=buf)
- buf.getvalue()
- buf = StringIO()
- df.info(buf=buf)
- buf.getvalue()
- result = float_frame.to_string()
- assert isinstance(result, str)
def test_to_string_utf8_columns():
    """Smoke test: ``repr`` works when the column label is an encoded ``bytes`` object."""
    label = "\u05d0".encode()
    with option_context("display.max_rows", 1):
        frame = DataFrame([1, 2], columns=[label])
        repr(frame)
def test_to_string_unicode_two():
    """Smoke test: an empty frame with a non-ASCII column label can be written."""
    empty_frame = DataFrame({"c/\u03c3": []})
    sink = StringIO()
    empty_frame.to_string(sink)
def test_to_string_unicode_three():
    """Smoke test: a frame holding the character ``"\\xc2"`` can be written."""
    frame = DataFrame(["\xc2"])
    sink = StringIO()
    frame.to_string(sink)
def test_to_string_with_formatters():
    """Check per-column ``formatters`` given as a dict and as a positional sequence.

    Both spellings must produce the same text, with every cell passed
    through the formatter of its column.
    """
    df = DataFrame(
        {
            "int": [1, 2, 3],
            "float": [1.0, 2.0, 3.0],
            "object": [(1, 2), True, False],
        },
        columns=["int", "float", "object"],
    )

    formatters = [
        ("int", lambda x: f"0x{x:x}"),
        ("float", lambda x: f"[{x: 4.1f}]"),
        ("object", lambda x: f"-{x!s}-"),
    ]
    result = df.to_string(formatters=dict(formatters))
    # Same callables, passed by position instead of by column name.
    result2 = df.to_string(formatters=list(zip(*formatters))[1])

    # Headers and cells are right-justified within each column.
    assert result == (
        "  int  float    object\n"
        "0 0x1 [ 1.0]  -(1, 2)-\n"
        "1 0x2 [ 2.0]    -True-\n"
        "2 0x3 [ 3.0]   -False-"
    )
    assert result == result2
def test_to_string_with_datetime64_monthformatter():
    """Check a datetime column rendered through a ``%Y-%m`` formatter."""
    frame = DataFrame({"months": [datetime(2016, 1, 1), datetime(2016, 2, 2)]})

    def year_month(stamp):
        return stamp.strftime("%Y-%m")

    rendered = frame.to_string(formatters={"months": year_month})
    expected = dedent(
        """\
        months
        0 2016-01
        1 2016-02"""
    )
    # Surrounding whitespace (such as header padding) is stripped before comparing.
    assert rendered.strip() == expected
def test_to_string_with_datetime64_hourformatter():
    """Check a datetime column rendered through a ``%H:%M`` formatter."""
    times = to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f")
    frame = DataFrame({"hod": times})

    def hour_minute(stamp):
        return stamp.strftime("%H:%M")

    rendered = frame.to_string(formatters={"hod": hour_minute})
    expected = dedent(
        """\
        hod
        0 10:10
        1 12:12"""
    )
    # Surrounding whitespace (such as header padding) is stripped before comparing.
    assert rendered.strip() == expected
def test_to_string_with_formatters_unicode():
    """Check that a formatter keyed by a non-ASCII column label is applied."""
    df = DataFrame({"c/\u03c3": [1, 2, 3]})
    result = df.to_string(formatters={"c/\u03c3": str})
    # The header is three characters wide; the one-digit cells are
    # right-justified beneath it.
    expected = dedent(
        """\
          c/\u03c3
        0   1
        1   2
        2   3"""
    )
    assert result == expected
def test_to_string_complex_number_trims_zeros():
    """Check that complex values are shown with a common, trimmed precision.

    The values need at most two decimals (``1.05``), so all real and
    imaginary parts are expected with exactly two.
    """
    s = Series([1.000000 + 1.000000j, 1.0 + 1.0j, 1.05 + 1.0j])
    result = s.to_string()
    # NOTE(review): gutter width reconstructed as four blanks between index
    # and value - confirm against real output.
    expected = dedent(
        """\
        0    1.00+1.00j
        1    1.00+1.00j
        2    1.05+1.00j"""
    )
    assert result == expected
- def test_nullable_float_to_string(float_ea_dtype):
- # https://github.com/pandas-dev/pandas/issues/36775
- dtype = float_ea_dtype
- s = Series([0.0, 1.0, None], dtype=dtype)
- result = s.to_string()
- expected = dedent(
- """\
- 0 0.0
- 1 1.0
- 2 <NA>"""
- )
- assert result == expected
- def test_nullable_int_to_string(any_int_ea_dtype):
- # https://github.com/pandas-dev/pandas/issues/36775
- dtype = any_int_ea_dtype
- s = Series([0, 1, None], dtype=dtype)
- result = s.to_string()
- expected = dedent(
- """\
- 0 0
- 1 1
- 2 <NA>"""
- )
- assert result == expected
@pytest.mark.parametrize("na_rep", ["NaN", "Ted"])
def test_to_string_na_rep_and_float_format(na_rep):
    """Check that ``na_rep`` is honoured when ``float_format`` is also given."""
    # GH 13828
    df = DataFrame([["A", 1.2225], ["A", None]], columns=["Group", "Data"])
    result = df.to_string(na_rep=na_rep, float_format="{:.2f}".format)
    # Both parametrized markers are three characters long, one shorter than
    # "1.22", so the missing cell is padded by one extra blank.
    expected = dedent(
        f"""\
          Group  Data
        0     A  1.22
        1     A   {na_rep}"""
    )
    assert result == expected
@pytest.mark.parametrize(
    "data,expected",
    [
        (
            {"col1": [1, 2], "col2": [3, 4]},
            "   col1  col2\n0     1     3\n1     2     4",
        ),
        (
            {"col1": ["Abc", 0.756], "col2": [np.nan, 4.5435]},
            "    col1    col2\n0    Abc     NaN\n1  0.756  4.5435",
        ),
        (
            {"col1": [np.nan, "a"], "col2": [0.009, 3.543], "col3": ["Abc", 23]},
            "  col1   col2 col3\n0  NaN  0.009  Abc\n1    a  3.543   23",
        ),
    ],
)
def test_to_string_max_rows_zero(data, expected):
    """Check that ``to_string(max_rows=0)`` renders every row of the frame."""
    # GH35394
    result = DataFrame(data=data).to_string(max_rows=0)
    assert result == expected
@td.skip_if_no("pyarrow")
def test_to_string_string_dtype():
    """Check how extension dtypes are spelled in ``df.dtypes.to_string()``."""
    # GH#50099
    df = DataFrame({"x": ["foo", "bar", "baz"], "y": ["a", "b", "c"], "z": [1, 2, 3]})
    df = df.astype(
        {"x": "string[pyarrow]", "y": "string[python]", "z": "int64[pyarrow]"}
    )
    result = df.dtypes.to_string()
    # Dtype names are right-justified to the longest one, "string[pyarrow]".
    expected = dedent(
        """\
        x    string[pyarrow]
        y     string[python]
        z     int64[pyarrow]"""
    )
    assert result == expected
|