123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177 |
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm
def test_data_frame_value_counts_unsorted():
    """Check ``DataFrame.value_counts(sort=False)`` on a two-column frame.

    The expected Series holds one entry per distinct
    ``(num_legs, num_wings)`` row, is indexed by a MultiIndex named after
    the columns, and is named ``"count"``.
    """
    df = pd.DataFrame(
        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
        index=["falcon", "dog", "cat", "ant"],
    )

    result = df.value_counts(sort=False)

    # The row (4, 0) occurs twice ("dog" and "cat"); the others occur once.
    expected = pd.Series(
        data=[1, 2, 1],
        index=pd.MultiIndex.from_arrays(
            [(2, 4, 6), (2, 0, 0)], names=["num_legs", "num_wings"]
        ),
        name="count",
    )

    tm.assert_series_equal(result, expected)
def test_data_frame_value_counts_ascending():
    """Check ``DataFrame.value_counts(ascending=True)`` on a two-column frame.

    The expected Series lists the rows that occur once before the row that
    occurs twice, and is named ``"count"``.
    """
    df = pd.DataFrame(
        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
        index=["falcon", "dog", "cat", "ant"],
    )

    result = df.value_counts(ascending=True)

    # Smallest counts first: (2, 2) and (6, 0) once each, then (4, 0) twice.
    expected = pd.Series(
        data=[1, 1, 2],
        index=pd.MultiIndex.from_arrays(
            [(2, 6, 4), (2, 0, 0)], names=["num_legs", "num_wings"]
        ),
        name="count",
    )

    tm.assert_series_equal(result, expected)
def test_data_frame_value_counts_default():
    """Check ``DataFrame.value_counts()`` with default arguments.

    The expected Series lists the most frequent row first and is named
    ``"count"``.
    """
    df = pd.DataFrame(
        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
        index=["falcon", "dog", "cat", "ant"],
    )

    result = df.value_counts()

    # Largest count first: (4, 0) twice, then (2, 2) and (6, 0) once each.
    expected = pd.Series(
        data=[2, 1, 1],
        index=pd.MultiIndex.from_arrays(
            [(4, 2, 6), (0, 2, 0)], names=["num_legs", "num_wings"]
        ),
        name="count",
    )

    tm.assert_series_equal(result, expected)
def test_data_frame_value_counts_normalize():
    """Check ``DataFrame.value_counts(normalize=True)`` on a two-column frame.

    The expected Series holds each row's share of the four input rows and is
    named ``"proportion"`` rather than ``"count"``.
    """
    df = pd.DataFrame(
        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
        index=["falcon", "dog", "cat", "ant"],
    )

    result = df.value_counts(normalize=True)

    # (4, 0) is 2 of 4 rows; (2, 2) and (6, 0) are 1 of 4 rows each.
    expected = pd.Series(
        data=[0.5, 0.25, 0.25],
        index=pd.MultiIndex.from_arrays(
            [(4, 2, 6), (0, 2, 0)], names=["num_legs", "num_wings"]
        ),
        name="proportion",
    )

    tm.assert_series_equal(result, expected)
def test_data_frame_value_counts_single_col_default():
    """Check ``DataFrame.value_counts()`` on a single-column frame.

    The expected Series is still indexed by a (one-level) MultiIndex named
    after the column, and is named ``"count"``.
    """
    df = pd.DataFrame({"num_legs": [2, 4, 4, 6]})

    result = df.value_counts()

    expected = pd.Series(
        data=[2, 1, 1],
        index=pd.MultiIndex.from_arrays([[4, 2, 6]], names=["num_legs"]),
        name="count",
    )

    tm.assert_series_equal(result, expected)
def test_data_frame_value_counts_empty():
    """Check ``DataFrame.value_counts()`` on a frame with no columns.

    The expected result is an empty ``int64`` Series named ``"count"`` whose
    index is built from an empty ``np.intp`` array.
    """
    df_no_cols = pd.DataFrame()

    result = df_no_cols.value_counts()

    expected = pd.Series(
        [], dtype=np.int64, name="count", index=np.array([], dtype=np.intp)
    )

    tm.assert_series_equal(result, expected)
def test_data_frame_value_counts_empty_normalize():
    """Check ``DataFrame.value_counts(normalize=True)`` on a frame with no columns.

    The expected result is an empty ``float64`` Series named
    ``"proportion"`` whose index is built from an empty ``np.intp`` array.
    """
    df_no_cols = pd.DataFrame()

    result = df_no_cols.value_counts(normalize=True)

    expected = pd.Series(
        [], dtype=np.float64, name="proportion", index=np.array([], dtype=np.intp)
    )

    tm.assert_series_equal(result, expected)
def test_data_frame_value_counts_dropna_true(nulls_fixture):
    """Check that ``DataFrame.value_counts()`` drops rows containing a null.

    ``nulls_fixture`` supplies the null value placed in ``middle_name``
    (presumably a pytest fixture defined outside this file). The two rows
    that contain it are expected to be absent from the result.
    """
    df = pd.DataFrame(
        {
            "first_name": ["John", "Anne", "John", "Beth"],
            "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"],
        },
    )

    result = df.value_counts()

    # Only the two fully populated rows remain, each occurring once.
    expected = pd.Series(
        data=[1, 1],
        index=pd.MultiIndex.from_arrays(
            [("Beth", "John"), ("Louise", "Smith")], names=["first_name", "middle_name"]
        ),
        name="count",
    )

    tm.assert_series_equal(result, expected)
def test_data_frame_value_counts_dropna_false(nulls_fixture):
    """Check that ``DataFrame.value_counts(dropna=False)`` keeps null rows.

    ``nulls_fixture`` supplies the null value placed in ``middle_name``
    (presumably a pytest fixture defined outside this file). All four input
    rows are distinct, so each is expected with a count of one.
    """
    df = pd.DataFrame(
        {
            "first_name": ["John", "Anne", "John", "Beth"],
            "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"],
        },
    )

    result = df.value_counts(dropna=False)

    # The codes spell out, in order: (Anne, null), (Beth, Louise),
    # (John, Smith), (John, null). The null is the last entry of level two.
    expected = pd.Series(
        data=[1, 1, 1, 1],
        index=pd.MultiIndex(
            levels=[
                pd.Index(["Anne", "Beth", "John"]),
                pd.Index(["Louise", "Smith", nulls_fixture]),
            ],
            codes=[[0, 1, 2, 2], [2, 0, 1, 2]],
            names=["first_name", "middle_name"],
        ),
        name="count",
    )

    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("columns", (["first_name", "middle_name"], [0, 1]))
def test_data_frame_value_counts_subset(nulls_fixture, columns):
    """Check ``DataFrame.value_counts`` when counting a single column.

    ``columns`` provides the two column labels (string or integer labels,
    per the parametrization). ``nulls_fixture`` supplies the null value
    placed in the second column (presumably a pytest fixture defined outside
    this file). Only the first column is counted, so the expected result
    uses a plain ``Index`` named after that column and still includes the
    rows whose second column is null.
    """
    df = pd.DataFrame(
        {
            columns[0]: ["John", "Anne", "John", "Beth"],
            columns[1]: ["Smith", nulls_fixture, nulls_fixture, "Louise"],
        },
    )

    result = df.value_counts(columns[0])

    # "John" occurs twice in the counted column; "Anne" and "Beth" once each.
    expected = pd.Series(
        data=[2, 1, 1],
        index=pd.Index(["John", "Anne", "Beth"], name=columns[0]),
        name="count",
    )

    tm.assert_series_equal(result, expected)
|