123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734 |
- from contextlib import contextmanager
- import re
- import struct
- import tracemalloc
- from typing import Generator
- import numpy as np
- import pytest
- from pandas._libs import hashtable as ht
- import pandas as pd
- import pandas._testing as tm
- from pandas.core.algorithms import isin
- @contextmanager
- def activated_tracemalloc() -> Generator[None, None, None]:
- tracemalloc.start()
- try:
- yield
- finally:
- tracemalloc.stop()
- def get_allocated_khash_memory():
- snapshot = tracemalloc.take_snapshot()
- snapshot = snapshot.filter_traces(
- (tracemalloc.DomainFilter(True, ht.get_hashtable_trace_domain()),)
- )
- return sum(map(lambda x: x.size, snapshot.traces))
- @pytest.mark.parametrize(
- "table_type, dtype",
- [
- (ht.PyObjectHashTable, np.object_),
- (ht.Complex128HashTable, np.complex128),
- (ht.Int64HashTable, np.int64),
- (ht.UInt64HashTable, np.uint64),
- (ht.Float64HashTable, np.float64),
- (ht.Complex64HashTable, np.complex64),
- (ht.Int32HashTable, np.int32),
- (ht.UInt32HashTable, np.uint32),
- (ht.Float32HashTable, np.float32),
- (ht.Int16HashTable, np.int16),
- (ht.UInt16HashTable, np.uint16),
- (ht.Int8HashTable, np.int8),
- (ht.UInt8HashTable, np.uint8),
- (ht.IntpHashTable, np.intp),
- ],
- )
- class TestHashTable:
- def test_get_set_contains_len(self, table_type, dtype):
- index = 5
- table = table_type(55)
- assert len(table) == 0
- assert index not in table
- table.set_item(index, 42)
- assert len(table) == 1
- assert index in table
- assert table.get_item(index) == 42
- table.set_item(index + 1, 41)
- assert index in table
- assert index + 1 in table
- assert len(table) == 2
- assert table.get_item(index) == 42
- assert table.get_item(index + 1) == 41
- table.set_item(index, 21)
- assert index in table
- assert index + 1 in table
- assert len(table) == 2
- assert table.get_item(index) == 21
- assert table.get_item(index + 1) == 41
- assert index + 2 not in table
- table.set_item(index + 1, 21)
- assert index in table
- assert index + 1 in table
- assert len(table) == 2
- assert table.get_item(index) == 21
- assert table.get_item(index + 1) == 21
- with pytest.raises(KeyError, match=str(index + 2)):
- table.get_item(index + 2)
- def test_get_set_contains_len_mask(self, table_type, dtype):
- if table_type == ht.PyObjectHashTable:
- pytest.skip("Mask not supported for object")
- index = 5
- table = table_type(55, uses_mask=True)
- assert len(table) == 0
- assert index not in table
- table.set_item(index, 42)
- assert len(table) == 1
- assert index in table
- assert table.get_item(index) == 42
- with pytest.raises(KeyError, match="NA"):
- table.get_na()
- table.set_item(index + 1, 41)
- table.set_na(41)
- assert pd.NA in table
- assert index in table
- assert index + 1 in table
- assert len(table) == 3
- assert table.get_item(index) == 42
- assert table.get_item(index + 1) == 41
- assert table.get_na() == 41
- table.set_na(21)
- assert index in table
- assert index + 1 in table
- assert len(table) == 3
- assert table.get_item(index + 1) == 41
- assert table.get_na() == 21
- assert index + 2 not in table
- with pytest.raises(KeyError, match=str(index + 2)):
- table.get_item(index + 2)
- def test_map_keys_to_values(self, table_type, dtype, writable):
- # only Int64HashTable has this method
- if table_type == ht.Int64HashTable:
- N = 77
- table = table_type()
- keys = np.arange(N).astype(dtype)
- vals = np.arange(N).astype(np.int64) + N
- keys.flags.writeable = writable
- vals.flags.writeable = writable
- table.map_keys_to_values(keys, vals)
- for i in range(N):
- assert table.get_item(keys[i]) == i + N
- def test_map_locations(self, table_type, dtype, writable):
- N = 8
- table = table_type()
- keys = (np.arange(N) + N).astype(dtype)
- keys.flags.writeable = writable
- table.map_locations(keys)
- for i in range(N):
- assert table.get_item(keys[i]) == i
- def test_map_locations_mask(self, table_type, dtype, writable):
- if table_type == ht.PyObjectHashTable:
- pytest.skip("Mask not supported for object")
- N = 3
- table = table_type(uses_mask=True)
- keys = (np.arange(N) + N).astype(dtype)
- keys.flags.writeable = writable
- table.map_locations(keys, np.array([False, False, True]))
- for i in range(N - 1):
- assert table.get_item(keys[i]) == i
- with pytest.raises(KeyError, match=re.escape(str(keys[N - 1]))):
- table.get_item(keys[N - 1])
- assert table.get_na() == 2
- def test_lookup(self, table_type, dtype, writable):
- N = 3
- table = table_type()
- keys = (np.arange(N) + N).astype(dtype)
- keys.flags.writeable = writable
- table.map_locations(keys)
- result = table.lookup(keys)
- expected = np.arange(N)
- tm.assert_numpy_array_equal(result.astype(np.int64), expected.astype(np.int64))
- def test_lookup_wrong(self, table_type, dtype):
- if dtype in (np.int8, np.uint8):
- N = 100
- else:
- N = 512
- table = table_type()
- keys = (np.arange(N) + N).astype(dtype)
- table.map_locations(keys)
- wrong_keys = np.arange(N).astype(dtype)
- result = table.lookup(wrong_keys)
- assert np.all(result == -1)
- def test_lookup_mask(self, table_type, dtype, writable):
- if table_type == ht.PyObjectHashTable:
- pytest.skip("Mask not supported for object")
- N = 3
- table = table_type(uses_mask=True)
- keys = (np.arange(N) + N).astype(dtype)
- mask = np.array([False, True, False])
- keys.flags.writeable = writable
- table.map_locations(keys, mask)
- result = table.lookup(keys, mask)
- expected = np.arange(N)
- tm.assert_numpy_array_equal(result.astype(np.int64), expected.astype(np.int64))
- result = table.lookup(np.array([1 + N]).astype(dtype), np.array([False]))
- tm.assert_numpy_array_equal(
- result.astype(np.int64), np.array([-1], dtype=np.int64)
- )
- def test_unique(self, table_type, dtype, writable):
- if dtype in (np.int8, np.uint8):
- N = 88
- else:
- N = 1000
- table = table_type()
- expected = (np.arange(N) + N).astype(dtype)
- keys = np.repeat(expected, 5)
- keys.flags.writeable = writable
- unique = table.unique(keys)
- tm.assert_numpy_array_equal(unique, expected)
- def test_tracemalloc_works(self, table_type, dtype):
- if dtype in (np.int8, np.uint8):
- N = 256
- else:
- N = 30000
- keys = np.arange(N).astype(dtype)
- with activated_tracemalloc():
- table = table_type()
- table.map_locations(keys)
- used = get_allocated_khash_memory()
- my_size = table.sizeof()
- assert used == my_size
- del table
- assert get_allocated_khash_memory() == 0
- def test_tracemalloc_for_empty(self, table_type, dtype):
- with activated_tracemalloc():
- table = table_type()
- used = get_allocated_khash_memory()
- my_size = table.sizeof()
- assert used == my_size
- del table
- assert get_allocated_khash_memory() == 0
- def test_get_state(self, table_type, dtype):
- table = table_type(1000)
- state = table.get_state()
- assert state["size"] == 0
- assert state["n_occupied"] == 0
- assert "n_buckets" in state
- assert "upper_bound" in state
- @pytest.mark.parametrize("N", range(1, 110))
- def test_no_reallocation(self, table_type, dtype, N):
- keys = np.arange(N).astype(dtype)
- preallocated_table = table_type(N)
- n_buckets_start = preallocated_table.get_state()["n_buckets"]
- preallocated_table.map_locations(keys)
- n_buckets_end = preallocated_table.get_state()["n_buckets"]
- # original number of buckets was enough:
- assert n_buckets_start == n_buckets_end
- # check with clean table (not too much preallocated)
- clean_table = table_type()
- clean_table.map_locations(keys)
- assert n_buckets_start == clean_table.get_state()["n_buckets"]
- class TestHashTableUnsorted:
- # TODO: moved from test_algos; may be redundancies with other tests
- def test_string_hashtable_set_item_signature(self):
- # GH#30419 fix typing in StringHashTable.set_item to prevent segfault
- tbl = ht.StringHashTable()
- tbl.set_item("key", 1)
- assert tbl.get_item("key") == 1
- with pytest.raises(TypeError, match="'key' has incorrect type"):
- # key arg typed as string, not object
- tbl.set_item(4, 6)
- with pytest.raises(TypeError, match="'val' has incorrect type"):
- tbl.get_item(4)
- def test_lookup_nan(self, writable):
- # GH#21688 ensure we can deal with readonly memory views
- xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3])
- xs.setflags(write=writable)
- m = ht.Float64HashTable()
- m.map_locations(xs)
- tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.intp))
- def test_add_signed_zeros(self):
- # GH#21866 inconsistent hash-function for float64
- # default hash-function would lead to different hash-buckets
- # for 0.0 and -0.0 if there are more than 2^30 hash-buckets
- # but this would mean 16GB
- N = 4 # 12 * 10**8 would trigger the error, if you have enough memory
- m = ht.Float64HashTable(N)
- m.set_item(0.0, 0)
- m.set_item(-0.0, 0)
- assert len(m) == 1 # 0.0 and -0.0 are equivalent
- def test_add_different_nans(self):
- # GH#21866 inconsistent hash-function for float64
- # create different nans from bit-patterns:
- NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0]
- NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0]
- assert NAN1 != NAN1
- assert NAN2 != NAN2
- # default hash function would lead to different hash-buckets
- # for NAN1 and NAN2 even if there are only 4 buckets:
- m = ht.Float64HashTable()
- m.set_item(NAN1, 0)
- m.set_item(NAN2, 0)
- assert len(m) == 1 # NAN1 and NAN2 are equivalent
- def test_lookup_overflow(self, writable):
- xs = np.array([1, 2, 2**63], dtype=np.uint64)
- # GH 21688 ensure we can deal with readonly memory views
- xs.setflags(write=writable)
- m = ht.UInt64HashTable()
- m.map_locations(xs)
- tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.intp))
- @pytest.mark.parametrize("nvals", [0, 10]) # resizing to 0 is special case
- @pytest.mark.parametrize(
- "htable, uniques, dtype, safely_resizes",
- [
- (ht.PyObjectHashTable, ht.ObjectVector, "object", False),
- (ht.StringHashTable, ht.ObjectVector, "object", True),
- (ht.Float64HashTable, ht.Float64Vector, "float64", False),
- (ht.Int64HashTable, ht.Int64Vector, "int64", False),
- (ht.Int32HashTable, ht.Int32Vector, "int32", False),
- (ht.UInt64HashTable, ht.UInt64Vector, "uint64", False),
- ],
- )
- def test_vector_resize(
- self, writable, htable, uniques, dtype, safely_resizes, nvals
- ):
- # Test for memory errors after internal vector
- # reallocations (GH 7157)
- # Changed from using np.random.rand to range
- # which could cause flaky CI failures when safely_resizes=False
- vals = np.array(range(1000), dtype=dtype)
- # GH 21688 ensures we can deal with read-only memory views
- vals.setflags(write=writable)
- # initialise instances; cannot initialise in parametrization,
- # as otherwise external views would be held on the array (which is
- # one of the things this test is checking)
- htable = htable()
- uniques = uniques()
- # get_labels may append to uniques
- htable.get_labels(vals[:nvals], uniques, 0, -1)
- # to_array() sets an external_view_exists flag on uniques.
- tmp = uniques.to_array()
- oldshape = tmp.shape
- # subsequent get_labels() calls can no longer append to it
- # (except for StringHashTables + ObjectVector)
- if safely_resizes:
- htable.get_labels(vals, uniques, 0, -1)
- else:
- with pytest.raises(ValueError, match="external reference.*"):
- htable.get_labels(vals, uniques, 0, -1)
- uniques.to_array() # should not raise here
- assert tmp.shape == oldshape
- @pytest.mark.parametrize(
- "hashtable",
- [
- ht.PyObjectHashTable,
- ht.StringHashTable,
- ht.Float64HashTable,
- ht.Int64HashTable,
- ht.Int32HashTable,
- ht.UInt64HashTable,
- ],
- )
- def test_hashtable_large_sizehint(self, hashtable):
- # GH#22729 smoketest for not raising when passing a large size_hint
- size_hint = np.iinfo(np.uint32).max + 1
- hashtable(size_hint=size_hint)
- class TestPyObjectHashTableWithNans:
- def test_nan_float(self):
- nan1 = float("nan")
- nan2 = float("nan")
- assert nan1 is not nan2
- table = ht.PyObjectHashTable()
- table.set_item(nan1, 42)
- assert table.get_item(nan2) == 42
- def test_nan_complex_both(self):
- nan1 = complex(float("nan"), float("nan"))
- nan2 = complex(float("nan"), float("nan"))
- assert nan1 is not nan2
- table = ht.PyObjectHashTable()
- table.set_item(nan1, 42)
- assert table.get_item(nan2) == 42
- def test_nan_complex_real(self):
- nan1 = complex(float("nan"), 1)
- nan2 = complex(float("nan"), 1)
- other = complex(float("nan"), 2)
- assert nan1 is not nan2
- table = ht.PyObjectHashTable()
- table.set_item(nan1, 42)
- assert table.get_item(nan2) == 42
- with pytest.raises(KeyError, match=None) as error:
- table.get_item(other)
- assert str(error.value) == str(other)
- def test_nan_complex_imag(self):
- nan1 = complex(1, float("nan"))
- nan2 = complex(1, float("nan"))
- other = complex(2, float("nan"))
- assert nan1 is not nan2
- table = ht.PyObjectHashTable()
- table.set_item(nan1, 42)
- assert table.get_item(nan2) == 42
- with pytest.raises(KeyError, match=None) as error:
- table.get_item(other)
- assert str(error.value) == str(other)
- def test_nan_in_tuple(self):
- nan1 = (float("nan"),)
- nan2 = (float("nan"),)
- assert nan1[0] is not nan2[0]
- table = ht.PyObjectHashTable()
- table.set_item(nan1, 42)
- assert table.get_item(nan2) == 42
- def test_nan_in_nested_tuple(self):
- nan1 = (1, (2, (float("nan"),)))
- nan2 = (1, (2, (float("nan"),)))
- other = (1, 2)
- table = ht.PyObjectHashTable()
- table.set_item(nan1, 42)
- assert table.get_item(nan2) == 42
- with pytest.raises(KeyError, match=None) as error:
- table.get_item(other)
- assert str(error.value) == str(other)
- def test_hash_equal_tuple_with_nans():
- a = (float("nan"), (float("nan"), float("nan")))
- b = (float("nan"), (float("nan"), float("nan")))
- assert ht.object_hash(a) == ht.object_hash(b)
- assert ht.objects_are_equal(a, b)
- def test_get_labels_groupby_for_Int64(writable):
- table = ht.Int64HashTable()
- vals = np.array([1, 2, -1, 2, 1, -1], dtype=np.int64)
- vals.flags.writeable = writable
- arr, unique = table.get_labels_groupby(vals)
- expected_arr = np.array([0, 1, -1, 1, 0, -1], dtype=np.intp)
- expected_unique = np.array([1, 2], dtype=np.int64)
- tm.assert_numpy_array_equal(arr, expected_arr)
- tm.assert_numpy_array_equal(unique, expected_unique)
- def test_tracemalloc_works_for_StringHashTable():
- N = 1000
- keys = np.arange(N).astype(np.str_).astype(np.object_)
- with activated_tracemalloc():
- table = ht.StringHashTable()
- table.map_locations(keys)
- used = get_allocated_khash_memory()
- my_size = table.sizeof()
- assert used == my_size
- del table
- assert get_allocated_khash_memory() == 0
- def test_tracemalloc_for_empty_StringHashTable():
- with activated_tracemalloc():
- table = ht.StringHashTable()
- used = get_allocated_khash_memory()
- my_size = table.sizeof()
- assert used == my_size
- del table
- assert get_allocated_khash_memory() == 0
- @pytest.mark.parametrize("N", range(1, 110))
- def test_no_reallocation_StringHashTable(N):
- keys = np.arange(N).astype(np.str_).astype(np.object_)
- preallocated_table = ht.StringHashTable(N)
- n_buckets_start = preallocated_table.get_state()["n_buckets"]
- preallocated_table.map_locations(keys)
- n_buckets_end = preallocated_table.get_state()["n_buckets"]
- # original number of buckets was enough:
- assert n_buckets_start == n_buckets_end
- # check with clean table (not too much preallocated)
- clean_table = ht.StringHashTable()
- clean_table.map_locations(keys)
- assert n_buckets_start == clean_table.get_state()["n_buckets"]
- @pytest.mark.parametrize(
- "table_type, dtype",
- [
- (ht.Float64HashTable, np.float64),
- (ht.Float32HashTable, np.float32),
- (ht.Complex128HashTable, np.complex128),
- (ht.Complex64HashTable, np.complex64),
- ],
- )
- class TestHashTableWithNans:
- def test_get_set_contains_len(self, table_type, dtype):
- index = float("nan")
- table = table_type()
- assert index not in table
- table.set_item(index, 42)
- assert len(table) == 1
- assert index in table
- assert table.get_item(index) == 42
- table.set_item(index, 41)
- assert len(table) == 1
- assert index in table
- assert table.get_item(index) == 41
- def test_map_locations(self, table_type, dtype):
- N = 10
- table = table_type()
- keys = np.full(N, np.nan, dtype=dtype)
- table.map_locations(keys)
- assert len(table) == 1
- assert table.get_item(np.nan) == N - 1
- def test_unique(self, table_type, dtype):
- N = 1020
- table = table_type()
- keys = np.full(N, np.nan, dtype=dtype)
- unique = table.unique(keys)
- assert np.all(np.isnan(unique)) and len(unique) == 1
- def test_unique_for_nan_objects_floats():
- table = ht.PyObjectHashTable()
- keys = np.array([float("nan") for i in range(50)], dtype=np.object_)
- unique = table.unique(keys)
- assert len(unique) == 1
- def test_unique_for_nan_objects_complex():
- table = ht.PyObjectHashTable()
- keys = np.array([complex(float("nan"), 1.0) for i in range(50)], dtype=np.object_)
- unique = table.unique(keys)
- assert len(unique) == 1
- def test_unique_for_nan_objects_tuple():
- table = ht.PyObjectHashTable()
- keys = np.array(
- [1] + [(1.0, (float("nan"), 1.0)) for i in range(50)], dtype=np.object_
- )
- unique = table.unique(keys)
- assert len(unique) == 2
- @pytest.mark.parametrize(
- "dtype",
- [
- np.object_,
- np.complex128,
- np.int64,
- np.uint64,
- np.float64,
- np.complex64,
- np.int32,
- np.uint32,
- np.float32,
- np.int16,
- np.uint16,
- np.int8,
- np.uint8,
- np.intp,
- ],
- )
- class TestHelpFunctions:
- def test_value_count(self, dtype, writable):
- N = 43
- expected = (np.arange(N) + N).astype(dtype)
- values = np.repeat(expected, 5)
- values.flags.writeable = writable
- keys, counts = ht.value_count(values, False)
- tm.assert_numpy_array_equal(np.sort(keys), expected)
- assert np.all(counts == 5)
- def test_value_count_stable(self, dtype, writable):
- # GH12679
- values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
- values.flags.writeable = writable
- keys, counts = ht.value_count(values, False)
- tm.assert_numpy_array_equal(keys, values)
- assert np.all(counts == 1)
- def test_duplicated_first(self, dtype, writable):
- N = 100
- values = np.repeat(np.arange(N).astype(dtype), 5)
- values.flags.writeable = writable
- result = ht.duplicated(values)
- expected = np.ones_like(values, dtype=np.bool_)
- expected[::5] = False
- tm.assert_numpy_array_equal(result, expected)
- def test_ismember_yes(self, dtype, writable):
- N = 127
- arr = np.arange(N).astype(dtype)
- values = np.arange(N).astype(dtype)
- arr.flags.writeable = writable
- values.flags.writeable = writable
- result = ht.ismember(arr, values)
- expected = np.ones_like(values, dtype=np.bool_)
- tm.assert_numpy_array_equal(result, expected)
- def test_ismember_no(self, dtype):
- N = 17
- arr = np.arange(N).astype(dtype)
- values = (np.arange(N) + N).astype(dtype)
- result = ht.ismember(arr, values)
- expected = np.zeros_like(values, dtype=np.bool_)
- tm.assert_numpy_array_equal(result, expected)
- def test_mode(self, dtype, writable):
- if dtype in (np.int8, np.uint8):
- N = 53
- else:
- N = 11111
- values = np.repeat(np.arange(N).astype(dtype), 5)
- values[0] = 42
- values.flags.writeable = writable
- result = ht.mode(values, False)
- assert result == 42
- def test_mode_stable(self, dtype, writable):
- values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
- values.flags.writeable = writable
- keys = ht.mode(values, False)
- tm.assert_numpy_array_equal(keys, values)
- def test_modes_with_nans():
- # GH42688, nans aren't mangled
- nulls = [pd.NA, np.nan, pd.NaT, None]
- values = np.array([True] + nulls * 2, dtype=np.object_)
- modes = ht.mode(values, False)
- assert modes.size == len(nulls)
- def test_unique_label_indices_intp(writable):
- keys = np.array([1, 2, 2, 2, 1, 3], dtype=np.intp)
- keys.flags.writeable = writable
- result = ht.unique_label_indices(keys)
- expected = np.array([0, 1, 5], dtype=np.intp)
- tm.assert_numpy_array_equal(result, expected)
- def test_unique_label_indices():
- a = np.random.randint(1, 1 << 10, 1 << 15).astype(np.intp)
- left = ht.unique_label_indices(a)
- right = np.unique(a, return_index=True)[1]
- tm.assert_numpy_array_equal(left, right, check_dtype=False)
- a[np.random.choice(len(a), 10)] = -1
- left = ht.unique_label_indices(a)
- right = np.unique(a, return_index=True)[1][1:]
- tm.assert_numpy_array_equal(left, right, check_dtype=False)
- @pytest.mark.parametrize(
- "dtype",
- [
- np.float64,
- np.float32,
- np.complex128,
- np.complex64,
- ],
- )
- class TestHelpFunctionsWithNans:
- def test_value_count(self, dtype):
- values = np.array([np.nan, np.nan, np.nan], dtype=dtype)
- keys, counts = ht.value_count(values, True)
- assert len(keys) == 0
- keys, counts = ht.value_count(values, False)
- assert len(keys) == 1 and np.all(np.isnan(keys))
- assert counts[0] == 3
- def test_duplicated_first(self, dtype):
- values = np.array([np.nan, np.nan, np.nan], dtype=dtype)
- result = ht.duplicated(values)
- expected = np.array([False, True, True])
- tm.assert_numpy_array_equal(result, expected)
- def test_ismember_yes(self, dtype):
- arr = np.array([np.nan, np.nan, np.nan], dtype=dtype)
- values = np.array([np.nan, np.nan], dtype=dtype)
- result = ht.ismember(arr, values)
- expected = np.array([True, True, True], dtype=np.bool_)
- tm.assert_numpy_array_equal(result, expected)
- def test_ismember_no(self, dtype):
- arr = np.array([np.nan, np.nan, np.nan], dtype=dtype)
- values = np.array([1], dtype=dtype)
- result = ht.ismember(arr, values)
- expected = np.array([False, False, False], dtype=np.bool_)
- tm.assert_numpy_array_equal(result, expected)
- def test_mode(self, dtype):
- values = np.array([42, np.nan, np.nan, np.nan], dtype=dtype)
- assert ht.mode(values, True) == 42
- assert np.isnan(ht.mode(values, False))
- def test_ismember_tuple_with_nans():
- # GH-41836
- values = [("a", float("nan")), ("b", 1)]
- comps = [("a", float("nan"))]
- result = isin(values, comps)
- expected = np.array([True, False], dtype=np.bool_)
- tm.assert_numpy_array_equal(result, expected)
- def test_float_complex_int_are_equal_as_objects():
- values = ["a", 5, 5.0, 5.0 + 0j]
- comps = list(range(129))
- result = isin(values, comps)
- expected = np.array([False, True, True, True], dtype=np.bool_)
- tm.assert_numpy_array_equal(result, expected)
|