123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897 |
- cimport cython
- from cython cimport Py_ssize_t
- import numpy as np
- cimport numpy as cnp
- from numpy cimport (
- int64_t,
- intp_t,
- ndarray,
- uint64_t,
- )
- cnp.import_array()
- from pandas._libs.algos import groupsort_indexer
- from pandas._libs.dtypes cimport (
- numeric_object_t,
- numeric_t,
- )
- @cython.wraparound(False)
- @cython.boundscheck(False)
- def inner_join(const intp_t[:] left, const intp_t[:] right,
- Py_ssize_t max_groups):
- cdef:
- Py_ssize_t i, j, k, count = 0
- intp_t[::1] left_sorter, right_sorter
- intp_t[::1] left_count, right_count
- intp_t[::1] left_indexer, right_indexer
- intp_t lc, rc
- Py_ssize_t left_pos = 0, right_pos = 0, position = 0
- Py_ssize_t offset
- left_sorter, left_count = groupsort_indexer(left, max_groups)
- right_sorter, right_count = groupsort_indexer(right, max_groups)
- with nogil:
- # First pass, determine size of result set, do not use the NA group
- for i in range(1, max_groups + 1):
- lc = left_count[i]
- rc = right_count[i]
- if rc > 0 and lc > 0:
- count += lc * rc
- left_indexer = np.empty(count, dtype=np.intp)
- right_indexer = np.empty(count, dtype=np.intp)
- with nogil:
- # exclude the NA group
- left_pos = left_count[0]
- right_pos = right_count[0]
- for i in range(1, max_groups + 1):
- lc = left_count[i]
- rc = right_count[i]
- if rc > 0 and lc > 0:
- for j in range(lc):
- offset = position + j * rc
- for k in range(rc):
- left_indexer[offset + k] = left_pos + j
- right_indexer[offset + k] = right_pos + k
- position += lc * rc
- left_pos += lc
- right_pos += rc
- # Will overwrite left/right indexer with the result
- _get_result_indexer(left_sorter, left_indexer)
- _get_result_indexer(right_sorter, right_indexer)
- return np.asarray(left_indexer), np.asarray(right_indexer)
- @cython.wraparound(False)
- @cython.boundscheck(False)
- def left_outer_join(const intp_t[:] left, const intp_t[:] right,
- Py_ssize_t max_groups, bint sort=True):
- cdef:
- Py_ssize_t i, j, k, count = 0
- ndarray[intp_t] rev
- intp_t[::1] left_count, right_count
- intp_t[::1] left_sorter, right_sorter
- intp_t[::1] left_indexer, right_indexer
- intp_t lc, rc
- Py_ssize_t left_pos = 0, right_pos = 0, position = 0
- Py_ssize_t offset
- left_sorter, left_count = groupsort_indexer(left, max_groups)
- right_sorter, right_count = groupsort_indexer(right, max_groups)
- with nogil:
- # First pass, determine size of result set, do not use the NA group
- for i in range(1, max_groups + 1):
- lc = left_count[i]
- rc = right_count[i]
- if rc > 0:
- count += lc * rc
- else:
- count += lc
- left_indexer = np.empty(count, dtype=np.intp)
- right_indexer = np.empty(count, dtype=np.intp)
- with nogil:
- # exclude the NA group
- left_pos = left_count[0]
- right_pos = right_count[0]
- for i in range(1, max_groups + 1):
- lc = left_count[i]
- rc = right_count[i]
- if rc == 0:
- for j in range(lc):
- left_indexer[position + j] = left_pos + j
- right_indexer[position + j] = -1
- position += lc
- else:
- for j in range(lc):
- offset = position + j * rc
- for k in range(rc):
- left_indexer[offset + k] = left_pos + j
- right_indexer[offset + k] = right_pos + k
- position += lc * rc
- left_pos += lc
- right_pos += rc
- # Will overwrite left/right indexer with the result
- _get_result_indexer(left_sorter, left_indexer)
- _get_result_indexer(right_sorter, right_indexer)
- if not sort: # if not asked to sort, revert to original order
- if len(left) == len(left_indexer):
- # no multiple matches for any row on the left
- # this is a short-cut to avoid groupsort_indexer
- # otherwise, the `else` path also works in this case
- rev = np.empty(len(left), dtype=np.intp)
- rev.put(np.asarray(left_sorter), np.arange(len(left)))
- else:
- rev, _ = groupsort_indexer(left_indexer, len(left))
- return np.asarray(left_indexer).take(rev), np.asarray(right_indexer).take(rev)
- else:
- return np.asarray(left_indexer), np.asarray(right_indexer)
- @cython.wraparound(False)
- @cython.boundscheck(False)
- def full_outer_join(const intp_t[:] left, const intp_t[:] right,
- Py_ssize_t max_groups):
- cdef:
- Py_ssize_t i, j, k, count = 0
- intp_t[::1] left_sorter, right_sorter
- intp_t[::1] left_count, right_count
- intp_t[::1] left_indexer, right_indexer
- intp_t lc, rc
- intp_t left_pos = 0, right_pos = 0
- Py_ssize_t offset, position = 0
- left_sorter, left_count = groupsort_indexer(left, max_groups)
- right_sorter, right_count = groupsort_indexer(right, max_groups)
- with nogil:
- # First pass, determine size of result set, do not use the NA group
- for i in range(1, max_groups + 1):
- lc = left_count[i]
- rc = right_count[i]
- if rc > 0 and lc > 0:
- count += lc * rc
- else:
- count += lc + rc
- left_indexer = np.empty(count, dtype=np.intp)
- right_indexer = np.empty(count, dtype=np.intp)
- with nogil:
- # exclude the NA group
- left_pos = left_count[0]
- right_pos = right_count[0]
- for i in range(1, max_groups + 1):
- lc = left_count[i]
- rc = right_count[i]
- if rc == 0:
- for j in range(lc):
- left_indexer[position + j] = left_pos + j
- right_indexer[position + j] = -1
- position += lc
- elif lc == 0:
- for j in range(rc):
- left_indexer[position + j] = -1
- right_indexer[position + j] = right_pos + j
- position += rc
- else:
- for j in range(lc):
- offset = position + j * rc
- for k in range(rc):
- left_indexer[offset + k] = left_pos + j
- right_indexer[offset + k] = right_pos + k
- position += lc * rc
- left_pos += lc
- right_pos += rc
- # Will overwrite left/right indexer with the result
- _get_result_indexer(left_sorter, left_indexer)
- _get_result_indexer(right_sorter, right_indexer)
- return np.asarray(left_indexer), np.asarray(right_indexer)
- @cython.wraparound(False)
- @cython.boundscheck(False)
- cdef void _get_result_indexer(intp_t[::1] sorter, intp_t[::1] indexer) nogil:
- """NOTE: overwrites indexer with the result to avoid allocating another array"""
- cdef:
- Py_ssize_t i, n, idx
- if len(sorter) > 0:
- # cython-only equivalent to
- # `res = algos.take_nd(sorter, indexer, fill_value=-1)`
- n = indexer.shape[0]
- for i in range(n):
- idx = indexer[i]
- if idx == -1:
- indexer[i] = -1
- else:
- indexer[i] = sorter[idx]
- else:
- # length-0 case
- indexer[:] = -1
- @cython.wraparound(False)
- @cython.boundscheck(False)
- def ffill_indexer(const intp_t[:] indexer) -> np.ndarray:
- cdef:
- Py_ssize_t i, n = len(indexer)
- ndarray[intp_t] result
- intp_t val, last_obs
- result = np.empty(n, dtype=np.intp)
- last_obs = -1
- for i in range(n):
- val = indexer[i]
- if val == -1:
- result[i] = last_obs
- else:
- result[i] = val
- last_obs = val
- return result
- # ----------------------------------------------------------------------
- # left_join_indexer, inner_join_indexer, outer_join_indexer
- # ----------------------------------------------------------------------
- # Joins on ordered, unique indices
- # right might contain non-unique values
- @cython.wraparound(False)
- @cython.boundscheck(False)
- def left_join_indexer_unique(
- ndarray[numeric_object_t] left,
- ndarray[numeric_object_t] right
- ):
- """
- Both left and right are strictly monotonic increasing.
- """
- cdef:
- Py_ssize_t i, j, nleft, nright
- ndarray[intp_t] indexer
- numeric_object_t rval
- i = 0
- j = 0
- nleft = len(left)
- nright = len(right)
- indexer = np.empty(nleft, dtype=np.intp)
- while True:
- if i == nleft:
- break
- if j == nright:
- indexer[i] = -1
- i += 1
- continue
- rval = right[j]
- while i < nleft - 1 and left[i] == rval:
- indexer[i] = j
- i += 1
- if left[i] == rval:
- indexer[i] = j
- i += 1
- while i < nleft - 1 and left[i] == rval:
- indexer[i] = j
- i += 1
- j += 1
- elif left[i] > rval:
- indexer[i] = -1
- j += 1
- else:
- indexer[i] = -1
- i += 1
- return indexer
- @cython.wraparound(False)
- @cython.boundscheck(False)
- def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
- """
- Two-pass algorithm for monotonic indexes. Handles many-to-one merges.
- Both left and right are monotonic increasing, but at least one of them
- is non-unique (if both were unique we'd use left_join_indexer_unique).
- """
- cdef:
- Py_ssize_t i, j, nright, nleft, count
- numeric_object_t lval, rval
- ndarray[intp_t] lindexer, rindexer
- ndarray[numeric_object_t] result
- nleft = len(left)
- nright = len(right)
- # First pass is to find the size 'count' of our output indexers.
- i = 0
- j = 0
- count = 0
- if nleft > 0:
- while i < nleft:
- if j == nright:
- count += nleft - i
- break
- lval = left[i]
- rval = right[j]
- if lval == rval:
- # This block is identical across
- # left_join_indexer, inner_join_indexer, outer_join_indexer
- count += 1
- if i < nleft - 1:
- if j < nright - 1 and right[j + 1] == rval:
- j += 1
- else:
- i += 1
- if left[i] != rval:
- j += 1
- elif j < nright - 1:
- j += 1
- if lval != right[j]:
- i += 1
- else:
- # end of the road
- break
- elif lval < rval:
- count += 1
- i += 1
- else:
- j += 1
- # do it again now that result size is known
- lindexer = np.empty(count, dtype=np.intp)
- rindexer = np.empty(count, dtype=np.intp)
- result = np.empty(count, dtype=left.dtype)
- i = 0
- j = 0
- count = 0
- if nleft > 0:
- while i < nleft:
- if j == nright:
- while i < nleft:
- lindexer[count] = i
- rindexer[count] = -1
- result[count] = left[i]
- i += 1
- count += 1
- break
- lval = left[i]
- rval = right[j]
- if lval == rval:
- lindexer[count] = i
- rindexer[count] = j
- result[count] = lval
- count += 1
- if i < nleft - 1:
- if j < nright - 1 and right[j + 1] == rval:
- j += 1
- else:
- i += 1
- if left[i] != rval:
- j += 1
- elif j < nright - 1:
- j += 1
- if lval != right[j]:
- i += 1
- else:
- # end of the road
- break
- elif lval < rval:
- # i.e. lval not in right; we keep for left_join_indexer
- lindexer[count] = i
- rindexer[count] = -1
- result[count] = lval
- count += 1
- i += 1
- else:
- # i.e. rval not in left; we discard for left_join_indexer
- j += 1
- return result, lindexer, rindexer
- @cython.wraparound(False)
- @cython.boundscheck(False)
- def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
- """
- Two-pass algorithm for monotonic indexes. Handles many-to-one merges.
- Both left and right are monotonic increasing but not necessarily unique.
- """
- cdef:
- Py_ssize_t i, j, nright, nleft, count
- numeric_object_t lval, rval
- ndarray[intp_t] lindexer, rindexer
- ndarray[numeric_object_t] result
- nleft = len(left)
- nright = len(right)
- # First pass is to find the size 'count' of our output indexers.
- i = 0
- j = 0
- count = 0
- if nleft > 0 and nright > 0:
- while True:
- if i == nleft:
- break
- if j == nright:
- break
- lval = left[i]
- rval = right[j]
- if lval == rval:
- count += 1
- if i < nleft - 1:
- if j < nright - 1 and right[j + 1] == rval:
- j += 1
- else:
- i += 1
- if left[i] != rval:
- j += 1
- elif j < nright - 1:
- j += 1
- if lval != right[j]:
- i += 1
- else:
- # end of the road
- break
- elif lval < rval:
- # i.e. lval not in right; we discard for inner_indexer
- i += 1
- else:
- # i.e. rval not in left; we discard for inner_indexer
- j += 1
- # do it again now that result size is known
- lindexer = np.empty(count, dtype=np.intp)
- rindexer = np.empty(count, dtype=np.intp)
- result = np.empty(count, dtype=left.dtype)
- i = 0
- j = 0
- count = 0
- if nleft > 0 and nright > 0:
- while True:
- if i == nleft:
- break
- if j == nright:
- break
- lval = left[i]
- rval = right[j]
- if lval == rval:
- lindexer[count] = i
- rindexer[count] = j
- result[count] = lval
- count += 1
- if i < nleft - 1:
- if j < nright - 1 and right[j + 1] == rval:
- j += 1
- else:
- i += 1
- if left[i] != rval:
- j += 1
- elif j < nright - 1:
- j += 1
- if lval != right[j]:
- i += 1
- else:
- # end of the road
- break
- elif lval < rval:
- # i.e. lval not in right; we discard for inner_indexer
- i += 1
- else:
- # i.e. rval not in left; we discard for inner_indexer
- j += 1
- return result, lindexer, rindexer
- @cython.wraparound(False)
- @cython.boundscheck(False)
- def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
- """
- Both left and right are monotonic increasing but not necessarily unique.
- """
- cdef:
- Py_ssize_t i, j, nright, nleft, count
- numeric_object_t lval, rval
- ndarray[intp_t] lindexer, rindexer
- ndarray[numeric_object_t] result
- nleft = len(left)
- nright = len(right)
- # First pass is to find the size 'count' of our output indexers.
- # count will be length of left plus the number of elements of right not in
- # left (counting duplicates)
- i = 0
- j = 0
- count = 0
- if nleft == 0:
- count = nright
- elif nright == 0:
- count = nleft
- else:
- while True:
- if i == nleft:
- count += nright - j
- break
- if j == nright:
- count += nleft - i
- break
- lval = left[i]
- rval = right[j]
- if lval == rval:
- count += 1
- if i < nleft - 1:
- if j < nright - 1 and right[j + 1] == rval:
- j += 1
- else:
- i += 1
- if left[i] != rval:
- j += 1
- elif j < nright - 1:
- j += 1
- if lval != right[j]:
- i += 1
- else:
- # end of the road
- break
- elif lval < rval:
- count += 1
- i += 1
- else:
- count += 1
- j += 1
- lindexer = np.empty(count, dtype=np.intp)
- rindexer = np.empty(count, dtype=np.intp)
- result = np.empty(count, dtype=left.dtype)
- # do it again, but populate the indexers / result
- i = 0
- j = 0
- count = 0
- if nleft == 0:
- for j in range(nright):
- lindexer[j] = -1
- rindexer[j] = j
- result[j] = right[j]
- elif nright == 0:
- for i in range(nleft):
- lindexer[i] = i
- rindexer[i] = -1
- result[i] = left[i]
- else:
- while True:
- if i == nleft:
- while j < nright:
- lindexer[count] = -1
- rindexer[count] = j
- result[count] = right[j]
- count += 1
- j += 1
- break
- if j == nright:
- while i < nleft:
- lindexer[count] = i
- rindexer[count] = -1
- result[count] = left[i]
- count += 1
- i += 1
- break
- lval = left[i]
- rval = right[j]
- if lval == rval:
- lindexer[count] = i
- rindexer[count] = j
- result[count] = lval
- count += 1
- if i < nleft - 1:
- if j < nright - 1 and right[j + 1] == rval:
- j += 1
- else:
- i += 1
- if left[i] != rval:
- j += 1
- elif j < nright - 1:
- j += 1
- if lval != right[j]:
- i += 1
- else:
- # end of the road
- break
- elif lval < rval:
- # i.e. lval not in right; we keep for outer_join_indexer
- lindexer[count] = i
- rindexer[count] = -1
- result[count] = lval
- count += 1
- i += 1
- else:
- # i.e. rval not in left; we keep for outer_join_indexer
- lindexer[count] = -1
- rindexer[count] = j
- result[count] = rval
- count += 1
- j += 1
- return result, lindexer, rindexer
- # ----------------------------------------------------------------------
- # asof_join_by
- # ----------------------------------------------------------------------
- from pandas._libs.hashtable cimport (
- HashTable,
- Int64HashTable,
- PyObjectHashTable,
- UInt64HashTable,
- )
- ctypedef fused by_t:
- object
- int64_t
- uint64_t
- def asof_join_backward_on_X_by_Y(numeric_t[:] left_values,
- numeric_t[:] right_values,
- by_t[:] left_by_values,
- by_t[:] right_by_values,
- bint allow_exact_matches=True,
- tolerance=None,
- bint use_hashtable=True):
- cdef:
- Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
- ndarray[intp_t] left_indexer, right_indexer
- bint has_tolerance = False
- numeric_t tolerance_ = 0
- numeric_t diff = 0
- HashTable hash_table
- by_t by_value
- # if we are using tolerance, set our objects
- if tolerance is not None:
- has_tolerance = True
- tolerance_ = tolerance
- left_size = len(left_values)
- right_size = len(right_values)
- left_indexer = np.empty(left_size, dtype=np.intp)
- right_indexer = np.empty(left_size, dtype=np.intp)
- if use_hashtable:
- if by_t is object:
- hash_table = PyObjectHashTable(right_size)
- elif by_t is int64_t:
- hash_table = Int64HashTable(right_size)
- elif by_t is uint64_t:
- hash_table = UInt64HashTable(right_size)
- right_pos = 0
- for left_pos in range(left_size):
- # restart right_pos if it went negative in a previous iteration
- if right_pos < 0:
- right_pos = 0
- # find last position in right whose value is less than left's
- if allow_exact_matches:
- while (right_pos < right_size and
- right_values[right_pos] <= left_values[left_pos]):
- if use_hashtable:
- hash_table.set_item(right_by_values[right_pos], right_pos)
- right_pos += 1
- else:
- while (right_pos < right_size and
- right_values[right_pos] < left_values[left_pos]):
- if use_hashtable:
- hash_table.set_item(right_by_values[right_pos], right_pos)
- right_pos += 1
- right_pos -= 1
- # save positions as the desired index
- if use_hashtable:
- by_value = left_by_values[left_pos]
- found_right_pos = (hash_table.get_item(by_value)
- if by_value in hash_table else -1)
- else:
- found_right_pos = right_pos
- left_indexer[left_pos] = left_pos
- right_indexer[left_pos] = found_right_pos
- # if needed, verify that tolerance is met
- if has_tolerance and found_right_pos != -1:
- diff = left_values[left_pos] - right_values[found_right_pos]
- if diff > tolerance_:
- right_indexer[left_pos] = -1
- return left_indexer, right_indexer
- def asof_join_forward_on_X_by_Y(numeric_t[:] left_values,
- numeric_t[:] right_values,
- by_t[:] left_by_values,
- by_t[:] right_by_values,
- bint allow_exact_matches=1,
- tolerance=None,
- bint use_hashtable=True):
- cdef:
- Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
- ndarray[intp_t] left_indexer, right_indexer
- bint has_tolerance = False
- numeric_t tolerance_ = 0
- numeric_t diff = 0
- HashTable hash_table
- by_t by_value
- # if we are using tolerance, set our objects
- if tolerance is not None:
- has_tolerance = True
- tolerance_ = tolerance
- left_size = len(left_values)
- right_size = len(right_values)
- left_indexer = np.empty(left_size, dtype=np.intp)
- right_indexer = np.empty(left_size, dtype=np.intp)
- if use_hashtable:
- if by_t is object:
- hash_table = PyObjectHashTable(right_size)
- elif by_t is int64_t:
- hash_table = Int64HashTable(right_size)
- elif by_t is uint64_t:
- hash_table = UInt64HashTable(right_size)
- right_pos = right_size - 1
- for left_pos in range(left_size - 1, -1, -1):
- # restart right_pos if it went over in a previous iteration
- if right_pos == right_size:
- right_pos = right_size - 1
- # find first position in right whose value is greater than left's
- if allow_exact_matches:
- while (right_pos >= 0 and
- right_values[right_pos] >= left_values[left_pos]):
- if use_hashtable:
- hash_table.set_item(right_by_values[right_pos], right_pos)
- right_pos -= 1
- else:
- while (right_pos >= 0 and
- right_values[right_pos] > left_values[left_pos]):
- if use_hashtable:
- hash_table.set_item(right_by_values[right_pos], right_pos)
- right_pos -= 1
- right_pos += 1
- # save positions as the desired index
- if use_hashtable:
- by_value = left_by_values[left_pos]
- found_right_pos = (hash_table.get_item(by_value)
- if by_value in hash_table else -1)
- else:
- found_right_pos = (right_pos
- if right_pos != right_size else -1)
- left_indexer[left_pos] = left_pos
- right_indexer[left_pos] = found_right_pos
- # if needed, verify that tolerance is met
- if has_tolerance and found_right_pos != -1:
- diff = right_values[found_right_pos] - left_values[left_pos]
- if diff > tolerance_:
- right_indexer[left_pos] = -1
- return left_indexer, right_indexer
- def asof_join_nearest_on_X_by_Y(ndarray[numeric_t] left_values,
- ndarray[numeric_t] right_values,
- ndarray[by_t] left_by_values,
- ndarray[by_t] right_by_values,
- bint allow_exact_matches=True,
- tolerance=None,
- bint use_hashtable=True):
- cdef:
- ndarray[intp_t] bli, bri, fli, fri
- ndarray[intp_t] left_indexer, right_indexer
- Py_ssize_t left_size, i
- numeric_t bdiff, fdiff
- # search both forward and backward
- # TODO(cython3):
- # Bug in beta1 preventing Cython from choosing
- # right specialization when one fused memview is None
- # Doesn't matter what type we choose
- # (nothing happens anyways since it is None)
- # GH 51640
- if left_by_values is not None and left_by_values.dtype != object:
- by_dtype = f"{left_by_values.dtype}_t"
- else:
- by_dtype = object
- bli, bri = asof_join_backward_on_X_by_Y[f"{left_values.dtype}_t", by_dtype](
- left_values,
- right_values,
- left_by_values,
- right_by_values,
- allow_exact_matches,
- tolerance,
- use_hashtable
- )
- fli, fri = asof_join_forward_on_X_by_Y[f"{left_values.dtype}_t", by_dtype](
- left_values,
- right_values,
- left_by_values,
- right_by_values,
- allow_exact_matches,
- tolerance,
- use_hashtable
- )
- # choose the smaller timestamp
- left_size = len(left_values)
- left_indexer = np.empty(left_size, dtype=np.intp)
- right_indexer = np.empty(left_size, dtype=np.intp)
- for i in range(len(bri)):
- # choose timestamp from right with smaller difference
- if bri[i] != -1 and fri[i] != -1:
- bdiff = left_values[bli[i]] - right_values[bri[i]]
- fdiff = right_values[fri[i]] - left_values[fli[i]]
- right_indexer[i] = bri[i] if bdiff <= fdiff else fri[i]
- else:
- right_indexer[i] = bri[i] if bri[i] != -1 else fri[i]
- left_indexer[i] = bli[i]
- return left_indexer, right_indexer
|