SRI-DYZBC2
/
Vehicle-cpp


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733
							cimport cython
import numpy as np

cimport numpy as cnp
from numpy cimport (
    float64_t,
    int8_t,
    int32_t,
    int64_t,
    ndarray,
    uint8_t,
)

cnp.import_array()


# -----------------------------------------------------------------------------
# Preamble stuff

cdef float64_t NaN = <float64_t>np.NaN
cdef float64_t INF = <float64_t>np.inf

# -----------------------------------------------------------------------------


cdef class SparseIndex:
    """
    Abstract superclass for sparse index types.
    """

    def __init__(self):
        raise NotImplementedError


cdef class IntIndex(SparseIndex):
    """
    Object for holding exact integer sparse indexing information

    Parameters
    ----------
    length : integer
    indices : array-like
        Contains integers corresponding to the indices.
    check_integrity : bool, default=True
        Check integrity of the input.
    """

    cdef readonly:
        Py_ssize_t length, npoints
        ndarray indices

    def __init__(self, Py_ssize_t length, indices, bint check_integrity=True):
        self.length = length
        self.indices = np.ascontiguousarray(indices, dtype=np.int32)
        self.npoints = len(self.indices)

        if check_integrity:
            self.check_integrity()

    def __reduce__(self):
        args = (self.length, self.indices)
        return IntIndex, args

    def __repr__(self) -> str:
        output = "IntIndex\n"
        output += f"Indices: {repr(self.indices)}\n"
        return output

    @property
    def nbytes(self) -> int:
        return self.indices.nbytes

    cdef check_integrity(self):
        """
        Checks the following:

        - Indices are strictly ascending
        - Number of indices is at most self.length
        - Indices are at least 0 and at most the total length less one

        A ValueError is raised if any of these conditions is violated.
        """

        if self.npoints > self.length:
            raise ValueError(
                f"Too many indices. Expected {self.length} but found {self.npoints}"
            )

        # Indices are vacuously ordered and non-negative
        # if the sequence of indices is empty.
        if self.npoints == 0:
            return

        if self.indices.min() < 0:
            raise ValueError("No index can be less than zero")

        if self.indices.max() >= self.length:
            raise ValueError("All indices must be less than the length")

        monotonic = np.all(self.indices[:-1] < self.indices[1:])
        if not monotonic:
            raise ValueError("Indices must be strictly increasing")

    def equals(self, other: object) -> bool:
        if not isinstance(other, IntIndex):
            return False

        if self is other:
            return True

        same_length = self.length == other.length
        same_indices = np.array_equal(self.indices, other.indices)
        return same_length and same_indices

    @property
    def ngaps(self) -> int:
        return self.length - self.npoints

    cpdef to_int_index(self):
        return self

    def to_block_index(self):
        locs, lens = get_blocks(self.indices)
        return BlockIndex(self.length, locs, lens)

    cpdef IntIndex intersect(self, SparseIndex y_):
        cdef:
            Py_ssize_t xi, yi = 0, result_indexer = 0
            int32_t xind
            ndarray[int32_t, ndim=1] xindices, yindices, new_indices
            IntIndex y

        # if is one already, returns self
        y = y_.to_int_index()

        if self.length != y.length:
            raise Exception("Indices must reference same underlying length")

        xindices = self.indices
        yindices = y.indices
        new_indices = np.empty(min(
            len(xindices), len(yindices)), dtype=np.int32)

        for xi in range(self.npoints):
            xind = xindices[xi]

            while yi < y.npoints and yindices[yi] < xind:
                yi += 1

            if yi >= y.npoints:
                break

            # TODO: would a two-pass algorithm be faster?
            if yindices[yi] == xind:
                new_indices[result_indexer] = xind
                result_indexer += 1

        new_indices = new_indices[:result_indexer]
        return IntIndex(self.length, new_indices)

    cpdef IntIndex make_union(self, SparseIndex y_):

        cdef:
            ndarray[int32_t, ndim=1] new_indices
            IntIndex y

        # if is one already, returns self
        y = y_.to_int_index()

        if self.length != y.length:
            raise ValueError("Indices must reference same underlying length")

        new_indices = np.union1d(self.indices, y.indices)
        return IntIndex(self.length, new_indices)

    @cython.wraparound(False)
    cpdef int32_t lookup(self, Py_ssize_t index):
        """
        Return the internal location if value exists on given index.
        Return -1 otherwise.
        """
        cdef:
            int32_t res
            ndarray[int32_t, ndim=1] inds

        inds = self.indices
        if self.npoints == 0:
            return -1
        elif index < 0 or self.length <= index:
            return -1

        res = inds.searchsorted(index)
        if res == self.npoints:
            return -1
        elif inds[res] == index:
            return res
        else:
            return -1

    @cython.wraparound(False)
    cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer):
        """
        Vectorized lookup, returns ndarray[int32_t]
        """
        cdef:
            Py_ssize_t n
            ndarray[int32_t, ndim=1] inds
            ndarray[uint8_t, ndim=1, cast=True] mask
            ndarray[int32_t, ndim=1] masked
            ndarray[int32_t, ndim=1] res
            ndarray[int32_t, ndim=1] results

        n = len(indexer)
        results = np.empty(n, dtype=np.int32)
        results[:] = -1

        if self.npoints == 0:
            return results

        inds = self.indices
        mask = (inds[0] <= indexer) & (indexer <= inds[len(inds) - 1])

        masked = indexer[mask]
        res = inds.searchsorted(masked).astype(np.int32)

        res[inds[res] != masked] = -1
        results[mask] = res
        return results


cpdef get_blocks(ndarray[int32_t, ndim=1] indices):
    cdef:
        Py_ssize_t i, npoints, result_indexer = 0
        int32_t block, length = 1, cur, prev
        ndarray[int32_t, ndim=1] locs, lens

    npoints = len(indices)

    # just handle the special empty case separately
    if npoints == 0:
        return np.array([], dtype=np.int32), np.array([], dtype=np.int32)

    # block size can't be longer than npoints
    locs = np.empty(npoints, dtype=np.int32)
    lens = np.empty(npoints, dtype=np.int32)

    # TODO: two-pass algorithm faster?
    prev = block = indices[0]
    for i in range(1, npoints):
        cur = indices[i]
        if cur - prev > 1:
            # new block
            locs[result_indexer] = block
            lens[result_indexer] = length
            block = cur
            length = 1
            result_indexer += 1
        else:
            # same block, increment length
            length += 1

        prev = cur

    locs[result_indexer] = block
    lens[result_indexer] = length
    result_indexer += 1
    locs = locs[:result_indexer]
    lens = lens[:result_indexer]
    return locs, lens


# -----------------------------------------------------------------------------
# BlockIndex

cdef class BlockIndex(SparseIndex):
    """
    Object for holding block-based sparse indexing information

    Parameters
    ----------
    """
    cdef readonly:
        int32_t nblocks, npoints, length
        ndarray blocs, blengths

    cdef:
        object __weakref__  # need to be picklable
        int32_t *locbuf
        int32_t *lenbuf

    def __init__(self, length, blocs, blengths):

        self.blocs = np.ascontiguousarray(blocs, dtype=np.int32)
        self.blengths = np.ascontiguousarray(blengths, dtype=np.int32)

        # in case we need
        self.locbuf = <int32_t*>self.blocs.data
        self.lenbuf = <int32_t*>self.blengths.data

        self.length = length
        self.nblocks = np.int32(len(self.blocs))
        self.npoints = self.blengths.sum()

        self.check_integrity()

    def __reduce__(self):
        args = (self.length, self.blocs, self.blengths)
        return BlockIndex, args

    def __repr__(self) -> str:
        output = "BlockIndex\n"
        output += f"Block locations: {repr(self.blocs)}\n"
        output += f"Block lengths: {repr(self.blengths)}"

        return output

    @property
    def nbytes(self) -> int:
        return self.blocs.nbytes + self.blengths.nbytes

    @property
    def ngaps(self) -> int:
        return self.length - self.npoints

    cdef check_integrity(self):
        """
        Check:
        - Locations are in ascending order
        - No overlapping blocks
        - Blocks to not start after end of index, nor extend beyond end
        """
        cdef:
            Py_ssize_t i
            ndarray[int32_t, ndim=1] blocs, blengths

        blocs = self.blocs
        blengths = self.blengths

        if len(blocs) != len(blengths):
            raise ValueError("block bound arrays must be same length")

        for i in range(self.nblocks):
            if i > 0:
                if blocs[i] <= blocs[i - 1]:
                    raise ValueError("Locations not in ascending order")

            if i < self.nblocks - 1:
                if blocs[i] + blengths[i] > blocs[i + 1]:
                    raise ValueError(f"Block {i} overlaps")
            else:
                if blocs[i] + blengths[i] > self.length:
                    raise ValueError(f"Block {i} extends beyond end")

            # no zero-length blocks
            if blengths[i] == 0:
                raise ValueError(f"Zero-length block {i}")

    def equals(self, other: object) -> bool:
        if not isinstance(other, BlockIndex):
            return False

        if self is other:
            return True

        same_length = self.length == other.length
        same_blocks = (np.array_equal(self.blocs, other.blocs) and
                       np.array_equal(self.blengths, other.blengths))
        return same_length and same_blocks

    def to_block_index(self):
        return self

    cpdef to_int_index(self):
        cdef:
            int32_t i = 0, j, b
            int32_t offset
            ndarray[int32_t, ndim=1] indices

        indices = np.empty(self.npoints, dtype=np.int32)

        for b in range(self.nblocks):
            offset = self.locbuf[b]

            for j in range(self.lenbuf[b]):
                indices[i] = offset + j
                i += 1

        return IntIndex(self.length, indices)

    @property
    def indices(self):
        return self.to_int_index().indices

    cpdef BlockIndex intersect(self, SparseIndex other):
        """
        Intersect two BlockIndex objects

        Returns
        -------
        BlockIndex
        """
        cdef:
            BlockIndex y
            ndarray[int32_t, ndim=1] xloc, xlen, yloc, ylen, out_bloc, out_blen
            Py_ssize_t xi = 0, yi = 0, max_len, result_indexer = 0
            int32_t cur_loc, cur_length, diff

        y = other.to_block_index()

        if self.length != y.length:
            raise Exception("Indices must reference same underlying length")

        xloc = self.blocs
        xlen = self.blengths
        yloc = y.blocs
        ylen = y.blengths

        # block may be split, but can't exceed original len / 2 + 1
        max_len = min(self.length, y.length) // 2 + 1
        out_bloc = np.empty(max_len, dtype=np.int32)
        out_blen = np.empty(max_len, dtype=np.int32)

        while True:
            # we are done (or possibly never began)
            if xi >= self.nblocks or yi >= y.nblocks:
                break

            # completely symmetric...would like to avoid code dup but oh well
            if xloc[xi] >= yloc[yi]:
                cur_loc = xloc[xi]
                diff = xloc[xi] - yloc[yi]

                if ylen[yi] <= diff:
                    # have to skip this block
                    yi += 1
                    continue

                if ylen[yi] - diff < xlen[xi]:
                    # take end of y block, move onward
                    cur_length = ylen[yi] - diff
                    yi += 1
                else:
                    # take end of x block
                    cur_length = xlen[xi]
                    xi += 1

            else:  # xloc[xi] < yloc[yi]
                cur_loc = yloc[yi]
                diff = yloc[yi] - xloc[xi]

                if xlen[xi] <= diff:
                    # have to skip this block
                    xi += 1
                    continue

                if xlen[xi] - diff < ylen[yi]:
                    # take end of x block, move onward
                    cur_length = xlen[xi] - diff
                    xi += 1
                else:
                    # take end of y block
                    cur_length = ylen[yi]
                    yi += 1

            out_bloc[result_indexer] = cur_loc
            out_blen[result_indexer] = cur_length
            result_indexer += 1

        out_bloc = out_bloc[:result_indexer]
        out_blen = out_blen[:result_indexer]

        return BlockIndex(self.length, out_bloc, out_blen)

    cpdef BlockIndex make_union(self, SparseIndex y):
        """
        Combine together two BlockIndex objects, accepting indices if contained
        in one or the other

        Parameters
        ----------
        other : SparseIndex

        Notes
        -----
        union is a protected keyword in Cython, hence make_union

        Returns
        -------
        BlockIndex
        """
        return BlockUnion(self, y.to_block_index()).result

    cpdef Py_ssize_t lookup(self, Py_ssize_t index):
        """
        Return the internal location if value exists on given index.
        Return -1 otherwise.
        """
        cdef:
            Py_ssize_t i, cum_len
            ndarray[int32_t, ndim=1] locs, lens

        locs = self.blocs
        lens = self.blengths

        if self.nblocks == 0:
            return -1
        elif index < locs[0]:
            return -1

        cum_len = 0
        for i in range(self.nblocks):
            if index >= locs[i] and index < locs[i] + lens[i]:
                return cum_len + index - locs[i]
            cum_len += lens[i]

        return -1

    @cython.wraparound(False)
    cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer):
        """
        Vectorized lookup, returns ndarray[int32_t]
        """
        cdef:
            Py_ssize_t n, i, j, ind_val
            ndarray[int32_t, ndim=1] locs, lens
            ndarray[int32_t, ndim=1] results

        locs = self.blocs
        lens = self.blengths

        n = len(indexer)
        results = np.empty(n, dtype=np.int32)
        results[:] = -1

        if self.npoints == 0:
            return results

        for i in range(n):
            ind_val = indexer[i]
            if not (ind_val < 0 or self.length <= ind_val):
                cum_len = 0
                for j in range(self.nblocks):
                    if ind_val >= locs[j] and ind_val < locs[j] + lens[j]:
                        results[i] = cum_len + ind_val - locs[j]
                    cum_len += lens[j]
        return results


@cython.internal
cdef class BlockMerge:
    """
    Object-oriented approach makes sharing state between recursive functions a
    lot easier and reduces code duplication
    """
    cdef:
        BlockIndex x, y, result
        ndarray xstart, xlen, xend, ystart, ylen, yend
        int32_t xi, yi  # block indices

    def __init__(self, BlockIndex x, BlockIndex y):
        self.x = x
        self.y = y

        if x.length != y.length:
            raise Exception("Indices must reference same underlying length")

        self.xstart = self.x.blocs
        self.ystart = self.y.blocs

        self.xend = self.x.blocs + self.x.blengths
        self.yend = self.y.blocs + self.y.blengths

        # self.xlen = self.x.blengths
        # self.ylen = self.y.blengths

        self.xi = 0
        self.yi = 0

        self.result = self._make_merged_blocks()

    cdef _make_merged_blocks(self):
        raise NotImplementedError

    cdef _set_current_indices(self, int32_t xi, int32_t yi, bint mode):
        if mode == 0:
            self.xi = xi
            self.yi = yi
        else:
            self.xi = yi
            self.yi = xi


@cython.internal
cdef class BlockUnion(BlockMerge):
    """
    Object-oriented approach makes sharing state between recursive functions a
    lot easier and reduces code duplication
    """

    cdef _make_merged_blocks(self):
        cdef:
            ndarray[int32_t, ndim=1] xstart, xend, ystart
            ndarray[int32_t, ndim=1] yend, out_bloc, out_blen
            int32_t nstart, nend
            Py_ssize_t max_len, result_indexer = 0

        xstart = self.xstart
        xend = self.xend
        ystart = self.ystart
        yend = self.yend

        max_len = min(self.x.length, self.y.length) // 2 + 1
        out_bloc = np.empty(max_len, dtype=np.int32)
        out_blen = np.empty(max_len, dtype=np.int32)

        while True:
            # we are done (or possibly never began)
            if self.xi >= self.x.nblocks and self.yi >= self.y.nblocks:
                break
            elif self.yi >= self.y.nblocks:
                # through with y, just pass through x blocks
                nstart = xstart[self.xi]
                nend = xend[self.xi]
                self.xi += 1
            elif self.xi >= self.x.nblocks:
                # through with x, just pass through y blocks
                nstart = ystart[self.yi]
                nend = yend[self.yi]
                self.yi += 1
            else:
                # find end of new block
                if xstart[self.xi] < ystart[self.yi]:
                    nstart = xstart[self.xi]
                    nend = self._find_next_block_end(0)
                else:
                    nstart = ystart[self.yi]
                    nend = self._find_next_block_end(1)

            out_bloc[result_indexer] = nstart
            out_blen[result_indexer] = nend - nstart
            result_indexer += 1

        out_bloc = out_bloc[:result_indexer]
        out_blen = out_blen[:result_indexer]

        return BlockIndex(self.x.length, out_bloc, out_blen)

    cdef int32_t _find_next_block_end(self, bint mode) except -1:
        """
        Wow, this got complicated in a hurry

        mode 0: block started in index x
        mode 1: block started in index y
        """
        cdef:
            ndarray[int32_t, ndim=1] xstart, xend, ystart, yend
            int32_t xi, yi, ynblocks, nend

        if mode != 0 and mode != 1:
            raise Exception("Mode must be 0 or 1")

        # so symmetric code will work
        if mode == 0:
            xstart = self.xstart
            xend = self.xend
            xi = self.xi

            ystart = self.ystart
            yend = self.yend
            yi = self.yi
            ynblocks = self.y.nblocks
        else:
            xstart = self.ystart
            xend = self.yend
            xi = self.yi

            ystart = self.xstart
            yend = self.xend
            yi = self.xi
            ynblocks = self.x.nblocks

        nend = xend[xi]

        # done with y?
        if yi == ynblocks:
            self._set_current_indices(xi + 1, yi, mode)
            return nend
        elif nend < ystart[yi]:
            # block ends before y block
            self._set_current_indices(xi + 1, yi, mode)
            return nend
        else:
            while yi < ynblocks and nend > yend[yi]:
                yi += 1

            self._set_current_indices(xi + 1, yi, mode)

            if yi == ynblocks:
                return nend

            if nend < ystart[yi]:
                # we're done, return the block end
                return nend
            else:
                # merge blocks, continue searching
                # this also catches the case where blocks
                return self._find_next_block_end(1 - mode)


# -----------------------------------------------------------------------------
# Sparse arithmetic

include "sparse_op_helper.pxi"


# -----------------------------------------------------------------------------
# SparseArray mask create operations

def make_mask_object_ndarray(ndarray[object, ndim=1] arr, object fill_value):
    cdef:
        object value
        Py_ssize_t i
        Py_ssize_t new_length = len(arr)
        ndarray[int8_t, ndim=1] mask

    mask = np.ones(new_length, dtype=np.int8)

    for i in range(new_length):
        value = arr[i]
        if value == fill_value and type(value) == type(fill_value):
            mask[i] = 0

    return mask.view(dtype=bool)