Browse Source

[Backport #14723] MAINT: Ignore .pxi files

All `.pxi` files carry a warning not to edit them directly, so why not
have Git ignore any changes made there? In addition, this PR
deletes all of the `.pxi` files from the codebase because they are
automatically generated from the `.pxi.in` files.

Author: gfyoung <gfyoung17@gmail.com>

Closes #14723 from gfyoung/gitignore-pxi-ignore and squashes the following commits:

fe0c410 [gfyoung] MAINT: Ignore .pxi files

(cherry picked from commit ee108164ee)
pull/15648/head
gfyoung 6 years ago committed by Joris Van den Bossche
parent
commit
f30742feaa
  1. 1
      .gitignore
  2. 2764
      pandas/src/algos_common_helper.pxi
  3. 1375
      pandas/src/algos_groupby_helper.pxi
  4. 4949
      pandas/src/algos_take_helper.pxi
  5. 860
      pandas/src/hashtable_class_helper.pxi
  6. 197
      pandas/src/hashtable_func_helper.pxi
  7. 1899
      pandas/src/join_helper.pxi
  8. 373
      pandas/src/joins_func_helper.pxi
  9. 5864
      pandas/src/sparse_op_helper.pxi

1
.gitignore vendored

@@ -27,6 +27,7 @@
*.class
*.dll
*.exe
*.pxi
*.o
*.py[ocd]
*.so

2764
pandas/src/algos_common_helper.pxi

@@ -1,2764 +0,0 @@
"""
Template for each `dtype` helper function using 1-d template
# 1-d template
- map_indices
- pad
- pad_1d
- pad_2d
- backfill
- backfill_1d
- backfill_2d
- is_monotonic
- arrmap
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""
#----------------------------------------------------------------------
# 1-d template
#----------------------------------------------------------------------
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef map_indices_float64(ndarray[float64_t] index):
    """
    Build a dict that maps each value of `index` to its position.

    Example::

        array(['hi', 'there']) --> {'hi': 0, 'there': 1}

    If a value occurs more than once, the last position wins because
    later assignments overwrite earlier ones. Implemented in Cython for
    speed.
    """
    cdef:
        Py_ssize_t pos, n = len(index)
        dict positions = {}

    for pos in range(n):
        positions[index[pos]] = pos

    return positions
@cython.boundscheck(False)
@cython.wraparound(False)
def pad_float64(ndarray[float64_t] old, ndarray[float64_t] new,
                limit=None):
    """
    Build a forward-fill indexer that maps each element of `new` onto `old`.

    Both arrays are walked with forward-only cursors.
    NOTE(review): this presumably requires both to be sorted ascending;
    nothing here checks it -- confirm against callers.

    Parameters
    ----------
    old, new : ndarray[float64_t]
    limit : int, optional
        Maximum number of non-exact matches filled from one `old` entry.
        ``None`` means no cap.

    Returns
    -------
    ndarray[int64_t]
        For each position of `new`, the index into `old` of the entry to
        use, or -1 where nothing was assigned.

    Raises
    ------
    ValueError
        If `limit` is negative.
    """
    cdef:
        Py_ssize_t i, j, nleft, nright
        ndarray[int64_t, ndim=1] indexer
        float64_t cur, next_val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    nleft = len(old)
    nright = len(new)
    indexer = np.empty(nright, dtype=np.int64)
    indexer.fill(-1)

    if limit is None:
        lim = nright
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    # Nothing to match: one side is empty, or all of `new` precedes old[0].
    if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
        return indexer

    i = j = 0
    cur = old[0]

    # Leading `new` values smaller than old[0] keep their -1.
    while j <= nright - 1 and new[j] < cur:
        j += 1

    while True:
        if j == nright:
            break

        if i == nleft - 1:
            # Last `old` entry: all remaining `new` values map to it,
            # subject to the limit for non-exact matches.
            while j < nright:
                if new[j] == cur:
                    indexer[j] = i
                elif new[j] > cur and fill_count < lim:
                    indexer[j] = i
                    fill_count += 1
                j += 1
            break

        next_val = old[i + 1]

        while j < nright and cur <= new[j] < next_val:
            if new[j] == cur:
                indexer[j] = i
            elif fill_count < lim:
                indexer[j] = i
                fill_count += 1
            j += 1

        # The limit applies per `old` entry, so reset before advancing.
        fill_count = 0
        i += 1
        cur = next_val

    return indexer
@cython.boundscheck(False)
@cython.wraparound(False)
def pad_inplace_float64(ndarray[float64_t] values,
                        ndarray[uint8_t, cast=True] mask,
                        limit=None):
    """
    Forward-fill the masked entries of `values` in place.

    Every position where `mask` is set is overwritten with the most
    recent unmasked value (``values[0]`` until one has been seen). At
    most `limit` consecutive masked positions are filled; ``None`` means
    no cap. Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked for non-empty input).
    """
    cdef:
        Py_ssize_t i, N
        float64_t val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    N = len(values)

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    val = values[0]
    for i in range(N):
        if mask[i]:
            if fill_count >= lim:
                continue
            fill_count += 1
            values[i] = val
        else:
            # An unmasked value restarts the run and becomes the new filler.
            fill_count = 0
            val = values[i]
@cython.boundscheck(False)
@cython.wraparound(False)
def pad_2d_inplace_float64(ndarray[float64_t, ndim=2] values,
                           ndarray[uint8_t, ndim=2] mask,
                           limit=None):
    """
    Forward-fill the masked entries of a 2-d array in place, row by row.

    For each index ``j`` along the first axis, positions along the second
    axis where `mask` is set are overwritten with the most recent unmasked
    value of that row (``values[j, 0]`` until one has been seen). At most
    `limit` consecutive masked positions are filled; ``None`` means no
    cap. Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked when the second axis is
        non-empty).
    """
    cdef:
        Py_ssize_t i, j, N, K
        float64_t val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for axes longer than INT_MAX.
        int64_t lim, fill_count = 0

    K, N = (<object> values).shape

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    for j in range(K):
        # Each row is filled independently.
        fill_count = 0
        val = values[j, 0]
        for i in range(N):
            if mask[j, i]:
                if fill_count >= lim:
                    continue
                fill_count += 1
                values[j, i] = val
            else:
                fill_count = 0
                val = values[j, i]
"""
Backfilling logic for generating fill vector

Diagram of what's going on

Old      New    Fill vector    Mask
         .        0               1
         .        0               1
         .        0               1
A        A        0               1
         .        1               1
         .        1               1
         .        1               1
         .        1               1
         .        1               1
B        B        1               1
         .        2               1
         .        2               1
         .        2               1
C        C        2               1
         .                        0
         .                        0
D
"""
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_float64(ndarray[float64_t] old, ndarray[float64_t] new,
                     limit=None):
    """
    Build a backward-fill indexer that maps each element of `new` onto `old`.

    Both arrays are walked from the end with backward-only cursors.
    NOTE(review): this presumably requires both to be sorted ascending;
    nothing here checks it -- confirm against callers.

    Parameters
    ----------
    old, new : ndarray[float64_t]
    limit : int, optional
        Maximum number of non-exact matches filled from one `old` entry.
        ``None`` means no cap.

    Returns
    -------
    ndarray[int64_t]
        For each position of `new`, the index into `old` of the entry to
        use, or -1 where nothing was assigned.

    Raises
    ------
    ValueError
        If `limit` is negative.
    """
    cdef:
        Py_ssize_t i, j, nleft, nright
        ndarray[int64_t, ndim=1] indexer
        float64_t cur, prev
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    nleft = len(old)
    nright = len(new)
    indexer = np.empty(nright, dtype=np.int64)
    indexer.fill(-1)

    if limit is None:
        lim = nright
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    # Nothing to match: one side is empty, or all of `new` follows old[-1].
    if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
        return indexer

    i = nleft - 1
    j = nright - 1
    cur = old[nleft - 1]

    # Trailing `new` values larger than the last `old` value keep their -1.
    while j >= 0 and new[j] > cur:
        j -= 1

    while True:
        if j < 0:
            break

        if i == 0:
            # First `old` entry: all remaining `new` values map to it,
            # subject to the limit for non-exact matches.
            while j >= 0:
                if new[j] == cur:
                    indexer[j] = i
                elif new[j] < cur and fill_count < lim:
                    indexer[j] = i
                    fill_count += 1
                j -= 1
            break

        prev = old[i - 1]

        while j >= 0 and prev < new[j] <= cur:
            if new[j] == cur:
                indexer[j] = i
            elif new[j] < cur and fill_count < lim:
                indexer[j] = i
                fill_count += 1
            j -= 1

        # The limit applies per `old` entry, so reset before stepping back.
        fill_count = 0
        i -= 1
        cur = prev

    return indexer
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_inplace_float64(ndarray[float64_t] values,
                             ndarray[uint8_t, cast=True] mask,
                             limit=None):
    """
    Backward-fill the masked entries of `values` in place.

    The array is scanned from the end. Every position where `mask` is set
    is overwritten with the nearest following unmasked value
    (``values[N - 1]`` until one has been seen). At most `limit`
    consecutive masked positions are filled; ``None`` means no cap.
    Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked for non-empty input).
    """
    cdef:
        Py_ssize_t i, N
        float64_t val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    N = len(values)

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    val = values[N - 1]
    for i in range(N - 1, -1, -1):
        if mask[i]:
            if fill_count >= lim:
                continue
            fill_count += 1
            values[i] = val
        else:
            # An unmasked value restarts the run and becomes the new filler.
            fill_count = 0
            val = values[i]
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_2d_inplace_float64(ndarray[float64_t, ndim=2] values,
                                ndarray[uint8_t, ndim=2] mask,
                                limit=None):
    """
    Backward-fill the masked entries of a 2-d array in place, row by row.

    For each index ``j`` along the first axis, the second axis is scanned
    from the end and positions where `mask` is set are overwritten with
    the nearest following unmasked value of that row (``values[j, N - 1]``
    until one has been seen). At most `limit` consecutive masked positions
    are filled; ``None`` means no cap. Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked when the second axis is
        non-empty).
    """
    cdef:
        Py_ssize_t i, j, N, K
        float64_t val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for axes longer than INT_MAX.
        int64_t lim, fill_count = 0

    K, N = (<object> values).shape

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    for j in range(K):
        # Each row is filled independently.
        fill_count = 0
        val = values[j, N - 1]
        for i in range(N - 1, -1, -1):
            if mask[j, i]:
                if fill_count >= lim:
                    continue
                fill_count += 1
                values[j, i] = val
            else:
                fill_count = 0
                val = values[j, i]
@cython.boundscheck(False)
@cython.wraparound(False)
def is_monotonic_float64(ndarray[float64_t] arr, bint timelike):
    """
    Scan `arr` once and report its ordering properties.

    Parameters
    ----------
    arr : ndarray[float64_t]
    timelike : bint
        If true, elements equal to ``iNaT`` are treated as missing.

    Returns
    -------
    is_monotonic_inc, is_monotonic_dec, is_unique
        For arrays of two or more elements the third flag is only true
        when the array is also monotonic in at least one direction.
    """
    cdef:
        Py_ssize_t pos, n
        float64_t last, here
        bint rising = 1
        bint falling = 1
        bint distinct = 1

    n = len(arr)

    if n == 0:
        return True, True, True

    if n == 1:
        if arr[0] != arr[0] or (timelike and arr[0] == iNaT):
            # the lone value is missing
            return False, False, True
        return True, True, True

    if timelike and arr[0] == iNaT:
        return False, False, True

    with nogil:
        last = arr[0]
        for pos in range(1, n):
            here = arr[pos]
            if timelike and here == iNaT:
                rising = 0
                falling = 0
                break
            if here < last:
                rising = 0
            elif here > last:
                falling = 0
            elif here == last:
                distinct = 0
            else:
                # none of <, >, == held: one of the two values is NaN
                rising = 0
                falling = 0
                break
            # Once both directions are ruled out, stop scanning.
            if not (rising or falling):
                break
            last = here

    return rising, falling, distinct and (rising or falling)
@cython.wraparound(False)
@cython.boundscheck(False)
def arrmap_float64(ndarray[float64_t] index, object func):
    """
    Call `func` on every element of `index` and collect the results.

    The outputs are gathered in an object array, which is then passed to
    ``pandas.lib.maybe_convert_objects``; that function's return value is
    returned.
    """
    cdef:
        Py_ssize_t n = index.shape[0]
        Py_ssize_t k = 0
        ndarray[object] out = np.empty(n, dtype=np.object_)

    # Function-level import; presumably avoids a circular import at module
    # load time -- TODO confirm.
    from pandas.lib import maybe_convert_objects

    for k in range(n):
        out[k] = func(index[k])

    return maybe_convert_objects(out)
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef map_indices_float32(ndarray[float32_t] index):
    """
    Build a dict that maps each value of `index` to its position.

    Example::

        array(['hi', 'there']) --> {'hi': 0, 'there': 1}

    If a value occurs more than once, the last position wins because
    later assignments overwrite earlier ones. Implemented in Cython for
    speed.
    """
    cdef:
        Py_ssize_t pos, n = len(index)
        dict positions = {}

    for pos in range(n):
        positions[index[pos]] = pos

    return positions
@cython.boundscheck(False)
@cython.wraparound(False)
def pad_float32(ndarray[float32_t] old, ndarray[float32_t] new,
                limit=None):
    """
    Build a forward-fill indexer that maps each element of `new` onto `old`.

    Both arrays are walked with forward-only cursors.
    NOTE(review): this presumably requires both to be sorted ascending;
    nothing here checks it -- confirm against callers.

    Parameters
    ----------
    old, new : ndarray[float32_t]
    limit : int, optional
        Maximum number of non-exact matches filled from one `old` entry.
        ``None`` means no cap.

    Returns
    -------
    ndarray[int64_t]
        For each position of `new`, the index into `old` of the entry to
        use, or -1 where nothing was assigned.

    Raises
    ------
    ValueError
        If `limit` is negative.
    """
    cdef:
        Py_ssize_t i, j, nleft, nright
        ndarray[int64_t, ndim=1] indexer
        float32_t cur, next_val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    nleft = len(old)
    nright = len(new)
    indexer = np.empty(nright, dtype=np.int64)
    indexer.fill(-1)

    if limit is None:
        lim = nright
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    # Nothing to match: one side is empty, or all of `new` precedes old[0].
    if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
        return indexer

    i = j = 0
    cur = old[0]

    # Leading `new` values smaller than old[0] keep their -1.
    while j <= nright - 1 and new[j] < cur:
        j += 1

    while True:
        if j == nright:
            break

        if i == nleft - 1:
            # Last `old` entry: all remaining `new` values map to it,
            # subject to the limit for non-exact matches.
            while j < nright:
                if new[j] == cur:
                    indexer[j] = i
                elif new[j] > cur and fill_count < lim:
                    indexer[j] = i
                    fill_count += 1
                j += 1
            break

        next_val = old[i + 1]

        while j < nright and cur <= new[j] < next_val:
            if new[j] == cur:
                indexer[j] = i
            elif fill_count < lim:
                indexer[j] = i
                fill_count += 1
            j += 1

        # The limit applies per `old` entry, so reset before advancing.
        fill_count = 0
        i += 1
        cur = next_val

    return indexer
@cython.boundscheck(False)
@cython.wraparound(False)
def pad_inplace_float32(ndarray[float32_t] values,
                        ndarray[uint8_t, cast=True] mask,
                        limit=None):
    """
    Forward-fill the masked entries of `values` in place.

    Every position where `mask` is set is overwritten with the most
    recent unmasked value (``values[0]`` until one has been seen). At
    most `limit` consecutive masked positions are filled; ``None`` means
    no cap. Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked for non-empty input).
    """
    cdef:
        Py_ssize_t i, N
        float32_t val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    N = len(values)

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    val = values[0]
    for i in range(N):
        if mask[i]:
            if fill_count >= lim:
                continue
            fill_count += 1
            values[i] = val
        else:
            # An unmasked value restarts the run and becomes the new filler.
            fill_count = 0
            val = values[i]
@cython.boundscheck(False)
@cython.wraparound(False)
def pad_2d_inplace_float32(ndarray[float32_t, ndim=2] values,
                           ndarray[uint8_t, ndim=2] mask,
                           limit=None):
    """
    Forward-fill the masked entries of a 2-d array in place, row by row.

    For each index ``j`` along the first axis, positions along the second
    axis where `mask` is set are overwritten with the most recent unmasked
    value of that row (``values[j, 0]`` until one has been seen). At most
    `limit` consecutive masked positions are filled; ``None`` means no
    cap. Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked when the second axis is
        non-empty).
    """
    cdef:
        Py_ssize_t i, j, N, K
        float32_t val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for axes longer than INT_MAX.
        int64_t lim, fill_count = 0

    K, N = (<object> values).shape

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    for j in range(K):
        # Each row is filled independently.
        fill_count = 0
        val = values[j, 0]
        for i in range(N):
            if mask[j, i]:
                if fill_count >= lim:
                    continue
                fill_count += 1
                values[j, i] = val
            else:
                fill_count = 0
                val = values[j, i]
"""
Backfilling logic for generating fill vector

Diagram of what's going on

Old      New    Fill vector    Mask
         .        0               1
         .        0               1
         .        0               1
A        A        0               1
         .        1               1
         .        1               1
         .        1               1
         .        1               1
         .        1               1
B        B        1               1
         .        2               1
         .        2               1
         .        2               1
C        C        2               1
         .                        0
         .                        0
D
"""
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_float32(ndarray[float32_t] old, ndarray[float32_t] new,
                     limit=None):
    """
    Build a backward-fill indexer that maps each element of `new` onto `old`.

    Both arrays are walked from the end with backward-only cursors.
    NOTE(review): this presumably requires both to be sorted ascending;
    nothing here checks it -- confirm against callers.

    Parameters
    ----------
    old, new : ndarray[float32_t]
    limit : int, optional
        Maximum number of non-exact matches filled from one `old` entry.
        ``None`` means no cap.

    Returns
    -------
    ndarray[int64_t]
        For each position of `new`, the index into `old` of the entry to
        use, or -1 where nothing was assigned.

    Raises
    ------
    ValueError
        If `limit` is negative.
    """
    cdef:
        Py_ssize_t i, j, nleft, nright
        ndarray[int64_t, ndim=1] indexer
        float32_t cur, prev
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    nleft = len(old)
    nright = len(new)
    indexer = np.empty(nright, dtype=np.int64)
    indexer.fill(-1)

    if limit is None:
        lim = nright
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    # Nothing to match: one side is empty, or all of `new` follows old[-1].
    if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
        return indexer

    i = nleft - 1
    j = nright - 1
    cur = old[nleft - 1]

    # Trailing `new` values larger than the last `old` value keep their -1.
    while j >= 0 and new[j] > cur:
        j -= 1

    while True:
        if j < 0:
            break

        if i == 0:
            # First `old` entry: all remaining `new` values map to it,
            # subject to the limit for non-exact matches.
            while j >= 0:
                if new[j] == cur:
                    indexer[j] = i
                elif new[j] < cur and fill_count < lim:
                    indexer[j] = i
                    fill_count += 1
                j -= 1
            break

        prev = old[i - 1]

        while j >= 0 and prev < new[j] <= cur:
            if new[j] == cur:
                indexer[j] = i
            elif new[j] < cur and fill_count < lim:
                indexer[j] = i
                fill_count += 1
            j -= 1

        # The limit applies per `old` entry, so reset before stepping back.
        fill_count = 0
        i -= 1
        cur = prev

    return indexer
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_inplace_float32(ndarray[float32_t] values,
                             ndarray[uint8_t, cast=True] mask,
                             limit=None):
    """
    Backward-fill the masked entries of `values` in place.

    The array is scanned from the end. Every position where `mask` is set
    is overwritten with the nearest following unmasked value
    (``values[N - 1]`` until one has been seen). At most `limit`
    consecutive masked positions are filled; ``None`` means no cap.
    Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked for non-empty input).
    """
    cdef:
        Py_ssize_t i, N
        float32_t val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    N = len(values)

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    val = values[N - 1]
    for i in range(N - 1, -1, -1):
        if mask[i]:
            if fill_count >= lim:
                continue
            fill_count += 1
            values[i] = val
        else:
            # An unmasked value restarts the run and becomes the new filler.
            fill_count = 0
            val = values[i]
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_2d_inplace_float32(ndarray[float32_t, ndim=2] values,
                                ndarray[uint8_t, ndim=2] mask,
                                limit=None):
    """
    Backward-fill the masked entries of a 2-d array in place, row by row.

    For each index ``j`` along the first axis, the second axis is scanned
    from the end and positions where `mask` is set are overwritten with
    the nearest following unmasked value of that row (``values[j, N - 1]``
    until one has been seen). At most `limit` consecutive masked positions
    are filled; ``None`` means no cap. Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked when the second axis is
        non-empty).
    """
    cdef:
        Py_ssize_t i, j, N, K
        float32_t val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for axes longer than INT_MAX.
        int64_t lim, fill_count = 0

    K, N = (<object> values).shape

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    for j in range(K):
        # Each row is filled independently.
        fill_count = 0
        val = values[j, N - 1]
        for i in range(N - 1, -1, -1):
            if mask[j, i]:
                if fill_count >= lim:
                    continue
                fill_count += 1
                values[j, i] = val
            else:
                fill_count = 0
                val = values[j, i]
@cython.boundscheck(False)
@cython.wraparound(False)
def is_monotonic_float32(ndarray[float32_t] arr, bint timelike):
    """
    Scan `arr` once and report its ordering properties.

    Parameters
    ----------
    arr : ndarray[float32_t]
    timelike : bint
        If true, elements equal to ``iNaT`` are treated as missing.

    Returns
    -------
    is_monotonic_inc, is_monotonic_dec, is_unique
        For arrays of two or more elements the third flag is only true
        when the array is also monotonic in at least one direction.
    """
    cdef:
        Py_ssize_t pos, n
        float32_t last, here
        bint rising = 1
        bint falling = 1
        bint distinct = 1

    n = len(arr)

    if n == 0:
        return True, True, True

    if n == 1:
        if arr[0] != arr[0] or (timelike and arr[0] == iNaT):
            # the lone value is missing
            return False, False, True
        return True, True, True

    if timelike and arr[0] == iNaT:
        return False, False, True

    with nogil:
        last = arr[0]
        for pos in range(1, n):
            here = arr[pos]
            if timelike and here == iNaT:
                rising = 0
                falling = 0
                break
            if here < last:
                rising = 0
            elif here > last:
                falling = 0
            elif here == last:
                distinct = 0
            else:
                # none of <, >, == held: one of the two values is NaN
                rising = 0
                falling = 0
                break
            # Once both directions are ruled out, stop scanning.
            if not (rising or falling):
                break
            last = here

    return rising, falling, distinct and (rising or falling)
@cython.wraparound(False)
@cython.boundscheck(False)
def arrmap_float32(ndarray[float32_t] index, object func):
    """
    Call `func` on every element of `index` and collect the results.

    The outputs are gathered in an object array, which is then passed to
    ``pandas.lib.maybe_convert_objects``; that function's return value is
    returned.
    """
    cdef:
        Py_ssize_t n = index.shape[0]
        Py_ssize_t k = 0
        ndarray[object] out = np.empty(n, dtype=np.object_)

    # Function-level import; presumably avoids a circular import at module
    # load time -- TODO confirm.
    from pandas.lib import maybe_convert_objects

    for k in range(n):
        out[k] = func(index[k])

    return maybe_convert_objects(out)
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef map_indices_object(ndarray[object] index):
    """
    Build a dict that maps each value of `index` to its position.

    Example::

        array(['hi', 'there']) --> {'hi': 0, 'there': 1}

    If a value occurs more than once, the last position wins because
    later assignments overwrite earlier ones. Implemented in Cython for
    speed.
    """
    cdef:
        Py_ssize_t pos, n = len(index)
        dict positions = {}

    for pos in range(n):
        positions[index[pos]] = pos

    return positions
@cython.boundscheck(False)
@cython.wraparound(False)
def pad_object(ndarray[object] old, ndarray[object] new,
               limit=None):
    """
    Build a forward-fill indexer that maps each element of `new` onto `old`.

    Both arrays are walked with forward-only cursors, and elements are
    compared with the ordinary Python comparison operators.
    NOTE(review): this presumably requires both to be sorted ascending;
    nothing here checks it -- confirm against callers.

    Parameters
    ----------
    old, new : ndarray[object]
    limit : int, optional
        Maximum number of non-exact matches filled from one `old` entry.
        ``None`` means no cap.

    Returns
    -------
    ndarray[int64_t]
        For each position of `new`, the index into `old` of the entry to
        use, or -1 where nothing was assigned.

    Raises
    ------
    ValueError
        If `limit` is negative.
    """
    cdef:
        Py_ssize_t i, j, nleft, nright
        ndarray[int64_t, ndim=1] indexer
        object cur, next_val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    nleft = len(old)
    nright = len(new)
    indexer = np.empty(nright, dtype=np.int64)
    indexer.fill(-1)

    if limit is None:
        lim = nright
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    # Nothing to match: one side is empty, or all of `new` precedes old[0].
    if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
        return indexer

    i = j = 0
    cur = old[0]

    # Leading `new` values smaller than old[0] keep their -1.
    while j <= nright - 1 and new[j] < cur:
        j += 1

    while True:
        if j == nright:
            break

        if i == nleft - 1:
            # Last `old` entry: all remaining `new` values map to it,
            # subject to the limit for non-exact matches.
            while j < nright:
                if new[j] == cur:
                    indexer[j] = i
                elif new[j] > cur and fill_count < lim:
                    indexer[j] = i
                    fill_count += 1
                j += 1
            break

        next_val = old[i + 1]

        while j < nright and cur <= new[j] < next_val:
            if new[j] == cur:
                indexer[j] = i
            elif fill_count < lim:
                indexer[j] = i
                fill_count += 1
            j += 1

        # The limit applies per `old` entry, so reset before advancing.
        fill_count = 0
        i += 1
        cur = next_val

    return indexer
@cython.boundscheck(False)
@cython.wraparound(False)
def pad_inplace_object(ndarray[object] values,
                       ndarray[uint8_t, cast=True] mask,
                       limit=None):
    """
    Forward-fill the masked entries of `values` in place.

    Every position where `mask` is set is overwritten with the most
    recent unmasked value (``values[0]`` until one has been seen). At
    most `limit` consecutive masked positions are filled; ``None`` means
    no cap. Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked for non-empty input).
    """
    cdef:
        Py_ssize_t i, N
        object val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    N = len(values)

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    val = values[0]
    for i in range(N):
        if mask[i]:
            if fill_count >= lim:
                continue
            fill_count += 1
            values[i] = val
        else:
            # An unmasked value restarts the run and becomes the new filler.
            fill_count = 0
            val = values[i]
@cython.boundscheck(False)
@cython.wraparound(False)
def pad_2d_inplace_object(ndarray[object, ndim=2] values,
                          ndarray[uint8_t, ndim=2] mask,
                          limit=None):
    """
    Forward-fill the masked entries of a 2-d array in place, row by row.

    For each index ``j`` along the first axis, positions along the second
    axis where `mask` is set are overwritten with the most recent unmasked
    value of that row (``values[j, 0]`` until one has been seen). At most
    `limit` consecutive masked positions are filled; ``None`` means no
    cap. Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked when the second axis is
        non-empty).
    """
    cdef:
        Py_ssize_t i, j, N, K
        object val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for axes longer than INT_MAX.
        int64_t lim, fill_count = 0

    K, N = (<object> values).shape

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    for j in range(K):
        # Each row is filled independently.
        fill_count = 0
        val = values[j, 0]
        for i in range(N):
            if mask[j, i]:
                if fill_count >= lim:
                    continue
                fill_count += 1
                values[j, i] = val
            else:
                fill_count = 0
                val = values[j, i]
"""
Backfilling logic for generating fill vector

Diagram of what's going on

Old      New    Fill vector    Mask
         .        0               1
         .        0               1
         .        0               1
A        A        0               1
         .        1               1
         .        1               1
         .        1               1
         .        1               1
         .        1               1
B        B        1               1
         .        2               1
         .        2               1
         .        2               1
C        C        2               1
         .                        0
         .                        0
D
"""
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_object(ndarray[object] old, ndarray[object] new,
                    limit=None):
    """
    Build a backward-fill indexer that maps each element of `new` onto `old`.

    Both arrays are walked from the end with backward-only cursors, and
    elements are compared with the ordinary Python comparison operators.
    NOTE(review): this presumably requires both to be sorted ascending;
    nothing here checks it -- confirm against callers.

    Parameters
    ----------
    old, new : ndarray[object]
    limit : int, optional
        Maximum number of non-exact matches filled from one `old` entry.
        ``None`` means no cap.

    Returns
    -------
    ndarray[int64_t]
        For each position of `new`, the index into `old` of the entry to
        use, or -1 where nothing was assigned.

    Raises
    ------
    ValueError
        If `limit` is negative.
    """
    cdef:
        Py_ssize_t i, j, nleft, nright
        ndarray[int64_t, ndim=1] indexer
        object cur, prev
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    nleft = len(old)
    nright = len(new)
    indexer = np.empty(nright, dtype=np.int64)
    indexer.fill(-1)

    if limit is None:
        lim = nright
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    # Nothing to match: one side is empty, or all of `new` follows old[-1].
    if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
        return indexer

    i = nleft - 1
    j = nright - 1
    cur = old[nleft - 1]

    # Trailing `new` values larger than the last `old` value keep their -1.
    while j >= 0 and new[j] > cur:
        j -= 1

    while True:
        if j < 0:
            break

        if i == 0:
            # First `old` entry: all remaining `new` values map to it,
            # subject to the limit for non-exact matches.
            while j >= 0:
                if new[j] == cur:
                    indexer[j] = i
                elif new[j] < cur and fill_count < lim:
                    indexer[j] = i
                    fill_count += 1
                j -= 1
            break

        prev = old[i - 1]

        while j >= 0 and prev < new[j] <= cur:
            if new[j] == cur:
                indexer[j] = i
            elif new[j] < cur and fill_count < lim:
                indexer[j] = i
                fill_count += 1
            j -= 1

        # The limit applies per `old` entry, so reset before stepping back.
        fill_count = 0
        i -= 1
        cur = prev

    return indexer
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_inplace_object(ndarray[object] values,
                            ndarray[uint8_t, cast=True] mask,
                            limit=None):
    """
    Backward-fill the masked entries of `values` in place.

    The array is scanned from the end. Every position where `mask` is set
    is overwritten with the nearest following unmasked value
    (``values[N - 1]`` until one has been seen). At most `limit`
    consecutive masked positions are filled; ``None`` means no cap.
    Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked for non-empty input).
    """
    cdef:
        Py_ssize_t i, N
        object val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    N = len(values)

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    val = values[N - 1]
    for i in range(N - 1, -1, -1):
        if mask[i]:
            if fill_count >= lim:
                continue
            fill_count += 1
            values[i] = val
        else:
            # An unmasked value restarts the run and becomes the new filler.
            fill_count = 0
            val = values[i]
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_2d_inplace_object(ndarray[object, ndim=2] values,
                               ndarray[uint8_t, ndim=2] mask,
                               limit=None):
    """
    Backward-fill the masked entries of a 2-d array in place, row by row.

    For each index ``j`` along the first axis, the second axis is scanned
    from the end and positions where `mask` is set are overwritten with
    the nearest following unmasked value of that row (``values[j, N - 1]``
    until one has been seen). At most `limit` consecutive masked positions
    are filled; ``None`` means no cap. Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked when the second axis is
        non-empty).
    """
    cdef:
        Py_ssize_t i, j, N, K
        object val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for axes longer than INT_MAX.
        int64_t lim, fill_count = 0

    K, N = (<object> values).shape

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    for j in range(K):
        # Each row is filled independently.
        fill_count = 0
        val = values[j, N - 1]
        for i in range(N - 1, -1, -1):
            if mask[j, i]:
                if fill_count >= lim:
                    continue
                fill_count += 1
                values[j, i] = val
            else:
                fill_count = 0
                val = values[j, i]
@cython.boundscheck(False)
@cython.wraparound(False)
def is_monotonic_object(ndarray[object] arr, bint timelike):
    """
    Scan `arr` once and report its ordering properties.

    Elements are compared with the ordinary Python comparison operators.

    Parameters
    ----------
    arr : ndarray[object]
    timelike : bint
        If true, elements equal to ``iNaT`` are treated as missing.

    Returns
    -------
    is_monotonic_inc, is_monotonic_dec, is_unique
        For arrays of two or more elements the third flag is only true
        when the array is also monotonic in at least one direction.
    """
    cdef:
        Py_ssize_t pos, n
        object last, here
        bint rising = 1
        bint falling = 1
        bint distinct = 1

    n = len(arr)

    if n == 0:
        return True, True, True

    if n == 1:
        if arr[0] != arr[0] or (timelike and arr[0] == iNaT):
            # the lone value is missing
            return False, False, True
        return True, True, True

    if timelike and arr[0] == iNaT:
        return False, False, True

    last = arr[0]
    for pos in range(1, n):
        here = arr[pos]
        if timelike and here == iNaT:
            rising = 0
            falling = 0
            break
        if here < last:
            rising = 0
        elif here > last:
            falling = 0
        elif here == last:
            distinct = 0
        else:
            # none of <, >, == held: one of the two values is NaN
            rising = 0
            falling = 0
            break
        # Once both directions are ruled out, stop scanning.
        if not (rising or falling):
            break
        last = here

    return rising, falling, distinct and (rising or falling)
@cython.wraparound(False)
@cython.boundscheck(False)
def arrmap_object(ndarray[object] index, object func):
    """
    Call `func` on every element of `index` and collect the results.

    The outputs are gathered in an object array, which is then passed to
    ``pandas.lib.maybe_convert_objects``; that function's return value is
    returned.
    """
    cdef:
        Py_ssize_t n = index.shape[0]
        Py_ssize_t k = 0
        ndarray[object] out = np.empty(n, dtype=np.object_)

    # Function-level import; presumably avoids a circular import at module
    # load time -- TODO confirm.
    from pandas.lib import maybe_convert_objects

    for k in range(n):
        out[k] = func(index[k])

    return maybe_convert_objects(out)
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef map_indices_int32(ndarray[int32_t] index):
    """
    Build a dict that maps each value of `index` to its position.

    Example::

        array(['hi', 'there']) --> {'hi': 0, 'there': 1}

    If a value occurs more than once, the last position wins because
    later assignments overwrite earlier ones. Implemented in Cython for
    speed.
    """
    cdef:
        Py_ssize_t pos, n = len(index)
        dict positions = {}

    for pos in range(n):
        positions[index[pos]] = pos

    return positions
@cython.boundscheck(False)
@cython.wraparound(False)
def pad_int32(ndarray[int32_t] old, ndarray[int32_t] new,
              limit=None):
    """
    Build a forward-fill indexer that maps each element of `new` onto `old`.

    Both arrays are walked with forward-only cursors.
    NOTE(review): this presumably requires both to be sorted ascending;
    nothing here checks it -- confirm against callers.

    Parameters
    ----------
    old, new : ndarray[int32_t]
    limit : int, optional
        Maximum number of non-exact matches filled from one `old` entry.
        ``None`` means no cap.

    Returns
    -------
    ndarray[int64_t]
        For each position of `new`, the index into `old` of the entry to
        use, or -1 where nothing was assigned.

    Raises
    ------
    ValueError
        If `limit` is negative.
    """
    cdef:
        Py_ssize_t i, j, nleft, nright
        ndarray[int64_t, ndim=1] indexer
        int32_t cur, next_val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    nleft = len(old)
    nright = len(new)
    indexer = np.empty(nright, dtype=np.int64)
    indexer.fill(-1)

    if limit is None:
        lim = nright
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    # Nothing to match: one side is empty, or all of `new` precedes old[0].
    if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
        return indexer

    i = j = 0
    cur = old[0]

    # Leading `new` values smaller than old[0] keep their -1.
    while j <= nright - 1 and new[j] < cur:
        j += 1

    while True:
        if j == nright:
            break

        if i == nleft - 1:
            # Last `old` entry: all remaining `new` values map to it,
            # subject to the limit for non-exact matches.
            while j < nright:
                if new[j] == cur:
                    indexer[j] = i
                elif new[j] > cur and fill_count < lim:
                    indexer[j] = i
                    fill_count += 1
                j += 1
            break

        next_val = old[i + 1]

        while j < nright and cur <= new[j] < next_val:
            if new[j] == cur:
                indexer[j] = i
            elif fill_count < lim:
                indexer[j] = i
                fill_count += 1
            j += 1

        # The limit applies per `old` entry, so reset before advancing.
        fill_count = 0
        i += 1
        cur = next_val

    return indexer
@cython.boundscheck(False)
@cython.wraparound(False)
def pad_inplace_int32(ndarray[int32_t] values,
                      ndarray[uint8_t, cast=True] mask,
                      limit=None):
    """
    Forward-fill the masked entries of `values` in place.

    Every position where `mask` is set is overwritten with the most
    recent unmasked value (``values[0]`` until one has been seen). At
    most `limit` consecutive masked positions are filled; ``None`` means
    no cap. Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked for non-empty input).
    """
    cdef:
        Py_ssize_t i, N
        int32_t val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    N = len(values)

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    val = values[0]
    for i in range(N):
        if mask[i]:
            if fill_count >= lim:
                continue
            fill_count += 1
            values[i] = val
        else:
            # An unmasked value restarts the run and becomes the new filler.
            fill_count = 0
            val = values[i]
@cython.boundscheck(False)
@cython.wraparound(False)
def pad_2d_inplace_int32(ndarray[int32_t, ndim=2] values,
                         ndarray[uint8_t, ndim=2] mask,
                         limit=None):
    """
    Forward-fill the masked entries of a 2-d array in place, row by row.

    For each index ``j`` along the first axis, positions along the second
    axis where `mask` is set are overwritten with the most recent unmasked
    value of that row (``values[j, 0]`` until one has been seen). At most
    `limit` consecutive masked positions are filled; ``None`` means no
    cap. Returns None.

    Raises
    ------
    ValueError
        If `limit` is negative (only checked when the second axis is
        non-empty).
    """
    cdef:
        Py_ssize_t i, j, N, K
        int32_t val
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for axes longer than INT_MAX.
        int64_t lim, fill_count = 0

    K, N = (<object> values).shape

    # GH 2778
    if N == 0:
        return

    if limit is None:
        lim = N
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    for j in range(K):
        # Each row is filled independently.
        fill_count = 0
        val = values[j, 0]
        for i in range(N):
            if mask[j, i]:
                if fill_count >= lim:
                    continue
                fill_count += 1
                values[j, i] = val
            else:
                fill_count = 0
                val = values[j, i]
"""
Backfilling logic for generating fill vector

Diagram of what's going on

Old      New    Fill vector    Mask
         .        0               1
         .        0               1
         .        0               1
A        A        0               1
         .        1               1
         .        1               1
         .        1               1
         .        1               1
         .        1               1
B        B        1               1
         .        2               1
         .        2               1
         .        2               1
C        C        2               1
         .                        0
         .                        0
D
"""
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_int32(ndarray[int32_t] old, ndarray[int32_t] new,
                   limit=None):
    """
    Build a backward-fill indexer that maps each element of `new` onto `old`.

    Both arrays are walked from the end with backward-only cursors.
    NOTE(review): this presumably requires both to be sorted ascending;
    nothing here checks it -- confirm against callers.

    Parameters
    ----------
    old, new : ndarray[int32_t]
    limit : int, optional
        Maximum number of non-exact matches filled from one `old` entry.
        ``None`` means no cap.

    Returns
    -------
    ndarray[int64_t]
        For each position of `new`, the index into `old` of the entry to
        use, or -1 where nothing was assigned.

    Raises
    ------
    ValueError
        If `limit` is negative.
    """
    cdef:
        Py_ssize_t i, j, nleft, nright
        ndarray[int64_t, ndim=1] indexer
        int32_t cur, prev
        # int64_t, not C int: `lim` receives a Py_ssize_t length, which a
        # C int would silently truncate for arrays longer than INT_MAX.
        int64_t lim, fill_count = 0

    nleft = len(old)
    nright = len(new)
    indexer = np.empty(nright, dtype=np.int64)
    indexer.fill(-1)

    if limit is None:
        lim = nright
    else:
        if limit < 0:
            raise ValueError('Limit must be non-negative')
        lim = limit

    # Nothing to match: one side is empty, or all of `new` follows old[-1].
    if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
        return indexer

    i = nleft - 1
    j = nright - 1
    cur = old[nleft - 1]

    # Trailing `new` values larger than the last `old` value keep their -1.
    while j >= 0 and new[j] > cur:
        j -= 1

    while True:
        if j < 0:
            break

        if i == 0:
            # First `old` entry: all remaining `new` values map to it,
            # subject to the limit for non-exact matches.
            while j >= 0:
                if new[j] == cur:
                    indexer[j] = i
                elif new[j] < cur and fill_count < lim:
                    indexer[j] = i
                    fill_count += 1
                j -= 1
            break

        prev = old[i - 1]

        while j >= 0 and prev < new[j] <= cur:
            if new[j] == cur:
                indexer[j] = i
            elif new[j] < cur and fill_count < lim:
                indexer[j] = i
                fill_count += 1
            j -= 1

        # The limit applies per `old` entry, so reset before stepping back.
        fill_count = 0
        i -= 1
        cur = prev

    return indexer
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_inplace_int32(ndarray[int32_t] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
cdef Py_ssize_t i, N
cdef int32_t val
cdef int lim, fill_count = 0
N = len(values)
# GH 2778
if N == 0:
return
if limit is None:
lim = N
else:
if limit < 0:
raise ValueError('Limit must be non-negative')
lim = limit
val = values[N - 1]