merge_asof() has type specializations and can take multiple 'by' parameters (#13936)
This commit is contained in:
parent
3552dc0c45
commit
75157fcbc0
|
@ -310,6 +310,25 @@ class merge_asof_noby(object):
|
|||
merge_asof(self.df1, self.df2, on='time')
|
||||
|
||||
|
||||
class merge_asof_int32_noby(object):
    """Benchmark ``merge_asof`` on an int32 ``time`` key without a ``by`` column."""

    # Row counts of the left / right frames built by ``setup``.
    one_count = 200000
    two_count = 1000000

    def setup(self):
        """Build two frames with an int32 ``time`` column, each sorted on it."""
        np.random.seed(0)
        one_count = self.one_count
        two_count = self.two_count

        # Floor division keeps the upper bound an integer on Python 3 as well
        # (true division would hand ``randint`` a float).
        self.df1 = pd.DataFrame({
            'time': np.random.randint(0, one_count // 20, one_count),
            'value1': np.random.randn(one_count)})
        self.df1['time'] = self.df1['time'].astype(np.int32)

        self.df2 = pd.DataFrame({
            'time': np.random.randint(0, two_count // 20, two_count),
            'value2': np.random.randn(two_count)})
        self.df2['time'] = self.df2['time'].astype(np.int32)

        self.df1 = self.df1.sort_values('time')
        self.df2 = self.df2.sort_values('time')

    def time_merge_asof_int32_noby(self):
        """Time the as-of merge of the two int32-keyed frames on ``time``."""
        merge_asof(self.df1, self.df2, on='time')
|
||||
|
||||
|
||||
class merge_asof_by_object(object):
|
||||
|
||||
def setup(self):
|
||||
|
@ -318,10 +337,10 @@ class merge_asof_by_object(object):
|
|||
one_count = 200000
|
||||
two_count = 1000000
|
||||
self.df1 = pd.DataFrame({'time': np.random.randint(0, one_count/20, one_count),
|
||||
'key': np.random.choice(list(string.uppercase), one_count),
|
||||
'key': np.random.choice(list(string.ascii_uppercase), one_count),
|
||||
'value1': np.random.randn(one_count)})
|
||||
self.df2 = pd.DataFrame({'time': np.random.randint(0, two_count/20, two_count),
|
||||
'key': np.random.choice(list(string.uppercase), two_count),
|
||||
'key': np.random.choice(list(string.ascii_uppercase), two_count),
|
||||
'value2': np.random.randn(two_count)})
|
||||
self.df1 = self.df1.sort_values('time')
|
||||
self.df2 = self.df2.sort_values('time')
|
||||
|
@ -349,6 +368,28 @@ class merge_asof_by_int(object):
|
|||
merge_asof(self.df1, self.df2, on='time', by='key')
|
||||
|
||||
|
||||
class merge_asof_multiby(object):
    """Benchmark ``merge_asof`` with two ``by`` columns (``key1``, ``key2``)."""

    # Row counts of the left / right frames built by ``setup``.
    one_count = 200000
    two_count = 1000000

    def setup(self):
        """Build two frames with random uppercase-letter keys, sorted on ``time``."""
        import string
        np.random.seed(0)
        one_count = self.one_count
        two_count = self.two_count
        letters = list(string.ascii_uppercase)

        # Floor division keeps the upper bound an integer on Python 3 as well
        # (true division would hand ``randint`` a float).
        self.df1 = pd.DataFrame({
            'time': np.random.randint(0, one_count // 20, one_count),
            'key1': np.random.choice(letters, one_count),
            'key2': np.random.choice(letters, one_count),
            'value1': np.random.randn(one_count)})
        self.df2 = pd.DataFrame({
            'time': np.random.randint(0, two_count // 20, two_count),
            'key1': np.random.choice(letters, two_count),
            'key2': np.random.choice(letters, two_count),
            'value2': np.random.randn(two_count)})

        self.df1 = self.df1.sort_values('time')
        self.df2 = self.df2.sort_values('time')

    def time_merge_asof_multiby(self):
        """Time the as-of merge on ``time`` grouped by both key columns."""
        merge_asof(self.df1, self.df2, on='time', by=['key1', 'key2'])
|
||||
|
||||
|
||||
class join_non_unique_equal(object):
|
||||
goal_time = 0.2
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# cython: boundscheck=False, wraparound=False
|
||||
"""
|
||||
Template for each `dtype` helper function for hashtable
|
||||
|
||||
|
@ -12,10 +13,10 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
|
|||
from hashtable cimport *
|
||||
|
||||
|
||||
def asof_join_int64_t_by_object(ndarray[int64_t] left_values,
|
||||
ndarray[int64_t] right_values,
|
||||
ndarray[object] left_by_values,
|
||||
ndarray[object] right_by_values,
|
||||
def asof_join_uint8_t_by_int64_t(ndarray[uint8_t] left_values,
|
||||
ndarray[uint8_t] right_values,
|
||||
ndarray[int64_t] left_by_values,
|
||||
ndarray[int64_t] right_by_values,
|
||||
bint allow_exact_matches=1,
|
||||
tolerance=None):
|
||||
|
||||
|
@ -23,9 +24,9 @@ def asof_join_int64_t_by_object(ndarray[int64_t] left_values,
|
|||
Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
|
||||
ndarray[int64_t] left_indexer, right_indexer
|
||||
bint has_tolerance = 0
|
||||
int64_t tolerance_
|
||||
PyObjectHashTable hash_table
|
||||
object by_value
|
||||
uint8_t tolerance_
|
||||
Int64HashTable hash_table
|
||||
int64_t by_value
|
||||
|
||||
# if we are using tolerance, set our objects
|
||||
if tolerance is not None:
|
||||
|
@ -38,7 +39,7 @@ def asof_join_int64_t_by_object(ndarray[int64_t] left_values,
|
|||
left_indexer = np.empty(left_size, dtype=np.int64)
|
||||
right_indexer = np.empty(left_size, dtype=np.int64)
|
||||
|
||||
hash_table = PyObjectHashTable(right_size)
|
||||
hash_table = Int64HashTable(right_size)
|
||||
|
||||
right_pos = 0
|
||||
for left_pos in range(left_size):
|
||||
|
@ -75,10 +76,10 @@ def asof_join_int64_t_by_object(ndarray[int64_t] left_values,
|
|||
return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_double_by_object(ndarray[double] left_values,
|
||||
ndarray[double] right_values,
|
||||
ndarray[object] left_by_values,
|
||||
ndarray[object] right_by_values,
|
||||
def asof_join_uint16_t_by_int64_t(ndarray[uint16_t] left_values,
|
||||
ndarray[uint16_t] right_values,
|
||||
ndarray[int64_t] left_by_values,
|
||||
ndarray[int64_t] right_by_values,
|
||||
bint allow_exact_matches=1,
|
||||
tolerance=None):
|
||||
|
||||
|
@ -86,9 +87,9 @@ def asof_join_double_by_object(ndarray[double] left_values,
|
|||
Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
|
||||
ndarray[int64_t] left_indexer, right_indexer
|
||||
bint has_tolerance = 0
|
||||
double tolerance_
|
||||
PyObjectHashTable hash_table
|
||||
object by_value
|
||||
uint16_t tolerance_
|
||||
Int64HashTable hash_table
|
||||
int64_t by_value
|
||||
|
||||
# if we are using tolerance, set our objects
|
||||
if tolerance is not None:
|
||||
|
@ -101,7 +102,322 @@ def asof_join_double_by_object(ndarray[double] left_values,
|
|||
left_indexer = np.empty(left_size, dtype=np.int64)
|
||||
right_indexer = np.empty(left_size, dtype=np.int64)
|
||||
|
||||
hash_table = PyObjectHashTable(right_size)
|
||||
hash_table = Int64HashTable(right_size)
|
||||
|
||||
right_pos = 0
|
||||
for left_pos in range(left_size):
|
||||
# restart right_pos if it went negative in a previous iteration
|
||||
if right_pos < 0:
|
||||
right_pos = 0
|
||||
|
||||
# find last position in right whose value is less than left's value
|
||||
if allow_exact_matches:
|
||||
while right_pos < right_size and\
|
||||
right_values[right_pos] <= left_values[left_pos]:
|
||||
hash_table.set_item(right_by_values[right_pos], right_pos)
|
||||
right_pos += 1
|
||||
else:
|
||||
while right_pos < right_size and\
|
||||
right_values[right_pos] < left_values[left_pos]:
|
||||
hash_table.set_item(right_by_values[right_pos], right_pos)
|
||||
right_pos += 1
|
||||
right_pos -= 1
|
||||
|
||||
# save positions as the desired index
|
||||
by_value = left_by_values[left_pos]
|
||||
found_right_pos = hash_table.get_item(by_value)\
|
||||
if by_value in hash_table else -1
|
||||
left_indexer[left_pos] = left_pos
|
||||
right_indexer[left_pos] = found_right_pos
|
||||
|
||||
# if needed, verify that tolerance is met
|
||||
if has_tolerance and found_right_pos != -1:
|
||||
diff = left_values[left_pos] - right_values[found_right_pos]
|
||||
if diff > tolerance_:
|
||||
right_indexer[left_pos] = -1
|
||||
|
||||
return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_uint32_t_by_int64_t(ndarray[uint32_t] left_values,
                                  ndarray[uint32_t] right_values,
                                  ndarray[int64_t] left_by_values,
                                  ndarray[int64_t] right_by_values,
                                  bint allow_exact_matches=1,
                                  tolerance=None):
    """
    Build as-of join indexers for uint32 ``on`` values with int64 ``by`` keys.

    Walks the left rows in order while advancing a cursor through the right
    rows, remembering in a hash table the latest right position scanned for
    each ``by`` key. Each left row is paired with the remembered position of
    its own key, or -1 when that key has not been scanned yet.

    Parameters
    ----------
    left_values, right_values : ndarray[uint32_t]
        ``on`` values of the left and right sides.
        NOTE(review): the forward-only scan presumes both are sorted
        ascending -- confirm at the caller.
    left_by_values, right_by_values : ndarray[int64_t]
        Group key of each left / right row.
    allow_exact_matches : bint, default 1
        When true the scan uses ``<=``, otherwise strict ``<``.
    tolerance : optional
        When not None, a match whose ``left - right`` difference is greater
        than this value is replaced by -1.

    Returns
    -------
    tuple of (ndarray[int64_t], ndarray[int64_t])
        ``left_indexer`` (position ``i`` holds ``i``) and ``right_indexer``
        (matched right position or -1), both of length ``len(left_values)``.
    """

    cdef:
        Py_ssize_t i, j, n_left, n_right, match_pos
        ndarray[int64_t] left_indexer, right_indexer
        bint check_gap = 0
        uint32_t max_gap
        Int64HashTable last_seen
        int64_t key

    # remember the tolerance only when the caller supplied one
    if tolerance is not None:
        max_gap = tolerance
        check_gap = 1

    n_right = len(right_values)
    n_left = len(left_values)

    right_indexer = np.empty(n_left, dtype=np.int64)
    left_indexer = np.empty(n_left, dtype=np.int64)

    last_seen = Int64HashTable(n_right)

    j = 0
    for i in range(n_left):
        # the step-back below can leave the cursor at -1; start over from 0
        if j < 0:
            j = 0

        # record every right row that qualifies for this left value
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                last_seen.set_item(right_by_values[j], j)
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                last_seen.set_item(right_by_values[j], j)
                j += 1
        # step back onto the last row that satisfied the comparison
        j -= 1

        # look up the latest recorded position for this row's key
        key = left_by_values[i]
        if key in last_seen:
            match_pos = last_seen.get_item(key)
        else:
            match_pos = -1
        left_indexer[i] = i
        right_indexer[i] = match_pos

        # drop the match again when it lies further away than allowed
        if check_gap and match_pos != -1:
            gap = left_values[i] - right_values[match_pos]
            if gap > max_gap:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_uint64_t_by_int64_t(ndarray[uint64_t] left_values,
                                  ndarray[uint64_t] right_values,
                                  ndarray[int64_t] left_by_values,
                                  ndarray[int64_t] right_by_values,
                                  bint allow_exact_matches=1,
                                  tolerance=None):
    """
    Build as-of join indexers for uint64 ``on`` values with int64 ``by`` keys.

    Walks the left rows in order while advancing a cursor through the right
    rows, remembering in a hash table the latest right position scanned for
    each ``by`` key. Each left row is paired with the remembered position of
    its own key, or -1 when that key has not been scanned yet.

    Parameters
    ----------
    left_values, right_values : ndarray[uint64_t]
        ``on`` values of the left and right sides.
        NOTE(review): the forward-only scan presumes both are sorted
        ascending -- confirm at the caller.
    left_by_values, right_by_values : ndarray[int64_t]
        Group key of each left / right row.
    allow_exact_matches : bint, default 1
        When true the scan uses ``<=``, otherwise strict ``<``.
    tolerance : optional
        When not None, a match whose ``left - right`` difference is greater
        than this value is replaced by -1.

    Returns
    -------
    tuple of (ndarray[int64_t], ndarray[int64_t])
        ``left_indexer`` (position ``i`` holds ``i``) and ``right_indexer``
        (matched right position or -1), both of length ``len(left_values)``.
    """

    cdef:
        Py_ssize_t i, j, n_left, n_right, match_pos
        ndarray[int64_t] left_indexer, right_indexer
        bint check_gap = 0
        uint64_t max_gap
        Int64HashTable last_seen
        int64_t key

    # remember the tolerance only when the caller supplied one
    if tolerance is not None:
        max_gap = tolerance
        check_gap = 1

    n_right = len(right_values)
    n_left = len(left_values)

    right_indexer = np.empty(n_left, dtype=np.int64)
    left_indexer = np.empty(n_left, dtype=np.int64)

    last_seen = Int64HashTable(n_right)

    j = 0
    for i in range(n_left):
        # the step-back below can leave the cursor at -1; start over from 0
        if j < 0:
            j = 0

        # record every right row that qualifies for this left value
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                last_seen.set_item(right_by_values[j], j)
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                last_seen.set_item(right_by_values[j], j)
                j += 1
        # step back onto the last row that satisfied the comparison
        j -= 1

        # look up the latest recorded position for this row's key
        key = left_by_values[i]
        if key in last_seen:
            match_pos = last_seen.get_item(key)
        else:
            match_pos = -1
        left_indexer[i] = i
        right_indexer[i] = match_pos

        # drop the match again when it lies further away than allowed
        if check_gap and match_pos != -1:
            gap = left_values[i] - right_values[match_pos]
            if gap > max_gap:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_int8_t_by_int64_t(ndarray[int8_t] left_values,
                                ndarray[int8_t] right_values,
                                ndarray[int64_t] left_by_values,
                                ndarray[int64_t] right_by_values,
                                bint allow_exact_matches=1,
                                tolerance=None):
    """
    Build as-of join indexers for int8 ``on`` values with int64 ``by`` keys.

    Walks the left rows in order while advancing a cursor through the right
    rows, remembering in a hash table the latest right position scanned for
    each ``by`` key. Each left row is paired with the remembered position of
    its own key, or -1 when that key has not been scanned yet.

    Parameters
    ----------
    left_values, right_values : ndarray[int8_t]
        ``on`` values of the left and right sides.
        NOTE(review): the forward-only scan presumes both are sorted
        ascending -- confirm at the caller.
    left_by_values, right_by_values : ndarray[int64_t]
        Group key of each left / right row.
    allow_exact_matches : bint, default 1
        When true the scan uses ``<=``, otherwise strict ``<``.
    tolerance : optional
        When not None, a match whose ``left - right`` difference is greater
        than this value is replaced by -1.

    Returns
    -------
    tuple of (ndarray[int64_t], ndarray[int64_t])
        ``left_indexer`` (position ``i`` holds ``i``) and ``right_indexer``
        (matched right position or -1), both of length ``len(left_values)``.
    """

    cdef:
        Py_ssize_t i, j, n_left, n_right, match_pos
        ndarray[int64_t] left_indexer, right_indexer
        bint check_gap = 0
        int8_t max_gap
        Int64HashTable last_seen
        int64_t key

    # remember the tolerance only when the caller supplied one
    if tolerance is not None:
        max_gap = tolerance
        check_gap = 1

    n_right = len(right_values)
    n_left = len(left_values)

    right_indexer = np.empty(n_left, dtype=np.int64)
    left_indexer = np.empty(n_left, dtype=np.int64)

    last_seen = Int64HashTable(n_right)

    j = 0
    for i in range(n_left):
        # the step-back below can leave the cursor at -1; start over from 0
        if j < 0:
            j = 0

        # record every right row that qualifies for this left value
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                last_seen.set_item(right_by_values[j], j)
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                last_seen.set_item(right_by_values[j], j)
                j += 1
        # step back onto the last row that satisfied the comparison
        j -= 1

        # look up the latest recorded position for this row's key
        key = left_by_values[i]
        if key in last_seen:
            match_pos = last_seen.get_item(key)
        else:
            match_pos = -1
        left_indexer[i] = i
        right_indexer[i] = match_pos

        # drop the match again when it lies further away than allowed
        if check_gap and match_pos != -1:
            gap = left_values[i] - right_values[match_pos]
            if gap > max_gap:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_int16_t_by_int64_t(ndarray[int16_t] left_values,
                                 ndarray[int16_t] right_values,
                                 ndarray[int64_t] left_by_values,
                                 ndarray[int64_t] right_by_values,
                                 bint allow_exact_matches=1,
                                 tolerance=None):
    """
    Build as-of join indexers for int16 ``on`` values with int64 ``by`` keys.

    Walks the left rows in order while advancing a cursor through the right
    rows, remembering in a hash table the latest right position scanned for
    each ``by`` key. Each left row is paired with the remembered position of
    its own key, or -1 when that key has not been scanned yet.

    Parameters
    ----------
    left_values, right_values : ndarray[int16_t]
        ``on`` values of the left and right sides.
        NOTE(review): the forward-only scan presumes both are sorted
        ascending -- confirm at the caller.
    left_by_values, right_by_values : ndarray[int64_t]
        Group key of each left / right row.
    allow_exact_matches : bint, default 1
        When true the scan uses ``<=``, otherwise strict ``<``.
    tolerance : optional
        When not None, a match whose ``left - right`` difference is greater
        than this value is replaced by -1.

    Returns
    -------
    tuple of (ndarray[int64_t], ndarray[int64_t])
        ``left_indexer`` (position ``i`` holds ``i``) and ``right_indexer``
        (matched right position or -1), both of length ``len(left_values)``.
    """

    cdef:
        Py_ssize_t i, j, n_left, n_right, match_pos
        ndarray[int64_t] left_indexer, right_indexer
        bint check_gap = 0
        int16_t max_gap
        Int64HashTable last_seen
        int64_t key

    # remember the tolerance only when the caller supplied one
    if tolerance is not None:
        max_gap = tolerance
        check_gap = 1

    n_right = len(right_values)
    n_left = len(left_values)

    right_indexer = np.empty(n_left, dtype=np.int64)
    left_indexer = np.empty(n_left, dtype=np.int64)

    last_seen = Int64HashTable(n_right)

    j = 0
    for i in range(n_left):
        # the step-back below can leave the cursor at -1; start over from 0
        if j < 0:
            j = 0

        # record every right row that qualifies for this left value
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                last_seen.set_item(right_by_values[j], j)
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                last_seen.set_item(right_by_values[j], j)
                j += 1
        # step back onto the last row that satisfied the comparison
        j -= 1

        # look up the latest recorded position for this row's key
        key = left_by_values[i]
        if key in last_seen:
            match_pos = last_seen.get_item(key)
        else:
            match_pos = -1
        left_indexer[i] = i
        right_indexer[i] = match_pos

        # drop the match again when it lies further away than allowed
        if check_gap and match_pos != -1:
            gap = left_values[i] - right_values[match_pos]
            if gap > max_gap:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_int32_t_by_int64_t(ndarray[int32_t] left_values,
|
||||
ndarray[int32_t] right_values,
|
||||
ndarray[int64_t] left_by_values,
|
||||
ndarray[int64_t] right_by_values,
|
||||
bint allow_exact_matches=1,
|
||||
tolerance=None):
|
||||
|
||||
cdef:
|
||||
Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
|
||||
ndarray[int64_t] left_indexer, right_indexer
|
||||
bint has_tolerance = 0
|
||||
int32_t tolerance_
|
||||
Int64HashTable hash_table
|
||||
int64_t by_value
|
||||
|
||||
# if we are using tolerance, set our objects
|
||||
if tolerance is not None:
|
||||
has_tolerance = 1
|
||||
tolerance_ = tolerance
|
||||
|
||||
left_size = len(left_values)
|
||||
right_size = len(right_values)
|
||||
|
||||
left_indexer = np.empty(left_size, dtype=np.int64)
|
||||
right_indexer = np.empty(left_size, dtype=np.int64)
|
||||
|
||||
hash_table = Int64HashTable(right_size)
|
||||
|
||||
right_pos = 0
|
||||
for left_pos in range(left_size):
|
||||
|
@ -201,6 +517,69 @@ def asof_join_int64_t_by_int64_t(ndarray[int64_t] left_values,
|
|||
return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_float_by_int64_t(ndarray[float] left_values,
                               ndarray[float] right_values,
                               ndarray[int64_t] left_by_values,
                               ndarray[int64_t] right_by_values,
                               bint allow_exact_matches=1,
                               tolerance=None):
    """
    Build as-of join indexers for C ``float`` ``on`` values with int64 ``by`` keys.

    Walks the left rows in order while advancing a cursor through the right
    rows, remembering in a hash table the latest right position scanned for
    each ``by`` key. Each left row is paired with the remembered position of
    its own key, or -1 when that key has not been scanned yet.

    Parameters
    ----------
    left_values, right_values : ndarray[float]
        ``on`` values of the left and right sides.
        NOTE(review): the forward-only scan presumes both are sorted
        ascending -- confirm at the caller.
    left_by_values, right_by_values : ndarray[int64_t]
        Group key of each left / right row.
    allow_exact_matches : bint, default 1
        When true the scan uses ``<=``, otherwise strict ``<``.
    tolerance : optional
        When not None, a match whose ``left - right`` difference is greater
        than this value is replaced by -1.

    Returns
    -------
    tuple of (ndarray[int64_t], ndarray[int64_t])
        ``left_indexer`` (position ``i`` holds ``i``) and ``right_indexer``
        (matched right position or -1), both of length ``len(left_values)``.
    """

    cdef:
        Py_ssize_t i, j, n_left, n_right, match_pos
        ndarray[int64_t] left_indexer, right_indexer
        bint check_gap = 0
        float max_gap
        Int64HashTable last_seen
        int64_t key

    # remember the tolerance only when the caller supplied one
    if tolerance is not None:
        max_gap = tolerance
        check_gap = 1

    n_right = len(right_values)
    n_left = len(left_values)

    right_indexer = np.empty(n_left, dtype=np.int64)
    left_indexer = np.empty(n_left, dtype=np.int64)

    last_seen = Int64HashTable(n_right)

    j = 0
    for i in range(n_left):
        # the step-back below can leave the cursor at -1; start over from 0
        if j < 0:
            j = 0

        # record every right row that qualifies for this left value
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                last_seen.set_item(right_by_values[j], j)
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                last_seen.set_item(right_by_values[j], j)
                j += 1
        # step back onto the last row that satisfied the comparison
        j -= 1

        # look up the latest recorded position for this row's key
        key = left_by_values[i]
        if key in last_seen:
            match_pos = last_seen.get_item(key)
        else:
            match_pos = -1
        left_indexer[i] = i
        right_indexer[i] = match_pos

        # drop the match again when it lies further away than allowed
        if check_gap and match_pos != -1:
            gap = left_values[i] - right_values[match_pos]
            if gap > max_gap:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_double_by_int64_t(ndarray[double] left_values,
|
||||
ndarray[double] right_values,
|
||||
ndarray[int64_t] left_by_values,
|
||||
|
@ -264,11 +643,1005 @@ def asof_join_double_by_int64_t(ndarray[double] left_values,
|
|||
return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_uint8_t_by_object(ndarray[uint8_t] left_values,
                                ndarray[uint8_t] right_values,
                                ndarray[object] left_by_values,
                                ndarray[object] right_by_values,
                                bint allow_exact_matches=1,
                                tolerance=None):
    """
    Compute as-of join indexers for uint8 ``on`` values grouped by object keys.

    The left rows are visited in order while a cursor advances through the
    right rows; a dict maps each ``by`` key to the latest right position
    scanned so far. Every left row receives the position stored for its own
    key, or -1 if the key has not been scanned yet.

    Parameters
    ----------
    left_values, right_values : ndarray[uint8_t]
        ``on`` values of the left and right sides.
        NOTE(review): the forward-only scan presumes both are sorted
        ascending -- confirm at the caller.
    left_by_values, right_by_values : ndarray[object]
        Group key of each left / right row; keys are used as dict keys.
    allow_exact_matches : bint, default 1
        When true the scan uses ``<=``, otherwise strict ``<``.
    tolerance : optional
        When not None, a match whose ``left - right`` difference is greater
        than this value is replaced by -1.

    Returns
    -------
    tuple of (ndarray[int64_t], ndarray[int64_t])
        ``left_indexer`` (position ``i`` holds ``i``) and ``right_indexer``
        (matched right position or -1), both of length ``len(left_values)``.
    """

    cdef:
        Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        uint8_t tolerance_
        dict hash_table
        object by_value

    # if we are using tolerance, set our objects
    if tolerance is not None:
        has_tolerance = 1
        tolerance_ = tolerance

    left_size = len(left_values)
    right_size = len(right_values)

    left_indexer = np.empty(left_size, dtype=np.int64)
    right_indexer = np.empty(left_size, dtype=np.int64)

    hash_table = {}

    right_pos = 0
    for left_pos in range(left_size):
        # restart right_pos if it went negative in a previous iteration
        if right_pos < 0:
            right_pos = 0

        # find last position in right whose value is less than left's value
        if allow_exact_matches:
            while right_pos < right_size and\
                    right_values[right_pos] <= left_values[left_pos]:
                hash_table[right_by_values[right_pos]] = right_pos
                right_pos += 1
        else:
            while right_pos < right_size and\
                    right_values[right_pos] < left_values[left_pos]:
                hash_table[right_by_values[right_pos]] = right_pos
                right_pos += 1
        # step back onto the last qualifying row (may become -1)
        right_pos -= 1

        # save positions as the desired index; a single dict lookup with a
        # -1 default replaces the membership test followed by indexing
        by_value = left_by_values[left_pos]
        found_right_pos = hash_table.get(by_value, -1)
        left_indexer[left_pos] = left_pos
        right_indexer[left_pos] = found_right_pos

        # if needed, verify that tolerance is met
        if has_tolerance and found_right_pos != -1:
            diff = left_values[left_pos] - right_values[found_right_pos]
            if diff > tolerance_:
                right_indexer[left_pos] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_uint16_t_by_object(ndarray[uint16_t] left_values,
                                 ndarray[uint16_t] right_values,
                                 ndarray[object] left_by_values,
                                 ndarray[object] right_by_values,
                                 bint allow_exact_matches=1,
                                 tolerance=None):
    """
    Compute as-of join indexers for uint16 ``on`` values grouped by object keys.

    The left rows are visited in order while a cursor advances through the
    right rows; a dict maps each ``by`` key to the latest right position
    scanned so far. Every left row receives the position stored for its own
    key, or -1 if the key has not been scanned yet.

    Parameters
    ----------
    left_values, right_values : ndarray[uint16_t]
        ``on`` values of the left and right sides.
        NOTE(review): the forward-only scan presumes both are sorted
        ascending -- confirm at the caller.
    left_by_values, right_by_values : ndarray[object]
        Group key of each left / right row; keys are used as dict keys.
    allow_exact_matches : bint, default 1
        When true the scan uses ``<=``, otherwise strict ``<``.
    tolerance : optional
        When not None, a match whose ``left - right`` difference is greater
        than this value is replaced by -1.

    Returns
    -------
    tuple of (ndarray[int64_t], ndarray[int64_t])
        ``left_indexer`` (position ``i`` holds ``i``) and ``right_indexer``
        (matched right position or -1), both of length ``len(left_values)``.
    """

    cdef:
        Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        uint16_t tolerance_
        dict hash_table
        object by_value

    # if we are using tolerance, set our objects
    if tolerance is not None:
        has_tolerance = 1
        tolerance_ = tolerance

    left_size = len(left_values)
    right_size = len(right_values)

    left_indexer = np.empty(left_size, dtype=np.int64)
    right_indexer = np.empty(left_size, dtype=np.int64)

    hash_table = {}

    right_pos = 0
    for left_pos in range(left_size):
        # restart right_pos if it went negative in a previous iteration
        if right_pos < 0:
            right_pos = 0

        # find last position in right whose value is less than left's value
        if allow_exact_matches:
            while right_pos < right_size and\
                    right_values[right_pos] <= left_values[left_pos]:
                hash_table[right_by_values[right_pos]] = right_pos
                right_pos += 1
        else:
            while right_pos < right_size and\
                    right_values[right_pos] < left_values[left_pos]:
                hash_table[right_by_values[right_pos]] = right_pos
                right_pos += 1
        # step back onto the last qualifying row (may become -1)
        right_pos -= 1

        # save positions as the desired index; a single dict lookup with a
        # -1 default replaces the membership test followed by indexing
        by_value = left_by_values[left_pos]
        found_right_pos = hash_table.get(by_value, -1)
        left_indexer[left_pos] = left_pos
        right_indexer[left_pos] = found_right_pos

        # if needed, verify that tolerance is met
        if has_tolerance and found_right_pos != -1:
            diff = left_values[left_pos] - right_values[found_right_pos]
            if diff > tolerance_:
                right_indexer[left_pos] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_uint32_t_by_object(ndarray[uint32_t] left_values,
                                 ndarray[uint32_t] right_values,
                                 ndarray[object] left_by_values,
                                 ndarray[object] right_by_values,
                                 bint allow_exact_matches=1,
                                 tolerance=None):
    """
    Compute as-of join indexers for uint32 ``on`` values grouped by object keys.

    The left rows are visited in order while a cursor advances through the
    right rows; a dict maps each ``by`` key to the latest right position
    scanned so far. Every left row receives the position stored for its own
    key, or -1 if the key has not been scanned yet.

    Parameters
    ----------
    left_values, right_values : ndarray[uint32_t]
        ``on`` values of the left and right sides.
        NOTE(review): the forward-only scan presumes both are sorted
        ascending -- confirm at the caller.
    left_by_values, right_by_values : ndarray[object]
        Group key of each left / right row; keys are used as dict keys.
    allow_exact_matches : bint, default 1
        When true the scan uses ``<=``, otherwise strict ``<``.
    tolerance : optional
        When not None, a match whose ``left - right`` difference is greater
        than this value is replaced by -1.

    Returns
    -------
    tuple of (ndarray[int64_t], ndarray[int64_t])
        ``left_indexer`` (position ``i`` holds ``i``) and ``right_indexer``
        (matched right position or -1), both of length ``len(left_values)``.
    """

    cdef:
        Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        uint32_t tolerance_
        dict hash_table
        object by_value

    # if we are using tolerance, set our objects
    if tolerance is not None:
        has_tolerance = 1
        tolerance_ = tolerance

    left_size = len(left_values)
    right_size = len(right_values)

    left_indexer = np.empty(left_size, dtype=np.int64)
    right_indexer = np.empty(left_size, dtype=np.int64)

    hash_table = {}

    right_pos = 0
    for left_pos in range(left_size):
        # restart right_pos if it went negative in a previous iteration
        if right_pos < 0:
            right_pos = 0

        # find last position in right whose value is less than left's value
        if allow_exact_matches:
            while right_pos < right_size and\
                    right_values[right_pos] <= left_values[left_pos]:
                hash_table[right_by_values[right_pos]] = right_pos
                right_pos += 1
        else:
            while right_pos < right_size and\
                    right_values[right_pos] < left_values[left_pos]:
                hash_table[right_by_values[right_pos]] = right_pos
                right_pos += 1
        # step back onto the last qualifying row (may become -1)
        right_pos -= 1

        # save positions as the desired index; a single dict lookup with a
        # -1 default replaces the membership test followed by indexing
        by_value = left_by_values[left_pos]
        found_right_pos = hash_table.get(by_value, -1)
        left_indexer[left_pos] = left_pos
        right_indexer[left_pos] = found_right_pos

        # if needed, verify that tolerance is met
        if has_tolerance and found_right_pos != -1:
            diff = left_values[left_pos] - right_values[found_right_pos]
            if diff > tolerance_:
                right_indexer[left_pos] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_uint64_t_by_object(ndarray[uint64_t] left_values,
                                 ndarray[uint64_t] right_values,
                                 ndarray[object] left_by_values,
                                 ndarray[object] right_by_values,
                                 bint allow_exact_matches=1,
                                 tolerance=None):
    """
    Compute as-of join indexers for uint64 ``on`` values grouped by object keys.

    The left rows are visited in order while a cursor advances through the
    right rows; a dict maps each ``by`` key to the latest right position
    scanned so far. Every left row receives the position stored for its own
    key, or -1 if the key has not been scanned yet.

    Parameters
    ----------
    left_values, right_values : ndarray[uint64_t]
        ``on`` values of the left and right sides.
        NOTE(review): the forward-only scan presumes both are sorted
        ascending -- confirm at the caller.
    left_by_values, right_by_values : ndarray[object]
        Group key of each left / right row; keys are used as dict keys.
    allow_exact_matches : bint, default 1
        When true the scan uses ``<=``, otherwise strict ``<``.
    tolerance : optional
        When not None, a match whose ``left - right`` difference is greater
        than this value is replaced by -1.

    Returns
    -------
    tuple of (ndarray[int64_t], ndarray[int64_t])
        ``left_indexer`` (position ``i`` holds ``i``) and ``right_indexer``
        (matched right position or -1), both of length ``len(left_values)``.
    """

    cdef:
        Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        uint64_t tolerance_
        dict hash_table
        object by_value

    # if we are using tolerance, set our objects
    if tolerance is not None:
        has_tolerance = 1
        tolerance_ = tolerance

    left_size = len(left_values)
    right_size = len(right_values)

    left_indexer = np.empty(left_size, dtype=np.int64)
    right_indexer = np.empty(left_size, dtype=np.int64)

    hash_table = {}

    right_pos = 0
    for left_pos in range(left_size):
        # restart right_pos if it went negative in a previous iteration
        if right_pos < 0:
            right_pos = 0

        # find last position in right whose value is less than left's value
        if allow_exact_matches:
            while right_pos < right_size and\
                    right_values[right_pos] <= left_values[left_pos]:
                hash_table[right_by_values[right_pos]] = right_pos
                right_pos += 1
        else:
            while right_pos < right_size and\
                    right_values[right_pos] < left_values[left_pos]:
                hash_table[right_by_values[right_pos]] = right_pos
                right_pos += 1
        # step back onto the last qualifying row (may become -1)
        right_pos -= 1

        # save positions as the desired index; a single dict lookup with a
        # -1 default replaces the membership test followed by indexing
        by_value = left_by_values[left_pos]
        found_right_pos = hash_table.get(by_value, -1)
        left_indexer[left_pos] = left_pos
        right_indexer[left_pos] = found_right_pos

        # if needed, verify that tolerance is met
        if has_tolerance and found_right_pos != -1:
            diff = left_values[left_pos] - right_values[found_right_pos]
            if diff > tolerance_:
                right_indexer[left_pos] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_int8_t_by_object(ndarray[int8_t] left_values,
                               ndarray[int8_t] right_values,
                               ndarray[object] left_by_values,
                               ndarray[object] right_by_values,
                               bint allow_exact_matches=1,
                               tolerance=None):
    """
    Compute as-of join indexers for int8 ``on`` values grouped by object keys.

    The left rows are visited in order while a cursor advances through the
    right rows; a dict maps each ``by`` key to the latest right position
    scanned so far. Every left row receives the position stored for its own
    key, or -1 if the key has not been scanned yet.

    Parameters
    ----------
    left_values, right_values : ndarray[int8_t]
        ``on`` values of the left and right sides.
        NOTE(review): the forward-only scan presumes both are sorted
        ascending -- confirm at the caller.
    left_by_values, right_by_values : ndarray[object]
        Group key of each left / right row; keys are used as dict keys.
    allow_exact_matches : bint, default 1
        When true the scan uses ``<=``, otherwise strict ``<``.
    tolerance : optional
        When not None, a match whose ``left - right`` difference is greater
        than this value is replaced by -1.

    Returns
    -------
    tuple of (ndarray[int64_t], ndarray[int64_t])
        ``left_indexer`` (position ``i`` holds ``i``) and ``right_indexer``
        (matched right position or -1), both of length ``len(left_values)``.
    """

    cdef:
        Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        int8_t tolerance_
        dict hash_table
        object by_value

    # if we are using tolerance, set our objects
    if tolerance is not None:
        has_tolerance = 1
        tolerance_ = tolerance

    left_size = len(left_values)
    right_size = len(right_values)

    left_indexer = np.empty(left_size, dtype=np.int64)
    right_indexer = np.empty(left_size, dtype=np.int64)

    hash_table = {}

    right_pos = 0
    for left_pos in range(left_size):
        # restart right_pos if it went negative in a previous iteration
        if right_pos < 0:
            right_pos = 0

        # find last position in right whose value is less than left's value
        if allow_exact_matches:
            while right_pos < right_size and\
                    right_values[right_pos] <= left_values[left_pos]:
                hash_table[right_by_values[right_pos]] = right_pos
                right_pos += 1
        else:
            while right_pos < right_size and\
                    right_values[right_pos] < left_values[left_pos]:
                hash_table[right_by_values[right_pos]] = right_pos
                right_pos += 1
        # step back onto the last qualifying row (may become -1)
        right_pos -= 1

        # save positions as the desired index; a single dict lookup with a
        # -1 default replaces the membership test followed by indexing
        by_value = left_by_values[left_pos]
        found_right_pos = hash_table.get(by_value, -1)
        left_indexer[left_pos] = left_pos
        right_indexer[left_pos] = found_right_pos

        # if needed, verify that tolerance is met
        if has_tolerance and found_right_pos != -1:
            diff = left_values[left_pos] - right_values[found_right_pos]
            if diff > tolerance_:
                right_indexer[left_pos] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_int16_t_by_object(ndarray[int16_t] left_values,
                                ndarray[int16_t] right_values,
                                ndarray[object] left_by_values,
                                ndarray[object] right_by_values,
                                bint allow_exact_matches=1,
                                tolerance=None):
    """
    Compute as-of join indexers for int16 ``on`` values grouped by object keys.

    The left rows are visited in order while a cursor advances through the
    right rows; a dict maps each ``by`` key to the latest right position
    scanned so far. Every left row receives the position stored for its own
    key, or -1 if the key has not been scanned yet.

    Parameters
    ----------
    left_values, right_values : ndarray[int16_t]
        ``on`` values of the left and right sides.
        NOTE(review): the forward-only scan presumes both are sorted
        ascending -- confirm at the caller.
    left_by_values, right_by_values : ndarray[object]
        Group key of each left / right row; keys are used as dict keys.
    allow_exact_matches : bint, default 1
        When true the scan uses ``<=``, otherwise strict ``<``.
    tolerance : optional
        When not None, a match whose ``left - right`` difference is greater
        than this value is replaced by -1.

    Returns
    -------
    tuple of (ndarray[int64_t], ndarray[int64_t])
        ``left_indexer`` (position ``i`` holds ``i``) and ``right_indexer``
        (matched right position or -1), both of length ``len(left_values)``.
    """

    cdef:
        Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        int16_t tolerance_
        dict hash_table
        object by_value

    # if we are using tolerance, set our objects
    if tolerance is not None:
        has_tolerance = 1
        tolerance_ = tolerance

    left_size = len(left_values)
    right_size = len(right_values)

    left_indexer = np.empty(left_size, dtype=np.int64)
    right_indexer = np.empty(left_size, dtype=np.int64)

    hash_table = {}

    right_pos = 0
    for left_pos in range(left_size):
        # restart right_pos if it went negative in a previous iteration
        if right_pos < 0:
            right_pos = 0

        # find last position in right whose value is less than left's value
        if allow_exact_matches:
            while right_pos < right_size and\
                    right_values[right_pos] <= left_values[left_pos]:
                hash_table[right_by_values[right_pos]] = right_pos
                right_pos += 1
        else:
            while right_pos < right_size and\
                    right_values[right_pos] < left_values[left_pos]:
                hash_table[right_by_values[right_pos]] = right_pos
                right_pos += 1
        # step back onto the last qualifying row (may become -1)
        right_pos -= 1

        # save positions as the desired index; a single dict lookup with a
        # -1 default replaces the membership test followed by indexing
        by_value = left_by_values[left_pos]
        found_right_pos = hash_table.get(by_value, -1)
        left_indexer[left_pos] = left_pos
        right_indexer[left_pos] = found_right_pos

        # if needed, verify that tolerance is met
        if has_tolerance and found_right_pos != -1:
            diff = left_values[left_pos] - right_values[found_right_pos]
            if diff > tolerance_:
                right_indexer[left_pos] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_int32_t_by_object(ndarray[int32_t] left_values,
                                ndarray[int32_t] right_values,
                                ndarray[object] left_by_values,
                                ndarray[object] right_by_values,
                                bint allow_exact_matches=1,
                                tolerance=None):
    """
    Compute positional indexers for a grouped as-of join on int32 keys.

    A single cursor walks ``right_values`` forward while a dict records,
    per ``by`` value, the last right position scanned so far.  Each left
    row is then paired with the recorded position for its own ``by``
    value.  NOTE(review): the forward-only cursor presumably relies on
    both key arrays being sorted ascending -- confirm against the caller.

    Parameters
    ----------
    left_values, right_values : ndarray[int32_t]
        Join keys of the left and right side.
    left_by_values, right_by_values : ndarray[object]
        Grouping values; used as dict keys, so they must be hashable.
    allow_exact_matches : bint, default 1
        If true a right key equal to the left key is consumed (``<=``),
        otherwise only strictly smaller keys are (``<``).
    tolerance : optional
        When not None it is coerced to int32_t and a match whose key
        distance is greater than it is replaced by -1.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64_t]
        Both of length ``len(left_values)``.  ``left_indexer[i] == i``;
        ``right_indexer[i]`` is a position in the right arrays, or -1
        when no position was recorded or the tolerance rejected it.
    """
    cdef:
        Py_ssize_t i, j, n_left, n_right, match
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        int32_t tolerance_
        dict last_seen
        object by_value

    # a tolerance of None switches the distance check off entirely
    has_tolerance = tolerance is not None
    if has_tolerance:
        tolerance_ = tolerance

    n_left = len(left_values)
    n_right = len(right_values)

    left_indexer = np.empty(n_left, dtype=np.int64)
    right_indexer = np.empty(n_left, dtype=np.int64)

    # by value -> last right position scanned with that value
    last_seen = {}

    j = 0
    for i in range(n_left):
        # the previous row may have stepped the cursor back to -1
        if j < 0:
            j = 0

        # consume every right row still eligible for this left key
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                last_seen[right_by_values[j]] = j
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                last_seen[right_by_values[j]] = j
                j += 1
        # step back onto the last consumed row (or -1 if none)
        j -= 1

        # look up the latest position recorded for this row's group
        by_value = left_by_values[i]
        match = last_seen.get(by_value, -1)
        left_indexer[i] = i
        right_indexer[i] = match

        # discard a match lying further back than the tolerance allows
        if has_tolerance and match != -1:
            gap = left_values[i] - right_values[match]
            if gap > tolerance_:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_int64_t_by_object(ndarray[int64_t] left_values,
                                ndarray[int64_t] right_values,
                                ndarray[object] left_by_values,
                                ndarray[object] right_by_values,
                                bint allow_exact_matches=1,
                                tolerance=None):
    """
    Compute positional indexers for a grouped as-of join on int64 keys.

    A single cursor walks ``right_values`` forward while a dict records,
    per ``by`` value, the last right position scanned so far.  Each left
    row is then paired with the recorded position for its own ``by``
    value.  NOTE(review): the forward-only cursor presumably relies on
    both key arrays being sorted ascending -- confirm against the caller.

    Parameters
    ----------
    left_values, right_values : ndarray[int64_t]
        Join keys of the left and right side.
    left_by_values, right_by_values : ndarray[object]
        Grouping values; used as dict keys, so they must be hashable.
    allow_exact_matches : bint, default 1
        If true a right key equal to the left key is consumed (``<=``),
        otherwise only strictly smaller keys are (``<``).
    tolerance : optional
        When not None it is coerced to int64_t and a match whose key
        distance is greater than it is replaced by -1.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64_t]
        Both of length ``len(left_values)``.  ``left_indexer[i] == i``;
        ``right_indexer[i]`` is a position in the right arrays, or -1
        when no position was recorded or the tolerance rejected it.
    """
    cdef:
        Py_ssize_t i, j, n_left, n_right, match
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        int64_t tolerance_
        dict last_seen
        object by_value

    # a tolerance of None switches the distance check off entirely
    has_tolerance = tolerance is not None
    if has_tolerance:
        tolerance_ = tolerance

    n_left = len(left_values)
    n_right = len(right_values)

    left_indexer = np.empty(n_left, dtype=np.int64)
    right_indexer = np.empty(n_left, dtype=np.int64)

    # by value -> last right position scanned with that value
    last_seen = {}

    j = 0
    for i in range(n_left):
        # the previous row may have stepped the cursor back to -1
        if j < 0:
            j = 0

        # consume every right row still eligible for this left key
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                last_seen[right_by_values[j]] = j
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                last_seen[right_by_values[j]] = j
                j += 1
        # step back onto the last consumed row (or -1 if none)
        j -= 1

        # look up the latest position recorded for this row's group
        by_value = left_by_values[i]
        match = last_seen.get(by_value, -1)
        left_indexer[i] = i
        right_indexer[i] = match

        # discard a match lying further back than the tolerance allows
        if has_tolerance and match != -1:
            gap = left_values[i] - right_values[match]
            if gap > tolerance_:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_float_by_object(ndarray[float] left_values,
                              ndarray[float] right_values,
                              ndarray[object] left_by_values,
                              ndarray[object] right_by_values,
                              bint allow_exact_matches=1,
                              tolerance=None):
    """
    Compute positional indexers for a grouped as-of join on C float keys.

    A single cursor walks ``right_values`` forward while a dict records,
    per ``by`` value, the last right position scanned so far.  Each left
    row is then paired with the recorded position for its own ``by``
    value.  NOTE(review): the forward-only cursor presumably relies on
    both key arrays being sorted ascending -- confirm against the caller.

    Parameters
    ----------
    left_values, right_values : ndarray[float]
        Join keys of the left and right side.
    left_by_values, right_by_values : ndarray[object]
        Grouping values; used as dict keys, so they must be hashable.
    allow_exact_matches : bint, default 1
        If true a right key equal to the left key is consumed (``<=``),
        otherwise only strictly smaller keys are (``<``).
    tolerance : optional
        When not None it is coerced to a C float and a match whose key
        distance is greater than it is replaced by -1.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64_t]
        Both of length ``len(left_values)``.  ``left_indexer[i] == i``;
        ``right_indexer[i]`` is a position in the right arrays, or -1
        when no position was recorded or the tolerance rejected it.
    """
    cdef:
        Py_ssize_t i, j, n_left, n_right, match
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        float tolerance_
        dict last_seen
        object by_value

    # a tolerance of None switches the distance check off entirely
    has_tolerance = tolerance is not None
    if has_tolerance:
        tolerance_ = tolerance

    n_left = len(left_values)
    n_right = len(right_values)

    left_indexer = np.empty(n_left, dtype=np.int64)
    right_indexer = np.empty(n_left, dtype=np.int64)

    # by value -> last right position scanned with that value
    last_seen = {}

    j = 0
    for i in range(n_left):
        # the previous row may have stepped the cursor back to -1
        if j < 0:
            j = 0

        # consume every right row still eligible for this left key
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                last_seen[right_by_values[j]] = j
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                last_seen[right_by_values[j]] = j
                j += 1
        # step back onto the last consumed row (or -1 if none)
        j -= 1

        # look up the latest position recorded for this row's group
        by_value = left_by_values[i]
        match = last_seen.get(by_value, -1)
        left_indexer[i] = i
        right_indexer[i] = match

        # discard a match lying further back than the tolerance allows
        if has_tolerance and match != -1:
            gap = left_values[i] - right_values[match]
            if gap > tolerance_:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_double_by_object(ndarray[double] left_values,
                               ndarray[double] right_values,
                               ndarray[object] left_by_values,
                               ndarray[object] right_by_values,
                               bint allow_exact_matches=1,
                               tolerance=None):
    """
    Compute positional indexers for a grouped as-of join on double keys.

    A single cursor walks ``right_values`` forward while a dict records,
    per ``by`` value, the last right position scanned so far.  Each left
    row is then paired with the recorded position for its own ``by``
    value.  NOTE(review): the forward-only cursor presumably relies on
    both key arrays being sorted ascending -- confirm against the caller.

    Parameters
    ----------
    left_values, right_values : ndarray[double]
        Join keys of the left and right side.
    left_by_values, right_by_values : ndarray[object]
        Grouping values; used as dict keys, so they must be hashable.
    allow_exact_matches : bint, default 1
        If true a right key equal to the left key is consumed (``<=``),
        otherwise only strictly smaller keys are (``<``).
    tolerance : optional
        When not None it is coerced to a C double and a match whose key
        distance is greater than it is replaced by -1.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64_t]
        Both of length ``len(left_values)``.  ``left_indexer[i] == i``;
        ``right_indexer[i]`` is a position in the right arrays, or -1
        when no position was recorded or the tolerance rejected it.
    """
    cdef:
        Py_ssize_t i, j, n_left, n_right, match
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        double tolerance_
        dict last_seen
        object by_value

    # a tolerance of None switches the distance check off entirely
    has_tolerance = tolerance is not None
    if has_tolerance:
        tolerance_ = tolerance

    n_left = len(left_values)
    n_right = len(right_values)

    left_indexer = np.empty(n_left, dtype=np.int64)
    right_indexer = np.empty(n_left, dtype=np.int64)

    # by value -> last right position scanned with that value
    last_seen = {}

    j = 0
    for i in range(n_left):
        # the previous row may have stepped the cursor back to -1
        if j < 0:
            j = 0

        # consume every right row still eligible for this left key
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                last_seen[right_by_values[j]] = j
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                last_seen[right_by_values[j]] = j
                j += 1
        # step back onto the last consumed row (or -1 if none)
        j -= 1

        # look up the latest position recorded for this row's group
        by_value = left_by_values[i]
        match = last_seen.get(by_value, -1)
        left_indexer[i] = i
        right_indexer[i] = match

        # discard a match lying further back than the tolerance allows
        if has_tolerance and match != -1:
            gap = left_values[i] - right_values[match]
            if gap > tolerance_:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# asof_join
|
||||
#----------------------------------------------------------------------
|
||||
|
||||
|
||||
def asof_join_uint8_t(ndarray[uint8_t] left_values,
                      ndarray[uint8_t] right_values,
                      bint allow_exact_matches=1,
                      tolerance=None):
    """
    Compute positional indexers for an as-of join on uint8 keys.

    One cursor walks ``right_values`` forward; for each left row it
    stops on the last right row whose key is ``<=`` the left key
    (``<`` when ``allow_exact_matches`` is false).
    NOTE(review): the forward-only cursor presumably relies on both
    arrays being sorted ascending -- confirm against the caller.

    Parameters
    ----------
    left_values, right_values : ndarray[uint8_t]
        Join keys of the left and right side.
    allow_exact_matches : bint, default 1
        Whether a right key equal to the left key may be matched.
    tolerance : optional
        When not None it is coerced to uint8_t and a match whose key
        distance is greater than it is replaced by -1.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64_t]
        Both of length ``len(left_values)``.  ``left_indexer[i] == i``;
        ``right_indexer[i]`` is the cursor position for row ``i``, or -1
        when the cursor found nothing or the tolerance rejected it.
    """
    cdef:
        Py_ssize_t i, j, n_left, n_right
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        uint8_t tolerance_

    # a tolerance of None switches the distance check off entirely
    has_tolerance = tolerance is not None
    if has_tolerance:
        tolerance_ = tolerance

    n_left = len(left_values)
    n_right = len(right_values)

    left_indexer = np.empty(n_left, dtype=np.int64)
    right_indexer = np.empty(n_left, dtype=np.int64)

    j = 0
    for i in range(n_left):
        # the previous row may have stepped the cursor back to -1
        if j < 0:
            j = 0

        # run the cursor past every right row eligible for this left key
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                j += 1
        # step back onto the last eligible row (or -1 if none)
        j -= 1

        left_indexer[i] = i
        right_indexer[i] = j

        # discard a match lying further back than the tolerance allows
        if has_tolerance and j != -1:
            gap = left_values[i] - right_values[j]
            if gap > tolerance_:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_uint16_t(ndarray[uint16_t] left_values,
                       ndarray[uint16_t] right_values,
                       bint allow_exact_matches=1,
                       tolerance=None):
    """
    Compute positional indexers for an as-of join on uint16 keys.

    One cursor walks ``right_values`` forward; for each left row it
    stops on the last right row whose key is ``<=`` the left key
    (``<`` when ``allow_exact_matches`` is false).
    NOTE(review): the forward-only cursor presumably relies on both
    arrays being sorted ascending -- confirm against the caller.

    Parameters
    ----------
    left_values, right_values : ndarray[uint16_t]
        Join keys of the left and right side.
    allow_exact_matches : bint, default 1
        Whether a right key equal to the left key may be matched.
    tolerance : optional
        When not None it is coerced to uint16_t and a match whose key
        distance is greater than it is replaced by -1.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64_t]
        Both of length ``len(left_values)``.  ``left_indexer[i] == i``;
        ``right_indexer[i]`` is the cursor position for row ``i``, or -1
        when the cursor found nothing or the tolerance rejected it.
    """
    cdef:
        Py_ssize_t i, j, n_left, n_right
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        uint16_t tolerance_

    # a tolerance of None switches the distance check off entirely
    has_tolerance = tolerance is not None
    if has_tolerance:
        tolerance_ = tolerance

    n_left = len(left_values)
    n_right = len(right_values)

    left_indexer = np.empty(n_left, dtype=np.int64)
    right_indexer = np.empty(n_left, dtype=np.int64)

    j = 0
    for i in range(n_left):
        # the previous row may have stepped the cursor back to -1
        if j < 0:
            j = 0

        # run the cursor past every right row eligible for this left key
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                j += 1
        # step back onto the last eligible row (or -1 if none)
        j -= 1

        left_indexer[i] = i
        right_indexer[i] = j

        # discard a match lying further back than the tolerance allows
        if has_tolerance and j != -1:
            gap = left_values[i] - right_values[j]
            if gap > tolerance_:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_uint32_t(ndarray[uint32_t] left_values,
                       ndarray[uint32_t] right_values,
                       bint allow_exact_matches=1,
                       tolerance=None):
    """
    Compute positional indexers for an as-of join on uint32 keys.

    One cursor walks ``right_values`` forward; for each left row it
    stops on the last right row whose key is ``<=`` the left key
    (``<`` when ``allow_exact_matches`` is false).
    NOTE(review): the forward-only cursor presumably relies on both
    arrays being sorted ascending -- confirm against the caller.

    Parameters
    ----------
    left_values, right_values : ndarray[uint32_t]
        Join keys of the left and right side.
    allow_exact_matches : bint, default 1
        Whether a right key equal to the left key may be matched.
    tolerance : optional
        When not None it is coerced to uint32_t and a match whose key
        distance is greater than it is replaced by -1.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64_t]
        Both of length ``len(left_values)``.  ``left_indexer[i] == i``;
        ``right_indexer[i]`` is the cursor position for row ``i``, or -1
        when the cursor found nothing or the tolerance rejected it.
    """
    cdef:
        Py_ssize_t i, j, n_left, n_right
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        uint32_t tolerance_

    # a tolerance of None switches the distance check off entirely
    has_tolerance = tolerance is not None
    if has_tolerance:
        tolerance_ = tolerance

    n_left = len(left_values)
    n_right = len(right_values)

    left_indexer = np.empty(n_left, dtype=np.int64)
    right_indexer = np.empty(n_left, dtype=np.int64)

    j = 0
    for i in range(n_left):
        # the previous row may have stepped the cursor back to -1
        if j < 0:
            j = 0

        # run the cursor past every right row eligible for this left key
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                j += 1
        # step back onto the last eligible row (or -1 if none)
        j -= 1

        left_indexer[i] = i
        right_indexer[i] = j

        # discard a match lying further back than the tolerance allows
        if has_tolerance and j != -1:
            gap = left_values[i] - right_values[j]
            if gap > tolerance_:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_uint64_t(ndarray[uint64_t] left_values,
                       ndarray[uint64_t] right_values,
                       bint allow_exact_matches=1,
                       tolerance=None):
    """
    Compute positional indexers for an as-of join on uint64 keys.

    One cursor walks ``right_values`` forward; for each left row it
    stops on the last right row whose key is ``<=`` the left key
    (``<`` when ``allow_exact_matches`` is false).
    NOTE(review): the forward-only cursor presumably relies on both
    arrays being sorted ascending -- confirm against the caller.

    Parameters
    ----------
    left_values, right_values : ndarray[uint64_t]
        Join keys of the left and right side.
    allow_exact_matches : bint, default 1
        Whether a right key equal to the left key may be matched.
    tolerance : optional
        When not None it is coerced to uint64_t and a match whose key
        distance is greater than it is replaced by -1.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64_t]
        Both of length ``len(left_values)``.  ``left_indexer[i] == i``;
        ``right_indexer[i]`` is the cursor position for row ``i``, or -1
        when the cursor found nothing or the tolerance rejected it.
    """
    cdef:
        Py_ssize_t i, j, n_left, n_right
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        uint64_t tolerance_

    # a tolerance of None switches the distance check off entirely
    has_tolerance = tolerance is not None
    if has_tolerance:
        tolerance_ = tolerance

    n_left = len(left_values)
    n_right = len(right_values)

    left_indexer = np.empty(n_left, dtype=np.int64)
    right_indexer = np.empty(n_left, dtype=np.int64)

    j = 0
    for i in range(n_left):
        # the previous row may have stepped the cursor back to -1
        if j < 0:
            j = 0

        # run the cursor past every right row eligible for this left key
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                j += 1
        # step back onto the last eligible row (or -1 if none)
        j -= 1

        left_indexer[i] = i
        right_indexer[i] = j

        # discard a match lying further back than the tolerance allows
        if has_tolerance and j != -1:
            gap = left_values[i] - right_values[j]
            if gap > tolerance_:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_int8_t(ndarray[int8_t] left_values,
                     ndarray[int8_t] right_values,
                     bint allow_exact_matches=1,
                     tolerance=None):
    """
    Compute positional indexers for an as-of join on int8 keys.

    One cursor walks ``right_values`` forward; for each left row it
    stops on the last right row whose key is ``<=`` the left key
    (``<`` when ``allow_exact_matches`` is false).
    NOTE(review): the forward-only cursor presumably relies on both
    arrays being sorted ascending -- confirm against the caller.

    Parameters
    ----------
    left_values, right_values : ndarray[int8_t]
        Join keys of the left and right side.
    allow_exact_matches : bint, default 1
        Whether a right key equal to the left key may be matched.
    tolerance : optional
        When not None it is coerced to int8_t and a match whose key
        distance is greater than it is replaced by -1.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64_t]
        Both of length ``len(left_values)``.  ``left_indexer[i] == i``;
        ``right_indexer[i]`` is the cursor position for row ``i``, or -1
        when the cursor found nothing or the tolerance rejected it.
    """
    cdef:
        Py_ssize_t i, j, n_left, n_right
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        int8_t tolerance_

    # a tolerance of None switches the distance check off entirely
    has_tolerance = tolerance is not None
    if has_tolerance:
        tolerance_ = tolerance

    n_left = len(left_values)
    n_right = len(right_values)

    left_indexer = np.empty(n_left, dtype=np.int64)
    right_indexer = np.empty(n_left, dtype=np.int64)

    j = 0
    for i in range(n_left):
        # the previous row may have stepped the cursor back to -1
        if j < 0:
            j = 0

        # run the cursor past every right row eligible for this left key
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                j += 1
        # step back onto the last eligible row (or -1 if none)
        j -= 1

        left_indexer[i] = i
        right_indexer[i] = j

        # discard a match lying further back than the tolerance allows
        if has_tolerance and j != -1:
            gap = left_values[i] - right_values[j]
            if gap > tolerance_:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_int16_t(ndarray[int16_t] left_values,
                      ndarray[int16_t] right_values,
                      bint allow_exact_matches=1,
                      tolerance=None):
    """
    Compute positional indexers for an as-of join on int16 keys.

    One cursor walks ``right_values`` forward; for each left row it
    stops on the last right row whose key is ``<=`` the left key
    (``<`` when ``allow_exact_matches`` is false).
    NOTE(review): the forward-only cursor presumably relies on both
    arrays being sorted ascending -- confirm against the caller.

    Parameters
    ----------
    left_values, right_values : ndarray[int16_t]
        Join keys of the left and right side.
    allow_exact_matches : bint, default 1
        Whether a right key equal to the left key may be matched.
    tolerance : optional
        When not None it is coerced to int16_t and a match whose key
        distance is greater than it is replaced by -1.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64_t]
        Both of length ``len(left_values)``.  ``left_indexer[i] == i``;
        ``right_indexer[i]`` is the cursor position for row ``i``, or -1
        when the cursor found nothing or the tolerance rejected it.
    """
    cdef:
        Py_ssize_t i, j, n_left, n_right
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        int16_t tolerance_

    # a tolerance of None switches the distance check off entirely
    has_tolerance = tolerance is not None
    if has_tolerance:
        tolerance_ = tolerance

    n_left = len(left_values)
    n_right = len(right_values)

    left_indexer = np.empty(n_left, dtype=np.int64)
    right_indexer = np.empty(n_left, dtype=np.int64)

    j = 0
    for i in range(n_left):
        # the previous row may have stepped the cursor back to -1
        if j < 0:
            j = 0

        # run the cursor past every right row eligible for this left key
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                j += 1
        # step back onto the last eligible row (or -1 if none)
        j -= 1

        left_indexer[i] = i
        right_indexer[i] = j

        # discard a match lying further back than the tolerance allows
        if has_tolerance and j != -1:
            gap = left_values[i] - right_values[j]
            if gap > tolerance_:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_int32_t(ndarray[int32_t] left_values,
                      ndarray[int32_t] right_values,
                      bint allow_exact_matches=1,
                      tolerance=None):
    """
    Compute positional indexers for an as-of join on int32 keys.

    One cursor walks ``right_values`` forward; for each left row it
    stops on the last right row whose key is ``<=`` the left key
    (``<`` when ``allow_exact_matches`` is false).
    NOTE(review): the forward-only cursor presumably relies on both
    arrays being sorted ascending -- confirm against the caller.

    Parameters
    ----------
    left_values, right_values : ndarray[int32_t]
        Join keys of the left and right side.
    allow_exact_matches : bint, default 1
        Whether a right key equal to the left key may be matched.
    tolerance : optional
        When not None it is coerced to int32_t and a match whose key
        distance is greater than it is replaced by -1.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64_t]
        Both of length ``len(left_values)``.  ``left_indexer[i] == i``;
        ``right_indexer[i]`` is the cursor position for row ``i``, or -1
        when the cursor found nothing or the tolerance rejected it.
    """
    cdef:
        Py_ssize_t i, j, n_left, n_right
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        int32_t tolerance_

    # a tolerance of None switches the distance check off entirely
    has_tolerance = tolerance is not None
    if has_tolerance:
        tolerance_ = tolerance

    n_left = len(left_values)
    n_right = len(right_values)

    left_indexer = np.empty(n_left, dtype=np.int64)
    right_indexer = np.empty(n_left, dtype=np.int64)

    j = 0
    for i in range(n_left):
        # the previous row may have stepped the cursor back to -1
        if j < 0:
            j = 0

        # run the cursor past every right row eligible for this left key
        if allow_exact_matches:
            while (j < n_right and
                   right_values[j] <= left_values[i]):
                j += 1
        else:
            while (j < n_right and
                   right_values[j] < left_values[i]):
                j += 1
        # step back onto the last eligible row (or -1 if none)
        j -= 1

        left_indexer[i] = i
        right_indexer[i] = j

        # discard a match lying further back than the tolerance allows
        if has_tolerance and j != -1:
            gap = left_values[i] - right_values[j]
            if gap > tolerance_:
                right_indexer[i] = -1

    return left_indexer, right_indexer
|
||||
|
||||
|
||||
def asof_join_int64_t(ndarray[int64_t] left_values,
|
||||
ndarray[int64_t] right_values,
|
||||