PERF: Improve replace perf
This commit is contained in:
parent
dfeae396c8
commit
ffc59b04d2
|
@ -32,6 +32,30 @@ class replace_large_dict(object):
|
|||
self.s.replace(self.to_rep, inplace=True)
|
||||
|
||||
|
||||
class replace_convert(object):
|
||||
goal_time = 0.5
|
||||
|
||||
def setup(self):
|
||||
self.n = (10 ** 3)
|
||||
self.to_ts = dict(((i, pd.Timestamp(i)) for i in range(self.n)))
|
||||
self.to_td = dict(((i, pd.Timedelta(i)) for i in range(self.n)))
|
||||
self.s = Series(np.random.randint(self.n, size=(10 ** 3)))
|
||||
self.df = DataFrame({'A': np.random.randint(self.n, size=(10 ** 3)),
|
||||
'B': np.random.randint(self.n, size=(10 ** 3))})
|
||||
|
||||
def time_replace_series_timestamp(self):
|
||||
self.s.replace(self.to_ts)
|
||||
|
||||
def time_replace_series_timedelta(self):
|
||||
self.s.replace(self.to_td)
|
||||
|
||||
def time_replace_frame_timestamp(self):
|
||||
self.df.replace(self.to_ts)
|
||||
|
||||
def time_replace_frame_timedelta(self):
|
||||
self.df.replace(self.to_td)
|
||||
|
||||
|
||||
class replace_replacena(object):
|
||||
goal_time = 0.2
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ Highlights include:
|
|||
Performance Improvements
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
- Improved performance of ``.replace()`` (:issue:`12745`)
|
||||
|
||||
.. _whatsnew_0192.bug_fixes:
|
||||
|
||||
|
|
|
@ -3477,20 +3477,27 @@ class NDFrame(PandasObject):
|
|||
res = self if inplace else self.copy()
|
||||
for c, src in compat.iteritems(to_replace):
|
||||
if c in value and c in self:
|
||||
# object conversion is handled in
|
||||
# series.replace which is called recursively
|
||||
res[c] = res[c].replace(to_replace=src,
|
||||
value=value[c],
|
||||
inplace=False, regex=regex)
|
||||
inplace=False,
|
||||
regex=regex)
|
||||
return None if inplace else res
|
||||
|
||||
# {'A': NA} -> 0
|
||||
elif not is_list_like(value):
|
||||
for k, src in compat.iteritems(to_replace):
|
||||
if k in self:
|
||||
new_data = new_data.replace(to_replace=src,
|
||||
value=value,
|
||||
filter=[k],
|
||||
inplace=inplace,
|
||||
regex=regex)
|
||||
keys = [(k, src) for k, src in compat.iteritems(to_replace)
|
||||
if k in self]
|
||||
keys_len = len(keys) - 1
|
||||
for i, (k, src) in enumerate(keys):
|
||||
convert = i == keys_len
|
||||
new_data = new_data.replace(to_replace=src,
|
||||
value=value,
|
||||
filter=[k],
|
||||
inplace=inplace,
|
||||
regex=regex,
|
||||
convert=convert)
|
||||
else:
|
||||
raise TypeError('value argument must be scalar, dict, or '
|
||||
'Series')
|
||||
|
|
|
@ -622,7 +622,6 @@ class Block(PandasObject):
|
|||
|
||||
original_to_replace = to_replace
|
||||
mask = isnull(self.values)
|
||||
|
||||
# try to replace, if we raise an error, convert to ObjectBlock and
|
||||
# retry
|
||||
try:
|
||||
|
@ -1795,13 +1794,14 @@ class BoolBlock(NumericBlock):
|
|||
return issubclass(value.dtype.type, np.bool_)
|
||||
|
||||
def replace(self, to_replace, value, inplace=False, filter=None,
|
||||
regex=False, mgr=None):
|
||||
regex=False, convert=True, mgr=None):
|
||||
to_replace_values = np.atleast_1d(to_replace)
|
||||
if not np.can_cast(to_replace_values, bool):
|
||||
return self
|
||||
return super(BoolBlock, self).replace(to_replace, value,
|
||||
inplace=inplace, filter=filter,
|
||||
regex=regex, mgr=mgr)
|
||||
regex=regex, convert=convert,
|
||||
mgr=mgr)
|
||||
|
||||
|
||||
class ObjectBlock(Block):
|
||||
|
@ -3214,6 +3214,7 @@ class BlockManager(PandasObject):
|
|||
masks = [comp(s) for i, s in enumerate(src_list)]
|
||||
|
||||
result_blocks = []
|
||||
src_len = len(src_list) - 1
|
||||
for blk in self.blocks:
|
||||
|
||||
# it's possible to get multiple result blocks here
|
||||
|
@ -3223,8 +3224,9 @@ class BlockManager(PandasObject):
|
|||
new_rb = []
|
||||
for b in rb:
|
||||
if b.dtype == np.object_:
|
||||
convert = i == src_len
|
||||
result = b.replace(s, d, inplace=inplace, regex=regex,
|
||||
mgr=mgr)
|
||||
mgr=mgr, convert=convert)
|
||||
new_rb = _extend_blocks(result, new_rb)
|
||||
else:
|
||||
# get our mask for this element, sized to this
|
||||
|
@ -4788,7 +4790,12 @@ def _putmask_smart(v, m, n):
|
|||
|
||||
# change the dtype
|
||||
dtype, _ = _maybe_promote(n.dtype)
|
||||
nv = v.astype(dtype)
|
||||
|
||||
if is_extension_type(v.dtype) and is_object_dtype(dtype):
|
||||
nv = v.get_values(dtype)
|
||||
else:
|
||||
nv = v.astype(dtype)
|
||||
|
||||
try:
|
||||
nv[m] = n[m]
|
||||
except ValueError:
|
||||
|
|
Loading…
Reference in New Issue