cache and remove boxing (#14931)

(cherry picked from commit 4c3d4d4fbb)
This commit is contained in:
Maximilian Roos 2016-12-20 20:58:19 -05:00 committed by Joris Van den Bossche
parent 85bc6d7cd8
commit a8d8fae410
5 changed files with 36 additions and 16 deletions

View File

@ -47,3 +47,28 @@ class period_algorithm(object):
self.i.value_counts()
class period_standard_indexing(object):
    """Exercise common lookup, copy and alignment paths on a ``PeriodIndex``.

    ``setup`` builds the fixtures; every ``time_*`` method performs a single
    operation on them and discards the result.
    """

    goal_time = 0.2

    def setup(self):
        """Create a 1000-period daily index, a Series on it, and one Period."""
        # ``period_range`` is used instead of ``PeriodIndex(start=..., periods=...)``:
        # building a PeriodIndex from range keywords was deprecated in pandas 0.24
        # and later removed, while ``period_range`` yields the same index.
        self.index = pd.period_range(start='1985', periods=1000, freq='D')
        self.series = Series(range(1000), index=self.index)
        # A Period from the middle of the index, used as the lookup key.
        self.period = self.index[500]

    def time_get_loc(self):
        """Look up the position of a single Period in the index."""
        self.index.get_loc(self.period)

    def time_shape(self):
        """Read the ``shape`` attribute of the index."""
        self.index.shape

    def time_shallow_copy(self):
        """Call ``_shallow_copy`` on the index with no arguments."""
        self.index._shallow_copy()

    def time_series_loc(self):
        """Select one element of the Series by Period label via ``.loc``."""
        self.series.loc[self.period]

    def time_align(self):
        """Build a DataFrame from the full Series and its first 500 rows."""
        pd.DataFrame({'a': self.series, 'b': self.series[:500]})

    def time_intersection(self):
        """Intersect two overlapping slices of the index."""
        self.index[:750].intersection(self.index[250:])

View File

@ -22,6 +22,7 @@ Performance Improvements
~~~~~~~~~~~~~~~~~~~~~~~~
- Improved performance of ``.replace()`` (:issue:`12745`)
- Improved performance of ``PeriodIndex`` (:issue:`14822`)
- Improved performance of ``Series`` creation with a datetime index and dictionary data (:issue:`14894`)

View File

@ -814,7 +814,7 @@ class IndexOpsMixin(object):
@property
def shape(self):
""" return a tuple of the shape of the underlying data """
return self.values.shape
return self._values.shape
@property
def ndim(self):
@ -842,22 +842,22 @@ class IndexOpsMixin(object):
@property
def itemsize(self):
""" return the size of the dtype of the item of the underlying data """
return self.values.itemsize
return self._values.itemsize
@property
def nbytes(self):
""" return the number of bytes in the underlying data """
return self.values.nbytes
return self._values.nbytes
@property
def strides(self):
""" return the strides of the underlying data """
return self.values.strides
return self._values.strides
@property
def size(self):
""" return the number of elements in the underlying data """
return self.values.size
return self._values.size
@property
def flags(self):

View File

@ -64,6 +64,7 @@ def dt64arr_to_periodarr(data, freq, tz):
# --- Period index sketch
_DIFFERENT_FREQ_INDEX = period._DIFFERENT_FREQ_INDEX
@ -304,7 +305,7 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index):
if (len(values) > 0 and is_float_dtype(values)):
raise TypeError("PeriodIndex can't take floats")
else:
return PeriodIndex(values, name=name, freq=freq, **kwargs)
return cls(values, name=name, freq=freq, **kwargs)
values = np.array(values, dtype='int64', copy=False)
@ -325,6 +326,8 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index):
if kwargs.get('freq') is None:
# freq must be provided
kwargs['freq'] = self.freq
if values is None:
values = self._values
return super(PeriodIndex, self)._shallow_copy(values=values, **kwargs)
def _coerce_scalar_to_index(self, item):
@ -355,9 +358,8 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index):
def asi8(self):
return self._values.view('i8')
@property
@cache_readonly
def _int64index(self):
# do not cache, same as .asi8
return Int64Index(self.asi8, name=self.name, fastpath=True)
@property

View File

@ -2101,14 +2101,6 @@ class TestPeriodIndex(tm.TestCase):
exp = idx.values < idx.values[10]
self.assert_numpy_array_equal(result, exp)
def test_getitem_ndim2(self):
idx = period_range('2007-01', periods=3, freq='M')
result = idx[:, None]
# MPL kludge, internally has incorrect shape
tm.assertIsInstance(result, PeriodIndex)
self.assertEqual(result.shape, (len(idx), ))
def test_getitem_index(self):
idx = period_range('2007-01', periods=10, freq='M', name='x')