Browse Source

CLN: Removed SparsePanel

Title is self-explanatory.  Picks up where #11157 left off.

Author: gfyoung <gfyoung17@gmail.com>

Closes #13778 from gfyoung/remove-sparse-panel and squashes the following commits:

f3fa93b [gfyoung] CLN: Removed SparsePanel
pull/13716/merge
gfyoung 6 years ago committed by Jeff Reback
parent
commit
690d52cf6b
  1. 92
      bench/bench_sparse.py
  2. 20
      doc/source/sparse.rst
  3. 2
      doc/source/whatsnew/v0.19.0.txt
  4. 2
      pandas/api/tests/test_api.py
  5. 22
      pandas/core/panel.py
  6. 1
      pandas/core/sparse.py
  7. 14
      pandas/io/packers.py
  8. 37
      pandas/io/pytables.py
  9. 20
      pandas/io/tests/test_packers.py
  10. 17
      pandas/io/tests/test_pytables.py
  11. 1
      pandas/sparse/api.py
  12. 563
      pandas/sparse/panel.py
  13. 279
      pandas/sparse/tests/test_panel.py
  14. 4
      pandas/stats/plm.py
  15. 54
      pandas/tests/test_panel.py
  16. 16
      pandas/util/testing.py

92
bench/bench_sparse.py

@ -1,92 +0,0 @@
import numpy as np
from pandas import *
import pandas.core.sparse as spm
import pandas.compat as compat
reload(spm)
from pandas.core.sparse import *
# Benchmark fixtures: two float arrays of length N with NaN stretches,
# wrapped as SparseSeries in both the default and the 'integer' kind.
# NOTE(review): N is a float literal, so ``off`` and every slice bound
# below are floats -- confirm the installed NumPy accepts float slice bounds.
N = 10000.
arr1 = np.arange(N)
index = Index(np.arange(N))
off = N // 10
# Blank out three stretches of arr1, each ``off`` elements long.
arr1[off: 2 * off] = np.NaN
arr1[4 * off: 5 * off] = np.NaN
arr1[8 * off: 9 * off] = np.NaN
arr2 = np.arange(N)
# arr2 gets two NaN stretches placed at different offsets than arr1's.
arr2[3 * off // 2: 2 * off + off // 2] = np.NaN
arr2[8 * off + off // 2: 9 * off + off // 2] = np.NaN
s1 = SparseSeries(arr1, index=index)
s2 = SparseSeries(arr2, index=index)
is1 = SparseSeries(arr1, kind='integer', index=index)
is2 = SparseSeries(arr2, kind='integer', index=index)
# Dense counterparts of the two default-kind series.
s1_dense = s1.to_dense()
s2_dense = s2.to_dense()
# The example frame is loaded from a hard-coded, machine-specific path.
if compat.is_platform_linux():
pth = '/home/wesm/code/pandas/example'
else:
pth = '/Users/wesm/code/pandas/example'
dm = DataFrame.load(pth)
sdf = dm.to_sparse()
def new_data_like(sdf):
    """
    Build a SparseDataFrame of random values laid out like ``sdf``.

    Every column keeps the sparse index and fill value of the matching
    column in ``sdf``; only the stored values are replaced by draws from
    ``np.random.randn``.

    Parameters
    ----------
    sdf : SparseDataFrame

    Returns
    -------
    SparseDataFrame
    """
    def _randomized(column):
        # one random draw per stored (non-fill) point of the column
        noise = np.random.randn(len(column.sp_values))
        return SparseSeries(noise,
                            index=sdf.index,
                            sparse_index=column.sp_index,
                            fill_value=column.fill_value)

    return SparseDataFrame(dict((name, _randomized(column))
                                for name, column in compat.iteritems(sdf)))
# data = {}
# for col, ser in dm.iteritems():
# data[col] = SparseSeries(ser)
# Dense panel holding the example frame under a single item.
dwp = Panel.fromDict({'foo': dm})
# sdf = SparseDataFrame(data)
lp = stack_sparse_frame(sdf)
# The first one-item panel is immediately replaced by a four-item panel
# that reuses the same sparse frame for every item.
swp = SparsePanel({'A': sdf})
swp = SparsePanel({'A': sdf,
'B': sdf,
'C': sdf,
'D': sdf})
# Regression inputs: y is the sparse frame, x is a two-item sparse panel of
# the frame plus scaled random data.
y = sdf
x = SparsePanel({'x1': sdf + new_data_like(sdf) / 10,
'x2': sdf + new_data_like(sdf) / 10})
dense_y = sdf
dense_x = x.to_dense()
# import hotshot, hotshot.stats
# prof = hotshot.Profile('test.prof')
# benchtime, stones = prof.runcall(ols, y=y, x=x)
# prof.close()
# stats = hotshot.stats.load('test.prof')
dense_model = ols(y=dense_y, x=dense_x)
# Re-import the stats modules and reload them.
import pandas.stats.plm as plm
import pandas.stats.interface as face
reload(plm)
reload(face)
# model = face.ols(y=y, x=x)

20
doc/source/sparse.rst

@ -15,13 +15,14 @@
Sparse data structures
**********************
We have implemented "sparse" versions of Series, DataFrame, and Panel. These
are not sparse in the typical "mostly 0". You can view these objects as being
"compressed" where any data matching a specific value (NaN/missing by default,
though any value can be chosen) is omitted. A special ``SparseIndex`` object
tracks where data has been "sparsified". This will make much more sense in an
example. All of the standard pandas data structures have a ``to_sparse``
method:
.. note:: The ``SparsePanel`` class has been removed in 0.19.0
We have implemented "sparse" versions of Series and DataFrame. These are not sparse
in the typical "mostly 0" sense. Rather, you can view these objects as being "compressed"
where any data matching a specific value (``NaN`` / missing value by default, though any
value can be chosen) is omitted. A special ``SparseIndex`` object tracks where data has been
"sparsified". This will make much more sense in an example. All of the standard pandas
data structures have a ``to_sparse`` method:
.. ipython:: python
@ -77,9 +78,8 @@ distinct from the ``fill_value``:
sparr = pd.SparseArray(arr)
sparr
Like the indexed objects (SparseSeries, SparseDataFrame, SparsePanel), a
``SparseArray`` can be converted back to a regular ndarray by calling
``to_dense``:
Like the indexed objects (SparseSeries, SparseDataFrame), a ``SparseArray``
can be converted back to a regular ndarray by calling ``to_dense``:
.. ipython:: python

2
doc/source/whatsnew/v0.19.0.txt

@ -330,6 +330,7 @@ API changes
~~~~~~~~~~~
- ``Panel.to_sparse`` will raise a ``NotImplementedError`` exception when called (:issue:`13778`)
- ``Index.reshape`` will raise a ``NotImplementedError`` exception when called (:issue:`12882`)
- Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`)
- ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. (:issue:`12388`)
@ -619,6 +620,7 @@ Deprecations
Removal of prior version deprecations/changes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- The ``SparsePanel`` class has been removed (:issue:`13778`)
- The ``pd.sandbox`` module has been removed in favor of the external library ``pandas-qt`` (:issue:`13670`)
- The ``pandas.io.data`` and ``pandas.io.wb`` modules are removed in favor of
the `pandas-datareader package <https://github.com/pydata/pandas-datareader>`__ (:issue:`13724`).

2
pandas/api/tests/test_api.py

@ -57,7 +57,7 @@ class TestPDApi(Base, tm.TestCase):
'TimedeltaIndex', 'Timestamp']
# these are already deprecated; awaiting removal
deprecated_classes = ['SparsePanel', 'TimeSeries', 'WidePanel',
deprecated_classes = ['TimeSeries', 'WidePanel',
'SparseTimeSeries', 'Panel4D']
# these should be deprecated in the future

22
pandas/core/panel.py

@ -393,25 +393,15 @@ class Panel(NDFrame):
fromDict = from_dict
def to_sparse(self, fill_value=None, kind='block'):
def to_sparse(self, *args, **kwargs):
"""
Convert to SparsePanel
Parameters
----------
fill_value : float, default NaN
kind : {'block', 'integer'}
NOT IMPLEMENTED: do not call this method, as sparsifying is not
supported for Panel objects and will raise an error.
Returns
-------
y : SparseDataFrame
Convert to SparsePanel
"""
from pandas.core.sparse import SparsePanel
frames = dict(self.iteritems())
return SparsePanel(frames, items=self.items,
major_axis=self.major_axis,
minor_axis=self.minor_axis, default_kind=kind,
default_fill_value=fill_value)
raise NotImplementedError("sparsifying is not supported "
"for Panel objects")
def to_excel(self, path, na_rep='', engine=None, **kwargs):
"""

1
pandas/core/sparse.py

@ -8,4 +8,3 @@ with float64 data
from pandas.sparse.series import SparseSeries
from pandas.sparse.frame import SparseDataFrame
from pandas.sparse.panel import SparsePanel

14
pandas/io/packers.py

@ -56,7 +56,7 @@ from pandas import (Timestamp, Period, Series, DataFrame, # noqa
Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT,
Categorical)
from pandas.tslib import NaTType
from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel
from pandas.sparse.api import SparseSeries, SparseDataFrame
from pandas.sparse.array import BlockIndex, IntIndex
from pandas.core.generic import NDFrame
from pandas.core.common import PerformanceWarning
@ -447,18 +447,6 @@ def encode(obj):
# d['data'] = dict([(name, ss)
# for name, ss in compat.iteritems(obj)])
# return d
elif isinstance(obj, SparsePanel):
raise NotImplementedError(
'msgpack sparse frame is not implemented'
)
# d = {'typ': 'sparse_panel',
# 'klass': obj.__class__.__name__,
# 'items': obj.items}
# for f in ['default_fill_value', 'default_kind']:
# d[f] = getattr(obj, f, None)
# d['data'] = dict([(name, df)
# for name, df in compat.iteritems(obj)])
# return d
else:
data = obj._data

37
pandas/io/pytables.py

@ -29,7 +29,7 @@ from pandas import (Series, DataFrame, Panel, Panel4D, Index,
MultiIndex, Int64Index, isnull)
from pandas.core import config
from pandas.io.common import _stringify_path
from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel
from pandas.sparse.api import SparseSeries, SparseDataFrame
from pandas.sparse.array import BlockIndex, IntIndex
from pandas.tseries.api import PeriodIndex, DatetimeIndex
from pandas.tseries.tdi import TimedeltaIndex
@ -169,7 +169,6 @@ _TYPE_MAP = {
SparseDataFrame: u('sparse_frame'),
Panel: u('wide'),
Panel4D: u('ndim'),
SparsePanel: u('sparse_panel')
}
# storer class map
@ -183,7 +182,6 @@ _STORER_MAP = {
u('frame'): 'FrameFixed',
u('sparse_frame'): 'SparseFrameFixed',
u('wide'): 'PanelFixed',
u('sparse_panel'): 'SparsePanelFixed',
}
# table class map
@ -2777,39 +2775,6 @@ class SparseFrameFixed(SparseFixed):
self.write_index('columns', obj.columns)
class SparsePanelFixed(SparseFixed):
    """
    Fixed-format storer for SparsePanel objects.

    Each panel item is written to (and read from) its own child group named
    ``sparse_frame_<item>`` through a SparseFrameFixed storer; the panel's
    default kind and fill value are kept as attributes.
    """
    pandas_kind = u('sparse_panel')
    attributes = ['default_kind', 'default_fill_value']

    def read(self, **kwargs):
        """Rebuild the SparsePanel from the per-item frame groups."""
        kwargs = self.validate_read(kwargs)
        items = self.read_index('items')

        def _load_frame(name):
            node = getattr(self.group, 'sparse_frame_%s' % name)
            storer = SparseFrameFixed(self.parent, node)
            storer.infer_axes()
            return storer.read()

        sdict = dict((name, _load_frame(name)) for name in items)
        return SparsePanel(sdict, items=items,
                           default_kind=self.default_kind,
                           default_fill_value=self.default_fill_value)

    def write(self, obj, **kwargs):
        """Store the panel defaults, the items index and one group per item."""
        super(SparsePanelFixed, self).write(obj, **kwargs)
        self.attrs.default_fill_value = obj.default_fill_value
        self.attrs.default_kind = obj.default_kind
        self.write_index('items', obj.items)

        for name, sdf in obj.iteritems():
            key = 'sparse_frame_%s' % name
            # reuse the child group when it already exists
            if key in self.group._v_children:
                node = getattr(self.group, key)
            else:
                node = self._handle.create_group(self.group, key)
            SparseFrameFixed(self.parent, node).write(sdf)
class BlockManagerFixed(GenericFixed):
attributes = ['ndim', 'nblocks']
is_shape_reversed = False

20
pandas/io/tests/test_packers.py

@ -542,26 +542,6 @@ class TestSparse(TestPackers):
self._check_roundtrip(ss3, tm.assert_frame_equal,
check_frame_type=True)
# Round-trip sparse panels built with the default settings, with
# kind='integer' and with fill_value=0, under a FutureWarning check.
def test_sparse_panel(self):
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
items = ['x', 'y', 'z']
# one small frame per item
p = Panel(dict((i, tm.makeDataFrame().ix[:2, :2]) for i in items))
sp = p.to_sparse()
self._check_roundtrip(sp, tm.assert_panel_equal,
check_panel_type=True)
sp2 = p.to_sparse(kind='integer')
self._check_roundtrip(sp2, tm.assert_panel_equal,
check_panel_type=True)
sp3 = p.to_sparse(fill_value=0)
self._check_roundtrip(sp3, tm.assert_panel_equal,
check_panel_type=True)
class TestCompression(TestPackers):
"""See https://github.com/pydata/pandas/pull/9783

17
pandas/io/tests/test_pytables.py

@ -2688,23 +2688,6 @@ class TestHDFStore(Base, tm.TestCase):
self._check_double_roundtrip(ss3, tm.assert_frame_equal,
check_frame_type=True)
def test_sparse_panel(self):
    """Double round-trip sparse panels built with three to_sparse settings."""
    names = ['x', 'y', 'z']
    dense = Panel(dict((name, tm.makeDataFrame().ix[:2, :2])
                       for name in names))

    # default settings, integer kind, then zero fill value -- in that order
    for sparse_kwargs in ({}, {'kind': 'integer'}, {'fill_value': 0}):
        sparse = dense.to_sparse(**sparse_kwargs)
        self._check_double_roundtrip(sparse, assert_panel_equal,
                                     check_panel_type=True)
def test_float_index(self):
# GH #454

1
pandas/sparse/api.py

@ -4,4 +4,3 @@ from pandas.sparse.array import SparseArray
from pandas.sparse.list import SparseList
from pandas.sparse.series import SparseSeries, SparseTimeSeries
from pandas.sparse.frame import SparseDataFrame
from pandas.sparse.panel import SparsePanel

563
pandas/sparse/panel.py

@ -1,563 +0,0 @@
"""
Data structures for sparse float data. Life is made simpler by dealing only
with float64 data
"""
# pylint: disable=E1101,E1103,W0231
import warnings
from pandas.compat import lrange, zip
from pandas import compat
import numpy as np
from pandas.types.common import is_list_like, is_scalar
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.frame import DataFrame
from pandas.core.panel import Panel
from pandas.sparse.frame import SparseDataFrame
from pandas.util.decorators import deprecate
import pandas.core.common as com
import pandas.core.ops as ops
class SparsePanelAxis(object):
    """
    Descriptor for a panel axis that is cached on the owner and pushed down
    to every frame the owner holds.

    Parameters
    ----------
    cache_field : str
        Attribute name on the owning object under which the axis is cached
    frame_attr : str
        Attribute name set on each frame in ``obj._frames`` on assignment
    """

    def __init__(self, cache_field, frame_attr):
        self.cache_field, self.frame_attr = cache_field, frame_attr

    def __get__(self, obj, type=None):
        # None when the axis has not been assigned yet
        return getattr(obj, self.cache_field, None)

    def __set__(self, obj, value):
        new_axis = _ensure_index(value)
        if isinstance(new_axis, MultiIndex):
            raise NotImplementedError("value cannot be a MultiIndex")

        # update every frame first, then the cached copy on the owner
        for frame in compat.itervalues(obj._frames):
            setattr(frame, self.frame_attr, new_axis)
        setattr(obj, self.cache_field, new_axis)
class SparsePanel(Panel):
"""
Sparse version of Panel
Parameters
----------
frames : dict of DataFrame objects
items : array-like
major_axis : array-like
minor_axis : array-like
default_kind : {'block', 'integer'}, default 'block'
Default sparse kind for converting Series to SparseSeries. Will not
override SparseSeries passed into constructor
default_fill_value : float
Default fill_value for converting Series to SparseSeries. Will not
override SparseSeries passed in
Notes
-----
"""
ndim = 3
_typ = 'panel'
_subtyp = 'sparse_panel'
def __init__(self, frames=None, items=None, major_axis=None,
             minor_axis=None, default_fill_value=np.nan,
             default_kind='block', copy=False):
    """
    Build the panel from a dict of frames, or from an ndarray that is split
    along its first dimension into one SparseDataFrame per item.

    Always emits a FutureWarning (deprecation, #11157).

    Raises
    ------
    TypeError
        If ``frames`` is neither a dict nor an ndarray
    ValueError
        If an entry of ``items`` has no matching frame
    """
    warnings.warn("SparsePanel is deprecated and will be removed in a "
                  "future version", FutureWarning, stacklevel=3)

    frames = {} if frames is None else frames

    if isinstance(frames, np.ndarray):
        # pair each item label with its slice of the array
        frames = dict(
            (item, SparseDataFrame(vals, index=major_axis,
                                   columns=minor_axis,
                                   default_fill_value=default_fill_value,
                                   default_kind=default_kind))
            for item, vals in zip(items, frames))

    if not isinstance(frames, dict):
        raise TypeError('input must be a dict, a %r was passed' %
                        type(frames).__name__)

    self.default_fill_value = default_fill_value
    self.default_kind = default_kind

    # default to the sorted frame keys when no items were given
    if items is None:
        items = Index(sorted(frames.keys()))
    items = _ensure_index(items)

    clean_frames, major_axis, minor_axis = _convert_frames(
        frames, major_axis, minor_axis,
        kind=default_kind, fill_value=default_fill_value)
    self._frames = clean_frames

    # every requested item must be backed by a frame
    for label in items:
        if label not in clean_frames:
            raise ValueError('column %r not found in data' % label)

    self._items = items
    self.major_axis = major_axis
    self.minor_axis = minor_axis
def _consolidate_inplace(self): # pragma: no cover
# do nothing when DataFrame calls this method
pass
def __array_wrap__(self, result):
    """Wrap ``result`` in a SparsePanel using this panel's axes and defaults."""
    wrap_kwargs = dict(items=self.items,
                       major_axis=self.major_axis,
                       minor_axis=self.minor_axis,
                       default_kind=self.default_kind,
                       default_fill_value=self.default_fill_value)
    return SparsePanel(result, **wrap_kwargs)
@classmethod
def from_dict(cls, data):
    """
    Analogous to Panel.from_dict

    Parameters
    ----------
    data : dict of DataFrame objects

    Returns
    -------
    panel : instance of ``cls``
    """
    # construct through ``cls`` so that subclasses get an instance of their
    # own type rather than a plain SparsePanel
    return cls(data)
def to_dense(self):
    """
    Materialize this SparsePanel as a regular (dense) Panel

    Returns
    -------
    dense : Panel
        Built from ``self.values`` with the same items, major and minor axes
    """
    dense_values = self.values
    return Panel(dense_values, self.items, self.major_axis,
                 self.minor_axis)
def as_matrix(self):
    """Return the same dense array as the ``values`` property."""
    dense = self.values
    return dense
@property
def values(self):
    """Dense ndarray stacking each frame's ``values`` in ``items`` order."""
    per_item = []
    for item in self.items:
        per_item.append(self._frames[item].values)
    return np.array(per_item)
# need a special property for items to make the field assignable
_items = None


def _get_items(self):
    """Return the index of item labels."""
    return self._items


def _set_items(self, new_items):
    """
    Relabel the panel items.

    The existing frames are re-keyed by pairing the new labels with the old
    ones positionally.

    Raises
    ------
    NotImplementedError
        If ``new_items`` is a MultiIndex
    """
    new_items = _ensure_index(new_items)
    if isinstance(new_items, MultiIndex):
        # message previously read "itemps" (typo)
        raise NotImplementedError("items cannot be a MultiIndex")

    # need to create new frames dict
    old_frame_dict = self._frames
    old_items = self._items
    self._frames = dict((new_k, old_frame_dict[old_k])
                        for new_k, old_k in zip(new_items, old_items))
    self._items = new_items


items = property(fget=_get_items, fset=_set_items)
# Axis descriptors: the first argument names the attribute the axis is cached
# under, the second names the frame attribute the axis is assigned to.
# major_axis corresponds to each DataFrame's index
major_axis = SparsePanelAxis('_major_axis', 'index')
# minor_axis corresponds to each DataFrame's columns
minor_axis = SparsePanelAxis('_minor_axis', 'columns')
def _ixs(self, i, axis=0):
    """
    Positional selection along ``axis`` (compat shim, as no Block Manager
    is used here)

    Parameters
    ----------
    i : int, slice, or sequence of integers
    axis : int
    """
    key = self._get_axis(axis)[i]

    if not is_list_like(key):
        return self.xs(key, axis=axis)

    # xs cannot handle a non-scalar key, so reindex instead
    axis_name = self._get_axis_name(axis)
    return self.reindex(**{axis_name: key})
def _slice(self, slobj, axis=0, kind=None):
    """
    Slice along ``axis`` by reindexing to the sliced labels (compat shim, as
    no Block Manager is used here); ``kind`` is accepted but unused.
    """
    axis_name = self._get_axis_name(axis)
    labels = self._get_axis(axis_name)[slobj]
    return self.reindex(**{axis_name: labels})
def _get_item_cache(self, key):
    """Return the frame stored for ``key`` straight from ``_frames``."""
    frames = self._frames
    return frames[key]
def __setitem__(self, key, value):
    """
    Store ``value`` under ``key`` after aligning it to the panel's axes.

    Dense frames are converted with the panel's default fill value and
    kind. A new key is appended to the end of ``items``.

    Raises
    ------
    ValueError
        If ``value`` is not a DataFrame
    """
    if not isinstance(value, DataFrame):
        raise ValueError('only DataFrame objects can be set currently')

    aligned = value.reindex(index=self.major_axis,
                            columns=self.minor_axis)
    if not isinstance(aligned, SparseDataFrame):
        aligned = aligned.to_sparse(fill_value=self.default_fill_value,
                                    kind=self.default_kind)
    self._frames[key] = aligned

    if key not in self.items:
        self._items = Index(list(self.items) + [key])
def set_value(self, item, major, minor, value):
    """
    Set a single value at (item, major, minor) and return the result

    Parameters
    ----------
    item : item label (panel item)
    major : major axis label (panel item row)
    minor : minor axis label (panel item column)
    value : scalar

    Notes
    -----
    The panel is densified, updated and sparsified again with this panel's
    default kind and fill value, so this is not particularly efficient; it
    exists for API compatibility with Panel.

    Returns
    -------
    panel : SparsePanel
    """
    updated = self.to_dense().set_value(item, major, minor, value)
    sparse_kwargs = {'kind': self.default_kind,
                     'fill_value': self.default_fill_value}
    return updated.to_sparse(**sparse_kwargs)
def __delitem__(self, key):
    """Remove the frame stored under ``key`` and drop its label from items."""
    loc = self.items.get_loc(key)
    before = lrange(loc)
    after = lrange(loc + 1, len(self.items))
    keep = before + after

    del self._frames[key]
    self._items = self._items.take(keep)
def __getstate__(self):
    """
    Pickle support: return a 6-tuple of the frames dict, the three pickled
    axes (items, major, minor), the default fill value and the default kind.
    """
    from pandas.io.pickle import _pickle_array

    items, major, minor = [_pickle_array(axis) for axis in
                           (self.items, self.major_axis, self.minor_axis)]
    return (self._frames, items, major, minor,
            self.default_fill_value, self.default_kind)
def __setstate__(self, state):
    """Restore the panel from the 6-tuple produced by ``__getstate__``."""
    frames, items, major, minor, fv, kind = state
    from pandas.io.pickle import _unpickle_array

    self.default_fill_value, self.default_kind = fv, kind

    # the axes are written to their cache attributes directly
    for attr, pickled in (('_items', items),
                          ('_major_axis', major),
                          ('_minor_axis', minor)):
        setattr(self, attr, _ensure_index(_unpickle_array(pickled)))

    self._frames = frames
def copy(self, deep=True):
    """
    Make a copy of the sparse panel

    Parameters
    ----------
    deep : boolean, default True
        When True the frames and the axes are deep-copied; otherwise only
        the frames dict itself is copied

    Returns
    -------
    copy : SparsePanel
    """
    axes = self._construct_axes_dict()
    if deep:
        frames = dict((name, frame.copy(deep=True))
                      for name, frame in compat.iteritems(self._frames))
        axes = dict((name, axis.copy(deep=True))
                    for name, axis in compat.iteritems(axes))
    else:
        frames = self._frames.copy()

    axes['default_fill_value'] = self.default_fill_value
    axes['default_kind'] = self.default_kind
    return SparsePanel(frames, **axes)
def to_frame(self, filter_observations=True):
    """
    Convert SparsePanel to (dense) DataFrame

    Only (major, minor) positions that hold a stored value in every item
    are kept.

    Returns
    -------
    frame : DataFrame
        MultiIndexed by (major, minor) with one column per item

    Raises
    ------
    TypeError
        If ``filter_observations`` is False
    """
    if not filter_observations:
        raise TypeError('filter_observations=False not supported for '
                        'SparsePanel.to_long')

    n_items, n_major, n_minor = self.shape
    counts = np.zeros(n_major * n_minor, dtype=int)

    values_by_item = {}
    indexer_by_item = {}
    for item in self.items:
        sp_values, major, minor = _stack_sparse_info(self[item])

        # values are stacked column-major
        flat_pos = minor * n_major + major
        counts.put(flat_pos, counts.take(flat_pos) + 1)

        values_by_item[item] = sp_values
        indexer_by_item[item] = flat_pos

    # positions observed in every item
    mask = counts == n_items

    # per item, keep only the stored values that sit on a masked position
    columns = []
    for item in self.items:
        keep = mask.take(indexer_by_item[item])
        columns.append(values_by_item[item][keep])
    values = np.column_stack(columns)

    inds, = mask.nonzero()

    # still column major
    major_labels = inds % n_major
    minor_labels = inds // n_major

    index = MultiIndex(levels=[self.major_axis, self.minor_axis],
                       labels=[major_labels, minor_labels],
                       verify_integrity=False)

    frame = DataFrame(values, index=index, columns=self.items)
    return frame.sortlevel(level=0)


to_long = deprecate('to_long', to_frame)
toLong = deprecate('toLong', to_frame)
def reindex(self, major=None, items=None, minor=None, major_axis=None,
            minor_axis=None, copy=False):
    """
    Conform / reshape panel axis labels to new input labels

    Parameters
    ----------
    major : array-like, default None
    items : array-like, default None
    minor : array-like, default None
    major_axis, minor_axis : array-like, default None
        Alternative spellings, mutually exclusive with ``major`` / ``minor``
    copy : boolean, default False
        Copy underlying SparseDataFrame objects

    Returns
    -------
    reindexed : SparsePanel

    Raises
    ------
    ValueError
        If no axis is specified
    NotImplementedError
        If ``items`` contains a label the panel does not hold
    """
    major = com._mut_exclusive(major=major, major_axis=major_axis)
    minor = com._mut_exclusive(minor=minor, minor_axis=minor_axis)

    if com._all_none(items, major, minor):
        raise ValueError('Must specify at least one axis')

    # unspecified axes keep their current labels
    if major is None:
        major = self.major_axis
    if minor is None:
        minor = self.minor_axis

    if items is None:
        selected = self._frames
    else:
        selected = {}
        for item in items:
            if item not in self._frames:
                raise NotImplementedError('Reindexing with new items not '
                                          'yet supported')
            selected[item] = self._frames[item]

    if copy:
        selected = dict((name, frame.copy())
                        for name, frame in compat.iteritems(selected))

    return SparsePanel(selected, items=items, major_axis=major,
                       minor_axis=minor,
                       default_fill_value=self.default_fill_value,
                       default_kind=self.default_kind)
def _combine(self, other, func, axis=0):
    """
    Apply the binary function ``func`` between this panel and ``other``.

    Parameters
    ----------
    other : DataFrame, Panel or scalar
    func : callable taking two arguments
    axis : passed through when ``other`` is a DataFrame

    Returns
    -------
    SparsePanel

    Raises
    ------
    NotImplementedError
        If ``other`` is not a DataFrame, a Panel or a scalar
    """
    if isinstance(other, DataFrame):
        return self._combineFrame(other, func, axis=axis)
    elif isinstance(other, Panel):
        return self._combinePanel(other, func)
    elif is_scalar(other):
        new_frames = dict((k, func(v, other))
                          for k, v in self.iteritems())
        return self._new_like(new_frames)

    # unsupported operands used to fall through and silently return None
    raise NotImplementedError("%s is not supported in combine operation "
                              "with %s" % (type(other), type(self)))
def _combineFrame(self, other, func, axis=0):
    """
    Apply ``func`` between the panel's dense values and a frame aligned to
    the plane axes of ``axis``, then rebuild a SparsePanel item by item.
    """
    index, columns = self._get_plane_axes(axis)
    axis = self._get_axis_number(axis)

    aligned = other.reindex(index=index, columns=columns)

    if axis == 0:
        combined = func(self.values, aligned.values)
    elif axis == 1:
        combined = func(self.values.swapaxes(0, 1), aligned.values.T)
        combined = combined.swapaxes(0, 1)
    elif axis == 2:
        combined = func(self.values.swapaxes(0, 2), aligned.values)
        combined = combined.swapaxes(0, 2)

    # TODO: make faster!
    new_frames = {}
    for item, item_slice in zip(self.items, combined):
        # each result keeps the defaults of the frame it replaces
        template = self[item]
        fill = template.default_fill_value
        kind = template.default_kind
        new_frames[item] = SparseDataFrame(item_slice,
                                           index=self.major_axis,
                                           columns=self.minor_axis,
                                           default_fill_value=fill,
                                           default_kind=kind)

    return self._new_like(new_frames)
def _new_like(self, new_frames):
    """Wrap ``new_frames`` in a SparsePanel with this panel's axes/defaults."""
    return SparsePanel(new_frames,
                       items=self.items,
                       major_axis=self.major_axis,
                       minor_axis=self.minor_axis,
                       default_fill_value=self.default_fill_value,
                       default_kind=self.default_kind)
def _combinePanel(self, other, func):
    """
    Apply ``func`` item by item between this panel and ``other`` after
    reindexing both to the union of their axes.
    """
    items = self.items.union(other.items)
    major = self.major_axis.union(other.major_axis)
    minor = self.minor_axis.union(other.minor_axis)

    # could check that everything's the same size, but forget it
    this = self.reindex(items=items, major=major, minor=minor)
    that = other.reindex(items=items, major=major, minor=minor)

    new_frames = dict((item, func(this[item], that[item]))
                      for item in items)

    if isinstance(that, SparsePanel):
        # maybe unnecessary
        new_default_fill = func(self.default_fill_value,
                                that.default_fill_value)
    else:
        new_default_fill = self.default_fill_value

    return SparsePanel(new_frames, items, major, minor,
                       default_fill_value=new_default_fill,
                       default_kind=self.default_kind)
def major_xs(self, key):
    """
    Return slice of panel along major axis

    Parameters
    ----------
    key : object
        Major axis label

    Returns
    -------
    y : DataFrame
        index -> minor axis, columns -> items
    """
    slices = {}
    for name, frame in self.iteritems():
        slices[name] = frame.xs(key)
    return DataFrame(slices, index=self.minor_axis, columns=self.items)
def minor_xs(self, key):
    """
    Return slice of panel along minor axis

    Parameters
    ----------
    key : object
        Minor axis label

    Returns
    -------
    y : SparseDataFrame
        index -> major axis, columns -> items
    """
    slices = {}
    for name, frame in self.iteritems():
        slices[name] = frame[key]
    return SparseDataFrame(slices, index=self.major_axis,
                           columns=self.items,
                           default_fill_value=self.default_fill_value,
                           default_kind=self.default_kind)
# TODO: allow SparsePanel to work with flex arithmetic.
# pow and mod only work for scalars for now
def pow(self, val, *args, **kwargs):
    """Wrapper around ``__pow__`` (only works for scalar values).

    Extra positional and keyword arguments are accepted and ignored.
    """
    result = self.__pow__(val)
    return result
def mod(self, val, *args, **kwargs):
    """Wrapper around ``__mod__`` (only works for scalar values).

    Extra positional and keyword arguments are accepted and ignored.
    """
    result = self.__mod__(val)
    return result
# Sparse objects opt out of numexpr: both the aggregate operations and the
# special arithmetic methods are registered with use_numexpr=False.
SparsePanel._add_aggregate_operations(use_numexpr=False)
ops.add_special_arithmetic_methods(SparsePanel, use_numexpr=False, **
ops.panel_special_funcs)
# Second name bound to the same class.
SparseWidePanel = SparsePanel
def _convert_frames(frames, index, columns, fill_value=np.nan, kind='block'):
    """
    Convert every frame to a SparseDataFrame and align all of them to a
    common index and columns.

    Parameters
    ----------
    frames : dict
    index, columns : array-like or None
        When None, the combined index / columns of all frames is used
    fill_value, kind :
        Defaults used when a frame has to be converted to sparse

    Returns
    -------
    (output, index, columns) : (dict of SparseDataFrame, Index, Index)
    """
    from pandas.core.panel import _get_combined_index

    output = {}
    for item, df in compat.iteritems(frames):
        if isinstance(df, SparseDataFrame):
            output[item] = df
        else:
            output[item] = SparseDataFrame(df, default_kind=kind,
                                           default_fill_value=fill_value)

    if index is None:
        index = _get_combined_index([f.index for f in output.values()])
    if columns is None:
        columns = _get_combined_index([f.columns for f in output.values()])

    index = _ensure_index(index)
    columns = _ensure_index(columns)

    for item, df in compat.iteritems(output):
        # already aligned on both axes: leave untouched
        if df.index.equals(index) and df.columns.equals(columns):
            continue
        output[item] = df.reindex(index=index, columns=columns)

    return output, index, columns
def _stack_sparse_info(frame):
    """
    Flatten a sparse frame into its stored values plus integer labels.

    Returns
    -------
    (sparse_values, major_labels, minor_labels) : tuple of ndarray
        Stored values concatenated column by column, their integer
        positions within each column, and the column number of each value

    Raises
    ------
    TypeError
        If any column's fill value is not NaN
    """
    lengths = [s.sp_index.npoints for _, s in compat.iteritems(frame)]

    # this is pretty fast
    column_numbers = np.arange(len(frame.columns))
    minor_labels = np.repeat(column_numbers, lengths)

    positions = []
    stored = []
    for col in frame.columns:
        series = frame[col]

        if not np.isnan(series.fill_value):
            raise TypeError('This routine assumes NaN fill value')

        positions.append(series.sp_index.to_int_index().indices)
        stored.append(series.sp_values)

    major_labels = np.concatenate(positions)
    sparse_values = np.concatenate(stored)

    return sparse_values, major_labels, minor_labels

279
pandas/sparse/tests/test_panel.py

@ -1,279 +0,0 @@
# pylint: disable-msg=E1101,W0612
import nose # noqa
from numpy import nan
import pandas as pd
from pandas import DataFrame, bdate_range, Panel
from pandas.core.index import Index
import pandas.util.testing as tm
from pandas.sparse.api import SparseSeries, SparsePanel
import pandas.tests.test_panel as test_panel
def panel_data1():
    """Return an 8-row business-day frame (columns A-D) with NaN gaps."""
    data = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4],
        'B': [0, 1, 2, 3, 4, nan, nan, nan],
        'C': [0, 1, 2, nan, nan, nan, 3, 4],
        'D': [nan, 0, 1, nan, 2, 3, 4, nan]
    }
    dates = bdate_range('1/1/2011', periods=8)
    return DataFrame(data, index=dates)
def panel_data2():
    """Return a 9-row business-day frame (columns A-D) with NaN gaps."""
    data = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5],
        'B': [0, 1, 2, 3, 4, 5, nan, nan, nan],
        'C': [0, 1, 2, nan, nan, nan, 3, 4, 5],
        'D': [nan, 0, 1, nan, 2, 3, 4, 5, nan]
    }
    dates = bdate_range('1/1/2011', periods=9)
    return DataFrame(data, index=dates)
def panel_data3():
    """Return a 10-row frame (columns A-D) on a business-day index shifted by -2."""
    data = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, 3, 4, 5, 6, nan, nan, nan],
        'C': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'D': [nan, 0, 1, nan, 2, 3, 4, 5, 6, nan]
    }
    dates = bdate_range('1/1/2011', periods=10).shift(-2)
    return DataFrame(data, index=dates)
class TestSparsePanel(tm.TestCase, test_panel.SafeForLongAndSparse,
test_panel.SafeForSparse):
_multiprocess_can_split_ = True
def setUp(self):
    """Build the four-item frame dict and the SparsePanel under test."""
    self.data_dict = dict(ItemA=panel_data1(),
                          ItemB=panel_data2(),
                          ItemC=panel_data3(),
                          ItemD=panel_data1())

    # constructing a SparsePanel emits the deprecation warning
    expect_warning = tm.assert_produces_warning(FutureWarning,
                                                check_stacklevel=False)
    with expect_warning:
        self.panel = SparsePanel(self.data_dict)
# Helper: apply ``op`` to the whole panel with the scalar 1 and compare the
# 'ItemA' frame of the result with ``op`` applied to that frame directly.
@staticmethod
def _test_op(panel, op):
# arithmetic tests
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = op(panel, 1)
tm.assert_sp_frame_equal(result['ItemA'], op(panel['ItemA'], 1))
# Constructor error paths: an item without a matching frame raises
# ValueError, and a non-dict input raises TypeError with the message below.
def test_constructor(self):
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.assertRaises(ValueError, SparsePanel, self.data_dict,
items=['Item0', 'ItemA', 'ItemB'])
with tm.assertRaisesRegexp(TypeError,
"input must be a dict, a 'list' was "
"passed"):
SparsePanel(['a', 'b', 'c'])
# deprecation GH11157
def test_deprecation(self):
    """Constructing an empty SparsePanel must emit a FutureWarning."""
    expect_warning = tm.assert_produces_warning(FutureWarning,
                                                check_stacklevel=False)
    with expect_warning:
        SparsePanel()
# GH 9272
# A SparsePanel built without arguments has three zero-length axes.
def test_constructor_empty(self):
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
sp = SparsePanel()
self.assertEqual(len(sp.items), 0)
self.assertEqual(len(sp.major_axis), 0)
self.assertEqual(len(sp.minor_axis), 0)
# from_dict on the fixture dict must equal the panel built in setUp.
def test_from_dict(self):
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
fd = SparsePanel.from_dict(self.data_dict)
tm.assert_sp_panel_equal(fd, self.panel)
def test_pickle(self):
    """A pickled panel comes back equal, with Index objects on all axes."""
    original = self.panel
    result = self.round_trip_pickle(original)

    for axis in (result.items, result.major_axis, result.minor_axis):
        tm.assertIsInstance(axis, Index)
    tm.assert_sp_panel_equal(original, result)
# Panel.to_sparse must yield SparseSeries columns inside each item.
def test_dense_to_sparse(self):
wp = Panel.from_dict(self.data_dict)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
dwp = wp.to_sparse()
tm.assertIsInstance(dwp['ItemA']['A'], SparseSeries)
def test_to_dense(self):
    """to_dense must match a Panel built directly from the fixture dict."""
    actual = self.panel.to_dense()
    expected = Panel.from_dict(self.data_dict)
    tm.assert_panel_equal(actual, expected)
# to_frame on the sparse panel must agree with to_frame on its dense
# counterpart; a zero fill value and filter_observations=False must raise.
def test_to_frame(self):
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# compare values and index (ignoring names) of sparse vs. dense to_frame
def _compare_with_dense(panel):
slp = panel.to_frame()
dlp = panel.to_dense().to_frame()
self.assert_numpy_array_equal(slp.values, dlp.values)
self.assert_index_equal(slp.index, dlp.index,
check_names=False)
_compare_with_dense(self.panel)
_compare_with_dense(self.panel.reindex(items=['ItemA']))
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
zero_panel = SparsePanel(self.data_dict, default_fill_value=0)
self.assertRaises(Exception, zero_panel.to_frame)
self.assertRaises(Exception, self.panel.to_frame,
filter_observations=False)
def test_long_to_wide_sparse(self):
pass
def test_values(self):
pass
def test_setitem(self):
    """Setting sparse and dense frames appends items; scalars are rejected."""
    panel = self.panel

    # one sparse and one dense copy of the same item
    panel['ItemE'] = panel['ItemC']
    panel['ItemF'] = panel['ItemC'].to_dense()

    for name in ('ItemE', 'ItemF'):
        tm.assert_sp_frame_equal(panel[name], panel['ItemC'])

    expected = pd.Index(['ItemA', 'ItemB', 'ItemC',
                         'ItemD', 'ItemE', 'ItemF'])
    tm.assert_index_equal(panel.items, expected)

    self.assertRaises(Exception, panel.__setitem__, 'item6', 1)
# set_value must return a new panel holding the value, both for existing
# labels and for labels not present in the fixture ('ItemF', 'foo', 'bar').
def test_set_value(self):
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# set one value and check that it is readable from a different object
def _check_loc(item, major, minor, val=1.5):
res = self.panel.set_value(item, major, minor, val)
self.assertIsNot(res, self.panel)
self.assertEqual(res.get_value(item, major, minor), val)
_check_loc('ItemA', self.panel.major_axis[4],
self.panel.minor_axis[3])
_check_loc('ItemF', self.panel.major_axis[4],
self.panel.minor_axis[3])
_check_loc('ItemF', 'foo', self.panel.minor_axis[3])
_check_loc('ItemE', 'foo', 'bar')
def test_delitem_pop(self):
    """del and pop remove items; deleting a missing item raises KeyError."""
    panel = self.panel

    del panel['ItemB']
    tm.assert_index_equal(panel.items,
                          pd.Index(['ItemA', 'ItemC', 'ItemD']))

    # pop must hand back the very same frame object
    crackle = panel['ItemC']
    popped = panel.pop('ItemC')
    self.assertIs(popped, crackle)
    tm.assert_almost_equal(panel.items, pd.Index(['ItemA', 'ItemD']))

    self.assertRaises(KeyError, panel.__delitem__, 'ItemC')
# copy() must produce a panel equal to the original.
def test_copy(self):
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
cop = self.panel.copy()
tm.assert_sp_panel_equal(cop, self.panel)
# Sparse reindex must agree with dense reindex, reject a call without axes
# and reject unknown items; copy=True must detach the frames.
def test_reindex(self):
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# reindex the sparse panel and its dense counterpart the same way
def _compare_with_dense(swp, items, major, minor):
swp_re = swp.reindex(items=items, major=major, minor=minor)
dwp_re = swp.to_dense().reindex(items=items, major=major,
minor=minor)
tm.assert_panel_equal(swp_re.to_dense(), dwp_re)
_compare_with_dense(self.panel, self.panel.items[:2],
self.panel.major_axis[::2],
self.panel.minor_axis[::2])
_compare_with_dense(self.panel, None, self.panel.major_axis[::2],
self.panel.minor_axis[::2])
self.assertRaises(ValueError, self.panel.reindex)
# TODO: do something about this later...
self.assertRaises(Exception, self.panel.reindex,
items=['item0', 'ItemA', 'ItemB'])
# test copying
cp = self.panel.reindex(self.panel.major_axis, copy=True)
# adding a column to the copy must not show up in the original
cp['ItemA']['E'] = cp['ItemA']['A']
self.assertNotIn('E', self.panel['ItemA'])
def test_operators(self):
# Apply a series of operations to self.panel and check that each gives the
# same result as the equivalent operation on dense panels.
# NOTE(review): indentation is lost in this view, so the exact extent of
# the assert_produces_warning blocks below cannot be determined -- confirm.
def _check_ops(panel):
# Helper: apply ``op`` to the panel and to its dense form, then compare
# the two results as dense panels.
def _dense_comp(op):
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
dense = panel.to_dense()
sparse_result = op(panel)
dense_result = op(dense)
tm.assert_panel_equal(sparse_result.to_dense(),
dense_result)
# Helper: apply the binary ``op`` to (panel, dense) and to (dense, dense),
# then compare the two results.
def _mixed_comp(op):
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = op(panel, panel.to_dense())
expected = op(panel.to_dense(), panel.to_dense())
tm.assert_panel_equal(result, expected)
# Scalar addition.
op1 = lambda x: x + 2
_dense_comp(op1)
# Panel + panel, where the right operand is reindexed to every other
# major-axis label.
op2 = lambda x: x.add(x.reindex(major=x.major_axis[::2]))
_dense_comp(op2)
# Subtract the mean taken over axis 0, 1 and 2 in turn, along that same
# axis.
op3 = lambda x: x.subtract(x.mean(0), axis=0)
_dense_comp(op3)
op4 = lambda x: x.subtract(x.mean(1), axis=1)
_dense_comp(op4)
op5 = lambda x: x.subtract(x.mean(2), axis=2)
_dense_comp(op5)
# Binary Panel methods called with this panel as the left operand and its
# dense form as the right operand.
_mixed_comp(Panel.multiply)
_mixed_comp(Panel.subtract)
# TODO: this case not yet supported!
# op6 = lambda x: x.add(x.to_frame())
# _dense_comp(op6)
_check_ops(self.panel)
def test_major_xs(self):
    # Every major-axis cross-section of the panel must equal the same
    # cross-section taken from its dense form.
    sparse_panel = self.panel
    dense_panel = sparse_panel.to_dense()
    for label in sparse_panel.major_axis:
        expected = dense_panel.major_xs(label)
        # The panel's own slice is compared as returned, with no to_dense().
        actual = sparse_panel.major_xs(label)
        tm.assert_frame_equal(expected, actual)
def test_minor_xs(self):
    # Every minor-axis cross-section of the panel, once densified, must
    # equal the same cross-section taken from its dense form.
    sparse_panel = self.panel
    dense_panel = sparse_panel.to_dense()
    for label in sparse_panel.minor_axis:
        expected = dense_panel.minor_xs(label)
        # The panel's slice is converted with to_dense() before comparing.
        actual = sparse_panel.minor_xs(label).to_dense()
        tm.assert_frame_equal(expected, actual)
if __name__ == '__main__':
    import nose  # noqa

    # Run this module's tests through nose when executed as a script.
    # The argument list is passed verbatim; exit=False is forwarded to
    # runmodule (presumably so the interpreter is not exited afterwards --
    # see the nose documentation).
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)

4
pandas/stats/plm.py

@ -18,7 +18,6 @@ from pandas.core.panel import Panel
from pandas.core.frame import DataFrame
from pandas.core.reshape import get_dummies
from pandas.core.series import Series
from pandas.core.sparse import SparsePanel
from pandas.stats.ols import OLS, MovingOLS
import pandas.stats.common as com
import pandas.stats.math as math
@ -137,8 +136,7 @@ class PanelOLS(OLS):
if isinstance(data, Panel):
data = data.copy()
if not isinstance(data, SparsePanel):
data, cat_mapping = self._convert_x(data)
data, cat_mapping = self._convert_x(data)
if not isinstance(data, Panel):
data = Panel.from_dict(data, intersect=True)

54
pandas/tests/test_panel.py

@ -5,7 +5,6 @@ from datetime import datetime
import operator
import nose
from functools import wraps
import numpy as np
import pandas as pd
@ -20,37 +19,16 @@ from pandas.core.series import remove_na
from pandas.formats.printing import pprint_thing
from pandas import compat
from pandas.compat import range, lrange, StringIO, OrderedDict, signature
from pandas import SparsePanel
from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
assert_series_equal, assert_almost_equal,
assert_produces_warning, ensure_clean,
assertRaisesRegexp, makeCustomDataframe as
mkdf, makeMixedDataFrame)
ensure_clean, assertRaisesRegexp,
makeCustomDataframe as mkdf,
makeMixedDataFrame)
import pandas.core.panel as panelm
import pandas.util.testing as tm
def ignore_sparse_panel_future_warning(func):
    """
    Decorator for test methods: ignore the FutureWarning raised when
    ``self.panel`` is a SparsePanel.

    When ``self.panel`` is a SparsePanel the wrapped call runs inside
    ``assert_produces_warning(FutureWarning, check_stacklevel=False)``;
    for any other panel the method is called directly.

    Can be removed when SparsePanel is fully removed.
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        # Non-sparse panels need no warning handling at all.
        if not isinstance(self.panel, SparsePanel):
            return func(self, *args, **kwargs)

        with assert_produces_warning(FutureWarning, check_stacklevel=False):
            return func(self, *args, **kwargs)

    return wrapper
class PanelTests(object):
panel = None
@ -78,7 +56,6 @@ class SafeForLongAndSparse(object):
def test_repr(self):
repr(self.panel)
@ignore_sparse_panel_future_warning
def test_copy_names(self):
for attr in ('major_axis', 'minor_axis'):
getattr(self.panel, attr).name = None
@ -261,7 +238,6 @@ class SafeForSparse(object):
index, columns = self.panel._get_plane_axes('minor_axis')
index, columns = self.panel._get_plane_axes(0)
@ignore_sparse_panel_future_warning
def test_truncate(self):
dates = self.panel.major_axis
start, end = dates[1], dates[5]
@ -322,7 +298,6 @@ class SafeForSparse(object):
self.assertEqual(len(list(self.panel.iteritems())),
len(self.panel.items))
@ignore_sparse_panel_future_warning