Browse Source
Removes the deprecated SparsePanel class, together with its benchmark script and tests. This follows up on the deprecation introduced in #11157.
Author: gfyoung <gfyoung17@gmail.com>
Closes #13778 from gfyoung/remove-sparse-panel and squashes the following commits:
f3fa93b
[gfyoung] CLN: Removed SparsePanel
pull/13716/merge


16 changed files with 32 additions and 1112 deletions
@ -1,92 +0,0 @@
|
||||
import numpy as np |
||||
|
||||
from pandas import * |
||||
import pandas.core.sparse as spm |
||||
import pandas.compat as compat |
||||
reload(spm) |
||||
from pandas.core.sparse import * |
||||
|
||||
# Number of observations per benchmark array.  This is an int (not a float)
# so that the slice bounds derived from it below are valid integer indices.
N = 10000

# float64 so that NaN can be assigned into the arrays.
arr1 = np.arange(N, dtype=np.float64)
index = Index(np.arange(N, dtype=np.float64))

# Punch three NaN gaps, each one tenth of the array long, into arr1.
off = N // 10
arr1[off: 2 * off] = np.nan
arr1[4 * off: 5 * off] = np.nan
arr1[8 * off: 9 * off] = np.nan

# arr2 gets two NaN gaps that are offset by half a block relative to arr1.
arr2 = np.arange(N, dtype=np.float64)
arr2[3 * off // 2: 2 * off + off // 2] = np.nan
arr2[8 * off + off // 2: 9 * off + off // 2] = np.nan

# Default-kind sparse series built from the two arrays.
s1 = SparseSeries(arr1, index=index)
s2 = SparseSeries(arr2, index=index)

# The same data stored with the 'integer' sparse index kind.
is1 = SparseSeries(arr1, kind='integer', index=index)
is2 = SparseSeries(arr2, kind='integer', index=index)

s1_dense = s1.to_dense()
s2_dense = s2.to_dense()

# NOTE(review): hard-coded, machine-specific location of the example frame;
# the script only runs where one of these paths exists.
if compat.is_platform_linux():
    pth = '/home/wesm/code/pandas/example'
else:
    pth = '/Users/wesm/code/pandas/example'

dm = DataFrame.load(pth)

sdf = dm.to_sparse()
||||
|
||||
|
||||
def new_data_like(sdf):
    """
    Build a SparseDataFrame of random values shaped like ``sdf``.

    Every column of the result reuses the sparse index and fill value of the
    matching column in ``sdf``; only the stored values are replaced, by draws
    from ``np.random.randn``.
    """
    def _randomized(column):
        # one random value per stored (sparse) point of the source column
        noise = np.random.randn(len(column.sp_values))
        return SparseSeries(noise,
                            index=sdf.index,
                            sparse_index=column.sp_index,
                            fill_value=column.fill_value)

    return SparseDataFrame(dict((name, _randomized(column))
                                for name, column in compat.iteritems(sdf)))
||||
|
||||
# Alternative construction of ``sdf``, kept for reference:
# data = {}
# for col, ser in dm.iteritems():
#     data[col] = SparseSeries(ser)
# sdf = SparseDataFrame(data)

# Dense panel holding the loaded frame under a single item.
dwp = Panel.fromDict({'foo': dm})

lp = stack_sparse_frame(sdf)

# One-item panel first, then a four-item panel sharing the same frame.
swp = SparsePanel({'A': sdf})
swp = SparsePanel(dict((label, sdf) for label in ('A', 'B', 'C', 'D')))

# Regression inputs: y is the sparse frame, x holds two noisy copies of it.
y = sdf
x = SparsePanel({'x1': sdf + new_data_like(sdf) / 10,
                 'x2': sdf + new_data_like(sdf) / 10})

dense_y = sdf
dense_x = x.to_dense()

# Optional profiling with hotshot, left disabled:
# import hotshot, hotshot.stats
# prof = hotshot.Profile('test.prof')
# benchtime, stones = prof.runcall(ols, y=y, x=x)
# prof.close()
# stats = hotshot.stats.load('test.prof')

dense_model = ols(y=dense_y, x=dense_x)

import pandas.stats.plm as plm
import pandas.stats.interface as face
reload(plm)
reload(face)

# model = face.ols(y=y, x=x)
@ -1,563 +0,0 @@
|
||||
""" |
||||
Data structures for sparse float data. Life is made simpler by dealing only |
||||
with float64 data |
||||
""" |
||||
|
||||
# pylint: disable=E1101,E1103,W0231 |
||||
|
||||
import warnings |
||||
from pandas.compat import lrange, zip |
||||
from pandas import compat |
||||
import numpy as np |
||||
|
||||
from pandas.types.common import is_list_like, is_scalar |
||||
from pandas.core.index import Index, MultiIndex, _ensure_index |
||||
from pandas.core.frame import DataFrame |
||||
from pandas.core.panel import Panel |
||||
from pandas.sparse.frame import SparseDataFrame |
||||
from pandas.util.decorators import deprecate |
||||
|
||||
import pandas.core.common as com |
||||
import pandas.core.ops as ops |
||||
|
||||
|
||||
class SparsePanelAxis(object):
    """
    Descriptor for a panel axis that is mirrored onto every stored frame.

    Parameters
    ----------
    cache_field : str
        Name of the attribute on the owning object that caches the axis.
    frame_attr : str
        Name of the attribute set on each frame in ``obj._frames`` when the
        axis is assigned.
    """

    def __init__(self, cache_field, frame_attr):
        self.cache_field, self.frame_attr = cache_field, frame_attr

    def __get__(self, obj, type=None):
        # None when the axis has not been assigned on ``obj`` yet
        return getattr(obj, self.cache_field, None)

    def __set__(self, obj, value):
        new_axis = _ensure_index(value)
        if isinstance(new_axis, MultiIndex):
            raise NotImplementedError("value cannot be a MultiIndex")

        # push the new axis down to every frame before caching it
        for frame in compat.itervalues(obj._frames):
            setattr(frame, self.frame_attr, new_axis)
        setattr(obj, self.cache_field, new_axis)
||||
|
||||
|
||||
class SparsePanel(Panel):
    """
    Sparse version of Panel

    Parameters
    ----------
    frames : dict of DataFrame objects
    items : array-like
    major_axis : array-like
    minor_axis : array-like
    default_kind : {'block', 'integer'}, default 'block'
        Default sparse kind for converting Series to SparseSeries. Will not
        override SparseSeries passed into constructor
    default_fill_value : float
        Default fill_value for converting Series to SparseSeries. Will not
        override SparseSeries passed in
    copy : boolean, default False
        Accepted by the signature but not used by the constructor.

    Notes
    -----
    Deprecated: every construction emits a FutureWarning.
    """
    ndim = 3
    _typ = 'panel'
    _subtyp = 'sparse_panel'

    def __init__(self, frames=None, items=None, major_axis=None,
                 minor_axis=None, default_fill_value=np.nan,
                 default_kind='block', copy=False):

        # deprecation #11157
        warnings.warn("SparsePanel is deprecated and will be removed in a "
                      "future version", FutureWarning, stacklevel=3)

        if frames is None:
            frames = {}

        # an ndarray is split along its first axis, one frame per item
        if isinstance(frames, np.ndarray):
            new_frames = {}
            for item, vals in zip(items, frames):
                new_frames[item] = SparseDataFrame(
                    vals, index=major_axis, columns=minor_axis,
                    default_fill_value=default_fill_value,
                    default_kind=default_kind)
            frames = new_frames

        if not isinstance(frames, dict):
            raise TypeError('input must be a dict, a %r was passed' %
                            type(frames).__name__)

        self.default_fill_value = fill_value = default_fill_value
        self.default_kind = kind = default_kind

        # pre-filter, if necessary
        if items is None:
            items = Index(sorted(frames.keys()))
        items = _ensure_index(items)

        (clean_frames, major_axis,
         minor_axis) = _convert_frames(frames, major_axis, minor_axis,
                                       kind=kind, fill_value=fill_value)

        # must be set before the axes: the axis descriptors iterate _frames
        self._frames = clean_frames

        # do we want to fill missing ones?
        for item in items:
            if item not in clean_frames:
                raise ValueError('column %r not found in data' % item)

        self._items = items
        self.major_axis = major_axis
        self.minor_axis = minor_axis

    def _consolidate_inplace(self):  # pragma: no cover
        # do nothing when DataFrame calls this method
        pass

    def __array_wrap__(self, result):
        """Wrap an array result in a SparsePanel with this panel's axes."""
        return SparsePanel(result, items=self.items,
                           major_axis=self.major_axis,
                           minor_axis=self.minor_axis,
                           default_kind=self.default_kind,
                           default_fill_value=self.default_fill_value)

    @classmethod
    def from_dict(cls, data):
        """
        Analogous to Panel.from_dict
        """
        # construct via ``cls`` so that subclasses get their own type back
        return cls(data)

    def to_dense(self):
        """
        Convert SparsePanel to (dense) Panel

        Returns
        -------
        dense : Panel
        """
        return Panel(self.values, self.items, self.major_axis, self.minor_axis)

    def as_matrix(self):
        """Return the dense values (same as the ``values`` property)."""
        return self.values

    @property
    def values(self):
        # return dense values, stacked in the order of ``self.items``
        return np.array([self._frames[item].values for item in self.items])

    # need a special property for items to make the field assignable

    _items = None

    def _get_items(self):
        return self._items

    def _set_items(self, new_items):
        new_items = _ensure_index(new_items)
        if isinstance(new_items, MultiIndex):
            raise NotImplementedError("items cannot be a MultiIndex")

        # need to create new frames dict: old and new labels are paired
        # positionally

        old_frame_dict = self._frames
        old_items = self._items
        self._frames = dict((new_k, old_frame_dict[old_k])
                            for new_k, old_k in zip(new_items, old_items))
        self._items = new_items

    items = property(fget=_get_items, fset=_set_items)

    # DataFrame's index
    major_axis = SparsePanelAxis('_major_axis', 'index')

    # DataFrame's columns / "items"
    minor_axis = SparsePanelAxis('_minor_axis', 'columns')

    def _ixs(self, i, axis=0):
        """
        for compat as we don't support Block Manager here
        i : int, slice, or sequence of integers
        axis : int
        """

        key = self._get_axis(axis)[i]

        # xs cannot handle a non-scalar key, so just reindex here
        if is_list_like(key):
            return self.reindex(**{self._get_axis_name(axis): key})

        return self.xs(key, axis=axis)

    def _slice(self, slobj, axis=0, kind=None):
        """
        for compat as we don't support Block Manager here
        """
        axis = self._get_axis_name(axis)
        index = self._get_axis(axis)

        return self.reindex(**{axis: index[slobj]})

    def _get_item_cache(self, key):
        return self._frames[key]

    def __setitem__(self, key, value):
        """
        Store a DataFrame under ``key``.

        The frame is reindexed to this panel's major/minor axes and converted
        to sparse if it is not already a SparseDataFrame.

        Raises
        ------
        ValueError
            If ``value`` is not a DataFrame.
        """
        if isinstance(value, DataFrame):
            value = value.reindex(index=self.major_axis,
                                  columns=self.minor_axis)
            if not isinstance(value, SparseDataFrame):
                value = value.to_sparse(fill_value=self.default_fill_value,
                                        kind=self.default_kind)
        else:
            raise ValueError('only DataFrame objects can be set currently')

        self._frames[key] = value

        # new labels are appended to the end of the items axis
        if key not in self.items:
            self._items = Index(list(self.items) + [key])

    def set_value(self, item, major, minor, value):
        """
        Quickly set single value at (item, major, minor) location

        Parameters
        ----------
        item : item label (panel item)
        major : major axis label (panel item row)
        minor : minor axis label (panel item column)
        value : scalar

        Notes
        -----
        This method *always* returns a new object. It is not particularly
        efficient but is provided for API compatibility with Panel

        Returns
        -------
        panel : SparsePanel
        """
        dense = self.to_dense().set_value(item, major, minor, value)
        return dense.to_sparse(kind=self.default_kind,
                               fill_value=self.default_fill_value)

    def __delitem__(self, key):
        loc = self.items.get_loc(key)
        # positions of every item except the one being removed
        indices = lrange(loc) + lrange(loc + 1, len(self.items))
        del self._frames[key]
        self._items = self._items.take(indices)

    def __getstate__(self):
        # pickling
        from pandas.io.pickle import _pickle_array
        return (self._frames, _pickle_array(self.items),
                _pickle_array(self.major_axis),
                _pickle_array(self.minor_axis), self.default_fill_value,
                self.default_kind)

    def __setstate__(self, state):
        frames, items, major, minor, fv, kind = state

        from pandas.io.pickle import _unpickle_array
        self.default_fill_value = fv
        self.default_kind = kind
        self._items = _ensure_index(_unpickle_array(items))
        # the cache fields are written directly, bypassing the axis
        # descriptors
        self._major_axis = _ensure_index(_unpickle_array(major))
        self._minor_axis = _ensure_index(_unpickle_array(minor))
        self._frames = frames

    def copy(self, deep=True):
        """
        Make a copy of the sparse panel

        Parameters
        ----------
        deep : boolean, default True
            If True, the frames and the axes are copied as well; otherwise
            only the dict holding the frames is copied.

        Returns
        -------
        copy : SparsePanel
        """

        d = self._construct_axes_dict()
        if deep:
            new_data = dict((k, v.copy(deep=True))
                            for k, v in compat.iteritems(self._frames))
            d = dict((k, v.copy(deep=True)) for k, v in compat.iteritems(d))
        else:
            new_data = self._frames.copy()
        d['default_fill_value'] = self.default_fill_value
        d['default_kind'] = self.default_kind

        return SparsePanel(new_data, **d)

    def to_frame(self, filter_observations=True):
        """
        Convert SparsePanel to (dense) DataFrame

        Only (major, minor) locations that hold a stored value in every item
        are kept.

        Parameters
        ----------
        filter_observations : boolean, default True
            Must be True; False is not supported.

        Returns
        -------
        frame : DataFrame

        Raises
        ------
        TypeError
            If ``filter_observations`` is False.
        """
        if not filter_observations:
            raise TypeError('filter_observations=False not supported for '
                            'SparsePanel.to_frame')

        I, N, K = self.shape
        # number of items with a stored value at each flattened location
        counts = np.zeros(N * K, dtype=int)

        d_values = {}
        d_indexer = {}

        for item in self.items:
            frame = self[item]

            values, major, minor = _stack_sparse_info(frame)

            # values are stacked column-major
            indexer = minor * N + major
            counts.put(indexer, counts.take(indexer) + 1)  # cuteness

            d_values[item] = values
            d_indexer[item] = indexer

        # have full set of observations for each item
        mask = counts == I

        # for each item, take mask values at index locations for those sparse
        # values, and use that to select values
        values = np.column_stack([d_values[item][mask.take(d_indexer[item])]
                                  for item in self.items])

        inds, = mask.nonzero()

        # still column major
        major_labels = inds % N
        minor_labels = inds // N

        index = MultiIndex(levels=[self.major_axis, self.minor_axis],
                           labels=[major_labels, minor_labels],
                           verify_integrity=False)

        df = DataFrame(values, index=index, columns=self.items)
        return df.sortlevel(level=0)

    to_long = deprecate('to_long', to_frame)
    toLong = deprecate('toLong', to_frame)

    def reindex(self, major=None, items=None, minor=None, major_axis=None,
                minor_axis=None, copy=False):
        """
        Conform / reshape panel axis labels to new input labels

        Parameters
        ----------
        major : array-like, default None
        items : array-like, default None
        minor : array-like, default None
        major_axis : array-like, default None
            Alternative spelling of ``major``; mutually exclusive with it.
        minor_axis : array-like, default None
            Alternative spelling of ``minor``; mutually exclusive with it.
        copy : boolean, default False
            Copy underlying SparseDataFrame objects

        Returns
        -------
        reindexed : SparsePanel

        Raises
        ------
        ValueError
            If no axis is specified.
        NotImplementedError
            If ``items`` contains a label not already in the panel.
        """
        major = com._mut_exclusive(major=major, major_axis=major_axis)
        minor = com._mut_exclusive(minor=minor, minor_axis=minor_axis)

        if com._all_none(items, major, minor):
            raise ValueError('Must specify at least one axis')

        major = self.major_axis if major is None else major
        minor = self.minor_axis if minor is None else minor

        if items is not None:
            new_frames = {}
            for item in items:
                if item in self._frames:
                    new_frames[item] = self._frames[item]
                else:
                    raise NotImplementedError('Reindexing with new items not '
                                              'yet supported')
        else:
            new_frames = self._frames

        if copy:
            new_frames = dict((k, v.copy())
                              for k, v in compat.iteritems(new_frames))

        return SparsePanel(new_frames, items=items, major_axis=major,
                           minor_axis=minor,
                           default_fill_value=self.default_fill_value,
                           default_kind=self.default_kind)

    def _combine(self, other, func, axis=0):
        """
        Apply ``func`` between this panel and a DataFrame, Panel or scalar.

        Raises
        ------
        NotImplementedError
            If ``other`` is none of the supported operand types.
        """
        if isinstance(other, DataFrame):
            return self._combineFrame(other, func, axis=axis)
        elif isinstance(other, Panel):
            return self._combinePanel(other, func)
        elif is_scalar(other):
            new_frames = dict((k, func(v, other))
                              for k, v in self.iteritems())
            return self._new_like(new_frames)
        # fail loudly instead of silently returning None
        raise NotImplementedError(
            "%s is not supported in combine operation with %s" %
            (type(other).__name__, type(self).__name__))

    def _combineFrame(self, other, func, axis=0):
        index, columns = self._get_plane_axes(axis)
        axis = self._get_axis_number(axis)

        other = other.reindex(index=index, columns=columns)

        # move the chosen axis to the front so ``other`` lines up with the
        # remaining plane, then move it back
        if axis == 0:
            new_values = func(self.values, other.values)
        elif axis == 1:
            new_values = func(self.values.swapaxes(0, 1), other.values.T)
            new_values = new_values.swapaxes(0, 1)
        elif axis == 2:
            new_values = func(self.values.swapaxes(0, 2), other.values)
            new_values = new_values.swapaxes(0, 2)

        # TODO: make faster!
        new_frames = {}
        for item, item_slice in zip(self.items, new_values):
            old_frame = self[item]
            ofv = old_frame.default_fill_value
            ok = old_frame.default_kind
            new_frames[item] = SparseDataFrame(item_slice,
                                               index=self.major_axis,
                                               columns=self.minor_axis,
                                               default_fill_value=ofv,
                                               default_kind=ok)

        return self._new_like(new_frames)

    def _new_like(self, new_frames):
        """Build a SparsePanel from ``new_frames`` reusing this panel's axes
        and sparse defaults."""
        return SparsePanel(new_frames, self.items, self.major_axis,
                           self.minor_axis,
                           default_fill_value=self.default_fill_value,
                           default_kind=self.default_kind)

    def _combinePanel(self, other, func):
        items = self.items.union(other.items)
        major = self.major_axis.union(other.major_axis)
        minor = self.minor_axis.union(other.minor_axis)

        # could check that everything's the same size, but forget it

        this = self.reindex(items=items, major=major, minor=minor)
        other = other.reindex(items=items, major=major, minor=minor)

        new_frames = {}
        for item in items:
            new_frames[item] = func(this[item], other[item])

        if not isinstance(other, SparsePanel):
            new_default_fill = self.default_fill_value
        else:
            # maybe unnecessary
            new_default_fill = func(self.default_fill_value,
                                    other.default_fill_value)

        return SparsePanel(new_frames, items, major, minor,
                           default_fill_value=new_default_fill,
                           default_kind=self.default_kind)

    def major_xs(self, key):
        """
        Return slice of panel along major axis

        Parameters
        ----------
        key : object
            Major axis label

        Returns
        -------
        y : DataFrame
            index -> minor axis, columns -> items
        """
        slices = dict((k, v.xs(key)) for k, v in self.iteritems())
        return DataFrame(slices, index=self.minor_axis, columns=self.items)

    def minor_xs(self, key):
        """
        Return slice of panel along minor axis

        Parameters
        ----------
        key : object
            Minor axis label

        Returns
        -------
        y : SparseDataFrame
            index -> major axis, columns -> items
        """
        slices = dict((k, v[key]) for k, v in self.iteritems())
        return SparseDataFrame(slices, index=self.major_axis,
                               columns=self.items,
                               default_fill_value=self.default_fill_value,
                               default_kind=self.default_kind)

    # TODO: allow SparsePanel to work with flex arithmetic.
    # pow and mod only work for scalars for now
    def pow(self, val, *args, **kwargs):
        """wrapper around `__pow__` (only works for scalar values)"""
        return self.__pow__(val)

    def mod(self, val, *args, **kwargs):
        """wrapper around `__mod__` (only works for scalar values)"""
        return self.__mod__(val)
||||
|
||||
# Sparse objects opt out of numexpr, both for the aggregate operations and
# for the special arithmetic methods.
SparsePanel._add_aggregate_operations(use_numexpr=False)
ops.add_special_arithmetic_methods(SparsePanel,
                                   use_numexpr=False,
                                   **ops.panel_special_funcs)

# Alias bound to the same class object.
SparseWidePanel = SparsePanel
||||
|
||||
|
||||
def _convert_frames(frames, index, columns, fill_value=np.nan, kind='block'):
    """
    Convert a dict of frames to SparseDataFrames sharing one index/columns.

    Parameters
    ----------
    frames : dict
        Maps item label to a frame; values that are not already
        SparseDataFrame are wrapped in one using ``kind`` and ``fill_value``.
    index, columns : array-like or None
        Target axes.  When None, the axis is combined from all frames via
        ``_get_combined_index``.
    fill_value : float, default NaN
    kind : {'block', 'integer'}, default 'block'

    Returns
    -------
    (output, index, columns) : tuple of (dict, Index, Index)
    """
    from pandas.core.panel import _get_combined_index

    def _as_sparse(frame):
        # frames that are already sparse are passed through untouched
        if isinstance(frame, SparseDataFrame):
            return frame
        return SparseDataFrame(frame, default_kind=kind,
                               default_fill_value=fill_value)

    output = dict((label, _as_sparse(frame))
                  for label, frame in compat.iteritems(frames))

    if index is None:
        index = _get_combined_index([sdf.index for sdf in output.values()])
    if columns is None:
        columns = _get_combined_index(
            [sdf.columns for sdf in output.values()])

    index = _ensure_index(index)
    columns = _ensure_index(columns)

    # only reindex the frames whose axes differ from the target axes
    for label, sdf in compat.iteritems(output):
        aligned = sdf.index.equals(index) and sdf.columns.equals(columns)
        if not aligned:
            output[label] = sdf.reindex(index=index, columns=columns)

    return output, index, columns
||||
|
||||
|
||||
def _stack_sparse_info(frame):
    """
    Flatten the stored points of a sparse frame, column by column.

    Returns
    -------
    (sparse_values, major_labels, minor_labels) : tuple of ndarray
        The concatenated stored values, their integer row positions, and the
        integer position of the column each value came from.

    Raises
    ------
    TypeError
        If any column has a fill value other than NaN.
    """
    point_counts = [col.sp_index.npoints
                    for _, col in compat.iteritems(frame)]

    # this is pretty fast: column position repeated once per stored point
    minor_labels = np.repeat(np.arange(len(frame.columns)), point_counts)

    row_positions = []
    stored_values = []
    for name in frame.columns:
        column = frame[name]

        if not np.isnan(column.fill_value):
            raise TypeError('This routine assumes NaN fill value')

        row_positions.append(column.sp_index.to_int_index().indices)
        stored_values.append(column.sp_values)

    major_labels = np.concatenate(row_positions)
    sparse_values = np.concatenate(stored_values)

    return sparse_values, major_labels, minor_labels
@ -1,279 +0,0 @@
|
||||
# pylint: disable-msg=E1101,W0612 |
||||
|
||||
import nose # noqa |
||||
from numpy import nan |
||||
import pandas as pd |
||||
|
||||
from pandas import DataFrame, bdate_range, Panel |
||||
from pandas.core.index import Index |
||||
import pandas.util.testing as tm |
||||
from pandas.sparse.api import SparseSeries, SparsePanel |
||||
import pandas.tests.test_panel as test_panel |
||||
|
||||
|
||||
def panel_data1():
    """Return an 8-row fixture frame (columns A-D) on a business-day index,
    with NaN runs at different positions in each column."""
    columns = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4],
        'B': [0, 1, 2, 3, 4, nan, nan, nan],
        'C': [0, 1, 2, nan, nan, nan, 3, 4],
        'D': [nan, 0, 1, nan, 2, 3, 4, nan],
    }
    return DataFrame(columns, index=bdate_range('1/1/2011', periods=8))
||||
|
||||
|
||||
def panel_data2():
    """Return a 9-row fixture frame (columns A-D) on a business-day index,
    with NaN runs at different positions in each column."""
    columns = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5],
        'B': [0, 1, 2, 3, 4, 5, nan, nan, nan],
        'C': [0, 1, 2, nan, nan, nan, 3, 4, 5],
        'D': [nan, 0, 1, nan, 2, 3, 4, 5, nan],
    }
    return DataFrame(columns, index=bdate_range('1/1/2011', periods=9))
||||
|
||||
|
||||
def panel_data3():
    """Return a 10-row fixture frame (columns A-D) whose business-day index
    is shifted back by two periods."""
    shifted_index = bdate_range('1/1/2011', periods=10).shift(-2)
    columns = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, 3, 4, 5, 6, nan, nan, nan],
        'C': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'D': [nan, 0, 1, nan, 2, 3, 4, 5, 6, nan],
    }
    return DataFrame(columns, index=shifted_index)
||||
|
||||
|
||||
class TestSparsePanel(tm.TestCase, test_panel.SafeForLongAndSparse,
                      test_panel.SafeForSparse):
    """Tests for the deprecated SparsePanel.

    Constructing a SparsePanel emits a FutureWarning, so every code path
    that builds one is wrapped in ``tm.assert_produces_warning``.
    """
    _multiprocess_can_split_ = True

    def setUp(self):
        self.data_dict = {
            'ItemA': panel_data1(),
            'ItemB': panel_data2(),
            'ItemC': panel_data3(),
            'ItemD': panel_data1(),
        }
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            self.panel = SparsePanel(self.data_dict)

    @staticmethod
    def _test_op(panel, op):
        # arithmetic tests
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = op(panel, 1)
            tm.assert_sp_frame_equal(result['ItemA'], op(panel['ItemA'], 1))

    def test_constructor(self):
        """Unknown items and non-dict input are rejected."""
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            self.assertRaises(ValueError, SparsePanel, self.data_dict,
                              items=['Item0', 'ItemA', 'ItemB'])
            with tm.assertRaisesRegexp(TypeError,
                                       "input must be a dict, a 'list' was "
                                       "passed"):
                SparsePanel(['a', 'b', 'c'])

    # deprecation GH11157
    def test_deprecation(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            SparsePanel()

    # GH 9272
    def test_constructor_empty(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            sp = SparsePanel()
        self.assertEqual(len(sp.items), 0)
        self.assertEqual(len(sp.major_axis), 0)
        self.assertEqual(len(sp.minor_axis), 0)

    def test_from_dict(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            fd = SparsePanel.from_dict(self.data_dict)
            tm.assert_sp_panel_equal(fd, self.panel)

    def test_pickle(self):
        def _test_roundtrip(panel):
            result = self.round_trip_pickle(panel)
            tm.assertIsInstance(result.items, Index)
            tm.assertIsInstance(result.major_axis, Index)
            tm.assertIsInstance(result.minor_axis, Index)
            tm.assert_sp_panel_equal(panel, result)

        _test_roundtrip(self.panel)

    def test_dense_to_sparse(self):
        wp = Panel.from_dict(self.data_dict)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            dwp = wp.to_sparse()
            tm.assertIsInstance(dwp['ItemA']['A'], SparseSeries)

    def test_to_dense(self):
        dwp = self.panel.to_dense()
        dwp2 = Panel.from_dict(self.data_dict)
        tm.assert_panel_equal(dwp, dwp2)

    def test_to_frame(self):
        """Sparse to_frame matches the dense result; unsupported inputs
        raise."""
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):

            def _compare_with_dense(panel):
                slp = panel.to_frame()
                dlp = panel.to_dense().to_frame()

                self.assert_numpy_array_equal(slp.values, dlp.values)
                self.assert_index_equal(slp.index, dlp.index,
                                        check_names=False)

            _compare_with_dense(self.panel)
            _compare_with_dense(self.panel.reindex(items=['ItemA']))

            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                zero_panel = SparsePanel(self.data_dict, default_fill_value=0)
            self.assertRaises(Exception, zero_panel.to_frame)

            self.assertRaises(Exception, self.panel.to_frame,
                              filter_observations=False)

    def test_long_to_wide_sparse(self):
        # placeholder: this method body is intentionally empty
        pass

    def test_values(self):
        # placeholder: this method body is intentionally empty
        pass

    def test_setitem(self):
        self.panel['ItemE'] = self.panel['ItemC']
        self.panel['ItemF'] = self.panel['ItemC'].to_dense()

        tm.assert_sp_frame_equal(self.panel['ItemE'], self.panel['ItemC'])
        tm.assert_sp_frame_equal(self.panel['ItemF'], self.panel['ItemC'])

        expected = pd.Index(['ItemA', 'ItemB', 'ItemC',
                             'ItemD', 'ItemE', 'ItemF'])
        tm.assert_index_equal(self.panel.items, expected)

        self.assertRaises(Exception, self.panel.__setitem__, 'item6', 1)

    def test_set_value(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            def _check_loc(item, major, minor, val=1.5):
                res = self.panel.set_value(item, major, minor, val)
                self.assertIsNot(res, self.panel)
                self.assertEqual(res.get_value(item, major, minor), val)

            _check_loc('ItemA', self.panel.major_axis[4],
                       self.panel.minor_axis[3])
            _check_loc('ItemF', self.panel.major_axis[4],
                       self.panel.minor_axis[3])
            _check_loc('ItemF', 'foo', self.panel.minor_axis[3])
            _check_loc('ItemE', 'foo', 'bar')

    def test_delitem_pop(self):
        del self.panel['ItemB']
        tm.assert_index_equal(self.panel.items,
                              pd.Index(['ItemA', 'ItemC', 'ItemD']))
        crackle = self.panel['ItemC']
        pop = self.panel.pop('ItemC')
        self.assertIs(pop, crackle)
        # same Index-specific assertion as the first check in this test
        tm.assert_index_equal(self.panel.items, pd.Index(['ItemA', 'ItemD']))

        self.assertRaises(KeyError, self.panel.__delitem__, 'ItemC')

    def test_copy(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            cop = self.panel.copy()
            tm.assert_sp_panel_equal(cop, self.panel)

    def test_reindex(self):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):

            def _compare_with_dense(swp, items, major, minor):
                swp_re = swp.reindex(items=items, major=major, minor=minor)
                dwp_re = swp.to_dense().reindex(items=items, major=major,
                                                minor=minor)
                tm.assert_panel_equal(swp_re.to_dense(), dwp_re)

            _compare_with_dense(self.panel, self.panel.items[:2],
                                self.panel.major_axis[::2],
                                self.panel.minor_axis[::2])
            _compare_with_dense(self.panel, None, self.panel.major_axis[::2],
                                self.panel.minor_axis[::2])

            self.assertRaises(ValueError, self.panel.reindex)

            # TODO: do something about this later...
            self.assertRaises(Exception, self.panel.reindex,
                              items=['item0', 'ItemA', 'ItemB'])

            # test copying
            cp = self.panel.reindex(self.panel.major_axis, copy=True)
            cp['ItemA']['E'] = cp['ItemA']['A']
            self.assertNotIn('E', self.panel['ItemA'])

    def test_operators(self):
        def _check_ops(panel):
            def _dense_comp(op):
                with tm.assert_produces_warning(FutureWarning,
                                                check_stacklevel=False):
                    dense = panel.to_dense()
                    sparse_result = op(panel)
                    dense_result = op(dense)
                    tm.assert_panel_equal(sparse_result.to_dense(),
                                          dense_result)

            def _mixed_comp(op):
                with tm.assert_produces_warning(FutureWarning,
                                                check_stacklevel=False):
                    result = op(panel, panel.to_dense())
                    expected = op(panel.to_dense(), panel.to_dense())
                    tm.assert_panel_equal(result, expected)

            op1 = lambda x: x + 2

            _dense_comp(op1)
            op2 = lambda x: x.add(x.reindex(major=x.major_axis[::2]))
            _dense_comp(op2)
            op3 = lambda x: x.subtract(x.mean(0), axis=0)
            _dense_comp(op3)
            op4 = lambda x: x.subtract(x.mean(1), axis=1)
            _dense_comp(op4)
            op5 = lambda x: x.subtract(x.mean(2), axis=2)
            _dense_comp(op5)

            _mixed_comp(Panel.multiply)
            _mixed_comp(Panel.subtract)

            # TODO: this case not yet supported!
            # op6 = lambda x: x.add(x.to_frame())
            # _dense_comp(op6)

        _check_ops(self.panel)

    def test_major_xs(self):
        def _dense_comp(sparse):
            dense = sparse.to_dense()

            for idx in sparse.major_axis:
                dslice = dense.major_xs(idx)
                sslice = sparse.major_xs(idx)
                tm.assert_frame_equal(dslice, sslice)

        _dense_comp(self.panel)

    def test_minor_xs(self):
        def _dense_comp(sparse):
            dense = sparse.to_dense()

            for idx in sparse.minor_axis:
                dslice = dense.minor_xs(idx)
                sslice = sparse.minor_xs(idx).to_dense()
                tm.assert_frame_equal(dslice, sslice)

        _dense_comp(self.panel)
||||
|
||||
|
||||
if __name__ == '__main__':
    import nose  # noqa

    # verbose run on this module only: stop at the first failure and drop
    # into pdb on errors and failures
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)