BUG: GH11847 Unstack with mixed dtypes coerces everything to object

closes #11847

Changed how the original Series/DataFrame is copied before unstacking:
the intermediate object is now built with .copy() and given the new index,
instead of being rebuilt from .values, which does not preserve dtypes.

Author: Pawel Kordek <pawel.kordek@gmail.com>

Closes #14053 from kordek/#11847 and squashes the following commits:

6a381ce [Pawel Kordek] BUG: GH11847 Unstack with mixed dtypes coerces everything to object
This commit is contained in:
Pawel Kordek 2016-12-10 10:36:51 -05:00 committed by Jeff Reback
parent 34807fc25e
commit d531718749
3 changed files with 46 additions and 2 deletions

View File

@ -76,3 +76,5 @@ Bug Fixes
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)
- Bug in ``unstack()`` where calling it with a list of column(s) coerced all columns to ``object`` dtype, regardless of their original dtypes (:issue:`11847`)

View File

@ -277,7 +277,8 @@ def _unstack_multiple(data, clocs):
verify_integrity=False)
if isinstance(data, Series):
dummy = Series(data.values, index=dummy_index)
dummy = data.copy()
dummy.index = dummy_index
unstacked = dummy.unstack('__placeholder__')
new_levels = clevels
new_names = cnames
@ -292,7 +293,8 @@ def _unstack_multiple(data, clocs):
return result
dummy = DataFrame(data.values, index=dummy_index, columns=data.columns)
dummy = data.copy()
dummy.index = dummy_index
unstacked = dummy.unstack('__placeholder__')
if isinstance(unstacked, Series):

View File

@ -282,6 +282,46 @@ class TestDataFrameReshape(tm.TestCase, TestData):
index=list('xyz'))
assert_frame_equal(result, expected)
def test_unstack_preserve_dtypes(self):
# Checks fix for #11847
df = pd.DataFrame(dict(state=['IL', 'MI', 'NC'],
index=['a', 'b', 'c'],
some_categories=pd.Series(['a', 'b', 'c']
).astype('category'),
A=np.random.rand(3),
B=1,
C='foo',
D=pd.Timestamp('20010102'),
E=pd.Series([1.0, 50.0, 100.0]
).astype('float32'),
F=pd.Series([3.0, 4.0, 5.0]).astype('float64'),
G=False,
H=pd.Series([1, 200, 923442], dtype='int8')))
def unstack_and_compare(df, column_name):
unstacked1 = df.unstack([column_name])
unstacked2 = df.unstack(column_name)
assert_frame_equal(unstacked1, unstacked2)
df1 = df.set_index(['state', 'index'])
unstack_and_compare(df1, 'index')
df1 = df.set_index(['state', 'some_categories'])
unstack_and_compare(df1, 'some_categories')
df1 = df.set_index(['F', 'C'])
unstack_and_compare(df1, 'F')
df1 = df.set_index(['G', 'B', 'state'])
unstack_and_compare(df1, 'B')
df1 = df.set_index(['E', 'A'])
unstack_and_compare(df1, 'E')
df1 = df.set_index(['state', 'index'])
s = df1['A']
unstack_and_compare(s, 'index')
def test_stack_ints(self):
columns = MultiIndex.from_tuples(list(itertools.product(range(3),
repeat=3)))