BUG: GH11847 Unstack with mixed dtypes coerces everything to object

Changed the way in which the original data frame is copied (dropped use of .values, since it does not preserve dtypes).
This commit is contained in:
Pawel Kordek 2016-08-20 16:41:21 +02:00
parent 837db725b1
commit 6a381ce87f
3 changed files with 46 additions and 2 deletions

View File

@ -68,3 +68,5 @@ Bug Fixes
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)
- Bug in ``unstack()`` if called with a list of column(s) as an argument, regardless of the dtypes of all columns, they get coerced to ``object`` (:issue:`11847`)

View File

@ -277,7 +277,8 @@ def _unstack_multiple(data, clocs):
verify_integrity=False)
if isinstance(data, Series):
dummy = Series(data.values, index=dummy_index)
dummy = data.copy()
dummy.index = dummy_index
unstacked = dummy.unstack('__placeholder__')
new_levels = clevels
new_names = cnames
@ -292,7 +293,8 @@ def _unstack_multiple(data, clocs):
return result
dummy = DataFrame(data.values, index=dummy_index, columns=data.columns)
dummy = data.copy()
dummy.index = dummy_index
unstacked = dummy.unstack('__placeholder__')
if isinstance(unstacked, Series):

View File

@ -282,6 +282,46 @@ class TestDataFrameReshape(tm.TestCase, TestData):
index=list('xyz'))
assert_frame_equal(result, expected)
def test_unstack_preserve_dtypes(self):
# Checks fix for #11847
df = pd.DataFrame(dict(state=['IL', 'MI', 'NC'],
index=['a', 'b', 'c'],
some_categories=pd.Series(['a', 'b', 'c']
).astype('category'),
A=np.random.rand(3),
B=1,
C='foo',
D=pd.Timestamp('20010102'),
E=pd.Series([1.0, 50.0, 100.0]
).astype('float32'),
F=pd.Series([3.0, 4.0, 5.0]).astype('float64'),
G=False,
H=pd.Series([1, 200, 923442], dtype='int8')))
def unstack_and_compare(df, column_name):
unstacked1 = df.unstack([column_name])
unstacked2 = df.unstack(column_name)
assert_frame_equal(unstacked1, unstacked2)
df1 = df.set_index(['state', 'index'])
unstack_and_compare(df1, 'index')
df1 = df.set_index(['state', 'some_categories'])
unstack_and_compare(df1, 'some_categories')
df1 = df.set_index(['F', 'C'])
unstack_and_compare(df1, 'F')
df1 = df.set_index(['G', 'B', 'state'])
unstack_and_compare(df1, 'B')
df1 = df.set_index(['E', 'A'])
unstack_and_compare(df1, 'E')
df1 = df.set_index(['state', 'index'])
s = df1['A']
unstack_and_compare(s, 'index')
def test_stack_ints(self):
columns = MultiIndex.from_tuples(list(itertools.product(range(3),
repeat=3)))