BUG: GH11847 Unstack with mixed dtypes coerces everything to object
Changed the way in which the original data frame is copied (dropped use of .values, since it does not preserve dtypes).
This commit is contained in:
parent
837db725b1
commit
6a381ce87f
|
@ -68,3 +68,5 @@ Bug Fixes
|
|||
|
||||
|
||||
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)
|
||||
|
||||
- Bug in ``unstack()`` where calling it with a list of column(s) as the argument coerced all columns to ``object``, regardless of their original dtypes (:issue:`11847`)
|
||||
|
|
|
@ -277,7 +277,8 @@ def _unstack_multiple(data, clocs):
|
|||
verify_integrity=False)
|
||||
|
||||
if isinstance(data, Series):
|
||||
dummy = Series(data.values, index=dummy_index)
|
||||
dummy = data.copy()
|
||||
dummy.index = dummy_index
|
||||
unstacked = dummy.unstack('__placeholder__')
|
||||
new_levels = clevels
|
||||
new_names = cnames
|
||||
|
@ -292,7 +293,8 @@ def _unstack_multiple(data, clocs):
|
|||
|
||||
return result
|
||||
|
||||
dummy = DataFrame(data.values, index=dummy_index, columns=data.columns)
|
||||
dummy = data.copy()
|
||||
dummy.index = dummy_index
|
||||
|
||||
unstacked = dummy.unstack('__placeholder__')
|
||||
if isinstance(unstacked, Series):
|
||||
|
|
|
@ -282,6 +282,46 @@ class TestDataFrameReshape(tm.TestCase, TestData):
|
|||
index=list('xyz'))
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
def test_unstack_preserve_dtypes(self):
    """Regression test for #11847.

    Builds a frame with mixed column dtypes and checks that unstacking a
    level passed as a one-element list gives the same result as unstacking
    the same level passed as a scalar, for several index combinations and
    for a single Series.
    """
    # Checks fix for #11847
    df = pd.DataFrame(dict(state=['IL', 'MI', 'NC'],
                           index=['a', 'b', 'c'],
                           some_categories=pd.Series(['a', 'b', 'c']
                                                     ).astype('category'),
                           A=np.random.rand(3),
                           B=1,
                           C='foo',
                           D=pd.Timestamp('20010102'),
                           E=pd.Series([1.0, 50.0, 100.0]
                                       ).astype('float32'),
                           F=pd.Series([3.0, 4.0, 5.0]).astype('float64'),
                           G=False,
                           # Values are kept inside the int8 range
                           # (-128..127): out-of-range literals depend on
                           # silent wrap-around, which newer NumPy/pandas
                           # reject when building the Series.
                           H=pd.Series([1, 20, 120], dtype='int8')))

    def unstack_and_compare(df, column_name):
        # The list form and the scalar form must agree.
        unstacked1 = df.unstack([column_name])
        unstacked2 = df.unstack(column_name)
        assert_frame_equal(unstacked1, unstacked2)

    # (columns moved into the index, level to unstack)
    frame_cases = [
        (['state', 'index'], 'index'),
        (['state', 'some_categories'], 'some_categories'),
        (['F', 'C'], 'F'),
        (['G', 'B', 'state'], 'B'),
        (['E', 'A'], 'E'),
    ]
    for index_columns, level in frame_cases:
        df1 = df.set_index(index_columns)
        unstack_and_compare(df1, level)

    # Same comparison for a single column taken from the frame.
    df1 = df.set_index(['state', 'index'])
    s = df1['A']
    unstack_and_compare(s, 'index')
|
||||
|
||||
def test_stack_ints(self):
|
||||
columns = MultiIndex.from_tuples(list(itertools.product(range(3),
|
||||
repeat=3)))
|
||||
|
|
Loading…
Reference in New Issue