BUG: GH11847 Unstack with mixed dtypes coerces everything to object
Closes #11847. Changed the way in which the original DataFrame is copied: dropped the use of `.values`, since it does not preserve dtypes. Author: Pawel Kordek <pawel.kordek@gmail.com>. Closes #14053 from kordek/#11847 and squashes the following commits: 6a381ce
[Pawel Kordek] BUG: GH11847 Unstack with mixed dtypes coerces everything to object (cherry picked from commit d531718749)
This commit is contained in:
parent
3276c8aeb2
commit
1bc64b1f5c
|
@ -76,3 +76,5 @@ Bug Fixes
|
|||
|
||||
|
||||
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)
|
||||
|
||||
- Bug in ``unstack()`` where calling it with a list of column(s) as an argument coerced all columns to ``object``, regardless of their original dtypes (:issue:`11847`)
|
||||
|
|
|
@ -277,7 +277,8 @@ def _unstack_multiple(data, clocs):
|
|||
verify_integrity=False)
|
||||
|
||||
if isinstance(data, Series):
|
||||
dummy = Series(data.values, index=dummy_index)
|
||||
dummy = data.copy()
|
||||
dummy.index = dummy_index
|
||||
unstacked = dummy.unstack('__placeholder__')
|
||||
new_levels = clevels
|
||||
new_names = cnames
|
||||
|
@ -292,7 +293,8 @@ def _unstack_multiple(data, clocs):
|
|||
|
||||
return result
|
||||
|
||||
dummy = DataFrame(data.values, index=dummy_index, columns=data.columns)
|
||||
dummy = data.copy()
|
||||
dummy.index = dummy_index
|
||||
|
||||
unstacked = dummy.unstack('__placeholder__')
|
||||
if isinstance(unstacked, Series):
|
||||
|
|
|
@ -282,6 +282,46 @@ class TestDataFrameReshape(tm.TestCase, TestData):
|
|||
index=list('xyz'))
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
def test_unstack_preserve_dtypes(self):
|
||||
# Checks fix for #11847
|
||||
df = pd.DataFrame(dict(state=['IL', 'MI', 'NC'],
|
||||
index=['a', 'b', 'c'],
|
||||
some_categories=pd.Series(['a', 'b', 'c']
|
||||
).astype('category'),
|
||||
A=np.random.rand(3),
|
||||
B=1,
|
||||
C='foo',
|
||||
D=pd.Timestamp('20010102'),
|
||||
E=pd.Series([1.0, 50.0, 100.0]
|
||||
).astype('float32'),
|
||||
F=pd.Series([3.0, 4.0, 5.0]).astype('float64'),
|
||||
G=False,
|
||||
H=pd.Series([1, 200, 923442], dtype='int8')))
|
||||
|
||||
def unstack_and_compare(df, column_name):
|
||||
unstacked1 = df.unstack([column_name])
|
||||
unstacked2 = df.unstack(column_name)
|
||||
assert_frame_equal(unstacked1, unstacked2)
|
||||
|
||||
df1 = df.set_index(['state', 'index'])
|
||||
unstack_and_compare(df1, 'index')
|
||||
|
||||
df1 = df.set_index(['state', 'some_categories'])
|
||||
unstack_and_compare(df1, 'some_categories')
|
||||
|
||||
df1 = df.set_index(['F', 'C'])
|
||||
unstack_and_compare(df1, 'F')
|
||||
|
||||
df1 = df.set_index(['G', 'B', 'state'])
|
||||
unstack_and_compare(df1, 'B')
|
||||
|
||||
df1 = df.set_index(['E', 'A'])
|
||||
unstack_and_compare(df1, 'E')
|
||||
|
||||
df1 = df.set_index(['state', 'index'])
|
||||
s = df1['A']
|
||||
unstack_and_compare(s, 'index')
|
||||
|
||||
def test_stack_ints(self):
|
||||
columns = MultiIndex.from_tuples(list(itertools.product(range(3),
|
||||
repeat=3)))
|
||||
|
|
Loading…
Reference in New Issue