[Backport #14777] BUG: Bug in a groupby of a non-lexsorted MultiIndex
closes #14776 Author: Jeff Reback <jeff@reback.net> Closes #14777 from jreback/mi_sort and squashes the following commits: cf31905
[Jeff Reback] BUG: Bug in a groupby of a non-lexsorted MultiIndex and multiple grouping levels (cherry picked from commit f23010aa93)
This commit is contained in:
parent
7814a6654e
commit
04b83e021b
|
@@ -36,7 +36,7 @@ Bug Fixes
|
|||
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when ``skipfooter`` was not being respected by Python's CSV library (:issue:`13879`)
|
||||
|
||||
|
||||
|
||||
- Bug in ``.groupby(..., sort=True)`` of a non-lexsorted MultiIndex when grouping with multiple levels (:issue:`14776`)
|
||||
|
||||
|
||||
|
||||
|
|
|
@@ -861,7 +861,17 @@ class _GroupBy(PandasObject, SelectionMixin):
|
|||
if isinstance(result, Series):
|
||||
result = result.reindex(ax)
|
||||
else:
|
||||
result = result.reindex_axis(ax, axis=self.axis)
|
||||
|
||||
# this is a very unfortunate situation
|
||||
# we have a multi-index that is NOT lexsorted
|
||||
# and we have a result which is duplicated
|
||||
# we can't reindex, so we resort to this
|
||||
# GH 14776
|
||||
if isinstance(ax, MultiIndex) and not ax.is_unique:
|
||||
result = result.take(result.index.get_indexer_for(
|
||||
ax.values).unique(), axis=self.axis)
|
||||
else:
|
||||
result = result.reindex_axis(ax, axis=self.axis)
|
||||
|
||||
elif self.group_keys:
|
||||
|
||||
|
|
|
@@ -4736,6 +4736,25 @@ class TestGroupBy(tm.TestCase):
|
|||
result = not_lexsorted_df.groupby('a').mean()
|
||||
tm.assert_frame_equal(expected, result)
|
||||
|
||||
# a transforming function should work regardless of sort
|
||||
# GH 14776
|
||||
df = DataFrame({'x': ['a', 'a', 'b', 'a'],
|
||||
'y': [1, 1, 2, 2],
|
||||
'z': [1, 2, 3, 4]}).set_index(['x', 'y'])
|
||||
self.assertFalse(df.index.is_lexsorted())
|
||||
|
||||
for level in [0, 1, [0, 1]]:
|
||||
for sort in [False, True]:
|
||||
result = df.groupby(level=level, sort=sort).apply(
|
||||
DataFrame.drop_duplicates)
|
||||
expected = df
|
||||
tm.assert_frame_equal(expected, result)
|
||||
|
||||
result = df.sort_index().groupby(level=level, sort=sort).apply(
|
||||
DataFrame.drop_duplicates)
|
||||
expected = df.sort_index()
|
||||
tm.assert_frame_equal(expected, result)
|
||||
|
||||
def test_groupby_levels_and_columns(self):
|
||||
# GH9344, GH9049
|
||||
idx_names = ['x', 'y']
|
||||
|
|
Loading…
Reference in New Issue