BUG: Apply min_itemsize to index even when not appending

closes #10381

Author: Pietro Battiston <me@pietrobattiston.it>

Closes #14812 from toobaz/to_hdf_min_itemsize and squashes the following commits:

c07f1e4 [Pietro Battiston] Whatsnew
38b8fcc [Pietro Battiston] Tests for previous commit
c838afa [Pietro Battiston] BUG: set min_itemsize even when there is no need to validate (#10381)

(cherry picked from commit e833096244)
This commit is contained in:
Pietro Battiston 2016-12-11 16:53:54 -05:00 committed by Joris Van den Bossche
parent 7f53ea8fac
commit 26920d1073
3 changed files with 39 additions and 3 deletions

View File

@ -60,6 +60,7 @@ Bug Fixes
- Bug in ``HDFStore`` when writing a ``MultiIndex`` when using ``data_columns=True`` (:issue:`14435`)
- Bug in ``HDFStore.append()`` when writing a ``Series`` and passing a ``min_itemsize`` argument containing a value for the ``index`` (:issue:`11412`)
- Bug where a ``min_itemsize`` value for the ``index`` was ignored when writing to an ``HDFStore`` in ``table`` format without asking to append (:issue:`10381`)
- Bug in ``Series.groupby.nunique()`` raising an ``IndexError`` for an empty ``Series`` (:issue:`12553`)
- Bug in ``DataFrame.nlargest`` and ``DataFrame.nsmallest`` when the index had duplicate values (:issue:`13412`)

View File

@ -3429,9 +3429,8 @@ class Table(Fixed):
j = len(self.index_axes)
# check for column conflicts
if validate:
for a in self.axes:
a.maybe_set_size(min_itemsize=min_itemsize)
for a in self.axes:
a.maybe_set_size(min_itemsize=min_itemsize)
# reindex by our non_index_axes & compute data_columns
for a in self.non_index_axes:

View File

@ -1372,6 +1372,22 @@ class TestHDFStore(Base, tm.TestCase):
min_itemsize={'index': 4})
tm.assert_series_equal(store.select('ss2'), df['B'])
# min_itemsize in index without appending (GH 10381)
store.put('ss3', df, format='table',
min_itemsize={'index': 6})
# just make sure there is a longer string:
df2 = df.copy().reset_index().assign(C='longer').set_index('C')
store.append('ss3', df2)
tm.assert_frame_equal(store.select('ss3'),
pd.concat([df, df2]))
# same as above, with a Series
store.put('ss4', df['B'], format='table',
min_itemsize={'index': 6})
store.append('ss4', df2['B'])
tm.assert_series_equal(store.select('ss4'),
pd.concat([df['B'], df2['B']]))
# with nans
_maybe_remove(store, 'df')
df = tm.makeTimeDataFrame()
@ -1426,6 +1442,26 @@ class TestHDFStore(Base, tm.TestCase):
self.assertRaises(ValueError, store.append, 'df',
df, min_itemsize={'foo': 20, 'foobar': 20})
def test_to_hdf_with_min_itemsize(self):
    # GH 10381: a min_itemsize given for the index on the first
    # (non-appending) to_hdf call must be applied, so that a later append
    # carrying a longer index label still round-trips.
    with ensure_clean_path(self.path) as path:

        # --- DataFrame -------------------------------------------------
        # initial write: no append, index width requested via min_itemsize
        base = tm.makeMixedDataFrame().set_index('C')
        base.to_hdf(path, 'ss3', format='table',
                    min_itemsize={'index': 6})

        # second frame: every index label is the 6-character 'longer'
        extra = base.copy().reset_index()
        extra = extra.assign(C='longer').set_index('C')
        extra.to_hdf(path, 'ss3', append=True, format='table')

        stored_frame = pd.read_hdf(path, 'ss3')
        expected_frame = pd.concat([base, extra])
        tm.assert_frame_equal(stored_frame, expected_frame)

        # --- Series ----------------------------------------------------
        # identical scenario, writing only column 'B' under its own key
        base_b = base['B']
        base_b.to_hdf(path, 'ss4', format='table',
                      min_itemsize={'index': 6})

        extra_b = extra['B']
        extra_b.to_hdf(path, 'ss4', append=True, format='table')

        stored_series = pd.read_hdf(path, 'ss4')
        expected_series = pd.concat([base['B'], extra['B']])
        tm.assert_series_equal(stored_series, expected_series)
def test_append_with_data_columns(self):
with ensure_clean_store(self.path) as store: