BUG: Apply min_itemsize to index even when not appending
closes #10381

Author: Pietro Battiston <me@pietrobattiston.it>

Closes #14812 from toobaz/to_hdf_min_itemsize and squashes the following commits:

c07f1e4 [Pietro Battiston] Whatsnew
38b8fcc [Pietro Battiston] Tests for previous commit
c838afa [Pietro Battiston] BUG: set min_itemsize even when there is no need to validate (#10381)

(cherry picked from commit e833096244)
This commit is contained in:
parent
7f53ea8fac
commit
26920d1073
|
@ -60,6 +60,7 @@ Bug Fixes
|
|||
|
||||
- Bug in ``HDFStore`` when writing a ``MultiIndex`` when using ``data_columns=True`` (:issue:`14435`)
|
||||
- Bug in ``HDFStore.append()`` when writing a ``Series`` and passing a ``min_itemsize`` argument containing a value for the ``index`` (:issue:`11412`)
|
||||
- Bug when writing to a ``HDFStore`` in ``table`` format with a ``min_itemsize`` value for the ``index`` and without asking to append (:issue:`10381`)
|
||||
- Bug in ``Series.groupby.nunique()`` raising an ``IndexError`` for an empty ``Series`` (:issue:`12553`)
|
||||
- Bug in ``DataFrame.nlargest`` and ``DataFrame.nsmallest`` when the index had duplicate values (:issue:`13412`)
|
||||
|
||||
|
|
|
@ -3429,9 +3429,8 @@ class Table(Fixed):
|
|||
j = len(self.index_axes)
|
||||
|
||||
# check for column conflicts
|
||||
if validate:
|
||||
for a in self.axes:
|
||||
a.maybe_set_size(min_itemsize=min_itemsize)
|
||||
for a in self.axes:
|
||||
a.maybe_set_size(min_itemsize=min_itemsize)
|
||||
|
||||
# reindex by our non_index_axes & compute data_columns
|
||||
for a in self.non_index_axes:
|
||||
|
|
|
@ -1372,6 +1372,22 @@ class TestHDFStore(Base, tm.TestCase):
|
|||
min_itemsize={'index': 4})
|
||||
tm.assert_series_equal(store.select('ss2'), df['B'])
|
||||
|
||||
# min_itemsize in index without appending (GH 10381)
|
||||
store.put('ss3', df, format='table',
|
||||
min_itemsize={'index': 6})
|
||||
# just make sure there is a longer string:
|
||||
df2 = df.copy().reset_index().assign(C='longer').set_index('C')
|
||||
store.append('ss3', df2)
|
||||
tm.assert_frame_equal(store.select('ss3'),
|
||||
pd.concat([df, df2]))
|
||||
|
||||
# same as above, with a Series
|
||||
store.put('ss4', df['B'], format='table',
|
||||
min_itemsize={'index': 6})
|
||||
store.append('ss4', df2['B'])
|
||||
tm.assert_series_equal(store.select('ss4'),
|
||||
pd.concat([df['B'], df2['B']]))
|
||||
|
||||
# with nans
|
||||
_maybe_remove(store, 'df')
|
||||
df = tm.makeTimeDataFrame()
|
||||
|
@ -1426,6 +1442,26 @@ class TestHDFStore(Base, tm.TestCase):
|
|||
self.assertRaises(ValueError, store.append, 'df',
|
||||
df, min_itemsize={'foo': 20, 'foobar': 20})
|
||||
|
||||
def test_to_hdf_with_min_itemsize(self):
|
||||
|
||||
with ensure_clean_path(self.path) as path:
|
||||
|
||||
# min_itemsize in index with to_hdf (GH 10381)
|
||||
df = tm.makeMixedDataFrame().set_index('C')
|
||||
df.to_hdf(path, 'ss3', format='table', min_itemsize={'index': 6})
|
||||
# just make sure there is a longer string:
|
||||
df2 = df.copy().reset_index().assign(C='longer').set_index('C')
|
||||
df2.to_hdf(path, 'ss3', append=True, format='table')
|
||||
tm.assert_frame_equal(pd.read_hdf(path, 'ss3'),
|
||||
pd.concat([df, df2]))
|
||||
|
||||
# same as above, with a Series
|
||||
df['B'].to_hdf(path, 'ss4', format='table',
|
||||
min_itemsize={'index': 6})
|
||||
df2['B'].to_hdf(path, 'ss4', append=True, format='table')
|
||||
tm.assert_series_equal(pd.read_hdf(path, 'ss4'),
|
||||
pd.concat([df['B'], df2['B']]))
|
||||
|
||||
def test_append_with_data_columns(self):
|
||||
|
||||
with ensure_clean_store(self.path) as store:
|
||||
|
|
Loading…
Reference in New Issue