BUG: Prevent aliasing of dict na_values

This commit is contained in:
gfyoung 2016-11-25 23:30:47 -05:00
parent 033d34596f
commit 1439c2780b
3 changed files with 15 additions and 0 deletions

View File

@ -39,6 +39,7 @@ Bug Fixes
- Compat with ``dateutil==2.6.0``; segfault reported in the testing suite (:issue:`14621`)
- Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`)
- Bug in ``pd.read_csv`` in which aliasing was being done for ``na_values`` when passed in as a dictionary (:issue:`14203`)
- Bug in ``pd.read_csv`` where reading files fails, if the number of headers is equal to the number of lines in the file (:issue:`14515`)
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when multi-char delimiters were not being respected with quotes (:issue:`14582`)
- Fix bugs (:issue:`14734`, :issue:`13654`) in ``pd.read_sas`` and ``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when reading a SAS file incrementally.

View File

@ -2767,6 +2767,7 @@ def _clean_na_values(na_values, keep_default_na=True):
na_values = []
na_fvalues = set()
elif isinstance(na_values, dict):
na_values = na_values.copy() # Prevent aliasing.
if keep_default_na:
for k, v in compat.iteritems(na_values):
if not is_list_like(v):

View File

@ -266,3 +266,16 @@ nan,B
out = self.read_csv(StringIO(data), names=names,
na_values={'a': 2, 'b': 1})
tm.assert_frame_equal(out, expected)
def test_na_values_dict_aliasing(self):
na_values = {'a': 2, 'b': 1}
na_values_copy = na_values.copy()
names = ['a', 'b']
data = '1,2\n2,1'
expected = DataFrame([[1.0, 2.0], [np.nan, np.nan]], columns=names)
out = self.read_csv(StringIO(data), names=names, na_values=na_values)
tm.assert_frame_equal(out, expected)
tm.assert_dict_equal(na_values, na_values_copy)