BUG: .fillna() for datetime64 with tz is passing through floats

closes #14872

Author: Rodolfo Fernandez <opensourceworkAR@users.noreply.github.com>

Closes #14905 from RodolfoRFR/pandas-14872-e and squashes the following commits:

18802b4 [Rodolfo Fernandez] added 'self' to test_dtype_utc function in pandas/tests/series/test_missing
e0c6c7c [Rodolfo Fernandez] added line to whatsnew v0.19.2 and test to test_missing.py in series folder
e4ba7e0 [Rodolfo Fernandez] removed all references to _DATELIKE_DTYPES from /pandas/core/missing.py
5d37ce8 [Rodolfo Fernandez] added is_datetime64tz_dtype and changed evaluation from 'values' to dtype
19eecb2 [Rodolfo Fernandez] fixed style errors using flake8
59b91a1 [Rodolfo Fernandez] test modified
5a59eac [Rodolfo Fernandez] test modified
bc68bf7 [Rodolfo Fernandez] test modified
ba83fc8 [Rodolfo Fernandez] test
b7358de [Rodolfo Fernandez] bug fixed
This commit is contained in:
Rodolfo Fernandez 2016-12-18 14:36:21 -05:00 committed by Jeff Reback
parent e503d40ace
commit f3c5a427cc
4 changed files with 28 additions and 10 deletions

View File

@ -47,6 +47,7 @@ Bug Fixes
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when multi-char delimiters were not being respected with quotes (:issue:`14582`)
- Fix bugs (:issue:`14734`, :issue:`13654`) in ``pd.read_sas`` and ``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when reading a SAS file incrementally.
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when ``skipfooter`` was not being respected by Python's CSV library (:issue:`13879`)
- Bug in ``.fillna()`` in which timezone-aware datetime64 values were incorrectly rounded (:issue:`14872`)
- Bug in ``.groupby(..., sort=True)`` of a non-lexsorted MultiIndex when grouping with multiple levels (:issue:`14776`)

View File

@ -10,9 +10,8 @@ import pandas.lib as lib
from pandas.compat import range, string_types
from pandas.types.common import (is_numeric_v_string_like,
is_float_dtype, is_datetime64_dtype,
is_integer_dtype, _ensure_float64,
is_scalar,
_DATELIKE_DTYPES,
is_datetime64tz_dtype, is_integer_dtype,
_ensure_float64, is_scalar,
needs_i8_conversion)
from pandas.types.missing import isnull
@ -450,7 +449,7 @@ def pad_1d(values, limit=None, mask=None, dtype=None):
_method = None
if is_float_dtype(values):
_method = getattr(algos, 'pad_inplace_%s' % dtype.name, None)
elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values):
elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
_method = _pad_1d_datetime
elif is_integer_dtype(values):
values = _ensure_float64(values)
@ -475,7 +474,7 @@ def backfill_1d(values, limit=None, mask=None, dtype=None):
_method = None
if is_float_dtype(values):
_method = getattr(algos, 'backfill_inplace_%s' % dtype.name, None)
elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values):
elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
_method = _backfill_1d_datetime
elif is_integer_dtype(values):
values = _ensure_float64(values)
@ -501,7 +500,7 @@ def pad_2d(values, limit=None, mask=None, dtype=None):
_method = None
if is_float_dtype(values):
_method = getattr(algos, 'pad_2d_inplace_%s' % dtype.name, None)
elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values):
elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
_method = _pad_2d_datetime
elif is_integer_dtype(values):
values = _ensure_float64(values)
@ -531,7 +530,7 @@ def backfill_2d(values, limit=None, mask=None, dtype=None):
_method = None
if is_float_dtype(values):
_method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None)
elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values):
elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
_method = _backfill_2d_datetime
elif is_integer_dtype(values):
values = _ensure_float64(values)

View File

@ -1,7 +1,8 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
from datetime import timedelta
import pytz
from datetime import timedelta, datetime
from numpy import nan
import numpy as np
@ -10,7 +11,6 @@ import pandas as pd
from pandas import (Series, isnull, date_range,
MultiIndex, Index)
from pandas.tseries.index import Timestamp
from pandas.compat import range
from pandas.util.testing import assert_series_equal
import pandas.util.testing as tm
@ -250,6 +250,24 @@ class TestSeriesMissingData(TestData, tm.TestCase):
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)
def test_datetime64tz_fillna_round_issue(self):
    """Backfill a tz-aware datetime series and compare against the source.

    The single non-null value carries a microsecond component
    (100001); after ``fillna(method='bfill')`` every position is
    expected to equal that value.
    """
    # GH 14872
    stamp = datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc)

    # Two leading NaT entries followed by the one real timestamp.
    with_gaps = pd.Series([pd.NaT, pd.NaT, stamp])
    expected = pd.Series([stamp] * 3)

    result = with_gaps.fillna(method='bfill')
    assert_series_equal(result, expected)
def test_fillna_int(self):
s = Series(np.random.randint(-100, 100, 50))
s.fillna(method='ffill', inplace=True)
@ -908,7 +926,6 @@ class TestSeriesInterpolateData(TestData, tm.TestCase):
index=pd.to_timedelta([1, 2, 4]))
assert_series_equal(result, expected)
if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

View File

@ -22,6 +22,7 @@ _POSSIBLY_CAST_DTYPES = set([np.dtype(t).name
_NS_DTYPE = np.dtype('M8[ns]')
_TD_DTYPE = np.dtype('m8[ns]')
_INT64_DTYPE = np.dtype(np.int64)
_DATELIKE_DTYPES = set([np.dtype(t)
for t in ['M8[ns]', '<M8[ns]', '>M8[ns]',
'm8[ns]', '<m8[ns]', '>m8[ns]']])