BUG: Improve error message for skipfooter malformed rows in Python engine (#14749)
Python's native CSV library parses every row before the skipfooter parameter is applied, so a malformed row inside the skipped footer still raises an error. The error message now points out this possibility. Closes gh-13879.
This commit is contained in:
parent
2f43ac4c4c
commit
dfeae396c8
|
@ -32,6 +32,7 @@ Bug Fixes
|
|||
- Bug in ``pd.read_csv`` where reading files fails if the number of headers is equal to the number of lines in the file (:issue:`14515`)
|
||||
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when multi-char delimiters were not being respected with quotes (:issue:`14582`)
|
||||
- Fix bugs (:issue:`14734`, :issue:`13654`) in ``pd.read_sas`` and ``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when reading a SAS file incrementally.
|
||||
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when ``skipfooter`` was not being respected by Python's CSV library (:issue:`13879`)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -2411,14 +2411,23 @@ class PythonParser(ParserBase):
|
|||
try:
|
||||
orig_line = next(self.data)
|
||||
except csv.Error as e:
|
||||
msg = str(e)
|
||||
|
||||
if 'NULL byte' in str(e):
|
||||
raise csv.Error(
|
||||
'NULL byte detected. This byte '
|
||||
'cannot be processed in Python\'s '
|
||||
'native csv library at the moment, '
|
||||
'so please pass in engine=\'c\' instead.')
|
||||
else:
|
||||
raise
|
||||
msg = ('NULL byte detected. This byte '
|
||||
'cannot be processed in Python\'s '
|
||||
'native csv library at the moment, '
|
||||
'so please pass in engine=\'c\' instead')
|
||||
|
||||
if self.skipfooter > 0:
|
||||
reason = ('Error could possibly be due to '
|
||||
'parsing errors in the skipped footer rows '
|
||||
'(the skipfooter keyword is only applied '
|
||||
'after Python\'s csv library has parsed '
|
||||
'all rows).')
|
||||
msg += '. ' + reason
|
||||
|
||||
raise csv.Error(msg)
|
||||
line = self._check_comments([orig_line])[0]
|
||||
self.pos += 1
|
||||
if (not self.skip_blank_lines and
|
||||
|
|
|
@ -221,3 +221,18 @@ x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""
|
|||
with tm.assertRaisesRegexp(ValueError, msg):
|
||||
self.read_csv(StringIO(data), sep=',,',
|
||||
quoting=csv.QUOTE_NONE)
|
||||
|
||||
def test_skipfooter_bad_row(self):
    """Check the skipfooter hint on csv.Error for a malformed footer row.

    See gh-13879. The last row of the input has an unterminated quote.
    The hint about skipped footer rows must be part of the error message
    when ``skipfooter`` is passed, and must be absent when it is not.
    """
    malformed_csv = 'a,b,c\ncat,foo,bar\ndog,foo,"baz'
    footer_hint = 'parsing errors in the skipped footer rows'

    # With skipfooter set, the raised csv.Error must carry the hint.
    with tm.assertRaisesRegexp(csv.Error, footer_hint):
        self.read_csv(StringIO(malformed_csv), skipfooter=1)

    # Without skipfooter the hint must not match, so the inner context
    # manager is expected to fail with an AssertionError, which the
    # outer context manager then captures.
    with tm.assertRaises(AssertionError):
        with tm.assertRaisesRegexp(csv.Error, footer_hint):
            self.read_csv(StringIO(malformed_csv))
|
||||
|
|
Loading…
Reference in New Issue