BUG: Improve error message for multi-char sep and quotes in Python engine (#14582)
If there is a field count mismatch, check whether
a multi-char sep was used in conjunction with quotes.
Currently, that setup is not respected and can result
in improper line breaks.
Closes gh-13374.
(cherry picked from commit d8e427bda0)
This commit is contained in:
parent
f30742feaa
commit
4a4bbace64
|
@ -30,6 +30,7 @@ Bug Fixes
|
|||
- Compat with ``dateutil==2.6.0``; segfault reported in the testing suite (:issue:`14621`)
|
||||
- Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`)
|
||||
- Bug in ``pd.read_csv`` where reading a file fails if the number of headers is equal to the number of lines in the file (:issue:`14515`)
|
||||
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was raised when quotes were ignored because a multi-char delimiter was used (:issue:`14582`)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -2515,6 +2515,11 @@ class PythonParser(ParserBase):
|
|||
|
||||
msg = ('Expected %d fields in line %d, saw %d' %
|
||||
(col_len, row_num + 1, zip_len))
|
||||
if len(self.delimiter) > 1 and self.quoting != csv.QUOTE_NONE:
|
||||
# see gh-13374
|
||||
reason = ('Error could possibly be due to quotes being '
|
||||
'ignored when a multi-char delimiter is used.')
|
||||
msg += '. ' + reason
|
||||
raise ValueError(msg)
|
||||
|
||||
if self.usecols:
|
||||
|
|
|
@ -7,6 +7,7 @@ these tests out of this module as soon as the C parser can accept further
|
|||
arguments when parsing.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import sys
|
||||
import nose
|
||||
|
||||
|
@ -204,3 +205,19 @@ x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""
|
|||
sep=sep, names=['a', 'b'],
|
||||
encoding=encoding)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_multi_char_sep_quotes(self):
|
||||
# see gh-13374
|
||||
|
||||
data = 'a,,b\n1,,a\n2,,"2,,b"'
|
||||
msg = 'ignored when a multi-char delimiter is used'
|
||||
|
||||
with tm.assertRaisesRegexp(ValueError, msg):
|
||||
self.read_csv(StringIO(data), sep=',,')
|
||||
|
||||
# We expect no match, so there should be an assertion
|
||||
# error out of the inner context manager.
|
||||
with tm.assertRaises(AssertionError):
|
||||
with tm.assertRaisesRegexp(ValueError, msg):
|
||||
self.read_csv(StringIO(data), sep=',,',
|
||||
quoting=csv.QUOTE_NONE)
|
||||
|
|
Loading…
Reference in New Issue