closes #14734
closes #13654
(cherry picked from commit c5f219acfc)
This commit is contained in:
parent
68c7529d79
commit
6c688b947c
|
@ -31,6 +31,7 @@ Bug Fixes
|
|||
- Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`)
|
||||
- Bug in ``pd.read_csv`` where reading a file fails if the number of headers is equal to the number of lines in the file (:issue:`14515`)
|
||||
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when multi-char delimiters were not being respected with quotes (:issue:`14582`)
|
||||
- Fix bugs (:issue:`14734`, :issue:`13654`) in ``pd.read_sas`` and ``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when reading a SAS file incrementally.
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -79,4 +79,3 @@ Performance Improvements
|
|||
|
||||
Bug Fixes
|
||||
~~~~~~~~~
|
||||
|
||||
|
|
|
@ -225,6 +225,12 @@ class SAS7BDATReader(BaseIterator):
|
|||
self.os_name = self.os_name.decode(
|
||||
self.encoding or self.default_encoding)
|
||||
|
||||
def __next__(self):
    """Return the next chunk of rows, ending iteration when none remain.

    Asks ``self.read`` for ``self.chunksize`` rows, falling back to a
    single row when ``chunksize`` is falsy (``None`` or ``0``).

    Raises
    ------
    StopIteration
        When ``self.read`` returns ``None``.
    """
    rows_wanted = self.chunksize or 1
    chunk = self.read(nrows=rows_wanted)
    if chunk is not None:
        return chunk
    raise StopIteration
|
||||
|
||||
# Read a single float of the given width (4 or 8).
|
||||
def _read_float(self, offset, width):
|
||||
if width not in (4, 8):
|
||||
|
@ -591,6 +597,10 @@ class SAS7BDATReader(BaseIterator):
|
|||
if self._current_row_in_file_index >= self.row_count:
|
||||
return None
|
||||
|
||||
m = self.row_count - self._current_row_in_file_index
|
||||
if nrows > m:
|
||||
nrows = m
|
||||
|
||||
nd = (self.column_types == b'd').sum()
|
||||
ns = (self.column_types == b's').sum()
|
||||
|
||||
|
|
|
@ -47,7 +47,9 @@ class TestSAS7BDAT(tm.TestCase):
|
|||
with open(fname, 'rb') as f:
|
||||
byts = f.read()
|
||||
buf = io.BytesIO(byts)
|
||||
df = pd.read_sas(buf, format="sas7bdat", encoding='utf-8')
|
||||
rdr = pd.read_sas(buf, format="sas7bdat",
|
||||
iterator=True, encoding='utf-8')
|
||||
df = rdr.read()
|
||||
tm.assert_frame_equal(df, df0, check_exact=False)
|
||||
|
||||
def test_from_iterator(self):
|
||||
|
@ -55,16 +57,35 @@ class TestSAS7BDAT(tm.TestCase):
|
|||
df0 = self.data[j]
|
||||
for k in self.test_ix[j]:
|
||||
fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
|
||||
with open(fname, 'rb') as f:
|
||||
byts = f.read()
|
||||
buf = io.BytesIO(byts)
|
||||
rdr = pd.read_sas(buf, format="sas7bdat",
|
||||
iterator=True, encoding='utf-8')
|
||||
rdr = pd.read_sas(fname, iterator=True, encoding='utf-8')
|
||||
df = rdr.read(2)
|
||||
tm.assert_frame_equal(df, df0.iloc[0:2, :])
|
||||
df = rdr.read(3)
|
||||
tm.assert_frame_equal(df, df0.iloc[2:5, :])
|
||||
|
||||
def test_iterator_loop(self):
    """Iterating a chunked reader must yield every row exactly once.

    For each test file and each chunk size, the reader is consumed with
    a ``for`` loop and the summed chunk lengths are compared with the
    reader's ``row_count``.
    """
    # github #13654
    for j in 0, 1:
        for k in self.test_ix[j]:
            # The path does not depend on the chunk size, so build it once.
            fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
            for chunksize in 3, 5, 10, 11:
                # Pass the loop variable through; a hard-coded value here
                # would silently test one chunk size four times.
                rdr = pd.read_sas(fname, chunksize=chunksize,
                                  encoding='utf-8')
                y = 0
                for x in rdr:
                    y += x.shape[0]
                self.assertTrue(y == rdr.row_count)
|
||||
|
||||
def test_iterator_read_too_much(self):
    """Requesting more rows than the file holds must not fail.

    Two readers are opened on the same file, one with the format given
    explicitly and one without; each is asked for 20 rows more than
    ``row_count`` and the resulting frames are compared.
    """
    # github #14734
    first_ix = self.test_ix[0][0]
    fname = os.path.join(self.dirpath, "test%d.sas7bdat" % first_ix)

    # Reader opened with an explicit format.
    explicit_rdr = pd.read_sas(fname, format="sas7bdat",
                               iterator=True, encoding='utf-8')
    d1 = explicit_rdr.read(explicit_rdr.row_count + 20)

    # Reader opened without a ``format`` argument.
    inferred_rdr = pd.read_sas(fname, iterator=True, encoding="utf-8")
    d2 = inferred_rdr.read(inferred_rdr.row_count + 20)

    tm.assert_frame_equal(d1, d2)
|
||||
|
||||
|
||||
def test_encoding_options():
|
||||
dirpath = tm.get_data_path()
|
||||
|
|
|
@ -35,6 +35,13 @@ class TestXport(tm.TestCase):
|
|||
# Read full file
|
||||
data = read_sas(self.file01, format="xport")
|
||||
tm.assert_frame_equal(data, data_csv)
|
||||
num_rows = data.shape[0]
|
||||
|
||||
# Test reading beyond end of file
|
||||
reader = read_sas(self.file01, format="xport", iterator=True)
|
||||
data = reader.read(num_rows + 100)
|
||||
self.assertTrue(data.shape[0] == num_rows)
|
||||
reader.close()
|
||||
|
||||
# Test incremental read with `read` method.
|
||||
reader = read_sas(self.file01, format="xport", iterator=True)
|
||||
|
@ -48,6 +55,14 @@ class TestXport(tm.TestCase):
|
|||
reader.close()
|
||||
tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
|
||||
|
||||
# Test read in loop
|
||||
m = 0
|
||||
reader = read_sas(self.file01, format="xport", chunksize=100)
|
||||
for x in reader:
|
||||
m += x.shape[0]
|
||||
reader.close()
|
||||
self.assertTrue(m == num_rows)
|
||||
|
||||
# Read full file with `read_sas` method
|
||||
data = read_sas(self.file01)
|
||||
tm.assert_frame_equal(data, data_csv)
|
||||
|
|
Loading…
Reference in New Issue