Merge master branch

This commit is contained in:
Christopher C. Aycock 2016-12-01 13:54:23 -05:00
commit f01142cf3c
131 changed files with 2633 additions and 20783 deletions

View File

@ -1,9 +1,12 @@
#### A small, complete example of the issue
#### Code Sample, a copy-pastable example if possible
```python
# Your code here
```
#### Problem description
[this should explain **why** the current behaviour is a problem and why the expected output is a better solution.]
#### Expected Output

1
.gitignore vendored
View File

@ -27,6 +27,7 @@
*.class
*.dll
*.exe
*.pxi
*.o
*.py[ocd]
*.so

View File

@ -14,13 +14,11 @@ cache:
env:
global:
# scatterci API key
#- secure: "Bx5umgo6WjuGY+5XFa004xjCiX/vq0CyMZ/ETzcs7EIBI1BE/0fIDXOoWhoxbY9HPfdPGlDnDgB9nGqr5wArO2s+BavyKBWg6osZ3dmkfuJPMOWeyCa92EeP+sfKw8e5HSU5MizW9e319wHWOF/xkzdHR7T67Qd5erhv91x4DnQ="
# ironcache API key
#- secure: "e4eEFn9nDQc3Xa5BWYkzfX37jaWVq89XidVX+rcCNEr5OlOImvveeXnF1IzbRXznH4Sv0YsLwUd8RGUWOmyCvkONq/VJeqCHWtTMyfaCIdqSyhIP9Odz8r9ahch+Y0XFepBey92AJHmlnTh+2GjCDgIiqq4fzglojnp56Vg1ojA="
#- secure: "CjmYmY5qEu3KrvMtel6zWFEtMq8ORBeS1S1odJHnjQpbwT1KY2YFZRVlLphfyDQXSz6svKUdeRrCNp65baBzs3DQNA8lIuXGIBYFeJxqVGtYAZZs6+TzBPfJJK798sGOj5RshrOJkFG2rdlWNuTq/XphI0JOrN3nPUkRrdQRpAw="
# pandas-docs-bot GH
- secure: "PCzUFR8CHmw9lH84p4ygnojdF7Z8U5h7YfY0RyT+5K/aiQ1ZTU3ZkDTPI0/rR5FVMxsEEKEQKMcc5fvqW0PeD7Q2wRmluloKgT9w4EVEJ1ppKf7lITPcvZR2QgVOvjv4AfDtibLHFNiaSjzoqyJVjM4igjOu8WTlF3JfZcmOQjQ="
# pandas-docs-travis GH
- secure: "U4GkUaX0K5FqHsHlxXiTr53t0zg8l9p3x7Xze3T0l4mEfhJdqVjayizRE0w0Uo3D54YY7X4lCRtI+bzFz20RxAEoEUyIoWtlUP7eNY3XuhViipY7gtYJpS+68VN5MnChzzz73cNj89fLBvCFyYhMTXHSrbm+yHSg6eRlqzzhHWc="
## original key - secure: "PCzUFR8CHmw9lH84p4ygnojdF7Z8U5h7YfY0RyT+5K/aiQ1ZTU3ZkDTPI0/rR5FVMxsEEKEQKMcc5fvqW0PeD7Q2wRmluloKgT9w4EVEJ1ppKf7lITPcvZR2QgVOvjv4AfDtibLHFNiaSjzoqyJVjM4igjOu8WTlF3JfZcmOQjQ="
git:
# for cloning
@ -34,6 +32,7 @@ matrix:
compiler: clang
osx_image: xcode6.4
env:
- PYTHON_VERSION=3.5
- JOB_NAME: "35_osx"
- NOSE_ARGS="not slow and not network and not disabled"
- BUILD_TYPE=conda
@ -43,6 +42,7 @@ matrix:
- USE_CACHE=true
- python: 2.7
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "27_slow_nnet_LOCALE"
- NOSE_ARGS="slow and not network and not disabled"
- LOCALE_OVERRIDE="zh_CN.UTF-8"
@ -56,6 +56,7 @@ matrix:
- language-pack-zh-hans
- python: 2.7
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "27_nslow"
- NOSE_ARGS="not slow and not disabled"
- FULL_DEPS=true
@ -69,6 +70,7 @@ matrix:
- python-gtk2
- python: 3.4
env:
- PYTHON_VERSION=3.4
- JOB_NAME: "34_nslow"
- NOSE_ARGS="not slow and not disabled"
- FULL_DEPS=true
@ -81,6 +83,7 @@ matrix:
- xsel
- python: 3.5
env:
- PYTHON_VERSION=3.5
- JOB_NAME: "35_nslow"
- NOSE_ARGS="not slow and not network and not disabled"
- FULL_DEPS=true
@ -95,6 +98,7 @@ matrix:
# In allow_failures
- python: 2.7
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "27_slow"
- JOB_TAG=_SLOW
- NOSE_ARGS="slow and not network and not disabled"
@ -104,6 +108,7 @@ matrix:
# In allow_failures
- python: 3.4
env:
- PYTHON_VERSION=3.4
- JOB_NAME: "34_slow"
- JOB_TAG=_SLOW
- NOSE_ARGS="slow and not network and not disabled"
@ -118,6 +123,7 @@ matrix:
# In allow_failures
- python: 2.7
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "27_build_test_conda"
- JOB_TAG=_BUILD_TEST
- NOSE_ARGS="not slow and not disabled"
@ -125,9 +131,23 @@ matrix:
- BUILD_TEST=true
- CACHE_NAME="27_build_test_conda"
- USE_CACHE=true
# In allow_failures
- python: 3.6-dev
env:
- PYTHON_VERSION=3.6
- JOB_NAME: "36_dev"
- JOB_TAG=_DEV
- NOSE_ARGS="not slow and not network and not disabled"
- PANDAS_TESTING_MODE="deprecate"
addons:
apt:
packages:
- libatlas-base-dev
- gfortran
# In allow_failures
- python: 3.5
env:
- PYTHON_VERSION=3.5
- JOB_NAME: "35_numpy_dev"
- JOB_TAG=_NUMPY_DEV
- NOSE_ARGS="not slow and not network and not disabled"
@ -142,6 +162,7 @@ matrix:
# In allow_failures
- python: 2.7
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "27_nslow_nnet_COMPAT"
- NOSE_ARGS="not slow and not network and not disabled"
- LOCALE_OVERRIDE="it_IT.UTF-8"
@ -156,6 +177,7 @@ matrix:
# In allow_failures
- python: 3.5
env:
- PYTHON_VERSION=3.5
- JOB_NAME: "35_ascii"
- JOB_TAG=_ASCII
- NOSE_ARGS="not slow and not network and not disabled"
@ -165,6 +187,7 @@ matrix:
# In allow_failures
- python: 2.7
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "doc_build"
- FULL_DEPS=true
- DOC_BUILD=true
@ -174,6 +197,7 @@ matrix:
allow_failures:
- python: 2.7
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "27_slow"
- JOB_TAG=_SLOW
- NOSE_ARGS="slow and not network and not disabled"
@ -182,6 +206,7 @@ matrix:
- USE_CACHE=true
- python: 3.4
env:
- PYTHON_VERSION=3.4
- JOB_NAME: "34_slow"
- JOB_TAG=_SLOW
- NOSE_ARGS="slow and not network and not disabled"
@ -195,6 +220,7 @@ matrix:
- xsel
- python: 2.7
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "27_build_test_conda"
- JOB_TAG=_BUILD_TEST
- NOSE_ARGS="not slow and not disabled"
@ -202,14 +228,27 @@ matrix:
- BUILD_TEST=true
- CACHE_NAME="27_build_test_conda"
- USE_CACHE=true
- python: 3.5
- python: 3.6-dev
env:
- JOB_NAME: "35_numpy_dev"
- JOB_TAG=_NUMPY_DEV
- PYTHON_VERSION=3.6
- JOB_NAME: "36_dev"
- JOB_TAG=_DEV
- NOSE_ARGS="not slow and not network and not disabled"
- PANDAS_TESTING_MODE="deprecate"
- CACHE_NAME="35_numpy_dev"
- USE_CACHE=true
addons:
apt:
packages:
- libatlas-base-dev
- gfortran
- python: 3.5
env:
- PYTHON_VERSION=3.5
- JOB_NAME: "35_numpy_dev"
- JOB_TAG=_NUMPY_DEV
- NOSE_ARGS="not slow and not network and not disabled"
- PANDAS_TESTING_MODE="deprecate"
- CACHE_NAME="35_numpy_dev"
- USE_CACHE=true
addons:
apt:
packages:
@ -217,6 +256,7 @@ matrix:
- gfortran
- python: 2.7
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "27_nslow_nnet_COMPAT"
- NOSE_ARGS="not slow and not network and not disabled"
- LOCALE_OVERRIDE="it_IT.UTF-8"
@ -230,6 +270,7 @@ matrix:
- language-pack-it
- python: 3.5
env:
- PYTHON_VERSION=3.5
- JOB_NAME: "35_ascii"
- JOB_TAG=_ASCII
- NOSE_ARGS="not slow and not network and not disabled"
@ -238,6 +279,7 @@ matrix:
- USE_CACHE=true
- python: 2.7
env:
- PYTHON_VERSION=2.7
- JOB_NAME: "doc_build"
- FULL_DEPS=true
- DOC_BUILD=true
@ -249,7 +291,7 @@ before_install:
- echo "before_install"
- source ci/travis_process_gbq_encryption.sh
- echo $VIRTUAL_ENV
- export PATH="$HOME/miniconda/bin:$PATH"
- export PATH="$HOME/miniconda3/bin:$PATH"
- df -h
- date
- pwd

View File

@ -1,5 +1,6 @@
import numpy as np
import pandas as pd
from pandas.util import testing as tm
class algorithm(object):
@ -55,3 +56,35 @@ class algorithm(object):
def time_add_overflow_mixed_arr(self):
self.checked_add(self.arr, self.arrmixed)
class hashing(object):
goal_time = 0.2
def setup(self):
N = 100000
self.df = pd.DataFrame(
{'A': pd.Series(tm.makeStringIndex(100).take(
np.random.randint(0, 100, size=N))),
'B': pd.Series(tm.makeStringIndex(10000).take(
np.random.randint(0, 10000, size=N))),
'D': np.random.randn(N),
'E': np.arange(N),
'F': pd.date_range('20110101', freq='s', periods=N),
'G': pd.timedelta_range('1 day', freq='s', periods=N),
})
self.df['C'] = self.df['B'].astype('category')
self.df.iloc[10:20] = np.nan
def time_frame(self):
self.df.hash()
def time_series_int(self):
self.df.E.hash()
def time_series_string(self):
self.df.B.hash()
def time_series_categorical(self):
self.df.C.hash()

View File

@ -32,6 +32,30 @@ class replace_large_dict(object):
self.s.replace(self.to_rep, inplace=True)
class replace_convert(object):
goal_time = 0.5
def setup(self):
self.n = (10 ** 3)
self.to_ts = dict(((i, pd.Timestamp(i)) for i in range(self.n)))
self.to_td = dict(((i, pd.Timedelta(i)) for i in range(self.n)))
self.s = Series(np.random.randint(self.n, size=(10 ** 3)))
self.df = DataFrame({'A': np.random.randint(self.n, size=(10 ** 3)),
'B': np.random.randint(self.n, size=(10 ** 3))})
def time_replace_series_timestamp(self):
self.s.replace(self.to_ts)
def time_replace_series_timedelta(self):
self.s.replace(self.to_td)
def time_replace_frame_timestamp(self):
self.df.replace(self.to_ts)
def time_replace_frame_timedelta(self):
self.df.replace(self.to_td)
class replace_replacena(object):
goal_time = 0.2

View File

@ -1,18 +0,0 @@
#!/bin/bash
source activate pandas
echo "install numpy master wheel"
# remove the system installed numpy
pip uninstall numpy -y
# we need these for numpy
# these wheels don't play nice with the conda libgfortran / openblas
# time conda install -n pandas libgfortran openblas || exit 1
# install numpy wheel from master
pip install --pre --upgrade --no-index --timeout=60 --trusted-host travis-dev-wheels.scipy.org -f http://travis-dev-wheels.scipy.org/ numpy
true

16
ci/install-3.6_DEV.sh Normal file
View File

@ -0,0 +1,16 @@
#!/bin/bash
# CI helper: create a Python 3.6 pre-release conda environment for pandas.
# Invoked by ci/install_travis.sh when ci/install-${PYTHON_VERSION}${JOB_TAG}.sh exists.
echo "install 3.6 dev"
# Prevent conda from pulling in its own pip; we bootstrap pip via ensurepip below
# so the pip matches the prerelease interpreter.
conda config --set add_pip_as_python_dependency false
# Python 3.6 is pre-release here, so it comes from the conda-forge prerelease label.
conda create -n pandas python=3.6 -c conda-forge/label/prerelease
source activate pandas
# ensure we have pip
python -m ensurepip
# install deps
pip3.6 install nose cython numpy pytz python-dateutil
# Always exit 0 so the caller treats this best-effort setup as successful.
true

View File

@ -31,10 +31,7 @@ edit_init
home_dir=$(pwd)
echo "home_dir: [$home_dir]"
python_major_version="${TRAVIS_PYTHON_VERSION:0:1}"
[ "$python_major_version" == "2" ] && python_major_version=""
MINICONDA_DIR="$HOME/miniconda"
MINICONDA_DIR="$HOME/miniconda3"
if [ -d "$MINICONDA_DIR" ] && [ -e "$MINICONDA_DIR/bin/conda" ] && [ "$USE_CACHE" ]; then
echo "Miniconda install already present from cache: $MINICONDA_DIR"
@ -63,9 +60,9 @@ else
rm -rf "$MINICONDA_DIR"
# install miniconda
if [ "${TRAVIS_OS_NAME}" == "osx" ]; then
wget http://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1
wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1
else
wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh || exit 1
wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1
fi
bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1
@ -84,21 +81,25 @@ else
# Useful for debugging any issues with conda
conda info -a || exit 1
time conda create -n pandas python=$TRAVIS_PYTHON_VERSION nose coverage flake8 || exit 1
fi
# build deps
REQ="ci/requirements-${TRAVIS_PYTHON_VERSION}${JOB_TAG}.build"
# may have additional installation instructions for this build
INSTALL="ci/install-${TRAVIS_PYTHON_VERSION}${JOB_TAG}.sh"
# may have installation instructions for this build
INSTALL="ci/install-${PYTHON_VERSION}${JOB_TAG}.sh"
if [ -e ${INSTALL} ]; then
time bash $INSTALL || exit 1
else
# create new env
time conda create -n pandas python=$PYTHON_VERSION nose coverage flake8 || exit 1
fi
# build deps
REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build"
# install deps
time conda install -n pandas --file=${REQ} || exit 1
if [ -e ${REQ} ]; then
time conda install -n pandas --file=${REQ} || exit 1
fi
source activate pandas
@ -106,7 +107,7 @@ if [ "$BUILD_TEST" ]; then
# build testing
pip uninstall --yes cython
pip install cython==0.15.1
pip install cython==0.19.1
( python setup.py build_ext --inplace && python setup.py develop ) || true
else
@ -117,14 +118,22 @@ else
# we may have run installations
echo "conda installs"
REQ="ci/requirements-${TRAVIS_PYTHON_VERSION}${JOB_TAG}.run"
time conda install -n pandas --file=${REQ} || exit 1
REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run"
if [ -e ${REQ} ]; then
time conda install -n pandas --file=${REQ} || exit 1
fi
# we may have additional pip installs
echo "pip installs"
REQ="ci/requirements-${TRAVIS_PYTHON_VERSION}${JOB_TAG}.pip"
REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip"
if [ -e ${REQ} ]; then
pip install --upgrade -r $REQ
pip install --upgrade -r $REQ
fi
# may have additional installation instructions for this build
REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.sh"
if [ -e ${REQ} ]; then
time bash $REQ || exit 1
fi
# remove any installed pandas package
@ -138,9 +147,5 @@ else
fi
if [ "$JOB_NAME" == "34_slow" ]; then
conda install -c conda-forge/label/rc -c conda-forge matplotlib
fi
echo "done"
exit 0

View File

@ -10,18 +10,24 @@ if [ "$LINT" ]; then
# pandas/rpy is deprecated and will be removed.
# pandas/src is C code, so no need to search there.
echo "Linting *.py"
flake8 pandas --filename '*.py' --exclude pandas/rpy,pandas/src
flake8 pandas --filename=*.py --exclude pandas/rpy,pandas/src
if [ $? -ne "0" ]; then
RET=1
fi
echo "Linting *.py DONE"
echo "Linting *.pyx"
flake8 pandas --filename '*.pyx' --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126
flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126
if [ $? -ne "0" ]; then
RET=1
fi
echo "Linting *.pyx DONE"
echo "Linting *.pxi.in"
for path in 'src'
do
echo "linting -> pandas/$path"
flake8 pandas/$path --filename '*.pxi.in' --select=E501,E302,E203,E111,E114,E221,E303,E231,E126
flake8 pandas/$path --filename=*.pxi.in --select=E501,E302,E203,E111,E114,E221,E303,E231,E126
if [ $? -ne "0" ]; then
RET=1
fi

View File

@ -16,4 +16,3 @@ bottleneck
html5lib
beautiful-soup
jinja2=2.8
pyqt=4.11.4

View File

@ -21,4 +21,3 @@ beautiful-soup=4.2.1
statsmodels
jinja2=2.8
xarray
pyqt=4.11.4

View File

@ -1,3 +0,0 @@
python-dateutil
pytz
cython

View File

@ -1,2 +0,0 @@
python-dateutil
pytz

View File

@ -0,0 +1,7 @@
#!/bin/bash
# CI helper for the "34_slow" job: install matplotlib (release-candidate label
# first, falling back to the main conda-forge channel) into the pandas env.
source activate pandas
echo "install 34_slow"
conda install -n pandas -c conda-forge/label/rc -c conda-forge matplotlib

View File

@ -10,4 +10,3 @@ numexpr
pytables
matplotlib
blosc
pyqt=4.11.4

View File

@ -18,7 +18,6 @@ pymysql
psycopg2
xarray
boto
pyqt=4.11.4
# incompat with conda ATM
# beautiful-soup

View File

@ -282,7 +282,7 @@ Using a single column's values to select data.
df[df.A > 0]
A ``where`` operation for getting.
Selecting values from a DataFrame where a boolean condition is met.
.. ipython:: python

View File

@ -27,6 +27,7 @@ Flat File
read_table
read_csv
read_fwf
read_msgpack
Clipboard
~~~~~~~~~

View File

@ -111,5 +111,4 @@ Visualizing Data in Qt applications
-----------------------------------
There is no support for such visualization in pandas. However, the external
package `pandas-qt <https://github.com/datalyze-solutions/pandas-qt>`_ does
provide this functionality.
package `pandas-qt <https://github.com/datalyze-solutions/pandas-qt>`_ provides this functionality for Python 2.x.

View File

@ -514,40 +514,6 @@ parse HTML tables in the top-level pandas io function ``read_html``.
text from the URL over the web, i.e., IO (input-output). For very large
tables, this might not be true.
**Issues with using** |Anaconda|_
* `Anaconda`_ ships with `lxml`_ version 3.2.0; the following workaround for
`Anaconda`_ was successfully used to deal with the versioning issues
surrounding `lxml`_ and `BeautifulSoup4`_.
.. note::
Unless you have *both*:
* A strong restriction on the upper bound of the runtime of some code
that incorporates :func:`~pandas.io.html.read_html`
* Complete knowledge that the HTML you will be parsing will be 100%
valid at all times
then you should install `html5lib`_ and things will work swimmingly
without you having to muck around with `conda`. If you want the best of
both worlds then install both `html5lib`_ and `lxml`_. If you do install
`lxml`_ then you need to perform the following commands to ensure that
lxml will work correctly:
.. code-block:: sh
# remove the included version
conda remove lxml
# install the latest version of lxml
pip install 'git+git://github.com/lxml/lxml.git'
# install the latest version of beautifulsoup4
pip install 'bzr+lp:beautifulsoup'
Note that you need `bzr <http://bazaar.canonical.com/en>`__ and `git
<http://git-scm.com>`__ installed to perform the last two operations.
.. |svm| replace:: **strictly valid markup**
.. _svm: http://validator.w3.org/docs/help.html#validation_basics
@ -561,9 +527,6 @@ parse HTML tables in the top-level pandas io function ``read_html``.
.. |lxml| replace:: **lxml**
.. _lxml: http://lxml.de
.. |Anaconda| replace:: **Anaconda**
.. _Anaconda: https://store.continuum.io/cshop/anaconda
Byte-Ordering Issues
--------------------

View File

@ -18,7 +18,7 @@ Instructions for installing from source,
Python version support
----------------------
Officially Python 2.7, 3.4, and 3.5
Officially Python 2.7, 3.4, 3.5, and 3.6
Installing pandas
-----------------

View File

@ -157,6 +157,9 @@ dtype : Type name or dict of column -> type, default ``None``
Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32}``
(unsupported with ``engine='python'``). Use `str` or `object` to preserve and
not interpret dtype.
.. versionadded:: 0.20.0 support for the Python parser.
engine : {``'c'``, ``'python'``}
Parser engine to use. The C engine is faster while the python engine is
currently more feature-complete.
@ -473,10 +476,9 @@ However, if you wanted for all the data to be coerced, no matter the type, then
using the ``converters`` argument of :func:`~pandas.read_csv` would certainly be
worth trying.
.. note::
The ``dtype`` option is currently only supported by the C engine.
Specifying ``dtype`` with ``engine`` other than 'c' raises a
``ValueError``.
.. versionadded:: 0.20.0 support for the Python parser.
The ``dtype`` option is supported by the 'python' engine.
.. note::
In some cases, reading in abnormal data with columns containing mixed dtypes
@ -1165,8 +1167,8 @@ too many will cause an error by default:
In [28]: pd.read_csv(StringIO(data))
---------------------------------------------------------------------------
CParserError Traceback (most recent call last)
CParserError: Error tokenizing data. C error: Expected 3 fields in line 3, saw 4
ParserError Traceback (most recent call last)
ParserError: Error tokenizing data. C error: Expected 3 fields in line 3, saw 4
You can elect to skip bad lines:
@ -1266,11 +1268,22 @@ is whitespace).
df = pd.read_fwf('bar.csv', header=None, index_col=0)
df
.. versionadded:: 0.20.0
``read_fwf`` supports the ``dtype`` parameter for specifying the types of
parsed columns to be different from the inferred type.
.. ipython:: python
pd.read_fwf('bar.csv', header=None, index_col=0).dtypes
pd.read_fwf('bar.csv', header=None, dtype={2: 'object'}).dtypes
.. ipython:: python
:suppress:
os.remove('bar.csv')
Indexes
'''''''

View File

@ -323,6 +323,10 @@ Pivot tables
.. _reshaping.pivot:
While ``pivot`` provides general purpose pivoting of DataFrames with various
data types (strings, numerics, etc.), Pandas also provides the ``pivot_table``
function for pivoting with aggregation of numeric data.
The function ``pandas.pivot_table`` can be used to create spreadsheet-style pivot
tables. See the :ref:`cookbook<cookbook.pivot>` for some advanced strategies

View File

@ -1286,12 +1286,11 @@ secondly data into 5-minutely data). This is extremely common in, but not
limited to, financial applications.
``.resample()`` is a time-based groupby, followed by a reduction method on each of its groups.
See some :ref:`cookbook examples <cookbook.resample>` for some advanced strategies
.. note::
``.resample()`` is similar to using a ``.rolling()`` operation with a time-based offset, see a discussion `here <stats.moments.ts-versus-resampling>`
See some :ref:`cookbook examples <cookbook.resample>` for some advanced strategies
``.resample()`` is similar to using a ``.rolling()`` operation with a time-based offset, see a discussion :ref:`here <stats.moments.ts-versus-resampling>`
.. ipython:: python

View File

@ -7,6 +7,10 @@ This is a minor bug-fix release from 0.19.1 and includes some small regression f
bug fixes and performance improvements.
We recommend that all users upgrade to this version.
Highlights include:
- Compatibility with Python 3.6
.. contents:: What's new in v0.19.2
:local:
:backlinks: none
@ -17,11 +21,53 @@ We recommend that all users upgrade to this version.
Performance Improvements
~~~~~~~~~~~~~~~~~~~~~~~~
- Improved performance of ``.replace()`` (:issue:`12745`)
.. _whatsnew_0192.bug_fixes:
Bug Fixes
~~~~~~~~~
- compat with ``dateutil==2.6.0`` for testing (:issue:`14621`)
- allow ``nanoseconds`` in ``Timestamp.replace`` kwargs (:issue:`14621`)
- Compat with ``dateutil==2.6.0``; segfault reported in the testing suite (:issue:`14621`)
- Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`)
- Bug in ``pd.read_csv`` where reading files fails, if the number of headers is equal to the number of lines in the file (:issue:`14515`)
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when multi-char delimiters were not being respected with quotes (:issue:`14582`)
- Fix bugs (:issue:`14734`, :issue:`13654`) in ``pd.read_sas`` and ``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when reading a SAS file incrementally.
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when ``skipfooter`` was not being respected by Python's CSV library (:issue:`13879`)
- Bug in ``pd.cut`` with negative values and a single bin (:issue:`14652`)
- Bug in ``pd.to_numeric`` where a 0 was not unsigned on a ``downcast='unsigned'`` argument (:issue:`14401`)
- Bug in plotting regular and irregular timeseries using shared axes
(``sharex=True`` or ``ax.twinx()``) (:issue:`13341`, :issue:`14322`).
- Bug in not propagating exceptions in parsing invalid datetimes, noted in python 3.6 (:issue:`14561`)
- Compat with python 3.6 for pickling of some offsets (:issue:`14685`)
- Compat with python 3.6 for some indexing exception types (:issue:`14684`, :issue:`14689`)
- Compat with python 3.6 for deprecation warnings in the test suite (:issue:`14681`)
- Compat with python 3.6 for Timestamp pickles (:issue:`14689`)
- Bug in resampling a ``DatetimeIndex`` in local TZ, covering a DST change, which would raise ``AmbiguousTimeError`` (:issue:`14682`)
- Bug in clipboard functions on linux with python2 with unicode and separators (:issue:`13747`)
- Bug in clipboard functions on Windows 10 and python 3 (:issue:`14362`, :issue:`12807`)
- Bug in ``.to_clipboard()`` and Excel compat (:issue:`12529`)
- Bug in ``pd.read_csv()`` in which the ``dtype`` parameter was not being respected for empty data (:issue:`14712`)
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)

View File

@ -12,7 +12,7 @@ Highlights include:
Check the :ref:`API Changes <whatsnew_0200.api_breaking>` and :ref:`deprecations <whatsnew_0200.deprecations>` before updating.
.. contents:: What's new in v0.19.0
.. contents:: What's new in v0.20.0
:local:
:backlinks: none
@ -22,8 +22,26 @@ New features
~~~~~~~~~~~~
``read_csv`` supports ``dtype`` keyword for python engine
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The ``dtype`` keyword argument in the :func:`read_csv` function for specifying the types of parsed columns
is now supported with the ``'python'`` engine (:issue:`14295`). See the :ref:`io docs <io.dtypes>` for more information.
.. ipython:: python
data = "a,b\n1,2\n3,4"
pd.read_csv(StringIO(data), engine='python').dtypes
pd.read_csv(StringIO(data), engine='python', dtype={'a':'float64', 'b':'object'}).dtypes
The ``dtype`` keyword argument is also now supported in the :func:`read_fwf` function for parsing
fixed-width text files.
.. ipython:: python
data = "a b\n1 2\n3 4"
pd.read_fwf(StringIO(data)).dtypes
pd.read_fwf(StringIO(data), dtype={'a':'float64', 'b':'object'}).dtypes
.. _whatsnew_0200.enhancements.other:
@ -41,6 +59,7 @@ Backwards incompatible API changes
.. _whatsnew_0200.api:
- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
@ -53,6 +72,10 @@ Other API Changes
Deprecations
^^^^^^^^^^^^
- ``Series.repeat()`` has deprecated the ``reps`` parameter in favor of ``repeats`` (:issue:`12662`)
- ``Index.repeat()`` and ``MultiIndex.repeat()`` have deprecated the ``n`` parameter in favor of ``repeats`` (:issue:`12662`)
- ``Categorical.searchsorted()`` and ``Series.searchsorted()`` have deprecated the ``v`` parameter in favor of ``value`` (:issue:`12662`)
- ``TimedeltaIndex.searchsorted()``, ``DatetimeIndex.searchsorted()``, and ``PeriodIndex.searchsorted()`` have deprecated the ``key`` parameter in favor of ``value`` (:issue:`12662`)
@ -63,6 +86,8 @@ Removal of prior version deprecations/changes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- ``pd.to_datetime`` and ``pd.to_timedelta`` have dropped the ``coerce`` parameter in favor of ``errors`` (:issue:`13602`)
- ``SparseArray.to_dense()`` has deprecated the ``fill`` parameter, as that parameter was not being respected (:issue:`14647`)
- ``SparseSeries.to_dense()`` has deprecated the ``sparse_only`` parameter (:issue:`14647`)

View File

@ -41,6 +41,7 @@ from collections import namedtuple
PY2 = sys.version_info[0] == 2
PY3 = (sys.version_info[0] >= 3)
PY35 = (sys.version_info >= (3, 5))
PY36 = (sys.version_info >= (3, 6))
try:
import __builtin__ as builtins

View File

@ -1091,12 +1091,12 @@ class IndexOpsMixin(object):
"""Find indices where elements should be inserted to maintain order.
Find the indices into a sorted %(klass)s `self` such that, if the
corresponding elements in `v` were inserted before the indices, the
order of `self` would be preserved.
corresponding elements in `value` were inserted before the indices,
the order of `self` would be preserved.
Parameters
----------
%(value)s : array_like
value : array_like
Values to insert into `self`.
side : {'left', 'right'}, optional
If 'left', the index of the first suitable location found is given.
@ -1109,7 +1109,7 @@ class IndexOpsMixin(object):
Returns
-------
indices : array of ints
Array of insertion points with the same shape as `v`.
Array of insertion points with the same shape as `value`.
See Also
--------
@ -1149,11 +1149,12 @@ class IndexOpsMixin(object):
array([3, 4]) # eggs before milk
""")
@Substitution(klass='IndexOpsMixin', value='key')
@Substitution(klass='IndexOpsMixin')
@Appender(_shared_docs['searchsorted'])
def searchsorted(self, key, side='left', sorter=None):
@deprecate_kwarg(old_arg_name='key', new_arg_name='value')
def searchsorted(self, value, side='left', sorter=None):
# needs coercion on the key (DatetimeIndex does already)
return self.values.searchsorted(key, side=side, sorter=sorter)
return self.values.searchsorted(value, side=side, sorter=sorter)
_shared_docs['drop_duplicates'] = (
"""Return %(klass)s with duplicate values removed

View File

@ -1076,9 +1076,10 @@ class Categorical(PandasObject):
"""
return self._codes.nbytes + self._categories.memory_usage(deep=deep)
@Substitution(klass='Categorical', value='v')
@Substitution(klass='Categorical')
@Appender(_shared_docs['searchsorted'])
def searchsorted(self, v, side='left', sorter=None):
@deprecate_kwarg(old_arg_name='v', new_arg_name='value')
def searchsorted(self, value, side='left', sorter=None):
if not self.ordered:
raise ValueError("Categorical not ordered\nyou can use "
".as_ordered() to change the Categorical to an "
@ -1086,7 +1087,7 @@ class Categorical(PandasObject):
from pandas.core.series import Series
values_as_codes = self.categories.values.searchsorted(
Series(v).values, side=side)
Series(value).values, side=side)
return self.codes.searchsorted(values_as_codes, sorter=sorter)

View File

@ -1346,7 +1346,7 @@ class DataFrame(NDFrame):
file
quoting : optional constant from csv module
defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
then floats are comverted to strings and thus csv.QUOTE_NONNUMERIC
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
will treat them as non-numeric
quotechar : string (length 1), default '\"'
character used to quote fields
@ -3233,7 +3233,7 @@ class DataFrame(NDFrame):
# try to be helpful
if isinstance(self.columns, MultiIndex):
raise ValueError('Cannot sort by column %s in a '
'multi-index you need to explicity '
'multi-index you need to explicitly '
'provide all the levels' % str(by))
raise ValueError('Cannot sort by duplicate column %s' %

View File

@ -1066,7 +1066,7 @@ class NDFrame(PandasObject):
Handler to call if object cannot otherwise be converted to a
suitable format for JSON. Should receive a single argument which is
the object to convert and return a serialisable object.
lines : boolean, defalut False
lines : boolean, default False
If 'orient' is 'records' write out line delimited json format. Will
throw ValueError if incorrect 'orient' since others are not list
like.
@ -3477,20 +3477,27 @@ class NDFrame(PandasObject):
res = self if inplace else self.copy()
for c, src in compat.iteritems(to_replace):
if c in value and c in self:
# object conversion is handled in
# series.replace which is called recursivelly
res[c] = res[c].replace(to_replace=src,
value=value[c],
inplace=False, regex=regex)
inplace=False,
regex=regex)
return None if inplace else res
# {'A': NA} -> 0
elif not is_list_like(value):
for k, src in compat.iteritems(to_replace):
if k in self:
new_data = new_data.replace(to_replace=src,
value=value,
filter=[k],
inplace=inplace,
regex=regex)
keys = [(k, src) for k, src in compat.iteritems(to_replace)
if k in self]
keys_len = len(keys) - 1
for i, (k, src) in enumerate(keys):
convert = i == keys_len
new_data = new_data.replace(to_replace=src,
value=value,
filter=[k],
inplace=inplace,
regex=regex,
convert=convert)
else:
raise TypeError('value argument must be scalar, dict, or '
'Series')

View File

@ -175,8 +175,8 @@ class Grouper(object):
freq : string / frequency object, defaults to None
This will groupby the specified frequency if the target selection
(via key or level) is a datetime-like object. For full specification
of available frequencies, please see
`here <http://pandas.pydata.org/pandas-docs/stable/timeseries.html>`_.
of available frequencies, please see `here
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`_.
axis : number/name of the axis, defaults to 0
sort : boolean, default to False
whether to sort the resulting labels

View File

@ -11,6 +11,7 @@ from pandas.types.common import (is_integer_dtype,
is_sequence,
is_scalar,
is_sparse,
_is_unorderable_exception,
_ensure_platform_int)
from pandas.types.missing import isnull, _infer_fill_value
@ -1411,7 +1412,7 @@ class _LocIndexer(_LocationIndexer):
except TypeError as e:
# python 3 type errors should be raised
if 'unorderable' in str(e): # pragma: no cover
if _is_unorderable_exception(e):
error()
raise
except:

View File

@ -622,7 +622,6 @@ class Block(PandasObject):
original_to_replace = to_replace
mask = isnull(self.values)
# try to replace, if we raise an error, convert to ObjectBlock and
# retry
try:
@ -1795,13 +1794,14 @@ class BoolBlock(NumericBlock):
return issubclass(value.dtype.type, np.bool_)
def replace(self, to_replace, value, inplace=False, filter=None,
regex=False, mgr=None):
regex=False, convert=True, mgr=None):
to_replace_values = np.atleast_1d(to_replace)
if not np.can_cast(to_replace_values, bool):
return self
return super(BoolBlock, self).replace(to_replace, value,
inplace=inplace, filter=filter,
regex=regex, mgr=mgr)
regex=regex, convert=convert,
mgr=mgr)
class ObjectBlock(Block):
@ -3214,6 +3214,7 @@ class BlockManager(PandasObject):
masks = [comp(s) for i, s in enumerate(src_list)]
result_blocks = []
src_len = len(src_list) - 1
for blk in self.blocks:
# its possible to get multiple result blocks here
@ -3223,8 +3224,9 @@ class BlockManager(PandasObject):
new_rb = []
for b in rb:
if b.dtype == np.object_:
convert = i == src_len
result = b.replace(s, d, inplace=inplace, regex=regex,
mgr=mgr)
mgr=mgr, convert=convert)
new_rb = _extend_blocks(result, new_rb)
else:
# get our mask for this element, sized to this
@ -4788,7 +4790,12 @@ def _putmask_smart(v, m, n):
# change the dtype
dtype, _ = _maybe_promote(n.dtype)
nv = v.astype(dtype)
if is_extension_type(v.dtype) and is_object_dtype(dtype):
nv = v.get_values(dtype)
else:
nv = v.astype(dtype)
try:
nv[m] = n[m]
except ValueError:

View File

@ -357,6 +357,11 @@ def pivot_simple(index, columns, values):
Returns
-------
DataFrame
See also
--------
DataFrame.pivot_table : generalization of pivot that can handle
duplicate values for one index/column pair
"""
if (len(index) != len(columns)) or (len(columns) != len(values)):
raise AssertionError('Length of index, columns, and values must be the'

View File

@ -25,6 +25,7 @@ from pandas.types.common import (_coerce_to_dtype, is_categorical_dtype,
is_iterator,
is_dict_like,
is_scalar,
_is_unorderable_exception,
_ensure_platform_int)
from pandas.types.generic import ABCSparseArray, ABCDataFrame
from pandas.types.cast import (_maybe_upcast, _infer_dtype_from_scalar,
@ -102,11 +103,11 @@ class Series(base.IndexOpsMixin, strings.StringAccessorMixin,
"""
One-dimensional ndarray with axis labels (including time series).
Labels need not be unique but must be any hashable type. The object
Labels need not be unique but must be a hashable type. The object
supports both integer- and label-based indexing and provides a host of
methods for performing operations involving the index. Statistical
methods from ndarray have been overridden to automatically exclude
missing data (currently represented as NaN)
missing data (currently represented as NaN).
Operations between Series (+, -, /, *, **) align values based on their
associated index values-- they need not be the same length. The result
@ -117,8 +118,8 @@ class Series(base.IndexOpsMixin, strings.StringAccessorMixin,
data : array-like, dict, or scalar value
Contains data stored in Series
index : array-like or Index (1d)
Values must be unique and hashable, same length as data. Index
object (or other iterable of same length as data) Will default to
Values must be hashable and have the same length as `data`.
Non-unique index values are allowed. Will default to
RangeIndex(len(data)) if not provided. If both a dict and index
sequence are used, the index will override the keys found in the
dict.
@ -753,7 +754,7 @@ class Series(base.IndexOpsMixin, strings.StringAccessorMixin,
raise ValueError("Can only tuple-index with a MultiIndex")
# python 3 type errors should be raised
if 'unorderable' in str(e): # pragma: no cover
if _is_unorderable_exception(e):
raise IndexError(key)
if com.is_bool_indexer(key):
@ -831,18 +832,19 @@ class Series(base.IndexOpsMixin, strings.StringAccessorMixin,
self._data = self._data.setitem(indexer=key, value=value)
self._maybe_update_cacher()
def repeat(self, reps, *args, **kwargs):
@deprecate_kwarg(old_arg_name='reps', new_arg_name='repeats')
def repeat(self, repeats, *args, **kwargs):
"""
Repeat elements of an Series. Refer to `numpy.ndarray.repeat`
for more information about the `reps` argument.
for more information about the `repeats` argument.
See also
--------
numpy.ndarray.repeat
"""
nv.validate_repeat(args, kwargs)
new_index = self.index.repeat(reps)
new_values = self._values.repeat(reps)
new_index = self.index.repeat(repeats)
new_values = self._values.repeat(repeats)
return self._constructor(new_values,
index=new_index).__finalize__(self)
@ -1216,16 +1218,10 @@ class Series(base.IndexOpsMixin, strings.StringAccessorMixin,
dtype='int64').__finalize__(self)
def mode(self):
"""Returns the mode(s) of the dataset.
"""Return the mode(s) of the dataset.
Empty if nothing occurs at least 2 times. Always returns Series even
if only one value.
Parameters
----------
sort : bool, default True
If True, will lexicographically sort values, if False skips
sorting. Result ordering when ``sort=False`` is not defined.
Empty if nothing occurs at least 2 times. Always returns Series even
if only one value is returned.
Returns
-------
@ -1514,12 +1510,13 @@ class Series(base.IndexOpsMixin, strings.StringAccessorMixin,
else: # pragma: no cover
raise TypeError('unsupported type: %s' % type(other))
@Substitution(klass='Series', value='v')
@Substitution(klass='Series')
@Appender(base._shared_docs['searchsorted'])
def searchsorted(self, v, side='left', sorter=None):
@deprecate_kwarg(old_arg_name='v', new_arg_name='value')
def searchsorted(self, value, side='left', sorter=None):
if sorter is not None:
sorter = _ensure_platform_int(sorter)
return self._values.searchsorted(Series(v)._values,
return self._values.searchsorted(Series(value)._values,
side=side, sorter=sorter)
# -------------------------------------------------------------------

View File

@ -535,17 +535,18 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):
"""
return list(self.values)
def repeat(self, n, *args, **kwargs):
@deprecate_kwarg(old_arg_name='n', new_arg_name='repeats')
def repeat(self, repeats, *args, **kwargs):
"""