Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/pandas-dev/pandas into ops-kwargs9
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Feb 14, 2018
2 parents ba4695d + 49812cf commit a0c14cd
Show file tree
Hide file tree
Showing 179 changed files with 6,611 additions and 4,355 deletions.
8 changes: 8 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ matrix:
env:
- JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
# In allow_failures
- dist: trusty
env:
- JOB="3.6_ASV" ASV=true
# In allow_failures
- dist: trusty
env:
- JOB="3.6_DOC" DOC=true
Expand All @@ -93,6 +97,9 @@ matrix:
- dist: trusty
env:
- JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
- dist: trusty
env:
- JOB="3.6_ASV" ASV=true
- dist: trusty
env:
- JOB="3.6_DOC" DOC=true
Expand Down Expand Up @@ -128,6 +135,7 @@ script:
- ci/script_single.sh
- ci/script_multi.sh
- ci/lint.sh
- ci/asv.sh
- echo "checking imports"
- source activate pandas && python ci/check_imports.py
- echo "script done"
Expand Down
4 changes: 3 additions & 1 deletion asv_bench/benchmarks/algorithms.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from importlib import import_module

import numpy as np
Expand Down Expand Up @@ -83,7 +84,8 @@ def setup(self):
self.all = self.uniques.repeat(10)

def time_match_string(self):
pd.match(self.all, self.uniques)
with warnings.catch_warnings(record=True):
pd.match(self.all, self.uniques)


class Hashing(object):
Expand Down
10 changes: 8 additions & 2 deletions asv_bench/benchmarks/categoricals.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import warnings

import numpy as np
import pandas as pd
import pandas.util.testing as tm
Expand Down Expand Up @@ -119,11 +121,15 @@ def setup(self):

self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
self.s_str_cat = self.s_str.astype('category')
self.s_str_cat_ordered = self.s_str.astype('category', ordered=True)
with warnings.catch_warnings(record=True):
self.s_str_cat_ordered = self.s_str.astype('category',
ordered=True)

self.s_int = pd.Series(np.random.randint(0, ncats, size=N))
self.s_int_cat = self.s_int.astype('category')
self.s_int_cat_ordered = self.s_int.astype('category', ordered=True)
with warnings.catch_warnings(record=True):
self.s_int_cat_ordered = self.s_int.astype('category',
ordered=True)

def time_rank_string(self):
self.s_str.rank()
Expand Down
9 changes: 6 additions & 3 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import string
import warnings

import numpy as np
import pandas.util.testing as tm
from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
Expand All @@ -15,7 +17,8 @@ def setup(self):
self.df = DataFrame(np.random.randn(10000, 25))
self.df['foo'] = 'bar'
self.df['bar'] = 'baz'
self.df = self.df.consolidate()
with warnings.catch_warnings(record=True):
self.df = self.df.consolidate()

def time_frame_get_numeric_data(self):
self.df._get_numeric_data()
Expand Down Expand Up @@ -141,8 +144,8 @@ class Repr(object):
def setup(self):
nrows = 10000
data = np.random.randn(nrows, 10)
idx = MultiIndex.from_arrays(np.tile(np.random.randn(3, nrows / 100),
100))
arrays = np.tile(np.random.randn(3, int(nrows / 100)), 100)
idx = MultiIndex.from_arrays(arrays)
self.df3 = DataFrame(data, index=idx)
self.df4 = DataFrame(data, index=np.random.randn(nrows))
self.df_tall = DataFrame(np.random.randn(nrows, 10))
Expand Down
43 changes: 20 additions & 23 deletions asv_bench/benchmarks/gil.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import numpy as np
import pandas.util.testing as tm
from pandas import (DataFrame, Series, rolling_median, rolling_mean,
rolling_min, rolling_max, rolling_var, rolling_skew,
rolling_kurt, rolling_std, read_csv, factorize, date_range)
from pandas import DataFrame, Series, read_csv, factorize, date_range
from pandas.core.algorithms import take_1d
try:
from pandas import (rolling_median, rolling_mean, rolling_min, rolling_max,
rolling_var, rolling_skew, rolling_kurt, rolling_std)
have_rolling_methods = True
except ImportError:
have_rolling_methods = False
try:
from pandas._libs import algos
except ImportError:
Expand Down Expand Up @@ -171,8 +175,7 @@ def run(period):
class ParallelRolling(object):

goal_time = 0.2
params = ['rolling_median', 'rolling_mean', 'rolling_min', 'rolling_max',
'rolling_var', 'rolling_skew', 'rolling_kurt', 'rolling_std']
params = ['median', 'mean', 'min', 'max', 'var', 'skew', 'kurt', 'std']
param_names = ['method']

def setup(self, method):
Expand All @@ -181,34 +184,28 @@ def setup(self, method):
win = 100
arr = np.random.rand(100000)
if hasattr(DataFrame, 'rolling'):
rolling = {'rolling_median': 'median',
'rolling_mean': 'mean',
'rolling_min': 'min',
'rolling_max': 'max',
'rolling_var': 'var',
'rolling_skew': 'skew',
'rolling_kurt': 'kurt',
'rolling_std': 'std'}
df = DataFrame(arr).rolling(win)

@test_parallel(num_threads=2)
def parallel_rolling():
getattr(df, rolling[method])()
getattr(df, method)()
self.parallel_rolling = parallel_rolling
else:
rolling = {'rolling_median': rolling_median,
'rolling_mean': rolling_mean,
'rolling_min': rolling_min,
'rolling_max': rolling_max,
'rolling_var': rolling_var,
'rolling_skew': rolling_skew,
'rolling_kurt': rolling_kurt,
'rolling_std': rolling_std}
elif have_rolling_methods:
rolling = {'median': rolling_median,
'mean': rolling_mean,
'min': rolling_min,
'max': rolling_max,
'var': rolling_var,
'skew': rolling_skew,
'kurt': rolling_kurt,
'std': rolling_std}

@test_parallel(num_threads=2)
def parallel_rolling():
rolling[method](arr, win)
self.parallel_rolling = parallel_rolling
else:
raise NotImplementedError

def time_rolling(self, method):
self.parallel_rolling()
Expand Down
22 changes: 20 additions & 2 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from string import ascii_letters
from itertools import product
from functools import partial
Expand Down Expand Up @@ -159,6 +160,22 @@ def time_series_nth(self, df):
df[1].groupby(df[0]).nth(0)


class NthObject(object):

goal_time = 0.2

def setup_cache(self):
df = DataFrame(np.random.randint(1, 100, (10000,)), columns=['g'])
df['obj'] = ['a'] * 5000 + ['b'] * 5000
return df

def time_nth(self, df):
df.groupby('g').nth(5)

def time_nth_last(self, df):
df.groupby('g').last()


class DateAttributes(object):

goal_time = 0.2
Expand Down Expand Up @@ -340,7 +357,8 @@ def time_dt_size(self):
self.df.groupby(['dates']).size()

def time_dt_timegrouper_size(self):
self.df.groupby(TimeGrouper(key='dates', freq='M')).size()
with warnings.catch_warnings(record=True):
self.df.groupby(TimeGrouper(key='dates', freq='M')).size()

def time_category_size(self):
self.draws.groupby(self.cats).size()
Expand Down Expand Up @@ -467,7 +485,7 @@ class SumMultiLevel(object):

def setup(self):
N = 50
self.df = DataFrame({'A': range(N) * 2,
self.df = DataFrame({'A': list(range(N)) * 2,
'B': range(N * 2),
'C': 1}).set_index(['A', 'B'])

Expand Down
17 changes: 17 additions & 0 deletions asv_bench/benchmarks/index_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ def setup(self, dtype):
self.idx = getattr(tm, 'make{}Index'.format(dtype))(N)
self.array_mask = (np.arange(N) % 3) == 0
self.series_mask = Series(self.array_mask)
self.sorted = self.idx.sort_values()
half = N // 2
self.non_unique = self.idx[:half].append(self.idx[:half])
self.non_unique_sorted = self.sorted[:half].append(self.sorted[:half])
self.key = self.sorted[N // 4]

def time_boolean_array(self, dtype):
self.idx[self.array_mask]
Expand All @@ -163,6 +168,18 @@ def time_slice(self, dtype):
def time_slice_step(self, dtype):
self.idx[::2]

def time_get_loc(self, dtype):
self.idx.get_loc(self.key)

def time_get_loc_sorted(self, dtype):
self.sorted.get_loc(self.key)

def time_get_loc_non_unique(self, dtype):
self.non_unique.get_loc(self.key)

def time_get_loc_non_unique_sorted(self, dtype):
self.non_unique_sorted.get_loc(self.key)


class Float64IndexMethod(object):
# GH 13166
Expand Down
19 changes: 13 additions & 6 deletions asv_bench/benchmarks/indexing.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import warnings

import numpy as np
import pandas.util.testing as tm
from pandas import (Series, DataFrame, MultiIndex, Int64Index, Float64Index,
Expand Down Expand Up @@ -91,7 +93,8 @@ def time_getitem_pos_slice(self, index):
self.s[:80000]

def time_get_value(self, index):
self.s.get_value(self.lbl)
with warnings.catch_warnings(record=True):
self.s.get_value(self.lbl)

def time_getitem_scalar(self, index):
self.s[self.lbl]
Expand All @@ -112,7 +115,8 @@ def setup(self):
self.bool_obj_indexer = self.bool_indexer.astype(object)

def time_get_value(self):
self.df.get_value(self.idx_scalar, self.col_scalar)
with warnings.catch_warnings(record=True):
self.df.get_value(self.idx_scalar, self.col_scalar)

def time_ix(self):
self.df.ix[self.idx_scalar, self.col_scalar]
Expand Down Expand Up @@ -231,11 +235,13 @@ class PanelIndexing(object):
goal_time = 0.2

def setup(self):
self.p = Panel(np.random.randn(100, 100, 100))
self.inds = range(0, 100, 10)
with warnings.catch_warnings(record=True):
self.p = Panel(np.random.randn(100, 100, 100))
self.inds = range(0, 100, 10)

def time_subset(self):
self.p.ix[(self.inds, self.inds, self.inds)]
with warnings.catch_warnings(record=True):
self.p.ix[(self.inds, self.inds, self.inds)]


class MethodLookup(object):
Expand Down Expand Up @@ -295,7 +301,8 @@ def setup(self):
def time_insert(self):
np.random.seed(1234)
for i in range(100):
self.df.insert(0, i, np.random.randn(self.N))
self.df.insert(0, i, np.random.randn(self.N),
allow_duplicates=True)

def time_assign_with_setitem(self):
np.random.seed(1234)
Expand Down
21 changes: 13 additions & 8 deletions asv_bench/benchmarks/io/hdf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import warnings

import numpy as np
from pandas import DataFrame, Panel, date_range, HDFStore, read_hdf
import pandas.util.testing as tm
Expand Down Expand Up @@ -105,22 +107,25 @@ class HDFStorePanel(BaseIO):

def setup(self):
self.fname = '__test__.h5'
self.p = Panel(np.random.randn(20, 1000, 25),
items=['Item%03d' % i for i in range(20)],
major_axis=date_range('1/1/2000', periods=1000),
minor_axis=['E%03d' % i for i in range(25)])
self.store = HDFStore(self.fname)
self.store.append('p1', self.p)
with warnings.catch_warnings(record=True):
self.p = Panel(np.random.randn(20, 1000, 25),
items=['Item%03d' % i for i in range(20)],
major_axis=date_range('1/1/2000', periods=1000),
minor_axis=['E%03d' % i for i in range(25)])
self.store = HDFStore(self.fname)
self.store.append('p1', self.p)

def teardown(self):
self.store.close()
self.remove(self.fname)

def time_read_store_table_panel(self):
self.store.select('p1')
with warnings.catch_warnings(record=True):
self.store.select('p1')

def time_write_store_table_panel(self):
self.store.append('p2', self.p)
with warnings.catch_warnings(record=True):
self.store.append('p2', self.p)


class HDF(BaseIO):
Expand Down
23 changes: 16 additions & 7 deletions asv_bench/benchmarks/join_merge.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
import string

import numpy as np
Expand Down Expand Up @@ -26,7 +27,8 @@ def setup(self):
self.mdf1['obj2'] = 'bar'
self.mdf1['int1'] = 5
try:
self.mdf1.consolidate(inplace=True)
with warnings.catch_warnings(record=True):
self.mdf1.consolidate(inplace=True)
except:
pass
self.mdf2 = self.mdf1.copy()
Expand Down Expand Up @@ -75,16 +77,23 @@ class ConcatPanels(object):
param_names = ['axis', 'ignore_index']

def setup(self, axis, ignore_index):
panel_c = Panel(np.zeros((10000, 200, 2), dtype=np.float32, order='C'))
self.panels_c = [panel_c] * 20
panel_f = Panel(np.zeros((10000, 200, 2), dtype=np.float32, order='F'))
self.panels_f = [panel_f] * 20
with warnings.catch_warnings(record=True):
panel_c = Panel(np.zeros((10000, 200, 2),
dtype=np.float32,
order='C'))
self.panels_c = [panel_c] * 20
panel_f = Panel(np.zeros((10000, 200, 2),
dtype=np.float32,
order='F'))
self.panels_f = [panel_f] * 20

def time_c_ordered(self, axis, ignore_index):
concat(self.panels_c, axis=axis, ignore_index=ignore_index)
with warnings.catch_warnings(record=True):
concat(self.panels_c, axis=axis, ignore_index=ignore_index)

def time_f_ordered(self, axis, ignore_index):
concat(self.panels_f, axis=axis, ignore_index=ignore_index)
with warnings.catch_warnings(record=True):
concat(self.panels_f, axis=axis, ignore_index=ignore_index)


class ConcatDataFrames(object):
Expand Down
Loading

0 comments on commit a0c14cd

Please sign in to comment.