From 645b77fcd23db79f7e8407db8bf0cd4a3303fcec Mon Sep 17 00:00:00 2001
From: Matt Roeschke <emailformattr@gmail.com>
Date: Mon, 27 Nov 2017 00:01:07 -0800
Subject: [PATCH 1/3] CLN: ASV frame_methods benchmark

Move benchmarks that belong elsewhere to indexing.py and strings.py

Add more cleaning
---
 asv_bench/benchmarks/frame_methods.py | 631 ++++++++++----------------
 asv_bench/benchmarks/indexing.py      |  66 +++
 asv_bench/benchmarks/strings.py       |  12 +
 3 files changed, 326 insertions(+), 383 deletions(-)

diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
index 53ee4d8019938..9a6221b9fd6d2 100644
--- a/asv_bench/benchmarks/frame_methods.py
+++ b/asv_bench/benchmarks/frame_methods.py
@@ -1,13 +1,16 @@
-from .pandas_vb_common import *
 import string
+import numpy as np
+import pandas.util.testing as tm
+from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
+                    isnull, NaT)
 
-#----------------------------------------------------------------------
-# get_numeric_data
 
-class frame_get_numeric_data(object):
+class GetNumericData(object):
+
     goal_time = 0.2
 
     def setup(self):
+        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(10000, 25))
         self.df['foo'] = 'bar'
         self.df['bar'] = 'baz'
@@ -16,19 +19,22 @@ def setup(self):
     def time_frame_get_numeric_data(self):
         self.df._get_numeric_data()
 
-#----------------------------------------------------------------------
-# lookup
 
-class frame_fancy_lookup(object):
+class Lookup(object):
+
     goal_time = 0.2
 
     def setup(self):
-        self.df = DataFrame(np.random.randn(10000, 8), columns=list('abcdefgh'))
+        np.random.seed(1234)
+        self.df = DataFrame(np.random.randn(10000, 8),
+                            columns=list('abcdefgh'))
         self.df['foo'] = 'bar'
         self.row_labels = list(self.df.index[::10])[:900]
-        self.col_labels = (list(self.df.columns) * 100)
-        self.row_labels_all = np.array((list(self.df.index) * len(self.df.columns)), dtype='object')
-        self.col_labels_all = np.array((list(self.df.columns) * len(self.df.index)), dtype='object')
+        self.col_labels = list(self.df.columns) * 100
+        self.row_labels_all = np.array(
+            list(self.df.index) * len(self.df.columns), dtype='object')
+        self.col_labels_all = np.array(
+            list(self.df.columns) * len(self.df.index), dtype='object')
 
     def time_frame_fancy_lookup(self):
         self.df.lookup(self.row_labels, self.col_labels)
@@ -37,25 +43,21 @@ def time_frame_fancy_lookup_all(self):
         self.df.lookup(self.row_labels_all, self.col_labels_all)
 
 
-#----------------------------------------------------------------------
-# reindex
-
 class Reindex(object):
+
     goal_time = 0.2
 
     def setup(self):
-        self.df = DataFrame(randn(10000, 1000))
-        self.idx = np.arange(4000, 7000)
-
+        N = 10**3
+        np.random.seed(1234)
+        self.df = DataFrame(np.random.randn(N * 10, N))
+        self.idx = np.arange(4 * N, 7 * N)
         self.df2 = DataFrame(
-            dict([(c, {0: randint(0, 2, 1000).astype(np.bool_),
-                       1: randint(0, 1000, 1000).astype(
-                           np.int16),
-                       2: randint(0, 1000, 1000).astype(
-                           np.int32),
-                       3: randint(0, 1000, 1000).astype(
-                           np.int64),}[randint(0, 4)]) for c in
-                  range(1000)]))
+            {c: {0: np.random.randint(0, 2, N).astype(np.bool_),
+                 1: np.random.randint(0, N, N).astype(np.int16),
+                 2: np.random.randint(0, N, N).astype(np.int32),
+                 3: np.random.randint(0, N, N).astype(np.int64)}
+                [np.random.randint(0, 4)] for c in range(N)})
 
     def time_reindex_axis0(self):
         self.df.reindex(self.idx)
@@ -67,81 +69,85 @@ def time_reindex_both_axes(self):
         self.df.reindex(index=self.idx, columns=self.idx)
 
     def time_reindex_both_axes_ix(self):
-        self.df.ix[(self.idx, self.idx)]
+        self.df.ix[self.idx, self.idx]
 
     def time_reindex_upcast(self):
+        np.random.seed(1234)
         self.df2.reindex(np.random.permutation(range(1200)))
 
 
-#----------------------------------------------------------------------
-# iteritems (monitor no-copying behaviour)
-
 class Iteration(object):
+
     goal_time = 0.2
 
     def setup(self):
-        self.df = DataFrame(randn(10000, 1000))
-        self.df2 = DataFrame(np.random.randn(50000, 10))
-        self.df3 = pd.DataFrame(np.random.randn(1000,5000),
-                                columns=['C'+str(c) for c in range(5000)])
+        N = 1000
+        np.random.seed(1234)
+        self.df = DataFrame(np.random.randn(N * 10, N))
+        self.df2 = DataFrame(np.random.randn(N * 50, 10))
+        self.df3 = DataFrame(np.random.randn(N, 5 * N),
+                             columns=['C' + str(c) for c in range(N * 5)])
 
-    def f(self):
+    def time_iteritems(self):
+        # (monitor no-copying behaviour)
         if hasattr(self.df, '_item_cache'):
             self.df._item_cache.clear()
-        for (name, col) in self.df.iteritems():
-            pass
-
-    def g(self):
-        for (name, col) in self.df.iteritems():
+        for name, col in self.df.iteritems():
             pass
 
-    def time_iteritems(self):
-        self.f()
-
     def time_iteritems_cached(self):
-        self.g()
+        for name, col in self.df.iteritems():
+            pass
 
     def time_iteritems_indexing(self):
-        df = self.df3
-        for col in df:
-            df[col]
+        for col in self.df3:
+            self.df3[col]
 
     def time_itertuples(self):
         for row in self.df2.itertuples():
             pass
 
 
-#----------------------------------------------------------------------
-# to_string, to_html, repr
+class ToString(object):
 
-class Formatting(object):
     goal_time = 0.2
 
     def setup(self):
-        self.df = DataFrame(randn(100, 10))
+        np.random.seed(1234)
+        self.df = DataFrame(np.random.randn(100, 10))
 
-        self.nrows = 500
-        self.df2 = DataFrame(randn(self.nrows, 10))
-        self.df2[0] = period_range('2000', '2010', self.nrows)
-        self.df2[1] = range(self.nrows)
+    def time_to_string_floats(self):
+        self.df.to_string()
 
-        self.nrows = 10000
-        self.data = randn(self.nrows, 10)
-        self.idx = MultiIndex.from_arrays(np.tile(randn(3, int(self.nrows / 100)), 100))
-        self.df3 = DataFrame(self.data, index=self.idx)
-        self.idx = randn(self.nrows)
-        self.df4 = DataFrame(self.data, index=self.idx)
 
-        self.df_tall = pandas.DataFrame(np.random.randn(10000, 10))
+class ToHTML(object):
 
-        self.df_wide = pandas.DataFrame(np.random.randn(10, 10000))
+    goal_time = 0.2
 
-    def time_to_string_floats(self):
-        self.df.to_string()
+    def setup(self):
+        nrows = 500
+        self.df2 = DataFrame(np.random.randn(nrows, 10))
+        self.df2[0] = period_range('2000', '2010', nrows)
+        self.df2[1] = range(nrows)
 
     def time_to_html_mixed(self):
         self.df2.to_html()
 
+
+class Repr(object):
+
+    goal_time = 0.2
+
+    def setup(self):
+        nrows = 10000
+        data = np.random.randn(nrows, 10)
+        idx = MultiIndex.from_arrays(np.tile(np.random.randn(3, nrows / 100),
+                                             100))
+        self.df3 = DataFrame(data, index=idx)
+        self.df4 = DataFrame(data, index=np.random.randn(nrows))
+        self.df_tall = DataFrame(np.random.randn(nrows, 10))
+        self.df_wide = DataFrame(np.random.randn(10, nrows))
+
     def time_html_repr_trunc_mi(self):
         self.df3._repr_html_()
 
@@ -155,21 +161,17 @@ def time_frame_repr_wide(self):
         repr(self.df_wide)
 
 
-#----------------------------------------------------------------------
-# nulls/masking
-
-
-## masking
+class MaskBool(object):
 
-class frame_mask_bools(object):
     goal_time = 0.2
 
     def setup(self):
-        self.data = np.random.randn(1000, 500)
-        self.df = DataFrame(self.data)
-        self.df = self.df.where((self.df > 0))
-        self.bools = (self.df > 0)
-        self.mask = isnull(self.df)
+        np.random.seed(1234)
+        data = np.random.randn(1000, 500)
+        df = DataFrame(data)
+        df = df.where(df > 0)
+        self.bools = df > 0
+        self.mask = isnull(df)
 
     def time_frame_mask_bools(self):
         self.bools.mask(self.mask)
@@ -178,31 +180,27 @@ def time_frame_mask_floats(self):
         self.bools.astype(float).mask(self.mask)
 
 
-## isnull
+class Isnull(object):
 
-class FrameIsnull(object):
     goal_time = 0.2
 
     def setup(self):
-        self.df_no_null = DataFrame(np.random.randn(1000, 1000))
-
+        N = 10**3
         np.random.seed(1234)
-        self.sample = np.array([np.nan, 1.0])
-        self.data = np.random.choice(self.sample, (1000, 1000))
-        self.df = DataFrame(self.data)
+        self.df_no_null = DataFrame(np.random.randn(N, N))
 
-        np.random.seed(1234)
-        self.sample = np.array(list(string.ascii_lowercase) +
-                               list(string.ascii_uppercase) +
-                               list(string.whitespace))
-        self.data = np.random.choice(self.sample, (1000, 1000))
-        self.df_strings= DataFrame(self.data)
+        sample = np.array([np.nan, 1.0])
+        data = np.random.choice(sample, (N, N))
+        self.df = DataFrame(data)
 
-        np.random.seed(1234)
-        self.sample = np.array([NaT, np.nan, None, np.datetime64('NaT'),
-                                np.timedelta64('NaT'), 0, 1, 2.0, '', 'abcd'])
-        self.data = np.random.choice(self.sample, (1000, 1000))
-        self.df_obj = DataFrame(self.data)
+        sample = np.array(list(string.ascii_letters + string.whitespace))
+        data = np.random.choice(sample, (N, N))
+        self.df_strings = DataFrame(data)
+
+        sample = np.array([NaT, np.nan, None, np.datetime64('NaT'),
+                           np.timedelta64('NaT'), 0, 1, 2.0, '', 'abcd'])
+        data = np.random.choice(sample, (N, N))
+        self.df_obj = DataFrame(data)
 
     def time_isnull_floats_no_null(self):
         isnull(self.df_no_null)
@@ -217,458 +215,325 @@ def time_isnull_obj(self):
         isnull(self.df_obj)
 
 
-# ----------------------------------------------------------------------
-# fillna in place
-
-class frame_fillna_inplace(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.df = DataFrame(randn(10000, 100))
-        self.df.values[::2] = np.nan
-
-    def time_frame_fillna_inplace(self):
-        self.df.fillna(0, inplace=True)
-
+class Fillna(object):
 
-
-class frame_fillna_many_columns_pad(object):
     goal_time = 0.2
+    params = ([True, False], ['pad', 'bfill'])
+    param_names = ['inplace', 'method']
 
-    def setup(self):
-        self.values = np.random.randn(1000, 1000)
-        self.values[::2] = np.nan
-        self.df = DataFrame(self.values)
-
-    def time_frame_fillna_many_columns_pad(self):
-        self.df.fillna(method='pad')
+    def setup(self, inplace, method):
+        np.random.seed(1234)
+        values = np.random.randn(10000, 100)
+        values[::2] = np.nan
+        self.df = DataFrame(values)
 
+    def time_frame_fillna(self, inplace, method):
+        self.df.fillna(inplace=inplace, method=method)
 
 
 class Dropna(object):
+
     goal_time = 0.2
+    params = (['all', 'any'], [0, 1])
+    param_names = ['how', 'axis']
 
-    def setup(self):
-        self.data = np.random.randn(10000, 1000)
-        self.df = DataFrame(self.data)
+    def setup(self, how, axis):
+        np.random.seed(1234)
+        self.df = DataFrame(np.random.randn(10000, 1000))
         self.df.ix[50:1000, 20:50] = np.nan
         self.df.ix[2000:3000] = np.nan
         self.df.ix[:, 60:70] = np.nan
         self.df_mixed = self.df.copy()
         self.df_mixed['foo'] = 'bar'
 
-        self.df_mi = self.df.copy()
-        self.df_mi.index = MultiIndex.from_tuples(self.df_mi.index.map((lambda x: (x, x))))
-        self.df_mi.columns = MultiIndex.from_tuples(self.df_mi.columns.map((lambda x: (x, x))))
-
-        self.df_mixed_mi = self.df_mixed.copy()
-        self.df_mixed_mi.index = MultiIndex.from_tuples(self.df_mixed_mi.index.map((lambda x: (x, x))))
-        self.df_mixed_mi.columns = MultiIndex.from_tuples(self.df_mixed_mi.columns.map((lambda x: (x, x))))
-
-    def time_dropna_axis0_all(self):
-        self.df.dropna(how='all', axis=0)
-
-    def time_dropna_axis0_any(self):
-        self.df.dropna(how='any', axis=0)
+    def time_dropna(self, how, axis):
+        self.df.dropna(how=how, axis=axis)
 
-    def time_dropna_axis1_all(self):
-        self.df.dropna(how='all', axis=1)
+    def time_dropna_axis_mixed_dtypes(self, how, axis):
+        self.df_mixed.dropna(how=how, axis=axis)
 
-    def time_dropna_axis1_any(self):
-        self.df.dropna(how='any', axis=1)
 
-    def time_dropna_axis0_all_mixed_dtypes(self):
-        self.df_mixed.dropna(how='all', axis=0)
+class Count(object):
 
-    def time_dropna_axis0_any_mixed_dtypes(self):
-        self.df_mixed.dropna(how='any', axis=0)
-
-    def time_dropna_axis1_all_mixed_dtypes(self):
-        self.df_mixed.dropna(how='all', axis=1)
+    goal_time = 0.2
 
-    def time_dropna_axis1_any_mixed_dtypes(self):
-        self.df_mixed.dropna(how='any', axis=1)
+    params = [0, 1]
+    param_names = ['axis']
 
-    def time_count_level_axis0_multi(self):
-        self.df_mi.count(axis=0, level=1)
+    def setup(self, axis):
+        np.random.seed(1234)
+        self.df = DataFrame(np.random.randn(10000, 1000))
+        self.df.ix[50:1000, 20:50] = np.nan
+        self.df.ix[2000:3000] = np.nan
+        self.df.ix[:, 60:70] = np.nan
+        self.df_mixed = self.df.copy()
+        self.df_mixed['foo'] = 'bar'
 
-    def time_count_level_axis1_multi(self):
-        self.df_mi.count(axis=1, level=1)
+        self.df.index = MultiIndex.from_arrays([self.df.index, self.df.index])
+        self.df.columns = MultiIndex.from_arrays([self.df.columns,
+                                                  self.df.columns])
+        self.df_mixed.index = MultiIndex.from_arrays([self.df_mixed.index,
+                                                      self.df_mixed.index])
+        self.df_mixed.columns = MultiIndex.from_arrays([self.df_mixed.columns,
+                                                        self.df_mixed.columns])
 
-    def time_count_level_axis0_mixed_dtypes_multi(self):
-        self.df_mixed_mi.count(axis=0, level=1)
+    def time_count_level_multi(self, axis):
+        self.df.count(axis=axis, level=1)
 
-    def time_count_level_axis1_mixed_dtypes_multi(self):
-        self.df_mixed_mi.count(axis=1, level=1)
+    def time_count_level_mixed_dtypes_multi(self, axis):
+        self.df_mixed.count(axis=axis, level=1)
 
 
 class Apply(object):
+
     goal_time = 0.2
 
     def setup(self):
+        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(1000, 100))
 
         self.s = Series(np.arange(1028.0))
         self.df2 = DataFrame({i: self.s for i in range(1028)})
-
         self.df3 = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
 
     def time_apply_user_func(self):
-        self.df2.apply((lambda x: np.corrcoef(x, self.s)[(0, 1)]))
+        self.df2.apply(lambda x: np.corrcoef(x, self.s)[(0, 1)])
 
     def time_apply_axis_1(self):
-        self.df.apply((lambda x: (x + 1)), axis=1)
+        self.df.apply(lambda x: x + 1, axis=1)
 
     def time_apply_lambda_mean(self):
-        self.df.apply((lambda x: x.mean()))
+        self.df.apply(lambda x: x.mean())
 
     def time_apply_np_mean(self):
         self.df.apply(np.mean)
 
     def time_apply_pass_thru(self):
-        self.df.apply((lambda x: x))
+        self.df.apply(lambda x: x)
 
     def time_apply_ref_by_name(self):
-        self.df3.apply((lambda x: (x['A'] + x['B'])), axis=1)
+        self.df3.apply(lambda x: x['A'] + x['B'], axis=1)
 
 
-#----------------------------------------------------------------------
-# dtypes
+class Dtypes(object):
 
-class frame_dtypes(object):
     goal_time = 0.2
 
     def setup(self):
+        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(1000, 1000))
 
     def time_frame_dtypes(self):
         self.df.dtypes
 
-#----------------------------------------------------------------------
-# equals
 
 class Equals(object):
+
     goal_time = 0.2
 
     def setup(self):
-        self.float_df = DataFrame(np.random.randn(1000, 1000))
-        self.object_df = DataFrame(([(['foo'] * 1000)] * 1000))
-        self.nonunique_cols = self.object_df.copy()
-        self.nonunique_cols.columns = (['A'] * len(self.nonunique_cols.columns))
-        self.pairs = dict([(name, self.make_pair(frame)) for (name, frame) in (
-            ('float_df', self.float_df), ('object_df', self.object_df),
-            ('nonunique_cols', self.nonunique_cols))])
-
-    def make_pair(self, frame):
-        self.df = frame
-        self.df2 = self.df.copy()
-        self.df2.ix[((-1), (-1))] = np.nan
-        return (self.df, self.df2)
+        N = 10**3
+        np.random.seed(1234)
+        self.float_df = DataFrame(np.random.randn(N, N))
+        self.float_df_nan = self.float_df.copy()
+        self.float_df_nan.iloc[-1, -1] = np.nan
 
-    def test_equal(self, name):
-        (self.df, self.df2) = self.pairs[name]
-        return self.df.equals(self.df)
+        self.object_df = DataFrame('foo', index=range(N), columns=range(N))
+        self.object_df_nan = self.object_df.copy()
+        self.object_df_nan.iloc[-1, -1] = np.nan
 
-    def test_unequal(self, name):
-        (self.df, self.df2) = self.pairs[name]
-        return self.df.equals(self.df2)
+        self.nonunique_cols = self.object_df.copy()
+        self.nonunique_cols.columns = ['A'] * len(self.nonunique_cols.columns)
+        self.nonunique_cols_nan = self.nonunique_cols.copy()
+        self.nonunique_cols_nan.iloc[-1, -1] = np.nan
 
     def time_frame_float_equal(self):
-        self.test_equal('float_df')
+        self.float_df.equals(self.float_df)
 
     def time_frame_float_unequal(self):
-        self.test_unequal('float_df')
+        self.float_df.equals(self.float_df_nan)
 
     def time_frame_nonunique_equal(self):
-        self.test_equal('nonunique_cols')
+        self.nonunique_cols.equals(self.nonunique_cols)
 
     def time_frame_nonunique_unequal(self):
-        self.test_unequal('nonunique_cols')
+        self.nonunique_cols.equals(self.nonunique_cols_nan)
 
     def time_frame_object_equal(self):
-        self.test_equal('object_df')
+        self.object_df.equals(self.object_df)
 
     def time_frame_object_unequal(self):
-        self.test_unequal('object_df')
+        self.object_df.equals(self.object_df_nan)
 
 
 class Interpolate(object):
+
     goal_time = 0.2
+    params = [None, 'infer']
+    param_names = ['downcast']
 
-    def setup(self):
+    def setup(self, downcast):
+        N = 10000
+        np.random.seed(1234)
         # this is the worst case, where every column has NaNs.
-        self.df = DataFrame(randn(10000, 100))
+        self.df = DataFrame(np.random.randn(N, 100))
         self.df.values[::2] = np.nan
 
-        self.df2 = DataFrame(
-            {'A': np.arange(0, 10000), 'B': np.random.randint(0, 100, 10000),
-             'C': randn(10000), 'D': randn(10000),})
+        self.df2 = DataFrame({'A': np.arange(0, N),
+                              'B': np.random.randint(0, 100, N),
+                              'C': np.random.randn(N),
+                              'D': np.random.randn(N)})
         self.df2.loc[1::5, 'A'] = np.nan
         self.df2.loc[1::5, 'C'] = np.nan
 
-    def time_interpolate(self):
-        self.df.interpolate()
-
-    def time_interpolate_some_good(self):
-        self.df2.interpolate()
+    def time_interpolate(self, downcast):
+        self.df.interpolate(downcast=downcast)
 
-    def time_interpolate_some_good_infer(self):
-        self.df2.interpolate(downcast='infer')
+    def time_interpolate_some_good(self, downcast):
+        self.df2.interpolate(downcast=downcast)
 
 
 class Shift(object):
     # frame shift speedup issue-5609
     goal_time = 0.2
+    params = [0, 1]
+    param_names = ['axis']
 
-    def setup(self):
+    def setup(self, axis):
+        np.random.seed(1234)
         self.df = DataFrame(np.random.rand(10000, 500))
 
-    def time_shift_axis0(self):
-        self.df.shift(1, axis=0)
-
-    def time_shift_axis_1(self):
-        self.df.shift(1, axis=1)
+    def time_shift(self, axis):
+        self.df.shift(1, axis=axis)
 
 
-#-----------------------------------------------------------------------------
-# from_records issue-6700
+class FromRecords(object):
 
-class frame_from_records_generator(object):
     goal_time = 0.2
+    params = [None, 1000]
+    param_names = ['nrows']
 
-    def get_data(self, n=100000):
-        return ((x, (x * 20), (x * 100)) for x in range(n))
+    def setup(self, nrows):
+        N = 100000
+        self.gen = ((x, (x * 20), (x * 100)) for x in range(N))
 
-    def time_frame_from_records_generator(self):
-        self.df = DataFrame.from_records(self.get_data())
+    def time_frame_from_records_generator(self, nrows):
+        # issue-6700
+        self.df = DataFrame.from_records(self.gen, nrows=nrows)
 
-    def time_frame_from_records_generator_nrows(self):
-        self.df = DataFrame.from_records(self.get_data(), nrows=1000)
 
-
-
-#-----------------------------------------------------------------------------
-# nunique
-
-class frame_nunique(object):
+class Nunique(object):
 
     def setup(self):
-        self.data = np.random.randn(10000, 1000)
-        self.df = DataFrame(self.data)
+        np.random.seed(1234)
+        self.df = DataFrame(np.random.randn(10000, 1000))
 
     def time_frame_nunique(self):
         self.df.nunique()
 
 
+class Duplicated(object):
 
-#-----------------------------------------------------------------------------
-# duplicated
-
-class frame_duplicated(object):
     goal_time = 0.2
 
     def setup(self):
-        self.n = (1 << 20)
-        self.t = date_range('2015-01-01', freq='S', periods=(self.n // 64))
-        self.xs = np.random.randn((self.n // 64)).round(2)
-        self.df = DataFrame({'a': np.random.randint(((-1) << 8), (1 << 8), self.n), 'b': np.random.choice(self.t, self.n), 'c': np.random.choice(self.xs, self.n), })
-
-        self.df2 = DataFrame(np.random.randn(1000, 100).astype(str))
+        np.random.seed(1234)
+        n = (1 << 20)
+        t = date_range('2015-01-01', freq='S', periods=(n // 64))
+        xs = np.random.randn(n // 64).round(2)
+        self.df = DataFrame({'a': np.random.randint(-1 << 8, 1 << 8, n),
+                             'b': np.random.choice(t, n),
+                             'c': np.random.choice(xs, n)})
+        self.df2 = DataFrame(np.random.randn(1000, 100).astype(str)).T
 
     def time_frame_duplicated(self):
         self.df.duplicated()
 
     def time_frame_duplicated_wide(self):
-        self.df2.T.duplicated()
-
-
-
-
-
-
-
-
-
-
-
-
-
+        self.df2.duplicated()
 
 
+class XS(object):
 
-
-class frame_xs_col(object):
     goal_time = 0.2
+    params = [0, 1]
+    param_names = ['axis']
 
-    def setup(self):
-        self.df = DataFrame(randn(1, 100000))
-
-    def time_frame_xs_col(self):
-        self.df.xs(50000, axis=1)
-
-
-class frame_xs_row(object):
-    goal_time = 0.2
+    def setup(self, axis):
+        np.random.seed(1234)
+        self.N = 10**4
+        self.df = DataFrame(np.random.randn(self.N, self.N))
 
-    def setup(self):
-        self.df = DataFrame(randn(100000, 1))
+    def time_frame_xs(self, axis):
+        self.df.xs(self.N / 2, axis=axis)
 
-    def time_frame_xs_row(self):
-        self.df.xs(50000)
 
+class SortIndex(object):
 
-class frame_sort_index(object):
     goal_time = 0.2
+    params = [True, False]
+    param_names = ['ascending']
 
-    def setup(self):
-        self.df = DataFrame(randn(1000000, 2), columns=list('AB'))
+    def setup(self, ascending):
+        np.random.seed(1234)
+        self.df = DataFrame(np.random.randn(1000000, 2), columns=list('AB'))
 
-    def time_frame_sort_index(self):
-        self.df.sort_index()
+    def time_frame_sort_index(self, ascending):
+        self.df.sort_index(ascending=ascending)
 
 
-class frame_sort_index_by_columns(object):
+class SortIndexByColumns(object):
+
     goal_time = 0.2
 
     def setup(self):
-        self.N = 10000
-        self.K = 10
-        self.key1 = tm.makeStringIndex(self.N).values.repeat(self.K)
-        self.key2 = tm.makeStringIndex(self.N).values.repeat(self.K)
-        self.df = DataFrame({'key1': self.key1, 'key2': self.key2, 'value': np.random.randn((self.N * self.K)), })
-        self.col_array_list = list(self.df.values.T)
+        np.random.seed(1234)
+        N = 10000
+        K = 10
+        self.df = DataFrame({'key1': tm.makeStringIndex(N).values.repeat(K),
+                             'key2': tm.makeStringIndex(N).values.repeat(K),
+                             'value': np.random.randn(N * K)})
 
     def time_frame_sort_index_by_columns(self):
         self.df.sort_index(by=['key1', 'key2'])
 
 
-class frame_quantile_axis1(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.df = DataFrame(np.random.randn(1000, 3),
-                            columns=list('ABC'))
-
-    def time_frame_quantile_axis1(self):
-        self.df.quantile([0.1, 0.5], axis=1)
-
-
-#----------------------------------------------------------------------
-# boolean indexing
-
-class frame_boolean_row_select(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.df = DataFrame(randn(10000, 100))
-        self.bool_arr = np.zeros(10000, dtype=bool)
-        self.bool_arr[:1000] = True
-
-    def time_frame_boolean_row_select(self):
-        self.df[self.bool_arr]
-
-class frame_getitem_single_column(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.df = DataFrame(randn(10000, 1000))
-        self.df2 = DataFrame(randn(3000, 1), columns=['A'])
-        self.df3 = DataFrame(randn(3000, 1))
-
-    def h(self):
-        for i in range(10000):
-            self.df2['A']
-
-    def j(self):
-        for i in range(10000):
-            self.df3[0]
-
-    def time_frame_getitem_single_column(self):
-        self.h()
-
-    def time_frame_getitem_single_column2(self):
-        self.j()
-
-
-#----------------------------------------------------------------------
-# assignment
-
-class frame_assign_timeseries_index(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.idx = date_range('1/1/2000', periods=100000, freq='H')
-        self.df = DataFrame(randn(100000, 1), columns=['A'], index=self.idx)
-
-    def time_frame_assign_timeseries_index(self):
-        self.f(self.df)
-
-    def f(self, df):
-        self.x = self.df.copy()
-        self.x['date'] = self.x.index
-
+class Quantile(object):
 
-
-# insert many columns
-
-class frame_insert_100_columns_begin(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.N = 1000
-
-    def f(self, K=100):
-        self.df = DataFrame(index=range(self.N))
-        self.new_col = np.random.randn(self.N)
-        for i in range(K):
-            self.df.insert(0, i, self.new_col)
-
-    def g(self, K=500):
-        self.df = DataFrame(index=range(self.N))
-        self.new_col = np.random.randn(self.N)
-        for i in range(K):
-            self.df[i] = self.new_col
-
-    def time_frame_insert_100_columns_begin(self):
-        self.f()
-
-    def time_frame_insert_500_columns_end(self):
-        self.g()
-
-
-
-#----------------------------------------------------------------------
-# strings methods, #2602
-
-class series_string_vector_slice(object):
     goal_time = 0.2
+    params = [0, 1]
+    param_names = ['axis']
 
-    def setup(self):
-        self.s = Series((['abcdefg', np.nan] * 500000))
-
-    def time_series_string_vector_slice(self):
-        self.s.str[:5]
+    def setup(self, axis):
+        np.random.seed(1234)
+        self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
 
+    def time_frame_quantile(self, axis):
+        self.df.quantile([0.1, 0.5], axis=axis)
 
-#----------------------------------------------------------------------
-# df.info() and get_dtype_counts() # 2807
 
-class frame_get_dtype_counts(object):
+class GetDtypeCounts(object):
+    # 2807
     goal_time = 0.2
 
     def setup(self):
+        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(10, 10000))
 
     def time_frame_get_dtype_counts(self):
         self.df.get_dtype_counts()
 
+    def time_info(self):
+        self.df.info()
+
+
+class Nlargest(object):
 
-class frame_nlargest(object):
     goal_time = 0.2
 
     def setup(self):
-        self.df = DataFrame(np.random.randn(1000, 3),
-                            columns=list('ABC'))
+        np.random.seed(1234)
+        self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
 
     def time_frame_nlargest(self):
         self.df.nlargest(100, 'A')
diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
index f3e7ebbbd33e8..f271b82c758ee 100644
--- a/asv_bench/benchmarks/indexing.py
+++ b/asv_bench/benchmarks/indexing.py
@@ -303,3 +303,69 @@ def time_lookup_ix(self):
 
     def time_lookup_loc(self):
         self.s.loc
+
+
+class BooleanRowSelect(object):
+
+    goal_time = 0.2
+
+    def setup(self):
+        N = 10000
+        np.random.seed(1234)
+        self.df = DataFrame(np.random.randn(N, 100))
+        self.bool_arr = np.zeros(N, dtype=bool)
+        self.bool_arr[:1000] = True
+
+    def time_frame_boolean_row_select(self):
+        self.df[self.bool_arr]
+
+
+class GetItemSingleColumn(object):
+
+    goal_time = 0.2
+
+    def setup(self):
+        np.random.seed(1234)
+        self.df2 = DataFrame(np.random.randn(3000, 1), columns=['A'])
+        self.df3 = DataFrame(np.random.randn(3000, 1))
+
+    def time_frame_getitem_single_column_label(self):
+        self.df2['A']
+
+    def time_frame_getitem_single_column_int(self):
+        self.df3[0]
+
+
+class AssignTimeseriesIndex(object):
+
+    goal_time = 0.2
+
+    def setup(self):
+        N = 100000
+        np.random.seed(1234)
+        idx = date_range('1/1/2000', periods=N, freq='H')
+        self.df = DataFrame(np.random.randn(N, 1), columns=['A'], index=idx)
+
+    def time_frame_assign_timeseries_index(self):
+        self.df['date'] = self.df.index
+
+
+class InsertColumns(object):
+
+    goal_time = 0.2
+
+    def setup(self):
+        self.N = 10**3
+        self.df = DataFrame(index=range(self.N))
+
+    def time_insert(self):
+        np.random.seed(1234)
+        for i in range(100):
+            # self.df persists between calls, so labels may already exist
+            self.df.insert(0, i, np.random.randn(self.N),
+                           allow_duplicates=True)
+
+    def time_assign_with_setitem(self):
+        np.random.seed(1234)
+        for i in range(100):
+            self.df[i] = np.random.randn(self.N)
diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index c1600d4e07f58..0c36c85413c76 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -105,3 +105,15 @@ def setup(self):
 
     def time_encode_decode(self):
         self.ser.str.encode('utf-8').str.decode('utf-8')
+
+
+class StringSlice(object):
+
+    goal_time = 0.2
+
+    def setup(self):
+        self.s = Series(['abcdefg', np.nan] * 500000)
+
+    def time_series_string_vector_slice(self):
+        # GH 2602
+        self.s.str[:5]
\ No newline at end of file

From 6b581f3a74071909791c9307c30eed3594134a6d Mon Sep 17 00:00:00 2001
From: Matt Roeschke <emailformattr@gmail.com>
Date: Mon, 27 Nov 2017 17:07:08 -0800
Subject: [PATCH 2/3] Add newline at the end of strings.py

---
 asv_bench/benchmarks/strings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index 0c36c85413c76..948d4b92a5a57 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -116,4 +116,4 @@ def setup(self):
 
     def time_series_string_vector_slice(self):
         # GH 2602
-        self.s.str[:5]
\ No newline at end of file
+        self.s.str[:5]

From 12e4686f2925eb9f1168f2a8939a9d5562d7515e Mon Sep 17 00:00:00 2001
From: Matt Roeschke <emailformattr@gmail.com>
Date: Tue, 28 Nov 2017 22:21:45 -0800
Subject: [PATCH 3/3] Address comments

---
 asv_bench/benchmarks/frame_ctor.py       | 15 ++++++
 asv_bench/benchmarks/frame_methods.py    | 67 +++++++-----------------
 asv_bench/benchmarks/pandas_vb_common.py |  6 ++-
 3 files changed, 39 insertions(+), 49 deletions(-)

diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py
index 5fad7b682c2ed..d577ebc20a31c 100644
--- a/asv_bench/benchmarks/frame_ctor.py
+++ b/asv_bench/benchmarks/frame_ctor.py
@@ -124,3 +124,18 @@ def setup(self, offset, n_steps):
 
     def time_frame_ctor(self, offset, n_steps):
         DataFrame(self.d)
+
+
+class FromRecords(object):
+
+    goal_time = 0.2
+    params = [None, 1000]
+    param_names = ['nrows']
+
+    def setup(self, nrows):
+        N = 100000
+        self.gen = ((x, (x * 20), (x * 100)) for x in range(N))
+
+    def time_frame_from_records_generator(self, nrows):
+        # issue-6700; NOTE(review): self.gen is one-shot, rebuilt only in setup
+        self.df = DataFrame.from_records(self.gen, nrows=nrows)
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
index 9a6221b9fd6d2..7ed341425e561 100644
--- a/asv_bench/benchmarks/frame_methods.py
+++ b/asv_bench/benchmarks/frame_methods.py
@@ -3,6 +3,7 @@
 import pandas.util.testing as tm
 from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
                     isnull, NaT)
+from .pandas_vb_common import setup
 
 
 class GetNumericData(object):
@@ -10,7 +11,6 @@ class GetNumericData(object):
     goal_time = 0.2
 
     def setup(self):
-        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(10000, 25))
         self.df['foo'] = 'bar'
         self.df['bar'] = 'baz'
@@ -25,7 +25,6 @@ class Lookup(object):
     goal_time = 0.2
 
     def setup(self):
-        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(10000, 8),
                             columns=list('abcdefgh'))
         self.df['foo'] = 'bar'
@@ -49,7 +48,6 @@ class Reindex(object):
 
     def setup(self):
         N = 10**3
-        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(N * 10, N))
         self.idx = np.arange(4 * N, 7 * N)
         self.df2 = DataFrame(
@@ -72,7 +70,6 @@ def time_reindex_both_axes_ix(self):
         self.df.ix[self.idx, self.idx]
 
     def time_reindex_upcast(self):
-        np.random.seed(1234)
         self.df2.reindex(np.random.permutation(range(1200)))
 
 
@@ -82,7 +79,6 @@ class Iteration(object):
 
     def setup(self):
         N = 1000
-        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(N * 10, N))
         self.df2 = DataFrame(np.random.randn(N * 50, 10))
         self.df3 = DataFrame(np.random.randn(N, 5 * N),
@@ -107,13 +103,16 @@ def time_itertuples(self):
         for row in self.df2.itertuples():
             pass
 
+    def time_iterrows(self):
+        for row in self.df.iterrows():
+            pass
+
 
 class ToString(object):
 
     goal_time = 0.2
 
     def setup(self):
-        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(100, 10))
 
     def time_to_string_floats(self):
@@ -166,7 +165,6 @@ class MaskBool(object):
     goal_time = 0.2
 
     def setup(self):
-        np.random.seed(1234)
         data = np.random.randn(1000, 500)
         df = DataFrame(data)
         df = df.where(df > 0)
@@ -186,7 +184,6 @@ class Isnull(object):
 
     def setup(self):
         N = 10**3
-        np.random.seed(1234)
         self.df_no_null = DataFrame(np.random.randn(N, N))
 
         sample = np.array([np.nan, 1.0])
@@ -222,7 +219,6 @@ class Fillna(object):
     param_names = ['inplace', 'method']
 
     def setup(self, inplace, method):
-        np.random.seed(1234)
         values = np.random.randn(10000, 100)
         values[::2] = np.nan
         self.df = DataFrame(values)
@@ -238,7 +234,6 @@ class Dropna(object):
     param_names = ['how', 'axis']
 
     def setup(self, how, axis):
-        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(10000, 1000))
         self.df.ix[50:1000, 20:50] = np.nan
         self.df.ix[2000:3000] = np.nan
@@ -261,7 +256,6 @@ class Count(object):
     param_names = ['axis']
 
     def setup(self, axis):
-        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(10000, 1000))
         self.df.ix[50:1000, 20:50] = np.nan
         self.df.ix[2000:3000] = np.nan
@@ -289,7 +283,6 @@ class Apply(object):
     goal_time = 0.2
 
     def setup(self):
-        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(1000, 100))
 
         self.s = Series(np.arange(1028.0))
@@ -320,7 +313,6 @@ class Dtypes(object):
     goal_time = 0.2
 
     def setup(self):
-        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(1000, 1000))
 
     def time_frame_dtypes(self):
@@ -333,7 +325,6 @@ class Equals(object):
 
     def setup(self):
         N = 10**3
-        np.random.seed(1234)
         self.float_df = DataFrame(np.random.randn(N, N))
         self.float_df_nan = self.float_df.copy()
         self.float_df_nan.iloc[-1, -1] = np.nan
@@ -374,7 +365,6 @@ class Interpolate(object):
 
     def setup(self, downcast):
         N = 10000
-        np.random.seed(1234)
         # this is the worst case, where every column has NaNs.
         self.df = DataFrame(np.random.randn(N, 100))
         self.df.values[::2] = np.nan
@@ -400,32 +390,15 @@ class Shift(object):
     param_names = ['axis']
 
     def setup(self, axis):
-        np.random.seed(1234)
         self.df = DataFrame(np.random.rand(10000, 500))
 
     def time_shift(self, axis):
         self.df.shift(1, axis=axis)
 
 
-class FromRecords(object):
-
-    goal_time = 0.2
-    params = [None, 1000]
-    param_names = ['nrows']
-
-    def setup(self, nrows):
-        N = 100000
-        self.gen = ((x, (x * 20), (x * 100)) for x in range(N))
-
-    def time_frame_from_records_generator(self, nrows):
-        # issue-6700
-        self.df = DataFrame.from_records(self.gen, nrows=nrows)
-
-
 class Nunique(object):
 
     def setup(self):
-        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(10000, 1000))
 
     def time_frame_nunique(self):
@@ -437,7 +410,6 @@ class Duplicated(object):
     goal_time = 0.2
 
     def setup(self):
-        np.random.seed(1234)
         n = (1 << 20)
         t = date_range('2015-01-01', freq='S', periods=(n // 64))
         xs = np.random.randn(n // 64).round(2)
@@ -460,7 +432,6 @@ class XS(object):
     param_names = ['axis']
 
     def setup(self, axis):
-        np.random.seed(1234)
         self.N = 10**4
         self.df = DataFrame(np.random.randn(self.N, self.N))
 
@@ -468,18 +439,17 @@ def time_frame_xs(self, axis):
         self.df.xs(self.N / 2, axis=axis)
 
 
-class SortIndex(object):
+class SortValues(object):
 
     goal_time = 0.2
     params = [True, False]
     param_names = ['ascending']
 
     def setup(self, ascending):
-        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(1000000, 2), columns=list('AB'))
 
-    def time_frame_sort_index(self, ascending):
-        self.df.sort_index(ascending=ascending)
+    def time_frame_sort_values(self, ascending):
+        self.df.sort_values(by='A', ascending=ascending)
 
 
 class SortIndexByColumns(object):
@@ -487,15 +457,14 @@ class SortIndexByColumns(object):
     goal_time = 0.2
 
     def setup(self):
-        np.random.seed(1234)
         N = 10000
         K = 10
         self.df = DataFrame({'key1': tm.makeStringIndex(N).values.repeat(K),
                              'key2': tm.makeStringIndex(N).values.repeat(K),
                              'value': np.random.randn(N * K)})
 
-    def time_frame_sort_index_by_columns(self):
-        self.df.sort_index(by=['key1', 'key2'])
+    def time_frame_sort_values_by_columns(self):
+        self.df.sort_values(by=['key1', 'key2'])
 
 
 class Quantile(object):
@@ -505,7 +474,6 @@ class Quantile(object):
     param_names = ['axis']
 
     def setup(self, axis):
-        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
 
     def time_frame_quantile(self, axis):
@@ -517,7 +485,6 @@ class GetDtypeCounts(object):
     goal_time = 0.2
 
     def setup(self):
-        np.random.seed(1234)
         self.df = DataFrame(np.random.randn(10, 10000))
 
     def time_frame_get_dtype_counts(self):
@@ -527,13 +494,17 @@ def time_info(self):
         self.df.info()
 
 
-class Nlargest(object):
+class NSort(object):
 
     goal_time = 0.2
+    params = ['first', 'last']
+    param_names = ['keep']
 
-    def setup(self):
-        np.random.seed(1234)
+    def setup(self, keep):
         self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
 
-    def time_frame_nlargest(self):
-        self.df.nlargest(100, 'A')
+    def time_nlargest(self, keep):
+        self.df.nlargest(100, 'A', keep=keep)
+
+    def time_nsmallest(self, keep):
+        self.df.nsmallest(100, 'A', keep=keep)
diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py
index b1a58e49fe86c..62eb826418030 100644
--- a/asv_bench/benchmarks/pandas_vb_common.py
+++ b/asv_bench/benchmarks/pandas_vb_common.py
@@ -13,7 +13,11 @@
 except ImportError:
     pass
 
-np.random.seed(1234)
+# This function just needs to be imported into each benchmark file in order to
+# set up the random seed before each function.
+# http://asv.readthedocs.io/en/latest/writing_benchmarks.html
+def setup(*args, **kwargs):
+    np.random.seed(1234)
 
 # try em until it works!
 for imp in ['pandas._libs.lib', 'pandas.lib', 'pandas_tseries']: